// ============================================================================
// hakmem_learner.c - Background Learner (自動CAP/W_MAX調整)
// ============================================================================
//
// 機能: バックグラウンドスレッドでヒット率・統計を監視し、ポリシーを自動調整
//
// 学習アルゴリズム:
//   ┌───────────────────────────────────────────────────────────────┐
//   │ 1. CAP学習（ヒット率ベース）                                   │
//   │    - 定期的にヒット率をサンプリング（デフォルト1秒ごと）      │
//   │    - 目標ヒット率と比較（Mid: 0.65、Large: 0.55）            │
//   │    - 不足していればCAP増加、十分ならCAP減少                   │
//   │    - Dwell（安定期間）で振動抑制                              │
//   │                                                                │
//   │ 2. Budget enforcement + Water-filling                         │
//   │    - 合計CAP上限（Budget）を設定可能                          │
//   │    - 超過時: 需要の低いクラスから削減                         │
//   │    - 未達時: 需要の高いクラスへ配分（Water-filling有効時）    │
//   │                                                                │
//   │ 3. W_MAX学習（UCB1 + Canary Deployment）                      │
//   │    - 複数のW_MAX候補をUCB1（多腕バンディット）で探索          │
//   │    - Canary方式: 一時的に候補を適用し、効果測定               │
//   │    - 改善なければ最良値へロールバック                         │
//   │                                                                │
//   │ 4. DYN1/DYN2自動割り当て                                       │
//   │    - サイズヒストグラムからピーク検出                         │
//   │    - 固定クラスと被らない範囲で動的クラスを設定               │
//   └───────────────────────────────────────────────────────────────┘
//
// 環境変数（主要なもの）:
//   ┌─────────────────────────────────┬─────────┬──────────────────┐
//   │ 変数                            │ デフォルト│ 説明            │
//   ├─────────────────────────────────┼─────────┼──────────────────┤
//   │ HAKMEM_LEARN                    │ 0       │ 学習モード有効化 │
//   │ HAKMEM_LEARN_WINDOW_MS          │ 1000    │ サンプリング間隔 │
//   │ HAKMEM_TARGET_HIT_MID           │ 0.65    │ Mid目標ヒット率  │
//   │ HAKMEM_TARGET_HIT_LARGE         │ 0.55    │ Large目標ヒット率│
//   │ HAKMEM_CAP_STEP_MID             │ 4       │ Mid CAP更新幅    │
//   │ HAKMEM_CAP_STEP_LARGE           │ 1       │ Large CAP更新幅  │
//   │ HAKMEM_CAP_DWELL_SEC_MID        │ 3       │ Mid安定期間(秒)  │
//   │ HAKMEM_CAP_DWELL_SEC_LG         │ 5       │ Large安定期間    │
//   │ HAKMEM_BUDGET_MID               │ 0       │ Mid総CAP上限     │
//   │ HAKMEM_BUDGET_LARGE             │ 0       │ Large総CAP上限   │
//   │ HAKMEM_WF                       │ 0       │ Water-filling    │
//   │ HAKMEM_DYN1_AUTO                │ 0       │ DYN1自動割り当て │
//   │ HAKMEM_WMAX_LEARN               │ 0       │ W_MAX学習有効化  │
//   │ HAKMEM_WMAX_CANDIDATES_MID      │ 1.4,... │ Mid候補リスト    │
//   │ HAKMEM_WMAX_CANDIDATES_LARGE    │ 1.25,...│ Large候補リスト  │
//   │ HAKMEM_WMAX_CANARY              │ 1       │ Canary方式有効   │
//   │ HAKMEM_THP_LEARN                │ 0       │ THP閾値学習      │
//   └─────────────────────────────────┴─────────┴──────────────────┘
//
// 使用例:
//   # 基本的な学習モード（CAP自動調整のみ）
//   HAKMEM_LEARN=1 ./app
//
//   # 目標ヒット率を調整（Mid: 70%、Large: 60%）
//   HAKMEM_LEARN=1 HAKMEM_TARGET_HIT_MID=0.7 HAKMEM_TARGET_HIT_LARGE=0.6 ./app
//
//   # Budget制約（Mid: 300 pages、Large: 50 bundles）+ Water-filling
//   HAKMEM_LEARN=1 HAKMEM_BUDGET_MID=300 HAKMEM_BUDGET_LARGE=50 HAKMEM_WF=1 ./app
//
//   # DYN1自動割り当て（8-16KBギャップをピークで埋める）
//   HAKMEM_LEARN=1 HAKMEM_DYN1_AUTO=1 HAKMEM_CAP_MID_DYN1=64 ./app
//
//   # W_MAX学習（Canary方式で安全に探索）
//   HAKMEM_LEARN=1 HAKMEM_WMAX_LEARN=1 HAKMEM_WMAX_CANDIDATES_MID=1.4,1.6,1.8 HAKMEM_WMAX_CANDIDATES_LARGE=1.3,1.6,2.0 ./app
//
// 注意事項:
//   - 学習モードは高負荷ワークロードで効果的
//   - 低トラフィック時は誤調整の可能性あり（min_samples調整推奨）
//   - W_MAX学習はリスクあり（内部断片化増加）→ Canary推奨
//   - FrozenPolicy更新はRCUライク（grace period未実装）
// ============================================================================

#include "hakmem_learner.h"
#include "hakmem_internal.h"
#include "hakmem_syscall.h"   // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
#include "hakmem_policy.h"
#include "hakmem_pool.h"
#include "hakmem_l25_pool.h"
#include "hakmem_ace_stats.h"
#include "hakmem_size_hist.h"
#include "hakmem_learn_log.h"
#include "hakmem_tiny_superslab.h"  // Phase 8.4: ACE Observer
#include "box/learner_env_box.h"    // Box: Learner ENV decision
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

// Handle of the background learner thread (valid only after a successful
// pthread_create in hkm_learner_init).
static pthread_t g_thr;

// Run flag: set to 1 by hkm_learner_init(), cleared by hkm_learner_shutdown()
// and polled by the learner thread. It is written and read from different
// threads, so it is atomic; plain reads/assignments on it are sequentially
// consistent loads/stores.
static atomic_int g_run = 0;

// Previous per-class hit/miss snapshots, used to turn the cumulative counters
// returned by the pool snapshot functions into per-window deltas.
static uint64_t prev_mid_hits[POOL_NUM_CLASSES];
static uint64_t prev_mid_misses[POOL_NUM_CLASSES];
static uint64_t prev_lg_hits[L25_NUM_CLASSES];
static uint64_t prev_lg_misses[L25_NUM_CLASSES];

// Previous values of the extra pool metrics (trylock attempts / successes and
// ring underflows) for the per-window "M," line appended to the log file.
static uint64_t prev_try_attempts = 0;
static uint64_t prev_try_success  = 0;
static uint64_t prev_ring_underflow = 0;

// Read an integer environment variable.
//
// Returns defv when the variable is unset. Otherwise parses a leading decimal
// integer the way atoi() did (leading whitespace skipped, trailing text
// ignored, non-numeric text yields 0), but out-of-range values saturate to
// INT_MIN/INT_MAX instead of invoking atoi()'s undefined behaviour.
static inline int get_env_int(const char* name, int defv) {
    const char* e = getenv(name);
    if (!e) return defv;

    long v = strtol(e, NULL, 10);   // saturates to LONG_MIN/LONG_MAX on overflow
    if (v > (long)INT_MAX) return INT_MAX;
    if (v < (long)INT_MIN) return INT_MIN;
    return (int)v;
}

// Parse a comma-separated list of floating-point numbers ("1.4,1.6,1.8").
//
//   s    : input string, may be NULL (-> 0 values)
//   out  : destination array with room for at least maxn doubles
//   maxn : maximum number of values to store
//
// Returns the number of values written to out. Tokens that are empty, do not
// start with a number, are not finite (inf/nan/overflow), or do not fit in the
// 63-character scratch buffer are skipped as a whole. Trailing text after a
// valid number inside a token is ignored, as it was with atof().
static int parse_float_list(const char* s, double* out, int maxn) {
    if (!s || !out || maxn <= 0) return 0;

    int n = 0;
    const char* p = s;
    char buf[64];

    while (*p && n < maxn) {
        // Copy one token; keep consuming past the buffer limit so the tail of
        // an over-long token is never re-parsed as a separate number.
        size_t len = 0;
        int overlong = 0;
        while (*p && *p != ',') {
            if (len < sizeof(buf) - 1) buf[len++] = *p;
            else overlong = 1;
            p++;
        }
        buf[len] = '\0';
        if (*p == ',') p++;

        if (len == 0 || overlong) continue;

        char* stop = NULL;
        double v = strtod(buf, &stop);
        if (stop == buf || !isfinite(v)) continue;   // not a usable number
        out[n++] = v;
    }
    return n;
}

// --------------------- UCB1 helpers (discrete candidates) ------------------
// Capacity of the per-arm arrays in ucb1_t.
enum { UCB1_MAX_ARMS = 16 };

// State of one UCB1 bandit over a small set of discrete candidate values.
typedef struct {
    double values[UCB1_MAX_ARMS];     // candidate value of each arm
    int    pulls[UCB1_MAX_ARMS];      // number of scores recorded per arm
    double sum_score[UCB1_MAX_ARMS];  // sum of scores recorded per arm
    int    n;                         // number of arms in use
    int    cur;                       // index of the arm currently applied
    int    dwell_ticks;               // time accumulated since the last arm selection
    int    dwell_sec;                 // minimum dwell before a new arm may be selected
} ucb1_t;

// Reset a bandit and load its candidate values.
//   u         : bandit to initialise (fully overwritten)
//   vals      : candidate values, at least min(n, 16) entries
//   n         : number of candidates; values beyond 16 are ignored
//   dwell_sec : stored as the bandit's minimum dwell
static void ucb1_init(ucb1_t* u, const double* vals, int n, int dwell_sec) {
    const int arms = (n > 16) ? 16 : n;

    memset(u, 0, sizeof(*u));
    u->n = arms;

    int k = 0;
    while (k < arms) {
        u->values[k]    = vals[k];
        u->pulls[k]     = 0;
        u->sum_score[k] = 0.0;
        k++;
    }

    u->cur         = 0;
    u->dwell_ticks = 0;
    u->dwell_sec   = dwell_sec;
}

// Choose the next arm to play.
// Any arm that has never been pulled wins immediately (lowest index first);
// otherwise the arm with the highest "mean + 1.5 * sqrt(ln(total) / pulls)"
// is returned, ties going to the lowest index. Returns 0 when no arms exist.
static int ucb1_select(ucb1_t* u) {
    const int arms = u->n;
    if (arms <= 0) {
        return 0;
    }

    // Exploration first: unpulled arms take priority over any UCB score.
    for (int a = 0; a < arms; a++) {
        if (u->pulls[a] == 0) {
            return a;
        }
    }

    double pulls_total = 0.0;
    for (int a = 0; a < arms; a++) {
        pulls_total += (double)u->pulls[a];
    }

    int    winner     = 0;
    double winner_ucb = -1e100;
    for (int a = 0; a < arms; a++) {
        const double pulls = (double)u->pulls[a];
        const double mean  = u->sum_score[a] / pulls;
        const double bonus = 1.5 * sqrt(log(pulls_total) / pulls);
        const double ucb   = mean + bonus;
        if (ucb > winner_ucb) {
            winner_ucb = ucb;
            winner     = a;
        }
    }
    return winner;
}

// Record one observed score for the given arm; out-of-range arms are ignored.
static void ucb1_update(ucb1_t* u, int arm, double score) {
    const int in_range = (arm >= 0) && (arm < u->n);
    if (in_range) {
        u->sum_score[arm] += score;
        u->pulls[arm]++;
    }
}

static void* learner_main(void* arg) {
    (void)arg;
    // Config
    int window_ms = get_env_int("HAKMEM_LEARN_WINDOW_MS", 1000);
    double tgt_mid = (getenv("HAKMEM_TARGET_HIT_MID") ? atof(getenv("HAKMEM_TARGET_HIT_MID")) : 0.65);
    double tgt_lg  = (getenv("HAKMEM_TARGET_HIT_LARGE") ? atof(getenv("HAKMEM_TARGET_HIT_LARGE")) : 0.55);
    double eps = 0.03; // hysteresis band
    int step_mid = get_env_int("HAKMEM_CAP_STEP_MID", 4);      // pages per update
    int step_lg  = get_env_int("HAKMEM_CAP_STEP_LARGE", 1);    // bundles per update
    int min_mid  = get_env_int("HAKMEM_CAP_MIN_MID", 8);
    int max_mid  = get_env_int("HAKMEM_CAP_MAX_MID", 2048);
    int min_lg   = get_env_int("HAKMEM_CAP_MIN_LARGE", 1);
    int max_lg   = get_env_int("HAKMEM_CAP_MAX_LARGE", 512);
    int budget_mid = get_env_int("HAKMEM_BUDGET_MID", 0);      // 0=disabled
    int budget_lg  = get_env_int("HAKMEM_BUDGET_LARGE", 0);    // 0=disabled
    int min_samples = get_env_int("HAKMEM_LEARN_MIN_SAMPLES", 256);
    // Dwell (stability) for CAP updates
    int cap_dwell_mid = get_env_int("HAKMEM_CAP_DWELL_SEC_MID", 3);
    int cap_dwell_lg  = get_env_int("HAKMEM_CAP_DWELL_SEC_LG", 5);
    int wf_enabled  = get_env_int("HAKMEM_WF", 0);
    // weights for simple gain proxy (if needed later)
    double w_miss = (getenv("HAKMEM_GAIN_W_MISS") ? atof(getenv("HAKMEM_GAIN_W_MISS")) : 1.0);
    (void)w_miss;

    // Initialize prev counters
    memset(prev_mid_hits, 0, sizeof(prev_mid_hits));
    memset(prev_mid_misses, 0, sizeof(prev_mid_misses));
    memset(prev_lg_hits, 0, sizeof(prev_lg_hits));
    memset(prev_lg_misses, 0, sizeof(prev_lg_misses));

    // per-class dwell counters (seconds since last change)
    static int mid_dwell_ticks[POOL_NUM_CLASSES] = {0};
    static int lg_dwell_ticks[L25_NUM_CLASSES] = {0};

    // Phase 8.4: Check ACE Observer setting once (outside loop)
    const char* ace_observe = getenv("HAKMEM_ACE_OBSERVE");
    int ace_enabled = (ace_observe && atoi(ace_observe) != 0);
    const char* ace_debug = getenv("HAKMEM_ACE_DEBUG");
    int ace_debug_enabled = (ace_debug && atoi(ace_debug) != 0);

    if (ace_enabled && ace_debug_enabled) {
        fprintf(stderr, "[Learner] ACE Observer enabled (debug on)\n");
    }

    while (g_run) {
        usleep(window_ms * 1000);
        if (!g_run) break;

        // Phase 8.4: ACE Observer (Tiny SuperSlab sizing)
        if (ace_enabled) {
            if (ace_debug_enabled) {
                fprintf(stderr, "[Learner] Calling ACE Observer...\n");
            }
            hak_tiny_superslab_ace_observe_all();
        }

        // Optional: flush sampled logs to file
        const char* logf = getenv("HAKMEM_LOG_FILE");
        if (logf && *logf) {
            hkm_log_flush_file(logf);
            // Append metrics line: M,timestamp_ns,try_attempts_delta,try_success_delta,ring_underflow_delta,trylock_rate
            uint64_t cur_try=0, cur_succ=0, cur_uf=0;
            hak_pool_extra_metrics_snapshot(&cur_try, &cur_succ, &cur_uf);
            uint64_t d_try = (cur_try >= prev_try_attempts) ? (cur_try - prev_try_attempts) : 0;
            uint64_t d_suc = (cur_succ >= prev_try_success) ? (cur_succ - prev_try_success) : 0;
            uint64_t d_uf  = (cur_uf >= prev_ring_underflow) ? (cur_uf - prev_ring_underflow) : 0;
            prev_try_attempts = cur_try; prev_try_success = cur_succ; prev_ring_underflow = cur_uf;
            double rate = (d_try > 0) ? ((double)d_suc / (double)d_try) : 0.0;
            // get timestamp
            struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts);
            unsigned long long ts_ns = (unsigned long long)ts.tv_sec*1000000000ull + (unsigned long long)ts.tv_nsec;
            FILE* fp = fopen(logf, "a");
            if (fp) {
                fprintf(fp, "M,%llu,%llu,%llu,%llu,%.6f\n", ts_ns,
                    (unsigned long long)d_try, (unsigned long long)d_suc, (unsigned long long)d_uf, rate);
                fclose(fp);
            }
        }

        // Snapshot current stats
        uint64_t mid_hits[POOL_NUM_CLASSES], mid_misses[POOL_NUM_CLASSES], mid_refills[POOL_NUM_CLASSES], mid_frees[POOL_NUM_CLASSES];
        uint64_t lg_hits[L25_NUM_CLASSES], lg_misses[L25_NUM_CLASSES], lg_refills[L25_NUM_CLASSES], lg_frees[L25_NUM_CLASSES];
        hak_pool_stats_snapshot(mid_hits, mid_misses, mid_refills, mid_frees);
        hak_l25_pool_stats_snapshot(lg_hits, lg_misses, lg_refills, lg_frees);

        const FrozenPolicy* cur = hkm_policy_get();
        if (!cur) continue;

        FrozenPolicy* np = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
        if (!np) continue;
        memcpy(np, cur, sizeof(FrozenPolicy));

        // Adjust Mid caps by hit rate vs target (delta over window) with dwell
        int mid_classes = 5;
        if (cur->mid_dyn1_bytes != 0 && cur->mid_dyn2_bytes != 0) mid_classes = 7;
        else if (cur->mid_dyn1_bytes != 0 || cur->mid_dyn2_bytes != 0) mid_classes = 6;
        for (int i = 0; i < mid_classes; i++) {
            uint64_t dh = mid_hits[i] - prev_mid_hits[i];
            uint64_t dm = mid_misses[i] - prev_mid_misses[i];
            uint64_t dt = dh + dm;
            if (dt < (uint64_t)min_samples) continue;
            double hit = (dt > 0) ? ((double)dh / (double)dt) : 1.0;
            int cap;
            if (i < 5) cap = np->mid_cap[i];
            else if (i == 5) cap = np->mid_cap_dyn1;
            else cap = np->mid_cap_dyn2;
            // dwell gate per class
            mid_dwell_ticks[i] += window_ms/1000;
            if (mid_dwell_ticks[i] < cap_dwell_mid) {
                // skip change this window
            } else {
                if (hit < (tgt_mid - eps)) {
                    cap += step_mid;
                } else if (hit > (tgt_mid + eps)) {
                    cap -= step_mid;
                }
                if (cap < min_mid) cap = min_mid;
                if (cap > max_mid) cap = max_mid;
                // reset dwell only if actual change happens
                int old = (i < 5) ? np->mid_cap[i] : (i==5 ? np->mid_cap_dyn1 : np->mid_cap_dyn2);
                if (cap != old) mid_dwell_ticks[i] = 0;
            }
            if (i < 5) np->mid_cap[i] = (uint16_t)cap;
            else if (i == 5) np->mid_cap_dyn1 = (uint16_t)cap;
            else np->mid_cap_dyn2 = (uint16_t)cap;
        }

        // Optional: auto-assign DYN1 to peak size in 2–32KiB when enabled
        const char* dyn_auto = getenv("HAKMEM_DYN1_AUTO");
        const char* dyn2_auto = getenv("HAKMEM_DYN2_AUTO");
        if ((dyn_auto && atoi(dyn_auto) != 0) || (dyn2_auto && atoi(dyn2_auto) != 0)) {
            // Snapshot first 40 KiB (bins up to 40), reset after read
            uint64_t bins[41];
            hkm_size_hist_snapshot(bins, 41, 1);
            int best_kb = 0, second_kb = 0; uint64_t best_cnt = 0, second_cnt = 0;
            for (int kb = 2; kb <= 32; kb++) {
                uint64_t c = bins[kb];
                if (c > best_cnt) { second_cnt = best_cnt; second_kb = best_kb; best_cnt = c; best_kb = kb; }
                else if (c > second_cnt && kb != best_kb) { second_cnt = c; second_kb = kb; }
            }
            // Choose DYN1 around peak if not conflicting with fixed classes
            if (dyn_auto && atoi(dyn_auto) != 0 && best_kb >= 2 && best_kb <= 32) {
                size_t dyn_bytes = (size_t)best_kb * 1024;
                if (dyn_bytes != 2048 && dyn_bytes != 4096 && dyn_bytes != 8192 && dyn_bytes != 16384 && dyn_bytes != 32768) {
                    np->mid_dyn1_bytes = (uint32_t)dyn_bytes;
                }
            }
            if (dyn2_auto && atoi(dyn2_auto) != 0 && second_kb >= 2 && second_kb <= 32) {
                size_t dyn_bytes = (size_t)second_kb * 1024;
                if (dyn_bytes != 2048 && dyn_bytes != 4096 && dyn_bytes != 8192 && dyn_bytes != 16384 && dyn_bytes != 32768 && dyn_bytes != np->mid_dyn1_bytes) {
                    np->mid_dyn2_bytes = (uint32_t)dyn_bytes;
                }
            }
        }

        // Adjust Large caps similarly with dwell
        for (int i = 0; i < L25_NUM_CLASSES; i++) {
            uint64_t dh = lg_hits[i] - prev_lg_hits[i];
            uint64_t dm = lg_misses[i] - prev_lg_misses[i];
            uint64_t dt = dh + dm;
            if (dt < (uint64_t)min_samples) continue;
            double hit = (dt > 0) ? ((double)dh / (double)dt) : 1.0;
            int cap = np->large_cap[i];
            lg_dwell_ticks[i] += window_ms/1000;
            if (lg_dwell_ticks[i] >= cap_dwell_lg) {
                if (hit < (tgt_lg - eps)) {
                    cap += step_lg;
                } else if (hit > (tgt_lg + eps)) {
                    cap -= step_lg;
                }
                int old = np->large_cap[i];
                if (cap != old) lg_dwell_ticks[i] = 0;
            }
            if (cap < min_lg) cap = min_lg;
            if (cap > max_lg) cap = max_lg;
            np->large_cap[i] = (uint16_t)cap;
        }

        // Budget enforcement / Water-filling for Mid
        if (budget_mid > 0) {
            // Assemble class arrays
            int idx_map[7]; int m=0; for (int i=0;i<5;i++) idx_map[m++]=i; if (cur->mid_dyn1_bytes) idx_map[m++]=5; if (cur->mid_dyn2_bytes) idx_map[m++]=6;
            // compute sum caps
            #define GET_MID_CAP(npX, slotX) ((slotX)<5 ? (npX)->mid_cap[(slotX)] : ((slotX)==5 ? (npX)->mid_cap_dyn1 : (npX)->mid_cap_dyn2))
            #define SET_MID_CAP(npX, slotX, valX) do { if ((slotX)<5) (npX)->mid_cap[(slotX)] = (uint16_t)(valX); else if ((slotX)==5) (npX)->mid_cap_dyn1 = (uint16_t)(valX); else (npX)->mid_cap_dyn2 = (uint16_t)(valX); } while(0)
            int sum = 0; for (int k=0;k<m;k++){ int i=idx_map[k]; int v=GET_MID_CAP(np, i); sum += v; }
            // need score = miss ratio in window（高いほど需要大）
            double need[7]; for (int k=0;k<m;k++){ int i=idx_map[k]; uint64_t dh = (i<5? (mid_hits[i]-prev_mid_hits[i]) : 0); uint64_t dm = (i<5? (mid_misses[i]-prev_mid_misses[i]) : 0); double dt=(double)(dh+dm); need[k] = (dt>0.0)? ((double)dm/dt) : 0.0; }
            // If sum > budget: remove from lowest-need first
            if (sum > budget_mid) {
                while (sum > budget_mid) {
                    // find min need with cap>min_mid
                    int best_k = -1; double best_need = 1e9;
                    for (int k=0;k<m;k++){ int slot=idx_map[k]; int cap=GET_MID_CAP(np, slot); if (cap<=min_mid) continue; if (need[k] < best_need){ best_need=need[k]; best_k=k; } }
                    if (best_k < 0) break;
                    int slot = idx_map[best_k]; int nv = GET_MID_CAP(np, slot) - step_mid; if (nv < min_mid) nv = min_mid; SET_MID_CAP(np, slot, nv); sum = 0; for (int k=0;k<m;k++){ int sl=idx_map[k]; sum += GET_MID_CAP(np, sl); }
                }
            } else if (wf_enabled && sum < budget_mid) {
                // sum < budget: allocate to highest-need first
                while (sum < budget_mid) {
                    int best_k = -1; double best_need = -1e9;
                    for (int k=0;k<m;k++){ if (need[k] > best_need){ best_need=need[k]; best_k=k; } }
                    if (best_k < 0) break;
                    int slot = idx_map[best_k]; int nv = GET_MID_CAP(np, slot) + step_mid; SET_MID_CAP(np, slot, nv); sum += step_mid;
                }
            }
        }
        if (budget_lg > 0) {
            int sum = 0; for (int i=0;i<L25_NUM_CLASSES;i++) sum += np->large_cap[i];
            // need score = miss ratio（Large）
            double need_lg[L25_NUM_CLASSES]; for (int i=0;i<L25_NUM_CLASSES;i++){ uint64_t dh=lg_hits[i]-prev_lg_hits[i]; uint64_t dm=lg_misses[i]-prev_lg_misses[i]; double dt=(double)(dh+dm); need_lg[i]=(dt>0.0)?((double)dm/dt):0.0; }
            if (sum > budget_lg) {
                while (sum > budget_lg) {
                    int best=-1; double best_need=1e9;
                    for (int i=0;i<L25_NUM_CLASSES;i++){ if (np->large_cap[i] <= min_lg) continue; if (need_lg[i] < best_need){ best_need=need_lg[i]; best=i; } }
                    if (best<0) break;
                    int nv=np->large_cap[best]-step_lg; if (nv<min_lg) nv=min_lg; np->large_cap[best]=nv; sum=0; for (int i=0;i<L25_NUM_CLASSES;i++) sum += np->large_cap[i];
                }
            } else if (wf_enabled && sum < budget_lg) {
                while (sum < budget_lg) {
                    int best=-1; double best_need=-1e9; for (int i=0;i<L25_NUM_CLASSES;i++){ if (need_lg[i] > best_need){ best_need=need_lg[i]; best=i; } }
                    if (best<0) break;
                    np->large_cap[best]+=step_lg; sum += step_lg;
                }
            }
        }

        // Publish new policy
        hkm_policy_publish(np);
        if (cur->mid_dyn1_bytes != 0 || cur->mid_dyn2_bytes != 0) {
            fprintf(stderr, "[Learner] Published caps: Mid={%u,%u,%u,%u,%u,D1:%u,D2:%u} Large={%u,%u,%u,%u,%u}\n",
                (unsigned)np->mid_cap[0], (unsigned)np->mid_cap[1], (unsigned)np->mid_cap[2], (unsigned)np->mid_cap[3], (unsigned)np->mid_cap[4], (unsigned)np->mid_cap_dyn1, (unsigned)np->mid_cap_dyn2,
                (unsigned)np->large_cap[0], (unsigned)np->large_cap[1], (unsigned)np->large_cap[2], (unsigned)np->large_cap[3], (unsigned)np->large_cap[4]);
        } else {
            fprintf(stderr, "[Learner] Published caps: Mid={%u,%u,%u,%u,%u} Large={%u,%u,%u,%u,%u}\n",
                (unsigned)np->mid_cap[0], (unsigned)np->mid_cap[1], (unsigned)np->mid_cap[2], (unsigned)np->mid_cap[3], (unsigned)np->mid_cap[4],
                (unsigned)np->large_cap[0], (unsigned)np->large_cap[1], (unsigned)np->large_cap[2], (unsigned)np->large_cap[3], (unsigned)np->large_cap[4]);
        }

        // Update prev snapshots
        for (int i=0;i<mid_classes;i++){ prev_mid_hits[i]=mid_hits[i]; prev_mid_misses[i]=mid_misses[i]; }
        for (int i=0;i<L25_NUM_CLASSES;i++){ prev_lg_hits[i]=lg_hits[i]; prev_lg_misses[i]=lg_misses[i]; }

        // Optional: W_MAX learning (discrete candidates + dwell + canary)
        const char* wlearn = getenv("HAKMEM_WMAX_LEARN");
        if (wlearn && atoi(wlearn) != 0) {
            static ucb1_t u_mid, u_lg; static int inited=0;
            static double vals_mid[8], vals_lg[8]; static int n_mid=0, n_lg=0;
            // Canary control
            static int canary_mid=0, canary_lg=0; // active flags
            static double base_mid_mean=0.0, base_lg_mean=0.0; // baselines
            static double trial_mid_sum=0.0, trial_lg_sum=0.0; static int trial_mid_n=0, trial_lg_n=0; static int trial_mid_sec=0, trial_lg_sec=0;
            static int trial_sec = 0; static double adopt_pct = 0.01; static int canary_on = 0;
            if (!inited) {
                n_mid = parse_float_list(getenv("HAKMEM_WMAX_CANDIDATES_MID"), vals_mid, 8);
                if (n_mid <= 0) { vals_mid[0]=1.4; vals_mid[1]=1.6; vals_mid[2]=1.7; n_mid=3; }
                n_lg  = parse_float_list(getenv("HAKMEM_WMAX_CANDIDATES_LARGE"), vals_lg, 8);
                if (n_lg <= 0) { vals_lg[0]=1.25; vals_lg[1]=1.30; vals_lg[2]=1.40; n_lg=3; }
                ucb1_init(&u_mid, vals_mid, n_mid, get_env_int("HAKMEM_WMAX_DWELL_SEC", 10));
                ucb1_init(&u_lg,  vals_lg,  n_lg,  get_env_int("HAKMEM_WMAX_DWELL_SEC", 10));
                trial_sec = get_env_int("HAKMEM_WMAX_TRIAL_SEC", 5);
                adopt_pct = (getenv("HAKMEM_WMAX_ADOPT_PCT") ? atof(getenv("HAKMEM_WMAX_ADOPT_PCT")) : 0.01);
                canary_on = get_env_int("HAKMEM_WMAX_CANARY", 1);
                inited = 1;
            }
            u_mid.dwell_ticks += window_ms/1000; u_lg.dwell_ticks += window_ms/1000;
            // score（簡易）: mid_hit+large_hit - (mid_miss+large_miss) - 2*l1_fallback
            hkm_ace_stats_snapshot_t ace;
            hkm_ace_stats_snapshot(&ace, 1);
            double score = (double)(ace.mid_hit + ace.large_hit) - (double)(ace.mid_miss + ace.large_miss) - 2.0*(double)ace.l1_fallback;
            // update current arms with window score
            ucb1_update(&u_mid, u_mid.cur, score);
            ucb1_update(&u_lg,  u_lg.cur,  score);
            // accumulate trial score if canary active
            if (canary_mid) { trial_mid_sum += score; trial_mid_n++; trial_mid_sec += window_ms/1000; }
            if (canary_lg)  { trial_lg_sum  += score; trial_lg_n++;  trial_lg_sec  += window_ms/1000; }
            // maybe switch arm if dwell passed
            if (u_mid.dwell_ticks >= u_mid.dwell_sec && !canary_mid) {
                u_mid.dwell_ticks = 0; int next = ucb1_select(&u_mid);
                if (next != u_mid.cur) {
                    if (canary_on) {
                        // start canary: publish candidate temporarily and measure
                        FrozenPolicy* np3 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
                        if (np3) {
                            const FrozenPolicy* cur2 = hkm_policy_get();
                            double prev_mean = (u_mid.pulls[u_mid.cur] > 0) ? (u_mid.sum_score[u_mid.cur] / (double)u_mid.pulls[u_mid.cur]) : 0.0;
                            base_mid_mean = prev_mean; trial_mid_sum = 0.0; trial_mid_n = 0; trial_mid_sec = 0;
                            memcpy(np3, cur2, sizeof(FrozenPolicy));
                            np3->w_max_mid = u_mid.values[next]; hkm_policy_publish(np3);
                            canary_mid = 1; // set active; do not change u_mid.cur yet
                            fprintf(stderr, "[Learner] W_MAX mid canary start: %.2f (base=%.3f)\n", np3->w_max_mid, base_mid_mean);
                            // store candidate index into u_mid.cur temporarily for scoring but keep canary flag
                            u_mid.cur = next;
                        }
                    } else {
                        FrozenPolicy* np3 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
                        if (np3) {
                            memcpy(np3, hkm_policy_get(), sizeof(FrozenPolicy));
                            np3->w_max_mid = u_mid.values[next]; hkm_policy_publish(np3);
                            u_mid.cur = next; fprintf(stderr, "[Learner] W_MAX mid=%.2f (UCB1)\n", np3->w_max_mid);
                        }
                    }
                }
            }
            if (u_lg.dwell_ticks >= u_lg.dwell_sec && !canary_lg) {
                u_lg.dwell_ticks = 0; int next = ucb1_select(&u_lg);
                if (next != u_lg.cur) {
                    if (canary_on) {
                        FrozenPolicy* np4 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
                        if (np4) {
                            const FrozenPolicy* cur2 = hkm_policy_get();
                            double prev_mean = (u_lg.pulls[u_lg.cur] > 0) ? (u_lg.sum_score[u_lg.cur] / (double)u_lg.pulls[u_lg.cur]) : 0.0;
                            base_lg_mean = prev_mean; trial_lg_sum = 0.0; trial_lg_n = 0; trial_lg_sec = 0;
                            memcpy(np4, cur2, sizeof(FrozenPolicy));
                            np4->w_max_large = u_lg.values[next]; hkm_policy_publish(np4);
                            canary_lg = 1; u_lg.cur = next;
                            fprintf(stderr, "[Learner] W_MAX large canary start: %.2f (base=%.3f)\n", np4->w_max_large, base_lg_mean);
                        }
                    } else {
                        FrozenPolicy* np4 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
                        if (np4) {
                            memcpy(np4, hkm_policy_get(), sizeof(FrozenPolicy));
                            np4->w_max_large = u_lg.values[next]; hkm_policy_publish(np4);
                            u_lg.cur = next; fprintf(stderr, "[Learner] W_MAX large=%.2f (UCB1)\n", np4->w_max_large);
                        }
                    }
                }
            }
            // finish canary trials if time elapsed
            if (canary_mid && trial_mid_sec >= trial_sec && trial_mid_n > 0) {
                double trial_mean = trial_mid_sum / (double)trial_mid_n;
                double thresh = base_mid_mean * (1.0 + adopt_pct);
                if (!(trial_mean >= thresh)) {
                    // revert to best baseline arm (approx: pick best mean so far)
                    int besti = 0; double bestm=-1e100; for (int i=0;i<u_mid.n;i++){ if (u_mid.pulls[i]>0){ double m=u_mid.sum_score[i]/(double)u_mid.pulls[i]; if (m>bestm){bestm=m;besti=i;} } }
                    FrozenPolicy* npR = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
                    if (npR) { memcpy(npR, hkm_policy_get(), sizeof(FrozenPolicy)); npR->w_max_mid = u_mid.values[besti]; hkm_policy_publish(npR); u_mid.cur = besti; fprintf(stderr, "[Learner] W_MAX mid canary revert to %.2f (trial=%.3f base=%.3f)\n", npR->w_max_mid, trial_mean, base_mid_mean); }
                } else {
                    fprintf(stderr, "[Learner] W_MAX mid canary adopt (trial=%.3f base=%.3f)\n", trial_mean, base_mid_mean);
                }
                canary_mid = 0; trial_mid_sum=0.0; trial_mid_n=0; trial_mid_sec=0;
            }
            if (canary_lg && trial_lg_sec >= trial_sec && trial_lg_n > 0) {
                double trial_mean = trial_lg_sum / (double)trial_lg_n;
                double thresh = base_lg_mean * (1.0 + adopt_pct);
                if (!(trial_mean >= thresh)) {
                    int besti = 0; double bestm=-1e100; for (int i=0;i<u_lg.n;i++){ if (u_lg.pulls[i]>0){ double m=u_lg.sum_score[i]/(double)u_lg.pulls[i]; if (m>bestm){bestm=m;besti=i;} } }
                    FrozenPolicy* npR = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
                    if (npR) { memcpy(npR, hkm_policy_get(), sizeof(FrozenPolicy)); npR->w_max_large = u_lg.values[besti]; hkm_policy_publish(npR); u_lg.cur = besti; fprintf(stderr, "[Learner] W_MAX large canary revert to %.2f (trial=%.3f base=%.3f)\n", npR->w_max_large, trial_mean, base_lg_mean); }
                } else {
                    fprintf(stderr, "[Learner] W_MAX large canary adopt (trial=%.3f base=%.3f)\n", trial_mean, base_lg_mean);
                }
                canary_lg = 0; trial_lg_sum=0.0; trial_lg_n=0; trial_lg_sec=0;
            }
        }

        // Optional: THP threshold learning (discrete + canary)
        const char* thp_learn = getenv("HAKMEM_THP_LEARN");
        if (thp_learn && atoi(thp_learn) != 0) {
            static ucb1_t u_thp; static int inited=0;
            static double thp_vals[8]; int n=0;
            static int canary=0; static double base_mean=0.0; static double trial_sum=0.0; static int trial_n=0; static int trial_sec=0; static int trial_elapsed=0; static double adopt_pct=0.01; static int canary_on=1;
            if (!inited) {
                const char* s = getenv("HAKMEM_THP_CANDIDATES");
                n = parse_float_list(s, thp_vals, 8);
                if (n <= 0) { thp_vals[0]=524288; thp_vals[1]=786432; thp_vals[2]=1048576; thp_vals[3]=1572864; thp_vals[4]=2097152; n=5; }
                ucb1_init(&u_thp, thp_vals, n, get_env_int("HAKMEM_THP_DWELL_SEC", 15));
                trial_sec = get_env_int("HAKMEM_THP_TRIAL_SEC", 6);
                adopt_pct = (getenv("HAKMEM_THP_ADOPT_PCT") ? atof(getenv("HAKMEM_THP_ADOPT_PCT")) : 0.015);
                canary_on = get_env_int("HAKMEM_THP_CANARY", 1);
                inited=1;
            }
            u_thp.dwell_ticks += window_ms/1000;
            // reuse same score proxy
            hkm_ace_stats_snapshot_t ace;
            hkm_ace_stats_snapshot(&ace, 1);
            double score = (double)(ace.mid_hit + ace.large_hit) - (double)(ace.mid_miss + ace.large_miss) - 2.0*(double)ace.l1_fallback;
            ucb1_update(&u_thp, u_thp.cur, score);
            if (canary) { trial_sum += score; trial_n++; trial_elapsed += window_ms/1000; }
            if (u_thp.dwell_ticks >= u_thp.dwell_sec && !canary) {
                u_thp.dwell_ticks = 0; int next = ucb1_select(&u_thp);
                if (next != u_thp.cur) {
                    if (canary_on) {
                        FrozenPolicy* np5 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
                        if (np5) {
                            const FrozenPolicy* cur2 = hkm_policy_get();
                            double prev_mean = (u_thp.pulls[u_thp.cur] > 0) ? (u_thp.sum_score[u_thp.cur] / (double)u_thp.pulls[u_thp.cur]) : 0.0;
                            base_mean = prev_mean; trial_sum = 0.0; trial_n = 0; trial_elapsed = 0;
                            memcpy(np5, cur2, sizeof(FrozenPolicy));
                            np5->thp_threshold = (size_t)u_thp.values[next]; hkm_policy_publish(np5);
                            canary = 1; u_thp.cur = next;
                            fprintf(stderr, "[Learner] THP canary start: %zu (base=%.3f)\n", np5->thp_threshold, base_mean);
                        }
                    } else {
                        FrozenPolicy* np5 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
                        if (np5) {
                            memcpy(np5, hkm_policy_get(), sizeof(FrozenPolicy));
                            np5->thp_threshold = (size_t)u_thp.values[next]; hkm_policy_publish(np5);
                            u_thp.cur = next; fprintf(stderr, "[Learner] THP threshold=%zu (UCB1)\n", np5->thp_threshold);
                        }
                    }
                }
            }
            if (canary && trial_elapsed >= trial_sec && trial_n > 0) {
                double trial_mean = trial_sum / (double)trial_n;
                double thresh = base_mean * (1.0 + adopt_pct);
                if (!(trial_mean >= thresh)) {
                    int besti = 0; double bestm=-1e100; for (int i=0;i<u_thp.n;i++){ if (u_thp.pulls[i]>0){ double m=u_thp.sum_score[i]/(double)u_thp.pulls[i]; if (m>bestm){bestm=m;besti=i;} } }
                    FrozenPolicy* npR = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy));  // Phase 6.X P0
                    if (npR) { memcpy(npR, hkm_policy_get(), sizeof(FrozenPolicy)); npR->thp_threshold = (size_t)u_thp.values[besti]; hkm_policy_publish(npR); u_thp.cur = besti; fprintf(stderr, "[Learner] THP canary revert to %zu (trial=%.3f base=%.3f)\n", npR->thp_threshold, trial_mean, base_mean); }
                } else {
                    fprintf(stderr, "[Learner] THP canary adopt (trial=%.3f base=%.3f)\n", trial_mean, base_mean);
                }
                canary = 0; trial_sum = 0.0; trial_n = 0; trial_elapsed = 0;
            }
        }
    }
    return NULL;
}

// Start the background learner thread.
// Does nothing when the learner ENV box says not to run, or when the learner
// is already marked as running. On thread-creation failure the run flag is
// cleared again and an error is logged.
void hkm_learner_init(void) {
    if (!hak_learner_env_should_run() || g_run) {
        return;
    }

    g_run = 1;
    const int rc = pthread_create(&g_thr, NULL, learner_main, NULL);
    if (rc == 0) {
        fprintf(stderr, "[Learner] Started (CAP auto-tuner)\n");
        return;
    }

    g_run = 0;
    fprintf(stderr, "[Learner] pthread_create failed\n");
}

// Stop the background learner thread, if it is running: clear the run flag,
// wait for the thread to exit, then log the stop.
void hkm_learner_shutdown(void) {
    if (g_run) {
        g_run = 0;
        pthread_join(g_thr, NULL);
        fprintf(stderr, "[Learner] Stopped\n");
    }
}