// ============================================================================
// hakmem_learner.c - Background Learner (自動CAP/W_MAX調整)
// ============================================================================
//
// 機能: バックグラウンドスレッドでヒット率・統計を監視し、ポリシーを自動調整
//
// 学習アルゴリズム:
// ┌───────────────────────────────────────────────────────────────┐
// │ 1. CAP学習(ヒット率ベース)                                  │
// │    - 定期的にヒット率をサンプリング(デフォルト1秒ごと)      │
// │    - 目標ヒット率と比較(Mid: 0.65、Large: 0.55)             │
// │    - 不足していればCAP増加、十分ならCAP減少                   │
// │    - Dwell(安定期間)で振動抑制                              │
// │                                                               │
// │ 2. Budget enforcement + Water-filling                         │
// │    - 合計CAP上限(Budget)を設定可能                          │
// │    - 超過時: 需要の低いクラスから削減                         │
// │    - 未達時: 需要の高いクラスへ配分(Water-filling有効時)    │
// │                                                               │
// │ 3. W_MAX学習(UCB1 + Canary Deployment)                      │
// │    - 複数のW_MAX候補をUCB1(多腕バンディット)で探索          │
// │    - Canary方式: 一時的に候補を適用し、効果測定               │
// │    - 改善なければ最良値へロールバック                         │
// │                                                               │
// │ 4. DYN1/DYN2自動割り当て                                      │
// │    - サイズヒストグラムからピーク検出                         │
// │    - 固定クラスと被らない範囲で動的クラスを設定               │
// └───────────────────────────────────────────────────────────────┘
//
// 環境変数(主要なもの):
// ┌─────────────────────────────────┬─────────┬──────────────────┐
// │ 変数                            │ デフォルト│ 説明             │
// ├─────────────────────────────────┼─────────┼──────────────────┤
// │ HAKMEM_LEARN                    │ 0       │ 学習モード有効化 │
// │ HAKMEM_LEARN_WINDOW_MS          │ 1000    │ サンプリング間隔 │
// │ HAKMEM_TARGET_HIT_MID           │ 0.65    │ Mid目標ヒット率  │
// │ HAKMEM_TARGET_HIT_LARGE         │ 0.55    │ Large目標ヒット率│
// │ HAKMEM_CAP_STEP_MID             │ 4       │ Mid CAP更新幅    │
// │ HAKMEM_CAP_STEP_LARGE           │ 1       │ Large CAP更新幅  │
// │ HAKMEM_CAP_DWELL_SEC_MID        │ 3       │ Mid安定期間(秒)│
// │ HAKMEM_CAP_DWELL_SEC_LG         │ 5       │ Large安定期間    │
// │ HAKMEM_BUDGET_MID               │ 0       │ Mid総CAP上限     │
// │ HAKMEM_BUDGET_LARGE             │ 0       │ Large総CAP上限   │
// │ HAKMEM_WF                       │ 0       │ Water-filling    │
// │ HAKMEM_DYN1_AUTO                │ 0       │ DYN1自動割り当て │
// │ HAKMEM_WMAX_LEARN               │ 0       │ W_MAX学習有効化  │
// │ HAKMEM_WMAX_CANDIDATES_MID      │ 1.4,...
│ Mid候補リスト │ // │ HAKMEM_WMAX_CANDIDATES_LARGE │ 1.25,...│ Large候補リスト │ // │ HAKMEM_WMAX_CANARY │ 1 │ Canary方式有効 │ // │ HAKMEM_THP_LEARN │ 0 │ THP閾値学習 │ // └─────────────────────────────────┴─────────┴──────────────────┘ // // 使用例: // # 基本的な学習モード(CAP自動調整のみ) // HAKMEM_LEARN=1 ./app // // # 目標ヒット率を調整(Mid: 70%、Large: 60%) // HAKMEM_LEARN=1 HAKMEM_TARGET_HIT_MID=0.7 HAKMEM_TARGET_HIT_LARGE=0.6 ./app // // # Budget制約(Mid: 300 pages、Large: 50 bundles)+ Water-filling // HAKMEM_LEARN=1 HAKMEM_BUDGET_MID=300 HAKMEM_BUDGET_LARGE=50 HAKMEM_WF=1 ./app // // # DYN1自動割り当て(8-16KBギャップをピークで埋める) // HAKMEM_LEARN=1 HAKMEM_DYN1_AUTO=1 HAKMEM_CAP_MID_DYN1=64 ./app // // # W_MAX学習(Canary方式で安全に探索) // HAKMEM_LEARN=1 HAKMEM_WMAX_LEARN=1 HAKMEM_WMAX_CANDIDATES_MID=1.4,1.6,1.8 HAKMEM_WMAX_CANDIDATES_LARGE=1.3,1.6,2.0 ./app // // 注意事項: // - 学習モードは高負荷ワークロードで効果的 // - 低トラフィック時は誤調整の可能性あり(min_samples調整推奨) // - W_MAX学習はリスクあり(内部断片化増加)→ Canary推奨 // - FrozenPolicy更新はRCUライク(grace period未実装) // ============================================================================ #include "hakmem_learner.h" #include "hakmem_internal.h" #include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD) #include "hakmem_policy.h" #include "hakmem_pool.h" #include "hakmem_l25_pool.h" #include "hakmem_ace_stats.h" #include "hakmem_size_hist.h" #include "hakmem_learn_log.h" #include "hakmem_tiny_superslab.h" // Phase 8.4: ACE Observer #include "box/learner_env_box.h" // Box: Learner ENV decision #include #include #include #include #include #include static pthread_t g_thr; static int g_run = 0; // Previous snapshots for delta computation static uint64_t prev_mid_hits[POOL_NUM_CLASSES]; static uint64_t prev_mid_misses[POOL_NUM_CLASSES]; static uint64_t prev_lg_hits[L25_NUM_CLASSES]; static uint64_t prev_lg_misses[L25_NUM_CLASSES]; static uint64_t prev_try_attempts = 0; static uint64_t prev_try_success = 0; static uint64_t prev_ring_underflow = 0; static inline int get_env_int(const char* name, int 
defv) { const char* e = getenv(name); return (e ? atoi(e) : defv); } static int parse_float_list(const char* s, double* out, int maxn) { if (!s) return 0; int n = 0; const char* p = s; char buf[64]; while (*p && n < maxn) { int i = 0; while (*p && *p != ',' && i < (int)sizeof(buf)-1) buf[i++] = *p++; buf[i] = '\0'; if (i > 0) out[n++] = atof(buf); if (*p == ',') p++; } return n; } // --------------------- UCB1 helpers (discrete candidates) ------------------ typedef struct { double values[16]; int pulls[16]; double sum_score[16]; int n; int cur; int dwell_ticks; int dwell_sec; } ucb1_t; static void ucb1_init(ucb1_t* u, const double* vals, int n, int dwell_sec) { memset(u, 0, sizeof(*u)); if (n > 16) n = 16; u->n = n; for (int i=0;ivalues[i]=vals[i]; u->pulls[i]=0; u->sum_score[i]=0.0; } u->cur = 0; u->dwell_ticks = 0; u->dwell_sec = dwell_sec; } static int ucb1_select(ucb1_t* u) { // if any never pulled, pick it first for (int i=0;in;i++) if (u->pulls[i]==0) return i; // otherwise compute UCB double total = 0.0; for (int i=0;in;i++) total += (double)u->pulls[i]; double best_ucb = -1e100; int best_i = 0; for (int i=0;in;i++) { double mean = u->sum_score[i] / (double)u->pulls[i]; double bonus = 1.5 * sqrt(log(total) / (double)u->pulls[i]); double ucb = mean + bonus; if (ucb > best_ucb) { best_ucb = ucb; best_i = i; } } return best_i; } static void ucb1_update(ucb1_t* u, int arm, double score) { if (arm < 0 || arm >= u->n) return; u->pulls[arm] += 1; u->sum_score[arm] += score; } static void* learner_main(void* arg) { (void)arg; // Config int window_ms = get_env_int("HAKMEM_LEARN_WINDOW_MS", 1000); double tgt_mid = (getenv("HAKMEM_TARGET_HIT_MID") ? atof(getenv("HAKMEM_TARGET_HIT_MID")) : 0.65); double tgt_lg = (getenv("HAKMEM_TARGET_HIT_LARGE") ? 
atof(getenv("HAKMEM_TARGET_HIT_LARGE")) : 0.55); double eps = 0.03; // hysteresis band int step_mid = get_env_int("HAKMEM_CAP_STEP_MID", 4); // pages per update int step_lg = get_env_int("HAKMEM_CAP_STEP_LARGE", 1); // bundles per update int min_mid = get_env_int("HAKMEM_CAP_MIN_MID", 8); int max_mid = get_env_int("HAKMEM_CAP_MAX_MID", 2048); int min_lg = get_env_int("HAKMEM_CAP_MIN_LARGE", 1); int max_lg = get_env_int("HAKMEM_CAP_MAX_LARGE", 512); int budget_mid = get_env_int("HAKMEM_BUDGET_MID", 0); // 0=disabled int budget_lg = get_env_int("HAKMEM_BUDGET_LARGE", 0); // 0=disabled int min_samples = get_env_int("HAKMEM_LEARN_MIN_SAMPLES", 256); // Dwell (stability) for CAP updates int cap_dwell_mid = get_env_int("HAKMEM_CAP_DWELL_SEC_MID", 3); int cap_dwell_lg = get_env_int("HAKMEM_CAP_DWELL_SEC_LG", 5); int wf_enabled = get_env_int("HAKMEM_WF", 0); // weights for simple gain proxy (if needed later) double w_miss = (getenv("HAKMEM_GAIN_W_MISS") ? atof(getenv("HAKMEM_GAIN_W_MISS")) : 1.0); (void)w_miss; // Initialize prev counters memset(prev_mid_hits, 0, sizeof(prev_mid_hits)); memset(prev_mid_misses, 0, sizeof(prev_mid_misses)); memset(prev_lg_hits, 0, sizeof(prev_lg_hits)); memset(prev_lg_misses, 0, sizeof(prev_lg_misses)); // per-class dwell counters (seconds since last change) static int mid_dwell_ticks[POOL_NUM_CLASSES] = {0}; static int lg_dwell_ticks[L25_NUM_CLASSES] = {0}; // Phase 8.4: Check ACE Observer setting once (outside loop) const char* ace_observe = getenv("HAKMEM_ACE_OBSERVE"); int ace_enabled = (ace_observe && atoi(ace_observe) != 0); const char* ace_debug = getenv("HAKMEM_ACE_DEBUG"); int ace_debug_enabled = (ace_debug && atoi(ace_debug) != 0); if (ace_enabled && ace_debug_enabled) { fprintf(stderr, "[Learner] ACE Observer enabled (debug on)\n"); } while (g_run) { usleep(window_ms * 1000); if (!g_run) break; // Phase 8.4: ACE Observer (Tiny SuperSlab sizing) if (ace_enabled) { if (ace_debug_enabled) { fprintf(stderr, "[Learner] Calling ACE 
Observer...\n"); } hak_tiny_superslab_ace_observe_all(); } // Optional: flush sampled logs to file const char* logf = getenv("HAKMEM_LOG_FILE"); if (logf && *logf) { hkm_log_flush_file(logf); // Append metrics line: M,timestamp_ns,try_attempts_delta,try_success_delta,ring_underflow_delta,trylock_rate uint64_t cur_try=0, cur_succ=0, cur_uf=0; hak_pool_extra_metrics_snapshot(&cur_try, &cur_succ, &cur_uf); uint64_t d_try = (cur_try >= prev_try_attempts) ? (cur_try - prev_try_attempts) : 0; uint64_t d_suc = (cur_succ >= prev_try_success) ? (cur_succ - prev_try_success) : 0; uint64_t d_uf = (cur_uf >= prev_ring_underflow) ? (cur_uf - prev_ring_underflow) : 0; prev_try_attempts = cur_try; prev_try_success = cur_succ; prev_ring_underflow = cur_uf; double rate = (d_try > 0) ? ((double)d_suc / (double)d_try) : 0.0; // get timestamp struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); unsigned long long ts_ns = (unsigned long long)ts.tv_sec*1000000000ull + (unsigned long long)ts.tv_nsec; FILE* fp = fopen(logf, "a"); if (fp) { fprintf(fp, "M,%llu,%llu,%llu,%llu,%.6f\n", ts_ns, (unsigned long long)d_try, (unsigned long long)d_suc, (unsigned long long)d_uf, rate); fclose(fp); } } // Snapshot current stats uint64_t mid_hits[POOL_NUM_CLASSES], mid_misses[POOL_NUM_CLASSES], mid_refills[POOL_NUM_CLASSES], mid_frees[POOL_NUM_CLASSES]; uint64_t lg_hits[L25_NUM_CLASSES], lg_misses[L25_NUM_CLASSES], lg_refills[L25_NUM_CLASSES], lg_frees[L25_NUM_CLASSES]; hak_pool_stats_snapshot(mid_hits, mid_misses, mid_refills, mid_frees); hak_l25_pool_stats_snapshot(lg_hits, lg_misses, lg_refills, lg_frees); const FrozenPolicy* cur = hkm_policy_get(); if (!cur) continue; FrozenPolicy* np = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (!np) continue; memcpy(np, cur, sizeof(FrozenPolicy)); // Adjust Mid caps by hit rate vs target (delta over window) with dwell int mid_classes = 5; if (cur->mid_dyn1_bytes != 0 && cur->mid_dyn2_bytes != 0) mid_classes = 7; else if 
(cur->mid_dyn1_bytes != 0 || cur->mid_dyn2_bytes != 0) mid_classes = 6; for (int i = 0; i < mid_classes; i++) { uint64_t dh = mid_hits[i] - prev_mid_hits[i]; uint64_t dm = mid_misses[i] - prev_mid_misses[i]; uint64_t dt = dh + dm; if (dt < (uint64_t)min_samples) continue; double hit = (dt > 0) ? ((double)dh / (double)dt) : 1.0; int cap; if (i < 5) cap = np->mid_cap[i]; else if (i == 5) cap = np->mid_cap_dyn1; else cap = np->mid_cap_dyn2; // dwell gate per class mid_dwell_ticks[i] += window_ms/1000; if (mid_dwell_ticks[i] < cap_dwell_mid) { // skip change this window } else { if (hit < (tgt_mid - eps)) { cap += step_mid; } else if (hit > (tgt_mid + eps)) { cap -= step_mid; } if (cap < min_mid) cap = min_mid; if (cap > max_mid) cap = max_mid; // reset dwell only if actual change happens int old = (i < 5) ? np->mid_cap[i] : (i==5 ? np->mid_cap_dyn1 : np->mid_cap_dyn2); if (cap != old) mid_dwell_ticks[i] = 0; } if (i < 5) np->mid_cap[i] = (uint16_t)cap; else if (i == 5) np->mid_cap_dyn1 = (uint16_t)cap; else np->mid_cap_dyn2 = (uint16_t)cap; } // Optional: auto-assign DYN1 to peak size in 2–32KiB when enabled const char* dyn_auto = getenv("HAKMEM_DYN1_AUTO"); const char* dyn2_auto = getenv("HAKMEM_DYN2_AUTO"); if ((dyn_auto && atoi(dyn_auto) != 0) || (dyn2_auto && atoi(dyn2_auto) != 0)) { // Snapshot first 40 KiB (bins up to 40), reset after read uint64_t bins[41]; hkm_size_hist_snapshot(bins, 41, 1); int best_kb = 0, second_kb = 0; uint64_t best_cnt = 0, second_cnt = 0; for (int kb = 2; kb <= 32; kb++) { uint64_t c = bins[kb]; if (c > best_cnt) { second_cnt = best_cnt; second_kb = best_kb; best_cnt = c; best_kb = kb; } else if (c > second_cnt && kb != best_kb) { second_cnt = c; second_kb = kb; } } // Choose DYN1 around peak if not conflicting with fixed classes if (dyn_auto && atoi(dyn_auto) != 0 && best_kb >= 2 && best_kb <= 32) { size_t dyn_bytes = (size_t)best_kb * 1024; if (dyn_bytes != 2048 && dyn_bytes != 4096 && dyn_bytes != 8192 && dyn_bytes != 16384 && 
dyn_bytes != 32768) { np->mid_dyn1_bytes = (uint32_t)dyn_bytes; } } if (dyn2_auto && atoi(dyn2_auto) != 0 && second_kb >= 2 && second_kb <= 32) { size_t dyn_bytes = (size_t)second_kb * 1024; if (dyn_bytes != 2048 && dyn_bytes != 4096 && dyn_bytes != 8192 && dyn_bytes != 16384 && dyn_bytes != 32768 && dyn_bytes != np->mid_dyn1_bytes) { np->mid_dyn2_bytes = (uint32_t)dyn_bytes; } } } // Adjust Large caps similarly with dwell for (int i = 0; i < L25_NUM_CLASSES; i++) { uint64_t dh = lg_hits[i] - prev_lg_hits[i]; uint64_t dm = lg_misses[i] - prev_lg_misses[i]; uint64_t dt = dh + dm; if (dt < (uint64_t)min_samples) continue; double hit = (dt > 0) ? ((double)dh / (double)dt) : 1.0; int cap = np->large_cap[i]; lg_dwell_ticks[i] += window_ms/1000; if (lg_dwell_ticks[i] >= cap_dwell_lg) { if (hit < (tgt_lg - eps)) { cap += step_lg; } else if (hit > (tgt_lg + eps)) { cap -= step_lg; } int old = np->large_cap[i]; if (cap != old) lg_dwell_ticks[i] = 0; } if (cap < min_lg) cap = min_lg; if (cap > max_lg) cap = max_lg; np->large_cap[i] = (uint16_t)cap; } // Budget enforcement / Water-filling for Mid if (budget_mid > 0) { // Assemble class arrays int idx_map[7]; int m=0; for (int i=0;i<5;i++) idx_map[m++]=i; if (cur->mid_dyn1_bytes) idx_map[m++]=5; if (cur->mid_dyn2_bytes) idx_map[m++]=6; // compute sum caps #define GET_MID_CAP(npX, slotX) ((slotX)<5 ? (npX)->mid_cap[(slotX)] : ((slotX)==5 ? (npX)->mid_cap_dyn1 : (npX)->mid_cap_dyn2)) #define SET_MID_CAP(npX, slotX, valX) do { if ((slotX)<5) (npX)->mid_cap[(slotX)] = (uint16_t)(valX); else if ((slotX)==5) (npX)->mid_cap_dyn1 = (uint16_t)(valX); else (npX)->mid_cap_dyn2 = (uint16_t)(valX); } while(0) int sum = 0; for (int k=0;k0.0)? 
((double)dm/dt) : 0.0; } // If sum > budget: remove from lowest-need first if (sum > budget_mid) { while (sum > budget_mid) { // find min need with cap>min_mid int best_k = -1; double best_need = 1e9; for (int k=0;k best_need){ best_need=need[k]; best_k=k; } } if (best_k < 0) break; int slot = idx_map[best_k]; int nv = GET_MID_CAP(np, slot) + step_mid; SET_MID_CAP(np, slot, nv); sum += step_mid; } } } if (budget_lg > 0) { int sum = 0; for (int i=0;ilarge_cap[i]; // need score = miss ratio(Large) double need_lg[L25_NUM_CLASSES]; for (int i=0;i0.0)?((double)dm/dt):0.0; } if (sum > budget_lg) { while (sum > budget_lg) { int best=-1; double best_need=1e9; for (int i=0;ilarge_cap[i] <= min_lg) continue; if (need_lg[i] < best_need){ best_need=need_lg[i]; best=i; } } if (best<0) break; int nv=np->large_cap[best]-step_lg; if (nvlarge_cap[best]=nv; sum=0; for (int i=0;ilarge_cap[i]; } } else if (wf_enabled && sum < budget_lg) { while (sum < budget_lg) { int best=-1; double best_need=-1e9; for (int i=0;i best_need){ best_need=need_lg[i]; best=i; } } if (best<0) break; np->large_cap[best]+=step_lg; sum += step_lg; } } } // Publish new policy hkm_policy_publish(np); if (cur->mid_dyn1_bytes != 0 || cur->mid_dyn2_bytes != 0) { fprintf(stderr, "[Learner] Published caps: Mid={%u,%u,%u,%u,%u,D1:%u,D2:%u} Large={%u,%u,%u,%u,%u}\n", (unsigned)np->mid_cap[0], (unsigned)np->mid_cap[1], (unsigned)np->mid_cap[2], (unsigned)np->mid_cap[3], (unsigned)np->mid_cap[4], (unsigned)np->mid_cap_dyn1, (unsigned)np->mid_cap_dyn2, (unsigned)np->large_cap[0], (unsigned)np->large_cap[1], (unsigned)np->large_cap[2], (unsigned)np->large_cap[3], (unsigned)np->large_cap[4]); } else { fprintf(stderr, "[Learner] Published caps: Mid={%u,%u,%u,%u,%u} Large={%u,%u,%u,%u,%u}\n", (unsigned)np->mid_cap[0], (unsigned)np->mid_cap[1], (unsigned)np->mid_cap[2], (unsigned)np->mid_cap[3], (unsigned)np->mid_cap[4], (unsigned)np->large_cap[0], (unsigned)np->large_cap[1], (unsigned)np->large_cap[2], 
(unsigned)np->large_cap[3], (unsigned)np->large_cap[4]); } // Update prev snapshots for (int i=0;i= u_mid.dwell_sec && !canary_mid) { u_mid.dwell_ticks = 0; int next = ucb1_select(&u_mid); if (next != u_mid.cur) { if (canary_on) { // start canary: publish candidate temporarily and measure FrozenPolicy* np3 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (np3) { const FrozenPolicy* cur2 = hkm_policy_get(); double prev_mean = (u_mid.pulls[u_mid.cur] > 0) ? (u_mid.sum_score[u_mid.cur] / (double)u_mid.pulls[u_mid.cur]) : 0.0; base_mid_mean = prev_mean; trial_mid_sum = 0.0; trial_mid_n = 0; trial_mid_sec = 0; memcpy(np3, cur2, sizeof(FrozenPolicy)); np3->w_max_mid = u_mid.values[next]; hkm_policy_publish(np3); canary_mid = 1; // set active; do not change u_mid.cur yet fprintf(stderr, "[Learner] W_MAX mid canary start: %.2f (base=%.3f)\n", np3->w_max_mid, base_mid_mean); // store candidate index into u_mid.cur temporarily for scoring but keep canary flag u_mid.cur = next; } } else { FrozenPolicy* np3 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (np3) { memcpy(np3, hkm_policy_get(), sizeof(FrozenPolicy)); np3->w_max_mid = u_mid.values[next]; hkm_policy_publish(np3); u_mid.cur = next; fprintf(stderr, "[Learner] W_MAX mid=%.2f (UCB1)\n", np3->w_max_mid); } } } } if (u_lg.dwell_ticks >= u_lg.dwell_sec && !canary_lg) { u_lg.dwell_ticks = 0; int next = ucb1_select(&u_lg); if (next != u_lg.cur) { if (canary_on) { FrozenPolicy* np4 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (np4) { const FrozenPolicy* cur2 = hkm_policy_get(); double prev_mean = (u_lg.pulls[u_lg.cur] > 0) ? 
(u_lg.sum_score[u_lg.cur] / (double)u_lg.pulls[u_lg.cur]) : 0.0; base_lg_mean = prev_mean; trial_lg_sum = 0.0; trial_lg_n = 0; trial_lg_sec = 0; memcpy(np4, cur2, sizeof(FrozenPolicy)); np4->w_max_large = u_lg.values[next]; hkm_policy_publish(np4); canary_lg = 1; u_lg.cur = next; fprintf(stderr, "[Learner] W_MAX large canary start: %.2f (base=%.3f)\n", np4->w_max_large, base_lg_mean); } } else { FrozenPolicy* np4 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (np4) { memcpy(np4, hkm_policy_get(), sizeof(FrozenPolicy)); np4->w_max_large = u_lg.values[next]; hkm_policy_publish(np4); u_lg.cur = next; fprintf(stderr, "[Learner] W_MAX large=%.2f (UCB1)\n", np4->w_max_large); } } } } // finish canary trials if time elapsed if (canary_mid && trial_mid_sec >= trial_sec && trial_mid_n > 0) { double trial_mean = trial_mid_sum / (double)trial_mid_n; double thresh = base_mid_mean * (1.0 + adopt_pct); if (!(trial_mean >= thresh)) { // revert to best baseline arm (approx: pick best mean so far) int besti = 0; double bestm=-1e100; for (int i=0;i0){ double m=u_mid.sum_score[i]/(double)u_mid.pulls[i]; if (m>bestm){bestm=m;besti=i;} } } FrozenPolicy* npR = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (npR) { memcpy(npR, hkm_policy_get(), sizeof(FrozenPolicy)); npR->w_max_mid = u_mid.values[besti]; hkm_policy_publish(npR); u_mid.cur = besti; fprintf(stderr, "[Learner] W_MAX mid canary revert to %.2f (trial=%.3f base=%.3f)\n", npR->w_max_mid, trial_mean, base_mid_mean); } } else { fprintf(stderr, "[Learner] W_MAX mid canary adopt (trial=%.3f base=%.3f)\n", trial_mean, base_mid_mean); } canary_mid = 0; trial_mid_sum=0.0; trial_mid_n=0; trial_mid_sec=0; } if (canary_lg && trial_lg_sec >= trial_sec && trial_lg_n > 0) { double trial_mean = trial_lg_sum / (double)trial_lg_n; double thresh = base_lg_mean * (1.0 + adopt_pct); if (!(trial_mean >= thresh)) { int besti = 0; double bestm=-1e100; for (int i=0;i0){ double 
m=u_lg.sum_score[i]/(double)u_lg.pulls[i]; if (m>bestm){bestm=m;besti=i;} } } FrozenPolicy* npR = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (npR) { memcpy(npR, hkm_policy_get(), sizeof(FrozenPolicy)); npR->w_max_large = u_lg.values[besti]; hkm_policy_publish(npR); u_lg.cur = besti; fprintf(stderr, "[Learner] W_MAX large canary revert to %.2f (trial=%.3f base=%.3f)\n", npR->w_max_large, trial_mean, base_lg_mean); } } else { fprintf(stderr, "[Learner] W_MAX large canary adopt (trial=%.3f base=%.3f)\n", trial_mean, base_lg_mean); } canary_lg = 0; trial_lg_sum=0.0; trial_lg_n=0; trial_lg_sec=0; } } // Optional: THP threshold learning (discrete + canary) const char* thp_learn = getenv("HAKMEM_THP_LEARN"); if (thp_learn && atoi(thp_learn) != 0) { static ucb1_t u_thp; static int inited=0; static double thp_vals[8]; int n=0; static int canary=0; static double base_mean=0.0; static double trial_sum=0.0; static int trial_n=0; static int trial_sec=0; static int trial_elapsed=0; static double adopt_pct=0.01; static int canary_on=1; if (!inited) { const char* s = getenv("HAKMEM_THP_CANDIDATES"); n = parse_float_list(s, thp_vals, 8); if (n <= 0) { thp_vals[0]=524288; thp_vals[1]=786432; thp_vals[2]=1048576; thp_vals[3]=1572864; thp_vals[4]=2097152; n=5; } ucb1_init(&u_thp, thp_vals, n, get_env_int("HAKMEM_THP_DWELL_SEC", 15)); trial_sec = get_env_int("HAKMEM_THP_TRIAL_SEC", 6); adopt_pct = (getenv("HAKMEM_THP_ADOPT_PCT") ? 
atof(getenv("HAKMEM_THP_ADOPT_PCT")) : 0.015); canary_on = get_env_int("HAKMEM_THP_CANARY", 1); inited=1; } u_thp.dwell_ticks += window_ms/1000; // reuse same score proxy hkm_ace_stats_snapshot_t ace; hkm_ace_stats_snapshot(&ace, 1); double score = (double)(ace.mid_hit + ace.large_hit) - (double)(ace.mid_miss + ace.large_miss) - 2.0*(double)ace.l1_fallback; ucb1_update(&u_thp, u_thp.cur, score); if (canary) { trial_sum += score; trial_n++; trial_elapsed += window_ms/1000; } if (u_thp.dwell_ticks >= u_thp.dwell_sec && !canary) { u_thp.dwell_ticks = 0; int next = ucb1_select(&u_thp); if (next != u_thp.cur) { if (canary_on) { FrozenPolicy* np5 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (np5) { const FrozenPolicy* cur2 = hkm_policy_get(); double prev_mean = (u_thp.pulls[u_thp.cur] > 0) ? (u_thp.sum_score[u_thp.cur] / (double)u_thp.pulls[u_thp.cur]) : 0.0; base_mean = prev_mean; trial_sum = 0.0; trial_n = 0; trial_elapsed = 0; memcpy(np5, cur2, sizeof(FrozenPolicy)); np5->thp_threshold = (size_t)u_thp.values[next]; hkm_policy_publish(np5); canary = 1; u_thp.cur = next; fprintf(stderr, "[Learner] THP canary start: %zu (base=%.3f)\n", np5->thp_threshold, base_mean); } } else { FrozenPolicy* np5 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (np5) { memcpy(np5, hkm_policy_get(), sizeof(FrozenPolicy)); np5->thp_threshold = (size_t)u_thp.values[next]; hkm_policy_publish(np5); u_thp.cur = next; fprintf(stderr, "[Learner] THP threshold=%zu (UCB1)\n", np5->thp_threshold); } } } } if (canary && trial_elapsed >= trial_sec && trial_n > 0) { double trial_mean = trial_sum / (double)trial_n; double thresh = base_mean * (1.0 + adopt_pct); if (!(trial_mean >= thresh)) { int besti = 0; double bestm=-1e100; for (int i=0;i0){ double m=u_thp.sum_score[i]/(double)u_thp.pulls[i]; if (m>bestm){bestm=m;besti=i;} } } FrozenPolicy* npR = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0 if (npR) { memcpy(npR, 
hkm_policy_get(), sizeof(FrozenPolicy)); npR->thp_threshold = (size_t)u_thp.values[besti]; hkm_policy_publish(npR); u_thp.cur = besti; fprintf(stderr, "[Learner] THP canary revert to %zu (trial=%.3f base=%.3f)\n", npR->thp_threshold, trial_mean, base_mean); } } else { fprintf(stderr, "[Learner] THP canary adopt (trial=%.3f base=%.3f)\n", trial_mean, base_mean); } canary = 0; trial_sum = 0.0; trial_n = 0; trial_elapsed = 0; } } } return NULL; } void hkm_learner_init(void) { if (!hak_learner_env_should_run()) { return; } if (g_run) return; g_run = 1; if (pthread_create(&g_thr, NULL, learner_main, NULL) != 0) { g_run = 0; fprintf(stderr, "[Learner] pthread_create failed\n"); } else { fprintf(stderr, "[Learner] Started (CAP auto-tuner)\n"); } } void hkm_learner_shutdown(void) { if (!g_run) return; g_run = 0; pthread_join(g_thr, NULL); fprintf(stderr, "[Learner] Stopped\n"); }