Files
hakmem/core/hakmem_learner.c
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)
## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

605 lines
35 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ============================================================================
// hakmem_learner.c - Background Learner (自動CAP/W_MAX調整)
// ============================================================================
//
// 機能: バックグラウンドスレッドでヒット率・統計を監視し、ポリシーを自動調整
//
// 学習アルゴリズム:
// ┌───────────────────────────────────────────────────────────────┐
// │ 1. CAP学習 (ヒット率ベース) │
// │ - 定期的にヒット率をサンプリング (デフォルト1秒ごと) │
// │ - 目標ヒット率と比較 (Mid: 0.65、Large: 0.55) │
// │ - 不足していればCAP増加、十分ならCAP減少 │
// │ - Dwell安定期間で振動抑制 │
// │ │
// │ 2. Budget enforcement + Water-filling │
// │ - 合計CAP上限Budgetを設定可能 │
// │ - 超過時: 需要の低いクラスから削減 │
// │ - 未達時: 需要の高いクラスへ配分 (Water-filling有効時) │
// │ │
// │ 3. W_MAX学習 (UCB1 + Canary Deployment) │
// │ - 複数のW_MAX候補をUCB1多腕バンディットで探索 │
// │ - Canary方式: 一時的に候補を適用し、効果測定 │
// │ - 改善なければ最良値へロールバック │
// │ │
// │ 4. DYN1/DYN2自動割り当て │
// │ - サイズヒストグラムからピーク検出 │
// │ - 固定クラスと被らない範囲で動的クラスを設定 │
// └───────────────────────────────────────────────────────────────┘
//
// 環境変数(主要なもの):
// ┌─────────────────────────────────┬─────────┬──────────────────┐
// │ 変数 │ デフォルト│ 説明 │
// ├─────────────────────────────────┼─────────┼──────────────────┤
// │ HAKMEM_LEARN │ 0 │ 学習モード有効化 │
// │ HAKMEM_LEARN_WINDOW_MS │ 1000 │ サンプリング間隔 │
// │ HAKMEM_TARGET_HIT_MID │ 0.65 │ Mid目標ヒット率 │
// │ HAKMEM_TARGET_HIT_LARGE │ 0.55 │ Large目標ヒット率│
// │ HAKMEM_CAP_STEP_MID │ 4 │ Mid CAP更新幅 │
// │ HAKMEM_CAP_STEP_LARGE │ 1 │ Large CAP更新幅 │
// │ HAKMEM_CAP_DWELL_SEC_MID │ 3 │ Mid安定期間(秒) │
// │ HAKMEM_CAP_DWELL_SEC_LG │ 5 │ Large安定期間 │
// │ HAKMEM_BUDGET_MID │ 0 │ Mid総CAP上限 │
// │ HAKMEM_BUDGET_LARGE │ 0 │ Large総CAP上限 │
// │ HAKMEM_WF │ 0 │ Water-filling │
// │ HAKMEM_DYN1_AUTO │ 0 │ DYN1自動割り当て │
// │ HAKMEM_WMAX_LEARN │ 0 │ W_MAX学習有効化 │
// │ HAKMEM_WMAX_CANDIDATES_MID │ 1.4,... │ Mid候補リスト │
// │ HAKMEM_WMAX_CANDIDATES_LARGE │ 1.25,...│ Large候補リスト │
// │ HAKMEM_WMAX_CANARY │ 1 │ Canary方式有効 │
// │ HAKMEM_THP_LEARN │ 0 │ THP閾値学習 │
// └─────────────────────────────────┴─────────┴──────────────────┘
//
// 使用例:
// # 基本的な学習モード (CAP自動調整のみ)
// HAKMEM_LEARN=1 ./app
//
// # 目標ヒット率を調整 (Mid: 70%、Large: 60%)
// HAKMEM_LEARN=1 HAKMEM_TARGET_HIT_MID=0.7 HAKMEM_TARGET_HIT_LARGE=0.6 ./app
//
// # Budget制約 (Mid: 300 pages、Large: 50 bundles) + Water-filling
// HAKMEM_LEARN=1 HAKMEM_BUDGET_MID=300 HAKMEM_BUDGET_LARGE=50 HAKMEM_WF=1 ./app
//
// # DYN1自動割り当て (8-16KBギャップをピークで埋める)
// HAKMEM_LEARN=1 HAKMEM_DYN1_AUTO=1 HAKMEM_CAP_MID_DYN1=64 ./app
//
// # W_MAX学習 (Canary方式で安全に探索)
// HAKMEM_LEARN=1 HAKMEM_WMAX_LEARN=1 HAKMEM_WMAX_CANDIDATES_MID=1.4,1.6,1.8 HAKMEM_WMAX_CANDIDATES_LARGE=1.3,1.6,2.0 ./app
//
// 注意事項:
// - 学習モードは高負荷ワークロードで効果的
// - 低トラフィック時は誤調整の可能性あり (min_samples調整推奨)
// - W_MAX学習はリスクあり (内部断片化増加) → Canary推奨
// - FrozenPolicy更新はRCUライク (grace period未実装)
// ============================================================================
#include "hakmem_learner.h"
#include "hakmem_internal.h"
#include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
#include "hakmem_policy.h"
#include "hakmem_pool.h"
#include "hakmem_l25_pool.h"
#include "hakmem_ace_stats.h"
#include "hakmem_size_hist.h"
#include "hakmem_learn_log.h"
#include "hakmem_tiny_superslab.h" // Phase 8.4: ACE Observer
#include "box/learner_env_box.h" // Box: Learner ENV decision
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// Handle of the background learner thread; filled in by pthread_create() in
// hkm_learner_init() and joined in hkm_learner_shutdown().
static pthread_t g_thr;
// Run flag: set to 1 by hkm_learner_init() before the thread is created,
// cleared by hkm_learner_shutdown(), and polled by the learner_main() loop.
// NOTE(review): this is a plain int shared between threads with no atomic or
// lock around it — confirm that is acceptable for the supported toolchains.
static int g_run = 0;
// Counter values remembered from the previous window. learner_main() subtracts
// them from the current snapshot to obtain per-window deltas.
static uint64_t prev_mid_hits[POOL_NUM_CLASSES];
static uint64_t prev_mid_misses[POOL_NUM_CLASSES];
static uint64_t prev_lg_hits[L25_NUM_CLASSES];
static uint64_t prev_lg_misses[L25_NUM_CLASSES];
// Previous values of the extra pool metrics (try attempts / try successes /
// ring underflows) used for the "M," line appended to the log file.
static uint64_t prev_try_attempts = 0;
static uint64_t prev_try_success = 0;
static uint64_t prev_ring_underflow = 0;
// Read an integer setting from the environment.
//
// name: environment variable to look up.
// defv: value returned when the variable is unset or unusable.
//
// Returns the parsed value. Leading whitespace and trailing non-digit text are
// tolerated (as with atoi, "250ms" yields 250). Unlike atoi, a value with no
// digits at all (including the empty string) or one that does not fit in an
// int yields defv instead of 0 / undefined behaviour.
// The caller's errno is preserved.
static inline int get_env_int(const char* name, int defv) {
    const char* text = getenv(name);
    if (!text) {
        return defv;
    }

    const int saved_errno = errno;
    char* end = NULL;
    errno = 0;
    const long parsed = strtol(text, &end, 10);
    const int out_of_range = (errno == ERANGE);
    errno = saved_errno;

    if (end == text) {
        return defv; // nothing numeric at the start of the value
    }
    if (out_of_range || parsed < INT_MIN || parsed > INT_MAX) {
        return defv; // would not fit in the int the callers expect
    }
    return (int)parsed;
}
// Parse a comma-separated list of floating-point numbers.
//
// s:    input text such as "1.4,1.6,1.8"; NULL is treated as an empty list.
// out:  destination array with room for at least maxn values.
// maxn: maximum number of values to store.
//
// Returns the number of values written to out (0..maxn).
// Empty tokens, tokens that do not start with a number, and tokens too long
// for the 63-character scratch buffer are skipped as a whole; an over-long
// token is never split into several values.
static int parse_float_list(const char* s, double* out, int maxn) {
    if (!s || !out || maxn <= 0) {
        return 0;
    }

    int count = 0;
    const char* p = s;
    while (*p && count < maxn) {
        char token[64];
        size_t len = 0;
        int overlong = 0;

        // Consume the whole token up to the next comma (or end of string),
        // even when it no longer fits into the scratch buffer.
        while (*p && *p != ',') {
            if (len < sizeof(token) - 1) {
                token[len++] = *p;
            } else {
                overlong = 1;
            }
            p++;
        }
        token[len] = '\0';
        if (*p == ',') {
            p++;
        }

        if (len == 0 || overlong) {
            continue;
        }

        char* end = NULL;
        const double value = strtod(token, &end);
        if (end == token) {
            continue; // no numeric prefix: do not record a bogus 0.0
        }
        out[count++] = value;
    }
    return count;
}
// --------------------- UCB1 helpers (discrete candidates) ------------------
// State of one bandit over a small, fixed set of candidate values.
// Filled by ucb1_init(), read by ucb1_select(), updated by ucb1_update().
typedef struct {
double values[16]; // candidate values, copied from the caller's list in ucb1_init()
int pulls[16]; // per-arm count of scores recorded by ucb1_update()
double sum_score[16]; // per-arm running sum of recorded scores
int n; // number of arms in use (ucb1_init() caps it at 16)
int cur; // index of the arm currently applied; maintained by the caller
int dwell_ticks; // time accumulated by the caller since the last arm selection
int dwell_sec; // dwell threshold the caller compares dwell_ticks against
} ucb1_t;
// Reset a bandit and load its candidate values.
//
// u:         bandit state to (re)initialise; fully zeroed first.
// vals:      candidate values; the first min(n, 16) entries are copied.
// n:         number of candidates offered (anything above 16 is ignored).
// dwell_sec: dwell threshold stored for the caller's arm-switch gate.
static void ucb1_init(ucb1_t* u, const double* vals, int n, int dwell_sec) {
    memset(u, 0, sizeof(*u));

    const int arm_count = (n > 16) ? 16 : n;
    u->n = arm_count;

    int k = 0;
    while (k < arm_count) {
        u->values[k] = vals[k];
        u->pulls[k] = 0;
        u->sum_score[k] = 0.0;
        ++k;
    }

    u->cur = 0;
    u->dwell_ticks = 0;
    u->dwell_sec = dwell_sec;
}
// Choose the next arm to try.
//
// Any arm that has never been scored is returned first (lowest index wins).
// Otherwise each arm is rated as mean score + 1.5 * sqrt(ln(total pulls) /
// arm pulls) and the highest-rated arm is returned; ties keep the lower index.
// Returns 0 when the bandit has no arms.
static int ucb1_select(ucb1_t* u) {
    const int arm_count = u->n;

    // Unexplored arms take priority; accumulate the pull total on the way.
    double pulls_total = 0.0;
    for (int k = 0; k < arm_count; ++k) {
        if (u->pulls[k] == 0) {
            return k;
        }
        pulls_total += (double)u->pulls[k];
    }

    int winner = 0;
    double winner_ucb = -1e100;
    for (int k = 0; k < arm_count; ++k) {
        const double arm_pulls = (double)u->pulls[k];
        const double mean = u->sum_score[k] / arm_pulls;
        const double bonus = 1.5 * sqrt(log(pulls_total) / arm_pulls);
        const double ucb = mean + bonus;
        if (ucb > winner_ucb) {
            winner_ucb = ucb;
            winner = k;
        }
    }
    return winner;
}
// Record one observed score for an arm.
// Out-of-range arm indices are ignored.
static void ucb1_update(ucb1_t* u, int arm, double score) {
    const int in_range = (arm >= 0) && (arm < u->n);
    if (in_range) {
        u->pulls[arm]++;
        u->sum_score[arm] += score;
    }
}
static void* learner_main(void* arg) {
(void)arg;
// Config
int window_ms = get_env_int("HAKMEM_LEARN_WINDOW_MS", 1000);
double tgt_mid = (getenv("HAKMEM_TARGET_HIT_MID") ? atof(getenv("HAKMEM_TARGET_HIT_MID")) : 0.65);
double tgt_lg = (getenv("HAKMEM_TARGET_HIT_LARGE") ? atof(getenv("HAKMEM_TARGET_HIT_LARGE")) : 0.55);
double eps = 0.03; // hysteresis band
int step_mid = get_env_int("HAKMEM_CAP_STEP_MID", 4); // pages per update
int step_lg = get_env_int("HAKMEM_CAP_STEP_LARGE", 1); // bundles per update
int min_mid = get_env_int("HAKMEM_CAP_MIN_MID", 8);
int max_mid = get_env_int("HAKMEM_CAP_MAX_MID", 2048);
int min_lg = get_env_int("HAKMEM_CAP_MIN_LARGE", 1);
int max_lg = get_env_int("HAKMEM_CAP_MAX_LARGE", 512);
int budget_mid = get_env_int("HAKMEM_BUDGET_MID", 0); // 0=disabled
int budget_lg = get_env_int("HAKMEM_BUDGET_LARGE", 0); // 0=disabled
int min_samples = get_env_int("HAKMEM_LEARN_MIN_SAMPLES", 256);
// Dwell (stability) for CAP updates
int cap_dwell_mid = get_env_int("HAKMEM_CAP_DWELL_SEC_MID", 3);
int cap_dwell_lg = get_env_int("HAKMEM_CAP_DWELL_SEC_LG", 5);
int wf_enabled = get_env_int("HAKMEM_WF", 0);
// weights for simple gain proxy (if needed later)
double w_miss = (getenv("HAKMEM_GAIN_W_MISS") ? atof(getenv("HAKMEM_GAIN_W_MISS")) : 1.0);
(void)w_miss;
// Initialize prev counters
memset(prev_mid_hits, 0, sizeof(prev_mid_hits));
memset(prev_mid_misses, 0, sizeof(prev_mid_misses));
memset(prev_lg_hits, 0, sizeof(prev_lg_hits));
memset(prev_lg_misses, 0, sizeof(prev_lg_misses));
// per-class dwell counters (seconds since last change)
static int mid_dwell_ticks[POOL_NUM_CLASSES] = {0};
static int lg_dwell_ticks[L25_NUM_CLASSES] = {0};
// Phase 8.4: Check ACE Observer setting once (outside loop)
const char* ace_observe = getenv("HAKMEM_ACE_OBSERVE");
int ace_enabled = (ace_observe && atoi(ace_observe) != 0);
const char* ace_debug = getenv("HAKMEM_ACE_DEBUG");
int ace_debug_enabled = (ace_debug && atoi(ace_debug) != 0);
if (ace_enabled && ace_debug_enabled) {
fprintf(stderr, "[Learner] ACE Observer enabled (debug on)\n");
}
while (g_run) {
usleep(window_ms * 1000);
if (!g_run) break;
// Phase 8.4: ACE Observer (Tiny SuperSlab sizing)
if (ace_enabled) {
if (ace_debug_enabled) {
fprintf(stderr, "[Learner] Calling ACE Observer...\n");
}
hak_tiny_superslab_ace_observe_all();
}
// Optional: flush sampled logs to file
const char* logf = getenv("HAKMEM_LOG_FILE");
if (logf && *logf) {
hkm_log_flush_file(logf);
// Append metrics line: M,timestamp_ns,try_attempts_delta,try_success_delta,ring_underflow_delta,trylock_rate
uint64_t cur_try=0, cur_succ=0, cur_uf=0;
hak_pool_extra_metrics_snapshot(&cur_try, &cur_succ, &cur_uf);
uint64_t d_try = (cur_try >= prev_try_attempts) ? (cur_try - prev_try_attempts) : 0;
uint64_t d_suc = (cur_succ >= prev_try_success) ? (cur_succ - prev_try_success) : 0;
uint64_t d_uf = (cur_uf >= prev_ring_underflow) ? (cur_uf - prev_ring_underflow) : 0;
prev_try_attempts = cur_try; prev_try_success = cur_succ; prev_ring_underflow = cur_uf;
double rate = (d_try > 0) ? ((double)d_suc / (double)d_try) : 0.0;
// get timestamp
struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts);
unsigned long long ts_ns = (unsigned long long)ts.tv_sec*1000000000ull + (unsigned long long)ts.tv_nsec;
FILE* fp = fopen(logf, "a");
if (fp) {
fprintf(fp, "M,%llu,%llu,%llu,%llu,%.6f\n", ts_ns,
(unsigned long long)d_try, (unsigned long long)d_suc, (unsigned long long)d_uf, rate);
fclose(fp);
}
}
// Snapshot current stats
uint64_t mid_hits[POOL_NUM_CLASSES], mid_misses[POOL_NUM_CLASSES], mid_refills[POOL_NUM_CLASSES], mid_frees[POOL_NUM_CLASSES];
uint64_t lg_hits[L25_NUM_CLASSES], lg_misses[L25_NUM_CLASSES], lg_refills[L25_NUM_CLASSES], lg_frees[L25_NUM_CLASSES];
hak_pool_stats_snapshot(mid_hits, mid_misses, mid_refills, mid_frees);
hak_l25_pool_stats_snapshot(lg_hits, lg_misses, lg_refills, lg_frees);
const FrozenPolicy* cur = hkm_policy_get();
if (!cur) continue;
FrozenPolicy* np = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (!np) continue;
memcpy(np, cur, sizeof(FrozenPolicy));
// Adjust Mid caps by hit rate vs target (delta over window) with dwell
int mid_classes = 5;
if (cur->mid_dyn1_bytes != 0 && cur->mid_dyn2_bytes != 0) mid_classes = 7;
else if (cur->mid_dyn1_bytes != 0 || cur->mid_dyn2_bytes != 0) mid_classes = 6;
for (int i = 0; i < mid_classes; i++) {
uint64_t dh = mid_hits[i] - prev_mid_hits[i];
uint64_t dm = mid_misses[i] - prev_mid_misses[i];
uint64_t dt = dh + dm;
if (dt < (uint64_t)min_samples) continue;
double hit = (dt > 0) ? ((double)dh / (double)dt) : 1.0;
int cap;
if (i < 5) cap = np->mid_cap[i];
else if (i == 5) cap = np->mid_cap_dyn1;
else cap = np->mid_cap_dyn2;
// dwell gate per class
mid_dwell_ticks[i] += window_ms/1000;
if (mid_dwell_ticks[i] < cap_dwell_mid) {
// skip change this window
} else {
if (hit < (tgt_mid - eps)) {
cap += step_mid;
} else if (hit > (tgt_mid + eps)) {
cap -= step_mid;
}
if (cap < min_mid) cap = min_mid;
if (cap > max_mid) cap = max_mid;
// reset dwell only if actual change happens
int old = (i < 5) ? np->mid_cap[i] : (i==5 ? np->mid_cap_dyn1 : np->mid_cap_dyn2);
if (cap != old) mid_dwell_ticks[i] = 0;
}
if (i < 5) np->mid_cap[i] = (uint16_t)cap;
else if (i == 5) np->mid_cap_dyn1 = (uint16_t)cap;
else np->mid_cap_dyn2 = (uint16_t)cap;
}
// Optional: auto-assign DYN1 to peak size in 232KiB when enabled
const char* dyn_auto = getenv("HAKMEM_DYN1_AUTO");
const char* dyn2_auto = getenv("HAKMEM_DYN2_AUTO");
if ((dyn_auto && atoi(dyn_auto) != 0) || (dyn2_auto && atoi(dyn2_auto) != 0)) {
// Snapshot first 40 KiB (bins up to 40), reset after read
uint64_t bins[41];
hkm_size_hist_snapshot(bins, 41, 1);
int best_kb = 0, second_kb = 0; uint64_t best_cnt = 0, second_cnt = 0;
for (int kb = 2; kb <= 32; kb++) {
uint64_t c = bins[kb];
if (c > best_cnt) { second_cnt = best_cnt; second_kb = best_kb; best_cnt = c; best_kb = kb; }
else if (c > second_cnt && kb != best_kb) { second_cnt = c; second_kb = kb; }
}
// Choose DYN1 around peak if not conflicting with fixed classes
if (dyn_auto && atoi(dyn_auto) != 0 && best_kb >= 2 && best_kb <= 32) {
size_t dyn_bytes = (size_t)best_kb * 1024;
if (dyn_bytes != 2048 && dyn_bytes != 4096 && dyn_bytes != 8192 && dyn_bytes != 16384 && dyn_bytes != 32768) {
np->mid_dyn1_bytes = (uint32_t)dyn_bytes;
}
}
if (dyn2_auto && atoi(dyn2_auto) != 0 && second_kb >= 2 && second_kb <= 32) {
size_t dyn_bytes = (size_t)second_kb * 1024;
if (dyn_bytes != 2048 && dyn_bytes != 4096 && dyn_bytes != 8192 && dyn_bytes != 16384 && dyn_bytes != 32768 && dyn_bytes != np->mid_dyn1_bytes) {
np->mid_dyn2_bytes = (uint32_t)dyn_bytes;
}
}
}
// Adjust Large caps similarly with dwell
for (int i = 0; i < L25_NUM_CLASSES; i++) {
uint64_t dh = lg_hits[i] - prev_lg_hits[i];
uint64_t dm = lg_misses[i] - prev_lg_misses[i];
uint64_t dt = dh + dm;
if (dt < (uint64_t)min_samples) continue;
double hit = (dt > 0) ? ((double)dh / (double)dt) : 1.0;
int cap = np->large_cap[i];
lg_dwell_ticks[i] += window_ms/1000;
if (lg_dwell_ticks[i] >= cap_dwell_lg) {
if (hit < (tgt_lg - eps)) {
cap += step_lg;
} else if (hit > (tgt_lg + eps)) {
cap -= step_lg;
}
int old = np->large_cap[i];
if (cap != old) lg_dwell_ticks[i] = 0;
}
if (cap < min_lg) cap = min_lg;
if (cap > max_lg) cap = max_lg;
np->large_cap[i] = (uint16_t)cap;
}
// Budget enforcement / Water-filling for Mid
if (budget_mid > 0) {
// Assemble class arrays
int idx_map[7]; int m=0; for (int i=0;i<5;i++) idx_map[m++]=i; if (cur->mid_dyn1_bytes) idx_map[m++]=5; if (cur->mid_dyn2_bytes) idx_map[m++]=6;
// compute sum caps
#define GET_MID_CAP(npX, slotX) ((slotX)<5 ? (npX)->mid_cap[(slotX)] : ((slotX)==5 ? (npX)->mid_cap_dyn1 : (npX)->mid_cap_dyn2))
#define SET_MID_CAP(npX, slotX, valX) do { if ((slotX)<5) (npX)->mid_cap[(slotX)] = (uint16_t)(valX); else if ((slotX)==5) (npX)->mid_cap_dyn1 = (uint16_t)(valX); else (npX)->mid_cap_dyn2 = (uint16_t)(valX); } while(0)
int sum = 0; for (int k=0;k<m;k++){ int i=idx_map[k]; int v=GET_MID_CAP(np, i); sum += v; }
// need score = miss ratio in window高いほど需要大
double need[7]; for (int k=0;k<m;k++){ int i=idx_map[k]; uint64_t dh = (i<5? (mid_hits[i]-prev_mid_hits[i]) : 0); uint64_t dm = (i<5? (mid_misses[i]-prev_mid_misses[i]) : 0); double dt=(double)(dh+dm); need[k] = (dt>0.0)? ((double)dm/dt) : 0.0; }
// If sum > budget: remove from lowest-need first
if (sum > budget_mid) {
while (sum > budget_mid) {
// find min need with cap>min_mid
int best_k = -1; double best_need = 1e9;
for (int k=0;k<m;k++){ int slot=idx_map[k]; int cap=GET_MID_CAP(np, slot); if (cap<=min_mid) continue; if (need[k] < best_need){ best_need=need[k]; best_k=k; } }
if (best_k < 0) break;
int slot = idx_map[best_k]; int nv = GET_MID_CAP(np, slot) - step_mid; if (nv < min_mid) nv = min_mid; SET_MID_CAP(np, slot, nv); sum = 0; for (int k=0;k<m;k++){ int sl=idx_map[k]; sum += GET_MID_CAP(np, sl); }
}
} else if (wf_enabled && sum < budget_mid) {
// sum < budget: allocate to highest-need first
while (sum < budget_mid) {
int best_k = -1; double best_need = -1e9;
for (int k=0;k<m;k++){ if (need[k] > best_need){ best_need=need[k]; best_k=k; } }
if (best_k < 0) break;
int slot = idx_map[best_k]; int nv = GET_MID_CAP(np, slot) + step_mid; SET_MID_CAP(np, slot, nv); sum += step_mid;
}
}
}
if (budget_lg > 0) {
int sum = 0; for (int i=0;i<L25_NUM_CLASSES;i++) sum += np->large_cap[i];
// need score = miss ratioLarge
double need_lg[L25_NUM_CLASSES]; for (int i=0;i<L25_NUM_CLASSES;i++){ uint64_t dh=lg_hits[i]-prev_lg_hits[i]; uint64_t dm=lg_misses[i]-prev_lg_misses[i]; double dt=(double)(dh+dm); need_lg[i]=(dt>0.0)?((double)dm/dt):0.0; }
if (sum > budget_lg) {
while (sum > budget_lg) {
int best=-1; double best_need=1e9;
for (int i=0;i<L25_NUM_CLASSES;i++){ if (np->large_cap[i] <= min_lg) continue; if (need_lg[i] < best_need){ best_need=need_lg[i]; best=i; } }
if (best<0) break;
int nv=np->large_cap[best]-step_lg; if (nv<min_lg) nv=min_lg; np->large_cap[best]=nv; sum=0; for (int i=0;i<L25_NUM_CLASSES;i++) sum += np->large_cap[i];
}
} else if (wf_enabled && sum < budget_lg) {
while (sum < budget_lg) {
int best=-1; double best_need=-1e9; for (int i=0;i<L25_NUM_CLASSES;i++){ if (need_lg[i] > best_need){ best_need=need_lg[i]; best=i; } }
if (best<0) break;
np->large_cap[best]+=step_lg; sum += step_lg;
}
}
}
// Publish new policy
hkm_policy_publish(np);
if (cur->mid_dyn1_bytes != 0 || cur->mid_dyn2_bytes != 0) {
fprintf(stderr, "[Learner] Published caps: Mid={%u,%u,%u,%u,%u,D1:%u,D2:%u} Large={%u,%u,%u,%u,%u}\n",
(unsigned)np->mid_cap[0], (unsigned)np->mid_cap[1], (unsigned)np->mid_cap[2], (unsigned)np->mid_cap[3], (unsigned)np->mid_cap[4], (unsigned)np->mid_cap_dyn1, (unsigned)np->mid_cap_dyn2,
(unsigned)np->large_cap[0], (unsigned)np->large_cap[1], (unsigned)np->large_cap[2], (unsigned)np->large_cap[3], (unsigned)np->large_cap[4]);
} else {
fprintf(stderr, "[Learner] Published caps: Mid={%u,%u,%u,%u,%u} Large={%u,%u,%u,%u,%u}\n",
(unsigned)np->mid_cap[0], (unsigned)np->mid_cap[1], (unsigned)np->mid_cap[2], (unsigned)np->mid_cap[3], (unsigned)np->mid_cap[4],
(unsigned)np->large_cap[0], (unsigned)np->large_cap[1], (unsigned)np->large_cap[2], (unsigned)np->large_cap[3], (unsigned)np->large_cap[4]);
}
// Update prev snapshots
for (int i=0;i<mid_classes;i++){ prev_mid_hits[i]=mid_hits[i]; prev_mid_misses[i]=mid_misses[i]; }
for (int i=0;i<L25_NUM_CLASSES;i++){ prev_lg_hits[i]=lg_hits[i]; prev_lg_misses[i]=lg_misses[i]; }
// Optional: W_MAX learning (discrete candidates + dwell + canary)
const char* wlearn = getenv("HAKMEM_WMAX_LEARN");
if (wlearn && atoi(wlearn) != 0) {
static ucb1_t u_mid, u_lg; static int inited=0;
static double vals_mid[8], vals_lg[8]; static int n_mid=0, n_lg=0;
// Canary control
static int canary_mid=0, canary_lg=0; // active flags
static double base_mid_mean=0.0, base_lg_mean=0.0; // baselines
static double trial_mid_sum=0.0, trial_lg_sum=0.0; static int trial_mid_n=0, trial_lg_n=0; static int trial_mid_sec=0, trial_lg_sec=0;
static int trial_sec = 0; static double adopt_pct = 0.01; static int canary_on = 0;
if (!inited) {
n_mid = parse_float_list(getenv("HAKMEM_WMAX_CANDIDATES_MID"), vals_mid, 8);
if (n_mid <= 0) { vals_mid[0]=1.4; vals_mid[1]=1.6; vals_mid[2]=1.7; n_mid=3; }
n_lg = parse_float_list(getenv("HAKMEM_WMAX_CANDIDATES_LARGE"), vals_lg, 8);
if (n_lg <= 0) { vals_lg[0]=1.25; vals_lg[1]=1.30; vals_lg[2]=1.40; n_lg=3; }
ucb1_init(&u_mid, vals_mid, n_mid, get_env_int("HAKMEM_WMAX_DWELL_SEC", 10));
ucb1_init(&u_lg, vals_lg, n_lg, get_env_int("HAKMEM_WMAX_DWELL_SEC", 10));
trial_sec = get_env_int("HAKMEM_WMAX_TRIAL_SEC", 5);
adopt_pct = (getenv("HAKMEM_WMAX_ADOPT_PCT") ? atof(getenv("HAKMEM_WMAX_ADOPT_PCT")) : 0.01);
canary_on = get_env_int("HAKMEM_WMAX_CANARY", 1);
inited = 1;
}
u_mid.dwell_ticks += window_ms/1000; u_lg.dwell_ticks += window_ms/1000;
// score簡易: mid_hit+large_hit - (mid_miss+large_miss) - 2*l1_fallback
hkm_ace_stats_snapshot_t ace;
hkm_ace_stats_snapshot(&ace, 1);
double score = (double)(ace.mid_hit + ace.large_hit) - (double)(ace.mid_miss + ace.large_miss) - 2.0*(double)ace.l1_fallback;
// update current arms with window score
ucb1_update(&u_mid, u_mid.cur, score);
ucb1_update(&u_lg, u_lg.cur, score);
// accumulate trial score if canary active
if (canary_mid) { trial_mid_sum += score; trial_mid_n++; trial_mid_sec += window_ms/1000; }
if (canary_lg) { trial_lg_sum += score; trial_lg_n++; trial_lg_sec += window_ms/1000; }
// maybe switch arm if dwell passed
if (u_mid.dwell_ticks >= u_mid.dwell_sec && !canary_mid) {
u_mid.dwell_ticks = 0; int next = ucb1_select(&u_mid);
if (next != u_mid.cur) {
if (canary_on) {
// start canary: publish candidate temporarily and measure
FrozenPolicy* np3 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (np3) {
const FrozenPolicy* cur2 = hkm_policy_get();
double prev_mean = (u_mid.pulls[u_mid.cur] > 0) ? (u_mid.sum_score[u_mid.cur] / (double)u_mid.pulls[u_mid.cur]) : 0.0;
base_mid_mean = prev_mean; trial_mid_sum = 0.0; trial_mid_n = 0; trial_mid_sec = 0;
memcpy(np3, cur2, sizeof(FrozenPolicy));
np3->w_max_mid = u_mid.values[next]; hkm_policy_publish(np3);
canary_mid = 1; // set active; do not change u_mid.cur yet
fprintf(stderr, "[Learner] W_MAX mid canary start: %.2f (base=%.3f)\n", np3->w_max_mid, base_mid_mean);
// store candidate index into u_mid.cur temporarily for scoring but keep canary flag
u_mid.cur = next;
}
} else {
FrozenPolicy* np3 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (np3) {
memcpy(np3, hkm_policy_get(), sizeof(FrozenPolicy));
np3->w_max_mid = u_mid.values[next]; hkm_policy_publish(np3);
u_mid.cur = next; fprintf(stderr, "[Learner] W_MAX mid=%.2f (UCB1)\n", np3->w_max_mid);
}
}
}
}
if (u_lg.dwell_ticks >= u_lg.dwell_sec && !canary_lg) {
u_lg.dwell_ticks = 0; int next = ucb1_select(&u_lg);
if (next != u_lg.cur) {
if (canary_on) {
FrozenPolicy* np4 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (np4) {
const FrozenPolicy* cur2 = hkm_policy_get();
double prev_mean = (u_lg.pulls[u_lg.cur] > 0) ? (u_lg.sum_score[u_lg.cur] / (double)u_lg.pulls[u_lg.cur]) : 0.0;
base_lg_mean = prev_mean; trial_lg_sum = 0.0; trial_lg_n = 0; trial_lg_sec = 0;
memcpy(np4, cur2, sizeof(FrozenPolicy));
np4->w_max_large = u_lg.values[next]; hkm_policy_publish(np4);
canary_lg = 1; u_lg.cur = next;
fprintf(stderr, "[Learner] W_MAX large canary start: %.2f (base=%.3f)\n", np4->w_max_large, base_lg_mean);
}
} else {
FrozenPolicy* np4 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (np4) {
memcpy(np4, hkm_policy_get(), sizeof(FrozenPolicy));
np4->w_max_large = u_lg.values[next]; hkm_policy_publish(np4);
u_lg.cur = next; fprintf(stderr, "[Learner] W_MAX large=%.2f (UCB1)\n", np4->w_max_large);
}
}
}
}
// finish canary trials if time elapsed
if (canary_mid && trial_mid_sec >= trial_sec && trial_mid_n > 0) {
double trial_mean = trial_mid_sum / (double)trial_mid_n;
double thresh = base_mid_mean * (1.0 + adopt_pct);
if (!(trial_mean >= thresh)) {
// revert to best baseline arm (approx: pick best mean so far)
int besti = 0; double bestm=-1e100; for (int i=0;i<u_mid.n;i++){ if (u_mid.pulls[i]>0){ double m=u_mid.sum_score[i]/(double)u_mid.pulls[i]; if (m>bestm){bestm=m;besti=i;} } }
FrozenPolicy* npR = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (npR) { memcpy(npR, hkm_policy_get(), sizeof(FrozenPolicy)); npR->w_max_mid = u_mid.values[besti]; hkm_policy_publish(npR); u_mid.cur = besti; fprintf(stderr, "[Learner] W_MAX mid canary revert to %.2f (trial=%.3f base=%.3f)\n", npR->w_max_mid, trial_mean, base_mid_mean); }
} else {
fprintf(stderr, "[Learner] W_MAX mid canary adopt (trial=%.3f base=%.3f)\n", trial_mean, base_mid_mean);
}
canary_mid = 0; trial_mid_sum=0.0; trial_mid_n=0; trial_mid_sec=0;
}
if (canary_lg && trial_lg_sec >= trial_sec && trial_lg_n > 0) {
double trial_mean = trial_lg_sum / (double)trial_lg_n;
double thresh = base_lg_mean * (1.0 + adopt_pct);
if (!(trial_mean >= thresh)) {
int besti = 0; double bestm=-1e100; for (int i=0;i<u_lg.n;i++){ if (u_lg.pulls[i]>0){ double m=u_lg.sum_score[i]/(double)u_lg.pulls[i]; if (m>bestm){bestm=m;besti=i;} } }
FrozenPolicy* npR = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (npR) { memcpy(npR, hkm_policy_get(), sizeof(FrozenPolicy)); npR->w_max_large = u_lg.values[besti]; hkm_policy_publish(npR); u_lg.cur = besti; fprintf(stderr, "[Learner] W_MAX large canary revert to %.2f (trial=%.3f base=%.3f)\n", npR->w_max_large, trial_mean, base_lg_mean); }
} else {
fprintf(stderr, "[Learner] W_MAX large canary adopt (trial=%.3f base=%.3f)\n", trial_mean, base_lg_mean);
}
canary_lg = 0; trial_lg_sum=0.0; trial_lg_n=0; trial_lg_sec=0;
}
}
// Optional: THP threshold learning (discrete + canary)
const char* thp_learn = getenv("HAKMEM_THP_LEARN");
if (thp_learn && atoi(thp_learn) != 0) {
static ucb1_t u_thp; static int inited=0;
static double thp_vals[8]; int n=0;
static int canary=0; static double base_mean=0.0; static double trial_sum=0.0; static int trial_n=0; static int trial_sec=0; static int trial_elapsed=0; static double adopt_pct=0.01; static int canary_on=1;
if (!inited) {
const char* s = getenv("HAKMEM_THP_CANDIDATES");
n = parse_float_list(s, thp_vals, 8);
if (n <= 0) { thp_vals[0]=524288; thp_vals[1]=786432; thp_vals[2]=1048576; thp_vals[3]=1572864; thp_vals[4]=2097152; n=5; }
ucb1_init(&u_thp, thp_vals, n, get_env_int("HAKMEM_THP_DWELL_SEC", 15));
trial_sec = get_env_int("HAKMEM_THP_TRIAL_SEC", 6);
adopt_pct = (getenv("HAKMEM_THP_ADOPT_PCT") ? atof(getenv("HAKMEM_THP_ADOPT_PCT")) : 0.015);
canary_on = get_env_int("HAKMEM_THP_CANARY", 1);
inited=1;
}
u_thp.dwell_ticks += window_ms/1000;
// reuse same score proxy
hkm_ace_stats_snapshot_t ace;
hkm_ace_stats_snapshot(&ace, 1);
double score = (double)(ace.mid_hit + ace.large_hit) - (double)(ace.mid_miss + ace.large_miss) - 2.0*(double)ace.l1_fallback;
ucb1_update(&u_thp, u_thp.cur, score);
if (canary) { trial_sum += score; trial_n++; trial_elapsed += window_ms/1000; }
if (u_thp.dwell_ticks >= u_thp.dwell_sec && !canary) {
u_thp.dwell_ticks = 0; int next = ucb1_select(&u_thp);
if (next != u_thp.cur) {
if (canary_on) {
FrozenPolicy* np5 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (np5) {
const FrozenPolicy* cur2 = hkm_policy_get();
double prev_mean = (u_thp.pulls[u_thp.cur] > 0) ? (u_thp.sum_score[u_thp.cur] / (double)u_thp.pulls[u_thp.cur]) : 0.0;
base_mean = prev_mean; trial_sum = 0.0; trial_n = 0; trial_elapsed = 0;
memcpy(np5, cur2, sizeof(FrozenPolicy));
np5->thp_threshold = (size_t)u_thp.values[next]; hkm_policy_publish(np5);
canary = 1; u_thp.cur = next;
fprintf(stderr, "[Learner] THP canary start: %zu (base=%.3f)\n", np5->thp_threshold, base_mean);
}
} else {
FrozenPolicy* np5 = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (np5) {
memcpy(np5, hkm_policy_get(), sizeof(FrozenPolicy));
np5->thp_threshold = (size_t)u_thp.values[next]; hkm_policy_publish(np5);
u_thp.cur = next; fprintf(stderr, "[Learner] THP threshold=%zu (UCB1)\n", np5->thp_threshold);
}
}
}
}
if (canary && trial_elapsed >= trial_sec && trial_n > 0) {
double trial_mean = trial_sum / (double)trial_n;
double thresh = base_mean * (1.0 + adopt_pct);
if (!(trial_mean >= thresh)) {
int besti = 0; double bestm=-1e100; for (int i=0;i<u_thp.n;i++){ if (u_thp.pulls[i]>0){ double m=u_thp.sum_score[i]/(double)u_thp.pulls[i]; if (m>bestm){bestm=m;besti=i;} } }
FrozenPolicy* npR = (FrozenPolicy*)hkm_libc_malloc(sizeof(FrozenPolicy)); // Phase 6.X P0
if (npR) { memcpy(npR, hkm_policy_get(), sizeof(FrozenPolicy)); npR->thp_threshold = (size_t)u_thp.values[besti]; hkm_policy_publish(npR); u_thp.cur = besti; fprintf(stderr, "[Learner] THP canary revert to %zu (trial=%.3f base=%.3f)\n", npR->thp_threshold, trial_mean, base_mean); }
} else {
fprintf(stderr, "[Learner] THP canary adopt (trial=%.3f base=%.3f)\n", trial_mean, base_mean);
}
canary = 0; trial_sum = 0.0; trial_n = 0; trial_elapsed = 0;
}
}
}
return NULL;
}
// Start the background learner thread.
// Does nothing when hak_learner_env_should_run() says the learner is not
// wanted, or when the run flag is already set. The run flag is raised before
// the thread is created and lowered again if creation fails.
void hkm_learner_init(void) {
    if (!hak_learner_env_should_run() || g_run) {
        return;
    }

    g_run = 1;
    const int rc = pthread_create(&g_thr, NULL, learner_main, NULL);
    if (rc == 0) {
        fprintf(stderr, "[Learner] Started (CAP auto-tuner)\n");
        return;
    }

    g_run = 0;
    fprintf(stderr, "[Learner] pthread_create failed\n");
}
// Stop the background learner thread.
// Clears the run flag, waits for the thread to exit and reports it on stderr;
// does nothing when the run flag is not set.
void hkm_learner_shutdown(void) {
    if (g_run) {
        g_run = 0;
        pthread_join(g_thr, NULL);
        fprintf(stderr, "[Learner] Stopped\n");
    }
}