hakmem/core/hakmem_tiny_intel.inc

// One allocation event record. Instances are stored in the g_ev_ring array
// declared below. Several fields are documented as optional and are left 0
// when the information is not available.
typedef struct {
    uint64_t ts_ns;         // timestamp (ns, monotonic)
    uint32_t size;          // requested/served size
    uint32_t site_id;       // callsite id (optional; 0 if unknown)
    uint16_t latency_bucket;// latency bucket (optional; 0 if unknown)
    uint8_t  tier_hit;      // which tier handled/refilled (SLL/MAG/SLAB/SUPER/FRONT)
    uint8_t  flags;         // burst/sequential/random (bit flags; 0 unused)
    uint16_t class_idx;     // tiny class
    uint16_t thread_id;     // low bits of thread id (best-effort)
} AllocEvent;

// Forward decl (defined in ss_os_acquire_box.h)
// Used in this file by superslab_partial_release(), which treats a return
// value of 0 as success.
extern int ss_os_madvise_guarded(void* ptr, size_t len, int advice, const char* where);

// AllocEvent queue storage. EVENTQ_CAP is a power of two, so EVENTQ_MASK
// (CAP - 1) has all low bits set.
// NOTE(review): presumably head/tail are free-running counters reduced with
// EVENTQ_MASK, like the observation ring below — the users of this queue are
// not visible in this part of the file.
#define EVENTQ_CAP 65536u
#define EVENTQ_MASK (EVENTQ_CAP - 1u)
static _Atomic uint32_t g_ev_tail = 0;
static _Atomic uint32_t g_ev_head = 0;
static AllocEvent g_ev_ring[EVENTQ_CAP];
static int g_int_engine = 0;  // HAKMEM_INT_ENGINE=1
// Thread handle, stop flag and "started" flag for the engine thread
// (presumably the consumer of g_ev_ring — confirm against the code that
// starts it).
static pthread_t g_int_thread;
static volatile int g_int_stop = 0;
static int g_int_started = 0;

// Lightweight observation ring (async aggregation for TLS stats)
// One ring entry, written by tiny_obs_enqueue() and consumed by
// tiny_obs_process().
typedef struct {
    uint8_t kind;        // one of the TINY_OBS_* constants
    uint8_t class_idx;   // tiny class; range-checked against TINY_NUM_CLASSES by tiny_obs_process()
    uint16_t count;      // number of occurrences this entry stands for (0 entries are ignored)
} TinyObsEvent;
// Per-class counter deltas for one tuning tick. tiny_obs_apply_tuning() fills
// each field with (current counter - value at the previous tick) and passes
// the struct to tiny_ace_collect_stats().
typedef struct {
    uint64_t hit;            // TINY_OBS_TLS_HIT occurrences since the last tick
    uint64_t miss;           // TINY_OBS_TLS_MISS occurrences since the last tick
    uint64_t spill_ss;       // TINY_OBS_SPILL_SS occurrences since the last tick
    uint64_t spill_owner;    // TINY_OBS_SPILL_OWNER occurrences; used as the "remote" signal by ACE
    uint64_t spill_mag;      // TINY_OBS_SPILL_MAG occurrences since the last tick
    uint64_t spill_requeue;  // TINY_OBS_SPILL_REQUEUE occurrences; any non-zero value requests BURST
} TinyObsStats;

// Event kinds carried in TinyObsEvent.kind. tiny_obs_process() maps each kind
// to the matching g_tls_*_count counter; 0 and any other value are ignored.
enum {
    TINY_OBS_TLS_HIT = 1,
    TINY_OBS_TLS_MISS = 2,
    TINY_OBS_SPILL_SS = 3,
    TINY_OBS_SPILL_OWNER = 4,
    TINY_OBS_SPILL_MAG = 5,
    TINY_OBS_SPILL_REQUEUE = 6,
};

// Observation ring storage. TINY_OBS_CAP is a power of two; head/tail are
// free-running counters reduced to a slot index with TINY_OBS_MASK.
#define TINY_OBS_CAP 4096u
#define TINY_OBS_MASK (TINY_OBS_CAP - 1u)
// Producers advance g_obs_tail (CAS in tiny_obs_enqueue); the worker thread
// advances g_obs_head.
static _Atomic uint32_t g_obs_tail = 0;
static _Atomic uint32_t g_obs_head = 0;
static TinyObsEvent g_obs_ring[TINY_OBS_CAP];
// Per-slot publication flag: set to 1 by the producer after the slot has been
// written, cleared by the worker after the slot has been copied out.
static _Atomic uint8_t g_obs_ready[TINY_OBS_CAP];
static int g_obs_enable = 0;  // ENV toggle removed: observation disabled by default
// Worker thread bookkeeping (see tiny_obs_worker / tiny_obs_shutdown).
static int g_obs_started = 0;
static pthread_t g_obs_thread;
static volatile int g_obs_stop = 0;
// Scratch per-class deltas for the current tick and the tick counter that
// tiny_obs_apply_tuning() increments on every call.
static TinyObsStats g_obs_stats[TINY_NUM_CLASSES];
static uint64_t g_obs_epoch = 0;
// Batching interval. g_obs_interval_current is both the number of TLS hits a
// thread accumulates before emitting one event (tiny_obs_record) and the
// number of processed events between tuning passes (tiny_obs_worker).
// tiny_obs_update_interval() moves it between min and max.
static uint32_t g_obs_interval_default = 65536;
static uint32_t g_obs_interval_current = 65536;
static uint32_t g_obs_interval_min = 256;
static uint32_t g_obs_interval_max = 65536;
// Minimum number of epochs between two interval increases, and the epoch of
// the last interval change.
static uint32_t g_obs_interval_cooldown = 4;
static uint64_t g_obs_last_interval_epoch = 0;
static int g_obs_auto_tune = 0;  // Default: Disable auto-tuning for predictable memory usage
// Step sizes used by tiny_ace_collect_stats(); non-positive values fall back
// to ACE_MAG_STEP_DEFAULT / ACE_SLL_STEP_DEFAULT.
static int g_obs_mag_step = 8;
static int g_obs_sll_step = 16;
// Non-zero enables the "[ace]" / "[obs]" diagnostics written to stderr.
static int g_obs_debug = 0;
// Counter values seen at the previous tick; subtracted from the current
// g_tls_*_count values in tiny_obs_apply_tuning() to obtain per-tick deltas.
static uint64_t g_obs_last_hit[TINY_NUM_CLASSES];
static uint64_t g_obs_last_miss[TINY_NUM_CLASSES];
static uint64_t g_obs_last_spill_ss[TINY_NUM_CLASSES];
static uint64_t g_obs_last_spill_owner[TINY_NUM_CLASSES];
static uint64_t g_obs_last_spill_mag[TINY_NUM_CLASSES];
static uint64_t g_obs_last_spill_requeue[TINY_NUM_CLASSES];

// ---------------------------------------------------------------------------
// Tiny ACE (Adaptive Cache Engine) state machine
// ---------------------------------------------------------------------------

// States of the per-class ACE state machine. The numeric values are used as
// indices into the state_names[] table in tiny_ace_collect_stats(), so they
// must stay dense and in this order.
typedef enum {
    ACE_STATE_STEADY = 0,        // default; caps shrink back toward their base values
    ACE_STATE_BURST = 1,         // requeue spills or elevated spill/miss EMAs; caps grow
    ACE_STATE_REMOTE_HEAVY = 2,  // spill_owner EMA dominates the spill EMA
    ACE_STATE_MEM_TIGHT = 3      // g_ace_mem_tight_flag is set; caps shrink and a trim is requested
} TinyAceStateId;

// Per-class ACE runtime state, updated by tiny_ace_collect_stats().
typedef struct {
    uint64_t ema_ops;         // moving average of hit + miss per tick
    uint64_t ema_spill;       // moving average of spill_ss + spill_owner + spill_mag per tick
    uint64_t ema_remote;      // moving average of spill_owner per tick
    uint64_t ema_miss;        // moving average of miss per tick
    TinyAceStateId state;     // current state
    uint64_t last_switch_ns;  // time of the last state change; gates the next one by ACE_COOLDOWN_NS
} TinyAceState;

// Per-class policy computed by tiny_ace_collect_stats(), adjusted by
// tiny_ace_refresh_hot_ranks() and published by tiny_ace_apply_policies().
typedef struct {
    uint16_t mag_cap;          // target magazine cap (published via g_mag_cap_override)
    uint16_t sll_cap;          // target SLL cap; kept only in the policy
    uint16_t fast_cap;         // target fast cap (published via g_fast_cap when not locked)
    uint16_t batch;            // batch size chosen per state (consumers are outside this chunk)
    uint16_t hotmag_cap;       // target hot-magazine cap
    uint16_t hotmag_refill;    // target hot-magazine refill amount
    uint8_t drain_mask;        // 2 in STEADY/MEM_TIGHT, 1 in BURST, 0 in REMOTE_HEAVY; meaning defined by consumers
    uint8_t slab_lg;           // 20, or 21 for classes 0..3 in BURST; presumably log2 of a slab size — confirm
    TinyAceStateId state;      // copy of the class state at the time the policy was computed
    uint8_t hot_rank;          // 0 = not ranked; 2 = highest ema_ops, 1 = second, 3 = third
    uint8_t request_trim;      // non-zero asks tiny_ace_apply_policies() to call tiny_tls_request_trim()
    uint64_t ema_ops_snapshot; // ema_ops at the time the policy was last written
} TinyAcePolicy;

// Per-class ACE state and the policy derived from it.
static TinyAceState g_ace_state[TINY_NUM_CLASSES];
static TinyAcePolicy g_ace_policy[TINY_NUM_CLASSES];
// Timestamp captured once per tuning tick by tiny_obs_apply_tuning() and read
// by tiny_ace_collect_stats() for the state-switch cooldown.
static uint64_t g_ace_tick_now_ns = 0;
// Memory-pressure flag with hysteresis, maintained by
// tiny_ace_update_mem_tight(), and the time of its last RSS sample.
static int g_ace_mem_tight_flag = 0;
static uint64_t g_ace_last_rss_check_ns = 0;
static int g_tiny_rss_budget_kb = 0;             // HAKMEM_TINY_RSS_BUDGET_KB (0=disabled)
static int g_tiny_int_tight = 0;                 // HAKMEM_TINY_INT_TIGHT=1 → bias caps downward
static int g_tiny_diet_step = 16;                // HAKMEM_TINY_DIET_STEP (cap decrement step)
// NOTE(review): eight initializers are listed; confirm this matches
// TINY_NUM_CLASSES (any missing trailing elements would be zero).
static int g_tiny_cap_floor[TINY_NUM_CLASSES] = { 64, 64, 64, 128, 64, 64, 64, 64 }; // min MAG cap per class

// ACE tuning constants.
// ACE_COOLDOWN_NS:  minimum time between two state changes of one class.
// ACE_RSS_CHECK_NS: minimum time between two RSS samples.
// ACE_EMA_WEIGHT:   divisor of the moving average; each new sample
//                   contributes 1/ACE_EMA_WEIGHT.
// ACE_*_STEP_DEFAULT: fallbacks used when g_obs_mag_step / g_obs_sll_step are
//                   not positive.
#define ACE_COOLDOWN_NS        (800ULL * 1000 * 1000)  // 0.8s
#define ACE_RSS_CHECK_NS       (500ULL * 1000 * 1000)  // 0.5s
#define ACE_EMA_WEIGHT         8
#define ACE_MAG_STEP_DEFAULT   8
#define ACE_SLL_STEP_DEFAULT   16

// Returns the current CLOCK_MONOTONIC reading in nanoseconds.
//
// If clock_gettime() fails, 0 is returned instead of a value assembled from
// an uninitialized timespec.
static inline uint64_t tiny_ace_now_ns(void) {
    struct timespec ts = {0, 0};
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        return 0;
    }
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

// Integer moving average with weight 1/ACE_EMA_WEIGHT for the new sample.
//
// A previous value of 0 is treated as "no history": the sample is returned
// unchanged. Otherwise the result is
//   (prev * (ACE_EMA_WEIGHT - 1) + sample) / ACE_EMA_WEIGHT
// using truncating integer division.
static inline uint64_t tiny_ace_ema(uint64_t prev, uint64_t sample) {
    if (prev != 0) {
        uint64_t weighted_sum = prev * (ACE_EMA_WEIGHT - 1) + sample;
        return weighted_sum / ACE_EMA_WEIGHT;
    }
    return sample;
}

// EXTRACTED: static int get_rss_kb_self(void);

// Refreshes g_ace_mem_tight_flag from the process RSS, with hysteresis.
//
// now_ns: current monotonic time in nanoseconds.
//
// - A non-positive g_tiny_rss_budget_kb disables the feature and clears the
//   flag.
// - RSS is sampled at most once per ACE_RSS_CHECK_NS.
// - The flag is set when RSS reaches 95% of the budget and cleared when it
//   falls to 85% or below; in between it keeps its previous value.
// - A non-positive result from get_rss_kb_self() leaves the flag untouched.
static void tiny_ace_update_mem_tight(uint64_t now_ns) {
    if (g_tiny_rss_budget_kb <= 0) {
        g_ace_mem_tight_flag = 0;
        return;
    }
    if (now_ns - g_ace_last_rss_check_ns < ACE_RSS_CHECK_NS) {
        return;
    }
    g_ace_last_rss_check_ns = now_ns;
    int rss_kb = get_rss_kb_self();
    if (rss_kb <= 0) {
        return;
    }
    // Compute the thresholds in 64 bits: budget * 95 overflows a 32-bit int
    // (undefined behavior) once the budget exceeds roughly 21.5 GiB.
    int64_t budget_kb = g_tiny_rss_budget_kb;
    int64_t high = (budget_kb * 95) / 100;
    int64_t low = (budget_kb * 85) / 100;
    if (rss_kb >= high) {
        g_ace_mem_tight_flag = 1;
    } else if (rss_kb <= low) {
        g_ace_mem_tight_flag = 0;
    }
}

// Forward declarations for the ACE / observation helpers defined below.
static void tiny_ace_collect_stats(int idx, const TinyObsStats* st);
static void tiny_ace_refresh_hot_ranks(void);
static void tiny_ace_apply_policies(void);
static void tiny_ace_init_defaults(void);
static void tiny_obs_update_interval(void);

// Per-thread, per-class count of TLS hits that tiny_obs_record() has seen but
// not yet emitted as a batched ring event.
static __thread uint32_t g_obs_hit_accum[TINY_NUM_CLASSES];

// Appends one event to the observation ring.
//
// kind:      TINY_OBS_* event kind.
// class_idx: tiny class; stored truncated to uint8_t.
// count:     number of occurrences the event represents.
//
// A slot is reserved by advancing g_obs_tail with a compare-and-swap, so
// several threads may call this concurrently. If the ring already holds
// TINY_OBS_CAP unconsumed entries the event is silently dropped.
static inline void tiny_obs_enqueue(uint8_t kind, int class_idx, uint16_t count) {
    uint32_t tail;
    for (;;) {
        tail = atomic_load_explicit(&g_obs_tail, memory_order_relaxed);
        uint32_t head = atomic_load_explicit(&g_obs_head, memory_order_acquire);
        // Counters are free-running; unsigned subtraction gives the fill level.
        if (tail - head >= TINY_OBS_CAP) return;  // drop on overflow
        uint32_t desired = tail + 1u;
        // Claim slot `tail`; on failure another producer won it, so re-read
        // both counters and try again.
        if (atomic_compare_exchange_weak_explicit(&g_obs_tail,
                                                  &tail,
                                                  desired,
                                                  memory_order_acq_rel,
                                                  memory_order_relaxed)) {
            break;
        }
    }
    uint32_t idx = tail & TINY_OBS_MASK;
    TinyObsEvent ev;
    ev.kind = kind;
    ev.class_idx = (uint8_t)class_idx;
    ev.count = count;
    g_obs_ring[idx] = ev;
    // Publish the slot only after its contents are written; the worker reads
    // this flag with acquire before copying the slot.
    atomic_store_explicit(&g_obs_ready[idx], 1u, memory_order_release);
}

static inline void tiny_obs_record(uint8_t kind, int class_idx) {
    if (__builtin_expect(!g_obs_enable, 0)) return;
    if (__builtin_expect(kind == TINY_OBS_TLS_HIT, 1)) {
        uint32_t interval = g_obs_interval_current;
        if (interval <= 1u) {
            tiny_obs_enqueue(kind, class_idx, 1u);
            return;
        }
        uint32_t accum = ++g_obs_hit_accum[class_idx];
        if (accum < interval) return;
        uint32_t emit = interval;
        if (emit > UINT16_MAX) emit = UINT16_MAX;
        if (accum > emit) {
            g_obs_hit_accum[class_idx] = accum - emit;
        } else {
            g_obs_hit_accum[class_idx] = 0u;
        }
        tiny_obs_enqueue(kind, class_idx, (uint16_t)emit);
        return;
    }
    tiny_obs_enqueue(kind, class_idx, 1u);
}

// Folds one ring event into the matching per-class g_tls_*_count counter.
//
// ev: event to apply. Events with a zero count, a class index outside
//     [0, TINY_NUM_CLASSES) or an unknown kind are ignored.
static inline void tiny_obs_process(const TinyObsEvent* ev) {
    const int cls = ev->class_idx;
    const uint16_t n = ev->count;

    if (cls < 0 || cls >= TINY_NUM_CLASSES) return;
    if (n == 0) return;

    switch (ev->kind) {
        case TINY_OBS_TLS_HIT:       g_tls_hit_count[cls] += n;           return;
        case TINY_OBS_TLS_MISS:      g_tls_miss_count[cls] += n;          return;
        case TINY_OBS_SPILL_SS:      g_tls_spill_ss_count[cls] += n;      return;
        case TINY_OBS_SPILL_OWNER:   g_tls_spill_owner_count[cls] += n;   return;
        case TINY_OBS_SPILL_MAG:     g_tls_spill_mag_count[cls] += n;     return;
        case TINY_OBS_SPILL_REQUEUE: g_tls_spill_requeue_count[cls] += n; return;
        default:                     return;
    }
}

// Runs one ACE tick for a single tiny class.
//
// idx: tiny class index (used unchecked to index the per-class arrays).
// st:  counter deltas for this class since the previous tick.
//
// Steps:
//   1. Fold the deltas into the class's moving averages.
//   2. If the tick saw no activity at all, only refresh ema_ops_snapshot in
//      the stored policy and return.
//   3. Pick the desired state (MEM_TIGHT > requeue-driven BURST >
//      REMOTE_HEAVY > BURST > STEADY) and adopt it only if ACE_COOLDOWN_NS
//      has passed since the last switch.
//   4. Derive new magazine / SLL / fast / hot-magazine targets from the
//      current state, clamp them, and write the result to g_ace_policy[idx].
//
// Nothing is published to the allocator here; tiny_ace_apply_policies() does
// that from the stored policy.
static void tiny_ace_collect_stats(int idx, const TinyObsStats* st) {
    TinyAceState* cs = &g_ace_state[idx];
    // Work on a copy of the policy; it is stored back at the end (or in the
    // idle early-return below).
    TinyAcePolicy pol = g_ace_policy[idx];
    uint64_t now = g_ace_tick_now_ns;

    uint64_t ops = st->hit + st->miss;
    // Note: spill_requeue is not part of spills_total; it is handled
    // separately as a BURST trigger.
    uint64_t spills_total = st->spill_ss + st->spill_owner + st->spill_mag;
    uint64_t remote_spill = st->spill_owner;
    uint64_t miss = st->miss;

    cs->ema_ops = tiny_ace_ema(cs->ema_ops, ops);
    cs->ema_spill = tiny_ace_ema(cs->ema_spill, spills_total);
    cs->ema_remote = tiny_ace_ema(cs->ema_remote, remote_spill);
    cs->ema_miss = tiny_ace_ema(cs->ema_miss, miss);

    // Idle tick: keep state and caps as they are (including hot_rank and
    // request_trim), only refresh the snapshot.
    if (ops == 0 && spills_total == 0 && st->spill_requeue == 0) {
        pol.ema_ops_snapshot = cs->ema_ops;
        g_ace_policy[idx] = pol;
        return;
    }

    // Desired state, in priority order.
    TinyAceStateId next_state;
    if (g_ace_mem_tight_flag) {
        next_state = ACE_STATE_MEM_TIGHT;
    } else if (st->spill_requeue > 0) {
        next_state = ACE_STATE_BURST;
    } else if (cs->ema_remote > 16 && cs->ema_remote >= (cs->ema_spill / 3 + 1)) {
        next_state = ACE_STATE_REMOTE_HEAVY;
    } else if (cs->ema_spill > 32 || cs->ema_miss > 16 || miss > 16) {
        next_state = ACE_STATE_BURST;
    } else {
        next_state = ACE_STATE_STEADY;
    }

    // Cooldown applies to every transition, including into MEM_TIGHT.
    if (next_state != cs->state) {
        if (now - cs->last_switch_ns >= ACE_COOLDOWN_NS) {
            cs->state = next_state;
            cs->last_switch_ns = now;
        } else {
            // next_state is not read again below; cs->state drives the rest.
            next_state = cs->state;
        }
    }

    pol.state = cs->state;
    // hot_rank is reassigned later by tiny_ace_refresh_hot_ranks().
    pol.hot_rank = 0;
    pol.request_trim = 0;

    // Magazine cap bounds: [base (>= 16, <= limit), per-class max (<= limit)].
    int base_mag = tiny_default_cap(idx);
    if (base_mag > g_mag_cap_limit) base_mag = g_mag_cap_limit;
    if (base_mag < 16) base_mag = 16;
    int mag_min = base_mag;
    int mag_max = tiny_cap_max_for_class(idx);
    if (mag_max > g_mag_cap_limit) mag_max = g_mag_cap_limit;
    if (mag_max < mag_min) mag_max = mag_min;

    // Start from the currently published override, falling back to the base.
    int current_mag = g_mag_cap_override[idx];
    if (current_mag <= 0) current_mag = base_mag;
    if (current_mag < mag_min) current_mag = mag_min;
    if (current_mag > mag_max) current_mag = mag_max;

    int mag_step = (g_obs_mag_step > 0) ? g_obs_mag_step : ACE_MAG_STEP_DEFAULT;
    if (mag_step < 1) mag_step = 1;

    // Phase12: g_sll_cap_override is a legacy-compatibility dummy. The SLL cap is kept directly in TinyAcePolicy.
    int current_sll = pol.sll_cap;
    if (current_sll < current_mag) current_sll = current_mag;
    if (current_sll < 32) current_sll = 32;
    int sll_step = (g_obs_sll_step > 0) ? g_obs_sll_step : ACE_SLL_STEP_DEFAULT;
    if (sll_step < 1) sll_step = 1;
    int sll_max = TINY_TLS_MAG_CAP;

    // Defaults for the remaining policy fields; individual states override
    // them below.
    uint16_t base_fast = g_fast_cap_defaults[idx];
    uint16_t current_fast = g_fast_cap[idx];
    if (current_fast == 0 && base_fast > 0) current_fast = base_fast;
    uint16_t new_fast = current_fast;
    uint16_t new_batch = (idx <= 3) ? 64 : 48;
    uint8_t new_drain = 2;
    uint8_t new_slab_lg = 20;

    int new_mag = current_mag;
    int new_sll = current_sll;
    // Hot-magazine targets start from the current effective values and are
    // bounded to [min(24, limit), limit] where limit is default + 64 clamped
    // to [32, 512].
    int hot_cap_new = (int)hotmag_effective_cap(idx);
    int hot_refill_new = (int)hotmag_refill_target(idx);
    int hot_cap_limit = g_hotmag_cap_default + 64;
    if (hot_cap_limit < 32) hot_cap_limit = 32;
    if (hot_cap_limit > 512) hot_cap_limit = 512;
    int hot_cap_floor = 24;
    if (hot_cap_floor > hot_cap_limit) hot_cap_floor = hot_cap_limit;

    switch (cs->state) {
        // STEADY: decay magazine and SLL caps toward their base values by
        // half a step per tick, shrink hot-magazine targets, reset fast cap.
        case ACE_STATE_STEADY: {
            if (new_mag > mag_min) {
                int dec = mag_step / 2;
                if (dec < 1) dec = 1;
                new_mag -= dec;
                if (new_mag < mag_min) new_mag = mag_min;
            }
            int target_sll = new_mag * ((g_sll_multiplier > 0) ? g_sll_multiplier : 2);
            if (target_sll < new_mag) target_sll = new_mag;
            if (new_sll > target_sll) {
                int dec = sll_step / 2;
                if (dec < 1) dec = 1;
                new_sll -= dec;
                if (new_sll < target_sll) new_sll = target_sll;
            }
            if (g_hotmag_enable && idx <= 3) {
                if (!g_hotmag_cap_locked[idx]) hot_cap_new -= 16;
                if (!g_hotmag_refill_locked[idx]) hot_refill_new -= 8;
            }
            if (g_fast_enable) new_fast = base_fast;
            new_drain = 2;
            break;
        }
        // BURST: grow magazine by a full step, raise the SLL cap to at least
        // mag * multiplier + step, enlarge fast cap, batch and (for classes
        // 0..3) slab_lg.
        case ACE_STATE_BURST: {
            if (g_hotmag_enable && idx <= 3) {
                if (!g_hotmag_cap_locked[idx]) hot_cap_new += 32;
                if (!g_hotmag_refill_locked[idx]) hot_refill_new += 16;
            }
            new_mag += mag_step;
            if (new_mag > mag_max) new_mag = mag_max;
            int target_sll = new_mag * ((g_sll_multiplier > 0) ? g_sll_multiplier : 2) + sll_step;
            if (target_sll > sll_max) target_sll = sll_max;
            if (target_sll > new_sll) new_sll = target_sll;
            if (g_fast_enable) {
                uint32_t f = (uint32_t)((base_fast > 0) ? base_fast : current_fast) + 64u;
                if (f > TINY_TLS_MAG_CAP) f = TINY_TLS_MAG_CAP;
                new_fast = (uint16_t)f;
            }
            new_batch = (idx <= 3) ? 96 : 64;
            new_drain = 1;
            if (idx <= 3) new_slab_lg = 21;
            break;
        }
        // REMOTE_HEAVY: grow the SLL cap by a full step and the magazine by
        // half a step; drain_mask 0; fast cap back to its default.
        case ACE_STATE_REMOTE_HEAVY: {
            if (g_hotmag_enable && idx <= 3) {
                if (!g_hotmag_cap_locked[idx]) hot_cap_new += 16;
                if (!g_hotmag_refill_locked[idx]) hot_refill_new += 8;
            }
            int target_sll = new_sll + sll_step;
            if (target_sll > sll_max) target_sll = sll_max;
            new_sll = target_sll;
            if (new_mag < mag_max) {
                int inc = mag_step / 2;
                if (inc < 1) inc = 1;
                new_mag += inc;
                if (new_mag > mag_max) new_mag = mag_max;
            }
            new_drain = 0;
            if (g_fast_enable) new_fast = base_fast;
            break;
        }
        // MEM_TIGHT: shrink magazine by two steps and SLL by one, halve the
        // fast cap (not below 16 unless the default is 0), request a trim.
        case ACE_STATE_MEM_TIGHT: {
            if (g_hotmag_enable && idx <= 3) {
                if (!g_hotmag_cap_locked[idx]) hot_cap_new -= 24;
                if (!g_hotmag_refill_locked[idx]) hot_refill_new /= 2;
            }
            new_mag -= mag_step * 2;
            if (new_mag < mag_min) new_mag = mag_min;
            new_sll -= sll_step;
            if (new_sll < new_mag) new_sll = new_mag;
            pol.request_trim = 1;
            if (g_fast_enable) {
                if (base_fast > 0) {
                    uint32_t f = base_fast / 2;
                    if (f < 16) f = 16;
                    if (f > TINY_TLS_MAG_CAP) f = TINY_TLS_MAG_CAP;
                    new_fast = (uint16_t)f;
                } else {
                    new_fast = 0;
                }
            }
            new_batch = (idx <= 3) ? 48 : 32;
            new_drain = 2;
            new_slab_lg = 20;
            break;
        }
    }

    // Clamp hot-magazine targets. Locked values, and classes the hot magazine
    // does not cover, simply mirror the current effective values.
    if (g_hotmag_enable && idx <= 3) {
        if (!g_hotmag_cap_locked[idx]) {
            if (hot_cap_new > hot_cap_limit) hot_cap_new = hot_cap_limit;
            if (hot_cap_new < hot_cap_floor) hot_cap_new = hot_cap_floor;
        } else {
            hot_cap_new = (int)hotmag_effective_cap(idx);
        }
        if (!g_hotmag_refill_locked[idx]) {
            // Refill is 0 or within [8, cap] (cap itself wins if it is < 8... 
            // the floor of 8 is applied after the cap clamp).
            if (hot_refill_new < 0) hot_refill_new = 0;
            if (hot_refill_new > hot_cap_new) hot_refill_new = hot_cap_new;
            if (hot_refill_new > 0 && hot_refill_new < 8) hot_refill_new = 8;
        } else {
            hot_refill_new = (int)hotmag_refill_target(idx);
        }
    } else {
        hot_cap_new = (int)hotmag_effective_cap(idx);
        hot_refill_new = (int)hotmag_refill_target(idx);
    }

    // Final clamps; the SLL cap is never allowed below the magazine cap.
    if (new_mag > mag_max) new_mag = mag_max;
    if (new_mag < mag_min) new_mag = mag_min;
    if (new_sll > sll_max) new_sll = sll_max;
    if (new_sll < new_mag) new_sll = new_mag;

    // Any shrink of the magazine cap also requests a trim.
    if (new_mag < current_mag) pol.request_trim = 1;

    if (!g_fast_enable) new_fast = 0;
    if (new_fast > TINY_TLS_MAG_CAP) new_fast = TINY_TLS_MAG_CAP;

    pol.mag_cap = (uint16_t)new_mag;
    pol.sll_cap = (uint16_t)new_sll;
    pol.fast_cap = new_fast;
    pol.batch = new_batch;
    pol.drain_mask = new_drain;
    pol.slab_lg = new_slab_lg;
    pol.hotmag_cap = (uint16_t)hot_cap_new;
    pol.hotmag_refill = (uint16_t)hot_refill_new;
    pol.ema_ops_snapshot = cs->ema_ops;

    if (g_obs_debug) {
        // Indexed by TinyAceStateId; order must match the enum.
        static const char* state_names[] = {"steady", "burst", "remote", "tight"};
        fprintf(stderr,
                "[ace] class %d state=%s ops=%llu spill=%llu remote=%llu miss=%llu mag=%d->%d sll=%d fast=%u hot=%d/%d\n",
                idx,
                state_names[cs->state],
                (unsigned long long)ops,
                (unsigned long long)spills_total,
                (unsigned long long)remote_spill,
                (unsigned long long)miss,
                current_mag,
                new_mag,
                new_sll,
                (unsigned)new_fast,
                hot_cap_new,
                hot_refill_new);
    }

    g_ace_policy[idx] = pol;
}

// Ranks the tiny classes by ema_ops and adjusts fast caps and hot-path
// allocation hooks accordingly.
//
// - The three classes with the largest non-zero ema_ops receive hot_rank
//   2 (largest), 1 (second) and 3 (third); everyone else gets 0. Ties keep
//   the lower class index in the better position.
// - With g_fast_enable off, every fast_cap and every g_hot_alloc_fn entry is
//   cleared and nothing else happens.
// - Otherwise ranked classes get a fast_cap bonus (+48 / +24 / +16), unranked
//   classes move back toward g_fast_cap_defaults by at most 32 per call, and
//   MEM_TIGHT classes keep their fast_cap. The result is capped at
//   TINY_TLS_MAG_CAP.
// - g_hot_alloc_fn[i] is set to the class-specific pop function for ranked
//   classes 0..3 and to NULL for all other classes.
static void tiny_ace_refresh_hot_ranks(void) {
    // best[0] is the busiest class, best[2] the third busiest; -1 = empty.
    int best[3] = { -1, -1, -1 };
    uint64_t best_ops[3] = { 0, 0, 0 };

    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        const uint64_t ops = g_ace_state[cls].ema_ops;
        int slot;
        if (ops > best_ops[0]) {
            slot = 0;
        } else if (ops > best_ops[1]) {
            slot = 1;
        } else if (ops > best_ops[2]) {
            slot = 2;
        } else {
            continue;
        }
        // Shift the lower-ranked entries down to make room at `slot`.
        for (int k = 2; k > slot; k--) {
            best_ops[k] = best_ops[k - 1];
            best[k] = best[k - 1];
        }
        best_ops[slot] = ops;
        best[slot] = cls;
    }

    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        g_ace_policy[cls].hot_rank = 0;
        if (!g_fast_enable) {
            g_ace_policy[cls].fast_cap = 0;
        }
    }
    if (best[0] >= 0) g_ace_policy[best[0]].hot_rank = 2;
    if (best[1] >= 0) g_ace_policy[best[1]].hot_rank = 1;
    if (best[2] >= 0) g_ace_policy[best[2]].hot_rank = 3;

    if (!g_fast_enable) {
        for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
            g_hot_alloc_fn[cls] = NULL;
        }
        return;
    }

    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyAcePolicy* p = &g_ace_policy[cls];
        uint32_t cap = p->fast_cap;

        // MEM_TIGHT keeps the tightened fast cap untouched.
        if (p->state != ACE_STATE_MEM_TIGHT) {
            switch (p->hot_rank) {
                case 2:
                    cap += 48u;
                    break;
                case 1:
                    cap += 24u;
                    break;
                case 3:
                    cap += 16u;
                    break;
                default: {
                    // Unranked: relax toward the default, 32 at a time.
                    uint32_t base = g_fast_cap_defaults[cls];
                    if (cap <= base) {
                        cap = base;
                    } else {
                        uint32_t step = cap - base;
                        if (step > 32u) step = 32u;
                        cap = (cap > step) ? (cap - step) : base;
                    }
                    break;
                }
            }
        }

        if (cap > TINY_TLS_MAG_CAP) cap = TINY_TLS_MAG_CAP;
        p->fast_cap = (uint16_t)cap;
    }

    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyHotAllocFn hook = NULL;
        if (g_ace_policy[cls].hot_rank != 0) {
            if (cls == 0) {
                hook = tiny_hot_pop_class0;
            } else if (cls == 1) {
                hook = tiny_hot_pop_class1;
            } else if (cls == 2) {
                hook = tiny_hot_pop_class2;
            } else if (cls == 3) {
                hook = tiny_hot_pop_class3;
            }
        }
        g_hot_alloc_fn[cls] = hook;
    }
}

// Publishes every class's stored policy to the allocator globals.
//
// Per class:
//   - magazine cap: clamped to [16, TINY_TLS_MAG_CAP]; written to
//     g_mag_cap_override and announced with tiny_tls_publish_targets() only
//     when it differs from the stored override;
//   - trim: requested when the policy asks for it or the cap shrank relative
//     to the previous effective cap;
//   - SLL cap: clamped to [magazine cap, TINY_TLS_MAG_CAP] and stored back
//     into the policy only;
//   - fast cap: copied to g_fast_cap when fast mode is on and the class is
//     not locked;
//   - hot magazine (hot classes only, when enabled): cap clamped to
//     [16, 512], refill clamped to the current cap, both skipped when locked,
//     followed by hotmag_init_if_needed().
static void tiny_ace_apply_policies(void) {
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyAcePolicy* p = &g_ace_policy[cls];

        // Effective cap before this update (default when no override is set).
        int mag_before = g_mag_cap_override[cls];
        if (mag_before <= 0) {
            mag_before = tiny_default_cap(cls);
        }

        int mag = p->mag_cap;
        if (mag < 16) mag = 16;
        if (mag > TINY_TLS_MAG_CAP) mag = TINY_TLS_MAG_CAP;

        if (mag != g_mag_cap_override[cls]) {
            g_mag_cap_override[cls] = mag;
            tiny_tls_publish_targets(cls, (uint32_t)mag);
        }

        const int shrank = (mag < mag_before);
        if (p->request_trim || shrank) {
            tiny_tls_request_trim(cls, g_obs_epoch);
        }

        int sll = p->sll_cap;
        if (sll < mag) sll = mag;
        if (sll > TINY_TLS_MAG_CAP) sll = TINY_TLS_MAG_CAP;
        p->sll_cap = (uint16_t)sll;  // kept in the policy only; no global override

        if (g_fast_enable && !g_fast_cap_locked[cls]) {
            uint16_t fast = p->fast_cap;
            if (fast > TINY_TLS_MAG_CAP) fast = TINY_TLS_MAG_CAP;
            g_fast_cap[cls] = fast;
        }

        if (!g_hotmag_enable || !hkm_is_hot_class(cls)) {
            continue;
        }

        if (!g_hotmag_cap_locked[cls]) {
            uint16_t hot_cap = p->hotmag_cap;
            if (hot_cap < 16) hot_cap = 16;
            if (hot_cap > 512) hot_cap = 512;
            // Write only on change.
            if (g_hotmag_cap_current[cls] != hot_cap) {
                g_hotmag_cap_current[cls] = hot_cap;
            }
        }
        if (!g_hotmag_refill_locked[cls]) {
            uint16_t hot_refill = p->hotmag_refill;
            if (hot_refill > g_hotmag_cap_current[cls]) {
                hot_refill = g_hotmag_cap_current[cls];
            }
            g_hotmag_refill_current[cls] = hot_refill;
        }
        hotmag_init_if_needed(cls);
    }
}

static void tiny_ace_init_defaults(void) {
    uint64_t now = tiny_ace_now_ns();
    int mult = (g_sll_multiplier > 0) ? g_sll_multiplier : 2;
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        TinyAceState* cs = &g_ace_state[i];
        cs->ema_ops = 0;
        cs->ema_spill = 0;
        cs->ema_remote = 0;
        cs->ema_miss = 0;
        cs->state = ACE_STATE_STEADY;
        cs->last_switch_ns = now;

        TinyAcePolicy* pol = &g_ace_policy[i];
        pol->state = ACE_STATE_STEADY;
        pol->hot_rank = 0;
        pol->request_trim = 0;
        pol->ema_ops_snapshot = 0;
        int base_mag = tiny_default_cap(i);
        if (base_mag > g_mag_cap_limit) base_mag = g_mag_cap_limit;
        if (base_mag < 16) base_mag = 16;
        pol->mag_cap = (uint16_t)base_mag;
        int sll = base_mag * mult;
        if (sll > TINY_TLS_MAG_CAP) sll = TINY_TLS_MAG_CAP;
        pol->sll_cap = (uint16_t)sll;
        pol->fast_cap = g_fast_cap_defaults[i];
        pol->batch = (i <= 3) ? 64 : 48;
        pol->drain_mask = 2;
        pol->slab_lg = 20;
        pol->hotmag_cap = hotmag_effective_cap(i);
        pol->hotmag_refill = hotmag_refill_target(i);

        if (g_mag_cap_override[i] <= 0) g_mag_cap_override[i] = pol->mag_cap;
        // Phase12: g_sll_cap_override は使用しない（互換用ダミー）
        switch (i) {
            case 0: g_hot_alloc_fn[i] = tiny_hot_pop_class0; break;
            case 1: g_hot_alloc_fn[i] = tiny_hot_pop_class1; break;
            case 2: g_hot_alloc_fn[i] = tiny_hot_pop_class2; break;
            default: g_hot_alloc_fn[i] = NULL; break;
        }
    }

    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        tiny_tls_publish_targets(i, (uint32_t)g_mag_cap_override[i]);
    }
}

// Adapts g_obs_interval_current to the current ACE situation. No-op unless
// g_obs_auto_tune is set.
//
// - Urgent (memory tight, or any class policy not in STEADY): jump straight
//   to g_obs_interval_min (at least 1).
// - Calm: double the interval, capped at g_obs_interval_max, but not more
//   often than every g_obs_interval_cooldown epochs.
//
// Every change records the epoch in g_obs_last_interval_epoch and, with
// g_obs_debug set, logs the new value to stderr.
static void tiny_obs_update_interval(void) {
    if (!g_obs_auto_tune) return;

    const uint32_t cur = g_obs_interval_current;

    int non_steady = 0;
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        if (g_ace_policy[cls].state != ACE_STATE_STEADY) {
            non_steady += 1;
        }
    }

    if (g_ace_mem_tight_flag || non_steady > 0) {
        uint32_t want = g_obs_interval_min;
        if (want < 1u) want = 1u;
        if (want != cur) {
            g_obs_interval_current = want;
            g_obs_last_interval_epoch = g_obs_epoch;
            if (g_obs_debug) {
                fprintf(stderr, "[obs] interval -> %u (urgent)\n", want);
            }
        }
        return;
    }

    if (cur >= g_obs_interval_max) return;
    if ((g_obs_epoch - g_obs_last_interval_epoch) < g_obs_interval_cooldown) return;

    uint32_t want = cur << 1;
    if (want < cur) want = g_obs_interval_max;  // shift wrapped around
    if (want > g_obs_interval_max) want = g_obs_interval_max;
    if (want == cur) return;

    g_obs_interval_current = want;
    g_obs_last_interval_epoch = g_obs_epoch;
    if (g_obs_debug) {
        fprintf(stderr, "[obs] interval -> %u (steady)\n", want);
    }
}

// Advises the OS that the memory of a SuperSlab is not needed right now.
//
// ss:    SuperSlab to release; ignored when NULL or when its magic does not
//        match SUPERSLAB_MAGIC.
// epoch: current epoch. A non-zero epoch is rate-limited: nothing happens
//        until at least g_ss_partial_interval epochs have passed since
//        ss->partial_epoch. An epoch of 0 bypasses the rate limit.
//
// Compiles to a no-op when MADV_DONTNEED is not defined, and does nothing
// when g_ss_partial_enable is 0. On success (guarded madvise returns 0) the
// epoch is stored in ss->partial_epoch.
//
// NOTE(review): the advised range starts at `ss` itself and spans
// 1 << lg_size bytes, so it includes the memory holding this SuperSlab
// struct — confirm that ss_os_madvise_guarded() / the mapping type keeps the
// header contents intact.
static inline void superslab_partial_release(SuperSlab* ss, uint32_t epoch) {
#if defined(MADV_DONTNEED)
    if (!g_ss_partial_enable || !ss || ss->magic != SUPERSLAB_MAGIC) return;

    if (epoch != 0) {
        uint32_t elapsed = epoch - ss->partial_epoch;
        if (elapsed < g_ss_partial_interval) return;
    }

    const size_t span = (size_t)1 << ss->lg_size;
    if (ss_os_madvise_guarded(ss, span, MADV_DONTNEED, "tiny_ss_partial") != 0) {
        return;
    }
    ss->partial_epoch = epoch;
#else
    (void)ss; (void)epoch;
#endif
}

// Forwards one class's per-tick deltas to the ACE state machine, but only
// when auto-tuning (g_obs_auto_tune) is enabled.
static inline void tiny_obs_adjust_class(int idx, const TinyObsStats* st) {
    if (g_obs_auto_tune) {
        tiny_ace_collect_stats(idx, st);
    }
}

// Runs one tuning tick.
//
// Advances g_obs_epoch, refreshes the tick timestamp and the memory-tight
// flag, then for every class converts the cumulative g_tls_*_count counters
// into deltas since the previous tick (stored in g_obs_stats) and hands them
// to tiny_obs_adjust_class(). With auto-tuning enabled the hot ranks are
// refreshed, the policies published and the observation interval adapted.
static void tiny_obs_apply_tuning(void) {
    g_obs_epoch += 1;

    const uint64_t now_ns = tiny_ace_now_ns();
    g_ace_tick_now_ns = now_ns;
    tiny_ace_update_mem_tight(now_ns);

    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        // Snapshot all counters first, then derive deltas from the snapshot.
        const uint64_t hit = g_tls_hit_count[cls];
        const uint64_t miss = g_tls_miss_count[cls];
        const uint64_t spill_ss = g_tls_spill_ss_count[cls];
        const uint64_t spill_owner = g_tls_spill_owner_count[cls];
        const uint64_t spill_mag = g_tls_spill_mag_count[cls];
        const uint64_t spill_requeue = g_tls_spill_requeue_count[cls];

        TinyObsStats* delta = &g_obs_stats[cls];
        delta->hit = hit - g_obs_last_hit[cls];
        delta->miss = miss - g_obs_last_miss[cls];
        delta->spill_ss = spill_ss - g_obs_last_spill_ss[cls];
        delta->spill_owner = spill_owner - g_obs_last_spill_owner[cls];
        delta->spill_mag = spill_mag - g_obs_last_spill_mag[cls];
        delta->spill_requeue = spill_requeue - g_obs_last_spill_requeue[cls];

        // Remember the snapshot as the baseline for the next tick.
        g_obs_last_hit[cls] = hit;
        g_obs_last_miss[cls] = miss;
        g_obs_last_spill_ss[cls] = spill_ss;
        g_obs_last_spill_owner[cls] = spill_owner;
        g_obs_last_spill_mag[cls] = spill_mag;
        g_obs_last_spill_requeue[cls] = spill_requeue;

        tiny_obs_adjust_class(cls, delta);
    }

    if (!g_obs_auto_tune) return;

    tiny_ace_refresh_hot_ranks();
    tiny_ace_apply_policies();
    tiny_obs_update_interval();
}

// Consumer thread for the observation ring.
//
// arg: unused.
// Returns NULL.
//
// Main loop, until g_obs_stop becomes non-zero:
//   - ring empty: run a tuning pass if any events were processed since the
//     last one, then sleep 1 ms;
//   - next slot reserved but not yet published: yield and retry;
//   - otherwise copy the event out, release the slot, apply the event, and
//     run a tuning pass every g_obs_interval_current events.
// After the stop request the remaining events are drained and one final
// tuning pass is run.
//
// The head counter is published with release ordering. Producers read it with
// acquire before reusing a slot, so the copy of the slot and the clearing of
// its ready flag are guaranteed to happen before a producer overwrites the
// slot and sets the flag again. With a relaxed store the clear could become
// visible after the producer's "ready = 1", leaving the slot looking
// unpublished forever.
static void* tiny_obs_worker(void* arg) {
    (void)arg;
    uint32_t processed = 0;
    while (!g_obs_stop) {
        uint32_t head = atomic_load_explicit(&g_obs_head, memory_order_relaxed);
        uint32_t tail = atomic_load_explicit(&g_obs_tail, memory_order_acquire);
        if (head == tail) {
            if (processed > 0) {
                tiny_obs_apply_tuning();
                processed = 0;
            }
            struct timespec ts = {0, 1000000};  // 1.0 ms backoff when idle
            nanosleep(&ts, NULL);
            continue;
        }
        uint32_t idx = head & TINY_OBS_MASK;
        // The producer has claimed the slot but may not have filled it yet.
        if (!atomic_load_explicit(&g_obs_ready[idx], memory_order_acquire)) {
            sched_yield();
            continue;
        }
        TinyObsEvent ev = g_obs_ring[idx];
        atomic_store_explicit(&g_obs_ready[idx], 0u, memory_order_release);
        atomic_store_explicit(&g_obs_head, head + 1u, memory_order_release);
        tiny_obs_process(&ev);
        if (++processed >= g_obs_interval_current) {
            tiny_obs_apply_tuning();
            processed = 0;
        }
    }
    // Drain remaining events before exit
    for (;;) {
        uint32_t head = atomic_load_explicit(&g_obs_head, memory_order_relaxed);
        uint32_t tail = atomic_load_explicit(&g_obs_tail, memory_order_acquire);
        if (head == tail) break;
        uint32_t idx = head & TINY_OBS_MASK;
        if (!atomic_load_explicit(&g_obs_ready[idx], memory_order_acquire)) {
            sched_yield();
            continue;
        }
        TinyObsEvent ev = g_obs_ring[idx];
        atomic_store_explicit(&g_obs_ready[idx], 0u, memory_order_release);
        atomic_store_explicit(&g_obs_head, head + 1u, memory_order_release);
        tiny_obs_process(&ev);
    }
    tiny_obs_apply_tuning();
    return NULL;
}

// Observation start hook. The runtime knobs were removed, so this no longer
// starts anything: it forces observation off and marks the worker as not
// started.
static void tiny_obs_start_if_needed(void) {
    // Reference the remaining tuning variables (presumably to keep
    // unused-variable warnings quiet while the feature is disabled).
    (void)g_obs_debug;
    (void)g_obs_sll_step;
    (void)g_obs_mag_step;
    (void)g_obs_auto_tune;
    (void)g_obs_interval_max;
    (void)g_obs_interval_min;
    (void)g_obs_interval_current;
    (void)g_obs_interval_default;

    // Keep observation disabled for predictable memory use.
    g_obs_started = 0;
    g_obs_enable = 0;
}

// Stops the observation worker if it was started: raises g_obs_stop, waits
// for the thread to finish, then clears the started and enable flags.
// Does nothing when the worker is not marked as started.
static void tiny_obs_shutdown(void) {
    if (g_obs_started) {
        g_obs_stop = 1;
        pthread_join(g_obs_thread, NULL);
        g_obs_enable = 0;
        g_obs_started = 0;
    }
}
// Tiny diet (memory-tight) controls
// NOTE(review): no diet-related declaration follows this heading in the
// visible part of the file (g_tiny_rss_budget_kb / g_tiny_int_tight /
// g_tiny_diet_step are declared further up) — the heading may be a leftover.
// Event logging options: default minimal (no timestamp, no thread id)
static int g_int_event_ts = 0;               // HAKMEM_INT_EVENT_TS=1 to include timestamp
static unsigned g_int_sample_mask = 0;       // HAKMEM_INT_SAMPLE=(N) → mask=(1<<N)-1; 0=log all
static __thread unsigned g_tls_ev_seq = 0;   // per-thread event seq