typedef struct {
    uint64_t ts_ns;          // timestamp (ns, monotonic)
    uint32_t size;           // requested/served size
    uint32_t site_id;        // callsite id (optional; 0 if unknown)
    uint16_t latency_bucket; // latency bucket (optional; 0 if unknown)
    uint8_t  tier_hit;       // which tier handled/refilled (SLL/MAG/SLAB/SUPER/FRONT)
    uint8_t  flags;          // burst/sequential/random (bit flags; 0 unused)
    uint16_t class_idx;      // tiny class
    uint16_t thread_id;      // low bits of thread id (best-effort)
} AllocEvent;

#define EVENTQ_CAP  65536u
#define EVENTQ_MASK (EVENTQ_CAP - 1u)

static _Atomic uint32_t g_ev_tail = 0;
static _Atomic uint32_t g_ev_head = 0;
static AllocEvent g_ev_ring[EVENTQ_CAP];

static int g_int_engine = 0;  // HAKMEM_INT_ENGINE=1
static pthread_t g_int_thread;
static volatile int g_int_stop = 0;
static int g_int_started = 0;

// Lightweight observation ring (async aggregation for TLS stats)
typedef struct {
    uint8_t  kind;
    uint8_t  class_idx;
    uint16_t count;
} TinyObsEvent;

typedef struct {
    uint64_t hit;
    uint64_t miss;
    uint64_t spill_ss;
    uint64_t spill_owner;
    uint64_t spill_mag;
    uint64_t spill_requeue;
} TinyObsStats;

enum {
    TINY_OBS_TLS_HIT       = 1,
    TINY_OBS_TLS_MISS      = 2,
    TINY_OBS_SPILL_SS      = 3,
    TINY_OBS_SPILL_OWNER   = 4,
    TINY_OBS_SPILL_MAG     = 5,
    TINY_OBS_SPILL_REQUEUE = 6,
};

#define TINY_OBS_CAP  4096u
#define TINY_OBS_MASK (TINY_OBS_CAP - 1u)

static _Atomic uint32_t g_obs_tail = 0;
static _Atomic uint32_t g_obs_head = 0;
static TinyObsEvent g_obs_ring[TINY_OBS_CAP];
static _Atomic uint8_t g_obs_ready[TINY_OBS_CAP];
static int g_obs_enable = 0;  // ENV toggle removed: observation disabled by default
static int g_obs_started = 0;
static pthread_t g_obs_thread;
static volatile int g_obs_stop = 0;
static TinyObsStats g_obs_stats[TINY_NUM_CLASSES];
static uint64_t g_obs_epoch = 0;
static uint32_t g_obs_interval_default = 65536;
static uint32_t g_obs_interval_current = 65536;
static uint32_t g_obs_interval_min = 256;
static uint32_t g_obs_interval_max = 65536;
static uint32_t g_obs_interval_cooldown = 4;
static uint64_t g_obs_last_interval_epoch = 0;
static int g_obs_auto_tune = 0;  // Default: Disable auto-tuning for predictable memory usage
static int g_obs_mag_step = 8;
static int g_obs_sll_step = 16;
static int g_obs_debug = 0;
static uint64_t g_obs_last_hit[TINY_NUM_CLASSES];
static uint64_t g_obs_last_miss[TINY_NUM_CLASSES];
static uint64_t g_obs_last_spill_ss[TINY_NUM_CLASSES];
static uint64_t g_obs_last_spill_owner[TINY_NUM_CLASSES];
static uint64_t g_obs_last_spill_mag[TINY_NUM_CLASSES];
static uint64_t g_obs_last_spill_requeue[TINY_NUM_CLASSES];

// ---------------------------------------------------------------------------
// Tiny ACE (Adaptive Cache Engine) state machine
// ---------------------------------------------------------------------------
typedef enum {
    ACE_STATE_STEADY       = 0,
    ACE_STATE_BURST        = 1,
    ACE_STATE_REMOTE_HEAVY = 2,
    ACE_STATE_MEM_TIGHT    = 3
} TinyAceStateId;

typedef struct {
    uint64_t ema_ops;
    uint64_t ema_spill;
    uint64_t ema_remote;
    uint64_t ema_miss;
    TinyAceStateId state;
    uint64_t last_switch_ns;
} TinyAceState;

typedef struct {
    uint16_t mag_cap;
    uint16_t sll_cap;
    uint16_t fast_cap;
    uint16_t batch;
    uint16_t hotmag_cap;
    uint16_t hotmag_refill;
    uint8_t  drain_mask;
    uint8_t  slab_lg;
    TinyAceStateId state;
    uint8_t  hot_rank;
    uint8_t  request_trim;
    uint64_t ema_ops_snapshot;
} TinyAcePolicy;

static TinyAceState  g_ace_state[TINY_NUM_CLASSES];
static TinyAcePolicy g_ace_policy[TINY_NUM_CLASSES];
static uint64_t g_ace_tick_now_ns = 0;
static int g_ace_mem_tight_flag = 0;
static uint64_t g_ace_last_rss_check_ns = 0;
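// Both rings are indexed with "pos & MASK", which is only correct when the
// capacity is a power of two. A minimal compile-time guard for that
// invariant could look like this (a sketch; not part of the original code):
_Static_assert((EVENTQ_CAP & (EVENTQ_CAP - 1u)) == 0,
               "EVENTQ_CAP must be a power of two");
_Static_assert((TINY_OBS_CAP & (TINY_OBS_CAP - 1u)) == 0,
               "TINY_OBS_CAP must be a power of two");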
static int g_tiny_rss_budget_kb = 0;  // HAKMEM_TINY_RSS_BUDGET_KB (0=disabled)
static int g_tiny_int_tight = 0;      // HAKMEM_TINY_INT_TIGHT=1 → bias caps downward
static int g_tiny_diet_step = 16;     // HAKMEM_TINY_DIET_STEP (cap decrement step)
static int g_tiny_cap_floor[TINY_NUM_CLASSES] = {
    64, 64, 64, 128, 64, 64, 64, 64
};  // min MAG cap per class

#define ACE_COOLDOWN_NS      (800ULL * 1000 * 1000)  // 0.8s
#define ACE_RSS_CHECK_NS     (500ULL * 1000 * 1000)  // 0.5s
#define ACE_EMA_WEIGHT       8
#define ACE_MAG_STEP_DEFAULT 8
#define ACE_SLL_STEP_DEFAULT 16

static inline uint64_t tiny_ace_now_ns(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

// Exponential moving average: new samples get weight 1/ACE_EMA_WEIGHT.
static inline uint64_t tiny_ace_ema(uint64_t prev, uint64_t sample) {
    if (prev == 0) return sample;
    return ((prev * (ACE_EMA_WEIGHT - 1)) + sample) / ACE_EMA_WEIGHT;
}

// EXTRACTED: static int get_rss_kb_self(void);

// Hysteresis: enter mem-tight at 95% of the RSS budget, leave at 85%.
static void tiny_ace_update_mem_tight(uint64_t now_ns) {
    if (g_tiny_rss_budget_kb <= 0) {
        g_ace_mem_tight_flag = 0;
        return;
    }
    if (now_ns - g_ace_last_rss_check_ns < ACE_RSS_CHECK_NS) {
        return;
    }
    g_ace_last_rss_check_ns = now_ns;
    int rss_kb = get_rss_kb_self();
    if (rss_kb > 0) {
        int high = (g_tiny_rss_budget_kb * 95) / 100;
        int low  = (g_tiny_rss_budget_kb * 85) / 100;
        if (rss_kb >= high)     g_ace_mem_tight_flag = 1;
        else if (rss_kb <= low) g_ace_mem_tight_flag = 0;
    }
}

static void tiny_ace_collect_stats(int idx, const TinyObsStats* st);
static void tiny_ace_refresh_hot_ranks(void);
static void tiny_ace_apply_policies(void);
static void tiny_ace_init_defaults(void);
static void tiny_obs_update_interval(void);

static __thread uint32_t g_obs_hit_accum[TINY_NUM_CLASSES];

// Multi-producer enqueue: claim a slot with a CAS on the tail, then publish
// the payload via a per-slot ready flag. Events are dropped when the ring is
// full rather than blocking the allocation path.
static inline void tiny_obs_enqueue(uint8_t kind, int class_idx, uint16_t count) {
    uint32_t tail;
    for (;;) {
        tail = atomic_load_explicit(&g_obs_tail, memory_order_relaxed);
        uint32_t head = atomic_load_explicit(&g_obs_head, memory_order_acquire);
        if (tail - head >= TINY_OBS_CAP) return;  // drop on overflow
        uint32_t desired = tail + 1u;
        if (atomic_compare_exchange_weak_explicit(&g_obs_tail, &tail, desired,
                                                  memory_order_acq_rel,
                                                  memory_order_relaxed)) {
            break;
        }
    }
    uint32_t idx = tail & TINY_OBS_MASK;
    TinyObsEvent ev;
    ev.kind = kind;
    ev.class_idx = (uint8_t)class_idx;
    ev.count = count;
    g_obs_ring[idx] = ev;
    atomic_store_explicit(&g_obs_ready[idx], 1u, memory_order_release);
}

// TLS hits dominate, so they are accumulated per-thread and emitted as one
// batched event every g_obs_interval_current hits; other kinds enqueue
// immediately.
static inline void tiny_obs_record(uint8_t kind, int class_idx) {
    if (__builtin_expect(!g_obs_enable, 0)) return;
    if (__builtin_expect(kind == TINY_OBS_TLS_HIT, 1)) {
        uint32_t interval = g_obs_interval_current;
        if (interval <= 1u) {
            tiny_obs_enqueue(kind, class_idx, 1u);
            return;
        }
        uint32_t accum = ++g_obs_hit_accum[class_idx];
        if (accum < interval) return;
        uint32_t emit = interval;
        if (emit > UINT16_MAX) emit = UINT16_MAX;
        if (accum > emit) {
            g_obs_hit_accum[class_idx] = accum - emit;
        } else {
            g_obs_hit_accum[class_idx] = 0u;
        }
        tiny_obs_enqueue(kind, class_idx, (uint16_t)emit);
        return;
    }
    tiny_obs_enqueue(kind, class_idx, 1u);
}

static inline void tiny_obs_process(const TinyObsEvent* ev) {
    int idx = ev->class_idx;
    uint16_t count = ev->count;
    if (idx < 0 || idx >= TINY_NUM_CLASSES || count == 0) return;
    switch (ev->kind) {
        case TINY_OBS_TLS_HIT:     g_tls_hit_count[idx] += count; break;
        case TINY_OBS_TLS_MISS:    g_tls_miss_count[idx] += count; break;
        case TINY_OBS_SPILL_SS:    g_tls_spill_ss_count[idx] += count; break;
        case TINY_OBS_SPILL_OWNER: g_tls_spill_owner_count[idx] += count; break;
        case TINY_OBS_SPILL_MAG:   g_tls_spill_mag_count[idx] += count; break;
        case TINY_OBS_SPILL_REQUEUE: g_tls_spill_requeue_count[idx] += count; break;
        default: break;
    }
}

static void tiny_ace_collect_stats(int idx, const TinyObsStats* st) {
    TinyAceState* cs = &g_ace_state[idx];
    TinyAcePolicy pol = g_ace_policy[idx];
    uint64_t now = g_ace_tick_now_ns;
    uint64_t ops = st->hit + st->miss;
    uint64_t spills_total = st->spill_ss + st->spill_owner + st->spill_mag;
    uint64_t remote_spill = st->spill_owner;
    uint64_t miss = st->miss;

    cs->ema_ops    = tiny_ace_ema(cs->ema_ops, ops);
    cs->ema_spill  = tiny_ace_ema(cs->ema_spill, spills_total);
    cs->ema_remote = tiny_ace_ema(cs->ema_remote, remote_spill);
    cs->ema_miss   = tiny_ace_ema(cs->ema_miss, miss);

    if (ops == 0 && spills_total == 0 && st->spill_requeue == 0) {
        pol.ema_ops_snapshot = cs->ema_ops;
        g_ace_policy[idx] = pol;
        return;
    }

    // State selection: mem-tight wins, then requeue pressure (burst), then
    // remote-heavy spills, then general spill/miss pressure (burst), else steady.
    TinyAceStateId next_state;
    if (g_ace_mem_tight_flag) {
        next_state = ACE_STATE_MEM_TIGHT;
    } else if (st->spill_requeue > 0) {
        next_state = ACE_STATE_BURST;
    } else if (cs->ema_remote > 16 && cs->ema_remote >= (cs->ema_spill / 3 + 1)) {
        next_state = ACE_STATE_REMOTE_HEAVY;
    } else if (cs->ema_spill > 32 || cs->ema_miss > 16 || miss > 16) {
        next_state = ACE_STATE_BURST;
    } else {
        next_state = ACE_STATE_STEADY;
    }

    // Debounce state flips with a cooldown window.
    if (next_state != cs->state) {
        if (now - cs->last_switch_ns >= ACE_COOLDOWN_NS) {
            cs->state = next_state;
            cs->last_switch_ns = now;
        } else {
            next_state = cs->state;
        }
    }

    pol.state = cs->state;
    pol.hot_rank = 0;
    pol.request_trim = 0;

    int base_mag = tiny_default_cap(idx);
    if (base_mag > g_mag_cap_limit) base_mag = g_mag_cap_limit;
    if (base_mag < 16) base_mag = 16;
    int mag_min = base_mag;
    int mag_max = tiny_cap_max_for_class(idx);
    if (mag_max > g_mag_cap_limit) mag_max = g_mag_cap_limit;
    if (mag_max < mag_min) mag_max = mag_min;

    int current_mag = g_mag_cap_override[idx];
    if (current_mag <= 0) current_mag = base_mag;
    if (current_mag < mag_min) current_mag = mag_min;
    if (current_mag > mag_max) current_mag = mag_max;

    int mag_step = (g_obs_mag_step > 0) ? g_obs_mag_step : ACE_MAG_STEP_DEFAULT;
    if (mag_step < 1) mag_step = 1;

    // Phase12: g_sll_cap_override is a legacy-compat dummy; the SLL cap is
    // held directly in TinyAcePolicy.
    int current_sll = pol.sll_cap;
    if (current_sll < current_mag) current_sll = current_mag;
    if (current_sll < 32) current_sll = 32;
    int sll_step = (g_obs_sll_step > 0) ? g_obs_sll_step : ACE_SLL_STEP_DEFAULT;
    if (sll_step < 1) sll_step = 1;
    int sll_max = TINY_TLS_MAG_CAP;

    uint16_t base_fast = g_fast_cap_defaults[idx];
    uint16_t current_fast = g_fast_cap[idx];
    if (current_fast == 0 && base_fast > 0) current_fast = base_fast;
    uint16_t new_fast = current_fast;
    uint16_t new_batch = (idx <= 3) ? 64 : 48;
    uint8_t new_drain = 2;
    uint8_t new_slab_lg = 20;
    int new_mag = current_mag;
    int new_sll = current_sll;

    int hot_cap_new = (int)hotmag_effective_cap(idx);
    int hot_refill_new = (int)hotmag_refill_target(idx);
    int hot_cap_limit = g_hotmag_cap_default + 64;
    if (hot_cap_limit < 32) hot_cap_limit = 32;
    if (hot_cap_limit > 512) hot_cap_limit = 512;
    int hot_cap_floor = 24;
    if (hot_cap_floor > hot_cap_limit) hot_cap_floor = hot_cap_limit;

    switch (cs->state) {
    case ACE_STATE_STEADY: {
        // Steady: decay MAG/SLL caps halfway back toward their base values.
        if (new_mag > mag_min) {
            int dec = mag_step / 2;
            if (dec < 1) dec = 1;
            new_mag -= dec;
            if (new_mag < mag_min) new_mag = mag_min;
        }
        int target_sll = new_mag * ((g_sll_multiplier > 0) ? g_sll_multiplier : 2);
        if (target_sll < new_mag) target_sll = new_mag;
        if (new_sll > target_sll) {
            int dec = sll_step / 2;
            if (dec < 1) dec = 1;
            new_sll -= dec;
            if (new_sll < target_sll) new_sll = target_sll;
        }
        if (g_hotmag_enable && idx <= 3) {
            if (!g_hotmag_cap_locked[idx])    hot_cap_new -= 16;
            if (!g_hotmag_refill_locked[idx]) hot_refill_new -= 8;
        }
        if (g_fast_enable) new_fast = base_fast;
        new_drain = 2;
        break;
    }
    case ACE_STATE_BURST: {
        // Burst: grow caps and batch sizes to absorb allocation spikes.
        if (g_hotmag_enable && idx <= 3) {
            if (!g_hotmag_cap_locked[idx])    hot_cap_new += 32;
            if (!g_hotmag_refill_locked[idx]) hot_refill_new += 16;
        }
        new_mag += mag_step;
        if (new_mag > mag_max) new_mag = mag_max;
        int target_sll = new_mag * ((g_sll_multiplier > 0) ? g_sll_multiplier : 2) + sll_step;
        if (target_sll > sll_max) target_sll = sll_max;
        if (target_sll > new_sll) new_sll = target_sll;
        if (g_fast_enable) {
            uint32_t f = (uint32_t)((base_fast > 0) ? base_fast : current_fast) + 64u;
            if (f > TINY_TLS_MAG_CAP) f = TINY_TLS_MAG_CAP;
            new_fast = (uint16_t)f;
        }
        new_batch = (idx <= 3) ? 96 : 64;
        new_drain = 1;
        if (idx <= 3) new_slab_lg = 21;
        break;
    }
    case ACE_STATE_REMOTE_HEAVY: {
        // Remote-heavy: widen the SLL to absorb cross-thread frees.
        if (g_hotmag_enable && idx <= 3) {
            if (!g_hotmag_cap_locked[idx])    hot_cap_new += 16;
            if (!g_hotmag_refill_locked[idx]) hot_refill_new += 8;
        }
        int target_sll = new_sll + sll_step;
        if (target_sll > sll_max) target_sll = sll_max;
        new_sll = target_sll;
        if (new_mag < mag_max) {
            int inc = mag_step / 2;
            if (inc < 1) inc = 1;
            new_mag += inc;
            if (new_mag > mag_max) new_mag = mag_max;
        }
        new_drain = 0;
        if (g_fast_enable) new_fast = base_fast;
        break;
    }
    case ACE_STATE_MEM_TIGHT: {
        // Mem-tight: shrink caps aggressively and request a TLS trim.
        if (g_hotmag_enable && idx <= 3) {
            if (!g_hotmag_cap_locked[idx])    hot_cap_new -= 24;
            if (!g_hotmag_refill_locked[idx]) hot_refill_new /= 2;
        }
        new_mag -= mag_step * 2;
        if (new_mag < mag_min) new_mag = mag_min;
        new_sll -= sll_step;
        if (new_sll < new_mag) new_sll = new_mag;
        pol.request_trim = 1;
        if (g_fast_enable) {
            if (base_fast > 0) {
                uint32_t f = base_fast / 2;
                if (f < 16) f = 16;
                if (f > TINY_TLS_MAG_CAP) f = TINY_TLS_MAG_CAP;
                new_fast = (uint16_t)f;
            } else {
                new_fast = 0;
            }
        }
        new_batch = (idx <= 3) ? 48 : 32;
        new_drain = 2;
        new_slab_lg = 20;
        break;
    }
    }

    if (g_hotmag_enable && idx <= 3) {
        if (!g_hotmag_cap_locked[idx]) {
            if (hot_cap_new > hot_cap_limit) hot_cap_new = hot_cap_limit;
            if (hot_cap_new < hot_cap_floor) hot_cap_new = hot_cap_floor;
        } else {
            hot_cap_new = (int)hotmag_effective_cap(idx);
        }
        if (!g_hotmag_refill_locked[idx]) {
            if (hot_refill_new < 0) hot_refill_new = 0;
            if (hot_refill_new > hot_cap_new) hot_refill_new = hot_cap_new;
            if (hot_refill_new > 0 && hot_refill_new < 8) hot_refill_new = 8;
        } else {
            hot_refill_new = (int)hotmag_refill_target(idx);
        }
    } else {
        hot_cap_new = (int)hotmag_effective_cap(idx);
        hot_refill_new = (int)hotmag_refill_target(idx);
    }

    if (new_mag > mag_max) new_mag = mag_max;
    if (new_mag < mag_min) new_mag = mag_min;
    if (new_sll > sll_max) new_sll = sll_max;
    if (new_sll < new_mag) new_sll = new_mag;
    if (new_mag < current_mag) pol.request_trim = 1;
    if (!g_fast_enable) new_fast = 0;
    if (new_fast > TINY_TLS_MAG_CAP) new_fast = TINY_TLS_MAG_CAP;

    pol.mag_cap = (uint16_t)new_mag;
    pol.sll_cap = (uint16_t)new_sll;
    pol.fast_cap = new_fast;
    pol.batch = new_batch;
    pol.drain_mask = new_drain;
    pol.slab_lg = new_slab_lg;
    pol.hotmag_cap = (uint16_t)hot_cap_new;
    pol.hotmag_refill = (uint16_t)hot_refill_new;
    pol.ema_ops_snapshot = cs->ema_ops;

    if (g_obs_debug) {
        static const char* state_names[] = {"steady", "burst", "remote", "tight"};
        fprintf(stderr,
                "[ace] class %d state=%s ops=%llu spill=%llu remote=%llu miss=%llu "
                "mag=%d->%d sll=%d fast=%u hot=%d/%d\n",
                idx, state_names[cs->state],
                (unsigned long long)ops, (unsigned long long)spills_total,
                (unsigned long long)remote_spill, (unsigned long long)miss,
                current_mag, new_mag, new_sll, (unsigned)new_fast,
                hot_cap_new, hot_refill_new);
    }
    g_ace_policy[idx] = pol;
}

static void tiny_ace_refresh_hot_ranks(void) {
    // Find the three highest-traffic classes by EMA ops.
    int top1 = -1, top2 = -1, top3 = -1;
    uint64_t val1 = 0, val2 = 0, val3 = 0;
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        uint64_t ops = g_ace_state[i].ema_ops;
        if (ops > val1) {
            val3 = val2; top3 = top2;
            val2 = val1; top2 = top1;
            val1 = ops;  top1 = i;
        } else if (ops > val2) {
            val3 = val2; top3 = top2;
            val2 = ops;  top2 = i;
        } else if (ops > val3) {
            val3 = ops;  top3 = i;
        }
    }
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        TinyAcePolicy* pol = &g_ace_policy[i];
        pol->hot_rank = 0;
        if (!g_fast_enable) {
            pol->fast_cap = 0;
        }
    }
    // hot_rank encoding: 2 = hottest class, 1 = second, 3 = third
    // (matches the fast-cap boost magnitudes below).
    if (top1 >= 0) g_ace_policy[top1].hot_rank = 2;
    if (top2 >= 0) g_ace_policy[top2].hot_rank = 1;
    if (top3 >= 0) g_ace_policy[top3].hot_rank = 3;
    if (!g_fast_enable) {
        for (int i = 0; i < TINY_NUM_CLASSES; i++) {
            g_hot_alloc_fn[i] = NULL;
        }
        return;
    }
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        TinyAcePolicy* pol = &g_ace_policy[i];
        uint32_t target = pol->fast_cap;
        if (pol->state == ACE_STATE_MEM_TIGHT) {
            // keep tightened fast cap
        } else if (pol->hot_rank == 2) {
            target += 48u;
        } else if (pol->hot_rank == 1) {
            target += 24u;
        } else if (pol->hot_rank == 3) {
            target += 16u;
        } else {
            // relax toward base
            uint32_t base = g_fast_cap_defaults[i];
            if (target > base) {
                uint32_t dec = target - base;
                if (dec > 32u) dec = 32u;
                if (target > dec) target -= dec;
                else target = base;
            } else {
                target = base;
            }
        }
        if (target > TINY_TLS_MAG_CAP) target = TINY_TLS_MAG_CAP;
        pol->fast_cap = (uint16_t)target;
    }
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        TinyHotAllocFn fn = NULL;
        if (g_ace_policy[i].hot_rank != 0) {
            switch (i) {
            case 0: fn = tiny_hot_pop_class0; break;
            case 1: fn = tiny_hot_pop_class1; break;
            case 2: fn = tiny_hot_pop_class2; break;
            case 3: fn = tiny_hot_pop_class3; break;
            default: fn = NULL; break;
            }
        }
        g_hot_alloc_fn[i] = fn;
    }
}

static void tiny_ace_apply_policies(void) {
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        TinyAcePolicy* pol = &g_ace_policy[i];
        int prev_mag = g_mag_cap_override[i];
        if (prev_mag <= 0) prev_mag = tiny_default_cap(i);
        int new_mag = pol->mag_cap;
        if (new_mag < 16) new_mag = 16;
        if (new_mag > TINY_TLS_MAG_CAP) new_mag = TINY_TLS_MAG_CAP;
        if (new_mag != g_mag_cap_override[i]) {
            g_mag_cap_override[i] = new_mag;
            tiny_tls_publish_targets(i, (uint32_t)new_mag);
        }
        if (pol->request_trim || new_mag < prev_mag) {
            tiny_tls_request_trim(i, g_obs_epoch);
        }
        int new_sll = pol->sll_cap;
        if (new_sll < new_mag) new_sll = new_mag;
        if (new_sll > TINY_TLS_MAG_CAP) new_sll = TINY_TLS_MAG_CAP;
        pol->sll_cap = (uint16_t)new_sll;  // publish only into policy (no global override)
        if (g_fast_enable && !g_fast_cap_locked[i]) {
            uint16_t new_fast = pol->fast_cap;
            if (new_fast > TINY_TLS_MAG_CAP) new_fast = TINY_TLS_MAG_CAP;
            g_fast_cap[i] = new_fast;
        }
        if (g_hotmag_enable && hkm_is_hot_class(i)) {
            if (!g_hotmag_cap_locked[i]) {
                uint16_t target_cap = pol->hotmag_cap;
                if (target_cap < 16) target_cap = 16;
                if (target_cap > 512) target_cap = 512;
                if (g_hotmag_cap_current[i] != target_cap) {
                    g_hotmag_cap_current[i] = target_cap;
                }
            }
            if (!g_hotmag_refill_locked[i]) {
                uint16_t target_ref = pol->hotmag_refill;
                if (target_ref > g_hotmag_cap_current[i]) target_ref = g_hotmag_cap_current[i];
                g_hotmag_refill_current[i] = target_ref;
            }
            hotmag_init_if_needed(i);
        }
    }
}

static void tiny_ace_init_defaults(void) {
    uint64_t now = tiny_ace_now_ns();
    int mult = (g_sll_multiplier > 0) ? g_sll_multiplier : 2;
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        TinyAceState* cs = &g_ace_state[i];
        cs->ema_ops = 0;
        cs->ema_spill = 0;
        cs->ema_remote = 0;
        cs->ema_miss = 0;
        cs->state = ACE_STATE_STEADY;
        cs->last_switch_ns = now;
        TinyAcePolicy* pol = &g_ace_policy[i];
        pol->state = ACE_STATE_STEADY;
        pol->hot_rank = 0;
        pol->request_trim = 0;
        pol->ema_ops_snapshot = 0;
        int base_mag = tiny_default_cap(i);
        if (base_mag > g_mag_cap_limit) base_mag = g_mag_cap_limit;
        if (base_mag < 16) base_mag = 16;
        pol->mag_cap = (uint16_t)base_mag;
        int sll = base_mag * mult;
        if (sll > TINY_TLS_MAG_CAP) sll = TINY_TLS_MAG_CAP;
        pol->sll_cap = (uint16_t)sll;
        pol->fast_cap = g_fast_cap_defaults[i];
        pol->batch = (i <= 3) ? 64 : 48;
        pol->drain_mask = 2;
        pol->slab_lg = 20;
        pol->hotmag_cap = hotmag_effective_cap(i);
        pol->hotmag_refill = hotmag_refill_target(i);
        if (g_mag_cap_override[i] <= 0) g_mag_cap_override[i] = pol->mag_cap;
        // Phase12: g_sll_cap_override is not used (compat-only dummy).
        switch (i) {
        case 0: g_hot_alloc_fn[i] = tiny_hot_pop_class0; break;
        case 1: g_hot_alloc_fn[i] = tiny_hot_pop_class1; break;
        case 2: g_hot_alloc_fn[i] = tiny_hot_pop_class2; break;
        default: g_hot_alloc_fn[i] = NULL; break;
        }
    }
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        tiny_tls_publish_targets(i, (uint32_t)g_mag_cap_override[i]);
    }
}

static void tiny_obs_update_interval(void) {
    if (!g_obs_auto_tune) return;
    uint32_t current = g_obs_interval_current;
    int active_states = 0;
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        if (g_ace_policy[i].state != ACE_STATE_STEADY) {
            active_states++;
        }
    }
    int urgent = g_ace_mem_tight_flag || (active_states > 0);
    if (urgent) {
        // Sample densely while any class is off-steady or memory is tight.
        uint32_t target = g_obs_interval_min;
        if (target < 1u) target = 1u;
        if (current != target) {
            g_obs_interval_current = target;
            g_obs_last_interval_epoch = g_obs_epoch;
            if (g_obs_debug) {
                fprintf(stderr, "[obs] interval -> %u (urgent)\n", target);
            }
        }
        return;
    }
    // Steady: back off by doubling the interval after a cooldown.
    if (current >= g_obs_interval_max) return;
    if ((g_obs_epoch - g_obs_last_interval_epoch) < g_obs_interval_cooldown) return;
    uint32_t target = current << 1;
    if (target < current) target = g_obs_interval_max;  // overflow guard
    if (target > g_obs_interval_max) target = g_obs_interval_max;
    if (target != current) {
        g_obs_interval_current = target;
        g_obs_last_interval_epoch = g_obs_epoch;
        if (g_obs_debug) {
            fprintf(stderr, "[obs] interval -> %u (steady)\n", target);
        }
    }
}

static inline void superslab_partial_release(SuperSlab* ss, uint32_t epoch) {
#if defined(MADV_DONTNEED)
    if (!g_ss_partial_enable) return;
    if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
    uint32_t prev = ss->partial_epoch;
    if (epoch != 0 && (epoch - prev) < g_ss_partial_interval) return;
    size_t len = (size_t)1 << ss->lg_size;
    if (madvise(ss, len, MADV_DONTNEED) == 0) {
        ss->partial_epoch = epoch;
    }
#else
    (void)ss;
    (void)epoch;
#endif
}

static inline void tiny_obs_adjust_class(int idx, const TinyObsStats* st) {
    if (!g_obs_auto_tune) return;
    tiny_ace_collect_stats(idx, st);
}

// One tuning pass: snapshot per-class counter deltas since the previous
// epoch, feed them to ACE, then refresh ranks, policies, and the sampling
// interval.
static void tiny_obs_apply_tuning(void) {
    g_obs_epoch++;
    g_ace_tick_now_ns = tiny_ace_now_ns();
    tiny_ace_update_mem_tight(g_ace_tick_now_ns);
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        uint64_t cur_hit = g_tls_hit_count[i];
        uint64_t cur_miss = g_tls_miss_count[i];
        uint64_t cur_spill_ss = g_tls_spill_ss_count[i];
        uint64_t cur_spill_owner = g_tls_spill_owner_count[i];
        uint64_t cur_spill_mag = g_tls_spill_mag_count[i];
        uint64_t cur_spill_requeue = g_tls_spill_requeue_count[i];
        TinyObsStats* stats = &g_obs_stats[i];
        stats->hit = cur_hit - g_obs_last_hit[i];
        stats->miss = cur_miss - g_obs_last_miss[i];
        stats->spill_ss = cur_spill_ss - g_obs_last_spill_ss[i];
        stats->spill_owner = cur_spill_owner - g_obs_last_spill_owner[i];
        stats->spill_mag = cur_spill_mag - g_obs_last_spill_mag[i];
        stats->spill_requeue = cur_spill_requeue - g_obs_last_spill_requeue[i];
        g_obs_last_hit[i] = cur_hit;
        g_obs_last_miss[i] = cur_miss;
        g_obs_last_spill_ss[i] = cur_spill_ss;
        g_obs_last_spill_owner[i] = cur_spill_owner;
        g_obs_last_spill_mag[i] = cur_spill_mag;
        g_obs_last_spill_requeue[i] = cur_spill_requeue;
        tiny_obs_adjust_class(i, stats);
    }
    if (g_obs_auto_tune) {
        tiny_ace_refresh_hot_ranks();
        tiny_ace_apply_policies();
        tiny_obs_update_interval();
    }
}

static void* tiny_obs_worker(void* arg) {
    (void)arg;
    uint32_t processed = 0;
    while (!g_obs_stop) {
        uint32_t head = atomic_load_explicit(&g_obs_head, memory_order_relaxed);
        uint32_t tail = atomic_load_explicit(&g_obs_tail, memory_order_acquire);
        if (head == tail) {
            if (processed > 0) {
                tiny_obs_apply_tuning();
                processed = 0;
            }
            struct timespec ts = {0, 1000000};  // 1.0 ms backoff when idle
            nanosleep(&ts, NULL);
            continue;
        }
        uint32_t idx = head & TINY_OBS_MASK;
        if (!atomic_load_explicit(&g_obs_ready[idx], memory_order_acquire)) {
            // Slot claimed by a producer but payload not yet published.
            sched_yield();
            continue;
        }
        TinyObsEvent ev = g_obs_ring[idx];
        atomic_store_explicit(&g_obs_ready[idx], 0u, memory_order_release);
        atomic_store_explicit(&g_obs_head, head + 1u, memory_order_relaxed);
        tiny_obs_process(&ev);
        if (++processed >= g_obs_interval_current) {
            tiny_obs_apply_tuning();
            processed = 0;
        }
    }
    // Drain remaining events before exit
    for (;;) {
        uint32_t head = atomic_load_explicit(&g_obs_head, memory_order_relaxed);
        uint32_t tail = atomic_load_explicit(&g_obs_tail, memory_order_acquire);
        if (head == tail) break;
        uint32_t idx = head & TINY_OBS_MASK;
        if (!atomic_load_explicit(&g_obs_ready[idx], memory_order_acquire)) {
            sched_yield();
            continue;
        }
        TinyObsEvent ev = g_obs_ring[idx];
        atomic_store_explicit(&g_obs_ready[idx], 0u, memory_order_release);
        atomic_store_explicit(&g_obs_head, head + 1u, memory_order_relaxed);
        tiny_obs_process(&ev);
    }
    tiny_obs_apply_tuning();
    return NULL;
}

static void tiny_obs_start_if_needed(void) {
    // OBS runtime knobs removed; keep disabled for predictable memory use.
    g_obs_enable = 0;
    g_obs_started = 0;
    (void)g_obs_interval_default;
    (void)g_obs_interval_current;
    (void)g_obs_interval_min;
    (void)g_obs_interval_max;
    (void)g_obs_auto_tune;
    (void)g_obs_mag_step;
    (void)g_obs_sll_step;
    (void)g_obs_debug;
}

static void tiny_obs_shutdown(void) {
    if (!g_obs_started) return;
    g_obs_stop = 1;
    pthread_join(g_obs_thread, NULL);
    g_obs_started = 0;
    g_obs_enable = 0;
}

// Tiny diet (memory-tight) controls
// Event logging options: default minimal (no timestamp, no thread id)
static int g_int_event_ts = 0;         // HAKMEM_INT_EVENT_TS=1 to include timestamp
static unsigned g_int_sample_mask = 0; // HAKMEM_INT_SAMPLE=(N) → mask=(1<<N)-1
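// Sketch of how a power-of-two sampling mask of this shape is typically
// consumed (illustrative only; the real event-logging path lives elsewhere,
// and `sample_ctr` / `int_event_sampled` below are hypothetical names).
// With HAKMEM_INT_SAMPLE=N, mask = (1u << N) - 1u, so on average one event
// in 2^N passes the filter:
//
//   static __thread uint32_t sample_ctr = 0;
//   static inline int int_event_sampled(void) {
//       if (g_int_sample_mask == 0) return 1;           // sampling off: keep all
//       return (sample_ctr++ & g_int_sample_mask) == 0; // keep 1 of mask+1
//   }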