Restore C7 Warm/TLS carve for release and add policy scaffolding

Author: Moe Charm (CI)
Date: 2025-12-06 01:34:04 +09:00
Parent: d17ec46628
Commit: 03538055ae

15 changed files with 588 additions and 164 deletions


@@ -21,6 +21,8 @@
#include "../box/tiny_page_box.h" // Tiny-Plus Page Box (C5C7 initial hook)
#include "../box/ss_tls_bind_box.h" // Box: TLS Bind (SuperSlab -> TLS binding)
#include "../box/tiny_tls_carve_one_block_box.h" // Box: TLS carve helper (shared)
#include "../box/tiny_class_policy_box.h" // Box: per-class policy (Page/Warm caps)
#include "../box/tiny_class_stats_box.h" // Box: lightweight per-class stats
#include "../box/warm_tls_bind_logger_box.h" // Box: Warm TLS Bind logging (throttled)
#define WARM_POOL_DBG_DEFINE
#include "../box/warm_pool_dbg_box.h" // Box: Warm Pool C7 debug counters
@@ -516,6 +518,10 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
tiny_warm_pool_init_once();
TinyUnifiedCache* cache = &g_unified_cache[class_idx];
const TinyClassPolicy* policy = tiny_policy_get(class_idx);
int warm_enabled = policy ? policy->warm_enabled : 0;
int warm_cap = policy ? policy->warm_cap : 0;
int page_enabled = policy ? policy->page_box_enabled : 0;
// ✅ Phase 11+: Ensure cache is initialized (lazy init for cold path)
if (!cache->slots) {
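
The three locals above snapshot the per-class policy once at refill entry, so a NULL policy degrades to everything-disabled. A minimal sketch of the policy box shape these accesses imply (the field names come from the uses above; the types, layout, and lookup are assumptions):

/* Hypothetical sketch of tiny_class_policy_box.h; only the field
 * names are taken from the accesses above. */
typedef struct TinyClassPolicy {
    unsigned page_box_enabled; /* gates the Page Box hot path        */
    unsigned warm_enabled;     /* gates the Warm Pool hot path       */
    unsigned warm_cap;         /* bound on warm-pool SuperSlab count */
} TinyClassPolicy;

const TinyClassPolicy* tiny_policy_get(int class_idx); /* NULL => all off */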
@@ -560,7 +566,7 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
// ========== PAGE BOX HOT PATH (Tiny-Plus layer): Try page box FIRST ==========
// Eventually, C7-specific page-level freelist management will be consolidated here.
// For now the stub implementation always returns 0; only the Box boundary is wired up first.
if (tiny_page_box_is_enabled(class_idx)) {
if (page_enabled && tiny_page_box_is_enabled(class_idx)) {
int page_produced = tiny_page_box_refill(class_idx, out, room);
if (page_produced > 0) {
// Store blocks into cache and return first
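
The gate above now requires both the policy bit and the existing per-class enable check before consulting the Page Box. Given the comment that the stub always returns 0, the refill contract looks roughly like this sketch (the _stub name is hypothetical):

/* Sketch of the Page Box refill contract: write up to room blocks into
 * out[] and return the count produced. The current stub produces none. */
static int tiny_page_box_refill_stub(int class_idx, void** out, int room) {
    (void)class_idx; (void)out; (void)room;
    return 0; /* Box boundary connected; page-level freelist comes later */
}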
@@ -573,6 +579,7 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
#if !HAKMEM_BUILD_RELEASE
g_unified_cache_miss[class_idx]++;
#endif
tiny_class_stats_on_uc_miss(class_idx);
if (measure) {
uint64_t end_cycles = read_tsc();
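
The measure branch prices each refill in raw TSC cycles via read_tsc(). That helper is not part of this diff; on x86-64 it is typically a thin rdtsc wrapper, sketched below (the real version may add serialization or non-x86 fallbacks):

#include <stdint.h>

/* Sketch: unserialized timestamp-counter read on x86-64 (GCC/Clang). */
static inline uint64_t read_tsc(void) {
    uint32_t lo, hi;
    __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
    return ((uint64_t)hi << 32) | lo;
}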
@@ -593,169 +600,198 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
// ========== WARM POOL HOT PATH: Check warm pool FIRST ==========
// This is the critical optimization: avoid the superslab_refill() registry scan
#if !HAKMEM_BUILD_RELEASE
atomic_fetch_add_explicit(&g_dbg_warm_pop_attempts, 1, memory_order_relaxed);
if (class_idx == 7) {
warm_pool_dbg_c7_attempt();
}
#endif
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
atomic_fetch_add_explicit(&g_rel_c7_warm_pop, 1, memory_order_relaxed);
}
#endif
SuperSlab* warm_ss = tiny_warm_pool_pop(class_idx);
if (warm_ss) {
#if !HAKMEM_BUILD_RELEASE
if (warm_enabled) {
if (class_idx == 7) {
warm_pool_dbg_c7_hit();
const TinyClassPolicy* pol = tiny_policy_get(7);
static _Atomic int g_c7_policy_logged = 0;
if (atomic_exchange_explicit(&g_c7_policy_logged, 1, memory_order_acq_rel) == 0) {
fprintf(stderr,
"[C7_POLICY_AT_WARM] page=%u warm=%u cap=%u\n",
pol ? pol->page_box_enabled : 0,
pol ? pol->warm_enabled : 0,
pol ? pol->warm_cap : 0);
}
}
// Debug-only: Warm TLS Bind experiment (C7 only)
#if !HAKMEM_BUILD_RELEASE
atomic_fetch_add_explicit(&g_dbg_warm_pop_attempts, 1, memory_order_relaxed);
if (class_idx == 7) {
int warm_mode = warm_tls_bind_mode_c7();
if (warm_mode >= 1) {
int cap = ss_slabs_capacity(warm_ss);
int slab_idx = -1;
warm_pool_dbg_c7_attempt();
}
#endif
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
atomic_fetch_add_explicit(&g_rel_c7_warm_pop, 1, memory_order_relaxed);
}
#endif
SuperSlab* warm_ss = tiny_warm_pool_pop(class_idx);
if (warm_ss) {
if (class_idx == 7) {
#if !HAKMEM_BUILD_RELEASE
warm_pool_dbg_c7_hit();
#endif
int warm_mode = warm_tls_bind_mode_c7();
if (warm_mode >= 1) {
int cap = ss_slabs_capacity(warm_ss);
int slab_idx = -1;
// Simple heuristic: first slab matching class
for (int i = 0; i < cap; i++) {
if (tiny_get_class_from_ss(warm_ss, i) == class_idx) {
slab_idx = i;
break;
// Simple heuristic: first slab matching class
for (int i = 0; i < cap; i++) {
if (tiny_get_class_from_ss(warm_ss, i) == class_idx) {
slab_idx = i;
break;
}
}
}
if (slab_idx >= 0) {
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
uint32_t tid = (uint32_t)(uintptr_t)pthread_self();
if (ss_tls_bind_one(class_idx, tls, warm_ss, slab_idx, tid)) {
warm_tls_bind_log_success(warm_ss, slab_idx);
if (slab_idx >= 0) {
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
uint32_t tid = (uint32_t)(uintptr_t)pthread_self();
if (ss_tls_bind_one(class_idx, tls, warm_ss, slab_idx, tid)) {
warm_tls_bind_log_success(warm_ss, slab_idx);
// Mode 2: carve a single block via TLS fast path
if (warm_mode == 2) {
warm_pool_dbg_c7_tls_attempt();
TinyTLSCarveOneResult tls_carve =
tiny_tls_carve_one_block(tls, class_idx);
if (tls_carve.block) {
warm_tls_bind_log_tls_carve(warm_ss, slab_idx, tls_carve.block);
warm_pool_dbg_c7_tls_success();
out[0] = tls_carve.block;
produced = 1;
tls_carved = 1;
} else {
warm_tls_bind_log_tls_fail(warm_ss, slab_idx);
warm_pool_dbg_c7_tls_fail();
// Mode 2: carve a single block via TLS fast path
if (warm_mode == 2) {
#if !HAKMEM_BUILD_RELEASE
warm_pool_dbg_c7_tls_attempt();
#endif
TinyTLSCarveOneResult tls_carve =
tiny_tls_carve_one_block(tls, class_idx);
if (tls_carve.block) {
warm_tls_bind_log_tls_carve(warm_ss, slab_idx, tls_carve.block);
#if !HAKMEM_BUILD_RELEASE
warm_pool_dbg_c7_tls_success();
#endif
out[0] = tls_carve.block;
produced = 1;
tls_carved = 1;
} else {
warm_tls_bind_log_tls_fail(warm_ss, slab_idx);
#if !HAKMEM_BUILD_RELEASE
warm_pool_dbg_c7_tls_fail();
#endif
}
}
}
}
}
}
}
atomic_fetch_add_explicit(&g_dbg_warm_pop_hits, 1, memory_order_relaxed);
#endif
// HOT PATH: Warm pool hit, try to carve directly
if (produced == 0) {
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
warm_pool_rel_c7_carve_attempt();
}
#if !HAKMEM_BUILD_RELEASE
atomic_fetch_add_explicit(&g_dbg_warm_pop_hits, 1, memory_order_relaxed);
#endif
produced = slab_carve_from_ss(class_idx, warm_ss, out, room);
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
// HOT PATH: Warm pool hit, try to carve directly
if (produced == 0) {
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
warm_pool_rel_c7_carve_attempt();
}
#endif
produced = slab_carve_from_ss(class_idx, warm_ss, out, room);
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
if (produced > 0) {
warm_pool_rel_c7_carve_success();
} else {
warm_pool_rel_c7_carve_zero();
}
}
#endif
if (produced > 0) {
warm_pool_rel_c7_carve_success();
} else {
warm_pool_rel_c7_carve_zero();
// Update active counter for carved blocks
ss_active_add(warm_ss, (uint32_t)produced);
}
}
#endif
if (produced > 0) {
// Update active counter for carved blocks
ss_active_add(warm_ss, (uint32_t)produced);
}
}
if (produced > 0) {
#if !HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
warm_pool_dbg_c7_carve();
if (tls_carved) {
warm_pool_dbg_c7_uc_miss_tls();
} else {
warm_pool_dbg_c7_uc_miss_warm();
#if !HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
warm_pool_dbg_c7_carve();
if (tls_carved) {
warm_pool_dbg_c7_uc_miss_tls();
} else {
warm_pool_dbg_c7_uc_miss_warm();
}
}
}
#endif
// Success! Return SuperSlab to warm pool for next use
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
atomic_fetch_add_explicit(&g_rel_c7_warm_push, 1, memory_order_relaxed);
}
#endif
tiny_warm_pool_push(class_idx, warm_ss);
#endif
// Success! Return SuperSlab to warm pool for next use
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
atomic_fetch_add_explicit(&g_rel_c7_warm_push, 1, memory_order_relaxed);
}
#endif
tiny_warm_pool_push_with_cap(class_idx, warm_ss, warm_cap);
// Track warm pool hit (always compiled, ENV-gated printing)
warm_pool_record_hit(class_idx);
// Track warm pool hit (always compiled, ENV-gated printing)
warm_pool_record_hit(class_idx);
tiny_class_stats_on_warm_hit(class_idx);
// Store blocks into cache and return first
void* first = out[0];
for (int i = 1; i < produced; i++) {
cache->slots[cache->tail] = out[i];
cache->tail = (cache->tail + 1) & cache->mask;
// Store blocks into cache and return first
void* first = out[0];
for (int i = 1; i < produced; i++) {
cache->slots[cache->tail] = out[i];
cache->tail = (cache->tail + 1) & cache->mask;
}
#if !HAKMEM_BUILD_RELEASE
g_unified_cache_miss[class_idx]++;
#endif
tiny_class_stats_on_uc_miss(class_idx);
if (measure) {
uint64_t end_cycles = read_tsc();
uint64_t delta = end_cycles - start_cycles;
atomic_fetch_add_explicit(&g_unified_cache_refill_cycles_global,
delta, memory_order_relaxed);
atomic_fetch_add_explicit(&g_unified_cache_misses_global,
1, memory_order_relaxed);
// Per-class aggregation (visualize C5/C7 refill cost)
atomic_fetch_add_explicit(&g_unified_cache_refill_cycles_by_class[class_idx],
delta, memory_order_relaxed);
atomic_fetch_add_explicit(&g_unified_cache_misses_by_class[class_idx],
1, memory_order_relaxed);
}
return HAK_BASE_FROM_RAW(first);
}
// SuperSlab carve failed (produced == 0)
#if !HAKMEM_BUILD_RELEASE
g_unified_cache_miss[class_idx]++;
atomic_fetch_add_explicit(&g_dbg_warm_pop_carve_zero, 1, memory_order_relaxed);
#endif
if (measure) {
uint64_t end_cycles = read_tsc();
uint64_t delta = end_cycles - start_cycles;
atomic_fetch_add_explicit(&g_unified_cache_refill_cycles_global,
delta, memory_order_relaxed);
atomic_fetch_add_explicit(&g_unified_cache_misses_global,
1, memory_order_relaxed);
// Per-class aggregation (visualize C5/C7 refill cost)
atomic_fetch_add_explicit(&g_unified_cache_refill_cycles_by_class[class_idx],
delta, memory_order_relaxed);
atomic_fetch_add_explicit(&g_unified_cache_misses_by_class[class_idx],
1, memory_order_relaxed);
// This slab is either exhausted or has no more available capacity
// The statistics counter 'prefilled' tracks how often we try to prefill
if (produced == 0 && tiny_warm_pool_count(class_idx) == 0) {
// Pool is empty and carve failed - prefill would help here
warm_pool_record_prefilled(class_idx);
}
return HAK_BASE_FROM_RAW(first);
} else {
#if !HAKMEM_BUILD_RELEASE
atomic_fetch_add_explicit(&g_dbg_warm_pop_empty, 1, memory_order_relaxed);
#endif
}
// SuperSlab carve failed (produced == 0)
#if !HAKMEM_BUILD_RELEASE
atomic_fetch_add_explicit(&g_dbg_warm_pop_carve_zero, 1, memory_order_relaxed);
#endif
// This slab is either exhausted or has no more available capacity
// The statistics counter 'prefilled' tracks how often we try to prefill
if (produced == 0 && tiny_warm_pool_count(class_idx) == 0) {
// Pool is empty and carve failed - prefill would help here
warm_pool_record_prefilled(class_idx);
}
} else {
#if !HAKMEM_BUILD_RELEASE
atomic_fetch_add_explicit(&g_dbg_warm_pop_empty, 1, memory_order_relaxed);
#endif
// ========== COLD PATH: Warm pool miss, use superslab_refill ==========
// Track warm pool miss (always compiled, ENV-gated printing)
warm_pool_record_miss(class_idx);
}
// ========== COLD PATH: Warm pool miss, use superslab_refill ==========
// Track warm pool miss (always compiled, ENV-gated printing)
warm_pool_record_miss(class_idx);
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
// Step 1: Ensure SuperSlab available via normal refill
// Enhanced: Use Warm Pool Prefill Box for secondary prefill when pool is empty
if (warm_pool_do_prefill(class_idx, tls) < 0) {
return HAK_BASE_FROM_RAW(NULL);
if (warm_enabled) {
if (warm_pool_do_prefill(class_idx, tls, warm_cap) < 0) {
return HAK_BASE_FROM_RAW(NULL);
}
// After prefill: tls->ss has the final slab for carving
tls = &g_tls_slabs[class_idx]; // Reload (already done in prefill box)
} else {
if (!tls->ss) {
if (!superslab_refill(class_idx)) {
return HAK_BASE_FROM_RAW(NULL);
}
tls = &g_tls_slabs[class_idx];
}
}
// After prefill: tls->ss has the final slab for carving
// tls = &g_tls_slabs[class_idx]; // Reload (already done in prefill box)
// Step 2: Direct carve from SuperSlab into local array (bypass TLS SLL!)
TinySlabMeta* m = tls->meta;
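
Note the release-path change near the end of the warm-pool hit branch above: the SuperSlab is now returned via tiny_warm_pool_push_with_cap() so the pool honors the policy's warm_cap instead of growing unboundedly. A minimal sketch of what a cap-aware push can look like, assuming a counted pool (tiny_warm_pool_count() and tiny_warm_pool_push() appear elsewhere in this file; the drop-on-full behavior is an assumption):

typedef struct SuperSlab SuperSlab;              /* opaque in this sketch */
extern int  tiny_warm_pool_count(int class_idx); /* declared in the pool box */
extern void tiny_warm_pool_push(int class_idx, SuperSlab* ss);

/* Sketch only: bound the per-class warm pool at cap entries. */
static void tiny_warm_pool_push_with_cap(int class_idx, SuperSlab* ss, int cap) {
    if (cap > 0 && tiny_warm_pool_count(class_idx) >= cap) {
        return; /* pool full: leave the SuperSlab to registry management */
    }
    tiny_warm_pool_push(class_idx, ss); /* existing unconditional push */
}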
@@ -844,6 +880,7 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
}
g_unified_cache_miss[class_idx]++;
#endif
tiny_class_stats_on_uc_miss(class_idx);
// Measure refill cycles
if (measure) {
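
The tiny_class_stats_on_uc_miss() / tiny_class_stats_on_warm_hit() hooks threaded through the miss and hit paths above belong to the new lightweight per-class stats box. A minimal sketch of such hooks, assuming relaxed atomic counters (the counter names and class count are hypothetical):

#include <stdatomic.h>

enum { TINY_CLASS_COUNT_SKETCH = 8 }; /* assumed number of size classes */

static _Atomic unsigned long g_uc_miss_sketch[TINY_CLASS_COUNT_SKETCH];
static _Atomic unsigned long g_warm_hit_sketch[TINY_CLASS_COUNT_SKETCH];

static inline void tiny_class_stats_on_uc_miss(int class_idx) {
    atomic_fetch_add_explicit(&g_uc_miss_sketch[class_idx], 1, memory_order_relaxed);
}

static inline void tiny_class_stats_on_warm_hit(int class_idx) {
    atomic_fetch_add_explicit(&g_warm_hit_sketch[class_idx], 1, memory_order_relaxed);
}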