diff --git a/core/box/tiny_front_hot_box.h b/core/box/tiny_front_hot_box.h
index 0e6220d5..aca1ee72 100644
--- a/core/box/tiny_front_hot_box.h
+++ b/core/box/tiny_front_hot_box.h
@@ -50,13 +50,13 @@
 // Debug Metrics (Zero Overhead in Release)
 // ============================================================================
 
-#if !HAKMEM_BUILD_RELEASE
-// Increment cache hit counter (debug only)
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
+// Increment cache hit counter (debug/observe only; zero overhead when compiled-out)
 #define TINY_HOT_METRICS_HIT(class_idx) \
     do { extern __thread uint64_t g_unified_cache_hit[]; \
          g_unified_cache_hit[class_idx]++; } while(0)
 
-// Increment cache miss counter (debug only)
+// Increment cache miss counter (debug/observe only; zero overhead when compiled-out)
 #define TINY_HOT_METRICS_MISS(class_idx) \
     do { extern __thread uint64_t g_unified_cache_miss[]; \
          g_unified_cache_miss[class_idx]++; } while(0)
diff --git a/core/front/tiny_unified_cache.c b/core/front/tiny_unified_cache.c
index 86574d02..527c11ef 100644
--- a/core/front/tiny_unified_cache.c
+++ b/core/front/tiny_unified_cache.c
@@ -97,7 +97,7 @@ __thread TinyWarmPool g_tiny_warm_pool[TINY_NUM_CLASSES] = {0};
 // Metrics (Phase 23, optional for debugging)
 // ============================================================================
 
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
 __thread uint64_t g_unified_cache_hit[TINY_NUM_CLASSES] = {0};
 __thread uint64_t g_unified_cache_miss[TINY_NUM_CLASSES] = {0};
 __thread uint64_t g_unified_cache_push[TINY_NUM_CLASSES] = {0};
@@ -262,7 +262,7 @@ void unified_cache_shutdown(void) {
 void unified_cache_print_stats(void) {
     if (!unified_cache_enabled()) return;
 
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
     fprintf(stderr, "\n[Unified-STATS] Unified Cache Metrics:\n");
 
     for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
@@ -296,6 +296,11 @@ void unified_cache_print_stats(void) {
 #endif
 }
 
+__attribute__((destructor))
+static void unified_cache_auto_stats(void) {
+    unified_cache_print_stats();
+}
+
 // ============================================================================
 // Warm Pool Stats (always compiled, ENV-gated at runtime)
 // ============================================================================
diff --git a/core/front/tiny_unified_cache.h b/core/front/tiny_unified_cache.h
index 493e8679..65adb40f 100644
--- a/core/front/tiny_unified_cache.h
+++ b/core/front/tiny_unified_cache.h
@@ -92,7 +92,7 @@ extern __thread TinyUnifiedCache g_unified_cache[TINY_NUM_CLASSES];
 // Metrics (Phase 23, optional for debugging)
 // ============================================================================
 
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
 extern __thread uint64_t g_unified_cache_hit[TINY_NUM_CLASSES];   // Alloc hits
 extern __thread uint64_t g_unified_cache_miss[TINY_NUM_CLASSES];  // Alloc misses
 extern __thread uint64_t g_unified_cache_push[TINY_NUM_CLASSES];  // Free pushes
@@ -143,7 +143,7 @@ static inline size_t unified_capacity(int class_idx) {
         while (pow2 < g_cap[class_idx]) pow2 *= 2;
         g_cap[class_idx] = pow2;
 
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
         fprintf(stderr, "[Unified-INIT] C%d capacity = %zu (power of 2)\n", class_idx, g_cap[class_idx]);
         fflush(stderr);
 #endif
@@ -197,7 +197,7 @@ static inline hak_base_ptr_t unified_cache_pop(int class_idx) {
     // Empty check
     if (__builtin_expect(cache->head == cache->tail, 0)) {
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
         g_unified_cache_miss[class_idx]++;
 #endif
         return HAK_BASE_FROM_RAW(NULL);  // Empty
@@ -207,7 +207,7 @@ static inline hak_base_ptr_t unified_cache_pop(int class_idx) {
     void* base = cache->slots[cache->head];  // 1 cache miss (array access)
     cache->head = (cache->head + 1) & cache->mask;  // Fast modulo (power of 2)
 
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
     g_unified_cache_hit[class_idx]++;
 #endif
@@ -251,7 +251,7 @@ static inline int unified_cache_push(int class_idx, hak_base_ptr_t base) {
     // Full check (leave 1 slot empty to distinguish full/empty)
     if (__builtin_expect(next_tail == cache->head, 0)) {
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
         g_unified_cache_full[class_idx]++;
 #endif
         return 0;  // Full
@@ -261,7 +261,7 @@ static inline int unified_cache_push(int class_idx, hak_base_ptr_t base) {
     cache->slots[cache->tail] = base_raw;  // 1 cache miss (array write)
     cache->tail = next_tail;
 
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
     g_unified_cache_push[class_idx]++;
 #endif
@@ -300,7 +300,7 @@ static inline hak_base_ptr_t unified_cache_pop_or_refill(int class_idx) {
     // Phase 22: Compile-out when disabled (default OFF)
     void* tcache_base = tiny_tcache_try_pop(class_idx);
     if (tcache_base != NULL) {
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
         g_unified_cache_hit[class_idx]++;
 #endif
 #if HAKMEM_TINY_UNIFIED_CACHE_MEASURE_COMPILED
@@ -320,7 +320,7 @@ static inline hak_base_ptr_t unified_cache_pop_or_refill(int class_idx) {
     if (__builtin_expect(cache->head != cache->tail, 1)) {
         void* base = cache->slots[cache->head];  // 1 cache miss (array access)
         cache->head = (cache->head + 1) & cache->mask;
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
         g_unified_cache_hit[class_idx]++;
 #endif
 #if HAKMEM_TINY_UNIFIED_CACHE_MEASURE_COMPILED
@@ -337,7 +337,7 @@ static inline hak_base_ptr_t unified_cache_pop_or_refill(int class_idx) {
     }
 
     // Cache miss → Batch refill from SuperSlab
-#if !HAKMEM_BUILD_RELEASE
+#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
     g_unified_cache_miss[class_idx]++;
 #endif
     return unified_cache_refill(class_idx);  // Refill + return first block (BASE)
diff --git a/core/front/tiny_warm_pool.h b/core/front/tiny_warm_pool.h
index c9b21a48..4e12b73e 100644
--- a/core/front/tiny_warm_pool.h
+++ b/core/front/tiny_warm_pool.h
@@ -38,12 +38,11 @@
 //
 // ============================================================================
 
-// Maximum warm SuperSlabs per thread per class (tunable)
-// Trade-off: Working set size vs warm pool effectiveness
-//   - 4: Original (90% hit rate expected, but broken implementation - hardcoded prefill threshold)
-//   - 12: Optimized capacity with matching prefill threshold (Phase 1)
-//   - Higher values: More memory but better locality
-#define TINY_WARM_POOL_MAX_PER_CLASS 12
+// Warm pool sizing policy:
+//   - DEFAULT is the value used when ENV is unset (keeps TLS footprint reasonable).
+//   - MAX is the absolute cap used for array sizing and clamping (allows safe ENV sweeps).
+#define TINY_WARM_POOL_DEFAULT_PER_CLASS 12
+#define TINY_WARM_POOL_MAX_PER_CLASS 32
 
 typedef struct {
     SuperSlab* slabs[TINY_WARM_POOL_MAX_PER_CLASS];
@@ -106,12 +105,12 @@ static inline int warm_pool_max_per_class(void) {
         const char* env = getenv("HAKMEM_WARM_POOL_SIZE");
         if (env && *env) {
             int v = atoi(env);
-            // Clamp to valid range [1, 12]
+            // Clamp to valid range [1, TINY_WARM_POOL_MAX_PER_CLASS]
             if (v < 1) v = 1;
-            if (v > 12) v = 12;
+            if (v > TINY_WARM_POOL_MAX_PER_CLASS) v = TINY_WARM_POOL_MAX_PER_CLASS;
             g_max = v;
         } else {
-            g_max = TINY_WARM_POOL_MAX_PER_CLASS;
+            g_max = TINY_WARM_POOL_DEFAULT_PER_CLASS;
         }
     }
     return g_max;
diff --git a/core/hakmem_shared_pool_acquire.c b/core/hakmem_shared_pool_acquire.c
index 21167c22..8a98b7b8 100644
--- a/core/hakmem_shared_pool_acquire.c
+++ b/core/hakmem_shared_pool_acquire.c
@@ -266,6 +266,9 @@ sp_acquire_from_empty_scan(int class_idx, SuperSlab** ss_out, int* slab_idx_out,
     SuperSlab* primary_result = NULL;
     int primary_slab_idx = -1;
 
+    // Cache warm pool cap once per acquire call (SSOT: same as unified_cache_refill()).
+    const int warm_cap = warm_pool_max_per_class();
+
     for (int i = 0; i < scan_limit; i++) {
         SuperSlab* ss = super_reg_by_class_at(class_idx, i);
         if (!(ss && ss->magic == SUPERSLAB_MAGIC)) continue;
@@ -274,9 +277,8 @@ sp_acquire_from_empty_scan(int class_idx, SuperSlab** ss_out, int* slab_idx_out,
         if (ss->empty_count == 0) continue;  // No EMPTY slabs in this SS
 
         // WARM POOL PREFILL: Add HOT SuperSlabs to warm pool (if not already primary result)
-        // This is low-cost during registry scan and avoids future expensive scans
-        // Phase 1: Increase threshold from 4 to 12 to match TINY_WARM_POOL_MAX_PER_CLASS
-        if (ss != primary_result && tiny_warm_pool_count(class_idx) < 12) {
+        // This is low-cost during registry scan and avoids future expensive scans.
+        if (ss != primary_result && tiny_warm_pool_count(class_idx) < warm_cap) {
            tiny_warm_pool_push(class_idx, ss);
            // Track prefilled SuperSlabs for metrics
            g_warm_pool_stats[class_idx].prefilled++;
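
Note on the stats gate used throughout this patch: counters that were previously debug-only now also compile in when HAKMEM_UNIFIED_CACHE_STATS_COMPILED is set, and remain compiled out in release builds otherwise. Below is a minimal standalone sketch of the pattern; how the flag is actually defined is up to the project's build system, so the -D values in the comment are assumptions, not project facts.

    /* gate_sketch.c - build with e.g.
     *   cc -DHAKMEM_BUILD_RELEASE=1 -DHAKMEM_UNIFIED_CACHE_STATS_COMPILED=1 gate_sketch.c
     * to keep counters in a release-style build; drop the second -D to compile them out. */
    #include <stdint.h>
    #include <stdio.h>

    #ifndef HAKMEM_BUILD_RELEASE
    #define HAKMEM_BUILD_RELEASE 0
    #endif
    #ifndef HAKMEM_UNIFIED_CACHE_STATS_COMPILED
    #define HAKMEM_UNIFIED_CACHE_STATS_COMPILED 0
    #endif

    #if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
    static __thread uint64_t g_hit;                  /* counter exists (GCC/Clang TLS) */
    #define METRICS_HIT() do { g_hit++; } while (0)
    #else
    #define METRICS_HIT() do { } while (0)           /* compiles to nothing: zero overhead */
    #endif

    int main(void) {
        for (int i = 0; i < 3; i++) METRICS_HIT();
    #if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
        printf("hits = %llu\n", (unsigned long long)g_hit);
    #endif
        return 0;
    }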
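
Note on the auto-stats destructor added to tiny_unified_cache.c: GCC/Clang run __attribute__((destructor)) functions at normal process termination, so unified_cache_print_stats() now runs without the caller invoking it, while the existing unified_cache_enabled() check still gates the output at runtime. Destructors run in the thread that exits the process, so the __thread counters reported are that thread's view. A self-contained sketch of the mechanism (names are local to the sketch):

    /* dtor_sketch.c - the destructor fires after main() returns (or on exit()). */
    #include <stdio.h>

    static unsigned long g_ops;

    __attribute__((destructor))
    static void print_stats_at_exit(void) {
        fprintf(stderr, "[STATS] ops = %lu\n", g_ops);
    }

    int main(void) {
        for (int i = 0; i < 1000; i++) g_ops++;
        return 0;   /* print_stats_at_exit() runs after this */
    }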
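
Note on the warm pool sizing change: TINY_WARM_POOL_DEFAULT_PER_CLASS (12) is what a thread gets when HAKMEM_WARM_POOL_SIZE is unset, while TINY_WARM_POOL_MAX_PER_CLASS (32) both sizes the per-class slab array and clamps whatever the environment supplies, so ENV sweeps above 12 are no longer truncated. The acquire path also reads the cap once per call (warm_cap) instead of comparing against a hardcoded 12, keeping it in sync with unified_cache_refill(). A standalone sketch of the resolve-once-and-clamp policy; the helper name warm_pool_cap() is local to this sketch, not the project's API:

    /* warm_pool_cap_sketch.c - try: HAKMEM_WARM_POOL_SIZE=24 ./a.out */
    #include <stdio.h>
    #include <stdlib.h>

    #define TINY_WARM_POOL_DEFAULT_PER_CLASS 12
    #define TINY_WARM_POOL_MAX_PER_CLASS     32

    /* Resolve the cap once per thread: ENV value clamped to [1, MAX], DEFAULT when unset. */
    static int warm_pool_cap(void) {
        static __thread int g_max = 0;   /* 0 = not resolved yet */
        if (g_max == 0) {
            const char* env = getenv("HAKMEM_WARM_POOL_SIZE");
            if (env && *env) {
                int v = atoi(env);
                if (v < 1) v = 1;
                if (v > TINY_WARM_POOL_MAX_PER_CLASS) v = TINY_WARM_POOL_MAX_PER_CLASS;
                g_max = v;
            } else {
                g_max = TINY_WARM_POOL_DEFAULT_PER_CLASS;
            }
        }
        return g_max;
    }

    int main(void) {
        /* In the allocator this value is cached once per acquire call as warm_cap. */
        printf("warm pool cap = %d\n", warm_pool_cap());
        return 0;
    }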