// warm_pool_prefill_box.h - Warm Pool Prefill Box // Purpose: Secondary prefill optimization - load multiple superlslabs when pool is empty // License: MIT // Date: 2025-12-04 #ifndef HAK_WARM_POOL_PREFILL_BOX_H #define HAK_WARM_POOL_PREFILL_BOX_H #include #include #include #include #include "../hakmem_tiny_config.h" #include "../hakmem_tiny_superslab.h" #include "../tiny_tls.h" #include "../front/tiny_warm_pool.h" #include "../box/warm_pool_stats_box.h" #include "../box/warm_pool_rel_counters_box.h" extern _Atomic uintptr_t g_c7_stage3_magic_ss; static inline int warm_prefill_log_enabled(void) { static int g_warm_log = -1; if (__builtin_expect(g_warm_log == -1, 0)) { const char* e = getenv("HAKMEM_TINY_WARM_LOG"); g_warm_log = (e && *e && *e != '0') ? 1 : 0; } return g_warm_log; } static inline void warm_prefill_log_c7_meta(const char* tag, TinyTLSSlab* tls) { if (!tls || !tls->ss) return; if (!warm_prefill_log_enabled()) return; #if HAKMEM_BUILD_RELEASE static _Atomic uint32_t rel_logs = 0; uint32_t n = atomic_fetch_add_explicit(&rel_logs, 1, memory_order_relaxed); if (n < 4) { TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx]; uintptr_t magic = atomic_load_explicit(&g_c7_stage3_magic_ss, memory_order_relaxed); fprintf(stderr, "[REL_C7_%s] ss=%p slab=%u cls=%u used=%u cap=%u carved=%u freelist=%p magic=%#lx\n", tag, (void*)tls->ss, (unsigned)tls->slab_idx, (unsigned)meta->class_idx, (unsigned)meta->used, (unsigned)meta->capacity, (unsigned)meta->carved, meta->freelist, (unsigned long)magic); } #else static _Atomic uint32_t dbg_logs = 0; uint32_t n = atomic_fetch_add_explicit(&dbg_logs, 1, memory_order_relaxed); if (n < 4) { TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx]; uintptr_t magic = atomic_load_explicit(&g_c7_stage3_magic_ss, memory_order_relaxed); fprintf(stderr, "[DBG_C7_%s] ss=%p slab=%u cls=%u used=%u cap=%u carved=%u freelist=%p magic=%#lx\n", tag, (void*)tls->ss, (unsigned)tls->slab_idx, (unsigned)meta->class_idx, (unsigned)meta->used, (unsigned)meta->capacity, (unsigned)meta->carved, meta->freelist, (unsigned long)magic); } #endif } // Forward declarations extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES]; extern SuperSlab* superslab_refill(int class_idx); // ============================================================================ // Warm Pool Prefill Policy // ============================================================================ // Prefill budget: How many additional SuperSlabs to load when pool is empty // - If pool is empty, load PREFILL_BUDGET extra slabs to build working set // - This avoids repeated registry scans on rapid cache misses // - Phase 2: Keep at 2 (increasing to 4 caused contention regression -1.5%) #define WARM_POOL_PREFILL_BUDGET 2 // ============================================================================ // Warm Pool Prefill API (Inline for Cold Path) // ============================================================================ // Perform secondary prefill when warm pool becomes empty // Called from unified_cache_refill() cold path when warm_pool_count() == 0 // // Algorithm: // 1. Check if pool is empty // 2. If yes, load PREFILL_BUDGET additional superlslabs via superslab_refill // 3. Push all but the last to warm pool // 4. Return the last one for immediate carving (in tls->ss) // // Returns: 0 on success, -1 if superslab_refill fails // // Performance: Only triggered when pool is empty, cold path cost // static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls, int warm_cap_hint) { #if HAKMEM_BUILD_RELEASE if (class_idx == 7) { warm_pool_rel_c7_prefill_call(); } #endif int budget = (tiny_warm_pool_count(class_idx) == 0) ? WARM_POOL_PREFILL_BUDGET : 1; while (budget > 0) { if (class_idx == 7) { warm_prefill_log_c7_meta("PREFILL_META", tls); } if (!tls->ss) { // Need to load a new SuperSlab if (!superslab_refill(class_idx)) { return -1; // Error: cannot allocate new SuperSlab } tls = &g_tls_slabs[class_idx]; // Reload TLS after refill } // Check SuperSlab validity if (!(tls->ss && tls->ss->magic == SUPERSLAB_MAGIC)) { break; } // C7 safety: prefer only pristine slabs (used=0 carved=0 freelist=NULL) if (class_idx == 7 && warm_prefill_log_enabled()) { TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx]; if (meta->class_idx == 7 && (meta->used > 0 || meta->carved > 0 || meta->freelist != NULL)) { #if HAKMEM_BUILD_RELEASE static _Atomic int rel_c7_skip_logged = 0; if (atomic_load_explicit(&rel_c7_skip_logged, memory_order_relaxed) == 0) { fprintf(stderr, "[REL_C7_PREFILL_SKIP_NONEMPTY] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n", (void*)tls->ss, (unsigned)tls->slab_idx, (unsigned)meta->used, (unsigned)meta->capacity, (unsigned)meta->carved, meta->freelist); atomic_store_explicit(&rel_c7_skip_logged, 1, memory_order_relaxed); } #else static __thread int dbg_c7_skip_logged = 0; if (dbg_c7_skip_logged < 4) { fprintf(stderr, "[DBG_C7_PREFILL_SKIP_NONEMPTY] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n", (void*)tls->ss, (unsigned)tls->slab_idx, (unsigned)meta->used, (unsigned)meta->capacity, (unsigned)meta->carved, meta->freelist); dbg_c7_skip_logged++; } #endif tls->ss = NULL; // Drop exhausted slab and try another budget--; continue; } } if (budget > 1) { // Prefill mode: push to pool and load another tiny_warm_pool_push_with_cap(class_idx, tls->ss, warm_cap_hint); warm_pool_record_prefilled(class_idx); #if HAKMEM_BUILD_RELEASE if (class_idx == 7) { warm_pool_rel_c7_prefill_slab(); } #else if (class_idx == 7 && warm_prefill_log_enabled()) { static __thread int dbg_c7_prefill_logs = 0; if (dbg_c7_prefill_logs < 8) { TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx]; fprintf(stderr, "[DBG_C7_PREFILL] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n", (void*)tls->ss, (unsigned)tls->slab_idx, (unsigned)meta->used, (unsigned)meta->capacity, (unsigned)meta->carved, meta->freelist); dbg_c7_prefill_logs++; } } #endif tls->ss = NULL; // Force next iteration to refill budget--; } else { // Final slab: keep in TLS for immediate carving budget = 0; } } return 0; // Success } #endif // HAK_WARM_POOL_PREFILL_BOX_H