#include "hakmem_super_registry.h" #include "hakmem_tiny_superslab.h" #include "box/ss_allocation_box.h" // For superslab_allocate() declaration #include "box/ss_addr_map_box.h" // Phase 9-1: SuperSlab address map #include "box/ss_cold_start_box.inc.h" // Phase 11+: Cold Start prewarm defaults #include "hakmem_env_cache.h" // Priority-2: ENV cache (eliminate syscalls) #include #include #include #include // munmap for incompatible SuperSlab eviction // Global registry storage (allocated via SuperRegBox) static SuperRegEntry* reg_entries(void) { return super_reg_entries(); } pthread_mutex_t g_super_reg_lock = PTHREAD_MUTEX_INITIALIZER; int g_super_reg_initialized = 0; // Per-class registry storage (Phase 6: Registry Optimization) int g_super_reg_class_size[TINY_NUM_CLASSES]; // Phase 9: Lazy Deallocation - LRU Cache Storage SuperSlabLRUCache g_ss_lru_cache = {0}; static int g_ss_lru_initialized = 0; // Phase 11: Prewarm bypass flag (disable LRU pop during prewarm) static _Atomic int g_ss_prewarm_bypass = 0; // Initialize registry (call once at startup) void hak_super_registry_init(void) { if (g_super_reg_initialized) return; super_reg_init(NULL, NULL); SuperRegEntry* entries = reg_entries(); int reg_cap = super_reg_effective_size(); if (!entries) { fprintf(stderr, "[SUPER_REG] init failed: no registry entries\n"); abort(); } // Zero-initialize all entries (hash table) memset(entries, 0, (size_t)reg_cap * sizeof(SuperRegEntry)); // Zero-initialize per-class registry (Phase 6: Registry Optimization) SuperSlab** by_class = super_reg_by_class_slots(); int stride = super_reg_by_class_stride(); if (by_class && stride > 0) { memset(by_class, 0, (size_t)TINY_NUM_CLASSES * (size_t)stride * sizeof(SuperSlab*)); } memset(g_super_reg_class_size, 0, sizeof(g_super_reg_class_size)); // Memory fence to ensure initialization is visible to all threads atomic_thread_fence(memory_order_release); g_super_reg_initialized = 1; } // Register SuperSlab (mutex-protected) // CRITICAL: Call AFTER SuperSlab is fully initialized // Publish order: ss init → release fence → base write // Phase 8.3: ACE - lg_size aware registration // Phase 6: Registry Optimization - Also add to per-class registry for fast refill scan int hak_super_register(uintptr_t base, SuperSlab* ss) { if (!g_super_reg_initialized) { hak_super_registry_init(); } pthread_mutex_lock(&g_super_reg_lock); int lg = ss->lg_size; // Phase 8.3: Get lg_size from SuperSlab // Priority-2: Use cached ENV (eliminate debug syscall overhead) #if !HAKMEM_BUILD_RELEASE int dbg = HAK_ENV_SUPER_REG_DEBUG(); #else const int dbg = 0; #endif SuperRegEntry* entries = reg_entries(); if (!entries) { pthread_mutex_unlock(&g_super_reg_lock); return 0; } int h = hak_super_hash(base, lg); const int mask = super_reg_effective_mask(); const int probe_limit = super_reg_effective_size() > SUPER_MAX_PROBE ? 
    // Step 1: Register in hash table (for address → SuperSlab lookup)
    int hash_registered = 0;
    for (int i = 0; i < probe_limit; i++) {
        SuperRegEntry* e = &entries[(h + i) & mask];
        if (atomic_load_explicit(&e->base, memory_order_acquire) == 0) {
            // Found empty slot
            // Step 1: Write SuperSlab pointer and lg_size (atomic for MT-safety)
            atomic_store_explicit(&e->ss, ss, memory_order_release);
            e->lg_size = lg;  // Phase 8.3: Store lg_size for fast lookup

            // Step 2: Release fence (ensures ss/lg_size write is visible before base)
            atomic_thread_fence(memory_order_release);

            // Step 3: Publish base address (makes entry visible to readers)
            atomic_store_explicit(&e->base, base, memory_order_release);

            hash_registered = 1;
            if (dbg == 1) {
                fprintf(stderr, "[SUPER_REG] register base=%p lg=%d slot=%d magic=%llx\n",
                        (void*)base, lg, (h + i) & mask, (unsigned long long)ss->magic);
            }
            break;
        }
        if (atomic_load_explicit(&e->base, memory_order_acquire) == base && e->lg_size == lg) {
            // Already registered (duplicate registration)
            hash_registered = 1;
            break;
        }
    }

    if (!hash_registered) {
        // Hash table full (probing limit reached)
        pthread_mutex_unlock(&g_super_reg_lock);
        fprintf(stderr, "HAKMEM: SuperSlab registry full! Increase SUPER_REG_SIZE\n");
        return 0;
    }

    // Phase 12: per-class registry not keyed by ss->size_class anymore.
    // Keep existing global hash registration only.

    // Phase 9-1: Also register in new hash table (for optimized lookup)
    ss_map_insert(&g_ss_addr_map, (void*)base, ss);

    pthread_mutex_unlock(&g_super_reg_lock);
    return 1;
}
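// Reader-side sketch (illustrative only): how a lock-free lookup can pair with
// the Step 1-3 publish order above. The real lookup path is hak_super_lookup();
// this unused helper (a hypothetical name, not part of the public API) only
// demonstrates the acquire/release pairing: a non-zero base loaded with acquire
// guarantees the earlier ss/lg_size stores are visible.
static __attribute__((unused)) SuperSlab* example_probe_for_base(uintptr_t base, int lg) {
    SuperRegEntry* entries = reg_entries();
    if (!entries) return NULL;

    int h = hak_super_hash(base, lg);
    const int mask = super_reg_effective_mask();
    const int probe_limit = super_reg_effective_size() > SUPER_MAX_PROBE
                                ? SUPER_MAX_PROBE
                                : super_reg_effective_size();
    for (int i = 0; i < probe_limit; i++) {
        SuperRegEntry* e = &entries[(h + i) & mask];
        uintptr_t b = atomic_load_explicit(&e->base, memory_order_acquire);
        if (b == 0) return NULL;  // empty slot terminates the probe chain
        if (b == base && e->lg_size == lg) {
            // Acquire-load the pointer that was published before base in the register path.
            return atomic_load_explicit(&e->ss, memory_order_acquire);
        }
    }
    return NULL;
}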
// Unregister SuperSlab (mutex-protected)
// CRITICAL: Call BEFORE munmap to prevent reader segfault
// Unpublish order: base = 0 (release) → munmap outside this function
// Phase 8.3: ACE - Try both lg_sizes (we don't know which one was used)
// Phase 6: Registry Optimization - Also remove from per-class registry
void hak_super_unregister(uintptr_t base) {
#if !HAKMEM_BUILD_RELEASE
    static int dbg_once = -1;  // shared with register path for debug toggle
#else
    static const int dbg_once = 0;
#endif
    (void)dbg_once;

    if (!g_super_reg_initialized) return;

    pthread_mutex_lock(&g_super_reg_lock);

    // Step 1: Find and remove from hash table
    SuperSlab* ss = NULL;  // Save SuperSlab pointer for per-class removal
    SuperRegEntry* entries = reg_entries();
    if (!entries) {
        pthread_mutex_unlock(&g_super_reg_lock);
        return;
    }

    for (int lg = 20; lg <= 21; lg++) {
        int h = hak_super_hash(base, lg);
        const int mask = super_reg_effective_mask();
        const int probe_limit = super_reg_effective_size() > SUPER_MAX_PROBE
                                    ? SUPER_MAX_PROBE
                                    : super_reg_effective_size();

        // Linear probing to find matching entry
        for (int i = 0; i < probe_limit; i++) {
            SuperRegEntry* e = &entries[(h + i) & mask];
            if (atomic_load_explicit(&e->base, memory_order_acquire) == base && e->lg_size == lg) {
                // Found entry to remove
                // Save SuperSlab pointer BEFORE clearing (for per-class removal)
                ss = atomic_load_explicit(&e->ss, memory_order_acquire);

                // Step 1: Clear SuperSlab pointer (atomic, prevents TOCTOU race)
                atomic_store_explicit(&e->ss, NULL, memory_order_release);

                // Step 2: Unpublish base (makes entry invisible to readers)
                atomic_store_explicit(&e->base, 0, memory_order_release);

                // Step 3: Clear lg_size (optional cleanup)
                e->lg_size = 0;

#if !HAKMEM_BUILD_RELEASE
                // Priority-2: Use cached ENV (eliminate lazy-init overhead)
                if (__builtin_expect(dbg_once == -1, 0)) {
                    dbg_once = HAK_ENV_SUPER_REG_DEBUG();
                }
                if (dbg_once == 1) {
                    fprintf(stderr, "[SUPER_REG] unregister base=%p\n", (void*)base);
                }
#endif
                // Found in hash table, continue to per-class removal
                goto hash_removed;
            }
            if (atomic_load_explicit(&e->base, memory_order_acquire) == 0) {
                // Not found in this lg_size, try next
                break;
            }
        }
    }

hash_removed:
    // Step 2: Remove from per-class registry (Phase 6: Registry Optimization)
    if (ss && ss->magic == SUPERSLAB_MAGIC) {
        // Phase 12: per-class registry no longer keyed; no per-class removal required.
    }

    // Phase 9-1: Also remove from new hash table
    ss_map_remove(&g_ss_addr_map, (void*)base);

    pthread_mutex_unlock(&g_super_reg_lock);
    // Not found is not an error (could be duplicate unregister)
}

// ============================================================================
// Phase 9: Lazy Deallocation - LRU Cache Implementation
// ============================================================================

// hak_now_ns() is defined in superslab/superslab_inline.h - use that

#include <sys/mman.h>  // For munmap

// Initialize LRU cache (called once at startup)
void hak_ss_lru_init(void) {
    if (g_ss_lru_initialized) return;

    pthread_mutex_lock(&g_super_reg_lock);
    if (g_ss_lru_initialized) {
        pthread_mutex_unlock(&g_super_reg_lock);
        return;
    }

    // Priority-2: Use cached ENV (eliminate config syscall overhead)
    g_ss_lru_cache.max_cached = (uint32_t)HAK_ENV_SUPERSLAB_MAX_CACHED();
    g_ss_lru_cache.max_memory_mb = (uint64_t)HAK_ENV_SUPERSLAB_MAX_MEMORY_MB();
    uint32_t ttl_sec = (uint32_t)HAK_ENV_SUPERSLAB_TTL_SEC();
    g_ss_lru_cache.ttl_ns = (uint64_t)ttl_sec * 1000000000ULL;

    g_ss_lru_cache.lru_head = NULL;
    g_ss_lru_cache.lru_tail = NULL;
    g_ss_lru_cache.total_count = 0;
    g_ss_lru_cache.total_memory_mb = 0;
    g_ss_lru_cache.generation = 0;

    g_ss_lru_initialized = 1;
    pthread_mutex_unlock(&g_super_reg_lock);

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[SS_LRU_INIT] max_cached=%u max_memory_mb=%llu ttl_sec=%u\n",
            g_ss_lru_cache.max_cached,
            (unsigned long long)g_ss_lru_cache.max_memory_mb, ttl_sec);
#endif
}

// Remove SuperSlab from LRU list (does NOT free memory)
static void ss_lru_remove(SuperSlab* ss) {
    if (!ss) return;

    if (ss->lru_prev) {
        ss->lru_prev->lru_next = ss->lru_next;
    } else {
        g_ss_lru_cache.lru_head = ss->lru_next;
    }

    if (ss->lru_next) {
        ss->lru_next->lru_prev = ss->lru_prev;
    } else {
        g_ss_lru_cache.lru_tail = ss->lru_prev;
    }

    ss->lru_prev = NULL;
    ss->lru_next = NULL;
}

// Insert SuperSlab at head of LRU list (most recently used)
static void ss_lru_insert_head(SuperSlab* ss) {
    if (!ss) return;

    ss->lru_next = g_ss_lru_cache.lru_head;
    ss->lru_prev = NULL;

    if (g_ss_lru_cache.lru_head) {
        g_ss_lru_cache.lru_head->lru_prev = ss;
    } else {
        g_ss_lru_cache.lru_tail = ss;
    }
    g_ss_lru_cache.lru_head = ss;
}
// Mark SuperSlab as recently used (move to head)
void hak_ss_lru_touch(SuperSlab* ss) {
    if (!ss || !g_ss_lru_initialized) return;

    pthread_mutex_lock(&g_super_reg_lock);
    ss->last_used_ns = hak_now_ns();

    // If already in list, remove and re-insert at head
    if (ss->lru_prev || ss->lru_next || g_ss_lru_cache.lru_head == ss) {
        ss_lru_remove(ss);
        ss_lru_insert_head(ss);
    }
    pthread_mutex_unlock(&g_super_reg_lock);
}

// Evict one SuperSlab from tail (oldest)
// Returns: 1 if evicted, 0 if cache is empty
static int ss_lru_evict_one(void) {
    // Priority-2: Use cached ENV (eliminate lazy-init static overhead)
#if !HAKMEM_BUILD_RELEASE
    static int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        dbg = HAK_ENV_SS_LRU_DEBUG();
    }
#else
    static const int dbg = 0;
#endif

    SuperSlab* victim = g_ss_lru_cache.lru_tail;
    if (!victim) return 0;

    // Safety guard: if the tail SuperSlab is no longer registered in the
    // global registry, its memory may already have been unmapped by another
    // path. In that case, dereferencing victim (or its lru_prev/next) is
    // unsafe. Treat this as a stale LRU entry and conservatively reset the
    // cache to an empty state instead of evicting.
    //
    // NOTE: hak_super_lookup() only consults the registry / address map and
    // never dereferences the SuperSlab pointer itself, so this check is safe
    // even if victim has been munmapped.
    if (hak_super_lookup((void*)victim) == NULL) {
#if !HAKMEM_BUILD_RELEASE
        static int stale_log_count = 0;
        if (stale_log_count < 4) {
            fprintf(stderr,
                    "[SS_LRU_STALE_TAIL] victim=%p not in registry; resetting LRU cache\n",
                    (void*)victim);
            stale_log_count++;
        }
#endif
        g_ss_lru_cache.lru_head = NULL;
        g_ss_lru_cache.lru_tail = NULL;
        g_ss_lru_cache.total_count = 0;
        g_ss_lru_cache.total_memory_mb = 0;
        return 0;
    }

    // Remove from LRU list
    ss_lru_remove(victim);
    g_ss_lru_cache.total_count--;
    size_t ss_size = (size_t)1 << victim->lg_size;
    g_ss_lru_cache.total_memory_mb -= (ss_size / (1024 * 1024));

    // Unregister and free
    uintptr_t base = (uintptr_t)victim;
    (void)base;

    // Debug logging for LRU EVICT
    if (dbg == 1) {
        fprintf(stderr, "[LRU_EVICT] ss=%p size=%zu KB (freed)\n",
                (void*)victim, ss_size / 1024);
    }

    // Already unregistered when added to cache, just munmap
    victim->magic = 0;
    munmap(victim, ss_size);

#if !HAKMEM_BUILD_RELEASE
    static int evict_log_count = 0;
    if (evict_log_count < 10) {
        fprintf(stderr, "[SS_LRU_EVICT] ss=%p size=%zu (cache_count=%u)\n",
                (void*)victim, ss_size, g_ss_lru_cache.total_count);
        evict_log_count++;
    }
#endif

    return 1;
}

// Evict old SuperSlabs based on policy
void hak_ss_lru_evict(void) {
    if (!g_ss_lru_initialized) return;

    pthread_mutex_lock(&g_super_reg_lock);

    uint64_t now = hak_now_ns();

    // Policy 1: Evict until count <= max_cached
    while (g_ss_lru_cache.total_count > g_ss_lru_cache.max_cached) {
        if (!ss_lru_evict_one()) break;
    }

    // Policy 2: Evict until memory <= max_memory_mb
    while (g_ss_lru_cache.total_memory_mb > g_ss_lru_cache.max_memory_mb) {
        if (!ss_lru_evict_one()) break;
    }

    // Policy 3: Evict expired SuperSlabs (TTL)
    SuperSlab* curr = g_ss_lru_cache.lru_tail;
    while (curr) {
        SuperSlab* prev = curr->lru_prev;
        uint64_t age = now - curr->last_used_ns;
        if (age > g_ss_lru_cache.ttl_ns) {
            ss_lru_remove(curr);
            g_ss_lru_cache.total_count--;
            size_t ss_size = (size_t)1 << curr->lg_size;
            g_ss_lru_cache.total_memory_mb -= (ss_size / (1024 * 1024));
            curr->magic = 0;
            munmap(curr, ss_size);
        }
        curr = prev;
    }

    pthread_mutex_unlock(&g_super_reg_lock);
}
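// Usage sketch (illustrative): a throttled maintenance hook that a caller could
// run from a background thread or an allocation slow path to apply the three
// eviction policies above. The helper name and the ~100ms interval are
// assumptions for the example, not part of the registry API.
static __attribute__((unused)) void example_lru_maintenance_tick(void) {
    static _Atomic uint64_t last_tick_ns = 0;
    uint64_t now = hak_now_ns();
    uint64_t prev = atomic_load_explicit(&last_tick_ns, memory_order_relaxed);
    if (now - prev < 100ULL * 1000 * 1000) return;  // at most once per ~100ms
    if (atomic_compare_exchange_strong(&last_tick_ns, &prev, now)) {
        hak_ss_lru_evict();  // count, memory, and TTL policies in one pass
    }
}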
// Try to reuse a cached SuperSlab
SuperSlab* hak_ss_lru_pop(uint8_t size_class) {
    if (!g_ss_lru_initialized) {
        hak_ss_lru_init();
    }

    // Phase 11: Bypass LRU cache during prewarm
    if (atomic_load_explicit(&g_ss_prewarm_bypass, memory_order_acquire)) {
        return NULL;
    }

    // Priority-2: Use cached ENV (eliminate lazy-init TLS overhead)
#if !HAKMEM_BUILD_RELEASE
    static __thread int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        dbg = HAK_ENV_SS_LRU_DEBUG();
    }
#else
    static const int dbg = 0;
#endif

    pthread_mutex_lock(&g_super_reg_lock);

    // Find a compatible SuperSlab in cache (stride must match current config)
    SuperSlab* curr = g_ss_lru_cache.lru_head;
    extern const size_t g_tiny_class_sizes[];
    size_t expected_stride = g_tiny_class_sizes[size_class];

    while (curr) {
        // Validate: Check if cached SuperSlab slabs match current stride
        // This prevents reusing old 1024B SuperSlabs for new 2048B C7 allocations
        int is_compatible = 1;

        // Scan active slabs for stride mismatch
        int cap = ss_slabs_capacity(curr);
        for (int i = 0; i < cap; i++) {
            if (curr->slab_bitmap & (1u << i)) {
                TinySlabMeta* meta = &curr->slabs[i];
                if (meta->capacity > 0) {
                    // Calculate implied stride from slab geometry
                    // Slab 0: 63488B usable, Others: 65536B usable
                    size_t slab_usable = (i == 0) ? 63488 : 65536;
                    size_t implied_stride = slab_usable / meta->capacity;

                    // Stride mismatch detected
                    if (implied_stride != expected_stride) {
                        is_compatible = 0;
#if !HAKMEM_BUILD_RELEASE
                        static _Atomic uint32_t g_incomp_log = 0;
                        uint32_t n = atomic_fetch_add(&g_incomp_log, 1);
                        if (n < 8) {
                            fprintf(stderr,
                                    "[LRU_INCOMPATIBLE] class=%d ss=%p slab=%d expect_stride=%zu implied=%zu (evicting)\n",
                                    size_class, (void*)curr, i, expected_stride, implied_stride);
                        }
#endif
                        break;
                    }
                }
            }
        }

        if (is_compatible) {
            // Compatible - reuse this SuperSlab
            ss_lru_remove(curr);
            g_ss_lru_cache.total_count--;
            size_t ss_size = (size_t)1 << curr->lg_size;
            g_ss_lru_cache.total_memory_mb -= (ss_size / (1024 * 1024));
            uint32_t cache_count_after = g_ss_lru_cache.total_count;
            pthread_mutex_unlock(&g_super_reg_lock);

            // Debug logging for LRU POP (hit)
            if (dbg == 1) {
                fprintf(stderr, "[LRU_POP] class=%d ss=%p (hit) (cache_size=%u/%u)\n",
                        size_class, (void*)curr, cache_count_after, g_ss_lru_cache.max_cached);
            }

#if !HAKMEM_BUILD_RELEASE
            static int pop_log_count = 0;
            if (pop_log_count < 10) {
                fprintf(stderr, "[SS_LRU_POP] Reusing ss=%p size=%zu (cache_count=%u)\n",
                        (void*)curr, ss_size, cache_count_after);
                pop_log_count++;
            }
#endif

            // Re-initialize SuperSlab (magic, timestamps, etc.)
            curr->magic = SUPERSLAB_MAGIC;
            curr->last_used_ns = hak_now_ns();
            curr->lru_prev = NULL;
            curr->lru_next = NULL;

            // ROOT CAUSE FIX: Re-register in global registry (idempotent)
            // Without this, hak_super_lookup() fails in free() path
            hak_super_register((uintptr_t)curr, curr);

            return curr;
        }

        // Incompatible SuperSlab - evict immediately
        SuperSlab* next = curr->lru_next;
        ss_lru_remove(curr);
        g_ss_lru_cache.total_count--;
        size_t ss_size = (size_t)1 << curr->lg_size;
        g_ss_lru_cache.total_memory_mb -= (ss_size / (1024 * 1024));

        // Track evictions for observability
        static _Atomic uint64_t g_incompatible_evictions = 0;
        atomic_fetch_add(&g_incompatible_evictions, 1);

        // Release memory
        munmap(curr, ss_size);
        curr = next;
    }

    uint32_t cache_count_miss = g_ss_lru_cache.total_count;
    pthread_mutex_unlock(&g_super_reg_lock);

    // Debug logging for LRU POP (miss)
    if (dbg == 1) {
        fprintf(stderr, "[LRU_POP] class=%d (miss) (cache_size=%u/%u)\n",
                size_class, cache_count_miss, g_ss_lru_cache.max_cached);
    }

    return NULL;  // No matching SuperSlab in cache
}
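// Worked example of the geometry check above, using the 63488B / 65536B
// usable-slab sizes from the code: a 2048B-stride class fills slab 0 with
// 63488 / 2048 = 31 blocks, so implied_stride = 63488 / 31 = 2048 and the
// cached SuperSlab is accepted. A SuperSlab previously carved at a 1024B
// stride reports 63488 / 62 = 1024 != 2048 and is evicted instead of reused.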
// Add SuperSlab to LRU cache
int hak_ss_lru_push(SuperSlab* ss) {
    if (!ss) return 0;  // Nothing to cache
    if (!g_ss_lru_initialized) {
        hak_ss_lru_init();
    }

    // Priority-2: Use cached ENV (eliminate lazy-init TLS overhead)
#if !HAKMEM_BUILD_RELEASE
    static __thread int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        dbg = HAK_ENV_SS_LRU_DEBUG();
    }
#else
    static const int dbg = 0;
#endif

    pthread_mutex_lock(&g_super_reg_lock);

    // Check if we should cache or evict immediately
    size_t ss_size = (size_t)1 << ss->lg_size;
    uint64_t ss_mb = ss_size / (1024 * 1024);

    // If adding this would exceed limits, evict first
    while (g_ss_lru_cache.total_count >= g_ss_lru_cache.max_cached ||
           g_ss_lru_cache.total_memory_mb + ss_mb > g_ss_lru_cache.max_memory_mb) {
        if (!ss_lru_evict_one()) {
            // Cache is empty but still can't fit - don't cache
            pthread_mutex_unlock(&g_super_reg_lock);
            return 0;
        }
    }

    // Add to cache
    ss->last_used_ns = hak_now_ns();
    ss->generation = g_ss_lru_cache.generation++;
    ss_lru_insert_head(ss);
    g_ss_lru_cache.total_count++;
    g_ss_lru_cache.total_memory_mb += ss_mb;

    uint32_t cache_count_after = g_ss_lru_cache.total_count;
    pthread_mutex_unlock(&g_super_reg_lock);

    // Debug logging for LRU PUSH
    if (dbg == 1) {
        fprintf(stderr, "[LRU_PUSH] ss=%p size=%zu KB (cache_size=%u/%u)\n",
                (void*)ss, ss_size / 1024, cache_count_after, g_ss_lru_cache.max_cached);
    }

#if !HAKMEM_BUILD_RELEASE
    static int push_log_count = 0;
    if (push_log_count < 10) {
        fprintf(stderr, "[SS_LRU_PUSH] Cached ss=%p size=%zu (cache_count=%u)\n",
                (void*)ss, ss_size, cache_count_after);
        push_log_count++;
    }
#endif

    return 1;
}
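// Usage sketch (illustrative): how a retire path might hand a drained SuperSlab
// to the cache instead of unmapping it right away. The helper name and the
// point at which a SuperSlab counts as "drained" are assumptions; the ordering
// (unregister before push) follows the "Already unregistered when added to
// cache" comment in ss_lru_evict_one() above.
static __attribute__((unused)) void example_retire_superslab(SuperSlab* ss) {
    if (!ss) return;
    hak_super_unregister((uintptr_t)ss);  // make it invisible to lookups first
    if (!hak_ss_lru_push(ss)) {
        // Cache rejected it (limits too tight): release the memory directly.
        ss->magic = 0;
        munmap(ss, (size_t)1 << ss->lg_size);
    }
}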
// ============================================================================
// Phase 11: SuperSlab Prewarm - Eliminate mmap/munmap bottleneck
// ============================================================================

// Prewarm specific size class with count SuperSlabs
void hak_ss_prewarm_class(int size_class, uint32_t count) {
    if (size_class < 0 || size_class >= TINY_NUM_CLASSES) {
        fprintf(stderr, "[SS_PREWARM] Invalid size_class=%d (valid: 0-%d)\n",
                size_class, TINY_NUM_CLASSES - 1);
        return;
    }

    // Priority-2: Use cached ENV (eliminate lazy-init static overhead)
#if !HAKMEM_BUILD_RELEASE
    static int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        dbg = HAK_ENV_SS_PREWARM_DEBUG();
    }
#else
    static const int dbg = 0;
#endif

    // Ensure LRU cache is initialized
    if (!g_ss_lru_initialized) {
        hak_ss_lru_init();
    }

    // Phase 11+: Use static array to avoid malloc() during init (causes recursion)
    // Cap at 512 as defined in SS_COLD_START_MAX_COUNT
#define SS_PREWARM_MAX_BATCH 512
    static SuperSlab* slabs[SS_PREWARM_MAX_BATCH];
    if (count > SS_PREWARM_MAX_BATCH) {
        count = SS_PREWARM_MAX_BATCH;
    }

    // Enable prewarm bypass to prevent LRU cache from being used during allocation
    atomic_store_explicit(&g_ss_prewarm_bypass, 1, memory_order_release);

    uint32_t allocated = 0;
    for (uint32_t i = 0; i < count; i++) {
        // Allocate a SuperSlab for this class
        SuperSlab* ss = superslab_allocate((uint8_t)size_class);
        if (!ss) {
            break;  // Stop on OOM
        }
        slabs[allocated++] = ss;
    }

    // Disable prewarm bypass
    atomic_store_explicit(&g_ss_prewarm_bypass, 0, memory_order_release);

    // Now push all allocated SuperSlabs to LRU cache
    uint32_t cached = 0;
    for (uint32_t i = 0; i < allocated; i++) {
        int pushed = hak_ss_lru_push(slabs[i]);
        if (pushed) {
            cached++;
        } else {
            // LRU cache full - free remaining SuperSlabs
            for (uint32_t j = i; j < allocated; j++) {
                superslab_free(slabs[j]);
            }
            break;
        }
    }
    // Note: slabs is a static array, no free() needed

    // Debug logging for PREWARM
    if (dbg == 1) {
        fprintf(stderr, "[PREWARM] Class %d: allocated=%u cached=%u\n",
                size_class, allocated, cached);
    }

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[SS_PREWARM] Class %d: allocated=%u cached=%u\n",
            size_class, allocated, cached);
#else
    (void)cached;  // Suppress unused warning
#endif
}

// Prewarm all classes (counts[i] = number of SuperSlabs for class i)
void hak_ss_prewarm_all(const uint32_t counts[TINY_NUM_CLASSES]) {
    if (!counts) return;
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        if (counts[cls] > 0) {
            hak_ss_prewarm_class(cls, counts[cls]);
        }
    }
}
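// Usage sketch (illustrative): warming a couple of SuperSlabs for every tiny
// class at startup. The per-class count of 2 is an arbitrary example value,
// not a recommended default; hak_ss_prewarm_init() below derives the real
// default from the Cold Start Box and ENV overrides.
static __attribute__((unused)) void example_prewarm_two_per_class(void) {
    uint32_t counts[TINY_NUM_CLASSES];
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        counts[i] = 2;
    }
    hak_ss_prewarm_all(counts);
}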
// Prewarm: Allocate SuperSlabs at startup and add to LRU cache
// Phase 11+: Cold Start Box enables prewarm by default (1 SuperSlab/class)
void hak_ss_prewarm_init(void) {
    // Priority-2: Use cached ENV (eliminate lazy-init static overhead)
#if !HAKMEM_BUILD_RELEASE
    static int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        dbg = HAK_ENV_SS_PREWARM_DEBUG();
    }
#else
    static const int dbg = 0;
#endif

    // Phase 11+: Get default from Cold Start Box (enables prewarm by default)
    // Can be disabled via HAKMEM_SS_PREWARM_DISABLE=1 or HAKMEM_SS_PREWARM_COUNT=0
    int cold_start_count = ss_cold_start_get_count();
    ss_cold_start_log_config();  // Log configuration for diagnostics

    if (cold_start_count == 0) {
        // Prewarm explicitly disabled
        return;
    }

    // Priority-2: Use cached ENV (eliminate legacy config syscall overhead)
    // Check for legacy ENV override (HAKMEM_PREWARM_SUPERSLABS)
    // This takes precedence over Cold Start Box default
    int env_val = HAK_ENV_PREWARM_SUPERSLABS();
    long global = (env_val != 0) ? env_val : cold_start_count;  // Default from Cold Start Box
    if (env_val != 0) {
        // Legacy ENV override active
        global = env_val;
        if (global == 0) {
            // Legacy disable via HAKMEM_PREWARM_SUPERSLABS=0
            return;
        }
    }

    // Cap at reasonable limit (avoid OOM on typo like "10000")
    if (global > 512) {
        fprintf(stderr, "[SS_PREWARM] WARNING: Capping prewarm count from %ld to 512 per class\n",
                global);
        global = 512;
    }
    uint32_t prewarm_count = (uint32_t)global;

    // Expand LRU cache capacity to hold prewarmed SuperSlabs
    uint32_t needed = prewarm_count * TINY_NUM_CLASSES;
    pthread_mutex_lock(&g_super_reg_lock);
    if (needed > g_ss_lru_cache.max_cached) {
        g_ss_lru_cache.max_cached = needed;
        // Expand memory limit (1 SuperSlab = 1MB or 2MB)
        // Conservative estimate: 2MB per SuperSlab
        uint64_t needed_mb = (uint64_t)needed * 2;
        if (needed_mb > g_ss_lru_cache.max_memory_mb) {
            g_ss_lru_cache.max_memory_mb = needed_mb;
        }
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[SS_PREWARM] Expanded LRU cache: max_cached=%u max_memory_mb=%llu\n",
                g_ss_lru_cache.max_cached,
                (unsigned long long)g_ss_lru_cache.max_memory_mb);
#endif
    }
    pthread_mutex_unlock(&g_super_reg_lock);

    // Prewarm all classes uniformly
    uint32_t counts[TINY_NUM_CLASSES];
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        counts[i] = prewarm_count;
    }

    // Debug logging for PREWARM initialization
    if (dbg == 1) {
        fprintf(stderr, "[PREWARM] Allocating %u SuperSlabs for classes 0-%d (total=%u)\n",
                prewarm_count, TINY_NUM_CLASSES - 1, needed);
    }

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[SS_PREWARM] Starting prewarm: %u SuperSlabs per class (%u total)\n",
            prewarm_count, needed);
#endif

    hak_ss_prewarm_all(counts);

    // Debug logging for PREWARM completion
    if (dbg == 1) {
        fprintf(stderr, "[PREWARM] Complete: %u SuperSlabs cached\n",
                g_ss_lru_cache.total_count);
    }

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[SS_PREWARM] Prewarm complete (cache_count=%u)\n",
            g_ss_lru_cache.total_count);
#endif
}

// Debug: Get registry statistics
void hak_super_registry_stats(SuperRegStats* stats) {
    if (!stats) return;

    int eff_size = super_reg_effective_size();
    int eff_mask = super_reg_effective_mask();
    SuperRegEntry* reg = reg_entries();

    stats->total_slots = eff_size;
    stats->used_slots = 0;
    stats->max_probe_depth = 0;

    if (!reg || eff_size <= 0) {
        return;
    }

    pthread_mutex_lock(&g_super_reg_lock);

    // Count used slots
    for (int i = 0; i < eff_size; i++) {
        if (atomic_load_explicit(&reg[i].base, memory_order_acquire) != 0) {
            stats->used_slots++;
        }
    }

    // Calculate max probe depth
    for (int i = 0; i < eff_size; i++) {
        if (atomic_load_explicit(&reg[i].base, memory_order_acquire) != 0) {
            uintptr_t base = atomic_load_explicit(&reg[i].base, memory_order_acquire);
            int lg = reg[i].lg_size;  // Phase 8.3: Use stored lg_size
            int h = hak_super_hash(base, lg);

            // Find actual probe depth for this entry
            for (int j = 0; j < SUPER_MAX_PROBE; j++) {
                int idx = (h + j) & eff_mask;
                if (atomic_load_explicit(&reg[idx].base, memory_order_acquire) == base &&
                    reg[idx].lg_size == lg) {
                    if (j > stats->max_probe_depth) {
                        stats->max_probe_depth = j;
                    }
                    break;
                }
            }
        }
    }

    pthread_mutex_unlock(&g_super_reg_lock);
}
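// Usage sketch (illustrative): dumping registry occupancy from a debug hook.
// The helper name is hypothetical and the %d conversions assume the int-typed
// SuperRegStats fields implied by the assignments above.
static __attribute__((unused)) void example_dump_registry_stats(void) {
    SuperRegStats st;
    hak_super_registry_stats(&st);
    fprintf(stderr, "[SUPER_REG_STATS] used=%d/%d max_probe=%d\n",
            st.used_slots, st.total_slots, st.max_probe_depth);
}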