Files
hakmem/core/hakmem_super_registry.c
Moe Charm (CI) 6d40dc7418 Fix: Add missing superslab_allocate() declaration
Root cause identified by Task agent investigation:
- superslab_allocate() called without declaration in 2 files
- Compiler assumes implicit int return type (pre-C99 legacy rule; C99 removed implicit declarations, but GCC still accepts them with a warning)
- Actual signature returns SuperSlab* (64-bit pointer)
- Pointer truncated to 32-bit int, then sign-extended to 64-bit
- Results in corrupted pointer and segmentation fault

Mechanism of corruption:
1. superslab_allocate() returns 0x00005555eba00000
2. Compiler expects int, reads only %eax: 0xeba00000
3. movslq %eax,%rbp sign-extends with bit 31 set
4. Result: 0xffffffffeba00000 (invalid pointer)
5. Dereferencing causes SEGFAULT

Files fixed:
1. hakmem_tiny_superslab_internal.h - Added box/ss_allocation_box.h
   (fixes superslab_head.c via transitive include)
2. hakmem_super_registry.c - Added box/ss_allocation_box.h

Warnings eliminated:
- "implicit declaration of function 'superslab_allocate'"
- "type of 'superslab_allocate' does not match original declaration"
- "code may be misoptimized unless '-fno-strict-aliasing' is used"

Test results:
- larson_hakmem now runs without segfault ✓
- Multiple test runs confirmed stable ✓
- 2 threads, 4 threads: All passing ✓

Impact:
- CRITICAL severity bug (affects all SuperSlab expansion)
- Intermittent (depends on memory layout ~50% probability)
- Now FIXED completely

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 06:22:49 +09:00

770 lines
25 KiB
C

#include "hakmem_super_registry.h"
#include "hakmem_tiny_superslab.h"
#include "box/ss_allocation_box.h" // For superslab_allocate() declaration
#include <stdint.h> // UINT32_MAX
#include <stdio.h>
#include <stdlib.h> // getenv, strtol/strtoll, malloc/free
#include <string.h>
#include <sys/mman.h> // munmap for incompatible SuperSlab eviction
// Global registry storage: hash table of SuperSlab entries, probed linearly
// by hak_super_register()/hak_super_unregister().
SuperRegEntry g_super_reg[SUPER_REG_SIZE];
// Mutex taken by the registry writers in this file; the LRU cache functions
// in this file take the same lock.
pthread_mutex_t g_super_reg_lock = PTHREAD_MUTEX_INITIALIZER;
// Set to 1 by hak_super_registry_init() after the tables have been zeroed.
int g_super_reg_initialized = 0;
// Per-class registry storage (Phase 6: Registry Optimization).
// Within this file these arrays are only zero-initialized.
SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
int g_super_reg_class_size[TINY_NUM_CLASSES];
// Phase 9: Lazy Deallocation - LRU Cache Storage
SuperSlabLRUCache g_ss_lru_cache = {0};
// Set to 1 by hak_ss_lru_init() once the cache limits have been configured.
static int g_ss_lru_initialized = 0;
// Phase 11: Prewarm bypass flag (disable LRU pop during prewarm)
static _Atomic int g_ss_prewarm_bypass = 0;
// One-time registry setup: zeroes the hash table and the per-class tables,
// then marks the registry as initialized. Subsequent calls are no-ops.
void hak_super_registry_init(void) {
    if (!g_super_reg_initialized) {
        // Per-class tables (Phase 6: Registry Optimization)
        memset(g_super_reg_class_size, 0, sizeof g_super_reg_class_size);
        memset(g_super_reg_by_class, 0, sizeof g_super_reg_by_class);

        // Address -> SuperSlab hash table
        memset(g_super_reg, 0, sizeof g_super_reg);

        // Release fence: the zeroing above is ordered before the flag write.
        atomic_thread_fence(memory_order_release);
        g_super_reg_initialized = 1;
    }
}
// Register a SuperSlab in the global hash registry (mutex-protected).
// CRITICAL: Call AFTER SuperSlab is fully initialized.
// Publish order: ss/lg_size write -> release fence -> base write.
//
// base: address key for the entry; 0 is rejected because 0 marks an empty slot.
// ss:   SuperSlab to publish; its lg_size selects the hash and is stored in the entry.
//
// Returns 1 if the entry was inserted or an identical (base, lg_size) entry
// already exists, 0 on invalid arguments or when the probe window has no free slot.
int hak_super_register(uintptr_t base, SuperSlab* ss) {
    // A NULL SuperSlab cannot be published, and base == 0 would be
    // indistinguishable from an empty slot.
    if (!ss || base == 0) {
        return 0;
    }
    if (!g_super_reg_initialized) {
        hak_super_registry_init();
    }

#if !HAKMEM_BUILD_RELEASE
    // Debug logging (check ENV every time for now - performance not critical during debug)
    const char* dbg_env = getenv("HAKMEM_SUPER_REG_DEBUG");
    int dbg = (dbg_env && *dbg_env && *dbg_env != '0') ? 1 : 0;
#else
    const int dbg = 0;
#endif

    pthread_mutex_lock(&g_super_reg_lock);

    int lg = ss->lg_size; // Phase 8.3: Get lg_size from SuperSlab
    int h = hak_super_hash(base, lg);

    // Scan the whole probe window before claiming a slot: unregister clears
    // slots in place, so a free slot may sit in front of an existing entry
    // for the same (base, lg). Taking the first free slot blindly would
    // create a duplicate.
    int already_registered = 0;
    int free_idx = -1;
    for (int i = 0; i < SUPER_MAX_PROBE; i++) {
        int idx = (h + i) & SUPER_REG_MASK;
        SuperRegEntry* e = &g_super_reg[idx];
        uintptr_t cur = atomic_load_explicit(&e->base, memory_order_acquire);
        if (cur == base && e->lg_size == lg) {
            already_registered = 1;
            break;
        }
        if (cur == 0 && free_idx < 0) {
            free_idx = idx; // remember the first empty slot
        }
    }

    if (!already_registered && free_idx < 0) {
        // Hash table full (probing limit reached)
        pthread_mutex_unlock(&g_super_reg_lock);
        fprintf(stderr, "HAKMEM: SuperSlab registry full! Increase SUPER_REG_SIZE\n");
        return 0;
    }

    if (!already_registered) {
        SuperRegEntry* e = &g_super_reg[free_idx];
        // Step 1: Write SuperSlab pointer and lg_size (atomic for MT-safety)
        atomic_store_explicit(&e->ss, ss, memory_order_release);
        e->lg_size = lg; // Phase 8.3: Store lg_size for fast lookup
        // Step 2: Release fence (ss/lg_size writes are ordered before base)
        atomic_thread_fence(memory_order_release);
        // Step 3: Publish base address (makes entry visible to readers)
        atomic_store_explicit(&e->base, base, memory_order_release);
        if (dbg == 1) {
            fprintf(stderr, "[SUPER_REG] register base=%p lg=%d slot=%d magic=%llx\n",
                    (void*)base, lg, free_idx,
                    (unsigned long long)ss->magic);
        }
    }

    // Phase 12: per-class registry not keyed by ss->size_class anymore.
    // Keep existing global hash registration only.
    pthread_mutex_unlock(&g_super_reg_lock);
    return 1;
}
// Unregister a SuperSlab from the global hash registry (mutex-protected).
// CRITICAL: Call BEFORE munmap to prevent reader segfault.
// Unpublish order: ss = NULL -> base = 0 (release) -> munmap outside this function.
// Phase 8.3: ACE - the caller does not pass lg_size, so both 20 and 21 are tried.
//
// base: address key that was passed to hak_super_register().
// Not finding the entry is not an error (could be a duplicate unregister).
void hak_super_unregister(uintptr_t base) {
#if !HAKMEM_BUILD_RELEASE
    static int dbg_once = -1; // lazily read from HAKMEM_SUPER_REG_DEBUG
#endif
    // base == 0 is the empty-slot marker and can never be a registered key.
    if (!g_super_reg_initialized || base == 0) return;

    pthread_mutex_lock(&g_super_reg_lock);

    int removed = 0;
    for (int lg = 20; lg <= 21 && !removed; lg++) {
        int h = hak_super_hash(base, lg);
        // Probe the full window. Slots are cleared in place on removal (no
        // tombstones), so an empty slot does NOT prove the key is absent:
        // the entry may have been inserted behind a slot that was freed later.
        for (int i = 0; i < SUPER_MAX_PROBE; i++) {
            SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
            if (atomic_load_explicit(&e->base, memory_order_acquire) != base ||
                e->lg_size != lg) {
                continue;
            }
            // Step 1: Clear SuperSlab pointer (atomic, prevents TOCTOU race)
            atomic_store_explicit(&e->ss, NULL, memory_order_release);
            // Step 2: Unpublish base (makes entry invisible to readers)
            atomic_store_explicit(&e->base, 0, memory_order_release);
            // Step 3: Clear lg_size so the freed slot never matches a key
            e->lg_size = 0;
            removed = 1;
            break;
        }
    }

    // Phase 12: per-class registry no longer keyed; no per-class removal required.
    pthread_mutex_unlock(&g_super_reg_lock);

#if !HAKMEM_BUILD_RELEASE
    if (removed) {
        if (__builtin_expect(dbg_once == -1, 0)) {
            const char* env = getenv("HAKMEM_SUPER_REG_DEBUG");
            dbg_once = (env && *env && *env != '0');
        }
        if (dbg_once == 1) {
            fprintf(stderr, "[SUPER_REG] unregister base=%p\n", (void*)base);
        }
    }
#else
    (void)removed;
#endif
}
// ============================================================================
// Phase 9: Lazy Deallocation - LRU Cache Implementation
// ============================================================================
// hak_now_ns() is defined in superslab/superslab_inline.h - use that
#include <sys/mman.h> // For munmap
// Read a non-negative decimal integer from environment variable `name`.
// Returns `fallback` when the variable is unset, empty, not a plain decimal
// number, negative, or larger than UINT32_MAX. "0" is accepted as a value.
static uint32_t ss_lru_env_u32(const char* name, uint32_t fallback) {
    const char* text = getenv(name);
    if (!text || !*text) {
        return fallback;
    }
    char* end = NULL;
    long long value = strtoll(text, &end, 10);
    if (end == text || *end != '\0' || value < 0 || value > (long long)UINT32_MAX) {
        return fallback;
    }
    return (uint32_t)value;
}

// Initialize LRU cache (called once at startup; later calls are no-ops).
// Limits come from the environment, with defaults when unset or invalid:
//   HAKMEM_SUPERSLAB_MAX_CACHED     (default 256)
//   HAKMEM_SUPERSLAB_MAX_MEMORY_MB  (default 512)
//   HAKMEM_SUPERSLAB_TTL_SEC        (default 60)
// Takes g_super_reg_lock, so it must not be called with that lock held.
void hak_ss_lru_init(void) {
    if (g_ss_lru_initialized) return;

    pthread_mutex_lock(&g_super_reg_lock);
    // Re-check under the lock: another thread may have finished init meanwhile.
    if (g_ss_lru_initialized) {
        pthread_mutex_unlock(&g_super_reg_lock);
        return;
    }

    uint32_t ttl_sec = ss_lru_env_u32("HAKMEM_SUPERSLAB_TTL_SEC", 60);
    g_ss_lru_cache.max_cached = ss_lru_env_u32("HAKMEM_SUPERSLAB_MAX_CACHED", 256);
    g_ss_lru_cache.max_memory_mb = (uint64_t)ss_lru_env_u32("HAKMEM_SUPERSLAB_MAX_MEMORY_MB", 512);
    g_ss_lru_cache.ttl_ns = (uint64_t)ttl_sec * 1000000000ULL;
    g_ss_lru_cache.lru_head = NULL;
    g_ss_lru_cache.lru_tail = NULL;
    g_ss_lru_cache.total_count = 0;
    g_ss_lru_cache.total_memory_mb = 0;
    g_ss_lru_cache.generation = 0;
    g_ss_lru_initialized = 1;

#if !HAKMEM_BUILD_RELEASE
    // Snapshot while still holding the lock so the log line is consistent.
    uint32_t log_max_cached = g_ss_lru_cache.max_cached;
    unsigned long long log_max_memory_mb = (unsigned long long)g_ss_lru_cache.max_memory_mb;
#endif
    pthread_mutex_unlock(&g_super_reg_lock);

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[SS_LRU_INIT] max_cached=%u max_memory_mb=%llu ttl_sec=%u\n",
            log_max_cached,
            log_max_memory_mb,
            ttl_sec);
#endif
}
// Unlink `ss` from the LRU list and clear its link fields.
// Does NOT free memory and does not touch the cache counters.
static void ss_lru_remove(SuperSlab* ss) {
    if (ss == NULL) {
        return;
    }

    // Fix up the predecessor side (or the list head when there is none).
    if (ss->lru_prev == NULL) {
        g_ss_lru_cache.lru_head = ss->lru_next;
    } else {
        ss->lru_prev->lru_next = ss->lru_next;
    }

    // Fix up the successor side (or the list tail when there is none).
    if (ss->lru_next == NULL) {
        g_ss_lru_cache.lru_tail = ss->lru_prev;
    } else {
        ss->lru_next->lru_prev = ss->lru_prev;
    }

    ss->lru_next = NULL;
    ss->lru_prev = NULL;
}
// Link `ss` in front of the current list head (most recently used position).
// Does not touch the cache counters.
static void ss_lru_insert_head(SuperSlab* ss) {
    if (ss == NULL) {
        return;
    }

    ss->lru_next = g_ss_lru_cache.lru_head;
    ss->lru_prev = NULL;

    if (g_ss_lru_cache.lru_head == NULL) {
        // List was empty: the new node is also the tail.
        g_ss_lru_cache.lru_tail = ss;
    } else {
        g_ss_lru_cache.lru_head->lru_prev = ss;
    }

    g_ss_lru_cache.lru_head = ss;
}
// Refresh the last-used timestamp of `ss` and, when it is currently linked
// in the LRU list, move it to the head. No-op for NULL or before LRU init.
void hak_ss_lru_touch(SuperSlab* ss) {
    if (ss == NULL || !g_ss_lru_initialized) {
        return;
    }

    pthread_mutex_lock(&g_super_reg_lock);

    ss->last_used_ns = hak_now_ns();

    // A node is linked if it has a neighbour or is the sole element (the head).
    int linked = (ss->lru_prev != NULL) ||
                 (ss->lru_next != NULL) ||
                 (g_ss_lru_cache.lru_head == ss);
    if (linked) {
        ss_lru_remove(ss);
        ss_lru_insert_head(ss);
    }

    pthread_mutex_unlock(&g_super_reg_lock);
}
// Evict one SuperSlab from the tail of the LRU list (oldest entry):
// unlinks it, updates the cache counters, clears its magic and munmap()s it.
// Every caller in this file holds g_super_reg_lock when calling.
// Returns: 1 if evicted, 0 if cache is empty
static int ss_lru_evict_one(void) {
#if !HAKMEM_BUILD_RELEASE
    // Debug logging flag (lazy init)
    static int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_LRU_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#else
    static const int dbg = 0;
#endif

    SuperSlab* victim = g_ss_lru_cache.lru_tail;
    if (!victim) return 0;

    // Remove from LRU list and account for it
    ss_lru_remove(victim);
    g_ss_lru_cache.total_count--;
    size_t ss_size = (size_t)1 << victim->lg_size;
    g_ss_lru_cache.total_memory_mb -= (ss_size / (1024 * 1024));

    // Debug logging for LRU EVICT
    if (dbg == 1) {
        fprintf(stderr, "[LRU_EVICT] ss=%p size=%zu KB (freed)\n",
                (void*)victim, ss_size / 1024);
    }

    // Already unregistered when added to cache, just munmap
    victim->magic = 0;
    munmap(victim, ss_size);

#if !HAKMEM_BUILD_RELEASE
    // Only the pointer value is printed below; the mapping is gone.
    static int evict_log_count = 0;
    if (evict_log_count < 10) {
        fprintf(stderr, "[SS_LRU_EVICT] ss=%p size=%zu (cache_count=%u)\n",
                (void*)victim, ss_size, g_ss_lru_cache.total_count);
        evict_log_count++;
    }
#endif
    return 1;
}
// Apply the three eviction policies to the LRU cache under g_super_reg_lock:
// count limit, memory limit, then TTL expiry. No-op before LRU init.
void hak_ss_lru_evict(void) {
    if (!g_ss_lru_initialized) {
        return;
    }

    pthread_mutex_lock(&g_super_reg_lock);
    uint64_t now = hak_now_ns();

    // Policy 1: drop oldest entries while the entry count is over the limit.
    while (g_ss_lru_cache.total_count > g_ss_lru_cache.max_cached &&
           ss_lru_evict_one()) {
        /* keep evicting */
    }

    // Policy 2: drop oldest entries while cached memory is over the limit.
    while (g_ss_lru_cache.total_memory_mb > g_ss_lru_cache.max_memory_mb &&
           ss_lru_evict_one()) {
        /* keep evicting */
    }

    // Policy 3: walk from the tail towards the head and release every entry
    // whose age exceeds the TTL.
    SuperSlab* cursor = g_ss_lru_cache.lru_tail;
    while (cursor != NULL) {
        SuperSlab* entry = cursor;
        cursor = entry->lru_prev; // fetch before the entry is unlinked/unmapped

        uint64_t age = now - entry->last_used_ns;
        if (age <= g_ss_lru_cache.ttl_ns) {
            continue;
        }

        ss_lru_remove(entry);
        g_ss_lru_cache.total_count--;
        size_t bytes = (size_t)1 << entry->lg_size;
        g_ss_lru_cache.total_memory_mb -= (bytes / (1024 * 1024));
        entry->magic = 0;
        munmap(entry, bytes);
    }

    pthread_mutex_unlock(&g_super_reg_lock);
}
// Try to reuse a cached SuperSlab
SuperSlab* hak_ss_lru_pop(uint8_t size_class) {
if (!g_ss_lru_initialized) {
hak_ss_lru_init();
}
// Phase 11: Bypass LRU cache during prewarm
if (atomic_load_explicit(&g_ss_prewarm_bypass, memory_order_acquire)) {
return NULL;
}
#if !HAKMEM_BUILD_RELEASE
// Debug logging flag (lazy init)
static __thread int dbg = -1;
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_LRU_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#else
static const int dbg = 0;
#endif
pthread_mutex_lock(&g_super_reg_lock);
// Find a compatible SuperSlab in cache (stride must match current config)
SuperSlab* curr = g_ss_lru_cache.lru_head;
extern const size_t g_tiny_class_sizes[];
size_t expected_stride = g_tiny_class_sizes[size_class];
while (curr) {
// Validate: Check if cached SuperSlab slabs match current stride
// This prevents reusing old 1024B SuperSlabs for new 2048B C7 allocations
int is_compatible = 1;
// Scan active slabs for stride mismatch
int cap = ss_slabs_capacity(curr);
for (int i = 0; i < cap; i++) {
if (curr->slab_bitmap & (1u << i)) {
TinySlabMeta* meta = &curr->slabs[i];
if (meta->capacity > 0) {
// Calculate implied stride from slab geometry
// Slab 0: 63488B usable, Others: 65536B usable
size_t slab_usable = (i == 0) ? 63488 : 65536;
size_t implied_stride = slab_usable / meta->capacity;
// Stride mismatch detected
if (implied_stride != expected_stride) {
is_compatible = 0;
#if !HAKMEM_BUILD_RELEASE
static _Atomic uint32_t g_incomp_log = 0;
uint32_t n = atomic_fetch_add(&g_incomp_log, 1);
if (n < 8) {
fprintf(stderr,
"[LRU_INCOMPATIBLE] class=%d ss=%p slab=%d expect_stride=%zu implied=%zu (evicting)\n",
size_class, (void*)curr, i, expected_stride, implied_stride);
}
#endif
break;
}
}
}
}
if (is_compatible) {
// Compatible - reuse this SuperSlab
ss_lru_remove(curr);
g_ss_lru_cache.total_count--;
size_t ss_size = (size_t)1 << curr->lg_size;
g_ss_lru_cache.total_memory_mb -= (ss_size / (1024 * 1024));
uint32_t cache_count_after = g_ss_lru_cache.total_count;
pthread_mutex_unlock(&g_super_reg_lock);
// Debug logging for LRU POP (hit)
if (dbg == 1) {
fprintf(stderr, "[LRU_POP] class=%d ss=%p (hit) (cache_size=%u/%u)\n",
size_class, (void*)curr, cache_count_after, g_ss_lru_cache.max_cached);
}
#if !HAKMEM_BUILD_RELEASE
static int pop_log_count = 0;
if (pop_log_count < 10) {
fprintf(stderr, "[SS_LRU_POP] Reusing ss=%p size=%zu (cache_count=%u)\n",
curr, ss_size, cache_count_after);
pop_log_count++;
}
#endif
// Re-initialize SuperSlab (magic, timestamps, etc.)
curr->magic = SUPERSLAB_MAGIC;
curr->last_used_ns = hak_now_ns();
curr->lru_prev = NULL;
curr->lru_next = NULL;
return curr;
}
// Incompatible SuperSlab - evict immediately
SuperSlab* next = curr->lru_next;
ss_lru_remove(curr);
g_ss_lru_cache.total_count--;
size_t ss_size = (size_t)1 << curr->lg_size;
g_ss_lru_cache.total_memory_mb -= (ss_size / (1024 * 1024));
// Track evictions for observability
static _Atomic uint64_t g_incompatible_evictions = 0;
atomic_fetch_add(&g_incompatible_evictions, 1);
// Release memory
munmap(curr, ss_size);
curr = next;
}
uint32_t cache_count_miss = g_ss_lru_cache.total_count;
pthread_mutex_unlock(&g_super_reg_lock);
// Debug logging for LRU POP (miss)
if (dbg == 1) {
fprintf(stderr, "[LRU_POP] class=%d (miss) (cache_size=%u/%u)\n",
size_class, cache_count_miss, g_ss_lru_cache.max_cached);
}
return NULL; // No matching SuperSlab in cache
}
// Add a SuperSlab to the head of the LRU cache.
//
// If inserting would exceed max_cached or max_memory_mb, the oldest cached
// entries are evicted first. When the cache is already empty and the limits
// still cannot accommodate `ss`, nothing is cached.
//
// Returns 1 if `ss` is now held by the cache, 0 if it was not cached
// (including ss == NULL); in that case the caller still owns `ss`.
int hak_ss_lru_push(SuperSlab* ss) {
    // A NULL SuperSlab must be rejected before ss->lg_size is read below.
    if (!ss) {
        return 0;
    }
    if (!g_ss_lru_initialized) {
        hak_ss_lru_init();
    }
#if !HAKMEM_BUILD_RELEASE
    // Debug logging flag (lazy init)
    static __thread int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_LRU_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#else
    static const int dbg = 0;
#endif

    pthread_mutex_lock(&g_super_reg_lock);

    // Check if we should cache or evict immediately
    size_t ss_size = (size_t)1 << ss->lg_size;
    uint64_t ss_mb = ss_size / (1024 * 1024);

    // If adding this would exceed limits, evict first
    while (g_ss_lru_cache.total_count >= g_ss_lru_cache.max_cached ||
           g_ss_lru_cache.total_memory_mb + ss_mb > g_ss_lru_cache.max_memory_mb) {
        if (!ss_lru_evict_one()) {
            // Cache is empty but still can't fit - don't cache
            pthread_mutex_unlock(&g_super_reg_lock);
            return 0;
        }
    }

    // Add to cache
    ss->last_used_ns = hak_now_ns();
    ss->generation = g_ss_lru_cache.generation++;
    ss_lru_insert_head(ss);
    g_ss_lru_cache.total_count++;
    g_ss_lru_cache.total_memory_mb += ss_mb;
    uint32_t cache_count_after = g_ss_lru_cache.total_count;
    pthread_mutex_unlock(&g_super_reg_lock);

    // Debug logging for LRU PUSH
    if (dbg == 1) {
        fprintf(stderr, "[LRU_PUSH] ss=%p size=%zu KB (cache_size=%u/%u)\n",
                (void*)ss, ss_size / 1024, cache_count_after, g_ss_lru_cache.max_cached);
    }
#if !HAKMEM_BUILD_RELEASE
    static int push_log_count = 0;
    if (push_log_count < 10) {
        fprintf(stderr, "[SS_LRU_PUSH] Cached ss=%p size=%zu (cache_count=%u)\n",
                (void*)ss, ss_size, cache_count_after);
        push_log_count++;
    }
#endif
    return 1;
}
// ============================================================================
// Phase 11: SuperSlab Prewarm - Eliminate mmap/munmap bottleneck
// ============================================================================
// Prewarm one size class: allocate up to `count` SuperSlabs and push them
// into the LRU cache.
//
// size_class: must be in [0, TINY_NUM_CLASSES); otherwise an error is logged.
// count:      number of SuperSlabs to allocate; 0 is a no-op.
//
// Allocation stops early when superslab_allocate() returns NULL. SuperSlabs
// that the LRU cache refuses are released with superslab_free().
void hak_ss_prewarm_class(int size_class, uint32_t count) {
    if (size_class < 0 || size_class >= TINY_NUM_CLASSES) {
        fprintf(stderr, "[SS_PREWARM] Invalid size_class=%d (valid: 0-%d)\n",
                size_class, TINY_NUM_CLASSES - 1);
        return;
    }
    // Nothing to do; also avoids malloc(0), which may legitimately return
    // NULL and would be misreported as an allocation failure below.
    if (count == 0) {
        return;
    }
#if !HAKMEM_BUILD_RELEASE
    // Debug logging flag (lazy init)
    static int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#else
    static const int dbg = 0;
#endif

    // Ensure LRU cache is initialized
    if (!g_ss_lru_initialized) {
        hak_ss_lru_init();
    }

    // Allocate all SuperSlabs first (store in temp array to avoid LRU pop/push cycle)
    SuperSlab** slabs = malloc((size_t)count * sizeof *slabs);
    if (!slabs) {
        fprintf(stderr, "[SS_PREWARM] Failed to allocate temp array for class %d\n", size_class);
        return;
    }

    // Enable prewarm bypass to prevent LRU cache from being used during allocation
    atomic_store_explicit(&g_ss_prewarm_bypass, 1, memory_order_release);
    uint32_t allocated = 0;
    for (uint32_t i = 0; i < count; i++) {
        // Allocate a SuperSlab for this class
        SuperSlab* ss = superslab_allocate((uint8_t)size_class);
        if (!ss) {
            break; // Stop on OOM
        }
        slabs[allocated++] = ss;
    }
    // Disable prewarm bypass
    atomic_store_explicit(&g_ss_prewarm_bypass, 0, memory_order_release);

    // Now push all allocated SuperSlabs to LRU cache
    uint32_t cached = 0;
    for (uint32_t i = 0; i < allocated; i++) {
        if (hak_ss_lru_push(slabs[i])) {
            cached++;
            continue;
        }
        // LRU cache full - free this one and all remaining SuperSlabs
        for (uint32_t j = i; j < allocated; j++) {
            superslab_free(slabs[j]);
        }
        break;
    }
    free(slabs);

    // Debug logging for PREWARM
    if (dbg == 1) {
        fprintf(stderr, "[PREWARM] Class %d: allocated=%u cached=%u\n",
                size_class, allocated, cached);
    }
#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[SS_PREWARM] Class %d: allocated=%u cached=%u\n",
            size_class, allocated, cached);
#else
    (void)cached; // Suppress unused warning
#endif
}
// Prewarm every size class: counts[i] is the number of SuperSlabs requested
// for class i. Classes with a zero count are skipped; a NULL array is a no-op.
void hak_ss_prewarm_all(const uint32_t counts[TINY_NUM_CLASSES]) {
    if (counts == NULL) {
        return;
    }

    int cls = 0;
    while (cls < TINY_NUM_CLASSES) {
        uint32_t wanted = counts[cls];
        if (wanted > 0) {
            hak_ss_prewarm_class(cls, wanted);
        }
        cls++;
    }
}
// Prewarm: Allocate SuperSlabs at startup and add to LRU cache
void hak_ss_prewarm_init(void) {
#if !HAKMEM_BUILD_RELEASE
// Debug logging flag (lazy init)
static int dbg = -1;
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#else
static const int dbg = 0;
#endif
// Parse environment variable
const char* env = getenv("HAKMEM_PREWARM_SUPERSLABS");
if (!env || !*env) {
// Prewarm disabled
return;
}
// Parse as single number (uniform across all classes)
char* endptr;
long global = strtol(env, &endptr, 10);
if (*endptr != '\0' || global <= 0) {
fprintf(stderr, "[SS_PREWARM] Invalid HAKMEM_PREWARM_SUPERSLABS='%s' (expected positive integer)\n", env);
return;
}
// Cap at reasonable limit (avoid OOM on typo like "10000")
if (global > 512) {
fprintf(stderr, "[SS_PREWARM] WARNING: Capping prewarm count from %ld to 512 per class\n", global);
global = 512;
}
uint32_t prewarm_count = (uint32_t)global;
// Expand LRU cache capacity to hold prewarmed SuperSlabs
uint32_t needed = prewarm_count * TINY_NUM_CLASSES;
pthread_mutex_lock(&g_super_reg_lock);
if (needed > g_ss_lru_cache.max_cached) {
g_ss_lru_cache.max_cached = needed;
// Expand memory limit (1 SuperSlab = 1MB or 2MB)
// Conservative estimate: 2MB per SuperSlab
uint64_t needed_mb = (uint64_t)needed * 2;
if (needed_mb > g_ss_lru_cache.max_memory_mb) {
g_ss_lru_cache.max_memory_mb = needed_mb;
}
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[SS_PREWARM] Expanded LRU cache: max_cached=%u max_memory_mb=%llu\n",
g_ss_lru_cache.max_cached, (unsigned long long)g_ss_lru_cache.max_memory_mb);
#endif
}
pthread_mutex_unlock(&g_super_reg_lock);
// Prewarm all classes uniformly
uint32_t counts[TINY_NUM_CLASSES];
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
counts[i] = prewarm_count;
}
// Debug logging for PREWARM initialization
if (dbg == 1) {
fprintf(stderr, "[PREWARM] Allocating %u SuperSlabs for classes 0-%d (total=%u)\n",
prewarm_count, TINY_NUM_CLASSES - 1, needed);
}
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[SS_PREWARM] Starting prewarm: %u SuperSlabs per class (%u total)\n",
prewarm_count, needed);
#endif
hak_ss_prewarm_all(counts);
// Debug logging for PREWARM completion
if (dbg == 1) {
fprintf(stderr, "[PREWARM] Complete: %u SuperSlabs cached\n", g_ss_lru_cache.total_count);
}
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[SS_PREWARM] Prewarm complete (cache_count=%u)\n", g_ss_lru_cache.total_count);
#endif
}
// Debug: fill `stats` with registry statistics, computed under g_super_reg_lock.
//   total_slots     - SUPER_REG_SIZE
//   used_slots      - number of entries whose base is non-zero
//   max_probe_depth - largest probe offset (from the entry's hash position)
//                     at which a used entry is found within SUPER_MAX_PROBE
// A NULL `stats` is ignored.
void hak_super_registry_stats(SuperRegStats* stats) {
    if (stats == NULL) {
        return;
    }

    stats->total_slots = SUPER_REG_SIZE;
    stats->used_slots = 0;
    stats->max_probe_depth = 0;

    pthread_mutex_lock(&g_super_reg_lock);

    for (int slot = 0; slot < SUPER_REG_SIZE; slot++) {
        uintptr_t base = atomic_load_explicit(&g_super_reg[slot].base, memory_order_acquire);
        if (base == 0) {
            continue; // empty slot
        }
        stats->used_slots++;

        int lg = g_super_reg[slot].lg_size; // Phase 8.3: Use stored lg_size
        int home = hak_super_hash(base, lg);

        // Re-probe from the home position to find how far this key sits from it.
        for (int depth = 0; depth < SUPER_MAX_PROBE; depth++) {
            int probe = (home + depth) & SUPER_REG_MASK;
            int same_key =
                atomic_load_explicit(&g_super_reg[probe].base, memory_order_acquire) == base &&
                g_super_reg[probe].lg_size == lg;
            if (!same_key) {
                continue;
            }
            if (depth > stats->max_probe_depth) {
                stats->max_probe_depth = depth;
            }
            break;
        }
    }

    pthread_mutex_unlock(&g_super_reg_lock);
}