// superslab_allocate.c - SuperSlab allocation and deallocation
// Purpose: Main allocation/free entry points for SuperSlabs
// License: MIT
// Date: 2025-11-28

#include "hakmem_tiny_superslab_internal.h"

// ============================================================================
// SuperSlab Allocation (2MB aligned)
// ============================================================================

SuperSlab* superslab_allocate(uint8_t size_class) {
    // Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N -> fail 1 in N allocations
    static int fault_rate = -1;  // -1=unparsed, 0=disabled, >0=rate
    static __thread unsigned long fault_tick = 0;
    if (__builtin_expect(fault_rate == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE");
        if (e && *e) {
            int v = atoi(e);
            if (v < 0) v = 0;
            fault_rate = v;
        } else {
            fault_rate = 0;
        }
    }
    if (fault_rate > 0) {
        unsigned long t = ++fault_tick;
        if ((t % (unsigned long)fault_rate) == 0ul) {
            return NULL;  // simulate OOM
        }
    }

    // Optional env clamp for SuperSlab size
    static int env_parsed = 0;
    static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_DEFAULT;  // Start with default (2MB)
    static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
    if (!env_parsed) {
        char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
        if (maxmb) {
            int m = atoi(maxmb);
            if (m == 1) g_ss_max_lg_env = 20;
            else if (m == 2) g_ss_max_lg_env = 21;
        }
        char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB");
        if (minmb) {
            int m = atoi(minmb);
            if (m == 1) g_ss_min_lg_env = 20;
            else if (m == 2) g_ss_min_lg_env = 21;
        }
        if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env;

        const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG");
        if (force_lg_env && *force_lg_env) {
            int v = atoi(force_lg_env);
            if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) {
                g_ss_force_lg = v;
                g_ss_min_lg_env = g_ss_max_lg_env = v;
            }
        }

        size_t precharge_default = 0;
        const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE");
        if (precharge_env && *precharge_env) {
            long v = atol(precharge_env);
            if (v < 0) v = 0;
            precharge_default = (size_t)v;
            if (v > 0) {
                atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
            }
        }

        size_t cache_default = 0;
        const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE");
        if (cache_env && *cache_env) {
            long v = atol(cache_env);
            if (v < 0) v = 0;
            cache_default = (size_t)v;
        }

        for (int i = 0; i < 8; i++) {
            g_ss_cache_cap[i] = cache_default;
            g_ss_precharge_target[i] = precharge_default;
        }

        for (int i = 0; i < 8; i++) {
            char name[64];
            snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i);
            char* cap_env = getenv(name);
            if (cap_env && *cap_env) {
                long v = atol(cap_env);
                if (v < 0) v = 0;
                g_ss_cache_cap[i] = (size_t)v;
            }
            snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i);
            char* pre_env = getenv(name);
            if (pre_env && *pre_env) {
                long v = atol(pre_env);
                if (v < 0) v = 0;
                g_ss_precharge_target[i] = (size_t)v;
                if (v > 0) {
                    atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
                }
            }
            if (g_ss_cache_cap[i] > 0 || g_ss_precharge_target[i] > 0) {
                g_ss_cache_enabled = 1;
            }
        }

        const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE");
        if (populate_env && atoi(populate_env) != 0) {
            atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
        }
        env_parsed = 1;
    }

    uint8_t lg = (g_ss_force_lg >= 0) ?
        (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class);
    if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env;
    if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env;
    size_t ss_size = (size_t)1 << lg;  // 2^20 = 1MB, 2^21 = 2MB
    uintptr_t ss_mask = ss_size - 1;
    int from_cache = 0;
    void* ptr = NULL;

    // Debug logging flag (lazy init)
    static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
    dbg = 0;
#else
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#endif

    // Phase 9: Try LRU cache first (lazy deallocation)
    SuperSlab* cached_ss = hak_ss_lru_pop(size_class);
    if (cached_ss) {
        ptr = (void*)cached_ss;
        from_cache = 1;
        // Debug logging for REFILL from LRU
        if (dbg == 1) {
            fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n",
                    size_class, (void*)cached_ss);
        }
        // Skip old cache path - LRU cache takes priority
    } else if (g_ss_cache_enabled && size_class < 8) {
        // Fallback to old cache (will be deprecated)
        ss_cache_precharge(size_class, ss_size, ss_mask);
        SuperslabCacheEntry* old_cached = ss_cache_pop(size_class);
        if (old_cached) {
            ptr = (void*)old_cached;
            from_cache = 1;
            // Debug logging for REFILL from prewarm (old cache is essentially prewarm)
            if (dbg == 1) {
                fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n",
                        size_class, (void*)old_cached);
            }
        }
    }

    if (!ptr) {
        int populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel);
        ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate);
        if (!ptr) {
            return NULL;
        }
        // Debug logging for REFILL with new allocation
        if (dbg == 1) {
            fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n",
                    size_class, (void*)ptr);
        }
    }

    // Initialize SuperSlab header (Phase 12: no global size_class field)
    SuperSlab* ss = (SuperSlab*)ptr;
    ss->magic = SUPERSLAB_MAGIC;
    ss->active_slabs = 0;
    ss->lg_size = lg;        // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
    ss->slab_bitmap = 0;
    ss->nonempty_mask = 0;   // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
    ss->partial_epoch = 0;
    ss->publish_hint = 0xFF;

    // Initialize atomics explicitly
    atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed);
    atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed);
    atomic_store_explicit(&ss->listed, 0, memory_order_relaxed);
    ss->partial_next = NULL;

    // Phase 9: Initialize LRU fields
    ss->last_used_ns = 0;
    ss->generation = 0;
    ss->lru_prev = NULL;
    ss->lru_next = NULL;

    // Phase 3d-C: Initialize Hot/Cold Split fields
    ss->hot_count = 0;
    ss->cold_count = 0;
    for (int i = 0; i < 16; i++) {
        ss->hot_indices[i] = 0;
        ss->cold_indices[i] = 0;
    }

    // Initialize all slab metadata (only up to max slabs for this size)
    int max_slabs = (int)(ss_size / SLAB_SIZE);

    // PERF_OPT: memset removed - mmap() already returns zero-initialized pages
    // Previous memset calls consumed 23.83% CPU time (perf analysis 2025-11-28)
    // Measured improvement: +1.3% throughput (71.86M → 72.78M ops/s)
    // Note: ASan/debug builds may need these, but production mmap guarantees zero pages
    // memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta));
    // memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t));
    // memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t));
    // memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t));

    for (int i = 0; i < max_slabs; i++) {
        // Phase 1: Atomic initialization (freelist + used are now _Atomic)
        slab_freelist_store_relaxed(&ss->slabs[i], NULL);  // Explicit NULL (pages already zeroed by mmap, but clear intent)
        atomic_store_explicit(&ss->slabs[i].used, 0, memory_order_relaxed);
        ss->slabs[i].capacity = 0;
        ss->slabs[i].owner_tid_low = 0;
        // Initialize remote queue atomics (pages already zeroed by mmap, but use proper atomic init)
        atomic_store_explicit(&ss->remote_heads[i], 0, memory_order_relaxed);
        atomic_store_explicit(&ss->remote_counts[i], 0, memory_order_relaxed);
        atomic_store_explicit(&ss->slab_listed[i], 0, memory_order_relaxed);
    }

    if (from_cache) {
        ss_stats_cache_reuse();
    }

    // Phase 8.3: Update ACE current_lg to match allocated size
    g_ss_ace[size_class].current_lg = lg;

    // Phase 1: Register SuperSlab in global registry for fast lookup
    // CRITICAL: Register AFTER full initialization (ss structure is ready)
    uintptr_t base = (uintptr_t)ss;
    if (!hak_super_register(base, ss)) {
        // Registry full - this is a fatal error
        fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", (void*)ss);
        // Still return ss to avoid a memory leak, but lookups may fail
    }

    return ss;
}

// ============================================================================
// SuperSlab Deallocation
// ============================================================================

void superslab_free(SuperSlab* ss) {
    if (!ss || ss->magic != SUPERSLAB_MAGIC) {
        return;  // Invalid SuperSlab
    }

    // Debug logging flag (lazy init)
    static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
    dbg = 0;
#else
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#endif
    if (dbg == 1) {
        fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n",
                (void*)ss, ss->lg_size, ss->active_slabs);
    }

    // Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap
    size_t ss_size = (size_t)1 << ss->lg_size;

    // Phase 1: Unregister SuperSlab from registry FIRST
    // CRITICAL: Must unregister BEFORE adding to LRU cache
    // Reason: Cached SuperSlabs should NOT be found by lookups
    uintptr_t base = (uintptr_t)ss;
    hak_super_unregister(base);

    // Memory fence to ensure unregister is visible
    atomic_thread_fence(memory_order_release);

    // Phase 9: Try LRU cache first (lazy deallocation)
    // NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation
    // Magic will be cleared on eviction or reuse
    int lru_cached = hak_ss_lru_push(ss);
    if (dbg == 1) {
        fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached);
    }
    if (lru_cached) {
        // Successfully cached in LRU - defer munmap
        return;
    }

    // LRU cache full or disabled - try old cache using head class_idx (if known)
    int old_cached = ss_cache_push(0, ss);
    if (old_cached) {
        ss_stats_cache_store();
        return;
    }

    // Both caches full - immediately free to OS (eager deallocation)
    // Clear magic to prevent use-after-free
    ss->magic = 0;

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
            (void*)ss, ss_size,
            atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
#endif

    munmap(ss, ss_size);

    // Update statistics for actual release to OS
    pthread_mutex_lock(&g_superslab_lock);
    g_superslabs_freed++;
    // Phase 12: we no longer track per-SS size_class on the header; skip g_ss_freed_by_class here
    g_bytes_allocated -= ss_size;
    pthread_mutex_unlock(&g_superslab_lock);

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n",
            (unsigned long long)g_superslabs_freed);
#endif
}
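
// ----------------------------------------------------------------------------
// Usage sketch (not part of the build): a minimal illustration of how the
// allocate/free pair above is intended to be driven. The SUPERSLAB_ALLOCATE_EXAMPLE
// guard macro and the main() below are hypothetical and exist only for this
// example; they assume nothing beyond the two entry points and the header
// fields initialized above.
// ----------------------------------------------------------------------------
#ifdef SUPERSLAB_ALLOCATE_EXAMPLE
#include <stdio.h>

int main(void) {
    // Request a SuperSlab for size class 0. NULL means the OS mapping failed
    // (or a fault was injected via HAKMEM_TINY_SS_FAULT_RATE).
    SuperSlab* ss = superslab_allocate(0);
    if (!ss) {
        fprintf(stderr, "superslab_allocate failed\n");
        return 1;
    }

    // The returned header is fully initialized and already registered in the
    // global registry, so lookups by address are expected to succeed here.
    printf("SuperSlab at %p, lg_size=%d (%zu bytes)\n",
           (void*)ss, (int)ss->lg_size, (size_t)1 << ss->lg_size);

    // Hand the SuperSlab back. Depending on LRU/cache state this may defer
    // the munmap (lazy deallocation) rather than releasing it immediately.
    superslab_free(ss);
    return 0;
}
#endif  // SUPERSLAB_ALLOCATE_EXAMPLE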