diff --git a/core/box/tls_sll_drain_box.h b/core/box/tls_sll_drain_box.h
new file mode 100644
index 00000000..70461930
--- /dev/null
+++ b/core/box/tls_sll_drain_box.h
@@ -0,0 +1,252 @@
+// tls_sll_drain_box.h - Box: TLS SLL Periodic Drain
+// Purpose: Restore slab accounting consistency by periodically draining TLS SLL to slab freelists
+//
+// Problem:
+// - Fast free path (hak_tiny_free_fast_v2) pushes to TLS SLL without decrementing meta->used
+// - Slabs never appear empty → SuperSlabs never freed → LRU cache never populated
+// - Result: 6,455 mmap/munmap syscalls per 200K iterations (74.8% time)
+//
+// Solution:
+// - Every N frees (default: 1024), drain TLS SLL → slab freelist
+// - This path decrements meta->used properly via tiny_free_local_box()
+// - Enables empty detection → SuperSlabs freed → LRU cache functional
+//
+// Expected Impact:
+// - mmap/munmap: 6,455 → ~100 calls (-96-97%)
+// - Throughput: 563K → 8-10M ops/s (+1,300-1,700%)
+//
+// References:
+// - Root cause: PHASE9_LRU_ARCHITECTURE_ISSUE.md
+// - Design: Option B (Periodic TLS SLL Drain)
+
+#pragma once
+
+#include <stdint.h>   // uint32_t, uint64_t
+#include <stdio.h>    // fprintf, stderr
+#include <stdlib.h>   // getenv, atoi
+#include "tls_sll_box.h"              // TLS SLL operations (tls_sll_pop)
+#include "../hakmem_tiny_config.h"    // TINY_NUM_CLASSES
+#include "../hakmem_super_registry.h" // SuperSlab lookup
+#include "free_local_box.h"           // tiny_free_local_box (decrements meta->used)
+
+// ========== ENV Configuration ==========
+
+// Check if TLS SLL drain is enabled
+// ENV: HAKMEM_TINY_SLL_DRAIN_ENABLE=1/0 (default: 1)
+static inline int tls_sll_drain_is_enabled(void) {
+    static int g_drain_enable = -1;
+    if (__builtin_expect(g_drain_enable == -1, 0)) {
+        const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_ENABLE");
+        if (env && *env == '0') {
+            g_drain_enable = 0;
+            fprintf(stderr, "[TLS_SLL_DRAIN] Drain DISABLED via ENV\n");
+        } else {
+            g_drain_enable = 1;
+            fprintf(stderr, "[TLS_SLL_DRAIN] Drain ENABLED (default)\n");
+        }
+    }
+    return g_drain_enable;
+}
+
+// Get drain interval (number of frees before triggering drain)
+// ENV: HAKMEM_TINY_SLL_DRAIN_INTERVAL=N (default: 1024)
+static inline uint32_t tls_sll_drain_get_interval(void) {
+    static uint32_t g_drain_interval = 0;
+    if (__builtin_expect(g_drain_interval == 0, 0)) {
+        const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_INTERVAL");
+        if (env && *env) {
+            int val = atoi(env);
+            if (val > 0 && val <= 65536) {
+                g_drain_interval = (uint32_t)val;
+                fprintf(stderr, "[TLS_SLL_DRAIN] Interval=%u (from ENV)\n", g_drain_interval);
+            } else {
+                g_drain_interval = 1024;
+                fprintf(stderr, "[TLS_SLL_DRAIN] Invalid ENV value, using default=1024\n");
+            }
+        } else {
+            g_drain_interval = 1024;
+            fprintf(stderr, "[TLS_SLL_DRAIN] Interval=%u (default)\n", g_drain_interval);
+        }
+    }
+    return g_drain_interval;
+}
+
+// ========== Drain Counter (TLS) ==========
+
+// Per-class drain counter (TLS, one per size class)
+// Incremented on each free, triggers drain when reaching interval
+static __thread uint32_t g_tls_sll_drain_counter[TINY_NUM_CLASSES] = {0};
+
+// Debug: Total drain operations performed (all classes)
+static __thread uint64_t g_tls_sll_drain_total_calls = 0;
+static __thread uint64_t g_tls_sll_drain_total_blocks = 0;
+
+// ========== Drain Implementation (Skeleton) ==========
+
+// Box: TLS SLL Drain
+// Purpose: Pop blocks from TLS SLL and push to slab freelist
+//
+// Flow:
+// 1. Pop up to batch_size blocks from TLS SLL (g_tls_sll_head[class_idx])
+// 2. For each block:
+//    a. Resolve SuperSlab/Slab (like slow path does)
+//    b. Call tiny_free_local_box() → decrements meta->used properly
+// 3. Result: meta->used reflects true state, empty detection works
+//
+// Args:
+//   class_idx:  Size class to drain
+//   batch_size: Max blocks to drain (0 = drain all)
+//
+// Returns: Number of blocks drained
+static inline uint32_t tiny_tls_sll_drain(int class_idx, uint32_t batch_size) {
+    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
+        return 0;
+    }
+
+    // Sanity check: TLS SLL count
+    extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
+    uint32_t avail = g_tls_sll_count[class_idx];
+    if (avail == 0) {
+        return 0; // Nothing to drain
+    }
+
+    // Drain up to batch_size blocks (0 = drain all)
+    uint32_t to_drain = (batch_size == 0) ? avail : (avail < batch_size ? avail : batch_size);
+    uint32_t drained = 0;
+
+    // Debug logging
+    static int g_debug = -1;
+    if (__builtin_expect(g_debug == -1, 0)) {
+        const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_DEBUG");
+        g_debug = (env && *env && *env != '0') ? 1 : 0;
+    }
+
+    if (g_debug) {
+        fprintf(stderr, "[TLS_SLL_DRAIN] START: class=%d avail=%u to_drain=%u\n",
+                class_idx, avail, to_drain);
+    }
+
+    // External functions needed for drain
+    extern SuperSlab* hak_super_lookup(void* ptr);     // SuperSlab registry lookup
+    extern uint32_t tiny_self_u32(void);               // Thread ID (from tiny_superslab_free.inc.h:127)
+    extern size_t g_tiny_class_sizes[TINY_NUM_CLASSES]; // Block sizes
+
+    // Get thread ID once (used for all blocks)
+    uint32_t my_tid = tiny_self_u32();
+
+    // Drain loop: Pop blocks from TLS SLL and push to slab freelist
+    for (uint32_t i = 0; i < to_drain; i++) {
+        void* base = NULL;
+        if (!tls_sll_pop(class_idx, &base)) {
+            // TLS SLL exhausted (concurrent drain or count mismatch)
+            break;
+        }
+
+        // Resolve SuperSlab/Slab (like slow path does)
+        SuperSlab* ss = hak_super_lookup(base);
+        if (!ss || ss->magic != SUPERSLAB_MAGIC) {
+            // Invalid SuperSlab - skip this block
+            if (g_debug) {
+                fprintf(stderr, "[TLS_SLL_DRAIN] SKIP: class=%d base=%p (invalid SuperSlab)\n",
+                        class_idx, base);
+            }
+            continue;
+        }
+
+        // Get slab index
+        int slab_idx = slab_index_for(ss, base);
+        if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
+            // Invalid slab index - skip this block
+            if (g_debug) {
+                fprintf(stderr, "[TLS_SLL_DRAIN] SKIP: class=%d base=%p (invalid slab_idx=%d)\n",
+                        class_idx, base, slab_idx);
+            }
+            continue;
+        }
+
+        // Get slab metadata
+        TinySlabMeta* meta = &ss->slabs[slab_idx];
+
+        // Convert BASE → USER pointer (add 1 byte header offset)
+        // Phase E1: ALL classes (C0-C7) have 1-byte header
+        void* user_ptr = (char*)base + 1;
+
+        // Call tiny_free_local_box() to:
+        // 1. Push block to slab freelist
+        // 2. Decrement meta->used (THIS IS THE KEY!)
+        // 3. Check if slab becomes empty (meta->used == 0)
+        // 4. If empty, release slab → SuperSlab → LRU cache
+        tiny_free_local_box(ss, slab_idx, meta, user_ptr, my_tid);
+
+        drained++;
+
+        // Debug: Log when used reaches 0 (slab becomes empty)
+        if (g_debug && meta->used == 0) {
+            fprintf(stderr, "[TLS_SLL_DRAIN] EMPTY: class=%d ss=%p slab=%d (meta->used=0)\n",
+                    class_idx, (void*)ss, slab_idx);
+        }
+    }
+
+    if (g_debug && drained > 0) {
+        fprintf(stderr, "[TLS_SLL_DRAIN] END: class=%d drained=%u remaining=%u\n",
+                class_idx, drained, g_tls_sll_count[class_idx]);
+    }
+
+    // Update stats
+    g_tls_sll_drain_total_calls++;
+    g_tls_sll_drain_total_blocks += drained;
+
+    return drained;
+}
+
+// ========== Drain Trigger (Called from Fast Free Path) ==========
+
+// Box: Try Drain (with counter trigger)
+// Purpose: Check drain counter and trigger drain if interval reached
+//
+// Flow:
+// 1. Increment drain counter for this class
+// 2. If counter >= interval, trigger drain and reset counter
+// 3. Otherwise, do nothing (fast path continues)
+//
+// Args:
+//   class_idx: Size class that was just freed
+//
+// Returns: Number of blocks drained (0 if no drain)
+static inline uint32_t tiny_tls_sll_try_drain(int class_idx) {
+    // Check if drain is enabled
+    if (__builtin_expect(!tls_sll_drain_is_enabled(), 0)) {
+        return 0;
+    }
+
+    // Increment counter
+    g_tls_sll_drain_counter[class_idx]++;
+
+    // Check if interval reached
+    uint32_t interval = tls_sll_drain_get_interval();
+    if (__builtin_expect(g_tls_sll_drain_counter[class_idx] >= interval, 0)) {
+        // Trigger drain (drain ~32 blocks for now, tune later)
+        uint32_t drained = tiny_tls_sll_drain(class_idx, 32);
+
+        // Reset counter
+        g_tls_sll_drain_counter[class_idx] = 0;
+
+        return drained;
+    }
+
+    return 0; // No drain triggered
+}
+
+// ========== Debug Stats (Destructor) ==========
+
+#if !HAKMEM_BUILD_RELEASE
+static void tls_sll_drain_print_stats(void) __attribute__((destructor));
+static void tls_sll_drain_print_stats(void) {
+    if (g_tls_sll_drain_total_calls > 0) {
+        // Cast to unsigned long long + %llu: uint64_t with %lu is UB on ILP32 targets
+        fprintf(stderr, "[TLS_SLL_DRAIN_STATS] Total drains: %llu, Total blocks: %llu, Avg: %.2f\n",
+                (unsigned long long)g_tls_sll_drain_total_calls,
+                (unsigned long long)g_tls_sll_drain_total_blocks,
+                (double)g_tls_sll_drain_total_blocks / g_tls_sll_drain_total_calls);
+    }
+}
+#endif
diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h
index 06ee9419..77dde5c9 100644
--- a/core/tiny_free_fast_v2.inc.h
+++ b/core/tiny_free_fast_v2.inc.h
@@ -19,6 +19,7 @@
 #include "hakmem_build_flags.h"
 #include "hakmem_tiny_config.h"    // For TINY_TLS_MAG_CAP, TINY_NUM_CLASSES
 #include "box/tls_sll_box.h"       // Box TLS-SLL API
+#include "box/tls_sll_drain_box.h" // Box TLS-SLL Drain (Option B)
 #include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
 
 // Phase 7: Header-based ultra-fast free
@@ -136,6 +137,13 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
         return 0;
     }
 
+    // Option B: Periodic TLS SLL Drain (restore slab accounting consistency)
+    // Purpose: Every N frees (default: 1024), drain TLS SLL → slab freelist
+    // Impact: Enables empty detection → SuperSlabs freed → LRU cache functional
+    // Cost: 2-3 cycles (counter increment + comparison, predict-not-taken)
+    // Benefit: +1,300-1,700% throughput (563K → 8-10M ops/s expected)
+    tiny_tls_sll_try_drain(class_idx);
+
     return 1; // Success - handled in fast path
 }