// tls_sll_drain_box.h - Box: TLS SLL Periodic Drain // Purpose: Restore slab accounting consistency by periodically draining TLS SLL to slab freelists // // Problem: // - Fast free path (hak_tiny_free_fast_v2) pushes to TLS SLL without decrementing meta->used // - Slabs never appear empty → SuperSlabs never freed → LRU cache never populated // - Result: 6,455 mmap/munmap syscalls per 200K iterations (74.8% time) // // Solution: // - Every N frees (default: 1024), drain TLS SLL → slab freelist // - This path decrements meta->used properly via tiny_free_local_box() // - Enables empty detection → SuperSlabs freed → LRU cache functional // // Expected Impact: // - mmap/munmap: 6,455 → ~100 calls (-96-97%) // - Throughput: 563K → 8-10M ops/s (+1,300-1,700%) // // References: // - Root cause: PHASE9_LRU_ARCHITECTURE_ISSUE.md // - Design: Option B (Periodic TLS SLL Drain) #pragma once #include #include #include #include #include "tls_sll_box.h" // TLS SLL operations (tls_sll_pop) #include "../hakmem_tiny_config.h" // TINY_NUM_CLASSES #include "../hakmem_super_registry.h" // SuperSlab lookup #include "free_local_box.h" // tiny_free_local_box (decrements meta->used) // ========== ENV Configuration ========== // Check if TLS SLL drain is enabled // ENV: HAKMEM_TINY_SLL_DRAIN_ENABLE=1/0 (default: 1) static inline int tls_sll_drain_is_enabled(void) { static int g_drain_enable = -1; if (__builtin_expect(g_drain_enable == -1, 0)) { const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_ENABLE"); if (env && *env == '0') { g_drain_enable = 0; fprintf(stderr, "[TLS_SLL_DRAIN] Drain DISABLED via ENV\n"); } else { g_drain_enable = 1; fprintf(stderr, "[TLS_SLL_DRAIN] Drain ENABLED (default)\n"); } } return g_drain_enable; } // Get drain interval (number of frees before triggering drain) // ENV: HAKMEM_TINY_SLL_DRAIN_INTERVAL=N (default: 1024) static inline uint32_t tls_sll_drain_get_interval(void) { static uint32_t g_drain_interval = 0; if (__builtin_expect(g_drain_interval == 0, 0)) { const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_INTERVAL"); if (env && *env) { int val = atoi(env); if (val > 0 && val <= 65536) { g_drain_interval = (uint32_t)val; fprintf(stderr, "[TLS_SLL_DRAIN] Interval=%u (from ENV)\n", g_drain_interval); } else { g_drain_interval = 1024; fprintf(stderr, "[TLS_SLL_DRAIN] Invalid ENV value, using default=1024\n"); } } else { g_drain_interval = 1024; fprintf(stderr, "[TLS_SLL_DRAIN] Interval=%u (default)\n", g_drain_interval); } } return g_drain_interval; } // ========== Drain Counter (TLS) ========== // Per-class drain counter (TLS, one per size class) // Incremented on each free, triggers drain when reaching interval static __thread uint32_t g_tls_sll_drain_counter[TINY_NUM_CLASSES] = {0}; // Debug: Total drain operations performed (all classes) static __thread uint64_t g_tls_sll_drain_total_calls = 0; static __thread uint64_t g_tls_sll_drain_total_blocks = 0; // ========== Drain Implementation (Skeleton) ========== // Box: TLS SLL Drain // Purpose: Pop blocks from TLS SLL and push to slab freelist // // Flow: // 1. Pop up to batch_size blocks from TLS SLL (g_tls_sll_head[class_idx]) // 2. For each block: // a. Resolve SuperSlab/Slab (like slow path does) // b. Call tiny_free_local_box() → decrements meta->used properly // 3. Result: meta->used reflects true state, empty detection works // // Args: // class_idx: Size class to drain // batch_size: Max blocks to drain (0 = drain all) // // Returns: Number of blocks drained static inline uint32_t tiny_tls_sll_drain(int class_idx, uint32_t batch_size) { if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) { return 0; } // Sanity check: TLS SLL count extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES]; uint32_t avail = g_tls_sll_count[class_idx]; if (avail == 0) { return 0; // Nothing to drain } // Drain up to batch_size blocks (0 = drain all) uint32_t to_drain = (batch_size == 0) ? avail : (avail < batch_size ? avail : batch_size); uint32_t drained = 0; // Debug logging static int g_debug = -1; if (__builtin_expect(g_debug == -1, 0)) { const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_DEBUG"); g_debug = (env && *env && *env != '0') ? 1 : 0; } if (g_debug) { fprintf(stderr, "[TLS_SLL_DRAIN] START: class=%d avail=%u to_drain=%u\n", class_idx, avail, to_drain); } // External functions needed for drain extern SuperSlab* hak_super_lookup(void* ptr); // SuperSlab registry lookup extern const size_t g_tiny_class_sizes[TINY_NUM_CLASSES]; // Block sizes (const) // Get thread ID once (used for all blocks) // Note: Use pthread_self() directly since tiny_self_u32() is static inline uint32_t my_tid = (uint32_t)(uintptr_t)pthread_self(); // Drain loop: Pop blocks from TLS SLL and push to slab freelist for (uint32_t i = 0; i < to_drain; i++) { void* base = NULL; if (!tls_sll_pop(class_idx, &base)) { // TLS SLL exhausted (concurrent drain or count mismatch) break; } // Resolve SuperSlab/Slab (like slow path does) SuperSlab* ss = hak_super_lookup(base); if (!ss || ss->magic != SUPERSLAB_MAGIC) { // Invalid SuperSlab - skip this block if (g_debug) { fprintf(stderr, "[TLS_SLL_DRAIN] SKIP: class=%d base=%p (invalid SuperSlab)\n", class_idx, base); } continue; } // Get slab index int slab_idx = slab_index_for(ss, base); if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) { // Invalid slab index - skip this block if (g_debug) { fprintf(stderr, "[TLS_SLL_DRAIN] SKIP: class=%d base=%p (invalid slab_idx=%d)\n", class_idx, base, slab_idx); } continue; } // Get slab metadata TinySlabMeta* meta = &ss->slabs[slab_idx]; // Convert BASE → USER pointer (add 1 byte header offset) // Phase E1: ALL classes (C0-C7) have 1-byte header void* user_ptr = (char*)base + 1; // Call tiny_free_local_box() to: // 1. Push block to slab freelist // 2. Decrement meta->used (THIS IS THE KEY!) tiny_free_local_box(ss, slab_idx, meta, user_ptr, my_tid); drained++; // CRITICAL: Check if slab became empty and release to shared pool // (This logic is in tiny_superslab_free.inc.h:223-236) if (meta->used == 0) { // Debug: Log when used reaches 0 (slab becomes empty) if (g_debug) { fprintf(stderr, "[TLS_SLL_DRAIN] EMPTY: class=%d ss=%p slab=%d (meta->used=0) -> releasing to pool\n", class_idx, (void*)ss, slab_idx); } // Release empty slab to shared pool // This will eventually free the SuperSlab and add to LRU cache extern void shared_pool_release_slab(SuperSlab* ss, int slab_idx); shared_pool_release_slab(ss, slab_idx); } } if (g_debug && drained > 0) { fprintf(stderr, "[TLS_SLL_DRAIN] END: class=%d drained=%u remaining=%u\n", class_idx, drained, g_tls_sll_count[class_idx]); } // Update stats g_tls_sll_drain_total_calls++; g_tls_sll_drain_total_blocks += drained; return drained; } // ========== Drain Trigger (Called from Fast Free Path) ========== // Box: Try Drain (with counter trigger) // Purpose: Check drain counter and trigger drain if interval reached // // Flow: // 1. Increment drain counter for this class // 2. If counter >= interval, trigger drain and reset counter // 3. Otherwise, do nothing (fast path continues) // // Args: // class_idx: Size class that was just freed // // Returns: Number of blocks drained (0 if no drain) static inline uint32_t tiny_tls_sll_try_drain(int class_idx) { // Check if drain is enabled if (__builtin_expect(!tls_sll_drain_is_enabled(), 0)) { return 0; } // Increment counter g_tls_sll_drain_counter[class_idx]++; // Check if interval reached uint32_t interval = tls_sll_drain_get_interval(); if (__builtin_expect(g_tls_sll_drain_counter[class_idx] >= interval, 0)) { // Trigger drain (drain ALL blocks to enable empty detection) // batch_size=0 means drain all available blocks uint32_t drained = tiny_tls_sll_drain(class_idx, 0); // Reset counter g_tls_sll_drain_counter[class_idx] = 0; return drained; } return 0; // No drain triggered } // ========== Debug Stats (Destructor) ========== #if !HAKMEM_BUILD_RELEASE static void tls_sll_drain_print_stats(void) __attribute__((destructor)); static void tls_sll_drain_print_stats(void) { if (g_tls_sll_drain_total_calls > 0) { fprintf(stderr, "[TLS_SLL_DRAIN_STATS] Total drains: %lu, Total blocks: %lu, Avg: %.2f\n", g_tls_sll_drain_total_calls, g_tls_sll_drain_total_blocks, (double)g_tls_sll_drain_total_blocks / g_tls_sll_drain_total_calls); } } #endif