Option B: Periodic TLS SLL Drain - Fix Phase 9 LRU Architecture Issue
Root Cause: - TLS SLL fast path (95-99% of frees) does NOT decrement meta->used - Slabs never appear empty → SuperSlabs never freed → LRU never used - Impact: 6,455 mmap/munmap calls per 200K iterations (74.8% time) - Performance: -94% regression (9.38M → 563K ops/s) Solution: - Periodic drain every N frees (default: 1024) per size class - Drain path: TLS SLL → slab freelist via tiny_free_local_box() - This properly decrements meta->used and enables empty detection Implementation: 1. core/box/tls_sll_drain_box.h - New drain box function - tiny_tls_sll_drain(): Pop from TLS SLL, push to slab freelist - tiny_tls_sll_try_drain(): Drain trigger with counter - ENV: HAKMEM_TINY_SLL_DRAIN_ENABLE=1/0 (default: 1) - ENV: HAKMEM_TINY_SLL_DRAIN_INTERVAL=N (default: 1024) - ENV: HAKMEM_TINY_SLL_DRAIN_DEBUG=1 (debug logging) 2. core/tiny_free_fast_v2.inc.h - Integrated drain trigger - Added drain call after successful TLS SLL push (line 145) - Cost: 2-3 cycles per free (counter increment + comparison) - Drain triggered every 1024 frees (0.1% overhead) Expected Impact: - mmap/munmap: 6,455 → ~100 calls (-96-97%) - Throughput: 563K → 8-10M ops/s (+1,300-1,700%) - LRU utilization: 0% → >90% (functional) Reference: PHASE9_LRU_ARCHITECTURE_ISSUE.md 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
252
core/box/tls_sll_drain_box.h
Normal file
252
core/box/tls_sll_drain_box.h
Normal file
@ -0,0 +1,252 @@
|
||||
// tls_sll_drain_box.h - Box: TLS SLL Periodic Drain
|
||||
// Purpose: Restore slab accounting consistency by periodically draining TLS SLL to slab freelists
|
||||
//
|
||||
// Problem:
|
||||
// - Fast free path (hak_tiny_free_fast_v2) pushes to TLS SLL without decrementing meta->used
|
||||
// - Slabs never appear empty → SuperSlabs never freed → LRU cache never populated
|
||||
// - Result: 6,455 mmap/munmap syscalls per 200K iterations (74.8% time)
|
||||
//
|
||||
// Solution:
|
||||
// - Every N frees (default: 1024), drain TLS SLL → slab freelist
|
||||
// - This path decrements meta->used properly via tiny_free_local_box()
|
||||
// - Enables empty detection → SuperSlabs freed → LRU cache functional
|
||||
//
|
||||
// Expected Impact:
|
||||
// - mmap/munmap: 6,455 → ~100 calls (-96-97%)
|
||||
// - Throughput: 563K → 8-10M ops/s (+1,300-1,700%)
|
||||
//
|
||||
// References:
|
||||
// - Root cause: PHASE9_LRU_ARCHITECTURE_ISSUE.md
|
||||
// - Design: Option B (Periodic TLS SLL Drain)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "tls_sll_box.h" // TLS SLL operations (tls_sll_pop)
|
||||
#include "../hakmem_tiny_config.h" // TINY_NUM_CLASSES
|
||||
#include "../hakmem_super_registry.h" // SuperSlab lookup
|
||||
#include "free_local_box.h" // tiny_free_local_box (decrements meta->used)
|
||||
|
||||
// ========== ENV Configuration ==========
|
||||
|
||||
// Report whether the periodic TLS SLL drain is switched on.
//
// ENV: HAKMEM_TINY_SLL_DRAIN_ENABLE
//   - value whose first character is '0' -> drain disabled
//   - unset, empty, or anything else     -> drain enabled (default)
//
// The environment is consulted on the first call only. The answer is cached
// in a function-local static, and a one-time notice is written to stderr.
//
// Returns: 1 when the drain is enabled, 0 when it is disabled.
static inline int tls_sll_drain_is_enabled(void) {
    static int g_drain_enable = -1;  // -1 = environment not consulted yet

    // Common case: the decision has already been made.
    if (__builtin_expect(g_drain_enable != -1, 1)) {
        return g_drain_enable;
    }

    const char* setting = getenv("HAKMEM_TINY_SLL_DRAIN_ENABLE");
    const int switched_off = (setting != NULL && setting[0] == '0');

    // Store the decision before logging, same order as the cached flag is
    // published everywhere else in this header.
    if (switched_off) {
        g_drain_enable = 0;
        fprintf(stderr, "[TLS_SLL_DRAIN] Drain DISABLED via ENV\n");
    } else {
        g_drain_enable = 1;
        fprintf(stderr, "[TLS_SLL_DRAIN] Drain ENABLED (default)\n");
    }

    return g_drain_enable;
}
|
||||
|
||||
// Get the drain interval: the number of frees (per size class) between two
// drain attempts.
//
// ENV: HAKMEM_TINY_SLL_DRAIN_INTERVAL=N
//   - N must be a complete decimal integer in the range 1..65536
//   - unset or empty            -> default 1024
//   - anything else (non-numeric text, trailing garbage such as "64k",
//     zero, negative, too large) -> default 1024 with an "Invalid" notice
//
// The environment is read on the first call only; the result is cached in a
// function-local static and a one-time notice is written to stderr.
//
// Returns: the interval, always in 1..65536.
static inline uint32_t tls_sll_drain_get_interval(void) {
    static uint32_t g_drain_interval = 0;  // 0 = not resolved yet (never a valid interval)

    // Common case: already resolved.
    if (__builtin_expect(g_drain_interval != 0, 1)) {
        return g_drain_interval;
    }

    const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_INTERVAL");
    if (env == NULL || *env == '\0') {
        g_drain_interval = 1024;
        fprintf(stderr, "[TLS_SLL_DRAIN] Interval=%u (default)\n", g_drain_interval);
        return g_drain_interval;
    }

    // strtol instead of atoi: atoi has undefined behaviour when the value does
    // not fit in an int and cannot report trailing garbage. On overflow strtol
    // returns LONG_MAX/LONG_MIN, which the range check below rejects, so errno
    // does not need to be inspected.
    char* end = NULL;
    long val = strtol(env, &end, 10);
    int parsed_fully = (end != env && *end == '\0');

    if (parsed_fully && val > 0 && val <= 65536) {
        g_drain_interval = (uint32_t)val;
        fprintf(stderr, "[TLS_SLL_DRAIN] Interval=%u (from ENV)\n", g_drain_interval);
    } else {
        g_drain_interval = 1024;
        fprintf(stderr, "[TLS_SLL_DRAIN] Invalid ENV value, using default=1024\n");
    }

    return g_drain_interval;
}
|
||||
|
||||
// ========== Drain Counter (TLS) ==========
|
||||
|
||||
// Per-class drain counter (TLS, one per size class)
|
||||
// Incremented on each free, triggers drain when reaching interval
|
||||
// Zero-initialized, thread-local array indexed by size class.
// tiny_tls_sll_try_drain() increments the entry for the freed class and writes
// 0 back after it has triggered a drain. Because this is `static` in a header,
// every translation unit that includes the header gets its own copy.
static __thread uint32_t g_tls_sll_drain_counter[TINY_NUM_CLASSES] = {0};
|
||||
|
||||
// Debug: Total drain operations performed (all classes)
|
||||
// Thread-local count of tiny_tls_sll_drain() calls that got past the
// argument check and the empty-list check; it is incremented even when the
// call ends up draining 0 blocks.
static __thread uint64_t g_tls_sll_drain_total_calls = 0;
|
||||
// Thread-local running sum of the `drained` result of each
// tiny_tls_sll_drain() call; read by tls_sll_drain_print_stats() in
// non-release builds.
static __thread uint64_t g_tls_sll_drain_total_blocks = 0;
|
||||
|
||||
// ========== Drain Implementation ==========
|
||||
|
||||
// Box: TLS SLL Drain
|
||||
// Purpose: Pop blocks from TLS SLL and push to slab freelist
|
||||
//
|
||||
// Flow:
|
||||
// 1. Pop up to batch_size blocks from TLS SLL (g_tls_sll_head[class_idx])
|
||||
// 2. For each block:
|
||||
// a. Resolve SuperSlab/Slab (like slow path does)
|
||||
// b. Call tiny_free_local_box() → decrements meta->used properly
|
||||
// 3. Result: meta->used reflects true state, empty detection works
|
||||
//
|
||||
// Args:
|
||||
// class_idx: Size class to drain
|
||||
// batch_size: Max blocks to drain (0 = drain all)
|
||||
//
|
||||
// Returns: Number of blocks drained
|
||||
static inline uint32_t tiny_tls_sll_drain(int class_idx, uint32_t batch_size) {
|
||||
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Sanity check: TLS SLL count
|
||||
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
|
||||
uint32_t avail = g_tls_sll_count[class_idx];
|
||||
if (avail == 0) {
|
||||
return 0; // Nothing to drain
|
||||
}
|
||||
|
||||
// Drain up to batch_size blocks (0 = drain all)
|
||||
uint32_t to_drain = (batch_size == 0) ? avail : (avail < batch_size ? avail : batch_size);
|
||||
uint32_t drained = 0;
|
||||
|
||||
// Debug logging
|
||||
static int g_debug = -1;
|
||||
if (__builtin_expect(g_debug == -1, 0)) {
|
||||
const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_DEBUG");
|
||||
g_debug = (env && *env && *env != '0') ? 1 : 0;
|
||||
}
|
||||
|
||||
if (g_debug) {
|
||||
fprintf(stderr, "[TLS_SLL_DRAIN] START: class=%d avail=%u to_drain=%u\n",
|
||||
class_idx, avail, to_drain);
|
||||
}
|
||||
|
||||
// External functions needed for drain
|
||||
extern SuperSlab* hak_super_lookup(void* ptr); // SuperSlab registry lookup
|
||||
extern uint32_t tiny_self_u32(void); // Thread ID (from tiny_superslab_free.inc.h:127)
|
||||
extern size_t g_tiny_class_sizes[TINY_NUM_CLASSES]; // Block sizes
|
||||
|
||||
// Get thread ID once (used for all blocks)
|
||||
uint32_t my_tid = tiny_self_u32();
|
||||
|
||||
// Drain loop: Pop blocks from TLS SLL and push to slab freelist
|
||||
for (uint32_t i = 0; i < to_drain; i++) {
|
||||
void* base = NULL;
|
||||
if (!tls_sll_pop(class_idx, &base)) {
|
||||
// TLS SLL exhausted (concurrent drain or count mismatch)
|
||||
break;
|
||||
}
|
||||
|
||||
// Resolve SuperSlab/Slab (like slow path does)
|
||||
SuperSlab* ss = hak_super_lookup(base);
|
||||
if (!ss || ss->magic != SUPERSLAB_MAGIC) {
|
||||
// Invalid SuperSlab - skip this block
|
||||
if (g_debug) {
|
||||
fprintf(stderr, "[TLS_SLL_DRAIN] SKIP: class=%d base=%p (invalid SuperSlab)\n",
|
||||
class_idx, base);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get slab index
|
||||
int slab_idx = slab_index_for(ss, base);
|
||||
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
|
||||
// Invalid slab index - skip this block
|
||||
if (g_debug) {
|
||||
fprintf(stderr, "[TLS_SLL_DRAIN] SKIP: class=%d base=%p (invalid slab_idx=%d)\n",
|
||||
class_idx, base, slab_idx);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get slab metadata
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
|
||||
// Convert BASE → USER pointer (add 1 byte header offset)
|
||||
// Phase E1: ALL classes (C0-C7) have 1-byte header
|
||||
void* user_ptr = (char*)base + 1;
|
||||
|
||||
// Call tiny_free_local_box() to:
|
||||
// 1. Push block to slab freelist
|
||||
// 2. Decrement meta->used (THIS IS THE KEY!)
|
||||
// 3. Check if slab becomes empty (meta->used == 0)
|
||||
// 4. If empty, release slab → SuperSlab → LRU cache
|
||||
tiny_free_local_box(ss, slab_idx, meta, user_ptr, my_tid);
|
||||
|
||||
drained++;
|
||||
|
||||
// Debug: Log when used reaches 0 (slab becomes empty)
|
||||
if (g_debug && meta->used == 0) {
|
||||
fprintf(stderr, "[TLS_SLL_DRAIN] EMPTY: class=%d ss=%p slab=%d (meta->used=0)\n",
|
||||
class_idx, (void*)ss, slab_idx);
|
||||
}
|
||||
}
|
||||
|
||||
if (g_debug && drained > 0) {
|
||||
fprintf(stderr, "[TLS_SLL_DRAIN] END: class=%d drained=%u remaining=%u\n",
|
||||
class_idx, drained, g_tls_sll_count[class_idx]);
|
||||
}
|
||||
|
||||
// Update stats
|
||||
g_tls_sll_drain_total_calls++;
|
||||
g_tls_sll_drain_total_blocks += drained;
|
||||
|
||||
return drained;
|
||||
}
|
||||
|
||||
// ========== Drain Trigger (Called from Fast Free Path) ==========
|
||||
|
||||
// Box: Try Drain (with counter trigger)
|
||||
// Purpose: Check drain counter and trigger drain if interval reached
|
||||
//
|
||||
// Flow:
|
||||
// 1. Increment drain counter for this class
|
||||
// 2. If counter >= interval, trigger drain and reset counter
|
||||
// 3. Otherwise, do nothing (fast path continues)
|
||||
//
|
||||
// Args:
|
||||
// class_idx: Size class that was just freed
|
||||
//
|
||||
// Returns: Number of blocks drained (0 if no drain)
|
||||
static inline uint32_t tiny_tls_sll_try_drain(int class_idx) {
|
||||
// Check if drain is enabled
|
||||
if (__builtin_expect(!tls_sll_drain_is_enabled(), 0)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Increment counter
|
||||
g_tls_sll_drain_counter[class_idx]++;
|
||||
|
||||
// Check if interval reached
|
||||
uint32_t interval = tls_sll_drain_get_interval();
|
||||
if (__builtin_expect(g_tls_sll_drain_counter[class_idx] >= interval, 0)) {
|
||||
// Trigger drain (drain ~32 blocks for now, tune later)
|
||||
uint32_t drained = tiny_tls_sll_drain(class_idx, 32);
|
||||
|
||||
// Reset counter
|
||||
g_tls_sll_drain_counter[class_idx] = 0;
|
||||
|
||||
return drained;
|
||||
}
|
||||
|
||||
return 0; // No drain triggered
|
||||
}
|
||||
|
||||
// ========== Debug Stats (Destructor) ==========
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static void tls_sll_drain_print_stats(void) __attribute__((destructor));
|
||||
static void tls_sll_drain_print_stats(void) {
|
||||
if (g_tls_sll_drain_total_calls > 0) {
|
||||
fprintf(stderr, "[TLS_SLL_DRAIN_STATS] Total drains: %lu, Total blocks: %lu, Avg: %.2f\n",
|
||||
g_tls_sll_drain_total_calls,
|
||||
g_tls_sll_drain_total_blocks,
|
||||
(double)g_tls_sll_drain_total_blocks / g_tls_sll_drain_total_calls);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -19,6 +19,7 @@
|
||||
#include "hakmem_build_flags.h"
|
||||
#include "hakmem_tiny_config.h" // For TINY_TLS_MAG_CAP, TINY_NUM_CLASSES
|
||||
#include "box/tls_sll_box.h" // Box TLS-SLL API
|
||||
#include "box/tls_sll_drain_box.h" // Box TLS-SLL Drain (Option B)
|
||||
#include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
|
||||
|
||||
// Phase 7: Header-based ultra-fast free
|
||||
@ -136,6 +137,13 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Option B: Periodic TLS SLL Drain (restore slab accounting consistency)
|
||||
// Purpose: Every N frees (default: 1024), drain TLS SLL → slab freelist
|
||||
// Impact: Enables empty detection → SuperSlabs freed → LRU cache functional
|
||||
// Cost: 2-3 cycles (counter increment + comparison, predict-not-taken)
|
||||
// Benefit: +1,300-1,700% throughput (563K → 8-10M ops/s expected)
|
||||
tiny_tls_sll_try_drain(class_idx);
|
||||
|
||||
return 1; // Success - handled in fast path
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user