// tiny_alloc_fast_sfc.inc.h - Box 5-NEW: Super Front Cache (SFC) // Purpose: Ultra-fast TLS cache with 128-256 slots (vs old 16 slots) // Performance: 3-4 instruction fast path, 95%+ hit rate, refill -98.5% // Box Theory: Clear ownership, same-thread only, A/B testable #pragma once #include #include #include // For debug output (getenv, fprintf, stderr) #include // For getenv #include "hakmem_tiny.h" #include "box/tiny_next_ptr_box.h" // Phase E1-CORRECT: unified next pointer API // ============================================================================ // Box 5-NEW: Super Front Cache - Global Config // ============================================================================ // Default capacities (can be overridden per-class) #define SFC_DEFAULT_CAPACITY 256 #define SFC_DEFAULT_REFILL_COUNT 128 #define SFC_DEFAULT_SPILL_THRESH 90 // Spill when >90% full // Per-class capacity limits #define SFC_MIN_CAPACITY 16 #define SFC_MAX_CAPACITY 256 // ============================================================================ // Box 5-NEW: Super Front Cache - TLS Data Structures // ============================================================================ // TLS arrays (one per class, zero-initialized at thread start) extern __thread void* g_sfc_head[TINY_NUM_CLASSES]; // Head of linked list extern __thread uint32_t g_sfc_count[TINY_NUM_CLASSES]; // Current count extern uint32_t g_sfc_capacity[TINY_NUM_CLASSES]; // Target capacity (shared, read-only) // ============================================================================ // Box 5-NEW: Super Front Cache - Statistics (optional, compile-time gated) // ============================================================================ #if HAKMEM_DEBUG_COUNTERS typedef struct { // Fast path metrics uint64_t alloc_hits; // Fast path hits (3-4 inst) uint64_t alloc_misses; // Fast path misses → refill uint64_t free_hits; // Push success uint64_t free_full; // Push failed (full) → spill // Slow path metrics uint64_t refill_calls; // Refill invocations uint64_t refill_blocks; // Total blocks refilled uint64_t spill_calls; // Spill invocations uint64_t spill_blocks; // Total blocks spilled // Learning metrics (Phase 3) uint64_t alloc_window; // Allocs in current window uint64_t miss_window; // Misses in current window double miss_rate; // miss_window / alloc_window int hotness; // 0=cold, 1=warm, 2=hot, 3=scorching // Adaptive config (Phase 3) int adaptive_capacity; // Current capacity (16-256) int adaptive_refill; // Current refill count (16-128) } sfc_stats_t; extern sfc_stats_t g_sfc_stats[TINY_NUM_CLASSES]; #endif // HAKMEM_DEBUG_COUNTERS // ============================================================================ // Box 5-NEW: Super Front Cache - Fast Path (3-4 instructions) // ============================================================================ // Alloc: Pop from SFC cache (inline for zero-cost) // Returns: pointer on success, NULL on miss // Contract: Caller owns returned pointer // Invariants: count ≥ 0, all pointers belong to correct class static inline void* sfc_alloc(int cls) { void* base = g_sfc_head[cls]; if (__builtin_expect(base != NULL, 1)) { // Phase E1-CORRECT: Use Box API for next pointer read g_sfc_head[cls] = tiny_next_read(cls, base); g_sfc_count[cls]--; // count-- #if HAKMEM_DEBUG_COUNTERS g_sfc_stats[cls].alloc_hits++; #endif return base; // 🚀 SFC HIT! (returns base) } #if HAKMEM_DEBUG_COUNTERS g_sfc_stats[cls].alloc_misses++; #endif return NULL; // Miss → caller should refill } // Free: Push to SFC cache (inline for zero-cost) // Returns: 1 on success (cached), 0 on full (caller should spill) // Contract: ptr must belong to cls, same-thread only // Invariants: count ≤ capacity, linked list integrity static inline int sfc_free_push(int cls, void* ptr) { uint32_t cap = g_sfc_capacity[cls]; uint32_t cnt = g_sfc_count[cls]; #if !HAKMEM_BUILD_RELEASE && defined(HAKMEM_SFC_DEBUG_LOG) // Debug logging (compile-time gated; zero cost in release) do { static __thread int free_debug_count = 0; extern int g_sfc_debug; if (__builtin_expect(g_sfc_debug, 0) && free_debug_count < 20) { free_debug_count++; extern int g_sfc_enabled; fprintf(stderr, "[SFC_FREE_PUSH] cls=%d, ptr=%p, cnt=%u, cap=%u, will_succeed=%d, enabled=%d\n", cls, ptr, cnt, cap, (cnt < cap), g_sfc_enabled); } } while(0); #endif if (__builtin_expect(cnt < cap, 1)) { // Phase E1-CORRECT: Use Box API for next pointer write tiny_next_write(cls, ptr, g_sfc_head[cls]); g_sfc_head[cls] = ptr; // head = base g_sfc_count[cls] = cnt + 1; // count++ #if HAKMEM_DEBUG_COUNTERS g_sfc_stats[cls].free_hits++; #endif return 1; // Success } #if HAKMEM_DEBUG_COUNTERS g_sfc_stats[cls].free_full++; #endif return 0; // Full → caller should spill } // ============================================================================ // Box 5-NEW: Super Front Cache - Public API (slow path, not inline) // ============================================================================ // Initialize SFC (called once at startup) void sfc_init(void); void sfc_cascade_from_tls_initial(void); // Shutdown SFC (called at exit, optional) void sfc_shutdown(void); // Refill: Batch refill from backend (Magazine/SuperSlab) // Returns: number of blocks refilled (0 on failure) // Contract: Transfers ownership from backend to SFC int sfc_refill(int cls, int target_count); // Spill: Batch spill to backend when cache too full // Returns: number of blocks spilled // Contract: Transfers ownership from SFC to backend int sfc_spill(int cls, int spill_count); // ============================================================================ // Box 5-NEW: Super Front Cache - Configuration (tuning) // ============================================================================ typedef struct { int capacity; // Target capacity (128-256) int refill_count; // Batch refill size (64-128) int spill_thresh; // Spill when count > capacity * thresh% } sfc_config_t; sfc_config_t sfc_get_config(int cls); void sfc_set_config(int cls, sfc_config_t cfg); // ============================================================================ // Box 5-NEW: Super Front Cache - Statistics (debugging) // ============================================================================ #if HAKMEM_DEBUG_COUNTERS sfc_stats_t sfc_get_stats(int cls); void sfc_reset_stats(int cls); void sfc_print_stats(void); // Human-readable dump #endif // HAKMEM_DEBUG_COUNTERS // ============================================================================ // Box 5-NEW: Super Front Cache - Feature Flag (A/B testing) // ============================================================================ // Global enable flag (set via ENV: HAKMEM_SFC_ENABLE) extern int g_sfc_enabled; // ============================================================================ // Box 5-NEW: Super Front Cache - Box Theory Compliance // ============================================================================ // Invariants (enforced by design): // - INVARIANT 1: g_sfc_count[cls] <= g_sfc_capacity[cls] (always) // - INVARIANT 2: All pointers in cache belong to correct class (caller responsibility) // - INVARIANT 3: SFC contains only same-thread allocations (TLS) // - INVARIANT 4: Linked list integrity (*ptr points to valid node or NULL) // - INVARIANT 5: g_sfc_count[cls] >= 0 (always) // Ownership Rules: // - SFC owns: Cached pointers in g_sfc_head[cls] linked list // - Transfer IN: sfc_refill() from Magazine/SuperSlab (batch) // - Transfer OUT: sfc_alloc() to application (single) // - Return: sfc_free_push() from application (single) // - Spill: sfc_spill() to Magazine/SuperSlab (batch) // Boundaries: // - Minimal coupling: Reuses existing sll_refill_small_from_ss() // - Fallback: Old Box 5 (TLS SLL 16 slots) remains for backward compat // - A/B testable: HAKMEM_SFC_ENABLE=0/1 switches between old/new // ============================================================================ // End of tiny_alloc_fast_sfc.inc.h // ============================================================================