From d72a700948d1ab9a2d2ccf974dd1d289a72127ed Mon Sep 17 00:00:00 2001 From: "Moe Charm (CI)" Date: Sat, 15 Nov 2025 13:39:37 +0900 Subject: [PATCH] Phase 13-B: TinyHeapV2 free path supply hook (magazine population) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement magazine supply from free path to enable TinyHeapV2 L0 cache Changes: 1. core/tiny_free_fast_v2.inc.h (Line 24, 134-143): - Include tiny_heap_v2.h for magazine API - Add supply hook after BASE pointer conversion (Line 134-143) - Try to push freed block to TinyHeapV2 magazine (C0-C3 only) - Falls back to TLS SLL if magazine full (existing behavior) 2. core/front/tiny_heap_v2.h (Line 24-46): - Move TinyHeapV2Mag / TinyHeapV2Stats typedef from hakmem_tiny.c - Add extern declarations for TLS variables - Define TINY_HEAP_V2_MAG_CAP (16 slots) - Enables use from tiny_free_fast_v2.inc.h 3. core/hakmem_tiny.c (Line 1270-1276, 1766-1768): - Remove duplicate typedef definitions - Move TLS storage definitions after tiny_heap_v2.h include - Reason: tiny_heap_v2.h must be included AFTER tiny_alloc_fast.inc.h - Forward declarations remain for early reference Supply Hook Flow: ``` hak_free_at(ptr) → hak_tiny_free_fast_v2(ptr) → class_idx = read_header(ptr) → base = ptr - 1 → if (class_idx <= 3 && tiny_heap_v2_enabled()) → tiny_heap_v2_try_push(class_idx, base) → success: return (magazine supplied) → full: fall through to TLS SLL → tls_sll_push(class_idx, base) # existing path ``` Benefits: - Magazine gets populated from freed blocks (L0 cache warm-up) - Next allocation hits magazine (fast L0 path, no backend refill) - Expected: 70-90% hit rate for fixed-size workloads - Expected: +200-500% performance for C0-C3 classes Build & Smoke Test: - ✅ Build successful - ✅ bench_fixed_size 256B workset=50: 33M ops/s (stable) - ✅ bench_fixed_size 16B workset=60: 30M ops/s (stable) - 🔜 A/B test (hit rate measurement) deferred to next commit Implementation Status: - ✅ Phase 13-A: 
Alloc hook + stats (completed, committed) - ✅ Phase 13-B: Free path supply (THIS COMMIT) - 🔜 Phase 13-C: Evaluation & tuning Notes: - Supply hook is C0-C3 only (TinyHeapV2 target range) - Magazine capacity=16 (same as Phase 13-A) - No performance regression (hook is ENV-gated: HAKMEM_TINY_HEAP_V2=1) 🤝 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- core/front/tiny_heap_v2.h | 25 +++++++++++++++++++++++-- core/hakmem_tiny.c | 35 +++++++---------------------------- core/tiny_free_fast_v2.inc.h | 12 ++++++++++++ 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/core/front/tiny_heap_v2.h b/core/front/tiny_heap_v2.h index 38623dc4..b8c80632 100644 --- a/core/front/tiny_heap_v2.h +++ b/core/front/tiny_heap_v2.h @@ -21,8 +21,29 @@ #include "../hakmem_tiny.h" -// NOTE: TinyHeapV2Mag struct and g_tiny_heap_v2_mag are defined in hakmem_tiny.c -// This header provides only the implementations (static inline functions). +// Phase 13-B: Magazine capacity (same as Phase 13-A) +#ifndef TINY_HEAP_V2_MAG_CAP +#define TINY_HEAP_V2_MAG_CAP 16 +#endif + +// TinyHeapV2 Magazine (per-thread, per-class) +typedef struct { + void* items[TINY_HEAP_V2_MAG_CAP]; + int top; +} TinyHeapV2Mag; + +// TinyHeapV2 Statistics (per-thread, per-class) +typedef struct { + uint64_t alloc_calls; + uint64_t mag_hits; + uint64_t refill_calls; + uint64_t refill_blocks; + uint64_t backend_oom; +} TinyHeapV2Stats; + +// External TLS variables (defined in hakmem_tiny.c) +extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES]; +extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES]; // Enable flag (cached) static inline int tiny_heap_v2_enabled(void) { diff --git a/core/hakmem_tiny.c b/core/hakmem_tiny.c index 8f92e3de..b5392d09 100644 --- a/core/hakmem_tiny.c +++ b/core/hakmem_tiny.c @@ -1267,34 +1267,9 @@ static __thread TinyHotMag g_tls_hot_mag[TINY_NUM_CLASSES]; int g_quick_enable = 0; // HAKMEM_TINY_QUICK=1 __thread 
TinyQuickSlot g_tls_quick[TINY_NUM_CLASSES]; // compile-out via guards below -// Phase 13: Tiny Heap v2 - Forward declarations (implementations come after tiny_alloc_fast.inc.h) -// This allows tiny_alloc_fast.inc.h to call these functions. - -// Very small per-class magazine for tiny sizes (C0–C3) -#ifndef TINY_HEAP_V2_MAG_CAP -#define TINY_HEAP_V2_MAG_CAP 16 -#endif - -typedef struct { - void* items[TINY_HEAP_V2_MAG_CAP]; - int top; -} TinyHeapV2Mag; - -// TLS magazines per class -__thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES]; - -// Phase 13-A: Observability counters (per-thread, per-class) -typedef struct { - uint64_t alloc_calls; // Total alloc attempts via HeapV2 - uint64_t mag_hits; // Magazine had blocks (fast path) - uint64_t refill_calls; // Refill invocations - uint64_t refill_blocks; // Total blocks obtained from refills - uint64_t backend_oom; // Backend OOM (refill returned 0) -} TinyHeapV2Stats; - -__thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES]; - -// Forward declarations +// Phase 13: Tiny Heap v2 - Forward declarations +// NOTE: TLS storage definitions moved to after tiny_heap_v2.h include (Line ~1770) +// Reason: tiny_heap_v2.h must be included AFTER tiny_alloc_fast.inc.h static inline int tiny_heap_v2_enabled(void); static inline int tiny_heap_v2_class_enabled(int class_idx); static inline int tiny_heap_v2_refill_mag(int class_idx); @@ -1788,6 +1763,10 @@ TinySlab* hak_tiny_owner_slab(void* ptr) { // Phase 13: Tiny Heap v2 front (must come AFTER tiny_alloc_fast.inc.h) #include "front/tiny_heap_v2.h" + // Phase 13: Tiny Heap v2 - TLS storage (types defined in tiny_heap_v2.h above) + __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES]; + __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES]; + // Box 6: Free Fast Path (Layer 2 - 2-3 instructions) #include "tiny_free_fast.inc.h" diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h index 77dde5c9..394c97f9 100644 --- 
a/core/tiny_free_fast_v2.inc.h +++ b/core/tiny_free_fast_v2.inc.h @@ -21,6 +21,7 @@ #include "box/tls_sll_box.h" // Box TLS-SLL API #include "box/tls_sll_drain_box.h" // Box TLS-SLL Drain (Option B) #include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection +#include "front/tiny_heap_v2.h" // Phase 13-B: TinyHeapV2 magazine supply // Phase 7: Header-based ultra-fast free #if HAKMEM_TINY_HEADER_CLASSIDX @@ -130,6 +131,17 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { // Phase E1: ALL classes (C0-C7) have 1-byte header → base = ptr-1 void* base = (char*)ptr - 1; + // Phase 13-B: TinyHeapV2 magazine supply (C0-C3 only) + // Try to supply to magazine first (L0 cache, faster than TLS SLL) + // Falls back to TLS SLL if magazine is full + if (class_idx <= 3 && tiny_heap_v2_enabled()) { + if (tiny_heap_v2_try_push(class_idx, base)) { + // Successfully supplied to magazine + return 1; + } + // Magazine full → fall through to TLS SLL + } + // REVERT E3-2: Use Box TLS-SLL for all builds (testing hypothesis) // Hypothesis: Box TLS-SLL acts as verification layer, masking underlying bugs if (!tls_sll_push(class_idx, base, UINT32_MAX)) {