Phase 13-B: TinyHeapV2 free path supply hook (magazine population)
Implement magazine supply from free path to enable TinyHeapV2 L0 cache
Changes:
1. core/tiny_free_fast_v2.inc.h (Lines 24, 134-143):
- Include tiny_heap_v2.h for the magazine API
- Add the supply hook after the BASE pointer conversion (Lines 134-143)
- Try to push the freed block to the TinyHeapV2 magazine (C0-C3 only)
- Fall back to the TLS SLL if the magazine is full (existing behavior)
2. core/front/tiny_heap_v2.h (Lines 24-46):
- Move the TinyHeapV2Mag / TinyHeapV2Stats typedefs here from hakmem_tiny.c
- Add extern declarations for the TLS variables
- Define TINY_HEAP_V2_MAG_CAP (16 slots)
- This makes the magazine types usable from tiny_free_fast_v2.inc.h
3. core/hakmem_tiny.c (Lines 1270-1276, 1766-1768):
- Remove the duplicate typedef definitions
- Move the TLS storage definitions to after the tiny_heap_v2.h include
- Reason: tiny_heap_v2.h must be included AFTER tiny_alloc_fast.inc.h
- Forward declarations of the functions remain for early reference
Supply Hook Flow:
```
hak_free_at(ptr) → hak_tiny_free_fast_v2(ptr)
→ class_idx = read_header(ptr)
→ base = ptr - 1
→ if (class_idx <= 3 && tiny_heap_v2_enabled())
→ tiny_heap_v2_try_push(class_idx, base)
→ success: return (magazine supplied)
→ full: fall through to TLS SLL
→ tls_sll_push(class_idx, base) # existing path
```
Benefits:
- Magazine gets populated from freed blocks (L0 cache warm-up)
- Next allocation hits magazine (fast L0 path, no backend refill)
- Expected: 70-90% hit rate for fixed-size workloads
- Expected: +200-500% performance for C0-C3 classes
Build & Smoke Test:
- ✅ Build successful
- ✅ bench_fixed_size 256B workset=50: 33M ops/s (stable)
- ✅ bench_fixed_size 16B workset=60: 30M ops/s (stable)
- 🔜 A/B test (hit rate measurement) deferred to next commit
Implementation Status:
- ✅ Phase 13-A: Alloc hook + stats (completed, committed)
- ✅ Phase 13-B: Free path supply (THIS COMMIT)
- 🔜 Phase 13-C: Evaluation & tuning
Notes:
- Supply hook is C0-C3 only (TinyHeapV2 target range)
- Magazine capacity=16 (same as Phase 13-A)
- No performance regression when disabled: the hook is ENV-gated (enabled with HAKMEM_TINY_HEAP_V2=1)
🤝 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -21,8 +21,29 @@
|
||||
|
||||
#include "../hakmem_tiny.h"
|
||||
|
||||
// NOTE: TinyHeapV2Mag struct and g_tiny_heap_v2_mag are defined in hakmem_tiny.c
|
||||
// This header provides only the implementations (static inline functions).
|
||||
// Phase 13-B: Magazine capacity (same as Phase 13-A)
|
||||
#ifndef TINY_HEAP_V2_MAG_CAP
|
||||
#define TINY_HEAP_V2_MAG_CAP 16
|
||||
#endif
|
||||
|
||||
// TinyHeapV2 Magazine (per-thread, per-class)
|
||||
typedef struct {
|
||||
void* items[TINY_HEAP_V2_MAG_CAP];
|
||||
int top;
|
||||
} TinyHeapV2Mag;
|
||||
|
||||
// TinyHeapV2 Statistics (per-thread, per-class)
|
||||
typedef struct {
|
||||
uint64_t alloc_calls;
|
||||
uint64_t mag_hits;
|
||||
uint64_t refill_calls;
|
||||
uint64_t refill_blocks;
|
||||
uint64_t backend_oom;
|
||||
} TinyHeapV2Stats;
|
||||
|
||||
// External TLS variables (defined in hakmem_tiny.c)
|
||||
extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
|
||||
extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
|
||||
|
||||
// Enable flag (cached)
|
||||
static inline int tiny_heap_v2_enabled(void) {
|
||||
|
||||
@@ -1267,34 +1267,9 @@ static __thread TinyHotMag g_tls_hot_mag[TINY_NUM_CLASSES];
|
||||
int g_quick_enable = 0; // HAKMEM_TINY_QUICK=1
|
||||
__thread TinyQuickSlot g_tls_quick[TINY_NUM_CLASSES]; // compile-out via guards below
|
||||
|
||||
// Phase 13: Tiny Heap v2 - Forward declarations (implementations come after tiny_alloc_fast.inc.h)
|
||||
// This allows tiny_alloc_fast.inc.h to call these functions.
|
||||
|
||||
// Very small per-class magazine for tiny sizes (C0–C3)
|
||||
#ifndef TINY_HEAP_V2_MAG_CAP
|
||||
#define TINY_HEAP_V2_MAG_CAP 16
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
void* items[TINY_HEAP_V2_MAG_CAP];
|
||||
int top;
|
||||
} TinyHeapV2Mag;
|
||||
|
||||
// TLS magazines per class
|
||||
__thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
|
||||
|
||||
// Phase 13-A: Observability counters (per-thread, per-class)
|
||||
typedef struct {
|
||||
uint64_t alloc_calls; // Total alloc attempts via HeapV2
|
||||
uint64_t mag_hits; // Magazine had blocks (fast path)
|
||||
uint64_t refill_calls; // Refill invocations
|
||||
uint64_t refill_blocks; // Total blocks obtained from refills
|
||||
uint64_t backend_oom; // Backend OOM (refill returned 0)
|
||||
} TinyHeapV2Stats;
|
||||
|
||||
__thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
|
||||
|
||||
// Forward declarations
|
||||
// Phase 13: Tiny Heap v2 - Forward declarations
|
||||
// NOTE: TLS storage declarations moved to after tiny_heap_v2.h include (Line ~1770)
|
||||
// Reason: tiny_heap_v2.h must be included AFTER tiny_alloc_fast.inc.h
|
||||
static inline int tiny_heap_v2_enabled(void);
|
||||
static inline int tiny_heap_v2_class_enabled(int class_idx);
|
||||
static inline int tiny_heap_v2_refill_mag(int class_idx);
|
||||
@@ -1788,6 +1763,10 @@ TinySlab* hak_tiny_owner_slab(void* ptr) {
|
||||
// Phase 13: Tiny Heap v2 front (must come AFTER tiny_alloc_fast.inc.h)
|
||||
#include "front/tiny_heap_v2.h"
|
||||
|
||||
// Phase 13: Tiny Heap v2 - TLS storage (types defined in tiny_heap_v2.h above)
|
||||
__thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
|
||||
__thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
|
||||
|
||||
// Box 6: Free Fast Path (Layer 2 - 2-3 instructions)
|
||||
#include "tiny_free_fast.inc.h"
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "box/tls_sll_box.h" // Box TLS-SLL API
|
||||
#include "box/tls_sll_drain_box.h" // Box TLS-SLL Drain (Option B)
|
||||
#include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
|
||||
#include "front/tiny_heap_v2.h" // Phase 13-B: TinyHeapV2 magazine supply
|
||||
|
||||
// Phase 7: Header-based ultra-fast free
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
@@ -130,6 +131,17 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
// Phase E1: ALL classes (C0-C7) have 1-byte header → base = ptr-1
|
||||
void* base = (char*)ptr - 1;
|
||||
|
||||
// Phase 13-B: TinyHeapV2 magazine supply (C0-C3 only)
|
||||
// Try to supply to magazine first (L0 cache, faster than TLS SLL)
|
||||
// Falls back to TLS SLL if magazine is full
|
||||
if (class_idx <= 3 && tiny_heap_v2_enabled()) {
|
||||
if (tiny_heap_v2_try_push(class_idx, base)) {
|
||||
// Successfully supplied to magazine
|
||||
return 1;
|
||||
}
|
||||
// Magazine full → fall through to TLS SLL
|
||||
}
|
||||
|
||||
// REVERT E3-2: Use Box TLS-SLL for all builds (testing hypothesis)
|
||||
// Hypothesis: Box TLS-SLL acts as verification layer, masking underlying bugs
|
||||
if (!tls_sll_push(class_idx, base, UINT32_MAX)) {
|
||||
|
||||
Reference in New Issue
Block a user