Files
hakmem/core/front/tiny_heap_v2.h
Moe Charm (CI) d72a700948 Phase 13-B: TinyHeapV2 free path supply hook (magazine population)
Implement magazine supply from free path to enable TinyHeapV2 L0 cache

Changes:
1. core/tiny_free_fast_v2.inc.h (Line 24, 134-143):
   - Include tiny_heap_v2.h for magazine API
   - Add supply hook after BASE pointer conversion (Line 134-143)
   - Try to push freed block to TinyHeapV2 magazine (C0-C3 only)
   - Falls back to TLS SLL if magazine full (existing behavior)

2. core/front/tiny_heap_v2.h (Line 24-46):
   - Move TinyHeapV2Mag / TinyHeapV2Stats typedef from hakmem_tiny.c
   - Add extern declarations for TLS variables
   - Define TINY_HEAP_V2_MAG_CAP (16 slots)
   - Enables use from tiny_free_fast_v2.inc.h

3. core/hakmem_tiny.c (Line 1270-1276, 1766-1768):
   - Remove duplicate typedef definitions
   - Move TLS storage declarations after tiny_heap_v2.h include
   - Reason: tiny_heap_v2.h must be included AFTER tiny_alloc_fast.inc.h
   - Forward declarations remain for early reference

Supply Hook Flow:
```
hak_free_at(ptr) → hak_tiny_free_fast_v2(ptr)
  → class_idx = read_header(ptr)
  → base = ptr - 1
  → if (class_idx <= 3 && tiny_heap_v2_enabled())
      → tiny_heap_v2_try_push(class_idx, base)
        → success: return (magazine supplied)
        → full: fall through to TLS SLL
  → tls_sll_push(class_idx, base)  # existing path
```

Benefits:
- Magazine gets populated from freed blocks (L0 cache warm-up)
- Next allocation hits magazine (fast L0 path, no backend refill)
- Expected: 70-90% hit rate for fixed-size workloads
- Expected: +200-500% performance for C0-C3 classes

Build & Smoke Test:
- ✅ Build successful
- ✅ bench_fixed_size 256B workset=50: 33M ops/s (stable)
- ✅ bench_fixed_size 16B workset=60: 30M ops/s (stable)
- 🔜 A/B test (hit rate measurement) deferred to next commit

Implementation Status:
- ✅ Phase 13-A: Alloc hook + stats (completed, committed)
- ✅ Phase 13-B: Free path supply (THIS COMMIT)
- 🔜 Phase 13-C: Evaluation & tuning

Notes:
- Supply hook is C0-C3 only (TinyHeapV2 target range)
- Magazine capacity=16 (same as Phase 13-A)
- No performance regression (hook is ENV-gated: HAKMEM_TINY_HEAP_V2=1)

🤝 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-15 13:39:37 +09:00

194 lines
7.1 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_heap_v2.h - Tiny per-thread heap (experimental Box)
// Purpose:
// - Provide a very simple per-thread front for tiny allocations.
// - Currently targets small classes (C0-C3) and is gated by ENV:
// HAKMEM_TINY_HEAP_V2=1
// - Backend remains existing FastCache + Superslab refill.
//
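// Design (first pass):
// - Per-thread, per-class small magazine (L0) in front of FastCache.
// - On alloc:
// 1) Pop from magazine.
// 2) If empty, refill magazine from FastCache (and backend via tiny_alloc_fast_refill).
//    NOTE: the refill step is not implemented yet. tiny_heap_v2_refill_mag() is a
//    no-op stub, and tiny_heap_v2_alloc() returns NULL on a miss so that the caller
//    falls back to the existing front layers.
// - On free: still goes through the existing free path (hak_tiny_free_fast_v2),
// which ultimately feeds TLS SLL / drain / Superslab. Since Phase 13-B that path
// first offers the block to the magazine via tiny_heap_v2_try_push().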
//
// This Box is intentionally minimal; performance tuning (sizes, class set)
// is left for later phases.
#ifndef HAK_FRONT_TINY_HEAP_V2_H
#define HAK_FRONT_TINY_HEAP_V2_H
#include "../hakmem_tiny.h"
// Phase 13-B: Magazine capacity (same as Phase 13-A)
// Number of pointer slots in each TinyHeapV2Mag. The #ifndef guard lets a build
// supply its own value by defining TINY_HEAP_V2_MAG_CAP before this point.
#ifndef TINY_HEAP_V2_MAG_CAP
#define TINY_HEAP_V2_MAG_CAP 16
#endif
// TinyHeapV2 Magazine (per-thread, per-class)
// Fixed-capacity LIFO stack of block pointers. tiny_heap_v2_try_push() stores at
// items[top] and then increments top; tiny_heap_v2_alloc() decrements top and
// returns items[top].
typedef struct {
void* items[TINY_HEAP_V2_MAG_CAP]; // cached BASE pointers; slots [0, top) are occupied
int top; // number of cached entries, i.e. index of the next free slot
} TinyHeapV2Mag;
// TinyHeapV2 Statistics (per-thread, per-class)
// Plain counters. Within this header only alloc_calls and mag_hits are updated
// (both in tiny_heap_v2_alloc); the remaining fields are not written here.
typedef struct {
uint64_t alloc_calls; // incremented once per tiny_heap_v2_alloc() call that passes the class checks
uint64_t mag_hits; // incremented when tiny_heap_v2_alloc() pops a block from the magazine
uint64_t refill_calls; // not updated in this header
uint64_t refill_blocks; // not updated in this header
uint64_t backend_oom; // not updated in this header
} TinyHeapV2Stats;
// External TLS variables (defined in hakmem_tiny.c)
// Declared __thread, so every thread has its own copy of both arrays.
// Both are indexed by tiny class index and sized by TINY_NUM_CLASSES.
extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
// Global on/off switch for TinyHeapV2, resolved once from the environment.
//
// Reads HAKMEM_TINY_HEAP_V2 on the first call and caches the answer in a
// function-local static: the feature is on when the variable is set, non-empty
// and does not start with '0'. The resolution is logged to stderr once, and a
// second one-shot line is logged the first time an enabled result is returned.
//
// Returns: 1 if enabled, 0 otherwise.
static inline int tiny_heap_v2_enabled(void) {
    static int s_state = -1;        // -1 = environment not consulted yet
    static int s_announce = 1;      // one-shot guard for the "FIRST" log line

    if (__builtin_expect(s_state == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_HEAP_V2");
        int on = 0;
        if (env != NULL && env[0] != '\0' && env[0] != '0') {
            on = 1;
        }
        // Publish the result before logging, exactly as the lookup is ordered.
        s_state = on;
        fprintf(stderr, "[HeapV2-INIT] tiny_heap_v2_enabled() called: ENV='%s' → %d\n",
                env ? env : "(null)", s_state);
        fflush(stderr);
    }

    if (s_state && s_announce) {
        fprintf(stderr, "[HeapV2-FIRST] Returning enabled=%d\n", s_state);
        fflush(stderr);
        s_announce = 0;
    }

    return s_state;
}
// Class-specific enable mask (cached)
// ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK (bitmask: bit 0=C0, bit 1=C1, bit 2=C2, bit 3=C3)
// Default: 0xF (all classes C0-C3 enabled)
// Example: 0x2 = C1 only, 0x8 = C3 only, 0x6 = C1+C2
//
// The variable is parsed once with strtol(base 0), so "0x.." is hex, a leading
// "0" is octal and anything else is decimal. The result is cached in a
// function-local static.
//
// Parameters:
//   class_idx - tiny class index; values outside 0..7 are always reported disabled.
// Returns: 1 if the bit for class_idx is set in the mask, 0 otherwise.
static inline int tiny_heap_v2_class_enabled(int class_idx) {
    static int g_class_mask = -1;  // -1 = environment not parsed yet
    if (__builtin_expect(g_class_mask == -1, 0)) {
        int mask = 0xF;  // Default: C0-C3 all enabled
        const char* e = getenv("HAKMEM_TINY_HEAP_V2_CLASS_MASK");
        if (e && *e) {
            char* endptr = NULL;
            long val = strtol(e, &endptr, 0);  // 0 = auto-detect base
            if (endptr != e) {
                // Keep only the 8 bits this function can ever test. This also
                // guarantees the cached value can never equal the -1 sentinel
                // (e.g. for "-1" or "0xFFFFFFFF"), which would otherwise force
                // a getenv+strtol on every single call.
                mask = (int)(val & 0xFF);
            }
            // else: no digits could be parsed; keep the documented default
            // instead of silently disabling every class.
        }
        g_class_mask = mask;
    }
    if (class_idx < 0 || class_idx >= 8) {
        return 0;
    }
    return (g_class_mask & (1 << class_idx)) != 0;
}
// NOTE: This header MUST be included AFTER tiny_alloc_fast.inc.h!
// It calls hak_tiny_size_to_class today, and a future refill would also use
// fastcache_pop and tiny_alloc_fast_refill; these are static inline functions
// defined in tiny_alloc_fast.inc.h and related headers.
// Magazine refill hook (Phase 13-A Step 1: deliberately disabled).
//
// TinyHeapV2 is a "lucky hit" L0 cache and does not pull blocks in by itself;
// refilling from FastCache here would create a circular dependency. The
// function is kept as a placeholder for later phases.
//
// Parameters:
//   class_idx - ignored.
// Returns: always 0.
static inline int tiny_heap_v2_refill_mag(int class_idx) {
    const int result = 0;  // nothing is ever refilled
    (void)class_idx;
    return result;
}
// Offer a freed block to the calling thread's TinyHeapV2 magazine.
// Intended to be called from the free path so that "leftover" blocks become
// available to tiny_heap_v2_alloc().
//
// Parameters:
//   class_idx - tiny class index; only 0..3 are accepted, and the class must
//               also be enabled per tiny_heap_v2_class_enabled().
//   base      - BASE pointer of the block; stored as-is.
// Returns: 1 if the block was stored, 0 if the class is rejected or the
//          magazine is already full (the caller keeps ownership of the block).
//
// When HAKMEM_TINY_HEAP_V2_DEBUG is set (non-empty, not starting with '0'),
// the first 5 successful pushes per class and thread are logged to stderr.
static inline int tiny_heap_v2_try_push(int class_idx, void* base) {
    static int s_log_pushes = -1;                            // cached debug switch, -1 = unknown
    static __thread int s_logged[TINY_NUM_CLASSES] = {0};    // log lines emitted so far

    // Range check first; the class mask is only consulted for C0-C3.
    const int in_range = (class_idx >= 0) && (class_idx <= 3);
    if (!in_range || !tiny_heap_v2_class_enabled(class_idx)) {
        return 0;
    }

    TinyHeapV2Mag* m = &g_tiny_heap_v2_mag[class_idx];
    if (m->top >= TINY_HEAP_V2_MAG_CAP) {
        return 0;  // no free slot
    }

    m->items[m->top] = base;
    m->top += 1;

    // The debug switch is resolved lazily, on the first successful push.
    if (s_log_pushes == -1) {
        const char* dbg = getenv("HAKMEM_TINY_HEAP_V2_DEBUG");
        s_log_pushes = (dbg && *dbg && *dbg != '0') ? 1 : 0;
    }
    if (s_log_pushes && s_logged[class_idx] < 5) {
        fprintf(stderr, "[HeapV2-PUSH] C%d push #%d, base=%p, mag->top=%d\n",
                class_idx, s_logged[class_idx]++, base, m->top);
    }

    return 1;
}
// Tiny heap v2 alloc returns BASE pointer or NULL.
// Phase 13-A Step 1: Minimal "lucky hit" L0 cache (NO REFILL)
// Strategy: Pop from magazine if available, else return NULL immediately.
// Caller is responsible for header write via HAK_RET_ALLOC (BASE → USER conversion).
// Contract:
// - Only handles class 0-3 (8-64B) based on CLASS_MASK
// - Returns BASE pointer (not USER pointer!)
// - Returns NULL if magazine empty (caller falls back to existing path)
//
// Parameters:
//   size - requested allocation size, mapped to a class via hak_tiny_size_to_class().
// Side effects: increments alloc_calls (and mag_hits on a hit) for the class in
// the calling thread's statistics. When HAKMEM_TINY_HEAP_V2_DEBUG is set
// (non-empty, not starting with '0'), the first 3 calls per class and thread
// are logged to stderr.
static inline void* tiny_heap_v2_alloc(size_t size) {
    // 1. Size → class index
    int class_idx = hak_tiny_size_to_class(size);
    if (__builtin_expect(class_idx < 0, 0)) {
        return NULL;  // Not a tiny size
    }
    // 2. Limit to hot tiny classes (0..3) for now
    if (class_idx > 3) {
        return NULL;  // Fall back to existing path for class 4-7
    }
    // 3. Check class-specific enable mask
    if (__builtin_expect(!tiny_heap_v2_class_enabled(class_idx), 0)) {
        return NULL;  // Class disabled via HAKMEM_TINY_HEAP_V2_CLASS_MASK
    }

    g_tiny_heap_v2_stats[class_idx].alloc_calls++;

    // Debug: print the first few allocs. The switch is resolved once and cached;
    // without the cache, a disabled debug flag never advanced the per-class
    // counter and getenv() ended up running on every allocation.
    static int g_alloc_dbg = -1;
    if (__builtin_expect(g_alloc_dbg == -1, 0)) {
        const char* debug_env = getenv("HAKMEM_TINY_HEAP_V2_DEBUG");
        g_alloc_dbg = (debug_env && *debug_env && *debug_env != '0') ? 1 : 0;
    }
    if (__builtin_expect(g_alloc_dbg, 0)) {
        static __thread int g_debug_count[TINY_NUM_CLASSES] = {0};
        if (g_debug_count[class_idx] < 3) {
            // uint64_t is not guaranteed to be unsigned long; cast to match %llu.
            fprintf(stderr, "[HeapV2-DEBUG] C%d alloc #%d (total_allocs=%llu)\n",
                    class_idx, g_debug_count[class_idx]++,
                    (unsigned long long)g_tiny_heap_v2_stats[class_idx].alloc_calls);
        }
    }

    TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
    // 4. ONLY path: pop from magazine if available (lucky hit!)
    if (__builtin_expect(mag->top > 0, 0)) {  // Branch hint: a miss is expected
        g_tiny_heap_v2_stats[class_idx].mag_hits++;
        void* base = mag->items[--mag->top];
        return base;  // BASE pointer (caller will convert to USER)
    }
    // 5. Magazine empty: return NULL immediately (NO REFILL)
    // Let existing front layers handle this allocation.
    return NULL;
}
// Print statistics (called at program exit if HAKMEM_TINY_HEAP_V2_STATS=1)
// Declaration only (implementation in hakmem_tiny.c for external linkage)
// Takes no arguments and returns nothing; what is printed and where is decided
// by the implementation, not by this header.
void tiny_heap_v2_print_stats(void);
#endif // HAK_FRONT_TINY_HEAP_V2_H