/*
 * Phase 13-A Step 1: TinyHeapV2 NO-REFILL L0 cache implementation.
 *
 * TinyHeapV2 is a minimal "lucky hit" L0 cache that avoids a circular
 * dependency with FastCache by eliminating self-refill.
 *
 * Key changes:
 *   - tiny_heap_v2_alloc(): pop from magazine if available, else return NULL.
 *   - tiny_heap_v2_refill_mag(): no-op stub (no backend refill).
 *   - ENV: HAKMEM_TINY_HEAP_V2=1 to enable.
 *   - ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK=bitmask (C0-C3 control).
 *   - ENV: HAKMEM_TINY_HEAP_V2_STATS=1 to print statistics.
 *   - Hooked at the alloc entry points (after class_idx calculation) with
 *     fallback to the existing front when TinyHeapV2 returns NULL.
 *   - TLS state (TinyHeapV2Mag, TinyHeapV2Stats) defined in hakmem_tiny.c.
 *
 * Root cause fixed:
 *   - BEFORE: TinyHeapV2 refilled from FastCache -> circular dependency;
 *     it intercepted all allocs so FastCache was never populated
 *     (100% backend OOM, 0% hit rate, 99% slowdown).
 *   - AFTER: TinyHeapV2 is a passive L0 cache (no refill); an empty magazine
 *     returns NULL and the existing front handles the request
 *     (0% overhead, stable baseline performance).
 *
 * A/B test results (100K iterations, fixed-size bench):
 *   - C1 (8B):  baseline 9,688 ops/s -> HeapV2 ON 9,762 ops/s (+0.76%)
 *   - C2 (16B): baseline 9,804 ops/s -> HeapV2 ON 9,845 ops/s (+0.42%)
 *   - C3 (32B): baseline 9,840 ops/s -> HeapV2 ON 9,814 ops/s (-0.26%)
 *   - All within noise range: no performance regression.
 *
 * Next steps (Phase 13-A Step 2): implement a magazine supply strategy
 * (feed "leftover" blocks from the existing front or free path).
 */
// tiny_heap_v2.h - Tiny per-thread heap (experimental Box)
//
// Purpose:
// - Provide a very simple per-thread front for tiny allocations.
// - Currently targets small classes (C0–C3) and is gated by ENV:
//   HAKMEM_TINY_HEAP_V2=1
// - Backend remains existing FastCache + Superslab refill.
//
// Design (first pass):
// - Per-thread, per-class small magazine (L0) in front of FastCache.
// - On alloc:
//   1) Pop from magazine.
//   2) If empty, refill magazine from FastCache (and backend via
//      tiny_alloc_fast_refill).
// - On free: still goes through existing free path (hak_tiny_free_fast_v2),
//   which ultimately feeds TLS SLL / drain / Superslab.
//
// This Box is intentionally minimal; performance tuning (sizes, class set)
// is left for later phases.

#ifndef HAK_FRONT_TINY_HEAP_V2_H
#define HAK_FRONT_TINY_HEAP_V2_H

#include "../hakmem_tiny.h"
// Phase 13-B: Magazine capacity (same as Phase 13-A).
// Number of cached BASE pointers per class; overridable at build time.
#ifndef TINY_HEAP_V2_MAG_CAP
#define TINY_HEAP_V2_MAG_CAP 16
#endif
// TinyHeapV2 Magazine (per-thread, per-class).
// Simple LIFO stack of BASE pointers: `top` is the number of valid entries
// (0 = empty, TINY_HEAP_V2_MAG_CAP = full); items[top-1] is popped first.
typedef struct {
    void* items[TINY_HEAP_V2_MAG_CAP];  // cached BASE pointers
    int top;                            // stack depth / next free slot index
} TinyHeapV2Mag;
// TinyHeapV2 Statistics (per-thread, per-class).
// Plain uint64_t counters: each instance is thread-local, so no atomics
// are needed.
typedef struct {
    uint64_t alloc_calls;    // total tiny_heap_v2_alloc() calls for this class
    uint64_t mag_hits;       // allocations served directly from the magazine
    uint64_t refill_calls;   // refill attempts (always 0 in Phase 13-A: no refill)
    uint64_t refill_blocks;  // blocks obtained by refill (always 0 in Phase 13-A)
    uint64_t backend_oom;    // backend OOM events (not incremented in this header;
                             // presumably updated by backend hooks — TODO confirm)
} TinyHeapV2Stats;
// External TLS variables (defined in hakmem_tiny.c).
// One magazine and one stats record per tiny size class, per thread.
extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
// Enable flag (cached once per process).
// ENV: HAKMEM_TINY_HEAP_V2=1 enables the TinyHeapV2 front.
// Returns 1 when enabled, 0 otherwise.
// Fix: the one-shot INIT/FIRST diagnostics used to be printed to stderr
// unconditionally; from inside an allocator this pollutes the output of
// every host program. They are now gated behind HAKMEM_TINY_HEAP_V2_DEBUG.
static inline int tiny_heap_v2_enabled(void) {
    static int g_enable = -1;      // -1 = not yet initialized
    static int g_first_call = 1;   // emit the FIRST trace only once
    if (__builtin_expect(g_enable == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_HEAP_V2");
        g_enable = (e && *e && *e != '0') ? 1 : 0;
        const char* dbg = getenv("HAKMEM_TINY_HEAP_V2_DEBUG");
        if (dbg && *dbg && *dbg != '0') {
            fprintf(stderr, "[HeapV2-INIT] tiny_heap_v2_enabled() called: ENV='%s' → %d\n",
                    e ? e : "(null)", g_enable);
            fflush(stderr);
        } else {
            g_first_call = 0;  // debug off: suppress the FIRST trace as well
        }
    }
    if (g_first_call && g_enable) {
        fprintf(stderr, "[HeapV2-FIRST] Returning enabled=%d\n", g_enable);
        fflush(stderr);
        g_first_call = 0;
    }
    return g_enable;
}
// Class-specific enable mask (cached once per process).
// ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK (bitmask: bit 0=C0, bit 1=C1, bit 2=C2, bit 3=C3)
// Default: 0xF (all classes C0-C3 enabled).
// Example: 0x2 = C1 only, 0x8 = C3 only, 0x6 = C1+C2.
// Returns 1 when `class_idx` is in range and its mask bit is set.
// Fix: strtol() failures are now detected via endptr, and negative or
// unparsable values fall back to the default mask. Previously garbage input
// silently produced mask 0 (disabling every class), and a parsed -1 collided
// with the "uninitialized" sentinel, re-parsing the ENV on every call.
static inline int tiny_heap_v2_class_enabled(int class_idx) {
    static int g_class_mask = -1;  // -1 = not yet initialized
    if (__builtin_expect(g_class_mask == -1, 0)) {
        int mask = 0xF;  // default: C0-C3 all enabled
        const char* e = getenv("HAKMEM_TINY_HEAP_V2_CLASS_MASK");
        if (e && *e) {
            char* endptr;
            long val = strtol(e, &endptr, 0);  // base 0: 0x.. hex, else decimal
            if (endptr != e && val >= 0) {
                mask = (int)val;  // parsed successfully and non-negative
            }
        }
        g_class_mask = mask;
    }
    if (class_idx < 0 || class_idx >= 8) return 0;
    return (g_class_mask & (1 << class_idx)) != 0;
}
// NOTE: This header MUST be included AFTER tiny_alloc_fast.inc.h!
// It relies on fastcache_pop, tiny_alloc_fast_refill and
// hak_tiny_size_to_class, which are static inline functions defined in
// tiny_alloc_fast.inc.h and related headers.

// Phase 13-A Step 1: NO REFILL (avoid circular dependency).
// TinyHeapV2 is a "lucky hit" L0 cache that never refills itself; supply is
// expected to come from the existing front layers, outside TinyHeapV2.
// Kept as a stub so a later phase can plug in a real refill strategy.
// Returns the number of blocks refilled — always 0 here.
static inline int tiny_heap_v2_refill_mag(int class_idx) {
    (void)class_idx;  // intentionally unused: refilling from FastCache here
                      // would re-create the circular dependency
    return 0;         // nothing refilled
}
// Phase 13-A Step 2: Try to push a block into TinyHeapV2 magazine
|
|
|
|
|
|
// Called from free path to supply magazine with "leftover" blocks.
|
|
|
|
|
|
// Returns: 1 if pushed successfully, 0 if magazine is full
|
|
|
|
|
|
static inline int tiny_heap_v2_try_push(int class_idx, void* base) {
|
|
|
|
|
|
// 1. Check if class is enabled
|
|
|
|
|
|
if (class_idx < 0 || class_idx > 3) return 0;
|
|
|
|
|
|
if (!tiny_heap_v2_class_enabled(class_idx)) return 0;
|
|
|
|
|
|
|
|
|
|
|
|
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
|
|
|
|
|
|
|
|
|
|
|
|
// 2. Check if magazine has room
|
|
|
|
|
|
if (mag->top >= TINY_HEAP_V2_MAG_CAP) {
|
|
|
|
|
|
return 0; // Magazine full
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 3. Push BASE pointer into magazine
|
|
|
|
|
|
mag->items[mag->top++] = base;
|
|
|
|
|
|
|
|
|
|
|
|
// DEBUG: Log push events
|
|
|
|
|
|
static int g_push_dbg = -1;
|
|
|
|
|
|
if (g_push_dbg == -1) {
|
|
|
|
|
|
const char* e = getenv("HAKMEM_TINY_HEAP_V2_DEBUG");
|
|
|
|
|
|
g_push_dbg = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (g_push_dbg) {
|
|
|
|
|
|
static __thread int g_push_count[TINY_NUM_CLASSES] = {0};
|
|
|
|
|
|
if (g_push_count[class_idx] < 5) {
|
|
|
|
|
|
fprintf(stderr, "[HeapV2-PUSH] C%d push #%d, base=%p, mag->top=%d\n",
|
|
|
|
|
|
class_idx, g_push_count[class_idx]++, base, mag->top);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return 1; // Success
|
|
|
|
|
|
}
|
|
|
|
|
|
|
// Tiny heap v2 alloc – returns BASE pointer or NULL.
|
|
|
|
|
|
// Phase 13-A Step 1: Minimal "lucky hit" L0 cache (NO REFILL)
|
|
|
|
|
|
// Strategy: Pop from magazine if available, else return NULL immediately.
|
|
|
|
|
|
// Caller is responsible for header write via HAK_RET_ALLOC (BASE → USER conversion).
|
|
|
|
|
|
// Contract:
|
|
|
|
|
|
// - Only handles class 0-3 (8-64B) based on CLASS_MASK
|
|
|
|
|
|
// - Returns BASE pointer (not USER pointer!)
|
|
|
|
|
|
// - Returns NULL if magazine empty (caller falls back to existing path)
|
|
|
|
|
|
static inline void* tiny_heap_v2_alloc(size_t size) {
|
|
|
|
|
|
// 1. Size → class index
|
|
|
|
|
|
int class_idx = hak_tiny_size_to_class(size);
|
|
|
|
|
|
if (__builtin_expect(class_idx < 0, 0)) {
|
|
|
|
|
|
return NULL; // Not a tiny size
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 2. Limit to hot tiny classes (0..3) for now
|
|
|
|
|
|
if (class_idx > 3) {
|
|
|
|
|
|
return NULL; // Fall back to existing path for class 4-7
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 3. Check class-specific enable mask
|
|
|
|
|
|
if (__builtin_expect(!tiny_heap_v2_class_enabled(class_idx), 0)) {
|
|
|
|
|
|
return NULL; // Class disabled via HAKMEM_TINY_HEAP_V2_CLASS_MASK
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
g_tiny_heap_v2_stats[class_idx].alloc_calls++;
|
|
|
|
|
|
|
|
|
|
|
|
// Debug: Print first few allocs
|
|
|
|
|
|
static __thread int g_debug_count[TINY_NUM_CLASSES] = {0};
|
|
|
|
|
|
if (g_debug_count[class_idx] < 3) {
|
|
|
|
|
|
const char* debug_env = getenv("HAKMEM_TINY_HEAP_V2_DEBUG");
|
|
|
|
|
|
if (debug_env && *debug_env && *debug_env != '0') {
|
|
|
|
|
|
fprintf(stderr, "[HeapV2-DEBUG] C%d alloc #%d (total_allocs=%lu)\n",
|
|
|
|
|
|
class_idx, g_debug_count[class_idx]++, g_tiny_heap_v2_stats[class_idx].alloc_calls);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
|
|
|
|
|
|
|
|
|
|
|
|
// 4. ONLY path: pop from magazine if available (lucky hit!)
|
|
|
|
|
|
if (__builtin_expect(mag->top > 0, 0)) { // Expect miss (unlikely hit)
|
|
|
|
|
|
g_tiny_heap_v2_stats[class_idx].mag_hits++;
|
|
|
|
|
|
void* base = mag->items[--mag->top];
|
|
|
|
|
|
return base; // BASE pointer (caller will convert to USER)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 5. Magazine empty: return NULL immediately (NO REFILL)
|
|
|
|
|
|
// Let existing front layers handle this allocation.
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
}
|
|
|
|
|
|
// Print per-class TinyHeapV2 statistics (called at program exit if
// HAKMEM_TINY_HEAP_V2_STATS=1).
// Declaration only: the implementation lives in hakmem_tiny.c so the symbol
// has external linkage.
void tiny_heap_v2_print_stats(void);

#endif // HAK_FRONT_TINY_HEAP_V2_H