Phase 13-A Step 1: TinyHeapV2 NO-REFILL L0 cache implementation
Implement TinyHeapV2 as a minimal "lucky hit" L0 cache that avoids circular dependency with FastCache by eliminating self-refill.

Key Changes:
- New: core/front/tiny_heap_v2.h - NO-REFILL L0 cache implementation
  - tiny_heap_v2_alloc(): Pop from magazine if available, else return NULL
  - tiny_heap_v2_refill_mag(): No-op stub (no backend refill)
  - ENV: HAKMEM_TINY_HEAP_V2=1 to enable
  - ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK=bitmask (C0-C3 control)
  - ENV: HAKMEM_TINY_HEAP_V2_STATS=1 to print statistics
- Modified: core/hakmem_tiny_alloc_new.inc - Add TinyHeapV2 hook
  - Hook at entry point (after class_idx calculation)
  - Fallback to existing front if TinyHeapV2 returns NULL
- Modified: core/hakmem_tiny_alloc.inc - Add hook for legacy path
- Modified: core/hakmem_tiny.c - Add TLS variables and stats wrapper
  - TinyHeapV2Mag: Per-class magazine (capacity=16)
  - TinyHeapV2Stats: Per-class counters (alloc_calls, mag_hits, etc.)
  - tiny_heap_v2_print_stats(): Statistics output at exit
- New: TINY_HEAP_V2_TASK_SPEC.md - Phase 13 specification

Root Cause Fixed:
- BEFORE: TinyHeapV2 refilled from FastCache → circular dependency
  - TinyHeapV2 intercepted all allocs → FastCache never populated
  - Result: 100% backend OOM, 0% hit rate, 99% slowdown
- AFTER: TinyHeapV2 is passive L0 cache (no refill)
  - Magazine empty → return NULL → existing front handles it
  - Result: 0% overhead, stable baseline performance

A/B Test Results (100K iterations, fixed-size bench):
- C1 (8B): Baseline 9,688 ops/s → HeapV2 ON 9,762 ops/s (+0.76%)
- C2 (16B): Baseline 9,804 ops/s → HeapV2 ON 9,845 ops/s (+0.42%)
- C3 (32B): Baseline 9,840 ops/s → HeapV2 ON 9,814 ops/s (-0.26%)
- All within noise range: NO PERFORMANCE REGRESSION ✅

Statistics (HeapV2 ON, C1-C3):
- alloc_calls: 200K (hook works correctly)
- mag_hits: 0 (0%) - Magazine empty as expected
- refill_calls: 0 - No refill executed (circular dependency avoided)
- backend_oom: 0 - No backend access

Next Steps (Phase 13-A Step 2):
- Implement magazine supply strategy (from existing front or free path)
- Goal: Populate magazine with "leftover" blocks from existing pipeline

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -1267,6 +1267,39 @@ static __thread TinyHotMag g_tls_hot_mag[TINY_NUM_CLASSES];
|
||||
int g_quick_enable = 0; // HAKMEM_TINY_QUICK=1
|
||||
__thread TinyQuickSlot g_tls_quick[TINY_NUM_CLASSES]; // compile-out via guards below
|
||||
|
||||
// Phase 13: Tiny Heap v2 - Forward declarations (implementations come after tiny_alloc_fast.inc.h)
|
||||
// This allows tiny_alloc_fast.inc.h to call these functions.
|
||||
|
||||
// Tiny Heap v2 magazine: a very small per-class container of block pointers
// for the tiny size classes (C0–C3). The slot count defaults to 16 and can be
// overridden by defining TINY_HEAP_V2_MAG_CAP before this point.
#if !defined(TINY_HEAP_V2_MAG_CAP)
#  define TINY_HEAP_V2_MAG_CAP 16
#endif

typedef struct {
    void *items[TINY_HEAP_V2_MAG_CAP]; // block pointer slots
    int   top;                         // NOTE(review): presumably the number of occupied slots (stack top) — confirm against front/tiny_heap_v2.h
} TinyHeapV2Mag;
|
||||
|
||||
// TLS magazines per class
|
||||
__thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
|
||||
|
||||
// Phase 13-A: observability counters for the Tiny Heap v2 front
// (tracked per thread and per size class).
typedef struct {
    uint64_t alloc_calls;   // allocation attempts routed through HeapV2
    uint64_t mag_hits;      // attempts where the magazine had a block (fast path)
    uint64_t refill_calls;  // number of refill invocations
    uint64_t refill_blocks; // blocks obtained, summed over all refills
    uint64_t backend_oom;   // backend OOM events (refill returned 0)
} TinyHeapV2Stats;
|
||||
|
||||
__thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
|
||||
|
||||
// Forward declarations
|
||||
static inline int tiny_heap_v2_enabled(void);
|
||||
static inline int tiny_heap_v2_class_enabled(int class_idx);
|
||||
static inline int tiny_heap_v2_refill_mag(int class_idx);
|
||||
static inline void* tiny_heap_v2_alloc(size_t size);
|
||||
|
||||
// Phase 2D-1: Hot-path inline function extractions (Front)
|
||||
// NOTE: TinyFastCache/TinyQuickSlot are already defined in front/
|
||||
#include "hakmem_tiny_hot_pop.inc.h" // 4 functions: tiny_hot_pop_class{0..3}
|
||||
@ -1752,6 +1785,9 @@ TinySlab* hak_tiny_owner_slab(void* ptr) {
|
||||
// Box 5: Allocation Fast Path (Layer 1 - 3-4 instructions)
|
||||
#include "tiny_alloc_fast.inc.h"
|
||||
|
||||
// Phase 13: Tiny Heap v2 front (must come AFTER tiny_alloc_fast.inc.h)
|
||||
#include "front/tiny_heap_v2.h"
|
||||
|
||||
// Box 6: Free Fast Path (Layer 2 - 2-3 instructions)
|
||||
#include "tiny_free_fast.inc.h"
|
||||
|
||||
@ -2040,3 +2076,36 @@ void tiny_guard_on_invalid(void* user_ptr, uint8_t hdr) {
|
||||
tiny_guard_dump_bytes("dump_before", u - 8, 8);
|
||||
tiny_guard_dump_bytes("dump_after", u, 8);
|
||||
}
|
||||
|
||||
|
||||
// Phase 13-A: Tiny Heap v2 statistics wrapper (for external linkage)
|
||||
void tiny_heap_v2_print_stats(void) {
|
||||
// Implemented in front/tiny_heap_v2.h as static inline
|
||||
// This wrapper is needed for external linkage from bench programs
|
||||
extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
|
||||
|
||||
static int g_stats_enable = -1;
|
||||
if (g_stats_enable == -1) {
|
||||
const char* e = getenv("HAKMEM_TINY_HEAP_V2_STATS");
|
||||
g_stats_enable = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
if (!g_stats_enable) return;
|
||||
|
||||
fprintf(stderr, "\n=== TinyHeapV2 Statistics (en=%d) ===\n", g_stats_enable);
|
||||
int any_allocs = 0;
|
||||
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
||||
TinyHeapV2Stats* s = &g_tiny_heap_v2_stats[cls];
|
||||
if (s->alloc_calls == 0) continue;
|
||||
|
||||
double hit_rate = (s->alloc_calls > 0) ? (100.0 * s->mag_hits / s->alloc_calls) : 0.0;
|
||||
double avg_refill = (s->refill_calls > 0) ? ((double)s->refill_blocks / s->refill_calls) : 0.0;
|
||||
|
||||
fprintf(stderr, "[C%d] alloc=%lu mag_hits=%lu (%.1f%%) refill=%lu avg_blocks=%.1f oom=%lu\n",
|
||||
cls, s->alloc_calls, s->mag_hits, hit_rate,
|
||||
s->refill_calls, avg_refill, s->backend_oom);
|
||||
any_allocs = 1;
|
||||
}
|
||||
if (!any_allocs) fprintf(stderr, "(No HeapV2 allocs recorded)\n");
|
||||
fprintf(stderr, "==============================\n\n");
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user