Files
hakmem/core/front/tiny_heap_v2.h
Moe Charm (CI) 5cc1f93622 Phase 13-A Step 1: TinyHeapV2 NO-REFILL L0 cache implementation
Implement TinyHeapV2 as a minimal "lucky hit" L0 cache that avoids
circular dependency with FastCache by eliminating self-refill.

Key Changes:
- New: core/front/tiny_heap_v2.h - NO-REFILL L0 cache implementation
  - tiny_heap_v2_alloc(): Pop from magazine if available, else return NULL
  - tiny_heap_v2_refill_mag(): No-op stub (no backend refill)
  - ENV: HAKMEM_TINY_HEAP_V2=1 to enable
  - ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK=bitmask (C0-C3 control)
  - ENV: HAKMEM_TINY_HEAP_V2_STATS=1 to print statistics
- Modified: core/hakmem_tiny_alloc_new.inc - Add TinyHeapV2 hook
  - Hook at entry point (after class_idx calculation)
  - Fallback to existing front if TinyHeapV2 returns NULL
- Modified: core/hakmem_tiny_alloc.inc - Add hook for legacy path
- Modified: core/hakmem_tiny.c - Add TLS variables and stats wrapper
  - TinyHeapV2Mag: Per-class magazine (capacity=16)
  - TinyHeapV2Stats: Per-class counters (alloc_calls, mag_hits, etc.)
  - tiny_heap_v2_print_stats(): Statistics output at exit
- New: TINY_HEAP_V2_TASK_SPEC.md - Phase 13 specification

Root Cause Fixed:
- BEFORE: TinyHeapV2 refilled from FastCache → circular dependency
  - TinyHeapV2 intercepted all allocs → FastCache never populated
  - Result: 100% backend OOM, 0% hit rate, 99% slowdown
- AFTER: TinyHeapV2 is passive L0 cache (no refill)
  - Magazine empty → return NULL → existing front handles it
  - Result: 0% overhead, stable baseline performance

A/B Test Results (100K iterations, fixed-size bench):
- C1 (8B):  Baseline 9,688 ops/s → HeapV2 ON 9,762 ops/s (+0.76%)
- C2 (16B): Baseline 9,804 ops/s → HeapV2 ON 9,845 ops/s (+0.42%)
- C3 (32B): Baseline 9,840 ops/s → HeapV2 ON 9,814 ops/s (-0.26%)
- All within noise range: NO PERFORMANCE REGRESSION 

Statistics (HeapV2 ON, C1-C3):
- alloc_calls: 200K (hook works correctly)
- mag_hits: 0 (0%) - Magazine empty as expected
- refill_calls: 0 - No refill executed (circular dependency avoided)
- backend_oom: 0 - No backend access

Next Steps (Phase 13-A Step 2):
- Implement magazine supply strategy (from existing front or free path)
- Goal: Populate magazine with "leftover" blocks from existing pipeline

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-15 01:42:57 +09:00

138 lines
5.4 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_heap_v2.h - Tiny per-thread heap (experimental Box)
// Purpose:
// - Provide a very simple per-thread front for tiny allocations.
// - Currently targets small classes (C0-C3) and is gated by ENV:
// HAKMEM_TINY_HEAP_V2=1
// - Backend remains existing FastCache + Superslab refill.
//
// Design (first pass):
// - Per-thread, per-class small magazine (L0) in front of FastCache.
// - On alloc:
// 1) Pop from magazine.
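// 2) If empty, return NULL so the caller falls back to the existing front.
//    (Phase 13-A Step 1: no self-refill. The first-pass design refilled the
//    magazine from FastCache and the backend via tiny_alloc_fast_refill, which
//    created a circular dependency with FastCache.)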
// - On free: still goes through existing free path (hak_tiny_free_fast_v2),
// which ultimately feeds TLS SLL / drain / Superslab.
//
// This Box is intentionally minimal; performance tuning (sizes, class set)
// is left for later phases.
#ifndef HAK_FRONT_TINY_HEAP_V2_H
#define HAK_FRONT_TINY_HEAP_V2_H
#include "../hakmem_tiny.h"
// NOTE: TinyHeapV2Mag struct and g_tiny_heap_v2_mag are defined in hakmem_tiny.c
// This header provides only the implementations (static inline functions).
// Global on/off switch for TinyHeapV2 (cached).
// ENV: HAKMEM_TINY_HEAP_V2 - enabled when set to a non-empty value that does
// not start with '0'.
// The environment is consulted on the first call only; the result is kept in a
// function-local static. Two diagnostic lines are written to stderr: one when
// the environment is first read, and one the first time an enabled result is
// returned.
// Returns 1 when enabled, 0 otherwise.
static inline int tiny_heap_v2_enabled(void) {
    static int cached_state = -1;     // -1 = environment not consulted yet
    static int announce_pending = 1;  // cleared after the one-shot "FIRST" line

    if (__builtin_expect(cached_state == -1, 0)) {
        const char* raw = getenv("HAKMEM_TINY_HEAP_V2");
        int on = 0;
        if (raw && raw[0] && raw[0] != '0') {
            on = 1;
        }
        cached_state = on;
        fprintf(stderr, "[HeapV2-INIT] tiny_heap_v2_enabled() called: ENV='%s' → %d\n",
                raw ? raw : "(null)", cached_state);
        fflush(stderr);
    }

    // One-shot trace, emitted only if the feature actually ended up enabled.
    if (cached_state && announce_pending) {
        fprintf(stderr, "[HeapV2-FIRST] Returning enabled=%d\n", cached_state);
        fflush(stderr);
        announce_pending = 0;
    }

    return cached_state;
}
// Class-specific enable mask (cached).
// ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK (bitmask: bit 0=C0, bit 1=C1, bit 2=C2, bit 3=C3)
//   - Parsed with strtol() base 0: "0x.." is hex, a leading "0" is octal,
//     anything else is decimal.
//   - Default: 0xF (all classes C0-C3 enabled). The default is used when the
//     variable is unset, empty, or does not start with a number.
//   - Example: 0x2 = C1 only, 0x8 = C3 only, 0x6 = C1+C2
// Only the low 8 bits of the parsed value are kept. Those are the only bits
// this function can ever test, and it guarantees the cached mask never equals
// the -1 "not parsed yet" sentinel (a value such as "-1" would otherwise force
// a getenv() + strtol() on every call).
// Returns 1 if class_idx is in [0, 8) and its bit is set in the mask, else 0.
static inline int tiny_heap_v2_class_enabled(int class_idx) {
    enum {
        HEAP_V2_MASK_DEFAULT = 0xF,   // C0-C3
        HEAP_V2_MASK_ALL     = 0xFF,  // every bit this function can test
        HEAP_V2_MASK_BITS    = 8
    };
    static int g_class_mask = -1;  // -1 = environment not parsed yet

    if (__builtin_expect(g_class_mask == -1, 0)) {
        int mask = HEAP_V2_MASK_DEFAULT;
        const char* e = getenv("HAKMEM_TINY_HEAP_V2_CLASS_MASK");
        if (e && *e) {
            char* endptr = NULL;
            long val = strtol(e, &endptr, 0);  // 0 = auto-detect base
            // endptr == e means no digits were consumed: keep the default
            // instead of silently disabling every class with a mask of 0.
            if (endptr != e) {
                mask = (int)(val & HEAP_V2_MASK_ALL);
            }
        }
        g_class_mask = mask;
    }

    if (class_idx < 0 || class_idx >= HEAP_V2_MASK_BITS) return 0;
    return (g_class_mask & (1 << class_idx)) != 0;
}
// NOTE: This header MUST be included AFTER tiny_alloc_fast.inc.h!
// It relies on static inline functions defined in tiny_alloc_fast.inc.h and
// related headers. As of Phase 13-A Step 1 only hak_tiny_size_to_class is
// called here; fastcache_pop and tiny_alloc_fast_refill belonged to the earlier
// self-refilling design.
// Phase 13-A Step 1: magazine refill hook - intentionally a no-op.
// TinyHeapV2 is a "lucky hit" L0 cache that never refills itself, which avoids
// the circular dependency with FastCache. Supplying the magazine is left to the
// existing front layers (outside TinyHeapV2) in a later step; this stub is kept
// for future use.
// class_idx is accepted and ignored. Always returns 0.
static inline int tiny_heap_v2_refill_mag(int class_idx) {
    const int nothing_refilled = 0;  // NO-OP: no backend or FastCache access
    (void)class_idx;
    return nothing_refilled;
}
// Tiny heap v2 alloc: returns a BASE pointer or NULL.
// Phase 13-A Step 1: Minimal "lucky hit" L0 cache (NO REFILL)
// Strategy: Pop from the magazine if available, else return NULL immediately.
// Contract:
// - size is mapped to a class with hak_tiny_size_to_class(); only class 0-3
//   (8-64B) is handled, subject to HAKMEM_TINY_HEAP_V2_CLASS_MASK.
// - Returns BASE pointer (not USER pointer!). The caller is responsible for the
//   header write via HAK_RET_ALLOC (BASE → USER conversion).
// - Returns NULL if the size is not tiny, the class is above 3 or disabled, or
//   the magazine is empty (caller falls back to the existing path).
// Side effects: increments alloc_calls, and mag_hits on a hit, in
// g_tiny_heap_v2_stats for the class.
// Debug: when HAKMEM_TINY_HEAP_V2_DEBUG is set to a non-empty value not starting
// with '0', the first 3 allocations per class on each thread are logged to stderr.
static inline void* tiny_heap_v2_alloc(size_t size) {
    // 1. Size → class index
    int class_idx = hak_tiny_size_to_class(size);
    if (__builtin_expect(class_idx < 0, 0)) {
        return NULL;  // Not a tiny size
    }

    // 2. Limit to hot tiny classes (0..3) for now
    if (class_idx > 3) {
        return NULL;  // Fall back to existing path for class 4-7
    }

    // 3. Check class-specific enable mask
    if (__builtin_expect(!tiny_heap_v2_class_enabled(class_idx), 0)) {
        return NULL;  // Class disabled via HAKMEM_TINY_HEAP_V2_CLASS_MASK
    }

    g_tiny_heap_v2_stats[class_idx].alloc_calls++;

    // Debug: print the first few allocs per class.
    // The env lookup is cached per thread. Without the cache the counter below
    // never advances while debugging is off, so getenv() would run on every
    // single allocation.
    static __thread int g_debug_enabled = -1;  // -1 = env not read on this thread
    static __thread int g_debug_count[TINY_NUM_CLASSES] = {0};
    if (__builtin_expect(g_debug_enabled == -1, 0)) {
        const char* debug_env = getenv("HAKMEM_TINY_HEAP_V2_DEBUG");
        g_debug_enabled = (debug_env && *debug_env && *debug_env != '0') ? 1 : 0;
    }
    if (__builtin_expect(g_debug_enabled, 0) && g_debug_count[class_idx] < 3) {
        // Explicit cast keeps the %lu conversion valid whatever integer type
        // the alloc_calls counter is declared with.
        fprintf(stderr, "[HeapV2-DEBUG] C%d alloc #%d (total_allocs=%lu)\n",
                class_idx, g_debug_count[class_idx]++,
                (unsigned long)g_tiny_heap_v2_stats[class_idx].alloc_calls);
    }

    TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];

    // 4. ONLY path: pop from magazine if available (lucky hit!)
    if (__builtin_expect(mag->top > 0, 0)) {  // Expect miss (unlikely hit)
        g_tiny_heap_v2_stats[class_idx].mag_hits++;
        void* base = mag->items[--mag->top];
        return base;  // BASE pointer (caller will convert to USER)
    }

    // 5. Magazine empty: return NULL immediately (NO REFILL)
    //    Let existing front layers handle this allocation.
    return NULL;
}
// Print TinyHeapV2 statistics (called at program exit if HAKMEM_TINY_HEAP_V2_STATS=1).
// Declaration only: the implementation lives in hakmem_tiny.c so that it has
// external linkage, unlike the static inline helpers defined in this header.
// Takes no arguments and returns nothing.
void tiny_heap_v2_print_stats(void);
#endif // HAK_FRONT_TINY_HEAP_V2_H