Files
hakmem/core/front/tiny_heap_v2.h
Moe Charm (CI) bb70d422dc Phase 13-B: TinyHeapV2 supply path with dual-mode A/B framework (Stealing vs Leftover)
Summary:
- Implemented free path supply with ENV-gated A/B modes (HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE)
- Mode 0 (Stealing, default): L0 gets freed blocks first → +18% @ 32B
- Mode 1 (Leftover): L1 primary owner, L0 gets leftovers → Box-clean but -5% @ 16B
- Decision: Default to Stealing for performance (ChatGPT analysis: L0 doesn't corrupt learning layer signals)

Performance (100K iterations, workset=128):
- 16B: 43.9M → 45.6M ops/s (+3.9%)
- 32B: 41.9M → 49.6M ops/s (+18.4%) 
- 64B: 51.2M → 51.5M ops/s (+0.6%)
- 100% magazine hit rate (supply from free path working correctly)

Implementation:
- tiny_free_fast_v2.inc.h: Dual-mode supply (lines 134-166)
- tiny_heap_v2.h: Add tiny_heap_v2_leftover_mode() flag + rationale doc
- tiny_alloc_fast.inc.h: Alloc hook with tiny_heap_v2_alloc_by_class()
- CURRENT_TASK.md: Updated Phase 13-B status (complete) with A/B results

ENV flags:
- HAKMEM_TINY_HEAP_V2=1                      # Enable TinyHeapV2
- HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE=0        # Mode 0 (Stealing, default)
- HAKMEM_TINY_HEAP_V2_CLASS_MASK=0xE         # C1-C3 only (skip C0 -5% regression)
- HAKMEM_TINY_HEAP_V2_STATS=1                # Print statistics

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-15 16:28:40 +09:00

197 lines
7.4 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_heap_v2.h - Tiny per-thread heap (experimental Box)
// Purpose:
// - Provide a very simple per-thread front for tiny allocations.
// - Currently targets small classes (C0-C3) and is gated by ENV:
// HAKMEM_TINY_HEAP_V2=1
// - Backend remains existing FastCache + Superslab refill.
//
// Design (first pass):
// - Per-thread, per-class small magazine (L0) in front of FastCache.
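// - On alloc:
// 1) Pop from magazine.
// 2) If empty, return NULL; the caller falls back to the existing path
//    (FastCache, and the backend via tiny_alloc_fast_refill). The magazine
//    does not refill itself: tiny_heap_v2_refill_mag() is a no-op stub.
// - On free: the free path (hak_tiny_free_fast_v2) can hand a block to the
//   magazine via tiny_heap_v2_try_push(); blocks not taken continue through
//   the existing path, which ultimately feeds TLS SLL / drain / Superslab.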
//
// This Box is intentionally minimal; performance tuning (sizes, class set)
// is left for later phases.
#ifndef HAK_FRONT_TINY_HEAP_V2_H
#define HAK_FRONT_TINY_HEAP_V2_H
#include "../hakmem_tiny.h"
// Phase 13-B: Magazine capacity (same as Phase 13-A)
// Number of pointer slots in each TinyHeapV2Mag. The #ifndef guard lets a build
// override the value by defining TINY_HEAP_V2_MAG_CAP before this header is
// included.
#ifndef TINY_HEAP_V2_MAG_CAP
#define TINY_HEAP_V2_MAG_CAP 16
#endif
// TinyHeapV2 Magazine (per-thread, per-class)
// Fixed-capacity LIFO stack of block pointers: tiny_heap_v2_try_push() stores
// at items[top++], tiny_heap_v2_alloc_by_class() takes from items[--top].
typedef struct {
// Cached BASE pointers; slots [0, top) are occupied.
void* items[TINY_HEAP_V2_MAG_CAP];
// Number of cached pointers (0 = empty, TINY_HEAP_V2_MAG_CAP = full).
int top;
} TinyHeapV2Mag;
// TinyHeapV2 Statistics (per-thread, per-class)
// Within this header only alloc_calls and mag_hits are written, both on a
// magazine hit in tiny_heap_v2_alloc_by_class(); the remaining counters are
// not touched here.
typedef struct {
// Incremented by tiny_heap_v2_alloc_by_class() each time it returns a block.
uint64_t alloc_calls;
// Allocations served from the magazine.
uint64_t mag_hits;
// NOTE(review): presumably the number of refill attempts - not written in this header.
uint64_t refill_calls;
// NOTE(review): presumably the number of blocks obtained by refills - not written in this header.
uint64_t refill_blocks;
// NOTE(review): presumably backend out-of-memory events - not written in this header.
uint64_t backend_oom;
} TinyHeapV2Stats;
// External TLS variables (defined in hakmem_tiny.c)
// Each thread has its own copy (__thread); both arrays are indexed by class_idx
// and hold one entry per tiny size class (TINY_NUM_CLASSES).
extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
// Enable flag (cached)
// ENV: HAKMEM_TINY_HEAP_V2 - unset, empty, or a value starting with '0'
//      disables TinyHeapV2; any other value enables it.
// The environment is read once, on the first call; the result is kept in a
// function-local static and returned unchanged on every later call.
// Returns: 1 if TinyHeapV2 is enabled, 0 otherwise.
// The [HeapV2-INIT] / [HeapV2-FIRST] diagnostics are written to stderr only in
// non-release builds (HAKMEM_BUILD_RELEASE unset or 0), so release binaries do
// not emit unsolicited output.
static inline int tiny_heap_v2_enabled(void) {
    static int g_enable = -1;  // -1 = environment not read yet
    if (__builtin_expect(g_enable == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_HEAP_V2");
        g_enable = (e && *e && *e != '0') ? 1 : 0;
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[HeapV2-INIT] tiny_heap_v2_enabled() called: ENV='%s' → %d\n",
                e ? e : "(null)", g_enable);
        fflush(stderr);
#endif
    }
#if !HAKMEM_BUILD_RELEASE
    {
        // One-shot trace the first time an enabled result is handed out.
        static int g_first_call = 1;
        if (g_first_call && g_enable) {
            fprintf(stderr, "[HeapV2-FIRST] Returning enabled=%d\n", g_enable);
            fflush(stderr);
            g_first_call = 0;
        }
    }
#endif
    return g_enable;
}
// Class-specific enable mask (cached)
// ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK (bitmask: bit 0=C0, bit 1=C1, bit 2=C2, bit 3=C3)
// Default: 0xF (all classes C0-C3 enabled). The default is also used when the
//          variable is unset, empty, or does not start with a number.
// Example: 0x2 = C1 only, 0x8 = C3 only, 0x6 = C1+C2
// The value is parsed once by strtol() with base 0 (0x.. hex, 0.. octal,
// otherwise decimal) and cached in a function-local static.
// Returns: 1 if the bit for class_idx is set in the mask, 0 otherwise
//          (including any class_idx outside 0..7).
static inline int tiny_heap_v2_class_enabled(int class_idx) {
    static int g_class_mask = -1;  // -1 = not parsed yet
    if (__builtin_expect(g_class_mask == -1, 0)) {
        int mask = 0xF;  // Default: C0-C3 all enabled
        const char* e = getenv("HAKMEM_TINY_HEAP_V2_CLASS_MASK");
        if (e && *e) {
            char* endptr = NULL;
            long val = strtol(e, &endptr, 0);
            if (endptr != e) {
                // Keep only bits 0-7, the only ones tested below. This also
                // guarantees the cached value can never equal the -1 sentinel;
                // without it a mask such as "-1" would re-run getenv()/strtol()
                // on every call.
                mask = (int)(val & 0xFF);
            }
            // else: no digits were parsed, keep the default mask.
        }
        g_class_mask = mask;
    }
    if (class_idx < 0 || class_idx >= 8) return 0;
    return (g_class_mask & (1 << class_idx)) != 0;
}
// Leftover mode flag (cached)
// ENV: HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE
// - 0 (default): L0 gets blocks first ("stealing" design, +18% @ 32B)
// - 1: L1 primary owner, L0 gets leftovers ("leftover" design, Box-clean but -5% @ 16B)
//
// Decision (Phase 13-B): Default to Mode 0 (Stealing) for performance
// Rationale (ChatGPT analysis):
// - Learning layer primarily observes Superslab/Pool statistics
// - L0 stealing doesn't corrupt Superslab carving/drain signals
// - If needed, add TinyHeapV2 hit/miss counters to learning layer later
// - Performance gain (+18% @ 32B) justifies less-strict Box boundary
static inline int tiny_heap_v2_leftover_mode(void) {
    // Resolved once from HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE and cached:
    // unset, empty, or a value starting with '0' -> 0; anything else -> 1.
    static int cached_mode = -1;  // -1 = environment not read yet
    if (__builtin_expect(cached_mode != -1, 1)) {
        return cached_mode;
    }
    const char* env_value = getenv("HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE");
    if (!env_value || env_value[0] == '\0' || env_value[0] == '0') {
        cached_mode = 0;
    } else {
        cached_mode = 1;
    }
    return cached_mode;
}
// NOTE: This header MUST be included AFTER tiny_alloc_fast.inc.h!
// It uses fastcache_pop, tiny_alloc_fast_refill, hak_tiny_size_to_class which are
// static inline functions defined in tiny_alloc_fast.inc.h and related headers.
// Phase 13-A Step 1: NO REFILL (avoid circular dependency)
// TinyHeapV2 is a "lucky hit" L0 cache that doesn't refill itself.
// Refill will come from existing front layers later (outside TinyHeapV2).
// This function is currently a no-op stub for future use.
static inline int tiny_heap_v2_refill_mag(int class_idx) {
    // Intentional stub: nothing is refilled from here, so the class index is
    // ignored and the result is always 0.
    const int result = 0;
    (void)class_idx;
    return result;
}
// Phase 13-A Step 2: Try to push a block into TinyHeapV2 magazine
// Called from free path to supply magazine with "leftover" blocks.
// Returns: 1 if pushed successfully, 0 if magazine is full
static inline int tiny_heap_v2_try_push(int class_idx, void* base) {
    // Debug logging switch (HAKMEM_TINY_HEAP_V2_DEBUG): resolved lazily after
    // the first successful push and cached from then on.
    static int g_push_dbg = -1;

    // Only classes 0..3 that are enabled in the class mask are accepted.
    if (class_idx < 0 || class_idx > 3 || !tiny_heap_v2_class_enabled(class_idx)) {
        return 0;
    }

    TinyHeapV2Mag* const slot = &g_tiny_heap_v2_mag[class_idx];
    if (slot->top >= TINY_HEAP_V2_MAG_CAP) {
        return 0;  // Magazine full
    }

    // Store the BASE pointer on top of the stack.
    slot->items[slot->top] = base;
    slot->top += 1;

    if (g_push_dbg == -1) {
        const char* dbg_env = getenv("HAKMEM_TINY_HEAP_V2_DEBUG");
        g_push_dbg = (dbg_env && *dbg_env && *dbg_env != '0') ? 1 : 0;
    }
    if (!g_push_dbg) {
        return 1;  // Pushed, logging off
    }

    // Log at most the first five pushes per class on this thread.
    static __thread int g_push_count[TINY_NUM_CLASSES] = {0};
    if (g_push_count[class_idx] < 5) {
        const int push_no = g_push_count[class_idx];
        g_push_count[class_idx] = push_no + 1;
        fprintf(stderr, "[HeapV2-PUSH] C%d push #%d, base=%p, mag->top=%d\n",
                class_idx, push_no, base, slot->top);
    }
    return 1;  // Pushed
}
// Tiny heap v2 alloc returns BASE pointer or NULL.
// Phase 13-A Step 1: Minimal "lucky hit" L0 cache (NO REFILL)
// Strategy: Pop from magazine if available, else return NULL immediately.
// Caller is responsible for header write via HAK_RET_ALLOC (BASE → USER conversion).
// Contract:
// - Only handles class 0-3 (8-64B) based on CLASS_MASK
// - Returns BASE pointer (not USER pointer!)
// - Returns NULL if magazine empty (caller falls back to existing path)
//
// PERFORMANCE FIX: Accept class_idx as parameter to avoid redundant size→class conversion
static inline void* tiny_heap_v2_alloc_by_class(int class_idx) {
// FAST PATH: Caller already validated class_idx (0-3), skip redundant checks
#if !HAKMEM_BUILD_RELEASE
// Debug: Class-specific enable mask (only in debug builds)
if (__builtin_expect(!tiny_heap_v2_class_enabled(class_idx), 0)) {
return NULL; // Class disabled via HAKMEM_TINY_HEAP_V2_CLASS_MASK
}
#endif
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
// Pop from magazine if available (lucky hit!)
if (__builtin_expect(mag->top > 0, 1)) { // Expect HIT (likely, 99% hit rate)
g_tiny_heap_v2_stats[class_idx].alloc_calls++;
g_tiny_heap_v2_stats[class_idx].mag_hits++;
void* base = mag->items[--mag->top];
return base; // BASE pointer (caller will convert to USER)
}
// Magazine empty: return NULL immediately (NO REFILL)
return NULL;
}
// Print statistics (called at program exit if HAKMEM_TINY_HEAP_V2_STATS=1)
// Declaration only (implementation in hakmem_tiny.c for external linkage)
// Unlike the static inline helpers in this header, this function is only
// declared here; nothing in this header calls it.
void tiny_heap_v2_print_stats(void);
#endif // HAK_FRONT_TINY_HEAP_V2_H