/*
 * NOTE: The text below is page residue from the repository web view this file
 * was copied from (last-commit message and file metadata); it is not part of
 * the header itself.
 *
 * ## Summary
 * - ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
 * - core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
 * - core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
 * - A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)
 *
 * ## Files Modified
 * - core/box/pool_api.inc.h: pool_zero_mode_box.h include
 * - core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
 * - core/hakmem_pool.c: zero mode 参照・制御ロジック
 * - core/box/pool_zero_mode_box.h (新設): enum/getter
 * - CURRENT_TASK.md: Phase ML1 結果記載
 *
 * ## Test Results
 * | Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
 * |------------|----------------|------------------|-------------|
 * | 10K        | 3.06 M ops/s   | 3.17 M ops/s     | +3.65%      |
 * | 1M         | 23.71 M ops/s  | 27.34 M ops/s    | **+15.34%** |
 *
 * 🤖 Generated with [Claude Code](https://claude.com/claude-code)
 * Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
 *
 * File metadata: 281 lines, 10 KiB, C
 */
// tiny_heap_v2.h - Tiny per-thread heap (Front-V2, tcache-like)
//
// Goal:
// - Place a single-layer TLS magazine at the front, bypassing FastCache/SFC
//   and similar layers.
// - A/B switchable via ENV (default OFF), so it is safe and easy to roll back.
// - Covers classes C0–C3 only. When the magazine is empty it is refilled
//   via SLL → SS.
|
||
|
||
#ifndef HAK_FRONT_TINY_HEAP_V2_H
|
||
#define HAK_FRONT_TINY_HEAP_V2_H
|
||
|
||
#include "../hakmem_tiny.h"
|
||
#include "../box/tls_sll_box.h"
|
||
#include "../hakmem_env_cache.h"
|
||
|
||
// TLS SLL gate consulted by the magazine refill path.
// When the build does not define TINY_FRONT_TLS_SLL_ENABLED, the gate is the
// constant 1; otherwise it forwards to the build-provided value.
#ifndef TINY_FRONT_TLS_SLL_ENABLED
#define HAK_TINY_TLS_SLL_ENABLED_FALLBACK 1
#else
#define HAK_TINY_TLS_SLL_ENABLED_FALLBACK TINY_FRONT_TLS_SLL_ENABLED
#endif
|
||
|
||
// Front-V2 gate consulted by the magazine alloc path.
// When the build does not define TINY_FRONT_HEAP_V2_ENABLED, the gate expands
// to a call to tiny_heap_v2_enabled() (runtime, ENV-driven); otherwise it
// forwards to the build-provided value.
#ifndef TINY_FRONT_HEAP_V2_ENABLED
#define HAK_TINY_HEAP_V2_ENABLED_FALLBACK tiny_heap_v2_enabled()
#else
#define HAK_TINY_HEAP_V2_ENABLED_FALLBACK TINY_FRONT_HEAP_V2_ENABLED
#endif
|
||
#include <stdlib.h>
|
||
#include <stdio.h>
|
||
|
||
// Phase 13-B: Magazine capacity (same as Phase 13-A).
// Number of block slots in each per-thread, per-class magazine.
// May be overridden by defining TINY_HEAP_V2_MAG_CAP before this header.
#ifndef TINY_HEAP_V2_MAG_CAP
#define TINY_HEAP_V2_MAG_CAP 16
#endif
|
||
|
||
// TinyHeapV2 Magazine (per-thread, per-class)
|
||
typedef struct {
|
||
void* items[TINY_HEAP_V2_MAG_CAP];
|
||
int top;
|
||
} TinyHeapV2Mag;
|
||
|
||
// TinyHeapV2 Statistics (per-thread, per-class).
// Counters are only updated when tiny_heap_v2_stats_enabled() is true.
typedef struct {
    uint64_t alloc_calls;    // allocations served from the magazine
    uint64_t mag_hits;       // incremented together with alloc_calls on every served allocation
    uint64_t refill_calls;   // refills that moved at least one block into the magazine
    uint64_t refill_blocks;  // total blocks moved into the magazine by refills
    uint64_t backend_oom;    // allocations that returned NULL because the refill produced nothing
} TinyHeapV2Stats;
|
||
|
||
// External TLS variables (defined in hakmem_tiny.c)
|
||
extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
|
||
extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
|
||
extern __thread int g_tls_heap_v2_initialized;
|
||
|
||
// Backend refill helpers (implemented in Tiny refill path).
// In this header they are called with (class_idx, magazine capacity) to
// top up the TLS SLL before the magazine steals from it; the return value is
// ignored here. NOTE(review): presumably returns the number of blocks added
// to the SLL — confirm against the implementation.
int sll_refill_small_from_ss(int class_idx, int max_take);
int sll_refill_batch_from_ss(int class_idx, int max_take);
|
||
|
||
// Enable flag (cached)
// ENV: HAKMEM_TINY_FRONT_V2
//   - unset, empty, or beginning with '0' (default): OFF
//   - any other value: ON (Front-V2 enabled; the magazine is placed first,
//     without going through FastCache/SFC)
// The environment is consulted on the first call only; the result is kept in
// a function-local static, so later changes to the variable are not observed.
// Returns 1 when Front-V2 is enabled, 0 otherwise.
static inline int tiny_heap_v2_enabled(void) {
    static int g_enable = -1;  // -1 = environment not read yet
    if (__builtin_expect(g_enable == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_FRONT_V2");
        g_enable = (e && *e && *e != '0') ? 1 : 0;
    }
    return g_enable;
}
|
||
|
||
// Class-specific enable mask (cached)
// ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK (bitmask: bit 0=C0, bit 1=C1, bit 2=C2, bit 3=C3)
// Default: 0xE (C1-C3 only, skip C0 8B due to -5% regression)
// Example: 0x2 = C1 only, 0x8 = C3 only, 0x6 = C1+C2, 0xF = all C0-C3
//
// The value is parsed with strtol(..., base 0), so "0x.." is hex, a leading
// "0" is octal, anything else is decimal. If the variable is unset, empty,
// or contains no parsable number, the default mask is used. Only the low
// 8 bits are kept: they are the only bits this function ever consults, and
// masking guarantees the cached value can never equal the -1 "not parsed
// yet" sentinel (which would force a getenv/strtol on every call).
//
// Returns 1 if class_idx is in [0, 8) and its bit is set in the mask, else 0.
static inline int tiny_heap_v2_class_enabled(int class_idx) {
    static int g_class_mask = -1;  // -1 = environment not read yet
    if (__builtin_expect(g_class_mask == -1, 0)) {
        int mask = 0xE;  // Default: C1-C3 (16/32/64B), skip C0 8B (-5% regression)
        const char* e = getenv("HAKMEM_TINY_HEAP_V2_CLASS_MASK");
        if (e && *e) {
            char* endptr = NULL;
            long val = strtol(e, &endptr, 0);  // 0 = auto-detect base
            if (endptr != e) {
                // At least one digit was consumed: accept the value.
                mask = (int)((unsigned long)val & 0xFFul);
            }
        }
        g_class_mask = mask;
    }

    if (class_idx < 0 || class_idx >= 8) return 0;
    return (g_class_mask & (1 << class_idx)) != 0;
}
|
||
|
||
// Leftover mode flag (cached)
// ENV: HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE
//   - 0 (default): L0 gets blocks first ("stealing" design, +18% @ 32B)
//   - 1: L1 primary owner, L0 gets leftovers ("leftover" design, Box-clean but -5% @ 16B)
//
// Decision (Phase 13-B): Default to Mode 0 (Stealing) for performance
// Rationale (ChatGPT analysis):
//   - Learning layer primarily observes Superslab/Pool statistics
//   - L0 stealing doesn't corrupt Superslab carving/drain signals
//   - If needed, add TinyHeapV2 hit/miss counters to learning layer later
//   - Performance gain (+18% @ 32B) justifies less-strict Box boundary
//
// The variable is read on the first call only and cached in a function-local
// static. Unset, empty, or a value beginning with '0' yields 0; any other
// value yields 1.
static inline int tiny_heap_v2_leftover_mode(void) {
    static int g_leftover_mode = -1;  // -1 = environment not read yet
    if (__builtin_expect(g_leftover_mode == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE");
        g_leftover_mode = (e && *e && *e != '0') ? 1 : 0;
    }
    return g_leftover_mode;
}
|
||
|
||
// NOTE: This header MUST be included AFTER tiny_alloc_fast.inc.h!
// It uses fastcache_pop, tiny_alloc_fast_refill, hak_tiny_size_to_class which are
// static inline functions defined in tiny_alloc_fast.inc.h and related headers.
|
||
|
||
// Phase 13-A Step 2: Try to push a block into TinyHeapV2 magazine
|
||
// Called from free path to supply magazine with "leftover" blocks.
|
||
// Returns: 1 if pushed successfully, 0 if magazine is full
|
||
static inline int tiny_heap_v2_try_push(int class_idx, void* base) {
|
||
// 1. Check if class is enabled
|
||
if (class_idx < 0 || class_idx > 3) return 0;
|
||
if (!tiny_heap_v2_enabled()) return 0;
|
||
if (!tiny_heap_v2_class_enabled(class_idx)) return 0;
|
||
|
||
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
|
||
|
||
// 2. Check if magazine has room
|
||
if (mag->top >= TINY_HEAP_V2_MAG_CAP) {
|
||
return 0; // Magazine full
|
||
}
|
||
|
||
// 3. Push BASE pointer into magazine
|
||
mag->items[mag->top++] = base;
|
||
|
||
// DEBUG: Log push events
|
||
#if !HAKMEM_BUILD_RELEASE
|
||
static int g_push_dbg = -1;
|
||
if (g_push_dbg == -1) {
|
||
const char* e = getenv("HAKMEM_TINY_HEAP_V2_DEBUG");
|
||
g_push_dbg = (e && *e && *e != '0') ? 1 : 0;
|
||
}
|
||
if (g_push_dbg) {
|
||
static __thread int g_push_count[TINY_NUM_CLASSES] = {0};
|
||
if (g_push_count[class_idx] < 5) {
|
||
fprintf(stderr, "[HeapV2-PUSH] C%d push #%d, base=%p, mag->top=%d\n",
|
||
class_idx, g_push_count[class_idx]++, base, mag->top);
|
||
}
|
||
}
|
||
#endif
|
||
|
||
return 1; // Success
|
||
}
|
||
|
||
// Stats gate (ENV cached).
// Thin wrapper over the HAK_ENV_TINY_HEAP_V2_STATS() accessor; the counters in
// TinyHeapV2Stats are only updated when this returns non-zero.
static inline int tiny_heap_v2_stats_enabled(void) {
    return HAK_ENV_TINY_HEAP_V2_STATS();
}
|
||
|
||
// TLS HeapV2 initialization barrier (ensures mag->top is zero on first use)
|
||
static inline void tiny_heap_v2_ensure_init(void) {
|
||
extern __thread int g_tls_heap_v2_initialized;
|
||
extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[];
|
||
|
||
if (__builtin_expect(!g_tls_heap_v2_initialized, 0)) {
|
||
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
|
||
g_tiny_heap_v2_mag[i].top = 0;
|
||
}
|
||
g_tls_heap_v2_initialized = 1;
|
||
}
|
||
}
|
||
|
||
// Magazine refill from TLS SLL/backend
|
||
static inline int tiny_heap_v2_refill_mag(int class_idx) {
|
||
// FIX: Ensure TLS is initialized before first magazine access
|
||
tiny_heap_v2_ensure_init();
|
||
if (class_idx < 0 || class_idx > 3) return 0;
|
||
if (!tiny_heap_v2_class_enabled(class_idx)) return 0;
|
||
|
||
// Phase 7-Step7: Use config macro for dead code elimination in PGO mode
|
||
if (!HAK_TINY_TLS_SLL_ENABLED_FALLBACK) return 0;
|
||
|
||
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
|
||
const int cap = TINY_HEAP_V2_MAG_CAP;
|
||
int filled = 0;
|
||
|
||
// FIX: Validate mag->top before use (prevent uninitialized TLS corruption)
|
||
if (mag->top < 0 || mag->top > cap) {
|
||
static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0};
|
||
if (!s_reset_logged[class_idx]) {
|
||
fprintf(stderr, "[HEAP_V2_REFILL] C%d mag->top=%d corrupted, reset to 0\n",
|
||
class_idx, mag->top);
|
||
s_reset_logged[class_idx] = 1;
|
||
}
|
||
mag->top = 0;
|
||
}
|
||
|
||
// First, steal from TLS SLL if already available.
|
||
while (mag->top < cap) {
|
||
void* base = NULL;
|
||
if (!tls_sll_pop(class_idx, &base)) break;
|
||
mag->items[mag->top++] = base;
|
||
filled++;
|
||
}
|
||
|
||
// If magazine is still empty, ask backend to refill SLL once, then steal again.
|
||
if (mag->top < cap && filled == 0) {
|
||
#if HAKMEM_TINY_P0_BATCH_REFILL
|
||
(void)sll_refill_batch_from_ss(class_idx, cap);
|
||
#else
|
||
(void)sll_refill_small_from_ss(class_idx, cap);
|
||
#endif
|
||
while (mag->top < cap) {
|
||
void* base = NULL;
|
||
if (!tls_sll_pop(class_idx, &base)) break;
|
||
mag->items[mag->top++] = base;
|
||
filled++;
|
||
}
|
||
}
|
||
|
||
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
|
||
if (filled > 0) {
|
||
g_tiny_heap_v2_stats[class_idx].refill_calls++;
|
||
g_tiny_heap_v2_stats[class_idx].refill_blocks += (uint64_t)filled;
|
||
}
|
||
}
|
||
return filled;
|
||
}
|
||
|
||
// Magazine pop (fast path)
|
||
static inline void* tiny_heap_v2_alloc_by_class(int class_idx) {
|
||
// FIX: Ensure TLS is initialized before first magazine access
|
||
tiny_heap_v2_ensure_init();
|
||
if (class_idx < 0 || class_idx > 3) return NULL;
|
||
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode
|
||
if (!HAK_TINY_HEAP_V2_ENABLED_FALLBACK) return NULL;
|
||
if (!tiny_heap_v2_class_enabled(class_idx)) return NULL;
|
||
|
||
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
|
||
|
||
// Hit: magazine has entries
|
||
if (__builtin_expect(mag->top > 0, 1)) {
|
||
// FIX: Add underflow protection before array access
|
||
const int cap = TINY_HEAP_V2_MAG_CAP;
|
||
if (mag->top > cap || mag->top < 0) {
|
||
static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0};
|
||
if (!s_reset_logged[class_idx]) {
|
||
fprintf(stderr, "[HEAP_V2_ALLOC] C%d mag->top=%d corrupted, reset to 0\n",
|
||
class_idx, mag->top);
|
||
s_reset_logged[class_idx] = 1;
|
||
}
|
||
mag->top = 0;
|
||
return NULL; // Fall through to refill path
|
||
}
|
||
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
|
||
g_tiny_heap_v2_stats[class_idx].alloc_calls++;
|
||
g_tiny_heap_v2_stats[class_idx].mag_hits++;
|
||
}
|
||
return mag->items[--mag->top];
|
||
}
|
||
|
||
// Miss: try single refill from SLL/backend
|
||
int filled = tiny_heap_v2_refill_mag(class_idx);
|
||
if (filled > 0 && mag->top > 0) {
|
||
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
|
||
g_tiny_heap_v2_stats[class_idx].alloc_calls++;
|
||
g_tiny_heap_v2_stats[class_idx].mag_hits++;
|
||
}
|
||
return mag->items[--mag->top];
|
||
}
|
||
|
||
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
|
||
g_tiny_heap_v2_stats[class_idx].backend_oom++;
|
||
}
|
||
return NULL;
|
||
}
|
||
|
||
// Print statistics (called at program exit if HAKMEM_TINY_HEAP_V2_STATS=1, impl in hakmem_tiny.c)
void tiny_heap_v2_print_stats(void);
|
||
|
||
#endif // HAK_FRONT_TINY_HEAP_V2_H
|