Files
hakmem/core/front/tiny_heap_v2.h
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)
## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

281 lines
10 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

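// tiny_heap_v2.h - Tiny per-thread heap (Front-V2, tcache-like)
// Goal:
// - Put a single-layer TLS magazine at the front, bypassing FastCache/SFC and similar layers.
// - A/B switchable via ENV (default OFF, so it is easy and safe to revert).
// - Targets classes C0-C3 only. When the magazine is empty, refill via SLL -> SS.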
#ifndef HAK_FRONT_TINY_HEAP_V2_H
#define HAK_FRONT_TINY_HEAP_V2_H
#include "../hakmem_tiny.h"
#include "../box/tls_sll_box.h"
#include "../hakmem_env_cache.h"
// TLS SLL gate checked by tiny_heap_v2_refill_mag(): use the build-time value
// of TINY_FRONT_TLS_SLL_ENABLED when the build defines it; otherwise the gate
// is the constant 1 (TLS SLL treated as enabled).
#ifndef TINY_FRONT_TLS_SLL_ENABLED
#define HAK_TINY_TLS_SLL_ENABLED_FALLBACK 1
#else
#define HAK_TINY_TLS_SLL_ENABLED_FALLBACK TINY_FRONT_TLS_SLL_ENABLED
#endif
// Front-V2 gate checked by tiny_heap_v2_alloc_by_class(): use the build-time
// value of TINY_FRONT_HEAP_V2_ENABLED when the build defines it; otherwise the
// gate expands to a call to tiny_heap_v2_enabled(), which is defined further
// down in this header (the macro is only expanded after that definition).
#ifndef TINY_FRONT_HEAP_V2_ENABLED
#define HAK_TINY_HEAP_V2_ENABLED_FALLBACK tiny_heap_v2_enabled()
#else
#define HAK_TINY_HEAP_V2_ENABLED_FALLBACK TINY_FRONT_HEAP_V2_ENABLED
#endif
#include <stdlib.h>
#include <stdio.h>
// Phase 13-B: Magazine capacity (same as Phase 13-A)
// Number of pointer slots in each TinyHeapV2Mag. The #ifndef guard lets a
// build override the value by defining TINY_HEAP_V2_MAG_CAP before this
// header is included.
#ifndef TINY_HEAP_V2_MAG_CAP
#define TINY_HEAP_V2_MAG_CAP 16
#endif
// TinyHeapV2 Magazine (per-thread, per-class)
// Fixed-capacity LIFO stack of block pointers: this header pushes with
// items[top++] and pops with items[--top].
typedef struct {
void* items[TINY_HEAP_V2_MAG_CAP]; // stored block (BASE) pointers; slots [0, top) are in use
int top; // number of stored entries; expected range is 0..TINY_HEAP_V2_MAG_CAP
} TinyHeapV2Mag;
// TinyHeapV2 Statistics (per-thread, per-class)
// Counters are only updated when tiny_heap_v2_stats_enabled() is non-zero.
typedef struct {
uint64_t alloc_calls; // tiny_heap_v2_alloc_by_class() calls that returned a block
uint64_t mag_hits; // bumped together with alloc_calls (also when the block came from a refill done in the same call)
uint64_t refill_calls; // tiny_heap_v2_refill_mag() calls that moved at least one block
uint64_t refill_blocks; // total blocks moved into the magazine by those refills
uint64_t backend_oom; // alloc calls that returned NULL because the refill produced nothing
} TinyHeapV2Stats;
// External TLS variables (defined in hakmem_tiny.c)
// One magazine and one statistics record per size class, per thread.
extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
// Per-thread flag; tiny_heap_v2_ensure_init() sets it to 1 after zeroing every
// magazine's top index.
extern __thread int g_tls_heap_v2_initialized;
// Backend refill helpers (implemented in Tiny refill path)
// tiny_heap_v2_refill_mag() calls exactly one of them (selected by
// HAKMEM_TINY_P0_BATCH_REFILL) with the magazine capacity as max_take when its
// first pass over the TLS SLL moved no blocks; the return value is ignored there.
// NOTE(review): presumably they push up to max_take blocks onto the TLS SLL
// for class_idx - confirm against the implementation.
int sll_refill_small_from_ss(int class_idx, int max_take);
int sll_refill_batch_from_ss(int class_idx, int max_take);
// Front-V2 master switch (read once from the environment, then cached).
// ENV: HAKMEM_TINY_FRONT_V2
//   - unset, empty, or starting with '0' (default): OFF
//   - anything else: ON (Front-V2 enabled; the magazine sits in front and
//     FastCache/SFC are not consulted)
// Returns 1 when enabled, 0 otherwise.
static inline int tiny_heap_v2_enabled(void) {
    static int cached_flag = -1;  // -1 means the ENV has not been read yet

    // Common case: the answer is already cached.
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }

    const char* raw = getenv("HAKMEM_TINY_FRONT_V2");
    if (raw == NULL || raw[0] == '\0' || raw[0] == '0') {
        cached_flag = 0;
    } else {
        cached_flag = 1;
    }
    return cached_flag;
}
// Class-specific enable mask (parsed once from the environment, then cached)
// ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK (bitmask: bit 0=C0, bit 1=C1, bit 2=C2, bit 3=C3)
// Default: 0xE (C1-C3 only, skip C0 8B due to -5% regression)
// Example: 0x2 = C1 only, 0x8 = C3 only, 0x6 = C1+C2, 0xF = all C0-C3
//
// Parsing uses strtol() with base 0, so "0x.." is hex, a leading "0" is octal,
// and anything else is decimal. A value that contains no parsable number
// (e.g. "abc") falls back to the default mask instead of silently disabling
// every class. Only the low 8 bits are kept: they are the only bits this
// function can test, and the cached value can then never equal the -1
// "not parsed yet" sentinel (which would force a re-parse on every call).
//
// Returns 1 when class_idx is in [0, 8) and its bit is set in the mask, else 0.
static inline int tiny_heap_v2_class_enabled(int class_idx) {
    static int g_class_mask = -1;  // -1 = ENV not parsed yet
    if (__builtin_expect(g_class_mask == -1, 0)) {
        int mask = 0xE;  // Default: C1-C3 (16/32/64B), skip C0 8B (-5% regression)
        const char* e = getenv("HAKMEM_TINY_HEAP_V2_CLASS_MASK");
        if (e && *e) {
            char* endptr = NULL;
            long val = strtol(e, &endptr, 0);  // base 0 = auto-detect (0x hex, 0 octal, else decimal)
            if (endptr != e) {
                // At least one digit was consumed: accept the value.
                mask = (int)(val & 0xFF);
            }
        }
        g_class_mask = mask;
    }
    if (class_idx < 0 || class_idx >= 8) return 0;
    return (g_class_mask & (1 << class_idx)) != 0;
}
// Leftover-mode selector (read once from the environment, then cached).
// ENV: HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE
//   - unset, empty, or starting with '0' (default): mode 0, L0 gets blocks
//     first ("stealing" design, +18% @ 32B)
//   - anything else: mode 1, L1 is the primary owner and L0 only gets
//     leftovers ("leftover" design, Box-clean but -5% @ 16B)
//
// Decision (Phase 13-B): mode 0 (stealing) is the default, for performance.
// Rationale (ChatGPT analysis):
//   - The learning layer primarily observes Superslab/Pool statistics.
//   - L0 stealing does not corrupt Superslab carving/drain signals.
//   - TinyHeapV2 hit/miss counters can be added to the learning layer later
//     if that turns out to be needed.
//   - The performance gain (+18% @ 32B) justifies a less strict Box boundary.
//
// Returns 1 for leftover mode, 0 for stealing mode.
static inline int tiny_heap_v2_leftover_mode(void) {
    static int cached_mode = -1;  // -1 means the ENV has not been read yet

    if (__builtin_expect(cached_mode != -1, 1)) {
        return cached_mode;
    }

    const char* raw = getenv("HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE");
    const int is_off = (raw == NULL) || (raw[0] == '\0') || (raw[0] == '0');
    cached_mode = is_off ? 0 : 1;
    return cached_mode;
}
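// NOTE: This header MUST be included AFTER tiny_alloc_fast.inc.h!
// It uses fastcache_pop, tiny_alloc_fast_refill, hak_tiny_size_to_class which are
// static inline functions defined in tiny_alloc_fast.inc.h and related headers.
// NOTE(review): none of those three functions is referenced by the code visible in
// this header; confirm whether the include-order requirement still applies.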
// Phase 13-A Step 2: Try to push a block into the TinyHeapV2 magazine.
// Called from the free path to supply the magazine with "leftover" blocks.
//
// class_idx: size class of the block; only classes 0..3 are accepted.
// base:      BASE pointer of the block; it is stored as-is.
// Returns:   1 if the block was stored in the magazine; 0 if it was not
//            (class out of range or disabled, Front-V2 disabled, magazine
//            full, or magazine index out of range). On 0 the caller still
//            owns the block.
static inline void tiny_heap_v2_ensure_init(void);  // defined later in this header

static inline int tiny_heap_v2_try_push(int class_idx, void* base) {
    // 1. Check if class is enabled
    if (class_idx < 0 || class_idx > 3) return 0;
    if (!tiny_heap_v2_enabled()) return 0;
    if (!tiny_heap_v2_class_enabled(class_idx)) return 0;

    // Run the TLS init barrier before touching the magazine, like the refill
    // and alloc paths do. Without it, a thread whose first Front-V2 operation
    // is a push would lose the pushed blocks when a later alloc/refill runs
    // the barrier and resets every magazine's top to 0.
    tiny_heap_v2_ensure_init();

    TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];

    // 2. Check if magazine has room. A negative top is rejected as well: it
    //    would otherwise index before the start of items[].
    if (mag->top < 0 || mag->top >= TINY_HEAP_V2_MAG_CAP) {
        return 0;  // Magazine full (or index out of range)
    }

    // 3. Push BASE pointer into magazine
    mag->items[mag->top++] = base;

    // DEBUG: log the first few pushes per class (non-release builds only,
    // enabled via ENV HAKMEM_TINY_HEAP_V2_DEBUG)
#if !HAKMEM_BUILD_RELEASE
    static int g_push_dbg = -1;
    if (g_push_dbg == -1) {
        const char* e = getenv("HAKMEM_TINY_HEAP_V2_DEBUG");
        g_push_dbg = (e && *e && *e != '0') ? 1 : 0;
    }
    if (g_push_dbg) {
        static __thread int g_push_count[TINY_NUM_CLASSES] = {0};
        if (g_push_count[class_idx] < 5) {
            fprintf(stderr, "[HeapV2-PUSH] C%d push #%d, base=%p, mag->top=%d\n",
                    class_idx, g_push_count[class_idx]++, base, mag->top);
        }
    }
#endif
    return 1;  // Success
}
// Statistics gate: non-zero when the per-class counters should be updated.
// The value comes from HAK_ENV_TINY_HEAP_V2_STATS(), which is not defined in
// this header (presumably the ENV cache in hakmem_env_cache.h - confirm).
static inline int tiny_heap_v2_stats_enabled(void) {
    const int stats_on = HAK_ENV_TINY_HEAP_V2_STATS();
    return stats_on;
}
// Per-thread initialization barrier for the HeapV2 magazines.
// On the first call in a thread it zeroes the top index of every class
// magazine and sets g_tls_heap_v2_initialized; later calls return immediately.
static inline void tiny_heap_v2_ensure_init(void) {
    extern __thread int g_tls_heap_v2_initialized;
    extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[];

    // Common case: this thread has already been through the barrier.
    if (__builtin_expect(g_tls_heap_v2_initialized != 0, 1)) {
        return;
    }

    int cls = 0;
    while (cls < TINY_NUM_CLASSES) {
        g_tiny_heap_v2_mag[cls].top = 0;
        cls++;
    }
    g_tls_heap_v2_initialized = 1;
}
// Magazine refill from TLS SLL/backend
static inline int tiny_heap_v2_refill_mag(int class_idx) {
// FIX: Ensure TLS is initialized before first magazine access
tiny_heap_v2_ensure_init();
if (class_idx < 0 || class_idx > 3) return 0;
if (!tiny_heap_v2_class_enabled(class_idx)) return 0;
// Phase 7-Step7: Use config macro for dead code elimination in PGO mode
if (!HAK_TINY_TLS_SLL_ENABLED_FALLBACK) return 0;
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
const int cap = TINY_HEAP_V2_MAG_CAP;
int filled = 0;
// FIX: Validate mag->top before use (prevent uninitialized TLS corruption)
if (mag->top < 0 || mag->top > cap) {
static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0};
if (!s_reset_logged[class_idx]) {
fprintf(stderr, "[HEAP_V2_REFILL] C%d mag->top=%d corrupted, reset to 0\n",
class_idx, mag->top);
s_reset_logged[class_idx] = 1;
}
mag->top = 0;
}
// First, steal from TLS SLL if already available.
while (mag->top < cap) {
void* base = NULL;
if (!tls_sll_pop(class_idx, &base)) break;
mag->items[mag->top++] = base;
filled++;
}
// If magazine is still empty, ask backend to refill SLL once, then steal again.
if (mag->top < cap && filled == 0) {
#if HAKMEM_TINY_P0_BATCH_REFILL
(void)sll_refill_batch_from_ss(class_idx, cap);
#else
(void)sll_refill_small_from_ss(class_idx, cap);
#endif
while (mag->top < cap) {
void* base = NULL;
if (!tls_sll_pop(class_idx, &base)) break;
mag->items[mag->top++] = base;
filled++;
}
}
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
if (filled > 0) {
g_tiny_heap_v2_stats[class_idx].refill_calls++;
g_tiny_heap_v2_stats[class_idx].refill_blocks += (uint64_t)filled;
}
}
return filled;
}
// Magazine pop (fast path)
static inline void* tiny_heap_v2_alloc_by_class(int class_idx) {
// FIX: Ensure TLS is initialized before first magazine access
tiny_heap_v2_ensure_init();
if (class_idx < 0 || class_idx > 3) return NULL;
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode
if (!HAK_TINY_HEAP_V2_ENABLED_FALLBACK) return NULL;
if (!tiny_heap_v2_class_enabled(class_idx)) return NULL;
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
// Hit: magazine has entries
if (__builtin_expect(mag->top > 0, 1)) {
// FIX: Add underflow protection before array access
const int cap = TINY_HEAP_V2_MAG_CAP;
if (mag->top > cap || mag->top < 0) {
static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0};
if (!s_reset_logged[class_idx]) {
fprintf(stderr, "[HEAP_V2_ALLOC] C%d mag->top=%d corrupted, reset to 0\n",
class_idx, mag->top);
s_reset_logged[class_idx] = 1;
}
mag->top = 0;
return NULL; // Fall through to refill path
}
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
g_tiny_heap_v2_stats[class_idx].alloc_calls++;
g_tiny_heap_v2_stats[class_idx].mag_hits++;
}
return mag->items[--mag->top];
}
// Miss: try single refill from SLL/backend
int filled = tiny_heap_v2_refill_mag(class_idx);
if (filled > 0 && mag->top > 0) {
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
g_tiny_heap_v2_stats[class_idx].alloc_calls++;
g_tiny_heap_v2_stats[class_idx].mag_hits++;
}
return mag->items[--mag->top];
}
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
g_tiny_heap_v2_stats[class_idx].backend_oom++;
}
return NULL;
}
// Print statistics (called at program exit if HAKMEM_TINY_HEAP_V2_STATS=1, impl in hakmem_tiny.c)
void tiny_heap_v2_print_stats(void);
#endif // HAK_FRONT_TINY_HEAP_V2_H