Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)

## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-10 09:08:18 +09:00
parent a905e0ffdd
commit acc64f2438
115 changed files with 2103 additions and 1287 deletions

View File

@ -177,127 +177,6 @@ static void tiny_fast_print_profile(void) {
}
// ========== Front-V2 helpers (tcache-like TLS magazine) ==========
// Priority-2: Use cached ENV (eliminate lazy-init overhead)
/*
 * Report whether HeapV2 statistics collection is switched on.
 *
 * Returns the value of HAK_ENV_TINY_HEAP_V2_STATS() converted to int;
 * callers in this file treat non-zero as "enabled".
 */
static inline int tiny_heap_v2_stats_enabled(void) {
    const int stats_on = HAK_ENV_TINY_HEAP_V2_STATS();
    return stats_on;
}
// TLS HeapV2 initialization barrier (ensures mag->top is zero on first use)
/*
 * One-time, per-thread setup of the HeapV2 magazines.
 *
 * On the first call in a thread (g_tls_heap_v2_initialized still zero) the
 * `top` field of every class magazine is set to 0 and the thread-local flag
 * is raised. Every later call in that thread returns immediately.
 */
static inline void tiny_heap_v2_ensure_init(void) {
    extern __thread int g_tls_heap_v2_initialized;
    extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[];

    /* Common case: this thread has already been initialized. */
    if (__builtin_expect(g_tls_heap_v2_initialized != 0, 1)) {
        return;
    }

    int cls = 0;
    while (cls < TINY_NUM_CLASSES) {
        g_tiny_heap_v2_mag[cls].top = 0;
        cls++;
    }
    g_tls_heap_v2_initialized = 1;
}
/*
 * Top up the HeapV2 magazine of `class_idx` with blocks popped from the
 * TLS SLL.
 *
 * Steps:
 *   1. Make sure this thread's magazines are initialized.
 *   2. Bail out (return 0) when class_idx is outside 0..3, the class is not
 *      enabled for HeapV2, or TINY_FRONT_TLS_SLL_ENABLED is false.
 *   3. If mag->top is outside [0, cap], log once per thread and class, then
 *      reset it to 0.
 *   4. Pop blocks from the TLS SLL into the magazine until it is full or the
 *      SLL has nothing more to give.
 *   5. If that moved nothing and there is still room, ask the backend once
 *      to refill the SLL and repeat step 4.
 *
 * Returns the number of blocks moved into the magazine by this call.
 */
static inline int tiny_heap_v2_refill_mag(int class_idx) {
    /* TLS must be initialized before the magazine is touched. */
    tiny_heap_v2_ensure_init();

    if (class_idx < 0 || class_idx > 3) {
        return 0;
    }
    if (!tiny_heap_v2_class_enabled(class_idx)) {
        return 0;
    }
    /* Config macro: lets the compiler drop this path when the SLL is off. */
    if (!TINY_FRONT_TLS_SLL_ENABLED) {
        return 0;
    }

    TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
    const int cap = TINY_HEAP_V2_MAG_CAP;
    int filled = 0;

    /* Guard against a garbage `top` before it is used as an array index. */
    const int top_is_sane = (mag->top >= 0 && mag->top <= cap);
    if (!top_is_sane) {
        static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0};
        if (s_reset_logged[class_idx] == 0) {
            fprintf(stderr, "[HEAP_V2_REFILL] C%d mag->top=%d corrupted, reset to 0\n",
                    class_idx, mag->top);
            s_reset_logged[class_idx] = 1;
        }
        mag->top = 0;
    }

    /* Pass 1: take whatever the TLS SLL already holds. */
    void* base = NULL;
    while (mag->top < cap && tls_sll_pop(class_idx, &base)) {
        mag->items[mag->top++] = base;
        filled++;
        base = NULL;
    }

    /* Pass 2: nothing was available, so refill the SLL once and retry. */
    if (filled == 0 && mag->top < cap) {
#if HAKMEM_TINY_P0_BATCH_REFILL
        (void)sll_refill_batch_from_ss(class_idx, cap);
#else
        (void)sll_refill_small_from_ss(class_idx, cap);
#endif
        base = NULL;
        while (mag->top < cap && tls_sll_pop(class_idx, &base)) {
            mag->items[mag->top++] = base;
            filled++;
            base = NULL;
        }
    }

    /* Only refills that actually moved blocks are recorded. */
    if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0) && filled > 0) {
        g_tiny_heap_v2_stats[class_idx].refill_calls++;
        g_tiny_heap_v2_stats[class_idx].refill_blocks += (uint64_t)filled;
    }

    return filled;
}
/*
 * Pop one block for `class_idx` from this thread's HeapV2 magazine.
 *
 * Returns NULL when:
 *   - class_idx is outside 0..3,
 *   - TINY_FRONT_HEAP_V2_ENABLED is false or the class is not enabled,
 *   - mag->top was found out of range (it is reset to 0 and the call gives
 *     up without attempting a refill), or
 *   - the magazine was empty and a single refill attempt produced nothing
 *     (counted as backend_oom when stats are enabled).
 *
 * Otherwise returns the block at the top of the magazine; with stats
 * enabled, alloc_calls and mag_hits are incremented for that class.
 */
static inline void* tiny_heap_v2_alloc_by_class(int class_idx) {
    /* TLS must be initialized before the magazine is touched. */
    tiny_heap_v2_ensure_init();

    if (class_idx < 0 || class_idx > 3) {
        return NULL;
    }
    /* Config macro: lets the compiler drop this path when HeapV2 is off. */
    if (!TINY_FRONT_HEAP_V2_ENABLED) {
        return NULL;
    }
    if (!tiny_heap_v2_class_enabled(class_idx)) {
        return NULL;
    }

    TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];

    if (__builtin_expect(mag->top > 0, 1)) {
        /* Magazine has entries: validate `top` before indexing with it. */
        const int cap = TINY_HEAP_V2_MAG_CAP;
        if (mag->top < 0 || mag->top > cap) {
            static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0};
            if (s_reset_logged[class_idx] == 0) {
                fprintf(stderr, "[HEAP_V2_ALLOC] C%d mag->top=%d corrupted, reset to 0\n",
                        class_idx, mag->top);
                s_reset_logged[class_idx] = 1;
            }
            mag->top = 0;
            return NULL; /* this call gives up; no refill is tried here */
        }
    } else {
        /* Magazine empty: one refill attempt from the SLL / backend. */
        const int refilled = tiny_heap_v2_refill_mag(class_idx);
        if (refilled <= 0 || mag->top <= 0) {
            if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
                g_tiny_heap_v2_stats[class_idx].backend_oom++;
            }
            return NULL;
        }
    }

    /* Shared exit for the direct hit and the post-refill hit. */
    if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
        g_tiny_heap_v2_stats[class_idx].alloc_calls++;
        g_tiny_heap_v2_stats[class_idx].mag_hits++;
    }
    mag->top -= 1;
    return mag->items[mag->top];
}
// ========== Fast Path: TLS Freelist Pop (3-4 instructions) ==========
// External SFC control (defined in hakmem_tiny_sfc.c)