Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)
## Summary - ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え) - core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理 - core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off) - A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy) ## Files Modified - core/box/pool_api.inc.h: pool_zero_mode_box.h include - core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避) - core/hakmem_pool.c: zero mode 参照・制御ロジック - core/box/pool_zero_mode_box.h (新設): enum/getter - CURRENT_TASK.md: Phase ML1 結果記載 ## Test Results | Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement | |-----------|----------------|-----------------|------------| | 10K | 3.06 M ops/s | 3.17 M ops/s | +3.65% | | 1M | 23.71 M ops/s | 27.34 M ops/s | **+15.34%** | 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -9,7 +9,21 @@
|
||||
|
||||
#include "../hakmem_tiny.h"
|
||||
#include "../box/tls_sll_box.h"
|
||||
#include "../hakmem_env_cache.h"
|
||||
|
||||
#ifndef TINY_FRONT_TLS_SLL_ENABLED
|
||||
#define HAK_TINY_TLS_SLL_ENABLED_FALLBACK 1
|
||||
#else
|
||||
#define HAK_TINY_TLS_SLL_ENABLED_FALLBACK TINY_FRONT_TLS_SLL_ENABLED
|
||||
#endif
|
||||
|
||||
#ifndef TINY_FRONT_HEAP_V2_ENABLED
|
||||
#define HAK_TINY_HEAP_V2_ENABLED_FALLBACK tiny_heap_v2_enabled()
|
||||
#else
|
||||
#define HAK_TINY_HEAP_V2_ENABLED_FALLBACK TINY_FRONT_HEAP_V2_ENABLED
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// Phase 13-B: Magazine capacity (same as Phase 13-A)
|
||||
#ifndef TINY_HEAP_V2_MAG_CAP
|
||||
@ -34,6 +48,11 @@ typedef struct {
|
||||
// External TLS variables (defined in hakmem_tiny.c)
|
||||
extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES];
|
||||
extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES];
|
||||
extern __thread int g_tls_heap_v2_initialized;
|
||||
|
||||
// Backend refill helpers (implemented in Tiny refill path)
|
||||
int sll_refill_small_from_ss(int class_idx, int max_take);
|
||||
int sll_refill_batch_from_ss(int class_idx, int max_take);
|
||||
|
||||
// Enable flag (cached)
|
||||
// ENV: HAKMEM_TINY_FRONT_V2
|
||||
@ -132,10 +151,128 @@ static inline int tiny_heap_v2_try_push(int class_idx, void* base) {
|
||||
return 1; // Success
|
||||
}
|
||||
|
||||
// Forward declaration: refill + alloc helper (implemented inline where included)
|
||||
static inline int tiny_heap_v2_refill_mag(int class_idx);
|
||||
static inline void* tiny_heap_v2_alloc_by_class(int class_idx);
|
||||
static inline int tiny_heap_v2_stats_enabled(void);
|
||||
// Stats gate (ENV cached)
|
||||
static inline int tiny_heap_v2_stats_enabled(void) {
|
||||
return HAK_ENV_TINY_HEAP_V2_STATS();
|
||||
}
|
||||
|
||||
// TLS HeapV2 initialization barrier (ensures mag->top is zero on first use)
|
||||
static inline void tiny_heap_v2_ensure_init(void) {
|
||||
extern __thread int g_tls_heap_v2_initialized;
|
||||
extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[];
|
||||
|
||||
if (__builtin_expect(!g_tls_heap_v2_initialized, 0)) {
|
||||
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
|
||||
g_tiny_heap_v2_mag[i].top = 0;
|
||||
}
|
||||
g_tls_heap_v2_initialized = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Magazine refill from TLS SLL/backend
|
||||
static inline int tiny_heap_v2_refill_mag(int class_idx) {
|
||||
// FIX: Ensure TLS is initialized before first magazine access
|
||||
tiny_heap_v2_ensure_init();
|
||||
if (class_idx < 0 || class_idx > 3) return 0;
|
||||
if (!tiny_heap_v2_class_enabled(class_idx)) return 0;
|
||||
|
||||
// Phase 7-Step7: Use config macro for dead code elimination in PGO mode
|
||||
if (!HAK_TINY_TLS_SLL_ENABLED_FALLBACK) return 0;
|
||||
|
||||
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
|
||||
const int cap = TINY_HEAP_V2_MAG_CAP;
|
||||
int filled = 0;
|
||||
|
||||
// FIX: Validate mag->top before use (prevent uninitialized TLS corruption)
|
||||
if (mag->top < 0 || mag->top > cap) {
|
||||
static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0};
|
||||
if (!s_reset_logged[class_idx]) {
|
||||
fprintf(stderr, "[HEAP_V2_REFILL] C%d mag->top=%d corrupted, reset to 0\n",
|
||||
class_idx, mag->top);
|
||||
s_reset_logged[class_idx] = 1;
|
||||
}
|
||||
mag->top = 0;
|
||||
}
|
||||
|
||||
// First, steal from TLS SLL if already available.
|
||||
while (mag->top < cap) {
|
||||
void* base = NULL;
|
||||
if (!tls_sll_pop(class_idx, &base)) break;
|
||||
mag->items[mag->top++] = base;
|
||||
filled++;
|
||||
}
|
||||
|
||||
// If magazine is still empty, ask backend to refill SLL once, then steal again.
|
||||
if (mag->top < cap && filled == 0) {
|
||||
#if HAKMEM_TINY_P0_BATCH_REFILL
|
||||
(void)sll_refill_batch_from_ss(class_idx, cap);
|
||||
#else
|
||||
(void)sll_refill_small_from_ss(class_idx, cap);
|
||||
#endif
|
||||
while (mag->top < cap) {
|
||||
void* base = NULL;
|
||||
if (!tls_sll_pop(class_idx, &base)) break;
|
||||
mag->items[mag->top++] = base;
|
||||
filled++;
|
||||
}
|
||||
}
|
||||
|
||||
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
|
||||
if (filled > 0) {
|
||||
g_tiny_heap_v2_stats[class_idx].refill_calls++;
|
||||
g_tiny_heap_v2_stats[class_idx].refill_blocks += (uint64_t)filled;
|
||||
}
|
||||
}
|
||||
return filled;
|
||||
}
|
||||
|
||||
// Magazine pop (fast path)
|
||||
static inline void* tiny_heap_v2_alloc_by_class(int class_idx) {
|
||||
// FIX: Ensure TLS is initialized before first magazine access
|
||||
tiny_heap_v2_ensure_init();
|
||||
if (class_idx < 0 || class_idx > 3) return NULL;
|
||||
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode
|
||||
if (!HAK_TINY_HEAP_V2_ENABLED_FALLBACK) return NULL;
|
||||
if (!tiny_heap_v2_class_enabled(class_idx)) return NULL;
|
||||
|
||||
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx];
|
||||
|
||||
// Hit: magazine has entries
|
||||
if (__builtin_expect(mag->top > 0, 1)) {
|
||||
// FIX: Add underflow protection before array access
|
||||
const int cap = TINY_HEAP_V2_MAG_CAP;
|
||||
if (mag->top > cap || mag->top < 0) {
|
||||
static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0};
|
||||
if (!s_reset_logged[class_idx]) {
|
||||
fprintf(stderr, "[HEAP_V2_ALLOC] C%d mag->top=%d corrupted, reset to 0\n",
|
||||
class_idx, mag->top);
|
||||
s_reset_logged[class_idx] = 1;
|
||||
}
|
||||
mag->top = 0;
|
||||
return NULL; // Fall through to refill path
|
||||
}
|
||||
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
|
||||
g_tiny_heap_v2_stats[class_idx].alloc_calls++;
|
||||
g_tiny_heap_v2_stats[class_idx].mag_hits++;
|
||||
}
|
||||
return mag->items[--mag->top];
|
||||
}
|
||||
|
||||
// Miss: try single refill from SLL/backend
|
||||
int filled = tiny_heap_v2_refill_mag(class_idx);
|
||||
if (filled > 0 && mag->top > 0) {
|
||||
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
|
||||
g_tiny_heap_v2_stats[class_idx].alloc_calls++;
|
||||
g_tiny_heap_v2_stats[class_idx].mag_hits++;
|
||||
}
|
||||
return mag->items[--mag->top];
|
||||
}
|
||||
|
||||
if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) {
|
||||
g_tiny_heap_v2_stats[class_idx].backend_oom++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Print statistics (called at program exit if HAKMEM_TINY_HEAP_V2_STATS=1, impl in hakmem_tiny.c)
|
||||
void tiny_heap_v2_print_stats(void);
|
||||
|
||||
Reference in New Issue
Block a user