Phase SO-BACKEND-OPT-1: v3 backend 分解&Tiny/ULTRA 完成世代宣言
=== 実装内容 === 1. v3 backend 詳細計測 - ENV: HAKMEM_SO_V3_STATS で alloc/free パス内訳計測 - 追加 stats: alloc_current_hit, alloc_partial_hit, free_current, free_partial, free_retire - so_alloc_fast / so_free_fast に埋め込み - デストラクタで [ALLOC_DETAIL] / [FREE_DETAIL] 出力 2. v3 backend ボトルネック分析完了 - C7-only: alloc_current_hit=99.99%, alloc_refill=0.9%, free_retire=0.1%, page_of_fail=0 - Mixed: alloc_current_hit=100%, alloc_refill=0.85%, free_retire=0.07%, page_of_fail=0 - 結論: v3 ロジック部分(ページ選択・retire)は完全最適化済み - 残り 5% overhead は内部コスト(header write, memcpy, 分岐) 3. Tiny/ULTRA 層「完成世代」宣言 - 総括ドキュメント作成: docs/analysis/PERF_EXEC_SUMMARY_ULTRA_PHASE_20251211.md - CURRENT_TASK.md に Phase ULTRA 総括セクション追加 - AGENTS.md に Tiny/ULTRA 完成世代宣言追加 - 最終成果: Mixed 16–1024B = 43.9M ops/s (baseline 30.6M → +43.5%) === ボトルネック地図 === | 層 | 関数 | overhead | |-----|------|----------| | Front | malloc/free dispatcher | ~40–45% | | ULTRA | C4–C7 alloc/free/refill | ~12% | | v3 backend | so_alloc/so_free | ~5% | | mid/pool | hak_super_lookup | 3–5% | === フェーズ履歴(Phase ULTRA cycle) === - Phase PERF-ULTRA-FREE-OPT-1: C4–C7 ULTRA統合 → +9.3% - Phase REFACTOR: Code quality (60行削減) - Phase PERF-ULTRA-REFILL-OPT-1a/1b: C7 ULTRA refill最適化 → +11.1% - Phase SO-BACKEND-OPT-1: v3 backend分解 → 設計限界確認 === 次フェーズ(独立ライン) === 1. Phase SO-BACKEND-OPT-2: v3 header write削減 (1-2%) 2. Headerless/v6系: out-of-band header (1-2%) 3. mid/pool v3新設計: C6-heavy 10M → 20–25M 本フェーズでTiny/ULTRA層は「完成世代」として基盤固定。 今後の大きい変更はHeaderless/mid系の独立ラインで検討。 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -49,6 +49,16 @@ void so_v3_record_alloc_fallback(uint8_t ci) {
|
||||
if (st) atomic_fetch_add_explicit(&st->alloc_fallback_v1, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_alloc_current_hit(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->alloc_current_hit, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_alloc_partial_hit(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->alloc_partial_hit, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_free_call(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->free_calls, 1, memory_order_relaxed);
|
||||
@ -59,6 +69,21 @@ void so_v3_record_free_fallback(uint8_t ci) {
|
||||
if (st) atomic_fetch_add_explicit(&st->free_fallback_v1, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_free_current(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->free_current, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_free_partial(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->free_partial, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_free_retire(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->free_retire, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_page_of_fail(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->page_of_fail, 1, memory_order_relaxed);
|
||||
@ -137,6 +162,7 @@ static inline void* so_alloc_fast(so_ctx_v3* ctx, uint32_t ci) {
|
||||
void* blk = p->freelist;
|
||||
p->freelist = *(void**)blk;
|
||||
p->used++;
|
||||
so_v3_record_alloc_current_hit((uint8_t)ci);
|
||||
if (skip_header_c7) {
|
||||
uint8_t* header_ptr = (uint8_t*)blk;
|
||||
*header_ptr = (uint8_t)(HEADER_MAGIC | (ci & HEADER_CLASS_MASK));
|
||||
@ -165,6 +191,7 @@ static inline void* so_alloc_fast(so_ctx_v3* ctx, uint32_t ci) {
|
||||
void* blk = p->freelist;
|
||||
p->freelist = *(void**)blk;
|
||||
p->used++;
|
||||
so_v3_record_alloc_partial_hit((uint8_t)ci);
|
||||
if (skip_header_c7) {
|
||||
uint8_t* header_ptr = (uint8_t*)blk;
|
||||
*header_ptr = (uint8_t)(HEADER_MAGIC | (ci & HEADER_CLASS_MASK));
|
||||
@ -229,17 +256,21 @@ static inline void so_free_fast(so_ctx_v3* ctx, uint32_t ci, void* ptr) {
|
||||
(void)so_unlink_partial(hc, page);
|
||||
if (hc->partial_count < hc->max_partial_pages) {
|
||||
so_page_push_partial(hc, page);
|
||||
so_v3_record_free_partial((uint8_t)ci);
|
||||
if (!hc->current) {
|
||||
hc->current = page;
|
||||
so_v3_record_free_current((uint8_t)ci);
|
||||
}
|
||||
} else {
|
||||
if (hc->current == page) {
|
||||
hc->current = NULL;
|
||||
}
|
||||
so_v3_record_free_retire((uint8_t)ci);
|
||||
so_page_retire_slow(ctx, ci, page);
|
||||
}
|
||||
} else if (!hc->current) {
|
||||
hc->current = page;
|
||||
so_v3_record_free_current((uint8_t)ci);
|
||||
}
|
||||
}
|
||||
|
||||
@ -339,15 +370,34 @@ static void so_v3_stats_dump(void) {
|
||||
so_stats_class_v3* st = &g_so_stats[i];
|
||||
uint64_t rh = atomic_load_explicit(&st->route_hits, memory_order_relaxed);
|
||||
uint64_t ac = atomic_load_explicit(&st->alloc_calls, memory_order_relaxed);
|
||||
uint64_t ach = atomic_load_explicit(&st->alloc_current_hit, memory_order_relaxed);
|
||||
uint64_t aph = atomic_load_explicit(&st->alloc_partial_hit, memory_order_relaxed);
|
||||
uint64_t ar = atomic_load_explicit(&st->alloc_refill, memory_order_relaxed);
|
||||
uint64_t afb = atomic_load_explicit(&st->alloc_fallback_v1, memory_order_relaxed);
|
||||
uint64_t fc = atomic_load_explicit(&st->free_calls, memory_order_relaxed);
|
||||
uint64_t fcur = atomic_load_explicit(&st->free_current, memory_order_relaxed);
|
||||
uint64_t fpar = atomic_load_explicit(&st->free_partial, memory_order_relaxed);
|
||||
uint64_t fret = atomic_load_explicit(&st->free_retire, memory_order_relaxed);
|
||||
uint64_t ffb = atomic_load_explicit(&st->free_fallback_v1, memory_order_relaxed);
|
||||
uint64_t pof = atomic_load_explicit(&st->page_of_fail, memory_order_relaxed);
|
||||
if (rh + ac + afb + fc + ffb + ar + pof == 0) continue;
|
||||
if (rh + ac + afb + fc + ffb + ar + pof + ach + aph + fcur + fpar + fret == 0) continue;
|
||||
|
||||
// Main stats (basic)
|
||||
fprintf(stderr, "[SMALL_HEAP_V3_STATS] cls=%d route_hits=%llu alloc_calls=%llu alloc_refill=%llu alloc_fb_v1=%llu free_calls=%llu free_fb_v1=%llu page_of_fail=%llu\n",
|
||||
i, (unsigned long long)rh, (unsigned long long)ac,
|
||||
(unsigned long long)ar, (unsigned long long)afb, (unsigned long long)fc,
|
||||
(unsigned long long)ffb, (unsigned long long)pof);
|
||||
|
||||
// Detailed alloc path breakdown
|
||||
if (ach + aph > 0) {
|
||||
fprintf(stderr, " [ALLOC_DETAIL] alloc_current_hit=%llu alloc_partial_hit=%llu\n",
|
||||
(unsigned long long)ach, (unsigned long long)aph);
|
||||
}
|
||||
|
||||
// Detailed free path breakdown
|
||||
if (fcur + fpar + fret > 0) {
|
||||
fprintf(stderr, " [FREE_DETAIL] free_current=%llu free_partial=%llu free_retire=%llu\n",
|
||||
(unsigned long long)fcur, (unsigned long long)fpar, (unsigned long long)fret);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user