From 326102599533aee162103378652c17f9e0be9d13 Mon Sep 17 00:00:00 2001 From: "Moe Charm (CI)" Date: Wed, 10 Dec 2025 18:18:05 +0900 Subject: [PATCH] Phase v4-4: pilot C6 v4 route with opt-in gate --- CURRENT_TASK.md | 10 ++++++++++ core/box/smallobject_hotbox_v4_env_box.h | 4 ++++ core/front/malloc_tiny_fast.h | 2 +- core/smallobject_hotbox_v4.c | 12 ++++++------ docs/analysis/SMALLOBJECT_V4_BOX_DESIGN.md | 6 ++++++ 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index a6c608ab..1410f3db 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -1061,3 +1061,13 @@ export HAKMEM_POOL_ZERO_MODE=header - v3 route: 40,661,560 ops/s - v4 route: 40,010,302 ops/s(-1.6% 以内、回帰なし) - 所感: C7-only では v4 が逆転し、prepare 増加の問題は解消。Mixed も健康レンジに収まった。次は C7 v4 の pf/partial 再利用 or C6/C5 拡張を検討。 + +### Phase v4-4: C6 v4 パイロット(C6-heavy 専用、強ゲート) +- 変更: + - v4 ENV マスクに C6(bit6=0x40) を解禁。`smallobject_hotbox_v4_env_box.h` に C6 helper を追加。 + - `smallobject_hotbox_v4.c` に C6 クラスの alloc/free を追加(Tiny v1 Cold 経由、C7 と同形 freelist/current/partial パス)。 + - front free の v4 ルートで C6 も small_heap_free_fast_v4 を通すよう修正。 +- A/B (C6-heavy, ws=400, iters=1M, size=257–768): + - C6 v1: 28,690,913 ops/s + - C6 v4: 30,068,995 ops/s(+4.8%)segv/assert なし +- Mixed 16–1024B: デフォルトは C6 v1 のまま(`HAKMEM_SMALL_HEAP_V4_CLASSES=0x80` で C7-only)。C6 v4 は研究箱として opt-in(`0x40`)。 diff --git a/core/box/smallobject_hotbox_v4_env_box.h b/core/box/smallobject_hotbox_v4_env_box.h index 366a22aa..22430381 100644 --- a/core/box/smallobject_hotbox_v4_env_box.h +++ b/core/box/smallobject_hotbox_v4_env_box.h @@ -43,3 +43,7 @@ static inline int small_heap_v4_class_enabled(uint8_t class_idx) { static inline int small_heap_v4_c7_enabled(void) { return small_heap_v4_class_enabled(7); } + +static inline int small_heap_v4_c6_enabled(void) { + return small_heap_v4_class_enabled(6); +} diff --git a/core/front/malloc_tiny_fast.h b/core/front/malloc_tiny_fast.h index dee8a5e0..9eacf96e 100644 --- 
a/core/front/malloc_tiny_fast.h +++ b/core/front/malloc_tiny_fast.h @@ -319,7 +319,7 @@ static inline int free_tiny_fast(void* ptr) { if (__builtin_expect(use_tiny_heap, 0)) { switch (route) { case TINY_ROUTE_SMALL_HEAP_V4: - if (class_idx == 7) { + if (class_idx == 7 || class_idx == 6) { small_heap_free_fast_v4(small_heap_ctx_v4_get(), class_idx, base); return 1; } diff --git a/core/smallobject_hotbox_v4.c b/core/smallobject_hotbox_v4.c index 6cced59f..a36a8220 100644 --- a/core/smallobject_hotbox_v4.c +++ b/core/smallobject_hotbox_v4.c @@ -106,7 +106,7 @@ static small_page_v4* v4_find_page(small_class_heap_v4* h, const uint8_t* ptr, v // ----------------------------------------------------------------------------- static small_page_v4* cold_refill_page_v4(small_heap_ctx_v4* hot_ctx, uint32_t class_idx) { - if (__builtin_expect(class_idx != 7, 0)) return NULL; + if (__builtin_expect(class_idx != 7 && class_idx != 6, 0)) return NULL; (void)hot_ctx; tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread(); if (!tctx) return NULL; @@ -193,10 +193,10 @@ static small_page_v4* small_alloc_slow_v4(small_heap_ctx_v4* ctx, int class_idx) } void* small_heap_alloc_fast_v4(small_heap_ctx_v4* ctx, int class_idx) { - if (__builtin_expect(class_idx != 7, 0)) { - return NULL; // C7 専用 + if (__builtin_expect(class_idx != 7 && class_idx != 6, 0)) { + return NULL; // C6/C7 専用 } - if (!small_heap_v4_c7_enabled()) return NULL; + if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return NULL; small_class_heap_v4* h = &ctx->cls[class_idx]; small_page_v4* page = h->current; @@ -240,10 +240,10 @@ static void v4_unlink_from_list(small_class_heap_v4* h, v4_loc_t loc, small_page } void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) { - if (__builtin_expect(class_idx != 7, 0)) { + if (__builtin_expect(class_idx != 7 && class_idx != 6, 0)) { return; } - if (!small_heap_v4_c7_enabled()) return; + if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return; if (!ptr) 
return; small_class_heap_v4* h = &ctx->cls[class_idx]; diff --git a/docs/analysis/SMALLOBJECT_V4_BOX_DESIGN.md b/docs/analysis/SMALLOBJECT_V4_BOX_DESIGN.md index bead6afa..ea04346e 100644 --- a/docs/analysis/SMALLOBJECT_V4_BOX_DESIGN.md +++ b/docs/analysis/SMALLOBJECT_V4_BOX_DESIGN.md @@ -38,3 +38,9 @@ - v3 route: 40.66M ops/s - v4 route: 40.01M ops/s(-1.6% 以内、回帰なし) - どちらも segv/assert なし。C7 v4 の prepare 増加は解消済み。Mixed ではまだ v3 がわずかに優勢だが許容範囲。 +- Phase v4-4 (C6 v4 パイロット): + - ENV: `HAKMEM_SMALL_HEAP_V4_ENABLED=1`, `HAKMEM_SMALL_HEAP_V4_CLASSES=0x40`(C6-only v4)。Mixed では C6 v4 は標準 OFF(0x80 = C7-only)。 + - C6-heavy ベンチ (ws=400, iters=1M, size 257–768): + - C6 v1: 28.69M ops/s + - C6 v4: 30.07M ops/s(+4.8%)、segv/assert なし + - Mixed 16–1024B はデフォルトで C6 v1 のまま(C6 v4 は研究箱)。今後 C6 v4 の安定度を見つつ拡張予定。