diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md
index ba1fc249..74e09a5b 100644
--- a/CURRENT_TASK.md
+++ b/CURRENT_TASK.md
@@ -120,6 +120,21 @@
 
 ---
 
+### Phase v4-mid-6: C6 v4 TLS Fastlist (2025-12-11)
+
+- **Implementation**:
+  - Added a C6-dedicated TLS fastlist (`g_small_c6_fast`) to `core/smallobject_hotbox_v4.c`.
+  - `small_heap_alloc_fast_v4` / `free` now check the fastlist first and serve hits through an O(1) path.
+  - For safety, the feature is enabled only via ENV `HAKMEM_SMALL_HEAP_V4_FASTLIST=1` (default OFF).
+  - Fixed `small_heap_free_fast_v4` to call `hak_pool_free` on its fallback path (prevents a memory leak).
+
+- **Evaluation**:
+  - `bench_mid_large_mt_hakmem` (C6-heavy): runs to completion with v4 ON at roughly 22M ops/s (on par with fastlist OFF).
+  - `bench_random_mixed_hakmem` (Mixed 16-1024B): with v4 ON, a SEGV occurs regardless of fastlist ON/OFF (a latent instability present since Phase v4-mid-5).
+  - Conclusion: the fastlist mechanism is implemented, but because v4 itself is unstable on the Mixed workload, the feature is merged sealed behind the ENV gate.
+
+---
+
 ### 5. Health-check runs (always run these two first)
 
 - For Tiny/Mixed:
diff --git a/core/box/smallobject_hotbox_v4_box.h b/core/box/smallobject_hotbox_v4_box.h
index 01822d03..3941fe9c 100644
--- a/core/box/smallobject_hotbox_v4_box.h
+++ b/core/box/smallobject_hotbox_v4_box.h
@@ -48,6 +48,15 @@ typedef struct SmallHeapCtx {
     SmallClassHeap cls[SMALLOBJECT_NUM_CLASSES];
 } SmallHeapCtx;
 
+// SmallC6FastState: TLS fastlist state for C6 (Phase v4-mid-6)
+typedef struct SmallC6FastState {
+    void*          freelist;   // C6 fastlist head
+    void*          page_base;  // Current C6 page base address
+    uint32_t       used;       // Usage count (synced to page->used on switch)
+    uint32_t       capacity;   // Page capacity
+    SmallPageMeta* meta;       // Back-pointer to page metadata
+} SmallC6FastState;
+
 // Backward compatibility aliases (deprecated, use SmallXxx directly)
 typedef SmallPageMeta small_page_v4;
 typedef SmallClassHeap small_class_heap_v4;
diff --git a/core/box/smallobject_hotbox_v4_env_box.h b/core/box/smallobject_hotbox_v4_env_box.h
index baf86e20..b347ba65 100644
--- a/core/box/smallobject_hotbox_v4_env_box.h
+++ b/core/box/smallobject_hotbox_v4_env_box.h
@@ -51,3 +51,17 @@ static inline int small_heap_v4_c6_enabled(void) {
 static inline int small_heap_v4_c5_enabled(void) {
     return small_heap_v4_class_enabled(5);
 }
+
+static inline int small_heap_v4_fastlist_enabled(void) {
+    static int g_fast = -1;
+    if (__builtin_expect(g_fast == -1, 0)) {
+        const char* e = getenv("HAKMEM_SMALL_HEAP_V4_FASTLIST");
+        if (e && *e) {
+            g_fast = (*e != '0') ? 1 : 0;
+        } else {
+            // Default OFF due to SEGV in random mixed
+            g_fast = 0;
+        }
+    }
+    return g_fast;
+}
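The sketch below is not part of the patch; it illustrates, in isolation, the O(1) intrusive-freelist pop/push that the CURRENT_TASK.md bullets describe and that the smallobject_hotbox_v4.c hunks below implement. All identifiers in it (fast_state_t, fast_pop, fast_push) are hypothetical stand-ins, not allocator API.

/*
 * Illustrative sketch only (not part of the patch).
 * fast_state_t / fast_pop / fast_push are hypothetical names.
 */
#include <stdint.h>
#include <stddef.h>

typedef struct fast_state_t {
    void*    freelist;   /* head of the cached freelist; each free block stores the next pointer in its first word */
    void*    page_base;  /* base of the page the freelist was taken from */
    size_t   span;       /* capacity * block_size, used for the ownership range check on free */
    uint32_t used;       /* blocks currently handed out from this page */
} fast_state_t;

/* Pop one block in O(1); returns NULL when the cached freelist is empty
 * and the caller must fall back to the slow path. */
static inline void* fast_pop(fast_state_t* s) {
    void* b = s->freelist;
    if (b) {
        s->freelist = *(void**)b;  /* next pointer lives in the block itself */
        s->used++;
    }
    return b;
}

/* Push a block back in O(1) if it belongs to the cached page; returns 0
 * when the pointer is outside the page and must go through the slow path. */
static inline int fast_push(fast_state_t* s, void* p) {
    uintptr_t a = (uintptr_t)p, base = (uintptr_t)s->page_base;
    if (!s->page_base || a < base || a >= base + s->span)
        return 0;
    *(void**)p = s->freelist;
    s->freelist = p;
    s->used--;
    return 1;
}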
diff --git a/core/smallobject_hotbox_v4.c b/core/smallobject_hotbox_v4.c
index 24033427..3ad6078d 100644
--- a/core/smallobject_hotbox_v4.c
+++ b/core/smallobject_hotbox_v4.c
@@ -5,6 +5,11 @@
 #include 
 #include 
 
+#ifndef likely
+#define likely(x)   __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
 #include "box/smallobject_hotbox_v4_box.h"
 #include "box/smallobject_hotbox_v4_env_box.h"
 #include "box/smallobject_hotbox_v4_stats_box.h"
@@ -34,6 +39,13 @@ small_heap_v4_class_stats_t g_small_heap_v4_stats[8];
 // TLS context
 static __thread small_heap_ctx_v4 g_ctx_v4;
 
+// Phase v4-mid-6: C6 TLS Fastlist
+static __thread SmallC6FastState g_small_c6_fast;
+
+static inline SmallC6FastState* small_c6_fast_state(void) {
+    return &g_small_c6_fast;
+}
+
 // Internal segment structure (internal use only, not exposed via public box API)
 typedef struct small_segment_v4_internal {
     int class_idx;
@@ -366,6 +378,22 @@ void* small_heap_alloc_fast_v4(small_heap_ctx_v4* ctx, int class_idx) {
     // Phase v4-mid-5: Add stats instrumentation
     small_heap_v4_stat_alloc_call(class_idx);
 
+    // Phase v4-mid-6: C6 Fastlist Path
+    if (class_idx == 6 && small_heap_v4_fastlist_enabled()) {
+        SmallC6FastState* s = &g_small_c6_fast;
+        if (likely(s->freelist)) {
+            void* b = s->freelist;
+            s->freelist = *(void**)b;
+            s->used++;
+            small_heap_v4_stat_alloc_success(class_idx);
+            return tiny_region_id_write_header(b, class_idx);
+        }
+        // Fastlist empty: sync used back to meta before slow path
+        if (s->meta) {
+            s->meta->used = (uint16_t)s->used;
+        }
+    }
+
     // Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
     if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
         small_heap_v4_stat_alloc_fallback_pool(class_idx);
@@ -398,6 +426,32 @@ void* small_heap_alloc_fast_v4(small_heap_ctx_v4* ctx, int class_idx) {
         return NULL;
     }
 
+    // Phase v4-mid-6: Promote to C6 Fastlist
+    if (class_idx == 6 && small_heap_v4_fastlist_enabled()) {
+        if (!page) {
+            // Should not happen
+        } else if (!page->freelist) {
+            return NULL;
+        } else {
+            SmallC6FastState* s = &g_small_c6_fast;
+            s->meta = page;
+            s->page_base = page->base;
+            s->capacity = page->capacity;
+            s->used = page->used;
+            s->freelist = page->freelist;
+            page->freelist = NULL; // Steal freelist ownership
+
+            // Retry fast path
+            if (likely(s->freelist)) {
+                void* b = s->freelist;
+                s->freelist = *(void**)b;
+                s->used++;
+                small_heap_v4_stat_alloc_success(class_idx);
+                return tiny_region_id_write_header(b, class_idx);
+            }
+        }
+    }
+
     // Allocate from newly acquired/promoted page
     void* blk = page->freelist;
     void* next = NULL;
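The sketch below is not part of the patch; it isolates the hand-off between page metadata and the TLS fast state that the two hunks above implement: steal the page's freelist into TLS on promotion, and write the locally tracked usage count back before leaving the fast path. page_t and tls_fast_t are simplified stand-ins for SmallPageMeta and SmallC6FastState; all function names are hypothetical.

/*
 * Illustrative sketch only (not part of the patch).
 * page_t / tls_fast_t / fast_promote / fast_sync_back are hypothetical names.
 */
#include <stdint.h>
#include <stddef.h>

typedef struct page_t {
    void*    freelist;
    void*    base;
    uint32_t capacity;
    uint16_t used;
    uint16_t block_size;
} page_t;

typedef struct tls_fast_t {
    void*    freelist;
    void*    page_base;
    uint32_t capacity;
    uint32_t used;
    page_t*  meta;
} tls_fast_t;

/* Promote: steal the page's freelist into TLS so later alloc/free are O(1). */
static void fast_promote(tls_fast_t* s, page_t* page) {
    s->meta      = page;
    s->page_base = page->base;
    s->capacity  = page->capacity;
    s->used      = page->used;
    s->freelist  = page->freelist;
    page->freelist = NULL;          /* TLS now owns the freelist */
}

/* Demote: before switching pages (or taking the slow path), write the
 * locally tracked usage count back so the page metadata stays consistent. */
static void fast_sync_back(tls_fast_t* s) {
    if (s->meta)
        s->meta->used = (uint16_t)s->used;
}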
@@ -432,10 +486,29 @@ static void v4_unlink_from_list(small_class_heap_v4* h, v4_loc_t loc, small_page
     page->next = NULL;
 }
 
+extern void hak_pool_free(void* ptr, size_t size, uintptr_t site_id);
+
 void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) {
     // Phase v4-mid-5: Add stats instrumentation
     small_heap_v4_stat_free_call(class_idx);
+    // Phase v4-mid-6: C6 Fastlist Path
+    if (class_idx == 6 && small_heap_v4_fastlist_enabled()) {
+        SmallC6FastState* s = &g_small_c6_fast;
+        if (s->page_base && (uintptr_t)ptr >= (uintptr_t)s->page_base) {
+            // Use actual block size from meta
+            uint32_t bsize = (s->meta) ? s->meta->block_size : 512;
+            size_t span = (size_t)s->capacity * bsize;
+            if ((uintptr_t)ptr < (uintptr_t)s->page_base + span) {
+                *(void**)ptr = s->freelist;
+                s->freelist = ptr;
+                s->used--;
+                small_heap_v4_stat_free_page_found(class_idx);
+                return;
+            }
+        }
+    }
+
     // Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
     if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
         return;
     }
@@ -459,7 +532,8 @@ void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) {
 
         small_heap_v4_stat_free_page_not_found(class_idx);
         // Try to find via segment mask+shift (requires segment to be initialized)
         // For now, this is a fallback for future segment-based allocation
-        // Return without freeing (pool v1 will handle)
+        // Fallback to pool v1 (avoid recursion via free())
+        hak_pool_free(base_ptr, 0, 0);
         return;
     }
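The sketch below is not part of the patch; it is a self-contained toy of the page/freelist cycle on a plain buffer, useful for reasoning about the fast path end to end: thread an intrusive freelist through a page, pop a block on alloc, range-check and push it back on free. The block size of 512 matches the C6 fallback value used in the free path above; everything else (BLOCK, CAP, the buffer itself) is hypothetical.

/*
 * Illustrative sketch only (not part of the patch).
 * BLOCK and CAP are arbitrary toy values.
 */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

enum { BLOCK = 512, CAP = 64 };

int main(void) {
    unsigned char* page = malloc((size_t)BLOCK * CAP);
    if (!page) return 1;

    /* Thread an intrusive freelist through the page: each free block stores
     * the address of the next free block in its first word. */
    void* head = NULL;
    for (int i = CAP - 1; i >= 0; i--) {
        void* blk = page + (size_t)i * BLOCK;
        *(void**)blk = head;
        head = blk;
    }

    /* Alloc: pop from the head in O(1). */
    void* a = head;
    head = *(void**)a;

    /* Free: range-check against the page, then push back in O(1). */
    uintptr_t base = (uintptr_t)page;
    assert((uintptr_t)a >= base && (uintptr_t)a < base + (size_t)BLOCK * CAP);
    *(void**)a = head;
    head = a;

    free(page);
    return 0;
}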