From e2ca52d59dbfe0838dee22382321bfcfd501b2f2 Mon Sep 17 00:00:00 2001
From: "Moe Charm (CI)"
Date: Thu, 11 Dec 2025 15:59:29 +0900
Subject: [PATCH] Phase v6-6: Inline hot path optimization for SmallObject Core v6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Optimize v6 alloc/free by eliminating redundant route checks and adding
inline hot path functions:

- smallobject_core_v6_box.h: Add inline hot path functions:
  - small_alloc_c6_hot_v6() / small_alloc_c5_hot_v6(): Direct TLS pop
  - small_free_c6_hot_v6() / small_free_c5_hot_v6(): Direct TLS push
  - No route check needed (caller already validated via switch case)
- smallobject_core_v6.c: Add cold path functions:
  - small_alloc_cold_v6(): Handle TLS refill from page
  - small_free_cold_v6(): Handle page freelist push (TLS full/cross-thread)
- malloc_tiny_fast.h: Update front gate to use inline hot path:
  - Alloc: hot path first, cold path fallback on TLS miss
  - Free: hot path first, cold path fallback on TLS full

Performance results:
- C5-heavy: v6 ON 42.2M ≈ baseline (parity restored)
- C6-heavy: v6 ON 34.5M ≈ baseline (parity restored)
- Mixed 16-1024B: ~26.5M (v3-only: ~28.1M, gap is routing overhead)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 core/box/smallobject_core_v6_box.h | 71 ++++++++++++++++++++++
 core/front/malloc_tiny_fast.h      | 29 +++++++--
 core/smallobject_core_v6.c         | 96 ++++++++++++++++++++++++++++++
 3 files changed, 190 insertions(+), 6 deletions(-)

diff --git a/core/box/smallobject_core_v6_box.h b/core/box/smallobject_core_v6_box.h
index 7d5b22f8..4269fcc9 100644
--- a/core/box/smallobject_core_v6_box.h
+++ b/core/box/smallobject_core_v6_box.h
@@ -56,6 +56,77 @@ static inline int small_tls_owns_ptr_v6(SmallHeapCtxV6* ctx, void* ptr) {
     return addr >= ctx->tls_seg_base && addr < ctx->tls_seg_end;
 }
 
+#ifndef likely
+#define likely(x)   __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+// Forward declarations for cold path
+struct SmallPageMetaV6;
+struct SmallPageMetaV6* small_cold_v6_refill_page(uint32_t class_idx);
+struct SmallPageMetaV6* small_page_meta_v6_of(void* ptr);
+void small_cold_v6_retire_page(struct SmallPageMetaV6* page);
+
+// ============================================================================
+// Inline Hot Path (Phase v6-6: Skip route check for maximum performance)
+// ============================================================================
+
+/// C6 alloc hot path - no route check, direct TLS pop
+/// @return: USER pointer or NULL (fallback needed)
+static inline void* small_alloc_c6_hot_v6(SmallHeapCtxV6* ctx) {
+    if (likely(ctx->tls_count_c6 > 0)) {
+        void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
+        return SMALL_V6_USER_FROM_BASE(blk);
+    }
+    return NULL; // Need refill
+}
+
+/// C5 alloc hot path - no route check, direct TLS pop
+/// @return: USER pointer or NULL (fallback needed)
+static inline void* small_alloc_c5_hot_v6(SmallHeapCtxV6* ctx) {
+    if (likely(ctx->tls_count_c5 > 0)) {
+        void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
+        return SMALL_V6_USER_FROM_BASE(blk);
+    }
+    return NULL; // Need refill
+}
+
+/// C6 free hot path - TLS ownership check + TLS push
+/// @return: 1 if handled, 0 if fallback needed
+static inline int small_free_c6_hot_v6(SmallHeapCtxV6* ctx, void* ptr) {
+    if (likely(small_tls_owns_ptr_v6(ctx, ptr))) {
+        if (ctx->tls_count_c6 < SMALL_V6_TLS_CAP) {
+            void* base = SMALL_V6_BASE_FROM_USER(ptr);
+            ctx->tls_freelist_c6[ctx->tls_count_c6++] = base;
+            return 1;
+        }
+    }
+    return 0; // Need cold path
+}
+
+/// C5 free hot path - TLS ownership check + TLS push
+/// @return: 1 if handled, 0 if fallback needed
+static inline int small_free_c5_hot_v6(SmallHeapCtxV6* ctx, void* ptr) {
+    if (likely(small_tls_owns_ptr_v6(ctx, ptr))) {
+        if (ctx->tls_count_c5 < SMALL_V6_TLS_CAP) {
+            void* base = SMALL_V6_BASE_FROM_USER(ptr);
+            ctx->tls_freelist_c5[ctx->tls_count_c5++] = base;
+            return 1;
+        }
+    }
+    return 0; // Need cold path
+}
+
+// ============================================================================
+// Cold Path Declarations (in smallobject_core_v6.c)
+// ============================================================================
+
+/// Cold path: alloc with refill (called when TLS empty)
+void* small_alloc_cold_v6(uint32_t class_idx, SmallHeapCtxV6* ctx);
+
+/// Cold path: free to page freelist (called when TLS full or cross-thread)
+void small_free_cold_v6(void* ptr, uint32_t class_idx);
+
 // API
 SmallHeapCtxV6* small_heap_ctx_v6(void);
 
diff --git a/core/front/malloc_tiny_fast.h b/core/front/malloc_tiny_fast.h
index cdc143e7..7dfc3d96 100644
--- a/core/front/malloc_tiny_fast.h
+++ b/core/front/malloc_tiny_fast.h
@@ -158,10 +158,20 @@ static inline void* malloc_tiny_fast(size_t size) {
 
     switch (route) {
     case TINY_ROUTE_SMALL_HEAP_V6: {
-        // Phase v6-1: C6-only Core v6 route stub (pool v1 fallback)
+        // Phase v6-6: Inline hot path (no route check, direct TLS pop)
         SmallHeapCtxV6* ctx_v6 = small_heap_ctx_v6();
-        const SmallPolicySnapshotV6* snap_v6 = tiny_policy_snapshot_v6();
-        void* v6p = small_alloc_fast_v6(size, (uint32_t)class_idx, ctx_v6, snap_v6);
+        void* v6p = NULL;
+        if (class_idx == 6) {
+            v6p = small_alloc_c6_hot_v6(ctx_v6);
+            if (TINY_HOT_UNLIKELY(!v6p)) {
+                v6p = small_alloc_cold_v6(6, ctx_v6);
+            }
+        } else if (class_idx == 5) {
+            v6p = small_alloc_c5_hot_v6(ctx_v6);
+            if (TINY_HOT_UNLIKELY(!v6p)) {
+                v6p = small_alloc_cold_v6(5, ctx_v6);
+            }
+        }
         if (TINY_HOT_LIKELY(v6p != NULL)) {
             return v6p;
         }
@@ -374,10 +384,17 @@ static inline int free_tiny_fast(void* ptr) {
     if (__builtin_expect(use_tiny_heap, 0)) {
         switch (route) {
         case TINY_ROUTE_SMALL_HEAP_V6: {
-            // Phase v6-1: C6-only Core v6 route stub
+            // Phase v6-6: Inline hot path (no route check, direct TLS push)
             SmallHeapCtxV6* ctx_v6 = small_heap_ctx_v6();
-            const SmallPolicySnapshotV6* snap_v6 = tiny_policy_snapshot_v6();
-            small_free_fast_v6(base, (uint32_t)class_idx, ctx_v6, snap_v6);
+            int handled = 0;
+            if (class_idx == 6) {
+                handled = small_free_c6_hot_v6(ctx_v6, base);
+            } else if (class_idx == 5) {
+                handled = small_free_c5_hot_v6(ctx_v6, base);
+            }
+            if (!handled) {
+                small_free_cold_v6(base, (uint32_t)class_idx);
+            }
             return 1;
         }
         case TINY_ROUTE_SMALL_HEAP_V5: {
diff --git a/core/smallobject_core_v6.c b/core/smallobject_core_v6.c
index cef04399..88174fe6 100644
--- a/core/smallobject_core_v6.c
+++ b/core/smallobject_core_v6.c
@@ -243,3 +243,99 @@ void small_free_fast_v6(void* ptr,
         small_cold_v6_retire_page(page);
     }
 }
+
+// ============================================================================
+// Cold Path Implementation (Phase v6-6)
+// ============================================================================
+
+/// Cold path: alloc with refill - called when TLS is empty
+/// @param class_idx: C5 or C6
+/// @param ctx: TLS context
+/// @return: USER pointer or NULL
+void* small_alloc_cold_v6(uint32_t class_idx, SmallHeapCtxV6* ctx) {
+    // Refill TLS from page
+    SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx);
+    if (!page || !page->free_list) {
+        return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : 256, 0);
+    }
+
+    uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
+
+    if (class_idx == SMALL_V6_C6_CLASS_IDX) {
+        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6;
+        int filled = 0;
+
+        while (page->free_list && filled < max_fill - 1) {
+            void* blk = page->free_list;
+            page->free_list = *(void**)blk;
+            ((uint8_t*)blk)[0] = header_byte;
+            ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
+            filled++;
+        }
+        page->used += filled;
+
+        if (page->free_list) {
+            void* blk = page->free_list;
+            page->free_list = *(void**)blk;
+            page->used++;
+            ((uint8_t*)blk)[0] = header_byte;
+            return SMALL_V6_USER_FROM_BASE(blk);
+        }
+
+        if (ctx->tls_count_c6 > 0) {
+            void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
+            return SMALL_V6_USER_FROM_BASE(blk);
+        }
+    }
+    else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
+        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c5;
+        int filled = 0;
+
+        while (page->free_list && filled < max_fill - 1) {
+            void* blk = page->free_list;
+            page->free_list = *(void**)blk;
+            ((uint8_t*)blk)[0] = header_byte;
+            ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
+            filled++;
+        }
+        page->used += filled;
+
+        if (page->free_list) {
+            void* blk = page->free_list;
+            page->free_list = *(void**)blk;
+            page->used++;
+            ((uint8_t*)blk)[0] = header_byte;
+            return SMALL_V6_USER_FROM_BASE(blk);
+        }
+
+        if (ctx->tls_count_c5 > 0) {
+            void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
+            return SMALL_V6_USER_FROM_BASE(blk);
+        }
+    }
+
+    return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : 256, 0);
+}
+
+/// Cold path: free to page freelist - called when TLS full or cross-thread
+/// @param ptr: USER pointer
+/// @param class_idx: C5 or C6
+void small_free_cold_v6(void* ptr, uint32_t class_idx) {
+    (void)class_idx; // Not needed for page lookup
+
+    void* base = SMALL_V6_BASE_FROM_USER(ptr);
+
+    SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
+    if (!page) {
+        hak_pool_free(ptr, 0, 0);
+        return;
+    }
+
+    *(void**)base = page->free_list;
+    page->free_list = base;
+    if (page->used > 0) page->used--;
+
+    if (page->used == 0) {
+        small_cold_v6_retire_page(page);
+    }
+}
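
Background sketch (illustration only, appended after the diff; git-am ignores
trailing text): the change is an instance of the classic TLS-cache split -- a
tiny, always-inlined hot path that touches only the thread-local freelist
array, and an out-of-line cold path that refills from (or spills to) shared
page state. The toy program below models that pattern and is NOT code from
this repository: every toy_* name is invented for illustration, malloc()
stands in for the page freelist, and the capacity 64 is a guess since
SMALL_V6_TLS_CAP's value does not appear in these hunks.

    #include <stdio.h>
    #include <stdlib.h>

    #define TOY_TLS_CAP 64   /* assumed; real SMALL_V6_TLS_CAP defined elsewhere */

    typedef struct {
        void* freelist[TOY_TLS_CAP];   /* stands in for tls_freelist_c6 */
        int   count;                   /* stands in for tls_count_c6    */
    } ToyTls;

    static _Thread_local ToyTls g_tls;

    /* Hot alloc: header-only, no route check, one predictable branch. */
    static inline void* toy_alloc_hot(ToyTls* tls) {
        if (__builtin_expect(tls->count > 0, 1))
            return tls->freelist[--tls->count];
        return NULL;   /* caller falls back to the cold path */
    }

    /* Cold alloc: refill all but one slot and hand the last block straight
     * to the caller, mirroring the "filled < max_fill - 1" loop in
     * small_alloc_cold_v6(); malloc() plays the role of the page freelist. */
    static void* toy_alloc_cold(ToyTls* tls, size_t sz) {
        int room = TOY_TLS_CAP - tls->count;
        for (int i = 0; i + 1 < room; i++) {
            void* blk = malloc(sz);
            if (!blk) break;
            tls->freelist[tls->count++] = blk;
        }
        return malloc(sz);
    }

    /* Hot free: push while there is room; report failure instead of
     * spilling, the same contract as small_free_c6_hot_v6(). */
    static inline int toy_free_hot(ToyTls* tls, void* p) {
        if (__builtin_expect(tls->count < TOY_TLS_CAP, 1)) {
            tls->freelist[tls->count++] = p;
            return 1;
        }
        return 0;
    }

    int main(void) {
        void* a = toy_alloc_hot(&g_tls);           /* NULL: cache is empty */
        if (!a) a = toy_alloc_cold(&g_tls, 512);   /* cold refill          */
        void* b = toy_alloc_hot(&g_tls);           /* now served hot       */
        printf("a=%p b=%p cached=%d\n", a, b, g_tls.count);
        if (!toy_free_hot(&g_tls, b))
            free(b);                               /* cold-path stand-in   */
        free(a);
        while (g_tls.count) free(g_tls.freelist[--g_tls.count]);
        return 0;
    }

The payoff is that the common case compiles to a handful of instructions at
the call site, while the branch-heavy refill/spill logic stays out of the
inlined front gate.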
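The hunks also lean on SMALL_V6_USER_FROM_BASE / SMALL_V6_BASE_FROM_USER and
a class byte written at blk[0], none of which are defined in this patch. The
sketch below is only a plausible reading, not the repository's actual layout:
the 8-byte header slot is an assumption (chosen so user pointers keep natural
alignment), and the _sketch suffix marks the helpers as hypothetical. Note
how the class byte and the free-list next pointer (*(void**)base in
small_free_cold_v6) overlay the same slot, which is safe because a block is
never free and allocated at the same time.

    #include <stdint.h>

    #define SMALL_V6_HDR_SIZE 8   /* assumption: the real size is not in this diff */

    /* base = start of the block (header slot); user = pointer handed to the
     * application. Allocated: base[0] holds the class byte. Free: the same
     * slot is reused for the free-list next pointer. */
    static inline void* small_v6_user_from_base_sketch(void* base) {
        return (uint8_t*)base + SMALL_V6_HDR_SIZE;
    }

    static inline void* small_v6_base_from_user_sketch(void* user) {
        return (uint8_t*)user - SMALL_V6_HDR_SIZE;
    }

    int main(void) {
        uint64_t block[64] = {0};   /* stands in for one block in a page */
        void* user = small_v6_user_from_base_sketch(block);
        return small_v6_base_from_user_sketch(user) == (void*)block ? 0 : 1;
    }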