diff --git a/Makefile b/Makefile index d7674f2a..3f29b097 100644 --- a/Makefile +++ b/Makefile @@ -218,7 +218,7 @@ LDFLAGS += $(EXTRA_LDFLAGS) # Targets TARGET = test_hakmem -OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o +OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o OBJS = $(OBJS_BASE) # Shared library diff --git a/core/box/free_front_v3_env_box.c b/core/box/free_front_v3_env_box.c new file mode 100644 index 00000000..af99aa9d --- /dev/null +++ b/core/box/free_front_v3_env_box.c @@ -0,0 +1,81 @@ +// free_front_v3_env_box.c - Free Front v3 Route Snapshot Implementation + +#include "free_front_v3_env_box.h" +#include "tiny_route_env_box.h" // For small_heap_v6_enabled(), small_heap_v6_class_mask() +#include "smallobject_hotbox_v3_env_box.h" // For small_heap_v3_enabled(), small_heap_v3_class_mask() +#include "tiny_front_v3_env_box.h" // For tiny_front_v3_enabled() + +// TLS snapshot storage +static __thread FreeRouteSnapshotV3 g_free_route_snapshot_v3; +static __thread int g_free_route_snapshot_v3_ready = 0; + +// ============================================================================ +// ENV gate +// ============================================================================ + +bool free_front_v3_enabled(void) { + static int g_enabled = -1; + if (__builtin_expect(g_enabled == -1, 0)) { + const char* e = getenv("HAKMEM_TINY_FREE_FRONT_V3_ENABLED"); + if (e && *e) { + g_enabled = (*e != '0') ? 1 : 0; + } else { + g_enabled = 0; // default: OFF + } + } + return g_enabled != 0; +} + +// ============================================================================ +// Snapshot Initialization +// ============================================================================ + +void free_front_v3_snapshot_init(void) { + if (g_free_route_snapshot_v3_ready) { + return; // Already initialized + } + + // Phase v3-2: Optimized - read ENV masks directly, no tiny_route_for_class() calls + // Priority order: v6 > v3 > pool/legacy + + // 1. v6 が有効なクラスを判定 + uint32_t v6_mask = 0; + if (small_heap_v6_enabled()) { + v6_mask = small_heap_v6_class_mask(); + } + + // 2. v3 が有効なクラスを判定(ENV 依存) + uint32_t v3_mask = 0; + extern bool tiny_front_v3_enabled(void); // From tiny_front_v3_env_box.h + if (tiny_front_v3_enabled() && small_heap_v3_enabled()) { + v3_mask = small_heap_v3_class_mask(); + } + + // 3. クラスごとの route を決定(優先度順: v6 > v3 > pool) + for (uint32_t ci = 0; ci < NUM_SMALL_CLASSES; ci++) { + if (v6_mask & (1u << ci)) { + // v6 が最優先(C6, C5, C4) + g_free_route_snapshot_v3.route_kind[ci] = FREE_ROUTE_CORE_V6_C6; + } else if (v3_mask & (1u << ci)) { + // v3 が次優先(C7) + g_free_route_snapshot_v3.route_kind[ci] = FREE_ROUTE_TINY_V3; + } else { + // それ以外は pool/legacy + // Note: Pool v1 はまだ統合されていないため、legacy にフォールバック + g_free_route_snapshot_v3.route_kind[ci] = FREE_ROUTE_LEGACY; + } + } + + g_free_route_snapshot_v3_ready = 1; +} + +// ============================================================================ +// Snapshot Accessor +// ============================================================================ + +const FreeRouteSnapshotV3* free_front_v3_snapshot_get(void) { + if (__builtin_expect(!g_free_route_snapshot_v3_ready, 0)) { + free_front_v3_snapshot_init(); + } + return &g_free_route_snapshot_v3; +} diff --git a/core/box/free_front_v3_env_box.h b/core/box/free_front_v3_env_box.h new file mode 100644 index 00000000..f6f5f648 --- /dev/null +++ b/core/box/free_front_v3_env_box.h @@ -0,0 +1,62 @@ +// free_front_v3_env_box.h - Free Front v3 Route Snapshot Box +// +// Purpose: +// Phase FREE-FRONT-V3-1: Unify free route decision (Tiny v3, Core v6 C6, Pool v1) +// into a single snapshot table, removing redundant ENV checks and route logic +// from the hot path. +// +// Box Theory: +// - Single Responsibility: +// Consolidate free routing policy for all small classes (C0-C7). +// - Clear Boundary: +// hak_free reads snapshot once, no ENV checks in hot path. +// - Reversible / A/B: +// ENV HAKMEM_TINY_FREE_FRONT_V3_ENABLED (default 0 = OFF). +// +// Phase Plan: +// - v3-1: Snapshot infrastructure (this file) - behavior unchanged +// - v3-2: Remove redundant route_for_class calls from hot path +// - v3-3: Consolidate ENV checks for all free paths + +#ifndef FREE_FRONT_V3_ENV_BOX_H +#define FREE_FRONT_V3_ENV_BOX_H + +#include +#include +#include + +// ============================================================================ +// Route Kind Enum (free-specific, compatible with tiny_route_kind_t) +// ============================================================================ + +typedef enum { + FREE_ROUTE_LEGACY = 0, // Legacy path (hak_free_at_legacy) + FREE_ROUTE_TINY_V3 = 1, // Tiny v3 (SmallObject HotHeap v3) + FREE_ROUTE_CORE_V6_C6 = 2, // SmallObject Core v6 (C6-only) + FREE_ROUTE_POOL_V1 = 3, // Pool v1 fallback +} free_route_kind_t; + +// ============================================================================ +// Snapshot Structure +// ============================================================================ + +#define NUM_SMALL_CLASSES 8 // C0-C7 + +typedef struct FreeRouteSnapshotV3 { + free_route_kind_t route_kind[NUM_SMALL_CLASSES]; +} FreeRouteSnapshotV3; + +// ============================================================================ +// API +// ============================================================================ + +// ENV gate: default OFF (set HAKMEM_TINY_FREE_FRONT_V3_ENABLED=1 to enable) +bool free_front_v3_enabled(void); + +// Get TLS snapshot (lazy init on first call) +const FreeRouteSnapshotV3* free_front_v3_snapshot_get(void); + +// Initialize snapshot (called once per thread on first use) +void free_front_v3_snapshot_init(void); + +#endif // FREE_FRONT_V3_ENV_BOX_H diff --git a/core/box/hak_free_api.inc.h b/core/box/hak_free_api.inc.h index ea2d76c1..2ec701b4 100644 --- a/core/box/hak_free_api.inc.h +++ b/core/box/hak_free_api.inc.h @@ -143,6 +143,26 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) { switch (fg.domain) { case FG_DOMAIN_TINY: { + // Phase FREE-FRONT-V3-2: v3 snapshot routing (optional, default OFF) + // Optimized: No tiny_route_for_class() calls, no redundant ENV checks +#if HAKMEM_TINY_HEADER_CLASSIDX + { + // Check if v3 snapshot routing is enabled (cached) + static int g_v3_enabled = -1; + if (__builtin_expect(g_v3_enabled == -1, 0)) { + // For now, v3 snapshot routing is DISABLED by default (experimental) + // Phase v3-2 infrastructure is ready but not yet integrated + g_v3_enabled = 0; // TODO: Enable when ready: free_front_v3_enabled() ? 1 : 0; + } + + // Note: v3 snapshot path currently disabled (Phase v3-2 infrastructure only) + // When enabled, it would consolidate free routing logic and remove redundant + // ENV checks from the hot path. For now, use legacy routing below. + (void)g_v3_enabled; // Suppress unused variable warning + } +#endif + + // Legacy path (default when v3 is OFF) // Fast path: Tiny (C0-C7) with 1-byte header (0xa0 | class_idx) #if HAKMEM_TINY_HEADER_CLASSIDX if (__builtin_expect(tiny_free_gate_try_fast(ptr), 1)) { diff --git a/core/box/pool_api.inc.h b/core/box/pool_api.inc.h index ad42567a..b2f316ce 100644 --- a/core/box/pool_api.inc.h +++ b/core/box/pool_api.inc.h @@ -1027,6 +1027,10 @@ void* hak_pool_try_alloc(size_t size, uintptr_t site_id) { } void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) { + // Phase FREE-LEGACY-BREAKDOWN-1: pool v1 カウンタ + extern void free_path_stat_inc_pool_v1_fast(void); + free_path_stat_inc_pool_v1_fast(); + if (!hak_pool_v2_route()) { if (hak_pool_v1_flatten_enabled()) { hak_pool_free_v1_flat(ptr, size, site_id); diff --git a/core/box/smallobject_core_v6_box.h b/core/box/smallobject_core_v6_box.h index 4269fcc9..b84176b5 100644 --- a/core/box/smallobject_core_v6_box.h +++ b/core/box/smallobject_core_v6_box.h @@ -36,6 +36,10 @@ struct SmallHeapCtxV6 { void* tls_freelist_c5[SMALL_V6_TLS_CAP]; uint8_t tls_count_c5; + // C4 TLS freelist (Phase v6-6) + void* tls_freelist_c4[SMALL_V6_TLS_CAP]; + uint8_t tls_count_c4; + // TLS segment ownership (for fast check) uintptr_t tls_seg_base; uintptr_t tls_seg_end; @@ -91,6 +95,16 @@ static inline void* small_alloc_c5_hot_v6(SmallHeapCtxV6* ctx) { return NULL; // Need refill } +/// C4 alloc hot path - no route check, direct TLS pop (Phase v6-6) +/// @return: USER pointer or NULL (fallback needed) +static inline void* small_alloc_c4_hot_v6(SmallHeapCtxV6* ctx) { + if (likely(ctx->tls_count_c4 > 0)) { + void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4]; + return SMALL_V6_USER_FROM_BASE(blk); + } + return NULL; // Need refill +} + /// C6 free hot path - TLS ownership check + TLS push /// @return: 1 if handled, 0 if fallback needed static inline int small_free_c6_hot_v6(SmallHeapCtxV6* ctx, void* ptr) { @@ -117,6 +131,19 @@ static inline int small_free_c5_hot_v6(SmallHeapCtxV6* ctx, void* ptr) { return 0; // Need cold path } +/// C4 free hot path - TLS ownership check + TLS push (Phase v6-6) +/// @return: 1 if handled, 0 if fallback needed +static inline int small_free_c4_hot_v6(SmallHeapCtxV6* ctx, void* ptr) { + if (likely(small_tls_owns_ptr_v6(ctx, ptr))) { + if (ctx->tls_count_c4 < SMALL_V6_TLS_CAP) { + void* base = SMALL_V6_BASE_FROM_USER(ptr); + ctx->tls_freelist_c4[ctx->tls_count_c4++] = base; + return 1; + } + } + return 0; // Need cold path +} + // ============================================================================ // Cold Path Declarations (in smallobject_core_v6.c) // ============================================================================ diff --git a/core/box/smallobject_hotbox_v3_env_box.h b/core/box/smallobject_hotbox_v3_env_box.h index 56937b8b..cf9d8809 100644 --- a/core/box/smallobject_hotbox_v3_env_box.h +++ b/core/box/smallobject_hotbox_v3_env_box.h @@ -24,7 +24,7 @@ static inline int small_heap_v3_enabled(void) { return g_enable; } -static inline int small_heap_v3_class_enabled(uint8_t class_idx) { +static inline uint32_t small_heap_v3_class_mask(void) { static int g_parsed = 0; static unsigned g_mask = 0; if (__builtin_expect(!g_parsed, 0)) { @@ -38,9 +38,14 @@ static inline int small_heap_v3_class_enabled(uint8_t class_idx) { } g_parsed = 1; } + return (uint32_t)g_mask; +} + +static inline int small_heap_v3_class_enabled(uint8_t class_idx) { if (!small_heap_v3_enabled()) return 0; if (class_idx >= TINY_NUM_CLASSES) return 0; - return (g_mask & (1u << class_idx)) != 0; + uint32_t mask = small_heap_v3_class_mask(); + return (mask & (1u << class_idx)) != 0; } static inline int small_heap_v3_c7_enabled(void) { diff --git a/core/box/smallsegment_v6_box.h b/core/box/smallsegment_v6_box.h index c27da47b..c0fb94f2 100644 --- a/core/box/smallsegment_v6_box.h +++ b/core/box/smallsegment_v6_box.h @@ -20,6 +20,10 @@ #define SMALL_V6_C5_CLASS_IDX 5 #define SMALL_V6_C5_BLOCK_SIZE 256 +// C4 configuration (Phase v6-6) +#define SMALL_V6_C4_CLASS_IDX 4 +#define SMALL_V6_C4_BLOCK_SIZE 128 + // Page index calculation macro (requires 'seg' variable in scope) #define SMALL_V6_PAGE_IDX(seg, addr) (((uintptr_t)(addr) - (seg)->base) >> SMALL_PAGE_V6_SHIFT) diff --git a/core/smallobject_cold_iface_v6.c b/core/smallobject_cold_iface_v6.c index 6934e87c..970a2935 100644 --- a/core/smallobject_cold_iface_v6.c +++ b/core/smallobject_cold_iface_v6.c @@ -11,14 +11,16 @@ #define unlikely(x) __builtin_expect(!!(x), 0) #endif -// Refill page for given class (C6 and C5 in v6-5) +// Refill page for given class (C6, C5, C4 in v6-6) SmallPageMetaV6* small_cold_v6_refill_page(uint32_t class_idx) { - // v6-5: Support C5 and C6 + // v6-6: Support C4, C5, and C6 size_t block_size; if (class_idx == SMALL_V6_C6_CLASS_IDX) { block_size = SMALL_V6_C6_BLOCK_SIZE; // 512 } else if (class_idx == SMALL_V6_C5_CLASS_IDX) { block_size = SMALL_V6_C5_BLOCK_SIZE; // 256 + } else if (class_idx == SMALL_V6_C4_CLASS_IDX) { + block_size = SMALL_V6_C4_BLOCK_SIZE; // 128 } else { return NULL; // Unsupported class } diff --git a/core/smallobject_core_v6.c b/core/smallobject_core_v6.c index 88174fe6..076122a4 100644 --- a/core/smallobject_core_v6.c +++ b/core/smallobject_core_v6.c @@ -103,6 +103,14 @@ void* small_alloc_fast_v6(size_t size, return SMALL_V6_USER_FROM_BASE(blk); } } + // C4 fast path (Phase v6-6) + else if (class_idx == SMALL_V6_C4_CLASS_IDX) { + // Fast path: TLS freelist hit + if (likely(ctx->tls_count_c4 > 0)) { + void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4]; + return SMALL_V6_USER_FROM_BASE(blk); + } + } else { // Unsupported class for v6 return hak_pool_try_alloc(size, 0); @@ -177,6 +185,36 @@ void* small_alloc_fast_v6(size_t size, return SMALL_V6_USER_FROM_BASE(blk); } } + else if (class_idx == SMALL_V6_C4_CLASS_IDX) { + // C4 refill path (Phase v6-6) + int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c4; + int filled = 0; + + // Fill TLS (leave room for 1 to return) + while (page->free_list && filled < max_fill - 1) { + void* blk = page->free_list; + page->free_list = *(void**)blk; + ((uint8_t*)blk)[0] = header_byte; + ctx->tls_freelist_c4[ctx->tls_count_c4++] = blk; + filled++; + } + page->used += filled; + + // Pop one more to return to caller + if (page->free_list) { + void* blk = page->free_list; + page->free_list = *(void**)blk; + page->used++; + ((uint8_t*)blk)[0] = header_byte; + return SMALL_V6_USER_FROM_BASE(blk); + } + + // If we filled TLS but no more blocks, pop from TLS + if (ctx->tls_count_c4 > 0) { + void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4]; + return SMALL_V6_USER_FROM_BASE(blk); + } + } // Should not reach here return hak_pool_try_alloc(size, 0); @@ -224,6 +262,11 @@ void small_free_fast_v6(void* ptr, ctx->tls_freelist_c5[ctx->tls_count_c5++] = base; return; } + // C4 TLS push (Phase v6-6) + if (class_idx == SMALL_V6_C4_CLASS_IDX && ctx->tls_count_c4 < SMALL_V6_TLS_CAP) { + ctx->tls_freelist_c4[ctx->tls_count_c4++] = base; + return; + } } // Slow path: page_meta lookup and push to page freelist @@ -249,14 +292,14 @@ void small_free_fast_v6(void* ptr, // ============================================================================ /// Cold path: alloc with refill - called when TLS is empty -/// @param class_idx: C5 or C6 +/// @param class_idx: C4, C5 or C6 /// @param ctx: TLS context /// @return: USER pointer or NULL void* small_alloc_cold_v6(uint32_t class_idx, SmallHeapCtxV6* ctx) { // Refill TLS from page SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx); if (!page || !page->free_list) { - return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : 256, 0); + return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : (class_idx == SMALL_V6_C5_CLASS_IDX ? 256 : 128), 0); } uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx); @@ -313,8 +356,34 @@ void* small_alloc_cold_v6(uint32_t class_idx, SmallHeapCtxV6* ctx) { return SMALL_V6_USER_FROM_BASE(blk); } } + else if (class_idx == SMALL_V6_C4_CLASS_IDX) { + int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c4; + int filled = 0; - return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : 256, 0); + while (page->free_list && filled < max_fill - 1) { + void* blk = page->free_list; + page->free_list = *(void**)blk; + ((uint8_t*)blk)[0] = header_byte; + ctx->tls_freelist_c4[ctx->tls_count_c4++] = blk; + filled++; + } + page->used += filled; + + if (page->free_list) { + void* blk = page->free_list; + page->free_list = *(void**)blk; + page->used++; + ((uint8_t*)blk)[0] = header_byte; + return SMALL_V6_USER_FROM_BASE(blk); + } + + if (ctx->tls_count_c4 > 0) { + void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4]; + return SMALL_V6_USER_FROM_BASE(blk); + } + } + + return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : (class_idx == SMALL_V6_C5_CLASS_IDX ? 256 : 128), 0); } /// Cold path: free to page freelist - called when TLS full or cross-thread diff --git a/docs/analysis/FREE_FRONT_V3_DESIGN.md b/docs/analysis/FREE_FRONT_V3_DESIGN.md new file mode 100644 index 00000000..ead26f23 --- /dev/null +++ b/docs/analysis/FREE_FRONT_V3_DESIGN.md @@ -0,0 +1,124 @@ +# Free Front v3 設計メモ + +## 目的 + +Mixed 16–1024B における free hotpath(`free` ≈ 23–36%, `tiny_alloc_gate_fast` ≈ 22%)を、 +「header 読み + route 決定 + 1 回の dispatch」に近づけることを目標に、free front v3 の構造を整理する。 + +v6/vULTRA でやりたかった「lookup を避ける」「TLS 所有で早期 free」を、free 前段の設計に統合する。 + +--- + +## 現状 free front のフローとボトルネック + +### 現状フロー(簡略図) + +```text +free + → hak_free_at + → fg_classify_domain + → fg_tiny_gate + → tiny_free_gate_try_fast + → hak_tiny_free_fast_v2 + → ss_fast_lookup(ptr) + → slab_index_for(ptr) + → tiny_get_class_from_ss() + → TLS push / remote / pool free... +``` + +`free_tiny_fast` 相当の処理は概ね次のステップから成る: + +1. header 読み取り + magic check(OK) +2. class_idx 抽出(OK) +3. stats increment(OK) +4. C7 ULTRA check(ENV gate) +5. C7 v3 check(ENV gate) +6. `tiny_route_for_class()` 呼び出し(毎 free) +7. v4/v5/v6 など複数の ENV check +8. `tiny_route_is_heap_kind()` で heap/legacy 判定 +9. `tiny_front_v3_snapshot_get()` で front snapshot/TLS 判定 +10. `ss_fast_lookup(base)` + `slab_index_for()` で Superslab/スラブ index 解決 +11. Larson cross-thread check(owner/TLS 判定) +12. 最後に route に応じた switch/case で実 free 実行 + +### 問題点 + +- `tiny_route_for_class()` を free 毎に呼んでいる(class→route は snapshot で cache 可能)。 +- `tiny_front_v3_snapshot_get()` を毎 free で呼んでいる。 +- `ss_fast_lookup` + `slab_index_for` が free 前段にぶら下がっており、 + 「lookup を避ける」という v6/vULTRA の目標と構造的に衝突している。 +- C7 ULTRA / v4 / v5 / v6 など複数の ENV 判定が HotPath に並んでいて、branch mispredict の要因になっている。 + +--- + +## free front v3 の目標 + +free front v3 では、次の 3 点を達成する: + +1. **route 決定を snapshot に閉じ込める** + - `tiny_route_for_class()` の結果や各種 ENV を、起動時/スナップショット更新時に `route_kind[class_idx]` テーブルに焼き込む。 + - free 時には「header→class_idx→route_kind」をテーブル 1 回参照で決める。 + +2. **ENV check を HotPath から外す** + - C7 ULTRA / v4 / v5 / v6 などの ON/OFF 判定は snapshot 初期化時に反映し、 + free のホットパスでは ENV を直接読まない。 + +3. **lookup(ss_fast_lookup/slab_index_for)を “本当に必要な場面だけ” に限定** + - C6/C5/C4 など Core v6/Tiny/v3 が TLS 所有を持っているクラスでは、`small_tls_owns_ptr_v6` や C7 ULTRA の mask 判定で Superslab lookup を飛ばす。 + - Superslab/pool v1 経路でのみ lookup を使い、free front v3 の基本ルートからは排除する。 + +--- + +## 理想的な free front v3 の形(イメージ) + +```c +typedef struct FreeRouteSnapshotV3 { + uint8_t route_kind[NUM_SMALL_CLASSES]; // enum free_route_kind_t + // v6/v3/pool/legacy 向けの補助情報(policy pointer 等)は別テーブルで +} FreeRouteSnapshotV3; + +const FreeRouteSnapshotV3* free_front_v3_snapshot_get(void); + +void hak_free(void* ptr) { + uint8_t header = *(uint8_t*)((uintptr_t)ptr - 1); + uint32_t class_idx = header & HEADER_CLASS_MASK; + + const FreeRouteSnapshotV3* snap = free_front_v3_snapshot_get(); + free_route_kind_t route = snap->route_kind[class_idx]; + + switch (route) { + case FREE_ROUTE_TINY_V3: + hak_tiny_free_fast_v3(ptr, class_idx, snap); + return; + case FREE_ROUTE_CORE_V6_C6: + small_free_fast_v6(ptr, class_idx, small_heap_ctx_v6(), + snap->core_v6_policy); + return; + case FREE_ROUTE_POOL_V1: + hak_pool_free(ptr, 0, 0); + return; + // ... 他ルート(ULTRA, legacy など) + } +} +``` + +ここでは: +- header→class_idx→route_kind が front v3 の責務。 +- route_kind に v6/v3/pool/legacy の組み合わせや ENV の影響がすべて反映される。 +- 下流の箱(v6/v3/pool)は「自分の free を実行する」ことだけに集中できる。 + +--- + +## FREE-FRONT-V3-0 のゴール + +このドキュメントでは、以下を設計レベルで固定する: + +- 現状 free front のスタックと perf 上のボトルネック(route 二重チェック、ENV 多段チェック、lookup の位置)。 +- free front v3 で目指す構造(header 読み+class_idx 判定+snapshot route 決定+1回の dispatch)。 +- 以降のフェーズで: + - `free_front_v3_env_box.h` のような ENV→snapshot 初期化箱を作る。 + - `hak_free` に free front v3 の接続を 1 箇所だけ差し込む。 + - Mixed で free/gate の self% を A/B 測定する。 + +実装は次フェーズ(FREE-FRONT-V3-1 以降)のタスクとし、ここでは「どこを削るか」「どこを HotPath から外すか」の設計を記録する。 + diff --git a/docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md b/docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md index 0c06df20..c39e986a 100644 --- a/docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md +++ b/docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md @@ -410,6 +410,13 @@ uint32_t region_id_box_lookup(void* ptr); - C6 → C5 → 他クラスと、hot class から CORE v6 に載せ、Mixed 16–1024B の perf を確認。 - C7 ULTRA(L0)と CORE v6(L1)の共存チューニング。 +5. **Phase v6-4 以降(C5/C4 拡張 + free hotpath 削減)** + - C6 で安定・baseline 同等が確認できたら、C5 / C4 を順次 CORE v6 に載せていき、free hotpath の `ss_fast_lookup`/`slab_index_for` 依存を削っていく。 + - 各クラスごとに: + - heavy プロファイル(C5-heavy, C4-heavy)で v6 ON/OFF の A/B(まずは ±0〜数% を目標)。 + - Mixed 16–1024B で v6 ON 時の impact(free% の減少と ops/s の変化)を確認。 + - それでも free 側が支配的なら、最終的には front/gate の dispatcher 自体を薄くするフェーズ(free dispatch 削減)に進む。 + 以降の Phase は、この「層」と「責務」を変えずに micro-optimization を繰り返すフェーズとする。 --- diff --git a/docs/status/CURRENT_TASK.md b/docs/status/CURRENT_TASK.md index 8262c4c3..8f65ecdd 100644 --- a/docs/status/CURRENT_TASK.md +++ b/docs/status/CURRENT_TASK.md @@ -168,6 +168,39 @@ Step 2.5 が TLS_SLL_PUSH_DUP を「修正」するために追加されたが --- +## Phase FREE-LEGACY-OPT シリーズ(2025-12-11) + +### Phase FREE-LEGACY-OPT-4-1: Legacy per-class 分析 ✅ 完了 + +**目的**: Legacy fallback 49.2% の内訳を per-class で分析 + +**測定結果(Mixed 16-1024B)**: +- **C6 (513-1024B)**: 51.4% (137,319 / 266,942 Legacy calls) +- C5 (257-512B): 25.8% +- C4 (129-256B): 13.0% +- C3 (65-128B): 6.5% +- C2 (33-64B): 3.3% +- C0/C1/C7: 0.0% + +**最大ターゲット**: C6 が Legacy の過半数を占める + +**詳細**: `docs/analysis/FREE_LEGACY_PATH_ANALYSIS.md` 参照 + +### Phase FREE-LEGACY-OPT-4-2: C6_ULTRA_FREE_BOX 実装(進行中) + +**目的**: C6 の free だけを C7 ULTRA 風 TLS キャッシュで受け、Legacy fallback を半減 + +**実装範囲**: +- C6 専用・free 専用(alloc は既存ルートのまま) +- TLS に `c6_freelist[32]` + `c6_count` + segment range check +- ENV: `HAKMEM_TINY_C6_ULTRA_FREE_ENABLED=0`(研究箱、デフォルト OFF) + +**期待効果**: +- Legacy fallback: 49.2% → 24-27%(C6 分を削減) +- Mixed throughput: +5-8% 改善(44.8M → 47-48M ops/s) + +--- + ## 🎯 次のアクション ### 現時点での選択肢 diff --git a/hakmem.d b/hakmem.d index fe78a160..e4b1c341 100644 --- a/hakmem.d +++ b/hakmem.d @@ -105,14 +105,21 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \ core/box/../front/../box/smallobject_hotbox_v3_env_box.h \ core/box/../front/../box/smallobject_hotbox_v4_box.h \ core/box/../front/../box/smallobject_hotbox_v5_box.h \ - core/box/../front/../box/smallobject_core_v6_box.h \ core/box/../front/../box/tiny_c7_ultra_box.h \ core/box/../front/../box/tiny_c7_ultra_segment_box.h \ + core/box/../front/../box/tiny_c6_ultra_free_box.h \ + core/box/../front/../box/tiny_c6_ultra_free_env_box.h \ + core/box/../front/../box/tiny_ultra_classes_box.h \ + core/box/../front/../box/tiny_legacy_fallback_box.h \ core/box/../front/../box/tiny_front_v3_env_box.h \ + core/box/../front/../box/free_path_stats_box.h \ + core/box/../front/../box/tiny_front_hot_box.h \ + core/box/../front/../box/tiny_ptr_convert_box.h \ core/box/../front/../box/tiny_route_env_box.h \ core/box/../front/../box/smallobject_hotbox_v4_env_box.h \ core/box/../front/../box/smallobject_v5_env_box.h \ core/box/../front/../box/tiny_front_stats_box.h \ + core/box/../front/../box/free_path_stats_box.h \ core/box/tiny_alloc_gate_box.h core/box/tiny_route_box.h \ core/box/tiny_front_config_box.h core/box/wrapper_env_box.h \ core/box/../hakmem_internal.h @@ -295,14 +302,21 @@ core/box/../front/../box/../superslab/superslab_inline.h: core/box/../front/../box/smallobject_hotbox_v3_env_box.h: core/box/../front/../box/smallobject_hotbox_v4_box.h: core/box/../front/../box/smallobject_hotbox_v5_box.h: -core/box/../front/../box/smallobject_core_v6_box.h: core/box/../front/../box/tiny_c7_ultra_box.h: core/box/../front/../box/tiny_c7_ultra_segment_box.h: +core/box/../front/../box/tiny_c6_ultra_free_box.h: +core/box/../front/../box/tiny_c6_ultra_free_env_box.h: +core/box/../front/../box/tiny_ultra_classes_box.h: +core/box/../front/../box/tiny_legacy_fallback_box.h: core/box/../front/../box/tiny_front_v3_env_box.h: +core/box/../front/../box/free_path_stats_box.h: +core/box/../front/../box/tiny_front_hot_box.h: +core/box/../front/../box/tiny_ptr_convert_box.h: core/box/../front/../box/tiny_route_env_box.h: core/box/../front/../box/smallobject_hotbox_v4_env_box.h: core/box/../front/../box/smallobject_v5_env_box.h: core/box/../front/../box/tiny_front_stats_box.h: +core/box/../front/../box/free_path_stats_box.h: core/box/tiny_alloc_gate_box.h: core/box/tiny_route_box.h: core/box/tiny_front_config_box.h: