// tiny_free_route_cache_env_box.h - Phase 3 D1: Free Path Route Cache
// Role:
//   - ENV gate for HAKMEM_FREE_STATIC_ROUTE (default OFF)
//   - Enable cached route lookup in free path to eliminate tiny_route_for_class() overhead
//
// Target: 4.39% self + 24.78% children in tiny_route_for_class() during free
// Expected gain: +1-2% in Mixed workload
//
#pragma once

#include <stdio.h>   // fprintf, stderr
#include <stdlib.h>  // getenv

// Cached gate state: -1 = environment not read yet, 0 = disabled, 1 = enabled.
// File-scope `static` in a header: each translation unit that includes this
// file gets its own copy of the flag.
static int g_free_static_route_enabled = -1;

// tiny_free_static_route_enabled() - Check if the free path route cache is enabled.
//
// Returns: 1 if enabled, 0 if disabled (default).
// ENV:     HAKMEM_FREE_STATIC_ROUTE=0/1
//
// The environment is read only while the cached flag is still -1; afterwards
// the cached value is returned directly. The variable counts as "enabled" when
// it is set, non-empty, and its first character is not '0'.
// No synchronization is performed on the cached flag here.
static inline int tiny_free_static_route_enabled(void) {
    // Fast path: flag already resolved to 0 or 1.
    if (__builtin_expect(g_free_static_route_enabled >= 0, 1)) {
        return g_free_static_route_enabled;
    }

    const char* e = getenv("HAKMEM_FREE_STATIC_ROUTE");
    int enabled = (e && *e && *e != '0') ? 1 : 0;

    // Store the result before logging, so any call made while the message is
    // being written already takes the fast path above.
    // NOTE(review): presumably matters if fprintf re-enters the allocator - confirm.
    g_free_static_route_enabled = enabled;

    if (enabled) {
        fprintf(stderr, "[FREE_STATIC_ROUTE] enabled (Phase 3 D1)\n");
    }
    return enabled;
}
tiny_front_v3_snapshot_get() : NULL; @@ -763,7 +776,19 @@ static inline int free_tiny_fast(void* ptr) { legacy_fallback: // LEGACY fallback path - tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx); + // Phase 3 D1: Free path route cache (eliminate tiny_route_for_class overhead) + tiny_route_kind_t route; + if (__builtin_expect(tiny_free_static_route_enabled(), 0)) { + // Use cached route (bypasses tiny_route_for_class()) + route = g_tiny_route_class[(unsigned)class_idx & 7u]; + if (__builtin_expect(route == TINY_ROUTE_LEGACY && !g_tiny_route_snapshot_done, 0)) { + // Fallback if uninitialized + route = tiny_route_for_class((uint8_t)class_idx); + } + } else { + // Standard path + route = tiny_route_for_class((uint8_t)class_idx); + } const int use_tiny_heap = tiny_route_is_heap_kind(route); const TinyFrontV3Snapshot* front_snap = __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL; diff --git a/docs/analysis/PHASE3_D1_FREE_ROUTE_CACHE_1_DESIGN.md b/docs/analysis/PHASE3_D1_FREE_ROUTE_CACHE_1_DESIGN.md new file mode 100644 index 00000000..a895cc2a --- /dev/null +++ b/docs/analysis/PHASE3_D1_FREE_ROUTE_CACHE_1_DESIGN.md @@ -0,0 +1,46 @@ +# Phase 3 D1: Free Path Route Cache 設計メモ + +## 目的 +Free path の `tiny_route_for_class()` コストを削減(4.39% self + 24.78% children) + +## 観察 +- free() → tiny_free_fast() → tiny_route_for_class() → g_tiny_route_snapshot_done check +- Route determination が free path の支配的なボトルネック +- Phase 3 C3 (Static routing for alloc) と同じアプローチを free に適用 + +## 実装アプローチ + +### L0: Env(戻せる) +- `HAKMEM_FREE_STATIC_ROUTE=0/1` (default: 0, OFF) + +### L1: IntegrationBox(境界: 1箇所) + +`tiny_route_env_box.h` に既存する `g_tiny_route` を free path で活用: +- `tiny_route_for_class()` を呼ばずに直接 route を決定 +- Cache invalidate: policy version change on sync + +### 実装指示 + +**File 1**: `core/box/tiny_free_route_cache_env_box.h` (新規) +- Inline function: `tiny_free_static_route_enabled()` + - Check `HAKMEM_FREE_STATIC_ROUTE` ENV + - Lazy init 
with -1 sentinel + - Return cached value + +**File 2**: Modify `core/box/tiny_route_env_box.h` (既存) +- Add: `SmallRouteKind tiny_route_get_kind(int class_idx)` if it does not already exist +- Use existing `g_tiny_route.route_kind[class]` cache + +**File 3**: Modify `core/front/tiny_legacy_fallback_box.h` (既存) +- In `tiny_legacy_fallback_free_base()` function +- Check: `if (tiny_free_static_route_enabled())` before calling `tiny_route_for_class()` +- Fallback: call `tiny_route_for_class()` if disabled + +## A/B テスト + +- Mixed (10-run): HAKMEM_FREE_STATIC_ROUTE=0 vs =1 +- GO: +1.0% or better, NO-GO: -1.0% or worse + +## 期待 +- tiny_route_for_class() call 削減 → L1 cache pressure 低下 +- +1-2% gain in free path