diff --git a/Makefile b/Makefile index 827c0814..d7199a43 100644 --- a/Makefile +++ b/Makefile @@ -428,7 +428,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/box/tiny_env_box.o core/box/wrapper_env_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/box/tiny_env_box.o core/box/wrapper_env_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o 
core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o core/box/tiny_route_box.o
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1)
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
diff --git a/core/box/ss_allocation_box.c b/core/box/ss_allocation_box.c
index 9e4b53b9..609152bf 100644
--- a/core/box/ss_allocation_box.c
+++ b/core/box/ss_allocation_box.c
@@ -184,6 +184,9 @@ SuperSlab* superslab_allocate(uint8_t size_class) {
    ss->magic = SUPERSLAB_MAGIC;
    ss->active_slabs = 0;
    ss->lg_size = lg;  // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
+
+   // P-Tier: Initialize tier to HOT (normal operation, eligible for allocation)
+   atomic_store_explicit(&ss->tier, SS_TIER_HOT, memory_order_relaxed);
    ss->slab_bitmap = 0;
    ss->nonempty_mask = 0;  // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
    ss->freelist_mask = 0;  // P1.1 FIX: Initialize freelist_mask
diff --git a/core/box/ss_tier_box.h b/core/box/ss_tier_box.h
new file mode 100644
index 00000000..6eaa9e1d
--- /dev/null
+++ b/core/box/ss_tier_box.h
@@ -0,0 +1,302 @@
+// ss_tier_box.h - P-Tier: Utilization-Aware SuperSlab Tiering Box
+// Purpose: Manage SuperSlab tier transitions based on utilization
+// License: MIT
+// Date: 2025-12-04
+
+#ifndef HAK_SS_TIER_BOX_H
+#define HAK_SS_TIER_BOX_H
+
+#include <stdbool.h>
+#include <stdatomic.h>
+#include <stdlib.h>  // for getenv()
+#include "../superslab/superslab_types.h"
+
+// ============================================================================
+// P-Tier: Utilization-Aware SuperSlab Tiering Box
+// ============================================================================
+//
+// Goal: Reduce registry pressure by consolidating allocations to HOT SuperSlabs
+//       and efficiently draining DRAINING SuperSlabs.
+// +// Tier Definitions: +// - HOT (>25% utilization): Accept new allocations, actively used +// - DRAINING (<=25% utilization): Drain only, no new allocations +// - FREE (0% utilization): Ready for LRU cache or munmap +// +// Strategy: +// - Allocations target HOT tier SuperSlabs only +// - DRAINING tier SuperSlabs accept no new allocations +// - Automatic transitions based on utilization thresholds +// - Hysteresis prevents thrashing between HOT and DRAINING +// +// Expected Benefits: +// - Reduced registry size (fewer partially-used SuperSlabs) +// - Improved cache locality (concentrated allocations) +// - Faster allocation (skip DRAINING SuperSlabs) +// - Efficient memory reclamation (clear path to FREE tier) +// +// Box Contract: +// - ss_tier_calc_utilization(): Calculate current utilization [0.0, 1.0] +// - ss_tier_check_transition(): Check and perform tier transitions +// - ss_tier_get(): Get current tier +// - ss_tier_is_hot(): Quick check if SuperSlab accepts allocations +// - ss_tier_set(): Force tier change (testing/debug) +// +// ============================================================================ + +// Default thresholds (can be overridden by environment variables) +#define SS_TIER_DOWN_THRESHOLD_DEFAULT 0.25f // HOT → DRAINING (25% utilization) +#define SS_TIER_UP_THRESHOLD_DEFAULT 0.50f // DRAINING → HOT (50% utilization, hysteresis) + +// Environment variable support for runtime configuration +// ENV: HAKMEM_SS_TIER_DOWN_THRESHOLD (default: 0.25) +// ENV: HAKMEM_SS_TIER_UP_THRESHOLD (default: 0.50) +static inline float ss_tier_get_down_threshold(void) { + static float cached = -1.0f; + if (__builtin_expect(cached < 0.0f, 0)) { + const char* e = getenv("HAKMEM_SS_TIER_DOWN_THRESHOLD"); + if (e && *e) { + float v = (float)atof(e); + cached = (v > 0.0f && v <= 1.0f) ? v : SS_TIER_DOWN_THRESHOLD_DEFAULT; + } else { + cached = SS_TIER_DOWN_THRESHOLD_DEFAULT; + } + } + return cached; +} + +static inline float ss_tier_get_up_threshold(void) { + static float cached = -1.0f; + if (__builtin_expect(cached < 0.0f, 0)) { + const char* e = getenv("HAKMEM_SS_TIER_UP_THRESHOLD"); + if (e && *e) { + float v = (float)atof(e); + cached = (v > 0.0f && v <= 1.0f) ? v : SS_TIER_UP_THRESHOLD_DEFAULT; + } else { + cached = SS_TIER_UP_THRESHOLD_DEFAULT; + } + } + return cached; +} + +// ============================================================================ +// 1. Utilization Calculation +// ============================================================================ +// +// Calculates current utilization as: total_active_blocks / total_capacity +// +// Uses: +// - ss->total_active_blocks: Atomic counter of all active blocks across slabs +// - ss->active_slabs: Number of carved slabs +// - ss->slabs[].capacity: Per-slab capacity +// +// Returns: Utilization ratio [0.0, 1.0] +// 0.0 = completely empty (FREE tier candidate) +// 1.0 = fully utilized (strong HOT tier) +// +// Note: Uses relaxed memory order as this is a heuristic for tier classification, +// not a safety-critical invariant. 
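To make the utilization numbers and the hysteresis band concrete, here is a standalone sketch (illustration only, not part of the patch; `tier_example_t` and `tier_target_for()` are made-up names, using the default 0.25/0.50 thresholds):

typedef enum { T_HOT, T_DRAINING, T_FREE } tier_example_t;

// Target tier implied by a utilization value under the default thresholds.
static tier_example_t tier_target_for(tier_example_t cur, float util) {
    if (util == 0.0f) return T_FREE;                            // fully drained
    if (cur == T_HOT      && util <= 0.25f) return T_DRAINING;  // demote
    if (cur == T_DRAINING && util >= 0.50f) return T_HOT;       // promote (hysteresis)
    if (cur == T_FREE && util > 0.0f) return T_HOT;             // first allocation
    return cur;  // inside (0.25, 0.50): keep current tier, no thrashing
}
// Example: 1638 live blocks out of 8192 total capacity gives util ~= 0.20, so a
// HOT SuperSlab demotes to DRAINING; it only re-promotes once util reaches 0.50
// (>= 4096 live blocks), not as soon as it crosses back above 0.25.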
+// ============================================================================ + +static inline float ss_tier_calc_utilization(SuperSlab* ss) { + if (!ss) return 0.0f; + + // Get current active blocks (atomic load) + uint32_t active = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed); + + // Calculate total capacity across all active slabs + // Note: We sum capacity from active_slabs to handle per-slab class assignment (Phase 12) + uint32_t total_capacity = 0; + uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE; + if (max_slabs > SLABS_PER_SUPERSLAB_MAX) { + max_slabs = SLABS_PER_SUPERSLAB_MAX; + } + + // Iterate through active slabs and sum capacity + for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) { + TinySlabMeta* meta = &ss->slabs[i]; + if (meta->capacity > 0) { + total_capacity += meta->capacity; + } + } + + // Handle edge case: no capacity yet (fresh SuperSlab) + if (total_capacity == 0) { + return 0.0f; + } + + // Return utilization ratio + return (float)active / (float)total_capacity; +} + +// ============================================================================ +// 2. Tier Transition Check +// ============================================================================ +// +// Checks current utilization and performs tier transitions if needed. +// +// Transition Rules: +// - HOT → DRAINING: utilization <= down_threshold (default: 25%) +// - DRAINING → HOT: utilization >= up_threshold (default: 50%, hysteresis) +// - DRAINING → FREE: utilization == 0% (all blocks freed) +// - FREE → HOT: First allocation (handled by allocation path, not here) +// +// Hysteresis Rationale: +// - Down threshold (25%) < Up threshold (50%) prevents oscillation +// - SuperSlab must demonstrate sustained activity to return to HOT +// +// Returns: true if tier transition occurred, false otherwise +// +// Thread Safety: Uses atomic compare_exchange for safe concurrent transitions +// ============================================================================ + +static inline bool ss_tier_check_transition(SuperSlab* ss) { + if (!ss) return false; + + // Calculate current utilization + float util = ss_tier_calc_utilization(ss); + + // Get current tier (atomic load) + uint8_t current_tier = atomic_load_explicit(&ss->tier, memory_order_acquire); + + // Get thresholds (cached after first call) + float down_thresh = ss_tier_get_down_threshold(); + float up_thresh = ss_tier_get_up_threshold(); + + // Determine target tier based on utilization and current state + uint8_t target_tier = current_tier; + + switch (current_tier) { + case SS_TIER_HOT: + // HOT → DRAINING: Drop below down threshold + if (util <= down_thresh) { + target_tier = SS_TIER_DRAINING; + } + // HOT → FREE: Complete deallocation (rare, usually via DRAINING) + if (util == 0.0f) { + target_tier = SS_TIER_FREE; + } + break; + + case SS_TIER_DRAINING: + // DRAINING → HOT: Rise above up threshold (hysteresis) + if (util >= up_thresh) { + target_tier = SS_TIER_HOT; + } + // DRAINING → FREE: Complete deallocation + if (util == 0.0f) { + target_tier = SS_TIER_FREE; + } + break; + + case SS_TIER_FREE: + // FREE → HOT: First allocation (util > 0) + // Note: Typically handled by allocation path setting tier directly + if (util > 0.0f) { + target_tier = SS_TIER_HOT; + } + break; + + default: + // Invalid tier, reset to HOT (defensive) + target_tier = SS_TIER_HOT; + break; + } + + // If no transition needed, return early + if (target_tier == current_tier) { + return false; + } + + // Attempt atomic transition 
(CAS loop for concurrent safety) + // Note: We use weak CAS in a loop for efficiency on weak-memory architectures + uint8_t expected = current_tier; + while (!atomic_compare_exchange_weak_explicit( + &ss->tier, + &expected, + target_tier, + memory_order_release, // Success: publish tier change + memory_order_relaxed // Failure: retry with updated expected + )) { + // Concurrent modification detected, re-evaluate + // If tier already changed to target, we're done + if (expected == target_tier) { + return false; // Another thread completed the transition + } + // Otherwise, retry with new expected value + } + + // Transition successful + return true; +} + +// ============================================================================ +// 3. Tier State Query +// ============================================================================ +// +// Returns the current tier of the SuperSlab. +// +// Returns: SS_TIER_HOT, SS_TIER_DRAINING, or SS_TIER_FREE +// +// Memory Order: Acquire ensures we see all updates made before tier was set +// ============================================================================ + +static inline SSTier ss_tier_get(SuperSlab* ss) { + if (!ss) return SS_TIER_FREE; // Defensive: NULL = not usable + + uint8_t tier = atomic_load_explicit(&ss->tier, memory_order_acquire); + return (SSTier)tier; +} + +// ============================================================================ +// 4. Hot Tier Check (Allocation Eligibility) +// ============================================================================ +// +// Fast path check: Can this SuperSlab accept new allocations? +// +// Returns: true if SuperSlab is in HOT tier (accepts allocations) +// false otherwise (DRAINING or FREE, skip for allocation) +// +// Usage: Called by allocation path to filter candidate SuperSlabs +// +// Memory Order: Relaxed is sufficient as this is a filtering heuristic, +// not a safety invariant. Worst case: we occasionally skip +// a freshly-promoted HOT SuperSlab (benign race). +// ============================================================================ + +static inline bool ss_tier_is_hot(SuperSlab* ss) { + if (!ss) return false; + + uint8_t tier = atomic_load_explicit(&ss->tier, memory_order_relaxed); + return (tier == SS_TIER_HOT); +} + +// ============================================================================ +// 5. Tier Force-Set (Testing/Debug Only) +// ============================================================================ +// +// Directly sets the tier without utilization checks. +// +// WARNING: This bypasses all transition logic and should ONLY be used for: +// - Unit tests +// - Debug/instrumentation +// - Initialization (setting fresh SuperSlab to HOT) +// +// Do NOT use in production hot paths. +// +// Memory Order: Release ensures any prior modifications are visible after +// the tier change is observed by other threads. 
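A minimal allocation-side usage sketch (hypothetical helper, not part of the patch; it assumes the caller already holds a per-class `SuperSlab*` registry array such as `g_super_reg_by_class`): candidates are filtered with `ss_tier_is_hot()` before any slot work, the same pattern the shared-pool acquire path adopts further down in this diff.

// Illustration only: prefer HOT SuperSlabs, never hand out slots from
// DRAINING/FREE ones; a NULL result means "carve a new SuperSlab", which is
// born HOT via superslab_allocate().
static SuperSlab* pick_hot_candidate(SuperSlab** reg, int count) {
    for (int i = 0; i < count; i++) {
        SuperSlab* ss = reg[i];
        if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;  // stale/empty entry
        if (!ss_tier_is_hot(ss)) continue;                  // skip DRAINING/FREE
        return ss;
    }
    return NULL;
}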
+// ============================================================================ + +static inline void ss_tier_set(SuperSlab* ss, SSTier tier) { + if (!ss) return; + + // Validate tier value (defensive) + if (tier != SS_TIER_HOT && tier != SS_TIER_DRAINING && tier != SS_TIER_FREE) { + return; // Invalid tier, refuse to set + } + + atomic_store_explicit(&ss->tier, (uint8_t)tier, memory_order_release); +} + +#endif // HAK_SS_TIER_BOX_H diff --git a/core/box/tiny_alloc_gate_box.h b/core/box/tiny_alloc_gate_box.h index 5efd53a0..709ebebe 100644 --- a/core/box/tiny_alloc_gate_box.h +++ b/core/box/tiny_alloc_gate_box.h @@ -30,6 +30,7 @@ #include "tiny_ptr_bridge_box.h" // Tiny Superslab Bridge #include "../tiny_region_id.h" // Header 読み出し #include "../front/malloc_tiny_fast.h" // 既存 Tiny Fast Path +#include "tiny_route_box.h" // Tiny Front Routing Policy // 将来の拡張用コンテキスト: // - size : 要求サイズ @@ -133,15 +134,59 @@ static inline int tiny_alloc_gate_validate(TinyAllocGateContext* ctx) // Tiny Alloc Gatekeeper 本体: // - malloc ラッパ (hak_wrappers) から呼ばれる Tiny fast alloc の入口。 -// - 現状は malloc_tiny_fast(size) の薄いラッパで、診断 ON のときだけ -// 返された USER ポインタに対して Bridge + Layout 検査を追加。 +// - ルーティングポリシーに基づき Tiny front / Pool fallback を振り分け、 +// 診断 ON のときだけ返された USER ポインタに対して Bridge + Layout 検査を追加。 static inline void* tiny_alloc_gate_fast(size_t size) { - // まずは従来どおり Tiny Fast Path で割り当て(USER ポインタを得る) + int class_idx = hak_tiny_size_to_class(size); + if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) { + // サイズが Tiny 管理外 → Pool/backend に任せる(NULL で Gate を抜けさせる) + return NULL; + } + + TinyRoutePolicy route = tiny_route_get(class_idx); + + // Pool-only: Tiny front は完全スキップ(Gate から見ると「Tiny では取れなかった」扱い) + if (__builtin_expect(route == ROUTE_POOL_ONLY, 0)) { + return NULL; + } + + // まず Tiny Fast Path で割り当て(USER ポインタを得る) void* user_ptr = malloc_tiny_fast(size); - // Layer 3a(alloc 側): 取得したポインタが明らかに異常な場合は - // Debug ビルドで早期に検出して Fail-Fast。 + // Tiny-only: その結果をそのまま返す(NULL なら上位が扱う) + if (__builtin_expect(route == ROUTE_TINY_ONLY, 1)) { +#if !HAKMEM_BUILD_RELEASE + // Layer 3a(alloc 側): 明らかに異常なポインタは debug ビルドで早期検出 + if (user_ptr) { + uintptr_t addr = (uintptr_t)user_ptr; + if (__builtin_expect(addr < 4096, 0)) { + fprintf(stderr, + "[TINY_ALLOC_GATE_RANGE_INVALID] size=%zu user=%p\n", + size, user_ptr); + fflush(stderr); + abort(); + } + } + + if (__builtin_expect(tiny_alloc_gate_diag_enabled(), 0) && user_ptr) { + TinyAllocGateContext ctx; + ctx.size = size; + ctx.user = HAK_USER_FROM_RAW(user_ptr); + ctx.class_idx = class_idx; + ctx.base = HAK_BASE_FROM_RAW(NULL); + ctx.bridge.ss = NULL; + ctx.bridge.meta = NULL; + ctx.bridge.slab_idx = -1; + ctx.bridge.meta_cls = 0xffu; + + (void)tiny_alloc_gate_validate(&ctx); + } +#endif + return user_ptr; + } + + // ROUTE_TINY_FIRST: Tiny で取れなければ Pool/backend fallback を許可(NULL で Gate 脱出) #if !HAKMEM_BUILD_RELEASE if (user_ptr) { uintptr_t addr = (uintptr_t)user_ptr; @@ -152,20 +197,20 @@ static inline void* tiny_alloc_gate_fast(size_t size) fflush(stderr); abort(); } - } - if (__builtin_expect(tiny_alloc_gate_diag_enabled(), 0) && user_ptr) { - TinyAllocGateContext ctx; - ctx.size = size; - ctx.user = HAK_USER_FROM_RAW(user_ptr); - ctx.class_idx = hak_tiny_size_to_class(size); - ctx.base = HAK_BASE_FROM_RAW(NULL); - ctx.bridge.ss = NULL; - ctx.bridge.meta = NULL; - ctx.bridge.slab_idx = -1; - ctx.bridge.meta_cls = 0xffu; + if (__builtin_expect(tiny_alloc_gate_diag_enabled(), 0)) { + TinyAllocGateContext ctx; + ctx.size = size; + ctx.user = HAK_USER_FROM_RAW(user_ptr); + 
ctx.class_idx = class_idx; + ctx.base = HAK_BASE_FROM_RAW(NULL); + ctx.bridge.ss = NULL; + ctx.bridge.meta = NULL; + ctx.bridge.slab_idx = -1; + ctx.bridge.meta_cls = 0xffu; - (void)tiny_alloc_gate_validate(&ctx); + (void)tiny_alloc_gate_validate(&ctx); + } } #endif diff --git a/core/box/tiny_route_box.c b/core/box/tiny_route_box.c new file mode 100644 index 00000000..35e6b3ee --- /dev/null +++ b/core/box/tiny_route_box.c @@ -0,0 +1,44 @@ +// tiny_route_box.c - Implementation of Tiny Front Routing Policy Box + +#include "tiny_route_box.h" + +#include +#include + +// Default: conservative profile (all classes TINY_FIRST). +// This keeps Tiny in the fast path but always allows Pool fallback. +uint8_t g_tiny_route[8] = { + ROUTE_TINY_FIRST, ROUTE_TINY_FIRST, ROUTE_TINY_FIRST, ROUTE_TINY_FIRST, + ROUTE_TINY_FIRST, ROUTE_TINY_FIRST, ROUTE_TINY_FIRST, ROUTE_TINY_FIRST +}; + +void tiny_route_init(void) +{ + const char* profile = getenv("HAKMEM_TINY_PROFILE"); + if (!profile || !*profile) { + profile = "conservative"; + } + + if (strcmp(profile, "hot") == 0) { + // Hot profile: + // - C0-C3: TINY_ONLY (小さいクラスは Tiny 専用で aggressive) + // - C4-C6: TINY_FIRST (中間サイズは fallback あり) + // - C7 : POOL_ONLY (1KB headerless は Pool に任せる) + g_tiny_route[0] = g_tiny_route[1] = g_tiny_route[2] = g_tiny_route[3] = ROUTE_TINY_ONLY; + g_tiny_route[4] = g_tiny_route[5] = g_tiny_route[6] = ROUTE_TINY_FIRST; + g_tiny_route[7] = ROUTE_POOL_ONLY; + } else if (strcmp(profile, "full") == 0) { + // Full Tiny profile: + // - 全クラス TINY_ONLY(microbench 用、Pool に逃がさない) + memset(g_tiny_route, ROUTE_TINY_ONLY, sizeof(g_tiny_route)); + } else if (strcmp(profile, "off") == 0) { + // Tiny off profile: + // - 全クラス POOL_ONLY(Tiny front 完全無効化) + memset(g_tiny_route, ROUTE_POOL_ONLY, sizeof(g_tiny_route)); + } else { + // "conservative" および未知の値: + // - 全クラス TINY_FIRST(Tiny を使うが必ず Pool fallbackあり) + memset(g_tiny_route, ROUTE_TINY_FIRST, sizeof(g_tiny_route)); + } +} + diff --git a/core/box/tiny_route_box.h b/core/box/tiny_route_box.h new file mode 100644 index 00000000..da86f8f2 --- /dev/null +++ b/core/box/tiny_route_box.h @@ -0,0 +1,50 @@ +// tiny_route_box.h - Box: Tiny Front Routing Policy +// +// Purpose: +// Decide, per Tiny class, whether allocation should go through Tiny front +// or directly to the Pool/backend. This keeps routing policy in a single, +// cheap table lookup, without getenv() or complex logic in the hot path. +// +// Box Theory: +// - Single Responsibility: +// Only decides "Tiny vs Pool vs Tiny+Fallback" per class. +// - Clear Boundary: +// Front Gate / Alloc Gatekeeper calls tiny_route_get(class_idx) once. +// Tiny Fast Path and Pool backend remain unchanged. +// - Reversible / A/B: +// Profiles are selected via HAKMEM_TINY_PROFILE ENV at init time. +// Hot path is stable; routing can be tuned without touching fast code. + +#ifndef TINY_ROUTE_BOX_H +#define TINY_ROUTE_BOX_H + +#include + +// Routing policy per Tiny class. +typedef enum { + ROUTE_TINY_ONLY = 0, // Tiny front only (no fallback; failure bubbles up) + ROUTE_TINY_FIRST = 1, // Try Tiny front, then fallback to Pool backend + ROUTE_POOL_ONLY = 2, // Skip Tiny entirely, use Pool/backend only +} TinyRoutePolicy; + +// Global routing table for Tiny classes (0..7). +// Initialized once from ENV: HAKMEM_TINY_PROFILE. +extern uint8_t g_tiny_route[8]; + +// Initialize routing table from ENV profile. 
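As a concrete picture of what that initialization produces, a hypothetical smoke test (not part of the patch; it assumes the header is reachable as `core/box/tiny_route_box.h` and that `tiny_route_init()` may be called directly from test code) could pin the environment variable and check the resulting per-class policies:

#include <assert.h>
#include <stdlib.h>
#include "core/box/tiny_route_box.h"

// Hypothetical check of the "hot" profile: C0-C3 Tiny-only, C4-C6 Tiny-first,
// C7 Pool-only, exactly as tiny_route_init() documents.
static void check_hot_profile(void) {
    setenv("HAKMEM_TINY_PROFILE", "hot", 1);  // must be set before init reads it
    tiny_route_init();
    assert(tiny_route_get(0)  == ROUTE_TINY_ONLY);
    assert(tiny_route_get(3)  == ROUTE_TINY_ONLY);
    assert(tiny_route_get(4)  == ROUTE_TINY_FIRST);
    assert(tiny_route_get(6)  == ROUTE_TINY_FIRST);
    assert(tiny_route_get(7)  == ROUTE_POOL_ONLY);
    assert(tiny_route_get(15) == ROUTE_POOL_ONLY);  // out-of-range idx masked to 7
}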
+// Profiles: +// "hot" C0-C3=TINY_ONLY, C4-C6=TINY_FIRST, C7=POOL_ONLY +// "conservative" 全クラス TINY_FIRST(デフォルト) +// "off" 全クラス POOL_ONLY(Tiny 無効) +// "full" 全クラス TINY_ONLY(microbench 用) +void tiny_route_init(void); + +// Hot path helper: return routing policy for a given class. +// Uses simple array lookup; class_idx is masked to [0,7] defensively. +static inline TinyRoutePolicy tiny_route_get(int class_idx) +{ + return (TinyRoutePolicy)g_tiny_route[class_idx & 7]; +} + +#endif // TINY_ROUTE_BOX_H + diff --git a/core/hakmem_shared_pool_acquire.c b/core/hakmem_shared_pool_acquire.c index a8e9111f..82eb6332 100644 --- a/core/hakmem_shared_pool_acquire.c +++ b/core/hakmem_shared_pool_acquire.c @@ -6,6 +6,7 @@ #include "box/pagefault_telemetry_box.h" #include "box/tls_sll_drain_box.h" #include "box/tls_slab_reuse_guard_box.h" +#include "box/ss_tier_box.h" // P-Tier: Tier filtering support #include "hakmem_policy.h" #include "hakmem_env_cache.h" // Priority-2: ENV cache @@ -41,6 +42,8 @@ sp_acquire_from_empty_scan(int class_idx, SuperSlab** ss_out, int* slab_idx_out, for (int i = 0; i < scan_limit; i++) { SuperSlab* ss = g_super_reg_by_class[class_idx][i]; if (!(ss && ss->magic == SUPERSLAB_MAGIC)) continue; + // P-Tier: Skip DRAINING tier SuperSlabs + if (!ss_tier_is_hot(ss)) continue; if (ss->empty_count == 0) continue; // No EMPTY slabs in this SS uint32_t mask = ss->empty_mask; @@ -151,6 +154,16 @@ stage1_retry_after_tension_drain: SuperSlab* ss_guard = atomic_load_explicit(&reuse_meta->ss, memory_order_relaxed); if (ss_guard) { tiny_tls_slab_reuse_guard(ss_guard); + + // P-Tier: Skip DRAINING tier SuperSlabs (reinsert to freelist and fallback) + if (!ss_tier_is_hot(ss_guard)) { + // DRAINING SuperSlab - skip this slot and fall through to Stage 2 + if (g_lock_stats_enabled == 1) { + atomic_fetch_add(&g_lock_release_count, 1); + } + pthread_mutex_unlock(&g_shared_pool.alloc_lock); + goto stage2_fallback; + } } // Activate slot under mutex (slot state transition requires protection) @@ -221,6 +234,13 @@ stage2_fallback: { SuperSlab* hint_ss = g_shared_pool.class_hints[class_idx]; if (__builtin_expect(hint_ss != NULL, 1)) { + // P-Tier: Skip DRAINING tier SuperSlabs + if (!ss_tier_is_hot(hint_ss)) { + // Clear stale hint pointing to DRAINING SuperSlab + g_shared_pool.class_hints[class_idx] = NULL; + goto stage2_scan; + } + // P0 Optimization: O(1) lookup via cached pointer (avoids metadata scan) SharedSSMeta* hint_meta = hint_ss->shared_meta; if (__builtin_expect(hint_meta != NULL, 1)) { @@ -277,6 +297,7 @@ stage2_fallback: } } +stage2_scan: // P0-5: Lock-free atomic CAS claiming (no mutex needed for slot state transition!) // RACE FIX: Read ss_meta_count atomically (now properly declared as _Atomic) // No cast needed! 
memory_order_acquire synchronizes with release in sp_meta_find_or_create @@ -288,10 +309,23 @@ stage2_fallback: for (uint32_t i = 0; i < meta_count; i++) { SharedSSMeta* meta = &g_shared_pool.ss_metadata[i]; + // RACE FIX: Load SuperSlab pointer atomically BEFORE claiming + // Use memory_order_acquire to synchronize with release in sp_meta_find_or_create + SuperSlab* ss_preflight = atomic_load_explicit(&meta->ss, memory_order_acquire); + if (!ss_preflight) { + // SuperSlab was freed - skip this entry + continue; + } + + // P-Tier: Skip DRAINING tier SuperSlabs + if (!ss_tier_is_hot(ss_preflight)) { + continue; + } + // Try lock-free claiming (UNUSED → ACTIVE via CAS) int claimed_idx = sp_slot_claim_lockfree(meta, class_idx); if (claimed_idx >= 0) { - // RACE FIX: Load SuperSlab pointer atomically (critical for lock-free Stage 2) + // RACE FIX: Load SuperSlab pointer atomically again after claiming // Use memory_order_acquire to synchronize with release in sp_meta_find_or_create SuperSlab* ss = atomic_load_explicit(&meta->ss, memory_order_acquire); if (!ss) { diff --git a/core/hakmem_shared_pool_release.c b/core/hakmem_shared_pool_release.c index 36f783ba..57a711d6 100644 --- a/core/hakmem_shared_pool_release.c +++ b/core/hakmem_shared_pool_release.c @@ -2,6 +2,7 @@ #include "hakmem_debug_master.h" #include "box/ss_slab_meta_box.h" #include "box/ss_hot_cold_box.h" +#include "box/ss_tier_box.h" // P-Tier: Utilization-aware tiering #include "hakmem_env_cache.h" // Priority-2: ENV cache #include "superslab/superslab_inline.h" // superslab_ref_get guard for TLS pins #include "box/ss_release_guard_box.h" // Box: SuperSlab Release Guard @@ -176,6 +177,51 @@ shared_pool_release_slab(SuperSlab* ss, int slab_idx) #endif } + // P-Tier: Check tier transition after releasing slab + // This may transition HOT → DRAINING if utilization dropped below threshold + // or DRAINING → FREE if utilization reached 0 + ss_tier_check_transition(ss); + + // P-Tier Step B: Eager FREE eviction + // If tier transitioned to FREE (total_active_blocks == 0), immediately try to + // release the SuperSlab regardless of active_slots. This prevents registry bloat. 
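In essence, the eager path boils down to the following (a simplified sketch, not the patch itself; the real code below additionally re-checks total_active_blocks, consults ss_release_guard_superslab_can_free(), and cleans up the shared-pool slot metadata under the alloc lock before freeing):

ss_tier_check_transition(ss);            // may demote HOT -> DRAINING -> FREE
if (ss_tier_get(ss) == SS_TIER_FREE) {
    superslab_free(ss);                  // return the fully-empty SuperSlab (LRU / munmap)
}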
+ SSTier current_tier = ss_tier_get(ss); + if (current_tier == SS_TIER_FREE) { + // Double-check: total_active_blocks should be 0 for FREE tier + uint32_t active_blocks = atomic_load_explicit(&ss->total_active_blocks, memory_order_acquire); + if (active_blocks == 0 && ss_release_guard_superslab_can_free(ss)) { + #if !HAKMEM_BUILD_RELEASE + if (dbg == 1) { + fprintf(stderr, "[SP_TIER_FREE_EAGER] ss=%p tier=FREE active_slots=%u -> immediate free\n", + (void*)ss, sp_meta->active_slots); + } + #endif + + // Force all remaining slots to EMPTY state for clean metadata + for (uint32_t i = 0; i < sp_meta->total_slots; i++) { + SlotState st = atomic_load_explicit(&sp_meta->slots[i].state, memory_order_relaxed); + if (st == SLOT_ACTIVE) { + atomic_store_explicit(&sp_meta->slots[i].state, SLOT_EMPTY, memory_order_relaxed); + } + } + sp_meta->active_slots = 0; + + if (g_lock_stats_enabled == 1) { + atomic_fetch_add(&g_lock_release_count, 1); + } + + // Clear meta->ss before unlocking (race prevention) + atomic_store_explicit(&sp_meta->ss, NULL, memory_order_release); + + pthread_mutex_unlock(&g_shared_pool.alloc_lock); + + // Free SuperSlab immediately (bypasses normal active_slots==0 check) + extern void superslab_free(SuperSlab* ss); + superslab_free(ss); + return; + } + } + // Check if SuperSlab is now completely empty (all slots EMPTY or UNUSED) if (sp_meta->active_slots == 0) { #if !HAKMEM_BUILD_RELEASE diff --git a/core/hakmem_tiny_init.inc b/core/hakmem_tiny_init.inc index 5a4632b6..b79dba99 100644 --- a/core/hakmem_tiny_init.inc +++ b/core/hakmem_tiny_init.inc @@ -379,6 +379,14 @@ void hak_tiny_init(void) { if (q && atoi(q) != 0) g_quick_enable = 1; } + // Tiny Front Routing Policy: initialize per-class Tiny vs Pool routing. + // ENV: HAKMEM_TINY_PROFILE = hot / conservative / off / full + // - conservative (default): 全クラス TINY_FIRST + // - hot: C0-C3=TINY_ONLY, C4-C6=TINY_FIRST, C7=POOL_ONLY + // - off: 全クラス POOL_ONLY + // - full: 全クラス TINY_ONLY + tiny_route_init(); + tiny_obs_start_if_needed(); // Deferred Intelligence Engine diff --git a/core/superslab/superslab_types.h b/core/superslab/superslab_types.h index 1c316930..bb8f1e52 100644 --- a/core/superslab/superslab_types.h +++ b/core/superslab/superslab_types.h @@ -35,6 +35,14 @@ extern "C" { // Magic for SuperSlab validation #define SUPERSLAB_MAGIC 0x5353504Cu // 'SSPL' +// P-Tier: Utilization-Aware Tiering +// SuperSlab tier classification based on utilization for efficient allocation/deallocation +typedef enum { + SS_TIER_HOT = 0, // 通常運用状態 (alloc 対象) + SS_TIER_DRAINING = 1, // 低 utilization (alloc 対象外、free 待ち) + SS_TIER_FREE = 2 // 完全に空 (munmap/LRU 候補) +} SSTier; + // ACE state (extern; defined in hakmem_tiny_superslab.c) typedef struct SuperSlabACEState { uint8_t current_lg; @@ -53,7 +61,10 @@ extern SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS]; typedef struct SuperSlab { uint32_t magic; // SUPERSLAB_MAGIC uint8_t lg_size; // log2(super slab size), 20=1MB, 21=2MB - uint8_t _pad0[3]; + + // P-Tier: Utilization-Aware Tiering + _Atomic uint8_t tier; // SS_TIER_HOT, SS_TIER_DRAINING, SS_TIER_FREE + uint8_t _tier_pad[2]; // アライメント用パディング // Phase 12: per-SS size_class removed; classes are per-slab via TinySlabMeta.class_idx _Atomic uint32_t total_active_blocks; diff --git a/docs/specs/ENV_VARS.md b/docs/specs/ENV_VARS.md index 5c28c3a1..7ff84560 100644 --- a/docs/specs/ENV_VARS.md +++ b/docs/specs/ENV_VARS.md @@ -280,6 +280,41 @@ New (debug isolation) - 小クラス用の小型TLSマガジン(128要素, classes 0..3)を有効化。既定0(A/B用)。 - alloc: HotMag→SLL→Magazine 
の順でヒットを狙う。free: SLL優先、溢れ時にHotMag→Magazine。 +### Superslab Tiering / Registry 制御 + +- HAKMEM_SS_TIER_DOWN_THRESHOLD + - 型: float (0.0–1.0) + - 既定値: `0.25` + - 役割: SuperSlab 利用率(`total_active_blocks / capacity`)がこの値以下になったとき、Tier を `HOT→DRAINING` に落とすための下限。 + - 影響: DRAINING Tier の SuperSlab は新規 alloc の対象から外れ、drain/解放の対象になる(Box: `ss_tier_box.h`)。 + +- HAKMEM_SS_TIER_UP_THRESHOLD + - 型: float (0.0–1.0) + - 既定値: `0.50` + - 役割: DRAINING Tier の SuperSlab の利用率がこの値以上になったときに `DRAINING→HOT` に戻すための上限(ヒステリシス)。 + - 影響: 利用率が一時的にブレても HOT/DRAINING を行き来しにくくし、Tier の振動を防ぐ。 + +### Tiny Front Routing(Tiny vs Pool の切替) + +- HAKMEM_TINY_PROFILE + - 型: string + - 既定値: `"conservative"` + - 役割: Tiny Front(TLS SLL / FastCache)と Pool/backend のルーティング方針をクラス別に切り替えるプロファイル。 + - プロファイル: + - `"conservative"`(既定): + - C0〜C7 すべて `TINY_FIRST`(まず Tiny front、失敗時は Pool/backend にフォールバック) + - `"hot"`: + - C0〜C3: `TINY_ONLY`(小クラスを Tiny 専用で積極活用) + - C4〜C6: `TINY_FIRST` + - C7: `POOL_ONLY`(1KB headerless は Pool に任せる) + - `"off"`: + - C0〜C7 すべて `POOL_ONLY`(Tiny front を完全に無効化) + - `"full"`: + - C0〜C7 すべて `TINY_ONLY`(microbench 用、Gate としては常に Tiny 経由) + - 実装: + - Box: `core/box/tiny_route_box.h` / `tiny_route_box.c` + - Gate: `tiny_alloc_gate_fast()` がクラスごとに `TinyRoutePolicy` を参照して Tiny vs Pool を振り分ける。 + ## USDT/tracepoints(perfのユーザ空間静的トレース) - ビルド時に `CFLAGS+=-DHAKMEM_USDT=1` を付与すると、主要分岐にUSDT(DTrace互換)プローブが埋め込まれます。 - 依存: ``(Debian/Ubuntu: `sudo apt-get install systemtap-sdt-dev`)。 diff --git a/docs/specs/ENV_VARS_COMPLETE.md b/docs/specs/ENV_VARS_COMPLETE.md index 68d57730..efc0222b 100644 --- a/docs/specs/ENV_VARS_COMPLETE.md +++ b/docs/specs/ENV_VARS_COMPLETE.md @@ -297,7 +297,50 @@ From `/mnt/workdisk/public_share/hakmem/core/hakmem_tiny_stats.h`: --- -### 9. Memory Efficiency & RSS Control +### 9. Tiny Front Routing + +#### HAKMEM_TINY_PROFILE +- **Default**: `"conservative"` +- **Type**: string +- **Purpose**: Control Tiny Front (TLS SLL / FastCache) vs Pool/backend routing per Tiny class via a simple profile. +- **Profiles**: + - `"conservative"`: + - All classes (C0–C7) use `TINY_FIRST`: try Tiny Front first, then fallback to Pool/backend on miss. + - `"hot"`: + - C0–C3: `TINY_ONLY` (small classes use Tiny exclusively via front gate) + - C4–C6: `TINY_FIRST` + - C7: `POOL_ONLY` (1KB headerless class uses Pool/backend) + - `"off"`: + - All classes `POOL_ONLY` (Tiny Front is fully disabled, Pool-only allocator behaviour). + - `"full"`: + - All classes `TINY_ONLY` (microbench-style, front gate always routes via Tiny). +- **Implementation**: + - Box: `core/box/tiny_route_box.h` / `tiny_route_box.c` (per-class `g_tiny_route[8]` table). + - Gate: `tiny_alloc_gate_fast()` reads `TinyRoutePolicy` and decides Tiny vs Pool on each allocation. + +--- + +### 10. Superslab Tiering & Registry Control + +#### HAKMEM_SS_TIER_DOWN_THRESHOLD +- **Default**: `0.25` +- **Range**: 0.0–1.0 +- **Purpose**: SuperSlab 利用率がこの値以下になったときに、Tier を `HOT → DRAINING` に遷移させる下限。 +- **Impact**: + - DRAINING Tier の SuperSlab は新規割り当ての対象外となり、drain/解放候補として扱われる。 + - 利用率が低い SuperSlab への新規割り当てを避け、活発な SuperSlab に負荷を集中させる。 + +#### HAKMEM_SS_TIER_UP_THRESHOLD +- **Default**: `0.50` +- **Range**: 0.0–1.0 +- **Purpose**: DRAINING Tier の SuperSlab 利用率がこの値以上になったときに `DRAINING → HOT` に戻す上限(ヒステリシス)。 +- **Impact**: + - Down/Up 閾値にギャップを持たせることで、Tier が HOT と DRAINING の間で頻繁に振動するのを防ぐ。 + - Sustained な利用増加が観測された SuperSlab のみ HOT に復帰させる。 + +--- + +### 11. 
Memory Efficiency & RSS Control
#### HAKMEM_TINY_RSS_BUDGET_KB
- **Default**: Unlimited
@@ -333,7 +376,7 @@ From `/mnt/workdisk/public_share/hakmem/core/hakmem_tiny_stats.h`:
---
-### 10. Policy & Learning Parameters
+### 12. Policy & Learning Parameters
#### HAKMEM_LEARN
- **Default**: 0 (OFF, unless HAKMEM_MODE=learning/research)
diff --git a/hakmem.d b/hakmem.d
index 9cd39fdc..0815b81a 100644
--- a/hakmem.d
+++ b/hakmem.d
@@ -86,9 +86,9 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \
 core/box/../front/../box/../front/tiny_unified_cache.h \
 core/box/../front/../box/tiny_layout_box.h \
 core/box/../front/../box/tiny_front_cold_box.h \
- core/box/tiny_alloc_gate_box.h core/box/tiny_front_config_box.h \
- core/box/wrapper_env_box.h core/box/../hakmem_internal.h \
- core/box/../superslab/superslab_inline.h
+ core/box/tiny_alloc_gate_box.h core/box/tiny_route_box.h \
+ core/box/tiny_front_config_box.h core/box/wrapper_env_box.h \
+ core/box/../hakmem_internal.h core/box/../superslab/superslab_inline.h
core/hakmem.h:
core/hakmem_build_flags.h:
core/hakmem_config.h:
@@ -245,6 +245,7 @@ core/box/../front/../box/../front/tiny_unified_cache.h:
core/box/../front/../box/tiny_layout_box.h:
core/box/../front/../box/tiny_front_cold_box.h:
core/box/tiny_alloc_gate_box.h:
+core/box/tiny_route_box.h:
core/box/tiny_front_config_box.h:
core/box/wrapper_env_box.h:
core/box/../hakmem_internal.h:
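As a usage note for the tiering knobs documented above (a hypothetical harness snippet, not part of the patch): both thresholds are read via getenv() and cached on first use, so they must be in the environment before the allocator first evaluates a tier, e.g. set by the launching process or very early in main().

#include <stdlib.h>

// Hypothetical benchmark/harness setup: demote SuperSlabs below 20% utilization
// and only re-promote above 60%, widening the default 0.25/0.50 hysteresis band.
static void tiering_env_setup(void) {
    setenv("HAKMEM_SS_TIER_DOWN_THRESHOLD", "0.20", 1);
    setenv("HAKMEM_SS_TIER_UP_THRESHOLD",   "0.60", 1);
    // HAKMEM_TINY_PROFILE is consumed the same way, once, from hak_tiny_init().
}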