diff --git a/ENV_VARS.md b/ENV_VARS.md index 3194cbd3..f208a0d3 100644 --- a/ENV_VARS.md +++ b/ENV_VARS.md @@ -79,6 +79,13 @@ Front命中率の底上げ(採用境界でのスプライス) - 目的: 次回 tiny_alloc_fast_pop のミス率を低下させる(cross‑thread供給をFrontへ寄せる)。 - 境界厳守: 本スプライスは採用境界の中だけで実施。publish 側で drain/owner を触らない。 +Front リフィル量(A/B) +- HAKMEM_TINY_REFILL_COUNT=N(全クラス共通) +- HAKMEM_TINY_REFILL_COUNT_HOT=N(class<=3) +- HAKMEM_TINY_REFILL_COUNT_MID=N(class>=4) +- HAKMEM_TINY_REFILL_COUNT_C{0..7}=N(クラス個別) + - tiny_alloc_fast のリフィル数を制御(既定16)。大きくするとミス頻度が下がる一方、1回のリフィルコストは増える。 + 重要: publish/adopt の前提(SuperSlab ON) - HAKMEM_TINY_USE_SUPERSLAB=1 - publish→mailbox→adopt のパイプラインは SuperSlab 経路が ON のときのみ動作します。 diff --git a/core/hakmem_tiny.c b/core/hakmem_tiny.c index 5950add8..0941af62 100644 --- a/core/hakmem_tiny.c +++ b/core/hakmem_tiny.c @@ -191,6 +191,12 @@ static inline __attribute__((always_inline)) void ss_active_inc(SuperSlab* ss) { } // EXTRACTED: ss_active_dec_one() moved to hakmem_tiny_superslab.h (Phase 2C-2) +// Front refill count global config (declare before init.inc uses them) +extern int g_refill_count_global; +extern int g_refill_count_hot; +extern int g_refill_count_mid; +extern int g_refill_count_class[TINY_NUM_CLASSES]; + // Step 3d: Forced inlining for slow path (maintain monolithic performance) // Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c) #ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR @@ -1537,6 +1543,13 @@ TinySlab* hak_tiny_owner_slab(void* ptr) { // Box 6: Free Fast Path (Layer 2 - 2-3 instructions) #include "tiny_free_fast.inc.h" + // ---------------- Refill count (Front) global config ---------------- + // Parsed once at init; hot path reads plain ints (no getenv). + int g_refill_count_global = 0; // HAKMEM_TINY_REFILL_COUNT + int g_refill_count_hot = 0; // HAKMEM_TINY_REFILL_COUNT_HOT + int g_refill_count_mid = 0; // HAKMEM_TINY_REFILL_COUNT_MID + int g_refill_count_class[TINY_NUM_CLASSES] = {0}; // HAKMEM_TINY_REFILL_COUNT_C{0..7} + // Export wrapper functions for hakmem.c to call // Phase 6-1.7 Optimization: Remove diagnostic overhead, rely on LTO for inlining void* hak_tiny_alloc_fast_wrapper(size_t size) { diff --git a/core/hakmem_tiny_init.inc b/core/hakmem_tiny_init.inc index dff083b2..54ecaa3b 100644 --- a/core/hakmem_tiny_init.inc +++ b/core/hakmem_tiny_init.inc @@ -392,6 +392,21 @@ void hak_tiny_init(void) { snprintf(var, sizeof(var), "HAKMEM_TINY_SLL_CAP_C%d", i); char* vs = getenv(var); if (vs) { int v = atoi(vs); if (v > 0 && v <= TINY_TLS_MAG_CAP) g_sll_cap_override[i] = v; } + + // Front refill count per-class override (fast path tuning) + snprintf(var, sizeof(var), "HAKMEM_TINY_REFILL_COUNT_C%d", i); + char* rc = getenv(var); + if (rc) { int v = atoi(rc); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_class[i] = v; } + } + + // Front refill count globals + { + char* g = getenv("HAKMEM_TINY_REFILL_COUNT"); + if (g) { int v = atoi(g); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_global = v; } + char* h = getenv("HAKMEM_TINY_REFILL_COUNT_HOT"); + if (h) { int v = atoi(h); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_hot = v; } + char* m = getenv("HAKMEM_TINY_REFILL_COUNT_MID"); + if (m) { int v = atoi(m); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_mid = v; } } { char* fast_env = getenv("HAKMEM_TINY_FAST"); diff --git a/core/tiny_alloc_fast.inc.h b/core/tiny_alloc_fast.inc.h index 34d06109..89769d79 100644 --- a/core/tiny_alloc_fast.inc.h +++ b/core/tiny_alloc_fast.inc.h @@ -5,6 +5,7 @@ #pragma once #include "tiny_atomic.h" #include "hakmem_tiny.h" +#include // ========== Debug Counters (compile-time gated) ========== #if HAKMEM_DEBUG_COUNTERS @@ -43,6 +44,12 @@ extern int sll_refill_small_from_ss(int class_idx, int max_take); extern void* hak_tiny_alloc_slow(size_t size, int class_idx); extern int hak_tiny_size_to_class(size_t size); +// Global Front refill config (parsed at init; defined in hakmem_tiny.c) +extern int g_refill_count_global; +extern int g_refill_count_hot; +extern int g_refill_count_mid; +extern int g_refill_count_class[TINY_NUM_CLASSES]; + // External macros #ifndef HAK_RET_ALLOC #define HAK_RET_ALLOC(cls, ptr) return (ptr) @@ -157,18 +164,29 @@ static inline void* tiny_alloc_fast_pop(int class_idx) { static inline int tiny_alloc_fast_refill(int class_idx) { uint64_t start = tiny_profile_enabled() ? tiny_fast_rdtsc() : 0; - // Tunable refill count (cached in TLS for performance) - static __thread int s_refill_count = 0; - if (__builtin_expect(s_refill_count == 0, 0)) { + // Tunable refill count (cached per-class in TLS for performance) + static __thread int s_refill_count[TINY_NUM_CLASSES] = {0}; + int cnt = s_refill_count[class_idx]; + if (__builtin_expect(cnt == 0, 0)) { int def = 16; // Default: 16 (smaller = less overhead per refill) - char* env = getenv("HAKMEM_TINY_REFILL_COUNT"); - int v = (env ? atoi(env) : def); + int v = def; + // Resolve precedence without getenv on hot path (values parsed at init) + if (g_refill_count_class[class_idx] > 0) { + v = g_refill_count_class[class_idx]; + } else if (class_idx <= 3 && g_refill_count_hot > 0) { + v = g_refill_count_hot; + } else if (class_idx >= 4 && g_refill_count_mid > 0) { + v = g_refill_count_mid; + } else if (g_refill_count_global > 0) { + v = g_refill_count_global; + } // Clamp to sane range (avoid pathological cases) if (v < 8) v = 8; // Minimum: avoid thrashing if (v > 256) v = 256; // Maximum: avoid excessive TLS memory - s_refill_count = v; + s_refill_count[class_idx] = v; + cnt = v; } #if HAKMEM_DEBUG_COUNTERS @@ -179,7 +197,7 @@ static inline int tiny_alloc_fast_refill(int class_idx) { // Box Boundary: Delegate to Backend (Box 3: SuperSlab) // This gives us ACE, Learning layer, L25 integration for free! // Note: g_rf_hit_slab counter is incremented inside sll_refill_small_from_ss() - int refilled = sll_refill_small_from_ss(class_idx, s_refill_count); + int refilled = sll_refill_small_from_ss(class_idx, cnt); if (start) { g_tiny_refill_cycles += (tiny_fast_rdtsc() - start);