Refactor: Extract TLS Bind Box for unified slab binding

- Created core/box/ss_tls_bind_box.h containing ss_tls_bind_one().
- Refactored superslab_refill() to use the new box.
- Updated signatures to avoid circular dependencies (tiny_self_u32).
- Added future integration points for Warm Pool and Page Box.
2025-12-05 19:57:30 +09:00
parent a67965139f
commit 45b2ccbe45
4 changed files with 197 additions and 53 deletions
--- a/core/front/tiny_unified_cache.c
+++ b/core/front/tiny_unified_cache.c
@@ -15,6 +15,7 @@
 #include "../box/warm_pool_prefill_box.h"    // Box: Warm Pool Prefill (secondary optimization)
 #include "../hakmem_env_cache.h"             // Priority-2: ENV cache (eliminate syscalls)
 #include "../box/tiny_page_box.h"           // Tiny-Plus Page Box (C5–C7 initial hook)
+#include "../box/ss_tls_bind_box.h"         // Box: TLS Bind (SuperSlab -> TLS binding)
 #include <stdlib.h>
 #include <string.h>
 #include <stdatomic.h>
@@ -86,6 +87,21 @@ __thread uint64_t g_unified_cache_full[TINY_NUM_CLASSES] = {0};
 // Note: These are kept outside !HAKMEM_BUILD_RELEASE for profiling in release builds
 __thread TinyWarmPoolStats g_warm_pool_stats[TINY_NUM_CLASSES] = {0};

+#if !HAKMEM_BUILD_RELEASE
+// Debug-only diagnostics for Warm Pool effectiveness
+_Atomic uint64_t g_dbg_warm_prefill_attempts = 0;
+_Atomic uint64_t g_dbg_warm_prefill_refill_fail = 0;
+_Atomic uint64_t g_dbg_warm_prefill_push_ok = 0;
+_Atomic uint64_t g_dbg_warm_prefill_push_full = 0;
+_Atomic uint64_t g_dbg_warm_pop_attempts = 0;
+_Atomic uint64_t g_dbg_warm_pop_hits = 0;
+_Atomic uint64_t g_dbg_warm_pop_empty = 0;
+_Atomic uint64_t g_dbg_warm_pop_carve_zero = 0;
+#endif
+
+// Forward declaration for Warm Pool stats printer (defined later in this file)
+static inline void tiny_warm_pool_print_stats(void);
+
 // ============================================================================
 // Phase 8-Step1-Fix: unified_cache_enabled() implementation (non-static)
 // ============================================================================
@@ -231,9 +247,9 @@ static inline void tiny_warm_pool_print_stats(void) {

    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        uint64_t total = g_warm_pool_stats[i].hits + g_warm_pool_stats[i].misses;
-        if (total == 0) continue;  // Skip unused classes
-
-        float hit_rate = 100.0 * g_warm_pool_stats[i].hits / total;
+        float hit_rate = (total > 0)
+                             ? (100.0 * g_warm_pool_stats[i].hits / total)
+                             : 0.0;
        fprintf(stderr, "  C%d: hits=%llu misses=%llu hit_rate=%.1f%% prefilled=%llu\n",
                i,
                (unsigned long long)g_warm_pool_stats[i].hits,
@@ -241,6 +257,21 @@ static inline void tiny_warm_pool_print_stats(void) {
                hit_rate,
                (unsigned long long)g_warm_pool_stats[i].prefilled);
    }
+
+#if !HAKMEM_BUILD_RELEASE
+    // Debug-only aggregated diagnostics for Warm Pool
+    fprintf(stderr,
+            "  [DBG] prefill_attempts=%llu refill_fail=%llu push_ok=%llu push_full=%llu "
+            "pop_attempts=%llu pop_hits=%llu pop_empty=%llu pop_carve_zero=%llu\n",
+            (unsigned long long)atomic_load_explicit(&g_dbg_warm_prefill_attempts, memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_dbg_warm_prefill_refill_fail, memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_dbg_warm_prefill_push_ok, memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_dbg_warm_prefill_push_full, memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_attempts, memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_hits, memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_empty, memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_carve_zero, memory_order_relaxed));
+#endif
    fflush(stderr);
 }

@@ -426,15 +457,23 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
    if (room <= 0) return HAK_BASE_FROM_RAW(NULL);
    // Batch size limit（クラス別チューニング）
    //   - 通常: 128
-    //   - C5〜C7（129B〜1024B 混在レンジ）: 256 まで拡張して refill 頻度を下げる
-    //   - 安全性のため、下の out[] 配列サイズ（256）と常に整合させる
-    int max_batch = (class_idx >= 5 && class_idx <= 7) ? 256 : 128;
+    //   - C5〜C6（129B〜512B）: 256 まで拡張
+    //   - C7（≈1KB）: 512 まで拡張して refill 頻度をさらに下げる
+    //   - 安全性のため、下の out[] 配列サイズ（512）と常に整合させる
+    int max_batch;
+    if (class_idx == 7) {
+        max_batch = 512;
+    } else if (class_idx >= 5 && class_idx <= 6) {
+        max_batch = 256;
+    } else {
+        max_batch = 128;
+    }
    if (room > max_batch) room = max_batch;

    // NOTE:
-    //  - C5〜C7 では max_batch を 256 まで拡張するため、スタック配列も 256 エントリ確保する。
-    //  - これにより、room <= max_batch <= 256 が常に成り立ち、out[] オーバーランを防止する。
-    void* out[256];
+    //  - C7 では max_batch を 512 まで拡張するため、スタック配列も 512 エントリ確保する。
+    //  - これにより、room <= max_batch <= 512 が常に成り立ち、out[] オーバーランを防止する。
+    void* out[512];
    int produced = 0;

    // ========== PAGE BOX HOT PATH（Tiny-Plus 層）: Try page box FIRST ==========
@@ -473,8 +512,21 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {

    // ========== WARM POOL HOT PATH: Check warm pool FIRST ==========
    // This is the critical optimization - avoid superslab_refill() registry scan
+    #if !HAKMEM_BUILD_RELEASE
+    atomic_fetch_add_explicit(&g_dbg_warm_pop_attempts, 1, memory_order_relaxed);
+    #endif
    SuperSlab* warm_ss = tiny_warm_pool_pop(class_idx);
    if (warm_ss) {
+        // FUTURE: TLS Bind Box Integration
+        // Currently we carve directly from warm_ss via slab_carve_from_ss().
+        // To unify logic, we should eventually:
+        // 1. Choose a slab index (via tiny_page_box or heuristic).
+        // 2. Bind it to TLS via ss_tls_bind_one(..., warm_ss, slab_idx, ...).
+        // 3. Fall through to TLS-based allocation.
+
+        #if !HAKMEM_BUILD_RELEASE
+        atomic_fetch_add_explicit(&g_dbg_warm_pop_hits, 1, memory_order_relaxed);
+        #endif
        // HOT PATH: Warm pool hit, try to carve directly
        produced = slab_carve_from_ss(class_idx, warm_ss, out, room);
        if (produced > 0) {
@@ -518,12 +570,19 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
        }

        // SuperSlab carve failed (produced == 0)
+        #if !HAKMEM_BUILD_RELEASE
+        atomic_fetch_add_explicit(&g_dbg_warm_pop_carve_zero, 1, memory_order_relaxed);
+        #endif
        // This slab is either exhausted or has no more available capacity
        // The statistics counter 'prefilled' tracks how often we try to prefill
        if (produced == 0 && tiny_warm_pool_count(class_idx) == 0) {
            // Pool is empty and carve failed - prefill would help here
            warm_pool_record_prefilled(class_idx);
        }
+    } else {
+        #if !HAKMEM_BUILD_RELEASE
+        atomic_fetch_add_explicit(&g_dbg_warm_pop_empty, 1, memory_order_relaxed);
+        #endif
    }

    // ========== COLD PATH: Warm pool miss, use superslab_refill ==========