diff --git a/core/hakmem_tiny.c b/core/hakmem_tiny.c index 76e62bce..378d8264 100644 --- a/core/hakmem_tiny.c +++ b/core/hakmem_tiny.c @@ -187,331 +187,23 @@ static inline int fastcache_push(int class_idx, void* ptr); // Functions: tiny_hot_pop_class0(), tiny_hot_pop_class1(), tiny_hot_pop_class2(), tiny_hot_pop_class3() // 88 lines (lines 407-494) -static __attribute__((cold, noinline, unused)) void* tiny_slow_alloc_fast(int class_idx) { - int tls_enabled = g_tls_list_enable; - TinyTLSList* tls = &g_tls_lists[class_idx]; - pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m; - pthread_mutex_lock(lock); - TinySlab* slab = g_tiny_pool.free_slabs[class_idx]; - if (slab) { - g_tiny_pool.free_slabs[class_idx] = slab->next; - } else { - slab = allocate_new_slab(class_idx); - if (!slab) { - pthread_mutex_unlock(lock); - return NULL; - } - } - slab->next = NULL; +// ============================================================================ +// Legacy Slow Allocation Path - EXTRACTED to hakmem_tiny_legacy_slow_box.inc +// ============================================================================ +#include "hakmem_tiny_legacy_slow_box.inc" - if (atomic_load_explicit(&slab->remote_head, memory_order_acquire)) { - tiny_remote_drain_locked(slab); - } - - int block_idx = hak_tiny_find_free_block(slab); - if (block_idx < 0) { - slab->next = g_tiny_pool.free_slabs[class_idx]; - g_tiny_pool.free_slabs[class_idx] = slab; - pthread_mutex_unlock(lock); - return NULL; - } - - hak_tiny_set_used(slab, block_idx); - slab->free_count--; - size_t block_size = g_tiny_class_sizes[class_idx]; - uint8_t* base = (uint8_t*)slab->base; - void* ret = (void*)(base + ((size_t)block_idx * block_size)); - g_tiny_pool.alloc_count[class_idx]++; - - uint16_t cap = g_fast_cap_defaults[class_idx]; - uint16_t count = g_fast_count[class_idx]; - uint16_t fast_need = (cap > count) ? 
(uint16_t)(cap - count) : 0; - if (fast_need > slab->free_count) fast_need = (uint16_t)slab->free_count; - - uint32_t tls_need = 0; - if (tls_enabled && tls_list_needs_refill(tls)) { - uint32_t target = tls_list_refill_threshold(tls); - if (tls->count < target) { - tls_need = target - tls->count; - } - } - uint32_t remaining = slab->free_count; - if (fast_need > remaining) fast_need = (uint16_t)remaining; - remaining -= fast_need; - if (tls_need > remaining) tls_need = remaining; - - while (fast_need > 0) { - int extra_idx = hak_tiny_find_free_block(slab); - if (extra_idx < 0) break; - hak_tiny_set_used(slab, extra_idx); - slab->free_count--; - void* extra = (void*)(base + ((size_t)extra_idx * block_size)); - int pushed = 0; - if (__builtin_expect(g_fastcache_enable && class_idx <= 3, 1)) { - pushed = fastcache_push(class_idx, extra); - } else { - pushed = tiny_fast_push(class_idx, extra); - } - if (!pushed) { - if (tls_enabled) { - tiny_tls_list_guard_push(class_idx, tls, extra); - tls_list_push(tls, extra, class_idx); - } - } - fast_need--; - } - - while (tls_enabled && tls_need > 0) { - int extra_idx = hak_tiny_find_free_block(slab); - if (extra_idx < 0) break; - hak_tiny_set_used(slab, extra_idx); - slab->free_count--; - void* extra = (void*)(base + ((size_t)extra_idx * block_size)); - tiny_tls_list_guard_push(class_idx, tls, extra); - tls_list_push(tls, extra, class_idx); - tls_need--; - } - - if (slab->free_count == 0) { - move_to_full_list(class_idx, slab); - } else { - slab->next = g_tiny_pool.free_slabs[class_idx]; - g_tiny_pool.free_slabs[class_idx] = slab; - } - - pthread_mutex_unlock(lock); - return ret; -} // ============================================================================ // EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1) // ============================================================================ // Function: tiny_fast_refill_and_take() - 39 lines (lines 584-622) -// Hot-path cheap sampling counter to avoid rand() in allocation path -// Phase 9.4: TLS single-linked freelist (mimalloc-inspired) for hottest classes (≤128B/≤256B) -int g_tls_sll_enable = 1; // HAKMEM_TINY_TLS_SLL=0 to disable -int g_tiny_hotpath_class5 = 0; // HAKMEM_TINY_HOTPATH_CLASS5=1 to enable class 5 hotpath -// Phase 6-1.7: Export TLS variables for box refactor (Box 5/6 need access from hakmem.c) -// CRITICAL FIX: Explicit initializers prevent SEGV from uninitialized TLS in worker threads -// PRIORITY 3: TLS Canaries - Add canaries around TLS arrays to detect buffer overruns -#define TLS_CANARY_MAGIC 0xDEADBEEFDEADBEEFULL -// Phase 3d-B: Unified TLS SLL (head+count in same cache line for +12-18% cache hit rate) -__thread uint64_t g_tls_canary_before_sll = TLS_CANARY_MAGIC; -__thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES] __attribute__((aligned(64))) = {0}; -__thread uint64_t g_tls_canary_after_sll = TLS_CANARY_MAGIC; -static int g_tiny_ultra = 0; // HAKMEM_TINY_ULTRA=1 for SLL-only ultra mode -static int g_ultra_validate = 0; // HAKMEM_TINY_ULTRA_VALIDATE=1 to enable per-pop validation -// Ultra debug counters -#if HAKMEM_DEBUG_COUNTERS -static __attribute__((unused)) uint64_t g_ultra_pop_hits[TINY_NUM_CLASSES] = {0}; -static uint64_t g_ultra_refill_calls[TINY_NUM_CLASSES] = {0}; -static __attribute__((unused)) uint64_t g_ultra_resets[TINY_NUM_CLASSES] = {0}; -#endif -// Path counters (normal mode visibility): lightweight, for debugging/bench only -#if HAKMEM_DEBUG_COUNTERS -static __attribute__((unused)) uint64_t g_path_sll_pop[TINY_NUM_CLASSES] = {0}; -static __attribute__((unused)) 
uint64_t g_path_mag_pop[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_path_front_pop[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_path_superslab[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_path_refill_calls[TINY_NUM_CLASSES] = {0};
-// New: slow/bitmap/bump/bin instrumentation
-static __attribute__((unused)) uint64_t g_alloc_slow_calls[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_superslab_refill_calls_dbg[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_bitmap_scan_calls[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_bgbin_pops[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_bump_hits[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_bump_arms[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_spec_calls[TINY_NUM_CLASSES] = {0};
-static __attribute__((unused)) uint64_t g_spec_hits[TINY_NUM_CLASSES] = {0};
-#endif
-static int g_path_debug_enabled = 0;
+// ============================================================================
+// TLS/Frontend State & Configuration - EXTRACTED to hakmem_tiny_tls_state_box.inc
+// ============================================================================
+#include "hakmem_tiny_tls_state_box.inc"

-// Spill hysteresis (eliminates getenv from the free hot path)
-static int g_spill_hyst = 32; // default margin (configured at init; never getenv on hot path)
-
-// Optional per-class refill batch overrides (0 = use global defaults)
-static int g_refill_max_c[TINY_NUM_CLASSES] = {0};
-static int g_refill_max_hot_c[TINY_NUM_CLASSES] = {0};
-static inline __attribute__((always_inline)) int tiny_refill_max_for_class(int class_idx) {
-    int v = g_refill_max_c[class_idx];
-    if (v > 0) return v;
-    if (class_idx <= 3) {
-        int hv = g_refill_max_hot_c[class_idx];
-        if (hv > 0) return hv;
-        return g_tiny_refill_max_hot;
-    }
-    return g_tiny_refill_max;
-}
-
-// Phase 9.5: Frontend/Backend split - Tiny Front modules (QuickSlot / FastCache)
-#include "front/quick_slot.h"
-#include "front/fast_cache.h"
-__thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES];
-static int g_frontend_enable = 0; // HAKMEM_TINY_FRONTEND=1 (experimental ultra-fast frontend)
-// SLL capacity multiplier for hot tiny classes (env: HAKMEM_SLL_MULTIPLIER)
-int g_sll_multiplier = 2;
-// Cached thread id (uint32) to avoid repeated pthread_self() in hot paths
-static __thread uint32_t g_tls_tid32;
-static __thread int g_tls_tid32_inited;
-// Phase 6-1.7: Export for box refactor (Box 6 needs access from hakmem.c)
-#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
-inline __attribute__((always_inline)) uint32_t tiny_self_u32(void) {
-#else
-static inline __attribute__((always_inline)) uint32_t tiny_self_u32(void) {
-#endif
-    if (__builtin_expect(!g_tls_tid32_inited, 0)) {
-        g_tls_tid32 = (uint32_t)(uintptr_t)pthread_self();
-        g_tls_tid32_inited = 1;
-    }
-    return g_tls_tid32;
-}
-// Cached pthread_t as-is for APIs that require pthread_t comparison
-static __thread pthread_t g_tls_pt_self;
-static __thread int g_tls_pt_inited;
-
-// Frontend FastCache hit/miss counters (Small diagnostics)
-unsigned long long g_front_fc_hit[TINY_NUM_CLASSES] = {0};
-unsigned long long g_front_fc_miss[TINY_NUM_CLASSES] = {0};
-// TLS SLL class mask: bit i = 1 allows SLL for class i. Default: all 8 classes enabled.
-int g_tls_sll_class_mask = 0xFF;
-// Phase 6-1.7: Export for box refactor (Box 6 needs access from hakmem.c)
-#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
-inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {
-#else
-static inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {
-#endif
-    if (__builtin_expect(!g_tls_pt_inited, 0)) {
-        g_tls_pt_self = pthread_self();
-        g_tls_pt_inited = 1;
-    }
-    return g_tls_pt_self;
-}
-
-#include "tiny_refill.h"
-// tiny_mmap_gate.h already included at top
-#include "tiny_publish.h"
-
-int g_sll_cap_override[TINY_NUM_CLASSES] = {0}; // LEGACY (not referenced since Phase 12; kept as a compatibility dummy)
-// Optional prefetch on SLL pop (guarded by env: HAKMEM_TINY_PREFETCH=1)
-static int g_tiny_prefetch = 0;
-
-// Small-class magazine pre-initialization (to avoid cap==0 checks on hot path)
-
-
-
-// Hot-class small TLS magazine (storage and switches)
-typedef struct {
-    void* slots[128];
-    uint16_t top; // 0..128
-    uint16_t cap; // =128
-} TinyHotMag;
-static int g_hotmag_cap_default = 128;   // default capacity (env override)
-static int g_hotmag_refill_default = 32; // default refill batch (env override)
-static int g_hotmag_enable = 0; // default OFF (for A/B testing); can be enabled via env
-static uint16_t g_hotmag_cap_current[TINY_NUM_CLASSES];
-static uint8_t g_hotmag_cap_locked[TINY_NUM_CLASSES];
-static uint16_t g_hotmag_refill_current[TINY_NUM_CLASSES];
-static uint8_t g_hotmag_refill_locked[TINY_NUM_CLASSES];
-static uint8_t g_hotmag_class_en[TINY_NUM_CLASSES]; // 0=disabled for class, 1=enabled
-static __thread TinyHotMag g_tls_hot_mag[TINY_NUM_CLASSES];
-// Inline helpers
-
-#include "box/tls_sll_box.h" // Box TLS-SLL: Safe SLL operations API (needed by hotmag)
-#include "hakmem_tiny_hotmag.inc.h"
-
-// Size-specialized tiny alloc (32B/64B) via function pointers (for A/B testing)
-// TinyQuickSlot: 1 cache line per class (quick 6 items + small metadata)
-// Opt-in via HAKMEM_TINY_QUICK=1
-// NOTE: This type definition must come BEFORE the Phase 2D-1 includes below
-int g_quick_enable = 0; // HAKMEM_TINY_QUICK=1
-__thread TinyQuickSlot g_tls_quick[TINY_NUM_CLASSES]; // compile-out via guards below
-
-// Phase 2D-1: Hot-path inline function extractions (Front)
-// NOTE: TinyFastCache/TinyQuickSlot are already defined in front/
-#include "hakmem_tiny_hot_pop.inc.h" // 4 functions: tiny_hot_pop_class{0..3}
-#include "hakmem_tiny_refill.inc.h" // 8 functions: refill operations
-#if HAKMEM_TINY_P0_BATCH_REFILL
-#include "hakmem_tiny_refill_p0.inc.h" // P0 batch refill: refills FastCache directly
-#endif
-
-// Phase 7 Task 3: Pre-warm TLS cache at init
-// Pre-allocate blocks to reduce first-allocation miss penalty
-#if HAKMEM_TINY_PREWARM_TLS
-void hak_tiny_prewarm_tls_cache(void) {
-    // Pre-warm each class with HAKMEM_TINY_PREWARM_COUNT blocks
-    // This reduces the first-allocation miss penalty by populating TLS cache
-    // Phase E1-CORRECT: ALL classes (including C7) now use TLS SLL
-    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
-        int count = HAKMEM_TINY_PREWARM_COUNT; // Default: 16 blocks per class
-
-        // Trigger refill to populate TLS cache
-        // P0 Fix: Use appropriate refill function based on P0 status
-#if HAKMEM_TINY_P0_BATCH_REFILL
-        sll_refill_batch_from_ss(class_idx, count);
-#else
-        sll_refill_small_from_ss(class_idx, count);
-#endif
-    }
-}
-#endif
-
-// Ultra-Simple front (small per-class stack): combines the tiny fronts to minimize
-// instructions and memory touches on alloc/free. Uses the existing TLS bump shadow
-// (g_tls_bcur/bend) when enabled to avoid per-alloc header writes.
-// UltraFront capacity for 32/64B fast pop -#ifndef ULTRA_FRONT_CAP -#define ULTRA_FRONT_CAP 64 -#endif -typedef struct __attribute__((aligned(64))) { - void* slots[ULTRA_FRONT_CAP]; - uint16_t top; // 0..ULTRA_FRONT_CAP - uint16_t _pad; -} TinyUltraFront; -static int g_ultra_simple = 0; // HAKMEM_TINY_ULTRA_SIMPLE=1 -static __thread TinyUltraFront g_tls_ultra[TINY_NUM_CLASSES]; -// Inline helpers -#include "hakmem_tiny_ultra_front.inc.h" - -// Ultra-Bump TLS shadow (bench/opt-in): keep a TLS-only bump window -// to avoid per-alloc header writes. Header is updated per-chunk reservation. -// NOTE: Non-static because used in hakmem_tiny_refill.inc.h -int g_bump_chunk = 32; // HAKMEM_TINY_BUMP_CHUNK (blocks) -__thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES] = {0}; -__thread uint8_t* g_tls_bend[TINY_NUM_CLASSES] = {0}; - -// SLL small refill batch for specialized class (32/64B) -// Specialized order toggle: 1 = mag-first, 0 = sll-first -// HotMag helpers (for classes 0..3) -static inline int is_hot_class(int class_idx) { return class_idx >= 0 && class_idx <= 3; } - -// Optional front (Ultra/HotMag) push helper: compile-out in release builds -static inline int tiny_optional_push(int class_idx, void* ptr) { -#if HAKMEM_BUILD_RELEASE - (void)class_idx; - (void)ptr; - return 0; -#else - if (__builtin_expect(g_ultra_simple && is_hot_class(class_idx), 0)) { - if (__builtin_expect(ultra_push(class_idx, ptr), 0)) { - return 1; - } - } - if (__builtin_expect(is_hot_class(class_idx), 0)) { - if (__builtin_expect(hotmag_push(class_idx, ptr), 0)) { - return 1; - } - } - return 0; -#endif -} - -// Ultra-Simple helpers - -// Phase 9.6: Deferred Intelligence (event queue + background) -// Extended event for FLINT Intelligence (lightweight; recorded off hot path only) -// Observability, ACE, and intelligence helpers #include "hakmem_tiny_intel.inc" // ============================================================================ @@ -767,83 +459,12 @@ static inline int ultra_batch_for_class(int class_idx) { // EXTRACTED: } // Lookup slab by base address (O(1) average) -static TinySlab* registry_lookup(uintptr_t slab_base) { - // Lock-free read with atomic owner access (MT-safe) - int hash = registry_hash(slab_base); - - // Linear probing search - for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) { - int idx = (hash + i) & SLAB_REGISTRY_MASK; - SlabRegistryEntry* entry = &g_slab_registry[idx]; - - if (entry->slab_base == slab_base) { - // Atomic load to prevent TOCTOU race with registry_unregister() - TinySlab* owner = atomic_load_explicit(&entry->owner, memory_order_acquire); - if (!owner) return NULL; // Entry cleared by unregister - return owner; - } - - if (entry->slab_base == 0) { - return NULL; // Empty slot - not found - } - } - return NULL; // Not found after max probes -} // ============================================================================ -// EXTRACTED TO hakmem_tiny_slab_mgmt.inc (Phase 2D-4 FINAL) +// Registry Lookup & Owner Slab Discovery - EXTRACTED to hakmem_tiny_slab_lookup_box.inc // ============================================================================ -// Function: allocate_new_slab() - 79 lines (lines 952-1030) -// Allocate new slab for a class +#include "hakmem_tiny_slab_lookup_box.inc" -// Function: release_slab() - 23 lines (lines 1033-1055) -// Release a slab back to system - -// Step 2: Find slab owner by pointer (O(1) via hash table registry, or O(N) fallback) -TinySlab* hak_tiny_owner_slab(void* ptr) { - if (!ptr || !g_tiny_initialized) return NULL; - - // Phase 6.14: 
Runtime toggle between Registry (O(1)) and List (O(N)) - if (g_use_registry) { - // O(1) lookup via hash table - uintptr_t slab_base = (uintptr_t)ptr & ~(TINY_SLAB_SIZE - 1); - TinySlab* slab = registry_lookup(slab_base); - if (!slab) return NULL; - // SAFETY: validate membership (ptr must be inside [base, base+64KB)) - uintptr_t start = (uintptr_t)slab->base; - uintptr_t end = start + TINY_SLAB_SIZE; - if ((uintptr_t)ptr < start || (uintptr_t)ptr >= end) { - return NULL; // false positive from registry → treat as non-Tiny - } - return slab; - } else { - // O(N) fallback: linear search through all slab lists (lock per class) - for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) { - pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m; - pthread_mutex_lock(lock); - // Search free slabs - for (TinySlab* slab = g_tiny_pool.free_slabs[class_idx]; slab; slab = slab->next) { - uintptr_t slab_start = (uintptr_t)slab->base; - uintptr_t slab_end = slab_start + TINY_SLAB_SIZE; - if ((uintptr_t)ptr >= slab_start && (uintptr_t)ptr < slab_end) { - pthread_mutex_unlock(lock); - return slab; - } - } - // Search full slabs - for (TinySlab* slab = g_tiny_pool.full_slabs[class_idx]; slab; slab = slab->next) { - uintptr_t slab_start = (uintptr_t)slab->base; - uintptr_t slab_end = slab_start + TINY_SLAB_SIZE; - if ((uintptr_t)ptr >= slab_start && (uintptr_t)ptr < slab_end) { - pthread_mutex_unlock(lock); - return slab; - } - } - pthread_mutex_unlock(lock); - } - return NULL; // Not found - } -} // Function: move_to_full_list() - 20 lines (lines 1104-1123) // Move slab to full list diff --git a/core/hakmem_tiny_legacy_slow_box.inc b/core/hakmem_tiny_legacy_slow_box.inc new file mode 100644 index 00000000..8bf6893b --- /dev/null +++ b/core/hakmem_tiny_legacy_slow_box.inc @@ -0,0 +1,96 @@ +static __attribute__((cold, noinline, unused)) void* tiny_slow_alloc_fast(int class_idx) { + int tls_enabled = g_tls_list_enable; + TinyTLSList* tls = &g_tls_lists[class_idx]; + pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m; + pthread_mutex_lock(lock); + + TinySlab* slab = g_tiny_pool.free_slabs[class_idx]; + if (slab) { + g_tiny_pool.free_slabs[class_idx] = slab->next; + } else { + slab = allocate_new_slab(class_idx); + if (!slab) { + pthread_mutex_unlock(lock); + return NULL; + } + } + slab->next = NULL; + + if (atomic_load_explicit(&slab->remote_head, memory_order_acquire)) { + tiny_remote_drain_locked(slab); + } + + int block_idx = hak_tiny_find_free_block(slab); + if (block_idx < 0) { + slab->next = g_tiny_pool.free_slabs[class_idx]; + g_tiny_pool.free_slabs[class_idx] = slab; + pthread_mutex_unlock(lock); + return NULL; + } + + hak_tiny_set_used(slab, block_idx); + slab->free_count--; + size_t block_size = g_tiny_class_sizes[class_idx]; + uint8_t* base = (uint8_t*)slab->base; + void* ret = (void*)(base + ((size_t)block_idx * block_size)); + g_tiny_pool.alloc_count[class_idx]++; + + uint16_t cap = g_fast_cap_defaults[class_idx]; + uint16_t count = g_fast_count[class_idx]; + uint16_t fast_need = (cap > count) ? 
(uint16_t)(cap - count) : 0; + if (fast_need > slab->free_count) fast_need = (uint16_t)slab->free_count; + + uint32_t tls_need = 0; + if (tls_enabled && tls_list_needs_refill(tls)) { + uint32_t target = tls_list_refill_threshold(tls); + if (tls->count < target) { + tls_need = target - tls->count; + } + } + uint32_t remaining = slab->free_count; + if (fast_need > remaining) fast_need = (uint16_t)remaining; + remaining -= fast_need; + if (tls_need > remaining) tls_need = remaining; + + while (fast_need > 0) { + int extra_idx = hak_tiny_find_free_block(slab); + if (extra_idx < 0) break; + hak_tiny_set_used(slab, extra_idx); + slab->free_count--; + void* extra = (void*)(base + ((size_t)extra_idx * block_size)); + int pushed = 0; + if (__builtin_expect(g_fastcache_enable && class_idx <= 3, 1)) { + pushed = fastcache_push(class_idx, extra); + } else { + pushed = tiny_fast_push(class_idx, extra); + } + if (!pushed) { + if (tls_enabled) { + tiny_tls_list_guard_push(class_idx, tls, extra); + tls_list_push(tls, extra, class_idx); + } + } + fast_need--; + } + + while (tls_enabled && tls_need > 0) { + int extra_idx = hak_tiny_find_free_block(slab); + if (extra_idx < 0) break; + hak_tiny_set_used(slab, extra_idx); + slab->free_count--; + void* extra = (void*)(base + ((size_t)extra_idx * block_size)); + tiny_tls_list_guard_push(class_idx, tls, extra); + tls_list_push(tls, extra, class_idx); + tls_need--; + } + + if (slab->free_count == 0) { + move_to_full_list(class_idx, slab); + } else { + slab->next = g_tiny_pool.free_slabs[class_idx]; + g_tiny_pool.free_slabs[class_idx] = slab; + } + + pthread_mutex_unlock(lock); + return ret; +} diff --git a/core/hakmem_tiny_slab_lookup_box.inc b/core/hakmem_tiny_slab_lookup_box.inc new file mode 100644 index 00000000..2a5684a1 --- /dev/null +++ b/core/hakmem_tiny_slab_lookup_box.inc @@ -0,0 +1,77 @@ +static TinySlab* registry_lookup(uintptr_t slab_base) { + // Lock-free read with atomic owner access (MT-safe) + int hash = registry_hash(slab_base); + + // Linear probing search + for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) { + int idx = (hash + i) & SLAB_REGISTRY_MASK; + SlabRegistryEntry* entry = &g_slab_registry[idx]; + + if (entry->slab_base == slab_base) { + // Atomic load to prevent TOCTOU race with registry_unregister() + TinySlab* owner = atomic_load_explicit(&entry->owner, memory_order_acquire); + if (!owner) return NULL; // Entry cleared by unregister + return owner; + } + + if (entry->slab_base == 0) { + return NULL; // Empty slot - not found + } + } + return NULL; // Not found after max probes +} + +// ============================================================================ +// EXTRACTED TO hakmem_tiny_slab_mgmt.inc (Phase 2D-4 FINAL) +// ============================================================================ +// Function: allocate_new_slab() - 79 lines (lines 952-1030) +// Allocate new slab for a class + +// Function: release_slab() - 23 lines (lines 1033-1055) +// Release a slab back to system + +// Step 2: Find slab owner by pointer (O(1) via hash table registry, or O(N) fallback) +TinySlab* hak_tiny_owner_slab(void* ptr) { + if (!ptr || !g_tiny_initialized) return NULL; + + // Phase 6.14: Runtime toggle between Registry (O(1)) and List (O(N)) + if (g_use_registry) { + // O(1) lookup via hash table + uintptr_t slab_base = (uintptr_t)ptr & ~(TINY_SLAB_SIZE - 1); + TinySlab* slab = registry_lookup(slab_base); + if (!slab) return NULL; + // SAFETY: validate membership (ptr must be inside [base, base+64KB)) + uintptr_t start = 
(uintptr_t)slab->base; + uintptr_t end = start + TINY_SLAB_SIZE; + if ((uintptr_t)ptr < start || (uintptr_t)ptr >= end) { + return NULL; // false positive from registry → treat as non-Tiny + } + return slab; + } else { + // O(N) fallback: linear search through all slab lists (lock per class) + for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) { + pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m; + pthread_mutex_lock(lock); + // Search free slabs + for (TinySlab* slab = g_tiny_pool.free_slabs[class_idx]; slab; slab = slab->next) { + uintptr_t slab_start = (uintptr_t)slab->base; + uintptr_t slab_end = slab_start + TINY_SLAB_SIZE; + if ((uintptr_t)ptr >= slab_start && (uintptr_t)ptr < slab_end) { + pthread_mutex_unlock(lock); + return slab; + } + } + // Search full slabs + for (TinySlab* slab = g_tiny_pool.full_slabs[class_idx]; slab; slab = slab->next) { + uintptr_t slab_start = (uintptr_t)slab->base; + uintptr_t slab_end = slab_start + TINY_SLAB_SIZE; + if ((uintptr_t)ptr >= slab_start && (uintptr_t)ptr < slab_end) { + pthread_mutex_unlock(lock); + return slab; + } + } + pthread_mutex_unlock(lock); + } + return NULL; // Not found + } +} diff --git a/core/hakmem_tiny_tls_state_box.inc b/core/hakmem_tiny_tls_state_box.inc new file mode 100644 index 00000000..9dabc8d5 --- /dev/null +++ b/core/hakmem_tiny_tls_state_box.inc @@ -0,0 +1,224 @@ +// Hot-path cheap sampling counter to avoid rand() in allocation path +// Phase 9.4: TLS single-linked freelist (mimalloc-inspired) for hottest classes (≤128B/≤256B) +int g_tls_sll_enable = 1; // HAKMEM_TINY_TLS_SLL=0 to disable +int g_tiny_hotpath_class5 = 0; // HAKMEM_TINY_HOTPATH_CLASS5=1 to enable class 5 hotpath +// Phase 6-1.7: Export TLS variables for box refactor (Box 5/6 need access from hakmem.c) +// CRITICAL FIX: Explicit initializers prevent SEGV from uninitialized TLS in worker threads +// PRIORITY 3: TLS Canaries - Add canaries around TLS arrays to detect buffer overruns +#define TLS_CANARY_MAGIC 0xDEADBEEFDEADBEEFULL +// Phase 3d-B: Unified TLS SLL (head+count in same cache line for +12-18% cache hit rate) +__thread uint64_t g_tls_canary_before_sll = TLS_CANARY_MAGIC; +__thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES] __attribute__((aligned(64))) = {0}; +__thread uint64_t g_tls_canary_after_sll = TLS_CANARY_MAGIC; +static int g_tiny_ultra = 0; // HAKMEM_TINY_ULTRA=1 for SLL-only ultra mode +static int g_ultra_validate = 0; // HAKMEM_TINY_ULTRA_VALIDATE=1 to enable per-pop validation +// Ultra debug counters +#if HAKMEM_DEBUG_COUNTERS +static __attribute__((unused)) uint64_t g_ultra_pop_hits[TINY_NUM_CLASSES] = {0}; +static uint64_t g_ultra_refill_calls[TINY_NUM_CLASSES] = {0}; +static __attribute__((unused)) uint64_t g_ultra_resets[TINY_NUM_CLASSES] = {0}; +#endif + +// Path counters (normal mode visibility): lightweight, for debugging/bench only +#if HAKMEM_DEBUG_COUNTERS +static __attribute__((unused)) uint64_t g_path_sll_pop[TINY_NUM_CLASSES] = {0}; +static __attribute__((unused)) uint64_t g_path_mag_pop[TINY_NUM_CLASSES] = {0}; +static __attribute__((unused)) uint64_t g_path_front_pop[TINY_NUM_CLASSES] = {0}; +static __attribute__((unused)) uint64_t g_path_superslab[TINY_NUM_CLASSES] = {0}; +static __attribute__((unused)) uint64_t g_path_refill_calls[TINY_NUM_CLASSES] = {0}; +// New: slow/bitmap/bump/bin instrumentation +static __attribute__((unused)) uint64_t g_alloc_slow_calls[TINY_NUM_CLASSES] = {0}; +static __attribute__((unused)) uint64_t g_superslab_refill_calls_dbg[TINY_NUM_CLASSES] = {0}; +static 
__attribute__((unused)) uint64_t g_bitmap_scan_calls[TINY_NUM_CLASSES] = {0};
+static __attribute__((unused)) uint64_t g_bgbin_pops[TINY_NUM_CLASSES] = {0};
+static __attribute__((unused)) uint64_t g_bump_hits[TINY_NUM_CLASSES] = {0};
+static __attribute__((unused)) uint64_t g_bump_arms[TINY_NUM_CLASSES] = {0};
+static __attribute__((unused)) uint64_t g_spec_calls[TINY_NUM_CLASSES] = {0};
+static __attribute__((unused)) uint64_t g_spec_hits[TINY_NUM_CLASSES] = {0};
+#endif
+static int g_path_debug_enabled = 0;
+
+// Spill hysteresis (eliminates getenv from the free hot path)
+static int g_spill_hyst = 32; // default margin (configured at init; never getenv on hot path)
+
+// Optional per-class refill batch overrides (0 = use global defaults)
+static int g_refill_max_c[TINY_NUM_CLASSES] = {0};
+static int g_refill_max_hot_c[TINY_NUM_CLASSES] = {0};
+static inline __attribute__((always_inline)) int tiny_refill_max_for_class(int class_idx) {
+    int v = g_refill_max_c[class_idx];
+    if (v > 0) return v;
+    if (class_idx <= 3) {
+        int hv = g_refill_max_hot_c[class_idx];
+        if (hv > 0) return hv;
+        return g_tiny_refill_max_hot;
+    }
+    return g_tiny_refill_max;
+}
+
+// Phase 9.5: Frontend/Backend split - Tiny Front modules (QuickSlot / FastCache)
+#include "front/quick_slot.h"
+#include "front/fast_cache.h"
+__thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES];
+static int g_frontend_enable = 0; // HAKMEM_TINY_FRONTEND=1 (experimental ultra-fast frontend)
+// SLL capacity multiplier for hot tiny classes (env: HAKMEM_SLL_MULTIPLIER)
+int g_sll_multiplier = 2;
+// Cached thread id (uint32) to avoid repeated pthread_self() in hot paths
+static __thread uint32_t g_tls_tid32;
+static __thread int g_tls_tid32_inited;
+// Phase 6-1.7: Export for box refactor (Box 6 needs access from hakmem.c)
+#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
+inline __attribute__((always_inline)) uint32_t tiny_self_u32(void) {
+#else
+static inline __attribute__((always_inline)) uint32_t tiny_self_u32(void) {
+#endif
+    if (__builtin_expect(!g_tls_tid32_inited, 0)) {
+        g_tls_tid32 = (uint32_t)(uintptr_t)pthread_self();
+        g_tls_tid32_inited = 1;
+    }
+    return g_tls_tid32;
+}
+// Cached pthread_t as-is for APIs that require pthread_t comparison
+static __thread pthread_t g_tls_pt_self;
+static __thread int g_tls_pt_inited;
+
+// Frontend FastCache hit/miss counters (Small diagnostics)
+unsigned long long g_front_fc_hit[TINY_NUM_CLASSES] = {0};
+unsigned long long g_front_fc_miss[TINY_NUM_CLASSES] = {0};
+// TLS SLL class mask: bit i = 1 allows SLL for class i. Default: all 8 classes enabled.
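+// Illustrative example (an assumption for documentation, not a shipped
+// configuration): a mask of 0x0F would confine the TLS SLL to the four
+// hottest classes (bits 0-3) and route classes 4-7 through the
+// magazine/slab paths instead.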
+int g_tls_sll_class_mask = 0xFF;
+// Phase 6-1.7: Export for box refactor (Box 6 needs access from hakmem.c)
+#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
+inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {
+#else
+static inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {
+#endif
+    if (__builtin_expect(!g_tls_pt_inited, 0)) {
+        g_tls_pt_self = pthread_self();
+        g_tls_pt_inited = 1;
+    }
+    return g_tls_pt_self;
+}
+
+#include "tiny_refill.h"
+// tiny_mmap_gate.h already included at top
+#include "tiny_publish.h"
+
+int g_sll_cap_override[TINY_NUM_CLASSES] = {0}; // LEGACY (not referenced since Phase 12; kept as a compatibility dummy)
+// Optional prefetch on SLL pop (guarded by env: HAKMEM_TINY_PREFETCH=1)
+static int g_tiny_prefetch = 0;
+
+// Small-class magazine pre-initialization (to avoid cap==0 checks on hot path)
+
+
+
+// Hot-class small TLS magazine (storage and switches)
+typedef struct {
+    void* slots[128];
+    uint16_t top; // 0..128
+    uint16_t cap; // =128
+} TinyHotMag;
+static int g_hotmag_cap_default = 128;   // default capacity (env override)
+static int g_hotmag_refill_default = 32; // default refill batch (env override)
+static int g_hotmag_enable = 0; // default OFF (for A/B testing); can be enabled via env
+static uint16_t g_hotmag_cap_current[TINY_NUM_CLASSES];
+static uint8_t g_hotmag_cap_locked[TINY_NUM_CLASSES];
+static uint16_t g_hotmag_refill_current[TINY_NUM_CLASSES];
+static uint8_t g_hotmag_refill_locked[TINY_NUM_CLASSES];
+static uint8_t g_hotmag_class_en[TINY_NUM_CLASSES]; // 0=disabled for class, 1=enabled
+static __thread TinyHotMag g_tls_hot_mag[TINY_NUM_CLASSES];
+// Inline helpers
+
+#include "box/tls_sll_box.h" // Box TLS-SLL: Safe SLL operations API (needed by hotmag)
+#include "hakmem_tiny_hotmag.inc.h"
+
+// Size-specialized tiny alloc (32B/64B) via function pointers (for A/B testing)
+// TinyQuickSlot: 1 cache line per class (quick 6 items + small metadata)
+// Opt-in via HAKMEM_TINY_QUICK=1
+// NOTE: This type definition must come BEFORE the Phase 2D-1 includes below
+int g_quick_enable = 0; // HAKMEM_TINY_QUICK=1
+__thread TinyQuickSlot g_tls_quick[TINY_NUM_CLASSES]; // compile-out via guards below
+
+// Phase 2D-1: Hot-path inline function extractions (Front)
+// NOTE: TinyFastCache/TinyQuickSlot are already defined in front/
+#include "hakmem_tiny_hot_pop.inc.h" // 4 functions: tiny_hot_pop_class{0..3}
+#include "hakmem_tiny_refill.inc.h" // 8 functions: refill operations
+#if HAKMEM_TINY_P0_BATCH_REFILL
+#include "hakmem_tiny_refill_p0.inc.h" // P0 batch refill: refills FastCache directly
+#endif
+
+// Phase 7 Task 3: Pre-warm TLS cache at init
+// Pre-allocate blocks to reduce first-allocation miss penalty
+#if HAKMEM_TINY_PREWARM_TLS
+void hak_tiny_prewarm_tls_cache(void) {
+    // Pre-warm each class with HAKMEM_TINY_PREWARM_COUNT blocks
+    // This reduces the first-allocation miss penalty by populating TLS cache
+    // Phase E1-CORRECT: ALL classes (including C7) now use TLS SLL
+    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
+        int count = HAKMEM_TINY_PREWARM_COUNT; // Default: 16 blocks per class
+
+        // Trigger refill to populate TLS cache
+        // P0 Fix: Use appropriate refill function based on P0 status
+#if HAKMEM_TINY_P0_BATCH_REFILL
+        sll_refill_batch_from_ss(class_idx, count);
+#else
+        sll_refill_small_from_ss(class_idx, count);
+#endif
+    }
+}
+#endif
+
+// Ultra-Simple front (small per-class stack): combines the tiny fronts to minimize
+// instructions and memory touches on alloc/free. Uses the existing TLS bump shadow
+// (g_tls_bcur/bend) when enabled to avoid per-alloc header writes.
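+//
+// A minimal sketch of the pop discipline this front aims for (illustration
+// only, under the assumption of the TinyUltraFront layout defined below; the
+// real helpers live in hakmem_tiny_ultra_front.inc.h):
+//
+//     static inline void* ultra_pop(int class_idx) {
+//         TinyUltraFront* f = &g_tls_ultra[class_idx];
+//         return f->top ? f->slots[--f->top] : NULL; // LIFO, TLS-only, no locks
+//     }
+//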
+// UltraFront capacity for 32/64B fast pop +#ifndef ULTRA_FRONT_CAP +#define ULTRA_FRONT_CAP 64 +#endif +typedef struct __attribute__((aligned(64))) { + void* slots[ULTRA_FRONT_CAP]; + uint16_t top; // 0..ULTRA_FRONT_CAP + uint16_t _pad; +} TinyUltraFront; +static int g_ultra_simple = 0; // HAKMEM_TINY_ULTRA_SIMPLE=1 +static __thread TinyUltraFront g_tls_ultra[TINY_NUM_CLASSES]; +// Inline helpers +#include "hakmem_tiny_ultra_front.inc.h" + +// Ultra-Bump TLS shadow (bench/opt-in): keep a TLS-only bump window +// to avoid per-alloc header writes. Header is updated per-chunk reservation. +// NOTE: Non-static because used in hakmem_tiny_refill.inc.h +int g_bump_chunk = 32; // HAKMEM_TINY_BUMP_CHUNK (blocks) +__thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES] = {0}; +__thread uint8_t* g_tls_bend[TINY_NUM_CLASSES] = {0}; + +// SLL small refill batch for specialized class (32/64B) +// Specialized order toggle: 1 = mag-first, 0 = sll-first +// HotMag helpers (for classes 0..3) +static inline int is_hot_class(int class_idx) { return class_idx >= 0 && class_idx <= 3; } + +// Optional front (Ultra/HotMag) push helper: compile-out in release builds +static inline int tiny_optional_push(int class_idx, void* ptr) { +#if HAKMEM_BUILD_RELEASE + (void)class_idx; + (void)ptr; + return 0; +#else + if (__builtin_expect(g_ultra_simple && is_hot_class(class_idx), 0)) { + if (__builtin_expect(ultra_push(class_idx, ptr), 0)) { + return 1; + } + } + if (__builtin_expect(is_hot_class(class_idx), 0)) { + if (__builtin_expect(hotmag_push(class_idx, ptr), 0)) { + return 1; + } + } + return 0; +#endif +} + +// Ultra-Simple helpers + +// Phase 9.6: Deferred Intelligence (event queue + background) +// Extended event for FLINT Intelligence (lightweight; recorded off hot path only) +// Observability, ACE, and intelligence helpers
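+
+// Usage sketch for tiny_optional_push() above (hypothetical caller shown for
+// clarity; the actual free path lives in hakmem_tiny.c). In debug/bench builds
+// the optional fronts (Ultra/HotMag) are tried first; in release builds the
+// call compiles down to 0, so the fallback always runs:
+//
+//     if (!tiny_optional_push(class_idx, ptr)) {
+//         tiny_fast_push(class_idx, ptr); // assumed fallback into the TLS front
+//     }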