diff --git a/core/box/pool_alloc_v2_box.h b/core/box/pool_alloc_v2_box.h
new file mode 100644
index 00000000..de2bc77c
--- /dev/null
+++ b/core/box/pool_alloc_v2_box.h
@@ -0,0 +1,332 @@
+// pool_alloc_v2_box.h — Box: Pool V2 Alloc Implementation
+//
+// Purpose: Pool v2 alloc path with hotbox_v2 integration
+// Pattern: Enhanced alloc path with hotbox, MF2, TC drain, and TLS support
+// Phase: Pool API Modularization - Step 7 (LARGEST COMPLEXITY - 277 lines)
+// Dependencies: Assumes pool_api.inc.h includes this after pool_block_to_user_box.h
+//               (provides AllocHeader, PoolBlock, PoolTLSRing, g_pool, etc.)
+
+#ifndef POOL_ALLOC_V2_BOX_H
+#define POOL_ALLOC_V2_BOX_H
+
+#include "pool_block_to_user_box.h"  // Pool block to user helpers
+#include "pool_config_box.h"         // For configuration gates
+#include "pool_stats_box.h"          // For statistics
+#include "pool_mid_desc_cache_box.h" // For mid_desc_lookup_cached
+#include "pool_hotbox_v2_box.h"      // For hotbox v2 functions
+#include "tiny_heap_env_box.h"       // TinyHeap profile
+
+#include <stddef.h>
+#include <stdint.h>
+
+// External functions (large set due to complexity)
+extern void hak_pool_init(void);
+extern int hak_pool_is_poolable(size_t size);
+extern int hak_pool_get_class_index(size_t size);
+extern int hak_pool_get_shard_index(uintptr_t site_id);
+extern void set_nonempty_bit(int class_idx, int shard);
+extern void clear_nonempty_bit(int class_idx, int shard);
+extern void mid_desc_adopt(void* block, int class_idx, uint64_t owner_tid);
+extern void* mf2_alloc_fast(int class_idx, size_t size, uintptr_t site_id);
+extern int choose_nonempty_shard(int class_idx, int shard_idx);
+extern void drain_remote_locked(int class_idx, int shard_idx);
+extern int is_shard_nonempty(int class_idx, int shard_idx);
+extern int refill_freelist(int class_idx, int shard_idx);
+
+// Note: The following functions/macros/types are assumed to be available from the
+// caller's compilation unit (hakmem_pool.c):
+//   - PoolTLSPage, PoolTLSBin, FrozenPolicy (types from pool_tls_types.inc.h)
+//   - mid_tc_has_items, mid_tc_drain_into_tls (from pool_mid_tc.inc.h)
+//   - refill_tls_from_active_page, alloc_tls_page (from pool_tls_core.inc.h)
+//   - hkm_policy_get (from hakmem_policy.h)
+//   - hkm_prof_begin, hkm_prof_end (macros from hakmem_prof.h)
+
+// Assumed available from caller includes:
+//   - AllocHeader, PoolBlock, PoolTLSRing, PoolTLSPage (from hakmem_internal.h / pool_tls_types.inc.h)
+//   - g_pool, g_tls_bin, g_class_sizes, t_pool_rng, g_count_sample_exp (from hakmem_pool.c)
+//   - g_tls_ring_enabled, g_tls_active_page_a/b/c, g_tc_enabled, g_tc_drain_trigger
+//   - g_mf2_enabled, g_wrap_l2_enabled, g_trylock_probes
+//   - HEADER_SIZE, POOL_L2_RING_CAP, POOL_NUM_SHARDS, POOL_MIN_SIZE, POOL_MAX_SIZE
+//   - HKM_TIME_START, HKM_TIME_END, HKM_CAT_*, HKP_* macros
+
+// ============================================================================
+// Pool V2 Alloc Implementation (with hotbox_v2, MF2, TC drain, TLS support)
+// ============================================================================
+static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) {
+    // Debug: IMMEDIATE output to verify function is called
+    static int first_call = 1;
+    if (__builtin_expect(first_call, 0)) {
+        HAKMEM_LOG("[Pool] hak_pool_try_alloc FIRST CALL EVER!\n");
+        first_call = 0;
+    }
+
+    if (__builtin_expect(size == 40960, 0)) {
+        HAKMEM_LOG("[Pool] hak_pool_try_alloc called with 40KB (Bridge class 5)\n");
+    }
+
+    hak_pool_init(); // pthread_once() ensures thread-safe init (no data race!)
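+
+    // Allocation path order (fastest first): hotbox v2 (when enabled for the
+    // class) -> MF2 per-page sharding -> TLS ring / local LIFO (with TC drain)
+    // -> trylock batch-pop from a non-empty shard -> TLS active bump pages ->
+    // locked shard freelist with remote drain, neighbor steal, and page refill.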
+
+    // Debug for 33-41KB allocations
+    if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
+        HAKMEM_LOG("[Pool] hak_pool_try_alloc: size=%zu (after init)\n", size);
+    }
+
+    // P1.7 guard: allow pool by default even when called from wrappers.
+    // Only block if explicitly disabled via env or during nested recursion.
+    extern int hak_in_wrapper(void);
+    extern __thread int g_hakmem_lock_depth;
+    int in_wrapper = hak_in_wrapper();
+    if (in_wrapper && g_hakmem_lock_depth > 1) {
+        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
+            HAKMEM_LOG("[Pool] REJECTED: nested wrapper depth=%d\n", g_hakmem_lock_depth);
+        }
+        return NULL;
+    }
+    if (in_wrapper && !g_wrap_l2_enabled) {
+        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
+            HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", in_wrapper, g_wrap_l2_enabled);
+        }
+        return NULL;
+    }
+    if (!hak_pool_is_poolable(size)) {
+        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
+            HAKMEM_LOG("[Pool] REJECTED: not poolable (min=%d, max=%d)\n", POOL_MIN_SIZE, POOL_MAX_SIZE);
+        }
+        return NULL;
+    }
+
+    // Get class and shard indices
+    int class_idx = hak_pool_get_class_index(size);
+    if (class_idx < 0) {
+        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
+            HAKMEM_LOG("[Pool] REJECTED: class_idx=%d (size=%zu not mapped)\n", class_idx, size);
+        }
+        return NULL;
+    }
+
+    // Experimental PoolHotBox v2 (Hot path) — currently structure only.
+    if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
+        void* p = pool_hotbox_v2_alloc((uint32_t)class_idx, size, site_id);
+        if (p) return p;
+        pool_hotbox_v2_record_alloc_fallback((uint32_t)class_idx);
+    }
+
+    if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
+        HAKMEM_LOG("[Pool] ACCEPTED: class_idx=%d, proceeding with allocation\n", class_idx);
+    }
+
+    // MF2: Per-Page Sharding path
+    if (g_mf2_enabled) {
+        return mf2_alloc_fast(class_idx, size, site_id);
+    }
+
+    // OLD PATH: TLS fast path (ring then local LIFO); drain TC only when needed
+    PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
+    if (g_tc_enabled && ring->top < g_tc_drain_trigger && mid_tc_has_items(class_idx)) {
+        HKM_TIME_START(t_tc_drain);
+        if (mid_tc_drain_into_tls(class_idx, ring, &g_tls_bin[class_idx])) {
+            HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain);
+            if (ring->top > 0) {
+                HKM_TIME_START(t_ring_pop0);
+                PoolBlock* tlsb = ring->items[--ring->top];
+                HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop0);
+                return hak_pool_block_to_user(tlsb, class_idx, site_id);
+            }
+        } else { HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain); }
+    }
+    if (g_tls_ring_enabled) {
+        if (ring->top == 0) {
+            atomic_fetch_add_explicit(&g_pool.ring_underflow, 1, memory_order_relaxed);
+        }
+        if (ring->top > 0) {
+            HKM_TIME_START(t_ring_pop1);
+            PoolBlock* tlsb = ring->items[--ring->top];
+            HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop1);
+            return hak_pool_block_to_user(tlsb, class_idx, site_id);
+        }
+    }
+    if (g_tls_bin[class_idx].lo_head) {
+        HKM_TIME_START(t_lifo_pop0);
+        PoolBlock* b = g_tls_bin[class_idx].lo_head;
+        g_tls_bin[class_idx].lo_head = b->next;
+        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
+        HKM_TIME_END(HKM_CAT_POOL_TLS_LIFO_POP, t_lifo_pop0);
+        return hak_pool_block_to_user(b, class_idx, site_id);
+    }
+
+    // Compute shard only when we need to access shared structures
+    int shard_idx = hak_pool_get_shard_index(site_id);
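+
+    // The trylock probe below is opportunistic: when an uncontended shard is
+    // found it refills the whole TLS ring in one critical section, instead of
+    // popping a single block under a blocking lock.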
+
+    // Try to batch-pop from a non-empty shard using trylock to fill TLS ring
+    if (g_tls_ring_enabled) {
+        int s0 = choose_nonempty_shard(class_idx, shard_idx);
+        for (int probe = 0; probe < g_trylock_probes; ++probe) {
+            int s = (s0 + probe) & (POOL_NUM_SHARDS - 1);
+            pthread_mutex_t* l = &g_pool.freelist_locks[class_idx][s].m;
+            atomic_fetch_add_explicit(&g_pool.trylock_attempts, 1, memory_order_relaxed);
+            if (pthread_mutex_trylock(l) == 0) {
+                atomic_fetch_add_explicit(&g_pool.trylock_success, 1, memory_order_relaxed);
+                // First, drain any remote frees into freelist
+                if (atomic_load_explicit(&g_pool.remote_count[class_idx][s], memory_order_relaxed) != 0) {
+                    drain_remote_locked(class_idx, s);
+                }
+                PoolBlock* head = g_pool.freelist[class_idx][s];
+                int to_ring = POOL_L2_RING_CAP - ring->top; if (to_ring < 0) to_ring = 0;
+                while (head && to_ring-- > 0) { PoolBlock* nxt = head->next; ring->items[ring->top++] = head; head = nxt; }
+                while (head) { PoolBlock* nxt = head->next; head->next = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = head; g_tls_bin[class_idx].lo_count++; head = nxt; }
+                g_pool.freelist[class_idx][s] = head;
+                if (!head) clear_nonempty_bit(class_idx, s);
+                pthread_mutex_unlock(l);
+                if (ring->top > 0) {
+                    PoolBlock* tlsb = ring->items[--ring->top];
+                    return hak_pool_block_to_user(tlsb, class_idx, site_id);
+                }
+            }
+        }
+    }
+
+    // Try TLS active pages (owner-only local bump-run, up to 3)
+    PoolTLSPage* ap = NULL;
+    if (g_tls_active_page_a[class_idx].page && g_tls_active_page_a[class_idx].count > 0 && g_tls_active_page_a[class_idx].bump < g_tls_active_page_a[class_idx].end) ap = &g_tls_active_page_a[class_idx];
+    else if (g_tls_active_page_b[class_idx].page && g_tls_active_page_b[class_idx].count > 0 && g_tls_active_page_b[class_idx].bump < g_tls_active_page_b[class_idx].end) ap = &g_tls_active_page_b[class_idx];
+    else if (g_tls_active_page_c[class_idx].page && g_tls_active_page_c[class_idx].count > 0 && g_tls_active_page_c[class_idx].bump < g_tls_active_page_c[class_idx].end) ap = &g_tls_active_page_c[class_idx];
+    if (ap) {
+        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
+            int need = POOL_L2_RING_CAP - ring->top;
+            (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
+        }
+        PoolBlock* b = NULL;
+        if (ring->top > 0) { b = ring->items[--ring->top]; }
+        else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
+            b = (PoolBlock*)(void*)ap->bump;
+            ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
+            ap->count--;
+            if (ap->bump >= ap->end || ap->count <= 0) { ap->page = NULL; ap->count = 0; }
+        }
+        if (b) {
+            g_pool.hits[class_idx]++;
+            return hak_pool_block_to_user(b, class_idx, site_id);
+        }
+    }
+
+    // Lock the shard freelist for this (class, shard)
+    pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx].m;
+    HKM_TIME_START(t_lock);
+    struct timespec ts_lk1; int lk1 = hkm_prof_begin(&ts_lk1);
+    (void)ts_lk1; (void)lk1; // Unused profiling variables
+    pthread_mutex_lock(lock);
+    HKM_TIME_END(HKM_CAT_POOL_LOCK, t_lock);
+    hkm_prof_end(lk1, HKP_POOL_LOCK, &ts_lk1);
+
+    // Try to pop from freelist
+    PoolBlock* block = g_pool.freelist[class_idx][shard_idx];
+
+    if (!block) {
+        // Before refilling, try draining remote stack and simple shard steal
+        int stole = 0;
+        const FrozenPolicy* pol = hkm_policy_get();
+        if (pol) {
+            uint16_t cap = 0;
+            if (class_idx < 5) cap = pol->mid_cap[class_idx];
+            else if (class_idx == 5 && pol->mid_dyn1_bytes != 0) cap = pol->mid_cap_dyn1;
+            else if (class_idx == 6 && pol->mid_dyn2_bytes != 0) cap = pol->mid_cap_dyn2;
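+            // Recovery order under the shard lock: drain remote frees first
+            // (the lock is already held), then try a light steal from the
+            // neighboring shards, and only refill a fresh page if both fail.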
+            // Drain remotes
+            if (atomic_load_explicit(&g_pool.remote_count[class_idx][shard_idx], memory_order_relaxed) != 0) {
+                drain_remote_locked(class_idx, shard_idx);
+                block = g_pool.freelist[class_idx][shard_idx];
+            }
+            // Light shard steal when over cap
+            if (!block && cap > 0 && g_pool.pages_by_class[class_idx] >= cap) {
+                HKM_TIME_START(t_steal);
+                for (int d = 1; d <= 4 && !stole; d++) {
+                    int s1 = (shard_idx + d) & (POOL_NUM_SHARDS - 1);
+                    int s2 = (shard_idx - d) & (POOL_NUM_SHARDS - 1);
+                    if (is_shard_nonempty(class_idx, s1)) {
+                        pthread_mutex_t* l2 = &g_pool.freelist_locks[class_idx][s1].m;
+                        pthread_mutex_lock(l2);
+                        PoolBlock* b2 = g_pool.freelist[class_idx][s1];
+                        if (b2) {
+                            g_pool.freelist[class_idx][s1] = b2->next;
+                            if (!g_pool.freelist[class_idx][s1]) clear_nonempty_bit(class_idx, s1);
+                            block = b2; stole = 1;
+                        }
+                        pthread_mutex_unlock(l2);
+                    }
+                    if (!stole && is_shard_nonempty(class_idx, s2)) {
+                        pthread_mutex_t* l3 = &g_pool.freelist_locks[class_idx][s2].m;
+                        pthread_mutex_lock(l3);
+                        PoolBlock* b3 = g_pool.freelist[class_idx][s2];
+                        if (b3) {
+                            g_pool.freelist[class_idx][s2] = b3->next;
+                            if (!g_pool.freelist[class_idx][s2]) clear_nonempty_bit(class_idx, s2);
+                            block = b3; stole = 1;
+                        }
+                        pthread_mutex_unlock(l3);
+                    }
+                }
+                HKM_TIME_END(HKM_CAT_SHARD_STEAL, t_steal);
+            }
+        }
+
+        if (!stole && !block) {
+            // Freelist empty, refill page
+            PoolTLSPage* tap = NULL;
+            if (g_tls_active_page_a[class_idx].page == NULL || g_tls_active_page_a[class_idx].count == 0) tap = &g_tls_active_page_a[class_idx];
+            else if (g_tls_active_page_b[class_idx].page == NULL || g_tls_active_page_b[class_idx].count == 0) tap = &g_tls_active_page_b[class_idx];
+            else if (g_tls_active_page_c[class_idx].page == NULL || g_tls_active_page_c[class_idx].count == 0) tap = &g_tls_active_page_c[class_idx];
+            else tap = &g_tls_active_page_a[class_idx];
+            HKM_TIME_START(t_alloc_page);
+            if (alloc_tls_page(class_idx, tap)) {
+                HKM_TIME_END(HKM_CAT_POOL_ALLOC_TLS_PAGE, t_alloc_page);
+                pthread_mutex_unlock(lock);
+                // Top-up ring and return
+                ap = tap;
+                if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
+                    int need = POOL_L2_RING_CAP - ring->top;
+                    (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
+                }
+                PoolBlock* takeb = NULL;
+                if (ring->top > 0) {
+                    HKM_TIME_START(t_ring_pop2);
+                    takeb = ring->items[--ring->top];
+                    HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop2);
+                } else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
+                    takeb = (PoolBlock*)(void*)ap->bump;
+                    ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
+                    ap->count--;
+                    if (ap->bump >= ap->end || ap->count == 0) {
+                        ap->page = NULL;
+                        ap->count = 0;
+                    }
+                }
+                return hak_pool_block_to_user(takeb, class_idx, site_id);
+            }
+            HKM_TIME_START(t_refill);
+            struct timespec ts_rf; int rf = hkm_prof_begin(&ts_rf);
+            (void)ts_rf; (void)rf;
+            int ok = refill_freelist(class_idx, shard_idx);
+            HKM_TIME_END(HKM_CAT_POOL_REFILL, t_refill);
+            hkm_prof_end(rf, HKP_POOL_REFILL, &ts_rf);
+            if (!ok) {
+                t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
+                if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0) g_pool.misses[class_idx]++; // sampled miss count
+                pthread_mutex_unlock(lock);
+                return NULL;
+            }
+            block = g_pool.freelist[class_idx][shard_idx];
+        }
+    }
+
+    // Pop the block from the freelist
+    g_pool.freelist[class_idx][shard_idx] = block->next;
+    mid_desc_adopt(block, class_idx, (uint64_t)(uintptr_t)pthread_self());
+    if (g_pool.freelist[class_idx][shard_idx] == NULL) clear_nonempty_bit(class_idx, shard_idx);
+    pthread_mutex_unlock(lock);
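+
+    // The freshly popped block is routed through the TLS ring/LIFO and taken
+    // right back out, so ring occupancy and lo_count bookkeeping stay on the
+    // same path as ordinary TLS cache hits.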
+
+    // Store to TLS then pop
+    PoolBlock* take;
+    if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) { ring->items[ring->top++] = block; take = ring->items[--ring->top]; }
+    else {
+        block->next = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = block; g_tls_bin[class_idx].lo_count++;
+        if (g_tls_ring_enabled && ring->top > 0) { take = ring->items[--ring->top]; }
+        else { take = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = take->next; if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--; }
+    }
+
+    return hak_pool_block_to_user(take, class_idx, site_id);
+}
+
+#endif // POOL_ALLOC_V2_BOX_H
diff --git a/core/box/pool_api.inc.h b/core/box/pool_api.inc.h
index 8b3c4dd1..461bb8b7 100644
--- a/core/box/pool_api.inc.h
+++ b/core/box/pool_api.inc.h
@@ -13,285 +13,9 @@
 #include "box/pool_block_to_user_box.h" // Pool block to user pointer helpers
 #include "box/pool_free_v2_box.h"       // Pool v2 free implementation (with hotbox v2)
 #include "box/pool_alloc_v1_flat_box.h" // Pool v1 flatten (TLS-only fast path)
+#include "box/pool_alloc_v2_box.h"      // Pool v2 alloc implementation (with hotbox v2)
 #include <stdint.h>
 
-static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) {
-    // Debug: IMMEDIATE output to verify function is called
-    static int first_call = 1;
-    if (__builtin_expect(first_call, 0)) {
-        HAKMEM_LOG("[Pool] hak_pool_try_alloc FIRST CALL EVER!\n");
-        first_call = 0;
-    }
-
-    if (__builtin_expect(size == 40960, 0)) {
-        HAKMEM_LOG("[Pool] hak_pool_try_alloc called with 40KB (Bridge class 5)\n");
-    }
-
-    hak_pool_init(); // pthread_once() ensures thread-safe init (no data race!)
-
-    // Debug for 33-41KB allocations
-    if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
-        HAKMEM_LOG("[Pool] hak_pool_try_alloc: size=%zu (after init)\n", size);
-    }
-
-    // P1.7 guard: allow pool by default even when called from wrappers.
-    // Only block if explicitly disabled via env or during nested recursion.
-    extern int hak_in_wrapper(void);
-    extern __thread int g_hakmem_lock_depth;
-    int in_wrapper = hak_in_wrapper();
-    if (in_wrapper && g_hakmem_lock_depth > 1) {
-        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
-            HAKMEM_LOG("[Pool] REJECTED: nested wrapper depth=%d\n", g_hakmem_lock_depth);
-        }
-        return NULL;
-    }
-    if (in_wrapper && !g_wrap_l2_enabled) {
-        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
-            HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", in_wrapper, g_wrap_l2_enabled);
-        }
-        return NULL;
-    }
-    if (!hak_pool_is_poolable(size)) {
-        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
-            HAKMEM_LOG("[Pool] REJECTED: not poolable (min=%d, max=%d)\n", POOL_MIN_SIZE, POOL_MAX_SIZE);
-        }
-        return NULL;
-    }
-
-    // Get class and shard indices
-    int class_idx = hak_pool_get_class_index(size);
-    if (class_idx < 0) {
-        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
-            HAKMEM_LOG("[Pool] REJECTED: class_idx=%d (size=%zu not mapped)\n", class_idx, size);
-        }
-        return NULL;
-    }
-
-    // Experimental PoolHotBox v2 (Hot path) — currently structure only.
-    if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
-        void* p = pool_hotbox_v2_alloc((uint32_t)class_idx, size, site_id);
-        if (p) return p;
-        pool_hotbox_v2_record_alloc_fallback((uint32_t)class_idx);
-    }
-
-    if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
-        HAKMEM_LOG("[Pool] ACCEPTED: class_idx=%d, proceeding with allocation\n", class_idx);
-    }
-
-    // MF2: Per-Page Sharding path
-    if (g_mf2_enabled) {
-        return mf2_alloc_fast(class_idx, size, site_id);
-    }
-
-    // OLD PATH: TLS fast path (ring then local LIFO); drain TC only when needed
-    PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
-    if (g_tc_enabled && ring->top < g_tc_drain_trigger && mid_tc_has_items(class_idx)) {
-        HKM_TIME_START(t_tc_drain);
-        if (mid_tc_drain_into_tls(class_idx, ring, &g_tls_bin[class_idx])) {
-            HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain);
-            if (ring->top > 0) {
-                HKM_TIME_START(t_ring_pop0);
-                PoolBlock* tlsb = ring->items[--ring->top];
-                HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop0);
-                return hak_pool_block_to_user(tlsb, class_idx, site_id);
-            }
-        } else { HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain); }
-    }
-    if (g_tls_ring_enabled) {
-        if (ring->top == 0) {
-            atomic_fetch_add_explicit(&g_pool.ring_underflow, 1, memory_order_relaxed);
-        }
-        if (ring->top > 0) {
-            HKM_TIME_START(t_ring_pop1);
-            PoolBlock* tlsb = ring->items[--ring->top];
-            HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop1);
-            return hak_pool_block_to_user(tlsb, class_idx, site_id);
-        }
-    }
-    if (g_tls_bin[class_idx].lo_head) {
-        HKM_TIME_START(t_lifo_pop0);
-        PoolBlock* b = g_tls_bin[class_idx].lo_head;
-        g_tls_bin[class_idx].lo_head = b->next;
-        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
-        HKM_TIME_END(HKM_CAT_POOL_TLS_LIFO_POP, t_lifo_pop0);
-        return hak_pool_block_to_user(b, class_idx, site_id);
-    }
-
-    // Compute shard only when we need to access shared structures
-    int shard_idx = hak_pool_get_shard_index(site_id);
-
-    // Try to batch-pop from a non-empty shard using trylock to fill TLS ring
-    if (g_tls_ring_enabled) {
-        int s0 = choose_nonempty_shard(class_idx, shard_idx);
-        for (int probe = 0; probe < g_trylock_probes; ++probe) {
-            int s = (s0 + probe) & (POOL_NUM_SHARDS - 1);
-            pthread_mutex_t* l = &g_pool.freelist_locks[class_idx][s].m;
-            atomic_fetch_add_explicit(&g_pool.trylock_attempts, 1, memory_order_relaxed);
-            if (pthread_mutex_trylock(l) == 0) {
-                atomic_fetch_add_explicit(&g_pool.trylock_success, 1, memory_order_relaxed);
-                // First, drain any remote frees into freelist
-                if (atomic_load_explicit(&g_pool.remote_count[class_idx][s], memory_order_relaxed) != 0) {
-                    drain_remote_locked(class_idx, s);
-                }
-                PoolBlock* head = g_pool.freelist[class_idx][s];
-                int to_ring = POOL_L2_RING_CAP - ring->top; if (to_ring < 0) to_ring = 0;
-                while (head && to_ring-- > 0) { PoolBlock* nxt = head->next; ring->items[ring->top++] = head; head = nxt; }
-                while (head) { PoolBlock* nxt = head->next; head->next = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = head; g_tls_bin[class_idx].lo_count++; head = nxt; }
-                g_pool.freelist[class_idx][s] = head;
-                if (!head) clear_nonempty_bit(class_idx, s);
-                pthread_mutex_unlock(l);
-                if (ring->top > 0) {
-                    PoolBlock* tlsb = ring->items[--ring->top];
-                    return hak_pool_block_to_user(tlsb, class_idx, site_id);
-                }
-            }
-        }
-    }
-
-    // Try TLS active pages (owner-only local bump-run, up to 3)
-    PoolTLSPage* ap = NULL;
-    if (g_tls_active_page_a[class_idx].page && g_tls_active_page_a[class_idx].count > 0 && g_tls_active_page_a[class_idx].bump < g_tls_active_page_a[class_idx].end) ap = &g_tls_active_page_a[class_idx];
-    else if (g_tls_active_page_b[class_idx].page && g_tls_active_page_b[class_idx].count > 0 && g_tls_active_page_b[class_idx].bump < g_tls_active_page_b[class_idx].end) ap = &g_tls_active_page_b[class_idx];
-    else if (g_tls_active_page_c[class_idx].page && g_tls_active_page_c[class_idx].count > 0 && g_tls_active_page_c[class_idx].bump < g_tls_active_page_c[class_idx].end) ap = &g_tls_active_page_c[class_idx];
-    if (ap) {
-        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
-            int need = POOL_L2_RING_CAP - ring->top;
-            (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
-        }
-        PoolBlock* b = NULL;
-        if (ring->top > 0) { b = ring->items[--ring->top]; }
-        else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
-            b = (PoolBlock*)(void*)ap->bump; ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]); ap->count--; if (ap->bump >= ap->end || ap->count <= 0) { ap->page = NULL; ap->count = 0; }
-        }
-        if (b) {
-            g_pool.hits[class_idx]++;
-            return hak_pool_block_to_user(b, class_idx, site_id);
-        }
-    }
-
-    // Lock the shard freelist for this (class, shard)
-    pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx].m;
-    HKM_TIME_START(t_lock);
-    struct timespec ts_lk1; int lk1 = hkm_prof_begin(&ts_lk1);
-    (void)ts_lk1; (void)lk1; // Unused profiling variables
-    pthread_mutex_lock(lock);
-    HKM_TIME_END(HKM_CAT_POOL_LOCK, t_lock);
-    hkm_prof_end(lk1, HKP_POOL_LOCK, &ts_lk1);
-
-    // Try to pop from freelist
-    PoolBlock* block = g_pool.freelist[class_idx][shard_idx];
-
-    if (!block) {
-        // Before refilling, try draining remote stack and simple shard steal
-        int stole = 0;
-        const FrozenPolicy* pol = hkm_policy_get();
-        if (pol) {
-            uint16_t cap = 0;
-            if (class_idx < 5) cap = pol->mid_cap[class_idx];
-            else if (class_idx == 5 && pol->mid_dyn1_bytes != 0) cap = pol->mid_cap_dyn1;
-            else if (class_idx == 6 && pol->mid_dyn2_bytes != 0) cap = pol->mid_cap_dyn2;
-            // Drain remotes
-            if (atomic_load_explicit(&g_pool.remote_count[class_idx][shard_idx], memory_order_relaxed) != 0) {
-                drain_remote_locked(class_idx, shard_idx);
-                block = g_pool.freelist[class_idx][shard_idx];
-            }
-            // Light shard steal when over cap
-            if (!block && cap > 0 && g_pool.pages_by_class[class_idx] >= cap) {
-                HKM_TIME_START(t_steal);
-                for (int d = 1; d <= 4 && !stole; d++) {
-                    int s1 = (shard_idx + d) & (POOL_NUM_SHARDS - 1);
-                    int s2 = (shard_idx - d) & (POOL_NUM_SHARDS - 1);
-                    if (is_shard_nonempty(class_idx, s1)) {
-                        pthread_mutex_t* l2 = &g_pool.freelist_locks[class_idx][s1].m;
-                        pthread_mutex_lock(l2);
-                        PoolBlock* b2 = g_pool.freelist[class_idx][s1];
-                        if (b2) {
-                            g_pool.freelist[class_idx][s1] = b2->next;
-                            if (!g_pool.freelist[class_idx][s1]) clear_nonempty_bit(class_idx, s1);
-                            block = b2; stole = 1;
-                        }
-                        pthread_mutex_unlock(l2);
-                    }
-                    if (!stole && is_shard_nonempty(class_idx, s2)) {
-                        pthread_mutex_t* l3 = &g_pool.freelist_locks[class_idx][s2].m;
-                        pthread_mutex_lock(l3);
-                        PoolBlock* b3 = g_pool.freelist[class_idx][s2];
-                        if (b3) {
-                            g_pool.freelist[class_idx][s2] = b3->next;
-                            if (!g_pool.freelist[class_idx][s2]) clear_nonempty_bit(class_idx, s2);
-                            block = b3; stole = 1;
-                        }
-                        pthread_mutex_unlock(l3);
-                    }
-                }
-                HKM_TIME_END(HKM_CAT_SHARD_STEAL, t_steal);
-            }
-        }
-
-        if (!stole && !block) {
-            // Freelist empty, refill page
-            PoolTLSPage* tap = NULL;
-            if (g_tls_active_page_a[class_idx].page == NULL || g_tls_active_page_a[class_idx].count == 0) tap = &g_tls_active_page_a[class_idx];
-            else if (g_tls_active_page_b[class_idx].page == NULL || g_tls_active_page_b[class_idx].count == 0) tap = &g_tls_active_page_b[class_idx];
-            else if (g_tls_active_page_c[class_idx].page == NULL || g_tls_active_page_c[class_idx].count == 0) tap = &g_tls_active_page_c[class_idx];
-            else tap = &g_tls_active_page_a[class_idx];
-            HKM_TIME_START(t_alloc_page);
-            if (alloc_tls_page(class_idx, tap)) {
-                HKM_TIME_END(HKM_CAT_POOL_ALLOC_TLS_PAGE, t_alloc_page);
-                pthread_mutex_unlock(lock);
-                // Top-up ring and return
-                ap = tap;
-                if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
-                    int need = POOL_L2_RING_CAP - ring->top;
-                    (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
-                }
-                PoolBlock* takeb = NULL;
-                if (ring->top > 0) {
-                    HKM_TIME_START(t_ring_pop2);
-                    takeb = ring->items[--ring->top];
-                    HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop2);
-                } else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
-                    takeb = (PoolBlock*)(void*)ap->bump;
-                    ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
-                    ap->count--;
-                    if (ap->bump >= ap->end || ap->count == 0) {
-                        ap->page = NULL;
-                        ap->count = 0;
-                    }
-                }
-                return hak_pool_block_to_user(takeb, class_idx, site_id);
-            }
-            HKM_TIME_START(t_refill);
-            struct timespec ts_rf; int rf = hkm_prof_begin(&ts_rf);
-            (void)ts_rf; (void)rf;
-            int ok = refill_freelist(class_idx, shard_idx);
-            HKM_TIME_END(HKM_CAT_POOL_REFILL, t_refill);
-            hkm_prof_end(rf, HKP_POOL_REFILL, &ts_rf);
-            if (!ok) {
-                t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
-                if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0) g_pool.misses[class_idx]++; // sampled miss count
-                pthread_mutex_unlock(lock);
-                return NULL;
-            }
-            block = g_pool.freelist[class_idx][shard_idx];
-        }
-    }
-
-    // Pop the block from the freelist
-    g_pool.freelist[class_idx][shard_idx] = block->next;
-    mid_desc_adopt(block, class_idx, (uint64_t)(uintptr_t)pthread_self());
-    if (g_pool.freelist[class_idx][shard_idx] == NULL) clear_nonempty_bit(class_idx, shard_idx);
-    pthread_mutex_unlock(lock);
-
-    // Store to TLS then pop
-    PoolBlock* take;
-    if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) { ring->items[ring->top++] = block; take = ring->items[--ring->top]; }
-    else {
-        block->next = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = block; g_tls_bin[class_idx].lo_count++;
-        if (g_tls_ring_enabled && ring->top > 0) { take = ring->items[--ring->top]; }
-        else { take = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = take->next; if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--; }
-    }
-
-    return hak_pool_block_to_user(take, class_idx, site_id);
-}
-
 static inline void* hak_pool_try_alloc_v1_impl(size_t size, uintptr_t site_id) {
     // Debug: IMMEDIATE output to verify function is called
     static int first_call = 1;