diff --git a/core/box/free_local_box.c b/core/box/free_local_box.c
index cb915c2c..984bb248 100644
--- a/core/box/free_local_box.c
+++ b/core/box/free_local_box.c
@@ -19,11 +19,11 @@
 void tiny_failfast_log(const char* stage, void* ptr, void* prev);
 
-void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void* ptr, uint32_t my_tid) {
+int tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void* ptr, uint32_t my_tid) {
     extern _Atomic uint64_t g_free_local_box_calls;
     atomic_fetch_add_explicit(&g_free_local_box_calls, 1, memory_order_relaxed);
-    if (!(ss && ss->magic == SUPERSLAB_MAGIC)) return;
-    if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) return;
+    if (!(ss && ss->magic == SUPERSLAB_MAGIC)) return 0;
+    if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) return 0;
     (void)my_tid;
 
     // ✅ Phase E1-CORRECT: ALL classes have headers, calculate BASE pointer once
@@ -177,11 +177,16 @@ void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
     // Track local free (debug helpers may be no-op)
     tiny_remote_track_on_local_free(ss, slab_idx, ptr, "local_free", my_tid);
-    meta->used--;
+
+    // BUGFIX Phase 9-2: Use atomic_fetch_sub to detect 1->0 transition reliably
+    // meta->used--;  // old
+    uint16_t prev_used = atomic_fetch_sub_explicit(&meta->used, 1, memory_order_release);
+    int is_empty = (prev_used == 1);  // Transitioned from 1 to 0
+
     ss_active_dec_one(ss);
 
     // Phase 12-1.1: EMPTY slab detection (immediate reuse optimization)
-    if (meta->used == 0) {
+    if (is_empty) {
         // Slab became EMPTY → mark for highest-priority reuse
         ss_mark_slab_empty(ss, slab_idx);
@@ -206,4 +211,6 @@ void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
         uint8_t cls0 = (meta && meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : 0;
         tiny_free_publish_first_free((int)cls0, ss, slab_idx);
     }
+
+    return is_empty;
 }
diff --git a/core/box/free_local_box.h b/core/box/free_local_box.h
index 7e565e7b..1e2303da 100644
--- a/core/box/free_local_box.h
+++ b/core/box/free_local_box.h
@@ -4,5 +4,6 @@
 #include "hakmem_tiny_superslab.h"
 
 // Perform same-thread freelist push. On first-free (prev==NULL), publishes via Ready/Mailbox.
-void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void* ptr, uint32_t my_tid);
+// Returns: 1 if slab transitioned to EMPTY (used=0), 0 otherwise.
+int tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void* ptr, uint32_t my_tid);
 
diff --git a/core/box/tls_sll_drain_box.h b/core/box/tls_sll_drain_box.h
index 4aaf51a8..86517589 100644
--- a/core/box/tls_sll_drain_box.h
+++ b/core/box/tls_sll_drain_box.h
@@ -204,7 +204,8 @@ static inline uint32_t tiny_tls_sll_drain(int class_idx, uint32_t batch_size) {
         // Call tiny_free_local_box() to:
         //   1. Push block to slab freelist
         //   2. Decrement meta->used (THIS IS THE KEY!)
-        tiny_free_local_box(ss, slab_idx, meta, user_ptr, my_tid);
+        // Phase 9-2 FIX: Capture 'is_empty' return value to detect ownership of 1->0 transition
+        int is_empty = tiny_free_local_box(ss, slab_idx, meta, user_ptr, my_tid);
 
 #if !HAKMEM_BUILD_RELEASE
         // Trace drain operation (debug only)
@@ -220,15 +221,9 @@
         drained++;
 
         // Phase 9-2: Track touched slab for later EMPTY check
-        // We track (ss, slab_idx) pairs to check after loop completes
-        int already_tracked = 0;
-        for (int t = 0; t < num_touched; t++) {
-            if (touched[t].ss == ss && touched[t].slab_idx == slab_idx) {
-                already_tracked = 1;
-                break;
-            }
-        }
-        if (!already_tracked && num_touched < MAX_TOUCHED_SLABS) {
+        // CRITICAL FIX: Only recycle if WE caused the transition to EMPTY (is_empty == 1)
+        // This prevents multiple threads from racing to release the same slab
+        if (is_empty && num_touched < MAX_TOUCHED_SLABS) {
             touched[num_touched].ss = ss;
             touched[num_touched].slab_idx = slab_idx;
             num_touched++;
diff --git a/core/hakmem_shared_pool_acquire.c b/core/hakmem_shared_pool_acquire.c
index 3f7cba84..5f641747 100644
--- a/core/hakmem_shared_pool_acquire.c
+++ b/core/hakmem_shared_pool_acquire.c
@@ -418,19 +418,8 @@ stage2_fallback:
     }
 
     // Before creating a new SuperSlab, consult learning-layer soft cap.
-    // If current active slots for this class already exceed the policy cap,
-    // fail early so caller can fall back to legacy backend.
-    uint32_t limit = sp_class_active_limit(class_idx);
-    if (limit > 0) {
-        uint32_t cur = g_shared_pool.class_active_slots[class_idx];
-        if (cur >= limit) {
-            if (g_lock_stats_enabled == 1) {
-                atomic_fetch_add(&g_lock_release_count, 1);
-            }
-            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
-            return -1;  // Soft cap reached for this class
-        }
-    }
+    // Phase 9-2: Soft Cap removed to allow Shared Pool to fully replace Legacy Backend.
+    // We now rely on LRU eviction and EMPTY recycling to manage memory pressure.
 
     // Create metadata for this new SuperSlab
     SharedSSMeta* new_meta = sp_meta_find_or_create(new_ss);
diff --git a/core/hakmem_shared_pool_release.c b/core/hakmem_shared_pool_release.c
index a51dfeef..de1c6cb5 100644
--- a/core/hakmem_shared_pool_release.c
+++ b/core/hakmem_shared_pool_release.c
@@ -26,6 +26,23 @@ shared_pool_release_slab(SuperSlab* ss, int slab_idx)
         return;
     }
 
+    // Phase 9-2 FIX: Promote Legacy SuperSlabs to Shared Pool on first recycle
+    // If we are recycling a slot from a Legacy SS, we must remove it from the
+    // Legacy list (g_superslab_heads) to prevent Legacy Backend from allocating
+    // from it simultaneously (Double Allocation Race).
+    // This effectively transfers ownership to Shared Pool.
+    extern void remove_superslab_from_legacy_head(SuperSlab* ss);
+    remove_superslab_from_legacy_head(ss);
+
+    // BUGFIX: Re-check used count after removal. Legacy Backend might have
+    // allocated from this slab while we were waiting for the lock in remove().
+    TinySlabMeta* slab_meta = &ss->slabs[slab_idx];
+    if (atomic_load_explicit(&slab_meta->used, memory_order_acquire) != 0) {
+        // Legacy Backend stole this slab. It's now an orphan (removed from list).
+        // We abort recycling. It will be recycled when Legacy frees it later.
+        return;
+    }
+
     // Debug logging
 #if !HAKMEM_BUILD_RELEASE
     static int dbg = -1;
@@ -46,9 +63,9 @@ shared_pool_release_slab(SuperSlab* ss, int slab_idx)
     pthread_mutex_lock(&g_shared_pool.alloc_lock);
 
-    TinySlabMeta* slab_meta = &ss->slabs[slab_idx];
+    // TinySlabMeta* slab_meta = &ss->slabs[slab_idx];  // Already declared above
     if (slab_meta->used != 0) {
-        // Not actually empty; nothing to do
+        // Not actually empty (double check under lock)
         if (g_lock_stats_enabled == 1) {
             atomic_fetch_add(&g_lock_release_count, 1);
         }
@@ -160,15 +177,28 @@ shared_pool_release_slab(SuperSlab* ss, int slab_idx)
     pthread_mutex_unlock(&g_shared_pool.alloc_lock);
 
-    // Remove from legacy backend list (if present) to prevent dangling pointers
-    extern void remove_superslab_from_legacy_head(SuperSlab* ss);
-    remove_superslab_from_legacy_head(ss);
+    // Remove from legacy backend list (moved to top of function)
+    // extern void remove_superslab_from_legacy_head(SuperSlab* ss);
+    // remove_superslab_from_legacy_head(ss);
 
     // Free SuperSlab:
     //   1. Try LRU cache (hak_ss_lru_push) - lazy deallocation
     //   2. Or munmap if LRU is full - eager deallocation
-    extern void superslab_free(SuperSlab* ss);
-    superslab_free(ss);
+
+    // BUGFIX: Double check total_active_blocks. Legacy Backend might have
+    // allocated from ANOTHER slab in this SS just before we removed it.
+    // If so, we must NOT free the SS.
+    if (atomic_load(&ss->total_active_blocks) == 0) {
+        extern void superslab_free(SuperSlab* ss);
+        superslab_free(ss);
+    } else {
+    #if !HAKMEM_BUILD_RELEASE
+        if (dbg == 1) {
+            fprintf(stderr, "[SP_SLOT_RELEASE] SKIP free ss=%p: total_active_blocks=%u > 0\n",
+                    (void*)ss, atomic_load(&ss->total_active_blocks));
+        }
+    #endif
+    }
 
     return;
 }
diff --git a/core/superslab_backend.c b/core/superslab_backend.c
index f465f462..3adf5de5 100644
--- a/core/superslab_backend.c
+++ b/core/superslab_backend.c
@@ -28,6 +28,9 @@ void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
         g_superslab_heads[class_idx] = head;
     }
 
+    // LOCK expansion_lock to protect list traversal (vs remove_superslab_from_legacy_head)
+    pthread_mutex_lock(&head->expansion_lock);
+
     SuperSlab* chunk = head->current_chunk ? head->current_chunk : head->first_chunk;
 
     while (chunk) {
@@ -62,12 +65,19 @@ void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
                 meta->used++;
                 atomic_fetch_add_explicit(&chunk->total_active_blocks, 1, memory_order_relaxed);
+
+                // UNLOCK before return
+                pthread_mutex_unlock(&head->expansion_lock);
+
                 HAK_RET_ALLOC_BLOCK_TRACED(class_idx, base, ALLOC_PATH_BACKEND);
             }
         }
 
         chunk = chunk->next_chunk;
     }
 
+    // UNLOCK before expansion (which takes lock internally)
+    pthread_mutex_unlock(&head->expansion_lock);
+
     if (expand_superslab_head(head) < 0) {
         return NULL;
     }
@@ -212,74 +222,23 @@
  * Box API entry:
  * - Single front-door for tiny-side Superslab allocations.
  *
- * Phase 9-2 Root Fix: Shared Pool backend unified mode (default ON)
+ * Phase 9-2 Final: Shared Pool ONLY (Legacy Backend Removed)
  * Policy:
- * - HAKMEM_TINY_SS_SHARED=2 (default) → Shared Pool backend ONLY (no legacy fallback)
- * - HAKMEM_TINY_SS_SHARED=1 → Shared Pool backend with legacy fallback (testing mode)
- * - HAKMEM_TINY_SS_SHARED=0 → Legacy backend only (compatibility mode)
- *
- * Root Cause: Legacy backend (g_superslab_heads) has TLS_SLL_DUP issue
- * Solution: Disable legacy backend by default, keep as "reversible box" via env var
+ * - HAKMEM_TINY_SS_SHARED is now ignored (or used only for logging).
+ * - Always uses Shared Pool backend.
+ * - Legacy backend (g_superslab_heads) is no longer used for allocation.
  */
 void* hak_tiny_alloc_superslab_box(int class_idx)
 {
-    static int g_ss_shared_mode = -1;
-    static _Atomic uint32_t g_ss_backend_log = 0;
-    if (__builtin_expect(g_ss_shared_mode == -1, 0)) {
-        const char* e = getenv("HAKMEM_TINY_SS_SHARED");
-        if (!e || !*e) {
-            g_ss_shared_mode = 2;  // Phase 9-2 Root Fix: Shared Pool ONLY (no legacy fallback)
-        } else {
-            int v = atoi(e);
-            g_ss_shared_mode = v;  // 0=legacy only, 1=shared+fallback, 2=shared only
-        }
-#if !HAKMEM_BUILD_RELEASE
-        const char* mode_str = (g_ss_shared_mode == 2) ? "shared_only" :
-                               (g_ss_shared_mode == 1) ? "shared+fallback" : "legacy_only";
-        fprintf(stderr, "[SS_BACKEND] Mode: %s (HAKMEM_TINY_SS_SHARED=%d)\n", mode_str, g_ss_shared_mode);
-#endif
-    }
-
-    // Mode 2: Shared Pool ONLY (default, no legacy fallback)
-    if (g_ss_shared_mode == 2) {
-        void* p = hak_tiny_alloc_superslab_backend_shared(class_idx);
-        if (p != NULL) {
-            uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
-            if (n < 4) {
-                fprintf(stderr, "[SS_BACKEND] shared_only cls=%d ptr=%p\n", class_idx, p);
-            }
-            return p;
-        }
-        // Phase 9-2: NO fallback to legacy - return NULL on failure
-        uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
+    // Always use Shared Pool (Mode 2 equivalent)
+    void* p = hak_tiny_alloc_superslab_backend_shared(class_idx);
+
+    if (p == NULL) {
+        static _Atomic uint32_t g_ss_oom_log = 0;
+        uint32_t n = atomic_fetch_add_explicit(&g_ss_oom_log, 1, memory_order_relaxed);
         if (n < 4) {
-            fprintf(stderr, "[SS_BACKEND] shared_fail→NULL (no legacy) cls=%d\n", class_idx);
+            fprintf(stderr, "[SS_BACKEND] shared_fail→NULL (OOM) cls=%d\n", class_idx);
         }
-        return NULL;
     }
-
-    // Mode 1: Shared Pool with legacy fallback (testing mode)
-    if (g_ss_shared_mode == 1) {
-        void* p = hak_tiny_alloc_superslab_backend_shared(class_idx);
-        if (p != NULL) {
-            uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
-            if (n < 4) {
-                fprintf(stderr, "[SS_BACKEND] shared cls=%d ptr=%p\n", class_idx, p);
-            }
-            return p;
-        }
-        // Fallback to legacy
-        uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
-        if (n < 4) {
-            fprintf(stderr, "[SS_BACKEND] shared_fail→legacy cls=%d\n", class_idx);
-        }
-        return hak_tiny_alloc_superslab_backend_legacy(class_idx);
-    }
-
-    // Mode 0: Legacy backend only (compatibility mode)
-    uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
-    if (n < 4) {
-        fprintf(stderr, "[SS_BACKEND] legacy cls=%d\n", class_idx);
-    }
-    return hak_tiny_alloc_superslab_backend_legacy(class_idx);
+    return p;
 }
diff --git a/core/superslab_slab.c b/core/superslab_slab.c
index 6e4d6724..dfbbe4bf 100644
--- a/core/superslab_slab.c
+++ b/core/superslab_slab.c
@@ -27,6 +27,8 @@ void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMe
     // and splice in front of current freelist preserving relative order.
     void* prev = meta->freelist;
     int cls = (int)meta->class_idx;
+    uint16_t drained_count = 0;  // Phase 9-2: Batched used decrement
+
     HAK_CHECK_CLASS_IDX(cls, "_ss_remote_drain_to_freelist_unsafe");
     if (__builtin_expect(cls < 0 || cls >= TINY_NUM_CLASSES, 0)) {
         static _Atomic int g_remote_drain_cls_oob = 0;
@@ -104,14 +106,21 @@ void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMe
         tiny_next_write(cls, (void*)cur, prev);
         prev = (void*)cur;
         cur = next;
+        drained_count++;
     }
     meta->freelist = prev;
 
     // Reset remote count after full drain
     atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release);
 
-    // Phase 9-2: Try to recycle slab if EMPTY after remote drain
-    // This fixes the bug where EMPTY slabs accumulate and never get returned to freelist
-    SLAB_TRY_RECYCLE(ss, slab_idx, meta);
+    // Phase 9-2: Batched decrement of used count (Atomic)
+    // Remote frees don't decrement used until they land in freelist.
+    if (drained_count > 0) {
+        uint16_t old_used = atomic_fetch_sub_explicit(&meta->used, drained_count, memory_order_release);
+        // If used became 0 (old_used == drained_count), try to recycle
+        if (old_used == drained_count) {
+            SLAB_TRY_RECYCLE(ss, slab_idx, meta);
+        }
+    }
 
     // Update freelist/nonempty visibility bits
     uint32_t bit = (1u << slab_idx);
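The common thread in the free_local_box.c, tls_sll_drain_box.h, and superslab_slab.c hunks above is that `atomic_fetch_sub` returns the counter's previous value, so exactly one caller observes the transition to zero and therefore owns the recycle step. Below is a minimal standalone sketch of that pattern; it is not the allocator's real API: `slab_meta_t`, `free_one_block`, `release_blocks`, and `recycle_slab` are illustrative stand-ins for `TinySlabMeta`, `tiny_free_local_box`, the remote-drain batch, and `SLAB_TRY_RECYCLE`.

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for TinySlabMeta: only the atomic 'used' counter matters here. */
typedef struct {
    _Atomic uint16_t used;
} slab_meta_t;

/* Hypothetical recycle hook: must run exactly once per EMPTY transition. */
static void recycle_slab(slab_meta_t* meta) {
    (void)meta;
    puts("slab became EMPTY -> recycle (exactly once)");
}

/* Single free: returns 1 only for the caller whose decrement moved used from 1 to 0. */
static int free_one_block(slab_meta_t* meta) {
    uint16_t prev = atomic_fetch_sub_explicit(&meta->used, 1, memory_order_release);
    return prev == 1;  /* previous value 1 means this call performed the 1 -> 0 transition */
}

/* Batched variant (mirrors the remote-drain hunk): subtract the whole batch at once. */
static void release_blocks(slab_meta_t* meta, uint16_t count) {
    if (count == 0) return;
    uint16_t prev = atomic_fetch_sub_explicit(&meta->used, count, memory_order_release);
    if (prev == count) {  /* counter reached zero with this batch */
        recycle_slab(meta);
    }
}

int main(void) {
    slab_meta_t meta = { .used = 3 };

    if (free_one_block(&meta)) {   /* 3 -> 2: not empty yet, no recycle */
        recycle_slab(&meta);
    }
    release_blocks(&meta, 2);      /* 2 -> 0: recycled here, by exactly one owner */
    return 0;
}
```

Because only the caller (or drain batch) that sees the previous value equal to its decrement proceeds, two threads can no longer both read `used == 0` after the fact and race to release the same slab, which is the race the Phase 9-2 comments in the patch describe.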