diff --git a/core/tiny_fastcache.c b/core/tiny_fastcache.c
index 0d06a522..813201e8 100644
--- a/core/tiny_fastcache.c
+++ b/core/tiny_fastcache.c
@@ -14,6 +14,13 @@ __thread void* g_tiny_fast_cache[TINY_FAST_CLASS_COUNT];
 __thread uint32_t g_tiny_fast_count[TINY_FAST_CLASS_COUNT];
 __thread int g_tiny_fast_initialized = 0;
 
+// ========== Phase 6-7: Dual Free Lists (Phase 2) ==========
+// Inspired by mimalloc's local/remote split design
+// Separate alloc/free paths to reduce cache line bouncing
+
+__thread void* g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT];     // Free staging area
+__thread uint32_t g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT]; // Free count
+
 // ========== External References ==========
 // External references to existing Tiny infrastructure (from hakmem_tiny.c)
 
@@ -108,7 +115,12 @@ void tiny_fast_drain(int class_idx) {
 
     g_tiny_fast_drain_count++;
 
-    // Drain half of the cache to Magazine/SuperSlab
+    // ========================================================================
+    // Phase 6-7: Drain from free_head (Phase 2)
+    // Since frees go to free_head, drain from there when capacity exceeded
+    // ========================================================================
+
+    // Drain half of the free_head to Magazine/SuperSlab
     // TODO: For now, we just reduce the count limit
     // In a full implementation, we'd push blocks back to Magazine freelist
 
@@ -116,12 +128,12 @@ void tiny_fast_drain(int class_idx) {
     // A full implementation would return blocks to SuperSlab freelist
     uint32_t target = TINY_FAST_CACHE_CAP / 2;
 
-    while (g_tiny_fast_count[class_idx] > target) {
-        void* ptr = g_tiny_fast_cache[class_idx];
+    while (g_tiny_fast_free_count[class_idx] > target) {
+        void* ptr = g_tiny_fast_free_head[class_idx];
         if (!ptr) break;
 
-        g_tiny_fast_cache[class_idx] = *(void**)ptr;
-        g_tiny_fast_count[class_idx]--;
+        g_tiny_fast_free_head[class_idx] = *(void**)ptr;
+        g_tiny_fast_free_count[class_idx]--;
 
         // TODO: Return to Magazine/SuperSlab
         // For now, we'll just re-push it (no-op, but prevents loss)
diff --git a/core/tiny_fastcache.h b/core/tiny_fastcache.h
index 24970398..8e21768a 100644
--- a/core/tiny_fastcache.h
+++ b/core/tiny_fastcache.h
@@ -36,6 +36,12 @@ extern __thread uint32_t g_tiny_fast_count[TINY_FAST_CLASS_COUNT];
 // Initialized flag
 extern __thread int g_tiny_fast_initialized;
 
+// ========== Phase 6-7: Dual Free Lists (Phase 2) ==========
+// Separate free staging area to reduce cache line bouncing
+
+extern __thread void* g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT];
+extern __thread uint32_t g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT];
+
 // ========== Size to Class Mapping ==========
 // Inline size-to-class for fast path (O(1) lookup table)
 
@@ -89,7 +95,7 @@ static inline void* tiny_fast_alloc(size_t size) {
     int cls = tiny_fast_size_to_class(size);
     if (__builtin_expect(cls < 0, 0)) return NULL;  // Not tiny (rare)
 
-    // Step 2: Pop from TLS cache (2-3 instructions)
+    // Step 2: Pop from alloc_head (hot allocation path)
     void* ptr = g_tiny_fast_cache[cls];
     if (__builtin_expect(ptr != NULL, 1)) {
         // Fast path: Pop head, decrement count
@@ -98,6 +104,25 @@
         g_tiny_fast_cache[cls] = *(void**)ptr;
         g_tiny_fast_count[cls]--;
         return ptr;
     }
 
+    // ========================================================================
+    // Phase 6-7: Step 2.5: Lazy Migration from free_head (Phase 2)
+    // If alloc_head empty but free_head has blocks, migrate with pointer swap
+    // This is mimalloc's key optimization: batched migration, zero overhead
+    // ========================================================================
+    if (__builtin_expect(g_tiny_fast_free_head[cls] != NULL, 0)) {
+        // Migrate entire free_head → alloc_head (pointer swap, instant!)
+        g_tiny_fast_cache[cls] = g_tiny_fast_free_head[cls];
+        g_tiny_fast_count[cls] = g_tiny_fast_free_count[cls];
+        g_tiny_fast_free_head[cls] = NULL;
+        g_tiny_fast_free_count[cls] = 0;
+
+        // Now pop one from newly migrated list
+        ptr = g_tiny_fast_cache[cls];
+        g_tiny_fast_cache[cls] = *(void**)ptr;
+        g_tiny_fast_count[cls]--;
+        return ptr;
+    }
+
     // Step 3: Slow path - refill from Magazine/SuperSlab
     return tiny_fast_refill(cls);
 }
 
@@ -109,16 +134,22 @@ static inline void tiny_fast_free(void* ptr, size_t size) {
     int cls = tiny_fast_size_to_class(size);
     if (__builtin_expect(cls < 0, 0)) return;  // Not tiny (error)
 
-    // Step 2: Check capacity
-    if (__builtin_expect(g_tiny_fast_count[cls] >= TINY_FAST_CACHE_CAP, 0)) {
-        // Cache full - drain to Magazine/SuperSlab
+    // ========================================================================
+    // Phase 6-7: Push to free_head (Phase 2)
+    // Separate free staging area reduces cache line contention with alloc_head
+    // mimalloc's key insight: alloc/free touch different cache lines
+    // ========================================================================
+
+    // Step 2: Check free_head capacity
+    if (__builtin_expect(g_tiny_fast_free_count[cls] >= TINY_FAST_CACHE_CAP, 0)) {
+        // Free cache full - drain to Magazine/SuperSlab
         tiny_fast_drain(cls);
     }
 
-    // Step 3: Push to TLS cache (2 instructions)
-    *(void**)ptr = g_tiny_fast_cache[cls];
-    g_tiny_fast_cache[cls] = ptr;
-    g_tiny_fast_count[cls]++;
+    // Step 3: Push to free_head (separate cache line from alloc_head!)
+    *(void**)ptr = g_tiny_fast_free_head[cls];
+    g_tiny_fast_free_head[cls] = ptr;
+    g_tiny_fast_free_count[cls]++;
 }
 
 // ========== Initialization ==========
@@ -128,5 +159,10 @@ static inline void tiny_fast_init(void) {
     memset(g_tiny_fast_cache, 0, sizeof(g_tiny_fast_cache));
     memset(g_tiny_fast_count, 0, sizeof(g_tiny_fast_count));
+
+    // Phase 6-7: Initialize dual free lists (Phase 2)
+    memset(g_tiny_fast_free_head, 0, sizeof(g_tiny_fast_free_head));
+    memset(g_tiny_fast_free_count, 0, sizeof(g_tiny_fast_free_count));
+
     g_tiny_fast_initialized = 1;
 }
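Note for reviewers: the patch splits each thread-local size class into an alloc list (g_tiny_fast_cache, popped only by the allocation fast path) and a free staging list (g_tiny_fast_free_head, pushed only by the free path), and the allocator adopts the entire staged list with one pointer swap when the alloc list runs dry. The sketch below is a minimal, single-size-class illustration of that mechanism, not code from this repository: the demo_* names are hypothetical, malloc()/free() stand in for the Magazine/SuperSlab backing store, the alloc-side counter is omitted, and the drain policy is reduced to releasing a single block instead of draining half the list.

// Minimal standalone sketch of the dual free-list idea (illustrative only).
// Assumptions: one size class, 32-byte blocks, malloc()/free() as backing store.
// The demo_* identifiers are hypothetical and do not exist in tiny_fastcache.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define DEMO_BLOCK_SIZE 32   // must be >= sizeof(void*) to hold the freelist link
#define DEMO_CAP        64   // capacity of the free staging list

static __thread void*    demo_alloc_head;  // popped by the allocation fast path
static __thread void*    demo_free_head;   // pushed by the free fast path
static __thread uint32_t demo_free_count;

static void* demo_alloc(void) {
    void* p = demo_alloc_head;
    if (p) {                               // fast path: pop the alloc list
        demo_alloc_head = *(void**)p;
        return p;
    }
    if (demo_free_head) {                  // lazy migration: adopt the staged list
        demo_alloc_head = demo_free_head;  // single pointer swap, no per-block work
        demo_free_head  = NULL;
        demo_free_count = 0;
        p = demo_alloc_head;               // then pop one block as usual
        demo_alloc_head = *(void**)p;
        return p;
    }
    return malloc(DEMO_BLOCK_SIZE);        // slow path: refill from backing allocator
}

static void demo_free(void* p) {
    if (demo_free_count >= DEMO_CAP) {     // "drain": release one block to backing store
        void* victim = demo_free_head;
        demo_free_head = *(void**)victim;
        demo_free_count--;
        free(victim);
    }
    *(void**)p = demo_free_head;           // push onto the free staging list
    demo_free_head = p;
    demo_free_count++;
}

int main(void) {
    void* a = demo_alloc();
    demo_free(a);
    void* b = demo_alloc();                // served by migrating the staged free list
    printf("block reused: %s\n", (a == b) ? "yes" : "no");
    free(b);
    return 0;
}

Because ordinary frees only write demo_free_head and hot allocations only read demo_alloc_head, the two paths touch separate data, and the cost of returning blocks to the allocation path is paid once per migrated batch rather than once per free, which is the effect the patch is after.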