diff --git a/core/box/ss_allocation_box.c b/core/box/ss_allocation_box.c
index 39d45718..985ec9a4 100644
--- a/core/box/ss_allocation_box.c
+++ b/core/box/ss_allocation_box.c
@@ -430,6 +430,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     meta->freelist = NULL;  // NULL = linear allocation mode
     meta->used = 0;
     meta->active = 0;       // P1.3: blocks in use by user (starts at 0)
+    meta->tls_cached = 0;   // P2.2: blocks cached in TLS SLL (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // Store bits 8-15 of owner_tid (low 8 bits are 0 for glibc pthread IDs)
diff --git a/core/box/ss_hot_cold_box.h b/core/box/ss_hot_cold_box.h
index 6d73c059..a63cd88a 100644
--- a/core/box/ss_hot_cold_box.h
+++ b/core/box/ss_hot_cold_box.h
@@ -9,6 +9,7 @@
 #include "../superslab/superslab_types.h"
 #include
 #include <stdlib.h>  // P1.3: for getenv()
+#include <stdio.h>   // P2.4: for fprintf() in debug output
 
 // ============================================================================
 // Phase 3d-C: Hot/Cold Split Box API
@@ -154,4 +155,99 @@ static inline void ss_init_hot_cold(SuperSlab* ss) {
     }
 }
 
+// ============================================================================
+// P2.4: Invariant Verification for Debug Builds
+// ============================================================================
+//
+// Invariant: active + tls_cached ≈ used
+//
+// - active:     blocks currently held by user code
+// - tls_cached: blocks cached in TLS SLL (returned by user, not yet pushed to slab freelist)
+// - used:       total blocks carved from slab and distributed
+//
+// Due to concurrent updates, exact equality is not guaranteed.
+// We allow a small tolerance (delta) for race conditions.
+//
+// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable (disabled by default)
+// ============================================================================
+
+// P2.4: Verify slab invariant: active + tls_cached ≈ used
+// Returns: true if the invariant holds within tolerance, false if violated
+// tolerance: maximum allowed deviation (default: 2 for TLS lag)
+static inline bool ss_verify_slab_invariant(const TinySlabMeta* meta, int tolerance) {
+    if (!meta || meta->capacity == 0) return true;  // Skip uninitialized slabs
+
+    uint16_t used       = atomic_load_explicit(&meta->used, memory_order_relaxed);
+    uint16_t active     = atomic_load_explicit(&meta->active, memory_order_relaxed);
+    uint16_t tls_cached = atomic_load_explicit(&meta->tls_cached, memory_order_relaxed);
+
+    int sum  = (int)active + (int)tls_cached;
+    int diff = sum - (int)used;
+    if (diff < 0) diff = -diff;  // abs(diff)
+
+    return (diff <= tolerance);
+}
+
+// P2.4: Verify all slab invariants in a SuperSlab
+// Returns: count of slabs that violate the invariant
+// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable checking
+static inline int ss_verify_superslab_invariants(const SuperSlab* ss, int tolerance) {
+    static int g_invariant_check = -1;
+    if (__builtin_expect(g_invariant_check == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_INVARIANT_CHECK");
+        g_invariant_check = (e && *e && *e != '0') ? 1 : 0;
+    }
+
+    if (!g_invariant_check) return 0;  // Disabled by ENV
+    if (!ss) return 0;
+
+    int violations = 0;
+    uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
+    if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
+        max_slabs = SLABS_PER_SUPERSLAB_MAX;
+    }
+
+    for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
+        const TinySlabMeta* meta = &ss->slabs[i];
+        if (!ss_verify_slab_invariant(meta, tolerance)) {
+            violations++;
+#ifndef NDEBUG
+            // Debug output for violations
+            fprintf(stderr, "[P2.4] Invariant VIOLATION: slab[%u] used=%u active=%u tls_cached=%u (sum=%u)\n",
+                    i, meta->used,
+                    atomic_load_explicit(&meta->active, memory_order_relaxed),
+                    atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
+                    atomic_load_explicit(&meta->active, memory_order_relaxed) +
+                    atomic_load_explicit(&meta->tls_cached, memory_order_relaxed));
+#endif
+        }
+    }
+
+    return violations;
+}
+
+// P2.4: Debug dump of slab state for troubleshooting
+// ENV: HAKMEM_TINY_INVARIANT_DUMP=1 to enable periodic dumps
+static inline void ss_dump_slab_state(const SuperSlab* ss, int slab_idx) {
+#ifndef NDEBUG
+    static int g_dump_enabled = -1;
+    if (__builtin_expect(g_dump_enabled == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_INVARIANT_DUMP");
+        g_dump_enabled = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (!g_dump_enabled) return;
+    if (!ss || slab_idx < 0 || slab_idx >= (int)ss->active_slabs) return;
+
+    const TinySlabMeta* meta = &ss->slabs[slab_idx];
+    fprintf(stderr, "[P2.4-DUMP] slab[%d]: used=%u active=%u tls_cached=%u capacity=%u class=%u\n",
+            slab_idx, meta->used,
+            atomic_load_explicit(&meta->active, memory_order_relaxed),
+            atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
+            meta->capacity, meta->class_idx);
+#else
+    (void)ss;
+    (void)slab_idx;
+#endif
+}
+
 #endif // SS_HOT_COLD_BOX_H
diff --git a/core/hakmem_tiny_superslab.c b/core/hakmem_tiny_superslab.c
index e5c6efb0..f2dd46f1 100644
--- a/core/hakmem_tiny_superslab.c
+++ b/core/hakmem_tiny_superslab.c
@@ -1222,6 +1222,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     meta->freelist = NULL;  // NULL = linear allocation mode
     meta->used = 0;
     meta->active = 0;       // P1.3: blocks in use by user (starts at 0)
+    meta->tls_cached = 0;   // P2.2: blocks cached in TLS SLL (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes
diff --git a/core/superslab/superslab_types.h b/core/superslab/superslab_types.h
index 18dca0ad..d0659ed1 100644
--- a/core/superslab/superslab_types.h
+++ b/core/superslab/superslab_types.h
@@ -11,11 +11,13 @@
 typedef struct TinySlabMeta {
     _Atomic(void*)   freelist;    // NULL = bump-only, non-NULL = freelist head (ATOMIC for MT safety)
     _Atomic uint16_t used;        // blocks allocated from this slab's freelist (ATOMIC for MT safety)
-    _Atomic uint16_t active;      // P1.3: blocks currently in use by user (used - tls_cached) (ATOMIC)
+    _Atomic uint16_t active;      // P1.3: blocks currently in use by user (ATOMIC)
+    _Atomic uint16_t tls_cached;  // P2.2: blocks cached in TLS SLL (ATOMIC)
     uint16_t capacity;            // total blocks this slab can hold
     uint8_t  class_idx;           // owning tiny class (Phase 12: per-slab)
     uint8_t  carved;              // carve/owner flags
     uint8_t  owner_tid_low;       // low 8 bits of owner TID (debug / locality)
+    // P2.2 Invariant: active + tls_cached == used (approximately, due to TLS locality)
 } TinySlabMeta;
 
 #define TINY_NUM_CLASSES_SS 8
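The P2.4 helpers above are intended to be called opportunistically from debug builds. A minimal sketch of such a call site, assuming the caller already has a valid SuperSlab pointer in hand; the hook name debug_check_superslab and the dump loop are illustrative, not part of this patch:

// Hypothetical debug hook: verify active + tls_cached ≈ used for one SuperSlab and
// dump the first few slabs when a violation is found. Both helpers are no-ops unless
// HAKMEM_TINY_INVARIANT_CHECK=1 / HAKMEM_TINY_INVARIANT_DUMP=1 are set in the environment.
static void debug_check_superslab(const SuperSlab* ss) {
    int violations = ss_verify_superslab_invariants(ss, /*tolerance=*/2);
    if (violations > 0) {
        for (int i = 0; i < 4 && i < (int)ss->active_slabs; i++) {
            ss_dump_slab_state(ss, i);
        }
    }
}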
diff --git a/core/tiny_alloc_fast.inc.h b/core/tiny_alloc_fast.inc.h
index 1623ee8a..44caa8a9 100644
--- a/core/tiny_alloc_fast.inc.h
+++ b/core/tiny_alloc_fast.inc.h
@@ -37,8 +37,9 @@
 #include
 #include
 
-// P1.3: Helper to increment meta->active when allocating from TLS SLL
+// P1.3/P2.2: Helper to track active/tls_cached when allocating from TLS SLL
 // ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+// Flow: TLS SLL → User means active++, tls_cached--
 static inline void tiny_active_track_alloc(void* base) {
     static __thread int g_active_track = -1;
     if (__builtin_expect(g_active_track == -1, 0)) {
@@ -53,6 +54,7 @@ static inline void tiny_active_track_alloc(void* base) {
         if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
             TinySlabMeta* meta = &ss->slabs[slab_idx];
             atomic_fetch_add_explicit(&meta->active, 1, memory_order_relaxed);
+            atomic_fetch_sub_explicit(&meta->tls_cached, 1, memory_order_relaxed);  // P2.2
         }
     }
 }
diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h
index 1a6492cc..0a456b32 100644
--- a/core/tiny_free_fast_v2.inc.h
+++ b/core/tiny_free_fast_v2.inc.h
@@ -107,17 +107,18 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
     }
 #endif
 
-    // P1.2: Use class_map instead of Header to avoid Header/Next contention
-    // ENV: HAKMEM_TINY_USE_CLASS_MAP=1 to enable (default: 0 for compatibility)
+    // P2.1: Use class_map instead of Header to avoid Header/Next contention
+    // ENV: HAKMEM_TINY_NO_CLASS_MAP=1 to disable (default: ON - class_map is preferred)
     int class_idx = -1;
     {
         static __thread int g_use_class_map = -1;
         if (__builtin_expect(g_use_class_map == -1, 0)) {
-            const char* e = getenv("HAKMEM_TINY_USE_CLASS_MAP");
-            g_use_class_map = (e && *e && *e != '0') ? 1 : 0;
+            const char* e = getenv("HAKMEM_TINY_NO_CLASS_MAP");
+            // P2.1: Default is ON (use class_map), set HAKMEM_TINY_NO_CLASS_MAP=1 to disable
+            g_use_class_map = (e && *e && *e != '0') ? 0 : 1;
         }
-        if (__builtin_expect(g_use_class_map, 0)) {
+        if (__builtin_expect(g_use_class_map, 1)) {
             // P1.2: class_map path - avoid Header read
             SuperSlab* ss = ss_fast_lookup((uint8_t*)ptr - 1);
             if (ss && ss->magic == SUPERSLAB_MAGIC) {
@@ -144,7 +145,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
 #endif
             }
         } else {
-            // Default: Header read (existing behavior)
+            // P2.1: Fallback to Header read (disabled class_map mode)
             class_idx = tiny_region_id_read_header(ptr);
 #if HAKMEM_DEBUG_VERBOSE
             if (atomic_load(&debug_calls) <= 5) {
@@ -329,8 +330,9 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
         return 0;
     }
 
-    // P1.3: Decrement meta->active when block is freed (user gives it back)
+    // P1.3/P2.2: Track active/tls_cached when block is freed (user gives it back)
     // ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+    // Flow: User → TLS SLL means active--, tls_cached++
     {
         static __thread int g_active_track = -1;
         if (__builtin_expect(g_active_track == -1, 0)) {
@@ -345,6 +347,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
             if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
                 TinySlabMeta* meta = &ss->slabs[slab_idx];
                 atomic_fetch_sub_explicit(&meta->active, 1, memory_order_relaxed);
+                atomic_fetch_add_explicit(&meta->tls_cached, 1, memory_order_relaxed);  // P2.2
             }
         }
     }
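Taken together, the alloc-side hook (tiny_active_track_alloc) and the free-side hook above maintain a simple conservation law on each slab. Below is a self-contained model of that accounting, for illustration only; the refill step that carves blocks from the slab into the TLS SLL is assumed to bump used and tls_cached together, which happens outside the hunks shown here:

#include <assert.h>
#include <stdint.h>

typedef struct { uint16_t used, active, tls_cached; } Counters;

// slab -> TLS SLL (refill/carve): assumed to account used++ and tls_cached++
static void carve_to_tls(Counters* c) { c->used++;   c->tls_cached++; }
// TLS SLL -> user (alloc fast path): active++, tls_cached--  (mirrors tiny_active_track_alloc)
static void tls_to_user(Counters* c)  { c->active++; c->tls_cached--; }
// user -> TLS SLL (free fast path):  active--, tls_cached++  (mirrors hak_tiny_free_fast_v2)
static void user_to_tls(Counters* c)  { c->active--; c->tls_cached++; }

int main(void) {
    Counters c = {0, 0, 0};
    carve_to_tls(&c);
    carve_to_tls(&c);   // two blocks carved into the TLS SLL
    tls_to_user(&c);    // one handed to the user
    user_to_tls(&c);    // and returned to the TLS SLL
    assert(c.active + c.tls_cached == c.used);  // single-threaded, so the invariant is exact
    return 0;
}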
diff --git a/core/tiny_nextptr.h b/core/tiny_nextptr.h
index 77da054b..eb325a7e 100644
--- a/core/tiny_nextptr.h
+++ b/core/tiny_nextptr.h
@@ -34,6 +34,7 @@
 #include
 #include
+#include <stdlib.h>  // P2.3: for getenv()
 #include "hakmem_build_flags.h"
 #include "tiny_region_id.h"        // HEADER_MAGIC/HEADER_CLASS_MASK for header repair/logging
 #include "hakmem_super_registry.h" // hak_super_lookup
 
@@ -74,20 +75,27 @@ static inline __attribute__((always_inline)) void* tiny_next_load(const void* ba
 }
 
 // Safe store of next pointer into a block base.
-// DESIGN RULE: "Header is written by BOTH Alloc and Free/Drain"
-// - Free/Drain paths: This function restores the header for C0-C6 (offset 1), then writes the Next pointer
-// - Alloc paths: Write header before returning block to user (HAK_RET_ALLOC)
-// - C7 (offset 0): Header is overwritten by next pointer, so no restoration needed
+// P2.3: Header restoration is now conditional (default: skip when class_map is active)
+// - When class_map is used for class_idx lookup (default), header restoration is unnecessary
+// - Alloc path always writes a fresh header before returning the block to the user (HAK_RET_ALLOC)
+// - ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy mode)
 // P0.1: C7 uses offset 0 (overwrites header), C0-C6 use offset 1 (header preserved)
 static inline __attribute__((always_inline)) void tiny_next_store(void* base, int class_idx, void* next) {
     size_t off = tiny_next_off(class_idx);
 #if HAKMEM_TINY_HEADER_CLASSIDX
-    // For C0-C6 (offset 1): Restore header before writing next pointer
-    // For C7 (offset 0): Header is overwritten, so no restoration needed
+    // P2.3: Skip header restoration by default (class_map is now the default for class_idx lookup)
+    // ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy fallback mode)
     if (off != 0) {
-        // Restore header for classes that preserve it (C0-C6)
-        *(uint8_t*)base = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
+        static int g_restore_header = -1;
+        if (__builtin_expect(g_restore_header == -1, 0)) {
+            const char* e = getenv("HAKMEM_TINY_RESTORE_HEADER");
+            g_restore_header = (e && *e && *e != '0') ? 1 : 0;
+        }
+        if (__builtin_expect(g_restore_header, 0)) {
+            // Legacy mode: Restore header for classes that preserve it (C0-C6)
+            *(uint8_t*)base = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
+        }
     }
 #endif
diff --git a/docs/specs/ENV_VARS.md b/docs/specs/ENV_VARS.md
index ae390cd8..cc73af37 100644
--- a/docs/specs/ENV_VARS.md
+++ b/docs/specs/ENV_VARS.md
@@ -141,6 +141,30 @@ Safety (free validation)
 - Enables additional validation at the free boundary (SuperSlab range, class mismatch, detection of dangerous double frees).
 - Recommended default when debugging; use 0 for perf measurements.
 
+P2 TLS SLL Redesign (Header/Next conflict fix)
+- HAKMEM_TINY_ACTIVE_TRACK=1
+  - Enables meta->active / meta->tls_cached tracking.
+  - active: blocks currently held by the user
+  - tls_cached: blocks cached in the TLS SLL
+  - Invariant: active + tls_cached ≈ used
+  - When enabled, ss_is_slab_empty() reports EMPTY on active==0 (blocks cached in the TLS SLL are also taken into account).
+  - Overhead: about 1% (atomic inc/dec per alloc/free).
+- HAKMEM_TINY_NO_CLASS_MAP=1
+  - Disables the class_map lookup (legacy mode).
+  - Default: class_map ON (made the default in P2.1).
+  - Reverts to reading class_idx from the Header (reintroduces the Header/Next conflict risk).
+- HAKMEM_TINY_RESTORE_HEADER=1
+  - Forces header restoration in tiny_next_store() (legacy mode).
+  - Default: header restoration OFF (disabled in P2.3).
+  - With class_map in use, header restoration is unnecessary (the header is rewritten at alloc time via HAK_RET_ALLOC).
+- HAKMEM_TINY_INVARIANT_CHECK=1
+  - Enables verification of the active + tls_cached ≈ used invariant (debug builds).
+  - Violations are reported on stderr (only when NDEBUG is not defined).
+  - Overhead: about 2% (only while ss_verify_superslab_invariants() is being called).
+- HAKMEM_TINY_INVARIANT_DUMP=1
+  - Enables periodic dumps of slab state (debug builds, only when NDEBUG is not defined).
+  - Prints the used/active/tls_cached/capacity/class breakdown to stderr.
+
 Frontend (mimalloc-inspired, experimental)
 - HAKMEM_INT_ADAPT_REFILL=0/1
   - In INT mode, adjusts the refill caps (`HAKMEM_TINY_REFILL_MAX(_HOT)`) by ±16 per window (default: ON)
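Because each of the gates documented above is latched into a static (or thread-local) flag on its first use, a test harness has to export them before the allocator's first tiny alloc/free. A minimal sketch using POSIX setenv(); the function name and placement are illustrative:

#include <stdlib.h>

// Hypothetical harness setup: must run before the allocator performs its first lazy getenv().
static void enable_p2_diagnostics(void) {
    setenv("HAKMEM_TINY_ACTIVE_TRACK", "1", 1);     // maintain the active / tls_cached counters
    setenv("HAKMEM_TINY_INVARIANT_CHECK", "1", 1);  // verify active + tls_cached ≈ used
    setenv("HAKMEM_TINY_INVARIANT_DUMP", "1", 1);   // slab state dumps (non-NDEBUG builds)
}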