From 6b86c60a20ea26ad6d8256ddcd3034b61e29ea1b Mon Sep 17 00:00:00 2001
From: "Moe Charm (CI)" <moecharm@example.com>
Date: Fri, 28 Nov 2025 13:53:45 +0900
Subject: [PATCH] P1.3: Add meta->active for TLS SLL tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add active field to TinySlabMeta to track blocks currently held by
users (not in TLS SLL or freelist caches). This enables accurate
empty slab detection that accounts for TLS SLL cached blocks.

Changes:
- superslab_types.h: Add _Atomic uint16_t active field
- ss_allocation_box.c, hakmem_tiny_superslab.c: Initialize active=0
- tiny_free_fast_v2.inc.h: Decrement active on TLS SLL push
- tiny_alloc_fast.inc.h: Add tiny_active_track_alloc() helper,
  increment active on TLS SLL pop (all code paths)
- ss_hot_cold_box.h: ss_is_slab_empty() uses active when enabled

All tracking is ENV-gated: HAKMEM_TINY_ACTIVE_TRACK=1 to enable.
Default is off for zero performance impact.

Invariant: active = used - tls_cached (active <= used)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 core/box/ss_allocation_box.c     |  1 +
 core/box/ss_hot_cold_box.h       | 23 +++++++++++++++--
 core/hakmem_tiny_superslab.c     |  1 +
 core/superslab/superslab_types.h |  3 ++-
 core/tiny_alloc_fast.inc.h       | 42 ++++++++++++++++++++++++++++++--
 core/tiny_free_fast_v2.inc.h     | 21 ++++++++++++++++
 6 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/core/box/ss_allocation_box.c b/core/box/ss_allocation_box.c
index 18424bfc..39d45718 100644
--- a/core/box/ss_allocation_box.c
+++ b/core/box/ss_allocation_box.c
@@ -429,6 +429,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     TinySlabMeta* meta = &ss->slabs[slab_idx];
     meta->freelist = NULL;          // NULL = linear allocation mode
     meta->used = 0;
+    meta->active = 0;               // P1.3: blocks in use by user (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // Store bits 8-15 of owner_tid (low 8 bits are 0 for glibc pthread IDs)
diff --git a/core/box/ss_hot_cold_box.h b/core/box/ss_hot_cold_box.h
index 2bdafa60..6d73c059 100644
--- a/core/box/ss_hot_cold_box.h
+++ b/core/box/ss_hot_cold_box.h
@@ -8,6 +8,7 @@
 
 #include "../superslab/superslab_types.h"
 #include <stdbool.h>
+#include <stdlib.h>  // P1.3: for getenv()
 
 // ============================================================================
 // Phase 3d-C: Hot/Cold Split Box API
@@ -33,9 +34,27 @@
 #define HOT_UTILIZATION_THRESHOLD 50  // 使用率50%以上でホット判定
 
 // Phase 12-1.1: EMPTY判定ロジック（最優先再利用）
-// Returns: true if slab is completely EMPTY (used == 0, highest reuse priority)
+// P1.3: ENV gate for active-based empty detection
+// ENV: HAKMEM_TINY_ACTIVE_TRACK=1 → use active, else use used
+// Returns: true if slab is completely EMPTY (highest reuse priority)
 static inline bool ss_is_slab_empty(const TinySlabMeta* meta) {
-    return (meta->capacity > 0 && meta->used == 0);
+    if (meta->capacity == 0) return false;
+
+    // P1.3: Use active-based empty detection if enabled
+    static int g_use_active = -1;
+    if (__builtin_expect(g_use_active == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
+        g_use_active = (e && *e && *e != '0') ? 1 : 0;
+    }
+
+    if (g_use_active) {
+        // P1.3: active == 0 means all blocks returned by user (even if some in TLS SLL)
+        uint16_t act = atomic_load_explicit(&meta->active, memory_order_relaxed);
+        return (act == 0);
+    } else {
+        // Legacy: used == 0 (doesn't account for TLS SLL)
+        return (meta->used == 0);
+    }
 }
 
 // Phase 3d-C: Hot判定ロジック
diff --git a/core/hakmem_tiny_superslab.c b/core/hakmem_tiny_superslab.c
index 79e4071a..e5c6efb0 100644
--- a/core/hakmem_tiny_superslab.c
+++ b/core/hakmem_tiny_superslab.c
@@ -1221,6 +1221,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     TinySlabMeta* meta = &ss->slabs[slab_idx];
     meta->freelist = NULL;          // NULL = linear allocation mode
     meta->used = 0;
+    meta->active = 0;               // P1.3: blocks in use by user (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes
diff --git a/core/superslab/superslab_types.h b/core/superslab/superslab_types.h
index 0a6459ce..18dca0ad 100644
--- a/core/superslab/superslab_types.h
+++ b/core/superslab/superslab_types.h
@@ -10,7 +10,8 @@
 // TinySlabMeta: per-slab metadata embedded in SuperSlab
 typedef struct TinySlabMeta {
     _Atomic(void*) freelist; // NULL = bump-only, non-NULL = freelist head (ATOMIC for MT safety)
-    _Atomic uint16_t used;   // blocks currently allocated from this slab (ATOMIC for MT safety)
+    _Atomic uint16_t used;   // blocks allocated from this slab's freelist (ATOMIC for MT safety)
+    _Atomic uint16_t active; // P1.3: blocks currently in use by user (used - tls_cached) (ATOMIC)
     uint16_t capacity;       // total blocks this slab can hold
     uint8_t  class_idx;      // owning tiny class (Phase 12: per-slab)
     uint8_t  carved;         // carve/owner flags
diff --git a/core/tiny_alloc_fast.inc.h b/core/tiny_alloc_fast.inc.h
index 915c8b3f..1623ee8a 100644
--- a/core/tiny_alloc_fast.inc.h
+++ b/core/tiny_alloc_fast.inc.h
@@ -37,6 +37,27 @@
 #include <stdio.h>
 #include <stdatomic.h>
 
+// P1.3: Helper to increment meta->active when allocating from TLS SLL
+// ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+static inline void tiny_active_track_alloc(void* base) {
+    static __thread int g_active_track = -1;
+    if (__builtin_expect(g_active_track == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
+        g_active_track = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (__builtin_expect(g_active_track, 0)) {
+        extern SuperSlab* ss_fast_lookup(void* ptr);
+        SuperSlab* ss = ss_fast_lookup(base);
+        if (ss && ss->magic == SUPERSLAB_MAGIC) {
+            int slab_idx = slab_index_for(ss, base);
+            if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
+                TinySlabMeta* meta = &ss->slabs[slab_idx];
+                atomic_fetch_add_explicit(&meta->active, 1, memory_order_relaxed);
+            }
+        }
+    }
+}
+
 // Diag counter: size>=1024 allocations routed to Tiny (env: HAKMEM_TINY_ALLOC_1024_METRIC)
 extern _Atomic uint64_t g_tiny_alloc_ge1024[];
 static inline void tiny_diag_track_size_ge1024_fast(size_t req_size, int class_idx) {
@@ -364,6 +385,8 @@ static inline void* tiny_alloc_fast_pop(int class_idx) {
                 // Front Gate: SLL hit (SLIM fast path - 3 instructions)
                 extern unsigned long long g_front_sll_hit[];
                 g_front_sll_hit[class_idx]++;
+                // P1.3: Track active when allocating from TLS SLL
+                tiny_active_track_alloc(base);
                 return base;
             }
         }
@@ -436,6 +459,9 @@ static inline void* tiny_alloc_fast_pop(int class_idx) {
             extern unsigned long long g_front_sll_hit[];
             g_front_sll_hit[class_idx]++;
 
+            // P1.3: Track active when allocating from TLS SLL
+            tiny_active_track_alloc(base);
+
 #if HAKMEM_DEBUG_COUNTERS
             // Track TLS freelist hits (compile-time gated, zero runtime cost when disabled)
             g_free_via_tls_sll[class_idx]++;
@@ -786,7 +812,13 @@ static inline void* tiny_alloc_fast(size_t size) {
 #endif
         } else {
             void* base = NULL;
-            if (tls_sll_pop(class_idx, &base)) ptr = base; else ptr = NULL;
+            if (tls_sll_pop(class_idx, &base)) {
+                // P1.3: Track active when allocating from TLS SLL
+                tiny_active_track_alloc(base);
+                ptr = base;
+            } else {
+                ptr = NULL;
+            }
         }
     } else {
         ptr = NULL;  // SLL disabled OR Front-Direct active → bypass SLL
@@ -826,7 +858,13 @@ static inline void* tiny_alloc_fast(size_t size) {
 #endif
                 } else {
                     void* base2 = NULL;
-                    if (tls_sll_pop(class_idx, &base2)) ptr = base2; else ptr = NULL;
+                    if (tls_sll_pop(class_idx, &base2)) {
+                        // P1.3: Track active when allocating from TLS SLL
+                        tiny_active_track_alloc(base2);
+                        ptr = base2;
+                    } else {
+                        ptr = NULL;
+                    }
                 }
             } else {
                 ptr = NULL;  // SLL disabled OR Front-Direct active → bypass SLL
diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h
index 0393dfc1..1a6492cc 100644
--- a/core/tiny_free_fast_v2.inc.h
+++ b/core/tiny_free_fast_v2.inc.h
@@ -329,6 +329,27 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
         return 0;
     }
 
+    // P1.3: Decrement meta->active when block is freed (user gives it back)
+    // ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+    {
+        static __thread int g_active_track = -1;
+        if (__builtin_expect(g_active_track == -1, 0)) {
+            const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
+            g_active_track = (e && *e && *e != '0') ? 1 : 0;
+        }
+        if (__builtin_expect(g_active_track, 0)) {
+            // Lookup the actual slab meta for this block
+            SuperSlab* ss = ss_fast_lookup(base);
+            if (ss && ss->magic == SUPERSLAB_MAGIC) {
+                int slab_idx = slab_index_for(ss, base);
+                if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
+                    TinySlabMeta* meta = &ss->slabs[slab_idx];
+                    atomic_fetch_sub_explicit(&meta->active, 1, memory_order_relaxed);
+                }
+            }
+        }
+    }
+
     // Option B: Periodic TLS SLL Drain (restore slab accounting consistency)
     // Purpose: Every N frees (default: 1024), drain TLS SLL → slab freelist
     // Impact: Enables empty detection → SuperSlabs freed → LRU cache functional