P1.3: Add meta->active for TLS SLL tracking

Add an active field to TinySlabMeta to track blocks currently held by
users (i.e. not sitting in the TLS SLL or freelist caches). This
enables accurate empty-slab detection that accounts for blocks cached
in the TLS SLL.
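
To make the distinction concrete, here is a minimal toy model of the
two counters (illustrative only; ToySlab is a stand-in, not allocator
code): a block freed into the TLS SLL leaves used unchanged, so
used == 0 under-reports emptiness, while active == 0 reflects what the
user actually holds.

#include <stdatomic.h>
#include <stdio.h>

// Toy model of the two counters (names mirror TinySlabMeta).
typedef struct {
    _Atomic unsigned used;    // allocated from slab, not yet drained back
    _Atomic unsigned active;  // currently held by the user
} ToySlab;

int main(void) {
    ToySlab s;
    atomic_init(&s.used, 0);
    atomic_init(&s.active, 0);

    // User allocates 3 blocks: both counters rise.
    for (int i = 0; i < 3; i++) {
        atomic_fetch_add(&s.used, 1);
        atomic_fetch_add(&s.active, 1);
    }
    // User frees all 3, but they are parked in the TLS SLL:
    // active drops immediately; used stays until a drain returns
    // the blocks to the slab freelist.
    for (int i = 0; i < 3; i++)
        atomic_fetch_sub(&s.active, 1);

    unsigned used   = atomic_load(&s.used);
    unsigned active = atomic_load(&s.active);
    printf("used=%u active=%u\n", used, active);                  // used=3 active=0
    printf("empty by used?   %s\n", used == 0 ? "yes" : "no");    // no (false negative)
    printf("empty by active? %s\n", active == 0 ? "yes" : "no");  // yes
    return 0;
}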

Changes:
- superslab_types.h: Add _Atomic uint16_t active field
- ss_allocation_box.c, hakmem_tiny_superslab.c: Initialize active=0
- tiny_free_fast_v2.inc.h: Decrement active on TLS SLL push
- tiny_alloc_fast.inc.h: Add tiny_active_track_alloc() helper,
  increment active on TLS SLL pop (all code paths)
- ss_hot_cold_box.h: ss_is_slab_empty() uses active when enabled

All tracking is ENV-gated: HAKMEM_TINY_ACTIVE_TRACK=1 to enable.
Default is off for zero performance impact.
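
For reference, a minimal sketch of flipping the gate from a test
harness (hypothetical harness code, not part of this commit). Each
path caches the getenv() result on first use (per thread in the
alloc/free paths, per process in ss_is_slab_empty()), so the variable
must be set before the first tiny allocation or free:

#include <stdlib.h>

int main(void) {
    // Set before the first tiny alloc/free: the gate value is read once
    // via getenv() and cached, so later changes have no effect.
    setenv("HAKMEM_TINY_ACTIVE_TRACK", "1", 1);
    // ... run workload ...
    return 0;
}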

Invariant: active = used - tls_cached (active <= used)
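
A debug-only check of this invariant might look like the sketch below
(hypothetical audit hook, not part of this commit; with relaxed
counters the equality only holds at quiescent points, e.g. right after
a TLS SLL drain):

#include <assert.h>
#include <stdatomic.h>
// (assumes superslab_types.h is included for TinySlabMeta)

// Hypothetical audit hook: tls_cached = blocks from this slab currently
// parked in the TLS SLL (the auditor must compute it separately).
static void tiny_audit_slab(const TinySlabMeta* meta, unsigned tls_cached) {
    unsigned used   = atomic_load_explicit(&meta->used, memory_order_relaxed);
    unsigned active = atomic_load_explicit(&meta->active, memory_order_relaxed);
    assert(active <= used);               // active never exceeds used
    assert(active + tls_cached == used);  // active = used - tls_cached
}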

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Moe Charm (CI)
2025-11-28 13:53:45 +09:00
parent dc9e650db3
commit 6b86c60a20
6 changed files with 86 additions and 5 deletions

ss_allocation_box.c

@@ -429,6 +429,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     TinySlabMeta* meta = &ss->slabs[slab_idx];
     meta->freelist = NULL;   // NULL = linear allocation mode
     meta->used = 0;
+    meta->active = 0;        // P1.3: blocks in use by user (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // Store bits 8-15 of owner_tid (low 8 bits are 0 for glibc pthread IDs)

ss_hot_cold_box.h

@@ -8,6 +8,7 @@
 #include "../superslab/superslab_types.h"
 #include <stdbool.h>
+#include <stdlib.h>  // P1.3: for getenv()

 // ============================================================================
 // Phase 3d-C: Hot/Cold Split Box API
@@ -33,9 +34,27 @@
 #define HOT_UTILIZATION_THRESHOLD 50  // hot if utilization >= 50%

 // Phase 12-1.1: EMPTY detection logic (highest reuse priority)
-// Returns: true if slab is completely EMPTY (used == 0, highest reuse priority)
+// P1.3: ENV gate for active-based empty detection
+// ENV: HAKMEM_TINY_ACTIVE_TRACK=1 → use active, else use used
+// Returns: true if slab is completely EMPTY (highest reuse priority)
 static inline bool ss_is_slab_empty(const TinySlabMeta* meta) {
-    return (meta->capacity > 0 && meta->used == 0);
+    if (meta->capacity == 0) return false;
+    // P1.3: Use active-based empty detection if enabled
+    static int g_use_active = -1;
+    if (__builtin_expect(g_use_active == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
+        g_use_active = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (g_use_active) {
+        // P1.3: active == 0 means all blocks returned by user (even if some in TLS SLL)
+        uint16_t act = atomic_load_explicit(&meta->active, memory_order_relaxed);
+        return (act == 0);
+    } else {
+        // Legacy: used == 0 (doesn't account for TLS SLL)
+        return (meta->used == 0);
+    }
 }

 // Phase 3d-C: hot detection logic
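
For context, a sketch of how a reclaim pass might consume this
predicate (hypothetical caller using the ss_slabs_capacity() and
ss->slabs accessors seen elsewhere in this commit; the real scan lives
elsewhere). With active-based detection, an "empty" slab may still
have blocks parked in a TLS SLL; those caches must be drained before
the memory is actually released.

// Hypothetical: find the first fully-EMPTY slab for highest-priority reuse.
static int ss_find_empty_slab(SuperSlab* ss) {
    for (int i = 0; i < ss_slabs_capacity(ss); i++) {
        if (ss_is_slab_empty(&ss->slabs[i]))
            return i;
    }
    return -1;  // none empty
}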

hakmem_tiny_superslab.c

@@ -1221,6 +1221,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     TinySlabMeta* meta = &ss->slabs[slab_idx];
     meta->freelist = NULL;   // NULL = linear allocation mode
     meta->used = 0;
+    meta->active = 0;        // P1.3: blocks in use by user (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes

superslab_types.h

@@ -10,7 +10,8 @@
 // TinySlabMeta: per-slab metadata embedded in SuperSlab
 typedef struct TinySlabMeta {
     _Atomic(void*) freelist;   // NULL = bump-only, non-NULL = freelist head (ATOMIC for MT safety)
-    _Atomic uint16_t used;     // blocks currently allocated from this slab (ATOMIC for MT safety)
+    _Atomic uint16_t used;     // blocks allocated from this slab's freelist (ATOMIC for MT safety)
+    _Atomic uint16_t active;   // P1.3: blocks currently in use by user (used - tls_cached) (ATOMIC)
     uint16_t capacity;         // total blocks this slab can hold
     uint8_t class_idx;         // owning tiny class (Phase 12: per-slab)
     uint8_t carved;            // carve/owner flags

tiny_alloc_fast.inc.h

@@ -37,6 +37,27 @@
 #include <stdio.h>
 #include <stdatomic.h>

+// P1.3: Helper to increment meta->active when allocating from TLS SLL
+// ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+static inline void tiny_active_track_alloc(void* base) {
+    static __thread int g_active_track = -1;
+    if (__builtin_expect(g_active_track == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
+        g_active_track = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (__builtin_expect(g_active_track, 0)) {
+        extern SuperSlab* ss_fast_lookup(void* ptr);
+        SuperSlab* ss = ss_fast_lookup(base);
+        if (ss && ss->magic == SUPERSLAB_MAGIC) {
+            int slab_idx = slab_index_for(ss, base);
+            if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
+                TinySlabMeta* meta = &ss->slabs[slab_idx];
+                atomic_fetch_add_explicit(&meta->active, 1, memory_order_relaxed);
+            }
+        }
+    }
+}
+
 // Diag counter: size>=1024 allocations routed to Tiny (env: HAKMEM_TINY_ALLOC_1024_METRIC)
 extern _Atomic uint64_t g_tiny_alloc_ge1024[];
 static inline void tiny_diag_track_size_ge1024_fast(size_t req_size, int class_idx) {
@@ -364,6 +385,8 @@ static inline void* tiny_alloc_fast_pop(int class_idx) {
         // Front Gate: SLL hit (SLIM fast path - 3 instructions)
         extern unsigned long long g_front_sll_hit[];
         g_front_sll_hit[class_idx]++;
+        // P1.3: Track active when allocating from TLS SLL
+        tiny_active_track_alloc(base);
         return base;
     }
 }
@@ -436,6 +459,9 @@ static inline void* tiny_alloc_fast_pop(int class_idx) {
         extern unsigned long long g_front_sll_hit[];
         g_front_sll_hit[class_idx]++;
+        // P1.3: Track active when allocating from TLS SLL
+        tiny_active_track_alloc(base);
+
 #if HAKMEM_DEBUG_COUNTERS
         // Track TLS freelist hits (compile-time gated, zero runtime cost when disabled)
         g_free_via_tls_sll[class_idx]++;
@@ -786,7 +812,13 @@ static inline void* tiny_alloc_fast(size_t size) {
 #endif
         } else {
             void* base = NULL;
-            if (tls_sll_pop(class_idx, &base)) ptr = base; else ptr = NULL;
+            if (tls_sll_pop(class_idx, &base)) {
+                // P1.3: Track active when allocating from TLS SLL
+                tiny_active_track_alloc(base);
+                ptr = base;
+            } else {
+                ptr = NULL;
+            }
         }
     } else {
         ptr = NULL;  // SLL disabled OR Front-Direct active → bypass SLL
@@ -826,7 +858,13 @@ static inline void* tiny_alloc_fast(size_t size) {
 #endif
         } else {
             void* base2 = NULL;
-            if (tls_sll_pop(class_idx, &base2)) ptr = base2; else ptr = NULL;
+            if (tls_sll_pop(class_idx, &base2)) {
+                // P1.3: Track active when allocating from TLS SLL
+                tiny_active_track_alloc(base2);
+                ptr = base2;
+            } else {
+                ptr = NULL;
+            }
         }
     } else {
         ptr = NULL;  // SLL disabled OR Front-Direct active → bypass SLL

tiny_free_fast_v2.inc.h

@@ -329,6 +329,27 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
         return 0;
     }

+    // P1.3: Decrement meta->active when block is freed (user gives it back)
+    // ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+    {
+        static __thread int g_active_track = -1;
+        if (__builtin_expect(g_active_track == -1, 0)) {
+            const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
+            g_active_track = (e && *e && *e != '0') ? 1 : 0;
+        }
+        if (__builtin_expect(g_active_track, 0)) {
+            // Lookup the actual slab meta for this block
+            SuperSlab* ss = ss_fast_lookup(base);
+            if (ss && ss->magic == SUPERSLAB_MAGIC) {
+                int slab_idx = slab_index_for(ss, base);
+                if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
+                    TinySlabMeta* meta = &ss->slabs[slab_idx];
+                    atomic_fetch_sub_explicit(&meta->active, 1, memory_order_relaxed);
+                }
+            }
+        }
+    }
+
     // Option B: Periodic TLS SLL Drain (restore slab accounting consistency)
     // Purpose: Every N frees (default: 1024), drain TLS SLL → slab freelist
     // Impact: Enables empty detection → SuperSlabs freed → LRU cache functional