P1.3: Add meta->active for TLS SLL tracking

Add an active field to TinySlabMeta to track blocks currently held by
users (i.e. not sitting in the TLS SLL or freelist caches). This
enables accurate empty-slab detection that accounts for blocks cached
in the TLS SLL.
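
To make the distinction concrete, here is a minimal toy model of the
two counters (illustrative only; ToySlab is a stand-in, not allocator
code): a block freed into the TLS SLL leaves used unchanged, so
used == 0 under-reports emptiness, while active == 0 reflects what the
user actually holds.

#include <stdatomic.h>
#include <stdio.h>

// Toy model of the two counters (names mirror TinySlabMeta).
typedef struct {
    _Atomic unsigned used;    // allocated from slab, not yet drained back
    _Atomic unsigned active;  // currently held by the user
} ToySlab;

int main(void) {
    ToySlab s;
    atomic_init(&s.used, 0);
    atomic_init(&s.active, 0);

    // User allocates 3 blocks: both counters rise.
    for (int i = 0; i < 3; i++) {
        atomic_fetch_add(&s.used, 1);
        atomic_fetch_add(&s.active, 1);
    }
    // User frees all 3, but they are parked in the TLS SLL:
    // active drops immediately; used stays until a drain returns
    // the blocks to the slab freelist.
    for (int i = 0; i < 3; i++)
        atomic_fetch_sub(&s.active, 1);

    unsigned used   = atomic_load(&s.used);
    unsigned active = atomic_load(&s.active);
    printf("used=%u active=%u\n", used, active);                  // used=3 active=0
    printf("empty by used?   %s\n", used == 0 ? "yes" : "no");    // no (false negative)
    printf("empty by active? %s\n", active == 0 ? "yes" : "no");  // yes
    return 0;
}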

Changes:
- superslab_types.h: Add _Atomic uint16_t active field
- ss_allocation_box.c, hakmem_tiny_superslab.c: Initialize active=0
- tiny_free_fast_v2.inc.h: Decrement active on TLS SLL push
- tiny_alloc_fast.inc.h: Add tiny_active_track_alloc() helper,
  increment active on TLS SLL pop (all code paths)
- ss_hot_cold_box.h: ss_is_slab_empty() uses active when enabled

All tracking is ENV-gated: HAKMEM_TINY_ACTIVE_TRACK=1 to enable.
Default is off for zero performance impact.
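
For reference, a minimal sketch of flipping the gate from a test
harness (hypothetical harness code, not part of this commit). Each
path caches the getenv() result on first use (per thread in the
alloc/free paths, per process in ss_is_slab_empty()), so the variable
must be set before the first tiny allocation or free:

#include <stdlib.h>

int main(void) {
    // Set before the first tiny alloc/free: the gate value is read once
    // via getenv() and cached, so later changes have no effect.
    setenv("HAKMEM_TINY_ACTIVE_TRACK", "1", 1);
    // ... run workload ...
    return 0;
}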

Invariant: active = used - tls_cached (active <= used)
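
A debug-only check of this invariant might look like the sketch below
(hypothetical audit hook, not part of this commit; with relaxed
counters the equality only holds at quiescent points, e.g. right after
a TLS SLL drain):

#include <assert.h>
#include <stdatomic.h>
// (assumes superslab_types.h is included for TinySlabMeta)

// Hypothetical audit hook: tls_cached = blocks from this slab currently
// parked in the TLS SLL (the auditor must compute it separately).
static void tiny_audit_slab(const TinySlabMeta* meta, unsigned tls_cached) {
    unsigned used   = atomic_load_explicit(&meta->used, memory_order_relaxed);
    unsigned active = atomic_load_explicit(&meta->active, memory_order_relaxed);
    assert(active <= used);               // active never exceeds used
    assert(active + tls_cached == used);  // active = used - tls_cached
}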

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Moe Charm (CI)
2025-11-28 13:53:45 +09:00
parent dc9e650db3
commit 6b86c60a20
6 changed files with 86 additions and 5 deletions

ss_allocation_box.c

@@ -429,6 +429,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     TinySlabMeta* meta = &ss->slabs[slab_idx];
     meta->freelist = NULL;   // NULL = linear allocation mode
     meta->used = 0;
+    meta->active = 0;        // P1.3: blocks in use by user (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // Store bits 8-15 of owner_tid (low 8 bits are 0 for glibc pthread IDs)

ss_hot_cold_box.h

@@ -8,6 +8,7 @@
 #include "../superslab/superslab_types.h"
 #include <stdbool.h>
+#include <stdlib.h>  // P1.3: for getenv()

 // ============================================================================
 // Phase 3d-C: Hot/Cold Split Box API
@@ -33,9 +34,27 @@
 #define HOT_UTILIZATION_THRESHOLD 50  // hot if utilization >= 50%

 // Phase 12-1.1: EMPTY detection logic (highest reuse priority)
-// Returns: true if slab is completely EMPTY (used == 0, highest reuse priority)
+// P1.3: ENV gate for active-based empty detection
+// ENV: HAKMEM_TINY_ACTIVE_TRACK=1 → use active, else use used
+// Returns: true if slab is completely EMPTY (highest reuse priority)
 static inline bool ss_is_slab_empty(const TinySlabMeta* meta) {
-    return (meta->capacity > 0 && meta->used == 0);
+    if (meta->capacity == 0) return false;
+    // P1.3: Use active-based empty detection if enabled
+    static int g_use_active = -1;
+    if (__builtin_expect(g_use_active == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
+        g_use_active = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (g_use_active) {
+        // P1.3: active == 0 means all blocks returned by user (even if some in TLS SLL)
+        uint16_t act = atomic_load_explicit(&meta->active, memory_order_relaxed);
+        return (act == 0);
+    } else {
+        // Legacy: used == 0 (doesn't account for TLS SLL)
+        return (meta->used == 0);
+    }
 }

 // Phase 3d-C: hot detection logic
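
For context, a sketch of how a reclaim pass might consume this
predicate (hypothetical caller using the ss_slabs_capacity() and
ss->slabs accessors seen elsewhere in this commit; the real scan lives
elsewhere). With active-based detection, an "empty" slab may still
have blocks parked in a TLS SLL; those caches must be drained before
the memory is actually released.

// Hypothetical: find the first fully-EMPTY slab for highest-priority reuse.
static int ss_find_empty_slab(SuperSlab* ss) {
    for (int i = 0; i < ss_slabs_capacity(ss); i++) {
        if (ss_is_slab_empty(&ss->slabs[i]))
            return i;
    }
    return -1;  // none empty
}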

hakmem_tiny_superslab.c

@@ -1221,6 +1221,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     TinySlabMeta* meta = &ss->slabs[slab_idx];
     meta->freelist = NULL;   // NULL = linear allocation mode
     meta->used = 0;
+    meta->active = 0;        // P1.3: blocks in use by user (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes

superslab_types.h

@@ -10,7 +10,8 @@
 // TinySlabMeta: per-slab metadata embedded in SuperSlab
 typedef struct TinySlabMeta {
     _Atomic(void*) freelist;   // NULL = bump-only, non-NULL = freelist head (ATOMIC for MT safety)
-    _Atomic uint16_t used;     // blocks currently allocated from this slab (ATOMIC for MT safety)
+    _Atomic uint16_t used;     // blocks allocated from this slab's freelist (ATOMIC for MT safety)
+    _Atomic uint16_t active;   // P1.3: blocks currently in use by user (used - tls_cached) (ATOMIC)
     uint16_t capacity;         // total blocks this slab can hold
     uint8_t class_idx;         // owning tiny class (Phase 12: per-slab)
     uint8_t carved;            // carve/owner flags

tiny_alloc_fast.inc.h

@@ -37,6 +37,27 @@
 #include <stdio.h>
 #include <stdatomic.h>

+// P1.3: Helper to increment meta->active when allocating from TLS SLL
+// ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+static inline void tiny_active_track_alloc(void* base) {
+    static __thread int g_active_track = -1;
+    if (__builtin_expect(g_active_track == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
+        g_active_track = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (__builtin_expect(g_active_track, 0)) {
+        extern SuperSlab* ss_fast_lookup(void* ptr);
+        SuperSlab* ss = ss_fast_lookup(base);
+        if (ss && ss->magic == SUPERSLAB_MAGIC) {
+            int slab_idx = slab_index_for(ss, base);
+            if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
+                TinySlabMeta* meta = &ss->slabs[slab_idx];
+                atomic_fetch_add_explicit(&meta->active, 1, memory_order_relaxed);
+            }
+        }
+    }
+}
+
 // Diag counter: size>=1024 allocations routed to Tiny (env: HAKMEM_TINY_ALLOC_1024_METRIC)
 extern _Atomic uint64_t g_tiny_alloc_ge1024[];
 static inline void tiny_diag_track_size_ge1024_fast(size_t req_size, int class_idx) {
@@ -364,6 +385,8 @@ static inline void* tiny_alloc_fast_pop(int class_idx) {
         // Front Gate: SLL hit (SLIM fast path - 3 instructions)
         extern unsigned long long g_front_sll_hit[];
         g_front_sll_hit[class_idx]++;
+        // P1.3: Track active when allocating from TLS SLL
+        tiny_active_track_alloc(base);
         return base;
     }
 }
@@ -436,6 +459,9 @@ static inline void* tiny_alloc_fast_pop(int class_idx) {
         extern unsigned long long g_front_sll_hit[];
         g_front_sll_hit[class_idx]++;
+        // P1.3: Track active when allocating from TLS SLL
+        tiny_active_track_alloc(base);
+
 #if HAKMEM_DEBUG_COUNTERS
         // Track TLS freelist hits (compile-time gated, zero runtime cost when disabled)
         g_free_via_tls_sll[class_idx]++;
@@ -786,7 +812,13 @@ static inline void* tiny_alloc_fast(size_t size) {
 #endif
         } else {
             void* base = NULL;
-            if (tls_sll_pop(class_idx, &base)) ptr = base; else ptr = NULL;
+            if (tls_sll_pop(class_idx, &base)) {
+                // P1.3: Track active when allocating from TLS SLL
+                tiny_active_track_alloc(base);
+                ptr = base;
+            } else {
+                ptr = NULL;
+            }
         }
     } else {
         ptr = NULL;  // SLL disabled OR Front-Direct active → bypass SLL
@@ -826,7 +858,13 @@ static inline void* tiny_alloc_fast(size_t size) {
 #endif
         } else {
             void* base2 = NULL;
-            if (tls_sll_pop(class_idx, &base2)) ptr = base2; else ptr = NULL;
+            if (tls_sll_pop(class_idx, &base2)) {
+                // P1.3: Track active when allocating from TLS SLL
+                tiny_active_track_alloc(base2);
+                ptr = base2;
+            } else {
+                ptr = NULL;
+            }
         }
     } else {
         ptr = NULL;  // SLL disabled OR Front-Direct active → bypass SLL

tiny_free_fast_v2.inc.h

@@ -329,6 +329,27 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
         return 0;
     }

+    // P1.3: Decrement meta->active when block is freed (user gives it back)
+    // ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+    {
+        static __thread int g_active_track = -1;
+        if (__builtin_expect(g_active_track == -1, 0)) {
+            const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
+            g_active_track = (e && *e && *e != '0') ? 1 : 0;
+        }
+        if (__builtin_expect(g_active_track, 0)) {
+            // Lookup the actual slab meta for this block
+            SuperSlab* ss = ss_fast_lookup(base);
+            if (ss && ss->magic == SUPERSLAB_MAGIC) {
+                int slab_idx = slab_index_for(ss, base);
+                if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
+                    TinySlabMeta* meta = &ss->slabs[slab_idx];
+                    atomic_fetch_sub_explicit(&meta->active, 1, memory_order_relaxed);
+                }
+            }
+        }
+    }
+
     // Option B: Periodic TLS SLL Drain (restore slab accounting consistency)
     // Purpose: Every N frees (default: 1024), drain TLS SLL → slab freelist
     // Impact: Enables empty detection → SuperSlabs freed → LRU cache functional