diff --git a/core/box/ss_allocation_box.c b/core/box/ss_allocation_box.c
index 39d45718..985ec9a4 100644
--- a/core/box/ss_allocation_box.c
+++ b/core/box/ss_allocation_box.c
@@ -430,6 +430,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     meta->freelist = NULL;  // NULL = linear allocation mode
     meta->used = 0;
     meta->active = 0;       // P1.3: blocks in use by user (starts at 0)
+    meta->tls_cached = 0;   // P2.2: blocks cached in TLS SLL (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // Store bits 8-15 of owner_tid (low 8 bits are 0 for glibc pthread IDs)
diff --git a/core/box/ss_hot_cold_box.h b/core/box/ss_hot_cold_box.h
index 6d73c059..a63cd88a 100644
--- a/core/box/ss_hot_cold_box.h
+++ b/core/box/ss_hot_cold_box.h
@@ -9,6 +9,7 @@
 #include "../superslab/superslab_types.h"
 #include
 #include <stdlib.h>  // P1.3: for getenv()
+#include <stdio.h>   // P2.4: for fprintf() in debug output
 
 // ============================================================================
 // Phase 3d-C: Hot/Cold Split Box API
@@ -154,4 +155,99 @@ static inline void ss_init_hot_cold(SuperSlab* ss) {
     }
 }
 
+// ============================================================================
+// P2.4: Invariant Verification for Debug Builds
+// ============================================================================
+//
+// Invariant: active + tls_cached ≈ used
+//
+// - active:     blocks currently held by user code
+// - tls_cached: blocks cached in TLS SLL (returned by user, not yet pushed to slab freelist)
+// - used:       total blocks carved from slab and distributed
+//
+// Due to concurrent updates, exact equality is not guaranteed.
+// We allow a small tolerance (delta) for race conditions.
+//
+// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable (disabled by default)
+// ============================================================================
+
+// P2.4: Verify slab invariant: active + tls_cached ≈ used
+// Returns: true if the invariant holds within tolerance, false if violated
+// tolerance: maximum allowed deviation (default: 2 for TLS lag)
+static inline bool ss_verify_slab_invariant(const TinySlabMeta* meta, int tolerance) {
+    if (!meta || meta->capacity == 0) return true;  // Skip uninitialized slabs
+
+    uint16_t used       = atomic_load_explicit(&meta->used, memory_order_relaxed);
+    uint16_t active     = atomic_load_explicit(&meta->active, memory_order_relaxed);
+    uint16_t tls_cached = atomic_load_explicit(&meta->tls_cached, memory_order_relaxed);
+
+    int sum  = (int)active + (int)tls_cached;
+    int diff = sum - (int)used;
+    if (diff < 0) diff = -diff;  // abs(diff)
+
+    return (diff <= tolerance);
+}
+
+// P2.4: Verify all slab invariants in a SuperSlab
+// Returns: count of slabs that violate the invariant
+// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable checking
+static inline int ss_verify_superslab_invariants(const SuperSlab* ss, int tolerance) {
+    static int g_invariant_check = -1;
+    if (__builtin_expect(g_invariant_check == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_INVARIANT_CHECK");
+        g_invariant_check = (e && *e && *e != '0') ? 1 : 0;
+    }
+
+    if (!g_invariant_check) return 0;  // Disabled by ENV
+    if (!ss) return 0;
+
+    int violations = 0;
+    uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
+    if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
+        max_slabs = SLABS_PER_SUPERSLAB_MAX;
+    }
+
+    for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
+        const TinySlabMeta* meta = &ss->slabs[i];
+        if (!ss_verify_slab_invariant(meta, tolerance)) {
+            violations++;
+#ifndef NDEBUG
+            // Debug output for violations
+            fprintf(stderr, "[P2.4] Invariant VIOLATION: slab[%u] used=%u active=%u tls_cached=%u (sum=%u)\n",
+                    i, meta->used,
+                    atomic_load_explicit(&meta->active, memory_order_relaxed),
+                    atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
+                    atomic_load_explicit(&meta->active, memory_order_relaxed) +
+                    atomic_load_explicit(&meta->tls_cached, memory_order_relaxed));
+#endif
+        }
+    }
+
+    return violations;
+}
+
+// P2.4: Debug dump of slab state for troubleshooting
+// ENV: HAKMEM_TINY_INVARIANT_DUMP=1 to enable periodic dumps
+static inline void ss_dump_slab_state(const SuperSlab* ss, int slab_idx) {
+#ifndef NDEBUG
+    static int g_dump_enabled = -1;
+    if (__builtin_expect(g_dump_enabled == -1, 0)) {
+        const char* e = getenv("HAKMEM_TINY_INVARIANT_DUMP");
+        g_dump_enabled = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (!g_dump_enabled) return;
+    if (!ss || slab_idx < 0 || slab_idx >= (int)ss->active_slabs) return;
+
+    const TinySlabMeta* meta = &ss->slabs[slab_idx];
+    fprintf(stderr, "[P2.4-DUMP] slab[%d]: used=%u active=%u tls_cached=%u capacity=%u class=%u\n",
+            slab_idx, meta->used,
+            atomic_load_explicit(&meta->active, memory_order_relaxed),
+            atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
+            meta->capacity, meta->class_idx);
+#else
+    (void)ss;
+    (void)slab_idx;
+#endif
+}
+
 #endif // SS_HOT_COLD_BOX_H
diff --git a/core/hakmem_tiny_superslab.c b/core/hakmem_tiny_superslab.c
index e5c6efb0..f2dd46f1 100644
--- a/core/hakmem_tiny_superslab.c
+++ b/core/hakmem_tiny_superslab.c
@@ -1222,6 +1222,7 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     meta->freelist = NULL;  // NULL = linear allocation mode
     meta->used = 0;
     meta->active = 0;       // P1.3: blocks in use by user (starts at 0)
+    meta->tls_cached = 0;   // P2.2: blocks cached in TLS SLL (starts at 0)
     meta->capacity = capacity;
     meta->carved = 0;
     // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes
diff --git a/core/superslab/superslab_types.h b/core/superslab/superslab_types.h
index 18dca0ad..d0659ed1 100644
--- a/core/superslab/superslab_types.h
+++ b/core/superslab/superslab_types.h
@@ -11,11 +11,13 @@
 typedef struct TinySlabMeta {
     _Atomic(void*)   freelist;    // NULL = bump-only, non-NULL = freelist head (ATOMIC for MT safety)
     _Atomic uint16_t used;        // blocks allocated from this slab's freelist (ATOMIC for MT safety)
-    _Atomic uint16_t active;      // P1.3: blocks currently in use by user (used - tls_cached) (ATOMIC)
+    _Atomic uint16_t active;      // P1.3: blocks currently in use by user (ATOMIC)
+    _Atomic uint16_t tls_cached;  // P2.2: blocks cached in TLS SLL (ATOMIC)
     uint16_t capacity;            // total blocks this slab can hold
     uint8_t  class_idx;           // owning tiny class (Phase 12: per-slab)
     uint8_t  carved;              // carve/owner flags
     uint8_t  owner_tid_low;       // low 8 bits of owner TID (debug / locality)
+    // P2.2 Invariant: active + tls_cached == used (approximately, due to TLS locality)
 } TinySlabMeta;
 
 #define TINY_NUM_CLASSES_SS 8
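The P2.4 helpers above are intended to be called opportunistically from debug builds. A minimal sketch of such a call site, assuming the caller already has a valid SuperSlab pointer in hand; the hook name debug_check_superslab and the dump loop are illustrative, not part of this patch:

// Hypothetical debug hook: verify active + tls_cached ≈ used for one SuperSlab and
// dump the first few slabs when a violation is found. Both helpers are no-ops unless
// HAKMEM_TINY_INVARIANT_CHECK=1 / HAKMEM_TINY_INVARIANT_DUMP=1 are set in the environment.
static void debug_check_superslab(const SuperSlab* ss) {
    int violations = ss_verify_superslab_invariants(ss, /*tolerance=*/2);
    if (violations > 0) {
        for (int i = 0; i < 4 && i < (int)ss->active_slabs; i++) {
            ss_dump_slab_state(ss, i);
        }
    }
}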
diff --git a/core/tiny_alloc_fast.inc.h b/core/tiny_alloc_fast.inc.h
index 1623ee8a..44caa8a9 100644
--- a/core/tiny_alloc_fast.inc.h
+++ b/core/tiny_alloc_fast.inc.h
@@ -37,8 +37,9 @@
 #include
 #include
 
-// P1.3: Helper to increment meta->active when allocating from TLS SLL
+// P1.3/P2.2: Helper to track active/tls_cached when allocating from TLS SLL
 // ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+// Flow: TLS SLL → User means active++, tls_cached--
 static inline void tiny_active_track_alloc(void* base) {
     static __thread int g_active_track = -1;
     if (__builtin_expect(g_active_track == -1, 0)) {
@@ -53,6 +54,7 @@ static inline void tiny_active_track_alloc(void* base) {
         if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
             TinySlabMeta* meta = &ss->slabs[slab_idx];
             atomic_fetch_add_explicit(&meta->active, 1, memory_order_relaxed);
+            atomic_fetch_sub_explicit(&meta->tls_cached, 1, memory_order_relaxed);  // P2.2
         }
     }
 }
diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h
index 1a6492cc..0a456b32 100644
--- a/core/tiny_free_fast_v2.inc.h
+++ b/core/tiny_free_fast_v2.inc.h
@@ -107,17 +107,18 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
     }
 #endif
 
-    // P1.2: Use class_map instead of Header to avoid Header/Next contention
-    // ENV: HAKMEM_TINY_USE_CLASS_MAP=1 to enable (default: 0 for compatibility)
+    // P2.1: Use class_map instead of Header to avoid Header/Next contention
+    // ENV: HAKMEM_TINY_NO_CLASS_MAP=1 to disable (default: ON - class_map is preferred)
     int class_idx = -1;
     {
         static __thread int g_use_class_map = -1;
         if (__builtin_expect(g_use_class_map == -1, 0)) {
-            const char* e = getenv("HAKMEM_TINY_USE_CLASS_MAP");
-            g_use_class_map = (e && *e && *e != '0') ? 1 : 0;
+            const char* e = getenv("HAKMEM_TINY_NO_CLASS_MAP");
+            // P2.1: Default is ON (use class_map), set HAKMEM_TINY_NO_CLASS_MAP=1 to disable
+            g_use_class_map = (e && *e && *e != '0') ? 0 : 1;
         }
-        if (__builtin_expect(g_use_class_map, 0)) {
+        if (__builtin_expect(g_use_class_map, 1)) {
             // P1.2: class_map path - avoid Header read
             SuperSlab* ss = ss_fast_lookup((uint8_t*)ptr - 1);
             if (ss && ss->magic == SUPERSLAB_MAGIC) {
@@ -144,7 +145,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
 #endif
             }
         } else {
-            // Default: Header read (existing behavior)
+            // P2.1: Fallback to Header read (disabled class_map mode)
             class_idx = tiny_region_id_read_header(ptr);
 #if HAKMEM_DEBUG_VERBOSE
             if (atomic_load(&debug_calls) <= 5) {
@@ -329,8 +330,9 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
         return 0;
     }
 
-    // P1.3: Decrement meta->active when block is freed (user gives it back)
+    // P1.3/P2.2: Track active/tls_cached when block is freed (user gives it back)
     // ENV gate: HAKMEM_TINY_ACTIVE_TRACK=1 to enable (default: 0 for performance)
+    // Flow: User → TLS SLL means active--, tls_cached++
     {
         static __thread int g_active_track = -1;
         if (__builtin_expect(g_active_track == -1, 0)) {
@@ -345,6 +347,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
             if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
                 TinySlabMeta* meta = &ss->slabs[slab_idx];
                 atomic_fetch_sub_explicit(&meta->active, 1, memory_order_relaxed);
+                atomic_fetch_add_explicit(&meta->tls_cached, 1, memory_order_relaxed);  // P2.2
             }
         }
     }
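Taken together, the alloc-side hook (tiny_active_track_alloc) and the free-side hook above maintain a simple conservation law on each slab. Below is a self-contained model of that accounting, for illustration only; the refill step that carves blocks from the slab into the TLS SLL is assumed to bump used and tls_cached together, which happens outside the hunks shown here:

#include <assert.h>
#include <stdint.h>

typedef struct { uint16_t used, active, tls_cached; } Counters;

// slab -> TLS SLL (refill/carve): assumed to account used++ and tls_cached++
static void carve_to_tls(Counters* c) { c->used++;   c->tls_cached++; }
// TLS SLL -> user (alloc fast path): active++, tls_cached--  (mirrors tiny_active_track_alloc)
static void tls_to_user(Counters* c)  { c->active++; c->tls_cached--; }
// user -> TLS SLL (free fast path):  active--, tls_cached++  (mirrors hak_tiny_free_fast_v2)
static void user_to_tls(Counters* c)  { c->active--; c->tls_cached++; }

int main(void) {
    Counters c = {0, 0, 0};
    carve_to_tls(&c);
    carve_to_tls(&c);   // two blocks carved into the TLS SLL
    tls_to_user(&c);    // one handed to the user
    user_to_tls(&c);    // and returned to the TLS SLL
    assert(c.active + c.tls_cached == c.used);  // single-threaded, so the invariant is exact
    return 0;
}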
diff --git a/core/tiny_nextptr.h b/core/tiny_nextptr.h
index 77da054b..eb325a7e 100644
--- a/core/tiny_nextptr.h
+++ b/core/tiny_nextptr.h
@@ -34,6 +34,7 @@
 #include
 #include
+#include <stdlib.h>  // P2.3: for getenv()
 #include "hakmem_build_flags.h"
 #include "tiny_region_id.h"        // HEADER_MAGIC/HEADER_CLASS_MASK for header repair/logging
 #include "hakmem_super_registry.h" // hak_super_lookup
 
@@ -74,20 +75,27 @@ static inline __attribute__((always_inline)) void* tiny_next_load(const void* ba
 }
 
 // Safe store of next pointer into a block base.
-// DESIGN RULE: "Header is written by BOTH Alloc and Free/Drain"
-// - Free/Drain paths: This function restores the header for C0-C6 (offset 1), then writes the Next pointer
-// - Alloc paths: Write header before returning block to user (HAK_RET_ALLOC)
-// - C7 (offset 0): Header is overwritten by next pointer, so no restoration needed
+// P2.3: Header restoration is now conditional (default: skip when class_map is active)
+// - When class_map is used for class_idx lookup (default), header restoration is unnecessary
+// - Alloc path always writes a fresh header before returning the block to the user (HAK_RET_ALLOC)
+// - ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy mode)
 // P0.1: C7 uses offset 0 (overwrites header), C0-C6 use offset 1 (header preserved)
 static inline __attribute__((always_inline)) void tiny_next_store(void* base, int class_idx, void* next) {
     size_t off = tiny_next_off(class_idx);
 #if HAKMEM_TINY_HEADER_CLASSIDX
-    // For C0-C6 (offset 1): Restore header before writing next pointer
-    // For C7 (offset 0): Header is overwritten, so no restoration needed
+    // P2.3: Skip header restoration by default (class_map is now the default for class_idx lookup)
+    // ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy fallback mode)
     if (off != 0) {
-        // Restore header for classes that preserve it (C0-C6)
-        *(uint8_t*)base = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
+        static int g_restore_header = -1;
+        if (__builtin_expect(g_restore_header == -1, 0)) {
+            const char* e = getenv("HAKMEM_TINY_RESTORE_HEADER");
+            g_restore_header = (e && *e && *e != '0') ? 1 : 0;
+        }
+        if (__builtin_expect(g_restore_header, 0)) {
+            // Legacy mode: Restore header for classes that preserve it (C0-C6)
+            *(uint8_t*)base = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
+        }
     }
 #endif
diff --git a/docs/specs/ENV_VARS.md b/docs/specs/ENV_VARS.md
index ae390cd8..cc73af37 100644
--- a/docs/specs/ENV_VARS.md
+++ b/docs/specs/ENV_VARS.md
@@ -141,6 +141,30 @@ Safety (free validation)
 - Enables additional validation at the free boundary (SuperSlab range, class mismatch, detection of dangerous double frees).
 - Recommended default when debugging; use 0 for perf measurements.
 
+P2 TLS SLL Redesign (Header/Next conflict fix)
+- HAKMEM_TINY_ACTIVE_TRACK=1
+  - Enables meta->active / meta->tls_cached tracking.
+  - active: blocks currently held by the user
+  - tls_cached: blocks cached in the TLS SLL
+  - Invariant: active + tls_cached ≈ used
+  - When enabled, ss_is_slab_empty() reports EMPTY on active==0 (blocks cached in the TLS SLL are also taken into account).
+  - Overhead: about 1% (atomic inc/dec per alloc/free).
+- HAKMEM_TINY_NO_CLASS_MAP=1
+  - Disables the class_map lookup (legacy mode).
+  - Default: class_map ON (made the default in P2.1).
+  - Reverts to reading class_idx from the Header (reintroduces the Header/Next conflict risk).
+- HAKMEM_TINY_RESTORE_HEADER=1
+  - Forces header restoration in tiny_next_store() (legacy mode).
+  - Default: header restoration OFF (disabled in P2.3).
+  - With class_map in use, header restoration is unnecessary (the header is rewritten at alloc time via HAK_RET_ALLOC).
+- HAKMEM_TINY_INVARIANT_CHECK=1
+  - Enables verification of the active + tls_cached ≈ used invariant (debug builds).
+  - Violations are reported on stderr (only when NDEBUG is not defined).
+  - Overhead: about 2% (only while ss_verify_superslab_invariants() is being called).
+- HAKMEM_TINY_INVARIANT_DUMP=1
+  - Enables periodic dumps of slab state (debug builds, only when NDEBUG is not defined).
+  - Prints the used/active/tls_cached/capacity/class breakdown to stderr.
+
 Frontend (mimalloc-inspired, experimental)
 - HAKMEM_INT_ADAPT_REFILL=0/1
   - In INT mode, adjusts the refill caps (`HAKMEM_TINY_REFILL_MAX(_HOT)`) by ±16 per window (default: ON)
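Because each of the gates documented above is latched into a static (or thread-local) flag on its first use, a test harness has to export them before the allocator's first tiny alloc/free. A minimal sketch using POSIX setenv(); the function name and placement are illustrative:

#include <stdlib.h>

// Hypothetical harness setup: must run before the allocator performs its first lazy getenv().
static void enable_p2_diagnostics(void) {
    setenv("HAKMEM_TINY_ACTIVE_TRACK", "1", 1);     // maintain the active / tls_cached counters
    setenv("HAKMEM_TINY_INVARIANT_CHECK", "1", 1);  // verify active + tls_cached ≈ used
    setenv("HAKMEM_TINY_INVARIANT_DUMP", "1", 1);   // slab state dumps (non-NDEBUG builds)
}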