This commit completes the P2 phase of the Tiny Pool TLS SLL redesign to fix the Header/Next pointer conflict that was causing ~30% crash rates. Changes: - P2.1: Make class_map lookup the default (ENV: HAKMEM_TINY_NO_CLASS_MAP=1 for legacy) - P2.2: Add meta->tls_cached field to track blocks cached in TLS SLL - P2.3: Make Header restoration conditional in tiny_next_store() (default: skip) - P2.4: Add invariant verification functions (active + tls_cached ≈ used) - P0.4: Document new ENV variables in ENV_VARS.md New ENV variables: - HAKMEM_TINY_ACTIVE_TRACK=1: Enable active/tls_cached tracking (~1% overhead) - HAKMEM_TINY_NO_CLASS_MAP=1: Disable class_map (legacy mode) - HAKMEM_TINY_RESTORE_HEADER=1: Force header restoration (legacy mode) - HAKMEM_TINY_INVARIANT_CHECK=1: Enable invariant verification (debug) - HAKMEM_TINY_INVARIANT_DUMP=1: Enable periodic state dumps (debug) Benchmark results (bench_tiny_hot_hakmem 64B): - Default (class_map ON): 84.49 M ops/sec - ACTIVE_TRACK=1: 83.62 M ops/sec (-1%) - NO_CLASS_MAP=1 (legacy): 85.06 M ops/sec - MT performance: +21-28% vs system allocator No crashes observed. All tests passed. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
254 lines
9.3 KiB
C
// ss_hot_cold_box.h - Phase 3d-C: Hot/Cold Slab Split Box
|
||
// Purpose: Cache locality optimization via hot/cold slab separation
|
||
// License: MIT
|
||
// Date: 2025-11-20
|
||
|
||
#ifndef SS_HOT_COLD_BOX_H
|
||
#define SS_HOT_COLD_BOX_H
|
||
|
||
#include "../superslab/superslab_types.h"

#include <stdatomic.h> // atomic_load_explicit / memory_order_relaxed used below
#include <stdbool.h>
#include <stdint.h>    // uint8_t / uint16_t / uint32_t used directly in this header
#include <stdio.h>     // P2.4: for fprintf() in debug output
#include <stdlib.h>    // P1.3: for getenv()
|
||
|
||
// ============================================================================
|
||
// Phase 3d-C: Hot/Cold Split Box API
|
||
// ============================================================================
|
||
//
|
||
// Goal: Improve L1D cache hit rate by separating hot (high utilization) and
|
||
// cold (low utilization) slabs within a SuperSlab.
|
||
//
|
||
// Strategy:
|
||
// - Hot slabs (used > 50%): Prioritized for allocation → better cache locality
|
||
// - Cold slabs (used ≤ 50%): Used as fallback → delayed deallocation
|
||
//
|
||
// Expected: +8-12% throughput from improved cache line locality
|
||
//
|
||
// Box Contract:
|
||
// - ss_is_slab_hot(): Returns true if slab should be considered "hot"
|
||
// - ss_update_hot_cold_indices(): Rebuilds hot/cold index arrays
|
||
// - ss_init_hot_cold(): Initializes hot/cold fields on SuperSlab creation
|
||
//
|
||
// ============================================================================
|
||
|
||
// Phase 3d-C: hot/cold classification threshold
#define HOT_UTILIZATION_THRESHOLD 50 // slab is "hot" when utilization exceeds 50%
|
||
|
||
// Phase 12-1.1: EMPTY detection logic (highest-priority reuse)
|
||
// P1.3: ENV gate for active-based empty detection
|
||
// ENV: HAKMEM_TINY_ACTIVE_TRACK=1 → use active, else use used
|
||
// Returns: true if slab is completely EMPTY (highest reuse priority)
|
||
static inline bool ss_is_slab_empty(const TinySlabMeta* meta) {
|
||
if (meta->capacity == 0) return false;
|
||
|
||
// P1.3: Use active-based empty detection if enabled
|
||
static int g_use_active = -1;
|
||
if (__builtin_expect(g_use_active == -1, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
|
||
g_use_active = (e && *e && *e != '0') ? 1 : 0;
|
||
}
|
||
|
||
if (g_use_active) {
|
||
// P1.3: active == 0 means all blocks returned by user (even if some in TLS SLL)
|
||
uint16_t act = atomic_load_explicit(&meta->active, memory_order_relaxed);
|
||
return (act == 0);
|
||
} else {
|
||
// Legacy: used == 0 (doesn't account for TLS SLL)
|
||
return (meta->used == 0);
|
||
}
|
||
}
|
||
|
||
// Phase 3d-C: hot-slab classification logic
|
||
// Returns: true if slab is "hot" (high utilization, should be prioritized)
|
||
static inline bool ss_is_slab_hot(const TinySlabMeta* meta) {
|
||
// ヒューリスティック: 使用率 > 50% → ホット
|
||
// 理由: 使用率が高い = 頻繁にアクセスされている = キャッシュに載せたい
|
||
if (meta->capacity == 0) {
|
||
return false; // Uninitialized slab
|
||
}
|
||
return (meta->used * 100 / meta->capacity) > HOT_UTILIZATION_THRESHOLD;
|
||
}
|
||
|
||
// Phase 12-1.1: EMPTY mask update helper
|
||
// Marks a slab as EMPTY (highest reuse priority)
|
||
static inline void ss_mark_slab_empty(SuperSlab* ss, int slab_idx) {
|
||
if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;
|
||
|
||
uint32_t bit = (1u << slab_idx);
|
||
if (!(ss->empty_mask & bit)) {
|
||
ss->empty_mask |= bit;
|
||
ss->empty_count++;
|
||
}
|
||
}
|
||
|
||
// Phase 12-1.1: EMPTY mask clear helper
|
||
// Removes a slab from EMPTY state (when reactivated)
|
||
static inline void ss_clear_slab_empty(SuperSlab* ss, int slab_idx) {
|
||
if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;
|
||
|
||
uint32_t bit = (1u << slab_idx);
|
||
if (ss->empty_mask & bit) {
|
||
ss->empty_mask &= ~bit;
|
||
ss->empty_count--;
|
||
}
|
||
}
|
||
|
||
// Phase 3d-C: Hot/Cold インデックス更新
|
||
// Rebuilds hot_indices[] and cold_indices[] arrays based on current slab state
|
||
static inline void ss_update_hot_cold_indices(SuperSlab* ss) {
|
||
if (!ss) return;
|
||
|
||
ss->hot_count = 0;
|
||
ss->cold_count = 0;
|
||
// Phase 12-1.1: Reset empty tracking
|
||
ss->empty_mask = 0;
|
||
ss->empty_count = 0;
|
||
|
||
uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
|
||
if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
|
||
max_slabs = SLABS_PER_SUPERSLAB_MAX;
|
||
}
|
||
|
||
// Scan active slabs and classify as EMPTY / hot / cold
|
||
for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
|
||
TinySlabMeta* meta = &ss->slabs[i];
|
||
|
||
// Skip uninitialized slabs (capacity == 0)
|
||
if (meta->capacity == 0) {
|
||
continue;
|
||
}
|
||
|
||
// Phase 12-1.1: EMPTY slabs have highest reuse priority
|
||
if (ss_is_slab_empty(meta)) {
|
||
ss_mark_slab_empty(ss, (int)i);
|
||
continue; // Don't add to hot/cold arrays
|
||
}
|
||
|
||
if (ss_is_slab_hot(meta)) {
|
||
// Hot slab: high utilization
|
||
if (ss->hot_count < 16) {
|
||
ss->hot_indices[ss->hot_count++] = (uint8_t)i;
|
||
}
|
||
} else {
|
||
// Cold slab: low utilization
|
||
if (ss->cold_count < 16) {
|
||
ss->cold_indices[ss->cold_count++] = (uint8_t)i;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Phase 3d-C: zero-initialize hot/cold fields at SuperSlab creation
|
||
static inline void ss_init_hot_cold(SuperSlab* ss) {
|
||
if (!ss) return;
|
||
|
||
ss->hot_count = 0;
|
||
ss->cold_count = 0;
|
||
// Phase 12-1.1: Initialize EMPTY tracking
|
||
ss->empty_mask = 0;
|
||
ss->empty_count = 0;
|
||
|
||
// Initialize index arrays to 0 (defensive programming)
|
||
for (int i = 0; i < 16; i++) {
|
||
ss->hot_indices[i] = 0;
|
||
ss->cold_indices[i] = 0;
|
||
}
|
||
}
|
||
|
||
// ============================================================================
|
||
// P2.4: Invariant Verification for Debug Builds
|
||
// ============================================================================
|
||
//
|
||
// Invariant: active + tls_cached ≈ used
|
||
//
|
||
// - active: blocks currently held by user code
|
||
// - tls_cached: blocks cached in TLS SLL (returned by user, not yet pushed to slab freelist)
|
||
// - used: total blocks carved from slab and distributed
|
||
//
|
||
// Due to concurrent updates, exact equality is not guaranteed.
|
||
// We allow a small tolerance (delta) for race conditions.
|
||
//
|
||
// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable (disabled by default)
|
||
// ============================================================================
|
||
|
||
// P2.4: Verify slab invariant: active + tls_cached ≈ used
|
||
// Returns: true if invariant holds within tolerance, false if violated
|
||
// tolerance: maximum allowed deviation (default: 2 for TLS lag)
|
||
static inline bool ss_verify_slab_invariant(const TinySlabMeta* meta, int tolerance) {
|
||
if (!meta || meta->capacity == 0) return true; // Skip uninitialized slabs
|
||
|
||
uint16_t used = atomic_load_explicit(&meta->used, memory_order_relaxed);
|
||
uint16_t active = atomic_load_explicit(&meta->active, memory_order_relaxed);
|
||
uint16_t tls_cached = atomic_load_explicit(&meta->tls_cached, memory_order_relaxed);
|
||
|
||
int sum = (int)active + (int)tls_cached;
|
||
int diff = sum - (int)used;
|
||
if (diff < 0) diff = -diff; // abs(diff)
|
||
|
||
return (diff <= tolerance);
|
||
}
|
||
|
||
// P2.4: Verify all slab invariants in a SuperSlab
|
||
// Returns: count of slabs that violate the invariant
|
||
// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable checking
|
||
static inline int ss_verify_superslab_invariants(const SuperSlab* ss, int tolerance) {
|
||
static int g_invariant_check = -1;
|
||
if (__builtin_expect(g_invariant_check == -1, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_INVARIANT_CHECK");
|
||
g_invariant_check = (e && *e && *e != '0') ? 1 : 0;
|
||
}
|
||
|
||
if (!g_invariant_check) return 0; // Disabled by ENV
|
||
if (!ss) return 0;
|
||
|
||
int violations = 0;
|
||
uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
|
||
if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
|
||
max_slabs = SLABS_PER_SUPERSLAB_MAX;
|
||
}
|
||
|
||
for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
|
||
const TinySlabMeta* meta = &ss->slabs[i];
|
||
if (!ss_verify_slab_invariant(meta, tolerance)) {
|
||
violations++;
|
||
#ifndef NDEBUG
|
||
// Debug output for violations
|
||
fprintf(stderr, "[P2.4] Invariant VIOLATION: slab[%u] used=%u active=%u tls_cached=%u (sum=%u)\n",
|
||
i, meta->used,
|
||
atomic_load_explicit(&meta->active, memory_order_relaxed),
|
||
atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
|
||
atomic_load_explicit(&meta->active, memory_order_relaxed) +
|
||
atomic_load_explicit(&meta->tls_cached, memory_order_relaxed));
|
||
#endif
|
||
}
|
||
}
|
||
|
||
return violations;
|
||
}
|
||
|
||
// P2.4: Debug dump of slab state for troubleshooting
|
||
// ENV: HAKMEM_TINY_INVARIANT_DUMP=1 to enable periodic dumps
|
||
static inline void ss_dump_slab_state(const SuperSlab* ss, int slab_idx) {
|
||
#ifndef NDEBUG
|
||
static int g_dump_enabled = -1;
|
||
if (__builtin_expect(g_dump_enabled == -1, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_INVARIANT_DUMP");
|
||
g_dump_enabled = (e && *e && *e != '0') ? 1 : 0;
|
||
}
|
||
if (!g_dump_enabled) return;
|
||
if (!ss || slab_idx < 0 || slab_idx >= (int)ss->active_slabs) return;
|
||
|
||
const TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||
fprintf(stderr, "[P2.4-DUMP] slab[%d]: used=%u active=%u tls_cached=%u capacity=%u class=%u\n",
|
||
slab_idx, meta->used,
|
||
atomic_load_explicit(&meta->active, memory_order_relaxed),
|
||
atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
|
||
meta->capacity, meta->class_idx);
|
||
#else
|
||
(void)ss;
|
||
(void)slab_idx;
|
||
#endif
|
||
}
|
||
|
||
#endif // SS_HOT_COLD_BOX_H
|