Files
hakmem/core/box/ss_hot_cold_box.h
Moe Charm (CI) a6e681aae7 P2: TLS SLL Redesign - class_map default, tls_cached tracking, conditional header restore
This commit completes the P2 phase of the Tiny Pool TLS SLL redesign to fix the
Header/Next pointer conflict that was causing ~30% crash rates.

Changes:
- P2.1: Make class_map lookup the default (ENV: HAKMEM_TINY_NO_CLASS_MAP=1 for legacy)
- P2.2: Add meta->tls_cached field to track blocks cached in TLS SLL
- P2.3: Make Header restoration conditional in tiny_next_store() (default: skip)
- P2.4: Add invariant verification functions (active + tls_cached ≈ used)
- P0.4: Document new ENV variables in ENV_VARS.md

New ENV variables:
- HAKMEM_TINY_ACTIVE_TRACK=1: Enable active/tls_cached tracking (~1% overhead)
- HAKMEM_TINY_NO_CLASS_MAP=1: Disable class_map (legacy mode)
- HAKMEM_TINY_RESTORE_HEADER=1: Force header restoration (legacy mode)
- HAKMEM_TINY_INVARIANT_CHECK=1: Enable invariant verification (debug)
- HAKMEM_TINY_INVARIANT_DUMP=1: Enable periodic state dumps (debug)

Benchmark results (bench_tiny_hot_hakmem 64B):
- Default (class_map ON): 84.49 M ops/sec
- ACTIVE_TRACK=1: 83.62 M ops/sec (-1%)
- NO_CLASS_MAP=1 (legacy): 85.06 M ops/sec
- MT performance: +21-28% vs system allocator

No crashes observed. All tests passed.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 14:11:37 +09:00

254 lines
9.3 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ss_hot_cold_box.h - Phase 3d-C: Hot/Cold Slab Split Box
// Purpose: Cache locality optimization via hot/cold slab separation
// License: MIT
// Date: 2025-11-20
#ifndef SS_HOT_COLD_BOX_H
#define SS_HOT_COLD_BOX_H
#include "../superslab/superslab_types.h"

#include <stdatomic.h> // atomic_load_explicit() on TinySlabMeta counters
#include <stdbool.h>
#include <stdio.h>     // P2.4: for fprintf() in debug output
#include <stdlib.h>    // P1.3: for getenv()
// ============================================================================
// Phase 3d-C: Hot/Cold Split Box API
// ============================================================================
//
// Goal: Improve L1D cache hit rate by separating hot (high utilization) and
// cold (low utilization) slabs within a SuperSlab.
//
// Strategy:
// - Hot slabs (used > 50%): Prioritized for allocation → better cache locality
// - Cold slabs (used ≤ 50%): Used as fallback → delayed deallocation
//
// Expected: +8-12% throughput from improved cache line locality
//
// Box Contract:
// - ss_is_slab_hot(): Returns true if slab should be considered "hot"
// - ss_update_hot_cold_indices(): Rebuilds hot/cold index arrays
// - ss_init_hot_cold(): Initializes hot/cold fields on SuperSlab creation
//
// ============================================================================
// Phase 3d-C: Hot/Cold判定閾値
#define HOT_UTILIZATION_THRESHOLD 50 // 使用率50%以上でホット判定
// Phase 12-1.1: EMPTY判定ロジック最優先再利用
// P1.3: ENV gate for active-based empty detection
// ENV: HAKMEM_TINY_ACTIVE_TRACK=1 → use active, else use used
// Returns: true if slab is completely EMPTY (highest reuse priority)
// Phase 12-1.1: EMPTY detection (highest reuse priority).
// P1.3: ENV gate — HAKMEM_TINY_ACTIVE_TRACK=1 selects active-based detection;
// otherwise the legacy used-based detection applies.
// Returns: true if the slab is completely EMPTY; false for uninitialized slabs.
static inline bool ss_is_slab_empty(const TinySlabMeta* meta) {
    if (meta->capacity == 0) return false; // uninitialized slab is never "empty"

    // Cache the ENV lookup. NOTE: static in a `static inline` function has
    // internal linkage, so the ENV is parsed at most once per translation unit.
    static int g_use_active = -1;
    if (__builtin_expect(g_use_active == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
        g_use_active = (e && *e && *e != '0') ? 1 : 0;
    }

    if (g_use_active) {
        // P1.3: active == 0 means all blocks were returned by user code,
        // even if some are still cached in the TLS SLL.
        uint16_t act = atomic_load_explicit(&meta->active, memory_order_relaxed);
        return (act == 0);
    }
    // Legacy: used == 0 (does not account for blocks parked in the TLS SLL).
    // Fix: load `used` with an explicit relaxed atomic load, consistent with
    // the other readers (see ss_verify_slab_invariant); a plain read of an
    // _Atomic field would be an implicit seq_cst load.
    return (atomic_load_explicit(&meta->used, memory_order_relaxed) == 0);
}
// Phase 3d-C: Hot判定ロジック
// Returns: true if slab is "hot" (high utilization, should be prioritized)
static inline bool ss_is_slab_hot(const TinySlabMeta* meta) {
// ヒューリスティック: 使用率 > 50% → ホット
// 理由: 使用率が高い = 頻繁にアクセスされている = キャッシュに載せたい
if (meta->capacity == 0) {
return false; // Uninitialized slab
}
return (meta->used * 100 / meta->capacity) > HOT_UTILIZATION_THRESHOLD;
}
// Phase 12-1.1: EMPTY mask更新ヘルパー
// Marks a slab as EMPTY (highest reuse priority)
// Phase 12-1.1: mark a slab as EMPTY (highest reuse priority).
// No-op on a NULL SuperSlab, an out-of-range index, or an already-set bit,
// so empty_count stays in sync with the popcount of empty_mask.
static inline void ss_mark_slab_empty(SuperSlab* ss, int slab_idx) {
    if (!ss) return;
    if (slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;
    const uint32_t bit = 1u << slab_idx;
    if (ss->empty_mask & bit) return; // already tracked as EMPTY
    ss->empty_mask |= bit;
    ss->empty_count++;
}
// Phase 12-1.1: EMPTY mask クリアヘルパー
// Removes a slab from EMPTY state (when reactivated)
// Phase 12-1.1: remove a slab from the EMPTY set (when it is reactivated).
// No-op on a NULL SuperSlab, an out-of-range index, or an already-clear bit,
// so empty_count stays in sync with the popcount of empty_mask.
static inline void ss_clear_slab_empty(SuperSlab* ss, int slab_idx) {
    if (!ss) return;
    if (slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;
    const uint32_t bit = 1u << slab_idx;
    if (!(ss->empty_mask & bit)) return; // not tracked as EMPTY
    ss->empty_mask &= ~bit;
    ss->empty_count--;
}
// Phase 3d-C: Hot/Cold インデックス更新
// Rebuilds hot_indices[] and cold_indices[] arrays based on current slab state
// Phase 3d-C: rebuild hot_indices[]/cold_indices[] from the current slab state.
// Side effect (Phase 12-1.1): resets and recomputes the EMPTY mask/count.
// Classification per slab: uninitialized → skipped; EMPTY → mask only (never
// in the hot/cold arrays); otherwise hot or cold by ss_is_slab_hot().
static inline void ss_update_hot_cold_indices(SuperSlab* ss) {
    if (!ss) return;
    ss->hot_count = 0;
    ss->cold_count = 0;
    // Phase 12-1.1: reset EMPTY tracking before the rescan.
    ss->empty_mask = 0;
    ss->empty_count = 0;
    // Fix: derive the index-array capacities from the declarations instead of
    // the hard-coded 16, so a future array resize cannot silently overflow.
    const uint32_t hot_cap  = (uint32_t)(sizeof(ss->hot_indices)  / sizeof(ss->hot_indices[0]));
    const uint32_t cold_cap = (uint32_t)(sizeof(ss->cold_indices) / sizeof(ss->cold_indices[0]));
    uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
    if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
        max_slabs = SLABS_PER_SUPERSLAB_MAX;
    }
    // Scan active slabs and classify as EMPTY / hot / cold.
    for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
        TinySlabMeta* meta = &ss->slabs[i];
        if (meta->capacity == 0) {
            continue; // uninitialized slab
        }
        // Phase 12-1.1: EMPTY slabs get the highest reuse priority.
        if (ss_is_slab_empty(meta)) {
            ss_mark_slab_empty(ss, (int)i);
            continue; // don't add to hot/cold arrays
        }
        if (ss_is_slab_hot(meta)) {
            // Hot slab: high utilization.
            if (ss->hot_count < hot_cap) {
                ss->hot_indices[ss->hot_count++] = (uint8_t)i;
            }
        } else {
            // Cold slab: low utilization.
            if (ss->cold_count < cold_cap) {
                ss->cold_indices[ss->cold_count++] = (uint8_t)i;
            }
        }
    }
}
// Phase 3d-C: SuperSlab初期化時にhot/cold fieldsをゼロクリア
// Phase 3d-C: zero-initialize hot/cold fields on SuperSlab creation.
// Also initializes Phase 12-1.1 EMPTY tracking. The index arrays are cleared
// defensively even though hot_count/cold_count already gate all reads.
static inline void ss_init_hot_cold(SuperSlab* ss) {
    if (!ss) return;
    ss->hot_count = 0;
    ss->cold_count = 0;
    // Phase 12-1.1: initialize EMPTY tracking.
    ss->empty_mask = 0;
    ss->empty_count = 0;
    // Fix: derive the loop bounds from the declarations instead of the
    // hard-coded 16, so a future array resize stays correct automatically.
    for (size_t i = 0; i < sizeof(ss->hot_indices) / sizeof(ss->hot_indices[0]); i++) {
        ss->hot_indices[i] = 0;
    }
    for (size_t i = 0; i < sizeof(ss->cold_indices) / sizeof(ss->cold_indices[0]); i++) {
        ss->cold_indices[i] = 0;
    }
}
// ============================================================================
// P2.4: Invariant Verification for Debug Builds
// ============================================================================
//
// Invariant: active + tls_cached ≈ used
//
// - active: blocks currently held by user code
// - tls_cached: blocks cached in TLS SLL (returned by user, not yet pushed to slab freelist)
// - used: total blocks carved from slab and distributed
//
// Due to concurrent updates, exact equality is not guaranteed.
// We allow a small tolerance (delta) for race conditions.
//
// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable (disabled by default)
// ============================================================================
// P2.4: Verify slab invariant: active + tls_cached ≈ used
// Returns: true if invariant holds within tolerance, false if violated
// tolerance: maximum allowed deviation (default: 2 for TLS lag)
// P2.4: check the per-slab invariant |active + tls_cached - used| <= tolerance.
// Exact equality cannot be guaranteed under concurrent updates (TLS lag), so
// a small tolerance absorbs transient skew. Uninitialized slabs trivially pass.
// Returns: true if the invariant holds within tolerance, false if violated.
static inline bool ss_verify_slab_invariant(const TinySlabMeta* meta, int tolerance) {
    if (!meta || meta->capacity == 0) return true; // skip uninitialized slabs
    const int used   = (int)atomic_load_explicit(&meta->used, memory_order_relaxed);
    const int active = (int)atomic_load_explicit(&meta->active, memory_order_relaxed);
    const int cached = (int)atomic_load_explicit(&meta->tls_cached, memory_order_relaxed);
    const int delta = active + cached - used; // signed skew from the invariant
    return (delta >= -tolerance) && (delta <= tolerance);
}
// P2.4: Verify all slab invariants in a SuperSlab
// Returns: count of slabs that violate the invariant
// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable checking
// P2.4: verify the invariant for every active slab in a SuperSlab.
// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 enables checking (disabled by default;
// cached once per translation unit).
// Returns: number of slabs violating the invariant (0 when disabled or ss==NULL).
static inline int ss_verify_superslab_invariants(const SuperSlab* ss, int tolerance) {
    static int g_invariant_check = -1;
    if (__builtin_expect(g_invariant_check == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_INVARIANT_CHECK");
        g_invariant_check = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!g_invariant_check) return 0; // disabled by ENV
    if (!ss) return 0;
    int violations = 0;
    uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
    if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
        max_slabs = SLABS_PER_SUPERSLAB_MAX;
    }
    for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
        const TinySlabMeta* meta = &ss->slabs[i];
        if (!ss_verify_slab_invariant(meta, tolerance)) {
            violations++;
#ifndef NDEBUG
            // Fix: snapshot each counter exactly once. The old code re-loaded
            // the atomics per printf argument (and twice more for the sum), so
            // under concurrent updates the printed line could be internally
            // inconsistent and disagree with the violation decision. It also
            // passed the _Atomic `used` raw (an implicit seq_cst load).
            unsigned used   = atomic_load_explicit(&meta->used, memory_order_relaxed);
            unsigned active = atomic_load_explicit(&meta->active, memory_order_relaxed);
            unsigned cached = atomic_load_explicit(&meta->tls_cached, memory_order_relaxed);
            fprintf(stderr,
                    "[P2.4] Invariant VIOLATION: slab[%u] used=%u active=%u tls_cached=%u (sum=%u)\n",
                    i, used, active, cached, active + cached);
#endif
        }
    }
    return violations;
}
// P2.4: Debug dump of slab state for troubleshooting
// ENV: HAKMEM_TINY_INVARIANT_DUMP=1 to enable periodic dumps
// P2.4: debug dump of one slab's counters to stderr, for troubleshooting.
// ENV: HAKMEM_TINY_INVARIANT_DUMP=1 enables dumps (cached once per TU).
// Compiled out entirely under NDEBUG.
static inline void ss_dump_slab_state(const SuperSlab* ss, int slab_idx) {
#ifndef NDEBUG
    static int g_dump_enabled = -1;
    if (__builtin_expect(g_dump_enabled == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_INVARIANT_DUMP");
        g_dump_enabled = (env && *env && *env != '0') ? 1 : 0;
    }
    if (!g_dump_enabled || !ss) return;
    if (slab_idx < 0) return;
    if (slab_idx >= (int)ss->active_slabs) return;
    const TinySlabMeta* m = &ss->slabs[slab_idx];
    unsigned active = atomic_load_explicit(&m->active, memory_order_relaxed);
    unsigned cached = atomic_load_explicit(&m->tls_cached, memory_order_relaxed);
    fprintf(stderr, "[P2.4-DUMP] slab[%d]: used=%u active=%u tls_cached=%u capacity=%u class=%u\n",
            slab_idx, m->used, active, cached, m->capacity, m->class_idx);
#else
    (void)ss;
    (void)slab_idx;
#endif
}
#endif // SS_HOT_COLD_BOX_H