2025-11-20 07:44:07 +09:00
|
|
|
|
// ss_hot_cold_box.h - Phase 3d-C: Hot/Cold Slab Split Box
|
|
|
|
|
|
// Purpose: Cache locality optimization via hot/cold slab separation
|
|
|
|
|
|
// License: MIT
|
|
|
|
|
|
// Date: 2025-11-20
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef SS_HOT_COLD_BOX_H
|
|
|
|
|
|
#define SS_HOT_COLD_BOX_H
|
|
|
|
|
|
|
|
|
|
|
|
#include "../superslab/superslab_types.h"

#include <stdatomic.h> // for atomic_load_explicit() on meta->active / meta->used
#include <stdbool.h>
#include <stdio.h>  // P2.4: for fprintf() in debug output
#include <stdlib.h> // P1.3: for getenv()
|
2025-11-20 07:44:07 +09:00
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
// Phase 3d-C: Hot/Cold Split Box API
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
//
|
|
|
|
|
|
// Goal: Improve L1D cache hit rate by separating hot (high utilization) and
|
|
|
|
|
|
// cold (low utilization) slabs within a SuperSlab.
|
|
|
|
|
|
//
|
|
|
|
|
|
// Strategy:
|
|
|
|
|
|
// - Hot slabs (used > 50%): Prioritized for allocation → better cache locality
|
|
|
|
|
|
// - Cold slabs (used ≤ 50%): Used as fallback → delayed deallocation
|
|
|
|
|
|
//
|
|
|
|
|
|
// Expected: +8-12% throughput from improved cache line locality
|
|
|
|
|
|
//
|
|
|
|
|
|
// Box Contract:
|
|
|
|
|
|
// - ss_is_slab_hot(): Returns true if slab should be considered "hot"
|
|
|
|
|
|
// - ss_update_hot_cold_indices(): Rebuilds hot/cold index arrays
|
|
|
|
|
|
// - ss_init_hot_cold(): Initializes hot/cold fields on SuperSlab creation
|
|
|
|
|
|
//
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
// Phase 3d-C: hot/cold classification threshold
#define HOT_UTILIZATION_THRESHOLD 50 // utilization strictly above 50% => hot
|
|
|
|
|
|
|
2025-11-21 04:56:48 +09:00
|
|
|
|
// Phase 12-1.1: EMPTY判定ロジック(最優先再利用)
|
2025-11-28 13:53:45 +09:00
|
|
|
|
// P1.3: ENV gate for active-based empty detection
|
|
|
|
|
|
// ENV: HAKMEM_TINY_ACTIVE_TRACK=1 → use active, else use used
|
|
|
|
|
|
// Returns: true if slab is completely EMPTY (highest reuse priority)
|
2025-11-21 04:56:48 +09:00
|
|
|
|
static inline bool ss_is_slab_empty(const TinySlabMeta* meta) {
|
2025-11-28 13:53:45 +09:00
|
|
|
|
if (meta->capacity == 0) return false;
|
|
|
|
|
|
|
|
|
|
|
|
// P1.3: Use active-based empty detection if enabled
|
|
|
|
|
|
static int g_use_active = -1;
|
|
|
|
|
|
if (__builtin_expect(g_use_active == -1, 0)) {
|
|
|
|
|
|
const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
|
|
|
|
|
|
g_use_active = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (g_use_active) {
|
|
|
|
|
|
// P1.3: active == 0 means all blocks returned by user (even if some in TLS SLL)
|
|
|
|
|
|
uint16_t act = atomic_load_explicit(&meta->active, memory_order_relaxed);
|
|
|
|
|
|
return (act == 0);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// Legacy: used == 0 (doesn't account for TLS SLL)
|
|
|
|
|
|
return (meta->used == 0);
|
|
|
|
|
|
}
|
2025-11-21 04:56:48 +09:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-20 07:44:07 +09:00
|
|
|
|
// Phase 3d-C: Hot判定ロジック
|
|
|
|
|
|
// Returns: true if slab is "hot" (high utilization, should be prioritized)
|
|
|
|
|
|
static inline bool ss_is_slab_hot(const TinySlabMeta* meta) {
|
|
|
|
|
|
// ヒューリスティック: 使用率 > 50% → ホット
|
|
|
|
|
|
// 理由: 使用率が高い = 頻繁にアクセスされている = キャッシュに載せたい
|
|
|
|
|
|
if (meta->capacity == 0) {
|
|
|
|
|
|
return false; // Uninitialized slab
|
|
|
|
|
|
}
|
|
|
|
|
|
return (meta->used * 100 / meta->capacity) > HOT_UTILIZATION_THRESHOLD;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-21 04:56:48 +09:00
|
|
|
|
// Phase 12-1.1: EMPTY mask更新ヘルパー
|
|
|
|
|
|
// Marks a slab as EMPTY (highest reuse priority)
|
|
|
|
|
|
static inline void ss_mark_slab_empty(SuperSlab* ss, int slab_idx) {
|
|
|
|
|
|
if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;
|
|
|
|
|
|
|
|
|
|
|
|
uint32_t bit = (1u << slab_idx);
|
|
|
|
|
|
if (!(ss->empty_mask & bit)) {
|
|
|
|
|
|
ss->empty_mask |= bit;
|
|
|
|
|
|
ss->empty_count++;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Phase 12-1.1: EMPTY mask クリアヘルパー
|
|
|
|
|
|
// Removes a slab from EMPTY state (when reactivated)
|
|
|
|
|
|
static inline void ss_clear_slab_empty(SuperSlab* ss, int slab_idx) {
|
|
|
|
|
|
if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;
|
|
|
|
|
|
|
|
|
|
|
|
uint32_t bit = (1u << slab_idx);
|
|
|
|
|
|
if (ss->empty_mask & bit) {
|
|
|
|
|
|
ss->empty_mask &= ~bit;
|
|
|
|
|
|
ss->empty_count--;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-20 07:44:07 +09:00
|
|
|
|
// Phase 3d-C: Hot/Cold インデックス更新
|
|
|
|
|
|
// Rebuilds hot_indices[] and cold_indices[] arrays based on current slab state
|
|
|
|
|
|
static inline void ss_update_hot_cold_indices(SuperSlab* ss) {
|
|
|
|
|
|
if (!ss) return;
|
|
|
|
|
|
|
|
|
|
|
|
ss->hot_count = 0;
|
|
|
|
|
|
ss->cold_count = 0;
|
2025-11-21 04:56:48 +09:00
|
|
|
|
// Phase 12-1.1: Reset empty tracking
|
|
|
|
|
|
ss->empty_mask = 0;
|
|
|
|
|
|
ss->empty_count = 0;
|
2025-11-20 07:44:07 +09:00
|
|
|
|
|
|
|
|
|
|
uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
|
|
|
|
|
|
if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
|
|
|
|
|
|
max_slabs = SLABS_PER_SUPERSLAB_MAX;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-21 04:56:48 +09:00
|
|
|
|
// Scan active slabs and classify as EMPTY / hot / cold
|
2025-11-20 07:44:07 +09:00
|
|
|
|
for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
|
|
|
|
|
|
TinySlabMeta* meta = &ss->slabs[i];
|
|
|
|
|
|
|
|
|
|
|
|
// Skip uninitialized slabs (capacity == 0)
|
|
|
|
|
|
if (meta->capacity == 0) {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-21 04:56:48 +09:00
|
|
|
|
// Phase 12-1.1: EMPTY slabs have highest reuse priority
|
|
|
|
|
|
if (ss_is_slab_empty(meta)) {
|
|
|
|
|
|
ss_mark_slab_empty(ss, (int)i);
|
|
|
|
|
|
continue; // Don't add to hot/cold arrays
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-20 07:44:07 +09:00
|
|
|
|
if (ss_is_slab_hot(meta)) {
|
|
|
|
|
|
// Hot slab: high utilization
|
|
|
|
|
|
if (ss->hot_count < 16) {
|
|
|
|
|
|
ss->hot_indices[ss->hot_count++] = (uint8_t)i;
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// Cold slab: low utilization
|
|
|
|
|
|
if (ss->cold_count < 16) {
|
|
|
|
|
|
ss->cold_indices[ss->cold_count++] = (uint8_t)i;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Phase 3d-C: SuperSlab初期化時にhot/cold fieldsをゼロクリア
|
|
|
|
|
|
static inline void ss_init_hot_cold(SuperSlab* ss) {
|
|
|
|
|
|
if (!ss) return;
|
|
|
|
|
|
|
|
|
|
|
|
ss->hot_count = 0;
|
|
|
|
|
|
ss->cold_count = 0;
|
2025-11-21 04:56:48 +09:00
|
|
|
|
// Phase 12-1.1: Initialize EMPTY tracking
|
|
|
|
|
|
ss->empty_mask = 0;
|
|
|
|
|
|
ss->empty_count = 0;
|
2025-11-20 07:44:07 +09:00
|
|
|
|
|
|
|
|
|
|
// Initialize index arrays to 0 (defensive programming)
|
|
|
|
|
|
for (int i = 0; i < 16; i++) {
|
|
|
|
|
|
ss->hot_indices[i] = 0;
|
|
|
|
|
|
ss->cold_indices[i] = 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-28 14:11:37 +09:00
|
|
|
|
// ============================================================================
|
|
|
|
|
|
// P2.4: Invariant Verification for Debug Builds
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
//
|
|
|
|
|
|
// Invariant: active + tls_cached ≈ used
|
|
|
|
|
|
//
|
|
|
|
|
|
// - active: blocks currently held by user code
|
|
|
|
|
|
// - tls_cached: blocks cached in TLS SLL (returned by user, not yet pushed to slab freelist)
|
|
|
|
|
|
// - used: total blocks carved from slab and distributed
|
|
|
|
|
|
//
|
|
|
|
|
|
// Due to concurrent updates, exact equality is not guaranteed.
|
|
|
|
|
|
// We allow a small tolerance (delta) for race conditions.
|
|
|
|
|
|
//
|
|
|
|
|
|
// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable (disabled by default)
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
// P2.4: Verify slab invariant: active + tls_cached ≈ used
|
|
|
|
|
|
// Returns: true if invariant holds within tolerance, false if violated
|
|
|
|
|
|
// tolerance: maximum allowed deviation (default: 2 for TLS lag)
|
|
|
|
|
|
static inline bool ss_verify_slab_invariant(const TinySlabMeta* meta, int tolerance) {
|
|
|
|
|
|
if (!meta || meta->capacity == 0) return true; // Skip uninitialized slabs
|
|
|
|
|
|
|
|
|
|
|
|
uint16_t used = atomic_load_explicit(&meta->used, memory_order_relaxed);
|
|
|
|
|
|
uint16_t active = atomic_load_explicit(&meta->active, memory_order_relaxed);
|
|
|
|
|
|
uint16_t tls_cached = atomic_load_explicit(&meta->tls_cached, memory_order_relaxed);
|
|
|
|
|
|
|
|
|
|
|
|
int sum = (int)active + (int)tls_cached;
|
|
|
|
|
|
int diff = sum - (int)used;
|
|
|
|
|
|
if (diff < 0) diff = -diff; // abs(diff)
|
|
|
|
|
|
|
|
|
|
|
|
return (diff <= tolerance);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// P2.4: Verify all slab invariants in a SuperSlab
|
|
|
|
|
|
// Returns: count of slabs that violate the invariant
|
|
|
|
|
|
// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable checking
|
|
|
|
|
|
static inline int ss_verify_superslab_invariants(const SuperSlab* ss, int tolerance) {
|
|
|
|
|
|
static int g_invariant_check = -1;
|
|
|
|
|
|
if (__builtin_expect(g_invariant_check == -1, 0)) {
|
|
|
|
|
|
const char* e = getenv("HAKMEM_TINY_INVARIANT_CHECK");
|
|
|
|
|
|
g_invariant_check = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!g_invariant_check) return 0; // Disabled by ENV
|
|
|
|
|
|
if (!ss) return 0;
|
|
|
|
|
|
|
|
|
|
|
|
int violations = 0;
|
|
|
|
|
|
uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
|
|
|
|
|
|
if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
|
|
|
|
|
|
max_slabs = SLABS_PER_SUPERSLAB_MAX;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
|
|
|
|
|
|
const TinySlabMeta* meta = &ss->slabs[i];
|
|
|
|
|
|
if (!ss_verify_slab_invariant(meta, tolerance)) {
|
|
|
|
|
|
violations++;
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
|
|
// Debug output for violations
|
|
|
|
|
|
fprintf(stderr, "[P2.4] Invariant VIOLATION: slab[%u] used=%u active=%u tls_cached=%u (sum=%u)\n",
|
|
|
|
|
|
i, meta->used,
|
|
|
|
|
|
atomic_load_explicit(&meta->active, memory_order_relaxed),
|
|
|
|
|
|
atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
|
|
|
|
|
|
atomic_load_explicit(&meta->active, memory_order_relaxed) +
|
|
|
|
|
|
atomic_load_explicit(&meta->tls_cached, memory_order_relaxed));
|
|
|
|
|
|
#endif
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return violations;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// P2.4: Debug dump of slab state for troubleshooting
|
|
|
|
|
|
// ENV: HAKMEM_TINY_INVARIANT_DUMP=1 to enable periodic dumps
|
|
|
|
|
|
static inline void ss_dump_slab_state(const SuperSlab* ss, int slab_idx) {
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
|
|
static int g_dump_enabled = -1;
|
|
|
|
|
|
if (__builtin_expect(g_dump_enabled == -1, 0)) {
|
|
|
|
|
|
const char* e = getenv("HAKMEM_TINY_INVARIANT_DUMP");
|
|
|
|
|
|
g_dump_enabled = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (!g_dump_enabled) return;
|
|
|
|
|
|
if (!ss || slab_idx < 0 || slab_idx >= (int)ss->active_slabs) return;
|
|
|
|
|
|
|
|
|
|
|
|
const TinySlabMeta* meta = &ss->slabs[slab_idx];
|
|
|
|
|
|
fprintf(stderr, "[P2.4-DUMP] slab[%d]: used=%u active=%u tls_cached=%u capacity=%u class=%u\n",
|
|
|
|
|
|
slab_idx, meta->used,
|
|
|
|
|
|
atomic_load_explicit(&meta->active, memory_order_relaxed),
|
|
|
|
|
|
atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
|
|
|
|
|
|
meta->capacity, meta->class_idx);
|
|
|
|
|
|
#else
|
|
|
|
|
|
(void)ss;
|
|
|
|
|
|
(void)slab_idx;
|
|
|
|
|
|
#endif
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-20 07:44:07 +09:00
|
|
|
|
#endif // SS_HOT_COLD_BOX_H
|