// ss_hot_cold_box.h - Phase 3d-C: Hot/Cold Slab Split Box // Purpose: Cache locality optimization via hot/cold slab separation // License: MIT // Date: 2025-11-20 #ifndef SS_HOT_COLD_BOX_H #define SS_HOT_COLD_BOX_H #include "../superslab/superslab_types.h" #include #include // P1.3: for getenv() #include // P2.4: for fprintf() in debug output // ============================================================================ // Phase 3d-C: Hot/Cold Split Box API // ============================================================================ // // Goal: Improve L1D cache hit rate by separating hot (high utilization) and // cold (low utilization) slabs within a SuperSlab. // // Strategy: // - Hot slabs (used > 50%): Prioritized for allocation → better cache locality // - Cold slabs (used ≤ 50%): Used as fallback → delayed deallocation // // Expected: +8-12% throughput from improved cache line locality // // Box Contract: // - ss_is_slab_hot(): Returns true if slab should be considered "hot" // - ss_update_hot_cold_indices(): Rebuilds hot/cold index arrays // - ss_init_hot_cold(): Initializes hot/cold fields on SuperSlab creation // // ============================================================================ // Phase 3d-C: Hot/Cold判定閾値 #define HOT_UTILIZATION_THRESHOLD 50 // 使用率50%以上でホット判定 // Phase 12-1.1: EMPTY判定ロジック(最優先再利用) // P1.3: ENV gate for active-based empty detection // ENV: HAKMEM_TINY_ACTIVE_TRACK=1 → use active, else use used // Returns: true if slab is completely EMPTY (highest reuse priority) static inline bool ss_is_slab_empty(const TinySlabMeta* meta) { if (meta->capacity == 0) return false; // P1.3: Use active-based empty detection if enabled static int g_use_active = -1; if (__builtin_expect(g_use_active == -1, 0)) { const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK"); g_use_active = (e && *e && *e != '0') ? 1 : 0; } if (g_use_active) { // P1.3: active == 0 means all blocks returned by user (even if some in TLS SLL) uint16_t act = atomic_load_explicit(&meta->active, memory_order_relaxed); return (act == 0); } else { // Legacy: used == 0 (doesn't account for TLS SLL) return (meta->used == 0); } } // Phase 3d-C: Hot判定ロジック // Returns: true if slab is "hot" (high utilization, should be prioritized) static inline bool ss_is_slab_hot(const TinySlabMeta* meta) { // ヒューリスティック: 使用率 > 50% → ホット // 理由: 使用率が高い = 頻繁にアクセスされている = キャッシュに載せたい if (meta->capacity == 0) { return false; // Uninitialized slab } return (meta->used * 100 / meta->capacity) > HOT_UTILIZATION_THRESHOLD; } // Phase 12-1.1: EMPTY mask更新ヘルパー // Marks a slab as EMPTY (highest reuse priority) static inline void ss_mark_slab_empty(SuperSlab* ss, int slab_idx) { if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return; uint32_t bit = (1u << slab_idx); if (!(ss->empty_mask & bit)) { ss->empty_mask |= bit; ss->empty_count++; } } // Phase 12-1.1: EMPTY mask クリアヘルパー // Removes a slab from EMPTY state (when reactivated) static inline void ss_clear_slab_empty(SuperSlab* ss, int slab_idx) { if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return; uint32_t bit = (1u << slab_idx); if (ss->empty_mask & bit) { ss->empty_mask &= ~bit; ss->empty_count--; } } // Phase 3d-C: Hot/Cold インデックス更新 // Rebuilds hot_indices[] and cold_indices[] arrays based on current slab state static inline void ss_update_hot_cold_indices(SuperSlab* ss) { if (!ss) return; ss->hot_count = 0; ss->cold_count = 0; // Phase 12-1.1: Reset empty tracking ss->empty_mask = 0; ss->empty_count = 0; uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE; if (max_slabs > SLABS_PER_SUPERSLAB_MAX) { max_slabs = SLABS_PER_SUPERSLAB_MAX; } // Scan active slabs and classify as EMPTY / hot / cold for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) { TinySlabMeta* meta = &ss->slabs[i]; // Skip uninitialized slabs (capacity == 0) if (meta->capacity == 0) { continue; } // Phase 12-1.1: EMPTY slabs have highest reuse priority if (ss_is_slab_empty(meta)) { ss_mark_slab_empty(ss, (int)i); continue; // Don't add to hot/cold arrays } if (ss_is_slab_hot(meta)) { // Hot slab: high utilization if (ss->hot_count < 16) { ss->hot_indices[ss->hot_count++] = (uint8_t)i; } } else { // Cold slab: low utilization if (ss->cold_count < 16) { ss->cold_indices[ss->cold_count++] = (uint8_t)i; } } } } // Phase 3d-C: SuperSlab初期化時にhot/cold fieldsをゼロクリア static inline void ss_init_hot_cold(SuperSlab* ss) { if (!ss) return; ss->hot_count = 0; ss->cold_count = 0; // Phase 12-1.1: Initialize EMPTY tracking ss->empty_mask = 0; ss->empty_count = 0; // Initialize index arrays to 0 (defensive programming) for (int i = 0; i < 16; i++) { ss->hot_indices[i] = 0; ss->cold_indices[i] = 0; } } // ============================================================================ // P2.4: Invariant Verification for Debug Builds // ============================================================================ // // Invariant: active + tls_cached ≈ used // // - active: blocks currently held by user code // - tls_cached: blocks cached in TLS SLL (returned by user, not yet pushed to slab freelist) // - used: total blocks carved from slab and distributed // // Due to concurrent updates, exact equality is not guaranteed. // We allow a small tolerance (delta) for race conditions. // // ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable (disabled by default) // ============================================================================ // P2.4: Verify slab invariant: active + tls_cached ≈ used // Returns: true if invariant holds within tolerance, false if violated // tolerance: maximum allowed deviation (default: 2 for TLS lag) static inline bool ss_verify_slab_invariant(const TinySlabMeta* meta, int tolerance) { if (!meta || meta->capacity == 0) return true; // Skip uninitialized slabs uint16_t used = atomic_load_explicit(&meta->used, memory_order_relaxed); uint16_t active = atomic_load_explicit(&meta->active, memory_order_relaxed); uint16_t tls_cached = atomic_load_explicit(&meta->tls_cached, memory_order_relaxed); int sum = (int)active + (int)tls_cached; int diff = sum - (int)used; if (diff < 0) diff = -diff; // abs(diff) return (diff <= tolerance); } // P2.4: Verify all slab invariants in a SuperSlab // Returns: count of slabs that violate the invariant // ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable checking static inline int ss_verify_superslab_invariants(const SuperSlab* ss, int tolerance) { static int g_invariant_check = -1; if (__builtin_expect(g_invariant_check == -1, 0)) { const char* e = getenv("HAKMEM_TINY_INVARIANT_CHECK"); g_invariant_check = (e && *e && *e != '0') ? 1 : 0; } if (!g_invariant_check) return 0; // Disabled by ENV if (!ss) return 0; int violations = 0; uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE; if (max_slabs > SLABS_PER_SUPERSLAB_MAX) { max_slabs = SLABS_PER_SUPERSLAB_MAX; } for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) { const TinySlabMeta* meta = &ss->slabs[i]; if (!ss_verify_slab_invariant(meta, tolerance)) { violations++; #ifndef NDEBUG // Debug output for violations fprintf(stderr, "[P2.4] Invariant VIOLATION: slab[%u] used=%u active=%u tls_cached=%u (sum=%u)\n", i, meta->used, atomic_load_explicit(&meta->active, memory_order_relaxed), atomic_load_explicit(&meta->tls_cached, memory_order_relaxed), atomic_load_explicit(&meta->active, memory_order_relaxed) + atomic_load_explicit(&meta->tls_cached, memory_order_relaxed)); #endif } } return violations; } // P2.4: Debug dump of slab state for troubleshooting // ENV: HAKMEM_TINY_INVARIANT_DUMP=1 to enable periodic dumps static inline void ss_dump_slab_state(const SuperSlab* ss, int slab_idx) { #ifndef NDEBUG static int g_dump_enabled = -1; if (__builtin_expect(g_dump_enabled == -1, 0)) { const char* e = getenv("HAKMEM_TINY_INVARIANT_DUMP"); g_dump_enabled = (e && *e && *e != '0') ? 1 : 0; } if (!g_dump_enabled) return; if (!ss || slab_idx < 0 || slab_idx >= (int)ss->active_slabs) return; const TinySlabMeta* meta = &ss->slabs[slab_idx]; fprintf(stderr, "[P2.4-DUMP] slab[%d]: used=%u active=%u tls_cached=%u capacity=%u class=%u\n", slab_idx, meta->used, atomic_load_explicit(&meta->active, memory_order_relaxed), atomic_load_explicit(&meta->tls_cached, memory_order_relaxed), meta->capacity, meta->class_idx); #else (void)ss; (void)slab_idx; #endif } #endif // SS_HOT_COLD_BOX_H