This commit completes the P2 phase of the Tiny Pool TLS SLL redesign to fix the Header/Next pointer conflict that was causing ~30% crash rates. Changes: - P2.1: Make class_map lookup the default (ENV: HAKMEM_TINY_NO_CLASS_MAP=1 for legacy) - P2.2: Add meta->tls_cached field to track blocks cached in TLS SLL - P2.3: Make Header restoration conditional in tiny_next_store() (default: skip) - P2.4: Add invariant verification functions (active + tls_cached ≈ used) - P0.4: Document new ENV variables in ENV_VARS.md New ENV variables: - HAKMEM_TINY_ACTIVE_TRACK=1: Enable active/tls_cached tracking (~1% overhead) - HAKMEM_TINY_NO_CLASS_MAP=1: Disable class_map (legacy mode) - HAKMEM_TINY_RESTORE_HEADER=1: Force header restoration (legacy mode) - HAKMEM_TINY_INVARIANT_CHECK=1: Enable invariant verification (debug) - HAKMEM_TINY_INVARIANT_DUMP=1: Enable periodic state dumps (debug) Benchmark results (bench_tiny_hot_hakmem 64B): - Default (class_map ON): 84.49 M ops/sec - ACTIVE_TRACK=1: 83.62 M ops/sec (-1%) - NO_CLASS_MAP=1 (legacy): 85.06 M ops/sec - MT performance: +21-28% vs system allocator No crashes observed. All tests passed. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
254 lines
9.3 KiB
C
// ss_hot_cold_box.h - Phase 3d-C: Hot/Cold Slab Split Box
|
||
// Purpose: Cache locality optimization via hot/cold slab separation
|
||
// License: MIT
|
||
// Date: 2025-11-20
|
||
|
||
#ifndef SS_HOT_COLD_BOX_H
|
||
#define SS_HOT_COLD_BOX_H
|
||
|
||
#include "../superslab/superslab_types.h"

#include <stdatomic.h> // atomic_load_explicit / memory_order_relaxed used below
#include <stdbool.h>
#include <stdint.h>    // uint8_t / uint16_t / uint32_t used directly in this header
#include <stdio.h>     // P2.4: for fprintf() in debug output
#include <stdlib.h>    // P1.3: for getenv()
|
||
|
||
// ============================================================================
|
||
// Phase 3d-C: Hot/Cold Split Box API
|
||
// ============================================================================
|
||
//
|
||
// Goal: Improve L1D cache hit rate by separating hot (high utilization) and
|
||
// cold (low utilization) slabs within a SuperSlab.
|
||
//
|
||
// Strategy:
|
||
// - Hot slabs (used > 50%): Prioritized for allocation → better cache locality
|
||
// - Cold slabs (used ≤ 50%): Used as fallback → delayed deallocation
|
||
//
|
||
// Expected: +8-12% throughput from improved cache line locality
|
||
//
|
||
// Box Contract:
|
||
// - ss_is_slab_hot(): Returns true if slab should be considered "hot"
|
||
// - ss_update_hot_cold_indices(): Rebuilds hot/cold index arrays
|
||
// - ss_init_hot_cold(): Initializes hot/cold fields on SuperSlab creation
|
||
//
|
||
// ============================================================================
|
||
|
||
// Phase 3d-C: hot/cold classification threshold
#define HOT_UTILIZATION_THRESHOLD 50 // slab is "hot" when utilization exceeds 50%
|
||
|
||
// Phase 12-1.1: EMPTY detection logic (highest-priority reuse)
|
||
// P1.3: ENV gate for active-based empty detection
|
||
// ENV: HAKMEM_TINY_ACTIVE_TRACK=1 → use active, else use used
|
||
// Returns: true if slab is completely EMPTY (highest reuse priority)
|
||
static inline bool ss_is_slab_empty(const TinySlabMeta* meta) {
|
||
if (meta->capacity == 0) return false;
|
||
|
||
// P1.3: Use active-based empty detection if enabled
|
||
static int g_use_active = -1;
|
||
if (__builtin_expect(g_use_active == -1, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
|
||
g_use_active = (e && *e && *e != '0') ? 1 : 0;
|
||
}
|
||
|
||
if (g_use_active) {
|
||
// P1.3: active == 0 means all blocks returned by user (even if some in TLS SLL)
|
||
uint16_t act = atomic_load_explicit(&meta->active, memory_order_relaxed);
|
||
return (act == 0);
|
||
} else {
|
||
// Legacy: used == 0 (doesn't account for TLS SLL)
|
||
return (meta->used == 0);
|
||
}
|
||
}
|
||
|
||
// Phase 3d-C: hot-slab classification logic
|
||
// Returns: true if slab is "hot" (high utilization, should be prioritized)
|
||
static inline bool ss_is_slab_hot(const TinySlabMeta* meta) {
|
||
// ヒューリスティック: 使用率 > 50% → ホット
|
||
// 理由: 使用率が高い = 頻繁にアクセスされている = キャッシュに載せたい
|
||
if (meta->capacity == 0) {
|
||
return false; // Uninitialized slab
|
||
}
|
||
return (meta->used * 100 / meta->capacity) > HOT_UTILIZATION_THRESHOLD;
|
||
}
|
||
|
||
// Phase 12-1.1: EMPTY mask update helper
|
||
// Marks a slab as EMPTY (highest reuse priority)
|
||
static inline void ss_mark_slab_empty(SuperSlab* ss, int slab_idx) {
|
||
if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;
|
||
|
||
uint32_t bit = (1u << slab_idx);
|
||
if (!(ss->empty_mask & bit)) {
|
||
ss->empty_mask |= bit;
|
||
ss->empty_count++;
|
||
}
|
||
}
|
||
|
||
// Phase 12-1.1: EMPTY mask clear helper
|
||
// Removes a slab from EMPTY state (when reactivated)
|
||
static inline void ss_clear_slab_empty(SuperSlab* ss, int slab_idx) {
|
||
if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;
|
||
|
||
uint32_t bit = (1u << slab_idx);
|
||
if (ss->empty_mask & bit) {
|
||
ss->empty_mask &= ~bit;
|
||
ss->empty_count--;
|
||
}
|
||
}
|
||
|
||
// Phase 3d-C: Hot/Cold インデックス更新
|
||
// Rebuilds hot_indices[] and cold_indices[] arrays based on current slab state
|
||
static inline void ss_update_hot_cold_indices(SuperSlab* ss) {
|
||
if (!ss) return;
|
||
|
||
ss->hot_count = 0;
|
||
ss->cold_count = 0;
|
||
// Phase 12-1.1: Reset empty tracking
|
||
ss->empty_mask = 0;
|
||
ss->empty_count = 0;
|
||
|
||
uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
|
||
if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
|
||
max_slabs = SLABS_PER_SUPERSLAB_MAX;
|
||
}
|
||
|
||
// Scan active slabs and classify as EMPTY / hot / cold
|
||
for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
|
||
TinySlabMeta* meta = &ss->slabs[i];
|
||
|
||
// Skip uninitialized slabs (capacity == 0)
|
||
if (meta->capacity == 0) {
|
||
continue;
|
||
}
|
||
|
||
// Phase 12-1.1: EMPTY slabs have highest reuse priority
|
||
if (ss_is_slab_empty(meta)) {
|
||
ss_mark_slab_empty(ss, (int)i);
|
||
continue; // Don't add to hot/cold arrays
|
||
}
|
||
|
||
if (ss_is_slab_hot(meta)) {
|
||
// Hot slab: high utilization
|
||
if (ss->hot_count < 16) {
|
||
ss->hot_indices[ss->hot_count++] = (uint8_t)i;
|
||
}
|
||
} else {
|
||
// Cold slab: low utilization
|
||
if (ss->cold_count < 16) {
|
||
ss->cold_indices[ss->cold_count++] = (uint8_t)i;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Phase 3d-C: zero-initialize hot/cold fields at SuperSlab creation
|
||
static inline void ss_init_hot_cold(SuperSlab* ss) {
|
||
if (!ss) return;
|
||
|
||
ss->hot_count = 0;
|
||
ss->cold_count = 0;
|
||
// Phase 12-1.1: Initialize EMPTY tracking
|
||
ss->empty_mask = 0;
|
||
ss->empty_count = 0;
|
||
|
||
// Initialize index arrays to 0 (defensive programming)
|
||
for (int i = 0; i < 16; i++) {
|
||
ss->hot_indices[i] = 0;
|
||
ss->cold_indices[i] = 0;
|
||
}
|
||
}
|
||
|
||
// ============================================================================
|
||
// P2.4: Invariant Verification for Debug Builds
|
||
// ============================================================================
|
||
//
|
||
// Invariant: active + tls_cached ≈ used
|
||
//
|
||
// - active: blocks currently held by user code
|
||
// - tls_cached: blocks cached in TLS SLL (returned by user, not yet pushed to slab freelist)
|
||
// - used: total blocks carved from slab and distributed
|
||
//
|
||
// Due to concurrent updates, exact equality is not guaranteed.
|
||
// We allow a small tolerance (delta) for race conditions.
|
||
//
|
||
// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable (disabled by default)
|
||
// ============================================================================
|
||
|
||
// P2.4: Verify slab invariant: active + tls_cached ≈ used
|
||
// Returns: true if invariant holds within tolerance, false if violated
|
||
// tolerance: maximum allowed deviation (default: 2 for TLS lag)
|
||
static inline bool ss_verify_slab_invariant(const TinySlabMeta* meta, int tolerance) {
|
||
if (!meta || meta->capacity == 0) return true; // Skip uninitialized slabs
|
||
|
||
uint16_t used = atomic_load_explicit(&meta->used, memory_order_relaxed);
|
||
uint16_t active = atomic_load_explicit(&meta->active, memory_order_relaxed);
|
||
uint16_t tls_cached = atomic_load_explicit(&meta->tls_cached, memory_order_relaxed);
|
||
|
||
int sum = (int)active + (int)tls_cached;
|
||
int diff = sum - (int)used;
|
||
if (diff < 0) diff = -diff; // abs(diff)
|
||
|
||
return (diff <= tolerance);
|
||
}
|
||
|
||
// P2.4: Verify all slab invariants in a SuperSlab
|
||
// Returns: count of slabs that violate the invariant
|
||
// ENV: HAKMEM_TINY_INVARIANT_CHECK=1 to enable checking
|
||
static inline int ss_verify_superslab_invariants(const SuperSlab* ss, int tolerance) {
|
||
static int g_invariant_check = -1;
|
||
if (__builtin_expect(g_invariant_check == -1, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_INVARIANT_CHECK");
|
||
g_invariant_check = (e && *e && *e != '0') ? 1 : 0;
|
||
}
|
||
|
||
if (!g_invariant_check) return 0; // Disabled by ENV
|
||
if (!ss) return 0;
|
||
|
||
int violations = 0;
|
||
uint32_t max_slabs = (1u << ss->lg_size) / SLAB_SIZE;
|
||
if (max_slabs > SLABS_PER_SUPERSLAB_MAX) {
|
||
max_slabs = SLABS_PER_SUPERSLAB_MAX;
|
||
}
|
||
|
||
for (uint32_t i = 0; i < max_slabs && i < ss->active_slabs; i++) {
|
||
const TinySlabMeta* meta = &ss->slabs[i];
|
||
if (!ss_verify_slab_invariant(meta, tolerance)) {
|
||
violations++;
|
||
#ifndef NDEBUG
|
||
// Debug output for violations
|
||
fprintf(stderr, "[P2.4] Invariant VIOLATION: slab[%u] used=%u active=%u tls_cached=%u (sum=%u)\n",
|
||
i, meta->used,
|
||
atomic_load_explicit(&meta->active, memory_order_relaxed),
|
||
atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
|
||
atomic_load_explicit(&meta->active, memory_order_relaxed) +
|
||
atomic_load_explicit(&meta->tls_cached, memory_order_relaxed));
|
||
#endif
|
||
}
|
||
}
|
||
|
||
return violations;
|
||
}
|
||
|
||
// P2.4: Debug dump of slab state for troubleshooting
|
||
// ENV: HAKMEM_TINY_INVARIANT_DUMP=1 to enable periodic dumps
|
||
static inline void ss_dump_slab_state(const SuperSlab* ss, int slab_idx) {
|
||
#ifndef NDEBUG
|
||
static int g_dump_enabled = -1;
|
||
if (__builtin_expect(g_dump_enabled == -1, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_INVARIANT_DUMP");
|
||
g_dump_enabled = (e && *e && *e != '0') ? 1 : 0;
|
||
}
|
||
if (!g_dump_enabled) return;
|
||
if (!ss || slab_idx < 0 || slab_idx >= (int)ss->active_slabs) return;
|
||
|
||
const TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||
fprintf(stderr, "[P2.4-DUMP] slab[%d]: used=%u active=%u tls_cached=%u capacity=%u class=%u\n",
|
||
slab_idx, meta->used,
|
||
atomic_load_explicit(&meta->active, memory_order_relaxed),
|
||
atomic_load_explicit(&meta->tls_cached, memory_order_relaxed),
|
||
meta->capacity, meta->class_idx);
|
||
#else
|
||
(void)ss;
|
||
(void)slab_idx;
|
||
#endif
|
||
}
|
||
|
||
#endif // SS_HOT_COLD_BOX_H
|