Files
hakmem/core/box/ss_hot_cold_box.h
Moe Charm (CI) 6b86c60a20 P1.3: Add meta->active for TLS SLL tracking
Add active field to TinySlabMeta to track blocks currently held by
users (not in TLS SLL or freelist caches). This enables accurate
empty slab detection that accounts for TLS SLL cached blocks.

Changes:
- superslab_types.h: Add _Atomic uint16_t active field
- ss_allocation_box.c, hakmem_tiny_superslab.c: Initialize active=0
- tiny_free_fast_v2.inc.h: Decrement active on TLS SLL push
- tiny_alloc_fast.inc.h: Add tiny_active_track_alloc() helper,
  increment active on TLS SLL pop (all code paths)
- ss_hot_cold_box.h: ss_is_slab_empty() uses active when enabled

All tracking is ENV-gated: HAKMEM_TINY_ACTIVE_TRACK=1 to enable.
Default is off for zero performance impact.

Invariant: active = used - tls_cached (active <= used)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 13:53:45 +09:00

158 lines
5.3 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ss_hot_cold_box.h - Phase 3d-C: Hot/Cold Slab Split Box
// Purpose: Cache locality optimization via hot/cold slab separation
// License: MIT
// Date: 2025-11-20
#ifndef SS_HOT_COLD_BOX_H
#define SS_HOT_COLD_BOX_H
#include "../superslab/superslab_types.h"
#include <stdbool.h>
#include <stdlib.h> // P1.3: for getenv()
// ============================================================================
// Phase 3d-C: Hot/Cold Split Box API
// ============================================================================
//
// Goal: Improve L1D cache hit rate by separating hot (high utilization) and
// cold (low utilization) slabs within a SuperSlab.
//
// Strategy:
// - Hot slabs (used > 50%): Prioritized for allocation → better cache locality
// - Cold slabs (used ≤ 50%): Used as fallback → delayed deallocation
//
// Expected: +8-12% throughput from improved cache line locality
//
// Box Contract:
// - ss_is_slab_hot(): Returns true if slab should be considered "hot"
// - ss_update_hot_cold_indices(): Rebuilds hot/cold index arrays
// - ss_init_hot_cold(): Initializes hot/cold fields on SuperSlab creation
//
// ============================================================================
// Phase 3d-C: Hot/Cold classification threshold.
// Utilization percentage (used * 100 / capacity) that a slab must strictly
// exceed for ss_is_slab_hot() to report it as hot.
#define HOT_UTILIZATION_THRESHOLD 50 // percent
// Phase 12-1.1: EMPTY detection (EMPTY slabs get the highest reuse priority).
// P1.3: The criterion is ENV-gated:
//   HAKMEM_TINY_ACTIVE_TRACK set to a non-empty value not starting with '0'
//     -> slab is EMPTY when meta->active == 0
//        (all blocks returned by the user, even if some sit in the TLS SLL)
//   otherwise
//     -> slab is EMPTY when meta->used == 0 (doesn't account for TLS SLL)
// The environment variable is read the first time the check is reached and the
// decision is cached in a function-local static; since this is a static inline
// function, every translation unit that includes it keeps its own cached copy.
// Returns: true if slab is completely EMPTY; always false when capacity == 0.
static inline bool ss_is_slab_empty(const TinySlabMeta* meta) {
    // Cached ENV decision: -1 = not read yet, 0 = use `used`, 1 = use `active`
    static int s_active_mode = -1;

    if (meta->capacity == 0) {
        return false;
    }

    if (__builtin_expect(s_active_mode == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_ACTIVE_TRACK");
        int enabled = 0;
        if (env && *env && *env != '0') {
            enabled = 1;
        }
        s_active_mode = enabled;
    }

    if (!s_active_mode) {
        // Legacy path: judge by the used counter only
        return (meta->used == 0);
    }

    // P1.3 path: judge by the active counter (relaxed atomic read)
    uint16_t live = atomic_load_explicit(&meta->active, memory_order_relaxed);
    return (live == 0);
}
// Phase 3d-C: Hot classification
// Heuristic: utilization above HOT_UTILIZATION_THRESHOLD percent -> hot.
// Rationale: high utilization = frequently accessed = worth keeping in cache.
// Returns: true if slab is "hot" (high utilization, should be prioritized);
//          false for an uninitialized slab (capacity == 0).
static inline bool ss_is_slab_hot(const TinySlabMeta* meta) {
    if (meta->capacity != 0) {
        // Utilization in percent, compared strictly against the threshold
        return (meta->used * 100 / meta->capacity) > HOT_UTILIZATION_THRESHOLD;
    }
    return false;  // Uninitialized slab
}
// Phase 12-1.1: EMPTY mask update helper
// Marks slab `slab_idx` as EMPTY (highest reuse priority): sets its bit in
// ss->empty_mask and increments ss->empty_count.
// No-op when ss is NULL, slab_idx is outside [0, SLABS_PER_SUPERSLAB_MAX),
// or the bit is already set (so the count is never bumped twice).
// NOTE(review): the 32-bit shift assumes SLABS_PER_SUPERSLAB_MAX <= 32 — confirm.
static inline void ss_mark_slab_empty(SuperSlab* ss, int slab_idx) {
    if (!ss) return;
    if (slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;

    const uint32_t mask = (1u << slab_idx);
    if ((ss->empty_mask & mask) != 0) {
        return;  // already marked EMPTY
    }
    ss->empty_mask |= mask;
    ss->empty_count++;
}
// Phase 12-1.1: EMPTY mask clear helper
// Removes slab `slab_idx` from the EMPTY state (when reactivated): clears its
// bit in ss->empty_mask and decrements ss->empty_count.
// No-op when ss is NULL, slab_idx is outside [0, SLABS_PER_SUPERSLAB_MAX),
// or the bit is not currently set (so the count is never decremented twice).
// NOTE(review): the 32-bit shift assumes SLABS_PER_SUPERSLAB_MAX <= 32 — confirm.
static inline void ss_clear_slab_empty(SuperSlab* ss, int slab_idx) {
    if (!ss) return;
    if (slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) return;

    const uint32_t mask = (1u << slab_idx);
    if ((ss->empty_mask & mask) == 0) {
        return;  // not marked EMPTY, nothing to undo
    }
    ss->empty_mask &= ~mask;
    ss->empty_count--;
}
// Phase 3d-C: Hot/Cold index update
// Rebuilds hot_indices[] / cold_indices[] and the EMPTY mask/count from the
// current slab state. All four counters/masks are reset first, then each slab
// in range is classified as exactly one of: skipped (capacity == 0), EMPTY,
// hot, or cold. At most 16 hot and 16 cold indices are recorded; further
// slabs of that class are left out of the arrays.
// No-op when ss is NULL.
static inline void ss_update_hot_cold_indices(SuperSlab* ss) {
    enum { kHotColdIndexSlots = 16 };  // max entries stored per index array

    if (!ss) return;

    ss->hot_count = 0;
    ss->cold_count = 0;

    // Phase 12-1.1: Reset empty tracking
    ss->empty_mask = 0;
    ss->empty_count = 0;

    // Slab count implied by the SuperSlab size, clamped to the static maximum
    uint32_t slab_limit = (1u << ss->lg_size) / SLAB_SIZE;
    if (slab_limit > SLABS_PER_SUPERSLAB_MAX) {
        slab_limit = SLABS_PER_SUPERSLAB_MAX;
    }

    // Scan active slabs and classify as EMPTY / hot / cold
    for (uint32_t idx = 0; idx < slab_limit && idx < ss->active_slabs; idx++) {
        TinySlabMeta* slab = &ss->slabs[idx];

        // Skip uninitialized slabs (capacity == 0)
        if (slab->capacity == 0) continue;

        // Phase 12-1.1: EMPTY slabs have highest reuse priority and are
        // tracked via the mask only, never in the hot/cold arrays
        if (ss_is_slab_empty(slab)) {
            ss_mark_slab_empty(ss, (int)idx);
            continue;
        }

        if (!ss_is_slab_hot(slab)) {
            // Cold slab: low utilization
            if (ss->cold_count < kHotColdIndexSlots) {
                ss->cold_indices[ss->cold_count++] = (uint8_t)idx;
            }
            continue;
        }

        // Hot slab: high utilization
        if (ss->hot_count < kHotColdIndexSlots) {
            ss->hot_indices[ss->hot_count++] = (uint8_t)idx;
        }
    }
}
// Phase 3d-C: Zero-clear the hot/cold fields when a SuperSlab is initialized
// Resets hot/cold counts, the EMPTY mask/count (Phase 12-1.1), and the first
// 16 entries of both index arrays. No-op when ss is NULL.
static inline void ss_init_hot_cold(SuperSlab* ss) {
    if (!ss) {
        return;
    }

    ss->hot_count = 0;
    ss->cold_count = 0;

    // Phase 12-1.1: Initialize EMPTY tracking
    ss->empty_mask = 0;
    ss->empty_count = 0;

    // Clear the index arrays as well (defensive; counts are already zero)
    int slot = 0;
    while (slot < 16) {
        ss->hot_indices[slot] = 0;
        ss->cold_indices[slot] = 0;
        slot++;
    }
}
#endif // SS_HOT_COLD_BOX_H