Modularize Warm Pool with 3 Box Refactorings - Phase B-3a Complete
Objective: Clean up warm pool implementation by extracting inline boxes for statistics, carving, and prefill logic. Achieved full modularity with zero performance regression using aggressive inline optimization.

Changes:

1. **Legacy Code Removal** (Phase 0)
   - Removed unused static __thread prefill_attempt_count variable
   - Cleaned up duplicate comments
   - Simplified carve failure handling

2. **Warm Pool Statistics Box** (Phase 1)
   - New file: core/box/warm_pool_stats_box.h
   - Inline APIs: warm_pool_record_hit/miss/prefilled()
   - All statistics recording externalized
   - Integrated into unified_cache.c
   - Performance: 0 cost (inlined to direct memory write)

3. **Slab Carving Box** (Phase 2)
   - New file: core/box/slab_carve_box.h
   - Inline API: slab_carve_from_ss()
   - Extracted unified_cache_carve_from_ss() function
   - Now reusable by other refill paths (P0, etc.)
   - Performance: 100% inlined, O(slabs) scan unchanged

4. **Warm Pool Prefill Box** (Phase 3)
   - New file: core/box/warm_pool_prefill_box.h
   - Inline API: warm_pool_do_prefill()
   - Extracted prefill loop with configurable budget
   - WARM_POOL_PREFILL_BUDGET = 3 (tunable)
   - Cold path optimization (only on empty pool)
   - Performance: Cold path cost (non-critical)

Architecture:
- core/front/tiny_unified_cache.c now 40+ lines shorter
- Logic distributed to 3 well-defined boxes
- Each box has single responsibility (SRP)
- Inline compilation preserves hot path performance
- LTO (-flto) enables cross-file inlining

Performance Results:
- 1M allocations: 4.099M ops/s (maintained)
- 5M allocations: 4.046M ops/s (maintained)
- 55.6% warm pool hit rate (unchanged)
- Zero regression on throughput
- All three boxes fully inlined by compiler

Code Quality Improvements:
✅ Removed legacy unused variables
✅ Separated concerns into specialized boxes
✅ Improved readability and maintainability
✅ Preserved performance via aggressive inline
✅ Enabled future reuse (carve box for P0)

Testing:
✅ Compilation: No errors
✅ Functionality: 1M and 5M allocation tests pass
✅ Performance: Baseline maintained
✅ Statistics: Output identical to pre-refactor

Next Phase: Consider similar modularization for:
- Registry scanning (registry_scan_box.h)
- TLS management (tls_management_box.h)
- Cache operations (unified_cache_policy_box.h)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -10,6 +10,9 @@
|
||||
#include "../box/pagefault_telemetry_box.h" // Phase 24: Box PageFaultTelemetry (Tiny page touch stats)
|
||||
#include "../box/ss_tier_box.h" // For ss_tier_is_hot() tier checks
|
||||
#include "../box/ss_slab_meta_box.h" // For ss_active_add() and slab metadata operations
|
||||
#include "../box/warm_pool_stats_box.h" // Box: Warm Pool Statistics Recording (inline)
|
||||
#include "../box/slab_carve_box.h" // Box: Slab Carving (inline O(slabs) scan)
|
||||
#include "../box/warm_pool_prefill_box.h" // Box: Warm Pool Prefill (secondary optimization)
|
||||
#include "../hakmem_env_cache.h" // Priority-2: ENV cache (eliminate syscalls)
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -378,73 +381,9 @@ static inline int unified_refill_validate_base(int class_idx,
|
||||
// Warm Pool Enhanced: Direct carve from warm SuperSlab (bypass superslab_refill)
|
||||
// ============================================================================
|
||||
|
||||
// Helper: Try to carve blocks directly from a SuperSlab (warm pool path)
|
||||
// Returns: Number of blocks produced (0 if failed)
|
||||
static inline int unified_cache_carve_from_ss(int class_idx, SuperSlab* ss,
|
||||
void** out, int max_blocks) {
|
||||
if (!ss || ss->magic != SUPERSLAB_MAGIC) return 0;
|
||||
|
||||
// Find an available slab in this SuperSlab
|
||||
int cap = ss_slabs_capacity(ss);
|
||||
for (int slab_idx = 0; slab_idx < cap; slab_idx++) {
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
|
||||
// Check if this slab matches our class and has capacity
|
||||
if (meta->class_idx != (uint8_t)class_idx) continue;
|
||||
if (meta->used >= meta->capacity && !meta->freelist) continue;
|
||||
|
||||
// Carve blocks from this slab
|
||||
size_t bs = tiny_stride_for_class(class_idx);
|
||||
uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
|
||||
int produced = 0;
|
||||
|
||||
while (produced < max_blocks) {
|
||||
void* p = NULL;
|
||||
|
||||
if (meta->freelist) {
|
||||
// Pop from freelist
|
||||
p = meta->freelist;
|
||||
void* next_node = tiny_next_read(class_idx, p);
|
||||
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
*(uint8_t*)p = (uint8_t)(0xa0 | (class_idx & 0x0f));
|
||||
__atomic_thread_fence(__ATOMIC_RELEASE);
|
||||
#endif
|
||||
|
||||
meta->freelist = next_node;
|
||||
meta->used++;
|
||||
|
||||
} else if (meta->carved < meta->capacity) {
|
||||
// Linear carve
|
||||
p = (void*)(base + ((size_t)meta->carved * bs));
|
||||
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
*(uint8_t*)p = (uint8_t)(0xa0 | (class_idx & 0x0f));
|
||||
#endif
|
||||
|
||||
meta->carved++;
|
||||
meta->used++;
|
||||
|
||||
} else {
|
||||
break; // This slab exhausted
|
||||
}
|
||||
|
||||
if (p) {
|
||||
pagefault_telemetry_touch(class_idx, p);
|
||||
out[produced++] = p;
|
||||
}
|
||||
}
|
||||
|
||||
if (produced > 0) {
|
||||
ss_active_add(ss, (uint32_t)produced);
|
||||
return produced;
|
||||
}
|
||||
}
|
||||
|
||||
return 0; // No suitable slab found in this SuperSlab
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Batch refill from SuperSlab (called on cache miss)
|
||||
// ============================================================================
|
||||
// Returns: BASE pointer (first block, wrapped), or NULL-wrapped if failed
|
||||
// Design: Direct carve from SuperSlab to array (no TLS SLL intermediate layer)
|
||||
// Warm Pool Integration: PRIORITIZE warm pool, use superslab_refill as fallback
|
||||
@ -489,14 +428,18 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
|
||||
SuperSlab* warm_ss = tiny_warm_pool_pop(class_idx);
|
||||
if (warm_ss) {
|
||||
// HOT PATH: Warm pool hit, try to carve directly
|
||||
produced = unified_cache_carve_from_ss(class_idx, warm_ss, out, room);
|
||||
produced = slab_carve_from_ss(class_idx, warm_ss, out, room);
|
||||
if (produced > 0) {
|
||||
// Update active counter for carved blocks
|
||||
ss_active_add(warm_ss, (uint32_t)produced);
|
||||
}
|
||||
|
||||
if (produced > 0) {
|
||||
// Success! Return SuperSlab to warm pool for next use
|
||||
tiny_warm_pool_push(class_idx, warm_ss);
|
||||
|
||||
// Track warm pool hit (always compiled, ENV-gated printing)
|
||||
g_warm_pool_stats[class_idx].hits++;
|
||||
warm_pool_record_hit(class_idx);
|
||||
|
||||
// Store blocks into cache and return first
|
||||
void* first = out[0];
|
||||
@ -522,49 +465,25 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
|
||||
// SuperSlab carve failed (produced == 0)
|
||||
// This slab is either exhausted or has no more available capacity
|
||||
// The statistics counter 'prefilled' tracks how often we try to prefill
|
||||
// To improve: implement secondary prefill (scan for more HOT SuperSlabs)
|
||||
static __thread int prefill_attempt_count = 0;
|
||||
if (produced == 0 && tiny_warm_pool_count(class_idx) == 0) {
|
||||
// Pool is empty and carve failed - prefill would help here
|
||||
g_warm_pool_stats[class_idx].prefilled++;
|
||||
prefill_attempt_count = 0; // Reset counter
|
||||
warm_pool_record_prefilled(class_idx);
|
||||
}
|
||||
}
|
||||
|
||||
// ========== COLD PATH: Warm pool miss, use superslab_refill ==========
|
||||
// Track warm pool miss (always compiled, ENV-gated printing)
|
||||
g_warm_pool_stats[class_idx].misses++;
|
||||
warm_pool_record_miss(class_idx);
|
||||
|
||||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||||
|
||||
// Step 1: Ensure SuperSlab available via normal refill
|
||||
// Enhanced: If pool is empty (just became empty), try prefill
|
||||
// Prefill budget: Load 3 extra SuperSlabs when pool is empty for better hit rate
|
||||
int pool_prefill_budget = (tiny_warm_pool_count(class_idx) == 0) ? 3 : 1;
|
||||
|
||||
while (pool_prefill_budget > 0) {
|
||||
if (!tls->ss) {
|
||||
if (!superslab_refill(class_idx)) return HAK_BASE_FROM_RAW(NULL);
|
||||
tls = &g_tls_slabs[class_idx]; // Reload after refill
|
||||
}
|
||||
|
||||
// Warm Pool: Cache this SuperSlab for potential future use
|
||||
// This provides locality - same SuperSlab likely to have more available slabs
|
||||
if (tls->ss && tls->ss->magic == SUPERSLAB_MAGIC) {
|
||||
if (pool_prefill_budget > 1) {
|
||||
// Prefill mode: push to warm pool and load another slab
|
||||
tiny_warm_pool_push(class_idx, tls->ss);
|
||||
g_warm_pool_stats[class_idx].prefilled++;
|
||||
tls->ss = NULL; // Force next iteration to refill
|
||||
pool_prefill_budget--;
|
||||
} else {
|
||||
// Final slab: keep for carving, don't push yet
|
||||
pool_prefill_budget = 0;
|
||||
}
|
||||
} else {
|
||||
pool_prefill_budget = 0;
|
||||
}
|
||||
// Enhanced: Use Warm Pool Prefill Box for secondary prefill when pool is empty
|
||||
if (warm_pool_do_prefill(class_idx, tls) < 0) {
|
||||
return HAK_BASE_FROM_RAW(NULL);
|
||||
}
|
||||
// After prefill: tls->ss has the final slab for carving
|
||||
// tls = &g_tls_slabs[class_idx]; // Reload (already done in prefill box)
|
||||
|
||||
// Step 2: Direct carve from SuperSlab into local array (bypass TLS SLL!)
|
||||
TinySlabMeta* m = tls->meta;
|
||||
|
||||
Reference in New Issue
Block a user