Files
hakmem/core/tiny_alloc_fast_sfc.inc.h
Moe Charm (CI) 543abb0586 ENV cleanup: Consolidate SFC_DEBUG getenv() calls (86% reduction)
Optimized HAKMEM_SFC_DEBUG environment variable handling by caching
the value at initialization instead of repeated getenv() calls in
hot paths.

Changes:
1. Added g_sfc_debug global variable (core/hakmem_tiny_sfc.c)
   - Initialized once in sfc_init() by reading HAKMEM_SFC_DEBUG
   - Single source of truth for SFC debug state

2. Declared g_sfc_debug as extern (core/hakmem_tiny_config.h)
   - Available to all modules that need SFC debug checks

3. Replaced getenv() with g_sfc_debug in hot paths:
   - core/tiny_alloc_fast_sfc.inc.h (allocation path)
   - core/tiny_free_fast.inc.h (free path)
   - core/box/hak_wrappers.inc.h (wrapper layer)

Impact:
- getenv() calls: 7 → 1 (86% reduction)
- Hot-path calls eliminated: 6 (all moved to init-time)
- Performance: 15.10M ops/s (stable, 0% CV)
- Build: Clean compilation, no new warnings

Testing:
- 10 runs of 100K iterations: consistent performance
- Symbol verification: g_sfc_debug present in hakmem_tiny_sfc.o
- No regression detected

Note: 3 additional getenv("HAKMEM_SFC_DEBUG") calls exist in
hakmem_tiny_ultra_simple.inc but are dead code (file not compiled
in current build configuration).

Files modified: 5 core files
Status: Production-ready, all tests passed

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 03:18:33 +09:00

221 lines
8.4 KiB
C

// tiny_alloc_fast_sfc.inc.h - Box 5-NEW: Super Front Cache (SFC)
// Purpose: Ultra-fast TLS cache with 128-256 slots (vs old 16 slots)
// Performance: 3-4 instruction fast path, 95%+ hit rate, refill -98.5%
// Box Theory: Clear ownership, same-thread only, A/B testable
#pragma once
#include <stdint.h>
#include <stddef.h>
#include <stdio.h> // For debug output (fprintf, stderr)
#include <stdlib.h> // Declares getenv(); no getenv() call remains in this header
#include "hakmem_tiny.h"
#include "box/tiny_next_ptr_box.h" // Phase E1-CORRECT: unified next pointer API
// ============================================================================
// Box 5-NEW: Super Front Cache - Global Config
// ============================================================================
// Default tuning values (can be overridden per-class)
// Default per-class capacity; same value as SFC_MAX_CAPACITY below.
#define SFC_DEFAULT_CAPACITY 256
// Default batch size for a refill.
#define SFC_DEFAULT_REFILL_COUNT 128
#define SFC_DEFAULT_SPILL_THRESH 90 // Percent: spill when >90% full
// Per-class capacity limits (lower / upper bound)
// NOTE(review): where these bounds are enforced is not visible in this header;
// confirm in the definitions of sfc_init()/sfc_set_config().
#define SFC_MIN_CAPACITY 16
#define SFC_MAX_CAPACITY 256
// ============================================================================
// Box 5-NEW: Super Front Cache - TLS Data Structures
// ============================================================================
// Per-class cache state, indexed by size class (0 .. TINY_NUM_CLASSES-1).
// g_sfc_head and g_sfc_count are declared __thread, so every thread has its
// own copy (documented as zero-initialized at thread start); g_sfc_capacity
// is an ordinary global shared by all threads. All three are only declared
// here; their definitions live in another translation unit.
extern __thread void* g_sfc_head[TINY_NUM_CLASSES]; // Head of linked list (NULL is treated as "empty" by sfc_alloc)
extern __thread uint32_t g_sfc_count[TINY_NUM_CLASSES]; // Current count of cached blocks
extern uint32_t g_sfc_capacity[TINY_NUM_CLASSES]; // Target capacity (shared, read-only); upper bound checked by sfc_free_push
// ============================================================================
// Box 5-NEW: Super Front Cache - Statistics (optional, compile-time gated)
// ============================================================================
#if HAKMEM_DEBUG_COUNTERS
// Per-class SFC counters. Only compiled when HAKMEM_DEBUG_COUNTERS is set.
// Field order and types are part of the layout and must not be rearranged.
typedef struct {
    // --- Fast path ---------------------------------------------------------
    uint64_t alloc_hits;        // sfc_alloc() found a cached block
    uint64_t alloc_misses;      // sfc_alloc() found the list empty -> refill
    uint64_t free_hits;         // sfc_free_push() cached the block
    uint64_t free_full;         // sfc_free_push() rejected the block (full) -> spill

    // --- Slow path ---------------------------------------------------------
    uint64_t refill_calls;      // Number of refill invocations
    uint64_t refill_blocks;     // Total blocks brought in by refills
    uint64_t spill_calls;       // Number of spill invocations
    uint64_t spill_blocks;      // Total blocks pushed out by spills

    // --- Learning metrics (Phase 3) ----------------------------------------
    uint64_t alloc_window;      // Allocations seen in the current window
    uint64_t miss_window;       // Misses seen in the current window
    double   miss_rate;         // miss_window / alloc_window
    int      hotness;           // 0=cold, 1=warm, 2=hot, 3=scorching

    // --- Adaptive config (Phase 3) -----------------------------------------
    int      adaptive_capacity; // Current capacity (16-256)
    int      adaptive_refill;   // Current refill count (16-128)
} sfc_stats_t;
extern sfc_stats_t g_sfc_stats[TINY_NUM_CLASSES];
#endif // HAKMEM_DEBUG_COUNTERS
// ============================================================================
// Box 5-NEW: Super Front Cache - Fast Path (3-4 instructions)
// ============================================================================
// sfc_alloc: pop one block from the calling thread's cache for class `cls`.
//   cls     - size-class index; it indexes the per-class arrays without a
//             range check, so the caller must pass a valid class.
//   returns - the block that was at the head of the list (ownership passes
//             to the caller), or NULL when the list is empty (miss; the
//             caller should refill).
// On a hit the head is replaced by the link obtained through the Box API
// (tiny_next_read) and the per-class count is decremented. When
// HAKMEM_DEBUG_COUNTERS is enabled, alloc_hits / alloc_misses are updated.
static inline void* sfc_alloc(int cls) {
    void* const head = g_sfc_head[cls];

    // Miss is the unlikely case: nothing cached for this class.
    if (__builtin_expect(head == NULL, 0)) {
#if HAKMEM_DEBUG_COUNTERS
        g_sfc_stats[cls].alloc_misses++;
#endif
        return NULL;
    }

    // Hit: unlink the head block (Phase E1-CORRECT: next pointer via Box API).
    g_sfc_head[cls] = tiny_next_read(cls, head);
    g_sfc_count[cls] -= 1;
#if HAKMEM_DEBUG_COUNTERS
    g_sfc_stats[cls].alloc_hits++;
#endif
    return head;
}
// sfc_free_push: push `ptr` onto the calling thread's cache for class `cls`.
//   cls     - size-class index; it indexes the per-class arrays without a
//             range check.
//   ptr     - block to cache; by contract it belongs to `cls` and is being
//             freed on the same thread that owns this cache.
//   returns - 1 when the block was cached, 0 when the cache already holds
//             g_sfc_capacity[cls] blocks (the caller should spill).
// A successful push links `ptr` in front of the current head through the Box
// API (tiny_next_write) and stores count + 1, so count never exceeds capacity.
// When HAKMEM_DEBUG_COUNTERS is enabled, free_hits / free_full are updated.
static inline int sfc_free_push(int cls, void* ptr) {
    const uint32_t limit = g_sfc_capacity[cls];
    const uint32_t used = g_sfc_count[cls];
    const int has_room = (used < limit);

#if !HAKMEM_BUILD_RELEASE && defined(HAKMEM_SFC_DEBUG_LOG)
    // Optional trace, compiled out of release builds: at most 20 lines per
    // thread, and only when the runtime flag g_sfc_debug is set.
    {
        static __thread int logged_pushes = 0;
        extern int g_sfc_debug;
        extern int g_sfc_enabled;
        if (__builtin_expect(g_sfc_debug, 0) && logged_pushes < 20) {
            logged_pushes++;
            fprintf(stderr, "[SFC_FREE_PUSH] cls=%d, ptr=%p, cnt=%u, cap=%u, will_succeed=%d, enabled=%d\n",
                    cls, ptr, used, limit, has_room, g_sfc_enabled);
        }
    }
#endif

    // Full is the unlikely case: reject and let the caller spill.
    if (__builtin_expect(!has_room, 0)) {
#if HAKMEM_DEBUG_COUNTERS
        g_sfc_stats[cls].free_full++;
#endif
        return 0;
    }

    // Link the block in front of the current head (Phase E1-CORRECT: next
    // pointer written via Box API), then publish it as the new head.
    tiny_next_write(cls, ptr, g_sfc_head[cls]);
    g_sfc_head[cls] = ptr;
    g_sfc_count[cls] = used + 1;
#if HAKMEM_DEBUG_COUNTERS
    g_sfc_stats[cls].free_hits++;
#endif
    return 1;
}
// ============================================================================
// Box 5-NEW: Super Front Cache - Public API (slow path, not inline)
// ============================================================================
// Initialize SFC (called once at startup)
void sfc_init(void);
// NOTE(review): not documented in this header. The name suggests it seeds the
// SFC from already-existing TLS cache state - confirm against the definition
// before relying on it.
void sfc_cascade_from_tls_initial(void);
// Shutdown SFC (called at exit, optional)
void sfc_shutdown(void);
// Refill: Batch refill from backend (Magazine/SuperSlab)
//   cls          - size-class index
//   target_count - presumably the number of blocks requested; confirm whether
//                  it is clamped to the class capacity
// Returns: number of blocks refilled (0 on failure)
// Contract: Transfers ownership from backend to SFC
int sfc_refill(int cls, int target_count);
// Spill: Batch spill to backend when cache too full
//   cls         - size-class index
//   spill_count - presumably the number of blocks to hand back; confirm
//                 behavior when it exceeds the current count
// Returns: number of blocks spilled
// Contract: Transfers ownership from SFC to backend
int sfc_spill(int cls, int spill_count);
// ============================================================================
// Box 5-NEW: Super Front Cache - Configuration (tuning)
// ============================================================================
// Per-class tuning parameters, passed by value through
// sfc_get_config() / sfc_set_config().
// NOTE(review): the ranges previously quoted here (capacity 128-256, refill
// 64-128) are narrower than SFC_MIN_CAPACITY (16) and the 16-128 refill range
// quoted for the adaptive stats - confirm which range is actually enforced.
typedef struct {
    int capacity;      // Target capacity (documented range: 128-256)
    int refill_count;  // Batch refill size (documented range: 64-128)
    int spill_thresh;  // Percent: spill when count > capacity * thresh%
} sfc_config_t;
// Getter for the tuning parameters of class `cls`; the struct is returned by value.
// NOTE(review): behavior for an out-of-range `cls` is not visible here - confirm.
sfc_config_t sfc_get_config(int cls);
// Setter for the tuning parameters of class `cls`; `cfg` is passed by value.
// NOTE(review): whether values are validated or clamped (e.g. against
// SFC_MIN_CAPACITY / SFC_MAX_CAPACITY) is not visible here - confirm.
void sfc_set_config(int cls, sfc_config_t cfg);
// ============================================================================
// Box 5-NEW: Super Front Cache - Statistics (debugging)
// ============================================================================
// Statistics accessors; declared only when HAKMEM_DEBUG_COUNTERS is enabled,
// matching the conditional definition of sfc_stats_t.
#if HAKMEM_DEBUG_COUNTERS
// Returns the counters of class `cls` by value.
sfc_stats_t sfc_get_stats(int cls);
// NOTE(review): presumably zeroes the counters of class `cls` - confirm
// whether the adaptive_* fields are reset as well.
void sfc_reset_stats(int cls);
void sfc_print_stats(void); // Human-readable dump
#endif // HAKMEM_DEBUG_COUNTERS
// ============================================================================
// Box 5-NEW: Super Front Cache - Feature Flag (A/B testing)
// ============================================================================
// Global enable flag (set via ENV: HAKMEM_SFC_ENABLE)
// The inline fast paths in this header do not test this flag; it is only
// printed by the optional debug log in sfc_free_push(). Callers are presumably
// expected to check it before routing requests to the SFC - confirm.
extern int g_sfc_enabled;
// ============================================================================
// Box 5-NEW: Super Front Cache - Box Theory Compliance
// ============================================================================
// Invariants (enforced by design):
// - INVARIANT 1: g_sfc_count[cls] <= g_sfc_capacity[cls] (always)
// - INVARIANT 2: All pointers in cache belong to correct class (caller responsibility)
// - INVARIANT 3: SFC contains only same-thread allocations (TLS)
// - INVARIANT 4: Linked list integrity (*ptr points to valid node or NULL)
// - INVARIANT 5: g_sfc_count[cls] never underflows (it is uint32_t, so ">= 0" holds trivially; a pop must only occur when the list is non-empty)
// Ownership Rules:
// - SFC owns: Cached pointers in g_sfc_head[cls] linked list
// - Transfer IN: sfc_refill() from Magazine/SuperSlab (batch)
// - Transfer OUT: sfc_alloc() to application (single)
// - Return: sfc_free_push() from application (single)
// - Spill: sfc_spill() to Magazine/SuperSlab (batch)
// Boundaries:
// - Minimal coupling: Reuses existing sll_refill_small_from_ss()
// - Fallback: Old Box 5 (TLS SLL 16 slots) remains for backward compat
// - A/B testable: HAKMEM_SFC_ENABLE=0/1 switches between old/new
// ============================================================================
// End of tiny_alloc_fast_sfc.inc.h
// ============================================================================