Phase 24-26: Hot path atomic telemetry prune (+2.00% cumulative)
Summary: - Phase 24 (alloc stats): +0.93% GO - Phase 25 (free stats): +1.07% GO - Phase 26 (diagnostics): -0.33% NEUTRAL (code cleanliness) - Total: 11 atomics compiled-out, +2.00% improvement Phase 24: OBSERVE tax prune (tiny_class_stats_box.h) - Added HAKMEM_TINY_CLASS_STATS_COMPILED (default: 0) - Wrapped 5 stats functions: uc_miss, warm_hit, shared_lock, tls_carve_* - Result: +0.93% (baseline 56.675M vs compiled-in 56.151M ops/s) Phase 25: Tiny free stats prune (tiny_superslab_free.inc.h) - Added HAKMEM_TINY_FREE_STATS_COMPILED (default: 0) - Wrapped g_free_ss_enter atomic in free hot path - Result: +1.07% (baseline 57.017M vs compiled-in 56.415M ops/s) Phase 26: Hot path diagnostic atomics prune - Added 5 compile gates for low-frequency error counters: - HAKMEM_TINY_C7_FREE_COUNT_COMPILED - HAKMEM_TINY_HDR_MISMATCH_LOG_COMPILED - HAKMEM_TINY_HDR_META_MISMATCH_COMPILED - HAKMEM_TINY_METRIC_BAD_CLASS_COMPILED - HAKMEM_TINY_HDR_META_FAST_COMPILED - Result: -0.33% NEUTRAL (within noise, kept for cleanliness) Alignment with mimalloc principles: - "No atomics on hot path" - telemetry moved to compile-time opt-in - Fixed per-op tax elimination - Production builds: maximum performance (atomics compiled-out) - Research builds: full diagnostics (COMPILED=1) Generated with Claude Code https://claude.com/claude-code Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -223,12 +223,15 @@ static inline int unified_cache_push(int class_idx, hak_base_ptr_t base) {
|
||||
|
||||
void* base_raw = HAK_BASE_TO_RAW(base);
|
||||
|
||||
#if HAKMEM_TINY_TCACHE_COMPILED
|
||||
// Phase 14 v1: Try tcache first (intrusive LIFO, no array access)
|
||||
// Phase 22: Compile-out when disabled (default OFF)
|
||||
if (tiny_tcache_try_push(class_idx, base_raw)) {
|
||||
return 1; // SUCCESS (tcache hit, no array access)
|
||||
}
|
||||
#endif
|
||||
|
||||
// Tcache overflow or disabled → fall through to array cache
|
||||
// Tcache overflow/disabled/compiled-out → fall through to array cache
|
||||
TinyUnifiedCache* cache = &g_unified_cache[class_idx]; // 1 cache miss (TLS)
|
||||
|
||||
// Phase 8-Step3: Lazy init check (conditional in PGO mode)
|
||||
@ -289,30 +292,36 @@ static inline hak_base_ptr_t unified_cache_pop_or_refill(int class_idx) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAKMEM_TINY_TCACHE_COMPILED
|
||||
// Phase 14 v1: Try tcache first (intrusive LIFO, no array access)
|
||||
// Phase 22: Compile-out when disabled (default OFF)
|
||||
void* tcache_base = tiny_tcache_try_pop(class_idx);
|
||||
if (tcache_base != NULL) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
g_unified_cache_hit[class_idx]++;
|
||||
#endif
|
||||
// Performance measurement: count cache hits (ENV enabled only)
|
||||
#if HAKMEM_TINY_UNIFIED_CACHE_MEASURE_COMPILED
|
||||
// Phase 23: Performance measurement (compile-out when disabled, default OFF)
|
||||
if (__builtin_expect(unified_cache_measure_check(), 0)) {
|
||||
atomic_fetch_add_explicit(&g_unified_cache_hits_global,
|
||||
1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_unified_cache_hits_by_class[class_idx],
|
||||
1, memory_order_relaxed);
|
||||
}
|
||||
#endif
|
||||
return HAK_BASE_FROM_RAW(tcache_base); // HIT (tcache, no array access)
|
||||
}
|
||||
#endif
|
||||
|
||||
// Tcache miss or disabled → try pop from array cache (fast path)
|
||||
// Tcache miss/disabled/compiled-out → try pop from array cache (fast path)
|
||||
if (__builtin_expect(cache->head != cache->tail, 1)) {
|
||||
void* base = cache->slots[cache->head]; // 1 cache miss (array access)
|
||||
cache->head = (cache->head + 1) & cache->mask;
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
g_unified_cache_hit[class_idx]++;
|
||||
#endif
|
||||
// Performance measurement: count cache hits(ENV 有効時のみ)
|
||||
#if HAKMEM_TINY_UNIFIED_CACHE_MEASURE_COMPILED
|
||||
// Phase 23: Performance measurement (compile-out when disabled, default OFF)
|
||||
if (__builtin_expect(unified_cache_measure_check(), 0)) {
|
||||
atomic_fetch_add_explicit(&g_unified_cache_hits_global,
|
||||
1, memory_order_relaxed);
|
||||
@ -320,6 +329,7 @@ static inline hak_base_ptr_t unified_cache_pop_or_refill(int class_idx) {
|
||||
atomic_fetch_add_explicit(&g_unified_cache_hits_by_class[class_idx],
|
||||
1, memory_order_relaxed);
|
||||
}
|
||||
#endif
|
||||
return HAK_BASE_FROM_RAW(base); // Hit! (2-3 cache misses total)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user