Phase 74-1/74-2: UnifiedCache LOCALIZE optimization (P1 frozen, NEUTRAL -0.87%)
Phase 74-1 (ENV-gated LOCALIZE): - Result: +0.50% (NEUTRAL) - Runtime branch overhead caused instructions/branches to increase - Diagnosed: Branch tax dominates intended optimization Phase 74-2 (compile-time LOCALIZE): - Result: -0.87% (NEUTRAL, P1 frozen) - Removed runtime branch → instructions -0.6%, branches -2.3% ✓ - But cache-misses +86% (register pressure/spill) → net loss - Conclusion: LOCALIZE itself works, but fragile to cache effects Key finding: - Dependency chain reduction (LOCALIZE) has low ROI due to cache-miss sensitivity - P1 (LOCALIZE) frozen at default OFF - Next: Phase 74-3 (P0: FASTAPI) - move branches outside hot loop Files: - core/hakmem_build_flags.h: HAKMEM_TINY_UC_LOCALIZE_COMPILED flag - core/box/tiny_unified_cache_hitpath_env_box.h: ENV gate (frozen) - core/front/tiny_unified_cache.h: compile-time #if blocks - docs/analysis/PHASE74_*: Design, instructions, results - CURRENT_TASK.md: P1 frozen, P0 next instructions Also includes: - Phase 69 refill tuning results (archived docs) - PERFORMANCE_TARGETS_SCORECARD.md: Phase 69 baseline update - PHASE70_REFILL_OBSERVABILITY_PREREQS_SSOT.md: Route banner docs 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -31,6 +31,7 @@
|
||||
#include "../box/ptr_type_box.h" // Phantom pointer types (BASE/USER)
|
||||
#include "../box/tiny_front_config_box.h" // Phase 8-Step1: Config macros
|
||||
#include "../box/tiny_tcache_box.h" // Phase 14 v1: Intrusive LIFO tcache
|
||||
#include "../box/tiny_unified_cache_hitpath_env_box.h" // Phase 74: LOCALIZE ENV gate
|
||||
|
||||
// ============================================================================
|
||||
// Phase 3 C2 Patch 3: Bounds Check Compile-out
|
||||
@ -247,6 +248,30 @@ static inline int unified_cache_push(int class_idx, hak_base_ptr_t base) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Phase 74-2: LOCALIZE optimization (compile-time gate, no runtime branch)
|
||||
#if HAKMEM_TINY_UC_LOCALIZE_COMPILED
|
||||
// LOCALIZE: Load head/tail/mask once into locals to avoid reload dependency chains
|
||||
uint16_t head = cache->head;
|
||||
uint16_t tail = cache->tail;
|
||||
uint16_t mask = cache->mask;
|
||||
uint16_t next_tail = (tail + 1) & mask;
|
||||
|
||||
if (__builtin_expect(next_tail == head, 0)) {
|
||||
#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
|
||||
g_unified_cache_full[class_idx]++;
|
||||
#endif
|
||||
return 0; // Full
|
||||
}
|
||||
|
||||
cache->slots[tail] = base_raw;
|
||||
cache->tail = next_tail;
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
|
||||
g_unified_cache_push[class_idx]++;
|
||||
#endif
|
||||
return 1; // SUCCESS (LOCALIZE path)
|
||||
#else
|
||||
// Default path: Original implementation
|
||||
uint16_t next_tail = (cache->tail + 1) & cache->mask;
|
||||
|
||||
// Full check (leave 1 slot empty to distinguish full/empty)
|
||||
@ -266,6 +291,7 @@ static inline int unified_cache_push(int class_idx, hak_base_ptr_t base) {
|
||||
#endif
|
||||
|
||||
return 1; // SUCCESS (2-3 cache misses total)
|
||||
#endif // HAKMEM_TINY_UC_LOCALIZE_COMPILED
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
@ -316,6 +342,37 @@ static inline hak_base_ptr_t unified_cache_pop_or_refill(int class_idx) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Phase 74-2: LOCALIZE optimization (compile-time gate, no runtime branch)
|
||||
#if HAKMEM_TINY_UC_LOCALIZE_COMPILED
|
||||
// LOCALIZE: Load head/tail/mask once into locals to avoid reload dependency chains
|
||||
uint16_t head = cache->head;
|
||||
uint16_t tail = cache->tail;
|
||||
uint16_t mask = cache->mask;
|
||||
|
||||
if (__builtin_expect(head != tail, 1)) {
|
||||
void* base = cache->slots[head];
|
||||
cache->head = (head + 1) & mask;
|
||||
#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
|
||||
g_unified_cache_hit[class_idx]++;
|
||||
#endif
|
||||
#if HAKMEM_TINY_UNIFIED_CACHE_MEASURE_COMPILED
|
||||
if (__builtin_expect(unified_cache_measure_check(), 0)) {
|
||||
atomic_fetch_add_explicit(&g_unified_cache_hits_global,
|
||||
1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_unified_cache_hits_by_class[class_idx],
|
||||
1, memory_order_relaxed);
|
||||
}
|
||||
#endif
|
||||
return HAK_BASE_FROM_RAW(base); // Hit! (LOCALIZE path)
|
||||
}
|
||||
|
||||
// Cache miss → Batch refill from SuperSlab
|
||||
#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
|
||||
g_unified_cache_miss[class_idx]++;
|
||||
#endif
|
||||
return unified_cache_refill(class_idx);
|
||||
#else
|
||||
// Default path: Original implementation
|
||||
// Tcache miss/disabled/compiled-out → try pop from array cache (fast path)
|
||||
if (__builtin_expect(cache->head != cache->tail, 1)) {
|
||||
void* base = cache->slots[cache->head]; // 1 cache miss (array access)
|
||||
@ -341,6 +398,7 @@ static inline hak_base_ptr_t unified_cache_pop_or_refill(int class_idx) {
|
||||
g_unified_cache_miss[class_idx]++;
|
||||
#endif
|
||||
return unified_cache_refill(class_idx); // Refill + return first block (BASE)
|
||||
#endif // HAKMEM_TINY_UC_LOCALIZE_COMPILED
|
||||
}
|
||||
|
||||
#endif // HAK_FRONT_TINY_UNIFIED_CACHE_H
|
||||
|
||||
Reference in New Issue
Block a user