Phase 83-1 + Allocator Comparison: Switch dispatch fixed (NO-GO +0.32%), PROFILE correction, SCORECARD update
Key changes:

- Phase 83-1: Switch dispatch fixed mode (tiny_inline_slots_switch_dispatch_fixed_box) - NO-GO (marginal +0.32%, branch reduction negligible)
  Reason: lazy-init pattern already optimal, Phase 78-1 pattern shows diminishing returns
- Allocator comparison baseline update (10-run SSOT, WS=400, ITERS=20M):
  - tcmalloc: 115.26M (92.33% of mimalloc)
  - jemalloc: 97.39M (77.96% of mimalloc)
  - system: 85.20M (68.24% of mimalloc)
  - mimalloc: 124.82M (baseline)
- hakmem PROFILE correction: scripts/run_mixed_10_cleanenv.sh + run_allocator_quick_matrix.sh
  - PROFILE explicitly set to MIXED_TINYV3_C7_SAFE for hakmem measurements
  - Result: baseline stabilized to 55.53M (44.46% of mimalloc)
  - Previous unstable measurement (35.57M) was due to profile leak
- Documentation:
  * PERFORMANCE_TARGETS_SCORECARD.md: Reference allocators + M1/M2 milestone status
  * PHASE83_1_SWITCH_DISPATCH_FIXED_RESULTS.md: Phase 83-1 analysis (NO-GO)
  * ALLOCATOR_COMPARISON_QUICK_RUNBOOK.md: Quick comparison procedure
  * ALLOCATOR_COMPARISON_SSOT.md: Detailed SSOT methodology
- M2 milestone status: 44.46% (target 55%, gap -10.54pp) - structural improvements needed

🤖 Generated with Claude Code

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -35,6 +35,15 @@
|
||||
#include "../front/tiny_c6_inline_slots.h" // Phase 75-1: C6 inline slots API
|
||||
#include "tiny_c5_inline_slots_env_box.h" // Phase 75-2: C5 inline slots ENV gate
|
||||
#include "../front/tiny_c5_inline_slots.h" // Phase 75-2: C5 inline slots API
|
||||
#include "tiny_c4_inline_slots_env_box.h" // Phase 76-1: C4 inline slots ENV gate
|
||||
#include "../front/tiny_c4_inline_slots.h" // Phase 76-1: C4 inline slots API
|
||||
#include "tiny_c2_local_cache_env_box.h" // Phase 79-1: C2 local cache ENV gate
|
||||
#include "../front/tiny_c2_local_cache.h" // Phase 79-1: C2 local cache API
|
||||
#include "tiny_c3_inline_slots_env_box.h" // Phase 77-1: C3 inline slots ENV gate
|
||||
#include "../front/tiny_c3_inline_slots.h" // Phase 77-1: C3 inline slots API
|
||||
#include "tiny_inline_slots_fixed_mode_box.h" // Phase 78-1: Optional fixed-mode gating
|
||||
#include "tiny_inline_slots_switch_dispatch_box.h" // Phase 80-1: Switch dispatch for C4/C5/C6
|
||||
#include "tiny_inline_slots_switch_dispatch_fixed_box.h" // Phase 83-1: Switch dispatch fixed mode
|
||||
|
||||
// ============================================================================
|
||||
// Branch Prediction Macros (Pointer Safety - Prediction Hints)
|
||||
@ -114,9 +123,93 @@ __attribute__((always_inline))
|
||||
static inline void* tiny_hot_alloc_fast(int class_idx) {
|
||||
extern __thread TinyUnifiedCache g_unified_cache[];
|
||||
|
||||
// Phase 80-1: Switch dispatch for C4/C5/C6 (branch reduction optimization)
|
||||
// Phase 83-1: Per-op branch removed via fixed-mode caching
|
||||
// C2/C3 excluded (NO-GO from Phase 77-1/79-1)
|
||||
if (tiny_inline_slots_switch_dispatch_enabled_fast()) {
|
||||
// Switch mode: Direct jump to case (zero comparison overhead for C4/C5/C6)
|
||||
switch (class_idx) {
|
||||
case 4:
|
||||
if (tiny_c4_inline_slots_enabled_fast()) {
|
||||
void* base = c4_inline_pop(c4_inline_tls());
|
||||
if (TINY_HOT_LIKELY(base != NULL)) {
|
||||
TINY_HOT_METRICS_HIT(class_idx);
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
return tiny_header_finalize_alloc(base, class_idx);
|
||||
#else
|
||||
return base;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
if (tiny_c5_inline_slots_enabled_fast()) {
|
||||
void* base = c5_inline_pop(c5_inline_tls());
|
||||
if (TINY_HOT_LIKELY(base != NULL)) {
|
||||
TINY_HOT_METRICS_HIT(class_idx);
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
return tiny_header_finalize_alloc(base, class_idx);
|
||||
#else
|
||||
return base;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
if (tiny_c6_inline_slots_enabled_fast()) {
|
||||
void* base = c6_inline_pop(c6_inline_tls());
|
||||
if (TINY_HOT_LIKELY(base != NULL)) {
|
||||
TINY_HOT_METRICS_HIT(class_idx);
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
return tiny_header_finalize_alloc(base, class_idx);
|
||||
#else
|
||||
return base;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// C0-C3, C7: fall through to unified_cache
|
||||
break;
|
||||
}
|
||||
// Switch mode: fall through to unified_cache after miss
|
||||
} else {
|
||||
// If-chain mode (Phase 80-1 baseline): C3/C4/C5/C6 sequential checks
|
||||
// NOTE: C2 local cache (Phase 79-1 NO-GO) removed from hot path
|
||||
|
||||
// Phase 77-1: C3 Inline Slots early-exit (ENV gated)
|
||||
// Try C3 inline slots FIRST (before C4/C5/C6/unified cache) for class 3
|
||||
if (class_idx == 3 && tiny_c3_inline_slots_enabled_fast()) {
|
||||
void* base = c3_inline_pop(c3_inline_tls());
|
||||
if (TINY_HOT_LIKELY(base != NULL)) {
|
||||
TINY_HOT_METRICS_HIT(class_idx);
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
return tiny_header_finalize_alloc(base, class_idx);
|
||||
#else
|
||||
return base;
|
||||
#endif
|
||||
}
|
||||
// C3 inline miss → fall through to C4/C5/C6/unified cache
|
||||
}
|
||||
|
||||
// Phase 76-1: C4 Inline Slots early-exit (ENV gated)
|
||||
// Try C4 inline slots SECOND (before C5/C6/unified cache) for class 4
|
||||
if (class_idx == 4 && tiny_c4_inline_slots_enabled_fast()) {
|
||||
void* base = c4_inline_pop(c4_inline_tls());
|
||||
if (TINY_HOT_LIKELY(base != NULL)) {
|
||||
TINY_HOT_METRICS_HIT(class_idx);
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
return tiny_header_finalize_alloc(base, class_idx);
|
||||
#else
|
||||
return base;
|
||||
#endif
|
||||
}
|
||||
// C4 inline miss → fall through to C5/C6/unified cache
|
||||
}
|
||||
|
||||
// Phase 75-2: C5 Inline Slots early-exit (ENV gated)
|
||||
// Try C5 inline slots FIRST (before C6 and unified cache) for class 5
|
||||
if (class_idx == 5 && tiny_c5_inline_slots_enabled()) {
|
||||
// Try C5 inline slots THIRD (before C6 and unified cache) for class 5
|
||||
if (class_idx == 5 && tiny_c5_inline_slots_enabled_fast()) {
|
||||
void* base = c5_inline_pop(c5_inline_tls());
|
||||
if (TINY_HOT_LIKELY(base != NULL)) {
|
||||
TINY_HOT_METRICS_HIT(class_idx);
|
||||
@ -129,20 +222,21 @@ static inline void* tiny_hot_alloc_fast(int class_idx) {
|
||||
// C5 inline miss → fall through to C6/unified cache
|
||||
}
|
||||
|
||||
// Phase 75-1: C6 Inline Slots early-exit (ENV gated)
|
||||
// Try C6 inline slots SECOND (before unified cache) for class 6
|
||||
if (class_idx == 6 && tiny_c6_inline_slots_enabled()) {
|
||||
void* base = c6_inline_pop(c6_inline_tls());
|
||||
if (TINY_HOT_LIKELY(base != NULL)) {
|
||||
TINY_HOT_METRICS_HIT(class_idx);
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
return tiny_header_finalize_alloc(base, class_idx);
|
||||
#else
|
||||
return base;
|
||||
#endif
|
||||
// Phase 75-1: C6 Inline Slots early-exit (ENV gated)
|
||||
// Try C6 inline slots FOURTH (before unified cache) for class 6
|
||||
if (class_idx == 6 && tiny_c6_inline_slots_enabled_fast()) {
|
||||
void* base = c6_inline_pop(c6_inline_tls());
|
||||
if (TINY_HOT_LIKELY(base != NULL)) {
|
||||
TINY_HOT_METRICS_HIT(class_idx);
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
return tiny_header_finalize_alloc(base, class_idx);
|
||||
#else
|
||||
return base;
|
||||
#endif
|
||||
}
|
||||
// C6 inline miss → fall through to unified cache
|
||||
}
|
||||
// C6 inline miss → fall through to unified cache
|
||||
}
|
||||
} // End of if-chain mode
|
||||
|
||||
// TLS cache access (1 cache miss)
|
||||
// NOTE: Range check removed - caller (hak_tiny_size_to_class) guarantees valid class_idx
|
||||
|
||||
Reference in New Issue
Block a user