Phase 5 E5-3: Candidate Analysis (All DEFERRED) + E5-4 Instructions
E5-3 Analysis Results:
- free_tiny_fast_cold (7.14%): DEFER - cold path, low ROI
- unified_cache_push (3.39%): DEFER - already optimized
- hakmem_env_snapshot_enabled (2.97%): DEFER - low headroom
Key Insight: perf self% is time-weighted, not frequency-weighted.
Cold paths can therefore appear hot in the profile while having low total impact.
Next: E5-4 (Malloc Tiny Direct Path)
- Apply E5-1 winning pattern to malloc side
- Target: tiny_alloc_gate_fast() gate tax elimination
- ENV gate: HAKMEM_MALLOC_TINY_DIRECT=0/1
Files added:
- docs/analysis/PHASE5_E5_3_ANALYSIS_AND_RECOMMENDATIONS.md
- docs/analysis/PHASE5_E5_4_MALLOC_TINY_DIRECT_NEXT_INSTRUCTIONS.md
- core/box/free_cold_shape_env_box.{h,c} (research box, not tested)
- core/box/free_cold_shape_stats_box.{h,c} (research box, not tested)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -70,6 +70,8 @@
|
||||
#include "../box/tiny_metadata_cache_hot_box.h" // Phase 3 C2: Policy hot cache (metadata cache optimization)
|
||||
#include "../box/tiny_free_route_cache_env_box.h" // Phase 3 D1: Free path route cache
|
||||
#include "../box/hakmem_env_snapshot_box.h" // Phase 4 E1: ENV snapshot consolidation
|
||||
#include "../box/free_cold_shape_env_box.h" // Phase 5 E5-3a: Free cold path shape optimization
|
||||
#include "../box/free_cold_shape_stats_box.h" // Phase 5 E5-3a: Free cold shape stats
|
||||
|
||||
// Helper: current thread id (low 32 bits) for owner check
|
||||
#ifndef TINY_SELF_U32_LOCAL_DEFINED
|
||||
@@ -413,6 +415,28 @@ static int free_tiny_fast_cold(void* ptr, void* base, int class_idx)
|
||||
}
|
||||
#endif // !HAKMEM_BUILD_RELEASE
|
||||
|
||||
// Phase 5 E5-3a: Optimized cold path shape
|
||||
// Strategy: Handle common LEGACY path first (use_tiny_heap==0 in Mixed ~90%+)
|
||||
// Defer expensive LARSON/cross-thread checks to only when heap routing needed
|
||||
static __thread int g_cold_shape = -1;
|
||||
if (__builtin_expect(g_cold_shape == -1, 0)) {
|
||||
g_cold_shape = free_cold_shape_enabled() ? 1 : 0;
|
||||
}
|
||||
|
||||
if (g_cold_shape == 1) {
|
||||
// Optimized shape: Check use_tiny_heap FIRST
|
||||
if (__builtin_expect(!use_tiny_heap, 1)) {
|
||||
// Most common case in Mixed: LEGACY path, no heap routing
|
||||
// Skip LARSON/cross-thread check entirely (not needed for legacy)
|
||||
FREE_COLD_SHAPE_STAT_INC(legacy_fast);
|
||||
FREE_COLD_SHAPE_STAT_INC(enabled_count);
|
||||
goto legacy_fallback;
|
||||
}
|
||||
// Rare: heap routing needed, do full validation
|
||||
FREE_COLD_SHAPE_STAT_INC(heap_path);
|
||||
}
|
||||
|
||||
// Baseline shape: LARSON check first (current behavior)
|
||||
// Cross-thread free detection (Larson MT crash fix, ENV gated) + TinyHeap free path
|
||||
{
|
||||
static __thread int g_larson_fix = -1;
|
||||
@@ -467,7 +491,7 @@ static int free_tiny_fast_cold(void* ptr, void* base, int class_idx)
|
||||
}
|
||||
return 0; // remote push failed; fall back to normal path
|
||||
}
|
||||
// Same-thread + TinyHeap route → route-based free
|
||||
// Same-thread + TinyHeap route → route-based free
|
||||
if (__builtin_expect(use_tiny_heap, 0)) {
|
||||
FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_tinyheap);
|
||||
switch (route) {
|
||||
@@ -541,6 +565,7 @@ static int free_tiny_fast_cold(void* ptr, void* base, int class_idx)
|
||||
#endif
|
||||
|
||||
// Phase REFACTOR-2: Legacy fallback (use unified helper)
|
||||
legacy_fallback:
|
||||
FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_legacy_fallback);
|
||||
tiny_legacy_fallback_free_base(base, class_idx);
|
||||
return 1;
|
||||
|
||||
Reference in New Issue
Block a user