Files
hakmem/core/box/free_path_stats_box.h
Moe Charm (CI) 871034da1f Phase 9: FREE-TINY-FAST MONO DUALHOT (GO +2.72%)
Results:
- A/B test: +2.72% on Mixed (10-run, clean env)
- Baseline: 48.89M ops/s
- Optimized: 50.22M ops/s
- Improvement: +1.33M ops/s (+2.72%)
- Stability: Standard deviation reduced by 60.8% (2.44M → 955K ops/s)

Strategy:
- Transplant C0-C3 "second hot" path to monolithic free_tiny_fast()
- Early-exit within monolithic (no hot/cold split)
- FastLane free now benefits from C0-C3 direct path

Success factors:
1. Performance improvement: +2.72% (2.7x GO threshold)
2. Stability improvement: 2.6x more stable (stdev 60.8% reduction)
3. Learned from Phase 7 failure:
   - Phase 7: Function split (hot/cold) → NO-GO
   - Phase 9: Early-exit within monolithic → GO
4. FastLane free compatibility: C0-C3 direct path now works with FastLane
5. Policy snapshot overhead reduction: C0-C3 (48% of Mixed) skip route lookup

Implementation:
- Patch 1: ENV gate box (free_tiny_fast_mono_dualhot_env_box.h)
  - ENV: HAKMEM_FREE_TINY_FAST_MONO_DUALHOT=0/1 (gate default 0; turned on by the MIXED_TINYV3_C7_SAFE preset)
  - Probe window: 64 (avoid bench_profile putenv race)
- Patch 2: Early-exit in free_tiny_fast() (malloc_tiny_fast.h)
  - Conditions: class_idx <= 3, !LARSON_FIX, route==LEGACY
  - Direct call: tiny_legacy_fallback_free_base()
- Patch 3: Visibility (free_path_stats_box.h)
  - mono_dualhot_hit counter (compile-out in release)
- Patch 4: cleanenv extension (run_mixed_10_cleanenv.sh)
  - ENV leak protection

Files modified:
- core/bench_profile.h: add to MIXED_TINYV3_C7_SAFE preset
- core/front/malloc_tiny_fast.h: early-exit insertion
- core/box/free_path_stats_box.h: counter
- core/box/free_tiny_fast_mono_dualhot_env_box.h: NEW (ENV gate)
- scripts/run_mixed_10_cleanenv.sh: ENV leak protection

Health check: PASSED (all profiles)

Promotion: Added to MIXED_TINYV3_C7_SAFE preset (default ON, opt-out)

Rollback: HAKMEM_FREE_TINY_FAST_MONO_DUALHOT=0

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-14 19:16:49 +09:00

81 lines
2.6 KiB
C

#ifndef HAKMEM_FREE_PATH_STATS_BOX_H
#define HAKMEM_FREE_PATH_STATS_BOX_H
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include "../hakmem_build_flags.h"
// Counters for the free path. Every member is a uint64_t that is bumped
// through FREE_PATH_STAT_INC(); member order is kept stable.
typedef struct FreePathStats {
    uint64_t total_calls;
    uint64_t c7_ultra_fast;

    // C6
    uint64_t c6_ultra_free_fast;   // Phase 4-2: C6 ULTRA-free
    uint64_t c6_ultra_alloc_hit;   // Phase 4-4: C6 ULTRA-alloc (TLS pop)
    uint64_t c6_ifl_push;          // Phase TLS-UNIFY-3: C6 intrusive push
    uint64_t c6_ifl_pop;           // Phase TLS-UNIFY-3: C6 intrusive pop
    uint64_t c6_ifl_fallback;      // Phase TLS-UNIFY-3: C6 intrusive fallback (slow)

    // C5
    uint64_t c5_ultra_free_fast;   // Phase 5-1: C5 ULTRA-free
    uint64_t c5_ultra_alloc_hit;   // Phase 5-2: C5 ULTRA-alloc (TLS pop)

    // C4
    uint64_t c4_ultra_free_fast;   // Phase 6: C4 ULTRA-free (cap=64)
    uint64_t c4_ultra_alloc_hit;   // Phase 6: C4 ULTRA-alloc (TLS pop)

    // Other fast paths
    uint64_t smallheap_v3_fast;
    uint64_t smallheap_v6_fast;
    uint64_t smallheap_v7_fast;    // Phase v7-2: SmallHeap v7 fast free
    uint64_t tiny_heap_v1_fast;
    uint64_t pool_v1_fast;

    uint64_t remote_free;
    uint64_t super_lookup_called;
    uint64_t legacy_fallback;

    // Phase 4-1: Legacy per-class breakdown
    uint64_t legacy_by_class[8];   // Legacy fallback breakdown for C0-C7

    // Phase POLICY-FAST-PATH-V2: Fast-path policy skip
    uint64_t policy_fast_v2_skip;  // Phase POLICY-FAST-PATH-V2 fast-path skips

    // Phase 9: MONO DUALHOT hit
    uint64_t mono_dualhot_hit;     // Phase 9: C0-C3 direct path (monolithic free_tiny_fast)
} FreePathStats;
// ENV gate for free-path statistics.
//
// Returns true when counting is switched on. When HAKMEM_DEBUG_COUNTERS is
// zero or undefined the function is a constant `false`.
//
// Otherwise the decision comes from the HAKMEM_FREE_PATH_STATS environment
// variable and is latched once known:
//   - non-empty value: first character '0' -> off, anything else -> on
//   - unset or empty:  report "off" without latching for the first 64 such
//     calls (tolerates early getenv() instability around bench_profile's
//     putenv), then latch off on the next one.
//
// The state lives in function-local statics of a `static inline` function,
// so each translation unit that includes this header keeps its own copy.
// They are plain ints; no synchronization is performed here.
static inline bool free_path_stats_enabled(void) {
#if HAKMEM_DEBUG_COUNTERS
    static int g_enabled = -1;     // -1: unknown, 0: off, 1: on
    static int g_probe_left = 64;  // remaining "undecided" answers

    // Hot path: the decision has already been latched.
    const int latched = g_enabled;
    if (__builtin_expect(latched == 1 || latched == 0, 1)) {
        return latched == 1;
    }

    const char *value = getenv("HAKMEM_FREE_PATH_STATS");
    if (value != NULL && value[0] != '\0') {
        g_enabled = (value[0] == '0') ? 0 : 1;
        return g_enabled == 1;
    }

    // Variable not visible (yet): spend one unit of the probe budget and
    // stay undecided while any budget was left before this call.
    const int budget_before = g_probe_left--;
    if (budget_before > 0) {
        return false;
    }

    // Budget exhausted: stop probing for good.
    g_enabled = 0;
    return false;
#else
    return false;
#endif
}
// Global stats instance updated by FREE_PATH_STAT_INC().
// This is a declaration only; the definition is not in this header.
extern FreePathStats g_free_path_stats;
// FREE_PATH_STAT_INC(field): add one to g_free_path_stats.<field>.
// Without HAKMEM_DEBUG_COUNTERS it expands to a no-op expression, so `field`
// is never evaluated. With it, the increment happens only when
// free_path_stats_enabled() returns true (hinted as the unlikely case).
#if !HAKMEM_DEBUG_COUNTERS
#define FREE_PATH_STAT_INC(field) ((void)0)
#else
#define FREE_PATH_STAT_INC(field)                                  \
    do {                                                           \
        if (__builtin_expect(free_path_stats_enabled(), 0)) {      \
            g_free_path_stats.field += 1;                          \
        }                                                          \
    } while (0)
#endif
#endif // HAKMEM_FREE_PATH_STATS_BOX_H