Results:
- A/B test: +1.89% on Mixed (10-run, clean env)
  - Baseline: 51.96M ops/s
  - Optimized: 52.94M ops/s
  - Improvement: +984K ops/s (+1.89%)
- C6-heavy verification: +7.86% (nonlegacy_mask works correctly, no misfires)

Strategy:
- Extend Phase 9 (C0-C3 DUALHOT) to C4-C7 LEGACY DIRECT
- Fail-Fast principle: Never misclassify MID/ULTRA/V7 as LEGACY
- nonlegacy_mask: Cached at init, hot path uses single bit operation

Success factors:
1. Performance improvement: +1.89% (1.9x GO threshold)
2. Safety verified: nonlegacy_mask prevents MID v3 misfire in C6-heavy
3. Phase 9 coexistence: C0-C3 (Phase 9) + C4-C7 (Phase 10) = full LEGACY coverage
4. Minimal overhead: Single bit operation in hot path (mask & (1u<<class))

Implementation:
- Patch 1: ENV gate box (free_tiny_fast_mono_legacy_direct_env_box.h)
  - ENV: HAKMEM_FREE_TINY_FAST_MONO_LEGACY_DIRECT=0/1 (default 0)
  - nonlegacy_mask cached (reuses free_policy_fast_v2_nonlegacy_mask())
  - Probe window: 64 (avoid bench_profile putenv race)
- Patch 2: Early-exit in free_tiny_fast() (malloc_tiny_fast.h)
  - Conditions: !nonlegacy_mask, route==LEGACY, !LARSON_FIX, done==1
  - Direct call: tiny_legacy_fallback_free_base()
- Patch 3: Visibility (free_path_stats_box.h)
  - mono_legacy_direct_hit counter (compile-out in release)
- Patch 4: cleanenv extension (run_mixed_10_cleanenv.sh)
  - ENV leak protection

Safety verification (C6-heavy):
- OFF: 19.75M ops/s
- ON: 21.30M ops/s (+7.86%)
- nonlegacy_mask correctly excludes C6 (MID v3 active)
- Improvement from C0-C5, C7 direct path acceleration

Files modified:
- core/bench_profile.h: add to MIXED_TINYV3_C7_SAFE preset
- core/front/malloc_tiny_fast.h: early-exit insertion
- core/box/free_path_stats_box.h: counter
- core/box/free_tiny_fast_mono_legacy_direct_env_box.h: NEW (ENV gate + nonlegacy_mask)
- scripts/run_mixed_10_cleanenv.sh: ENV leak protection

Health check: PASSED (all profiles)
Promotion: Added to MIXED_TINYV3_C7_SAFE preset (default ON, opt-out)
Rollback: HAKMEM_FREE_TINY_FAST_MONO_LEGACY_DIRECT=0

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
84 lines
2.7 KiB
C
84 lines
2.7 KiB
C
#ifndef HAKMEM_FREE_PATH_STATS_BOX_H
|
|
#define HAKMEM_FREE_PATH_STATS_BOX_H
|
|
|
|
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include "../hakmem_build_flags.h"
|
|
|
|
/**
 * Counters recording which path each free operation took.
 *
 * Every member is a plain 64-bit counter. A single instance is exposed as
 * g_free_path_stats and bumped through FREE_PATH_STAT_INC(field), so each
 * member name doubles as the macro argument.
 */
typedef struct FreePathStats {
    uint64_t total_calls;

    uint64_t c7_ultra_fast;
    uint64_t c6_ultra_free_fast;   // Phase 4-2: C6 ULTRA-free
    uint64_t c6_ultra_alloc_hit;   // Phase 4-4: C6 ULTRA-alloc (TLS pop)
    uint64_t c6_ifl_push;          // Phase TLS-UNIFY-3: C6 intrusive push
    uint64_t c6_ifl_pop;           // Phase TLS-UNIFY-3: C6 intrusive pop
    uint64_t c6_ifl_fallback;      // Phase TLS-UNIFY-3: C6 intrusive fallback (slow)
    uint64_t c5_ultra_free_fast;   // Phase 5-1: C5 ULTRA-free
    uint64_t c5_ultra_alloc_hit;   // Phase 5-2: C5 ULTRA-alloc (TLS pop)
    uint64_t c4_ultra_free_fast;   // Phase 6: C4 ULTRA-free (cap=64)
    uint64_t c4_ultra_alloc_hit;   // Phase 6: C4 ULTRA-alloc (TLS pop)
    uint64_t smallheap_v3_fast;
    uint64_t smallheap_v6_fast;
    uint64_t smallheap_v7_fast;    // Phase v7-2: SmallHeap v7 fast free
    uint64_t tiny_heap_v1_fast;
    uint64_t pool_v1_fast;
    uint64_t remote_free;
    uint64_t super_lookup_called;
    uint64_t legacy_fallback;

    // Phase 4-1: Legacy per-class breakdown
    uint64_t legacy_by_class[8];   // Legacy fallback breakdown for classes C0-C7

    // Phase POLICY-FAST-PATH-V2: Fast-path policy skip
    uint64_t policy_fast_v2_skip;  // Phase POLICY-FAST-PATH-V2 fast-path skips

    // Phase 9: MONO DUALHOT hit
    uint64_t mono_dualhot_hit;     // Phase 9: C0-C3 direct path (monolithic free_tiny_fast)

    // Phase 10: MONO LEGACY DIRECT hit
    uint64_t mono_legacy_direct_hit; // Phase 10: C4-C7 legacy direct path (skip policy snapshot)
} FreePathStats;
|
|
|
|
// ENV gate
|
|
/**
 * Reports whether free-path statistics collection is switched on.
 *
 * When HAKMEM_DEBUG_COUNTERS is zero (or undefined) the function is a
 * constant `false`. Otherwise the decision comes from the environment
 * variable HAKMEM_FREE_PATH_STATS and is cached in function-local statics:
 *
 *   - set and non-empty: cached immediately; a value whose first character
 *     is '0' means off, anything else means on;
 *   - unset or empty: the first 64 such calls return false WITHOUT caching,
 *     so a value installed slightly later is still picked up; after the
 *     probe budget is spent the result is cached as off.
 *
 * The cache variables are plain `int`s with no synchronization.
 *
 * @return true if statistics should be recorded, false otherwise.
 */
static inline bool free_path_stats_enabled(void) {
#if !HAKMEM_DEBUG_COUNTERS
    return false;
#else
    static int g_enabled = -1;    // -1: unknown, 0: off, 1: on
    static int g_probe_left = 64; // tolerate early getenv() instability (bench_profile putenv)

    // Fast exits once the decision has been cached.
    if (__builtin_expect(g_enabled == 1, 1)) return true;
    if (__builtin_expect(g_enabled == 0, 1)) return false;

    const char *e = getenv("HAKMEM_FREE_PATH_STATS");
    if (e && *e) {
        // Only the first character is inspected: '0' disables, anything else enables.
        g_enabled = (*e != '0') ? 1 : 0;
        return g_enabled == 1;
    }

    if (g_probe_left-- > 0) {
        return false; // keep g_enabled == -1 so a later call retries getenv()
    }

    // Probe budget exhausted: settle on "off" permanently.
    g_enabled = 0;
    return false;
#endif
}
|
|
|
|
// Global stats instance
|
|
// Shared FreePathStats instance updated by FREE_PATH_STAT_INC. This header only
// declares it; the definition is not part of this file.
extern FreePathStats g_free_path_stats;
|
|
|
|
// Increment macros (with unlikely guard)
|
|
// FREE_PATH_STAT_INC(field): bump g_free_path_stats.<field> by one.
//
// With HAKMEM_DEBUG_COUNTERS enabled, the increment (a plain `++`, no atomics)
// runs only when free_path_stats_enabled() returns true; the check is hinted
// as unlikely. With the flag disabled the macro expands to a no-op expression,
// so `field` is never evaluated.
#if HAKMEM_DEBUG_COUNTERS
#define FREE_PATH_STAT_INC(field) \
    do { if (__builtin_expect(free_path_stats_enabled(), 0)) { \
        g_free_path_stats.field++; \
    } } while (0)
#else
#define FREE_PATH_STAT_INC(field) ((void)0)
#endif
|
|
|
|
#endif // HAKMEM_FREE_PATH_STATS_BOX_H
|