Phase 4 E1: ENV Snapshot Consolidation - GO (+3.92% avg, +4.01% median)

Target: Consolidate 3 ENV gate TLS reads → 1 TLS read
- tiny_c7_ultra_enabled_env():    1.28% self
- tiny_front_v3_enabled():        1.01% self
- tiny_metadata_cache_enabled():  0.97% self
- Total overhead: 3.26% self (perf profile analysis)

Implementation:
- core/box/hakmem_env_snapshot_box.h (new): ENV snapshot struct & API
- core/box/hakmem_env_snapshot_box.c (new): TLS snapshot implementation
- core/front/malloc_tiny_fast.h: Migrated 5 call sites to snapshot
- core/box/tiny_legacy_fallback_box.h: Migrated 2 call sites
- core/box/tiny_metadata_cache_hot_box.h: Migrated 1 call site
- core/bench_profile.h: Added hakmem_env_snapshot_refresh_from_env()
- Makefile: Added hakmem_env_snapshot_box.o to build
- ENV gate: HAKMEM_ENV_SNAPSHOT=0/1 (default: 0, research box)

A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (E1=0): 43,617,549 ops/s (avg), 43,562,895 ops/s (median)
- Optimized (E1=1): 45,327,239 ops/s (avg), 45,309,218 ops/s (median)
- Improvement: avg +3.92%, median +4.01%

Decision: GO (+3.92% >= +2.5% threshold)
- Action: Keep as research box (default OFF) for Phase 4
- Next: Consider promotion to default in MIXED_TINYV3_C7_SAFE preset

Design Rationale:
- Shape optimizations (B3, D3) reached saturation (+0.56% NEUTRAL)
- Shift to memory/TLS overhead optimization (new optimization frontier)
- Pattern: Similar to existing tiny_front_v3_snapshot (proven approach)
- Expected: +1-3% from 3.26% ENV overhead → Achieved: +3.92%

Technical Details:
- Consolidation: 3 TLS reads → 1 TLS read (~67% reduction)
- Learner interlock: tiny_metadata_cache_eff pre-computed in snapshot
- Version sync: Refreshes on small_policy_v7_version_changed()
- Fallback safety: Existing ENV gates still available when E1=0

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-14 00:59:12 +09:00
parent 42ba23fbd0
commit 88717a8737
6 changed files with 215 additions and 13 deletions

View File

@@ -69,6 +69,7 @@
 #include "../box/free_tiny_fast_hotcold_stats_box.h" // Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Stats
 #include "../box/tiny_metadata_cache_hot_box.h" // Phase 3 C2: Policy hot cache (metadata cache optimization)
 #include "../box/tiny_free_route_cache_env_box.h" // Phase 3 D1: Free path route cache
+#include "../box/hakmem_env_snapshot_box.h" // Phase 4 E1: ENV snapshot consolidation
 // Helper: current thread id (low 32 bits) for owner check
 #ifndef TINY_SELF_U32_LOCAL_DEFINED
@@ -226,7 +227,16 @@ static inline void* malloc_tiny_fast_for_class(size_t size, int class_idx) {
 // Phase v11a-5b: C7 ULTRA early-exit (skip policy snapshot for common case)
 // This is the most common hot path - avoids TLS policy overhead
-if (class_idx == 7 && tiny_c7_ultra_enabled_env()) {
+// Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
+bool c7_ultra_on;
+if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) {
+const HakmemEnvSnapshot* env = hakmem_env_snapshot();
+c7_ultra_on = env->tiny_c7_ultra_enabled;
+} else {
+c7_ultra_on = tiny_c7_ultra_enabled_env();
+}
+if (class_idx == 7 && c7_ultra_on) {
 void* ultra_p = tiny_c7_ultra_alloc(size);
 if (TINY_HOT_LIKELY(ultra_p != NULL)) {
 return ultra_p;
@@ -384,8 +394,14 @@ static int free_tiny_fast_cold(void* ptr, void* base, int class_idx)
 route = tiny_route_for_class((uint8_t)class_idx);
 }
 const int use_tiny_heap = tiny_route_is_heap_kind(route);
-const TinyFrontV3Snapshot* front_snap =
-__builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL;
+// Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
+const TinyFrontV3Snapshot* front_snap;
+if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) {
+const HakmemEnvSnapshot* env = hakmem_env_snapshot();
+front_snap = env->tiny_front_v3_enabled ? tiny_front_v3_snapshot_get() : NULL;
+} else {
+front_snap = __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL;
+}
 // TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast.
 // In Release builds, we trust header magic (0xA0) as sufficient validation.
@@ -576,7 +592,16 @@ static inline int free_tiny_fast_hot(void* ptr) {
 FREE_PATH_STAT_INC(total_calls);
 // Phase v11b-1: C7 ULTRA early-exit (skip policy snapshot for most common case)
-if (class_idx == 7 && tiny_c7_ultra_enabled_env()) {
+// Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
+bool c7_ultra_free;
+if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) {
+const HakmemEnvSnapshot* env = hakmem_env_snapshot();
+c7_ultra_free = env->tiny_c7_ultra_enabled;
+} else {
+c7_ultra_free = tiny_c7_ultra_enabled_env();
+}
+if (class_idx == 7 && c7_ultra_free) {
 FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_c7_ultra);
 tiny_c7_ultra_free(ptr);
 FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
@@ -719,7 +744,16 @@ static inline int free_tiny_fast(void* ptr) {
 FREE_PATH_STAT_INC(total_calls);
 // Phase v11b-1: C7 ULTRA early-exit (skip policy snapshot for most common case)
-if (class_idx == 7 && tiny_c7_ultra_enabled_env()) {
+// Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
+bool c7_ultra_free;
+if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) {
+const HakmemEnvSnapshot* env = hakmem_env_snapshot();
+c7_ultra_free = env->tiny_c7_ultra_enabled;
+} else {
+c7_ultra_free = tiny_c7_ultra_enabled_env();
+}
+if (class_idx == 7 && c7_ultra_free) {
 tiny_c7_ultra_free(ptr);
 return 1;
 }
@@ -790,8 +824,14 @@ legacy_fallback:
 route = tiny_route_for_class((uint8_t)class_idx);
 }
 const int use_tiny_heap = tiny_route_is_heap_kind(route);
-const TinyFrontV3Snapshot* front_snap =
-__builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL;
+// Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
+const TinyFrontV3Snapshot* front_snap;
+if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) {
+const HakmemEnvSnapshot* env = hakmem_env_snapshot();
+front_snap = env->tiny_front_v3_enabled ? tiny_front_v3_snapshot_get() : NULL;
+} else {
+front_snap = __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL;
+}
 // TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast.
 // In Release builds, we trust header magic (0xA0) as sufficient validation.