Target: Eliminate E1 lazy init check overhead (3.22% self%)
- E1 consolidated ENV gates but lazy check remained in hot path
- Strategy: __attribute__((constructor(101))) for pre-main init

Implementation:
- ENV gate: HAKMEM_ENV_SNAPSHOT_CTOR=0/1 (default 0, research box)
- core/box/hakmem_env_snapshot_box.c: Constructor function added
  - Reads ENV before main() when CTOR=1
  - Refresh also syncs gate state for bench_profile putenv
- core/box/hakmem_env_snapshot_box.h: Dual-mode enabled check
  - CTOR=1 fast path: direct global read (no lazy branch)
  - CTOR=0 fallback: legacy lazy init (rollback safe)
  - Branch hints adjusted for default OFF baseline

A/B Test Results (Mixed, 10-run, 20M iters, E1=1):
- Baseline (CTOR=0): 44.28M ops/s (mean), 44.60M ops/s (median)
- Optimized (CTOR=1): 46.38M ops/s (mean), 46.53M ops/s (median)
- Improvement: +4.75% mean, +4.35% median

Decision: GO (+4.75% >> +0.5% threshold)
- Expected +0.5-1.5%, achieved +4.75%
- Lazy init branch overhead was larger than expected
- Action: Keep as research box (default OFF), evaluate promotion

Phase 4 Cumulative:
- E1 (ENV Snapshot): +3.92%
- E2 (Alloc Per-Class): -0.21% (NEUTRAL, frozen)
- E3-4 (Constructor Init): +4.75%
- Total Phase 4: ~+8.5%

Deliverables:
- docs/analysis/PHASE4_E3_ENV_CONSTRUCTOR_INIT_DESIGN.md
- docs/analysis/PHASE4_E3_ENV_CONSTRUCTOR_INIT_NEXT_INSTRUCTIONS.md
- docs/analysis/PHASE4_COMPREHENSIVE_STATUS_ANALYSIS.md
- docs/analysis/PHASE4_EXECUTIVE_SUMMARY.md
- scripts/verify_health_profiles.sh (sanity check script)
- CURRENT_TASK.md (E3-4 complete, next instructions)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
82 lines
3.2 KiB
C
82 lines
3.2 KiB
C
// hakmem_env_snapshot_box.h - Phase 4 E1: ENV Snapshot Consolidation
//
// Purpose: Consolidate 3 hot ENV gate calls into 1 TLS snapshot read
// Target: tiny_c7_ultra_enabled_env (1.28%) + tiny_front_v3_enabled (1.01%) +
//         tiny_metadata_cache_enabled (0.97%) = 3.26% combined ENV overhead
//
// Design:
// - ENV: HAKMEM_ENV_SNAPSHOT=0/1 (default 0, research box)
// - Single TLS snapshot struct containing all hot toggles
// - Lazy init with version-based refresh (follows tiny_front_v3_snapshot pattern)
// - Learner interlock: tiny_metadata_cache_eff = cache && !learner
//
// E3-4 Extension: Constructor init to eliminate lazy check overhead
// - ENV: HAKMEM_ENV_SNAPSHOT_CTOR=0/1 (default 0)
// - When =1: Gate init runs in constructor (before main)
// - Eliminates 3.22% lazy init check overhead
//
// Benefits:
// - 3 TLS reads → 1 TLS read (66% reduction)
// - 3 lazy init checks → 1 lazy init check
// - E3-4: Lazy init check → no check (constructor init)
// - Expected gain: +1-3% (E1) + +0.5-1.5% (E3-4)
|
|
#ifndef HAK_ENV_SNAPSHOT_BOX_H
|
|
#define HAK_ENV_SNAPSHOT_BOX_H
|
|
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
|
|
// ENV snapshot struct: consolidates all hot ENV gates into one record so the
// hot path can read every toggle from a single object.
//
// Field order is part of the layout; do not reorder without checking users.
typedef struct HakmemEnvSnapshot {
    bool tiny_c7_ultra_enabled;    // ENV: HAKMEM_TINY_C7_ULTRA (default 1)
    bool tiny_front_v3_enabled;    // ENV: HAKMEM_TINY_FRONT_V3_ENABLED (default 1)
    bool tiny_metadata_cache;      // ENV: HAKMEM_TINY_METADATA_CACHE (default 0)
    bool tiny_metadata_cache_eff;  // Effective: cache && !learner (for hot path)
} HakmemEnvSnapshot;
|
|
|
|
// Global snapshot state (implemented in hakmem_env_snapshot_box.c)
|
|
extern HakmemEnvSnapshot g_hakmem_env_snapshot;
|
|
extern int g_hakmem_env_snapshot_ready;
|
|
|
|
// Snapshot initializer (implemented in hakmem_env_snapshot_box.c)
|
|
void hakmem_env_snapshot_init(void);
|
|
|
|
// Refresh from ENV (for bench_profile putenv sync)
|
|
void hakmem_env_snapshot_refresh_from_env(void);
|
|
|
|
// Fast snapshot getter: lazy init + 1 TLS read
|
|
static inline const HakmemEnvSnapshot* hakmem_env_snapshot(void) {
|
|
if (__builtin_expect(!g_hakmem_env_snapshot_ready, 0)) {
|
|
hakmem_env_snapshot_init();
|
|
}
|
|
return &g_hakmem_env_snapshot;
|
|
}
|
|
|
|
// E3-4: Global gate state (defined in hakmem_env_snapshot_box.c)
|
|
extern int g_hakmem_env_snapshot_gate;
|
|
extern int g_hakmem_env_snapshot_ctor_mode;
|
|
|
|
// ENV gate: default OFF (research box, set =1 to enable)
|
|
// E3-4: Dual-mode - constructor init (fast) or legacy lazy init (fallback)
|
|
static inline bool hakmem_env_snapshot_enabled(void) {
|
|
// E3-4 Fast path: constructor mode (no lazy check, just global read)
|
|
// Default is OFF, so ctor_mode==1 is UNLIKELY.
|
|
if (__builtin_expect(g_hakmem_env_snapshot_ctor_mode == 1, 0)) {
|
|
return g_hakmem_env_snapshot_gate != 0;
|
|
}
|
|
|
|
// Legacy path: lazy init (fallback when HAKMEM_ENV_SNAPSHOT_CTOR=0)
|
|
if (__builtin_expect(g_hakmem_env_snapshot_gate == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_ENV_SNAPSHOT");
|
|
if (e && *e) {
|
|
g_hakmem_env_snapshot_gate = (*e == '1') ? 1 : 0;
|
|
} else {
|
|
g_hakmem_env_snapshot_gate = 0; // default: OFF (research box)
|
|
}
|
|
}
|
|
return g_hakmem_env_snapshot_gate != 0;
|
|
}
|
|
|
|
#endif // HAK_ENV_SNAPSHOT_BOX_H
|