// fastlane_direct_env_box.h - Phase 19-1: FastLane Direct Path ENV Control // // Goal: Remove wrapper layer overhead (30.79% of cycles) by calling core allocator directly // Strategy: Compile-time + runtime gate to bypass front_fastlane_try_*() wrapper // // Box Theory: // - Boundary: HAKMEM_FASTLANE_DIRECT=0/1 (default: 0, opt-in) // - Rollback: ENV=0 reverts to existing FastLane wrapper path // - Observability: perf stat shows instruction/branch reduction // // Expected Performance: // - Reduction: -17.5 instructions/op, -6.0 branches/op // - Impact: +10-15% throughput (remove 30% wrapper overhead) // // ENV Variables: // HAKMEM_FASTLANE_DIRECT=0/1 # Enable direct path (default: 0, research box) #pragma once #include #include #include "../hakmem_build_flags.h" // ENV control: cached flag for fastlane_direct_enabled() // -1: uninitialized, 0: disabled, 1: enabled // NOTE: Must be a single global (not header-static) so bench_profile refresh can // update the same cache used by malloc/free wrappers. extern _Atomic int g_fastlane_direct_enabled; // Runtime check: Is FastLane Direct path enabled? // Returns: 1 if enabled, 0 if disabled // Hot path: Single atomic load (after first call) static inline int fastlane_direct_enabled(void) { #if HAKMEM_FAST_PROFILE_FIXED return 1; #endif int val = atomic_load_explicit(&g_fastlane_direct_enabled, memory_order_relaxed); if (__builtin_expect(val == -1, 0)) { // Cold path: Initialize from ENV const char* e = getenv("HAKMEM_FASTLANE_DIRECT"); int enable = (e && *e && *e != '0') ? 1 : 0; atomic_store_explicit(&g_fastlane_direct_enabled, enable, memory_order_relaxed); return enable; } return val; } // Refresh from ENV: Called during benchmark ENV reloads // Allows runtime toggle without recompilation void fastlane_direct_env_refresh_from_env(void);