// fastlane_direct_env_box.h - Phase 19-1: FastLane Direct Path ENV Control
//
// Goal: Remove wrapper layer overhead (30.79% of cycles) by calling core allocator directly
// Strategy: Compile-time + runtime gate to bypass front_fastlane_try_*() wrapper
//
// Box Theory:
//   - Boundary: HAKMEM_FASTLANE_DIRECT=0/1 (default: 0, opt-in)
//   - Rollback: ENV=0 reverts to existing FastLane wrapper path
//   - Observability: perf stat shows instruction/branch reduction
//
// Expected Performance:
//   - Reduction: -17.5 instructions/op, -6.0 branches/op
//   - Impact: +10-15% throughput (remove 30% wrapper overhead)
//
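// ENV Variables:
//   HAKMEM_FASTLANE_DIRECT=0/1  # Enable direct path (default: 0, research box)
//     Unset, empty, or a value starting with '0' disables; any other value enables.
//     Not consulted by fastlane_direct_enabled() when HAKMEM_FAST_PROFILE_FIXED is
//     nonzero at compile time (that build always reports enabled).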

#pragma once

#include <stdatomic.h>
#include <stdlib.h>
#include "../hakmem_build_flags.h"

// ENV control: cached flag for fastlane_direct_enabled()
// -1: uninitialized, 0: disabled, 1: enabled
// fastlane_direct_enabled() reads and lazily fills this cache using
// memory_order_relaxed loads/stores.
// NOTE: Must be a single global (not header-static) so bench_profile refresh can
// update the same cache used by malloc/free wrappers.
// This header only declares the object; the definition lives outside this file.
extern _Atomic int g_fastlane_direct_enabled;

// Query whether the FastLane Direct path is enabled.
//
// Returns: 1 if enabled, 0 if disabled.
//
// Build with HAKMEM_FAST_PROFILE_FIXED nonzero: always returns 1; neither the
// cache nor the environment is consulted.
//
// Otherwise: returns the cached value of g_fastlane_direct_enabled. If the
// cache still holds -1 (uninitialized), HAKMEM_FASTLANE_DIRECT is read once via
// getenv(), the result is stored into the cache, and that result is returned.
// The variable counts as "enabled" when it is set, non-empty, and its first
// character is not '0'.
//
// Hot path: a single relaxed atomic load once the cache has been filled.
// No lock is taken around the lazy initialization.
static inline int fastlane_direct_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
    return 1;
#endif
    const int cached = atomic_load_explicit(&g_fastlane_direct_enabled, memory_order_relaxed);

    // Common case: the flag was already resolved to 0 or 1.
    if (__builtin_expect(cached != -1, 1)) {
        return cached;
    }

    // Cold path: resolve the flag from the environment and publish it.
    const char* env = getenv("HAKMEM_FASTLANE_DIRECT");
    int resolved = 0;
    if (env != NULL && env[0] != '\0' && env[0] != '0') {
        resolved = 1;
    }
    atomic_store_explicit(&g_fastlane_direct_enabled, resolved, memory_order_relaxed);
    return resolved;
}

// Refresh from ENV: Called during benchmark ENV reloads
// Allows runtime toggle without recompilation
// Declared here only; the implementation is not in this header.
// NOTE(review): presumably re-reads HAKMEM_FASTLANE_DIRECT and overwrites
// g_fastlane_direct_enabled - confirm against the definition.
// In builds where HAKMEM_FAST_PROFILE_FIXED is nonzero, fastlane_direct_enabled()
// returns 1 without reading the cache, so a refresh cannot change its result.
void fastlane_direct_env_refresh_from_env(void);