47 lines
1.8 KiB
C
47 lines
1.8 KiB
C
|
|
// fastlane_direct_env_box.h - Phase 19-1: FastLane Direct Path ENV Control
|
||
|
|
//
|
||
|
|
// Goal: Remove wrapper layer overhead (30.79% of cycles) by calling core allocator directly
|
||
|
|
// Strategy: Compile-time + runtime gate to bypass front_fastlane_try_*() wrapper
|
||
|
|
//
|
||
|
|
// Box Theory:
|
||
|
|
// - Boundary: HAKMEM_FASTLANE_DIRECT=0/1 (default: 0, opt-in)
|
||
|
|
// - Rollback: ENV=0 reverts to existing FastLane wrapper path
|
||
|
|
// - Observability: perf stat shows instruction/branch reduction
|
||
|
|
//
|
||
|
|
// Expected Performance:
|
||
|
|
// - Reduction: -17.5 instructions/op, -6.0 branches/op
|
||
|
|
// - Impact: +10-15% throughput (remove 30% wrapper overhead)
|
||
|
|
//
|
||
|
|
// ENV Variables:
|
||
|
|
// HAKMEM_FASTLANE_DIRECT=0/1 # Enable direct path (default: 0, research box)
|
||
|
|
|
||
|
|
#pragma once
|
||
|
|
|
||
|
|
#include <stdatomic.h>
|
||
|
|
#include <stdlib.h>
|
||
|
|
|
||
|
|
// ENV control: cached flag for fastlane_direct_enabled()
|
||
|
|
// -1: uninitialized, 0: disabled, 1: enabled
|
||
|
|
// NOTE: Must be a single global (not header-static) so bench_profile refresh can
|
||
|
|
// update the same cache used by malloc/free wrappers.
|
||
|
|
// Declaration only (no initializer here). fastlane_direct_enabled() reads this
// with a relaxed atomic load and, when it sees -1, overwrites it with the 0/1
// result parsed from HAKMEM_FASTLANE_DIRECT.
// NOTE(review): the single definition is expected in one .c file - not visible here.
extern _Atomic int g_fastlane_direct_enabled;
|
||
|
|
|
||
|
|
// Runtime check: Is FastLane Direct path enabled?
|
||
|
|
// Returns: 1 if enabled, 0 if disabled
|
||
|
|
// Hot path: Single atomic load (after first call)
|
||
|
|
static inline int fastlane_direct_enabled(void) {
|
||
|
|
int val = atomic_load_explicit(&g_fastlane_direct_enabled, memory_order_relaxed);
|
||
|
|
if (__builtin_expect(val == -1, 0)) {
|
||
|
|
// Cold path: Initialize from ENV
|
||
|
|
const char* e = getenv("HAKMEM_FASTLANE_DIRECT");
|
||
|
|
int enable = (e && *e && *e != '0') ? 1 : 0;
|
||
|
|
atomic_store_explicit(&g_fastlane_direct_enabled, enable, memory_order_relaxed);
|
||
|
|
return enable;
|
||
|
|
}
|
||
|
|
return val;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Refresh from ENV: Called during benchmark ENV reloads
|
||
|
|
// Allows runtime toggle without recompilation
|
||
|
|
// Prototype only; the body is defined outside this header. Takes no arguments
// and returns nothing.
// NOTE(review): presumably re-parses HAKMEM_FASTLANE_DIRECT and overwrites
// g_fastlane_direct_enabled - confirm against the implementation.
void fastlane_direct_env_refresh_from_env(void);
|