Changes: - scripts/box/pgo_fast_profile_config.sh: Expanded WS patterns (3→5) and seeds (1→3) for reduced overfitting and better production workload representativeness - PERFORMANCE_TARGETS_SCORECARD.md: Phase 68 baseline promoted (61.614M = 50.93%) - CURRENT_TASK.md: Phase 68 marked complete, Phase 67a (layout tax forensics) set Active Results: - 10-run verification: +1.19% vs Phase 66 baseline (GO, >+1.0% threshold) - M1 milestone: 50.93% of mimalloc (target 50%, exceeded by +0.93pp) - Stability: 10-run mean/median with <2.1% CV 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
51 lines
1.9 KiB
C
51 lines
1.9 KiB
C
// fastlane_direct_env_box.h - Phase 19-1: FastLane Direct Path ENV Control
|
|
//
|
|
// Goal: Remove wrapper layer overhead (30.79% of cycles) by calling core allocator directly
|
|
// Strategy: Compile-time + runtime gate to bypass front_fastlane_try_*() wrapper
|
|
//
|
|
// Box Theory:
|
|
// - Boundary: HAKMEM_FASTLANE_DIRECT=0/1 (default: 0, opt-in)
|
|
// - Rollback: ENV=0 reverts to existing FastLane wrapper path
|
|
// - Observability: perf stat shows instruction/branch reduction
|
|
//
|
|
// Expected Performance:
|
|
// - Reduction: -17.5 instructions/op, -6.0 branches/op
|
|
// - Impact: +10-15% throughput (remove 30% wrapper overhead)
|
|
//
|
|
// ENV Variables:
|
|
// HAKMEM_FASTLANE_DIRECT=0/1 # Enable direct path (default: 0, research box); unset, empty, or a value starting with '0' disables, any other value enables
|
|
|
|
#pragma once
|
|
|
|
#include <stdatomic.h>
|
|
#include <stdlib.h>
|
|
#include "../hakmem_build_flags.h"
|
|
|
|
// ENV control: cached HAKMEM_FASTLANE_DIRECT setting, read by fastlane_direct_enabled()
|
|
// Values: -1 = uninitialized (ENV not read yet), 0 = disabled, 1 = enabled
|
|
// NOTE: Must be a single global (not header-static) so bench_profile refresh can
|
|
// update the same cache used by malloc/free wrappers.
// Only declared here; the defining translation unit is not visible from this header.
|
|
extern _Atomic int g_fastlane_direct_enabled;
|
|
|
|
// Runtime check: Is FastLane Direct path enabled?
|
|
// Returns: 1 if enabled, 0 if disabled
|
|
// Hot path: Single atomic load (after first call)
|
|
static inline int fastlane_direct_enabled(void) {
|
|
#if HAKMEM_FAST_PROFILE_FIXED
|
|
return 1;
|
|
#endif
|
|
int val = atomic_load_explicit(&g_fastlane_direct_enabled, memory_order_relaxed);
|
|
if (__builtin_expect(val == -1, 0)) {
|
|
// Cold path: Initialize from ENV
|
|
const char* e = getenv("HAKMEM_FASTLANE_DIRECT");
|
|
int enable = (e && *e && *e != '0') ? 1 : 0;
|
|
atomic_store_explicit(&g_fastlane_direct_enabled, enable, memory_order_relaxed);
|
|
return enable;
|
|
}
|
|
return val;
|
|
}
|
|
|
|
// Refresh from ENV: Called during benchmark ENV reloads
|
|
// Allows runtime toggle without recompilation
// NOTE(review): only the prototype is visible here; presumably the definition
// re-reads HAKMEM_FASTLANE_DIRECT and stores the result in
// g_fastlane_direct_enabled -- confirm against the implementation file.
// When HAKMEM_FAST_PROFILE_FIXED is set, fastlane_direct_enabled() returns 1
// without reading the cache, so a refresh cannot change its result in that build.
|
|
void fastlane_direct_env_refresh_from_env(void);
|