Files
hakmem/core/box/front_fastlane_env_box.h
Moe Charm (CI) dcc1d42e7f Phase 6-2: Promote Front FastLane Free DeDup (default ON)
Results:
- A/B test: +5.18% on Mixed (10-run, clean env)
- Baseline: 46.68M ops/s
- Optimized: 49.10M ops/s
- Improvement: +2.42M ops/s (+5.18%)

Strategy:
- Eliminate duplicate header validation in front_fastlane_try_free()
- Direct call to free_tiny_fast() when dedup enabled
- Single validation path (no redundant checks)

Success factors:
1. Complete duplicate elimination (free path optimization)
2. Free path importance (50% of Mixed workload)
3. Improved execution stability (CV: 1.00% → 0.58%)

Phase 6 cumulative:
- Phase 6-1 FastLane: +11.13%
- Phase 6-2 Free DeDup: +5.18%
- Total: ~+16-17% from baseline (multiplicative effect)

Promotion:
- Default: HAKMEM_FRONT_FASTLANE_FREE_DEDUP=1 (opt-out)
- Added to MIXED_TINYV3_C7_SAFE preset
- Added to C6_HEAVY_LEGACY_POOLV1 preset
- Rollback: HAKMEM_FRONT_FASTLANE_FREE_DEDUP=0

Files modified:
- core/box/front_fastlane_env_box.h: default 0 → 1
- core/bench_profile.h: added to presets
- CURRENT_TASK.md: Phase 6-2 GO result

Health check: PASSED (all profiles)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-14 17:38:21 +09:00

125 lines
4.3 KiB
C

#ifndef HAK_FRONT_FASTLANE_ENV_BOX_H
#define HAK_FRONT_FASTLANE_ENV_BOX_H
// ============================================================================
// Phase 6: Front FastLane - ENV Gate Box
// ============================================================================
//
// Purpose: ENV gate for Front FastLane (Layer Collapse optimization)
//
// ENV Variables:
// HAKMEM_FRONT_FASTLANE=0/1 (default: 1, promoted)
// - 0: Disabled (use existing wrapper paths)
// - 1: Enabled (use FastLane single-box entry point)
//
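// HAKMEM_FRONT_FASTLANE_CLASS_MASK=0x.. (default: 0xFF, optional)
// - Bitmask for gradual rollout (e.g., 0x01 = class 0 only)
// - 0xFF = all classes enabled
//
// HAKMEM_FRONT_FASTLANE_FREE_DEDUP=0/1 (default: 1, promoted)
// - 0: Disabled (front_fastlane_try_free() does its own header validation)
// - 1: Enabled (front_fastlane_try_free() calls free_tiny_fast() directly)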
//
// Box Theory:
// - L0: ENV gate (this file)
// - L1: FrontFastLaneBox (front_fastlane_box.h)
// - Integration: hak_wrappers.inc.h
//
// Safety:
// - Default ON (opt-out via ENV=0)
// - Zero overhead when disabled (static cached)
// - Lazy init (getenv on first call)
//
// Rollback:
// - Set HAKMEM_FRONT_FASTLANE=0
// - Or rebuild without integration
//
// ============================================================================
#include <stdatomic.h>
#include <stdlib.h>
#include <stdint.h>
// Forward declarations for cross-box includes.
// All three ENV gates are declared here ahead of their definitions, which
// appear further down in this header.
static inline int front_fastlane_enabled(void);
static inline uint8_t front_fastlane_class_mask(void);
static inline int front_fastlane_free_dedup_enabled(void);
// ============================================================================
// ENV Gate Implementation
// ============================================================================
// Master gate for Front FastLane, controlled by HAKMEM_FRONT_FASTLANE.
//
// Returns 1 when the gate is enabled and 0 when it is disabled:
//   - variable unset                   -> 1 (default ON, opt-out)
//   - value empty or starting with '0' -> 0
//   - any other value                  -> 1
//
// The environment is read only while the function-local cache still holds
// the -1 sentinel; once a 0/1 answer has been stored, later calls return it
// without calling getenv() again. The cache is an _Atomic int accessed with
// relaxed loads/stores.
static inline int front_fastlane_enabled(void) {
    static _Atomic int gate_state = -1;  // -1 = not resolved yet

    int state = atomic_load_explicit(&gate_state, memory_order_relaxed);
    if (__builtin_expect(state != -1, 1)) {
        return state;  // Hot path: answer already cached
    }

    // Cold path: resolve the gate from the environment.
    const char* raw = getenv("HAKMEM_FRONT_FASTLANE");
    if (raw == NULL) {
        state = 1;  // Unset: keep the default (ON)
    } else if (raw[0] == '\0' || raw[0] == '0') {
        state = 0;  // Empty or leading '0': disabled
    } else {
        state = 1;  // Anything else: enabled
    }

    // Publish the resolved value so subsequent calls skip getenv().
    atomic_store_explicit(&gate_state, state, memory_order_relaxed);
    return state;
}
// Class bitmask for gradual rollout, read from
// HAKMEM_FRONT_FASTLANE_CLASS_MASK (default: 0xFF = all classes).
//
// The value is parsed with strtol() using base 0, so the base is chosen
// from the prefix: "0x.." is hexadecimal, a leading "0" is octal, anything
// else is decimal. Characters following the parsed number are ignored.
// The default 0xFF is kept when the variable is unset, when no number can
// be parsed, or when the parsed value lies outside 0..0xFF.
//
// The result is resolved on the first call and cached in a function-local
// _Atomic int (sentinel -1, relaxed loads/stores); later calls return the
// cached mask without touching the environment.
static inline uint8_t front_fastlane_class_mask(void) {
    static _Atomic int mask_state = -1;  // -1 = not resolved yet

    int state = atomic_load_explicit(&mask_state, memory_order_relaxed);
    if (__builtin_expect(state != -1, 1)) {
        return (uint8_t)state;  // Hot path: mask already cached
    }

    // Cold path: start from the default and override only on a valid parse.
    state = 0xFF;
    const char* raw = getenv("HAKMEM_FRONT_FASTLANE_CLASS_MASK");
    if (raw != NULL) {
        char* stop = NULL;
        const long candidate = strtol(raw, &stop, 0);
        const int consumed_any = (stop != raw);
        const int fits_in_byte = (candidate >= 0 && candidate <= 0xFF);
        if (consumed_any && fits_in_byte) {
            state = (int)candidate;
        }
    }

    // Publish the resolved mask so subsequent calls skip the parse.
    atomic_store_explicit(&mask_state, state, memory_order_relaxed);
    return (uint8_t)state;
}
// Phase 6-2: Free DeDup gate, controlled by HAKMEM_FRONT_FASTLANE_FREE_DEDUP.
// When enabled, front_fastlane_try_free() calls free_tiny_fast() directly
// instead of performing its own header validation first.
//
// Returns 1 when enabled and 0 when disabled:
//   - variable unset                   -> 1 (default ON, opt-out)
//   - value empty or starting with '0' -> 0
//   - any other value                  -> 1
//
// The answer is resolved on the first call and cached in a function-local
// _Atomic int (sentinel -1, relaxed loads/stores).
static inline int front_fastlane_free_dedup_enabled(void) {
    static _Atomic int dedup_state = -1;  // -1 = not resolved yet

    int state = atomic_load_explicit(&dedup_state, memory_order_relaxed);
    if (__builtin_expect(state == -1, 0)) {
        // Cold path: only the first character of the value matters.
        // An unset variable is treated like "1" so the default stays ON.
        const char* raw = getenv("HAKMEM_FRONT_FASTLANE_FREE_DEDUP");
        const char first = raw ? raw[0] : '1';
        state = !(first == '0' || first == '\0');

        // Publish the resolved value so subsequent calls skip getenv().
        atomic_store_explicit(&dedup_state, state, memory_order_relaxed);
    }
    return state;
}
#endif // HAK_FRONT_FASTLANE_ENV_BOX_H