// hakmem_build_flags.h - Centralized compile-time feature switches
// Purpose: Define all build-time toggles in one place with safe defaults.
// Usage: Include from common public headers (e.g., hakmem.h / hakmem_tiny.h).
//
// Each default below is wrapped in #ifndef, so a value supplied by the build
// system (-DNAME=value) takes precedence over the default given here.

#ifndef HAKMEM_BUILD_FLAGS_H
#define HAKMEM_BUILD_FLAGS_H

// ------------------------------------------------------------
// Phase 2: Headerless Mode Override
// ------------------------------------------------------------
// If Headerless is enabled, force HEADER_CLASSIDX to 0.
// This must stay ahead of the HAKMEM_TINY_HEADER_CLASSIDX default further
// down: once the macro is defined here, that #ifndef default is skipped.
#if defined(HAKMEM_TINY_HEADERLESS) && HAKMEM_TINY_HEADERLESS
#  undef HAKMEM_TINY_HEADER_CLASSIDX
#  define HAKMEM_TINY_HEADER_CLASSIDX 0
#endif

// ------------------------------------------------------------
// Release/debug detection
// ------------------------------------------------------------
// HAKMEM_BUILD_RELEASE: 1 in release-like builds (NDEBUG set), 0 otherwise
#ifndef HAKMEM_BUILD_RELEASE
#  if defined(NDEBUG)
#    define HAKMEM_BUILD_RELEASE 1
#  else
#    define HAKMEM_BUILD_RELEASE 0
#  endif
#endif

// ------------------------------------------------------------
// Instrumentation & counters (compile-time)
// ------------------------------------------------------------
// Enable lightweight path/debug counters (compiled out when 0)
// Default: 0 in release builds (NDEBUG set), 1 in debug builds.
// Note: keyed on NDEBUG directly, not on HAKMEM_BUILD_RELEASE.
#ifndef HAKMEM_DEBUG_COUNTERS
#  if defined(NDEBUG)
#    define HAKMEM_DEBUG_COUNTERS 0
#  else
#    define HAKMEM_DEBUG_COUNTERS 1
#  endif
#endif

// Enable extended memory profiling (compiled out when 0)
#ifndef HAKMEM_DEBUG_MEMORY
#  define HAKMEM_DEBUG_MEMORY 0
#endif

// Tiny refill optimization helpers (header-only)
#ifndef HAKMEM_TINY_REFILL_OPT
#  define HAKMEM_TINY_REFILL_OPT 1
#endif

// Batch refill P0 (can be toggled for A/B)
#ifndef HAKMEM_TINY_P0_BATCH_REFILL
#  define HAKMEM_TINY_P0_BATCH_REFILL 0
#endif

// Box refactor (Phase 6-1.7) - usually injected from build system
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
#  define HAKMEM_TINY_PHASE6_BOX_REFACTOR 1
#endif

// SuperSlab backend toggle (compile-time)
// Default: 1 (ON) - SuperSlab is the core architecture.
// Set to 0 only for legacy/compat testing.
#ifndef HAKMEM_TINY_USE_SUPERSLAB
#  define HAKMEM_TINY_USE_SUPERSLAB 1
#endif

// ------------------------------------------------------------
// Phase 7: Region-ID Direct Lookup (Header-based optimization)
// ------------------------------------------------------------

// Phase 7 Task 1: Header-based class_idx for O(1) free
// Default: ON (1). Forced to 0 when HAKMEM_TINY_HEADERLESS is enabled
// (see the Headerless override at the top of this file).
// Build: make HEADER_CLASSIDX=1 or make phase7
#ifndef HAKMEM_TINY_HEADER_CLASSIDX
#  define HAKMEM_TINY_HEADER_CLASSIDX 1
#endif

// Phase 7 Task 2: Aggressive inline TLS cache access
// Default: OFF (enable after full validation in Task 5)
// Build: make AGGRESSIVE_INLINE=1 or make phase7
// Requires: HAKMEM_TINY_HEADER_CLASSIDX=1
// NOTE(review): this requirement is documented only; nothing in this header
// rejects AGGRESSIVE_INLINE=1 together with HEADER_CLASSIDX=0.
#ifndef HAKMEM_TINY_AGGRESSIVE_INLINE
#  define HAKMEM_TINY_AGGRESSIVE_INLINE 0
#endif

// Inline TLS SLL pop (experimental, A/B only)
// Default: OFF (HAKMEM_TINY_INLINE_SLL=0) to keep Box TLS-SLL API as the
// standard path.
// Enable explicitly via build flag: -DHAKMEM_TINY_INLINE_SLL=1 (bench/debug only).
#ifndef HAKMEM_TINY_INLINE_SLL
#  define HAKMEM_TINY_INLINE_SLL 0
#endif

// Phase 1A3: Always-inline tiny_region_id_write_header()
// Default: OFF (HAKMEM_TINY_HEADER_WRITE_ALWAYS_INLINE=0) - enable after A/B validation
// Purpose: Force inline expansion of header write to reduce alloc path overhead
// Expected impact: +0.5-2% on Mixed workloads
// Build: make EXTRA_CFLAGS=-DHAKMEM_TINY_HEADER_WRITE_ALWAYS_INLINE=1 [target]
#ifndef HAKMEM_TINY_HEADER_WRITE_ALWAYS_INLINE
#  define HAKMEM_TINY_HEADER_WRITE_ALWAYS_INLINE 0
#endif

// Phase 7 Task 3: Pre-warm TLS cache at init
// Default: OFF (enable after implementation)
// Build: make PREWARM_TLS=1 or make phase7
#ifndef HAKMEM_TINY_PREWARM_TLS
#  define HAKMEM_TINY_PREWARM_TLS 0
#endif

// ------------------------------------------------------------
// Phase 1: Headerless Optimization - TLS SuperSlab Hint Cache
// ------------------------------------------------------------
// Purpose: Accelerate ptr->SuperSlab lookup in Headerless mode
// Default: 0 (disabled during development and testing)
// Target:  1 (enabled after validation in Phase 1 rollout)
//
// Performance Impact:
// - Cache hit: 2-5 cycles (vs 10-50 cycles for hak_super_lookup)
// - Expected hit rate: 85-95% (single-threaded), 70-85% (multi-threaded)
// - Expected throughput improvement: 15-20%
//
// Memory Overhead:
// - 112 bytes per thread (TLS)
// - Negligible for typical workloads (1000 threads = 112KB)
//
// Dependencies:
// - Requires HAKMEM_TINY_HEADERLESS=1 (hint is no-op in header mode)
// - No other dependencies (self-contained Box)
//
// Build: make EXTRA_CFLAGS="-DHAKMEM_TINY_SS_TLS_HINT=1"
#ifndef HAKMEM_TINY_SS_TLS_HINT
#  define HAKMEM_TINY_SS_TLS_HINT 0
#endif

// Validation: Hint Box only active in Headerless mode.
// Uses the same "defined and non-zero" test as the Headerless override at the
// top of this file, so an explicit -DHAKMEM_TINY_HEADERLESS=0 is diagnosed
// just like an undefined macro.
// (#warning is a widely supported extension, standardized in C23.)
#if HAKMEM_TINY_SS_TLS_HINT && \
    !(defined(HAKMEM_TINY_HEADERLESS) && HAKMEM_TINY_HEADERLESS)
#  warning "HAKMEM_TINY_SS_TLS_HINT enabled but HAKMEM_TINY_HEADERLESS is not enabled - hint will have no effect"
#endif

// Runtime verbosity (printf-heavy diagnostics). Keep OFF for benches.
#ifndef HAKMEM_DEBUG_VERBOSE
#  define HAKMEM_DEBUG_VERBOSE 0
#endif

// Tiny/Mid safety checks on free path (mincore header validation).
// 0 = performance (boundary-only), 1 = strict (mincore for all)
#ifndef HAKMEM_TINY_SAFE_FREE
#  define HAKMEM_TINY_SAFE_FREE 0
#endif

// Phase 10: Aggressive refill count defaults (tunable via env vars)
// Goal: Reduce backend transitions by refilling in larger batches
//   HAKMEM_TINY_REFILL_COUNT:     global default (default: 128)
//   HAKMEM_TINY_REFILL_COUNT_HOT: class 0-3 (default: 128)
//   HAKMEM_TINY_REFILL_COUNT_MID: class 4-7 (default: 96)
// Larson Fix (Priority 1): Increased from 64 to 128 to reduce lock contention
// Expected impact: Lock frequency reduction 19K -> ~1.6K locks/sec (12x)
// NOTE: Multi-threaded Larson has pre-existing crash bug (not caused by this change)
#ifndef HAKMEM_TINY_REFILL_DEFAULT
#  define HAKMEM_TINY_REFILL_DEFAULT 128
#endif

// ------------------------------------------------------------
// Tiny front architecture toggles (compile-time defaults)
// ------------------------------------------------------------
// New 3-layer Tiny front (A/B via build flag)
#ifndef HAKMEM_TINY_USE_NEW_3LAYER
#  define HAKMEM_TINY_USE_NEW_3LAYER 0
#endif

// Minimal/strict front variants (bench/debug only)
// NOTE(review): MINIMAL_FRONT defaults to 1 although this group is labelled
// bench/debug only - confirm that the default is intended.
#ifndef HAKMEM_TINY_MINIMAL_FRONT
#  define HAKMEM_TINY_MINIMAL_FRONT 1
#endif
#ifndef HAKMEM_TINY_STRICT_FRONT
#  define HAKMEM_TINY_STRICT_FRONT 0
#endif

// ------------------------------------------------------------
// Phase 4-Step3: Tiny Front PGO Config Box
// ------------------------------------------------------------
// HAKMEM_TINY_FRONT_PGO:
//   0 = Normal build with runtime configuration (default, backward compatible)
//       Configuration checked via ENV variables at runtime (flexible)
//   1 = PGO-optimized build with compile-time configuration (performance)
//       Configuration fixed at compile time (dead code elimination)
// Eliminates runtime branches for maximum performance.
// Use with: make CFLAGS="-DHAKMEM_TINY_FRONT_PGO=1" bench_random_mixed_hakmem
// Expected benefit: +5-8% improvement via dead code elimination (57.2 -> 60-62 M ops/s)
#ifndef HAKMEM_TINY_FRONT_PGO
#  define HAKMEM_TINY_FRONT_PGO 0
#endif

// ------------------------------------------------------------
// Phase 5-Step3: Mid/Large PGO Config Box
// ------------------------------------------------------------
// HAKMEM_MID_LARGE_PGO:
//   0 = Normal build with runtime configuration (default, backward compatible)
//       Configuration checked via ENV variables at runtime (flexible)
//   1 = PGO-optimized build with compile-time configuration (performance)
//       Configuration fixed at compile time (dead code elimination)
// Eliminates runtime branches for Mid/Large allocation paths.
// Use with: make EXTRA_CFLAGS="-DHAKMEM_MID_LARGE_PGO=1" bench_random_mixed_hakmem
// Expected benefit: +2-4% improvement via dead code elimination
#ifndef HAKMEM_MID_LARGE_PGO
#  define HAKMEM_MID_LARGE_PGO 0
#endif

// Route fingerprint (compile-time gate; runtime ENV still required)
#ifndef HAKMEM_ROUTE
#  define HAKMEM_ROUTE 0
#endif

// Bench-only knobs (default values; can be overridden via build flags)
// The per-size REFILL knobs fall back to HAKMEM_TINY_BENCH_REFILL unless
// they are set individually.
#ifndef HAKMEM_TINY_BENCH_REFILL
#  define HAKMEM_TINY_BENCH_REFILL 8
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL8
#  define HAKMEM_TINY_BENCH_REFILL8 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL16
#  define HAKMEM_TINY_BENCH_REFILL16 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL32
#  define HAKMEM_TINY_BENCH_REFILL32 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL64
#  define HAKMEM_TINY_BENCH_REFILL64 HAKMEM_TINY_BENCH_REFILL
#endif

#ifndef HAKMEM_TINY_BENCH_WARMUP8
#  define HAKMEM_TINY_BENCH_WARMUP8 64
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP16
#  define HAKMEM_TINY_BENCH_WARMUP16 96
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP32
#  define HAKMEM_TINY_BENCH_WARMUP32 160
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP64
#  define HAKMEM_TINY_BENCH_WARMUP64 192
#endif

// ------------------------------------------------------------
// Helper enum (for documentation / logging)
// ------------------------------------------------------------
// Mirrors a subset of the switches above as enumerators, each initialized
// from the corresponding macro's value in the including translation unit.
typedef enum {
    HAK_FLAG_BUILD_RELEASE  = HAKMEM_BUILD_RELEASE,
    HAK_FLAG_DEBUG_COUNTERS = HAKMEM_DEBUG_COUNTERS,
    HAK_FLAG_DEBUG_MEMORY   = HAKMEM_DEBUG_MEMORY,
    HAK_FLAG_REFILL_OPT     = HAKMEM_TINY_REFILL_OPT,
    HAK_FLAG_P0_BATCH       = HAKMEM_TINY_P0_BATCH_REFILL,
    HAK_FLAG_BOX_REFACTOR   = HAKMEM_TINY_PHASE6_BOX_REFACTOR,
    HAK_FLAG_NEW_3LAYER     = HAKMEM_TINY_USE_NEW_3LAYER,
} hak_build_flags_t;

#endif // HAKMEM_BUILD_FLAGS_H