Goal: Reduce backend transitions by increasing frontend hit rate Result: +2% best case, syscalls unchanged (root cause: SuperSlab churn) Implementation: 1. Cache capacity expansion (2-8x per-class) - Hot classes (C0-C3): 4x increase (512 slots) - Medium classes (C4-C6): 2-3x increase - Class 7 (1KB): 2x increase (128 slots) - Fast cache: 2x default capacity 2. Refill batch size increase (4-8x) - Global default: 16 → 64 (4x) - Hot classes: 128 (8x) via HAKMEM_TINY_REFILL_COUNT_HOT - Mid classes: 96 (6x) via HAKMEM_TINY_REFILL_COUNT_MID - Class 7: 64 → 128 (2x) - SFC refill: 64 → 128 (2x) 3. Adaptive sizing aggressive parameters - Grow threshold: 80% → 70% (expand earlier) - Shrink threshold: 20% → 10% (shrink less) - Growth rate: 2x → 1.5x (smoother growth) - Max capacity: 2048 → 4096 (2x ceiling) - Adapt frequency: Every 10 → 5 refills (more responsive) Performance Results (100K iterations): Before (Phase 9): - Performance: 9.71M ops/s - Syscalls: 1,729 (mmap:877, munmap:852) After (Phase 10): - Default settings: 8.77M ops/s (-9.7%) ⚠️ - Optimal ENV: 9.89M ops/s (+2%) ✅ - Syscalls: 1,729 (unchanged) ❌ Optimal ENV configuration: export HAKMEM_TINY_REFILL_COUNT_HOT=256 export HAKMEM_TINY_REFILL_COUNT_MID=192 Root Cause Analysis: Bottleneck is NOT TLS/SFC hit rate, but SuperSlab allocation churn: - 877 SuperSlabs allocated (877MB via mmap) - Phase 9 LRU cache not utilized (no frees during benchmark) - All SuperSlabs retained until program exit - System malloc: 9 syscalls vs HAKMEM: 1,729 syscalls (192x gap) Conclusion: TLS/SFC tuning cannot solve SuperSlab allocation policy problem. Next step: Phase 11 SuperSlab Prewarm strategy to eliminate mmap/munmap during benchmark execution. ChatGPT review: Strategy validated, Option A (Prewarm) recommended. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
158 lines
5.2 KiB
C
158 lines
5.2 KiB
C
// hakmem_build_flags.h - Centralized compile-time feature switches
// Purpose: Define all build-time toggles in one place with safe defaults.
// Usage: Include from common public headers (e.g., hakmem.h / hakmem_tiny.h).
//
// Every switch follows the same pattern: it is only defined here when the
// build system (e.g. -DNAME=value) has not already provided a value, so any
// flag can be overridden from the command line without editing this file.

#ifndef HAKMEM_BUILD_FLAGS_H
#define HAKMEM_BUILD_FLAGS_H

// ------------------------------------------------------------
// Release/debug detection
// ------------------------------------------------------------
// HAKMEM_BUILD_RELEASE: 1 in release-like builds, 0 otherwise.
// When not set explicitly it is derived from NDEBUG.
#ifndef HAKMEM_BUILD_RELEASE
#  if defined(NDEBUG)
#    define HAKMEM_BUILD_RELEASE 1
#  else
#    define HAKMEM_BUILD_RELEASE 0
#  endif
#endif

// ------------------------------------------------------------
// Instrumentation & counters (compile-time)
// ------------------------------------------------------------
// Enable lightweight path/debug counters (compiled out when 0).
// The default is 1 regardless of HAKMEM_BUILD_RELEASE.
#ifndef HAKMEM_DEBUG_COUNTERS
#  define HAKMEM_DEBUG_COUNTERS 1
#endif

// Enable extended memory profiling (compiled out when 0)
#ifndef HAKMEM_DEBUG_MEMORY
#  define HAKMEM_DEBUG_MEMORY 0
#endif

// Tiny refill optimization helpers (header-only)
#ifndef HAKMEM_TINY_REFILL_OPT
#  define HAKMEM_TINY_REFILL_OPT 1
#endif

// Batch refill P0 (can be toggled for A/B)
#ifndef HAKMEM_TINY_P0_BATCH_REFILL
#  define HAKMEM_TINY_P0_BATCH_REFILL 0
#endif

// Box refactor (Phase 6-1.7) - usually injected from build system
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
#  define HAKMEM_TINY_PHASE6_BOX_REFACTOR 1
#endif

// ------------------------------------------------------------
// Phase 7: Region-ID Direct Lookup (Header-based optimization)
// ------------------------------------------------------------
// Phase 7 Task 1: Header-based class_idx for O(1) free
// Default: ON (1)
// NOTE(review): this comment previously read "Default: OFF (enable after
// full validation in Task 5)" while the value below was already 1 - confirm
// that the validation is complete and ON is the intended default.
// Build: make HEADER_CLASSIDX=1 or make phase7
#ifndef HAKMEM_TINY_HEADER_CLASSIDX
#  define HAKMEM_TINY_HEADER_CLASSIDX 1
#endif

// Phase 7 Task 2: Aggressive inline TLS cache access
// Default: OFF (enable after full validation in Task 5)
// Build: make AGGRESSIVE_INLINE=1 or make phase7
// Requires: HAKMEM_TINY_HEADER_CLASSIDX=1 (not enforced by this header)
#ifndef HAKMEM_TINY_AGGRESSIVE_INLINE
#  define HAKMEM_TINY_AGGRESSIVE_INLINE 0
#endif

// Phase 7 Task 3: Pre-warm TLS cache at init
// Default: OFF (enable after implementation)
// Build: make PREWARM_TLS=1 or make phase7
#ifndef HAKMEM_TINY_PREWARM_TLS
#  define HAKMEM_TINY_PREWARM_TLS 0
#endif

// Runtime verbosity (printf-heavy diagnostics). Keep OFF for benches.
#ifndef HAKMEM_DEBUG_VERBOSE
#  define HAKMEM_DEBUG_VERBOSE 0
#endif

// Tiny/Mid safety checks on free path (mincore header validation).
// 0 = performance (boundary-only), 1 = strict (mincore for all)
#ifndef HAKMEM_TINY_SAFE_FREE
#  define HAKMEM_TINY_SAFE_FREE 0
#endif

// Phase 10: Aggressive refill count defaults (tunable via env vars)
// Goal: Reduce backend transitions by refilling in larger batches
//   HAKMEM_TINY_REFILL_COUNT:     global default (default: 64)
//   HAKMEM_TINY_REFILL_COUNT_HOT: class 0-3 (default: 128)
//   HAKMEM_TINY_REFILL_COUNT_MID: class 4-7 (default: 96)
// Only the global compile-time default is defined in this header; the
// HOT/MID defaults listed above are not set here.
#ifndef HAKMEM_TINY_REFILL_DEFAULT
#  define HAKMEM_TINY_REFILL_DEFAULT 64
#endif

// ------------------------------------------------------------
// Tiny front architecture toggles (compile-time defaults)
// ------------------------------------------------------------
// New 3-layer Tiny front (A/B via build flag)
#ifndef HAKMEM_TINY_USE_NEW_3LAYER
#  define HAKMEM_TINY_USE_NEW_3LAYER 0
#endif

// Minimal/strict front variants (bench/debug only)
// NOTE(review): HAKMEM_TINY_MINIMAL_FRONT defaults to 1 even though this
// group is labelled bench/debug only - confirm that is intended.
#ifndef HAKMEM_TINY_MINIMAL_FRONT
#  define HAKMEM_TINY_MINIMAL_FRONT 1
#endif
#ifndef HAKMEM_TINY_STRICT_FRONT
#  define HAKMEM_TINY_STRICT_FRONT 0
#endif

// Route fingerprint (compile-time gate; runtime ENV still required)
#ifndef HAKMEM_ROUTE
#  define HAKMEM_ROUTE 0
#endif

// Bench-only knobs (default values; can be overridden via build flags).
// HAKMEM_TINY_BENCH_REFILL is the shared default; each *_REFILL<N> knob
// falls back to it unless overridden individually.
#ifndef HAKMEM_TINY_BENCH_REFILL
#  define HAKMEM_TINY_BENCH_REFILL 8
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL8
#  define HAKMEM_TINY_BENCH_REFILL8 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL16
#  define HAKMEM_TINY_BENCH_REFILL16 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL32
#  define HAKMEM_TINY_BENCH_REFILL32 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL64
#  define HAKMEM_TINY_BENCH_REFILL64 HAKMEM_TINY_BENCH_REFILL
#endif

// Bench warmup knobs; each has its own default.
#ifndef HAKMEM_TINY_BENCH_WARMUP8
#  define HAKMEM_TINY_BENCH_WARMUP8 64
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP16
#  define HAKMEM_TINY_BENCH_WARMUP16 96
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP32
#  define HAKMEM_TINY_BENCH_WARMUP32 160
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP64
#  define HAKMEM_TINY_BENCH_WARMUP64 192
#endif

// ------------------------------------------------------------
// Helper enum (for documentation / logging)
// ------------------------------------------------------------
// Mirrors a subset of the flags above as enumerators, each carrying the
// value of the corresponding macro. Several enumerators may share a value.
typedef enum {
    HAK_FLAG_BUILD_RELEASE  = HAKMEM_BUILD_RELEASE,
    HAK_FLAG_DEBUG_COUNTERS = HAKMEM_DEBUG_COUNTERS,
    HAK_FLAG_DEBUG_MEMORY   = HAKMEM_DEBUG_MEMORY,
    HAK_FLAG_REFILL_OPT     = HAKMEM_TINY_REFILL_OPT,
    HAK_FLAG_P0_BATCH       = HAKMEM_TINY_P0_BATCH_REFILL,
    HAK_FLAG_BOX_REFACTOR   = HAKMEM_TINY_PHASE6_BOX_REFACTOR,
    HAK_FLAG_NEW_3LAYER     = HAKMEM_TINY_USE_NEW_3LAYER,
} hak_build_flags_t;

#endif // HAKMEM_BUILD_FLAGS_H