Comprehensive interaction testing with single binary, ENV-only configuration: 4-Point Matrix Results (Mixed SSOT, WS=400): - Point A (C5=0, C6=0): 42.36 M ops/s [Baseline] - Point B (C5=1, C6=0): 43.54 M ops/s (+2.79% vs A) - Point C (C5=0, C6=1): 44.25 M ops/s (+4.46% vs A) - Point D (C5=1, C6=1): 44.65 M ops/s (+5.41% vs A) **[COMBINED TARGET]** Additivity Analysis: - Expected additive: 45.43 M ops/s (B+C-A) - Actual: 44.65 M ops/s (D) - Sub-additivity: 1.72% (near-perfect, minimal negative interaction) Perf Stat Validation (Point D vs A): - Instructions: -6.1% (function call elimination confirmed) - Branches: -6.1% (matches instructions reduction) - Cache-misses: -31.5% (improved locality, NO code explosion) - Throughput: +5.41% (net positive) Decision: ✅ STRONG GO (exceeds +3.0% GO threshold) - D vs A: +5.41% >> +3.0% - Sub-additivity: 1.72% << 20% acceptable - Phase 73 hypothesis validated: -6.1% instructions/branches → +5.41% throughput Promotion to Defaults: - core/bench_profile.h: C5+C6 added to bench_apply_mixed_tinyv3_c7_common() - scripts/run_mixed_10_cleanenv.sh: C5+C6 ENV defaults added - C5+C6 inline slots now PRESET DEFAULT for MIXED_TINYV3_C7_SAFE New Baseline: 44.65 M ops/s (36.75% of mimalloc, +5.41% from Phase 75-0) M2 Target: 55% of mimalloc ≈ 66.8 M ops/s (remaining gap: 22.15 M ops/s) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
52 lines
2.3 KiB
Bash
Executable File
52 lines
2.3 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
set -euo pipefail
|
|
|
|
# 10-run Mixed benchmark with explicit clean ENV for frozen/research knobs.
|
|
# Purpose: avoid "bench_setenv_default() does not override exported ENV" drift.
|
|
|
|
profile=${HAKMEM_PROFILE:-MIXED_TINYV3_C7_SAFE}
|
|
iters=${ITERS:-20000000}
|
|
ws=${WS:-400}
|
|
runs=${RUNS:-10}
|
|
bin=${BENCH_BIN:-./bench_random_mixed_hakmem}
|
|
|
|
# Keep profiles reproducible even if user exported env vars.
|
|
case "${profile}" in
|
|
MIXED_TINYV3_C7_BALANCED)
|
|
export HAKMEM_SS_MEM_LEAN=1
|
|
export HAKMEM_SS_MEM_LEAN_DECOMMIT=OFF
|
|
export HAKMEM_SS_MEM_LEAN_TARGET_MB=10
|
|
;;
|
|
*)
|
|
export HAKMEM_SS_MEM_LEAN=0
|
|
export HAKMEM_SS_MEM_LEAN_DECOMMIT=OFF
|
|
export HAKMEM_SS_MEM_LEAN_TARGET_MB=10
|
|
;;
|
|
esac
|
|
|
|
# Force known research knobs OFF to avoid accidental carry-over.
|
|
export HAKMEM_TINY_HEADER_WRITE_ONCE=${HAKMEM_TINY_HEADER_WRITE_ONCE:-0}
|
|
export HAKMEM_TINY_C7_PRESERVE_HEADER=${HAKMEM_TINY_C7_PRESERVE_HEADER:-0}
|
|
export HAKMEM_TINY_TCACHE=${HAKMEM_TINY_TCACHE:-0}
|
|
export HAKMEM_TINY_TCACHE_CAP=${HAKMEM_TINY_TCACHE_CAP:-64}
|
|
export HAKMEM_MALLOC_TINY_DIRECT=${HAKMEM_MALLOC_TINY_DIRECT:-0}
|
|
export HAKMEM_FRONT_FASTLANE_ALLOC_LEGACY_DIRECT=${HAKMEM_FRONT_FASTLANE_ALLOC_LEGACY_DIRECT:-0}
|
|
export HAKMEM_FORCE_LIBC_ALLOC=${HAKMEM_FORCE_LIBC_ALLOC:-0}
|
|
export HAKMEM_ENV_SNAPSHOT_SHAPE=${HAKMEM_ENV_SNAPSHOT_SHAPE:-0}
|
|
export HAKMEM_TINY_C7_ULTRA_HEADER_LIGHT=${HAKMEM_TINY_C7_ULTRA_HEADER_LIGHT:-0}
|
|
# NOTE: Phase 19-1b is promoted in presets. Keep cleanenv aligned by default.
|
|
export HAKMEM_FASTLANE_DIRECT=${HAKMEM_FASTLANE_DIRECT:-1}
|
|
# NOTE: Phase 9/10 are promoted (bench_profile defaults to 1). Keep cleanenv aligned by default.
|
|
export HAKMEM_FREE_TINY_FAST_MONO_DUALHOT=${HAKMEM_FREE_TINY_FAST_MONO_DUALHOT:-1}
|
|
export HAKMEM_FREE_TINY_FAST_MONO_LEGACY_DIRECT=${HAKMEM_FREE_TINY_FAST_MONO_LEGACY_DIRECT:-1}
|
|
# NOTE: Phase 69-1 winner (Warm Pool Size=16, +3.26% Strong GO, ENV-only)
|
|
export HAKMEM_WARM_POOL_SIZE=${HAKMEM_WARM_POOL_SIZE:-16}
|
|
# NOTE: Phase 75-3 winner (C5+C6 Inline Slots, +5.41% GO, 4-point matrix A/B)
|
|
export HAKMEM_TINY_C5_INLINE_SLOTS=${HAKMEM_TINY_C5_INLINE_SLOTS:-1}
|
|
export HAKMEM_TINY_C6_INLINE_SLOTS=${HAKMEM_TINY_C6_INLINE_SLOTS:-1}
|
|
|
|
for i in $(seq 1 "${runs}"); do
|
|
echo "=== Run ${i}/${runs} ==="
|
|
HAKMEM_PROFILE="${profile}" "${bin}" "${iters}" "${ws}" 1 2>&1 | rg "Throughput" || true
|
|
done
|