Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
72 lines
2.4 KiB
Bash
Executable File
72 lines
2.4 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Simple runner for larson contention benchmark without LD_PRELOAD.
# Builds larson variants if missing and runs with given presets.
#
# Options (all optional; defaults shown):
#   -d sec          duration, printed as "Duration: <sec>s" and passed as the
#                   first argument to every larson binary            (2)
#   -t threads_csv  comma-separated thread counts, one run per entry (1,4)
#   -m min          second larson argument (presumably min block size) (8)
#   -M max          third larson argument (presumably max block size)  (128)
#   -c chunks       fourth larson argument                           (1024)
#   -r rounds       fifth larson argument                            (1)
#   -s seed         sixth larson argument                            (12345)
set -euo pipefail

# Presets; each can be overridden by the matching command-line option.
dur=2
threads_csv="1,4"
min_sz=8
max_sz=128
chunks=1024
rounds=1
seed=12345

#######################################
# Parse command-line options into the preset globals above.
# Globals:   dur, threads_csv, min_sz, max_sz, chunks, rounds, seed (written)
# Arguments: the script's own arguments ("$@")
# Outputs:   usage text on stderr for an unknown option or missing value
# Returns:   0 on success; exits the script with status 1 on a usage error
#######################################
parse_args() {
  # OPTIND is made local so the function can be called more than once.
  local opt OPTIND=1
  # The leading ':' selects getopts' silent mode: unknown options and missing
  # option arguments both fall through to the catch-all arm below.
  while getopts ":d:t:m:M:c:r:s:" opt; do
    case "$opt" in
      d) dur="$OPTARG" ;;
      t) threads_csv="$OPTARG" ;;
      m) min_sz="$OPTARG" ;;
      M) max_sz="$OPTARG" ;;
      c) chunks="$OPTARG" ;;
      r) rounds="$OPTARG" ;;
      s) seed="$OPTARG" ;;
      *)
        # Diagnostics belong on stderr so they never mix with benchmark output.
        echo "Usage: $0 [-d sec] [-t threads_csv] [-m min] [-M max] [-c chunks] [-r rounds] [-s seed]" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

#######################################
# Build a make target unless an executable of that name already exists in the
# current directory.
# Arguments: $1 - make target, which is also the expected binary name
# Outputs:   make's stdout is discarded; its stderr is left visible
# Returns:   0 if the binary already exists, otherwise make's exit status
#######################################
ensure_built() {
  local target=$1
  if [[ ! -x "./${target}" ]]; then
    make -s "$target" >/dev/null
  fi
}

# The system and hakmem variants are mandatory: with `set -e` active in this
# script, a failed build stops the run here.
ensure_built larson_system
ensure_built larson_hakmem

# The mimalloc variant is optional. It is enabled only when the shared library
# is present (path overridable via MIMALLOC_SO) and larson_mi exists or builds.
MI_SO="${MIMALLOC_SO:-mimalloc-bench/extern/mi/out/release/libmimalloc.so}"
have_mi=0
if [[ -f "$MI_SO" ]]; then
  have_mi=1
  # A failed build only turns the mimalloc rows off; it does not abort.
  ensure_built larson_mi || have_mi=0
fi

#######################################
# Run one larson binary and print only its "Throughput" line(s), each prefixed
# with its line number in the binary's output (grep -n).
# Globals:   dur, min_sz, max_sz, chunks, rounds, seed (read)
# Arguments: $1   - path to the larson binary
#            $2   - thread count (last positional argument to the binary)
#            $3.. - optional NAME=VALUE pairs added to that process's
#                   environment only
# Outputs:   matching lines on stdout; the binary's stderr is discarded
# Returns:   always 0 (a failed run or no match yields empty output)
#######################################
run_larson() {
  local bin=$1 nthreads=$2
  shift 2
  # `env` with no NAME=VALUE arguments simply executes the binary.
  env "$@" "$bin" "$dur" "$min_sz" "$max_sz" "$chunks" "$rounds" "$seed" "$nthreads" 2>/dev/null \
    | grep -n -- "Throughput" || true
}

#######################################
# Run ./larson_hakmem with its HAKMEM_* tuning variables in its environment.
# The values are handed to the child through `env`: a bare `NAME=value` line
# with no command after it only sets an unexported shell variable, which the
# benchmark process would never see.
# Globals:   HAKMEM_TINY_META_FREE, HAKMEM_TINY_META_ALLOC,
#            HAKMEM_DISABLE_BATCH, HAKMEM_TINY_SS_ADOPT_COOLDOWN (read, optional)
# Arguments: $1 - thread count
# Outputs:   the "Throughput" line(s) of the run on stdout
#######################################
run_hakmem() {
  local nthreads=$1
  # Gate fast paths to isolate correctness first; toggle via env if needed
  local -a hak_env=(
    "HAKMEM_TINY_META_FREE=${HAKMEM_TINY_META_FREE:-0}"
    "HAKMEM_TINY_META_ALLOC=${HAKMEM_TINY_META_ALLOC:-0}"
    "HAKMEM_DISABLE_BATCH=${HAKMEM_DISABLE_BATCH:-1}"
  )
  # Enable partial adopt for multi-thread runs to improve reuse
  if [[ "$nthreads" -gt 1 ]]; then
    hak_env+=(
      "HAKMEM_TINY_SS_ADOPT=1"
      "HAKMEM_TINY_SS_ADOPT_COOLDOWN=${HAKMEM_TINY_SS_ADOPT_COOLDOWN:-4}"
    )
  fi
  run_larson ./larson_hakmem "$nthreads" "${hak_env[@]}"
}

# Split the comma-separated thread list into an array.
IFS=',' read -ra tlist <<<"$threads_csv"
printf "\nLarson benchmark (min=%s, max=%s, chunks=%s, rounds=%s, seed=%s)\n" "$min_sz" "$max_sz" "$chunks" "$rounds" "$seed"
printf "Duration: %ss\n\n" "$dur"

printf "%-10s %-5s %s\n" "Allocator" "Thr" "Output"
printf "%-10s %-5s %s\n" "---------" "----" "------"
for t in "${tlist[@]}"; do
  out=$(run_larson ./larson_system "$t")
  printf "%-10s %-5s %s\n" "system" "$t" "$out"

  # mimalloc rows appear only when the optional variant was found/built.
  if (( have_mi == 1 )); then
    out=$(run_larson ./larson_mi "$t")
    printf "%-10s %-5s %s\n" "mimalloc" "$t" "$out"
  fi

  out=$(run_hakmem "$t")
  printf "%-10s %-5s %s\n" "hakmem" "$t" "$out"
done