Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-05 12:31:14 +09:00
commit 52386401b3
27144 changed files with 124451 additions and 0 deletions

View File

@ -0,0 +1,91 @@
#!/usr/bin/env bash
set -euo pipefail
# Drive (almost) the entire mimalloc-bench suite, bounding every run with a timeout.
# Allocators compared: HAKMEM (LD_PRELOAD on top of `sys`), mimalloc (`mi`)
# and the system allocator (`sys`).
#
# Environment knobs:
#   TIMEOUT_SEC  per-run timeout in seconds                 (default: 900)
#   PROCS        concurrency list given to bench.sh --procs (default: 1,4)
#   INCLUDE_JE   also run the jemalloc reference, 0/1       (default: 0)
#   TESTS        space-separated test names, or 'allt'      (default: allt)
#   REPEATS      forwarded to bench.sh as -r                (default: 1)

# Work from the repository root (the parent of this script's directory).
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

# Apply defaults to any knob that is unset or empty.
: "${TIMEOUT_SEC:=900}"
: "${PROCS:=1,4}"
: "${INCLUDE_JE:=0}"
: "${TESTS:=allt}"
: "${REPEATS:=1}"

# Each invocation gets its own timestamped results directory.
RESULT_DIR="$ROOT_DIR/bench_results/mimalloc_full_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$RESULT_DIR"

BENCH_ROOT="$ROOT_DIR/mimalloc-bench"
BENCH_OUT="$BENCH_ROOT/out/bench"

# If the bench output tree is absent, try to build just the benchmarks.
if [[ ! -d "$BENCH_OUT" ]]; then
  echo "[warn] mimalloc-bench/out/bench not found. Attempting auto-build (bench only)."
  if [[ ! -x "$BENCH_ROOT/build-bench-env.sh" ]]; then
    echo "[error] build-bench-env.sh not found under mimalloc-bench."
    echo " Please build manually: cd mimalloc-bench && ./build-bench-env.sh bench"
    exit 1
  fi
  # Build inside a subshell so the working directory of this script is untouched.
  (
    cd "$BENCH_ROOT" >/dev/null
    ./build-bench-env.sh bench
  )
fi

# The auto-build may finish without producing the directory; check again.
if [[ ! -d "$BENCH_OUT" ]]; then
  echo "[error] mimalloc-bench/out/bench still missing after auto-build."
  echo " Try: cd mimalloc-bench && ./build-bench-env.sh all"
  exit 1
fi

echo "[info] Building HAKMEM shared library with PGO for LD_PRELOAD"
make -s pgo-profile-shared pgo-build-shared >/dev/null

# The remainder of the script runs from inside the bench output directory.
pushd "$BENCH_OUT" >/dev/null
#######################################
# Run one bench.sh case under a wall-clock timeout and archive its output.
# Globals:
#   RESULT_DIR  (read) directory receiving <name>.log and <name>_benchres.csv
#   TIMEOUT_SEC (read) seconds before `timeout` sends SIGINT
#   REPEATS     (read) forwarded to bench.sh as -r=<n>
# Arguments:
#   $1 - case name, used as the log / CSV file stem
#   $2 - path of a library to LD_PRELOAD, or "" for none
#   $@ - remaining arguments, forwarded verbatim to bench.sh
# Outputs:
#   Bench output on stdout and appended to the case log; a note on stderr
#   (also appended to the log) when the run times out or fails.
# Returns:
#   Always 0: a failed or timed-out case must not stop the remaining cases.
#######################################
run_case() {
  local name="$1"; shift
  local preload="$1"; shift
  local log="$RESULT_DIR/${name}.log"
  local -a cmd=(timeout -s INT "$TIMEOUT_SEC" bash ../../bench.sh -r="$REPEATS" "$@")
  local rc=0

  echo "[case] $name | timeout=${TIMEOUT_SEC}s | args: $*" | tee -a "$log"

  # Drop any benchres.csv left behind by a previous case so that a run which
  # produces no results cannot have stale data archived under its name.
  rm -f -- benchres.csv

  # The status capture relies on `set -o pipefail` (enabled at the top of this
  # script) so that the exit status of `timeout` is not masked by `tee`.
  if [[ -n "$preload" ]]; then
    # The assignment prefix puts LD_PRELOAD in the environment of `timeout`
    # and therefore of everything it launches.
    LD_PRELOAD="$preload" "${cmd[@]}" 2>&1 | tee -a "$log" || rc=$?
  else
    "${cmd[@]}" 2>&1 | tee -a "$log" || rc=$?
  fi

  # Report what happened instead of silently discarding the status.
  if (( rc == 124 )); then
    echo "[timeout] $name exceeded ${TIMEOUT_SEC}s" | tee -a "$log" >&2
  elif (( rc != 0 )); then
    echo "[warn] $name exited with status $rc" | tee -a "$log" >&2
  fi

  # Save benchres.csv for this case if the run produced one.
  if [[ -f benchres.csv ]]; then
    cp -f benchres.csv "$RESULT_DIR/${name}_benchres.csv" || true
  fi
  return 0
}
# Dispatch. With TESTS=allt each allocator gets one run over the whole 'allt'
# set; otherwise every test is run separately per allocator, so the timeout
# applies per test and partial progress is kept.
hakmem_so="$ROOT_DIR/libhakmem.so"
procs_tag=${PROCS//,/}   # PROCS with the commas removed; used in log file names

case "$TESTS" in
  allt)
    # HAKMEM vs mimalloc vs system (and optionally jemalloc) over 'allt'.
    run_case "hakmem_procs=${procs_tag}" "$hakmem_so" --procs="$PROCS" sys allt
    run_case "mimalloc_procs=${procs_tag}" "" --procs="$PROCS" mi allt
    run_case "system_procs=${procs_tag}" "" --procs="$PROCS" sys allt
    if [[ "$INCLUDE_JE" == "1" ]]; then
      run_case "jemalloc_procs=${procs_tag}" "" --procs="$PROCS" je allt
    fi
    ;;
  *)
    # TESTS is intentionally unquoted: it is a space-separated list.
    for t in $TESTS; do
      run_case "hakmem_${t}_p=${procs_tag}" "$hakmem_so" --procs="$PROCS" sys "$t"
      run_case "mimalloc_${t}_p=${procs_tag}" "" --procs="$PROCS" mi "$t"
      run_case "system_${t}_p=${procs_tag}" "" --procs="$PROCS" sys "$t"
      if [[ "$INCLUDE_JE" == "1" ]]; then
        run_case "jemalloc_${t}_p=${procs_tag}" "" --procs="$PROCS" je "$t"
      fi
    done
    ;;
esac

# Leave the bench output directory entered during setup.
popd >/dev/null

echo "[info] Logs: $RESULT_DIR"
echo "[hint] Parse logs to CSV: scripts/parse_mimalloc_logs.py $RESULT_DIR > $RESULT_DIR/summary.csv"

View File

@ -0,0 +1,71 @@
#!/usr/bin/env bash
set -euo pipefail
# Small larson suite across system / mimalloc / hakmem; logs are stored under
# docs/benchmarks/<timestamp>_SUITE.
# The benchmark binary is mimalloc-bench/bench/larson/larson, run over the
# Tiny / Mid / Large / Big size bands.
#
# Environment knobs:
#   RUNTIME        first argument passed to larson      (default: 1)
#   THREADS        comma-separated thread counts        (default: 1,4)
#   BEST           1 = also run the 'hakmem best' case  (default: 0)
#   BENCH_TIMEOUT  hard external timeout per run, in s  (default: RUNTIME + 3)
#   KILL_GRACE     seconds given to `timeout -k`        (default: 2)

: "${RUNTIME:=1}"
: "${BEST:=0}"
: "${KILL_GRACE:=2}"
THREADS_CSV=${THREADS:-1,4}

# Without an explicit timeout, allow a small cushion on top of RUNTIME so
# larson can exit cleanly.
BENCH_TIMEOUT=${BENCH_TIMEOUT:-$(( RUNTIME + 3 ))}

ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"

LARSON="$ROOT_DIR/mimalloc-bench/bench/larson/larson"
LIB_MI="/lib/x86_64-linux-gnu/libmimalloc.so.2"
LIB_HAK="$ROOT_DIR/libhakmem.so"
# Canonical path of the HAKMEM library; this is the value used for LD_PRELOAD.
LIB_HAK_ABS=$(readlink -f "$LIB_HAK")

OUTDIR="$ROOT_DIR/docs/benchmarks/$(date +%Y%m%d_%H%M%S)_SUITE"
mkdir -p "$OUTDIR"

# First line of the summary; `tee` without -a starts the file afresh.
echo "Suite: RUNTIME=${RUNTIME}s THREADS=${THREADS_CSV} BEST=${BEST}" | tee "$OUTDIR/summary.txt"
#######################################
# Run larson once under the external timeout and record the outcome.
# Globals:
#   OUTDIR, LARSON, RUNTIME, KILL_GRACE, BENCH_TIMEOUT (read)
# Arguments:
#   $1 - band label (log file stem and header text)
#   $2 - thread count
#   $3 - variant text shown in the header, e.g. "hakmem(default)"
#   $4 - variant tag used in the log file name, e.g. "hakmem_default"
#   $5 - larson min size argument
#   $6 - larson max size argument
#   $@ - optional command prefix placed before the larson binary
#        (for example: env LD_PRELOAD=/path/lib.so)
# Outputs:
#   Header and the last three output lines on stdout and in summary.txt;
#   the full output in "<label>_<tag>_T<threads>.log".
# Returns:
#   Always 0; a failed or timed-out run is noted in summary.txt and on stderr.
#######################################
_larson_run() {
  local label="$1" threads="$2" variant="$3" file_tag="$4" min="$5" max="$6"
  shift 6
  local summary="$OUTDIR/summary.txt"
  local rc=0

  echo "== ${label} | ${threads}T | ${variant} ==" | tee -a "$summary"

  # With `set -o pipefail` (enabled at the top of this script) a non-zero
  # status from `timeout` surfaces here; capture it rather than letting
  # `set -e` abort the rest of the suite.
  timeout -k "${KILL_GRACE}s" "${BENCH_TIMEOUT}s" \
    "$@" "$LARSON" "$RUNTIME" "$min" "$max" 10000 1 12345 "$threads" 2>&1 \
    | tee "$OUTDIR/${label}_${file_tag}_T${threads}.log" \
    | tail -n 3 \
    | tee -a "$summary" || rc=$?

  if (( rc != 0 )); then
    echo "[warn] ${label} ${threads}T ${variant}: run exited with status ${rc}" \
      | tee -a "$summary" >&2
  fi
  return 0
}

#######################################
# Run one size band for every requested thread count and allocator variant.
# Globals:
#   LIB_MI, LIB_HAK (read) a variant runs only if the library file exists
#   LIB_HAK_ABS     (read) path used for LD_PRELOAD in the hakmem variants
#   BEST            (read) "1" adds the 'hakmem(best)' variant
# Arguments:
#   $1 - band label
#   $2 - larson min size argument
#   $3 - larson max size argument
#   $4 - comma-separated thread counts, e.g. "1,4"
#######################################
run_case() {
  local label="$1"; shift
  local min="$1"; local max="$2"; shift 2
  local threads_csv="$1"; shift
  local -a thread_counts
  local t

  IFS=',' read -r -a thread_counts <<< "$threads_csv"

  for t in "${thread_counts[@]}"; do
    _larson_run "$label" "$t" "system" system "$min" "$max"

    if [[ -f "$LIB_MI" ]]; then
      _larson_run "$label" "$t" "mimalloc" mimalloc "$min" "$max" \
        env LD_PRELOAD="$LIB_MI"
    fi

    if [[ -f "$LIB_HAK" ]]; then
      _larson_run "$label" "$t" "hakmem(default)" hakmem_default "$min" "$max" \
        env LD_PRELOAD="$LIB_HAK_ABS"

      if [[ "$BEST" == "1" ]]; then
        # Same preload with the 'best' tuning environment applied on top.
        _larson_run "$label" "$t" "hakmem(best)" hakmem_best "$min" "$max" \
          env HAKMEM_WRAP_L2=1 HAKMEM_WRAP_L25=1 HAKMEM_LEARN=1 HAKMEM_DYN1_AUTO=1 HAKMEM_DYN2_AUTO=1 HAKMEM_HIST_SAMPLE=7 HAKMEM_WMAX_LEARN=1 HAKMEM_WMAX_DWELL_SEC=2 \
          LD_PRELOAD="$LIB_HAK_ABS"
      fi
    fi
  done
}
# Size bands exercised by the suite, as "label min_bytes max_bytes":
#   tiny  : 8 B to 64 B
#   mid   : 2 KiB to 32 KiB
#   large : 64 KiB to 1 MiB
#   big   : 2 MiB to 4 MiB
size_bands=(
  "tiny 8 64"
  "mid 2048 32768"
  "large 65536 1048576"
  "big 2097152 4194304"
)
for band_spec in "${size_bands[@]}"; do
  # Split the spec into its three fields.
  read -r band_label band_min band_max <<< "$band_spec"
  run_case "$band_label" "$band_min" "$band_max" "$THREADS_CSV"
done

echo "Saved suite: $OUTDIR" | tee -a "$OUTDIR/summary.txt"

View File

@ -0,0 +1,31 @@
#!/usr/bin/env bash
set -euo pipefail
# Run the comprehensive bench for HAKMEM (direct link) and mimalloc (direct
# link), then parse both logs into a CSV.
# Usage: scripts/run_comprehensive_pair.sh
#
# Environment knobs (defaults apply when unset or empty):
#   HAKMEM_TINY_TLS_SLL  (default: 1)
#   HAKMEM_TINY_MAG_CAP  (default: 128)

ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/comp_pair_${TS}"
mkdir -p "$OUTDIR"

echo "[build] HAKMEM + mi direct-link…"
make -s bench_fast >/dev/null
make -s bench_comprehensive_mi >/dev/null

echo "[run] HAKMEM (direct)"
# Benchmark stdout goes only to the log file; stderr stays on the terminal.
HAKMEM_TINY_TLS_SLL=${HAKMEM_TINY_TLS_SLL:-1} \
  HAKMEM_TINY_MAG_CAP=${HAKMEM_TINY_MAG_CAP:-128} \
  HAKMEM_WRAP_TINY=1 \
  ./bench_comprehensive_hakmem > "$OUTDIR/hakmem.log"

echo "[run] mimalloc (direct)"
MI_LIBDIR="$ROOT_DIR/mimalloc-bench/extern/mi/out/release"
# Append the mimalloc directory without creating an empty path entry when
# LD_LIBRARY_PATH is unset or empty: the dynamic loader treats an empty entry
# as the current directory. The variable is set for this one command only, so
# the later python3 / sed invocations keep the caller's environment.
LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${MI_LIBDIR}" \
  ./bench_comprehensive_mi > "$OUTDIR/mimalloc.log"

echo "[parse] to CSV"
python3 scripts/parse_comprehensive_logs.py "$OUTDIR" > "$OUTDIR/summary.csv"
echo "[done] CSV: $OUTDIR/summary.csv"

# Show the head of the CSV; best effort only.
sed -n '1,60p' "$OUTDIR/summary.csv" || true

View File

@ -0,0 +1,52 @@
#!/usr/bin/env bash
set -euo pipefail
# Exercise a representative subset of mimalloc-bench with HAKMEM injected via
# LD_PRELOAD, followed by the reference allocators.
# Logs are written under bench_results/mimalloc_suite_<timestamp>/.

ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

RESULT_DIR="$ROOT_DIR/bench_results/mimalloc_suite_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$RESULT_DIR"

echo "[info] Building HAKMEM shared library with PGO (for LD_PRELOAD)"
make -s pgo-profile-shared pgo-build-shared >/dev/null

BENCH_ROOT="$ROOT_DIR/mimalloc-bench"
BENCH_OUT="$BENCH_ROOT/out/bench"

# This script does not auto-build the suite; it only explains how to.
[[ -d "$BENCH_OUT" ]] || {
  cat <<'EOF'
[warn] mimalloc-bench/out/bench not found.
 Build it first: (may require sudo/network)
 cd mimalloc-bench
 ./build-bench-env.sh all
EOF
  exit 1
}

# Workload definition.
TESTS=(larson cfrac espresso xmalloc-test sh6bench sh8bench cscratch cthrash mstress malloc-large)
ALLOC_HAK=(sys)        # sys + LD_PRELOAD=hakmem
ALLOC_BASE=(mi je sys) # reference allocators for comparison
PROCS=(1 4)

pushd "$BENCH_OUT" >/dev/null

echo "[info] Running mimalloc-bench subset with HAKMEM (LD_PRELOAD)"
for nprocs in "${PROCS[@]}"; do
  case_log="$RESULT_DIR/hakmem_p${nprocs}.log"
  echo "[case] HAKMEM LD_PRELOAD --procs=$nprocs | tests: ${TESTS[*]}" | tee -a "$case_log"
  # A failing run is tolerated so the remaining cases still execute.
  LD_PRELOAD="$ROOT_DIR/libhakmem.so" \
    HAKMEM_WRAP_TINY=1 \
    bash ../../bench.sh --procs="$nprocs" "${ALLOC_HAK[@]}" "${TESTS[@]}" 2>&1 \
    | tee -a "$case_log" || true
done

echo "[info] Running reference allocators (mi/je/sys)"
for nprocs in "${PROCS[@]}"; do
  case_log="$RESULT_DIR/ref_p${nprocs}.log"
  echo "[case] REF mi/je/sys --procs=$nprocs | tests: ${TESTS[*]}" | tee -a "$case_log"
  bash ../../bench.sh --procs="$nprocs" "${ALLOC_BASE[@]}" "${TESTS[@]}" 2>&1 \
    | tee -a "$case_log" || true
done

popd >/dev/null

echo "[info] Logs written to: $RESULT_DIR"
echo "[done] mimalloc-bench subset complete"

View File

@ -0,0 +1,56 @@
#!/usr/bin/env bash
set -euo pipefail
# perf-stat triad (HAKMEM / System / mimalloc) for bench_tiny_hot at one
# size / batch / cycles setting.
# Events collected: cycles, instructions, branches, branch-misses,
# L1-dcache-load-misses (CSV via `perf stat -x ,`).
#
# Usage: scripts/run_perf_hot_triad.sh [size] [batch] [cycles] [reps]
#   size   : bytes                 (default 32)
#   batch  : batch size            (default 100)
#   cycles : loop cycles per op    (default 50000)
#   reps   : perf repetitions      (default 3)

ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

# Positional parameters; a missing or empty argument falls back to its default.
size="${1:-}";   : "${size:=32}"
batch="${2:-}";  : "${batch:=100}"
cycles="${3:-}"; : "${cycles:=50000}"
reps="${4:-}";   : "${reps:=3}"

echo "[build] benches (fast + mi)"
make -s bench_fast bench_tiny_hot_mi >/dev/null

# Make sure LD_LIBRARY_PATH exists so later expansions are safe under `set -u`.
: "${LD_LIBRARY_PATH:=}"
export LD_LIBRARY_PATH
MI_LIBDIR="$ROOT_DIR/mimalloc-bench/extern/mi/out/release"

# Timestamped output directory, relative to the repository root.
TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/perf_hot_triad_${TS}"
mkdir -p "$OUTDIR"

echo "[info] size=$size batch=$batch cycles=$cycles reps=$reps"
echo "[info] results → $OUTDIR"
#######################################
# Collect `perf stat` counters for one allocator build of the benchmark.
# Globals:
#   OUTDIR, size, batch, cycles, reps (read)
#   MI_LIBDIR       (read) added to the library search path for mimalloc
#   LD_LIBRARY_PATH (read) caller's search path, may be unset or empty
# Arguments:
#   $1 - allocator label; "mimalloc" selects the extra library directory
#   $2 - benchmark binary to run
# Outputs:
#   "[perf] <tag>" on stdout; perf's stderr (the -x , CSV) is written to
#   "$OUTDIR/<tag>.perf.csv"; the benchmark's own stdout is discarded.
# Returns:
#   Always 0; a perf failure is reported on stderr so the triad continues.
#######################################
run_perf() {
  local alloc="$1"; shift
  local bin="$1"; shift
  local tag="${alloc}_s${size}_b${batch}_c${cycles}"
  local log="$OUTDIR/${tag}.perf.csv"
  local events="cycles,instructions,branches,branch-misses,L1-dcache-load-misses"
  local -a perf_cmd=(perf stat -x , -r "$reps" -e "$events" "$bin" "$size" "$batch" "$cycles")
  local rc=0

  echo "[perf] $tag"

  if [[ "$alloc" == "mimalloc" ]]; then
    # Append MI_LIBDIR without producing an empty path entry when
    # LD_LIBRARY_PATH is unset or empty: the dynamic loader treats an empty
    # entry as the current directory.
    LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${MI_LIBDIR}" \
      "${perf_cmd[@]}" 1>/dev/null 2>"$log" || rc=$?
  else
    "${perf_cmd[@]}" 1>/dev/null 2>"$log" || rc=$?
  fi

  if (( rc != 0 )); then
    echo "[warn] perf stat exited with status $rc for $tag (see $log)" >&2
  fi
  return 0
}
# One perf run per allocator build, as "label binary".
for triad_entry in \
  "hakmem ./bench_tiny_hot_hakmem" \
  "system ./bench_tiny_hot_system" \
  "mimalloc ./bench_tiny_hot_mi"; do
  read -r triad_alloc triad_bin <<< "$triad_entry"
  run_perf "$triad_alloc" "$triad_bin"
done

echo "[done] perf CSVs under: $OUTDIR"
# List at most the first 20 result files.
ls -1 "$OUTDIR" | sed -n '1,20p'

View File

@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -euo pipefail
# Mainline-oriented preset triad: builds the perf_main target (no bench-only
# macros), exports the recommended tiny-allocator settings and delegates to
# scripts/run_tiny_hot_triad.sh.
# Usage: scripts/run_perf_main_triad.sh [cycles]
#   cycles : forwarded to run_tiny_hot_triad.sh (default 60000)

ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

cycles=${1:-60000}

echo "[build] perf_main (no bench-only macros)"
make -s perf_main >/dev/null

# Recommended settings; a value already present in the environment wins.
: "${HAKMEM_TINY_TLS_SLL:=1}"
: "${HAKMEM_TINY_REFILL_MAX:=96}"
: "${HAKMEM_TINY_REFILL_MAX_HOT:=192}"
: "${HAKMEM_TINY_SPILL_HYST:=16}"
: "${HAKMEM_TINY_BG_REMOTE:=0}"
export HAKMEM_TINY_TLS_SLL HAKMEM_TINY_REFILL_MAX HAKMEM_TINY_REFILL_MAX_HOT
export HAKMEM_TINY_SPILL_HYST HAKMEM_TINY_BG_REMOTE

echo "[info] env: TLS_SLL=$HAKMEM_TINY_TLS_SLL REFILL_MAX=$HAKMEM_TINY_REFILL_MAX HOT=$HAKMEM_TINY_REFILL_MAX_HOT HYST=$HAKMEM_TINY_SPILL_HYST BG_REMOTE=$HAKMEM_TINY_BG_REMOTE"

bash scripts/run_tiny_hot_triad.sh "$cycles"

echo "[done] perf_main triad finished"

View File

@ -0,0 +1,57 @@
#!/usr/bin/env bash
set -euo pipefail
# Random-mixed benchmark matrix: HAKMEM, System and mimalloc builds are run for
# every working-set / seed combination and the throughput is collected in a CSV.
# Usage: scripts/run_random_mixed_matrix.sh [cycles]
#   cycles : first argument given to each benchmark binary (default 150000)

ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

cycles=${1:-150000}

echo "[build] benches (fast + random-mixed variants)"
build_targets=(
  bench_fast
  bench_random_mixed_hakmem
  bench_random_mixed_system
  bench_random_mixed_mi
)
make -s "${build_targets[@]}" >/dev/null

# Make sure LD_LIBRARY_PATH exists so later expansions are safe under `set -u`.
: "${LD_LIBRARY_PATH:=}"
export LD_LIBRARY_PATH
MI_LIBDIR="$ROOT_DIR/mimalloc-bench/extern/mi/out/release"

# Timestamped output directory, relative to the repository root.
TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/random_mixed_${TS}"
mkdir -p "$OUTDIR"

# Start the results file with its header row.
CSV="$OUTDIR/results.csv"
printf '%s\n' "allocator,cycles,ws,seed,throughput_mops" > "$CSV"

# Matrix axes.
ws_list=(200 400 800)
seeds=(42 1337)
#######################################
# Run one random-mixed benchmark and append its throughput to the CSV.
# Globals:
#   CSV             (appended) results file
#   MI_LIBDIR       (read) added to the library search path for mimalloc
#   LD_LIBRARY_PATH (read) caller's search path, may be unset or empty
# Arguments:
#   $1 - benchmark binary
#   $2 - allocator label written to the CSV; "mimalloc" selects MI_LIBDIR
#   $3 - cycles, $4 - working set, $5 - seed (passed to the binary in order)
# Outputs:
#   Appends "<alloc>,<cycles>,<ws>,<seed>,<throughput>" to CSV when the
#   benchmark printed a "Throughput: <n> M ops" line; otherwise nothing.
# Returns:
#   Always 0; a failing benchmark simply contributes no row.
#######################################
run_case() {
  local bin="$1"; shift
  local alloc="$1"; shift
  local cyc="$1"; shift
  local ws="$1"; shift
  local seed="$1"; shift
  # Extract the number from a line such as "Throughput: 12.5 M ops/s".
  local throughput_filter='s/^Throughput: \([0-9.][0-9.]*\) M ops.*/\1/p'
  local out=""

  # The value is captured directly instead of going through a fixed scratch
  # file in the results directory, and the binary path is quoted so a path
  # containing whitespace is not split.
  if [[ "$alloc" == "mimalloc" ]]; then
    # Append MI_LIBDIR without producing an empty path entry when
    # LD_LIBRARY_PATH is unset or empty: the dynamic loader treats an empty
    # entry as the current directory.
    out=$(LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${MI_LIBDIR}" \
      "$bin" "$cyc" "$ws" "$seed" | sed -n "$throughput_filter") || true
  else
    out=$("$bin" "$cyc" "$ws" "$seed" | sed -n "$throughput_filter") || true
  fi

  if [[ -n "$out" ]]; then
    printf '%s\n' "$alloc,$cyc,$ws,$seed,$out" >> "$CSV"
  fi
  return 0
}
# Allocator builds to run for every matrix cell, as "BANNER label binary".
matrix_variants=(
  "HAKMEM hakmem ./bench_random_mixed_hakmem"
  "SYSTEM system ./bench_random_mixed_system"
  "MIMALLOC mimalloc ./bench_random_mixed_mi"
)

# Sweep working sets (outer) and seeds (inner); run every variant per cell.
for ws in "${ws_list[@]}"; do
  for seed in "${seeds[@]}"; do
    for variant in "${matrix_variants[@]}"; do
      read -r variant_banner variant_label variant_bin <<< "$variant"
      echo "[run] $variant_banner ws=$ws seed=$seed cycles=$cycles"
      run_case "$variant_bin" "$variant_label" "$cycles" "$ws" "$seed"
    done
  done
done

echo "[done] CSV: $CSV"
# Show the head of the results; best effort only.
sed -n '1,40p' "$CSV" || true

View File

@ -0,0 +1,64 @@
#!/usr/bin/env bash
set -euo pipefail
# Aggregate suite runner: tiny-hot triad, random-mixed triad, comprehensive
# pair and, optionally, the application benches. Writes a Markdown summary.
# Usage: scripts/run_suite_compare.sh [cycles_hot] [cycles_mixed] [with_apps]
#   cycles_hot   : tiny hot cycles (default 80000)
#   cycles_mixed : random mixed cycles (default 120000)
#   with_apps    : 0/1 (default 0) — if 1, runs scripts/run_apps_with_hakmem.sh

ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

cycles_hot=${1:-80000}
cycles_mixed=${2:-120000}
with_apps=${3:-0}

TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/suite_${TS}"
mkdir -p "$OUTDIR"

# Print a message prefixed with the current wall-clock time.
log() { echo "[$(date +%H:%M:%S)] $*"; }

# Run one stage, sending its stdout to the given log file ($1); the remaining
# arguments are the command. A failing stage is reported but does not stop
# the suite.
run_stage() {
  local stage_log="$1"; shift
  if ! "$@" > "$stage_log"; then
    log "warn: stage failed: $* (see $stage_log)"
  fi
}

# Print the path from the last "[done] CSV: <path>" line of a stage log;
# prints nothing when the stage never announced a CSV.
csv_from_log() {
  sed -n 's/^\[done\] CSV: \(.*\)$/\1/p' "$1" | tail -n1
}

# Emit a fenced excerpt (first $2 lines) of the CSV at $1. When the path is
# empty or unreadable a placeholder is written instead, so a failed stage
# cannot abort the script half-way through writing the summary.
peek_csv() {
  local csv_path="$1" max_rows="$2"
  echo ""
  echo '```'
  if [[ -n "$csv_path" && -r "$csv_path" ]]; then
    sed -n "1,${max_rows}p" "$csv_path"
  else
    echo "(no CSV produced)"
  fi
  echo '```'
}

log "tiny hot triad ($cycles_hot)"
run_stage "$OUTDIR/tiny_hot.log" bash scripts/run_tiny_hot_triad.sh "$cycles_hot"
th_csv=$(csv_from_log "$OUTDIR/tiny_hot.log")

log "random mixed triad ($cycles_mixed)"
run_stage "$OUTDIR/random_mixed.log" bash scripts/run_random_mixed_matrix.sh "$cycles_mixed"
rm_csv=$(csv_from_log "$OUTDIR/random_mixed.log")

log "comprehensive pair"
run_stage "$OUTDIR/comp_pair.log" bash scripts/run_comprehensive_pair.sh
cp_csv=$(csv_from_log "$OUTDIR/comp_pair.log")

if [[ "$with_apps" == "1" ]]; then
  log "apps (LD-safe)"
  run_stage "$OUTDIR/apps.log" bash scripts/run_apps_with_hakmem.sh
fi

summary="$OUTDIR/summary.md"
{
  echo "# HAKMEM vs System vs mimalloc Suite ($TS)"
  echo ""
  echo "- tiny hot triad CSV: ${th_csv:-(not available)}"
  echo "- random mixed triad CSV: ${rm_csv:-(not available)}"
  echo "- comprehensive pair CSV: ${cp_csv:-(not available)}"
  if [[ "$with_apps" == "1" ]]; then
    echo "- apps log: $OUTDIR/apps.log"
  fi
  echo ""
  echo "Quick peek (head):"
  peek_csv "$th_csv" 20
  peek_csv "$rm_csv" 20
  peek_csv "$cp_csv" 30
} > "$summary"

log "done → $summary"