Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-05 12:31:14 +09:00
commit 52386401b3
27144 changed files with 124451 additions and 0 deletions

View File

@ -0,0 +1,62 @@
#!/usr/bin/env bash
set -euo pipefail

# Measure steady-state RSS for Tiny sizes by maintaining a live set
# and churning short-lived allocations. Reports peak and end RSS.
#
# Usage: scripts/measure_rss_tiny.sh <size> <live_count> <iters>
#
# A small C driver is generated, compiled with gcc and run twice: once with
# libhakmem.so injected through LD_PRELOAD and once against the system
# allocator. Each run prints one line:
#   peak_rss_kb=<n> end_rss_kb=<n>

ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

if [[ $# -lt 3 ]]; then
  echo "usage: $0 <size> <live_count> <iters>" >&2
  exit 1
fi

size=$1; live=$2; iters=$3

# The C driver parses its arguments with strtoull/atoi, which accept garbage
# silently; reject anything that is not a plain non-negative integer up front.
for arg in "$size" "$live" "$iters"; do
  if [[ ! "$arg" =~ ^[0-9]+$ ]]; then
    echo "error: expected a non-negative integer, got '$arg'" >&2
    echo "usage: $0 <size> <live_count> <iters>" >&2
    exit 1
  fi
done

bench_src="$ROOT_DIR/.tmp_rss_bench.c"
bench_bin="$ROOT_DIR/.tmp_rss_bench"

# Remove the generated files on every exit path, including failures that
# abort the script early under `set -e`.
cleanup() { rm -f -- "$bench_bin" "$bench_src"; }
trap cleanup EXIT

cat > "$bench_src" <<'EOF'
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/* Resident set size in KiB, taken from the second field of /proc/self/statm.
 * Returns 0 when the file cannot be opened or parsed. */
static size_t get_rss_kb(void) {
  FILE* f = fopen("/proc/self/statm", "r");
  if (!f) return 0;
  unsigned long size = 0, res = 0;
  int fields = fscanf(f, "%lu %lu", &size, &res);
  fclose(f);
  if (fields != 2) return 0;
  long ps = sysconf(_SC_PAGESIZE);
  return (size_t)((res * ps) / 1024);
}

int main(int argc, char** argv) {
  if (argc < 4) {
    fprintf(stderr, "usage: %s <size> <live_count> <iters>\n", argv[0]);
    return 2;
  }
  size_t size = (size_t)strtoull(argv[1], NULL, 10);
  int live = atoi(argv[2]);
  int iters = atoi(argv[3]);

  /* Live set: `live` blocks of `size` bytes kept allocated throughout. */
  void** L = (void**)malloc(sizeof(void*) * (size_t)live);
  if (!L && live > 0) {
    perror("malloc");
    return 1;
  }
  for (int i = 0; i < live; i++) L[i] = malloc(size);

  /* Churn: each iteration frees and reallocates every second live block,
   * then samples RSS to track the peak. */
  size_t peak = 0;
  for (int it = 0; it < iters; it++) {
    for (int i = 0; i < live; i += 2) { free(L[i]); L[i] = malloc(size); }
    size_t rss = get_rss_kb();
    if (rss > peak) peak = rss;
  }

  size_t end_rss = get_rss_kb();
  printf("peak_rss_kb=%zu end_rss_kb=%zu\n", peak, end_rss);

  for (int i = 0; i < live; i++) free(L[i]);
  free(L);
  return 0;
}
EOF

gcc -O3 -march=native -mtune=native "$bench_src" -o "$bench_bin"

echo "[info] Building shared lib (for LD_PRELOAD HAKMEM case)"
# Best effort: a failed build is tolerated, but say so instead of hiding it.
if ! make -s pgo-build-shared >/dev/null; then
  echo "[warn] 'make pgo-build-shared' failed; falling back to any existing libhakmem.so" >&2
fi

echo "[case] HAKMEM (LD_PRELOAD)"
hakmem_lib="$ROOT_DIR/libhakmem.so"
if [[ -f "$hakmem_lib" ]]; then
  out_h=$(HAKMEM_LD_SAFE=1 LD_PRELOAD="$hakmem_lib" "$bench_bin" "$size" "$live" "$iters")
  echo "$out_h"
else
  # Without the library the dynamic loader would ignore the preload and the
  # numbers would really be the system allocator's, so skip the case.
  echo "[warn] $hakmem_lib not found; skipping HAKMEM case" >&2
fi

echo "[case] System"
out_s=$("$bench_bin" "$size" "$live" "$iters")
echo "$out_s"

View File

@ -0,0 +1,24 @@
#!/usr/bin/env bash
set -euo pipefail

# Tiny-Hot triad against the bench-only fast path build of HAKMEM
# (SLL→Mag→tiny refill).
#
# Usage: scripts/run_tiny_benchfast_triad.sh [cycles]
#   cycles  value forwarded to scripts/run_tiny_hot_triad.sh (default 60000)

# Everything below runs from the repository root (parent of this script's dir).
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

main() {
  local n_cycles=${1:-60000}

  printf '%s\n' "[build] system/mimalloc (fast + mi)"
  make -s bench_fast bench_tiny_hot_mi >/dev/null

  printf '%s\n' "[build] HAKMEM bench-fastpath"
  make -s bench_fastpath >/dev/null

  # Export LD_LIBRARY_PATH (possibly empty) so the triad runner inherits it.
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}"

  printf '%s\n' "[run] triad (bench-fastpath HAKMEM vs System vs mimalloc)"
  # The binaries were built above, so the runner is told to skip its own build.
  SKIP_BUILD=1 bash scripts/run_tiny_hot_triad.sh "$n_cycles"

  printf '%s\n' "[note] Latest CSV printed by triad runner."
}

main "$@"

View File

@ -0,0 +1,32 @@
#!/usr/bin/env bash
set -euo pipefail

# Debug-oriented Tiny triad run: builds the bench_debug target, runs the hot
# triad with the path-debug and counter-dump variables exported, then runs the
# perf triad for 32-byte and 64-byte blocks.
#
# Usage: scripts/run_tiny_debug.sh [cycles]
#   cycles  value forwarded to scripts/run_tiny_hot_triad.sh (default 40000)

ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

main() {
  local n_cycles=${1:-40000}

  printf '%s\n' "[build] bench_debug (HAKMEM_DEBUG_COUNTERS=1)"
  make -s bench_debug >/dev/null

  # Exported after the build so they apply to every benchmark run below.
  export HAKMEM_TINY_PATH_DEBUG=1
  export HAKMEM_TINY_COUNTERS_DUMP=1

  printf '%s\n' "[run] tiny hot triad (cycles=$n_cycles)"
  # The first five knobs keep a value already present in the caller's
  # environment and otherwise fall back to the default shown; the last three
  # are always forced.
  local -a triad_env=(
    "HAKMEM_TINY_SPECIALIZE_32_64=${HAKMEM_TINY_SPECIALIZE_32_64:-1}"
    "HAKMEM_TINY_BUMP_SHADOW=${HAKMEM_TINY_BUMP_SHADOW:-0}"
    "HAKMEM_TINY_BG_BIN=${HAKMEM_TINY_BG_BIN:-0}"
    "HAKMEM_TINY_ULTRA_SIMPLE=${HAKMEM_TINY_ULTRA_SIMPLE:-0}"
    "HAKMEM_TINY_HOTMAG=${HAKMEM_TINY_HOTMAG:-0}"
    "HAKMEM_WRAP_TINY=1"
    "HAKMEM_INT_ENGINE=0"
    "HAKMEM_TINY_TLS_SLL=1"
  )
  env "${triad_env[@]}" bash scripts/run_tiny_hot_triad.sh "$n_cycles"

  printf '%s\n' "[perf] 32B / 64B"
  local block_size
  for block_size in 32 64; do
    HAKMEM_TINY_SPECIALIZE_32_64=1 \
      bash scripts/run_perf_hot_triad.sh "$block_size" 100 50000 3
  done

  printf '%s\n' "[done] Inspect stderr for [Tiny Path Debug] and [Tiny Extended Counters]"
}

main "$@"

View File

@ -0,0 +1,47 @@
#!/usr/bin/env bash
set -euo pipefail

# Tiny hot-path microbenchmark sweep over a grid of sizes and batch counts;
# results are collected in a CSV file.
#
# Usage: scripts/run_tiny_hot_sweep.sh [cycles]

# Work from the repository root (parent of the directory holding this script).
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

# Cycle count passed to every benchmark invocation (first argument, default 200000).
cycles=${1:-200000}

printf '%s\n' "[info] Building tiny hot benches (bench_fast)"
make -s bench_fast >/dev/null

# Each sweep writes into its own timestamped results directory.
TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/tiny_hot_${TS}"
CSV="$OUTDIR/results.csv"
mkdir -p "$OUTDIR"

# Start the CSV with its header row.
printf '%s\n' "mode,size,batch,cycles,throughput_mops" > "$CSV"

# Sweep grid: every size is combined with every batch value.
sizes=(8 16 24 32 40 48 56 64 128)
batches=(50 100 200)
#######################################
# Run one tiny-hot benchmark binary and append its throughput to the CSV.
# Globals:
#   CSV (read) - path of the results file that receives the row
# Arguments:
#   $1 - mode: "hakmem" runs ./bench_tiny_hot_hakmem, any other value runs
#        ./bench_tiny_hot_system
#   $2 - size, $3 - batch, $4 - cycles: passed to the binary in that order
# Outputs:
#   Appends "mode,size,batch,cycles,throughput" to $CSV when the binary prints
#   a "Throughput: <number> M ops" line; otherwise writes a warning to stderr
#   and adds no row.
# Returns:
#   0 even when the benchmark fails, so one bad case does not abort the sweep.
#######################################
run_case() {
  local mode=$1 size=$2 batch=$3 cyc=$4
  local bin out

  if [[ "$mode" == "hakmem" ]]; then
    bin="./bench_tiny_hot_hakmem"
  else
    bin="./bench_tiny_hot_system"
  fi

  # A failing binary is tolerated on purpose (`|| true`); whatever sed managed
  # to extract before the failure is still captured.
  out=$("$bin" "$size" "$batch" "$cyc" \
    | sed -n 's/^Throughput: \([0-9.][0-9.]*\) M ops.*/\1/p') || true

  if [[ -z "$out" ]]; then
    # Make the gap in the CSV visible instead of dropping the case silently.
    echo "[warn] no throughput parsed for mode=$mode size=$size batch=$batch cycles=$cyc; row skipped" >&2
    return 0
  fi

  echo "$mode,$size,$batch,$cyc,$out" >> "$CSV"
}
# Walk the size x batch grid; for each point run HAKMEM first, then the
# system allocator. Each entry below is "<log label>:<run_case mode>".
for sz in "${sizes[@]}"; do
  for bt in "${batches[@]}"; do
    for entry in HAKMEM:hakmem SYSTEM:system; do
      printf '%s\n' "[run] ${entry%%:*} size=$sz batch=$bt cycles=$cycles"
      run_case "${entry##*:}" "$sz" "$bt" "$cycles"
    done
  done
done

printf '%s\n' "[done] CSV: $CSV"
# Preview: header plus HAKMEM rows, at most 20 lines. An empty match is not an error.
grep -E '^(mode|hakmem)' "$CSV" | sed -n '1,20p' || true

View File

@ -0,0 +1,61 @@
#!/usr/bin/env bash
set -euo pipefail

# Tiny hot bench over a grid of sizes and batch counts for three allocators:
# HAKMEM, System and mimalloc (direct-link triad).
#
# Usage: scripts/run_tiny_hot_triad.sh [cycles]
# Env:   SKIP_BUILD=1 skips the make step (binaries must already exist).

# Work from the repository root (parent of the directory holding this script).
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

# Cycle count passed to every benchmark invocation (first argument, default 100000).
cycles=${1:-100000}

if [[ "${SKIP_BUILD:-0}" == "1" ]]; then
  printf '%s\n' "[build] skipped (SKIP_BUILD=1)"
else
  printf '%s\n' "[build] benches (fast + mi)"
  make -s bench_fast bench_tiny_hot_mi >/dev/null
fi

# Under `set -u` a later "$LD_LIBRARY_PATH" would abort if the variable were
# unset, so define it (possibly empty) and export it.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}"

# Directory appended to the library search path for the mimalloc runs.
MI_LIBDIR="$ROOT_DIR/mimalloc-bench/extern/mi/out/release"

# Each run writes into its own timestamped results directory.
TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/tiny_hot_triad_${TS}"
CSV="$OUTDIR/results.csv"
mkdir -p "$OUTDIR"

# Start the CSV with its header row.
printf '%s\n' "size,batch,cycles,allocator,throughput_mops" > "$CSV"

# Benchmark grid: every size is combined with every batch value.
sizes=(8 16 24 32 40 48 56 64 128)
batches=(50 100 200)
#######################################
# Run one benchmark binary and append its throughput to the results CSV.
# Globals:
#   CSV             (read) - results file that receives the row
#   MI_LIBDIR       (read) - directory added to the library search path when
#                            the allocator is "mimalloc"
#   LD_LIBRARY_PATH (read) - existing search path, may be empty or unset
# Arguments:
#   $1 - size, $2 - batch, $3 - cycles: passed to the binary in that order
#   $4 - path of the benchmark binary
#   $5 - allocator label written to the CSV; "mimalloc" also extends
#        LD_LIBRARY_PATH for that one invocation
# Outputs:
#   Appends "size,batch,cycles,allocator,throughput" to $CSV when the binary
#   prints a "Throughput: <number> M ops" line; otherwise writes a warning to
#   stderr and adds no row.
# Returns:
#   0 even when the benchmark fails, so one bad case does not abort the run.
#######################################
run_case() {
  local size=$1 batch=$2 cyc=$3 bin=$4 alloc=$5
  local out
  local extract='s/^Throughput: \([0-9.][0-9.]*\) M ops.*/\1/p'

  # Failures are tolerated on purpose (`|| true`); whatever sed extracted
  # before the failure is still captured.
  if [[ "$alloc" == "mimalloc" ]]; then
    # Only insert the ':' separator when there is an existing path: an empty
    # entry in LD_LIBRARY_PATH makes the loader search the current directory.
    out=$(LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$MI_LIBDIR" \
      "$bin" "$size" "$batch" "$cyc" | sed -n "$extract") || true
  else
    out=$("$bin" "$size" "$batch" "$cyc" | sed -n "$extract") || true
  fi

  if [[ -z "$out" ]]; then
    # Make the gap in the CSV visible instead of dropping the case silently.
    echo "[warn] no throughput parsed for allocator=$alloc size=$size batch=$batch cycles=$cyc; row skipped" >&2
    return 0
  fi

  echo "$size,$batch,$cyc,$alloc,$out" >> "$CSV"
}
# Walk the size x batch grid; at each point run HAKMEM, then System, then
# mimalloc. Each entry is "<log label>:<binary suffix>:<allocator label>".
for sz in "${sizes[@]}"; do
  for bt in "${batches[@]}"; do
    for entry in HAKMEM:hakmem:hakmem SYSTEM:system:system MIMALLOC:mi:mimalloc; do
      IFS=: read -r label suffix allocator <<<"$entry"
      printf '%s\n' "[run] $label size=$sz batch=$bt cycles=$cycles"
      run_case "$sz" "$bt" "$cycles" "./bench_tiny_hot_$suffix" "$allocator"
    done
  done
done

printf '%s\n' "[done] CSV: $CSV"
# Show at most the first 40 lines of the results.
sed -n '1,40p' "$CSV" || true

View File

@ -0,0 +1,25 @@
#!/usr/bin/env bash
set -euo pipefail

# Tiny-Hot triad against the bench-only SLL-only HAKMEM build with tuned
# warmup (REFILL=12, WARMUP32=192) and PGO.
#
# Usage: scripts/run_tiny_sllonly_r12w192_triad.sh [cycles]
#   cycles  value forwarded to scripts/run_tiny_hot_triad.sh (default 60000)

# Everything below runs from the repository root (parent of this script's dir).
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

main() {
  local n_cycles=${1:-60000}

  printf '%s\n' "[build] system/mimalloc (fast + mi)"
  make -s bench_fast bench_tiny_hot_mi >/dev/null

  printf '%s\n' "[build] HAKMEM bench_sll_only (r12 w32=192 PGO)"
  # Two make targets, run in order: the profile target, then the build target.
  local pgo_target
  for pgo_target in pgo-benchsll-r12w192-profile pgo-benchsll-r12w192-build; do
    make -s "$pgo_target" >/dev/null
  done

  # Export LD_LIBRARY_PATH (possibly empty) so the triad runner inherits it.
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}"

  printf '%s\n' "[run] triad (bench_sll_only r12 w32=192)"
  # The binaries were built above, so the runner is told to skip its own build.
  SKIP_BUILD=1 bash scripts/run_tiny_hot_triad.sh "$n_cycles"

  printf '%s\n' "[note] Latest CSV printed by triad runner."
}

main "$@"

View File

@ -0,0 +1,25 @@
#!/usr/bin/env bash
set -euo pipefail

# Tiny-Hot triad against the bench-only SLL-only HAKMEM build with warmup
# and PGO.
#
# Usage: scripts/run_tiny_sllonly_triad.sh [cycles]
#   cycles  value forwarded to scripts/run_tiny_hot_triad.sh (default 60000)

# Everything below runs from the repository root (parent of this script's dir).
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

main() {
  local n_cycles=${1:-60000}

  printf '%s\n' "[build] system/mimalloc (fast + mi)"
  make -s bench_fast bench_tiny_hot_mi >/dev/null

  printf '%s\n' "[build] HAKMEM bench_sll_only (PGO)"
  # Two make targets, run in order: the profile target, then the build target.
  local pgo_target
  for pgo_target in pgo-benchsll-profile pgo-benchsll-build; do
    make -s "$pgo_target" >/dev/null
  done

  # Export LD_LIBRARY_PATH (possibly empty) so the triad runner inherits it.
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}"

  printf '%s\n' "[run] triad (bench_sll_only HAKMEM vs System vs mimalloc)"
  # The binaries were built above, so the runner is told to skip its own build.
  SKIP_BUILD=1 bash scripts/run_tiny_hot_triad.sh "$n_cycles"

  printf '%s\n' "[note] Latest CSV printed by triad runner."
}

main "$@"

View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
set -euo pipefail

# Tiny-Hot triad with the Ultra (SLL-only) front for HAKMEM, compared against
# System and mimalloc.
#
# Usage: scripts/run_tiny_ultra_triad.sh [cycles]
#   cycles  value forwarded to scripts/run_tiny_hot_triad.sh (default 60000)

# Everything below runs from the repository root (parent of this script's dir).
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"

main() {
  local n_cycles=${1:-60000}

  printf '%s\n' "[build] system/mimalloc benches (fast + mi)"
  make -s bench_fast bench_tiny_hot_mi >/dev/null

  printf '%s\n' "[build] HAKMEM Ultra (SLL-only)"
  make -s bench_ultra >/dev/null

  # Export LD_LIBRARY_PATH (possibly empty) so the triad runner inherits it.
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}"

  printf '%s\n' "[run] triad (Ultra HAKMEM vs System vs mimalloc)"
  # The binaries were built above, so the runner is told to skip its own build.
  SKIP_BUILD=1 bash scripts/run_tiny_hot_triad.sh "$n_cycles"

  printf '%s\n' "[note] Latest CSV printed above by triad runner."
}

main "$@"