Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-05 12:31:14 +09:00
commit 52386401b3
27144 changed files with 124451 additions and 0 deletions

View File

@ -0,0 +1,46 @@
#!/usr/bin/env bash
set -euo pipefail
# Head-to-head for Large (64KB-1MB) sizes, 10s, system/mimalloc/hakmem (P1/P2 profiles)
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
LARSON="$ROOT_DIR/mimalloc-bench/bench/larson/larson"
LIB_HAK="$ROOT_DIR/libhakmem.so"
LIB_MI="/lib/x86_64-linux-gnu/libmimalloc.so.2"
if [[ ! -x "$LARSON" ]]; then
  echo "[ERR] larson not found: $LARSON" >&2; exit 1
fi
if [[ ! -f "$LIB_HAK" ]]; then
  echo "[ERR] libhakmem.so not found: $LIB_HAK" >&2; exit 1
fi
TS=$(date +%Y%m%d_%H%M%S)
OUT="$ROOT_DIR/docs/benchmarks/${TS}_HEAD2HEAD_LARGE"
mkdir -p "$OUT"
echo "[OUT] $OUT"
cd "$ROOT_DIR/mimalloc-bench/bench/larson"
# Shared larson argument list (was duplicated inline on every invocation):
# runtime min_size max_size chunks/thread rounds seed threads
ARGS=(10 65536 1048576 10000 1 12345 4)
echo "System malloc LARGE_4T" | tee "$OUT/system_large_4t.log"
timeout "${BENCH_TIMEOUT:-13}s" "$LARSON" "${ARGS[@]}" 2>&1 | tee -a "$OUT/system_large_4t.log"
echo "mimalloc LARGE_4T" | tee "$OUT/mimalloc_large_4t.log"
timeout "${BENCH_TIMEOUT:-13}s" env LD_PRELOAD="$LIB_MI" "$LARSON" "${ARGS[@]}" 2>&1 | tee -a "$OUT/mimalloc_large_4t.log"
# P1 best (alloc-priority profile)
echo "hakmem P1 LARGE_4T (remote, factor=4, HDR=1)" | tee "$OUT/hakmem_p1_large_4t.log"
timeout "${BENCH_TIMEOUT:-13}s" env LD_PRELOAD="$LIB_HAK" HAKMEM_WRAP_L25=1 HAKMEM_L25_PREF=remote HAKMEM_L25_RUN_FACTOR=4 \
  HAKMEM_HDR_LIGHT=1 HAKMEM_SHARD_MIX=1 HAKMEM_TLS_LO_MAX=512 \
  "$LARSON" "${ARGS[@]}" 2>&1 | tee -a "$OUT/hakmem_p1_large_4t.log"
# P2+TC best (free-priority profile)
echo "hakmem P2+TC LARGE_4T (remote, factor=4, HDR=2, TC_SPILL=16)" | tee "$OUT/hakmem_p2_large_4t.log"
timeout "${BENCH_TIMEOUT:-13}s" env LD_PRELOAD="$LIB_HAK" HAKMEM_WRAP_L25=1 HAKMEM_L25_PREF=remote HAKMEM_L25_RUN_FACTOR=4 \
  HAKMEM_HDR_LIGHT=2 HAKMEM_L25_TC_SPILL=16 HAKMEM_SHARD_MIX=1 HAKMEM_TLS_LO_MAX=512 \
  "$LARSON" "${ARGS[@]}" 2>&1 | tee -a "$OUT/hakmem_p2_large_4t.log"
cd - >/dev/null
# Summarize Throughput lines; fall back to grep when ripgrep is unavailable.
if command -v rg >/dev/null 2>&1; then
  rg -n "Throughput" "$OUT"/*.log | tee "$OUT/summary.txt" || true
else
  grep -n "Throughput" "$OUT"/*.log | tee "$OUT/summary.txt" || true
fi
echo "[DONE] Logs at $OUT"

View File

@ -0,0 +1,41 @@
#!/usr/bin/env bash
set -euo pipefail
# Kill any lingering mimalloc-bench/larson runs and our bench runner scripts.
# Usage: scripts/kill_bench.sh
# Command-line substrings identifying the processes we own.
patterns=(
  "mimalloc-bench/bench/larson/larson"
  "scripts/run_bench_suite.sh"
  "scripts/save_prof_sweep.sh"
  "scripts/ab_sweep_mid.sh"
)
# Pass 1: report what is running (and remember whether anything matched).
any_found=0
for p in "${patterns[@]}"; do
  if pgrep -fa "$p" >/dev/null 2>&1; then
    echo "[kill_bench] Found processes for: $p"
    pgrep -fa "$p" || true
    any_found=1
  fi
done
if (( any_found == 0 )); then
  echo "[kill_bench] No matching bench processes found."
  exit 0
fi
# Pass 2: polite termination first.
echo "[kill_bench] Sending SIGTERM..."
for p in "${patterns[@]}"; do
  if pgrep -f "$p" >/dev/null 2>&1; then
    pkill -f "$p" || true
  fi
done
sleep 1
# Pass 3: anything still alive gets SIGKILL.
echo "[kill_bench] Forcing SIGKILL for leftovers..."
for p in "${patterns[@]}"; do
  if pgrep -f "$p" >/dev/null 2>&1; then
    pkill -9 -f "$p" || true
  fi
done
echo "[kill_bench] Done."

View File

@ -0,0 +1,25 @@
-- lua_workload.lua - mixed string builder + table churn
-- Allocator stress workload. Work size N comes from env var LUA_WORK_N
-- (default 500000); tonumber(nil) yields nil, so the `or` fallback is safe.
local N = tonumber(os.getenv("LUA_WORK_N")) or 500000
-- String builder (amortized)
local t = {}
for i = 1, N do
  t[#t+1] = tostring(i)
  -- every 5th entry, append a separator fragment
  if (i % 5) == 0 then t[#t+1] = "-" end
end
local s = table.concat(t)
-- Table churn (insert/remove)
local arr = {}
for i = 1, N do
  arr[i] = i * 3
end
-- Sum then delete every 3rd slot, leaving holes behind for the allocator/GC.
local sum = 0
for i = 1, N, 3 do
  sum = sum + (arr[i] or 0)
  arr[i] = nil
end
-- Print derived values so the work cannot be dead-code-eliminated.
print("len(s)=", #s, " sum=", sum)

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python3
import sys, os, re
# Map from human-readable test headings (as printed in the logs) to short ids.
TEST_MAP = {
    'Sequential LIFO': 'lifo',
    'Sequential FIFO': 'fifo',
    'Random Order Free': 'random',
    'Interleaved': 'interleave',
    'Long-lived vs Short-lived': 'longshort',
    'Mixed Sizes': 'mixed',
}
def parse_file(path, allocator):
    """Parse one comprehensive-bench log.

    Tracks the current size class and test name as lines stream by, and
    records a (allocator, size, test, throughput_mops) tuple for every
    Throughput line. The 'Mixed Sizes' section uses the literal size 'mixed'.
    """
    records = []
    cur_size = None
    cur_test = None
    with open(path, 'r', errors='ignore') as fh:
        for raw in fh:
            size_match = re.search(r'^SIZE CLASS:\s*(\d+) Bytes', raw)
            if size_match:
                # New size section: reset the test tracker.
                cur_size = int(size_match.group(1))
                cur_test = None
                continue
            # Detect test headings (Mixed Sizes is handled specially below).
            for heading, short in TEST_MAP.items():
                if heading != 'Mixed Sizes' and heading in raw:
                    cur_test = short
                    break
            if 'Mixed Sizes ---' in raw or 'Test 5: Mixed Sizes' in raw:
                cur_size = 'mixed'
                cur_test = 'mixed'
            thr_match = re.search(r'^Throughput:\s*([0-9.]+) M ops/sec', raw)
            if thr_match:
                records.append((allocator, cur_size, cur_test, float(thr_match.group(1))))
    return records
def main():
    """CLI entry: parse the three known log files in a directory, emit CSV."""
    if len(sys.argv) != 2:
        print('usage: parse_comprehensive_logs.py <dir>', file=sys.stderr)
        sys.exit(1)
    log_dir = sys.argv[1]
    records = []
    # Missing logs are simply skipped; order fixes the allocator column order.
    for fname, alloc in (('hakmem.log', 'hakmem'),
                         ('mimalloc.log', 'mimalloc'),
                         ('system.log', 'system')):
        full_path = os.path.join(log_dir, fname)
        if os.path.exists(full_path):
            records.extend(parse_file(full_path, alloc))
    print('allocator,size,test,throughput_mops')
    for allocator, size, test, thr in records:
        print('{},{},{},{}'.format(allocator, size, test, thr))
if __name__ == '__main__':
    main()

View File

@ -0,0 +1,49 @@
#!/usr/bin/env python3
import sys, re, os, glob
# Parse mimalloc-bench logs and print CSV: file,allocator,procs,test,throughput
# Assumes lines like: "Throughput = 1234567 operations per second, ..."
def infer_allocator_from_file(path):
    """Guess the allocator from a log filename: prefix first, substring fallback."""
    base = os.path.basename(path)
    known = ('hakmem', 'mimalloc', 'jemalloc', 'system')
    for alloc in known:
        if base.startswith(alloc + '_'):
            return alloc
    # fallback: any occurrence anywhere in the name
    for alloc in known:
        if alloc in base:
            return alloc
    return 'unknown'
def infer_procs_from_file(path):
    """Extract the 'procs=...' CSV embedded in the filename ('' when absent)."""
    match = re.search(r'procs=([0-9,]+)', os.path.basename(path))
    if match:
        return match.group(1)
    return ''
def parse_file(path):
    """Yield (path, allocator, procs, test, throughput) per Throughput line.

    The most recent 'Test : NAME' line labels subsequent throughput results
    ('' until the first one is seen).
    """
    alloc = infer_allocator_from_file(path)
    procs = infer_procs_from_file(path)
    test_name = ''
    with open(path, 'r', errors='ignore') as fh:
        for raw in fh:
            stripped = raw.strip()
            test_match = re.search(r'^Test\s*:\s*(\S+)', stripped)
            if test_match:
                test_name = test_match.group(1)
            thr_match = re.search(r'Throughput\s*=\s*([0-9]+)\s+operations per second', stripped)
            if thr_match:
                yield (path, alloc, procs, test_name, int(thr_match.group(1)))
def main():
    """CLI entry: scan *.log under a directory and print one CSV row per result."""
    if len(sys.argv) != 2:
        print(f"usage: {sys.argv[0]} <log_dir>")
        sys.exit(1)
    log_dir = sys.argv[1]
    print('file,allocator,procs,test,throughput_ops_per_sec')
    for log_path in sorted(glob.glob(os.path.join(log_dir, '*.log'))):
        for record in parse_file(log_path):
            print(','.join(str(field) for field in record))
if __name__ == '__main__':
    main()

View File

@ -0,0 +1,128 @@
#!/usr/bin/env python3
import sys
import os
import re
import csv
# Accept PMU events with or without user-only suffix (":u")
# Maps raw `perf stat` event name -> short column name used in summary.csv.
# Both the bare and the ':u' spelling of each PMU event map to the same
# column, so either perf configuration produces the same CSV schema.
PMU_EVENTS = {
    'cycles': 'cycles',
    'cycles:u': 'cycles',
    'instructions': 'instructions',
    'instructions:u': 'instructions',
    'L1-dcache-load-misses': 'l1_miss',
    'L1-dcache-load-misses:u': 'l1_miss',
    'branch-misses': 'br_miss',
    'branch-misses:u': 'br_miss',
}
# USDT (sdt:hakmem:*) tracepoints -> short column names.
# parse_stat_file sums values when several raw events share a short name.
USDT_EVENTS = {
    'sdt:hakmem:sll_pop': 'sll_pop',
    'sdt:hakmem:mag_pop': 'mag_pop',
    'sdt:hakmem:front_pop': 'front_pop',
    'sdt:hakmem:bump_hit': 'bump_hit',
    'sdt:hakmem:slow_alloc': 'slow_alloc',
    'sdt:hakmem:sll_push': 'sll_push',
    'sdt:hakmem:mag_push': 'mag_push',
    'sdt:hakmem:spill_super': 'spill_super',
    'sdt:hakmem:spill_tiny': 'spill_tiny',
    'sdt:hakmem:remote_drain': 'remote_drain',
    'sdt:hakmem:superslab_alloc': 'super_alloc',
    'sdt:hakmem:superslab_fail': 'super_fail',
    'sdt:hakmem:quick_pop': 'quick_pop',
    'sdt:hakmem:quick_refill_sll': 'quick_refill_sll',
    'sdt:hakmem:quick_refill_mag': 'quick_refill_mag',
    'sdt:hakmem:bitmap_burst': 'bitmap_burst',
    'sdt:hakmem:mag_refill': 'mag_refill',
    'sdt:hakmem:bitmap_scan': 'bitmap_scan',
}
def parse_value(s):
    """Best-effort numeric parse of a perf-stat CSV field.

    Returns an int (floats are truncated toward zero via int(float(...))),
    or None when the field is not numeric (e.g. '<not counted>').
    """
    text = s.strip()
    try:
        return int(text)
    except ValueError:
        pass
    # try float to int
    try:
        return int(float(text))
    except Exception:
        return None
def parse_stat_file(path):
    """Read one `perf stat -x ,` CSV file into {short_event_name: value}.

    PMU values overwrite (last wins); USDT values accumulate; unknown events
    and non-numeric rows are ignored.
    """
    counters = {}
    with open(path, 'r', errors='ignore') as fh:
        for raw in fh:
            fields = [f.strip() for f in raw.strip().split(',')]
            if len(fields) < 3:
                continue
            value = parse_value(fields[0])
            event = fields[2]
            if value is None:
                continue
            # Normalize PMU names: strip an optional ':u' (user-only) suffix.
            if not event.startswith('sdt:'):
                bare = event.split(':')[0]
                if event not in PMU_EVENTS and bare in PMU_EVENTS:
                    event = bare
            if event in PMU_EVENTS:
                counters[PMU_EVENTS[event]] = value
            elif event in USDT_EVENTS:
                key = USDT_EVENTS[event]
                counters[key] = counters.get(key, 0) + value
            # anything else is ignored
    return counters
def main():
    """Aggregate per-run .stat.csv files in a directory into summary.csv.

    Expects filenames shaped like '<alloc>_s<size>_b<batch>_c<cycles>.stat.csv'
    where alloc is 'hakmem' or 'system'; any other file is skipped.
    Writes <dir>/summary.csv and prints its path.
    """
    if len(sys.argv) != 2:
        print("Usage: parse_usdt_stat.py <usdt_results_dir>")
        sys.exit(1)
    root = sys.argv[1]
    rows = []
    for fn in sorted(os.listdir(root)):
        if not fn.endswith('.stat.csv'):
            continue
        # Pull run metadata (allocator/size/batch/cycles) out of the filename.
        m = re.match(r'(?P<alloc>hakmem|system)_s(?P<size>\d+)_b(?P<batch>\d+)_c(?P<cycles>\d+)\.stat\.csv', fn)
        if not m:
            continue
        meta = m.groupdict()
        path = os.path.join(root, fn)
        stats = parse_stat_file(path)
        row = {
            'allocator': meta['alloc'],
            'size': int(meta['size']),
            'batch': int(meta['batch']),
            'cycles_param': int(meta['cycles']),
        }
        row.update(stats)
        # derived: fraction of pops served by each tier (front / sll / mag)
        total_pops = sum(row.get(k, 0) for k in ('sll_pop','mag_pop','front_pop'))
        if total_pops > 0:
            row['front_rate'] = row.get('front_pop',0)/total_pops
            row['sll_rate'] = row.get('sll_pop',0)/total_pops
            row['mag_rate'] = row.get('mag_pop',0)/total_pops
        else:
            row['front_rate'] = row['sll_rate'] = row['mag_rate'] = 0.0
        rows.append(row)
    # sort for readability
    rows.sort(key=lambda r: (r['allocator'], r['size'], r['batch']))
    out = os.path.join(root, 'summary.csv')
    # collect headers: fixed meta columns, then every known event, then rates
    headers = ['allocator','size','batch','cycles_param'] + list(PMU_EVENTS.values()) + list(USDT_EVENTS.values()) + ['front_rate','sll_rate','mag_rate']
    # remove duplicates but keep order (PMU short names appear twice due to
    # the ':u' aliases in PMU_EVENTS)
    seen = set()
    hdr_final = []
    for h in headers:
        if h not in seen:
            hdr_final.append(h)
            seen.add(h)
    with open(out, 'w', newline='') as f:
        # Rows missing some counters get DictWriter's default empty fields.
        w = csv.DictWriter(f, fieldnames=hdr_final)
        w.writeheader()
        for r in rows:
            w.writerow(r)
    print(out)
if __name__ == '__main__':
    main()

View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash
set -euo pipefail
# Sweep bench-fastpath refill sizes (8/12/16) and run Tiny-Hot triad each.
# Usage: scripts/run_benchfast_sweep.sh [cycles]
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"
# Optional first arg: cycle count passed through to the triad run (default 60000).
cycles=${1:-60000}
echo "[build] system/mimalloc (fast + mi)"
make -s bench_fast bench_tiny_hot_mi >/dev/null
TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/tiny_benchfast_sweep_${TS}"
mkdir -p "$OUTDIR"
# Build the tagged fastpath variant, run the triad, archive its results CSV.
# Arguments: $1 - refill-size tag (r8|r12|r16), matching a make target suffix.
run_case() {
  local tag="$1"; shift
  echo "[build] HAKMEM bench-fastpath (${tag})"
  make -s "bench_fastpath_${tag}" >/dev/null
  echo "[run] triad (${tag})"
  SKIP_BUILD=1 bash scripts/run_tiny_hot_triad.sh "$cycles"
  # pick the latest triad CSV and copy with tag
  # NOTE(review): parses `ls -t` output — OK while triad dir names have no
  # whitespace; presumably set -e aborts if no tiny_hot_triad_* dir exists.
  local latest_csv
  latest_csv=$(ls -1dt bench_results/tiny_hot_triad_* | head -1)/results.csv
  cp "$latest_csv" "$OUTDIR/results_${tag}.csv"
  echo "[saved] $OUTDIR/results_${tag}.csv"
}
run_case r8
run_case r12
run_case r16
echo "[done] sweep outputs in: $OUTDIR"

View File

@ -0,0 +1,170 @@
#!/usr/bin/env bash
set -euo pipefail
# Reproducible larson runner for hakmem/system/mimalloc.
#
# Usage:
#   scripts/run_larson.sh [runtime_sec] [threads]
# Examples:
#   scripts/run_larson.sh        # default: 10s, threads=1 4
#   scripts/run_larson.sh 10 1   # 10s, 1 thread
# (Run with -h for the full flag list; flags take precedence over positionals.)
#
# Optional env vars:
#   HAKMEM_WRAP_TINY=0|1
#   HAKMEM_WRAP_TINY_REFILL=0|1
#   HAKMEM_TINY_MAG_CAP=INT
#   HAKMEM_SAFE_FREE=0|1
#   HAKMEM_EVO_SAMPLE=INT (0 disables evo recording; default 0)
#   MIMALLOC_SO=/path/to/libmimalloc.so.2 (optional; if not set, auto-detect)
# Print CLI help to stdout.
usage() {
  # Quoted delimiter: the help text contains no expansions, emit it verbatim.
  cat <<'USAGE'
Usage: scripts/run_larson.sh [options] [runtime_sec] [threads_csv]
Options:
-d SECONDS Runtime seconds (default: 10)
-t CSV Threads CSV, e.g. 1,4 (default: 1,4)
-c NUM Chunks per thread (default: 10000)
-r NUM Rounds (default: 1)
-m BYTES Min size (default: 8)
-M BYTES Max size (default: 1024)
-s SEED Random seed (default: 12345)
-p PRESET Preset: burst|loop (sets -c/-r)
-w Include WRAP_TINY=1 runs (default: off)
-h Show this help
Env overrides (alternative to flags):
MIN, MAX, CHUNK_PER_THREAD, ROUNDS, SEED
HAKMEM_* toggles per README
USAGE
}
# Defaults
RUNTIME="10"
THREADS_ARG="1,4"
# Workload defaults (burst preset); env vars seed these, flags override below.
MIN="${MIN:-8}"
MAX="${MAX:-1024}"
CHUNK_PER_THREAD="${CHUNK_PER_THREAD:-10000}"
ROUNDS="${ROUNDS:-1}"
SEED="${SEED:-12345}"
PRESET=""
INCLUDE_WRAP=0
# Parse short flags; the leading ':' in the optstring enables silent error
# handling via the ':' (missing arg) and '*' (unknown flag) arms.
while getopts ":d:t:c:r:m:M:s:p:wh" opt; do
  case $opt in
    d) RUNTIME="$OPTARG" ;;
    t) THREADS_ARG="$OPTARG" ;;
    c) CHUNK_PER_THREAD="$OPTARG" ;;
    r) ROUNDS="$OPTARG" ;;
    m) MIN="$OPTARG" ;;
    M) MAX="$OPTARG" ;;
    s) SEED="$OPTARG" ;;
    p) PRESET="$OPTARG" ;;
    w) INCLUDE_WRAP=1 ;;
    h) usage; exit 0 ;;
    :) echo "Missing argument for -$OPTARG" >&2; usage; exit 2 ;;
    *) usage; exit 2 ;;
  esac
done
shift $((OPTIND-1))
# Backward-compatible positional args (after any flags): runtime, threads CSV.
if [[ $# -ge 1 ]]; then RUNTIME="$1"; fi
if [[ $# -ge 2 ]]; then THREADS_ARG="$2"; fi
# Presets overwrite chunk/round counts, including values given via -c/-r.
case "$PRESET" in
  burst|BURST)
    CHUNK_PER_THREAD=10000; ROUNDS=1 ;;
  loop|LOOP)
    CHUNK_PER_THREAD=100; ROUNDS=100 ;;
  "" ) : ;;
  *) echo "Unknown preset: $PRESET" >&2; exit 2 ;;
esac
# Params matching our standard runs (larson reads: runtime, min, max, chunks/thread, rounds, seed, threads)
# Show resolved parameters for reproducibility
echo "[CFG] runtime=${RUNTIME}s threads={${THREADS_ARG}} min=${MIN} max=${MAX} chunks/thread=${CHUNK_PER_THREAD} rounds=${ROUNDS} seed=${SEED}"
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
LIB_HAKMEM="$ROOT_DIR/libhakmem.so"
LARSON_BIN="$ROOT_DIR/mimalloc-bench/bench/larson/larson"
if [[ ! -x "$LARSON_BIN" ]]; then
  echo "[ERR] Larson binary not found at: $LARSON_BIN" >&2
  echo " Did you sync submodule/build bench?" >&2
  exit 1
fi
# Build libhakmem.so on demand so a fresh checkout still works.
if [[ ! -f "$LIB_HAKMEM" ]]; then
  echo "[INFO] libhakmem.so not found; building..."
  (cd "$ROOT_DIR" && make -j4 shared >/dev/null)
fi
abs_hakmem="$(readlink -f "$LIB_HAKMEM")"
# Locate a libmimalloc.so.2 to LD_PRELOAD. Order: $MIMALLOC_SO override,
# well-known distro paths, then ldconfig. Prints the path and returns 0 on
# success; returns 1 when nothing usable is found.
detect_mimalloc() {
  local candidate
  if [[ -n "${MIMALLOC_SO:-}" && -f "$MIMALLOC_SO" ]]; then
    echo "$MIMALLOC_SO"
    return 0
  fi
  # try common paths or ldconfig
  for candidate in \
    /usr/lib/x86_64-linux-gnu/libmimalloc.so.2 \
    /lib/x86_64-linux-gnu/libmimalloc.so.2; do
    if [[ -f "$candidate" ]]; then
      echo "$candidate"
      return 0
    fi
  done
  if command -v ldconfig >/dev/null 2>&1; then
    candidate="$(ldconfig -p | awk '/libmimalloc.so/ {print $4; exit}')"
    if [[ -n "$candidate" && -f "$candidate" ]]; then
      echo "$candidate"
      return 0
    fi
  fi
  return 1
}
# Resolve mimalloc once; empty string means "skip the mimalloc runs".
mimalloc_so=""
if ! mimalloc_so=$(detect_mimalloc); then
  mimalloc_so=""
fi
# Run one larson configuration and show the last 3 lines of its output.
# Arguments: $1 label, $2 LD_PRELOAD path ("" = plain system malloc), $3 threads.
run_case() {
  local label="$1"; shift
  local preload="$1"; shift
  local threads="$1"; shift
  # BUG FIX: bash `echo "\n..."` prints a literal backslash-n (no -e flag);
  # use printf so each banner is preceded by a real blank line.
  printf '\n== %s | %sT | %ss ==\n' "$label" "$threads" "$RUNTIME"
  if [[ -n "$preload" ]]; then
    env LD_PRELOAD="$preload" "$LARSON_BIN" "$RUNTIME" "$MIN" "$MAX" "$CHUNK_PER_THREAD" "$ROUNDS" "$SEED" "$threads" 2>&1 | tail -n 3
  else
    "$LARSON_BIN" "$RUNTIME" "$MIN" "$MAX" "$CHUNK_PER_THREAD" "$ROUNDS" "$SEED" "$threads" 2>&1 | tail -n 3
  fi
}
IFS=',' read -r -a THREADS <<< "$THREADS_ARG"
for t in "${THREADS[@]}"; do
  # system malloc
  run_case "system malloc" "" "$t"
  # mimalloc (optional)
  if [[ -n "$mimalloc_so" ]]; then
    run_case "mimalloc" "$mimalloc_so" "$t"
  else
    printf '\n== mimalloc | %sT | %ss ==\n' "$t" "$RUNTIME"
    echo "[SKIP] libmimalloc not found"
  fi
  # hakmem default
  run_case "hakmem (default)" "$abs_hakmem" "$t"
  # hakmem wrap tiny (optional)
  if [[ "$INCLUDE_WRAP" -eq 1 ]]; then
    printf '\n== hakmem (HAKMEM_WRAP_TINY=1) | %sT | %ss ==\n' "$t" "$RUNTIME"
    HAKMEM_WRAP_TINY=1 LD_PRELOAD="$abs_hakmem" "$LARSON_BIN" "$RUNTIME" "$MIN" "$MAX" "$CHUNK_PER_THREAD" "$ROUNDS" "$SEED" "$t" 2>&1 | tail -n 3
  fi
done
printf '\nDone.\n'

View File

@ -0,0 +1,79 @@
#!/usr/bin/env bash
set -euo pipefail
# Compare memory efficiency (Max RSS) between HAKMEM and System on tiny-hot bench.
# - Runs selected sizes/batches with /usr/bin/time -v and parses Maximum resident set size (KB).
# - Optionally toggles HAKMEM_TINY_FLUSH_ON_EXIT to evaluate exit-time trimming.
# Output: bench_results/memory_eff_YYYYMMDD_HHMMSS/results.csv
# Usage: scripts/run_memory_efficiency.sh [cycles]
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"
# Optional first arg: benchmark cycle count (default 60000).
cycles=${1:-60000}
# The external time(1) binary is required; the shell builtin has no -v.
if [[ ! -x /usr/bin/time ]]; then
  echo "[error] /usr/bin/time not found. Install 'time' package." >&2
  exit 1
fi
echo "[build] perf_main benches (no bench-only macros)"
make -s perf_main >/dev/null
TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/memory_eff_${TS}"
mkdir -p "$OUTDIR"
CSV="$OUTDIR/results.csv"
# CSV header; run_case below appends one data row per invocation.
echo "allocator,size,batch,cycles,flush_on_exit,max_rss_kb,elapsed_ms" > "$CSV"
# Size/batch matrix swept by the driver loop at the bottom of the script.
sizes=(32 64 128)
batches=(100)
# Run one measurement under /usr/bin/time -v and append a CSV row.
# Arguments: $1 allocator (hakmem|system), $2 size, $3 batch, $4 cycles,
#            $5 HAKMEM_TINY_FLUSH_ON_EXIT value (only used for hakmem).
run_case() {
  local alloc="$1"; shift
  local size="$1"; shift
  local batch="$1"; shift
  local cyc="$1"; shift
  local flush="$1"; shift
  local bin
  if [[ "$alloc" == "hakmem" ]]; then bin=./bench_tiny_hot_hakmem; else bin=./bench_tiny_hot_system; fi
  local tmp_log="$OUTDIR/tmp_${alloc}_${size}_${batch}_${cyc}_${flush}.log"
  local tmp_out="$OUTDIR/tmp_${alloc}_${size}_${batch}_${cyc}_${flush}.out"
  # '|| true': even a crashing bench leaves a time(1) log we can parse.
  if [[ "$alloc" == "hakmem" ]]; then
    HAKMEM_TINY_FLUSH_ON_EXIT="$flush" /usr/bin/time -v "$bin" "$size" "$batch" "$cyc" >"$tmp_out" 2>"$tmp_log" || true
  else
    /usr/bin/time -v "$bin" "$size" "$batch" "$cyc" >"$tmp_out" 2>"$tmp_log" || true
  fi
  # FIX: declare and assign separately so `local var=$(cmd)` does not mask the
  # substitution's exit status (SC2155). NOTE: '\s' in sed is a GNU extension.
  local rss elapsed
  rss=$(sed -n 's/^\s*Maximum resident set size (kbytes): \([0-9]\+\).*/\1/p' "$tmp_log" | tail -1)
  elapsed=$(sed -n 's/^\s*Elapsed (wall clock) time (h:mm:ss or m:ss): \(.*\)/\1/p' "$tmp_log" | tail -1)
  # convert elapsed to ms (best-effort; handles m:ss or h:mm:ss; fractional
  # seconds are truncated by the ${...%.*} strip)
  local ms=0
  if [[ -n "$elapsed" ]]; then
    local e1="" e2="" e3=""
    IFS=: read -r e1 e2 e3 <<<"$elapsed" || true
    if [[ -n "$e3" ]]; then
      # h:mm:ss
      ms=$(( (10#${e1}*3600 + 10#${e2}*60) * 1000 ))
      ms=$(( ms + (10#${e3%.*})*1000 ))
    else
      # m:ss
      ms=$(( (10#${e1}*60) * 1000 ))
      ms=$(( ms + (10#${e2%.*})*1000 ))
    fi
  fi
  echo "$alloc,$size,$batch,$cyc,$flush,${rss:-},${ms:-}" >> "$CSV"
}
# Sweep the matrix: one system baseline plus HAKMEM with and without
# exit-time flush for every size/batch combination.
for sz in "${sizes[@]}"; do
  for bt in "${batches[@]}"; do
    echo "[run] SYSTEM size=$sz batch=$bt cycles=$cycles"
    run_case system "$sz" "$bt" "$cycles" 0
    echo "[run] HAKMEM (flush=0) size=$sz batch=$bt cycles=$cycles"
    run_case hakmem "$sz" "$bt" "$cycles" 0
    echo "[run] HAKMEM (flush=1) size=$sz batch=$bt cycles=$cycles"
    run_case hakmem "$sz" "$bt" "$cycles" 1
  done
done
echo "[done] CSV: $CSV"
head -n 40 "$CSV" || true

View File

@ -0,0 +1,61 @@
#!/usr/bin/env bash
set -euo pipefail
# Sweep Ultra Tiny (SLL-only) with debug counters and output CSV
# Usage: scripts/run_ultra_debug_sweep.sh [cycles] [batch]
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT_DIR"
# Positional args: cycle count (default 60000) and batch size (default 200).
cycles=${1:-60000}
batch=${2:-200}
make -s bench_fast >/dev/null
TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/ultra_debug_${TS}"
mkdir -p "$OUTDIR"
CSV="$OUTDIR/results.csv"
# One row per size: throughput plus the debug counters of that size's class.
echo "size,batch,cycles,throughput_mops,class,pop_hits,refills,resets,sll_count" > "$CSV"
sizes=(16 32 64)
# Map a tiny allocation size in bytes to its class index ( -1 = unknown size).
size_to_class() {
  local bytes="$1"
  case "$bytes" in
    8)   echo 0;;
    16)  echo 1;;
    32)  echo 2;;
    64)  echo 3;;
    128) echo 4;;
    *)   echo -1;;
  esac
}
for s in "${sizes[@]}"; do
  cls=$(size_to_class "$s")
  log="$OUTDIR/ultra_${s}_b=${batch}_c=${cycles}.log"
  # Run with Ultra + debug; capture stdout+stderr in one file
  HAKMEM_TINY_ULTRA=1 HAKMEM_TINY_ULTRA_DEBUG=1 HAKMEM_TINY_MAG_CAP=128 \
    ./bench_tiny_hot_hakmem "$s" "$batch" "$cycles" >"$log" 2>&1 || true
  # Throughput line format: "Throughput: N.NN M ops..."; keep the last match.
  thr=$(sed -n 's/^Throughput: \([0-9.][0-9.]*\) M ops.*/\1/p' "$log" | tail -n1)
  # Extract Ultra debug block
  start=$(grep -n '^\[Ultra Tiny Debug\]' "$log" | tail -n1 | cut -d: -f1)
  if [[ -n "$start" ]]; then
    # header is the next line; data follows
    data_start=$((start+2))
    # take next 8 lines (classes 0..7)
    sed -n "${data_start},$((data_start+7))p" "$log" > "$OUTDIR/tmp_ultra.txt" || true
    # pick the line for target class
    line=$(awk -F',' -v k="$cls" '($1==k){print $0}' "$OUTDIR/tmp_ultra.txt" | tail -n1)
    if [[ -n "$line" ]]; then
      # line format: class,pop_hits,refills,resets,sll_count
      IFS=',' read -r c ph rf rs sc <<<"$line"
      echo "$s,$batch,$cycles,${thr:-},$c,$ph,$rf,$rs,$sc" >> "$CSV"
    fi
  fi
done
echo "[done] CSV: $CSV"
sed -n '1,20p' "$CSV" || true

View File

@ -0,0 +1,101 @@
#!/usr/bin/env bash
set -euo pipefail
# Build with USDT tracepoints and run perf stat for USDT events + PMU on tiny_hot + mixed
# Usage: scripts/run_usdt_overview.sh [cycles]
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
# Allow overriding perf binary (e.g., WSL generic tools). Usage:
# PERF_BIN=/usr/lib/linux-tools-6.8.0-86/perf bash scripts/run_usdt_overview.sh 40000
PERF_BIN=${PERF_BIN:-perf}
cd "$ROOT_DIR"
cycles=${1:-50000}
# Rebuild with -DHAKMEM_USDT=1 unless the caller opts out via SKIP_BUILD=1.
if [[ "${SKIP_BUILD:-0}" != "1" ]]; then
  echo "[build] USDT-enabled benches"
  make -s clean >/dev/null 2>&1 || true
  make -s bench_fast CFLAGS+=" -DHAKMEM_USDT=1" >/dev/null
else
  echo "[build] skipped (SKIP_BUILD=1)"
fi
TS=$(date +%Y%m%d_%H%M%S)
OUTDIR="bench_results/usdt_${TS}"
mkdir -p "$OUTDIR"
# USDT (sdt:hakmem:*) tracepoints to collect when the probe below succeeds.
EVENTS_USDT=(
  sdt:hakmem:sll_pop
  sdt:hakmem:mag_pop
  sdt:hakmem:front_pop
  sdt:hakmem:bump_hit
  sdt:hakmem:slow_alloc
  sdt:hakmem:sll_push
  sdt:hakmem:mag_push
  sdt:hakmem:spill_super
  sdt:hakmem:spill_tiny
  sdt:hakmem:remote_drain
  sdt:hakmem:superslab_alloc
  sdt:hakmem:superslab_fail
)
# PMU counters, declared as a single comma-joined element; join_events passes
# a single element through unchanged.
EVENTS_PMU=(cycles,instructions,L1-dcache-load-misses,branch-misses)
# Join all arguments into one comma-separated string on stdout.
join_events() {
  local IFS=','
  echo "$*"
}
PMU_JOINED=$(join_events "${EVENTS_PMU[@]}" )
# Detect USDT availability by actually probing a dummy run
USDT_JOINED=""
{
  "$PERF_BIN" stat -x , -e sdt:hakmem:front_pop true 1>/dev/null 2>"$OUTDIR/.usdt_probe.err"
} || true
# BUG FIX: the checks used `rg`; when ripgrep was not installed, both
# conditions failed silently (exit 127 inside `if`) and the else branch
# enabled USDT events regardless. grep is always present; -E handles the
# alternation in the second pattern.
if grep -q "unknown tracepoint" "$OUTDIR/.usdt_probe.err"; then
  echo "[warn] perf does not support 'sdt:' on this system (unknown tracepoint). Using PMU-only." | tee -a "$OUTDIR/summary.txt"
  echo "[hint] Install perf matching your kernel: sudo apt-get install linux-tools-\$(uname -r)" | tee -a "$OUTDIR/summary.txt"
  echo "[hint] Kernel must have UPROBE/SDT support (CONFIG_UPROBE_EVENTS)." | tee -a "$OUTDIR/summary.txt"
elif grep -Eq "can't access trace events|No permissions" "$OUTDIR/.usdt_probe.err"; then
  echo "[warn] USDT blocked by tracefs perms; falling back to PMU-only." | tee -a "$OUTDIR/summary.txt"
  echo "[hint] Try: sudo mount -t tracefs -o mode=755 nodev /sys/kernel/tracing" | tee -a "$OUTDIR/summary.txt"
  echo "[hint] And: sudo sysctl kernel.perf_event_paranoid=1" | tee -a "$OUTDIR/summary.txt"
else
  # Probe looked clean; enable the full USDT event list.
  USDT_JOINED=$(join_events "${EVENTS_USDT[@]}")
fi
# Basic environment info for troubleshooting
{
  echo "[env] perf=$($PERF_BIN --version 2>/dev/null | head -n1)";
  echo "[env] kernel=$(uname -r)";
  echo "[env] tracefs=$(ls -ld /sys/kernel/tracing 2>/dev/null || true)";
} | tee -a "$OUTDIR/summary.txt"
# Run `perf stat -x ,` over one bench binary; machine-readable counters land
# in <tag>_s<size>_b<batch>_c<cycles>.stat.csv (perf writes them to stderr).
run_perf() {
  local tag="$1"; shift
  local exe="$1"; shift
  local size="$1"; shift
  local batch="$1"; shift
  local cyc="$1"; shift
  local stat_csv="$OUTDIR/${tag}_s${size}_b${batch}_c${cyc}.stat.csv"
  echo "[perf] $tag size=$size batch=$batch cycles=$cyc" | tee -a "$OUTDIR/summary.txt"
  # USDT events go first when available; otherwise PMU counters only.
  local events="$PMU_JOINED"
  if [[ -n "$USDT_JOINED" ]]; then
    events="$USDT_JOINED,$PMU_JOINED"
  fi
  "$PERF_BIN" stat -x , -e "$events" "$exe" "$size" "$batch" "$cyc" 1>/dev/null 2>"$stat_csv" || true
}
# Tiny-hot focus (8/16/32/64)
for sz in 8 16 32 64; do
  for bt in 100; do
    HAKMEM_QUIET=1 run_perf "hakmem" ./bench_tiny_hot_hakmem "$sz" "$bt" "$cycles"
    HAKMEM_QUIET=1 run_perf "system" ./bench_tiny_hot_system "$sz" "$bt" "$cycles"
  done
done
# Random mixed overview
bash scripts/run_random_mixed_matrix.sh 80000 >/dev/null || true
echo "[done] USDT overview: $OUTDIR"
ls -1 "$OUTDIR" | head -n 20

View File

@ -0,0 +1,63 @@
#!/usr/bin/env bash
set -euo pipefail
# Save a short profiler sweep into docs/benchmarks/<YYYYMMDD_HHMMSS>/
# Usage: scripts/save_prof_sweep.sh [-d SEC] [-t CSV] [-s N]
RUNTIME=2
THREADS="1,4"
SAMPLE_N=8
BENCH_TIMEOUT=""
KILL_GRACE=${KILL_GRACE:-2}
while getopts ":d:t:s:h" opt; do
case $opt in
d) RUNTIME="$OPTARG" ;;
t) THREADS="$OPTARG" ;;
s) SAMPLE_N="$OPTARG" ;;
h) echo "Usage: $0 [-d SEC] [-t CSV] [-s N]"; exit 0 ;;
:) echo "Missing arg -$OPTARG"; exit 2 ;;
*) echo "Usage: $0 [-d SEC] [-t CSV] [-s N]"; exit 2 ;;
esac
done
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
OUTDIR="$ROOT_DIR/docs/benchmarks/$(date +%Y%m%d_%H%M%S)"
mkdir -p "$OUTDIR"
LIB="$(readlink -f "$ROOT_DIR/libhakmem.so" || true)"
LARSON="$ROOT_DIR/mimalloc-bench/bench/larson/larson"
if [[ -z "${BENCH_TIMEOUT}" ]]; then
BENCH_TIMEOUT=$(( RUNTIME + 3 ))
fi
echo "Saving sweep into: $OUTDIR" | tee "$OUTDIR/summary.txt"
echo "RUNTIME=$RUNTIME THREADS=$THREADS SAMPLE=1/$((1<<SAMPLE_N)) TIMEOUT=${BENCH_TIMEOUT}s" | tee -a "$OUTDIR/summary.txt"
declare -a RUNS=(
"tiny 8 1024"
"mid 2048 32768"
"gap 33000 65536"
"large 65536 1048576"
)
IFS=',' read -r -a TARR <<< "$THREADS"
for r in "${RUNS[@]}"; do
read -r name rmin rmax <<< "$r"
for t in "${TARR[@]}"; do
label="${name}_T${t}_${rmin}-${rmax}"
echo "== $label ==" | tee -a "$OUTDIR/summary.txt"
if [[ -f "$LARSON" && -f "$ROOT_DIR/libhakmem.so" ]]; then
timeout -k "${KILL_GRACE}s" "${BENCH_TIMEOUT}s" \
env HAKMEM_PROF=1 HAKMEM_PROF_SAMPLE="$SAMPLE_N" \
LD_PRELOAD="$LIB" "$LARSON" "$RUNTIME" "$rmin" "$rmax" 10000 1 12345 "$t" 2>&1 \
| tee "$OUTDIR/${label}.log" | tail -n 80 | tee -a "$OUTDIR/summary.txt"
else
echo "Skip: missing larson or libhakmem.so" | tee -a "$OUTDIR/summary.txt"
fi
done
done
echo "Done. See $OUTDIR" | tee -a "$OUTDIR/summary.txt"

View File

@ -0,0 +1,32 @@
-- SQLite allocator workload: bulk insert, scans, point lookups, and an update.
-- Durability is disabled on purpose — benchmark data, not real data.
PRAGMA journal_mode = OFF;
PRAGMA synchronous = OFF;
PRAGMA temp_store = MEMORY;
-- schema
CREATE TABLE t (
  id INTEGER PRIMARY KEY,
  s TEXT
);
-- bulk insert via recursive CTE (~50k rows; the LIMIT bounds the recursion)
WITH RECURSIVE cnt(x) AS (
  SELECT 1
  UNION ALL
  SELECT x+1 FROM cnt LIMIT 50000
)
INSERT INTO t(s)
SELECT printf('str-%d-%d', x, x*x) FROM cnt;
-- simple read queries (full scans)
SELECT COUNT(*) FROM t;
SELECT SUM(LENGTH(s)) FROM t;
-- point lookups by primary key
SELECT s FROM t WHERE id IN (1, 100, 1000, 10000, 40000);
-- update a slice (every 50th row gets a '-x' suffix)
UPDATE t SET s = s || '-x' WHERE (id % 50) = 0;
-- final check: count the rows touched by the UPDATE above
SELECT COUNT(*) FROM t WHERE s LIKE '%-x';