AotPrep collections_hot matmul tuning and bench tweaks

This commit is contained in:
nyash-codex
2025-11-14 13:36:20 +09:00
parent 13f21334c9
commit f1fa182a4b
17 changed files with 760 additions and 219 deletions

View File

@ -5,9 +5,9 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
BIN="$ROOT/target/release/hakorune"
usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe]"; }
usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe] [--budget-ms B]"; }
CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0
CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0; BUDGET_MS=0
while [[ $# -gt 0 ]]; do
case "$1" in
--case) CASE="$2"; shift 2;;
@ -15,6 +15,7 @@ while [[ $# -gt 0 ]]; do
--runs) RUNS="$2"; shift 2;;
--backend) BACKEND="$2"; shift 2;;
--exe) EXE_MODE=1; shift 1;;
--budget-ms) BUDGET_MS="$2"; shift 2;;
--help|-h) usage; exit 0;;
*) echo "Unknown arg: $1"; usage; exit 2;;
esac
@ -46,6 +47,7 @@ bench_hako() {
fi
PYTHONPATH="${PYTHONPATH:-$ROOT}" \
NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \
NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 HAKO_USING_RESOLVER_FIRST=1 \
NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \
NYASH_EMIT_EXE_NYRT="${NYASH_EMIT_EXE_NYRT:-$ROOT/target/release}" \
NYASH_LLVM_USE_HARNESS=1 "$BIN" --backend llvm "$file" >/dev/null 2>&1
@ -79,6 +81,17 @@ time_exe_run() {
# Create an empty temporary file whose name ends in ".hako" and print its path.
# NOTE(review): relies on mktemp's --suffix option (GNU coreutils); confirm on non-GNU hosts.
mktemp_hako() {
  mktemp --suffix .hako
}
# Create an empty temporary file whose name ends in ".c" and print its path.
mktemp_c() {
  mktemp --suffix .c
}
# Print how many times an extended-regex pattern occurs in a file.
# Arguments: $1 - ERE pattern, $2 - file path
# Outputs:   the occurrence count on stdout; 0 when the file is missing,
#            unreadable, or contains no match.
_diag_count_matches() {
  local pattern="$1" file="$2" n
  # grep -o prints one line per match, so occurrences are counted even when
  # the whole JSON document sits on a single line. `|| n=0` covers the
  # non-zero pipeline status that pipefail produces when grep finds nothing.
  n=$(grep -oE -- "$pattern" "$file" 2>/dev/null | wc -l) || n=0
  # Arithmetic expansion strips the padding some wc implementations emit.
  echo "$((n + 0))"
}

# Fallback diagnostics for EXE flow: check MIR JSON for externcall/boxcall/jsonfrag.
# Arguments: $1 - path to the MIR JSON file
# Outputs:   a single "[diag] ..." summary line on stderr; nothing on stdout.
# Counts are per occurrence rather than per matching line, and only grep/wc
# are required (no dependency on ripgrep being installed).
diag_mir_json() {
  local json="$1"
  local rewrites arrays maps boxcalls jsonfrag
  rewrites=$(_diag_count_matches '"op":"externcall"' "$json")
  arrays=$(_diag_count_matches 'nyash\.array\.' "$json")
  maps=$(_diag_count_matches 'nyash\.map\.' "$json")
  boxcalls=$(_diag_count_matches '"op":"boxcall"' "$json")
  jsonfrag=$(_diag_count_matches '\[emit/jsonfrag\]' "$json")
  echo "[diag] externcall=${rewrites} (array=${arrays}, map=${maps}), boxcall_left=${boxcalls}, jsonfrag=${jsonfrag}" >&2
}
case "$CASE" in
loop)
HAKO_FILE=$(mktemp_hako)
@ -672,7 +685,7 @@ int main(){
int64_t ii = (i + r) % rows;
int64_t jj = (j + r) % bucket;
int64_t k2 = (ii / bucket) * bucket + jj;
mapv[k2] = v;
mapv[k2] = v;
acc += mapv[k2];
}
}
@ -684,10 +697,16 @@ C
;;
kilo)
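# kilo: the C reference side is heavy, so with the default N=5_000_000 the run takes a very long time.
# In EXE mode with N left unspecified (i.e. at its default value), lower N so the measurement stays practical.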
if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then
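# In the Phase 21.5 optimization phase, LLVM benchmarks target the EXE path only.
# - With the LLVM backend and N at its default value (5_000_000), always lower N to 200_000.
# - With the LLVM backend and EXE_MODE=0, force promotion to the EXE path as well (VM fallback is prohibited).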
if [[ "$BACKEND" = "llvm" && "$N" = "5000000" ]]; then
N=200000
fi
if [[ "$BACKEND" = "llvm" && "$EXE_MODE" = "0" ]]; then
echo "[info] kilo: forcing --exe for llvm backend (Phase 21.5 optimization)" >&2
EXE_MODE=1
fi
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
box KiloBench {
@ -814,45 +833,31 @@ if [[ "$EXE_MODE" = "1" ]]; then
ensure_nyrt
HAKO_EXE=$(mktemp --suffix .out)
TMP_JSON=$(mktemp --suffix .json)
# Default: use jsonfrag (stable/fast). Set PERF_USE_PROVIDER=1 to prefer provider/selfhost MIR.
# Default: use provider-first with AotPrep for maximum optimization
# DEBUG: Show file paths
echo "[matmul/debug] HAKO_FILE=$HAKO_FILE TMP_JSON=$TMP_JSON" >&2
# Emit MIR JSON for $HAKO_FILE into $TMP_JSON through tools/hakorune_emit_mir.sh.
# The complete emit output (stdout + stderr) is saved to
# /tmp/matmul_emit_log.txt; only the "[prep:" / "provider/emit]" lines are
# echoed to stderr afterwards.
#
# Success is decided by the emit script's own exit status. Testing a
# `... | tee | grep` pipeline with `if !` takes the verdict from grep instead:
# a successful emit that logs no matching line is reported as a failure, and
# without pipefail a failed emit that logs a matching line is reported as
# success.
emit_rc=0
HAKO_SELFHOST_TRACE=1 \
HAKO_SELFHOST_BUILDER_FIRST=0 HAKO_SELFHOST_NO_DELEGATE=0 \
HAKO_APPLY_AOT_PREP=1 \
NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \
HAKO_MIR_BUILDER_LOOP_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_JSONFRAG:-$([[ "${PERF_USE_JSONFRAG:-0}" = 1 ]] && echo 1 || echo 0)}" \
HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-$([[ "${PERF_USE_JSONFRAG:-0}" = 1 ]] && echo 1 || echo 0)}" \
HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE="${HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE:-1}" \
HAKO_MIR_BUILDER_JSONFRAG_PURIFY="${HAKO_MIR_BUILDER_JSONFRAG_PURIFY:-1}" \
NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \
NYASH_JSON_ONLY=1 bash "$ROOT/tools/hakorune_emit_mir.sh" "$HAKO_FILE" "$TMP_JSON" \
  >/tmp/matmul_emit_log.txt 2>&1 || emit_rc=$?
# Surfacing the filtered log lines is informational only: finding no matching
# line must not change the outcome, hence the explicit `|| true`.
grep -E "\[prep:|provider/emit\]" /tmp/matmul_emit_log.txt >&2 || true
if [[ "$emit_rc" -ne 0 ]]; then
  echo "[FAIL] emit MIR JSON failed (hint: set PERF_USE_PROVIDER=1 or HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG=1)" >&2; exit 3
fi
# Optional AOT prep stage: apply pre-normalization/passes on MIR JSON before building EXE
# Enabled when fast/hoist/collections_hot are ON (we already set them explicitly above)
# This ensures EXE path receives the same optimized JSON as harness runs.
(
PREP_HAKO=$(mktemp --suffix .hako)
cat >"$PREP_HAKO" <<'HAKO'
using selfhost.llvm.ir.aot_prep as AotPrepBox
static box Main { method main(args) {
local in = args.get(0)
local out = AotPrepBox.prep(in)
if out == null { println("[prep:fail]") return 1 }
println(out)
return 0
} }
HAKO
set +e
OUT_PATH=$(NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 NYASH_FILEBOX_MODE=core-ro \
NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \
"$BIN" --backend vm "$PREP_HAKO" -- "$TMP_JSON" 2>/dev/null | tail -n 1)
rc=$?
set -e
if [[ $rc -eq 0 && -f "$OUT_PATH" ]]; then
mv -f "$OUT_PATH" "$TMP_JSON"
fi
rm -f "$PREP_HAKO" 2>/dev/null || true
)
# Quick diagnostics: ensure AotPrep rewrites are present and jsonfrag fallback is not used
# DEBUG: Copy TMP_JSON for inspection
cp "$TMP_JSON" /tmp/matmul_from_perf.json 2>/dev/null || true
echo "[matmul/debug] TMP_JSON copied to /tmp/matmul_from_perf.json" >&2
echo "[matmul/debug] Direct externcall count: $(grep -o '"op":"externcall"' "$TMP_JSON" 2>/dev/null | wc -l)" >&2
diag_mir_json "$TMP_JSON"
# AotPrep is now applied in hakorune_emit_mir.sh via HAKO_APPLY_AOT_PREP=1
# Build EXE via helper (selects crate backend ny-llvmc under the hood)
if ! NYASH_LLVM_BACKEND=crate NYASH_LLVM_SKIP_BUILD=1 \
NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \
@ -862,17 +867,32 @@ HAKO
echo "[FAIL] build Nyash EXE failed (crate backend). Ensure ny-llvmc exists or try NYASH_LLVM_BACKEND=crate." >&2; exit 3
fi
for i in $(seq 1 "$RUNS"); do
t_c=$(time_exe_run "$C_EXE")
t_h=$(time_exe_run "$HAKO_EXE")
sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h))
if command -v python3 >/dev/null 2>&1; then
ratio=$(python3 -c "print(round(${t_h}/max(${t_c},1)*100,2))" 2>/dev/null || echo NA)
else
ratio=NA
fi
echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}%" >&2
done
# Execute the timed runs, accumulating per-binary totals in sum_c / sum_h.
# - BUDGET_MS > 0: keep running until the accumulated hak time reaches the
#   budget (or 999 iterations have been done); RUNS is then overwritten with
#   the number of iterations performed so later averaging divides correctly.
# - BUDGET_MS = 0: run exactly RUNS iterations.
# Each iteration logs one "run#..." line to stderr.

# Print the hak/c time ratio as a percentage rounded to 2 decimals, or "NA"
# when python3 is unavailable or the computation fails.
# Arguments: $1 - hak time in ms, $2 - C time in ms
perf_ratio_pct() {
  local hak_ms="$1" c_ms="$2"
  if command -v python3 >/dev/null 2>&1; then
    python3 -c "print(round(${hak_ms}/max(${c_ms},1)*100,2))" 2>/dev/null || echo NA
  else
    echo NA
  fi
}

# Reject anything that is not a plain non-negative integer up front; otherwise
# the value would be arithmetic-evaluated inside the budget comparison below
# (a stray word is treated as a variable name there).
if [[ ! "$BUDGET_MS" =~ ^[0-9]+$ ]]; then
  echo "[FAIL] --budget-ms expects a non-negative integer (got: ${BUDGET_MS})" >&2; exit 2
fi
# Force base 10 so values with leading zeros (e.g. 0800) are not parsed as octal.
BUDGET_MS=$((10#$BUDGET_MS))

if (( BUDGET_MS > 0 )); then
  i=0; used=0
  while true; do
    i=$((i + 1))
    t_c=$(time_exe_run "$C_EXE"); t_h=$(time_exe_run "$HAKO_EXE")
    sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h)); used=$((used + t_h))
    ratio=$(perf_ratio_pct "$t_h" "$t_c")
    echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}% (budget used=${used}/${BUDGET_MS}ms)" >&2
    # Stop once the budget is spent; the 999-iteration cap is a safety valve
    # against an endless loop when t_h keeps measuring as 0ms.
    if (( used >= BUDGET_MS || i >= 999 )); then RUNS=$i; break; fi
  done
else
  for i in $(seq 1 "$RUNS"); do
    t_c=$(time_exe_run "$C_EXE")
    t_h=$(time_exe_run "$HAKO_EXE")
    sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h))
    ratio=$(perf_ratio_pct "$t_h" "$t_c")
    echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}%" >&2
  done
fi
avg_c=$((sum_c / RUNS)); avg_h=$((sum_h / RUNS))
echo "avg c=${avg_c}ms hak=${avg_h}ms" >&2
if [ "$avg_c" -lt 5 ]; then