AotPrep collections_hot matmul tuning and bench tweaks
This commit is contained in:
@ -5,9 +5,9 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
BIN="$ROOT/target/release/hakorune"
|
||||
|
||||
usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe]"; }
|
||||
# Print the one-line command synopsis (all supported cases and flags) to stdout.
usage() {
  printf '%s\n' "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe] [--budget-ms B]"
}
|
||||
|
||||
CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0
|
||||
CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0; BUDGET_MS=0
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--case) CASE="$2"; shift 2;;
|
||||
@ -15,6 +15,7 @@ while [[ $# -gt 0 ]]; do
|
||||
--runs) RUNS="$2"; shift 2;;
|
||||
--backend) BACKEND="$2"; shift 2;;
|
||||
--exe) EXE_MODE=1; shift 1;;
|
||||
--budget-ms) BUDGET_MS="$2"; shift 2;;
|
||||
--help|-h) usage; exit 0;;
|
||||
*) echo "Unknown arg: $1"; usage; exit 2;;
|
||||
esac
|
||||
@ -46,6 +47,7 @@ bench_hako() {
|
||||
fi
|
||||
PYTHONPATH="${PYTHONPATH:-$ROOT}" \
|
||||
NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \
|
||||
NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 HAKO_USING_RESOLVER_FIRST=1 \
|
||||
NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \
|
||||
NYASH_EMIT_EXE_NYRT="${NYASH_EMIT_EXE_NYRT:-$ROOT/target/release}" \
|
||||
NYASH_LLVM_USE_HARNESS=1 "$BIN" --backend llvm "$file" >/dev/null 2>&1
|
||||
@ -79,6 +81,17 @@ time_exe_run() {
|
||||
# Create a fresh temporary file whose name ends in ".hako" and print its path.
mktemp_hako() {
  mktemp --suffix .hako
}
|
||||
# Create a fresh temporary file whose name ends in ".c" and print its path.
mktemp_c() {
  mktemp --suffix .c
}
|
||||
|
||||
# Fallback diagnostics for EXE flow: check MIR JSON for externcall/boxcall/jsonfrag.
#
# Counts the lines of a MIR JSON file that contain each marker and prints a
# one-line "[diag] ..." summary.
# Arguments: $1 - path to the MIR JSON file
# Outputs:   summary line on stderr; nothing on stdout
# Returns:   0 (an unreadable file or a missing marker is reported as count 0)
#
# ripgrep is used when installed; otherwise plain grep does the counting, so a
# host without `rg` no longer reports every count as 0.
diag_mir_json() {
  local json="$1"
  local -a count_cmd
  if command -v rg >/dev/null 2>&1; then
    count_cmd=(rg -c)
  else
    count_cmd=(grep -c -E)
  fi

  # Both tools exit non-zero when nothing matches (rg prints nothing, grep
  # prints 0), so the status is ignored and an empty result defaults to 0 below.
  local rewrites arrays maps boxcalls jsonfrag
  rewrites=$("${count_cmd[@]}" -- '"op":"externcall"' "$json" 2>/dev/null) || true
  arrays=$("${count_cmd[@]}" -- 'nyash\.array\.' "$json" 2>/dev/null) || true
  maps=$("${count_cmd[@]}" -- 'nyash\.map\.' "$json" 2>/dev/null) || true
  boxcalls=$("${count_cmd[@]}" -- '"op":"boxcall"' "$json" 2>/dev/null) || true
  jsonfrag=$("${count_cmd[@]}" -- '\[emit/jsonfrag\]' "$json" 2>/dev/null) || true

  echo "[diag] externcall=${rewrites:-0} (array=${arrays:-0}, map=${maps:-0}), boxcall_left=${boxcalls:-0}, jsonfrag=${jsonfrag:-0}" >&2
}
|
||||
|
||||
case "$CASE" in
|
||||
loop)
|
||||
HAKO_FILE=$(mktemp_hako)
|
||||
@ -672,7 +685,7 @@ int main(){
|
||||
int64_t ii = (i + r) % rows;
|
||||
int64_t jj = (j + r) % bucket;
|
||||
int64_t k2 = (ii / bucket) * bucket + jj;
|
||||
mapv[k2] = v;
|
||||
mapv[k2] = v;
|
||||
acc += mapv[k2];
|
||||
}
|
||||
}
|
||||
@ -684,10 +697,16 @@ C
|
||||
;;
|
||||
kilo)
|
||||
# kilo: the C reference side is heavy, so the default N=5_000_000 makes the run extremely long.
|
||||
# In EXE mode with N left unspecified (default value), lower N so the measurement stays practical.
|
||||
if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then
|
||||
# In the Phase 21.5 optimization phase, LLVM benchmarks target the EXE path only.
|
||||
# - With the LLVM backend and N at its default (5_000_000), always lower N to 200_000.
|
||||
# - With the LLVM backend and EXE_MODE=0, also force promotion to the EXE path (no VM fallback).
|
||||
if [[ "$BACKEND" = "llvm" && "$N" = "5000000" ]]; then
|
||||
N=200000
|
||||
fi
|
||||
if [[ "$BACKEND" = "llvm" && "$EXE_MODE" = "0" ]]; then
|
||||
echo "[info] kilo: forcing --exe for llvm backend (Phase 21.5 optimization)" >&2
|
||||
EXE_MODE=1
|
||||
fi
|
||||
HAKO_FILE=$(mktemp_hako)
|
||||
cat >"$HAKO_FILE" <<HAKO
|
||||
box KiloBench {
|
||||
@ -814,45 +833,31 @@ if [[ "$EXE_MODE" = "1" ]]; then
|
||||
ensure_nyrt
|
||||
HAKO_EXE=$(mktemp --suffix .out)
|
||||
TMP_JSON=$(mktemp --suffix .json)
|
||||
# Default: use jsonfrag (stable/fast). Set PERF_USE_PROVIDER=1 to prefer provider/selfhost MIR.
|
||||
# Default: use provider-first with AotPrep for maximum optimization
|
||||
# DEBUG: Show file paths
|
||||
echo "[matmul/debug] HAKO_FILE=$HAKO_FILE TMP_JSON=$TMP_JSON" >&2
|
||||
# Emit MIR JSON for $HAKO_FILE into $TMP_JSON via tools/hakorune_emit_mir.sh.
# The emitter's combined stdout/stderr is saved to /tmp/matmul_emit_log.txt and
# only its "[prep:" / "provider/emit]" lines are echoed to stderr.
# Success is decided by the emitter's own exit status. Testing the status of an
# `emit | tee | grep` pipeline would instead test grep, so a successful emit
# that prints no matching line would be reported as a failure.
emit_rc=0
HAKO_SELFHOST_TRACE=1 \
HAKO_SELFHOST_BUILDER_FIRST=0 HAKO_SELFHOST_NO_DELEGATE=0 \
HAKO_APPLY_AOT_PREP=1 \
NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \
HAKO_MIR_BUILDER_LOOP_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_JSONFRAG:-$([[ "${PERF_USE_JSONFRAG:-0}" = 1 ]] && echo 1 || echo 0)}" \
HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-$([[ "${PERF_USE_JSONFRAG:-0}" = 1 ]] && echo 1 || echo 0)}" \
HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE="${HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE:-1}" \
HAKO_MIR_BUILDER_JSONFRAG_PURIFY="${HAKO_MIR_BUILDER_JSONFRAG_PURIFY:-1}" \
NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \
NYASH_JSON_ONLY=1 bash "$ROOT/tools/hakorune_emit_mir.sh" "$HAKO_FILE" "$TMP_JSON" \
  >/tmp/matmul_emit_log.txt 2>&1 || emit_rc=$?
# Filtering is purely informational: "no matching line" must not abort the run.
grep -E "\[prep:|provider/emit\]" /tmp/matmul_emit_log.txt >&2 || true
if [[ "$emit_rc" -ne 0 ]]; then
  echo "[FAIL] emit MIR JSON failed (hint: set PERF_USE_PROVIDER=1 or HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG=1)" >&2; exit 3
fi
|
||||
|
||||
# Optional AOT prep stage: apply pre-normalization/passes on MIR JSON before building EXE
|
||||
# Enabled when fast/hoist/collections_hot are ON (we already set them explicitly above)
|
||||
# This ensures EXE path receives the same optimized JSON as harness runs.
|
||||
(
|
||||
PREP_HAKO=$(mktemp --suffix .hako)
|
||||
cat >"$PREP_HAKO" <<'HAKO'
|
||||
using selfhost.llvm.ir.aot_prep as AotPrepBox
|
||||
static box Main { method main(args) {
|
||||
local in = args.get(0)
|
||||
local out = AotPrepBox.prep(in)
|
||||
if out == null { println("[prep:fail]") return 1 }
|
||||
println(out)
|
||||
return 0
|
||||
} }
|
||||
HAKO
|
||||
set +e
|
||||
OUT_PATH=$(NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 NYASH_FILEBOX_MODE=core-ro \
|
||||
NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \
|
||||
"$BIN" --backend vm "$PREP_HAKO" -- "$TMP_JSON" 2>/dev/null | tail -n 1)
|
||||
rc=$?
|
||||
set -e
|
||||
if [[ $rc -eq 0 && -f "$OUT_PATH" ]]; then
|
||||
mv -f "$OUT_PATH" "$TMP_JSON"
|
||||
fi
|
||||
rm -f "$PREP_HAKO" 2>/dev/null || true
|
||||
)
|
||||
# Quick diagnostics: ensure AotPrep rewrites are present and jsonfrag fallback is not used
|
||||
# DEBUG: Copy TMP_JSON for inspection
|
||||
cp "$TMP_JSON" /tmp/matmul_from_perf.json 2>/dev/null || true
|
||||
echo "[matmul/debug] TMP_JSON copied to /tmp/matmul_from_perf.json" >&2
|
||||
echo "[matmul/debug] Direct externcall count: $(grep -o '"op":"externcall"' "$TMP_JSON" 2>/dev/null | wc -l)" >&2
|
||||
diag_mir_json "$TMP_JSON"
|
||||
|
||||
# AotPrep is now applied in hakorune_emit_mir.sh via HAKO_APPLY_AOT_PREP=1
|
||||
# Build EXE via helper (selects crate backend ny-llvmc under the hood)
|
||||
if ! NYASH_LLVM_BACKEND=crate NYASH_LLVM_SKIP_BUILD=1 \
|
||||
NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \
|
||||
@ -862,17 +867,32 @@ HAKO
|
||||
echo "[FAIL] build Nyash EXE failed (crate backend). Ensure ny-llvmc exists or try NYASH_LLVM_BACKEND=crate." >&2; exit 3
|
||||
fi
|
||||
|
||||
for i in $(seq 1 "$RUNS"); do
|
||||
t_c=$(time_exe_run "$C_EXE")
|
||||
t_h=$(time_exe_run "$HAKO_EXE")
|
||||
sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h))
|
||||
if command -v python3 >/dev/null 2>&1; then
|
||||
ratio=$(python3 -c "print(round(${t_h}/max(${t_c},1)*100,2))" 2>/dev/null || echo NA)
|
||||
else
|
||||
ratio=NA
|
||||
fi
|
||||
echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}%" >&2
|
||||
done
|
||||
# Execute runs. If BUDGET_MS>0, keep running until budget is exhausted.
|
||||
if [[ "$BUDGET_MS" != "0" ]]; then
|
||||
i=0; used=0
|
||||
while true; do
|
||||
i=$((i+1))
|
||||
t_c=$(time_exe_run "$C_EXE"); t_h=$(time_exe_run "$HAKO_EXE")
|
||||
sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h)); used=$((used + t_h))
|
||||
if command -v python3 >/dev/null 2>&1; then ratio=$(python3 -c "print(round(${t_h}/max(${t_c},1)*100,2))" 2>/dev/null || echo NA); else ratio=NA; fi
|
||||
echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}% (budget used=${used}/${BUDGET_MS}ms)" >&2
|
||||
if [[ $used -ge $BUDGET_MS ]]; then RUNS=$i; break; fi
|
||||
# Safety valve to avoid infinite loop if t_h is 0ms
|
||||
if [[ $i -ge 999 ]]; then RUNS=$i; break; fi
|
||||
done
|
||||
else
|
||||
for i in $(seq 1 "$RUNS"); do
|
||||
t_c=$(time_exe_run "$C_EXE")
|
||||
t_h=$(time_exe_run "$HAKO_EXE")
|
||||
sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h))
|
||||
if command -v python3 >/dev/null 2>&1; then
|
||||
ratio=$(python3 -c "print(round(${t_h}/max(${t_c},1)*100,2))" 2>/dev/null || echo NA)
|
||||
else
|
||||
ratio=NA
|
||||
fi
|
||||
echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}%" >&2
|
||||
done
|
||||
fi
|
||||
avg_c=$((sum_c / RUNS)); avg_h=$((sum_h / RUNS))
|
||||
echo "avg c=${avg_c}ms hak=${avg_h}ms" >&2
|
||||
if [ "$avg_c" -lt 5 ]; then
|
||||
|
||||
Reference in New Issue
Block a user