feat(phase21.5): MirBuilder optimization prep + crate EXE infrastructure

Phase 21.5 optimization readiness - C-level performance target:
- MirBuilder: JsonFrag purify toggle (HAKO_MIR_BUILDER_JSONFRAG_PURIFY=1)
- Normalizer: extended f64 canonicalization + dedupe improvements
- loop_opts_adapter: JsonFrag path refinement for crate EXE compatibility

Infrastructure improvements:
- provider_registry: add diagnostics + ring-1 providers (array/console/map/path)
- mir_interpreter: add normalization/purify feature gates
- tools/selfhost_exe_stageb.sh: new end-to-end Stage-B→crate EXE pipeline
- tools/perf/microbench.sh: performance measurement tooling

Smoke tests (phase2100):
- Extend timeout 15s→120s for heavy crate EXE builds
- Add stageb_loop_jsonfrag_crate_exe_canary_vm.sh (target test)
- Add s3_backend_selector_crate_exe_vm_parity_return42_canary_vm.sh

Documentation:
- ENV_VARS.md: add Phase 21.5 optimization toggles
- README updates: clarify crate backend strategy
- phase215-optimization.md: new optimization roadmap

This commit sets the stage for Phase 21.5 critical optimization:
achieving C-level performance to decide hakorune's future viability.
This commit is contained in:
nyash-codex
2025-11-11 02:07:12 +09:00
parent ece91306b7
commit 07a254fc0d
49 changed files with 905 additions and 167 deletions

216
tools/perf/microbench.sh Normal file
View File

@ -0,0 +1,216 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
BIN="$ROOT/target/release/hakorune"
usage() { echo "Usage: $0 --case {loop|strlen|box} [--n N] [--runs R] [--backend {llvm|vm}] [--exe]"; }
CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0
while [[ $# -gt 0 ]]; do
case "$1" in
--case) CASE="$2"; shift 2;;
--n) N="$2"; shift 2;;
--runs) RUNS="$2"; shift 2;;
--backend) BACKEND="$2"; shift 2;;
--exe) EXE_MODE=1; shift 1;;
--help|-h) usage; exit 0;;
*) echo "Unknown arg: $1"; usage; exit 2;;
esac
done
if [[ ! -x "$BIN" ]]; then echo "[FAIL] hakorune not built: $BIN" >&2; exit 2; fi
bench_hako() {
local file="$1"; local backend="$2"; shift 2
local start end
start=$(date +%s%N)
if [[ "$backend" = "llvm" ]]; then
# Ensure ny-llvmc exists; build if missing
if [[ ! -x "$ROOT/target/release/ny-llvmc" ]]; then
(cargo build -q --release -p nyash-llvm-compiler >/dev/null 2>&1) || true
fi
PYTHONPATH="${PYTHONPATH:-$ROOT}" \
NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \
NYASH_EMIT_EXE_NYRT="${NYASH_EMIT_EXE_NYRT:-$ROOT/target/release}" \
NYASH_LLVM_USE_HARNESS=1 "$BIN" --backend llvm "$file" >/dev/null 2>&1
else
"$BIN" --backend vm "$file" >/dev/null 2>&1
fi
end=$(date +%s%N)
echo $(( (end - start)/1000000 ))
}
bench_c() {
local csrc="$1"; local exe="$2"
cc -O3 -march=native -o "$exe" "$csrc"
local start end
start=$(date +%s%N)
"$exe" >/dev/null 2>&1
end=$(date +%s%N)
echo $(( (end - start)/1000000 ))
}
# Build once and time executable runs (ms)
time_exe_run() {
local exe="$1"
local start end
start=$(date +%s%N)
"$exe" >/dev/null 2>&1
end=$(date +%s%N)
echo $(( (end - start)/1000000 ))
}
mktemp_hako() { mktemp --suffix .hako; }
mktemp_c() { mktemp --suffix .c; }
case "$CASE" in
loop)
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local n = ${N}
local i = 0
local s = 0
loop(i < n) { s = s + i i = i + 1 }
return s
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
int main(){
volatile int64_t n = N_PLACEHOLDER;
volatile int64_t s=0; for (int64_t i=0;i<n;i++){ s+=i; }
return (int)(s&0xFF);
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
strlen)
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local n = ${N}
local i = 0
local s = 0
local t = "abcdefghijklmnopqrstuvwxyz"
loop(i < n) { s = s + t.length() i = i + 1 }
return s
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
#include <string.h>
int main(){
volatile int64_t n = N_PLACEHOLDER; volatile int64_t s=0;
const char* t = "abcdefghijklmnopqrstuvwxyz";
for (int64_t i=0;i<n;i++){ s += (int64_t)strlen(t); }
return (int)(s&0xFF);
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
box)
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local n = ${N}
local i = 0
loop(i < n) { local t = new StringBox("x"); i = i + 1 }
return 0
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
typedef struct { char* p; } Str;
static inline Str* new_str(){ Str* s=(Str*)malloc(sizeof(Str)); s->p=strdup("x"); free(s->p); free(s); return s; }
int main(){ volatile int64_t n=N_PLACEHOLDER; for(int64_t i=0;i<n;i++){ new_str(); } return 0; }
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
*) echo "Unknown case: $CASE"; exit 2;;
esac
echo "[perf] case=$CASE n=$N runs=$RUNS backend=$BACKEND" >&2
sum_c=0; sum_h=0
if [[ "$EXE_MODE" = "1" ]]; then
# Build C exe once
C_EXE=$(mktemp --suffix .out)
cc -O3 -march=native -o "$C_EXE" "$C_FILE"
# Build Nyash exe once (requires llvm harness)
if [[ "$BACKEND" != "llvm" ]]; then
echo "[FAIL] --exe requires --backend llvm" >&2; exit 2
fi
if [[ ! -x "$ROOT/target/release/ny-llvmc" ]]; then
(cargo build -q --release -p nyash-llvm-compiler >/dev/null 2>&1) || true
fi
HAKO_EXE=$(mktemp --suffix .out)
TMP_JSON=$(mktemp --suffix .json)
if ! HAKO_SELFHOST_BUILDER_FIRST=1 HAKO_MIR_BUILDER_LOOP_JSONFRAG=1 HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE=1 \
NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \
NYASH_JSON_ONLY=1 bash "$ROOT/tools/hakorune_emit_mir.sh" "$HAKO_FILE" "$TMP_JSON" >/dev/null 2>&1; then
echo "[FAIL] failed to emit MIR JSON" >&2; exit 3
fi
# Ensure runtime lib exists (nyash_kernel)
(cd "$ROOT/crates/nyash_kernel" && cargo build -q --release >/dev/null) || true
# Build EXE via helper (selects crate backend ny-llvmc under the hood)
if ! NYASH_LLVM_BACKEND=crate \
NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \
NYASH_EMIT_EXE_NYRT="${NYASH_EMIT_EXE_NYRT:-$ROOT/target/release}" \
NYASH_LLVM_VERIFY=1 NYASH_LLVM_VERIFY_IR=1 NYASH_LLVM_FAST=1 \
bash "$ROOT/tools/ny_mir_builder.sh" --in "$TMP_JSON" --emit exe -o "$HAKO_EXE" --quiet >/dev/null 2>&1; then
echo "[FAIL] failed to build Nyash EXE" >&2; exit 3
fi
for i in $(seq 1 "$RUNS"); do
t_c=$(time_exe_run "$C_EXE")
t_h=$(time_exe_run "$HAKO_EXE")
sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h))
if command -v python3 >/dev/null 2>&1; then
ratio=$(python3 -c "print(round(${t_h}/max(${t_c},1)*100,2))" 2>/dev/null || echo NA)
else
ratio=NA
fi
echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}%" >&2
done
avg_c=$((sum_c / RUNS)); avg_h=$((sum_h / RUNS))
echo "avg c=${avg_c}ms hak=${avg_h}ms" >&2
if command -v python3 >/dev/null 2>&1; then
python3 - <<PY
c=$avg_c; h=$avg_h
ratio = (h/max(c,1))*100.0
print(f"ratio={ratio:.2f}%")
PY
fi
rm -f "$C_EXE" "$HAKO_EXE" "$TMP_JSON" 2>/dev/null || true
else
for i in $(seq 1 "$RUNS"); do
t_c=$(bench_c "$C_FILE" "${C_FILE%.c}")
t_h=$(bench_hako "$HAKO_FILE" "$BACKEND")
sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h))
if command -v python3 >/dev/null 2>&1; then
ratio=$(python3 -c "print(round(${t_h}/max(${t_c},1)*100,2))" 2>/dev/null || echo NA)
else
ratio=NA
fi
echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}%" >&2
done
avg_c=$((sum_c / RUNS)); avg_h=$((sum_h / RUNS))
echo "avg c=${avg_c}ms hak=${avg_h}ms" >&2
if command -v python3 >/dev/null 2>&1; then
python3 - <<PY
c=$avg_c; h=$avg_h
ratio = (h/max(c,1))*100.0
print(f"ratio={ratio:.2f}%")
PY
fi
fi
rm -f "$HAKO_FILE" "$C_FILE" "${C_FILE%.c}" 2>/dev/null || true