#!/usr/bin/env bash set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" BIN="$ROOT/target/release/hakorune" usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|matmul_core|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe] [--budget-ms B]"; } CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0; BUDGET_MS=0 while [[ $# -gt 0 ]]; do case "$1" in --case) CASE="$2"; shift 2;; --n) N="$2"; shift 2;; --runs) RUNS="$2"; shift 2;; --backend) BACKEND="$2"; shift 2;; --exe) EXE_MODE=1; shift 1;; --budget-ms) BUDGET_MS="$2"; shift 2;; --help|-h) usage; exit 0;; *) echo "Unknown arg: $1"; usage; exit 2;; esac done if [[ ! -x "$BIN" ]]; then echo "[FAIL] hakorune not built: $BIN" >&2; exit 2; fi # Helpers: build once, then reuse ensure_llvmc() { if [[ ! -x "$ROOT/target/release/ny-llvmc" ]]; then (cargo build -q --release -p nyash-llvm-compiler >/dev/null 2>&1) || true fi } ensure_nyrt() { # Accept either .a or .rlib as presence of built runtime if [[ ! -f "$ROOT/target/release/libnyash_kernel.a" && ! -f "$ROOT/target/release/libnyash_kernel.rlib" ]]; then (cd "$ROOT/crates/nyash_kernel" && cargo build -q --release >/dev/null 2>&1) || true fi } bench_hako() { local file="$1"; local backend="$2"; shift 2 local start end start=$(date +%s%N) if [[ "$backend" = "llvm" ]]; then # Ensure ny-llvmc exists; build if missing if [[ ! -x "$ROOT/target/release/ny-llvmc" ]]; then (cargo build -q --release -p nyash-llvm-compiler >/dev/null 2>&1) || true fi PYTHONPATH="${PYTHONPATH:-$ROOT}" \ NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \ NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 HAKO_USING_RESOLVER_FIRST=1 \ NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \ NYASH_EMIT_EXE_NYRT="${NYASH_EMIT_EXE_NYRT:-$ROOT/target/release}" \ NYASH_LLVM_USE_HARNESS=1 "$BIN" --backend llvm "$file" >/dev/null 2>&1 else "$BIN" --backend vm "$file" >/dev/null 2>&1 fi end=$(date +%s%N) echo $(( (end - start)/1000000 )) } bench_c() { local csrc="$1"; local exe="$2" cc -O3 -march=native -o "$exe" "$csrc" local start end start=$(date +%s%N) "$exe" >/dev/null 2>&1 end=$(date +%s%N) echo $(( (end - start)/1000000 )) } # Build once and time executable runs (ms) time_exe_run() { local exe="$1" local start end start=$(date +%s%N) "$exe" >/dev/null 2>&1 end=$(date +%s%N) echo $(( (end - start)/1000000 )) } mktemp_hako() { mktemp --suffix .hako; } mktemp_c() { mktemp --suffix .c; } # Fallback diagnostics for EXE flow: check MIR JSON for externcall/boxcall/jsonfrag diag_mir_json() { local json="$1" local rewrites; rewrites=$(rg -c '"op":"externcall"' "$json" 2>/dev/null || echo 0) local arrays; arrays=$(rg -c 'nyash\.array\.' "$json" 2>/dev/null || echo 0) local maps; maps=$(rg -c 'nyash\.map\.' "$json" 2>/dev/null || echo 0) local boxcalls; boxcalls=$(rg -c '"op":"boxcall"' "$json" 2>/dev/null || echo 0) local jsonfrag; jsonfrag=$(rg -c '\[emit/jsonfrag\]' "$json" 2>/dev/null || echo 0) echo "[diag] externcall=${rewrites} (array=${arrays}, map=${maps}), boxcall_left=${boxcalls}, jsonfrag=${jsonfrag}" >&2 } case "$CASE" in loop) HAKO_FILE=$(mktemp_hako) cat >"$HAKO_FILE" <"$C_FILE" <<'C' #include int main(){ volatile int64_t n = N_PLACEHOLDER; volatile int64_t s=0; for (int64_t i=0;i"$HAKO_FILE" <"$C_FILE" <<'C' #include #include int main(){ volatile int64_t n = N_PLACEHOLDER; volatile int64_t s=0; const char* t = "abcdefghijklmnopqrstuvwxyz"; for (int64_t i=0;i"$HAKO_FILE" <"$C_FILE" <<'C' #include #include #include typedef struct { char* p; } Str; static inline Str* new_str(){ Str* s=(Str*)malloc(sizeof(Str)); s->p=strdup("x"); free(s->p); free(s); return s; } int main(){ volatile int64_t n=N_PLACEHOLDER; for(int64_t i=0;i"$HAKO_FILE" <"$C_FILE" <<'C' #include int main(){ volatile int64_t n = N_PLACEHOLDER; volatile int64_t acc = 0; for (int64_t i=0;i"$HAKO_FILE" <"$C_FILE" <<'C' #include static inline int64_t mix(int64_t a, int64_t b, int64_t c){ return (a + b) - c; } static inline int64_t twist(int64_t v){ return (v % 2 == 0) ? v / 2 : v * 3 + 1; } int main(){ volatile int64_t n = N_PLACEHOLDER; volatile int64_t value = 1; for (int64_t i=0;i"$HAKO_FILE" <"$C_FILE" <<'C' #include #include int main(){ volatile int64_t n = N_PLACEHOLDER; volatile int64_t acc = 0; const char* base = "abcdefghijklmnopqrstuvwxyz0123456789"; char tmp[128]; for (int64_t i=0;i"$HAKO_FILE" <"$C_FILE" <<'C' #include int main(){ volatile int64_t n = N_PLACEHOLDER; volatile int64_t sum = 0; int64_t bucket = 32; int64_t arr[32]; int64_t mapv[32]; for (int i=0;i<32;i++){ arr[i]=i; mapv[i]=i; } for (int64_t i=0;i"$HAKO_FILE" <"$C_FILE" <<'C' #include int main(){ volatile int64_t cycles = N_PLACEHOLDER; int pc = 0; int program_size = 10; int program[10] = {96,5,97,7,112,3,113,2,18,0}; int regs[16] = {0}; for (int64_t i=0;i> 12; if (nib == 1) { pc = opcode & 0x0FFF; pc %= program_size; } else if (nib == 6) { int reg = (opcode >> 8) & 0xF; regs[reg] = opcode & 0xFF; } else if (nib == 7) { int reg = (opcode >> 8) & 0xF; regs[reg] += opcode & 0xFF; } } int64_t sum = 0; for (int i=0;i<16;i++){ sum += regs[i]; } return (int)(sum & 0xFF); } C sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE" ;; sieve) # N: 上限値。EXE モードは計測安定性のため C 実行時間が十分大きくなる既定値に固定 # 既定 N=5,000,000 のまま維持(以前の 500,000 丸めはタイマ粒度ノイズを増やすため撤廃) if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then N=5000000 fi HAKO_FILE=$(mktemp_hako) cat >"$HAKO_FILE" <"$C_FILE" <<'C' #include #include int main(){ int64_t limit = N_PLACEHOLDER; unsigned char *flags = (unsigned char*)malloc((limit+1)); for (int64_t i=0;i<=limit;i++) flags[i]=1; flags[0]=flags[1]=0; for (int64_t p=2;p*p<=limit;p++) if (flags[p]) for (int64_t m=p*p;m<=limit;m+=p) flags[m]=0; int64_t count=0; for (int64_t i=0;i<=limit;i++) count+=flags[i]; free(flags); return (int)(count & 0xFF); } C sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE" ;; matmul) # N: 行列サイズ。EXE モード既定は N=512、REPS_M=16 に上げてタイマ粒度ノイズを低減 if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then N=512 fi REPS_M=${REPS_M:-8} if [[ "$EXE_MODE" = "1" && "${REPS_M}" = "8" ]]; then REPS_M=16 fi HAKO_FILE=$(mktemp_hako) cat >"$HAKO_FILE" <"$C_FILE" <<'C' #include #include int main(){ int n = N_PLACEHOLDER; int reps = REPS_PLACE; int *A = (int*)malloc(sizeof(int)*n*n); int *B = (int*)malloc(sizeof(int)*n*n); int *C = (int*)malloc(sizeof(int)*n*n); for (int i=0;i/dev/null 2>&1; then echo "[SKIP] matmul emit unstable (try PERF_USE_JSONFRAG=1 for diagnosis)" >&2 rm -f "$TMP_CHECK_JSON" "$HAKO_FILE" "$C_FILE" 2>/dev/null || true exit 0 fi rm -f "$TMP_CHECK_JSON" 2>/dev/null || true fi ;; matmul_core) # Core numeric matmul using MatI64 + IntArrayCore # Use smaller default N to keep runtime reasonable if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then N=256 fi HAKO_FILE=$(mktemp_hako) cat >"$HAKO_FILE" <"$C_FILE" <<'C' #include #include typedef struct { int64_t *ptr; int64_t rows; int64_t cols; int64_t stride; } MatI64Core; static inline int64_t mat_get(MatI64Core *m, int64_t r, int64_t c) { return m->ptr[r * m->stride + c]; } static inline void mat_set(MatI64Core *m, int64_t r, int64_t c, int64_t v) { m->ptr[r * m->stride + c] = v; } int main() { int64_t n = N_PLACEHOLDER; int64_t total = n * n; MatI64Core A, B, C; A.rows = B.rows = C.rows = n; A.cols = B.cols = C.cols = n; A.stride = B.stride = C.stride = n; A.ptr = (int64_t*)malloc(sizeof(int64_t)*total); B.ptr = (int64_t*)malloc(sizeof(int64_t)*total); C.ptr = (int64_t*)malloc(sizeof(int64_t)*total); for (int64_t idx = 0; idx < total; idx++) { A.ptr[idx] = idx % 97; B.ptr[idx] = (idx * 3) % 101; C.ptr[idx] = 0; } for (int64_t i = 0; i < n; i++) { for (int64_t k = 0; k < n; k++) { int64_t aik = mat_get(&A, i, k); for (int64_t j = 0; j < n; j++) { int64_t idx = i * C.stride + j; int64_t v = C.ptr[idx] + aik * mat_get(&B, k, j); C.ptr[idx] = v; } } } int64_t r = mat_get(&C, n-1, n-1); free(A.ptr); free(B.ptr); free(C.ptr); return (int)(r & 0xFF); } C sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE" ;; linidx) # Linear index pattern: idx = i*cols + j # Derive rows/cols from N to keep runtime stable ROWS=10000; COLS=32 if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then ROWS=20000; COLS=32; fi HAKO_FILE=$(mktemp_hako) cat >"$HAKO_FILE" <"$C_FILE" <<'C' #include #include int main(){ const int64_t rows = ROWS_P; const int64_t cols = COLS_P; const int64_t total = rows * cols; int64_t *A = (int64_t*)malloc(sizeof(int64_t)*total); for (int64_t i=0;i"$HAKO_FILE" <"$C_FILE" <<'C' #include #include int main(){ const int64_t rows = ROWS_P; const int64_t bucket = BUCKET_P; const int64_t reps = REPS_P; int64_t *arr = (int64_t*)malloc(sizeof(int64_t)*bucket); int64_t *mapv = (int64_t*)malloc(sizeof(int64_t)*rows); for (int64_t i=0;i&2 EXE_MODE=1 fi HAKO_FILE=$(mktemp_hako) cat >"$HAKO_FILE" <= 0) { me.lines.set(i, line + replacement) } i = i + 1 } } digest() { local total = 0 local count = me.lines.length().toString().toInteger() local i = 0 loop(i < count) { total = total + me.lines.get(i).length() i = i + 1 } return total + me.undo.length().toString().toInteger() } } static box Main { method main(args) { local ops = ${N} local bench = new KiloBench() bench.birth() local i = 0 loop(i < ops) { bench.insert_chunk(i % 64, "xx") if (i % 8 == 0) { bench.replace("line", "ln") } i = i + 1 } return bench.digest() } } HAKO C_FILE=$(mktemp_c) cat >"$C_FILE" <<'C' #include #include #include #include static void insert_chunk(char **lines, int row, const char *text){ char *line = lines[row]; size_t len = strlen(line); size_t split = len/2; char *out = malloc(len + strlen(text) + 1); memcpy(out, line, split); strcpy(out+split, text); strcpy(out+split+strlen(text), line+split); free(line); lines[row] = out; } static void replace_line(char **lines, const char *pattern, const char *repl){ for (int i=0;i<64;i++){ if (strstr(lines[i], pattern)){ size_t len = strlen(lines[i]) + strlen(repl) + 1; char *out = malloc(len); strcpy(out, lines[i]); strcat(out, repl); free(lines[i]); lines[i] = out; } } } int main(){ volatile int64_t ops = N_PLACEHOLDER; char *lines[64]; for (int i=0;i<64;i++){ char buf[32]; sprintf(buf, "line-%d", i); lines[i] = strdup(buf); } for (int64_t i=0;i&2 sum_c=0; sum_h=0 if [[ "$EXE_MODE" = "1" ]]; then # Build C exe once C_EXE=$(mktemp --suffix .out) cc -O3 -march=native -o "$C_EXE" "$C_FILE" # Build Nyash exe once (requires llvm harness) if [[ "$BACKEND" != "llvm" ]]; then echo "[FAIL] --exe requires --backend llvm" >&2; exit 2 fi ensure_llvmc ensure_nyrt HAKO_EXE=$(mktemp --suffix .out) TMP_JSON=$(mktemp --suffix .json) # Default: use provider-first with AotPrep for maximum optimization # DEBUG: Show file paths echo "[matmul/debug] HAKO_FILE=$HAKO_FILE TMP_JSON=$TMP_JSON" >&2 if ! \ HAKO_SELFHOST_TRACE=1 \ HAKO_SELFHOST_BUILDER_FIRST=0 HAKO_SELFHOST_NO_DELEGATE=0 \ HAKO_APPLY_AOT_PREP=1 \ NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \ HAKO_MIR_BUILDER_LOOP_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_JSONFRAG:-$([[ "${PERF_USE_JSONFRAG:-0}" = 1 ]] && echo 1 || echo 0)}" \ HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-$([[ "${PERF_USE_JSONFRAG:-0}" = 1 ]] && echo 1 || echo 0)}" \ HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE="${HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE:-1}" \ HAKO_MIR_BUILDER_JSONFRAG_PURIFY="${HAKO_MIR_BUILDER_JSONFRAG_PURIFY:-1}" \ NYASH_AOT_NUMERIC_CORE="${NYASH_AOT_NUMERIC_CORE:-0}" \ NYASH_AOT_NUMERIC_CORE_TRACE="${NYASH_AOT_NUMERIC_CORE_TRACE:-0}" \ NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \ NYASH_JSON_ONLY=1 bash "$ROOT/tools/hakorune_emit_mir.sh" "$HAKO_FILE" "$TMP_JSON" 2>&1 | tee /tmp/matmul_emit_log.txt | grep -E "\[prep:|provider/emit\]" >&2; then echo "[FAIL] emit MIR JSON failed (hint: set PERF_USE_PROVIDER=1 or HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG=1)" >&2; exit 3 fi # Quick diagnostics: ensure AotPrep rewrites are present and jsonfrag fallback is not used # DEBUG: Copy TMP_JSON for inspection cp "$TMP_JSON" /tmp/matmul_from_perf.json 2>/dev/null || true echo "[matmul/debug] TMP_JSON copied to /tmp/matmul_from_perf.json" >&2 echo "[matmul/debug] Direct externcall count: $(grep -o '"op":"externcall"' "$TMP_JSON" 2>/dev/null | wc -l)" >&2 diag_mir_json "$TMP_JSON" # AotPrep is now applied in hakorune_emit_mir.sh via HAKO_APPLY_AOT_PREP=1 # Build EXE via helper (selects crate backend ny-llvmc under the hood) if ! NYASH_LLVM_BACKEND=crate NYASH_LLVM_SKIP_BUILD=1 \ NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \ NYASH_EMIT_EXE_NYRT="${NYASH_EMIT_EXE_NYRT:-$ROOT/target/release}" \ NYASH_LLVM_VERIFY=1 NYASH_LLVM_VERIFY_IR=1 NYASH_LLVM_FAST=1 \ bash "$ROOT/tools/ny_mir_builder.sh" --in "$TMP_JSON" --emit exe -o "$HAKO_EXE" --quiet >/dev/null 2>&1; then echo "[FAIL] build Nyash EXE failed (crate backend). Ensure ny-llvmc exists or try NYASH_LLVM_BACKEND=crate." >&2; exit 3 fi # Execute runs. If BUDGET_MS>0, keep running until budget is exhausted. if [[ "$BUDGET_MS" != "0" ]]; then i=0; used=0 while true; do i=$((i+1)) t_c=$(time_exe_run "$C_EXE"); t_h=$(time_exe_run "$HAKO_EXE") sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h)); used=$((used + t_h)) if command -v python3 >/dev/null 2>&1; then ratio=$(python3 -c "print(round(${t_h}/max(${t_c},1)*100,2))" 2>/dev/null || echo NA); else ratio=NA; fi echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}% (budget used=${used}/${BUDGET_MS}ms)" >&2 if [[ $used -ge $BUDGET_MS ]]; then RUNS=$i; break; fi # Safety valve to avoid infinite loop if t_h is 0ms if [[ $i -ge 999 ]]; then RUNS=$i; break; fi done else for i in $(seq 1 "$RUNS"); do t_c=$(time_exe_run "$C_EXE") t_h=$(time_exe_run "$HAKO_EXE") sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h)) if command -v python3 >/dev/null 2>&1; then ratio=$(python3 -c "print(round(${t_h}/max(${t_c},1)*100,2))" 2>/dev/null || echo NA) else ratio=NA fi echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}%" >&2 done fi avg_c=$((sum_c / RUNS)); avg_h=$((sum_h / RUNS)) echo "avg c=${avg_c}ms hak=${avg_h}ms" >&2 if [ "$avg_c" -lt 5 ]; then echo "[WARN] C runtime is very small (${avg_c}ms). Increase --n to reduce timer granularity noise." >&2 fi if command -v python3 >/dev/null 2>&1; then python3 - </dev/null || true else for i in $(seq 1 "$RUNS"); do t_c=$(bench_c "$C_FILE" "${C_FILE%.c}") t_h=$(bench_hako "$HAKO_FILE" "$BACKEND") sum_c=$((sum_c + t_c)); sum_h=$((sum_h + t_h)) if command -v python3 >/dev/null 2>&1; then ratio=$(python3 -c "print(round(${t_h}/max(${t_c},1)*100,2))" 2>/dev/null || echo NA) else ratio=NA fi echo "run#$i c=${t_c}ms hak=${t_h}ms ratio=${ratio}%" >&2 done avg_c=$((sum_c / RUNS)); avg_h=$((sum_h / RUNS)) echo "avg c=${avg_c}ms hak=${avg_h}ms" >&2 if [ "$avg_c" -lt 5 ]; then echo "[WARN] C runtime is very small (${avg_c}ms). Increase --n to reduce timer granularity noise." >&2 fi if command -v python3 >/dev/null 2>&1; then python3 - </dev/null || true