Phase 21.7 normalization: optimization pre-work + bench harness expansion

- Add opt-in optimizations (defaults OFF)
  - Ret purity verifier: NYASH_VERIFY_RET_PURITY=1
  - strlen FAST enhancement for const handles
  - FAST_INT gate for same-BB SSA optimization
  - length cache for string literals in llvmlite
- Expand bench harness (tools/perf/microbench.sh)
  - Add branch/call/stringchain/arraymap/chip8/kilo cases
  - Auto-calculate ratio vs C reference
  - Document in benchmarks/README.md
- Compiler health improvements
  - Unify PHI insertion to insert_phi_at_head()
  - Add NYASH_LLVM_SKIP_BUILD=1 for build reuse
- Runtime & safety enhancements
  - Clarify Rust/Hako ownership boundaries
  - Strengthen receiver localization (LocalSSA/pin/after-PHIs)
  - Stop excessive PluginInvoke→BoxCall rewrites
- Update CURRENT_TASK.md, docs, and canaries

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-11-13 16:40:58 +09:00
parent 9e2fa1e36e
commit dda65b94b7
160 changed files with 6773 additions and 1692 deletions

View File

@ -0,0 +1,53 @@
#!/usr/bin/env bash
# bench_hakorune_emit_mir.sh — StageB → MIR(JSON) bench via Hakorune path
#
# Usage:
# tools/perf/bench_hakorune_emit_mir.sh <input.hako> [rounds]
#
# Env toggles (forwarded as-is):
# HAKO_USING_RESOLVER_FIRST=1 # resolver-first
# HAKO_SELFHOST_BUILDER_FIRST=1 # try selfhost builder first
# HAKO_MIR_BUILDER_BOX=hako.mir.builder|min # builder box selector
# HAKO_SELFHOST_TRACE=1 # extra trace (stderr)
#
# Output: CSV (round,ms,size,sha1); a failed round is printed as "<round>,ERROR,0,NA"
# Strict mode: stop on errors, unset variables and failed pipeline stages.
set -euo pipefail

# The input file is mandatory; the number of rounds is optional.
if (( $# < 1 )); then
  echo "Usage: $0 <input.hako> [rounds]" >&2
  exit 2
fi

IN="$1"
ROUNDS="${2:-5}"

# The repository root sits two levels above the directory holding this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
EMIT="$ROOT/tools/hakorune_emit_mir.sh"

# Refuse to start unless the emitter is executable and the input exists.
if ! [[ -x "$EMIT" ]]; then
  echo "error: $EMIT not found/executable" >&2
  exit 2
fi
if ! [[ -f "$IN" ]]; then
  echo "error: input not found: $IN" >&2
  exit 2
fi
# sha1 — read stdin and print its SHA-1 digest in hex.
# Tries sha1sum, then shasum, and finally falls back to openssl.
sha1() {
  local tool
  for tool in sha1sum shasum; do
    command -v "$tool" >/dev/null 2>&1 || continue
    if [[ "$tool" == "shasum" ]]; then
      shasum -a 1 | awk '{print $1}'
    else
      sha1sum | awk '{print $1}'
    fi
    return
  done
  # openssl prints "<label>= <digest>", so the digest is the second field.
  openssl sha1 | awk '{print $2}'
}
# The round count ends up inside an arithmetic expression; accept digits only.
if ! [[ "$ROUNDS" =~ ^[0-9]+$ ]]; then
  echo "error: rounds must be a non-negative integer: $ROUNDS" >&2
  exit 2
fi

# now_ms — print a wall-clock timestamp in milliseconds.
# "%3N" is a GNU date extension; when the output is not purely numeric the
# helper falls back to whole seconds scaled to milliseconds.
now_ms() {
  local stamp
  stamp=$(date +%s%3N)
  if [[ "$stamp" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$stamp"
  else
    printf '%s\n' "$(( $(date +%s) * 1000 ))"
  fi
}

# Private scratch directory instead of a predictable /tmp/<name>_$$ path;
# the trap removes it on every exit path, including failures.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf -- "$WORK_DIR"' EXIT
OUT="$WORK_DIR/mir.json"

echo "round,ms,size,sha1"
for ((i = 1; i <= ROUNDS; i++)); do
  # Start every round without a stale output file.
  rm -f -- "$OUT"
  start=$(now_ms)
  # Env toggles reach the emitter through the inherited environment.
  if ! "$EMIT" "$IN" "$OUT" >/dev/null 2>&1; then
    echo "$i,ERROR,0,NA"
    continue
  fi
  end=$(now_ms)
  ms=$((end - start))
  # GNU stat syntax first, BSD stat syntax as the fallback.
  size=$(stat -c '%s' "$OUT" 2>/dev/null || stat -f '%z' "$OUT")
  # Hash the key-sorted compact JSON when jq can parse it, the raw file otherwise.
  norm=$(jq -cS . "$OUT" 2>/dev/null || cat "$OUT")
  digest=$(printf '%s' "$norm" | sha1)
  echo "$i,$ms,$size,$digest"
done
exit 0

View File

@ -0,0 +1,39 @@
#!/usr/bin/env bash
# bench_ny_mir_builder.sh — Quick micro-bench for MIR(JSON) → {obj|exe}
# Usage: tools/perf/bench_ny_mir_builder.sh <mir.json> [rounds]
# Notes:
# - Uses crate backend (ny-llvmc). Keeps defaults conservative (O0).
# - Prints simple CSV: kind,round,ms
# Strict mode: stop on errors, unset variables and failed pipeline stages.
set -euo pipefail
# The MIR(JSON) input path is required.
if [[ $# -lt 1 ]]; then
echo "Usage: $0 <mir.json> [rounds]" >&2
exit 2
fi
# Rounds per kind default to 3 when the second argument is omitted.
IN="$1"; ROUNDS="${2:-3}"
# Relative path: it is resolved against the current working directory, so the
# script has to be started from a directory that contains tools/.
BIN_BUILDER="tools/ny_mir_builder.sh"
if [[ ! -x "$BIN_BUILDER" ]]; then echo "error: $BIN_BUILDER not found/executable" >&2; exit 2; fi
# measure KIND — build $IN once as KIND ("exe" gets a .out artifact, anything
# else a .o artifact) and report how long the builder took.
# Globals:   IN (read), BIN_BUILDER (read)
# Outputs:   "<kind>,<elapsed ms>" on stdout
# Returns:   0 on success, 1 when the output directory or the build fails
measure() {
  local kind="$1"
  shift
  local out_dir="$PWD/target/aot_objects"
  local out_path="$out_dir/__bench_${kind}_$$"
  if [[ "$kind" == "exe" ]]; then
    out_path+=".out"
  else
    out_path+=".o"
  fi
  # The builder is handed a path inside target/aot_objects; create the
  # directory up front so the first round does not fail when it is missing.
  mkdir -p -- "$out_dir" || return 1
  local start end ms
  start=$(date +%s%3N)
  if ! NYASH_LLVM_BACKEND=crate "$BIN_BUILDER" --in "$IN" --emit "$kind" -o "$out_path" --quiet; then
    # Do not leave a partial artifact behind after a failed build.
    rm -f -- "$out_path" 2>/dev/null || true
    return 1
  fi
  end=$(date +%s%3N)
  ms=$((end - start))
  rm -f -- "$out_path" 2>/dev/null || true
  echo "$kind,$ms"
}
# CSV header, then one row per (kind, round).
printf '%s\n' "kind,round,ms"
for k in obj exe; do
  i=1
  while (( i <= ROUNDS )); do
    # A failed measurement contributes "<kind>,ERROR"; only the text after the
    # first comma is carried into the row.
    line=$(measure "$k" || echo "$k,ERROR")
    printf '%s\n' "$k,$i,${line#*,}"
    i=$((i + 1))
  done
done

View File

@ -0,0 +1,46 @@
#!/usr/bin/env bash
# compare_mir_json.sh — Structural diff for two MIR(JSON) files
# Usage: tools/perf/compare_mir_json.sh <a.json> <b.json>
# Prints sizes, sha1 (normalized), then unified diff (jq -S pretty) if available.
# Strict mode: stop on errors, unset variables and failed pipeline stages.
set -euo pipefail

# Exactly two file operands are expected.
if (( $# != 2 )); then
  echo "Usage: $0 <a.json> <b.json>" >&2
  exit 2
fi

A="$1"
B="$2"

# Both operands must be existing regular files.
if ! [[ -f "$A" && -f "$B" ]]; then
  echo "error: file not found" >&2
  exit 2
fi
# sha1 — print the SHA-1 digest (hex) of whatever arrives on stdin.
# The first available tool wins: sha1sum, shasum, then openssl.
sha1() {
  if command -v sha1sum >/dev/null 2>&1; then
    sha1sum | awk '{print $1}'
    return
  fi
  if command -v shasum >/dev/null 2>&1; then
    shasum -a 1 | awk '{print $1}'
    return
  fi
  # openssl prints "<label>= <digest>", so the digest is the second field.
  openssl sha1 | awk '{print $2}'
}
# byte_size FILE — size in bytes; GNU stat syntax first, BSD syntax second.
byte_size() { stat -c '%s' "$1" 2>/dev/null || stat -f '%z' "$1"; }

# normalized FILE — compact, key-sorted JSON when jq can parse the file,
# otherwise the file contents unchanged.
normalized() { jq -cS . "$1" 2>/dev/null || cat "$1"; }

# write_pretty TEXT DEST — store the key-sorted pretty-printed form of TEXT in
# DEST; when jq rejects TEXT (or the write fails) store TEXT as it is.
write_pretty() {
  if printf '%s\n' "$1" | jq -S . >/dev/null 2>&1 \
    && printf '%s\n' "$1" | jq -S . >"$2"; then
    return 0
  fi
  printf '%s\n' "$1" >"$2"
}

size_a=$(byte_size "$A")
size_b=$(byte_size "$B")
norm_a=$(normalized "$A")
norm_b=$(normalized "$B")
sha_a=$(printf '%s' "$norm_a" | sha1)
sha_b=$(printf '%s' "$norm_b" | sha1)

echo "A: $A (size=$size_a, sha1=$sha_a)"
echo "B: $B (size=$size_b, sha1=$sha_b)"

# Different digests: show a unified diff of the pretty-printed forms and
# report the difference through exit status 1.
if [[ "$sha_a" != "$sha_b" ]]; then
  echo "- Diff (normalized, jq -S)"
  tmpa=$(mktemp)
  tmpb=$(mktemp)
  trap 'rm -f "$tmpa" "$tmpb" || true' EXIT
  write_pretty "$norm_a" "$tmpa"
  write_pretty "$norm_b" "$tmpb"
  diff -u "$tmpa" "$tmpb" || true
  exit 1
fi

echo "= MIR JSON equal (normalized)"
exit 0

View File

@ -0,0 +1,66 @@
#!/usr/bin/env bash
# dual_emit_compare.sh — Dual-emit MIR(JSON) (provider vs selfhost), then compare and bench
# Usage: tools/perf/dual_emit_compare.sh <input.hako> [rounds]
# Output: human summary + CSV snippets (provider/selfhost benches)
# Strict mode: stop on errors, unset variables and failed pipeline stages.
set -euo pipefail

# The input file is mandatory; the number of rounds is optional.
if (( $# < 1 )); then
  echo "Usage: $0 <input.hako> [rounds]" >&2
  exit 2
fi

IN="$1"
ROUNDS="${2:-3}"

# Sibling tools are addressed through the repository root, two levels up.
ROOT="$(cd "$(dirname "$0")"/../.. && pwd)"
EMIT="$ROOT/tools/hakorune_emit_mir.sh"
BENCH="$ROOT/tools/perf/bench_hakorune_emit_mir.sh"
CMP="$ROOT/tools/perf/compare_mir_json.sh"

# Every helper script has to be executable before anything is run.
for f in "$EMIT" "$BENCH" "$CMP"; do
  if ! [[ -x "$f" ]]; then
    echo "error: missing executable: $f" >&2
    exit 2
  fi
done

if ! [[ -f "$IN" ]]; then
  echo "error: input not found: $IN" >&2
  exit 2
fi

# Run the bench twice, once per builder preference; a failing bench run is
# tolerated and whatever CSV it printed is kept.
prov_csv=$(HAKO_SELFHOST_BUILDER_FIRST=0 "$BENCH" "$IN" "$ROUNDS" || true)
self_csv=$(HAKO_SELFHOST_BUILDER_FIRST=1 "$BENCH" "$IN" "$ROUNDS" || true)
# calc_stats — summarize bench timings read from stdin.
# Input:   CSV with a header line, then rows "round,ms,size,sha1"; rows whose
#          ms field is not a plain integer (e.g. ERROR rows) are ignored.
# Outputs: "count=<n> avg=<mean> p50=<median>" on stdout, or
#          "count=0 avg=NA p50=NA" when no numeric row was seen.
calc_stats() {
  awk -F, '
    NR > 1 && $2 ~ /^[0-9]+$/ { n++; s += $2; arr[n] = $2 + 0 }
    END {
      if (n == 0) { print "count=0 avg=NA p50=NA"; exit }
      # Insertion sort: asort() is a gawk extension and is missing from
      # mawk and BSD awk. The number of rounds is small, so O(n^2) is fine.
      for (i = 2; i <= n; i++) {
        v = arr[i]
        for (j = i - 1; j >= 1 && arr[j] > v; j--) arr[j + 1] = arr[j]
        arr[j + 1] = v
      }
      p50 = (n % 2 == 1) ? arr[(n + 1) / 2] : (arr[n / 2] + arr[n / 2 + 1]) / 2
      printf("count=%d avg=%.0f p50=%.0f\n", n, (s / n), p50)
    }'
}
# Reduce each bench CSV to a one-line count/avg/p50 summary.
prov_stats=$(printf '%s\n' "$prov_csv" | calc_stats)
self_stats=$(printf '%s\n' "$self_csv" | calc_stats)

# Private scratch directory instead of predictable /tmp/<name>_$$ paths;
# the trap removes it on every exit path.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf -- "$WORK_DIR"' EXIT
OUT_PROV="$WORK_DIR/dual_mir_provider.json"
OUT_SELF="$WORK_DIR/dual_mir_selfhost.json"

# Produce concrete MIR JSONs; failures are tolerated here and detected below
# through missing or empty output files.
HAKO_SELFHOST_BUILDER_FIRST=0 "$EMIT" "$IN" "$OUT_PROV" >/dev/null 2>&1 || true
HAKO_SELFHOST_BUILDER_FIRST=1 "$EMIT" "$IN" "$OUT_SELF" >/dev/null 2>&1 || true

printf '%s\n' "== DualEmit Bench Summary =="
printf '%s\n' "input: $IN rounds: $ROUNDS"
printf '%s\n' "provider: $prov_stats"
printf '%s\n' "selfhost: $self_stats"

# bash echo does not expand "\n" without -e, so section breaks are written
# with printf to get a real blank line before each heading.
if [[ -s "$OUT_PROV" && -s "$OUT_SELF" ]]; then
  printf '\n%s\n' "== Structural Compare (normalized) =="
  # The compare tool exits non-zero when the files differ; that is not an error.
  "$CMP" "$OUT_PROV" "$OUT_SELF" || true
else
  printf '\n%s\n' "[warn] one or both MIR outputs missing. Check bench CSV for ERROR rows." >&2
fi

# Show at most the first 20 lines of each CSV.
printf '\n%s\n' "== Provider CSV =="
printf '%s\n' "$prov_csv" | sed -n '1,20p'
printf '\n%s\n' "== Selfhost CSV =="
printf '%s\n' "$self_csv" | sed -n '1,20p'
exit 0

81
tools/perf/dump_mir.sh Normal file
View File

@ -0,0 +1,81 @@
#!/usr/bin/env bash
set -euo pipefail
# dump_mir.sh — Stable helper to emit MIR(JSON) and print a quick histogram
#
# Usage:
#   tools/perf/dump_mir.sh <input.hako> [--out out.json] [--mode {provider|jsonfrag}]
#
# Notes:
#   - provider: the regular MirBuilder route (falls back to jsonfrag
#               automatically in environments where it fails)
#   - jsonfrag: minimal MIR with loops purified into while-form, for
#               structure verification

# Help has to be recognized before the first argument is taken as the input.
case "${1:-}" in
  -h|--help)
    echo "Usage: $0 <input.hako> [--out out.json] [--mode {provider|jsonfrag}]"
    exit 0
    ;;
esac

INPUT="${1:-}"
OUT=""
MODE="provider"
shift || true
while [[ $# -gt 0 ]]; do
  case "$1" in
    --out|--mode)
      # Under "set -u" a missing value would otherwise abort with an
      # "unbound variable" message instead of a usable diagnostic.
      if [[ $# -lt 2 ]]; then
        echo "[FAIL] $1 requires a value" >&2
        exit 2
      fi
      if [[ "$1" == "--out" ]]; then OUT="$2"; else MODE="$2"; fi
      shift 2
      ;;
    -h|--help)
      echo "Usage: $0 <input.hako> [--out out.json] [--mode {provider|jsonfrag}]"
      exit 0
      ;;
    *)
      echo "Unknown arg: $1" >&2
      exit 2
      ;;
  esac
done

if [[ -z "$INPUT" || ! -f "$INPUT" ]]; then
  echo "[FAIL] input .hako not found: $INPUT" >&2
  exit 2
fi

# Prefer the git toplevel of the script's own directory; outside a git
# checkout fall back to two levels above the script.
ROOT="$(git -C "$(dirname "$0")" rev-parse --show-toplevel 2>/dev/null || true)"
[[ -z "$ROOT" ]] && ROOT="$(cd "$(dirname "$0")/../.." && pwd)"

# "mktemp --suffix" is a GNU extension; a private directory keeps the
# .mir.json name and works with BSD mktemp too.
TMP_DIR="$(mktemp -d)"
TMP_OUT="$TMP_DIR/out.mir.json"
trap 'rm -rf -- "$TMP_DIR" >/dev/null 2>&1 || true' EXIT
# emit_provider — run the emitter with the provider/selfhost-first toggles
# (plugins stay enabled) and write the MIR JSON to $TMP_OUT.
# Globals:   ROOT, INPUT, TMP_OUT (read)
# Outputs:   none; the emitter's stdout and stderr are discarded
# Returns:   the emitter's exit status
emit_provider() {
  # Capture the status with "|| rc=$?" rather than toggling "set +e/-e", so
  # the caller's errexit setting is left exactly as it was.
  local rc=0
  NYASH_SKIP_TOML_ENV=1 NYASH_DISABLE_PLUGINS=0 NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \
    HAKO_SELFHOST_BUILDER_FIRST=1 HAKO_SELFHOST_TRY_MIN=1 HAKO_MIR_NORMALIZE_PROVIDER=0 NYASH_JSON_ONLY=1 \
    "$ROOT/tools/hakorune_emit_mir.sh" "$INPUT" "$TMP_OUT" >/dev/null 2>&1 || rc=$?
  return "$rc"
}
# emit_jsonfrag — run the emitter with the jsonfrag toggles (plugins
# disabled) and write the MIR JSON to $TMP_OUT.
# Globals:   ROOT, INPUT, TMP_OUT (read)
# Outputs:   emitter stdout is discarded; stderr passes through
# Returns:   the emitter's exit status
emit_jsonfrag() {
  local -a toggles=(
    NYASH_SKIP_TOML_ENV=1
    NYASH_DISABLE_PLUGINS=1
    HAKO_SELFHOST_BUILDER_FIRST=1
    HAKO_MIR_BUILDER_LOOP_JSONFRAG=1
    HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG=1
    HAKO_MIR_BUILDER_JSONFRAG_PURIFY=1
    NYASH_JSON_ONLY=1
  )
  env "${toggles[@]}" "$ROOT/tools/hakorune_emit_mir.sh" "$INPUT" "$TMP_OUT" >/dev/null
}
# Pick the emit route. Only "provider" gets the automatic fallback; every
# other mode value goes straight to jsonfrag.
case "$MODE" in
  provider)
    if ! emit_provider; then
      echo "[WARN] provider emit failed; falling back to jsonfrag" >&2
      emit_jsonfrag
    fi
    ;;
  *)
    emit_jsonfrag
    ;;
esac

# Keep a copy of the MIR JSON when --out was given.
if [[ -n "$OUT" ]]; then
  cp -f "$TMP_OUT" "$OUT"
  echo "[OK] MIR JSON -> $OUT"
fi

# Print a quick histogram: per function, one line per non-empty block with
# its id and the number of instructions per op.
python3 - "$TMP_OUT" <<'PY'
import json
import sys
from collections import Counter

with open(sys.argv[1]) as fh:
    mir = json.load(fh)

for func in mir.get('functions', []):
    print('Function:', func.get('name'))
    for block in (func.get('blocks') or []):
        ops = [(inst or {}).get('op') for inst in (block.get('instructions') or [])]
        if ops:
            print(' bb', block.get('id'), dict(Counter(ops)))
PY

View File

@ -0,0 +1,64 @@
#!/usr/bin/env bash
set -euo pipefail
# dump_mir_provider.sh — Force provider/selfhost builder to emit MIR(JSON) with verbose diagnostics
# Usage: tools/perf/dump_mir_provider.sh <input.hako> [--out out.json]

# Help has to be recognized before the first argument is taken as the input.
case "${1:-}" in
  -h|--help)
    echo "Usage: $0 <input.hako> [--out out.json]"
    exit 0
    ;;
esac

INPUT="${1:-}"
OUT=""
shift || true
while [[ $# -gt 0 ]]; do
  case "$1" in
    --out)
      # Under "set -u" a missing value would otherwise abort with an
      # "unbound variable" message instead of a usable diagnostic.
      if [[ $# -lt 2 ]]; then
        echo "[FAIL] --out requires a value" >&2
        exit 2
      fi
      OUT="$2"
      shift 2
      ;;
    -h|--help)
      echo "Usage: $0 <input.hako> [--out out.json]"
      exit 0
      ;;
    *)
      echo "Unknown arg: $1" >&2
      exit 2
      ;;
  esac
done

if [[ -z "$INPUT" || ! -f "$INPUT" ]]; then
  echo "[FAIL] input .hako not found: $INPUT" >&2
  exit 2
fi

# Ask git about the script's own directory, not the caller's working
# directory, so the tool also works when started from another repository.
ROOT="$(git -C "$(dirname "$0")" rev-parse --show-toplevel 2>/dev/null || true)"
[[ -z "$ROOT" ]] && ROOT="$(cd "$(dirname "$0")/../.." && pwd)"

# Private scratch directory: holds both the MIR JSON and the "$TMP_JSON.err"
# log written during the emit step, so one trap removes everything.
# ("mktemp --suffix" is a GNU extension and is avoided here.)
TMP_DIR="$(mktemp -d)"
TMP_JSON="$TMP_DIR/out.mir.json"
trap 'rm -rf -- "$TMP_DIR" >/dev/null 2>&1 || true' EXIT
# First attempt: selfhost-first toggles with plugins enabled. stderr is kept
# in "$TMP_JSON.err" so its tail can be shown when the attempt fails.
rc=0
HAKO_SELFHOST_BUILDER_FIRST=1 HAKO_SELFHOST_NO_DELEGATE=1 HAKO_SELFHOST_TRACE=1 \
  NYASH_DISABLE_PLUGINS=0 NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \
  "$ROOT/tools/hakorune_emit_mir.sh" "$INPUT" "$TMP_JSON" 2>"$TMP_JSON.err" || rc=$?

if (( rc != 0 )); then
  echo "[WARN] selfhost-first failed; last 80 lines:" >&2
  tail -n 80 "$TMP_JSON.err" >&2 || true
  echo "[INFO] falling back to provider-first" >&2
  # Second attempt without the selfhost toggles; a second failure is fatal.
  NYASH_DISABLE_PLUGINS=0 NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \
    "$ROOT/tools/hakorune_emit_mir.sh" "$INPUT" "$TMP_JSON" >/dev/null 2>&1 || {
    echo "[FAIL] provider-first emit failed too" >&2
    exit 3
  }
fi
# With --out the MIR JSON is copied to the requested path; without it a
# per-block op histogram is printed instead.
if [[ -z "$OUT" ]]; then
  python3 - "$TMP_JSON" <<'PY'
import json
import sys
from collections import Counter

with open(sys.argv[1]) as fh:
    mir = json.load(fh)

for func in mir.get('functions', []):
    print('Function:', func.get('name'))
    for block in (func.get('blocks') or []):
        ops = [(inst or {}).get('op') for inst in (block.get('instructions') or [])]
        if ops:
            print(' bb', block.get('id'), dict(Counter(ops)))
PY
else
  cp -f "$TMP_JSON" "$OUT"
  echo "[OK] MIR JSON -> $OUT"
fi

View File

@ -5,7 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
BIN="$ROOT/target/release/hakorune"
usage() { echo "Usage: $0 --case {loop|strlen|box} [--n N] [--runs R] [--backend {llvm|vm}] [--exe]"; }
usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe]"; }
CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0
while [[ $# -gt 0 ]]; do
@ -22,6 +22,19 @@ done
if [[ ! -x "$BIN" ]]; then echo "[FAIL] hakorune not built: $BIN" >&2; exit 2; fi
# Helpers: build once, then reuse
# ensure_llvmc — build the ny-llvmc compiler once when its release binary is
# missing under $ROOT/target/release.
# Globals:   ROOT (read)
# Returns:   always 0; the build is best-effort and its output is discarded
ensure_llvmc() {
  if [[ -x "$ROOT/target/release/ny-llvmc" ]]; then
    return 0
  fi
  # Build from $ROOT: the presence check looks at $ROOT/target, so cargo must
  # not depend on the directory the benchmark was started from.
  (cd "$ROOT" && cargo build -q --release -p nyash-llvm-compiler >/dev/null 2>&1) || true
}
# ensure_nyrt — build the nyash_kernel runtime once when no built library is
# present under $ROOT/target/release.
# Globals:   ROOT (read)
# Returns:   always 0; the build is best-effort and cargo output is discarded
ensure_nyrt() {
  local lib_dir="$ROOT/target/release"
  # Either the static archive or the rlib counts as an already built runtime.
  if [[ -f "$lib_dir/libnyash_kernel.a" || -f "$lib_dir/libnyash_kernel.rlib" ]]; then
    return 0
  fi
  (cd "$ROOT/crates/nyash_kernel" && cargo build -q --release >/dev/null 2>&1) || true
}
bench_hako() {
local file="$1"; local backend="$2"; shift 2
local start end
@ -131,6 +144,608 @@ HAKO
typedef struct { char* p; } Str;
static inline Str* new_str(){ Str* s=(Str*)malloc(sizeof(Str)); s->p=strdup("x"); free(s->p); free(s); return s; }
int main(){ volatile int64_t n=N_PLACEHOLDER; for(int64_t i=0;i<n;i++){ new_str(); } return 0; }
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
branch)
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local n = ${N}
local i = 0
local acc = 0
loop(i < n) {
local mod = i % 30
if (mod == 0) {
acc = acc + 3
} else if (mod < 10) {
acc = acc + (i % 7)
} else if (mod < 20) {
acc = acc - (i % 11)
} else {
acc = acc + 1
}
i = i + 1
}
return acc
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
int main(){
volatile int64_t n = N_PLACEHOLDER;
volatile int64_t acc = 0;
for (int64_t i=0;i<n;i++){
int64_t mod = i % 30;
if (mod == 0) {
acc += 3;
} else if (mod < 10) {
acc += (i % 7);
} else if (mod < 20) {
acc -= (i % 11);
} else {
acc += 1;
}
}
return (int)(acc & 0xFF);
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
call)
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
function mix(a, b, c) {
return (a + b) - c
}
function twist(v) {
if (v % 2 == 0) { return v / 2 }
return v * 3 + 1
}
static box Main { method main(args) {
local n = ${N}
local i = 0
local value = 1
loop(i < n) {
value = mix(value, i, value % 7)
value = mix(value, twist(i), twist(value))
i = i + 1
}
return value
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
static inline int64_t mix(int64_t a, int64_t b, int64_t c){ return (a + b) - c; }
static inline int64_t twist(int64_t v){ return (v % 2 == 0) ? v / 2 : v * 3 + 1; }
int main(){
volatile int64_t n = N_PLACEHOLDER; volatile int64_t value = 1;
for (int64_t i=0;i<n;i++){
value = mix(value, i, value % 7);
value = mix(value, twist(i), twist(value));
}
return (int)(value & 0xFF);
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
stringchain)
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local n = ${N}
local base = "abcdefghijklmnopqrstuvwxyz0123456789"
local acc = 0
local i = 0
loop(i < n) {
local part1 = base.substring(0, 12)
local part2 = base.substring(5, 20)
local s = part1 + part2 + base.substring(2, 18)
acc = acc + s.length()
i = i + 1
}
return acc
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
#include <string.h>
int main(){
volatile int64_t n = N_PLACEHOLDER; volatile int64_t acc = 0;
const char* base = "abcdefghijklmnopqrstuvwxyz0123456789";
char tmp[128];
for (int64_t i=0;i<n;i++){
memcpy(tmp, base, 12); tmp[12] = '\0';
char buf[192];
strcpy(buf, tmp);
strncat(buf, base+5, 15);
strncat(buf, base+2, 16);
acc += (int64_t)strlen(buf);
}
return (int)(acc & 0xFF);
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
arraymap)
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local n = ${N}
local arr = new ArrayBox()
local map = new MapBox()
local bucket = 32
local i = 0
loop(i < bucket) {
arr.push(i)
map.set("k" + i.toString(), i)
i = i + 1
}
local sum = 0
i = 0
loop(i < n) {
local idx = i % bucket
local val = arr.get(idx)
arr.set(idx, val + 1)
local key = "k" + idx.toString()
map.set(key, val)
sum = sum + map.get(key)
i = i + 1
}
return sum
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
int main(){
volatile int64_t n = N_PLACEHOLDER; volatile int64_t sum = 0;
int64_t bucket = 32;
int64_t arr[32];
int64_t mapv[32];
for (int i=0;i<32;i++){ arr[i]=i; mapv[i]=i; }
for (int64_t i=0;i<n;i++){
int64_t idx = i % bucket;
int64_t val = arr[idx];
arr[idx] = val + 1;
mapv[idx] = val;
sum += mapv[idx];
}
return (int)(sum & 0xFF);
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
chip8)
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
box Chip8Bench {
init { program, registers, pc, program_size }
birth() {
me.program = new ArrayBox()
me.registers = new ArrayBox()
me.pc = 0
local i = 0
loop(i < 16) { me.registers.push(0); i = i + 1 }
local opcodes = new ArrayBox()
// 6005, 6107, 7003, 7102, 1200 pattern
opcodes.push(96); opcodes.push(5)
opcodes.push(97); opcodes.push(7)
opcodes.push(112); opcodes.push(3)
opcodes.push(113); opcodes.push(2)
opcodes.push(18); opcodes.push(0)
local count_box = opcodes.length()
local count = 0
if count_box != null { count = count_box.toString().toInteger() }
i = 0
loop(i < count) {
me.program.push(opcodes.get(i))
i = i + 1
}
me.program_size = count
}
execute_cycle() {
local hi = me.program.get(me.pc)
local lo = me.program.get((me.pc + 1) % me.program_size)
local opcode = (hi * 256) + lo
me.pc = (me.pc + 2) % me.program_size
local nib = opcode / 4096
if (nib == 1) {
me.pc = opcode % me.program_size
} else if (nib == 6) {
local reg = (opcode / 256) % 16
local value = opcode % 256
me.registers.set(reg, value)
} else if (nib == 7) {
local reg = (opcode / 256) % 16
local value = opcode % 256
local cur = me.registers.get(reg)
me.registers.set(reg, cur + value)
}
}
run(cycles) {
local i = 0
loop(i < cycles) { me.execute_cycle(); i = i + 1 }
}
checksum() {
local total = 0
local len = me.registers.length().toString().toInteger()
local i = 0
loop(i < len) { total = total + me.registers.get(i); i = i + 1 }
return total
}
}
static box Main { method main(args) {
local cycles = ${N}
local bench = new Chip8Bench()
bench.birth()
bench.run(cycles)
return bench.checksum()
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
int main(){
volatile int64_t cycles = N_PLACEHOLDER;
int pc = 0;
int program_size = 10;
int program[10] = {96,5,97,7,112,3,113,2,18,0};
int regs[16] = {0};
for (int64_t i=0;i<cycles;i++){
int hi = program[pc];
int lo = program[(pc+1)%program_size];
int opcode = (hi<<8) | lo;
pc = (pc + 2) % program_size;
int nib = opcode >> 12;
if (nib == 1) {
pc = opcode & 0x0FFF;
pc %= program_size;
} else if (nib == 6) {
int reg = (opcode >> 8) & 0xF;
regs[reg] = opcode & 0xFF;
} else if (nib == 7) {
int reg = (opcode >> 8) & 0xF;
regs[reg] += opcode & 0xFF;
}
}
int64_t sum = 0; for (int i=0;i<16;i++){ sum += regs[i]; }
return (int)(sum & 0xFF);
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
sieve)
# N: 上限値。EXEモードでデフォルトなら安全側に丸める
if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then
N=500000
fi
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local limit = ${N}
// true=prime候補
local flags = new ArrayBox()
local i = 0
loop(i <= limit) { flags.push(1) i = i + 1 }
flags.set(0, 0) flags.set(1, 0)
local p = 2
loop(p * p <= limit) {
if (flags.get(p) == 1) {
local m = p * p
loop(m <= limit) { flags.set(m, 0) m = m + p }
}
p = p + 1
}
local count = 0
i = 0
loop(i <= limit) { count = count + flags.get(i) i = i + 1 }
return count
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
#include <stdlib.h>
int main(){
int64_t limit = N_PLACEHOLDER;
unsigned char *flags = (unsigned char*)malloc((limit+1));
for (int64_t i=0;i<=limit;i++) flags[i]=1;
flags[0]=flags[1]=0;
for (int64_t p=2;p*p<=limit;p++) if (flags[p]) for (int64_t m=p*p;m<=limit;m+=p) flags[m]=0;
int64_t count=0; for (int64_t i=0;i<=limit;i++) count+=flags[i];
free(flags);
return (int)(count & 0xFF);
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
matmul)
# N: 行列サイズ。EXEモードでデフォルトなら 128
if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then
N=128
fi
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local n = ${N}
// A,B,C を一次元ArrayBoxに格納row-major
local A = new ArrayBox(); local B = new ArrayBox(); local C = new ArrayBox()
local i = 0
loop(i < n*n) { A.push(i % 97) B.push((i*3) % 101) C.push(0) i = i + 1 }
i = 0
loop(i < n) {
local j = 0
loop(j < n) {
local sum = 0
local k = 0
loop(k < n) {
local a = A.get(i*n + k)
local b = B.get(k*n + j)
sum = sum + a * b
k = k + 1
}
C.set(i*n + j, sum)
j = j + 1
}
i = i + 1
}
// 端を返して最適化抑止
return C.get((n-1)*n + (n-1))
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
#include <stdlib.h>
int main(){
int n = N_PLACEHOLDER;
int *A = (int*)malloc(sizeof(int)*n*n);
int *B = (int*)malloc(sizeof(int)*n*n);
int *C = (int*)malloc(sizeof(int)*n*n);
for (int i=0;i<n*n;i++){ A[i]=i%97; B[i]=(i*3)%101; C[i]=0; }
for (int i=0;i<n;i++){
for (int j=0;j<n;j++){
long long sum=0;
for (int k=0;k<n;k++) sum += (long long)A[i*n+k]*B[k*n+j];
C[i*n+j]=(int)sum;
}
}
int r = C[(n-1)*n + (n-1)];
free(A); free(B); free(C);
return r & 0xFF;
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
linidx)
# Linear index pattern: idx = i*cols + j
# Derive rows/cols from N to keep runtime stable
ROWS=10000; COLS=32
if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then ROWS=20000; COLS=32; fi
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local rows = ${ROWS}
local cols = ${COLS}
local total = rows * cols
local A = new ArrayBox()
local i = 0
loop(i < total) { A.push(i % 97) i = i + 1 }
local acc = 0
i = 0
loop(i < rows) {
local j = 0
loop(j < cols) {
local idx = i * cols + j
local v = A.get(idx)
acc = acc + v
A.set(idx, (v + acc) % 17)
j = j + 1
}
i = i + 1
}
return acc & 255
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
#include <stdlib.h>
int main(){
const int64_t rows = ROWS_P; const int64_t cols = COLS_P;
const int64_t total = rows * cols;
int64_t *A = (int64_t*)malloc(sizeof(int64_t)*total);
for (int64_t i=0;i<total;i++) A[i]=i%97;
int64_t acc=0;
for (int64_t i=0;i<rows;i++){
for (int64_t j=0;j<cols;j++){
int64_t idx = i*cols + j;
int64_t v = A[idx];
acc += v;
A[idx] = (v + acc) % 17;
}
}
free(A);
return (int)(acc & 255);
}
C
sed -i "s/ROWS_P/${ROWS}/; s/COLS_P/${COLS}/" "$C_FILE"
;;
maplin)
# Map with integer linear key: key = i*bucket + j
# Keep bucket small to stress get/set hot path
# Interpret N as rows when provided (except when default 5_000_000)
ROWS=10000; BUCKET=32
if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then
ROWS=40000
elif [[ "$N" != "5000000" ]]; then
ROWS="$N"
fi
BUCKET=32
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
static box Main { method main(args) {
local rows = ${ROWS}
local bucket = ${BUCKET}
local arr = new ArrayBox()
local map = new MapBox()
// Prefill
local i = 0
loop(i < bucket) { arr.push(i) i = i + 1 }
// Run
i = 0
local acc = 0
loop(i < rows) {
local j = i % bucket
local key = (i / bucket) * bucket + j
local v = arr.get(j)
arr.set(j, v + 1)
map.set(key, v)
acc = acc + map.get(key)
i = i + 1
}
return acc & 255
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
#include <stdlib.h>
int main(){
const int64_t rows = ROWS_P; const int64_t bucket = BUCKET_P;
int64_t *arr = (int64_t*)malloc(sizeof(int64_t)*bucket);
int64_t *mapv = (int64_t*)malloc(sizeof(int64_t)*rows);
for (int64_t i=0;i<bucket;i++) arr[i]=i;
int64_t acc=0;
for (int64_t i=0;i<rows;i++){
int64_t j = i % bucket;
int64_t key = (i / bucket) * bucket + j;
int64_t v = arr[j];
arr[j] = v + 1;
mapv[key] = v;
acc += mapv[key];
}
free(arr); free(mapv);
return (int)(acc & 255);
}
C
sed -i "s/ROWS_P/${ROWS}/; s/BUCKET_P/${BUCKET}/" "$C_FILE"
;;
kilo)
# kilo は C 参照側が重く、デフォルト N=5_000_000 だと実行が非常に長くなる。
# EXE モードでかつ N が未指定(既定値)の場合は、計測が現実的になるよう N を下げる。
if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then
N=200000
fi
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
box KiloBench {
init { lines, undo }
birth() {
me.lines = new ArrayBox()
me.undo = new ArrayBox()
local i = 0
loop(i < 64) {
me.lines.push("line-" + i.toString())
i = i + 1
}
}
insert_chunk(row, text) {
local line = me.lines.get(row)
local len_box = line.length()
local len = 0
if len_box != null { len = len_box.toString().toInteger() }
local split = len / 2
local new_line = line.substring(0, split) + text + line.substring(split, len)
me.lines.set(row, new_line)
me.undo.push(text)
}
replace(pattern, replacement) {
local count = me.lines.length().toString().toInteger()
local i = 0
loop(i < count) {
local line = me.lines.get(i)
if (line.indexOf(pattern) >= 0) {
me.lines.set(i, line + replacement)
}
i = i + 1
}
}
digest() {
local total = 0
local count = me.lines.length().toString().toInteger()
local i = 0
loop(i < count) {
total = total + me.lines.get(i).length()
i = i + 1
}
return total + me.undo.length().toString().toInteger()
}
}
static box Main { method main(args) {
local ops = ${N}
local bench = new KiloBench()
bench.birth()
local i = 0
loop(i < ops) {
bench.insert_chunk(i % 64, "xx")
if (i % 8 == 0) {
bench.replace("line", "ln")
}
i = i + 1
}
return bench.digest()
} }
HAKO
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
static void insert_chunk(char **lines, int row, const char *text){
char *line = lines[row];
size_t len = strlen(line);
size_t split = len/2;
char *out = malloc(len + strlen(text) + 1);
memcpy(out, line, split);
strcpy(out+split, text);
strcpy(out+split+strlen(text), line+split);
free(line);
lines[row] = out;
}
static void replace_line(char **lines, const char *pattern, const char *repl){
for (int i=0;i<64;i++){
if (strstr(lines[i], pattern)){
size_t len = strlen(lines[i]) + strlen(repl) + 1;
char *out = malloc(len);
strcpy(out, lines[i]);
strcat(out, repl);
free(lines[i]);
lines[i] = out;
}
}
}
int main(){
volatile int64_t ops = N_PLACEHOLDER;
char *lines[64];
for (int i=0;i<64;i++){
char buf[32]; sprintf(buf, "line-%d", i);
lines[i] = strdup(buf);
}
for (int64_t i=0;i<ops;i++){
insert_chunk(lines, i % 64, "xx");
if (i % 8 == 0) replace_line(lines, "line", "ln");
}
int64_t total = 0;
for (int i=0;i<64;i++){ total += strlen(lines[i]); }
for (int i=0;i<64;i++){ free(lines[i]); }
return (int)(total & 0xFF);
}
C
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
@ -148,21 +763,22 @@ if [[ "$EXE_MODE" = "1" ]]; then
if [[ "$BACKEND" != "llvm" ]]; then
echo "[FAIL] --exe requires --backend llvm" >&2; exit 2
fi
if [[ ! -x "$ROOT/target/release/ny-llvmc" ]]; then
(cargo build -q --release -p nyash-llvm-compiler >/dev/null 2>&1) || true
fi
ensure_llvmc
ensure_nyrt
HAKO_EXE=$(mktemp --suffix .out)
TMP_JSON=$(mktemp --suffix .json)
if ! HAKO_SELFHOST_BUILDER_FIRST=1 HAKO_MIR_BUILDER_LOOP_JSONFRAG=1 HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG=1 \
HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE=1 HAKO_MIR_BUILDER_JSONFRAG_PURIFY=1 \
# Default: use jsonfrag (stable/fast). Set PERF_USE_PROVIDER=1 to prefer provider/selfhost MIR.
if ! HAKO_SELFHOST_BUILDER_FIRST=1 \
HAKO_MIR_BUILDER_LOOP_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_JSONFRAG:-$([[ "${PERF_USE_PROVIDER:-0}" = 1 ]] && echo 0 || echo 1)}" \
HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-$([[ "${PERF_USE_PROVIDER:-0}" = 1 ]] && echo 0 || echo 1)}" \
HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE="${HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE:-1}" \
HAKO_MIR_BUILDER_JSONFRAG_PURIFY="${HAKO_MIR_BUILDER_JSONFRAG_PURIFY:-1}" \
NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \
NYASH_JSON_ONLY=1 bash "$ROOT/tools/hakorune_emit_mir.sh" "$HAKO_FILE" "$TMP_JSON" >/dev/null 2>&1; then
echo "[FAIL] failed to emit MIR JSON" >&2; exit 3
fi
# Ensure runtime lib exists (nyash_kernel)
(cd "$ROOT/crates/nyash_kernel" && cargo build -q --release >/dev/null) || true
# Build EXE via helper (selects crate backend ny-llvmc under the hood)
if ! NYASH_LLVM_BACKEND=crate \
if ! NYASH_LLVM_BACKEND=crate NYASH_LLVM_SKIP_BUILD=1 \
NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \
NYASH_EMIT_EXE_NYRT="${NYASH_EMIT_EXE_NYRT:-$ROOT/target/release}" \
NYASH_LLVM_VERIFY=1 NYASH_LLVM_VERIFY_IR=1 NYASH_LLVM_FAST=1 \