Files
hakorune/tools/hakorune_emit_mir.sh

496 lines
17 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# hakorune_emit_mir.sh — Emit MIR(JSON) using Hakorune Stage-B + MirBuilder (Hako-first)
#
# Usage: tools/hakorune_emit_mir.sh <input.hako> <out.json>
#
# Overview:
# - The Stage-B compiler (written in Hako) is run first to produce Program(JSON v0);
#   that JSON is then handed to a MirBuilder path to obtain MIR(JSON).
# - Stage-B runs on the VM backend with the Ny compiler disabled and the
#   Stage-3 parser switches turned on.
# - Nothing global is flipped: this is a helper meant for dev/CI scripts.
set -euo pipefail

# Exactly two positional arguments are required.
if (( $# != 2 )); then
  echo "Usage: $0 <input.hako> <out.json>" >&2
  exit 2
fi

IN="$1"
OUT="$2"

# Repository root: ask git first, otherwise assume this script lives one level below it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
if ! ROOT_GIT=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel 2>/dev/null); then
  ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
else
  ROOT="$ROOT_GIT"
fi

# The input must be an existing regular file.
if [[ ! -f "$IN" ]]; then
  echo "[FAIL] input not found: $IN" >&2
  exit 1
fi

# Pick the compiler binary unless the caller already provided NYASH_BIN:
# the release build of hakorune when it is executable, otherwise nyash.
if [[ -z "${NYASH_BIN:-}" ]]; then
  if [[ ! -x "$ROOT/target/release/hakorune" ]]; then
    export NYASH_BIN="$ROOT/target/release/nyash"
  else
    export NYASH_BIN="$ROOT/target/release/hakorune"
  fi
fi

# Whole source text; passed to Stage-B through --source.
CODE="$(<"$IN")"
# 1) StageB: Hako parser emits Program(JSON v0) to stdout
#######################################
# Extract the Program(JSON v0) object from mixed Stage-B output.
#
# Reads arbitrary text on stdin, locates the first `"kind":"Program"` key
# (whitespace around the colon is tolerated), walks back to the opening brace
# of the object that contains it, then walks forward with string-aware brace
# balancing to the matching closing brace.
#
# Outputs: the extracted JSON object on stdout (followed by a newline).
# Returns: 0 when an object was extracted, 1 when the key or a balanced
#          object could not be found.
#
# The Python program is passed with `python3 -c` rather than `python3 - <<EOF`:
# a heredoc on the interpreter would replace its stdin, so the piped Stage-B
# output could never be read by sys.stdin.read().
#######################################
extract_program_json() {
  local py_src
  # read returns non-zero at EOF because no NUL delimiter exists; that is expected.
  IFS= read -r -d '' py_src <<'PYEOF' || true
import re
import sys

text = sys.stdin.read()

# Find the start of Program JSON (look for "kind":"Program", any spacing).
match = re.search(r'"kind"\s*:\s*"Program"', text)
if match is None:
    sys.exit(1)

# Walk back to find the opening brace of the object containing the key.
# Closed sibling objects seen on the way push depth negative, so only the
# enclosing brace brings it to 1.
pos = match.start()
depth = 0
while pos >= 0:
    ch = text[pos]
    if ch == '{':
        depth += 1
        if depth == 1:
            break
    elif ch == '}':
        depth -= 1
    pos -= 1
if pos < 0:
    sys.exit(1)

# Walk forward from the opening brace, tracking braces outside of string
# literals, until the matching closing brace is reached.
obj_start = pos
depth = 0
in_string = False
escape = False
for i in range(obj_start, len(text)):
    ch = text[i]
    if escape:
        escape = False
    elif in_string:
        if ch == '\\':
            escape = True
        elif ch == '"':
            in_string = False
    elif ch == '"':
        in_string = True
    elif ch == '{':
        depth += 1
    elif ch == '}':
        depth -= 1
        if depth == 0:
            print(text[obj_start:i + 1])
            sys.exit(0)

# No matching closing brace found.
sys.exit(1)
PYEOF
  python3 -c "$py_src"
}
#######################################
# Emit MIR(JSON) directly from the input source through the CLI
# (--emit-mir-json), skipping the intermediate Program(JSON) step.
# Globals:   NYASH_BIN, IN, OUT (read); HAKO_*/NYASH_* toggles are forwarded
#            with their current value, or their default when unset.
# Arguments: $1 - label printed inside the "[OK]" line
# Outputs:   "[OK] MIR JSON written (<label>): <OUT>" on stdout on success;
#            the CLI's own stdout/stderr are discarded.
# Returns:   0 on success, 1 when the CLI invocation fails.
#######################################
emit_mir_direct() {
  local label="$1"
  HAKO_STAGEB_FUNC_SCAN="${HAKO_STAGEB_FUNC_SCAN:-}" \
  HAKO_MIR_BUILDER_FUNCS="${HAKO_MIR_BUILDER_FUNCS:-}" \
  HAKO_MIR_BUILDER_CALL_RESOLVE="${HAKO_MIR_BUILDER_CALL_RESOLVE:-}" \
  NYASH_JSON_SCHEMA_V1=${NYASH_JSON_SCHEMA_V1:-1} \
  NYASH_MIR_UNIFIED_CALL=${NYASH_MIR_UNIFIED_CALL:-1} \
  "$NYASH_BIN" --emit-mir-json "$OUT" "$IN" >/dev/null 2>&1 || return 1
  echo "[OK] MIR JSON written ($label): $OUT"
}

# Run Stage-B from the repo root and keep only the Program(JSON) object.
# errexit is suspended so a failing pipeline can be inspected through $rc.
# Note the space in "$( (": without it the construct is ambiguous with
# arithmetic expansion "$((".
set +e
PROG_JSON_OUT=$( (cd "$ROOT" && \
  NYASH_JSON_ONLY=1 NYASH_DISABLE_NY_COMPILER=1 HAKO_DISABLE_NY_COMPILER=1 \
  HAKO_STAGEB_FUNC_SCAN="${HAKO_STAGEB_FUNC_SCAN:-}" \
  NYASH_PARSER_STAGE3=1 HAKO_PARSER_STAGE3=1 NYASH_PARSER_ALLOW_SEMICOLON=1 \
  NYASH_ENABLE_USING=${NYASH_ENABLE_USING:-1} HAKO_ENABLE_USING=${HAKO_ENABLE_USING:-1} \
  "$NYASH_BIN" --backend vm "$ROOT/lang/src/compiler/entry/compiler_stageb.hako" -- --source "$CODE") 2>/dev/null | extract_program_json)
rc=$?
set -e

# If Stage-B fails (or yields nothing), skip the Program(JSON) step and
# emit MIR directly through the legacy CLI path.
if [ "$rc" -ne 0 ] || [ -z "$PROG_JSON_OUT" ]; then
  if emit_mir_direct "direct-emit"; then
    exit 0
  fi
  echo "[FAIL] Stage-B and direct MIR emit both failed" >&2
  exit 1
fi

# Quick validation for Program(JSON v0); POSIX character class instead of the
# GNU-only "\s" so the check behaves the same with any grep.
if ! printf '%s' "$PROG_JSON_OUT" | grep -q '"kind"[[:space:]]*:[[:space:]]*"Program"'; then
  # Invalid Program JSON - fall back to direct emit
  if emit_mir_direct "direct-emit-fallback"; then
    exit 0
  fi
  echo "[FAIL] StageB output invalid and direct emit failed" >&2
  exit 1
fi
# 2) Convert Program(JSON v0) → MIR(JSON)
# Prefer selfhost builder first when explicitly requested; otherwise use delegate (Gate-C) for stability.
#######################################
# Build MIR(JSON) from Program(JSON v0) with the selfhost (Hako) MirBuilder.
#
# Globals:
#   ROOT, NYASH_BIN (read)
#   HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG=1  write a fixed while-loop MIR
#                                           directly, without running the VM
#   HAKO_MIR_BUILDER_BOX                    builder module to load
#                                           (default: hako.mir.builder)
#   HAKO_SELFHOST_TRACE=1                   print trace lines on stderr
#   HAKO_SELFHOST_NO_DELEGATE=1             print failure details on stderr
#   HAKO_SELFHOST_TRY_MIN=1                 retry once with hako.mir.builder.min
#   other HAKO_MIR_BUILDER_* toggles are forwarded to the child process
# Arguments:
#   $1 - Program(JSON v0) text
#   $2 - output path for MIR(JSON)
# Outputs:
#   "[OK] MIR JSON written (selfhost-first): <path>" on stdout on the VM path
# Returns:
#   0 when MIR(JSON) was written to $2, 1 otherwise
#######################################
try_selfhost_builder() {
  local prog_json="$1" out_path="$2"

  # FORCE=1 direct assembly shortcut (dev toggle, bypasses using resolution)
  if [ "${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-0}" = "1" ]; then
    # Loop limit = first Int literal with at least one digit in Program(JSON);
    # 10 when there is none. A shell regex is used instead of
    # `grep | head -1 | grep || echo 10`: under pipefail that pipeline can
    # append the fallback to a real match when head closes the pipe early,
    # and it yields an empty value when no digits follow "value":.
    local limit=10
    local int_re='"type":"Int","value":([0-9]+)'
    if [[ "$prog_json" =~ $int_re ]]; then
      limit="${BASH_REMATCH[1]}"
    fi
    # Generate minimal while-form MIR(JSON) directly (executable semantics)
    # PHI incoming format: [[value_register, predecessor_block_id], ...]
    # The heredoc is unquoted so ${limit} is expanded in place; the template
    # contains no other shell metacharacters.
    cat > "$out_path" <<MIRJSON
{
"functions": [{
"name": "main",
"params": [],
"locals": [],
"blocks": [
{
"id": 0,
"instructions": [
{"op": "const", "dst": 1, "value": {"type": "i64", "value": 0}},
{"op": "const", "dst": 2, "value": {"type": "i64", "value": ${limit}}},
{"op": "jump", "target": 1}
]
},
{
"id": 1,
"instructions": [
{"op": "phi", "dst": 6, "incoming": [[2, 0], [6, 2]]},
{"op": "phi", "dst": 3, "incoming": [[1, 0], [5, 2]]},
{"op": "compare", "operation": "<", "lhs": 3, "rhs": 6, "dst": 4},
{"op": "branch", "cond": 4, "then": 2, "else": 3}
]
},
{
"id": 2,
"instructions": [
{"op": "const", "dst": 10, "value": {"type": "i64", "value": 1}},
{"op": "binop", "operation": "+", "lhs": 3, "rhs": 10, "dst": 5},
{"op": "jump", "target": 1}
]
},
{
"id": 3,
"instructions": [
{"op": "ret", "value": 3}
]
}
]
}]
}
MIRJSON
    # NOTE(review): the function definition below sits inside the FORCE branch
    # and looks misplaced (possibly a merge artifact). It only takes effect
    # when this branch runs, where it redefines try_provider_emit with a
    # variant that can normalize provider output (HAKO_MIR_NORMALIZE_PROVIDER=1).
    # It is kept unchanged to preserve behavior — confirm whether it should
    # replace the top-level try_provider_emit or be removed.
    # Provider-first delegate: call env.mirbuilder.emit(prog_json) and capture v1 JSON
    try_provider_emit() {
      local prog_json="$1" out_path="$2"
      local tmp_hako; tmp_hako=$(mktemp --suffix .hako)
      cat >"$tmp_hako" <<'HCODE'
using "hako.mir.builder.internal.jsonfrag_normalizer" as NormBox
static box Main { method main(args) {
local p = env.get("HAKO_BUILDER_PROGRAM_JSON")
if p == null { print("[provider/emit:nojson]"); return 1 }
local a = new ArrayBox(); a.push(p)
local out = hostbridge.extern_invoke("env.mirbuilder", "emit", a)
// Optional normalization (dev): apply JsonFrag normalizer/purifier to provider output
{
local nv = env.get("HAKO_MIR_NORMALIZE_PROVIDER")
if nv != null && ("" + nv) == "1" {
local out_s = "" + out
out = NormBox.normalize_all(out_s)
}
}
print("[provider/emit:ok]")
print("[MIR_OUT_BEGIN]")
print("" + out)
print("[MIR_OUT_END]")
return 0
} }
HCODE
      local tmp_stdout; tmp_stdout=$(mktemp)
      trap 'rm -f "$tmp_hako" "$tmp_stdout" || true' RETURN
      set +e
      (cd "$ROOT" && \
        NYASH_DISABLE_PLUGINS=1 NYASH_FILEBOX_MODE="core-ro" \
        NYASH_PARSER_STAGE3=1 HAKO_PARSER_STAGE3=1 NYASH_PARSER_ALLOW_SEMICOLON=1 \
        HAKO_BUILDER_PROGRAM_JSON="$prog_json" \
        "$NYASH_BIN" --backend vm "$tmp_hako" 2>&1 | tee "$tmp_stdout" >/dev/null)
      local rc=$?
      set -e
      if [ $rc -ne 0 ] || ! grep -q "\[provider/emit:ok\]" "$tmp_stdout"; then
        return 1
      fi
      local mir
      mir=$(awk '/\[MIR_OUT_BEGIN\]/{flag=1;next}/\[MIR_OUT_END\]/{flag=0}flag' "$tmp_stdout")
      if [ -z "$mir" ]; then return 1; fi
      printf '%s' "$mir" > "$out_path"
      echo "[OK] MIR JSON written (delegate:provider): $out_path"
      return 0
    }
    if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then
      echo "[selfhost-direct:ok] Direct MIR assembly (FORCE=1), limit=$limit" >&2
    fi
    return 0
  fi

  # Builder box selection (default: hako.mir.builder)
  local builder_box="${HAKO_MIR_BUILDER_BOX:-hako.mir.builder}"
  local tmp_hako; tmp_hako=$(mktemp --suffix .hako)
  if [ "$builder_box" = "hako.mir.builder.min" ]; then
    cat >"$tmp_hako" <<'HCODE'
using "hako.mir.builder.internal.runner_min" as BuilderRunnerMinBox
static box Main { method main(args) {
local prog_json = env.get("HAKO_BUILDER_PROGRAM_JSON")
if prog_json == null { print("[builder/selfhost-first:fail:nojson]"); return 1 }
local mir_out = BuilderRunnerMinBox.run(prog_json)
if mir_out == null { print("[builder/selfhost-first:fail:emit]"); return 1 }
print("[builder/selfhost-first:ok]")
print("[MIR_OUT_BEGIN]")
print("" + mir_out)
print("[MIR_OUT_END]")
return 0
} }
HCODE
  else
    # Unquoted heredoc: ${builder_box} is inserted literally, so characters
    # that would be special in a sed replacement (|, &, \) cannot corrupt the
    # generated program. The template has no other shell metacharacters.
    cat >"$tmp_hako" <<HCODE
using "${builder_box}" as MirBuilderBox
static box Main { method main(args) {
local prog_json = env.get("HAKO_BUILDER_PROGRAM_JSON")
if prog_json == null { print("[builder/selfhost-first:fail:nojson]"); return 1 }
local mir_out = MirBuilderBox.emit_from_program_json_v0(prog_json, null)
if mir_out == null { print("[builder/selfhost-first:fail:emit]"); return 1 }
print("[builder/selfhost-first:ok]")
print("[MIR_OUT_BEGIN]")
print("" + mir_out)
print("[MIR_OUT_END]")
return 0
} }
HCODE
  fi
  local tmp_stdout; tmp_stdout=$(mktemp)
  # Remove both temp files whenever this function returns.
  trap 'rm -f "$tmp_hako" "$tmp_stdout" || true' RETURN

  # Trace mode: analyze Program(JSON) before passing to builder
  if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then
    local prog_len=${#prog_json}
    local loop_count cmp_count
    # grep exits non-zero when nothing matches; wc still reports 0, so the
    # pipeline status is deliberately ignored.
    loop_count=$(printf '%s' "$prog_json" | grep -o '"type":"Loop"' 2>/dev/null | wc -l | tr -d ' \n') || true
    cmp_count=$(printf '%s' "$prog_json" | grep -o '"type":"Compare"' 2>/dev/null | wc -l | tr -d ' \n') || true
    loop_count=${loop_count:-0}
    cmp_count=${cmp_count:-0}
    local cwd
    cwd="$(pwd)"
    local toml_status="absent"
    if [ -f "$ROOT/nyash.toml" ]; then
      toml_status="present"
    fi
    echo "[builder/selfhost-first:trace] builder_box=$builder_box prog_json_len=$prog_len tokens=Loop:$loop_count,Compare:$cmp_count cwd=$cwd nyash.toml=$toml_status" >&2
  fi

  set +e
  # Run from repo root to ensure nyash.toml is available for using resolution
  # Capture both stdout and stderr (2>&1) instead of discarding stderr
  (cd "$ROOT" && \
    HAKO_MIR_BUILDER_INTERNAL=1 HAKO_MIR_BUILDER_REGISTRY=1 \
    HAKO_MIR_BUILDER_TRACE="${HAKO_SELFHOST_TRACE:-}" \
    HAKO_MIR_BUILDER_LOOP_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_JSONFRAG:-}" \
    HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-}" \
    HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE="${HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE:-}" \
    HAKO_MIR_BUILDER_JSONFRAG_PURIFY="${HAKO_MIR_BUILDER_JSONFRAG_PURIFY:-}" \
    HAKO_MIR_BUILDER_METHODIZE="${HAKO_MIR_BUILDER_METHODIZE:-}" \
    HAKO_MIR_BUILDER_NORMALIZE_TAG="${HAKO_MIR_BUILDER_NORMALIZE_TAG:-}" \
    HAKO_MIR_BUILDER_DEBUG="${HAKO_MIR_BUILDER_DEBUG:-}" \
    NYASH_DISABLE_PLUGINS="${NYASH_DISABLE_PLUGINS:-0}" NYASH_FILEBOX_MODE="core-ro" HAKO_PROVIDER_POLICY="safe-core-first" \
    NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \
    NYASH_PARSER_STAGE3=1 HAKO_PARSER_STAGE3=1 NYASH_PARSER_ALLOW_SEMICOLON=1 \
    NYASH_USE_NY_COMPILER=0 HAKO_USE_NY_COMPILER=0 NYASH_DISABLE_NY_COMPILER=1 HAKO_DISABLE_NY_COMPILER=1 \
    NYASH_MACRO_DISABLE=1 HAKO_MACRO_DISABLE=1 \
    HAKO_BUILDER_PROGRAM_JSON="$prog_json" \
    "$NYASH_BIN" --backend vm "$tmp_hako" 2>&1 | tee "$tmp_stdout" >/dev/null)
  local rc=$?
  set -e

  # Enhanced failure diagnostics
  if [ $rc -ne 0 ]; then
    if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then
      echo "[builder/selfhost-first:fail:child:rc=$rc]" >&2
      echo "[builder/selfhost-first:fail:detail] Last 80 lines of output:" >&2
      tail -n 80 "$tmp_stdout" >&2 || true
    fi
    # Don't return immediately - check for fallback below
  fi
  # A zero exit status without the ok marker is still treated as a failure.
  if [ $rc -eq 0 ] && ! grep -q "\[builder/selfhost-first:ok\]" "$tmp_stdout"; then
    if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then
      echo "[builder/selfhost-first:fail:no-ok-marker]" >&2
      echo "[builder/selfhost-first:fail:detail] Last 80 lines of output:" >&2
      tail -n 80 "$tmp_stdout" >&2 || true
    fi
    rc=1
  fi

  # Try min builder fallback if enabled and initial builder failed
  if [ "${HAKO_SELFHOST_TRY_MIN:-0}" = "1" ] && [ $rc -ne 0 ] && [ "$builder_box" != "hako.mir.builder.min" ]; then
    if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then
      echo "[builder/selfhost-first:trying-min-fallback]" >&2
    fi
    # Retry with min builder (the recursion stops because builder_box then
    # equals hako.mir.builder.min)
    HAKO_MIR_BUILDER_BOX="hako.mir.builder.min" try_selfhost_builder "$prog_json" "$out_path"
    return $?
  fi

  # Return original failure if no fallback or if fallback not triggered
  if [ $rc -ne 0 ]; then
    return 1
  fi

  # Keep only the lines printed between the MIR_OUT markers.
  local mir
  mir=$(awk '/\[MIR_OUT_BEGIN\]/{flag=1;next}/\[MIR_OUT_END\]/{flag=0}flag' "$tmp_stdout")
  if [ -z "$mir" ]; then return 1; fi
  printf '%s' "$mir" > "$out_path"
  echo "[OK] MIR JSON written (selfhost-first): $out_path"
  return 0
}
#######################################
# Provider delegate: run a small Hako program that calls
# env.mirbuilder.emit on the Program(JSON) and capture the printed MIR.
# Globals:   ROOT, NYASH_BIN (read); NYASH_DISABLE_PLUGINS (forwarded, default 0)
# Arguments: $1 - Program(JSON v0) text
#            $2 - output path for MIR(JSON)
# Outputs:   "[OK] MIR JSON written (delegate:provider): <path>" on stdout
# Returns:   0 when MIR was written, 1 on child failure, missing ok marker,
#            or empty MIR payload
#######################################
try_provider_emit() {
  local prog_json="$1" out_path="$2"
  local hako_src capture status payload

  # Throw-away driver program; the Program(JSON) reaches it through the environment.
  hako_src=$(mktemp --suffix .hako)
  cat >"$hako_src" <<'HCODE'
static box Main { method main(args) {
local p = env.get("HAKO_BUILDER_PROGRAM_JSON")
if p == null { print("[provider/emit:nojson]"); return 1 }
local a = new ArrayBox(); a.push(p)
local out = hostbridge.extern_invoke("env.mirbuilder", "emit", a)
print("[provider/emit:ok]")
print("[MIR_OUT_BEGIN]")
print("" + out)
print("[MIR_OUT_END]")
return 0
} }
HCODE
  capture=$(mktemp)
  # Both temp files are removed when the function returns.
  trap 'rm -f "$hako_src" "$capture" || true' RETURN

  # Run from the repo root; stdout and stderr are both stored in the capture file.
  set +e
  (cd "$ROOT" && \
    NYASH_DISABLE_PLUGINS="${NYASH_DISABLE_PLUGINS:-0}" NYASH_FILEBOX_MODE="core-ro" \
    NYASH_PARSER_STAGE3=1 HAKO_PARSER_STAGE3=1 NYASH_PARSER_ALLOW_SEMICOLON=1 \
    HAKO_BUILDER_PROGRAM_JSON="$prog_json" \
    "$NYASH_BIN" --backend vm "$hako_src" 2>&1 | tee "$capture" >/dev/null)
  status=$?
  set -e

  # Success needs a zero exit status and the ok marker in the output.
  if [ "$status" -ne 0 ]; then
    return 1
  fi
  if ! grep -q "\[provider/emit:ok\]" "$capture"; then
    return 1
  fi

  # The MIR payload is whatever was printed between the two markers.
  payload=$(awk '/\[MIR_OUT_BEGIN\]/{flag=1;next}/\[MIR_OUT_END\]/{flag=0}flag' "$capture")
  if [ -z "$payload" ]; then
    return 1
  fi
  printf '%s' "$payload" > "$out_path"
  echo "[OK] MIR JSON written (delegate:provider): $out_path"
  return 0
}
# Dev-only convenience: when the JsonFrag loop is forced, switch on
# normalize+purify unless the caller already chose a value (no default changes).
if [[ "${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-0}" == "1" ]]; then
  : "${HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE:=1}"
  : "${HAKO_MIR_BUILDER_JSONFRAG_PURIFY:=1}"
  export HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE HAKO_MIR_BUILDER_JSONFRAG_PURIFY
fi

# Opt-in selfhost-first path: done on success; on failure continue to the
# delegate paths below unless delegation has been disabled.
if [[ "${HAKO_SELFHOST_BUILDER_FIRST:-0}" == "1" ]]; then
  if ! try_selfhost_builder "$PROG_JSON_OUT" "$OUT"; then
    if [[ "${HAKO_SELFHOST_NO_DELEGATE:-0}" == "1" ]]; then
      echo "[FAIL] selfhost-first failed and delegate disabled" >&2
      exit 1
    fi
  else
    exit 0
  fi
fi
# Dev: force JsonFrag minimal loop even on provider-first path
if [ "${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-0}" = "1" ]; then
  # Loop limit = first Int literal with at least one digit in Program(JSON);
  # 10 when there is none. A shell regex replaces the former
  # `grep | head -1 | grep || echo 10` pipeline, which under pipefail could
  # append the fallback to a real match (when head closes the pipe early) or
  # produce an empty value, both of which yield invalid JSON below.
  limit=10
  int_re='"type":"Int","value":([0-9]+)'
  if [[ "$PROG_JSON_OUT" =~ $int_re ]]; then
    limit="${BASH_REMATCH[1]}"
  fi
  # Fixed while-form MIR: counts register 3 up from 0 in steps of 1 while it
  # is < limit, then returns it. ${limit} is expanded by the unquoted heredoc.
  cat > "$OUT" <<MIRJSON
{
"functions": [{
"name": "main",
"params": [],
"locals": [],
"blocks": [
{ "id": 0, "instructions": [
{"op":"const","dst":1,"value":{"type":"i64","value":0}},
{"op":"const","dst":2,"value":{"type":"i64","value": ${limit} }},
{"op":"jump","target":1}
]},
{ "id": 1, "instructions": [
{"op":"phi","dst":6,"incoming":[[2,0],[6,2]]},
{"op":"phi","dst":3,"incoming":[[1,0],[5,2]]},
{"op":"compare","operation":"<","lhs":3,"rhs":6,"dst":4},
{"op":"branch","cond":4,"then":2,"else":3}
]},
{ "id": 2, "instructions": [
{"op":"const","dst":10,"value":{"type":"i64","value":1}},
{"op":"binop","operation":"+","lhs":3,"rhs":10,"dst":5},
{"op":"jump","target":1}
]},
{ "id": 3, "instructions": [
{"op":"ret","value":3}
]}
]
}]
}
MIRJSON
  echo "[OK] MIR JSON written (provider-force-jsonfrag): $OUT"
  exit 0
fi
# Persist Program(JSON) for the legacy CLI fallback. mktemp creates a private,
# unpredictable file instead of a guessable /tmp/<name>_$$.json path; the
# file is removed when the script exits.
tmp_prog="$(mktemp --suffix .json)"
trap 'rm -f "$tmp_prog" || true' EXIT
printf '%s' "$PROG_JSON_OUT" > "$tmp_prog"

# Provider-first delegate (fixed to v1): uses env.mirbuilder.emit
if try_provider_emit "$PROG_JSON_OUT" "$OUT"; then
  exit 0
fi

# Final fallback: legacy CLI conversion, with the environment set to request v1 output
if HAKO_STAGEB_FUNC_SCAN="${HAKO_STAGEB_FUNC_SCAN:-}" \
  HAKO_MIR_BUILDER_FUNCS="${HAKO_MIR_BUILDER_FUNCS:-}" \
  HAKO_MIR_BUILDER_CALL_RESOLVE="${HAKO_MIR_BUILDER_CALL_RESOLVE:-}" \
  NYASH_JSON_SCHEMA_V1=${NYASH_JSON_SCHEMA_V1:-1} \
  NYASH_MIR_UNIFIED_CALL=${NYASH_MIR_UNIFIED_CALL:-1} \
  "$NYASH_BIN" --program-json-to-mir "$OUT" --json-file "$tmp_prog" >/dev/null 2>&1; then
  echo "[OK] MIR JSON written (delegate-legacy): $OUT"
  exit 0
fi

echo "[FAIL] Program→MIR delegate failed (provider+legacy)" >&2
exit 1