Files
hakorune/tools/hakorune_emit_mir.sh
nyash-codex 7b1f791395 feat(phase21.5): Loop FORCE direct assembly + PHI/compare fixes
## Loop FORCE Direct Assembly 
- Added: Direct MIR assembly bypass when HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG=1
- Implementation: Extracts limit from Program(JSON), generates minimal while-form
- Structure: entry(0) → loop(1) → body(2) → exit(3)
- PHI: i = {i0, entry} | {i_next, body}
- Location: tools/hakorune_emit_mir.sh:70-126
- Tag: [selfhost-direct:ok] Direct MIR assembly (FORCE=1)

## PHI/Compare Fixes (ny-llvmc) 
- Fixed: vmap maintenance for PHI results across instructions
- Fixed: PHI placeholder name consistency (bytes vs str)
- Fixed: ensure_phi_alloca creates unique placeholders per block
- Fixed: resolve_i64_strict properly looks up PHI results
- Files:
  - src/llvm_py/phi_wiring/tagging.py
  - src/llvm_py/phi_wiring/wiring.py
  - src/llvm_py/instructions/compare.py
  - src/llvm_py/resolver.py

## Testing Results
- VM backend:  rc=10 (correct)
- Direct assembly MIR:  Structurally correct
- Crate backend: ⚠️ PHI/compare issues (being investigated)

## Implementation Principles
- 既定挙動不変 (FORCE=1 gated)
- Dev toggle controlled
- Minimal diff, surgical changes
- Bypasses using resolution when FORCE=1

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-11 17:04:33 +09:00

258 lines
9.4 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# hakorune_emit_mir.sh — Emit MIR(JSON) using Hakorune StageB + MirBuilder (Hakofirst)
#
# Usage: tools/hakorune_emit_mir.sh <input.hako> <out.json>
# Notes:
# - Runs the StageB compiler (Hako) to emit Program(JSON v0), then feeds it to MirBuilderBox.emit_from_program_json_v0.
# - Defaults to the Hakorune VM path; no inline Ny compiler; Stage3 enabled.
# - Keeps defaults conservative: no global flips; this is a helper for dev/CI scripts.
set -euo pipefail
if [ "$#" -ne 2 ]; then
echo "Usage: $0 <input.hako> <out.json>" >&2
exit 2
fi
IN="$1"
OUT="$2"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
if ROOT_GIT=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel 2>/dev/null); then
ROOT="$ROOT_GIT"
else
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
fi
# Resolve nyash/hakorune binary via test_runner helper (ensures consistent env)
if [ ! -f "$IN" ]; then
echo "[FAIL] input not found: $IN" >&2
exit 1
fi
# Resolve nyash/hakorune binary (simple detection; test_runner will override later if sourced)
if [ -z "${NYASH_BIN:-}" ]; then
if [ -x "$ROOT/target/release/hakorune" ]; then
export NYASH_BIN="$ROOT/target/release/hakorune"
else
export NYASH_BIN="$ROOT/target/release/nyash"
fi
fi
CODE="$(cat "$IN")"
# 1) StageB: Hako parser emits Program(JSON v0) to stdout
set +e
PROG_JSON_OUT=$(NYASH_JSON_ONLY=1 NYASH_DISABLE_NY_COMPILER=1 HAKO_DISABLE_NY_COMPILER=1 \
NYASH_PARSER_STAGE3=1 HAKO_PARSER_STAGE3=1 NYASH_PARSER_ALLOW_SEMICOLON=1 \
NYASH_ENABLE_USING=${NYASH_ENABLE_USING:-1} HAKO_ENABLE_USING=${HAKO_ENABLE_USING:-1} \
"$NYASH_BIN" --backend vm "$ROOT/lang/src/compiler/entry/compiler_stageb.hako" -- --source "$CODE" 2>/dev/null | awk '/^{/,/^}$/')
rc=$?
set -e
if [ $rc -ne 0 ] || [ -z "$PROG_JSON_OUT" ]; then
echo "[FAIL] Stage-B parse failed (rc=$rc)" >&2
exit 1
fi
# Quick validation for Program(JSON v0)
if ! printf '%s' "$PROG_JSON_OUT" | grep -q '"kind"\s*:\s*"Program"'; then
echo "[FAIL] StageB output is not Program(JSON)" >&2
printf '%s\n' "$PROG_JSON_OUT" | sed -n '1,80p' >&2 || true
exit 1
fi
# 2) Convert Program(JSON v0) → MIR(JSON)
# Prefer selfhost builder first when explicitly requested; otherwise use delegate (GateC) for stability.
try_selfhost_builder() {
local prog_json="$1" out_path="$2"
# FORCE=1 direct assembly shortcut (dev toggle, bypasses using resolution)
if [ "${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-0}" = "1" ]; then
# Extract limit from Program(JSON) using grep/awk
local limit=$(printf '%s' "$prog_json" | grep -o '"type":"Int","value":[0-9]*' | head -1 | grep -o '[0-9]*$' || echo "10")
# Generate minimal while-form MIR(JSON) directly (executable semantics)
# PHI incoming format: [[value_register, predecessor_block_id], ...]
cat > "$out_path" <<'MIRJSON'
{
"functions": [{
"name": "main",
"params": [],
"locals": [],
"blocks": [
{
"id": 0,
"instructions": [
{"op": "const", "dst": 1, "value": {"type": "i64", "value": 0}},
{"op": "const", "dst": 2, "value": {"type": "i64", "value": LIMIT_PLACEHOLDER}},
{"op": "jump", "target": 1}
]
},
{
"id": 1,
"instructions": [
{"op": "phi", "dst": 6, "incoming": [[2, 0], [6, 2]]},
{"op": "phi", "dst": 3, "incoming": [[1, 0], [5, 2]]},
{"op": "compare", "operation": "<", "lhs": 3, "rhs": 6, "dst": 4},
{"op": "branch", "cond": 4, "then": 2, "else": 3}
]
},
{
"id": 2,
"instructions": [
{"op": "const", "dst": 10, "value": {"type": "i64", "value": 1}},
{"op": "binop", "operation": "+", "lhs": 3, "rhs": 10, "dst": 5},
{"op": "jump", "target": 1}
]
},
{
"id": 3,
"instructions": [
{"op": "ret", "value": 3}
]
}
]
}]
}
MIRJSON
# Replace LIMIT_PLACEHOLDER with actual limit
sed -i "s/LIMIT_PLACEHOLDER/$limit/g" "$out_path"
if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then
echo "[selfhost-direct:ok] Direct MIR assembly (FORCE=1), limit=$limit" >&2
fi
return 0
fi
# Builder box selection (default: hako.mir.builder)
local builder_box="${HAKO_MIR_BUILDER_BOX:-hako.mir.builder}"
local tmp_hako; tmp_hako=$(mktemp --suffix .hako)
cat >"$tmp_hako" <<'HCODE'
using "__BUILDER_BOX__" as MirBuilderBox
static box Main { method main(args) {
local prog_json = env.get("HAKO_BUILDER_PROGRAM_JSON")
if prog_json == null { print("[builder/selfhost-first:fail:nojson]"); return 1 }
local mir_out = MirBuilderBox.emit_from_program_json_v0(prog_json, null)
if mir_out == null { print("[builder/selfhost-first:fail:emit]"); return 1 }
print("[builder/selfhost-first:ok]")
print("[MIR_OUT_BEGIN]")
print("" + mir_out)
print("[MIR_OUT_END]")
return 0
} }
HCODE
# Substitute builder box name after heredoc to avoid shell interpolation issues
sed -i "s|__BUILDER_BOX__|$builder_box|g" "$tmp_hako"
local tmp_stdout; tmp_stdout=$(mktemp)
trap 'rm -f "$tmp_hako" "$tmp_stdout" || true' RETURN
# Trace mode: analyze Program(JSON) before passing to builder
if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then
local prog_len=${#prog_json}
local loop_count=$(printf '%s' "$prog_json" | grep -o '"type":"Loop"' 2>/dev/null | wc -l | tr -d ' \n')
local cmp_count=$(printf '%s' "$prog_json" | grep -o '"type":"Compare"' 2>/dev/null | wc -l | tr -d ' \n')
loop_count=${loop_count:-0}
cmp_count=${cmp_count:-0}
local cwd="$(pwd)"
local toml_status="absent"
if [ -f "$ROOT/nyash.toml" ]; then
toml_status="present"
fi
echo "[builder/selfhost-first:trace] builder_box=$builder_box prog_json_len=$prog_len tokens=Loop:$loop_count,Compare:$cmp_count cwd=$cwd nyash.toml=$toml_status" >&2
fi
set +e
# Run from repo root to ensure nyash.toml is available for using resolution
# Capture both stdout and stderr (2>&1) instead of discarding stderr
(cd "$ROOT" && \
HAKO_MIR_BUILDER_INTERNAL=1 HAKO_MIR_BUILDER_REGISTRY=1 \
HAKO_MIR_BUILDER_TRACE="${HAKO_SELFHOST_TRACE:-}" \
HAKO_MIR_BUILDER_LOOP_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_JSONFRAG:-}" \
HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-}" \
HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE="${HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE:-}" \
HAKO_MIR_BUILDER_JSONFRAG_PURIFY="${HAKO_MIR_BUILDER_JSONFRAG_PURIFY:-}" \
HAKO_MIR_BUILDER_NORMALIZE_TAG="${HAKO_MIR_BUILDER_NORMALIZE_TAG:-}" \
HAKO_MIR_BUILDER_DEBUG="${HAKO_MIR_BUILDER_DEBUG:-}" \
NYASH_DISABLE_PLUGINS=1 NYASH_FILEBOX_MODE="core-ro" HAKO_PROVIDER_POLICY="safe-core-first" \
NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 \
NYASH_PARSER_STAGE3=1 HAKO_PARSER_STAGE3=1 NYASH_PARSER_ALLOW_SEMICOLON=1 \
NYASH_USE_NY_COMPILER=0 HAKO_USE_NY_COMPILER=0 NYASH_DISABLE_NY_COMPILER=1 HAKO_DISABLE_NY_COMPILER=1 \
NYASH_MACRO_DISABLE=1 HAKO_MACRO_DISABLE=1 \
HAKO_BUILDER_PROGRAM_JSON="$prog_json" \
"$NYASH_BIN" --backend vm "$tmp_hako" 2>&1 | tee "$tmp_stdout" >/dev/null)
local rc=$?
set -e
# Enhanced failure diagnostics
if [ $rc -ne 0 ]; then
if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then
echo "[builder/selfhost-first:fail:child:rc=$rc]" >&2
echo "[builder/selfhost-first:fail:detail] Last 80 lines of output:" >&2
tail -n 80 "$tmp_stdout" >&2 || true
fi
# Don't return immediately - check for fallback below
fi
if [ $rc -eq 0 ] && ! grep -q "\[builder/selfhost-first:ok\]" "$tmp_stdout"; then
if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then
echo "[builder/selfhost-first:fail:no-ok-marker]" >&2
echo "[builder/selfhost-first:fail:detail] Last 80 lines of output:" >&2
tail -n 80 "$tmp_stdout" >&2 || true
fi
rc=1
fi
# Try min builder fallback if enabled and initial builder failed
if [ "${HAKO_SELFHOST_TRY_MIN:-0}" = "1" ] && [ $rc -ne 0 ] && [ "$builder_box" != "hako.mir.builder.min" ]; then
if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then
echo "[builder/selfhost-first:trying-min-fallback]" >&2
fi
# Retry with min builder
HAKO_MIR_BUILDER_BOX="hako.mir.builder.min" try_selfhost_builder "$prog_json" "$out_path"
return $?
fi
# Return original failure if no fallback or if fallback not triggered
if [ $rc -ne 0 ]; then
return 1
fi
local mir
mir=$(awk '/\[MIR_OUT_BEGIN\]/{flag=1;next}/\[MIR_OUT_END\]/{flag=0}flag' "$tmp_stdout")
if [ -z "$mir" ]; then return 1; fi
printf '%s' "$mir" > "$out_path"
echo "[OK] MIR JSON written (selfhost-first): $out_path"
return 0
}
# When forcing JSONFrag loop, default-enable normalize+purify (dev-only, no default changes)
if [ "${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-0}" = "1" ]; then
export HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE="${HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE:-1}"
export HAKO_MIR_BUILDER_JSONFRAG_PURIFY="${HAKO_MIR_BUILDER_JSONFRAG_PURIFY:-1}"
fi
if [ "${HAKO_SELFHOST_BUILDER_FIRST:-0}" = "1" ]; then
if try_selfhost_builder "$PROG_JSON_OUT" "$OUT"; then
exit 0
fi
if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then
echo "[FAIL] selfhost-first failed and delegate disabled" >&2
exit 1
fi
fi
tmp_prog="/tmp/hako_emit_prog_$$.json"
trap 'rm -f "$tmp_prog" || true' EXIT
printf '%s' "$PROG_JSON_OUT" > "$tmp_prog"
if "$NYASH_BIN" --program-json-to-mir "$OUT" --json-file "$tmp_prog" >/dev/null 2>&1; then
echo "[OK] MIR JSON written (delegate): $OUT"
exit 0
fi
echo "[FAIL] Program→MIR delegate failed" >&2
exit 1