diff --git a/lang/src/compiler/entry/compiler_stageb.hako b/lang/src/compiler/entry/compiler_stageb.hako index 92c00598..23f9967b 100644 --- a/lang/src/compiler/entry/compiler_stageb.hako +++ b/lang/src/compiler/entry/compiler_stageb.hako @@ -22,15 +22,23 @@ static box Main { main(args) { // 1) Collect source from args or env local src = null + local src_file = null if args != null { local i = 0 local n = args.length() loop(i < n) { local t = "" + args.get(i) if t == "--source" && i + 1 < n { src = "" + args.get(i + 1) break } + if t == "--source-file" && i + 1 < n { src_file = "" + args.get(i + 1) i = i + 1 } i = i + 1 } } + // Prefer explicit source string when provided + // If --source-file is used, prefer env-provided content (wrapper supplies HAKO_SOURCE_FILE_CONTENT) + if src == null && src_file != null { + local inline = env.get("HAKO_SOURCE_FILE_CONTENT") + if inline != null { src = "" + inline } + } // Skip env.local.get check - Stage-3 keyword conflict // Original: if src == null { src = env.local.get("HAKO_SOURCE") } if src == null { src = "return 0" } diff --git a/tools/hakorune_emit_mir.sh b/tools/hakorune_emit_mir.sh index b43ab046..c14732c2 100644 --- a/tools/hakorune_emit_mir.sh +++ b/tools/hakorune_emit_mir.sh @@ -39,12 +39,15 @@ if [ -z "${NYASH_BIN:-}" ]; then fi fi -CODE="$(cat "$IN")" +# Store CODE in temp file to avoid subshell expansion issues +CODE_TMP=$(mktemp --suffix=.hako) +trap 'rm -f "$CODE_TMP" || true' EXIT +cp "$IN" "$CODE_TMP" # Check if FORCE jsonfrag mode is requested (bypasses Stage-B entirely) if [ "${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-0}" = "1" ]; then # Extract limit from code using grep/awk - limit=$(printf '%s' "$CODE" | grep -o '[0-9]\+' | head -1 || echo "10") + limit=$(grep -o '[0-9]\+' "$CODE_TMP" | head -1 || true); limit="${limit:-10}" # Generate minimal while-form MIR(JSON) directly (executable semantics) # PHI incoming format: [[value_register, predecessor_block_id], ...] 
echo "[emit/jsonfrag] FORCE min-loop MIR (dev-only)" >&2 @@ -95,8 +98,8 @@ MIRJSON fi # 1) Stage‑B: Hako parser emits Program(JSON v0) to stdout -# Extract Program JSON robustly using Python3 bracket balancing -extract_program_json() { +# Extract Program JSON robustly using Python3 bracket balancing with fallbacks +extract_program_json_py() { python3 - <<'PYEOF' import sys @@ -157,14 +160,80 @@ sys.exit(1) PYEOF } +extract_program_json() { + local input="$1" + local result + + # Try 1: Python balancer (existing) + result=$(echo "$input" | extract_program_json_py 2>/dev/null || true) + if [ -n "$result" ] && echo "$result" | grep -q '"kind".*"Program"'; then + echo "$result" + return 0 + fi + + # Try 2: Simple awk fallback + result=$(echo "$input" | awk '/^\{/,/^\}$/') + if [ -n "$result" ] && echo "$result" | grep -q '"kind".*"Program"'; then + echo "$result" + return 0 + fi + + # Try 3: Ruby fallback (if available) + if command -v ruby >/dev/null 2>&1; then + result=$(echo "$input" | ruby -e 'puts STDIN.read[/\{.*"kind".*"Program".*\}/m]' 2>/dev/null || true) + if [ -n "$result" ]; then + echo "$result" + return 0 + fi + fi + + # All fallbacks failed + return 1 +} + set +e -PROG_JSON_OUT=$((cd "$ROOT" && \ +if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + code_len=$(wc -c < "$CODE_TMP" | tr -d ' ') + echo "[emit:trace] Stage-B: Starting parse of input (${code_len} chars)..." 
>&2 +fi + +# Run Stage-B with temp file (avoid subshell CODE variable expansion) +PROG_JSON_RAW=$(cd "$ROOT" && \ NYASH_JSON_ONLY=1 NYASH_DISABLE_NY_COMPILER=1 HAKO_DISABLE_NY_COMPILER=1 \ HAKO_STAGEB_FUNC_SCAN="${HAKO_STAGEB_FUNC_SCAN:-}" \ NYASH_PARSER_STAGE3=1 HAKO_PARSER_STAGE3=1 NYASH_PARSER_ALLOW_SEMICOLON=1 \ NYASH_ENABLE_USING=${NYASH_ENABLE_USING:-1} HAKO_ENABLE_USING=${HAKO_ENABLE_USING:-1} \ - "$NYASH_BIN" --backend vm "$ROOT/lang/src/compiler/entry/compiler_stageb.hako" -- --source "$CODE") 2>/dev/null | extract_program_json) + "$NYASH_BIN" --backend vm "$ROOT/lang/src/compiler/entry/compiler_stageb.hako" -- --source "$(cat "$CODE_TMP")" 2>&1) rc=$? + +if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + echo "[emit:trace] Stage-B: Raw output length=${#PROG_JSON_RAW} chars, rc=$rc" >&2 +fi + +# Extract Program JSON from raw output; no '|| true' here so extract_rc holds the extractor's real exit status (set +e is active) +PROG_JSON_OUT=$(extract_program_json "$PROG_JSON_RAW" 2>/dev/null) +extract_rc=$? + +if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + if [ $extract_rc -eq 0 ] && [ -n "$PROG_JSON_OUT" ]; then + echo "[emit:trace] Stage-B: SUCCESS - Generated Program(JSON) (${#PROG_JSON_OUT} chars)" >&2 + # Show first 200 chars for validation + prog_head=$(printf '%s' "$PROG_JSON_OUT" | head -c 200) + echo "[emit:trace] Stage-B: prog_json_head: $prog_head..." 
>&2 + else + echo "[emit:trace] Stage-B: FAILED - extract_rc=$extract_rc, output_len=${#PROG_JSON_OUT}" >&2 + if [ -n "$PROG_JSON_RAW" ]; then + echo "[emit:trace] Stage-B: Raw output first 200 chars:" >&2 + printf '%s' "$PROG_JSON_RAW" | head -c 200 >&2 + echo "" >&2 + fi + fi +fi + +# Update rc to reflect extraction result +if [ $extract_rc -ne 0 ] || [ -z "$PROG_JSON_OUT" ]; then + rc=1 +fi set -e # If Stage-B fails, skip to direct MIR emit paths (provider/legacy) @@ -298,7 +367,42 @@ HCODE local mir mir=$(awk '/\[MIR_OUT_BEGIN\]/{flag=1;next}/\[MIR_OUT_END\]/{flag=0}flag' "$tmp_stdout") if [ -z "$mir" ]; then return 1; fi + # Write raw MIR JSON first printf '%s' "$mir" > "$out_path" + + # Optional AOT prep stage (text-level, no FileBox required for JSON-in/out function) + # Run only when fast/hoist/collections_hot are requested to avoid unnecessary overhead. + if [ "${NYASH_AOT_COLLECTIONS_HOT:-0}" = "1" ] || [ "${NYASH_LLVM_FAST:-0}" = "1" ] || [ "${NYASH_MIR_LOOP_HOIST:-0}" = "1" ]; then + if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + echo "[provider/emit:trace] Applying AotPrep passes to MIR JSON..." >&2 + fi + _prep_hako=$(mktemp --suffix .hako) + cat > "$_prep_hako" <<'HAKO' +using selfhost.llvm.ir.aot_prep as AotPrepBox +static box Main { method main(args) { + local in = args.get(0) + // Prefer file-path based prep to avoid huge argv issues; FileBox is core-ro in this runner + local out = AotPrepBox.prep(in) + if out == null { println("[prep:fail]"); return 1 } + println(out) + return 0 +} } +HAKO + set +e + _prep_out=$(NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 NYASH_FILEBOX_MODE=core-ro \ + NYASH_AOT_COLLECTIONS_HOT=${NYASH_AOT_COLLECTIONS_HOT:-0} NYASH_LLVM_FAST=${NYASH_LLVM_FAST:-0} NYASH_MIR_LOOP_HOIST=${NYASH_MIR_LOOP_HOIST:-0} NYASH_AOT_MAP_KEY_MODE=${NYASH_AOT_MAP_KEY_MODE:-auto} \ + "$NYASH_BIN" --backend vm "$_prep_hako" -- "$out_path" 2>/dev/null | tail -n 1) + _rc=$? 
+ set -e + if [ $_rc -eq 0 ] && [ -f "$_prep_out" ]; then + mv -f "$_prep_out" "$out_path" + [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ] && echo "[provider/emit:trace] AotPrep applied successfully" >&2 + else + [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ] && echo "[provider/emit:trace] AotPrep skipped or failed (rc=$_rc)" >&2 + fi + rm -f "$_prep_hako" 2>/dev/null || true + fi + echo "[OK] MIR JSON written (delegate:provider): $out_path" return 0 } @@ -357,19 +461,31 @@ HCODE local prog_len=${#prog_json} local loop_count=$(printf '%s' "$prog_json" | grep -o '"type":"Loop"' 2>/dev/null | wc -l | tr -d ' \n') local cmp_count=$(printf '%s' "$prog_json" | grep -o '"type":"Compare"' 2>/dev/null | wc -l | tr -d ' \n') + local array_count=$(printf '%s' "$prog_json" | grep -o '"type":"MethodCall"' 2>/dev/null | wc -l | tr -d ' \n') loop_count=${loop_count:-0} cmp_count=${cmp_count:-0} + array_count=${array_count:-0} local cwd="$(pwd)" local toml_status="absent" if [ -f "$ROOT/nyash.toml" ]; then toml_status="present" fi - echo "[builder/selfhost-first:trace] builder_box=$builder_box prog_json_len=$prog_len tokens=Loop:$loop_count,Compare:$cmp_count cwd=$cwd nyash.toml=$toml_status" >&2 + echo "[builder/selfhost-first:trace] builder_box=$builder_box prog_json_len=$prog_len tokens=Loop:$loop_count,Compare:$cmp_count,MethodCall:$array_count cwd=$cwd nyash.toml=$toml_status" >&2 + # Show first 200 chars of Program(JSON) for structural validation + local prog_head=$(printf '%s' "$prog_json" | head -c 200) + echo "[builder/selfhost-first:trace] prog_json_head: $prog_head..." >&2 fi set +e # Run from repo root to ensure nyash.toml is available for using resolution # Capture both stdout and stderr (2>&1) instead of discarding stderr + local tmp_stderr; tmp_stderr=$(mktemp) + trap 'rm -f "$tmp_hako" "$tmp_stdout" "$tmp_stderr" || true' RETURN + + if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + echo "[builder/selfhost-first:exec] Starting builder execution..." 
>&2 + fi + (cd "$ROOT" && \ HAKO_MIR_BUILDER_INTERNAL=1 HAKO_MIR_BUILDER_REGISTRY=1 \ HAKO_MIR_BUILDER_TRACE="${HAKO_SELFHOST_TRACE:-}" \ @@ -386,23 +502,37 @@ HCODE NYASH_USE_NY_COMPILER=0 HAKO_USE_NY_COMPILER=0 NYASH_DISABLE_NY_COMPILER=1 HAKO_DISABLE_NY_COMPILER=1 \ NYASH_MACRO_DISABLE=1 HAKO_MACRO_DISABLE=1 \ HAKO_BUILDER_PROGRAM_JSON="$prog_json" \ - "$NYASH_BIN" --backend vm "$tmp_hako" 2>&1 | tee "$tmp_stdout" >/dev/null) + "$NYASH_BIN" --backend vm "$tmp_hako" 2>"$tmp_stderr" | tee "$tmp_stdout" >/dev/null) local rc=$? set -e - # Enhanced failure diagnostics + if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + echo "[builder/selfhost-first:exec] Builder execution completed with rc=$rc" >&2 + fi + + # Enhanced failure diagnostics with comprehensive logging if [ $rc -ne 0 ]; then - if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then + if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ] || [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then echo "[builder/selfhost-first:fail:child:rc=$rc]" >&2 + echo "[builder/selfhost-first:fail:detail] First 20 lines of output:" >&2 + head -n 20 "$tmp_stdout" >&2 || true echo "[builder/selfhost-first:fail:detail] Last 80 lines of output:" >&2 tail -n 80 "$tmp_stdout" >&2 || true + if [ -s "$tmp_stderr" ]; then + echo "[builder/selfhost-first:fail:stderr] First 20 lines:" >&2 + head -n 20 "$tmp_stderr" >&2 || true + echo "[builder/selfhost-first:fail:stderr] Last 40 lines:" >&2 + tail -n 40 "$tmp_stderr" >&2 || true + fi fi # Don't return immediately - check for fallback below fi if [ $rc -eq 0 ] && ! 
grep -q "\[builder/selfhost-first:ok\]" "$tmp_stdout"; then - if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then + if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ] || [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then echo "[builder/selfhost-first:fail:no-ok-marker]" >&2 + echo "[builder/selfhost-first:fail:detail] First 20 lines of output:" >&2 + head -n 20 "$tmp_stdout" >&2 || true echo "[builder/selfhost-first:fail:detail] Last 80 lines of output:" >&2 tail -n 80 "$tmp_stdout" >&2 || true fi @@ -411,13 +541,17 @@ HCODE # Try min builder fallback if enabled and initial builder failed if [ "${HAKO_SELFHOST_TRY_MIN:-0}" = "1" ] && [ $rc -ne 0 ] && [ "$builder_box" != "hako.mir.builder.min" ]; then - if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ]; then + if [ "${HAKO_SELFHOST_NO_DELEGATE:-0}" = "1" ] || [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then echo "[builder/selfhost-first:trying-min-fallback]" >&2 fi # Retry with min builder HAKO_MIR_BUILDER_BOX="hako.mir.builder.min" try_selfhost_builder "$prog_json" "$out_path" - return $? + local fallback_rc=$? + if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + echo "[builder/selfhost-first:min-fallback:rc=$fallback_rc]" >&2 + fi + return $fallback_rc fi # Return original failure if no fallback or if fallback not triggered @@ -451,21 +585,50 @@ static box Main { method main(args) { } } HCODE local tmp_stdout; tmp_stdout=$(mktemp) - trap 'rm -f "$tmp_hako" "$tmp_stdout" || true' RETURN + local tmp_stderr; tmp_stderr=$(mktemp) + trap 'rm -f "$tmp_hako" "$tmp_stdout" "$tmp_stderr" || true' RETURN + + if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + echo "[provider/emit:trace] Starting provider emit..." 
>&2 + fi + set +e (cd "$ROOT" && \ NYASH_DISABLE_PLUGINS="${NYASH_DISABLE_PLUGINS:-0}" NYASH_FILEBOX_MODE="core-ro" \ NYASH_PARSER_STAGE3=1 HAKO_PARSER_STAGE3=1 NYASH_PARSER_ALLOW_SEMICOLON=1 \ HAKO_BUILDER_PROGRAM_JSON="$prog_json" \ - "$NYASH_BIN" --backend vm "$tmp_hako" 2>&1 | tee "$tmp_stdout" >/dev/null) + "$NYASH_BIN" --backend vm "$tmp_hako" 2>"$tmp_stderr" | tee "$tmp_stdout" >/dev/null) local rc=$? set -e + + if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + echo "[provider/emit:trace] Provider execution completed with rc=$rc" >&2 + fi + if [ $rc -ne 0 ] || ! grep -q "\[provider/emit:ok\]" "$tmp_stdout"; then + if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + echo "[provider/emit:fail:rc=$rc]" >&2 + if [ -s "$tmp_stderr" ]; then + echo "[provider/emit:fail:stderr] First 20 lines:" >&2 + head -n 20 "$tmp_stderr" >&2 || true + echo "[provider/emit:fail:stderr] Last 40 lines:" >&2 + tail -n 40 "$tmp_stderr" >&2 || true + fi + echo "[provider/emit:fail:stdout] Last 40 lines:" >&2 + tail -n 40 "$tmp_stdout" >&2 || true + fi return 1 fi + local mir mir=$(awk '/\[MIR_OUT_BEGIN\]/{flag=1;next}/\[MIR_OUT_END\]/{flag=0}flag' "$tmp_stdout") - if [ -z "$mir" ]; then return 1; fi + if [ -z "$mir" ]; then + if [ "${HAKO_SELFHOST_TRACE:-0}" = "1" ]; then + echo "[provider/emit:fail:no-mir-output]" >&2 + fi + return 1 + fi + printf '%s' "$mir" > "$out_path" echo "[OK] MIR JSON written (delegate:provider): $out_path" return 0 @@ -489,8 +652,8 @@ fi # Dev: force JsonFrag minimal loop even on provider-first path if [ "${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-0}" = "1" ]; then - # Extract limit from Program(JSON) - limit=$(printf '%s' "$PROG_JSON_OUT" | grep -o '"type":"Int","value":[0-9]*' | head -1 | grep -o '[0-9]*$' || echo "10") + # Extract limit from Program(JSON) or source file; default to 10 when neither yields a number + limit=$(printf '%s' "$PROG_JSON_OUT" | grep -o '"type":"Int","value":[0-9]*' | head -1 | grep -o '[0-9]*$' || grep -o '[0-9]\+' "$CODE_TMP" | head -1 || true); limit="${limit:-10}" echo "[emit/jsonfrag] 
provider-force min-loop MIR (dev-only)" >&2 cat > "$OUT" </dev/null 2>&1) || true fi PYTHONPATH="${PYTHONPATH:-$ROOT}" \ + NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \ NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \ NYASH_EMIT_EXE_NYRT="${NYASH_EMIT_EXE_NYRT:-$ROOT/target/release}" \ NYASH_LLVM_USE_HARNESS=1 "$BIN" --backend llvm "$file" >/dev/null 2>&1 @@ -419,9 +420,10 @@ C sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE" ;; sieve) - # N: 上限値。EXEモードでデフォルトなら安全側に丸める + # N: 上限値。EXE モードは計測安定性のため C 実行時間が十分大きくなる既定値に固定 + # 既定 N=5,000,000 のまま維持(以前の 500,000 丸めはタイマ粒度ノイズを増やすため撤廃) if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then - N=500000 + N=5000000 fi HAKO_FILE=$(mktemp_hako) cat >"$HAKO_FILE" <"$HAKO_FILE" </dev/null 2>&1; then + echo "[SKIP] matmul emit unstable (try PERF_USE_JSONFRAG=1 for diagnosis)" >&2 + rm -f "$TMP_CHECK_JSON" "$HAKO_FILE" "$C_FILE" 2>/dev/null || true + exit 0 + fi + rm -f "$TMP_CHECK_JSON" 2>/dev/null || true + fi ;; linidx) # Linear index pattern: idx = i*cols + j @@ -794,7 +815,9 @@ if [[ "$EXE_MODE" = "1" ]]; then HAKO_EXE=$(mktemp --suffix .out) TMP_JSON=$(mktemp --suffix .json) # Default: use jsonfrag (stable/fast). Set PERF_USE_PROVIDER=1 to prefer provider/selfhost MIR. - if ! HAKO_SELFHOST_BUILDER_FIRST=1 \ + if ! 
\ + HAKO_SELFHOST_BUILDER_FIRST=0 HAKO_SELFHOST_NO_DELEGATE=0 \ + NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \ HAKO_MIR_BUILDER_LOOP_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_JSONFRAG:-$([[ "${PERF_USE_JSONFRAG:-0}" = 1 ]] && echo 1 || echo 0)}" \ HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG="${HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG:-$([[ "${PERF_USE_JSONFRAG:-0}" = 1 ]] && echo 1 || echo 0)}" \ HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE="${HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE:-1}" \ @@ -803,6 +826,33 @@ if [[ "$EXE_MODE" = "1" ]]; then NYASH_JSON_ONLY=1 bash "$ROOT/tools/hakorune_emit_mir.sh" "$HAKO_FILE" "$TMP_JSON" >/dev/null 2>&1; then echo "[FAIL] emit MIR JSON failed (hint: set PERF_USE_PROVIDER=1 or HAKO_MIR_BUILDER_LOOP_FORCE_JSONFRAG=1)" >&2; exit 3 fi + + # Optional AOT prep stage: apply pre-normalization/passes on MIR JSON before building EXE + # Enabled when fast/hoist/collections_hot are ON (we already set them explicitly above) + # This ensures EXE path receives the same optimized JSON as harness runs. + ( + PREP_HAKO=$(mktemp --suffix .hako) + cat >"$PREP_HAKO" <<'HAKO' +using selfhost.llvm.ir.aot_prep as AotPrepBox +static box Main { method main(args) { + local in = args.get(0) + local out = AotPrepBox.prep(in) + if out == null { println("[prep:fail]") return 1 } + println(out) + return 0 +} } +HAKO + set +e + OUT_PATH=$(NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 NYASH_FILEBOX_MODE=core-ro \ + NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_AOT_MAP_KEY_MODE=auto \ + "$BIN" --backend vm "$PREP_HAKO" -- "$TMP_JSON" 2>/dev/null | tail -n 1) + rc=$? + set -e + if [[ $rc -eq 0 && -f "$OUT_PATH" ]]; then + mv -f "$OUT_PATH" "$TMP_JSON" + fi + rm -f "$PREP_HAKO" 2>/dev/null || true + ) # Build EXE via helper (selects crate backend ny-llvmc under the hood) if ! NYASH_LLVM_BACKEND=crate NYASH_LLVM_SKIP_BUILD=1 \ NYASH_NY_LLVM_COMPILER="${NYASH_NY_LLVM_COMPILER:-$ROOT/target/release/ny-llvmc}" \