fix(parser): Stage-1 CLI infinite loop - MirBuilder-friendly refactoring (Part 1/3)

**Problem**: Stage-1 CLI hits VM step budget (infinite loops) in multiple functions
- ParserBox.parse_program2: bb2853→bb2854 loop
- StringHelpers.skip_ws: bb488→bb491 loop

**Root Cause**: MirBuilder bug with `loop(cont==1)` + nested `if-else` pattern
- PHI instructions with self-references
- Loop exit blocks jumping back to header instead of continuation

**Solution**: Refactor to MirBuilder-friendly pattern

```hako
// Before (causes infinite loop):
loop(cont == 1) {
  if guard > max { return j } else { guard++ }
  if condition { action } else { cont = 0 }
}

// After (MirBuilder-friendly):
loop(j < n) {
  if condition { j++; continue }
  break
}
```

**Changes**:
1. parser_box.hako: parse_program2 main loop refactored
   - 7 sections: ws→EOF/guard→parse_stmt2→progress guard→trace→semicolons→emit
   - Unconditional parse_stmt2 execution (no nested if-else)
   - Explicit `break` for loop exits
2. string_helpers.hako: StringHelpers.skip_ws simplified
   - Removed `cont` flag and guard counter
   - Direct `loop(j < n)` with `continue/break`
3. vm.rs, vm_fallback.rs: RUST_MIR_DUMP_PATH support
   - Enables offline MIR analysis for debugging

**Progress**:
- ✅ parse_program2: infinite loop fixed
- ✅ StringHelpers.skip_ws: infinite loop fixed
- ⏳ ParserIdentScanBox.scan_ident: next target (bb985 loop)

**Testing**:
- MIR dumps generated successfully (116K+ lines)
- ws_init loop completes
- parse_program2 progresses further
- Still hits infinite loop in scan_ident (Part 2 needed)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-24 18:19:31 +09:00
parent 6885235145
commit c6c5653ae0
4 changed files with 97 additions and 116 deletions
--- a/lang/src/compiler/parser/parser_box.hako
+++ b/lang/src/compiler/parser/parser_box.hako
@ -351,17 +351,20 @@ box ParserBox {
    }
    loop(cont_prog == 1) {
-      // ✅ TRACE: main_prog loop progress (Task先生推奨トレースポイント)
+      // ===== 1) WS SKIP (inline, simple structure) =====
-      local _loop_info = ""
+      loop(i < n) {
-      if trace == 1 && (guard_prog < 5 || (guard_prog % 10) == 0) {
+        local ch_ws = src.substring(i, i + 1)
-        _loop_info = "[parse_program2:main] count=" + ("" + guard_prog) +
+        if ch_ws == " " || ch_ws == "\n" || ch_ws == "\r" || ch_ws == "\t" {
-                     " i=" + ("" + i) + " n=" + ("" + n)
+          i = i + 1
-        if i < n {
+          continue
          local _end = i + 40
          if _end > n { _end = n }
          _loop_info = _loop_info + " ctx=\"" + src.substring(i, _end) + "\""
        }
-        print(_loop_info)
+        break
      }
      // ===== 2) EOF/GUARD CHECK (early exit) =====
      if i >= n {
        cont_prog = 0
        break
      }
      if max_prog > 0 {
@ -371,117 +374,78 @@ box ParserBox {
            print("[parser/program2:guard] max iteration reached at pos=" + ("" + i))
          }
          cont_prog = 0
          break
        }
      }
-      // Fail-fast: EOF まで進んでいれば即座にループを抜ける（無限走行防止）。
+
-      if i >= n {
+      // ===== 3) PARSE_STMT2 + GPOS_GET (unconditional execution) =====
-        cont_prog = 0
+      local start_i = i
-        break
+      local s = me.parse_stmt2(src, i)
      i = me.gpos_get()
      // ===== 4) PROGRESS GUARD (ensure i always advances) =====
      if i <= start_i {
        if trace == 1 {
          print("[parser/trace:program2] progress-guard bump i from " + ("" + start_i) + " to " + ("" + (start_i + 1)))
        }
        i = start_i + 1
        if i > n { i = n }
        me.gpos_set(i)
      }
      // ===== 5) TRACE (simplified, at end) =====
      if trace == 1 {
-        local kind = "Stmt"
+        print("[parser/trace:program2] i=" + ("" + i) + " stmt_len=" + ("" + s.length()) + " guard=" + ("" + guard_prog))
-        if i >= n {
+      }
-          kind = "EOF"
+
      // ===== 6) CONSUME OPTIONAL SEMICOLONS =====
      local done_semi = 0
      local guard_semi = 0
      local max_semi = 100000
      loop(done_semi == 0) {
        if guard_semi > max_semi {
          done_semi = 1
        } else {
-          if me.starts_with(src, i, "static box") == 1 { kind = "StaticBox" }
+          guard_semi = guard_semi + 1
          else {
            if me.starts_with(src, i, "box ") == 1 { kind = "BoxDecl" }
            else {
              if me.starts_with(src, i, "method ") == 1 { kind = "Method" }
            }
          }
        }
        print("[parser/trace:program2] pos=" + ("" + i) + " kind=" + kind + " stage3=" + ("" + me.stage3))
        // Dev-only: small preview of remaining source to debug top-level progress
        {
          local head = ""
          if i < n {
            local end = i + 40
            if end > n { end = n }
            head = src.substring(i, end)
          }
          print("[parser/trace:program2] head=\"" + head + "\"")
        }
      }
      // Inline skip_ws instead of calling me.skip_ws(src, i)
      if i < n {
        local ws_cont_1 = 1
        loop(ws_cont_1 == 1) {
          if i < n {
            local ch1 = src.substring(i, i + 1)
            if ch1 == " " || ch1 == "\n" || ch1 == "\r" || ch1 == "\t" { i = i + 1 }
            else { ws_cont_1 = 0 }
          } else { ws_cont_1 = 0 }
        }
      }
      if i >= src.length() {
        cont_prog = 0
      } else {
        local start_i = i
        if trace == 1 {
          print("[parser/trace:program2] before_stmt i=" + ("" + i))
        }
        local s = me.parse_stmt2(src, i)
        i = me.gpos_get()
        if trace == 1 {
          print("[parser/trace:program2] after_stmt i=" + ("" + i) + " stmt_len=" + ("" + s.length()))
        }
-        // Progress guard
+        local before_semi = i
        if i <= start_i {
          if trace == 1 {
            print("[parser/trace:program2] progress-guard bump i from " + ("" + start_i) + " to " + ("" + (start_i + 1)))
          }
          if i < src.length() { i = i + 1 }
          else { i = src.length() }
          me.gpos_set(i)
        }
-        // consume optional semicolons
+        // Inline ws skip
-        local done2 = 0
+        loop(i < n) {
-        local guard2 = 0
+          local ch_semi = src.substring(i, i + 1)
-        local max2 = 100000
+          if ch_semi == " " || ch_semi == "\n" || ch_semi == "\r" || ch_semi == "\t" {
        loop(done2 == 0) {
          if guard2 > max2 { done2 = 1 }
          else { guard2 = guard2 + 1 }
          local before2 = i
          // Inline skip_ws instead of calling me.skip_ws(src, i)
          if i < n {
            local ws_cont_2 = 1
            loop(ws_cont_2 == 1) {
              if i < n {
                local ch2 = src.substring(i, i + 1)
                if ch2 == " " || ch2 == "\n" || ch2 == "\r" || ch2 == "\t" { i = i + 1 }
                else { ws_cont_2 = 0 }
              } else { ws_cont_2 = 0 }
            }
          }
          if i < src.length() && src.substring(i, i+1) == ";" {
            i = i + 1
-          } else {
+            continue
            done2 = 1
          }
-
+          break
          if i == before2 { done2 = 1 }
        }
-        if s.length() > 0 {
+        if i < n && src.substring(i, i+1) == ";" {
-          if first == 1 {
+          i = i + 1
-            if trace == 1 {
+        } else {
-              print("[parser/trace:program2] emit-first stmt_len=" + ("" + s.length()))
+          done_semi = 1
-            }
+        }
-            body = body + s
+
-            first = 0
+        if i == before_semi {
-          } else {
+          done_semi = 1
-            if trace == 1 {
+        }
-              print("[parser/trace:program2] emit stmt_len=" + ("" + s.length()))
+      }
-            }
+
-            body = body + "," + s
+      // ===== 7) EMIT STATEMENT =====
      if s.length() > 0 {
        if first == 1 {
          if trace == 1 {
            print("[parser/trace:program2] emit-first stmt_len=" + ("" + s.length()))
          }
          body = body + s
          first = 0
        } else {
          if trace == 1 {
            print("[parser/trace:program2] emit stmt_len=" + ("" + s.length()))
          }
          body = body + "," + s
        }
      }
    }
--- a/lang/src/shared/common/string_helpers.hako
+++ b/lang/src/shared/common/string_helpers.hako
@ -172,14 +172,13 @@ static box StringHelpers {
    local s = "" + src
    local n = s.length()
    local j = i
-    local cont = 1
+    // Simplified loop structure for MirBuilder compatibility
-    local guard = 0
+    loop(j < n) {
-    local max = 100000
+      if me.is_space(s.substring(j, j+1)) {
-    loop(cont == 1) {
+        j = j + 1
-      if guard > max { return j } else { guard = guard + 1 }
+        continue
-      if j < n {
+      }
-        if me.is_space(s.substring(j, j+1)) { j = j + 1 } else { cont = 0 }
+      break
      } else { cont = 0 }
    }
    return j
  }
--- a/src/runner/modes/vm.rs
+++ b/src/runner/modes/vm.rs
@ -431,6 +431,15 @@ impl NyashRunner {
        }
        // Optional: dump MIR for diagnostics
        // Phase 25.1: File dump for offline analysis (ParserBox等)
        if let Ok(path) = std::env::var("RUST_MIR_DUMP_PATH") {
            if let Ok(mut f) = std::fs::File::create(&path) {
                let p = crate::mir::MirPrinter::new();
                let _ = std::io::Write::write_all(&mut f, p.print_module(&module_vm).as_bytes());
                eprintln!("[vm] MIR dumped to: {}", path);
            }
        }
        // Existing: NYASH_VM_DUMP_MIR dumps to stderr
        if crate::config::env::env_bool("NYASH_VM_DUMP_MIR") {
            let p = crate::mir::MirPrinter::new();
            eprintln!("{}", p.print_module(&module_vm));
--- a/src/runner/modes/vm_fallback.rs
+++ b/src/runner/modes/vm_fallback.rs
@ -294,6 +294,15 @@ impl NyashRunner {
        }
        // Optional: dump MIR for diagnostics (parity with vm path)
        // Phase 25.1: File dump for offline analysis (ParserBox等)
        if let Ok(path) = std::env::var("RUST_MIR_DUMP_PATH") {
            if let Ok(mut f) = std::fs::File::create(&path) {
                let p = crate::mir::MirPrinter::new();
                let _ = std::io::Write::write_all(&mut f, p.print_module(&module_vm).as_bytes());
                eprintln!("[vm-fallback] MIR dumped to: {}", path);
            }
        }
        // Existing: NYASH_VM_DUMP_MIR dumps to stderr
        if crate::config::env::env_bool("NYASH_VM_DUMP_MIR") {
            let p = crate::mir::MirPrinter::new();
            eprintln!("{}", p.print_module(&module_vm));