fix(parser): Stage-1 CLI infinite loop - MirBuilder-friendly refactoring (Part 1/3)

**Problem**: The Stage-1 CLI exhausts the VM step budget because several functions loop forever:
- ParserBox.parse_program2: bb2853→bb2854 loop
- StringHelpers.skip_ws: bb488→bb491 loop

**Root Cause**: MirBuilder miscompiles the `loop(cont == 1)` + nested `if-else` pattern
- It emits PHI instructions that reference themselves
- Loop exit blocks jump back to the loop header instead of to the continuation block

**Solution**: Refactor to MirBuilder-friendly pattern
```hako
// Before (causes infinite loop):
loop(cont == 1) {
  if guard > max { return j } else { guard++ }
  if condition { action } else { cont = 0 }
}

// After (MirBuilder-friendly):
loop(j < n) {
  if condition { j++; continue }
  break
}
```

**Changes**:
1. parser_box.hako: parse_program2 main loop refactored
   - 7 sections: ws→EOF/guard→parse_stmt2→progress guard→trace→semicolons→emit
   - Unconditional parse_stmt2 execution (no nested if-else)
   - Explicit `break` for loop exits

2. string_helpers.hako: StringHelpers.skip_ws simplified
   - Removed `cont` flag and guard counter
   - Direct `loop(j < n)` with `continue/break`

3. vm.rs, vm_fallback.rs: `RUST_MIR_DUMP_PATH` environment variable support
   - When set, the printed MIR module is written to the file at that path
   - Enables offline MIR analysis for debugging

**Progress**:
- [x] parse_program2: infinite loop fixed
- [x] StringHelpers.skip_ws: infinite loop fixed
- [ ] ParserIdentScanBox.scan_ident: next target (bb985 loop)

**Testing**:
- MIR dumps generated successfully (116K+ lines)
- ws_init loop completes
- parse_program2 progresses further
- Still hits infinite loop in scan_ident (Part 2 needed)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-11-24 18:19:31 +09:00
parent 6885235145
commit c6c5653ae0
4 changed files with 97 additions and 116 deletions

View File

@ -351,17 +351,20 @@ box ParserBox {
} }
loop(cont_prog == 1) { loop(cont_prog == 1) {
// ✅ TRACE: main_prog loop progress (Task先生推奨トレースポイント) // ===== 1) WS SKIP (inline, simple structure) =====
local _loop_info = "" loop(i < n) {
if trace == 1 && (guard_prog < 5 || (guard_prog % 10) == 0) { local ch_ws = src.substring(i, i + 1)
_loop_info = "[parse_program2:main] count=" + ("" + guard_prog) + if ch_ws == " " || ch_ws == "\n" || ch_ws == "\r" || ch_ws == "\t" {
" i=" + ("" + i) + " n=" + ("" + n) i = i + 1
if i < n { continue
local _end = i + 40
if _end > n { _end = n }
_loop_info = _loop_info + " ctx=\"" + src.substring(i, _end) + "\""
} }
print(_loop_info) break
}
// ===== 2) EOF/GUARD CHECK (early exit) =====
if i >= n {
cont_prog = 0
break
} }
if max_prog > 0 { if max_prog > 0 {
@ -371,117 +374,78 @@ box ParserBox {
print("[parser/program2:guard] max iteration reached at pos=" + ("" + i)) print("[parser/program2:guard] max iteration reached at pos=" + ("" + i))
} }
cont_prog = 0 cont_prog = 0
break
} }
} }
// Fail-fast: EOF まで進んでいれば即座にループを抜ける(無限走行防止)。
if i >= n { // ===== 3) PARSE_STMT2 + GPOS_GET (unconditional execution) =====
cont_prog = 0 local start_i = i
break local s = me.parse_stmt2(src, i)
i = me.gpos_get()
// ===== 4) PROGRESS GUARD (ensure i always advances) =====
if i <= start_i {
if trace == 1 {
print("[parser/trace:program2] progress-guard bump i from " + ("" + start_i) + " to " + ("" + (start_i + 1)))
}
i = start_i + 1
if i > n { i = n }
me.gpos_set(i)
} }
// ===== 5) TRACE (simplified, at end) =====
if trace == 1 { if trace == 1 {
local kind = "Stmt" print("[parser/trace:program2] i=" + ("" + i) + " stmt_len=" + ("" + s.length()) + " guard=" + ("" + guard_prog))
if i >= n { }
kind = "EOF"
// ===== 6) CONSUME OPTIONAL SEMICOLONS =====
local done_semi = 0
local guard_semi = 0
local max_semi = 100000
loop(done_semi == 0) {
if guard_semi > max_semi {
done_semi = 1
} else { } else {
if me.starts_with(src, i, "static box") == 1 { kind = "StaticBox" } guard_semi = guard_semi + 1
else {
if me.starts_with(src, i, "box ") == 1 { kind = "BoxDecl" }
else {
if me.starts_with(src, i, "method ") == 1 { kind = "Method" }
}
}
}
print("[parser/trace:program2] pos=" + ("" + i) + " kind=" + kind + " stage3=" + ("" + me.stage3))
// Dev-only: small preview of remaining source to debug top-level progress
{
local head = ""
if i < n {
local end = i + 40
if end > n { end = n }
head = src.substring(i, end)
}
print("[parser/trace:program2] head=\"" + head + "\"")
}
}
// Inline skip_ws instead of calling me.skip_ws(src, i)
if i < n {
local ws_cont_1 = 1
loop(ws_cont_1 == 1) {
if i < n {
local ch1 = src.substring(i, i + 1)
if ch1 == " " || ch1 == "\n" || ch1 == "\r" || ch1 == "\t" { i = i + 1 }
else { ws_cont_1 = 0 }
} else { ws_cont_1 = 0 }
}
}
if i >= src.length() {
cont_prog = 0
} else {
local start_i = i
if trace == 1 {
print("[parser/trace:program2] before_stmt i=" + ("" + i))
}
local s = me.parse_stmt2(src, i)
i = me.gpos_get()
if trace == 1 {
print("[parser/trace:program2] after_stmt i=" + ("" + i) + " stmt_len=" + ("" + s.length()))
} }
// Progress guard local before_semi = i
if i <= start_i {
if trace == 1 {
print("[parser/trace:program2] progress-guard bump i from " + ("" + start_i) + " to " + ("" + (start_i + 1)))
}
if i < src.length() { i = i + 1 }
else { i = src.length() }
me.gpos_set(i)
}
// consume optional semicolons // Inline ws skip
local done2 = 0 loop(i < n) {
local guard2 = 0 local ch_semi = src.substring(i, i + 1)
local max2 = 100000 if ch_semi == " " || ch_semi == "\n" || ch_semi == "\r" || ch_semi == "\t" {
loop(done2 == 0) {
if guard2 > max2 { done2 = 1 }
else { guard2 = guard2 + 1 }
local before2 = i
// Inline skip_ws instead of calling me.skip_ws(src, i)
if i < n {
local ws_cont_2 = 1
loop(ws_cont_2 == 1) {
if i < n {
local ch2 = src.substring(i, i + 1)
if ch2 == " " || ch2 == "\n" || ch2 == "\r" || ch2 == "\t" { i = i + 1 }
else { ws_cont_2 = 0 }
} else { ws_cont_2 = 0 }
}
}
if i < src.length() && src.substring(i, i+1) == ";" {
i = i + 1 i = i + 1
} else { continue
done2 = 1
} }
break
if i == before2 { done2 = 1 }
} }
if s.length() > 0 { if i < n && src.substring(i, i+1) == ";" {
if first == 1 { i = i + 1
if trace == 1 { } else {
print("[parser/trace:program2] emit-first stmt_len=" + ("" + s.length())) done_semi = 1
} }
body = body + s
first = 0 if i == before_semi {
} else { done_semi = 1
if trace == 1 { }
print("[parser/trace:program2] emit stmt_len=" + ("" + s.length())) }
}
body = body + "," + s // ===== 7) EMIT STATEMENT =====
if s.length() > 0 {
if first == 1 {
if trace == 1 {
print("[parser/trace:program2] emit-first stmt_len=" + ("" + s.length()))
} }
body = body + s
first = 0
} else {
if trace == 1 {
print("[parser/trace:program2] emit stmt_len=" + ("" + s.length()))
}
body = body + "," + s
} }
} }
} }

View File

@ -172,14 +172,13 @@ static box StringHelpers {
local s = "" + src local s = "" + src
local n = s.length() local n = s.length()
local j = i local j = i
local cont = 1 // Simplified loop structure for MirBuilder compatibility
local guard = 0 loop(j < n) {
local max = 100000 if me.is_space(s.substring(j, j+1)) {
loop(cont == 1) { j = j + 1
if guard > max { return j } else { guard = guard + 1 } continue
if j < n { }
if me.is_space(s.substring(j, j+1)) { j = j + 1 } else { cont = 0 } break
} else { cont = 0 }
} }
return j return j
} }

View File

@ -431,6 +431,15 @@ impl NyashRunner {
} }
// Optional: dump MIR for diagnostics // Optional: dump MIR for diagnostics
// Phase 25.1: File dump for offline analysis (ParserBox等)
if let Ok(path) = std::env::var("RUST_MIR_DUMP_PATH") {
if let Ok(mut f) = std::fs::File::create(&path) {
let p = crate::mir::MirPrinter::new();
let _ = std::io::Write::write_all(&mut f, p.print_module(&module_vm).as_bytes());
eprintln!("[vm] MIR dumped to: {}", path);
}
}
// Existing: NYASH_VM_DUMP_MIR dumps to stderr
if crate::config::env::env_bool("NYASH_VM_DUMP_MIR") { if crate::config::env::env_bool("NYASH_VM_DUMP_MIR") {
let p = crate::mir::MirPrinter::new(); let p = crate::mir::MirPrinter::new();
eprintln!("{}", p.print_module(&module_vm)); eprintln!("{}", p.print_module(&module_vm));

View File

@ -294,6 +294,15 @@ impl NyashRunner {
} }
// Optional: dump MIR for diagnostics (parity with vm path) // Optional: dump MIR for diagnostics (parity with vm path)
// Phase 25.1: File dump for offline analysis (ParserBox等)
if let Ok(path) = std::env::var("RUST_MIR_DUMP_PATH") {
if let Ok(mut f) = std::fs::File::create(&path) {
let p = crate::mir::MirPrinter::new();
let _ = std::io::Write::write_all(&mut f, p.print_module(&module_vm).as_bytes());
eprintln!("[vm-fallback] MIR dumped to: {}", path);
}
}
// Existing: NYASH_VM_DUMP_MIR dumps to stderr
if crate::config::env::env_bool("NYASH_VM_DUMP_MIR") { if crate::config::env::env_bool("NYASH_VM_DUMP_MIR") {
let p = crate::mir::MirPrinter::new(); let p = crate::mir::MirPrinter::new();
eprintln!("{}", p.print_module(&module_vm)); eprintln!("{}", p.print_module(&module_vm));