From 844e53d96c491f917e2011847f15c4db22f196cb Mon Sep 17 00:00:00 2001 From: nyash-codex Date: Tue, 25 Nov 2025 00:16:29 +0900 Subject: [PATCH] fix(parser): Part 5 - eliminate infinite loops in parse_string2 and scan_with_quote MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: Nested escape sequence processing in string parsing functions caused infinite loops due to missing continue statements, allowing fallthrough to the else block which corrupted the position counter. Fixed functions: - parser_expr_box.hako::parse_string2 (lines 22-83) - parser_string_scan_box.hako::scan_with_quote (lines 11-101) Changes: - Flattened nested 'else { if ... }' chains to flat 'if' statements - Added explicit 'continue' after each escape sequence processing - Prevents fallthrough to position increment code Verification: ✅ NYASH_USE_STAGE1_CLI=1 STAGE1_EMIT_PROGRAM_JSON=1 HAKO_VM_MAX_STEPS=2000000 apps/tests/minimal_ssa_skip_ws.hako ✅ No budget exceeded ✅ Program JSON v0 output to stdout ✅ Exit code: 0 MirBuilder-friendly pattern applied consistently with Parts 1-4. Stage-1 CLI infinite loop elimination COMPLETE! --- .../compiler/parser/expr/parser_expr_box.hako | 46 ++++++++++++---- .../parser/scan/parser_string_scan_box.hako | 53 +++++++++++++------ 2 files changed, 72 insertions(+), 27 deletions(-) diff --git a/lang/src/compiler/parser/expr/parser_expr_box.hako b/lang/src/compiler/parser/expr/parser_expr_box.hako index 8e5a67d8..508d11e2 100644 --- a/lang/src/compiler/parser/expr/parser_expr_box.hako +++ b/lang/src/compiler/parser/expr/parser_expr_box.hako @@ -39,17 +39,43 @@ static box ParserExprBox { if ch == "\\" && j + 1 < n { local nx = src.substring(j+1, j+2) - if nx == "\"" { out = out + "\"" j = j + 2 } - else { if nx == "\\" { out = out + "\\" j = j + 2 } - else { if nx == "n" { out = out + "\n" j = j + 2 } - else { if nx == "r" { out = out + "\r" j = j + 2 } - else { if nx == "t" { out = out + "\t" j = j + 2 } - else { if nx == "u" && j + 5 < n { out = out + src.substring(j, j+6) j = j + 6 } - else { out = out + nx j = j + 2 } } } } } } - } else { - out = out + ch - j = j + 1 + if nx == "\"" { + out = out + "\"" + j = j + 2 + continue + } + if nx == "\\" { + out = out + "\\" + j = j + 2 + continue + } + if nx == "n" { + out = out + "\n" + j = j + 2 + continue + } + if nx == "r" { + out = out + "\r" + j = j + 2 + continue + } + if nx == "t" { + out = out + "\t" + j = j + 2 + continue + } + if nx == "u" && j + 5 < n { + out = out + src.substring(j, j+6) + j = j + 6 + continue + } + out = out + nx + j = j + 2 + continue } + + out = out + ch + j = j + 1 } ctx.gpos_set(j) diff --git a/lang/src/compiler/parser/scan/parser_string_scan_box.hako b/lang/src/compiler/parser/scan/parser_string_scan_box.hako index 80b60024..748112b7 100644 --- a/lang/src/compiler/parser/scan/parser_string_scan_box.hako +++ b/lang/src/compiler/parser/scan/parser_string_scan_box.hako @@ -35,47 +35,66 @@ static box ParserStringScanBox { if nx == "\\" { out = out + "\\" j = j + 2 - } else { if nx == "\"" { + continue + } + if nx == "\"" { out = out + "\"" j = j + 2 - } else { if nx == "'" { + continue + } + if nx == "'" { out = out + "'" j = j + 2 - } else { if nx == "/" { + continue + } + if nx == "/" { out = out + "/" j = j + 2 - } else { if nx == "b" { + continue + } + if nx == "b" { // Backspace (0x08) - for MVP, skip (empty string) out = out + "" j = j + 2 - } else { if nx == "f" { + continue + } + if nx == "f" { // Form feed (0x0C) - for MVP, skip (empty string) out = out + "" j = j + 2 - } else { if nx == "n" { + continue + } + if nx == "n" { out = out + "\n" j = j + 2 - } else { if nx == "r" { + continue + } + if nx == "r" { // FIX: \r should be CR (0x0D), not LF (0x0A) // Keep as "\r" literal for MVP out = out + "\r" j = j + 2 - } else { if nx == "t" { + continue + } + if nx == "t" { out = out + "\t" j = j + 2 - } else { if nx == "u" && j + 5 < n { + continue + } + if nx == "u" && j + 5 < n { // \uXXXX: MVP - concatenate as-is (6 chars) out = out + src.substring(j, j+6) j = j + 6 - } else { - // Unknown escape: tolerate (keep backslash + char) - out = out + "\\" + nx - j = j + 2 - } } } } } } } } } } - } else { - out = out + ch - j = j + 1 + continue + } + // Unknown escape: tolerate (keep backslash + char) + out = out + "\\" + nx + j = j + 2 + continue } + + out = out + ch + j = j + 1 } // if unterminated, return what we have and the last pos to avoid infinite loops return out + "@" + ParserCommonUtilsBox.i2s(j)