hv1: early-exit at main (no plugin init); tokenizer: Stage-3 single-quote + full escapes (\/ \b \f \' \r fix); builder: route BinOp via SSOT emit_binop_to_dst; hv1 verify canary route (builder→Core); docs: phase-20.39 updates

This commit is contained in:
nyash-codex
2025-11-04 20:46:43 +09:00
parent 31ce798341
commit 44a5158a14
53 changed files with 2237 additions and 179 deletions

View File

@ -81,6 +81,29 @@ box ParserBox {
read_ident2(src, i) { return ParserIdentScanBox.scan_ident(src, i) }
read_string_lit(src, i) {
local q0 = src.substring(i, i + 1)
// Check for single quote (Stage-3 only)
if q0 == "'" {
if me.stage3_enabled() == 1 {
// Single-quote string in Stage-3
local pair = ParserStringScanBox.scan_with_quote(src, i, "'")
local at = pair.lastIndexOf("@")
local content = pair.substring(0, at)
local pos = 0
if at >= 0 { pos = me.to_int(pair.substring(at+1, pair.length())) }
else { pos = i }
me.gpos_set(pos)
return content
} else {
// Single-quote not allowed, degrade gracefully
// Return empty string and advance 1 char
me.gpos_set(i + 1)
return ""
}
}
// Double-quote string (existing path)
local pair = ParserStringScanBox.scan(src, i)
local at = pair.lastIndexOf("@")
local content = pair.substring(0, at)

View File

@ -7,11 +7,12 @@
using lang.compiler.parser.scan.parser_common_utils_box as ParserCommonUtilsBox
static box ParserStringScanBox {
scan(src, i) {
// Generic scanner with quote abstraction (quote is "\"" or "'")
scan_with_quote(src, i, quote) {
if src == null { return "@" + ParserCommonUtilsBox.i2s(i) }
local n = src.length()
local j = i
if j >= n || src.substring(j, j+1) != "\"" { return "@" + ParserCommonUtilsBox.i2s(i) }
if j >= n || src.substring(j, j+1) != quote { return "@" + ParserCommonUtilsBox.i2s(i) }
j = j + 1
local out = ""
local guard = 0
@ -19,25 +20,58 @@ static box ParserStringScanBox {
loop(j < n) {
if guard > max { break } else { guard = guard + 1 }
local ch = src.substring(j, j+1)
if ch == "\"" {
// End of string: found matching quote
if ch == quote {
j = j + 1
return out + "@" + ParserCommonUtilsBox.i2s(j)
}
// Escape sequence
if ch == "\\" && j + 1 < n {
local nx = src.substring(j+1, j+2)
if nx == "\"" { out = out + "\"" j = j + 2 }
else {
if nx == "\\" { out = out + "\\" j = j + 2 } else {
if nx == "n" { out = out + "\n" j = j + 2 } else {
if nx == "r" { out = out + "\n" j = j + 2 } else {
if nx == "t" { out = out + "\t" j = j + 2 } else {
if nx == "u" && j + 5 < n { out = out + src.substring(j, j+6) j = j + 6 }
else { out = out + nx j = j + 2 }
}
}
}
}
}
// Decode escape
if nx == "\\" {
out = out + "\\"
j = j + 2
} else { if nx == "\"" {
out = out + "\""
j = j + 2
} else { if nx == "'" {
out = out + "'"
j = j + 2
} else { if nx == "/" {
out = out + "/"
j = j + 2
} else { if nx == "b" {
// Backspace (0x08) - MVP limitation: the two-character escape is consumed and nothing is appended to the output
out = out + ""
j = j + 2
} else { if nx == "f" {
// Form feed (0x0C) - MVP limitation: the two-character escape is consumed and nothing is appended to the output
out = out + ""
j = j + 2
} else { if nx == "n" {
out = out + "\n"
j = j + 2
} else { if nx == "r" {
// FIX: \r should be CR (0x0D), not LF (0x0A)
// Keep as "\r" literal for MVP
out = out + "\r"
j = j + 2
} else { if nx == "t" {
out = out + "\t"
j = j + 2
} else { if nx == "u" && j + 5 < n {
// \uXXXX: MVP - concatenate as-is (6 chars)
out = out + src.substring(j, j+6)
j = j + 6
} else {
// Unknown escape: tolerate (keep backslash + char)
out = out + "\\" + nx
j = j + 2
} } } } } } } } } }
} else {
out = out + ch
j = j + 1
@ -46,5 +80,10 @@ static box ParserStringScanBox {
// if unterminated, return what we have and the last pos to avoid infinite loops
return out + "@" + ParserCommonUtilsBox.i2s(j)
}
// Backward-compatible entry point for double-quoted string literals.
// Forwards to scan_with_quote with the double-quote character as the
// delimiter and returns its result unchanged (scanned content followed
// by "@" and a position index).
scan(src, i) {
    local dq = "\""
    return me.scan_with_quote(src, i, dq)
}
}