Files
hakorune/apps/selfhost-compiler/boxes/parser_box.nyash
Selfhosting Dev 2720884a20 bridge/json_v0: split expr lowering; add Ternary/Peek lowering + AST; selfhost Peek JSON emit; add selfhost Peek smoke; warning cleanup in lowering/optimizer/verification
- Split expr lowering into ; route calls from stmt lowering
- Implement ternary/peek lowering (MIR13 PHI-off=Copy, PHI-on=Phi)
- Extend JSON v0 AST (ExprV0::{Ternary,Peek}, PeekArmV0)
- Selfhost parser_box: emit Peek JSON; add Stage-2 'Peek basic' smoke
- Reduce warnings: remove unused imports/vars in several modules
- current_task: update plan for legacy VM/Interpreter offboarding
2025-09-17 11:45:57 +09:00

908 lines
37 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ParserBox — Stage1 JSON v0 generatorextracted, simplified for Rust parser
box ParserBox {
gpos
usings_json
stage3
// Constructor: start with the cursor at 0, an empty usings array ("[]")
// and Stage-3 output switched off. Always returns 0.
birth() {
    me.stage3 = 0
    me.usings_json = "[]"
    me.gpos = 0
    return 0
}
// Switch the Stage-3 flag: null or 0 turns it off, any other value turns
// it on. The flag is stored normalised to 0/1. Always returns 0.
stage3_enable(flag) {
    local on = 1
    if flag == null { on = 0 } else { if flag == 0 { on = 0 } }
    me.stage3 = on
    return 0
}
// Report the Stage-3 flag as 1 (on) or 0 (anything else).
stage3_enabled() {
    if me.stage3 != 1 { return 0 }
    return 1
}
// Escape `s` so it can be placed between double quotes in the emitted JSON.
// Escaped characters: backslash, double quote, and the control characters
// newline, carriage return and tab (JSON does not allow these raw inside a
// string). Every other character is copied through unchanged; other control
// characters below U+0020 are NOT handled here.
// Returns the escaped copy; `s` itself is not modified.
esc_json(s) {
    local out = ""
    local i = 0
    local n = s.length()
    loop(i < n) {
        local ch = s.substring(i, i+1)
        // `rep` is what gets appended for this character (default: itself)
        local rep = ch
        if ch == "\\" { rep = "\\\\" }
        if ch == "\"" { rep = "\\\"" }
        if ch == "\n" { rep = "\\n" }
        if ch == "\r" { rep = "\\r" }
        if ch == "\t" { rep = "\\t" }
        out = out + rep
        i = i + 1
    }
    return out
}
// True when `ch` compares between "0" and "9" inclusive.
is_digit(ch) {
    local at_least_zero = ch >= "0"
    local at_most_nine = ch <= "9"
    return at_least_zero && at_most_nine
}
// True when `ch` is a space, tab, line feed or carriage return.
is_space(ch) {
    local blank = ch == " " || ch == "\t"
    local line_end = ch == "\n" || ch == "\r"
    return blank || line_end
}
// Identifier-start check: ASCII letter (either case) or underscore.
is_alpha(ch) {
    local upper = ch >= "A" && ch <= "Z"
    local lower = ch >= "a" && ch <= "z"
    return upper || lower || ch == "_"
}
// Store `i` as the shared parse cursor. Always returns 0.
gpos_set(i) {
    me.gpos = i
    return 0
}
// Read back the shared parse cursor last stored with gpos_set.
gpos_get() {
    local cur = me.gpos
    return cur
}
// lightweight string helpers
// starts_with: 1 when `pat` occurs in `src` exactly at index `i`, else 0.
// Returns 0 without comparing when `pat` would run past the end of `src`.
starts_with(src, i, pat) {
    local pat_len = pat.length()
    if i + pat_len > src.length() { return 0 }
    local off = 0
    loop(off < pat_len) {
        local have = src.substring(i + off, i + off + 1)
        local want = pat.substring(off, off + 1)
        if have != want { return 0 }
        off = off + 1
    }
    return 1
}
// Find the first occurrence of `pat` in `src` at or after index `i`.
// Returns the index, `i` itself for an empty pattern, or -1 when absent.
index_of(src, i, pat) {
    local pat_len = pat.length()
    if pat_len == 0 { return i }
    // last index at which the pattern can still fit
    local last = src.length() - pat_len
    local at = i
    loop(at <= last) {
        if me.starts_with(src, at, pat) { return at }
        at = at + 1
    }
    return -1
}
// Strip leading whitespace and trailing whitespace/semicolons from `s`.
// Whitespace is whatever me.is_space accepts (space, tab, LF, CR), so a
// line taken from CRLF input no longer keeps its trailing "\r".
// Returns the trimmed substring (possibly empty).
trim(s) {
    local n = s.length()
    local i = 0
    loop(i < n && me.is_space(s.substring(i, i+1))) { i = i + 1 }
    local j = n
    loop(j > i) {
        local c = s.substring(j-1, j)
        // ";" is only stripped at the end, never at the start
        if me.is_space(c) || c == ";" { j = j - 1 } else { break }
    }
    return s.substring(i, j)
}
// Keyword test with a word boundary: 1 when `kw` sits at src[i] and the
// following character (if any) is not a letter, underscore or digit;
// otherwise 0.
starts_with_kw(src, i, kw) {
    if me.starts_with(src, i, kw) == 0 { return 0 }
    local after = i + kw.length()
    // keyword runs to the end of the input: boundary is satisfied
    if after >= src.length() { return 1 }
    local nxt = src.substring(after, after+1)
    local in_word = me.is_alpha(nxt) || me.is_digit(nxt)
    if in_word { return 0 }
    return 1
}
// Convert a value to its string form by concatenating it onto "".
i2s(v) {
    local text = "" + v
    return text
}
// Read an identifier [A-Za-z_][A-Za-z0-9_]* beginning at src[i].
// Result is "name@pos", where pos is the index just past the identifier.
// When no identifier starts at i (end of input or a non-letter) the name
// part is empty and pos is i, i.e. the result is "@" + i.
read_ident2(src, i) {
    local len = src.length()
    local miss = "@" + me.i2s(i)
    if i >= len { return miss }
    if me.is_alpha(src.substring(i, i+1)) == 0 { return miss }
    local stop = i + 1
    loop(stop < len) {
        local c = src.substring(stop, stop+1)
        if me.is_alpha(c) || me.is_digit(c) { stop = stop + 1 } else { break }
    }
    return src.substring(i, stop) + "@" + me.i2s(stop)
}
// Read a double-quoted string literal whose opening quote is at src[i].
// Returns the content without the quotes and stores the index just past the
// closing quote in gpos. A backslash makes the following character literal
// (the backslash is dropped and the next character is copied as-is).
// If src[i] is not a quote, returns "" and leaves gpos at i. If the literal
// is unterminated (or the iteration cap is hit), returns what was read so
// far with gpos at the stopping index.
read_string_lit(src, i) {
    local len = src.length()
    if i >= len { me.gpos_set(i) return "" }
    if src.substring(i, i+1) != "\"" { me.gpos_set(i) return "" }
    local pos = i + 1
    local text = ""
    local steps = 0
    local limit = 200000
    loop(pos < len) {
        if steps > limit { break }
        steps = steps + 1
        local c = src.substring(pos, pos+1)
        if c == "\"" {
            me.gpos_set(pos + 1)
            return text
        }
        if c == "\\" && pos + 1 < len {
            // escaped character: keep it verbatim, skip the backslash
            text = text + src.substring(pos+1, pos+2)
            pos = pos + 2
        } else {
            text = text + c
            pos = pos + 1
        }
    }
    me.gpos_set(pos)
    return text
}
// Append one entry to the JSON array kept in me.usings_json.
//   kind   : "path" for a file path; any other value is treated as a namespace
//   target : the path or the namespace
//   alias  : optional (null when absent); when given it becomes the name
// For a path without alias the name is the basename of the path with a
// trailing ".nyash" removed. Path entries also carry a "path" field.
// Always returns 0.
add_using(kind, target, alias) {
    local name = ""
    local path = null
    if kind != "path" {
        name = target
        if alias != null { name = alias }
    } else {
        path = target
        if alias != null { name = alias } else {
            local base = target
            // position of the last '/', or -1 when there is none
            local slash = -1
            local k = 0
            loop(k < base.length()) {
                if base.substring(k, k+1) == "/" { slash = k }
                k = k + 1
            }
            if slash >= 0 { base = base.substring(slash+1, base.length()) }
            if base.length() > 6 && me.starts_with(base, base.length()-6, ".nyash") == 1 { base = base.substring(0, base.length()-6) }
            name = base
        }
    }
    // Build the JSON object for this entry
    local entry = "{\"name\":\"" + me.esc_json(name) + "\""
    if path != null { entry = entry + ",\"path\":\"" + me.esc_json(path) + "\"" }
    entry = entry + "}"
    // Splice it in before the closing ']' of the stored array
    local cur = me.usings_json
    local fresh = "[" + entry + "]"
    if cur == null { me.usings_json = fresh return 0 }
    if cur.length() == 0 { me.usings_json = fresh return 0 }
    if cur == "[]" { me.usings_json = fresh return 0 }
    local close = cur.lastIndexOf("]")
    if close < 0 { me.usings_json = fresh return 0 }
    me.usings_json = cur.substring(0, close) + "," + entry + "]"
    return 0
}
// Collect `using` lines into a JSON array stored in me.usings_json.
// `src` is scanned line by line (split on "\n"). A line counts when, after
// leading spaces/tabs, it starts with "using ". The remainder is trimmed and
// optionally split on " as " into target and alias.
// A target is treated as a path when it starts with a double quote, "./" or
// "/", or ends with ".nyash"; otherwise it is a namespace.
//   path      -> {"name": alias or basename without ".nyash", "path": target}
//   namespace -> {"name": alias or target}
// The alias is honoured for namespaces too, matching add_using.
// A null `src` resets the array to "[]". Always returns 0.
extract_usings(src) {
    if src == null { me.usings_json = "[]" return 0 }
    local n = src.length()
    local i = 0
    local first = 1
    local out = "["
    loop(i < n) {
        // read a line: [i, j) excludes the terminating "\n"
        local j = i
        loop(j < n && src.substring(j, j+1) != "\n") { j = j + 1 }
        local line = src.substring(i, j)
        // skip leading spaces/tabs
        local k = 0
        loop(k < line.length() && (line.substring(k,k+1) == " " || line.substring(k,k+1) == "\t")) { k = k + 1 }
        if me.starts_with(line, k, "using ") == 1 {
            local rest = me.trim(line.substring(k + 6, line.length()))
            // split on ' as '
            local as_pos = me.index_of(rest, 0, " as ")
            local target = rest
            local alias = null
            if as_pos >= 0 {
                target = me.trim(rest.substring(0, as_pos))
                alias = me.trim(rest.substring(as_pos + 4, rest.length()))
            }
            // path or namespace
            local is_path = 0
            if target.length() > 0 {
                if me.starts_with(target, 0, "\"") == 1 { is_path = 1 }
                if me.starts_with(target, 0, "./") == 1 { is_path = 1 }
                if me.starts_with(target, 0, "/") == 1 { is_path = 1 }
                if target.length() >= 6 && me.starts_with(target, target.length()-6, ".nyash") == 1 { is_path = 1 }
            }
            local name = ""
            local path = null
            if is_path == 1 {
                // trim surrounding quotes
                if me.starts_with(target, 0, "\"") == 1 {
                    target = target.substring(1, target.length())
                    if target.length() > 0 && target.substring(target.length()-1, target.length()) == "\"" {
                        target = target.substring(0, target.length()-1)
                    }
                }
                path = target
                if alias != null { name = alias } else {
                    // derive the name from the basename
                    local p = target
                    // find last '/'
                    local idx = -1
                    local t = 0
                    loop(t < p.length()) {
                        if p.substring(t,t+1) == "/" { idx = t }
                        t = t + 1
                    }
                    if idx >= 0 { p = p.substring(idx+1, p.length()) }
                    // strip .nyash
                    if p.length() > 6 && me.starts_with(p, p.length()-6, ".nyash") == 1 { p = p.substring(0, p.length()-6) }
                    name = p
                }
            } else {
                name = target
                // `using ns as alias`: the alias names the entry, as in add_using
                if alias != null { name = alias }
            }
            // append JSON entry
            if first == 0 { out = out + "," } else { first = 0 }
            out = out + "{\"name\":\"" + me.esc_json(name) + "\""
            if path != null { out = out + ",\"path\":\"" + me.esc_json(path) + "\"" }
            out = out + "}"
        }
        // continue after the "\n"
        i = j + 1
    }
    out = out + "]"
    me.usings_json = out
    return 0
}
// Return the JSON array text collected by add_using / extract_usings.
get_usings_json() {
    local snapshot = me.usings_json
    return snapshot
}
// Convert a decimal digit string to an integer, one character at a time.
// Any character that is not "1".."9" contributes the digit value 0.
// An empty string yields 0.
to_int(s) {
    local n = s.length()
    if n == 0 { return 0 }
    local digits = "0123456789"
    local acc = 0
    local i = 0
    loop(i < n) {
        local d = s.substring(i, i+1)
        // look the character up among "1".."9"; anything else stays 0
        local dv = 0
        local k = 1
        loop(k < 10) {
            if d == digits.substring(k, k+1) { dv = k }
            k = k + 1
        }
        acc = acc * 10 + dv
        i = i + 1
    }
    return acc
}
// Advance `i` past whitespace (as defined by me.is_space) and return the
// new index. Returns `i` unchanged when `src` is null. The scan gives up
// and returns the current index after a fixed number of iterations.
skip_ws(src, i) {
    if src == null { return i }
    local len = src.length()
    local steps = 0
    local limit = 100000
    loop(i < len) {
        if steps > limit { return i }
        steps = steps + 1
        if me.is_space(src.substring(i, i+1)) { i = i + 1 } else { break }
    }
    return i
}
// identifiers/strings not required for Stage1 beyond string literal parse above
// using metadata omitted in Stage1
// Parse a run of decimal digits starting at src[i] and return an Int node,
// {"type":"Int","value":<digits>}. gpos is set to the index just past the
// digits (equal to i when there are none).
// The digit text is normalised so the emitted value is always a valid JSON
// number: an empty run becomes 0 (this method is the fallback for unknown
// tokens) and redundant leading zeros are dropped ("007" -> 7).
parse_number2(src, i) {
    local n = src.length()
    local j = i
    local cont = 1
    local guard = 0
    local max = 100000
    loop(cont == 1) {
        if guard > max { cont = 0 } else {
            guard = guard + 1
            if j < n {
                if me.is_digit(src.substring(j, j+1)) { j = j + 1 } else { cont = 0 }
            } else { cont = 0 }
        }
    }
    local s = src.substring(i, j)
    // no digits at all: emit 0 instead of an empty value
    if s.length() == 0 { s = "0" }
    // JSON forbids leading zeros; keep at least one digit
    loop(s.length() > 1 && s.substring(0, 1) == "0") { s = s.substring(1, s.length()) }
    me.gpos_set(j)
    return "{\"type\":\"Int\",\"value\":" + s + "}"
}
// Parse a string literal whose opening quote is at src[i] (not re-checked
// here) and return a Str node, {"type":"Str","value":"..."}, with the
// content passed through me.esc_json. A backslash makes the following
// character literal. gpos ends just past the closing quote, or at the
// stopping index when the literal is unterminated.
parse_string2(src, i) {
    local len = src.length()
    local pos = i + 1
    local text = ""
    local steps = 0
    local limit = 200000
    loop(pos < len) {
        if steps > limit { break }
        steps = steps + 1
        local c = src.substring(pos, pos+1)
        if c == "\"" {
            // closing quote: step over it and stop
            pos = pos + 1
            break
        }
        if c == "\\" && pos + 1 < len {
            text = text + src.substring(pos+1, pos+2)
            pos = pos + 2
        } else {
            text = text + c
            pos = pos + 1
        }
    }
    me.gpos_set(pos)
    return "{\"type\":\"Str\",\"value\":\"" + me.esc_json(text) + "\"}"
}
// parse_factor2: parse one primary expression starting at src[i] (leading
// whitespace is skipped) and return its JSON v0 node as a string. The index
// just past the consumed text is published with me.gpos_set; callers read it
// back with me.gpos_get.
// Forms are tried in this order: end of input (yields Int 0), true / false /
// null, peek, parenthesised expression, string literal, map literal, array
// literal, new, identifier / call / method chain, and finally a number.
parse_factor2(src, i) {
local j = me.skip_ws(src, i)
// Nothing left to parse: return a placeholder Int 0 node
if j >= src.length() { me.gpos_set(j) return "{\"type\":\"Int\",\"value\":0}" }
if me.starts_with_kw(src, j, "true") == 1 { me.gpos_set(j + 4) return "{\"type\":\"Bool\",\"value\":true}" }
if me.starts_with_kw(src, j, "false") == 1 { me.gpos_set(j + 5) return "{\"type\":\"Bool\",\"value\":false}" }
if me.starts_with_kw(src, j, "null") == 1 { me.gpos_set(j + 4) return "{\"type\":\"Null\"}" }
// Peek expression: peek <expr> { "label" => <expr>, ..., else => <expr> }
// Emits {"type":"Peek","scrutinee":..,"arms":[{"label":..,"expr":..},..],"else":..}
if me.starts_with_kw(src, j, "peek") == 1 {
j = j + 4
j = me.skip_ws(src, j)
// scrutinee expression
local scr = me.parse_expr2(src, j)
j = me.gpos_get()
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "{" { j = j + 1 } // enter arms block
j = me.skip_ws(src, j)
local arms_json = "["
local first_arm = 1
local else_json = null
local n = src.length()
local contp = 1
local guardp = 0
local maxp = 400000
// Arm loop: ends at the closing '}', at end of input, or once the iteration cap is hit
loop(contp == 1) {
if guardp > maxp { contp = 0 } else { guardp = guardp + 1 }
j = me.skip_ws(src, j)
if j >= n { contp = 0 } else {
if src.substring(j, j+1) == "}" {
j = j + 1
contp = 0
} else {
// else arm or labeled arm
if me.starts_with_kw(src, j, "else") == 1 {
j = j + 4
j = me.skip_ws(src, j)
// the "=>" is optional here: it is skipped only when present
if src.substring(j, j+2) == "=>" { j = j + 2 }
j = me.skip_ws(src, j)
// else body may be a block or bare expr
if src.substring(j, j+1) == "{" {
// braced body: a single expression is parsed between the braces
j = j + 1
j = me.skip_ws(src, j)
else_json = me.parse_expr2(src, j)
j = me.gpos_get()
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "}" { j = j + 1 }
} else {
else_json = me.parse_expr2(src, j)
j = me.gpos_get()
}
// optional separator/newline tolerated; continue until '}'
} else {
// labeled arm: string literal label
if src.substring(j, j+1) != "\"" {
// degrade safely to avoid infinite loop
j = j + 1
continue
}
local label_raw = me.read_string_lit(src, j)
j = me.gpos_get()
j = me.skip_ws(src, j)
if src.substring(j, j+2) == "=>" { j = j + 2 }
j = me.skip_ws(src, j)
// arm expr: block or bare expr
local expr_json = "{\"type\":\"Int\",\"value\":0}"
if src.substring(j, j+1) == "{" {
j = j + 1
j = me.skip_ws(src, j)
expr_json = me.parse_expr2(src, j)
j = me.gpos_get()
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "}" { j = j + 1 }
} else {
expr_json = me.parse_expr2(src, j)
j = me.gpos_get()
}
local arm_json = "{\"label\":\"" + me.esc_json(label_raw) + "\",\"expr\":" + expr_json + "}"
if first_arm == 1 { arms_json = arms_json + arm_json first_arm = 0 } else { arms_json = arms_json + "," + arm_json }
}
}
}
}
arms_json = arms_json + "]"
// A peek without an else arm gets a Null node as its else value
if else_json == null { else_json = "{\"type\":\"Null\"}" }
me.gpos_set(j)
return "{\"type\":\"Peek\",\"scrutinee\":" + scr + ",\"arms\":" + arms_json + ",\"else\":" + else_json + "}"
}
local ch = src.substring(j, j+1)
// Parenthesized
// The inner node is returned as-is (no wrapper node); a missing ')' is tolerated
if ch == "(" {
local inner = me.parse_expr2(src, j + 1)
local k = me.gpos_get()
k = me.skip_ws(src, k)
if src.substring(k, k+1) == ")" { k = k + 1 }
me.gpos_set(k)
return inner
}
// String literal
if ch == "\"" { return me.parse_string2(src, j) }
// Map literal: {"k": v, ...} (string keys only) → Call{name:"map.of", args:[Str(k1), v1, Str(k2), v2, ...]}
if ch == "{" {
local n = src.length()
j = j + 1
local out = "["
local first = 1
local cont = 1
local guard = 0
local max = 400000
loop(cont == 1) {
if guard > max { cont = 0 } else { guard = guard + 1 }
j = me.skip_ws(src, j)
if j >= n { cont = 0 } else {
if src.substring(j, j+1) == "}" { j = j + 1 cont = 0 } else {
// key (string only for Stage-2)
if src.substring(j, j+1) != "\"" {
// degrade by skipping one char to avoid infinite loop
j = j + 1
continue
}
local key_raw = me.read_string_lit(src, j)
j = me.gpos_get()
j = me.skip_ws(src, j)
// the ':' between key and value is skipped only when present
if src.substring(j, j+1) == ":" { j = j + 1 }
j = me.skip_ws(src, j)
local val_json = me.parse_expr2(src, j)
j = me.gpos_get()
local key_json = "{\"type\":\"Str\",\"value\":\"" + me.esc_json(key_raw) + "\"}"
// keys and values are appended as a flat, alternating argument list
if first == 1 { out = out + key_json + "," + val_json first = 0 } else { out = out + "," + key_json + "," + val_json }
// optional comma
local before2 = j
j = me.skip_ws(src, j)
if j < n && src.substring(j, j+1) == "," { j = j + 1 }
// progress guard (in case of malformed input)
if j <= before2 { if j < n { j = j + 1 } else { j = n } }
}
}
}
out = out + "]"
me.gpos_set(j)
return "{\"type\":\"Call\",\"name\":\"map.of\",\"args\":" + out + "}"
}
// Array literal: [e1, e2, ...] → Call{name:"array.of", args:[...]}
if ch == "[" {
local n = src.length()
j = j + 1
local out = "["
local first = 1
local cont = 1
local guard = 0
local max = 400000
loop(cont == 1) {
if guard > max { cont = 0 } else { guard = guard + 1 }
j = me.skip_ws(src, j)
if j >= n { cont = 0 } else {
if src.substring(j, j+1) == "]" { j = j + 1 cont = 0 } else {
local before = j
local ej = me.parse_expr2(src, j)
j = me.gpos_get()
if first == 1 { out = out + ej first = 0 } else { out = out + "," + ej }
// optional comma+whitespace
// NOTE(review): before2 is assigned here but never read; the guard below compares against `before`
local before2 = j
j = me.skip_ws(src, j)
if j < n && src.substring(j, j+1) == "," { j = j + 1 }
// progress guard
if j <= before { if j < n { j = j + 1 } else { j = n } }
}
}
}
out = out + "]"
me.gpos_set(j)
return "{\"type\":\"Call\",\"name\":\"array.of\",\"args\":" + out + "}"
}
// true/false
// NOTE(review): these two checks repeat the ones at the top of this method, which
// already return for the same input, so they look unreachable — confirm and remove
if me.starts_with_kw(src, j, "true") == 1 { me.gpos_set(j + 4) return "{\"type\":\"Bool\",\"value\":true}" }
if me.starts_with_kw(src, j, "false") == 1 { me.gpos_set(j + 5) return "{\"type\":\"Bool\",\"value\":false}" }
// new Class(args)
// Emits {"type":"New","class":<ident>,"args":[...]}; the parentheses are skipped only when present
if me.starts_with_kw(src, j, "new") == 1 {
local p = me.skip_ws(src, j + 3)
// read_ident2 / parse_args2 return "<payload>@<pos>"; split on the last '@'
local idp = me.read_ident2(src, p)
local at = idp.lastIndexOf("@")
local cls = idp.substring(0, at)
local k = me.to_int(idp.substring(at+1, idp.length()))
k = me.skip_ws(src, k)
if src.substring(k, k+1) == "(" { k = k + 1 }
local args_and_pos = me.parse_args2(src, k)
local at2 = args_and_pos.lastIndexOf("@")
local args_json = args_and_pos.substring(0, at2)
k = me.to_int(args_and_pos.substring(at2+1, args_and_pos.length()))
k = me.skip_ws(src, k)
if src.substring(k, k+1) == ")" { k = k + 1 }
me.gpos_set(k)
return "{\"type\":\"New\",\"class\":\"" + cls + "\",\"args\":" + args_json + "}"
}
// Identifier / Call / Method chain
// Starts as a Var node; each following "(...)" or ".name(...)" replaces `node`
if me.is_alpha(ch) {
local idp = me.read_ident2(src, j)
local at = idp.lastIndexOf("@")
local name = idp.substring(0, at)
local k = me.to_int(idp.substring(at+1, idp.length()))
local node = "{\"type\":\"Var\",\"name\":\"" + name + "\"}"
local cont2 = 1
loop(cont2 == 1) {
k = me.skip_ws(src, k)
local tch = src.substring(k, k+1)
if tch == "(" {
k = k + 1
local args_and_pos = me.parse_args2(src, k)
local at2 = args_and_pos.lastIndexOf("@")
local args_json = args_and_pos.substring(0, at2)
k = me.to_int(args_and_pos.substring(at2+1, args_and_pos.length()))
k = me.skip_ws(src, k)
if src.substring(k, k+1) == ")" { k = k + 1 }
// the Call node is built from the leading identifier `name`, not from the current `node`
node = "{\"type\":\"Call\",\"name\":\"" + name + "\",\"args\":" + args_json + "}"
} else {
if tch == "." {
k = k + 1
k = me.skip_ws(src, k)
local midp = me.read_ident2(src, k)
local at3 = midp.lastIndexOf("@")
local mname = midp.substring(0, at3)
k = me.to_int(midp.substring(at3+1, midp.length()))
k = me.skip_ws(src, k)
// NOTE(review): parse_args2 is called even when no '(' follows the member name,
// so `a.b` without parentheses still tries to parse arguments — confirm intent
if src.substring(k, k+1) == "(" { k = k + 1 }
local args2 = me.parse_args2(src, k)
local at4 = args2.lastIndexOf("@")
local args_json2 = args2.substring(0, at4)
k = me.to_int(args2.substring(at4+1, args2.length()))
k = me.skip_ws(src, k)
if src.substring(k, k+1) == ")" { k = k + 1 }
// the previous node becomes the receiver of the Method node
node = "{\"type\":\"Method\",\"recv\":" + node + ",\"method\":\"" + mname + "\",\"args\":" + args_json2 + "}"
} else { cont2 = 0 }
}
}
me.gpos_set(k)
return node
}
// Fallback: number
return me.parse_number2(src, j)
}
// Unary minus binds tighter than * and /.
// "-x" is emitted as the Binary node (0 - x); anything else is handed
// straight to parse_factor2. gpos is left where parse_factor2 put it.
parse_unary2(src, i) {
    local j = me.skip_ws(src, i)
    if src.substring(j, j+1) != "-" { return me.parse_factor2(src, j) }
    local operand = me.parse_factor2(src, j + 1)
    local stop = me.gpos_get()
    local zero = "{\"type\":\"Int\",\"value\":0}"
    me.gpos_set(stop)
    return "{\"type\":\"Binary\",\"op\":\"-\",\"lhs\":" + zero + ",\"rhs\":" + operand + "}"
}
// Parse a left-associative chain of unary operands joined by "*" or "/".
// Each operator wraps the result so far in a Binary node. gpos ends at the
// first non-whitespace position that is not part of the chain.
parse_term2(src, i) {
    local acc = me.parse_unary2(src, i)
    local pos = me.gpos_get()
    local more = 1
    loop(more == 1) {
        pos = me.skip_ws(src, pos)
        if pos >= src.length() { break }
        local op = src.substring(pos, pos+1)
        if op == "*" || op == "/" {
            local rhs = me.parse_unary2(src, pos+1)
            pos = me.gpos_get()
            acc = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + acc + ",\"rhs\":" + rhs + "}"
        } else { more = 0 }
    }
    me.gpos_set(pos)
    return acc
}
// Parse a left-associative chain of terms joined by "+" or "-".
// Each operator wraps the result so far in a Binary node. gpos ends at the
// first non-whitespace position that is not part of the chain.
parse_sum2(src, i) {
    local acc = me.parse_term2(src, i)
    local pos = me.gpos_get()
    local more = 1
    loop(more == 1) {
        pos = me.skip_ws(src, pos)
        if pos >= src.length() { break }
        local op = src.substring(pos, pos+1)
        if op == "+" || op == "-" {
            local rhs = me.parse_term2(src, pos+1)
            pos = me.gpos_get()
            acc = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + acc + ",\"rhs\":" + rhs + "}"
        } else { more = 0 }
    }
    me.gpos_set(pos)
    return acc
}
// Parse a sum optionally followed by ONE comparison operator
// (==, !=, <=, >=, <, >) and a second sum. With an operator the result is a
// Compare node; without one the left-hand sum is returned unchanged and gpos
// is set to the position after the skipped whitespace.
parse_compare2(src, i) {
    local left = me.parse_sum2(src, i)
    local pos = me.skip_ws(src, me.gpos_get())
    local two = src.substring(pos, pos+2)
    local one = src.substring(pos, pos+1)
    local op = ""
    // width of the matched operator; 0 means no comparison follows
    local width = 0
    if two == "==" || two == "!=" || two == "<=" || two == ">=" {
        op = two
        width = 2
    } else {
        if one == "<" || one == ">" {
            op = one
            width = 1
        }
    }
    if width == 0 {
        me.gpos_set(pos)
        return left
    }
    local right = me.parse_sum2(src, pos + width)
    local stop = me.gpos_get()
    me.gpos_set(stop)
    return "{\"type\":\"Compare\",\"op\":\"" + op + "\",\"lhs\":" + left + ",\"rhs\":" + right + "}"
}
// Top-level expression parser.
// 1. A chain of comparisons joined by "&&" / "||" is folded left to right
//    into Logical nodes (both operators are handled at the same level).
// 2. If a "?" follows, the result becomes the condition of a Ternary node:
//    cond ? then : else, where both branches are full expressions and the
//    ":" is skipped only when present.
// gpos ends just past the consumed expression.
parse_expr2(src, i) {
    local acc = me.parse_compare2(src, i)
    local pos = me.gpos_get()
    local chain = 1
    loop(chain == 1) {
        pos = me.skip_ws(src, pos)
        local op = src.substring(pos, pos+2)
        if op == "&&" || op == "||" {
            local rhs = me.parse_compare2(src, pos+2)
            pos = me.gpos_get()
            acc = "{\"type\":\"Logical\",\"op\":\"" + op + "\",\"lhs\":" + acc + ",\"rhs\":" + rhs + "}"
        } else { chain = 0 }
    }
    pos = me.skip_ws(src, pos)
    // No ternary: publish the cursor and return the logical chain
    if src.substring(pos, pos+1) != "?" {
        me.gpos_set(pos)
        return acc
    }
    pos = me.skip_ws(src, pos + 1)
    local then_expr = me.parse_expr2(src, pos)
    pos = me.skip_ws(src, me.gpos_get())
    if src.substring(pos, pos+1) == ":" { pos = pos + 1 }
    pos = me.skip_ws(src, pos)
    local else_expr = me.parse_expr2(src, pos)
    pos = me.gpos_get()
    // an empty else branch falls back to Int 0
    if else_expr.length() == 0 { else_expr = "{\"type\":\"Int\",\"value\":0}" }
    me.gpos_set(pos)
    return "{\"type\":\"Ternary\",\"cond\":" + acc + ",\"then\":" + then_expr + ",\"else\":" + else_expr + "}"
}
// Parse a comma-separated argument list starting at src[i]; the opening "("
// has already been consumed by the caller and the closing ")" is left for
// the caller as well.
// Returns "<json array>@<pos>", where pos is the index at which parsing
// stopped. An immediately following ")" yields "[]@<pos>".
parse_args2(src, i) {
    local len = src.length()
    // whitespace is skipped twice, exactly as many calls as before
    local pos = me.skip_ws(src, me.skip_ws(src, i))
    if pos < len && src.substring(pos, pos+1) == ")" { return "[]@" + me.i2s(pos) }
    // first argument
    local arg = me.parse_expr2(src, pos)
    pos = me.gpos_get()
    local list = "[" + arg
    // further arguments, each introduced by a comma
    local more = 1
    local steps = 0
    local limit = 100000
    loop(more == 1) {
        if steps > limit { more = 0 } else { steps = steps + 1 }
        local prev = pos
        pos = me.skip_ws(src, pos)
        if pos < len && src.substring(pos, pos+1) == "," {
            pos = me.skip_ws(src, pos + 1)
            arg = me.parse_expr2(src, pos)
            pos = me.gpos_get()
            list = list + "," + arg
        } else { more = 0 }
        // stop when an iteration made no progress
        if pos == prev { more = 0 }
    }
    list = list + "]"
    return list + "@" + me.i2s(pos)
}
// parse_stmt2: parse one statement starting at src[i] (leading whitespace is
// skipped) and return its JSON v0 node as a string. `using` statements return
// "" after recording the entry through me.add_using. The index just past the
// statement is published with me.gpos_set.
// Statement kinds are tested in this order: using, IDENT '=' expr, return,
// local, if, loop, break, continue, throw, try, and finally a bare expression.
// Each branch ends with the same progress guard: when the cursor has not moved
// past where the statement began, it is bumped by one character (or clamped to
// the end of src) so callers cannot loop forever on malformed input.
parse_stmt2(src, i) {
local j = me.skip_ws(src, i)
local stmt_start = j
// using "path" [as alias]  |  using a.b.c [as alias]
if me.starts_with_kw(src, j, "using") == 1 {
j = j + 5
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "\"" {
// quoted form: recorded as a "path" using
local p = me.read_string_lit(src, j)
j = me.gpos_get()
j = me.skip_ws(src, j)
local alias = null
if me.starts_with_kw(src, j, "as") == 1 { j = j + 2 j = me.skip_ws(src, j) local idp = me.read_ident2(src, j) local at = idp.lastIndexOf("@") alias = idp.substring(0, at) j = me.to_int(idp.substring(at+1, idp.length())) }
me.add_using("path", p, alias)
} else {
if me.is_alpha(src.substring(j, j+1)) {
// identifier form: dotted name recorded as an "ns" using
local idp = me.read_ident2(src, j)
local at = idp.lastIndexOf("@")
local name = idp.substring(0, at)
j = me.to_int(idp.substring(at+1, idp.length()))
local cont = 1
// append each ".segment" to the namespace name
loop(cont == 1) {
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "." { j = j + 1 j = me.skip_ws(src, j) idp = me.read_ident2(src, j) at = idp.lastIndexOf("@") name = name + "." + idp.substring(0, at) j = me.to_int(idp.substring(at+1, idp.length())) } else { cont = 0 }
}
j = me.skip_ws(src, j)
local alias2 = null
if me.starts_with_kw(src, j, "as") == 1 { j = j + 2 j = me.skip_ws(src, j) idp = me.read_ident2(src, j) at = idp.lastIndexOf("@") alias2 = idp.substring(0, at) j = me.to_int(idp.substring(at+1, idp.length())) }
me.add_using("ns", name, alias2)
}
}
// ensure progress
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return ""
}
// simple assignment: IDENT '=' expr ; → JSON v0 Local{name, expr} (Stage-2 uses Local for updates)
// Only taken when the identifier is followed by a single '=' (not "=="); otherwise
// control falls through to the keyword checks and the expression fallback below.
if j < src.length() && me.is_alpha(src.substring(j, j+1)) {
local idp0 = me.read_ident2(src, j)
local at0 = idp0.lastIndexOf("@")
if at0 > 0 {
local name0 = idp0.substring(0, at0)
local k0 = me.to_int(idp0.substring(at0+1, idp0.length()))
k0 = me.skip_ws(src, k0)
if k0 < src.length() && src.substring(k0, k0+1) == "=" {
local eq_two = "="
if k0 + 1 < src.length() { eq_two = src.substring(k0, k0+2) }
if eq_two != "==" {
k0 = k0 + 1
k0 = me.skip_ws(src, k0)
// the right-hand side defaults to Int 0 when '}' , ';' or end of input follows
local default_local = "{\"type\":\"Int\",\"value\":0}"
local expr_json0 = default_local
local end_pos0 = k0
if k0 < src.length() {
local ahead = src.substring(k0, k0+1)
if ahead != "}" && ahead != ";" {
expr_json0 = me.parse_expr2(src, k0)
end_pos0 = me.gpos_get()
}
}
k0 = end_pos0
if k0 <= stmt_start { if k0 < src.length() { k0 = k0 + 1 } else { k0 = src.length() } }
me.gpos_set(k0)
return "{\"type\":\"Local\",\"name\":\"" + name0 + "\",\"expr\":" + expr_json0 + "}"
}
}
}
}
// return [expr] → Return{expr}; the expression defaults to Int 0 when omitted
if me.starts_with_kw(src, j, "return") == 1 {
j = j + 6
j = me.skip_ws(src, j)
local default_ret = "{\"type\":\"Int\",\"value\":0}"
local expr_json_ret = default_ret
local end_pos_ret = j
if j < src.length() {
local ahead_ret = src.substring(j, j+1)
if ahead_ret != "}" && ahead_ret != ";" {
expr_json_ret = me.parse_expr2(src, j)
end_pos_ret = me.gpos_get()
}
}
j = end_pos_ret
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Return\",\"expr\":" + expr_json_ret + "}"
}
// local NAME [= expr] → Local{name, expr}; the '=' is skipped only when present
// NOTE(review): skip_ws also skips newlines, so `local x` without an initializer
// appears to continue into the next line's text as its expression — confirm
if me.starts_with_kw(src, j, "local") == 1 {
j = j + 5
j = me.skip_ws(src, j)
local idp = me.read_ident2(src, j)
local at = idp.lastIndexOf("@")
local name = idp.substring(0, at)
j = me.to_int(idp.substring(at+1, idp.length()))
j = me.skip_ws(src, j)
if j < src.length() && src.substring(j, j+1) == "=" { j = j + 1 }
j = me.skip_ws(src, j)
local default_local = "{\"type\":\"Int\",\"value\":0}"
local expr_json_local = default_local
local end_pos_local = j
if j < src.length() {
local ahead_local = src.substring(j, j+1)
if ahead_local != "}" && ahead_local != ";" {
expr_json_local = me.parse_expr2(src, j)
end_pos_local = me.gpos_get()
}
}
j = end_pos_local
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Local\",\"name\":\"" + name + "\",\"expr\":" + expr_json_local + "}"
}
// if [(] cond [)] { ... } [else { ... }] → If{cond, then[, else]}
if me.starts_with_kw(src, j, "if") == 1 {
j = j + 2
j = me.skip_ws(src, j)
// remember whether a '(' was consumed so only a matching ')' is skipped afterwards
local paren = 0
if src.substring(j, j+1) == "(" { paren = 1 j = j + 1 }
local cond = me.parse_expr2(src, j)
j = me.gpos_get()
if paren == 1 { j = me.skip_ws(src, j) if src.substring(j, j+1) == ")" { j = j + 1 } }
j = me.skip_ws(src, j)
// parse_block2 returns "<json array>@<pos>"; split on the last '@'
local then_res = me.parse_block2(src, j)
local at1 = then_res.lastIndexOf("@")
local then_json = then_res.substring(0, at1)
j = me.to_int(then_res.substring(at1+1, then_res.length()))
j = me.skip_ws(src, j)
local else_json = null
if me.starts_with_kw(src, j, "else") == 1 { j = j + 4 j = me.skip_ws(src, j) local else_res = me.parse_block2(src, j) local at2 = else_res.lastIndexOf("@") else_json = else_res.substring(0, at2) j = me.to_int(else_res.substring(at2+1, else_res.length())) }
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
// the "else" key is emitted only when an else block was parsed
if else_json == null { return "{\"type\":\"If\",\"cond\":" + cond + ",\"then\":" + then_json + "}" } else { return "{\"type\":\"If\",\"cond\":" + cond + ",\"then\":" + then_json + ",\"else\":" + else_json + "}" }
}
// loop [(] cond [)] { ... } → Loop{cond, body}; both parentheses are skipped only when present
if me.starts_with_kw(src, j, "loop") == 1 {
j = j + 4
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "(" { j = j + 1 }
local cond = me.parse_expr2(src, j)
j = me.gpos_get()
j = me.skip_ws(src, j)
if src.substring(j, j+1) == ")" { j = j + 1 }
j = me.skip_ws(src, j)
local body_res = me.parse_block2(src, j)
local at3 = body_res.lastIndexOf("@")
local body_json = body_res.substring(0, at3)
j = me.to_int(body_res.substring(at3+1, body_res.length()))
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Loop\",\"cond\":" + cond + ",\"body\":" + body_json + "}"
}
// Stage-3 acceptance (syntax only): break / continue → no-op expression
// With Stage-3 enabled a Break / Continue node is emitted instead of the no-op Expr(Int 0)
if me.starts_with_kw(src, j, "break") == 1 {
j = j + 5
if me.stage3_enabled() == 1 {
j = me.skip_ws(src, j)
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Break\"}"
}
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
}
if me.starts_with_kw(src, j, "continue") == 1 {
j = j + 8
if me.stage3_enabled() == 1 {
j = me.skip_ws(src, j)
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Continue\"}"
}
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
}
// Stage-3 acceptance: throw expr → degrade to Expr(expr)
// With Stage-3 enabled a Throw{expr} node is emitted instead
if me.starts_with_kw(src, j, "throw") == 1 {
j = j + 5
j = me.skip_ws(src, j)
local e_throw = me.parse_expr2(src, j)
j = me.gpos_get()
if me.stage3_enabled() == 1 {
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Throw\",\"expr\":" + e_throw + "}"
}
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Expr\",\"expr\":" + e_throw + "}"
}
// Stage-3 acceptance: try { ... } (catch ...)* (finally { ... })? → degrade to no-op (syntax only)
// With Stage-3 enabled a Try{try, catches[, finally]} node is emitted instead.
// The blocks are always parsed, so the cursor advances the same way in both modes.
if me.starts_with_kw(src, j, "try") == 1 {
j = j + 3
j = me.skip_ws(src, j)
// parse try block
local try_res = me.parse_block2(src, j)
local at_t = try_res.lastIndexOf("@")
local try_json = try_res.substring(0, at_t)
j = me.to_int(try_res.substring(at_t+1, try_res.length()))
local catches_json = "["
local catch_first = 1
// zero or more catch
local guard_ct = 0
local max_ct = 100
local cont_ct = 1
loop(cont_ct == 1) {
if guard_ct > max_ct { cont_ct = 0 } else { guard_ct = guard_ct + 1 }
j = me.skip_ws(src, j)
if me.starts_with_kw(src, j, "catch") == 1 {
j = j + 5
j = me.skip_ws(src, j)
local catch_type = null
local catch_param = null
if src.substring(j, j+1) == "(" { j = j + 1 j = me.skip_ws(src, j)
// optional type + name
// NOTE(review): with a single identifier, e.g. `catch (e)`, the first branch takes it,
// so it is stored as catch_type and catch_param stays null — confirm this is intended
if me.is_alpha(src.substring(j, j+1)) {
local id1 = me.read_ident2(src, j)
local at1 = id1.lastIndexOf("@")
catch_type = id1.substring(0, at1)
j = me.to_int(id1.substring(at1+1, id1.length()))
j = me.skip_ws(src, j)
}
if me.is_alpha(src.substring(j, j+1)) {
local id2 = me.read_ident2(src, j)
local at2 = id2.lastIndexOf("@")
catch_param = id2.substring(0, at2)
j = me.to_int(id2.substring(at2+1, id2.length()))
j = me.skip_ws(src, j)
}
if src.substring(j, j+1) == ")" { j = j + 1 }
}
j = me.skip_ws(src, j)
// catch body
local c_res = me.parse_block2(src, j)
local atc = c_res.lastIndexOf("@")
j = me.to_int(c_res.substring(atc+1, c_res.length()))
if me.stage3_enabled() == 1 {
// entry keys are written in the order param, typeHint, body; `wrote` tracks whether a comma is needed
local entry = "{"
local wrote = 0
if catch_param != null && catch_param.length() > 0 { entry = entry + "\"param\":\"" + me.esc_json(catch_param) + "\"" wrote = 1 }
if catch_type != null && catch_type.length() > 0 { if wrote == 1 { entry = entry + "," } entry = entry + "\"typeHint\":\"" + me.esc_json(catch_type) + "\"" wrote = 1 }
local body_json = c_res.substring(0, atc)
if wrote == 1 { entry = entry + "," }
entry = entry + "\"body\":" + body_json + "}"
if catch_first == 0 { catches_json = catches_json + "," + entry } else { catches_json = catches_json + entry catch_first = 0 }
}
} else { cont_ct = 0 }
}
catches_json = catches_json + "]"
// optional finally
j = me.skip_ws(src, j)
local finally_json = null
if me.starts_with_kw(src, j, "finally") == 1 {
j = j + 7
j = me.skip_ws(src, j)
local f_res = me.parse_block2(src, j)
local atf = f_res.lastIndexOf("@")
j = me.to_int(f_res.substring(atf+1, f_res.length()))
finally_json = f_res.substring(0, atf)
}
if me.stage3_enabled() == 1 {
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
local node = "{\"type\":\"Try\",\"try\":" + try_json + ",\"catches\":" + catches_json
if finally_json != null { node = node + ",\"finally\":" + finally_json }
node = node + "}"
return node
}
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
}
// Fallback: expression or unknown token — ensure progress even on malformed input
local expr_start = j
local e = me.parse_expr2(src, j)
j = me.gpos_get()
if j <= expr_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Expr\",\"expr\":" + e + "}"
}
// Parse a braced statement block starting at src[i] (leading whitespace is
// skipped). Returns "<json array of statements>@<pos>", where pos is the
// index just past the closing "}" (or the end of input when it is missing).
// When no "{" is found the result is "[]@<pos>" with nothing consumed.
// Statements that produce an empty string are left out of the array.
parse_block2(src, i) {
    local pos = me.skip_ws(src, i)
    if src.substring(pos, pos+1) != "{" { return "[]@" + me.i2s(pos) }
    pos = pos + 1
    local items = ""
    local scanning = 1
    loop(scanning == 1) {
        pos = me.skip_ws(src, pos)
        if pos >= src.length() { break }
        if src.substring(pos, pos+1) == "}" {
            pos = pos + 1
            break
        }
        local mark = pos
        local stmt = me.parse_stmt2(src, pos)
        pos = me.gpos_get()
        // Progress guard: force the cursor forward when the statement consumed nothing
        if pos <= mark {
            if pos < src.length() { pos = pos + 1 } else { pos = src.length() }
            me.gpos_set(pos)
        }
        // swallow any semicolons (and the whitespace around them) after the statement
        local eating = 1
        local steps = 0
        local limit = 100000
        loop(eating == 1) {
            if steps > limit { eating = 0 } else { steps = steps + 1 }
            local prev = pos
            pos = me.skip_ws(src, pos)
            if pos < src.length() && src.substring(pos, pos+1) == ";" { pos = pos + 1 } else { eating = 0 }
            if pos == prev { eating = 0 }
        }
        if stmt.length() > 0 {
            if items.length() > 0 { items = items + "," }
            items = items + stmt
        }
    }
    local body = "[" + items + "]"
    return body + "@" + me.i2s(pos)
}
// Parse a whole source text as a sequence of top-level statements and
// return the Program document:
//   {"version":0,"kind":"Program","body":[...]}
// Statements that produce an empty string are left out of the body.
parse_program2(src) {
    local pos = me.skip_ws(src, 0)
    local items = ""
    local scanning = 1
    loop(scanning == 1) {
        pos = me.skip_ws(src, pos)
        if pos >= src.length() { break }
        local mark = pos
        local stmt = me.parse_stmt2(src, pos)
        pos = me.gpos_get()
        // Progress guard: force the cursor forward when the statement consumed nothing
        if pos <= mark {
            if pos < src.length() { pos = pos + 1 } else { pos = src.length() }
            me.gpos_set(pos)
        }
        // swallow any semicolons between top-level statements
        local eating = 1
        local steps = 0
        local limit = 100000
        loop(eating == 1) {
            if steps > limit { eating = 0 } else { steps = steps + 1 }
            local prev = pos
            pos = me.skip_ws(src, pos)
            if pos < src.length() && src.substring(pos, pos+1) == ";" { pos = pos + 1 } else { eating = 0 }
            if pos == prev { eating = 0 }
        }
        if stmt.length() > 0 {
            if items.length() > 0 { items = items + "," }
            items = items + stmt
        }
    }
    local body = "[" + items + "]"
    return "{\"version\":0,\"kind\":\"Program\",\"body\":" + body + "}"
}
}
// Static stub box whose main ignores its arguments and returns 0.
static box ParserStub {
    main(args) {
        return 0
    }
}