Files
hakorune/apps/selfhost-compiler/boxes/parser_box.nyash
Selfhosting Dev 63c8fda808 🔍 Research: GPT-5-Codex capabilities and GitHub PR integration
## Summary
Investigated OpenAI's new GPT-5-Codex model and Codex GitHub PR review integration capabilities.

## GPT-5-Codex Analysis

### Benchmark Performance (Good)
- SWE-bench Verified: 74.5% (vs GPT-5's 72.8%)
- Refactoring tasks: 51.3% (vs GPT-5's 33.9%)
- Code review: Higher developer ratings

### Real-World Issues (Concerning)
- Users report degraded coding performance
- Scripts that previously worked now fail
- Less consistent than GPT-4.5
- Longer response times (minutes vs instant)
- "Creatively and emotionally flat"
- Basic errors (e.g., counting letters incorrectly)

### Key Finding
Classic case of "optimizing for benchmarks vs real usability" - scores well on tests but performs poorly in practice.

## Codex GitHub PR Integration

### Setup Process
1. Enable MFA and connect GitHub account
2. Authorize Codex GitHub app for repos
3. Enable "Code review" in repository settings

### Usage Methods
- **Manual**: Comment '@codex review' in PR
- **Automatic**: Triggers when PR moves from draft to ready

### Current Limitations
- One-way communication (doesn't respond to review comments)
- Prefers creating new PRs over updating existing ones
- Better for single-pass reviews than iterative feedback

## 'codex resume' Feature
New session management capability:
- Resume previous codex exec sessions
- Useful for continuing long tasks across days
- Maintains context from interrupted work

🐱 The investigation reveals that while GPT-5-Codex shows benchmark improvements, practical developer experience has declined - a reminder that metrics don't always reflect real-world utility!
2025-09-16 16:28:25 +09:00

696 lines
29 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ParserBox — Stage-1 JSON v0 generator (extracted, simplified for Rust parser)
box ParserBox {
gpos
usings_json
// Constructor: start at position 0 with an empty `using` list. Returns 0.
birth() {
    me.usings_json = "[]"
    me.gpos = 0
    return 0
}
// Escape `s` so it can be embedded between double quotes in JSON output.
// Backslash and double quote are escaped, and newline / carriage return / tab
// are written as \n, \r, \t: JSON forbids raw control characters inside a
// string, so passing them through would yield output strict parsers reject.
// Other control characters are still copied unchanged.
// Returns the escaped copy; `s` itself is not modified.
esc_json(s) {
    local out = ""
    local i = 0
    local n = s.length()
    loop(i < n) {
        local ch = s.substring(i, i+1)
        // default: copy the character as-is
        local piece = ch
        if ch == "\\" { piece = "\\\\" }
        if ch == "\"" { piece = "\\\"" }
        if ch == "\n" { piece = "\\n" }
        if ch == "\r" { piece = "\\r" }
        if ch == "\t" { piece = "\\t" }
        out = out + piece
        i = i + 1
    }
    return out
}
// True when the one-character string `ch` lies in the range "0".."9".
is_digit(ch) {
    local not_below = ch >= "0"
    local not_above = ch <= "9"
    return not_below && not_above
}
// True when `ch` is a space, tab, newline or carriage return.
is_space(ch) {
    local blank = ch == " " || ch == "\t"
    local line_end = ch == "\n" || ch == "\r"
    return blank || line_end
}
// Identifier-start check: true for "A".."Z", "a".."z" or underscore.
is_alpha(ch) {
    local upper = ch >= "A" && ch <= "Z"
    local lower = ch >= "a" && ch <= "z"
    return upper || lower || ch == "_"
}
// Store `i` as the shared parse position. Always returns 0.
gpos_set(i) {
    me.gpos = i
    return 0
}
// Read back the shared parse position last stored via gpos_set.
gpos_get() {
    local pos = me.gpos
    return pos
}
// lightweight string helpers
// Returns 1 when `pat` occurs in `src` exactly at index `i`, otherwise 0.
// A pattern that would run past the end of `src` never matches.
starts_with(src, i, pat) {
    local src_len = src.length()
    local pat_len = pat.length()
    if i + pat_len > src_len { return 0 }
    local off = 0
    loop(off < pat_len) {
        // compare one character at a time
        local have = src.substring(i + off, i + off + 1)
        local want = pat.substring(off, off + 1)
        if have != want { return 0 }
        off = off + 1
    }
    return 1
}
// Find the first occurrence of `pat` in `src` at or after index `i`.
// Returns the match index, `i` itself for an empty pattern, or -1 when absent.
index_of(src, i, pat) {
    local src_len = src.length()
    local pat_len = pat.length()
    if pat_len == 0 { return i }
    // last index at which the pattern still fits inside src
    local last = src_len - pat_len
    local pos = i
    loop(pos <= last) {
        if me.starts_with(src, pos, pat) { return pos }
        pos = pos + 1
    }
    return -1
}
// Strip leading spaces/tabs and trailing spaces, tabs, carriage returns and
// semicolons from `s`; returns the trimmed copy.
// The trailing "\r" is removed so that a line cut out of a CRLF source
// (split on "\n" only) does not keep a stray carriage return at its end.
trim(s) {
    local n = s.length()
    local i = 0
    loop(i < n && (s.substring(i,i+1) == " " || s.substring(i,i+1) == "\t")) { i = i + 1 }
    local j = n
    local shrinking = 1
    loop(shrinking == 1 && j > i) {
        local tail = s.substring(j-1, j)
        if tail == " " || tail == "\t" || tail == "\r" || tail == ";" { j = j - 1 } else { shrinking = 0 }
    }
    return s.substring(i, j)
}
// Keyword match at index `i` with a word boundary: returns 1 when `kw` starts
// at `i` and the following character (if any) is not in [A-Za-z0-9_]; else 0.
starts_with_kw(src, i, kw) {
    if me.starts_with(src, i, kw) == 0 { return 0 }
    local after = i + kw.length()
    if after < src.length() {
        // a letter, digit or underscore right after means a longer identifier
        local nxt = src.substring(after, after+1)
        if me.is_alpha(nxt) || me.is_digit(nxt) { return 0 }
    }
    return 1
}
// Integer to string; relies on `"" + v` coercing `v` during concatenation.
i2s(v) {
    local text = "" + v
    return text
}
// Read an identifier [A-Za-z_][A-Za-z0-9_]* starting at index `i`.
// Returns "name@pos", where pos is the index just past the identifier.
// When no identifier starts at `i`, the name part is empty and pos is `i`.
read_ident2(src, i) {
    local n = src.length()
    local none = "@" + me.i2s(i)
    if i >= n { return none }
    if me.is_alpha(src.substring(i, i+1)) == 0 { return none }
    local stop = i + 1
    local scanning = 1
    loop(scanning == 1) {
        if stop >= n { scanning = 0 } else {
            local c = src.substring(stop, stop+1)
            if me.is_alpha(c) || me.is_digit(c) { stop = stop + 1 } else { scanning = 0 }
        }
    }
    return src.substring(i, stop) + "@" + me.i2s(stop)
}
// Read a string literal whose opening '"' is at index `i`.
// Returns the raw content without quotes; a backslash makes the following
// character be taken literally. The position after the literal is stored via
// gpos_set. When `i` is not at a '"', gpos is set to `i` and "" is returned.
read_string_lit(src, i) {
    local n = src.length()
    if i >= n { me.gpos_set(i) return "" }
    if src.substring(i, i+1) != "\"" { me.gpos_set(i) return "" }
    local pos = i + 1
    local text = ""
    local steps = 0
    loop(pos < n) {
        // iteration cap, same bound as before (200000)
        if steps > 200000 { break }
        steps = steps + 1
        local ch = src.substring(pos, pos+1)
        if ch == "\"" {
            me.gpos_set(pos + 1)
            return text
        }
        if ch == "\\" && pos + 1 < n {
            // escaped character: copy whatever follows the backslash
            text = text + src.substring(pos+1, pos+2)
            pos = pos + 2
        } else {
            text = text + ch
            pos = pos + 1
        }
    }
    // unterminated literal (or cap reached): report how far we got
    me.gpos_set(pos)
    return text
}
// Append one `using` entry to me.usings_json (a JSON array kept as text).
//   kind   - "path" adds a "path" field; any other value is handled as a namespace
//   target - file path or namespace
//   alias  - used as the entry name when not null
// Without an alias, a path entry is named after its basename minus a trailing
// ".nyash", and a namespace entry is named after the target. Always returns 0.
add_using(kind, target, alias) {
    local name = target
    local path = null
    if kind == "path" { path = target }
    if alias != null {
        name = alias
    } else {
        if kind == "path" {
            // locate the last '/' to cut the directory part off
            local slash = -1
            local t = 0
            loop(t < target.length()) {
                if target.substring(t, t+1) == "/" { slash = t }
                t = t + 1
            }
            if slash >= 0 { name = target.substring(slash + 1, target.length()) }
            if name.length() > 6 && me.starts_with(name, name.length()-6, ".nyash") == 1 {
                name = name.substring(0, name.length()-6)
            }
        }
    }
    // Build the JSON object for this entry
    local entry = "{\"name\":\"" + me.esc_json(name) + "\""
    if path != null { entry = entry + ",\"path\":\"" + me.esc_json(path) + "\"" }
    entry = entry + "}"
    // Find the closing ']' of a non-empty array; -1 means "start a new array"
    local cur = me.usings_json
    local rb = -1
    if cur != null {
        if cur.length() > 0 && cur != "[]" { rb = cur.lastIndexOf("]") }
    }
    if rb < 0 {
        me.usings_json = "[" + entry + "]"
    } else {
        me.usings_json = cur.substring(0, rb) + "," + entry + "]"
    }
    return 0
}
// Scan `src` line by line and rebuild me.usings_json from every line whose
// first non-blank text is "using ". Always returns 0.
//
// Each line has the form `using TARGET` or `using TARGET as ALIAS`.
// TARGET counts as a path when it starts with '"', "./" or "/", or ends with
// ".nyash"; surrounding double quotes are removed. Anything else is a namespace.
//
// Entries are appended through add_using so both collection paths produce the
// same JSON. In particular an ` as ALIAS` on a namespace using now becomes the
// entry name, exactly as add_using does for namespaces (it used to be dropped).
extract_usings(src) {
    // start from an empty list; add_using appends to it
    me.usings_json = "[]"
    if src == null { return 0 }
    local n = src.length()
    local i = 0
    loop(i < n) {
        // read a line (up to, not including, the next "\n")
        local j = i
        loop(j < n && src.substring(j, j+1) != "\n") { j = j + 1 }
        local line = src.substring(i, j)
        // skip leading indentation
        local k = 0
        loop(k < line.length() && (line.substring(k,k+1) == " " || line.substring(k,k+1) == "\t")) { k = k + 1 }
        if me.starts_with(line, k, "using ") == 1 {
            local rest = me.trim(line.substring(k + 6, line.length()))
            // split on ' as '
            local target = rest
            local alias = null
            local as_pos = me.index_of(rest, 0, " as ")
            if as_pos >= 0 {
                target = me.trim(rest.substring(0, as_pos))
                alias = me.trim(rest.substring(as_pos + 4, rest.length()))
            }
            // path or namespace
            local is_path = 0
            if target.length() > 0 {
                if me.starts_with(target, 0, "\"") == 1 { is_path = 1 }
                if me.starts_with(target, 0, "./") == 1 { is_path = 1 }
                if me.starts_with(target, 0, "/") == 1 { is_path = 1 }
                if target.length() >= 6 && me.starts_with(target, target.length()-6, ".nyash") == 1 { is_path = 1 }
            }
            if is_path == 1 {
                // trim surrounding quotes
                if me.starts_with(target, 0, "\"") == 1 {
                    target = target.substring(1, target.length())
                    if target.length() > 0 && target.substring(target.length()-1, target.length()) == "\"" {
                        target = target.substring(0, target.length()-1)
                    }
                }
                me.add_using("path", target, alias)
            } else {
                me.add_using("ns", target, alias)
            }
        }
        i = j + 1
    }
    return 0
}
// Return the JSON array text holding the collected `using` entries.
get_usings_json() {
    local json = me.usings_json
    return json
}
// Convert a decimal digit string to an integer. Returns 0 for an empty string.
// Any character other than "1".."9" contributes 0 at its position.
to_int(s) {
    local n = s.length()
    if n == 0 { return 0 }
    // digit value = index in this table + 1; "0" and unknown characters stay 0
    local table = "123456789"
    local acc = 0
    local i = 0
    loop(i < n) {
        local d = s.substring(i, i+1)
        local dv = 0
        local k = 0
        loop(k < 9) {
            if table.substring(k, k+1) == d { dv = k + 1 }
            k = k + 1
        }
        acc = acc * 10 + dv
        i = i + 1
    }
    return acc
}
// Advance from index `i` past whitespace (as defined by is_space) and return
// the new index. Returns `i` unchanged when `src` is null. The number of
// iterations is capped, so the scan stops early on extremely long runs.
skip_ws(src, i) {
    if src == null { return i }
    local n = src.length()
    local pos = i
    local steps = 0
    loop(steps <= 100000) {
        if pos >= n { return pos }
        if me.is_space(src.substring(pos, pos+1)) { pos = pos + 1 } else { return pos }
        steps = steps + 1
    }
    return pos
}
// identifiers/strings not required for Stage1 beyond string literal parse above
// using metadata omitted in Stage1
// Parse a run of decimal digits starting at index `i`.
// Stores the index just past the digits via gpos_set and returns an Int node,
// e.g. {"type":"Int","value":42}.
// This is also the fallback of parse_factor2 for unknown tokens, so it may be
// called where no digit is present. In that case gpos stays at `i` and the
// value is emitted as 0; an empty value would make the output invalid JSON.
parse_number2(src, i) {
    local n = src.length()
    local j = i
    local guard = 0
    local max = 100000
    // guard caps the number of digits consumed (same bound as before)
    loop(j < n && guard <= max) {
        if me.is_digit(src.substring(j, j+1)) {
            j = j + 1
            guard = guard + 1
        } else { break }
    }
    local s = src.substring(i, j)
    if s.length() == 0 { s = "0" }
    me.gpos_set(j)
    return "{\"type\":\"Int\",\"value\":" + s + "}"
}
// Parse a string literal whose opening quote is at index `i` (not re-checked)
// and return a Str node: {"type":"Str","value":"..."} with the value escaped
// by esc_json. A backslash makes the following character be taken literally.
// The index after the literal (or where scanning stopped) is stored via gpos_set.
parse_string2(src, i) {
    local n = src.length()
    local pos = i + 1
    local text = ""
    local steps = 0
    local closed = 0
    loop(closed == 0 && pos < n) {
        // iteration cap, same bound as before (200000)
        if steps > 200000 { break }
        steps = steps + 1
        local ch = src.substring(pos, pos+1)
        if ch == "\"" {
            pos = pos + 1
            closed = 1
        } else {
            if ch == "\\" && pos + 1 < n {
                text = text + src.substring(pos+1, pos+2)
                pos = pos + 2
            } else {
                text = text + ch
                pos = pos + 1
            }
        }
    }
    me.gpos_set(pos)
    return "{\"type\":\"Str\",\"value\":\"" + me.esc_json(text) + "\"}"
}
// Parse one primary expression starting at index `i` (leading whitespace is
// skipped) and return its JSON node. The index after the expression is stored
// via gpos_set. Forms handled, in this order:
//   ( expr )                 -> the inner node
//   "string"                 -> Str
//   {"k": v, ...}            -> Call map.of with args [Str(k1), v1, Str(k2), v2, ...]
//   [e1, e2, ...]            -> Call array.of
//   true / false             -> Bool
//   new Class(args)          -> New
//   name, name(args), and any chain of .method(args) -> Var / Call / Method
//   anything else            -> parse_number2
parse_factor2(src, i) {
    local j = me.skip_ws(src, i)
    local ch = src.substring(j, j+1)
    // Parenthesized
    if ch == "(" {
        local inner = me.parse_expr2(src, j + 1)
        local k = me.gpos_get()
        k = me.skip_ws(src, k)
        // the closing ')' is optional: consume it only when present
        if src.substring(k, k+1) == ")" { k = k + 1 }
        me.gpos_set(k)
        return inner
    }
    // String literal
    if ch == "\"" { return me.parse_string2(src, j) }
    // Map literal: {"k": v, ...} (string keys only) → Call{name:"map.of", args:[Str(k1), v1, Str(k2), v2, ...]}
    if ch == "{" {
        local n = src.length()
        j = j + 1
        local out = "["
        local first = 1
        local cont = 1
        local guard = 0
        local max = 400000
        loop(cont == 1) {
            if guard > max { cont = 0 } else { guard = guard + 1 }
            j = me.skip_ws(src, j)
            if j >= n { cont = 0 } else {
                if src.substring(j, j+1) == "}" { j = j + 1 cont = 0 } else {
                    // key (string only for Stage-2)
                    if src.substring(j, j+1) != "\"" {
                        // degrade by skipping one char to avoid infinite loop
                        j = j + 1
                        continue
                    }
                    // start of this entry, used by the progress guard below
                    local before = j
                    local key_raw = me.read_string_lit(src, j)
                    j = me.gpos_get()
                    j = me.skip_ws(src, j)
                    if src.substring(j, j+1) == ":" { j = j + 1 }
                    j = me.skip_ws(src, j)
                    local val_json = me.parse_expr2(src, j)
                    j = me.gpos_get()
                    local key_json = "{\"type\":\"Str\",\"value\":\"" + me.esc_json(key_raw) + "\"}"
                    if first == 1 { out = out + key_json + "," + val_json first = 0 } else { out = out + "," + key_json + "," + val_json }
                    // optional comma
                    j = me.skip_ws(src, j)
                    if j < n && src.substring(j, j+1) == "," { j = j + 1 }
                    // progress guard (in case of malformed input).
                    // It must compare against the entry start: measuring from
                    // after the value fires whenever the entry is directly
                    // followed by '}', which skipped the closing brace and let
                    // the loop run on past the end of the literal.
                    if j <= before { if j < n { j = j + 1 } else { j = n } }
                }
            }
        }
        out = out + "]"
        me.gpos_set(j)
        return "{\"type\":\"Call\",\"name\":\"map.of\",\"args\":" + out + "}"
    }
    // Array literal: [e1, e2, ...] → Call{name:"array.of", args:[...]}
    if ch == "[" {
        local n = src.length()
        j = j + 1
        local out = "["
        local first = 1
        local cont = 1
        local guard = 0
        local max = 400000
        loop(cont == 1) {
            if guard > max { cont = 0 } else { guard = guard + 1 }
            j = me.skip_ws(src, j)
            if j >= n { cont = 0 } else {
                if src.substring(j, j+1) == "]" { j = j + 1 cont = 0 } else {
                    local before = j
                    local ej = me.parse_expr2(src, j)
                    j = me.gpos_get()
                    if first == 1 { out = out + ej first = 0 } else { out = out + "," + ej }
                    // optional comma+whitespace
                    j = me.skip_ws(src, j)
                    if j < n && src.substring(j, j+1) == "," { j = j + 1 }
                    // progress guard: force one step when the element consumed nothing
                    if j <= before { if j < n { j = j + 1 } else { j = n } }
                }
            }
        }
        out = out + "]"
        me.gpos_set(j)
        return "{\"type\":\"Call\",\"name\":\"array.of\",\"args\":" + out + "}"
    }
    // true/false
    if me.starts_with_kw(src, j, "true") == 1 { me.gpos_set(j + 4) return "{\"type\":\"Bool\",\"value\":true}" }
    if me.starts_with_kw(src, j, "false") == 1 { me.gpos_set(j + 5) return "{\"type\":\"Bool\",\"value\":false}" }
    // new Class(args)
    if me.starts_with_kw(src, j, "new") == 1 {
        local p = me.skip_ws(src, j + 3)
        // read_ident2 returns "name@pos"; split at the last '@'
        local idp = me.read_ident2(src, p)
        local at = idp.lastIndexOf("@")
        local cls = idp.substring(0, at)
        local k = me.to_int(idp.substring(at+1, idp.length()))
        k = me.skip_ws(src, k)
        if src.substring(k, k+1) == "(" { k = k + 1 }
        // parse_args2 returns "[...]@pos"
        local args_and_pos = me.parse_args2(src, k)
        local at2 = args_and_pos.lastIndexOf("@")
        local args_json = args_and_pos.substring(0, at2)
        k = me.to_int(args_and_pos.substring(at2+1, args_and_pos.length()))
        k = me.skip_ws(src, k)
        if src.substring(k, k+1) == ")" { k = k + 1 }
        me.gpos_set(k)
        return "{\"type\":\"New\",\"class\":\"" + cls + "\",\"args\":" + args_json + "}"
    }
    // Identifier / Call / Method chain
    if me.is_alpha(ch) {
        local idp = me.read_ident2(src, j)
        local at = idp.lastIndexOf("@")
        local name = idp.substring(0, at)
        local k = me.to_int(idp.substring(at+1, idp.length()))
        local node = "{\"type\":\"Var\",\"name\":\"" + name + "\"}"
        local cont2 = 1
        // keep wrapping `node` while a '(' or '.' follows
        loop(cont2 == 1) {
            k = me.skip_ws(src, k)
            local tch = src.substring(k, k+1)
            if tch == "(" {
                k = k + 1
                local args_and_pos = me.parse_args2(src, k)
                local at2 = args_and_pos.lastIndexOf("@")
                local args_json = args_and_pos.substring(0, at2)
                k = me.to_int(args_and_pos.substring(at2+1, args_and_pos.length()))
                k = me.skip_ws(src, k)
                if src.substring(k, k+1) == ")" { k = k + 1 }
                node = "{\"type\":\"Call\",\"name\":\"" + name + "\",\"args\":" + args_json + "}"
            } else {
                if tch == "." {
                    k = k + 1
                    k = me.skip_ws(src, k)
                    local midp = me.read_ident2(src, k)
                    local at3 = midp.lastIndexOf("@")
                    local mname = midp.substring(0, at3)
                    k = me.to_int(midp.substring(at3+1, midp.length()))
                    k = me.skip_ws(src, k)
                    if src.substring(k, k+1) == "(" { k = k + 1 }
                    local args2 = me.parse_args2(src, k)
                    local at4 = args2.lastIndexOf("@")
                    local args_json2 = args2.substring(0, at4)
                    k = me.to_int(args2.substring(at4+1, args2.length()))
                    k = me.skip_ws(src, k)
                    if src.substring(k, k+1) == ")" { k = k + 1 }
                    // the node built so far becomes the receiver
                    node = "{\"type\":\"Method\",\"recv\":" + node + ",\"method\":\"" + mname + "\",\"args\":" + args_json2 + "}"
                } else { cont2 = 0 }
            }
        }
        me.gpos_set(k)
        return node
    }
    // Fallback: number
    return me.parse_number2(src, j)
}
// Unary minus binds tighter than * and /.
// `-x` is encoded as Binary "-" with an Int 0 on the left and x on the right;
// anything else is handed straight to parse_factor2. In both cases gpos is
// left where parse_factor2 put it.
parse_unary2(src, i) {
    local pos = me.skip_ws(src, i)
    if src.substring(pos, pos+1) != "-" { return me.parse_factor2(src, pos) }
    local operand = me.parse_factor2(src, pos + 1)
    return "{\"type\":\"Binary\",\"op\":\"-\",\"lhs\":{\"type\":\"Int\",\"value\":0},\"rhs\":" + operand + "}"
}
// Parse a left-associative chain of unary operands joined by '*' or '/'.
// Returns the resulting node (Binary nodes nest to the left) and stores the
// index after the chain, past any trailing whitespace, via gpos_set.
parse_term2(src, i) {
    local acc = me.parse_unary2(src, i)
    local pos = me.gpos_get()
    local more = 1
    loop(more == 1) {
        pos = me.skip_ws(src, pos)
        // "" stands for "end of input" and matches no operator
        local op = ""
        if pos < src.length() { op = src.substring(pos, pos+1) }
        if op == "*" || op == "/" {
            local rhs = me.parse_unary2(src, pos + 1)
            pos = me.gpos_get()
            acc = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + acc + ",\"rhs\":" + rhs + "}"
        } else { more = 0 }
    }
    me.gpos_set(pos)
    return acc
}
// Parse a left-associative chain of terms joined by '+' or '-'.
// Returns the resulting node (Binary nodes nest to the left) and stores the
// index after the chain, past any trailing whitespace, via gpos_set.
parse_sum2(src, i) {
    local acc = me.parse_term2(src, i)
    local pos = me.gpos_get()
    local more = 1
    loop(more == 1) {
        pos = me.skip_ws(src, pos)
        // "" stands for "end of input" and matches no operator
        local op = ""
        if pos < src.length() { op = src.substring(pos, pos+1) }
        if op == "+" || op == "-" {
            local rhs = me.parse_term2(src, pos + 1)
            pos = me.gpos_get()
            acc = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + acc + ",\"rhs\":" + rhs + "}"
        } else { more = 0 }
    }
    me.gpos_set(pos)
    return acc
}
// Parse `sum` optionally followed by one comparison (==, !=, <=, >=, <, >)
// and a second `sum`. Returns a Compare node when an operator is present,
// otherwise the left-hand node alone. Comparisons do not chain here.
parse_compare2(src, i) {
    local left = me.parse_sum2(src, i)
    local pos = me.skip_ws(src, me.gpos_get())
    // try the two-character operators before the one-character ones
    local op = ""
    local two = src.substring(pos, pos+2)
    if two == "==" || two == "!=" || two == "<=" || two == ">=" {
        op = two
    } else {
        local one = src.substring(pos, pos+1)
        if one == "<" || one == ">" { op = one }
    }
    if op == "" {
        me.gpos_set(pos)
        return left
    }
    // parse_sum2 leaves gpos just past the right-hand side
    local right = me.parse_sum2(src, pos + op.length())
    return "{\"type\":\"Compare\",\"op\":\"" + op + "\",\"lhs\":" + left + ",\"rhs\":" + right + "}"
}
// Top-level expression: comparisons joined left-associatively by && or ||
// (both at the same precedence). Returns the node and stores the index after
// the expression, past any trailing whitespace, via gpos_set.
parse_expr2(src, i) {
    local acc = me.parse_compare2(src, i)
    local pos = me.gpos_get()
    local more = 1
    loop(more == 1) {
        pos = me.skip_ws(src, pos)
        local op = src.substring(pos, pos+2)
        if op == "&&" || op == "||" {
            local rhs = me.parse_compare2(src, pos + 2)
            pos = me.gpos_get()
            acc = "{\"type\":\"Logical\",\"op\":\"" + op + "\",\"lhs\":" + acc + ",\"rhs\":" + rhs + "}"
        } else { more = 0 }
    }
    me.gpos_set(pos)
    return acc
}
// Parse a comma-separated argument list starting at index `i`, which is
// expected to be just after the opening '('. The closing ')' is NOT consumed.
// Returns "[arg1,arg2,...]@pos", where pos is the index at which parsing
// stopped; an immediately following ')' yields "[]@pos".
parse_args2(src, i) {
    local n = src.length()
    local pos = me.skip_ws(src, i)
    if pos < n && src.substring(pos, pos+1) == ")" { return "[]@" + me.i2s(pos) }
    // first argument
    local list = "[" + me.parse_expr2(src, pos)
    pos = me.gpos_get()
    // further arguments, each introduced by a comma
    local more = 1
    local steps = 0
    loop(more == 1) {
        // iteration cap: once exceeded, this pass is the last one
        if steps > 100000 { more = 0 } else { steps = steps + 1 }
        pos = me.skip_ws(src, pos)
        if pos < n && src.substring(pos, pos+1) == "," {
            pos = me.skip_ws(src, pos + 1)
            local arg = me.parse_expr2(src, pos)
            pos = me.gpos_get()
            list = list + "," + arg
        } else { more = 0 }
    }
    return list + "]@" + me.i2s(pos)
}
// Parse one statement starting at index `i` (leading whitespace is skipped).
// Returns the statement's JSON node, or "" for a `using` statement, which is
// only recorded through add_using. The index after the statement is stored
// via gpos_set; every branch forces at least one character of progress so
// callers cannot loop forever on malformed input.
// Statements handled, in this order: using, IDENT = expr, return, local, if,
// loop, break, continue, throw, try/catch/finally, and finally a bare expression.
parse_stmt2(src, i) {
    local j = me.skip_ws(src, i)
    local stmt_start = j
    if me.starts_with_kw(src, j, "using") == 1 {
        j = j + 5
        j = me.skip_ws(src, j)
        if src.substring(j, j+1) == "\"" {
            // using "path" [as Alias]
            local p = me.read_string_lit(src, j)
            j = me.gpos_get()
            j = me.skip_ws(src, j)
            local alias = null
            if me.starts_with_kw(src, j, "as") == 1 {
                j = j + 2
                j = me.skip_ws(src, j)
                // read_ident2 returns "name@pos"; split at the last '@'
                local idp = me.read_ident2(src, j)
                local at = idp.lastIndexOf("@")
                alias = idp.substring(0, at)
                j = me.to_int(idp.substring(at+1, idp.length()))
            }
            me.add_using("path", p, alias)
        } else {
            if me.is_alpha(src.substring(j, j+1)) {
                // using a.b.c [as Alias]
                local idp = me.read_ident2(src, j)
                local at = idp.lastIndexOf("@")
                local name = idp.substring(0, at)
                j = me.to_int(idp.substring(at+1, idp.length()))
                local cont = 1
                loop(cont == 1) {
                    j = me.skip_ws(src, j)
                    if src.substring(j, j+1) == "." {
                        j = j + 1
                        j = me.skip_ws(src, j)
                        idp = me.read_ident2(src, j)
                        at = idp.lastIndexOf("@")
                        name = name + "." + idp.substring(0, at)
                        j = me.to_int(idp.substring(at+1, idp.length()))
                    } else { cont = 0 }
                }
                j = me.skip_ws(src, j)
                local alias2 = null
                if me.starts_with_kw(src, j, "as") == 1 {
                    j = j + 2
                    j = me.skip_ws(src, j)
                    idp = me.read_ident2(src, j)
                    at = idp.lastIndexOf("@")
                    alias2 = idp.substring(0, at)
                    j = me.to_int(idp.substring(at+1, idp.length()))
                }
                me.add_using("ns", name, alias2)
            }
        }
        // ensure progress
        if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
        me.gpos_set(j)
        return ""
    }
    // simple assignment: IDENT '=' expr ; → JSON v0 Local{name, expr} (Stage2 uses Local for updates)
    if me.is_alpha(src.substring(j, j+1)) {
        local idp0 = me.read_ident2(src, j)
        local at0 = idp0.lastIndexOf("@")
        if at0 > 0 {
            local name0 = idp0.substring(0, at0)
            local k0 = me.to_int(idp0.substring(at0+1, idp0.length()))
            k0 = me.skip_ws(src, k0)
            // A lone '=' only: the first '=' of "==" must not be taken as an
            // assignment, otherwise `a == b` used as a statement is misparsed.
            if src.substring(k0, k0+1) == "=" && src.substring(k0+1, k0+2) != "=" {
                k0 = k0 + 1
                k0 = me.skip_ws(src, k0)
                local e0 = me.parse_expr2(src, k0)
                k0 = me.gpos_get()
                if k0 <= stmt_start { if k0 < src.length() { k0 = k0 + 1 } else { k0 = src.length() } }
                me.gpos_set(k0)
                return "{\"type\":\"Local\",\"name\":\"" + name0 + "\",\"expr\":" + e0 + "}"
            }
        }
    }
    if me.starts_with_kw(src, j, "return") == 1 {
        j = j + 6
        j = me.skip_ws(src, j)
        local e = me.parse_expr2(src, j)
        j = me.gpos_get()
        if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
        me.gpos_set(j)
        return "{\"type\":\"Return\",\"expr\":" + e + "}"
    }
    if me.starts_with_kw(src, j, "local") == 1 {
        j = j + 5
        j = me.skip_ws(src, j)
        local idp = me.read_ident2(src, j)
        local at = idp.lastIndexOf("@")
        local name = idp.substring(0, at)
        j = me.to_int(idp.substring(at+1, idp.length()))
        j = me.skip_ws(src, j)
        // the '=' is optional: consume it only when present
        if src.substring(j, j+1) == "=" { j = j + 1 }
        j = me.skip_ws(src, j)
        local e2 = me.parse_expr2(src, j)
        j = me.gpos_get()
        if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
        me.gpos_set(j)
        return "{\"type\":\"Local\",\"name\":\"" + name + "\",\"expr\":" + e2 + "}"
    }
    if me.starts_with_kw(src, j, "if") == 1 {
        j = j + 2
        j = me.skip_ws(src, j)
        // the condition may or may not be wrapped in parentheses
        local paren = 0
        if src.substring(j, j+1) == "(" { paren = 1 j = j + 1 }
        local cond = me.parse_expr2(src, j)
        j = me.gpos_get()
        if paren == 1 {
            j = me.skip_ws(src, j)
            if src.substring(j, j+1) == ")" { j = j + 1 }
        }
        j = me.skip_ws(src, j)
        // parse_block2 returns "[...]@pos"
        local then_res = me.parse_block2(src, j)
        local at1 = then_res.lastIndexOf("@")
        local then_json = then_res.substring(0, at1)
        j = me.to_int(then_res.substring(at1+1, then_res.length()))
        j = me.skip_ws(src, j)
        local else_json = null
        if me.starts_with_kw(src, j, "else") == 1 {
            j = j + 4
            j = me.skip_ws(src, j)
            local else_res = me.parse_block2(src, j)
            local at2 = else_res.lastIndexOf("@")
            else_json = else_res.substring(0, at2)
            j = me.to_int(else_res.substring(at2+1, else_res.length()))
        }
        if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
        me.gpos_set(j)
        if else_json == null { return "{\"type\":\"If\",\"cond\":" + cond + ",\"then\":" + then_json + "}" } else { return "{\"type\":\"If\",\"cond\":" + cond + ",\"then\":" + then_json + ",\"else\":" + else_json + "}" }
    }
    if me.starts_with_kw(src, j, "loop") == 1 {
        j = j + 4
        j = me.skip_ws(src, j)
        if src.substring(j, j+1) == "(" { j = j + 1 }
        local cond = me.parse_expr2(src, j)
        j = me.gpos_get()
        j = me.skip_ws(src, j)
        if src.substring(j, j+1) == ")" { j = j + 1 }
        j = me.skip_ws(src, j)
        local body_res = me.parse_block2(src, j)
        local at3 = body_res.lastIndexOf("@")
        local body_json = body_res.substring(0, at3)
        j = me.to_int(body_res.substring(at3+1, body_res.length()))
        if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
        me.gpos_set(j)
        return "{\"type\":\"Loop\",\"cond\":" + cond + ",\"body\":" + body_json + "}"
    }
    // Stage-3 acceptance (syntax only): break / continue → no-op expression
    if me.starts_with_kw(src, j, "break") == 1 {
        j = j + 5
        if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
        me.gpos_set(j)
        return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
    }
    if me.starts_with_kw(src, j, "continue") == 1 {
        j = j + 8
        if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
        me.gpos_set(j)
        return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
    }
    // Stage-3 acceptance: throw expr → degrade to Expr(expr)
    if me.starts_with_kw(src, j, "throw") == 1 {
        j = j + 5
        j = me.skip_ws(src, j)
        local e_throw = me.parse_expr2(src, j)
        j = me.gpos_get()
        if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
        me.gpos_set(j)
        return "{\"type\":\"Expr\",\"expr\":" + e_throw + "}"
    }
    // Stage-3 acceptance: try { ... } (catch ...)* (finally { ... })? → degrade to no-op (syntax only)
    if me.starts_with_kw(src, j, "try") == 1 {
        j = j + 3
        j = me.skip_ws(src, j)
        // parse try block (its statements are discarded; only the end position is kept)
        local try_res = me.parse_block2(src, j)
        local at_t = try_res.lastIndexOf("@")
        j = me.to_int(try_res.substring(at_t+1, try_res.length()))
        // zero or more catch
        local guard_ct = 0
        local max_ct = 100
        local cont_ct = 1
        loop(cont_ct == 1) {
            if guard_ct > max_ct { cont_ct = 0 } else { guard_ct = guard_ct + 1 }
            j = me.skip_ws(src, j)
            if me.starts_with_kw(src, j, "catch") == 1 {
                j = j + 5
                j = me.skip_ws(src, j)
                if src.substring(j, j+1) == "(" {
                    j = j + 1
                    j = me.skip_ws(src, j)
                    // optional type + name: up to two identifiers are skipped
                    if me.is_alpha(src.substring(j, j+1)) {
                        local id1 = me.read_ident2(src, j)
                        local at1 = id1.lastIndexOf("@")
                        j = me.to_int(id1.substring(at1+1, id1.length()))
                        j = me.skip_ws(src, j)
                    }
                    if me.is_alpha(src.substring(j, j+1)) {
                        local id2 = me.read_ident2(src, j)
                        local at2 = id2.lastIndexOf("@")
                        j = me.to_int(id2.substring(at2+1, id2.length()))
                        j = me.skip_ws(src, j)
                    }
                    if src.substring(j, j+1) == ")" { j = j + 1 }
                }
                j = me.skip_ws(src, j)
                // catch body
                local c_res = me.parse_block2(src, j)
                local atc = c_res.lastIndexOf("@")
                j = me.to_int(c_res.substring(atc+1, c_res.length()))
            } else { cont_ct = 0 }
        }
        // optional finally
        j = me.skip_ws(src, j)
        if me.starts_with_kw(src, j, "finally") == 1 {
            j = j + 7
            j = me.skip_ws(src, j)
            local f_res = me.parse_block2(src, j)
            local atf = f_res.lastIndexOf("@")
            j = me.to_int(f_res.substring(atf+1, f_res.length()))
        }
        if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
        me.gpos_set(j)
        return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
    }
    // Fallback: expression or unknown token — ensure progress even on malformed input
    local expr_start = j
    local e = me.parse_expr2(src, j)
    j = me.gpos_get()
    if j <= expr_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
    me.gpos_set(j)
    return "{\"type\":\"Expr\",\"expr\":" + e + "}"
}
// Parse a `{ ... }` block starting at index `i` (leading whitespace skipped).
// Returns "[stmt1,stmt2,...]@pos", where pos is the index after the closing
// '}' (or where the input ended). When no '{' is found, returns "[]@pos"
// without consuming anything beyond the whitespace.
// Statements that parse_stmt2 reports as "" are left out of the array.
parse_block2(src, i) {
    local pos = me.skip_ws(src, i)
    if src.substring(pos, pos+1) != "{" { return "[]@" + me.i2s(pos) }
    pos = pos + 1
    local items = ""
    local sep = ""
    local in_block = 1
    loop(in_block == 1) {
        pos = me.skip_ws(src, pos)
        if pos >= src.length() { in_block = 0 } else {
            if src.substring(pos, pos+1) == "}" {
                pos = pos + 1
                in_block = 0
            } else {
                local stmt_at = pos
                local stmt = me.parse_stmt2(src, pos)
                pos = me.gpos_get()
                // Progress guard: force forward movement on malformed input
                if pos <= stmt_at {
                    if pos < src.length() { pos = pos + 1 } else { pos = src.length() }
                    me.gpos_set(pos)
                }
                // swallow any run of optional semicolons after the statement
                local semi = 1
                local steps = 0
                loop(semi == 1) {
                    if steps > 100000 { semi = 0 } else { steps = steps + 1 }
                    pos = me.skip_ws(src, pos)
                    if pos < src.length() && src.substring(pos, pos+1) == ";" { pos = pos + 1 } else { semi = 0 }
                }
                if stmt.length() > 0 {
                    items = items + sep + stmt
                    sep = ","
                }
            }
        }
    }
    return "[" + items + "]@" + me.i2s(pos)
}
// Parse the whole source text as a sequence of top-level statements and
// return the Program document: {"version":0,"kind":"Program","body":[...]}.
// Statements that parse_stmt2 reports as "" are left out of the body.
parse_program2(src) {
    local pos = me.skip_ws(src, 0)
    local items = ""
    local sep = ""
    local running = 1
    loop(running == 1) {
        pos = me.skip_ws(src, pos)
        if pos >= src.length() { running = 0 } else {
            local stmt_at = pos
            local stmt = me.parse_stmt2(src, pos)
            pos = me.gpos_get()
            // Progress guard: force forward movement on malformed input
            if pos <= stmt_at {
                if pos < src.length() { pos = pos + 1 } else { pos = src.length() }
                me.gpos_set(pos)
            }
            // swallow any run of optional semicolons between statements
            local semi = 1
            local steps = 0
            loop(semi == 1) {
                if steps > 100000 { semi = 0 } else { steps = steps + 1 }
                pos = me.skip_ws(src, pos)
                if pos < src.length() && src.substring(pos, pos+1) == ";" { pos = pos + 1 } else { semi = 0 }
            }
            if stmt.length() > 0 {
                items = items + sep + stmt
                sep = ","
            }
        }
    }
    return "{\"version\":0,\"kind\":\"Program\",\"body\":[" + items + "]}"
}
}
// Minimal entry-point box: main ignores its arguments and returns 0.
static box ParserStub {
    main(args) {
        return 0
    }
}