📚 Phase 15 - セルフホスティング戦略の明確化とEXE-first実装

## 主な変更点

### 🎯 戦略の転換と明確化
- PyVMを開発ツールとして位置づけ(本番経路ではない)
- EXE-first戦略を明確に優先(build_compiler_exe.sh実装済み)
- Phase順序の整理: 15.2(LLVM)→15.3(コンパイラ)→15.4(VM)

### 🚀 セルフホスティング基盤の実装
- apps/selfhost-compiler/にNyashコンパイラMVP実装
  - compiler.nyash: メインエントリー(位置引数対応)
  - boxes/: parser_box, emitter_box, debug_box分離
- tools/build_compiler_exe.sh: ネイティブEXEビルド+dist配布
- Python MVPパーサーStage-2完成(local/if/loop/call/method/new)

### 📝 ドキュメント整備
- Phase 15 README/ROADMAP更新(Self-Hosting優先明記)
- docs/guides/exe-first-wsl.md: WSLクイックスタート追加
- docs/private/papers/: 論文G~L、爆速事件簿41事例収録

### 🔧 技術的改善
- JSON v0 Bridge: If/Loop PHI生成実装(ChatGPT協力)
- PyVM/llvmliteパリティ検証スイート追加
- using/namespace機能(gated実装、Phase 15では非解決)

## 次のステップ
1. パーサー無限ループ修正(未実装関数の実装)
2. EXEビルドとセルフホスティング実証
3. c0→c1→c1'ブートストラップループ確立

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Selfhosting Dev
2025-09-15 18:44:49 +09:00
parent 8f11c79f19
commit d90216e9c4
68 changed files with 4521 additions and 1641 deletions

View File

@ -0,0 +1,38 @@
// DebugBox — conditional debug output aggregator (extracted)
box DebugBox {
enabled
birth() {
me.enabled = 0
return 0
}
set_enabled(v) {
me.enabled = v
return 0
}
log(msg) {
if me.enabled {
local c = new ConsoleBox()
c.println("[DEBUG] " + msg)
}
return 0
}
info(msg) {
if me.enabled {
local c = new ConsoleBox()
c.println("[INFO] " + msg)
}
return 0
}
error(msg) {
if me.enabled {
local c = new ConsoleBox()
c.println("[ERROR] " + msg)
}
return 0
}
}
// Include stub to satisfy current include lowering (expects a static box)
static box DebugStub {
main(args) { return 0 }
}

View File

@ -0,0 +1,18 @@
// EmitterBox — thin wrapper to emit JSON v0 (extracted)
box EmitterBox {
emit_program(json, usings_json) {
if usings_json == null { return json }
// Attach meta.usings when available and non-empty (bridge ignores unknown fields)
if usings_json.length() == 0 { return json }
if usings_json == "[]" { return json }
// naive injection: insert before the final '}' of the top-level object
local n = json.lastIndexOf("}")
if n < 0 { return json }
local head = json.substring(0, n)
return head + ",\"meta\":{\"usings\":" + usings_json + "}}"
}
}
static box EmitterStub {
main(args) { return 0 }
}

View File

@ -0,0 +1,456 @@
// ParserBox — Stage1 JSON v0 generatorextracted, simplified for Rust parser
box ParserBox {
gpos
usings_json
birth() { me.gpos = 0 me.usings_json = "[]" return 0 }
esc_json(s) {
local out = ""
local i = 0
local n = s.length()
loop(i < n) {
local ch = s.substring(i, i+1)
if ch == "\\" { out = out + "\\\\" } else { if ch == "\"" { out = out + "\\\"" } else { out = out + ch } }
i = i + 1
}
return out
}
is_digit(ch) { return ch >= "0" && ch <= "9" }
is_space(ch) { return ch == " " || ch == "\t" || ch == "\n" || ch == "\r" }
// simple alpha/underscore check for identifiers
is_alpha(ch) { return (ch >= "A" && ch <= "Z") || (ch >= "a" && ch <= "z") || ch == "_" }
gpos_set(i) { me.gpos = i return 0 }
gpos_get() { return me.gpos }
// lightweight string helpers
starts_with(src, i, pat) {
local n = src.length()
local m = pat.length()
if i + m > n { return 0 }
local k = 0
loop(k < m) {
if src.substring(i + k, i + k + 1) != pat.substring(k, k + 1) { return 0 }
k = k + 1
}
return 1
}
index_of(src, i, pat) {
local n = src.length()
local m = pat.length()
if m == 0 { return i }
local j = i
loop(j + m <= n) {
if me.starts_with(src, j, pat) { return j }
j = j + 1
}
return -1
}
trim(s) {
local i = 0
local n = s.length()
loop(i < n && (s.substring(i,i+1) == " " || s.substring(i,i+1) == "\t")) { i = i + 1 }
local j = n
loop(j > i && (s.substring(j-1,j) == " " || s.substring(j-1,j) == "\t" || s.substring(j-1,j) == ";")) { j = j - 1 }
return s.substring(i, j)
}
// keyword match at position i with word-boundary (next char not [A-Za-z0-9_])
starts_with_kw(src, i, kw) {
if me.starts_with(src, i, kw) == 0 { return 0 }
local n = src.length()
local j = i + kw.length()
if j >= n { return 1 }
local ch = src.substring(j, j+1)
if me.is_alpha(ch) || me.is_digit(ch) { return 0 }
return 1
}
// integer to string (uses string concat coercion)
i2s(v) { return "" + v }
// Read identifier starting at i: [A-Za-z_][A-Za-z0-9_]*; returns "name@pos"
read_ident2(src, i) {
local n = src.length()
local j = i
if j >= n { return "@" + me.i2s(i) }
local ch = src.substring(j, j+1)
if me.is_alpha(ch) == 0 { return "@" + me.i2s(i) }
j = j + 1
loop(j < n) {
local c = src.substring(j, j+1)
if me.is_alpha(c) || me.is_digit(c) { j = j + 1 } else { break }
}
local name = src.substring(i, j)
return name + "@" + me.i2s(j)
}
// Read string literal at i (i points to '"'); returns raw content (no quotes), updates gpos
read_string_lit(src, i) {
local n = src.length()
local j = i
if j >= n || src.substring(j, j+1) != "\"" { me.gpos_set(i) return "" }
j = j + 1
local out = ""
local guard = 0
local max = 200000
loop(j < n) {
if guard > max { break } else { guard = guard + 1 }
local ch = src.substring(j, j+1)
if ch == "\"" { j = j + 1 me.gpos_set(j) return out }
if ch == "\\" && j + 1 < n {
local nx = src.substring(j+1, j+2)
if nx == "\"" { out = out + "\"" } else { if nx == "\\" { out = out + "\\" } else { out = out + nx } }
j = j + 2
} else { out = out + ch j = j + 1 }
}
me.gpos_set(j)
return out
}
// Append a using entry into usings_json (no-op acceptance path)
add_using(kind, target, alias) {
// kind: "path" or "ns"; target: path or namespace; alias: nullable
local cur = me.usings_json
if cur == null || cur.length() == 0 { cur = "[]" }
// Build entry
local name = ""
local path = null
if kind == "path" {
path = target
if alias != null { name = alias } else {
local p = target
// basename
local idx = -1
local t = 0
loop(t < p.length()) { if p.substring(t,t+1) == "/" { idx = t } t = t + 1 }
if idx >= 0 { p = p.substring(idx+1, p.length()) }
if p.length() > 6 && me.starts_with(p, p.length()-6, ".nyash") == 1 { p = p.substring(0, p.length()-6) }
name = p
}
} else {
name = target
if alias != null { name = alias }
}
local entry = "{\"name\":\"" + me.esc_json(name) + "\""
if path != null { entry = entry + ",\"path\":\"" + me.esc_json(path) + "\"" }
entry = entry + "}"
// Insert before closing ']' of array
if cur == "[]" { me.usings_json = "[" + entry + "]" return 0 }
// naive append
local pos = cur.lastIndexOf("]")
if pos < 0 { me.usings_json = "[" + entry + "]" return 0 }
me.usings_json = cur.substring(0, pos) + "," + entry + "]"
return 0
}
// Collect `using` lines into JSON array stored in me.usings_json (no-op acceptance)
extract_usings(src) {
if src == null { me.usings_json = "[]" return 0 }
local n = src.length()
local i = 0
local first = 1
local out = "["
loop(i < n) {
// read a line
local j = i
loop(j < n && src.substring(j, j+1) != "\n") { j = j + 1 }
local line = src.substring(i, j)
// process
local k = 0
loop(k < line.length() && (line.substring(k,k+1) == " " || line.substring(k,k+1) == "\t")) { k = k + 1 }
if me.starts_with(line, k, "using ") == 1 {
local rest = me.trim(line.substring(k + 6, line.length()))
// split on ' as '
local as_pos = me.index_of(rest, 0, " as ")
local target = rest
local alias = null
if as_pos >= 0 {
target = me.trim(rest.substring(0, as_pos))
alias = me.trim(rest.substring(as_pos + 4, rest.length()))
}
// path or namespace
local is_path = 0
if target.length() > 0 {
if me.starts_with(target, 0, "\"") == 1 { is_path = 1 }
if me.starts_with(target, 0, "./") == 1 { is_path = 1 }
if me.starts_with(target, 0, "/") == 1 { is_path = 1 }
if target.length() >= 6 && me.starts_with(target, target.length()-6, ".nyash") == 1 { is_path = 1 }
}
local name = ""
local path = null
if is_path == 1 {
// trim quotes
if me.starts_with(target, 0, "\"") == 1 {
target = target.substring(1, target.length())
if target.length() > 0 && target.substring(target.length()-1, target.length()) == "\"" {
target = target.substring(0, target.length()-1)
}
}
path = target
if alias != null { name = alias } else {
// derive from basename
local p = target
// find last '/'
local idx = -1
local t = 0
loop(t < p.length()) { if p.substring(t,t+1) == "/" { idx = t } t = t + 1 }
if idx >= 0 { p = p.substring(idx+1, p.length()) }
// strip .nyash
if p.length() > 6 && me.starts_with(p, p.length()-6, ".nyash") == 1 { p = p.substring(0, p.length()-6) }
name = p
}
} else {
name = target
}
// append JSON entry
if first == 0 { out = out + "," } else { first = 0 }
out = out + "{\"name\":\"" + me.esc_json(name) + "\""
if path != null { out = out + ",\"path\":\"" + me.esc_json(path) + "\"" }
out = out + "}"
}
i = j + 1
}
out = out + "]"
me.usings_json = out
return 0
}
get_usings_json() { return me.usings_json }
to_int(s) { local n = s.length() if n == 0 { return 0 } local i = 0 local acc = 0 loop(i < n) { local d = s.substring(i, i+1) local dv = 0 if d == "1" { dv = 1 } else { if d == "2" { dv = 2 } else { if d == "3" { dv = 3 } else { if d == "4" { dv = 4 } else { if d == "5" { dv = 5 } else { if d == "6" { dv = 6 } else { if d == "7" { dv = 7 } else { if d == "8" { dv = 8 } else { if d == "9" { dv = 9 } } } } } } } } } acc = acc * 10 + dv i = i + 1 } return acc }
skip_ws(src, i) { if src == null { return i } local n = src.length() local cont = 1 local guard = 0 local max = 100000 loop(cont == 1) { if guard > max { return i } guard = guard + 1 if i < n { if me.is_space(src.substring(i, i+1)) { i = i + 1 } else { cont = 0 } } else { cont = 0 } } return i }
// identifiers/strings not required for Stage1 beyond string literal parse above
// using metadata omitted in Stage1
parse_number2(src, i) { local n = src.length() local j = i local cont = 1 local guard = 0 local max = 100000 loop(cont == 1) { if guard > max { cont = 0 } else { guard = guard + 1 if j < n { if me.is_digit(src.substring(j, j+1)) { j = j + 1 } else { cont = 0 } } else { cont = 0 } } } local s = src.substring(i, j) me.gpos_set(j) return "{\"type\":\"Int\",\"value\":" + s + "}" }
parse_string2(src, i) { local n = src.length() local j = i + 1 local out = "" local guard = 0 local max = 200000 loop(j < n) { if guard > max { break } guard = guard + 1 local ch = src.substring(j, j+1) if ch == "\"" { j = j + 1 me.gpos_set(j) return "{\"type\":\"Str\",\"value\":\"" + me.esc_json(out) + "\"}" } if ch == "\\" && j + 1 < n { local nx = src.substring(j+1, j+2) if nx == "\"" { out = out + "\"" } else { if nx == "\\" { out = out + "\\" } else { out = out + nx } } j = j + 2 } else { out = out + ch j = j + 1 } } me.gpos_set(j) return "{\"type\":\"Str\",\"value\":\"" + me.esc_json(out) + "\"}" }
parse_factor2(src, i) { local j = me.skip_ws(src, i) local ch = src.substring(j, j+1) if ch == "(" { local inner = me.parse_expr2(src, j + 1) local k = me.gpos_get() k = me.skip_ws(src, k) if src.substring(k, k+1) == ")" { k = k + 1 } me.gpos_set(k) return inner } if ch == "\"" { return me.parse_string2(src, j) } if me.starts_with_kw(src, j, "true") == 1 { me.gpos_set(j + 4) return "{\"type\":\"Bool\",\"value\":true}" } if me.starts_with_kw(src, j, "false") == 1 { me.gpos_set(j + 5) return "{\"type\":\"Bool\",\"value\":false}" } if me.starts_with_kw(src, j, "new") == 1 { local p = me.skip_ws(src, j + 3) local idp = me.read_ident2(src, p) local at = idp.lastIndexOf("@") local cls = idp.substring(0, at) local k = me.to_int(idp.substring(at+1, idp.length())) k = me.skip_ws(src, k) if src.substring(k, k+1) == "(" { k = k + 1 } local args_and_pos = me.parse_args2(src, k) local at2 = args_and_pos.lastIndexOf("@") local args_json = args_and_pos.substring(0, at2) k = me.to_int(args_and_pos.substring(at2+1, args_and_pos.length())) k = me.skip_ws(src, k) if src.substring(k, k+1) == ")" { k = k + 1 } me.gpos_set(k) return "{\"type\":\"New\",\"class\":\"" + cls + "\",\"args\":" + args_json + "}" } if me.is_alpha(ch) { local idp = me.read_ident2(src, j) local at = idp.lastIndexOf("@") local name = idp.substring(0, at) local k = me.to_int(idp.substring(at+1, idp.length())) local node = "{\"type\":\"Var\",\"name\":\"" + name + "\"}" local cont = 1 loop(cont == 1) { k = me.skip_ws(src, k) local tch = src.substring(k, k+1) if tch == "(" { k = k + 1 local args_and_pos = me.parse_args2(src, k) local at2 = args_and_pos.lastIndexOf("@") local args_json = args_and_pos.substring(0, at2) k = me.to_int(args_and_pos.substring(at2+1, args_and_pos.length())) k = me.skip_ws(src, k) if src.substring(k, k+1) == ")" { k = k + 1 } node = "{\"type\":\"Call\",\"name\":\"" + name + "\",\"args\":" + args_json + "}" } else { if tch == "." { k = k + 1 k = me.skip_ws(src, k) local midp = me.read_ident2(src, k) local at3 = midp.lastIndexOf("@") local mname = midp.substring(0, at3) k = me.to_int(midp.substring(at3+1, midp.length())) k = me.skip_ws(src, k) if src.substring(k, k+1) == "(" { k = k + 1 } local args2 = me.parse_args2(src, k) local at4 = args2.lastIndexOf("@") local args_json2 = args2.substring(0, at4) k = me.to_int(args2.substring(at4+1, args2.length())) k = me.skip_ws(src, k) if src.substring(k, k+1) == ")" { k = k + 1 } node = "{\"type\":\"Method\",\"recv\":" + node + ",\"method\":\"" + mname + "\",\"args\":" + args_json2 + "}" } else { cont = 0 } } } me.gpos_set(k) return node } return me.parse_number2(src, j) }
// unary minus binds tighter than * /
parse_unary2(src, i) {
local j = me.skip_ws(src, i)
if src.substring(j, j+1) == "-" {
local rhs = me.parse_factor2(src, j + 1)
j = me.gpos_get()
local zero = "{\"type\":\"Int\",\"value\":0}"
me.gpos_set(j)
return "{\"type\":\"Binary\",\"op\":\"-\",\"lhs\":" + zero + ",\"rhs\":" + rhs + "}"
}
return me.parse_factor2(src, j)
}
parse_term2(src, i) { local lhs = me.parse_unary2(src, i) local j = me.gpos_get() local cont = 1 loop(cont == 1) { j = me.skip_ws(src, j) if j >= src.length() { cont = 0 } else { local op = src.substring(j, j+1) if op != "*" && op != "/" { cont = 0 } else { local rhs = me.parse_unary2(src, j+1) j = me.gpos_get() lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}" } } } me.gpos_set(j) return lhs }
parse_sum2(src, i) { local lhs = me.parse_term2(src, i) local j = me.gpos_get() local cont = 1 loop(cont == 1) { j = me.skip_ws(src, j) if j >= src.length() { cont = 0 } else { local op = src.substring(j, j+1) if op != "+" && op != "-" { cont = 0 } else { local rhs = me.parse_term2(src, j+1) j = me.gpos_get() lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}" } } } me.gpos_set(j) return lhs }
parse_compare2(src, i) { local lhs = me.parse_sum2(src, i) local j = me.gpos_get() j = me.skip_ws(src, j) local two = src.substring(j, j+2) local one = src.substring(j, j+1) local op = "" if two == "==" || two == "!=" || two == "<=" || two == ">=" { op = two j = j + 2 } else { if one == "<" || one == ">" { op = one j = j + 1 } } if op == "" { me.gpos_set(j) return lhs } local rhs = me.parse_sum2(src, j) j = me.gpos_get() me.gpos_set(j) return "{\"type\":\"Compare\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}" }
parse_expr2(src, i) { local lhs = me.parse_compare2(src, i) local j = me.gpos_get() local cont = 1 loop(cont == 1) { j = me.skip_ws(src, j) local two = src.substring(j, j+2) if two != "&&" && two != "||" { cont = 0 } else { local rhs = me.parse_compare2(src, j+2) j = me.gpos_get() lhs = "{\"type\":\"Logical\",\"op\":\"" + two + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}" } } me.gpos_set(j) return lhs }
parse_args2(src, i) {
local j = me.skip_ws(src, i)
local n = src.length()
local out = "["
j = me.skip_ws(src, j)
if j < n && src.substring(j, j+1) == ")" { return "[]@" + me.i2s(j) }
// first argument
local e = me.parse_expr2(src, j)
j = me.gpos_get()
out = out + e
// subsequent arguments with guard
local cont_args = 1
local guard = 0
local max = 100000
loop(cont_args == 1) {
if guard > max { cont_args = 0 } else { guard = guard + 1 }
local before = j
j = me.skip_ws(src, j)
if j < n && src.substring(j, j+1) == "," {
j = j + 1
j = me.skip_ws(src, j)
e = me.parse_expr2(src, j)
j = me.gpos_get()
out = out + "," + e
} else { cont_args = 0 }
if j == before { cont_args = 0 }
}
out = out + "]"
return out + "@" + me.i2s(j)
}
parse_stmt2(src, i) {
local j = me.skip_ws(src, i)
local stmt_start = j
if me.starts_with_kw(src, j, "using") == 1 {
j = j + 5
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "\"" {
local p = me.read_string_lit(src, j)
j = me.gpos_get()
j = me.skip_ws(src, j)
local alias = null
if me.starts_with_kw(src, j, "as") == 1 { j = j + 2 j = me.skip_ws(src, j) local idp = me.read_ident2(src, j) local at = idp.lastIndexOf("@") alias = idp.substring(0, at) j = me.to_int(idp.substring(at+1, idp.length())) }
me.add_using("path", p, alias)
} else {
if me.is_alpha(src.substring(j, j+1)) {
local idp = me.read_ident2(src, j)
local at = idp.lastIndexOf("@")
local name = idp.substring(0, at)
j = me.to_int(idp.substring(at+1, idp.length()))
local cont = 1
loop(cont == 1) {
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "." { j = j + 1 j = me.skip_ws(src, j) idp = me.read_ident2(src, j) at = idp.lastIndexOf("@") name = name + "." + idp.substring(0, at) j = me.to_int(idp.substring(at+1, idp.length())) } else { cont = 0 }
}
j = me.skip_ws(src, j)
local alias2 = null
if me.starts_with_kw(src, j, "as") == 1 { j = j + 2 j = me.skip_ws(src, j) idp = me.read_ident2(src, j) at = idp.lastIndexOf("@") alias2 = idp.substring(0, at) j = me.to_int(idp.substring(at+1, idp.length())) }
me.add_using("ns", name, alias2)
}
}
// ensure progress
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return ""
}
if me.starts_with_kw(src, j, "return") == 1 {
j = j + 6
j = me.skip_ws(src, j)
local e = me.parse_expr2(src, j)
j = me.gpos_get()
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Return\",\"expr\":" + e + "}"
}
if me.starts_with_kw(src, j, "local") == 1 {
j = j + 5
j = me.skip_ws(src, j)
local idp = me.read_ident2(src, j)
local at = idp.lastIndexOf("@")
local name = idp.substring(0, at)
j = me.to_int(idp.substring(at+1, idp.length()))
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "=" { j = j + 1 }
j = me.skip_ws(src, j)
local e2 = me.parse_expr2(src, j)
j = me.gpos_get()
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Local\",\"name\":\"" + name + "\",\"expr\":" + e2 + "}"
}
if me.starts_with_kw(src, j, "if") == 1 {
j = j + 2
j = me.skip_ws(src, j)
local paren = 0
if src.substring(j, j+1) == "(" { paren = 1 j = j + 1 }
local cond = me.parse_expr2(src, j)
j = me.gpos_get()
if paren == 1 { j = me.skip_ws(src, j) if src.substring(j, j+1) == ")" { j = j + 1 } }
j = me.skip_ws(src, j)
local then_res = me.parse_block2(src, j)
local at1 = then_res.lastIndexOf("@")
local then_json = then_res.substring(0, at1)
j = me.to_int(then_res.substring(at1+1, then_res.length()))
j = me.skip_ws(src, j)
local else_json = null
if me.starts_with_kw(src, j, "else") == 1 { j = j + 4 j = me.skip_ws(src, j) local else_res = me.parse_block2(src, j) local at2 = else_res.lastIndexOf("@") else_json = else_res.substring(0, at2) j = me.to_int(else_res.substring(at2+1, else_res.length())) }
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
if else_json == null { return "{\"type\":\"If\",\"cond\":" + cond + ",\"then\":" + then_json + "}" } else { return "{\"type\":\"If\",\"cond\":" + cond + ",\"then\":" + then_json + ",\"else\":" + else_json + "}" }
}
if me.starts_with_kw(src, j, "loop") == 1 {
j = j + 4
j = me.skip_ws(src, j)
if src.substring(j, j+1) == "(" { j = j + 1 }
local cond = me.parse_expr2(src, j)
j = me.gpos_get()
j = me.skip_ws(src, j)
if src.substring(j, j+1) == ")" { j = j + 1 }
j = me.skip_ws(src, j)
local body_res = me.parse_block2(src, j)
local at3 = body_res.lastIndexOf("@")
local body_json = body_res.substring(0, at3)
j = me.to_int(body_res.substring(at3+1, body_res.length()))
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Loop\",\"cond\":" + cond + ",\"body\":" + body_json + "}"
}
// Fallback: expression or unknown token — ensure progress even on malformed input
local expr_start = j
local e = me.parse_expr2(src, j)
j = me.gpos_get()
if j <= expr_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Expr\",\"expr\":" + e + "}"
}
parse_block2(src, i) {
local j = me.skip_ws(src, i)
if src.substring(j, j+1) != "{" { return "[]@" + me.i2s(j) }
j = j + 1
local body = "["
local first = 1
local cont_block = 1
loop(cont_block == 1) {
j = me.skip_ws(src, j)
if j >= src.length() { cont_block = 0 } else {
if src.substring(j, j+1) == "}" { j = j + 1 cont_block = 0 } else {
local start_j = j
local s = me.parse_stmt2(src, j)
j = me.gpos_get()
// Progress guard: ensure forward movement to avoid infinite loop on malformed input
if j <= start_j {
if j < src.length() { j = j + 1 } else { j = src.length() }
me.gpos_set(j)
}
// consume optional semicolons (ASI minimal)
local done = 0
local guard = 0
local max = 100000
loop(done == 0) {
if guard > max { done = 1 } else { guard = guard + 1 }
local before = j
j = me.skip_ws(src, j)
if j < src.length() && src.substring(j, j+1) == ";" { j = j + 1 } else { done = 1 }
if j == before { done = 1 }
}
if s.length() > 0 { if first == 1 { body = body + s first = 0 } else { body = body + "," + s } }
}
}
}
body = body + "]"
return body + "@" + me.i2s(j)
}
parse_program2(src) {
local i = me.skip_ws(src, 0)
local body = "["
local first = 1
local cont_prog = 1
loop(cont_prog == 1) {
i = me.skip_ws(src, i)
if i >= src.length() { cont_prog = 0 } else {
local start_i = i
local s = me.parse_stmt2(src, i)
i = me.gpos_get()
// Progress guard: ensure forward movement to avoid infinite loop on malformed input
if i <= start_i {
if i < src.length() { i = i + 1 } else { i = src.length() }
me.gpos_set(i)
}
// consume optional semicolons between top-level statements
local done2 = 0
local guard2 = 0
local max2 = 100000
loop(done2 == 0) {
if guard2 > max2 { done2 = 1 } else { guard2 = guard2 + 1 }
local before2 = i
i = me.skip_ws(src, i)
if i < src.length() && src.substring(i, i+1) == ";" { i = i + 1 } else { done2 = 1 }
if i == before2 { done2 = 1 }
}
if s.length() > 0 { if first == 1 { body = body + s first = 0 } else { body = body + "," + s } }
}
}
body = body + "]"
return "{\"version\":0,\"kind\":\"Program\",\"body\":" + body + "}"
}
}
static box ParserStub { main(args) { return 0 } }

View File

@ -1,5 +1,10 @@
// Selfhost Compiler MVP (Phase 15.3)
// Reads tmp/ny_parser_input.ny and prints a minimal JSON v0 program.
// Components are split under boxes/ and included here.
include "apps/selfhost-compiler/boxes/debug_box.nyash"
include "apps/selfhost-compiler/boxes/parser_box.nyash"
include "apps/selfhost-compiler/boxes/emitter_box.nyash"
static box Main {
// ---- IO helper ----
@ -8,6 +13,7 @@ static box Main {
fb.open(path, "r")
local s = fb.read()
fb.close()
if s == null { return "return 1+2*3" }
return s
}
@ -26,283 +32,76 @@ static box Main {
return out
}
// ---- Lexer helpers ----
is_digit(ch) {
return ch == "0" || ch == "1" || ch == "2" || ch == "3" || ch == "4" || ch == "5" || ch == "6" || ch == "7" || ch == "8" || ch == "9"
}
is_space(ch) { return ch == " " || ch == "\t" || ch == "\n" || ch == "\r" || ch == ";" }
// ---- Parser (Stage-1/mini Stage-2) ----
// Global cursor for second-pass parser (no pack strings)
gpos_set(i) {
me.gpos = i
return 0
}
gpos_get() { return me.gpos }
parse_number2(src, i) {
local n = src.length()
local j = i
loop(j < n && me.is_digit(src.substring(j, j+1))) { j = j + 1 }
local s = src.substring(i, j)
me.gpos_set(j)
return "{\"type\":\"Int\",\"value\":" + s + "}"
}
parse_string2(src, i) {
local n = src.length()
local j = i + 1
local out = ""
local done = 0
loop(j < n && done == 0) {
local ch = src.substring(j, j+1)
if ch == "\"" {
j = j + 1
done = 1
} else {
if ch == "\\" && j + 1 < n {
local nx = src.substring(j+1, j+2)
if nx == "\"" { out = out + "\"" } else { if nx == "\\" { out = out + "\\" } else { out = out + nx } }
j = j + 2
} else {
out = out + ch
j = j + 1
}
}
}
me.gpos_set(j)
return "{\"type\":\"Str\",\"value\":\"" + me.esc_json(out) + "\"}"
}
parse_factor2(src, i) {
local j = me.skip_ws(src, i)
local ch = src.substring(j, j+1)
if ch == "(" {
local inner = me.parse_expr2(src, j + 1)
local k = me.gpos_get()
k = me.skip_ws(src, k)
if src.substring(k, k+1) == ")" { k = k + 1 }
me.gpos_set(k)
return inner
}
if ch == "\"" { return me.parse_string2(src, j) }
return me.parse_number2(src, j)
}
parse_term2(src, i) {
local lhs = me.parse_factor2(src, i)
local j = me.gpos_get()
local cont = 1
loop(cont == 1) {
j = me.skip_ws(src, j)
if j >= src.length() { cont = 0 } else {
local op = src.substring(j, j+1)
if op != "*" && op != "/" { cont = 0 } else {
local rhs = me.parse_factor2(src, j+1)
j = me.gpos_get()
lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}"
}
}
}
me.gpos_set(j)
return lhs
}
parse_expr2(src, i) {
local lhs = me.parse_term2(src, i)
local j = me.gpos_get()
local cont = 1
loop(cont == 1) {
j = me.skip_ws(src, j)
if j >= src.length() { cont = 0 } else {
local op = src.substring(j, j+1)
if op != "+" && op != "-" { cont = 0 } else {
local rhs = me.parse_term2(src, j+1)
j = me.gpos_get()
lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}"
}
}
}
me.gpos_set(j)
return lhs
}
parse_program2(src) {
local i = me.skip_ws(src, 0)
local j = me.skip_return_kw(src, i)
if j == i { j = i } // optional 'return'
local expr = me.parse_expr2(src, j)
return "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":" + expr + "}]}"
}
i2s(n) {
// integer to decimal string (non-negative only for MVP)
if n == 0 { return "0" }
local x = n
if x < 0 { x = 0 } // MVP: clamp negatives to 0 to avoid surprises
local out = ""
loop(x > 0) {
local q = x / 10
local d = x - q * 10
local ch = "0"
if d == 1 { ch = "1" } else {
if d == 2 { ch = "2" } else {
if d == 3 { ch = "3" } else {
if d == 4 { ch = "4" } else {
if d == 5 { ch = "5" } else {
if d == 6 { ch = "6" } else {
if d == 7 { ch = "7" } else {
if d == 8 { ch = "8" } else {
if d == 9 { ch = "9" }
}
}
}
}
}
}
}
}
out = ch + out
x = q
}
return out
}
to_int(s) {
local n = s.length()
if n == 0 { return 0 }
local i = 0
local acc = 0
loop(i < n) {
local d = s.substring(i, i+1)
local dv = 0
if d == "1" { dv = 1 } else { if d == "2" { dv = 2 } else { if d == "3" { dv = 3 } else { if d == "4" { dv = 4 } else { if d == "5" { dv = 5 } else { if d == "6" { dv = 6 } else { if d == "7" { dv = 7 } else { if d == "8" { dv = 8 } else { if d == "9" { dv = 9 } } } } } } } } }
acc = acc * 10 + dv
i = i + 1
}
return acc
}
parse_number(src, i) {
local n = src.length()
local j = i
loop(j < n && me.is_digit(src.substring(j, j+1))) { j = j + 1 }
local s = src.substring(i, j)
local json = "{\"type\":\"Int\",\"value\":" + s + "}"
return json + "@" + me.i2s(j)
}
parse_string(src, i) {
local n = src.length()
local j = i + 1
local out = ""
loop(j < n) {
local ch = src.substring(j, j+1)
if ch == "\"" {
j = j + 1
return "{\"type\":\"Str\",\"value\":\"" + me.esc_json(out) + "\"}@" + j
}
if ch == "\\" && j + 1 < n {
local nx = src.substring(j+1, j+2)
if nx == "\"" { out = out + "\"" } else { if nx == "\\" { out = out + "\\" } else { out = out + nx } }
j = j + 2
} else {
out = out + ch
j = j + 1
}
}
return "{\"type\":\"Str\",\"value\":\"" + me.esc_json(out) + "\"}@" + me.i2s(j)
}
skip_ws(src, i) {
local n = src.length()
loop(i < n && me.is_space(src.substring(i, i+1))) { i = i + 1 }
return i
}
skip_return_kw(src, i) {
// If source at i starts with "return", advance; otherwise return i unchanged
local n = src.length()
local j = i
if j < n && src.substring(j, j+1) == "r" { j = j + 1 } else { return i }
if j < n && src.substring(j, j+1) == "e" { j = j + 1 } else { return i }
if j < n && src.substring(j, j+1) == "t" { j = j + 1 } else { return i }
if j < n && src.substring(j, j+1) == "u" { j = j + 1 } else { return i }
if j < n && src.substring(j, j+1) == "r" { j = j + 1 } else { return i }
if j < n && src.substring(j, j+1) == "n" { j = j + 1 } else { return i }
return j
}
parse_factor(src, i) {
i = me.skip_ws(src, i)
local ch = src.substring(i, i+1)
if ch == "(" {
local p = me.parse_expr(src, i + 1)
local at = p.lastIndexOf("@")
local ej = p.substring(0, at)
local j = me.to_int(p.substring(at+1, p.length()))
j = me.skip_ws(src, j)
if src.substring(j, j+1) == ")" { j = j + 1 }
return ej + "@" + me.i2s(j)
}
if ch == "\"" { return me.parse_string(src, i) }
return me.parse_number(src, i)
}
parse_term(src, i) {
local p = me.parse_factor(src, i)
local at = p.lastIndexOf("@")
local lhs = p.substring(0, at)
local j = me.to_int(p.substring(at+1, p.length()))
local cont = 1
loop(cont == 1) {
j = me.skip_ws(src, j)
if j >= src.length() { cont = 0 } else {
local op = src.substring(j, j+1)
if op != "*" && op != "/" { cont = 0 } else {
local q = me.parse_factor(src, j+1)
local at2 = q.lastIndexOf("@")
local rhs = q.substring(0, at2)
j = me.to_int(q.substring(at2+1, q.length()))
lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}"
}
}
}
return lhs + "@" + me.i2s(j)
}
parse_expr(src, i) {
local p = me.parse_term(src, i)
local at = p.lastIndexOf("@")
local lhs = p.substring(0, at)
local j = me.to_int(p.substring(at+1, p.length()))
local cont = 1
loop(cont == 1) {
j = me.skip_ws(src, j)
if j >= src.length() { cont = 0 } else {
local op = src.substring(j, j+1)
if op != "+" && op != "-" { cont = 0 } else {
local q = me.parse_term(src, j+1)
local at2 = q.lastIndexOf("@")
local rhs = q.substring(0, at2)
j = me.to_int(q.substring(at2+1, q.length()))
lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}"
}
}
}
return lhs + "@" + me.i2s(j)
}
// Parser delegation
parse_program(src) {
// Legacy packed path (debug) removed; use parser2
return me.parse_program2(src)
local parser = new ParserBox()
// Collect using metadata (no-op acceptance in Stage15)
parser.extract_usings(src)
me._usings = parser.get_usings_json()
return parser.parse_program2(src)
}
main(args) {
// Parse the input and emit JSON v0
local src = me.read_all("tmp/ny_parser_input.ny")
if src == null { src = "return 1+2*3" }
local json = me.parse_program(src)
// Debug setup
me.dbg = new DebugBox()
me.dbg.set_enabled(0)
// Source selection (EXE-first friendly)
// - default: safe constant
// - positional arg: treat as input file path
// - --read-tmp: use tmp/ny_parser_input.ny (requires FileBox plugin)
local src = "return 1+2*3"
local read_tmp = 0
local input_path = null
if args != null {
local alen = args.length()
local i = 0
loop(i < alen) {
local a = args.get(i)
if a == "--read-tmp" { read_tmp = 1 } else {
if a == "--min-json" { /* handled later */ } else {
// First non-flag arg as input path
if input_path == null { input_path = a }
}
}
i = i + 1
}
}
if input_path != null {
// Prefer explicit file when provided (requires FileBox plugin)
local s1 = me.read_all(input_path)
if s1 != null { src = s1 }
} else {
if read_tmp == 1 {
// Optional: read tmp/ny_parser_input.ny (requires FileBox plugin; do not use in CI)
local s2 = me.read_all("tmp/ny_parser_input.ny")
if s2 != null { src = s2 }
}
}
// Gate: minimal JSON when requested via script arg
local min_mode = 0
if args != null {
local alen = args.length()
local i = 0
loop(i < alen) {
if args.get(i) == "--min-json" { min_mode = 1 }
i = i + 1
}
}
local json = null
if min_mode == 1 {
json = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}]}"
} else {
json = me.parse_program(src)
}
// Emit via EmitterBox (attach meta.usings when available)
local emitter = new EmitterBox()
json = emitter.emit_program(json, me._usings)
// Output
local console = new ConsoleBox()
// console.println(json) -- final output only
console.println(json)
return 0
}

View File

@ -0,0 +1,15 @@
static box Main {
main(args) {
local console = new ConsoleBox()
local a = new ArrayBox()
a.push(10)
a.push(20)
console.println("alen=" + a.size())
console.println("g0=" + a.get(0))
console.println("g1=" + a.get(1))
a.set(1, 30)
console.println("g1b=" + a.get(1))
return 0
}
}

View File

@ -0,0 +1,14 @@
static box Main {
main(args) {
local console = new ConsoleBox()
local m = new MapBox()
m.set("a", 1)
m.set("b", 2)
console.println("msz=" + m.size())
console.println("ha=" + m.has("a"))
console.println("hc=" + m.has("c"))
console.println("ga=" + m.get("a"))
return 0
}
}