From 97a76c0571ab5a86f6713a6aadff9bcd5a03e25c Mon Sep 17 00:00:00 2001 From: Selfhosting Dev Date: Tue, 16 Sep 2025 18:33:59 +0900 Subject: [PATCH] selfhost: stub stage3 parser nodes under gate --- CURRENT_TASK.md | 12 ++- apps/selfhost-compiler/boxes/parser_box.nyash | 73 ++++++++++++++++++- apps/selfhost-compiler/compiler.nyash | 24 ++++-- .../architecture/parser_mvp_stage3.md | 52 +++++++++++++ src/runner/json_v0_bridge.rs | 58 ++++++++++++++- 5 files changed, 203 insertions(+), 16 deletions(-) create mode 100644 docs/reference/architecture/parser_mvp_stage3.md diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index ef782d11..33e2b415 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -37,8 +37,12 @@ Quick Next (today) 3) Resolver/BoxIndex の prefix メタ反映 ✅ Done 2025‑09‑16 - `plugin_meta_by_box` を構築し、`require_prefix` / `expose_short_names` を `resolve_using_target` へ適用。 - `NYASH_PLUGIN_REQUIRE_PREFIX` が無効でも per-plugin meta で短名禁止を検知。 - 4) 自己ホスト経路で Ny 実装切替のゲート準備(現状は Python MVP 優先を維持)。 - 5) テスト: + 4) Parser Stage‑3 下地 ✅ Done 2025‑09‑16 + - `ParserBox.stage3_enable()` を追加し、Break/Continue/Throw/Try を JSON v0 に出力できるゲートを実装。 + - `--stage3` CLI フラグから ParserBox へ渡す導線を追加。 + - `docs/reference/architecture/parser_mvp_stage3.md` に Stage‑3 設計を記録。 + 5) 自己ホスト経路で Ny 実装切替のゲート準備(現状は Python MVP 優先を維持)。 + 6) テスト: - `source tools/dev_env.sh pyvm` - `NYASH_VM_USE_PY=1 ./tools/selfhost_stage2_smoke.sh` - `NYASH_VM_USE_PY=1 ./tools/selfhost_stage2_bridge_smoke.sh` @@ -63,8 +67,8 @@ Current Status Open - Bridge/PHI の正規化: 短絡(入れ子)における merge/PHI incoming を固定化(rhs_end/fall_bb の順序)。 -- JSON v0 の拡張方針: break/continue/try/catch/finally の表現(受け皿設計 or 受理時の事前降下)。 -- per‑plugin meta の反映: `require_prefix/expose_short_names/prefix` を Resolver 挙動へ段階適用(導線は実装済み)。 +- JSON v0 の拡張方針: break/continue/try/catch/finally の表現(受け皿設計 or 受理時の事前降下)。➡ `docs/reference/architecture/parser_mvp_stage3.md` +- per‑plugin meta の反映: `require_prefix/expose_short_names/prefix` を Resolver 挙動へ段階適用(導線は実装済み)。✅ 2025‑09‑16 prefix 
enforcement とテスト追加済み。 - `me` の扱い: MVP は `NYASH_BRIDGE_ME_DUMMY=1` の仮注入を継続(将来撤去)。 - LLVM 直結(任意): JSON v0 → LLVM の導線追加は後回し。 diff --git a/apps/selfhost-compiler/boxes/parser_box.nyash b/apps/selfhost-compiler/boxes/parser_box.nyash index 5c527c19..7b5f5031 100644 --- a/apps/selfhost-compiler/boxes/parser_box.nyash +++ b/apps/selfhost-compiler/boxes/parser_box.nyash @@ -2,8 +2,20 @@ box ParserBox { gpos usings_json + stage3 - birth() { me.gpos = 0 me.usings_json = "[]" return 0 } + birth() { me.gpos = 0 me.usings_json = "[]" me.stage3 = 0 return 0 } + + stage3_enable(flag) { + if flag == null { flag = 0 } + if flag == 0 { me.stage3 = 0 } else { me.stage3 = 1 } + return 0 + } + + stage3_enabled() { + if me.stage3 == 1 { return 1 } + return 0 + } esc_json(s) { local out = "" @@ -613,12 +625,24 @@ box ParserBox { // Stage-3 acceptance (syntax only): break / continue → no-op expression if me.starts_with_kw(src, j, "break") == 1 { j = j + 5 + if me.stage3_enabled() == 1 { + j = me.skip_ws(src, j) + if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } } + me.gpos_set(j) + return "{\"type\":\"Break\"}" + } if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } } me.gpos_set(j) return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}" } if me.starts_with_kw(src, j, "continue") == 1 { j = j + 8 + if me.stage3_enabled() == 1 { + j = me.skip_ws(src, j) + if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } } + me.gpos_set(j) + return "{\"type\":\"Continue\"}" + } if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } } me.gpos_set(j) return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}" @@ -629,6 +653,11 @@ box ParserBox { j = me.skip_ws(src, j) local e_throw = me.parse_expr2(src, j) j = me.gpos_get() + if me.stage3_enabled() == 1 { + if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } } + me.gpos_set(j) + return 
"{\"type\":\"Throw\",\"expr\":" + e_throw + "}" + } if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } } me.gpos_set(j) return "{\"type\":\"Expr\",\"expr\":" + e_throw + "}" @@ -640,7 +669,10 @@ box ParserBox { // parse try block local try_res = me.parse_block2(src, j) local at_t = try_res.lastIndexOf("@") + local try_json = try_res.substring(0, at_t) j = me.to_int(try_res.substring(at_t+1, try_res.length())) + local catches_json = "[" + local catch_first = 1 // zero or more catch local guard_ct = 0 local max_ct = 100 @@ -651,10 +683,24 @@ box ParserBox { if me.starts_with_kw(src, j, "catch") == 1 { j = j + 5 j = me.skip_ws(src, j) + local catch_type = null + local catch_param = null if src.substring(j, j+1) == "(" { j = j + 1 j = me.skip_ws(src, j) // optional type + name - if me.is_alpha(src.substring(j, j+1)) { local id1 = me.read_ident2(src, j) local at1 = id1.lastIndexOf("@") j = me.to_int(id1.substring(at1+1, id1.length())) j = me.skip_ws(src, j) } - if me.is_alpha(src.substring(j, j+1)) { local id2 = me.read_ident2(src, j) local at2 = id2.lastIndexOf("@") j = me.to_int(id2.substring(at2+1, id2.length())) j = me.skip_ws(src, j) } + if me.is_alpha(src.substring(j, j+1)) { + local id1 = me.read_ident2(src, j) + local at1 = id1.lastIndexOf("@") + catch_type = id1.substring(0, at1) + j = me.to_int(id1.substring(at1+1, id1.length())) + j = me.skip_ws(src, j) + } + if me.is_alpha(src.substring(j, j+1)) { + local id2 = me.read_ident2(src, j) + local at2 = id2.lastIndexOf("@") + catch_param = id2.substring(0, at2) + j = me.to_int(id2.substring(at2+1, id2.length())) + j = me.skip_ws(src, j) + } if src.substring(j, j+1) == ")" { j = j + 1 } } j = me.skip_ws(src, j) @@ -662,16 +708,37 @@ box ParserBox { local c_res = me.parse_block2(src, j) local atc = c_res.lastIndexOf("@") j = me.to_int(c_res.substring(atc+1, c_res.length())) + if me.stage3_enabled() == 1 { + local entry = "{" + local wrote = 0 + if catch_param != null && 
catch_param.length() > 0 { entry = entry + "\"param\":\"" + me.esc_json(catch_param) + "\"" wrote = 1 } + if catch_type != null && catch_type.length() > 0 { if wrote == 1 { entry = entry + "," } entry = entry + "\"typeHint\":\"" + me.esc_json(catch_type) + "\"" wrote = 1 } + local body_json = c_res.substring(0, atc) + if wrote == 1 { entry = entry + "," } + entry = entry + "\"body\":" + body_json + "}" + if catch_first == 0 { catches_json = catches_json + "," + entry } else { catches_json = catches_json + entry catch_first = 0 } + } } else { cont_ct = 0 } } + catches_json = catches_json + "]" // optional finally j = me.skip_ws(src, j) + local finally_json = null if me.starts_with_kw(src, j, "finally") == 1 { j = j + 7 j = me.skip_ws(src, j) local f_res = me.parse_block2(src, j) local atf = f_res.lastIndexOf("@") j = me.to_int(f_res.substring(atf+1, f_res.length())) + finally_json = f_res.substring(0, atf) + } + if me.stage3_enabled() == 1 { + if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } } + me.gpos_set(j) + local node = "{\"type\":\"Try\",\"try\":" + try_json + ",\"catches\":" + catches_json + if finally_json != null { node = node + ",\"finally\":" + finally_json } + node = node + "}" + return node } if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } } me.gpos_set(j) diff --git a/apps/selfhost-compiler/compiler.nyash b/apps/selfhost-compiler/compiler.nyash index eb7e268e..ca4d3030 100644 --- a/apps/selfhost-compiler/compiler.nyash +++ b/apps/selfhost-compiler/compiler.nyash @@ -33,8 +33,9 @@ static box Main { } // Parser delegation - parse_program(src) { + parse_program(src, stage3_flag) { local parser = new ParserBox() + if stage3_flag == 1 { parser.stage3_enable(1) } // Collect using metadata (no-op acceptance in Stage‑15) parser.extract_usings(src) me._usings = parser.get_usings_json() @@ -53,15 +54,23 @@ static box Main { local src = "return 1+2*3" local read_tmp = 0 local input_path = null + 
local stage3_mode = 0 if args != null { local alen = args.length() local i = 0 loop(i < alen) { local a = args.get(i) - if a == "--read-tmp" { read_tmp = 1 } else { - if a == "--min-json" { /* handled later */ } else { - // First non-flag arg as input path - if input_path == null { input_path = a } + if a == "--read-tmp" { + read_tmp = 1 + } else { + if a == "--min-json" { + /* handled later */ + } else { + if a == "--stage3" { + stage3_mode = 1 + } else { + if input_path == null { input_path = a } + } } } i = i + 1 @@ -85,7 +94,8 @@ static box Main { local alen = args.length() local i = 0 loop(i < alen) { - if args.get(i) == "--min-json" { min_mode = 1 } + local arg = args.get(i) + if arg == "--min-json" { min_mode = 1 } i = i + 1 } } @@ -93,7 +103,7 @@ static box Main { if min_mode == 1 { json = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}]}" } else { - json = me.parse_program(src) + json = me.parse_program(src, stage3_mode) } // Emit via EmitterBox (attach meta.usings when available) diff --git a/docs/reference/architecture/parser_mvp_stage3.md b/docs/reference/architecture/parser_mvp_stage3.md new file mode 100644 index 00000000..b37ab2d2 --- /dev/null +++ b/docs/reference/architecture/parser_mvp_stage3.md @@ -0,0 +1,52 @@ +# Parser MVP Stage-3 Design (Phase 15) + +Scope +- Extend Stage-2 parser emission to cover control flow constructs usually seen in routine code bases: + - `break` / `continue` + - `throw expr` + - `try { ... } catch (Type err) { ... } finally { ... }` + - Alert: other Stage-3 ideas (switch/async) remain out of scope until after self-host parity. +- Preserve existing Stage-2 behaviour (locals/loop/if/call/method/new/ternary) with no regressions. + +Guiding Principles +- JSON v0 must remain stable for the Stage-2 path; Stage-3 additions should be feature-flagged or degrade safely when disabled. 
+- Short-circuit semantics are already mirrored via logical nodes; Stage-3 should reuse the same block-building infrastructure (Bridge/VM/JIT) to avoid special cases. +- Continue the "degrade to expression" approach when code generation is not ready (e.g. throw/try) so that Stage-2 tests stay green while the full implementation is developed. + +JSON v0 Additions +| Construct | JSON v0 Node | Notes | +|------------|-------------------------------------------------|-------| +| break | `{ "type": "Break" }` | Lowered into loop exit block with implicit jump. | +| continue | `{ "type": "Continue" }` | Lowered into loop head block jump. | +| throw expr | `{ "type": "Throw", "expr": Expr }` | Initial implementation can degrade to `{ "type": "Expr", "expr": expr }` until VM/JIT semantics are ready. | +| try/catch/finally | `{ "type": "Try", "try": Stmt[], "catches": Catch[], "finally": Stmt[]? }` | Each `Catch` includes `{ "param": String?, "typeHint": String?, "body": Stmt[] }`. The initial implementation may treat it as a pass-through expression block. | + +Lowering Strategy (Bridge) +1. **Break/Continue** + - Bridge stores loop header/exit blocks on a loop stack. + - `Break` maps to `Jump { target: loop_exit }`, `Continue` to `Jump { target: loop_head }`. + - MirBuilder already has `LoopBuilder`; expose helpers to fetch head/exit blocks. + +2. **Throw/Try** + - Phase 15 MVP keeps them syntax-only to avoid VM/JIT churn. Parser/Emitter produce nodes; Bridge either degrades (Expr) or logs a structured event for future handling. + - Document expectation: once runtime exception model is defined, nodes become non-degrading. + +3. **Metadata Events** + - Augment `crate::jit::observe` with `lower_shortcircuit`/`lower_try` stubs so instrumentation remains coherent when full support is wired. + +Testing Plan +- Extend selfhost Stage-2 smoke file with guard cases (`return break` etc.) once lowering is live. 
+- Create dedicated JSON fixtures under `tests/json_v0_stage3/` for break/continue/try once behaviour stabilises. +- Update `tools/ny_stage2_shortcircuit_smoke.sh` to ensure Stage-3 constructs do not regress Stage-2 semantics (break/continue degrade). Timing: after lowering is implemented. + +Migration Checklist +1. ParserBox emits Stage-3 nodes under `NYASH_PARSER_STAGE3=1` gate to allow gradual rollout. +2. Emitter attaches Stage-3 JSON when gate is enabled (otherwise degrade to existing Stage-2 forms). +3. Bridge honours Stage-3 nodes when gate is on; degrade with warning when off. +4. PyVM/VM/JIT semantics gradually enabled (throw/try remain degrade until corresponding runtime support is merged). +5. Documentation kept in sync (`CURRENT_TASK.md`, release notes). + +References +- Stage-2 design (`parser_mvp_stage2.md`) +- CURRENT_TASK stage checklist (Phase 15) +- `docs/guides/language-guide.md` section “Exceptions & Flow Control” (update when Stage-3 fully lands). diff --git a/src/runner/json_v0_bridge.rs b/src/runner/json_v0_bridge.rs index 65c92657..18646753 100644 --- a/src/runner/json_v0_bridge.rs +++ b/src/runner/json_v0_bridge.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use crate::mir::{ MirModule, MirFunction, FunctionSignature, BasicBlockId, MirInstruction, - ConstValue, BinaryOp, MirType, EffectMask, MirPrinter, + ConstValue, BinaryOp, MirType, EffectMask, MirPrinter, ValueId, }; #[derive(Debug, Deserialize, Serialize)] @@ -24,6 +24,19 @@ enum StmtV0 { If { cond: ExprV0, then: Vec, #[serde(rename="else", default)] r#else: Option> }, // Optional: loop (Stage-2) Loop { cond: ExprV0, body: Vec }, + Break, + Continue, + Try { #[serde(rename="try")] try_body: Vec, #[serde(default)] catches: Vec, #[serde(default)] finally: Vec }, +} + +#[derive(Debug, Deserialize, Serialize, Clone, Default)] +struct CatchV0 { + #[serde(rename="param", default)] + param: Option, + #[serde(rename="typeHint", default)] + type_hint: Option, + #[serde(default)] + 
body: Vec, } #[derive(Debug, Deserialize, Serialize, Clone)] @@ -41,6 +54,7 @@ enum ExprV0 { Method { recv: Box, method: String, args: Vec }, New { class: String, args: Vec }, Var { name: String }, + Throw { expr: Box }, } pub fn parse_json_v0_to_module(json: &str) -> Result { @@ -197,7 +211,16 @@ fn lower_expr(f: &mut MirFunction, cur_bb: BasicBlockId, e: &ExprV0) -> Result<( // merge with phi (use actual predecessors rhs_end and fall_bb) let out = f.next_value_id(); if let Some(bb) = f.get_block_mut(merge_bb) { - bb.insert_instruction_after_phis(MirInstruction::Phi { dst: out, inputs: vec![(rhs_end, rval), (fall_bb, cdst)] }); + let mut inputs: Vec<(BasicBlockId, ValueId)> = vec![(fall_bb, cdst)]; + if rhs_end != fall_bb { + inputs.push((rhs_end, rval)); + } else { + // Degenerate case: RHS ended in fall_bb (e.g., constant expression). + // Reuse the constant to keep PHI well-formed. + inputs.push((fall_bb, rval)); + } + inputs.sort_by_key(|(bbid, _)| bbid.0); + bb.insert_instruction_after_phis(MirInstruction::Phi { dst: out, inputs }); } Ok((out, merge_bb)) } @@ -280,6 +303,14 @@ fn lower_expr(f: &mut MirFunction, cur_bb: BasicBlockId, e: &ExprV0) -> Result<( Ok((dst, cur)) } ExprV0::Var { name } => Err(format!("undefined variable in this context: {}", name)), + ExprV0::Throw { expr } => { + let (_ignored, cur) = lower_expr(f, cur_bb, expr)?; + let dst = f.next_value_id(); + if let Some(bb) = f.get_block_mut(cur) { + bb.add_instruction(MirInstruction::Const { dst, value: ConstValue::Integer(0) }); + } + Ok((dst, cur)) + } } } @@ -310,6 +341,14 @@ fn lower_expr_with_vars( } Err(format!("undefined variable: {}", name)) } + ExprV0::Throw { expr } => { + let (_ignored, cur) = lower_expr_with_vars(f, cur_bb, expr, vars)?; + let dst = f.next_value_id(); + if let Some(bb) = f.get_block_mut(cur) { + bb.add_instruction(MirInstruction::Const { dst, value: ConstValue::Integer(0) }); + } + Ok((dst, cur)) + } ExprV0::Call { name, args } => { // Special: array literal 
lowering in vars context if name == "array.of" { @@ -469,6 +508,21 @@ fn lower_stmt_with_vars( StmtV0::Local { name, expr } => { let (v, cur) = lower_expr_with_vars(f, cur_bb, expr, vars)?; vars.insert(name.clone(), v); Ok(cur) } + StmtV0::Break => { + // Stage-3 placeholder: no-op until loop lowering supports break + Ok(cur_bb) + } + StmtV0::Continue => { + // Stage-3 placeholder: no-op until loop lowering supports continue + Ok(cur_bb) + } + StmtV0::Try { try_body, .. } => { + // Stage-3 placeholder: lower try body sequentially, ignore catches/finally for now + let mut tmp_vars = vars.clone(); + let next_bb = lower_stmt_list_with_vars(f, cur_bb, try_body, &mut tmp_vars)?; + *vars = tmp_vars; + Ok(next_bb) + } StmtV0::If { cond, then, r#else } => { // Lower condition first let (cval, cur) = lower_expr_with_vars(f, cur_bb, cond, vars)?;