selfhost: stub stage3 parser nodes under gate

This commit is contained in:
Selfhosting Dev
2025-09-16 18:33:59 +09:00
parent fa1619bf4b
commit 97a76c0571
5 changed files with 203 additions and 16 deletions

View File

@ -37,8 +37,12 @@ Quick Next (today)
3) Resolver/BoxIndex の prefix メタ反映 ✅ Done 2025-09-16
- `plugin_meta_by_box` を構築し、`require_prefix` / `expose_short_names` を `resolve_using_target` へ適用。
- `NYASH_PLUGIN_REQUIRE_PREFIX` が無効でも per-plugin meta で短名禁止を検知。
4) 自己ホスト経路で Ny 実装切替のゲート準備(現状は Python MVP 優先を維持)。
5) テスト:
4) Parser Stage-3 下地 ✅ Done 2025-09-16
- `ParserBox.stage3_enable()` を追加し、Break/Continue/Throw/Try を JSON v0 に出力できるゲートを実装。
- `--stage3` CLI フラグから ParserBox へ渡す導線を追加。
- `docs/reference/architecture/parser_mvp_stage3.md` に Stage-3 設計を記録。
5) 自己ホスト経路で Ny 実装切替のゲート準備(現状は Python MVP 優先を維持)。
6) テスト:
- `source tools/dev_env.sh pyvm`
- `NYASH_VM_USE_PY=1 ./tools/selfhost_stage2_smoke.sh`
- `NYASH_VM_USE_PY=1 ./tools/selfhost_stage2_bridge_smoke.sh`
@ -63,8 +67,8 @@ Current Status
Open
- Bridge/PHI の正規化: 短絡(入れ子)における merge/PHI incoming を固定化(rhs_end/fall_bb の順序)。
- JSON v0 の拡張方針: break/continue/try/catch/finally の表現(受け皿設計 or 受理時の事前降下)。
- per-plugin meta の反映: `require_prefix/expose_short_names/prefix` を Resolver 挙動へ段階適用(導線は実装済み)。
- JSON v0 の拡張方針: break/continue/try/catch/finally の表現(受け皿設計 or 受理時の事前降下)。`docs/reference/architecture/parser_mvp_stage3.md`
- per-plugin meta の反映: `require_prefix/expose_short_names/prefix` を Resolver 挙動へ段階適用(導線は実装済み)。✅ 2025-09-16 prefix enforcement とテスト追加済み。
- `me` の扱い: MVP は `NYASH_BRIDGE_ME_DUMMY=1` の仮注入を継続(将来撤去)。
- LLVM 直結(任意): JSON v0 → LLVM の導線追加は後回し。

View File

@ -2,8 +2,20 @@
box ParserBox {
gpos
usings_json
stage3
birth() { me.gpos = 0 me.usings_json = "[]" return 0 }
birth() { me.gpos = 0 me.usings_json = "[]" me.stage3 = 0 return 0 }
// Turn the Stage-3 gate of this parser instance on or off.
// flag: null or 0 stores 0 in me.stage3; any other value stores 1.
// Always returns 0.
stage3_enable(flag) {
// A null flag is handled the same way as 0 (gate off).
if flag == null { flag = 0 }
// Store a normalized 0/1 value rather than the raw flag.
if flag == 0 { me.stage3 = 0 } else { me.stage3 = 1 }
return 0
}
// Report whether the Stage-3 gate is on.
// Returns 1 only when me.stage3 is exactly 1; every other value yields 0.
stage3_enabled() {
if me.stage3 == 1 { return 1 }
return 0
}
esc_json(s) {
local out = ""
@ -613,12 +625,24 @@ box ParserBox {
// Stage-3 acceptance (syntax only): break / continue → no-op expression
if me.starts_with_kw(src, j, "break") == 1 {
j = j + 5
if me.stage3_enabled() == 1 {
j = me.skip_ws(src, j)
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Break\"}"
}
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
}
if me.starts_with_kw(src, j, "continue") == 1 {
j = j + 8
if me.stage3_enabled() == 1 {
j = me.skip_ws(src, j)
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Continue\"}"
}
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
@ -629,6 +653,11 @@ box ParserBox {
j = me.skip_ws(src, j)
local e_throw = me.parse_expr2(src, j)
j = me.gpos_get()
if me.stage3_enabled() == 1 {
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Throw\",\"expr\":" + e_throw + "}"
}
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
return "{\"type\":\"Expr\",\"expr\":" + e_throw + "}"
@ -640,7 +669,10 @@ box ParserBox {
// parse try block
local try_res = me.parse_block2(src, j)
local at_t = try_res.lastIndexOf("@")
local try_json = try_res.substring(0, at_t)
j = me.to_int(try_res.substring(at_t+1, try_res.length()))
local catches_json = "["
local catch_first = 1
// zero or more catch
local guard_ct = 0
local max_ct = 100
@ -651,10 +683,24 @@ box ParserBox {
if me.starts_with_kw(src, j, "catch") == 1 {
j = j + 5
j = me.skip_ws(src, j)
local catch_type = null
local catch_param = null
if src.substring(j, j+1) == "(" { j = j + 1 j = me.skip_ws(src, j)
// optional type + name
if me.is_alpha(src.substring(j, j+1)) { local id1 = me.read_ident2(src, j) local at1 = id1.lastIndexOf("@") j = me.to_int(id1.substring(at1+1, id1.length())) j = me.skip_ws(src, j) }
if me.is_alpha(src.substring(j, j+1)) { local id2 = me.read_ident2(src, j) local at2 = id2.lastIndexOf("@") j = me.to_int(id2.substring(at2+1, id2.length())) j = me.skip_ws(src, j) }
if me.is_alpha(src.substring(j, j+1)) {
local id1 = me.read_ident2(src, j)
local at1 = id1.lastIndexOf("@")
catch_type = id1.substring(0, at1)
j = me.to_int(id1.substring(at1+1, id1.length()))
j = me.skip_ws(src, j)
}
if me.is_alpha(src.substring(j, j+1)) {
local id2 = me.read_ident2(src, j)
local at2 = id2.lastIndexOf("@")
catch_param = id2.substring(0, at2)
j = me.to_int(id2.substring(at2+1, id2.length()))
j = me.skip_ws(src, j)
}
if src.substring(j, j+1) == ")" { j = j + 1 }
}
j = me.skip_ws(src, j)
@ -662,16 +708,37 @@ box ParserBox {
local c_res = me.parse_block2(src, j)
local atc = c_res.lastIndexOf("@")
j = me.to_int(c_res.substring(atc+1, c_res.length()))
if me.stage3_enabled() == 1 {
local entry = "{"
local wrote = 0
if catch_param != null && catch_param.length() > 0 { entry = entry + "\"param\":\"" + me.esc_json(catch_param) + "\"" wrote = 1 }
if catch_type != null && catch_type.length() > 0 { if wrote == 1 { entry = entry + "," } entry = entry + "\"typeHint\":\"" + me.esc_json(catch_type) + "\"" wrote = 1 }
local body_json = c_res.substring(0, atc)
if wrote == 1 { entry = entry + "," }
entry = entry + "\"body\":" + body_json + "}"
if catch_first == 0 { catches_json = catches_json + "," + entry } else { catches_json = catches_json + entry catch_first = 0 }
}
} else { cont_ct = 0 }
}
catches_json = catches_json + "]"
// optional finally
j = me.skip_ws(src, j)
local finally_json = null
if me.starts_with_kw(src, j, "finally") == 1 {
j = j + 7
j = me.skip_ws(src, j)
local f_res = me.parse_block2(src, j)
local atf = f_res.lastIndexOf("@")
j = me.to_int(f_res.substring(atf+1, f_res.length()))
finally_json = f_res.substring(0, atf)
}
if me.stage3_enabled() == 1 {
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)
local node = "{\"type\":\"Try\",\"try\":" + try_json + ",\"catches\":" + catches_json
if finally_json != null { node = node + ",\"finally\":" + finally_json }
node = node + "}"
return node
}
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
me.gpos_set(j)

View File

@ -33,8 +33,9 @@ static box Main {
}
// Parser delegation
parse_program(src) {
parse_program(src, stage3_flag) {
local parser = new ParserBox()
if stage3_flag == 1 { parser.stage3_enable(1) }
// Collect using metadata (no-op acceptance in Stage15)
parser.extract_usings(src)
me._usings = parser.get_usings_json()
@ -53,17 +54,25 @@ static box Main {
local src = "return 1+2*3"
local read_tmp = 0
local input_path = null
local stage3_mode = 0
if args != null {
local alen = args.length()
local i = 0
loop(i < alen) {
local a = args.get(i)
if a == "--read-tmp" { read_tmp = 1 } else {
if a == "--min-json" { /* handled later */ } else {
// First non-flag arg as input path
if a == "--read-tmp" {
read_tmp = 1
} else {
if a == "--min-json" {
/* handled later */
} else {
if a == "--stage3" {
stage3_mode = 1
} else {
if input_path == null { input_path = a }
}
}
}
i = i + 1
}
}
@ -85,7 +94,8 @@ static box Main {
local alen = args.length()
local i = 0
loop(i < alen) {
if args.get(i) == "--min-json" { min_mode = 1 }
local arg = args.get(i)
if arg == "--min-json" { min_mode = 1 }
i = i + 1
}
}
@ -93,7 +103,7 @@ static box Main {
if min_mode == 1 {
json = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}]}"
} else {
json = me.parse_program(src)
json = me.parse_program(src, stage3_mode)
}
// Emit via EmitterBox (attach meta.usings when available)

View File

@ -0,0 +1,52 @@
# Parser MVP Stage-3 Design (Phase 15)
Scope
- Extend Stage-2 parser emission to cover control-flow constructs commonly seen in everyday code:
- `break` / `continue`
- `throw expr`
- `try { ... } catch (Type err) { ... } finally { ... }`
- Note: other Stage-3 ideas (switch/async) remain out of scope until after self-host parity.
- Preserve existing Stage-2 behaviour (locals/loop/if/call/method/new/ternary) with no regressions.
Guiding Principles
- JSON v0 must remain stable for the Stage-2 path; Stage-3 additions should be feature-flagged or degrade safely when disabled.
- Short-circuit semantics are already mirrored via logical nodes; Stage-3 should reuse the same block-building infrastructure (Bridge/VM/JIT) to avoid special cases.
- Continue the "degrade to expression" approach when code generation is not ready (e.g. throw/try) so that Stage-2 tests stay green while the full implementation is developed.
JSON v0 Additions
| Construct | JSON v0 Node | Notes |
|------------|-------------------------------------------------|-------|
| break | `{ "type": "Break" }` | Lowered into loop exit block with implicit jump. |
| continue | `{ "type": "Continue" }` | Lowered into loop head block jump. |
| throw expr | `{ "type": "Throw", "expr": Expr }` | Initial implementation can degrade to `{ "type": "Expr", "expr": expr }` until VM/JIT semantics are ready. |
| try/catch/finally | `{ "type": "Try", "try": Stmt[], "catches": Catch[], "finally": Stmt[]? }` | Each `Catch` includes `{ "param": String?, "typeHint": String?, "body": Stmt[] }`. Stage-1 implementation may treat as pass-through expression block. |
Lowering Strategy (Bridge)
1. **Break/Continue**
- Bridge stores loop header/exit blocks on a loop stack.
- `Break` maps to `Jump { target: loop_exit }`, `Continue` to `Jump { target: loop_head }`.
- MirBuilder already has `LoopBuilder`; expose helpers to fetch head/exit blocks.
2. **Throw/Try**
- Phase 15 MVP keeps them syntax-only to avoid VM/JIT churn. Parser/Emitter produce nodes; Bridge either degrades (Expr) or logs a structured event for future handling.
- Document expectation: once runtime exception model is defined, nodes become non-degrading.
3. **Metadata Events**
- Augment `crate::jit::observe` with `lower_shortcircuit`/`lower_try` stubs so instrumentation remains coherent when full support is wired.
Testing Plan
- Extend selfhost Stage-2 smoke file with guard cases (`return break` etc.) once lowering is live.
- Create dedicated JSON fixtures under `tests/json_v0_stage3/` for break/continue/try once behaviour stabilises.
- Update `tools/ny_stage2_shortcircuit_smoke.sh` to ensure Stage-3 constructs do not regress Stage-2 semantics (break/continue degrade). Timing: after lowering is implemented.
Migration Checklist
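1. ParserBox emits Stage-3 nodes under `NYASH_PARSER_STAGE3=1` gate to allow gradual rollout (the current stub is toggled through `ParserBox.stage3_enable()`, reached from the `--stage3` CLI flag).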
2. Emitter attaches Stage-3 JSON when gate is enabled (otherwise degrade to existing Stage-2 forms).
3. Bridge honours Stage-3 nodes when gate is on; degrade with warning when off.
4. PyVM/VM/JIT semantics gradually enabled (throw/try remain degrade until corresponding runtime support is merged).
5. Documentation kept in sync (`CURRENT_TASK.md`, release notes).
References
- Stage-2 design (`parser_mvp_stage2.md`)
- CURRENT_TASK stage checklist (Phase 15)
- `docs/guides/language-guide.md` section “Exceptions & Flow Control” (update when Stage-3 fully lands).

View File

@ -1,7 +1,7 @@
use serde::{Deserialize, Serialize};
use crate::mir::{
MirModule, MirFunction, FunctionSignature, BasicBlockId, MirInstruction,
ConstValue, BinaryOp, MirType, EffectMask, MirPrinter,
ConstValue, BinaryOp, MirType, EffectMask, MirPrinter, ValueId,
};
#[derive(Debug, Deserialize, Serialize)]
@ -24,6 +24,19 @@ enum StmtV0 {
If { cond: ExprV0, then: Vec<StmtV0>, #[serde(rename="else", default)] r#else: Option<Vec<StmtV0>> },
// Optional: loop (Stage-2)
Loop { cond: ExprV0, body: Vec<StmtV0> },
Break,
Continue,
Try { #[serde(rename="try")] try_body: Vec<StmtV0>, #[serde(default)] catches: Vec<CatchV0>, #[serde(default)] finally: Vec<StmtV0> },
}
/// One `catch` clause of a JSON v0 `Try` statement.
///
/// Every field carries `#[serde(default)]`, so a clause with missing keys still
/// deserializes: the optional strings become `None` and the body becomes empty.
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
struct CatchV0 {
/// Value of the `"param"` JSON key, if present.
// NOTE(review): presumably the identifier the caught value is bound to — confirm against the parser.
#[serde(rename="param", default)]
param: Option<String>,
/// Value of the `"typeHint"` JSON key, if present (camelCase on the wire, snake_case here).
#[serde(rename="typeHint", default)]
type_hint: Option<String>,
/// Statements under the `"body"` key; an empty list when the key is omitted.
#[serde(default)]
body: Vec<StmtV0>,
}
#[derive(Debug, Deserialize, Serialize, Clone)]
@ -41,6 +54,7 @@ enum ExprV0 {
Method { recv: Box<ExprV0>, method: String, args: Vec<ExprV0> },
New { class: String, args: Vec<ExprV0> },
Var { name: String },
Throw { expr: Box<ExprV0> },
}
pub fn parse_json_v0_to_module(json: &str) -> Result<MirModule, String> {
@ -197,7 +211,16 @@ fn lower_expr(f: &mut MirFunction, cur_bb: BasicBlockId, e: &ExprV0) -> Result<(
// merge with phi (use actual predecessors rhs_end and fall_bb)
let out = f.next_value_id();
if let Some(bb) = f.get_block_mut(merge_bb) {
bb.insert_instruction_after_phis(MirInstruction::Phi { dst: out, inputs: vec![(rhs_end, rval), (fall_bb, cdst)] });
let mut inputs: Vec<(BasicBlockId, ValueId)> = vec![(fall_bb, cdst)];
if rhs_end != fall_bb {
inputs.push((rhs_end, rval));
} else {
// Degenerate case: RHS ended in fall_bb (e.g., constant expression).
// Reuse the constant to keep PHI well-formed.
inputs.push((fall_bb, rval));
}
inputs.sort_by_key(|(bbid, _)| bbid.0);
bb.insert_instruction_after_phis(MirInstruction::Phi { dst: out, inputs });
}
Ok((out, merge_bb))
}
@ -280,6 +303,14 @@ fn lower_expr(f: &mut MirFunction, cur_bb: BasicBlockId, e: &ExprV0) -> Result<(
Ok((dst, cur))
}
ExprV0::Var { name } => Err(format!("undefined variable in this context: {}", name)),
ExprV0::Throw { expr } => {
let (_ignored, cur) = lower_expr(f, cur_bb, expr)?;
let dst = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur) {
bb.add_instruction(MirInstruction::Const { dst, value: ConstValue::Integer(0) });
}
Ok((dst, cur))
}
}
}
@ -310,6 +341,14 @@ fn lower_expr_with_vars(
}
Err(format!("undefined variable: {}", name))
}
ExprV0::Throw { expr } => {
let (_ignored, cur) = lower_expr_with_vars(f, cur_bb, expr, vars)?;
let dst = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur) {
bb.add_instruction(MirInstruction::Const { dst, value: ConstValue::Integer(0) });
}
Ok((dst, cur))
}
ExprV0::Call { name, args } => {
// Special: array literal lowering in vars context
if name == "array.of" {
@ -469,6 +508,21 @@ fn lower_stmt_with_vars(
StmtV0::Local { name, expr } => {
let (v, cur) = lower_expr_with_vars(f, cur_bb, expr, vars)?; vars.insert(name.clone(), v); Ok(cur)
}
StmtV0::Break => {
// Stage-3 placeholder: no-op until loop lowering supports break
Ok(cur_bb)
}
StmtV0::Continue => {
// Stage-3 placeholder: no-op until loop lowering supports continue
Ok(cur_bb)
}
StmtV0::Try { try_body, .. } => {
// Stage-3 placeholder: lower try body sequentially, ignore catches/finally for now
let mut tmp_vars = vars.clone();
let next_bb = lower_stmt_list_with_vars(f, cur_bb, try_body, &mut tmp_vars)?;
*vars = tmp_vars;
Ok(next_bb)
}
StmtV0::If { cond, then, r#else } => {
// Lower condition first
let (cval, cur) = lower_expr_with_vars(f, cur_bb, cond, vars)?;