selfhost: stub stage3 parser nodes under gate
This commit is contained in:
@ -37,8 +37,12 @@ Quick Next (today)
|
||||
3) Resolver/BoxIndex の prefix メタ反映 ✅ Done 2025‑09‑16
|
||||
- `plugin_meta_by_box` を構築し、`require_prefix` / `expose_short_names` を `resolve_using_target` へ適用。
|
||||
- `NYASH_PLUGIN_REQUIRE_PREFIX` が無効でも per-plugin meta で短名禁止を検知。
|
||||
4) 自己ホスト経路で Ny 実装切替のゲート準備(現状は Python MVP 優先を維持)。
|
||||
5) テスト:
|
||||
4) Parser Stage‑3 下地 ✅ Done 2025‑09‑16
|
||||
- `ParserBox.stage3_enable()` を追加し、Break/Continue/Throw/Try を JSON v0 に出力できるゲートを実装。
|
||||
- `--stage3` CLI フラグから ParserBox へ渡す導線を追加。
|
||||
- `docs/reference/architecture/parser_mvp_stage3.md` に Stage‑3 設計を記録。
|
||||
5) 自己ホスト経路で Ny 実装切替のゲート準備(現状は Python MVP 優先を維持)。
|
||||
6) テスト:
|
||||
- `source tools/dev_env.sh pyvm`
|
||||
- `NYASH_VM_USE_PY=1 ./tools/selfhost_stage2_smoke.sh`
|
||||
- `NYASH_VM_USE_PY=1 ./tools/selfhost_stage2_bridge_smoke.sh`
|
||||
@ -63,8 +67,8 @@ Current Status
|
||||
|
||||
Open
|
||||
- Bridge/PHI の正規化: 短絡(入れ子)における merge/PHI incoming を固定化(rhs_end/fall_bb の順序)。
|
||||
- JSON v0 の拡張方針: break/continue/try/catch/finally の表現(受け皿設計 or 受理時の事前降下)。
|
||||
- per‑plugin meta の反映: `require_prefix/expose_short_names/prefix` を Resolver 挙動へ段階適用(導線は実装済み)。
|
||||
- JSON v0 の拡張方針: break/continue/try/catch/finally の表現(受け皿設計 or 受理時の事前降下)。➡ `docs/reference/architecture/parser_mvp_stage3.md`
|
||||
- per‑plugin meta の反映: `require_prefix/expose_short_names/prefix` を Resolver 挙動へ段階適用(導線は実装済み)。✅ 2025‑09‑16 prefix enforcement とテスト追加済み。
|
||||
- `me` の扱い: MVP は `NYASH_BRIDGE_ME_DUMMY=1` の仮注入を継続(将来撤去)。
|
||||
- LLVM 直結(任意): JSON v0 → LLVM の導線追加は後回し。
|
||||
|
||||
|
||||
@ -2,8 +2,20 @@
|
||||
box ParserBox {
|
||||
gpos
|
||||
usings_json
|
||||
stage3
|
||||
|
||||
birth() { me.gpos = 0 me.usings_json = "[]" return 0 }
|
||||
// Constructor: initialises the fields gpos, usings_json and stage3.
// The Stage-3 gate starts disabled (0). Always returns 0.
birth() {
  me.gpos = 0
  me.usings_json = "[]"
  me.stage3 = 0
  return 0
}
|
||||
|
||||
// Switches the Stage-3 gate of this parser instance.
//   flag: null or 0 turns the gate off; any other value turns it on.
// The stored value is normalised to exactly 0 or 1. Always returns 0.
stage3_enable(flag) {
  local gate = 1
  if flag == null { gate = 0 } else {
    if flag == 0 { gate = 0 }
  }
  me.stage3 = gate
  return 0
}
|
||||
|
||||
// Reports the Stage-3 gate: returns 1 when me.stage3 is 1, otherwise 0.
stage3_enabled() {
  if me.stage3 != 1 { return 0 }
  return 1
}
|
||||
|
||||
esc_json(s) {
|
||||
local out = ""
|
||||
@ -613,12 +625,24 @@ box ParserBox {
|
||||
// Stage-3 acceptance: break / continue → Break / Continue nodes when the stage3 gate is on, otherwise a no-op expression
|
||||
if me.starts_with_kw(src, j, "break") == 1 {
  // Query the gate once; it decides both the whitespace skip and the emitted node.
  local brk_stage3 = me.stage3_enabled()
  j = j + 5
  if brk_stage3 == 1 { j = me.skip_ws(src, j) }
  // Progress guard: never hand back a position at or before the statement start.
  if j <= stmt_start {
    if j < src.length() { j = j + 1 } else { j = src.length() }
  }
  me.gpos_set(j)
  if brk_stage3 == 1 { return "{\"type\":\"Break\"}" }
  // Gate off: degrade to the no-op expression `Int 0`.
  return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
}
|
||||
if me.starts_with_kw(src, j, "continue") == 1 {
|
||||
j = j + 8
|
||||
if me.stage3_enabled() == 1 {
|
||||
j = me.skip_ws(src, j)
|
||||
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
|
||||
me.gpos_set(j)
|
||||
return "{\"type\":\"Continue\"}"
|
||||
}
|
||||
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
|
||||
me.gpos_set(j)
|
||||
return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
|
||||
@ -629,6 +653,11 @@ box ParserBox {
|
||||
j = me.skip_ws(src, j)
|
||||
local e_throw = me.parse_expr2(src, j)
|
||||
j = me.gpos_get()
|
||||
if me.stage3_enabled() == 1 {
|
||||
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
|
||||
me.gpos_set(j)
|
||||
return "{\"type\":\"Throw\",\"expr\":" + e_throw + "}"
|
||||
}
|
||||
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
|
||||
me.gpos_set(j)
|
||||
return "{\"type\":\"Expr\",\"expr\":" + e_throw + "}"
|
||||
@ -640,7 +669,10 @@ box ParserBox {
|
||||
// parse try block
|
||||
local try_res = me.parse_block2(src, j)
|
||||
local at_t = try_res.lastIndexOf("@")
|
||||
local try_json = try_res.substring(0, at_t)
|
||||
j = me.to_int(try_res.substring(at_t+1, try_res.length()))
|
||||
local catches_json = "["
|
||||
local catch_first = 1
|
||||
// zero or more catch
|
||||
local guard_ct = 0
|
||||
local max_ct = 100
|
||||
@ -651,10 +683,24 @@ box ParserBox {
|
||||
if me.starts_with_kw(src, j, "catch") == 1 {
|
||||
j = j + 5
|
||||
j = me.skip_ws(src, j)
|
||||
local catch_type = null
|
||||
local catch_param = null
|
||||
if src.substring(j, j+1) == "(" { j = j + 1 j = me.skip_ws(src, j)
|
||||
// optional type + name
|
||||
if me.is_alpha(src.substring(j, j+1)) { local id1 = me.read_ident2(src, j) local at1 = id1.lastIndexOf("@") j = me.to_int(id1.substring(at1+1, id1.length())) j = me.skip_ws(src, j) }
|
||||
if me.is_alpha(src.substring(j, j+1)) { local id2 = me.read_ident2(src, j) local at2 = id2.lastIndexOf("@") j = me.to_int(id2.substring(at2+1, id2.length())) j = me.skip_ws(src, j) }
|
||||
// Optional identifiers inside the catch parentheses: `Type name` or just `name`.
// read_ident2 results are used here as "<ident>@<next position>".
if me.is_alpha(src.substring(j, j+1)) {
  local id1 = me.read_ident2(src, j)
  local at1 = id1.lastIndexOf("@")
  catch_type = id1.substring(0, at1)
  j = me.to_int(id1.substring(at1+1, id1.length()))
  j = me.skip_ws(src, j)
}
if me.is_alpha(src.substring(j, j+1)) {
  local id2 = me.read_ident2(src, j)
  local at2 = id2.lastIndexOf("@")
  catch_param = id2.substring(0, at2)
  j = me.to_int(id2.substring(at2+1, id2.length()))
  j = me.skip_ws(src, j)
}
// A lone identifier, as in `catch (err)`, is the parameter name, not a type.
// Without this swap the clause would be emitted with "typeHint" and no "param".
if catch_type != null && catch_param == null {
  catch_param = catch_type
  catch_type = null
}
|
||||
if src.substring(j, j+1) == ")" { j = j + 1 }
|
||||
}
|
||||
j = me.skip_ws(src, j)
|
||||
@ -662,16 +708,37 @@ box ParserBox {
|
||||
local c_res = me.parse_block2(src, j)
|
||||
local atc = c_res.lastIndexOf("@")
|
||||
j = me.to_int(c_res.substring(atc+1, c_res.length()))
|
||||
if me.stage3_enabled() == 1 {
|
||||
local entry = "{"
|
||||
local wrote = 0
|
||||
if catch_param != null && catch_param.length() > 0 { entry = entry + "\"param\":\"" + me.esc_json(catch_param) + "\"" wrote = 1 }
|
||||
if catch_type != null && catch_type.length() > 0 { if wrote == 1 { entry = entry + "," } entry = entry + "\"typeHint\":\"" + me.esc_json(catch_type) + "\"" wrote = 1 }
|
||||
local body_json = c_res.substring(0, atc)
|
||||
if wrote == 1 { entry = entry + "," }
|
||||
entry = entry + "\"body\":" + body_json + "}"
|
||||
if catch_first == 0 { catches_json = catches_json + "," + entry } else { catches_json = catches_json + entry catch_first = 0 }
|
||||
}
|
||||
} else { cont_ct = 0 }
|
||||
}
|
||||
catches_json = catches_json + "]"
|
||||
// optional finally
|
||||
j = me.skip_ws(src, j)
|
||||
local finally_json = null
|
||||
if me.starts_with_kw(src, j, "finally") == 1 {
|
||||
j = j + 7
|
||||
j = me.skip_ws(src, j)
|
||||
local f_res = me.parse_block2(src, j)
|
||||
local atf = f_res.lastIndexOf("@")
|
||||
j = me.to_int(f_res.substring(atf+1, f_res.length()))
|
||||
finally_json = f_res.substring(0, atf)
|
||||
}
|
||||
if me.stage3_enabled() == 1 {
|
||||
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
|
||||
me.gpos_set(j)
|
||||
local node = "{\"type\":\"Try\",\"try\":" + try_json + ",\"catches\":" + catches_json
|
||||
if finally_json != null { node = node + ",\"finally\":" + finally_json }
|
||||
node = node + "}"
|
||||
return node
|
||||
}
|
||||
if j <= stmt_start { if j < src.length() { j = j + 1 } else { j = src.length() } }
|
||||
me.gpos_set(j)
|
||||
|
||||
@ -33,8 +33,9 @@ static box Main {
|
||||
}
|
||||
|
||||
// Parser delegation
|
||||
parse_program(src) {
|
||||
parse_program(src, stage3_flag) {
|
||||
local parser = new ParserBox()
|
||||
if stage3_flag == 1 { parser.stage3_enable(1) }
|
||||
// Collect using metadata (no-op acceptance in Stage‑15)
|
||||
parser.extract_usings(src)
|
||||
me._usings = parser.get_usings_json()
|
||||
@ -53,17 +54,25 @@ static box Main {
|
||||
local src = "return 1+2*3"
|
||||
local read_tmp = 0
|
||||
local input_path = null
|
||||
local stage3_mode = 0
|
||||
if args != null {
|
||||
local alen = args.length()
|
||||
local i = 0
|
||||
loop(i < alen) {
|
||||
local a = args.get(i)
|
||||
if a == "--read-tmp" { read_tmp = 1 } else {
|
||||
if a == "--min-json" { /* handled later */ } else {
|
||||
// First non-flag arg as input path
|
||||
if a == "--read-tmp" {
|
||||
read_tmp = 1
|
||||
} else {
|
||||
if a == "--min-json" {
|
||||
/* handled later */
|
||||
} else {
|
||||
if a == "--stage3" {
|
||||
stage3_mode = 1
|
||||
} else {
|
||||
if input_path == null { input_path = a }
|
||||
}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
}
|
||||
}
|
||||
@ -85,7 +94,8 @@ static box Main {
|
||||
local alen = args.length()
|
||||
local i = 0
|
||||
loop(i < alen) {
|
||||
if args.get(i) == "--min-json" { min_mode = 1 }
|
||||
local arg = args.get(i)
|
||||
if arg == "--min-json" { min_mode = 1 }
|
||||
i = i + 1
|
||||
}
|
||||
}
|
||||
@ -93,7 +103,7 @@ static box Main {
|
||||
if min_mode == 1 {
|
||||
json = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}]}"
|
||||
} else {
|
||||
json = me.parse_program(src)
|
||||
json = me.parse_program(src, stage3_mode)
|
||||
}
|
||||
|
||||
// Emit via EmitterBox (attach meta.usings when available)
|
||||
|
||||
52
docs/reference/architecture/parser_mvp_stage3.md
Normal file
52
docs/reference/architecture/parser_mvp_stage3.md
Normal file
@ -0,0 +1,52 @@
|
||||
# Parser MVP Stage-3 Design (Phase 15)
|
||||
|
||||
Scope
|
||||
- Extend Stage-2 parser emission to cover the control-flow constructs commonly found in everyday code:
|
||||
- `break` / `continue`
|
||||
- `throw expr`
|
||||
- `try { ... } catch (Type err) { ... } finally { ... }`
|
||||
- Note: other Stage-3 ideas (switch/async) remain out of scope until after self-host parity.
|
||||
- Preserve existing Stage-2 behaviour (locals/loop/if/call/method/new/ternary) with no regressions.
|
||||
|
||||
Guiding Principles
|
||||
- JSON v0 must remain stable for the Stage-2 path; Stage-3 additions should be feature-flagged or degrade safely when disabled.
|
||||
- Short-circuit semantics are already mirrored via logical nodes; Stage-3 should reuse the same block-building infrastructure (Bridge/VM/JIT) to avoid special cases.
|
||||
- Continue the "degrade to expression" approach when code generation is not ready (e.g. throw/try) so that Stage-2 tests stay green while the full implementation is developed.
|
||||
|
||||
JSON v0 Additions
|
||||
| Construct | JSON v0 Node | Notes |
|
||||
|------------|-------------------------------------------------|-------|
|
||||
| break | `{ "type": "Break" }` | Lowered into loop exit block with implicit jump. |
|
||||
| continue | `{ "type": "Continue" }` | Lowered into loop head block jump. |
|
||||
| throw expr | `{ "type": "Throw", "expr": Expr }` | Initial implementation can degrade to `{ "type": "Expr", "expr": expr }` until VM/JIT semantics are ready. |
|
||||
| try/catch/finally | `{ "type": "Try", "try": Stmt[], "catches": Catch[], "finally": Stmt[]? }` | Each `Catch` includes `{ "param": String?, "typeHint": String?, "body": Stmt[] }`. The initial implementation may treat the statement as a pass-through block. |
|
||||
|
||||
Lowering Strategy (Bridge)
|
||||
1. **Break/Continue**
|
||||
- Bridge stores loop header/exit blocks on a loop stack.
|
||||
- `Break` maps to `Jump { target: loop_exit }`, `Continue` to `Jump { target: loop_head }`.
|
||||
- MirBuilder already has `LoopBuilder`; expose helpers to fetch head/exit blocks.
|
||||
|
||||
2. **Throw/Try**
|
||||
- Phase 15 MVP keeps them syntax-only to avoid VM/JIT churn. Parser/Emitter produce nodes; Bridge either degrades (Expr) or logs a structured event for future handling.
|
||||
- Document expectation: once runtime exception model is defined, nodes become non-degrading.
|
||||
|
||||
3. **Metadata Events**
|
||||
- Augment `crate::jit::observe` with `lower_shortcircuit`/`lower_try` stubs so instrumentation remains coherent when full support is wired.
|
||||
|
||||
Testing Plan
|
||||
- Extend selfhost Stage-2 smoke file with guard cases (`return break` etc.) once lowering is live.
|
||||
- Create dedicated JSON fixtures under `tests/json_v0_stage3/` for break/continue/try once behaviour stabilises.
|
||||
- Update `tools/ny_stage2_shortcircuit_smoke.sh` to ensure Stage-3 constructs do not regress Stage-2 semantics (break/continue degrade). Timing: after lowering is implemented.
|
||||
|
||||
Migration Checklist
|
||||
1. ParserBox emits Stage-3 nodes only under the `NYASH_PARSER_STAGE3=1` gate (currently wired through `ParserBox.stage3_enable()` and the `--stage3` CLI flag) to allow gradual rollout.
|
||||
2. Emitter attaches Stage-3 JSON when gate is enabled (otherwise degrade to existing Stage-2 forms).
|
||||
3. Bridge honours Stage-3 nodes when gate is on; degrade with warning when off.
|
||||
4. PyVM/VM/JIT semantics gradually enabled (throw/try remain degrade until corresponding runtime support is merged).
|
||||
5. Documentation kept in sync (`CURRENT_TASK.md`, release notes).
|
||||
|
||||
References
|
||||
- Stage-2 design (`parser_mvp_stage2.md`)
|
||||
- CURRENT_TASK stage checklist (Phase 15)
|
||||
- `docs/guides/language-guide.md` section “Exceptions & Flow Control” (update when Stage-3 fully lands).
|
||||
@ -1,7 +1,7 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::mir::{
|
||||
MirModule, MirFunction, FunctionSignature, BasicBlockId, MirInstruction,
|
||||
ConstValue, BinaryOp, MirType, EffectMask, MirPrinter,
|
||||
ConstValue, BinaryOp, MirType, EffectMask, MirPrinter, ValueId,
|
||||
};
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
@ -24,6 +24,19 @@ enum StmtV0 {
|
||||
If { cond: ExprV0, then: Vec<StmtV0>, #[serde(rename="else", default)] r#else: Option<Vec<StmtV0>> },
|
||||
// Optional: loop (Stage-2)
|
||||
Loop { cond: ExprV0, body: Vec<StmtV0> },
|
||||
Break,
|
||||
Continue,
|
||||
Try { #[serde(rename="try")] try_body: Vec<StmtV0>, #[serde(default)] catches: Vec<CatchV0>, #[serde(default)] finally: Vec<StmtV0> },
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
|
||||
struct CatchV0 {
|
||||
#[serde(rename="param", default)]
|
||||
param: Option<String>,
|
||||
#[serde(rename="typeHint", default)]
|
||||
type_hint: Option<String>,
|
||||
#[serde(default)]
|
||||
body: Vec<StmtV0>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, Clone)]
|
||||
@ -41,6 +54,7 @@ enum ExprV0 {
|
||||
Method { recv: Box<ExprV0>, method: String, args: Vec<ExprV0> },
|
||||
New { class: String, args: Vec<ExprV0> },
|
||||
Var { name: String },
|
||||
Throw { expr: Box<ExprV0> },
|
||||
}
|
||||
|
||||
pub fn parse_json_v0_to_module(json: &str) -> Result<MirModule, String> {
|
||||
@ -197,7 +211,16 @@ fn lower_expr(f: &mut MirFunction, cur_bb: BasicBlockId, e: &ExprV0) -> Result<(
|
||||
// merge with phi (use actual predecessors rhs_end and fall_bb)
|
||||
let out = f.next_value_id();
|
||||
if let Some(bb) = f.get_block_mut(merge_bb) {
|
||||
bb.insert_instruction_after_phis(MirInstruction::Phi { dst: out, inputs: vec![(rhs_end, rval), (fall_bb, cdst)] });
|
||||
let mut inputs: Vec<(BasicBlockId, ValueId)> = vec![(fall_bb, cdst)];
|
||||
if rhs_end != fall_bb {
|
||||
inputs.push((rhs_end, rval));
|
||||
} else {
|
||||
// Degenerate case: RHS ended in fall_bb (e.g., constant expression).
|
||||
// Reuse the constant to keep PHI well-formed.
|
||||
inputs.push((fall_bb, rval));
|
||||
}
|
||||
inputs.sort_by_key(|(bbid, _)| bbid.0);
|
||||
bb.insert_instruction_after_phis(MirInstruction::Phi { dst: out, inputs });
|
||||
}
|
||||
Ok((out, merge_bb))
|
||||
}
|
||||
@ -280,6 +303,14 @@ fn lower_expr(f: &mut MirFunction, cur_bb: BasicBlockId, e: &ExprV0) -> Result<(
|
||||
Ok((dst, cur))
|
||||
}
|
||||
ExprV0::Var { name } => Err(format!("undefined variable in this context: {}", name)),
|
||||
ExprV0::Throw { expr } => {
|
||||
let (_ignored, cur) = lower_expr(f, cur_bb, expr)?;
|
||||
let dst = f.next_value_id();
|
||||
if let Some(bb) = f.get_block_mut(cur) {
|
||||
bb.add_instruction(MirInstruction::Const { dst, value: ConstValue::Integer(0) });
|
||||
}
|
||||
Ok((dst, cur))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -310,6 +341,14 @@ fn lower_expr_with_vars(
|
||||
}
|
||||
Err(format!("undefined variable: {}", name))
|
||||
}
|
||||
ExprV0::Throw { expr } => {
|
||||
let (_ignored, cur) = lower_expr_with_vars(f, cur_bb, expr, vars)?;
|
||||
let dst = f.next_value_id();
|
||||
if let Some(bb) = f.get_block_mut(cur) {
|
||||
bb.add_instruction(MirInstruction::Const { dst, value: ConstValue::Integer(0) });
|
||||
}
|
||||
Ok((dst, cur))
|
||||
}
|
||||
ExprV0::Call { name, args } => {
|
||||
// Special: array literal lowering in vars context
|
||||
if name == "array.of" {
|
||||
@ -469,6 +508,21 @@ fn lower_stmt_with_vars(
|
||||
StmtV0::Local { name, expr } => {
|
||||
let (v, cur) = lower_expr_with_vars(f, cur_bb, expr, vars)?; vars.insert(name.clone(), v); Ok(cur)
|
||||
}
|
||||
StmtV0::Break => {
|
||||
// Stage-3 placeholder: no-op until loop lowering supports break
|
||||
Ok(cur_bb)
|
||||
}
|
||||
StmtV0::Continue => {
|
||||
// Stage-3 placeholder: no-op until loop lowering supports continue
|
||||
Ok(cur_bb)
|
||||
}
|
||||
StmtV0::Try { try_body, .. } => {
|
||||
// Stage-3 placeholder: lower try body sequentially, ignore catches/finally for now
|
||||
let mut tmp_vars = vars.clone();
|
||||
let next_bb = lower_stmt_list_with_vars(f, cur_bb, try_body, &mut tmp_vars)?;
|
||||
*vars = tmp_vars;
|
||||
Ok(next_bb)
|
||||
}
|
||||
StmtV0::If { cond, then, r#else } => {
|
||||
// Lower condition first
|
||||
let (cval, cur) = lower_expr_with_vars(f, cur_bb, cond, vars)?;
|
||||
|
||||
Reference in New Issue
Block a user