//! Build script: generates `src/grammar/generated.rs` from
//! `grammar/unified-grammar.toml`.
//!
//! The grammar file is read with a deliberately small line scanner instead of
//! a TOML crate, so the script needs nothing beyond the standard library.
//! Recognised input:
//!
//! - `[keywords.<name>]` sections with a `token = "..."` key
//! - `[operators.{add,sub,mul,div}]` sections with `coercion_strategy` and a
//!   `type_rules = [ { left, right, result, action }, ... ]` array
//! - `[syntax.statements]` with a single-line `allow = [..]`
//! - `[syntax.expressions]` with a single-line `allow_binops = [..]`
//!
//! Anything else is ignored. Missing operator rules and syntax lists fall back
//! to built-in defaults so the generated code is always complete.

use std::{
    env, fs, io,
    path::{Path, PathBuf},
};

/// One operator type rule as `(left, right, result, action)`.
type TypeRule = (String, String, String, String);

/// Supported operators: `(TOML name, constant infix, default coercion strategy)`.
const OPERATORS: &[(&str, &str, &str)] = &[
    ("add", "ADD", "string_priority"),
    ("sub", "SUB", "numeric_only"),
    ("mul", "MUL", "numeric_only"),
    ("div", "DIV", "numeric_only"),
];

/// Statements emitted when `[syntax.statements]` provides no `allow` list.
const DEFAULT_STATEMENTS: &[&str] = &[
    "box", "global", "function", "static", "if", "loop", "break", "return", "print", "nowait",
    "include", "local", "outbox", "try", "throw", "using", "from",
];

/// Binary operators emitted when `[syntax.expressions]` provides no `allow_binops` list.
const DEFAULT_BINOPS: &[&str] = &["add", "sub", "mul", "div"];

/// Grammar written to `grammar/unified-grammar.toml` when that file is missing.
const MINIMAL_GRAMMAR: &str = r#"
[keywords.me]
token = "ME"

[keywords.from]
token = "FROM"

[keywords.loop]
token = "LOOP"

[operators.add]
symbol = "+"
coercion_strategy = "string_priority"
type_rules = [
  { left = "String", right = "String", result = "String", action = "concat" },
  { left = "String", right = "Integer", result = "String", action = "concat" },
  { left = "Integer", right = "String", result = "String", action = "concat" },
  { left = "String", right = "Bool", result = "String", action = "concat" },
  { left = "Bool", right = "String", result = "String", action = "concat" },
  { left = "String", right = "Other", result = "String", action = "concat" },
  { left = "Other", right = "String", result = "String", action = "concat" },
  { left = "Integer", right = "Integer", result = "Integer", action = "add_i64" },
  { left = "Float", right = "Float", result = "Float", action = "add_f64" }
]
"#;

/// Source of the `lookup_keyword` helper appended to the generated module.
const LOOKUP_KEYWORD_FN: &str = r#"
pub fn lookup_keyword(word: &str) -> Option<&'static str> {
    for (k, t) in KEYWORDS {
        if *k == word { return Some(*t); }
    }
    None
}
"#;

/// The grammar section the scanner is currently inside.
enum Section {
    /// A section this generator does not consume.
    Other,
    /// `[keywords.<name>]`, carrying `<name>`.
    Keyword(String),
    /// `[operators.<op>]`, carrying the index of `<op>` in [`OPERATORS`].
    Operator(usize),
    /// `[syntax.statements]`.
    SyntaxStatements,
    /// `[syntax.expressions]`.
    SyntaxExpressions,
}

/// Values parsed for one operator; empty parts are filled with defaults at render time.
#[derive(Default)]
struct OperatorSpec {
    coercion: Option<String>,
    rules: Vec<TypeRule>,
}

/// Everything the generator extracts from the grammar file.
#[derive(Default)]
struct Grammar {
    /// `(keyword, token)` pairs in file order.
    keywords: Vec<(String, String)>,
    /// One entry per [`OPERATORS`] element, in the same order.
    operators: Vec<OperatorSpec>,
    statements: Vec<String>,
    binops: Vec<String>,
}

/// Strips surrounding whitespace and double quotes from a scalar value.
fn unquote(value: &str) -> String {
    value.trim().trim_matches('"').to_string()
}

/// Returns the text after `=` when `line` assigns exactly `key`.
///
/// The key is compared in full, so `allow` does not match `allow_binops`.
fn value_for_key<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    let (name, value) = line.split_once('=')?;
    (name.trim() == key).then(|| value.trim())
}

/// Interprets `line` as a `[section]` header, or returns `None` if it is not one.
///
/// Every header ends the previous section, including headers this generator
/// does not consume; those map to [`Section::Other`].
fn section_from_header(line: &str) -> Option<Section> {
    let name = line.strip_prefix('[')?.strip_suffix(']')?.trim();
    let section = if let Some(keyword) = name.strip_prefix("keywords.") {
        Section::Keyword(keyword.to_string())
    } else if let Some(op) = name.strip_prefix("operators.") {
        OPERATORS
            .iter()
            .position(|&(toml_name, _, _)| toml_name == op)
            .map_or(Section::Other, Section::Operator)
    } else {
        match name {
            "syntax.statements" => Section::SyntaxStatements,
            "syntax.expressions" => Section::SyntaxExpressions,
            _ => Section::Other,
        }
    };
    Some(section)
}

/// Parses the `key = "value"` pairs found between one pair of braces.
///
/// Returns `None` unless `left`, `right`, `result` and `action` are all
/// present and non-empty.
fn parse_rule_fields(fields: &str) -> Option<TypeRule> {
    let mut left = String::new();
    let mut right = String::new();
    let mut result = String::new();
    let mut action = String::new();
    for field in fields.split(',') {
        if let Some((key, value)) = field.split_once('=') {
            let value = unquote(value);
            match key.trim() {
                "left" => left = value,
                "right" => right = value,
                "result" => result = value,
                "action" => action = value,
                _ => {}
            }
        }
    }
    let complete = [&left, &right, &result, &action]
        .iter()
        .all(|part| !part.is_empty());
    complete.then(|| (left, right, result, action))
}

/// Extracts every `{ ... }` rule contained in `text`.
///
/// Splitting on the closing brace first means a trailing `,` or `]` can never
/// end up inside a field value.
fn parse_type_rules(text: &str) -> Vec<TypeRule> {
    text.split('}')
        .filter_map(|chunk| {
            let (_, fields) = chunk.split_once('{')?;
            parse_rule_fields(fields)
        })
        .collect()
}

/// Reports whether `text` closes the surrounding array, i.e. has a `]` after its last `}`.
fn closes_array(text: &str) -> bool {
    text.rsplit('}')
        .next()
        .map_or(false, |tail| tail.contains(']'))
}

/// Parses a single-line `[ "a", "b" ]` list into its non-empty items.
fn parse_string_list(value: &str) -> Vec<String> {
    value
        .trim_matches(&['[', ']'][..])
        .split(',')
        .map(unquote)
        .filter(|item| !item.is_empty())
        .collect()
}

/// Scans the grammar text once and collects everything the generator consumes.
fn parse_grammar(content: &str) -> Grammar {
    let mut grammar = Grammar {
        operators: OPERATORS.iter().map(|_| OperatorSpec::default()).collect(),
        ..Grammar::default()
    };
    let mut section = Section::Other;
    let mut in_type_rules = false;

    for raw in content.lines() {
        let line = raw.trim();
        if let Some(next) = section_from_header(line) {
            section = next;
            in_type_rules = false;
            continue;
        }
        match &section {
            Section::Other => {}
            Section::Keyword(name) => {
                if let Some(token) = value_for_key(line, "token") {
                    grammar.keywords.push((name.clone(), unquote(token)));
                }
            }
            Section::Operator(index) => {
                let spec = &mut grammar.operators[*index];
                if in_type_rules {
                    spec.rules.extend(parse_type_rules(line));
                    in_type_rules = !closes_array(line);
                } else if let Some(value) = value_for_key(line, "type_rules") {
                    // Rules may start on the opening line, and the array may
                    // also close on it.
                    if let Some(rest) = value.strip_prefix('[') {
                        spec.rules.extend(parse_type_rules(rest));
                        in_type_rules = !closes_array(rest);
                    }
                } else if let Some(strategy) = value_for_key(line, "coercion_strategy") {
                    spec.coercion = Some(unquote(strategy));
                }
            }
            Section::SyntaxStatements => {
                if let Some(value) = value_for_key(line, "allow") {
                    grammar.statements.extend(parse_string_list(value));
                }
            }
            Section::SyntaxExpressions => {
                if let Some(value) = value_for_key(line, "allow_binops") {
                    grammar.binops.extend(parse_string_list(value));
                }
            }
        }
    }
    grammar
}

/// Built-in rules used when the grammar defines none for operator `op`.
fn default_rules(op: &str) -> Vec<TypeRule> {
    fn rule(left: &str, right: &str, result: &str, action: &str) -> TypeRule {
        (
            left.to_string(),
            right.to_string(),
            result.to_string(),
            action.to_string(),
        )
    }

    let mut rules = Vec::new();
    if op == "add" {
        // `+` concatenates as soon as either side is a String.
        rules.push(rule("String", "String", "String", "concat"));
        for other in &["Integer", "Bool", "Other"] {
            rules.push(rule("String", other, "String", "concat"));
            rules.push(rule(other, "String", "String", "concat"));
        }
    }
    rules.push(rule("Integer", "Integer", "Integer", &format!("{}_i64", op)));
    rules.push(rule("Float", "Float", "Float", &format!("{}_f64", op)));
    rules
}

/// Appends `pub static <name>: &[&str]`, using `defaults` when `parsed` is empty.
fn push_str_array(code: &mut String, name: &str, parsed: &[String], defaults: &[&str]) {
    code.push_str(&format!("\npub static {}: &[&str] = &[\n", name));
    if parsed.is_empty() {
        for item in defaults {
            code.push_str(&format!("    {:?},\n", item));
        }
    } else {
        for item in parsed {
            code.push_str(&format!("    {:?},\n", item));
        }
    }
    code.push_str("];");
}

/// Renders the Rust source of `generated.rs`.
///
/// Values are written with `{:?}` so quotes or backslashes in the grammar are
/// escaped instead of corrupting the generated string literals.
fn render(grammar: &Grammar) -> String {
    let mut code = String::from("// Auto-generated from grammar/unified-grammar.toml\n");

    code.push_str("pub static KEYWORDS: &[(&str, &str)] = &[\n");
    for (keyword, token) in &grammar.keywords {
        code.push_str(&format!("    ({:?}, {:?}),\n", keyword, token));
    }
    code.push_str("];\n");

    let operators = || grammar.operators.iter().zip(OPERATORS.iter());

    for (spec, &(_, infix, default_coercion)) in operators() {
        let coercion = spec.coercion.as_deref().unwrap_or(default_coercion);
        code.push_str(&format!(
            "pub static OPERATORS_{}_COERCION: &str = {:?};\n",
            infix, coercion
        ));
    }

    for (index, (spec, &(name, infix, _))) in operators().enumerate() {
        if index > 0 {
            code.push('\n');
        }
        code.push_str(&format!(
            "pub static OPERATORS_{}_RULES: &[(&str, &str, &str, &str)] = &[\n",
            infix
        ));
        let fallback;
        let rules = if spec.rules.is_empty() {
            fallback = default_rules(name);
            &fallback
        } else {
            &spec.rules
        };
        for (left, right, result, action) in rules {
            code.push_str(&format!(
                "    ({:?}, {:?}, {:?}, {:?}),\n",
                left, right, result, action
            ));
        }
        code.push_str("];");
    }

    code.push_str(LOOKUP_KEYWORD_FN);
    push_str_array(
        &mut code,
        "SYNTAX_ALLOWED_STATEMENTS",
        &grammar.statements,
        DEFAULT_STATEMENTS,
    );
    push_str_array(
        &mut code,
        "SYNTAX_ALLOWED_BINOPS",
        &grammar.binops,
        DEFAULT_BINOPS,
    );
    code
}

/// Adds the failed action and path to an I/O error so build failures are actionable.
fn with_path(action: &str, path: &Path, err: io::Error) -> io::Error {
    io::Error::new(
        err.kind(),
        format!("{} {}: {}", action, path.display(), err),
    )
}

/// Generates `src/grammar/generated.rs` below `manifest_dir`.
///
/// Creates a minimal grammar file first when none exists.
///
/// # Errors
///
/// Returns the underlying I/O error, annotated with the offending path, when
/// a directory cannot be created or a file cannot be read or written.
fn run(manifest_dir: &Path) -> io::Result<()> {
    let grammar_dir = manifest_dir.join("grammar");
    let grammar_file = grammar_dir.join("unified-grammar.toml");
    // NOTE(review): the output goes into the source tree rather than OUT_DIR;
    // kept as-is on the assumption that `src/grammar` references this file by
    // path -- confirm before relocating.
    let out_dir = manifest_dir.join("src").join("grammar");
    let out_file = out_dir.join("generated.rs");

    println!("cargo:rerun-if-changed={}", grammar_file.display());

    if !grammar_file.exists() {
        fs::create_dir_all(&grammar_dir).map_err(|err| with_path("create", &grammar_dir, err))?;
        fs::write(&grammar_file, MINIMAL_GRAMMAR)
            .map_err(|err| with_path("write", &grammar_file, err))?;
        println!(
            "cargo:warning=Created minimal grammar at {}",
            grammar_file.display()
        );
    }

    let content =
        fs::read_to_string(&grammar_file).map_err(|err| with_path("read", &grammar_file, err))?;
    let code = render(&parse_grammar(&content));

    fs::create_dir_all(&out_dir).map_err(|err| with_path("create", &out_dir, err))?;
    // Leave the file untouched when nothing changed so its mtime does not
    // trigger a needless recompile of the crate.
    let unchanged = fs::read_to_string(&out_file)
        .map(|existing| existing == code)
        .unwrap_or(false);
    if !unchanged {
        fs::write(&out_file, code).map_err(|err| with_path("write", &out_file, err))?;
    }
    Ok(())
}

/// Build-script entry point.
///
/// # Panics
///
/// Panics when `CARGO_MANIFEST_DIR` is unset or when generation fails, which
/// makes Cargo abort the build with the message.
fn main() {
    let manifest_dir = PathBuf::from(
        env::var_os("CARGO_MANIFEST_DIR")
            .expect("CARGO_MANIFEST_DIR is set by Cargo for build scripts"),
    );
    if let Err(err) = run(&manifest_dir) {
        panic!("grammar codegen failed: {}", err);
    }
}
apps/tests/mir-branch-ret/main.nyash` +- テスト(本件のみ): `cargo test -q --test grammar_add_rules` + +次のTODO(優先順) +1) JITロワラー分割の続き + - 大きい分岐(Extern/PluginInvoke/BoxCall)を `src/jit/lower/core/ops_ext.rs` へ抽出 + - 各ステップごとに jit-direct スモーク確認 +2) 統一文法の拡張 + - operators: Sub/Mul/Div の `type_rules` を TOML → 生成 → VM/Interp/JIT に非侵襲ログ(必要なら `*_ENFORCE_*`を用意) + - keywords/alias/context の雛形を TOML 化(差分ログ継続) +3) スナップショット整備 + - add 以外の演算子でも「grammar期待 vs 実際」の表テストを追加 + - 将来、Tokenizer/Parser でも「grammar期待 vs 実際構文」のスナップショットを追加 + +注意 +- 既存の他テストには未整備部分があり全体 `cargo test` は赤が出るため、当面は個別テスト/スモークを推奨 +- Release の jit-direct 実行は `--features cranelift-jit` が必要 + +## Update: Phase 11.9 – 統一文法アーキテクチャ(MVP導入計画) + +目的: Tokenizer/Parser/Interpreter/MIR/VM/JIT の解釈差異を解消するため、単一の文法・意味・実行定義を導入(詳細は `docs/development/roadmap/phases/phase-11.9/unified-grammar-architecture.md` と `docs/development/roadmap/phases/phase-11.9/PLAN.md`)。 + +直近TODO(M1/M2のMVP範囲) +- [ ] scaffolding: `build.rs` + `src/grammar/{mod.rs,engine.rs}` + `src/grammar/generated.rs`(codegen方式) +- [ ] `grammar/unified-grammar.toml` 初期化(keywords: `me`,`from`,`loop`; operators: `add`) +- [ ] Tokenizer に `engine.is_keyword()` を差し込み(`NYASH_GRAMMAR_DIFF=1` で差分ログ) +- [ ] `ExecutionSemantics` に `operators.add` を実装し、Interpreter/VM/JIT へ薄く統合(既存実装はフォールバック) +- [ ] 予約語マッピングの一貫性テストと、加算セマンティクスの VM/JIT/Interpreter 一致テスト + +備考 +- ランタイム I/O は避け、TOML→生成コードに変換して起動/ホットパスへの影響を最小化 +- プラグイン拡張は将来の統合対象(優先度・名前空間・競合検知を設計) + +## Progress: JIT Lowering リファクタ状況(11.8/12系) + +完了 +- [x] builder 分割(`src/jit/lower/builder.rs` を薄いハブ化、`builder/cranelift.rs` へ移動) +- [x] jit-direct の最小スモーク安定(debug): + - apps/tests/mir-branch-ret → 1 + - apps/tests/mir-phi-min → 10 + - apps/tests/mir-branch-multi → 1 +- [x] core.rs の第一段階分割: + - `src/jit/lower/core_ops.rs` にヘルパー移設(push_value_if_known_or_param, cover_if_supported, BinOp/Compareなど) +- - `src/jit/lower/core/analysis.rs` 追加(Bool/PHI推論+統計) +- - `src/jit/lower/core/cfg.rs` 追加(PHI受け口順序とCFGダンプ) + +次の分割候補 +- [ ] Extern/PluginInvoke/BoxCall 
周辺の肥大化した分岐を `core/ops_ext.rs` に整理 +- [ ] `analysis`/`cfg` の補助関数(succ_phi_inputs など)の関数化 +- [ ] 分割ごとに jit-direct スモークの緑維持(debug / release+feature) diff --git a/docs/development/roadmap/phases/phase-11.9/PLAN.md b/docs/development/roadmap/phases/phase-11.9/PLAN.md new file mode 100644 index 00000000..8b03ccd6 --- /dev/null +++ b/docs/development/roadmap/phases/phase-11.9/PLAN.md @@ -0,0 +1,48 @@ +# Phase 11.9: 統一文法アーキテクチャ — 実装予定(MVP〜段階移行) + +## 目的 +- Tokenizer/Parser/Interpreter/MIR/VM/JIT の解釈差異を解消し、単一の「文法・意味・実行」定義から各層が参照する構造へ移行する。 +- 変更や拡張(予約語/演算子/構文)のコストと不整合リスクを減らす。 + +## マイルストーン(MVP→段階導入) + +### M1: 予約語レジストリの導入(最小) +- 追加: `src/grammar/engine.rs`(`UnifiedGrammarEngine`、`KeywordRegistry` の骨格) +- 追加: `grammar/unified-grammar.toml`(初期エントリ: `me`, `from`, `loop`, `+`) +- 追加: `build.rs` で TOML → `src/grammar/generated.rs` をコード生成(ランタイム I/O 回避) +- Tokenizer 統合(非侵襲): 従来テーブルの後段に `engine.is_keyword()` を差し込み、`NYASH_GRAMMAR_DIFF=1` で差分ログ +- 成功条件: 既存テストを落とさず、差分ログが 0 or 想定内のみに収束 + +### M2: 演算子セマンティクスの統一(加算など最小) +- `ExecutionSemantics` に `operators.add` を定義(型規則/コアーション/エラー方針) +- Interpreter/VM/JIT で `execute_semantic("add", …)` による共通実装窓口を追加(従来実装はフォールバック) +- 既存 `hostcall_registry`/JIT ポリシーと接合するインターフェースを用意(型分類/シンボルの参照点を一本化) +- 成功条件: 文字列結合/整数加算/浮動小数加算の3系統で VM/JIT/Interpreter の一致を維持 + +### M3: 構文規則エンジンの段階導入 +- `SyntaxRuleEngine` 追加、`statement`/`expr` の骨格ルールを TOML 側へ切り出し +- Parser 統合(段階的): 既存パーサ優先+新ルールでの検証を併走、差分ログで移行安全性を担保 +- 成功条件: 代表サンプルで新旧の AST→MIR が一致(スナップショット) + +### M4: 並行実行/差分検出・テスト整備 +- 並行期間は新旧両系の結果を比較し、スナップショットとファズで回帰防止 +- 収束後、旧ルートを段階的に縮退 + +## 実装順(詳細 TODO) +1) `build.rs` と `src/grammar/mod.rs` の雛形追加(`generated.rs` を `include!`) +2) `KeywordRegistry` の生成コードを実装、Tokenizer に差し込み(環境変数で切り替え) +3) `operators.add` の型規則を TOML 化し、`ExecutionSemantics` で解決 +4) Interpreter/VM/JIT へ共通窓口の薄い統合(実行は従来実装と比較可能に) +5) 構文ルール最小セット(statement/expr)を TOML へ移管し、解析の差分をログ化 +6) スナップショット/ファズの整備と収束確認 + +## リスクと対策 +- 競合/拡張: プラグイン由来の拡張を名前空間+優先度でマージ、競合は検知してビルド失敗で気付かせる +- 実行コスト: 生成コード方式でランタイム I/O 
を避け、起動時間・ホットパスへの影響をゼロに近づける +- 文脈依存: `contextual` のキー粒度を設計(node_kind/context など)し、曖昧解釈を防ぐ + +## 成功基準(Exit Criteria) +- 予約語解決の統一(Tokenizer での差分 0) +- 加算に関する VM/JIT/Interpreter のセマンティクス一致(型差分含む) +- 構文最小セットで新旧の AST→MIR が一致(代表ケース) + diff --git a/docs/development/roadmap/phases/phase-11.9/README.md b/docs/development/roadmap/phases/phase-11.9/README.md index 3376dc76..4e5619b4 100644 --- a/docs/development/roadmap/phases/phase-11.9/README.md +++ b/docs/development/roadmap/phases/phase-11.9/README.md @@ -75,12 +75,28 @@ keywords: ## 🔗 関連ドキュメント -- [統一文法アーキテクチャ設計書](unified-grammar-architecture.md) ← **🔥 核心設計** -- [統一予約語システム仕様](unified-keyword-system.md) ← **🎯 具体的実装** -- [AI深層考察: 統一文法アーキテクチャ](ai-deep-thoughts-unified-grammar.md) ← **💡 Gemini/Codex分析** -- [文法統一化詳細設計](grammar-unification.txt) -- [統一文法定義YAML](nyash-grammar-v1.yaml) -- [実装計画](implementation-plan.txt) +### 📌 まず読むべき資料 +- **[統一セマンティクス実装設計](unified-semantics-implementation.txt)** ← **🎯 最新の実装方針** +- **[統一文法設計総合まとめ](UNIFIED-GRAMMAR-DESIGN-SUMMARY.md)** ← 設計思想の理解 + +### 🔥 核心設計ドキュメント +- [統一文法アーキテクチャ設計書](unified-grammar-architecture.md) - 基本設計 +- [統一予約語システム仕様](unified-keyword-system.md) - 具体的実装 +- [AI深層考察: 統一文法アーキテクチャ](ai-deep-thoughts-unified-grammar.md) - Gemini/Codex分析 + +### 📚 発展的設計(参考) +- [発展的設計集](advanced-designs/) - より深い設計思想 + - box-first-grammar-architecture.md - 箱化アプローチ + - root-cutting-architecture.md - 疎結合設計 + - zero-knowledge-architecture.md - 究極の分離 + +### 🔧 実装資料 +- [アーカイブ](archive/) - 過去の詳細設計ドキュメント + - grammar-unification.txt - 初期の文法統一化詳細設計 + - nyash-grammar-v1.yaml - 統一文法定義YAML(初版) + - implementation-plan.txt - 実装計画 + +### 🔗 関連フェーズ - [AI-Nyash Compact Notation Protocol](../../ideas/new-features/2025-08-29-ai-compact-notation-protocol.md) - [Phase 12: プラグインシステム](../phase-12/) diff --git a/docs/development/roadmap/phases/phase-11.9/UNIFIED-GRAMMAR-DESIGN-SUMMARY.md b/docs/development/roadmap/phases/phase-11.9/UNIFIED-GRAMMAR-DESIGN-SUMMARY.md new file mode 100644 index 00000000..fec691b8 --- /dev/null +++ 
b/docs/development/roadmap/phases/phase-11.9/UNIFIED-GRAMMAR-DESIGN-SUMMARY.md @@ -0,0 +1,107 @@ +# Phase 11.9 統一文法設計 - 総合まとめ + +## 📋 概要 + +Nyashの各実行層(Tokenizer/Parser/Interpreter/MIR/VM/JIT)で予約語・文法解釈がバラバラに実装されている問題を解決する統一文法アーキテクチャ設計のまとめです。 + +## 🎯 核心的な問題 + +```rust +// 現在: 同じ "me" が6箇所で別々に定義 +Tokenizer: "me" → TokenType::ME +Parser: 独自のme処理ロジック +Interpreter: 独自のself参照実装 +MIR Builder: LoadLocal(0)への変換 +VM: OP_LOAD_MEの実行 +JIT: LoadFirstParamの生成 +``` + +## 💡 提案された解決策 + +### 1. 基本アプローチ: 統一文法エンジン +- 単一の文法定義(YAML/TOML) +- 各層が参照する統一API +- UnifiedSemantics による一貫した実行 + +### 2. AI提案: ビルド時コード生成 +- **Gemini**: 宣言的定義 + build.rs によるコード生成 +- **Codex**: MIR中心の統一セマンティクス基盤 +- 実行時オーバーヘッドゼロ + +### 3. 箱化による疎結合設計 +- 各層を独立した「箱」として実装 +- 変換箱(TransformerBox)パターン +- パイプライン方式での連結 + +## 📊 実装アプローチの比較 + +| アプローチ | 利点 | 欠点 | 推奨度 | +|---------|------|------|-------| +| 統一エンジン | シンプル、理解しやすい | 実行時オーバーヘッド | ★★★ | +| コード生成 | 高性能、型安全 | ビルド複雑化 | ★★★★★ | +| 完全箱化 | 究極の疎結合 | 実装複雑度高 | ★★★★ | + +## 🚀 推奨実装計画 + +### Phase 1: 文法定義ファイル作成 +```yaml +# grammar/nyash.yml +tokens: + me: { id: 1, category: self_reference } + from: { id: 2, category: delegation } + loop: { id: 3, category: control_flow } + +operators: + "+": { precedence: 10, associativity: left } +``` + +### Phase 2: コード生成基盤 +```rust +// build.rs +fn generate_from_grammar() { + // grammar.yml → generated/*.rs +} +``` + +### Phase 3: 段階的移行 +1. Tokenizer を生成コードに移行 +2. Parser を統一文法に移行 +3. Semantics を一元化 +4. MIR/VM/JIT を統合 + +## 🎯 期待される効果 + +1. **保守性向上**: 新機能追加が1箇所で完了 +2. **一貫性確保**: 全層で同じセマンティクス +3. **AI対応改善**: LLMが正確なコードを生成 +4. **性能維持**: ビルド時最適化でオーバーヘッドなし + +## 📁 作成されたドキュメント + +### 必須ドキュメント(実装に必要) +1. **[統一文法アーキテクチャ設計書](unified-grammar-architecture.md)** - 基本設計 +2. **[統一予約語システム仕様](unified-keyword-system.md)** - 具体的実装仕様 +3. **[AI深層考察](ai-deep-thoughts-unified-grammar.md)** - Gemini/Codex分析 + +### 発展的ドキュメント(参考資料) +4. **[Box-First文法アーキテクチャ](advanced-designs/box-first-grammar-architecture.md)** - 箱化アプローチ +5. **[根切り文法アーキテクチャ](advanced-designs/root-cutting-architecture.md)** - 完全疎結合設計 +6. 
**[ゼロ知識文法アーキテクチャ](advanced-designs/zero-knowledge-architecture.md)** - 究極の分離設計 + +### 既存ドキュメント +- [文法統一化詳細設計](archive/grammar-unification.txt) +- [統一文法定義YAML](archive/nyash-grammar-v1.yaml) +- [実装計画](archive/implementation-plan.txt) + +## 🔧 次のステップ + +1. `grammar/nyash.yml` の初版作成 +2. `crates/nygrammar-gen` の実装開始 +3. Tokenizer の移行から着手 +4. 段階的に全層を統一 + +## 📝 結論 + +コード生成アプローチ(Gemini/Codex推奨)を採用し、`grammar/nyash.yml` を単一の真実の源として、build.rs で各層向けのコードを生成する方式が最も実用的です。 + +これにより、Nyashの文法が完全に統一され、保守性・一貫性・AI対応すべてが改善されます。 \ No newline at end of file diff --git a/docs/development/roadmap/phases/phase-11.9/box-first-grammar-architecture.md b/docs/development/roadmap/phases/phase-11.9/advanced-designs/box-first-grammar-architecture.md similarity index 100% rename from docs/development/roadmap/phases/phase-11.9/box-first-grammar-architecture.md rename to docs/development/roadmap/phases/phase-11.9/advanced-designs/box-first-grammar-architecture.md diff --git a/docs/development/roadmap/phases/phase-11.9/root-cutting-architecture.md b/docs/development/roadmap/phases/phase-11.9/advanced-designs/root-cutting-architecture.md similarity index 100% rename from docs/development/roadmap/phases/phase-11.9/root-cutting-architecture.md rename to docs/development/roadmap/phases/phase-11.9/advanced-designs/root-cutting-architecture.md diff --git a/docs/development/roadmap/phases/phase-11.9/advanced-designs/zero-knowledge-architecture.md b/docs/development/roadmap/phases/phase-11.9/advanced-designs/zero-knowledge-architecture.md new file mode 100644 index 00000000..6383228a --- /dev/null +++ b/docs/development/roadmap/phases/phase-11.9/advanced-designs/zero-knowledge-architecture.md @@ -0,0 +1,304 @@ +# ゼロ知識文法アーキテクチャ - 究極の疎結合 + +## 🔍 さらに深い問題: 暗黙知識の漏洩 + +### 現在の設計でもまだ残る問題 +```rust +// 🚨 TokenToASTBoxがTokenの意味を知っている +transform(tokens: TokenStream) -> AST { + if token == Token::Me { // Tokenの意味を知っている! 
+ return AST::SelfReference + } +} + +// 🚨 ASTToMIRBoxがASTの構造を知っている +transform(ast: AST) -> MIR { + match ast { + AST::BinaryOp(op, left, right) => { // AST構造を知っている! + // ... + } + } +} +``` + +## 🎯 ゼロ知識原則: 「箱は変換ルールだけを知る」 + +### 純粋な変換テーブル駆動設計 + +```rust +// 各箱は変換テーブルだけを持つ +box TokenClassifierBox { + init { table: Map } // 文字列→数値のマッピングのみ + + classify(word: String) -> u32 { + return me.table.get(word).unwrapOr(0) // 0 = unknown + } +} + +// ビルド時に生成される純粋なマッピング +const TOKEN_TABLE: Map = { + "me" => 1, + "from" => 2, + "loop" => 3, + // ... +} +``` + +## 📊 統一中間表現(UIR: Unified Intermediate Representation) + +### すべての層が数値タグで通信 + +``` +Source Code UIR Tags Execution +----------- -------- --------- +"me" → [1] → LoadLocal(0) +"+" → [100] → Add +"loop" → [200] → Branch +1 + 2 → [300,1,300,2,100] → Const(1), Const(2), Add +``` + +### UIRTag: 意味を持たない純粋な識別子 +```rust +box UIRTag { + init { id: u32, children: Array } + + // タグは意味を持たない、ただの番号 + isLeaf() { return me.children.isEmpty() } + getChildren() { return me.children } +} +``` + +## 🔄 完全分離された変換パイプライン + +### 1. 字句解析: 文字列→UIRタグ +```rust +box LexicalTransformerBox { + init { charTable: Array } // 文字→タグのテーブル + + transform(text: String) -> Array { + local tags = [] + local chars = text.chars() + + loop(chars.hasNext()) { + local ch = chars.next() + local tag = me.charTable[ch.code()] + + if tag == TAG_LETTER { + local word = me.collectWhile(chars, TAG_LETTER) + tags.push(me.lookupWord(word)) + } else if tag == TAG_DIGIT { + local num = me.collectWhile(chars, TAG_DIGIT) + tags.push(UIRTag(TAG_NUMBER, num)) + } + // ... + } + return tags + } + + // 単語検索も純粋なハッシュ値 + lookupWord(word: String) -> UIRTag { + local hash = me.perfectHash(word) + return UIRTag(hash, []) + } +} +``` + +### 2. 
構文解析: UIRタグ→UIRツリー +```rust +box SyntaxTransformerBox { + init { + // 優先順位テーブル(タグ→優先度) + precedence: Map, + // 結合性テーブル(タグ→左/右) + associativity: Map + } + + transform(tags: Array) -> UIRTag { + // Prattパーサーだが、意味を知らない + return me.parseExpression(tags, 0) + } + + parseExpression(tags: Array, minPrec: u32) -> UIRTag { + local left = me.parsePrimary(tags) + + loop(tags.hasNext()) { + local op = tags.peek() + local prec = me.precedence.get(op.id).unwrapOr(0) + + if prec < minPrec { break } + + tags.next() // consume operator + local assoc = me.associativity.get(op.id).unwrapOr(LEFT) + local nextPrec = if assoc == LEFT { prec + 1 } else { prec } + local right = me.parseExpression(tags, nextPrec) + + // 構造だけ作る、意味は知らない + left = UIRTag(op.id, [left, right]) + } + + return left + } +} +``` + +### 3. 意味解析: UIRツリー→実行可能形式 +```rust +box SemanticTransformerBox { + init { + // タグ→実行アクションのテーブル + actions: Map + } + + transform(tree: UIRTag) -> ExecutableCode { + local action = me.actions.get(tree.id) + + if action { + return action.generate(tree.children.map(child => { + me.transform(child) + })) + } + + return ExecutableCode.Noop() + } +} +``` + +## 📐 ビルド時の統一: マスターテーブル生成 + +### grammar.yaml → 各種テーブル生成 +```yaml +# grammar.yaml - 真の単一情報源 +tokens: + me: { id: 1, type: self_reference } + from: { id: 2, type: delegation } + loop: { id: 3, type: control_flow } + +operators: + "+": { id: 100, precedence: 10, associativity: left } + "*": { id: 101, precedence: 20, associativity: left } + +semantics: + 1: { action: load_self } + 2: { action: delegate_call } + 3: { action: loop_construct } + 100: { action: add_operation } +``` + +### ビルド時生成 +```rust +// build.rs +fn generate_tables(grammar: GrammarDef) { + // 1. 完全ハッシュ関数生成 + generate_perfect_hash(grammar.tokens) + + // 2. 優先順位テーブル生成 + generate_precedence_table(grammar.operators) + + // 3. セマンティクステーブル生成 + generate_semantic_table(grammar.semantics) + + // 4. 各層の定数生成 + generate_constants(grammar) +} +``` + +## 🎯 究極の利点: 完全な知識分離 + +### 1. 
各箱が知っていること +- **LexicalTransformer**: 文字の分類とハッシュ計算のみ +- **SyntaxTransformer**: 優先順位と結合性のみ +- **SemanticTransformer**: タグとアクションの対応のみ + +### 2. 各箱が知らないこと +- **すべての箱**: 他の層の存在、Nyashという言語名すら知らない +- **すべての箱**: キーワードの意味、演算子の意味 +- **すべての箱**: 最終的な実行形式 + +### 3. テストの単純化 +```rust +test "lexical transformer" { + local table = { "hello" => 42 } + local box = LexicalTransformerBox(table) + assert box.transform("hello") == [UIRTag(42)] +} + +test "syntax transformer" { + local prec = { 100 => 10, 101 => 20 } + local box = SyntaxTransformerBox(prec, {}) + // 1 + 2 * 3 + local tags = [UIRTag(1), UIRTag(100), UIRTag(2), UIRTag(101), UIRTag(3)] + local tree = box.transform(tags) + // 期待: (+ 1 (* 2 3)) + assert tree == UIRTag(100, [ + UIRTag(1), + UIRTag(101, [UIRTag(2), UIRTag(3)]) + ]) +} +``` + +## 🔧 動的拡張: プラグインテーブル + +### 実行時のテーブル拡張 +```rust +box PluginLoaderBox { + init { transformers: Map } + + loadPlugin(path: String) { + local plugin = Plugin.load(path) + + // プラグインは新しいタグを登録 + local newTags = plugin.getTags() + + // 各変換器のテーブルを拡張 + me.transformers.get("lexical").extendTable(newTags.lexical) + me.transformers.get("syntax").extendTable(newTags.syntax) + me.transformers.get("semantic").extendTable(newTags.semantic) + } +} +``` + +## 📊 性能特性 + +### 1. キャッシュ効率 +- 各テーブルは連続メモリに配置 +- CPUキャッシュに収まるサイズ +- ランダムアクセスなし + +### 2. 並列化可能 +- 各変換は状態を持たない +- 入力を分割して並列処理可能 +- ロックフリー実装 + +### 3. 最適化の余地 +- テーブルのコンパクト化 +- SIMDによる並列検索 +- JITによるテーブル特化 + +## 🚀 最終形: 言語に依存しない変換エンジン + +```rust +// このエンジンはNyashを知らない! 
+box UniversalTransformEngine { + init { + pipeline: Array, + tables: Map + } + + execute(input: String) -> Output { + local data = input + + // 各変換を順番に適用 + me.pipeline.forEach(transformer => { + data = transformer.transform(data) + }) + + return data + } +} + +// Nyash = 特定のテーブルセット +const NYASH_TABLES = load_tables("nyash-grammar.yaml") +local engine = UniversalTransformEngine(STANDARD_PIPELINE, NYASH_TABLES) +``` + +これが究極の「根を切った」設計です。各箱は純粋な変換器であり、Nyashという言語の存在すら知りません。 \ No newline at end of file diff --git a/docs/development/roadmap/phases/phase-11.9/grammar-unification.txt b/docs/development/roadmap/phases/phase-11.9/archive/grammar-unification.txt similarity index 100% rename from docs/development/roadmap/phases/phase-11.9/grammar-unification.txt rename to docs/development/roadmap/phases/phase-11.9/archive/grammar-unification.txt diff --git a/docs/development/roadmap/phases/phase-11.9/implementation-plan.txt b/docs/development/roadmap/phases/phase-11.9/archive/implementation-plan.txt similarity index 100% rename from docs/development/roadmap/phases/phase-11.9/implementation-plan.txt rename to docs/development/roadmap/phases/phase-11.9/archive/implementation-plan.txt diff --git a/docs/development/roadmap/phases/phase-11.9/nyash-grammar-v1.yaml b/docs/development/roadmap/phases/phase-11.9/archive/nyash-grammar-v1.yaml similarity index 100% rename from docs/development/roadmap/phases/phase-11.9/nyash-grammar-v1.yaml rename to docs/development/roadmap/phases/phase-11.9/archive/nyash-grammar-v1.yaml diff --git a/docs/development/roadmap/phases/phase-11.9/chatgpt5-feedback-integration.md b/docs/development/roadmap/phases/phase-11.9/chatgpt5-feedback-integration.md new file mode 100644 index 00000000..acd07b6f --- /dev/null +++ b/docs/development/roadmap/phases/phase-11.9/chatgpt5-feedback-integration.md @@ -0,0 +1,255 @@ +# ChatGPT5フィードバック統合 - 統一文法アーキテクチャ改善 + +## 📋 ChatGPT5からの評価 + +> 「Grammar as THE Source of Truth で各層の乖離を一元化する狙いは現状の痛点に直結しており、有効です」 + +## 🎯 
指摘されたリスクへの対応策 + +### 1. ランタイム依存過多への対応 + +#### 問題 +```rust +// ❌ 悪い例:実行時にTOMLパース +let grammar = toml::from_str(&fs::read_to_string("grammar.toml")?)?; +``` + +#### 解決策:build.rs による完全コード生成 +```rust +// build.rs +fn main() { + println!("cargo:rerun-if-changed=grammar/nyash.yml"); + + let grammar = load_grammar_definition(); + + // Rust定数として生成 + generate_keyword_constants(&grammar); + generate_perfect_hash_function(&grammar); + generate_semantic_tables(&grammar); + generate_mir_mappings(&grammar); +} + +// 生成されるコード例 +// generated/keywords.rs +pub const KEYWORD_ME: u32 = 1; +pub const KEYWORD_FROM: u32 = 2; +pub const KEYWORD_LOOP: u32 = 3; + +#[inline(always)] +pub fn classify_keyword(s: &str) -> Option { + match s { + "me" => Some(KEYWORD_ME), + "from" => Some(KEYWORD_FROM), + "loop" => Some(KEYWORD_LOOP), + _ => None, + } +} +``` + +### 2. プラグイン拡張性と競合への対応 + +#### マージ戦略の定義 +```yaml +# grammar/nyash.yml +version: "1.0" +namespace: "core" + +# プラグイン拡張ポイント +extension_points: + operators: + merge_strategy: "priority" # 優先順位ベース + conflict_resolution: "namespace" # 名前空間で分離 + +# プラグイン例 +# plugins/custom/grammar.yml +namespace: "custom" +extends: "core" + +operators: + "++": # 新しい演算子 + priority: 100 + precedence: 15 + semantics: increment +``` + +#### 実装時の名前空間解決 +```rust +pub struct GrammarRegistry { + core: CoreGrammar, + plugins: HashMap, +} + +impl GrammarRegistry { + pub fn resolve_operator(&self, op: &str, context: &Context) -> OperatorDef { + // 1. 現在の名前空間で検索 + if let Some(def) = context.namespace.find_operator(op) { + return def; + } + + // 2. インポートされた名前空間を優先順位順に検索 + for imported in &context.imports { + if let Some(def) = self.plugins.get(imported)?.find_operator(op) { + return def; + } + } + + // 3. コア名前空間にフォールバック + self.core.find_operator(op).unwrap_or_else(|| { + panic!("Unknown operator: {}", op) + }) + } +} +``` + +### 3. 
文脈依存キーワードの曖昧性解決 + +#### fromキーワードの文脈解決ルール +```yaml +# grammar/nyash.yml +contextual_keywords: + from: + contexts: + - name: "box_delegation" + pattern: "box IDENT from" + priority: 100 + + - name: "method_delegation" + pattern: "from IDENT.IDENT" + priority: 90 + + - name: "variable_name" + pattern: "IDENT = from" # 変数名として使用 + priority: 10 + + resolution: "longest_match_first" # 最長一致優先 +``` + +#### パーサーでの実装 +```rust +impl Parser { + fn parse_from(&mut self) -> Result { + let start_pos = self.current_pos(); + + // 最長一致を試みる + if let Ok(delegation) = self.try_parse_delegation() { + return Ok(delegation); + } + + // フォールバック:通常の識別子として扱う + self.reset_to(start_pos); + Ok(Node::Identifier("from".to_string())) + } +} +``` + +### 4. 二重実装期間の管理 + +#### 自動差分検出テスト +```rust +#[cfg(test)] +mod migration_tests { + use super::*; + + #[test] + fn test_unified_vs_legacy_semantics() { + let test_cases = load_test_cases("tests/semantics/*.nyash"); + + for case in test_cases { + let legacy_result = legacy_interpreter.execute(&case); + let unified_result = unified_interpreter.execute(&case); + + // スナップショットテスト + assert_snapshot!( + format!("{}_unified", case.name), + unified_result + ); + + // 差分検出 + if legacy_result != unified_result { + // 意図的な変更か確認 + assert!( + is_expected_difference(&case, &legacy_result, &unified_result), + "Unexpected difference in {}: {:?} vs {:?}", + case.name, legacy_result, unified_result + ); + } + } + } +} +``` + +#### 段階的移行フラグ +```rust +pub struct ExecutionConfig { + pub use_unified_grammar: bool, + pub log_differences: bool, + pub fail_on_difference: bool, +} + +impl Interpreter { + pub fn execute_with_migration(&mut self, expr: &Expression) -> Result { + if self.config.use_unified_grammar { + let result = self.unified_execute(expr)?; + + if self.config.log_differences { + let legacy_result = self.legacy_execute(expr)?; + if result != legacy_result { + log::warn!( + "Semantic difference detected: {:?} -> unified: {:?}, legacy: {:?}", + expr, result, 
legacy_result + ); + + if self.config.fail_on_difference { + panic!("Unexpected semantic difference"); + } + } + } + + Ok(result) + } else { + self.legacy_execute(expr) + } + } +} +``` + +## 📊 改善された実装計画 + +### Phase 0: 準備(1週間) +- ベースラインテストスイート作成 +- 現在のセマンティクスのスナップショット記録 +- 差分検出フレームワーク構築 + +### Phase 1: コード生成基盤(1週間) +- build.rs による完全静的生成 +- ゼロランタイムコスト実現 +- CI/CDでの生成コード検証 + +### Phase 2: 名前空間とプラグイン(1週間) +- 名前空間解決システム +- プラグインマージ戦略実装 +- 競合検出と報告 + +### Phase 3: 文脈依存解決(1週間) +- fromキーワードの文脈ルール実装 +- 最長一致パーサー +- 曖昧性テストケース + +### Phase 4: 段階的移行(2週間) +- フィーチャーフラグ実装 +- 並行実行と差分ログ +- 本番環境での検証 + +## 🎯 期待される成果 + +1. **ゼロコスト抽象化**: 実行時オーバーヘッドなし +2. **安全な拡張性**: プラグイン競合の自動解決 +3. **明確な文脈解決**: 曖昧性のない文法 +4. **リスクフリー移行**: 自動検証による安全な移行 + +## 📝 まとめ + +ChatGPT5さんの指摘により、実装の潜在的リスクが明確になりました。 +これらの対策を組み込むことで、より堅牢で実用的な統一文法アーキテクチャが実現できます。 + +「痛点直結」という評価に応えられる実装を目指しますにゃ!🚀 \ No newline at end of file diff --git a/docs/development/roadmap/phases/phase-11.9/unified-semantics-implementation.txt b/docs/development/roadmap/phases/phase-11.9/unified-semantics-implementation.txt new file mode 100644 index 00000000..66868f74 --- /dev/null +++ b/docs/development/roadmap/phases/phase-11.9/unified-semantics-implementation.txt @@ -0,0 +1,255 @@ +# Nyash統一セマンティクス実装設計 +# 作成日: 2025-09-02 +# 目的: Interpreter/VM/JIT全層での予約語・文法解釈の完全統一 + +## 概要 +すべての実行層が同じセマンティクスに従うよう、MIR正規化層を中心とした統一実装を行う。 + +## 核心的な問題 +現在、同じ式が各層で異なる解釈をされている: +- "hello" + 123 + - Interpreter: エラーを出す + - VM: 型変換してから連結 + - JIT: 数値を文字列化してから連結 + +## 解決策:MIR統一セマンティクス + 軽量UIRタグ + +### 1. 
統一セマンティクス定義(grammar/semantics.yml) +```yaml +# すべての層が従う唯一の定義 +version: "1.0" +semantics: + add: + - pattern: [String, String] + action: concat + mir: StringConcat + vm: OP_STR_CONCAT + + - pattern: [String, Any] + action: coerce_concat + steps: + - ToString($2) + - StringConcat($1, $2) + + - pattern: [Integer, Integer] + action: add_i64 + mir: AddI64 + vm: OP_ADD_I64 + + toString: + - pattern: [String] + action: identity + - pattern: [Integer] + action: int_to_string + - pattern: [Float] + action: float_to_string + - pattern: [Bool] + action: bool_to_string + - pattern: [Null] + action: const_null_string +``` + +### 2. UIRタグシステム(層間通信) +```rust +// generated/uir_tags.rs (build.rsで生成) +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u32)] +pub enum UIRTag { + // 予約語 + ME = 1, + FROM = 2, + LOOP = 3, + BOX = 4, + INIT = 5, + + // 演算子 + ADD = 100, + SUB = 101, + MUL = 102, + DIV = 103, + + // セマンティクスアクション + STRING_CONCAT = 200, + TO_STRING = 201, + ADD_I64 = 202, + ADD_F64 = 203, +} +``` + +### 3. MIR正規化層(真実の基盤) +```rust +// src/mir/normalizer.rs +pub struct MIRNormalizer { + semantics_table: SemanticRuleTable, +} + +impl MIRNormalizer { + pub fn normalize(&self, expr: &Expression) -> MIR { + match expr { + Expression::BinaryOp(op, left, right) => { + let left_type = self.infer_type(left); + let right_type = self.infer_type(right); + + // 統一ルールを適用 + let rule = self.semantics_table.lookup(op, &[left_type, right_type]); + + match rule.action { + Action::Concat => { + MIR::StringConcat( + Box::new(self.normalize(left)), + Box::new(self.normalize(right)) + ) + } + Action::CoerceConcat => { + // 右辺を文字列に変換してから連結 + MIR::Sequence(vec![ + self.normalize(left), + MIR::ToString(Box::new(self.normalize(right))), + MIR::StringConcat + ]) + } + Action::AddI64 => { + MIR::AddI64( + Box::new(self.normalize(left)), + Box::new(self.normalize(right)) + ) + } + } + } + } + } +} +``` + +### 4. 
各層の統一実装 + +#### Interpreter層 +```rust +impl Interpreter { + fn execute_expression(&mut self, expr: &Expression) -> Result { + // すべてMIR経由で実行 + let mir = self.mir_normalizer.normalize(expr); + self.execute_mir(&mir) + } + + fn execute_mir(&mut self, mir: &MIR) -> Result { + match mir { + MIR::StringConcat(left, right) => { + let left_val = self.execute_mir(left)?; + let right_val = self.execute_mir(right)?; + Ok(Value::String(format!("{}{}", left_val, right_val))) + } + MIR::ToString(expr) => { + let val = self.execute_mir(expr)?; + Ok(Value::String(self.value_to_string(&val))) + } + MIR::AddI64(left, right) => { + let left_val = self.execute_mir(left)?; + let right_val = self.execute_mir(right)?; + Ok(Value::Integer(left_val.as_i64()? + right_val.as_i64()?)) + } + } + } +} +``` + +#### VM層 +```rust +impl VM { + fn compile_mir(&mut self, mir: &MIR) -> Vec<Opcode> { + // MIRから機械的にバイトコード生成 + match mir { + MIR::StringConcat(left, right) => { + let mut code = vec![]; + code.extend(self.compile_mir(left)); + code.extend(self.compile_mir(right)); + code.push(Opcode::StringConcat); + code + } + MIR::ToString(expr) => { + let mut code = self.compile_mir(expr); + code.push(Opcode::ToString); + code + } + MIR::AddI64(left, right) => { + let mut code = vec![]; + code.extend(self.compile_mir(left)); + code.extend(self.compile_mir(right)); + code.push(Opcode::AddI64); + code + } + } + } +} +``` + +#### JIT層 +```rust +impl JITCompiler { + fn compile_mir(&mut self, mir: &MIR) { + // MIRから最適化されたネイティブコード生成 + match mir { + MIR::StringConcat(left, right) => { + self.compile_mir(left); + self.compile_mir(right); + // 高速な文字列連結関数を呼び出し + self.emit_call(fast_string_concat); + } + MIR::ToString(expr) => { + self.compile_mir(expr); + // 型に応じた最適な変換 + self.emit_call(optimized_to_string); + } + MIR::AddI64(left, right) => { + self.compile_mir(left); + self.compile_mir(right); + // ネイティブな加算命令 + self.emit_add_i64(); + } + } + } +} +``` + +## 実装手順 + +### Phase 1: 基盤構築(1週間) +1. grammar/semantics.yml 作成 +2. 
UIRTag定義とbuild.rs生成 +3. SemanticRuleTable実装 + +### Phase 2: MIR正規化層(1週間) +1. MIRNormalizer実装 +2. 型推論システム構築 +3. セマンティクステーブル連携 + +### Phase 3: 各層統合(2週間) +1. Interpreterを MIR経由に変更 +2. VMのMIRコンパイラ実装 +3. JITのMIRコンパイラ実装 + +### Phase 4: テストと検証(1週間) +1. 統一セマンティクステスト作成 +2. 各層での一貫性検証 +3. パフォーマンス測定 + +## 期待される効果 + +1. **完全な一貫性**: すべての層が同じ動作 +2. **保守性向上**: セマンティクス変更が1箇所 +3. **拡張性**: 新しい演算子の追加が容易 +4. **AI対応**: 単一の仕様から学習可能 +5. **デバッグ容易性**: MIRレベルでの統一デバッグ + +## 注意事項 + +- 既存のコードとの互換性を保つため、フィーチャーフラグで段階的移行 +- パフォーマンスへの影響を最小限にするため、ビルド時最適化を活用 +- テストカバレッジを十分に確保してから本番移行 + +## 関連ファイル + +- grammar/semantics.yml - セマンティクス定義 +- src/mir/normalizer.rs - MIR正規化実装 +- build.rs - コード生成 +- tests/unified_semantics.rs - 統一テスト \ No newline at end of file diff --git a/docs/ideas/improvements/interpreter-box-architecture.md b/docs/ideas/improvements/interpreter-box-architecture.md new file mode 100644 index 00000000..d27aa968 --- /dev/null +++ b/docs/ideas/improvements/interpreter-box-architecture.md @@ -0,0 +1,102 @@ +# InterpreterBox アーキテクチャ - インタープリター層の箱化 + +## 概要 +インタープリター層を丸ごと箱化して疎結合にすることで、将来的な移行・撤退を容易にする設計提案。 + +## 背景 +- ChatGPT5さんの指摘:インタープリター層は将来的に撤退可能 +- 現状:AST実行とMIR実行が並存している +- VM層がMIRを直接実行するため、インタープリター層は冗長 + +## 提案:Everything is Box哲学の適用 + +### 現在の密結合 +```rust +// main.rsで直接呼び出し +match backend { + Backend::Interpreter => interpreter::execute(ast), // 密結合 + Backend::VM => vm::execute(mir), +} +``` + +### 箱化による疎結合 +```rust +// 実行エンジンを箱として抽象化 +pub trait ExecutorBox: Send + Sync { + fn execute(&self, input: ExecutionInput) -> Result; +} + +// インタープリター丸ごと箱化 +pub struct InterpreterBox { + ast_executor: AstExecutor, + symbol_table: SymbolTable, +} + +impl ExecutorBox for InterpreterBox { + fn execute(&self, input: ExecutionInput) -> Result { + self.ast_executor.run(input.ast) + } +} + +// VM丸ごと箱化 +pub struct VMBox { + mir_executor: MirExecutor, + runtime: Runtime, +} + +impl ExecutorBox for VMBox { + fn execute(&self, input: ExecutionInput) -> Result { + let mir = compile_to_mir(input.ast); + 
self.mir_executor.run(mir) + } +} +``` + +### 使用例 +```rust +let executor: Box<dyn ExecutorBox> = match backend { + Backend::Interpreter => Box::new(InterpreterBox::new()), + Backend::VM => Box::new(VMBox::new()), +}; +executor.execute(program) +``` + +## メリット + +1. **撤退不要**:使わなくなっても箱ごと置いておける +2. **切り替え簡単**:実行時に箱を差し替えるだけ +3. **テスト容易**:両方の箱で実行して結果を比較可能 +4. **将来性**:プラグイン化も可能 + +## Nyash的な書き方 +```nyash +// 将来的にはこんな感じ? +box InterpreterBox { + init { ast_executor, symbol_table } + + execute(ast) { + return me.ast_executor.run(ast) + } +} + +box VMBox { + init { mir_executor, runtime } + + execute(ast) { + local mir = compile_to_mir(ast) + return me.mir_executor.run(mir) + } +} + +// 実行エンジンの切り替え +local executor = new VMBox() // or new InterpreterBox() +executor.execute(program) +``` + +## まとめ +「捨てる」のではなく「箱に入れる」ことで、Nyashの"Everything is Box"哲学を貫きながら、将来の変更に対して柔軟に対応できる設計。 + +--- +作成日: 2025-09-02 +カテゴリ: アーキテクチャ改善 +優先度: 中(将来的な改善案) \ No newline at end of file diff --git a/grammar/unified-grammar.toml b/grammar/unified-grammar.toml new file mode 100644 index 00000000..4af025b3 --- /dev/null +++ b/grammar/unified-grammar.toml @@ -0,0 +1,22 @@ + +[keywords.me] +token = "ME" + +[keywords.from] +token = "FROM" + +[keywords.loop] +token = "LOOP" + +[operators.add] +symbol = "+" + +[syntax.statements] +allow = [ + "box","global","function","static", + "if","loop","break","return","print", + "nowait","include","local","outbox","try","throw","using","from" +] + +[syntax.expressions] +allow_binops = ["add","sub","mul","div","and","or","eq","ne"] diff --git a/src/backend/vm_values.rs b/src/backend/vm_values.rs index f4f8456e..d968a77b 100644 --- a/src/backend/vm_values.rs +++ b/src/backend/vm_values.rs @@ -19,6 +19,64 @@ impl VM { pub(super) fn execute_binary_op(&self, op: &BinaryOp, left: &VMValue, right: &VMValue) -> Result { let debug_bin = std::env::var("NYASH_VM_DEBUG_BIN").ok().as_deref() == Some("1"); if debug_bin { eprintln!("[VM] binop {:?} {:?} {:?}", op, left, right); } + if 
std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let lty = match left { VMValue::String(_) => "String", VMValue::Integer(_) => "Integer", VMValue::Float(_) => "Float", VMValue::Bool(_) => "Bool", _ => "Other" }; + let rty = match right { VMValue::String(_) => "String", VMValue::Integer(_) => "Integer", VMValue::Float(_) => "Float", VMValue::Bool(_) => "Bool", _ => "Other" }; + match *op { + BinaryOp::Add => { + let strat = crate::grammar::engine::get().add_coercion_strategy(); + let rule = crate::grammar::engine::get().decide_add_result(lty, rty); + eprintln!("[GRAMMAR-DIFF][VM] add.coercion_strategy={} left={} right={} rule={:?}", strat, lty, rty, rule); + } + BinaryOp::Sub => { + let strat = crate::grammar::engine::get().sub_coercion_strategy(); + let rule = crate::grammar::engine::get().decide_sub_result(lty, rty); + eprintln!("[GRAMMAR-DIFF][VM] sub.coercion_strategy={} left={} right={} rule={:?}", strat, lty, rty, rule); + } + BinaryOp::Mul => { + let strat = crate::grammar::engine::get().mul_coercion_strategy(); + let rule = crate::grammar::engine::get().decide_mul_result(lty, rty); + eprintln!("[GRAMMAR-DIFF][VM] mul.coercion_strategy={} left={} right={} rule={:?}", strat, lty, rty, rule); + } + BinaryOp::Div => { + let strat = crate::grammar::engine::get().div_coercion_strategy(); + let rule = crate::grammar::engine::get().decide_div_result(lty, rty); + eprintln!("[GRAMMAR-DIFF][VM] div.coercion_strategy={} left={} right={} rule={:?}", strat, lty, rty, rule); + } + _ => {} + } + } + if matches!(*op, BinaryOp::Add) && std::env::var("NYASH_GRAMMAR_ENFORCE_ADD").ok().as_deref() == Some("1") { + let lty = match left { VMValue::String(_) => "String", VMValue::Integer(_) => "Integer", VMValue::Float(_) => "Float", VMValue::Bool(_) => "Bool", _ => "Other" }; + let rty = match right { VMValue::String(_) => "String", VMValue::Integer(_) => "Integer", VMValue::Float(_) => "Float", VMValue::Bool(_) => "Bool", _ => "Other" }; + if let Some((res, _)) 
= crate::grammar::engine::get().decide_add_result(lty, rty) { + match res { + "String" => { + // Best-effort toString concat + fn vmv_to_string(v: &VMValue) -> String { + match v { + VMValue::String(s) => s.clone(), + VMValue::Integer(i) => i.to_string(), + VMValue::Float(f) => f.to_string(), + VMValue::Bool(b) => b.to_string(), + VMValue::Void => "void".to_string(), + VMValue::BoxRef(b) => b.to_string_box().value, + VMValue::Future(_) => "".to_string(), + } + } + let ls = vmv_to_string(left); + let rs = vmv_to_string(right); + return Ok(VMValue::String(format!("{}{}", ls, rs))); + } + "Integer" => { + if let (VMValue::Integer(l), VMValue::Integer(r)) = (left, right) { + return Ok(VMValue::Integer(l + r)); + } + } + _ => {} + } + } + } // Fast path: logical AND/OR accept any truthy via as_bool if matches!(*op, BinaryOp::And | BinaryOp::Or) { let l = left.as_bool()?; diff --git a/src/grammar/engine.rs b/src/grammar/engine.rs new file mode 100644 index 00000000..019ea378 --- /dev/null +++ b/src/grammar/engine.rs @@ -0,0 +1,59 @@ +use once_cell::sync::Lazy; + +use super::generated; + +pub struct UnifiedGrammarEngine; + +impl UnifiedGrammarEngine { + pub fn load() -> Self { Self } + pub fn is_keyword_str(&self, word: &str) -> Option<&'static str> { + generated::lookup_keyword(word) + } + pub fn add_coercion_strategy(&self) -> &'static str { + generated::OPERATORS_ADD_COERCION + } + pub fn add_rules(&self) -> &'static [(&'static str, &'static str, &'static str, &'static str)] { + generated::OPERATORS_ADD_RULES + } + pub fn decide_add_result(&self, left_ty: &str, right_ty: &str) -> Option<(&'static str, &'static str)> { + for (l, r, res, act) in self.add_rules() { + if *l == left_ty && *r == right_ty { return Some((*res, *act)); } + } + None + } + + pub fn sub_coercion_strategy(&self) -> &'static str { generated::OPERATORS_SUB_COERCION } + pub fn sub_rules(&self) -> &'static [(&'static str, &'static str, &'static str, &'static str)] { generated::OPERATORS_SUB_RULES } + 
pub fn decide_sub_result(&self, left_ty: &str, right_ty: &str) -> Option<(&'static str, &'static str)> { + for (l, r, res, act) in self.sub_rules() { if *l == left_ty && *r == right_ty { return Some((*res, *act)); } } + None + } + + pub fn mul_coercion_strategy(&self) -> &'static str { generated::OPERATORS_MUL_COERCION } + pub fn mul_rules(&self) -> &'static [(&'static str, &'static str, &'static str, &'static str)] { generated::OPERATORS_MUL_RULES } + pub fn decide_mul_result(&self, left_ty: &str, right_ty: &str) -> Option<(&'static str, &'static str)> { + for (l, r, res, act) in self.mul_rules() { if *l == left_ty && *r == right_ty { return Some((*res, *act)); } } + None + } + + pub fn div_coercion_strategy(&self) -> &'static str { generated::OPERATORS_DIV_COERCION } + pub fn div_rules(&self) -> &'static [(&'static str, &'static str, &'static str, &'static str)] { generated::OPERATORS_DIV_RULES } + pub fn decide_div_result(&self, left_ty: &str, right_ty: &str) -> Option<(&'static str, &'static str)> { + for (l, r, res, act) in self.div_rules() { if *l == left_ty && *r == right_ty { return Some((*res, *act)); } } + None + } +} + +pub static ENGINE: Lazy = Lazy::new(UnifiedGrammarEngine::load); + +pub fn get() -> &'static UnifiedGrammarEngine { &ENGINE } + +// --- Syntax rule helpers (generated-backed) --- +impl UnifiedGrammarEngine { + pub fn syntax_is_allowed_statement(&self, keyword: &str) -> bool { + super::generated::SYNTAX_ALLOWED_STATEMENTS.iter().any(|k| *k == keyword) + } + pub fn syntax_is_allowed_binop(&self, op: &str) -> bool { + super::generated::SYNTAX_ALLOWED_BINOPS.iter().any(|k| *k == op) + } +} diff --git a/src/grammar/generated.rs b/src/grammar/generated.rs new file mode 100644 index 00000000..0f04d259 --- /dev/null +++ b/src/grammar/generated.rs @@ -0,0 +1,69 @@ +// Auto-generated from grammar/unified-grammar.toml +pub static KEYWORDS: &[(&str, &str)] = &[ + ("me", "ME"), + ("from", "FROM"), + ("loop", "LOOP"), +]; +pub static 
OPERATORS_ADD_COERCION: &str = "string_priority"; +pub static OPERATORS_SUB_COERCION: &str = "numeric_only"; +pub static OPERATORS_MUL_COERCION: &str = "numeric_only"; +pub static OPERATORS_DIV_COERCION: &str = "numeric_only"; +pub static OPERATORS_ADD_RULES: &[(&str, &str, &str, &str)] = &[ + ("String", "String", "String", "concat"), + ("String", "Integer", "String", "concat"), + ("Integer", "String", "String", "concat"), + ("String", "Bool", "String", "concat"), + ("Bool", "String", "String", "concat"), + ("String", "Other", "String", "concat"), + ("Other", "String", "String", "concat"), + ("Integer", "Integer", "Integer", "add_i64"), + ("Float", "Float", "Float", "add_f64"), +]; +pub static OPERATORS_SUB_RULES: &[(&str, &str, &str, &str)] = &[ + ("Integer", "Integer", "Integer", "sub_i64"), + ("Float", "Float", "Float", "sub_f64"), +]; +pub static OPERATORS_MUL_RULES: &[(&str, &str, &str, &str)] = &[ + ("Integer", "Integer", "Integer", "mul_i64"), + ("Float", "Float", "Float", "mul_f64"), +]; +pub static OPERATORS_DIV_RULES: &[(&str, &str, &str, &str)] = &[ + ("Integer", "Integer", "Integer", "div_i64"), + ("Float", "Float", "Float", "div_f64"), +]; +pub fn lookup_keyword(word: &str) -> Option<&'static str> { + for (k, t) in KEYWORDS { + if *k == word { return Some(*t); } + } + None +} + +pub static SYNTAX_ALLOWED_STATEMENTS: &[&str] = &[ + "box", + "global", + "function", + "static", + "if", + "loop", + "break", + "return", + "print", + "nowait", + "include", + "local", + "outbox", + "try", + "throw", + "using", + "from", +]; +pub static SYNTAX_ALLOWED_BINOPS: &[&str] = &[ + "add", + "sub", + "mul", + "div", + "and", + "or", + "eq", + "ne", +]; \ No newline at end of file diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs new file mode 100644 index 00000000..03b8e2a5 --- /dev/null +++ b/src/grammar/mod.rs @@ -0,0 +1,6 @@ +pub mod engine; +// Generated tables from grammar/unified-grammar.toml +#[path = "generated.rs"] +mod generated; +pub use generated::*; + 
diff --git a/src/interpreter/expressions/operators.rs b/src/interpreter/expressions/operators.rs index 9f995255..7f0ceed4 100644 --- a/src/interpreter/expressions/operators.rs +++ b/src/interpreter/expressions/operators.rs @@ -165,8 +165,39 @@ impl NyashInterpreter { match op { BinaryOperator::Add => { + // Optional: enforce grammar rule for add (behind env) + if std::env::var("NYASH_GRAMMAR_ENFORCE_ADD").ok().as_deref() == Some("1") { + let lty = if crate::runtime::semantics::coerce_to_string(left_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(left_val.as_ref()).is_some() { "Integer" } else { "Other" }; + let rty = if crate::runtime::semantics::coerce_to_string(right_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(right_val.as_ref()).is_some() { "Integer" } else { "Other" }; + if let Some((res, _act)) = crate::grammar::engine::get().decide_add_result(lty, rty) { + match res { + "String" => { + let ls = crate::runtime::semantics::coerce_to_string(left_val.as_ref()).unwrap_or_else(|| left_val.to_string_box().value); + let rs = crate::runtime::semantics::coerce_to_string(right_val.as_ref()).unwrap_or_else(|| right_val.to_string_box().value); + return Ok(Box::new(StringBox::new(format!("{}{}", ls, rs)))); + } + "Integer" => { + if let (Some(li), Some(ri)) = (crate::runtime::semantics::coerce_to_i64(left_val.as_ref()), crate::runtime::semantics::coerce_to_i64(right_val.as_ref())) { + return Ok(Box::new(IntegerBox::new(li + ri))); + } + } + _ => {} + } + } + } + let (strat, lty, rty, expect) = if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let strat = crate::grammar::engine::get().add_coercion_strategy(); + let lty = if crate::runtime::semantics::coerce_to_string(left_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(left_val.as_ref()).is_some() { "Integer" } else { "Other" }; + let rty = if 
crate::runtime::semantics::coerce_to_string(right_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(right_val.as_ref()).is_some() { "Integer" } else { "Other" }; + let rule = crate::grammar::engine::get().decide_add_result(lty, rty); + (Some(strat.to_string()), Some(lty.to_string()), Some(rty.to_string()), rule.map(|(res, act)| (res.to_string(), act.to_string()))) + } else { (None, None, None, None) }; // 1) Intrinsic fast-paths (Integer+Integer, String+*, Bool+Bool) if let Some(result) = try_add_operation(left_val.as_ref(), right_val.as_ref()) { + if let (Some(s), Some(l), Some(r)) = (strat.as_ref(), lty.as_ref(), rty.as_ref()) { + let actual = if result.as_any().downcast_ref::().is_some() { "String" } else if result.as_any().downcast_ref::().is_some() { "Integer" } else { "Other" }; + eprintln!("[GRAMMAR-DIFF][Interp] add strat={} lty={} rty={} expect={:?} actual={} match={}", s, l, r, expect, actual, expect.as_ref().map(|(res,_)| res.as_str())==Some(actual)); + } return Ok(result); } // 2) Concatenation if either side is string-like (semantics) @@ -175,13 +206,22 @@ impl NyashInterpreter { if ls_opt.is_some() || rs_opt.is_some() { let ls = ls_opt.unwrap_or_else(|| left_val.to_string_box().value); let rs = rs_opt.unwrap_or_else(|| right_val.to_string_box().value); + if let (Some(s), Some(l), Some(r)) = (strat.as_ref(), lty.as_ref(), rty.as_ref()) { + eprintln!("[GRAMMAR-DIFF][Interp] add strat={} lty={} rty={} expect={:?} actual=String match={}", s, l, r, expect, expect.as_ref().map(|(res,_)| res=="String").unwrap_or(false)); + } return Ok(Box::new(StringBox::new(format!("{}{}", ls, rs)))); } // 3) Numeric fallback via coerce_to_i64 if let (Some(li), Some(ri)) = (crate::runtime::semantics::coerce_to_i64(left_val.as_ref()), crate::runtime::semantics::coerce_to_i64(right_val.as_ref())) { + if let (Some(s), Some(l), Some(r)) = (strat.as_ref(), lty.as_ref(), rty.as_ref()) { + eprintln!("[GRAMMAR-DIFF][Interp] add strat={} lty={} 
rty={} expect={:?} actual=Integer match={}", s, l, r, expect, expect.as_ref().map(|(res,_)| res=="Integer").unwrap_or(false)); + } return Ok(Box::new(IntegerBox::new(li + ri))); } // 4) Final error + if let (Some(s), Some(l), Some(r)) = (strat.as_ref(), lty.as_ref(), rty.as_ref()) { + eprintln!("[GRAMMAR-DIFF][Interp] add strat={} lty={} rty={} expect={:?} actual=Error", s, l, r, expect); + } Err(RuntimeError::InvalidOperation { message: format!("Addition not supported between {} and {}", left_val.type_name(), right_val.type_name()) @@ -219,6 +259,13 @@ impl NyashInterpreter { } BinaryOperator::Subtract => { + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let strat = crate::grammar::engine::get().sub_coercion_strategy(); + let lty = if crate::runtime::semantics::coerce_to_string(left_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(left_val.as_ref()).is_some() { "Integer" } else { "Other" }; + let rty = if crate::runtime::semantics::coerce_to_string(right_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(right_val.as_ref()).is_some() { "Integer" } else { "Other" }; + let rule = crate::grammar::engine::get().decide_sub_result(lty, rty); + eprintln!("[GRAMMAR-DIFF][Interp] sub strat={} lty={} rty={} expect={:?}", strat, lty, rty, rule); + } // Use helper function instead of trait methods if let Some(result) = try_sub_operation(left_val.as_ref(), right_val.as_ref()) { return Ok(result); @@ -231,6 +278,13 @@ impl NyashInterpreter { } BinaryOperator::Multiply => { + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let strat = crate::grammar::engine::get().mul_coercion_strategy(); + let lty = if crate::runtime::semantics::coerce_to_string(left_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(left_val.as_ref()).is_some() { "Integer" } else { "Other" }; + let rty = if 
crate::runtime::semantics::coerce_to_string(right_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(right_val.as_ref()).is_some() { "Integer" } else { "Other" }; + let rule = crate::grammar::engine::get().decide_mul_result(lty, rty); + eprintln!("[GRAMMAR-DIFF][Interp] mul strat={} lty={} rty={} expect={:?}", strat, lty, rty, rule); + } // Use helper function instead of trait methods if let Some(result) = try_mul_operation(left_val.as_ref(), right_val.as_ref()) { return Ok(result); @@ -243,6 +297,13 @@ impl NyashInterpreter { } BinaryOperator::Divide => { + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let strat = crate::grammar::engine::get().div_coercion_strategy(); + let lty = if crate::runtime::semantics::coerce_to_string(left_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(left_val.as_ref()).is_some() { "Integer" } else { "Other" }; + let rty = if crate::runtime::semantics::coerce_to_string(right_val.as_ref()).is_some() { "String" } else if crate::runtime::semantics::coerce_to_i64(right_val.as_ref()).is_some() { "Integer" } else { "Other" }; + let rule = crate::grammar::engine::get().decide_div_result(lty, rty); + eprintln!("[GRAMMAR-DIFF][Interp] div strat={} lty={} rty={} expect={:?}", strat, lty, rty, rule); + } // Use helper function instead of trait methods match try_div_operation(left_val.as_ref(), right_val.as_ref()) { Ok(result) => Ok(result), diff --git a/src/jit/lower/core.rs b/src/jit/lower/core.rs index d3531390..f5efc427 100644 --- a/src/jit/lower/core.rs +++ b/src/jit/lower/core.rs @@ -1,27 +1,31 @@ use crate::mir::{MirFunction, MirInstruction, ConstValue, BinaryOp, CompareOp, ValueId}; use super::builder::{IRBuilder, BinOpKind, CmpKind}; +mod analysis; +mod cfg; +mod ops_ext; + /// Lower(Core-1): Minimal lowering skeleton for Const/Move/BinOp/Cmp/Branch/Ret /// This does not emit real CLIF yet; it only walks MIR and validates coverage. 
pub struct LowerCore { - pub unsupported: usize, - pub covered: usize, + pub(crate) unsupported: usize, + pub(crate) covered: usize, /// Minimal constant propagation for i64 to feed host-call args pub(super) known_i64: std::collections::HashMap, /// Minimal constant propagation for f64 (math.* signature checks) - known_f64: std::collections::HashMap, + pub(super) known_f64: std::collections::HashMap, /// Parameter index mapping for ValueId pub(super) param_index: std::collections::HashMap, /// Track values produced by Phi (for minimal PHI path) - phi_values: std::collections::HashSet, + pub(super) phi_values: std::collections::HashSet, /// Map (block, phi dst) -> param index in that block (for multi-PHI) - phi_param_index: std::collections::HashMap<(crate::mir::BasicBlockId, ValueId), usize>, + pub(super) phi_param_index: std::collections::HashMap<(crate::mir::BasicBlockId, ValueId), usize>, /// Track values that are boolean (b1) results, e.g., Compare destinations pub(super) bool_values: std::collections::HashSet, /// Track PHI destinations that are boolean (all inputs derived from bool_values) - bool_phi_values: std::collections::HashSet, + pub(super) bool_phi_values: std::collections::HashSet, /// Track values that are FloatBox instances (for arg type classification) - float_box_values: std::collections::HashSet, + pub(super) float_box_values: std::collections::HashSet, /// Track values that are plugin handles (generic box/handle, type unknown at compile time) pub(super) handle_values: std::collections::HashSet, // Per-function statistics (last lowered) @@ -56,169 +60,13 @@ impl LowerCore { let mut bb_ids: Vec<_> = func.blocks.keys().copied().collect(); bb_ids.sort_by_key(|b| b.0); builder.prepare_blocks(bb_ids.len()); - // Seed boolean lattice with boolean parameters from MIR signature - if !func.signature.params.is_empty() { - for (idx, vid) in func.params.iter().copied().enumerate() { - if let Some(mt) = func.signature.params.get(idx) { - if matches!(mt, 
crate::mir::MirType::Bool) { - self.bool_values.insert(vid); - } - } - } - } - // Pre-scan to classify boolean-producing values and propagate via Copy/Phi/Load-Store heuristics. - self.bool_values.clear(); - let mut copy_edges: Vec<(crate::mir::ValueId, crate::mir::ValueId)> = Vec::new(); - let mut phi_defs: Vec<(crate::mir::ValueId, Vec)> = Vec::new(); - let mut stores: Vec<(crate::mir::ValueId, crate::mir::ValueId)> = Vec::new(); // (ptr, value) - let mut loads: Vec<(crate::mir::ValueId, crate::mir::ValueId)> = Vec::new(); // (dst, ptr) - for bb in bb_ids.iter() { - if let Some(block) = func.blocks.get(bb) { - for ins in block.instructions.iter() { - match ins { - crate::mir::MirInstruction::Compare { dst, .. } => { self.bool_values.insert(*dst); } - crate::mir::MirInstruction::Const { dst, value } => { - if let ConstValue::Bool(_) = value { self.bool_values.insert(*dst); } - } - crate::mir::MirInstruction::Cast { dst, target_type, .. } => { - if matches!(target_type, crate::mir::MirType::Bool) { self.bool_values.insert(*dst); } - } - crate::mir::MirInstruction::TypeOp { dst, op, ty, .. } => { - // Check and cast-to-bool produce boolean - if matches!(op, crate::mir::TypeOpKind::Check) || matches!(ty, crate::mir::MirType::Bool) { self.bool_values.insert(*dst); } - } - crate::mir::MirInstruction::Copy { dst, src } => { copy_edges.push((*dst, *src)); } - crate::mir::MirInstruction::Phi { dst, inputs } => { - let vs: Vec<_> = inputs.iter().map(|(_, v)| *v).collect(); - phi_defs.push((*dst, vs)); - } - crate::mir::MirInstruction::Store { value, ptr } => { stores.push((*ptr, *value)); } - crate::mir::MirInstruction::Load { dst, ptr } => { loads.push((*dst, *ptr)); } - _ => {} - } - } - if let Some(term) = &block.terminator { - match term { - crate::mir::MirInstruction::Compare { dst, .. 
} => { self.bool_values.insert(*dst); } - crate::mir::MirInstruction::Const { dst, value } => { - if let ConstValue::Bool(_) = value { self.bool_values.insert(*dst); } - } - crate::mir::MirInstruction::Cast { dst, target_type, .. } => { - if matches!(target_type, crate::mir::MirType::Bool) { self.bool_values.insert(*dst); } - } - crate::mir::MirInstruction::TypeOp { dst, op, ty, .. } => { - if matches!(op, crate::mir::TypeOpKind::Check) || matches!(ty, crate::mir::MirType::Bool) { self.bool_values.insert(*dst); } - } - crate::mir::MirInstruction::Copy { dst, src } => { copy_edges.push((*dst, *src)); } - crate::mir::MirInstruction::Phi { dst, inputs } => { - let vs: Vec<_> = inputs.iter().map(|(_, v)| *v).collect(); - phi_defs.push((*dst, vs)); - } - crate::mir::MirInstruction::Branch { condition, .. } => { self.bool_values.insert(*condition); } - crate::mir::MirInstruction::Store { value, ptr } => { stores.push((*ptr, *value)); } - crate::mir::MirInstruction::Load { dst, ptr } => { loads.push((*dst, *ptr)); } - _ => {} - } - } - } - } - // Fixed-point boolean lattice propagation - let mut changed = true; - let mut store_bool_ptrs: std::collections::HashSet = std::collections::HashSet::new(); - while changed { - changed = false; - // Copy propagation - for (dst, src) in copy_edges.iter().copied() { - if self.bool_values.contains(&src) && !self.bool_values.contains(&dst) { - self.bool_values.insert(dst); - changed = true; - } - // Pointer alias propagation for Store/Load lattice - if store_bool_ptrs.contains(&src) && !store_bool_ptrs.contains(&dst) { - store_bool_ptrs.insert(dst); - changed = true; - } - } - // Store marking - for (ptr, val) in stores.iter().copied() { - if self.bool_values.contains(&val) && !store_bool_ptrs.contains(&ptr) { - store_bool_ptrs.insert(ptr); - changed = true; - } - } - // Load propagation - for (dst, ptr) in loads.iter().copied() { - if store_bool_ptrs.contains(&ptr) && !self.bool_values.contains(&dst) { - self.bool_values.insert(dst); 
- changed = true; - } - } - // PHI closure for value booleans - for (dst, inputs) in phi_defs.iter() { - if inputs.iter().all(|v| self.bool_values.contains(v)) && !self.bool_values.contains(dst) { - self.bool_values.insert(*dst); - self.bool_phi_values.insert(*dst); - changed = true; - } - } - // PHI closure for pointer aliases: if all inputs are bool-storing pointers, mark dst pointer as such - for (dst, inputs) in phi_defs.iter() { - if inputs.iter().all(|v| store_bool_ptrs.contains(v)) && !store_bool_ptrs.contains(dst) { - store_bool_ptrs.insert(*dst); - changed = true; - } - } - } - // Always-on PHI statistics: count total/b1 phi slots using current heuristics - { - use crate::mir::MirInstruction; - let mut total_phi_slots: usize = 0; - let mut total_phi_b1_slots: usize = 0; - for (dst, inputs) in phi_defs.iter() { - total_phi_slots += 1; - // Heuristics consistent with dump path - let used_as_branch = func.blocks.values().any(|bbx| { - if let Some(MirInstruction::Branch { condition, .. 
}) = &bbx.terminator { condition == dst } else { false } - }); - let is_b1 = self.bool_phi_values.contains(dst) - || inputs.iter().all(|v| { - self.bool_values.contains(v) || self.known_i64.get(v).map(|&iv| iv == 0 || iv == 1).unwrap_or(false) - }) - || used_as_branch; - if is_b1 { total_phi_b1_slots += 1; } - } - if total_phi_slots > 0 { - crate::jit::rt::phi_total_inc(total_phi_slots as u64); - crate::jit::rt::phi_b1_inc(total_phi_b1_slots as u64); - self.last_phi_total = total_phi_slots as u64; - self.last_phi_b1 = total_phi_b1_slots as u64; - } - } + self.analyze(func, &bb_ids); // Optional: collect PHI targets and ordering per successor for minimal/multi PHI path let cfg_now = crate::jit::config::current(); let enable_phi_min = cfg_now.phi_min; - // For each successor block, store ordered list of phi dst and a map pred->input for each phi - let mut succ_phi_order: std::collections::HashMap> = std::collections::HashMap::new(); - let mut succ_phi_inputs: std::collections::HashMap> = std::collections::HashMap::new(); - if enable_phi_min { - for (bb_id, bb) in func.blocks.iter() { - let mut order: Vec = Vec::new(); - for ins in bb.instructions.iter() { - if let crate::mir::MirInstruction::Phi { dst, inputs } = ins { - order.push(*dst); - // store all (pred,val) pairs in flat vec grouped by succ - for (pred, val) in inputs.iter() { succ_phi_inputs.entry(*bb_id).or_default().push((*pred, *val)); } - } - } - if !order.is_empty() { succ_phi_order.insert(*bb_id, order); } - } - // Pre-declare block parameter counts per successor to avoid late appends - for (succ, order) in succ_phi_order.iter() { - if let Some(idx) = bb_ids.iter().position(|x| x == succ) { - builder.ensure_block_params_i64(idx, order.len()); - } - } - } + // Build successor → phi order and predeclare block params + let succ_phi_order: std::collections::HashMap> = + self.build_phi_succords(func, &bb_ids, builder, enable_phi_min); // Decide ABI: typed or i64-only let native_f64 = cfg_now.native_f64; let 
native_bool = cfg_now.native_bool; @@ -370,7 +218,7 @@ impl LowerCore { if let crate::mir::MirInstruction::Phi { dst: d2, inputs } = ins { if d2 == dst { if let Some((_, val)) = inputs.iter().find(|(pred, _)| pred == bb_id) { - ops::push_value_if_known_or_param(self, builder, val); + self.push_value_if_known_or_param(builder, val); cnt += 1; } } @@ -389,7 +237,7 @@ impl LowerCore { if let crate::mir::MirInstruction::Phi { dst: d2, inputs } = ins { if d2 == dst { if let Some((_, val)) = inputs.iter().find(|(pred, _)| pred == bb_id) { - ops::push_value_if_known_or_param(self, builder, val); + self.push_value_if_known_or_param(builder, val); cnt += 1; } } @@ -421,7 +269,7 @@ impl LowerCore { if let crate::mir::MirInstruction::Phi { dst: d2, inputs } = ins { if d2 == dst { if let Some((_, val)) = inputs.iter().find(|(pred, _)| pred == bb_id) { - ops::push_value_if_known_or_param(self, builder, val); + self.push_value_if_known_or_param(builder, val); cnt += 1; } } @@ -445,115 +293,11 @@ impl LowerCore { } } builder.end_function(); - if std::env::var("NYASH_JIT_DUMP").ok().as_deref() == Some("1") { - let succs = succ_phi_order.len(); - eprintln!("[JIT] cfg: blocks={} phi_succ={} (phi_min={})", bb_ids.len(), succs, enable_phi_min); - if enable_phi_min { - let mut total_phi_slots: usize = 0; - let mut total_phi_b1_slots: usize = 0; - for (succ, order) in succ_phi_order.iter() { - let mut preds_set: std::collections::BTreeSet = std::collections::BTreeSet::new(); - let mut phi_lines: Vec = Vec::new(); - if let Some(bb_succ) = func.blocks.get(succ) { - for ins in bb_succ.instructions.iter() { - if let crate::mir::MirInstruction::Phi { dst, inputs } = ins { - // collect preds for block-level summary - for (pred, _) in inputs.iter() { preds_set.insert(pred.0 as i64); } - // build detailed mapping text: dst<-pred:val,... 
- let mut pairs: Vec = Vec::new(); - for (pred, val) in inputs.iter() { - pairs.push(format!("{}:{}", pred.0, val.0)); - } - // Heuristics: boolean PHI if (1) pre-analysis marked it, or - // (2) all inputs look boolean-like (from bool producers or 0/1 const), or - // (3) used as a branch condition somewhere. - let used_as_branch = func.blocks.values().any(|bbx| { - if let Some(MirInstruction::Branch { condition, .. }) = &bbx.terminator { condition == dst } else { false } - }); - let is_b1 = self.bool_phi_values.contains(dst) - || inputs.iter().all(|(_, v)| { - self.bool_values.contains(v) || self.known_i64.get(v).map(|&iv| iv == 0 || iv == 1).unwrap_or(false) - }) - || used_as_branch; - let tag = if is_b1 { " (b1)" } else { "" }; - phi_lines.push(format!(" dst v{}{} <- {}", dst.0, tag, pairs.join(", "))); - total_phi_slots += 1; - if is_b1 { total_phi_b1_slots += 1; } - } - } - } - let preds_list: Vec = preds_set.into_iter().map(|p| p.to_string()).collect(); - eprintln!("[JIT] phi: bb={} slots={} preds={}", succ.0, order.len(), preds_list.join("|")); - for ln in phi_lines { eprintln!("[JIT]{}", ln); } - } - eprintln!("[JIT] phi_summary: total_slots={} b1_slots={}", total_phi_slots, total_phi_b1_slots); - } - } + // Dump CFG/PHI diagnostics + self.dump_phi_cfg(&succ_phi_order, func, bb_ids.len(), enable_phi_min); Ok(()) } - /// Push a value onto the builder stack if it is a known i64 const or a parameter. - pub(super) fn push_value_if_known_or_param(&self, b: &mut dyn IRBuilder, id: &ValueId) { - // Prefer materialized locals first (e.g., PHI stored into a local slot) - if let Some(slot) = self.local_index.get(id).copied() { - b.load_local_i64(slot); - return; - } - if self.phi_values.contains(id) { - // Multi-PHI: find the param index for this phi in the current block - // We don't have the current block id here; rely on builder's current block context and our stored index being positional. - // As an approximation, prefer position 0 if unknown. 
- let pos = self.phi_param_index.iter().find_map(|((_, vid), idx)| if vid == id { Some(*idx) } else { None }).unwrap_or(0); - // Use b1 loader for boolean PHIs when enabled - if crate::jit::config::current().native_bool && self.bool_phi_values.contains(id) { - b.push_block_param_b1_at(pos); - } else { - b.push_block_param_i64_at(pos); - } - return; - } - if let Some(pidx) = self.param_index.get(id).copied() { - b.emit_param_i64(pidx); - return; - } - if let Some(v) = self.known_i64.get(id).copied() { - b.emit_const_i64(v); - return; - } - } - - fn cover_if_supported(&mut self, instr: &MirInstruction) { - use crate::mir::MirInstruction as I; - let supported = matches!( - instr, - I::Const { .. } - | I::Copy { .. } - | I::Cast { .. } - | I::TypeCheck { .. } - | I::TypeOp { .. } - | I::BinOp { .. } - | I::Compare { .. } - | I::Jump { .. } - | I::Branch { .. } - | I::Return { .. } - | I::Call { .. } - | I::BoxCall { .. } - | I::ArrayGet { .. } - | I::ArraySet { .. } - | I::NewBox { .. } - | I::Store { .. } - | I::Load { .. } - | I::Phi { .. } - // PrintはJIT経路では未対応(VMにフォールバックしてコンソール出力を保持) - // | I::Print { .. } - | I::Debug { .. } - | I::ExternCall { .. } - | I::Safepoint - | I::Nop - | I::PluginInvoke { .. } - ); - if supported { self.covered += 1; } else { self.unsupported += 1; } - } fn try_emit(&mut self, b: &mut dyn IRBuilder, instr: &MirInstruction, cur_bb: crate::mir::BasicBlockId, func: &crate::mir::MirFunction) -> Result<(), String> { use crate::mir::MirInstruction as I; @@ -696,144 +440,10 @@ impl LowerCore { } } I::PluginInvoke { dst, box_val, method, args, .. 
} => { - // Minimal PluginInvoke footing (AOT strict path): - // - Python3メソッド(import/getattr/call)は実Emitする(型/引数はシム側でTLV化) - // - PyRuntimeBox.birth/eval と IntegerBox.birth は no-op許容 - let bt = self.box_type_map.get(box_val).cloned().unwrap_or_default(); - let m = method.as_str(); - // import/getattr/call 実Emit - if (bt == "PyRuntimeBox" && (m == "import")) { - let argc = 1 + args.len(); - // push receiver param index (a0) if known - if let Some(pidx) = self.param_index.get(box_val).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } - let decision = crate::jit::policy::invoke::decide_box_method(&bt, m, argc, dst.is_some()); - if let crate::jit::policy::invoke::InvokeDecision::PluginInvoke { type_id, method_id, .. } = decision { - b.emit_plugin_invoke(type_id, method_id, argc, dst.is_some()); - if let Some(d) = dst { self.handle_values.insert(*d); } - } else { if dst.is_some() { b.emit_const_i64(0); } } - } else if (bt == "PyRuntimeBox" && (m == "getattr" || m == "call")) { - // getattr/call invoked via PyRuntimeBox helper形式 → by-nameで解決 - let argc = 1 + args.len(); - // push receiver param index (a0) if known - if let Some(pidx) = self.param_index.get(box_val).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } - // push primary arguments if available(a1, a2 ...) 
- for a in args.iter() { self.push_value_if_known_or_param(b, a); } - b.emit_plugin_invoke_by_name(m, argc, dst.is_some()); - if let Some(d) = dst { - self.handle_values.insert(*d); - // Store handle result into a local slot so it can be used as argument later - let slot = *self.local_index.entry(*d).or_insert_with(|| { let id = self.next_local; self.next_local += 1; id }); - b.store_local_i64(slot); - } - } else if self.handle_values.contains(box_val) && (m == "getattr" || m == "call") { - let argc = 1 + args.len(); - // push receiver handle/param index if possible (here receiver is a handle result previously returned) - // We cannot reconstruct handle here; pass -1 to allow shim fallback. - b.emit_const_i64(-1); - for a in args.iter() { self.push_value_if_known_or_param(b, a); } - b.emit_plugin_invoke_by_name(m, argc, dst.is_some()); - if let Some(d) = dst { - self.handle_values.insert(*d); - let slot = *self.local_index.entry(*d).or_insert_with(|| { let id = self.next_local; self.next_local += 1; id }); - b.store_local_i64(slot); - } - } else if (bt == "PyRuntimeBox" && (m == "birth" || m == "eval")) - || (bt == "IntegerBox" && m == "birth") - || (bt == "StringBox" && m == "birth") - || (bt == "ConsoleBox" && m == "birth") { - if dst.is_some() { b.emit_const_i64(0); } - } else { - self.unsupported += 1; - } + self.lower_plugin_invoke(b, &dst, &box_val, method.as_str(), args, func)?; } I::ExternCall { dst, iface_name, method_name, args, .. 
} => { - // Minimal extern→plugin bridge: env.console.log/println を ConsoleBox に委譲 - if iface_name == "env.console" && (method_name == "log" || method_name == "println") { - // Ensure we have a ConsoleBox handle on the stack - b.emit_host_call("nyash.console.birth_h", 0, true); - // Push first argument if known/param - if let Some(arg0) = args.get(0) { self.push_value_if_known_or_param(b, arg0); } - // Resolve and emit plugin_invoke for ConsoleBox.method - let decision = crate::jit::policy::invoke::decide_box_method("ConsoleBox", method_name, 2, dst.is_some()); - if let crate::jit::policy::invoke::InvokeDecision::PluginInvoke { type_id, method_id, .. } = decision { - b.emit_plugin_invoke(type_id, method_id, 2, dst.is_some()); - } else { - // Fallback: drop result if any - if dst.is_some() { b.emit_const_i64(0); } - } - } else { - // Await bridge: env.future.await(fut) → await_h + ok_h/err_h select - if iface_name == "env.future" && method_name == "await" { - // Load future: prefer param, then local, then known const, else -1 scan - if let Some(arg0) = args.get(0) { - if let Some(pidx) = self.param_index.get(arg0).copied() { - b.emit_param_i64(pidx); - } else if let Some(slot) = self.local_index.get(arg0).copied() { - b.load_local_i64(slot); - } else if let Some(v) = self.known_i64.get(arg0).copied() { - b.emit_const_i64(v); - } else { - b.emit_const_i64(-1); - } - } else { - b.emit_const_i64(-1); - } - // await_h → handle (0 on timeout) - b.emit_host_call(crate::jit::r#extern::r#async::SYM_FUTURE_AWAIT_H, 1, true); - let hslot = { let id = self.next_local; self.next_local += 1; id }; - b.store_local_i64(hslot); - // ok_h(handle) - b.load_local_i64(hslot); - b.emit_host_call(crate::jit::r#extern::result::SYM_RESULT_OK_H, 1, true); - let ok_slot = { let id = self.next_local; self.next_local += 1; id }; - b.store_local_i64(ok_slot); - // err_h(0) => Timeout - b.emit_const_i64(0); - b.emit_host_call(crate::jit::r#extern::result::SYM_RESULT_ERR_H, 1, true); - let 
err_slot = { let id = self.next_local; self.next_local += 1; id }; - b.store_local_i64(err_slot); - // Select by (handle==0) - b.load_local_i64(hslot); - b.emit_const_i64(0); - b.emit_compare(crate::jit::lower::builder::CmpKind::Eq); - b.load_local_i64(err_slot); - b.load_local_i64(ok_slot); - b.emit_select_i64(); - if let Some(d) = dst { - self.handle_values.insert(*d); - let slot = *self.local_index.entry(*d).or_insert_with(|| { let id = self.next_local; self.next_local += 1; id }); - b.store_local_i64(slot); - } else { - // drop - } - return Ok(()); - } - // Async spawn bridge: env.future.spawn_instance(recv, method_name, args...) - if iface_name == "env.future" && method_name == "spawn_instance" { - // Stack layout for hostcall: argc_total, a0(recv), a1(method_name), a2(first payload) - // 1) receiver - if let Some(recv) = args.get(0) { - if let Some(pidx) = self.param_index.get(recv).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } - } else { b.emit_const_i64(-1); } - // 2) method name (best-effort) - if let Some(meth) = args.get(1) { self.push_value_if_known_or_param(b, meth); } else { b.emit_const_i64(0); } - // 3) first payload argument if present - if let Some(arg2) = args.get(2) { self.push_value_if_known_or_param(b, arg2); } else { b.emit_const_i64(0); } - // argc_total = explicit args including method name and payload (exclude receiver) - let argc_total = args.len().saturating_sub(1).max(0); - b.emit_const_i64(argc_total as i64); - // Call spawn shim; it returns Future handle - b.emit_host_call(crate::jit::r#extern::r#async::SYM_FUTURE_SPAWN_INSTANCE3_I64, 4, true); - if let Some(d) = dst { - self.handle_values.insert(*d); - let slot = *self.local_index.entry(*d).or_insert_with(|| { let id = self.next_local; self.next_local += 1; id }); - b.store_local_i64(slot); - } - return Ok(()); - } - // Unknown extern: strictではno-opにしてfailを避ける - if dst.is_some() { b.emit_const_i64(0); } - } + self.lower_extern_call(b, &dst, iface_name.as_str(), 
method_name.as_str(), args, func)?; } I::Cast { dst, value, target_type } => { // Minimal cast footing: materialize source when param/known @@ -968,6 +578,9 @@ impl LowerCore { } } I::BoxCall { box_val: array, method, args, dst, .. } => { + // Clean path: delegate to ops_ext and return + let _ = self.lower_box_call(func, b, &array, method.as_str(), args, dst.clone())?; + return Ok(()); if super::core_hostcall::lower_boxcall_simple_reads(b, &self.param_index, &self.known_i64, array, method.as_str(), args, dst.clone()) { // handled in helper (read-only simple methods) } else if matches!(method.as_str(), "sin" | "cos" | "abs" | "min" | "max") { @@ -981,7 +594,7 @@ impl LowerCore { args, dst.clone(), ); - } else if std::env::var("NYASH_USE_PLUGIN_BUILTINS").ok().as_deref() == Some("1") { + } else if false /* moved to ops_ext: NYASH_USE_PLUGIN_BUILTINS */ { // StringBox(length/is_empty/charCodeAt): policy+observe経由に統一 if matches!(method.as_str(), "length" | "is_empty" | "charCodeAt") { // receiver diff --git a/src/jit/lower/core/analysis.rs b/src/jit/lower/core/analysis.rs new file mode 100644 index 00000000..7229b8b0 --- /dev/null +++ b/src/jit/lower/core/analysis.rs @@ -0,0 +1,117 @@ +use std::collections::{HashMap, HashSet, BTreeSet}; + +use crate::mir::{BasicBlockId, MirFunction, MirInstruction, ValueId}; + +use super::super::builder::IRBuilder; +use super::super::core_ops; // ensure module link remains +use super::LowerCore; + +impl LowerCore { + pub(crate) fn analyze(&mut self, func: &MirFunction, bb_ids: &Vec) { + // Seed boolean lattice with boolean parameters from MIR signature + if !func.signature.params.is_empty() { + for (idx, vid) in func.params.iter().copied().enumerate() { + if let Some(mt) = func.signature.params.get(idx) { + if matches!(mt, crate::mir::MirType::Bool) { + self.bool_values.insert(vid); + } + } + } + } + // Pre-scan to classify boolean-producing values and propagate via Copy/Phi/Load-Store heuristics. 
+ self.bool_values.clear(); + let mut copy_edges: Vec<(ValueId, ValueId)> = Vec::new(); + let mut phi_defs: Vec<(ValueId, Vec)> = Vec::new(); + let mut stores: Vec<(ValueId, ValueId)> = Vec::new(); // (ptr, value) + let mut loads: Vec<(ValueId, ValueId)> = Vec::new(); // (dst, ptr) + for bb in bb_ids.iter() { + if let Some(block) = func.blocks.get(bb) { + for ins in block.instructions.iter() { + match ins { + MirInstruction::Compare { dst, .. } => { self.bool_values.insert(*dst); } + MirInstruction::Const { dst, value } => { + if let crate::mir::ConstValue::Bool(_) = value { self.bool_values.insert(*dst); } + } + MirInstruction::Copy { dst, src } => { copy_edges.push((*dst, *src)); } + MirInstruction::Phi { dst, inputs } => { + self.phi_values.insert(*dst); + let ins: Vec = inputs.iter().map(|(_, v)| *v).collect(); + phi_defs.push((*dst, ins)); + } + MirInstruction::Store { ptr, value } => { stores.push((*ptr, *value)); } + MirInstruction::Load { dst, ptr } => { loads.push((*dst, *ptr)); } + _ => {} + } + } + } + } + // Fixed-point propagation + let mut store_bool_ptrs: HashSet = HashSet::new(); + let mut changed = true; + while changed { + changed = false; + // Copy propagation + for (dst, src) in copy_edges.iter().copied() { + if self.bool_values.contains(&src) && !self.bool_values.contains(&dst) { + self.bool_values.insert(dst); + changed = true; + } + if store_bool_ptrs.contains(&src) && !store_bool_ptrs.contains(&dst) { + store_bool_ptrs.insert(dst); + changed = true; + } + } + // Store marking + for (ptr, val) in stores.iter().copied() { + if self.bool_values.contains(&val) && !store_bool_ptrs.contains(&ptr) { + store_bool_ptrs.insert(ptr); + changed = true; + } + } + // Load propagation + for (dst, ptr) in loads.iter().copied() { + if store_bool_ptrs.contains(&ptr) && !self.bool_values.contains(&dst) { + self.bool_values.insert(dst); + changed = true; + } + } + // PHI closure for value booleans + for (dst, inputs) in phi_defs.iter() { + if 
inputs.iter().all(|v| self.bool_values.contains(v)) && !self.bool_values.contains(dst) { + self.bool_values.insert(*dst); + self.bool_phi_values.insert(*dst); + changed = true; + } + } + // PHI closure for pointer aliases + for (dst, inputs) in phi_defs.iter() { + if inputs.iter().all(|v| store_bool_ptrs.contains(v)) && !store_bool_ptrs.contains(dst) { + store_bool_ptrs.insert(*dst); + changed = true; + } + } + } + // PHI statistics + let mut total_phi_slots: usize = 0; + let mut total_phi_b1_slots: usize = 0; + for (dst, inputs) in phi_defs.iter() { + total_phi_slots += 1; + let used_as_branch = func.blocks.values().any(|bbx| { + if let Some(MirInstruction::Branch { condition, .. }) = &bbx.terminator { condition == dst } else { false } + }); + let is_b1 = self.bool_phi_values.contains(dst) + || inputs.iter().all(|v| { + self.bool_values.contains(v) || self.known_i64.get(v).map(|&iv| iv == 0 || iv == 1).unwrap_or(false) + }) + || used_as_branch; + if is_b1 { total_phi_b1_slots += 1; } + } + if total_phi_slots > 0 { + crate::jit::rt::phi_total_inc(total_phi_slots as u64); + crate::jit::rt::phi_b1_inc(total_phi_b1_slots as u64); + self.last_phi_total = total_phi_slots as u64; + self.last_phi_b1 = total_phi_b1_slots as u64; + } + } +} + diff --git a/src/jit/lower/core/cfg.rs b/src/jit/lower/core/cfg.rs new file mode 100644 index 00000000..69e5e98b --- /dev/null +++ b/src/jit/lower/core/cfg.rs @@ -0,0 +1,78 @@ +use std::collections::HashMap; + +use crate::mir::{BasicBlockId, MirFunction, MirInstruction}; +use super::super::builder::IRBuilder; +use super::LowerCore; + +impl LowerCore { + pub(crate) fn build_phi_succords( + &mut self, + func: &MirFunction, + bb_ids: &Vec, + builder: &mut dyn IRBuilder, + enable_phi_min: bool, + ) -> HashMap> { + let mut succ_phi_order: HashMap> = HashMap::new(); + if !enable_phi_min { return succ_phi_order; } + for (bb_id, bb) in func.blocks.iter() { + let mut order: Vec = Vec::new(); + for ins in bb.instructions.iter() { + if let 
MirInstruction::Phi { dst, .. } = ins { order.push(*dst); } + } + if !order.is_empty() { succ_phi_order.insert(*bb_id, order); } + } + // Pre-declare block parameter counts per successor to avoid late appends + for (succ, order) in succ_phi_order.iter() { + if let Some(idx) = bb_ids.iter().position(|x| x == succ) { + builder.ensure_block_params_i64(idx, order.len()); + } + } + succ_phi_order + } + + pub(crate) fn dump_phi_cfg( + &self, + succ_phi_order: &HashMap>, + func: &MirFunction, + blocks_len: usize, + enable_phi_min: bool, + ) { + if std::env::var("NYASH_JIT_DUMP").ok().as_deref() != Some("1") { return; } + let succs = succ_phi_order.len(); + eprintln!("[JIT] cfg: blocks={} phi_succ={} (phi_min={})", blocks_len, succs, enable_phi_min); + if enable_phi_min { + let mut total_phi_slots: usize = 0; + let mut total_phi_b1_slots: usize = 0; + for (succ, order) in succ_phi_order.iter() { + let mut preds_set: std::collections::BTreeSet = std::collections::BTreeSet::new(); + let mut phi_lines: Vec = Vec::new(); + if let Some(bb_succ) = func.blocks.get(succ) { + for ins in bb_succ.instructions.iter() { + if let MirInstruction::Phi { dst, inputs } = ins { + for (pred, _) in inputs.iter() { preds_set.insert(pred.0 as i64); } + let mut pairs: Vec = Vec::new(); + for (pred, val) in inputs.iter() { pairs.push(format!("{}:{}", pred.0, val.0)); } + let used_as_branch = func.blocks.values().any(|bbx| { + if let Some(MirInstruction::Branch { condition, .. 
}) = &bbx.terminator { condition == dst } else { false } + }); + let is_b1 = self.bool_phi_values.contains(dst) + || inputs.iter().all(|(_, v)| { + self.bool_values.contains(v) || self.known_i64.get(v).map(|&iv| iv == 0 || iv == 1).unwrap_or(false) + }) + || used_as_branch; + if is_b1 { total_phi_b1_slots += 1; } + total_phi_slots += 1; + phi_lines.push(format!(" phi: bb={} dst={} inputs=[{}] (b1={})", + succ.0, dst.0, pairs.join(","), is_b1)); + } + } + } + let preds_list: Vec = preds_set.into_iter().map(|p| p.to_string()).collect(); + eprintln!("[JIT] phi: bb={} slots={} preds={}", succ.0, order.len(), preds_list.join("|")); + for ln in phi_lines { eprintln!("[JIT]{}", ln); } + } + eprintln!("[JIT] phi_summary: total_slots={} b1_slots={}", total_phi_slots, total_phi_b1_slots); + } + } +} + diff --git a/src/jit/lower/core/ops_ext.rs b/src/jit/lower/core/ops_ext.rs new file mode 100644 index 00000000..90ca645d --- /dev/null +++ b/src/jit/lower/core/ops_ext.rs @@ -0,0 +1,331 @@ +use super::super::builder::IRBuilder; +use super::super::core::LowerCore; +use crate::mir::{MirFunction, ValueId}; + +impl LowerCore { + pub fn lower_plugin_invoke( + &mut self, + b: &mut dyn IRBuilder, + dst: &Option, + box_val: &ValueId, + method: &str, + args: &Vec, + _func: &MirFunction, + ) -> Result<(), String> { + // Copied logic from core.rs PluginInvoke arm (scoped to PyRuntimeBox path) + let bt = self.box_type_map.get(box_val).cloned().unwrap_or_default(); + let m = method; + if (bt == "PyRuntimeBox" && (m == "import")) { + let argc = 1 + args.len(); + if let Some(pidx) = self.param_index.get(box_val).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } + let decision = crate::jit::policy::invoke::decide_box_method(&bt, m, argc, dst.is_some()); + if let crate::jit::policy::invoke::InvokeDecision::PluginInvoke { type_id, method_id, .. 
} = decision { + b.emit_plugin_invoke(type_id, method_id, argc, dst.is_some()); + if let Some(d) = dst { self.handle_values.insert(*d); } + } else { if dst.is_some() { b.emit_const_i64(0); } } + } else if (bt == "PyRuntimeBox" && (m == "getattr" || m == "call")) { + let argc = 1 + args.len(); + if let Some(pidx) = self.param_index.get(box_val).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } + for a in args.iter() { self.push_value_if_known_or_param(b, a); } + b.emit_plugin_invoke_by_name(m, argc, dst.is_some()); + if let Some(d) = dst { + self.handle_values.insert(*d); + let slot = *self.local_index.entry(*d).or_insert_with(|| { let id = self.next_local; self.next_local += 1; id }); + b.store_local_i64(slot); + } + } else if self.handle_values.contains(box_val) && (m == "getattr" || m == "call") { + let argc = 1 + args.len(); + b.emit_const_i64(-1); + for a in args.iter() { self.push_value_if_known_or_param(b, a); } + b.emit_plugin_invoke_by_name(m, argc, dst.is_some()); + if let Some(d) = dst { + self.handle_values.insert(*d); + let slot = *self.local_index.entry(*d).or_insert_with(|| { let id = self.next_local; self.next_local += 1; id }); + b.store_local_i64(slot); + } + } else if (bt == "PyRuntimeBox" && (m == "birth" || m == "eval")) + || (bt == "IntegerBox" && m == "birth") + || (bt == "StringBox" && m == "birth") + || (bt == "ConsoleBox" && m == "birth") { + if dst.is_some() { b.emit_const_i64(0); } + } else { + self.unsupported += 1; + } + Ok(()) + } + + pub fn lower_extern_call( + &mut self, + b: &mut dyn IRBuilder, + dst: &Option, + iface_name: &str, + method_name: &str, + args: &Vec, + _func: &MirFunction, + ) -> Result<(), String> { + // env.console.log/println → ConsoleBox に委譲 + if iface_name == "env.console" && (method_name == "log" || method_name == "println") { + // Ensure we have a Console handle (hostcall birth shim) + b.emit_host_call("nyash.console.birth_h", 0, true); + // a1: first argument best-effort + if let Some(arg0) = 
args.get(0) { self.push_value_if_known_or_param(b, arg0); } + // Resolve plugin invoke for ConsoleBox.method + let decision = crate::jit::policy::invoke::decide_box_method("ConsoleBox", method_name, 2, dst.is_some()); + if let crate::jit::policy::invoke::InvokeDecision::PluginInvoke { type_id, method_id, .. } = decision { + b.emit_plugin_invoke(type_id, method_id, 2, dst.is_some()); + } else if dst.is_some() { b.emit_const_i64(0); } + return Ok(()); + } + // env.future.await(fut) → await_h + ok_h/err_h select + if iface_name == "env.future" && method_name == "await" { + if let Some(arg0) = args.get(0) { + if let Some(pidx) = self.param_index.get(arg0).copied() { b.emit_param_i64(pidx); } + else if let Some(slot) = self.local_index.get(arg0).copied() { b.load_local_i64(slot); } + else if let Some(v) = self.known_i64.get(arg0).copied() { b.emit_const_i64(v); } + else { b.emit_const_i64(-1); } + } else { b.emit_const_i64(-1); } + // await_h → handle(0 timeout) + b.emit_host_call(crate::jit::r#extern::r#async::SYM_FUTURE_AWAIT_H, 1, true); + let hslot = { let id = self.next_local; self.next_local += 1; id }; + b.store_local_i64(hslot); + // ok_h(handle) + b.load_local_i64(hslot); + b.emit_host_call(crate::jit::r#extern::result::SYM_RESULT_OK_H, 1, true); + let ok_slot = { let id = self.next_local; self.next_local += 1; id }; + b.store_local_i64(ok_slot); + // err_h(0) + b.emit_const_i64(0); + b.emit_host_call(crate::jit::r#extern::result::SYM_RESULT_ERR_H, 1, true); + let err_slot = { let id = self.next_local; self.next_local += 1; id }; + b.store_local_i64(err_slot); + // select(handle==0 ? 
err : ok) + b.load_local_i64(hslot); + b.emit_const_i64(0); + b.emit_compare(crate::jit::lower::builder::CmpKind::Eq); + b.load_local_i64(err_slot); + b.load_local_i64(ok_slot); + b.emit_select_i64(); + if let Some(d) = dst { + self.handle_values.insert(*d); + let slot = *self.local_index.entry(*d).or_insert_with(|| { let id = self.next_local; self.next_local += 1; id }); + b.store_local_i64(slot); + } + return Ok(()); + } + // env.future.spawn_instance(recv, method_name, args...) + if iface_name == "env.future" && method_name == "spawn_instance" { + // a0 receiver + if let Some(recv) = args.get(0) { + if let Some(pidx) = self.param_index.get(recv).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } + } else { b.emit_const_i64(-1); } + // a1 method name (best-effort) + if let Some(meth) = args.get(1) { self.push_value_if_known_or_param(b, meth); } else { b.emit_const_i64(0); } + // a2 first payload (optional) + if let Some(a2) = args.get(2) { self.push_value_if_known_or_param(b, a2); } else { b.emit_const_i64(0); } + // argc_total = explicit args including method name and payload (exclude receiver) + let argc_total = args.len().saturating_sub(1).max(0); + b.emit_const_i64(argc_total as i64); + // call spawn shim → Future handle + b.emit_host_call(crate::jit::r#extern::r#async::SYM_FUTURE_SPAWN_INSTANCE3_I64, 4, true); + if let Some(d) = dst { + self.handle_values.insert(*d); + let slot = *self.local_index.entry(*d).or_insert_with(|| { let id = self.next_local; self.next_local += 1; id }); + b.store_local_i64(slot); + } + return Ok(()); + } + // Unhandled extern path + self.unsupported += 1; + Ok(()) + } + + pub fn lower_box_call( + &mut self, + func: &MirFunction, + b: &mut dyn IRBuilder, + array: &ValueId, + method: &str, + args: &Vec, + dst: Option, + ) -> Result { + // Delegate to existing helpers first + if super::super::core_hostcall::lower_boxcall_simple_reads(b, &self.param_index, &self.known_i64, array, method, args, dst.clone()) { + return 
Ok(true); + } + if matches!(method, "sin" | "cos" | "abs" | "min" | "max") { + super::super::core_hostcall::lower_math_call( + func, + b, + &self.known_i64, + &self.known_f64, + &self.float_box_values, + method, + args, + dst.clone(), + ); + return Ok(true); + } + // Builtins-to-plugin path (subset for String/Array/Map critical ops) + if std::env::var("NYASH_USE_PLUGIN_BUILTINS").ok().as_deref() == Some("1") { + // StringBox (length/is_empty/charCodeAt) + if matches!(method, "length" | "is_empty" | "charCodeAt") { + if let Some(pidx) = self.param_index.get(array).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } + let mut argc = 1usize; + if method == "charCodeAt" { + if let Some(v) = args.get(0) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + argc = 2; + } + if method == "is_empty" { b.hint_ret_bool(true); } + let decision = crate::jit::policy::invoke::decide_box_method("StringBox", method, argc, dst.is_some()); + match decision { + crate::jit::policy::invoke::InvokeDecision::PluginInvoke { type_id, method_id, box_type, .. } => { + b.emit_plugin_invoke(type_id, method_id, argc, dst.is_some()); + crate::jit::observe::lower_plugin_invoke(&box_type, method, type_id, method_id, argc); + return Ok(true); + } + crate::jit::policy::invoke::InvokeDecision::HostCall { symbol, .. 
} => { + crate::jit::observe::lower_hostcall(&symbol, argc, &if argc==1 { ["Handle"][..].to_vec() } else { ["Handle","I64"][..].to_vec() }, "allow", "mapped_symbol"); + b.emit_host_call(&symbol, argc, dst.is_some()); + return Ok(true); + } + _ => {} + } + } + } + // Array/Map minimal handling + match method { + // Array length variants (length/len) + "len" | "length" => { + if let Ok(ph) = crate::runtime::plugin_loader_unified::get_global_plugin_host().read() { + if let Ok(h) = ph.resolve_method("ArrayBox", "length") { + if let Some(pidx) = self.param_index.get(array).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } + b.emit_plugin_invoke(h.type_id, h.method_id, 1, dst.is_some()); + return Ok(true); + } + } + // Hostcall fallback + if let Some(pidx) = self.param_index.get(array).copied() { + crate::jit::observe::lower_hostcall(crate::jit::r#extern::collections::SYM_ANY_LEN_H, 1, &["Handle"], "allow", "mapped_symbol"); + b.emit_param_i64(pidx); + b.emit_host_call(crate::jit::r#extern::collections::SYM_ANY_LEN_H, 1, dst.is_some()); + } else { + crate::jit::observe::lower_hostcall(crate::jit::r#extern::collections::SYM_ARRAY_LEN, 1, &["I64"], "fallback", "receiver_not_param"); + b.emit_const_i64(-1); + b.emit_host_call(crate::jit::r#extern::collections::SYM_ARRAY_LEN, 1, dst.is_some()); + } + return Ok(true); + } + // Array push + "push" => { + let argc = 2usize; + // receiver + if let Some(pidx) = self.param_index.get(array).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } + // value + if let Some(v) = args.get(0).and_then(|vid| self.known_i64.get(vid)).copied() { b.emit_const_i64(v); } + else if let Some(v) = args.get(0) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + // policy decide → plugin / hostcall fallback + let decision = crate::jit::policy::invoke::decide_box_method("ArrayBox", "push", argc, false); + match decision { + crate::jit::policy::invoke::InvokeDecision::PluginInvoke { type_id, 
method_id, box_type, .. } => { + b.emit_plugin_invoke(type_id, method_id, argc, false); + crate::jit::observe::lower_plugin_invoke(&box_type, "push", type_id, method_id, argc); + } + crate::jit::policy::invoke::InvokeDecision::HostCall { symbol, .. } => { + crate::jit::observe::lower_hostcall(&symbol, argc, &["Handle","I64"], "allow", "mapped_symbol"); + b.emit_host_call(&symbol, argc, false); + } + _ => { + // Fallback hostcall + let sym = if self.param_index.get(array).is_some() { crate::jit::r#extern::collections::SYM_ARRAY_PUSH_H } else { crate::jit::r#extern::collections::SYM_ARRAY_PUSH }; + let arg_types = if self.param_index.get(array).is_some() { &["Handle","I64"][..] } else { &["I64","I64"][..] }; + crate::jit::observe::lower_hostcall(sym, argc, arg_types, "fallback", "policy_or_unknown"); + b.emit_host_call(sym, argc, false); + } + } + return Ok(true); + } + // Map ops + "size" | "get" | "has" | "set" => { + let is_set = method == "set"; + if is_set && crate::jit::policy::current().read_only { // deny under read-only policy + if let Some(_) = dst { b.emit_const_i64(0); } + return Ok(true); + } + let argc = match method { "size" => 1, "get" | "has" => 2, "set" => 3, _ => 1 }; + if let Ok(ph) = crate::runtime::plugin_loader_unified::get_global_plugin_host().read() { + if let Ok(h) = ph.resolve_method("MapBox", method) { + // receiver + if let Some(pidx) = self.param_index.get(array).copied() { b.emit_param_i64(pidx); } else { b.emit_const_i64(-1); } + // args + match method { + "size" => {} + "get" | "has" => { + if let Some(v) = args.get(0) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + } + "set" => { + if let Some(k) = args.get(0) { self.push_value_if_known_or_param(b, k); } else { b.emit_const_i64(0); } + if let Some(v) = args.get(1) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + } + _ => {} + } + b.emit_plugin_invoke(h.type_id, h.method_id, argc, dst.is_some()); + 
crate::jit::events::emit_lower( + serde_json::json!({ + "id": format!("plugin:{}:{}", h.box_type, method), + "decision":"allow","reason":"plugin_invoke","argc": argc, + "type_id": h.type_id, "method_id": h.method_id + }), + "plugin","" + ); + return Ok(true); + } + } + // Hostcall fallback symbols + if let Some(pidx) = self.param_index.get(array).copied() { + b.emit_param_i64(pidx); + match method { + "size" => b.emit_host_call(crate::jit::r#extern::collections::SYM_MAP_SIZE_H, argc, dst.is_some()), + "get" => { + if let Some(v) = args.get(0) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + b.emit_host_call(crate::jit::r#extern::collections::SYM_MAP_GET_H, argc, dst.is_some()) + } + "has" => { + if let Some(v) = args.get(0) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + b.emit_host_call(crate::jit::r#extern::collections::SYM_MAP_HAS_H, argc, dst.is_some()) + } + "set" => { + if let Some(k) = args.get(0) { self.push_value_if_known_or_param(b, k); } else { b.emit_const_i64(0); } + if let Some(v) = args.get(1) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + b.emit_host_call(crate::jit::r#extern::collections::SYM_MAP_SET_H, argc, dst.is_some()) + } + _ => {} + } + } else { + // receiver unknown + b.emit_const_i64(-1); + match method { + "size" => b.emit_host_call(crate::jit::r#extern::collections::SYM_MAP_SIZE, argc, dst.is_some()), + "get" => { + if let Some(v) = args.get(0) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + b.emit_host_call(crate::jit::r#extern::collections::SYM_MAP_GET_H, argc, dst.is_some()) + } + "has" => { + if let Some(v) = args.get(0) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + b.emit_host_call(crate::jit::r#extern::collections::SYM_MAP_HAS_H, argc, dst.is_some()) + } + "set" => { + if let Some(k) = args.get(0) { self.push_value_if_known_or_param(b, k); } else { b.emit_const_i64(0); } + if let Some(v) 
= args.get(1) { self.push_value_if_known_or_param(b, v); } else { b.emit_const_i64(0); } + b.emit_host_call(crate::jit::r#extern::collections::SYM_MAP_SET, argc, dst.is_some()) + } + _ => {} + } + } + return Ok(true); + } + _ => {} + } + // Not handled here + Ok(false) + } +} diff --git a/src/jit/lower/core_ops.rs b/src/jit/lower/core_ops.rs index 0ab2620a..b03b1bd4 100644 --- a/src/jit/lower/core_ops.rs +++ b/src/jit/lower/core_ops.rs @@ -24,6 +24,28 @@ impl LowerCore { } pub fn lower_binop(&mut self, b: &mut dyn IRBuilder, op: &BinaryOp, lhs: &ValueId, rhs: &ValueId, dst: &ValueId, func: &MirFunction) { + // Optional: consult unified grammar for operator strategy (non-invasive logging) + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + match op { + BinaryOp::Add => { + let strat = crate::grammar::engine::get().add_coercion_strategy(); + crate::jit::events::emit("grammar","add", None, None, serde_json::json!({"coercion": strat})); + } + BinaryOp::Sub => { + let strat = crate::grammar::engine::get().sub_coercion_strategy(); + crate::jit::events::emit("grammar","sub", None, None, serde_json::json!({"coercion": strat})); + } + BinaryOp::Mul => { + let strat = crate::grammar::engine::get().mul_coercion_strategy(); + crate::jit::events::emit("grammar","mul", None, None, serde_json::json!({"coercion": strat})); + } + BinaryOp::Div => { + let strat = crate::grammar::engine::get().div_coercion_strategy(); + crate::jit::events::emit("grammar","div", None, None, serde_json::json!({"coercion": strat})); + } + _ => {} + } + } // Route string-like addition to hostcall (handle,handle) if crate::jit::config::current().hostcall { if matches!(op, BinaryOp::Add) { @@ -111,3 +133,55 @@ impl LowerCore { pub fn lower_jump(&mut self, b: &mut dyn IRBuilder) { b.emit_jump(); } pub fn lower_branch(&mut self, b: &mut dyn IRBuilder) { b.emit_branch(); } } + +// Methods moved from core.rs to reduce file size and centralize op helpers +impl LowerCore { + // Push a value 
if known or param/local/phi + pub(super) fn push_value_if_known_or_param(&self, b: &mut dyn IRBuilder, id: &ValueId) { + if let Some(slot) = self.local_index.get(id).copied() { b.load_local_i64(slot); return; } + if self.phi_values.contains(id) { + let pos = self.phi_param_index.iter().find_map(|((_, vid), idx)| if vid == id { Some(*idx) } else { None }).unwrap_or(0); + if crate::jit::config::current().native_bool && self.bool_phi_values.contains(id) { + b.push_block_param_b1_at(pos); + } else { + b.push_block_param_i64_at(pos); + } + return; + } + if let Some(pidx) = self.param_index.get(id).copied() { b.emit_param_i64(pidx); return; } + if let Some(v) = self.known_i64.get(id).copied() { b.emit_const_i64(v); return; } + } + + // Coverage helper: increments covered/unsupported counts + pub(super) fn cover_if_supported(&mut self, instr: &crate::mir::MirInstruction) { + use crate::mir::MirInstruction as I; + let supported = matches!( + instr, + I::Const { .. } + | I::Copy { .. } + | I::Cast { .. } + | I::TypeCheck { .. } + | I::TypeOp { .. } + | I::BinOp { .. } + | I::Compare { .. } + | I::Jump { .. } + | I::Branch { .. } + | I::Return { .. } + | I::Call { .. } + | I::BoxCall { .. } + | I::ArrayGet { .. } + | I::ArraySet { .. } + | I::NewBox { .. } + | I::Store { .. } + | I::Load { .. } + | I::Phi { .. } + | I::Debug { .. } + | I::ExternCall { .. } + | I::Safepoint + | I::Nop + | I::PluginInvoke { .. } + ); + if supported { self.covered += 1; } else { self.unsupported += 1; } + } + +} diff --git a/src/lib.rs b/src/lib.rs index 350660f5..46ac0eb3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,6 +60,8 @@ pub mod cli; pub mod runtime; pub mod runner_plugin_init; pub mod debug; +// Unified Grammar (Phase 11.9 scaffolding) +pub mod grammar; #[cfg(target_arch = "wasm32")] pub mod wasm_test; diff --git a/src/main.rs b/src/main.rs index a24ab5b3..8ca516fa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -57,6 +57,7 @@ pub mod config; // Runtime system (plugins, registry, etc.) 
pub mod runtime; pub mod debug; +pub mod grammar; // Phase 11.9 unified grammar scaffolding use nyash_rust::cli::CliConfig; use runner::NyashRunner; diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs index 1e675f71..2cf430fa 100644 --- a/src/parser/expressions.rs +++ b/src/parser/expressions.rs @@ -27,6 +27,11 @@ impl NyashParser { let operator = BinaryOperator::Or; self.advance(); let right = self.parse_and()?; + // Non-invasive syntax diff: record binop + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let ok = crate::grammar::engine::get().syntax_is_allowed_binop("or"); + if !ok { eprintln!("[GRAMMAR-DIFF][Parser] binop 'or' not allowed by syntax rules"); } + } expr = ASTNode::BinaryOp { operator, left: Box::new(expr), @@ -46,6 +51,10 @@ impl NyashParser { let operator = BinaryOperator::And; self.advance(); let right = self.parse_equality()?; + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let ok = crate::grammar::engine::get().syntax_is_allowed_binop("and"); + if !ok { eprintln!("[GRAMMAR-DIFF][Parser] binop 'and' not allowed by syntax rules"); } + } expr = ASTNode::BinaryOp { operator, left: Box::new(expr), @@ -69,6 +78,11 @@ impl NyashParser { }; self.advance(); let right = self.parse_comparison()?; + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let name = match operator { BinaryOperator::Equal=>"eq", BinaryOperator::NotEqual=>"ne", _=>"cmp" }; + let ok = crate::grammar::engine::get().syntax_is_allowed_binop(name); + if !ok { eprintln!("[GRAMMAR-DIFF][Parser] binop '{}' not allowed by syntax rules", name); } + } expr = ASTNode::BinaryOp { operator, left: Box::new(expr), @@ -130,6 +144,11 @@ impl NyashParser { }; self.advance(); let right = self.parse_factor()?; + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let name = match operator { BinaryOperator::Add=>"add", BinaryOperator::Subtract=>"sub", _=>"term" }; + let ok = 
crate::grammar::engine::get().syntax_is_allowed_binop(name); + if !ok { eprintln!("[GRAMMAR-DIFF][Parser] binop '{}' not allowed by syntax rules", name); } + } expr = ASTNode::BinaryOp { operator, left: Box::new(expr), @@ -155,6 +174,11 @@ impl NyashParser { }; self.advance(); let right = self.parse_unary()?; + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let name = match operator { BinaryOperator::Multiply=>"mul", BinaryOperator::Divide=>"div", _=>"mod" }; + let ok = crate::grammar::engine::get().syntax_is_allowed_binop(name); + if !ok { eprintln!("[GRAMMAR-DIFF][Parser] binop '{}' not allowed by syntax rules", name); } + } expr = ASTNode::BinaryOp { operator, left: Box::new(expr), diff --git a/src/parser/statements.rs b/src/parser/statements.rs index 7333c422..6706895c 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -13,8 +13,9 @@ use super::common::ParserUtils; impl NyashParser { /// 文をパース pub(super) fn parse_statement(&mut self) -> Result { - - let result = match &self.current_token().token_type { + // For grammar diff: capture starting token to classify statement keyword + let start_tok = self.current_token().token_type.clone(); + let result = match &start_tok { TokenType::BOX => { self.parse_box_declaration() }, @@ -84,6 +85,33 @@ impl NyashParser { } }; + // Non-invasive syntax rule check + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + let kw = match start_tok { + TokenType::BOX => Some("box"), + TokenType::GLOBAL => Some("global"), + TokenType::FUNCTION => Some("function"), + TokenType::STATIC => Some("static"), + TokenType::IF => Some("if"), + TokenType::LOOP => Some("loop"), + TokenType::BREAK => Some("break"), + TokenType::RETURN => Some("return"), + TokenType::PRINT => Some("print"), + TokenType::NOWAIT => Some("nowait"), + TokenType::INCLUDE => Some("include"), + TokenType::LOCAL => Some("local"), + TokenType::OUTBOX => Some("outbox"), + TokenType::TRY => Some("try"), + 
TokenType::THROW => Some("throw"), + TokenType::USING => Some("using"), + TokenType::FROM => Some("from"), + _ => None, + }; + if let Some(k) = kw { + let ok = crate::grammar::engine::get().syntax_is_allowed_statement(k); + if !ok { eprintln!("[GRAMMAR-DIFF][Parser] statement '{}' not allowed by syntax rules", k); } + } + } result } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a4b2d93c..86fad9db 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -6,6 +6,7 @@ */ use thiserror::Error; +use crate::grammar::engine; /// トークンの種類を表すenum #[derive(Debug, Clone, PartialEq)] @@ -385,7 +386,7 @@ impl NyashTokenizer { } // キーワードチェック - match identifier.as_str() { + let tok = match identifier.as_str() { "box" => TokenType::BOX, "global" => TokenType::GLOBAL, "singleton" => TokenType::SINGLETON, @@ -425,8 +426,27 @@ impl NyashTokenizer { "true" => TokenType::TRUE, "false" => TokenType::FALSE, "null" => TokenType::NULL, - _ => TokenType::IDENTIFIER(identifier), + _ => TokenType::IDENTIFIER(identifier.clone()), + }; + + // 統一文法エンジンとの差分チェック(動作は変更しない) + if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") { + // 安全に参照(初期導入のため、存在しない場合は無視) + let kw = engine::get().is_keyword_str(&identifier); + match (&tok, kw) { + (TokenType::IDENTIFIER(_), Some(name)) => { + eprintln!("[GRAMMAR-DIFF] tokenizer=IDENT, grammar=KEYWORD({}) word='{}'", name, identifier); + } + (TokenType::IDENTIFIER(_), None) => {} + // tokenizerがキーワード、grammarが未定義 + (t, None) if !matches!(t, TokenType::IDENTIFIER(_)) => { + eprintln!("[GRAMMAR-DIFF] tokenizer=KEYWORD, grammar=IDENT word='{}'", identifier); + } + _ => {} + } } + + tok } /// 行コメントをスキップ diff --git a/tests/grammar_add_rules.rs b/tests/grammar_add_rules.rs new file mode 100644 index 00000000..bf9df103 --- /dev/null +++ b/tests/grammar_add_rules.rs @@ -0,0 +1,66 @@ +use nyash_rust::grammar::engine; +use nyash_rust::box_trait::{StringBox, IntegerBox, BoolBox, VoidBox, NyashBox}; + +fn classify_value(b: &dyn NyashBox) -> &'static str 
{ + if nyash_rust::runtime::semantics::coerce_to_string(b).is_some() { + "String" + } else if nyash_rust::runtime::semantics::coerce_to_i64(b).is_some() { + // coerce_to_i64 succeeds for integers and some numeric-like boxes + // For this snapshot, we only feed IntegerBox so "Integer" is fine + "Integer" + } else if b.as_any().downcast_ref::().is_some() { + "Bool" + } else { + "Other" + } +} + +fn actual_add_result(left: &dyn NyashBox, right: &dyn NyashBox) -> &'static str { + // Mirror current interpreter semantics succinctly: + // 1) If either is string-like => String + if nyash_rust::runtime::semantics::coerce_to_string(left).is_some() + || nyash_rust::runtime::semantics::coerce_to_string(right).is_some() { + return "String"; + } + // 2) If both are i64-coercible => Integer + if nyash_rust::runtime::semantics::coerce_to_i64(left).is_some() + && nyash_rust::runtime::semantics::coerce_to_i64(right).is_some() { + return "Integer"; + } + // 3) Otherwise error(ここでは Error として表現) + "Error" +} + +#[test] +fn snapshot_add_rules_align_with_current_semantics() { + let eng = engine::get(); + // Prepare sample operands for each class + let s = StringBox::new("a".to_string()); + let i = IntegerBox::new(1); + let b = BoolBox::new(true); + let v = VoidBox::new(); + let vals: Vec<(&str, Box)> = vec![ + ("String", Box::new(s)), + ("Integer", Box::new(i)), + ("Bool", Box::new(b)), + ("Other", Box::new(v)), + ]; + + for (li, l) in &vals { + for (ri, r) in &vals { + let lty = classify_value(l.as_ref()); + let rty = classify_value(r.as_ref()); + let actual = actual_add_result(l.as_ref(), r.as_ref()); + let expect = eng.decide_add_result(lty, rty).map(|(res, _)| res); + if let Some(res) = expect { + if actual == "Error" { + panic!("grammar provides rule for {}+{} but actual semantics error", li, ri); + } else { + assert_eq!(res, actual, "grammar expect {} + {} => {}, but actual => {}", li, ri, res, actual); + } + } else { + assert_eq!(actual, "Error", "grammar has no rule for {}+{}, 
but actual => {}", li, ri, actual); + } + } + } +} diff --git a/tests/grammar_other_ops.rs b/tests/grammar_other_ops.rs new file mode 100644 index 00000000..2828a8cf --- /dev/null +++ b/tests/grammar_other_ops.rs @@ -0,0 +1,19 @@ +use nyash_rust::grammar::engine; + +#[test] +fn grammar_sub_mul_div_rules_exist_and_basic_cases() { + let eng = engine::get(); + + // Sub + assert!(!eng.sub_rules().is_empty(), "sub rules should not be empty"); + assert!(eng.decide_sub_result("Integer","Integer").is_some(), "sub i64+i64 should be defined"); + + // Mul + assert!(!eng.mul_rules().is_empty(), "mul rules should not be empty"); + assert!(eng.decide_mul_result("Integer","Integer").is_some(), "mul i64*i64 should be defined"); + + // Div + assert!(!eng.div_rules().is_empty(), "div rules should not be empty"); + assert!(eng.decide_div_result("Integer","Integer").is_some(), "div i64/i64 should be defined"); +} +