From 4439d64da33297c139fcf8de339d0c030beab5fc Mon Sep 17 00:00:00 2001 From: tomoaki Date: Sat, 20 Dec 2025 13:04:24 +0900 Subject: [PATCH] refactor(joinir): make jump_args layout explicit (Phase 256) --- docs/development/current/main/10-Now.md | 3 +- docs/development/current/main/20-Decisions.md | 6 + .../development/current/main/design/README.md | 1 + .../design/join-explicit-cfg-construction.md | 68 +++++++ .../current/main/design/joinir-design-map.md | 9 + .../phase-256-joinir-contract-questions.md | 90 +++++++++ .../current/main/phases/phase-256/README.md | 47 ++++- src/mir/builder/calls/lowering.rs | 3 + .../joinir/merge/exit_args_collector.rs | 149 +++++++++++---- .../joinir/merge/instruction_rewriter.rs | 174 ++++++++++-------- .../builder/control_flow/joinir/merge/mod.rs | 50 +++-- .../joinir/patterns/exit_binding.rs | 1 + .../patterns/exit_binding_applicator.rs | 1 + .../joinir/patterns/pattern7_split_scan.rs | 2 +- src/mir/join_ir/lowering/inline_boundary.rs | 86 +++++++++ .../lowering/inline_boundary_builder.rs | 10 +- .../join_ir/lowering/split_scan_minimal.rs | 110 +++++------ .../joinir_block_converter.rs | 19 +- src/mir/passes/dce.rs | 76 +++++++- .../apps/phase254_p0_index_of_vm.sh | 3 + .../integration/apps/phase256_p0_split_vm.sh | 2 + 21 files changed, 715 insertions(+), 195 deletions(-) create mode 100644 docs/development/current/main/design/join-explicit-cfg-construction.md diff --git a/docs/development/current/main/10-Now.md b/docs/development/current/main/10-Now.md index 2a0ff3f7..544511d5 100644 --- a/docs/development/current/main/10-Now.md +++ b/docs/development/current/main/10-Now.md @@ -48,7 +48,7 @@ ## 2025-12-19:Phase 256(StringUtils.split/2 可変 step ループ)🔜 - Phase 256 README: `docs/development/current/main/phases/phase-256/README.md` -- Current first FAIL: `StringUtils.split/2`(MIR verification: “Value defined multiple times” + SSA undef) +- Current first FAIL: `json_lint_vm`(Pattern2 break cond: `this.is_whitespace(...)` needs `current_static_box_name`) - 状況: - `MirInstruction::Select` の導入は完了、Pattern6(index_of)は PASS 維持。 - `ValueId(57)` undefined は根治(原因は `const_1` 未初期化)。 @@ -56,6 +56,7 @@ - P1.8で ExitLine/jump_args の余剰許容と関数名マッピングを整流。 - P1.9で `JoinInst::Jump` を tail call として bridge に落とし、`jump_args` を SSOT として保持。 - P1.10で DCE が `jump_args` を used 扱いし、`instruction_spans` を同期(SPAN MISMATCH 根治)。 + - P1.11で ExitArgsCollector の expr_result slot 判定を明確化し、split が `--verify` / integration smoke まで PASS。 - P1.5-DBG: boundary entry params の契約チェックを追加(VM実行前 fail-fast)。 - P1.6: 契約チェックの薄い集約 `run_all_pipeline_checks()` を導入(pipeline の責務を縮退)。 diff --git a/docs/development/current/main/20-Decisions.md b/docs/development/current/main/20-Decisions.md index ff754a24..78eaa85d 100644 --- a/docs/development/current/main/20-Decisions.md +++ b/docs/development/current/main/20-Decisions.md @@ -10,6 +10,12 @@ - Call/MethodCall は effects + typing の論点が増えるため、pure とは分離して Phase 141+ で段階投入する。 - out-of-scope は `Ok(None)` で既存経路へフォールバックし、既定挙動不変を維持する(strict は “close-but-unsupported” のみ fail-fast)。 +2025‑12‑20 +- Phase 256 の詰まり(Jump/continuation/params/jump_args)を「暗黙 ABI の分裂」と捉え、契約を `JoinIR ABI/Contract` として明文化していく(SSOT を 1 箇所へ集約)。 +- continuation の識別は ID を SSOT(String は debug/serialize 用)とし、`join_func_N` の legacy は alias で隔離する。 +- `jump_args` は意味論の SSOT なので、最終的には MIR terminator operand に統合して DCE/CFG から自然に追える形へ収束させる(Phase 256 を緑に戻した後に段階導入)。 +- 上記の収束先(north star)を “Join-Explicit CFG Construction” と命名し、段階移行(案1→案2→必要なら案3)で進める。 + 2025‑09‑08 - ループ制御は既存命令(Branch/Jump/Phi)で表現し、新命令は導入しない。 - Builder に loop_ctx({head, exit})を導入し、continue/break を分岐で降ろす。 diff --git a/docs/development/current/main/design/README.md b/docs/development/current/main/design/README.md index 0cabeb40..1e9a4574 100644 --- a/docs/development/current/main/design/README.md +++ b/docs/development/current/main/design/README.md @@ -8,6 +8,7 @@ ## 現役の設計図(入口) - JoinIR の地図(navigation SSOT): `docs/development/current/main/design/joinir-design-map.md` +- Join-Explicit CFG Construction(north star): `docs/development/current/main/design/join-explicit-cfg-construction.md` - Loop Canonicalizer(設計 SSOT): `docs/development/current/main/design/loop-canonicalizer.md` - ControlTree / StepTree(構造SSOT): `docs/development/current/main/design/control-tree.md` - Normalized ExprLowerer(式の一般化 SSOT): `docs/development/current/main/design/normalized-expr-lowering.md` diff --git a/docs/development/current/main/design/join-explicit-cfg-construction.md b/docs/development/current/main/design/join-explicit-cfg-construction.md new file mode 100644 index 00000000..657e4ffd --- /dev/null +++ b/docs/development/current/main/design/join-explicit-cfg-construction.md @@ -0,0 +1,68 @@ +# Join-Explicit CFG Construction + +Status: SSOT(design goal) +Scope: JoinIR→MIR の「暗黙 ABI」を消し、Join を第一級に扱う CFG へ収束させる北極星(north star)。 +Related: +- Navigation SSOT: `docs/development/current/main/design/joinir-design-map.md` +- Investigation (Phase 256): `docs/development/current/main/investigations/phase-256-joinir-contract-questions.md` +- Decisions: `docs/development/current/main/20-Decisions.md` + +## Goal(最終形) + +“Join-Explicit CFG Construction” を目指す。 + +- `Jump/continuation/params/edge-args` を **第一級(explicit)**として扱う +- JoinIR↔MIR 間の **暗黙 ABI(順序/長さ/名前/役割)** をなくす(SSOT を 1 箇所に封印) +- 変換は「意味の解釈」ではなく「写像(mapping)」に縮退する + +## Non-Goals(いまはやらない) + +- JoinIR を即座に削除する(まずは ABI/Contract で SSOT を固める) +- PHI を全面廃止して block params に置換する一括リファクタ(段階導入) + +## 現状の問題(Phase 256 で露出した型) + +- `jump_args` / `exit_bindings` / `entry.params` / `boundary.join_inputs` が “だいたい同じ順序” を前提にしており、ズレると SSA/dominance が破綻する +- `expr_result` と LoopState carrier が同一 ValueId になり得るが、legacy “expr_result slot” 推測で offset がずれて誤配線になる +- `jump_args` が IR の外側メタ扱いだと、DCE/最適化が “use” を見落としやすい +- spans が並行 Vec だと、パスが 1 箇所でも取りこぼすと SPAN MISMATCH になる + +## 移行戦略(段階導入 / Strangler) + +原則: +- **移行を先に固定**し、機能追加は「新契約に乗るものだけ」併走する(旧経路に新機能を足さない) +- 既定挙動を変えない(必要なら dev-only の診断ガードで観測) + +### Stage 1(短期): JoinIR を “ABI/Contract 付き Normalized SSOT” にする + +狙い: 推測をなくし、順序/役割の SSOT を 1 箇所へ寄せる。 + +- boundary に `jump_args_layout` のような **layout SSOT** を持たせ、collector/rewriter が推測しない +- `JoinInst::Jump` を terminator 語彙として正規化(cond 付きは `Branch` へ寄せる) +- continuation の識別は **ID SSOT**(String は debug/serialize のみに縮退) + +受け入れ: +- `--verify` が PASS(SSA/dominance/PHI/ExitLine の契約違反が消える) +- 直撃回帰テスト(`expr_result == carrier` 等)が固定される + +### Stage 2(中期): MIR を “edge-args を terminator operand に持つ CFG” に寄せる + +狙い: `jump_args` を “意味データ” として IR に埋め込み、DCE/CFG が自然に追える形へ収束する。 + +- `jump_args` を BasicBlock 外メタから terminator operand へ寄せる(段階導入: 互換フィールド併存→移行) +- spans は `Vec>` へ(API で不変条件を守る) + +受け入れ: +- `jump_args` 由来の use が最適化で消えない(テストで固定) +- SPAN MISMATCH が構造的に起きない + +### Stage 3(長期): JoinIR と MIR の境界を薄くし、必要なら JoinIR を builder へ降格 + +狙い: “bridge/merge が意味解釈する余地” を最小化し、一本の CFG 語彙に収束させる。 + +- JoinIR を SSOT IR として残すか、builder DSL として降格するかは、この段階で再判断する + +## 実務ルール(Phase 中の運用) + +- 新パターン/新機能は「新しい Contract で記述できる場合のみ」追加する +- Contract の導入中は “機能追加より SSOT 固め” を優先する(泥沼デバッグの再発防止) diff --git a/docs/development/current/main/design/joinir-design-map.md b/docs/development/current/main/design/joinir-design-map.md index 744edf2d..c4490def 100644 --- a/docs/development/current/main/design/joinir-design-map.md +++ b/docs/development/current/main/design/joinir-design-map.md @@ -85,6 +85,15 @@ flowchart LR --- +## North Star: Join-Explicit CFG Construction + +JoinIR/MIR 間に生える “暗黙 ABI(順序/長さ/名前/役割)” を減らし、Join を第一級として扱う CFG へ収束させる。 + +SSOT(設計目標): +- `docs/development/current/main/design/join-explicit-cfg-construction.md` + +--- + ## “箱”の責務マップ(担当境界) | 領域 | 役割(何を決めるか) | 主な入口/箱(SSOT寄り) | 主な出力 | Fail-Fast(典型) | diff --git a/docs/development/current/main/investigations/phase-256-joinir-contract-questions.md b/docs/development/current/main/investigations/phase-256-joinir-contract-questions.md index 60b77e43..bd71ef57 100644 --- a/docs/development/current/main/investigations/phase-256-joinir-contract-questions.md +++ b/docs/development/current/main/investigations/phase-256-joinir-contract-questions.md @@ -167,3 +167,93 @@ pub struct BasicBlock { - ExitLine: `jump_args` の長さ契約ミスマッチ(carriers と invariants の混在) - `JoinInst::Jump` が bridge で “Return 化” され、continuation の意味が落ちる疑い - DCE が `jump_args` 由来の use を落とし、Copy が消えて SSA/dominance が崩れる + +--- + +## ChatGPT Pro からの設計回答(要約) + +診断(短く): +- JoinIR/MIR 間に「暗黙 ABI(呼び出し規約)」が生えており、SSOT が分裂している +- `Jump/cont/params/jump_args` が層を跨ぐたびに意味が揺れて、SSA/dominance/DCE が壊れやすい + +命名: +- この収束先(north star)を **Join-Explicit CFG Construction** と呼ぶ + +提案の大枠(3案): + +### 案1: JoinIR ABI / Contract モジュール(推奨) + +狙い: +- いま暗黙のまま散っている契約(順序/長さ/名前/役割)を “ABI オブジェクト” として 1 箇所に封印する +- `Vec` の順序契約に魂を預けない(pack/unpack を ABI 経由にする) + +最小イメージ(雰囲気): + +```rust +pub struct JoinAbi { + pub cont_sigs: Vec, // continuation signature SSOT + pub special: SpecialConts, // main/loop_step/k_exit + pub legacy_alias: AliasTable, // join_func_2 -> k_exit 等 +} + +pub struct ContSig { + pub params: Vec, +} + +pub enum ParamRole { + Carrier, + Invariant, + Result, + // ... +} +``` + +Jump の正規形(Normalized JoinIR): +- `Jump` は cond を持たず、必ず終端 +- cond 付きは `Branch { then_cont+args, else_cont+args }` に寄せる + +### 案2: MIR を “ブロック引数 SSA” に昇格(強力) + +狙い: +- JoinIR の cont/args と MIR の block/jump_args を同型化し、bridge/merge の解釈余地を消す + +即効の 2 点(最優先): +- `jump_args` を BasicBlock 外メタではなく terminator に埋め込む(use-def / DCE が自然に追える) +- spans を並行 Vec から `Vec>` へ(SPAN MISMATCH を構造で防ぐ) + +### 案3: JoinIR をやめて CPS CFG 一本化(最終収束案) + +狙い: +- SSOT を 1 個にする(JoinIR/MIR の “橋” を消す) +- ただし移行は重いので、案2 の延長として収束させるのが現実的 + +--- + +## 推奨(この repo の制約込み) + +Phase 256 の “今日の詰まり” に効く順で: + +1) **案1(JoinIR ABI/Contract)を設計 SSOT として採用** + - まず “順序契約を殺す” のが最大のデバッグ短縮になる +2) **案2 のうち即効ポイントを段階導入** + - `jump_args` の SSOT は “terminator operand” に移す(大工事なので Phase 256 を緑に戻した後に着手が安全) + - spans の `Spanned` 化も同様に段階導入(いまは pass 側で不変条件をテストで固定) + +ここまでで、JoinIR を増やさずに「暗黙 ABI を明文化」できる。 + +--- + +## 次に設計として決めたいこと(Decision 候補) + +- `JoinInst::Jump` の SSOT は “tail call 等価” ではなく、Normalized JoinIR の terminator 語彙として固定する +- continuation の識別は **ID SSOT**(String は debug/serialize 用に限る) +- `jump_args` を SSOT にするなら、最終的に MIR terminator に埋め込む(DCE/CFG の整合性が自然になる) + +--- + +## Phase 256 実装から得た追加の教訓(SSOT) + +- `expr_result` と LoopState carrier が同一 ValueId になるケースが現実に起きる(例: ループ式の返り値が `result`)。 + このとき “legacy expr_result slot(jump_args[0])” を機械的に仮定すると、offset がずれて ExitLine の配線が崩れる。 +- 対策として `ExitArgsCollector` には “slot があるかどうか” を推測させず、呼び出し側が `expect_expr_result_slot` + を明示して渡すのが安全(Fail-Fast + 構造)。 diff --git a/docs/development/current/main/phases/phase-256/README.md b/docs/development/current/main/phases/phase-256/README.md index efb1021c..62004da0 100644 --- a/docs/development/current/main/phases/phase-256/README.md +++ b/docs/development/current/main/phases/phase-256/README.md @@ -5,16 +5,19 @@ Scope: Loop pattern recognition for split/tokenization operations Related: - Phase 255 完了(loop_invariants 導入、Pattern 6 完成) - Phase 254 完了(Pattern 6 index_of 実装) +- North star(設計目標): `docs/development/current/main/design/join-explicit-cfg-construction.md` ## Current Status (SSOT) -- Current first FAIL: `StringUtils.split/2`(MIR verification: “Value defined multiple times” + SSA undef) -- Pattern6 は PASS 維持 +- Current first FAIL: `json_lint_vm`(Pattern2 break cond: `this.is_whitespace(...)` needs `current_static_box_name`) +- `StringUtils.split/2` は VM `--verify` / smoke まで PASS +- Pattern6(index_of)は PASS 維持 - 直近の完了: - P1.10: DCE が `jump_args` 参照を保持し、`instruction_spans` と同期するよう修正(回帰テスト追加) - P1.7: SSA undef(`%49/%67`)根治(continuation 関数名の SSOT 不一致) - P1.6: pipeline contract checks を `run_all_pipeline_checks()` に集約 -- 次の作業: P1.11(merge 側の ExitLine/PHI/dominance を `--verify` で緑に戻す) +- 次の作業: Pattern2 の static box context を break condition lowering に渡す(次フェーズ) +- 設計メモ(ChatGPT Pro 相談まとめ): `docs/development/current/main/investigations/phase-256-joinir-contract-questions.md` --- @@ -414,6 +417,44 @@ Option A(Pattern 7 新設)を推奨。 - `instruction_spans` と `instructions` の同期不変条件を維持(SPAN MISMATCH 根治) - 回帰テストを追加(`test_dce_keeps_jump_args_values`, `test_dce_syncs_instruction_spans`) +--- + +## 進捗(P1.11) + +### P1.11: ExitArgsCollector の expr_result slot 判定を明確化(完了) + +症状(split の誤動作): +- runtime 側で `start` と `result` が入れ替わり、ExitLine の配線が崩れる +- 結果として VM 実行で型エラー(例: `ArrayBox <= 5`)に到達 + +根本原因(SSOT): +- ExitArgsCollector が `jump_args[0]` を “expr_result slot” とみなす条件が粗く、 + `expr_result` が exit_bindings の LoopState carrier と同一 ValueId(例: `result`)のケースでも offset=1 側に寄っていた + +修正(方針): +- `expect_expr_result_slot` を明示的に渡し、かつ + `expr_result` が exit_bindings の LoopState carriers に含まれる場合は `expect_expr_result_slot=false` とする +- `exit_args_collector.rs` 側は `expect_expr_result_slot` の意味に合わせて整理し、 + 「末尾の invariants は無視」の仕様は維持する + +受け入れ(確認): +- `./target/release/hakorune --backend vm --verify apps/tests/phase256_p0_split_min.hako` PASS +- `./target/release/hakorune --backend vm apps/tests/phase256_p0_split_min.hako` が期待 RC(`3`)で終了 + +--- + +## 進捗(P1.12) + +### P1.12: integration smoke scripts の終了コード取得を安定化(完了) + +変更(要旨): +- `set -e` のまま “対象コマンドだけ” を `set +e` でラップし、非0終了でも exit code を取得できるようにする +- `HAKORUNE_BIN` の既定値を追加し、手動実行の再現性を上げる + +受け入れ(確認): +- `HAKORUNE_BIN=./target/release/hakorune bash tools/smokes/v2/profiles/integration/apps/phase256_p0_split_vm.sh` PASS(exit=3) +- `HAKORUNE_BIN=./target/release/hakorune bash tools/smokes/v2/profiles/integration/apps/phase254_p0_index_of_vm.sh` PASS(exit=1) + ### リファクタリング方針(P1.6候補 / 先送り推奨) 現時点(split がまだ FAIL)では、箱化のための箱化で複雑さが増えやすいので、以下を推奨する: diff --git a/src/mir/builder/calls/lowering.rs b/src/mir/builder/calls/lowering.rs index 722ce00d..fdb19564 100644 --- a/src/mir/builder/calls/lowering.rs +++ b/src/mir/builder/calls/lowering.rs @@ -45,6 +45,9 @@ impl MirBuilder { // 関数スコープ SlotRegistry は元の関数側から退避しておくよ。 let saved_slot_registry = self.comp_ctx.current_slot_registry.take(); + // Phase 201-A: Clear reserved ValueIds at function entry (function-local). + self.comp_ctx.clear_reserved_value_ids(); + // BoxCompilationContext mode: clear()で完全独立化 if context_active { self.variable_ctx.variable_map.clear(); diff --git a/src/mir/builder/control_flow/joinir/merge/exit_args_collector.rs b/src/mir/builder/control_flow/joinir/merge/exit_args_collector.rs index a6cdbdf6..1b353f62 100644 --- a/src/mir/builder/control_flow/joinir/merge/exit_args_collector.rs +++ b/src/mir/builder/control_flow/joinir/merge/exit_args_collector.rs @@ -19,13 +19,14 @@ //! &remapped_args, //! block_id, //! strict_mode, +//! boundary.jump_args_layout, //! )?; //! exit_phi_inputs.push((result.block_id, result.expr_result_value)); //! carrier_inputs.extend(result.carrier_values); //! ``` use crate::mir::join_ir::lowering::carrier_info::CarrierRole; -use crate::mir::join_ir::lowering::inline_boundary::LoopExitBinding; +use crate::mir::join_ir::lowering::inline_boundary::{JumpArgsLayout, LoopExitBinding}; use crate::mir::{BasicBlockId, ValueId}; use std::collections::BTreeMap; @@ -51,8 +52,9 @@ pub struct ExitArgsCollectionResult { /// # Phase 118 P2 Contract /// /// - Every LoopState carrier in exit_bindings must have an exit PHI input -/// - jump_args order is assumed to match exit_bindings order, with at most one -/// leading extra slot (legacy layouts) +/// - jump_args order is assumed to match exit_bindings order +/// - Layout is enforced via JumpArgsLayout (no inference) +/// - Extra trailing args are treated as invariants and ignored /// - This avoids Pattern3-specific assumptions such as "jump_args[0] is loop_var" /// /// # Fail-Fast Guarantee @@ -76,6 +78,7 @@ impl ExitArgsCollectorBox { /// * `remapped_args` - Remapped jump_args from JoinIR block (already in host value space) /// * `block_id` - Source block ID for PHI inputs /// * `strict_exit` - If true, Fail-Fast on any validation error + /// * `layout` - jump_args layout policy (SSOT from boundary) /// /// # Returns /// @@ -84,16 +87,17 @@ impl ExitArgsCollectorBox { /// /// # Phase 118 P2: SSOT Offset Calculation /// - /// The offset is determined by comparing jump_args length with exit_bindings: - /// - `len(jump_args) == len(exit_phi_bindings)`: offset = 0 - /// - `len(jump_args) == len(exit_phi_bindings) + 1`: offset = 1 (legacy layout) - /// - Otherwise: error (or warning in non-strict mode) + /// The offset is determined by JumpArgsLayout: + /// - `CarriersOnly`: offset = 0 + /// - `ExprResultPlusCarriers`: offset = 1 + /// - Length mismatches are errors (or warnings in non-strict mode) pub fn collect( &self, exit_bindings: &[LoopExitBinding], remapped_args: &[ValueId], block_id: BasicBlockId, strict_exit: bool, + layout: JumpArgsLayout, ) -> Result { // Filter exit bindings to get only LoopState carriers (skip ConditionOnly) let exit_phi_bindings: Vec<_> = exit_bindings @@ -111,15 +115,11 @@ impl ExitArgsCollectorBox { } // Phase 118 P2: Calculate offset (SSOT) - let offset = self.calculate_offset( - remapped_args.len(), - exit_phi_bindings.len(), - block_id, - strict_exit, - )?; + let offset = + self.calculate_offset(remapped_args.len(), exit_phi_bindings.len(), block_id, strict_exit, layout)?; // Collect expr result (first jump arg if offset > 0) - let expr_result_value = if offset > 0 { + let expr_result_value = if layout == JumpArgsLayout::ExprResultPlusCarriers && offset > 0 { remapped_args.first().copied() } else { None @@ -168,14 +168,9 @@ impl ExitArgsCollectorBox { exit_phi_bindings_len: usize, block_id: BasicBlockId, strict_exit: bool, + layout: JumpArgsLayout, ) -> Result { - if jump_args_len == exit_phi_bindings_len { - // Direct mapping: jump_args[i] = exit_phi_bindings[i] - Ok(0) - } else if jump_args_len == exit_phi_bindings_len + 1 { - // Legacy layout: jump_args[0] is expr result, jump_args[1..] are carriers - Ok(1) - } else if jump_args_len < exit_phi_bindings_len { + if jump_args_len < exit_phi_bindings_len { // Too short - missing carriers let msg = format!( "[joinir/exit-line] jump_args too short: need {} carrier args (from exit_bindings) but got {} in block {:?}", @@ -188,15 +183,43 @@ impl ExitArgsCollectorBox { Ok(0) // Best effort: try direct mapping } } else { - // Too long - extra args beyond carriers (e.g., invariants in Pattern 7) - // Phase 256 P1.8: Allow excess args as long as we have enough for carriers - // Direct mapping: jump_args[0..N] = exit_phi_bindings[0..N], rest ignored - #[cfg(debug_assertions)] - eprintln!( - "[joinir/exit-line] jump_args has {} extra args (block {:?}), ignoring invariants", - jump_args_len - exit_phi_bindings_len, block_id - ); - Ok(0) // Direct mapping: first N args are carriers + match layout { + JumpArgsLayout::CarriersOnly => { + if jump_args_len > exit_phi_bindings_len { + #[cfg(debug_assertions)] + eprintln!( + "[joinir/exit-line] jump_args has {} extra args (block {:?}), ignoring invariants", + jump_args_len - exit_phi_bindings_len, + block_id + ); + } + Ok(0) + } + JumpArgsLayout::ExprResultPlusCarriers => { + if jump_args_len >= exit_phi_bindings_len + 1 { + if jump_args_len > exit_phi_bindings_len + 1 { + #[cfg(debug_assertions)] + eprintln!( + "[joinir/exit-line] jump_args has {} extra args (block {:?}), ignoring invariants after expr_result", + jump_args_len - (exit_phi_bindings_len + 1), + block_id + ); + } + Ok(1) + } else { + let msg = format!( + "[joinir/exit-line] expr_result layout requires leading slot: carriers={} args={} in block {:?}", + exit_phi_bindings_len, jump_args_len, block_id + ); + if strict_exit { + Err(msg) + } else { + eprintln!("[DEBUG-177] {}", msg); + Ok(0) + } + } + } + } } } @@ -245,7 +268,13 @@ mod tests { make_binding("count", CarrierRole::LoopState), ]; let args = vec![ValueId(10), ValueId(20)]; - let result = collector.collect(&bindings, &args, BasicBlockId(1), true); + let result = collector.collect( + &bindings, + &args, + BasicBlockId(1), + true, + JumpArgsLayout::CarriersOnly, + ); assert!(result.is_ok()); let res = result.unwrap(); @@ -261,7 +290,13 @@ mod tests { let collector = ExitArgsCollectorBox::new(); let bindings = vec![make_binding("sum", CarrierRole::LoopState)]; let args = vec![ValueId(5), ValueId(10)]; // [expr_result, carrier] - let result = collector.collect(&bindings, &args, BasicBlockId(1), true); + let result = collector.collect( + &bindings, + &args, + BasicBlockId(1), + true, + JumpArgsLayout::ExprResultPlusCarriers, + ); assert!(result.is_ok()); let res = result.unwrap(); @@ -278,7 +313,13 @@ mod tests { make_binding("is_digit", CarrierRole::ConditionOnly), // Skip ]; let args = vec![ValueId(10)]; // Only one arg for LoopState carrier - let result = collector.collect(&bindings, &args, BasicBlockId(1), true); + let result = collector.collect( + &bindings, + &args, + BasicBlockId(1), + true, + JumpArgsLayout::CarriersOnly, + ); assert!(result.is_ok()); let res = result.unwrap(); @@ -293,7 +334,13 @@ mod tests { make_binding("count", CarrierRole::LoopState), ]; let args = vec![ValueId(10)]; // Missing one carrier - let result = collector.collect(&bindings, &args, BasicBlockId(1), true); + let result = collector.collect( + &bindings, + &args, + BasicBlockId(1), + true, + JumpArgsLayout::CarriersOnly, + ); assert!(result.is_err()); assert!(result.unwrap_err().contains("too short")); @@ -307,7 +354,13 @@ mod tests { make_binding("count", CarrierRole::LoopState), ]; let args = vec![ValueId(10)]; // Missing one carrier - let result = collector.collect(&bindings, &args, BasicBlockId(1), false); + let result = collector.collect( + &bindings, + &args, + BasicBlockId(1), + false, + JumpArgsLayout::CarriersOnly, + ); // Non-strict mode: succeeds with warning assert!(result.is_ok()); @@ -328,4 +381,30 @@ mod tests { assert_eq!(map["sum"].len(), 2); assert_eq!(map["count"].len(), 1); } + + #[test] + fn test_collect_extra_invariants_no_expr_result() { + let collector = ExitArgsCollectorBox::new(); + let bindings = vec![ + make_binding("i", CarrierRole::LoopState), + make_binding("start", CarrierRole::LoopState), + make_binding("result", CarrierRole::LoopState), + ]; + let args = vec![ValueId(1), ValueId(2), ValueId(3), ValueId(99)]; // extra invariant + let result = collector.collect( + &bindings, + &args, + BasicBlockId(1), + true, + JumpArgsLayout::CarriersOnly, + ); + + assert!(result.is_ok()); + let res = result.unwrap(); + assert_eq!(res.expr_result_value, None); + assert_eq!(res.carrier_values.len(), 3); + assert_eq!(res.carrier_values[0].1, (BasicBlockId(1), ValueId(1))); + assert_eq!(res.carrier_values[1].1, (BasicBlockId(1), ValueId(2))); + assert_eq!(res.carrier_values[2].1, (BasicBlockId(1), ValueId(3))); + } } diff --git a/src/mir/builder/control_flow/joinir/merge/instruction_rewriter.rs b/src/mir/builder/control_flow/joinir/merge/instruction_rewriter.rs index 12bd544c..392c9841 100644 --- a/src/mir/builder/control_flow/joinir/merge/instruction_rewriter.rs +++ b/src/mir/builder/control_flow/joinir/merge/instruction_rewriter.rs @@ -366,6 +366,7 @@ pub(super) fn merge_and_rewrite( // We skip merging continuation functions, so any tail-call to k_exit must be // lowered as an exit jump to `exit_block_id` (and contribute exit values). let mut k_exit_lowering_decision: Option = None; + let mut k_exit_jump_args: Option> = None; // Phase 177-3: Check if this block is the loop header with PHI nodes let is_loop_header_with_phi = @@ -462,6 +463,7 @@ pub(super) fn merge_and_rewrite( if let Some(decision) = tail_call_policy.classify_tail_call(callee_name, &remapped_args) { // This is a k_exit tail call - policy says normalize to exit jump k_exit_lowering_decision = Some(decision); + k_exit_jump_args = old_block.jump_args.clone(); found_tail_call = true; if debug { log!( @@ -595,14 +597,17 @@ pub(super) fn merge_and_rewrite( .map(|name| continuation_candidates.contains(name)) .unwrap_or(false); + let is_recursive_call = target_func_name + .as_ref() + .map(|name| name == func_name) + .unwrap_or(false); + if let Some(ref target_func_name) = target_func_name { if let Some(target_params) = function_params.get(target_func_name) { // Phase 256 P1.10: Detect call type for param binding strategy // - Recursive call (loop_step → loop_step): Skip all param bindings (PHI handles via latch edges) // - Exit call (loop_step → k_exit): Skip all param bindings (exit PHI handles via exit edges) // - Header entry (main → loop_step at header): Skip all (PHI handles via entry edges) - let is_recursive_call = target_func_name == func_name; - // Phase 256 P1.10: Detect if target is the loop entry function // When calling the loop entry function, header PHIs will define the carriers. // We should skip param bindings in this case. @@ -753,97 +758,99 @@ pub(super) fn merge_and_rewrite( // At this point, args[0] contains the updated loop variable value (i_next). // We record this as the latch incoming so that the header PHI can reference // the correct SSA value at loop continuation time. - if let Some(b) = boundary { - if let Some(loop_var_name) = &b.loop_var_name { - if !args.is_empty() { - // The first arg is the loop variable's updated value - let latch_value = args[0]; - // The current block (new_block_id) is the latch block - loop_header_phi_info.set_latch_incoming( - loop_var_name, - new_block_id, // latch block - latch_value, // updated loop var value (i_next) - ); - - if debug { - log!( - true, - "[cf_loop/joinir] Phase 33-16: Set latch incoming for '{}': block={:?}, value={:?}", - loop_var_name, new_block_id, latch_value + if is_recursive_call { + if let Some(b) = boundary { + if let Some(loop_var_name) = &b.loop_var_name { + if !args.is_empty() { + // The first arg is the loop variable's updated value + let latch_value = args[0]; + // The current block (new_block_id) is the latch block + loop_header_phi_info.set_latch_incoming( + loop_var_name, + new_block_id, // latch block + latch_value, // updated loop var value (i_next) ); - } - } - } - // Phase 33-20: Also set latch incoming for other carriers from exit_bindings - // - // args layout depends on whether we have a dedicated loop variable: - // - loop_var_name = Some(..): args[0] is the loop variable, args[1..] are other carriers - // - loop_var_name = None: args[0..] are carriers (no reserved loop-var slot) - // - // Phase 176-4 FIX: exit_bindings may include the loop variable itself; skip it when loop_var_name is set. - let mut carrier_arg_idx = if b.loop_var_name.is_some() { 1 } else { 0 }; - for binding in b.exit_bindings.iter() { - // Skip if this binding is for the loop variable (already handled) - if let Some(ref loop_var) = b.loop_var_name { - if &binding.carrier_name == loop_var { if debug { log!( true, - "[cf_loop/joinir] Phase 176-4: Skipping loop variable '{}' in exit_bindings (handled separately)", - binding.carrier_name + "[cf_loop/joinir] Phase 33-16: Set latch incoming for '{}': block={:?}, value={:?}", + loop_var_name, new_block_id, latch_value ); } - continue; // Skip loop variable } } - // Process non-loop-variable carrier - if carrier_arg_idx < args.len() { - let latch_value = args[carrier_arg_idx]; - loop_header_phi_info.set_latch_incoming( - &binding.carrier_name, - new_block_id, - latch_value, - ); + // Phase 33-20: Also set latch incoming for other carriers from exit_bindings + // + // args layout depends on whether we have a dedicated loop variable: + // - loop_var_name = Some(..): args[0] is the loop variable, args[1..] are other carriers + // - loop_var_name = None: args[0..] are carriers (no reserved loop-var slot) + // + // Phase 176-4 FIX: exit_bindings may include the loop variable itself; skip it when loop_var_name is set. + let mut carrier_arg_idx = if b.loop_var_name.is_some() { 1 } else { 0 }; + for binding in b.exit_bindings.iter() { + // Skip if this binding is for the loop variable (already handled) + if let Some(ref loop_var) = b.loop_var_name { + if &binding.carrier_name == loop_var { + if debug { + log!( + true, + "[cf_loop/joinir] Phase 176-4: Skipping loop variable '{}' in exit_bindings (handled separately)", + binding.carrier_name + ); + } + continue; // Skip loop variable + } + } - if debug { + // Process non-loop-variable carrier + if carrier_arg_idx < args.len() { + let latch_value = args[carrier_arg_idx]; + loop_header_phi_info.set_latch_incoming( + &binding.carrier_name, + new_block_id, + latch_value, + ); + + if debug { + log!( + true, + "[cf_loop/joinir] Phase 176-4: Set latch incoming for carrier '{}': block={:?}, value={:?} (arg[{}])", + binding.carrier_name, new_block_id, latch_value, carrier_arg_idx + ); + } + carrier_arg_idx += 1; + } else if debug { log!( true, - "[cf_loop/joinir] Phase 176-4: Set latch incoming for carrier '{}': block={:?}, value={:?} (arg[{}])", - binding.carrier_name, new_block_id, latch_value, carrier_arg_idx + "[cf_loop/joinir] Phase 33-20 WARNING: No arg for carrier '{}' at index {}", + binding.carrier_name, carrier_arg_idx ); } - carrier_arg_idx += 1; - } else if debug { - log!( - true, - "[cf_loop/joinir] Phase 33-20 WARNING: No arg for carrier '{}' at index {}", - binding.carrier_name, carrier_arg_idx - ); } - } - // Phase 255 P2: Set latch incoming for loop invariants - // - // Loop invariants don't have corresponding tail call args because they - // are not modified by the loop. Their latch incoming is the PHI dst itself - // (same value across all iterations). - for (inv_name, _inv_host_id) in b.loop_invariants.iter() { - if let Some(phi_dst) = loop_header_phi_info.get_carrier_phi(inv_name) { - // For invariants, latch incoming is the PHI dst (same value) - loop_header_phi_info.set_latch_incoming( - inv_name, - new_block_id, // latch block - phi_dst, // same as PHI dst (invariant value) - ); - - if debug { - log!( - true, - "[cf_loop/joinir] Phase 255 P2: Set latch incoming for loop invariant '{}': block={:?}, value={:?} (PHI dst itself)", - inv_name, new_block_id, phi_dst + // Phase 255 P2: Set latch incoming for loop invariants + // + // Loop invariants don't have corresponding tail call args because they + // are not modified by the loop. Their latch incoming is the PHI dst itself + // (same value across all iterations). + for (inv_name, _inv_host_id) in b.loop_invariants.iter() { + if let Some(phi_dst) = loop_header_phi_info.get_carrier_phi(inv_name) { + // For invariants, latch incoming is the PHI dst (same value) + loop_header_phi_info.set_latch_incoming( + inv_name, + new_block_id, // latch block + phi_dst, // same as PHI dst (invariant value) ); + + if debug { + log!( + true, + "[cf_loop/joinir] Phase 255 P2: Set latch incoming for loop invariant '{}': block={:?}, value={:?} (PHI dst itself)", + inv_name, new_block_id, phi_dst + ); + } } } } @@ -963,6 +970,7 @@ pub(super) fn merge_and_rewrite( &remapped_args, new_block_id, strict_exit, + b.jump_args_layout, )?; // Add expr_result to exit_phi_inputs (if present) @@ -1080,8 +1088,22 @@ pub(super) fn merge_and_rewrite( // Collect exit values from k_exit arguments if let Some(b) = boundary { let collector = ExitArgsCollectorBox::new(); + let exit_args: Vec = if let Some(ref jump_args) = k_exit_jump_args { + jump_args + .iter() + .map(|&arg| remapper.remap_value(arg)) + .collect() + } else { + args.clone() + }; let collection_result = - collector.collect(&b.exit_bindings, &args, new_block_id, strict_exit)?; + collector.collect( + &b.exit_bindings, + &exit_args, + new_block_id, + strict_exit, + b.jump_args_layout, + )?; if let Some(expr_result_value) = collection_result.expr_result_value { exit_phi_inputs.push((collection_result.block_id, expr_result_value)); } diff --git a/src/mir/builder/control_flow/joinir/merge/mod.rs b/src/mir/builder/control_flow/joinir/merge/mod.rs index fb238fc5..02df6a8b 100644 --- a/src/mir/builder/control_flow/joinir/merge/mod.rs +++ b/src/mir/builder/control_flow/joinir/merge/mod.rs @@ -40,6 +40,7 @@ pub use merge_result::MergeContracts; use super::trace; use crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary; +use crate::mir::join_ir::lowering::error_tags; use crate::mir::{MirModule, ValueId}; use std::collections::BTreeMap; @@ -144,6 +145,16 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks( debug, ); + if let Some(boundary) = boundary { + if let Err(msg) = boundary.validate_jump_args_layout() { + return Err(error_tags::freeze_with_hint( + "phase256/jump_args_layout", + &msg, + "set JoinInlineBoundary.jump_args_layout via builder and avoid expr_result/carrier mismatch", + )); + } + } + if verbose { if let Some(boundary) = boundary { let exit_summary: Vec = boundary @@ -404,9 +415,17 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks( debug && !reserved_phi_dsts.is_empty(), ); + // Phase 201-A: Also reserve JoinIR parameter ValueIds to avoid collisions + let mut reserved_value_ids = reserved_phi_dsts.clone(); + for params in function_params.values() { + for ¶m in params { + reserved_value_ids.insert(param); + } + } + // Phase 201-A: Set reserved IDs in MirBuilder so next_value_id() skips them // This protects against carrier corruption when break conditions emit Const instructions - builder.comp_ctx.reserved_value_ids = reserved_phi_dsts.clone(); + builder.comp_ctx.reserved_value_ids = reserved_value_ids.clone(); trace.stderr_if( &format!( "[cf_loop/joinir] Phase 201-A: Set builder.comp_ctx.reserved_value_ids = {:?}", @@ -420,7 +439,7 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks( builder, &used_values, &mut remapper, - &reserved_phi_dsts, + &reserved_value_ids, debug, )?; @@ -533,8 +552,18 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks( } } - let main_func_name = "join_func_0"; - let loop_step_func_name = "join_func_1"; + let canonical_main = crate::mir::join_ir::lowering::canonical_names::MAIN; + let canonical_loop_step = crate::mir::join_ir::lowering::canonical_names::LOOP_STEP; + let main_func_name = if function_params.contains_key(canonical_main) { + canonical_main + } else { + "join_func_0" + }; + let loop_step_func_name = if function_params.contains_key(canonical_loop_step) { + canonical_loop_step + } else { + "join_func_1" + }; if function_params.get(main_func_name).is_none() { trace.stderr_if( @@ -1046,19 +1075,6 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks( } } - // Phase 201-A: Clear reserved ValueIds after merge completes - // Future loops will set their own reserved IDs - if !builder.comp_ctx.reserved_value_ids.is_empty() { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 201-A: Clearing reserved_value_ids (was {:?})", - builder.comp_ctx.reserved_value_ids - ), - debug, - ); - builder.comp_ctx.reserved_value_ids.clear(); - } - // Phase 246-EX-FIX: Handle loop variable expr_result separately from carrier expr_result // // The loop variable (e.g., 'i') is returned via exit_phi_result_id, not carrier_phis. diff --git a/src/mir/builder/control_flow/joinir/patterns/exit_binding.rs b/src/mir/builder/control_flow/joinir/patterns/exit_binding.rs index 5cecf3a9..97566fde 100644 --- a/src/mir/builder/control_flow/joinir/patterns/exit_binding.rs +++ b/src/mir/builder/control_flow/joinir/patterns/exit_binding.rs @@ -381,6 +381,7 @@ mod tests { condition_inputs: vec![], // Phase 171: Add missing field condition_bindings: vec![], // Phase 171-fix: Add missing field expr_result: None, // Phase 33-14: Add missing field + jump_args_layout: crate::mir::join_ir::lowering::inline_boundary::JumpArgsLayout::CarriersOnly, loop_var_name: None, // Phase 33-16: Add missing field carrier_info: None, // Phase 228: Add missing field loop_invariants: vec![], // Phase 255 P2: Add missing field diff --git a/src/mir/builder/control_flow/joinir/patterns/exit_binding_applicator.rs b/src/mir/builder/control_flow/joinir/patterns/exit_binding_applicator.rs index 8361ebc7..93804704 100644 --- a/src/mir/builder/control_flow/joinir/patterns/exit_binding_applicator.rs +++ b/src/mir/builder/control_flow/joinir/patterns/exit_binding_applicator.rs @@ -135,6 +135,7 @@ mod tests { condition_inputs: vec![], // Phase 171: Add missing field condition_bindings: vec![], // Phase 171-fix: Add missing field expr_result: None, // Phase 33-14: Add missing field + jump_args_layout: crate::mir::join_ir::lowering::inline_boundary::JumpArgsLayout::CarriersOnly, loop_var_name: None, // Phase 33-16: Add missing field carrier_info: None, // Phase 228: Add missing field loop_invariants: vec![], // Phase 255 P2: Add missing field diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern7_split_scan.rs b/src/mir/builder/control_flow/joinir/patterns/pattern7_split_scan.rs index 12834276..803d28fe 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern7_split_scan.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern7_split_scan.rs @@ -365,7 +365,7 @@ impl MirBuilder { /// - 2 carriers: i (loop index), start (segment start) /// - 3 invariants: s (haystack), sep (separator), result (accumulator) /// - Conditional step via Select (P0 pragmatic approach) - /// - Post-loop segment push in k_exit + /// - Post-loop segment push stays in host AST (k_exit is pure return) pub(crate) fn cf_loop_pattern7_split_scan_impl( &mut self, condition: &ASTNode, diff --git a/src/mir/join_ir/lowering/inline_boundary.rs b/src/mir/join_ir/lowering/inline_boundary.rs index 7fd431eb..07812452 100644 --- a/src/mir/join_ir/lowering/inline_boundary.rs +++ b/src/mir/join_ir/lowering/inline_boundary.rs @@ -104,6 +104,15 @@ pub struct LoopExitBinding { pub role: CarrierRole, } +/// Layout policy for JoinIR jump_args (SSOT) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum JumpArgsLayout { + /// jump_args = [carriers...] + CarriersOnly, + /// jump_args = [expr_result, carriers...] + ExprResultPlusCarriers, +} + /// Boundary information for inlining a JoinIR fragment into a host function /// /// This structure captures the "interface" between a JoinIR fragment and the @@ -288,6 +297,12 @@ pub struct JoinInlineBoundary { /// Here, `expr_result = None` because the loop doesn't return a value. pub expr_result: Option, + /// Phase 256 P1.12: jump_args layout (SSOT) + /// + /// This prevents merge from guessing whether jump_args contains a leading + /// expr_result slot. + pub jump_args_layout: JumpArgsLayout, + /// Phase 33-16: Loop variable name (for LoopHeaderPhiBuilder) /// /// The name of the loop control variable (e.g., "i" in `loop(i < 3)`). @@ -330,6 +345,39 @@ pub struct JoinInlineBoundary { } impl JoinInlineBoundary { + /// Decide jump_args layout from boundary inputs (SSOT) + pub fn decide_jump_args_layout( + expr_result: Option, + exit_bindings: &[LoopExitBinding], + ) -> JumpArgsLayout { + if let Some(expr_result_id) = expr_result { + let expr_is_carrier = exit_bindings.iter().any(|binding| { + binding.role != CarrierRole::ConditionOnly + && binding.join_exit_value == expr_result_id + }); + if expr_is_carrier { + JumpArgsLayout::CarriersOnly + } else { + JumpArgsLayout::ExprResultPlusCarriers + } + } else { + JumpArgsLayout::CarriersOnly + } + } + + /// Validate jump_args layout against boundary contract (Fail-Fast) + pub fn validate_jump_args_layout(&self) -> Result<(), String> { + let expected = + Self::decide_jump_args_layout(self.expr_result, self.exit_bindings.as_slice()); + if self.jump_args_layout != expected { + return Err(format!( + "joinir/jump_args_layout_mismatch: expr_result={:?} layout={:?} expected={:?}", + self.expr_result, self.jump_args_layout, expected + )); + } + Ok(()) + } + /// Phase 132-R0 Task 1: SSOT for default continuation function names /// Phase 256 P1.7: Changed from JoinFuncIds to function names (Strings) /// @@ -386,6 +434,7 @@ impl JoinInlineBoundary { condition_inputs: vec![], // Phase 171: Default to empty (deprecated) condition_bindings: vec![], // Phase 171-fix: Default to empty expr_result: None, // Phase 33-14: Default to carrier-only pattern + jump_args_layout: JumpArgsLayout::CarriersOnly, // Phase 256 P1.12 loop_var_name: None, // Phase 33-16 carrier_info: None, // Phase 228: Default to None continuation_func_ids: Self::default_continuations(), @@ -432,6 +481,7 @@ impl JoinInlineBoundary { condition_inputs: vec![], // Phase 171: Default to empty (deprecated) condition_bindings: vec![], // Phase 171-fix: Default to empty expr_result: None, // Phase 33-14 + jump_args_layout: JumpArgsLayout::CarriersOnly, // Phase 256 P1.12 loop_var_name: None, // Phase 33-16 carrier_info: None, // Phase 228 continuation_func_ids: Self::default_continuations(), @@ -495,6 +545,7 @@ impl JoinInlineBoundary { condition_inputs: vec![], // Phase 171: Default to empty (deprecated) condition_bindings: vec![], // Phase 171-fix: Default to empty expr_result: None, // Phase 33-14 + jump_args_layout: JumpArgsLayout::CarriersOnly, // Phase 256 P1.12 loop_var_name: None, // Phase 33-16 carrier_info: None, // Phase 228 continuation_func_ids: Self::default_continuations(), @@ -544,6 +595,7 @@ impl JoinInlineBoundary { condition_inputs, condition_bindings: vec![], // Phase 171-fix: Will be populated by new constructor expr_result: None, // Phase 33-14 + jump_args_layout: JumpArgsLayout::CarriersOnly, // Phase 256 P1.12 loop_var_name: None, // Phase 33-16 carrier_info: None, // Phase 228 continuation_func_ids: Self::default_continuations(), @@ -597,6 +649,7 @@ impl JoinInlineBoundary { condition_inputs, condition_bindings: vec![], // Phase 171-fix: Will be populated by new constructor expr_result: None, // Phase 33-14 + jump_args_layout: JumpArgsLayout::CarriersOnly, // Phase 256 P1.12 loop_var_name: None, // Phase 33-16 carrier_info: None, // Phase 228 continuation_func_ids: Self::default_continuations(), @@ -657,6 +710,7 @@ impl JoinInlineBoundary { condition_inputs: vec![], // Deprecated, use condition_bindings instead condition_bindings, expr_result: None, // Phase 33-14 + jump_args_layout: JumpArgsLayout::CarriersOnly, // Phase 256 P1.12 loop_var_name: None, // Phase 33-16 carrier_info: None, // Phase 228 continuation_func_ids: Self::default_continuations(), @@ -692,4 +746,36 @@ mod tests { fn test_boundary_mismatched_inputs() { JoinInlineBoundary::new_inputs_only(vec![ValueId(0), ValueId(1)], vec![ValueId(4)]); } + + #[test] + fn test_jump_args_layout_rejects_expr_result_carrier_mismatch() { + let boundary = JoinInlineBoundary { + join_inputs: vec![], + host_inputs: vec![], + join_outputs: vec![], + #[allow(deprecated)] + host_outputs: vec![], + loop_invariants: vec![], + exit_bindings: vec![LoopExitBinding { + carrier_name: "result".to_string(), + join_exit_value: ValueId(10), + host_slot: ValueId(1), + role: CarrierRole::LoopState, + }], + #[allow(deprecated)] + condition_inputs: vec![], + condition_bindings: vec![], + expr_result: Some(ValueId(10)), + jump_args_layout: JumpArgsLayout::ExprResultPlusCarriers, + loop_var_name: None, + carrier_info: None, + continuation_func_ids: JoinInlineBoundary::default_continuations(), + exit_reconnect_mode: ExitReconnectMode::default(), + }; + + let err = boundary + .validate_jump_args_layout() + .expect_err("layout mismatch must fail-fast"); + assert!(err.contains("jump_args_layout_mismatch")); + } } diff --git a/src/mir/join_ir/lowering/inline_boundary_builder.rs b/src/mir/join_ir/lowering/inline_boundary_builder.rs index 087b9221..23c9db2f 100644 --- a/src/mir/join_ir/lowering/inline_boundary_builder.rs +++ b/src/mir/join_ir/lowering/inline_boundary_builder.rs @@ -25,7 +25,7 @@ //! ``` use super::condition_to_joinir::ConditionBinding; -use super::inline_boundary::{JoinInlineBoundary, LoopExitBinding}; +use super::inline_boundary::{JoinInlineBoundary, JumpArgsLayout, LoopExitBinding}; use crate::mir::ValueId; /// Role of a parameter in JoinIR lowering (Phase 200-A) @@ -96,6 +96,7 @@ impl JoinInlineBoundaryBuilder { condition_inputs: vec![], condition_bindings: vec![], expr_result: None, + jump_args_layout: JumpArgsLayout::CarriersOnly, loop_var_name: None, carrier_info: None, // Phase 228: Initialize as None continuation_func_ids: JoinInlineBoundary::default_continuations(), @@ -194,7 +195,12 @@ impl JoinInlineBoundaryBuilder { /// Build the final JoinInlineBoundary pub fn build(self) -> JoinInlineBoundary { - self.boundary + let mut boundary = self.boundary; + boundary.jump_args_layout = JoinInlineBoundary::decide_jump_args_layout( + boundary.expr_result, + boundary.exit_bindings.as_slice(), + ); + boundary } /// Add a parameter with explicit role (Phase 200-A) diff --git a/src/mir/join_ir/lowering/split_scan_minimal.rs b/src/mir/join_ir/lowering/split_scan_minimal.rs index c86d563b..dc451f8c 100644 --- a/src/mir/join_ir/lowering/split_scan_minimal.rs +++ b/src/mir/join_ir/lowering/split_scan_minimal.rs @@ -51,17 +51,14 @@ //! start_next = Select(is_match, start_next_if, start) //! i_next = Select(is_match, i_next_if, i_next_else) //! -//! // 4. Conditional push (Phase 256 P0: Simple approach - always push, but with conditional computation) -//! // For P0, we keep the push simple and rely on Boundary to handle result management +//! // 4. Conditional push (Phase 256 P1: ConditionalMethodCall) +//! // Push the matched segment only when is_match is true //! //! // 5. Tail recursion //! Call(loop_step, [s, sep, result, i_next, start_next]) //! //! fn k_exit(result, start, s): -//! // Post-loop: push final segment -//! len = s.length() -//! tail = s.substring(start, len) -//! result.push(tail) +//! // Post-loop tail push stays in host AST; JoinIR exit is a pure return. //! return result //! ``` //! @@ -73,7 +70,7 @@ //! - 3 invariants: s, sep, result (managed via loop_invariants) //! - substring and push are BoxCall operations //! - Select for conditional step (safer than Branch for P0) -//! - Post-loop segment push in k_exit +//! - Post-loop segment push stays in host AST (k_exit is a pure return) use crate::mir::join_ir::lowering::join_value_space::JoinValueSpace; use crate::mir::join_ir::{ @@ -90,7 +87,7 @@ use crate::mir::join_ir::{ /// /// - **main()**: Entry point, calls loop_step /// - **loop_step(s, sep, result, i, start)**: Loop body with conditional step -/// - **k_exit(result, start, s)**: Post-loop segment push + return +/// - **k_exit(result, start, s)**: Pure return (post-loop push stays in host AST) /// /// ## Design Philosophy /// @@ -144,6 +141,8 @@ pub(crate) fn lower_split_scan_minimal( let i_plus_sep = join_value_space.alloc_local(); // i + sep_len let window = join_value_space.alloc_local(); // s.substring(i, i_plus_sep) let is_match = join_value_space.alloc_local(); // window == sep + let segment = join_value_space.alloc_local(); // s.substring(start, i) + let result_next = join_value_space.alloc_local(); // updated result (conditional push) let start_next_if = join_value_space.alloc_local(); // i_plus_sep (match case) let i_next_if = join_value_space.alloc_local(); // start_next_if (match case) let i_next_else = join_value_space.alloc_local(); // i + 1 (no-match case) @@ -154,8 +153,6 @@ pub(crate) fn lower_split_scan_minimal( let result_exit_param = join_value_space.alloc_param(); // accumulator let start_exit_param = join_value_space.alloc_param(); // segment start let s_exit_param = join_value_space.alloc_param(); // haystack - let s_len = join_value_space.alloc_local(); // s.length() - let tail = join_value_space.alloc_local(); // final segment // ================================================================== // main() function @@ -261,7 +258,26 @@ pub(crate) fn lower_split_scan_minimal( rhs: sep_step_param, })); - // 7. Match case variable computation: start_next = i_plus_sep, i_next = start_next + // 7. Compute segment for conditional push: s.substring(start, i) + loop_step_func + .body + .push(JoinInst::Compute(MirLikeInst::BoxCall { + dst: Some(segment), + box_name: "StringBox".to_string(), + method: "substring".to_string(), + args: vec![s_step_param, start_step_param, i_step_param], + })); + + // 8. Conditional push when separator matches + loop_step_func.body.push(JoinInst::ConditionalMethodCall { + cond: is_match, + dst: result_next, + receiver: result_step_param, + method: "push".to_string(), + args: vec![segment], + }); + + // 9. Match case variable computation: start_next = i_plus_sep, i_next = start_next loop_step_func .body .push(JoinInst::Compute(MirLikeInst::Const { @@ -290,7 +306,7 @@ pub(crate) fn lower_split_scan_minimal( value: ConstValue::Integer(1), })); - // 8. No-match case: i_next_else = i + 1 + // 10. No-match case: i_next_else = i + 1 loop_step_func .body .push(JoinInst::Compute(MirLikeInst::BinOp { @@ -300,7 +316,7 @@ pub(crate) fn lower_split_scan_minimal( rhs: const_1, })); - // 9. Select for start_next: Select(is_match, i_plus_sep, start) + // 11. Select for start_next: Select(is_match, i_plus_sep, start) loop_step_func .body .push(JoinInst::Select { @@ -311,7 +327,7 @@ pub(crate) fn lower_split_scan_minimal( type_hint: None, }); - // 10. Select for i_next: Select(is_match, i_plus_sep, i + 1) + // 12. Select for i_next: Select(is_match, i_plus_sep, i + 1) loop_step_func .body .push(JoinInst::Select { @@ -322,10 +338,10 @@ pub(crate) fn lower_split_scan_minimal( type_hint: None, }); - // 11. Tail recursion: Call(loop_step, [i_next, start_next, result, s, sep]) - Carriers-First! + // 13. Tail recursion: Call(loop_step, [i_next, start_next, result, s, sep]) - Carriers-First! loop_step_func.body.push(JoinInst::Call { func: loop_step_id, - args: vec![i_next, start_next, result_step_param, s_step_param, sep_step_param], + args: vec![i_next, start_next, result_next, s_step_param, sep_step_param], k_next: None, dst: None, }); @@ -344,38 +360,8 @@ pub(crate) fn lower_split_scan_minimal( vec![i_exit_param, start_exit_param, result_exit_param, s_exit_param], ); - // 1. s_len = s.length() - k_exit_func - .body - .push(JoinInst::Compute(MirLikeInst::BoxCall { - dst: Some(s_len), - box_name: "StringBox".to_string(), - method: "length".to_string(), - args: vec![s_exit_param], - })); - - // 2. tail = s.substring(start, s_len) - k_exit_func - .body - .push(JoinInst::Compute(MirLikeInst::BoxCall { - dst: Some(tail), - box_name: "StringBox".to_string(), - method: "substring".to_string(), - args: vec![s_exit_param, start_exit_param, s_len], - })); - - // 3. result.push(tail) - BoxCall with side effect - k_exit_func - .body - .push(JoinInst::Compute(MirLikeInst::BoxCall { - dst: None, - box_name: "ArrayBox".to_string(), // Assuming result is ArrayBox - method: "push".to_string(), - args: vec![result_exit_param, tail], - })); - - // 4. Return result (main return value) - // Phase 256 P0: Also return start and i for carrier PHI setup + // Return result (main return value). + // Post-loop tail push stays in host AST (avoid double-push). k_exit_func.body.push(JoinInst::Ret { value: Some(result_exit_param), }); @@ -437,7 +423,7 @@ mod tests { } #[test] - fn test_k_exit_has_push_box_call() { + fn test_k_exit_is_pure_return() { let mut join_value_space = JoinValueSpace::new(); let join_module = lower_split_scan_minimal(&mut join_value_space); @@ -447,15 +433,31 @@ mod tests { .get(&JoinFuncId::new(2)) .expect("k_exit function should exist"); - // BoxCall(push) が含まれることを確認 - let has_push = k_exit.body.iter().any(|inst| { + assert_eq!(k_exit.body.len(), 1); + assert!(matches!(k_exit.body[0], JoinInst::Ret { .. })); + } + + #[test] + fn test_loop_step_has_conditional_push() { + let mut join_value_space = JoinValueSpace::new(); + + let join_module = lower_split_scan_minimal(&mut join_value_space); + + let loop_step = join_module + .functions + .get(&JoinFuncId::new(1)) + .expect("loop_step function should exist"); + + let has_conditional_push = loop_step.body.iter().any(|inst| { matches!( inst, - JoinInst::Compute(MirLikeInst::BoxCall { method, .. }) - if method == "push" + JoinInst::ConditionalMethodCall { method, .. } if method == "push" ) }); - assert!(has_push, "k_exit should contain push BoxCall"); + assert!( + has_conditional_push, + "loop_step should contain ConditionalMethodCall push" + ); } } diff --git a/src/mir/join_ir_vm_bridge/joinir_block_converter.rs b/src/mir/join_ir_vm_bridge/joinir_block_converter.rs index 2cec3283..dd35de4b 100644 --- a/src/mir/join_ir_vm_bridge/joinir_block_converter.rs +++ b/src/mir/join_ir_vm_bridge/joinir_block_converter.rs @@ -276,15 +276,18 @@ impl JoinIrBlockConverter { branch_terminator, ); + let then_value = mir_func.next_value_id(); + let else_value = mir_func.next_value_id(); + // then block: method call let mut then_block_obj = crate::mir::BasicBlock::new(then_block); then_block_obj.instructions.push(MirInstruction::BoxCall { - dst: Some(*dst), + dst: Some(then_value), box_val: *receiver, method: method.to_string(), method_id: None, args: args.to_vec(), - effects: EffectMask::PURE, + effects: EffectMask::WRITE, }); then_block_obj.instruction_spans.push(Span::unknown()); then_block_obj.terminator = Some(MirInstruction::Jump { @@ -295,7 +298,7 @@ impl JoinIrBlockConverter { // else block: copy receiver let mut else_block_obj = crate::mir::BasicBlock::new(else_block); else_block_obj.instructions.push(MirInstruction::Copy { - dst: *dst, + dst: else_value, src: *receiver, }); else_block_obj.instruction_spans.push(Span::unknown()); @@ -304,8 +307,14 @@ impl JoinIrBlockConverter { }); mir_func.blocks.insert(else_block, else_block_obj); - // merge block - let merge_block_obj = crate::mir::BasicBlock::new(merge_block); + // merge block: phi for dst + let mut merge_block_obj = crate::mir::BasicBlock::new(merge_block); + merge_block_obj.instructions.push(MirInstruction::Phi { + dst: *dst, + inputs: vec![(then_block, then_value), (else_block, else_value)], + type_hint: None, + }); + merge_block_obj.instruction_spans.push(Span::unknown()); mir_func.blocks.insert(merge_block, merge_block_obj); self.current_block_id = merge_block; diff --git a/src/mir/passes/dce.rs b/src/mir/passes/dce.rs index 0125e4e9..96c3dd87 100644 --- a/src/mir/passes/dce.rs +++ b/src/mir/passes/dce.rs @@ -22,7 +22,8 @@ fn eliminate_dead_code_in_function(function: &mut MirFunction) -> usize { // Mark values used by side-effecting instructions and terminators for (_bid, block) in &function.blocks { for instruction in &block.instructions { - if !instruction.effects().is_pure() { + let has_dst = instruction.dst_value().is_some(); + if !instruction.effects().is_pure() || !has_dst { if let Some(dst) = instruction.dst_value() { used_values.insert(dst); } @@ -99,3 +100,76 @@ fn eliminate_dead_code_in_function(function: &mut MirFunction) -> usize { } eliminated } + +#[cfg(test)] +mod tests { + use super::*; + use crate::ast::Span; + use crate::mir::{BasicBlockId, ConstValue, EffectMask, FunctionSignature, MirInstruction, MirType}; + + #[test] + fn test_dce_keeps_jump_args_values() { + let mut module = MirModule::new("dce_test".to_string()); + + let sig = FunctionSignature { + name: "test/0".to_string(), + params: vec![], + return_type: MirType::Void, + effects: EffectMask::PURE, + }; + let mut func = MirFunction::new(sig, BasicBlockId(0)); + + let v1 = ValueId(1); + let v2 = ValueId(2); + let v_dead = ValueId(3); + + { + let bb0 = func.blocks.get_mut(&BasicBlockId(0)).unwrap(); + bb0.instructions.push(MirInstruction::Const { + dst: v1, + value: ConstValue::Integer(123), + }); + bb0.instruction_spans.push(Span::unknown()); + + bb0.instructions.push(MirInstruction::Copy { dst: v2, src: v1 }); + bb0.instruction_spans.push(Span::unknown()); + + // This pure instruction should be eliminated. + bb0.instructions.push(MirInstruction::Const { + dst: v_dead, + value: ConstValue::Integer(999), + }); + bb0.instruction_spans.push(Span::unknown()); + + // SSOT: jump_args is a semantic use-site (ExitLine, continuation args). + bb0.jump_args = Some(vec![v2]); + } + + module.add_function(func); + + let eliminated = eliminate_dead_code(&mut module); + assert_eq!(eliminated, 1); + + let func = module.get_function("test/0").unwrap(); + let bb0 = func.blocks.get(&BasicBlockId(0)).unwrap(); + + // Contract: spans must stay aligned with instructions. + assert_eq!(bb0.instructions.len(), bb0.instruction_spans.len()); + + // Contract: values that appear only in jump_args must be kept (and their deps). + assert!(bb0 + .instructions + .iter() + .any(|inst| matches!(inst, MirInstruction::Copy { dst, .. } if *dst == v2))); + assert!(bb0 + .instructions + .iter() + .any(|inst| matches!(inst, MirInstruction::Const { dst, .. } if *dst == v1))); + + // The unused const must be eliminated. + assert!(!bb0 + .instructions + .iter() + .any(|inst| matches!(inst, MirInstruction::Const { dst, .. } if *dst == v_dead))); + } +} diff --git a/tools/smokes/v2/profiles/integration/apps/phase254_p0_index_of_vm.sh b/tools/smokes/v2/profiles/integration/apps/phase254_p0_index_of_vm.sh index f7a543e0..2970e6dd 100644 --- a/tools/smokes/v2/profiles/integration/apps/phase254_p0_index_of_vm.sh +++ b/tools/smokes/v2/profiles/integration/apps/phase254_p0_index_of_vm.sh @@ -2,13 +2,16 @@ # Phase 254 P0: index_of 形 loop (VM backend) set -euo pipefail +HAKORUNE_BIN="${HAKORUNE_BIN:-./target/release/hakorune}" HAKO_PATH="apps/tests/phase254_p0_index_of_min.hako" # Test: "abc".index_of("b") → 1 EXPECTED_EXIT=1 +set +e $HAKORUNE_BIN --backend vm "$HAKO_PATH" actual_exit=$? +set -e if [[ $actual_exit -eq $EXPECTED_EXIT ]]; then echo "✅ phase254_p0_index_of_vm: PASS (exit=$actual_exit)" diff --git a/tools/smokes/v2/profiles/integration/apps/phase256_p0_split_vm.sh b/tools/smokes/v2/profiles/integration/apps/phase256_p0_split_vm.sh index 65df2406..1a9bf09c 100644 --- a/tools/smokes/v2/profiles/integration/apps/phase256_p0_split_vm.sh +++ b/tools/smokes/v2/profiles/integration/apps/phase256_p0_split_vm.sh @@ -5,8 +5,10 @@ HAKORUNE_BIN="${HAKORUNE_BIN:-./target/release/hakorune}" HAKO_PATH="apps/tests/phase256_p0_split_min.hako" EXPECTED_EXIT=3 +set +e $HAKORUNE_BIN --backend vm "$HAKO_PATH" >/dev/null 2>&1 actual_exit=$? +set -e if [[ $actual_exit -eq $EXPECTED_EXIT ]]; then echo "✅ phase256_p0_split_vm: PASS (exit=$actual_exit)"