From 7ab042ca919e2569ad575c72e4d99beb67634795 Mon Sep 17 00:00:00 2001 From: tomoaki Date: Sat, 27 Dec 2025 21:43:37 +0900 Subject: [PATCH] refactor: split large modules into submodules --- .../edgecfg/api/compose/cleanup.rs | 135 +++ .../control_flow/edgecfg/api/compose/if_.rs | 139 +++ .../control_flow/edgecfg/api/compose/loop_.rs | 101 ++ .../api/{compose.rs => compose/mod.rs} | 477 +------- .../control_flow/edgecfg/api/compose/seq.rs | 88 ++ .../control_flow/joinir/merge/config.rs | 44 + .../control_flow/joinir/merge/coordinator.rs | 950 +++++++++++++++ .../builder/control_flow/joinir/merge/mod.rs | 1019 +--------------- src/mir/control_tree/step_tree.rs | 999 ---------------- src/mir/control_tree/step_tree/builder.rs | 218 ++++ .../control_tree/step_tree/fact_extractor.rs | 182 +++ src/mir/control_tree/step_tree/format.rs | 99 ++ src/mir/control_tree/step_tree/mod.rs | 14 + src/mir/control_tree/step_tree/signature.rs | 39 + src/mir/control_tree/step_tree/summary.rs | 76 ++ src/mir/control_tree/step_tree/tests.rs | 237 ++++ src/mir/control_tree/step_tree/types.rs | 158 +++ .../lowering/carrier_update_emitter.rs | 1015 ---------------- .../conditional_step.rs | 97 ++ .../lowering/carrier_update_emitter/legacy.rs | 176 +++ .../lowering/carrier_update_emitter/mod.rs | 20 + .../lowering/carrier_update_emitter/tests.rs | 556 +++++++++ .../carrier_update_emitter/with_env.rs | 183 +++ .../function_scope_capture/analyzers.rs | 1018 ---------------- .../function_scope_capture/analyzers/mod.rs | 10 + .../function_scope_capture/analyzers/tests.rs | 653 ++++++++++ .../function_scope_capture/analyzers/v1.rs | 172 +++ .../function_scope_capture/analyzers/v2.rs | 193 +++ src/runner/mir_json_emit.rs | 1065 ----------------- src/runner/mir_json_emit/emitters/basic.rs | 154 +++ src/runner/mir_json_emit/emitters/calls.rs | 156 +++ .../mir_json_emit/emitters/control_flow.rs | 33 + src/runner/mir_json_emit/emitters/mod.rs | 124 ++ src/runner/mir_json_emit/emitters/phi.rs | 33 + 
src/runner/mir_json_emit/emitters/weak.rs | 37 + src/runner/mir_json_emit/helpers.rs | 172 +++ src/runner/mir_json_emit/mod.rs | 132 ++ .../modes/common_util/resolve/strip/merge.rs | 250 ++++ .../modes/common_util/resolve/strip/mod.rs | 9 + .../common_util/resolve/strip/preexpand.rs | 66 + .../common_util/resolve/strip/prelude.rs | 270 +++++ .../resolve/{strip.rs => strip/using.rs} | 643 +--------- 42 files changed, 6067 insertions(+), 6145 deletions(-) create mode 100644 src/mir/builder/control_flow/edgecfg/api/compose/cleanup.rs create mode 100644 src/mir/builder/control_flow/edgecfg/api/compose/if_.rs create mode 100644 src/mir/builder/control_flow/edgecfg/api/compose/loop_.rs rename src/mir/builder/control_flow/edgecfg/api/{compose.rs => compose/mod.rs} (58%) create mode 100644 src/mir/builder/control_flow/edgecfg/api/compose/seq.rs create mode 100644 src/mir/builder/control_flow/joinir/merge/config.rs create mode 100644 src/mir/builder/control_flow/joinir/merge/coordinator.rs delete mode 100644 src/mir/control_tree/step_tree.rs create mode 100644 src/mir/control_tree/step_tree/builder.rs create mode 100644 src/mir/control_tree/step_tree/fact_extractor.rs create mode 100644 src/mir/control_tree/step_tree/format.rs create mode 100644 src/mir/control_tree/step_tree/mod.rs create mode 100644 src/mir/control_tree/step_tree/signature.rs create mode 100644 src/mir/control_tree/step_tree/summary.rs create mode 100644 src/mir/control_tree/step_tree/tests.rs create mode 100644 src/mir/control_tree/step_tree/types.rs delete mode 100644 src/mir/join_ir/lowering/carrier_update_emitter.rs create mode 100644 src/mir/join_ir/lowering/carrier_update_emitter/conditional_step.rs create mode 100644 src/mir/join_ir/lowering/carrier_update_emitter/legacy.rs create mode 100644 src/mir/join_ir/lowering/carrier_update_emitter/mod.rs create mode 100644 src/mir/join_ir/lowering/carrier_update_emitter/tests.rs create mode 100644 src/mir/join_ir/lowering/carrier_update_emitter/with_env.rs 
delete mode 100644 src/mir/loop_pattern_detection/function_scope_capture/analyzers.rs create mode 100644 src/mir/loop_pattern_detection/function_scope_capture/analyzers/mod.rs create mode 100644 src/mir/loop_pattern_detection/function_scope_capture/analyzers/tests.rs create mode 100644 src/mir/loop_pattern_detection/function_scope_capture/analyzers/v1.rs create mode 100644 src/mir/loop_pattern_detection/function_scope_capture/analyzers/v2.rs delete mode 100644 src/runner/mir_json_emit.rs create mode 100644 src/runner/mir_json_emit/emitters/basic.rs create mode 100644 src/runner/mir_json_emit/emitters/calls.rs create mode 100644 src/runner/mir_json_emit/emitters/control_flow.rs create mode 100644 src/runner/mir_json_emit/emitters/mod.rs create mode 100644 src/runner/mir_json_emit/emitters/phi.rs create mode 100644 src/runner/mir_json_emit/emitters/weak.rs create mode 100644 src/runner/mir_json_emit/helpers.rs create mode 100644 src/runner/mir_json_emit/mod.rs create mode 100644 src/runner/modes/common_util/resolve/strip/merge.rs create mode 100644 src/runner/modes/common_util/resolve/strip/mod.rs create mode 100644 src/runner/modes/common_util/resolve/strip/preexpand.rs create mode 100644 src/runner/modes/common_util/resolve/strip/prelude.rs rename src/runner/modes/common_util/resolve/{strip.rs => strip/using.rs} (52%) diff --git a/src/mir/builder/control_flow/edgecfg/api/compose/cleanup.rs b/src/mir/builder/control_flow/edgecfg/api/compose/cleanup.rs new file mode 100644 index 00000000..20635e52 --- /dev/null +++ b/src/mir/builder/control_flow/edgecfg/api/compose/cleanup.rs @@ -0,0 +1,135 @@ +use std::collections::BTreeMap; + +use crate::mir::basic_block::BasicBlockId; +use crate::mir::builder::control_flow::edgecfg::api::exit_kind::ExitKind; +use crate::mir::builder::control_flow::edgecfg::api::frag::Frag; + +/// Phase 281 P3: cleanup() Normal + Return exit wiring implementation +/// +/// Wires cleanup Normal/Return exits to specified targets or propagates them 
upward. +/// +/// # Contract (P3 Implementation) +/// +/// **Input**: +/// - `main`: Main control flow (loop structure Frag) +/// - `cleanup_frag`: Exit handler (Normal/Return exits only, no wires/branches) +/// - `normal_target`: Where to wire Normal exits +/// - `Some(bb)`: Wire Normal → bb (internal closure, target = Some) +/// - `None`: Propagate Normal → wires (upward propagation, target = None) +/// - `ret_target`: Where to wire Return exits +/// - `Some(bb)`: Wire Return → bb (internal closure, target = Some) +/// - `None`: Propagate Return → wires (upward propagation, target = None) +/// +/// **Output**: +/// - Frag with main's structure + cleanup's exits wired/propagated +/// +/// **Invariants**: +/// - 1 block = 1 terminator (no duplicate BranchStubs) +/// - cleanup_frag must have empty wires/branches (Fail-Fast if not) +/// - cleanup_frag.exits must contain only Normal/Return (Fail-Fast for other kinds) +/// - normal_target=Some: Normal exits → wires (internal) +/// - normal_target=None: Normal exits → wires (target=None, propagate upward) +/// - ret_target=Some: Return exits → wires (internal) +/// - ret_target=None: Return exits → wires (target=None, propagate upward) +/// +/// # Implementation Status +/// +/// P3: Normal + Return wiring logic implemented +/// Future: Break/Continue/Unwind support (P4+) +/// +/// # Migration Notes (Phase 264 → Phase 281) +/// +/// Old signature (Phase 264): `cleanup(body: Frag, cleanup_block: BasicBlockId) -> Frag` +/// Phase 281 P1: `cleanup(main: Frag, cleanup: Frag) -> Result` +/// Phase 281 P2: `cleanup(main: Frag, cleanup_frag: Frag, ret_target: Option) -> Result` +/// Phase 281 P3: `cleanup(main: Frag, cleanup_frag: Frag, normal_target: Option, ret_target: Option) -> Result` +/// +/// Rationale: Pattern6/7 require flexible exit wiring for Normal/Return exits. +/// cleanup_frag must be "exit-only" to prevent terminator confusion. 
+pub(crate) fn cleanup( + main: Frag, + cleanup_frag: Frag, + normal_target: Option, + ret_target: Option, +) -> Result { + // Phase 281 P3: Normal + Return exit wiring implementation + // - Supported: Normal, Return exits + // - Unsupported: Break, Continue, Unwind (Fail-Fast) + + let mut exits = BTreeMap::new(); + let mut wires = Vec::new(); + let mut branches = Vec::new(); + + // Validate cleanup_frag structure (only exits allowed, no wires/branches) + if !cleanup_frag.wires.is_empty() || !cleanup_frag.branches.is_empty() { + return Err(format!( + "compose::cleanup() Phase 281 P3: cleanup_frag must have empty wires/branches (only exits allowed), found {} wires, {} branches", + cleanup_frag.wires.len(), + cleanup_frag.branches.len() + )); + } + + // Validate cleanup_frag exits (only Normal + Return allowed in P3) + for (kind, _) in &cleanup_frag.exits { + match kind { + ExitKind::Normal | ExitKind::Return => {} // OK + _ => { + return Err(format!( + "compose::cleanup() Phase 281 P3: unsupported exit kind {:?} in cleanup_frag (only Normal/Return allowed)", + kind + )); + } + } + } + + // Process cleanup Normal exits + if let Some(normal_stubs) = cleanup_frag.exits.get(&ExitKind::Normal) { + for mut stub in normal_stubs.clone() { + match normal_target { + Some(target_bb) => { + // Wire: Normal → target_bb (internal closure) + stub.target = Some(target_bb); + wires.push(stub); + } + None => { + // Propagate: Normal → wires (target=None, upward propagation) + stub.target = None; + wires.push(stub); + } + } + } + } + + // Process cleanup Return exits + if let Some(return_stubs) = cleanup_frag.exits.get(&ExitKind::Return) { + for mut stub in return_stubs.clone() { + match ret_target { + Some(target_bb) => { + // Wire: Return → target_bb (internal closure) + stub.target = Some(target_bb); + wires.push(stub); + } + None => { + // Propagate: Return → wires (target=None, will be emitted as Return terminator) + // Note: Return exits can have target=None in wires (Phase 267 
special case) + stub.target = None; + wires.push(stub); + } + } + } + } + + // Preserve main's exits/wires/branches + for (kind, stubs) in main.exits { + exits.entry(kind).or_insert_with(Vec::new).extend(stubs); + } + wires.extend(main.wires); + branches.extend(main.branches); + + Ok(Frag { + entry: main.entry, // Entry = main entry (header_bb) + exits, + wires, + branches, + }) +} diff --git a/src/mir/builder/control_flow/edgecfg/api/compose/if_.rs b/src/mir/builder/control_flow/edgecfg/api/compose/if_.rs new file mode 100644 index 00000000..79077d5c --- /dev/null +++ b/src/mir/builder/control_flow/edgecfg/api/compose/if_.rs @@ -0,0 +1,139 @@ +use std::collections::BTreeMap; + +use crate::mir::basic_block::{BasicBlockId, EdgeArgs}; +use crate::mir::builder::control_flow::edgecfg::api::branch_stub::BranchStub; +use crate::mir::builder::control_flow::edgecfg::api::edge_stub::EdgeStub; +use crate::mir::builder::control_flow::edgecfg::api::exit_kind::ExitKind; +use crate::mir::builder::control_flow::edgecfg::api::frag::Frag; +use crate::mir::value_id::ValueId; + +/// 条件分岐合成: `if (cond) { t } else { e }` +/// +/// # Phase 280: Composition SSOT +/// +/// ## Constraint (Caller Allocates) +/// +/// - **Caller allocates**: `header`, `t.entry`, `e.entry`, `join_frag.entry` (`BasicBlockId`), `cond` (`ValueId`) +/// - **Caller provides**: `then_entry_args`, `else_entry_args` (`EdgeArgs`) - Phase 268 P1 SSOT +/// - **Composition wires**: `header` → `t.entry`/`e.entry` (BranchStub), `t/e.Normal` → `join_frag.entry` +/// +/// ## Composition Law (Input → Output) +/// +/// - `header` → `t.entry`/`e.entry` (`BranchStub` → `branches`) +/// - `t/e.Normal` → `join_frag.entry` (`EdgeStub` → `wires`) +/// - Non-Normal exits → propagate upward (`exits`) +/// - Result: `if.entry = header`, `if.exits = t/e.non-Normal + join_frag.all` +/// +/// ## Invariants Preserved +/// +/// - Wires/Exits separation: BranchStub in `branches`, Normal wiring in `wires`, exits `target = None` +/// - 
Terminator uniqueness: 1 block = 1 terminator (header gets Branch, t/e/join get Jump/Return) +/// - Entry consistency: `if.entry` is valid `BasicBlockId` +/// +/// # Phase 267 P0: Branch 生成実装完了 +/// - header → then/else の BranchStub を branches に追加 +/// - t/e.Normal → join_frag.entry を wires に追加(内部配線) +/// - if の exits は join_frag.exits(join 以降の外へ出る exit) +/// +/// # 配線ルール +/// - header → t.entry / e.entry を BranchStub として branches に追加(Phase 267 P0) +/// - t/e.Normal の EdgeStub.target = Some(join_frag.entry) → wires +/// - if の exits = t/e の非 Normal + join_frag.exits +/// - if の wires = t/e.Normal → join + t/e/join の wires +/// - if の branches = header の BranchStub + t/e/join の branches +/// +/// # 引数 +/// - `header`: 条件判定を行うブロック +/// - `cond`: 条件値(Phase 267 P0 で使用開始) +/// - `t`: then 分岐の断片 +/// - `e`: else 分岐の断片 +/// - `join_frag`: join 以降の断片(t/e.Normal の配線先 + join 以降の処理) +pub(crate) fn if_( + header: BasicBlockId, + cond: ValueId, // Phase 267 P0 で使用開始 + t: Frag, // then 分岐 + then_entry_args: EdgeArgs, // Phase 268 P1: then entry edge-args (SSOT) + e: Frag, // else 分岐 + else_entry_args: EdgeArgs, // Phase 268 P1: else entry edge-args (SSOT) + join_frag: Frag, // join 以降の断片 +) -> Frag { + // Phase 267 P0: header → then/else の BranchStub を作成 + let branch = BranchStub { + from: header, + cond, + then_target: t.entry, + then_args: then_entry_args, // Phase 268 P1: caller provides + else_target: e.entry, + else_args: else_entry_args, // Phase 268 P1: caller provides + }; + + let mut exits = BTreeMap::new(); + let mut wires = Vec::new(); + + // then の全 exit を処理 + for (kind, stubs) in t.exits { + match kind { + ExitKind::Normal => { + // t.Normal → join_frag.entry への配線を wires に追加 + let wired_stubs: Vec = stubs + .into_iter() + .map(|mut stub| { + stub.target = Some(join_frag.entry); + stub + }) + .collect(); + wires.extend(wired_stubs); + } + // Return, Unwind, Break, Continue は上位へ伝搬 + _ => { + exits.entry(kind).or_insert_with(Vec::new).extend(stubs); + } + } + } + + // 
then の wires をマージ + wires.extend(t.wires); + + // else の全 exit を処理(then と同じロジック) + for (kind, stubs) in e.exits { + match kind { + ExitKind::Normal => { + let wired_stubs: Vec = stubs + .into_iter() + .map(|mut stub| { + stub.target = Some(join_frag.entry); + stub + }) + .collect(); + wires.extend(wired_stubs); + } + _ => { + exits.entry(kind).or_insert_with(Vec::new).extend(stubs); + } + } + } + + // else の wires をマージ + wires.extend(e.wires); + + // join_frag の exits が if 全体の Normal exit になる + for (kind, stubs) in join_frag.exits { + exits.entry(kind).or_insert_with(Vec::new).extend(stubs); + } + + // join_frag の wires もマージ + wires.extend(join_frag.wires); + + // Phase 267 P0: branches を統合 + let mut branches = vec![branch]; + branches.extend(t.branches); + branches.extend(e.branches); + branches.extend(join_frag.branches); + + Frag { + entry: header, // if の入口は header + exits, // t/e の非 Normal + join_frag.exits + wires, // t/e.Normal → join_frag.entry + t/e/join の wires + branches, // Phase 267 P0: header の BranchStub + t/e/join の branches + } +} diff --git a/src/mir/builder/control_flow/edgecfg/api/compose/loop_.rs b/src/mir/builder/control_flow/edgecfg/api/compose/loop_.rs new file mode 100644 index 00000000..5b19b99f --- /dev/null +++ b/src/mir/builder/control_flow/edgecfg/api/compose/loop_.rs @@ -0,0 +1,101 @@ +use std::collections::BTreeMap; + +use crate::mir::basic_block::BasicBlockId; +use crate::mir::builder::control_flow::edgecfg::api::edge_stub::EdgeStub; +use crate::mir::builder::control_flow::edgecfg::api::exit_kind::ExitKind; +use crate::mir::builder::control_flow::edgecfg::api::frag::Frag; +use crate::mir::control_form::LoopId; + +/// ループ合成: `loop (cond) { body }` +/// +/// # Phase 280: Composition SSOT +/// +/// ## Constraint (Caller Allocates) +/// +/// - **Caller allocates**: `loop_id` (`LoopId`), `header`, `after` (`BasicBlockId`) +/// - **Composition wires**: `Continue(loop_id)` → `header`, `Break(loop_id)` → `after` +/// +/// ## Composition Law 
(Input → Output) +/// +/// - `Continue(loop_id)` → `header` (`EdgeStub` → `wires`) +/// - `Break(loop_id)` → `after` (`EdgeStub` → `wires`) +/// - Normal/Return/Unwind → propagate upward (`exits`) +/// - Result: `loop.entry = header`, `loop.exits = Normal/Return/Unwind only` (no Break/Continue) +/// +/// ## Invariants Preserved +/// +/// - Wires/Exits separation: Continue/Break have `target = Some`, other exits `target = None` +/// - Terminator uniqueness: 1 block = 1 terminator (from-grouping in emit_frag) +/// - Entry consistency: `loop.entry` is valid `BasicBlockId` +/// +/// # Phase 265 P2: wires/exits 分離実装完了 +/// - Continue(loop_id) → header へ配線(wires へ) +/// - Break(loop_id) → after へ配線(wires へ) +/// - Normal/Return/Unwind は target = None のまま上位へ伝搬(exits へ) +/// +/// # 配線ルール +/// - Continue(loop_id) の EdgeStub.target = Some(header) → wires +/// - Break(loop_id) の EdgeStub.target = Some(after) → wires +/// - その他の ExitKind は target = None(exits へ) +/// +/// # 引数 +/// - `loop_id`: ループ識別子(配線対象の Break/Continue 判定に使用) +/// - `header`: ループヘッダー(Continue の配線先) +/// - `after`: ループ後のブロック(Break の配線先) +/// - `body`: ループ本体の断片 +pub(crate) fn loop_( + loop_id: LoopId, + header: BasicBlockId, + after: BasicBlockId, + body: Frag, +) -> Frag { + // Phase 265 P2: exit 集合の配線処理(wires/exits 分離) + let mut exits = BTreeMap::new(); + let mut wires = Vec::new(); // Phase 265 P2: 配線済み内部配線 + + for (kind, stubs) in body.exits { + match kind { + ExitKind::Continue(lid) if lid == loop_id => { + // Continue → header へ配線(wires に追加) + let wired: Vec = stubs + .into_iter() + .map(|mut stub| { + stub.target = Some(header); + stub + }) + .collect(); + wires.extend(wired); + // exits には入れない(内部配線) + } + ExitKind::Break(lid) if lid == loop_id => { + // Break → after へ配線(wires に追加) + let wired: Vec = stubs + .into_iter() + .map(|mut stub| { + stub.target = Some(after); + stub + }) + .collect(); + wires.extend(wired); + // exits には入れない(内部配線) + } + // Normal, Return, Unwind は上位へ伝搬(exits に追加) + _ => { + 
exits.insert(kind, stubs); + } + } + } + + // body の wires もマージ + wires.extend(body.wires); + + // Phase 267 P0: body の branches もマージ + let branches = body.branches; + + Frag { + entry: header, // ループの入口 + exits, // Normal, Return, Unwind のみ(未配線) + wires, // Continue → header, Break → after(配線済み) + branches, // Phase 267 P0: body の branches + } +} diff --git a/src/mir/builder/control_flow/edgecfg/api/compose.rs b/src/mir/builder/control_flow/edgecfg/api/compose/mod.rs similarity index 58% rename from src/mir/builder/control_flow/edgecfg/api/compose.rs rename to src/mir/builder/control_flow/edgecfg/api/compose/mod.rs index 8e380916..0919ad10 100644 --- a/src/mir/builder/control_flow/edgecfg/api/compose.rs +++ b/src/mir/builder/control_flow/edgecfg/api/compose/mod.rs @@ -64,460 +64,31 @@ * - Phase 280: SSOT positioning (composition as pattern absorption destination) */ -use std::collections::BTreeMap; -use crate::mir::basic_block::{BasicBlockId, EdgeArgs}; -use crate::mir::control_form::LoopId; -use crate::mir::value_id::ValueId; -use super::frag::Frag; -use super::exit_kind::ExitKind; -use super::edge_stub::EdgeStub; // Phase 265 P2: wires/exits 分離で必要 -use super::branch_stub::BranchStub; // Phase 267 P0: Branch 生成に必要 +mod cleanup; +mod if_; +mod loop_; +mod seq; -/// 順次合成: `a; b` -/// -/// # Phase 280: Composition SSOT -/// -/// ## Constraint (Caller Allocates) -/// -/// - **Caller allocates**: `b.entry` (`BasicBlockId`) -/// - **Composition wires**: `a.Normal` → `b.entry` -/// -/// ## Composition Law (Input → Output) -/// -/// - `a.Normal` exits → `wires` (target = `Some(b.entry)`) -/// - Non-Normal exits (Return/Break/Continue/Unwind) → propagate upward (`exits`) -/// - Result: `seq.entry = a.entry`, `seq.exits = a.non-Normal + b.all` -/// -/// ## Invariants Preserved -/// -/// - Wires/Exits separation: wires have `target = Some`, exits have `target = None` -/// - Terminator uniqueness: 1 block = 1 terminator (from-grouping in emit_frag) -/// - Entry consistency: 
`seq.entry` is valid `BasicBlockId` -/// -/// # Phase 265 P2: wires/exits 分離実装完了 -/// - a.Normal → b.entry を wires に追加(内部配線) -/// - seq の exits[Normal] は b の Normal のみ(外へ出る exit) -/// -/// # 配線ルール -/// - a.Normal の EdgeStub.target = Some(b.entry) → wires -/// - seq の exits = a の非 Normal + b の全 exits -/// - seq の wires = a.Normal → b.entry + a.wires + b.wires -/// -/// # 引数 -/// - `a`: 前段の断片 -/// - `b`: 後段の断片 -pub(crate) fn seq(a: Frag, b: Frag) -> Frag { - let mut exits = BTreeMap::new(); - let mut wires = Vec::new(); - - // a の全 exit を処理 - for (kind, stubs) in a.exits { - match kind { - ExitKind::Normal => { - // a.Normal → b.entry への配線を wires に追加 - let wired_stubs: Vec = stubs - .into_iter() - .map(|mut stub| { - stub.target = Some(b.entry); - stub - }) - .collect(); - wires.extend(wired_stubs); - // exits[Normal] には入れない(内部配線) - } - // Return, Unwind, Break, Continue は上位へ伝搬 - _ => { - exits.insert(kind, stubs); - } - } - } - - // a の wires をマージ - wires.extend(a.wires); - - // b の全 exit をマージ(b.Normal が seq の Normal exit になる) - for (kind, stubs) in b.exits { - exits.entry(kind).or_insert_with(Vec::new).extend(stubs); - } - - // b の wires もマージ - wires.extend(b.wires); - - // Phase 267 P0: branches もマージ - let mut branches = Vec::new(); - branches.extend(a.branches); - branches.extend(b.branches); - - Frag { - entry: a.entry, // seq の入口は a の入口 - exits, // a の非 Normal + b の全 exit - wires, // a.Normal → b.entry + a.wires + b.wires - branches, // Phase 267 P0: a.branches + b.branches - } -} - -/// 条件分岐合成: `if (cond) { t } else { e }` -/// -/// # Phase 280: Composition SSOT -/// -/// ## Constraint (Caller Allocates) -/// -/// - **Caller allocates**: `header`, `t.entry`, `e.entry`, `join_frag.entry` (`BasicBlockId`), `cond` (`ValueId`) -/// - **Caller provides**: `then_entry_args`, `else_entry_args` (`EdgeArgs`) - Phase 268 P1 SSOT -/// - **Composition wires**: `header` → `t.entry`/`e.entry` (BranchStub), `t/e.Normal` → `join_frag.entry` -/// -/// ## Composition Law (Input 
→ Output) -/// -/// - `header` → `t.entry`/`e.entry` (`BranchStub` → `branches`) -/// - `t/e.Normal` → `join_frag.entry` (`EdgeStub` → `wires`) -/// - Non-Normal exits → propagate upward (`exits`) -/// - Result: `if.entry = header`, `if.exits = t/e.non-Normal + join_frag.all` -/// -/// ## Invariants Preserved -/// -/// - Wires/Exits separation: BranchStub in `branches`, Normal wiring in `wires`, exits `target = None` -/// - Terminator uniqueness: 1 block = 1 terminator (header gets Branch, t/e/join get Jump/Return) -/// - Entry consistency: `if.entry` is valid `BasicBlockId` -/// -/// # Phase 267 P0: Branch 生成実装完了 -/// - header → then/else の BranchStub を branches に追加 -/// - t/e.Normal → join_frag.entry を wires に追加(内部配線) -/// - if の exits は join_frag.exits(join 以降の外へ出る exit) -/// -/// # 配線ルール -/// - header → t.entry / e.entry を BranchStub として branches に追加(Phase 267 P0) -/// - t/e.Normal の EdgeStub.target = Some(join_frag.entry) → wires -/// - if の exits = t/e の非 Normal + join_frag.exits -/// - if の wires = t/e.Normal → join + t/e/join の wires -/// - if の branches = header の BranchStub + t/e/join の branches -/// -/// # 引数 -/// - `header`: 条件判定を行うブロック -/// - `cond`: 条件値(Phase 267 P0 で使用開始) -/// - `t`: then 分岐の断片 -/// - `e`: else 分岐の断片 -/// - `join_frag`: join 以降の断片(t/e.Normal の配線先 + join 以降の処理) -pub(crate) fn if_( - header: BasicBlockId, - cond: ValueId, // Phase 267 P0 で使用開始 - t: Frag, // then 分岐 - then_entry_args: EdgeArgs, // Phase 268 P1: then entry edge-args (SSOT) - e: Frag, // else 分岐 - else_entry_args: EdgeArgs, // Phase 268 P1: else entry edge-args (SSOT) - join_frag: Frag, // join 以降の断片 -) -> Frag { - // Phase 267 P0: header → then/else の BranchStub を作成 - let branch = BranchStub { - from: header, - cond, - then_target: t.entry, - then_args: then_entry_args, // Phase 268 P1: caller provides - else_target: e.entry, - else_args: else_entry_args, // Phase 268 P1: caller provides - }; - - let mut exits = BTreeMap::new(); - let mut wires = Vec::new(); - - // then 
の全 exit を処理 - for (kind, stubs) in t.exits { - match kind { - ExitKind::Normal => { - // t.Normal → join_frag.entry への配線を wires に追加 - let wired_stubs: Vec = stubs - .into_iter() - .map(|mut stub| { - stub.target = Some(join_frag.entry); - stub - }) - .collect(); - wires.extend(wired_stubs); - } - // Return, Unwind, Break, Continue は上位へ伝搬 - _ => { - exits.entry(kind).or_insert_with(Vec::new).extend(stubs); - } - } - } - - // then の wires をマージ - wires.extend(t.wires); - - // else の全 exit を処理(then と同じロジック) - for (kind, stubs) in e.exits { - match kind { - ExitKind::Normal => { - let wired_stubs: Vec = stubs - .into_iter() - .map(|mut stub| { - stub.target = Some(join_frag.entry); - stub - }) - .collect(); - wires.extend(wired_stubs); - } - _ => { - exits.entry(kind).or_insert_with(Vec::new).extend(stubs); - } - } - } - - // else の wires をマージ - wires.extend(e.wires); - - // join_frag の exits が if 全体の Normal exit になる - for (kind, stubs) in join_frag.exits { - exits.entry(kind).or_insert_with(Vec::new).extend(stubs); - } - - // join_frag の wires もマージ - wires.extend(join_frag.wires); - - // Phase 267 P0: branches を統合 - let mut branches = vec![branch]; - branches.extend(t.branches); - branches.extend(e.branches); - branches.extend(join_frag.branches); - - Frag { - entry: header, // if の入口は header - exits, // t/e の非 Normal + join_frag.exits - wires, // t/e.Normal → join_frag.entry + t/e/join の wires - branches, // Phase 267 P0: header の BranchStub + t/e/join の branches - } -} - -/// ループ合成: `loop (cond) { body }` -/// -/// # Phase 280: Composition SSOT -/// -/// ## Constraint (Caller Allocates) -/// -/// - **Caller allocates**: `loop_id` (`LoopId`), `header`, `after` (`BasicBlockId`) -/// - **Composition wires**: `Continue(loop_id)` → `header`, `Break(loop_id)` → `after` -/// -/// ## Composition Law (Input → Output) -/// -/// - `Continue(loop_id)` → `header` (`EdgeStub` → `wires`) -/// - `Break(loop_id)` → `after` (`EdgeStub` → `wires`) -/// - Normal/Return/Unwind → 
propagate upward (`exits`) -/// - Result: `loop.entry = header`, `loop.exits = Normal/Return/Unwind only` (no Break/Continue) -/// -/// ## Invariants Preserved -/// -/// - Wires/Exits separation: Continue/Break have `target = Some`, other exits `target = None` -/// - Terminator uniqueness: 1 block = 1 terminator (from-grouping in emit_frag) -/// - Entry consistency: `loop.entry` is valid `BasicBlockId` -/// -/// # Phase 265 P2: wires/exits 分離実装完了 -/// - Continue(loop_id) → header へ配線(wires へ) -/// - Break(loop_id) → after へ配線(wires へ) -/// - Normal/Return/Unwind は target = None のまま上位へ伝搬(exits へ) -/// -/// # 配線ルール -/// - Continue(loop_id) の EdgeStub.target = Some(header) → wires -/// - Break(loop_id) の EdgeStub.target = Some(after) → wires -/// - その他の ExitKind は target = None(exits へ) -/// -/// # 引数 -/// - `loop_id`: ループ識別子(配線対象の Break/Continue 判定に使用) -/// - `header`: ループヘッダー(Continue の配線先) -/// - `after`: ループ後のブロック(Break の配線先) -/// - `body`: ループ本体の断片 -pub(crate) fn loop_( - loop_id: LoopId, - header: BasicBlockId, - after: BasicBlockId, - body: Frag, -) -> Frag { - // Phase 265 P2: exit 集合の配線処理(wires/exits 分離) - let mut exits = BTreeMap::new(); - let mut wires = Vec::new(); // Phase 265 P2: 配線済み内部配線 - - for (kind, stubs) in body.exits { - match kind { - ExitKind::Continue(lid) if lid == loop_id => { - // Continue → header へ配線(wires に追加) - let wired: Vec = stubs - .into_iter() - .map(|mut stub| { - stub.target = Some(header); - stub - }) - .collect(); - wires.extend(wired); - // exits には入れない(内部配線) - } - ExitKind::Break(lid) if lid == loop_id => { - // Break → after へ配線(wires に追加) - let wired: Vec = stubs - .into_iter() - .map(|mut stub| { - stub.target = Some(after); - stub - }) - .collect(); - wires.extend(wired); - // exits には入れない(内部配線) - } - // Normal, Return, Unwind は上位へ伝搬(exits に追加) - _ => { - exits.insert(kind, stubs); - } - } - } - - // body の wires もマージ - wires.extend(body.wires); - - // Phase 267 P0: body の branches もマージ - let branches = body.branches; - - 
Frag { - entry: header, // ループの入口 - exits, // Normal, Return, Unwind のみ(未配線) - wires, // Continue → header, Break → after(配線済み) - branches, // Phase 267 P0: body の branches - } -} - -/// Phase 281 P3: cleanup() Normal + Return exit wiring implementation -/// -/// Wires cleanup Normal/Return exits to specified targets or propagates them upward. -/// -/// # Contract (P3 Implementation) -/// -/// **Input**: -/// - `main`: Main control flow (loop structure Frag) -/// - `cleanup_frag`: Exit handler (Normal/Return exits only, no wires/branches) -/// - `normal_target`: Where to wire Normal exits -/// - `Some(bb)`: Wire Normal → bb (internal closure, target = Some) -/// - `None`: Propagate Normal → wires (upward propagation, target = None) -/// - `ret_target`: Where to wire Return exits -/// - `Some(bb)`: Wire Return → bb (internal closure, target = Some) -/// - `None`: Propagate Return → wires (upward propagation, target = None) -/// -/// **Output**: -/// - Frag with main's structure + cleanup's exits wired/propagated -/// -/// **Invariants**: -/// - 1 block = 1 terminator (no duplicate BranchStubs) -/// - cleanup_frag must have empty wires/branches (Fail-Fast if not) -/// - cleanup_frag.exits must contain only Normal/Return (Fail-Fast for other kinds) -/// - normal_target=Some: Normal exits → wires (internal) -/// - normal_target=None: Normal exits → wires (target=None, propagate upward) -/// - ret_target=Some: Return exits → wires (internal) -/// - ret_target=None: Return exits → wires (target=None, propagate upward) -/// -/// # Implementation Status -/// -/// P3: Normal + Return wiring logic implemented -/// Future: Break/Continue/Unwind support (P4+) -/// -/// # Migration Notes (Phase 264 → Phase 281) -/// -/// Old signature (Phase 264): `cleanup(body: Frag, cleanup_block: BasicBlockId) -> Frag` -/// Phase 281 P1: `cleanup(main: Frag, cleanup: Frag) -> Result` -/// Phase 281 P2: `cleanup(main: Frag, cleanup_frag: Frag, ret_target: Option) -> Result` -/// Phase 281 P3: 
`cleanup(main: Frag, cleanup_frag: Frag, normal_target: Option, ret_target: Option) -> Result` -/// -/// Rationale: Pattern6/7 require flexible exit wiring for Normal/Return exits. -/// cleanup_frag must be "exit-only" to prevent terminator confusion. -pub(crate) fn cleanup( - main: Frag, - cleanup_frag: Frag, - normal_target: Option, - ret_target: Option, -) -> Result { - // Phase 281 P3: Normal + Return exit wiring implementation - // - Supported: Normal, Return exits - // - Unsupported: Break, Continue, Unwind (Fail-Fast) - - let mut exits = BTreeMap::new(); - let mut wires = Vec::new(); - let mut branches = Vec::new(); - - // Validate cleanup_frag structure (only exits allowed, no wires/branches) - if !cleanup_frag.wires.is_empty() || !cleanup_frag.branches.is_empty() { - return Err(format!( - "compose::cleanup() Phase 281 P3: cleanup_frag must have empty wires/branches (only exits allowed), found {} wires, {} branches", - cleanup_frag.wires.len(), - cleanup_frag.branches.len() - )); - } - - // Validate cleanup_frag exits (only Normal + Return allowed in P3) - for (kind, _) in &cleanup_frag.exits { - match kind { - ExitKind::Normal | ExitKind::Return => {}, // OK - _ => { - return Err(format!( - "compose::cleanup() Phase 281 P3: unsupported exit kind {:?} in cleanup_frag (only Normal/Return allowed)", - kind - )); - } - } - } - - // Process cleanup Normal exits - if let Some(normal_stubs) = cleanup_frag.exits.get(&ExitKind::Normal) { - for mut stub in normal_stubs.clone() { - match normal_target { - Some(target_bb) => { - // Wire: Normal → target_bb (internal closure) - stub.target = Some(target_bb); - wires.push(stub); - } - None => { - // Propagate: Normal → wires (target=None, upward propagation) - stub.target = None; - wires.push(stub); - } - } - } - } - - // Process cleanup Return exits - if let Some(return_stubs) = cleanup_frag.exits.get(&ExitKind::Return) { - for mut stub in return_stubs.clone() { - match ret_target { - Some(target_bb) => { - // Wire: 
Return → target_bb (internal closure) - stub.target = Some(target_bb); - wires.push(stub); - } - None => { - // Propagate: Return → wires (target=None, will be emitted as Return terminator) - // Note: Return exits can have target=None in wires (Phase 267 special case) - stub.target = None; - wires.push(stub); - } - } - } - } - - // Preserve main's exits/wires/branches - for (kind, stubs) in main.exits { - exits.entry(kind).or_insert_with(Vec::new).extend(stubs); - } - wires.extend(main.wires); - branches.extend(main.branches); - - Ok(Frag { - entry: main.entry, // Entry = main entry (header_bb) - exits, - wires, - branches, - }) -} +#[allow(unused_imports)] +pub(crate) use cleanup::cleanup; +#[allow(unused_imports)] +pub(crate) use if_::if_; +#[allow(unused_imports)] +pub(crate) use loop_::loop_; +#[allow(unused_imports)] +pub(crate) use seq::seq; #[cfg(test)] mod tests { - use super::*; - use crate::mir::basic_block::BasicBlockId; - use super::super::exit_kind::ExitKind; - use super::super::edge_stub::EdgeStub; + use super::{cleanup, if_, loop_, seq}; + use crate::mir::basic_block::{BasicBlockId, EdgeArgs}; + use crate::mir::control_form::LoopId; use crate::mir::join_ir::lowering::inline_boundary::JumpArgsLayout; + use crate::mir::value_id::ValueId; + use crate::mir::builder::control_flow::edgecfg::api::exit_kind::ExitKind; + use crate::mir::builder::control_flow::edgecfg::api::edge_stub::EdgeStub; + use crate::mir::builder::control_flow::edgecfg::api::frag::Frag; + use std::collections::BTreeMap; #[test] fn test_loop_preserves_exits() { @@ -589,14 +160,18 @@ mod tests { assert_eq!(loop_frag.wires.len(), 2); // Break → after の wire - let break_wire = loop_frag.wires.iter() + let break_wire = loop_frag + .wires + .iter() .find(|w| w.kind == ExitKind::Break(loop_id)) .unwrap(); assert_eq!(break_wire.target, Some(after)); assert_eq!(break_wire.from, body_entry); // Continue → header の wire - let continue_wire = loop_frag.wires.iter() + let continue_wire = loop_frag + 
.wires + .iter() .find(|w| w.kind == ExitKind::Continue(loop_id)) .unwrap(); assert_eq!(continue_wire.target, Some(header)); diff --git a/src/mir/builder/control_flow/edgecfg/api/compose/seq.rs b/src/mir/builder/control_flow/edgecfg/api/compose/seq.rs new file mode 100644 index 00000000..fd000d2c --- /dev/null +++ b/src/mir/builder/control_flow/edgecfg/api/compose/seq.rs @@ -0,0 +1,88 @@ +use std::collections::BTreeMap; + +use crate::mir::builder::control_flow::edgecfg::api::edge_stub::EdgeStub; +use crate::mir::builder::control_flow::edgecfg::api::exit_kind::ExitKind; +use crate::mir::builder::control_flow::edgecfg::api::frag::Frag; + +/// 順次合成: `a; b` +/// +/// # Phase 280: Composition SSOT +/// +/// ## Constraint (Caller Allocates) +/// +/// - **Caller allocates**: `b.entry` (`BasicBlockId`) +/// - **Composition wires**: `a.Normal` → `b.entry` +/// +/// ## Composition Law (Input → Output) +/// +/// - `a.Normal` exits → `wires` (target = `Some(b.entry)`) +/// - Non-Normal exits (Return/Break/Continue/Unwind) → propagate upward (`exits`) +/// - Result: `seq.entry = a.entry`, `seq.exits = a.non-Normal + b.all` +/// +/// ## Invariants Preserved +/// +/// - Wires/Exits separation: wires have `target = Some`, exits have `target = None` +/// - Terminator uniqueness: 1 block = 1 terminator (from-grouping in emit_frag) +/// - Entry consistency: `seq.entry` is valid `BasicBlockId` +/// +/// # Phase 265 P2: wires/exits 分離実装完了 +/// - a.Normal → b.entry を wires に追加(内部配線) +/// - seq の exits[Normal] は b の Normal のみ(外へ出る exit) +/// +/// # 配線ルール +/// - a.Normal の EdgeStub.target = Some(b.entry) → wires +/// - seq の exits = a の非 Normal + b の全 exits +/// - seq の wires = a.Normal → b.entry + a.wires + b.wires +/// +/// # 引数 +/// - `a`: 前段の断片 +/// - `b`: 後段の断片 +pub(crate) fn seq(a: Frag, b: Frag) -> Frag { + let mut exits = BTreeMap::new(); + let mut wires = Vec::new(); + + // a の全 exit を処理 + for (kind, stubs) in a.exits { + match kind { + ExitKind::Normal => { + // a.Normal → 
b.entry への配線を wires に追加 + let wired_stubs: Vec = stubs + .into_iter() + .map(|mut stub| { + stub.target = Some(b.entry); + stub + }) + .collect(); + wires.extend(wired_stubs); + // exits[Normal] には入れない(内部配線) + } + // Return, Unwind, Break, Continue は上位へ伝搬 + _ => { + exits.insert(kind, stubs); + } + } + } + + // a の wires をマージ + wires.extend(a.wires); + + // b の全 exit をマージ(b.Normal が seq の Normal exit になる) + for (kind, stubs) in b.exits { + exits.entry(kind).or_insert_with(Vec::new).extend(stubs); + } + + // b の wires もマージ + wires.extend(b.wires); + + // Phase 267 P0: branches もマージ + let mut branches = Vec::new(); + branches.extend(a.branches); + branches.extend(b.branches); + + Frag { + entry: a.entry, // seq の入口は a の入口 + exits, // a の非 Normal + b の全 exit + wires, // a.Normal → b.entry + a.wires + b.wires + branches, // Phase 267 P0: a.branches + b.branches + } +} diff --git a/src/mir/builder/control_flow/joinir/merge/config.rs b/src/mir/builder/control_flow/joinir/merge/config.rs new file mode 100644 index 00000000..e29bc627 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/merge/config.rs @@ -0,0 +1,44 @@ +/// Phase 131 P1 Task 6: Merge configuration consolidation +/// +/// Consolidates all merge-related configuration into a single structure +/// to reduce parameter clutter and improve maintainability. 
+#[derive(Debug, Clone)] +pub struct MergeConfig { + /// Enable detailed trace logs (dev mode) + pub dev_log: bool, + /// Enable strict contract verification (fail-fast on violations) + pub strict_mode: bool, + /// Exit reconnection mode (Phi or DirectValue) + pub exit_reconnect_mode: Option, + /// Allow missing exit block in contract checks (typically exit_block_id before insertion) + pub allow_missing_exit_block: bool, +} + +impl MergeConfig { + /// Default configuration for normal operation + pub fn default() -> Self { + Self { + dev_log: crate::config::env::joinir_dev_enabled(), + strict_mode: crate::config::env::joinir_strict_enabled(), + exit_reconnect_mode: None, + allow_missing_exit_block: true, + } + } + + /// Strict configuration for development/debugging (all checks enabled) + pub fn strict() -> Self { + Self { + dev_log: true, + strict_mode: true, + exit_reconnect_mode: None, + allow_missing_exit_block: true, + } + } + + /// Configuration for specific debug session + pub fn with_debug(debug: bool) -> Self { + let mut config = Self::default(); + config.dev_log = debug || config.dev_log; + config + } +} diff --git a/src/mir/builder/control_flow/joinir/merge/coordinator.rs b/src/mir/builder/control_flow/joinir/merge/coordinator.rs new file mode 100644 index 00000000..b6b275ad --- /dev/null +++ b/src/mir/builder/control_flow/joinir/merge/coordinator.rs @@ -0,0 +1,950 @@ +use super::config::MergeConfig; +use super::trace; +use super::{LoopHeaderPhiBuilder, MergeContracts}; +use crate::mir::join_ir::lowering::error_tags; +use crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary; +use crate::mir::{MirModule, ValueId}; +use std::collections::BTreeMap; + +#[allow(unused_imports)] +use super::{ + block_allocator, block_remapper, boundary_logging, carrier_init_builder, contract_checks, + debug_assertions, entry_selector, exit_args_collector, exit_line, exit_phi_builder, + expr_result_resolver, header_phi_prebuild, instruction_rewriter, 
loop_header_phi_builder, + loop_header_phi_info, merge_result, phi_block_remapper, rewriter, tail_call_classifier, + tail_call_lowering_policy, value_collector, value_remapper, +}; + +/// Phase 49-3.2: Merge JoinIR-generated MIR blocks into current_function +/// +/// # Phase 189: Multi-Function MIR Merge +/// +/// This merges JoinIR-generated blocks by: +/// 1. Remapping all block IDs across ALL functions to avoid conflicts +/// 2. Remapping all value IDs across ALL functions to avoid conflicts +/// 3. Adding all blocks from all functions to current_function +/// 4. Jumping from current_block to the entry block +/// 5. Converting Return → Jump to exit block for all functions +/// +/// **Multi-Function Support** (Phase 189): +/// - Pattern 1 (Simple While) generates 3 functions: entry + loop_step + k_exit +/// - All functions are flattened into current_function with global ID remapping +/// - Single exit block receives all Return instructions from all functions +/// +/// # Phase 188-Impl-3: JoinInlineBoundary Support +/// +/// When `boundary` is provided, injects Copy instructions at the entry block +/// to connect host ValueIds to JoinIR local ValueIds: +/// +/// ```text +/// entry_block: +/// // Injected by boundary +/// ValueId(100) = Copy ValueId(4) // join_input → host_input +/// // Original JoinIR instructions follow... +/// ``` +/// +/// This enables clean separation: JoinIR uses local IDs (0,1,2...), +/// host uses its own IDs, and Copy instructions bridge the gap. +/// +/// # Returns +/// +/// Returns `Ok(Some(exit_phi_id))` if the merged JoinIR functions have return values +/// that were collected into an exit block PHI. 
さらに、`boundary` に +/// host_outputs が指定されている場合は、exit PHI の結果をホスト側の +/// SSA スロットへ再接続する(variable_map 内の ValueId を更新する)。 +pub(in crate::mir::builder) fn merge_joinir_mir_blocks( + builder: &mut crate::mir::builder::MirBuilder, + mir_module: &MirModule, + boundary: Option<&JoinInlineBoundary>, + debug: bool, +) -> Result, String> { + // Phase 131 Task 6: Use MergeConfig for consolidated configuration + let config = MergeConfig::with_debug(debug); + let verbose = config.dev_log; + let trace = trace::trace(); + + trace.stderr_if( + &format!( + "[cf_loop/joinir] merge_joinir_mir_blocks called with {} functions", + mir_module.functions.len() + ), + debug, + ); + + if let Some(boundary) = boundary { + if let Err(msg) = boundary.validate_jump_args_layout() { + return Err(error_tags::freeze_with_hint( + "phase256/jump_args_layout", + &msg, + "set JoinInlineBoundary.jump_args_layout via builder and avoid expr_result/carrier mismatch", + )); + } + } + + // Phase 286 P3: Validate boundary contract BEFORE merge begins + // This catches boundary construction bugs early with clear diagnostics + if let Some(boundary) = boundary { + // Enrich context with host_fn and join-side info for better error diagnostics + let host_fn = builder + .scope_ctx + .current_function + .as_ref() + .map(|f| f.signature.name.as_str()) + .unwrap_or(""); + + // Join-side info: continuation count + boundary summary + let cont_count = boundary.continuation_func_ids.len(); + let join_summary = format!( + "conts={} exits={} conds={}", + cont_count, + boundary.exit_bindings.len(), + boundary.condition_bindings.len() + ); + + let context = format!( + "merge_joinir_mir_blocks host={} join={} phase= [{}]", + host_fn, cont_count, join_summary + ); + + if let Err(msg) = contract_checks::verify_boundary_contract_at_creation(boundary, &context) { + return Err(msg); // Fail-Fast: [joinir/contract:B*] error + } + } + + // Phase 287 P0.5: Delegated to boundary_logging module + 
boundary_logging::log_boundary_info(boundary, &trace, verbose); + + // Phase 1: Allocate block IDs for all functions + // Phase 177-3: block_allocator now returns exit_block_id to avoid conflicts + let (mut remapper, exit_block_id) = + block_allocator::allocate_blocks(builder, mir_module, debug)?; + + // Phase 2: Collect values from all functions + let (mut used_values, value_to_func_name, function_params) = + value_collector::collect_values(mir_module, &remapper, debug)?; + + // Phase 171-fix + Phase 256.7-fix: Add condition_bindings' join_values to used_values for remapping + // UNLESS they are function params. Params should NOT be remapped (they're defined + // by boundary Copies and used directly in JoinIR body). + if let Some(boundary) = boundary { + // Build all_params set for checking (moved before condition_bindings loop) + let all_params: std::collections::HashSet = function_params + .values() + .flat_map(|params| params.iter().copied()) + .collect(); + + // Phase 283 P0 DEBUG: Log condition_bindings count + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 283 P0 DEBUG: Processing {} condition_bindings", + boundary.condition_bindings.len() + ), + debug, + ); + + for binding in &boundary.condition_bindings { + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 283 P0 DEBUG: Checking binding '{}' join={:?}", + binding.name, binding.join_value + ), + debug, + ); + + if all_params.contains(&binding.join_value) { + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 256.7-fix: Skipping condition binding '{}' (JoinIR {:?} is a param)", + binding.name, binding.join_value + ), + debug, + ); + } else { + // Phase 283 P0 FIX: Ensure remapper has valid mapping (Fail-Fast) + if let Some(host_id) = builder.variable_ctx.variable_map.get(&binding.name) { + // Variable exists in host context - map join_value to existing host_id + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 283 P0: ✅ Condition binding '{}' JoinIR {:?} → host {:?}", + binding.name, 
binding.join_value, host_id + ), + debug, + ); + remapper.set_value(binding.join_value, *host_id); + used_values.insert(binding.join_value); + } else { + // Fail-Fast: No host ValueId found → surface root cause immediately + return Err(format!( + "[merge/phase2.1] Condition variable '{}' (join={:?}) has no host ValueId in variable_map. \ + This indicates the value was not properly supplied by boundary builder or cond_env. \ + Check: (1) boundary builder supplies all condition vars, (2) cond_env correctly tracks host ValueIds.", + binding.name, binding.join_value + )); + } + } + } + + // Phase 172-3 + Phase 256 P1.10: Add exit_bindings' join_exit_values to used_values + // UNLESS they are function params. Params should NOT be remapped (they're defined + // by call site Copies and used directly in k_exit body). + // Note: all_params was already built above for condition_bindings check. + + for binding in &boundary.exit_bindings { + if all_params.contains(&binding.join_exit_value) { + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 256 P1.10: Skipping exit binding '{}' (JoinIR {:?} is a param)", + binding.carrier_name, binding.join_exit_value + ), + debug, + ); + } else { + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 172-3: Adding exit binding '{}' JoinIR {:?} to used_values", + binding.carrier_name, binding.join_exit_value + ), + debug, + ); + used_values.insert(binding.join_exit_value); + } + } + } + + // Phase 201-A + Phase 287 P0.4: Pre-build loop header PHIs BEFORE Phase 3 + // + // We need to allocate PHI dst ValueIds before remap_values() runs, + // to prevent conflicts where a Const instruction gets a ValueId that + // will later be used as a PHI dst, causing carrier value corruption. 
+ let (mut loop_header_phi_info, merge_entry_block, reserved_value_ids) = + header_phi_prebuild::prebuild_header_phis( + builder, + mir_module, + boundary, + &remapper, + &function_params, + debug, + )?; + + // Phase 3: Remap ValueIds (with reserved PHI dsts protection) + // Phase 287 P0.2: Delegated to value_remapper module + value_remapper::remap_values( + builder, + &used_values, + &mut remapper, + &reserved_value_ids, + debug, + )?; + + // Phase 177-3 DEBUG: Verify remapper state after Phase 3 + trace.stderr_if("[DEBUG-177] === Remapper state after Phase 3 ===", verbose); + trace.stderr_if( + &format!("[DEBUG-177] used_values count: {}", used_values.len()), + verbose, + ); + for value_id in &used_values { + if let Some(remapped) = remapper.get_value(*value_id) { + trace.stderr_if( + &format!("[DEBUG-177] JoinIR {:?} → Host {:?}", value_id, remapped), + verbose, + ); + } else { + trace.stderr_if( + &format!("[DEBUG-177] JoinIR {:?} → NOT FOUND ❌", value_id), + verbose, + ); + } + } + + // Check condition_bindings specifically + if let Some(boundary) = boundary { + trace.stderr_if("[DEBUG-177] === Condition bindings check ===", verbose); + for binding in &boundary.condition_bindings { + let lookup_result = remapper.get_value(binding.join_value); + trace.stderr_if( + &format!( + "[DEBUG-177] '{}': JoinIR {:?} → {:?}", + binding.name, binding.join_value, lookup_result + ), + verbose, + ); + } + } + trace.stderr_if("[DEBUG-177] ==============================", verbose); + + // Phase 3.5: Override remapper for function parameters to use PHI dsts + // + // Phase 201-A: This phase now uses the loop_header_phi_info built before Phase 3. + // The PHI dst allocation has been moved earlier to prevent ValueId conflicts. 
+ if let Some(boundary) = boundary { + if let Some(loop_var_name) = &boundary.loop_var_name { + // Phase 201-A: PHI info is already built (before Phase 3) - just use it + + // Phase 33-21: Override remapper for loop_step's parameters + // + // JoinIR generates separate parameter ValueIds for each function: + // - main(): ValueId(0), ValueId(1), ... for (i_init, carrier1_init, ...) + // - loop_step(): ValueId(3), ValueId(4), ... for (i_param, carrier1_param, ...) + // + // The loop body uses loop_step's parameters, so we need to remap THOSE + // to the header PHI dsts, not main()'s parameters. + // + // We get loop_step's parameters from function_params collected earlier. + // Phase 33-21: Override remapper for ALL functions' parameters + // + // JoinIR generates separate parameter ValueIds for each function: + // - main (join_func_0): ValueId(0), ValueId(1), ... for (i_init, carrier1_init, ...) + // - loop_step (join_func_1): ValueId(3), ValueId(4), ... for (i_param, carrier1_param, ...) + // + // ALL of these need to be mapped to header PHI dsts so that: + // 1. condition evaluation uses PHI result + // 2. loop body uses PHI result + // 3. tail call args are correctly routed + + // Phase 177-3 fix: Protect condition-ONLY bindings from being overridden to PHI dsts + // + // Problem: condition_bindings may contain: + // 1. True condition-only variables (e.g., 'limit' in loop(i < limit)) - NOT carriers + // 2. 
Body-only carriers added by Phase 176-5 (e.g., 'result') - ARE carriers + // + // We must ONLY protect (1), not (2), because: + // - Condition-only vars should keep their HOST mapping (e.g., limit = %8) + // - Body-only carriers MUST be remapped to PHI dsts (e.g., result = %24) + // + // Solution: Protect condition_bindings that are NOT in exit_bindings (i.e., not carriers) + let carrier_names: std::collections::HashSet<&str> = boundary + .exit_bindings + .iter() + .map(|eb| eb.carrier_name.as_str()) + .collect(); + + let condition_binding_ids: std::collections::HashSet = boundary + .condition_bindings + .iter() + .filter(|cb| !carrier_names.contains(cb.name.as_str())) + .map(|cb| cb.join_value) + .collect(); + + if !condition_binding_ids.is_empty() { + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 177-3: Protected ValueIds (condition-only, not carriers): {:?}", + condition_binding_ids + ), + verbose, + ); + for cb in &boundary.condition_bindings { + let is_carrier = carrier_names.contains(cb.name.as_str()); + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 177-3: '{}': JoinIR {:?} (carrier={})", + cb.name, cb.join_value, is_carrier + ), + verbose, + ); + } + } + + let canonical_main = crate::mir::join_ir::lowering::canonical_names::MAIN; + let canonical_loop_step = crate::mir::join_ir::lowering::canonical_names::LOOP_STEP; + let main_func_name = if function_params.contains_key(canonical_main) { + canonical_main + } else { + "join_func_0" + }; + let loop_step_func_name = if function_params.contains_key(canonical_loop_step) { + canonical_loop_step + } else { + "join_func_1" + }; + + if function_params.get(main_func_name).is_none() { + trace.stderr_if( + &format!( + "[cf_loop/joinir] WARNING: function_params.get('{}') returned None. 
Available keys: {:?}", + main_func_name, + function_params.keys().collect::>() + ), + verbose, + ); + } + if let Some(main_params) = function_params.get(main_func_name) { + trace.stderr_if( + &format!( + "[DEBUG-177] Phase 33-21: main ({}) params: {:?}", + main_func_name, main_params + ), + verbose, + ); + trace.stderr_if( + &format!( + "[DEBUG-177] Phase 33-21: carrier_phis count: {}, names: {:?}", + loop_header_phi_info.carrier_phis.len(), + loop_header_phi_info + .carrier_phis + .iter() + .map(|(n, _)| n.as_str()) + .collect::>() + ), + verbose, + ); + // Map main's parameters to header PHI dsts + // main params: [i_init, carrier1_init, ...] + // carrier_phis: [("i", entry), ("sum", entry), ...] + for (idx, (carrier_name, entry)) in + loop_header_phi_info.carrier_phis.iter().enumerate() + { + if let Some(&main_param) = main_params.get(idx) { + // Phase 177-3: Don't override condition_bindings + if condition_binding_ids.contains(&main_param) { + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 177-3: Skipping override for condition_binding {:?} ('{}')", + main_param, carrier_name + ), + verbose, + ); + continue; + } + trace.stderr_if( + &format!( + "[DEBUG-177] Phase 33-21: REMAP main param[{}] {:?} → {:?} ('{}')", + idx, main_param, entry.phi_dst, carrier_name + ), + verbose, + ); + remapper.set_value(main_param, entry.phi_dst); + } + } + } + + // Phase 177-3-B: Handle body-only carriers + // These are carriers in carrier_phis that are NOT in main function params. + // They appear in condition_bindings (added by Phase 176-5) but need PHI remapping. 
+ for (carrier_name, entry) in &loop_header_phi_info.carrier_phis { + // Check if this carrier has a condition_binding + if let Some(binding) = boundary + .condition_bindings + .iter() + .find(|cb| cb.name == *carrier_name) + { + // Skip if it's a true condition-only variable (already protected above) + if condition_binding_ids.contains(&binding.join_value) { + continue; + } + // This is a body-only carrier - remap it to PHI dst + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 177-3-B: Body-only carrier '{}': JoinIR {:?} → PHI {:?}", + carrier_name, binding.join_value, entry.phi_dst + ), + verbose, + ); + remapper.set_value(binding.join_value, entry.phi_dst); + } + } + + // Map loop_step's parameters + // DEBUG-177: Always log function_params keys to diagnose multi-carrier issue + trace.stderr_if( + &format!( + "[DEBUG-177] Phase 33-21: function_params keys: {:?}", + function_params.keys().collect::>() + ), + verbose, + ); + if function_params.get(loop_step_func_name).is_none() { + trace.stderr_if( + &format!( + "[cf_loop/joinir] WARNING: function_params.get('{}') returned None. Available keys: {:?}", + loop_step_func_name, + function_params.keys().collect::>() + ), + verbose, + ); + } + if let Some(loop_step_params) = function_params.get(loop_step_func_name) { + // DEBUG-177: Always log loop_step params + trace.stderr_if( + &format!( + "[DEBUG-177] Phase 33-21: loop_step ({}) params: {:?}", + loop_step_func_name, loop_step_params + ), + verbose, + ); + // Phase 177-FIX: Process loop_step params but skip if already mapped + // + // We use a name-based approach: for each carrier_phi, check if + // its join_value was already set in Phase 177-3-B (body-only carriers). + // Only process loop_step params for carriers NOT already handled. 
+ for loop_step_param in loop_step_params { + // Phase 177-3: Don't override condition_bindings + if condition_binding_ids.contains(loop_step_param) { + trace.stderr_if( + &format!( + "[DEBUG-177] Phase 177-FIX: Skipping condition_binding {:?}", + loop_step_param + ), + verbose, + ); + continue; + } + // Find which carrier this param belongs to by matching join_value + // Check if this param was already handled by Phase 177-3-B + let already_mapped = boundary.condition_bindings.iter().any(|cb| { + cb.join_value == *loop_step_param + && loop_header_phi_info + .carrier_phis + .iter() + .any(|(name, _)| name == &cb.name) + }); + if already_mapped { + trace.stderr_if( + &format!( + "[DEBUG-177] Phase 177-FIX: Skipping {:?} (already mapped by Phase 177-3-B)", + loop_step_param + ), + verbose, + ); + continue; + } + // Phase 177-STRUCT-2: Use carrier_order for index-based matching + // + // Problem: BTreeMap iterates in alphabetical order, but JoinIR + // generates params in exit_bindings order. + // + // Solution: Use carrier_order (Vec) which preserves insertion order. + if let Some(param_idx) = + loop_step_params.iter().position(|p| p == loop_step_param) + { + // Map params[i] to carrier_order[i] + if let (Some(carrier_name), Some(entry)) = ( + loop_header_phi_info.get_carrier_at_index(param_idx), + loop_header_phi_info.get_entry_at_index(param_idx), + ) { + trace.stderr_if( + &format!( + "[DEBUG-177] Phase 177-STRUCT-2: REMAP loop_step param[{}] {:?} → {:?} (carrier '{}')", + param_idx, loop_step_param, entry.phi_dst, carrier_name + ), + verbose, + ); + remapper.set_value(*loop_step_param, entry.phi_dst); + } + } + } + } + + if function_params.get(main_func_name).is_none() + && function_params.get(loop_step_func_name).is_none() + { + // Fallback: Use old behavior (ValueId(0), ValueId(1), ...) 
+ // This handles patterns that don't have loop_step function + if let Some(phi_dst) = loop_header_phi_info.get_carrier_phi(loop_var_name) { + // Phase 177-3: Don't override condition_bindings + if !condition_binding_ids.contains(&ValueId(0)) { + remapper.set_value(ValueId(0), phi_dst); + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 33-16 fallback: Override remap ValueId(0) → {:?} (PHI dst)", + phi_dst + ), + debug, + ); + } else { + trace.stderr_if( + "[cf_loop/joinir] Phase 177-3 fallback: Skipping override for condition_binding ValueId(0)", + verbose, + ); + } + } + // Phase 177-STRUCT-2: Use carrier_order for deterministic iteration + for (idx, carrier_name) in loop_header_phi_info.carrier_order.iter().enumerate() { + if carrier_name == loop_var_name { + continue; + } + let entry = match loop_header_phi_info.carrier_phis.get(carrier_name) { + Some(e) => e, + None => continue, + }; + let join_value_id = ValueId(idx as u32); + // Phase 177-3: Don't override condition_bindings + if !condition_binding_ids.contains(&join_value_id) { + remapper.set_value(join_value_id, entry.phi_dst); + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 33-20 fallback: Override remap {:?} → {:?} (carrier '{}' PHI dst)", + join_value_id, entry.phi_dst, carrier_name + ), + debug, + ); + } else { + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 177-3 fallback: Skipping override for condition_binding {:?} ('{}')", + join_value_id, carrier_name + ), + verbose, + ); + } + } + } + + // Phase 177-3 DEBUG: Check remapper after Phase 33-21 overrides + trace.stderr_if("[DEBUG-177] === Remapper state after Phase 33-21 ===", verbose); + for binding in &boundary.condition_bindings { + let lookup_result = remapper.get_value(binding.join_value); + trace.stderr_if( + &format!( + "[DEBUG-177] '{}': JoinIR {:?} → {:?} (after 33-21)", + binding.name, binding.join_value, lookup_result + ), + verbose, + ); + } + + // Phase 201-A: loop_header_phi_info already built (no assignment 
needed) + } + } + + // Phase 4: Merge blocks and rewrite instructions + // Phase 33-16: Pass mutable loop_header_phi_info for latch_incoming tracking + // Phase 177-3: Pass exit_block_id from allocator to avoid conflicts + // Phase 260 P0.1: Use rewriter module (re-exports instruction_rewriter) + let merge_result = rewriter::merge_and_rewrite( + builder, + mir_module, + &mut remapper, + &value_to_func_name, + &function_params, + boundary, + &mut loop_header_phi_info, + exit_block_id, + debug, + )?; + + // Phase 4.5: Finalize loop header PHIs (insert into header block) + // + // By now, rewriter has set latch_incoming for all carriers. + // We can finalize the PHIs and insert them into the header block. + if !loop_header_phi_info.carrier_phis.is_empty() { + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 4.5: Finalizing {} header PHIs", + loop_header_phi_info.carrier_phis.len() + ), + debug, + ); + LoopHeaderPhiBuilder::finalize(builder, &loop_header_phi_info, debug)?; + } + + // Contract check (Fail-Fast): ensure we didn't leave dangling Jump/Branch targets. + // Phase 131 Task 6: Use MergeConfig.strict_mode instead of env checks + if config.strict_mode || config.dev_log { + if let Some(ref current_func) = builder.scope_ctx.current_function { + // Note: exit_block_id may be allocated but not inserted yet (it becomes the + // current block after merge, and subsequent AST lowering fills it). + // We still want to catch truly dangling targets (e.g., jumps to skipped k_exit). + let contracts = MergeContracts { + allowed_missing_jump_targets: vec![merge_result.exit_block_id], + }; + contract_checks::verify_all_terminator_targets_exist(current_func, &contracts)?; + } + } + + // Phase 5: Build exit PHI (expr result only, not carrier PHIs) + // Phase 33-20: Carrier PHIs are now taken from header PHI info, not exit block + // Phase 246-EX: REVERT Phase 33-20 - Use EXIT PHI dsts, not header PHI dsts! 
+ // Phase 131 P1.5: DirectValue mode completely skips PHI generation + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 131 P1.5 DEBUG: boundary={:?}, mode={:?}", + boundary.is_some(), + boundary.map(|b| b.exit_reconnect_mode) + ), + debug, + ); + + // Phase 131 P1.5: Check if DirectValue mode (skip PHI generation) + let is_direct_value_mode = boundary + .map(|b| b.exit_reconnect_mode == crate::mir::join_ir::lowering::carrier_info::ExitReconnectMode::DirectValue) + .unwrap_or(false); + + // Phase 131 P1.5: Mode detection (dev-only visibility) + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 131 P1.5: exit_reconnect_mode={:?}, is_direct_value_mode={}", + boundary.map(|b| b.exit_reconnect_mode), + is_direct_value_mode + ), + debug || config.dev_log, + ); + + let (exit_phi_result_id, exit_carrier_phis) = if is_direct_value_mode { + // DirectValue mode: Skip PHI generation completely + trace.stderr_if( + "[cf_loop/joinir] Phase 131 P1.5: DirectValue mode - skipping exit PHI generation", + debug, + ); + (None, BTreeMap::new()) + } else { + // Phi mode: Generate exit PHIs as usual + trace.stderr_if( + "[cf_loop/joinir] Phase 131 P1.5: Phi mode - generating exit PHIs", + debug, + ); + exit_phi_builder::build_exit_phi( + builder, + merge_result.exit_block_id, + &merge_result.exit_phi_inputs, + &merge_result.carrier_inputs, + debug, + )? + }; + + // Phase 118 P2: Contract check (Fail-Fast) - exit_bindings LoopState carriers must have exit PHIs. 
+ // Phase 131 P1.5: Skip this check in DirectValue mode + if let Some(boundary) = boundary { + if !is_direct_value_mode { + contract_checks::verify_exit_bindings_have_exit_phis(boundary, &exit_carrier_phis)?; + } + } + + // Phase 118 P1: Dev-only carrier-phi SSOT logs (exit_bindings vs carrier_inputs vs exit_carrier_phis) + // Phase 131 Task 6: Use config.dev_log instead of env check + if config.dev_log { + if let Some(boundary) = boundary { + let exit_binding_names: Vec<&str> = boundary + .exit_bindings + .iter() + .map(|b| b.carrier_name.as_str()) + .collect(); + let carrier_input_names: Vec<&str> = + merge_result.carrier_inputs.keys().map(|s| s.as_str()).collect(); + let exit_phi_names: Vec<&str> = + exit_carrier_phis.keys().map(|s| s.as_str()).collect(); + + trace.stderr_if( + &format!( + "[joinir/phase118/dev] exit_bindings carriers={:?}", + exit_binding_names + ), + true, + ); + trace.stderr_if( + &format!( + "[joinir/phase118/dev] carrier_inputs keys={:?}", + carrier_input_names + ), + true, + ); + trace.stderr_if( + &format!( + "[joinir/phase118/dev] exit_carrier_phis keys={:?}", + exit_phi_names + ), + true, + ); + } + } + + // Phase 246-EX: CRITICAL FIX - Use exit PHI dsts for variable_map reconnection + // + // **Why EXIT PHI, not HEADER PHI?** + // + // Header PHI represents the value at the BEGINNING of each iteration. + // Exit PHI represents the FINAL value when leaving the loop (from any exit path). + // + // For Pattern 2 loops with multiple exit paths (natural exit + break): + // - Header PHI: `%15 = phi [%3, bb7], [%42, bb14]` (loop variable at iteration start) + // - Exit PHI: `%5 = phi [%15, bb11], [%15, bb13]` (final value from exit paths) + // + // When we exit the loop, we want the FINAL value (%5), not the iteration-start value (%15). + // Phase 33-20 incorrectly used header PHI, causing loops to return initial values (e.g., 0 instead of 42). 
+ // + // Example (_atoi): + // - Initial: result=0 (header PHI) + // - After iteration 1: result=4 (updated in loop body) + // - After iteration 2: result=42 (updated in loop body) + // - Exit: Should return 42 (exit PHI), not 0 (header PHI initial value) + // + // The exit PHI correctly merges values from both exit paths, giving us the final result. + let carrier_phis = &exit_carrier_phis; + + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 246-EX: Using EXIT PHI dsts for variable_map (not header): {:?}", + carrier_phis + .iter() + .map(|(n, v)| (n.as_str(), v)) + .collect::>() + ), + debug && !carrier_phis.is_empty(), + ); + + // Phase 6: Reconnect boundary (if specified) + // Phase 197-B: Pass remapper to enable per-carrier exit value lookup + // Phase 33-10-Refactor-P3: Delegate to ExitLineOrchestrator + // Phase 246-EX: Now uses EXIT PHI dsts (reverted Phase 33-20) + // Phase 131 P2: DirectValue mode SSOT uses MergeResult.remapped_exit_values + let remapped_exit_values = merge_result.remapped_exit_values.clone(); + + if let Some(boundary) = boundary { + exit_line::ExitLineOrchestrator::execute( + builder, + boundary, + carrier_phis, + &remapped_exit_values, // Phase 131 P1.5: Now populated with exit PHI dsts + debug, + )?; + } + + let exit_block_id = merge_result.exit_block_id; + + // Phase 256.7-fix: Use merge_entry_block for the Jump + // This is the block where boundary Copies are injected (main's entry when condition_bindings exist). + // The host should Jump here first, then main's tail call jumps to the loop header. 
+ let entry_block = merge_entry_block; + + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 256.7-fix: Entry block (merge_entry_block): {:?}, loop_header={:?}", + entry_block, loop_header_phi_info.header_block + ), + debug, + ); + trace.stderr_if( + &format!( + "[cf_loop/joinir] Current block before emit_jump: {:?}", + builder.current_block + ), + debug, + ); + trace.stderr_if( + &format!( + "[cf_loop/joinir] Jumping to entry block: {:?}", + entry_block + ), + debug, + ); + + crate::mir::builder::emission::branch::emit_jump(builder, entry_block)?; + + trace.stderr_if( + &format!( + "[cf_loop/joinir] After emit_jump, current_block: {:?}", + builder.current_block + ), + debug, + ); + + // Switch to exit block for subsequent code + builder.start_new_block(exit_block_id)?; + + // Phase 287 P0.5: Delegated to boundary_logging module + boundary_logging::log_merge_complete(mir_module.functions.len(), exit_block_id, &trace, debug); + + // Phase 200-3: Verify JoinIR contracts (debug only) + #[cfg(debug_assertions)] + { + if let Some(boundary) = boundary { + if let Some(ref func) = builder.scope_ctx.current_function { + debug_assertions::verify_joinir_contracts( + func, + loop_header_phi_info.header_block, + exit_block_id, + &loop_header_phi_info, + boundary, + ); + } + trace.stderr_if( + "[cf_loop/joinir] Phase 200-3: Contract verification passed", + debug, + ); + } + } + + // Phase 246-EX-FIX: Handle loop variable expr_result separately from carrier expr_result + // + // The loop variable (e.g., 'i') is returned via exit_phi_result_id, not carrier_phis. + // Other carriers use carrier_phis. We need to check which case we're in. 
+ let expr_result_value = if let Some(b) = boundary { + if let Some(expr_result_id) = b.expr_result { + // Check if expr_result is the loop variable + if let Some(loop_var_name) = &b.loop_var_name { + // Find the exit binding for the loop variable + let loop_var_binding = b + .exit_bindings + .iter() + .find(|binding| binding.carrier_name == *loop_var_name); + + if let Some(binding) = loop_var_binding { + if binding.join_exit_value == expr_result_id { + // expr_result is the loop variable! Use exit_phi_result_id + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 246-EX-FIX: expr_result {:?} is loop variable '{}', using exit_phi_result_id {:?}", + expr_result_id, loop_var_name, exit_phi_result_id + ), + debug, + ); + exit_phi_result_id + } else { + // expr_result is not the loop variable, resolve as carrier + expr_result_resolver::ExprResultResolver::resolve( + Some(expr_result_id), + b.exit_bindings.as_slice(), + &carrier_phis, + &remapper, + debug, + )? + } + } else { + // No loop variable binding, resolve normally + expr_result_resolver::ExprResultResolver::resolve( + Some(expr_result_id), + b.exit_bindings.as_slice(), + &carrier_phis, + &remapper, + debug, + )? + } + } else { + // No loop variable name, resolve normally + expr_result_resolver::ExprResultResolver::resolve( + Some(expr_result_id), + b.exit_bindings.as_slice(), + &carrier_phis, + &remapper, + debug, + )? 
+ } + } else { + None + } + } else { + None + }; + + // Return expr_result if present, otherwise fall back to exit_phi_result_id + if let Some(resolved) = expr_result_value { + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 246-EX-FIX: Returning expr_result_value {:?}", + resolved + ), + debug, + ); + Ok(Some(resolved)) + } else { + // Fallback: return exit_phi_result_id (for legacy patterns or carrier-only loops) + trace.stderr_if( + &format!( + "[cf_loop/joinir] Phase 221-R: Returning exit_phi_result_id (fallback): {:?}", + exit_phi_result_id + ), + debug && exit_phi_result_id.is_some(), + ); + Ok(exit_phi_result_id) + } +} diff --git a/src/mir/builder/control_flow/joinir/merge/mod.rs b/src/mir/builder/control_flow/joinir/merge/mod.rs index f145989b..ac1f6493 100644 --- a/src/mir/builder/control_flow/joinir/merge/mod.rs +++ b/src/mir/builder/control_flow/joinir/merge/mod.rs @@ -8,7 +8,7 @@ //! 3. ValueId remapping (uses JoinIrIdRemapper) //! 4. Instruction rewriting (instruction_rewriter.rs) //! 5. Exit PHI construction (exit_phi_builder.rs) -//! 6. Boundary reconnection (inline in this file) +//! 6. Boundary reconnection (coordinator.rs) //! //! 
Phase 4 Refactoring: Breaking down 714-line merge_joinir_mir_blocks() into focused modules @@ -16,6 +16,8 @@ mod block_allocator; mod block_remapper; // Phase 284 P1: Block ID remap SSOT mod boundary_logging; // Phase 287 P0.5: Boundary logging consolidation mod carrier_init_builder; +mod config; +mod coordinator; pub(super) mod contract_checks; // Phase 256 P1.5-DBG: Exposed for patterns to access verify_boundary_entry_params mod debug_assertions; // Phase 286C-4.3: Debug-only assertions (split from contract_checks) mod entry_selector; // Phase 287 P0.3: Entry function selection (SSOT) @@ -38,1016 +40,15 @@ mod value_remapper; // Phase 287 P0.2: ValueId remapping helper #[cfg(test)] mod tests; // Phase 132-R0 Task 3: Continuation contract tests +use crate::mir::builder::control_flow::joinir::trace; + // Phase 33-17: Re-export for use by other modules pub use loop_header_phi_builder::LoopHeaderPhiBuilder; pub use loop_header_phi_info::LoopHeaderPhiInfo; // Phase 131 P1 Task 1: Re-export MergeContracts for SSOT visibility +#[allow(unused_imports)] pub use merge_result::MergeContracts; -// Phase 131 P1 Task 6: MergeConfig is defined in this module (no re-export needed) - -use super::trace; -use crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary; -use crate::mir::join_ir::lowering::error_tags; -use crate::mir::{MirModule, ValueId}; -use std::collections::BTreeMap; - -/// Phase 131 P1 Task 6: Merge configuration consolidation -/// -/// Consolidates all merge-related configuration into a single structure -/// to reduce parameter clutter and improve maintainability. 
-#[derive(Debug, Clone)] -pub struct MergeConfig { - /// Enable detailed trace logs (dev mode) - pub dev_log: bool, - /// Enable strict contract verification (fail-fast on violations) - pub strict_mode: bool, - /// Exit reconnection mode (Phi or DirectValue) - pub exit_reconnect_mode: Option, - /// Allow missing exit block in contract checks (typically exit_block_id before insertion) - pub allow_missing_exit_block: bool, -} - -impl MergeConfig { - /// Default configuration for normal operation - pub fn default() -> Self { - Self { - dev_log: crate::config::env::joinir_dev_enabled(), - strict_mode: crate::config::env::joinir_strict_enabled(), - exit_reconnect_mode: None, - allow_missing_exit_block: true, - } - } - - /// Strict configuration for development/debugging (all checks enabled) - pub fn strict() -> Self { - Self { - dev_log: true, - strict_mode: true, - exit_reconnect_mode: None, - allow_missing_exit_block: true, - } - } - - /// Configuration for specific debug session - pub fn with_debug(debug: bool) -> Self { - let mut config = Self::default(); - config.dev_log = debug || config.dev_log; - config - } -} - -/// Phase 49-3.2: Merge JoinIR-generated MIR blocks into current_function -/// -/// # Phase 189: Multi-Function MIR Merge -/// -/// This merges JoinIR-generated blocks by: -/// 1. Remapping all block IDs across ALL functions to avoid conflicts -/// 2. Remapping all value IDs across ALL functions to avoid conflicts -/// 3. Adding all blocks from all functions to current_function -/// 4. Jumping from current_block to the entry block -/// 5. 
Converting Return → Jump to exit block for all functions -/// -/// **Multi-Function Support** (Phase 189): -/// - Pattern 1 (Simple While) generates 3 functions: entry + loop_step + k_exit -/// - All functions are flattened into current_function with global ID remapping -/// - Single exit block receives all Return instructions from all functions -/// -/// # Phase 188-Impl-3: JoinInlineBoundary Support -/// -/// When `boundary` is provided, injects Copy instructions at the entry block -/// to connect host ValueIds to JoinIR local ValueIds: -/// -/// ```text -/// entry_block: -/// // Injected by boundary -/// ValueId(100) = Copy ValueId(4) // join_input → host_input -/// // Original JoinIR instructions follow... -/// ``` -/// -/// This enables clean separation: JoinIR uses local IDs (0,1,2...), -/// host uses its own IDs, and Copy instructions bridge the gap. -/// -/// # Returns -/// -/// Returns `Ok(Some(exit_phi_id))` if the merged JoinIR functions have return values -/// that were collected into an exit block PHI. 
さらに、`boundary` に -/// host_outputs が指定されている場合は、exit PHI の結果をホスト側の -/// SSA スロットへ再接続する(variable_map 内の ValueId を更新する)。 -pub(in crate::mir::builder) fn merge_joinir_mir_blocks( - builder: &mut crate::mir::builder::MirBuilder, - mir_module: &MirModule, - boundary: Option<&JoinInlineBoundary>, - debug: bool, -) -> Result, String> { - // Phase 131 Task 6: Use MergeConfig for consolidated configuration - let config = MergeConfig::with_debug(debug); - let verbose = config.dev_log; - let trace = trace::trace(); - - trace.stderr_if( - &format!( - "[cf_loop/joinir] merge_joinir_mir_blocks called with {} functions", - mir_module.functions.len() - ), - debug, - ); - - if let Some(boundary) = boundary { - if let Err(msg) = boundary.validate_jump_args_layout() { - return Err(error_tags::freeze_with_hint( - "phase256/jump_args_layout", - &msg, - "set JoinInlineBoundary.jump_args_layout via builder and avoid expr_result/carrier mismatch", - )); - } - } - - // Phase 286 P3: Validate boundary contract BEFORE merge begins - // This catches boundary construction bugs early with clear diagnostics - if let Some(boundary) = boundary { - // Enrich context with host_fn and join-side info for better error diagnostics - let host_fn = builder - .scope_ctx - .current_function - .as_ref() - .map(|f| f.signature.name.as_str()) - .unwrap_or(""); - - // Join-side info: continuation count + boundary summary - let cont_count = boundary.continuation_func_ids.len(); - let join_summary = format!( - "conts={} exits={} conds={}", - cont_count, - boundary.exit_bindings.len(), - boundary.condition_bindings.len() - ); - - let context = format!( - "merge_joinir_mir_blocks host={} join={} phase= [{}]", - host_fn, cont_count, join_summary - ); - - if let Err(msg) = contract_checks::verify_boundary_contract_at_creation(boundary, &context) { - return Err(msg); // Fail-Fast: [joinir/contract:B*] error - } - } - - // Phase 287 P0.5: Delegated to boundary_logging module - 
boundary_logging::log_boundary_info(boundary, &trace, verbose); - - // Phase 1: Allocate block IDs for all functions - // Phase 177-3: block_allocator now returns exit_block_id to avoid conflicts - let (mut remapper, exit_block_id) = - block_allocator::allocate_blocks(builder, mir_module, debug)?; - - // Phase 2: Collect values from all functions - let (mut used_values, value_to_func_name, function_params) = - value_collector::collect_values(mir_module, &remapper, debug)?; - - // Phase 171-fix + Phase 256.7-fix: Add condition_bindings' join_values to used_values for remapping - // UNLESS they are function params. Params should NOT be remapped (they're defined - // by boundary Copies and used directly in JoinIR body). - if let Some(boundary) = boundary { - // Build all_params set for checking (moved before condition_bindings loop) - let all_params: std::collections::HashSet = function_params - .values() - .flat_map(|params| params.iter().copied()) - .collect(); - - // Phase 283 P0 DEBUG: Log condition_bindings count - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 283 P0 DEBUG: Processing {} condition_bindings", - boundary.condition_bindings.len() - ), - debug, - ); - - for binding in &boundary.condition_bindings { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 283 P0 DEBUG: Checking binding '{}' join={:?}", - binding.name, binding.join_value - ), - debug, - ); - - if all_params.contains(&binding.join_value) { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 256.7-fix: Skipping condition binding '{}' (JoinIR {:?} is a param)", - binding.name, binding.join_value - ), - debug, - ); - } else { - // Phase 283 P0 FIX: Ensure remapper has valid mapping (Fail-Fast) - if let Some(host_id) = builder.variable_ctx.variable_map.get(&binding.name) { - // Variable exists in host context - map join_value to existing host_id - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 283 P0: ✅ Condition binding '{}' JoinIR {:?} → host {:?}", - binding.name, 
binding.join_value, host_id - ), - debug, - ); - remapper.set_value(binding.join_value, *host_id); - used_values.insert(binding.join_value); - } else { - // Fail-Fast: No host ValueId found → surface root cause immediately - return Err(format!( - "[merge/phase2.1] Condition variable '{}' (join={:?}) has no host ValueId in variable_map. \ - This indicates the value was not properly supplied by boundary builder or cond_env. \ - Check: (1) boundary builder supplies all condition vars, (2) cond_env correctly tracks host ValueIds.", - binding.name, binding.join_value - )); - } - } - } - - // Phase 172-3 + Phase 256 P1.10: Add exit_bindings' join_exit_values to used_values - // UNLESS they are function params. Params should NOT be remapped (they're defined - // by call site Copies and used directly in k_exit body). - // Note: all_params was already built above for condition_bindings check. - - for binding in &boundary.exit_bindings { - if all_params.contains(&binding.join_exit_value) { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 256 P1.10: Skipping exit binding '{}' (JoinIR {:?} is a param)", - binding.carrier_name, binding.join_exit_value - ), - debug, - ); - } else { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 172-3: Adding exit binding '{}' JoinIR {:?} to used_values", - binding.carrier_name, binding.join_exit_value - ), - debug, - ); - used_values.insert(binding.join_exit_value); - } - } - } - - // Phase 201-A + Phase 287 P0.4: Pre-build loop header PHIs BEFORE Phase 3 - // - // We need to allocate PHI dst ValueIds before remap_values() runs, - // to prevent conflicts where a Const instruction gets a ValueId that - // will later be used as a PHI dst, causing carrier value corruption. 
- let (mut loop_header_phi_info, merge_entry_block, reserved_value_ids) = - header_phi_prebuild::prebuild_header_phis( - builder, - mir_module, - boundary, - &remapper, - &function_params, - debug, - )?; - - // Phase 3: Remap ValueIds (with reserved PHI dsts protection) - // Phase 287 P0.2: Delegated to value_remapper module - value_remapper::remap_values( - builder, - &used_values, - &mut remapper, - &reserved_value_ids, - debug, - )?; - - // Phase 177-3 DEBUG: Verify remapper state after Phase 3 - trace.stderr_if("[DEBUG-177] === Remapper state after Phase 3 ===", verbose); - trace.stderr_if( - &format!("[DEBUG-177] used_values count: {}", used_values.len()), - verbose, - ); - for value_id in &used_values { - if let Some(remapped) = remapper.get_value(*value_id) { - trace.stderr_if( - &format!("[DEBUG-177] JoinIR {:?} → Host {:?}", value_id, remapped), - verbose, - ); - } else { - trace.stderr_if( - &format!("[DEBUG-177] JoinIR {:?} → NOT FOUND ❌", value_id), - verbose, - ); - } - } - - // Check condition_bindings specifically - if let Some(boundary) = boundary { - trace.stderr_if("[DEBUG-177] === Condition bindings check ===", verbose); - for binding in &boundary.condition_bindings { - let lookup_result = remapper.get_value(binding.join_value); - trace.stderr_if( - &format!( - "[DEBUG-177] '{}': JoinIR {:?} → {:?}", - binding.name, binding.join_value, lookup_result - ), - verbose, - ); - } - } - trace.stderr_if("[DEBUG-177] ==============================", verbose); - - // Phase 3.5: Override remapper for function parameters to use PHI dsts - // - // Phase 201-A: This phase now uses the loop_header_phi_info built before Phase 3. - // The PHI dst allocation has been moved earlier to prevent ValueId conflicts. 
- if let Some(boundary) = boundary { - if let Some(loop_var_name) = &boundary.loop_var_name { - // Phase 201-A: PHI info is already built (before Phase 3) - just use it - - // Phase 33-21: Override remapper for loop_step's parameters - // - // JoinIR generates separate parameter ValueIds for each function: - // - main(): ValueId(0), ValueId(1), ... for (i_init, carrier1_init, ...) - // - loop_step(): ValueId(3), ValueId(4), ... for (i_param, carrier1_param, ...) - // - // The loop body uses loop_step's parameters, so we need to remap THOSE - // to the header PHI dsts, not main()'s parameters. - // - // We get loop_step's parameters from function_params collected earlier. - // Phase 33-21: Override remapper for ALL functions' parameters - // - // JoinIR generates separate parameter ValueIds for each function: - // - main (join_func_0): ValueId(0), ValueId(1), ... for (i_init, carrier1_init, ...) - // - loop_step (join_func_1): ValueId(3), ValueId(4), ... for (i_param, carrier1_param, ...) - // - // ALL of these need to be mapped to header PHI dsts so that: - // 1. condition evaluation uses PHI result - // 2. loop body uses PHI result - // 3. tail call args are correctly routed - - // Phase 177-3 fix: Protect condition-ONLY bindings from being overridden to PHI dsts - // - // Problem: condition_bindings may contain: - // 1. True condition-only variables (e.g., 'limit' in loop(i < limit)) - NOT carriers - // 2. 
Body-only carriers added by Phase 176-5 (e.g., 'result') - ARE carriers - // - // We must ONLY protect (1), not (2), because: - // - Condition-only vars should keep their HOST mapping (e.g., limit = %8) - // - Body-only carriers MUST be remapped to PHI dsts (e.g., result = %24) - // - // Solution: Protect condition_bindings that are NOT in exit_bindings (i.e., not carriers) - let carrier_names: std::collections::HashSet<&str> = boundary - .exit_bindings - .iter() - .map(|eb| eb.carrier_name.as_str()) - .collect(); - - let condition_binding_ids: std::collections::HashSet = boundary - .condition_bindings - .iter() - .filter(|cb| !carrier_names.contains(cb.name.as_str())) - .map(|cb| cb.join_value) - .collect(); - - if !condition_binding_ids.is_empty() { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 177-3: Protected ValueIds (condition-only, not carriers): {:?}", - condition_binding_ids - ), - verbose, - ); - for cb in &boundary.condition_bindings { - let is_carrier = carrier_names.contains(cb.name.as_str()); - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 177-3: '{}': JoinIR {:?} (carrier={})", - cb.name, cb.join_value, is_carrier - ), - verbose, - ); - } - } - - let canonical_main = crate::mir::join_ir::lowering::canonical_names::MAIN; - let canonical_loop_step = crate::mir::join_ir::lowering::canonical_names::LOOP_STEP; - let main_func_name = if function_params.contains_key(canonical_main) { - canonical_main - } else { - "join_func_0" - }; - let loop_step_func_name = if function_params.contains_key(canonical_loop_step) { - canonical_loop_step - } else { - "join_func_1" - }; - - if function_params.get(main_func_name).is_none() { - trace.stderr_if( - &format!( - "[cf_loop/joinir] WARNING: function_params.get('{}') returned None. 
Available keys: {:?}", - main_func_name, - function_params.keys().collect::>() - ), - verbose, - ); - } - if let Some(main_params) = function_params.get(main_func_name) { - trace.stderr_if( - &format!( - "[DEBUG-177] Phase 33-21: main ({}) params: {:?}", - main_func_name, main_params - ), - verbose, - ); - trace.stderr_if( - &format!( - "[DEBUG-177] Phase 33-21: carrier_phis count: {}, names: {:?}", - loop_header_phi_info.carrier_phis.len(), - loop_header_phi_info - .carrier_phis - .iter() - .map(|(n, _)| n.as_str()) - .collect::>() - ), - verbose, - ); - // Map main's parameters to header PHI dsts - // main params: [i_init, carrier1_init, ...] - // carrier_phis: [("i", entry), ("sum", entry), ...] - for (idx, (carrier_name, entry)) in - loop_header_phi_info.carrier_phis.iter().enumerate() - { - if let Some(&main_param) = main_params.get(idx) { - // Phase 177-3: Don't override condition_bindings - if condition_binding_ids.contains(&main_param) { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 177-3: Skipping override for condition_binding {:?} ('{}')", - main_param, carrier_name - ), - verbose, - ); - continue; - } - trace.stderr_if( - &format!( - "[DEBUG-177] Phase 33-21: REMAP main param[{}] {:?} → {:?} ('{}')", - idx, main_param, entry.phi_dst, carrier_name - ), - verbose, - ); - remapper.set_value(main_param, entry.phi_dst); - } - } - } - - // Phase 177-3-B: Handle body-only carriers - // These are carriers in carrier_phis that are NOT in main function params. - // They appear in condition_bindings (added by Phase 176-5) but need PHI remapping. 
- for (carrier_name, entry) in &loop_header_phi_info.carrier_phis { - // Check if this carrier has a condition_binding - if let Some(binding) = boundary - .condition_bindings - .iter() - .find(|cb| cb.name == *carrier_name) - { - // Skip if it's a true condition-only variable (already protected above) - if condition_binding_ids.contains(&binding.join_value) { - continue; - } - // This is a body-only carrier - remap it to PHI dst - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 177-3-B: Body-only carrier '{}': JoinIR {:?} → PHI {:?}", - carrier_name, binding.join_value, entry.phi_dst - ), - verbose, - ); - remapper.set_value(binding.join_value, entry.phi_dst); - } - } - - // Map loop_step's parameters - // DEBUG-177: Always log function_params keys to diagnose multi-carrier issue - trace.stderr_if( - &format!( - "[DEBUG-177] Phase 33-21: function_params keys: {:?}", - function_params.keys().collect::>() - ), - verbose, - ); - if function_params.get(loop_step_func_name).is_none() { - trace.stderr_if( - &format!( - "[cf_loop/joinir] WARNING: function_params.get('{}') returned None. Available keys: {:?}", - loop_step_func_name, - function_params.keys().collect::>() - ), - verbose, - ); - } - if let Some(loop_step_params) = function_params.get(loop_step_func_name) { - // DEBUG-177: Always log loop_step params - trace.stderr_if( - &format!( - "[DEBUG-177] Phase 33-21: loop_step ({}) params: {:?}", - loop_step_func_name, loop_step_params - ), - verbose, - ); - // Phase 177-FIX: Process loop_step params but skip if already mapped - // - // We use a name-based approach: for each carrier_phi, check if - // its join_value was already set in Phase 177-3-B (body-only carriers). - // Only process loop_step params for carriers NOT already handled. 
- for loop_step_param in loop_step_params { - // Phase 177-3: Don't override condition_bindings - if condition_binding_ids.contains(loop_step_param) { - trace.stderr_if( - &format!( - "[DEBUG-177] Phase 177-FIX: Skipping condition_binding {:?}", - loop_step_param - ), - verbose, - ); - continue; - } - // Find which carrier this param belongs to by matching join_value - // Check if this param was already handled by Phase 177-3-B - let already_mapped = boundary.condition_bindings.iter().any(|cb| { - cb.join_value == *loop_step_param - && loop_header_phi_info - .carrier_phis - .iter() - .any(|(name, _)| name == &cb.name) - }); - if already_mapped { - trace.stderr_if( - &format!( - "[DEBUG-177] Phase 177-FIX: Skipping {:?} (already mapped by Phase 177-3-B)", - loop_step_param - ), - verbose, - ); - continue; - } - // Phase 177-STRUCT-2: Use carrier_order for index-based matching - // - // Problem: BTreeMap iterates in alphabetical order, but JoinIR - // generates params in exit_bindings order. - // - // Solution: Use carrier_order (Vec) which preserves insertion order. - if let Some(param_idx) = - loop_step_params.iter().position(|p| p == loop_step_param) - { - // Map params[i] to carrier_order[i] - if let (Some(carrier_name), Some(entry)) = ( - loop_header_phi_info.get_carrier_at_index(param_idx), - loop_header_phi_info.get_entry_at_index(param_idx), - ) { - trace.stderr_if( - &format!( - "[DEBUG-177] Phase 177-STRUCT-2: REMAP loop_step param[{}] {:?} → {:?} (carrier '{}')", - param_idx, loop_step_param, entry.phi_dst, carrier_name - ), - verbose, - ); - remapper.set_value(*loop_step_param, entry.phi_dst); - } - } - } - } - - if function_params.get(main_func_name).is_none() - && function_params.get(loop_step_func_name).is_none() - { - // Fallback: Use old behavior (ValueId(0), ValueId(1), ...) 
- // This handles patterns that don't have loop_step function - if let Some(phi_dst) = loop_header_phi_info.get_carrier_phi(loop_var_name) { - // Phase 177-3: Don't override condition_bindings - if !condition_binding_ids.contains(&ValueId(0)) { - remapper.set_value(ValueId(0), phi_dst); - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 33-16 fallback: Override remap ValueId(0) → {:?} (PHI dst)", - phi_dst - ), - debug, - ); - } else { - trace.stderr_if( - "[cf_loop/joinir] Phase 177-3 fallback: Skipping override for condition_binding ValueId(0)", - verbose, - ); - } - } - // Phase 177-STRUCT-2: Use carrier_order for deterministic iteration - for (idx, carrier_name) in loop_header_phi_info.carrier_order.iter().enumerate() { - if carrier_name == loop_var_name { - continue; - } - let entry = match loop_header_phi_info.carrier_phis.get(carrier_name) { - Some(e) => e, - None => continue, - }; - let join_value_id = ValueId(idx as u32); - // Phase 177-3: Don't override condition_bindings - if !condition_binding_ids.contains(&join_value_id) { - remapper.set_value(join_value_id, entry.phi_dst); - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 33-20 fallback: Override remap {:?} → {:?} (carrier '{}' PHI dst)", - join_value_id, entry.phi_dst, carrier_name - ), - debug, - ); - } else { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 177-3 fallback: Skipping override for condition_binding {:?} ('{}')", - join_value_id, carrier_name - ), - verbose, - ); - } - } - } - - // Phase 177-3 DEBUG: Check remapper after Phase 33-21 overrides - trace.stderr_if("[DEBUG-177] === Remapper state after Phase 33-21 ===", verbose); - for binding in &boundary.condition_bindings { - let lookup_result = remapper.get_value(binding.join_value); - trace.stderr_if( - &format!( - "[DEBUG-177] '{}': JoinIR {:?} → {:?} (after 33-21)", - binding.name, binding.join_value, lookup_result - ), - verbose, - ); - } - - // Phase 201-A: loop_header_phi_info already built (no assignment 
needed) - } - } - - // Phase 4: Merge blocks and rewrite instructions - // Phase 33-16: Pass mutable loop_header_phi_info for latch_incoming tracking - // Phase 177-3: Pass exit_block_id from allocator to avoid conflicts - // Phase 260 P0.1: Use rewriter module (re-exports instruction_rewriter) - let merge_result = rewriter::merge_and_rewrite( - builder, - mir_module, - &mut remapper, - &value_to_func_name, - &function_params, - boundary, - &mut loop_header_phi_info, - exit_block_id, - debug, - )?; - - // Phase 4.5: Finalize loop header PHIs (insert into header block) - // - // By now, rewriter has set latch_incoming for all carriers. - // We can finalize the PHIs and insert them into the header block. - if !loop_header_phi_info.carrier_phis.is_empty() { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 4.5: Finalizing {} header PHIs", - loop_header_phi_info.carrier_phis.len() - ), - debug, - ); - LoopHeaderPhiBuilder::finalize(builder, &loop_header_phi_info, debug)?; - } - - // Contract check (Fail-Fast): ensure we didn't leave dangling Jump/Branch targets. - // Phase 131 Task 6: Use MergeConfig.strict_mode instead of env checks - if config.strict_mode || config.dev_log { - if let Some(ref current_func) = builder.scope_ctx.current_function { - // Note: exit_block_id may be allocated but not inserted yet (it becomes the - // current block after merge, and subsequent AST lowering fills it). - // We still want to catch truly dangling targets (e.g., jumps to skipped k_exit). - let contracts = MergeContracts { - allowed_missing_jump_targets: vec![merge_result.exit_block_id], - }; - contract_checks::verify_all_terminator_targets_exist(current_func, &contracts)?; - } - } - - // Phase 5: Build exit PHI (expr result only, not carrier PHIs) - // Phase 33-20: Carrier PHIs are now taken from header PHI info, not exit block - // Phase 246-EX: REVERT Phase 33-20 - Use EXIT PHI dsts, not header PHI dsts! 
- // Phase 131 P1.5: DirectValue mode completely skips PHI generation - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 131 P1.5 DEBUG: boundary={:?}, mode={:?}", - boundary.is_some(), - boundary.map(|b| b.exit_reconnect_mode) - ), - debug, - ); - - // Phase 131 P1.5: Check if DirectValue mode (skip PHI generation) - let is_direct_value_mode = boundary - .map(|b| b.exit_reconnect_mode == crate::mir::join_ir::lowering::carrier_info::ExitReconnectMode::DirectValue) - .unwrap_or(false); - - // Phase 131 P1.5: Mode detection (dev-only visibility) - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 131 P1.5: exit_reconnect_mode={:?}, is_direct_value_mode={}", - boundary.map(|b| b.exit_reconnect_mode), - is_direct_value_mode - ), - debug || config.dev_log, - ); - - let (exit_phi_result_id, exit_carrier_phis) = if is_direct_value_mode { - // DirectValue mode: Skip PHI generation completely - trace.stderr_if( - "[cf_loop/joinir] Phase 131 P1.5: DirectValue mode - skipping exit PHI generation", - debug, - ); - (None, BTreeMap::new()) - } else { - // Phi mode: Generate exit PHIs as usual - trace.stderr_if( - "[cf_loop/joinir] Phase 131 P1.5: Phi mode - generating exit PHIs", - debug, - ); - exit_phi_builder::build_exit_phi( - builder, - merge_result.exit_block_id, - &merge_result.exit_phi_inputs, - &merge_result.carrier_inputs, - debug, - )? - }; - - // Phase 118 P2: Contract check (Fail-Fast) - exit_bindings LoopState carriers must have exit PHIs. 
- // Phase 131 P1.5: Skip this check in DirectValue mode - if let Some(boundary) = boundary { - if !is_direct_value_mode { - contract_checks::verify_exit_bindings_have_exit_phis(boundary, &exit_carrier_phis)?; - } - } - - // Phase 118 P1: Dev-only carrier-phi SSOT logs (exit_bindings vs carrier_inputs vs exit_carrier_phis) - // Phase 131 Task 6: Use config.dev_log instead of env check - if config.dev_log { - if let Some(boundary) = boundary { - let exit_binding_names: Vec<&str> = boundary - .exit_bindings - .iter() - .map(|b| b.carrier_name.as_str()) - .collect(); - let carrier_input_names: Vec<&str> = - merge_result.carrier_inputs.keys().map(|s| s.as_str()).collect(); - let exit_phi_names: Vec<&str> = - exit_carrier_phis.keys().map(|s| s.as_str()).collect(); - - trace.stderr_if( - &format!( - "[joinir/phase118/dev] exit_bindings carriers={:?}", - exit_binding_names - ), - true, - ); - trace.stderr_if( - &format!( - "[joinir/phase118/dev] carrier_inputs keys={:?}", - carrier_input_names - ), - true, - ); - trace.stderr_if( - &format!( - "[joinir/phase118/dev] exit_carrier_phis keys={:?}", - exit_phi_names - ), - true, - ); - } - } - - // Phase 246-EX: CRITICAL FIX - Use exit PHI dsts for variable_map reconnection - // - // **Why EXIT PHI, not HEADER PHI?** - // - // Header PHI represents the value at the BEGINNING of each iteration. - // Exit PHI represents the FINAL value when leaving the loop (from any exit path). - // - // For Pattern 2 loops with multiple exit paths (natural exit + break): - // - Header PHI: `%15 = phi [%3, bb7], [%42, bb14]` (loop variable at iteration start) - // - Exit PHI: `%5 = phi [%15, bb11], [%15, bb13]` (final value from exit paths) - // - // When we exit the loop, we want the FINAL value (%5), not the iteration-start value (%15). - // Phase 33-20 incorrectly used header PHI, causing loops to return initial values (e.g., 0 instead of 42). 
- // - // Example (_atoi): - // - Initial: result=0 (header PHI) - // - After iteration 1: result=4 (updated in loop body) - // - After iteration 2: result=42 (updated in loop body) - // - Exit: Should return 42 (exit PHI), not 0 (header PHI initial value) - // - // The exit PHI correctly merges values from both exit paths, giving us the final result. - let carrier_phis = &exit_carrier_phis; - - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 246-EX: Using EXIT PHI dsts for variable_map (not header): {:?}", - carrier_phis - .iter() - .map(|(n, v)| (n.as_str(), v)) - .collect::>() - ), - debug && !carrier_phis.is_empty(), - ); - - // Phase 6: Reconnect boundary (if specified) - // Phase 197-B: Pass remapper to enable per-carrier exit value lookup - // Phase 33-10-Refactor-P3: Delegate to ExitLineOrchestrator - // Phase 246-EX: Now uses EXIT PHI dsts (reverted Phase 33-20) - // Phase 131 P2: DirectValue mode SSOT uses MergeResult.remapped_exit_values - let remapped_exit_values = merge_result.remapped_exit_values.clone(); - - if let Some(boundary) = boundary { - exit_line::ExitLineOrchestrator::execute( - builder, - boundary, - carrier_phis, - &remapped_exit_values, // Phase 131 P1.5: Now populated with exit PHI dsts - debug, - )?; - } - - let exit_block_id = merge_result.exit_block_id; - - // Phase 256.7-fix: Use merge_entry_block for the Jump - // This is the block where boundary Copies are injected (main's entry when condition_bindings exist). - // The host should Jump here first, then main's tail call jumps to the loop header. 
- let entry_block = merge_entry_block; - - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 256.7-fix: Entry block (merge_entry_block): {:?}, loop_header={:?}", - entry_block, loop_header_phi_info.header_block - ), - debug, - ); - trace.stderr_if( - &format!( - "[cf_loop/joinir] Current block before emit_jump: {:?}", - builder.current_block - ), - debug, - ); - trace.stderr_if( - &format!( - "[cf_loop/joinir] Jumping to entry block: {:?}", - entry_block - ), - debug, - ); - - crate::mir::builder::emission::branch::emit_jump(builder, entry_block)?; - - trace.stderr_if( - &format!( - "[cf_loop/joinir] After emit_jump, current_block: {:?}", - builder.current_block - ), - debug, - ); - - // Switch to exit block for subsequent code - builder.start_new_block(exit_block_id)?; - - // Phase 287 P0.5: Delegated to boundary_logging module - boundary_logging::log_merge_complete(mir_module.functions.len(), exit_block_id, &trace, debug); - - // Phase 200-3: Verify JoinIR contracts (debug only) - #[cfg(debug_assertions)] - { - if let Some(boundary) = boundary { - if let Some(ref func) = builder.scope_ctx.current_function { - debug_assertions::verify_joinir_contracts( - func, - loop_header_phi_info.header_block, - exit_block_id, - &loop_header_phi_info, - boundary, - ); - } - trace.stderr_if( - "[cf_loop/joinir] Phase 200-3: Contract verification passed", - debug, - ); - } - } - - // Phase 246-EX-FIX: Handle loop variable expr_result separately from carrier expr_result - // - // The loop variable (e.g., 'i') is returned via exit_phi_result_id, not carrier_phis. - // Other carriers use carrier_phis. We need to check which case we're in. 
- let expr_result_value = if let Some(b) = boundary { - if let Some(expr_result_id) = b.expr_result { - // Check if expr_result is the loop variable - if let Some(loop_var_name) = &b.loop_var_name { - // Find the exit binding for the loop variable - let loop_var_binding = b - .exit_bindings - .iter() - .find(|binding| binding.carrier_name == *loop_var_name); - - if let Some(binding) = loop_var_binding { - if binding.join_exit_value == expr_result_id { - // expr_result is the loop variable! Use exit_phi_result_id - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 246-EX-FIX: expr_result {:?} is loop variable '{}', using exit_phi_result_id {:?}", - expr_result_id, loop_var_name, exit_phi_result_id - ), - debug, - ); - exit_phi_result_id - } else { - // expr_result is not the loop variable, resolve as carrier - expr_result_resolver::ExprResultResolver::resolve( - Some(expr_result_id), - b.exit_bindings.as_slice(), - &carrier_phis, - &remapper, - debug, - )? - } - } else { - // No loop variable binding, resolve normally - expr_result_resolver::ExprResultResolver::resolve( - Some(expr_result_id), - b.exit_bindings.as_slice(), - &carrier_phis, - &remapper, - debug, - )? - } - } else { - // No loop variable name, resolve normally - expr_result_resolver::ExprResultResolver::resolve( - Some(expr_result_id), - b.exit_bindings.as_slice(), - &carrier_phis, - &remapper, - debug, - )? 
- } - } else { - None - } - } else { - None - }; - - // Return expr_result if present, otherwise fall back to exit_phi_result_id - if let Some(resolved) = expr_result_value { - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 246-EX-FIX: Returning expr_result_value {:?}", - resolved - ), - debug, - ); - Ok(Some(resolved)) - } else { - // Fallback: return exit_phi_result_id (for legacy patterns or carrier-only loops) - trace.stderr_if( - &format!( - "[cf_loop/joinir] Phase 221-R: Returning exit_phi_result_id (fallback): {:?}", - exit_phi_result_id - ), - debug && exit_phi_result_id.is_some(), - ); - Ok(exit_phi_result_id) - } -} - -// Phase 287 P0: merge/mod.rs Modularization Complete -// -// Line reduction: 1,555 (Phase 286 start) → 1,034 (Phase 287 P0.6) = -521 lines (-33%) -// -// Extracted modules: -// - P0.1: debug_assertions.rs (verification functions) -// - P0.2: value_remapper.rs (ValueId remapping helper) -// - P0.3: entry_selector.rs (SSOT entry function selection) -// - P0.4: header_phi_prebuild.rs (PHI pre-build orchestration) -// - P0.5: boundary_logging.rs (consolidated logging) -// -// Remaining in mod.rs: -// - Public API: merge_joinir_mir_blocks() -// - Orchestration: Phase 1-6 pipeline coordination -// - Phase 3.5: Parameter → PHI dst remapping (complex, kept inline) -// - Phase 6: Boundary reconnection and expr_result resolution -// -// SSOT Principles Enforced: -// - Entry selection: boundary.loop_header_func_name > continuation_func_ids (no string heuristics) -// - Logging: debug/verbose only (no constant logs in quick profile) -// - Reserved ValueIds: PHI dsts protected from conflicts +// Phase 131 P1 Task 6: MergeConfig is defined in config.rs (re-exported here) +#[allow(unused_imports)] +pub use config::MergeConfig; +pub(in crate::mir::builder) use coordinator::merge_joinir_mir_blocks; diff --git a/src/mir/control_tree/step_tree.rs b/src/mir/control_tree/step_tree.rs deleted file mode 100644 index 4eb45f77..00000000 --- 
a/src/mir/control_tree/step_tree.rs +++ /dev/null @@ -1,999 +0,0 @@ -use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span, UnaryOperator}; -use crate::mir::control_tree::step_tree_contract_box::{ - StepTreeContract, StepTreeContractBox, -}; -use crate::mir::control_tree::step_tree_facts::StepTreeFacts; - - -#[derive(Debug, Clone, PartialEq)] -pub struct StepTree { - pub root: StepNode, - pub features: StepTreeFeatures, - pub contract: StepTreeContract, - pub signature: StepTreeSignature, -} - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct StepTreeFeatures { - pub has_if: bool, - pub has_loop: bool, - pub has_break: bool, - pub has_continue: bool, - pub has_return: bool, - pub max_if_depth: u32, - pub max_loop_depth: u32, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum StepNode { - Block(Vec), - If { - cond: AstSummary, - cond_ast: AstNodeHandle, - then_branch: Box, - else_branch: Option>, - span: Span, - }, - Loop { - cond: AstSummary, - cond_ast: AstNodeHandle, - body: Box, - span: Span, - }, - Stmt { kind: StepStmtKind, span: Span }, -} - -/// AST 参照の軽量ハンドル(Phase 119: dev-only 観測用) -/// -/// SSOT: cond は AST 参照を保持する。 -/// - 将来的に AstExprId 等に移行可能。 -/// - Phase 119 では Clone を持つ Box で実装(dev-only なので許容)。 -#[derive(Debug, Clone, PartialEq)] -pub struct AstNodeHandle(pub Box); - -#[derive(Debug, Clone, PartialEq)] -pub enum StepStmtKind { - LocalDecl { vars: Vec }, - Assign { - target: Option, - /// Phase 128: assignment value AST (for Normalized lowering) - value_ast: Option, - }, - Print, - Return { - /// Phase 123: return value AST (for Normalized lowering) - value_ast: Option, - }, - Break, - Continue, - Other(&'static str), -} - -#[derive(Debug, Clone, PartialEq)] -pub enum AstSummary { - Variable(String), - Literal(LiteralValue), - Unary { - op: UnaryOperator, - expr: Box, - }, - Binary { - op: BinaryOperator, - lhs: Box, - rhs: Box, - }, - Other(&'static str), -} - -impl StepTree { - pub fn to_compact_string(&self) -> String { - let mut out 
= String::new(); - self.root.write_compact(&mut out, 0); - out - } - - pub fn signature_basis_string(&self) -> String { - let mut kinds = Vec::new(); - collect_node_kinds(&self.root, &mut kinds); - let kinds = kinds.join(","); - self.contract.signature_basis_string(&kinds) - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum ExitKind { - Return, - Break, - Continue, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum StepCapability { - If, - Loop, - NestedIf, - NestedLoop, - Return, - Break, - Continue, - TryCatch, - Throw, - Lambda, - While, - ForRange, - Match, - Arrow, -} - -// StepTreeContract moved to step_tree_contract_box.rs (Phase 120) - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct StepTreeSignature(pub u64); - -impl StepTreeSignature { - pub fn from_basis_string(basis: &str) -> Self { - // FNV-1a 64-bit (stable, no external deps). - let mut hash: u64 = 0xcbf29ce484222325; - for b in basis.as_bytes() { - hash ^= *b as u64; - hash = hash.wrapping_mul(0x100000001b3); - } - StepTreeSignature(hash) - } - - pub fn to_hex(self) -> String { - format!("{:016x}", self.0) - } -} - -impl StepNode { - fn write_compact(&self, out: &mut String, indent: usize) { - let pad = " ".repeat(indent); - match self { - StepNode::Block(nodes) => { - out.push_str(&format!("{pad}Block(len={})\n", nodes.len())); - for n in nodes { - n.write_compact(out, indent + 1); - } - } - StepNode::If { - cond, - then_branch, - else_branch, - .. - } => { - out.push_str(&format!( - "{pad}If(cond={})\n", - cond.to_compact_string() - )); - out.push_str(&format!("{pad} then:\n")); - then_branch.write_compact(out, indent + 2); - if let Some(else_branch) = else_branch { - out.push_str(&format!("{pad} else:\n")); - else_branch.write_compact(out, indent + 2); - } - } - StepNode::Loop { cond, body, .. 
} => { - out.push_str(&format!( - "{pad}Loop(cond={})\n", - cond.to_compact_string() - )); - body.write_compact(out, indent + 1); - } - StepNode::Stmt { kind, .. } => { - out.push_str(&format!("{pad}Stmt({})\n", kind.to_compact_string())); - } - } - } -} - -impl StepStmtKind { - fn to_compact_string(&self) -> String { - match self { - StepStmtKind::LocalDecl { vars } => format!("local({})", vars.join(",")), - StepStmtKind::Assign { target, .. } => match target { - Some(name) => format!("assign({name})"), - None => "assign(?)".to_string(), - }, - StepStmtKind::Print => "print".to_string(), - StepStmtKind::Return { value_ast } => { - if value_ast.is_some() { - "return(value)".to_string() - } else { - "return(void)".to_string() - } - } - StepStmtKind::Break => "break".to_string(), - StepStmtKind::Continue => "continue".to_string(), - StepStmtKind::Other(name) => format!("other:{name}"), - } - } -} - -impl AstSummary { - fn to_compact_string(&self) -> String { - match self { - AstSummary::Variable(name) => format!("var:{name}"), - AstSummary::Literal(lit) => format!("lit:{}", lit_to_sig_string(lit)), - AstSummary::Unary { op, expr } => format!("({op:?} {})", expr.to_compact_string()), - AstSummary::Binary { op, lhs, rhs } => format!( - "({} {} {})", - lhs.to_compact_string(), - op, - rhs.to_compact_string() - ), - AstSummary::Other(k) => format!("other:{k}"), - } - } -} - -fn lit_to_sig_string(lit: &LiteralValue) -> String { - match lit { - LiteralValue::String(s) => format!("str:{}", escape_sig_atom(s)), - LiteralValue::Integer(i) => format!("int:{i}"), - LiteralValue::Float(f) => format!("float:{:016x}", f.to_bits()), - LiteralValue::Bool(b) => format!("bool:{}", if *b { 1 } else { 0 }), - LiteralValue::Null => "null".to_string(), - LiteralValue::Void => "void".to_string(), - } -} - -fn escape_sig_atom(s: &str) -> String { - // Minimal stable escaping for signature strings. 
- s.replace('\\', "\\\\").replace('|', "\\|").replace(',', "\\,") -} - -pub struct StepTreeBuilderBox; - -impl StepTreeBuilderBox { - pub fn build_from_ast(ast: &ASTNode) -> StepTree { - match ast { - ASTNode::Program { statements, .. } => Self::build_from_block(statements), - ASTNode::ScopeBox { body, .. } => Self::build_from_block(body), - _ => { - let (node, features) = Self::build_node(ast, 0, 0); - build_step_tree(node, features) - } - } - } - - pub fn build_from_block(stmts: &[ASTNode]) -> StepTree { - let mut nodes = Vec::with_capacity(stmts.len()); - let mut features = StepTreeFeatures::default(); - for stmt in stmts { - let (node, node_features) = Self::build_node(stmt, 0, 0); - nodes.push(node); - features = merge_features(features, node_features); - } - build_step_tree(StepNode::Block(nodes), features) - } - - fn build_node(ast: &ASTNode, if_depth: u32, loop_depth: u32) -> (StepNode, StepTreeFeatures) { - match ast { - ASTNode::If { - condition, - then_body, - else_body, - span, - } => { - let cond = summarize_ast(condition); - let cond_ast = AstNodeHandle(condition.clone()); - let (then_node, then_features) = - Self::build_block_node(then_body, if_depth + 1, loop_depth); - let (else_node, else_features) = match else_body { - Some(else_body) => { - let (node, f) = - Self::build_block_node(else_body, if_depth + 1, loop_depth); - (Some(Box::new(node)), f) - } - None => (None, StepTreeFeatures::default()), - }; - let mut features = StepTreeFeatures { - has_if: true, - max_if_depth: (if_depth + 1).max(then_features.max_if_depth), - ..StepTreeFeatures::default() - }; - features = merge_features(features, then_features); - features = merge_features(features, else_features); - - ( - StepNode::If { - cond, - cond_ast, - then_branch: Box::new(then_node), - else_branch: else_node, - span: span.clone(), - }, - features, - ) - } - ASTNode::Loop { - condition, body, span, .. 
- } => { - let cond = summarize_ast(condition); - let cond_ast = AstNodeHandle(condition.clone()); - let (body_node, body_features) = - Self::build_block_node(body, if_depth, loop_depth + 1); - let mut features = StepTreeFeatures { - has_loop: true, - max_loop_depth: (loop_depth + 1).max(body_features.max_loop_depth), - ..StepTreeFeatures::default() - }; - features = merge_features(features, body_features); - ( - StepNode::Loop { - cond, - cond_ast, - body: Box::new(body_node), - span: span.clone(), - }, - features, - ) - } - ASTNode::ScopeBox { body, span } => { - let (node, features) = Self::build_block_node(body, if_depth, loop_depth); - (node.with_span(span.clone()), features) - } - ASTNode::Return { value, span } => ( - StepNode::Stmt { - kind: StepStmtKind::Return { - value_ast: value.as_ref().map(|v| AstNodeHandle(v.clone())), - }, - span: span.clone(), - }, - StepTreeFeatures { - has_return: true, - ..StepTreeFeatures::default() - }, - ), - ASTNode::Break { span } => ( - StepNode::Stmt { - kind: StepStmtKind::Break, - span: span.clone(), - }, - StepTreeFeatures { - has_break: true, - ..StepTreeFeatures::default() - }, - ), - ASTNode::Continue { span } => ( - StepNode::Stmt { - kind: StepStmtKind::Continue, - span: span.clone(), - }, - StepTreeFeatures { - has_continue: true, - ..StepTreeFeatures::default() - }, - ), - ASTNode::Local { - variables, span, .. - } => ( - StepNode::Stmt { - kind: StepStmtKind::LocalDecl { - vars: variables.clone(), - }, - span: span.clone(), - }, - StepTreeFeatures::default(), - ), - ASTNode::Assignment { span, value, .. } => ( - StepNode::Stmt { - kind: StepStmtKind::Assign { - target: match ast { - ASTNode::Assignment { target, .. } => match target.as_ref() { - ASTNode::Variable { name, .. 
} => Some(name.clone()), - _ => None, - }, - _ => None, - }, - // Phase 128: Store value AST for Normalized lowering - value_ast: Some(AstNodeHandle(value.clone())), - }, - span: span.clone(), - }, - StepTreeFeatures::default(), - ), - ASTNode::Print { span, .. } => ( - StepNode::Stmt { - kind: StepStmtKind::Print, - span: span.clone(), - }, - StepTreeFeatures::default(), - ), - other => ( - StepNode::Stmt { - kind: StepStmtKind::Other(ast_kind_name(other)), - span: other.span(), - }, - StepTreeFeatures::default(), - ), - } - } - - fn build_block_node( - stmts: &[ASTNode], - if_depth: u32, - loop_depth: u32, - ) -> (StepNode, StepTreeFeatures) { - let mut nodes = Vec::with_capacity(stmts.len()); - let mut features = StepTreeFeatures::default(); - for stmt in stmts { - let (node, node_features) = Self::build_node(stmt, if_depth, loop_depth); - nodes.push(node); - features = merge_features(features, node_features); - } - (StepNode::Block(nodes), features) - } -} - -fn build_step_tree(root: StepNode, features: StepTreeFeatures) -> StepTree { - // Phase 120: Facts → Contract → Signature (separated concerns) - let facts = extract_facts_from_tree(&root, &features); - let contract = StepTreeContractBox::from_facts(&facts); - let mut kinds = Vec::new(); - collect_node_kinds(&root, &mut kinds); - let kinds = kinds.join(","); - let basis = contract.signature_basis_string(&kinds); - let signature = StepTreeSignature::from_basis_string(&basis); - - StepTree { - root, - features, - contract, - signature, - } -} - -/// Extract raw facts from StepNode tree (Phase 120) -fn extract_facts_from_tree(root: &StepNode, features: &StepTreeFeatures) -> StepTreeFacts { - let mut facts = StepTreeFacts::new(); - - // Required caps from features (structural only) - if features.has_if { - facts.add_capability(StepCapability::If); - } - if features.max_if_depth > 1 { - facts.add_capability(StepCapability::NestedIf); - } - if features.has_loop { - facts.add_capability(StepCapability::Loop); - } 
- if features.max_loop_depth > 1 { - facts.add_capability(StepCapability::NestedLoop); - } - if features.has_return { - facts.add_capability(StepCapability::Return); - } - if features.has_break { - facts.add_capability(StepCapability::Break); - } - if features.has_continue { - facts.add_capability(StepCapability::Continue); - } - - walk_for_facts(root, &mut facts); - facts -} - -/// Walk StepNode tree to collect facts (Phase 120, Phase 124) -fn walk_for_facts(node: &StepNode, facts: &mut StepTreeFacts) { - match node { - StepNode::Block(nodes) => { - for n in nodes { - walk_for_facts(n, facts); - } - } - StepNode::If { - cond, - cond_ast, - then_branch, - else_branch, - .. - } => { - facts.add_cond_sig(cond.to_compact_string()); - // Phase 124: Extract reads from condition AST - extract_variables_from_ast(&cond_ast.0, facts); - walk_for_facts(then_branch, facts); - if let Some(else_branch) = else_branch { - walk_for_facts(else_branch, facts); - } - } - StepNode::Loop { cond, cond_ast, body, .. } => { - facts.add_cond_sig(cond.to_compact_string()); - // Phase 124: Extract reads from condition AST - extract_variables_from_ast(&cond_ast.0, facts); - walk_for_facts(body, facts); - } - StepNode::Stmt { kind, .. 
} => { - match kind { - StepStmtKind::LocalDecl { vars } => { - for v in vars { - facts.add_write(v.clone()); - } - } - StepStmtKind::Assign { target, value_ast } => { - if let Some(name) = target.as_ref() { - facts.add_write(name.clone()); - } - // Phase 128: Extract reads from assignment value AST - if let Some(ast) = value_ast { - extract_variables_from_ast(&ast.0, facts); - } - } - StepStmtKind::Print => {} - StepStmtKind::Return { value_ast } => { - facts.add_exit(ExitKind::Return); - // Phase 124: Extract reads from return value AST - if let Some(ast) = value_ast { - extract_variables_from_ast(&ast.0, facts); - } - } - StepStmtKind::Break => { - facts.add_exit(ExitKind::Break); - } - StepStmtKind::Continue => { - facts.add_exit(ExitKind::Continue); - } - StepStmtKind::Other(name) => match *name { - "TryCatch" => { - facts.add_capability(StepCapability::TryCatch); - } - "Throw" => { - facts.add_capability(StepCapability::Throw); - } - "Lambda" => { - facts.add_capability(StepCapability::Lambda); - } - "While" => { - facts.add_capability(StepCapability::While); - } - "ForRange" => { - facts.add_capability(StepCapability::ForRange); - } - "MatchExpr" => { - facts.add_capability(StepCapability::Match); - } - "Arrow" => { - facts.add_capability(StepCapability::Arrow); - } - _ => {} - }, - } - } - } -} - -/// Extract Variable names from AST (Phase 124: reads collection) -/// -/// SSOT for reads extraction: -/// - Recursively walk AST tree -/// - Add Variable { name } to facts.reads -/// - Ignore other node types -fn extract_variables_from_ast(ast: &ASTNode, facts: &mut StepTreeFacts) { - match ast { - ASTNode::Variable { name, .. } => { - facts.add_read(name.clone()); - } - // Recursively walk binary/unary operations - ASTNode::BinaryOp { left, right, .. } => { - extract_variables_from_ast(left, facts); - extract_variables_from_ast(right, facts); - } - ASTNode::UnaryOp { operand, .. 
} => { - extract_variables_from_ast(operand, facts); - } - // Function calls - ASTNode::FunctionCall { arguments, .. } => { - for arg in arguments { - extract_variables_from_ast(arg, facts); - } - } - // Method calls - ASTNode::MethodCall { object, arguments, .. } => { - extract_variables_from_ast(object, facts); - for arg in arguments { - extract_variables_from_ast(arg, facts); - } - } - // Field access - ASTNode::FieldAccess { object, .. } => { - extract_variables_from_ast(object, facts); - } - // Array/Index access - ASTNode::Index { target, index, .. } => { - extract_variables_from_ast(target, facts); - extract_variables_from_ast(index, facts); - } - // Assignment (RHS only) - ASTNode::Assignment { value, .. } => { - extract_variables_from_ast(value, facts); - } - // Print - ASTNode::Print { expression, .. } => { - extract_variables_from_ast(expression, facts); - } - // Ignore literals, keywords, and other non-variable nodes - _ => {} - } -} - -fn merge_features(mut a: StepTreeFeatures, b: StepTreeFeatures) -> StepTreeFeatures { - a.has_if |= b.has_if; - a.has_loop |= b.has_loop; - a.has_break |= b.has_break; - a.has_continue |= b.has_continue; - a.has_return |= b.has_return; - a.max_if_depth = a.max_if_depth.max(b.max_if_depth); - a.max_loop_depth = a.max_loop_depth.max(b.max_loop_depth); - a -} - -// StepTreeContractBox moved to step_tree_contract_box.rs (Phase 120) -// extract_facts_from_tree + walk_for_facts replace the old compute/walk pattern - -fn collect_node_kinds(node: &StepNode, out: &mut Vec) { - match node { - StepNode::Block(nodes) => { - out.push("Block".to_string()); - for n in nodes { - collect_node_kinds(n, out); - } - } - StepNode::If { - then_branch, - else_branch, - .. - } => { - out.push("If".to_string()); - collect_node_kinds(then_branch, out); - if let Some(else_branch) = else_branch { - collect_node_kinds(else_branch, out); - } - } - StepNode::Loop { body, .. 
} => { - out.push("Loop".to_string()); - collect_node_kinds(body, out); - } - StepNode::Stmt { kind, .. } => { - out.push(format!("Stmt({})", kind.to_compact_string())); - } - } -} - -fn summarize_ast(ast: &ASTNode) -> AstSummary { - match ast { - ASTNode::Variable { name, .. } => AstSummary::Variable(name.clone()), - ASTNode::Literal { value, .. } => AstSummary::Literal(value.clone()), - ASTNode::UnaryOp { - operator, operand, .. - } => AstSummary::Unary { - op: operator.clone(), - expr: Box::new(summarize_ast(operand)), - }, - ASTNode::BinaryOp { - operator, - left, - right, - .. - } => AstSummary::Binary { - op: operator.clone(), - lhs: Box::new(summarize_ast(left)), - rhs: Box::new(summarize_ast(right)), - }, - other => AstSummary::Other(ast_kind_name(other)), - } -} - -fn ast_kind_name(ast: &ASTNode) -> &'static str { - match ast { - ASTNode::Program { .. } => "Program", - ASTNode::Assignment { .. } => "Assignment", - ASTNode::Print { .. } => "Print", - ASTNode::If { .. } => "If", - ASTNode::Loop { .. } => "Loop", - ASTNode::While { .. } => "While", - ASTNode::ForRange { .. } => "ForRange", - ASTNode::Return { .. } => "Return", - ASTNode::Break { .. } => "Break", - ASTNode::Continue { .. } => "Continue", - ASTNode::UsingStatement { .. } => "UsingStatement", - ASTNode::ImportStatement { .. } => "ImportStatement", - ASTNode::Nowait { .. } => "Nowait", - ASTNode::AwaitExpression { .. } => "AwaitExpression", - ASTNode::QMarkPropagate { .. } => "QMarkPropagate", - ASTNode::MatchExpr { .. } => "MatchExpr", - ASTNode::ArrayLiteral { .. } => "ArrayLiteral", - ASTNode::MapLiteral { .. } => "MapLiteral", - ASTNode::Lambda { .. } => "Lambda", - ASTNode::Arrow { .. } => "Arrow", - ASTNode::TryCatch { .. } => "TryCatch", - ASTNode::Throw { .. } => "Throw", - ASTNode::BoxDeclaration { .. } => "BoxDeclaration", - ASTNode::FunctionDeclaration { .. } => "FunctionDeclaration", - ASTNode::GlobalVar { .. } => "GlobalVar", - ASTNode::Literal { .. 
} => "Literal", - ASTNode::Variable { .. } => "Variable", - ASTNode::UnaryOp { .. } => "UnaryOp", - ASTNode::BinaryOp { .. } => "BinaryOp", - ASTNode::GroupedAssignmentExpr { .. } => "GroupedAssignmentExpr", - ASTNode::MethodCall { .. } => "MethodCall", - ASTNode::Call { .. } => "Call", - ASTNode::FunctionCall { .. } => "FunctionCall", - ASTNode::FieldAccess { .. } => "FieldAccess", - ASTNode::Index { .. } => "Index", - ASTNode::New { .. } => "New", - ASTNode::This { .. } => "This", - ASTNode::Me { .. } => "Me", - ASTNode::FromCall { .. } => "FromCall", - ASTNode::ThisField { .. } => "ThisField", - ASTNode::MeField { .. } => "MeField", - ASTNode::Local { .. } => "Local", - ASTNode::ScopeBox { .. } => "ScopeBox", - ASTNode::Outbox { .. } => "Outbox", - } -} - -impl StepNode { - fn with_span(self, span: Span) -> StepNode { - match self { - StepNode::Block(nodes) => StepNode::Block(nodes), - StepNode::If { - cond, - cond_ast, - then_branch, - else_branch, - .. - } => StepNode::If { - cond, - cond_ast, - then_branch, - else_branch, - span, - }, - StepNode::Loop { cond, cond_ast, body, .. } => StepNode::Loop { cond, cond_ast, body, span }, - StepNode::Stmt { kind, .. 
} => StepNode::Stmt { kind, span }, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::ast::{Span, ASTNode, BinaryOperator, LiteralValue}; - - fn str_lit(s: &str) -> ASTNode { - ASTNode::Literal { - value: LiteralValue::String(s.to_string()), - span: Span::unknown(), - } - } - - fn eq(a: ASTNode, b: ASTNode) -> ASTNode { - ASTNode::BinaryOp { - operator: BinaryOperator::Equal, - left: Box::new(a), - right: Box::new(b), - span: Span::unknown(), - } - } - - fn assign_x(num: i64) -> ASTNode { - ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "x".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(num), - span: Span::unknown(), - }), - span: Span::unknown(), - } - } - - #[test] - fn build_step_tree_if_only_nested_if_is_structural() { - // Equivalent shape to Phase103 "if-only merge" fixture: - // - // local x = 0 - // if "x" == "x" { if "y" == "z" { x=1 } else { x=2 } } else { x=3 } - // print(x) - let ast = vec![ - ASTNode::Local { - variables: vec!["x".to_string()], - initial_values: vec![Some(Box::new(ASTNode::Literal { - value: LiteralValue::Integer(0), - span: Span::unknown(), - }))], - span: Span::unknown(), - }, - ASTNode::If { - condition: Box::new(eq(str_lit("x"), str_lit("x"))), - then_body: vec![ASTNode::If { - condition: Box::new(eq(str_lit("y"), str_lit("z"))), - then_body: vec![assign_x(1)], - else_body: Some(vec![assign_x(2)]), - span: Span::unknown(), - }], - else_body: Some(vec![assign_x(3)]), - span: Span::unknown(), - }, - ASTNode::Print { - expression: Box::new(ASTNode::Variable { - name: "x".to_string(), - span: Span::unknown(), - }), - span: Span::unknown(), - }, - ]; - - let tree = StepTreeBuilderBox::build_from_block(&ast); - assert!(tree.features.has_if); - assert!(!tree.features.has_loop); - assert_eq!(tree.features.max_if_depth, 2); - assert_eq!(tree.contract.exits.len(), 0); - assert!(tree.contract.writes.contains("x")); - 
assert!(tree.contract.required_caps.contains(&StepCapability::If)); - assert!(tree.contract.required_caps.contains(&StepCapability::NestedIf)); - - let basis = tree.signature_basis_string(); - assert_eq!( - basis, - "kinds=Block,Stmt(local(x)),If,Block,If,Block,Stmt(assign(x)),Block,Stmt(assign(x)),Block,Stmt(assign(x)),Stmt(print);exits=;writes=x;reads=;caps=If,NestedIf;conds=(lit:str:x == lit:str:x)|(lit:str:y == lit:str:z)" - ); - - let tree2 = StepTreeBuilderBox::build_from_block(&ast); - assert_eq!(tree.signature, tree2.signature); - - match tree.root { - StepNode::Block(nodes) => { - assert_eq!(nodes.len(), 3); - match &nodes[1] { - StepNode::If { then_branch, cond_ast, .. } => { - // cond_ast should be populated - assert!(matches!(&cond_ast.0.as_ref(), ASTNode::BinaryOp { .. })); - - match &**then_branch { - StepNode::Block(inner_nodes) => match &inner_nodes[0] { - StepNode::If { cond_ast: inner_cond_ast, .. } => { - // inner cond_ast should also be populated - assert!(matches!(&inner_cond_ast.0.as_ref(), ASTNode::BinaryOp { .. })); - } - other => panic!("expected nested If, got {other:?}"), - }, - other => panic!("expected Block in then_branch, got {other:?}"), - } - } - other => panic!("expected If at index 1, got {other:?}"), - } - } - other => panic!("expected root Block, got {other:?}"), - } - } - - #[test] - fn step_tree_cond_ast_is_populated() { - // Phase 119: cond_ast should hold AST reference. - let ast = vec![ASTNode::If { - condition: Box::new(eq(str_lit("a"), str_lit("b"))), - then_body: vec![assign_x(1)], - else_body: None, - span: Span::unknown(), - }]; - - let tree = StepTreeBuilderBox::build_from_block(&ast); - - match &tree.root { - StepNode::Block(nodes) => match &nodes[0] { - StepNode::If { cond_ast, .. } => { - // cond_ast should be populated with BinaryOp - assert!(matches!(&cond_ast.0.as_ref(), ASTNode::BinaryOp { .. 
})); - } - other => panic!("expected If, got {other:?}"), - }, - other => panic!("expected root Block, got {other:?}"), - } - } - - #[test] - fn step_tree_signature_is_stable_with_cond_ast() { - // Phase 119: cond_ast should NOT affect signature stability. - // Signature is based on cond_sig (AstSummary), not cond_ast. - let ast = vec![ - ASTNode::Loop { - condition: Box::new(ASTNode::Literal { - value: LiteralValue::Bool(true), - span: Span::unknown(), - }), - body: vec![assign_x(1)], - span: Span::unknown(), - }, - ]; - - let tree1 = StepTreeBuilderBox::build_from_block(&ast); - let tree2 = StepTreeBuilderBox::build_from_block(&ast); - - // Signature should be identical (deterministic) - assert_eq!(tree1.signature, tree2.signature); - - // cond_ast should be populated - match &tree1.root { - StepNode::Block(nodes) => match &nodes[0] { - StepNode::Loop { cond_ast, .. } => { - assert!(matches!(&cond_ast.0.as_ref(), ASTNode::Literal { .. })); - } - other => panic!("expected Loop, got {other:?}"), - }, - other => panic!("expected root Block, got {other:?}"), - } - } - - #[test] - fn contract_extracts_loop_exits_and_writes_minimal() { - fn var(name: &str) -> ASTNode { - ASTNode::Variable { - name: name.to_string(), - span: Span::unknown(), - } - } - fn int_lit(v: i64) -> ASTNode { - ASTNode::Literal { - value: LiteralValue::Integer(v), - span: Span::unknown(), - } - } - fn bin(op: BinaryOperator, lhs: ASTNode, rhs: ASTNode) -> ASTNode { - ASTNode::BinaryOp { - operator: op, - left: Box::new(lhs), - right: Box::new(rhs), - span: Span::unknown(), - } - } - fn assign(name: &str, value: ASTNode) -> ASTNode { - ASTNode::Assignment { - target: Box::new(var(name)), - value: Box::new(value), - span: Span::unknown(), - } - } - - // local i=0; local x=0; - // loop(i < 3) { x = x + 1; if x == 2 { break } i = i + 1 } - let ast = vec![ - ASTNode::Local { - variables: vec!["i".to_string()], - initial_values: vec![Some(Box::new(int_lit(0)))], - span: Span::unknown(), - }, - 
ASTNode::Local { - variables: vec!["x".to_string()], - initial_values: vec![Some(Box::new(int_lit(0)))], - span: Span::unknown(), - }, - ASTNode::Loop { - condition: Box::new(bin(BinaryOperator::Less, var("i"), int_lit(3))), - body: vec![ - assign("x", bin(BinaryOperator::Add, var("x"), int_lit(1))), - ASTNode::If { - condition: Box::new(bin(BinaryOperator::Equal, var("x"), int_lit(2))), - then_body: vec![ASTNode::Break { span: Span::unknown() }], - else_body: None, - span: Span::unknown(), - }, - assign("i", bin(BinaryOperator::Add, var("i"), int_lit(1))), - ], - span: Span::unknown(), - }, - ]; - - let tree = StepTreeBuilderBox::build_from_block(&ast); - assert!(tree.features.has_loop); - assert!(tree.contract.exits.contains(&ExitKind::Break)); - assert!(tree.contract.writes.contains("i")); - assert!(tree.contract.writes.contains("x")); - assert!(tree.contract.required_caps.contains(&StepCapability::Loop)); - assert!(tree.contract.required_caps.contains(&StepCapability::If)); - } -} diff --git a/src/mir/control_tree/step_tree/builder.rs b/src/mir/control_tree/step_tree/builder.rs new file mode 100644 index 00000000..daba51fc --- /dev/null +++ b/src/mir/control_tree/step_tree/builder.rs @@ -0,0 +1,218 @@ +use crate::ast::ASTNode; +use crate::mir::control_tree::step_tree_contract_box::StepTreeContractBox; + +use super::fact_extractor::extract_facts_from_tree; +use super::signature::collect_node_kinds; +use super::summary::{ast_kind_name, summarize_ast}; +use super::types::{AstNodeHandle, StepNode, StepStmtKind, StepTree, StepTreeFeatures, StepTreeSignature}; + +pub struct StepTreeBuilderBox; + +impl StepTreeBuilderBox { + pub fn build_from_ast(ast: &ASTNode) -> StepTree { + match ast { + ASTNode::Program { statements, .. } => Self::build_from_block(statements), + ASTNode::ScopeBox { body, .. 
} => Self::build_from_block(body), + _ => { + let (node, features) = Self::build_node(ast, 0, 0); + build_step_tree(node, features) + } + } + } + + pub fn build_from_block(stmts: &[ASTNode]) -> StepTree { + let mut nodes = Vec::with_capacity(stmts.len()); + let mut features = StepTreeFeatures::default(); + for stmt in stmts { + let (node, node_features) = Self::build_node(stmt, 0, 0); + nodes.push(node); + features = merge_features(features, node_features); + } + build_step_tree(StepNode::Block(nodes), features) + } + + fn build_node(ast: &ASTNode, if_depth: u32, loop_depth: u32) -> (StepNode, StepTreeFeatures) { + match ast { + ASTNode::If { + condition, + then_body, + else_body, + span, + } => { + let cond = summarize_ast(condition); + let cond_ast = AstNodeHandle(condition.clone()); + let (then_node, then_features) = + Self::build_block_node(then_body, if_depth + 1, loop_depth); + let (else_node, else_features) = match else_body { + Some(else_body) => { + let (node, f) = + Self::build_block_node(else_body, if_depth + 1, loop_depth); + (Some(Box::new(node)), f) + } + None => (None, StepTreeFeatures::default()), + }; + let mut features = StepTreeFeatures { + has_if: true, + max_if_depth: (if_depth + 1).max(then_features.max_if_depth), + ..StepTreeFeatures::default() + }; + features = merge_features(features, then_features); + features = merge_features(features, else_features); + + ( + StepNode::If { + cond, + cond_ast, + then_branch: Box::new(then_node), + else_branch: else_node, + span: span.clone(), + }, + features, + ) + } + ASTNode::Loop { + condition, body, span, .. 
+ } => { + let cond = summarize_ast(condition); + let cond_ast = AstNodeHandle(condition.clone()); + let (body_node, body_features) = + Self::build_block_node(body, if_depth, loop_depth + 1); + let mut features = StepTreeFeatures { + has_loop: true, + max_loop_depth: (loop_depth + 1).max(body_features.max_loop_depth), + ..StepTreeFeatures::default() + }; + features = merge_features(features, body_features); + ( + StepNode::Loop { + cond, + cond_ast, + body: Box::new(body_node), + span: span.clone(), + }, + features, + ) + } + ASTNode::ScopeBox { body, span } => { + let (node, features) = Self::build_block_node(body, if_depth, loop_depth); + (node.with_span(span.clone()), features) + } + ASTNode::Return { value, span } => ( + StepNode::Stmt { + kind: StepStmtKind::Return { + value_ast: value.as_ref().map(|v| AstNodeHandle(v.clone())), + }, + span: span.clone(), + }, + StepTreeFeatures { + has_return: true, + ..StepTreeFeatures::default() + }, + ), + ASTNode::Break { span } => ( + StepNode::Stmt { + kind: StepStmtKind::Break, + span: span.clone(), + }, + StepTreeFeatures { + has_break: true, + ..StepTreeFeatures::default() + }, + ), + ASTNode::Continue { span } => ( + StepNode::Stmt { + kind: StepStmtKind::Continue, + span: span.clone(), + }, + StepTreeFeatures { + has_continue: true, + ..StepTreeFeatures::default() + }, + ), + ASTNode::Local { variables, span, .. } => ( + StepNode::Stmt { + kind: StepStmtKind::LocalDecl { + vars: variables.clone(), + }, + span: span.clone(), + }, + StepTreeFeatures::default(), + ), + ASTNode::Assignment { span, value, .. } => ( + StepNode::Stmt { + kind: StepStmtKind::Assign { + target: match ast { + ASTNode::Assignment { target, .. } => match target.as_ref() { + ASTNode::Variable { name, .. 
} => Some(name.clone()), + _ => None, + }, + _ => None, + }, + // Phase 128: Store value AST for Normalized lowering + value_ast: Some(AstNodeHandle(value.clone())), + }, + span: span.clone(), + }, + StepTreeFeatures::default(), + ), + ASTNode::Print { span, .. } => ( + StepNode::Stmt { + kind: StepStmtKind::Print, + span: span.clone(), + }, + StepTreeFeatures::default(), + ), + other => ( + StepNode::Stmt { + kind: StepStmtKind::Other(ast_kind_name(other)), + span: other.span(), + }, + StepTreeFeatures::default(), + ), + } + } + + fn build_block_node( + stmts: &[ASTNode], + if_depth: u32, + loop_depth: u32, + ) -> (StepNode, StepTreeFeatures) { + let mut nodes = Vec::with_capacity(stmts.len()); + let mut features = StepTreeFeatures::default(); + for stmt in stmts { + let (node, node_features) = Self::build_node(stmt, if_depth, loop_depth); + nodes.push(node); + features = merge_features(features, node_features); + } + (StepNode::Block(nodes), features) + } +} + +fn build_step_tree(root: StepNode, features: StepTreeFeatures) -> StepTree { + // Phase 120: Facts → Contract → Signature (separated concerns) + let facts = extract_facts_from_tree(&root, &features); + let contract = StepTreeContractBox::from_facts(&facts); + let mut kinds = Vec::new(); + collect_node_kinds(&root, &mut kinds); + let kinds = kinds.join(","); + let basis = contract.signature_basis_string(&kinds); + let signature = StepTreeSignature::from_basis_string(&basis); + + StepTree { + root, + features, + contract, + signature, + } +} + +fn merge_features(mut a: StepTreeFeatures, b: StepTreeFeatures) -> StepTreeFeatures { + a.has_if |= b.has_if; + a.has_loop |= b.has_loop; + a.has_break |= b.has_break; + a.has_continue |= b.has_continue; + a.has_return |= b.has_return; + a.max_if_depth = a.max_if_depth.max(b.max_if_depth); + a.max_loop_depth = a.max_loop_depth.max(b.max_loop_depth); + a +} diff --git a/src/mir/control_tree/step_tree/fact_extractor.rs b/src/mir/control_tree/step_tree/fact_extractor.rs 
new file mode 100644 index 00000000..afdc4bc4 --- /dev/null +++ b/src/mir/control_tree/step_tree/fact_extractor.rs @@ -0,0 +1,182 @@ +use crate::ast::ASTNode; +use crate::mir::control_tree::step_tree_facts::StepTreeFacts; + +use super::types::{ExitKind, StepCapability, StepNode, StepStmtKind, StepTreeFeatures}; + +/// Extract raw facts from StepNode tree (Phase 120) +pub(super) fn extract_facts_from_tree( + root: &StepNode, + features: &StepTreeFeatures, +) -> StepTreeFacts { + let mut facts = StepTreeFacts::new(); + + // Required caps from features (structural only) + if features.has_if { + facts.add_capability(StepCapability::If); + } + if features.max_if_depth > 1 { + facts.add_capability(StepCapability::NestedIf); + } + if features.has_loop { + facts.add_capability(StepCapability::Loop); + } + if features.max_loop_depth > 1 { + facts.add_capability(StepCapability::NestedLoop); + } + if features.has_return { + facts.add_capability(StepCapability::Return); + } + if features.has_break { + facts.add_capability(StepCapability::Break); + } + if features.has_continue { + facts.add_capability(StepCapability::Continue); + } + + walk_for_facts(root, &mut facts); + facts +} + +/// Walk StepNode tree to collect facts (Phase 120, Phase 124) +fn walk_for_facts(node: &StepNode, facts: &mut StepTreeFacts) { + match node { + StepNode::Block(nodes) => { + for n in nodes { + walk_for_facts(n, facts); + } + } + StepNode::If { + cond, + cond_ast, + then_branch, + else_branch, + .. + } => { + facts.add_cond_sig(cond.to_compact_string()); + // Phase 124: Extract reads from condition AST + extract_variables_from_ast(&cond_ast.0, facts); + walk_for_facts(then_branch, facts); + if let Some(else_branch) = else_branch { + walk_for_facts(else_branch, facts); + } + } + StepNode::Loop { + cond, cond_ast, body, .. 
+ } => { + facts.add_cond_sig(cond.to_compact_string()); + // Phase 124: Extract reads from condition AST + extract_variables_from_ast(&cond_ast.0, facts); + walk_for_facts(body, facts); + } + StepNode::Stmt { kind, .. } => match kind { + StepStmtKind::LocalDecl { vars } => { + for v in vars { + facts.add_write(v.clone()); + } + } + StepStmtKind::Assign { target, value_ast } => { + if let Some(name) = target.as_ref() { + facts.add_write(name.clone()); + } + // Phase 128: Extract reads from assignment value AST + if let Some(ast) = value_ast { + extract_variables_from_ast(&ast.0, facts); + } + } + StepStmtKind::Print => {} + StepStmtKind::Return { value_ast } => { + facts.add_exit(ExitKind::Return); + // Phase 124: Extract reads from return value AST + if let Some(ast) = value_ast { + extract_variables_from_ast(&ast.0, facts); + } + } + StepStmtKind::Break => { + facts.add_exit(ExitKind::Break); + } + StepStmtKind::Continue => { + facts.add_exit(ExitKind::Continue); + } + StepStmtKind::Other(name) => match *name { + "TryCatch" => { + facts.add_capability(StepCapability::TryCatch); + } + "Throw" => { + facts.add_capability(StepCapability::Throw); + } + "Lambda" => { + facts.add_capability(StepCapability::Lambda); + } + "While" => { + facts.add_capability(StepCapability::While); + } + "ForRange" => { + facts.add_capability(StepCapability::ForRange); + } + "MatchExpr" => { + facts.add_capability(StepCapability::Match); + } + "Arrow" => { + facts.add_capability(StepCapability::Arrow); + } + _ => {} + }, + }, + } +} + +/// Extract Variable names from AST (Phase 124: reads collection) +/// +/// SSOT for reads extraction: +/// - Recursively walk AST tree +/// - Add Variable { name } to facts.reads +/// - Ignore other node types +fn extract_variables_from_ast(ast: &ASTNode, facts: &mut StepTreeFacts) { + match ast { + ASTNode::Variable { name, .. } => { + facts.add_read(name.clone()); + } + // Recursively walk binary/unary operations + ASTNode::BinaryOp { left, right, .. 
} => { + extract_variables_from_ast(left, facts); + extract_variables_from_ast(right, facts); + } + ASTNode::UnaryOp { operand, .. } => { + extract_variables_from_ast(operand, facts); + } + // Function calls + ASTNode::FunctionCall { arguments, .. } => { + for arg in arguments { + extract_variables_from_ast(arg, facts); + } + } + // Method calls + ASTNode::MethodCall { + object, arguments, .. + } => { + extract_variables_from_ast(object, facts); + for arg in arguments { + extract_variables_from_ast(arg, facts); + } + } + // Field access + ASTNode::FieldAccess { object, .. } => { + extract_variables_from_ast(object, facts); + } + // Array/Index access + ASTNode::Index { target, index, .. } => { + extract_variables_from_ast(target, facts); + extract_variables_from_ast(index, facts); + } + // Assignment (RHS only) + ASTNode::Assignment { value, .. } => { + extract_variables_from_ast(value, facts); + } + // Print + ASTNode::Print { expression, .. } => { + extract_variables_from_ast(expression, facts); + } + // Ignore literals, keywords, and other non-variable nodes + _ => {} + } +} diff --git a/src/mir/control_tree/step_tree/format.rs b/src/mir/control_tree/step_tree/format.rs new file mode 100644 index 00000000..ef987f7b --- /dev/null +++ b/src/mir/control_tree/step_tree/format.rs @@ -0,0 +1,99 @@ +use crate::ast::LiteralValue; + +use super::types::{AstSummary, StepNode, StepStmtKind, StepTree}; + +impl StepTree { + pub fn to_compact_string(&self) -> String { + let mut out = String::new(); + self.root.write_compact(&mut out, 0); + out + } +} + +impl StepNode { + fn write_compact(&self, out: &mut String, indent: usize) { + let pad = " ".repeat(indent); + match self { + StepNode::Block(nodes) => { + out.push_str(&format!("{pad}Block(len={})\n", nodes.len())); + for n in nodes { + n.write_compact(out, indent + 1); + } + } + StepNode::If { + cond, + then_branch, + else_branch, + .. 
+ } => { + out.push_str(&format!("{pad}If(cond={})\n", cond.to_compact_string())); + out.push_str(&format!("{pad} then:\n")); + then_branch.write_compact(out, indent + 2); + if let Some(else_branch) = else_branch { + out.push_str(&format!("{pad} else:\n")); + else_branch.write_compact(out, indent + 2); + } + } + StepNode::Loop { cond, body, .. } => { + out.push_str(&format!("{pad}Loop(cond={})\n", cond.to_compact_string())); + body.write_compact(out, indent + 1); + } + StepNode::Stmt { kind, .. } => { + out.push_str(&format!("{pad}Stmt({})\n", kind.to_compact_string())); + } + } + } +} + +impl StepStmtKind { + pub(super) fn to_compact_string(&self) -> String { + match self { + StepStmtKind::LocalDecl { vars } => format!("local({})", vars.join(",")), + StepStmtKind::Assign { target, .. } => match target { + Some(name) => format!("assign({name})"), + None => "assign(?)".to_string(), + }, + StepStmtKind::Print => "print".to_string(), + StepStmtKind::Return { value_ast } => { + if value_ast.is_some() { + "return(value)".to_string() + } else { + "return(void)".to_string() + } + } + StepStmtKind::Break => "break".to_string(), + StepStmtKind::Continue => "continue".to_string(), + StepStmtKind::Other(name) => format!("other:{name}"), + } + } +} + +impl AstSummary { + pub(super) fn to_compact_string(&self) -> String { + match self { + AstSummary::Variable(name) => format!("var:{name}"), + AstSummary::Literal(lit) => format!("lit:{}", lit_to_sig_string(lit)), + AstSummary::Unary { op, expr } => format!("({op:?} {})", expr.to_compact_string()), + AstSummary::Binary { op, lhs, rhs } => { + format!("({} {} {})", lhs.to_compact_string(), op, rhs.to_compact_string()) + } + AstSummary::Other(k) => format!("other:{k}"), + } + } +} + +fn lit_to_sig_string(lit: &LiteralValue) -> String { + match lit { + LiteralValue::String(s) => format!("str:{}", escape_sig_atom(s)), + LiteralValue::Integer(i) => format!("int:{i}"), + LiteralValue::Float(f) => format!("float:{:016x}", f.to_bits()), 
+ LiteralValue::Bool(b) => format!("bool:{}", if *b { 1 } else { 0 }), + LiteralValue::Null => "null".to_string(), + LiteralValue::Void => "void".to_string(), + } +} + +fn escape_sig_atom(s: &str) -> String { + // Minimal stable escaping for signature strings. + s.replace('\\', "\\\\").replace('|', "\\|").replace(',', "\\,") +} diff --git a/src/mir/control_tree/step_tree/mod.rs b/src/mir/control_tree/step_tree/mod.rs new file mode 100644 index 00000000..28e825ea --- /dev/null +++ b/src/mir/control_tree/step_tree/mod.rs @@ -0,0 +1,14 @@ +mod builder; +mod fact_extractor; +mod format; +mod signature; +mod summary; +mod types; +#[cfg(test)] +mod tests; + +pub use builder::StepTreeBuilderBox; +pub use types::{ + AstNodeHandle, AstSummary, ExitKind, StepCapability, StepNode, StepStmtKind, StepTree, + StepTreeFeatures, StepTreeSignature, +}; diff --git a/src/mir/control_tree/step_tree/signature.rs b/src/mir/control_tree/step_tree/signature.rs new file mode 100644 index 00000000..66bc9873 --- /dev/null +++ b/src/mir/control_tree/step_tree/signature.rs @@ -0,0 +1,39 @@ +use super::types::{StepNode, StepTree}; + +impl StepTree { + pub fn signature_basis_string(&self) -> String { + let mut kinds = Vec::new(); + collect_node_kinds(&self.root, &mut kinds); + let kinds = kinds.join(","); + self.contract.signature_basis_string(&kinds) + } +} + +pub(super) fn collect_node_kinds(node: &StepNode, out: &mut Vec) { + match node { + StepNode::Block(nodes) => { + out.push("Block".to_string()); + for n in nodes { + collect_node_kinds(n, out); + } + } + StepNode::If { + then_branch, + else_branch, + .. + } => { + out.push("If".to_string()); + collect_node_kinds(then_branch, out); + if let Some(else_branch) = else_branch { + collect_node_kinds(else_branch, out); + } + } + StepNode::Loop { body, .. } => { + out.push("Loop".to_string()); + collect_node_kinds(body, out); + } + StepNode::Stmt { kind, .. 
} => { + out.push(format!("Stmt({})", kind.to_compact_string())); + } + } +} diff --git a/src/mir/control_tree/step_tree/summary.rs b/src/mir/control_tree/step_tree/summary.rs new file mode 100644 index 00000000..177a2ca4 --- /dev/null +++ b/src/mir/control_tree/step_tree/summary.rs @@ -0,0 +1,76 @@ +use crate::ast::ASTNode; + +use super::types::AstSummary; + +pub(super) fn summarize_ast(ast: &ASTNode) -> AstSummary { + match ast { + ASTNode::Variable { name, .. } => AstSummary::Variable(name.clone()), + ASTNode::Literal { value, .. } => AstSummary::Literal(value.clone()), + ASTNode::UnaryOp { + operator, operand, .. + } => AstSummary::Unary { + op: operator.clone(), + expr: Box::new(summarize_ast(operand)), + }, + ASTNode::BinaryOp { + operator, + left, + right, + .. + } => AstSummary::Binary { + op: operator.clone(), + lhs: Box::new(summarize_ast(left)), + rhs: Box::new(summarize_ast(right)), + }, + other => AstSummary::Other(ast_kind_name(other)), + } +} + +pub(super) fn ast_kind_name(ast: &ASTNode) -> &'static str { + match ast { + ASTNode::Program { .. } => "Program", + ASTNode::Assignment { .. } => "Assignment", + ASTNode::Print { .. } => "Print", + ASTNode::If { .. } => "If", + ASTNode::Loop { .. } => "Loop", + ASTNode::While { .. } => "While", + ASTNode::ForRange { .. } => "ForRange", + ASTNode::Return { .. } => "Return", + ASTNode::Break { .. } => "Break", + ASTNode::Continue { .. } => "Continue", + ASTNode::UsingStatement { .. } => "UsingStatement", + ASTNode::ImportStatement { .. } => "ImportStatement", + ASTNode::Nowait { .. } => "Nowait", + ASTNode::AwaitExpression { .. } => "AwaitExpression", + ASTNode::QMarkPropagate { .. } => "QMarkPropagate", + ASTNode::MatchExpr { .. } => "MatchExpr", + ASTNode::ArrayLiteral { .. } => "ArrayLiteral", + ASTNode::MapLiteral { .. } => "MapLiteral", + ASTNode::Lambda { .. } => "Lambda", + ASTNode::Arrow { .. } => "Arrow", + ASTNode::TryCatch { .. } => "TryCatch", + ASTNode::Throw { .. 
} => "Throw", + ASTNode::BoxDeclaration { .. } => "BoxDeclaration", + ASTNode::FunctionDeclaration { .. } => "FunctionDeclaration", + ASTNode::GlobalVar { .. } => "GlobalVar", + ASTNode::Literal { .. } => "Literal", + ASTNode::Variable { .. } => "Variable", + ASTNode::UnaryOp { .. } => "UnaryOp", + ASTNode::BinaryOp { .. } => "BinaryOp", + ASTNode::GroupedAssignmentExpr { .. } => "GroupedAssignmentExpr", + ASTNode::MethodCall { .. } => "MethodCall", + ASTNode::Call { .. } => "Call", + ASTNode::FunctionCall { .. } => "FunctionCall", + ASTNode::FieldAccess { .. } => "FieldAccess", + ASTNode::Index { .. } => "Index", + ASTNode::New { .. } => "New", + ASTNode::This { .. } => "This", + ASTNode::Me { .. } => "Me", + ASTNode::FromCall { .. } => "FromCall", + ASTNode::ThisField { .. } => "ThisField", + ASTNode::MeField { .. } => "MeField", + ASTNode::Local { .. } => "Local", + ASTNode::ScopeBox { .. } => "ScopeBox", + ASTNode::Outbox { .. } => "Outbox", + } +} diff --git a/src/mir/control_tree/step_tree/tests.rs b/src/mir/control_tree/step_tree/tests.rs new file mode 100644 index 00000000..2266bded --- /dev/null +++ b/src/mir/control_tree/step_tree/tests.rs @@ -0,0 +1,237 @@ +use super::*; +use crate::ast::{Span, ASTNode, BinaryOperator, LiteralValue}; + +fn str_lit(s: &str) -> ASTNode { + ASTNode::Literal { + value: LiteralValue::String(s.to_string()), + span: Span::unknown(), + } +} + +fn eq(a: ASTNode, b: ASTNode) -> ASTNode { + ASTNode::BinaryOp { + operator: BinaryOperator::Equal, + left: Box::new(a), + right: Box::new(b), + span: Span::unknown(), + } +} + +fn assign_x(num: i64) -> ASTNode { + ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "x".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(num), + span: Span::unknown(), + }), + span: Span::unknown(), + } +} + +#[test] +fn build_step_tree_if_only_nested_if_is_structural() { + // Equivalent shape to Phase103 "if-only merge" fixture: + 
// + // local x = 0 + // if "x" == "x" { if "y" == "z" { x=1 } else { x=2 } } else { x=3 } + // print(x) + let ast = vec![ + ASTNode::Local { + variables: vec!["x".to_string()], + initial_values: vec![Some(Box::new(ASTNode::Literal { + value: LiteralValue::Integer(0), + span: Span::unknown(), + }))], + span: Span::unknown(), + }, + ASTNode::If { + condition: Box::new(eq(str_lit("x"), str_lit("x"))), + then_body: vec![ASTNode::If { + condition: Box::new(eq(str_lit("y"), str_lit("z"))), + then_body: vec![assign_x(1)], + else_body: Some(vec![assign_x(2)]), + span: Span::unknown(), + }], + else_body: Some(vec![assign_x(3)]), + span: Span::unknown(), + }, + ASTNode::Print { + expression: Box::new(ASTNode::Variable { + name: "x".to_string(), + span: Span::unknown(), + }), + span: Span::unknown(), + }, + ]; + + let tree = StepTreeBuilderBox::build_from_block(&ast); + assert!(tree.features.has_if); + assert!(!tree.features.has_loop); + assert_eq!(tree.features.max_if_depth, 2); + assert_eq!(tree.contract.exits.len(), 0); + assert!(tree.contract.writes.contains("x")); + assert!(tree.contract.required_caps.contains(&StepCapability::If)); + assert!(tree.contract.required_caps.contains(&StepCapability::NestedIf)); + + let basis = tree.signature_basis_string(); + assert_eq!( + basis, + "kinds=Block,Stmt(local(x)),If,Block,If,Block,Stmt(assign(x)),Block,Stmt(assign(x)),Block,Stmt(assign(x)),Stmt(print);exits=;writes=x;reads=;caps=If,NestedIf;conds=(lit:str:x == lit:str:x)|(lit:str:y == lit:str:z)" + ); + + let tree2 = StepTreeBuilderBox::build_from_block(&ast); + assert_eq!(tree.signature, tree2.signature); + + match tree.root { + StepNode::Block(nodes) => { + assert_eq!(nodes.len(), 3); + match &nodes[1] { + StepNode::If { then_branch, cond_ast, .. } => { + // cond_ast should be populated + assert!(matches!(&cond_ast.0.as_ref(), ASTNode::BinaryOp { .. 
})); + + match &**then_branch { + StepNode::Block(inner_nodes) => match &inner_nodes[0] { + StepNode::If { cond_ast: inner_cond_ast, .. } => { + // inner cond_ast should also be populated + assert!(matches!(&inner_cond_ast.0.as_ref(), ASTNode::BinaryOp { .. })); + } + other => panic!("expected nested If, got {other:?}"), + }, + other => panic!("expected Block in then_branch, got {other:?}"), + } + } + other => panic!("expected If at index 1, got {other:?}"), + } + } + other => panic!("expected root Block, got {other:?}"), + } +} + +#[test] +fn step_tree_cond_ast_is_populated() { + // Phase 119: cond_ast should hold AST reference. + let ast = vec![ASTNode::If { + condition: Box::new(eq(str_lit("a"), str_lit("b"))), + then_body: vec![assign_x(1)], + else_body: None, + span: Span::unknown(), + }]; + + let tree = StepTreeBuilderBox::build_from_block(&ast); + + match &tree.root { + StepNode::Block(nodes) => match &nodes[0] { + StepNode::If { cond_ast, .. } => { + // cond_ast should be populated with BinaryOp + assert!(matches!(&cond_ast.0.as_ref(), ASTNode::BinaryOp { .. })); + } + other => panic!("expected If, got {other:?}"), + }, + other => panic!("expected root Block, got {other:?}"), + } +} + +#[test] +fn step_tree_signature_is_stable_with_cond_ast() { + // Phase 119: cond_ast should NOT affect signature stability. + // Signature is based on cond_sig (AstSummary), not cond_ast. + let ast = vec![ + ASTNode::Loop { + condition: Box::new(ASTNode::Literal { + value: LiteralValue::Bool(true), + span: Span::unknown(), + }), + body: vec![assign_x(1)], + span: Span::unknown(), + }, + ]; + + let tree1 = StepTreeBuilderBox::build_from_block(&ast); + let tree2 = StepTreeBuilderBox::build_from_block(&ast); + + // Signature should be identical (deterministic) + assert_eq!(tree1.signature, tree2.signature); + + // cond_ast should be populated + match &tree1.root { + StepNode::Block(nodes) => match &nodes[0] { + StepNode::Loop { cond_ast, .. 
} => { + assert!(matches!(&cond_ast.0.as_ref(), ASTNode::Literal { .. })); + } + other => panic!("expected Loop, got {other:?}"), + }, + other => panic!("expected root Block, got {other:?}"), + } +} + +#[test] +fn contract_extracts_loop_exits_and_writes_minimal() { + fn var(name: &str) -> ASTNode { + ASTNode::Variable { + name: name.to_string(), + span: Span::unknown(), + } + } + fn int_lit(v: i64) -> ASTNode { + ASTNode::Literal { + value: LiteralValue::Integer(v), + span: Span::unknown(), + } + } + fn bin(op: BinaryOperator, lhs: ASTNode, rhs: ASTNode) -> ASTNode { + ASTNode::BinaryOp { + operator: op, + left: Box::new(lhs), + right: Box::new(rhs), + span: Span::unknown(), + } + } + fn assign(name: &str, value: ASTNode) -> ASTNode { + ASTNode::Assignment { + target: Box::new(var(name)), + value: Box::new(value), + span: Span::unknown(), + } + } + + // local i=0; local x=0; + // loop(i < 3) { x = x + 1; if x == 2 { break } i = i + 1 } + let ast = vec![ + ASTNode::Local { + variables: vec!["i".to_string()], + initial_values: vec![Some(Box::new(int_lit(0)))], + span: Span::unknown(), + }, + ASTNode::Local { + variables: vec!["x".to_string()], + initial_values: vec![Some(Box::new(int_lit(0)))], + span: Span::unknown(), + }, + ASTNode::Loop { + condition: Box::new(bin(BinaryOperator::Less, var("i"), int_lit(3))), + body: vec![ + assign("x", bin(BinaryOperator::Add, var("x"), int_lit(1))), + ASTNode::If { + condition: Box::new(bin(BinaryOperator::Equal, var("x"), int_lit(2))), + then_body: vec![ASTNode::Break { span: Span::unknown() }], + else_body: None, + span: Span::unknown(), + }, + assign("i", bin(BinaryOperator::Add, var("i"), int_lit(1))), + ], + span: Span::unknown(), + }, + ]; + + let tree = StepTreeBuilderBox::build_from_block(&ast); + assert!(tree.features.has_loop); + assert!(tree.contract.exits.contains(&ExitKind::Break)); + assert!(tree.contract.writes.contains("i")); + assert!(tree.contract.writes.contains("x")); + 
assert!(tree.contract.required_caps.contains(&StepCapability::Loop)); + assert!(tree.contract.required_caps.contains(&StepCapability::If)); +} diff --git a/src/mir/control_tree/step_tree/types.rs b/src/mir/control_tree/step_tree/types.rs new file mode 100644 index 00000000..4e12c358 --- /dev/null +++ b/src/mir/control_tree/step_tree/types.rs @@ -0,0 +1,158 @@ +use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span, UnaryOperator}; +use crate::mir::control_tree::step_tree_contract_box::StepTreeContract; + +#[derive(Debug, Clone, PartialEq)] +pub struct StepTree { + pub root: StepNode, + pub features: StepTreeFeatures, + pub contract: StepTreeContract, + pub signature: StepTreeSignature, +} + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct StepTreeFeatures { + pub has_if: bool, + pub has_loop: bool, + pub has_break: bool, + pub has_continue: bool, + pub has_return: bool, + pub max_if_depth: u32, + pub max_loop_depth: u32, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum StepNode { + Block(Vec), + If { + cond: AstSummary, + cond_ast: AstNodeHandle, + then_branch: Box, + else_branch: Option>, + span: Span, + }, + Loop { + cond: AstSummary, + cond_ast: AstNodeHandle, + body: Box, + span: Span, + }, + Stmt { kind: StepStmtKind, span: Span }, +} + +/// AST 参照の軽量ハンドル(Phase 119: dev-only 観測用) +/// +/// SSOT: cond は AST 参照を保持する。 +/// - 将来的に AstExprId 等に移行可能。 +/// - Phase 119 では Clone を持つ Box で実装(dev-only なので許容)。 +#[derive(Debug, Clone, PartialEq)] +pub struct AstNodeHandle(pub Box); + +#[derive(Debug, Clone, PartialEq)] +pub enum StepStmtKind { + LocalDecl { vars: Vec }, + Assign { + target: Option, + /// Phase 128: assignment value AST (for Normalized lowering) + value_ast: Option, + }, + Print, + Return { + /// Phase 123: return value AST (for Normalized lowering) + value_ast: Option, + }, + Break, + Continue, + Other(&'static str), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum AstSummary { + Variable(String), + Literal(LiteralValue), + Unary { + op: 
UnaryOperator, + expr: Box, + }, + Binary { + op: BinaryOperator, + lhs: Box, + rhs: Box, + }, + Other(&'static str), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum ExitKind { + Return, + Break, + Continue, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum StepCapability { + If, + Loop, + NestedIf, + NestedLoop, + Return, + Break, + Continue, + TryCatch, + Throw, + Lambda, + While, + ForRange, + Match, + Arrow, +} + +// StepTreeContract moved to step_tree_contract_box.rs (Phase 120) + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct StepTreeSignature(pub u64); + +impl StepTreeSignature { + pub fn from_basis_string(basis: &str) -> Self { + // FNV-1a 64-bit (stable, no external deps). + let mut hash: u64 = 0xcbf29ce484222325; + for b in basis.as_bytes() { + hash ^= *b as u64; + hash = hash.wrapping_mul(0x100000001b3); + } + StepTreeSignature(hash) + } + + pub fn to_hex(self) -> String { + format!("{:016x}", self.0) + } +} + +impl StepNode { + pub(super) fn with_span(self, span: Span) -> StepNode { + match self { + StepNode::Block(nodes) => StepNode::Block(nodes), + StepNode::If { + cond, + cond_ast, + then_branch, + else_branch, + .. + } => StepNode::If { + cond, + cond_ast, + then_branch, + else_branch, + span, + }, + StepNode::Loop { + cond, cond_ast, body, .. + } => StepNode::Loop { + cond, + cond_ast, + body, + span, + }, + StepNode::Stmt { kind, .. } => StepNode::Stmt { kind, span }, + } + } +} diff --git a/src/mir/join_ir/lowering/carrier_update_emitter.rs b/src/mir/join_ir/lowering/carrier_update_emitter.rs deleted file mode 100644 index 6bbd7b6f..00000000 --- a/src/mir/join_ir/lowering/carrier_update_emitter.rs +++ /dev/null @@ -1,1015 +0,0 @@ -//! Phase 176-2 / Phase 179 / Phase 184: Carrier Update Emission -//! -//! Converts UpdateExpr (from LoopUpdateAnalyzer) into JoinIR instructions -//! that compute the updated carrier value. -//! -//! 
This module is extracted from loop_with_break_minimal.rs to improve -//! modularity and single responsibility. -//! -//! Phase 184: Added UpdateEnv support for body-local variable resolution. - -use crate::mir::join_ir::lowering::carrier_info::CarrierVar; -use crate::mir::join_ir::lowering::condition_env::ConditionEnv; -use crate::mir::join_ir::lowering::loop_update_analyzer::{UpdateExpr, UpdateRhs}; -use crate::mir::join_ir::lowering::update_env::UpdateEnv; -use crate::mir::join_ir::{BinOpKind, ConstValue, JoinInst, MirLikeInst}; -use crate::mir::ValueId; - -/// Emit JoinIR instructions for a single carrier update (Phase 184: UpdateEnv version) -/// -/// Converts UpdateExpr (from LoopUpdateAnalyzer) into JoinIR instructions -/// that compute the updated carrier value. Supports both condition variables -/// and body-local variables through UpdateEnv. -/// -/// # Arguments -/// -/// * `carrier` - Carrier variable information (name, ValueId) -/// * `update` - Update expression (e.g., CounterLike, AccumulationLike) -/// * `alloc_value` - ValueId allocator closure -/// * `env` - UpdateEnv for unified variable resolution -/// * `instructions` - Output vector to append instructions to -/// -/// # Returns -/// -/// ValueId of the computed update result -/// -/// # Example -/// -/// ```ignore -/// // For "count = count + temp": -/// let count_next = emit_carrier_update_with_env( -/// &count_carrier, -/// &UpdateExpr::BinOp { lhs: "count", op: Add, rhs: Variable("temp") }, -/// &mut alloc_value, -/// &update_env, // Has both condition and body-local vars -/// &mut instructions, -/// )?; -/// // Generates: -/// // count_next = BinOp(Add, count_param, temp_value) -/// ``` -pub fn emit_carrier_update_with_env( - carrier: &CarrierVar, - update: &UpdateExpr, - alloc_value: &mut dyn FnMut() -> ValueId, - env: &UpdateEnv, - instructions: &mut Vec, -) -> Result { - match update { - UpdateExpr::Const(step) => { - // CounterLike: carrier = carrier + step - // Allocate const ValueId - 
let const_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: const_id, - value: ConstValue::Integer(*step), - })); - - // Get carrier parameter ValueId from env - let carrier_param = env - .resolve(&carrier.name) - .ok_or_else(|| format!("Carrier '{}' not found in UpdateEnv", carrier.name))?; - - // Allocate result ValueId - let result = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp { - dst: result, - op: BinOpKind::Add, - lhs: carrier_param, - rhs: const_id, - })); - - Ok(result) - } - - UpdateExpr::BinOp { lhs, op, rhs } => { - // General binary operation: carrier = carrier op rhs - // Verify lhs matches carrier name - if lhs != &carrier.name { - return Err(format!( - "Update expression LHS '{}' doesn't match carrier '{}'", - lhs, carrier.name - )); - } - - // Get carrier parameter ValueId from env - let carrier_param = env - .resolve(&carrier.name) - .ok_or_else(|| format!("Carrier '{}' not found in UpdateEnv", carrier.name))?; - - // Resolve RHS (Phase 184: Now supports body-local variables!) - let rhs_id = match rhs { - UpdateRhs::Const(n) => { - let const_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: const_id, - value: ConstValue::Integer(*n), - })); - const_id - } - UpdateRhs::Variable(var_name) => { - env.resolve(var_name).ok_or_else(|| { - format!( - "Update RHS variable '{}' not found in UpdateEnv (neither condition nor body-local)", - var_name - ) - })? 
- } - // Phase 188: String updates now emit JoinIR BinOp - // StringAppendLiteral: s = s + "literal" - UpdateRhs::StringLiteral(s) => { - let const_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: const_id, - value: ConstValue::String(s.clone()), - })); - const_id - } - // Phase 190: Number accumulation pattern: result = result * base + digit - // Emit as: tmp = carrier * base; result = tmp + digit - UpdateRhs::NumberAccumulation { base, digit_var } => { - // Step 1: Emit const for base - let base_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: base_id, - value: ConstValue::Integer(*base), - })); - - // Step 2: Emit multiplication: tmp = carrier * base - let tmp_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp { - dst: tmp_id, - op: BinOpKind::Mul, - lhs: carrier_param, - rhs: base_id, - })); - - // Step 3: Resolve digit variable - let digit_id = env.resolve(digit_var).ok_or_else(|| { - format!( - "Number accumulation digit variable '{}' not found in UpdateEnv", - digit_var - ) - })?; - - // Step 4: Emit addition: result = tmp + digit - // This will be handled by the outer BinOp emission - // For now, return digit_id to be used as RHS - // We need to handle this specially - return tmp_id instead - // and adjust the outer BinOp to use correct values - - // Actually, we need to emit both operations here - // Final result = tmp + digit - let result = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp { - dst: result, - op: *op, // Use the operation from outer UpdateExpr - lhs: tmp_id, - rhs: digit_id, - })); - - // Return result directly - we've already emitted everything - return Ok(result); - } - // Phase 178/188: Complex updates (method calls) still rejected - UpdateRhs::Other => { - return Err(format!( - "Carrier '{}' has complex update (UpdateRhs::Other) - should be rejected by can_lower()", - carrier.name - )); - } - }; - - // Allocate 
result ValueId - let result = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp { - dst: result, - op: *op, - lhs: carrier_param, - rhs: rhs_id, - })); - - Ok(result) - } - } -} - -/// Emit JoinIR instructions for a single carrier update (backward compatibility version) -/// -/// This function is kept for backward compatibility with existing Pattern2/4 code -/// that only needs ConditionEnv. New code should prefer `emit_carrier_update_with_env`. -/// -/// # Arguments -/// -/// * `carrier` - Carrier variable information (name, ValueId) -/// * `update` - Update expression (e.g., CounterLike, AccumulationLike) -/// * `alloc_value` - ValueId allocator closure -/// * `env` - ConditionEnv for variable resolution -/// * `instructions` - Output vector to append instructions to -/// -/// # Returns -/// -/// ValueId of the computed update result -/// -/// # Example -/// -/// ```ignore -/// // For "count = count + 1": -/// let count_next = emit_carrier_update( -/// &count_carrier, -/// &UpdateExpr::BinOp { lhs: "count", op: Add, rhs: Const(1) }, -/// &mut alloc_value, -/// &env, -/// &mut instructions, -/// )?; -/// // Generates: -/// // const_1 = Const(1) -/// // count_next = BinOp(Add, count_param, const_1) -/// ``` -pub fn emit_carrier_update( - carrier: &CarrierVar, - update: &UpdateExpr, - alloc_value: &mut dyn FnMut() -> ValueId, - env: &ConditionEnv, - instructions: &mut Vec, -) -> Result { - match update { - UpdateExpr::Const(step) => { - // CounterLike: carrier = carrier + step - // Allocate const ValueId - let const_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: const_id, - value: ConstValue::Integer(*step), - })); - - // Get carrier parameter ValueId from env - let carrier_param = env - .get(&carrier.name) - .ok_or_else(|| format!("Carrier '{}' not found in ConditionEnv", carrier.name))?; - - // Allocate result ValueId - let result = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp 
{ - dst: result, - op: BinOpKind::Add, - lhs: carrier_param, - rhs: const_id, - })); - - Ok(result) - } - - UpdateExpr::BinOp { lhs, op, rhs } => { - // General binary operation: carrier = carrier op rhs - // Verify lhs matches carrier name - if lhs != &carrier.name { - return Err(format!( - "Update expression LHS '{}' doesn't match carrier '{}'", - lhs, carrier.name - )); - } - - // Get carrier parameter ValueId from env - let carrier_param = env - .get(&carrier.name) - .ok_or_else(|| format!("Carrier '{}' not found in ConditionEnv", carrier.name))?; - - // Resolve RHS - let rhs_id = match rhs { - UpdateRhs::Const(n) => { - let const_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: const_id, - value: ConstValue::Integer(*n), - })); - const_id - } - UpdateRhs::Variable(var_name) => env.get(var_name).ok_or_else(|| { - format!( - "Update RHS variable '{}' not found in ConditionEnv", - var_name - ) - })?, - // Phase 188: String updates now emit JoinIR BinOp - // StringAppendLiteral: s = s + "literal" - UpdateRhs::StringLiteral(s) => { - let const_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: const_id, - value: ConstValue::String(s.clone()), - })); - const_id - } - // Phase 190: Number accumulation pattern: result = result * base + digit - // Emit as: tmp = carrier * base; result = tmp + digit - UpdateRhs::NumberAccumulation { base, digit_var } => { - // Step 1: Emit const for base - let base_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: base_id, - value: ConstValue::Integer(*base), - })); - - // Step 2: Emit multiplication: tmp = carrier * base - let tmp_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp { - dst: tmp_id, - op: BinOpKind::Mul, - lhs: carrier_param, - rhs: base_id, - })); - - // Step 3: Resolve digit variable - let digit_id = env.get(digit_var).ok_or_else(|| { - format!( - "Number accumulation digit variable '{}' 
not found in ConditionEnv", - digit_var - ) - })?; - - // Step 4: Emit addition: result = tmp + digit - let result = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp { - dst: result, - op: *op, // Use the operation from outer UpdateExpr - lhs: tmp_id, - rhs: digit_id, - })); - - // Return result directly - we've already emitted everything - return Ok(result); - } - // Phase 178/188: Complex updates (method calls) still rejected - UpdateRhs::Other => { - return Err(format!( - "Carrier '{}' has complex update (UpdateRhs::Other) - should be rejected by can_lower()", - carrier.name - )); - } - }; - - // Allocate result ValueId - let result = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp { - dst: result, - op: *op, - lhs: carrier_param, - rhs: rhs_id, - })); - - Ok(result) - } - } -} - -// ============================================================================ -// Phase 92 P0-3: ConditionalStep Support -// ============================================================================ - -use crate::ast::ASTNode; -use crate::mir::join_ir::lowering::condition_lowerer::lower_condition_to_joinir_no_body_locals; -use crate::mir::join_ir::VarId; -use crate::mir::MirType; - -/// Emit JoinIR instructions for conditional step update (Phase 92 P0-3) -/// -/// Handles the P5b escape sequence pattern where carrier update depends on a condition: -/// ```text -/// if escape_cond { carrier = carrier + then_delta } -/// else { carrier = carrier + else_delta } -/// ``` -/// -/// This generates: -/// 1. Lower condition expression to get cond_id -/// 2. Compute then_result = carrier + then_delta -/// 3. Compute else_result = carrier + else_delta -/// 4. 
JoinInst::Select { dst: carrier_new, cond: cond_id, then_val: then_result, else_val: else_result } -/// -/// # Arguments -/// -/// * `carrier` - Carrier variable information (name, ValueId) -/// * `cond_ast` - AST node for the condition expression (e.g., `ch == '\\'`) -/// * `then_delta` - Delta to add when condition is true -/// * `else_delta` - Delta to add when condition is false -/// * `alloc_value` - ValueId allocator closure -/// * `env` - ConditionEnv for variable resolution -/// * `instructions` - Output vector to append instructions to -/// -/// # Returns -/// -/// ValueId of the computed update result (the dst of Select) -pub fn emit_conditional_step_update( - carrier: &CarrierVar, - cond_ast: &ASTNode, - then_delta: i64, - else_delta: i64, - alloc_value: &mut dyn FnMut() -> ValueId, - env: &ConditionEnv, - instructions: &mut Vec, -) -> Result { - // Step 1: Lower the condition expression - // Phase 92 P2-2: No body-local support in legacy emitter (use common/conditional_step_emitter instead) - let (cond_id, cond_insts) = lower_condition_to_joinir_no_body_locals(cond_ast, alloc_value, env)?; - instructions.extend(cond_insts); - - // Step 2: Get carrier parameter ValueId from env - let carrier_param = env - .get(&carrier.name) - .ok_or_else(|| format!("Carrier '{}' not found in ConditionEnv", carrier.name))?; - - // Step 3: Compute then_result = carrier + then_delta - let then_const_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: then_const_id, - value: ConstValue::Integer(then_delta), - })); - let then_result = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp { - dst: then_result, - op: BinOpKind::Add, - lhs: carrier_param, - rhs: then_const_id, - })); - - // Step 4: Compute else_result = carrier + else_delta - let else_const_id = alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::Const { - dst: else_const_id, - value: ConstValue::Integer(else_delta), - })); - let else_result = 
alloc_value(); - instructions.push(JoinInst::Compute(MirLikeInst::BinOp { - dst: else_result, - op: BinOpKind::Add, - lhs: carrier_param, - rhs: else_const_id, - })); - - // Step 5: Emit Select instruction - let carrier_new: VarId = alloc_value(); - instructions.push(JoinInst::Select { - dst: carrier_new, - cond: cond_id, - then_val: then_result, - else_val: else_result, - type_hint: Some(MirType::Integer), // Carrier is always Integer - }); - - Ok(carrier_new) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::mir::join_ir::lowering::carrier_info::CarrierVar; - use crate::mir::join_ir::lowering::loop_body_local_env::LoopBodyLocalEnv; - use crate::mir::join_ir::lowering::loop_update_analyzer::UpdateRhs; - use crate::mir::join_ir::BinOpKind; - use crate::mir::ValueId; - - // Helper: Create a test ConditionEnv - fn test_env() -> ConditionEnv { - let mut env = ConditionEnv::new(); - env.insert("count".to_string(), ValueId(10)); - env.insert("sum".to_string(), ValueId(20)); - env.insert("i".to_string(), ValueId(30)); - env - } - - // Helper: Create a test LoopBodyLocalEnv - fn test_body_local_env() -> LoopBodyLocalEnv { - let mut env = LoopBodyLocalEnv::new(); - env.insert("temp".to_string(), ValueId(50)); - env.insert("digit".to_string(), ValueId(60)); - env - } - - // Helper: Create a test UpdateEnv - fn test_update_env() -> (ConditionEnv, LoopBodyLocalEnv) { - (test_env(), test_body_local_env()) - } - - // Helper: Create a test CarrierVar - fn test_carrier(name: &str, host_id: u32) -> CarrierVar { - CarrierVar { - name: name.to_string(), - host_id: ValueId(host_id), - join_id: None, // Phase 177-STRUCT-1 - role: crate::mir::join_ir::lowering::carrier_info::CarrierRole::LoopState, - init: crate::mir::join_ir::lowering::carrier_info::CarrierInit::FromHost, // Phase 228 - #[cfg(feature = "normalized_dev")] - binding_id: None, - } - } - - #[test] - fn test_emit_const_update() { - // Test: count = count + 1 (UpdateExpr::Const) - let carrier = 
test_carrier("count", 100); - let update = UpdateExpr::Const(1); - let env = test_env(); - - let mut value_counter = 50u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = - emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); - - assert!(result.is_ok()); - let result_id = result.unwrap(); - - // Should generate 2 instructions: Const(1) + BinOp(Add) - assert_eq!(instructions.len(), 2); - - // Instruction 1: Const(1) - match &instructions[0] { - JoinInst::Compute(MirLikeInst::Const { dst, value }) => { - assert_eq!(*dst, ValueId(50)); // First allocated - assert!(matches!(value, ConstValue::Integer(1))); - } - _ => panic!("Expected Const instruction"), - } - - // Instruction 2: BinOp(Add, count, const_1) - match &instructions[1] { - JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { - assert_eq!(*dst, ValueId(51)); // Second allocated - assert_eq!(*op, BinOpKind::Add); - assert_eq!(*lhs, ValueId(10)); // count from env - assert_eq!(*rhs, ValueId(50)); // const_1 - } - _ => panic!("Expected BinOp instruction"), - } - - assert_eq!(result_id, ValueId(51)); - } - - #[test] - fn test_emit_binop_update_with_const() { - // Test: sum = sum + 5 (UpdateExpr::BinOp with Const RHS) - let carrier = test_carrier("sum", 200); - let update = UpdateExpr::BinOp { - lhs: "sum".to_string(), - op: BinOpKind::Add, - rhs: UpdateRhs::Const(5), - }; - let env = test_env(); - - let mut value_counter = 60u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = - emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); - - assert!(result.is_ok()); - let result_id = result.unwrap(); - - // Should generate 2 instructions: Const(5) + BinOp(Add) - assert_eq!(instructions.len(), 2); - - // Instruction 1: Const(5) - match 
&instructions[0] { - JoinInst::Compute(MirLikeInst::Const { dst, value }) => { - assert_eq!(*dst, ValueId(60)); - assert!(matches!(value, ConstValue::Integer(5))); - } - _ => panic!("Expected Const instruction"), - } - - // Instruction 2: BinOp(Add, sum, const_5) - match &instructions[1] { - JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { - assert_eq!(*dst, ValueId(61)); - assert_eq!(*op, BinOpKind::Add); - assert_eq!(*lhs, ValueId(20)); // sum from env - assert_eq!(*rhs, ValueId(60)); // const_5 - } - _ => panic!("Expected BinOp instruction"), - } - - assert_eq!(result_id, ValueId(61)); - } - - #[test] - fn test_emit_binop_update_with_variable() { - // Test: sum = sum + i (UpdateExpr::BinOp with Variable RHS) - let carrier = test_carrier("sum", 200); - let update = UpdateExpr::BinOp { - lhs: "sum".to_string(), - op: BinOpKind::Add, - rhs: UpdateRhs::Variable("i".to_string()), - }; - let env = test_env(); - - let mut value_counter = 70u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = - emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); - - assert!(result.is_ok()); - let result_id = result.unwrap(); - - // Should generate 1 instruction: BinOp(Add, sum, i) - assert_eq!(instructions.len(), 1); - - // Instruction: BinOp(Add, sum, i) - match &instructions[0] { - JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { - assert_eq!(*dst, ValueId(70)); - assert_eq!(*op, BinOpKind::Add); - assert_eq!(*lhs, ValueId(20)); // sum from env - assert_eq!(*rhs, ValueId(30)); // i from env - } - _ => panic!("Expected BinOp instruction"), - } - - assert_eq!(result_id, ValueId(70)); - } - - #[test] - fn test_emit_update_carrier_not_in_env() { - // Test error case: carrier not found in env - let carrier = test_carrier("unknown", 300); - let update = UpdateExpr::Const(1); - let env = test_env(); // doesn't have "unknown" - - 
let mut value_counter = 80u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = - emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); - - assert!(result.is_err()); - assert!(result.unwrap_err().contains("Carrier 'unknown' not found")); - } - - #[test] - fn test_emit_update_lhs_mismatch() { - // Test error case: LHS doesn't match carrier name - let carrier = test_carrier("count", 100); - let update = UpdateExpr::BinOp { - lhs: "sum".to_string(), // Wrong! Should be "count" - op: BinOpKind::Add, - rhs: UpdateRhs::Const(1), - }; - let env = test_env(); - - let mut value_counter = 90u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = - emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); - - assert!(result.is_err()); - assert!(result.unwrap_err().contains("doesn't match carrier")); - } - - #[test] - fn test_emit_update_rhs_variable_not_found() { - // Test error case: RHS variable not in env - let carrier = test_carrier("sum", 200); - let update = UpdateExpr::BinOp { - lhs: "sum".to_string(), - op: BinOpKind::Add, - rhs: UpdateRhs::Variable("unknown_var".to_string()), - }; - let env = test_env(); - - let mut value_counter = 100u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = - emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); - - assert!(result.is_err()); - assert!(result - .unwrap_err() - .contains("Update RHS variable 'unknown_var' not found")); - } - - // ============================================================================ - // Phase 184: UpdateEnv version tests - // ============================================================================ - - #[test] - fn 
test_emit_update_with_env_body_local_variable() { - // Phase 184: Test using body-local variable in update expression - // sum = sum + temp (temp is body-local) - let carrier = test_carrier("sum", 200); - let update = UpdateExpr::BinOp { - lhs: "sum".to_string(), - op: BinOpKind::Add, - rhs: UpdateRhs::Variable("temp".to_string()), // Body-local variable - }; - - let (cond_env, body_env) = test_update_env(); - let promoted: Vec = vec![]; - let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); - - let mut value_counter = 110u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = emit_carrier_update_with_env( - &carrier, - &update, - &mut alloc_value, - &update_env, - &mut instructions, - ); - - assert!(result.is_ok()); - let result_id = result.unwrap(); - - // Should generate 1 instruction: BinOp(Add, sum, temp) - assert_eq!(instructions.len(), 1); - - match &instructions[0] { - JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { - assert_eq!(*dst, ValueId(110)); - assert_eq!(*op, BinOpKind::Add); - assert_eq!(*lhs, ValueId(20)); // sum from condition env - assert_eq!(*rhs, ValueId(50)); // temp from body-local env - } - _ => panic!("Expected BinOp instruction"), - } - - assert_eq!(result_id, ValueId(110)); - } - - #[test] - fn test_emit_update_with_env_condition_priority() { - // Phase 184: Test condition variable takes priority over body-local - // If both envs have "x", condition env should win - let mut cond_env = ConditionEnv::new(); - cond_env.insert("x".to_string(), ValueId(100)); // Condition: x=100 - cond_env.insert("sum".to_string(), ValueId(20)); - - let mut body_env = LoopBodyLocalEnv::new(); - body_env.insert("x".to_string(), ValueId(200)); // Body-local: x=200 (should be ignored) - - let promoted: Vec = vec![]; - let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); - - let carrier = test_carrier("sum", 200); - let 
update = UpdateExpr::BinOp { - lhs: "sum".to_string(), - op: BinOpKind::Add, - rhs: UpdateRhs::Variable("x".to_string()), - }; - - let mut value_counter = 120u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = emit_carrier_update_with_env( - &carrier, - &update, - &mut alloc_value, - &update_env, - &mut instructions, - ); - - assert!(result.is_ok()); - - // Should use x=100 (condition env), not x=200 (body-local env) - match &instructions[0] { - JoinInst::Compute(MirLikeInst::BinOp { - dst: _, - op: _, - lhs: _, - rhs, - }) => { - assert_eq!(*rhs, ValueId(100)); // Condition env wins - } - _ => panic!("Expected BinOp instruction"), - } - } - - #[test] - fn test_emit_update_with_env_variable_not_found() { - // Phase 184: Test error when variable not in either env - let (cond_env, body_env) = test_update_env(); - let promoted: Vec = vec![]; - let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); - - let carrier = test_carrier("sum", 200); - let update = UpdateExpr::BinOp { - lhs: "sum".to_string(), - op: BinOpKind::Add, - rhs: UpdateRhs::Variable("nonexistent".to_string()), - }; - - let mut value_counter = 130u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = emit_carrier_update_with_env( - &carrier, - &update, - &mut alloc_value, - &update_env, - &mut instructions, - ); - - assert!(result.is_err()); - let err = result.unwrap_err(); - assert!(err.contains("Update RHS variable 'nonexistent' not found")); - assert!(err.contains("neither condition nor body-local")); - } - - #[test] - fn test_emit_update_with_env_const_update() { - // Phase 184: Test UpdateEnv with simple const update (baseline) - let (cond_env, body_env) = test_update_env(); - let promoted: Vec = vec![]; - let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); - - let carrier = 
test_carrier("count", 100); - let update = UpdateExpr::Const(1); - - let mut value_counter = 140u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = emit_carrier_update_with_env( - &carrier, - &update, - &mut alloc_value, - &update_env, - &mut instructions, - ); - - assert!(result.is_ok()); - assert_eq!(instructions.len(), 2); // Const + BinOp - } - - #[test] - fn test_emit_number_accumulation_base10() { - // Phase 190: Test number accumulation pattern: result = result * 10 + digit - let mut cond_env = ConditionEnv::new(); - cond_env.insert("result".to_string(), ValueId(20)); // Carrier parameter - cond_env.insert("digit".to_string(), ValueId(30)); // Digit variable - - let body_env = LoopBodyLocalEnv::new(); - let promoted: Vec = vec![]; - let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); - - let carrier = test_carrier("result", 200); - let update = UpdateExpr::BinOp { - lhs: "result".to_string(), - op: BinOpKind::Add, - rhs: UpdateRhs::NumberAccumulation { - base: 10, - digit_var: "digit".to_string(), - }, - }; - - let mut value_counter = 150u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let result = emit_carrier_update_with_env( - &carrier, - &update, - &mut alloc_value, - &update_env, - &mut instructions, - ); - - assert!(result.is_ok()); - let result_id = result.unwrap(); - - // Should generate 3 instructions: - // 1. Const(10) for base - // 2. BinOp(Mul, result, base) for tmp - // 3. 
BinOp(Add, tmp, digit) for final result - assert_eq!(instructions.len(), 3); - - // Instruction 1: Const(10) - match &instructions[0] { - JoinInst::Compute(MirLikeInst::Const { dst, value }) => { - assert_eq!(*dst, ValueId(150)); // First allocated - assert!(matches!(value, ConstValue::Integer(10))); - } - _ => panic!("Expected Const instruction"), - } - - // Instruction 2: BinOp(Mul, result, base) - match &instructions[1] { - JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { - assert_eq!(*dst, ValueId(151)); // Second allocated (tmp) - assert_eq!(*op, BinOpKind::Mul); - assert_eq!(*lhs, ValueId(20)); // result from env - assert_eq!(*rhs, ValueId(150)); // base const - } - _ => panic!("Expected BinOp(Mul) instruction"), - } - - // Instruction 3: BinOp(Add, tmp, digit) - match &instructions[2] { - JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { - assert_eq!(*dst, ValueId(152)); // Third allocated (final result) - assert_eq!(*op, BinOpKind::Add); - assert_eq!(*lhs, ValueId(151)); // tmp from previous mul - assert_eq!(*rhs, ValueId(30)); // digit from env - } - _ => panic!("Expected BinOp(Add) instruction"), - } - - assert_eq!(result_id, ValueId(152)); - } - - #[test] - fn test_emit_number_accumulation_digit_not_found() { - // Phase 190: Test error when digit variable not in env - let mut cond_env = ConditionEnv::new(); - cond_env.insert("result".to_string(), ValueId(20)); - // Note: digit NOT in env - - let body_env = LoopBodyLocalEnv::new(); - let promoted: Vec = vec![]; - let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); - - let carrier = test_carrier("result", 200); - let update = UpdateExpr::BinOp { - lhs: "result".to_string(), - op: BinOpKind::Add, - rhs: UpdateRhs::NumberAccumulation { - base: 10, - digit_var: "digit".to_string(), - }, - }; - - let mut value_counter = 160u32; - let mut alloc_value = || { - let id = ValueId(value_counter); - value_counter += 1; - id - }; - - let mut instructions = Vec::new(); - let 
result = emit_carrier_update_with_env( - &carrier, - &update, - &mut alloc_value, - &update_env, - &mut instructions, - ); - - assert!(result.is_err()); - let err = result.unwrap_err(); - assert!(err.contains("Number accumulation digit variable 'digit' not found")); - } -} diff --git a/src/mir/join_ir/lowering/carrier_update_emitter/conditional_step.rs b/src/mir/join_ir/lowering/carrier_update_emitter/conditional_step.rs new file mode 100644 index 00000000..fab09eea --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_update_emitter/conditional_step.rs @@ -0,0 +1,97 @@ +use crate::ast::ASTNode; +use crate::mir::join_ir::lowering::carrier_info::CarrierVar; +use crate::mir::join_ir::lowering::condition_env::ConditionEnv; +use crate::mir::join_ir::lowering::condition_lowerer::lower_condition_to_joinir_no_body_locals; +use crate::mir::join_ir::{BinOpKind, ConstValue, JoinInst, MirLikeInst, VarId}; +use crate::mir::MirType; +use crate::mir::ValueId; + +// Phase 92 P0-3: ConditionalStep Support +// ============================================================================ + +/// Emit JoinIR instructions for conditional step update (Phase 92 P0-3) +/// +/// Handles the P5b escape sequence pattern where carrier update depends on a condition: +/// ```text +/// if escape_cond { carrier = carrier + then_delta } +/// else { carrier = carrier + else_delta } +/// ``` +/// +/// This generates: +/// 1. Lower condition expression to get cond_id +/// 2. Compute then_result = carrier + then_delta +/// 3. Compute else_result = carrier + else_delta +/// 4. 
JoinInst::Select { dst: carrier_new, cond: cond_id, then_val: then_result, else_val: else_result } +/// +/// # Arguments +/// +/// * `carrier` - Carrier variable information (name, ValueId) +/// * `cond_ast` - AST node for the condition expression (e.g., `ch == '\\'`) +/// * `then_delta` - Delta to add when condition is true +/// * `else_delta` - Delta to add when condition is false +/// * `alloc_value` - ValueId allocator closure +/// * `env` - ConditionEnv for variable resolution +/// * `instructions` - Output vector to append instructions to +/// +/// # Returns +/// +/// ValueId of the computed update result (the dst of Select) +pub fn emit_conditional_step_update( + carrier: &CarrierVar, + cond_ast: &ASTNode, + then_delta: i64, + else_delta: i64, + alloc_value: &mut dyn FnMut() -> ValueId, + env: &ConditionEnv, + instructions: &mut Vec, +) -> Result { + // Step 1: Lower the condition expression + // Phase 92 P2-2: No body-local support in legacy emitter (use common/conditional_step_emitter instead) + let (cond_id, cond_insts) = lower_condition_to_joinir_no_body_locals(cond_ast, alloc_value, env)?; + instructions.extend(cond_insts); + + // Step 2: Get carrier parameter ValueId from env + let carrier_param = env + .get(&carrier.name) + .ok_or_else(|| format!("Carrier '{}' not found in ConditionEnv", carrier.name))?; + + // Step 3: Compute then_result = carrier + then_delta + let then_const_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: then_const_id, + value: ConstValue::Integer(then_delta), + })); + let then_result = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: then_result, + op: BinOpKind::Add, + lhs: carrier_param, + rhs: then_const_id, + })); + + // Step 4: Compute else_result = carrier + else_delta + let else_const_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: else_const_id, + value: ConstValue::Integer(else_delta), + })); + let else_result = 
alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: else_result, + op: BinOpKind::Add, + lhs: carrier_param, + rhs: else_const_id, + })); + + // Step 5: Emit Select instruction + let carrier_new: VarId = alloc_value(); + instructions.push(JoinInst::Select { + dst: carrier_new, + cond: cond_id, + then_val: then_result, + else_val: else_result, + type_hint: Some(MirType::Integer), // Carrier is always Integer + }); + + Ok(carrier_new) +} diff --git a/src/mir/join_ir/lowering/carrier_update_emitter/legacy.rs b/src/mir/join_ir/lowering/carrier_update_emitter/legacy.rs new file mode 100644 index 00000000..e5d6d80e --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_update_emitter/legacy.rs @@ -0,0 +1,176 @@ +use crate::mir::join_ir::lowering::carrier_info::CarrierVar; +use crate::mir::join_ir::lowering::condition_env::ConditionEnv; +use crate::mir::join_ir::lowering::loop_update_analyzer::{UpdateExpr, UpdateRhs}; +use crate::mir::join_ir::{BinOpKind, ConstValue, JoinInst, MirLikeInst}; +use crate::mir::ValueId; + +/// Emit JoinIR instructions for a single carrier update (backward compatibility version) +/// +/// This function is kept for backward compatibility with existing Pattern2/4 code +/// that only needs ConditionEnv. New code should prefer `emit_carrier_update_with_env`. 
+/// +/// # Arguments +/// +/// * `carrier` - Carrier variable information (name, ValueId) +/// * `update` - Update expression (e.g., CounterLike, AccumulationLike) +/// * `alloc_value` - ValueId allocator closure +/// * `env` - ConditionEnv for variable resolution +/// * `instructions` - Output vector to append instructions to +/// +/// # Returns +/// +/// ValueId of the computed update result +/// +/// # Example +/// +/// ```ignore +/// // For "count = count + 1": +/// let count_next = emit_carrier_update( +/// &count_carrier, +/// &UpdateExpr::BinOp { lhs: "count", op: Add, rhs: Const(1) }, +/// &mut alloc_value, +/// &env, +/// &mut instructions, +/// )?; +/// // Generates: +/// // const_1 = Const(1) +/// // count_next = BinOp(Add, count_param, const_1) +/// ``` +pub fn emit_carrier_update( + carrier: &CarrierVar, + update: &UpdateExpr, + alloc_value: &mut dyn FnMut() -> ValueId, + env: &ConditionEnv, + instructions: &mut Vec, +) -> Result { + match update { + UpdateExpr::Const(step) => { + // CounterLike: carrier = carrier + step + // Allocate const ValueId + let const_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: const_id, + value: ConstValue::Integer(*step), + })); + + // Get carrier parameter ValueId from env + let carrier_param = env + .get(&carrier.name) + .ok_or_else(|| format!("Carrier '{}' not found in ConditionEnv", carrier.name))?; + + // Allocate result ValueId + let result = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: result, + op: BinOpKind::Add, + lhs: carrier_param, + rhs: const_id, + })); + + Ok(result) + } + + UpdateExpr::BinOp { lhs, op, rhs } => { + // General binary operation: carrier = carrier op rhs + // Verify lhs matches carrier name + if lhs != &carrier.name { + return Err(format!( + "Update expression LHS '{}' doesn't match carrier '{}'", + lhs, carrier.name + )); + } + + // Get carrier parameter ValueId from env + let carrier_param = env + 
.get(&carrier.name) + .ok_or_else(|| format!("Carrier '{}' not found in ConditionEnv", carrier.name))?; + + // Resolve RHS + let rhs_id = match rhs { + UpdateRhs::Const(n) => { + let const_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: const_id, + value: ConstValue::Integer(*n), + })); + const_id + } + UpdateRhs::Variable(var_name) => env.get(var_name).ok_or_else(|| { + format!( + "Update RHS variable '{}' not found in ConditionEnv", + var_name + ) + })?, + // Phase 188: String updates now emit JoinIR BinOp + // StringAppendLiteral: s = s + "literal" + UpdateRhs::StringLiteral(s) => { + let const_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: const_id, + value: ConstValue::String(s.clone()), + })); + const_id + } + // Phase 190: Number accumulation pattern: result = result * base + digit + // Emit as: tmp = carrier * base; result = tmp + digit + UpdateRhs::NumberAccumulation { base, digit_var } => { + // Step 1: Emit const for base + let base_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: base_id, + value: ConstValue::Integer(*base), + })); + + // Step 2: Emit multiplication: tmp = carrier * base + let tmp_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: tmp_id, + op: BinOpKind::Mul, + lhs: carrier_param, + rhs: base_id, + })); + + // Step 3: Resolve digit variable + let digit_id = env.get(digit_var).ok_or_else(|| { + format!( + "Number accumulation digit variable '{}' not found in ConditionEnv", + digit_var + ) + })?; + + // Step 4: Emit addition: result = tmp + digit + let result = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: result, + op: *op, // Use the operation from outer UpdateExpr + lhs: tmp_id, + rhs: digit_id, + })); + + // Return result directly - we've already emitted everything + return Ok(result); + } + // Phase 178/188: Complex updates (method calls) still 
rejected + UpdateRhs::Other => { + return Err(format!( + "Carrier '{}' has complex update (UpdateRhs::Other) - should be rejected by can_lower()", + carrier.name + )); + } + }; + + // Allocate result ValueId + let result = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: result, + op: *op, + lhs: carrier_param, + rhs: rhs_id, + })); + + Ok(result) + } + } +} + +// ============================================================================ diff --git a/src/mir/join_ir/lowering/carrier_update_emitter/mod.rs b/src/mir/join_ir/lowering/carrier_update_emitter/mod.rs new file mode 100644 index 00000000..ae1f5d28 --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_update_emitter/mod.rs @@ -0,0 +1,20 @@ +//! Phase 176-2 / Phase 179 / Phase 184: Carrier Update Emission +//! +//! Converts UpdateExpr (from LoopUpdateAnalyzer) into JoinIR instructions +//! that compute the updated carrier value. +//! +//! This module is extracted from loop_with_break_minimal.rs to improve +//! modularity and single responsibility. +//! +//! Phase 184: Added UpdateEnv support for body-local variable resolution. 
+ +mod conditional_step; +mod legacy; +mod with_env; +#[cfg(test)] +mod tests; + +#[allow(unused_imports)] +pub use conditional_step::emit_conditional_step_update; +pub use legacy::emit_carrier_update; +pub use with_env::emit_carrier_update_with_env; diff --git a/src/mir/join_ir/lowering/carrier_update_emitter/tests.rs b/src/mir/join_ir/lowering/carrier_update_emitter/tests.rs new file mode 100644 index 00000000..7484739c --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_update_emitter/tests.rs @@ -0,0 +1,556 @@ +use super::*; +use crate::mir::join_ir::lowering::carrier_info::CarrierVar; +use crate::mir::join_ir::lowering::condition_env::ConditionEnv; +use crate::mir::join_ir::lowering::loop_body_local_env::LoopBodyLocalEnv; +use crate::mir::join_ir::lowering::loop_update_analyzer::UpdateExpr; +use crate::mir::join_ir::lowering::loop_update_analyzer::UpdateRhs; +use crate::mir::join_ir::lowering::update_env::UpdateEnv; +use crate::mir::join_ir::BinOpKind; +use crate::mir::join_ir::{ConstValue, JoinInst, MirLikeInst}; +use crate::mir::ValueId; + +// Helper: Create a test ConditionEnv +fn test_env() -> ConditionEnv { + let mut env = ConditionEnv::new(); + env.insert("count".to_string(), ValueId(10)); + env.insert("sum".to_string(), ValueId(20)); + env.insert("i".to_string(), ValueId(30)); + env +} + +// Helper: Create a test LoopBodyLocalEnv +fn test_body_local_env() -> LoopBodyLocalEnv { + let mut env = LoopBodyLocalEnv::new(); + env.insert("temp".to_string(), ValueId(50)); + env.insert("digit".to_string(), ValueId(60)); + env +} + +// Helper: Create a test UpdateEnv +fn test_update_env() -> (ConditionEnv, LoopBodyLocalEnv) { + (test_env(), test_body_local_env()) +} + +// Helper: Create a test CarrierVar +fn test_carrier(name: &str, host_id: u32) -> CarrierVar { + CarrierVar { + name: name.to_string(), + host_id: ValueId(host_id), + join_id: None, // Phase 177-STRUCT-1 + role: crate::mir::join_ir::lowering::carrier_info::CarrierRole::LoopState, + init: 
crate::mir::join_ir::lowering::carrier_info::CarrierInit::FromHost, // Phase 228 + #[cfg(feature = "normalized_dev")] + binding_id: None, + } +} + +#[test] +fn test_emit_const_update() { + // Test: count = count + 1 (UpdateExpr::Const) + let carrier = test_carrier("count", 100); + let update = UpdateExpr::Const(1); + let env = test_env(); + + let mut value_counter = 50u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = + emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); + + assert!(result.is_ok()); + let result_id = result.unwrap(); + + // Should generate 2 instructions: Const(1) + BinOp(Add) + assert_eq!(instructions.len(), 2); + + // Instruction 1: Const(1) + match &instructions[0] { + JoinInst::Compute(MirLikeInst::Const { dst, value }) => { + assert_eq!(*dst, ValueId(50)); // First allocated + assert!(matches!(value, ConstValue::Integer(1))); + } + _ => panic!("Expected Const instruction"), + } + + // Instruction 2: BinOp(Add, count, const_1) + match &instructions[1] { + JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { + assert_eq!(*dst, ValueId(51)); // Second allocated + assert_eq!(*op, BinOpKind::Add); + assert_eq!(*lhs, ValueId(10)); // count from env + assert_eq!(*rhs, ValueId(50)); // const_1 + } + _ => panic!("Expected BinOp instruction"), + } + + assert_eq!(result_id, ValueId(51)); +} + +#[test] +fn test_emit_binop_update_with_const() { + // Test: sum = sum + 5 (UpdateExpr::BinOp with Const RHS) + let carrier = test_carrier("sum", 200); + let update = UpdateExpr::BinOp { + lhs: "sum".to_string(), + op: BinOpKind::Add, + rhs: UpdateRhs::Const(5), + }; + let env = test_env(); + + let mut value_counter = 60u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = + emit_carrier_update(&carrier, &update, &mut 
alloc_value, &env, &mut instructions); + + assert!(result.is_ok()); + let result_id = result.unwrap(); + + // Should generate 2 instructions: Const(5) + BinOp(Add) + assert_eq!(instructions.len(), 2); + + // Instruction 1: Const(5) + match &instructions[0] { + JoinInst::Compute(MirLikeInst::Const { dst, value }) => { + assert_eq!(*dst, ValueId(60)); + assert!(matches!(value, ConstValue::Integer(5))); + } + _ => panic!("Expected Const instruction"), + } + + // Instruction 2: BinOp(Add, sum, const_5) + match &instructions[1] { + JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { + assert_eq!(*dst, ValueId(61)); + assert_eq!(*op, BinOpKind::Add); + assert_eq!(*lhs, ValueId(20)); // sum from env + assert_eq!(*rhs, ValueId(60)); // const_5 + } + _ => panic!("Expected BinOp instruction"), + } + + assert_eq!(result_id, ValueId(61)); +} + +#[test] +fn test_emit_binop_update_with_variable() { + // Test: sum = sum + i (UpdateExpr::BinOp with Variable RHS) + let carrier = test_carrier("sum", 200); + let update = UpdateExpr::BinOp { + lhs: "sum".to_string(), + op: BinOpKind::Add, + rhs: UpdateRhs::Variable("i".to_string()), + }; + let env = test_env(); + + let mut value_counter = 70u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = + emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); + + assert!(result.is_ok()); + let result_id = result.unwrap(); + + // Should generate 1 instruction: BinOp(Add, sum, i) + assert_eq!(instructions.len(), 1); + + // Instruction: BinOp(Add, sum, i) + match &instructions[0] { + JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { + assert_eq!(*dst, ValueId(70)); + assert_eq!(*op, BinOpKind::Add); + assert_eq!(*lhs, ValueId(20)); // sum from env + assert_eq!(*rhs, ValueId(30)); // i from env + } + _ => panic!("Expected BinOp instruction"), + } + + assert_eq!(result_id, ValueId(70)); +} + 
+#[test] +fn test_emit_update_carrier_not_in_env() { + // Test error case: carrier not found in env + let carrier = test_carrier("unknown", 300); + let update = UpdateExpr::Const(1); + let env = test_env(); // doesn't have "unknown" + + let mut value_counter = 80u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = + emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); + + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Carrier 'unknown' not found")); +} + +#[test] +fn test_emit_update_lhs_mismatch() { + // Test error case: LHS doesn't match carrier name + let carrier = test_carrier("count", 100); + let update = UpdateExpr::BinOp { + lhs: "sum".to_string(), // Wrong! Should be "count" + op: BinOpKind::Add, + rhs: UpdateRhs::Const(1), + }; + let env = test_env(); + + let mut value_counter = 90u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = + emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); + + assert!(result.is_err()); + assert!(result.unwrap_err().contains("doesn't match carrier")); +} + +#[test] +fn test_emit_update_rhs_variable_not_found() { + // Test error case: RHS variable not in env + let carrier = test_carrier("sum", 200); + let update = UpdateExpr::BinOp { + lhs: "sum".to_string(), + op: BinOpKind::Add, + rhs: UpdateRhs::Variable("unknown_var".to_string()), + }; + let env = test_env(); + + let mut value_counter = 100u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = + emit_carrier_update(&carrier, &update, &mut alloc_value, &env, &mut instructions); + + assert!(result.is_err()); + assert!(result + .unwrap_err() + .contains("Update RHS variable 'unknown_var' not found")); 
+} + +// ============================================================================ +// Phase 184: UpdateEnv version tests +// ============================================================================ + +#[test] +fn test_emit_update_with_env_body_local_variable() { + // Phase 184: Test using body-local variable in update expression + // sum = sum + temp (temp is body-local) + let carrier = test_carrier("sum", 200); + let update = UpdateExpr::BinOp { + lhs: "sum".to_string(), + op: BinOpKind::Add, + rhs: UpdateRhs::Variable("temp".to_string()), // Body-local variable + }; + + let (cond_env, body_env) = test_update_env(); + let promoted: Vec = vec![]; + let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); + + let mut value_counter = 110u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = emit_carrier_update_with_env( + &carrier, + &update, + &mut alloc_value, + &update_env, + &mut instructions, + ); + + assert!(result.is_ok()); + let result_id = result.unwrap(); + + // Should generate 1 instruction: BinOp(Add, sum, temp) + assert_eq!(instructions.len(), 1); + + match &instructions[0] { + JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { + assert_eq!(*dst, ValueId(110)); + assert_eq!(*op, BinOpKind::Add); + assert_eq!(*lhs, ValueId(20)); // sum from condition env + assert_eq!(*rhs, ValueId(50)); // temp from body-local env + } + _ => panic!("Expected BinOp instruction"), + } + + assert_eq!(result_id, ValueId(110)); +} + +#[test] +fn test_emit_update_with_env_condition_priority() { + // Phase 184: Test condition variable takes priority over body-local + // If both envs have "x", condition env should win + let mut cond_env = ConditionEnv::new(); + cond_env.insert("x".to_string(), ValueId(100)); // Condition: x=100 + cond_env.insert("sum".to_string(), ValueId(20)); + + let mut body_env = LoopBodyLocalEnv::new(); + 
body_env.insert("x".to_string(), ValueId(200)); // Body-local: x=200 (should be ignored) + + let promoted: Vec = vec![]; + let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); + + let carrier = test_carrier("sum", 200); + let update = UpdateExpr::BinOp { + lhs: "sum".to_string(), + op: BinOpKind::Add, + rhs: UpdateRhs::Variable("x".to_string()), + }; + + let mut value_counter = 120u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = emit_carrier_update_with_env( + &carrier, + &update, + &mut alloc_value, + &update_env, + &mut instructions, + ); + + assert!(result.is_ok()); + + // Should use x=100 (condition env), not x=200 (body-local env) + match &instructions[0] { + JoinInst::Compute(MirLikeInst::BinOp { + dst: _, + op: _, + lhs: _, + rhs, + }) => { + assert_eq!(*rhs, ValueId(100)); // Condition env wins + } + _ => panic!("Expected BinOp instruction"), + } +} + +#[test] +fn test_emit_update_with_env_variable_not_found() { + // Phase 184: Test error when variable not in either env + let (cond_env, body_env) = test_update_env(); + let promoted: Vec = vec![]; + let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); + + let carrier = test_carrier("sum", 200); + let update = UpdateExpr::BinOp { + lhs: "sum".to_string(), + op: BinOpKind::Add, + rhs: UpdateRhs::Variable("nonexistent".to_string()), + }; + + let mut value_counter = 130u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = emit_carrier_update_with_env( + &carrier, + &update, + &mut alloc_value, + &update_env, + &mut instructions, + ); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("Update RHS variable 'nonexistent' not found")); + assert!(err.contains("neither condition nor body-local")); +} + +#[test] +fn test_emit_update_with_env_const_update() 
{ + // Phase 184: Test UpdateEnv with simple const update (baseline) + let (cond_env, body_env) = test_update_env(); + let promoted: Vec = vec![]; + let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); + + let carrier = test_carrier("count", 100); + let update = UpdateExpr::Const(1); + + let mut value_counter = 140u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = emit_carrier_update_with_env( + &carrier, + &update, + &mut alloc_value, + &update_env, + &mut instructions, + ); + + assert!(result.is_ok()); + assert_eq!(instructions.len(), 2); // Const + BinOp +} + +#[test] +fn test_emit_number_accumulation_base10() { + // Phase 190: Test number accumulation pattern: result = result * 10 + digit + let mut cond_env = ConditionEnv::new(); + cond_env.insert("result".to_string(), ValueId(20)); // Carrier parameter + cond_env.insert("digit".to_string(), ValueId(30)); // Digit variable + + let body_env = LoopBodyLocalEnv::new(); + let promoted: Vec = vec![]; + let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); + + let carrier = test_carrier("result", 200); + let update = UpdateExpr::BinOp { + lhs: "result".to_string(), + op: BinOpKind::Add, + rhs: UpdateRhs::NumberAccumulation { + base: 10, + digit_var: "digit".to_string(), + }, + }; + + let mut value_counter = 150u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let result = emit_carrier_update_with_env( + &carrier, + &update, + &mut alloc_value, + &update_env, + &mut instructions, + ); + + assert!(result.is_ok()); + let result_id = result.unwrap(); + + // Should generate 3 instructions: + // 1. Const(10) for base + // 2. BinOp(Mul, result, base) for tmp + // 3. 
BinOp(Add, tmp, digit) for final result + assert_eq!(instructions.len(), 3); + + // Instruction 1: Const(10) + match &instructions[0] { + JoinInst::Compute(MirLikeInst::Const { dst, value }) => { + assert_eq!(*dst, ValueId(150)); // First allocated + assert!(matches!(value, ConstValue::Integer(10))); + } + _ => panic!("Expected Const instruction"), + } + + // Instruction 2: BinOp(Mul, result, base) + match &instructions[1] { + JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { + assert_eq!(*dst, ValueId(151)); // Second allocated (tmp) + assert_eq!(*op, BinOpKind::Mul); + assert_eq!(*lhs, ValueId(20)); // result from env + assert_eq!(*rhs, ValueId(150)); // base const + } + _ => panic!("Expected BinOp(Mul) instruction"), + } + + // Instruction 3: BinOp(Add, tmp, digit) + match &instructions[2] { + JoinInst::Compute(MirLikeInst::BinOp { dst, op, lhs, rhs }) => { + assert_eq!(*dst, ValueId(152)); // Third allocated (final result) + assert_eq!(*op, BinOpKind::Add); + assert_eq!(*lhs, ValueId(151)); // tmp from previous mul + assert_eq!(*rhs, ValueId(30)); // digit from env + } + _ => panic!("Expected BinOp(Add) instruction"), + } + + assert_eq!(result_id, ValueId(152)); +} + +#[test] +fn test_emit_number_accumulation_digit_not_found() { + // Phase 190: Test error when digit variable not in env + let mut cond_env = ConditionEnv::new(); + cond_env.insert("result".to_string(), ValueId(20)); + // Note: digit NOT in env + + let body_env = LoopBodyLocalEnv::new(); + let promoted: Vec = vec![]; + let update_env = UpdateEnv::new(&cond_env, &body_env, &promoted); + + let carrier = test_carrier("result", 200); + let update = UpdateExpr::BinOp { + lhs: "result".to_string(), + op: BinOpKind::Add, + rhs: UpdateRhs::NumberAccumulation { + base: 10, + digit_var: "digit".to_string(), + }, + }; + + let mut value_counter = 160u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; + + let mut instructions = Vec::new(); + let 
result = emit_carrier_update_with_env( + &carrier, + &update, + &mut alloc_value, + &update_env, + &mut instructions, + ); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("Number accumulation digit variable 'digit' not found")); +} diff --git a/src/mir/join_ir/lowering/carrier_update_emitter/with_env.rs b/src/mir/join_ir/lowering/carrier_update_emitter/with_env.rs new file mode 100644 index 00000000..34268a65 --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_update_emitter/with_env.rs @@ -0,0 +1,183 @@ +use crate::mir::join_ir::lowering::carrier_info::CarrierVar; +use crate::mir::join_ir::lowering::loop_update_analyzer::{UpdateExpr, UpdateRhs}; +use crate::mir::join_ir::lowering::update_env::UpdateEnv; +use crate::mir::join_ir::{BinOpKind, ConstValue, JoinInst, MirLikeInst}; +use crate::mir::ValueId; + +/// Emit JoinIR instructions for a single carrier update (Phase 184: UpdateEnv version) +/// +/// Converts UpdateExpr (from LoopUpdateAnalyzer) into JoinIR instructions +/// that compute the updated carrier value. Supports both condition variables +/// and body-local variables through UpdateEnv. 
+/// +/// # Arguments +/// +/// * `carrier` - Carrier variable information (name, ValueId) +/// * `update` - Update expression (e.g., CounterLike, AccumulationLike) +/// * `alloc_value` - ValueId allocator closure +/// * `env` - UpdateEnv for unified variable resolution +/// * `instructions` - Output vector to append instructions to +/// +/// # Returns +/// +/// ValueId of the computed update result +/// +/// # Example +/// +/// ```ignore +/// // For "count = count + temp": +/// let count_next = emit_carrier_update_with_env( +/// &count_carrier, +/// &UpdateExpr::BinOp { lhs: "count", op: Add, rhs: Variable("temp") }, +/// &mut alloc_value, +/// &update_env, // Has both condition and body-local vars +/// &mut instructions, +/// )?; +/// // Generates: +/// // count_next = BinOp(Add, count_param, temp_value) +/// ``` +pub fn emit_carrier_update_with_env( + carrier: &CarrierVar, + update: &UpdateExpr, + alloc_value: &mut dyn FnMut() -> ValueId, + env: &UpdateEnv, + instructions: &mut Vec, +) -> Result { + match update { + UpdateExpr::Const(step) => { + // CounterLike: carrier = carrier + step + // Allocate const ValueId + let const_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: const_id, + value: ConstValue::Integer(*step), + })); + + // Get carrier parameter ValueId from env + let carrier_param = env + .resolve(&carrier.name) + .ok_or_else(|| format!("Carrier '{}' not found in UpdateEnv", carrier.name))?; + + // Allocate result ValueId + let result = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: result, + op: BinOpKind::Add, + lhs: carrier_param, + rhs: const_id, + })); + + Ok(result) + } + + UpdateExpr::BinOp { lhs, op, rhs } => { + // General binary operation: carrier = carrier op rhs + // Verify lhs matches carrier name + if lhs != &carrier.name { + return Err(format!( + "Update expression LHS '{}' doesn't match carrier '{}'", + lhs, carrier.name + )); + } + + // Get carrier parameter 
ValueId from env + let carrier_param = env + .resolve(&carrier.name) + .ok_or_else(|| format!("Carrier '{}' not found in UpdateEnv", carrier.name))?; + + // Resolve RHS (Phase 184: Now supports body-local variables!) + let rhs_id = match rhs { + UpdateRhs::Const(n) => { + let const_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: const_id, + value: ConstValue::Integer(*n), + })); + const_id + } + UpdateRhs::Variable(var_name) => { + env.resolve(var_name).ok_or_else(|| { + format!( + "Update RHS variable '{}' not found in UpdateEnv (neither condition nor body-local)", + var_name + ) + })? + } + // Phase 188: String updates now emit JoinIR BinOp + // StringAppendLiteral: s = s + "literal" + UpdateRhs::StringLiteral(s) => { + let const_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: const_id, + value: ConstValue::String(s.clone()), + })); + const_id + } + // Phase 190: Number accumulation pattern: result = result * base + digit + // Emit as: tmp = carrier * base; result = tmp + digit + UpdateRhs::NumberAccumulation { base, digit_var } => { + // Step 1: Emit const for base + let base_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: base_id, + value: ConstValue::Integer(*base), + })); + + // Step 2: Emit multiplication: tmp = carrier * base + let tmp_id = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: tmp_id, + op: BinOpKind::Mul, + lhs: carrier_param, + rhs: base_id, + })); + + // Step 3: Resolve digit variable + let digit_id = env.resolve(digit_var).ok_or_else(|| { + format!( + "Number accumulation digit variable '{}' not found in UpdateEnv", + digit_var + ) + })?; + + // Step 4: Emit addition: result = tmp + digit + // This will be handled by the outer BinOp emission + // For now, return digit_id to be used as RHS + // We need to handle this specially - return tmp_id instead + // and adjust the outer BinOp to use correct 
values + + // Actually, we need to emit both operations here + // Final result = tmp + digit + let result = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: result, + op: *op, // Use the operation from outer UpdateExpr + lhs: tmp_id, + rhs: digit_id, + })); + + // Return result directly - we've already emitted everything + return Ok(result); + } + // Phase 178/188: Complex updates (method calls) still rejected + UpdateRhs::Other => { + return Err(format!( + "Carrier '{}' has complex update (UpdateRhs::Other) - should be rejected by can_lower()", + carrier.name + )); + } + }; + + // Allocate result ValueId + let result = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::BinOp { + dst: result, + op: *op, + lhs: carrier_param, + rhs: rhs_id, + })); + + Ok(result) + } + } +} diff --git a/src/mir/loop_pattern_detection/function_scope_capture/analyzers.rs b/src/mir/loop_pattern_detection/function_scope_capture/analyzers.rs deleted file mode 100644 index b1a2cc80..00000000 --- a/src/mir/loop_pattern_detection/function_scope_capture/analyzers.rs +++ /dev/null @@ -1,1018 +0,0 @@ -//! Core analysis functions for function scope capture - -use crate::ast::ASTNode; -use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape; -use crate::mir::ValueId; -use std::collections::BTreeSet; - -use super::helpers::*; -use super::types::{CapturedEnv, CapturedKind, CapturedVar}; - -/// Analyzes function-scoped variables that can be safely captured for loop conditions/body. -/// -/// # Phase 200-B Implementation -/// -/// Detects function-scoped variables that are effectively immutable constants -/// within a loop context (e.g., `digits` in JsonParser._atoi()). -/// -/// # Detection Criteria -/// -/// A variable is captured if ALL of the following conditions are met: -/// -/// 1. **Declared before the loop**: Variable must be declared in function scope before the loop -/// 2. 
**Safe constant init**: Initialized with string/integer literal only -/// 3. **Never reassigned**: Variable is never reassigned within the function (is_immutable = true) -/// 4. **Referenced in loop**: Variable is referenced in loop condition or body -/// 5. **Not a loop parameter**: Variable is not in scope.loop_params -/// 6. **Not a body-local**: Variable is not in scope.body_locals -/// -/// # Example -/// -/// ```nyash -/// method _atoi(s, pos, len) { -/// local digits = "0123456789" // ✅ Captured (declared before loop, never reassigned) -/// local value = 0 // ❌ Not captured (reassigned in loop body) -/// loop(pos < len) { -/// local ch = s.charAt(pos) // ❌ Not captured (body-local) -/// local digit = digits.indexOf(ch) -/// value = value * 10 + digit -/// pos = pos + 1 -/// } -/// } -/// ``` -/// -/// # Arguments -/// -/// * `fn_body` - AST nodes of the function body (for analysis) -/// * `loop_ast` - AST node of the loop statement -/// * `scope` - LoopScopeShape (for excluding loop params and body-locals) -/// -/// # Returns -/// -/// `CapturedEnv` containing all captured variables -#[allow(dead_code)] -pub(crate) fn analyze_captured_vars( - fn_body: &[ASTNode], - loop_ast: &ASTNode, - scope: &LoopScopeShape, -) -> CapturedEnv { - use std::env; - - let debug = env::var("NYASH_CAPTURE_DEBUG").is_ok(); - - if debug { - eprintln!("[capture/debug] Starting capture analysis"); - } - - // Step 1: Find loop position in fn_body - let loop_index = match find_stmt_index(fn_body, loop_ast) { - Some(idx) => idx, - None => { - if debug { - eprintln!( - "[capture/debug] Loop not found in function body, returning empty CapturedEnv" - ); - } - return CapturedEnv::new(); - } - }; - - if debug { - eprintln!("[capture/debug] Loop found at index {}", loop_index); - } - - // Step 2: Collect local declarations BEFORE the loop - let pre_loop_locals = collect_local_declarations(&fn_body[..loop_index]); - - if debug { - eprintln!( - "[capture/debug] Found {} pre-loop local 
declarations", - pre_loop_locals.len() - ); - } - - let mut env = CapturedEnv::new(); - - // Step 3: For each pre-loop local, check capture criteria - for (name, init_expr) in pre_loop_locals { - if debug { - eprintln!("[capture/check] Checking variable '{}'", name); - } - - // 3a: Is init expression a safe constant? - if !is_safe_const_init(&init_expr) { - if debug { - eprintln!("[capture/reject] '{}': init is not a safe constant", name); - } - continue; - } - - // 3b: Is this variable reassigned anywhere in fn_body? - if is_reassigned_in_fn(fn_body, &name) { - if debug { - eprintln!("[capture/reject] '{}': reassigned in function", name); - } - continue; - } - - // 3c: Is this variable used in loop (condition or body)? - if !is_used_in_loop(loop_ast, &name) { - if debug { - eprintln!("[capture/reject] '{}': not used in loop", name); - } - continue; - } - - // 3d: Skip if already in pinned, carriers, or body_locals - if scope.pinned.contains(&name) { - if debug { - eprintln!("[capture/reject] '{}': is a pinned variable", name); - } - continue; - } - - if scope.carriers.contains(&name) { - if debug { - eprintln!("[capture/reject] '{}': is a carrier variable", name); - } - continue; - } - - if scope.body_locals.contains(&name) { - if debug { - eprintln!("[capture/reject] '{}': is a body-local variable", name); - } - continue; - } - - // All checks passed: add to CapturedEnv - // Note: We don't have access to variable_map here, so we use a placeholder ValueId - // The actual host_id will be resolved in ConditionEnvBuilder - if debug { - eprintln!( - "[capture/accept] '{}': ALL CHECKS PASSED, adding to CapturedEnv", - name - ); - } - - env.add_var(CapturedVar { - name: name.clone(), - host_id: ValueId(0), // Placeholder, will be resolved in ConditionEnvBuilder - is_immutable: true, - kind: CapturedKind::Explicit, - }); - } - - if debug { - eprintln!( - "[capture/result] Captured {} variables: {:?}", - env.vars.len(), - env.vars.iter().map(|v| &v.name).collect::>() - ); 
- } - - env -} - -/// Phase 200-C: Analyze captured vars with condition/body instead of loop_ast -/// -/// This variant solves the pointer comparison problem when the loop AST is constructed -/// dynamically (e.g., in Pattern 2). Instead of passing a loop_ast reference, -/// we pass the condition and body directly and perform structural matching. -/// -/// # Arguments -/// -/// * `fn_body` - AST nodes of the function body (for analysis) -/// * `loop_condition` - Condition expression of the loop -/// * `loop_body` - Body statements of the loop -/// * `scope` - LoopScopeShape (for excluding loop params and body-locals) -/// -/// # Returns -/// -/// `CapturedEnv` containing all captured variables -#[allow(dead_code)] -pub(crate) fn analyze_captured_vars_v2( - fn_body: &[ASTNode], - loop_condition: &ASTNode, - loop_body: &[ASTNode], - scope: &LoopScopeShape, -) -> CapturedEnv { - use std::env; - - let debug = env::var("NYASH_CAPTURE_DEBUG").is_ok(); - - if debug { - eprintln!("[capture/debug] Starting capture analysis v2 (structural matching)"); - } - - // Step 1: Find loop position in fn_body by structural matching - let loop_index = find_loop_index_by_structure(fn_body, loop_condition, loop_body); - - if debug { - match loop_index { - Some(idx) => eprintln!("[capture/debug] Loop found at index {} by structure", idx), - None => eprintln!("[capture/debug] Loop not found in function body by structure (may be unit test or synthetic case)"), - } - } - - // Step 2: Collect local declarations BEFORE the loop - let pre_loop_locals = if let Some(idx) = loop_index { - collect_local_declarations(&fn_body[..idx]) - } else { - // No loop found in fn_body - might be a unit test or synthetic case - // Still collect all locals from fn_body - collect_local_declarations(fn_body) - }; - - if debug { - eprintln!( - "[capture/debug] Found {} pre-loop local declarations", - pre_loop_locals.len() - ); - } - - let mut env = CapturedEnv::new(); - - // Step 3: For each pre-loop local, check 
capture criteria - for (name, init_expr) in &pre_loop_locals { - if debug { - eprintln!("[capture/check] Checking variable '{}'", name); - } - - // 3a: Is init expression a safe constant? - if !is_safe_const_init(init_expr) { - if debug { - eprintln!("[capture/reject] '{}': init is not a safe constant", name); - } - continue; - } - - // 3b: Is this variable reassigned anywhere in fn_body? - if is_reassigned_in_fn(fn_body, name) { - if debug { - eprintln!("[capture/reject] '{}': reassigned in function", name); - } - continue; - } - - // 3c: Is this variable used in loop (condition or body)? - if !is_used_in_loop_parts(loop_condition, loop_body, name) { - if debug { - eprintln!("[capture/reject] '{}': not used in loop", name); - } - continue; - } - - // 3d: Skip if already in pinned, carriers, or body_locals - if scope.pinned.contains(name) { - if debug { - eprintln!("[capture/reject] '{}': is a pinned variable", name); - } - continue; - } - - if scope.carriers.contains(name) { - if debug { - eprintln!("[capture/reject] '{}': is a carrier variable", name); - } - continue; - } - - if scope.body_locals.contains(name) { - if debug { - eprintln!("[capture/reject] '{}': is a body-local variable", name); - } - continue; - } - - // All checks passed: add to CapturedEnv - if debug { - eprintln!( - "[capture/accept] '{}': ALL CHECKS PASSED, adding to CapturedEnv", - name - ); - } - - env.add_var(CapturedVar { - name: name.clone(), - host_id: ValueId(0), // Placeholder, will be resolved in ConditionEnvBuilder - is_immutable: true, - kind: CapturedKind::Explicit, - }); - } - - // Phase 245C: Capture function parameters used in loop - let names_in_loop = collect_names_in_loop_parts(loop_condition, loop_body); - - // pre-loop local names (already processed above) - let pre_loop_local_names: BTreeSet = pre_loop_locals - .iter() - .map(|(name, _)| name.clone()) - .collect(); - - // Check each variable used in loop - for name in names_in_loop { - // Skip if already processed as 
pre-loop local - if pre_loop_local_names.contains(&name) { - continue; - } - - // Skip if already in pinned, carriers, or body_locals - if scope.pinned.contains(&name) - || scope.carriers.contains(&name) - || scope.body_locals.contains(&name) - { - continue; - } - - // Skip if reassigned in function (function parameters should not be reassigned) - if is_reassigned_in_fn(fn_body, &name) { - if debug { - eprintln!("[capture/param/reject] '{}': reassigned in function", name); - } - continue; - } - - // This is a function parameter-like variable - add to CapturedEnv - if debug { - eprintln!( - "[capture/param/accept] '{}': function parameter used in loop", - name - ); - } - - env.add_var(CapturedVar { - name: name.clone(), - host_id: ValueId(0), // Placeholder, will be resolved in ConditionEnvBuilder - is_immutable: true, - kind: CapturedKind::Explicit, - }); - } - - if debug { - eprintln!( - "[capture/result] Captured {} variables: {:?}", - env.vars.len(), - env.vars.iter().map(|v| &v.name).collect::>() - ); - } - - env -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span}; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - - // Phase 200-B: Capture analysis tests - - #[test] - fn test_capture_simple_digits() { - // Build AST for: - // local digits = "0123456789" - // loop(i < 10) { - // local pos = digits.indexOf(ch) - // } - - let digits_decl = ASTNode::Local { - variables: vec!["digits".to_string()], - initial_values: vec![Some(Box::new(ASTNode::Literal { - value: LiteralValue::String("0123456789".to_string()), - span: Span::unknown(), - }))], - span: Span::unknown(), - }; - - let loop_body = vec![ASTNode::Local { - variables: vec!["pos".to_string()], - initial_values: vec![Some(Box::new(ASTNode::MethodCall { - object: Box::new(ASTNode::Variable { - name: "digits".to_string(), - span: Span::unknown(), - }), - method: "indexOf".to_string(), - arguments: vec![ASTNode::Variable { - 
name: "ch".to_string(), - span: Span::unknown(), - }], - span: Span::unknown(), - }))], - span: Span::unknown(), - }]; - - let loop_node = ASTNode::Loop { - condition: Box::new(ASTNode::BinaryOp { - operator: crate::ast::BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "i".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(10), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - body: loop_body, - span: Span::unknown(), - }; - - let fn_body = vec![digits_decl, loop_node.clone()]; - - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { - header: BasicBlockId(0), - body: BasicBlockId(1), - latch: BasicBlockId(2), - exit: BasicBlockId(3), - pinned: BTreeSet::from(["i".to_string()]), - carriers: BTreeSet::new(), - body_locals: BTreeSet::from(["pos".to_string()]), - exit_live: BTreeSet::new(), - progress_carrier: None, - variable_definitions: BTreeMap::new(), - }; - - // IMPORTANT: Pass a reference to the same loop_node instance that's in fn_body - // find_stmt_index uses pointer comparison, so we must use &fn_body[1] instead of &loop_node - let env = analyze_captured_vars(&fn_body, &fn_body[1], &scope); - - assert_eq!(env.vars.len(), 1); - assert!(env.get("digits").is_some()); - let var = env.get("digits").unwrap(); - assert_eq!(var.name, "digits"); - assert!(var.is_immutable); - } - - #[test] - fn test_capture_reassigned_rejected() { - // Build AST for: - // local digits = "0123456789" - // digits = "abc" // reassignment - // loop(i < 10) { - // local pos = digits.indexOf(ch) - // } - - let digits_decl = ASTNode::Local { - variables: vec!["digits".to_string()], - initial_values: vec![Some(Box::new(ASTNode::Literal { - value: LiteralValue::String("0123456789".to_string()), - span: Span::unknown(), - }))], - span: Span::unknown(), - }; - - let reassignment = ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "digits".to_string(), - span: 
Span::unknown(), - }), - value: Box::new(ASTNode::Literal { - value: LiteralValue::String("abc".to_string()), - span: Span::unknown(), - }), - span: Span::unknown(), - }; - - let loop_body = vec![ASTNode::Local { - variables: vec!["pos".to_string()], - initial_values: vec![Some(Box::new(ASTNode::MethodCall { - object: Box::new(ASTNode::Variable { - name: "digits".to_string(), - span: Span::unknown(), - }), - method: "indexOf".to_string(), - arguments: vec![], - span: Span::unknown(), - }))], - span: Span::unknown(), - }]; - - let loop_node = ASTNode::Loop { - condition: Box::new(ASTNode::BinaryOp { - operator: crate::ast::BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "i".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(10), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - body: loop_body, - span: Span::unknown(), - }; - - let fn_body = vec![digits_decl, reassignment, loop_node.clone()]; - - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { - header: BasicBlockId(0), - body: BasicBlockId(1), - latch: BasicBlockId(2), - exit: BasicBlockId(3), - pinned: BTreeSet::from(["i".to_string()]), - carriers: BTreeSet::new(), - body_locals: BTreeSet::from(["pos".to_string()]), - exit_live: BTreeSet::new(), - progress_carrier: None, - variable_definitions: BTreeMap::new(), - }; - - let env = analyze_captured_vars(&fn_body, &loop_node, &scope); - - // Should reject because digits is reassigned - assert_eq!(env.vars.len(), 0); - } - - #[test] - fn test_capture_after_loop_rejected() { - // Build AST for: - // loop(i < 10) { } - // local digits = "0123456789" // defined AFTER loop - - let loop_node = ASTNode::Loop { - condition: Box::new(ASTNode::BinaryOp { - operator: crate::ast::BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "i".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: 
LiteralValue::Integer(10), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - body: vec![], - span: Span::unknown(), - }; - - let digits_decl = ASTNode::Local { - variables: vec!["digits".to_string()], - initial_values: vec![Some(Box::new(ASTNode::Literal { - value: LiteralValue::String("0123456789".to_string()), - span: Span::unknown(), - }))], - span: Span::unknown(), - }; - - let fn_body = vec![loop_node.clone(), digits_decl]; - - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { - header: BasicBlockId(0), - body: BasicBlockId(1), - latch: BasicBlockId(2), - exit: BasicBlockId(3), - pinned: BTreeSet::from(["i".to_string()]), - carriers: BTreeSet::new(), - body_locals: BTreeSet::new(), - exit_live: BTreeSet::new(), - progress_carrier: None, - variable_definitions: BTreeMap::new(), - }; - - let env = analyze_captured_vars(&fn_body, &loop_node, &scope); - - // Should reject because digits is defined after the loop - assert_eq!(env.vars.len(), 0); - } - - #[test] - fn test_capture_method_call_init_rejected() { - // Build AST for: - // local result = someBox.getValue() // MethodCall init - // loop(i < 10) { - // local x = result.something() - // } - - let result_decl = ASTNode::Local { - variables: vec!["result".to_string()], - initial_values: vec![Some(Box::new(ASTNode::MethodCall { - object: Box::new(ASTNode::Variable { - name: "someBox".to_string(), - span: Span::unknown(), - }), - method: "getValue".to_string(), - arguments: vec![], - span: Span::unknown(), - }))], - span: Span::unknown(), - }; - - let loop_body = vec![ASTNode::Local { - variables: vec!["x".to_string()], - initial_values: vec![Some(Box::new(ASTNode::MethodCall { - object: Box::new(ASTNode::Variable { - name: "result".to_string(), - span: Span::unknown(), - }), - method: "something".to_string(), - arguments: vec![], - span: Span::unknown(), - }))], - span: Span::unknown(), - }]; - - let loop_node = ASTNode::Loop { - condition: Box::new(ASTNode::BinaryOp { - 
operator: crate::ast::BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "i".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(10), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - body: loop_body, - span: Span::unknown(), - }; - - let fn_body = vec![result_decl, loop_node.clone()]; - - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { - header: BasicBlockId(0), - body: BasicBlockId(1), - latch: BasicBlockId(2), - exit: BasicBlockId(3), - pinned: BTreeSet::from(["i".to_string()]), - carriers: BTreeSet::new(), - body_locals: BTreeSet::from(["x".to_string()]), - exit_live: BTreeSet::new(), - progress_carrier: None, - variable_definitions: BTreeMap::new(), - }; - - let env = analyze_captured_vars(&fn_body, &loop_node, &scope); - - // Should reject because result is initialized with MethodCall (not safe constant) - assert_eq!(env.vars.len(), 0); - } - - #[test] - fn test_capture_unused_in_loop_rejected() { - // Build AST for: - // local digits = "0123456789" // not used in loop - // loop(i < 10) { - // print(i) // doesn't use digits - // } - - let digits_decl = ASTNode::Local { - variables: vec!["digits".to_string()], - initial_values: vec![Some(Box::new(ASTNode::Literal { - value: LiteralValue::String("0123456789".to_string()), - span: Span::unknown(), - }))], - span: Span::unknown(), - }; - - let loop_node = ASTNode::Loop { - condition: Box::new(ASTNode::BinaryOp { - operator: crate::ast::BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "i".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(10), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - body: vec![], // empty body, no usage of digits - span: Span::unknown(), - }; - - let fn_body = vec![digits_decl, loop_node.clone()]; - - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { 
- header: BasicBlockId(0), - body: BasicBlockId(1), - latch: BasicBlockId(2), - exit: BasicBlockId(3), - pinned: BTreeSet::from(["i".to_string()]), - carriers: BTreeSet::new(), - body_locals: BTreeSet::new(), - exit_live: BTreeSet::new(), - progress_carrier: None, - variable_definitions: BTreeMap::new(), - }; - - let env = analyze_captured_vars(&fn_body, &loop_node, &scope); - - // Should reject because digits is not used in loop - assert_eq!(env.vars.len(), 0); - } - - // Phase 245C: Function parameter capture tests - - #[test] - fn test_capture_function_param_used_in_condition() { - // Simulate: fn parse_number(s, p, len) { loop(p < len) { ... } } - // Expected: 'len' should be captured (used in condition, not reassigned) - - let condition = Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Variable { - name: "len".to_string(), // function parameter - span: Span::unknown(), - }), - span: Span::unknown(), - }); - - let body = vec![ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Add, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(1), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - span: Span::unknown(), - }]; - - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { - header: BasicBlockId(0), - body: BasicBlockId(1), - latch: BasicBlockId(2), - exit: BasicBlockId(3), - pinned: BTreeSet::from(["p".to_string()]), // p is loop param - carriers: BTreeSet::new(), - body_locals: BTreeSet::new(), - exit_live: BTreeSet::new(), - progress_carrier: None, - variable_definitions: BTreeMap::new(), - }; - - // Use analyze_captured_vars_v2 with structural matching - 
let env = analyze_captured_vars_v2(&[], condition.as_ref(), &body, &scope); - - // Should capture 'len' (function parameter used in condition) - assert_eq!(env.vars.len(), 1); - assert!(env.get("len").is_some()); - let var = env.get("len").unwrap(); - assert_eq!(var.name, "len"); - assert!(var.is_immutable); - } - - #[test] - fn test_capture_function_param_used_in_method_call() { - // Simulate: fn parse_number(s, p) { loop(p < s.length()) { ch = s.charAt(p) } } - // Expected: 's' should be captured (used in condition and body, not reassigned) - - let condition = Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::MethodCall { - object: Box::new(ASTNode::Variable { - name: "s".to_string(), // function parameter - span: Span::unknown(), - }), - method: "length".to_string(), - arguments: vec![], - span: Span::unknown(), - }), - span: Span::unknown(), - }); - - let body = vec![ - ASTNode::Local { - variables: vec!["ch".to_string()], - initial_values: vec![Some(Box::new(ASTNode::MethodCall { - object: Box::new(ASTNode::Variable { - name: "s".to_string(), // function parameter - span: Span::unknown(), - }), - method: "charAt".to_string(), - arguments: vec![ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }], - span: Span::unknown(), - }))], - span: Span::unknown(), - }, - ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Add, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(1), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - span: Span::unknown(), - }, - ]; - - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { - header: BasicBlockId(0), 
- body: BasicBlockId(1), - latch: BasicBlockId(2), - exit: BasicBlockId(3), - pinned: BTreeSet::from(["p".to_string()]), // p is loop param - carriers: BTreeSet::new(), - body_locals: BTreeSet::from(["ch".to_string()]), - exit_live: BTreeSet::new(), - progress_carrier: None, - variable_definitions: BTreeMap::new(), - }; - - // Use analyze_captured_vars_v2 with structural matching - let env = analyze_captured_vars_v2(&[], condition.as_ref(), &body, &scope); - - // Should capture 's' (function parameter used in condition and body) - assert_eq!(env.vars.len(), 1); - assert!(env.get("s").is_some()); - let var = env.get("s").unwrap(); - assert_eq!(var.name, "s"); - assert!(var.is_immutable); - } - - #[test] - fn test_capture_function_param_reassigned_rejected() { - // Simulate: fn bad_func(x) { x = 5; loop(x < 10) { x = x + 1 } } - // Expected: 'x' should NOT be captured (reassigned in function) - - let condition = Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "x".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(10), - span: Span::unknown(), - }), - span: Span::unknown(), - }); - - let body = vec![ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "x".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Add, - left: Box::new(ASTNode::Variable { - name: "x".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(1), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - span: Span::unknown(), - }]; - - // fn_body includes reassignment before loop - let fn_body = vec![ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "x".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(5), - span: Span::unknown(), - }), - span: Span::unknown(), - 
}]; - - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { - header: BasicBlockId(0), - body: BasicBlockId(1), - latch: BasicBlockId(2), - exit: BasicBlockId(3), - pinned: BTreeSet::from(["x".to_string()]), // x is loop param - carriers: BTreeSet::new(), - body_locals: BTreeSet::new(), - exit_live: BTreeSet::new(), - progress_carrier: None, - variable_definitions: BTreeMap::new(), - }; - - // Use analyze_captured_vars_v2 with structural matching - let env = analyze_captured_vars_v2(&fn_body, condition.as_ref(), &body, &scope); - - // Should NOT capture 'x' (reassigned in fn_body) - assert_eq!(env.vars.len(), 0); - } - - #[test] - fn test_capture_mixed_locals_and_params() { - // Simulate: fn parse(s, len) { local digits = "0123"; loop(p < len) { ch = digits.indexOf(...); s.charAt(...) } } - // Expected: 'len', 's', and 'digits' should all be captured - - let condition = Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Variable { - name: "len".to_string(), // function parameter - span: Span::unknown(), - }), - span: Span::unknown(), - }); - - let body = vec![ - ASTNode::Local { - variables: vec!["ch".to_string()], - initial_values: vec![Some(Box::new(ASTNode::MethodCall { - object: Box::new(ASTNode::Variable { - name: "s".to_string(), // function parameter - span: Span::unknown(), - }), - method: "charAt".to_string(), - arguments: vec![ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }], - span: Span::unknown(), - }))], - span: Span::unknown(), - }, - ASTNode::Local { - variables: vec!["digit".to_string()], - initial_values: vec![Some(Box::new(ASTNode::MethodCall { - object: Box::new(ASTNode::Variable { - name: "digits".to_string(), // pre-loop local - span: Span::unknown(), - }), - method: "indexOf".to_string(), - arguments: vec![ASTNode::Variable { - name: "ch".to_string(), - span: 
Span::unknown(), - }], - span: Span::unknown(), - }))], - span: Span::unknown(), - }, - ]; - - // fn_body includes local declaration before loop - let fn_body = vec![ASTNode::Local { - variables: vec!["digits".to_string()], - initial_values: vec![Some(Box::new(ASTNode::Literal { - value: LiteralValue::String("0123".to_string()), - span: Span::unknown(), - }))], - span: Span::unknown(), - }]; - - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { - header: BasicBlockId(0), - body: BasicBlockId(1), - latch: BasicBlockId(2), - exit: BasicBlockId(3), - pinned: BTreeSet::from(["p".to_string()]), // p is loop param - carriers: BTreeSet::new(), - body_locals: BTreeSet::from(["ch".to_string(), "digit".to_string()]), - exit_live: BTreeSet::new(), - progress_carrier: None, - variable_definitions: BTreeMap::new(), - }; - - // Use analyze_captured_vars_v2 with structural matching - let env = analyze_captured_vars_v2(&fn_body, condition.as_ref(), &body, &scope); - - // Should capture all three: 'len' (param), 's' (param), 'digits' (pre-loop local) - assert_eq!(env.vars.len(), 3); - assert!(env.get("len").is_some()); - assert!(env.get("s").is_some()); - assert!(env.get("digits").is_some()); - } -} diff --git a/src/mir/loop_pattern_detection/function_scope_capture/analyzers/mod.rs b/src/mir/loop_pattern_detection/function_scope_capture/analyzers/mod.rs new file mode 100644 index 00000000..2d259f37 --- /dev/null +++ b/src/mir/loop_pattern_detection/function_scope_capture/analyzers/mod.rs @@ -0,0 +1,10 @@ +//! 
Core analysis functions for function scope capture + +mod v1; +mod v2; +#[cfg(test)] +mod tests; + +#[allow(unused_imports)] +pub(crate) use v1::analyze_captured_vars; +pub(crate) use v2::analyze_captured_vars_v2; diff --git a/src/mir/loop_pattern_detection/function_scope_capture/analyzers/tests.rs b/src/mir/loop_pattern_detection/function_scope_capture/analyzers/tests.rs new file mode 100644 index 00000000..2adcf746 --- /dev/null +++ b/src/mir/loop_pattern_detection/function_scope_capture/analyzers/tests.rs @@ -0,0 +1,653 @@ +use super::*; +use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span}; +use crate::mir::BasicBlockId; +use std::collections::{BTreeMap, BTreeSet}; + +// Phase 200-B: Capture analysis tests + +#[test] +fn test_capture_simple_digits() { + // Build AST for: + // local digits = "0123456789" + // loop(i < 10) { + // local pos = digits.indexOf(ch) + // } + + let digits_decl = ASTNode::Local { + variables: vec!["digits".to_string()], + initial_values: vec![Some(Box::new(ASTNode::Literal { + value: LiteralValue::String("0123456789".to_string()), + span: Span::unknown(), + }))], + span: Span::unknown(), + }; + + let loop_body = vec![ASTNode::Local { + variables: vec!["pos".to_string()], + initial_values: vec![Some(Box::new(ASTNode::MethodCall { + object: Box::new(ASTNode::Variable { + name: "digits".to_string(), + span: Span::unknown(), + }), + method: "indexOf".to_string(), + arguments: vec![ASTNode::Variable { + name: "ch".to_string(), + span: Span::unknown(), + }], + span: Span::unknown(), + }))], + span: Span::unknown(), + }]; + + let loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::BinaryOp { + operator: crate::ast::BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(10), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + body: loop_body, + span: Span::unknown(), + }; + + let fn_body = 
vec![digits_decl, loop_node.clone()]; + + let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { + header: BasicBlockId(0), + body: BasicBlockId(1), + latch: BasicBlockId(2), + exit: BasicBlockId(3), + pinned: BTreeSet::from(["i".to_string()]), + carriers: BTreeSet::new(), + body_locals: BTreeSet::from(["pos".to_string()]), + exit_live: BTreeSet::new(), + progress_carrier: None, + variable_definitions: BTreeMap::new(), + }; + + // IMPORTANT: Pass a reference to the same loop_node instance that's in fn_body + // find_stmt_index uses pointer comparison, so we must use &fn_body[1] instead of &loop_node + let env = analyze_captured_vars(&fn_body, &fn_body[1], &scope); + + assert_eq!(env.vars.len(), 1); + assert!(env.get("digits").is_some()); + let var = env.get("digits").unwrap(); + assert_eq!(var.name, "digits"); + assert!(var.is_immutable); +} + +#[test] +fn test_capture_reassigned_rejected() { + // Build AST for: + // local digits = "0123456789" + // digits = "abc" // reassignment + // loop(i < 10) { + // local pos = digits.indexOf(ch) + // } + + let digits_decl = ASTNode::Local { + variables: vec!["digits".to_string()], + initial_values: vec![Some(Box::new(ASTNode::Literal { + value: LiteralValue::String("0123456789".to_string()), + span: Span::unknown(), + }))], + span: Span::unknown(), + }; + + let reassignment = ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "digits".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::Literal { + value: LiteralValue::String("abc".to_string()), + span: Span::unknown(), + }), + span: Span::unknown(), + }; + + let loop_body = vec![ASTNode::Local { + variables: vec!["pos".to_string()], + initial_values: vec![Some(Box::new(ASTNode::MethodCall { + object: Box::new(ASTNode::Variable { + name: "digits".to_string(), + span: Span::unknown(), + }), + method: "indexOf".to_string(), + arguments: vec![], + span: Span::unknown(), + }))], + span: Span::unknown(), + }]; + + let 
loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::BinaryOp { + operator: crate::ast::BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(10), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + body: loop_body, + span: Span::unknown(), + }; + + let fn_body = vec![digits_decl, reassignment, loop_node.clone()]; + + let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { + header: BasicBlockId(0), + body: BasicBlockId(1), + latch: BasicBlockId(2), + exit: BasicBlockId(3), + pinned: BTreeSet::from(["i".to_string()]), + carriers: BTreeSet::new(), + body_locals: BTreeSet::from(["pos".to_string()]), + exit_live: BTreeSet::new(), + progress_carrier: None, + variable_definitions: BTreeMap::new(), + }; + + let env = analyze_captured_vars(&fn_body, &loop_node, &scope); + + // Should reject because digits is reassigned + assert_eq!(env.vars.len(), 0); +} + +#[test] +fn test_capture_after_loop_rejected() { + // Build AST for: + // loop(i < 10) { } + // local digits = "0123456789" // defined AFTER loop + + let loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::BinaryOp { + operator: crate::ast::BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(10), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + body: vec![], + span: Span::unknown(), + }; + + let digits_decl = ASTNode::Local { + variables: vec!["digits".to_string()], + initial_values: vec![Some(Box::new(ASTNode::Literal { + value: LiteralValue::String("0123456789".to_string()), + span: Span::unknown(), + }))], + span: Span::unknown(), + }; + + let fn_body = vec![loop_node.clone(), digits_decl]; + + let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { + header: BasicBlockId(0), + 
body: BasicBlockId(1), + latch: BasicBlockId(2), + exit: BasicBlockId(3), + pinned: BTreeSet::from(["i".to_string()]), + carriers: BTreeSet::new(), + body_locals: BTreeSet::new(), + exit_live: BTreeSet::new(), + progress_carrier: None, + variable_definitions: BTreeMap::new(), + }; + + let env = analyze_captured_vars(&fn_body, &loop_node, &scope); + + // Should reject because digits is defined after the loop + assert_eq!(env.vars.len(), 0); +} + +#[test] +fn test_capture_method_call_init_rejected() { + // Build AST for: + // local result = someBox.getValue() // MethodCall init + // loop(i < 10) { + // local x = result.something() + // } + + let result_decl = ASTNode::Local { + variables: vec!["result".to_string()], + initial_values: vec![Some(Box::new(ASTNode::MethodCall { + object: Box::new(ASTNode::Variable { + name: "someBox".to_string(), + span: Span::unknown(), + }), + method: "getValue".to_string(), + arguments: vec![], + span: Span::unknown(), + }))], + span: Span::unknown(), + }; + + let loop_body = vec![ASTNode::Local { + variables: vec!["x".to_string()], + initial_values: vec![Some(Box::new(ASTNode::MethodCall { + object: Box::new(ASTNode::Variable { + name: "result".to_string(), + span: Span::unknown(), + }), + method: "something".to_string(), + arguments: vec![], + span: Span::unknown(), + }))], + span: Span::unknown(), + }]; + + let loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::BinaryOp { + operator: crate::ast::BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(10), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + body: loop_body, + span: Span::unknown(), + }; + + let fn_body = vec![result_decl, loop_node.clone()]; + + let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { + header: BasicBlockId(0), + body: BasicBlockId(1), + latch: BasicBlockId(2), + exit: 
BasicBlockId(3), + pinned: BTreeSet::from(["i".to_string()]), + carriers: BTreeSet::new(), + body_locals: BTreeSet::from(["x".to_string()]), + exit_live: BTreeSet::new(), + progress_carrier: None, + variable_definitions: BTreeMap::new(), + }; + + let env = analyze_captured_vars(&fn_body, &loop_node, &scope); + + // Should reject because result is initialized with MethodCall (not safe constant) + assert_eq!(env.vars.len(), 0); +} + +#[test] +fn test_capture_unused_in_loop_rejected() { + // Build AST for: + // local digits = "0123456789" // not used in loop + // loop(i < 10) { + // print(i) // doesn't use digits + // } + + let digits_decl = ASTNode::Local { + variables: vec!["digits".to_string()], + initial_values: vec![Some(Box::new(ASTNode::Literal { + value: LiteralValue::String("0123456789".to_string()), + span: Span::unknown(), + }))], + span: Span::unknown(), + }; + + let loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::BinaryOp { + operator: crate::ast::BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(10), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + body: vec![], // empty body, no usage of digits + span: Span::unknown(), + }; + + let fn_body = vec![digits_decl, loop_node.clone()]; + + let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { + header: BasicBlockId(0), + body: BasicBlockId(1), + latch: BasicBlockId(2), + exit: BasicBlockId(3), + pinned: BTreeSet::from(["i".to_string()]), + carriers: BTreeSet::new(), + body_locals: BTreeSet::new(), + exit_live: BTreeSet::new(), + progress_carrier: None, + variable_definitions: BTreeMap::new(), + }; + + let env = analyze_captured_vars(&fn_body, &loop_node, &scope); + + // Should reject because digits is not used in loop + assert_eq!(env.vars.len(), 0); +} + +// Phase 245C: Function parameter capture tests + +#[test] +fn 
test_capture_function_param_used_in_condition() { + // Simulate: fn parse_number(s, p, len) { loop(p < len) { ... } } + // Expected: 'len' should be captured (used in condition, not reassigned) + + let condition = Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Variable { + name: "len".to_string(), // function parameter + span: Span::unknown(), + }), + span: Span::unknown(), + }); + + let body = vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }]; + + let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { + header: BasicBlockId(0), + body: BasicBlockId(1), + latch: BasicBlockId(2), + exit: BasicBlockId(3), + pinned: BTreeSet::from(["p".to_string()]), // p is loop param + carriers: BTreeSet::new(), + body_locals: BTreeSet::new(), + exit_live: BTreeSet::new(), + progress_carrier: None, + variable_definitions: BTreeMap::new(), + }; + + // Use analyze_captured_vars_v2 with structural matching + let env = analyze_captured_vars_v2(&[], condition.as_ref(), &body, &scope); + + // Should capture 'len' (function parameter used in condition) + assert_eq!(env.vars.len(), 1); + assert!(env.get("len").is_some()); + let var = env.get("len").unwrap(); + assert_eq!(var.name, "len"); + assert!(var.is_immutable); +} + +#[test] +fn test_capture_function_param_used_in_method_call() { + // Simulate: fn parse_number(s, p) { loop(p < s.length()) { ch = s.charAt(p) } } + // Expected: 's' should be captured (used in condition and body, not 
reassigned) + + let condition = Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::MethodCall { + object: Box::new(ASTNode::Variable { + name: "s".to_string(), // function parameter + span: Span::unknown(), + }), + method: "length".to_string(), + arguments: vec![], + span: Span::unknown(), + }), + span: Span::unknown(), + }); + + let body = vec![ + ASTNode::Local { + variables: vec!["ch".to_string()], + initial_values: vec![Some(Box::new(ASTNode::MethodCall { + object: Box::new(ASTNode::Variable { + name: "s".to_string(), // function parameter + span: Span::unknown(), + }), + method: "charAt".to_string(), + arguments: vec![ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }], + span: Span::unknown(), + }))], + span: Span::unknown(), + }, + ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }, + ]; + + let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { + header: BasicBlockId(0), + body: BasicBlockId(1), + latch: BasicBlockId(2), + exit: BasicBlockId(3), + pinned: BTreeSet::from(["p".to_string()]), // p is loop param + carriers: BTreeSet::new(), + body_locals: BTreeSet::from(["ch".to_string()]), + exit_live: BTreeSet::new(), + progress_carrier: None, + variable_definitions: BTreeMap::new(), + }; + + // Use analyze_captured_vars_v2 with structural matching + let env = analyze_captured_vars_v2(&[], condition.as_ref(), &body, &scope); + + // Should capture 's' (function parameter used in condition and body) + 
assert_eq!(env.vars.len(), 1); + assert!(env.get("s").is_some()); + let var = env.get("s").unwrap(); + assert_eq!(var.name, "s"); + assert!(var.is_immutable); +} + +#[test] +fn test_capture_function_param_reassigned_rejected() { + // Simulate: fn bad_func(x) { x = 5; loop(x < 10) { x = x + 1 } } + // Expected: 'x' should NOT be captured (reassigned in function) + + let condition = Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "x".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(10), + span: Span::unknown(), + }), + span: Span::unknown(), + }); + + let body = vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "x".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "x".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }]; + + // fn_body includes reassignment before loop + let fn_body = vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "x".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(5), + span: Span::unknown(), + }), + span: Span::unknown(), + }]; + + let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { + header: BasicBlockId(0), + body: BasicBlockId(1), + latch: BasicBlockId(2), + exit: BasicBlockId(3), + pinned: BTreeSet::from(["x".to_string()]), // x is loop param + carriers: BTreeSet::new(), + body_locals: BTreeSet::new(), + exit_live: BTreeSet::new(), + progress_carrier: None, + variable_definitions: BTreeMap::new(), + }; + + // Use analyze_captured_vars_v2 with structural matching + let env = analyze_captured_vars_v2(&fn_body, 
condition.as_ref(), &body, &scope); + + // Should NOT capture 'x' (reassigned in fn_body) + assert_eq!(env.vars.len(), 0); +} + +#[test] +fn test_capture_mixed_locals_and_params() { + // Simulate: fn parse(s, len) { local digits = "0123"; loop(p < len) { ch = digits.indexOf(...); s.charAt(...) } } + // Expected: 'len', 's', and 'digits' should all be captured + + let condition = Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Variable { + name: "len".to_string(), // function parameter + span: Span::unknown(), + }), + span: Span::unknown(), + }); + + let body = vec![ + ASTNode::Local { + variables: vec!["ch".to_string()], + initial_values: vec![Some(Box::new(ASTNode::MethodCall { + object: Box::new(ASTNode::Variable { + name: "s".to_string(), // function parameter + span: Span::unknown(), + }), + method: "charAt".to_string(), + arguments: vec![ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }], + span: Span::unknown(), + }))], + span: Span::unknown(), + }, + ASTNode::Local { + variables: vec!["digit".to_string()], + initial_values: vec![Some(Box::new(ASTNode::MethodCall { + object: Box::new(ASTNode::Variable { + name: "digits".to_string(), // pre-loop local + span: Span::unknown(), + }), + method: "indexOf".to_string(), + arguments: vec![ASTNode::Variable { + name: "ch".to_string(), + span: Span::unknown(), + }], + span: Span::unknown(), + }))], + span: Span::unknown(), + }, + ]; + + // fn_body includes local declaration before loop + let fn_body = vec![ASTNode::Local { + variables: vec!["digits".to_string()], + initial_values: vec![Some(Box::new(ASTNode::Literal { + value: LiteralValue::String("0123".to_string()), + span: Span::unknown(), + }))], + span: Span::unknown(), + }]; + + let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { + header: BasicBlockId(0), + body: BasicBlockId(1), 
+ latch: BasicBlockId(2), + exit: BasicBlockId(3), + pinned: BTreeSet::from(["p".to_string()]), // p is loop param + carriers: BTreeSet::new(), + body_locals: BTreeSet::from(["ch".to_string(), "digit".to_string()]), + exit_live: BTreeSet::new(), + progress_carrier: None, + variable_definitions: BTreeMap::new(), + }; + + // Use analyze_captured_vars_v2 with structural matching + let env = analyze_captured_vars_v2(&fn_body, condition.as_ref(), &body, &scope); + + // Should capture all three: 'len' (param), 's' (param), 'digits' (pre-loop local) + assert_eq!(env.vars.len(), 3); + assert!(env.get("len").is_some()); + assert!(env.get("s").is_some()); + assert!(env.get("digits").is_some()); +} diff --git a/src/mir/loop_pattern_detection/function_scope_capture/analyzers/v1.rs b/src/mir/loop_pattern_detection/function_scope_capture/analyzers/v1.rs new file mode 100644 index 00000000..e23b001f --- /dev/null +++ b/src/mir/loop_pattern_detection/function_scope_capture/analyzers/v1.rs @@ -0,0 +1,172 @@ +use crate::ast::ASTNode; +use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape; +use crate::mir::ValueId; + +use super::super::helpers::*; +use super::super::types::{CapturedEnv, CapturedKind, CapturedVar}; + +/// Analyzes function-scoped variables that can be safely captured for loop conditions/body. +/// +/// # Phase 200-B Implementation +/// +/// Detects function-scoped variables that are effectively immutable constants +/// within a loop context (e.g., `digits` in JsonParser._atoi()). +/// +/// # Detection Criteria +/// +/// A variable is captured if ALL of the following conditions are met: +/// +/// 1. **Declared before the loop**: Variable must be declared in function scope before the loop +/// 2. **Safe constant init**: Initialized with string/integer literal only +/// 3. **Never reassigned**: Variable is never reassigned within the function (is_immutable = true) +/// 4. **Referenced in loop**: Variable is referenced in loop condition or body +/// 5. 
**Not a loop parameter**: Variable is not in scope.loop_params +/// 6. **Not a body-local**: Variable is not in scope.body_locals +/// +/// # Example +/// +/// ```nyash +/// method _atoi(s, pos, len) { +/// local digits = "0123456789" // ✅ Captured (declared before loop, never reassigned) +/// local value = 0 // ❌ Not captured (reassigned in loop body) +/// loop(pos < len) { +/// local ch = s.charAt(pos) // ❌ Not captured (body-local) +/// local digit = digits.indexOf(ch) +/// value = value * 10 + digit +/// pos = pos + 1 +/// } +/// } +/// ``` +/// +/// # Arguments +/// +/// * `fn_body` - AST nodes of the function body (for analysis) +/// * `loop_ast` - AST node of the loop statement +/// * `scope` - LoopScopeShape (for excluding loop params and body-locals) +/// +/// # Returns +/// +/// `CapturedEnv` containing all captured variables +#[allow(dead_code)] +pub(crate) fn analyze_captured_vars( + fn_body: &[ASTNode], + loop_ast: &ASTNode, + scope: &LoopScopeShape, +) -> CapturedEnv { + use std::env; + + let debug = env::var("NYASH_CAPTURE_DEBUG").is_ok(); + + if debug { + eprintln!("[capture/debug] Starting capture analysis"); + } + + // Step 1: Find loop position in fn_body + let loop_index = match find_stmt_index(fn_body, loop_ast) { + Some(idx) => idx, + None => { + if debug { + eprintln!( + "[capture/debug] Loop not found in function body, returning empty CapturedEnv" + ); + } + return CapturedEnv::new(); + } + }; + + if debug { + eprintln!("[capture/debug] Loop found at index {}", loop_index); + } + + // Step 2: Collect local declarations BEFORE the loop + let pre_loop_locals = collect_local_declarations(&fn_body[..loop_index]); + + if debug { + eprintln!( + "[capture/debug] Found {} pre-loop local declarations", + pre_loop_locals.len() + ); + } + + let mut env = CapturedEnv::new(); + + // Step 3: For each pre-loop local, check capture criteria + for (name, init_expr) in pre_loop_locals { + if debug { + eprintln!("[capture/check] Checking variable '{}'", name); 
+ } + + // 3a: Is init expression a safe constant? + if !is_safe_const_init(&init_expr) { + if debug { + eprintln!("[capture/reject] '{}': init is not a safe constant", name); + } + continue; + } + + // 3b: Is this variable reassigned anywhere in fn_body? + if is_reassigned_in_fn(fn_body, &name) { + if debug { + eprintln!("[capture/reject] '{}': reassigned in function", name); + } + continue; + } + + // 3c: Is this variable used in loop (condition or body)? + if !is_used_in_loop(loop_ast, &name) { + if debug { + eprintln!("[capture/reject] '{}': not used in loop", name); + } + continue; + } + + // 3d: Skip if already in pinned, carriers, or body_locals + if scope.pinned.contains(&name) { + if debug { + eprintln!("[capture/reject] '{}': is a pinned variable", name); + } + continue; + } + + if scope.carriers.contains(&name) { + if debug { + eprintln!("[capture/reject] '{}': is a carrier variable", name); + } + continue; + } + + if scope.body_locals.contains(&name) { + if debug { + eprintln!("[capture/reject] '{}': is a body-local variable", name); + } + continue; + } + + // All checks passed: add to CapturedEnv + // Note: We don't have access to variable_map here, so we use a placeholder ValueId + // The actual host_id will be resolved in ConditionEnvBuilder + if debug { + eprintln!( + "[capture/accept] '{}': ALL CHECKS PASSED, adding to CapturedEnv", + name + ); + } + + env.add_var(CapturedVar { + name: name.clone(), + host_id: ValueId(0), // Placeholder, will be resolved in ConditionEnvBuilder + is_immutable: true, + kind: CapturedKind::Explicit, + }); + } + + if debug { + eprintln!( + "[capture/result] Captured {} variables: {:?}", + env.vars.len(), + env.vars.iter().map(|v| &v.name).collect::>() + ); + } + + env +} diff --git a/src/mir/loop_pattern_detection/function_scope_capture/analyzers/v2.rs b/src/mir/loop_pattern_detection/function_scope_capture/analyzers/v2.rs new file mode 100644 index 00000000..c4374040 --- /dev/null +++ 
b/src/mir/loop_pattern_detection/function_scope_capture/analyzers/v2.rs @@ -0,0 +1,193 @@ +use crate::ast::ASTNode; +use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape; +use crate::mir::ValueId; +use std::collections::BTreeSet; + +use super::super::helpers::*; +use super::super::types::{CapturedEnv, CapturedKind, CapturedVar}; + +/// Phase 200-C: Analyze captured vars with condition/body instead of loop_ast +/// +/// This variant solves the pointer comparison problem when the loop AST is constructed +/// dynamically (e.g., in Pattern 2). Instead of passing a loop_ast reference, +/// we pass the condition and body directly and perform structural matching. +/// +/// # Arguments +/// +/// * `fn_body` - AST nodes of the function body (for analysis) +/// * `loop_condition` - Condition expression of the loop +/// * `loop_body` - Body statements of the loop +/// * `scope` - LoopScopeShape (for excluding loop params and body-locals) +/// +/// # Returns +/// +/// `CapturedEnv` containing all captured variables +#[allow(dead_code)] +pub(crate) fn analyze_captured_vars_v2( + fn_body: &[ASTNode], + loop_condition: &ASTNode, + loop_body: &[ASTNode], + scope: &LoopScopeShape, +) -> CapturedEnv { + use std::env; + + let debug = env::var("NYASH_CAPTURE_DEBUG").is_ok(); + + if debug { + eprintln!("[capture/debug] Starting capture analysis v2 (structural matching)"); + } + + // Step 1: Find loop position in fn_body by structural matching + let loop_index = find_loop_index_by_structure(fn_body, loop_condition, loop_body); + + if debug { + match loop_index { + Some(idx) => eprintln!("[capture/debug] Loop found at index {} by structure", idx), + None => eprintln!("[capture/debug] Loop not found in function body by structure (may be unit test or synthetic case)"), + } + } + + // Step 2: Collect local declarations BEFORE the loop + let pre_loop_locals = if let Some(idx) = loop_index { + collect_local_declarations(&fn_body[..idx]) + } else { + // No loop found in fn_body 
- might be a unit test or synthetic case + // Still collect all locals from fn_body + collect_local_declarations(fn_body) + }; + + if debug { + eprintln!( + "[capture/debug] Found {} pre-loop local declarations", + pre_loop_locals.len() + ); + } + + let mut env = CapturedEnv::new(); + + // Step 3: For each pre-loop local, check capture criteria + for (name, init_expr) in &pre_loop_locals { + if debug { + eprintln!("[capture/check] Checking variable '{}'", name); + } + + // 3a: Is init expression a safe constant? + if !is_safe_const_init(init_expr) { + if debug { + eprintln!("[capture/reject] '{}': init is not a safe constant", name); + } + continue; + } + + // 3b: Is this variable reassigned anywhere in fn_body? + if is_reassigned_in_fn(fn_body, name) { + if debug { + eprintln!("[capture/reject] '{}': reassigned in function", name); + } + continue; + } + + // 3c: Is this variable used in loop (condition or body)? + if !is_used_in_loop_parts(loop_condition, loop_body, name) { + if debug { + eprintln!("[capture/reject] '{}': not used in loop", name); + } + continue; + } + + // 3d: Skip if already in pinned, carriers, or body_locals + if scope.pinned.contains(name) { + if debug { + eprintln!("[capture/reject] '{}': is a pinned variable", name); + } + continue; + } + + if scope.carriers.contains(name) { + if debug { + eprintln!("[capture/reject] '{}': is a carrier variable", name); + } + continue; + } + + if scope.body_locals.contains(name) { + if debug { + eprintln!("[capture/reject] '{}': is a body-local variable", name); + } + continue; + } + + // All checks passed: add to CapturedEnv + if debug { + eprintln!( + "[capture/accept] '{}': ALL CHECKS PASSED, adding to CapturedEnv", + name + ); + } + + env.add_var(CapturedVar { + name: name.clone(), + host_id: ValueId(0), // Placeholder, will be resolved in ConditionEnvBuilder + is_immutable: true, + kind: CapturedKind::Explicit, + }); + } + + // Phase 245C: Capture function parameters used in loop + let names_in_loop = 
collect_names_in_loop_parts(loop_condition, loop_body); + + // pre-loop local names (already processed above) + let pre_loop_local_names: BTreeSet = pre_loop_locals + .iter() + .map(|(name, _)| name.clone()) + .collect(); + + // Check each variable used in loop + for name in names_in_loop { + // Skip if already processed as pre-loop local + if pre_loop_local_names.contains(&name) { + continue; + } + + // Skip if already in pinned, carriers, or body_locals + if scope.pinned.contains(&name) + || scope.carriers.contains(&name) + || scope.body_locals.contains(&name) + { + continue; + } + + // Skip if reassigned in function (function parameters should not be reassigned) + if is_reassigned_in_fn(fn_body, &name) { + if debug { + eprintln!("[capture/param/reject] '{}': reassigned in function", name); + } + continue; + } + + // This is a function parameter-like variable - add to CapturedEnv + if debug { + eprintln!( + "[capture/param/accept] '{}': function parameter used in loop", + name + ); + } + + env.add_var(CapturedVar { + name: name.clone(), + host_id: ValueId(0), // Placeholder, will be resolved in ConditionEnvBuilder + is_immutable: true, + kind: CapturedKind::Explicit, + }); + } + + if debug { + eprintln!( + "[capture/result] Captured {} variables: {:?}", + env.vars.len(), + env.vars.iter().map(|v| &v.name).collect::>() + ); + } + + env +} diff --git a/src/runner/mir_json_emit.rs b/src/runner/mir_json_emit.rs deleted file mode 100644 index c27dd38f..00000000 --- a/src/runner/mir_json_emit.rs +++ /dev/null @@ -1,1065 +0,0 @@ -use crate::mir::definitions::call_unified::Callee; -use serde_json::json; - -/// Emit MIR JSON for Python harness/PyVM. -/// The JSON schema matches tools/llvmlite_harness.py expectations and is -/// intentionally minimal for initial scaffolding. 
-/// -/// Phase 15.5: Supports both v0 (legacy separate ops) and v1 (unified mir_call) formats - -/// Helper: Create JSON v1 root with schema information -/// Includes version, capabilities, metadata for advanced MIR features -fn create_json_v1_root(functions: serde_json::Value) -> serde_json::Value { - json!({ - "schema_version": "1.0", - "capabilities": [ - "unified_call", // Phase 15.5: Unified MirCall support - "phi", // SSA Phi functions - "effects", // Effect tracking for optimization - "callee_typing" // Type-safe call target resolution - ], - "metadata": { - "generator": "nyash-rust", - "phase": "15.5", - "build_time": "Phase 15.5 Development", - "features": ["mir_call_unification", "json_v1_schema"] - }, - "functions": functions - }) -} - -/// Helper: detect residual numeric-core boxcalls that should have been lowered by AotPrepNumericCoreBox. -/// Currently we only check for `boxcall` with `method:"mul_naive"` which should become -/// `call("NyNumericMatI64.mul_naive", ...)` when NYASH_AOT_NUMERIC_CORE=1 is effective. -#[allow(dead_code)] -fn has_numeric_core_boxcall(root: &serde_json::Value) -> bool { - let funs = match root.get("functions") { - Some(v) => v.as_array().cloned().unwrap_or_default(), - None => return false, - }; - for f in funs { - let blocks = match f.get("blocks").and_then(|b| b.as_array()) { - Some(b) => b, - None => continue, - }; - for b in blocks { - let insts = match b.get("instructions").and_then(|i| i.as_array()) { - Some(i) => i, - None => continue, - }; - for inst in insts { - let op = inst.get("op").and_then(|v| v.as_str()); - let method = inst.get("method").and_then(|v| v.as_str()); - if op == Some("boxcall") && method == Some("mul_naive") { - return true; - } - } - } - } - false -} - -/// Helper: enforce numeric_core invariants when NYASH_AOT_NUMERIC_CORE=1 is set. -/// - Default: emit a warning if mul_naive boxcalls are still present. -/// - Strict: if NYASH_AOT_NUMERIC_CORE_STRICT=1, return Err to fail fast. 
-#[allow(dead_code)] -fn check_numeric_core_invariants(root: &serde_json::Value) -> Result<(), String> { - let numeric_on = matches!( - std::env::var("NYASH_AOT_NUMERIC_CORE").ok().as_deref(), - Some("1") - ); - if !numeric_on { - return Ok(()); - } - - if !has_numeric_core_boxcall(root) { - return Ok(()); - } - - let strict = matches!( - std::env::var("NYASH_AOT_NUMERIC_CORE_STRICT") - .ok() - .as_deref(), - Some("1") - ); - - eprintln!( - "[mir_json/numeric_core] NYASH_AOT_NUMERIC_CORE=1 but MIR JSON still contains boxcall(\"mul_naive\"). \ -AotPrepNumericCoreBox may not have run or did not match; inspect AotPrep logs or run tools/hakorune_emit_mir.sh with HAKO_SELFHOST_TRACE=1." - ); - - if strict { - return Err( - "NYASH_AOT_NUMERIC_CORE_STRICT=1: numeric_core invariants violated (mul_naive boxcall remains)" - .to_string(), - ); - } - Ok(()) -} - -/// Helper: Emit unified mir_call JSON (v1 format) -/// Supports all 6 Callee types in a single unified JSON structure -fn emit_unified_mir_call( - dst: Option, - callee: &Callee, - args: &[u32], - effects: &[&str], -) -> serde_json::Value { - let mut call_obj = json!({ - "op": "mir_call", - "dst": dst, - "mir_call": { - "args": args, - "effects": effects, - "flags": {} - } - }); - - // Generate Callee-specific mir_call structure - match callee { - Callee::Global(name) => { - call_obj["mir_call"]["callee"] = json!({ - "type": "Global", - "name": name - }); - } - Callee::Method { - box_name, - method, - receiver, - certainty, - .. 
- } => { - call_obj["mir_call"]["callee"] = json!({ - "type": "Method", - "box_name": box_name, - "method": method, - "receiver": receiver.map(|v| v.as_u32()), - "certainty": match certainty { crate::mir::definitions::call_unified::TypeCertainty::Known => "Known", crate::mir::definitions::call_unified::TypeCertainty::Union => "Union" } - }); - } - Callee::Constructor { box_type } => { - call_obj["mir_call"]["callee"] = json!({ - "type": "Constructor", - "box_type": box_type - }); - } - Callee::Closure { - params, - captures, - me_capture, - } => { - let captures_json: Vec<_> = captures - .iter() - .map(|(name, vid)| json!([name, vid.as_u32()])) - .collect(); - call_obj["mir_call"]["callee"] = json!({ - "type": "Closure", - "params": params, - "captures": captures_json, - "me_capture": me_capture.map(|v| v.as_u32()) - }); - } - Callee::Value(vid) => { - call_obj["mir_call"]["callee"] = json!({ - "type": "Value", - "function_value": vid.as_u32() - }); - } - Callee::Extern(name) => { - call_obj["mir_call"]["callee"] = json!({ - "type": "Extern", - "name": name - }); - } - } - - call_obj -} - -pub fn emit_mir_json_for_harness( - module: &nyash_rust::mir::MirModule, - path: &std::path::Path, -) -> Result<(), String> { - use nyash_rust::mir::{BinaryOp as B, CompareOp as C, MirInstruction as I, MirType}; - let mut funs = Vec::new(); - for (name, f) in &module.functions { - let mut blocks = Vec::new(); - let mut ids: Vec<_> = f.blocks.keys().copied().collect(); - ids.sort(); - for bid in ids { - if let Some(bb) = f.blocks.get(&bid) { - let mut insts = Vec::new(); - // Phase 131-13: Emit all instructions in MIR order (SSOT principle) - // No reordering except PHI consolidation at block start (LLVM constraint) - - // Step 1: Emit all PHI instructions first (LLVM requirement) - for inst in &bb.instructions { - if let I::Phi { dst, inputs, .. 
} = inst { - let incoming: Vec<_> = inputs - .iter() - .map(|(b, v)| json!([v.as_u32(), b.as_u32()])) - .collect(); - // Phase 131-11-F: Add dst_type hint from metadata for all PHI instructions - let mut phi_inst = - json!({"op":"phi","dst": dst.as_u32(), "incoming": incoming}); - if let Some(dst_type) = f.metadata.value_types.get(dst) { - let type_json = match dst_type { - MirType::Integer => json!("i64"), - MirType::Float => json!("f64"), // Phase 275 P0: Float PHI type - MirType::String => json!({"kind": "string"}), - MirType::Box(bt) => json!({"kind": "handle", "box_type": bt}), - MirType::Bool => json!("i1"), - MirType::Void => json!("void"), - _ => json!(null), - }; - if !type_json.is_null() { - phi_inst["dst_type"] = type_json; - } - } - insts.push(phi_inst); - } - } - - // Step 2: Emit all non-PHI instructions in MIR order (no reordering!) - for inst in &bb.instructions { - match inst { - I::Phi { .. } => { - // Already emitted in step 1 - continue; - } - I::Copy { dst, src } => { - insts.push( - json!({"op":"copy","dst": dst.as_u32(), "src": src.as_u32()}), - ); - } - I::UnaryOp { dst, op, operand } => { - let kind = match op { - nyash_rust::mir::UnaryOp::Neg => "neg", - nyash_rust::mir::UnaryOp::Not => "not", - nyash_rust::mir::UnaryOp::BitNot => "bitnot", - }; - insts.push(json!({"op":"unop","kind": kind, "src": operand.as_u32(), "dst": dst.as_u32()})); - } - I::Const { dst, value } => { - match value { - nyash_rust::mir::ConstValue::Integer(i) => { - insts.push(json!({"op":"const","dst": dst.as_u32(), "value": {"type": "i64", "value": i}})); - } - nyash_rust::mir::ConstValue::Float(fv) => { - insts.push(json!({"op":"const","dst": dst.as_u32(), "value": {"type": "f64", "value": fv}})); - } - nyash_rust::mir::ConstValue::Bool(b) => { - insts.push(json!({"op":"const","dst": dst.as_u32(), "value": {"type": "i64", "value": if *b {1} else {0}}})); - } - nyash_rust::mir::ConstValue::String(s) => { - // String constants are exported as StringBox handle by 
default - insts.push(json!({ - "op":"const", - "dst": dst.as_u32(), - "value": { - "type": {"kind":"handle","box_type":"StringBox"}, - "value": s - } - })); - } - nyash_rust::mir::ConstValue::Null - | nyash_rust::mir::ConstValue::Void => { - insts.push(json!({"op":"const","dst": dst.as_u32(), "value": {"type": "void", "value": 0}})); - } - } - } - I::TypeOp { dst, op, value, ty } => { - let op_s = match op { - nyash_rust::mir::TypeOpKind::Check => "check", - nyash_rust::mir::TypeOpKind::Cast => "cast", - }; - let ty_s = match ty { - MirType::Integer => "Integer".to_string(), - MirType::Float => "Float".to_string(), - MirType::Bool => "Bool".to_string(), - MirType::String => "String".to_string(), - MirType::Void => "Void".to_string(), - MirType::Box(name) => name.clone(), - _ => "Unknown".to_string(), - }; - insts.push(json!({ - "op":"typeop", - "operation": op_s, - "src": value.as_u32(), - "dst": dst.as_u32(), - "target_type": ty_s, - })); - } - I::BinOp { dst, op, lhs, rhs } => { - let op_s = match op { - B::Add => "+", - B::Sub => "-", - B::Mul => "*", - B::Div => "/", - B::Mod => "%", - B::BitAnd => "&", - B::BitOr => "|", - B::BitXor => "^", - B::Shl => "<<", - B::Shr => ">>", - B::And => "&", - B::Or => "|", - }; - let mut obj = json!({"op":"binop","operation": op_s, "lhs": lhs.as_u32(), "rhs": rhs.as_u32(), "dst": dst.as_u32()}); - // Phase 131-15-P1: dst_type only when type is KNOWN (not Unknown) - // Operand TypeFacts take priority over dst_type hint in Python - if matches!(op, B::Add) { - let dst_type = f.metadata.value_types.get(dst); - match dst_type { - Some(MirType::Box(bt)) if bt == "StringBox" => { - obj["dst_type"] = - json!({"kind":"handle","box_type":"StringBox"}); - } - Some(MirType::Integer) => { - // Explicitly mark as i64 for integer addition - obj["dst_type"] = json!("i64"); - } - Some(MirType::Unknown) | None => { - // Unknown: DO NOT emit dst_type - // Let Python side infer from operand TypeFacts - } - _ => { - // Other known types: use 
conservative i64 - obj["dst_type"] = json!("i64"); - } - } - } - insts.push(obj); - } - I::Compare { dst, op, lhs, rhs } => { - let op_s = match op { - C::Lt => "<", - C::Le => "<=", - C::Gt => ">", - C::Ge => ">=", - C::Eq => "==", - C::Ne => "!=", - }; - let mut obj = json!({"op":"compare","operation": op_s, "lhs": lhs.as_u32(), "rhs": rhs.as_u32(), "dst": dst.as_u32()}); - // cmp_kind hint for string equality - if matches!(op, C::Eq | C::Ne) { - let lhs_is_str = match f.metadata.value_types.get(lhs) { - Some(MirType::String) => true, - Some(MirType::Box(bt)) if bt == "StringBox" => true, - _ => false, - }; - let rhs_is_str = match f.metadata.value_types.get(rhs) { - Some(MirType::String) => true, - Some(MirType::Box(bt)) if bt == "StringBox" => true, - _ => false, - }; - if lhs_is_str && rhs_is_str { - obj["cmp_kind"] = json!("string"); - } - } - insts.push(obj); - } - I::Call { - dst, - func, - callee, - args, - effects, - .. - } => { - // Phase 15.5: Unified Call support with environment variable control - let use_unified = match std::env::var("NYASH_MIR_UNIFIED_CALL") - .ok() - .as_deref() - .map(|s| s.to_ascii_lowercase()) - { - Some(s) if s == "0" || s == "false" || s == "off" => false, - _ => true, - }; - - if use_unified && callee.is_some() { - // v1: Unified mir_call format - let effects_str: Vec<&str> = - if effects.is_io() { vec!["IO"] } else { vec![] }; - let args_u32: Vec = args.iter().map(|v| v.as_u32()).collect(); - let unified_call = emit_unified_mir_call( - dst.map(|v| v.as_u32()), - callee.as_ref().unwrap(), - &args_u32, - &effects_str, - ); - insts.push(unified_call); - } else if !use_unified && callee.is_some() { - // v0: When unified is OFF but callee exists, emit proper v0 format - use nyash_rust::mir::definitions::Callee; - match callee.as_ref().unwrap() { - Callee::Method { - method, receiver, .. 
- } => { - // Emit as boxcall for compatibility - let box_val = receiver.unwrap_or(*func); - let args_a: Vec<_> = - args.iter().map(|v| json!(v.as_u32())).collect(); - let mut obj = json!({ - "op":"boxcall", - "box": box_val.as_u32(), - "method": method, - "args": args_a, - "dst": dst.map(|d| d.as_u32()) - }); - // Add dst_type hints for known methods - let m = method.as_str(); - let dst_ty = if m == "substring" - || m == "dirname" - || m == "join" - || m == "read_all" - || m == "read" - { - Some(json!({"kind":"handle","box_type":"StringBox"})) - } else if m == "length" || m == "lastIndexOf" { - Some(json!("i64")) - } else { - None - }; - if let Some(t) = dst_ty { - obj["dst_type"] = t; - } - insts.push(obj); - if let Some(_d) = dst.map(|v| v.as_u32()) {} - } - _ => { - // Other callee types: emit generic call - let args_a: Vec<_> = - args.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"call","func": func.as_u32(), "args": args_a, "dst": dst.map(|d| d.as_u32())})); - } - } - } else { - // v0: Legacy call format (no callee info) - let args_a: Vec<_> = - args.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"call","func": func.as_u32(), "args": args_a, "dst": dst.map(|d| d.as_u32())})); - } - } - I::ExternCall { - dst, - iface_name, - method_name, - args, - .. - } => { - let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); - let func_name = if iface_name == "env.console" { - format!("nyash.console.{}", method_name) - } else { - format!("{}.{}", iface_name, method_name) - }; - let mut obj = json!({ - "op": "externcall", - "func": func_name, - "args": args_a, - "dst": dst.map(|d| d.as_u32()), - }); - // Minimal dst_type hints for known externs - if iface_name == "env.console" { - // console.* returns i64 status (ignored by user code) - if dst.is_some() { - obj["dst_type"] = json!("i64"); - } - } - insts.push(obj); - } - I::BoxCall { - dst, - box_val, - method, - method_id, - args, - .. 
- } => { - let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); - // Minimal dst_type hints - let mut obj = json!({ - "op":"boxcall","box": box_val.as_u32(), "method": method, "args": args_a, "dst": dst.map(|d| d.as_u32()) - }); - // Phase 287 P4: Include method_id for universal slot tracking (toString[#0]) - if let Some(mid) = method_id { - obj["method_id"] = json!(mid); - } - let m = method.as_str(); - let dst_ty = if m == "substring" - || m == "dirname" - || m == "join" - || m == "read_all" - || m == "read" - { - Some(json!({"kind":"handle","box_type":"StringBox"})) - } else if m == "length" || m == "lastIndexOf" { - Some(json!("i64")) - } else { - None - }; - if let Some(t) = dst_ty { - obj["dst_type"] = t; - } - insts.push(obj); - if let Some(_d) = dst.map(|v| v.as_u32()) {} - } - I::NewBox { - dst, - box_type, - args, - } => { - let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"newbox","type": box_type, "args": args_a, "dst": dst.as_u32()})); - } - I::Branch { - condition, - then_bb, - else_bb, - .. - } => { - insts.push(json!({"op":"branch","cond": condition.as_u32(), "then": then_bb.as_u32(), "else": else_bb.as_u32()})); - } - I::Jump { target, .. 
} => { - insts.push(json!({"op":"jump","target": target.as_u32()})); - } - I::Return { value } => { - insts.push(json!({"op":"ret","value": value.map(|v| v.as_u32())})); - } - // Phase 285LLVM-1: WeakRef support (unified form after normalization) - I::WeakRef { dst, op, value } => { - use crate::mir::WeakRefOp; - let op_name = match op { - WeakRefOp::New => "weak_new", - WeakRefOp::Load => "weak_load", - }; - let value_field = match op { - WeakRefOp::New => "box_val", - WeakRefOp::Load => "weak_ref", - }; - insts.push(json!({"op": op_name, "dst": dst.as_u32(), value_field: value.as_u32()})); - } - // Legacy WeakNew/WeakLoad (before normalization) - I::WeakNew { dst, box_val } => { - insts.push(json!({"op":"weak_new","dst": dst.as_u32(), "box_val": box_val.as_u32()})); - } - I::WeakLoad { dst, weak_ref } => { - insts.push(json!({"op":"weak_load","dst": dst.as_u32(), "weak_ref": weak_ref.as_u32()})); - } - // Phase 287: Lifecycle management - I::KeepAlive { values } => { - let values_json: Vec<_> = values.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"keepalive","values":values_json})); - } - I::ReleaseStrong { values } => { - let values_json: Vec<_> = values.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"release_strong","values":values_json})); - } - _ => { /* skip non-essential ops for initial harness */ } - } - } - // Phase 131-13: Terminator emitted inline (no delayed copies) - if let Some(term) = &bb.terminator { - match term { - I::Return { value } => insts.push(json!({"op":"ret","value": value.map(|v| v.as_u32())})), - I::Jump { target, .. } => insts.push(json!({"op":"jump","target": target.as_u32()})), - I::Branch { condition, then_bb, else_bb, .. 
} => insts.push(json!({"op":"branch","cond": condition.as_u32(), "then": then_bb.as_u32(), "else": else_bb.as_u32()})), - _ => {} - } - } - blocks.push(json!({"id": bid.as_u32(), "instructions": insts})); - } - } - // Export parameter value-ids so a VM can bind arguments - let params: Vec<_> = f.params.iter().map(|v| v.as_u32()).collect(); - - // Phase 131-11-F: Build metadata JSON from MIR metadata (SSOT) - let metadata_json = json!({ - "value_types": f.metadata.value_types.iter().map(|(k, v)| { - let type_str = match v { - MirType::Integer => json!("i64"), - MirType::Float => json!("f64"), // Phase 275 P0: Float type annotation - MirType::String => json!({"kind": "string"}), - MirType::Box(bt) => json!({"kind": "handle", "box_type": bt}), - MirType::Bool => json!("i1"), - MirType::Void => json!("void"), - MirType::Unknown => json!(null), - _ => json!(null), - }; - (k.as_u32().to_string(), type_str) - }).collect::>() - }); - - funs.push(json!({ - "name": name, - "params": params, - "blocks": blocks, - "metadata": metadata_json - })); - } - - // Phase 15.5: JSON v1 schema with environment variable control - let use_v1_schema = std::env::var("NYASH_JSON_SCHEMA_V1").unwrap_or_default() == "1" - || match std::env::var("NYASH_MIR_UNIFIED_CALL") - .ok() - .as_deref() - .map(|s| s.to_ascii_lowercase()) - { - Some(s) if s == "0" || s == "false" || s == "off" => false, - _ => true, - }; - - // Phase 155: Extract CFG information for hako_check - let cfg_info = nyash_rust::mir::extract_cfg_info(module); - - // Phase 285LLVM-1.1: Extract user box declarations for LLVM harness - let user_box_decls: Vec = module.metadata.user_box_decls - .iter() - .map(|(name, fields)| { - json!({ - "name": name, - "fields": fields - }) - }) - .collect(); - - let root = if use_v1_schema { - let mut root = create_json_v1_root(json!(funs)); - // Add CFG data and user box declarations to v1 schema - if let Some(obj) = root.as_object_mut() { - obj.insert("cfg".to_string(), cfg_info); - 
obj.insert("user_box_decls".to_string(), json!(user_box_decls)); // Phase 285LLVM-1.1 - } - root - } else { - // v0 legacy format - also add CFG and user_box_decls - json!({ - "functions": funs, - "cfg": cfg_info, - "user_box_decls": user_box_decls // Phase 285LLVM-1.1 - }) - }; - - // NOTE: numeric_core strict validation is applied on the AotPrep output - // (tools/hakorune_emit_mir.sh) rather than at raw MIR emit time. This keeps - // pre-AotPrep MIR emission usable even when BoxCall(MatI64, mul_naive) is - // still present. - - std::fs::write(path, serde_json::to_string_pretty(&root).unwrap()) - .map_err(|e| format!("write mir json: {}", e)) -} - -/// Variant for the bin crate's local MIR type -pub fn emit_mir_json_for_harness_bin( - module: &crate::mir::MirModule, - path: &std::path::Path, -) -> Result<(), String> { - use crate::mir::{BinaryOp as B, CompareOp as C, MirInstruction as I, MirType}; - let mut funs = Vec::new(); - for (name, f) in &module.functions { - let mut blocks = Vec::new(); - let mut ids: Vec<_> = f.blocks.keys().copied().collect(); - ids.sort(); - for bid in ids { - if let Some(bb) = f.blocks.get(&bid) { - let mut insts = Vec::new(); - // Phase 131-13: Emit all instructions in MIR order (SSOT principle) - // No reordering except PHI consolidation at block start (LLVM constraint) - - // Step 1: Emit all PHI instructions first (LLVM requirement) - for inst in &bb.instructions { - if let I::Phi { dst, inputs, .. 
} = inst { - let incoming: Vec<_> = inputs - .iter() - .map(|(b, v)| json!([v.as_u32(), b.as_u32()])) - .collect(); - // Phase 131-11-F: Add dst_type hint from metadata for all PHI instructions - let mut phi_inst = - json!({"op":"phi","dst": dst.as_u32(), "incoming": incoming}); - if let Some(dst_type) = f.metadata.value_types.get(dst) { - let type_json = match dst_type { - MirType::Integer => json!("i64"), - MirType::Float => json!("f64"), // Phase 275 P0: Float PHI type - MirType::String => json!({"kind": "string"}), - MirType::Box(bt) => json!({"kind": "handle", "box_type": bt}), - MirType::Bool => json!("i1"), - MirType::Void => json!("void"), - _ => json!(null), - }; - if !type_json.is_null() { - phi_inst["dst_type"] = type_json; - } - } - insts.push(phi_inst); - } - } - - // Step 2: Emit all non-PHI instructions in MIR order (no reordering!) - for inst in &bb.instructions { - match inst { - I::Phi { .. } => { - // Already emitted in step 1 - continue; - } - I::Copy { dst, src } => { - insts.push( - json!({"op":"copy","dst": dst.as_u32(), "src": src.as_u32()}), - ); - } - I::Const { dst, value } => match value { - crate::mir::ConstValue::Integer(i) => { - insts.push(json!({"op":"const","dst": dst.as_u32(), "value": {"type": "i64", "value": i}})); - } - crate::mir::ConstValue::Float(fv) => { - insts.push(json!({"op":"const","dst": dst.as_u32(), "value": {"type": "f64", "value": fv}})); - } - crate::mir::ConstValue::Bool(b) => { - insts.push(json!({"op":"const","dst": dst.as_u32(), "value": {"type": "i64", "value": if *b {1} else {0}}})); - } - crate::mir::ConstValue::String(s) => { - insts.push(json!({ - "op":"const", - "dst": dst.as_u32(), - "value": { - "type": {"kind":"handle","box_type":"StringBox"}, - "value": s - } - })); - } - crate::mir::ConstValue::Null | crate::mir::ConstValue::Void => { - insts.push(json!({"op":"const","dst": dst.as_u32(), "value": {"type": "void", "value": 0}})); - } - }, - I::BinOp { dst, op, lhs, rhs } => { - let op_s = match op { 
- B::Add => "+", - B::Sub => "-", - B::Mul => "*", - B::Div => "/", - B::Mod => "%", - B::BitAnd => "&", - B::BitOr => "|", - B::BitXor => "^", - B::Shl => "<<", - B::Shr => ">>", - B::And => "&", - B::Or => "|", - }; - let mut obj = json!({"op":"binop","operation": op_s, "lhs": lhs.as_u32(), "rhs": rhs.as_u32(), "dst": dst.as_u32()}); - // Phase 131-15-P1: dst_type only when type is KNOWN (not Unknown) - // Operand TypeFacts take priority over dst_type hint in Python - if matches!(op, B::Add) { - let dst_type = f.metadata.value_types.get(dst); - match dst_type { - Some(MirType::Box(bt)) if bt == "StringBox" => { - obj["dst_type"] = - json!({"kind":"handle","box_type":"StringBox"}); - } - Some(MirType::Integer) => { - // Explicitly mark as i64 for integer addition - obj["dst_type"] = json!("i64"); - } - Some(MirType::Unknown) | None => { - // Unknown: DO NOT emit dst_type - // Let Python side infer from operand TypeFacts - } - _ => { - // Other known types: use conservative i64 - obj["dst_type"] = json!("i64"); - } - } - } - insts.push(obj); - } - I::Compare { dst, op, lhs, rhs } => { - let op_s = match op { - C::Eq => "==", - C::Ne => "!=", - C::Lt => "<", - C::Le => "<=", - C::Gt => ">", - C::Ge => ">=", - }; - insts.push(json!({"op":"compare","operation": op_s, "lhs": lhs.as_u32(), "rhs": rhs.as_u32(), "dst": dst.as_u32()})); - } - I::TypeOp { dst, op, value, ty } => { - let op_s = match op { - crate::mir::TypeOpKind::Check => "check", - crate::mir::TypeOpKind::Cast => "cast", - }; - let ty_s = match ty { - MirType::Integer => "Integer".to_string(), - MirType::Float => "Float".to_string(), - MirType::Bool => "Bool".to_string(), - MirType::String => "String".to_string(), - MirType::Void => "Void".to_string(), - MirType::Box(name) => name.clone(), - _ => "Unknown".to_string(), - }; - insts.push(json!({ - "op":"typeop", - "operation": op_s, - "src": value.as_u32(), - "dst": dst.as_u32(), - "target_type": ty_s, - })); - } - I::Call { - dst, - func, - callee, - args, 
- effects, - .. - } => { - // Phase 15.5: Unified Call support with environment variable control - let use_unified = match std::env::var("NYASH_MIR_UNIFIED_CALL") - .ok() - .as_deref() - .map(|s| s.to_ascii_lowercase()) - { - Some(s) if s == "0" || s == "false" || s == "off" => false, - _ => true, - }; - - if use_unified && callee.is_some() { - // v1: Unified mir_call format - let effects_str: Vec<&str> = - if effects.is_io() { vec!["IO"] } else { vec![] }; - let args_u32: Vec = args.iter().map(|v| v.as_u32()).collect(); - let unified_call = emit_unified_mir_call( - dst.map(|v| v.as_u32()), - callee.as_ref().unwrap(), - &args_u32, - &effects_str, - ); - insts.push(unified_call); - } else if !use_unified && callee.is_some() { - // v0: When unified is OFF but callee exists, emit proper v0 format - use Callee; - match callee.as_ref().unwrap() { - Callee::Method { - method, receiver, .. - } => { - // Emit as boxcall for compatibility - let box_val = receiver.unwrap_or(*func); - let args_a: Vec<_> = - args.iter().map(|v| json!(v.as_u32())).collect(); - let mut obj = json!({ - "op":"boxcall", - "box": box_val.as_u32(), - "method": method, - "args": args_a, - "dst": dst.map(|d| d.as_u32()) - }); - // Add dst_type hints for known methods - let m = method.as_str(); - let dst_ty = if m == "substring" - || m == "dirname" - || m == "join" - || m == "read_all" - || m == "read" - { - Some(json!({"kind":"handle","box_type":"StringBox"})) - } else if m == "length" || m == "lastIndexOf" { - Some(json!("i64")) - } else { - None - }; - if let Some(t) = dst_ty { - obj["dst_type"] = t; - } - insts.push(obj); - if let Some(_d) = dst.map(|v| v.as_u32()) {} - } - _ => { - // Other callee types: emit generic call - let args_a: Vec<_> = - args.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"call","func": func.as_u32(), "args": args_a, "dst": dst.map(|d| d.as_u32())})); - } - } - } else { - // v0: Legacy call format (no callee info) - let args_a: Vec<_> = - 
args.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"call","func": func.as_u32(), "args": args_a, "dst": dst.map(|d| d.as_u32())})); - } - } - I::ExternCall { - dst, - iface_name, - method_name, - args, - .. - } => { - let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); - let mut obj = json!({ - "op":"externcall","func": format!("{}.{}", iface_name, method_name), "args": args_a, - "dst": dst.map(|d| d.as_u32()), - }); - if iface_name == "env.console" { - if dst.is_some() { - obj["dst_type"] = json!("i64"); - } - } - insts.push(obj); - if let Some(_d) = dst.map(|v| v.as_u32()) {} - } - I::BoxCall { - dst, - box_val, - method, - method_id, - args, - .. - } => { - let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); - let mut obj = json!({ - "op":"boxcall","box": box_val.as_u32(), "method": method, "args": args_a, "dst": dst.map(|d| d.as_u32()) - }); - // Phase 287 P4: Include method_id for universal slot tracking (toString[#0]) - if let Some(mid) = method_id { - obj["method_id"] = json!(mid); - } - let m = method.as_str(); - let dst_ty = if m == "substring" - || m == "dirname" - || m == "join" - || m == "read_all" - || m == "read" - { - Some(json!({"kind":"handle","box_type":"StringBox"})) - } else if m == "length" || m == "lastIndexOf" { - Some(json!("i64")) - } else { - None - }; - if let Some(t) = dst_ty { - obj["dst_type"] = t; - } - insts.push(obj); - if let Some(_d) = dst.map(|v| v.as_u32()) {} - } - I::NewBox { - dst, - box_type, - args, - } => { - let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"newbox","type": box_type, "args": args_a, "dst": dst.as_u32()})); - } - I::Branch { - condition, - then_bb, - else_bb, - .. - } => { - insts.push(json!({"op":"branch","cond": condition.as_u32(), "then": then_bb.as_u32(), "else": else_bb.as_u32()})); - } - I::Jump { target, .. 
} => { - insts.push(json!({"op":"jump","target": target.as_u32()})); - } - I::Return { value } => { - insts.push(json!({"op":"ret","value": value.map(|v| v.as_u32())})); - } - // Phase 285LLVM-1: WeakRef support (unified form after normalization) - I::WeakRef { dst, op, value } => { - use crate::mir::WeakRefOp; - let op_name = match op { - WeakRefOp::New => "weak_new", - WeakRefOp::Load => "weak_load", - }; - let value_field = match op { - WeakRefOp::New => "box_val", - WeakRefOp::Load => "weak_ref", - }; - insts.push(json!({"op": op_name, "dst": dst.as_u32(), value_field: value.as_u32()})); - } - // Legacy WeakNew/WeakLoad (before normalization) - I::WeakNew { dst, box_val } => { - insts.push(json!({"op":"weak_new","dst": dst.as_u32(), "box_val": box_val.as_u32()})); - } - I::WeakLoad { dst, weak_ref } => { - insts.push(json!({"op":"weak_load","dst": dst.as_u32(), "weak_ref": weak_ref.as_u32()})); - } - // Phase 287: Lifecycle management - I::KeepAlive { values } => { - let values_json: Vec<_> = values.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"keepalive","values":values_json})); - } - I::ReleaseStrong { values } => { - let values_json: Vec<_> = values.iter().map(|v| json!(v.as_u32())).collect(); - insts.push(json!({"op":"release_strong","values":values_json})); - } - _ => {} - } - } - // Phase 131-13: Terminator emitted inline (no delayed copies) - if let Some(term) = &bb.terminator { - match term { - I::Return { value } => insts.push(json!({"op":"ret","value": value.map(|v| v.as_u32())})), - I::Jump { target, .. } => insts.push(json!({"op":"jump","target": target.as_u32()})), - I::Branch { condition, then_bb, else_bb, .. 
} => insts.push(json!({"op":"branch","cond": condition.as_u32(), "then": then_bb.as_u32(), "else": else_bb.as_u32()})), - _ => {} } - } - blocks.push(json!({"id": bid.as_u32(), "instructions": insts})); - } - } - let params: Vec<_> = f.params.iter().map(|v| v.as_u32()).collect(); - - // Phase 131-11-F: Build metadata JSON from MIR metadata (SSOT) - let metadata_json = json!({ - "value_types": f.metadata.value_types.iter().map(|(k, v)| { - let type_str = match v { - MirType::Integer => json!("i64"), - MirType::Float => json!("f64"), // Phase 275 P0: Float type annotation - MirType::String => json!({"kind": "string"}), - MirType::Box(bt) => json!({"kind": "handle", "box_type": bt}), - MirType::Bool => json!("i1"), - MirType::Void => json!("void"), - MirType::Unknown => json!(null), - _ => json!(null), - }; - (k.as_u32().to_string(), type_str) - }).collect::>() - }); - - funs.push(json!({ - "name": name, - "params": params, - "blocks": blocks, - "metadata": metadata_json - })); - } - - // Phase 155: Extract CFG information for hako_check - let cfg_info = crate::mir::extract_cfg_info(module); - - // Phase 285LLVM-1.1: Extract user box declarations for LLVM harness - let user_box_decls: Vec = module.metadata.user_box_decls - .iter() - .map(|(name, fields)| { - json!({ - "name": name, - "fields": fields - }) - }) - .collect(); - - let root = json!({ - "functions": funs, - "cfg": cfg_info, - "user_box_decls": user_box_decls // Phase 285LLVM-1.1 - }); - - // NOTE: numeric_core strict validation is applied on the AotPrep output - // (tools/hakorune_emit_mir.sh) rather than at raw MIR emit time. This keeps - // pre-AotPrep MIR emission usable even when BoxCall(MatI64, mul_naive) is - // still present. 
- - std::fs::write(path, serde_json::to_string_pretty(&root).unwrap()) - .map_err(|e| format!("write mir json: {}", e)) -} diff --git a/src/runner/mir_json_emit/emitters/basic.rs b/src/runner/mir_json_emit/emitters/basic.rs new file mode 100644 index 00000000..0f9c8fb9 --- /dev/null +++ b/src/runner/mir_json_emit/emitters/basic.rs @@ -0,0 +1,154 @@ +use serde_json::json; + +use crate::mir::{BinaryOp, CompareOp, ConstValue, MirType, TypeOpKind, UnaryOp, ValueId}; + +pub(crate) fn emit_copy(dst: &ValueId, src: &ValueId) -> serde_json::Value { + json!({"op":"copy","dst": dst.as_u32(), "src": src.as_u32()}) +} + +pub(crate) fn emit_unary_op( + dst: &ValueId, + op: &UnaryOp, + operand: &ValueId, +) -> serde_json::Value { + let kind = match op { + UnaryOp::Neg => "neg", + UnaryOp::Not => "not", + UnaryOp::BitNot => "bitnot", + }; + json!({"op":"unop","operation": kind, "src": operand.as_u32(), "dst": dst.as_u32()}) +} + +pub(crate) fn emit_const(dst: &ValueId, value: &ConstValue) -> serde_json::Value { + match value { + ConstValue::Integer(i) => { + json!({"op":"const","dst": dst.as_u32(), "value": {"type": "i64", "value": i}}) + } + ConstValue::Float(fv) => { + json!({"op":"const","dst": dst.as_u32(), "value": {"type": "f64", "value": fv}}) + } + ConstValue::Bool(b) => json!({"op":"const","dst": dst.as_u32(), "value": {"type": "i64", "value": if *b {1} else {0}}}), + ConstValue::String(s) => json!({ + "op":"const", + "dst": dst.as_u32(), + "value": { + "type": {"kind":"handle","box_type":"StringBox"}, + "value": s + } + }), + ConstValue::Null | ConstValue::Void => { + json!({"op":"const","dst": dst.as_u32(), "value": {"type": "void", "value": 0}}) + } + } +} + +pub(crate) fn emit_type_op( + dst: &ValueId, + op: &TypeOpKind, + value: &ValueId, + ty: &MirType, +) -> serde_json::Value { + let op_s = match op { + TypeOpKind::Check => "check", + TypeOpKind::Cast => "cast", + }; + let ty_s = match ty { + MirType::Integer => "Integer".to_string(), + MirType::Float => 
"Float".to_string(), + MirType::Bool => "Bool".to_string(), + MirType::String => "String".to_string(), + MirType::Void => "Void".to_string(), + MirType::Box(name) => name.clone(), + _ => "Unknown".to_string(), + }; + json!({ + "op":"typeop", + "operation": op_s, + "src": value.as_u32(), + "dst": dst.as_u32(), + "target_type": ty_s, + }) +} + +pub(crate) fn emit_bin_op( + dst: &ValueId, + op: &BinaryOp, + lhs: &ValueId, + rhs: &ValueId, + value_types: &std::collections::BTreeMap, +) -> serde_json::Value { + let op_s = match op { + BinaryOp::Add => "+", + BinaryOp::Sub => "-", + BinaryOp::Mul => "*", + BinaryOp::Div => "/", + BinaryOp::Mod => "%", + BinaryOp::BitAnd => "&", + BinaryOp::BitOr => "|", + BinaryOp::BitXor => "^", + BinaryOp::Shl => "<<", + BinaryOp::Shr => ">>", + BinaryOp::And => "&", + BinaryOp::Or => "|", + }; + let mut obj = json!({"op":"binop","operation": op_s, "lhs": lhs.as_u32(), "rhs": rhs.as_u32(), "dst": dst.as_u32()}); + // Phase 131-15-P1: dst_type only when type is KNOWN (not Unknown) + // Operand TypeFacts take priority over dst_type hint in Python + if matches!(op, BinaryOp::Add) { + let dst_type = value_types.get(dst); + match dst_type { + Some(MirType::Box(bt)) if bt == "StringBox" => { + obj["dst_type"] = json!({"kind":"handle","box_type":"StringBox"}); + } + Some(MirType::Integer) => { + // Explicitly mark as i64 for integer addition + obj["dst_type"] = json!("i64"); + } + Some(MirType::Unknown) | None => { + // Unknown: DO NOT emit dst_type + // Let Python side infer from operand TypeFacts + } + _ => { + // Other known types: use conservative i64 + obj["dst_type"] = json!("i64"); + } + } + } + obj +} + +pub(crate) fn emit_compare( + dst: &ValueId, + op: &CompareOp, + lhs: &ValueId, + rhs: &ValueId, + value_types: &std::collections::BTreeMap, +) -> serde_json::Value { + let op_s = match op { + CompareOp::Ge => ">=", + CompareOp::Le => "<=", + CompareOp::Gt => ">", + CompareOp::Lt => "<", + CompareOp::Eq => "==", + CompareOp::Ne => 
"!=", + }; + let mut obj = + json!({"op":"compare","operation": op_s, "lhs": lhs.as_u32(), "rhs": rhs.as_u32(), "dst": dst.as_u32()}); + // cmp_kind hint for string equality + if matches!(op, CompareOp::Eq | CompareOp::Ne) { + let lhs_is_str = match value_types.get(lhs) { + Some(MirType::String) => true, + Some(MirType::Box(bt)) if bt == "StringBox" => true, + _ => false, + }; + let rhs_is_str = match value_types.get(rhs) { + Some(MirType::String) => true, + Some(MirType::Box(bt)) if bt == "StringBox" => true, + _ => false, + }; + if lhs_is_str && rhs_is_str { + obj["cmp_kind"] = json!("string"); + } + } + obj +} diff --git a/src/runner/mir_json_emit/emitters/calls.rs b/src/runner/mir_json_emit/emitters/calls.rs new file mode 100644 index 00000000..c978dac8 --- /dev/null +++ b/src/runner/mir_json_emit/emitters/calls.rs @@ -0,0 +1,156 @@ +use serde_json::json; + +use crate::mir::definitions::Callee; +use crate::mir::{EffectMask, ValueId}; + +use super::super::helpers::emit_unified_mir_call; + +pub(crate) fn emit_call( + dst: &Option, + func: &ValueId, + callee: Option<&Callee>, + args: &[ValueId], + effects: &EffectMask, +) -> Option { + // Phase 15.5: Unified Call support with environment variable control + let use_unified = match std::env::var("NYASH_MIR_UNIFIED_CALL") + .ok() + .as_deref() + .map(|s| s.to_ascii_lowercase()) + { + Some(s) if s == "0" || s == "false" || s == "off" => false, + _ => true, + }; + + if use_unified && callee.is_some() { + // v1: Unified mir_call format + let effects_str: Vec<&str> = if effects.is_io() { vec!["IO"] } else { vec![] }; + let args_u32: Vec = args.iter().map(|v| v.as_u32()).collect(); + let unified_call = emit_unified_mir_call( + dst.map(|v| v.as_u32()), + callee.unwrap(), + &args_u32, + &effects_str, + ); + Some(unified_call) + } else if !use_unified && callee.is_some() { + // v0: When unified is OFF but callee exists, emit proper v0 format + match callee.unwrap() { + Callee::Method { + method, receiver, .. 
+ } => { + // Emit as boxcall for compatibility + let box_val = receiver.unwrap_or(*func); + let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); + let mut obj = json!({ + "op":"boxcall", + "box": box_val.as_u32(), + "method": method, + "args": args_a, + "dst": dst.map(|d| d.as_u32()) + }); + // Add dst_type hints for known methods + let m = method.as_str(); + let dst_ty = if m == "substring" + || m == "dirname" + || m == "join" + || m == "read_all" + || m == "read" + { + Some(json!({"kind":"handle","box_type":"StringBox"})) + } else if m == "length" || m == "lastIndexOf" { + Some(json!("i64")) + } else { + None + }; + if let Some(t) = dst_ty { + obj["dst_type"] = t; + } + Some(obj) + } + _ => { + // Other callee types: emit generic call + let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); + Some( + json!({"op":"call","func": func.as_u32(), "args": args_a, "dst": dst.map(|d| d.as_u32())}), + ) + } + } + } else { + // v0: Legacy call format (no callee info) + let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); + Some(json!({"op":"call","func": func.as_u32(), "args": args_a, "dst": dst.map(|d| d.as_u32())})) + } +} + +pub(crate) fn emit_extern_call( + dst: &Option, + iface_name: &str, + method_name: &str, + args: &[ValueId], +) -> serde_json::Value { + let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); + let func_name = if iface_name == "env.console" { + format!("nyash.console.{}", method_name) + } else { + format!("{}.{}", iface_name, method_name) + }; + let mut obj = json!({ + "op": "externcall", + "func": func_name, + "args": args_a, + "dst": dst.map(|d| d.as_u32()), + }); + // Minimal dst_type hints for known externs + if iface_name == "env.console" { + // console.* returns i64 status (ignored by user code) + if dst.is_some() { + obj["dst_type"] = json!("i64"); + } + } + obj +} + +pub(crate) fn emit_box_call( + dst: &Option, + box_val: &ValueId, + method: &str, + method_id: Option<&u16>, 
+ args: &[ValueId], +) -> serde_json::Value { + let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); + // Minimal dst_type hints + let mut obj = json!({ + "op":"boxcall","box": box_val.as_u32(), "method": method, "args": args_a, "dst": dst.map(|d| d.as_u32()) + }); + // Phase 287 P4: Include method_id for universal slot tracking (toString[#0]) + if let Some(mid) = method_id { + obj["method_id"] = json!(mid); + } + let m = method; + let dst_ty = if m == "substring" + || m == "dirname" + || m == "join" + || m == "read_all" + || m == "read" + { + Some(json!({"kind":"handle","box_type":"StringBox"})) + } else if m == "length" || m == "lastIndexOf" { + Some(json!("i64")) + } else { + None + }; + if let Some(t) = dst_ty { + obj["dst_type"] = t; + } + obj +} + +pub(crate) fn emit_new_box( + dst: &ValueId, + box_type: &str, + args: &[ValueId], +) -> serde_json::Value { + let args_a: Vec<_> = args.iter().map(|v| json!(v.as_u32())).collect(); + json!({"op":"newbox","type": box_type, "args": args_a, "dst": dst.as_u32()}) +} diff --git a/src/runner/mir_json_emit/emitters/control_flow.rs b/src/runner/mir_json_emit/emitters/control_flow.rs new file mode 100644 index 00000000..41d846a2 --- /dev/null +++ b/src/runner/mir_json_emit/emitters/control_flow.rs @@ -0,0 +1,33 @@ +use serde_json::json; + +use crate::mir::{BasicBlockId, MirInstruction, ValueId}; + +pub(crate) fn emit_branch( + condition: &ValueId, + then_bb: &BasicBlockId, + else_bb: &BasicBlockId, +) -> serde_json::Value { + json!({"op":"branch","cond": condition.as_u32(), "then": then_bb.as_u32(), "else": else_bb.as_u32()}) +} + +pub(crate) fn emit_jump(target: &BasicBlockId) -> serde_json::Value { + json!({"op":"jump","target": target.as_u32()}) +} + +pub(crate) fn emit_return(value: Option<&ValueId>) -> serde_json::Value { + json!({"op":"ret","value": value.map(|v| v.as_u32())}) +} + +pub(crate) fn emit_terminator(term: &MirInstruction) -> Option { + match term { + MirInstruction::Return { value } => 
Some(emit_return(value.as_ref())), + MirInstruction::Jump { target, .. } => Some(emit_jump(target)), + MirInstruction::Branch { + condition, + then_bb, + else_bb, + .. + } => Some(emit_branch(condition, then_bb, else_bb)), + _ => None, + } +} diff --git a/src/runner/mir_json_emit/emitters/mod.rs b/src/runner/mir_json_emit/emitters/mod.rs new file mode 100644 index 00000000..45d80ad2 --- /dev/null +++ b/src/runner/mir_json_emit/emitters/mod.rs @@ -0,0 +1,124 @@ +mod basic; +mod calls; +mod control_flow; +mod phi; +mod weak; + +use crate::mir::MirInstruction as I; + +pub(crate) fn emit_phi_instructions( + func: &crate::mir::MirFunction, + block: &crate::mir::BasicBlock, +) -> Vec { + let mut insts = Vec::new(); + for inst in &block.instructions { + if let I::Phi { .. } = inst { + if let Some(value) = phi::emit_phi(inst, &func.metadata.value_types) { + insts.push(value); + } + } + } + insts +} + +pub(crate) fn emit_non_phi_instructions( + func: &crate::mir::MirFunction, + block: &crate::mir::BasicBlock, + insts: &mut Vec, +) { + for inst in &block.instructions { + if let I::Phi { .. 
} = inst { + continue; + } + if let Some(value) = emit_instruction(func, inst) { + insts.push(value); + } + } +} + +pub(crate) fn emit_terminator( + terminator: &Option, +) -> Option { + terminator.as_ref().and_then(control_flow::emit_terminator) +} + +fn emit_instruction( + func: &crate::mir::MirFunction, + inst: &crate::mir::MirInstruction, +) -> Option { + match inst { + I::Copy { dst, src } => Some(basic::emit_copy(dst, src)), + I::UnaryOp { dst, op, operand } => Some(basic::emit_unary_op(dst, op, operand)), + I::Const { dst, value } => Some(basic::emit_const(dst, value)), + I::TypeOp { dst, op, value, ty } => { + Some(basic::emit_type_op(dst, op, value, ty)) + } + I::BinOp { dst, op, lhs, rhs } => Some(basic::emit_bin_op( + dst, + op, + lhs, + rhs, + &func.metadata.value_types, + )), + I::Compare { dst, op, lhs, rhs } => Some(basic::emit_compare( + dst, + op, + lhs, + rhs, + &func.metadata.value_types, + )), + I::Call { + dst, + func, + callee, + args, + effects, + .. + } => calls::emit_call(dst, func, callee.as_ref(), args, effects), + I::ExternCall { + dst, + iface_name, + method_name, + args, + .. + } => Some(calls::emit_extern_call( + dst, + iface_name, + method_name, + args, + )), + I::BoxCall { + dst, + box_val, + method, + method_id, + args, + .. + } => Some(calls::emit_box_call( + dst, + box_val, + method, + method_id.as_ref(), + args, + )), + I::NewBox { + dst, + box_type, + args, + } => Some(calls::emit_new_box(dst, box_type, args)), + I::Branch { + condition, + then_bb, + else_bb, + .. + } => Some(control_flow::emit_branch(condition, then_bb, else_bb)), + I::Jump { target, .. 
} => Some(control_flow::emit_jump(target)), + I::Return { value } => Some(control_flow::emit_return(value.as_ref())), + I::WeakRef { dst, op, value } => Some(weak::emit_weak_ref(dst, op, value)), + I::WeakNew { dst, box_val } => Some(weak::emit_weak_new(dst, box_val)), + I::WeakLoad { dst, weak_ref } => Some(weak::emit_weak_load(dst, weak_ref)), + I::KeepAlive { values } => Some(weak::emit_keep_alive(values)), + I::ReleaseStrong { values } => Some(weak::emit_release_strong(values)), + _ => None, + } +} diff --git a/src/runner/mir_json_emit/emitters/phi.rs b/src/runner/mir_json_emit/emitters/phi.rs new file mode 100644 index 00000000..ed66a40f --- /dev/null +++ b/src/runner/mir_json_emit/emitters/phi.rs @@ -0,0 +1,33 @@ +use serde_json::json; + +use crate::mir::{MirInstruction, MirType}; + +pub(crate) fn emit_phi( + inst: &MirInstruction, + value_types: &std::collections::BTreeMap, +) -> Option { + let MirInstruction::Phi { dst, inputs, .. } = inst else { + return None; + }; + let incoming: Vec<_> = inputs + .iter() + .map(|(b, v)| json!([v.as_u32(), b.as_u32()])) + .collect(); + // Phase 131-11-F: Add dst_type hint from metadata for all PHI instructions + let mut phi_inst = json!({"op":"phi","dst": dst.as_u32(), "incoming": incoming}); + if let Some(dst_type) = value_types.get(dst) { + let type_json = match dst_type { + MirType::Integer => json!("i64"), + MirType::Float => json!("f64"), // Phase 275 P0: Float PHI type + MirType::String => json!({"kind": "string"}), + MirType::Box(bt) => json!({"kind": "handle", "box_type": bt}), + MirType::Bool => json!("i1"), + MirType::Void => json!("void"), + _ => json!(null), + }; + if !type_json.is_null() { + phi_inst["dst_type"] = type_json; + } + } + Some(phi_inst) +} diff --git a/src/runner/mir_json_emit/emitters/weak.rs b/src/runner/mir_json_emit/emitters/weak.rs new file mode 100644 index 00000000..315ca3d3 --- /dev/null +++ b/src/runner/mir_json_emit/emitters/weak.rs @@ -0,0 +1,37 @@ +use serde_json::json; + +use 
crate::mir::{ValueId, WeakRefOp}; + +pub(crate) fn emit_weak_ref( + dst: &ValueId, + op: &WeakRefOp, + value: &ValueId, +) -> serde_json::Value { + let op_name = match op { + WeakRefOp::New => "weak_new", + WeakRefOp::Load => "weak_load", + }; + let value_field = match op { + WeakRefOp::New => "box_val", + WeakRefOp::Load => "weak_ref", + }; + json!({"op": op_name, "dst": dst.as_u32(), value_field: value.as_u32()}) +} + +pub(crate) fn emit_weak_new(dst: &ValueId, box_val: &ValueId) -> serde_json::Value { + json!({"op":"weak_new","dst": dst.as_u32(), "box_val": box_val.as_u32()}) +} + +pub(crate) fn emit_weak_load(dst: &ValueId, weak_ref: &ValueId) -> serde_json::Value { + json!({"op":"weak_load","dst": dst.as_u32(), "weak_ref": weak_ref.as_u32()}) +} + +pub(crate) fn emit_keep_alive(values: &[ValueId]) -> serde_json::Value { + let values_json: Vec<_> = values.iter().map(|v| json!(v.as_u32())).collect(); + json!({"op":"keepalive","values":values_json}) +} + +pub(crate) fn emit_release_strong(values: &[ValueId]) -> serde_json::Value { + let values_json: Vec<_> = values.iter().map(|v| json!(v.as_u32())).collect(); + json!({"op":"release_strong","values":values_json}) +} diff --git a/src/runner/mir_json_emit/helpers.rs b/src/runner/mir_json_emit/helpers.rs new file mode 100644 index 00000000..175486e1 --- /dev/null +++ b/src/runner/mir_json_emit/helpers.rs @@ -0,0 +1,172 @@ +use crate::mir::definitions::call_unified::Callee; +use serde_json::json; + +/// Helper: Create JSON v1 root with schema information +/// Includes version, capabilities, metadata for advanced MIR features +pub(crate) fn create_json_v1_root(functions: serde_json::Value) -> serde_json::Value { + json!({ + "schema_version": "1.0", + "capabilities": [ + "unified_call", // Phase 15.5: Unified MirCall support + "phi", // SSA Phi functions + "effects", // Effect tracking for optimization + "callee_typing" // Type-safe call target resolution + ], + "metadata": { + "generator": "nyash-rust", + "phase": 
"15.5", + "build_time": "Phase 15.5 Development", + "features": ["mir_call_unification", "json_v1_schema"] + }, + "functions": functions + }) +} + +/// Helper: Emit unified mir_call JSON (v1 format) +/// Supports all 6 Callee types in a single unified JSON structure +pub(crate) fn emit_unified_mir_call( + dst: Option, + callee: &Callee, + args: &[u32], + effects: &[&str], +) -> serde_json::Value { + let mut call_obj = json!({ + "op": "mir_call", + "dst": dst, + "mir_call": { + "args": args, + "effects": effects, + "flags": {} + } + }); + + // Generate Callee-specific mir_call structure + match callee { + Callee::Global(name) => { + call_obj["mir_call"]["callee"] = json!({ + "type": "Global", + "name": name + }); + } + Callee::Method { + box_name, + method, + receiver, + certainty, + .. + } => { + call_obj["mir_call"]["callee"] = json!({ + "type": "Method", + "box_name": box_name, + "method": method, + "receiver": receiver.map(|v| v.as_u32()), + "certainty": match certainty { crate::mir::definitions::call_unified::TypeCertainty::Known => "Known", crate::mir::definitions::call_unified::TypeCertainty::Union => "Union" } + }); + } + Callee::Constructor { box_type } => { + call_obj["mir_call"]["callee"] = json!({ + "type": "Constructor", + "box_type": box_type + }); + } + Callee::Closure { + params, + captures, + me_capture, + } => { + let captures_json: Vec<_> = captures + .iter() + .map(|(name, vid)| json!([name, vid.as_u32()])) + .collect(); + call_obj["mir_call"]["callee"] = json!({ + "type": "Closure", + "params": params, + "captures": captures_json, + "me_capture": me_capture.map(|v| v.as_u32()) + }); + } + Callee::Value(vid) => { + call_obj["mir_call"]["callee"] = json!({ + "type": "Value", + "function_value": vid.as_u32() + }); + } + Callee::Extern(name) => { + call_obj["mir_call"]["callee"] = json!({ + "type": "Extern", + "name": name + }); + } + } + + call_obj +} + +/// Helper: detect residual numeric-core boxcalls that should have been lowered by 
AotPrepNumericCoreBox. +/// Currently we only check for `boxcall` with `method:"mul_naive"` which should become +/// `call("NyNumericMatI64.mul_naive", ...)` when NYASH_AOT_NUMERIC_CORE=1 is effective. +#[allow(dead_code)] +pub(crate) fn has_numeric_core_boxcall(root: &serde_json::Value) -> bool { + let funs = match root.get("functions") { + Some(v) => v.as_array().cloned().unwrap_or_default(), + None => return false, + }; + for f in funs { + let blocks = match f.get("blocks").and_then(|b| b.as_array()) { + Some(b) => b, + None => continue, + }; + for b in blocks { + let insts = match b.get("instructions").and_then(|i| i.as_array()) { + Some(i) => i, + None => continue, + }; + for inst in insts { + let op = inst.get("op").and_then(|v| v.as_str()); + let method = inst.get("method").and_then(|v| v.as_str()); + if op == Some("boxcall") && method == Some("mul_naive") { + return true; + } + } + } + } + false +} + +/// Helper: enforce numeric_core invariants when NYASH_AOT_NUMERIC_CORE=1 is set. +/// - Default: emit a warning if mul_naive boxcalls are still present. +/// - Strict: if NYASH_AOT_NUMERIC_CORE_STRICT=1, return Err to fail fast. +#[allow(dead_code)] +pub(crate) fn check_numeric_core_invariants(root: &serde_json::Value) -> Result<(), String> { + let numeric_on = matches!( + std::env::var("NYASH_AOT_NUMERIC_CORE").ok().as_deref(), + Some("1") + ); + if !numeric_on { + return Ok(()); + } + + if !has_numeric_core_boxcall(root) { + return Ok(()); + } + + let strict = matches!( + std::env::var("NYASH_AOT_NUMERIC_CORE_STRICT") + .ok() + .as_deref(), + Some("1") + ); + + eprintln!( + "[mir_json/numeric_core] NYASH_AOT_NUMERIC_CORE=1 but MIR JSON still contains boxcall(\"mul_naive\"). \ +AotPrepNumericCoreBox may not have run or did not match; inspect AotPrep logs or run tools/hakorune_emit_mir.sh with HAKO_SELFHOST_TRACE=1." 
+ ); + + if strict { + return Err( + "NYASH_AOT_NUMERIC_CORE_STRICT=1: numeric_core invariants violated (mul_naive boxcall remains)" + .to_string(), + ); + } + Ok(()) +} diff --git a/src/runner/mir_json_emit/mod.rs b/src/runner/mir_json_emit/mod.rs new file mode 100644 index 00000000..dc18a47e --- /dev/null +++ b/src/runner/mir_json_emit/mod.rs @@ -0,0 +1,132 @@ +use serde_json::json; + +mod emitters; +mod helpers; + +/// Emit MIR JSON for Python harness/PyVM. +/// The JSON schema matches tools/llvmlite_harness.py expectations and is +/// intentionally minimal for initial scaffolding. +/// +/// Phase 15.5: Supports both v0 (legacy separate ops) and v1 (unified mir_call) formats +pub fn emit_mir_json_for_harness( + module: &nyash_rust::mir::MirModule, + path: &std::path::Path, +) -> Result<(), String> { + emit_mir_json(module, path) +} + +/// Variant for the bin crate's local MIR type +pub fn emit_mir_json_for_harness_bin( + module: &crate::mir::MirModule, + path: &std::path::Path, +) -> Result<(), String> { + emit_mir_json(module, path) +} + +fn emit_mir_json(module: &crate::mir::MirModule, path: &std::path::Path) -> Result<(), String> { + use crate::mir::MirType; + + let mut funs = Vec::new(); + for (name, f) in &module.functions { + let mut blocks = Vec::new(); + let mut ids: Vec<_> = f.blocks.keys().copied().collect(); + ids.sort(); + for bid in ids { + if let Some(bb) = f.blocks.get(&bid) { + let mut insts = Vec::new(); + // Phase 131-13: Emit all instructions in MIR order (SSOT principle) + // No reordering except PHI consolidation at block start (LLVM constraint) + + // Step 1: Emit all PHI instructions first (LLVM requirement) + insts.extend(emitters::emit_phi_instructions(f, bb)); + + // Step 2: Emit all non-PHI instructions in MIR order (no reordering!) 
+ emitters::emit_non_phi_instructions(f, bb, &mut insts); + + // Phase 131-13: Terminator emitted inline (no delayed copies) + if let Some(term) = emitters::emit_terminator(&bb.terminator) { + insts.push(term); + } + blocks.push(json!({"id": bid.as_u32(), "instructions": insts})); + } + } + // Export parameter value-ids so a VM can bind arguments + let params: Vec<_> = f.params.iter().map(|v| v.as_u32()).collect(); + + // Phase 131-11-F: Build metadata JSON from MIR metadata (SSOT) + let metadata_json = json!({ + "value_types": f.metadata.value_types.iter().map(|(k, v)| { + let type_str = match v { + MirType::Integer => json!("i64"), + MirType::Float => json!("f64"), // Phase 275 P0: Float type annotation + MirType::String => json!({"kind": "string"}), + MirType::Box(bt) => json!({"kind": "handle", "box_type": bt}), + MirType::Bool => json!("i1"), + MirType::Void => json!("void"), + MirType::Unknown => json!(null), + _ => json!(null), + }; + (k.as_u32().to_string(), type_str) + }).collect::>() + }); + + funs.push(json!({ + "name": name, + "params": params, + "blocks": blocks, + "metadata": metadata_json + })); + } + + // Phase 15.5: JSON v1 schema with environment variable control + let use_v1_schema = std::env::var("NYASH_JSON_SCHEMA_V1").unwrap_or_default() == "1" + || match std::env::var("NYASH_MIR_UNIFIED_CALL") + .ok() + .as_deref() + .map(|s| s.to_ascii_lowercase()) + { + Some(s) if s == "0" || s == "false" || s == "off" => false, + _ => true, + }; + + // Phase 155: Extract CFG information for hako_check + let cfg_info = nyash_rust::mir::extract_cfg_info(module); + + // Phase 285LLVM-1.1: Extract user box declarations for LLVM harness + let user_box_decls: Vec = module + .metadata + .user_box_decls + .iter() + .map(|(name, fields)| { + json!({ + "name": name, + "fields": fields + }) + }) + .collect(); + + let root = if use_v1_schema { + let mut root = helpers::create_json_v1_root(json!(funs)); + // Add CFG data and user box declarations to v1 schema + if let 
Some(obj) = root.as_object_mut() { + obj.insert("cfg".to_string(), cfg_info); + obj.insert("user_box_decls".to_string(), json!(user_box_decls)); // Phase 285LLVM-1.1 + } + root + } else { + // v0 legacy format - also add CFG and user_box_decls + json!({ + "functions": funs, + "cfg": cfg_info, + "user_box_decls": user_box_decls // Phase 285LLVM-1.1 + }) + }; + + // NOTE: numeric_core strict validation is applied on the AotPrep output + // (tools/hakorune_emit_mir.sh) rather than at raw MIR emit time. This keeps + // pre-AotPrep MIR emission usable even when BoxCall(MatI64, mul_naive) is + // still present. + + std::fs::write(path, serde_json::to_string_pretty(&root).unwrap()) + .map_err(|e| format!("write mir json: {}", e)) +} diff --git a/src/runner/modes/common_util/resolve/strip/merge.rs b/src/runner/modes/common_util/resolve/strip/merge.rs new file mode 100644 index 00000000..c772ed08 --- /dev/null +++ b/src/runner/modes/common_util/resolve/strip/merge.rs @@ -0,0 +1,250 @@ +use crate::runner::NyashRunner; + +use super::prelude::resolve_prelude_paths_profiled; +use super::using::collect_using_and_strip; + +/// Merge prelude ASTs with the main AST into a single Program node. +/// - Collects statements from each prelude Program in order, then appends +/// statements from the main Program. +/// - If the main AST is not a Program, returns it unchanged (defensive). +pub fn merge_prelude_asts_with_main( + prelude_asts: Vec, + main_ast: &nyash_rust::ast::ASTNode, +) -> nyash_rust::ast::ASTNode { + use nyash_rust::ast::{ASTNode, Span}; + let mut combined: Vec = Vec::new(); + for a in prelude_asts.into_iter() { + if let ASTNode::Program { statements, .. } = a { + combined.extend(statements); + } + } + if let ASTNode::Program { statements, .. } = main_ast.clone() { + let mut all = combined; + all.extend(statements); + ASTNode::Program { + statements: all, + span: Span::unknown(), + } + } else { + // Defensive: unexpected shape; preserve main AST unchanged. 
+ main_ast.clone() + } +} + +/// Text-based prelude merge: simpler and faster than AST merge. +/// Recursively resolves using dependencies, strips using lines from each file, +/// and concatenates prelude text followed by main source text. +/// Returns merged source text ready for compilation. +pub fn merge_prelude_text( + runner: &NyashRunner, + source: &str, + filename: &str, +) -> Result { + let trace = crate::config::env::env_bool("NYASH_RESOLVE_TRACE"); + + // First pass: collect and resolve prelude paths + let (cleaned_main, prelude_paths) = resolve_prelude_paths_profiled(runner, source, filename)?; + // Expand nested preludes for text-merge too (DFS) so that any `using` + // inside prelude files (e.g., runner_min -> lower_* boxes) are also + // included even when NYASH_USING_AST is OFF. + let mut expanded: Vec = Vec::new(); + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); + for p in prelude_paths.iter() { + dfs_text(runner, p, &mut expanded, &mut seen)?; + } + let prelude_paths = &expanded; + // Record for enriched diagnostics (parse error context) + crate::runner::modes::common_util::resolve::set_last_merged_preludes(prelude_paths.clone()); + + if prelude_paths.is_empty() { + // No using statements, return original + return Ok(source.to_string()); + } + + if trace { + crate::runner::trace::log(format!( + "[using/text-merge] {} prelude files for '{}'", + prelude_paths.len(), + filename + )); + } + + // Build merged text: preludes first, then main source + let mut merged = String::new(); + let mut spans: Vec = Vec::new(); + let mut current_line: usize = 1; + + // Add preludes in DFS order + for (idx, path) in prelude_paths.iter().enumerate() { + // Phase 90-A: fs 系移行 + let ring0 = crate::runtime::ring0::get_global_ring0(); + let content = ring0 + .fs + .read_to_string(std::path::Path::new(path)) + .map_err(|e| format!("using: failed to read '{}': {}", path, e))?; + + // Strip using lines from prelude and normalize + let 
(cleaned_raw, _nested, _nested_imports) = + collect_using_and_strip(runner, &content, path)?; + let mut cleaned = normalize_text_for_inline(&cleaned_raw); + // Hako-friendly normalize for preludes: always strip leading `local ` at line head + // when the prelude is a .hako (or looks like Hako code). This prevents top-level + // `local` from tripping the Nyash parser after text merge. + if path.ends_with(".hako") + || crate::runner::modes::common_util::hako::looks_like_hako_code(&cleaned) + { + cleaned = crate::runner::modes::common_util::hako::strip_local_decl(&cleaned); + } + + if trace { + crate::runner::trace::log(format!( + "[using/text-merge] [{}] '{}' ({} bytes)", + idx + 1, + path, + cleaned.len() + )); + } + + merged.push_str(&cleaned); + merged.push('\n'); + + let added = cleaned.lines().count(); + if added > 0 { + spans.push(crate::runner::modes::common_util::resolve::LineSpan { + file: path.clone(), + start_line: current_line, + line_count: added, + }); + current_line += added + 1; // +1 for extra '\n' + } else { + current_line += 1; + } + } + + // Add boundary marker if debug mode + if crate::config::env::env_bool("NYASH_RESOLVE_SEAM_DEBUG") { + merged.push_str("\n/* --- using prelude/main boundary --- */\n\n"); + let boundary_lines = 3usize; + spans.push(crate::runner::modes::common_util::resolve::LineSpan { + file: "".to_string(), + start_line: current_line, + line_count: boundary_lines, + }); + current_line += boundary_lines; + } + + // Add main source (already cleaned of using lines) and normalize + let mut cleaned_main_norm = normalize_text_for_inline(&cleaned_main); + // Hako-friendly normalize for main: always strip leading `local ` at line head + // when the merged main looks like Hako code (or file is .hako as a heuristic). 
+ if filename.ends_with(".hako") + || crate::runner::modes::common_util::hako::looks_like_hako_code(&cleaned_main_norm) + { + cleaned_main_norm = + crate::runner::modes::common_util::hako::strip_local_decl(&cleaned_main_norm); + } + merged.push_str(&cleaned_main_norm); + let main_lines = cleaned_main_norm.lines().count(); + if main_lines > 0 { + spans.push(crate::runner::modes::common_util::resolve::LineSpan { + file: filename.to_string(), + start_line: current_line, + line_count: main_lines, + }); + current_line += main_lines; + } + let _ = current_line; + + if trace { + crate::runner::trace::log(format!( + "[using/text-merge] final merged: {} bytes ({} prelude + {} main)", + merged.len(), + merged.len() - cleaned_main.len(), + cleaned_main.len() + )); + } + + // Optional dump of merged text for diagnostics + if let Ok(dump_path) = std::env::var("NYASH_RESOLVE_DUMP_MERGED") { + if !dump_path.is_empty() { + let _ = std::fs::write(&dump_path, &merged); + } + } + + crate::runner::modes::common_util::resolve::set_last_text_merge_line_spans(spans); + + Ok(normalize_text_for_inline(&merged)) +} + +fn canonize(p: &str) -> String { + std::fs::canonicalize(p) + .ok() + .map(|pb| pb.to_string_lossy().to_string()) + .unwrap_or_else(|| p.to_string()) +} + +fn dfs_text( + runner: &NyashRunner, + path: &str, + out: &mut Vec, + seen: &mut std::collections::HashSet, +) -> Result<(), String> { + let key = canonize(path); + if !seen.insert(key.clone()) { + return Ok(()); + } + // Phase 90-A: fs 系移行 + let ring0 = crate::runtime::ring0::get_global_ring0(); + let src = ring0 + .fs + .read_to_string(std::path::Path::new(path)) + .map_err(|e| format!("using: failed to read '{}': {}", path, e))?; + let (_cleaned, nested, _nested_imports) = collect_using_and_strip(runner, &src, path)?; + for n in nested.iter() { + dfs_text(runner, n, out, seen)?; + } + out.push(key); + Ok(()) +} + +/// Minimal normalization to improve inline parser robustness. 
/// Minimal normalization to improve inline parser robustness.
/// - Normalize CRLF (and lone CR) to LF
/// - Remove redundant semicolons before closing braces (`; }` → `}`)
/// - Ensure the text ends with a newline
///
/// Non-ASCII text is preserved byte-for-byte: only ASCII `;` bytes are ever removed.
fn normalize_text_for_inline(s: &str) -> String {
    let mut out = s.replace("\r\n", "\n").replace('\r', "\n");
    // Two passes so that a doubled `;;` in front of `}` is removed as well.
    for _ in 0..2 {
        out = drop_semicolons_before_closing_brace(&out);
    }
    if !out.ends_with('\n') {
        out.push('\n');
    }
    out
}

/// Single pass: drop every `;` whose next character after spaces, tabs and newlines is `}`.
/// The skipped whitespace itself is kept.
fn drop_semicolons_before_closing_brace(text: &str) -> String {
    let bytes = text.as_bytes();
    let mut kept = String::with_capacity(text.len());
    // Start of the pending run of text that has not been copied yet.
    let mut copy_from = 0usize;
    for (idx, &b) in bytes.iter().enumerate() {
        if b != b';' {
            continue;
        }
        let next_significant = bytes[idx + 1..]
            .iter()
            .find(|&&c| !matches!(c, b' ' | b'\t' | b'\n'));
        if next_significant == Some(&b'}') {
            // `;` is ASCII, so `idx` and `idx + 1` are always char boundaries and the
            // slices below never split a multi-byte character.
            kept.push_str(&text[copy_from..idx]);
            copy_from = idx + 1;
        }
    }
    kept.push_str(&text[copy_from..]);
    kept
}
/// Rewrite line-head `@name[: Type] = expr` as `local name[: Type] = expr`.
///
/// Only lines whose first non-blank character is `@` followed by an identifier, an
/// optional `: Type` and then `=` are rewritten; all other lines are copied as they are.
/// Every output line is terminated with `\n`.
pub fn preexpand_at_local(src: &str) -> String {
    /// Index of the first byte at or after `at` that is neither a space nor a tab.
    fn skip_blanks(raw: &[u8], mut at: usize) -> usize {
        while matches!(raw.get(at), Some(b' ' | b'\t')) {
            at += 1;
        }
        at
    }

    /// End index of the identifier starting at `at`, or `None` if none starts there.
    fn skip_ident(raw: &[u8], at: usize) -> Option<usize> {
        match raw.get(at) {
            Some(c) if c.is_ascii_alphabetic() || *c == b'_' => {
                let mut end = at + 1;
                while matches!(raw.get(end), Some(c) if c.is_ascii_alphanumeric() || *c == b'_') {
                    end += 1;
                }
                Some(end)
            }
            _ => None,
        }
    }

    /// The rewritten line (without newline), or `None` when the line does not match.
    fn rewrite(line: &str) -> Option<String> {
        let raw = line.as_bytes();
        let at_pos = skip_blanks(raw, 0);
        if raw.get(at_pos) != Some(&b'@') {
            return None;
        }
        let name_end = skip_ident(raw, at_pos + 1)?;
        let mut cursor = skip_blanks(raw, name_end);
        if raw.get(cursor) == Some(&b':') {
            cursor = skip_blanks(raw, cursor + 1);
            // The type name after `:` is optional for the purposes of this scan.
            if let Some(type_end) = skip_ident(raw, cursor) {
                cursor = type_end;
            }
        }
        let eq_pos = skip_blanks(raw, cursor);
        if raw.get(eq_pos) != Some(&b'=') {
            return None;
        }
        Some(format!(
            "{}local {} ={}",
            &line[..at_pos],
            &line[at_pos + 1..eq_pos],
            &line[eq_pos + 1..]
        ))
    }

    let mut expanded = String::with_capacity(src.len());
    for line in src.lines() {
        match rewrite(line) {
            Some(replaced) => expanded.push_str(&replaced),
            None => expanded.push_str(line),
        }
        expanded.push('\n');
    }
    expanded
}
+/// - When AST using is enabled, resolves nested preludes via DFS and injects +/// OperatorBox preludes when available (stringify/compare/add). +/// - All runners call this helper; do not fork resolution logic elsewhere. +pub fn resolve_prelude_paths_profiled( + runner: &NyashRunner, + code: &str, + filename: &str, +) -> Result<(String, Vec), String> { + // First pass: strip using from the main source and collect direct prelude paths + let (cleaned, direct, _imports) = collect_using_and_strip(runner, code, filename)?; + // Recursively collect nested preludes (DFS) for both AST/text merges. + // Rationale: even when we merge via text, nested `using` inside preludes + // must be discovered so that their definitions are present at runtime + // (e.g., runner_min -> lower_* boxes). Previously this only ran when + // NYASH_USING_AST=1, which caused unresolved calls in inline flows. + let _ast_on = crate::config::env::env_bool("NYASH_USING_AST"); + let mut out: Vec = Vec::new(); + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); + + for p in direct.iter() { + dfs(runner, p, &mut out, &mut seen)?; + } + // Operator Boxes prelude injection(観測“常時ON”のため) + // stringify/compare/add は常に注入(存在時)。その他(bitwise等)は ALL 指定時のみ。 + let opbox_all = crate::config::env::env_bool("NYASH_OPERATOR_BOX_ALL") + || crate::config::env::env_bool("NYASH_BUILDER_OPERATOR_BOX_ALL_CALL"); + + if let Ok(root) = std::env::var("NYASH_ROOT") { + let must_have = [ + "apps/lib/std/operators/stringify.hako", + "apps/lib/std/operators/compare.hako", + "apps/lib/std/operators/add.hako", + ]; + for rel in must_have.iter() { + let p = std::path::Path::new(&root).join(rel); + if p.exists() { + let path = p.to_string_lossy().to_string(); + if !out.iter().any(|x| x == &path) { + out.push(path); + } + } + } + } + // Inject remaining arithmetic/bitwise/unary operator modules when ALL is requested + if opbox_all { + if let Ok(root) = std::env::var("NYASH_ROOT") { + let rels = vec![ + 
"apps/lib/std/operators/sub.hako", + "apps/lib/std/operators/mul.hako", + "apps/lib/std/operators/div.hako", + "apps/lib/std/operators/mod.hako", + // Shifts / bitwise (parser tokens now supported) + "apps/lib/std/operators/shl.hako", + "apps/lib/std/operators/shr.hako", + "apps/lib/std/operators/bitand.hako", + "apps/lib/std/operators/bitor.hako", + "apps/lib/std/operators/bitxor.hako", + "apps/lib/std/operators/neg.hako", + "apps/lib/std/operators/not.hako", + "apps/lib/std/operators/bitnot.hako", + ]; + for rel in rels { + let p = std::path::Path::new(&root).join(rel); + if p.exists() { + let path = p.to_string_lossy().to_string(); + if !out.iter().any(|x| x == &path) { + out.push(path); + } + } + } + } + } + // If AST merge is disabled, still return the discovered nested prelude list + // so that the text merger can inline all dependencies. This keeps behavior + // consistent across strategies and fixes nested `using` resolution. + Ok((cleaned, out)) +} + +/// Parse prelude source files into ASTs (single helper for all runner modes). +/// - Reads each path, strips nested `using`, and parses to AST. +/// - Returns a Vec of Program ASTs (one per prelude file), preserving DFS order. 
+pub fn parse_preludes_to_asts( + runner: &NyashRunner, + prelude_paths: &[String], +) -> Result, String> { + let debug = crate::config::env::env_bool("NYASH_STRIP_DEBUG"); + if debug { + eprintln!( + "[strip-debug] parse_preludes_to_asts: {} files total", + prelude_paths.len() + ); + for (idx, p) in prelude_paths.iter().enumerate() { + eprintln!("[strip-debug] [{}] {}", idx, p); + } + } + let mut out: Vec = Vec::with_capacity(prelude_paths.len()); + for (idx, prelude_path) in prelude_paths.iter().enumerate() { + if debug { + eprintln!( + "[strip-debug] [{}/{}] Processing: {}", + idx + 1, + prelude_paths.len(), + prelude_path + ); + } + // Phase 90-A: fs 系移行 + let ring0 = crate::runtime::ring0::get_global_ring0(); + let src = ring0 + .fs + .read_to_string(std::path::Path::new(prelude_path)) + .map_err(|e| format!("using: error reading {}: {}", prelude_path, e))?; + let (clean_src, _nested, _nested_imports) = + collect_using_and_strip(runner, &src, prelude_path)?; + + // IMPORTANT: Do not attempt to AST-parse .hako preludes here. + // .hako is Hakorune surface, not Nyash AST. VM/VM-fallback paths + // will route to text-merge when any prelude is .hako. 
+ if prelude_path.ends_with(".hako") { + if debug { + eprintln!( + "[strip-debug] skip AST parse for .hako prelude: {}", + prelude_path + ); + } + continue; + } + + let clean_src = clean_src; + + // Debug: dump clean_src if NYASH_STRIP_DEBUG=1 + if debug { + eprintln!( + "[strip-debug] [{}/{}] About to parse: {}", + idx + 1, + prelude_paths.len(), + prelude_path + ); + eprintln!( + "[strip-debug] clean_src first 500 chars:\n{}\n---", + &clean_src.chars().take(500).collect::() + ); + } + + match crate::parser::NyashParser::parse_from_string(&clean_src) { + Ok(ast) => { + if debug { + eprintln!( + "[strip-debug] [{}/{}] ✅ Parse SUCCESS: {}", + idx + 1, + prelude_paths.len(), + prelude_path + ); + } + out.push(ast) + } + Err(e) => { + // Always output debug info on parse failure if NYASH_STRIP_DEBUG=1 + let debug = crate::config::env::env_bool("NYASH_STRIP_DEBUG"); + eprintln!( + "[strip-debug] Parse FAILED for: {} (debug={})", + prelude_path, debug + ); + if debug { + eprintln!("[strip-debug] Error: {}", e); + let es = format!("{}", e); + let lines: Vec<&str> = clean_src.lines().collect(); + eprintln!("[strip-debug] Total lines: {}", lines.len()); + // Try to extract error line number (e.g., "at line 451") and show local context + let mut printed = false; + if let Some(pos) = es.rfind("line ") { + let mut j = pos + 5; // after "line " + let bytes = es.as_bytes(); + let mut n: usize = 0; + let mut had = false; + while j < bytes.len() { + let c = bytes[j]; + if c >= b'0' && c <= b'9' { + n = n * 10 + (c - b'0') as usize; + j += 1; + had = true; + } else { + break; + } + } + if had { + let ln = if n == 0 { 1 } else { n }; + let from = ln.saturating_sub(3); + let to = std::cmp::min(lines.len(), ln + 3); + eprintln!( + "[strip-debug] Context around line {} ({}..={}):", + ln, + from.max(1), + to + ); + for i in from.max(1)..=to { + let mark = if i == ln { ">>" } else { " " }; + if let Some(line) = lines.get(i - 1) { + eprintln!("{} {:4}: {}", mark, i, line); + } + } + 
printed = true; + } + } + if !printed { + eprintln!("[strip-debug] Lines 15-25:"); + for (idx, line) in lines.iter().enumerate().skip(14).take(11) { + eprintln!(" {:3}: {}", idx + 1, line); + } + } + eprintln!("[strip-debug] Full clean_src:\n{}\n---", clean_src); + } + return Err(format!( + "Parse error in using prelude {}: {}", + prelude_path, e + )); + } + } + } + if debug { + eprintln!( + "[strip-debug] parse_preludes_to_asts: ✅ All {} files parsed successfully", + out.len() + ); + } + Ok(out) +} + +fn normalize_path(path: &str) -> (String, String) { + use std::path::PathBuf; + match PathBuf::from(path).canonicalize() { + Ok(canon) => { + let s = canon.to_string_lossy().to_string(); + (s.clone(), s) + } + Err(_) => { + // Fall back to the original path representation. + (path.to_string(), path.to_string()) + } + } +} + +fn dfs( + runner: &NyashRunner, + path: &str, + out: &mut Vec, + seen: &mut std::collections::HashSet, +) -> Result<(), String> { + let (key, real_path) = normalize_path(path); + if !seen.insert(key.clone()) { + return Ok(()); + } + // Phase 90-A: fs 系移行 + let ring0 = crate::runtime::ring0::get_global_ring0(); + let src = ring0 + .fs + .read_to_string(std::path::Path::new(&real_path)) + .map_err(|e| format!("using: failed to read '{}': {}", real_path, e))?; + let (_cleaned, nested, _nested_imports) = collect_using_and_strip(runner, &src, &real_path)?; + for n in nested.iter() { + dfs(runner, n, out, seen)?; + } + out.push(real_path); + Ok(()) +} diff --git a/src/runner/modes/common_util/resolve/strip.rs b/src/runner/modes/common_util/resolve/strip/using.rs similarity index 52% rename from src/runner/modes/common_util/resolve/strip.rs rename to src/runner/modes/common_util/resolve/strip/using.rs index 50b1ff05..99c33b09 100644 --- a/src/runner/modes/common_util/resolve/strip.rs +++ b/src/runner/modes/common_util/resolve/strip/using.rs @@ -29,6 +29,44 @@ pub fn collect_using_and_strip( std::collections::HashMap::new(), )); } + let plan = 
plan_using_strip(runner, code, filename)?; + Ok(apply_using_strip_plan(plan)) +} + +struct UsingStripPlan { + kept_lines: Vec, + kept_len: usize, + prelude_paths: Vec, + imports: std::collections::HashMap, +} + +fn apply_using_strip_plan( + plan: UsingStripPlan, +) -> ( + String, + Vec, + std::collections::HashMap, +) { + let mut out = String::with_capacity(plan.kept_len + 64); + for line in plan.kept_lines { + out.push_str(&line); + out.push('\n'); + } + // Optional prelude boundary comment (helps manual inspection; parser ignores comments) + if crate::config::env::env_bool("NYASH_RESOLVE_SEAM_DEBUG") { + let mut with_marker = String::with_capacity(out.len() + 64); + with_marker.push_str("\n/* --- using boundary (AST) --- */\n"); + with_marker.push_str(&out); + out = with_marker; + } + (out, plan.prelude_paths, plan.imports) +} + +fn plan_using_strip( + runner: &NyashRunner, + code: &str, + filename: &str, +) -> Result { let using_ctx = runner.init_using_context(); let prod = crate::config::env::using_is_prod(); let strict = crate::config::env::env_bool("NYASH_USING_STRICT"); @@ -36,7 +74,8 @@ pub fn collect_using_and_strip( crate::config::env::cli_verbose() || crate::config::env::env_bool("NYASH_RESOLVE_TRACE"); let ctx_dir = std::path::Path::new(filename).parent(); - let mut out = String::with_capacity(code.len()); + let mut kept_lines: Vec = Vec::new(); + let mut kept_len: usize = 0; let mut prelude_paths: Vec = Vec::new(); // Duplicate-using detection (same target imported multiple times or alias rebound): error in all profiles use std::collections::HashMap; @@ -463,7 +502,13 @@ pub fn collect_using_and_strip( prev_line )); } else { - seen_paths.insert(canon.clone(), (alias_name.clone().unwrap_or_else(|| "".into()), line_no)); + seen_paths.insert( + canon.clone(), + ( + alias_name.clone().unwrap_or_else(|| "".into()), + line_no, + ), + ); } if let Some(alias) = alias_name.clone() { if let Some((prev_path, prev_line)) = seen_aliases.get(&alias) { @@ -485,15 
+530,8 @@ pub fn collect_using_and_strip( } continue; } - out.push_str(line); - out.push('\n'); - } - // Optional prelude boundary comment (helps manual inspection; parser ignores comments) - if crate::config::env::env_bool("NYASH_RESOLVE_SEAM_DEBUG") { - let mut with_marker = String::with_capacity(out.len() + 64); - with_marker.push_str("\n/* --- using boundary (AST) --- */\n"); - with_marker.push_str(&out); - out = with_marker; + kept_len += line.len() + 1; + kept_lines.push(line.to_string()); } // Phase 21.8: Build imports map from seen_aliases (alias -> alias for now) // This provides the MirBuilder with information about which names are valid static box references @@ -501,581 +539,10 @@ pub fn collect_using_and_strip( for (alias, _) in seen_aliases.iter() { imports.insert(alias.clone(), alias.clone()); } - Ok((out, prelude_paths, imports)) -} - -/// Profile-aware prelude resolution wrapper (single entrypoint). -/// - Delegates to `collect_using_and_strip` for the first pass. -/// - When AST using is enabled, resolves nested preludes via DFS and injects -/// OperatorBox preludes when available (stringify/compare/add). -/// - All runners call this helper; do not fork resolution logic elsewhere. -pub fn resolve_prelude_paths_profiled( - runner: &NyashRunner, - code: &str, - filename: &str, -) -> Result<(String, Vec), String> { - // First pass: strip using from the main source and collect direct prelude paths - let (cleaned, direct, _imports) = collect_using_and_strip(runner, code, filename)?; - // Recursively collect nested preludes (DFS) for both AST/text merges. - // Rationale: even when we merge via text, nested `using` inside preludes - // must be discovered so that their definitions are present at runtime - // (e.g., runner_min -> lower_* boxes). Previously this only ran when - // NYASH_USING_AST=1, which caused unresolved calls in inline flows. 
- let _ast_on = crate::config::env::env_bool("NYASH_USING_AST"); - let mut out: Vec = Vec::new(); - let mut seen: std::collections::HashSet = std::collections::HashSet::new(); - fn normalize_path(path: &str) -> (String, String) { - use std::path::PathBuf; - match PathBuf::from(path).canonicalize() { - Ok(canon) => { - let s = canon.to_string_lossy().to_string(); - (s.clone(), s) - } - Err(_) => { - // Fall back to the original path representation. - (path.to_string(), path.to_string()) - } - } - } - fn dfs( - runner: &NyashRunner, - path: &str, - out: &mut Vec, - seen: &mut std::collections::HashSet, - ) -> Result<(), String> { - let (key, real_path) = normalize_path(path); - if !seen.insert(key.clone()) { - return Ok(()); - } - // Phase 90-A: fs 系移行 - let ring0 = crate::runtime::ring0::get_global_ring0(); - let src = ring0 - .fs - .read_to_string(std::path::Path::new(&real_path)) - .map_err(|e| format!("using: failed to read '{}': {}", real_path, e))?; - let (_cleaned, nested, _nested_imports) = - collect_using_and_strip(runner, &src, &real_path)?; - for n in nested.iter() { - dfs(runner, n, out, seen)?; - } - out.push(real_path); - Ok(()) - } - for p in direct.iter() { - dfs(runner, p, &mut out, &mut seen)?; - } - // Operator Boxes prelude injection(観測“常時ON”のため) - // stringify/compare/add は常に注入(存在時)。その他(bitwise等)は ALL 指定時のみ。 - let opbox_all = crate::config::env::env_bool("NYASH_OPERATOR_BOX_ALL") - || crate::config::env::env_bool("NYASH_BUILDER_OPERATOR_BOX_ALL_CALL"); - - if let Ok(root) = std::env::var("NYASH_ROOT") { - let must_have = [ - "apps/lib/std/operators/stringify.hako", - "apps/lib/std/operators/compare.hako", - "apps/lib/std/operators/add.hako", - ]; - for rel in must_have.iter() { - let p = std::path::Path::new(&root).join(rel); - if p.exists() { - let path = p.to_string_lossy().to_string(); - if !out.iter().any(|x| x == &path) { - out.push(path); - } - } - } - } - // Inject remaining arithmetic/bitwise/unary operator modules when ALL is requested - 
if opbox_all { - if let Ok(root) = std::env::var("NYASH_ROOT") { - let rels = vec![ - "apps/lib/std/operators/sub.hako", - "apps/lib/std/operators/mul.hako", - "apps/lib/std/operators/div.hako", - "apps/lib/std/operators/mod.hako", - // Shifts / bitwise (parser tokens now supported) - "apps/lib/std/operators/shl.hako", - "apps/lib/std/operators/shr.hako", - "apps/lib/std/operators/bitand.hako", - "apps/lib/std/operators/bitor.hako", - "apps/lib/std/operators/bitxor.hako", - "apps/lib/std/operators/neg.hako", - "apps/lib/std/operators/not.hako", - "apps/lib/std/operators/bitnot.hako", - ]; - for rel in rels { - let p = std::path::Path::new(&root).join(rel); - if p.exists() { - let path = p.to_string_lossy().to_string(); - if !out.iter().any(|x| x == &path) { - out.push(path); - } - } - } - } - } - // If AST merge is disabled, still return the discovered nested prelude list - // so that the text merger can inline all dependencies. This keeps behavior - // consistent across strategies and fixes nested `using` resolution. - Ok((cleaned, out)) -} - -/// Parse prelude source files into ASTs (single helper for all runner modes). -/// - Reads each path, strips nested `using`, and parses to AST. -/// - Returns a Vec of Program ASTs (one per prelude file), preserving DFS order. 
-pub fn parse_preludes_to_asts( - runner: &NyashRunner, - prelude_paths: &[String], -) -> Result, String> { - let debug = crate::config::env::env_bool("NYASH_STRIP_DEBUG"); - if debug { - eprintln!( - "[strip-debug] parse_preludes_to_asts: {} files total", - prelude_paths.len() - ); - for (idx, p) in prelude_paths.iter().enumerate() { - eprintln!("[strip-debug] [{}] {}", idx, p); - } - } - let mut out: Vec = Vec::with_capacity(prelude_paths.len()); - for (idx, prelude_path) in prelude_paths.iter().enumerate() { - if debug { - eprintln!( - "[strip-debug] [{}/{}] Processing: {}", - idx + 1, - prelude_paths.len(), - prelude_path - ); - } - // Phase 90-A: fs 系移行 - let ring0 = crate::runtime::ring0::get_global_ring0(); - let src = ring0 - .fs - .read_to_string(std::path::Path::new(prelude_path)) - .map_err(|e| format!("using: error reading {}: {}", prelude_path, e))?; - let (clean_src, _nested, _nested_imports) = - collect_using_and_strip(runner, &src, prelude_path)?; - - // IMPORTANT: Do not attempt to AST-parse .hako preludes here. - // .hako is Hakorune surface, not Nyash AST. VM/VM-fallback paths - // will route to text-merge when any prelude is .hako. 
- if prelude_path.ends_with(".hako") { - if debug { - eprintln!( - "[strip-debug] skip AST parse for .hako prelude: {}", - prelude_path - ); - } - continue; - } - - let clean_src = clean_src; - - // Debug: dump clean_src if NYASH_STRIP_DEBUG=1 - if debug { - eprintln!( - "[strip-debug] [{}/{}] About to parse: {}", - idx + 1, - prelude_paths.len(), - prelude_path - ); - eprintln!( - "[strip-debug] clean_src first 500 chars:\n{}\n---", - &clean_src.chars().take(500).collect::() - ); - } - - match crate::parser::NyashParser::parse_from_string(&clean_src) { - Ok(ast) => { - if debug { - eprintln!( - "[strip-debug] [{}/{}] ✅ Parse SUCCESS: {}", - idx + 1, - prelude_paths.len(), - prelude_path - ); - } - out.push(ast) - } - Err(e) => { - // Always output debug info on parse failure if NYASH_STRIP_DEBUG=1 - let debug = crate::config::env::env_bool("NYASH_STRIP_DEBUG"); - eprintln!( - "[strip-debug] Parse FAILED for: {} (debug={})", - prelude_path, debug - ); - if debug { - eprintln!("[strip-debug] Error: {}", e); - let es = format!("{}", e); - let lines: Vec<&str> = clean_src.lines().collect(); - eprintln!("[strip-debug] Total lines: {}", lines.len()); - // Try to extract error line number (e.g., "at line 451") and show local context - let mut printed = false; - if let Some(pos) = es.rfind("line ") { - let mut j = pos + 5; // after "line " - let bytes = es.as_bytes(); - let mut n: usize = 0; - let mut had = false; - while j < bytes.len() { - let c = bytes[j]; - if c >= b'0' && c <= b'9' { - n = n * 10 + (c - b'0') as usize; - j += 1; - had = true; - } else { - break; - } - } - if had { - let ln = if n == 0 { 1 } else { n }; - let from = ln.saturating_sub(3); - let to = std::cmp::min(lines.len(), ln + 3); - eprintln!( - "[strip-debug] Context around line {} ({}..={}):", - ln, - from.max(1), - to - ); - for i in from.max(1)..=to { - let mark = if i == ln { ">>" } else { " " }; - if let Some(line) = lines.get(i - 1) { - eprintln!("{} {:4}: {}", mark, i, line); - } - } - 
printed = true; - } - } - if !printed { - eprintln!("[strip-debug] Lines 15-25:"); - for (idx, line) in lines.iter().enumerate().skip(14).take(11) { - eprintln!(" {:3}: {}", idx + 1, line); - } - } - eprintln!("[strip-debug] Full clean_src:\n{}\n---", clean_src); - } - return Err(format!( - "Parse error in using prelude {}: {}", - prelude_path, e - )); - } - } - } - if debug { - eprintln!( - "[strip-debug] parse_preludes_to_asts: ✅ All {} files parsed successfully", - out.len() - ); - } - Ok(out) -} - -/// Merge prelude ASTs with the main AST into a single Program node. -/// - Collects statements from each prelude Program in order, then appends -/// statements from the main Program. -/// - If the main AST is not a Program, returns it unchanged (defensive). -pub fn merge_prelude_asts_with_main( - prelude_asts: Vec, - main_ast: &nyash_rust::ast::ASTNode, -) -> nyash_rust::ast::ASTNode { - use nyash_rust::ast::{ASTNode, Span}; - let mut combined: Vec = Vec::new(); - for a in prelude_asts.into_iter() { - if let ASTNode::Program { statements, .. } = a { - combined.extend(statements); - } - } - if let ASTNode::Program { statements, .. } = main_ast.clone() { - let mut all = combined; - all.extend(statements); - ASTNode::Program { - statements: all, - span: Span::unknown(), - } - } else { - // Defensive: unexpected shape; preserve main AST unchanged. - main_ast.clone() - } -} - -/// Pre-expand line-head `@name[: Type] = expr` into `local name[: Type] = expr`. -/// Minimal, safe, no semantics change. Applies only at line head (after spaces/tabs). 
-pub fn preexpand_at_local(src: &str) -> String { - let mut out = String::with_capacity(src.len()); - for line in src.lines() { - let bytes = line.as_bytes(); - let mut i = 0; - while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') { - i += 1; - } - if i < bytes.len() && bytes[i] == b'@' { - // parse identifier - let mut j = i + 1; - if j < bytes.len() && ((bytes[j] as char).is_ascii_alphabetic() || bytes[j] == b'_') { - j += 1; - while j < bytes.len() { - let c = bytes[j] as char; - if c.is_ascii_alphanumeric() || c == '_' { - j += 1; - } else { - break; - } - } - let mut k = j; - while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') { - k += 1; - } - if k < bytes.len() && bytes[k] == b':' { - k += 1; - while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') { - k += 1; - } - if k < bytes.len() - && ((bytes[k] as char).is_ascii_alphabetic() || bytes[k] == b'_') - { - k += 1; - while k < bytes.len() { - let c = bytes[k] as char; - if c.is_ascii_alphanumeric() || c == '_' { - k += 1; - } else { - break; - } - } - } - } - let mut eqp = k; - while eqp < bytes.len() && (bytes[eqp] == b' ' || bytes[eqp] == b'\t') { - eqp += 1; - } - if eqp < bytes.len() && bytes[eqp] == b'=' { - out.push_str(&line[..i]); - out.push_str("local "); - out.push_str(&line[i + 1..eqp]); - out.push_str(" ="); - out.push_str(&line[eqp + 1..]); - out.push('\n'); - continue; - } - } - } - out.push_str(line); - out.push('\n'); - } - out -} - -/// Text-based prelude merge: simpler and faster than AST merge. -/// Recursively resolves using dependencies, strips using lines from each file, -/// and concatenates prelude text followed by main source text. -/// Returns merged source text ready for compilation. 
-pub fn merge_prelude_text( - runner: &NyashRunner, - source: &str, - filename: &str, -) -> Result { - let trace = crate::config::env::env_bool("NYASH_RESOLVE_TRACE"); - - // First pass: collect and resolve prelude paths - let (cleaned_main, prelude_paths) = resolve_prelude_paths_profiled(runner, source, filename)?; - // Expand nested preludes for text-merge too (DFS) so that any `using` - // inside prelude files (e.g., runner_min -> lower_* boxes) are also - // included even when NYASH_USING_AST is OFF. - let mut expanded: Vec = Vec::new(); - let mut seen: std::collections::HashSet = std::collections::HashSet::new(); - fn canonize(p: &str) -> String { - std::fs::canonicalize(p) - .ok() - .map(|pb| pb.to_string_lossy().to_string()) - .unwrap_or_else(|| p.to_string()) - } - fn dfs_text( - runner: &NyashRunner, - path: &str, - out: &mut Vec, - seen: &mut std::collections::HashSet, - ) -> Result<(), String> { - let key = canonize(path); - if !seen.insert(key.clone()) { - return Ok(()); - } - // Phase 90-A: fs 系移行 - let ring0 = crate::runtime::ring0::get_global_ring0(); - let src = ring0 - .fs - .read_to_string(std::path::Path::new(path)) - .map_err(|e| format!("using: failed to read '{}': {}", path, e))?; - let (_cleaned, nested, _nested_imports) = collect_using_and_strip(runner, &src, path)?; - for n in nested.iter() { - dfs_text(runner, n, out, seen)?; - } - out.push(key); - Ok(()) - } - for p in prelude_paths.iter() { - dfs_text(runner, p, &mut expanded, &mut seen)?; - } - let prelude_paths = &expanded; - // Record for enriched diagnostics (parse error context) - crate::runner::modes::common_util::resolve::set_last_merged_preludes(prelude_paths.clone()); - - if prelude_paths.is_empty() { - // No using statements, return original - return Ok(source.to_string()); - } - - if trace { - crate::runner::trace::log(format!( - "[using/text-merge] {} prelude files for '{}'", - prelude_paths.len(), - filename - )); - } - - // Build merged text: preludes first, then main 
source - let mut merged = String::new(); - let mut spans: Vec = Vec::new(); - let mut current_line: usize = 1; - - // Add preludes in DFS order - for (idx, path) in prelude_paths.iter().enumerate() { - // Phase 90-A: fs 系移行 - let ring0 = crate::runtime::ring0::get_global_ring0(); - let content = ring0 - .fs - .read_to_string(std::path::Path::new(path)) - .map_err(|e| format!("using: failed to read '{}': {}", path, e))?; - - // Strip using lines from prelude and normalize - let (cleaned_raw, _nested, _nested_imports) = - collect_using_and_strip(runner, &content, path)?; - let mut cleaned = normalize_text_for_inline(&cleaned_raw); - // Hako-friendly normalize for preludes: always strip leading `local ` at line head - // when the prelude is a .hako (or looks like Hako code). This prevents top-level - // `local` from tripping the Nyash parser after text merge. - if path.ends_with(".hako") - || crate::runner::modes::common_util::hako::looks_like_hako_code(&cleaned) - { - cleaned = crate::runner::modes::common_util::hako::strip_local_decl(&cleaned); - } - - if trace { - crate::runner::trace::log(format!( - "[using/text-merge] [{}] '{}' ({} bytes)", - idx + 1, - path, - cleaned.len() - )); - } - - merged.push_str(&cleaned); - merged.push('\n'); - - let added = cleaned.lines().count(); - if added > 0 { - spans.push(crate::runner::modes::common_util::resolve::LineSpan { - file: path.clone(), - start_line: current_line, - line_count: added, - }); - current_line += added + 1; // +1 for extra '\n' - } else { - current_line += 1; - } - } - - // Add boundary marker if debug mode - if crate::config::env::env_bool("NYASH_RESOLVE_SEAM_DEBUG") { - merged.push_str("\n/* --- using prelude/main boundary --- */\n\n"); - let boundary_lines = 3usize; - spans.push(crate::runner::modes::common_util::resolve::LineSpan { - file: "".to_string(), - start_line: current_line, - line_count: boundary_lines, - }); - current_line += boundary_lines; - } - - // Add main source (already cleaned of using 
lines) and normalize - let mut cleaned_main_norm = normalize_text_for_inline(&cleaned_main); - // Hako-friendly normalize for main: always strip leading `local ` at line head - // when the merged main looks like Hako code (or file is .hako as a heuristic). - if filename.ends_with(".hako") - || crate::runner::modes::common_util::hako::looks_like_hako_code(&cleaned_main_norm) - { - cleaned_main_norm = - crate::runner::modes::common_util::hako::strip_local_decl(&cleaned_main_norm); - } - merged.push_str(&cleaned_main_norm); - let main_lines = cleaned_main_norm.lines().count(); - if main_lines > 0 { - spans.push(crate::runner::modes::common_util::resolve::LineSpan { - file: filename.to_string(), - start_line: current_line, - line_count: main_lines, - }); - current_line += main_lines; - } - let _ = current_line; - - if trace { - crate::runner::trace::log(format!( - "[using/text-merge] final merged: {} bytes ({} prelude + {} main)", - merged.len(), - merged.len() - cleaned_main.len(), - cleaned_main.len() - )); - } - - // Optional dump of merged text for diagnostics - if let Ok(dump_path) = std::env::var("NYASH_RESOLVE_DUMP_MERGED") { - if !dump_path.is_empty() { - let _ = std::fs::write(&dump_path, &merged); - } - } - - crate::runner::modes::common_util::resolve::set_last_text_merge_line_spans(spans); - - Ok(normalize_text_for_inline(&merged)) -} - -/// Minimal normalization to improve inline parser robustness. 
-/// - Normalize CRLF to LF -/// - Remove redundant semicolons before closing braces (`; }` → `}`) -/// - Ensure file ends with a newline -fn normalize_text_for_inline(s: &str) -> String { - let mut out = s.replace("\r\n", "\n").replace("\r", "\n"); - // Remove `;` before `}` across line boundaries conservatively - // pattern: `;` followed by optional spaces/newlines then `}` - // Do a few passes to cover nested cases without regex - for _ in 0..2 { - let mut tmp = String::with_capacity(out.len()); - let bytes = out.as_bytes(); - let mut i = 0usize; - while i < bytes.len() { - if bytes[i] == b';' { - // peek ahead skipping spaces/newlines - let mut j = i + 1; - while j < bytes.len() { - let c = bytes[j]; - if c == b' ' || c == b'\t' || c == b'\n' { - j += 1; - } else { - break; - } - } - if j < bytes.len() && bytes[j] == b'}' { - // drop ';' (do not advance j here) - i += 1; - continue; - } - } - tmp.push(bytes[i] as char); - i += 1; - } - out = tmp; - } - if !out.ends_with('\n') { - out.push('\n'); - } - out + Ok(UsingStripPlan { + kept_lines, + kept_len, + prelude_paths, + imports, + }) }