diff --git a/src/mir/builder/control_flow.rs b/src/mir/builder/control_flow.rs index af704972..e3c29066 100644 --- a/src/mir/builder/control_flow.rs +++ b/src/mir/builder/control_flow.rs @@ -358,7 +358,8 @@ impl super::MirBuilder { // - Add generated blocks to current_function // - Jump from current_block to the entry of generated loop // - The loop exit becomes the new current_block - self.merge_joinir_mir_blocks(&mir_module, debug)?; + // Phase 188-Impl-3: Pass None for boundary (legacy path without boundary) + self.merge_joinir_mir_blocks(&mir_module, None, debug)?; // Return void for now (loop doesn't have a meaningful return value in this context) let void_val = self.next_value_id(); @@ -484,26 +485,38 @@ impl super::MirBuilder { } // Merge JoinIR blocks into current function - self.merge_joinir_mir_blocks(&mir_module, debug)?; + // Phase 188-Impl-3: Create and pass JoinInlineBoundary for Pattern 1 + let boundary = crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary::new_inputs_only( + vec![ValueId(0)], // JoinIR's main() parameter (loop variable) + vec![loop_var_id], // Host's loop variable + ); + self.merge_joinir_mir_blocks(&mir_module, Some(&boundary), debug)?; - // Return void/0 as loop result (Pattern 1 loops return 0) - // Use the current block to emit the constant - let zero_val = self.value_gen.next(); - use crate::mir::types::ConstValue; - let current_block = self.current_block.ok_or_else(|| { - "[cf_loop/joinir/pattern1] No current block available".to_string() - })?; + // Phase 188-Impl-4-FIX: Return Void instead of trying to emit Const + // + // PROBLEM: Emitting instructions after merge_joinir_mir_blocks is fragile because: + // 1. merge creates exit block and switches to it + // 2. We try to add Const to exit block + // 3. But subsequent code (return statement) might overwrite the block + // + // SOLUTION: Loops don't produce values - they return Void. + // The subsequent "return 0" statement will emit its own Const + Return. 
+ // + // This is cleaner because: + // - Loop lowering doesn't need to know about the return value + // - The return statement handles its own code generation + // - No risk of instructions being lost due to block management issues - if let Some(ref mut func) = self.current_function { - if let Some(block) = func.get_block_mut(current_block) { - block.instructions.push(crate::mir::MirInstruction::Const { - dst: zero_val, - value: ConstValue::Integer(0), - }); - } + let void_val = crate::mir::builder::emission::constant::emit_void(self); + + if debug { + eprintln!( + "[cf_loop/joinir/pattern1] Loop complete, returning Void {:?}", + void_val + ); } - Ok(Some(zero_val)) + Ok(Some(void_val)) } /// Phase 49-3.2: Merge JoinIR-generated MIR blocks into current_function @@ -521,9 +534,25 @@ impl super::MirBuilder { /// - Pattern 1 (Simple While) generates 3 functions: entry + loop_step + k_exit /// - All functions are flattened into current_function with global ID remapping /// - Single exit block receives all Return instructions from all functions + /// + /// # Phase 188-Impl-3: JoinInlineBoundary Support + /// + /// When `boundary` is provided, injects Copy instructions at the entry block + /// to connect host ValueIds to JoinIR local ValueIds: + /// + /// ```text + /// entry_block: + /// // Injected by boundary + /// ValueId(100) = Copy ValueId(4) // host_input → join_input + /// // Original JoinIR instructions follow... + /// ``` + /// + /// This enables clean separation: JoinIR uses local IDs (0,1,2...), + /// host uses its own IDs, and Copy instructions bridge the gap. fn merge_joinir_mir_blocks( &mut self, mir_module: &crate::mir::MirModule, + boundary: Option<&crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary>, debug: bool, ) -> Result<(), String> { use crate::mir::{BasicBlock, BasicBlockId, MirInstruction, ValueId}; @@ -588,14 +617,19 @@ impl super::MirBuilder { // 3. 
Collect all ValueIds used across ALL functions (Phase 189) // Also build a map of ValueId → function name for Call→Jump conversion + // Phase 188-Impl-3: Also collect function parameters for tail call conversion if debug { eprintln!("[cf_loop/joinir] Phase 189: Collecting value IDs from all functions"); } let mut used_values: std::collections::BTreeSet = std::collections::BTreeSet::new(); let mut value_to_func_name: HashMap = HashMap::new(); + let mut function_params: HashMap> = HashMap::new(); + + for (func_name, func) in &mir_module.functions { + // Phase 188-Impl-3: Collect function parameters for tail call conversion + function_params.insert(func_name.clone(), func.params.clone()); - for func in mir_module.functions.values() { for block in func.blocks.values() { Self::collect_values_in_block(block, &mut used_values); // Phase 189: Track Const String instructions that define function names @@ -731,6 +765,7 @@ impl super::MirBuilder { } // Second pass: Insert parameter bindings for tail calls + // Phase 188-Impl-3: Use actual parameter ValueIds from target function if let Some((target_block, args)) = tail_call_target { if debug { eprintln!( @@ -739,20 +774,36 @@ impl super::MirBuilder { ); } - // Insert Copy instructions for parameter binding - for (i, arg_val_remapped) in args.iter().enumerate() { - let param_val_original = ValueId(i as u32); - if let Some(¶m_val_remapped) = value_map.get(¶m_val_original) { - new_block.instructions.push(MirInstruction::Copy { - dst: param_val_remapped, - src: *arg_val_remapped, - }); + // Find the target function name from the target_block + // We need to reverse-lookup the function name from the entry block + let mut target_func_name: Option = None; + for (fname, &entry_block) in &function_entry_map { + if entry_block == target_block { + target_func_name = Some(fname.clone()); + break; + } + } - if debug { - eprintln!( - "[cf_loop/joinir] Param binding: arg {:?} → param {:?}", - arg_val_remapped, param_val_remapped - ); + if 
let Some(target_func_name) = target_func_name { + if let Some(target_params) = function_params.get(&target_func_name) { + // Insert Copy instructions for parameter binding + for (i, arg_val_remapped) in args.iter().enumerate() { + if i < target_params.len() { + let param_val_original = target_params[i]; + if let Some(¶m_val_remapped) = value_map.get(¶m_val_original) { + new_block.instructions.push(MirInstruction::Copy { + dst: param_val_remapped, + src: *arg_val_remapped, + }); + + if debug { + eprintln!( + "[cf_loop/joinir] Param binding: arg {:?} → param {:?}", + arg_val_remapped, param_val_remapped + ); + } + } + } } } } @@ -804,6 +855,62 @@ impl super::MirBuilder { } } + // Phase 188-Impl-3: Inject Copy instructions for boundary inputs + if let Some(boundary) = boundary { + // Get entry function's entry block (first function by convention) + let (entry_func_name, entry_func) = mir_module + .functions + .iter() + .next() + .ok_or("JoinIR module has no functions")?; + let entry_block_remapped = block_map[&(entry_func_name.clone(), entry_func.entry_block)]; + + if debug { + eprintln!( + "[cf_loop/joinir] Phase 188-Impl-3: Injecting {} Copy instructions at entry block {:?}", + boundary.join_inputs.len(), + entry_block_remapped + ); + } + + // Inject Copy instructions: join_input_remapped = Copy host_input + if let Some(ref mut current_func) = self.current_function { + if let Some(entry_block) = current_func.get_block_mut(entry_block_remapped) { + // Insert Copy instructions at the BEGINNING of the block + let mut copy_instructions = Vec::new(); + for (join_in, host_in) in boundary.join_inputs.iter().zip(&boundary.host_inputs) { + // join_in is JoinIR's local ValueId (e.g., ValueId(0)) + // host_in is host function's ValueId (e.g., ValueId(4)) + // We need to remap join_in to the merged space + if let Some(&join_in_remapped) = value_map.get(join_in) { + copy_instructions.push(MirInstruction::Copy { + dst: join_in_remapped, + src: *host_in, + }); + if debug { + 
eprintln!( + "[cf_loop/joinir] Copy boundary: {:?} (host) → {:?} (join_remapped)", + host_in, join_in_remapped + ); + } + } else { + if debug { + eprintln!( + "[cf_loop/joinir] WARNING: join_input {:?} not found in value_map", + join_in + ); + } + } + } + + // Insert at beginning (reverse order so they appear in correct order) + for inst in copy_instructions.into_iter().rev() { + entry_block.instructions.insert(0, inst); + } + } + } + } + // 6. Create exit block (empty for now, will be populated after loop) if let Some(ref mut func) = self.current_function { let exit_block = BasicBlock::new(exit_block_id); diff --git a/src/mir/join_ir/lowering/inline_boundary.rs b/src/mir/join_ir/lowering/inline_boundary.rs new file mode 100644 index 00000000..a1349537 --- /dev/null +++ b/src/mir/join_ir/lowering/inline_boundary.rs @@ -0,0 +1,168 @@ +//! Phase 188-Impl-3: JoinInlineBoundary - Boundary information for JoinIR inlining +//! +//! This module defines the boundary between JoinIR fragments and the host MIR function. +//! It enables clean separation of concerns: +//! +//! - **Box A**: JoinIR Frontend (doesn't know about host ValueIds) +//! - **Box B**: Join→MIR Bridge (converts to MIR using local ValueIds) +//! - **Box C**: JoinInlineBoundary (stores boundary info - THIS FILE) +//! - **Box D**: JoinMirInlineMerger (injects Copy instructions at boundary) +//! +//! ## Design Philosophy +//! +//! The JoinIR lowerer should work with **local ValueIds** (0, 1, 2, ...) without +//! knowing anything about the host function's ValueId space. This ensures: +//! +//! 1. **Modularity**: JoinIR lowerers are pure transformers +//! 2. **Reusability**: Same lowerer can be used in different contexts +//! 3. **Testability**: JoinIR can be tested independently +//! 4. **Correctness**: SSA properties are maintained via explicit Copy instructions +//! +//! ## Example +//! +//! For `loop(i < 3) { print(i); i = i + 1 }`: +//! +//! ```text +//! Host Function: +//! 
ValueId(4) = Const 0 // i = 0 in host +//! +//! JoinIR Fragment (uses local IDs 0, 1, 2, ...): +//! ValueId(0) = param // i_param (local to JoinIR) +//! ValueId(1) = Const 3 +//! ValueId(2) = Compare ... +//! +//! Boundary: +//! join_inputs: [ValueId(0)] // JoinIR's param slot +//! host_inputs: [ValueId(4)] // Host's `i` variable +//! +//! Merged MIR (with Copy injection): +//! entry: +//! ValueId(100) = Copy ValueId(4) // Connect host→JoinIR +//! ValueId(101) = Const 3 +//! ... +//! ``` + +use crate::mir::ValueId; + +/// Boundary information for inlining a JoinIR fragment into a host function +/// +/// This structure captures the "interface" between a JoinIR fragment and the +/// host function, allowing the merger to inject necessary Copy instructions +/// to connect the two SSA value spaces. +/// +/// # Design Note +/// +/// This is a **pure data structure** with no logic. All transformation logic +/// lives in the merger (merge_joinir_mir_blocks). +#[derive(Debug, Clone)] +pub struct JoinInlineBoundary { + /// JoinIR-local ValueIds that act as "input slots" + /// + /// These are the ValueIds used **inside** the JoinIR fragment to refer + /// to values that come from the host. They should be small sequential + /// IDs (0, 1, 2, ...) since JoinIR lowerers allocate locally. + /// + /// Example: For a loop variable `i`, JoinIR uses ValueId(0) as the parameter. + pub join_inputs: Vec, + + /// Host-function ValueIds that provide the input values + /// + /// These are the ValueIds from the **host function** that correspond to + /// the join_inputs. The merger will inject Copy instructions to connect + /// host_inputs[i] → join_inputs[i]. + /// + /// Example: If host has `i` as ValueId(4), then host_inputs = [ValueId(4)]. + pub host_inputs: Vec, + + /// JoinIR-local ValueIds that represent outputs (if any) + /// + /// For loops that produce values (e.g., loop result), these are the + /// JoinIR-local ValueIds that should be visible to the host after inlining. 
+ /// + /// Currently unused for Pattern 1 (Simple While), reserved for future patterns. + pub join_outputs: Vec, + + /// Host-function ValueIds that receive the outputs + /// + /// These are the destination ValueIds in the host function that should + /// receive the values from join_outputs. + /// + /// Currently unused for Pattern 1 (Simple While), reserved for future patterns. + pub host_outputs: Vec, +} + +impl JoinInlineBoundary { + /// Create a new boundary with input mappings only + /// + /// This is the common case for loops like Pattern 1 where: + /// - Inputs: loop variables (e.g., `i` in `loop(i < 3)`) + /// - Outputs: none (loop returns void/0) + pub fn new_inputs_only(join_inputs: Vec, host_inputs: Vec) -> Self { + assert_eq!( + join_inputs.len(), + host_inputs.len(), + "join_inputs and host_inputs must have same length" + ); + Self { + join_inputs, + host_inputs, + join_outputs: vec![], + host_outputs: vec![], + } + } + + /// Create a new boundary with both inputs and outputs + /// + /// Reserved for future loop patterns that produce values. 
+ #[allow(dead_code)] + pub fn new_with_outputs( + join_inputs: Vec, + host_inputs: Vec, + join_outputs: Vec, + host_outputs: Vec, + ) -> Self { + assert_eq!( + join_inputs.len(), + host_inputs.len(), + "join_inputs and host_inputs must have same length" + ); + assert_eq!( + join_outputs.len(), + host_outputs.len(), + "join_outputs and host_outputs must have same length" + ); + Self { + join_inputs, + host_inputs, + join_outputs, + host_outputs, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_boundary_inputs_only() { + let boundary = JoinInlineBoundary::new_inputs_only( + vec![ValueId(0)], // JoinIR uses ValueId(0) for loop var + vec![ValueId(4)], // Host has loop var at ValueId(4) + ); + + assert_eq!(boundary.join_inputs.len(), 1); + assert_eq!(boundary.host_inputs.len(), 1); + assert_eq!(boundary.join_outputs.len(), 0); + assert_eq!(boundary.host_outputs.len(), 0); + } + + #[test] + #[should_panic(expected = "join_inputs and host_inputs must have same length")] + fn test_boundary_mismatched_inputs() { + JoinInlineBoundary::new_inputs_only( + vec![ValueId(0), ValueId(1)], + vec![ValueId(4)], + ); + } +} diff --git a/src/mir/join_ir/lowering/mod.rs b/src/mir/join_ir/lowering/mod.rs index 6a8ffb78..54fd9668 100644 --- a/src/mir/join_ir/lowering/mod.rs +++ b/src/mir/join_ir/lowering/mod.rs @@ -27,6 +27,7 @@ pub mod if_merge; // Phase 33-7 pub mod if_phi_context; // Phase 61-1 pub mod if_phi_spec; // Phase 61-2 pub mod if_select; // Phase 33 +pub mod inline_boundary; // Phase 188-Impl-3: JoinIR→Host boundary pub mod loop_form_intake; pub mod loop_patterns; // Phase 188: Pattern-based loop lowering (3 patterns) pub mod loop_scope_shape; diff --git a/src/mir/join_ir/lowering/simple_while_minimal.rs b/src/mir/join_ir/lowering/simple_while_minimal.rs index 078aeff3..1dec59b9 100644 --- a/src/mir/join_ir/lowering/simple_while_minimal.rs +++ b/src/mir/join_ir/lowering/simple_while_minimal.rs @@ -50,17 +50,21 @@ use crate::mir::join_ir::{ use 
crate::mir::ValueId; /// Context passed from the host function to the Pattern 1 lowerer +/// +/// Phase 188-Impl-3: This context is now DEPRECATED (kept only for backward compatibility) - JoinIR uses only local ValueIds +/// The boundary mapping is handled by JoinInlineBoundary instead. +#[deprecated(since = "188-Impl-3", note = "Use JoinInlineBoundary for host integration")] pub struct Pattern1Context { - /// The loop variable ValueId from the host function (e.g., ValueId(6) for `i`) + /// DEPRECATED: This is no longer used, JoinIR uses local ValueIds pub loop_var: ValueId, - /// ValueId allocator function + /// DEPRECATED: JoinIR allocates sequentially from 0 pub value_allocator: Box ValueId>, } impl Pattern1Context { /// Create a standalone context with hardcoded ValueIds (for backward compatibility) pub fn standalone() -> Self { - let mut counter = 1000u32; + let mut counter = 0u32; Self { loop_var: ValueId(counter), value_allocator: Box::new(move || { @@ -73,34 +77,51 @@ impl Pattern1Context { /// Lower Pattern 1 (Simple While Loop) to JoinIR /// -/// This is a minimal implementation for loop_min_while.hako. -/// It generates JoinIR that integrates with the host function's variable bindings. +/// # Phase 188-Impl-3: Pure JoinIR Fragment Generation /// -/// # Phase 188-Impl-2: Host Variable Integration +/// This version generates JoinIR using **local ValueIds only** (0, 1, 2, ...). +/// It has NO knowledge of the host function's ValueId space. The boundary mapping +/// is handled separately via JoinInlineBoundary. /// -/// This version accepts the host's loop variable ValueId and allocates fresh IDs -/// for intermediate values. This ensures the generated JoinIR connects properly -/// to the host function's variable bindings. +/// ## Design Philosophy /// -/// If called without a context (from legacy code), it uses standalone mode with -/// hardcoded ValueIds for backward compatibility. 
+/// - **Box A**: JoinIR Frontend (doesn't know about host ValueIds) +/// - **Box B**: This function - converts to JoinIR with local IDs +/// - **Box C**: JoinInlineBoundary - stores boundary info +/// - **Box D**: merge_joinir_mir_blocks - injects Copy instructions +/// +/// This clean separation ensures JoinIR lowerers are: +/// - Pure transformers (no side effects) +/// - Reusable (same lowerer works in any context) +/// - Testable (can test JoinIR independently) /// /// # Arguments /// /// * `_scope` - LoopScopeShape (reserved for future generic implementation) -/// * `ctx` - Pattern1Context containing host variable bindings (or None for standalone) +/// * `_ctx` - DEPRECATED: No longer used, kept for backward compatibility /// /// # Returns /// /// * `Some(JoinModule)` - Successfully lowered to JoinIR /// * `None` - Pattern not matched (fallback to other lowerers) +/// +/// # Boundary Contract +/// +/// This function returns a JoinModule with: +/// - **Input slot**: ValueId(0), the parameter of main(), carries the loop variable's initial value +/// - **Caller responsibility**: Create JoinInlineBoundary to map ValueId(0) to host's loop var pub fn lower_simple_while_minimal( _scope: LoopScopeShape, - ctx: Option, + _ctx: Option, ) -> Option { - let mut ctx = ctx.unwrap_or_else(Pattern1Context::standalone); - // Phase 188-Impl-1: Hardcoded JoinIR for loop_min_while.hako - // This establishes the infrastructure. Generic implementation in Phase 188-Impl-2+. 
+ // Phase 188-Impl-3: Use local ValueId allocator (sequential from 0) + // JoinIR has NO knowledge of host ValueIds - boundary handled separately + let mut value_counter = 0u32; + let mut alloc_value = || { + let id = ValueId(value_counter); + value_counter += 1; + id + }; let mut join_module = JoinModule::new(); @@ -112,32 +133,32 @@ pub fn lower_simple_while_minimal( let k_exit_id = JoinFuncId::new(2); // ================================================================== - // ValueId allocation (Phase 188-Impl-2: Use host variable + allocator) + // ValueId allocation (Phase 188-Impl-3: Sequential local IDs) // ================================================================== - // Host's loop variable (e.g., ValueId(6) for `i`) - let i_init = ctx.loop_var; - - // Allocate fresh IDs for local values - let loop_result = (ctx.value_allocator)(); - let const_0_main = (ctx.value_allocator)(); + // main() locals + let i_init = alloc_value(); // ValueId(0) - loop init value + let loop_result = alloc_value(); // ValueId(1) - result from loop_step + let const_0_main = alloc_value(); // ValueId(2) - return value // loop_step locals - let i_param = (ctx.value_allocator)(); - let const_3 = (ctx.value_allocator)(); - let cmp_lt = (ctx.value_allocator)(); - let exit_cond = (ctx.value_allocator)(); - let const_1 = (ctx.value_allocator)(); - let i_next = (ctx.value_allocator)(); + let i_param = alloc_value(); // ValueId(3) - parameter + let const_3 = alloc_value(); // ValueId(4) - comparison constant + let cmp_lt = alloc_value(); // ValueId(5) - i < 3 + let exit_cond = alloc_value(); // ValueId(6) - !(i < 3) + let const_1 = alloc_value(); // ValueId(7) - increment constant + let i_next = alloc_value(); // ValueId(8) - i + 1 + + // k_exit locals + let const_0_exit = alloc_value(); // ValueId(9) - exit return value // ================================================================== // main() function // ================================================================== - let 
mut main_func = JoinFunction::new(main_id, "main".to_string(), vec![]); + // Phase 188-Impl-3: main() takes i as a parameter (boundary input) + // The host will inject a Copy instruction: i_init_local = Copy host_i + let mut main_func = JoinFunction::new(main_id, "main".to_string(), vec![i_init]); - // Phase 188-Impl-2: Skip i_init = 0 (host already initialized the variable) - // The host's ValueId (i_init) is already bound to 0 in the host function - - // result = loop_step(i_init) ← Use host's i directly + // result = loop_step(i_init) main_func.body.push(JoinInst::Call { func: loop_step_id, args: vec![i_init], @@ -244,7 +265,7 @@ pub fn lower_simple_while_minimal( let mut k_exit_func = JoinFunction::new(k_exit_id, "k_exit".to_string(), vec![]); // return 0 (Pattern 1 has no exit values) - let const_0_exit = ValueId(3000); + // Phase 188-Impl-3: Use pre-allocated const_0_exit (ValueId(9)) k_exit_func.body.push(JoinInst::Compute(MirLikeInst::Const { dst: const_0_exit, value: ConstValue::Integer(0),