diff --git a/src/mir/builder/control_flow.rs b/src/mir/builder/control_flow.rs index 6953f0d3..d9e0a720 100644 --- a/src/mir/builder/control_flow.rs +++ b/src/mir/builder/control_flow.rs @@ -173,46 +173,345 @@ impl super::MirBuilder { Ok(Some(void_val)) } - /// Phase 49-3: Merge JoinIR-generated MIR blocks into current_function + /// Phase 49-3.2: Merge JoinIR-generated MIR blocks into current_function /// - /// This is a simplified merge that: - /// 1. Remaps all block IDs to avoid conflicts - /// 2. Remaps all value IDs to avoid conflicts - /// 3. Adds all blocks to current_function - /// 4. Jumps from current_block to the entry block + /// This merges JoinIR-generated blocks by: + /// 1. Remapping all block IDs to avoid conflicts + /// 2. Remapping all value IDs to avoid conflicts + /// 3. Adding all blocks to current_function + /// 4. Jumping from current_block to the entry block + /// 5. Converting Return → Jump to exit block fn merge_joinir_mir_blocks( &mut self, mir_module: &crate::mir::MirModule, debug: bool, ) -> Result<(), String> { - // For Phase 49-3 MVP: Just log and fall through - // Full block merging is complex and needs careful ID remapping + use crate::mir::{BasicBlock, BasicBlockId, MirInstruction, ValueId}; + use std::collections::HashMap; + if debug { eprintln!( "[cf_loop/joinir] merge_joinir_mir_blocks called with {} functions", mir_module.functions.len() ); - for (name, func) in &mir_module.functions { + } + + // Get the first (and typically only) function from JoinIR output + let join_func = mir_module + .functions + .values() + .next() + .ok_or("JoinIR module has no functions")?; + + if debug { + eprintln!( + "[cf_loop/joinir] Merging function with {} blocks, entry={:?}", + join_func.blocks.len(), + join_func.entry_block + ); + } + + // Phase 49-3.2: Block ID and Value ID remapping + let mut block_map: HashMap = HashMap::new(); + let mut value_map: HashMap = HashMap::new(); + + // 1. 
Allocate new block IDs for all JoinIR blocks + for old_block_id in join_func.blocks.keys() { + let new_block_id = self.block_gen.next(); + block_map.insert(*old_block_id, new_block_id); + if debug { eprintln!( - "[cf_loop/joinir] Function '{}': {} blocks, entry={:?}", - name, - func.blocks.len(), - func.entry_block + "[cf_loop/joinir] Block remap: {:?} → {:?}", + old_block_id, new_block_id ); } } - // TODO(Phase 49-3.2): Implement full block merging - // For now, this is a MVP that demonstrates the pipeline works - // The actual block merging will need: - // 1. Block ID remapping (block_gen.next() for each block) - // 2. Value ID remapping (next_value_id() for each value) - // 3. Instruction remapping (update all block/value references) - // 4. Variable map integration (merge variable_map) + // 2. Create exit block for Return conversion + let exit_block_id = self.block_gen.next(); + if debug { + eprintln!("[cf_loop/joinir] Exit block: {:?}", exit_block_id); + } + + // 3. Collect all ValueIds used in JoinIR function + let mut used_values: std::collections::BTreeSet = std::collections::BTreeSet::new(); + for block in join_func.blocks.values() { + Self::collect_values_in_block(block, &mut used_values); + } + // Also collect parameter ValueIds + for param in &join_func.params { + used_values.insert(*param); + } + + // 4. Allocate new ValueIds + for old_value in used_values { + let new_value = self.next_value_id(); + value_map.insert(old_value, new_value); + if debug { + eprintln!( + "[cf_loop/joinir] Value remap: {:?} → {:?}", + old_value, new_value + ); + } + } + + // 5. 
Clone and remap all blocks + for (old_block_id, old_block) in &join_func.blocks { + let new_block_id = block_map[old_block_id]; + let mut new_block = BasicBlock::new(new_block_id); + + // Remap instructions + for inst in &old_block.instructions { + let remapped = Self::remap_instruction(inst, &value_map, &block_map); + new_block.instructions.push(remapped); + } + new_block.instruction_spans = old_block.instruction_spans.clone(); + + // Remap terminator (convert Return → Jump to exit) + if let Some(ref term) = old_block.terminator { + let remapped_term = match term { + MirInstruction::Return { value } => { + // Convert Return to Jump to exit block + // If there's a return value, we need to store it first + if let Some(ret_val) = value { + let remapped_val = value_map.get(ret_val).copied().unwrap_or(*ret_val); + // Store the return value for later use + // For now, just jump to exit (value handling in Phase 49-4) + if debug { + eprintln!( + "[cf_loop/joinir] Return({:?}) → Jump to exit", + remapped_val + ); + } + } + MirInstruction::Jump { target: exit_block_id } + } + _ => Self::remap_instruction(term, &value_map, &block_map), + }; + new_block.terminator = Some(remapped_term); + } + + // Add block to current function + if let Some(ref mut func) = self.current_function { + func.add_block(new_block); + } + } + + // 6. Create exit block (empty for now, will be populated after loop) + if let Some(ref mut func) = self.current_function { + let exit_block = BasicBlock::new(exit_block_id); + func.add_block(exit_block); + } + + // 7. Jump from current block to JoinIR entry + let entry_block = block_map[&join_func.entry_block]; + crate::mir::builder::emission::branch::emit_jump(self, entry_block)?; + + // 8. 
Switch to exit block for subsequent code + self.start_new_block(exit_block_id)?; + + if debug { + eprintln!( + "[cf_loop/joinir] Merge complete: {} blocks added, continuing from {:?}", + join_func.blocks.len(), + exit_block_id + ); + } Ok(()) } + /// Collect all ValueIds used in a block + fn collect_values_in_block( + block: &crate::mir::BasicBlock, + values: &mut std::collections::BTreeSet, + ) { + for inst in &block.instructions { + Self::collect_values_in_instruction(inst, values); + } + if let Some(ref term) = block.terminator { + Self::collect_values_in_instruction(term, values); + } + } + + /// Collect all ValueIds used in an instruction + fn collect_values_in_instruction( + inst: &crate::mir::MirInstruction, + values: &mut std::collections::BTreeSet, + ) { + use crate::mir::MirInstruction; + + match inst { + MirInstruction::Const { dst, .. } => { + values.insert(*dst); + } + MirInstruction::BinOp { dst, lhs, rhs, .. } => { + values.insert(*dst); + values.insert(*lhs); + values.insert(*rhs); + } + MirInstruction::UnaryOp { dst, operand, .. } => { + values.insert(*dst); + values.insert(*operand); + } + MirInstruction::Compare { dst, lhs, rhs, .. } => { + values.insert(*dst); + values.insert(*lhs); + values.insert(*rhs); + } + MirInstruction::Load { dst, ptr } => { + values.insert(*dst); + values.insert(*ptr); + } + MirInstruction::Store { value, ptr } => { + values.insert(*value); + values.insert(*ptr); + } + MirInstruction::Call { dst, func, args, .. } => { + if let Some(d) = dst { + values.insert(*d); + } + values.insert(*func); + for arg in args { + values.insert(*arg); + } + } + MirInstruction::BoxCall { dst, box_val, args, .. } => { + if let Some(d) = dst { + values.insert(*d); + } + values.insert(*box_val); + for arg in args { + values.insert(*arg); + } + } + MirInstruction::Branch { condition, .. 
} => { + values.insert(*condition); + } + MirInstruction::Return { value } => { + if let Some(v) = value { + values.insert(*v); + } + } + MirInstruction::Phi { dst, inputs } => { + values.insert(*dst); + for (_, val) in inputs { + values.insert(*val); + } + } + MirInstruction::Copy { dst, src } => { + values.insert(*dst); + values.insert(*src); + } + MirInstruction::NewBox { dst, args, .. } => { + values.insert(*dst); + for arg in args { + values.insert(*arg); + } + } + MirInstruction::Print { value, .. } => { + values.insert(*value); + } + _ => { + // Other instructions: skip for now + } + } + } + + /// Remap an instruction's ValueIds and BlockIds + fn remap_instruction( + inst: &crate::mir::MirInstruction, + value_map: &std::collections::HashMap, + block_map: &std::collections::HashMap, + ) -> crate::mir::MirInstruction { + use crate::mir::MirInstruction; + + let remap_value = |v: super::ValueId| value_map.get(&v).copied().unwrap_or(v); + let remap_block = |b: crate::mir::BasicBlockId| block_map.get(&b).copied().unwrap_or(b); + + match inst { + MirInstruction::Const { dst, value } => MirInstruction::Const { + dst: remap_value(*dst), + value: value.clone(), + }, + MirInstruction::BinOp { dst, op, lhs, rhs } => MirInstruction::BinOp { + dst: remap_value(*dst), + op: *op, + lhs: remap_value(*lhs), + rhs: remap_value(*rhs), + }, + MirInstruction::UnaryOp { dst, op, operand } => MirInstruction::UnaryOp { + dst: remap_value(*dst), + op: *op, + operand: remap_value(*operand), + }, + MirInstruction::Compare { dst, op, lhs, rhs } => MirInstruction::Compare { + dst: remap_value(*dst), + op: *op, + lhs: remap_value(*lhs), + rhs: remap_value(*rhs), + }, + MirInstruction::Load { dst, ptr } => MirInstruction::Load { + dst: remap_value(*dst), + ptr: remap_value(*ptr), + }, + MirInstruction::Store { value, ptr } => MirInstruction::Store { + value: remap_value(*value), + ptr: remap_value(*ptr), + }, + MirInstruction::Call { dst, func, callee, args, effects } => 
MirInstruction::Call { + dst: dst.map(remap_value), + func: remap_value(*func), + callee: callee.clone(), + args: args.iter().map(|a| remap_value(*a)).collect(), + effects: *effects, + }, + MirInstruction::BoxCall { dst, box_val, method, method_id, args, effects } => { + MirInstruction::BoxCall { + dst: dst.map(remap_value), + box_val: remap_value(*box_val), + method: method.clone(), + method_id: *method_id, + args: args.iter().map(|a| remap_value(*a)).collect(), + effects: *effects, + } + } + MirInstruction::Branch { condition, then_bb, else_bb } => MirInstruction::Branch { + condition: remap_value(*condition), + then_bb: remap_block(*then_bb), + else_bb: remap_block(*else_bb), + }, + MirInstruction::Jump { target } => MirInstruction::Jump { + target: remap_block(*target), + }, + MirInstruction::Return { value } => MirInstruction::Return { + value: value.map(remap_value), + }, + MirInstruction::Phi { dst, inputs } => MirInstruction::Phi { + dst: remap_value(*dst), + inputs: inputs + .iter() + .map(|(bb, val)| (remap_block(*bb), remap_value(*val))) + .collect(), + }, + MirInstruction::Copy { dst, src } => MirInstruction::Copy { + dst: remap_value(*dst), + src: remap_value(*src), + }, + MirInstruction::NewBox { dst, box_type, args } => MirInstruction::NewBox { + dst: remap_value(*dst), + box_type: box_type.clone(), + args: args.iter().map(|a| remap_value(*a)).collect(), + }, + MirInstruction::Print { value, effects } => MirInstruction::Print { + value: remap_value(*value), + effects: *effects, + }, + // Pass through other instructions unchanged + other => other.clone(), + } + } + /// Control-flow: try/catch/finally pub(super) fn cf_try_catch( &mut self, diff --git a/src/tests/joinir/mainline_phase49.rs b/src/tests/joinir/mainline_phase49.rs index 28d70aaa..938a28fa 100644 --- a/src/tests/joinir/mainline_phase49.rs +++ b/src/tests/joinir/mainline_phase49.rs @@ -1,11 +1,11 @@ -// Phase 49-3: JoinIR Frontend Mainline Integration Test +// Phase 49-3.2: JoinIR Frontend 
// Phase 49-3.2: JoinIR Frontend Mainline Integration Test
//
// This test verifies that the JoinIR Frontend mainline route of cf_loop
// works correctly.
//
// Implemented in Phase 49-3.2:
// - Block merging via merge_joinir_mir_blocks()
// - A/B comparison test (Route A vs Route B)
//
// How to run:
//   HAKO_JOINIR_PRINT_TOKENS_MAIN=1 cargo test --release joinir_mainline_phase49

/// Phase 49-3.2: A/B comparison test - Route A (legacy) vs Route B (JoinIR).
///
/// Compiles the same source through both routes and checks that both
/// compilations succeed. Block counts are only logged, not compared, because
/// the two routes may legitimately produce different block structures.
///
/// The environment variables set here are process-global; they are removed by
/// a drop guard so that a failing assertion does not leave them set for other
/// tests running in the same process.
#[test]
fn phase49_joinir_mainline_ab_comparison() {
    /// Flag that switches compilation onto the JoinIR Frontend route.
    const JOINIR_FLAG: &str = "HAKO_JOINIR_PRINT_TOKENS_MAIN";
    /// Flags both routes need.
    const BASE_FLAGS: [&str; 3] = [
        "NYASH_PARSER_STAGE3",
        "HAKO_PARSER_STAGE3",
        "NYASH_DISABLE_PLUGINS",
    ];

    /// Cleanup: removes every variable this test touches, including on panic.
    struct EnvCleanup;

    impl Drop for EnvCleanup {
        fn drop(&mut self) {
            std::env::remove_var(JOINIR_FLAG);
            for key in BASE_FLAGS.iter() {
                std::env::remove_var(key);
            }
        }
    }

    let src = r#"
box JsonTokenizer {
    tokens: ArrayBox

    birth() {
        me.tokens = new ArrayBox()
    }

    print_tokens() {
        local i = 0
        loop(i < me.tokens.length()) {
            i = i + 1
        }
    }
}

static box Main {
    main() {
        local t = new JsonTokenizer()
        t.print_tokens()
        return 0
    }
}
"#;

    let enable_base_flags = || {
        for key in BASE_FLAGS.iter() {
            std::env::set_var(key, "1");
        }
    };

    // Parses and compiles `src` under the current environment and returns the
    // total number of blocks across all functions of the resulting module.
    let compile_and_count_blocks = |route: &str| -> usize {
        let ast: ASTNode = NyashParser::parse_from_string(src)
            .unwrap_or_else(|e| panic!("phase49 A/B: parse failed ({}): {:?}", route, e));
        let mut compiler = MirCompiler::with_options(false);
        let module = match compiler.compile(ast) {
            Ok(output) => output.module,
            Err(e) => panic!("{} compile should succeed: {:?}", route, e),
        };
        module.functions.values().map(|f| f.blocks.len()).sum()
    };

    let _cleanup = EnvCleanup;

    // Route A: Legacy path (flag OFF)
    std::env::remove_var(JOINIR_FLAG);
    enable_base_flags();
    let blocks_a = compile_and_count_blocks("Route A");

    // Route B: JoinIR Frontend path (flag ON)
    // Re-set parser flags to ensure they're active
    enable_base_flags();
    std::env::set_var(JOINIR_FLAG, "1");
    let blocks_b = compile_and_count_blocks("Route B");

    // Log block counts for debugging
    eprintln!(
        "[phase49 A/B] Route A: {} total blocks, Route B: {} total blocks",
        blocks_a, blocks_b
    );

    // Both should complete successfully (main assertion is the compile succeeds)
    // Block counts may differ due to JoinIR's different structure
    // Future: Add execution comparison
}