Files
hakorune/src/mir/builder/control_flow/joinir/merge/mod.rs
nyash-codex 052040e59c refactor(joinir): extract ExitArgsCollectorBox for jump_args SSOT
Phase 118 refactoring: Box modularization for exit value collection.

# Changes
- Created `exit_args_collector.rs` with ExitArgsCollectorBox
- Extracted jump_args processing logic from instruction_rewriter.rs
- Implemented SSOT for offset calculation (0 vs 1 legacy layout)
- Added comprehensive unit tests (6 tests, all passing)

# Design
- **Responsibility**: Collects exit PHI inputs + carrier inputs from jump_args
- **SSOT**: Centralized offset calculation logic
- **Fail-Fast**: Validates jump_args length against exit_bindings
- **Phase 118 P2 Contract**: Uses boundary.exit_bindings as SSOT

# Key Features
- Filters ConditionOnly carriers (no exit PHI needed)
- Handles both direct mapping (offset=0) and legacy layout (offset=1)
- Strict mode: Fail-Fast on validation errors
- Non-strict mode: Warns and continues with best effort

# Test Results
- Unit tests: 6/6 PASS
- Phase 118 smoke tests: PASS (VM + LLVM)
- Phase 117 regression: PASS

# Reduced Code Complexity
- instruction_rewriter.rs: ~90 lines → ~25 lines (box call)
- Offset logic: Centralized in ExitArgsCollectorBox

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-18 03:56:21 +09:00

1237 lines
49 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! JoinIR MIR Block Merging Coordinator
//!
//! This module coordinates the merging of JoinIR-generated MIR functions
//! into the host MIR builder. The process is broken into 6 phases:
//!
//! 1. Block ID allocation (block_allocator.rs)
//! 2. Value collection (value_collector.rs)
//! 3. ValueId remapping (uses JoinIrIdRemapper)
//! 4. Instruction rewriting (instruction_rewriter.rs)
//! 5. Exit PHI construction (exit_phi_builder.rs)
//! 6. Boundary reconnection (inline in this file)
//!
//! Phase 4 Refactoring: Breaking down 714-line merge_joinir_mir_blocks() into focused modules
mod block_allocator;
mod carrier_init_builder;
mod contract_checks;
pub mod exit_args_collector; // Phase 118: Exit args collection box
pub mod exit_line;
mod exit_phi_builder;
mod expr_result_resolver;
mod instruction_rewriter;
mod loop_header_phi_builder;
mod loop_header_phi_info;
mod merge_result;
mod phi_block_remapper; // Phase 94: Phi block-id remap box
mod tail_call_classifier;
mod value_collector;
// Phase 33-17: Re-export for use by other modules
pub use loop_header_phi_builder::LoopHeaderPhiBuilder;
pub use loop_header_phi_info::LoopHeaderPhiInfo;
use super::trace;
use crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary;
use crate::mir::{MirModule, ValueId};
/// Phase 49-3.2: Merge JoinIR-generated MIR blocks into current_function
///
/// # Phase 189: Multi-Function MIR Merge
///
/// This merges JoinIR-generated blocks by:
/// 1. Remapping all block IDs across ALL functions to avoid conflicts
/// 2. Remapping all value IDs across ALL functions to avoid conflicts
/// 3. Adding all blocks from all functions to current_function
/// 4. Jumping from current_block to the entry block
/// 5. Converting Return → Jump to exit block for all functions
///
/// **Multi-Function Support** (Phase 189):
/// - Pattern 1 (Simple While) generates 3 functions: entry + loop_step + k_exit
/// - All functions are flattened into current_function with global ID remapping
/// - Single exit block receives all Return instructions from all functions
///
/// # Phase 188-Impl-3: JoinInlineBoundary Support
///
/// When `boundary` is provided, injects Copy instructions at the entry block
/// to connect host ValueIds to JoinIR local ValueIds:
///
/// ```text
/// entry_block:
/// // Injected by boundary
/// ValueId(100) = Copy ValueId(4) // join_input → host_input
/// // Original JoinIR instructions follow...
/// ```
///
/// This enables clean separation: JoinIR uses local IDs (0,1,2...),
/// host uses its own IDs, and Copy instructions bridge the gap.
///
/// # Returns
///
/// Returns `Ok(Some(exit_phi_id))` if the merged JoinIR functions have return values
/// that were collected into an exit block PHI. さらに、`boundary` に
/// host_outputs が指定されている場合は、exit PHI の結果をホスト側の
/// SSA スロットへ再接続するvariable_map 内の ValueId を更新する)。
pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
builder: &mut crate::mir::builder::MirBuilder,
mir_module: &MirModule,
boundary: Option<&JoinInlineBoundary>,
debug: bool,
) -> Result<Option<ValueId>, String> {
let verbose = debug || crate::config::env::joinir_dev_enabled();
let trace = trace::trace();
trace.stderr_if(
&format!(
"[cf_loop/joinir] merge_joinir_mir_blocks called with {} functions",
mir_module.functions.len()
),
debug,
);
if verbose {
if let Some(boundary) = boundary {
let exit_summary: Vec<String> = boundary
.exit_bindings
.iter()
.map(|b| {
format!(
"{}: join {:?} → host {:?} ({:?})",
b.carrier_name, b.join_exit_value, b.host_slot, b.role
)
})
.collect();
let cond_summary: Vec<String> = boundary
.condition_bindings
.iter()
.map(|b| {
format!(
"{}: host {:?} → join {:?}",
b.name, b.host_value, b.join_value
)
})
.collect();
trace.stderr_if(
&format!(
"[cf_loop/joinir] Boundary join_inputs={:?} host_inputs={:?}",
boundary.join_inputs, boundary.host_inputs
),
true,
);
trace.stderr_if(
&format!(
"[cf_loop/joinir] Boundary exit_bindings ({}): {}",
boundary.exit_bindings.len(),
exit_summary.join(", ")
),
true,
);
if !cond_summary.is_empty() {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Boundary condition_bindings ({}): {}",
cond_summary.len(),
cond_summary.join(", ")
),
true,
);
}
if let Some(ci) = &boundary.carrier_info {
let carriers: Vec<String> = ci.carriers.iter().map(|c| c.name.clone()).collect();
trace.stderr_if(
&format!(
"[cf_loop/joinir] Boundary carrier_info: loop_var='{}', carriers={:?}",
ci.loop_var_name, carriers
),
true,
);
}
} else {
trace.stderr_if("[cf_loop/joinir] No boundary provided", true);
}
}
// Phase 1: Allocate block IDs for all functions
// Phase 177-3: block_allocator now returns exit_block_id to avoid conflicts
let (mut remapper, exit_block_id) =
block_allocator::allocate_blocks(builder, mir_module, debug)?;
// Phase 2: Collect values from all functions
let (mut used_values, value_to_func_name, function_params) =
value_collector::collect_values(mir_module, &remapper, debug)?;
// Phase 171-fix: Add condition_bindings' join_values to used_values for remapping
if let Some(boundary) = boundary {
for binding in &boundary.condition_bindings {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 171-fix: Adding condition binding '{}' JoinIR {:?} to used_values",
binding.name, binding.join_value
),
debug,
);
used_values.insert(binding.join_value);
}
// Phase 172-3: Add exit_bindings' join_exit_values to used_values for remapping
for binding in &boundary.exit_bindings {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 172-3: Adding exit binding '{}' JoinIR {:?} to used_values",
binding.carrier_name, binding.join_exit_value
),
debug,
);
used_values.insert(binding.join_exit_value);
}
}
// Phase 201-A: Build loop header PHI info BEFORE Phase 3
//
// We need to allocate PHI dst ValueIds before remap_values() runs,
// to prevent conflicts where a Const instruction gets a ValueId that
// will later be used as a PHI dst, causing carrier value corruption.
//
// This is a reordering of Phase 3 and Phase 3.5 logic.
let mut loop_header_phi_info = if let Some(boundary) = boundary {
if let Some(loop_var_name) = &boundary.loop_var_name {
// Get entry function and block for building PHI info
let (entry_func_name, entry_func) = mir_module
.functions
.iter()
.next()
.ok_or("JoinIR module has no functions (Phase 201-A)")?;
let entry_block_remapped = remapper
.get_block(entry_func_name, entry_func.entry_block)
.ok_or_else(|| {
format!(
"Entry block not found for {} (Phase 201-A)",
entry_func_name
)
})?;
// Get host's current block as the entry edge
let host_entry_block = builder
.current_block
.ok_or("Phase 201-A: No current block when building header PHIs")?;
// Get loop variable's initial value from HOST
let loop_var_init = boundary
.host_inputs
.first()
.copied()
.ok_or("Phase 201-A: No host_inputs in boundary for loop_var_init")?;
// Phase 228-4: Extract carriers with their initialization strategy
let other_carriers: Vec<(
String,
ValueId,
crate::mir::join_ir::lowering::carrier_info::CarrierInit,
crate::mir::join_ir::lowering::carrier_info::CarrierRole,
)> = if let Some(ref carrier_info) = boundary.carrier_info {
// Use carrier_info if available (Phase 228)
carrier_info
.carriers
.iter()
.filter(|c| c.name != *loop_var_name)
.map(|c| (c.name.clone(), c.host_id, c.init, c.role))
.collect()
} else {
// Fallback: exit_bindings から取得(既存動作)
boundary
.exit_bindings
.iter()
.filter(|b| b.carrier_name != *loop_var_name)
.map(|b| {
(
b.carrier_name.clone(),
b.host_slot,
crate::mir::join_ir::lowering::carrier_info::CarrierInit::FromHost,
crate::mir::join_ir::lowering::carrier_info::CarrierRole::LoopState,
)
})
.collect()
};
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 201-A: Pre-building header PHIs for loop_var='{}' at {:?}",
loop_var_name, entry_block_remapped
),
debug,
);
trace.stderr_if(
&format!(
"[cf_loop/joinir] loop_var_init={:?}, carriers={:?}",
loop_var_init,
other_carriers
.iter()
.map(|(n, _, _, _)| n.as_str())
.collect::<Vec<_>>()
),
debug,
);
// Build PHI info (this allocates PHI dst ValueIds)
LoopHeaderPhiBuilder::build(
builder,
entry_block_remapped,
host_entry_block,
loop_var_name,
loop_var_init,
&other_carriers,
boundary.expr_result.is_some(),
debug,
)?
} else {
LoopHeaderPhiInfo::empty(
remapper
.get_block(
mir_module.functions.iter().next().unwrap().0,
mir_module.functions.iter().next().unwrap().1.entry_block,
)
.unwrap(),
)
}
} else {
LoopHeaderPhiInfo::empty(
remapper
.get_block(
mir_module.functions.iter().next().unwrap().0,
mir_module.functions.iter().next().unwrap().1.entry_block,
)
.unwrap(),
)
};
// Phase 201-A: Get reserved PHI dst ValueIds and set in MirBuilder
let reserved_phi_dsts = loop_header_phi_info.reserved_value_ids();
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 201-A: Reserved PHI dsts: {:?}",
reserved_phi_dsts
),
debug && !reserved_phi_dsts.is_empty(),
);
// Phase 201-A: Set reserved IDs in MirBuilder so next_value_id() skips them
// This protects against carrier corruption when break conditions emit Const instructions
builder.comp_ctx.reserved_value_ids = reserved_phi_dsts.clone();
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 201-A: Set builder.comp_ctx.reserved_value_ids = {:?}",
builder.comp_ctx.reserved_value_ids
),
debug && !builder.comp_ctx.reserved_value_ids.is_empty(),
);
// Phase 3: Remap ValueIds (with reserved PHI dsts protection)
remap_values(
builder,
&used_values,
&mut remapper,
&reserved_phi_dsts,
debug,
)?;
// Phase 177-3 DEBUG: Verify remapper state after Phase 3
trace.stderr_if("[DEBUG-177] === Remapper state after Phase 3 ===", verbose);
trace.stderr_if(
&format!("[DEBUG-177] used_values count: {}", used_values.len()),
verbose,
);
for value_id in &used_values {
if let Some(remapped) = remapper.get_value(*value_id) {
trace.stderr_if(
&format!("[DEBUG-177] JoinIR {:?} → Host {:?}", value_id, remapped),
verbose,
);
} else {
trace.stderr_if(
&format!("[DEBUG-177] JoinIR {:?} → NOT FOUND ❌", value_id),
verbose,
);
}
}
// Check condition_bindings specifically
if let Some(boundary) = boundary {
trace.stderr_if("[DEBUG-177] === Condition bindings check ===", verbose);
for binding in &boundary.condition_bindings {
let lookup_result = remapper.get_value(binding.join_value);
trace.stderr_if(
&format!(
"[DEBUG-177] '{}': JoinIR {:?}{:?}",
binding.name, binding.join_value, lookup_result
),
verbose,
);
}
}
trace.stderr_if("[DEBUG-177] ==============================", verbose);
// Phase 3.5: Override remapper for function parameters to use PHI dsts
//
// Phase 201-A: This phase now uses the loop_header_phi_info built before Phase 3.
// The PHI dst allocation has been moved earlier to prevent ValueId conflicts.
if let Some(boundary) = boundary {
if let Some(loop_var_name) = &boundary.loop_var_name {
// Phase 201-A: PHI info is already built (before Phase 3) - just use it
// Phase 33-21: Override remapper for loop_step's parameters
//
// JoinIR generates separate parameter ValueIds for each function:
// - main(): ValueId(0), ValueId(1), ... for (i_init, carrier1_init, ...)
// - loop_step(): ValueId(3), ValueId(4), ... for (i_param, carrier1_param, ...)
//
// The loop body uses loop_step's parameters, so we need to remap THOSE
// to the header PHI dsts, not main()'s parameters.
//
// We get loop_step's parameters from function_params collected earlier.
// Phase 33-21: Override remapper for ALL functions' parameters
//
// JoinIR generates separate parameter ValueIds for each function:
// - main (join_func_0): ValueId(0), ValueId(1), ... for (i_init, carrier1_init, ...)
// - loop_step (join_func_1): ValueId(3), ValueId(4), ... for (i_param, carrier1_param, ...)
//
// ALL of these need to be mapped to header PHI dsts so that:
// 1. condition evaluation uses PHI result
// 2. loop body uses PHI result
// 3. tail call args are correctly routed
// Phase 177-3 fix: Protect condition-ONLY bindings from being overridden to PHI dsts
//
// Problem: condition_bindings may contain:
// 1. True condition-only variables (e.g., 'limit' in loop(i < limit)) - NOT carriers
// 2. Body-only carriers added by Phase 176-5 (e.g., 'result') - ARE carriers
//
// We must ONLY protect (1), not (2), because:
// - Condition-only vars should keep their HOST mapping (e.g., limit = %8)
// - Body-only carriers MUST be remapped to PHI dsts (e.g., result = %24)
//
// Solution: Protect condition_bindings that are NOT in exit_bindings (i.e., not carriers)
let carrier_names: std::collections::HashSet<&str> = boundary
.exit_bindings
.iter()
.map(|eb| eb.carrier_name.as_str())
.collect();
let condition_binding_ids: std::collections::HashSet<ValueId> = boundary
.condition_bindings
.iter()
.filter(|cb| !carrier_names.contains(cb.name.as_str()))
.map(|cb| cb.join_value)
.collect();
if !condition_binding_ids.is_empty() {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 177-3: Protected ValueIds (condition-only, not carriers): {:?}",
condition_binding_ids
),
verbose,
);
for cb in &boundary.condition_bindings {
let is_carrier = carrier_names.contains(cb.name.as_str());
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 177-3: '{}': JoinIR {:?} (carrier={})",
cb.name, cb.join_value, is_carrier
),
verbose,
);
}
}
let main_func_name = "join_func_0";
let loop_step_func_name = "join_func_1";
if function_params.get(main_func_name).is_none() {
trace.stderr_if(
&format!(
"[cf_loop/joinir] WARNING: function_params.get('{}') returned None. Available keys: {:?}",
main_func_name,
function_params.keys().collect::<Vec<_>>()
),
verbose,
);
}
if let Some(main_params) = function_params.get(main_func_name) {
trace.stderr_if(
&format!(
"[DEBUG-177] Phase 33-21: main ({}) params: {:?}",
main_func_name, main_params
),
verbose,
);
trace.stderr_if(
&format!(
"[DEBUG-177] Phase 33-21: carrier_phis count: {}, names: {:?}",
loop_header_phi_info.carrier_phis.len(),
loop_header_phi_info
.carrier_phis
.iter()
.map(|(n, _)| n.as_str())
.collect::<Vec<_>>()
),
verbose,
);
// Map main's parameters to header PHI dsts
// main params: [i_init, carrier1_init, ...]
// carrier_phis: [("i", entry), ("sum", entry), ...]
for (idx, (carrier_name, entry)) in
loop_header_phi_info.carrier_phis.iter().enumerate()
{
if let Some(&main_param) = main_params.get(idx) {
// Phase 177-3: Don't override condition_bindings
if condition_binding_ids.contains(&main_param) {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 177-3: Skipping override for condition_binding {:?} ('{}')",
main_param, carrier_name
),
verbose,
);
continue;
}
trace.stderr_if(
&format!(
"[DEBUG-177] Phase 33-21: REMAP main param[{}] {:?}{:?} ('{}')",
idx, main_param, entry.phi_dst, carrier_name
),
verbose,
);
remapper.set_value(main_param, entry.phi_dst);
}
}
}
// Phase 177-3-B: Handle body-only carriers
// These are carriers in carrier_phis that are NOT in main function params.
// They appear in condition_bindings (added by Phase 176-5) but need PHI remapping.
for (carrier_name, entry) in &loop_header_phi_info.carrier_phis {
// Check if this carrier has a condition_binding
if let Some(binding) = boundary
.condition_bindings
.iter()
.find(|cb| cb.name == *carrier_name)
{
// Skip if it's a true condition-only variable (already protected above)
if condition_binding_ids.contains(&binding.join_value) {
continue;
}
// This is a body-only carrier - remap it to PHI dst
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 177-3-B: Body-only carrier '{}': JoinIR {:?} → PHI {:?}",
carrier_name, binding.join_value, entry.phi_dst
),
verbose,
);
remapper.set_value(binding.join_value, entry.phi_dst);
}
}
// Map loop_step's parameters
// DEBUG-177: Always log function_params keys to diagnose multi-carrier issue
trace.stderr_if(
&format!(
"[DEBUG-177] Phase 33-21: function_params keys: {:?}",
function_params.keys().collect::<Vec<_>>()
),
verbose,
);
if function_params.get(loop_step_func_name).is_none() {
trace.stderr_if(
&format!(
"[cf_loop/joinir] WARNING: function_params.get('{}') returned None. Available keys: {:?}",
loop_step_func_name,
function_params.keys().collect::<Vec<_>>()
),
verbose,
);
}
if let Some(loop_step_params) = function_params.get(loop_step_func_name) {
// DEBUG-177: Always log loop_step params
trace.stderr_if(
&format!(
"[DEBUG-177] Phase 33-21: loop_step ({}) params: {:?}",
loop_step_func_name, loop_step_params
),
verbose,
);
// Phase 177-FIX: Process loop_step params but skip if already mapped
//
// We use a name-based approach: for each carrier_phi, check if
// its join_value was already set in Phase 177-3-B (body-only carriers).
// Only process loop_step params for carriers NOT already handled.
for loop_step_param in loop_step_params {
// Phase 177-3: Don't override condition_bindings
if condition_binding_ids.contains(loop_step_param) {
trace.stderr_if(
&format!(
"[DEBUG-177] Phase 177-FIX: Skipping condition_binding {:?}",
loop_step_param
),
verbose,
);
continue;
}
// Find which carrier this param belongs to by matching join_value
// Check if this param was already handled by Phase 177-3-B
let already_mapped = boundary.condition_bindings.iter().any(|cb| {
cb.join_value == *loop_step_param
&& loop_header_phi_info
.carrier_phis
.iter()
.any(|(name, _)| name == &cb.name)
});
if already_mapped {
trace.stderr_if(
&format!(
"[DEBUG-177] Phase 177-FIX: Skipping {:?} (already mapped by Phase 177-3-B)",
loop_step_param
),
verbose,
);
continue;
}
// Phase 177-STRUCT-2: Use carrier_order for index-based matching
//
// Problem: BTreeMap iterates in alphabetical order, but JoinIR
// generates params in exit_bindings order.
//
// Solution: Use carrier_order (Vec<String>) which preserves insertion order.
if let Some(param_idx) =
loop_step_params.iter().position(|p| p == loop_step_param)
{
// Map params[i] to carrier_order[i]
if let (Some(carrier_name), Some(entry)) = (
loop_header_phi_info.get_carrier_at_index(param_idx),
loop_header_phi_info.get_entry_at_index(param_idx),
) {
trace.stderr_if(
&format!(
"[DEBUG-177] Phase 177-STRUCT-2: REMAP loop_step param[{}] {:?}{:?} (carrier '{}')",
param_idx, loop_step_param, entry.phi_dst, carrier_name
),
verbose,
);
remapper.set_value(*loop_step_param, entry.phi_dst);
}
}
}
}
if function_params.get(main_func_name).is_none()
&& function_params.get(loop_step_func_name).is_none()
{
// Fallback: Use old behavior (ValueId(0), ValueId(1), ...)
// This handles patterns that don't have loop_step function
if let Some(phi_dst) = loop_header_phi_info.get_carrier_phi(loop_var_name) {
// Phase 177-3: Don't override condition_bindings
if !condition_binding_ids.contains(&ValueId(0)) {
remapper.set_value(ValueId(0), phi_dst);
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 33-16 fallback: Override remap ValueId(0) → {:?} (PHI dst)",
phi_dst
),
debug,
);
} else {
trace.stderr_if(
"[cf_loop/joinir] Phase 177-3 fallback: Skipping override for condition_binding ValueId(0)",
verbose,
);
}
}
// Phase 177-STRUCT-2: Use carrier_order for deterministic iteration
for (idx, carrier_name) in loop_header_phi_info.carrier_order.iter().enumerate() {
if carrier_name == loop_var_name {
continue;
}
let entry = match loop_header_phi_info.carrier_phis.get(carrier_name) {
Some(e) => e,
None => continue,
};
let join_value_id = ValueId(idx as u32);
// Phase 177-3: Don't override condition_bindings
if !condition_binding_ids.contains(&join_value_id) {
remapper.set_value(join_value_id, entry.phi_dst);
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 33-20 fallback: Override remap {:?}{:?} (carrier '{}' PHI dst)",
join_value_id, entry.phi_dst, carrier_name
),
debug,
);
} else {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 177-3 fallback: Skipping override for condition_binding {:?} ('{}')",
join_value_id, carrier_name
),
verbose,
);
}
}
}
// Phase 177-3 DEBUG: Check remapper after Phase 33-21 overrides
trace.stderr_if("[DEBUG-177] === Remapper state after Phase 33-21 ===", verbose);
for binding in &boundary.condition_bindings {
let lookup_result = remapper.get_value(binding.join_value);
trace.stderr_if(
&format!(
"[DEBUG-177] '{}': JoinIR {:?}{:?} (after 33-21)",
binding.name, binding.join_value, lookup_result
),
verbose,
);
}
// Phase 201-A: loop_header_phi_info already built (no assignment needed)
}
}
// Phase 4: Merge blocks and rewrite instructions
// Phase 33-16: Pass mutable loop_header_phi_info for latch_incoming tracking
// Phase 177-3: Pass exit_block_id from allocator to avoid conflicts
let merge_result = instruction_rewriter::merge_and_rewrite(
builder,
mir_module,
&mut remapper,
&value_to_func_name,
&function_params,
boundary,
&mut loop_header_phi_info,
exit_block_id,
debug,
)?;
// Phase 4.5: Finalize loop header PHIs (insert into header block)
//
// By now, instruction_rewriter has set latch_incoming for all carriers.
// We can finalize the PHIs and insert them into the header block.
if !loop_header_phi_info.carrier_phis.is_empty() {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 4.5: Finalizing {} header PHIs",
loop_header_phi_info.carrier_phis.len()
),
debug,
);
LoopHeaderPhiBuilder::finalize(builder, &loop_header_phi_info, debug)?;
}
// Phase 5: Build exit PHI (expr result only, not carrier PHIs)
// Phase 33-20: Carrier PHIs are now taken from header PHI info, not exit block
// Phase 246-EX: REVERT Phase 33-20 - Use EXIT PHI dsts, not header PHI dsts!
let (exit_phi_result_id, exit_carrier_phis) = exit_phi_builder::build_exit_phi(
builder,
merge_result.exit_block_id,
&merge_result.exit_phi_inputs,
&merge_result.carrier_inputs,
debug,
)?;
// Phase 118 P2: Contract check (Fail-Fast) - exit_bindings LoopState carriers must have exit PHIs.
if let Some(boundary) = boundary {
contract_checks::verify_exit_bindings_have_exit_phis(boundary, &exit_carrier_phis)?;
}
// Phase 118 P1: Dev-only carrier-phi SSOT logs (exit_bindings vs carrier_inputs vs exit_carrier_phis)
if crate::config::env::joinir_dev_enabled() {
if let Some(boundary) = boundary {
let exit_binding_names: Vec<&str> = boundary
.exit_bindings
.iter()
.map(|b| b.carrier_name.as_str())
.collect();
let carrier_input_names: Vec<&str> =
merge_result.carrier_inputs.keys().map(|s| s.as_str()).collect();
let exit_phi_names: Vec<&str> =
exit_carrier_phis.keys().map(|s| s.as_str()).collect();
trace.stderr_if(
&format!(
"[joinir/phase118/dev] exit_bindings carriers={:?}",
exit_binding_names
),
true,
);
trace.stderr_if(
&format!(
"[joinir/phase118/dev] carrier_inputs keys={:?}",
carrier_input_names
),
true,
);
trace.stderr_if(
&format!(
"[joinir/phase118/dev] exit_carrier_phis keys={:?}",
exit_phi_names
),
true,
);
}
}
// Phase 246-EX: CRITICAL FIX - Use exit PHI dsts for variable_map reconnection
//
// **Why EXIT PHI, not HEADER PHI?**
//
// Header PHI represents the value at the BEGINNING of each iteration.
// Exit PHI represents the FINAL value when leaving the loop (from any exit path).
//
// For Pattern 2 loops with multiple exit paths (natural exit + break):
// - Header PHI: `%15 = phi [%3, bb7], [%42, bb14]` (loop variable at iteration start)
// - Exit PHI: `%5 = phi [%15, bb11], [%15, bb13]` (final value from exit paths)
//
// When we exit the loop, we want the FINAL value (%5), not the iteration-start value (%15).
// Phase 33-20 incorrectly used header PHI, causing loops to return initial values (e.g., 0 instead of 42).
//
// Example (_atoi):
// - Initial: result=0 (header PHI)
// - After iteration 1: result=4 (updated in loop body)
// - After iteration 2: result=42 (updated in loop body)
// - Exit: Should return 42 (exit PHI), not 0 (header PHI initial value)
//
// The exit PHI correctly merges values from both exit paths, giving us the final result.
let carrier_phis = &exit_carrier_phis;
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 246-EX: Using EXIT PHI dsts for variable_map (not header): {:?}",
carrier_phis
.iter()
.map(|(n, v)| (n.as_str(), v))
.collect::<Vec<_>>()
),
debug && !carrier_phis.is_empty(),
);
// Phase 6: Reconnect boundary (if specified)
// Phase 197-B: Pass remapper to enable per-carrier exit value lookup
// Phase 33-10-Refactor-P3: Delegate to ExitLineOrchestrator
// Phase 246-EX: Now uses EXIT PHI dsts (reverted Phase 33-20)
if let Some(boundary) = boundary {
exit_line::ExitLineOrchestrator::execute(builder, boundary, carrier_phis, debug)?;
}
let exit_block_id = merge_result.exit_block_id;
// Phase 201-A: Get entry block from loop_header_phi_info
// The header_block in loop_header_phi_info is the remapped entry block
let entry_block = loop_header_phi_info.header_block;
trace.stderr_if(
&format!(
"[cf_loop/joinir] Entry block (from loop_header_phi_info): {:?}",
entry_block
),
debug,
);
trace.stderr_if(
&format!(
"[cf_loop/joinir] Current block before emit_jump: {:?}",
builder.current_block
),
debug,
);
trace.stderr_if(
&format!(
"[cf_loop/joinir] Jumping to entry block: {:?}",
entry_block
),
debug,
);
crate::mir::builder::emission::branch::emit_jump(builder, entry_block)?;
trace.stderr_if(
&format!(
"[cf_loop/joinir] After emit_jump, current_block: {:?}",
builder.current_block
),
debug,
);
// Switch to exit block for subsequent code
builder.start_new_block(exit_block_id)?;
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 189: Merge complete: {} functions merged, continuing from {:?}",
mir_module.functions.len(),
exit_block_id
),
debug,
);
// Phase 200-3: Verify JoinIR contracts (debug only)
#[cfg(debug_assertions)]
{
if let Some(boundary) = boundary {
if let Some(ref func) = builder.scope_ctx.current_function {
verify_joinir_contracts(
func,
entry_block,
exit_block_id,
&loop_header_phi_info,
boundary,
);
}
trace.stderr_if(
"[cf_loop/joinir] Phase 200-3: Contract verification passed",
debug,
);
}
}
// Phase 201-A: Clear reserved ValueIds after merge completes
// Future loops will set their own reserved IDs
if !builder.comp_ctx.reserved_value_ids.is_empty() {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 201-A: Clearing reserved_value_ids (was {:?})",
builder.comp_ctx.reserved_value_ids
),
debug,
);
builder.comp_ctx.reserved_value_ids.clear();
}
// Phase 246-EX-FIX: Handle loop variable expr_result separately from carrier expr_result
//
// The loop variable (e.g., 'i') is returned via exit_phi_result_id, not carrier_phis.
// Other carriers use carrier_phis. We need to check which case we're in.
let expr_result_value = if let Some(b) = boundary {
if let Some(expr_result_id) = b.expr_result {
// Check if expr_result is the loop variable
if let Some(loop_var_name) = &b.loop_var_name {
// Find the exit binding for the loop variable
let loop_var_binding = b
.exit_bindings
.iter()
.find(|binding| binding.carrier_name == *loop_var_name);
if let Some(binding) = loop_var_binding {
if binding.join_exit_value == expr_result_id {
// expr_result is the loop variable! Use exit_phi_result_id
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 246-EX-FIX: expr_result {:?} is loop variable '{}', using exit_phi_result_id {:?}",
expr_result_id, loop_var_name, exit_phi_result_id
),
debug,
);
exit_phi_result_id
} else {
// expr_result is not the loop variable, resolve as carrier
expr_result_resolver::ExprResultResolver::resolve(
Some(expr_result_id),
b.exit_bindings.as_slice(),
&carrier_phis,
&remapper,
debug,
)?
}
} else {
// No loop variable binding, resolve normally
expr_result_resolver::ExprResultResolver::resolve(
Some(expr_result_id),
b.exit_bindings.as_slice(),
&carrier_phis,
&remapper,
debug,
)?
}
} else {
// No loop variable name, resolve normally
expr_result_resolver::ExprResultResolver::resolve(
Some(expr_result_id),
b.exit_bindings.as_slice(),
&carrier_phis,
&remapper,
debug,
)?
}
} else {
None
}
} else {
None
};
// Return expr_result if present, otherwise fall back to exit_phi_result_id
if let Some(resolved) = expr_result_value {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 246-EX-FIX: Returning expr_result_value {:?}",
resolved
),
debug,
);
Ok(Some(resolved))
} else {
// Fallback: return exit_phi_result_id (for legacy patterns or carrier-only loops)
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 221-R: Returning exit_phi_result_id (fallback): {:?}",
exit_phi_result_id
),
debug && exit_phi_result_id.is_some(),
);
Ok(exit_phi_result_id)
}
}
/// Phase 3: Allocate new ValueIds for all collected values
///
/// Phase 201-A: Accept reserved ValueIds that must not be reused.
/// These are PHI dst ValueIds that will be created by LoopHeaderPhiBuilder.
/// We must skip these IDs to prevent carrier value corruption.
fn remap_values(
builder: &mut crate::mir::builder::MirBuilder,
used_values: &std::collections::BTreeSet<ValueId>,
remapper: &mut crate::mir::builder::joinir_id_remapper::JoinIrIdRemapper,
reserved_ids: &std::collections::HashSet<ValueId>,
debug: bool,
) -> Result<(), String> {
let trace = trace::trace();
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 3: Remapping {} ValueIds (reserved: {})",
used_values.len(),
reserved_ids.len()
),
debug,
);
for old_value in used_values {
// Phase 201-A: Allocate new ValueId, skipping reserved PHI dsts
let new_value = loop {
let candidate = builder.next_value_id();
if !reserved_ids.contains(&candidate) {
break candidate;
}
// Skip reserved ID - will try next one
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 201-A: Skipping reserved PHI dst {:?}",
candidate
),
debug,
);
};
remapper.set_value(*old_value, new_value);
trace.stderr_if(
&format!(
"[cf_loop/joinir] Value remap: {:?}{:?}",
old_value, new_value
),
debug,
);
}
Ok(())
}
/// Verify that PHI dst values are not overwritten by later instructions (Phase 204-2)
///
/// # Checks
///
/// 1. PHI instructions define dst values in header block
/// 2. No subsequent instruction in the same block overwrites these dsts
///
/// # Rationale
///
/// PHI dst overwrite violates SSA invariant (single assignment) and causes undefined behavior.
/// While Phase 201 JoinValueSpace prevents ValueId collisions, manual coding errors can still
/// occur during pattern lowering.
///
/// # Panics
///
/// Panics in debug mode if PHI dst is overwritten by a later instruction.
#[cfg(debug_assertions)]
fn verify_no_phi_dst_overwrite(
func: &crate::mir::MirFunction,
header_block: crate::mir::BasicBlockId,
loop_info: &LoopHeaderPhiInfo,
) {
if loop_info.carrier_phis.is_empty() {
return; // No PHIs to verify
}
let header_block_data = func.blocks.get(&header_block).unwrap_or_else(|| {
panic!(
"[JoinIRVerifier] Header block {} not found ({} blocks in func)",
header_block,
func.blocks.len()
)
});
// 1. Collect all PHI dsts
let phi_dsts: std::collections::HashSet<crate::mir::ValueId> = loop_info
.carrier_phis
.values()
.map(|entry| entry.phi_dst)
.collect();
// 2. Check instructions after PHI definitions
let mut after_phis = false;
for instr in &header_block_data.instructions {
match instr {
crate::mir::MirInstruction::Phi { dst, .. } => {
// PHI instructions come first in basic block
if after_phis {
panic!(
"[JoinIRVerifier] PHI instruction {:?} appears after non-PHI instructions in block {}",
dst, header_block.0
);
}
}
_ => {
after_phis = true;
// Check if this instruction writes to a PHI dst
if let Some(dst) = get_instruction_dst(instr) {
if phi_dsts.contains(&dst) {
panic!(
"[JoinIRVerifier/Phase204] PHI dst {:?} is overwritten by instruction in header block {}: {:?}",
dst, header_block.0, instr
);
}
}
}
}
}
}
/// Helper: Extract dst ValueId from MirInstruction (Phase 204-2)
#[cfg(debug_assertions)]
fn get_instruction_dst(instr: &crate::mir::MirInstruction) -> Option<crate::mir::ValueId> {
use crate::mir::MirInstruction;
match instr {
// Instructions with always-present dst
MirInstruction::Const { dst, .. }
| MirInstruction::Load { dst, .. }
| MirInstruction::UnaryOp { dst, .. }
| MirInstruction::BinOp { dst, .. }
| MirInstruction::Compare { dst, .. }
| MirInstruction::TypeOp { dst, .. }
| MirInstruction::NewBox { dst, .. }
| MirInstruction::NewClosure { dst, .. }
| MirInstruction::Copy { dst, .. }
| MirInstruction::Cast { dst, .. }
| MirInstruction::TypeCheck { dst, .. }
| MirInstruction::Phi { dst, .. }
| MirInstruction::ArrayGet { dst, .. }
| MirInstruction::RefNew { dst, .. }
| MirInstruction::RefGet { dst, .. }
| MirInstruction::WeakNew { dst, .. }
| MirInstruction::WeakLoad { dst, .. }
| MirInstruction::WeakRef { dst, .. }
| MirInstruction::FutureNew { dst, .. }
| MirInstruction::Await { dst, .. } => Some(*dst),
// Instructions with Option<ValueId> dst
MirInstruction::BoxCall { dst, .. }
| MirInstruction::ExternCall { dst, .. }
| MirInstruction::Call { dst, .. }
| MirInstruction::PluginInvoke { dst, .. } => *dst,
// Instructions without dst (side-effects only)
_ => None,
}
}
/// Verify PHI inputs are defined (Phase 204-3 - Conservative sanity checks)
///
/// # Checks
///
/// 1. PHI inputs have reasonable ValueId values (< threshold)
/// 2. No obviously undefined values (e.g., suspiciously large IDs)
///
/// # Note
///
/// Full data-flow analysis (DFA) verification is deferred to Phase 205+.
/// This function only performs conservative sanity checks.
///
/// # Panics
///
/// Panics in debug mode if suspicious PHI inputs are detected.
#[cfg(debug_assertions)]
fn verify_phi_inputs_defined(
func: &crate::mir::MirFunction,
header_block: crate::mir::BasicBlockId,
) {
let header_block_data = func.blocks.get(&header_block).unwrap_or_else(|| {
panic!(
"[JoinIRVerifier] Header block {} not found ({} blocks in func)",
header_block,
func.blocks.len()
)
});
for instr in &header_block_data.instructions {
if let crate::mir::MirInstruction::Phi {
dst,
inputs,
type_hint: _,
} = instr
{
for (value_id, pred_block) in inputs {
// Conservative sanity check: ValueId should not be suspiciously large
// Phase 201 JoinValueSpace uses regions:
// - PHI Reserved: 0-99
// - Param: 100-999
// - Local: 1000+
// - Reasonable max: 100000 (arbitrary but catches obvious bugs)
if value_id.0 >= 100000 {
panic!(
"[JoinIRVerifier/Phase204-3] PHI {:?} has suspiciously large input {:?} from predecessor block {:?}",
dst, value_id, pred_block
);
}
}
}
}
}
/// Verify all loop contracts for a merged JoinIR function
///
/// This is the main entry point for verification. It runs all checks
/// and panics if any contract violation is found.
///
/// # Panics
///
/// Panics in debug mode if any contract violation is detected.
#[cfg(debug_assertions)]
fn verify_joinir_contracts(
func: &crate::mir::MirFunction,
header_block: crate::mir::BasicBlockId,
exit_block: crate::mir::BasicBlockId,
loop_info: &LoopHeaderPhiInfo,
boundary: &JoinInlineBoundary,
) {
// Phase 135 P1 Step 1: Verify condition_bindings consistency (before merge)
contract_checks::verify_condition_bindings_consistent(boundary);
contract_checks::verify_loop_header_phis(func, header_block, loop_info, boundary);
verify_no_phi_dst_overwrite(func, header_block, loop_info); // Phase 204-2
verify_phi_inputs_defined(func, header_block); // Phase 204-3
contract_checks::verify_exit_line(func, exit_block, boundary);
contract_checks::verify_valueid_regions(loop_info, boundary); // Phase 205-4
// Phase 135 P1 Step 2: Verify header PHI dsts not redefined (after merge)
let phi_dsts: std::collections::HashSet<_> = loop_info
.carrier_phis
.values()
.map(|entry| entry.phi_dst)
.collect();
contract_checks::verify_header_phi_dsts_not_redefined(func, header_block, &phi_dsts);
}