fix(joinir): Phase 177-3 ValueId collision fix for multi-carrier loops

Root cause: JoinIR ValueId collision between function parameters and condition bindings
- Same ValueId used for both `result_init` (carrier param) and `limit` (condition var)
- Phase 33-21 was overwriting condition bindings when remapping carrier PHIs

Fix implemented (Option B - immediate protection):
1. Phase 177-3: Protect condition-only variables from Phase 33-21 override
   - Collect condition_bindings that are NOT carriers (by checking exit_bindings)
   - Skip remapping for these protected ValueIds
2. Phase 177-3-B: Handle body-only carriers explicitly
   - Carriers that appear in condition_bindings (added by Phase 176-5)
   - Map them to correct PHI dsts by name lookup

Investigation tools added:
- [DEBUG-177] trace logs for remapper state tracking
- Phase 177-3 protection logging
- BoundaryInjector PHI collision detection

Test results:
-  Integer multi-carrier test: Output 3 (expected)
- ⚠️ String test: RC=0 but empty output (separate issue - string concat emit)

Design docs created:
- phase177-parse-string-design.md: _parse_string loop analysis
- phase177-carrier-evolution.md: Carrier progression Phase 174-179

Next: Investigate string concatenation emit for full _parse_string support
This commit is contained in:
nyash-codex
2025-12-08 16:34:04 +09:00
parent 99d329096f
commit 7a01ffe522
6 changed files with 646 additions and 40 deletions

View File

@ -12,15 +12,20 @@ use super::super::trace;
/// Phase 1: Allocate new block IDs for ALL functions (Phase 189)
///
/// DETERMINISM: Sort functions and blocks by name/ID to ensure consistent iteration order
///
/// Returns: (JoinIrIdRemapper, exit_block_id)
pub(super) fn allocate_blocks(
builder: &mut crate::mir::builder::MirBuilder,
mir_module: &MirModule,
debug: bool,
) -> Result<JoinIrIdRemapper, String> {
) -> Result<(JoinIrIdRemapper, crate::mir::BasicBlockId), String> {
let mut remapper = JoinIrIdRemapper::new();
// Create exit block for Return conversion (single for all functions)
let _exit_block_id = builder.block_gen.next();
// Phase 177-3: Allocate exit block FIRST to ensure it doesn't conflict with JoinIR blocks
// This exit_block_id will be returned and used by instruction_rewriter and exit_phi_builder
let exit_block_id = builder.block_gen.next();
eprintln!("[cf_loop/joinir/block_allocator] Phase 177-3: Allocated exit_block_id = {:?}", exit_block_id);
// Phase 195: Use unified trace
trace::trace().blocks("allocator", "Phase 189: Allocating block IDs for all functions");
@ -61,5 +66,6 @@ pub(super) fn allocate_blocks(
);
}
Ok(remapper)
// Phase 177-3: Return both remapper and exit_block_id
Ok((remapper, exit_block_id))
}

View File

@ -30,6 +30,11 @@ use std::collections::HashMap;
/// Uses extracted modules:
/// - tail_call_classifier: TailCallKind classification
/// - merge_result: MergeResult data structure
///
/// # Phase 177-3
///
/// `exit_block_id` is now passed in from block_allocator to ensure it doesn't
/// conflict with JoinIR function blocks.
pub(super) fn merge_and_rewrite(
builder: &mut crate::mir::builder::MirBuilder,
mir_module: &MirModule,
@ -38,13 +43,11 @@ pub(super) fn merge_and_rewrite(
function_params: &HashMap<String, Vec<ValueId>>,
boundary: Option<&JoinInlineBoundary>,
loop_header_phi_info: &mut LoopHeaderPhiInfo,
exit_block_id: BasicBlockId,
debug: bool,
) -> Result<MergeResult, String> {
// Create exit block for Return conversion (single for all functions)
let exit_block_id = builder.block_gen.next();
if debug {
eprintln!("[cf_loop/joinir] Exit block: {:?}", exit_block_id);
}
)-> Result<MergeResult, String> {
// Phase 177-3: exit_block_id is now passed in from block_allocator
eprintln!("[cf_loop/joinir/instruction_rewriter] Phase 177-3: Using exit_block_id = {:?}", exit_block_id);
// Phase 189 FIX: Build set of boundary join_inputs to skip their Const initializers
let boundary_input_set: std::collections::HashSet<ValueId> = boundary
@ -172,8 +175,58 @@ pub(super) fn merge_and_rewrite(
let mut found_tail_call = false;
let mut tail_call_target: Option<(BasicBlockId, Vec<ValueId>)> = None;
// Phase 177-3: Check if this block is the loop header with PHI nodes
let is_loop_header_with_phi = is_loop_entry_point && !loop_header_phi_info.carrier_phis.is_empty();
if is_loop_entry_point {
eprintln!(
"[cf_loop/joinir] Phase 177-3 DEBUG: is_loop_entry_point={}, carrier_phis.len()={}, is_loop_header_with_phi={}",
is_loop_entry_point,
loop_header_phi_info.carrier_phis.len(),
is_loop_header_with_phi
);
}
// Phase 177-3: Collect PHI dst IDs that will be created for this block
// (if this is the loop header)
let phi_dst_ids_for_block: std::collections::HashSet<ValueId> = if is_loop_header_with_phi {
loop_header_phi_info.carrier_phis.values()
.map(|entry| entry.phi_dst)
.collect()
} else {
std::collections::HashSet::new()
};
if is_loop_header_with_phi && !phi_dst_ids_for_block.is_empty() {
eprintln!(
"[cf_loop/joinir] Phase 177-3: Loop header with {} PHI dsts to protect: {:?}",
phi_dst_ids_for_block.len(),
phi_dst_ids_for_block
);
}
// First pass: Process all instructions, identify tail calls
for inst in &old_block.instructions {
// Phase 177-3: Skip Copy instructions that would overwrite PHI dsts
// The loop header PHIs will provide these values, so copies are redundant/harmful
if is_loop_header_with_phi {
if let MirInstruction::Copy { dst, src } = inst {
// Check if this copy's dst is a PHI dst (after remapping)
let dst_remapped = remapper.get_value(*dst).unwrap_or(*dst);
eprintln!(
"[cf_loop/joinir] Phase 177-3 DEBUG: Copy {:?} = {:?}, dst_remapped = {:?}, in phi_dsts = {}",
dst, src, dst_remapped, phi_dst_ids_for_block.contains(&dst_remapped)
);
if phi_dst_ids_for_block.contains(&dst_remapped) {
eprintln!(
"[cf_loop/joinir] Phase 177-3: ✅ Skipping loop header Copy to PHI dst {:?} (original {:?})",
dst_remapped, dst
);
continue; // Skip - would overwrite PHI
}
}
}
// Phase 189: Skip Const String instructions that define function names
if let MirInstruction::Const { dst, value } = inst {
if let crate::mir::types::ConstValue::String(_) = value {
@ -653,15 +706,30 @@ pub(super) fn merge_and_rewrite(
}
}
// Phase 177-3: Collect PHI dst IDs from loop_header_phi_info
let phi_dst_ids: std::collections::HashSet<ValueId> = loop_header_phi_info
.carrier_phis
.values()
.map(|entry| entry.phi_dst)
.collect();
// Use BoundaryInjector to inject Copy instructions
// Phase 177-3 Option B: Returns reallocations map for condition_bindings with PHI collisions
if let Some(ref mut current_func) = builder.current_function {
BoundaryInjector::inject_boundary_copies(
let _reallocations = BoundaryInjector::inject_boundary_copies(
current_func,
entry_block_remapped,
boundary,
&value_map_for_injector,
&phi_dst_ids,
debug,
)?;
// Note: reallocations map is returned but not currently used.
// If condition variables need to be referenced after this point,
// the reallocations map would need to be applied to update references.
// For now, condition_bindings are read-only variables used only in
// the loop condition, so no further updates are needed.
}
}

View File

@ -80,7 +80,8 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
}
// Phase 1: Allocate block IDs for all functions
let mut remapper = block_allocator::allocate_blocks(builder, mir_module, debug)?;
// Phase 177-3: block_allocator now returns exit_block_id to avoid conflicts
let (mut remapper, exit_block_id) = block_allocator::allocate_blocks(builder, mir_module, debug)?;
// Phase 2: Collect values from all functions
let (mut used_values, value_to_func_name, function_params) =
@ -113,6 +114,30 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
// Phase 3: Remap ValueIds
remap_values(builder, &used_values, &mut remapper, debug)?;
// Phase 177-3 DEBUG: Verify remapper state after Phase 3
eprintln!("[DEBUG-177] === Remapper state after Phase 3 ===");
eprintln!("[DEBUG-177] used_values count: {}", used_values.len());
for value_id in &used_values {
if let Some(remapped) = remapper.get_value(*value_id) {
eprintln!("[DEBUG-177] JoinIR {:?} → Host {:?}", value_id, remapped);
} else {
eprintln!("[DEBUG-177] JoinIR {:?} → NOT FOUND ❌", value_id);
}
}
// Check condition_bindings specifically
if let Some(boundary) = boundary {
eprintln!("[DEBUG-177] === Condition bindings check ===");
for binding in &boundary.condition_bindings {
let lookup_result = remapper.get_value(binding.join_value);
eprintln!(
"[DEBUG-177] '{}': JoinIR {:?}{:?}",
binding.name, binding.join_value, lookup_result
);
}
}
eprintln!("[DEBUG-177] ==============================");
// Phase 3.5: Build loop header PHIs (if loop pattern with loop_var_name)
//
// We need to know PHI dsts before instruction_rewriter runs, so that:
@ -215,9 +240,47 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
// 2. loop body uses PHI result
// 3. tail call args are correctly routed
// Map main's parameters
// MIR function keys use join_func_N format from join_func_name()
// Phase 177-3 fix: Protect condition-ONLY bindings from being overridden to PHI dsts
//
// Problem: condition_bindings may contain:
// 1. True condition-only variables (e.g., 'limit' in loop(i < limit)) - NOT carriers
// 2. Body-only carriers added by Phase 176-5 (e.g., 'result') - ARE carriers
//
// We must ONLY protect (1), not (2), because:
// - Condition-only vars should keep their HOST mapping (e.g., limit = %8)
// - Body-only carriers MUST be remapped to PHI dsts (e.g., result = %24)
//
// Solution: Protect condition_bindings that are NOT in exit_bindings (i.e., not carriers)
let carrier_names: std::collections::HashSet<&str> = boundary
.exit_bindings
.iter()
.map(|eb| eb.carrier_name.as_str())
.collect();
let condition_binding_ids: std::collections::HashSet<ValueId> = boundary
.condition_bindings
.iter()
.filter(|cb| !carrier_names.contains(cb.name.as_str()))
.map(|cb| cb.join_value)
.collect();
if !condition_binding_ids.is_empty() {
eprintln!(
"[cf_loop/joinir] Phase 177-3: Protected ValueIds (condition-only, not carriers): {:?}",
condition_binding_ids
);
for cb in &boundary.condition_bindings {
let is_carrier = carrier_names.contains(cb.name.as_str());
eprintln!(
"[cf_loop/joinir] Phase 177-3: '{}': JoinIR {:?} (carrier={})",
cb.name, cb.join_value, is_carrier
);
}
}
let main_func_name = "join_func_0";
let loop_step_func_name = "join_func_1";
if function_params.get(main_func_name).is_none() {
eprintln!(
"[cf_loop/joinir] WARNING: function_params.get('{}') returned None. Available keys: {:?}",
@ -226,30 +289,57 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
);
}
if let Some(main_params) = function_params.get(main_func_name) {
if debug {
eprintln!(
"[cf_loop/joinir] Phase 33-21: main ({}) params: {:?}",
main_func_name, main_params
);
}
eprintln!(
"[DEBUG-177] Phase 33-21: main ({}) params: {:?}",
main_func_name, main_params
);
eprintln!(
"[DEBUG-177] Phase 33-21: carrier_phis count: {}, names: {:?}",
phi_info.carrier_phis.len(),
phi_info.carrier_phis.iter().map(|(n, _)| n.as_str()).collect::<Vec<_>>()
);
// Map main's parameters to header PHI dsts
// main params: [i_init, carrier1_init, ...]
// carrier_phis: [("i", entry), ("sum", entry), ...]
for (idx, (carrier_name, entry)) in phi_info.carrier_phis.iter().enumerate() {
if let Some(&main_param) = main_params.get(idx) {
if debug {
// Phase 177-3: Don't override condition_bindings
if condition_binding_ids.contains(&main_param) {
eprintln!(
"[cf_loop/joinir] Phase 33-21: REMAP main param {:?} {:?} ('{}')",
main_param, entry.phi_dst, carrier_name
"[cf_loop/joinir] Phase 177-3: Skipping override for condition_binding {:?} ('{}')",
main_param, carrier_name
);
continue;
}
eprintln!(
"[DEBUG-177] Phase 33-21: REMAP main param[{}] {:?}{:?} ('{}')",
idx, main_param, entry.phi_dst, carrier_name
);
remapper.set_value(main_param, entry.phi_dst);
}
}
}
// Phase 177-3-B: Handle body-only carriers
// These are carriers in carrier_phis that are NOT in main function params.
// They appear in condition_bindings (added by Phase 176-5) but need PHI remapping.
for (carrier_name, entry) in &phi_info.carrier_phis {
// Check if this carrier has a condition_binding
if let Some(binding) = boundary.condition_bindings.iter().find(|cb| cb.name == *carrier_name) {
// Skip if it's a true condition-only variable (already protected above)
if condition_binding_ids.contains(&binding.join_value) {
continue;
}
// This is a body-only carrier - remap it to PHI dst
eprintln!(
"[cf_loop/joinir] Phase 177-3-B: Body-only carrier '{}': JoinIR {:?} → PHI {:?}",
carrier_name, binding.join_value, entry.phi_dst
);
remapper.set_value(binding.join_value, entry.phi_dst);
}
}
// Map loop_step's parameters
let loop_step_func_name = "join_func_1";
if debug {
eprintln!(
"[cf_loop/joinir] Phase 33-21: function_params keys: {:?}",
@ -275,6 +365,14 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
// carrier_phis: [("i", entry), ("sum", entry), ...]
for (idx, (carrier_name, entry)) in phi_info.carrier_phis.iter().enumerate() {
if let Some(&loop_step_param) = loop_step_params.get(idx) {
// Phase 177-3: Don't override condition_bindings
if condition_binding_ids.contains(&loop_step_param) {
eprintln!(
"[cf_loop/joinir] Phase 177-3: Skipping override for condition_binding {:?} ('{}')",
loop_step_param, carrier_name
);
continue;
}
if debug {
eprintln!(
"[cf_loop/joinir] Phase 33-21: REMAP loop_step param {:?}{:?} ('{}')",
@ -290,11 +388,18 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
// Fallback: Use old behavior (ValueId(0), ValueId(1), ...)
// This handles patterns that don't have loop_step function
if let Some(phi_dst) = phi_info.get_carrier_phi(loop_var_name) {
remapper.set_value(ValueId(0), phi_dst);
if debug {
// Phase 177-3: Don't override condition_bindings
if !condition_binding_ids.contains(&ValueId(0)) {
remapper.set_value(ValueId(0), phi_dst);
if debug {
eprintln!(
"[cf_loop/joinir] Phase 33-16 fallback: Override remap ValueId(0) → {:?} (PHI dst)",
phi_dst
);
}
} else {
eprintln!(
"[cf_loop/joinir] Phase 33-16 fallback: Override remap ValueId(0) → {:?} (PHI dst)",
phi_dst
"[cf_loop/joinir] Phase 177-3 fallback: Skipping override for condition_binding ValueId(0)"
);
}
}
@ -303,16 +408,34 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
continue;
}
let join_value_id = ValueId(idx as u32);
remapper.set_value(join_value_id, entry.phi_dst);
if debug {
// Phase 177-3: Don't override condition_bindings
if !condition_binding_ids.contains(&join_value_id) {
remapper.set_value(join_value_id, entry.phi_dst);
if debug {
eprintln!(
"[cf_loop/joinir] Phase 33-20 fallback: Override remap {:?}{:?} (carrier '{}' PHI dst)",
join_value_id, entry.phi_dst, carrier_name
);
}
} else {
eprintln!(
"[cf_loop/joinir] Phase 33-20 fallback: Override remap {:?}{:?} (carrier '{}' PHI dst)",
join_value_id, entry.phi_dst, carrier_name
"[cf_loop/joinir] Phase 177-3 fallback: Skipping override for condition_binding {:?} ('{}')",
join_value_id, carrier_name
);
}
}
}
// Phase 177-3 DEBUG: Check remapper after Phase 33-21 overrides
eprintln!("[DEBUG-177] === Remapper state after Phase 33-21 ===");
for binding in &boundary.condition_bindings {
let lookup_result = remapper.get_value(binding.join_value);
eprintln!(
"[DEBUG-177] '{}': JoinIR {:?}{:?} (after 33-21)",
binding.name, binding.join_value, lookup_result
);
}
phi_info
} else {
LoopHeaderPhiInfo::empty(entry_block_remapped)
@ -323,6 +446,7 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
// Phase 4: Merge blocks and rewrite instructions
// Phase 33-16: Pass mutable loop_header_phi_info for latch_incoming tracking
// Phase 177-3: Pass exit_block_id from allocator to avoid conflicts
let merge_result = instruction_rewriter::merge_and_rewrite(
builder,
mir_module,
@ -331,6 +455,7 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
&function_params,
boundary,
&mut loop_header_phi_info,
exit_block_id,
debug,
)?;