refactor(joinir): make jump_args layout explicit (Phase 256)

This commit is contained in:
2025-12-20 13:04:24 +09:00
parent 1028bd419c
commit 4439d64da3
21 changed files with 715 additions and 195 deletions

View File

@ -19,13 +19,14 @@
//! &remapped_args,
//! block_id,
//! strict_mode,
//! boundary.jump_args_layout,
//! )?;
//! exit_phi_inputs.push((result.block_id, result.expr_result_value));
//! carrier_inputs.extend(result.carrier_values);
//! ```
use crate::mir::join_ir::lowering::carrier_info::CarrierRole;
use crate::mir::join_ir::lowering::inline_boundary::LoopExitBinding;
use crate::mir::join_ir::lowering::inline_boundary::{JumpArgsLayout, LoopExitBinding};
use crate::mir::{BasicBlockId, ValueId};
use std::collections::BTreeMap;
@ -51,8 +52,9 @@ pub struct ExitArgsCollectionResult {
/// # Phase 118 P2 Contract
///
/// - Every LoopState carrier in exit_bindings must have an exit PHI input
/// - jump_args order is assumed to match exit_bindings order, with at most one
/// leading extra slot (legacy layouts)
/// - jump_args order is assumed to match exit_bindings order
/// - Layout is enforced via JumpArgsLayout (no inference)
/// - Extra trailing args are treated as invariants and ignored
/// - This avoids Pattern3-specific assumptions such as "jump_args[0] is loop_var"
///
/// # Fail-Fast Guarantee
@ -76,6 +78,7 @@ impl ExitArgsCollectorBox {
/// * `remapped_args` - Remapped jump_args from JoinIR block (already in host value space)
/// * `block_id` - Source block ID for PHI inputs
/// * `strict_exit` - If true, Fail-Fast on any validation error
/// * `layout` - jump_args layout policy (SSOT from boundary)
///
/// # Returns
///
@ -84,16 +87,17 @@ impl ExitArgsCollectorBox {
///
/// # Phase 118 P2: SSOT Offset Calculation
///
/// The offset is determined by comparing jump_args length with exit_bindings:
/// - `len(jump_args) == len(exit_phi_bindings)`: offset = 0
/// - `len(jump_args) == len(exit_phi_bindings) + 1`: offset = 1 (legacy layout)
/// - Otherwise: error (or warning in non-strict mode)
/// The offset is determined by JumpArgsLayout:
/// - `CarriersOnly`: offset = 0
/// - `ExprResultPlusCarriers`: offset = 1
/// - Length mismatches are errors (or warnings in non-strict mode)
pub fn collect(
&self,
exit_bindings: &[LoopExitBinding],
remapped_args: &[ValueId],
block_id: BasicBlockId,
strict_exit: bool,
layout: JumpArgsLayout,
) -> Result<ExitArgsCollectionResult, String> {
// Filter exit bindings to get only LoopState carriers (skip ConditionOnly)
let exit_phi_bindings: Vec<_> = exit_bindings
@ -111,15 +115,11 @@ impl ExitArgsCollectorBox {
}
// Phase 118 P2: Calculate offset (SSOT)
let offset = self.calculate_offset(
remapped_args.len(),
exit_phi_bindings.len(),
block_id,
strict_exit,
)?;
let offset =
self.calculate_offset(remapped_args.len(), exit_phi_bindings.len(), block_id, strict_exit, layout)?;
// Collect expr result (first jump arg if offset > 0)
let expr_result_value = if offset > 0 {
let expr_result_value = if layout == JumpArgsLayout::ExprResultPlusCarriers && offset > 0 {
remapped_args.first().copied()
} else {
None
@ -168,14 +168,9 @@ impl ExitArgsCollectorBox {
exit_phi_bindings_len: usize,
block_id: BasicBlockId,
strict_exit: bool,
layout: JumpArgsLayout,
) -> Result<usize, String> {
if jump_args_len == exit_phi_bindings_len {
// Direct mapping: jump_args[i] = exit_phi_bindings[i]
Ok(0)
} else if jump_args_len == exit_phi_bindings_len + 1 {
// Legacy layout: jump_args[0] is expr result, jump_args[1..] are carriers
Ok(1)
} else if jump_args_len < exit_phi_bindings_len {
if jump_args_len < exit_phi_bindings_len {
// Too short - missing carriers
let msg = format!(
"[joinir/exit-line] jump_args too short: need {} carrier args (from exit_bindings) but got {} in block {:?}",
@ -188,15 +183,43 @@ impl ExitArgsCollectorBox {
Ok(0) // Best effort: try direct mapping
}
} else {
// Too long - extra args beyond carriers (e.g., invariants in Pattern 7)
// Phase 256 P1.8: Allow excess args as long as we have enough for carriers
// Direct mapping: jump_args[0..N] = exit_phi_bindings[0..N], rest ignored
#[cfg(debug_assertions)]
eprintln!(
"[joinir/exit-line] jump_args has {} extra args (block {:?}), ignoring invariants",
jump_args_len - exit_phi_bindings_len, block_id
);
Ok(0) // Direct mapping: first N args are carriers
match layout {
JumpArgsLayout::CarriersOnly => {
if jump_args_len > exit_phi_bindings_len {
#[cfg(debug_assertions)]
eprintln!(
"[joinir/exit-line] jump_args has {} extra args (block {:?}), ignoring invariants",
jump_args_len - exit_phi_bindings_len,
block_id
);
}
Ok(0)
}
JumpArgsLayout::ExprResultPlusCarriers => {
if jump_args_len >= exit_phi_bindings_len + 1 {
if jump_args_len > exit_phi_bindings_len + 1 {
#[cfg(debug_assertions)]
eprintln!(
"[joinir/exit-line] jump_args has {} extra args (block {:?}), ignoring invariants after expr_result",
jump_args_len - (exit_phi_bindings_len + 1),
block_id
);
}
Ok(1)
} else {
let msg = format!(
"[joinir/exit-line] expr_result layout requires leading slot: carriers={} args={} in block {:?}",
exit_phi_bindings_len, jump_args_len, block_id
);
if strict_exit {
Err(msg)
} else {
eprintln!("[DEBUG-177] {}", msg);
Ok(0)
}
}
}
}
}
}
@ -245,7 +268,13 @@ mod tests {
make_binding("count", CarrierRole::LoopState),
];
let args = vec![ValueId(10), ValueId(20)];
let result = collector.collect(&bindings, &args, BasicBlockId(1), true);
let result = collector.collect(
&bindings,
&args,
BasicBlockId(1),
true,
JumpArgsLayout::CarriersOnly,
);
assert!(result.is_ok());
let res = result.unwrap();
@ -261,7 +290,13 @@ mod tests {
let collector = ExitArgsCollectorBox::new();
let bindings = vec![make_binding("sum", CarrierRole::LoopState)];
let args = vec![ValueId(5), ValueId(10)]; // [expr_result, carrier]
let result = collector.collect(&bindings, &args, BasicBlockId(1), true);
let result = collector.collect(
&bindings,
&args,
BasicBlockId(1),
true,
JumpArgsLayout::ExprResultPlusCarriers,
);
assert!(result.is_ok());
let res = result.unwrap();
@ -278,7 +313,13 @@ mod tests {
make_binding("is_digit", CarrierRole::ConditionOnly), // Skip
];
let args = vec![ValueId(10)]; // Only one arg for LoopState carrier
let result = collector.collect(&bindings, &args, BasicBlockId(1), true);
let result = collector.collect(
&bindings,
&args,
BasicBlockId(1),
true,
JumpArgsLayout::CarriersOnly,
);
assert!(result.is_ok());
let res = result.unwrap();
@ -293,7 +334,13 @@ mod tests {
make_binding("count", CarrierRole::LoopState),
];
let args = vec![ValueId(10)]; // Missing one carrier
let result = collector.collect(&bindings, &args, BasicBlockId(1), true);
let result = collector.collect(
&bindings,
&args,
BasicBlockId(1),
true,
JumpArgsLayout::CarriersOnly,
);
assert!(result.is_err());
assert!(result.unwrap_err().contains("too short"));
@ -307,7 +354,13 @@ mod tests {
make_binding("count", CarrierRole::LoopState),
];
let args = vec![ValueId(10)]; // Missing one carrier
let result = collector.collect(&bindings, &args, BasicBlockId(1), false);
let result = collector.collect(
&bindings,
&args,
BasicBlockId(1),
false,
JumpArgsLayout::CarriersOnly,
);
// Non-strict mode: succeeds with warning
assert!(result.is_ok());
@ -328,4 +381,30 @@ mod tests {
assert_eq!(map["sum"].len(), 2);
assert_eq!(map["count"].len(), 1);
}
#[test]
fn test_collect_extra_invariants_no_expr_result() {
let collector = ExitArgsCollectorBox::new();
let bindings = vec![
make_binding("i", CarrierRole::LoopState),
make_binding("start", CarrierRole::LoopState),
make_binding("result", CarrierRole::LoopState),
];
let args = vec![ValueId(1), ValueId(2), ValueId(3), ValueId(99)]; // extra invariant
let result = collector.collect(
&bindings,
&args,
BasicBlockId(1),
true,
JumpArgsLayout::CarriersOnly,
);
assert!(result.is_ok());
let res = result.unwrap();
assert_eq!(res.expr_result_value, None);
assert_eq!(res.carrier_values.len(), 3);
assert_eq!(res.carrier_values[0].1, (BasicBlockId(1), ValueId(1)));
assert_eq!(res.carrier_values[1].1, (BasicBlockId(1), ValueId(2)));
assert_eq!(res.carrier_values[2].1, (BasicBlockId(1), ValueId(3)));
}
}

View File

@ -366,6 +366,7 @@ pub(super) fn merge_and_rewrite(
// We skip merging continuation functions, so any tail-call to k_exit must be
// lowered as an exit jump to `exit_block_id` (and contribute exit values).
let mut k_exit_lowering_decision: Option<LoweringDecision> = None;
let mut k_exit_jump_args: Option<Vec<ValueId>> = None;
// Phase 177-3: Check if this block is the loop header with PHI nodes
let is_loop_header_with_phi =
@ -462,6 +463,7 @@ pub(super) fn merge_and_rewrite(
if let Some(decision) = tail_call_policy.classify_tail_call(callee_name, &remapped_args) {
// This is a k_exit tail call - policy says normalize to exit jump
k_exit_lowering_decision = Some(decision);
k_exit_jump_args = old_block.jump_args.clone();
found_tail_call = true;
if debug {
log!(
@ -595,14 +597,17 @@ pub(super) fn merge_and_rewrite(
.map(|name| continuation_candidates.contains(name))
.unwrap_or(false);
let is_recursive_call = target_func_name
.as_ref()
.map(|name| name == func_name)
.unwrap_or(false);
if let Some(ref target_func_name) = target_func_name {
if let Some(target_params) = function_params.get(target_func_name) {
// Phase 256 P1.10: Detect call type for param binding strategy
// - Recursive call (loop_step → loop_step): Skip all param bindings (PHI handles via latch edges)
// - Exit call (loop_step → k_exit): Skip all param bindings (exit PHI handles via exit edges)
// - Header entry (main → loop_step at header): Skip all (PHI handles via entry edges)
let is_recursive_call = target_func_name == func_name;
// Phase 256 P1.10: Detect if target is the loop entry function
// When calling the loop entry function, header PHIs will define the carriers.
// We should skip param bindings in this case.
@ -753,97 +758,99 @@ pub(super) fn merge_and_rewrite(
// At this point, args[0] contains the updated loop variable value (i_next).
// We record this as the latch incoming so that the header PHI can reference
// the correct SSA value at loop continuation time.
if let Some(b) = boundary {
if let Some(loop_var_name) = &b.loop_var_name {
if !args.is_empty() {
// The first arg is the loop variable's updated value
let latch_value = args[0];
// The current block (new_block_id) is the latch block
loop_header_phi_info.set_latch_incoming(
loop_var_name,
new_block_id, // latch block
latch_value, // updated loop var value (i_next)
);
if debug {
log!(
true,
"[cf_loop/joinir] Phase 33-16: Set latch incoming for '{}': block={:?}, value={:?}",
loop_var_name, new_block_id, latch_value
if is_recursive_call {
if let Some(b) = boundary {
if let Some(loop_var_name) = &b.loop_var_name {
if !args.is_empty() {
// The first arg is the loop variable's updated value
let latch_value = args[0];
// The current block (new_block_id) is the latch block
loop_header_phi_info.set_latch_incoming(
loop_var_name,
new_block_id, // latch block
latch_value, // updated loop var value (i_next)
);
}
}
}
// Phase 33-20: Also set latch incoming for other carriers from exit_bindings
//
// args layout depends on whether we have a dedicated loop variable:
// - loop_var_name = Some(..): args[0] is the loop variable, args[1..] are other carriers
// - loop_var_name = None: args[0..] are carriers (no reserved loop-var slot)
//
// Phase 176-4 FIX: exit_bindings may include the loop variable itself; skip it when loop_var_name is set.
let mut carrier_arg_idx = if b.loop_var_name.is_some() { 1 } else { 0 };
for binding in b.exit_bindings.iter() {
// Skip if this binding is for the loop variable (already handled)
if let Some(ref loop_var) = b.loop_var_name {
if &binding.carrier_name == loop_var {
if debug {
log!(
true,
"[cf_loop/joinir] Phase 176-4: Skipping loop variable '{}' in exit_bindings (handled separately)",
binding.carrier_name
"[cf_loop/joinir] Phase 33-16: Set latch incoming for '{}': block={:?}, value={:?}",
loop_var_name, new_block_id, latch_value
);
}
continue; // Skip loop variable
}
}
// Process non-loop-variable carrier
if carrier_arg_idx < args.len() {
let latch_value = args[carrier_arg_idx];
loop_header_phi_info.set_latch_incoming(
&binding.carrier_name,
new_block_id,
latch_value,
);
// Phase 33-20: Also set latch incoming for other carriers from exit_bindings
//
// args layout depends on whether we have a dedicated loop variable:
// - loop_var_name = Some(..): args[0] is the loop variable, args[1..] are other carriers
// - loop_var_name = None: args[0..] are carriers (no reserved loop-var slot)
//
// Phase 176-4 FIX: exit_bindings may include the loop variable itself; skip it when loop_var_name is set.
let mut carrier_arg_idx = if b.loop_var_name.is_some() { 1 } else { 0 };
for binding in b.exit_bindings.iter() {
// Skip if this binding is for the loop variable (already handled)
if let Some(ref loop_var) = b.loop_var_name {
if &binding.carrier_name == loop_var {
if debug {
log!(
true,
"[cf_loop/joinir] Phase 176-4: Skipping loop variable '{}' in exit_bindings (handled separately)",
binding.carrier_name
);
}
continue; // Skip loop variable
}
}
if debug {
// Process non-loop-variable carrier
if carrier_arg_idx < args.len() {
let latch_value = args[carrier_arg_idx];
loop_header_phi_info.set_latch_incoming(
&binding.carrier_name,
new_block_id,
latch_value,
);
if debug {
log!(
true,
"[cf_loop/joinir] Phase 176-4: Set latch incoming for carrier '{}': block={:?}, value={:?} (arg[{}])",
binding.carrier_name, new_block_id, latch_value, carrier_arg_idx
);
}
carrier_arg_idx += 1;
} else if debug {
log!(
true,
"[cf_loop/joinir] Phase 176-4: Set latch incoming for carrier '{}': block={:?}, value={:?} (arg[{}])",
binding.carrier_name, new_block_id, latch_value, carrier_arg_idx
"[cf_loop/joinir] Phase 33-20 WARNING: No arg for carrier '{}' at index {}",
binding.carrier_name, carrier_arg_idx
);
}
carrier_arg_idx += 1;
} else if debug {
log!(
true,
"[cf_loop/joinir] Phase 33-20 WARNING: No arg for carrier '{}' at index {}",
binding.carrier_name, carrier_arg_idx
);
}
}
// Phase 255 P2: Set latch incoming for loop invariants
//
// Loop invariants don't have corresponding tail call args because they
// are not modified by the loop. Their latch incoming is the PHI dst itself
// (same value across all iterations).
for (inv_name, _inv_host_id) in b.loop_invariants.iter() {
if let Some(phi_dst) = loop_header_phi_info.get_carrier_phi(inv_name) {
// For invariants, latch incoming is the PHI dst (same value)
loop_header_phi_info.set_latch_incoming(
inv_name,
new_block_id, // latch block
phi_dst, // same as PHI dst (invariant value)
);
if debug {
log!(
true,
"[cf_loop/joinir] Phase 255 P2: Set latch incoming for loop invariant '{}': block={:?}, value={:?} (PHI dst itself)",
inv_name, new_block_id, phi_dst
// Phase 255 P2: Set latch incoming for loop invariants
//
// Loop invariants don't have corresponding tail call args because they
// are not modified by the loop. Their latch incoming is the PHI dst itself
// (same value across all iterations).
for (inv_name, _inv_host_id) in b.loop_invariants.iter() {
if let Some(phi_dst) = loop_header_phi_info.get_carrier_phi(inv_name) {
// For invariants, latch incoming is the PHI dst (same value)
loop_header_phi_info.set_latch_incoming(
inv_name,
new_block_id, // latch block
phi_dst, // same as PHI dst (invariant value)
);
if debug {
log!(
true,
"[cf_loop/joinir] Phase 255 P2: Set latch incoming for loop invariant '{}': block={:?}, value={:?} (PHI dst itself)",
inv_name, new_block_id, phi_dst
);
}
}
}
}
@ -963,6 +970,7 @@ pub(super) fn merge_and_rewrite(
&remapped_args,
new_block_id,
strict_exit,
b.jump_args_layout,
)?;
// Add expr_result to exit_phi_inputs (if present)
@ -1080,8 +1088,22 @@ pub(super) fn merge_and_rewrite(
// Collect exit values from k_exit arguments
if let Some(b) = boundary {
let collector = ExitArgsCollectorBox::new();
let exit_args: Vec<ValueId> = if let Some(ref jump_args) = k_exit_jump_args {
jump_args
.iter()
.map(|&arg| remapper.remap_value(arg))
.collect()
} else {
args.clone()
};
let collection_result =
collector.collect(&b.exit_bindings, &args, new_block_id, strict_exit)?;
collector.collect(
&b.exit_bindings,
&exit_args,
new_block_id,
strict_exit,
b.jump_args_layout,
)?;
if let Some(expr_result_value) = collection_result.expr_result_value {
exit_phi_inputs.push((collection_result.block_id, expr_result_value));
}

View File

@ -40,6 +40,7 @@ pub use merge_result::MergeContracts;
use super::trace;
use crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary;
use crate::mir::join_ir::lowering::error_tags;
use crate::mir::{MirModule, ValueId};
use std::collections::BTreeMap;
@ -144,6 +145,16 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
debug,
);
if let Some(boundary) = boundary {
if let Err(msg) = boundary.validate_jump_args_layout() {
return Err(error_tags::freeze_with_hint(
"phase256/jump_args_layout",
&msg,
"set JoinInlineBoundary.jump_args_layout via builder and avoid expr_result/carrier mismatch",
));
}
}
if verbose {
if let Some(boundary) = boundary {
let exit_summary: Vec<String> = boundary
@ -404,9 +415,17 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
debug && !reserved_phi_dsts.is_empty(),
);
// Phase 201-A: Also reserve JoinIR parameter ValueIds to avoid collisions
let mut reserved_value_ids = reserved_phi_dsts.clone();
for params in function_params.values() {
for &param in params {
reserved_value_ids.insert(param);
}
}
// Phase 201-A: Set reserved IDs in MirBuilder so next_value_id() skips them
// This protects against carrier corruption when break conditions emit Const instructions
builder.comp_ctx.reserved_value_ids = reserved_phi_dsts.clone();
builder.comp_ctx.reserved_value_ids = reserved_value_ids.clone();
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 201-A: Set builder.comp_ctx.reserved_value_ids = {:?}",
@ -420,7 +439,7 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
builder,
&used_values,
&mut remapper,
&reserved_phi_dsts,
&reserved_value_ids,
debug,
)?;
@ -533,8 +552,18 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
}
}
let main_func_name = "join_func_0";
let loop_step_func_name = "join_func_1";
let canonical_main = crate::mir::join_ir::lowering::canonical_names::MAIN;
let canonical_loop_step = crate::mir::join_ir::lowering::canonical_names::LOOP_STEP;
let main_func_name = if function_params.contains_key(canonical_main) {
canonical_main
} else {
"join_func_0"
};
let loop_step_func_name = if function_params.contains_key(canonical_loop_step) {
canonical_loop_step
} else {
"join_func_1"
};
if function_params.get(main_func_name).is_none() {
trace.stderr_if(
@ -1046,19 +1075,6 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
}
}
// Phase 201-A: Clear reserved ValueIds after merge completes
// Future loops will set their own reserved IDs
if !builder.comp_ctx.reserved_value_ids.is_empty() {
trace.stderr_if(
&format!(
"[cf_loop/joinir] Phase 201-A: Clearing reserved_value_ids (was {:?})",
builder.comp_ctx.reserved_value_ids
),
debug,
);
builder.comp_ctx.reserved_value_ids.clear();
}
// Phase 246-EX-FIX: Handle loop variable expr_result separately from carrier expr_result
//
// The loop variable (e.g., 'i') is returned via exit_phi_result_id, not carrier_phis.

View File

@ -381,6 +381,7 @@ mod tests {
condition_inputs: vec![], // Phase 171: Add missing field
condition_bindings: vec![], // Phase 171-fix: Add missing field
expr_result: None, // Phase 33-14: Add missing field
jump_args_layout: crate::mir::join_ir::lowering::inline_boundary::JumpArgsLayout::CarriersOnly,
loop_var_name: None, // Phase 33-16: Add missing field
carrier_info: None, // Phase 228: Add missing field
loop_invariants: vec![], // Phase 255 P2: Add missing field

View File

@ -135,6 +135,7 @@ mod tests {
condition_inputs: vec![], // Phase 171: Add missing field
condition_bindings: vec![], // Phase 171-fix: Add missing field
expr_result: None, // Phase 33-14: Add missing field
jump_args_layout: crate::mir::join_ir::lowering::inline_boundary::JumpArgsLayout::CarriersOnly,
loop_var_name: None, // Phase 33-16: Add missing field
carrier_info: None, // Phase 228: Add missing field
loop_invariants: vec![], // Phase 255 P2: Add missing field

View File

@ -365,7 +365,7 @@ impl MirBuilder {
/// - 2 carriers: i (loop index), start (segment start)
/// - 3 invariants: s (haystack), sep (separator), result (accumulator)
/// - Conditional step via Select (P0 pragmatic approach)
/// - Post-loop segment push in k_exit
/// - Post-loop segment push stays in host AST (k_exit is pure return)
pub(crate) fn cf_loop_pattern7_split_scan_impl(
&mut self,
condition: &ASTNode,