feat(joinir): Phase 200-B/C/D capture analysis + Phase 201-A reserved_value_ids infra

Phase 200-B: FunctionScopeCaptureAnalyzer implementation
- analyze_captured_vars_v2() with structural loop matching
- CapturedEnv for immutable function-scope variables
- ParamRole::Condition for condition-only variables

Phase 200-C: ConditionEnvBuilder extension
- build_with_captures() integrates CapturedEnv into ConditionEnv
- fn_body propagation through LoopPatternContext to Pattern 2

Phase 200-D: E2E verification
- capture detection working for base, limit, n etc.
- Test files: phase200d_capture_minimal.hako, phase200d_capture_in_condition.hako

Phase 201-A: MirBuilder reserved_value_ids infrastructure
- reserved_value_ids: HashSet<ValueId> field in MirBuilder
- next_value_id() skips reserved IDs
- merge/mod.rs sets/clears reserved IDs around JoinIR merge

Phase 201: JoinValueSpace design document
- Param/Local/PHI disjoint regions design
- API: alloc_param(), alloc_local(), reserve_phi()
- Migration plan for Pattern 1-4 lowerers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-12-09 18:32:03 +09:00
parent 3a9b44c4e2
commit 32a91e31ac
24 changed files with 2815 additions and 193 deletions

View File

@ -156,6 +156,17 @@ pub struct MirBuilder {
/// - NYASH_REGION_TRACE=1 のときだけ使われる開発用メタデータだよ。
pub(super) current_region_stack: Vec<RegionId>,
/// Phase 200-C: Original function body AST for capture analysis
/// Stored temporarily during function lowering to support FunctionScopeCaptureAnalyzer.
/// None when not lowering a function, or when fn_body is not available.
pub(super) fn_body_ast: Option<Vec<ASTNode>>,
/// Phase 201-A: Reserved ValueIds that must not be allocated
/// These are PHI dst ValueIds created by LoopHeaderPhiBuilder.
/// When next_value_id() encounters a reserved ID, it skips to the next.
/// Cleared after JoinIR merge completes.
pub(super) reserved_value_ids: HashSet<ValueId>,
// include guards removed
/// Loop context stacks for lowering break/continue inside nested control flow
/// Top of stack corresponds to the innermost active loop
@ -273,6 +284,8 @@ impl MirBuilder {
method_tail_index_source_len: 0,
current_region_stack: Vec::new(),
fn_body_ast: None, // Phase 200-C: Initialize to None
reserved_value_ids: HashSet::new(), // Phase 201-A: Initialize to empty
loop_header_stack: Vec::new(),
loop_exit_stack: Vec::new(),

View File

@ -343,6 +343,10 @@ impl MirBuilder {
params: Vec<String>,
body: Vec<ASTNode>,
) -> Result<(), String> {
// Phase 200-C: Store fn_body for capture analysis
eprintln!("[lower_static_method_as_function] Storing fn_body with {} nodes for '{}'", body.len(), func_name);
self.fn_body_ast = Some(body.clone());
// Step 1: Context準備
let mut ctx = self.prepare_lowering_context(&func_name);
@ -369,6 +373,9 @@ impl MirBuilder {
// Step 6: Context復元
self.restore_lowering_context(ctx);
// Phase 200-C: Clear fn_body_ast after function lowering
self.fn_body_ast = None;
Ok(())
}
@ -380,6 +387,9 @@ impl MirBuilder {
params: Vec<String>,
body: Vec<ASTNode>,
) -> Result<(), String> {
// Phase 200-C: Store fn_body for capture analysis
self.fn_body_ast = Some(body.clone());
// Step 1: Context準備instance methodでは不要だがAPI統一のため
let mut ctx = LoweringContext {
context_active: false,
@ -458,6 +468,9 @@ impl MirBuilder {
}
self.current_slot_registry = ctx.saved_slot_registry;
// Phase 200-C: Clear fn_body_ast after function lowering
self.fn_body_ast = None;
Ok(())
}
}

View File

@ -100,6 +100,31 @@ impl LoopHeaderPhiInfo {
pub fn all_latch_set(&self) -> bool {
self.carrier_phis.values().all(|e| e.latch_incoming.is_some())
}
/// Phase 201-A: Get reserved ValueIds (PHI dsts that must not be overwritten)
///
/// Returns a set of ValueIds that are used as PHI destinations in the loop header.
/// These IDs must not be reused for other purposes (e.g., Const instructions)
/// to prevent corruption of carrier values.
///
/// Includes:
/// - All carrier PHI dst ValueIds
/// - Expression result PHI dst (if present)
pub fn reserved_value_ids(&self) -> std::collections::HashSet<ValueId> {
let mut reserved = std::collections::HashSet::new();
// Add all carrier PHI dsts
for entry in self.carrier_phis.values() {
reserved.insert(entry.phi_dst);
}
// Add expression result PHI dst (if present)
if let Some(dst) = self.expr_result_phi {
reserved.insert(dst);
}
reserved
}
}
#[cfg(test)]

View File

@ -111,8 +111,93 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
}
}
// Phase 3: Remap ValueIds
remap_values(builder, &used_values, &mut remapper, debug)?;
// Phase 201-A: Build loop header PHI info BEFORE Phase 3
//
// We need to allocate PHI dst ValueIds before remap_values() runs,
// to prevent conflicts where a Const instruction gets a ValueId that
// will later be used as a PHI dst, causing carrier value corruption.
//
// This is a reordering of Phase 3 and Phase 3.5 logic.
let mut loop_header_phi_info = if let Some(boundary) = boundary {
if let Some(loop_var_name) = &boundary.loop_var_name {
// Get entry function and block for building PHI info
let (entry_func_name, entry_func) = mir_module
.functions
.iter()
.next()
.ok_or("JoinIR module has no functions (Phase 201-A)")?;
let entry_block_remapped = remapper
.get_block(entry_func_name, entry_func.entry_block)
.ok_or_else(|| format!("Entry block not found for {} (Phase 201-A)", entry_func_name))?;
// Get host's current block as the entry edge
let host_entry_block = builder.current_block.ok_or(
"Phase 201-A: No current block when building header PHIs"
)?;
// Get loop variable's initial value from HOST
let loop_var_init = boundary.host_inputs.first().copied().ok_or(
"Phase 201-A: No host_inputs in boundary for loop_var_init"
)?;
// Extract other carriers from exit_bindings
let other_carriers: Vec<(String, ValueId)> = boundary.exit_bindings
.iter()
.filter(|b| b.carrier_name != *loop_var_name)
.map(|b| (b.carrier_name.clone(), b.host_slot))
.collect();
if debug {
eprintln!(
"[cf_loop/joinir] Phase 201-A: Pre-building header PHIs for loop_var='{}' at {:?}",
loop_var_name, entry_block_remapped
);
eprintln!(
"[cf_loop/joinir] loop_var_init={:?}, carriers={:?}",
loop_var_init,
other_carriers.iter().map(|(n, _)| n.as_str()).collect::<Vec<_>>()
);
}
// Build PHI info (this allocates PHI dst ValueIds)
LoopHeaderPhiBuilder::build(
builder,
entry_block_remapped,
host_entry_block,
loop_var_name,
loop_var_init,
&other_carriers,
boundary.expr_result.is_some(),
debug,
)?
} else {
LoopHeaderPhiInfo::empty(remapper.get_block(
mir_module.functions.iter().next().unwrap().0,
mir_module.functions.iter().next().unwrap().1.entry_block
).unwrap())
}
} else {
LoopHeaderPhiInfo::empty(remapper.get_block(
mir_module.functions.iter().next().unwrap().0,
mir_module.functions.iter().next().unwrap().1.entry_block
).unwrap())
};
// Phase 201-A: Get reserved PHI dst ValueIds and set in MirBuilder
let reserved_phi_dsts = loop_header_phi_info.reserved_value_ids();
if debug && !reserved_phi_dsts.is_empty() {
eprintln!("[cf_loop/joinir] Phase 201-A: Reserved PHI dsts: {:?}", reserved_phi_dsts);
}
// Phase 201-A: Set reserved IDs in MirBuilder so next_value_id() skips them
// This protects against carrier corruption when break conditions emit Const instructions
builder.reserved_value_ids = reserved_phi_dsts.clone();
if debug && !builder.reserved_value_ids.is_empty() {
eprintln!("[cf_loop/joinir] Phase 201-A: Set builder.reserved_value_ids = {:?}", builder.reserved_value_ids);
}
// Phase 3: Remap ValueIds (with reserved PHI dsts protection)
remap_values(builder, &used_values, &mut remapper, &reserved_phi_dsts, debug)?;
// Phase 177-3 DEBUG: Verify remapper state after Phase 3
eprintln!("[DEBUG-177] === Remapper state after Phase 3 ===");
@ -138,86 +223,13 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
}
eprintln!("[DEBUG-177] ==============================");
// Phase 3.5: Build loop header PHIs (if loop pattern with loop_var_name)
// Phase 3.5: Override remapper for function parameters to use PHI dsts
//
// We need to know PHI dsts before instruction_rewriter runs, so that:
// 1. Tail call handling can set latch_incoming
// 2. Return handling can use PHI dsts for exit values
let (entry_func_name, entry_func) = mir_module
.functions
.iter()
.next()
.ok_or("JoinIR module has no functions (Phase 3.5)")?;
let entry_block_remapped = remapper
.get_block(entry_func_name, entry_func.entry_block)
.ok_or_else(|| format!("Entry block not found for {} (Phase 3.5)", entry_func_name))?;
// Phase 33-16: Get host's current block as the entry edge (the block that jumps INTO the loop)
let host_entry_block = builder.current_block.ok_or(
"Phase 33-16: No current block when building header PHIs"
)?;
let mut loop_header_phi_info = if let Some(boundary) = boundary {
// Phase 201-A: This phase now uses the loop_header_phi_info built before Phase 3.
// The PHI dst allocation has been moved earlier to prevent ValueId conflicts.
if let Some(boundary) = boundary {
if let Some(loop_var_name) = &boundary.loop_var_name {
// Phase 33-16: Get loop variable's initial value from HOST (not JoinIR's ValueId(0))
// boundary.host_inputs[0] is the host ValueId that holds the initial loop var value
let loop_var_init = boundary.host_inputs.first().copied().ok_or(
"Phase 33-16: No host_inputs in boundary for loop_var_init"
)?;
if debug {
eprintln!(
"[cf_loop/joinir] Phase 3.5: Building header PHIs for loop_var='{}' at {:?}",
loop_var_name, entry_block_remapped
);
eprintln!(
"[cf_loop/joinir] loop_var_init={:?} (from boundary.host_inputs[0])",
loop_var_init
);
eprintln!(
"[cf_loop/joinir] host_entry_block={:?} (where initial value comes from)",
host_entry_block
);
}
// Phase 33-20: Extract other carriers from exit_bindings
// Skip the loop variable (it's handled separately) and collect other carriers
eprintln!(
"[cf_loop/joinir] Phase 33-20 DEBUG: exit_bindings count={}, loop_var_name={:?}",
boundary.exit_bindings.len(),
loop_var_name
);
for b in boundary.exit_bindings.iter() {
eprintln!(
"[cf_loop/joinir] Phase 33-20 DEBUG: exit_binding: carrier_name={:?}, host_slot={:?}",
b.carrier_name, b.host_slot
);
}
let other_carriers: Vec<(String, ValueId)> = boundary.exit_bindings
.iter()
.filter(|b| b.carrier_name != *loop_var_name)
.map(|b| (b.carrier_name.clone(), b.host_slot))
.collect();
if debug && !other_carriers.is_empty() {
eprintln!(
"[cf_loop/joinir] Phase 33-20: Found {} other carriers from exit_bindings: {:?}",
other_carriers.len(),
other_carriers.iter().map(|(n, _)| n.as_str()).collect::<Vec<_>>()
);
}
let phi_info = LoopHeaderPhiBuilder::build(
builder,
entry_block_remapped, // header_block (JoinIR's entry block = loop header)
host_entry_block, // entry_block (host's block that jumps to loop header)
loop_var_name,
loop_var_init,
&other_carriers, // Phase 33-20: Pass other carriers from exit_bindings
boundary.expr_result.is_some(), // expr_result_is_loop_var
debug,
)?;
// Phase 201-A: PHI info is already built (before Phase 3) - just use it
// Phase 33-21: Override remapper for loop_step's parameters
//
@ -295,13 +307,13 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
);
eprintln!(
"[DEBUG-177] Phase 33-21: carrier_phis count: {}, names: {:?}",
phi_info.carrier_phis.len(),
phi_info.carrier_phis.iter().map(|(n, _)| n.as_str()).collect::<Vec<_>>()
loop_header_phi_info.carrier_phis.len(),
loop_header_phi_info.carrier_phis.iter().map(|(n, _)| n.as_str()).collect::<Vec<_>>()
);
// Map main's parameters to header PHI dsts
// main params: [i_init, carrier1_init, ...]
// carrier_phis: [("i", entry), ("sum", entry), ...]
for (idx, (carrier_name, entry)) in phi_info.carrier_phis.iter().enumerate() {
for (idx, (carrier_name, entry)) in loop_header_phi_info.carrier_phis.iter().enumerate() {
if let Some(&main_param) = main_params.get(idx) {
// Phase 177-3: Don't override condition_bindings
if condition_binding_ids.contains(&main_param) {
@ -323,7 +335,7 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
// Phase 177-3-B: Handle body-only carriers
// These are carriers in carrier_phis that are NOT in main function params.
// They appear in condition_bindings (added by Phase 176-5) but need PHI remapping.
for (carrier_name, entry) in &phi_info.carrier_phis {
for (carrier_name, entry) in &loop_header_phi_info.carrier_phis {
// Check if this carrier has a condition_binding
if let Some(binding) = boundary.condition_bindings.iter().find(|cb| cb.name == *carrier_name) {
// Skip if it's a true condition-only variable (already protected above)
@ -376,7 +388,7 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
// Check if this param was already handled by Phase 177-3-B
let already_mapped = boundary.condition_bindings.iter().any(|cb| {
cb.join_value == *loop_step_param &&
phi_info.carrier_phis.iter().any(|(name, _)| name == &cb.name)
loop_header_phi_info.carrier_phis.iter().any(|(name, _)| name == &cb.name)
});
if already_mapped {
eprintln!(
@ -394,8 +406,8 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
if let Some(param_idx) = loop_step_params.iter().position(|p| p == loop_step_param) {
// Map params[i] to carrier_order[i]
if let (Some(carrier_name), Some(entry)) = (
phi_info.get_carrier_at_index(param_idx),
phi_info.get_entry_at_index(param_idx),
loop_header_phi_info.get_carrier_at_index(param_idx),
loop_header_phi_info.get_entry_at_index(param_idx),
) {
eprintln!(
"[DEBUG-177] Phase 177-STRUCT-2: REMAP loop_step param[{}] {:?}{:?} (carrier '{}')",
@ -410,7 +422,7 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
if function_params.get(main_func_name).is_none() && function_params.get(loop_step_func_name).is_none() {
// Fallback: Use old behavior (ValueId(0), ValueId(1), ...)
// This handles patterns that don't have loop_step function
if let Some(phi_dst) = phi_info.get_carrier_phi(loop_var_name) {
if let Some(phi_dst) = loop_header_phi_info.get_carrier_phi(loop_var_name) {
// Phase 177-3: Don't override condition_bindings
if !condition_binding_ids.contains(&ValueId(0)) {
remapper.set_value(ValueId(0), phi_dst);
@ -427,11 +439,11 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
}
}
// Phase 177-STRUCT-2: Use carrier_order for deterministic iteration
for (idx, carrier_name) in phi_info.carrier_order.iter().enumerate() {
for (idx, carrier_name) in loop_header_phi_info.carrier_order.iter().enumerate() {
if carrier_name == loop_var_name {
continue;
}
let entry = match phi_info.carrier_phis.get(carrier_name) {
let entry = match loop_header_phi_info.carrier_phis.get(carrier_name) {
Some(e) => e,
None => continue,
};
@ -464,13 +476,9 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
);
}
phi_info
} else {
LoopHeaderPhiInfo::empty(entry_block_remapped)
// Phase 201-A: loop_header_phi_info already built (no assignment needed)
}
} else {
LoopHeaderPhiInfo::empty(entry_block_remapped)
};
}
// Phase 4: Merge blocks and rewrite instructions
// Phase 33-16: Pass mutable loop_header_phi_info for latch_incoming tracking
@ -541,17 +549,12 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
let exit_block_id = merge_result.exit_block_id;
// Jump from current block to entry function's entry block
// (Reuse entry_func_name and entry_block_remapped from Phase 3.5)
let entry_block = entry_block_remapped;
// Phase 201-A: Get entry block from loop_header_phi_info
// The header_block in loop_header_phi_info is the remapped entry block
let entry_block = loop_header_phi_info.header_block;
if debug {
eprintln!("[cf_loop/joinir] Entry function name: {}", entry_func_name);
eprintln!(
"[cf_loop/joinir] Entry function's entry_block (JoinIR local): {:?}",
entry_func.entry_block
);
eprintln!("[cf_loop/joinir] Entry block (remapped): {:?}", entry_block);
eprintln!("[cf_loop/joinir] Entry block (from loop_header_phi_info): {:?}", entry_block);
eprintln!(
"[cf_loop/joinir] Current block before emit_jump: {:?}",
builder.current_block
@ -598,22 +601,48 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks(
}
}
// Phase 201-A: Clear reserved ValueIds after merge completes
// Future loops will set their own reserved IDs
if !builder.reserved_value_ids.is_empty() {
if debug {
eprintln!("[cf_loop/joinir] Phase 201-A: Clearing reserved_value_ids (was {:?})", builder.reserved_value_ids);
}
builder.reserved_value_ids.clear();
}
Ok(exit_phi_result_id)
}
/// Phase 3: Allocate new ValueIds for all collected values
///
/// Phase 201-A: Accept reserved ValueIds that must not be reused.
/// These are PHI dst ValueIds that will be created by LoopHeaderPhiBuilder.
/// We must skip these IDs to prevent carrier value corruption.
fn remap_values(
builder: &mut crate::mir::builder::MirBuilder,
used_values: &std::collections::BTreeSet<ValueId>,
remapper: &mut crate::mir::builder::joinir_id_remapper::JoinIrIdRemapper,
reserved_ids: &std::collections::HashSet<ValueId>,
debug: bool,
) -> Result<(), String> {
if debug {
eprintln!("[cf_loop/joinir] Phase 3: Remapping {} ValueIds", used_values.len());
eprintln!("[cf_loop/joinir] Phase 3: Remapping {} ValueIds (reserved: {})",
used_values.len(), reserved_ids.len());
}
for old_value in used_values {
let new_value = builder.next_value_id();
// Phase 201-A: Allocate new ValueId, skipping reserved PHI dsts
let new_value = loop {
let candidate = builder.next_value_id();
if !reserved_ids.contains(&candidate) {
break candidate;
}
// Skip reserved ID - will try next one
if debug {
eprintln!("[cf_loop/joinir] Phase 201-A: Skipping reserved PHI dst {:?}", candidate);
}
};
remapper.set_value(*old_value, new_value);
if debug {
eprintln!(

View File

@ -125,16 +125,16 @@ impl ConditionEnvBuilder {
env
}
/// Build ConditionEnv with optional captured variables (Phase 200-A v2 entry point)
/// Build ConditionEnv with optional captured variables (Phase 200-B implementation)
///
/// # Phase 200-A Status
/// # Phase 200-B Implementation
///
/// Currently ignores `captured` parameter and delegates to existing implementation.
/// Integration with CapturedEnv will be implemented in Phase 200-B.
/// Adds captured function-scoped variables to ConditionEnv and generates
/// condition_bindings for the boundary builder.
///
/// # Future Behavior (Phase 200-B+)
/// # Behavior
///
/// - Add captured variables to ConditionEnv
/// - Add captured variables to ConditionEnv.captured field
/// - Generate condition_bindings for captured vars in boundary
/// - Track captured vars separately from loop params
/// - Ensure captured vars do NOT participate in header PHI or exit_bindings
@ -142,45 +142,91 @@ impl ConditionEnvBuilder {
/// # Arguments
///
/// * `loop_var_name` - Loop parameter name (e.g., "i", "pos")
/// * `_captured` - Function-scoped captured variables (Phase 200-B+)
/// * `_boundary` - Boundary builder for adding condition_bindings (Phase 200-B+)
/// * `captured` - Function-scoped captured variables with host ValueIds
/// * `boundary` - Boundary builder for adding condition_bindings
/// * `variable_map` - Host function's variable_map to resolve host ValueIds
///
/// # Returns
///
/// ConditionEnv with loop parameter mapping (Phase 200-A: same as build_loop_param_only)
/// ConditionEnv with loop parameter and captured variables
///
/// # Example (Future Phase 200-B)
/// # Example
///
/// ```ignore
/// let captured = analyze_captured_vars(fn_body, loop_ast, scope);
/// let mut boundary = JoinInlineBoundaryBuilder::new();
/// let env = ConditionEnvBuilder::build_with_captures(
/// "pos",
/// &captured, // Contains "digits" with host ValueId(42)
/// &captured, // Contains "digits"
/// &mut boundary,
/// &variable_map, // To resolve "digits" → ValueId(42)
/// );
/// // Phase 200-B: env will contain "pos" → ValueId(0), "digits" → ValueId(1)
/// // Phase 200-B: boundary.condition_bindings will have entry for "digits"
/// // env.params: "pos" → ValueId(0)
/// // env.captured: "digits" → ValueId(1)
/// // boundary.condition_bindings: [ConditionBinding { name: "digits", host_value: ValueId(42), join_value: ValueId(1) }]
/// ```
pub fn build_with_captures(
loop_var_name: &str,
_captured: &CapturedEnv,
_boundary: &mut JoinInlineBoundaryBuilder,
captured: &CapturedEnv,
boundary: &mut JoinInlineBoundaryBuilder,
variable_map: &BTreeMap<String, ValueId>,
) -> ConditionEnv {
// Phase 200-A: Delegate to existing implementation
// TODO(Phase 200-B): Integrate captured vars into ConditionEnv
//
// Integration steps:
// 1. Start with loop parameter in env (ValueId(0))
// 2. For each captured var:
// a. Allocate JoinIR-local ValueId (starting from 1)
// b. Add to ConditionEnv (var.name → join_id)
// c. Add to boundary.condition_bindings (host_id ↔ join_id)
// d. Mark as ParamRole::Condition (not Carrier or LoopParam)
// 3. Ensure captured vars are NOT in exit_bindings (condition-only)
// 4. Return populated ConditionEnv
use std::env;
Self::build_loop_param_only(loop_var_name)
let debug = env::var("NYASH_CAPTURE_DEBUG").is_ok();
if debug {
eprintln!("[capture/env_builder] Building ConditionEnv with {} captured vars", captured.vars.len());
}
// Step 1: Build base ConditionEnv with loop params (existing logic)
let mut env = Self::build_loop_param_only(loop_var_name);
// Step 2: Add captured vars as ParamRole::Condition
for var in &captured.vars {
// 2a: Resolve host_id from variable_map
let host_id = match variable_map.get(&var.name) {
Some(&id) => id,
None => {
if debug {
eprintln!("[capture/env_builder] WARNING: Captured var '{}' not found in variable_map, skipping", var.name);
}
continue;
}
};
// 2b: Add to boundary with Condition role
boundary.add_param_with_role(&var.name, host_id, crate::mir::join_ir::lowering::inline_boundary_builder::ParamRole::Condition);
// 2c: Get JoinIR ValueId from boundary
let join_id = boundary.get_condition_binding(&var.name)
.expect("captured var should be in boundary after add_param_with_role");
// 2d: Add to ConditionEnv.captured map
env.captured.insert(var.name.clone(), join_id);
if debug {
eprintln!("[capture/env_builder] Added captured var '{}': host={:?}, join={:?}", var.name, host_id, join_id);
}
}
// Step 3: Debug guard - Condition params must NOT be in PHI candidates
#[cfg(debug_assertions)]
for var in &captured.vars {
if env.contains(&var.name) && !env.is_captured(&var.name) {
panic!(
"Captured var '{}' must not be in loop params (ParamRole conflict)",
var.name
);
}
}
if debug {
let param_count = env.iter().count();
eprintln!("[capture/env_builder] Final ConditionEnv: {} params, {} captured", param_count, env.captured.len());
}
env
}
}

View File

@ -72,17 +72,19 @@ pub fn can_lower(builder: &MirBuilder, ctx: &super::router::LoopPatternContext)
/// Phase 194: Lowering function for Pattern 2
///
/// Wrapper around cf_loop_pattern2_with_break to match router signature
/// Phase 200-C: Pass fn_body to cf_loop_pattern2_with_break
pub fn lower(
builder: &mut MirBuilder,
ctx: &super::router::LoopPatternContext,
) -> Result<Option<ValueId>, String> {
builder.cf_loop_pattern2_with_break(ctx.condition, ctx.body, ctx.func_name, ctx.debug)
builder.cf_loop_pattern2_with_break_impl(ctx.condition, ctx.body, ctx.func_name, ctx.debug, ctx.fn_body)
}
impl MirBuilder {
/// Phase 179-B: Pattern 2 (Loop with Conditional Break) minimal lowerer
///
/// **Refactored**: Now uses PatternPipelineContext for unified preprocessing
/// **Phase 200-C**: Added fn_body parameter for capture analysis
///
/// # Pipeline (Phase 179-B)
/// 1. Build preprocessing context → PatternPipelineContext
@ -99,6 +101,19 @@ impl MirBuilder {
_body: &[ASTNode],
_func_name: &str,
debug: bool,
) -> Result<Option<ValueId>, String> {
// Phase 200-C: Delegate to impl function with fn_body=None for backward compatibility
self.cf_loop_pattern2_with_break_impl(condition, _body, _func_name, debug, None)
}
/// Phase 200-C: Pattern 2 implementation with optional fn_body for capture analysis
fn cf_loop_pattern2_with_break_impl(
&mut self,
condition: &ASTNode,
_body: &[ASTNode],
_func_name: &str,
debug: bool,
fn_body: Option<&[ASTNode]>,
) -> Result<Option<ValueId>, String> {
use crate::mir::join_ir::lowering::loop_with_break_minimal::lower_loop_with_break_minimal;
@ -129,9 +144,34 @@ impl MirBuilder {
// Phase 195: Use unified trace
trace::trace().varmap("pattern2_start", &self.variable_map);
// Phase 171-172: Use ConditionEnvBuilder for unified construction (Issue 5)
// Phase 200-C: Integrate capture analysis
use crate::mir::loop_pattern_detection::function_scope_capture::{analyze_captured_vars_v2, CapturedEnv};
use super::condition_env_builder::ConditionEnvBuilder;
use crate::mir::join_ir::lowering::condition_env::ConditionBinding;
eprintln!("[pattern2/phase200c] fn_body is {}", if fn_body.is_some() { "SOME" } else { "NONE" });
let captured_env = if let Some(fn_body_ref) = fn_body {
eprintln!("[pattern2/phase200c] fn_body has {} nodes", fn_body_ref.len());
// Phase 200-C: Use v2 API with structural matching
// Pass condition and body directly instead of constructing loop AST
analyze_captured_vars_v2(fn_body_ref, condition, _body, &scope)
} else {
eprintln!("[pattern2/phase200c] fn_body is None, using empty CapturedEnv");
// fn_body not available - use empty CapturedEnv
CapturedEnv::new()
};
eprintln!("[pattern2/capture] Phase 200-C: Captured {} variables",
captured_env.vars.len());
for var in &captured_env.vars {
eprintln!("[pattern2/capture] '{}': host_id={:?}, immutable={}",
var.name, var.host_id, var.is_immutable);
}
// Phase 200-C: Use existing path and manually add captured variables
// TODO Phase 200-D: Refactor to use build_with_captures with boundary builder
let (mut env, mut condition_bindings) = ConditionEnvBuilder::build_for_break_condition(
condition,
&loop_var_name,
@ -139,6 +179,26 @@ impl MirBuilder {
loop_var_id,
)?;
// Phase 200-C: Manually add captured variables to env for E2E testing
// This is a temporary approach until Phase 200-D refactors the boundary creation
for var in &captured_env.vars {
if let Some(&host_id) = self.variable_map.get(&var.name) {
// Allocate a JoinIR ValueId for this captured variable
let join_id = crate::mir::ValueId(env.len() as u32);
env.insert(var.name.clone(), join_id);
// Add to condition_bindings for boundary processing
condition_bindings.push(ConditionBinding {
name: var.name.clone(),
host_value: host_id,
join_value: join_id,
});
eprintln!("[pattern2/capture] Manually added captured '{}' to env: host={:?}, join={:?}",
var.name, host_id, join_id);
}
}
// Phase 190-impl-D: Calculate ValueId offset for body-local variables
// JoinIR main() params are: [ValueId(0), ValueId(1), ...] for (loop_var, carrier1, carrier2, ...)
// Body-local variables must start AFTER all carrier params to avoid collision.

View File

@ -54,6 +54,10 @@ pub struct LoopPatternContext<'a> {
/// Phase 192: Pattern classification based on features
pub pattern_kind: LoopPatternKind,
/// Phase 200-C: Optional function body AST for capture analysis
/// None if not available, Some(&[ASTNode]) if function body is accessible
pub fn_body: Option<&'a [ASTNode]>,
}
impl<'a> LoopPatternContext<'a> {
@ -87,8 +91,22 @@ impl<'a> LoopPatternContext<'a> {
has_break,
features,
pattern_kind,
fn_body: None, // Phase 200-C: Default to None
}
}
/// Phase 200-C: Create context with fn_body for capture analysis
pub fn with_fn_body(
condition: &'a ASTNode,
body: &'a [ASTNode],
func_name: &'a str,
debug: bool,
fn_body: &'a [ASTNode],
) -> Self {
let mut ctx = Self::new(condition, body, func_name, debug);
ctx.fn_body = Some(fn_body);
ctx
}
}
/// Phase 193: Feature extraction moved to ast_feature_extractor module

View File

@ -139,7 +139,17 @@ impl MirBuilder {
// Phase 194: Use table-driven router instead of if/else chain
use super::patterns::{route_loop_pattern, LoopPatternContext};
let ctx = LoopPatternContext::new(condition, body, &func_name, debug);
// Phase 200-C: Pass fn_body_ast to LoopPatternContext if available
// Clone fn_body_ast to avoid borrow checker issues
let fn_body_clone = self.fn_body_ast.clone();
eprintln!("[routing] fn_body_ast is {} for '{}'", if fn_body_clone.is_some() { "SOME" } else { "NONE" }, func_name);
let ctx = if let Some(ref fn_body) = fn_body_clone {
eprintln!("[routing] Creating ctx with fn_body ({} nodes)", fn_body.len());
LoopPatternContext::with_fn_body(condition, body, &func_name, debug, fn_body)
} else {
LoopPatternContext::new(condition, body, &func_name, debug)
};
if let Some(result) = route_loop_pattern(self, &ctx)? {
trace::trace().routing("router", func_name, "Pattern router succeeded");
return Ok(Some(result));

View File

@ -116,8 +116,16 @@ impl super::MirBuilder {
reg.ensure_slot(p, ty);
}
}
// Phase 200-C: Store fn_body_ast for inline main() lowering
eprintln!("[build_static_main_box] Storing fn_body_ast with {} nodes for inline main()", body.len());
self.fn_body_ast = Some(body.clone());
// Lower statements in order to preserve def→use
let lowered = self.cf_block(body.clone());
// Phase 200-C: Clear fn_body_ast after main() lowering
self.fn_body_ast = None;
self.variable_map = saved_var_map;
lowered
} else {

View File

@ -31,12 +31,26 @@ impl super::MirBuilder {
/// Allocate a new ValueId in the appropriate context
/// - Inside function: uses function-local allocator
/// - Outside function: uses module-global allocator
///
/// Phase 201-A: Skips reserved ValueIds (PHI dsts from LoopHeaderPhiBuilder)
/// to prevent carrier value corruption in JoinIR loops.
#[inline]
pub(crate) fn next_value_id(&mut self) -> super::ValueId {
if let Some(ref mut f) = self.current_function {
f.next_value_id() // Function context
} else {
self.value_gen.next() // Module context
loop {
let candidate = if let Some(ref mut f) = self.current_function {
f.next_value_id() // Function context
} else {
self.value_gen.next() // Module context
};
// Phase 201-A: Skip reserved PHI dst ValueIds
if !self.reserved_value_ids.contains(&candidate) {
return candidate;
}
// Reserved ID - try next one (loop continues)
if std::env::var("NYASH_201A_DEBUG").is_ok() {
eprintln!("[201-A] next_value_id: Skipping reserved {:?}", candidate);
}
}
}

View File

@ -20,6 +20,12 @@ use std::collections::HashMap;
/// Maps variable names to JoinIR-local ValueIds. Used when lowering
/// condition AST nodes to JoinIR instructions.
///
/// # Phase 200-B Extension
///
/// Added `captured` field to track function-scoped captured variables
/// separately from loop parameters. Captured variables have ParamRole::Condition
/// and do NOT participate in header PHI or ExitLine.
///
/// # Example
///
/// ```ignore
@ -27,6 +33,9 @@ use std::collections::HashMap;
/// env.insert("i".to_string(), ValueId(0)); // Loop parameter
/// env.insert("end".to_string(), ValueId(1)); // Condition-only var
///
/// // Phase 200-B: Add captured variable
/// env.captured.insert("digits".to_string(), ValueId(2));
///
/// // Later during lowering:
/// if let Some(value_id) = env.get("i") {
/// // Use value_id in JoinIR instruction
@ -34,7 +43,17 @@ use std::collections::HashMap;
/// ```
#[derive(Debug, Clone, Default)]
pub struct ConditionEnv {
/// Loop parameters and condition-only variables (legacy)
name_to_join: HashMap<String, ValueId>,
/// Phase 200-B: Captured function-scoped variables (ParamRole::Condition)
///
/// These variables are:
/// - Declared in function scope before the loop
/// - Never reassigned (effectively immutable)
/// - Used in loop condition or body
/// - NOT included in header PHI or ExitLine (condition-only)
pub captured: HashMap<String, ValueId>,
}
impl ConditionEnv {
@ -42,6 +61,7 @@ impl ConditionEnv {
pub fn new() -> Self {
Self {
name_to_join: HashMap::new(),
captured: HashMap::new(),
}
}
@ -57,38 +77,82 @@ impl ConditionEnv {
/// Look up a variable by name
///
/// Phase 200-B: Searches both name_to_join (loop params) and captured fields.
///
/// Returns `Some(ValueId)` if the variable exists in the environment,
/// `None` otherwise.
pub fn get(&self, name: &str) -> Option<ValueId> {
self.name_to_join.get(name).copied()
.or_else(|| self.captured.get(name).copied())
}
/// Check if a variable exists in the environment
///
/// Phase 200-B: Checks both name_to_join and captured fields.
pub fn contains(&self, name: &str) -> bool {
self.name_to_join.contains_key(name)
self.name_to_join.contains_key(name) || self.captured.contains_key(name)
}
/// Check if a variable is a captured (Condition role) variable
///
/// Phase 200-B: New method to distinguish captured vars from loop params.
pub fn is_captured(&self, name: &str) -> bool {
self.captured.contains_key(name)
}
/// Get the number of variables in the environment
///
/// Phase 200-B: Counts both name_to_join and captured fields.
pub fn len(&self) -> usize {
self.name_to_join.len()
self.name_to_join.len() + self.captured.len()
}
/// Check if the environment is empty
///
/// Phase 200-B: Checks both name_to_join and captured fields.
pub fn is_empty(&self) -> bool {
self.name_to_join.is_empty()
self.name_to_join.is_empty() && self.captured.is_empty()
}
/// Get an iterator over all (name, ValueId) pairs
///
/// Phase 200-B: Note - this only iterates over name_to_join (loop params).
/// For captured variables, access the `captured` field directly.
pub fn iter(&self) -> impl Iterator<Item = (&String, &ValueId)> {
self.name_to_join.iter()
}
/// Get all variable names (sorted)
///
/// Phase 200-B: Includes both name_to_join and captured variables.
pub fn names(&self) -> Vec<String> {
let mut names: Vec<_> = self.name_to_join.keys().cloned().collect();
let mut names: Vec<_> = self.name_to_join.keys()
.chain(self.captured.keys())
.cloned()
.collect();
names.sort();
names.dedup(); // Remove duplicates (shouldn't happen, but be safe)
names
}
/// Phase 201-A: Get the maximum ValueId used in this environment
///
/// Returns the highest ValueId.0 value from both name_to_join and captured,
/// or None if the environment is empty.
///
/// This is used by JoinIR lowering to determine the starting point for
/// alloc_value() to avoid ValueId collisions.
pub fn max_value_id(&self) -> Option<u32> {
let name_max = self.name_to_join.values().map(|v| v.0).max();
let captured_max = self.captured.values().map(|v| v.0).max();
match (name_max, captured_max) {
(Some(a), Some(b)) => Some(a.max(b)),
(Some(a), None) => Some(a),
(None, Some(b)) => Some(b),
(None, None) => None,
}
}
}
/// Binding between HOST and JoinIR ValueIds for condition variables

View File

@ -203,39 +203,63 @@ impl JoinInlineBoundaryBuilder {
/// builder.add_param_with_role("digits", ValueId(42), ParamRole::Condition);
/// builder.add_param_with_role("sum", ValueId(101), ParamRole::Carrier);
/// ```
pub fn add_param_with_role(&mut self, _name: &str, host_id: ValueId, role: ParamRole) {
// Phase 200-A: Basic routing only
// TODO(Phase 200-B): Implement full role-based routing
pub fn add_param_with_role(&mut self, name: &str, host_id: ValueId, role: ParamRole) {
// Phase 200-B: Full role-based routing implementation
//
// Routing implementation:
// - LoopParam: join_inputs + host_inputs
// - Condition: condition_bindings (with JoinIR-local ValueId allocation)
// - Carrier: join_inputs + host_inputs + exit_bindings
// Routing rules:
// - LoopParam: join_inputs + host_inputs (participates in PHI)
// - Condition: condition_bindings ONLY (no PHI, no ExitLine)
// - Carrier: join_inputs + host_inputs (participates in PHI + ExitLine)
// - ExprResult: Handled by set_expr_result
match role {
ParamRole::LoopParam | ParamRole::Carrier => {
// Existing behavior: add to join_inputs
// Note: In Phase 200-A, we don't have a simple add_input method
// that takes a name. This is a skeleton implementation.
// In Phase 200-B, we'll need to allocate JoinIR-local ValueIds.
// Add to join_inputs + host_inputs
let join_id = ValueId(self.boundary.join_inputs.len() as u32);
self.boundary.join_inputs.push(join_id);
self.boundary.host_inputs.push(host_id);
}
ParamRole::Condition => {
// Phase 200-A: Log only
// TODO(Phase 200-B): Add to condition_bindings without PHI
// Phase 200-B: Add to condition_bindings without PHI
// 1. Allocate JoinIR-local ValueId
// 2. Create ConditionBinding { name, host_id, join_id }
// 3. Add to self.boundary.condition_bindings
let join_id = ValueId(
(self.boundary.join_inputs.len() + self.boundary.condition_bindings.len()) as u32
);
// 2. Create ConditionBinding
let binding = ConditionBinding {
name: name.to_string(),
host_value: host_id,
join_value: join_id,
};
// 3. Add to condition_bindings
self.boundary.condition_bindings.push(binding);
}
ParamRole::ExprResult => {
// Handled separately by set_expr_result
// Handled separately by with_expr_result
// No action needed here
}
}
}
/// Get JoinIR ValueId for a condition-only binding (Phase 200-B)
///
/// Returns the JoinIR-local ValueId for a captured variable that was added
/// with ParamRole::Condition.
///
/// # Arguments
///
/// * `name` - Variable name to look up
///
/// # Returns
///
/// `Some(ValueId)` if the variable exists in condition_bindings, `None` otherwise.
pub fn get_condition_binding(&self, name: &str) -> Option<ValueId> {
self.boundary.condition_bindings.iter()
.find(|b| b.name == name)
.map(|b| b.join_value)
}
}
impl Default for JoinInlineBoundaryBuilder {
@ -379,14 +403,15 @@ mod tests {
#[test]
fn test_param_role_condition() {
let mut builder = JoinInlineBoundaryBuilder::new();
// Phase 200-A: Condition role is logged but not yet routed
// Phase 200-B: Condition role is added to condition_bindings
builder.add_param_with_role("digits", ValueId(42), ParamRole::Condition);
let boundary = builder.build();
// Phase 200-A: No action for Condition role yet
// Phase 200-B: This will add to condition_bindings
// Phase 200-B: Condition params go to condition_bindings, not join_inputs
assert_eq!(boundary.join_inputs.len(), 0);
assert_eq!(boundary.condition_bindings.len(), 0);
assert_eq!(boundary.condition_bindings.len(), 1);
assert_eq!(boundary.condition_bindings[0].name, "digits");
assert_eq!(boundary.condition_bindings[0].host_value, ValueId(42));
}
#[test]

View File

@ -95,20 +95,21 @@ impl CapturedEnv {
/// Analyzes function-scoped variables that can be safely captured for loop conditions/body.
///
/// # Phase 200-A Status
/// # Phase 200-B Implementation
///
/// Currently returns empty CapturedEnv (skeleton implementation).
/// Actual capture detection will be implemented in Phase 200-B.
/// Detects function-scoped variables that are effectively immutable constants
/// within a loop context (e.g., `digits` in JsonParser._atoi()).
///
/// # Future Detection Criteria (Phase 200-B+)
/// # Detection Criteria
///
/// A variable is captured if ALL of the following conditions are met:
///
/// 1. **Declared before the loop**: Variable must be declared in function scope before the loop
/// 2. **Never reassigned**: Variable is never reassigned within the function (is_immutable = true)
/// 3. **Referenced in loop**: Variable is referenced in loop condition or body
/// 4. **Not a loop parameter**: Variable is not the loop iteration variable
/// 5. **Not a body-local**: Variable is not declared inside the loop body
/// 2. **Safe constant init**: Initialized with string/integer literal only
/// 3. **Never reassigned**: Variable is never reassigned within the function (is_immutable = true)
/// 4. **Referenced in loop**: Variable is referenced in loop condition or body
/// 5. **Not a loop parameter**: Variable is not in scope.loop_params
/// 6. **Not a body-local**: Variable is not in scope.body_locals
///
/// # Example
///
@ -127,31 +128,536 @@ impl CapturedEnv {
///
/// # Arguments
///
/// * `_fn_body` - AST nodes of the function body (for analysis)
/// * `_loop_ast` - AST node of the loop statement
/// * `_scope` - LoopScopeShape (for excluding loop params and body-locals)
/// * `fn_body` - AST nodes of the function body (for analysis)
/// * `loop_ast` - AST node of the loop statement
/// * `scope` - LoopScopeShape (for excluding loop params and body-locals)
///
/// # Returns
///
/// `CapturedEnv` containing all captured variables (empty in Phase 200-A)
/// `CapturedEnv` containing all captured variables
pub fn analyze_captured_vars(
_fn_body: &[ASTNode],
_loop_ast: &ASTNode,
_scope: &LoopScopeShape,
fn_body: &[ASTNode],
loop_ast: &ASTNode,
scope: &LoopScopeShape,
) -> CapturedEnv {
// Phase 200-A: Skeleton implementation
// TODO(Phase 200-B): Implement actual capture detection
//
// Detection algorithm:
// 1. Find all `local` declarations before the loop in fn_body
// 2. For each declaration:
// a. Check if it's never reassigned in the function (is_immutable = true)
// b. Check if it's referenced in loop condition or body
// c. Exclude if it's in scope.pinned, scope.carriers, or scope.body_locals
// 3. Collect matching variables into CapturedEnv
// 4. Return the populated environment
use std::env;
CapturedEnv::new()
let debug = env::var("NYASH_CAPTURE_DEBUG").is_ok();
if debug {
eprintln!("[capture/debug] Starting capture analysis");
}
// Step 1: Find loop position in fn_body
let loop_index = match find_stmt_index(fn_body, loop_ast) {
Some(idx) => idx,
None => {
if debug {
eprintln!("[capture/debug] Loop not found in function body, returning empty CapturedEnv");
}
return CapturedEnv::new();
}
};
if debug {
eprintln!("[capture/debug] Loop found at index {}", loop_index);
}
// Step 2: Collect local declarations BEFORE the loop
let pre_loop_locals = collect_local_declarations(&fn_body[..loop_index]);
if debug {
eprintln!("[capture/debug] Found {} pre-loop local declarations", pre_loop_locals.len());
}
let mut env = CapturedEnv::new();
// Step 3: For each pre-loop local, check capture criteria
for (name, init_expr) in pre_loop_locals {
if debug {
eprintln!("[capture/check] Checking variable '{}'", name);
}
// 3a: Is init expression a safe constant?
if !is_safe_const_init(&init_expr) {
if debug {
eprintln!("[capture/reject] '{}': init is not a safe constant", name);
}
continue;
}
// 3b: Is this variable reassigned anywhere in fn_body?
if is_reassigned_in_fn(fn_body, &name) {
if debug {
eprintln!("[capture/reject] '{}': reassigned in function", name);
}
continue;
}
// 3c: Is this variable used in loop (condition or body)?
if !is_used_in_loop(loop_ast, &name) {
if debug {
eprintln!("[capture/reject] '{}': not used in loop", name);
}
continue;
}
// 3d: Skip if already in pinned, carriers, or body_locals
if scope.pinned.contains(&name) {
if debug {
eprintln!("[capture/reject] '{}': is a pinned variable", name);
}
continue;
}
if scope.carriers.contains(&name) {
if debug {
eprintln!("[capture/reject] '{}': is a carrier variable", name);
}
continue;
}
if scope.body_locals.contains(&name) {
if debug {
eprintln!("[capture/reject] '{}': is a body-local variable", name);
}
continue;
}
// All checks passed: add to CapturedEnv
// Note: We don't have access to variable_map here, so we use a placeholder ValueId
// The actual host_id will be resolved in ConditionEnvBuilder
if debug {
eprintln!("[capture/accept] '{}': ALL CHECKS PASSED, adding to CapturedEnv", name);
}
env.add_var(CapturedVar {
name: name.clone(),
host_id: ValueId(0), // Placeholder, will be resolved in ConditionEnvBuilder
is_immutable: true,
});
}
if debug {
eprintln!("[capture/result] Captured {} variables: {:?}",
env.vars.len(),
env.vars.iter().map(|v| &v.name).collect::<Vec<_>>()
);
}
env
}
/// Phase 200-C: Analyze captured vars with condition/body instead of loop_ast
///
/// This variant solves the pointer comparison problem when the loop AST is constructed
/// dynamically (e.g., in Pattern 2). Instead of passing a loop_ast reference,
/// we pass the condition and body directly and perform structural matching.
///
/// # Arguments
///
/// * `fn_body` - AST nodes of the function body (for analysis)
/// * `loop_condition` - Condition expression of the loop
/// * `loop_body` - Body statements of the loop
/// * `scope` - LoopScopeShape (for excluding loop params and body-locals)
///
/// # Returns
///
/// `CapturedEnv` containing all captured variables
pub fn analyze_captured_vars_v2(
fn_body: &[ASTNode],
loop_condition: &ASTNode,
loop_body: &[ASTNode],
scope: &LoopScopeShape,
) -> CapturedEnv {
use std::env;
let debug = env::var("NYASH_CAPTURE_DEBUG").is_ok();
if debug {
eprintln!("[capture/debug] Starting capture analysis v2 (structural matching)");
}
// Step 1: Find loop position in fn_body by structural matching
let loop_index = match find_loop_index_by_structure(fn_body, loop_condition, loop_body) {
Some(idx) => idx,
None => {
if debug {
eprintln!("[capture/debug] Loop not found in function body by structure, returning empty CapturedEnv");
}
return CapturedEnv::new();
}
};
if debug {
eprintln!("[capture/debug] Loop found at index {} by structure", loop_index);
}
// Step 2: Collect local declarations BEFORE the loop
let pre_loop_locals = collect_local_declarations(&fn_body[..loop_index]);
if debug {
eprintln!("[capture/debug] Found {} pre-loop local declarations", pre_loop_locals.len());
}
let mut env = CapturedEnv::new();
// Step 3: For each pre-loop local, check capture criteria
for (name, init_expr) in pre_loop_locals {
if debug {
eprintln!("[capture/check] Checking variable '{}'", name);
}
// 3a: Is init expression a safe constant?
if !is_safe_const_init(&init_expr) {
if debug {
eprintln!("[capture/reject] '{}': init is not a safe constant", name);
}
continue;
}
// 3b: Is this variable reassigned anywhere in fn_body?
if is_reassigned_in_fn(fn_body, &name) {
if debug {
eprintln!("[capture/reject] '{}': reassigned in function", name);
}
continue;
}
// 3c: Is this variable used in loop (condition or body)?
if !is_used_in_loop_parts(loop_condition, loop_body, &name) {
if debug {
eprintln!("[capture/reject] '{}': not used in loop", name);
}
continue;
}
// 3d: Skip if already in pinned, carriers, or body_locals
if scope.pinned.contains(&name) {
if debug {
eprintln!("[capture/reject] '{}': is a pinned variable", name);
}
continue;
}
if scope.carriers.contains(&name) {
if debug {
eprintln!("[capture/reject] '{}': is a carrier variable", name);
}
continue;
}
if scope.body_locals.contains(&name) {
if debug {
eprintln!("[capture/reject] '{}': is a body-local variable", name);
}
continue;
}
// All checks passed: add to CapturedEnv
if debug {
eprintln!("[capture/accept] '{}': ALL CHECKS PASSED, adding to CapturedEnv", name);
}
env.add_var(CapturedVar {
name: name.clone(),
host_id: ValueId(0), // Placeholder, will be resolved in ConditionEnvBuilder
is_immutable: true,
});
}
if debug {
eprintln!("[capture/result] Captured {} variables: {:?}",
env.vars.len(),
env.vars.iter().map(|v| &v.name).collect::<Vec<_>>()
);
}
env
}
/// Find the index of a loop statement in the function body
///
/// Returns Some(index) if found, None otherwise.
fn find_stmt_index(fn_body: &[ASTNode], loop_ast: &ASTNode) -> Option<usize> {
// Compare by pointer address (same AST node instance)
fn_body.iter().position(|stmt| {
std::ptr::eq(stmt as *const ASTNode, loop_ast as *const ASTNode)
})
}
/// Phase 200-C: Find loop index by structure matching (condition + body comparison)
///
/// Instead of pointer comparison, compare the loop structure.
/// This is useful when the loop AST is constructed dynamically.
fn find_loop_index_by_structure(
fn_body: &[ASTNode],
target_condition: &ASTNode,
target_body: &[ASTNode],
) -> Option<usize> {
for (idx, stmt) in fn_body.iter().enumerate() {
if let ASTNode::Loop { condition, body, .. } = stmt {
// Compare condition and body by structure
if ast_matches(condition, target_condition) && body_matches(body, target_body) {
return Some(idx);
}
}
}
None
}
/// Simple structural AST comparison
///
/// Uses Debug string comparison as a heuristic. This is not perfect but
/// works well enough for finding loops by structure.
fn ast_matches(a: &ASTNode, b: &ASTNode) -> bool {
format!("{:?}", a) == format!("{:?}", b)
}
/// Compare two body slices by structure
fn body_matches(a: &[ASTNode], b: &[ASTNode]) -> bool {
if a.len() != b.len() {
return false;
}
a.iter().zip(b.iter()).all(|(x, y)| ast_matches(x, y))
}
/// Collect local variable declarations from statements
///
/// Returns Vec<(name, init_expr)> for each variable declared with `local`.
fn collect_local_declarations(stmts: &[ASTNode]) -> Vec<(String, Option<Box<ASTNode>>)> {
let mut locals = Vec::new();
for stmt in stmts {
if let ASTNode::Local { variables, initial_values, .. } = stmt {
// Local declaration can have multiple variables (e.g., local a, b, c)
for (i, name) in variables.iter().enumerate() {
let init_expr = initial_values.get(i).and_then(|opt| opt.clone());
locals.push((name.clone(), init_expr));
}
}
}
locals
}
/// Check if expression is a safe constant (string/integer literal)
///
/// Phase 200-B: Only string and integer literals are allowed.
/// Future: May expand to include other safe constant patterns.
fn is_safe_const_init(expr: &Option<Box<ASTNode>>) -> bool {
match expr {
Some(boxed) => match boxed.as_ref() {
ASTNode::Literal { value, .. } => matches!(
value,
crate::ast::LiteralValue::String(_) | crate::ast::LiteralValue::Integer(_)
),
_ => false,
},
None => false,
}
}
/// Check if variable is reassigned anywhere in function body
///
/// Walks the entire function body AST to detect any assignments to the variable.
/// Returns true if the variable is reassigned (excluding the initial local declaration).
fn is_reassigned_in_fn(fn_body: &[ASTNode], name: &str) -> bool {
fn check_node(node: &ASTNode, name: &str) -> bool {
match node {
// Assignment to this variable
ASTNode::Assignment { target, value, .. } => {
// Check if target is the variable we're looking for
let is_target_match = match target.as_ref() {
ASTNode::Variable { name: var_name, .. } => var_name == name,
ASTNode::FieldAccess { .. } | ASTNode::Index { .. } => {
// Field access or index assignment doesn't count as reassignment
false
}
_ => false,
};
is_target_match || check_node(value, name)
}
// Grouped assignment expression: (x = expr)
ASTNode::GroupedAssignmentExpr { lhs, rhs, .. } => {
lhs == name || check_node(rhs, name)
}
// Recursive cases
ASTNode::If { condition, then_body, else_body, .. } => {
check_node(condition, name)
|| then_body.iter().any(|n| check_node(n, name))
|| else_body.as_ref().map_or(false, |body| body.iter().any(|n| check_node(n, name)))
}
ASTNode::Loop { condition, body, .. } => {
check_node(condition, name) || body.iter().any(|n| check_node(n, name))
}
ASTNode::While { condition, body, .. } => {
check_node(condition, name) || body.iter().any(|n| check_node(n, name))
}
ASTNode::TryCatch { try_body, catch_clauses, finally_body, .. } => {
try_body.iter().any(|n| check_node(n, name))
|| catch_clauses.iter().any(|clause| clause.body.iter().any(|n| check_node(n, name)))
|| finally_body.as_ref().map_or(false, |body| body.iter().any(|n| check_node(n, name)))
}
ASTNode::UnaryOp { operand, .. } => check_node(operand, name),
ASTNode::BinaryOp { left, right, .. } => {
check_node(left, name) || check_node(right, name)
}
ASTNode::MethodCall { object, arguments, .. } => {
check_node(object, name) || arguments.iter().any(|arg| check_node(arg, name))
}
ASTNode::FunctionCall { arguments, .. } => {
arguments.iter().any(|arg| check_node(arg, name))
}
ASTNode::FieldAccess { object, .. } => check_node(object, name),
ASTNode::Index { target, index, .. } => {
check_node(target, name) || check_node(index, name)
}
ASTNode::Return { value, .. } => {
value.as_ref().map_or(false, |v| check_node(v, name))
}
ASTNode::Local { .. } => {
// Local declarations are not reassignments
false
}
_ => false,
}
}
fn_body.iter().any(|stmt| check_node(stmt, name))
}
/// Check if variable is referenced in loop condition or body
///
/// Returns true if the variable name appears anywhere in the loop AST.
fn is_used_in_loop(loop_ast: &ASTNode, name: &str) -> bool {
fn check_usage(node: &ASTNode, name: &str) -> bool {
match node {
ASTNode::Variable { name: var_name, .. } => var_name == name,
ASTNode::Loop { condition, body, .. } => {
check_usage(condition, name) || body.iter().any(|n| check_usage(n, name))
}
ASTNode::If { condition, then_body, else_body, .. } => {
check_usage(condition, name)
|| then_body.iter().any(|n| check_usage(n, name))
|| else_body.as_ref().map_or(false, |body| body.iter().any(|n| check_usage(n, name)))
}
ASTNode::Assignment { target, value, .. } => {
check_usage(target, name) || check_usage(value, name)
}
ASTNode::UnaryOp { operand, .. } => check_usage(operand, name),
ASTNode::BinaryOp { left, right, .. } => {
check_usage(left, name) || check_usage(right, name)
}
ASTNode::MethodCall { object, arguments, .. } => {
check_usage(object, name) || arguments.iter().any(|arg| check_usage(arg, name))
}
ASTNode::FunctionCall { arguments, .. } => {
arguments.iter().any(|arg| check_usage(arg, name))
}
ASTNode::FieldAccess { object, .. } => check_usage(object, name),
ASTNode::Index { target, index, .. } => {
check_usage(target, name) || check_usage(index, name)
}
ASTNode::Return { value, .. } => {
value.as_ref().map_or(false, |v| check_usage(v, name))
}
ASTNode::Local { initial_values, .. } => {
initial_values.iter().any(|opt| {
opt.as_ref().map_or(false, |init| check_usage(init, name))
})
}
_ => false,
}
}
check_usage(loop_ast, name)
}
/// Phase 200-C: Check if variable is used in loop condition or body (separate parts)
///
/// This is used by analyze_captured_vars_v2 when condition and body are passed separately.
fn is_used_in_loop_parts(condition: &ASTNode, body: &[ASTNode], name: &str) -> bool {
fn check_usage(node: &ASTNode, name: &str) -> bool {
match node {
ASTNode::Variable { name: var_name, .. } => var_name == name,
ASTNode::Loop { condition, body, .. } => {
check_usage(condition, name) || body.iter().any(|n| check_usage(n, name))
}
ASTNode::If { condition, then_body, else_body, .. } => {
check_usage(condition, name)
|| then_body.iter().any(|n| check_usage(n, name))
|| else_body.as_ref().map_or(false, |body| body.iter().any(|n| check_usage(n, name)))
}
ASTNode::Assignment { target, value, .. } => {
check_usage(target, name) || check_usage(value, name)
}
ASTNode::UnaryOp { operand, .. } => check_usage(operand, name),
ASTNode::BinaryOp { left, right, .. } => {
check_usage(left, name) || check_usage(right, name)
}
ASTNode::MethodCall { object, arguments, .. } => {
check_usage(object, name) || arguments.iter().any(|arg| check_usage(arg, name))
}
ASTNode::FunctionCall { arguments, .. } => {
arguments.iter().any(|arg| check_usage(arg, name))
}
ASTNode::FieldAccess { object, .. } => check_usage(object, name),
ASTNode::Index { target, index, .. } => {
check_usage(target, name) || check_usage(index, name)
}
ASTNode::Return { value, .. } => {
value.as_ref().map_or(false, |v| check_usage(v, name))
}
ASTNode::Local { initial_values, .. } => {
initial_values.iter().any(|opt| {
opt.as_ref().map_or(false, |init| check_usage(init, name))
})
}
_ => false,
}
}
check_usage(condition, name) || body.iter().any(|n| check_usage(n, name))
}
#[cfg(test)]
@ -200,4 +706,372 @@ mod tests {
assert!(env.get("table").is_some());
assert!(env.get("nonexistent").is_none());
}
// Phase 200-B: Capture analysis tests
#[test]
fn test_capture_simple_digits() {
use crate::ast::{ASTNode, LiteralValue, Span};
// Build AST for:
// local digits = "0123456789"
// loop(i < 10) {
// local pos = digits.indexOf(ch)
// }
let digits_decl = ASTNode::Local {
variables: vec!["digits".to_string()],
initial_values: vec![Some(Box::new(ASTNode::Literal {
value: LiteralValue::String("0123456789".to_string()),
span: Span::unknown(),
}))],
span: Span::unknown(),
};
let loop_body = vec![ASTNode::Local {
variables: vec!["pos".to_string()],
initial_values: vec![Some(Box::new(ASTNode::MethodCall {
object: Box::new(ASTNode::Variable {
name: "digits".to_string(),
span: Span::unknown(),
}),
method: "indexOf".to_string(),
arguments: vec![ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}],
span: Span::unknown(),
}))],
span: Span::unknown(),
}];
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: crate::ast::BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(10),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: loop_body,
span: Span::unknown(),
};
let fn_body = vec![digits_decl, loop_node.clone()];
use std::collections::{BTreeSet, BTreeMap};
use crate::mir::BasicBlockId;
let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape {
header: BasicBlockId(0),
body: BasicBlockId(1),
latch: BasicBlockId(2),
exit: BasicBlockId(3),
pinned: BTreeSet::from(["i".to_string()]),
carriers: BTreeSet::new(),
body_locals: BTreeSet::from(["pos".to_string()]),
exit_live: BTreeSet::new(),
progress_carrier: None,
variable_definitions: BTreeMap::new(),
};
// IMPORTANT: Pass a reference to the same loop_node instance that's in fn_body
// find_stmt_index uses pointer comparison, so we must use &fn_body[1] instead of &loop_node
let env = analyze_captured_vars(&fn_body, &fn_body[1], &scope);
assert_eq!(env.vars.len(), 1);
assert!(env.get("digits").is_some());
let var = env.get("digits").unwrap();
assert_eq!(var.name, "digits");
assert!(var.is_immutable);
}
#[test]
fn test_capture_reassigned_rejected() {
use crate::ast::{ASTNode, LiteralValue, Span};
// Build AST for:
// local digits = "0123456789"
// digits = "abc" // reassignment
// loop(i < 10) {
// local pos = digits.indexOf(ch)
// }
let digits_decl = ASTNode::Local {
variables: vec!["digits".to_string()],
initial_values: vec![Some(Box::new(ASTNode::Literal {
value: LiteralValue::String("0123456789".to_string()),
span: Span::unknown(),
}))],
span: Span::unknown(),
};
let reassignment = ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "digits".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::Literal {
value: LiteralValue::String("abc".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
};
let loop_body = vec![ASTNode::Local {
variables: vec!["pos".to_string()],
initial_values: vec![Some(Box::new(ASTNode::MethodCall {
object: Box::new(ASTNode::Variable {
name: "digits".to_string(),
span: Span::unknown(),
}),
method: "indexOf".to_string(),
arguments: vec![],
span: Span::unknown(),
}))],
span: Span::unknown(),
}];
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: crate::ast::BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(10),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: loop_body,
span: Span::unknown(),
};
let fn_body = vec![digits_decl, reassignment, loop_node.clone()];
use std::collections::{BTreeSet, BTreeMap};
use crate::mir::BasicBlockId;
let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape {
header: BasicBlockId(0),
body: BasicBlockId(1),
latch: BasicBlockId(2),
exit: BasicBlockId(3),
pinned: BTreeSet::from(["i".to_string()]),
carriers: BTreeSet::new(),
body_locals: BTreeSet::from(["pos".to_string()]),
exit_live: BTreeSet::new(),
progress_carrier: None,
variable_definitions: BTreeMap::new(),
};
let env = analyze_captured_vars(&fn_body, &loop_node, &scope);
// Should reject because digits is reassigned
assert_eq!(env.vars.len(), 0);
}
#[test]
fn test_capture_after_loop_rejected() {
use crate::ast::{ASTNode, LiteralValue, Span};
// Build AST for:
// loop(i < 10) { }
// local digits = "0123456789" // defined AFTER loop
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: crate::ast::BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(10),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: vec![],
span: Span::unknown(),
};
let digits_decl = ASTNode::Local {
variables: vec!["digits".to_string()],
initial_values: vec![Some(Box::new(ASTNode::Literal {
value: LiteralValue::String("0123456789".to_string()),
span: Span::unknown(),
}))],
span: Span::unknown(),
};
let fn_body = vec![loop_node.clone(), digits_decl];
use std::collections::{BTreeSet, BTreeMap};
use crate::mir::BasicBlockId;
let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape {
header: BasicBlockId(0),
body: BasicBlockId(1),
latch: BasicBlockId(2),
exit: BasicBlockId(3),
pinned: BTreeSet::from(["i".to_string()]),
carriers: BTreeSet::new(),
body_locals: BTreeSet::new(),
exit_live: BTreeSet::new(),
progress_carrier: None,
variable_definitions: BTreeMap::new(),
};
let env = analyze_captured_vars(&fn_body, &loop_node, &scope);
// Should reject because digits is defined after the loop
assert_eq!(env.vars.len(), 0);
}
#[test]
fn test_capture_method_call_init_rejected() {
use crate::ast::{ASTNode, LiteralValue, Span};
// Build AST for:
// local result = someBox.getValue() // MethodCall init
// loop(i < 10) {
// local x = result.something()
// }
let result_decl = ASTNode::Local {
variables: vec!["result".to_string()],
initial_values: vec![Some(Box::new(ASTNode::MethodCall {
object: Box::new(ASTNode::Variable {
name: "someBox".to_string(),
span: Span::unknown(),
}),
method: "getValue".to_string(),
arguments: vec![],
span: Span::unknown(),
}))],
span: Span::unknown(),
};
let loop_body = vec![ASTNode::Local {
variables: vec!["x".to_string()],
initial_values: vec![Some(Box::new(ASTNode::MethodCall {
object: Box::new(ASTNode::Variable {
name: "result".to_string(),
span: Span::unknown(),
}),
method: "something".to_string(),
arguments: vec![],
span: Span::unknown(),
}))],
span: Span::unknown(),
}];
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: crate::ast::BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(10),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: loop_body,
span: Span::unknown(),
};
let fn_body = vec![result_decl, loop_node.clone()];
use std::collections::{BTreeSet, BTreeMap};
use crate::mir::BasicBlockId;
let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape {
header: BasicBlockId(0),
body: BasicBlockId(1),
latch: BasicBlockId(2),
exit: BasicBlockId(3),
pinned: BTreeSet::from(["i".to_string()]),
carriers: BTreeSet::new(),
body_locals: BTreeSet::from(["x".to_string()]),
exit_live: BTreeSet::new(),
progress_carrier: None,
variable_definitions: BTreeMap::new(),
};
let env = analyze_captured_vars(&fn_body, &loop_node, &scope);
// Should reject because result is initialized with MethodCall (not safe constant)
assert_eq!(env.vars.len(), 0);
}
#[test]
fn test_capture_unused_in_loop_rejected() {
use crate::ast::{ASTNode, LiteralValue, Span};
// Build AST for:
// local digits = "0123456789" // not used in loop
// loop(i < 10) {
// print(i) // doesn't use digits
// }
let digits_decl = ASTNode::Local {
variables: vec!["digits".to_string()],
initial_values: vec![Some(Box::new(ASTNode::Literal {
value: LiteralValue::String("0123456789".to_string()),
span: Span::unknown(),
}))],
span: Span::unknown(),
};
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: crate::ast::BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(10),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: vec![], // empty body, no usage of digits
span: Span::unknown(),
};
let fn_body = vec![digits_decl, loop_node.clone()];
use std::collections::{BTreeSet, BTreeMap};
use crate::mir::BasicBlockId;
let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape {
header: BasicBlockId(0),
body: BasicBlockId(1),
latch: BasicBlockId(2),
exit: BasicBlockId(3),
pinned: BTreeSet::from(["i".to_string()]),
carriers: BTreeSet::new(),
body_locals: BTreeSet::new(),
exit_live: BTreeSet::new(),
progress_carrier: None,
variable_definitions: BTreeMap::new(),
};
let env = analyze_captured_vars(&fn_body, &loop_node, &scope);
// Should reject because digits is not used in loop
assert_eq!(env.vars.len(), 0);
}
}