diff --git a/src/mir/builder.rs b/src/mir/builder.rs index 4ce83747..9e817a62 100644 --- a/src/mir/builder.rs +++ b/src/mir/builder.rs @@ -4,15 +4,20 @@ * Implements AST → MIR conversion with SSA construction */ -use super::slot_registry::resolve_slot_by_type_name; use super::{ - BasicBlock, BasicBlockId, CompareOp, ConstValue, Effect, EffectMask, - FunctionSignature, MirFunction, MirInstruction, MirModule, MirType, ValueId, + BasicBlock, BasicBlockId, CompareOp, ConstValue, Effect, EffectMask, FunctionSignature, + MirFunction, MirInstruction, MirModule, MirType, ValueId, }; -use crate::ast::{ASTNode, LiteralValue}; -use crate::mir::builder::builder_calls::CallTarget; +pub(crate) use builder_calls::CallTarget; use std::collections::HashMap; mod binding_context; // Phase 136 follow-up (Step 4/7): BindingContext extraction +mod builder_build; +mod builder_debug; +mod builder_emit; +mod builder_init; +mod builder_metadata; +mod builder_method_index; +mod builder_value_kind; mod builder_calls; mod call_resolution; // ChatGPT5 Pro: Type-safe call resolution utilities mod calls; // Call system modules (refactored from builder_calls) @@ -215,857 +220,6 @@ pub struct MirBuilder { pub(crate) repl_mode: bool, } -impl MirBuilder { - /// Create a new MIR builder - pub fn new() -> Self { - let plugin_method_sigs = plugin_sigs::load_plugin_method_sigs(); - let core_ctx = core_context::CoreContext::new(); - - // Phase 136 Step 7/7: Compilation context (new SSOT) - let comp_ctx = - compilation_context::CompilationContext::with_plugin_sigs(plugin_method_sigs.clone()); - - // フェーズM: no_phi_mode初期化削除 - #[allow(deprecated)] - Self { - current_module: None, - current_block: None, - - // Phase 136 Step 2/7: Core context (new SSOT) - core_ctx, - - type_ctx: type_context::TypeContext::new(), // Phase 136: Type context - scope_ctx: scope_context::ScopeContext::new(), // Phase 136 Step 3/7: Scope context - binding_ctx: binding_context::BindingContext::new(), // Phase 136 Step 4/7: Binding 
context - variable_ctx: variable_context::VariableContext::new(), // Phase 136 Step 5/7: Variable context - metadata_ctx: metadata_context::MetadataContext::new(), // Phase 136 Step 6/7: Metadata context - comp_ctx, // Phase 136 Step 7/7: Compilation context - pending_phis: Vec::new(), - - // Phase 2-5: binding_map initialization removed - - // フェーズM: no_phi_modeフィールド削除 - return_defer_active: false, - return_defer_slot: None, - return_defer_target: None, - return_deferred_emitted: false, - in_cleanup_block: false, - cleanup_allow_return: false, - cleanup_allow_throw: false, - suppress_pin_entry_copy_next: false, - - local_ssa_map: HashMap::new(), - schedule_mat_map: HashMap::new(), - pin_slot_names: HashMap::new(), - - in_unified_boxcall_fallback: false, - recursion_depth: 0, - root_is_app_mode: None, - static_box_singletons: HashMap::new(), // Phase 21.7: methodization support - repl_mode: false, // Phase 288 P2: REPL mode (default: file mode) - } - } - - // Phase 2-5: BindingContext sync helpers removed - binding_ctx is now SSOT - // Phase 2-6: VariableContext sync helpers removed - variable_ctx is now SSOT - - /// Push/pop helpers for If merge context (best-effort; optional usage) - pub(super) fn push_if_merge(&mut self, bb: BasicBlockId) { - // Phase 2-4: Use scope_ctx only (legacy field removed) - self.scope_ctx.push_if_merge(bb); - } - pub(super) fn pop_if_merge(&mut self) { - // Phase 2-4: Use scope_ctx only (legacy field removed) - let _ = self.scope_ctx.pop_if_merge(); - } - - /// Suppress entry pin copy for the next start_new_block (used for merge blocks). 
- pub(super) fn suppress_next_entry_pin_copy(&mut self) { - self.suppress_pin_entry_copy_next = true; - } - - // ---- Phase 74: BindingId allocation ---- - /// Allocate a new BindingId (parallel to ValueId allocation) - /// - /// ## Parallel ValueId/BindingId Allocation - /// - /// BindingId allocation is completely independent from ValueId allocation: - /// - `next_value_id()` increments `value_gen` counter - /// - `allocate_binding_id()` increments `next_binding_id` counter - /// - /// This parallelism enables: - /// 1. **Stable binding identity** across SSA transformations - /// 2. **Independent shadowing tracking** separate from SSA renaming - /// 3. **Future ScopeManager migration** (Phase 75+) without breaking SSA - /// - /// Example: - /// ```ignore - /// // local x = 1; <- allocate_binding_id() -> BindingId(0) - /// // next_value_id() -> ValueId(10) - /// // { - /// // local x = 2; <- allocate_binding_id() -> BindingId(1) - /// // next_value_id() -> ValueId(20) - /// // } - /// ``` - pub fn allocate_binding_id(&mut self) -> super::BindingId { - // Phase 136 Step 2/7 + Phase 2-2: Use core_ctx as SSOT (no sync needed) - self.core_ctx.next_binding() - } - - // ---- Hint helpers (no-op by default) ---- - // Phase 136 Step 6/7: Delegate to metadata_ctx with legacy sync - #[inline] - pub(crate) fn hint_scope_enter(&mut self, id: u32) { - self.metadata_ctx.hint_scope_enter(id); - } - #[inline] - pub(crate) fn hint_scope_leave(&mut self, id: u32) { - self.metadata_ctx.hint_scope_leave(id); - } - #[inline] - pub(crate) fn hint_join_result>(&mut self, var: S) { - self.metadata_ctx.hint_join_result(var); - } - - // ---------------------- - // Debug scope helpers (region_id for DebugHub events) - // ---------------------- - #[inline] - pub(crate) fn debug_next_join_id(&mut self) -> u32 { - // Phase 136 Step 2/7 + Phase 2-2: Use core_ctx as SSOT (no sync needed) - self.core_ctx.next_debug_join() - } - - #[inline] - pub(crate) fn debug_push_region>(&mut self, region: S) 
{ - // Phase 2-4: Use scope_ctx only (legacy field removed) - let region = region.into(); - self.scope_ctx.debug_push_region(region); - } - - #[inline] - pub(crate) fn debug_pop_region(&mut self) { - // Phase 2-4: Use scope_ctx only (legacy field removed) - self.scope_ctx.debug_pop_region(); - } - - #[inline] - #[allow(deprecated)] - pub(crate) fn debug_current_region_id(&self) -> Option { - // Phase 136 Step 3/7: Read from scope_ctx (SSOT) - self.scope_ctx.debug_current_region_id() - } - - /// Hint for downstream metadata: set the logical source file name/path for the next build. - /// Phase 136 Step 6/7: Delegate to metadata_ctx - pub fn set_source_file_hint>(&mut self, source: S) { - self.metadata_ctx.set_source_file(source); - } - - /// Clear the source file hint (used when reusing the builder across modules). - /// Phase 136 Step 6/7: Delegate to metadata_ctx - pub fn clear_source_file_hint(&mut self) { - self.metadata_ctx.clear_source_file(); - } - - /// Resolve current source file hint (builder field or env fallback). - /// Phase 136 Step 6/7: Delegate to metadata_ctx - fn current_source_file(&self) -> Option { - self.metadata_ctx - .current_source_file() - .or_else(|| std::env::var("NYASH_SOURCE_FILE_HINT").ok()) - } - - /// Create a new MirFunction with source metadata applied. 
- fn new_function_with_metadata( - &self, - signature: FunctionSignature, - entry_block: BasicBlockId, - ) -> MirFunction { - let mut f = MirFunction::new(signature, entry_block); - f.metadata.source_file = self.current_source_file(); - f - } - - // ---------------------- - // Compile trace helpers (dev only; env-gated) - // ---------------------- - #[inline] - pub(super) fn compile_trace_enabled() -> bool { - std::env::var("NYASH_MIR_COMPILE_TRACE").ok().as_deref() == Some("1") - } - - #[inline] - pub(super) fn trace_compile>(&self, msg: S) { - if Self::compile_trace_enabled() { - eprintln!("[mir-compile] {}", msg.as_ref()); - } - } - - // ---------------------- - // Method tail index (performance helper) - // ---------------------- - fn rebuild_method_tail_index(&mut self) { - self.comp_ctx.method_tail_index.clear(); - if let Some(ref module) = self.current_module { - for name in module.functions.keys() { - if let (Some(dot), Some(slash)) = (name.rfind('.'), name.rfind('/')) { - if slash > dot { - let tail = &name[dot..]; - self.comp_ctx - .method_tail_index - .entry(tail.to_string()) - .or_insert_with(Vec::new) - .push(name.clone()); - } - } - } - self.comp_ctx.method_tail_index_source_len = module.functions.len(); - } else { - self.comp_ctx.method_tail_index_source_len = 0; - } - } - - fn ensure_method_tail_index(&mut self) { - let need_rebuild = match self.current_module { - Some(ref refmod) => { - self.comp_ctx.method_tail_index_source_len != refmod.functions.len() - } - None => self.comp_ctx.method_tail_index_source_len != 0, - }; - if need_rebuild { - self.rebuild_method_tail_index(); - } - } - - pub(super) fn method_candidates(&mut self, method: &str, arity: usize) -> Vec { - self.ensure_method_tail_index(); - let tail = format!(".{}{}", method, format!("/{}", arity)); - self.comp_ctx - .method_tail_index - .get(&tail) - .cloned() - .unwrap_or_default() - } - - pub(super) fn method_candidates_tail>(&mut self, tail: S) -> Vec { - 
self.ensure_method_tail_index(); - self.comp_ctx - .method_tail_index - .get(tail.as_ref()) - .cloned() - .unwrap_or_default() - } - - /// Build a complete MIR module from AST - pub fn build_module(&mut self, ast: ASTNode) -> Result { - self.prepare_module()?; - let result_value = self.lower_root(ast)?; - self.finalize_module(result_value) - } - - /// Build an expression and return its value ID - pub(super) fn build_expression(&mut self, ast: ASTNode) -> Result { - // Delegated to exprs.rs to keep this file lean - // Debug: Track recursion depth to detect infinite loops - const MAX_RECURSION_DEPTH: usize = 200; - self.recursion_depth += 1; - if self.recursion_depth > MAX_RECURSION_DEPTH { - eprintln!("\n[FATAL] ============================================"); - eprintln!( - "[FATAL] Recursion depth exceeded {} in build_expression", - MAX_RECURSION_DEPTH - ); - eprintln!("[FATAL] Current depth: {}", self.recursion_depth); - eprintln!("[FATAL] AST node type: {:?}", std::mem::discriminant(&ast)); - eprintln!("[FATAL] ============================================\n"); - return Err(format!( - "Recursion depth exceeded: {} (possible infinite loop)", - self.recursion_depth - )); - } - - let result = self.build_expression_impl(ast); - self.recursion_depth -= 1; - result - } - - /// Build a literal value - pub(super) fn build_literal(&mut self, literal: LiteralValue) -> Result { - // Determine type without moving literal - let ty_for_dst = match &literal { - LiteralValue::Integer(_) => Some(super::MirType::Integer), - LiteralValue::Float(_) => Some(super::MirType::Float), - LiteralValue::Bool(_) => Some(super::MirType::Bool), - LiteralValue::String(_) => Some(super::MirType::String), - _ => None, - }; - - // Emit via ConstantEmissionBox(仕様不変の統一ルート) - let dst = match literal { - LiteralValue::Integer(n) => { - crate::mir::builder::emission::constant::emit_integer(self, n) - } - LiteralValue::Float(f) => crate::mir::builder::emission::constant::emit_float(self, f), - 
LiteralValue::String(s) => { - crate::mir::builder::emission::constant::emit_string(self, s) - } - LiteralValue::Bool(b) => crate::mir::builder::emission::constant::emit_bool(self, b), - LiteralValue::Null => crate::mir::builder::emission::constant::emit_null(self), - LiteralValue::Void => crate::mir::builder::emission::constant::emit_void(self), - }; - // Annotate type - if let Some(ty) = ty_for_dst { - self.type_ctx.value_types.insert(dst, ty); - } - - Ok(dst) - } - - /// Build variable access - pub(super) fn build_variable_access(&mut self, name: String) -> Result { - // Step 5-5-G: __pin$ variables should NEVER be accessed from variable_map - // They are transient temporaries created during expression building and - // should not persist across blocks. If we see one here, it's a compiler bug. - if name.starts_with("__pin$") { - return Err(format!( - "COMPILER BUG: Attempt to access __pin$ temporary '{}' from variable_map. \ - __pin$ variables should only exist as direct SSA values, not as named variables.", - name - )); - } - - if let Some(&value_id) = self.variable_ctx.variable_map.get(&name) { - Ok(value_id) - } else { - Err(self.undefined_variable_message(&name)) - } - } - - pub(in crate::mir::builder) fn undefined_variable_message(&self, name: &str) -> String { - // Enhance diagnostics using Using simple registry (Phase 1) - let mut msg = format!("Undefined variable: {}", name); - - // Stage-3 keyword diagnostic (local/flow/try/catch/throw) - if name == "local" && !crate::config::env::parser_stage3_enabled() { - msg.push_str("\nHint: 'local' is a Stage-3 keyword. 
Prefer NYASH_FEATURES=stage3 (legacy: NYASH_PARSER_STAGE3=1 / HAKO_PARSER_STAGE3=1 for Stage-B)."); - msg.push_str("\nFor AotPrep verification, use tools/hakorune_emit_mir.sh which sets these automatically."); - } else if (name == "flow" || name == "try" || name == "catch" || name == "throw") - && !crate::config::env::parser_stage3_enabled() - { - msg.push_str(&format!("\nHint: '{}' is a Stage-3 keyword. Prefer NYASH_FEATURES=stage3 (legacy: NYASH_PARSER_STAGE3=1 / HAKO_PARSER_STAGE3=1 for Stage-B).", name)); - } - - let suggest = crate::using::simple_registry::suggest_using_for_symbol(name); - if !suggest.is_empty() { - msg.push_str("\nHint: symbol appears in using module(s): "); - msg.push_str(&suggest.join(", ")); - msg.push_str( - "\nConsider adding 'using [as Alias]' or check nyash.toml [using].", - ); - } - - msg - } - - /// Build assignment - pub(super) fn build_assignment( - &mut self, - var_name: String, - value: ASTNode, - ) -> Result { - // SSOT (LANGUAGE_REFERENCE_2025 / syntax-cheatsheet): - // - Assignment to an undeclared name is an error. - // - Use `local name = ...` (or `local name; name = ...`) to declare. - vars::assignment_resolver::AssignmentResolverBox::ensure_declared(self, &var_name)?; - - let value_id = self.build_expression(value)?; - - // Step 5-5-E: FIX variable map corruption bug - // REMOVED pin_to_slot() call - it was causing __pin$ temporaries to overwrite - // real variable names in the variable map. - // - // Root cause: pin_to_slot(raw_value_id, "@assign") would sometimes return - // a ValueId from a previous __pin$ temporary (e.g., __pin$767$@binop_lhs), - // causing variable_map["m"] to point to the wrong ValueId. - // - // SSA + PHI merges work correctly without explicit pinning here. - // The expression building already creates necessary temporaries. 
- - // Step 5-5-F: NEVER insert __pin$ temporaries into variable_map - // __pin$ variables are transient compiler-generated temporaries that should - // never be tracked as real variables. They are used only within expression - // building and should not persist across blocks or loops. - // - // BUG FIX: Previously, __pin$ variables would be inserted into variable_map, - // causing stale references after LoopForm transformation renumbers blocks. - // Result: VM would try to read undefined ValueIds (e.g., ValueId(270) at bb303). - if !var_name.starts_with("__pin$") { - // Phase 287: Release strong references for previous value BEFORE updating variable_map - // This ensures "alive until overwrite, then dropped" semantics - // ⚠️ Termination guard: don't emit after return/throw - if !self.is_current_block_terminated() { - if let Some(prev) = self.variable_ctx.variable_map.get(&var_name).copied() { - let _ = self.emit_instruction(MirInstruction::ReleaseStrong { - values: vec![prev], - }); - } - } - - // In SSA form, each assignment creates a new value - self.variable_ctx - .variable_map - .insert(var_name.clone(), value_id); - } - - Ok(value_id) - } - - /// Emit an instruction to the current basic block - pub(super) fn emit_instruction(&mut self, instruction: MirInstruction) -> Result<(), String> { - let block_id = self.current_block.ok_or("No current basic block")?; - - // Make instruction mutable for potential receiver materialization - let mut instruction = instruction; - - // Precompute debug metadata to avoid borrow conflicts later - let _dbg_fn_name = self - .scope_ctx - .current_function - .as_ref() - .map(|f| f.signature.name.clone()); - let _dbg_region_id = self.debug_current_region_id(); - // P0: PHI の軽量補強と観測は、関数ブロック取得前に実施して借用競合を避ける - if let MirInstruction::Phi { dst, inputs, .. 
} = &instruction { - origin::phi::propagate_phi_meta(self, *dst, inputs); - observe::ssa::emit_phi(self, *dst, inputs); - } - - // CRITICAL: Final receiver materialization for MethodCall - // This ensures the receiver has an in-block definition in the same block as the Call. - // Must happen BEFORE function mutable borrow to avoid borrowck conflicts. - if let MirInstruction::Call { - callee: Some(callee), - dst, - args, - effects, - .. - } = &instruction - { - use crate::mir::definitions::call_unified::Callee; - if let Callee::Method { - box_name, - method, - receiver: Some(r), - certainty, - box_kind, - } = callee.clone() - { - // LocalSSA: ensure receiver has a Copy in current_block - let r_local = crate::mir::builder::ssa::local::recv(self, r); - - // Update instruction with materialized receiver - let new_callee = Callee::Method { - box_name: box_name.clone(), - method: method.clone(), - receiver: Some(r_local), - certainty, - box_kind, - }; - instruction = MirInstruction::Call { - dst: *dst, - func: crate::mir::ValueId::INVALID, // Legacy dummy (not a real SSA use) - callee: Some(new_callee), - args: args.clone(), - effects: *effects, - }; - } - } - - if let Some(ref mut function) = self.scope_ctx.current_function { - // Pre-capture branch/jump targets for predecessor update after we finish - // mutably borrowing the current block. - let (then_t, else_t, jump_t) = match &instruction { - MirInstruction::Branch { - then_bb, else_bb, .. - } => (Some(*then_bb), Some(*else_bb), None), - MirInstruction::Jump { target, .. 
} => (None, None, Some(*target)), - _ => (None, None, None), - }; - - // Extract function name before mutable borrow to avoid borrowck error - let current_fn_name = function.signature.name.clone(); - - if let Some(block) = function.get_block_mut(block_id) { - // CRITICAL: Copy専用トレース(LocalSSA調査用) - if let MirInstruction::Copy { dst, src } = &instruction { - if std::env::var("NYASH_LOCAL_SSA_TRACE").ok().as_deref() == Some("1") { - eprintln!( - "[emit-inst] fn={} bb={:?} COPY %{} <- %{}", - current_fn_name, - self.current_block.map(|b| b.0).unwrap_or(0), - dst.0, - src.0 - ); - } - } - - // Invariant: Call must always carry a Callee (unified path). - if let MirInstruction::Call { callee, .. } = &instruction { - if callee.is_none() { - return Err("builder invariant violated: MirInstruction::Call.callee must be Some (unified call)".into()); - } else if std::env::var("NYASH_LOCAL_SSA_TRACE").ok().as_deref() == Some("1") { - use crate::mir::definitions::call_unified::Callee; - if let Some(Callee::Method { - box_name, - method, - receiver: Some(r), - .. - }) = callee - { - eprintln!( - "[emit-inst] fn={} bb={:?} Call {}.{} recv=%{}", - current_fn_name, - self.current_block.map(|b| b.0).unwrap_or(0), - box_name, - method, - r.0 - ); - } - } else if std::env::var("NYASH_BUILDER_TRACE_RECV").ok().as_deref() == Some("1") - { - use crate::mir::definitions::call_unified::Callee; - if let Some(Callee::Method { - box_name, - method, - receiver: Some(r), - .. 
- }) = callee - { - let names: Vec = self - .variable_ctx - .variable_map - .iter() - .filter(|(_, &vid)| vid == *r) - .map(|(k, _)| k.clone()) - .collect(); - eprintln!( - "[builder/recv-trace] fn={} bb={:?} method={}.{} recv=%{} aliases={:?}", - current_fn_name, - self.current_block, - box_name, - method, - r.0, - names - ); - } - } - } - if utils::builder_debug_enabled() { - eprintln!( - "[BUILDER] emit @bb{} -> {}", - block_id, - match &instruction { - MirInstruction::TypeOp { dst, op, value, ty } => - format!("typeop {:?} {} {:?} -> {}", op, value, ty, dst), - MirInstruction::Print { value, .. } => format!("print {}", value), - MirInstruction::BoxCall { - box_val, - method, - method_id, - args, - dst, - .. - } => { - if let Some(mid) = method_id { - format!( - "boxcall {}.{}[#{}]({:?}) -> {:?}", - box_val, method, mid, args, dst - ) - } else { - format!( - "boxcall {}.{}({:?}) -> {:?}", - box_val, method, args, dst - ) - } - } - MirInstruction::Call { - func, args, dst, .. - } => format!("call {}({:?}) -> {:?}", func, args, dst), - MirInstruction::NewBox { - dst, - box_type, - args, - } => format!("new {}({:?}) -> {}", box_type, args, dst), - MirInstruction::Const { dst, value } => - format!("const {:?} -> {}", value, dst), - MirInstruction::Branch { - condition, - then_bb, - else_bb, - .. - } => format!("br {}, {}, {}", condition, then_bb, else_bb), - MirInstruction::Jump { target, .. } => format!("br {}", target), - _ => format!("{:?}", instruction), - } - ); - } - // Phase 136 Step 6/7: Use metadata_ctx for span - block.add_instruction_with_span( - instruction.clone(), - self.metadata_ctx.current_span(), - ); - // Drop the mutable borrow of `block` before updating other blocks - } - // Update predecessor sets for branch/jump immediately so that - // debug_verify_phi_inputs can observe a consistent CFG without - // requiring a full function.update_cfg() pass. 
- if let Some(t) = then_t { - if let Some(succ) = function.get_block_mut(t) { - succ.add_predecessor(block_id); - } - } - if let Some(t) = else_t { - if let Some(succ) = function.get_block_mut(t) { - succ.add_predecessor(block_id); - } - } - if let Some(t) = jump_t { - if let Some(succ) = function.get_block_mut(t) { - succ.add_predecessor(block_id); - } - } - Ok(()) - } else { - Err(format!("Basic block {} does not exist", block_id)) - } - } - - /// Update an existing PHI instruction's inputs (for loop sealing) - /// Used by LoopFormBuilder to complete incomplete PHI nodes - #[allow(dead_code)] - pub(super) fn update_phi_instruction( - &mut self, - block: BasicBlockId, - phi_id: ValueId, - new_inputs: Vec<(BasicBlockId, ValueId)>, - ) -> Result<(), String> { - if let Some(ref mut function) = self.scope_ctx.current_function { - if let Some(block_data) = function.get_block_mut(block) { - // Find PHI instruction with matching dst - for inst in &mut block_data.instructions { - if let MirInstruction::Phi { dst, inputs, .. 
} = inst { - if *dst == phi_id { - *inputs = new_inputs; - return Ok(()); - } - } - } - Err(format!( - "PHI instruction {} not found in block {}", - phi_id, block - )) - } else { - Err(format!("Block {} not found", block)) - } - } else { - Err("No current function".to_string()) - } - } - - // フェーズM: is_no_phi_mode()メソッド削除 - - // フェーズM: insert_edge_copy()メソッド削除(no_phi_mode撤廃により不要) - - /// Build new expression: new ClassName(arguments) - pub(super) fn build_new_expression( - &mut self, - class: String, - arguments: Vec, - ) -> Result { - // Phase 9.78a: Unified Box creation using NewBox instruction - // Core-13 pure mode: emit ExternCall(env.box.new) with type name const only - if crate::config::env::mir_core13_pure() { - // Emit Const String for type name(ConstantEmissionBox) - let ty_id = crate::mir::builder::emission::constant::emit_string(self, class.clone()); - // Evaluate arguments (pass through to env.box.new shim) - let mut arg_vals: Vec = Vec::with_capacity(arguments.len()); - for a in arguments { - arg_vals.push(self.build_expression(a)?); - } - // Build arg list: [type, a1, a2, ...] - let mut args: Vec = Vec::with_capacity(1 + arg_vals.len()); - args.push(ty_id); - args.extend(arg_vals); - // Call env.box.new - // 📦 Hotfix 3: Use next_value_id() to respect function parameter reservation - let dst = self.next_value_id(); - self.emit_instruction(MirInstruction::ExternCall { - dst: Some(dst), - iface_name: "env.box".to_string(), - method_name: "new".to_string(), - args, - effects: EffectMask::PURE, - })?; - // 型注釈(最小) - self.type_ctx - .value_types - .insert(dst, super::MirType::Box(class.clone())); - return Ok(dst); - } - - // Optimization: Primitive wrappers → emit Const directly when possible - if class == "IntegerBox" && arguments.len() == 1 { - if let ASTNode::Literal { - value: LiteralValue::Integer(n), - .. 
- } = arguments[0].clone() - { - // 📦 Hotfix 3: Use next_value_id() to respect function parameter reservation - let dst = self.next_value_id(); - self.emit_instruction(MirInstruction::Const { - dst, - value: ConstValue::Integer(n), - })?; - self.type_ctx - .value_types - .insert(dst, super::MirType::Integer); - return Ok(dst); - } - } - - // First, evaluate all arguments to get their ValueIds - let mut arg_values = Vec::new(); - for arg in arguments { - let arg_value = self.build_expression(arg)?; - arg_values.push(arg_value); - } - - // Generate the destination ValueId - // 📦 Hotfix 3: Use next_value_id() to respect function parameter reservation - let dst = self.next_value_id(); - - // Emit NewBox instruction for all Box types - // VM will handle optimization for basic types internally - self.emit_instruction(MirInstruction::NewBox { - dst, - box_type: class.clone(), - args: arg_values.clone(), - })?; - // Phase 15.5: Unified box type handling - // All boxes (including former core boxes) are treated uniformly as Box types - self.type_ctx - .value_types - .insert(dst, super::MirType::Box(class.clone())); - - // Record origin for optimization: dst was created by NewBox of class - self.type_ctx.value_origin_newbox.insert(dst, class.clone()); - - // birth 呼び出し(Builder 正規化) - // 優先: 低下済みグローバル関数 `.birth/Arity`(Arity は me を含まない) - // 代替: 既存互換として BoxCall("birth")(プラグイン/ビルトインの初期化に対応) - if class != "StringBox" { - let arity = arg_values.len(); - let lowered = - crate::mir::builder::calls::function_lowering::generate_method_function_name( - &class, "birth", arity, - ); - let use_lowered = if let Some(ref module) = self.current_module { - module.functions.contains_key(&lowered) - } else { - false - }; - if use_lowered { - // Call Global("Class.birth/Arity") with argv = [me, args...] 
- let mut argv: Vec = Vec::with_capacity(1 + arity); - argv.push(dst); - argv.extend(arg_values.iter().copied()); - self.emit_legacy_call(None, CallTarget::Global(lowered), argv)?; - } else { - // Fallback policy: - // - For user-defined boxes (no explicit constructor), do NOT emit BoxCall("birth"). - // VM will treat plain NewBox as constructed; dev verify warns if needed. - // - For builtins/plugins, keep BoxCall("birth") fallback to preserve legacy init. - let is_user_box = self.comp_ctx.user_defined_boxes.contains_key(&class); // Phase 285LLVM-1.1: HashMap - // Dev safety: allow disabling birth() injection for builtins to avoid - // unified-call method dispatch issues while migrating. Off by default unless explicitly enabled. - let allow_builtin_birth = std::env::var("NYASH_DEV_BIRTH_INJECT_BUILTINS") - .ok() - .as_deref() - == Some("1"); - if !is_user_box && allow_builtin_birth { - let birt_mid = resolve_slot_by_type_name(&class, "birth"); - self.emit_box_or_plugin_call( - None, - dst, - "birth".to_string(), - birt_mid, - arg_values, - EffectMask::READ.add(Effect::ReadHeap), - )?; - } - } - } - - Ok(dst) - } - - /// Check if the current basic block is terminated - fn is_current_block_terminated(&self) -> bool { - if let (Some(block_id), Some(ref function)) = - (self.current_block, &self.scope_ctx.current_function) - { - if let Some(block) = function.get_block(block_id) { - return block.is_terminated(); - } - } - false - } - - // ============================================================================ - // Phase 26-A: ValueId型安全化メソッド - // ============================================================================ - - /// 型付きValueIdを発行(新API) - /// Phase 136 P0: Use SSOT allocator (next_value_id) to respect function context - pub fn new_typed_value(&mut self, kind: super::MirValueKind) -> super::TypedValueId { - let id = self.next_value_id(); - self.type_ctx.value_kinds.insert(id, kind); - super::TypedValueId::new(id, kind) - } - - /// 既存ValueIdの型情報を取得 - pub 
fn get_value_kind(&self, id: ValueId) -> Option { - self.type_ctx.value_kinds.get(&id).copied() - } - - /// 既存ValueIdに型情報を後付け(レガシー互換用) - pub fn register_value_kind(&mut self, id: ValueId, kind: super::MirValueKind) { - self.type_ctx.value_kinds.insert(id, kind); - } - - /// 型安全なパラメータ判定(ValueIdベース) - GUARD Bug Prevention - pub fn is_value_parameter(&self, id: ValueId) -> bool { - self.get_value_kind(id) - .map(|kind| kind.is_parameter()) - .unwrap_or(false) - } - - /// 型安全なローカル変数判定(ValueIdベース) - pub fn is_value_local(&self, id: ValueId) -> bool { - self.get_value_kind(id) - .map(|kind| kind.is_local()) - .unwrap_or(false) - } - - /// 型安全なLoopCarrier判定(ValueIdベース) - pub fn is_value_loop_carrier(&self, id: ValueId) -> bool { - self.get_value_kind(id) - .map(|kind| kind.is_loop_carrier()) - .unwrap_or(false) - } -} - impl Default for MirBuilder { fn default() -> Self { Self::new() diff --git a/src/mir/builder/builder_build.rs b/src/mir/builder/builder_build.rs new file mode 100644 index 00000000..9cd41d84 --- /dev/null +++ b/src/mir/builder/builder_build.rs @@ -0,0 +1,316 @@ +use super::builder_calls::CallTarget; +use crate::mir::slot_registry::resolve_slot_by_type_name; +use super::vars; +use super::{ConstValue, Effect, EffectMask, MirBuilder, MirInstruction, MirModule, ValueId}; +use crate::ast::{ASTNode, LiteralValue}; + +impl MirBuilder { + /// Build a complete MIR module from AST + pub fn build_module(&mut self, ast: ASTNode) -> Result { + self.prepare_module()?; + let result_value = self.lower_root(ast)?; + self.finalize_module(result_value) + } + + /// Build an expression and return its value ID + pub(in crate::mir) fn build_expression(&mut self, ast: ASTNode) -> Result { + // Delegated to exprs.rs to keep this file lean + // Debug: Track recursion depth to detect infinite loops + const MAX_RECURSION_DEPTH: usize = 200; + self.recursion_depth += 1; + if self.recursion_depth > MAX_RECURSION_DEPTH { + eprintln!("\n[FATAL] 
============================================"); + eprintln!( + "[FATAL] Recursion depth exceeded {} in build_expression", + MAX_RECURSION_DEPTH + ); + eprintln!("[FATAL] Current depth: {}", self.recursion_depth); + eprintln!("[FATAL] AST node type: {:?}", std::mem::discriminant(&ast)); + eprintln!("[FATAL] ============================================\n"); + return Err(format!( + "Recursion depth exceeded: {} (possible infinite loop)", + self.recursion_depth + )); + } + + let result = self.build_expression_impl(ast); + self.recursion_depth -= 1; + result + } + + /// Build a literal value + pub(super) fn build_literal(&mut self, literal: LiteralValue) -> Result { + // Determine type without moving literal + let ty_for_dst = match &literal { + LiteralValue::Integer(_) => Some(super::MirType::Integer), + LiteralValue::Float(_) => Some(super::MirType::Float), + LiteralValue::Bool(_) => Some(super::MirType::Bool), + LiteralValue::String(_) => Some(super::MirType::String), + _ => None, + }; + + // Emit via ConstantEmissionBox(仕様不変の統一ルート) + let dst = match literal { + LiteralValue::Integer(n) => { + crate::mir::builder::emission::constant::emit_integer(self, n) + } + LiteralValue::Float(f) => crate::mir::builder::emission::constant::emit_float(self, f), + LiteralValue::String(s) => { + crate::mir::builder::emission::constant::emit_string(self, s) + } + LiteralValue::Bool(b) => crate::mir::builder::emission::constant::emit_bool(self, b), + LiteralValue::Null => crate::mir::builder::emission::constant::emit_null(self), + LiteralValue::Void => crate::mir::builder::emission::constant::emit_void(self), + }; + // Annotate type + if let Some(ty) = ty_for_dst { + self.type_ctx.value_types.insert(dst, ty); + } + + Ok(dst) + } + + /// Build variable access + pub(super) fn build_variable_access(&mut self, name: String) -> Result { + // Step 5-5-G: __pin$ variables should NEVER be accessed from variable_map + // They are transient temporaries created during expression building and + 
// should not persist across blocks. If we see one here, it's a compiler bug. + if name.starts_with("__pin$") { + return Err(format!( + "COMPILER BUG: Attempt to access __pin$ temporary '{}' from variable_map. \ + __pin$ variables should only exist as direct SSA values, not as named variables.", + name + )); + } + + if let Some(&value_id) = self.variable_ctx.variable_map.get(&name) { + Ok(value_id) + } else { + Err(self.undefined_variable_message(&name)) + } + } + + pub(in crate::mir::builder) fn undefined_variable_message(&self, name: &str) -> String { + // Enhance diagnostics using Using simple registry (Phase 1) + let mut msg = format!("Undefined variable: {}", name); + + // Stage-3 keyword diagnostic (local/flow/try/catch/throw) + if name == "local" && !crate::config::env::parser_stage3_enabled() { + msg.push_str("\nHint: 'local' is a Stage-3 keyword. Prefer NYASH_FEATURES=stage3 (legacy: NYASH_PARSER_STAGE3=1 / HAKO_PARSER_STAGE3=1 for Stage-B)."); + msg.push_str("\nFor AotPrep verification, use tools/hakorune_emit_mir.sh which sets these automatically."); + } else if (name == "flow" || name == "try" || name == "catch" || name == "throw") + && !crate::config::env::parser_stage3_enabled() + { + msg.push_str(&format!("\nHint: '{}' is a Stage-3 keyword. Prefer NYASH_FEATURES=stage3 (legacy: NYASH_PARSER_STAGE3=1 / HAKO_PARSER_STAGE3=1 for Stage-B).", name)); + } + + let suggest = crate::using::simple_registry::suggest_using_for_symbol(name); + if !suggest.is_empty() { + msg.push_str("\nHint: symbol appears in using module(s): "); + msg.push_str(&suggest.join(", ")); + msg.push_str( + "\nConsider adding 'using [as Alias]' or check nyash.toml [using].", + ); + } + + msg + } + + /// Build assignment + pub(super) fn build_assignment( + &mut self, + var_name: String, + value: ASTNode, + ) -> Result { + // SSOT (LANGUAGE_REFERENCE_2025 / syntax-cheatsheet): + // - Assignment to an undeclared name is an error. 
+ // - Use `local name = ...` (or `local name; name = ...`) to declare. + vars::assignment_resolver::AssignmentResolverBox::ensure_declared(self, &var_name)?; + + let value_id = self.build_expression(value)?; + + // Step 5-5-E: FIX variable map corruption bug + // REMOVED pin_to_slot() call - it was causing __pin$ temporaries to overwrite + // real variable names in the variable map. + // + // Root cause: pin_to_slot(raw_value_id, "@assign") would sometimes return + // a ValueId from a previous __pin$ temporary (e.g., __pin$767$@binop_lhs), + // causing variable_map["m"] to point to the wrong ValueId. + // + // SSA + PHI merges work correctly without explicit pinning here. + // The expression building already creates necessary temporaries. + + // Step 5-5-F: NEVER insert __pin$ temporaries into variable_map + // __pin$ variables are transient compiler-generated temporaries that should + // never be tracked as real variables. They are used only within expression + // building and should not persist across blocks or loops. + // + // BUG FIX: Previously, __pin$ variables would be inserted into variable_map, + // causing stale references after LoopForm transformation renumbers blocks. + // Result: VM would try to read undefined ValueIds (e.g., ValueId(270) at bb303). 
+ if !var_name.starts_with("__pin$") { + // Phase 287: Release strong references for previous value BEFORE updating variable_map + // This ensures "alive until overwrite, then dropped" semantics + // ⚠️ Termination guard: don't emit after return/throw + if !self.is_current_block_terminated() { + if let Some(prev) = self.variable_ctx.variable_map.get(&var_name).copied() { + let _ = self.emit_instruction(MirInstruction::ReleaseStrong { + values: vec![prev], + }); + } + } + + // In SSA form, each assignment creates a new value + self.variable_ctx + .variable_map + .insert(var_name.clone(), value_id); + } + + Ok(value_id) + } + + /// Build new expression: new ClassName(arguments) + pub(super) fn build_new_expression( + &mut self, + class: String, + arguments: Vec, + ) -> Result { + // Phase 9.78a: Unified Box creation using NewBox instruction + // Core-13 pure mode: emit ExternCall(env.box.new) with type name const only + if crate::config::env::mir_core13_pure() { + // Emit Const String for type name(ConstantEmissionBox) + let ty_id = crate::mir::builder::emission::constant::emit_string(self, class.clone()); + // Evaluate arguments (pass through to env.box.new shim) + let mut arg_vals: Vec = Vec::with_capacity(arguments.len()); + for a in arguments { + arg_vals.push(self.build_expression(a)?); + } + // Build arg list: [type, a1, a2, ...] 
+ let mut args: Vec = Vec::with_capacity(1 + arg_vals.len()); + args.push(ty_id); + args.extend(arg_vals); + // Call env.box.new + // 📦 Hotfix 3: Use next_value_id() to respect function parameter reservation + let dst = self.next_value_id(); + self.emit_instruction(MirInstruction::ExternCall { + dst: Some(dst), + iface_name: "env.box".to_string(), + method_name: "new".to_string(), + args, + effects: EffectMask::PURE, + })?; + // 型注釈(最小) + self.type_ctx + .value_types + .insert(dst, super::MirType::Box(class.clone())); + return Ok(dst); + } + + // Optimization: Primitive wrappers → emit Const directly when possible + if class == "IntegerBox" && arguments.len() == 1 { + if let ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } = arguments[0].clone() + { + // 📦 Hotfix 3: Use next_value_id() to respect function parameter reservation + let dst = self.next_value_id(); + self.emit_instruction(MirInstruction::Const { + dst, + value: ConstValue::Integer(n), + })?; + self.type_ctx + .value_types + .insert(dst, super::MirType::Integer); + return Ok(dst); + } + } + + // First, evaluate all arguments to get their ValueIds + let mut arg_values = Vec::new(); + for arg in arguments { + let arg_value = self.build_expression(arg)?; + arg_values.push(arg_value); + } + + // Generate the destination ValueId + // 📦 Hotfix 3: Use next_value_id() to respect function parameter reservation + let dst = self.next_value_id(); + + // Emit NewBox instruction for all Box types + // VM will handle optimization for basic types internally + self.emit_instruction(MirInstruction::NewBox { + dst, + box_type: class.clone(), + args: arg_values.clone(), + })?; + // Phase 15.5: Unified box type handling + // All boxes (including former core boxes) are treated uniformly as Box types + self.type_ctx + .value_types + .insert(dst, super::MirType::Box(class.clone())); + + // Record origin for optimization: dst was created by NewBox of class + self.type_ctx.value_origin_newbox.insert(dst, 
class.clone()); + + // birth 呼び出し(Builder 正規化) + // 優先: 低下済みグローバル関数 `.birth/Arity`(Arity は me を含まない) + // 代替: 既存互換として BoxCall("birth")(プラグイン/ビルトインの初期化に対応) + if class != "StringBox" { + let arity = arg_values.len(); + let lowered = + crate::mir::builder::calls::function_lowering::generate_method_function_name( + &class, "birth", arity, + ); + let use_lowered = if let Some(ref module) = self.current_module { + module.functions.contains_key(&lowered) + } else { + false + }; + if use_lowered { + // Call Global("Class.birth/Arity") with argv = [me, args...] + let mut argv: Vec = Vec::with_capacity(1 + arity); + argv.push(dst); + argv.extend(arg_values.iter().copied()); + self.emit_legacy_call(None, CallTarget::Global(lowered), argv)?; + } else { + // Fallback policy: + // - For user-defined boxes (no explicit constructor), do NOT emit BoxCall("birth"). + // VM will treat plain NewBox as constructed; dev verify warns if needed. + // - For builtins/plugins, keep BoxCall("birth") fallback to preserve legacy init. + let is_user_box = self.comp_ctx.user_defined_boxes.contains_key(&class); // Phase 285LLVM-1.1: HashMap + // Dev safety: allow disabling birth() injection for builtins to avoid + // unified-call method dispatch issues while migrating. Off by default unless explicitly enabled. 
+ let allow_builtin_birth = std::env::var("NYASH_DEV_BIRTH_INJECT_BUILTINS") + .ok() + .as_deref() + == Some("1"); + if !is_user_box && allow_builtin_birth { + let birt_mid = resolve_slot_by_type_name(&class, "birth"); + self.emit_box_or_plugin_call( + None, + dst, + "birth".to_string(), + birt_mid, + arg_values, + EffectMask::READ.add(Effect::ReadHeap), + )?; + } + } + } + + Ok(dst) + } + + /// Check if the current basic block is terminated + pub(super) fn is_current_block_terminated(&self) -> bool { + if let (Some(block_id), Some(ref function)) = + (self.current_block, &self.scope_ctx.current_function) + { + if let Some(block) = function.get_block(block_id) { + return block.is_terminated(); + } + } + false + } +} diff --git a/src/mir/builder/builder_debug.rs b/src/mir/builder/builder_debug.rs new file mode 100644 index 00000000..7a1cd899 --- /dev/null +++ b/src/mir/builder/builder_debug.rs @@ -0,0 +1,47 @@ +use super::MirBuilder; + +impl MirBuilder { + // ---------------------- + // Debug scope helpers (region_id for DebugHub events) + // ---------------------- + #[inline] + pub(crate) fn debug_next_join_id(&mut self) -> u32 { + // Phase 136 Step 2/7 + Phase 2-2: Use core_ctx as SSOT (no sync needed) + self.core_ctx.next_debug_join() + } + + #[inline] + pub(crate) fn debug_push_region>(&mut self, region: S) { + // Phase 2-4: Use scope_ctx only (legacy field removed) + let region = region.into(); + self.scope_ctx.debug_push_region(region); + } + + #[inline] + pub(crate) fn debug_pop_region(&mut self) { + // Phase 2-4: Use scope_ctx only (legacy field removed) + self.scope_ctx.debug_pop_region(); + } + + #[inline] + #[allow(deprecated)] + pub(crate) fn debug_current_region_id(&self) -> Option { + // Phase 136 Step 3/7: Read from scope_ctx (SSOT) + self.scope_ctx.debug_current_region_id() + } + + // ---------------------- + // Compile trace helpers (dev only; env-gated) + // ---------------------- + #[inline] + pub(super) fn compile_trace_enabled() -> bool { + 
std::env::var("NYASH_MIR_COMPILE_TRACE").ok().as_deref() == Some("1") + } + + #[inline] + pub(super) fn trace_compile>(&self, msg: S) { + if Self::compile_trace_enabled() { + eprintln!("[mir-compile] {}", msg.as_ref()); + } + } +} diff --git a/src/mir/builder/builder_emit.rs b/src/mir/builder/builder_emit.rs new file mode 100644 index 00000000..3f39db7a --- /dev/null +++ b/src/mir/builder/builder_emit.rs @@ -0,0 +1,256 @@ +use super::{origin, observe, utils}; +use super::{BasicBlockId, MirBuilder, MirInstruction, ValueId}; + +impl MirBuilder { + /// Emit an instruction to the current basic block + pub(in crate::mir) fn emit_instruction(&mut self, instruction: MirInstruction) -> Result<(), String> { + let block_id = self.current_block.ok_or("No current basic block")?; + + // Make instruction mutable for potential receiver materialization + let mut instruction = instruction; + + // Precompute debug metadata to avoid borrow conflicts later + let _dbg_fn_name = self + .scope_ctx + .current_function + .as_ref() + .map(|f| f.signature.name.clone()); + let _dbg_region_id = self.debug_current_region_id(); + // P0: PHI の軽量補強と観測は、関数ブロック取得前に実施して借用競合を避ける + if let MirInstruction::Phi { dst, inputs, .. } = &instruction { + origin::phi::propagate_phi_meta(self, *dst, inputs); + observe::ssa::emit_phi(self, *dst, inputs); + } + + // CRITICAL: Final receiver materialization for MethodCall + // This ensures the receiver has an in-block definition in the same block as the Call. + // Must happen BEFORE function mutable borrow to avoid borrowck conflicts. + if let MirInstruction::Call { + callee: Some(callee), + dst, + args, + effects, + .. 
+ } = &instruction + { + use crate::mir::definitions::call_unified::Callee; + if let Callee::Method { + box_name, + method, + receiver: Some(r), + certainty, + box_kind, + } = callee.clone() + { + // LocalSSA: ensure receiver has a Copy in current_block + let r_local = crate::mir::builder::ssa::local::recv(self, r); + + // Update instruction with materialized receiver + let new_callee = Callee::Method { + box_name: box_name.clone(), + method: method.clone(), + receiver: Some(r_local), + certainty, + box_kind, + }; + instruction = MirInstruction::Call { + dst: *dst, + func: crate::mir::ValueId::INVALID, // Legacy dummy (not a real SSA use) + callee: Some(new_callee), + args: args.clone(), + effects: *effects, + }; + } + } + + if let Some(ref mut function) = self.scope_ctx.current_function { + // Pre-capture branch/jump targets for predecessor update after we finish + // mutably borrowing the current block. + let (then_t, else_t, jump_t) = match &instruction { + MirInstruction::Branch { + then_bb, else_bb, .. + } => (Some(*then_bb), Some(*else_bb), None), + MirInstruction::Jump { target, .. } => (None, None, Some(*target)), + _ => (None, None, None), + }; + + // Extract function name before mutable borrow to avoid borrowck error + let current_fn_name = function.signature.name.clone(); + + if let Some(block) = function.get_block_mut(block_id) { + // CRITICAL: Copy専用トレース(LocalSSA調査用) + if let MirInstruction::Copy { dst, src } = &instruction { + if std::env::var("NYASH_LOCAL_SSA_TRACE").ok().as_deref() == Some("1") { + eprintln!( + "[emit-inst] fn={} bb={:?} COPY %{} <- %{}", + current_fn_name, + self.current_block.map(|b| b.0).unwrap_or(0), + dst.0, + src.0 + ); + } + } + + // Invariant: Call must always carry a Callee (unified path). + if let MirInstruction::Call { callee, .. 
} = &instruction { + if callee.is_none() { + return Err("builder invariant violated: MirInstruction::Call.callee must be Some (unified call)".into()); + } else if std::env::var("NYASH_LOCAL_SSA_TRACE").ok().as_deref() == Some("1") { + use crate::mir::definitions::call_unified::Callee; + if let Some(Callee::Method { + box_name, + method, + receiver: Some(r), + .. + }) = callee + { + eprintln!( + "[emit-inst] fn={} bb={:?} Call {}.{} recv=%{}", + current_fn_name, + self.current_block.map(|b| b.0).unwrap_or(0), + box_name, + method, + r.0 + ); + } + } else if std::env::var("NYASH_BUILDER_TRACE_RECV").ok().as_deref() == Some("1") + { + use crate::mir::definitions::call_unified::Callee; + if let Some(Callee::Method { + box_name, + method, + receiver: Some(r), + .. + }) = callee + { + let names: Vec = self + .variable_ctx + .variable_map + .iter() + .filter(|(_, &vid)| vid == *r) + .map(|(k, _)| k.clone()) + .collect(); + eprintln!( + "[builder/recv-trace] fn={} bb={:?} method={}.{} recv=%{} aliases={:?}", + current_fn_name, + self.current_block, + box_name, + method, + r.0, + names + ); + } + } + } + if utils::builder_debug_enabled() { + eprintln!( + "[BUILDER] emit @bb{} -> {}", + block_id, + match &instruction { + MirInstruction::TypeOp { dst, op, value, ty } => + format!("typeop {:?} {} {:?} -> {}", op, value, ty, dst), + MirInstruction::Print { value, .. } => format!("print {}", value), + MirInstruction::BoxCall { + box_val, + method, + method_id, + args, + dst, + .. + } => { + if let Some(mid) = method_id { + format!( + "boxcall {}.{}[#{}]({:?}) -> {:?}", + box_val, method, mid, args, dst + ) + } else { + format!( + "boxcall {}.{}({:?}) -> {:?}", + box_val, method, args, dst + ) + } + } + MirInstruction::Call { + func, args, dst, .. 
+ } => format!("call {}({:?}) -> {:?}", func, args, dst), + MirInstruction::NewBox { + dst, + box_type, + args, + } => format!("new {}({:?}) -> {}", box_type, args, dst), + MirInstruction::Const { dst, value } => + format!("const {:?} -> {}", value, dst), + MirInstruction::Branch { + condition, + then_bb, + else_bb, + .. + } => format!("br {}, {}, {}", condition, then_bb, else_bb), + MirInstruction::Jump { target, .. } => format!("br {}", target), + _ => format!("{:?}", instruction), + } + ); + } + // Phase 136 Step 6/7: Use metadata_ctx for span + block.add_instruction_with_span( + instruction.clone(), + self.metadata_ctx.current_span(), + ); + // Drop the mutable borrow of `block` before updating other blocks + } + // Update predecessor sets for branch/jump immediately so that + // debug_verify_phi_inputs can observe a consistent CFG without + // requiring a full function.update_cfg() pass. + if let Some(t) = then_t { + if let Some(succ) = function.get_block_mut(t) { + succ.add_predecessor(block_id); + } + } + if let Some(t) = else_t { + if let Some(succ) = function.get_block_mut(t) { + succ.add_predecessor(block_id); + } + } + if let Some(t) = jump_t { + if let Some(succ) = function.get_block_mut(t) { + succ.add_predecessor(block_id); + } + } + Ok(()) + } else { + Err(format!("Basic block {} does not exist", block_id)) + } + } + + /// Update an existing PHI instruction's inputs (for loop sealing) + /// Used by LoopFormBuilder to complete incomplete PHI nodes + #[allow(dead_code)] + pub(super) fn update_phi_instruction( + &mut self, + block: BasicBlockId, + phi_id: ValueId, + new_inputs: Vec<(BasicBlockId, ValueId)>, + ) -> Result<(), String> { + if let Some(ref mut function) = self.scope_ctx.current_function { + if let Some(block_data) = function.get_block_mut(block) { + // Find PHI instruction with matching dst + for inst in &mut block_data.instructions { + if let MirInstruction::Phi { dst, inputs, .. 
} = inst { + if *dst == phi_id { + *inputs = new_inputs; + return Ok(()); + } + } + } + Err(format!( + "PHI instruction {} not found in block {}", + phi_id, block + )) + } else { + Err(format!("Block {} not found", block)) + } + } else { + Err("No current function".to_string()) + } + } +} diff --git a/src/mir/builder/builder_init.rs b/src/mir/builder/builder_init.rs new file mode 100644 index 00000000..4025927a --- /dev/null +++ b/src/mir/builder/builder_init.rs @@ -0,0 +1,105 @@ +use super::{ + binding_context, compilation_context, core_context, metadata_context, scope_context, + type_context, variable_context, MirBuilder, +}; +use super::plugin_sigs; +use crate::mir::BindingId; +use std::collections::HashMap; + +impl MirBuilder { + /// Create a new MIR builder + pub fn new() -> Self { + let plugin_method_sigs = plugin_sigs::load_plugin_method_sigs(); + let core_ctx = core_context::CoreContext::new(); + + // Phase 136 Step 7/7: Compilation context (new SSOT) + let comp_ctx = + compilation_context::CompilationContext::with_plugin_sigs(plugin_method_sigs.clone()); + + // フェーズM: no_phi_mode初期化削除 + #[allow(deprecated)] + Self { + current_module: None, + current_block: None, + + // Phase 136 Step 2/7: Core context (new SSOT) + core_ctx, + + type_ctx: type_context::TypeContext::new(), // Phase 136: Type context + scope_ctx: scope_context::ScopeContext::new(), // Phase 136 Step 3/7: Scope context + binding_ctx: binding_context::BindingContext::new(), // Phase 136 Step 4/7: Binding context + variable_ctx: variable_context::VariableContext::new(), // Phase 136 Step 5/7: Variable context + metadata_ctx: metadata_context::MetadataContext::new(), // Phase 136 Step 6/7: Metadata context + comp_ctx, // Phase 136 Step 7/7: Compilation context + pending_phis: Vec::new(), + + // Phase 2-5: binding_map initialization removed + + // フェーズM: no_phi_modeフィールド削除 + return_defer_active: false, + return_defer_slot: None, + return_defer_target: None, + return_deferred_emitted: false, + 
in_cleanup_block: false, + cleanup_allow_return: false, + cleanup_allow_throw: false, + suppress_pin_entry_copy_next: false, + + local_ssa_map: HashMap::new(), + schedule_mat_map: HashMap::new(), + pin_slot_names: HashMap::new(), + + in_unified_boxcall_fallback: false, + recursion_depth: 0, + root_is_app_mode: None, + static_box_singletons: HashMap::new(), // Phase 21.7: methodization support + repl_mode: false, // Phase 288 P2: REPL mode (default: file mode) + } + } + + // Phase 2-5: BindingContext sync helpers removed - binding_ctx is now SSOT + // Phase 2-6: VariableContext sync helpers removed - variable_ctx is now SSOT + + /// Push/pop helpers for If merge context (best-effort; optional usage) + pub(super) fn push_if_merge(&mut self, bb: super::BasicBlockId) { + // Phase 2-4: Use scope_ctx only (legacy field removed) + self.scope_ctx.push_if_merge(bb); + } + pub(super) fn pop_if_merge(&mut self) { + // Phase 2-4: Use scope_ctx only (legacy field removed) + let _ = self.scope_ctx.pop_if_merge(); + } + + /// Suppress entry pin copy for the next start_new_block (used for merge blocks). + pub(super) fn suppress_next_entry_pin_copy(&mut self) { + self.suppress_pin_entry_copy_next = true; + } + + // ---- Phase 74: BindingId allocation ---- + /// Allocate a new BindingId (parallel to ValueId allocation) + /// + /// ## Parallel ValueId/BindingId Allocation + /// + /// BindingId allocation is completely independent from ValueId allocation: + /// - `next_value_id()` increments `value_gen` counter + /// - `allocate_binding_id()` increments `next_binding_id` counter + /// + /// This parallelism enables: + /// 1. **Stable binding identity** across SSA transformations + /// 2. **Independent shadowing tracking** separate from SSA renaming + /// 3. 
**Future ScopeManager migration** (Phase 75+) without breaking SSA + /// + /// Example: + /// ```ignore + /// // local x = 1; <- allocate_binding_id() -> BindingId(0) + /// // next_value_id() -> ValueId(10) + /// // { + /// // local x = 2; <- allocate_binding_id() -> BindingId(1) + /// // next_value_id() -> ValueId(20) + /// // } + /// ``` + pub fn allocate_binding_id(&mut self) -> BindingId { + // Phase 136 Step 2/7 + Phase 2-2: Use core_ctx as SSOT (no sync needed) + self.core_ctx.next_binding() + } +} diff --git a/src/mir/builder/builder_metadata.rs b/src/mir/builder/builder_metadata.rs new file mode 100644 index 00000000..28375d6b --- /dev/null +++ b/src/mir/builder/builder_metadata.rs @@ -0,0 +1,49 @@ +use super::{BasicBlockId, FunctionSignature, MirBuilder, MirFunction}; + +impl MirBuilder { + // ---- Hint helpers (no-op by default) ---- + // Phase 136 Step 6/7: Delegate to metadata_ctx with legacy sync + #[inline] + pub(crate) fn hint_scope_enter(&mut self, id: u32) { + self.metadata_ctx.hint_scope_enter(id); + } + #[inline] + pub(crate) fn hint_scope_leave(&mut self, id: u32) { + self.metadata_ctx.hint_scope_leave(id); + } + #[inline] + pub(crate) fn hint_join_result>(&mut self, var: S) { + self.metadata_ctx.hint_join_result(var); + } + + /// Hint for downstream metadata: set the logical source file name/path for the next build. + /// Phase 136 Step 6/7: Delegate to metadata_ctx + pub fn set_source_file_hint>(&mut self, source: S) { + self.metadata_ctx.set_source_file(source); + } + + /// Clear the source file hint (used when reusing the builder across modules). + /// Phase 136 Step 6/7: Delegate to metadata_ctx + pub fn clear_source_file_hint(&mut self) { + self.metadata_ctx.clear_source_file(); + } + + /// Resolve current source file hint (builder field or env fallback). 
+ /// Phase 136 Step 6/7: Delegate to metadata_ctx + pub(super) fn current_source_file(&self) -> Option { + self.metadata_ctx + .current_source_file() + .or_else(|| std::env::var("NYASH_SOURCE_FILE_HINT").ok()) + } + + /// Create a new MirFunction with source metadata applied. + pub(super) fn new_function_with_metadata( + &self, + signature: FunctionSignature, + entry_block: BasicBlockId, + ) -> MirFunction { + let mut f = MirFunction::new(signature, entry_block); + f.metadata.source_file = self.current_source_file(); + f + } +} diff --git a/src/mir/builder/builder_method_index.rs b/src/mir/builder/builder_method_index.rs new file mode 100644 index 00000000..51a5958e --- /dev/null +++ b/src/mir/builder/builder_method_index.rs @@ -0,0 +1,58 @@ +use super::MirBuilder; + +impl MirBuilder { + // ---------------------- + // Method tail index (performance helper) + // ---------------------- + fn rebuild_method_tail_index(&mut self) { + self.comp_ctx.method_tail_index.clear(); + if let Some(ref module) = self.current_module { + for name in module.functions.keys() { + if let (Some(dot), Some(slash)) = (name.rfind('.'), name.rfind('/')) { + if slash > dot { + let tail = &name[dot..]; + self.comp_ctx + .method_tail_index + .entry(tail.to_string()) + .or_insert_with(Vec::new) + .push(name.clone()); + } + } + } + self.comp_ctx.method_tail_index_source_len = module.functions.len(); + } else { + self.comp_ctx.method_tail_index_source_len = 0; + } + } + + fn ensure_method_tail_index(&mut self) { + let need_rebuild = match self.current_module { + Some(ref refmod) => { + self.comp_ctx.method_tail_index_source_len != refmod.functions.len() + } + None => self.comp_ctx.method_tail_index_source_len != 0, + }; + if need_rebuild { + self.rebuild_method_tail_index(); + } + } + + pub(super) fn method_candidates(&mut self, method: &str, arity: usize) -> Vec { + self.ensure_method_tail_index(); + let tail = format!(".{}{}", method, format!("/{}", arity)); + self.comp_ctx + .method_tail_index 
+ .get(&tail) + .cloned() + .unwrap_or_default() + } + + pub(super) fn method_candidates_tail>(&mut self, tail: S) -> Vec { + self.ensure_method_tail_index(); + self.comp_ctx + .method_tail_index + .get(tail.as_ref()) + .cloned() + .unwrap_or_default() + } +} diff --git a/src/mir/builder/builder_value_kind.rs b/src/mir/builder/builder_value_kind.rs new file mode 100644 index 00000000..be373c49 --- /dev/null +++ b/src/mir/builder/builder_value_kind.rs @@ -0,0 +1,47 @@ +use super::{MirBuilder, ValueId}; +use crate::mir::{MirValueKind, TypedValueId}; + +impl MirBuilder { + // ============================================================================ + // Phase 26-A: ValueId型安全化メソッド + // ============================================================================ + + /// 型付きValueIdを発行(新API) + /// Phase 136 P0: Use SSOT allocator (next_value_id) to respect function context + pub fn new_typed_value(&mut self, kind: MirValueKind) -> TypedValueId { + let id = self.next_value_id(); + self.type_ctx.value_kinds.insert(id, kind); + TypedValueId::new(id, kind) + } + + /// 既存ValueIdの型情報を取得 + pub fn get_value_kind(&self, id: ValueId) -> Option { + self.type_ctx.value_kinds.get(&id).copied() + } + + /// 既存ValueIdに型情報を後付け(レガシー互換用) + pub fn register_value_kind(&mut self, id: ValueId, kind: MirValueKind) { + self.type_ctx.value_kinds.insert(id, kind); + } + + /// 型安全なパラメータ判定(ValueIdベース) - GUARD Bug Prevention + pub fn is_value_parameter(&self, id: ValueId) -> bool { + self.get_value_kind(id) + .map(|kind| kind.is_parameter()) + .unwrap_or(false) + } + + /// 型安全なローカル変数判定(ValueIdベース) + pub fn is_value_local(&self, id: ValueId) -> bool { + self.get_value_kind(id) + .map(|kind| kind.is_local()) + .unwrap_or(false) + } + + /// 型安全なLoopCarrier判定(ValueIdベース) + pub fn is_value_loop_carrier(&self, id: ValueId) -> bool { + self.get_value_kind(id) + .map(|kind| kind.is_loop_carrier()) + .unwrap_or(false) + } +} diff --git a/src/mir/join_ir/lowering/carrier_info.rs 
b/src/mir/join_ir/lowering/carrier_info.rs deleted file mode 100644 index c6f2c8fb..00000000 --- a/src/mir/join_ir/lowering/carrier_info.rs +++ /dev/null @@ -1,1180 +0,0 @@ -//! Carrier variable metadata for JoinIR loop lowering -//! -//! This module defines metadata structures for tracking carrier variables -//! in loop lowering. This enables dynamic generation of exit bindings -//! without hardcoded variable names or ValueIds. -//! -//! Phase 193-2: Enhanced builder methods for flexible construction -//! -//! # Phase 183-2: Primary CarrierInfo Construction -//! -//! This module is the single source of truth for CarrierInfo initialization. -//! Both MIR and JoinIR contexts use `CarrierInfo::from_variable_map()` as the -//! primary construction method. -//! -//! - MIR context: `common_init.rs` delegates to this module -//! - JoinIR context: Uses `from_variable_map()` directly -//! -//! # Phase 76: BindingId-Based Promotion Tracking -//! -//! Replaces name-based promotion hacks (`"digit_pos"` → `"is_digit_pos"`) with -//! type-safe BindingId mapping. This eliminates fragile string matching while -//! maintaining backward compatibility through dual-path lookup. - -use crate::mir::ValueId; -use std::collections::BTreeMap; // Phase 222.5-D: HashMap → BTreeMap for determinism -use std::collections::BTreeSet; - -#[cfg(feature = "normalized_dev")] -use crate::mir::BindingId; // Phase 76+78: BindingId for promoted carriers - - -/// Phase 227: CarrierRole - Distinguishes loop state carriers from condition-only carriers -/// -/// When LoopBodyLocal variables are promoted to carriers, we need to know whether -/// they carry loop state (need exit PHI) or are only used in conditions (no exit PHI). 
-/// -/// # Example -/// -/// ```ignore -/// // LoopState carrier: sum needs exit PHI (value persists after loop) -/// loop(i < n) { -/// sum = sum + i; // sum updated in loop body -/// } -/// print(sum); // sum used after loop -/// -/// // ConditionOnly carrier: is_digit_pos does NOT need exit PHI -/// loop(p < s.length()) { -/// local digit_pos = digits.indexOf(s.substring(p, p+1)); -/// if digit_pos < 0 { break; } // Only used in condition -/// num_str = num_str + ch; -/// p = p + 1; -/// } -/// // digit_pos not used after loop -/// ``` -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum CarrierRole { - /// Value needed after loop (sum, result, count, p, num_str) - /// - Participates in header PHI (loop iteration) - /// - Participates in exit PHI (final value after loop) - LoopState, - - /// Only used for loop condition (is_digit_pos, is_whitespace) - /// - Participates in header PHI (loop iteration) - /// - Does NOT participate in exit PHI (not needed after loop) - ConditionOnly, -} - -/// Phase 228: Initialization policy for carrier variables -/// -/// When carriers participate in header PHI, they need an initial value. -/// Most carriers use their host_id value (FromHost), but promoted LoopBodyLocal -/// carriers need explicit bool initialization (BoolConst). -/// -/// # Example -/// -/// ```ignore -/// // Regular carrier (sum): Use host_id value -/// CarrierVar { name: "sum", host_id: ValueId(10), init: FromHost, .. } -/// -/// // ConditionOnly carrier (is_digit_pos): Initialize with false -/// CarrierVar { name: "is_digit_pos", host_id: ValueId(15), init: BoolConst(false), .. } -/// -/// // Loop-local derived carrier (digit_value): Initialize with local zero (no host slot) -/// CarrierVar { name: "digit_value", host_id: ValueId(0), init: LoopLocalZero, .. 
} -/// ``` -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum CarrierInit { - /// No explicit initialization (use host_id value) - FromHost, - /// Initialize with bool constant (for ConditionOnly carriers) - BoolConst(bool), - /// Initialize with loop-local zero (no host slot; used for derived carriers like digit_value) - LoopLocalZero, -} - -/// Phase 131 P1.5: Exit reconnection mode for JoinInlineBoundary -/// -/// Controls whether exit values are reconnected via PHI generation or direct assignment. -/// This separates Normalized shadow (DirectValue) from existing loop patterns (Phi). -/// -/// # Design Principle (SSOT) -/// -/// - **DirectValue**: Normalized loops prohibit PHI generation. Exit values are directly -/// wired to variable_map using remapped_exit_values from MergeResult. -/// - **Phi**: Existing loop patterns use PHI generation for exit value merging. -/// -/// # Example -/// -/// ```ignore -/// // Normalized shadow: loop(true) { x = 1; break } → DirectValue -/// JoinInlineBoundary { exit_reconnect_mode: ExitReconnectMode::DirectValue, .. } -/// -/// // Traditional loop: loop(i < 3) { sum = sum + i } → Phi -/// JoinInlineBoundary { exit_reconnect_mode: ExitReconnectMode::Phi, .. } -/// ``` -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ExitReconnectMode { - /// Existing loop patterns: PHI generation for exit value merging - /// - /// Used by Pattern 1-4 loops with multiple exit paths. - /// Exit values are collected into exit PHIs. - Phi, - - /// Normalized shadow: Direct variable_map update, no PHI generation - /// - /// Used by loop(true) { *; break } pattern. - /// Exit values are directly wired using MergeResult.remapped_exit_values. 
- DirectValue, -} - -impl Default for ExitReconnectMode { - /// Default to Phi mode for backward compatibility - fn default() -> Self { - Self::Phi - } -} - -// Phase 229: ConditionAlias removed - redundant with promoted_loopbodylocals -// The naming convention (old_name → "is_" or "is__match") -// is sufficient to resolve promoted variables dynamically. - -/// Information about a single carrier variable -#[derive(Debug, Clone)] -pub struct CarrierVar { - /// Variable name (e.g., "sum", "printed", "is_digit_pos") - pub name: String, - /// Host ValueId for this variable (MIR側) - pub host_id: ValueId, - /// Phase 177-STRUCT: JoinIR側でこのキャリアを表すValueId - /// - /// ヘッダPHIのdstや、exitで使う値を記録する。 - /// これにより、index ベースのマッチングを名前ベースに置き換えられる。 - /// - /// - `Some(vid)`: Header PHI生成後にセットされる - /// - `None`: まだPHI生成前、または該当なし - pub join_id: Option, - /// Phase 227: Role of this carrier (LoopState or ConditionOnly) - /// - /// - `LoopState`: Value needed after loop (participates in exit PHI) - /// - `ConditionOnly`: Only used for loop condition (no exit PHI) - pub role: CarrierRole, - /// Phase 228: Initialization policy for header PHI - /// - /// - `FromHost`: Use host_id value (regular carriers) - /// - `BoolConst(false)`: Initialize with false (promoted LoopBodyLocal carriers) - pub init: CarrierInit, - /// Phase 78: BindingId for this carrier (dev-only) - /// - /// For promoted carriers (e.g., is_digit_pos), this is allocated separately - /// by CarrierBindingAssigner. For source-derived carriers, this comes from - /// builder.binding_map. - /// - /// Enables type-safe lookup: BindingId → ValueId (join_id) in ConditionEnv. - /// - /// # Example - /// - /// ```ignore - /// // Source-derived carrier - /// CarrierVar { - /// name: "sum", - /// binding_id: Some(BindingId(5)), // from builder.binding_map["sum"] - /// .. 
- /// } - /// - /// // Promoted carrier - /// CarrierVar { - /// name: "is_digit_pos", - /// binding_id: Some(BindingId(10)), // allocated by CarrierBindingAssigner - /// .. - /// } - /// ``` - #[cfg(feature = "normalized_dev")] - pub binding_id: Option, -} - -impl CarrierVar { - /// Create a new CarrierVar with default LoopState role - /// - /// This is the primary constructor for CarrierVar. Use this instead of - /// struct literal syntax to ensure role defaults to LoopState. - pub fn new(name: String, host_id: ValueId) -> Self { - Self { - name, - host_id, - join_id: None, - role: CarrierRole::LoopState, - init: CarrierInit::FromHost, // Phase 228: Default to FromHost - #[cfg(feature = "normalized_dev")] - binding_id: None, // Phase 78: No BindingId by default - } - } - - /// Create a CarrierVar with explicit role - pub fn with_role(name: String, host_id: ValueId, role: CarrierRole) -> Self { - Self { - name, - host_id, - join_id: None, - role, - init: CarrierInit::FromHost, // Phase 228: Default to FromHost - #[cfg(feature = "normalized_dev")] - binding_id: None, // Phase 78: No BindingId by default - } - } - - /// Phase 228: Create a CarrierVar with explicit role and init policy - pub fn with_role_and_init( - name: String, - host_id: ValueId, - role: CarrierRole, - init: CarrierInit, - ) -> Self { - Self { - name, - host_id, - join_id: None, - role, - init, - #[cfg(feature = "normalized_dev")] - binding_id: None, // Phase 78: No BindingId by default - } - } -} - -/// Complete carrier information for a loop -#[derive(Debug, Clone)] -pub struct CarrierInfo { - /// Loop control variable name (e.g., "i") - pub loop_var_name: String, - /// Loop control variable ValueId in host - pub loop_var_id: ValueId, - /// Additional carrier variables (e.g., sum, printed) - pub carriers: Vec, - /// Phase 171-C-5: Trim pattern helper (if this CarrierInfo was created from Trim promotion) - pub trim_helper: Option, - /// Phase 224: Promoted LoopBodyLocal variables (e.g., 
"digit_pos" promoted to "is_digit_pos") - /// - /// These variables were originally LoopBodyLocal but have been promoted to carriers - /// during condition promotion (e.g., DigitPosPromoter). The lowerer should skip - /// LoopBodyLocal checks for these variables. - /// - /// Phase 229: Naming convention for promoted carriers: - /// - DigitPos pattern: "var" → "is_var" (e.g., "digit_pos" → "is_digit_pos") - /// - Trim pattern: "var" → "is_var_match" (e.g., "ch" → "is_ch_match") - /// - /// Condition variable resolution dynamically infers the carrier name from this list. - pub promoted_loopbodylocals: Vec, - - /// Phase 76: Type-safe promotion tracking (dev-only) - /// - /// Maps original BindingId to promoted BindingId, eliminating name-based hacks. - /// - /// # Example - /// - /// DigitPos promotion: - /// - Original: BindingId(5) for `"digit_pos"` - /// - Promoted: BindingId(10) for `"is_digit_pos"` - /// - Map entry: `promoted_bindings[BindingId(5)] = BindingId(10)` - /// - /// This enables type-safe resolution: - /// ```ignore - /// if let Some(promoted_bid) = carrier_info.promoted_bindings.get(&original_bid) { - /// // Lookup promoted carrier by BindingId (no string matching!) 
- /// } - /// ``` - /// - /// # Migration Strategy (Phase 76) - /// - /// - **Dual Path**: BindingId lookup (NEW) OR name-based fallback (LEGACY) - /// - **Populated by**: DigitPosPromoter, TrimLoopHelper (Phase 76) - /// - **Used by**: ConditionEnv::resolve_var_with_binding (Phase 75+) - /// - **Phase 77**: Remove name-based fallback after full migration - /// - /// # Design Notes - /// - /// **Q: Why BindingId map instead of name map?** - /// - **Type Safety**: Compiler-checked binding identity (no typos) - /// - **Shadowing-Aware**: BindingId distinguishes inner/outer scope vars - /// - **No Name Collisions**: BindingId is unique even if names shadow - /// - /// **Q: Why not remove `promoted_loopbodylocals` immediately?** - /// - **Legacy Compatibility**: Existing code uses name-based lookup - /// - **Gradual Migration**: Phase 76 adds BindingId, Phase 77 removes name-based - /// - **Fail-Safe**: Dual path ensures no regressions during transition - #[cfg(feature = "normalized_dev")] - pub promoted_bindings: BTreeMap, -} - -impl CarrierInfo { - /// Phase 193-2: Create CarrierInfo from a variable_map - /// - /// Automatically extracts all non-loop-control variables from the host's - /// variable_map. This eliminates manual carrier listing for simple cases. 
- /// - /// # Arguments - /// - /// * `loop_var_name` - Name of the loop control variable (e.g., "i") - /// * `variable_map` - Host function's variable_map (String → ValueId) - /// - /// # Returns - /// - /// CarrierInfo with loop_var and all other variables as carriers - /// - /// # Example - /// - /// ```ignore - /// let carrier_info = CarrierInfo::from_variable_map( - /// "i".to_string(), - /// &variable_map // {"i": ValueId(5), "sum": ValueId(10), "count": ValueId(11)} - /// )?; - /// // Result: CarrierInfo with loop_var="i", carriers=[sum, count] - /// ``` - pub fn from_variable_map( - loop_var_name: String, - variable_map: &BTreeMap, // Phase 222.5-D: HashMap → BTreeMap for determinism - ) -> Result { - // Find loop variable - let loop_var_id = variable_map.get(&loop_var_name).copied().ok_or_else(|| { - format!( - "Loop variable '{}' not found in variable_map", - loop_var_name - ) - })?; - - // Collect all non-loop-var variables as carriers - let mut carriers: Vec = variable_map - .iter() - .filter(|(name, _)| *name != &loop_var_name) - .map(|(name, &id)| CarrierVar { - name: name.clone(), - host_id: id, - join_id: None, // Phase 177-STRUCT-1: Set by header PHI generation - role: CarrierRole::LoopState, // Phase 227: Default to LoopState - init: CarrierInit::FromHost, // Phase 228: Default to FromHost - #[cfg(feature = "normalized_dev")] - binding_id: None, // Phase 78: Set by CarrierBindingAssigner - }) - .collect(); - - // Sort for determinism - carriers.sort_by(|a, b| a.name.cmp(&b.name)); - - Ok(CarrierInfo { - loop_var_name, - loop_var_id, - carriers, - trim_helper: None, // Phase 171-C-5: No Trim pattern by default - promoted_loopbodylocals: Vec::new(), // Phase 224: No promoted variables by default - #[cfg(feature = "normalized_dev")] - promoted_bindings: BTreeMap::new(), // Phase 76: No promoted bindings by default - }) - } - - /// Phase 193-2: Create CarrierInfo with explicit carrier list - /// - /// Useful when you have specific carriers in mind and 
want explicit control - /// over which variables are treated as carriers. - /// - /// # Arguments - /// - /// * `loop_var_name` - Name of the loop control variable - /// * `loop_var_id` - ValueId of the loop variable - /// * `carrier_names` - Names of carrier variables (will look up in variable_map) - /// * `variable_map` - Host function's variable_map for lookups - /// - /// # Returns - /// - /// CarrierInfo with only the specified carriers - /// - /// # Example - /// - /// ```ignore - /// let carrier_info = CarrierInfo::with_explicit_carriers( - /// "i".to_string(), - /// ValueId(5), - /// vec!["sum".to_string(), "count".to_string()], - /// &variable_map - /// )?; - /// ``` - pub fn with_explicit_carriers( - loop_var_name: String, - loop_var_id: ValueId, - carrier_names: Vec, - variable_map: &BTreeMap, // Phase 222.5-D: HashMap → BTreeMap for determinism - ) -> Result { - let mut carriers = Vec::new(); - - for name in carrier_names { - let host_id = variable_map - .get(&name) - .copied() - .ok_or_else(|| format!("Carrier variable '{}' not found in variable_map", name))?; - - carriers.push(CarrierVar { - name, - host_id, - join_id: None, // Phase 177-STRUCT-1: Set by header PHI generation - role: CarrierRole::LoopState, // Phase 227: Default to LoopState - init: CarrierInit::FromHost, // Phase 228: Default to FromHost - #[cfg(feature = "normalized_dev")] - binding_id: None, // Phase 78: Set by CarrierBindingAssigner - }); - } - - // Sort for determinism - carriers.sort_by(|a, b| a.name.cmp(&b.name)); - - Ok(CarrierInfo { - loop_var_name, - loop_var_id, - carriers, - trim_helper: None, // Phase 171-C-5: No Trim pattern by default - promoted_loopbodylocals: Vec::new(), // Phase 224: No promoted variables by default - #[cfg(feature = "normalized_dev")] - promoted_bindings: BTreeMap::new(), // Phase 76: No promoted bindings by default - }) - } - - /// Phase 193-2: Create CarrierInfo with manual CarrierVar list - /// - /// Most explicit construction method - you 
provide everything directly. - /// Useful when you already have CarrierVar structs built elsewhere. - /// - /// # Arguments - /// - /// * `loop_var_name` - Name of the loop control variable - /// * `loop_var_id` - ValueId of the loop variable - /// * `carriers` - Vec of already-constructed CarrierVar structs - pub fn with_carriers( - loop_var_name: String, - loop_var_id: ValueId, - mut carriers: Vec, - ) -> Self { - // Sort for determinism - carriers.sort_by(|a, b| a.name.cmp(&b.name)); - - Self { - loop_var_name, - loop_var_id, - carriers, - trim_helper: None, // Phase 171-C-5: No Trim pattern by default - promoted_loopbodylocals: Vec::new(), // Phase 224: No promoted variables by default - #[cfg(feature = "normalized_dev")] - promoted_bindings: BTreeMap::new(), // Phase 76: No promoted bindings by default - } - } - - /// Phase 193-2: Get carrier count - /// - /// Convenience method for checking how many carriers this info has. - pub fn carrier_count(&self) -> usize { - self.carriers.len() - } - - /// Phase 193-2: Check if this has multiple carriers - /// - /// Useful for pattern matching: "is this a multi-carrier loop?" - pub fn is_multi_carrier(&self) -> bool { - self.carriers.len() > 1 - } - - /// Phase 193-2: Find a carrier by name - /// - /// Lookup a specific carrier variable by name. - pub fn find_carrier(&self, name: &str) -> Option<&CarrierVar> { - self.carriers.iter().find(|c| c.name == name) - } - - /// Phase 171-C-4: Merge carriers from another CarrierInfo - /// - /// Deduplicates by carrier name. If a carrier with the same name already exists, - /// it will not be added again. 
- /// - /// # Arguments - /// - /// * `other` - Another CarrierInfo to merge from - /// - /// # Example - /// - /// ```ignore - /// let mut carrier_info = CarrierInfo::from_variable_map("i", &variable_map)?; - /// let promoted_carrier = TrimPatternInfo::to_carrier_info(); - /// carrier_info.merge_from(&promoted_carrier); - /// ``` - pub fn merge_from(&mut self, other: &CarrierInfo) { - for carrier in &other.carriers { - if !self.carriers.iter().any(|c| c.name == carrier.name) { - self.carriers.push(carrier.clone()); - } - } - // Maintain sorted order for determinism - self.carriers.sort_by(|a, b| a.name.cmp(&b.name)); - - // Phase 171-C-5: Also merge trim_helper if present - if other.trim_helper.is_some() { - self.trim_helper = other.trim_helper.clone(); - } - - // Phase 224: Merge promoted_loopbodylocals (deduplicate) - for promoted_var in &other.promoted_loopbodylocals { - if !self.promoted_loopbodylocals.contains(promoted_var) { - self.promoted_loopbodylocals.push(promoted_var.clone()); - } - } - - // Phase 76: Merge promoted_bindings (dev-only) - #[cfg(feature = "normalized_dev")] - { - for (original, promoted) in &other.promoted_bindings { - self.promoted_bindings.insert(*original, *promoted); - } - } - } - - /// Phase 171-C-5: Get Trim pattern helper - /// - /// Returns the TrimLoopHelper if this CarrierInfo was created from Trim promotion. 
- /// - /// # Returns - /// - /// * `Some(&TrimLoopHelper)` - If this CarrierInfo contains Trim pattern information - /// * `None` - If this is a regular CarrierInfo (not from Trim promotion) - /// - /// # Example - /// - /// ```ignore - /// if let Some(helper) = carrier_info.trim_helper() { - /// eprintln!("Trim pattern detected: {}", helper.carrier_name); - /// eprintln!("Whitespace chars: {:?}", helper.whitespace_chars); - /// } - /// ``` - pub fn trim_helper( - &self, - ) -> Option<&crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper> { - self.trim_helper.as_ref() - } - - /// Phase 229/231: Resolve promoted LoopBodyLocal name to carrier JoinIR ValueId - /// - /// This helper centralizes the naming convention for promoted variables so that - /// ScopeManager 実装がそれぞれ命名規約を再実装しなくて済むようにするよ。 - /// - /// 命名規約: - /// - DigitPos パターン: `"var"` → `"is_var"`(例: "digit_pos" → "is_digit_pos") - /// - Trim パターン : `"var"` → `"is_var_match"`(例: "ch" → "is_ch_match") - /// - /// # Arguments - /// - /// * `original_name` - 元の LoopBodyLocal 名(例: "digit_pos") - /// - /// # Returns - /// - /// * `Some(ValueId)` - 対応する carrier の join_id が見つかった場合 - /// * `None` - promoted_loopbodylocals に含まれない、または join_id 未設定の場合 - /// - /// # Phase 77: DEPRECATED - /// - /// This method uses fragile naming conventions ("is_*", "is_*_match") and will - /// be removed in Phase 78+ when all call sites migrate to BindingId-based lookup. - /// Use `resolve_promoted_with_binding()` for type-safe BindingId lookup. 
- #[deprecated( - since = "phase77", - note = "Use resolve_promoted_with_binding() for type-safe BindingId lookup" - )] - pub fn resolve_promoted_join_id(&self, original_name: &str) -> Option { - #[cfg(feature = "normalized_dev")] - eprintln!( - "[phase77/legacy/carrier_info] WARNING: Using deprecated name-based promoted lookup for '{}'", - original_name - ); - if !self - .promoted_loopbodylocals - .contains(&original_name.to_string()) - { - return None; - } - - let candidates = [ - format!("is_{}", original_name), // DigitPos pattern - format!("is_{}_match", original_name), // Trim pattern - ]; - - for carrier_name in &candidates { - // loop_var 自身が ConditionOnly carrier として扱われるケースは現状ほぼないが、 - // 将来の拡張に備えて loop_var_name も一応チェックしておく。 - if carrier_name == &self.loop_var_name { - if let Some(carrier) = self.carriers.iter().find(|c| c.name == self.loop_var_name) { - if let Some(join_id) = carrier.join_id { - return Some(join_id); - } - } - } - - if let Some(carrier) = self.carriers.iter().find(|c| c.name == *carrier_name) { - if let Some(join_id) = carrier.join_id { - return Some(join_id); - } - } - } - - None - } - - /// Phase 76: Type-safe promoted binding resolution (dev-only) - /// - /// Resolves a promoted LoopBodyLocal binding via BindingId map, eliminating - /// name-based hacks (`format!("is_{}", name)`). Falls back to legacy name-based - /// lookup for backward compatibility during Phase 76-77 migration. 
- /// - /// # Arguments - /// - /// * `original_binding` - Original LoopBodyLocal's BindingId (e.g., BindingId(5) for "digit_pos") - /// - /// # Returns - /// - /// * `Some(BindingId)` - Promoted carrier's BindingId (e.g., BindingId(10) for "is_digit_pos") - /// * `None` - No promotion mapping found - /// - /// # Example - /// - /// ```ignore - /// // DigitPos promotion: BindingId(5) "digit_pos" → BindingId(10) "is_digit_pos" - /// let original_bid = BindingId(5); - /// if let Some(promoted_bid) = carrier_info.resolve_promoted_with_binding(original_bid) { - /// // Lookup carrier by promoted BindingId (type-safe!) - /// let promoted_value = condition_env.get_by_binding(promoted_bid); - /// } - /// ``` - /// - /// # Migration Path (Phase 76-77) - /// - /// - **Phase 76**: BindingId map populated by promoters, dual path (BindingId OR name) - /// - **Phase 77**: Remove name-based fallback, BindingId-only lookup - /// - /// # Design Notes - /// - /// **Why not merge with `resolve_promoted_join_id()`?** - /// - Different input type: BindingId vs String - /// - Different output: BindingId vs ValueId - /// - Different usage: ScopeManager (BindingId) vs legacy lowerers (name) - /// - /// **Why BTreeMap instead of HashMap?** - /// - Deterministic iteration (Phase 222.5-D consistency) - /// - Debug-friendly sorted output - #[cfg(feature = "normalized_dev")] - pub fn resolve_promoted_with_binding(&self, original_binding: BindingId) -> Option { - self.promoted_bindings.get(&original_binding).copied() - } - - /// Phase 76: Record a promoted binding (dev-only) - /// - /// Helper method to populate the promoted_bindings map during promotion. - /// Called by wrapper functions that have access to both CarrierInfo and binding_map. 
- /// - /// # Arguments - /// - /// * `original_binding` - Original LoopBodyLocal's BindingId - /// * `promoted_binding` - Promoted carrier's BindingId - /// - /// # Example - /// - /// ```ignore - /// // After DigitPosPromoter creates CarrierInfo, record the binding mapping: - /// carrier_info.record_promoted_binding( - /// binding_map.get("digit_pos").copied().unwrap(), // BindingId(5) - /// binding_map.get("is_digit_pos").copied().unwrap() // BindingId(10) - /// ); - /// ``` - /// - /// # Phase 76 Note - /// - /// This method is currently UNUSED because promoters (DigitPosPromoter, TrimLoopHelper) - /// don't have access to binding_map. Actual population happens in a future phase when - /// we integrate BindingId tracking into the promotion pipeline. - #[cfg(feature = "normalized_dev")] - pub fn record_promoted_binding( - &mut self, - original_binding: BindingId, - promoted_binding: BindingId, - ) { - use super::debug_output_box::DebugOutputBox; - - // Phase 86: Use DebugOutputBox for consistent debug output - // Allow JOINIR_TEST_DEBUG override for test-specific diagnostics - let test_debug = std::env::var("JOINIR_TEST_DEBUG").is_ok(); - let debug = DebugOutputBox::new("binding_pilot/promoted_bindings"); - - if debug.is_enabled() || test_debug { - eprintln!( - "[binding_pilot/promoted_bindings] {} → {}", - original_binding, promoted_binding - ); - } - self.promoted_bindings - .insert(original_binding, promoted_binding); - } -} - -/// Exit metadata returned by lowerers -/// -/// This structure captures the mapping from JoinIR exit values to -/// carrier variable names, enabling dynamic binding generation. -#[derive(Debug, Clone)] -pub struct ExitMeta { - /// Exit value bindings: (carrier_name, join_exit_value_id) - /// - /// Example for Pattern 4: - /// ``` - /// vec![("sum".to_string(), ValueId(15))] - /// ``` - /// where ValueId(15) is the k_exit parameter in JoinIR-local space. 
- pub exit_values: Vec<(String, ValueId)>, -} - -/// Phase 33-14: JoinFragmentMeta - Distinguishes expr result from carrier updates -/// -/// ## Purpose -/// -/// Separates two distinct use cases for JoinIR loops: -/// -/// 1. **Expr Result Pattern** (joinir_min_loop.hako): -/// ```nyash -/// local result = loop(...) { ... } // Loop used as expression -/// return result -/// ``` -/// Here, the k_exit return value is the "expr result" that should go to exit_phi_inputs. -/// -/// 2. **Carrier Update Pattern** (trim pattern): -/// ```nyash -/// loop(...) { start = start + 1 } // Loop used for side effects -/// print(start) // Use carrier after loop -/// ``` -/// Here, there's no "expr result" - only carrier variable updates. -/// -/// ## SSA Correctness -/// -/// Previously, exit_phi_inputs mixed expr results with carrier updates, causing: -/// - PHI inputs that referenced undefined remapped values -/// - SSA-undef errors in VM execution -/// -/// With JoinFragmentMeta: -/// - `expr_result`: Only goes to exit_phi_inputs (generates PHI for expr value) -/// - `exit_meta`: Only goes to carrier_inputs (updates variable_map via carrier PHIs) -/// -/// ## Example: Pattern 2 (joinir_min_loop.hako) -/// -/// ```rust -/// JoinFragmentMeta { -/// expr_result: Some(i_exit), // k_exit returns i as expr value -/// exit_meta: ExitMeta::single("i".to_string(), i_exit), // Also a carrier -/// } -/// ``` -/// -/// ## Example: Pattern 3 (trim pattern) -/// -/// ```rust -/// JoinFragmentMeta { -/// expr_result: None, // Loop doesn't return a value -/// exit_meta: ExitMeta::multiple(vec![ -/// ("start".to_string(), start_exit), -/// ("end".to_string(), end_exit), -/// ]), -/// } -/// ``` -#[derive(Debug, Clone)] -pub struct JoinFragmentMeta { - /// Expression result ValueId from k_exit (JoinIR-local) - /// - /// - `Some(vid)`: Loop is used as expression, k_exit's return value → exit_phi_inputs - /// - `None`: Loop is used for side effects only, no PHI for expr value - pub expr_result: 
Option, - - /// Carrier variable exit bindings (existing ExitMeta) - /// - /// Maps carrier names to their JoinIR-local exit values. - /// These go to carrier_inputs for carrier PHI generation. - pub exit_meta: ExitMeta, - - /// Phase 132 P1: Continuation contract (SSOT) - /// Phase 256 P1.7: Changed from BTreeSet to BTreeSet - /// - /// JoinIR merge must NOT "guess" continuation functions by name. - /// Normalized shadow (and other frontends) must explicitly declare which function names - /// are continuations for the fragment, and merge must follow this contract. - /// - /// Merge may still choose to *skip* some continuation functions if and only if they - /// are structurally "skippable" (pure exit stubs). See merge/instruction_rewriter.rs. - /// - /// **Why Strings instead of JoinFuncIds**: The bridge uses JoinFunction.name as the - /// MirModule function key (e.g., "k_exit"), not "join_func_{id}". The merge code - /// looks up functions by name, so we must use actual function names here. - pub continuation_funcs: BTreeSet, -} - -impl JoinFragmentMeta { - /// Create JoinFragmentMeta for expression result pattern - /// - /// Use when the loop returns a value (like `return loop(...)`). - pub fn with_expr_result(expr_result: ValueId, exit_meta: ExitMeta) -> Self { - Self { - expr_result: Some(expr_result), - exit_meta, - continuation_funcs: BTreeSet::new(), - } - } - - /// Create JoinFragmentMeta for carrier-only pattern - /// - /// Use when the loop only updates carriers (like trim pattern). 
- pub fn carrier_only(exit_meta: ExitMeta) -> Self { - Self { - expr_result: None, - exit_meta, - continuation_funcs: BTreeSet::new(), - } - } - - /// Create empty JoinFragmentMeta (no expr result, no carriers) - pub fn empty() -> Self { - Self { - expr_result: None, - exit_meta: ExitMeta::empty(), - continuation_funcs: BTreeSet::new(), - } - } - - /// Check if this fragment has an expression result - pub fn has_expr_result(&self) -> bool { - self.expr_result.is_some() - } - - /// Phase 33-14: Backward compatibility - convert to ExitMeta - /// - /// During migration, some code may still expect ExitMeta. - /// This extracts just the carrier bindings. - #[deprecated(since = "33-14", note = "Use exit_meta directly for carrier access")] - pub fn to_exit_meta(&self) -> ExitMeta { - self.exit_meta.clone() - } -} - -impl ExitMeta { - /// Create new ExitMeta with no exit values - pub fn empty() -> Self { - Self { - exit_values: vec![], - } - } - - /// Create ExitMeta with a single exit value - pub fn single(carrier_name: String, join_value: ValueId) -> Self { - Self { - exit_values: vec![(carrier_name, join_value)], - } - } - - /// Create ExitMeta with multiple exit values - pub fn multiple(exit_values: Vec<(String, ValueId)>) -> Self { - Self { exit_values } - } - - /// Phase 193-2: Get the count of exit bindings - /// - /// Useful for checking if this ExitMeta has any exit values. - pub fn binding_count(&self) -> usize { - self.exit_values.len() - } - - /// Phase 193-2: Check if this has any exit values - pub fn is_empty(&self) -> bool { - self.exit_values.is_empty() - } - - /// Phase 193-2: Find a binding by carrier name - /// - /// Lookup a specific exit value by carrier name. - pub fn find_binding(&self, carrier_name: &str) -> Option { - self.exit_values - .iter() - .find(|(name, _)| name == carrier_name) - .map(|(_, value_id)| *value_id) - } - - /// Phase 193-2: Add a binding to ExitMeta - /// - /// Convenient way to build ExitMeta incrementally. 
- pub fn with_binding(mut self, carrier_name: String, join_value: ValueId) -> Self { - self.exit_values.push((carrier_name, join_value)); - self - } -} - -#[cfg(test)] -mod tests { - use super::*; - - // Helper: Create a CarrierVar for testing - fn test_carrier(name: &str, id: u32) -> CarrierVar { - CarrierVar { - name: name.to_string(), - host_id: ValueId(id), - join_id: None, // Phase 177-STRUCT-1 - role: CarrierRole::LoopState, // Phase 227: Default to LoopState - init: CarrierInit::FromHost, // Phase 228: Default to FromHost - #[cfg(feature = "normalized_dev")] - binding_id: None, // Phase 78: No BindingId by default - } - } - - // Helper: Create a CarrierInfo for testing - fn test_carrier_info(loop_var: &str, loop_id: u32, carriers: Vec) -> CarrierInfo { - CarrierInfo::with_carriers(loop_var.to_string(), ValueId(loop_id), carriers) - } - - #[test] - fn test_merge_from_empty() { - // Merge empty CarrierInfo should not change anything - let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]); - - let other = test_carrier_info("j", 20, vec![]); - - carrier_info.merge_from(&other); - - assert_eq!(carrier_info.carrier_count(), 1); - assert_eq!(carrier_info.carriers[0].name, "sum"); - } - - #[test] - fn test_merge_from_new_carrier() { - // Merge a new carrier that doesn't exist yet - let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]); - - let other = test_carrier_info("j", 20, vec![test_carrier("count", 15)]); - - carrier_info.merge_from(&other); - - assert_eq!(carrier_info.carrier_count(), 2); - // Should be sorted by name - assert_eq!(carrier_info.carriers[0].name, "count"); // 'c' < 's' - assert_eq!(carrier_info.carriers[1].name, "sum"); - } - - #[test] - fn test_merge_from_duplicate_carrier() { - // Merge a carrier with the same name should NOT duplicate - let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]); - - let other = test_carrier_info( - "j", - 20, - vec![test_carrier("sum", 
999)], // Same name, different ID - ); - - carrier_info.merge_from(&other); - - // Should still have only 1 carrier (no duplication) - assert_eq!(carrier_info.carrier_count(), 1); - assert_eq!(carrier_info.carriers[0].name, "sum"); - // Original ID should be preserved - assert_eq!(carrier_info.carriers[0].host_id, ValueId(10)); - } - - #[test] - fn test_merge_from_multiple_carriers() { - // Merge multiple carriers - let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]); - - let other = test_carrier_info( - "j", - 20, - vec![test_carrier("count", 15), test_carrier("product", 18)], - ); - - carrier_info.merge_from(&other); - - assert_eq!(carrier_info.carrier_count(), 3); - // Should be sorted by name - assert_eq!(carrier_info.carriers[0].name, "count"); - assert_eq!(carrier_info.carriers[1].name, "product"); - assert_eq!(carrier_info.carriers[2].name, "sum"); - } - - #[test] - fn test_merge_from_preserves_determinism() { - // Test that merge maintains sorted order - let mut carrier_info = test_carrier_info( - "i", - 5, - vec![test_carrier("zebra", 30), test_carrier("alpha", 10)], - ); - - let other = test_carrier_info( - "j", - 20, - vec![test_carrier("beta", 15), test_carrier("gamma", 18)], - ); - - carrier_info.merge_from(&other); - - assert_eq!(carrier_info.carrier_count(), 4); - // Should be sorted alphabetically - assert_eq!(carrier_info.carriers[0].name, "alpha"); - assert_eq!(carrier_info.carriers[1].name, "beta"); - assert_eq!(carrier_info.carriers[2].name, "gamma"); - assert_eq!(carrier_info.carriers[3].name, "zebra"); - } - - #[test] - fn test_merge_from_with_trim_helper() { - // Test that trim_helper is merged - use crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper; - - let mut carrier_info = test_carrier_info("i", 5, vec![]); - - let mut other = test_carrier_info("j", 20, vec![]); - other.trim_helper = Some(TrimLoopHelper { - original_var: "ch".to_string(), - carrier_name: "is_whitespace".to_string(), - 
whitespace_chars: vec![" ".to_string(), "\t".to_string()], - }); - - carrier_info.merge_from(&other); - - // trim_helper should be copied - assert!(carrier_info.trim_helper.is_some()); - let helper = carrier_info.trim_helper.as_ref().unwrap(); - assert_eq!(helper.original_var, "ch"); - assert_eq!(helper.carrier_name, "is_whitespace"); - assert_eq!(helper.whitespace_count(), 2); - } - - #[test] - fn test_trim_helper_accessor() { - // Test the trim_helper() accessor method - use crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper; - - let mut carrier_info = test_carrier_info("i", 5, vec![]); - - // Initially None - assert!(carrier_info.trim_helper().is_none()); - - // Add trim_helper - carrier_info.trim_helper = Some(TrimLoopHelper { - original_var: "ch".to_string(), - carrier_name: "is_whitespace".to_string(), - whitespace_chars: vec![" ".to_string()], - }); - - // Now Some - assert!(carrier_info.trim_helper().is_some()); - let helper = carrier_info.trim_helper().unwrap(); - assert_eq!(helper.original_var, "ch"); - } - - // ========== Phase 76: promoted_bindings tests ========== - - #[test] - #[cfg(feature = "normalized_dev")] - fn test_promoted_bindings_record_and_resolve() { - use crate::mir::BindingId; - - let mut carrier_info = test_carrier_info("i", 5, vec![]); - - // Record a promotion: BindingId(5) → BindingId(10) - carrier_info.record_promoted_binding(BindingId(5), BindingId(10)); - - // Resolve should find the mapping - assert_eq!( - carrier_info.resolve_promoted_with_binding(BindingId(5)), - Some(BindingId(10)) - ); - - // Unknown BindingId should return None - assert_eq!( - carrier_info.resolve_promoted_with_binding(BindingId(99)), - None - ); - } - - #[test] - #[cfg(feature = "normalized_dev")] - fn test_promoted_bindings_multiple_mappings() { - use crate::mir::BindingId; - - let mut carrier_info = test_carrier_info("i", 5, vec![]); - - // Record multiple promotions (e.g., DigitPos + Trim in same loop) - 
carrier_info.record_promoted_binding(BindingId(5), BindingId(10)); // digit_pos → is_digit_pos - carrier_info.record_promoted_binding(BindingId(6), BindingId(11)); // ch → is_ch_match - - // Both should resolve independently - assert_eq!( - carrier_info.resolve_promoted_with_binding(BindingId(5)), - Some(BindingId(10)) - ); - assert_eq!( - carrier_info.resolve_promoted_with_binding(BindingId(6)), - Some(BindingId(11)) - ); - } - - #[test] - #[cfg(feature = "normalized_dev")] - fn test_promoted_bindings_merge() { - use crate::mir::BindingId; - - let mut carrier_info1 = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]); - carrier_info1.record_promoted_binding(BindingId(1), BindingId(2)); - - let mut carrier_info2 = test_carrier_info("j", 20, vec![test_carrier("count", 15)]); - carrier_info2.record_promoted_binding(BindingId(3), BindingId(4)); - - // Merge carrier_info2 into carrier_info1 - carrier_info1.merge_from(&carrier_info2); - - // Both promoted_bindings should be present - assert_eq!( - carrier_info1.resolve_promoted_with_binding(BindingId(1)), - Some(BindingId(2)) - ); - assert_eq!( - carrier_info1.resolve_promoted_with_binding(BindingId(3)), - Some(BindingId(4)) - ); - } - - #[test] - #[cfg(feature = "normalized_dev")] - fn test_promoted_bindings_default_empty() { - use crate::mir::BindingId; - - // Newly created CarrierInfo should have empty promoted_bindings - let carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]); - - assert_eq!( - carrier_info.resolve_promoted_with_binding(BindingId(0)), - None - ); - } - - #[test] - #[cfg(feature = "normalized_dev")] - fn test_promoted_bindings_overwrite() { - use crate::mir::BindingId; - - let mut carrier_info = test_carrier_info("i", 5, vec![]); - - // Record initial mapping - carrier_info.record_promoted_binding(BindingId(5), BindingId(10)); - - // Overwrite with new mapping (should replace) - carrier_info.record_promoted_binding(BindingId(5), BindingId(20)); - - // Should return the new 
value - assert_eq!( - carrier_info.resolve_promoted_with_binding(BindingId(5)), - Some(BindingId(20)) - ); - } -} diff --git a/src/mir/join_ir/lowering/carrier_info/carrier_info_impl.rs b/src/mir/join_ir/lowering/carrier_info/carrier_info_impl.rs new file mode 100644 index 00000000..59b2bf36 --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_info/carrier_info_impl.rs @@ -0,0 +1,416 @@ +use super::types::{CarrierInfo, CarrierInit, CarrierRole, CarrierVar}; +use crate::mir::ValueId; +use std::collections::BTreeMap; // Phase 222.5-D: HashMap → BTreeMap for determinism + +#[cfg(feature = "normalized_dev")] +use crate::mir::BindingId; // Phase 76+78: BindingId for promoted carriers + +impl CarrierInfo { + /// Phase 193-2: Create CarrierInfo from a variable_map + /// + /// Automatically extracts all non-loop-control variables from the host's + /// variable_map. This eliminates manual carrier listing for simple cases. + /// + /// # Arguments + /// + /// * `loop_var_name` - Name of the loop control variable (e.g., "i") + /// * `variable_map` - Host function's variable_map (String → ValueId) + /// + /// # Returns + /// + /// CarrierInfo with loop_var and all other variables as carriers + /// + /// # Example + /// + /// ```ignore + /// let carrier_info = CarrierInfo::from_variable_map( + /// "i".to_string(), + /// &variable_map // {"i": ValueId(5), "sum": ValueId(10), "count": ValueId(11)} + /// )?; + /// // Result: CarrierInfo with loop_var="i", carriers=[sum, count] + /// ``` + pub fn from_variable_map( + loop_var_name: String, + variable_map: &BTreeMap<String, ValueId>, // Phase 222.5-D: HashMap → BTreeMap for determinism + ) -> Result<Self, String> { + // Find loop variable + let loop_var_id = variable_map.get(&loop_var_name).copied().ok_or_else(|| { + format!( + "Loop variable '{}' not found in variable_map", + loop_var_name + ) + })?; + + // Collect all non-loop-var variables as carriers + let mut carriers: Vec<CarrierVar> = variable_map + .iter() + .filter(|(name, _)| *name != &loop_var_name) + .map(|(name,
&id)| CarrierVar { + name: name.clone(), + host_id: id, + join_id: None, // Phase 177-STRUCT-1: Set by header PHI generation + role: CarrierRole::LoopState, // Phase 227: Default to LoopState + init: CarrierInit::FromHost, // Phase 228: Default to FromHost + #[cfg(feature = "normalized_dev")] + binding_id: None, // Phase 78: Set by CarrierBindingAssigner + }) + .collect(); + + // Sort for determinism + carriers.sort_by(|a, b| a.name.cmp(&b.name)); + + Ok(CarrierInfo { + loop_var_name, + loop_var_id, + carriers, + trim_helper: None, // Phase 171-C-5: No Trim pattern by default + promoted_loopbodylocals: Vec::new(), // Phase 224: No promoted variables by default + #[cfg(feature = "normalized_dev")] + promoted_bindings: BTreeMap::new(), // Phase 76: No promoted bindings by default + }) + } + + /// Phase 193-2: Create CarrierInfo with explicit carrier list + /// + /// Useful when you have specific carriers in mind and want explicit control + /// over which variables are treated as carriers.
+ /// + /// # Arguments + /// + /// * `loop_var_name` - Name of the loop control variable + /// * `loop_var_id` - ValueId of the loop variable + /// * `carrier_names` - Names of carrier variables (will look up in variable_map) + /// * `variable_map` - Host function's variable_map for lookups + /// + /// # Returns + /// + /// CarrierInfo with only the specified carriers + /// + /// # Example + /// + /// ```ignore + /// let carrier_info = CarrierInfo::with_explicit_carriers( + /// "i".to_string(), + /// ValueId(5), + /// vec!["sum".to_string(), "count".to_string()], + /// &variable_map + /// )?; + /// ``` + pub fn with_explicit_carriers( + loop_var_name: String, + loop_var_id: ValueId, + carrier_names: Vec<String>, + variable_map: &BTreeMap<String, ValueId>, // Phase 222.5-D: HashMap → BTreeMap for determinism + ) -> Result<Self, String> { + let mut carriers = Vec::new(); + + for name in carrier_names { + let host_id = variable_map + .get(&name) + .copied() + .ok_or_else(|| format!("Carrier variable '{}' not found in variable_map", name))?; + + carriers.push(CarrierVar { + name, + host_id, + join_id: None, // Phase 177-STRUCT-1: Set by header PHI generation + role: CarrierRole::LoopState, // Phase 227: Default to LoopState + init: CarrierInit::FromHost, // Phase 228: Default to FromHost + #[cfg(feature = "normalized_dev")] + binding_id: None, // Phase 78: Set by CarrierBindingAssigner + }); + } + + // Sort for determinism + carriers.sort_by(|a, b| a.name.cmp(&b.name)); + + Ok(CarrierInfo { + loop_var_name, + loop_var_id, + carriers, + trim_helper: None, // Phase 171-C-5: No Trim pattern by default + promoted_loopbodylocals: Vec::new(), // Phase 224: No promoted variables by default + #[cfg(feature = "normalized_dev")] + promoted_bindings: BTreeMap::new(), // Phase 76: No promoted bindings by default + }) + } + + /// Phase 193-2: Create CarrierInfo with manual CarrierVar list + /// + /// Most explicit construction method - you provide everything directly.
+ /// Useful when you already have CarrierVar structs built elsewhere. + /// + /// # Arguments + /// + /// * `loop_var_name` - Name of the loop control variable + /// * `loop_var_id` - ValueId of the loop variable + /// * `carriers` - Vec of already-constructed CarrierVar structs + pub fn with_carriers( + loop_var_name: String, + loop_var_id: ValueId, + mut carriers: Vec<CarrierVar>, + ) -> Self { + // Sort for determinism + carriers.sort_by(|a, b| a.name.cmp(&b.name)); + + Self { + loop_var_name, + loop_var_id, + carriers, + trim_helper: None, // Phase 171-C-5: No Trim pattern by default + promoted_loopbodylocals: Vec::new(), // Phase 224: No promoted variables by default + #[cfg(feature = "normalized_dev")] + promoted_bindings: BTreeMap::new(), // Phase 76: No promoted bindings by default + } + } + + /// Phase 193-2: Get carrier count + /// + /// Convenience method for checking how many carriers this info has. + pub fn carrier_count(&self) -> usize { + self.carriers.len() + } + + /// Phase 193-2: Check if this has multiple carriers + /// + /// Useful for pattern matching: "is this a multi-carrier loop?" + pub fn is_multi_carrier(&self) -> bool { + self.carriers.len() > 1 + } + + /// Phase 193-2: Find a carrier by name + /// + /// Lookup a specific carrier variable by name. + pub fn find_carrier(&self, name: &str) -> Option<&CarrierVar> { + self.carriers.iter().find(|c| c.name == name) + } + + /// Phase 171-C-4: Merge carriers from another CarrierInfo + /// + /// Deduplicates by carrier name. If a carrier with the same name already exists, + /// it will not be added again.
+ /// + /// # Arguments + /// + /// * `other` - Another CarrierInfo to merge from + /// + /// # Example + /// + /// ```ignore + /// let mut carrier_info = CarrierInfo::from_variable_map("i".to_string(), &variable_map)?; + /// let promoted_carrier = TrimPatternInfo::to_carrier_info(); + /// carrier_info.merge_from(&promoted_carrier); + /// ``` + pub fn merge_from(&mut self, other: &CarrierInfo) { + for carrier in &other.carriers { + if !self.carriers.iter().any(|c| c.name == carrier.name) { + self.carriers.push(carrier.clone()); + } + } + // Maintain sorted order for determinism + self.carriers.sort_by(|a, b| a.name.cmp(&b.name)); + + // Phase 171-C-5: Also merge trim_helper if present (other's helper replaces ours) + if other.trim_helper.is_some() { + self.trim_helper = other.trim_helper.clone(); + } + + // Phase 224: Merge promoted_loopbodylocals (deduplicate) + for promoted_var in &other.promoted_loopbodylocals { + if !self.promoted_loopbodylocals.contains(promoted_var) { + self.promoted_loopbodylocals.push(promoted_var.clone()); + } + } + + // Phase 76: Merge promoted_bindings (dev-only; other's entry wins on a shared key) + #[cfg(feature = "normalized_dev")] + { + for (original, promoted) in &other.promoted_bindings { + self.promoted_bindings.insert(*original, *promoted); + } + } + } + + /// Phase 171-C-5: Get Trim pattern helper + /// + /// Returns the TrimLoopHelper if this CarrierInfo was created from Trim promotion.
+ /// + /// # Returns + /// + /// * `Some(&TrimLoopHelper)` - If this CarrierInfo contains Trim pattern information + /// * `None` - If this is a regular CarrierInfo (not from Trim promotion) + /// + /// # Example + /// + /// ```ignore + /// if let Some(helper) = carrier_info.trim_helper() { + /// eprintln!("Trim pattern detected: {}", helper.carrier_name); + /// eprintln!("Whitespace chars: {:?}", helper.whitespace_chars); + /// } + /// ``` + pub fn trim_helper( + &self, + ) -> Option<&crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper> { + self.trim_helper.as_ref() + } + + /// Phase 229/231: Resolve promoted LoopBodyLocal name to carrier JoinIR ValueId + /// + /// This helper centralizes the naming convention for promoted variables so that + /// individual ScopeManager implementations do not each have to re-implement it. + /// + /// Naming convention: + /// - DigitPos pattern: `"var"` → `"is_var"` (e.g., "digit_pos" → "is_digit_pos") + /// - Trim pattern: `"var"` → `"is_var_match"` (e.g., "ch" → "is_ch_match") + /// + /// # Arguments + /// + /// * `original_name` - Original LoopBodyLocal name (e.g., "digit_pos") + /// + /// # Returns + /// + /// * `Some(ValueId)` - If a corresponding carrier with a join_id was found + /// * `None` - If the name is not in promoted_loopbodylocals, or no matching carrier has a join_id set + /// + /// # Phase 77: DEPRECATED + /// + /// This method uses fragile naming conventions ("is_*", "is_*_match") and will + /// be removed in Phase 78+ when all call sites migrate to BindingId-based lookup. + /// Use `resolve_promoted_with_binding()` for type-safe BindingId lookup.
+ #[deprecated( + since = "phase77", + note = "Use resolve_promoted_with_binding() for type-safe BindingId lookup" + )] + pub fn resolve_promoted_join_id(&self, original_name: &str) -> Option<ValueId> { + #[cfg(feature = "normalized_dev")] + eprintln!( + "[phase77/legacy/carrier_info] WARNING: Using deprecated name-based promoted lookup for '{}'", + original_name + ); + if !self + .promoted_loopbodylocals + .contains(&original_name.to_string()) + { + return None; + } + + let candidates = [ + format!("is_{}", original_name), // DigitPos pattern + format!("is_{}_match", original_name), // Trim pattern + ]; + + for carrier_name in &candidates { + // The loop_var itself is almost never treated as a ConditionOnly carrier today, + // but loop_var_name is also checked here to leave room for future extensions. + if carrier_name == &self.loop_var_name { + if let Some(carrier) = self.carriers.iter().find(|c| c.name == self.loop_var_name) { + if let Some(join_id) = carrier.join_id { + return Some(join_id); + } + } + } + + if let Some(carrier) = self.carriers.iter().find(|c| c.name == *carrier_name) { + if let Some(join_id) = carrier.join_id { + return Some(join_id); + } + } + } + + None + } + + /// Phase 76: Type-safe promoted binding resolution (dev-only) + /// + /// Resolves a promoted LoopBodyLocal binding via BindingId map, eliminating + /// name-based hacks (`format!("is_{}", name)`). This method only reads the BindingId map; + /// the legacy name-based lookup stays a separate method during the Phase 76-77 migration.
+ /// + /// # Arguments + /// + /// * `original_binding` - Original LoopBodyLocal's BindingId (e.g., BindingId(5) for "digit_pos") + /// + /// # Returns + /// + /// * `Some(BindingId)` - Promoted carrier's BindingId (e.g., BindingId(10) for "is_digit_pos") + /// * `None` - No promotion mapping found + /// + /// # Example + /// + /// ```ignore + /// // DigitPos promotion: BindingId(5) "digit_pos" → BindingId(10) "is_digit_pos" + /// let original_bid = BindingId(5); + /// if let Some(promoted_bid) = carrier_info.resolve_promoted_with_binding(original_bid) { + /// // Lookup carrier by promoted BindingId (type-safe!) + /// let promoted_value = condition_env.get_by_binding(promoted_bid); + /// } + /// ``` + /// + /// # Migration Path (Phase 76-77) + /// + /// - **Phase 76**: BindingId map populated by promoters, dual path (BindingId OR name) + /// - **Phase 77**: Remove name-based fallback, BindingId-only lookup + /// + /// # Design Notes + /// + /// **Why not merge with `resolve_promoted_join_id()`?** + /// - Different input type: BindingId vs String + /// - Different output: BindingId vs ValueId + /// - Different usage: ScopeManager (BindingId) vs legacy lowerers (name) + /// + /// **Why BTreeMap instead of HashMap?** + /// - Deterministic iteration (Phase 222.5-D consistency) + /// - Debug-friendly sorted output + #[cfg(feature = "normalized_dev")] + pub fn resolve_promoted_with_binding(&self, original_binding: BindingId) -> Option<BindingId> { + self.promoted_bindings.get(&original_binding).copied() + } + + /// Phase 76: Record a promoted binding (dev-only) + /// + /// Helper method to populate the promoted_bindings map during promotion. + /// Called by wrapper functions that have access to both CarrierInfo and binding_map.
+ /// + /// # Arguments + /// + /// * `original_binding` - Original LoopBodyLocal's BindingId + /// * `promoted_binding` - Promoted carrier's BindingId + /// + /// # Example + /// + /// ```ignore + /// // After DigitPosPromoter creates CarrierInfo, record the binding mapping: + /// carrier_info.record_promoted_binding( + /// binding_map.get("digit_pos").copied().unwrap(), // BindingId(5) + /// binding_map.get("is_digit_pos").copied().unwrap() // BindingId(10) + /// ); + /// ``` + /// + /// # Phase 76 Note + /// + /// This method is currently UNUSED because promoters (DigitPosPromoter, TrimLoopHelper) + /// don't have access to binding_map. Actual population happens in a future phase when + /// we integrate BindingId tracking into the promotion pipeline. + #[cfg(feature = "normalized_dev")] + pub fn record_promoted_binding( + &mut self, + original_binding: BindingId, + promoted_binding: BindingId, + ) { + use super::debug_output_box::DebugOutputBox; + + // Phase 86: Use DebugOutputBox for consistent debug output + // Allow JOINIR_TEST_DEBUG override for test-specific diagnostics + let test_debug = std::env::var("JOINIR_TEST_DEBUG").is_ok(); + let debug = DebugOutputBox::new("binding_pilot/promoted_bindings"); + + if debug.is_enabled() || test_debug { + eprintln!( + "[binding_pilot/promoted_bindings] {} → {}", + original_binding, promoted_binding + ); + } + self.promoted_bindings + .insert(original_binding, promoted_binding); + } +} diff --git a/src/mir/join_ir/lowering/carrier_info/carrier_var.rs b/src/mir/join_ir/lowering/carrier_info/carrier_var.rs new file mode 100644 index 00000000..14232f97 --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_info/carrier_var.rs @@ -0,0 +1,51 @@ +use super::types::{CarrierInit, CarrierRole, CarrierVar}; +use crate::mir::ValueId; + +impl CarrierVar { + /// Create a new CarrierVar with default LoopState role + /// + /// This is the primary constructor for CarrierVar.
Use this instead of + /// struct literal syntax to ensure role defaults to LoopState. + pub fn new(name: String, host_id: ValueId) -> Self { + Self { + name, + host_id, + join_id: None, + role: CarrierRole::LoopState, + init: CarrierInit::FromHost, // Phase 228: Default to FromHost + #[cfg(feature = "normalized_dev")] + binding_id: None, // Phase 78: No BindingId by default + } + } + + /// Create a CarrierVar with explicit role + pub fn with_role(name: String, host_id: ValueId, role: CarrierRole) -> Self { + Self { + name, + host_id, + join_id: None, + role, + init: CarrierInit::FromHost, // Phase 228: Default to FromHost + #[cfg(feature = "normalized_dev")] + binding_id: None, // Phase 78: No BindingId by default + } + } + + /// Phase 228: Create a CarrierVar with explicit role and init policy + pub fn with_role_and_init( + name: String, + host_id: ValueId, + role: CarrierRole, + init: CarrierInit, + ) -> Self { + Self { + name, + host_id, + join_id: None, + role, + init, + #[cfg(feature = "normalized_dev")] + binding_id: None, // Phase 78: No BindingId by default + } + } +} diff --git a/src/mir/join_ir/lowering/carrier_info/exit_meta.rs b/src/mir/join_ir/lowering/carrier_info/exit_meta.rs new file mode 100644 index 00000000..b5d276e5 --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_info/exit_meta.rs @@ -0,0 +1,101 @@ +use super::types::{ExitMeta, JoinFragmentMeta}; +use crate::mir::ValueId; +use std::collections::BTreeSet; + +impl JoinFragmentMeta { + /// Create JoinFragmentMeta for expression result pattern + /// + /// Use when the loop returns a value (like `return loop(...)`). + pub fn with_expr_result(expr_result: ValueId, exit_meta: ExitMeta) -> Self { + Self { + expr_result: Some(expr_result), + exit_meta, + continuation_funcs: BTreeSet::new(), + } + } + + /// Create JoinFragmentMeta for carrier-only pattern + /// + /// Use when the loop only updates carriers (like trim pattern).
+ pub fn carrier_only(exit_meta: ExitMeta) -> Self { + Self { + expr_result: None, + exit_meta, + continuation_funcs: BTreeSet::new(), + } + } + + /// Create empty JoinFragmentMeta (no expr result, no carriers) + pub fn empty() -> Self { + Self { + expr_result: None, + exit_meta: ExitMeta::empty(), + continuation_funcs: BTreeSet::new(), + } + } + + /// Check if this fragment has an expression result + pub fn has_expr_result(&self) -> bool { + self.expr_result.is_some() + } + + /// Phase 33-14: Backward compatibility - convert to ExitMeta + /// + /// During migration, some code may still expect ExitMeta. + /// This extracts just the carrier bindings. + #[deprecated(since = "33-14", note = "Use exit_meta directly for carrier access")] + pub fn to_exit_meta(&self) -> ExitMeta { + self.exit_meta.clone() + } +} + +impl ExitMeta { + /// Create new ExitMeta with no exit values + pub fn empty() -> Self { + Self { + exit_values: vec![], + } + } + + /// Create ExitMeta with a single exit value + pub fn single(carrier_name: String, join_value: ValueId) -> Self { + Self { + exit_values: vec![(carrier_name, join_value)], + } + } + + /// Create ExitMeta with multiple exit values + pub fn multiple(exit_values: Vec<(String, ValueId)>) -> Self { + Self { exit_values } + } + + /// Phase 193-2: Get the count of exit bindings + /// + /// Useful for checking if this ExitMeta has any exit values. + pub fn binding_count(&self) -> usize { + self.exit_values.len() + } + + /// Phase 193-2: Check if this has no exit values + pub fn is_empty(&self) -> bool { + self.exit_values.is_empty() + } + + /// Phase 193-2: Find a binding by carrier name + /// + /// Lookup a specific exit value by carrier name. + pub fn find_binding(&self, carrier_name: &str) -> Option<ValueId> { + self.exit_values + .iter() + .find(|(name, _)| name == carrier_name) + .map(|(_, value_id)| *value_id) + } + + /// Phase 193-2: Add a binding to ExitMeta + /// + /// Convenient way to build ExitMeta incrementally.
+ pub fn with_binding(mut self, carrier_name: String, join_value: ValueId) -> Self { + self.exit_values.push((carrier_name, join_value)); + self + } +} diff --git a/src/mir/join_ir/lowering/carrier_info/mod.rs b/src/mir/join_ir/lowering/carrier_info/mod.rs new file mode 100644 index 00000000..49f4ebb9 --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_info/mod.rs @@ -0,0 +1,35 @@ +//! Carrier variable metadata for JoinIR loop lowering +//! +//! This module defines metadata structures for tracking carrier variables +//! in loop lowering. This enables dynamic generation of exit bindings +//! without hardcoded variable names or ValueIds. +//! +//! Phase 193-2: Enhanced builder methods for flexible construction +//! +//! # Phase 183-2: Primary CarrierInfo Construction +//! +//! This module is the single source of truth for CarrierInfo initialization. +//! Both MIR and JoinIR contexts use `CarrierInfo::from_variable_map()` as the +//! primary construction method. +//! +//! - MIR context: `common_init.rs` delegates to this module +//! - JoinIR context: Uses `from_variable_map()` directly +//! +//! # Phase 76: BindingId-Based Promotion Tracking +//! +//! Replaces name-based promotion hacks (`"digit_pos"` → `"is_digit_pos"`) with +//! type-safe BindingId mapping. This eliminates fragile string matching while +//! maintaining backward compatibility through dual-path lookup.
+ +mod carrier_info_impl; +mod carrier_var; +mod exit_meta; +mod types; + +#[cfg(test)] +mod tests; + +pub use types::{ + CarrierInfo, CarrierInit, CarrierRole, CarrierVar, ExitMeta, ExitReconnectMode, + JoinFragmentMeta, +}; diff --git a/src/mir/join_ir/lowering/carrier_info/tests.rs b/src/mir/join_ir/lowering/carrier_info/tests.rs new file mode 100644 index 00000000..105b8ec9 --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_info/tests.rs @@ -0,0 +1,68 @@ +use super::*; +use crate::mir::ValueId; + +// Helper: Create a CarrierVar for testing +fn test_carrier(name: &str, id: u32) -> CarrierVar { + CarrierVar { + name: name.to_string(), + host_id: ValueId(id), + join_id: None, // Phase 177-STRUCT-1 + role: CarrierRole::LoopState, // Phase 227: Default to LoopState + init: CarrierInit::FromHost, // Phase 228: Default to FromHost + #[cfg(feature = "normalized_dev")] + binding_id: None, // Phase 78: No BindingId by default + } +} + +// Helper: Create a CarrierInfo for testing +fn test_carrier_info(loop_var: &str, loop_id: u32, carriers: Vec<CarrierVar>) -> CarrierInfo { + CarrierInfo::with_carriers(loop_var.to_string(), ValueId(loop_id), carriers) +} + +#[test] +fn test_merge_from_empty() { + // Merge empty CarrierInfo should not change anything + let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]); + + let other = test_carrier_info("j", 20, vec![]); + + carrier_info.merge_from(&other); + + assert_eq!(carrier_info.carrier_count(), 1); + assert_eq!(carrier_info.carriers[0].name, "sum"); +} + +#[test] +fn test_merge_from_new_carrier() { + // Merge a new carrier that doesn't exist yet + let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]); + + let other = test_carrier_info("j", 20, vec![test_carrier("count", 15)]); + + carrier_info.merge_from(&other); + + assert_eq!(carrier_info.carrier_count(), 2); + // Should be sorted by name + assert_eq!(carrier_info.carriers[0].name, "count"); // 'c' < 's' +
assert_eq!(carrier_info.carriers[1].name, "sum"); +} + +#[test] +fn test_merge_from_duplicate_carrier() { + // Merge a carrier with the same name should NOT duplicate + let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]); + + let other = test_carrier_info( + "j", + 20, + vec![test_carrier("sum", 999)], // Same name, different ID + ); + + carrier_info.merge_from(&other); + + // Should still have only 1 carrier (no duplication) + assert_eq!(carrier_info.carrier_count(), 1); + assert_eq!(carrier_info.carriers[0].name, "sum"); + // Original ID should be preserved + assert_eq!(carrier_info.carriers[0].host_id, ValueId(10)); +} diff --git a/src/mir/join_ir/lowering/carrier_info/types.rs b/src/mir/join_ir/lowering/carrier_info/types.rs new file mode 100644 index 00000000..5e7396b7 --- /dev/null +++ b/src/mir/join_ir/lowering/carrier_info/types.rs @@ -0,0 +1,332 @@ +use crate::mir::ValueId; +use std::collections::BTreeSet; + +#[cfg(feature = "normalized_dev")] +use std::collections::BTreeMap; // Phase 222.5-D: HashMap → BTreeMap for determinism + +#[cfg(feature = "normalized_dev")] +use crate::mir::BindingId; // Phase 76+78: BindingId for promoted carriers + +/// Phase 227: CarrierRole - Distinguishes loop state carriers from condition-only carriers +/// +/// When LoopBodyLocal variables are promoted to carriers, we need to know whether +/// they carry loop state (need exit PHI) or are only used in conditions (no exit PHI).
+/// +/// # Example +/// +/// ```ignore +/// // LoopState carrier: sum needs exit PHI (value persists after loop) +/// loop(i < n) { +/// sum = sum + i; // sum updated in loop body +/// } +/// print(sum); // sum used after loop +/// +/// // ConditionOnly carrier: is_digit_pos does NOT need exit PHI +/// loop(p < s.length()) { +/// local digit_pos = digits.indexOf(s.substring(p, p+1)); +/// if digit_pos < 0 { break; } // Only used in condition +/// num_str = num_str + ch; +/// p = p + 1; +/// } +/// // digit_pos not used after loop +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CarrierRole { + /// Value needed after loop (sum, result, count, p, num_str) + /// - Participates in header PHI (loop iteration) + /// - Participates in exit PHI (final value after loop) + LoopState, + + /// Only used for loop condition (is_digit_pos, is_whitespace) + /// - Participates in header PHI (loop iteration) + /// - Does NOT participate in exit PHI (not needed after loop) + ConditionOnly, +} + +/// Phase 228: Initialization policy for carrier variables +/// +/// When carriers participate in header PHI, they need an initial value. +/// Most carriers use their host_id value (FromHost), but promoted LoopBodyLocal +/// carriers need explicit bool initialization (BoolConst). +/// +/// # Example +/// +/// ```ignore +/// // Regular carrier (sum): Use host_id value +/// CarrierVar { name: "sum", host_id: ValueId(10), init: FromHost, .. } +/// +/// // ConditionOnly carrier (is_digit_pos): Initialize with false +/// CarrierVar { name: "is_digit_pos", host_id: ValueId(15), init: BoolConst(false), .. } +/// +/// // Loop-local derived carrier (digit_value): Initialize with local zero (no host slot) +/// CarrierVar { name: "digit_value", host_id: ValueId(0), init: LoopLocalZero, ..
} +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CarrierInit { + /// No explicit initialization (use host_id value) + FromHost, + /// Initialize with bool constant (for ConditionOnly carriers) + BoolConst(bool), + /// Initialize with loop-local zero (no host slot; used for derived carriers like digit_value) + LoopLocalZero, +} + +/// Phase 131 P1.5: Exit reconnection mode for JoinInlineBoundary +/// +/// Controls whether exit values are reconnected via PHI generation or direct assignment. +/// This separates Normalized shadow (DirectValue) from existing loop patterns (Phi). +/// +/// # Design Principle (SSOT) +/// +/// - **DirectValue**: Normalized loops prohibit PHI generation. Exit values are directly +/// wired to variable_map using remapped_exit_values from MergeResult. +/// - **Phi**: Existing loop patterns use PHI generation for exit value merging. +/// +/// # Example +/// +/// ```ignore +/// // Normalized shadow: loop(true) { x = 1; break } → DirectValue +/// JoinInlineBoundary { exit_reconnect_mode: ExitReconnectMode::DirectValue, .. } +/// +/// // Traditional loop: loop(i < 3) { sum = sum + i } → Phi +/// JoinInlineBoundary { exit_reconnect_mode: ExitReconnectMode::Phi, .. } +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ExitReconnectMode { + /// Existing loop patterns: PHI generation for exit value merging + /// + /// Used by Pattern 1-4 loops with multiple exit paths. + /// Exit values are collected into exit PHIs. + Phi, + + /// Normalized shadow: Direct variable_map update, no PHI generation + /// + /// Used by loop(true) { *; break } pattern. + /// Exit values are directly wired using MergeResult.remapped_exit_values.
+ DirectValue, +} + +impl Default for ExitReconnectMode { + /// Default to Phi mode for backward compatibility + fn default() -> Self { + Self::Phi + } +} + +// Phase 229: ConditionAlias removed - redundant with promoted_loopbodylocals +// The naming convention (old_name → "is_<old_name>" or "is_<old_name>_match") +// is sufficient to resolve promoted variables dynamically. + +/// Information about a single carrier variable +#[derive(Debug, Clone)] +pub struct CarrierVar { + /// Variable name (e.g., "sum", "printed", "is_digit_pos") + pub name: String, + /// Host ValueId for this variable (MIR side) + pub host_id: ValueId, + /// Phase 177-STRUCT: ValueId that represents this carrier on the JoinIR side + /// + /// Records the dst of the header PHI and the value used at exit. + /// This allows index-based matching to be replaced with name-based matching. + /// + /// - `Some(vid)`: Set after header PHI generation + /// - `None`: PHI not generated yet, or not applicable + pub join_id: Option<ValueId>, + /// Phase 227: Role of this carrier (LoopState or ConditionOnly) + /// + /// - `LoopState`: Value needed after loop (participates in exit PHI) + /// - `ConditionOnly`: Only used for loop condition (no exit PHI) + pub role: CarrierRole, + /// Phase 228: Initialization policy for header PHI + /// + /// - `FromHost`: Use host_id value (regular carriers) + /// - `BoolConst(false)`: Initialize with false (promoted LoopBodyLocal carriers) + pub init: CarrierInit, + /// Phase 78: BindingId for this carrier (dev-only) + /// + /// For promoted carriers (e.g., is_digit_pos), this is allocated separately + /// by CarrierBindingAssigner. For source-derived carriers, this comes from + /// builder.binding_map. + /// + /// Enables type-safe lookup: BindingId → ValueId (join_id) in ConditionEnv. + /// + /// # Example + /// + /// ```ignore + /// // Source-derived carrier + /// CarrierVar { + /// name: "sum", + /// binding_id: Some(BindingId(5)), // from builder.binding_map["sum"] + /// ..
+ /// } + /// + /// // Promoted carrier + /// CarrierVar { + /// name: "is_digit_pos", + /// binding_id: Some(BindingId(10)), // allocated by CarrierBindingAssigner + /// .. + /// } + /// ``` + #[cfg(feature = "normalized_dev")] + pub binding_id: Option<BindingId>, +} + +/// Complete carrier information for a loop +#[derive(Debug, Clone)] +pub struct CarrierInfo { + /// Loop control variable name (e.g., "i") + pub loop_var_name: String, + /// Loop control variable ValueId in host + pub loop_var_id: ValueId, + /// Additional carrier variables (e.g., sum, printed) + pub carriers: Vec<CarrierVar>, + /// Phase 171-C-5: Trim pattern helper (if this CarrierInfo was created from Trim promotion) + pub trim_helper: Option<crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper>, + /// Phase 224: Promoted LoopBodyLocal variables (e.g., "digit_pos" promoted to "is_digit_pos") + /// + /// These variables were originally LoopBodyLocal but have been promoted to carriers + /// during condition promotion (e.g., DigitPosPromoter). The lowerer should skip + /// LoopBodyLocal checks for these variables. + /// + /// Phase 229: Naming convention for promoted carriers: + /// - DigitPos pattern: "var" → "is_var" (e.g., "digit_pos" → "is_digit_pos") + /// - Trim pattern: "var" → "is_var_match" (e.g., "ch" → "is_ch_match") + /// + /// Condition variable resolution dynamically infers the carrier name from this list. + pub promoted_loopbodylocals: Vec<String>, + + /// Phase 76: Type-safe promotion tracking (dev-only) + /// + /// Maps original BindingId to promoted BindingId, eliminating name-based hacks. + /// + /// # Example + /// + /// DigitPos promotion: + /// - Original: BindingId(5) for `"digit_pos"` + /// - Promoted: BindingId(10) for `"is_digit_pos"` + /// - Map entry: `promoted_bindings[BindingId(5)] = BindingId(10)` + /// + /// This enables type-safe resolution: + /// ```ignore + /// if let Some(promoted_bid) = carrier_info.promoted_bindings.get(&original_bid) { + /// // Lookup promoted carrier by BindingId (no string matching!)
+ /// } + /// ``` + /// + /// # Migration Strategy (Phase 76) + /// + /// - **Dual Path**: BindingId lookup (NEW) OR name-based fallback (LEGACY) + /// - **Populated by**: DigitPosPromoter, TrimLoopHelper (Phase 76) + /// - **Used by**: ConditionEnv::resolve_var_with_binding (Phase 75+) + /// - **Phase 77**: Remove name-based fallback after full migration + /// + /// # Design Notes + /// + /// **Q: Why BindingId map instead of name map?** + /// - **Type Safety**: Compiler-checked binding identity (no typos) + /// - **Shadowing-Aware**: BindingId distinguishes inner/outer scope vars + /// - **No Name Collisions**: BindingId is unique even if names shadow + /// + /// **Q: Why not remove `promoted_loopbodylocals` immediately?** + /// - **Legacy Compatibility**: Existing code uses name-based lookup + /// - **Gradual Migration**: Phase 76 adds BindingId, Phase 77 removes name-based + /// - **Fail-Safe**: Dual path ensures no regressions during transition + #[cfg(feature = "normalized_dev")] + pub promoted_bindings: BTreeMap<BindingId, BindingId>, +} + +/// Exit metadata returned by lowerers +/// +/// This structure captures the mapping from JoinIR exit values to +/// carrier variable names, enabling dynamic binding generation. +#[derive(Debug, Clone)] +pub struct ExitMeta { + /// Exit value bindings: (carrier_name, join_exit_value_id) + /// + /// Example for Pattern 4: + /// ```ignore + /// vec![("sum".to_string(), ValueId(15))] + /// ``` + /// where ValueId(15) is the k_exit parameter in JoinIR-local space. + pub exit_values: Vec<(String, ValueId)>, +} + +/// Phase 33-14: JoinFragmentMeta - Distinguishes expr result from carrier updates +/// +/// ## Purpose +/// +/// Separates two distinct use cases for JoinIR loops: +/// +/// 1. **Expr Result Pattern** (joinir_min_loop.hako): +/// ```nyash +/// local result = loop(...) { ... } // Loop used as expression +/// return result +/// ``` +/// Here, the k_exit return value is the "expr result" that should go to exit_phi_inputs. +/// +/// 2.
**Carrier Update Pattern** (trim pattern): +/// ```nyash +/// loop(...) { start = start + 1 } // Loop used for side effects +/// print(start) // Use carrier after loop +/// ``` +/// Here, there's no "expr result" - only carrier variable updates. +/// +/// ## SSA Correctness +/// +/// Previously, exit_phi_inputs mixed expr results with carrier updates, causing: +/// - PHI inputs that referenced undefined remapped values +/// - SSA-undef errors in VM execution +/// +/// With JoinFragmentMeta: +/// - `expr_result`: Only goes to exit_phi_inputs (generates PHI for expr value) +/// - `exit_meta`: Only goes to carrier_inputs (updates variable_map via carrier PHIs) +/// +/// ## Example: Pattern 2 (joinir_min_loop.hako) +/// +/// ```ignore +/// JoinFragmentMeta { +/// expr_result: Some(i_exit), // k_exit returns i as expr value +/// exit_meta: ExitMeta::single("i".to_string(), i_exit), // Also a carrier +/// } +/// ``` +/// +/// ## Example: Pattern 3 (trim pattern) +/// +/// ```ignore +/// JoinFragmentMeta { +/// expr_result: None, // Loop doesn't return a value +/// exit_meta: ExitMeta::multiple(vec![ +/// ("start".to_string(), start_exit), +/// ("end".to_string(), end_exit), +/// ]), +/// } +/// ``` +#[derive(Debug, Clone)] +pub struct JoinFragmentMeta { + /// Expression result ValueId from k_exit (JoinIR-local) + /// + /// - `Some(vid)`: Loop is used as expression, k_exit's return value → exit_phi_inputs + /// - `None`: Loop is used for side effects only, no PHI for expr value + pub expr_result: Option<ValueId>, + + /// Carrier variable exit bindings (existing ExitMeta) + /// + /// Maps carrier names to their JoinIR-local exit values. + /// These go to carrier_inputs for carrier PHI generation. + pub exit_meta: ExitMeta, + + /// Phase 132 P1: Continuation contract (SSOT) + /// Phase 256 P1.7: Changed from BTreeSet<JoinFuncId> to BTreeSet<String> + /// + /// JoinIR merge must NOT "guess" continuation functions by name.
+ /// Normalized shadow (and other frontends) must explicitly declare which function names + /// are continuations for the fragment, and merge must follow this contract. + /// + /// Merge may still choose to *skip* some continuation functions if and only if they + /// are structurally "skippable" (pure exit stubs). See merge/instruction_rewriter.rs. + /// + /// **Why Strings instead of JoinFuncIds**: The bridge uses JoinFunction.name as the + /// MirModule function key (e.g., "k_exit"), not "join_func_{id}". The merge code + /// looks up functions by name, so we must use actual function names here. + pub continuation_funcs: BTreeSet<String>, +}