//! Carrier variable metadata for JoinIR loop lowering //! //! This module defines metadata structures for tracking carrier variables //! in loop lowering. This enables dynamic generation of exit bindings //! without hardcoded variable names or ValueIds. //! //! Phase 193-2: Enhanced builder methods for flexible construction //! //! # Phase 183-2: Primary CarrierInfo Construction //! //! This module is the single source of truth for CarrierInfo initialization. //! Both MIR and JoinIR contexts use `CarrierInfo::from_variable_map()` as the //! primary construction method. //! //! - MIR context: `common_init.rs` delegates to this module //! - JoinIR context: Uses `from_variable_map()` directly use crate::mir::ValueId; use std::collections::BTreeMap; // Phase 222.5-D: HashMap → BTreeMap for determinism /// Information about a single carrier variable #[derive(Debug, Clone)] pub struct CarrierVar { /// Variable name (e.g., "sum", "printed") pub name: String, /// Host ValueId for this variable (MIR側) pub host_id: ValueId, /// Phase 177-STRUCT: JoinIR側でこのキャリアを表すValueId /// /// ヘッダPHIのdstや、exitで使う値を記録する。 /// これにより、index ベースのマッチングを名前ベースに置き換えられる。 /// /// - `Some(vid)`: Header PHI生成後にセットされる /// - `None`: まだPHI生成前、または該当なし pub join_id: Option, } /// Complete carrier information for a loop #[derive(Debug, Clone)] pub struct CarrierInfo { /// Loop control variable name (e.g., "i") pub loop_var_name: String, /// Loop control variable ValueId in host pub loop_var_id: ValueId, /// Additional carrier variables (e.g., sum, printed) pub carriers: Vec, /// Phase 171-C-5: Trim pattern helper (if this CarrierInfo was created from Trim promotion) pub trim_helper: Option, /// Phase 224: Promoted LoopBodyLocal variables (e.g., "digit_pos" promoted to "is_digit_pos") /// /// These variables were originally LoopBodyLocal but have been promoted to carriers /// during condition promotion (e.g., DigitPosPromoter). The lowerer should skip /// LoopBodyLocal checks for these variables. pub promoted_loopbodylocals: Vec, } impl CarrierInfo { /// Phase 193-2: Create CarrierInfo from a variable_map /// /// Automatically extracts all non-loop-control variables from the host's /// variable_map. This eliminates manual carrier listing for simple cases. /// /// # Arguments /// /// * `loop_var_name` - Name of the loop control variable (e.g., "i") /// * `variable_map` - Host function's variable_map (String → ValueId) /// /// # Returns /// /// CarrierInfo with loop_var and all other variables as carriers /// /// # Example /// /// ```ignore /// let carrier_info = CarrierInfo::from_variable_map( /// "i".to_string(), /// &variable_map // {"i": ValueId(5), "sum": ValueId(10), "count": ValueId(11)} /// )?; /// // Result: CarrierInfo with loop_var="i", carriers=[sum, count] /// ``` pub fn from_variable_map( loop_var_name: String, variable_map: &BTreeMap, // Phase 222.5-D: HashMap → BTreeMap for determinism ) -> Result { // Find loop variable let loop_var_id = variable_map .get(&loop_var_name) .copied() .ok_or_else(|| { format!( "Loop variable '{}' not found in variable_map", loop_var_name ) })?; // Collect all non-loop-var variables as carriers let mut carriers: Vec = variable_map .iter() .filter(|(name, _)| *name != &loop_var_name) .map(|(name, &id)| CarrierVar { name: name.clone(), host_id: id, join_id: None, // Phase 177-STRUCT-1: Set by header PHI generation }) .collect(); // Sort for determinism carriers.sort_by(|a, b| a.name.cmp(&b.name)); Ok(CarrierInfo { loop_var_name, loop_var_id, carriers, trim_helper: None, // Phase 171-C-5: No Trim pattern by default promoted_loopbodylocals: Vec::new(), // Phase 224: No promoted variables by default }) } /// Phase 193-2: Create CarrierInfo with explicit carrier list /// /// Useful when you have specific carriers in mind and want explicit control /// over which variables are treated as carriers. /// /// # Arguments /// /// * `loop_var_name` - Name of the loop control variable /// * `loop_var_id` - ValueId of the loop variable /// * `carrier_names` - Names of carrier variables (will look up in variable_map) /// * `variable_map` - Host function's variable_map for lookups /// /// # Returns /// /// CarrierInfo with only the specified carriers /// /// # Example /// /// ```ignore /// let carrier_info = CarrierInfo::with_explicit_carriers( /// "i".to_string(), /// ValueId(5), /// vec!["sum".to_string(), "count".to_string()], /// &variable_map /// )?; /// ``` pub fn with_explicit_carriers( loop_var_name: String, loop_var_id: ValueId, carrier_names: Vec, variable_map: &BTreeMap, // Phase 222.5-D: HashMap → BTreeMap for determinism ) -> Result { let mut carriers = Vec::new(); for name in carrier_names { let host_id = variable_map.get(&name).copied().ok_or_else(|| { format!("Carrier variable '{}' not found in variable_map", name) })?; carriers.push(CarrierVar { name, host_id, join_id: None, // Phase 177-STRUCT-1: Set by header PHI generation }); } // Sort for determinism carriers.sort_by(|a, b| a.name.cmp(&b.name)); Ok(CarrierInfo { loop_var_name, loop_var_id, carriers, trim_helper: None, // Phase 171-C-5: No Trim pattern by default promoted_loopbodylocals: Vec::new(), // Phase 224: No promoted variables by default }) } /// Phase 193-2: Create CarrierInfo with manual CarrierVar list /// /// Most explicit construction method - you provide everything directly. /// Useful when you already have CarrierVar structs built elsewhere. /// /// # Arguments /// /// * `loop_var_name` - Name of the loop control variable /// * `loop_var_id` - ValueId of the loop variable /// * `carriers` - Vec of already-constructed CarrierVar structs pub fn with_carriers( loop_var_name: String, loop_var_id: ValueId, mut carriers: Vec, ) -> Self { // Sort for determinism carriers.sort_by(|a, b| a.name.cmp(&b.name)); Self { loop_var_name, loop_var_id, carriers, trim_helper: None, // Phase 171-C-5: No Trim pattern by default promoted_loopbodylocals: Vec::new(), // Phase 224: No promoted variables by default } } /// Phase 193-2: Get carrier count /// /// Convenience method for checking how many carriers this info has. pub fn carrier_count(&self) -> usize { self.carriers.len() } /// Phase 193-2: Check if this has multiple carriers /// /// Useful for pattern matching: "is this a multi-carrier loop?" pub fn is_multi_carrier(&self) -> bool { self.carriers.len() > 1 } /// Phase 193-2: Find a carrier by name /// /// Lookup a specific carrier variable by name. pub fn find_carrier(&self, name: &str) -> Option<&CarrierVar> { self.carriers.iter().find(|c| c.name == name) } /// Phase 171-C-4: Merge carriers from another CarrierInfo /// /// Deduplicates by carrier name. If a carrier with the same name already exists, /// it will not be added again. /// /// # Arguments /// /// * `other` - Another CarrierInfo to merge from /// /// # Example /// /// ```ignore /// let mut carrier_info = CarrierInfo::from_variable_map("i", &variable_map)?; /// let promoted_carrier = TrimPatternInfo::to_carrier_info(); /// carrier_info.merge_from(&promoted_carrier); /// ``` pub fn merge_from(&mut self, other: &CarrierInfo) { for carrier in &other.carriers { if !self.carriers.iter().any(|c| c.name == carrier.name) { self.carriers.push(carrier.clone()); } } // Maintain sorted order for determinism self.carriers.sort_by(|a, b| a.name.cmp(&b.name)); // Phase 171-C-5: Also merge trim_helper if present if other.trim_helper.is_some() { self.trim_helper = other.trim_helper.clone(); } // Phase 224: Merge promoted_loopbodylocals (deduplicate) for promoted_var in &other.promoted_loopbodylocals { if !self.promoted_loopbodylocals.contains(promoted_var) { self.promoted_loopbodylocals.push(promoted_var.clone()); } } } /// Phase 171-C-5: Get Trim pattern helper /// /// Returns the TrimLoopHelper if this CarrierInfo was created from Trim promotion. /// /// # Returns /// /// * `Some(&TrimLoopHelper)` - If this CarrierInfo contains Trim pattern information /// * `None` - If this is a regular CarrierInfo (not from Trim promotion) /// /// # Example /// /// ```ignore /// if let Some(helper) = carrier_info.trim_helper() { /// eprintln!("Trim pattern detected: {}", helper.carrier_name); /// eprintln!("Whitespace chars: {:?}", helper.whitespace_chars); /// } /// ``` pub fn trim_helper(&self) -> Option<&crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper> { self.trim_helper.as_ref() } } /// Exit metadata returned by lowerers /// /// This structure captures the mapping from JoinIR exit values to /// carrier variable names, enabling dynamic binding generation. #[derive(Debug, Clone)] pub struct ExitMeta { /// Exit value bindings: (carrier_name, join_exit_value_id) /// /// Example for Pattern 4: /// ``` /// vec![("sum".to_string(), ValueId(15))] /// ``` /// where ValueId(15) is the k_exit parameter in JoinIR-local space. pub exit_values: Vec<(String, ValueId)>, } /// Phase 33-14: JoinFragmentMeta - Distinguishes expr result from carrier updates /// /// ## Purpose /// /// Separates two distinct use cases for JoinIR loops: /// /// 1. **Expr Result Pattern** (joinir_min_loop.hako): /// ```nyash /// local result = loop(...) { ... } // Loop used as expression /// return result /// ``` /// Here, the k_exit return value is the "expr result" that should go to exit_phi_inputs. /// /// 2. **Carrier Update Pattern** (trim pattern): /// ```nyash /// loop(...) { start = start + 1 } // Loop used for side effects /// print(start) // Use carrier after loop /// ``` /// Here, there's no "expr result" - only carrier variable updates. /// /// ## SSA Correctness /// /// Previously, exit_phi_inputs mixed expr results with carrier updates, causing: /// - PHI inputs that referenced undefined remapped values /// - SSA-undef errors in VM execution /// /// With JoinFragmentMeta: /// - `expr_result`: Only goes to exit_phi_inputs (generates PHI for expr value) /// - `exit_meta`: Only goes to carrier_inputs (updates variable_map via carrier PHIs) /// /// ## Example: Pattern 2 (joinir_min_loop.hako) /// /// ```rust /// JoinFragmentMeta { /// expr_result: Some(i_exit), // k_exit returns i as expr value /// exit_meta: ExitMeta::single("i".to_string(), i_exit), // Also a carrier /// } /// ``` /// /// ## Example: Pattern 3 (trim pattern) /// /// ```rust /// JoinFragmentMeta { /// expr_result: None, // Loop doesn't return a value /// exit_meta: ExitMeta::multiple(vec![ /// ("start".to_string(), start_exit), /// ("end".to_string(), end_exit), /// ]), /// } /// ``` #[derive(Debug, Clone)] pub struct JoinFragmentMeta { /// Expression result ValueId from k_exit (JoinIR-local) /// /// - `Some(vid)`: Loop is used as expression, k_exit's return value → exit_phi_inputs /// - `None`: Loop is used for side effects only, no PHI for expr value pub expr_result: Option, /// Carrier variable exit bindings (existing ExitMeta) /// /// Maps carrier names to their JoinIR-local exit values. /// These go to carrier_inputs for carrier PHI generation. pub exit_meta: ExitMeta, } impl JoinFragmentMeta { /// Create JoinFragmentMeta for expression result pattern /// /// Use when the loop returns a value (like `return loop(...)`). pub fn with_expr_result(expr_result: ValueId, exit_meta: ExitMeta) -> Self { Self { expr_result: Some(expr_result), exit_meta, } } /// Create JoinFragmentMeta for carrier-only pattern /// /// Use when the loop only updates carriers (like trim pattern). pub fn carrier_only(exit_meta: ExitMeta) -> Self { Self { expr_result: None, exit_meta, } } /// Create empty JoinFragmentMeta (no expr result, no carriers) pub fn empty() -> Self { Self { expr_result: None, exit_meta: ExitMeta::empty(), } } /// Check if this fragment has an expression result pub fn has_expr_result(&self) -> bool { self.expr_result.is_some() } /// Phase 33-14: Backward compatibility - convert to ExitMeta /// /// During migration, some code may still expect ExitMeta. /// This extracts just the carrier bindings. #[deprecated(since = "33-14", note = "Use exit_meta directly for carrier access")] pub fn to_exit_meta(&self) -> ExitMeta { self.exit_meta.clone() } } impl ExitMeta { /// Create new ExitMeta with no exit values pub fn empty() -> Self { Self { exit_values: vec![], } } /// Create ExitMeta with a single exit value pub fn single(carrier_name: String, join_value: ValueId) -> Self { Self { exit_values: vec![(carrier_name, join_value)], } } /// Create ExitMeta with multiple exit values pub fn multiple(exit_values: Vec<(String, ValueId)>) -> Self { Self { exit_values } } /// Phase 193-2: Get the count of exit bindings /// /// Useful for checking if this ExitMeta has any exit values. pub fn binding_count(&self) -> usize { self.exit_values.len() } /// Phase 193-2: Check if this has any exit values pub fn is_empty(&self) -> bool { self.exit_values.is_empty() } /// Phase 193-2: Find a binding by carrier name /// /// Lookup a specific exit value by carrier name. pub fn find_binding(&self, carrier_name: &str) -> Option { self.exit_values .iter() .find(|(name, _)| name == carrier_name) .map(|(_, value_id)| *value_id) } /// Phase 193-2: Add a binding to ExitMeta /// /// Convenient way to build ExitMeta incrementally. pub fn with_binding(mut self, carrier_name: String, join_value: ValueId) -> Self { self.exit_values.push((carrier_name, join_value)); self } } #[cfg(test)] mod tests { use super::*; // Helper: Create a CarrierVar for testing fn test_carrier(name: &str, id: u32) -> CarrierVar { CarrierVar { name: name.to_string(), host_id: ValueId(id), join_id: None, // Phase 177-STRUCT-1 } } // Helper: Create a CarrierInfo for testing fn test_carrier_info(loop_var: &str, loop_id: u32, carriers: Vec) -> CarrierInfo { CarrierInfo::with_carriers( loop_var.to_string(), ValueId(loop_id), carriers, ) } #[test] fn test_merge_from_empty() { // Merge empty CarrierInfo should not change anything let mut carrier_info = test_carrier_info( "i", 5, vec![test_carrier("sum", 10)], ); let other = test_carrier_info("j", 20, vec![]); carrier_info.merge_from(&other); assert_eq!(carrier_info.carrier_count(), 1); assert_eq!(carrier_info.carriers[0].name, "sum"); } #[test] fn test_merge_from_new_carrier() { // Merge a new carrier that doesn't exist yet let mut carrier_info = test_carrier_info( "i", 5, vec![test_carrier("sum", 10)], ); let other = test_carrier_info( "j", 20, vec![test_carrier("count", 15)], ); carrier_info.merge_from(&other); assert_eq!(carrier_info.carrier_count(), 2); // Should be sorted by name assert_eq!(carrier_info.carriers[0].name, "count"); // 'c' < 's' assert_eq!(carrier_info.carriers[1].name, "sum"); } #[test] fn test_merge_from_duplicate_carrier() { // Merge a carrier with the same name should NOT duplicate let mut carrier_info = test_carrier_info( "i", 5, vec![test_carrier("sum", 10)], ); let other = test_carrier_info( "j", 20, vec![test_carrier("sum", 999)], // Same name, different ID ); carrier_info.merge_from(&other); // Should still have only 1 carrier (no duplication) assert_eq!(carrier_info.carrier_count(), 1); assert_eq!(carrier_info.carriers[0].name, "sum"); // Original ID should be preserved assert_eq!(carrier_info.carriers[0].host_id, ValueId(10)); } #[test] fn test_merge_from_multiple_carriers() { // Merge multiple carriers let mut carrier_info = test_carrier_info( "i", 5, vec![test_carrier("sum", 10)], ); let other = test_carrier_info( "j", 20, vec![ test_carrier("count", 15), test_carrier("product", 18), ], ); carrier_info.merge_from(&other); assert_eq!(carrier_info.carrier_count(), 3); // Should be sorted by name assert_eq!(carrier_info.carriers[0].name, "count"); assert_eq!(carrier_info.carriers[1].name, "product"); assert_eq!(carrier_info.carriers[2].name, "sum"); } #[test] fn test_merge_from_preserves_determinism() { // Test that merge maintains sorted order let mut carrier_info = test_carrier_info( "i", 5, vec![ test_carrier("zebra", 30), test_carrier("alpha", 10), ], ); let other = test_carrier_info( "j", 20, vec![ test_carrier("beta", 15), test_carrier("gamma", 18), ], ); carrier_info.merge_from(&other); assert_eq!(carrier_info.carrier_count(), 4); // Should be sorted alphabetically assert_eq!(carrier_info.carriers[0].name, "alpha"); assert_eq!(carrier_info.carriers[1].name, "beta"); assert_eq!(carrier_info.carriers[2].name, "gamma"); assert_eq!(carrier_info.carriers[3].name, "zebra"); } #[test] fn test_merge_from_with_trim_helper() { // Test that trim_helper is merged use crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper; let mut carrier_info = test_carrier_info("i", 5, vec![]); let mut other = test_carrier_info("j", 20, vec![]); other.trim_helper = Some(TrimLoopHelper { original_var: "ch".to_string(), carrier_name: "is_whitespace".to_string(), whitespace_chars: vec![" ".to_string(), "\t".to_string()], }); carrier_info.merge_from(&other); // trim_helper should be copied assert!(carrier_info.trim_helper.is_some()); let helper = carrier_info.trim_helper.as_ref().unwrap(); assert_eq!(helper.original_var, "ch"); assert_eq!(helper.carrier_name, "is_whitespace"); assert_eq!(helper.whitespace_count(), 2); } #[test] fn test_trim_helper_accessor() { // Test the trim_helper() accessor method use crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper; let mut carrier_info = test_carrier_info("i", 5, vec![]); // Initially None assert!(carrier_info.trim_helper().is_none()); // Add trim_helper carrier_info.trim_helper = Some(TrimLoopHelper { original_var: "ch".to_string(), carrier_name: "is_whitespace".to_string(), whitespace_chars: vec![" ".to_string()], }); // Now Some assert!(carrier_info.trim_helper().is_some()); let helper = carrier_info.trim_helper().unwrap(); assert_eq!(helper.original_var, "ch"); } }