Files
hakorune/src/mir/join_ir/lowering/carrier_info.rs
nyash-codex a32791b0ed refactor(joinir): Phase 183-2 Consolidate CarrierInfo initialization
Makes CarrierInfo::from_variable_map() the primary initialization method.
Common pattern initializer now delegates to this centralized logic.

## Changes

1. **Primary Method: CarrierInfo::from_variable_map()**:
   - Now the single source of truth for CarrierInfo construction
   - Used by both MIR and JoinIR contexts
   - Documented as primary initialization method (Phase 183-2)

2. **CommonPatternInitializer Refactoring**:
   - Converted to thin wrapper around `CarrierInfo::from_variable_map()`
   - Delegates carrier collection to primary method
   - Only adds pattern-specific exclusion filtering
   - Reduced code duplication (~30 lines removed)

3. **Documentation Updates**:
   - `carrier_info.rs`: Added Phase 183-2 section explaining primary role
   - `common_init.rs`: Documented delegation strategy
   - Clear separation of concerns between modules

4. **Removed Duplicate Logic**:
   - Eliminated manual carrier collection in `common_init.rs`
   - Removed `CarrierVar` import (no longer directly constructed)
   - Unified sorting and validation in one place

## Benefits

- **Single source of truth**: CarrierInfo construction logic in one module
- **Consistency**: Same initialization algorithm across MIR/JoinIR
- **Maintainability**: Changes to carrier logic only needed once
- **Testability**: Primary logic tested in carrier_info module

## Testing

- All carrier_info tests pass (7 tests)
- All pattern tests pass (124 tests)
- No behavioral changes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-08 22:19:41 +09:00

636 lines
20 KiB
Rust

//! Carrier variable metadata for JoinIR loop lowering
//!
//! This module defines metadata structures for tracking carrier variables
//! in loop lowering. This enables dynamic generation of exit bindings
//! without hardcoded variable names or ValueIds.
//!
//! Phase 193-2: Enhanced builder methods for flexible construction
//!
//! # Phase 183-2: Primary CarrierInfo Construction
//!
//! This module is the single source of truth for CarrierInfo initialization.
//! Both MIR and JoinIR contexts use `CarrierInfo::from_variable_map()` as the
//! primary construction method.
//!
//! - MIR context: `common_init.rs` delegates to this module
//! - JoinIR context: Uses `from_variable_map()` directly
use crate::mir::ValueId;
use std::collections::HashMap;
/// Information about a single carrier variable.
///
/// Pairs the variable's name with its host-side `ValueId` and, once known,
/// the `ValueId` that stands for it on the JoinIR side.
#[derive(Debug, Clone)]
pub struct CarrierVar {
/// Variable name (e.g., "sum", "printed")
pub name: String,
/// Host ValueId for this variable (MIR side)
pub host_id: ValueId,
/// Phase 177-STRUCT: ValueId that represents this carrier on the JoinIR side.
///
/// Records the dst of the header PHI, or the value used at exit.
/// This allows index-based matching to be replaced with name-based matching.
///
/// - `Some(vid)`: set after header PHI generation
/// - `None`: PHI has not been generated yet, or not applicable
pub join_id: Option<ValueId>,
}
/// Complete carrier information for a loop.
///
/// Holds the loop control variable together with every additional carrier
/// variable. The constructors on `CarrierInfo` and `merge_from` sort
/// `carriers` by name so that iteration order is deterministic.
#[derive(Debug, Clone)]
pub struct CarrierInfo {
/// Loop control variable name (e.g., "i")
pub loop_var_name: String,
/// Loop control variable ValueId in host
pub loop_var_id: ValueId,
/// Additional carrier variables (e.g., sum, printed)
pub carriers: Vec<CarrierVar>,
/// Phase 171-C-5: Trim pattern helper (if this CarrierInfo was created from Trim promotion).
///
/// The constructors in this file initialise it to `None`; `merge_from`
/// copies it over from the other `CarrierInfo` when that one has a helper.
pub trim_helper: Option<crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper>,
}
impl CarrierInfo {
    /// Phase 183-2 / 193-2: Create CarrierInfo from a variable_map
    ///
    /// Primary construction method. Every entry of the host's variable_map
    /// other than the loop control variable becomes a carrier, which removes
    /// the need to list carriers by hand for simple cases.
    ///
    /// # Arguments
    ///
    /// * `loop_var_name` - Name of the loop control variable (e.g., "i")
    /// * `variable_map` - Host function's variable_map (String → ValueId)
    ///
    /// # Returns
    ///
    /// CarrierInfo with loop_var and all other variables as carriers, sorted
    /// by name. Each carrier starts with `join_id: None` and the result has
    /// no trim helper.
    ///
    /// # Errors
    ///
    /// Returns `Err` with a descriptive message when `loop_var_name` is not a
    /// key of `variable_map`.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let carrier_info = CarrierInfo::from_variable_map(
    ///     "i".to_string(),
    ///     &variable_map // {"i": ValueId(5), "sum": ValueId(10), "count": ValueId(11)}
    /// )?;
    /// // Result: CarrierInfo with loop_var="i", carriers=[count, sum]
    /// ```
    pub fn from_variable_map(
        loop_var_name: String,
        variable_map: &HashMap<String, ValueId>,
    ) -> Result<Self, String> {
        // Find loop variable
        let loop_var_id = variable_map.get(&loop_var_name).copied().ok_or_else(|| {
            format!(
                "Loop variable '{}' not found in variable_map",
                loop_var_name
            )
        })?;

        // Collect all non-loop-var variables as carriers
        let mut carriers: Vec<CarrierVar> = variable_map
            .iter()
            .filter(|(name, _)| **name != loop_var_name)
            .map(|(name, &id)| CarrierVar {
                name: name.clone(),
                host_id: id,
                join_id: None, // Phase 177-STRUCT-1: Set by header PHI generation
            })
            .collect();

        // HashMap iteration order is unspecified: sort for determinism
        carriers.sort_by(|a, b| a.name.cmp(&b.name));

        Ok(CarrierInfo {
            loop_var_name,
            loop_var_id,
            carriers,
            trim_helper: None, // Phase 171-C-5: No Trim pattern by default
        })
    }

    /// Phase 193-2: Create CarrierInfo with explicit carrier list
    ///
    /// Useful when you have specific carriers in mind and want explicit control
    /// over which variables are treated as carriers.
    ///
    /// # Arguments
    ///
    /// * `loop_var_name` - Name of the loop control variable
    /// * `loop_var_id` - ValueId of the loop variable
    /// * `carrier_names` - Names of carrier variables (will look up in variable_map)
    /// * `variable_map` - Host function's variable_map for lookups
    ///
    /// # Returns
    ///
    /// CarrierInfo with only the specified carriers, sorted by name. A name
    /// listed more than once yields a single carrier, so carrier names stay
    /// unique (the same guarantee `merge_from` gives).
    ///
    /// # Errors
    ///
    /// Returns `Err` naming the first entry of `carrier_names` that is not a
    /// key of `variable_map`.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let carrier_info = CarrierInfo::with_explicit_carriers(
    ///     "i".to_string(),
    ///     ValueId(5),
    ///     vec!["sum".to_string(), "count".to_string()],
    ///     &variable_map
    /// )?;
    /// ```
    pub fn with_explicit_carriers(
        loop_var_name: String,
        loop_var_id: ValueId,
        carrier_names: Vec<String>,
        variable_map: &HashMap<String, ValueId>,
    ) -> Result<Self, String> {
        // Resolve every requested name; the first missing one aborts the build.
        let mut carriers = carrier_names
            .into_iter()
            .map(|name| -> Result<CarrierVar, String> {
                let host_id = variable_map.get(&name).copied().ok_or_else(|| {
                    format!("Carrier variable '{}' not found in variable_map", name)
                })?;
                Ok(CarrierVar {
                    name,
                    host_id,
                    join_id: None, // Phase 177-STRUCT-1: Set by header PHI generation
                })
            })
            .collect::<Result<Vec<_>, String>>()?;

        // Sort for determinism
        carriers.sort_by(|a, b| a.name.cmp(&b.name));
        // Equal names are adjacent after sorting and were resolved through the
        // same map entry, so dropping the repeats loses no information.
        carriers.dedup_by(|later, earlier| later.name == earlier.name);

        Ok(CarrierInfo {
            loop_var_name,
            loop_var_id,
            carriers,
            trim_helper: None, // Phase 171-C-5: No Trim pattern by default
        })
    }

    /// Phase 193-2: Create CarrierInfo with manual CarrierVar list
    ///
    /// Most explicit construction method - you provide everything directly.
    /// Useful when you already have CarrierVar structs built elsewhere.
    /// The carriers are sorted by name; they are otherwise stored as given
    /// (no lookup, no deduplication).
    ///
    /// # Arguments
    ///
    /// * `loop_var_name` - Name of the loop control variable
    /// * `loop_var_id` - ValueId of the loop variable
    /// * `carriers` - Vec of already-constructed CarrierVar structs
    pub fn with_carriers(
        loop_var_name: String,
        loop_var_id: ValueId,
        mut carriers: Vec<CarrierVar>,
    ) -> Self {
        // Sort for determinism
        carriers.sort_by(|a, b| a.name.cmp(&b.name));

        Self {
            loop_var_name,
            loop_var_id,
            carriers,
            trim_helper: None, // Phase 171-C-5: No Trim pattern by default
        }
    }

    /// Phase 193-2: Get carrier count
    ///
    /// Convenience method for checking how many carriers this info has.
    /// The loop control variable is not counted.
    pub fn carrier_count(&self) -> usize {
        self.carriers.len()
    }

    /// Phase 193-2: Check if this has multiple carriers
    ///
    /// Useful for pattern matching: "is this a multi-carrier loop?"
    /// Returns `true` when there are two or more carriers.
    pub fn is_multi_carrier(&self) -> bool {
        self.carriers.len() > 1
    }

    /// Phase 193-2: Find a carrier by name
    ///
    /// Returns the first carrier whose name equals `name`, or `None` when no
    /// carrier matches. The loop control variable is not searched.
    pub fn find_carrier(&self, name: &str) -> Option<&CarrierVar> {
        self.carriers.iter().find(|c| c.name == name)
    }

    /// Phase 171-C-4: Merge carriers from another CarrierInfo
    ///
    /// Deduplicates by carrier name. If a carrier with the same name already exists,
    /// it will not be added again and the existing entry is kept unchanged.
    /// The loop variable of `other` is ignored.
    ///
    /// # Arguments
    ///
    /// * `other` - Another CarrierInfo to merge from
    ///
    /// # Example
    ///
    /// ```ignore
    /// let mut carrier_info = CarrierInfo::from_variable_map("i".to_string(), &variable_map)?;
    /// let promoted_carrier = TrimPatternInfo::to_carrier_info();
    /// carrier_info.merge_from(&promoted_carrier);
    /// ```
    pub fn merge_from(&mut self, other: &CarrierInfo) {
        for carrier in &other.carriers {
            // Checked against the growing list, so repeats inside `other`
            // are skipped as well.
            if !self.carriers.iter().any(|c| c.name == carrier.name) {
                self.carriers.push(carrier.clone());
            }
        }

        // Maintain sorted order for determinism
        self.carriers.sort_by(|a, b| a.name.cmp(&b.name));

        // Phase 171-C-5: Also merge trim_helper if present. A helper on
        // `other` replaces ours; an absent one leaves ours untouched.
        if let Some(helper) = &other.trim_helper {
            self.trim_helper = Some(helper.clone());
        }
    }

    /// Phase 171-C-5: Get Trim pattern helper
    ///
    /// Returns the TrimLoopHelper if this CarrierInfo was created from Trim promotion.
    ///
    /// # Returns
    ///
    /// * `Some(&TrimLoopHelper)` - If this CarrierInfo contains Trim pattern information
    /// * `None` - If this is a regular CarrierInfo (not from Trim promotion)
    ///
    /// # Example
    ///
    /// ```ignore
    /// if let Some(helper) = carrier_info.trim_helper() {
    ///     eprintln!("Trim pattern detected: {}", helper.carrier_name);
    ///     eprintln!("Whitespace chars: {:?}", helper.whitespace_chars);
    /// }
    /// ```
    pub fn trim_helper(
        &self,
    ) -> Option<&crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper> {
        self.trim_helper.as_ref()
    }
}
/// Exit metadata returned by lowerers
///
/// This structure captures the mapping from JoinIR exit values to
/// carrier variable names, enabling dynamic binding generation.
#[derive(Debug, Clone)]
pub struct ExitMeta {
/// Exit value bindings: (carrier_name, join_exit_value_id)
///
/// Example for Pattern 4 (illustrative only, hence `ignore`):
/// ```ignore
/// vec![("sum".to_string(), ValueId(15))]
/// ```
/// where ValueId(15) is the k_exit parameter in JoinIR-local space.
pub exit_values: Vec<(String, ValueId)>,
}
/// Phase 33-14: JoinFragmentMeta - Distinguishes expr result from carrier updates
///
/// ## Purpose
///
/// Separates two distinct use cases for JoinIR loops:
///
/// 1. **Expr Result Pattern** (joinir_min_loop.hako):
///    ```nyash
///    local result = loop(...) { ... }  // Loop used as expression
///    return result
///    ```
///    Here, the k_exit return value is the "expr result" that should go to exit_phi_inputs.
///
/// 2. **Carrier Update Pattern** (trim pattern):
///    ```nyash
///    loop(...) { start = start + 1 }  // Loop used for side effects
///    print(start)  // Use carrier after loop
///    ```
///    Here, there's no "expr result" - only carrier variable updates.
///
/// ## SSA Correctness
///
/// Previously, exit_phi_inputs mixed expr results with carrier updates, causing:
/// - PHI inputs that referenced undefined remapped values
/// - SSA-undef errors in VM execution
///
/// With JoinFragmentMeta:
/// - `expr_result`: Only goes to exit_phi_inputs (generates PHI for expr value)
/// - `exit_meta`: Only goes to carrier_inputs (updates variable_map via carrier PHIs)
///
/// ## Example: Pattern 2 (joinir_min_loop.hako)
///
/// ```ignore
/// JoinFragmentMeta {
///     expr_result: Some(i_exit),  // k_exit returns i as expr value
///     exit_meta: ExitMeta::single("i".to_string(), i_exit),  // Also a carrier
/// }
/// ```
///
/// ## Example: Pattern 3 (trim pattern)
///
/// ```ignore
/// JoinFragmentMeta {
///     expr_result: None,  // Loop doesn't return a value
///     exit_meta: ExitMeta::multiple(vec![
///         ("start".to_string(), start_exit),
///         ("end".to_string(), end_exit),
///     ]),
/// }
/// ```
#[derive(Debug, Clone)]
pub struct JoinFragmentMeta {
/// Expression result ValueId from k_exit (JoinIR-local)
///
/// - `Some(vid)`: Loop is used as expression, k_exit's return value → exit_phi_inputs
/// - `None`: Loop is used for side effects only, no PHI for expr value
pub expr_result: Option<ValueId>,
/// Carrier variable exit bindings (existing ExitMeta)
///
/// Maps carrier names to their JoinIR-local exit values.
/// These go to carrier_inputs for carrier PHI generation.
pub exit_meta: ExitMeta,
}
impl JoinFragmentMeta {
    /// Create JoinFragmentMeta for expression result pattern
    ///
    /// Use when the loop returns a value (like `return loop(...)`).
    ///
    /// # Arguments
    ///
    /// * `expr_result` - ValueId stored as `Some(expr_result)`
    /// * `exit_meta` - Carrier exit bindings, stored as given
    pub fn with_expr_result(expr_result: ValueId, exit_meta: ExitMeta) -> Self {
        Self {
            expr_result: Some(expr_result),
            exit_meta,
        }
    }

    /// Create JoinFragmentMeta for carrier-only pattern
    ///
    /// Use when the loop only updates carriers (like trim pattern).
    /// `expr_result` is set to `None`.
    pub fn carrier_only(exit_meta: ExitMeta) -> Self {
        Self {
            expr_result: None,
            exit_meta,
        }
    }

    /// Create empty JoinFragmentMeta (no expr result, no carriers)
    pub fn empty() -> Self {
        Self {
            expr_result: None,
            exit_meta: ExitMeta::empty(),
        }
    }

    /// Check if this fragment has an expression result
    ///
    /// Returns `true` exactly when `expr_result` is `Some`.
    pub fn has_expr_result(&self) -> bool {
        self.expr_result.is_some()
    }

    /// Phase 33-14: Backward compatibility - convert to ExitMeta
    ///
    /// During migration, some code may still expect ExitMeta.
    /// This extracts just the carrier bindings by cloning `exit_meta`.
    // The phase label lives in `note`: `since` is meant to hold a semantic
    // version, and "33-14" is rejected by clippy's `deprecated_semver` lint.
    #[deprecated(note = "Phase 33-14: use the `exit_meta` field directly for carrier access")]
    pub fn to_exit_meta(&self) -> ExitMeta {
        self.exit_meta.clone()
    }
}
impl ExitMeta {
/// Create new ExitMeta with no exit values
pub fn empty() -> Self {
Self {
exit_values: vec![],
}
}
/// Create ExitMeta with a single exit value
pub fn single(carrier_name: String, join_value: ValueId) -> Self {
Self {
exit_values: vec![(carrier_name, join_value)],
}
}
/// Create ExitMeta with multiple exit values
pub fn multiple(exit_values: Vec<(String, ValueId)>) -> Self {
Self { exit_values }
}
/// Phase 193-2: Get the count of exit bindings
///
/// Useful for checking if this ExitMeta has any exit values.
pub fn binding_count(&self) -> usize {
self.exit_values.len()
}
/// Phase 193-2: Check if this has any exit values
pub fn is_empty(&self) -> bool {
self.exit_values.is_empty()
}
/// Phase 193-2: Find a binding by carrier name
///
/// Lookup a specific exit value by carrier name.
pub fn find_binding(&self, carrier_name: &str) -> Option<ValueId> {
self.exit_values
.iter()
.find(|(name, _)| name == carrier_name)
.map(|(_, value_id)| *value_id)
}
/// Phase 193-2: Add a binding to ExitMeta
///
/// Convenient way to build ExitMeta incrementally.
pub fn with_binding(mut self, carrier_name: String, join_value: ValueId) -> Self {
self.exit_values.push((carrier_name, join_value));
self
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Helper: create a CarrierVar with the given name and host id (no JoinIR id yet).
    fn test_carrier(name: &str, id: u32) -> CarrierVar {
        CarrierVar {
            name: name.to_string(),
            host_id: ValueId(id),
            join_id: None, // Phase 177-STRUCT-1
        }
    }

    /// Helper: create a CarrierInfo through `with_carriers`.
    fn test_carrier_info(loop_var: &str, loop_id: u32, carriers: Vec<CarrierVar>) -> CarrierInfo {
        CarrierInfo::with_carriers(loop_var.to_string(), ValueId(loop_id), carriers)
    }

    /// Helper: carrier names in stored order.
    fn carrier_names(info: &CarrierInfo) -> Vec<&str> {
        info.carriers.iter().map(|c| c.name.as_str()).collect()
    }

    /// Helper: variable_map {"i": 5, "sum": 10, "count": 11}.
    fn sample_variable_map() -> HashMap<String, ValueId> {
        let mut map = HashMap::new();
        map.insert("i".to_string(), ValueId(5));
        map.insert("sum".to_string(), ValueId(10));
        map.insert("count".to_string(), ValueId(11));
        map
    }

    #[test]
    fn test_from_variable_map_collects_sorted_carriers() {
        // Every variable except the loop variable becomes a carrier, sorted by name
        let variable_map = sample_variable_map();

        let carrier_info =
            CarrierInfo::from_variable_map("i".to_string(), &variable_map).unwrap();

        assert_eq!(carrier_info.loop_var_name, "i");
        assert_eq!(carrier_info.loop_var_id, ValueId(5));
        assert_eq!(carrier_names(&carrier_info), vec!["count", "sum"]);
        assert!(carrier_info.is_multi_carrier());
        assert!(carrier_info.carriers.iter().all(|c| c.join_id.is_none()));
        assert!(carrier_info.trim_helper().is_none());

        // Lookup by name finds carriers but never the loop variable
        assert_eq!(carrier_info.find_carrier("sum").unwrap().host_id, ValueId(10));
        assert!(carrier_info.find_carrier("i").is_none());
    }

    #[test]
    fn test_from_variable_map_missing_loop_var() {
        // A loop variable absent from the map is reported as an error
        let variable_map = sample_variable_map();

        let err = CarrierInfo::from_variable_map("k".to_string(), &variable_map).unwrap_err();

        assert_eq!(err, "Loop variable 'k' not found in variable_map");
    }

    #[test]
    fn test_with_explicit_carriers_selects_and_sorts() {
        // Only the requested names become carriers, sorted by name
        let mut variable_map = sample_variable_map();
        variable_map.insert("tmp".to_string(), ValueId(12));

        let carrier_info = CarrierInfo::with_explicit_carriers(
            "i".to_string(),
            ValueId(5),
            vec!["sum".to_string(), "count".to_string()],
            &variable_map,
        )
        .unwrap();

        assert_eq!(carrier_names(&carrier_info), vec!["count", "sum"]);
        assert_eq!(carrier_info.carriers[0].host_id, ValueId(11));
        assert_eq!(carrier_info.carriers[1].host_id, ValueId(10));
        assert!(carrier_info.find_carrier("tmp").is_none());
    }

    #[test]
    fn test_with_explicit_carriers_missing_carrier() {
        // A requested carrier absent from the map is reported as an error
        let variable_map = sample_variable_map();

        let err = CarrierInfo::with_explicit_carriers(
            "i".to_string(),
            ValueId(5),
            vec!["missing".to_string()],
            &variable_map,
        )
        .unwrap_err();

        assert_eq!(err, "Carrier variable 'missing' not found in variable_map");
    }

    #[test]
    fn test_merge_from_empty() {
        // Merge empty CarrierInfo should not change anything
        let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]);
        let other = test_carrier_info("j", 20, vec![]);

        carrier_info.merge_from(&other);

        assert_eq!(carrier_info.carrier_count(), 1);
        assert_eq!(carrier_info.carriers[0].name, "sum");
    }

    #[test]
    fn test_merge_from_new_carrier() {
        // Merge a new carrier that doesn't exist yet
        let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]);
        let other = test_carrier_info("j", 20, vec![test_carrier("count", 15)]);

        carrier_info.merge_from(&other);

        assert_eq!(carrier_info.carrier_count(), 2);
        // Should be sorted by name: 'c' < 's'
        assert_eq!(carrier_names(&carrier_info), vec!["count", "sum"]);
    }

    #[test]
    fn test_merge_from_duplicate_carrier() {
        // Merge a carrier with the same name should NOT duplicate
        let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]);
        let other = test_carrier_info(
            "j",
            20,
            vec![test_carrier("sum", 999)], // Same name, different ID
        );

        carrier_info.merge_from(&other);

        // Should still have only 1 carrier (no duplication)
        assert_eq!(carrier_info.carrier_count(), 1);
        assert_eq!(carrier_info.carriers[0].name, "sum");
        // Original ID should be preserved
        assert_eq!(carrier_info.carriers[0].host_id, ValueId(10));
    }

    #[test]
    fn test_merge_from_multiple_carriers() {
        // Merge multiple carriers
        let mut carrier_info = test_carrier_info("i", 5, vec![test_carrier("sum", 10)]);
        let other = test_carrier_info(
            "j",
            20,
            vec![test_carrier("count", 15), test_carrier("product", 18)],
        );

        carrier_info.merge_from(&other);

        assert_eq!(carrier_info.carrier_count(), 3);
        // Should be sorted by name
        assert_eq!(carrier_names(&carrier_info), vec!["count", "product", "sum"]);
    }

    #[test]
    fn test_merge_from_preserves_determinism() {
        // Test that merge maintains sorted order
        let mut carrier_info = test_carrier_info(
            "i",
            5,
            vec![test_carrier("zebra", 30), test_carrier("alpha", 10)],
        );
        let other = test_carrier_info(
            "j",
            20,
            vec![test_carrier("beta", 15), test_carrier("gamma", 18)],
        );

        carrier_info.merge_from(&other);

        assert_eq!(carrier_info.carrier_count(), 4);
        // Should be sorted alphabetically
        assert_eq!(
            carrier_names(&carrier_info),
            vec!["alpha", "beta", "gamma", "zebra"]
        );
    }

    #[test]
    fn test_merge_from_with_trim_helper() {
        // Test that trim_helper is merged
        use crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper;

        let mut carrier_info = test_carrier_info("i", 5, vec![]);
        let mut other = test_carrier_info("j", 20, vec![]);
        other.trim_helper = Some(TrimLoopHelper {
            original_var: "ch".to_string(),
            carrier_name: "is_whitespace".to_string(),
            whitespace_chars: vec![" ".to_string(), "\t".to_string()],
        });

        carrier_info.merge_from(&other);

        // trim_helper should be copied
        assert!(carrier_info.trim_helper.is_some());
        let helper = carrier_info.trim_helper.as_ref().unwrap();
        assert_eq!(helper.original_var, "ch");
        assert_eq!(helper.carrier_name, "is_whitespace");
        assert_eq!(helper.whitespace_count(), 2);
    }

    #[test]
    fn test_trim_helper_accessor() {
        // Test the trim_helper() accessor method
        use crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper;

        let mut carrier_info = test_carrier_info("i", 5, vec![]);

        // Initially None
        assert!(carrier_info.trim_helper().is_none());

        // Add trim_helper
        carrier_info.trim_helper = Some(TrimLoopHelper {
            original_var: "ch".to_string(),
            carrier_name: "is_whitespace".to_string(),
            whitespace_chars: vec![" ".to_string()],
        });

        // Now Some
        assert!(carrier_info.trim_helper().is_some());
        let helper = carrier_info.trim_helper().unwrap();
        assert_eq!(helper.original_var, "ch");
    }
}