diff --git a/docs/development/current/main/phase193_exit_binding_builder.md b/docs/development/current/main/phase193_exit_binding_builder.md new file mode 100644 index 00000000..dc76ef08 --- /dev/null +++ b/docs/development/current/main/phase193_exit_binding_builder.md @@ -0,0 +1,364 @@ +# Phase 193-4: ExitBindingBuilder Design & Implementation + +**Phase**: 193-4 +**Status**: Design Phase +**Date**: 2025-12-06 +**Goal**: Fully boxify loop exit binding generation for Pattern 3 & 4, eliminating hardcoded variable names and ValueId assumptions + +--- + +## Overview + +The ExitBindingBuilder box connects JoinIR exit values (from loop lowering) back to the host function's variable_map. This eliminates: +- Hardcoded variable names like `"sum"`, `"printed"` +- Assumptions about single-carrier patterns +- Complex ValueId plumbing scattered across Pattern 3/4 lowerers + +### Architecture Diagram + +``` +Pattern Lowerer + ↓ + ├─ CarrierInfo (loop_var, carriers[], host_ids) + ├─ ExitMeta (join_exit_values[]) + └─ variable_map + ↓ +ExitBindingBuilder + ↓ + ├─ LoopExitBinding[] (carrier → host mapping) + └─ JoinInlineBoundary update (host_outputs, join_outputs) + ↓ +Host function variable_map (updated with new ValueIds) +``` + +--- + +## Data Structures + +### Input: CarrierInfo + +```rust +pub struct CarrierInfo { + pub loop_var_name: String, // e.g., "i" + pub loop_var_id: ValueId, // Host-side ValueId for loop var + pub carriers: Vec<CarrierVar>, // [{ name: "sum", host_id: ValueId(10) }, ...] 
+} + +pub struct CarrierVar { + pub name: String, // Variable name (e.g., "sum") + pub host_id: ValueId, // Host-side ValueId (initial value) +} +``` + +### Input: ExitMeta + +```rust +pub struct ExitMeta { + pub exit_values: Vec<(String, ValueId)>, + // Example: [("sum", ValueId(15)), ("printed", ValueId(16))] + // where ValueId(15/16) are in JoinIR-local space (parameters/results) +} +``` + +### Output: LoopExitBinding (New) + +```rust +pub struct LoopExitBinding { + /// Carrier variable name (e.g., "sum", "printed") + pub carrier_name: String, + + /// Host-side ValueId for this carrier + pub host_id: ValueId, + + /// Join-side exit ValueId (from ExitMeta) + pub join_exit_id: ValueId, +} +``` + +### JoinInlineBoundary Updates + +```rust +pub struct JoinInlineBoundary { + // ... existing fields ... + + /// Host-side output ValueIds (one per carrier + loop_var) + pub host_outputs: Vec<ValueId>, + + /// Join-side output ValueIds (one per carrier + loop_var, in JoinIR space) + pub join_outputs: Vec<ValueId>, +} +``` + +--- + +## API Design + +### ExitBindingBuilder + +```rust +pub struct ExitBindingBuilder<'a> { + carrier_info: &'a CarrierInfo, + exit_meta: &'a ExitMeta, + variable_map: &'a mut HashMap<String, ValueId>, +} + +impl<'a> ExitBindingBuilder<'a> { + /// Create a new builder from metadata + pub fn new( + carrier_info: &'a CarrierInfo, + exit_meta: &'a ExitMeta, + variable_map: &'a mut HashMap<String, ValueId>, + ) -> Result<Self, String>; + + /// Generate loop exit bindings + /// + /// Returns one LoopExitBinding per carrier, in sorted order. + /// Updates variable_map with new post-loop ValueIds. + pub fn build_loop_exit_bindings(&mut self) -> Result<Vec<LoopExitBinding>, String>; + + /// Apply bindings to JoinInlineBoundary + /// + /// Sets host_outputs and join_outputs based on loop_var + carriers. + /// Must be called after build_loop_exit_bindings(). 
+ pub fn apply_to_boundary(&self, boundary: &mut JoinInlineBoundary) -> Result<(), String>; + + /// Get the updated loop_var exit binding (always first) + pub fn loop_var_exit_binding(&self) -> LoopExitBinding; +} +``` + +--- + +## Validation Rules + +### Single Carrier Case + +**Input Example**: +``` +CarrierInfo { + loop_var_name: "i", + loop_var_id: ValueId(5), + carriers: [{ name: "sum", host_id: ValueId(10) }] +} + +ExitMeta { + exit_values: [("sum", ValueId(15))] +} + +variable_map: {"i": ValueId(5), "sum": ValueId(10)} +``` + +**Output**: +``` +LoopExitBinding { + carrier_name: "sum", + host_id: ValueId(10), + join_exit_id: ValueId(15) +} + +variable_map (updated): {"i": ValueId(5), "sum": ValueId(???)} // NEW ValueId for post-loop sum +``` + +### Multiple Carrier Case + +**Input Example**: +``` +CarrierInfo { + loop_var_name: "i", + loop_var_id: ValueId(5), + carriers: [ + { name: "printed", host_id: ValueId(11) }, + { name: "sum", host_id: ValueId(10) } + ] +} + +ExitMeta { + exit_values: [ + ("printed", ValueId(14)), + ("sum", ValueId(15)) + ] +} + +variable_map: {"i": ValueId(5), "sum": ValueId(10), "printed": ValueId(11)} +``` + +**Output**: +``` +LoopExitBinding[ + { carrier_name: "printed", host_id: ValueId(11), join_exit_id: ValueId(14) }, + { carrier_name: "sum", host_id: ValueId(10), join_exit_id: ValueId(15) } +] + +variable_map (updated): +{ + "i": ValueId(5), + "sum": ValueId(???), // NEW post-loop ValueId + "printed": ValueId(???) // NEW post-loop ValueId +} +``` + +### Error Cases + +1. **Carrier name mismatch**: ExitMeta contains carrier name not in CarrierInfo + - Error: `"Exit carrier 'foo' not found in CarrierInfo"` + +2. **Missing carrier in ExitMeta**: CarrierInfo has carrier not in ExitMeta + - Error: `"Carrier 'sum' missing in ExitMeta"` + +3. 
**Loop variable in ExitMeta**: ExitMeta erroneously maps loop_var + - Error: `"Loop variable 'i' should not be in exit_values"` + +--- + +## Implementation Strategy + +### File Structure + +**New file**: `src/mir/builder/control_flow/joinir/patterns/exit_binding.rs` + +```rust +use crate::mir::ValueId; +use crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary; +use crate::mir::join_ir::lowering::carrier_info::{CarrierInfo, ExitMeta}; +use std::collections::HashMap; + +pub struct LoopExitBinding { ... } + +pub struct ExitBindingBuilder<'a> { ... } + +impl<'a> ExitBindingBuilder<'a> { + pub fn new(...) -> Result<Self, String> { ... } + pub fn build_loop_exit_bindings(&mut self) -> Result<Vec<LoopExitBinding>, String> { ... } + pub fn apply_to_boundary(&self, boundary: &mut JoinInlineBoundary) -> Result<(), String> { ... } +} +``` + +### Module Declaration + +Update `src/mir/builder/control_flow/joinir/patterns/mod.rs`: + +```rust +pub(in crate::mir::builder) mod exit_binding; +``` + +### Integration Points + +**Pattern 3 & 4 Lowerers**: + +```rust +// OLD: Direct boundary manipulation +boundary.host_outputs.push(sum_value_id); +boundary.join_outputs.push(join_sum_exit); +variable_map.insert("sum".to_string(), new_sum_id); + +// NEW: Via ExitBindingBuilder +let mut builder = ExitBindingBuilder::new(&carrier_info, &exit_meta, variable_map)?; +let _bindings = builder.build_loop_exit_bindings()?; +builder.apply_to_boundary(&mut boundary)?; +``` + +--- + +## Testing Strategy + +### Unit Tests (exit_binding.rs) + +```rust +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_single_carrier_binding() { ... } + + #[test] + fn test_multi_carrier_binding() { ... } + + #[test] + fn test_carrier_name_mismatch_error() { ... } + + #[test] + fn test_variable_map_update() { ... 
} +} +``` + +### Integration Tests + +**File**: `apps/tests/loop_continue_multi_carrier.hako` + +```hako +static box Main { + main() { + local sum = 0 + local printed = 0 + + loop(i = 0; i < 5; i = i + 1) { + if (i > 2) { + printed = printed + 1 + continue + } + sum = sum + i + } + + // Expected: sum = 0+1+2 = 3, printed = 2 (i=3,4) + print(sum) + print(printed) + } +} +``` + +**Test command**: +```bash +NYASH_JOINIR_CORE=1 ./target/release/hakorune apps/tests/loop_continue_multi_carrier.hako +# Expected output: +# 3 +# 2 +``` + +--- + +## Tracking Variable Updates + +### variable_map lifecycle + +1. **Before loop lowering**: `{"i": ValueId(5), "sum": ValueId(10), "printed": ValueId(11)}` + +2. **After JoinModule creation**: (unchanged) + +3. **ExitBindingBuilder::build_loop_exit_bindings()**: + - Allocates new ValueIds for post-loop carrier values + - Updates variable_map: `{"i": ValueId(5), "sum": ValueId(??), "printed": ValueId(??)}` + +4. **After loop lowering**: variable_map reflects post-loop state + +### Debugging support + +Optional environment variable: `NYASH_TRACE_EXIT_BINDING=1` + +Output example: +``` +[exit_binding] Carrier "sum": host_id=ValueId(10) → join_exit=ValueId(15) → post_loop=ValueId(23) +[exit_binding] Carrier "printed": host_id=ValueId(11) → join_exit=ValueId(14) → post_loop=ValueId(24) +[exit_binding] JoinInlineBoundary: host_outputs=[ValueId(5), ValueId(23), ValueId(24)] +``` + +--- + +## Related Phases + +- **Phase 188**: JoinInlineBoundary initial design +- **Phase 190**: CarrierInfo (Phase 193-2 enhancement) +- **Phase 193-3**: Pattern classification helpers +- **Phase 193-4**: ExitBindingBuilder (THIS PHASE) +- **Phase 193-5**: Multi-carrier testing and validation + +--- + +## Success Criteria + +- [ ] ExitBindingBuilder compiles and passes unit tests +- [ ] Pattern 3 & 4 lowerers refactored to use ExitBindingBuilder +- [ ] No hardcoded variable names or ValueId assumptions remain in lowering +- [ ] loop_continue_multi_carrier.hako 
test passes with correct output +- [ ] Variable map correctly reflects post-loop carrier state +- [ ] Debugging environment variable works as expected diff --git a/src/mir/builder/control_flow/joinir/patterns/exit_binding.rs b/src/mir/builder/control_flow/joinir/patterns/exit_binding.rs new file mode 100644 index 00000000..70bd0be0 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/exit_binding.rs @@ -0,0 +1,400 @@ +//! Phase 193-4: Exit Binding Builder +//! +//! Connects JoinIR exit values back to the host function's variable_map, +//! eliminating hardcoded variable names and ValueId assumptions. +//! +//! This box fully abstracts loop exit binding generation for Pattern 3 & 4. + +use crate::mir::ValueId; +use crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary; +use crate::mir::join_ir::lowering::carrier_info::{CarrierInfo, ExitMeta}; +use std::collections::HashMap; + +/// Mapping from one JoinIR exit value to the host function variable (carrier) it belongs to +#[derive(Debug, Clone)] +pub struct LoopExitBinding { + /// Carrier variable name (e.g., "sum", "printed") + pub carrier_name: String, + + /// Host-side ValueId for this carrier (copied from the carrier's `host_id`) + pub host_id: ValueId, + + /// Join-side exit ValueId (from ExitMeta, in JoinIR space) + pub join_exit_id: ValueId, +} + +/// Builder for generating loop exit bindings +/// +/// Phase 193-4: Fully boxifies exit binding generation. +/// Eliminates hardcoded variable names and ValueId plumbing scattered across lowerers. 
+pub struct ExitBindingBuilder<'a> { + carrier_info: &'a CarrierInfo, + exit_meta: &'a ExitMeta, + variable_map: &'a mut HashMap<String, ValueId>, +} + +impl<'a> ExitBindingBuilder<'a> { + /// Create a new ExitBindingBuilder after checking that `carrier_info` and `exit_meta` agree + /// + /// # Arguments + /// + /// * `carrier_info` - Metadata about loop variables and carriers + /// * `exit_meta` - Exit values from JoinIR lowering + /// * `variable_map` - Host function's variable map (will be updated) + /// + /// # Returns + /// + /// ExitBindingBuilder instance, or `Err(String)` if the loop variable appears in `exit_values`, an exit value names an unknown carrier, or a carrier has no exit value + pub fn new( + carrier_info: &'a CarrierInfo, + exit_meta: &'a ExitMeta, + variable_map: &'a mut HashMap<String, ValueId>, + ) -> Result<Self, String> { + // Validate that all carriers in ExitMeta exist in CarrierInfo + for (carrier_name, _) in &exit_meta.exit_values { + if carrier_name == &carrier_info.loop_var_name { + return Err(format!( + "Loop variable '{}' should not be in exit_values", + carrier_name + )); + } + + if carrier_info.find_carrier(carrier_name).is_none() { + return Err(format!( + "Exit carrier '{}' not found in CarrierInfo", + carrier_name + )); + } + } + + // Validate that all carriers in CarrierInfo have exit values + for carrier in &carrier_info.carriers { + if exit_meta.find_binding(&carrier.name).is_none() { + return Err(format!( + "Carrier '{}' missing in ExitMeta", + carrier.name + )); + } + } + + Ok(Self { + carrier_info, + exit_meta, + variable_map, + }) + } + + /// Generate loop exit bindings + /// + /// Returns one LoopExitBinding per carrier, in `carrier_info.carriers` order (nothing is sorted here; name order relies on the carriers already being sorted). + /// Updates variable_map with new post-loop ValueIds for each carrier. 
+ /// + /// # Returns + /// + /// Vec of LoopExitBinding, one per carrier, in `carrier_info.carriers` order; `Err(String)` if a carrier has no exit value in ExitMeta + pub fn build_loop_exit_bindings(&mut self) -> Result<Vec<LoopExitBinding>, String> { + let mut bindings = Vec::with_capacity(self.carrier_info.carriers.len()); + + // Process each carrier in CarrierInfo order (no sorting is performed here) + for carrier in &self.carrier_info.carriers { + let join_exit_id = self.exit_meta.find_binding(&carrier.name) + .ok_or_else(|| format!("Carrier '{}' missing in ExitMeta", carrier.name))?; + + bindings.push(LoopExitBinding { + carrier_name: carrier.name.clone(), + host_id: carrier.host_id, + join_exit_id, + }); + + // Allocate new ValueId for post-loop carrier value + // This replaces the carrier's entry in variable_map, so later lookups see the post-loop id + let post_loop_id = self.allocate_new_value_id(); + self.variable_map.insert(carrier.name.clone(), post_loop_id); + } + + Ok(bindings) + } + + /// Apply bindings to JoinInlineBoundary + /// + /// Sets host_outputs and join_outputs based on loop_var + carriers. + /// Must be called after build_loop_exit_bindings(). 
+ /// + /// # Arguments + /// + /// * `boundary` - JoinInlineBoundary whose host_outputs and join_outputs are overwritten + /// + /// # Returns + /// + /// `Ok(())` on success; `Err(String)` if a carrier has no entry in variable_map or no exit value in ExitMeta + pub fn apply_to_boundary(&self, boundary: &mut JoinInlineBoundary) -> Result<(), String> { + // Always include loop_var exit first + let mut host_outputs = vec![self.carrier_info.loop_var_id]; + let mut join_outputs = vec![self.carrier_info.loop_var_id]; // NOTE(review): reuses loop_var_id as the JoinIR-side exit id for the loop var — confirm this is intended + + // Add carrier exits in CarrierInfo.carriers order (no sorting happens here) + for carrier in &self.carrier_info.carriers { + let post_loop_id = self.variable_map.get(&carrier.name) + .copied() + .ok_or_else(|| { + format!("Post-loop ValueId not found for carrier '{}'", carrier.name) + })?; + + let join_exit_id = self.exit_meta.find_binding(&carrier.name) + .ok_or_else(|| format!("Exit value not found for carrier '{}'", carrier.name))?; + + host_outputs.push(post_loop_id); + join_outputs.push(join_exit_id); + } + + boundary.host_outputs = host_outputs; + boundary.join_outputs = join_outputs; + + Ok(()) + } + + /// Get the loop variable exit binding + /// + /// The loop variable is always the first exit (index 0). 
+ pub fn loop_var_exit_binding(&self) -> LoopExitBinding { + LoopExitBinding { + carrier_name: self.carrier_info.loop_var_name.clone(), + host_id: self.carrier_info.loop_var_id, + join_exit_id: self.carrier_info.loop_var_id, // Loop var maps to itself (same id used for host and join side) + } + } + + /// Allocate a new ValueId for a post-loop carrier: one past the largest id currently in variable_map (ValueId(1) if the map is empty) + /// + /// TODO: This should be delegated to a proper ValueId allocator + /// For now, we use a placeholder strategy; ids that exist only outside variable_map are not considered + fn allocate_new_value_id(&self) -> ValueId { + // Find the maximum ValueId in current variable_map + let max_id = self.variable_map.values() + .map(|v| v.0) + .max() + .unwrap_or(0); + + // Allocate next sequential ID + // Note: This is a temporary strategy and should be replaced with + // proper ValueId allocation from the builder + ValueId(max_id + 1) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::mir::join_ir::lowering::carrier_info::CarrierVar; + + #[test] + fn test_single_carrier_binding() { + let carrier_info = CarrierInfo::with_carriers( + "i".to_string(), + ValueId(5), + vec![CarrierVar { + name: "sum".to_string(), + host_id: ValueId(10), + }], + ); + + let exit_meta = ExitMeta::single("sum".to_string(), ValueId(15)); + + let mut variable_map = [ + ("i".to_string(), ValueId(5)), + ("sum".to_string(), ValueId(10)), + ] + .iter() + .cloned() + .collect(); + + let mut builder = ExitBindingBuilder::new(&carrier_info, &exit_meta, &mut variable_map) + .expect("Failed to create builder"); + + let bindings = builder.build_loop_exit_bindings().expect("Failed to build bindings"); + + assert_eq!(bindings.len(), 1); + assert_eq!(bindings[0].carrier_name, "sum"); + assert_eq!(bindings[0].host_id, ValueId(10)); + assert_eq!(bindings[0].join_exit_id, ValueId(15)); + + // Check that variable_map was updated with new post-loop ValueId + assert!(variable_map.contains_key("sum")); + let post_loop_id = variable_map["sum"]; + assert!(post_loop_id.0 > 10); // Largest existing id is 10, so the placeholder allocator must return a larger one + } + + #[test] + fn 
test_multi_carrier_binding() { + let carrier_info = CarrierInfo::with_carriers( + "i".to_string(), + ValueId(5), + vec![ + CarrierVar { + name: "printed".to_string(), + host_id: ValueId(11), + }, + CarrierVar { + name: "sum".to_string(), + host_id: ValueId(10), + }, + ], + ); + + let exit_meta = ExitMeta::multiple(vec![ + ("printed".to_string(), ValueId(14)), + ("sum".to_string(), ValueId(15)), + ]); + + let mut variable_map = [ + ("i".to_string(), ValueId(5)), + ("sum".to_string(), ValueId(10)), + ("printed".to_string(), ValueId(11)), + ] + .iter() + .cloned() + .collect(); + + let mut builder = ExitBindingBuilder::new(&carrier_info, &exit_meta, &mut variable_map) + .expect("Failed to create builder"); + + let bindings = builder.build_loop_exit_bindings().expect("Failed to build bindings"); + + assert_eq!(bindings.len(), 2); + // Bindings follow the carriers order given above, which is already sorted by name + assert_eq!(bindings[0].carrier_name, "printed"); + assert_eq!(bindings[1].carrier_name, "sum"); + + // Carriers must still be present after the update (NOTE(review): this does not check that their ids changed) + assert!(variable_map.contains_key("printed")); + assert!(variable_map.contains_key("sum")); + } + + #[test] + fn test_carrier_name_mismatch_error() { + let carrier_info = CarrierInfo::with_carriers( + "i".to_string(), + ValueId(5), + vec![CarrierVar { + name: "sum".to_string(), + host_id: ValueId(10), + }], + ); + + // ExitMeta with non-existent carrier + let exit_meta = ExitMeta::single("foo".to_string(), ValueId(15)); + + let mut variable_map = [ + ("i".to_string(), ValueId(5)), + ("sum".to_string(), ValueId(10)), + ] + .iter() + .cloned() + .collect(); + + let result = ExitBindingBuilder::new(&carrier_info, &exit_meta, &mut variable_map); + assert!(result.is_err()); + assert!(result.err().expect("expected an error").contains("not found in CarrierInfo")); // err() instead of unwrap_err(): the latter needs ExitBindingBuilder: Debug + } + + #[test] + fn test_missing_carrier_in_exit_meta() { + let carrier_info = CarrierInfo::with_carriers( + "i".to_string(), + ValueId(5), + vec![CarrierVar { + name: "sum".to_string(), + host_id: ValueId(10), + }], + 
); + + // ExitMeta is empty + let exit_meta = ExitMeta::empty(); + + let mut variable_map = [ + ("i".to_string(), ValueId(5)), + ("sum".to_string(), ValueId(10)), + ] + .iter() + .cloned() + .collect(); + + let result = ExitBindingBuilder::new(&carrier_info, &exit_meta, &mut variable_map); + assert!(result.is_err()); + assert!(result.err().expect("expected an error").contains("missing in ExitMeta")); // err() instead of unwrap_err(): the latter needs ExitBindingBuilder: Debug + } + + #[test] + fn test_loop_var_in_exit_meta_error() { + let carrier_info = CarrierInfo::with_carriers( + "i".to_string(), + ValueId(5), + vec![CarrierVar { + name: "sum".to_string(), + host_id: ValueId(10), + }], + ); + + // ExitMeta incorrectly includes loop var + let exit_meta = ExitMeta::multiple(vec![ + ("i".to_string(), ValueId(5)), + ("sum".to_string(), ValueId(15)), + ]); + + let mut variable_map = [ + ("i".to_string(), ValueId(5)), + ("sum".to_string(), ValueId(10)), + ] + .iter() + .cloned() + .collect(); + + let result = ExitBindingBuilder::new(&carrier_info, &exit_meta, &mut variable_map); + assert!(result.is_err()); + assert!(result.err().expect("expected an error").contains("should not be in exit_values")); // err() instead of unwrap_err(): the latter needs ExitBindingBuilder: Debug + } + + #[test] + fn test_apply_to_boundary() { + let carrier_info = CarrierInfo::with_carriers( + "i".to_string(), + ValueId(5), + vec![CarrierVar { + name: "sum".to_string(), + host_id: ValueId(10), + }], + ); + + let exit_meta = ExitMeta::single("sum".to_string(), ValueId(15)); + + let mut variable_map = [ + ("i".to_string(), ValueId(5)), + ("sum".to_string(), ValueId(10)), + ] + .iter() + .cloned() + .collect(); + + let mut builder = ExitBindingBuilder::new(&carrier_info, &exit_meta, &mut variable_map) + .expect("Failed to create builder"); + + let _ = builder.build_loop_exit_bindings().expect("Failed to build bindings"); + + let mut boundary = JoinInlineBoundary { + host_inputs: vec![], + join_inputs: vec![], + host_outputs: vec![], + join_outputs: vec![], + }; + + builder.apply_to_boundary(&mut boundary) + .expect("Failed to apply to boundary"); + + // Should have loop_var + sum 
carrier + assert_eq!(boundary.host_outputs.len(), 2); + assert_eq!(boundary.join_outputs.len(), 2); + + assert_eq!(boundary.host_outputs[0], ValueId(5)); // loop_var + assert_eq!(boundary.join_outputs[0], ValueId(5)); // loop_var: apply_to_boundary reuses loop_var_id on the join side + } +} diff --git a/src/mir/builder/control_flow/joinir/patterns/mod.rs b/src/mir/builder/control_flow/joinir/patterns/mod.rs index f1537b0c..fa8c7595 100644 --- a/src/mir/builder/control_flow/joinir/patterns/mod.rs +++ b/src/mir/builder/control_flow/joinir/patterns/mod.rs @@ -14,8 +14,14 @@ //! Phase 193: AST Feature Extraction Modularization //! - ast_feature_extractor.rs: Pure function module for analyzing loop AST //! - High reusability for Pattern 5-6 and pattern analysis tools +//! +//! Phase 193-4: Exit Binding Builder +//! - exit_binding.rs: Fully boxified exit binding generation +//! - Eliminates hardcoded variable names and ValueId assumptions +//! - Supports both single and multi-carrier loop patterns pub(in crate::mir::builder) mod ast_feature_extractor; +pub(in crate::mir::builder) mod exit_binding; pub(in crate::mir::builder) mod pattern1_minimal; pub(in crate::mir::builder) mod pattern2_with_break; pub(in crate::mir::builder) mod pattern3_with_if_phi;