//! Loop Canonicalizer - AST Level Loop Preprocessing //! //! Phase 1: Type Definitions Only //! //! ## Purpose //! //! Decomposes AST-level loops into a normalized "skeleton" representation //! to prevent combinatorial explosion in pattern detection and lowering. //! //! ## Design Principle //! //! - **Input**: AST (LoopExpr) //! - **Output**: LoopSkeleton only (no JoinIR generation) //! - **Boundary**: No JoinIR-specific information (BlockId, ValueId, etc.) //! //! ## Architecture //! //! ``` //! AST → LoopSkeleton → Capability Guard → RoutingDecision → Pattern Lowerer //! ``` //! //! ## References //! //! - Design SSOT: `docs/development/current/main/design/loop-canonicalizer.md` //! - JoinIR Architecture: `docs/development/current/main/joinir-architecture-overview.md` //! - Pattern Space: `docs/development/current/main/loop_pattern_space.md` use crate::ast::{ASTNode, Span}; use crate::mir::loop_pattern_detection::LoopPatternKind; // ============================================================================ // Core Skeleton Types // ============================================================================ /// Loop skeleton - The canonical representation of a loop structure /// /// This is the single output type of the Canonicalizer. /// It represents the essential structure of a loop without any /// JoinIR-specific information. #[derive(Debug, Clone)] pub struct LoopSkeleton { /// Sequence of steps (HeaderCond, BodyInit, BreakCheck, Updates, Tail) pub steps: Vec, /// Carriers (loop variables with update rules and boundary crossing contracts) pub carriers: Vec, /// Exit contract (presence and payload of break/continue/return) pub exits: ExitContract, /// Captured variables from outer scope (optional) pub captured: Option>, /// Source location for debugging pub span: Span, } /// Skeleton step - Minimal step kinds for loop structure /// /// Each step represents a fundamental operation in the loop lifecycle. #[derive(Debug, Clone)] pub enum SkeletonStep { /// Loop continuation condition (the `cond` in `loop(cond)`) HeaderCond { expr: Box, }, /// Early exit check (`if cond { break }`) BreakCheck { cond: Box, has_value: bool, }, /// Skip check (`if cond { continue }`) ContinueCheck { cond: Box, }, /// Carrier update (`i = i + 1`, etc.) Update { carrier_name: String, update_kind: UpdateKind, }, /// Loop body (all other statements) Body { stmts: Vec, }, } /// Update kind - How a carrier variable is updated /// /// This categorization helps determine which pattern can handle the loop. #[derive(Debug, Clone)] pub enum UpdateKind { /// Constant step (`i = i + const`) ConstStep { delta: i64, }, /// Conditional update (`if cond { x = a } else { x = b }`) Conditional { then_value: Box, else_value: Box, }, /// Arbitrary update (everything else) Arbitrary, } /// Exit contract - What kinds of exits the loop has /// /// This determines the exit line architecture needed. #[derive(Debug, Clone)] pub struct ExitContract { pub has_break: bool, pub has_continue: bool, pub has_return: bool, pub break_has_value: bool, } /// Carrier slot - A loop variable with its role and update rule /// /// Carriers are variables that are updated in each iteration /// and need to cross loop boundaries (via PHI nodes in MIR). #[derive(Debug, Clone)] pub struct CarrierSlot { pub name: String, pub role: CarrierRole, pub update_kind: UpdateKind, } /// Carrier role - The semantic role of a carrier variable /// /// This helps determine the appropriate pattern and PHI structure. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CarrierRole { /// Loop counter (the `i` in `i < n`) Counter, /// Accumulator (the `sum` in `sum += x`) Accumulator, /// Condition variable (the `is_valid` in `while(is_valid)`) ConditionVar, /// Derived value (e.g., `digit_pos` computed from other carriers) Derived, } /// Captured slot - An outer variable used within the loop /// /// These are read-only references to variables defined outside the loop. /// (Write access would make them carriers instead.) #[derive(Debug, Clone)] pub struct CapturedSlot { pub name: String, pub is_mutable: bool, } // ============================================================================ // Capability Guard - Fail-Fast Reasons // ============================================================================ /// Routing decision - The result of pattern selection /// /// This contains both the chosen pattern (if any) and detailed /// diagnostic information about why other patterns were rejected. #[derive(Debug, Clone)] pub struct RoutingDecision { /// Selected pattern (None = Fail-Fast) pub chosen: Option, /// Missing capabilities that prevented other patterns pub missing_caps: Vec<&'static str>, /// Selection reasoning (for debugging) pub notes: Vec, /// Error tags for contract_checks integration pub error_tags: Vec, } /// Capability tags - Standardized vocabulary for Fail-Fast reasons /// /// These constants define the capabilities required by different patterns. /// When a loop lacks a required capability, it uses the corresponding tag /// to explain why it cannot be lowered by that pattern. pub mod capability_tags { /// Requires: Carrier update is constant step (`i = i + const`) pub const CAP_MISSING_CONST_STEP: &str = "CAP_MISSING_CONST_STEP"; /// Requires: Single break point only pub const CAP_MISSING_SINGLE_BREAK: &str = "CAP_MISSING_SINGLE_BREAK"; /// Requires: Single continue point only pub const CAP_MISSING_SINGLE_CONTINUE: &str = "CAP_MISSING_SINGLE_CONTINUE"; /// Requires: Loop header condition has no side effects pub const CAP_MISSING_PURE_HEADER: &str = "CAP_MISSING_PURE_HEADER"; /// Requires: Condition variable defined in outer local scope pub const CAP_MISSING_OUTER_LOCAL_COND: &str = "CAP_MISSING_OUTER_LOCAL_COND"; /// Requires: All exit bindings are complete (no missing values) pub const CAP_MISSING_EXIT_BINDINGS: &str = "CAP_MISSING_EXIT_BINDINGS"; /// Requires: LoopBodyLocal can be promoted to carrier pub const CAP_MISSING_CARRIER_PROMOTION: &str = "CAP_MISSING_CARRIER_PROMOTION"; /// Requires: Break value types are consistent across all break points pub const CAP_MISSING_BREAK_VALUE_TYPE: &str = "CAP_MISSING_BREAK_VALUE_TYPE"; } // ============================================================================ // Implementation Helpers // ============================================================================ impl LoopSkeleton { /// Create a new empty skeleton pub fn new(span: Span) -> Self { Self { steps: Vec::new(), carriers: Vec::new(), exits: ExitContract::default(), captured: None, span, } } /// Count the number of break checks in this skeleton pub fn count_break_checks(&self) -> usize { self.steps .iter() .filter(|s| matches!(s, SkeletonStep::BreakCheck { .. })) .count() } /// Count the number of continue checks in this skeleton pub fn count_continue_checks(&self) -> usize { self.steps .iter() .filter(|s| matches!(s, SkeletonStep::ContinueCheck { .. })) .count() } /// Get all carrier names pub fn carrier_names(&self) -> Vec<&str> { self.carriers.iter().map(|c| c.name.as_str()).collect() } } impl ExitContract { /// Create a contract with no exits pub fn none() -> Self { Self { has_break: false, has_continue: false, has_return: false, break_has_value: false, } } /// Check if any exit exists pub fn has_any_exit(&self) -> bool { self.has_break || self.has_continue || self.has_return } } impl Default for ExitContract { fn default() -> Self { Self::none() } } impl RoutingDecision { /// Create a successful routing decision pub fn success(pattern: LoopPatternKind) -> Self { Self { chosen: Some(pattern), missing_caps: Vec::new(), notes: Vec::new(), error_tags: Vec::new(), } } /// Create a failed routing decision (Fail-Fast) pub fn fail_fast(missing_caps: Vec<&'static str>, reason: String) -> Self { Self { chosen: None, missing_caps, notes: vec![reason.clone()], error_tags: vec![format!("[loop_canonicalizer/fail_fast] {}", reason)], } } /// Add a diagnostic note pub fn add_note(&mut self, note: String) { self.notes.push(note); } /// Check if routing succeeded pub fn is_success(&self) -> bool { self.chosen.is_some() } /// Check if routing failed pub fn is_fail_fast(&self) -> bool { self.chosen.is_none() } } // ============================================================================ // Display Implementations // ============================================================================ impl std::fmt::Display for CarrierRole { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { CarrierRole::Counter => write!(f, "Counter"), CarrierRole::Accumulator => write!(f, "Accumulator"), CarrierRole::ConditionVar => write!(f, "ConditionVar"), CarrierRole::Derived => write!(f, "Derived"), } } } // ============================================================================ // Phase 3: Pattern Recognition Helpers // ============================================================================ /// Try to extract skip_whitespace pattern from loop /// /// Pattern structure: /// ``` /// loop(cond) { /// // ... optional body statements (Body) /// if check_cond { /// carrier = carrier + const /// } else { /// break /// } /// } /// ``` /// /// Returns (carrier_name, delta, body_stmts) if pattern matches. fn try_extract_skip_whitespace_pattern( body: &[ASTNode], ) -> Option<(String, i64, Vec)> { if body.is_empty() { return None; } // Last statement must be if-else with break let last_stmt = &body[body.len() - 1]; let (then_body, else_body) = match last_stmt { ASTNode::If { then_body, else_body: Some(else_body), .. } => (then_body, else_body), _ => return None, }; // Then branch must be single assignment: carrier = carrier + const if then_body.len() != 1 { return None; } let (carrier_name, delta) = match &then_body[0] { ASTNode::Assignment { target, value, .. } => { // Extract target variable name let target_name = match target.as_ref() { ASTNode::Variable { name, .. } => name.clone(), _ => return None, }; // Value must be: target + const match value.as_ref() { ASTNode::BinaryOp { operator: crate::ast::BinaryOperator::Add, left, right, .. } => { // Left must be same variable let left_name = match left.as_ref() { ASTNode::Variable { name, .. } => name, _ => return None, }; if left_name != &target_name { return None; } // Right must be integer literal let delta = match right.as_ref() { ASTNode::Literal { value: crate::ast::LiteralValue::Integer(n), .. } => *n, _ => return None, }; (target_name, delta) } _ => return None, } } _ => return None, }; // Else branch must be single break if else_body.len() != 1 { return None; } match &else_body[0] { ASTNode::Break { .. } => { // Success! Extract body statements (all except last if) let body_stmts = body[..body.len() - 1].to_vec(); Some((carrier_name, delta, body_stmts)) } _ => None, } } // ============================================================================ // Phase 3: Canonicalization Entry Point // ============================================================================ /// Canonicalize a loop AST into LoopSkeleton (Phase 3: skip_whitespace pattern recognition) /// /// Currently supports only the skip_whitespace pattern: /// ``` /// loop(cond) { /// // ... optional body statements /// if check_cond { /// carrier = carrier + step /// } else { /// break /// } /// } /// ``` /// /// All other patterns return Fail-Fast with detailed reasoning. /// /// # Arguments /// - `loop_expr`: The loop AST node (must be `ASTNode::Loop`) /// /// # Returns /// - `Ok((skeleton, decision))`: Successfully extracted skeleton and routing decision /// - `Err(String)`: Malformed AST or internal error pub fn canonicalize_loop_expr( loop_expr: &ASTNode, ) -> Result<(LoopSkeleton, RoutingDecision), String> { // Extract loop components let (condition, body, span) = match loop_expr { ASTNode::Loop { condition, body, span, } => (condition.as_ref(), body, span.clone()), _ => return Err(format!("Expected Loop node, got: {:?}", loop_expr)), }; // Phase 3: Try to extract skip_whitespace pattern if let Some((carrier_name, delta, body_stmts)) = try_extract_skip_whitespace_pattern(body) { // Build skeleton for skip_whitespace pattern let mut skeleton = LoopSkeleton::new(span); // Step 1: Header condition skeleton.steps.push(SkeletonStep::HeaderCond { expr: Box::new(condition.clone()), }); // Step 2: Body statements (if any) if !body_stmts.is_empty() { skeleton.steps.push(SkeletonStep::Body { stmts: body_stmts, }); } // Step 3: Update step skeleton.steps.push(SkeletonStep::Update { carrier_name: carrier_name.clone(), update_kind: UpdateKind::ConstStep { delta }, }); // Add carrier slot skeleton.carriers.push(CarrierSlot { name: carrier_name, role: CarrierRole::Counter, update_kind: UpdateKind::ConstStep { delta }, }); // Set exit contract skeleton.exits = ExitContract { has_break: true, has_continue: false, has_return: false, break_has_value: false, }; // Phase 137-5: Decision policy SSOT - ExitContract determines pattern choice // Since has_break=true, this should route to Pattern2Break (not Pattern3IfPhi) // Pattern3IfPhi is for if-else PHI *without* break statements let decision = RoutingDecision::success(LoopPatternKind::Pattern2Break); return Ok((skeleton, decision)); } // Pattern not recognized - fail fast Ok(( LoopSkeleton::new(span), RoutingDecision::fail_fast( vec![capability_tags::CAP_MISSING_CONST_STEP], "Phase 3: Loop does not match skip_whitespace pattern".to_string(), ), )) } #[cfg(test)] mod tests { use super::*; #[test] fn test_skeleton_creation() { let skeleton = LoopSkeleton::new(Span::unknown()); assert_eq!(skeleton.steps.len(), 0); assert_eq!(skeleton.carriers.len(), 0); assert!(!skeleton.exits.has_any_exit()); } #[test] fn test_exit_contract() { let mut contract = ExitContract::none(); assert!(!contract.has_any_exit()); contract.has_break = true; assert!(contract.has_any_exit()); } #[test] fn test_routing_decision() { let success = RoutingDecision::success(LoopPatternKind::Pattern1SimpleWhile); assert!(success.is_success()); assert!(!success.is_fail_fast()); let fail = RoutingDecision::fail_fast( vec![capability_tags::CAP_MISSING_CONST_STEP], "Test failure".to_string(), ); assert!(!fail.is_success()); assert!(fail.is_fail_fast()); assert_eq!(fail.missing_caps.len(), 1); } #[test] fn test_carrier_role_display() { assert_eq!(CarrierRole::Counter.to_string(), "Counter"); assert_eq!(CarrierRole::Accumulator.to_string(), "Accumulator"); assert_eq!(CarrierRole::ConditionVar.to_string(), "ConditionVar"); assert_eq!(CarrierRole::Derived.to_string(), "Derived"); } #[test] fn test_skeleton_count_helpers() { use crate::ast::LiteralValue; let mut skeleton = LoopSkeleton::new(Span::unknown()); skeleton.steps.push(SkeletonStep::BreakCheck { cond: Box::new(ASTNode::Literal { value: LiteralValue::Bool(true), span: Span::unknown(), }), has_value: false, }); skeleton.steps.push(SkeletonStep::ContinueCheck { cond: Box::new(ASTNode::Literal { value: LiteralValue::Bool(true), span: Span::unknown(), }), }); assert_eq!(skeleton.count_break_checks(), 1); assert_eq!(skeleton.count_continue_checks(), 1); } #[test] fn test_skeleton_carrier_names() { let mut skeleton = LoopSkeleton::new(Span::unknown()); skeleton.carriers.push(CarrierSlot { name: "i".to_string(), role: CarrierRole::Counter, update_kind: UpdateKind::ConstStep { delta: 1 }, }); skeleton.carriers.push(CarrierSlot { name: "sum".to_string(), role: CarrierRole::Accumulator, update_kind: UpdateKind::Arbitrary, }); let names = skeleton.carrier_names(); assert_eq!(names, vec!["i", "sum"]); } // ============================================================================ // Phase 2: Canonicalize Tests // ============================================================================ #[test] fn test_canonicalize_rejects_non_loop() { use crate::ast::LiteralValue; let not_loop = ASTNode::Literal { value: LiteralValue::Bool(true), span: Span::unknown(), }; let result = canonicalize_loop_expr(¬_loop); assert!(result.is_err()); assert!(result.unwrap_err().contains("Expected Loop node")); } // ============================================================================ // Phase 3: skip_whitespace Pattern Tests // ============================================================================ #[test] fn test_skip_whitespace_pattern_recognition() { use crate::ast::{BinaryOperator, LiteralValue}; // Build skip_whitespace pattern: loop(p < len) { if is_ws == 1 { p = p + 1 } else { break } } let loop_node = ASTNode::Loop { condition: Box::new(ASTNode::BinaryOp { operator: BinaryOperator::Less, left: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), right: Box::new(ASTNode::Variable { name: "len".to_string(), span: Span::unknown(), }), span: Span::unknown(), }), body: vec![ASTNode::If { condition: Box::new(ASTNode::BinaryOp { operator: BinaryOperator::Equal, left: Box::new(ASTNode::Variable { name: "is_ws".to_string(), span: Span::unknown(), }), right: Box::new(ASTNode::Literal { value: LiteralValue::Integer(1), span: Span::unknown(), }), span: Span::unknown(), }), then_body: vec![ASTNode::Assignment { target: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), value: Box::new(ASTNode::BinaryOp { operator: BinaryOperator::Add, left: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), right: Box::new(ASTNode::Literal { value: LiteralValue::Integer(1), span: Span::unknown(), }), span: Span::unknown(), }), span: Span::unknown(), }], else_body: Some(vec![ASTNode::Break { span: Span::unknown(), }]), span: Span::unknown(), }], span: Span::unknown(), }; let result = canonicalize_loop_expr(&loop_node); assert!(result.is_ok()); let (skeleton, decision) = result.unwrap(); // Verify success assert!(decision.is_success()); // Phase 137-5: Pattern choice reflects ExitContract (has_break=true → Pattern2Break) assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern2Break)); assert_eq!(decision.missing_caps.len(), 0); // Verify skeleton structure assert_eq!(skeleton.steps.len(), 2); // HeaderCond + Update assert!(matches!( skeleton.steps[0], SkeletonStep::HeaderCond { .. } )); assert!(matches!( skeleton.steps[1], SkeletonStep::Update { .. } )); // Verify carrier assert_eq!(skeleton.carriers.len(), 1); assert_eq!(skeleton.carriers[0].name, "p"); assert_eq!(skeleton.carriers[0].role, CarrierRole::Counter); match &skeleton.carriers[0].update_kind { UpdateKind::ConstStep { delta } => assert_eq!(*delta, 1), _ => panic!("Expected ConstStep update"), } // Verify exit contract assert!(skeleton.exits.has_break); assert!(!skeleton.exits.has_continue); assert!(!skeleton.exits.has_return); assert!(!skeleton.exits.break_has_value); } #[test] fn test_skip_whitespace_with_body_statements() { use crate::ast::{BinaryOperator, LiteralValue}; // Build pattern with body statements before the if: // loop(p < len) { // local ch = get_char(p) // if is_ws { p = p + 1 } else { break } // } let loop_node = ASTNode::Loop { condition: Box::new(ASTNode::BinaryOp { operator: BinaryOperator::Less, left: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), right: Box::new(ASTNode::Variable { name: "len".to_string(), span: Span::unknown(), }), span: Span::unknown(), }), body: vec![ // Body statement ASTNode::Assignment { target: Box::new(ASTNode::Variable { name: "ch".to_string(), span: Span::unknown(), }), value: Box::new(ASTNode::FunctionCall { name: "get_char".to_string(), arguments: vec![ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }], span: Span::unknown(), }), span: Span::unknown(), }, // The if-else pattern ASTNode::If { condition: Box::new(ASTNode::Variable { name: "is_ws".to_string(), span: Span::unknown(), }), then_body: vec![ASTNode::Assignment { target: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), value: Box::new(ASTNode::BinaryOp { operator: BinaryOperator::Add, left: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), right: Box::new(ASTNode::Literal { value: LiteralValue::Integer(1), span: Span::unknown(), }), span: Span::unknown(), }), span: Span::unknown(), }], else_body: Some(vec![ASTNode::Break { span: Span::unknown(), }]), span: Span::unknown(), }, ], span: Span::unknown(), }; let result = canonicalize_loop_expr(&loop_node); assert!(result.is_ok()); let (skeleton, decision) = result.unwrap(); // Verify success assert!(decision.is_success()); // Phase 137-5: Pattern choice reflects ExitContract (has_break=true → Pattern2Break) assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern2Break)); // Verify skeleton has Body step assert_eq!(skeleton.steps.len(), 3); // HeaderCond + Body + Update assert!(matches!( skeleton.steps[0], SkeletonStep::HeaderCond { .. } )); assert!(matches!(skeleton.steps[1], SkeletonStep::Body { .. })); assert!(matches!( skeleton.steps[2], SkeletonStep::Update { .. } )); // Verify body contains 1 statement match &skeleton.steps[1] { SkeletonStep::Body { stmts } => assert_eq!(stmts.len(), 1), _ => panic!("Expected Body step"), } } #[test] fn test_skip_whitespace_fails_without_else() { use crate::ast::{BinaryOperator, LiteralValue}; // Build pattern without else branch (should fail) let loop_node = ASTNode::Loop { condition: Box::new(ASTNode::Literal { value: LiteralValue::Bool(true), span: Span::unknown(), }), body: vec![ASTNode::If { condition: Box::new(ASTNode::Literal { value: LiteralValue::Bool(true), span: Span::unknown(), }), then_body: vec![ASTNode::Assignment { target: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), value: Box::new(ASTNode::BinaryOp { operator: BinaryOperator::Add, left: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), right: Box::new(ASTNode::Literal { value: LiteralValue::Integer(1), span: Span::unknown(), }), span: Span::unknown(), }), span: Span::unknown(), }], else_body: None, // No else branch span: Span::unknown(), }], span: Span::unknown(), }; let result = canonicalize_loop_expr(&loop_node); assert!(result.is_ok()); let (_, decision) = result.unwrap(); assert!(decision.is_fail_fast()); assert!(decision .notes[0] .contains("does not match skip_whitespace pattern")); } #[test] fn test_skip_whitespace_fails_with_wrong_delta() { use crate::ast::{BinaryOperator, LiteralValue}; // Build pattern with wrong update (p = p - 1 instead of p = p + 1) let loop_node = ASTNode::Loop { condition: Box::new(ASTNode::Literal { value: LiteralValue::Bool(true), span: Span::unknown(), }), body: vec![ASTNode::If { condition: Box::new(ASTNode::Literal { value: LiteralValue::Bool(true), span: Span::unknown(), }), then_body: vec![ASTNode::Assignment { target: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), value: Box::new(ASTNode::BinaryOp { operator: BinaryOperator::Subtract, // Wrong operator left: Box::new(ASTNode::Variable { name: "p".to_string(), span: Span::unknown(), }), right: Box::new(ASTNode::Literal { value: LiteralValue::Integer(1), span: Span::unknown(), }), span: Span::unknown(), }), span: Span::unknown(), }], else_body: Some(vec![ASTNode::Break { span: Span::unknown(), }]), span: Span::unknown(), }], span: Span::unknown(), }; let result = canonicalize_loop_expr(&loop_node); assert!(result.is_ok()); let (_, decision) = result.unwrap(); assert!(decision.is_fail_fast()); } }