diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index 0a78a3b5..93c03119 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -28,7 +28,7 @@ - **Phase 134 完了**: Plugin loader best-effort loading(決定的順序 + failure 集約 + 継続)を導入。 - **Phase 135 完了**: ConditionLoweringBox allocator SSOT(P0: 根治修正 + P1: contract_checks Fail-Fast 強化)。 - **Phase 136 完了**: MirBuilder Context SSOT 化(+ ValueId allocator 掃討)。 -- **Phase 137-2 完了**: Loop Canonicalizer(dev-only 観測)まで完了(既定挙動は不変)。 +- **Phase 137-5 完了**: Loop Canonicalizer(Decision Policy SSOT)まで完了(既定挙動は不変)。 - **Phase 88 完了**: continue + 可変ステップ(i=i+const 差分)を dev-only fixture で固定、StepCalculator Box 抽出。 - **Phase 89 完了**: P0(ContinueReturn detector)+ P1(lowering 実装)完了。 - **Phase 90 完了**: ParseStringComposite + `Null` literal + ContinueReturn(同一値の複数 return-if)を dev-only fixture で固定。 @@ -53,11 +53,11 @@ ## 次の指示書(優先順位) -### P0: Loop Canonicalizer の Phase 3(Skeleton→Decision の精密化) +### P0: Loop Canonicalizer の Phase 6(Router 委譲 - dev-only) -**状態**: ✅ Phase 2 まで完了、Phase 3 へ +**状態**: ✅ Phase 5 まで完了、Phase 6 へ -**目的**: `LoopSkeleton` から `RoutingDecision`(chosen/missing_caps/notes)を安定に計算し、代表ケース(`skip_whitespace`)で “期待する選択” をテストで固定する(既定挙動は不変)。 +**目的**: router の選択ロジックを “Canonicalizer decision を使う経路” に段階投入する(ただし既定挙動は不変、strict で parity を維持)。 SSOT: - `docs/development/current/main/design/loop-canonicalizer.md` diff --git a/docs/development/current/main/10-Now.md b/docs/development/current/main/10-Now.md index 5102b4f9..431b47e8 100644 --- a/docs/development/current/main/10-Now.md +++ b/docs/development/current/main/10-Now.md @@ -27,9 +27,9 @@ - Phase 136: MirBuilder の Context 分割を完了し、状態の SSOT を Context に一本化。 - 詳細: `docs/development/current/main/phases/phase-136/README.md` -## 2025‑12‑16:Phase 137‑2(短報) +## 2025‑12‑16:Phase 137‑5(短報) -- Loop Canonicalizer の Phase 2(dev-only 観測)まで完了(既定挙動は不変)。 +- Loop Canonicalizer の Phase 5(Decision Policy SSOT)まで完了(既定挙動は不変)。 - 設計 SSOT: `docs/development/current/main/design/loop-canonicalizer.md` - 実装: `src/mir/loop_canonicalizer/mod.rs`(+ 観測: `src/mir/builder/control_flow/joinir/routing.rs`) diff --git a/docs/development/current/main/design/loop-canonicalizer.md b/docs/development/current/main/design/loop-canonicalizer.md index 839e2c42..032b8206 100644 --- a/docs/development/current/main/design/loop-canonicalizer.md +++ b/docs/development/current/main/design/loop-canonicalizer.md @@ -1,6 +1,6 @@ # Loop Canonicalizer(設計 SSOT) -Status: Phase 3 done(skip_whitespace の安定認識まで) +Status: Phase 5 done(Decision Policy SSOT まで) Scope: ループ形の組み合わせ爆発を抑えるための “前処理” の設計(fixture/shape guard/fail-fast と整合) Related: - SSOT (契約/不変条件): `docs/development/current/main/joinir-architecture-overview.md` diff --git a/docs/development/current/main/phases/phase-137/README.md b/docs/development/current/main/phases/phase-137/README.md index 02074ff0..469993a9 100644 --- a/docs/development/current/main/phases/phase-137/README.md +++ b/docs/development/current/main/phases/phase-137/README.md @@ -1,7 +1,7 @@ # Phase 137: Loop Canonicalizer(前処理 SSOT) ## Status -- 状態: 🔶 進行中(Phase 3 完了) +- 状態: 🔶 進行中(Phase 5 完了) ## Goal - ループ形の組み合わせ爆発を抑えるため、`AST → LoopSkeleton → (capability/routing)` の前処理を SSOT 化する。 @@ -51,6 +51,13 @@ - ExitContract が pattern 選択の決定要因として明確化 - 構造的特徴(if-else 等)は `notes` に記録(将来の Pattern 細分化に備える) +## Phase 6(次): Router 委譲(dev-only / 段階投入) + +- 目標: “既存 router の結果” を最終SSOTとして維持したまま、dev-only で Canonicalizer の `RoutingDecision` を router 選択に使う経路を用意する。 +- 方針: + - まず dev-only で `RoutingDecision.chosen` を router に反映し、strict 時は parity を維持する(ズレたら Fail-Fast)。 + - 既定挙動(dev flags OFF)では現行 router をそのまま使う。 + ## SSOT - 設計 SSOT: `docs/development/current/main/design/loop-canonicalizer.md` diff --git a/src/mir/loop_canonicalizer/canonicalizer.rs b/src/mir/loop_canonicalizer/canonicalizer.rs new file mode 100644 index 00000000..04eadf9e --- /dev/null +++ b/src/mir/loop_canonicalizer/canonicalizer.rs @@ -0,0 +1,414 @@ +//! Loop Canonicalization Entry Point +//! +//! This module provides the main canonicalization logic that converts +//! AST loops into normalized LoopSkeleton structures. + +use crate::ast::ASTNode; +use crate::mir::loop_pattern_detection::LoopPatternKind; + +use super::capability_guard::{capability_tags, RoutingDecision}; +use super::pattern_recognizer::try_extract_skip_whitespace_pattern; +use super::skeleton_types::{ + CarrierRole, CarrierSlot, ExitContract, LoopSkeleton, SkeletonStep, UpdateKind, +}; + +// ============================================================================ +// Canonicalization Entry Point +// ============================================================================ + +/// Canonicalize a loop AST into LoopSkeleton (Phase 3: skip_whitespace pattern recognition) +/// +/// Currently supports only the skip_whitespace pattern: +/// ``` +/// loop(cond) { +/// // ... optional body statements +/// if check_cond { +/// carrier = carrier + step +/// } else { +/// break +/// } +/// } +/// ``` +/// +/// All other patterns return Fail-Fast with detailed reasoning. +/// +/// # Arguments +/// - `loop_expr`: The loop AST node (must be `ASTNode::Loop`) +/// +/// # Returns +/// - `Ok((skeleton, decision))`: Successfully extracted skeleton and routing decision +/// - `Err(String)`: Malformed AST or internal error +pub fn canonicalize_loop_expr( + loop_expr: &ASTNode, +) -> Result<(LoopSkeleton, RoutingDecision), String> { + // Extract loop components + let (condition, body, span) = match loop_expr { + ASTNode::Loop { + condition, + body, + span, + } => (condition.as_ref(), body, span.clone()), + _ => return Err(format!("Expected Loop node, got: {:?}", loop_expr)), + }; + + // Phase 3: Try to extract skip_whitespace pattern + if let Some((carrier_name, delta, body_stmts)) = try_extract_skip_whitespace_pattern(body) { + // Build skeleton for skip_whitespace pattern + let mut skeleton = LoopSkeleton::new(span); + + // Step 1: Header condition + skeleton.steps.push(SkeletonStep::HeaderCond { + expr: Box::new(condition.clone()), + }); + + // Step 2: Body statements (if any) + if !body_stmts.is_empty() { + skeleton.steps.push(SkeletonStep::Body { + stmts: body_stmts, + }); + } + + // Step 3: Update step + skeleton.steps.push(SkeletonStep::Update { + carrier_name: carrier_name.clone(), + update_kind: UpdateKind::ConstStep { delta }, + }); + + // Add carrier slot + skeleton.carriers.push(CarrierSlot { + name: carrier_name, + role: CarrierRole::Counter, + update_kind: UpdateKind::ConstStep { delta }, + }); + + // Set exit contract + skeleton.exits = ExitContract { + has_break: true, + has_continue: false, + has_return: false, + break_has_value: false, + }; + + // Phase 137-5: Decision policy SSOT - ExitContract determines pattern choice + // Since has_break=true, this should route to Pattern2Break (not Pattern3IfPhi) + // Pattern3IfPhi is for if-else PHI *without* break statements + let decision = RoutingDecision::success(LoopPatternKind::Pattern2Break); + return Ok((skeleton, decision)); + } + + // Pattern not recognized - fail fast + Ok(( + LoopSkeleton::new(span), + RoutingDecision::fail_fast( + vec![capability_tags::CAP_MISSING_CONST_STEP], + "Phase 3: Loop does not match skip_whitespace pattern".to_string(), + ), + )) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ast::{BinaryOperator, LiteralValue, Span}; + + #[test] + fn test_canonicalize_rejects_non_loop() { + let not_loop = ASTNode::Literal { + value: LiteralValue::Bool(true), + span: Span::unknown(), + }; + + let result = canonicalize_loop_expr(¬_loop); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Expected Loop node")); + } + + #[test] + fn test_skip_whitespace_pattern_recognition() { + // Build skip_whitespace pattern: loop(p < len) { if is_ws == 1 { p = p + 1 } else { break } } + let loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Variable { + name: "len".to_string(), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + body: vec![ASTNode::If { + condition: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Equal, + left: Box::new(ASTNode::Variable { + name: "is_ws".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + then_body: vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }], + else_body: Some(vec![ASTNode::Break { + span: Span::unknown(), + }]), + span: Span::unknown(), + }], + span: Span::unknown(), + }; + + let result = canonicalize_loop_expr(&loop_node); + assert!(result.is_ok()); + + let (skeleton, decision) = result.unwrap(); + + // Verify success + assert!(decision.is_success()); + // Phase 137-5: Pattern choice reflects ExitContract (has_break=true → Pattern2Break) + assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern2Break)); + assert_eq!(decision.missing_caps.len(), 0); + + // Verify skeleton structure + assert_eq!(skeleton.steps.len(), 2); // HeaderCond + Update + assert!(matches!( + skeleton.steps[0], + SkeletonStep::HeaderCond { .. } + )); + assert!(matches!( + skeleton.steps[1], + SkeletonStep::Update { .. } + )); + + // Verify carrier + assert_eq!(skeleton.carriers.len(), 1); + assert_eq!(skeleton.carriers[0].name, "p"); + assert_eq!(skeleton.carriers[0].role, CarrierRole::Counter); + match &skeleton.carriers[0].update_kind { + UpdateKind::ConstStep { delta } => assert_eq!(*delta, 1), + _ => panic!("Expected ConstStep update"), + } + + // Verify exit contract + assert!(skeleton.exits.has_break); + assert!(!skeleton.exits.has_continue); + assert!(!skeleton.exits.has_return); + assert!(!skeleton.exits.break_has_value); + } + + #[test] + fn test_skip_whitespace_with_body_statements() { + // Build pattern with body statements before the if: + // loop(p < len) { + // local ch = get_char(p) + // if is_ws { p = p + 1 } else { break } + // } + let loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Variable { + name: "len".to_string(), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + body: vec![ + // Body statement + ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "ch".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::FunctionCall { + name: "get_char".to_string(), + arguments: vec![ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }], + span: Span::unknown(), + }), + span: Span::unknown(), + }, + // The if-else pattern + ASTNode::If { + condition: Box::new(ASTNode::Variable { + name: "is_ws".to_string(), + span: Span::unknown(), + }), + then_body: vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }], + else_body: Some(vec![ASTNode::Break { + span: Span::unknown(), + }]), + span: Span::unknown(), + }, + ], + span: Span::unknown(), + }; + + let result = canonicalize_loop_expr(&loop_node); + assert!(result.is_ok()); + + let (skeleton, decision) = result.unwrap(); + + // Verify success + assert!(decision.is_success()); + // Phase 137-5: Pattern choice reflects ExitContract (has_break=true → Pattern2Break) + assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern2Break)); + + // Verify skeleton has Body step + assert_eq!(skeleton.steps.len(), 3); // HeaderCond + Body + Update + assert!(matches!( + skeleton.steps[0], + SkeletonStep::HeaderCond { .. } + )); + assert!(matches!(skeleton.steps[1], SkeletonStep::Body { .. })); + assert!(matches!( + skeleton.steps[2], + SkeletonStep::Update { .. } + )); + + // Verify body contains 1 statement + match &skeleton.steps[1] { + SkeletonStep::Body { stmts } => assert_eq!(stmts.len(), 1), + _ => panic!("Expected Body step"), + } + } + + #[test] + fn test_skip_whitespace_fails_without_else() { + // Build pattern without else branch (should fail) + let loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::Literal { + value: LiteralValue::Bool(true), + span: Span::unknown(), + }), + body: vec![ASTNode::If { + condition: Box::new(ASTNode::Literal { + value: LiteralValue::Bool(true), + span: Span::unknown(), + }), + then_body: vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }], + else_body: None, // No else branch + span: Span::unknown(), + }], + span: Span::unknown(), + }; + + let result = canonicalize_loop_expr(&loop_node); + assert!(result.is_ok()); + + let (_, decision) = result.unwrap(); + assert!(decision.is_fail_fast()); + assert!(decision + .notes[0] + .contains("does not match skip_whitespace pattern")); + } + + #[test] + fn test_skip_whitespace_fails_with_wrong_delta() { + // Build pattern with wrong update (p = p - 1 instead of p = p + 1) + let loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::Literal { + value: LiteralValue::Bool(true), + span: Span::unknown(), + }), + body: vec![ASTNode::If { + condition: Box::new(ASTNode::Literal { + value: LiteralValue::Bool(true), + span: Span::unknown(), + }), + then_body: vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Subtract, // Wrong operator + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }], + else_body: Some(vec![ASTNode::Break { + span: Span::unknown(), + }]), + span: Span::unknown(), + }], + span: Span::unknown(), + }; + + let result = canonicalize_loop_expr(&loop_node); + assert!(result.is_ok()); + + let (_, decision) = result.unwrap(); + assert!(decision.is_fail_fast()); + } +} diff --git a/src/mir/loop_canonicalizer/capability_guard.rs b/src/mir/loop_canonicalizer/capability_guard.rs new file mode 100644 index 00000000..10216e6b --- /dev/null +++ b/src/mir/loop_canonicalizer/capability_guard.rs @@ -0,0 +1,104 @@ +//! Capability Guard - Fail-Fast Reasons and Routing Decisions +//! +//! This module defines the vocabulary for pattern selection and failure reasons. +//! It provides standardized capability tags and routing decision structures. + +use crate::mir::loop_pattern_detection::LoopPatternKind; + +// ============================================================================ +// Routing Decision +// ============================================================================ + +/// Routing decision - The result of pattern selection +/// +/// This contains both the chosen pattern (if any) and detailed +/// diagnostic information about why other patterns were rejected. +#[derive(Debug, Clone)] +pub struct RoutingDecision { + /// Selected pattern (None = Fail-Fast) + pub chosen: Option, + + /// Missing capabilities that prevented other patterns + pub missing_caps: Vec<&'static str>, + + /// Selection reasoning (for debugging) + pub notes: Vec, + + /// Error tags for contract_checks integration + pub error_tags: Vec, +} + +impl RoutingDecision { + /// Create a successful routing decision + pub fn success(pattern: LoopPatternKind) -> Self { + Self { + chosen: Some(pattern), + missing_caps: Vec::new(), + notes: Vec::new(), + error_tags: Vec::new(), + } + } + + /// Create a failed routing decision (Fail-Fast) + pub fn fail_fast(missing_caps: Vec<&'static str>, reason: String) -> Self { + Self { + chosen: None, + missing_caps, + notes: vec![reason.clone()], + error_tags: vec![format!("[loop_canonicalizer/fail_fast] {}", reason)], + } + } + + /// Add a diagnostic note + pub fn add_note(&mut self, note: String) { + self.notes.push(note); + } + + /// Check if routing succeeded + pub fn is_success(&self) -> bool { + self.chosen.is_some() + } + + /// Check if routing failed + pub fn is_fail_fast(&self) -> bool { + self.chosen.is_none() + } +} + +// ============================================================================ +// Capability Tags +// ============================================================================ + +/// Capability tags - Standardized vocabulary for Fail-Fast reasons +/// +/// These constants define the capabilities required by different patterns. +/// When a loop lacks a required capability, it uses the corresponding tag +/// to explain why it cannot be lowered by that pattern. +/// +/// NOTE: This module will be deprecated in Phase 139 in favor of the +/// CapabilityTag enum for type safety. +pub mod capability_tags { + /// Requires: Carrier update is constant step (`i = i + const`) + pub const CAP_MISSING_CONST_STEP: &str = "CAP_MISSING_CONST_STEP"; + + /// Requires: Single break point only + pub const CAP_MISSING_SINGLE_BREAK: &str = "CAP_MISSING_SINGLE_BREAK"; + + /// Requires: Single continue point only + pub const CAP_MISSING_SINGLE_CONTINUE: &str = "CAP_MISSING_SINGLE_CONTINUE"; + + /// Requires: Loop header condition has no side effects + pub const CAP_MISSING_PURE_HEADER: &str = "CAP_MISSING_PURE_HEADER"; + + /// Requires: Condition variable defined in outer local scope + pub const CAP_MISSING_OUTER_LOCAL_COND: &str = "CAP_MISSING_OUTER_LOCAL_COND"; + + /// Requires: All exit bindings are complete (no missing values) + pub const CAP_MISSING_EXIT_BINDINGS: &str = "CAP_MISSING_EXIT_BINDINGS"; + + /// Requires: LoopBodyLocal can be promoted to carrier + pub const CAP_MISSING_CARRIER_PROMOTION: &str = "CAP_MISSING_CARRIER_PROMOTION"; + + /// Requires: Break value types are consistent across all break points + pub const CAP_MISSING_BREAK_VALUE_TYPE: &str = "CAP_MISSING_BREAK_VALUE_TYPE"; +} diff --git a/src/mir/loop_canonicalizer/mod.rs b/src/mir/loop_canonicalizer/mod.rs index 45d28d9c..c4f1415b 100644 --- a/src/mir/loop_canonicalizer/mod.rs +++ b/src/mir/loop_canonicalizer/mod.rs @@ -1,7 +1,5 @@ //! Loop Canonicalizer - AST Level Loop Preprocessing //! -//! Phase 1: Type Definitions Only -//! //! ## Purpose //! //! Decomposes AST-level loops into a normalized "skeleton" representation @@ -19,510 +17,60 @@ //! AST → LoopSkeleton → Capability Guard → RoutingDecision → Pattern Lowerer //! ``` //! +//! ## Module Structure (Phase 138 Refactoring) +//! +//! - `skeleton_types` - Core data structures (LoopSkeleton, SkeletonStep, etc.) +//! - `capability_guard` - Routing decisions and capability tags +//! - `pattern_recognizer` - Pattern detection logic (skip_whitespace, etc.) +//! - `canonicalizer` - Main canonicalization entry point +//! //! ## References //! //! - Design SSOT: `docs/development/current/main/design/loop-canonicalizer.md` //! - JoinIR Architecture: `docs/development/current/main/joinir-architecture-overview.md` //! - Pattern Space: `docs/development/current/main/loop_pattern_space.md` -use crate::ast::{ASTNode, Span}; -use crate::mir::loop_pattern_detection::LoopPatternKind; - // ============================================================================ -// Core Skeleton Types +// Module Declarations // ============================================================================ -/// Loop skeleton - The canonical representation of a loop structure -/// -/// This is the single output type of the Canonicalizer. -/// It represents the essential structure of a loop without any -/// JoinIR-specific information. -#[derive(Debug, Clone)] -pub struct LoopSkeleton { - /// Sequence of steps (HeaderCond, BodyInit, BreakCheck, Updates, Tail) - pub steps: Vec, - - /// Carriers (loop variables with update rules and boundary crossing contracts) - pub carriers: Vec, - - /// Exit contract (presence and payload of break/continue/return) - pub exits: ExitContract, - - /// Captured variables from outer scope (optional) - pub captured: Option>, - - /// Source location for debugging - pub span: Span, -} - -/// Skeleton step - Minimal step kinds for loop structure -/// -/// Each step represents a fundamental operation in the loop lifecycle. -#[derive(Debug, Clone)] -pub enum SkeletonStep { - /// Loop continuation condition (the `cond` in `loop(cond)`) - HeaderCond { - expr: Box, - }, - - /// Early exit check (`if cond { break }`) - BreakCheck { - cond: Box, - has_value: bool, - }, - - /// Skip check (`if cond { continue }`) - ContinueCheck { - cond: Box, - }, - - /// Carrier update (`i = i + 1`, etc.) - Update { - carrier_name: String, - update_kind: UpdateKind, - }, - - /// Loop body (all other statements) - Body { - stmts: Vec, - }, -} - -/// Update kind - How a carrier variable is updated -/// -/// This categorization helps determine which pattern can handle the loop. -#[derive(Debug, Clone)] -pub enum UpdateKind { - /// Constant step (`i = i + const`) - ConstStep { - delta: i64, - }, - - /// Conditional update (`if cond { x = a } else { x = b }`) - Conditional { - then_value: Box, - else_value: Box, - }, - - /// Arbitrary update (everything else) - Arbitrary, -} - -/// Exit contract - What kinds of exits the loop has -/// -/// This determines the exit line architecture needed. -#[derive(Debug, Clone)] -pub struct ExitContract { - pub has_break: bool, - pub has_continue: bool, - pub has_return: bool, - pub break_has_value: bool, -} - -/// Carrier slot - A loop variable with its role and update rule -/// -/// Carriers are variables that are updated in each iteration -/// and need to cross loop boundaries (via PHI nodes in MIR). -#[derive(Debug, Clone)] -pub struct CarrierSlot { - pub name: String, - pub role: CarrierRole, - pub update_kind: UpdateKind, -} - -/// Carrier role - The semantic role of a carrier variable -/// -/// This helps determine the appropriate pattern and PHI structure. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum CarrierRole { - /// Loop counter (the `i` in `i < n`) - Counter, - - /// Accumulator (the `sum` in `sum += x`) - Accumulator, - - /// Condition variable (the `is_valid` in `while(is_valid)`) - ConditionVar, - - /// Derived value (e.g., `digit_pos` computed from other carriers) - Derived, -} - -/// Captured slot - An outer variable used within the loop -/// -/// These are read-only references to variables defined outside the loop. -/// (Write access would make them carriers instead.) -#[derive(Debug, Clone)] -pub struct CapturedSlot { - pub name: String, - pub is_mutable: bool, -} +mod skeleton_types; +mod capability_guard; +mod pattern_recognizer; +mod canonicalizer; // ============================================================================ -// Capability Guard - Fail-Fast Reasons +// Public Re-exports // ============================================================================ -/// Routing decision - The result of pattern selection -/// -/// This contains both the chosen pattern (if any) and detailed -/// diagnostic information about why other patterns were rejected. -#[derive(Debug, Clone)] -pub struct RoutingDecision { - /// Selected pattern (None = Fail-Fast) - pub chosen: Option, +// Skeleton Types +pub use skeleton_types::{ + CarrierRole, + CarrierSlot, + CapturedSlot, + ExitContract, + LoopSkeleton, + SkeletonStep, + UpdateKind, +}; - /// Missing capabilities that prevented other patterns - pub missing_caps: Vec<&'static str>, +// Capability Guard +pub use capability_guard::{ + capability_tags, + RoutingDecision, +}; - /// Selection reasoning (for debugging) - pub notes: Vec, - - /// Error tags for contract_checks integration - pub error_tags: Vec, -} - -/// Capability tags - Standardized vocabulary for Fail-Fast reasons -/// -/// These constants define the capabilities required by different patterns. -/// When a loop lacks a required capability, it uses the corresponding tag -/// to explain why it cannot be lowered by that pattern. -pub mod capability_tags { - /// Requires: Carrier update is constant step (`i = i + const`) - pub const CAP_MISSING_CONST_STEP: &str = "CAP_MISSING_CONST_STEP"; - - /// Requires: Single break point only - pub const CAP_MISSING_SINGLE_BREAK: &str = "CAP_MISSING_SINGLE_BREAK"; - - /// Requires: Single continue point only - pub const CAP_MISSING_SINGLE_CONTINUE: &str = "CAP_MISSING_SINGLE_CONTINUE"; - - /// Requires: Loop header condition has no side effects - pub const CAP_MISSING_PURE_HEADER: &str = "CAP_MISSING_PURE_HEADER"; - - /// Requires: Condition variable defined in outer local scope - pub const CAP_MISSING_OUTER_LOCAL_COND: &str = "CAP_MISSING_OUTER_LOCAL_COND"; - - /// Requires: All exit bindings are complete (no missing values) - pub const CAP_MISSING_EXIT_BINDINGS: &str = "CAP_MISSING_EXIT_BINDINGS"; - - /// Requires: LoopBodyLocal can be promoted to carrier - pub const CAP_MISSING_CARRIER_PROMOTION: &str = "CAP_MISSING_CARRIER_PROMOTION"; - - /// Requires: Break value types are consistent across all break points - pub const CAP_MISSING_BREAK_VALUE_TYPE: &str = "CAP_MISSING_BREAK_VALUE_TYPE"; -} +// Canonicalization Entry Point +pub use canonicalizer::canonicalize_loop_expr; // ============================================================================ -// Implementation Helpers +// Tests // ============================================================================ -impl LoopSkeleton { - /// Create a new empty skeleton - pub fn new(span: Span) -> Self { - Self { - steps: Vec::new(), - carriers: Vec::new(), - exits: ExitContract::default(), - captured: None, - span, - } - } - - /// Count the number of break checks in this skeleton - pub fn count_break_checks(&self) -> usize { - self.steps - .iter() - .filter(|s| matches!(s, SkeletonStep::BreakCheck { .. })) - .count() - } - - /// Count the number of continue checks in this skeleton - pub fn count_continue_checks(&self) -> usize { - self.steps - .iter() - .filter(|s| matches!(s, SkeletonStep::ContinueCheck { .. })) - .count() - } - - /// Get all carrier names - pub fn carrier_names(&self) -> Vec<&str> { - self.carriers.iter().map(|c| c.name.as_str()).collect() - } -} - -impl ExitContract { - /// Create a contract with no exits - pub fn none() -> Self { - Self { - has_break: false, - has_continue: false, - has_return: false, - break_has_value: false, - } - } - - /// Check if any exit exists - pub fn has_any_exit(&self) -> bool { - self.has_break || self.has_continue || self.has_return - } -} - -impl Default for ExitContract { - fn default() -> Self { - Self::none() - } -} - -impl RoutingDecision { - /// Create a successful routing decision - pub fn success(pattern: LoopPatternKind) -> Self { - Self { - chosen: Some(pattern), - missing_caps: Vec::new(), - notes: Vec::new(), - error_tags: Vec::new(), - } - } - - /// Create a failed routing decision (Fail-Fast) - pub fn fail_fast(missing_caps: Vec<&'static str>, reason: String) -> Self { - Self { - chosen: None, - missing_caps, - notes: vec![reason.clone()], - error_tags: vec![format!("[loop_canonicalizer/fail_fast] {}", reason)], - } - } - - /// Add a diagnostic note - pub fn add_note(&mut self, note: String) { - self.notes.push(note); - } - - /// Check if routing succeeded - pub fn is_success(&self) -> bool { - self.chosen.is_some() - } - - /// Check if routing failed - pub fn is_fail_fast(&self) -> bool { - self.chosen.is_none() - } -} - -// ============================================================================ -// Display Implementations -// ============================================================================ - -impl std::fmt::Display for CarrierRole { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - CarrierRole::Counter => write!(f, "Counter"), - CarrierRole::Accumulator => write!(f, "Accumulator"), - CarrierRole::ConditionVar => write!(f, "ConditionVar"), - CarrierRole::Derived => write!(f, "Derived"), - } - } -} - -// ============================================================================ -// Phase 3: Pattern Recognition Helpers -// ============================================================================ - -/// Try to extract skip_whitespace pattern from loop -/// -/// Pattern structure: -/// ``` -/// loop(cond) { -/// // ... optional body statements (Body) -/// if check_cond { -/// carrier = carrier + const -/// } else { -/// break -/// } -/// } -/// ``` -/// -/// Returns (carrier_name, delta, body_stmts) if pattern matches. -fn try_extract_skip_whitespace_pattern( - body: &[ASTNode], -) -> Option<(String, i64, Vec)> { - if body.is_empty() { - return None; - } - - // Last statement must be if-else with break - let last_stmt = &body[body.len() - 1]; - - let (then_body, else_body) = match last_stmt { - ASTNode::If { - then_body, - else_body: Some(else_body), - .. - } => (then_body, else_body), - _ => return None, - }; - - // Then branch must be single assignment: carrier = carrier + const - if then_body.len() != 1 { - return None; - } - - let (carrier_name, delta) = match &then_body[0] { - ASTNode::Assignment { target, value, .. } => { - // Extract target variable name - let target_name = match target.as_ref() { - ASTNode::Variable { name, .. } => name.clone(), - _ => return None, - }; - - // Value must be: target + const - match value.as_ref() { - ASTNode::BinaryOp { - operator: crate::ast::BinaryOperator::Add, - left, - right, - .. - } => { - // Left must be same variable - let left_name = match left.as_ref() { - ASTNode::Variable { name, .. } => name, - _ => return None, - }; - - if left_name != &target_name { - return None; - } - - // Right must be integer literal - let delta = match right.as_ref() { - ASTNode::Literal { - value: crate::ast::LiteralValue::Integer(n), - .. - } => *n, - _ => return None, - }; - - (target_name, delta) - } - _ => return None, - } - } - _ => return None, - }; - - // Else branch must be single break - if else_body.len() != 1 { - return None; - } - - match &else_body[0] { - ASTNode::Break { .. } => { - // Success! Extract body statements (all except last if) - let body_stmts = body[..body.len() - 1].to_vec(); - Some((carrier_name, delta, body_stmts)) - } - _ => None, - } -} - -// ============================================================================ -// Phase 3: Canonicalization Entry Point -// ============================================================================ - -/// Canonicalize a loop AST into LoopSkeleton (Phase 3: skip_whitespace pattern recognition) -/// -/// Currently supports only the skip_whitespace pattern: -/// ``` -/// loop(cond) { -/// // ... optional body statements -/// if check_cond { -/// carrier = carrier + step -/// } else { -/// break -/// } -/// } -/// ``` -/// -/// All other patterns return Fail-Fast with detailed reasoning. -/// -/// # Arguments -/// - `loop_expr`: The loop AST node (must be `ASTNode::Loop`) -/// -/// # Returns -/// - `Ok((skeleton, decision))`: Successfully extracted skeleton and routing decision -/// - `Err(String)`: Malformed AST or internal error -pub fn canonicalize_loop_expr( - loop_expr: &ASTNode, -) -> Result<(LoopSkeleton, RoutingDecision), String> { - // Extract loop components - let (condition, body, span) = match loop_expr { - ASTNode::Loop { - condition, - body, - span, - } => (condition.as_ref(), body, span.clone()), - _ => return Err(format!("Expected Loop node, got: {:?}", loop_expr)), - }; - - // Phase 3: Try to extract skip_whitespace pattern - if let Some((carrier_name, delta, body_stmts)) = try_extract_skip_whitespace_pattern(body) { - // Build skeleton for skip_whitespace pattern - let mut skeleton = LoopSkeleton::new(span); - - // Step 1: Header condition - skeleton.steps.push(SkeletonStep::HeaderCond { - expr: Box::new(condition.clone()), - }); - - // Step 2: Body statements (if any) - if !body_stmts.is_empty() { - skeleton.steps.push(SkeletonStep::Body { - stmts: body_stmts, - }); - } - - // Step 3: Update step - skeleton.steps.push(SkeletonStep::Update { - carrier_name: carrier_name.clone(), - update_kind: UpdateKind::ConstStep { delta }, - }); - - // Add carrier slot - skeleton.carriers.push(CarrierSlot { - name: carrier_name, - role: CarrierRole::Counter, - update_kind: UpdateKind::ConstStep { delta }, - }); - - // Set exit contract - skeleton.exits = ExitContract { - has_break: true, - has_continue: false, - has_return: false, - break_has_value: false, - }; - - // Phase 137-5: Decision policy SSOT - ExitContract determines pattern choice - // Since has_break=true, this should route to Pattern2Break (not Pattern3IfPhi) - // Pattern3IfPhi is for if-else PHI *without* break statements - let decision = RoutingDecision::success(LoopPatternKind::Pattern2Break); - return Ok((skeleton, decision)); - } - - // Pattern not recognized - fail fast - Ok(( - LoopSkeleton::new(span), - RoutingDecision::fail_fast( - vec![capability_tags::CAP_MISSING_CONST_STEP], - "Phase 3: Loop does not match skip_whitespace pattern".to_string(), - ), - )) -} - #[cfg(test)] mod tests { use super::*; + use crate::ast::Span; #[test] fn test_skeleton_creation() { @@ -543,6 +91,8 @@ mod tests { #[test] fn test_routing_decision() { + use crate::mir::loop_pattern_detection::LoopPatternKind; + let success = RoutingDecision::success(LoopPatternKind::Pattern1SimpleWhile); assert!(success.is_success()); assert!(!success.is_fail_fast()); @@ -566,7 +116,7 @@ mod tests { #[test] fn test_skeleton_count_helpers() { - use crate::ast::LiteralValue; + use crate::ast::{ASTNode, LiteralValue}; let mut skeleton = LoopSkeleton::new(Span::unknown()); @@ -608,324 +158,4 @@ mod tests { let names = skeleton.carrier_names(); assert_eq!(names, vec!["i", "sum"]); } - - // ============================================================================ - // Phase 2: Canonicalize Tests - // ============================================================================ - - #[test] - fn test_canonicalize_rejects_non_loop() { - use crate::ast::LiteralValue; - - let not_loop = ASTNode::Literal { - value: LiteralValue::Bool(true), - span: Span::unknown(), - }; - - let result = canonicalize_loop_expr(¬_loop); - assert!(result.is_err()); - assert!(result.unwrap_err().contains("Expected Loop node")); - } - - // ============================================================================ - // Phase 3: skip_whitespace Pattern Tests - // ============================================================================ - - #[test] - fn test_skip_whitespace_pattern_recognition() { - use crate::ast::{BinaryOperator, LiteralValue}; - - // Build skip_whitespace pattern: loop(p < len) { if is_ws == 1 { p = p + 1 } else { break } } - let loop_node = ASTNode::Loop { - condition: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Variable { - name: "len".to_string(), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - body: vec![ASTNode::If { - condition: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Equal, - left: Box::new(ASTNode::Variable { - name: "is_ws".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(1), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - then_body: vec![ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Add, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(1), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - span: Span::unknown(), - }], - else_body: Some(vec![ASTNode::Break { - span: Span::unknown(), - }]), - span: Span::unknown(), - }], - span: Span::unknown(), - }; - - let result = canonicalize_loop_expr(&loop_node); - assert!(result.is_ok()); - - let (skeleton, decision) = result.unwrap(); - - // Verify success - assert!(decision.is_success()); - // Phase 137-5: Pattern choice reflects ExitContract (has_break=true → Pattern2Break) - assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern2Break)); - assert_eq!(decision.missing_caps.len(), 0); - - // Verify skeleton structure - assert_eq!(skeleton.steps.len(), 2); // HeaderCond + Update - assert!(matches!( - skeleton.steps[0], - SkeletonStep::HeaderCond { .. } - )); - assert!(matches!( - skeleton.steps[1], - SkeletonStep::Update { .. } - )); - - // Verify carrier - assert_eq!(skeleton.carriers.len(), 1); - assert_eq!(skeleton.carriers[0].name, "p"); - assert_eq!(skeleton.carriers[0].role, CarrierRole::Counter); - match &skeleton.carriers[0].update_kind { - UpdateKind::ConstStep { delta } => assert_eq!(*delta, 1), - _ => panic!("Expected ConstStep update"), - } - - // Verify exit contract - assert!(skeleton.exits.has_break); - assert!(!skeleton.exits.has_continue); - assert!(!skeleton.exits.has_return); - assert!(!skeleton.exits.break_has_value); - } - - #[test] - fn test_skip_whitespace_with_body_statements() { - use crate::ast::{BinaryOperator, LiteralValue}; - - // Build pattern with body statements before the if: - // loop(p < len) { - // local ch = get_char(p) - // if is_ws { p = p + 1 } else { break } - // } - let loop_node = ASTNode::Loop { - condition: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Less, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Variable { - name: "len".to_string(), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - body: vec![ - // Body statement - ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "ch".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::FunctionCall { - name: "get_char".to_string(), - arguments: vec![ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }], - span: Span::unknown(), - }), - span: Span::unknown(), - }, - // The if-else pattern - ASTNode::If { - condition: Box::new(ASTNode::Variable { - name: "is_ws".to_string(), - span: Span::unknown(), - }), - then_body: vec![ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Add, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(1), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - span: Span::unknown(), - }], - else_body: Some(vec![ASTNode::Break { - span: Span::unknown(), - }]), - span: Span::unknown(), - }, - ], - span: Span::unknown(), - }; - - let result = canonicalize_loop_expr(&loop_node); - assert!(result.is_ok()); - - let (skeleton, decision) = result.unwrap(); - - // Verify success - assert!(decision.is_success()); - // Phase 137-5: Pattern choice reflects ExitContract (has_break=true → Pattern2Break) - assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern2Break)); - - // Verify skeleton has Body step - assert_eq!(skeleton.steps.len(), 3); // HeaderCond + Body + Update - assert!(matches!( - skeleton.steps[0], - SkeletonStep::HeaderCond { .. } - )); - assert!(matches!(skeleton.steps[1], SkeletonStep::Body { .. })); - assert!(matches!( - skeleton.steps[2], - SkeletonStep::Update { .. } - )); - - // Verify body contains 1 statement - match &skeleton.steps[1] { - SkeletonStep::Body { stmts } => assert_eq!(stmts.len(), 1), - _ => panic!("Expected Body step"), - } - } - - #[test] - fn test_skip_whitespace_fails_without_else() { - use crate::ast::{BinaryOperator, LiteralValue}; - - // Build pattern without else branch (should fail) - let loop_node = ASTNode::Loop { - condition: Box::new(ASTNode::Literal { - value: LiteralValue::Bool(true), - span: Span::unknown(), - }), - body: vec![ASTNode::If { - condition: Box::new(ASTNode::Literal { - value: LiteralValue::Bool(true), - span: Span::unknown(), - }), - then_body: vec![ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Add, - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(1), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - span: Span::unknown(), - }], - else_body: None, // No else branch - span: Span::unknown(), - }], - span: Span::unknown(), - }; - - let result = canonicalize_loop_expr(&loop_node); - assert!(result.is_ok()); - - let (_, decision) = result.unwrap(); - assert!(decision.is_fail_fast()); - assert!(decision - .notes[0] - .contains("does not match skip_whitespace pattern")); - } - - #[test] - fn test_skip_whitespace_fails_with_wrong_delta() { - use crate::ast::{BinaryOperator, LiteralValue}; - - // Build pattern with wrong update (p = p - 1 instead of p = p + 1) - let loop_node = ASTNode::Loop { - condition: Box::new(ASTNode::Literal { - value: LiteralValue::Bool(true), - span: Span::unknown(), - }), - body: vec![ASTNode::If { - condition: Box::new(ASTNode::Literal { - value: LiteralValue::Bool(true), - span: Span::unknown(), - }), - then_body: vec![ASTNode::Assignment { - target: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - value: Box::new(ASTNode::BinaryOp { - operator: BinaryOperator::Subtract, // Wrong operator - left: Box::new(ASTNode::Variable { - name: "p".to_string(), - span: Span::unknown(), - }), - right: Box::new(ASTNode::Literal { - value: LiteralValue::Integer(1), - span: Span::unknown(), - }), - span: Span::unknown(), - }), - span: Span::unknown(), - }], - else_body: Some(vec![ASTNode::Break { - span: Span::unknown(), - }]), - span: Span::unknown(), - }], - span: Span::unknown(), - }; - - let result = canonicalize_loop_expr(&loop_node); - assert!(result.is_ok()); - - let (_, decision) = result.unwrap(); - assert!(decision.is_fail_fast()); - } - } diff --git a/src/mir/loop_canonicalizer/pattern_recognizer.rs b/src/mir/loop_canonicalizer/pattern_recognizer.rs new file mode 100644 index 00000000..83ef7cde --- /dev/null +++ b/src/mir/loop_canonicalizer/pattern_recognizer.rs @@ -0,0 +1,249 @@ +//! Pattern Recognition Helpers +//! +//! This module contains pattern detection logic for specific loop structures. +//! Currently supports the skip_whitespace pattern, with room for future patterns. + +use crate::ast::ASTNode; + +// ============================================================================ +// Skip Whitespace Pattern +// ============================================================================ + +/// Try to extract skip_whitespace pattern from loop +/// +/// Pattern structure: +/// ``` +/// loop(cond) { +/// // ... optional body statements (Body) +/// if check_cond { +/// carrier = carrier + const +/// } else { +/// break +/// } +/// } +/// ``` +/// +/// Returns (carrier_name, delta, body_stmts) if pattern matches. +pub fn try_extract_skip_whitespace_pattern( + body: &[ASTNode], +) -> Option<(String, i64, Vec)> { + if body.is_empty() { + return None; + } + + // Last statement must be if-else with break + let last_stmt = &body[body.len() - 1]; + + let (then_body, else_body) = match last_stmt { + ASTNode::If { + then_body, + else_body: Some(else_body), + .. + } => (then_body, else_body), + _ => return None, + }; + + // Then branch must be single assignment: carrier = carrier + const + if then_body.len() != 1 { + return None; + } + + let (carrier_name, delta) = match &then_body[0] { + ASTNode::Assignment { target, value, .. } => { + // Extract target variable name + let target_name = match target.as_ref() { + ASTNode::Variable { name, .. } => name.clone(), + _ => return None, + }; + + // Value must be: target + const + match value.as_ref() { + ASTNode::BinaryOp { + operator: crate::ast::BinaryOperator::Add, + left, + right, + .. + } => { + // Left must be same variable + let left_name = match left.as_ref() { + ASTNode::Variable { name, .. } => name, + _ => return None, + }; + + if left_name != &target_name { + return None; + } + + // Right must be integer literal + let delta = match right.as_ref() { + ASTNode::Literal { + value: crate::ast::LiteralValue::Integer(n), + .. + } => *n, + _ => return None, + }; + + (target_name, delta) + } + _ => return None, + } + } + _ => return None, + }; + + // Else branch must be single break + if else_body.len() != 1 { + return None; + } + + match &else_body[0] { + ASTNode::Break { .. } => { + // Success! Extract body statements (all except last if) + let body_stmts = body[..body.len() - 1].to_vec(); + Some((carrier_name, delta, body_stmts)) + } + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ast::{BinaryOperator, LiteralValue, Span}; + + #[test] + fn test_skip_whitespace_basic_pattern() { + // Build: if is_ws { p = p + 1 } else { break } + let body = vec![ASTNode::If { + condition: Box::new(ASTNode::Variable { + name: "is_ws".to_string(), + span: Span::unknown(), + }), + then_body: vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }], + else_body: Some(vec![ASTNode::Break { + span: Span::unknown(), + }]), + span: Span::unknown(), + }]; + + let result = try_extract_skip_whitespace_pattern(&body); + assert!(result.is_some()); + + let (carrier_name, delta, body_stmts) = result.unwrap(); + assert_eq!(carrier_name, "p"); + assert_eq!(delta, 1); + assert_eq!(body_stmts.len(), 0); + } + + #[test] + fn test_skip_whitespace_with_body() { + // Build: local ch = get_char(p); if is_ws { p = p + 1 } else { break } + let body = vec![ + ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "ch".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::FunctionCall { + name: "get_char".to_string(), + arguments: vec![ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }], + span: Span::unknown(), + }), + span: Span::unknown(), + }, + ASTNode::If { + condition: Box::new(ASTNode::Variable { + name: "is_ws".to_string(), + span: Span::unknown(), + }), + then_body: vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }], + else_body: Some(vec![ASTNode::Break { + span: Span::unknown(), + }]), + span: Span::unknown(), + }, + ]; + + let result = try_extract_skip_whitespace_pattern(&body); + assert!(result.is_some()); + + let (carrier_name, delta, body_stmts) = result.unwrap(); + assert_eq!(carrier_name, "p"); + assert_eq!(delta, 1); + assert_eq!(body_stmts.len(), 1); // The assignment before the if + } + + #[test] + fn test_skip_whitespace_rejects_no_else() { + // Build: if is_ws { p = p + 1 } (no else) + let body = vec![ASTNode::If { + condition: Box::new(ASTNode::Variable { + name: "is_ws".to_string(), + span: Span::unknown(), + }), + then_body: vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "p".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }], + else_body: None, + span: Span::unknown(), + }]; + + let result = try_extract_skip_whitespace_pattern(&body); + assert!(result.is_none()); + } +} diff --git a/src/mir/loop_canonicalizer/skeleton_types.rs b/src/mir/loop_canonicalizer/skeleton_types.rs new file mode 100644 index 00000000..1a619c8d --- /dev/null +++ b/src/mir/loop_canonicalizer/skeleton_types.rs @@ -0,0 +1,213 @@ +//! Skeleton Type Definitions +//! +//! Core data structures for the Loop Canonicalizer. +//! These types represent the normalized "skeleton" of a loop structure +//! without any JoinIR-specific information (BlockId, ValueId, etc.). + +use crate::ast::{ASTNode, Span}; + +// ============================================================================ +// Core Skeleton Types +// ============================================================================ + +/// Loop skeleton - The canonical representation of a loop structure +/// +/// This is the single output type of the Canonicalizer. +/// It represents the essential structure of a loop without any +/// JoinIR-specific information. +#[derive(Debug, Clone)] +pub struct LoopSkeleton { + /// Sequence of steps (HeaderCond, BodyInit, BreakCheck, Updates, Tail) + pub steps: Vec, + + /// Carriers (loop variables with update rules and boundary crossing contracts) + pub carriers: Vec, + + /// Exit contract (presence and payload of break/continue/return) + pub exits: ExitContract, + + /// Captured variables from outer scope (optional) + pub captured: Option>, + + /// Source location for debugging + pub span: Span, +} + +/// Skeleton step - Minimal step kinds for loop structure +/// +/// Each step represents a fundamental operation in the loop lifecycle. +#[derive(Debug, Clone)] +pub enum SkeletonStep { + /// Loop continuation condition (the `cond` in `loop(cond)`) + HeaderCond { + expr: Box, + }, + + /// Early exit check (`if cond { break }`) + BreakCheck { + cond: Box, + has_value: bool, + }, + + /// Skip check (`if cond { continue }`) + ContinueCheck { + cond: Box, + }, + + /// Carrier update (`i = i + 1`, etc.) + Update { + carrier_name: String, + update_kind: UpdateKind, + }, + + /// Loop body (all other statements) + Body { + stmts: Vec, + }, +} + +/// Update kind - How a carrier variable is updated +/// +/// This categorization helps determine which pattern can handle the loop. +#[derive(Debug, Clone)] +pub enum UpdateKind { + /// Constant step (`i = i + const`) + ConstStep { + delta: i64, + }, + + /// Conditional update (`if cond { x = a } else { x = b }`) + Conditional { + then_value: Box, + else_value: Box, + }, + + /// Arbitrary update (everything else) + Arbitrary, +} + +/// Carrier slot - A loop variable with its role and update rule +/// +/// Carriers are variables that are updated in each iteration +/// and need to cross loop boundaries (via PHI nodes in MIR). +#[derive(Debug, Clone)] +pub struct CarrierSlot { + pub name: String, + pub role: CarrierRole, + pub update_kind: UpdateKind, +} + +/// Carrier role - The semantic role of a carrier variable +/// +/// This helps determine the appropriate pattern and PHI structure. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CarrierRole { + /// Loop counter (the `i` in `i < n`) + Counter, + + /// Accumulator (the `sum` in `sum += x`) + Accumulator, + + /// Condition variable (the `is_valid` in `while(is_valid)`) + ConditionVar, + + /// Derived value (e.g., `digit_pos` computed from other carriers) + Derived, +} + +/// Captured slot - An outer variable used within the loop +/// +/// These are read-only references to variables defined outside the loop. +/// (Write access would make them carriers instead.) +#[derive(Debug, Clone)] +pub struct CapturedSlot { + pub name: String, + pub is_mutable: bool, +} + +// ============================================================================ +// Exit Contract +// ============================================================================ + +/// Exit contract - What kinds of exits the loop has +/// +/// This determines the exit line architecture needed. +#[derive(Debug, Clone)] +pub struct ExitContract { + pub has_break: bool, + pub has_continue: bool, + pub has_return: bool, + pub break_has_value: bool, +} + +// ============================================================================ +// Implementation Helpers +// ============================================================================ + +impl LoopSkeleton { + /// Create a new empty skeleton + pub fn new(span: Span) -> Self { + Self { + steps: Vec::new(), + carriers: Vec::new(), + exits: ExitContract::default(), + captured: None, + span, + } + } + + /// Count the number of break checks in this skeleton + pub fn count_break_checks(&self) -> usize { + self.steps + .iter() + .filter(|s| matches!(s, SkeletonStep::BreakCheck { .. })) + .count() + } + + /// Count the number of continue checks in this skeleton + pub fn count_continue_checks(&self) -> usize { + self.steps + .iter() + .filter(|s| matches!(s, SkeletonStep::ContinueCheck { .. })) + .count() + } + + /// Get all carrier names + pub fn carrier_names(&self) -> Vec<&str> { + self.carriers.iter().map(|c| c.name.as_str()).collect() + } +} + +impl ExitContract { + /// Create a contract with no exits + pub fn none() -> Self { + Self { + has_break: false, + has_continue: false, + has_return: false, + break_has_value: false, + } + } + + /// Check if any exit exists + pub fn has_any_exit(&self) -> bool { + self.has_break || self.has_continue || self.has_return + } +} + +impl Default for ExitContract { + fn default() -> Self { + Self::none() + } +} + +impl std::fmt::Display for CarrierRole { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CarrierRole::Counter => write!(f, "Counter"), + CarrierRole::Accumulator => write!(f, "Accumulator"), + CarrierRole::ConditionVar => write!(f, "ConditionVar"), + CarrierRole::Derived => write!(f, "Derived"), + } + } +}