diff --git a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs index feafa199..3704327f 100644 --- a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs +++ b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs @@ -19,7 +19,7 @@ //! - This module now focuses on high-level feature extraction //! - Delegates to specialized analyzers for break/continue logic -use crate::ast::ASTNode; +use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; use crate::mir::loop_pattern_detection::break_condition_analyzer::BreakConditionAnalyzer; use crate::mir::loop_pattern_detection::LoopFeatures; @@ -352,3 +352,130 @@ mod tests { assert_eq!(count_carriers_in_body(&empty), 0); } } + +// ============================================================================ +// Phase 140-P4-A: Skip Whitespace Pattern Detection (SSOT) +// ============================================================================ + +/// Skip whitespace pattern information +/// +/// This struct holds the extracted information from a recognized skip_whitespace pattern. +#[derive(Debug, Clone, PartialEq)] +pub struct SkipWhitespaceInfo { + /// Carrier variable name (e.g., "p") + pub carrier_name: String, + /// Constant step increment (e.g., 1 for `p = p + 1`) + pub delta: i64, + /// Body statements before the if-else (may be empty) + pub body_stmts: Vec, +} + +/// Detect skip_whitespace pattern in loop body (Phase 140-P4-A SSOT) +/// +/// Pattern structure: +/// ``` +/// loop(cond) { +/// // ... optional body statements (Body) +/// if check_cond { +/// carrier = carrier + const +/// } else { +/// break +/// } +/// } +/// ``` +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `Some(SkipWhitespaceInfo)` if the pattern matches, `None` otherwise +/// +/// # Notes +/// +/// This is the SSOT for skip_whitespace pattern detection. +/// Used by both loop_canonicalizer (Phase 137) and future pattern analyzers. +pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option { + if body.is_empty() { + return None; + } + + // Last statement must be if-else with break + let last_stmt = &body[body.len() - 1]; + + let (then_body, else_body) = match last_stmt { + ASTNode::If { + then_body, + else_body: Some(else_body), + .. + } => (then_body, else_body), + _ => return None, + }; + + // Then branch must be single assignment: carrier = carrier + const + if then_body.len() != 1 { + return None; + } + + let (carrier_name, delta) = match &then_body[0] { + ASTNode::Assignment { target, value, .. } => { + // Extract target variable name + let target_name = match target.as_ref() { + ASTNode::Variable { name, .. } => name.clone(), + _ => return None, + }; + + // Value must be: target + const + match value.as_ref() { + ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left, + right, + .. + } => { + // Left must be same variable + let left_name = match left.as_ref() { + ASTNode::Variable { name, .. } => name, + _ => return None, + }; + + if left_name != &target_name { + return None; + } + + // Right must be integer literal + let delta = match right.as_ref() { + ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } => *n, + _ => return None, + }; + + (target_name, delta) + } + _ => return None, + } + } + _ => return None, + }; + + // Else branch must be single break + if else_body.len() != 1 { + return None; + } + + match &else_body[0] { + ASTNode::Break { .. } => { + // Success! Extract body statements (all except last if) + let body_stmts = body[..body.len() - 1].to_vec(); + Some(SkipWhitespaceInfo { + carrier_name, + delta, + body_stmts, + }) + } + _ => None, + } +}