diff --git a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs index df6afd35..98f94084 100644 --- a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs +++ b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs @@ -1,5 +1,7 @@ //! Phase 193: AST Feature Extractor Box //! +//! Phase 287 P1: Facade pattern - re-exports recognizers from pattern_recognizers/ +//! //! Modularized feature extraction from loop AST nodes. //! Separated from router.rs to improve reusability and testability. //! @@ -12,6 +14,7 @@ //! - **High reusability**: Used by router, future Pattern 5/6, and pattern analysis tools //! - **Independent testability**: Can be unit tested without MirBuilder context //! - **Extension-friendly**: Easy to add new feature detection methods +//! - **Facade pattern**: Re-exports from pattern_recognizers/ for backward compatibility //! //! # Phase 33-23: Refactoring //! @@ -19,6 +22,12 @@ //! - This module now focuses on high-level feature extraction //! - Delegates to specialized analyzers for break/continue logic //! +//! # Phase 287 P1: Modularization +//! +//! - Individual recognizers extracted to `pattern_recognizers/` subdirectory +//! - This file now acts as a facade, re-exporting public APIs +//! - Internal implementation moved to specialized modules +//! //! # Boundary (Phase 110) //! //! - **Routing SSOT**: Pattern routing and feature classification use this module (and @@ -26,61 +35,42 @@ //! - **Structure SSOT**: `crate::mir::control_tree` (StepTree) describes *control structure only* //! and must not drive routing decisions yet; it is used for dev-only observation and parity checks. -use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; +use crate::ast::ASTNode; use crate::mir::loop_pattern_detection::LoopFeatures; -/// Detect if a loop body contains continue statements -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// `true` if at least one continue statement is found in the body or nested structures -/// -/// # Notes -/// -/// This is a simple recursive scan that doesn't handle nested loops perfectly, -/// but is sufficient for initial pattern detection. -pub(crate) fn detect_continue_in_body(body: &[ASTNode]) -> bool { - for stmt in body { - if has_continue_node(stmt) { - return true; - } - } - false -} +// Phase 287 P1: Use recognizer modules from parent +use super::pattern_recognizers; -/// Detect if a loop body contains break statements -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// `true` if at least one break statement is found in the body or nested structures -pub(crate) fn detect_break_in_body(body: &[ASTNode]) -> bool { - for stmt in body { - if has_break_node(stmt) { - return true; - } - } - false -} +// Re-export continue/break/return detection +pub(crate) use pattern_recognizers::continue_break::{ + detect_break_in_body, detect_continue_in_body, detect_return_in_body, +}; -/// Detect if a loop body contains return statements -/// -/// This is used for dev-only parity checks with structure SSOT (StepTree). -pub(crate) fn detect_return_in_body(body: &[ASTNode]) -> bool { - for stmt in body { - if has_return_node(stmt) { - return true; - } - } - false -} +// Re-export infinite loop detection +use pattern_recognizers::infinite_loop::detect_infinite_loop; + +// Re-export if-else phi detection +use pattern_recognizers::if_else_phi::detect_if_else_phi_in_body; + +// Re-export carrier count estimation +use pattern_recognizers::carrier_count::count_carriers_in_body; + +// Re-export parse_number pattern detection +pub use pattern_recognizers::parse_number::{ + detect_parse_number_pattern, detect_read_digits_loop_true_pattern, ParseNumberInfo, + ReadDigitsLoopTrueInfo, +}; + +// Re-export parse_string pattern detection +pub use pattern_recognizers::parse_string::{ + detect_continue_pattern, detect_parse_string_pattern, ContinuePatternInfo, ParseStringInfo, +}; + +// Re-export skip_whitespace pattern detection +pub use pattern_recognizers::skip_whitespace::{detect_skip_whitespace_pattern, SkipWhitespaceInfo}; + +// Re-export escape pattern recognizer (existing module, not moved in P1) +pub use super::escape_pattern_recognizer::detect_escape_skip_pattern; /// Extract full feature set from loop body AST /// @@ -127,192 +117,7 @@ pub(crate) fn extract_features( break_count: if has_break { 1 } else { 0 }, continue_count: if has_continue { 1 } else { 0 }, is_infinite_loop, - ..Default::default() // Phase 188.1: Use Default for nesting fields - } -} - -/// Phase 131-11: Detect infinite loop (condition == Literal(Bool(true))) -/// -/// # Arguments -/// -/// * `condition` - Loop condition AST node -/// -/// # Returns -/// -/// `true` if condition is a boolean literal with value true -fn detect_infinite_loop(condition: &ASTNode) -> bool { - matches!( - condition, - ASTNode::Literal { - value: crate::ast::LiteralValue::Bool(true), - .. - } - ) -} - -/// Phase 212.5: Detect ANY if statement in loop body (structural detection) -/// -/// This function detects any if statement, regardless of whether it has an else branch. -/// Used for routing single-carrier if-update patterns to Pattern 3. -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// `true` if at least one if statement is found (with or without else) -fn detect_if_in_body(body: &[ASTNode]) -> bool { - for node in body { - if let ASTNode::If { .. } = node { - return true; - } - } - false -} - -/// Detect if-else statements with potential PHI pattern -/// -/// Looks for if-else statements where both branches contain assignments. -/// This is a heuristic indicating a potential PHI merge point. -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// `true` if at least one if-else statement with assignments in both branches is found -/// -/// # Phase 264 P0: Conservative Implementation -/// -/// Previously returned true if both if/else branches had assignments. -/// This was too broad - it caught simple conditional assignments like: -/// `if x then seg = "A" else seg = "B"` -/// -/// Pattern3 is designed for if-sum patterns with arithmetic accumulation: -/// `sum = sum + (if x then 1 else 0)` -/// -/// Phase 264 P0: Return false to prevent misclassification. -/// Effect: Loops with conditional assignment fall through to Pattern1. -/// -/// Phase 264 P1: TODO - Implement accurate if-sum signature detection. -fn detect_if_else_phi_in_body(body: &[ASTNode]) -> bool { - // Phase 282 P5: Proper if-else PHI detection (re-enabled with ExtractionBased safety) - // - // This function provides initial classification for Pattern3IfPhi. - // The actual validation is done by extractors::pattern3::extract_loop_with_if_phi_parts() - // which performs deep checks (PHI assignments, no control flow, etc.) - // - // Here we just check: Does the loop body contain an if-else statement? - // This allows Pattern3 to be attempted, and extraction will validate. - - for stmt in body { - if matches!(stmt, ASTNode::If { else_body: Some(_), .. }) { - return true; // Found if-else - } - } - false // No if-else found -} - -/// Count carrier variables (variables assigned in loop body) -/// -/// This is a heuristic: counts assignment statements as a proxy for carriers. -/// A more precise implementation would track which specific variables are assigned. -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// Count of distinct carrier variables (0 or 1 in current implementation) -/// -/// # Notes -/// -/// Current implementation returns 0 or 1 (at least one assignment present). -/// Future enhancement: track individual variable assignments for precise carrier count. -fn count_carriers_in_body(body: &[ASTNode]) -> usize { - let mut count = 0; - for node in body { - match node { - ASTNode::Assignment { .. } => count += 1, - ASTNode::If { - then_body, - else_body, - .. - } => { - count += count_carriers_in_body(then_body); - if let Some(else_body) = else_body { - count += count_carriers_in_body(else_body); - } - } - _ => {} - } - } - // Return at least 1 if we have assignments, otherwise 0 - if count > 0 { - 1 - } else { - 0 - } -} - -/// Recursive helper to check if AST node contains continue -fn has_continue_node(node: &ASTNode) -> bool { - match node { - ASTNode::Continue { .. } => true, - ASTNode::If { - then_body, - else_body, - .. - } => { - then_body.iter().any(has_continue_node) - || else_body - .as_ref() - .map_or(false, |e| e.iter().any(has_continue_node)) - } - ASTNode::Loop { body, .. } => body.iter().any(has_continue_node), - _ => false, - } -} - -/// Recursive helper to check if AST node contains break -fn has_break_node(node: &ASTNode) -> bool { - match node { - ASTNode::Break { .. } => true, - ASTNode::If { - then_body, - else_body, - .. - } => { - then_body.iter().any(has_break_node) - || else_body - .as_ref() - .map_or(false, |e| e.iter().any(has_break_node)) - } - ASTNode::Loop { body, .. } => body.iter().any(has_break_node), - _ => false, - } -} - -/// Recursive helper to check if AST node contains return -fn has_return_node(node: &ASTNode) -> bool { - match node { - ASTNode::Return { .. } => true, - ASTNode::If { - then_body, - else_body, - .. - } => { - then_body.iter().any(has_return_node) - || else_body - .as_ref() - .map_or(false, |e| e.iter().any(has_return_node)) - } - ASTNode::Loop { body, .. } => body.iter().any(has_return_node), - ASTNode::ScopeBox { body, .. } => body.iter().any(has_return_node), - _ => false, + ..Default::default() // Phase 188.1: Use Default for nesting fields } } @@ -320,829 +125,11 @@ fn has_return_node(node: &ASTNode) -> bool { mod tests { use super::*; - #[test] - fn test_detect_continue_simple() { - let continue_node = ASTNode::Continue { - span: crate::ast::Span::unknown(), - }; - assert!(has_continue_node(&continue_node)); - } - - #[test] - fn test_detect_break_simple() { - let break_node = ASTNode::Break { - span: crate::ast::Span::unknown(), - }; - assert!(has_break_node(&break_node)); - } - #[test] fn test_empty_body() { let empty: Vec = vec![]; assert!(!detect_continue_in_body(&empty)); assert!(!detect_break_in_body(&empty)); - assert!(!detect_if_else_phi_in_body(&empty)); assert_eq!(count_carriers_in_body(&empty), 0); } } - -// ============================================================================ -// Phase 142-P1: Continue Pattern Detection -// ============================================================================ - -/// Continue pattern information -/// -/// This struct holds the extracted information from a recognized continue pattern. -#[derive(Debug, Clone, PartialEq)] -pub struct ContinuePatternInfo { - /// Carrier variable name (e.g., "i") - pub carrier_name: String, - /// Constant step increment (e.g., 1 for `i = i + 1`) - pub delta: i64, - /// Body statements before the continue check (may be empty) - pub body_stmts: Vec, - /// Body statements after the continue check (usually includes carrier update) - pub rest_stmts: Vec, -} - -/// Detect continue pattern in loop body -/// -/// Pattern structure: -/// ``` -/// loop(cond) { -/// // ... optional body statements (Body) -/// if skip_cond { -/// carrier = carrier + const // Optional update before continue -/// continue -/// } -/// // ... rest of body statements (Rest) -/// carrier = carrier + const // Carrier update -/// } -/// ``` -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// `Some(ContinuePatternInfo)` if the pattern matches, `None` otherwise -pub fn detect_continue_pattern(body: &[ASTNode]) -> Option { - if body.is_empty() { - return None; - } - - // Find the if statement with continue - let mut if_idx = None; - for (i, stmt) in body.iter().enumerate() { - if let ASTNode::If { then_body, .. } = stmt { - // Check if then_body contains continue - if then_body - .iter() - .any(|s| matches!(s, ASTNode::Continue { .. })) - { - if_idx = Some(i); - break; - } - } - } - - let if_idx = if_idx?; - - // Extract body statements before the if - let body_stmts = body[..if_idx].to_vec(); - - // Extract the if statement - let if_stmt = &body[if_idx]; - - // The if must have continue in then branch - let then_body = match if_stmt { - ASTNode::If { - then_body, - else_body, - .. - } => { - // For simple continue pattern, else_body should be None - if else_body.is_some() { - return None; - } - then_body - } - _ => return None, - }; - - // Check if then_body contains carrier update before continue - // For now, we'll look for the pattern after the if statement - - // Extract rest statements after the if - let rest_stmts = body[if_idx + 1..].to_vec(); - - // Find carrier update in rest_stmts (last statement should be carrier = carrier +/- const) - if rest_stmts.is_empty() { - return None; - } - - let last_stmt = &rest_stmts[rest_stmts.len() - 1]; - - let (carrier_name, delta) = match last_stmt { - ASTNode::Assignment { target, value, .. } => { - // Extract target variable name - let target_name = match target.as_ref() { - ASTNode::Variable { name, .. } => name.clone(), - _ => return None, - }; - - // Value must be: target (+|-) const - match value.as_ref() { - ASTNode::BinaryOp { - operator, - left, - right, - .. - } => { - // Accept both Add (+1) and Subtract (-1) - let op_multiplier = match operator { - BinaryOperator::Add => 1, - BinaryOperator::Subtract => -1, - _ => return None, - }; - - // Left must be same variable - let left_name = match left.as_ref() { - ASTNode::Variable { name, .. } => name, - _ => return None, - }; - - if left_name != &target_name { - return None; - } - - // Right must be integer literal - let const_val = match right.as_ref() { - ASTNode::Literal { - value: LiteralValue::Integer(n), - .. - } => *n, - _ => return None, - }; - - // Calculate delta with sign - let delta = const_val * op_multiplier; - - (target_name, delta) - } - _ => return None, - } - } - _ => return None, - }; - - // Check if then_body has carrier update before continue - // If so, we need to validate it matches - for stmt in then_body { - if let ASTNode::Assignment { target, .. } = stmt { - if let ASTNode::Variable { name, .. } = target.as_ref() { - if name == &carrier_name { - // There's a carrier update before continue - // For now, we'll just check it exists - // Could validate it matches the pattern later - } - } - } - } - - Some(ContinuePatternInfo { - carrier_name, - delta, - body_stmts, - rest_stmts, - }) -} - -// ============================================================================ -// Phase 143-P0: Parse Number/Digit Pattern Detection -// ============================================================================ - -/// Parse number pattern information -/// -/// This struct holds the extracted information from a recognized parse_number pattern. -#[derive(Debug, Clone, PartialEq)] -pub struct ParseNumberInfo { - /// Carrier variable name (e.g., "i") - pub carrier_name: String, - /// Constant step increment (e.g., 1 for `i = i + 1`) - pub delta: i64, - /// Body statements before the break check (may be empty) - pub body_stmts: Vec, - /// Rest statements after break check (usually includes result append and carrier update) - pub rest_stmts: Vec, -} - -/// Detect parse_number / digit collection pattern in loop body -/// -/// Phase 143-P0: Pattern with break in THEN clause (opposite of skip_whitespace) -/// -/// Pattern structure: -/// ``` -/// loop(cond) { -/// // ... optional body statements (ch, digit_pos computation) -/// if invalid_cond { -/// break -/// } -/// // ... rest statements (result append, carrier update) -/// carrier = carrier + const -/// } -/// ``` -/// -/// Recognized pattern: -/// - parse_number: `i < len`, `if digit_pos < 0 { break }`, `i = i + 1` -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// `Some(ParseNumberInfo)` if the pattern matches, `None` otherwise -/// -/// # Notes -/// -/// This is complementary to skip_whitespace pattern (which has break in ELSE clause). -/// Used by loop_canonicalizer (Phase 143) for digit collection patterns. -pub fn detect_parse_number_pattern(body: &[ASTNode]) -> Option { - if body.is_empty() { - return None; - } - - // Find the if statement with break in THEN clause - let mut if_idx = None; - for (i, stmt) in body.iter().enumerate() { - if let ASTNode::If { - then_body, - else_body, - .. - } = stmt - { - // Check if then_body contains break and else_body is None - if else_body.is_none() - && then_body.len() == 1 - && matches!(then_body[0], ASTNode::Break { .. }) - { - if_idx = Some(i); - break; - } - } - } - - let if_idx = if_idx?; - - // Extract body statements before the if - let body_stmts = body[..if_idx].to_vec(); - - // Extract rest statements after the if (should include carrier update) - let rest_stmts = body[if_idx + 1..].to_vec(); - - if rest_stmts.is_empty() { - return None; - } - - // Find carrier update in rest_stmts (last statement should be carrier = carrier + const) - let last_stmt = &rest_stmts[rest_stmts.len() - 1]; - - let (carrier_name, delta) = match last_stmt { - ASTNode::Assignment { target, value, .. } => { - // Extract target variable name - let target_name = match target.as_ref() { - ASTNode::Variable { name, .. } => name.clone(), - _ => return None, - }; - - // Value must be: target (+|-) const - match value.as_ref() { - ASTNode::BinaryOp { - operator, - left, - right, - .. - } => { - // Accept both Add (+1) and Subtract (-1) - let op_multiplier = match operator { - BinaryOperator::Add => 1, - BinaryOperator::Subtract => -1, - _ => return None, - }; - - // Left must be same variable - let left_name = match left.as_ref() { - ASTNode::Variable { name, .. } => name, - _ => return None, - }; - - if left_name != &target_name { - return None; - } - - // Right must be integer literal - let const_val = match right.as_ref() { - ASTNode::Literal { - value: LiteralValue::Integer(n), - .. - } => *n, - _ => return None, - }; - - // Calculate delta with sign - let delta = const_val * op_multiplier; - - (target_name, delta) - } - _ => return None, - } - } - _ => return None, - }; - - Some(ParseNumberInfo { - carrier_name, - delta, - body_stmts, - rest_stmts, - }) -} - -// ============================================================================ -// Phase 143-P1/P2: Parse String/Array Pattern Detection -// ============================================================================ - -/// Parse string/array pattern information -/// -/// This struct holds the extracted information from a recognized parse_string or parse_array pattern. -/// Both patterns share the same structure: continue + return exits with carrier updates. -#[derive(Debug, Clone, PartialEq)] -pub struct ParseStringInfo { - /// Carrier variable name (e.g., "p") - pub carrier_name: String, - /// Base constant step increment (e.g., 1 for `p = p + 1`) - pub delta: i64, - /// Body statements before the return/continue checks - pub body_stmts: Vec, -} - -/// Detect parse_string or parse_array pattern in loop body -/// -/// Phase 143-P1/P2: Pattern with both continue (escape/separator handling) AND return (stop condition) -/// -/// Pattern structure (parse_string example): -/// ``` -/// loop(p < len) { -/// local ch = s.substring(p, p + 1) -/// -/// // Check for closing quote (return) -/// if ch == "\"" { -/// return result -/// } -/// -/// // Check for escape sequence (continue after processing) -/// if ch == "\\" { -/// result = result + ch -/// p = p + 1 -/// if p < len { -/// result = result + s.substring(p, p + 1) -/// p = p + 1 -/// continue -/// } -/// } -/// -/// // Regular character -/// result = result + ch -/// p = p + 1 -/// } -/// ``` -/// -/// Pattern structure (parse_array example): -/// ``` -/// loop(p < len) { -/// local ch = s.substring(p, p + 1) -/// -/// // Check for array end (return) -/// if ch == "]" { -/// return result -/// } -/// -/// // Check for separator (continue after processing) -/// if ch == "," { -/// arr.push(elem) -/// elem = "" -/// p = p + 1 -/// continue -/// } -/// -/// // Accumulate element -/// elem = elem + ch -/// p = p + 1 -/// } -/// ``` -/// -/// Recognized characteristics: -/// - Has return statement (early exit on stop condition: quote for string, ']' for array) -/// - Has continue statement (skip after separator: escape for string, ',' for array) -/// - Variable step update (p++ normally, but p+=2 on escape for string) -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// `Some(ParseStringInfo)` if the pattern matches, `None` otherwise -/// -/// # Notes -/// -/// This detector handles both parse_string and parse_array patterns as they share -/// the same structural characteristics: -/// - Multiple exit types (return AND continue) -/// - Variable step increment (conditional on separator/escape) -/// - Nested control flow (separator/escape has nested if inside) -pub fn detect_parse_string_pattern(body: &[ASTNode]) -> Option { - if body.is_empty() { - return None; - } - - // We need to find: - // 1. An if statement with return in then_body - // 2. An if statement with continue in then_body (nested inside) - // 3. Carrier updates (normal and escape-case) - - let mut has_return = false; - let mut has_continue = false; - let mut carrier_name = None; - let mut delta = None; - - // Scan for return statement - for stmt in body { - if let ASTNode::If { then_body, .. } = stmt { - if then_body - .iter() - .any(|s| matches!(s, ASTNode::Return { .. })) - { - has_return = true; - break; - } - } - } - - if !has_return { - return None; - } - - // Scan for continue statement and carrier update (with recursive check for nested continue) - for stmt in body { - if let ASTNode::If { then_body, .. } = stmt { - // Check for continue in then_body (including nested) - if then_body.iter().any(|s| has_continue_node(s)) { - has_continue = true; - } - - // Extract carrier update from then_body - for s in then_body { - if let ASTNode::Assignment { target, value, .. } = s { - if let ASTNode::Variable { name, .. } = target.as_ref() { - if let ASTNode::BinaryOp { - operator: BinaryOperator::Add, - left, - right, - .. - } = value.as_ref() - { - if let ASTNode::Variable { - name: left_name, .. - } = left.as_ref() - { - if left_name == name { - if let ASTNode::Literal { - value: LiteralValue::Integer(n), - .. - } = right.as_ref() - { - carrier_name = Some(name.clone()); - delta = Some(*n); - } - } - } - } - } - } - } - } - - // Also check for carrier update in main body - if let ASTNode::Assignment { target, value, .. } = stmt { - if let ASTNode::Variable { name, .. } = target.as_ref() { - if let ASTNode::BinaryOp { - operator: BinaryOperator::Add, - left, - right, - .. - } = value.as_ref() - { - if let ASTNode::Variable { - name: left_name, .. - } = left.as_ref() - { - if left_name == name { - if let ASTNode::Literal { - value: LiteralValue::Integer(n), - .. - } = right.as_ref() - { - if carrier_name.is_none() { - carrier_name = Some(name.clone()); - delta = Some(*n); - } - } - } - } - } - } - } - } - - if !has_return || !has_continue { - return None; - } - - let carrier_name = carrier_name?; - let delta = delta?; - - // Extract body statements (for now, just the first statement which should be ch assignment) - let body_stmts = if !body.is_empty() { - vec![body[0].clone()] - } else { - vec![] - }; - - Some(ParseStringInfo { - carrier_name, - delta, - body_stmts, - }) -} - -// ============================================================================ -// Phase 140-P4-A: Skip Whitespace Pattern Detection (SSOT) -// ============================================================================ - -/// Skip whitespace pattern information -/// -/// This struct holds the extracted information from a recognized skip_whitespace pattern. -#[derive(Debug, Clone, PartialEq)] -pub struct SkipWhitespaceInfo { - /// Carrier variable name (e.g., "p") - pub carrier_name: String, - /// Constant step increment (e.g., 1 for `p = p + 1`) - pub delta: i64, - /// Body statements before the if-else (may be empty) - pub body_stmts: Vec, -} - -/// Detect skip_whitespace / trim leading/trailing pattern in loop body -/// -/// Phase 142 P0: Generalized to handle both +1 and -1 patterns -/// -/// Pattern structure: -/// ``` -/// loop(cond) { -/// // ... optional body statements (Body) -/// if check_cond { -/// carrier = carrier (+|-) const -/// } else { -/// break -/// } -/// } -/// ``` -/// -/// Recognized patterns: -/// - skip_whitespace: `p < len`, `p = p + 1` -/// - trim_leading: `start < end`, `start = start + 1` -/// - trim_trailing: `end > start`, `end = end - 1` -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// `Some(SkipWhitespaceInfo)` if the pattern matches, `None` otherwise -/// -/// # Notes -/// -/// This is the SSOT for skip_whitespace/trim pattern detection. -/// Used by both loop_canonicalizer (Phase 137) and future pattern analyzers. -pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option { - if body.is_empty() { - return None; - } - - // Last statement must be if-else with break - let last_stmt = &body[body.len() - 1]; - - let (then_body, else_body) = match last_stmt { - ASTNode::If { - then_body, - else_body: Some(else_body), - .. - } => (then_body, else_body), - _ => return None, - }; - - // Then branch must be single assignment: carrier = carrier (+|-) const - if then_body.len() != 1 { - return None; - } - - let (carrier_name, delta) = match &then_body[0] { - ASTNode::Assignment { target, value, .. } => { - // Extract target variable name - let target_name = match target.as_ref() { - ASTNode::Variable { name, .. } => name.clone(), - _ => return None, - }; - - // Value must be: target (+|-) const - match value.as_ref() { - ASTNode::BinaryOp { - operator, - left, - right, - .. - } => { - // Phase 142 P0: Accept both Add (+1) and Subtract (-1) - let op_multiplier = match operator { - BinaryOperator::Add => 1, - BinaryOperator::Subtract => -1, - _ => return None, - }; - - // Left must be same variable - let left_name = match left.as_ref() { - ASTNode::Variable { name, .. } => name, - _ => return None, - }; - - if left_name != &target_name { - return None; - } - - // Right must be integer literal - let const_val = match right.as_ref() { - ASTNode::Literal { - value: LiteralValue::Integer(n), - .. - } => *n, - _ => return None, - }; - - // Calculate delta with sign (e.g., +1 or -1) - let delta = const_val * op_multiplier; - - (target_name, delta) - } - _ => return None, - } - } - _ => return None, - }; - - // Else branch must be single break - if else_body.len() != 1 { - return None; - } - - match &else_body[0] { - ASTNode::Break { .. } => { - // Success! Extract body statements (all except last if) - let body_stmts = body[..body.len() - 1].to_vec(); - Some(SkipWhitespaceInfo { - carrier_name, - delta, - body_stmts, - }) - } - _ => None, - } -} - -// ============================================================================ -// Phase 104: loop(true) + break-only digits (read_digits_from) -// ============================================================================ - -/// loop(true) + break-only digits pattern information -#[derive(Debug, Clone, PartialEq)] -pub struct ReadDigitsLoopTrueInfo { - /// Counter variable name (e.g., "i") - pub carrier_name: String, - /// Constant step increment (currently only supports +1) - pub delta: i64, - /// Body statements before the digit-check if (may include `ch = substring(...)`, `if ch==\"\" { break }`, etc.) - pub body_stmts: Vec, -} - -/// Detect read_digits_from-like pattern in loop body (loop(true) expected at callsite) -/// -/// Recognized minimal shape (JsonCursorBox/MiniJsonLoader): -/// ```text -/// loop(true) { -/// local ch = s.substring(i, i+1) -/// if ch == "" { break } -/// if is_digit(ch) { out = out + ch; i = i + 1 } else { break } -/// } -/// ``` -/// -/// Contract (Phase 104 minimal): -/// - Last statement is `if ... { ... } else { break }` -/// - Then branch contains an update `i = i + 1` -/// - Then branch may contain other updates (e.g., `out = out + ch`) -pub fn detect_read_digits_loop_true_pattern(body: &[ASTNode]) -> Option { - if body.is_empty() { - return None; - } - - // Last statement must be if-else with break - let last_stmt = &body[body.len() - 1]; - let (then_body, else_body) = match last_stmt { - ASTNode::If { - then_body, - else_body: Some(else_body), - .. - } => (then_body, else_body), - _ => return None, - }; - - // Else branch must be single break - if else_body.len() != 1 || !matches!(else_body[0], ASTNode::Break { .. }) { - return None; - } - - // Then branch must include `i = i + 1` (allow other statements too) - let mut carrier_name: Option = None; - let mut delta: Option = None; - for stmt in then_body { - let (name, d) = match stmt { - ASTNode::Assignment { target, value, .. } => { - let target_name = match target.as_ref() { - ASTNode::Variable { name, .. } => name.clone(), - _ => continue, - }; - match value.as_ref() { - ASTNode::BinaryOp { - operator: BinaryOperator::Add, - left, - right, - .. - } => { - let left_name = match left.as_ref() { - ASTNode::Variable { name, .. } => name, - _ => continue, - }; - if left_name != &target_name { - continue; - } - let const_val = match right.as_ref() { - ASTNode::Literal { - value: LiteralValue::Integer(n), - .. - } => *n, - _ => continue, - }; - (target_name, const_val) - } - _ => continue, - } - } - _ => continue, - }; - - // Phase 104 minimal: only accept +1 step - if d == 1 { - carrier_name = Some(name); - delta = Some(1); - break; - } - } - - let carrier_name = carrier_name?; - let delta = delta?; - - let body_stmts = body[..body.len() - 1].to_vec(); - Some(ReadDigitsLoopTrueInfo { - carrier_name, - delta, - body_stmts, - }) -} - -// ============================================================================ -// Phase 91 P5b (Escape Sequence Handling) Pattern -// ============================================================================ -// Moved to escape_pattern_recognizer.rs for better modularity -pub use super::escape_pattern_recognizer::detect_escape_skip_pattern; diff --git a/src/mir/builder/control_flow/joinir/patterns/mod.rs b/src/mir/builder/control_flow/joinir/patterns/mod.rs index ee9c4ea0..830a5b21 100644 --- a/src/mir/builder/control_flow/joinir/patterns/mod.rs +++ b/src/mir/builder/control_flow/joinir/patterns/mod.rs @@ -54,6 +54,7 @@ pub(in crate::mir::builder) mod common; // Phase 255 P2: Common AST helpers pub(in crate::mir::builder) mod extractors; // Phase 282 P3: Common extraction interfaces +pub(in crate::mir::builder) mod pattern_recognizers; // Phase 287 P1: Modularized pattern recognizers pub(in crate::mir::builder) mod ast_feature_extractor; pub(in crate::mir::builder) mod policies; // Phase 93/94: Pattern routing policies (future expansion) pub(in crate::mir::builder) mod body_local_policy; // Phase 92 P3: promotion vs slot routing diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/carrier_count.rs b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/carrier_count.rs new file mode 100644 index 00000000..66fa4417 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/carrier_count.rs @@ -0,0 +1,61 @@ +//! Carrier Count Estimation +//! +//! Phase 287 P1: Extracted from ast_feature_extractor.rs +//! +//! This module provides heuristic-based carrier variable counting. + +use crate::ast::ASTNode; + +/// Count carrier variables (variables assigned in loop body) +/// +/// This is a heuristic: counts assignment statements as a proxy for carriers. +/// A more precise implementation would track which specific variables are assigned. +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// Count of distinct carrier variables (0 or 1 in current implementation) +/// +/// # Notes +/// +/// Current implementation returns 0 or 1 (at least one assignment present). +/// Future enhancement: track individual variable assignments for precise carrier count. +pub(crate) fn count_carriers_in_body(body: &[ASTNode]) -> usize { + let mut count = 0; + for node in body { + match node { + ASTNode::Assignment { .. } => count += 1, + ASTNode::If { + then_body, + else_body, + .. + } => { + count += count_carriers_in_body(then_body); + if let Some(else_body) = else_body { + count += count_carriers_in_body(else_body); + } + } + _ => {} + } + } + // Return at least 1 if we have assignments, otherwise 0 + if count > 0 { + 1 + } else { + 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_body() { + let empty: Vec = vec![]; + assert_eq!(count_carriers_in_body(&empty), 0); + } +} diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/continue_break.rs b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/continue_break.rs new file mode 100644 index 00000000..4cfef591 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/continue_break.rs @@ -0,0 +1,147 @@ +//! Continue/Break/Return Detection +//! +//! Phase 287 P1: Extracted from ast_feature_extractor.rs +//! +//! This module provides simple recursive detection of continue, break, and return statements +//! within loop bodies and nested structures. + +use crate::ast::ASTNode; + +/// Detect if a loop body contains continue statements +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `true` if at least one continue statement is found in the body or nested structures +/// +/// # Notes +/// +/// This is a simple recursive scan that doesn't handle nested loops perfectly, +/// but is sufficient for initial pattern detection. +pub(crate) fn detect_continue_in_body(body: &[ASTNode]) -> bool { + for stmt in body { + if has_continue_node(stmt) { + return true; + } + } + false +} + +/// Detect if a loop body contains break statements +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `true` if at least one break statement is found in the body or nested structures +pub(crate) fn detect_break_in_body(body: &[ASTNode]) -> bool { + for stmt in body { + if has_break_node(stmt) { + return true; + } + } + false +} + +/// Detect if a loop body contains return statements +/// +/// This is used for dev-only parity checks with structure SSOT (StepTree). +pub(crate) fn detect_return_in_body(body: &[ASTNode]) -> bool { + for stmt in body { + if has_return_node(stmt) { + return true; + } + } + false +} + +/// Recursive helper to check if AST node contains continue +pub(super) fn has_continue_node(node: &ASTNode) -> bool { + match node { + ASTNode::Continue { .. } => true, + ASTNode::If { + then_body, + else_body, + .. + } => { + then_body.iter().any(has_continue_node) + || else_body + .as_ref() + .map_or(false, |e| e.iter().any(has_continue_node)) + } + ASTNode::Loop { body, .. } => body.iter().any(has_continue_node), + _ => false, + } +} + +/// Recursive helper to check if AST node contains break +fn has_break_node(node: &ASTNode) -> bool { + match node { + ASTNode::Break { .. } => true, + ASTNode::If { + then_body, + else_body, + .. + } => { + then_body.iter().any(has_break_node) + || else_body + .as_ref() + .map_or(false, |e| e.iter().any(has_break_node)) + } + ASTNode::Loop { body, .. } => body.iter().any(has_break_node), + _ => false, + } +} + +/// Recursive helper to check if AST node contains return +fn has_return_node(node: &ASTNode) -> bool { + match node { + ASTNode::Return { .. } => true, + ASTNode::If { + then_body, + else_body, + .. + } => { + then_body.iter().any(has_return_node) + || else_body + .as_ref() + .map_or(false, |e| e.iter().any(has_return_node)) + } + ASTNode::Loop { body, .. } => body.iter().any(has_return_node), + ASTNode::ScopeBox { body, .. } => body.iter().any(has_return_node), + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_detect_continue_simple() { + let continue_node = ASTNode::Continue { + span: crate::ast::Span::unknown(), + }; + assert!(has_continue_node(&continue_node)); + } + + #[test] + fn test_detect_break_simple() { + let break_node = ASTNode::Break { + span: crate::ast::Span::unknown(), + }; + assert!(has_break_node(&break_node)); + } + + #[test] + fn test_empty_body() { + let empty: Vec = vec![]; + assert!(!detect_continue_in_body(&empty)); + assert!(!detect_break_in_body(&empty)); + } +} diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/if_else_phi.rs b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/if_else_phi.rs new file mode 100644 index 00000000..4efb3d26 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/if_else_phi.rs @@ -0,0 +1,73 @@ +//! If-Else PHI Pattern Detection +//! +//! Phase 287 P1: Extracted from ast_feature_extractor.rs +//! +//! This module detects if-else statements with potential PHI patterns. + +use crate::ast::ASTNode; + +/// Detect if-else statements with potential PHI pattern +/// +/// Looks for if-else statements where both branches contain assignments. +/// This is a heuristic indicating a potential PHI merge point. +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `true` if at least one if-else statement with assignments in both branches is found +/// +/// # Phase 264 P0: Conservative Implementation +/// +/// Previously returned true if both if/else branches had assignments. +/// This was too broad - it caught simple conditional assignments like: +/// `if x then seg = "A" else seg = "B"` +/// +/// Pattern3 is designed for if-sum patterns with arithmetic accumulation: +/// `sum = sum + (if x then 1 else 0)` +/// +/// Phase 264 P0: Return false to prevent misclassification. +/// Effect: Loops with conditional assignment fall through to Pattern1. +/// +/// Phase 264 P1: TODO - Implement accurate if-sum signature detection. +pub(crate) fn detect_if_else_phi_in_body(body: &[ASTNode]) -> bool { + // Phase 282 P5: Proper if-else PHI detection (re-enabled with ExtractionBased safety) + // + // This function provides initial classification for Pattern3IfPhi. + // The actual validation is done by extractors::pattern3::extract_loop_with_if_phi_parts() + // which performs deep checks (PHI assignments, no control flow, etc.) + // + // Here we just check: Does the loop body contain an if-else statement? + // This allows Pattern3 to be attempted, and extraction will validate. + + for stmt in body { + if matches!(stmt, ASTNode::If { else_body: Some(_), .. }) { + return true; // Found if-else + } + } + false // No if-else found +} + +/// Phase 212.5: Detect ANY if statement in loop body (structural detection) +/// +/// This function detects any if statement, regardless of whether it has an else branch. +/// Used for routing single-carrier if-update patterns to Pattern 3. +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `true` if at least one if statement is found (with or without else) +#[allow(dead_code)] +fn detect_if_in_body(body: &[ASTNode]) -> bool { + for node in body { + if let ASTNode::If { .. } = node { + return true; + } + } + false +} diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/infinite_loop.rs b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/infinite_loop.rs new file mode 100644 index 00000000..b108d1dd --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/infinite_loop.rs @@ -0,0 +1,26 @@ +//! Infinite Loop Detection +//! +//! Phase 287 P1: Extracted from ast_feature_extractor.rs +//! +//! This module detects infinite loop patterns (condition == true). + +use crate::ast::ASTNode; + +/// Phase 131-11: Detect infinite loop (condition == Literal(Bool(true))) +/// +/// # Arguments +/// +/// * `condition` - Loop condition AST node +/// +/// # Returns +/// +/// `true` if condition is a boolean literal with value true +pub(crate) fn detect_infinite_loop(condition: &ASTNode) -> bool { + matches!( + condition, + ASTNode::Literal { + value: crate::ast::LiteralValue::Bool(true), + .. + } + ) +} diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/mod.rs b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/mod.rs new file mode 100644 index 00000000..3f7c3098 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/mod.rs @@ -0,0 +1,20 @@ +//! Pattern Recognizers Module +//! +//! Phase 287 P1: Modularization of AST pattern detection functions. +//! +//! This module contains specialized recognizers for different loop patterns: +//! - continue/break/return detection +//! - infinite loop detection +//! - if-else phi detection +//! - carrier count estimation +//! - parse_number/string/whitespace patterns +//! +//! Each recognizer is responsible for a single "question" about the AST structure. + +pub mod continue_break; +pub mod infinite_loop; +pub mod if_else_phi; +pub mod carrier_count; +pub mod parse_number; +pub mod parse_string; +pub mod skip_whitespace; diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/parse_number.rs b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/parse_number.rs new file mode 100644 index 00000000..2830d553 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/parse_number.rs @@ -0,0 +1,259 @@ +//! Parse Number/Digit Pattern Detection +//! +//! Phase 287 P1: Extracted from ast_feature_extractor.rs +//! +//! This module detects parse_number and digit collection patterns. + +use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; + +/// Parse number pattern information +/// +/// This struct holds the extracted information from a recognized parse_number pattern. +#[derive(Debug, Clone, PartialEq)] +pub struct ParseNumberInfo { + /// Carrier variable name (e.g., "i") + pub carrier_name: String, + /// Constant step increment (e.g., 1 for `i = i + 1`) + pub delta: i64, + /// Body statements before the break check (may be empty) + pub body_stmts: Vec, + /// Rest statements after break check (usually includes result append and carrier update) + pub rest_stmts: Vec, +} + +/// Detect parse_number / digit collection pattern in loop body +/// +/// Phase 143-P0: Pattern with break in THEN clause (opposite of skip_whitespace) +/// +/// Pattern structure: +/// ``` +/// loop(cond) { +/// // ... optional body statements (ch, digit_pos computation) +/// if invalid_cond { +/// break +/// } +/// // ... rest statements (result append, carrier update) +/// carrier = carrier + const +/// } +/// ``` +/// +/// Recognized pattern: +/// - parse_number: `i < len`, `if digit_pos < 0 { break }`, `i = i + 1` +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `Some(ParseNumberInfo)` if the pattern matches, `None` otherwise +/// +/// # Notes +/// +/// This is complementary to skip_whitespace pattern (which has break in ELSE clause). +/// Used by loop_canonicalizer (Phase 143) for digit collection patterns. +pub fn detect_parse_number_pattern(body: &[ASTNode]) -> Option { + if body.is_empty() { + return None; + } + + // Find the if statement with break in THEN clause + let mut if_idx = None; + for (i, stmt) in body.iter().enumerate() { + if let ASTNode::If { + then_body, + else_body, + .. + } = stmt + { + // Check if then_body contains break and else_body is None + if else_body.is_none() + && then_body.len() == 1 + && matches!(then_body[0], ASTNode::Break { .. }) + { + if_idx = Some(i); + break; + } + } + } + + let if_idx = if_idx?; + + // Extract body statements before the if + let body_stmts = body[..if_idx].to_vec(); + + // Extract rest statements after the if (should include carrier update) + let rest_stmts = body[if_idx + 1..].to_vec(); + + if rest_stmts.is_empty() { + return None; + } + + // Find carrier update in rest_stmts (last statement should be carrier = carrier + const) + let last_stmt = &rest_stmts[rest_stmts.len() - 1]; + + let (carrier_name, delta) = match last_stmt { + ASTNode::Assignment { target, value, .. } => { + // Extract target variable name + let target_name = match target.as_ref() { + ASTNode::Variable { name, .. } => name.clone(), + _ => return None, + }; + + // Value must be: target (+|-) const + match value.as_ref() { + ASTNode::BinaryOp { + operator, + left, + right, + .. + } => { + // Accept both Add (+1) and Subtract (-1) + let op_multiplier = match operator { + BinaryOperator::Add => 1, + BinaryOperator::Subtract => -1, + _ => return None, + }; + + // Left must be same variable + let left_name = match left.as_ref() { + ASTNode::Variable { name, .. } => name, + _ => return None, + }; + + if left_name != &target_name { + return None; + } + + // Right must be integer literal + let const_val = match right.as_ref() { + ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } => *n, + _ => return None, + }; + + // Calculate delta with sign + let delta = const_val * op_multiplier; + + (target_name, delta) + } + _ => return None, + } + } + _ => return None, + }; + + Some(ParseNumberInfo { + carrier_name, + delta, + body_stmts, + rest_stmts, + }) +} + +/// loop(true) + break-only digits pattern information +#[derive(Debug, Clone, PartialEq)] +pub struct ReadDigitsLoopTrueInfo { + /// Counter variable name (e.g., "i") + pub carrier_name: String, + /// Constant step increment (currently only supports +1) + pub delta: i64, + /// Body statements before the digit-check if (may include `ch = substring(...)`, `if ch==\"\" { break }`, etc.) + pub body_stmts: Vec, +} + +/// Detect read_digits_from-like pattern in loop body (loop(true) expected at callsite) +/// +/// Recognized minimal shape (JsonCursorBox/MiniJsonLoader): +/// ```text +/// loop(true) { +/// local ch = s.substring(i, i+1) +/// if ch == "" { break } +/// if is_digit(ch) { out = out + ch; i = i + 1 } else { break } +/// } +/// ``` +/// +/// Contract (Phase 104 minimal): +/// - Last statement is `if ... { ... } else { break }` +/// - Then branch contains an update `i = i + 1` +/// - Then branch may contain other updates (e.g., `out = out + ch`) +pub fn detect_read_digits_loop_true_pattern(body: &[ASTNode]) -> Option { + if body.is_empty() { + return None; + } + + // Last statement must be if-else with break + let last_stmt = &body[body.len() - 1]; + let (then_body, else_body) = match last_stmt { + ASTNode::If { + then_body, + else_body: Some(else_body), + .. + } => (then_body, else_body), + _ => return None, + }; + + // Else branch must be single break + if else_body.len() != 1 || !matches!(else_body[0], ASTNode::Break { .. }) { + return None; + } + + // Then branch must include `i = i + 1` (allow other statements too) + let mut carrier_name: Option = None; + let mut delta: Option = None; + for stmt in then_body { + let (name, d) = match stmt { + ASTNode::Assignment { target, value, .. } => { + let target_name = match target.as_ref() { + ASTNode::Variable { name, .. } => name.clone(), + _ => continue, + }; + match value.as_ref() { + ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left, + right, + .. + } => { + let left_name = match left.as_ref() { + ASTNode::Variable { name, .. } => name, + _ => continue, + }; + if left_name != &target_name { + continue; + } + let const_val = match right.as_ref() { + ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } => *n, + _ => continue, + }; + (target_name, const_val) + } + _ => continue, + } + } + _ => continue, + }; + + // Phase 104 minimal: only accept +1 step + if d == 1 { + carrier_name = Some(name); + delta = Some(1); + break; + } + } + + let carrier_name = carrier_name?; + let delta = delta?; + + let body_stmts = body[..body.len() - 1].to_vec(); + Some(ReadDigitsLoopTrueInfo { + carrier_name, + delta, + body_stmts, + }) +} diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/parse_string.rs b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/parse_string.rs new file mode 100644 index 00000000..f1959ce7 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/parse_string.rs @@ -0,0 +1,393 @@ +//! Parse String/Array Pattern Detection +//! +//! Phase 287 P1: Extracted from ast_feature_extractor.rs +//! +//! This module detects parse_string and parse_array patterns with continue + return exits. + +use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; + +// Re-export has_continue_node from continue_break module +use super::continue_break::has_continue_node; + +/// Parse string/array pattern information +/// +/// This struct holds the extracted information from a recognized parse_string or parse_array pattern. +/// Both patterns share the same structure: continue + return exits with carrier updates. +#[derive(Debug, Clone, PartialEq)] +pub struct ParseStringInfo { + /// Carrier variable name (e.g., "p") + pub carrier_name: String, + /// Base constant step increment (e.g., 1 for `p = p + 1`) + pub delta: i64, + /// Body statements before the return/continue checks + pub body_stmts: Vec, +} + +/// Detect parse_string or parse_array pattern in loop body +/// +/// Phase 143-P1/P2: Pattern with both continue (escape/separator handling) AND return (stop condition) +/// +/// Pattern structure (parse_string example): +/// ``` +/// loop(p < len) { +/// local ch = s.substring(p, p + 1) +/// +/// // Check for closing quote (return) +/// if ch == "\"" { +/// return result +/// } +/// +/// // Check for escape sequence (continue after processing) +/// if ch == "\\" { +/// result = result + ch +/// p = p + 1 +/// if p < len { +/// result = result + s.substring(p, p + 1) +/// p = p + 1 +/// continue +/// } +/// } +/// +/// // Regular character +/// result = result + ch +/// p = p + 1 +/// } +/// ``` +/// +/// Pattern structure (parse_array example): +/// ``` +/// loop(p < len) { +/// local ch = s.substring(p, p + 1) +/// +/// // Check for array end (return) +/// if ch == "]" { +/// return result +/// } +/// +/// // Check for separator (continue after processing) +/// if ch == "," { +/// arr.push(elem) +/// elem = "" +/// p = p + 1 +/// continue +/// } +/// +/// // Accumulate element +/// elem = elem + ch +/// p = p + 1 +/// } +/// ``` +/// +/// Recognized characteristics: +/// - Has return statement (early exit on stop condition: quote for string, ']' for array) +/// - Has continue statement (skip after separator: escape for string, ',' for array) +/// - Variable step update (p++ normally, but p+=2 on escape for string) +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `Some(ParseStringInfo)` if the pattern matches, `None` otherwise +/// +/// # Notes +/// +/// This detector handles both parse_string and parse_array patterns as they share +/// the same structural characteristics: +/// - Multiple exit types (return AND continue) +/// - Variable step increment (conditional on separator/escape) +/// - Nested control flow (separator/escape has nested if inside) +pub fn detect_parse_string_pattern(body: &[ASTNode]) -> Option { + if body.is_empty() { + return None; + } + + // We need to find: + // 1. An if statement with return in then_body + // 2. An if statement with continue in then_body (nested inside) + // 3. Carrier updates (normal and escape-case) + + let mut has_return = false; + let mut has_continue = false; + let mut carrier_name = None; + let mut delta = None; + + // Scan for return statement + for stmt in body { + if let ASTNode::If { then_body, .. } = stmt { + if then_body + .iter() + .any(|s| matches!(s, ASTNode::Return { .. })) + { + has_return = true; + break; + } + } + } + + if !has_return { + return None; + } + + // Scan for continue statement and carrier update (with recursive check for nested continue) + for stmt in body { + if let ASTNode::If { then_body, .. } = stmt { + // Check for continue in then_body (including nested) + if then_body.iter().any(|s| has_continue_node(s)) { + has_continue = true; + } + + // Extract carrier update from then_body + for s in then_body { + if let ASTNode::Assignment { target, value, .. } = s { + if let ASTNode::Variable { name, .. } = target.as_ref() { + if let ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left, + right, + .. + } = value.as_ref() + { + if let ASTNode::Variable { + name: left_name, .. + } = left.as_ref() + { + if left_name == name { + if let ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } = right.as_ref() + { + carrier_name = Some(name.clone()); + delta = Some(*n); + } + } + } + } + } + } + } + } + + // Also check for carrier update in main body + if let ASTNode::Assignment { target, value, .. } = stmt { + if let ASTNode::Variable { name, .. } = target.as_ref() { + if let ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left, + right, + .. + } = value.as_ref() + { + if let ASTNode::Variable { + name: left_name, .. + } = left.as_ref() + { + if left_name == name { + if let ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } = right.as_ref() + { + if carrier_name.is_none() { + carrier_name = Some(name.clone()); + delta = Some(*n); + } + } + } + } + } + } + } + } + + if !has_return || !has_continue { + return None; + } + + let carrier_name = carrier_name?; + let delta = delta?; + + // Extract body statements (for now, just the first statement which should be ch assignment) + let body_stmts = if !body.is_empty() { + vec![body[0].clone()] + } else { + vec![] + }; + + Some(ParseStringInfo { + carrier_name, + delta, + body_stmts, + }) +} + +/// Continue pattern information +/// +/// This struct holds the extracted information from a recognized continue pattern. +#[derive(Debug, Clone, PartialEq)] +pub struct ContinuePatternInfo { + /// Carrier variable name (e.g., "i") + pub carrier_name: String, + /// Constant step increment (e.g., 1 for `i = i + 1`) + pub delta: i64, + /// Body statements before the continue check (may be empty) + pub body_stmts: Vec, + /// Body statements after the continue check (usually includes carrier update) + pub rest_stmts: Vec, +} + +/// Detect continue pattern in loop body +/// +/// Pattern structure: +/// ``` +/// loop(cond) { +/// // ... optional body statements (Body) +/// if skip_cond { +/// carrier = carrier + const // Optional update before continue +/// continue +/// } +/// // ... rest of body statements (Rest) +/// carrier = carrier + const // Carrier update +/// } +/// ``` +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `Some(ContinuePatternInfo)` if the pattern matches, `None` otherwise +pub fn detect_continue_pattern(body: &[ASTNode]) -> Option { + if body.is_empty() { + return None; + } + + // Find the if statement with continue + let mut if_idx = None; + for (i, stmt) in body.iter().enumerate() { + if let ASTNode::If { then_body, .. } = stmt { + // Check if then_body contains continue + if then_body + .iter() + .any(|s| matches!(s, ASTNode::Continue { .. })) + { + if_idx = Some(i); + break; + } + } + } + + let if_idx = if_idx?; + + // Extract body statements before the if + let body_stmts = body[..if_idx].to_vec(); + + // Extract the if statement + let if_stmt = &body[if_idx]; + + // The if must have continue in then branch + let then_body = match if_stmt { + ASTNode::If { + then_body, + else_body, + .. + } => { + // For simple continue pattern, else_body should be None + if else_body.is_some() { + return None; + } + then_body + } + _ => return None, + }; + + // Check if then_body contains carrier update before continue + // For now, we'll look for the pattern after the if statement + + // Extract rest statements after the if + let rest_stmts = body[if_idx + 1..].to_vec(); + + // Find carrier update in rest_stmts (last statement should be carrier = carrier +/- const) + if rest_stmts.is_empty() { + return None; + } + + let last_stmt = &rest_stmts[rest_stmts.len() - 1]; + + let (carrier_name, delta) = match last_stmt { + ASTNode::Assignment { target, value, .. } => { + // Extract target variable name + let target_name = match target.as_ref() { + ASTNode::Variable { name, .. } => name.clone(), + _ => return None, + }; + + // Value must be: target (+|-) const + match value.as_ref() { + ASTNode::BinaryOp { + operator, + left, + right, + .. + } => { + // Accept both Add (+1) and Subtract (-1) + let op_multiplier = match operator { + BinaryOperator::Add => 1, + BinaryOperator::Subtract => -1, + _ => return None, + }; + + // Left must be same variable + let left_name = match left.as_ref() { + ASTNode::Variable { name, .. } => name, + _ => return None, + }; + + if left_name != &target_name { + return None; + } + + // Right must be integer literal + let const_val = match right.as_ref() { + ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } => *n, + _ => return None, + }; + + // Calculate delta with sign + let delta = const_val * op_multiplier; + + (target_name, delta) + } + _ => return None, + } + } + _ => return None, + }; + + // Check if then_body has carrier update before continue + // If so, we need to validate it matches + for stmt in then_body { + if let ASTNode::Assignment { target, .. } = stmt { + if let ASTNode::Variable { name, .. } = target.as_ref() { + if name == &carrier_name { + // There's a carrier update before continue + // For now, we'll just check it exists + // Could validate it matches the pattern later + } + } + } + } + + Some(ContinuePatternInfo { + carrier_name, + delta, + body_stmts, + rest_stmts, + }) +} diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/skip_whitespace.rs b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/skip_whitespace.rs new file mode 100644 index 00000000..ca84cbe5 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern_recognizers/skip_whitespace.rs @@ -0,0 +1,147 @@ +//! Skip Whitespace Pattern Detection +//! +//! Phase 287 P1: Extracted from ast_feature_extractor.rs +//! +//! This module detects skip_whitespace and trim patterns. + +use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; + +/// Skip whitespace pattern information +/// +/// This struct holds the extracted information from a recognized skip_whitespace pattern. +#[derive(Debug, Clone, PartialEq)] +pub struct SkipWhitespaceInfo { + /// Carrier variable name (e.g., "p") + pub carrier_name: String, + /// Constant step increment (e.g., 1 for `p = p + 1`) + pub delta: i64, + /// Body statements before the if-else (may be empty) + pub body_stmts: Vec, +} + +/// Detect skip_whitespace / trim leading/trailing pattern in loop body +/// +/// Phase 142 P0: Generalized to handle both +1 and -1 patterns +/// +/// Pattern structure: +/// ``` +/// loop(cond) { +/// // ... optional body statements (Body) +/// if check_cond { +/// carrier = carrier (+|-) const +/// } else { +/// break +/// } +/// } +/// ``` +/// +/// Recognized patterns: +/// - skip_whitespace: `p < len`, `p = p + 1` +/// - trim_leading: `start < end`, `start = start + 1` +/// - trim_trailing: `end > start`, `end = end - 1` +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `Some(SkipWhitespaceInfo)` if the pattern matches, `None` otherwise +/// +/// # Notes +/// +/// This is the SSOT for skip_whitespace/trim pattern detection. +/// Used by both loop_canonicalizer (Phase 137) and future pattern analyzers. +pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option { + if body.is_empty() { + return None; + } + + // Last statement must be if-else with break + let last_stmt = &body[body.len() - 1]; + + let (then_body, else_body) = match last_stmt { + ASTNode::If { + then_body, + else_body: Some(else_body), + .. + } => (then_body, else_body), + _ => return None, + }; + + // Then branch must be single assignment: carrier = carrier (+|-) const + if then_body.len() != 1 { + return None; + } + + let (carrier_name, delta) = match &then_body[0] { + ASTNode::Assignment { target, value, .. } => { + // Extract target variable name + let target_name = match target.as_ref() { + ASTNode::Variable { name, .. } => name.clone(), + _ => return None, + }; + + // Value must be: target (+|-) const + match value.as_ref() { + ASTNode::BinaryOp { + operator, + left, + right, + .. + } => { + // Phase 142 P0: Accept both Add (+1) and Subtract (-1) + let op_multiplier = match operator { + BinaryOperator::Add => 1, + BinaryOperator::Subtract => -1, + _ => return None, + }; + + // Left must be same variable + let left_name = match left.as_ref() { + ASTNode::Variable { name, .. } => name, + _ => return None, + }; + + if left_name != &target_name { + return None; + } + + // Right must be integer literal + let const_val = match right.as_ref() { + ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } => *n, + _ => return None, + }; + + // Calculate delta with sign (e.g., +1 or -1) + let delta = const_val * op_multiplier; + + (target_name, delta) + } + _ => return None, + } + } + _ => return None, + }; + + // Else branch must be single break + if else_body.len() != 1 { + return None; + } + + match &else_body[0] { + ASTNode::Break { .. } => { + // Success! Extract body statements (all except last if) + let body_stmts = body[..body.len() - 1].to_vec(); + Some(SkipWhitespaceInfo { + carrier_name, + delta, + body_stmts, + }) + } + _ => None, + } +}