diff --git a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs index 59b62279..3c38dfa5 100644 --- a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs +++ b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs @@ -1039,307 +1039,8 @@ pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option, -} - -/// Detect escape sequence handling pattern in loop body -/// -/// Phase 91 P5b: Pattern for string parsers with escape sequence support. -/// -/// Pattern structure: -/// ``` -/// loop(i < n) { -/// // ... optional body statements (Body) -/// if ch == "\"" { -/// break -/// } -/// if ch == "\\" { -/// i = i + escape_delta // e.g., +1 (total with final +1 = +2) -/// // optional: ch = s.substring(i, i+1) -/// } -/// out = out + ch // Accumulator pattern -/// i = i + 1 // Normal increment (always +1) -/// } -/// ``` -/// -/// # Arguments -/// -/// * `body` - Loop body statements to analyze -/// -/// # Returns -/// -/// `Some(EscapeSkipPatternInfo)` if the pattern matches, `None` otherwise -/// -/// # Notes -/// -/// This is the recognizer for P5b (Escape Sequence Handling). -/// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing. -pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> { - if body.len() < 3 { - return None; // Need at least: body statements, break check, escape check - } - - // Phase 91 P5b Strategy: - // This is a simplified recognizer for escape sequence handling in string parsers. - // For now, we detect the minimum viable pattern: - // 1. Break check: if ch == quote_char { break } - // 2. Escape check: if ch == escape_char { counter = counter + escape_delta } [else { counter = counter + normal_delta }] - // - // Note: We rely on the order and pattern matching. If a loop body - // matches this exact structure, we recognize it as P5b. - - // Find break statement - scan for "if { ... 
break ... }" - let break_idx = find_break_in_if(body)?; - - // Find escape check after break - scan for second "if" with increment - let escape_idx = find_escape_in_if(body, break_idx)?; - - // For P5b, we need a consistent pattern. Extract minimal info: - // - counter_name, escape_delta, and normal_delta from the escape if statement - // - quote_char and escape_char (extracted from if conditions - for now, use defaults) - - let (counter_name, escape_delta, normal_delta) = extract_delta_pair_from_if(body, escape_idx)?; - - // Extract body statements before break check - let body_stmts = body[..break_idx].to_vec(); - - Some(EscapeSkipPatternInfo { - counter_name, - normal_delta, - escape_delta, - quote_char: '"', // Default for JSON/CSV (Phase 91 MVP) - escape_char: '\\', // Default for JSON/CSV (Phase 91 MVP) - body_stmts, - }) -} - -/// Helper: Find if statement containing break -fn find_break_in_if(body: &[ASTNode]) -> Option<usize> { - for (idx, stmt) in body.iter().enumerate() { - if let ASTNode::If { - then_body, - else_body: None, - .. - } = stmt { - // Check if then_body contains break - if then_body.len() == 1 && matches!(&then_body[0], ASTNode::Break { .. }) { - return Some(idx); - } - } - } - None -} - -/// Helper: Find if statement containing counter increment (escape check) -/// -/// Phase 91 P5b: Can be either: -/// - if ch == escape_char { i = i + 2 } (no else) -/// - if ch == escape_char { i = i + 2 } else { i = i + 1 } -fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<usize> { - for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() { - let actual_idx = after_idx + 1 + idx; - if let ASTNode::If { - then_body, - else_body, - .. - } = stmt { - // Check if then_body contains an increment assignment (escape case) - let has_then_increment = then_body.iter().any(|s| { - if let ASTNode::Assignment { target, value, .. 
} = s { - try_extract_increment_assignment(target, value).is_some() - } else { - false - } - }); - - if has_then_increment { - // If-else format: check if else_body also has increment (normal case) - if let Some(else_stmts) = else_body { - let has_else_increment = else_stmts.iter().any(|s| { - if let ASTNode::Assignment { target, value, .. } = s { - try_extract_increment_assignment(target, value).is_some() - } else { - false - } - }); - if has_else_increment { - return Some(actual_idx); - } - } else { - // No-else format: just having then increment is enough - return Some(actual_idx); - } - } - } - } - None -} - -/// Helper: Find normal increment statement (assignment outside if blocks) -fn find_normal_increment( - body: &[ASTNode], - after_idx: usize, -) -> Option<(usize, String, i64)> { - for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() { - let actual_idx = after_idx + 1 + idx; - if let ASTNode::Assignment { target, value, .. } = stmt { - if let Some((counter, delta)) = try_extract_increment_assignment(target, value) { - return Some((actual_idx, counter, delta)); - } - } - } - None -} - -/// Helper: Extract both escape_delta and normal_delta from if statement -/// -/// Handles both: -/// - if ch == escape_char { i = i + 2 } else { i = i + 1 } -/// - if ch == escape_char { i = i + 2 } (followed by separate increment) -fn extract_delta_pair_from_if(body: &[ASTNode], idx: usize) -> Option<(String, i64, i64)> { - if idx >= body.len() { - return None; - } - - if let ASTNode::If { - then_body, - else_body, - .. - } = &body[idx] { - // Extract escape_delta from then_body - let mut escape_delta: Option<i64> = None; - let mut counter_name: Option<String> = None; - - for stmt in then_body.iter() { - if let ASTNode::Assignment { target, value, .. 
} = stmt { - if let Some((name, delta)) = try_extract_increment_assignment(target, value) { - escape_delta = Some(delta); - counter_name = Some(name); - break; - } - } - } - - let (escape_delta, counter_name) = match (escape_delta, counter_name) { - (Some(d), Some(n)) => (d, n), - _ => return None, - }; - - // Extract normal_delta - let normal_delta = if let Some(else_stmts) = else_body { - // If-else format: extract from else_body - let mut found_delta: Option<i64> = None; - for stmt in else_stmts.iter() { - if let ASTNode::Assignment { target, value, .. } = stmt { - if let Some((name, delta)) = try_extract_increment_assignment(target, value) { - if name == counter_name { - found_delta = Some(delta); - break; - } - } - } - } - found_delta? - } else { - // No-else format: look for separate increment after this if - let mut found_delta: Option<i64> = None; - for stmt in body[(idx + 1)..].iter() { - if let ASTNode::Assignment { target, value, .. } = stmt { - if let Some((name, delta)) = try_extract_increment_assignment(target, value) { - if name == counter_name { - found_delta = Some(delta); - break; - } - } - } - } - found_delta? - }; - - Some((counter_name, escape_delta, normal_delta)) - } else { - None - } -} - -/// Helper: Extract escape delta from if statement (deprecated, use extract_delta_pair_from_if) -#[allow(dead_code)] -fn extract_escape_delta_from_if(body: &[ASTNode], idx: usize) -> Option<i64> { - if idx < body.len() { - if let ASTNode::If { - then_body, - .. - } = &body[idx] { - // Find the increment in then_body - for stmt in then_body.iter() { - if let ASTNode::Assignment { target, value, .. 
} = stmt { - if let Some((_, delta)) = try_extract_increment_assignment(target, value) { - return Some(delta); - } - } - } - } - } - None -} - - -/// Helper: Try to extract increment assignment (counter = counter (+|-) const) -fn try_extract_increment_assignment(target: &ASTNode, value: &ASTNode) -> Option<(String, i64)> { - let target_name = match target { - ASTNode::Variable { name, .. } => name.clone(), - _ => return None, - }; - - match value { - ASTNode::BinaryOp { - operator, - left, - right, - .. - } => { - let op_multiplier = match operator { - BinaryOperator::Add => 1, - BinaryOperator::Subtract => -1, - _ => return None, - }; - - let left_name = match left.as_ref() { - ASTNode::Variable { name, .. } => name, - _ => return None, - }; - - if left_name != &target_name { - return None; - } - - let const_val = match right.as_ref() { - ASTNode::Literal { - value: LiteralValue::Integer(n), - .. - } => *n, - _ => return None, - }; - - let delta = const_val * op_multiplier; - Some((target_name, delta)) - } - _ => None, - } -} +// ============================================================================ +// Phase 91 P5b (Escape Sequence Handling) Pattern +// ============================================================================ +// Moved to escape_pattern_recognizer.rs for better modularity +pub use super::escape_pattern_recognizer::{detect_escape_skip_pattern, EscapeSkipPatternInfo}; diff --git a/src/mir/builder/control_flow/joinir/patterns/escape_pattern_recognizer.rs b/src/mir/builder/control_flow/joinir/patterns/escape_pattern_recognizer.rs new file mode 100644 index 00000000..c709dad9 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/escape_pattern_recognizer.rs @@ -0,0 +1,255 @@ +//! Phase 91 P5b: Escape Pattern Recognizer Module +//! +//! Specialized recognizer for escape sequence handling patterns in string parsers. +//! Extracted from ast_feature_extractor.rs for improved modularity and reusability. +//! +//! # Design +//! +//! 
- **Single Responsibility**: Handles only P5b (escape sequence) pattern detection +//! - **Isolated Helpers**: Private helpers for break/escape detection +//! - **Clean Interface**: Exports only `detect_escape_skip_pattern()` + +use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; + +/// Information about a detected escape skip pattern +#[derive(Debug, Clone)] +pub struct EscapeSkipPatternInfo { + pub counter_name: String, + pub normal_delta: i64, + pub escape_delta: i64, + pub quote_char: char, + pub escape_char: char, + pub body_stmts: Vec<ASTNode>, +} + +/// Detect escape sequence handling pattern in loop body +/// +/// # Pattern Structure +/// +/// ```text +/// loop(i < n) { +/// // ... body statements +/// if ch == "\"" { break } // Break check +/// if ch == "\\" { i = i + 2 } else { i = i + 1 } // Escape check with conditional delta +/// } +/// ``` +/// +/// # Arguments +/// +/// * `body` - Loop body statements to analyze +/// +/// # Returns +/// +/// `Some(EscapeSkipPatternInfo)` if the pattern matches, `None` otherwise +/// +/// # Notes +/// +/// This is the recognizer for P5b (Escape Sequence Handling). +/// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing. +pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> { + if body.len() < 3 { + return None; // Need at least: body statements, break check, escape check + } + + // Find break statement - scan for "if { ... break ... 
}" + let break_idx = find_break_in_if(body)?; + + // Find escape check after break - scan for second "if" with increment + let escape_idx = find_escape_in_if(body, break_idx)?; + + // Extract counter_name, escape_delta, and normal_delta from the escape if statement + let (counter_name, escape_delta, normal_delta) = extract_delta_pair_from_if(body, escape_idx)?; + + // Extract body statements before break check + let body_stmts = body[..break_idx].to_vec(); + + Some(EscapeSkipPatternInfo { + counter_name, + normal_delta, + escape_delta, + quote_char: '"', // Default for JSON/CSV (Phase 91 MVP) + escape_char: '\\', // Default for JSON/CSV (Phase 91 MVP) + body_stmts, + }) +} + +// ============================================================================ +// Private Helpers (P5b-specific) +// ============================================================================ + +/// Find if statement containing break +fn find_break_in_if(body: &[ASTNode]) -> Option<usize> { + for (idx, stmt) in body.iter().enumerate() { + if let ASTNode::If { + then_body, + else_body: None, + .. + } = stmt { + // Check if then_body contains break + if then_body.len() == 1 && matches!(&then_body[0], ASTNode::Break { .. }) { + return Some(idx); + } + } + } + None +} + +/// Find if statement containing counter increment (escape check) +/// +/// Handles both: +/// - if ch == escape_char { i = i + 2 } (no else) +/// - if ch == escape_char { i = i + 2 } else { i = i + 1 } (with else) +fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<usize> { + for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() { + let actual_idx = after_idx + 1 + idx; + if let ASTNode::If { + then_body, + else_body, + .. + } = stmt { + // Check if then_body contains an increment assignment (escape case) + let has_then_increment = then_body.iter().any(|s| { + if let ASTNode::Assignment { target, value, .. 
} = s { + try_extract_increment_assignment(target, value).is_some() + } else { + false + } + }); + + if has_then_increment { + // If-else format: check if else_body also has increment (normal case) + if let Some(else_stmts) = else_body { + let has_else_increment = else_stmts.iter().any(|s| { + if let ASTNode::Assignment { target, value, .. } = s { + try_extract_increment_assignment(target, value).is_some() + } else { + false + } + }); + if has_else_increment { + return Some(actual_idx); + } + } else { + // No-else format: just having then increment is enough + return Some(actual_idx); + } + } + } + } + None +} + +/// Extract both escape_delta and normal_delta from if statement +/// +/// Handles both: +/// - if ch == escape_char { i = i + 2 } else { i = i + 1 } +/// - if ch == escape_char { i = i + 2 } (followed by separate increment) +fn extract_delta_pair_from_if(body: &[ASTNode], idx: usize) -> Option<(String, i64, i64)> { + if idx >= body.len() { + return None; + } + + if let ASTNode::If { + then_body, + else_body, + .. + } = &body[idx] { + // Extract escape_delta from then_body + let mut escape_delta: Option<i64> = None; + let mut counter_name: Option<String> = None; + + for stmt in then_body.iter() { + if let ASTNode::Assignment { target, value, .. } = stmt { + if let Some((name, delta)) = try_extract_increment_assignment(target, value) { + escape_delta = Some(delta); + counter_name = Some(name); + break; + } + } + } + + let (escape_delta, counter_name) = match (escape_delta, counter_name) { + (Some(d), Some(n)) => (d, n), + _ => return None, + }; + + // Extract normal_delta + let normal_delta = if let Some(else_stmts) = else_body { + // If-else format: extract from else_body + let mut found_delta: Option<i64> = None; + for stmt in else_stmts.iter() { + if let ASTNode::Assignment { target, value, .. 
} = stmt { + if let Some((name, delta)) = try_extract_increment_assignment(target, value) { + if name == counter_name { + found_delta = Some(delta); + break; + } + } + } + } + found_delta? + } else { + // No-else format: look for separate increment after this if + let mut found_delta: Option<i64> = None; + for stmt in body[(idx + 1)..].iter() { + if let ASTNode::Assignment { target, value, .. } = stmt { + if let Some((name, delta)) = try_extract_increment_assignment(target, value) { + if name == counter_name { + found_delta = Some(delta); + break; + } + } + } + } + found_delta? + }; + + Some((counter_name, escape_delta, normal_delta)) + } else { + None + } +} + +/// Try to extract increment assignment (counter = counter (+|-) const) +fn try_extract_increment_assignment(target: &ASTNode, value: &ASTNode) -> Option<(String, i64)> { + let target_name = match target { + ASTNode::Variable { name, .. } => name.clone(), + _ => return None, + }; + + match value { + ASTNode::BinaryOp { + operator, + left, + right, + .. + } => { + let op_multiplier = match operator { + BinaryOperator::Add => 1, + BinaryOperator::Subtract => -1, + _ => return None, + }; + + let left_name = match left.as_ref() { + ASTNode::Variable { name, .. } => name, + _ => return None, + }; + + if left_name != &target_name { + return None; + } + + let const_val = match right.as_ref() { + ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } => *n, + _ => return None, + }; + + let delta = const_val * op_multiplier; + Some((target_name, delta)) + } + _ => None, + } +} diff --git a/src/mir/builder/control_flow/joinir/patterns/mod.rs b/src/mir/builder/control_flow/joinir/patterns/mod.rs index 9d0ea9ec..36b3f6ab 100644 --- a/src/mir/builder/control_flow/joinir/patterns/mod.rs +++ b/src/mir/builder/control_flow/joinir/patterns/mod.rs @@ -40,8 +40,13 @@ //! //! Phase 179-B: Generic Pattern Framework //! - pattern_pipeline.rs: Unified preprocessing pipeline for Patterns 1-4 +//! +//! 
Phase 91 P5b: Escape Pattern Recognizer +//! - escape_pattern_recognizer.rs: P5b (escape sequence handling) pattern detection +//! - Extracted from ast_feature_extractor for improved modularity pub(in crate::mir::builder) mod ast_feature_extractor; +pub(in crate::mir::builder) mod escape_pattern_recognizer; // Phase 91 P5b pub(in crate::mir::builder) mod common_init; pub(in crate::mir::builder) mod condition_env_builder; pub(in crate::mir::builder) mod conversion_pipeline;