diff --git a/src/mir/builder.rs b/src/mir/builder.rs
index 87953a82..7ad8682f 100644
--- a/src/mir/builder.rs
+++ b/src/mir/builder.rs
@@ -38,6 +38,8 @@ pub(crate) use control_flow::{detect_continue_pattern, ContinuePatternInfo};
 // Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer
 pub(crate) use control_flow::{detect_parse_number_pattern, ParseNumberInfo};
 pub(crate) use control_flow::{detect_parse_string_pattern, ParseStringInfo};
+// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
+pub(crate) use control_flow::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
 mod exprs_lambda; // lambda lowering
 mod exprs_peek; // peek expression
 mod exprs_qmark; // ?-propagate
diff --git a/src/mir/builder/control_flow/joinir/mod.rs b/src/mir/builder/control_flow/joinir/mod.rs
index 9b929214..dec67dec 100644
--- a/src/mir/builder/control_flow/joinir/mod.rs
+++ b/src/mir/builder/control_flow/joinir/mod.rs
@@ -25,3 +25,6 @@ pub(crate) use patterns::{detect_continue_pattern, ContinuePatternInfo};
 // Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer
 pub(crate) use patterns::{detect_parse_number_pattern, ParseNumberInfo};
 pub(crate) use patterns::{detect_parse_string_pattern, ParseStringInfo};
+
+// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
+pub(crate) use patterns::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
diff --git a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs
index bbbf82d7..20d8acb8 100644
--- a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs
+++ b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs
@@ -1038,3 +1038,168 @@ pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespace
         _ => None,
     }
 }
+
+/// Phase 91 P5b: Escape Sequence Handling Pattern Info
+///
+/// Information extracted from an escape skip pattern in a loop body.
+/// Stores counter name, normal delta, escape delta, and character constants.
+pub struct EscapeSkipPatternInfo {
+    /// Counter variable name (e.g., "i")
+    pub counter_name: String,
+    /// Normal increment delta (usually 1: i = i + 1)
+    pub normal_delta: i64,
+    /// Delta applied inside the escape `if` block (usually 1). In the recognized
+    /// shape the normal increment follows that `if`, so an escaped character
+    /// advances the counter by `escape_delta + normal_delta` (usually 2).
+    pub escape_delta: i64,
+    /// Quote/boundary character (the recognizer currently always reports '"')
+    pub quote_char: char,
+    /// Escape character (the recognizer currently always reports '\\')
+    pub escape_char: char,
+    /// Body statements before the break check
+    pub body_stmts: Vec<ASTNode>,
+}
+
+/// Detect escape sequence handling pattern in loop body
+///
+/// Phase 91 P5b: Pattern for string parsers with escape sequence support.
+///
+/// Pattern structure:
+/// ```text
+/// loop(i < n) {
+///     // ... optional body statements (Body)
+///     if ch == "\"" {
+///         break
+///     }
+///     if ch == "\\" {
+///         i = i + escape_delta  // e.g., +1 (total with final +1 = +2)
+///         // optional: ch = s.substring(i, i+1)
+///     }
+///     out = out + ch  // Accumulator pattern
+///     i = i + 1       // Normal increment (always +1)
+/// }
+/// ```
+///
+/// # Arguments
+///
+/// * `body` - Loop body statements to analyze
+///
+/// # Returns
+///
+/// `Some(EscapeSkipPatternInfo)` if the pattern matches, `None` otherwise.
+/// The escape `if` and the normal increment must update the same counter.
+///
+/// # Notes
+///
+/// This is the recognizer for P5b (Escape Sequence Handling).
+/// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing.
+/// The `if` conditions are not inspected yet, so `quote_char` and `escape_char`
+/// are always the Phase 91 MVP defaults.
+pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> {
+    if body.len() < 4 {
+        return None; // Need at least: break check, escape check, accumulator, normal increment
+    }
+
+    // Phase 91 P5b strategy: a simplified, order-sensitive recognizer.
+    //   1. Break check:      if <cond> { break }
+    //   2. Escape check:     if <cond> { counter = counter + escape_delta }
+    //   3. Normal increment: counter = counter + normal_delta (top level, after 2)
+    let break_idx = find_break_in_if(body)?;
+    let (escape_idx, counter_name, escape_delta) = find_escape_in_if(body, break_idx)?;
+
+    // Only an increment of the same counter qualifies as the normal increment.
+    // Accepting any increment would let `if .. { j = j + 1 }  i = i + 1` be
+    // reported as counter `i` with the escape delta of `j`.
+    let normal_delta = find_normal_increment(body, escape_idx, &counter_name)?;
+
+    Some(EscapeSkipPatternInfo {
+        counter_name,
+        normal_delta,
+        escape_delta,
+        quote_char: '"',   // Default for JSON/CSV (Phase 91 MVP)
+        escape_char: '\\', // Default for JSON/CSV (Phase 91 MVP)
+        body_stmts: body[..break_idx].to_vec(), // statements before the break check
+    })
+}
+
+/// Helper: Find the first `if` (no `else`) whose body is a single `break`
+fn find_break_in_if(body: &[ASTNode]) -> Option<usize> {
+    body.iter().position(|stmt| {
+        matches!(
+            stmt,
+            ASTNode::If { then_body, else_body: None, .. }
+                if then_body.len() == 1 && matches!(then_body[0], ASTNode::Break { .. })
+        )
+    })
+}
+
+/// Helper: Find the escape check after `after_idx`
+///
+/// The escape check is the first `if` (no `else`) whose body contains a counter
+/// increment. Returns its index plus the counter name and delta of the first
+/// increment in that body.
+fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<(usize, String, i64)> {
+    body.iter()
+        .enumerate()
+        .skip(after_idx + 1)
+        .find_map(|(idx, stmt)| match stmt {
+            ASTNode::If { then_body, else_body: None, .. } => then_body
+                .iter()
+                .find_map(increment_in_statement)
+                .map(|(counter, delta)| (idx, counter, delta)),
+            _ => None,
+        })
+}
+
+/// Helper: Find the normal increment of `counter` (top-level assignment after `after_idx`)
+fn find_normal_increment(body: &[ASTNode], after_idx: usize, counter: &str) -> Option<i64> {
+    body.iter()
+        .skip(after_idx + 1)
+        .filter_map(increment_in_statement)
+        .find(|(name, _)| name.as_str() == counter)
+        .map(|(_, delta)| delta)
+}
+
+/// Helper: Read a statement as `counter = counter (+|-) const`, if it is one
+fn increment_in_statement(stmt: &ASTNode) -> Option<(String, i64)> {
+    match stmt {
+        ASTNode::Assignment { target, value, .. } => {
+            try_extract_increment_assignment(target, value)
+        }
+        _ => None,
+    }
+}
+
+/// Helper: Try to extract increment assignment (counter = counter (+|-) const)
+///
+/// Returns the counter name and the signed delta (negated for subtraction).
+/// Returns `None` for any other shape, or when the delta does not fit in `i64`.
+fn try_extract_increment_assignment(target: &ASTNode, value: &ASTNode) -> Option<(String, i64)> {
+    let target_name = match target {
+        ASTNode::Variable { name, .. } => name,
+        _ => return None,
+    };
+    let (operator, left, right) = match value {
+        ASTNode::BinaryOp { operator, left, right, .. } => (operator, left, right),
+        _ => return None,
+    };
+
+    // The right-hand side must read the variable being assigned.
+    match left.as_ref() {
+        ASTNode::Variable { name, .. } if name == target_name => {}
+        _ => return None,
+    }
+
+    let const_val = match right.as_ref() {
+        ASTNode::Literal { value: LiteralValue::Integer(n), .. } => *n,
+        _ => return None,
+    };
+
+    // checked_neg: `x = x - i64::MIN` has no representable delta (plain negation overflows).
+    let delta = match operator {
+        BinaryOperator::Add => const_val,
+        BinaryOperator::Subtract => const_val.checked_neg()?,
+        _ => return None,
+    };
+    Some((target_name.clone(), delta))
+}
diff --git a/src/mir/builder/control_flow/joinir/patterns/mod.rs b/src/mir/builder/control_flow/joinir/patterns/mod.rs
index 76c123d2..9d0ea9ec 100644
--- a/src/mir/builder/control_flow/joinir/patterns/mod.rs
+++ b/src/mir/builder/control_flow/joinir/patterns/mod.rs
@@ -76,3 +76,6 @@ pub(crate) use ast_feature_extractor::{detect_parse_number_pattern, ParseNumberI
 
 // Phase 143-P1: Re-export parse_string pattern detection for loop_canonicalizer
 pub(crate) use ast_feature_extractor::{detect_parse_string_pattern, ParseStringInfo};
+
+// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
+pub(crate) use ast_feature_extractor::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
diff --git a/src/mir/builder/control_flow/mod.rs b/src/mir/builder/control_flow/mod.rs
index ec751e1b..d89d8652 100644
--- a/src/mir/builder/control_flow/mod.rs
+++ b/src/mir/builder/control_flow/mod.rs
@@ -64,6 +64,9 @@ pub(crate) use joinir::{detect_continue_pattern, ContinuePatternInfo};
 pub(crate) use joinir::{detect_parse_number_pattern, ParseNumberInfo};
 pub(crate) use joinir::{detect_parse_string_pattern, ParseStringInfo};
 
+// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
+pub(crate) use joinir::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
+
 impl super::MirBuilder {
     /// Control-flow: block
     pub(super) fn cf_block(&mut self, statements: Vec<ASTNode>) -> Result<ValueId, String> {
diff --git a/src/mir/loop_canonicalizer/pattern_recognizer.rs b/src/mir/loop_canonicalizer/pattern_recognizer.rs
index 8b9547a0..ef67f954 100644
--- a/src/mir/loop_canonicalizer/pattern_recognizer.rs
+++ b/src/mir/loop_canonicalizer/pattern_recognizer.rs
@@ -8,6 +8,7 @@ use crate::mir::detect_continue_pattern;
 use crate::mir::detect_parse_number_pattern as ast_detect_parse_number;
 use crate::mir::detect_parse_string_pattern as ast_detect_parse_string;
 use crate::mir::detect_skip_whitespace_pattern as ast_detect;
+use crate::mir::detect_escape_skip_pattern as ast_detect_escape;
 
 // ============================================================================
 // Skip Whitespace Pattern (Phase 140-P4-B SSOT Wrapper)
@@ -289,3 +290,44 @@ mod tests {
         assert!(result.is_none());
     }
 }
+
+// ============================================================================
+// Escape Skip Pattern (Phase 91 P5b)
+// ============================================================================
+
+/// Try to extract escape skip pattern from loop
+///
+/// Phase 91 P5b: Pattern for string parsers with escape sequence support
+///
+/// Pattern structure:
+/// ```text
+/// loop(i < n) {
+///     // ... optional body statements
+///     if ch == "\"" { break }
+///     if ch == "\\" { i = i + escape_delta; ... }
+///     out = out + ch
+///     i = i + 1
+/// }
+/// ```
+///
+/// Returns (counter_name, normal_delta, escape_delta, quote_char, escape_char, body_stmts)
+/// if pattern matches.
+///
+/// # Phase 91 P5b: Escape Sequence Pattern Detection
+///
+/// This function delegates to `ast_feature_extractor::detect_escape_skip_pattern`
+/// for SSOT implementation.
+pub fn try_extract_escape_skip_pattern(
+    body: &[ASTNode],
+) -> Option<(String, i64, i64, char, char, Vec<ASTNode>)> {
+    ast_detect_escape(body).map(|info| {
+        (
+            info.counter_name,
+            info.normal_delta,
+            info.escape_delta,
+            info.quote_char,
+            info.escape_char,
+            info.body_stmts,
+        )
+    })
+}
diff --git a/src/mir/mod.rs b/src/mir/mod.rs
index 747f6b89..ef9323bf 100644
--- a/src/mir/mod.rs
+++ b/src/mir/mod.rs
@@ -64,6 +64,8 @@ pub(crate) use builder::{detect_continue_pattern, ContinuePatternInfo};
 pub(crate) use builder::{detect_parse_number_pattern, ParseNumberInfo};
 // Phase 143-P1: Re-export parse_string pattern detection for loop_canonicalizer
 pub(crate) use builder::{detect_parse_string_pattern, ParseStringInfo};
+// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
+pub(crate) use builder::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
 pub use cfg_extractor::extract_cfg_info; // Phase 154: CFG extraction
 pub use definitions::{CallFlags, Callee, MirCall}; // Unified call definitions
 pub use effect::{Effect, EffectMask};