diff --git a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs index 20d8acb8..59b62279 100644 --- a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs +++ b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs @@ -1091,16 +1091,15 @@ pub struct EscapeSkipPatternInfo { /// This is the recognizer for P5b (Escape Sequence Handling). /// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing. pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option { - if body.len() < 4 { - return None; // Need at least: break check, escape check, accumulator, normal increment + if body.len() < 3 { + return None; // Need at least: body statements, break check, escape check } // Phase 91 P5b Strategy: // This is a simplified recognizer for escape sequence handling in string parsers. // For now, we detect the minimum viable pattern: // 1. Break check: if ch == quote_char { break } - // 2. Escape check: if ch == escape_char { counter = counter + escape_delta } - // 3. Normal increment: counter = counter + 1 (outside if blocks) + // 2. Escape check: if ch == escape_char { counter = counter + escape_delta } [else { counter = counter + normal_delta }] // // Note: We rely on the order and pattern matching. If a loop body // matches this exact structure, we recognize it as P5b. @@ -1111,16 +1110,11 @@ pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option Option { } /// Helper: Find if statement containing counter increment (escape check) +/// +/// Phase 91 P5b: Can be either: +/// - if ch == escape_char { i = i + 2 } (no else) +/// - if ch == escape_char { i = i + 2 } else { i = i + 1 } fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option { for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() { let actual_idx = after_idx + 1 + idx; if let ASTNode::If { then_body, - else_body: None, + else_body, .. } = stmt { - // Check if then_body contains an increment assignment - for stmt2 in then_body.iter() { - if let ASTNode::Assignment { target, value, .. } = stmt2 { - if try_extract_increment_assignment(target, value).is_some() { + // Check if then_body contains an increment assignment (escape case) + let has_then_increment = then_body.iter().any(|s| { + if let ASTNode::Assignment { target, value, .. } = s { + try_extract_increment_assignment(target, value).is_some() + } else { + false + } + }); + + if has_then_increment { + // If-else format: check if else_body also has increment (normal case) + if let Some(else_stmts) = else_body { + let has_else_increment = else_stmts.iter().any(|s| { + if let ASTNode::Assignment { target, value, .. } = s { + try_extract_increment_assignment(target, value).is_some() + } else { + false + } + }); + if has_else_increment { return Some(actual_idx); } + } else { + // No-else format: just having then increment is enough + return Some(actual_idx); } } } @@ -1190,7 +1207,79 @@ fn find_normal_increment( None } -/// Helper: Extract escape delta from if statement +/// Helper: Extract both escape_delta and normal_delta from if statement +/// +/// Handles both: +/// - if ch == escape_char { i = i + 2 } else { i = i + 1 } +/// - if ch == escape_char { i = i + 2 } (followed by separate increment) +fn extract_delta_pair_from_if(body: &[ASTNode], idx: usize) -> Option<(String, i64, i64)> { + if idx >= body.len() { + return None; + } + + if let ASTNode::If { + then_body, + else_body, + .. + } = &body[idx] { + // Extract escape_delta from then_body + let mut escape_delta: Option = None; + let mut counter_name: Option = None; + + for stmt in then_body.iter() { + if let ASTNode::Assignment { target, value, .. } = stmt { + if let Some((name, delta)) = try_extract_increment_assignment(target, value) { + escape_delta = Some(delta); + counter_name = Some(name); + break; + } + } + } + + let (escape_delta, counter_name) = match (escape_delta, counter_name) { + (Some(d), Some(n)) => (d, n), + _ => return None, + }; + + // Extract normal_delta + let normal_delta = if let Some(else_stmts) = else_body { + // If-else format: extract from else_body + let mut found_delta: Option = None; + for stmt in else_stmts.iter() { + if let ASTNode::Assignment { target, value, .. } = stmt { + if let Some((name, delta)) = try_extract_increment_assignment(target, value) { + if name == counter_name { + found_delta = Some(delta); + break; + } + } + } + } + found_delta? + } else { + // No-else format: look for separate increment after this if + let mut found_delta: Option = None; + for stmt in body[(idx + 1)..].iter() { + if let ASTNode::Assignment { target, value, .. } = stmt { + if let Some((name, delta)) = try_extract_increment_assignment(target, value) { + if name == counter_name { + found_delta = Some(delta); + break; + } + } + } + } + found_delta? + }; + + Some((counter_name, escape_delta, normal_delta)) + } else { + None + } +} + +/// Helper: Extract escape delta from if statement (deprecated, use extract_delta_pair_from_if) +#[allow(dead_code)] fn extract_escape_delta_from_if(body: &[ASTNode], idx: usize) -> Option { if idx < body.len() { if let ASTNode::If { diff --git a/src/mir/loop_canonicalizer/canonicalizer.rs b/src/mir/loop_canonicalizer/canonicalizer.rs index 0d9b6330..bf86c386 100644 --- a/src/mir/loop_canonicalizer/canonicalizer.rs +++ b/src/mir/loop_canonicalizer/canonicalizer.rs @@ -8,7 +8,7 @@ use crate::mir::loop_pattern_detection::LoopPatternKind; use super::capability_guard::{CapabilityTag, RoutingDecision}; use super::pattern_recognizer::{ - try_extract_continue_pattern, try_extract_parse_number_pattern, + try_extract_continue_pattern, try_extract_escape_skip_pattern, try_extract_parse_number_pattern, try_extract_parse_string_pattern, try_extract_skip_whitespace_pattern, }; use super::skeleton_types::{ @@ -306,6 +306,69 @@ pub fn canonicalize_loop_expr( return Ok((skeleton, decision)); } + // ======================================================================== + // Phase 91 P5b: Escape Sequence Handling Pattern + // ======================================================================== + // Position: After skip_whitespace (post-existing patterns) + // Purpose: Recognize escape sequence handling in string parsers + // Chosen: Pattern2Break (same as skip_whitespace, but with richer Skeleton) + // Notes: Added for parity/observability, lowering deferred to Phase 92 + + if let Some((counter_name, normal_delta, escape_delta, _quote_char, _escape_char, body_stmts)) = + try_extract_escape_skip_pattern(body) + { + // Build skeleton for escape skip pattern (P5b) + let mut skeleton = LoopSkeleton::new(span); + + // Step 1: Header condition + skeleton.steps.push(SkeletonStep::HeaderCond { + expr: Box::new(condition.clone()), + }); + + // Step 2: Body statements (if any) + if !body_stmts.is_empty() { + skeleton + .steps + .push(SkeletonStep::Body { stmts: body_stmts }); + } + + // Step 3: Update step with ConditionalStep (escape_delta vs normal_delta) + // Pattern: normal i = i + 1, escape i = i + escape_delta (e.g., +2) + // Represented as UpdateKind::ConditionalStep with both deltas + skeleton.steps.push(SkeletonStep::Update { + carrier_name: counter_name.clone(), + update_kind: UpdateKind::ConditionalStep { + then_delta: escape_delta, // Escape branch: +2 or other + else_delta: normal_delta, // Normal branch: +1 + }, + }); + + // Add carrier slot with conditional step update + skeleton.carriers.push(CarrierSlot { + name: counter_name, + role: CarrierRole::Counter, + update_kind: UpdateKind::ConditionalStep { + then_delta: escape_delta, + else_delta: normal_delta, + }, + }); + + // Set exit contract (P5b has break for string boundary detection) + skeleton.exits = ExitContract { + has_break: true, + has_continue: false, + has_return: false, + break_has_value: false, + }; + + // Phase 91 P5b Decision Policy: + // Same as skip_whitespace (Pattern2Break) + // P5b is a "detailed version" of Pattern2, not a separate chosen pattern + // Notes field would record escape-specific details (Phase 91 MVP: omitted) + let decision = RoutingDecision::success(LoopPatternKind::Pattern2Break); + return Ok((skeleton, decision)); + } + // Pattern not recognized - fail fast Ok(( LoopSkeleton::new(span), @@ -1571,4 +1634,177 @@ mod tests { assert!(!skeleton.exits.has_return); assert!(!skeleton.exits.break_has_value); } + + #[test] + fn test_escape_skip_pattern_recognition() { + // Phase 91 P5b: Escape sequence handling pattern + // Build: loop(i < len) { + // ch = get_char(i) + // if ch == "\"" { break } + // if ch == "\\" { i = i + 2 } else { i = i + 1 } + // } + let loop_node = ASTNode::Loop { + condition: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Less, + left: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Variable { + name: "len".to_string(), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + body: vec![ + // Body: ch = get_char(i) + ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "ch".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::FunctionCall { + name: "get_char".to_string(), + arguments: vec![ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }], + span: Span::unknown(), + }), + span: Span::unknown(), + }, + // Break check: if ch == "\"" { break } + ASTNode::If { + condition: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Equal, + left: Box::new(ASTNode::Variable { + name: "ch".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::String("\"".to_string()), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + then_body: vec![ASTNode::Break { + span: Span::unknown(), + }], + else_body: None, + span: Span::unknown(), + }, + // Escape check: if ch == "\\" { i = i + 2 } else { i = i + 1 } + ASTNode::If { + condition: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Equal, + left: Box::new(ASTNode::Variable { + name: "ch".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::String("\\".to_string()), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + then_body: vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(2), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }], + else_body: Some(vec![ASTNode::Assignment { + target: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(ASTNode::Variable { + name: "i".to_string(), + span: Span::unknown(), + }), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: Span::unknown(), + }), + span: Span::unknown(), + }), + span: Span::unknown(), + }]), + span: Span::unknown(), + }, + ], + span: Span::unknown(), + }; + + let result = canonicalize_loop_expr(&loop_node); + assert!(result.is_ok(), "Escape pattern canonicalization should succeed"); + + let (skeleton, decision) = result.unwrap(); + + // Verify decision success + assert!(decision.is_success(), "Decision should indicate success"); + assert_eq!( + decision.chosen, + Some(LoopPatternKind::Pattern2Break), + "P5b should route to Pattern2Break (has_break=true)" + ); + assert!(decision.missing_caps.is_empty(), "No missing capabilities"); + + // Verify skeleton structure + // Expected: HeaderCond + Body + Update + assert!( + skeleton.steps.len() >= 3, + "Expected at least 3 steps: HeaderCond, Body, Update" + ); + assert!( + matches!(skeleton.steps[0], SkeletonStep::HeaderCond { .. }), + "First step should be HeaderCond" + ); + assert!( + matches!(skeleton.steps[skeleton.steps.len() - 1], SkeletonStep::Update { .. }), + "Last step should be Update" + ); + + // Verify carrier (counter variable "i") + assert_eq!(skeleton.carriers.len(), 1, "Should have 1 carrier"); + let carrier = &skeleton.carriers[0]; + assert_eq!(carrier.name, "i", "Carrier should be named 'i'"); + assert_eq!(carrier.role, CarrierRole::Counter, "Carrier should be a Counter"); + + // Verify ConditionalStep with escape_delta=2, normal_delta=1 + match &carrier.update_kind { + UpdateKind::ConditionalStep { + then_delta, + else_delta, + } => { + assert_eq!(*then_delta, 2, "Escape delta (then) should be 2"); + assert_eq!(*else_delta, 1, "Normal delta (else) should be 1"); + } + other => panic!( + "Expected ConditionalStep, got {:?}", + other + ), + } + + // Verify exit contract (P5b has break for string boundary) + assert!(skeleton.exits.has_break, "P5b should have break"); + assert!(!skeleton.exits.has_continue, "P5b should not have continue"); + assert!(!skeleton.exits.has_return, "P5b should not have return"); + assert!(!skeleton.exits.break_has_value, "Break should not have value"); + } } diff --git a/src/mir/loop_canonicalizer/skeleton_types.rs b/src/mir/loop_canonicalizer/skeleton_types.rs index c4e3f2d2..484adc7a 100644 --- a/src/mir/loop_canonicalizer/skeleton_types.rs +++ b/src/mir/loop_canonicalizer/skeleton_types.rs @@ -65,7 +65,11 @@ pub enum UpdateKind { /// Constant step (`i = i + const`) ConstStep { delta: i64 }, - /// Conditional update (`if cond { x = a } else { x = b }`) + /// Conditional step with numeric deltas (`if escape { i = i + 2 } else { i = i + 1 }`) + /// Phase 91 P5b: Used for escape sequence handling and similar conditional increments + ConditionalStep { then_delta: i64, else_delta: i64 }, + + /// Conditional update with AST expressions (`if cond { x = a } else { x = b }`) Conditional { then_value: Box, else_value: Box,