feat(phase-91): Phase 91 Step 2-B/2-D - P5b escape sequence pattern recognition

### Changes #### 1. UpdateKind Extension (skeleton_types.rs) - Added `ConditionalStep { then_delta: i64, else_delta: i64 }` variant - Enables P5b patterns with conditional numeric deltas - Used for escape sequence handling (i.e., +2 vs +1 based on escape char) #### 2. Canonicalizer Integration (canonicalizer.rs) - Added P5b pattern detection after existing patterns - Routes to Pattern2Break (same as skip_whitespace, reflects has_break=true) - Builds LoopSkeleton with ConditionalStep update - Position: AFTER skip_whitespace (Pattern2Break refinement, not separate choice) #### 3. AST Recognizer Enhancement (ast_feature_extractor.rs) - Updated `find_escape_in_if` to handle both: - `if ch == '\\' { i += 2 }` (no else) - `if ch == '\\' { i += 2 } else { i += 1 }` (with else) - Added `extract_delta_pair_from_if` for clean delta extraction - Extracts counter_name, escape_delta, normal_delta from single if-else statement #### 4. Comprehensive Unit Test (canonicalizer.rs) - Test: `test_escape_skip_pattern_recognition` - Verifies full P5b pattern recognition - Confirms ConditionalStep with escape_delta=2, normal_delta=1 - Validates skeleton structure and exit contract (has_break=true) ### Results ✅ Build: 1062/1062 tests PASS (+1 new P5b test) ✅ No regressions ✅ P5b pattern now recognized and routed correctly ### Next Steps - Step 2-E: Parity verification with strict mode - Step 2-F: Documentation updates Phase 91 P5b implementation on track! 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-16 14:54:41 +09:00
parent 7db554a763
commit 570c1f6b73
3 changed files with 348 additions and 19 deletions
--- a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs
+++ b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs
@ -1091,16 +1091,15 @@ pub struct EscapeSkipPatternInfo {
 /// This is the recognizer for P5b (Escape Sequence Handling).
 /// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing.
 pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> {
-    if body.len() < 4 {
-        return None;  // Need at least: break check, escape check, accumulator, normal increment
+    if body.len() < 3 {
+        return None;  // Need at least: body statements, break check, escape check
    }

    // Phase 91 P5b Strategy:
    // This is a simplified recognizer for escape sequence handling in string parsers.
    // For now, we detect the minimum viable pattern:
    // 1. Break check: if ch == quote_char { break }
-    // 2. Escape check: if ch == escape_char { counter = counter + escape_delta }
-    // 3. Normal increment: counter = counter + 1 (outside if blocks)
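+    // 2. Escape check: if ch == escape_char { counter = counter + escape_delta } [else { counter = counter + normal_delta }]
+    //    (when the else branch is absent, a separate `counter = counter + normal_delta` must follow the if)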
    //
    // Note: We rely on the order and pattern matching. If a loop body
    // matches this exact structure, we recognize it as P5b.
@ -1111,16 +1110,11 @@ pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternI
    // Find escape check after break - scan for second "if" with increment
    let escape_idx = find_escape_in_if(body, break_idx)?;

-    // Find normal increment after escape if
-    let (normal_incr_idx, counter_name, normal_delta) = find_normal_increment(body, escape_idx)?;
-
    // For P5b, we need a consistent pattern. Extract minimal info:
-    // - counter_name (from normal increment)
-    // - normal_delta (should be 1)
-    // - escape_delta (from escape if block)
+    // - counter_name, escape_delta, and normal_delta from the escape if statement
    // - quote_char and escape_char (extracted from if conditions - for now, use defaults)

-    let escape_delta = extract_escape_delta_from_if(body, escape_idx)?;
+    let (counter_name, escape_delta, normal_delta) = extract_delta_pair_from_if(body, escape_idx)?;

    // Extract body statements before break check
    let body_stmts = body[..break_idx].to_vec();
@ -1153,20 +1147,43 @@ fn find_break_in_if(body: &[ASTNode]) -> Option<usize> {
 }

 /// Helper: Find if statement containing counter increment (escape check)
+///
+/// Phase 91 P5b: Can be either:
+/// - if ch == escape_char { i = i + 2 } (no else)
+/// - if ch == escape_char { i = i + 2 } else { i = i + 1 }
 fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<usize> {
    for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() {
        let actual_idx = after_idx + 1 + idx;
        if let ASTNode::If {
            then_body,
-            else_body: None,
+            else_body,
            ..
        } = stmt {
-            // Check if then_body contains an increment assignment
-            for stmt2 in then_body.iter() {
-                if let ASTNode::Assignment { target, value, .. } = stmt2 {
-                    if try_extract_increment_assignment(target, value).is_some() {
+            // Check if then_body contains an increment assignment (escape case)
+            let has_then_increment = then_body.iter().any(|s| {
+                if let ASTNode::Assignment { target, value, .. } = s {
+                    try_extract_increment_assignment(target, value).is_some()
+                } else {
+                    false
+                }
+            });
+
+            if has_then_increment {
+                // If-else format: check if else_body also has increment (normal case)
+                if let Some(else_stmts) = else_body {
+                    let has_else_increment = else_stmts.iter().any(|s| {
+                        if let ASTNode::Assignment { target, value, .. } = s {
+                            try_extract_increment_assignment(target, value).is_some()
+                        } else {
+                            false
+                        }
+                    });
+                    if has_else_increment {
                        return Some(actual_idx);
                    }
+                } else {
+                    // No-else format: just having then increment is enough
+                    return Some(actual_idx);
                }
            }
        }
@ -1190,7 +1207,79 @@ fn find_normal_increment(
    None
 }

-/// Helper: Extract escape delta from if statement
+/// Helper: Extract `(counter_name, escape_delta, normal_delta)` from the `If` statement at `body[idx]`.
+///
+/// Returns `None` if `idx` is out of range, `body[idx]` is not an `If`, or either delta is missing. Handles both:
+/// - if ch == escape_char { i = i + 2 } else { i = i + 1 }
+/// - if ch == escape_char { i = i + 2 } (followed by a separate increment of the same counter later in `body`)
+fn extract_delta_pair_from_if(body: &[ASTNode], idx: usize) -> Option<(String, i64, i64)> {
+    if idx >= body.len() {
+        return None;
+    }
+
+    if let ASTNode::If {
+        then_body,
+        else_body,
+        ..
+    } = &body[idx] {
+        // Escape case: the first increment assignment in then_body supplies both counter_name and escape_delta
+        let mut escape_delta: Option<i64> = None;
+        let mut counter_name: Option<String> = None;
+
+        for stmt in then_body.iter() {
+            if let ASTNode::Assignment { target, value, .. } = stmt {
+                if let Some((name, delta)) = try_extract_increment_assignment(target, value) {
+                    escape_delta = Some(delta);
+                    counter_name = Some(name);
+                    break;
+                }
+            }
+        }
+
+        let (escape_delta, counter_name) = match (escape_delta, counter_name) {
+            (Some(d), Some(n)) => (d, n),
+            _ => return None,
+        };
+
+        // Normal case: take the first increment of the same counter; `?` returns None from the helper if absent
+        let normal_delta = if let Some(else_stmts) = else_body {
+            // If-else format: search else_body; increments of other variables are skipped
+            let mut found_delta: Option<i64> = None;
+            for stmt in else_stmts.iter() {
+                if let ASTNode::Assignment { target, value, .. } = stmt {
+                    if let Some((name, delta)) = try_extract_increment_assignment(target, value) {
+                        if name == counter_name {
+                            found_delta = Some(delta);
+                            break;
+                        }
+                    }
+                }
+            }
+            found_delta?
+        } else {
+            // No-else format: search the top-level statements of `body` after this if (nested ones are not inspected)
+            let mut found_delta: Option<i64> = None;
+            for stmt in body[(idx + 1)..].iter() {
+                if let ASTNode::Assignment { target, value, .. } = stmt {
+                    if let Some((name, delta)) = try_extract_increment_assignment(target, value) {
+                        if name == counter_name {
+                            found_delta = Some(delta);
+                            break;
+                        }
+                    }
+                }
+            }
+            found_delta?
+        };
+
+        Some((counter_name, escape_delta, normal_delta))
+    } else {
+        None
+    }
+}
+
+/// Helper: Extract escape delta from if statement (deprecated, use extract_delta_pair_from_if)
+#[allow(dead_code)]
 fn extract_escape_delta_from_if(body: &[ASTNode], idx: usize) -> Option<i64> {
    if idx < body.len() {
        if let ASTNode::If {