feat(phase-91): Phase 91 Step 2-B/2-D - P5b escape sequence pattern recognition

### Changes

#### 1. UpdateKind Extension (skeleton_types.rs)
- Added `ConditionalStep { then_delta: i64, else_delta: i64 }` variant
- Enables P5b patterns with conditional numeric deltas
- Used for escape sequence handling (e.g., +2 vs +1 depending on whether the current char is the escape char)

#### 2. Canonicalizer Integration (canonicalizer.rs)
- Added P5b pattern detection after existing patterns
- Routes to Pattern2Break (same as skip_whitespace, reflects has_break=true)
- Builds LoopSkeleton with ConditionalStep update
- Position: AFTER skip_whitespace (Pattern2Break refinement, not separate choice)

#### 3. AST Recognizer Enhancement (ast_feature_extractor.rs)
- Updated `find_escape_in_if` to handle both:
  - `if ch == '\\' { i += 2 }` (no else; the normal increment follows as a separate statement)
  - `if ch == '\\' { i += 2 } else { i += 1 }` (with else)
- Added `extract_delta_pair_from_if` for clean delta extraction
- Extracts counter_name, escape_delta, normal_delta from single if-else statement

#### 4. Comprehensive Unit Test (canonicalizer.rs)
- Test: `test_escape_skip_pattern_recognition`
- Verifies full P5b pattern recognition
- Confirms ConditionalStep with escape_delta=2, normal_delta=1
- Validates skeleton structure and exit contract (has_break=true)

### Results

 Build: 1062/1062 tests PASS (+1 new P5b test)
 No regressions
 P5b pattern now recognized and routed correctly

### Next Steps

- Step 2-E: Parity verification with strict mode
- Step 2-F: Documentation updates

Phase 91 P5b implementation on track!

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-12-16 14:54:41 +09:00
parent 7db554a763
commit 570c1f6b73
3 changed files with 348 additions and 19 deletions

View File

@ -1091,16 +1091,15 @@ pub struct EscapeSkipPatternInfo {
/// This is the recognizer for P5b (Escape Sequence Handling).
/// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing.
pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> {
if body.len() < 4 {
return None; // Need at least: break check, escape check, accumulator, normal increment
if body.len() < 3 {
return None; // Need at least: body statements, break check, escape check
}
// Phase 91 P5b Strategy:
// This is a simplified recognizer for escape sequence handling in string parsers.
// For now, we detect the minimum viable pattern:
// 1. Break check: if ch == quote_char { break }
// 2. Escape check: if ch == escape_char { counter = counter + escape_delta }
// 3. Normal increment: counter = counter + 1 (outside if blocks)
// 2. Escape check: if ch == escape_char { counter = counter + escape_delta } [else { counter = counter + normal_delta }]
//
// Note: We rely on the order and pattern matching. If a loop body
// matches this exact structure, we recognize it as P5b.
@ -1111,16 +1110,11 @@ pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternI
// Find escape check after break - scan for second "if" with increment
let escape_idx = find_escape_in_if(body, break_idx)?;
// Find normal increment after escape if
let (normal_incr_idx, counter_name, normal_delta) = find_normal_increment(body, escape_idx)?;
// For P5b, we need a consistent pattern. Extract minimal info:
// - counter_name (from normal increment)
// - normal_delta (should be 1)
// - escape_delta (from escape if block)
// - counter_name, escape_delta, and normal_delta from the escape if statement
// - quote_char and escape_char (extracted from if conditions - for now, use defaults)
let escape_delta = extract_escape_delta_from_if(body, escape_idx)?;
let (counter_name, escape_delta, normal_delta) = extract_delta_pair_from_if(body, escape_idx)?;
// Extract body statements before break check
let body_stmts = body[..break_idx].to_vec();
@ -1153,20 +1147,43 @@ fn find_break_in_if(body: &[ASTNode]) -> Option<usize> {
}
/// Helper: Find if statement containing counter increment (escape check)
///
/// Phase 91 P5b: Can be either:
/// - if ch == escape_char { i = i + 2 } (no else)
/// - if ch == escape_char { i = i + 2 } else { i = i + 1 }
fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<usize> {
for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() {
let actual_idx = after_idx + 1 + idx;
if let ASTNode::If {
then_body,
else_body: None,
else_body,
..
} = stmt {
// Check if then_body contains an increment assignment
for stmt2 in then_body.iter() {
if let ASTNode::Assignment { target, value, .. } = stmt2 {
if try_extract_increment_assignment(target, value).is_some() {
// Check if then_body contains an increment assignment (escape case)
let has_then_increment = then_body.iter().any(|s| {
if let ASTNode::Assignment { target, value, .. } = s {
try_extract_increment_assignment(target, value).is_some()
} else {
false
}
});
if has_then_increment {
// If-else format: check if else_body also has increment (normal case)
if let Some(else_stmts) = else_body {
let has_else_increment = else_stmts.iter().any(|s| {
if let ASTNode::Assignment { target, value, .. } = s {
try_extract_increment_assignment(target, value).is_some()
} else {
false
}
});
if has_else_increment {
return Some(actual_idx);
}
} else {
// No-else format: just having then increment is enough
return Some(actual_idx);
}
}
}
@ -1190,7 +1207,79 @@ fn find_normal_increment(
None
}
/// Helper: Extract escape delta from if statement
/// Helper: Pull the counter name plus both step sizes out of the escape `if`.
///
/// `body[idx]` must be an `if` statement whose then-branch contains an
/// increment assignment; the first such assignment supplies the counter name
/// and the escape delta. The normal delta is then taken from the first
/// increment of that same counter found in:
/// - the else-branch, when the `if` has one:
///   `if ch == escape_char { i = i + 2 } else { i = i + 1 }`
/// - otherwise, the statements of `body` that follow the `if`:
///   `if ch == escape_char { i = i + 2 }` then `i = i + 1`
///
/// Returns `(counter_name, escape_delta, normal_delta)`, or `None` when `idx`
/// is out of range, `body[idx]` is not an `if`, or either increment is missing.
fn extract_delta_pair_from_if(body: &[ASTNode], idx: usize) -> Option<(String, i64, i64)> {
    let (then_body, else_body) = match body.get(idx)? {
        ASTNode::If {
            then_body,
            else_body,
            ..
        } => (then_body, else_body),
        _ => return None,
    };

    // Escape case: the first increment assignment in the then-branch wins.
    let (counter_name, escape_delta) = then_body.iter().find_map(|node| match node {
        ASTNode::Assignment { target, value, .. } => {
            try_extract_increment_assignment(target, value)
        }
        _ => None,
    })?;

    // Yields the delta only for increment assignments that target the same
    // counter as the escape branch; increments of other variables are skipped.
    let delta_for_counter = |node: &ASTNode| match node {
        ASTNode::Assignment { target, value, .. } => {
            try_extract_increment_assignment(target, value)
                .filter(|(name, _)| *name == counter_name)
                .map(|(_, delta)| delta)
        }
        _ => None,
    };

    // Normal case: prefer the else-branch; with no else, scan what follows the if.
    let normal_delta = match else_body {
        Some(else_stmts) => else_stmts.iter().find_map(delta_for_counter),
        None => body[idx + 1..].iter().find_map(delta_for_counter),
    }?;

    Some((counter_name, escape_delta, normal_delta))
}
/// Helper: Extract escape delta from if statement (deprecated, use extract_delta_pair_from_if)
#[allow(dead_code)]
fn extract_escape_delta_from_if(body: &[ASTNode], idx: usize) -> Option<i64> {
if idx < body.len() {
if let ASTNode::If {