feat(phase-91): Phase 91 Step 2-B/2-D - P5b escape sequence pattern recognition

### Changes

#### 1. UpdateKind Extension (skeleton_types.rs)
- Added `ConditionalStep { then_delta: i64, else_delta: i64 }` variant
- Enables P5b patterns with conditional numeric deltas
- Used for escape sequence handling (i.e., +2 vs +1 based on escape char)

#### 2. Canonicalizer Integration (canonicalizer.rs)
- Added P5b pattern detection after existing patterns
- Routes to Pattern2Break (same as skip_whitespace, reflects has_break=true)
- Builds LoopSkeleton with ConditionalStep update
- Position: AFTER skip_whitespace (Pattern2Break refinement, not separate choice)

#### 3. AST Recognizer Enhancement (ast_feature_extractor.rs)
- Updated `find_escape_in_if` to handle both:
  - `if ch == '\\' { i += 2 }` (no else)
  - `if ch == '\\' { i += 2 } else { i += 1 }` (with else)
- Added `extract_delta_pair_from_if` for clean delta extraction
- Extracts counter_name, escape_delta, normal_delta from single if-else statement

#### 4. Comprehensive Unit Test (canonicalizer.rs)
- Test: `test_escape_skip_pattern_recognition`
- Verifies full P5b pattern recognition
- Confirms ConditionalStep with escape_delta=2, normal_delta=1
- Validates skeleton structure and exit contract (has_break=true)

### Results

 Build: 1062/1062 tests PASS (+1 new P5b test)
 No regressions
 P5b pattern now recognized and routed correctly

### Next Steps

- Step 2-E: Parity verification with strict mode
- Step 2-F: Documentation updates

Phase 91 P5b implementation on track!

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-12-16 14:54:41 +09:00
parent 7db554a763
commit 570c1f6b73
3 changed files with 348 additions and 19 deletions

View File

@ -1091,16 +1091,15 @@ pub struct EscapeSkipPatternInfo {
/// This is the recognizer for P5b (Escape Sequence Handling). /// This is the recognizer for P5b (Escape Sequence Handling).
/// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing. /// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing.
pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> { pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> {
if body.len() < 4 { if body.len() < 3 {
return None; // Need at least: break check, escape check, accumulator, normal increment return None; // Need at least: body statements, break check, escape check
} }
// Phase 91 P5b Strategy: // Phase 91 P5b Strategy:
// This is a simplified recognizer for escape sequence handling in string parsers. // This is a simplified recognizer for escape sequence handling in string parsers.
// For now, we detect the minimum viable pattern: // For now, we detect the minimum viable pattern:
// 1. Break check: if ch == quote_char { break } // 1. Break check: if ch == quote_char { break }
// 2. Escape check: if ch == escape_char { counter = counter + escape_delta } // 2. Escape check: if ch == escape_char { counter = counter + escape_delta } [else { counter = counter + normal_delta }]
// 3. Normal increment: counter = counter + 1 (outside if blocks)
// //
// Note: We rely on the order and pattern matching. If a loop body // Note: We rely on the order and pattern matching. If a loop body
// matches this exact structure, we recognize it as P5b. // matches this exact structure, we recognize it as P5b.
@ -1111,16 +1110,11 @@ pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternI
// Find escape check after break - scan for second "if" with increment // Find escape check after break - scan for second "if" with increment
let escape_idx = find_escape_in_if(body, break_idx)?; let escape_idx = find_escape_in_if(body, break_idx)?;
// Find normal increment after escape if
let (normal_incr_idx, counter_name, normal_delta) = find_normal_increment(body, escape_idx)?;
// For P5b, we need a consistent pattern. Extract minimal info: // For P5b, we need a consistent pattern. Extract minimal info:
// - counter_name (from normal increment) // - counter_name, escape_delta, and normal_delta from the escape if statement
// - normal_delta (should be 1)
// - escape_delta (from escape if block)
// - quote_char and escape_char (extracted from if conditions - for now, use defaults) // - quote_char and escape_char (extracted from if conditions - for now, use defaults)
let escape_delta = extract_escape_delta_from_if(body, escape_idx)?; let (counter_name, escape_delta, normal_delta) = extract_delta_pair_from_if(body, escape_idx)?;
// Extract body statements before break check // Extract body statements before break check
let body_stmts = body[..break_idx].to_vec(); let body_stmts = body[..break_idx].to_vec();
@ -1153,20 +1147,43 @@ fn find_break_in_if(body: &[ASTNode]) -> Option<usize> {
} }
/// Helper: Find if statement containing counter increment (escape check) /// Helper: Find if statement containing counter increment (escape check)
///
/// Phase 91 P5b: Can be either:
/// - if ch == escape_char { i = i + 2 } (no else)
/// - if ch == escape_char { i = i + 2 } else { i = i + 1 }
fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<usize> { fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<usize> {
for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() { for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() {
let actual_idx = after_idx + 1 + idx; let actual_idx = after_idx + 1 + idx;
if let ASTNode::If { if let ASTNode::If {
then_body, then_body,
else_body: None, else_body,
.. ..
} = stmt { } = stmt {
// Check if then_body contains an increment assignment // Check if then_body contains an increment assignment (escape case)
for stmt2 in then_body.iter() { let has_then_increment = then_body.iter().any(|s| {
if let ASTNode::Assignment { target, value, .. } = stmt2 { if let ASTNode::Assignment { target, value, .. } = s {
if try_extract_increment_assignment(target, value).is_some() { try_extract_increment_assignment(target, value).is_some()
} else {
false
}
});
if has_then_increment {
// If-else format: check if else_body also has increment (normal case)
if let Some(else_stmts) = else_body {
let has_else_increment = else_stmts.iter().any(|s| {
if let ASTNode::Assignment { target, value, .. } = s {
try_extract_increment_assignment(target, value).is_some()
} else {
false
}
});
if has_else_increment {
return Some(actual_idx); return Some(actual_idx);
} }
} else {
// No-else format: just having then increment is enough
return Some(actual_idx);
} }
} }
} }
@ -1190,7 +1207,79 @@ fn find_normal_increment(
None None
} }
/// Helper: Extract escape delta from if statement /// Helper: Extract both escape_delta and normal_delta from if statement
///
/// Handles both:
/// - if ch == escape_char { i = i + 2 } else { i = i + 1 }
/// - if ch == escape_char { i = i + 2 } (followed by separate increment)
fn extract_delta_pair_from_if(body: &[ASTNode], idx: usize) -> Option<(String, i64, i64)> {
if idx >= body.len() {
return None;
}
if let ASTNode::If {
then_body,
else_body,
..
} = &body[idx] {
// Extract escape_delta from then_body
let mut escape_delta: Option<i64> = None;
let mut counter_name: Option<String> = None;
for stmt in then_body.iter() {
if let ASTNode::Assignment { target, value, .. } = stmt {
if let Some((name, delta)) = try_extract_increment_assignment(target, value) {
escape_delta = Some(delta);
counter_name = Some(name);
break;
}
}
}
let (escape_delta, counter_name) = match (escape_delta, counter_name) {
(Some(d), Some(n)) => (d, n),
_ => return None,
};
// Extract normal_delta
let normal_delta = if let Some(else_stmts) = else_body {
// If-else format: extract from else_body
let mut found_delta: Option<i64> = None;
for stmt in else_stmts.iter() {
if let ASTNode::Assignment { target, value, .. } = stmt {
if let Some((name, delta)) = try_extract_increment_assignment(target, value) {
if name == counter_name {
found_delta = Some(delta);
break;
}
}
}
}
found_delta?
} else {
// No-else format: look for separate increment after this if
let mut found_delta: Option<i64> = None;
for stmt in body[(idx + 1)..].iter() {
if let ASTNode::Assignment { target, value, .. } = stmt {
if let Some((name, delta)) = try_extract_increment_assignment(target, value) {
if name == counter_name {
found_delta = Some(delta);
break;
}
}
}
}
found_delta?
};
Some((counter_name, escape_delta, normal_delta))
} else {
None
}
}
/// Helper: Extract escape delta from if statement (deprecated, use extract_delta_pair_from_if)
#[allow(dead_code)]
fn extract_escape_delta_from_if(body: &[ASTNode], idx: usize) -> Option<i64> { fn extract_escape_delta_from_if(body: &[ASTNode], idx: usize) -> Option<i64> {
if idx < body.len() { if idx < body.len() {
if let ASTNode::If { if let ASTNode::If {

View File

@ -8,7 +8,7 @@ use crate::mir::loop_pattern_detection::LoopPatternKind;
use super::capability_guard::{CapabilityTag, RoutingDecision}; use super::capability_guard::{CapabilityTag, RoutingDecision};
use super::pattern_recognizer::{ use super::pattern_recognizer::{
try_extract_continue_pattern, try_extract_parse_number_pattern, try_extract_continue_pattern, try_extract_escape_skip_pattern, try_extract_parse_number_pattern,
try_extract_parse_string_pattern, try_extract_skip_whitespace_pattern, try_extract_parse_string_pattern, try_extract_skip_whitespace_pattern,
}; };
use super::skeleton_types::{ use super::skeleton_types::{
@ -306,6 +306,69 @@ pub fn canonicalize_loop_expr(
return Ok((skeleton, decision)); return Ok((skeleton, decision));
} }
// ========================================================================
// Phase 91 P5b: Escape Sequence Handling Pattern
// ========================================================================
// Position: After skip_whitespace (post-existing patterns)
// Purpose: Recognize escape sequence handling in string parsers
// Chosen: Pattern2Break (same as skip_whitespace, but with richer Skeleton)
// Notes: Added for parity/observability, lowering deferred to Phase 92
if let Some((counter_name, normal_delta, escape_delta, _quote_char, _escape_char, body_stmts)) =
try_extract_escape_skip_pattern(body)
{
// Build skeleton for escape skip pattern (P5b)
let mut skeleton = LoopSkeleton::new(span);
// Step 1: Header condition
skeleton.steps.push(SkeletonStep::HeaderCond {
expr: Box::new(condition.clone()),
});
// Step 2: Body statements (if any)
if !body_stmts.is_empty() {
skeleton
.steps
.push(SkeletonStep::Body { stmts: body_stmts });
}
// Step 3: Update step with ConditionalStep (escape_delta vs normal_delta)
// Pattern: normal i = i + 1, escape i = i + escape_delta (e.g., +2)
// Represented as UpdateKind::ConditionalStep with both deltas
skeleton.steps.push(SkeletonStep::Update {
carrier_name: counter_name.clone(),
update_kind: UpdateKind::ConditionalStep {
then_delta: escape_delta, // Escape branch: +2 or other
else_delta: normal_delta, // Normal branch: +1
},
});
// Add carrier slot with conditional step update
skeleton.carriers.push(CarrierSlot {
name: counter_name,
role: CarrierRole::Counter,
update_kind: UpdateKind::ConditionalStep {
then_delta: escape_delta,
else_delta: normal_delta,
},
});
// Set exit contract (P5b has break for string boundary detection)
skeleton.exits = ExitContract {
has_break: true,
has_continue: false,
has_return: false,
break_has_value: false,
};
// Phase 91 P5b Decision Policy:
// Same as skip_whitespace (Pattern2Break)
// P5b is a "detailed version" of Pattern2, not a separate chosen pattern
// Notes field would record escape-specific details (Phase 91 MVP: omitted)
let decision = RoutingDecision::success(LoopPatternKind::Pattern2Break);
return Ok((skeleton, decision));
}
// Pattern not recognized - fail fast // Pattern not recognized - fail fast
Ok(( Ok((
LoopSkeleton::new(span), LoopSkeleton::new(span),
@ -1571,4 +1634,177 @@ mod tests {
assert!(!skeleton.exits.has_return); assert!(!skeleton.exits.has_return);
assert!(!skeleton.exits.break_has_value); assert!(!skeleton.exits.break_has_value);
} }
#[test]
fn test_escape_skip_pattern_recognition() {
// Phase 91 P5b: Escape sequence handling pattern
// Build: loop(i < len) {
// ch = get_char(i)
// if ch == "\"" { break }
// if ch == "\\" { i = i + 2 } else { i = i + 1 }
// }
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Variable {
name: "len".to_string(),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: vec![
// Body: ch = get_char(i)
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::FunctionCall {
name: "get_char".to_string(),
arguments: vec![ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}],
span: Span::unknown(),
}),
span: Span::unknown(),
},
// Break check: if ch == "\"" { break }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String("\"".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![ASTNode::Break {
span: Span::unknown(),
}],
else_body: None,
span: Span::unknown(),
},
// Escape check: if ch == "\\" { i = i + 2 } else { i = i + 1 }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String("\\".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(2),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
}],
else_body: Some(vec![ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "i".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
}]),
span: Span::unknown(),
},
],
span: Span::unknown(),
};
let result = canonicalize_loop_expr(&loop_node);
assert!(result.is_ok(), "Escape pattern canonicalization should succeed");
let (skeleton, decision) = result.unwrap();
// Verify decision success
assert!(decision.is_success(), "Decision should indicate success");
assert_eq!(
decision.chosen,
Some(LoopPatternKind::Pattern2Break),
"P5b should route to Pattern2Break (has_break=true)"
);
assert!(decision.missing_caps.is_empty(), "No missing capabilities");
// Verify skeleton structure
// Expected: HeaderCond + Body + Update
assert!(
skeleton.steps.len() >= 3,
"Expected at least 3 steps: HeaderCond, Body, Update"
);
assert!(
matches!(skeleton.steps[0], SkeletonStep::HeaderCond { .. }),
"First step should be HeaderCond"
);
assert!(
matches!(skeleton.steps[skeleton.steps.len() - 1], SkeletonStep::Update { .. }),
"Last step should be Update"
);
// Verify carrier (counter variable "i")
assert_eq!(skeleton.carriers.len(), 1, "Should have 1 carrier");
let carrier = &skeleton.carriers[0];
assert_eq!(carrier.name, "i", "Carrier should be named 'i'");
assert_eq!(carrier.role, CarrierRole::Counter, "Carrier should be a Counter");
// Verify ConditionalStep with escape_delta=2, normal_delta=1
match &carrier.update_kind {
UpdateKind::ConditionalStep {
then_delta,
else_delta,
} => {
assert_eq!(*then_delta, 2, "Escape delta (then) should be 2");
assert_eq!(*else_delta, 1, "Normal delta (else) should be 1");
}
other => panic!(
"Expected ConditionalStep, got {:?}",
other
),
}
// Verify exit contract (P5b has break for string boundary)
assert!(skeleton.exits.has_break, "P5b should have break");
assert!(!skeleton.exits.has_continue, "P5b should not have continue");
assert!(!skeleton.exits.has_return, "P5b should not have return");
assert!(!skeleton.exits.break_has_value, "Break should not have value");
}
} }

View File

@ -65,7 +65,11 @@ pub enum UpdateKind {
/// Constant step (`i = i + const`) /// Constant step (`i = i + const`)
ConstStep { delta: i64 }, ConstStep { delta: i64 },
/// Conditional update (`if cond { x = a } else { x = b }`) /// Conditional step with numeric deltas (`if escape { i = i + 2 } else { i = i + 1 }`)
/// Phase 91 P5b: Used for escape sequence handling and similar conditional increments
ConditionalStep { then_delta: i64, else_delta: i64 },
/// Conditional update with AST expressions (`if cond { x = a } else { x = b }`)
Conditional { Conditional {
then_value: Box<ASTNode>, then_value: Box<ASTNode>,
else_value: Box<ASTNode>, else_value: Box<ASTNode>,