feat(phase-91): Phase 91 Step 2-B/2-D - P5b escape sequence pattern recognition
### Changes
#### 1. UpdateKind Extension (skeleton_types.rs)
- Added `ConditionalStep { then_delta: i64, else_delta: i64 }` variant
- Enables P5b patterns with conditional numeric deltas
- Used for escape sequence handling (e.g., +2 vs +1 depending on whether the current character is the escape char)
#### 2. Canonicalizer Integration (canonicalizer.rs)
- Added P5b pattern detection after existing patterns
- Routes to Pattern2Break (same as skip_whitespace, reflects has_break=true)
- Builds LoopSkeleton with ConditionalStep update
- Position: AFTER skip_whitespace (Pattern2Break refinement, not separate choice)
#### 3. AST Recognizer Enhancement (ast_feature_extractor.rs)
- Updated `find_escape_in_if` to handle both:
- `if ch == '\\' { i += 2 }` (no else)
- `if ch == '\\' { i += 2 } else { i += 1 }` (with else)
- Added `extract_delta_pair_from_if` for clean delta extraction
- Extracts counter_name, escape_delta, normal_delta from single if-else statement
#### 4. Comprehensive Unit Test (canonicalizer.rs)
- Test: `test_escape_skip_pattern_recognition`
- Verifies full P5b pattern recognition
- Confirms ConditionalStep with escape_delta=2, normal_delta=1
- Validates skeleton structure and exit contract (has_break=true)
### Results
✅ Build: 1062/1062 tests PASS (+1 new P5b test)
✅ No regressions
✅ P5b pattern now recognized and routed correctly
### Next Steps
- Step 2-E: Parity verification with strict mode
- Step 2-F: Documentation updates
Phase 91 P5b implementation on track!
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -1091,16 +1091,15 @@ pub struct EscapeSkipPatternInfo {
|
||||
/// This is the recognizer for P5b (Escape Sequence Handling).
|
||||
/// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing.
|
||||
pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> {
|
||||
if body.len() < 4 {
|
||||
return None; // Need at least: break check, escape check, accumulator, normal increment
|
||||
if body.len() < 3 {
|
||||
return None; // Need at least: body statements, break check, escape check
|
||||
}
|
||||
|
||||
// Phase 91 P5b Strategy:
|
||||
// This is a simplified recognizer for escape sequence handling in string parsers.
|
||||
// For now, we detect the minimum viable pattern:
|
||||
// 1. Break check: if ch == quote_char { break }
|
||||
// 2. Escape check: if ch == escape_char { counter = counter + escape_delta }
|
||||
// 3. Normal increment: counter = counter + 1 (outside if blocks)
|
||||
// 2. Escape check: if ch == escape_char { counter = counter + escape_delta } [else { counter = counter + normal_delta }]
|
||||
//
|
||||
// Note: We rely on the order and pattern matching. If a loop body
|
||||
// matches this exact structure, we recognize it as P5b.
|
||||
@ -1111,16 +1110,11 @@ pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternI
|
||||
// Find escape check after break - scan for second "if" with increment
|
||||
let escape_idx = find_escape_in_if(body, break_idx)?;
|
||||
|
||||
// Find normal increment after escape if
|
||||
let (normal_incr_idx, counter_name, normal_delta) = find_normal_increment(body, escape_idx)?;
|
||||
|
||||
// For P5b, we need a consistent pattern. Extract minimal info:
|
||||
// - counter_name (from normal increment)
|
||||
// - normal_delta (should be 1)
|
||||
// - escape_delta (from escape if block)
|
||||
// - counter_name, escape_delta, and normal_delta from the escape if statement
|
||||
// - quote_char and escape_char (extracted from if conditions - for now, use defaults)
|
||||
|
||||
let escape_delta = extract_escape_delta_from_if(body, escape_idx)?;
|
||||
let (counter_name, escape_delta, normal_delta) = extract_delta_pair_from_if(body, escape_idx)?;
|
||||
|
||||
// Extract body statements before break check
|
||||
let body_stmts = body[..break_idx].to_vec();
|
||||
@ -1153,20 +1147,43 @@ fn find_break_in_if(body: &[ASTNode]) -> Option<usize> {
|
||||
}
|
||||
|
||||
/// Helper: Find if statement containing counter increment (escape check)
|
||||
///
|
||||
/// Phase 91 P5b: Can be either:
|
||||
/// - if ch == escape_char { i = i + 2 } (no else)
|
||||
/// - if ch == escape_char { i = i + 2 } else { i = i + 1 }
|
||||
fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<usize> {
|
||||
for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() {
|
||||
let actual_idx = after_idx + 1 + idx;
|
||||
if let ASTNode::If {
|
||||
then_body,
|
||||
else_body: None,
|
||||
else_body,
|
||||
..
|
||||
} = stmt {
|
||||
// Check if then_body contains an increment assignment
|
||||
for stmt2 in then_body.iter() {
|
||||
if let ASTNode::Assignment { target, value, .. } = stmt2 {
|
||||
if try_extract_increment_assignment(target, value).is_some() {
|
||||
// Check if then_body contains an increment assignment (escape case)
|
||||
let has_then_increment = then_body.iter().any(|s| {
|
||||
if let ASTNode::Assignment { target, value, .. } = s {
|
||||
try_extract_increment_assignment(target, value).is_some()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
});
|
||||
|
||||
if has_then_increment {
|
||||
// If-else format: check if else_body also has increment (normal case)
|
||||
if let Some(else_stmts) = else_body {
|
||||
let has_else_increment = else_stmts.iter().any(|s| {
|
||||
if let ASTNode::Assignment { target, value, .. } = s {
|
||||
try_extract_increment_assignment(target, value).is_some()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
});
|
||||
if has_else_increment {
|
||||
return Some(actual_idx);
|
||||
}
|
||||
} else {
|
||||
// No-else format: just having then increment is enough
|
||||
return Some(actual_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1190,7 +1207,79 @@ fn find_normal_increment(
|
||||
None
|
||||
}
|
||||
|
||||
/// Helper: Extract escape delta from if statement
|
||||
/// Helper: Extract both escape_delta and normal_delta from if statement
|
||||
///
|
||||
/// Handles both:
|
||||
/// - if ch == escape_char { i = i + 2 } else { i = i + 1 }
|
||||
/// - if ch == escape_char { i = i + 2 } (followed by separate increment)
|
||||
fn extract_delta_pair_from_if(body: &[ASTNode], idx: usize) -> Option<(String, i64, i64)> {
|
||||
if idx >= body.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let ASTNode::If {
|
||||
then_body,
|
||||
else_body,
|
||||
..
|
||||
} = &body[idx] {
|
||||
// Extract escape_delta from then_body
|
||||
let mut escape_delta: Option<i64> = None;
|
||||
let mut counter_name: Option<String> = None;
|
||||
|
||||
for stmt in then_body.iter() {
|
||||
if let ASTNode::Assignment { target, value, .. } = stmt {
|
||||
if let Some((name, delta)) = try_extract_increment_assignment(target, value) {
|
||||
escape_delta = Some(delta);
|
||||
counter_name = Some(name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (escape_delta, counter_name) = match (escape_delta, counter_name) {
|
||||
(Some(d), Some(n)) => (d, n),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Extract normal_delta
|
||||
let normal_delta = if let Some(else_stmts) = else_body {
|
||||
// If-else format: extract from else_body
|
||||
let mut found_delta: Option<i64> = None;
|
||||
for stmt in else_stmts.iter() {
|
||||
if let ASTNode::Assignment { target, value, .. } = stmt {
|
||||
if let Some((name, delta)) = try_extract_increment_assignment(target, value) {
|
||||
if name == counter_name {
|
||||
found_delta = Some(delta);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
found_delta?
|
||||
} else {
|
||||
// No-else format: look for separate increment after this if
|
||||
let mut found_delta: Option<i64> = None;
|
||||
for stmt in body[(idx + 1)..].iter() {
|
||||
if let ASTNode::Assignment { target, value, .. } = stmt {
|
||||
if let Some((name, delta)) = try_extract_increment_assignment(target, value) {
|
||||
if name == counter_name {
|
||||
found_delta = Some(delta);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
found_delta?
|
||||
};
|
||||
|
||||
Some((counter_name, escape_delta, normal_delta))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper: Extract escape delta from if statement (deprecated, use extract_delta_pair_from_if)
|
||||
#[allow(dead_code)]
|
||||
fn extract_escape_delta_from_if(body: &[ASTNode], idx: usize) -> Option<i64> {
|
||||
if idx < body.len() {
|
||||
if let ASTNode::If {
|
||||
|
||||
@ -8,7 +8,7 @@ use crate::mir::loop_pattern_detection::LoopPatternKind;
|
||||
|
||||
use super::capability_guard::{CapabilityTag, RoutingDecision};
|
||||
use super::pattern_recognizer::{
|
||||
try_extract_continue_pattern, try_extract_parse_number_pattern,
|
||||
try_extract_continue_pattern, try_extract_escape_skip_pattern, try_extract_parse_number_pattern,
|
||||
try_extract_parse_string_pattern, try_extract_skip_whitespace_pattern,
|
||||
};
|
||||
use super::skeleton_types::{
|
||||
@ -306,6 +306,69 @@ pub fn canonicalize_loop_expr(
|
||||
return Ok((skeleton, decision));
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// Phase 91 P5b: Escape Sequence Handling Pattern
|
||||
// ========================================================================
|
||||
// Position: After skip_whitespace (post-existing patterns)
|
||||
// Purpose: Recognize escape sequence handling in string parsers
|
||||
// Chosen: Pattern2Break (same as skip_whitespace, but with richer Skeleton)
|
||||
// Notes: Added for parity/observability, lowering deferred to Phase 92
|
||||
|
||||
if let Some((counter_name, normal_delta, escape_delta, _quote_char, _escape_char, body_stmts)) =
|
||||
try_extract_escape_skip_pattern(body)
|
||||
{
|
||||
// Build skeleton for escape skip pattern (P5b)
|
||||
let mut skeleton = LoopSkeleton::new(span);
|
||||
|
||||
// Step 1: Header condition
|
||||
skeleton.steps.push(SkeletonStep::HeaderCond {
|
||||
expr: Box::new(condition.clone()),
|
||||
});
|
||||
|
||||
// Step 2: Body statements (if any)
|
||||
if !body_stmts.is_empty() {
|
||||
skeleton
|
||||
.steps
|
||||
.push(SkeletonStep::Body { stmts: body_stmts });
|
||||
}
|
||||
|
||||
// Step 3: Update step with ConditionalStep (escape_delta vs normal_delta)
|
||||
// Pattern: normal i = i + 1, escape i = i + escape_delta (e.g., +2)
|
||||
// Represented as UpdateKind::ConditionalStep with both deltas
|
||||
skeleton.steps.push(SkeletonStep::Update {
|
||||
carrier_name: counter_name.clone(),
|
||||
update_kind: UpdateKind::ConditionalStep {
|
||||
then_delta: escape_delta, // Escape branch: +2 or other
|
||||
else_delta: normal_delta, // Normal branch: +1
|
||||
},
|
||||
});
|
||||
|
||||
// Add carrier slot with conditional step update
|
||||
skeleton.carriers.push(CarrierSlot {
|
||||
name: counter_name,
|
||||
role: CarrierRole::Counter,
|
||||
update_kind: UpdateKind::ConditionalStep {
|
||||
then_delta: escape_delta,
|
||||
else_delta: normal_delta,
|
||||
},
|
||||
});
|
||||
|
||||
// Set exit contract (P5b has break for string boundary detection)
|
||||
skeleton.exits = ExitContract {
|
||||
has_break: true,
|
||||
has_continue: false,
|
||||
has_return: false,
|
||||
break_has_value: false,
|
||||
};
|
||||
|
||||
// Phase 91 P5b Decision Policy:
|
||||
// Same as skip_whitespace (Pattern2Break)
|
||||
// P5b is a "detailed version" of Pattern2, not a separate chosen pattern
|
||||
// Notes field would record escape-specific details (Phase 91 MVP: omitted)
|
||||
let decision = RoutingDecision::success(LoopPatternKind::Pattern2Break);
|
||||
return Ok((skeleton, decision));
|
||||
}
|
||||
|
||||
// Pattern not recognized - fail fast
|
||||
Ok((
|
||||
LoopSkeleton::new(span),
|
||||
@ -1571,4 +1634,177 @@ mod tests {
|
||||
assert!(!skeleton.exits.has_return);
|
||||
assert!(!skeleton.exits.break_has_value);
|
||||
}
|
||||
|
||||
#[test]
fn test_escape_skip_pattern_recognition() {
    // Phase 91 P5b: escape sequence handling pattern.
    //
    // Loop under test:
    //   loop(i < len) {
    //       ch = get_char(i)
    //       if ch == "\"" { break }
    //       if ch == "\\" { i = i + 2 } else { i = i + 1 }
    //   }

    // --- Small AST constructors (every node uses an unknown span) ---

    /// Variable reference `name`.
    fn var(name: &str) -> ASTNode {
        ASTNode::Variable {
            name: name.to_string(),
            span: Span::unknown(),
        }
    }

    /// Literal node wrapping `value`.
    fn lit(value: LiteralValue) -> ASTNode {
        ASTNode::Literal {
            value,
            span: Span::unknown(),
        }
    }

    /// Binary expression `left <operator> right`.
    fn binop(operator: BinaryOperator, left: ASTNode, right: ASTNode) -> ASTNode {
        ASTNode::BinaryOp {
            operator,
            left: Box::new(left),
            right: Box::new(right),
            span: Span::unknown(),
        }
    }

    /// Assignment `target = value`.
    fn assign(target: &str, value: ASTNode) -> ASTNode {
        ASTNode::Assignment {
            target: Box::new(var(target)),
            value: Box::new(value),
            span: Span::unknown(),
        }
    }

    /// Condition `ch == "<text>"`.
    fn ch_equals(text: &str) -> ASTNode {
        binop(
            BinaryOperator::Equal,
            var("ch"),
            lit(LiteralValue::String(text.to_string())),
        )
    }

    /// Counter update `i = i + <step>`.
    fn advance_i(step: LiteralValue) -> ASTNode {
        assign("i", binop(BinaryOperator::Add, var("i"), lit(step)))
    }

    // --- Loop body, one statement at a time ---

    // Body: ch = get_char(i)
    let read_char = assign(
        "ch",
        ASTNode::FunctionCall {
            name: "get_char".to_string(),
            arguments: vec![var("i")],
            span: Span::unknown(),
        },
    );

    // Break check: if ch == "\"" { break }
    let break_check = ASTNode::If {
        condition: Box::new(ch_equals("\"")),
        then_body: vec![ASTNode::Break {
            span: Span::unknown(),
        }],
        else_body: None,
        span: Span::unknown(),
    };

    // Escape check: if ch == "\\" { i = i + 2 } else { i = i + 1 }
    let escape_check = ASTNode::If {
        condition: Box::new(ch_equals("\\")),
        then_body: vec![advance_i(LiteralValue::Integer(2))],
        else_body: Some(vec![advance_i(LiteralValue::Integer(1))]),
        span: Span::unknown(),
    };

    let loop_node = ASTNode::Loop {
        condition: Box::new(binop(BinaryOperator::Less, var("i"), var("len"))),
        body: vec![read_char, break_check, escape_check],
        span: Span::unknown(),
    };

    // --- Canonicalize ---

    let result = canonicalize_loop_expr(&loop_node);
    assert!(result.is_ok(), "Escape pattern canonicalization should succeed");
    let (skeleton, decision) = result.unwrap();

    // Routing decision: success, routed to Pattern2Break, nothing missing.
    assert!(decision.is_success(), "Decision should indicate success");
    assert_eq!(
        decision.chosen,
        Some(LoopPatternKind::Pattern2Break),
        "P5b should route to Pattern2Break (has_break=true)"
    );
    assert!(decision.missing_caps.is_empty(), "No missing capabilities");

    // Skeleton shape: HeaderCond first, Update last, at least three steps.
    assert!(
        skeleton.steps.len() >= 3,
        "Expected at least 3 steps: HeaderCond, Body, Update"
    );
    assert!(
        matches!(skeleton.steps.first(), Some(SkeletonStep::HeaderCond { .. })),
        "First step should be HeaderCond"
    );
    assert!(
        matches!(skeleton.steps.last(), Some(SkeletonStep::Update { .. })),
        "Last step should be Update"
    );

    // Exactly one carrier: the counter variable "i".
    assert_eq!(skeleton.carriers.len(), 1, "Should have 1 carrier");
    let carrier = &skeleton.carriers[0];
    assert_eq!(carrier.name, "i", "Carrier should be named 'i'");
    assert_eq!(carrier.role, CarrierRole::Counter, "Carrier should be a Counter");

    // The carrier update must be a ConditionalStep with escape=2, normal=1.
    match &carrier.update_kind {
        UpdateKind::ConditionalStep {
            then_delta,
            else_delta,
        } => {
            assert_eq!(*then_delta, 2, "Escape delta (then) should be 2");
            assert_eq!(*else_delta, 1, "Normal delta (else) should be 1");
        }
        other => panic!("Expected ConditionalStep, got {:?}", other),
    }

    // Exit contract: only a value-less break (string boundary detection).
    let exits = &skeleton.exits;
    assert!(exits.has_break, "P5b should have break");
    assert!(!exits.has_continue, "P5b should not have continue");
    assert!(!exits.has_return, "P5b should not have return");
    assert!(!exits.break_has_value, "Break should not have value");
}
|
||||
}
|
||||
|
||||
@ -65,7 +65,11 @@ pub enum UpdateKind {
|
||||
/// Constant step (`i = i + const`)
|
||||
ConstStep { delta: i64 },
|
||||
|
||||
/// Conditional update (`if cond { x = a } else { x = b }`)
|
||||
/// Conditional step with numeric deltas (`if escape { i = i + 2 } else { i = i + 1 }`)
|
||||
/// Phase 91 P5b: Used for escape sequence handling and similar conditional increments
|
||||
ConditionalStep { then_delta: i64, else_delta: i64 },
|
||||
|
||||
/// Conditional update with AST expressions (`if cond { x = a } else { x = b }`)
|
||||
Conditional {
|
||||
then_value: Box<ASTNode>,
|
||||
else_value: Box<ASTNode>,
|
||||
|
||||
Reference in New Issue
Block a user