feat(canonicalizer): Phase 143-P0 - parse_number pattern support
Add parse_number pattern recognition to the canonicalizer, extending its coverage
to digit-collection loops whose `break` sits in the THEN clause.
## Changes
### New Recognizer (ast_feature_extractor.rs)
- `detect_parse_number_pattern()`: Detects `if invalid { break }` pattern
- `ParseNumberInfo`: Struct for extracted pattern info
- ~150 lines added
### Canonicalizer Integration (canonicalizer.rs)
- Parse_number pattern detection before skip_whitespace
- LoopSkeleton construction with 4 steps (Header + Body x2 + Update)
- Routes to Pattern2Break (has_break=true)
- ~60 lines modified
### Export Chain (6 files)
- patterns/mod.rs → joinir/mod.rs → control_flow/mod.rs
- builder.rs → mir/mod.rs
- 8 lines total
### Tests
- `test_parse_number_pattern_recognized()`: Unit test for recognition
- Strict parity verification: GREEN (canonical and router agree)
- ~130 lines added
## Pattern Comparison
| Aspect | Skip Whitespace | Parse Number |
|--------|----------------|--------------|
| Break location | ELSE clause | THEN clause |
| Pattern | `if cond { update } else { break }` | `if invalid { break } rest... update` |
| Body after if | None | Required (result append) |
## Results
- ✅ Skeleton creation successful
- ✅ RoutingDecision matches router (Pattern2Break)
- ✅ Strict parity OK (canonicalizer ↔ router agreement)
- ✅ Unit test PASS
- ✅ Manual test: test_pattern2_parse_number.hako executes correctly
## Statistics
- New patterns: 1 (parse_number)
- Total patterns: 3 (skip_whitespace, parse_number, continue)
- Lines added: ~280
- Files modified: 8
- Parity status: Green ✅
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -18,3 +18,9 @@ pub(in crate::mir::builder) mod trace;
|
||||
|
||||
// Phase 140-P4-A: Re-export for loop_canonicalizer SSOT (crate-wide visibility)
|
||||
pub(crate) use patterns::{detect_skip_whitespace_pattern, SkipWhitespaceInfo};
|
||||
|
||||
// Phase 142-P1: Re-export continue pattern detection for loop_canonicalizer
|
||||
pub(crate) use patterns::{detect_continue_pattern, ContinuePatternInfo};
|
||||
|
||||
// Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer
|
||||
pub(crate) use patterns::{detect_parse_number_pattern, ParseNumberInfo};
|
||||
|
||||
@ -353,6 +353,330 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 142-P1: Continue Pattern Detection
|
||||
// ============================================================================
|
||||
|
||||
/// Continue pattern information
|
||||
///
|
||||
/// This struct holds the extracted information from a recognized continue pattern.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ContinuePatternInfo {
|
||||
/// Carrier variable name (e.g., "i")
|
||||
pub carrier_name: String,
|
||||
/// Constant step increment (e.g., 1 for `i = i + 1`)
|
||||
pub delta: i64,
|
||||
/// Body statements before the continue check (may be empty)
|
||||
pub body_stmts: Vec<ASTNode>,
|
||||
/// Body statements after the continue check (usually includes carrier update)
|
||||
pub rest_stmts: Vec<ASTNode>,
|
||||
}
|
||||
|
||||
/// Detect continue pattern in loop body
|
||||
///
|
||||
/// Pattern structure:
|
||||
/// ```
|
||||
/// loop(cond) {
|
||||
/// // ... optional body statements (Body)
|
||||
/// if skip_cond {
|
||||
/// carrier = carrier + const // Optional update before continue
|
||||
/// continue
|
||||
/// }
|
||||
/// // ... rest of body statements (Rest)
|
||||
/// carrier = carrier + const // Carrier update
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `Some(ContinuePatternInfo)` if the pattern matches, `None` otherwise
|
||||
pub fn detect_continue_pattern(body: &[ASTNode]) -> Option<ContinuePatternInfo> {
|
||||
if body.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Find the if statement with continue
|
||||
let mut if_idx = None;
|
||||
for (i, stmt) in body.iter().enumerate() {
|
||||
if let ASTNode::If { then_body, .. } = stmt {
|
||||
// Check if then_body contains continue
|
||||
if then_body
|
||||
.iter()
|
||||
.any(|s| matches!(s, ASTNode::Continue { .. }))
|
||||
{
|
||||
if_idx = Some(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let if_idx = if_idx?;
|
||||
|
||||
// Extract body statements before the if
|
||||
let body_stmts = body[..if_idx].to_vec();
|
||||
|
||||
// Extract the if statement
|
||||
let if_stmt = &body[if_idx];
|
||||
|
||||
// The if must have continue in then branch
|
||||
let then_body = match if_stmt {
|
||||
ASTNode::If {
|
||||
then_body,
|
||||
else_body,
|
||||
..
|
||||
} => {
|
||||
// For simple continue pattern, else_body should be None
|
||||
if else_body.is_some() {
|
||||
return None;
|
||||
}
|
||||
then_body
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Check if then_body contains carrier update before continue
|
||||
// For now, we'll look for the pattern after the if statement
|
||||
|
||||
// Extract rest statements after the if
|
||||
let rest_stmts = body[if_idx + 1..].to_vec();
|
||||
|
||||
// Find carrier update in rest_stmts (last statement should be carrier = carrier +/- const)
|
||||
if rest_stmts.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let last_stmt = &rest_stmts[rest_stmts.len() - 1];
|
||||
|
||||
let (carrier_name, delta) = match last_stmt {
|
||||
ASTNode::Assignment { target, value, .. } => {
|
||||
// Extract target variable name
|
||||
let target_name = match target.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name.clone(),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Value must be: target (+|-) const
|
||||
match value.as_ref() {
|
||||
ASTNode::BinaryOp {
|
||||
operator,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} => {
|
||||
// Accept both Add (+1) and Subtract (-1)
|
||||
let op_multiplier = match operator {
|
||||
BinaryOperator::Add => 1,
|
||||
BinaryOperator::Subtract => -1,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Left must be same variable
|
||||
let left_name = match left.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if left_name != &target_name {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Right must be integer literal
|
||||
let const_val = match right.as_ref() {
|
||||
ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
} => *n,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Calculate delta with sign
|
||||
let delta = const_val * op_multiplier;
|
||||
|
||||
(target_name, delta)
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Check if then_body has carrier update before continue
|
||||
// If so, we need to validate it matches
|
||||
for stmt in then_body {
|
||||
if let ASTNode::Assignment { target, .. } = stmt {
|
||||
if let ASTNode::Variable { name, .. } = target.as_ref() {
|
||||
if name == &carrier_name {
|
||||
// There's a carrier update before continue
|
||||
// For now, we'll just check it exists
|
||||
// Could validate it matches the pattern later
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(ContinuePatternInfo {
|
||||
carrier_name,
|
||||
delta,
|
||||
body_stmts,
|
||||
rest_stmts,
|
||||
})
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 143-P0: Parse Number/Digit Pattern Detection
|
||||
// ============================================================================
|
||||
|
||||
/// Parse number pattern information
|
||||
///
|
||||
/// This struct holds the extracted information from a recognized parse_number pattern.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ParseNumberInfo {
|
||||
/// Carrier variable name (e.g., "i")
|
||||
pub carrier_name: String,
|
||||
/// Constant step increment (e.g., 1 for `i = i + 1`)
|
||||
pub delta: i64,
|
||||
/// Body statements before the break check (may be empty)
|
||||
pub body_stmts: Vec<ASTNode>,
|
||||
/// Rest statements after break check (usually includes result append and carrier update)
|
||||
pub rest_stmts: Vec<ASTNode>,
|
||||
}
|
||||
|
||||
/// Detect parse_number / digit collection pattern in loop body
|
||||
///
|
||||
/// Phase 143-P0: Pattern with break in THEN clause (opposite of skip_whitespace)
|
||||
///
|
||||
/// Pattern structure:
|
||||
/// ```
|
||||
/// loop(cond) {
|
||||
/// // ... optional body statements (ch, digit_pos computation)
|
||||
/// if invalid_cond {
|
||||
/// break
|
||||
/// }
|
||||
/// // ... rest statements (result append, carrier update)
|
||||
/// carrier = carrier + const
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Recognized pattern:
|
||||
/// - parse_number: `i < len`, `if digit_pos < 0 { break }`, `i = i + 1`
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `Some(ParseNumberInfo)` if the pattern matches, `None` otherwise
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// This is complementary to skip_whitespace pattern (which has break in ELSE clause).
|
||||
/// Used by loop_canonicalizer (Phase 143) for digit collection patterns.
|
||||
pub fn detect_parse_number_pattern(body: &[ASTNode]) -> Option<ParseNumberInfo> {
|
||||
if body.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Find the if statement with break in THEN clause
|
||||
let mut if_idx = None;
|
||||
for (i, stmt) in body.iter().enumerate() {
|
||||
if let ASTNode::If {
|
||||
then_body,
|
||||
else_body,
|
||||
..
|
||||
} = stmt
|
||||
{
|
||||
// Check if then_body contains break and else_body is None
|
||||
if else_body.is_none()
|
||||
&& then_body.len() == 1
|
||||
&& matches!(then_body[0], ASTNode::Break { .. })
|
||||
{
|
||||
if_idx = Some(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let if_idx = if_idx?;
|
||||
|
||||
// Extract body statements before the if
|
||||
let body_stmts = body[..if_idx].to_vec();
|
||||
|
||||
// Extract rest statements after the if (should include carrier update)
|
||||
let rest_stmts = body[if_idx + 1..].to_vec();
|
||||
|
||||
if rest_stmts.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Find carrier update in rest_stmts (last statement should be carrier = carrier + const)
|
||||
let last_stmt = &rest_stmts[rest_stmts.len() - 1];
|
||||
|
||||
let (carrier_name, delta) = match last_stmt {
|
||||
ASTNode::Assignment { target, value, .. } => {
|
||||
// Extract target variable name
|
||||
let target_name = match target.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name.clone(),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Value must be: target (+|-) const
|
||||
match value.as_ref() {
|
||||
ASTNode::BinaryOp {
|
||||
operator,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} => {
|
||||
// Accept both Add (+1) and Subtract (-1)
|
||||
let op_multiplier = match operator {
|
||||
BinaryOperator::Add => 1,
|
||||
BinaryOperator::Subtract => -1,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Left must be same variable
|
||||
let left_name = match left.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if left_name != &target_name {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Right must be integer literal
|
||||
let const_val = match right.as_ref() {
|
||||
ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
} => *n,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Calculate delta with sign
|
||||
let delta = const_val * op_multiplier;
|
||||
|
||||
(target_name, delta)
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
Some(ParseNumberInfo {
|
||||
carrier_name,
|
||||
delta,
|
||||
body_stmts,
|
||||
rest_stmts,
|
||||
})
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 140-P4-A: Skip Whitespace Pattern Detection (SSOT)
|
||||
// ============================================================================
|
||||
@ -370,20 +694,27 @@ pub struct SkipWhitespaceInfo {
|
||||
pub body_stmts: Vec<ASTNode>,
|
||||
}
|
||||
|
||||
/// Detect skip_whitespace pattern in loop body (Phase 140-P4-A SSOT)
|
||||
/// Detect skip_whitespace / trim leading/trailing pattern in loop body
|
||||
///
|
||||
/// Phase 142 P0: Generalized to handle both +1 and -1 patterns
|
||||
///
|
||||
/// Pattern structure:
|
||||
/// ```
|
||||
/// loop(cond) {
|
||||
/// // ... optional body statements (Body)
|
||||
/// if check_cond {
|
||||
/// carrier = carrier + const
|
||||
/// carrier = carrier (+|-) const
|
||||
/// } else {
|
||||
/// break
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Recognized patterns:
|
||||
/// - skip_whitespace: `p < len`, `p = p + 1`
|
||||
/// - trim_leading: `start < end`, `start = start + 1`
|
||||
/// - trim_trailing: `end > start`, `end = end - 1`
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
@ -394,7 +725,7 @@ pub struct SkipWhitespaceInfo {
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// This is the SSOT for skip_whitespace pattern detection.
|
||||
/// This is the SSOT for skip_whitespace/trim pattern detection.
|
||||
/// Used by both loop_canonicalizer (Phase 137) and future pattern analyzers.
|
||||
pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespaceInfo> {
|
||||
if body.is_empty() {
|
||||
@ -413,7 +744,7 @@ pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespace
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Then branch must be single assignment: carrier = carrier + const
|
||||
// Then branch must be single assignment: carrier = carrier (+|-) const
|
||||
if then_body.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
@ -426,14 +757,21 @@ pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespace
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Value must be: target + const
|
||||
// Value must be: target (+|-) const
|
||||
match value.as_ref() {
|
||||
ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Add,
|
||||
operator,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} => {
|
||||
// Phase 142 P0: Accept both Add (+1) and Subtract (-1)
|
||||
let op_multiplier = match operator {
|
||||
BinaryOperator::Add => 1,
|
||||
BinaryOperator::Subtract => -1,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Left must be same variable
|
||||
let left_name = match left.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name,
|
||||
@ -445,7 +783,7 @@ pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespace
|
||||
}
|
||||
|
||||
// Right must be integer literal
|
||||
let delta = match right.as_ref() {
|
||||
let const_val = match right.as_ref() {
|
||||
ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
@ -453,6 +791,9 @@ pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespace
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Calculate delta with sign (e.g., +1 or -1)
|
||||
let delta = const_val * op_multiplier;
|
||||
|
||||
(target_name, delta)
|
||||
}
|
||||
_ => return None,
|
||||
|
||||
@ -67,3 +67,9 @@ pub(in crate::mir::builder) use router::{route_loop_pattern, LoopPatternContext}
|
||||
|
||||
// Phase 140-P4-A: Re-export for loop_canonicalizer SSOT (crate-wide visibility)
|
||||
pub(crate) use ast_feature_extractor::{detect_skip_whitespace_pattern, SkipWhitespaceInfo};
|
||||
|
||||
// Phase 142-P1: Re-export continue pattern detection for loop_canonicalizer
|
||||
pub(crate) use ast_feature_extractor::{detect_continue_pattern, ContinuePatternInfo};
|
||||
|
||||
// Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer
|
||||
pub(crate) use ast_feature_extractor::{detect_parse_number_pattern, ParseNumberInfo};
|
||||
|
||||
@ -57,6 +57,12 @@ pub(in crate::mir::builder) mod utils;
|
||||
// Phase 140-P4-A: Re-export for loop_canonicalizer SSOT (crate-wide visibility)
|
||||
pub(crate) use joinir::{detect_skip_whitespace_pattern, SkipWhitespaceInfo};
|
||||
|
||||
// Phase 142-P1: Re-export continue pattern detection for loop_canonicalizer
|
||||
pub(crate) use joinir::{detect_continue_pattern, ContinuePatternInfo};
|
||||
|
||||
// Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer
|
||||
pub(crate) use joinir::{detect_parse_number_pattern, ParseNumberInfo};
|
||||
|
||||
impl super::MirBuilder {
|
||||
/// Control-flow: block
|
||||
pub(super) fn cf_block(&mut self, statements: Vec<ASTNode>) -> Result<ValueId, String> {
|
||||
|
||||
Reference in New Issue
Block a user