Files
hakorune/src/mir/loop_canonicalizer/pattern_recognizer.rs

291 lines
9.7 KiB
Rust
Raw Normal View History

//! Pattern Recognition Helpers
//!
//! Phase 140-P4-B: This module now delegates to SSOT implementations in ast_feature_extractor.
//! Provides backward-compatible wrappers for existing callsites.
use crate::ast::ASTNode;
feat(canonicalizer): Phase 143-P0 - parse_number pattern support Add parse_number pattern recognition to canonicalizer, expanding adaptation range for digit collection loops with break in THEN clause. ## Changes ### New Recognizer (ast_feature_extractor.rs) - `detect_parse_number_pattern()`: Detects `if invalid { break }` pattern - `ParseNumberInfo`: Struct for extracted pattern info - ~150 lines added ### Canonicalizer Integration (canonicalizer.rs) - Parse_number pattern detection before skip_whitespace - LoopSkeleton construction with 4 steps (Header + Body x2 + Update) - Routes to Pattern2Break (has_break=true) - ~60 lines modified ### Export Chain (6 files) - patterns/mod.rs → joinir/mod.rs → control_flow/mod.rs - builder.rs → mir/mod.rs - 8 lines total ### Tests - `test_parse_number_pattern_recognized()`: Unit test for recognition - Strict parity verification: GREEN (canonical and router agree) - ~130 lines added ## Pattern Comparison | Aspect | Skip Whitespace | Parse Number | |--------|----------------|--------------| | Break location | ELSE clause | THEN clause | | Pattern | `if cond { update } else { break }` | `if invalid { break } rest... update` | | Body after if | None | Required (result append) | ## Results - ✅ Skeleton creation successful - ✅ RoutingDecision matches router (Pattern2Break) - ✅ Strict parity OK (canonicalizer ↔ router agreement) - ✅ Unit test PASS - ✅ Manual test: test_pattern2_parse_number.hako executes correctly ## Statistics - New patterns: 1 (parse_number) - Total patterns: 3 (skip_whitespace, parse_number, continue) - Lines added: ~280 - Files modified: 8 - Parity status: Green ✅ 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 09:08:37 +09:00
use crate::mir::detect_continue_pattern;
use crate::mir::detect_parse_number_pattern as ast_detect_parse_number;
feat(mir): Phase 143 P1 - Add parse_string pattern to canonicalizer Expand loop canonicalizer to recognize parse_string patterns with both continue (escape handling) and return (quote found) statements. ## Implementation ### New Pattern Detection (ast_feature_extractor.rs) - Add `detect_parse_string_pattern()` function - Support nested continue detection using `has_continue_node()` helper - Recognize both return and continue in same loop body - Return ParseStringInfo { carrier_name, delta, body_stmts } - ~120 lines added ### Canonicalizer Integration (canonicalizer.rs) - Try parse_string pattern first (most specific) - Build LoopSkeleton with HeaderCond, Body, Update steps - Set ExitContract: has_continue=true, has_return=true - Route to Pattern4Continue (both exits present) - ~45 lines modified ### Export Chain - Add re-exports through 7 module levels: ast_feature_extractor → patterns → joinir → control_flow → builder → mir - 10 lines total across 7 files ### Unit Test - Add `test_parse_string_pattern_recognized()` in canonicalizer.rs - Verify skeleton structure (3+ steps) - Verify carrier (name="p", delta=1, role=Counter) - Verify exit contract (continue=true, return=true, break=false) - Verify routing decision (Pattern4Continue, no missing_caps) - ~180 lines added ## Target Pattern `tools/selfhost/test_pattern4_parse_string.hako` Pattern structure: - Check for closing quote → return - Check for escape sequence → continue (nested inside another if) - Regular character processing → p++ ## Results - ✅ Strict parity green: Pattern4Continue - ✅ All 19 unit tests pass - ✅ Nested continue detection working - ✅ ExitContract correctly set (first pattern with both continue+return) - ✅ Default behavior unchanged ## Technical Challenges 1. Nested continue detection required recursive search 2. First pattern with both has_continue=true AND has_return=true 3. 
Variable step updates (p++ vs p+=2) handled with base delta ## Statistics - New patterns: 1 (parse_string) - Total patterns: 4 (skip_whitespace, parse_number, continue, parse_string) - New capabilities: 0 (uses existing ConstStep) - Lines added: ~300 - Files modified: 9 - Parity status: Green ✅ Phase 143 P1: Complete 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 12:37:47 +09:00
use crate::mir::detect_parse_string_pattern as ast_detect_parse_string;
feat(canonicalizer): Phase 143-P0 - parse_number pattern support Add parse_number pattern recognition to canonicalizer, expanding adaptation range for digit collection loops with break in THEN clause. ## Changes ### New Recognizer (ast_feature_extractor.rs) - `detect_parse_number_pattern()`: Detects `if invalid { break }` pattern - `ParseNumberInfo`: Struct for extracted pattern info - ~150 lines added ### Canonicalizer Integration (canonicalizer.rs) - Parse_number pattern detection before skip_whitespace - LoopSkeleton construction with 4 steps (Header + Body x2 + Update) - Routes to Pattern2Break (has_break=true) - ~60 lines modified ### Export Chain (6 files) - patterns/mod.rs → joinir/mod.rs → control_flow/mod.rs - builder.rs → mir/mod.rs - 8 lines total ### Tests - `test_parse_number_pattern_recognized()`: Unit test for recognition - Strict parity verification: GREEN (canonical and router agree) - ~130 lines added ## Pattern Comparison | Aspect | Skip Whitespace | Parse Number | |--------|----------------|--------------| | Break location | ELSE clause | THEN clause | | Pattern | `if cond { update } else { break }` | `if invalid { break } rest... update` | | Body after if | None | Required (result append) | ## Results - ✅ Skeleton creation successful - ✅ RoutingDecision matches router (Pattern2Break) - ✅ Strict parity OK (canonicalizer ↔ router agreement) - ✅ Unit test PASS - ✅ Manual test: test_pattern2_parse_number.hako executes correctly ## Statistics - New patterns: 1 (parse_number) - Total patterns: 3 (skip_whitespace, parse_number, continue) - Lines added: ~280 - Files modified: 8 - Parity status: Green ✅ 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 09:08:37 +09:00
use crate::mir::detect_skip_whitespace_pattern as ast_detect;
// ============================================================================
// Skip Whitespace Pattern (Phase 140-P4-B SSOT Wrapper)
// ============================================================================
/// Try to extract the skip_whitespace pattern from a loop body.
///
/// Pattern structure (source-language pseudo-code, not Rust):
///
/// ```text
/// loop(cond) {
///     // ... optional body statements (Body)
///     if check_cond {
///         carrier = carrier + const
///     } else {
///         break
///     }
/// }
/// ```
///
/// # Returns
///
/// `Some((carrier_name, delta, body_stmts))` when the detector reports a
/// match, `None` otherwise.
///
/// # Phase 140-P4-B: SSOT Migration
///
/// This function performs no matching of its own. It delegates to
/// `ast_feature_extractor::detect_skip_whitespace_pattern` (the SSOT
/// implementation, imported here as `ast_detect`) and flattens the returned
/// info struct into a tuple so that existing callsites keep working.
pub fn try_extract_skip_whitespace_pattern(
    body: &[ASTNode],
) -> Option<(String, i64, Vec<ASTNode>)> {
    let info = ast_detect(body)?;
    Some((info.carrier_name, info.delta, info.body_stmts))
}
feat(canonicalizer): Phase 143-P0 - parse_number pattern support Add parse_number pattern recognition to canonicalizer, expanding adaptation range for digit collection loops with break in THEN clause. ## Changes ### New Recognizer (ast_feature_extractor.rs) - `detect_parse_number_pattern()`: Detects `if invalid { break }` pattern - `ParseNumberInfo`: Struct for extracted pattern info - ~150 lines added ### Canonicalizer Integration (canonicalizer.rs) - Parse_number pattern detection before skip_whitespace - LoopSkeleton construction with 4 steps (Header + Body x2 + Update) - Routes to Pattern2Break (has_break=true) - ~60 lines modified ### Export Chain (6 files) - patterns/mod.rs → joinir/mod.rs → control_flow/mod.rs - builder.rs → mir/mod.rs - 8 lines total ### Tests - `test_parse_number_pattern_recognized()`: Unit test for recognition - Strict parity verification: GREEN (canonical and router agree) - ~130 lines added ## Pattern Comparison | Aspect | Skip Whitespace | Parse Number | |--------|----------------|--------------| | Break location | ELSE clause | THEN clause | | Pattern | `if cond { update } else { break }` | `if invalid { break } rest... update` | | Body after if | None | Required (result append) | ## Results - ✅ Skeleton creation successful - ✅ RoutingDecision matches router (Pattern2Break) - ✅ Strict parity OK (canonicalizer ↔ router agreement) - ✅ Unit test PASS - ✅ Manual test: test_pattern2_parse_number.hako executes correctly ## Statistics - New patterns: 1 (parse_number) - Total patterns: 3 (skip_whitespace, parse_number, continue) - Lines added: ~280 - Files modified: 8 - Parity status: Green ✅ 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 09:08:37 +09:00
// ============================================================================
// Parse Number Pattern (Phase 143-P0)
// ============================================================================
/// Try to extract the parse_number pattern from a loop body.
///
/// Pattern structure (source-language pseudo-code, not Rust):
///
/// ```text
/// loop(cond) {
///     // ... optional body statements (ch, digit_pos computation)
///     if invalid_cond {
///         break
///     }
///     // ... rest statements (result append, carrier update)
///     carrier = carrier + const
/// }
/// ```
///
/// # Returns
///
/// `Some((carrier_name, delta, body_stmts, rest_stmts))` when the detector
/// reports a match, `None` otherwise.
///
/// # Phase 143-P0: Parse Number Pattern Detection
///
/// This function performs no matching of its own. It delegates to
/// `ast_feature_extractor::detect_parse_number_pattern` (the SSOT
/// implementation, imported here as `ast_detect_parse_number`) and flattens
/// the returned info struct into a tuple.
pub fn try_extract_parse_number_pattern(
    body: &[ASTNode],
) -> Option<(String, i64, Vec<ASTNode>, Vec<ASTNode>)> {
    let info = ast_detect_parse_number(body)?;
    Some((
        info.carrier_name,
        info.delta,
        info.body_stmts,
        info.rest_stmts,
    ))
}
feat(mir): Phase 143 P1 - Add parse_string pattern to canonicalizer Expand loop canonicalizer to recognize parse_string patterns with both continue (escape handling) and return (quote found) statements. ## Implementation ### New Pattern Detection (ast_feature_extractor.rs) - Add `detect_parse_string_pattern()` function - Support nested continue detection using `has_continue_node()` helper - Recognize both return and continue in same loop body - Return ParseStringInfo { carrier_name, delta, body_stmts } - ~120 lines added ### Canonicalizer Integration (canonicalizer.rs) - Try parse_string pattern first (most specific) - Build LoopSkeleton with HeaderCond, Body, Update steps - Set ExitContract: has_continue=true, has_return=true - Route to Pattern4Continue (both exits present) - ~45 lines modified ### Export Chain - Add re-exports through 7 module levels: ast_feature_extractor → patterns → joinir → control_flow → builder → mir - 10 lines total across 7 files ### Unit Test - Add `test_parse_string_pattern_recognized()` in canonicalizer.rs - Verify skeleton structure (3+ steps) - Verify carrier (name="p", delta=1, role=Counter) - Verify exit contract (continue=true, return=true, break=false) - Verify routing decision (Pattern4Continue, no missing_caps) - ~180 lines added ## Target Pattern `tools/selfhost/test_pattern4_parse_string.hako` Pattern structure: - Check for closing quote → return - Check for escape sequence → continue (nested inside another if) - Regular character processing → p++ ## Results - ✅ Strict parity green: Pattern4Continue - ✅ All 19 unit tests pass - ✅ Nested continue detection working - ✅ ExitContract correctly set (first pattern with both continue+return) - ✅ Default behavior unchanged ## Technical Challenges 1. Nested continue detection required recursive search 2. First pattern with both has_continue=true AND has_return=true 3. 
Variable step updates (p++ vs p+=2) handled with base delta ## Statistics - New patterns: 1 (parse_string) - Total patterns: 4 (skip_whitespace, parse_number, continue, parse_string) - New capabilities: 0 (uses existing ConstStep) - Lines added: ~300 - Files modified: 9 - Parity status: Green ✅ Phase 143 P1: Complete 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 12:37:47 +09:00
// ============================================================================
// Parse String Pattern (Phase 143-P1)
// ============================================================================
/// Try to extract the parse_string pattern from a loop body.
///
/// Pattern structure (source-language pseudo-code, not Rust):
///
/// ```text
/// loop(cond) {
///     // ... body statements (ch computation)
///     if quote_cond {
///         return result
///     }
///     if escape_cond {
///         // ... escape handling
///         carrier = carrier + const
///         continue
///     }
///     // ... regular character handling
///     carrier = carrier + const
/// }
/// ```
///
/// # Returns
///
/// `Some((carrier_name, delta, body_stmts))` when the detector reports a
/// match, `None` otherwise.
///
/// # Phase 143-P1: Parse String Pattern Detection
///
/// This function performs no matching of its own. It delegates to
/// `ast_feature_extractor::detect_parse_string_pattern` (the SSOT
/// implementation, imported here as `ast_detect_parse_string`) and flattens
/// the returned info struct into a tuple.
pub fn try_extract_parse_string_pattern(body: &[ASTNode]) -> Option<(String, i64, Vec<ASTNode>)> {
    let info = ast_detect_parse_string(body)?;
    Some((info.carrier_name, info.delta, info.body_stmts))
}
feat(canonicalizer): Phase 143-P0 - parse_number pattern support Add parse_number pattern recognition to canonicalizer, expanding adaptation range for digit collection loops with break in THEN clause. ## Changes ### New Recognizer (ast_feature_extractor.rs) - `detect_parse_number_pattern()`: Detects `if invalid { break }` pattern - `ParseNumberInfo`: Struct for extracted pattern info - ~150 lines added ### Canonicalizer Integration (canonicalizer.rs) - Parse_number pattern detection before skip_whitespace - LoopSkeleton construction with 4 steps (Header + Body x2 + Update) - Routes to Pattern2Break (has_break=true) - ~60 lines modified ### Export Chain (6 files) - patterns/mod.rs → joinir/mod.rs → control_flow/mod.rs - builder.rs → mir/mod.rs - 8 lines total ### Tests - `test_parse_number_pattern_recognized()`: Unit test for recognition - Strict parity verification: GREEN (canonical and router agree) - ~130 lines added ## Pattern Comparison | Aspect | Skip Whitespace | Parse Number | |--------|----------------|--------------| | Break location | ELSE clause | THEN clause | | Pattern | `if cond { update } else { break }` | `if invalid { break } rest... update` | | Body after if | None | Required (result append) | ## Results - ✅ Skeleton creation successful - ✅ RoutingDecision matches router (Pattern2Break) - ✅ Strict parity OK (canonicalizer ↔ router agreement) - ✅ Unit test PASS - ✅ Manual test: test_pattern2_parse_number.hako executes correctly ## Statistics - New patterns: 1 (parse_number) - Total patterns: 3 (skip_whitespace, parse_number, continue) - Lines added: ~280 - Files modified: 8 - Parity status: Green ✅ 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 09:08:37 +09:00
// ============================================================================
// Continue Pattern (Phase 142-P1)
// ============================================================================
/// Try to extract the continue pattern from a loop body.
///
/// Pattern structure (source-language pseudo-code, not Rust):
///
/// ```text
/// loop(cond) {
///     // ... optional body statements (Body)
///     if skip_cond {
///         carrier = carrier + const  // Optional update before continue
///         continue
///     }
///     // ... rest of body statements (Rest)
///     carrier = carrier + const      // Carrier update
/// }
/// ```
///
/// # Returns
///
/// `Some((carrier_name, delta, body_stmts, rest_stmts))` when the detector
/// reports a match, `None` otherwise.
///
/// # Phase 142-P1: Continue Pattern Detection
///
/// This function performs no matching of its own. It delegates to
/// `ast_feature_extractor::detect_continue_pattern` (the SSOT
/// implementation) and flattens the returned info struct into a tuple.
pub fn try_extract_continue_pattern(
    body: &[ASTNode],
) -> Option<(String, i64, Vec<ASTNode>, Vec<ASTNode>)> {
    let info = detect_continue_pattern(body)?;
    Some((
        info.carrier_name,
        info.delta,
        info.body_stmts,
        info.rest_stmts,
    ))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::{BinaryOperator, LiteralValue, Span};

    /// Builds a variable reference node named `name` with an unknown span.
    fn var(name: &str) -> ASTNode {
        ASTNode::Variable {
            name: name.to_string(),
            span: Span::unknown(),
        }
    }

    /// Builds the statement `<carrier> = <carrier> + 1`.
    fn increment_by_one(carrier: &str) -> ASTNode {
        ASTNode::Assignment {
            target: Box::new(var(carrier)),
            value: Box::new(ASTNode::BinaryOp {
                operator: BinaryOperator::Add,
                left: Box::new(var(carrier)),
                right: Box::new(ASTNode::Literal {
                    value: LiteralValue::Integer(1),
                    span: Span::unknown(),
                }),
                span: Span::unknown(),
            }),
            span: Span::unknown(),
        }
    }

    /// Builds `if is_ws { p = p + 1 }` with the supplied (optional) else branch.
    fn is_ws_guard(else_body: Option<Vec<ASTNode>>) -> ASTNode {
        ASTNode::If {
            condition: Box::new(var("is_ws")),
            then_body: vec![increment_by_one("p")],
            else_body,
            span: Span::unknown(),
        }
    }

    /// Builds an else branch consisting of a single `break`.
    fn break_branch() -> Option<Vec<ASTNode>> {
        Some(vec![ASTNode::Break {
            span: Span::unknown(),
        }])
    }

    #[test]
    fn test_skip_whitespace_basic_pattern() {
        // Build: if is_ws { p = p + 1 } else { break }
        let body = vec![is_ws_guard(break_branch())];

        let (carrier_name, delta, body_stmts) = try_extract_skip_whitespace_pattern(&body)
            .expect("if/else-break with a carrier update should match skip_whitespace");

        assert_eq!(carrier_name, "p");
        assert_eq!(delta, 1);
        assert_eq!(body_stmts.len(), 0);
    }

    #[test]
    fn test_skip_whitespace_with_body() {
        // Build: local ch = get_char(p); if is_ws { p = p + 1 } else { break }
        let body = vec![
            ASTNode::Assignment {
                target: Box::new(var("ch")),
                value: Box::new(ASTNode::FunctionCall {
                    name: "get_char".to_string(),
                    arguments: vec![var("p")],
                    span: Span::unknown(),
                }),
                span: Span::unknown(),
            },
            is_ws_guard(break_branch()),
        ];

        let (carrier_name, delta, body_stmts) = try_extract_skip_whitespace_pattern(&body)
            .expect("leading body statement should not prevent a skip_whitespace match");

        assert_eq!(carrier_name, "p");
        assert_eq!(delta, 1);
        assert_eq!(body_stmts.len(), 1); // The assignment before the if
    }

    #[test]
    fn test_skip_whitespace_rejects_no_else() {
        // Build: if is_ws { p = p + 1 } (no else)
        let body = vec![is_ws_guard(None)];

        let result = try_extract_skip_whitespace_pattern(&body);
        assert!(result.is_none());
    }
}