2025-12-16 06:41:46 +09:00
|
|
|
//! Pattern Recognition Helpers
|
|
|
|
|
//!
|
2025-12-16 07:09:22 +09:00
|
|
|
//! Phase 140-P4-B: This module now delegates to the single-source-of-truth (SSOT) implementations in `ast_feature_extractor`.
|
|
|
|
|
//! Provides backward-compatible wrappers for existing callsites.
|
2025-12-16 06:41:46 +09:00
|
|
|
|
|
|
|
|
use crate::ast::ASTNode;
|
2025-12-16 09:08:37 +09:00
|
|
|
use crate::mir::detect_continue_pattern;
|
|
|
|
|
use crate::mir::detect_parse_number_pattern as ast_detect_parse_number;
|
feat(mir): Phase 143 P1 - Add parse_string pattern to canonicalizer
Expand loop canonicalizer to recognize parse_string patterns with both
continue (escape handling) and return (quote found) statements.
## Implementation
### New Pattern Detection (ast_feature_extractor.rs)
- Add `detect_parse_string_pattern()` function
- Support nested continue detection using `has_continue_node()` helper
- Recognize both return and continue in same loop body
- Return ParseStringInfo { carrier_name, delta, body_stmts }
- ~120 lines added
### Canonicalizer Integration (canonicalizer.rs)
- Try parse_string pattern first (most specific)
- Build LoopSkeleton with HeaderCond, Body, Update steps
- Set ExitContract: has_continue=true, has_return=true
- Route to Pattern4Continue (both exits present)
- ~45 lines modified
### Export Chain
- Add re-exports through 7 module levels:
ast_feature_extractor → patterns → joinir → control_flow → builder → mir
- 10 lines total across 7 files
### Unit Test
- Add `test_parse_string_pattern_recognized()` in canonicalizer.rs
- Verify skeleton structure (3+ steps)
- Verify carrier (name="p", delta=1, role=Counter)
- Verify exit contract (continue=true, return=true, break=false)
- Verify routing decision (Pattern4Continue, no missing_caps)
- ~180 lines added
## Target Pattern
`tools/selfhost/test_pattern4_parse_string.hako`
Pattern structure:
- Check for closing quote → return
- Check for escape sequence → continue (nested inside another if)
- Regular character processing → p++
## Results
- ✅ Strict parity green: Pattern4Continue
- ✅ All 19 unit tests pass
- ✅ Nested continue detection working
- ✅ ExitContract correctly set (first pattern with both continue+return)
- ✅ Default behavior unchanged
## Technical Challenges
1. Nested continue detection required recursive search
2. First pattern with both has_continue=true AND has_return=true
3. Variable step updates (p++ vs p+=2) handled with base delta
## Statistics
- New patterns: 1 (parse_string)
- Total patterns: 4 (skip_whitespace, parse_number, continue, parse_string)
- New capabilities: 0 (uses existing ConstStep)
- Lines added: ~300
- Files modified: 9
- Parity status: Green ✅
Phase 143 P1: Complete
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 12:37:47 +09:00
|
|
|
use crate::mir::detect_parse_string_pattern as ast_detect_parse_string;
|
2025-12-16 09:08:37 +09:00
|
|
|
use crate::mir::detect_skip_whitespace_pattern as ast_detect;
|
2025-12-16 06:41:46 +09:00
|
|
|
|
|
|
|
|
// ============================================================================
|
2025-12-16 07:09:22 +09:00
|
|
|
// Skip Whitespace Pattern (Phase 140-P4-B SSOT Wrapper)
|
2025-12-16 06:41:46 +09:00
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/// Try to extract skip_whitespace pattern from loop
|
|
|
|
|
///
|
|
|
|
|
/// Pattern structure:
|
|
|
|
|
/// ```
|
|
|
|
|
/// loop(cond) {
|
|
|
|
|
/// // ... optional body statements (Body)
|
|
|
|
|
/// if check_cond {
|
|
|
|
|
/// carrier = carrier + const
|
|
|
|
|
/// } else {
|
|
|
|
|
/// break
|
|
|
|
|
/// }
|
|
|
|
|
/// }
|
|
|
|
|
/// ```
|
|
|
|
|
///
|
|
|
|
|
/// Returns (carrier_name, delta, body_stmts) if pattern matches.
|
2025-12-16 07:09:22 +09:00
|
|
|
///
|
|
|
|
|
/// # Phase 140-P4-B: SSOT Migration
|
|
|
|
|
///
|
|
|
|
|
/// This function now delegates to `ast_feature_extractor::detect_skip_whitespace_pattern`
|
|
|
|
|
/// for SSOT implementation. This wrapper maintains backward compatibility for existing callsites.
|
2025-12-16 06:41:46 +09:00
|
|
|
pub fn try_extract_skip_whitespace_pattern(
|
|
|
|
|
body: &[ASTNode],
|
|
|
|
|
) -> Option<(String, i64, Vec<ASTNode>)> {
|
2025-12-16 07:09:22 +09:00
|
|
|
ast_detect(body).map(|info| (info.carrier_name, info.delta, info.body_stmts))
|
2025-12-16 06:41:46 +09:00
|
|
|
}
|
|
|
|
|
|
2025-12-16 09:08:37 +09:00
|
|
|
// ============================================================================
|
|
|
|
|
// Parse Number Pattern (Phase 143-P0)
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/// Try to extract parse_number pattern from loop
|
|
|
|
|
///
|
|
|
|
|
/// Pattern structure:
|
|
|
|
|
/// ```
|
|
|
|
|
/// loop(cond) {
|
|
|
|
|
/// // ... optional body statements (ch, digit_pos computation)
|
|
|
|
|
/// if invalid_cond {
|
|
|
|
|
/// break
|
|
|
|
|
/// }
|
|
|
|
|
/// // ... rest statements (result append, carrier update)
|
|
|
|
|
/// carrier = carrier + const
|
|
|
|
|
/// }
|
|
|
|
|
/// ```
|
|
|
|
|
///
|
|
|
|
|
/// Returns (carrier_name, delta, body_stmts, rest_stmts) if pattern matches.
|
|
|
|
|
///
|
|
|
|
|
/// # Phase 143-P0: Parse Number Pattern Detection
|
|
|
|
|
///
|
|
|
|
|
/// This function delegates to `ast_feature_extractor::detect_parse_number_pattern`
|
|
|
|
|
/// for SSOT implementation.
|
|
|
|
|
pub fn try_extract_parse_number_pattern(
|
|
|
|
|
body: &[ASTNode],
|
|
|
|
|
) -> Option<(String, i64, Vec<ASTNode>, Vec<ASTNode>)> {
|
|
|
|
|
ast_detect_parse_number(body).map(|info| {
|
|
|
|
|
(
|
|
|
|
|
info.carrier_name,
|
|
|
|
|
info.delta,
|
|
|
|
|
info.body_stmts,
|
|
|
|
|
info.rest_stmts,
|
|
|
|
|
)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
feat(mir): Phase 143 P1 - Add parse_string pattern to canonicalizer
Expand loop canonicalizer to recognize parse_string patterns with both
continue (escape handling) and return (quote found) statements.
## Implementation
### New Pattern Detection (ast_feature_extractor.rs)
- Add `detect_parse_string_pattern()` function
- Support nested continue detection using `has_continue_node()` helper
- Recognize both return and continue in same loop body
- Return ParseStringInfo { carrier_name, delta, body_stmts }
- ~120 lines added
### Canonicalizer Integration (canonicalizer.rs)
- Try parse_string pattern first (most specific)
- Build LoopSkeleton with HeaderCond, Body, Update steps
- Set ExitContract: has_continue=true, has_return=true
- Route to Pattern4Continue (both exits present)
- ~45 lines modified
### Export Chain
- Add re-exports through 7 module levels:
ast_feature_extractor → patterns → joinir → control_flow → builder → mir
- 10 lines total across 7 files
### Unit Test
- Add `test_parse_string_pattern_recognized()` in canonicalizer.rs
- Verify skeleton structure (3+ steps)
- Verify carrier (name="p", delta=1, role=Counter)
- Verify exit contract (continue=true, return=true, break=false)
- Verify routing decision (Pattern4Continue, no missing_caps)
- ~180 lines added
## Target Pattern
`tools/selfhost/test_pattern4_parse_string.hako`
Pattern structure:
- Check for closing quote → return
- Check for escape sequence → continue (nested inside another if)
- Regular character processing → p++
## Results
- ✅ Strict parity green: Pattern4Continue
- ✅ All 19 unit tests pass
- ✅ Nested continue detection working
- ✅ ExitContract correctly set (first pattern with both continue+return)
- ✅ Default behavior unchanged
## Technical Challenges
1. Nested continue detection required recursive search
2. First pattern with both has_continue=true AND has_return=true
3. Variable step updates (p++ vs p+=2) handled with base delta
## Statistics
- New patterns: 1 (parse_string)
- Total patterns: 4 (skip_whitespace, parse_number, continue, parse_string)
- New capabilities: 0 (uses existing ConstStep)
- Lines added: ~300
- Files modified: 9
- Parity status: Green ✅
Phase 143 P1: Complete
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 12:37:47 +09:00
|
|
|
// ============================================================================
|
|
|
|
|
// Parse String Pattern (Phase 143-P1)
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/// Try to extract parse_string pattern from loop
|
|
|
|
|
///
|
|
|
|
|
/// Pattern structure:
|
|
|
|
|
/// ```
|
|
|
|
|
/// loop(cond) {
|
|
|
|
|
/// // ... body statements (ch computation)
|
|
|
|
|
/// if quote_cond {
|
|
|
|
|
/// return result
|
|
|
|
|
/// }
|
|
|
|
|
/// if escape_cond {
|
|
|
|
|
/// // ... escape handling
|
|
|
|
|
/// carrier = carrier + const
|
|
|
|
|
/// continue
|
|
|
|
|
/// }
|
|
|
|
|
/// // ... regular character handling
|
|
|
|
|
/// carrier = carrier + const
|
|
|
|
|
/// }
|
|
|
|
|
/// ```
|
|
|
|
|
///
|
|
|
|
|
/// Returns (carrier_name, delta, body_stmts) if pattern matches.
|
|
|
|
|
///
|
|
|
|
|
/// # Phase 143-P1: Parse String Pattern Detection
|
|
|
|
|
///
|
|
|
|
|
/// This function delegates to `ast_feature_extractor::detect_parse_string_pattern`
|
|
|
|
|
/// for SSOT implementation.
|
|
|
|
|
pub fn try_extract_parse_string_pattern(body: &[ASTNode]) -> Option<(String, i64, Vec<ASTNode>)> {
|
|
|
|
|
ast_detect_parse_string(body).map(|info| (info.carrier_name, info.delta, info.body_stmts))
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-16 09:08:37 +09:00
|
|
|
// ============================================================================
|
|
|
|
|
// Continue Pattern (Phase 142-P1)
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/// Try to extract continue pattern from loop
|
|
|
|
|
///
|
|
|
|
|
/// Pattern structure:
|
|
|
|
|
/// ```
|
|
|
|
|
/// loop(cond) {
|
|
|
|
|
/// // ... optional body statements (Body)
|
|
|
|
|
/// if skip_cond {
|
|
|
|
|
/// carrier = carrier + const // Optional update before continue
|
|
|
|
|
/// continue
|
|
|
|
|
/// }
|
|
|
|
|
/// // ... rest of body statements (Rest)
|
|
|
|
|
/// carrier = carrier + const // Carrier update
|
|
|
|
|
/// }
|
|
|
|
|
/// ```
|
|
|
|
|
///
|
|
|
|
|
/// Returns (carrier_name, delta, body_stmts, rest_stmts) if pattern matches.
|
|
|
|
|
///
|
|
|
|
|
/// # Phase 142-P1: Continue Pattern Detection
|
|
|
|
|
///
|
|
|
|
|
/// This function delegates to `ast_feature_extractor::detect_continue_pattern`
|
|
|
|
|
/// for SSOT implementation.
|
|
|
|
|
pub fn try_extract_continue_pattern(
|
|
|
|
|
body: &[ASTNode],
|
|
|
|
|
) -> Option<(String, i64, Vec<ASTNode>, Vec<ASTNode>)> {
|
|
|
|
|
detect_continue_pattern(body).map(|info| {
|
|
|
|
|
(
|
|
|
|
|
info.carrier_name,
|
|
|
|
|
info.delta,
|
|
|
|
|
info.body_stmts,
|
|
|
|
|
info.rest_stmts,
|
|
|
|
|
)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-16 06:41:46 +09:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::{BinaryOperator, LiteralValue, Span};

    /// Builds a variable reference named `name` with an unknown span.
    fn var(name: &str) -> ASTNode {
        ASTNode::Variable {
            name: name.to_string(),
            span: Span::unknown(),
        }
    }

    /// Builds the assignment `name = name + 1`.
    fn increment_by_one(name: &str) -> ASTNode {
        let sum = ASTNode::BinaryOp {
            operator: BinaryOperator::Add,
            left: Box::new(var(name)),
            right: Box::new(ASTNode::Literal {
                value: LiteralValue::Integer(1),
                span: Span::unknown(),
            }),
            span: Span::unknown(),
        };
        ASTNode::Assignment {
            target: Box::new(var(name)),
            value: Box::new(sum),
            span: Span::unknown(),
        }
    }

    /// Builds `if is_ws { p = p + 1 }` with the supplied (optional) else branch.
    fn whitespace_check(else_body: Option<Vec<ASTNode>>) -> ASTNode {
        ASTNode::If {
            condition: Box::new(var("is_ws")),
            then_body: vec![increment_by_one("p")],
            else_body,
            span: Span::unknown(),
        }
    }

    /// Builds an else branch consisting of a single `break`.
    fn break_branch() -> Option<Vec<ASTNode>> {
        Some(vec![ASTNode::Break {
            span: Span::unknown(),
        }])
    }

    #[test]
    fn test_skip_whitespace_basic_pattern() {
        // Loop body: if is_ws { p = p + 1 } else { break }
        let body = vec![whitespace_check(break_branch())];

        let result = try_extract_skip_whitespace_pattern(&body);
        assert!(result.is_some());

        let (carrier_name, delta, body_stmts) = result.unwrap();
        assert_eq!(carrier_name, "p");
        assert_eq!(delta, 1);
        // Nothing precedes the `if`, so no body statements are reported.
        assert_eq!(body_stmts.len(), 0);
    }

    #[test]
    fn test_skip_whitespace_with_body() {
        // Loop body: ch = get_char(p); if is_ws { p = p + 1 } else { break }
        let read_char = ASTNode::Assignment {
            target: Box::new(var("ch")),
            value: Box::new(ASTNode::FunctionCall {
                name: "get_char".to_string(),
                arguments: vec![var("p")],
                span: Span::unknown(),
            }),
            span: Span::unknown(),
        };
        let body = vec![read_char, whitespace_check(break_branch())];

        let result = try_extract_skip_whitespace_pattern(&body);
        assert!(result.is_some());

        let (carrier_name, delta, body_stmts) = result.unwrap();
        assert_eq!(carrier_name, "p");
        assert_eq!(delta, 1);
        assert_eq!(body_stmts.len(), 1); // The assignment before the if
    }

    #[test]
    fn test_skip_whitespace_rejects_no_else() {
        // Loop body: if is_ws { p = p + 1 } (no else branch, so no break)
        let body = vec![whitespace_check(None)];

        let result = try_extract_skip_whitespace_pattern(&body);
        assert!(result.is_none());
    }
}
|