refactor(joinir): Phase 287 P1 - Modularize ast_feature_extractor (facade pattern)
Extracted pattern recognizers from ast_feature_extractor.rs (1,148 lines) into specialized modules under pattern_recognizers/ directory. **Structure**: - ast_feature_extractor.rs: Facade (135 lines, re-exports) - pattern_recognizers/: 8 modules (1,126 lines total) - continue_break.rs: continue/break/return detection - infinite_loop.rs: loop(true) detection - if_else_phi.rs: if-else PHI pattern detection - carrier_count.rs: carrier variable counting - parse_number.rs: parse_number pattern (+ read_digits) - parse_string.rs: parse_string pattern (+ continue pattern) - skip_whitespace.rs: skip_whitespace pattern **Contract**: - Semantic invariance: All existing APIs preserved via re-exports - No routing changes, no detection spec changes - Public API unchanged (facade pattern) **Verification**: - Build: 0 errors, 0 warnings - Pattern6: RC:9 (maintained) - Smoke tests: 154/154 PASS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@ -54,6 +54,7 @@
|
||||
|
||||
pub(in crate::mir::builder) mod common; // Phase 255 P2: Common AST helpers
|
||||
pub(in crate::mir::builder) mod extractors; // Phase 282 P3: Common extraction interfaces
|
||||
pub(in crate::mir::builder) mod pattern_recognizers; // Phase 287 P1: Modularized pattern recognizers
|
||||
pub(in crate::mir::builder) mod ast_feature_extractor;
|
||||
pub(in crate::mir::builder) mod policies; // Phase 93/94: Pattern routing policies (future expansion)
|
||||
pub(in crate::mir::builder) mod body_local_policy; // Phase 92 P3: promotion vs slot routing
|
||||
|
||||
@ -0,0 +1,61 @@
|
||||
//! Carrier Count Estimation
|
||||
//!
|
||||
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
|
||||
//!
|
||||
//! This module provides heuristic-based carrier variable counting.
|
||||
|
||||
use crate::ast::ASTNode;
|
||||
|
||||
/// Count carrier variables (variables assigned in loop body)
|
||||
///
|
||||
/// This is a heuristic: counts assignment statements as a proxy for carriers.
|
||||
/// A more precise implementation would track which specific variables are assigned.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Count of distinct carrier variables (0 or 1 in current implementation)
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// Current implementation returns 0 or 1 (at least one assignment present).
|
||||
/// Future enhancement: track individual variable assignments for precise carrier count.
|
||||
pub(crate) fn count_carriers_in_body(body: &[ASTNode]) -> usize {
|
||||
let mut count = 0;
|
||||
for node in body {
|
||||
match node {
|
||||
ASTNode::Assignment { .. } => count += 1,
|
||||
ASTNode::If {
|
||||
then_body,
|
||||
else_body,
|
||||
..
|
||||
} => {
|
||||
count += count_carriers_in_body(then_body);
|
||||
if let Some(else_body) = else_body {
|
||||
count += count_carriers_in_body(else_body);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
// Return at least 1 if we have assignments, otherwise 0
|
||||
if count > 0 {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A body without statements contains no assignment, hence no carrier.
    #[test]
    fn test_empty_body() {
        let no_stmts: &[ASTNode] = &[];
        assert_eq!(0, count_carriers_in_body(no_stmts));
    }
}
|
||||
@ -0,0 +1,147 @@
|
||||
//! Continue/Break/Return Detection
|
||||
//!
|
||||
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
|
||||
//!
|
||||
//! This module provides simple recursive detection of continue, break, and return statements
|
||||
//! within loop bodies and nested structures.
|
||||
|
||||
use crate::ast::ASTNode;
|
||||
|
||||
/// Detect if a loop body contains continue statements
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `true` if at least one continue statement is found in the body or nested structures
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// This is a simple recursive scan that doesn't handle nested loops perfectly,
|
||||
/// but is sufficient for initial pattern detection.
|
||||
pub(crate) fn detect_continue_in_body(body: &[ASTNode]) -> bool {
|
||||
for stmt in body {
|
||||
if has_continue_node(stmt) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Detect if a loop body contains break statements
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `true` if at least one break statement is found in the body or nested structures
|
||||
pub(crate) fn detect_break_in_body(body: &[ASTNode]) -> bool {
|
||||
for stmt in body {
|
||||
if has_break_node(stmt) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Detect if a loop body contains return statements
|
||||
///
|
||||
/// This is used for dev-only parity checks with structure SSOT (StepTree).
|
||||
pub(crate) fn detect_return_in_body(body: &[ASTNode]) -> bool {
|
||||
for stmt in body {
|
||||
if has_return_node(stmt) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Recursive helper to check if AST node contains continue
|
||||
pub(super) fn has_continue_node(node: &ASTNode) -> bool {
|
||||
match node {
|
||||
ASTNode::Continue { .. } => true,
|
||||
ASTNode::If {
|
||||
then_body,
|
||||
else_body,
|
||||
..
|
||||
} => {
|
||||
then_body.iter().any(has_continue_node)
|
||||
|| else_body
|
||||
.as_ref()
|
||||
.map_or(false, |e| e.iter().any(has_continue_node))
|
||||
}
|
||||
ASTNode::Loop { body, .. } => body.iter().any(has_continue_node),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursive helper to check if AST node contains break
|
||||
fn has_break_node(node: &ASTNode) -> bool {
|
||||
match node {
|
||||
ASTNode::Break { .. } => true,
|
||||
ASTNode::If {
|
||||
then_body,
|
||||
else_body,
|
||||
..
|
||||
} => {
|
||||
then_body.iter().any(has_break_node)
|
||||
|| else_body
|
||||
.as_ref()
|
||||
.map_or(false, |e| e.iter().any(has_break_node))
|
||||
}
|
||||
ASTNode::Loop { body, .. } => body.iter().any(has_break_node),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursive helper to check if AST node contains return
|
||||
fn has_return_node(node: &ASTNode) -> bool {
|
||||
match node {
|
||||
ASTNode::Return { .. } => true,
|
||||
ASTNode::If {
|
||||
then_body,
|
||||
else_body,
|
||||
..
|
||||
} => {
|
||||
then_body.iter().any(has_return_node)
|
||||
|| else_body
|
||||
.as_ref()
|
||||
.map_or(false, |e| e.iter().any(has_return_node))
|
||||
}
|
||||
ASTNode::Loop { body, .. } => body.iter().any(has_return_node),
|
||||
ASTNode::ScopeBox { body, .. } => body.iter().any(has_return_node),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare `continue` node is recognized directly.
    #[test]
    fn test_detect_continue_simple() {
        let span = crate::ast::Span::unknown();
        assert!(has_continue_node(&ASTNode::Continue { span }));
    }

    /// A bare `break` node is recognized directly.
    #[test]
    fn test_detect_break_simple() {
        let span = crate::ast::Span::unknown();
        assert!(has_break_node(&ASTNode::Break { span }));
    }

    /// An empty body contains neither `continue` nor `break`.
    #[test]
    fn test_empty_body() {
        let no_stmts: &[ASTNode] = &[];
        assert!(!detect_continue_in_body(no_stmts));
        assert!(!detect_break_in_body(no_stmts));
    }
}
|
||||
@ -0,0 +1,73 @@
|
||||
//! If-Else PHI Pattern Detection
|
||||
//!
|
||||
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
|
||||
//!
|
||||
//! This module detects if-else statements with potential PHI patterns.
|
||||
|
||||
use crate::ast::ASTNode;
|
||||
|
||||
/// Detect if-else statements with potential PHI pattern
|
||||
///
|
||||
/// Looks for if-else statements where both branches contain assignments.
|
||||
/// This is a heuristic indicating a potential PHI merge point.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `true` if at least one if-else statement with assignments in both branches is found
|
||||
///
|
||||
/// # Phase 264 P0: Conservative Implementation
|
||||
///
|
||||
/// Previously returned true if both if/else branches had assignments.
|
||||
/// This was too broad - it caught simple conditional assignments like:
|
||||
/// `if x then seg = "A" else seg = "B"`
|
||||
///
|
||||
/// Pattern3 is designed for if-sum patterns with arithmetic accumulation:
|
||||
/// `sum = sum + (if x then 1 else 0)`
|
||||
///
|
||||
/// Phase 264 P0: Return false to prevent misclassification.
|
||||
/// Effect: Loops with conditional assignment fall through to Pattern1.
|
||||
///
|
||||
/// Phase 264 P1: TODO - Implement accurate if-sum signature detection.
|
||||
pub(crate) fn detect_if_else_phi_in_body(body: &[ASTNode]) -> bool {
|
||||
// Phase 282 P5: Proper if-else PHI detection (re-enabled with ExtractionBased safety)
|
||||
//
|
||||
// This function provides initial classification for Pattern3IfPhi.
|
||||
// The actual validation is done by extractors::pattern3::extract_loop_with_if_phi_parts()
|
||||
// which performs deep checks (PHI assignments, no control flow, etc.)
|
||||
//
|
||||
// Here we just check: Does the loop body contain an if-else statement?
|
||||
// This allows Pattern3 to be attempted, and extraction will validate.
|
||||
|
||||
for stmt in body {
|
||||
if matches!(stmt, ASTNode::If { else_body: Some(_), .. }) {
|
||||
return true; // Found if-else
|
||||
}
|
||||
}
|
||||
false // No if-else found
|
||||
}
|
||||
|
||||
/// Phase 212.5: Detect ANY if statement in loop body (structural detection)
|
||||
///
|
||||
/// This function detects any if statement, regardless of whether it has an else branch.
|
||||
/// Used for routing single-carrier if-update patterns to Pattern 3.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `true` if at least one if statement is found (with or without else)
|
||||
#[allow(dead_code)]
|
||||
fn detect_if_in_body(body: &[ASTNode]) -> bool {
|
||||
for node in body {
|
||||
if let ASTNode::If { .. } = node {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
@ -0,0 +1,26 @@
|
||||
//! Infinite Loop Detection
|
||||
//!
|
||||
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
|
||||
//!
|
||||
//! This module detects infinite loop patterns (condition == true).
|
||||
|
||||
use crate::ast::ASTNode;
|
||||
|
||||
/// Phase 131-11: Detect infinite loop (condition == Literal(Bool(true)))
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `condition` - Loop condition AST node
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `true` if condition is a boolean literal with value true
|
||||
pub(crate) fn detect_infinite_loop(condition: &ASTNode) -> bool {
|
||||
matches!(
|
||||
condition,
|
||||
ASTNode::Literal {
|
||||
value: crate::ast::LiteralValue::Bool(true),
|
||||
..
|
||||
}
|
||||
)
|
||||
}
|
||||
@ -0,0 +1,20 @@
|
||||
//! Pattern Recognizers Module
|
||||
//!
|
||||
//! Phase 287 P1: Modularization of AST pattern detection functions.
|
||||
//!
|
||||
//! This module contains specialized recognizers for different loop patterns:
|
||||
//! - continue/break/return detection
|
||||
//! - infinite loop detection
|
||||
//! - if-else phi detection
|
||||
//! - carrier count estimation
|
||||
//! - parse_number/string/whitespace patterns
|
||||
//!
|
||||
//! Each recognizer is responsible for a single "question" about the AST structure.
|
||||
|
||||
pub mod continue_break;
|
||||
pub mod infinite_loop;
|
||||
pub mod if_else_phi;
|
||||
pub mod carrier_count;
|
||||
pub mod parse_number;
|
||||
pub mod parse_string;
|
||||
pub mod skip_whitespace;
|
||||
@ -0,0 +1,259 @@
|
||||
//! Parse Number/Digit Pattern Detection
|
||||
//!
|
||||
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
|
||||
//!
|
||||
//! This module detects parse_number and digit collection patterns.
|
||||
|
||||
use crate::ast::{ASTNode, BinaryOperator, LiteralValue};
|
||||
|
||||
/// Parse number pattern information
|
||||
///
|
||||
/// This struct holds the extracted information from a recognized parse_number pattern.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ParseNumberInfo {
|
||||
/// Carrier variable name (e.g., "i")
|
||||
pub carrier_name: String,
|
||||
/// Constant step increment (e.g., 1 for `i = i + 1`)
|
||||
pub delta: i64,
|
||||
/// Body statements before the break check (may be empty)
|
||||
pub body_stmts: Vec<ASTNode>,
|
||||
/// Rest statements after break check (usually includes result append and carrier update)
|
||||
pub rest_stmts: Vec<ASTNode>,
|
||||
}
|
||||
|
||||
/// Detect parse_number / digit collection pattern in loop body
|
||||
///
|
||||
/// Phase 143-P0: Pattern with break in THEN clause (opposite of skip_whitespace)
|
||||
///
|
||||
/// Pattern structure:
|
||||
/// ```
|
||||
/// loop(cond) {
|
||||
/// // ... optional body statements (ch, digit_pos computation)
|
||||
/// if invalid_cond {
|
||||
/// break
|
||||
/// }
|
||||
/// // ... rest statements (result append, carrier update)
|
||||
/// carrier = carrier + const
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Recognized pattern:
|
||||
/// - parse_number: `i < len`, `if digit_pos < 0 { break }`, `i = i + 1`
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `Some(ParseNumberInfo)` if the pattern matches, `None` otherwise
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// This is complementary to skip_whitespace pattern (which has break in ELSE clause).
|
||||
/// Used by loop_canonicalizer (Phase 143) for digit collection patterns.
|
||||
pub fn detect_parse_number_pattern(body: &[ASTNode]) -> Option<ParseNumberInfo> {
|
||||
if body.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Find the if statement with break in THEN clause
|
||||
let mut if_idx = None;
|
||||
for (i, stmt) in body.iter().enumerate() {
|
||||
if let ASTNode::If {
|
||||
then_body,
|
||||
else_body,
|
||||
..
|
||||
} = stmt
|
||||
{
|
||||
// Check if then_body contains break and else_body is None
|
||||
if else_body.is_none()
|
||||
&& then_body.len() == 1
|
||||
&& matches!(then_body[0], ASTNode::Break { .. })
|
||||
{
|
||||
if_idx = Some(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let if_idx = if_idx?;
|
||||
|
||||
// Extract body statements before the if
|
||||
let body_stmts = body[..if_idx].to_vec();
|
||||
|
||||
// Extract rest statements after the if (should include carrier update)
|
||||
let rest_stmts = body[if_idx + 1..].to_vec();
|
||||
|
||||
if rest_stmts.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Find carrier update in rest_stmts (last statement should be carrier = carrier + const)
|
||||
let last_stmt = &rest_stmts[rest_stmts.len() - 1];
|
||||
|
||||
let (carrier_name, delta) = match last_stmt {
|
||||
ASTNode::Assignment { target, value, .. } => {
|
||||
// Extract target variable name
|
||||
let target_name = match target.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name.clone(),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Value must be: target (+|-) const
|
||||
match value.as_ref() {
|
||||
ASTNode::BinaryOp {
|
||||
operator,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} => {
|
||||
// Accept both Add (+1) and Subtract (-1)
|
||||
let op_multiplier = match operator {
|
||||
BinaryOperator::Add => 1,
|
||||
BinaryOperator::Subtract => -1,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Left must be same variable
|
||||
let left_name = match left.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if left_name != &target_name {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Right must be integer literal
|
||||
let const_val = match right.as_ref() {
|
||||
ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
} => *n,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Calculate delta with sign
|
||||
let delta = const_val * op_multiplier;
|
||||
|
||||
(target_name, delta)
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
Some(ParseNumberInfo {
|
||||
carrier_name,
|
||||
delta,
|
||||
body_stmts,
|
||||
rest_stmts,
|
||||
})
|
||||
}
|
||||
|
||||
/// loop(true) + break-only digits pattern information
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ReadDigitsLoopTrueInfo {
|
||||
/// Counter variable name (e.g., "i")
|
||||
pub carrier_name: String,
|
||||
/// Constant step increment (currently only supports +1)
|
||||
pub delta: i64,
|
||||
/// Body statements before the digit-check if (may include `ch = substring(...)`, `if ch==\"\" { break }`, etc.)
|
||||
pub body_stmts: Vec<ASTNode>,
|
||||
}
|
||||
|
||||
/// Detect read_digits_from-like pattern in loop body (loop(true) expected at callsite)
|
||||
///
|
||||
/// Recognized minimal shape (JsonCursorBox/MiniJsonLoader):
|
||||
/// ```text
|
||||
/// loop(true) {
|
||||
/// local ch = s.substring(i, i+1)
|
||||
/// if ch == "" { break }
|
||||
/// if is_digit(ch) { out = out + ch; i = i + 1 } else { break }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Contract (Phase 104 minimal):
|
||||
/// - Last statement is `if ... { ... } else { break }`
|
||||
/// - Then branch contains an update `i = i + 1`
|
||||
/// - Then branch may contain other updates (e.g., `out = out + ch`)
|
||||
pub fn detect_read_digits_loop_true_pattern(body: &[ASTNode]) -> Option<ReadDigitsLoopTrueInfo> {
|
||||
if body.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Last statement must be if-else with break
|
||||
let last_stmt = &body[body.len() - 1];
|
||||
let (then_body, else_body) = match last_stmt {
|
||||
ASTNode::If {
|
||||
then_body,
|
||||
else_body: Some(else_body),
|
||||
..
|
||||
} => (then_body, else_body),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Else branch must be single break
|
||||
if else_body.len() != 1 || !matches!(else_body[0], ASTNode::Break { .. }) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Then branch must include `i = i + 1` (allow other statements too)
|
||||
let mut carrier_name: Option<String> = None;
|
||||
let mut delta: Option<i64> = None;
|
||||
for stmt in then_body {
|
||||
let (name, d) = match stmt {
|
||||
ASTNode::Assignment { target, value, .. } => {
|
||||
let target_name = match target.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name.clone(),
|
||||
_ => continue,
|
||||
};
|
||||
match value.as_ref() {
|
||||
ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Add,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} => {
|
||||
let left_name = match left.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name,
|
||||
_ => continue,
|
||||
};
|
||||
if left_name != &target_name {
|
||||
continue;
|
||||
}
|
||||
let const_val = match right.as_ref() {
|
||||
ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
} => *n,
|
||||
_ => continue,
|
||||
};
|
||||
(target_name, const_val)
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Phase 104 minimal: only accept +1 step
|
||||
if d == 1 {
|
||||
carrier_name = Some(name);
|
||||
delta = Some(1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let carrier_name = carrier_name?;
|
||||
let delta = delta?;
|
||||
|
||||
let body_stmts = body[..body.len() - 1].to_vec();
|
||||
Some(ReadDigitsLoopTrueInfo {
|
||||
carrier_name,
|
||||
delta,
|
||||
body_stmts,
|
||||
})
|
||||
}
|
||||
@ -0,0 +1,393 @@
|
||||
//! Parse String/Array Pattern Detection
|
||||
//!
|
||||
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
|
||||
//!
|
||||
//! This module detects parse_string and parse_array patterns with continue + return exits.
|
||||
|
||||
use crate::ast::{ASTNode, BinaryOperator, LiteralValue};
|
||||
|
||||
// Import the recursive `continue` detector from the sibling continue_break module (plain `use`, not a re-export)
|
||||
use super::continue_break::has_continue_node;
|
||||
|
||||
/// Parse string/array pattern information
|
||||
///
|
||||
/// This struct holds the extracted information from a recognized parse_string or parse_array pattern.
|
||||
/// Both patterns share the same structure: continue + return exits with carrier updates.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ParseStringInfo {
|
||||
/// Carrier variable name (e.g., "p")
|
||||
pub carrier_name: String,
|
||||
/// Base constant step increment (e.g., 1 for `p = p + 1`)
|
||||
pub delta: i64,
|
||||
/// Body statements before the return/continue checks
|
||||
pub body_stmts: Vec<ASTNode>,
|
||||
}
|
||||
|
||||
/// Detect parse_string or parse_array pattern in loop body
|
||||
///
|
||||
/// Phase 143-P1/P2: Pattern with both continue (escape/separator handling) AND return (stop condition)
|
||||
///
|
||||
/// Pattern structure (parse_string example):
|
||||
/// ```
|
||||
/// loop(p < len) {
|
||||
/// local ch = s.substring(p, p + 1)
|
||||
///
|
||||
/// // Check for closing quote (return)
|
||||
/// if ch == "\"" {
|
||||
/// return result
|
||||
/// }
|
||||
///
|
||||
/// // Check for escape sequence (continue after processing)
|
||||
/// if ch == "\\" {
|
||||
/// result = result + ch
|
||||
/// p = p + 1
|
||||
/// if p < len {
|
||||
/// result = result + s.substring(p, p + 1)
|
||||
/// p = p + 1
|
||||
/// continue
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
/// // Regular character
|
||||
/// result = result + ch
|
||||
/// p = p + 1
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Pattern structure (parse_array example):
|
||||
/// ```
|
||||
/// loop(p < len) {
|
||||
/// local ch = s.substring(p, p + 1)
|
||||
///
|
||||
/// // Check for array end (return)
|
||||
/// if ch == "]" {
|
||||
/// return result
|
||||
/// }
|
||||
///
|
||||
/// // Check for separator (continue after processing)
|
||||
/// if ch == "," {
|
||||
/// arr.push(elem)
|
||||
/// elem = ""
|
||||
/// p = p + 1
|
||||
/// continue
|
||||
/// }
|
||||
///
|
||||
/// // Accumulate element
|
||||
/// elem = elem + ch
|
||||
/// p = p + 1
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Recognized characteristics:
|
||||
/// - Has return statement (early exit on stop condition: quote for string, ']' for array)
|
||||
/// - Has continue statement (skip after separator: escape for string, ',' for array)
|
||||
/// - Variable step update (p++ normally, but p+=2 on escape for string)
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `Some(ParseStringInfo)` if the pattern matches, `None` otherwise
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// This detector handles both parse_string and parse_array patterns as they share
|
||||
/// the same structural characteristics:
|
||||
/// - Multiple exit types (return AND continue)
|
||||
/// - Variable step increment (conditional on separator/escape)
|
||||
/// - Nested control flow (separator/escape has nested if inside)
|
||||
pub fn detect_parse_string_pattern(body: &[ASTNode]) -> Option<ParseStringInfo> {
|
||||
if body.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// We need to find:
|
||||
// 1. An if statement with return in then_body
|
||||
// 2. An if statement with continue in then_body (nested inside)
|
||||
// 3. Carrier updates (normal and escape-case)
|
||||
|
||||
let mut has_return = false;
|
||||
let mut has_continue = false;
|
||||
let mut carrier_name = None;
|
||||
let mut delta = None;
|
||||
|
||||
// Scan for return statement
|
||||
for stmt in body {
|
||||
if let ASTNode::If { then_body, .. } = stmt {
|
||||
if then_body
|
||||
.iter()
|
||||
.any(|s| matches!(s, ASTNode::Return { .. }))
|
||||
{
|
||||
has_return = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !has_return {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Scan for continue statement and carrier update (with recursive check for nested continue)
|
||||
for stmt in body {
|
||||
if let ASTNode::If { then_body, .. } = stmt {
|
||||
// Check for continue in then_body (including nested)
|
||||
if then_body.iter().any(|s| has_continue_node(s)) {
|
||||
has_continue = true;
|
||||
}
|
||||
|
||||
// Extract carrier update from then_body
|
||||
for s in then_body {
|
||||
if let ASTNode::Assignment { target, value, .. } = s {
|
||||
if let ASTNode::Variable { name, .. } = target.as_ref() {
|
||||
if let ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Add,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} = value.as_ref()
|
||||
{
|
||||
if let ASTNode::Variable {
|
||||
name: left_name, ..
|
||||
} = left.as_ref()
|
||||
{
|
||||
if left_name == name {
|
||||
if let ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
} = right.as_ref()
|
||||
{
|
||||
carrier_name = Some(name.clone());
|
||||
delta = Some(*n);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Also check for carrier update in main body
|
||||
if let ASTNode::Assignment { target, value, .. } = stmt {
|
||||
if let ASTNode::Variable { name, .. } = target.as_ref() {
|
||||
if let ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Add,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} = value.as_ref()
|
||||
{
|
||||
if let ASTNode::Variable {
|
||||
name: left_name, ..
|
||||
} = left.as_ref()
|
||||
{
|
||||
if left_name == name {
|
||||
if let ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
} = right.as_ref()
|
||||
{
|
||||
if carrier_name.is_none() {
|
||||
carrier_name = Some(name.clone());
|
||||
delta = Some(*n);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !has_return || !has_continue {
|
||||
return None;
|
||||
}
|
||||
|
||||
let carrier_name = carrier_name?;
|
||||
let delta = delta?;
|
||||
|
||||
// Extract body statements (for now, just the first statement which should be ch assignment)
|
||||
let body_stmts = if !body.is_empty() {
|
||||
vec![body[0].clone()]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
Some(ParseStringInfo {
|
||||
carrier_name,
|
||||
delta,
|
||||
body_stmts,
|
||||
})
|
||||
}
|
||||
|
||||
/// Continue pattern information
|
||||
///
|
||||
/// This struct holds the extracted information from a recognized continue pattern.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ContinuePatternInfo {
|
||||
/// Carrier variable name (e.g., "i")
|
||||
pub carrier_name: String,
|
||||
/// Constant step increment (e.g., 1 for `i = i + 1`)
|
||||
pub delta: i64,
|
||||
/// Body statements before the continue check (may be empty)
|
||||
pub body_stmts: Vec<ASTNode>,
|
||||
/// Body statements after the continue check (usually includes carrier update)
|
||||
pub rest_stmts: Vec<ASTNode>,
|
||||
}
|
||||
|
||||
/// Detect continue pattern in loop body
|
||||
///
|
||||
/// Pattern structure:
|
||||
/// ```
|
||||
/// loop(cond) {
|
||||
/// // ... optional body statements (Body)
|
||||
/// if skip_cond {
|
||||
/// carrier = carrier + const // Optional update before continue
|
||||
/// continue
|
||||
/// }
|
||||
/// // ... rest of body statements (Rest)
|
||||
/// carrier = carrier + const // Carrier update
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `Some(ContinuePatternInfo)` if the pattern matches, `None` otherwise
|
||||
pub fn detect_continue_pattern(body: &[ASTNode]) -> Option<ContinuePatternInfo> {
|
||||
if body.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Find the if statement with continue
|
||||
let mut if_idx = None;
|
||||
for (i, stmt) in body.iter().enumerate() {
|
||||
if let ASTNode::If { then_body, .. } = stmt {
|
||||
// Check if then_body contains continue
|
||||
if then_body
|
||||
.iter()
|
||||
.any(|s| matches!(s, ASTNode::Continue { .. }))
|
||||
{
|
||||
if_idx = Some(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let if_idx = if_idx?;
|
||||
|
||||
// Extract body statements before the if
|
||||
let body_stmts = body[..if_idx].to_vec();
|
||||
|
||||
// Extract the if statement
|
||||
let if_stmt = &body[if_idx];
|
||||
|
||||
// The if must have continue in then branch
|
||||
let then_body = match if_stmt {
|
||||
ASTNode::If {
|
||||
then_body,
|
||||
else_body,
|
||||
..
|
||||
} => {
|
||||
// For simple continue pattern, else_body should be None
|
||||
if else_body.is_some() {
|
||||
return None;
|
||||
}
|
||||
then_body
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Check if then_body contains carrier update before continue
|
||||
// For now, we'll look for the pattern after the if statement
|
||||
|
||||
// Extract rest statements after the if
|
||||
let rest_stmts = body[if_idx + 1..].to_vec();
|
||||
|
||||
// Find carrier update in rest_stmts (last statement should be carrier = carrier +/- const)
|
||||
if rest_stmts.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let last_stmt = &rest_stmts[rest_stmts.len() - 1];
|
||||
|
||||
let (carrier_name, delta) = match last_stmt {
|
||||
ASTNode::Assignment { target, value, .. } => {
|
||||
// Extract target variable name
|
||||
let target_name = match target.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name.clone(),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Value must be: target (+|-) const
|
||||
match value.as_ref() {
|
||||
ASTNode::BinaryOp {
|
||||
operator,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} => {
|
||||
// Accept both Add (+1) and Subtract (-1)
|
||||
let op_multiplier = match operator {
|
||||
BinaryOperator::Add => 1,
|
||||
BinaryOperator::Subtract => -1,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Left must be same variable
|
||||
let left_name = match left.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if left_name != &target_name {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Right must be integer literal
|
||||
let const_val = match right.as_ref() {
|
||||
ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
} => *n,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Calculate delta with sign
|
||||
let delta = const_val * op_multiplier;
|
||||
|
||||
(target_name, delta)
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Check if then_body has carrier update before continue
|
||||
// If so, we need to validate it matches
|
||||
for stmt in then_body {
|
||||
if let ASTNode::Assignment { target, .. } = stmt {
|
||||
if let ASTNode::Variable { name, .. } = target.as_ref() {
|
||||
if name == &carrier_name {
|
||||
// There's a carrier update before continue
|
||||
// For now, we'll just check it exists
|
||||
// Could validate it matches the pattern later
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(ContinuePatternInfo {
|
||||
carrier_name,
|
||||
delta,
|
||||
body_stmts,
|
||||
rest_stmts,
|
||||
})
|
||||
}
|
||||
@ -0,0 +1,147 @@
|
||||
//! Skip Whitespace Pattern Detection
|
||||
//!
|
||||
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
|
||||
//!
|
||||
//! This module detects skip_whitespace and trim patterns.
|
||||
|
||||
use crate::ast::{ASTNode, BinaryOperator, LiteralValue};
|
||||
|
||||
/// Skip whitespace pattern information
|
||||
///
|
||||
/// This struct holds the extracted information from a recognized skip_whitespace pattern.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct SkipWhitespaceInfo {
|
||||
/// Carrier variable name (e.g., "p")
|
||||
pub carrier_name: String,
|
||||
/// Constant step increment (e.g., 1 for `p = p + 1`)
|
||||
pub delta: i64,
|
||||
/// Body statements before the if-else (may be empty)
|
||||
pub body_stmts: Vec<ASTNode>,
|
||||
}
|
||||
|
||||
/// Detect skip_whitespace / trim leading/trailing pattern in loop body
|
||||
///
|
||||
/// Phase 142 P0: Generalized to handle both +1 and -1 patterns
|
||||
///
|
||||
/// Pattern structure:
|
||||
/// ```
|
||||
/// loop(cond) {
|
||||
/// // ... optional body statements (Body)
|
||||
/// if check_cond {
|
||||
/// carrier = carrier (+|-) const
|
||||
/// } else {
|
||||
/// break
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Recognized patterns:
|
||||
/// - skip_whitespace: `p < len`, `p = p + 1`
|
||||
/// - trim_leading: `start < end`, `start = start + 1`
|
||||
/// - trim_trailing: `end > start`, `end = end - 1`
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `Some(SkipWhitespaceInfo)` if the pattern matches, `None` otherwise
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// This is the SSOT for skip_whitespace/trim pattern detection.
|
||||
/// Used by both loop_canonicalizer (Phase 137) and future pattern analyzers.
|
||||
pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespaceInfo> {
|
||||
if body.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Last statement must be if-else with break
|
||||
let last_stmt = &body[body.len() - 1];
|
||||
|
||||
let (then_body, else_body) = match last_stmt {
|
||||
ASTNode::If {
|
||||
then_body,
|
||||
else_body: Some(else_body),
|
||||
..
|
||||
} => (then_body, else_body),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Then branch must be single assignment: carrier = carrier (+|-) const
|
||||
if then_body.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (carrier_name, delta) = match &then_body[0] {
|
||||
ASTNode::Assignment { target, value, .. } => {
|
||||
// Extract target variable name
|
||||
let target_name = match target.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name.clone(),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Value must be: target (+|-) const
|
||||
match value.as_ref() {
|
||||
ASTNode::BinaryOp {
|
||||
operator,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} => {
|
||||
// Phase 142 P0: Accept both Add (+1) and Subtract (-1)
|
||||
let op_multiplier = match operator {
|
||||
BinaryOperator::Add => 1,
|
||||
BinaryOperator::Subtract => -1,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Left must be same variable
|
||||
let left_name = match left.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if left_name != &target_name {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Right must be integer literal
|
||||
let const_val = match right.as_ref() {
|
||||
ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
} => *n,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Calculate delta with sign (e.g., +1 or -1)
|
||||
let delta = const_val * op_multiplier;
|
||||
|
||||
(target_name, delta)
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Else branch must be single break
|
||||
if else_body.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
match &else_body[0] {
|
||||
ASTNode::Break { .. } => {
|
||||
// Success! Extract body statements (all except last if)
|
||||
let body_stmts = body[..body.len() - 1].to_vec();
|
||||
Some(SkipWhitespaceInfo {
|
||||
carrier_name,
|
||||
delta,
|
||||
body_stmts,
|
||||
})
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user