refactor(joinir): Phase 287 P1 - Modularize ast_feature_extractor (facade pattern)

Extracted pattern recognizers from ast_feature_extractor.rs (1,148 lines) into
specialized modules under pattern_recognizers/ directory.

**Structure**:
- ast_feature_extractor.rs: Facade (135 lines, re-exports)
- pattern_recognizers/: 8 files (1,126 lines total): mod.rs (module declarations) plus the 7 recognizer modules below
  - continue_break.rs: continue/break/return detection
  - infinite_loop.rs: loop(true) detection
  - if_else_phi.rs: if-else PHI pattern detection
  - carrier_count.rs: carrier variable counting
  - parse_number.rs: parse_number pattern (+ read_digits)
  - parse_string.rs: parse_string pattern (+ continue pattern)
  - skip_whitespace.rs: skip_whitespace pattern

**Contract**:
- Semantic invariance: All existing APIs preserved via re-exports
- No routing changes, no detection spec changes
- Public API unchanged (facade pattern)

**Verification**:
- Build: 0 errors, 0 warnings
- Pattern6: RC:9 (maintained)
- Smoke tests: 154/154 PASS

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-27 10:42:17 +09:00
parent e51777b448
commit de1cd1fea0
10 changed files with 1169 additions and 1055 deletions

View File

@ -54,6 +54,7 @@
pub(in crate::mir::builder) mod common; // Phase 255 P2: Common AST helpers
pub(in crate::mir::builder) mod extractors; // Phase 282 P3: Common extraction interfaces
pub(in crate::mir::builder) mod pattern_recognizers; // Phase 287 P1: Modularized pattern recognizers
pub(in crate::mir::builder) mod ast_feature_extractor; // Phase 287 P1: Facade re-exporting pattern_recognizers
pub(in crate::mir::builder) mod policies; // Phase 93/94: Pattern routing policies (future expansion)
pub(in crate::mir::builder) mod body_local_policy; // Phase 92 P3: promotion vs slot routing

View File

@ -0,0 +1,61 @@
//! Carrier Count Estimation
//!
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
//!
//! This module provides heuristic-based carrier variable counting.
use crate::ast::ASTNode;
/// Estimate the number of carrier variables in a loop body.
///
/// This is a coarse heuristic rather than a real count: the presence of any
/// assignment statement is taken as evidence of a carrier. Individual variables
/// are not tracked.
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `1` if the body contains at least one assignment, either directly or inside
/// the then/else branch of an `if` (searched recursively); `0` otherwise.
///
/// # Notes
///
/// Only `Assignment` and `If` nodes are inspected; every other node kind is
/// ignored. A precise per-variable carrier count is left as a future enhancement.
pub(crate) fn count_carriers_in_body(body: &[ASTNode]) -> usize {
    /// Returns `true` if `stmts` holds an assignment directly or within nested if/else branches.
    fn assigns_anything(stmts: &[ASTNode]) -> bool {
        stmts.iter().any(|stmt| match stmt {
            ASTNode::Assignment { .. } => true,
            ASTNode::If {
                then_body,
                else_body,
                ..
            } => {
                assigns_anything(then_body)
                    || match else_body {
                        Some(else_stmts) => assigns_anything(else_stmts),
                        None => false,
                    }
            }
            _ => false,
        })
    }

    // Collapse "some assignment exists" into the 0-or-1 carrier estimate.
    usize::from(assigns_anything(body))
}
// Unit tests for the carrier-count heuristic.
#[cfg(test)]
mod tests {
use super::*;
// An empty loop body contains no assignments, so no carrier is reported.
#[test]
fn test_empty_body() {
let empty: Vec<ASTNode> = vec![];
assert_eq!(count_carriers_in_body(&empty), 0);
}
}

View File

@ -0,0 +1,147 @@
//! Continue/Break/Return Detection
//!
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
//!
//! This module provides simple recursive detection of continue, break, and return statements
//! within loop bodies and nested structures.
use crate::ast::ASTNode;
/// Detect if a loop body contains continue statements
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `true` if at least one continue statement is found in the body or nested structures
///
/// # Notes
///
/// This is a simple recursive scan that doesn't handle nested loops perfectly,
/// but is sufficient for initial pattern detection.
pub(crate) fn detect_continue_in_body(body: &[ASTNode]) -> bool {
for stmt in body {
if has_continue_node(stmt) {
return true;
}
}
false
}
/// Detect if a loop body contains break statements
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `true` if at least one break statement is found in the body or nested structures
pub(crate) fn detect_break_in_body(body: &[ASTNode]) -> bool {
for stmt in body {
if has_break_node(stmt) {
return true;
}
}
false
}
/// Detect if a loop body contains return statements
///
/// This is used for dev-only parity checks with structure SSOT (StepTree).
pub(crate) fn detect_return_in_body(body: &[ASTNode]) -> bool {
for stmt in body {
if has_return_node(stmt) {
return true;
}
}
false
}
/// Recursive helper to check if AST node contains continue
pub(super) fn has_continue_node(node: &ASTNode) -> bool {
match node {
ASTNode::Continue { .. } => true,
ASTNode::If {
then_body,
else_body,
..
} => {
then_body.iter().any(has_continue_node)
|| else_body
.as_ref()
.map_or(false, |e| e.iter().any(has_continue_node))
}
ASTNode::Loop { body, .. } => body.iter().any(has_continue_node),
_ => false,
}
}
/// Recursive helper to check if AST node contains break
fn has_break_node(node: &ASTNode) -> bool {
match node {
ASTNode::Break { .. } => true,
ASTNode::If {
then_body,
else_body,
..
} => {
then_body.iter().any(has_break_node)
|| else_body
.as_ref()
.map_or(false, |e| e.iter().any(has_break_node))
}
ASTNode::Loop { body, .. } => body.iter().any(has_break_node),
_ => false,
}
}
/// Recursive helper to check if AST node contains return
fn has_return_node(node: &ASTNode) -> bool {
match node {
ASTNode::Return { .. } => true,
ASTNode::If {
then_body,
else_body,
..
} => {
then_body.iter().any(has_return_node)
|| else_body
.as_ref()
.map_or(false, |e| e.iter().any(has_return_node))
}
ASTNode::Loop { body, .. } => body.iter().any(has_return_node),
ASTNode::ScopeBox { body, .. } => body.iter().any(has_return_node),
_ => false,
}
}
// Unit tests for the continue/break/return detectors.
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare `continue` node is recognized by the continue helper.
    #[test]
    fn test_detect_continue_simple() {
        let continue_node = ASTNode::Continue {
            span: crate::ast::Span::unknown(),
        };
        assert!(has_continue_node(&continue_node));
    }

    /// A bare `break` node is recognized by the break helper.
    #[test]
    fn test_detect_break_simple() {
        let break_node = ASTNode::Break {
            span: crate::ast::Span::unknown(),
        };
        assert!(has_break_node(&break_node));
    }

    /// An empty body contains none of the three exit kinds.
    #[test]
    fn test_empty_body() {
        let empty: Vec<ASTNode> = vec![];
        assert!(!detect_continue_in_body(&empty));
        assert!(!detect_break_in_body(&empty));
        assert!(!detect_return_in_body(&empty));
    }

    /// Each helper only reacts to its own exit kind.
    #[test]
    fn test_exit_kinds_are_not_confused() {
        let continue_node = ASTNode::Continue {
            span: crate::ast::Span::unknown(),
        };
        let break_node = ASTNode::Break {
            span: crate::ast::Span::unknown(),
        };
        assert!(!has_break_node(&continue_node));
        assert!(!has_return_node(&continue_node));
        assert!(!has_continue_node(&break_node));
        assert!(!has_return_node(&break_node));
    }
}

View File

@ -0,0 +1,73 @@
//! If-Else PHI Pattern Detection
//!
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
//!
//! This module detects if-else statements with potential PHI patterns.
use crate::ast::ASTNode;
/// Detect a potential if-else PHI shape in a loop body.
///
/// This is only the *initial classification* for Pattern3IfPhi: it answers
/// "does the loop body contain a top-level `if` that has an `else` branch?".
/// It does not inspect the branches (no assignment or control-flow checks) and
/// it does not look inside nested statements.
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `true` if at least one direct child of `body` is an `if` with an `else` branch.
///
/// # History
///
/// - Phase 264 P0: the detector was disabled (always returned `false`) because
///   the earlier "both branches assign" heuristic also caught plain conditional
///   assignments such as `if x then seg = "A" else seg = "B"`, whereas Pattern3
///   targets if-sum accumulation like `sum = sum + (if x then 1 else 0)`.
/// - Phase 282 P5: re-enabled as the structural check described above. Deep
///   validation (PHI assignments, no control flow, etc.) is delegated to
///   `extractors::pattern3::extract_loop_with_if_phi_parts()`, so a `true`
///   result here only allows Pattern3 to be attempted.
pub(crate) fn detect_if_else_phi_in_body(body: &[ASTNode]) -> bool {
    body.iter().any(|stmt| {
        matches!(
            stmt,
            ASTNode::If {
                else_body: Some(_),
                ..
            }
        )
    })
}
/// Phase 212.5: Structural check for any `if` statement in a loop body.
///
/// Unlike `detect_if_else_phi_in_body`, an `else` branch is not required.
/// Intended for routing single-carrier if-update patterns to Pattern 3.
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `true` if at least one direct child of `body` is an `if` (with or without else)
#[allow(dead_code)]
fn detect_if_in_body(body: &[ASTNode]) -> bool {
    body.iter().any(|stmt| matches!(stmt, ASTNode::If { .. }))
}

View File

@ -0,0 +1,26 @@
//! Infinite Loop Detection
//!
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
//!
//! This module detects infinite loop patterns (condition == true).
use crate::ast::ASTNode;
/// Phase 131-11: Recognize an infinite loop, i.e. a condition that is the literal `true`.
///
/// # Arguments
///
/// * `condition` - Loop condition AST node
///
/// # Returns
///
/// `true` only when `condition` is a literal node holding the boolean value `true`;
/// any other literal or expression yields `false`.
pub(crate) fn detect_infinite_loop(condition: &ASTNode) -> bool {
    if let ASTNode::Literal { value, .. } = condition {
        matches!(value, crate::ast::LiteralValue::Bool(true))
    } else {
        false
    }
}

View File

@ -0,0 +1,20 @@
//! Pattern Recognizers Module
//!
//! Phase 287 P1: Modularization of AST pattern detection functions.
//!
//! This module contains specialized recognizers for different loop patterns:
//! - continue/break/return detection
//! - infinite loop detection
//! - if-else phi detection
//! - carrier count estimation
//! - parse_number/string/whitespace patterns
//!
//! Each recognizer is responsible for a single "question" about the AST structure.
// continue/break/return detection
pub mod continue_break;
// loop(true) detection
pub mod infinite_loop;
// if-else PHI pattern detection
pub mod if_else_phi;
// carrier variable count estimation
pub mod carrier_count;
// parse_number pattern (+ read_digits loop(true) pattern)
pub mod parse_number;
// parse_string / parse_array pattern (+ continue pattern)
pub mod parse_string;
// skip_whitespace / trim pattern
pub mod skip_whitespace;

View File

@ -0,0 +1,259 @@
//! Parse Number/Digit Pattern Detection
//!
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
//!
//! This module detects parse_number and digit collection patterns.
use crate::ast::{ASTNode, BinaryOperator, LiteralValue};
/// Parse number pattern information
///
/// This struct holds the extracted information from a recognized parse_number pattern.
/// It is produced by `detect_parse_number_pattern`, which splits the loop body around
/// the first `if ... { break }` guard (the guard itself is stored in neither list).
#[derive(Debug, Clone, PartialEq)]
pub struct ParseNumberInfo {
/// Carrier variable name (e.g., "i")
pub carrier_name: String,
/// Signed constant step (e.g., 1 for `i = i + 1`, -1 for `i = i - 1`)
pub delta: i64,
/// Body statements before the break check (may be empty)
pub body_stmts: Vec<ASTNode>,
/// Rest statements after break check (never empty; the last one is the carrier update)
pub rest_stmts: Vec<ASTNode>,
}
/// Detect parse_number / digit collection pattern in loop body
///
/// Phase 143-P0: Pattern with break in THEN clause (opposite of skip_whitespace)
///
/// Pattern structure (pseudo-code, not Rust):
/// ```text
/// loop(cond) {
///     // ... optional body statements (ch, digit_pos computation)
///     if invalid_cond {
///         break
///     }
///     // ... rest statements (result append, carrier update)
///     carrier = carrier + const
/// }
/// ```
///
/// Recognized pattern:
/// - parse_number: `i < len`, `if digit_pos < 0 { break }`, `i = i + 1`
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `Some(ParseNumberInfo)` if the pattern matches, `None` otherwise. Matching requires:
/// - a top-level `if` without `else` whose then-branch is exactly one `break`
///   (the first such `if` is used),
/// - at least one statement after that `if`,
/// - the last statement being `carrier = carrier (+|-) <integer literal>`.
///
/// # Notes
///
/// This is complementary to skip_whitespace pattern (which has break in ELSE clause).
/// Used by loop_canonicalizer (Phase 143) for digit collection patterns.
pub fn detect_parse_number_pattern(body: &[ASTNode]) -> Option<ParseNumberInfo> {
    // Locate the first `if <cond> { break }` guard that has no else branch.
    let if_idx = body.iter().position(|stmt| {
        matches!(
            stmt,
            ASTNode::If {
                then_body,
                else_body: None,
                ..
            } if then_body.len() == 1 && matches!(then_body[0], ASTNode::Break { .. })
        )
    })?;

    // Split around the guard; the guard itself belongs to neither part.
    let body_stmts = &body[..if_idx];
    let rest_stmts = &body[if_idx + 1..];

    // The carrier update must be the final statement after the guard
    // (an empty tail means there is no update, hence no match).
    let (carrier_name, delta) = parse_number_carrier_step(rest_stmts.last()?)?;

    Some(ParseNumberInfo {
        carrier_name,
        delta,
        body_stmts: body_stmts.to_vec(),
        rest_stmts: rest_stmts.to_vec(),
    })
}

/// Match `name = name (+|-) <integer literal>` and return `(name, signed step)`.
///
/// Returns `None` for any other statement shape, and also when negating the
/// literal for a subtraction would overflow `i64`.
fn parse_number_carrier_step(stmt: &ASTNode) -> Option<(String, i64)> {
    let (target, value) = match stmt {
        ASTNode::Assignment { target, value, .. } => (target, value),
        _ => return None,
    };
    let target_name = match target.as_ref() {
        ASTNode::Variable { name, .. } => name,
        _ => return None,
    };
    let (operator, left, right) = match value.as_ref() {
        ASTNode::BinaryOp {
            operator,
            left,
            right,
            ..
        } => (operator, left, right),
        _ => return None,
    };
    // Accept both Add and Subtract; subtraction flips the sign of the step.
    let negate = match operator {
        BinaryOperator::Add => false,
        BinaryOperator::Subtract => true,
        _ => return None,
    };
    // Left operand must be the assigned variable itself.
    match left.as_ref() {
        ASTNode::Variable { name, .. } if name == target_name => {}
        _ => return None,
    }
    // Right operand must be an integer literal.
    let step = match right.as_ref() {
        ASTNode::Literal {
            value: LiteralValue::Integer(n),
            ..
        } => *n,
        _ => return None,
    };
    // checked_neg avoids an overflow panic for `x = x - i64::MIN`.
    let delta = if negate { step.checked_neg()? } else { step };
    Some((target_name.clone(), delta))
}
/// loop(true) + break-only digits pattern information
///
/// Produced by `detect_read_digits_loop_true_pattern` when the loop body ends with
/// `if ... { ...; i = i + 1 } else { break }`.
#[derive(Debug, Clone, PartialEq)]
pub struct ReadDigitsLoopTrueInfo {
/// Counter variable name (e.g., "i")
pub carrier_name: String,
/// Constant step increment (currently only supports +1)
pub delta: i64,
/// Body statements before the digit-check if (may include `ch = substring(...)`, `if ch == "" { break }`, etc.)
pub body_stmts: Vec<ASTNode>,
}
/// Detect a read_digits_from-like loop body (the callsite is expected to have checked `loop(true)`).
///
/// Minimal recognized shape (JsonCursorBox/MiniJsonLoader):
/// ```text
/// loop(true) {
///     local ch = s.substring(i, i+1)
///     if ch == "" { break }
///     if is_digit(ch) { out = out + ch; i = i + 1 } else { break }
/// }
/// ```
///
/// Contract (Phase 104 minimal):
/// - The last statement is `if ... { ... } else { break }` (else branch is exactly one `break`)
/// - The then branch contains an update of the form `i = i + 1`; the first such
///   statement determines the carrier
/// - The then branch may contain other updates too (e.g., `out = out + ch`)
///
/// Returns `None` when any of these conditions is not met. On success,
/// `body_stmts` holds every statement before the final `if`, and `delta` is always 1.
pub fn detect_read_digits_loop_true_pattern(body: &[ASTNode]) -> Option<ReadDigitsLoopTrueInfo> {
    // An empty body has no final statement and therefore cannot match.
    let (final_stmt, leading_stmts) = body.split_last()?;

    // Final statement: if-else whose else branch is a lone `break`.
    let then_stmts = match final_stmt {
        ASTNode::If {
            then_body,
            else_body: Some(else_stmts),
            ..
        } if else_stmts.len() == 1 && matches!(else_stmts[0], ASTNode::Break { .. }) => then_body,
        _ => return None,
    };

    // First `x = x + 1` in the then branch names the carrier; anything else is skipped.
    let carrier_name = then_stmts.iter().find_map(|stmt| {
        let (target, value) = match stmt {
            ASTNode::Assignment { target, value, .. } => (target, value),
            _ => return None,
        };
        let target_name = match target.as_ref() {
            ASTNode::Variable { name, .. } => name,
            _ => return None,
        };
        let (left, right) = match value.as_ref() {
            ASTNode::BinaryOp {
                operator: BinaryOperator::Add,
                left,
                right,
                ..
            } => (left, right),
            _ => return None,
        };
        let updates_itself =
            matches!(left.as_ref(), ASTNode::Variable { name, .. } if name == target_name);
        // Phase 104 minimal: only accept +1 step
        let steps_by_one = matches!(
            right.as_ref(),
            ASTNode::Literal {
                value: LiteralValue::Integer(1),
                ..
            }
        );
        if updates_itself && steps_by_one {
            Some(target_name.clone())
        } else {
            None
        }
    })?;

    Some(ReadDigitsLoopTrueInfo {
        carrier_name,
        delta: 1,
        body_stmts: leading_stmts.to_vec(),
    })
}

View File

@ -0,0 +1,393 @@
//! Parse String/Array Pattern Detection
//!
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
//!
//! This module detects parse_string and parse_array patterns with continue + return exits.
use crate::ast::{ASTNode, BinaryOperator, LiteralValue};
// Import has_continue_node from the sibling continue_break module
use super::continue_break::has_continue_node;
/// Parse string/array pattern information
///
/// This struct holds the extracted information from a recognized parse_string or parse_array pattern.
/// Both patterns share the same structure: continue + return exits with carrier updates.
/// Produced by `detect_parse_string_pattern`.
#[derive(Debug, Clone, PartialEq)]
pub struct ParseStringInfo {
/// Carrier variable name (e.g., "p")
pub carrier_name: String,
/// Base constant step increment (e.g., 1 for `p = p + 1`)
pub delta: i64,
/// Body statements before the return/continue checks
/// (the detector currently stores only the first statement of the loop body)
pub body_stmts: Vec<ASTNode>,
}
/// Detect parse_string or parse_array pattern in loop body
///
/// Phase 143-P1/P2: Pattern with both continue (escape/separator handling) AND return (stop condition)
///
/// Pattern structure (parse_string example):
/// ```
/// loop(p < len) {
/// local ch = s.substring(p, p + 1)
///
/// // Check for closing quote (return)
/// if ch == "\"" {
/// return result
/// }
///
/// // Check for escape sequence (continue after processing)
/// if ch == "\\" {
/// result = result + ch
/// p = p + 1
/// if p < len {
/// result = result + s.substring(p, p + 1)
/// p = p + 1
/// continue
/// }
/// }
///
/// // Regular character
/// result = result + ch
/// p = p + 1
/// }
/// ```
///
/// Pattern structure (parse_array example):
/// ```
/// loop(p < len) {
/// local ch = s.substring(p, p + 1)
///
/// // Check for array end (return)
/// if ch == "]" {
/// return result
/// }
///
/// // Check for separator (continue after processing)
/// if ch == "," {
/// arr.push(elem)
/// elem = ""
/// p = p + 1
/// continue
/// }
///
/// // Accumulate element
/// elem = elem + ch
/// p = p + 1
/// }
/// ```
///
/// Recognized characteristics:
/// - Has return statement (early exit on stop condition: quote for string, ']' for array)
/// - Has continue statement (skip after separator: escape for string, ',' for array)
/// - Variable step update (p++ normally, but p+=2 on escape for string)
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `Some(ParseStringInfo)` if the pattern matches, `None` otherwise
///
/// # Notes
///
/// This detector handles both parse_string and parse_array patterns as they share
/// the same structural characteristics:
/// - Multiple exit types (return AND continue)
/// - Variable step increment (conditional on separator/escape)
/// - Nested control flow (separator/escape has nested if inside)
pub fn detect_parse_string_pattern(body: &[ASTNode]) -> Option<ParseStringInfo> {
if body.is_empty() {
return None;
}
// We need to find:
// 1. An if statement with return in then_body
// 2. An if statement with continue in then_body (nested inside)
// 3. Carrier updates (normal and escape-case)
let mut has_return = false;
let mut has_continue = false;
let mut carrier_name = None;
let mut delta = None;
// Scan for return statement
for stmt in body {
if let ASTNode::If { then_body, .. } = stmt {
if then_body
.iter()
.any(|s| matches!(s, ASTNode::Return { .. }))
{
has_return = true;
break;
}
}
}
if !has_return {
return None;
}
// Scan for continue statement and carrier update (with recursive check for nested continue)
for stmt in body {
if let ASTNode::If { then_body, .. } = stmt {
// Check for continue in then_body (including nested)
if then_body.iter().any(|s| has_continue_node(s)) {
has_continue = true;
}
// Extract carrier update from then_body
for s in then_body {
if let ASTNode::Assignment { target, value, .. } = s {
if let ASTNode::Variable { name, .. } = target.as_ref() {
if let ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left,
right,
..
} = value.as_ref()
{
if let ASTNode::Variable {
name: left_name, ..
} = left.as_ref()
{
if left_name == name {
if let ASTNode::Literal {
value: LiteralValue::Integer(n),
..
} = right.as_ref()
{
carrier_name = Some(name.clone());
delta = Some(*n);
}
}
}
}
}
}
}
}
// Also check for carrier update in main body
if let ASTNode::Assignment { target, value, .. } = stmt {
if let ASTNode::Variable { name, .. } = target.as_ref() {
if let ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left,
right,
..
} = value.as_ref()
{
if let ASTNode::Variable {
name: left_name, ..
} = left.as_ref()
{
if left_name == name {
if let ASTNode::Literal {
value: LiteralValue::Integer(n),
..
} = right.as_ref()
{
if carrier_name.is_none() {
carrier_name = Some(name.clone());
delta = Some(*n);
}
}
}
}
}
}
}
}
if !has_return || !has_continue {
return None;
}
let carrier_name = carrier_name?;
let delta = delta?;
// Extract body statements (for now, just the first statement which should be ch assignment)
let body_stmts = if !body.is_empty() {
vec![body[0].clone()]
} else {
vec![]
};
Some(ParseStringInfo {
carrier_name,
delta,
body_stmts,
})
}
/// Continue pattern information
///
/// This struct holds the extracted information from a recognized continue pattern.
/// Produced by `detect_continue_pattern`, which splits the loop body around the first
/// `if` whose then-branch directly contains `continue` (that `if` is stored in neither list).
#[derive(Debug, Clone, PartialEq)]
pub struct ContinuePatternInfo {
/// Carrier variable name (e.g., "i")
pub carrier_name: String,
/// Signed constant step (e.g., 1 for `i = i + 1`, -1 for `i = i - 1`)
pub delta: i64,
/// Body statements before the continue check (may be empty)
pub body_stmts: Vec<ASTNode>,
/// Body statements after the continue check (never empty; the last one is the carrier update)
pub rest_stmts: Vec<ASTNode>,
}
/// Detect continue pattern in loop body
///
/// Pattern structure (pseudo-code, not Rust):
/// ```text
/// loop(cond) {
///     // ... optional body statements (Body)
///     if skip_cond {
///         carrier = carrier + const  // Optional update before continue
///         continue
///     }
///     // ... rest of body statements (Rest)
///     carrier = carrier + const  // Carrier update
/// }
/// ```
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `Some(ContinuePatternInfo)` if the pattern matches, `None` otherwise. Matching requires:
/// - a top-level `if` whose then-branch directly contains `continue`
///   (the first such `if` is used) and which has no `else` branch,
/// - at least one statement after that `if`,
/// - the last statement being `carrier = carrier (+|-) <integer literal>`.
///
/// # Notes
///
/// A carrier update inside the then-branch (before `continue`) is permitted but
/// is currently neither required nor validated against the main update.
pub fn detect_continue_pattern(body: &[ASTNode]) -> Option<ContinuePatternInfo> {
    // Locate the first `if` whose then-branch directly holds a `continue`.
    let if_idx = body.iter().position(|stmt| {
        matches!(
            stmt,
            ASTNode::If { then_body, .. }
                if then_body.iter().any(|s| matches!(s, ASTNode::Continue { .. }))
        )
    })?;

    // For the simple continue pattern that `if` must not have an else branch.
    // Later candidates are deliberately not considered.
    if !matches!(&body[if_idx], ASTNode::If { else_body: None, .. }) {
        return None;
    }

    // Split around the `if`; the `if` itself belongs to neither part.
    let body_stmts = &body[..if_idx];
    let rest_stmts = &body[if_idx + 1..];

    // The carrier update must be the final statement after the `if`
    // (an empty tail means there is no update, hence no match).
    let (carrier_name, delta) = continue_carrier_step(rest_stmts.last()?)?;

    Some(ContinuePatternInfo {
        carrier_name,
        delta,
        body_stmts: body_stmts.to_vec(),
        rest_stmts: rest_stmts.to_vec(),
    })
}

/// Match `name = name (+|-) <integer literal>` and return `(name, signed step)`.
///
/// Returns `None` for any other statement shape, and also when negating the
/// literal for a subtraction would overflow `i64`.
fn continue_carrier_step(stmt: &ASTNode) -> Option<(String, i64)> {
    let (target, value) = match stmt {
        ASTNode::Assignment { target, value, .. } => (target, value),
        _ => return None,
    };
    let target_name = match target.as_ref() {
        ASTNode::Variable { name, .. } => name,
        _ => return None,
    };
    let (operator, left, right) = match value.as_ref() {
        ASTNode::BinaryOp {
            operator,
            left,
            right,
            ..
        } => (operator, left, right),
        _ => return None,
    };
    // Accept both Add and Subtract; subtraction flips the sign of the step.
    let negate = match operator {
        BinaryOperator::Add => false,
        BinaryOperator::Subtract => true,
        _ => return None,
    };
    // Left operand must be the assigned variable itself.
    match left.as_ref() {
        ASTNode::Variable { name, .. } if name == target_name => {}
        _ => return None,
    }
    // Right operand must be an integer literal.
    let step = match right.as_ref() {
        ASTNode::Literal {
            value: LiteralValue::Integer(n),
            ..
        } => *n,
        _ => return None,
    };
    // checked_neg avoids an overflow panic for `x = x - i64::MIN`.
    let delta = if negate { step.checked_neg()? } else { step };
    Some((target_name.clone(), delta))
}

View File

@ -0,0 +1,147 @@
//! Skip Whitespace Pattern Detection
//!
//! Phase 287 P1: Extracted from ast_feature_extractor.rs
//!
//! This module detects skip_whitespace and trim patterns.
use crate::ast::{ASTNode, BinaryOperator, LiteralValue};
/// Skip whitespace pattern information
///
/// This struct holds the extracted information from a recognized skip_whitespace pattern.
/// Produced by `detect_skip_whitespace_pattern`.
#[derive(Debug, Clone, PartialEq)]
pub struct SkipWhitespaceInfo {
/// Carrier variable name (e.g., "p")
pub carrier_name: String,
/// Signed constant step (e.g., 1 for `p = p + 1`, -1 for `end = end - 1`)
pub delta: i64,
/// Body statements before the if-else (may be empty)
pub body_stmts: Vec<ASTNode>,
}
/// Detect skip_whitespace / trim leading/trailing pattern in loop body
///
/// Phase 142 P0: Generalized to handle both +1 and -1 patterns
///
/// Pattern structure (pseudo-code, not Rust):
/// ```text
/// loop(cond) {
///     // ... optional body statements (Body)
///     if check_cond {
///         carrier = carrier (+|-) const
///     } else {
///         break
///     }
/// }
/// ```
///
/// Recognized patterns:
/// - skip_whitespace: `p < len`, `p = p + 1`
/// - trim_leading: `start < end`, `start = start + 1`
/// - trim_trailing: `end > start`, `end = end - 1`
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `Some(SkipWhitespaceInfo)` if the pattern matches, `None` otherwise. Matching requires
/// the last statement to be an if-else whose then-branch is exactly one
/// `carrier = carrier (+|-) <integer literal>` and whose else-branch is exactly one `break`.
/// On success, `body_stmts` holds every statement before that if-else.
///
/// # Notes
///
/// This is the SSOT for skip_whitespace/trim pattern detection.
/// Used by both loop_canonicalizer (Phase 137) and future pattern analyzers.
pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespaceInfo> {
    // An empty body has no final statement and therefore cannot match.
    let (last_stmt, leading_stmts) = body.split_last()?;

    // Last statement must be an if with an else branch.
    let (then_body, else_body) = match last_stmt {
        ASTNode::If {
            then_body,
            else_body: Some(else_body),
            ..
        } => (then_body, else_body),
        _ => return None,
    };

    // Then branch must be a single carrier update.
    if then_body.len() != 1 {
        return None;
    }
    let (carrier_name, delta) = skip_whitespace_carrier_step(&then_body[0])?;

    // Else branch must be a single break.
    if else_body.len() != 1 || !matches!(else_body[0], ASTNode::Break { .. }) {
        return None;
    }

    Some(SkipWhitespaceInfo {
        carrier_name,
        delta,
        body_stmts: leading_stmts.to_vec(),
    })
}

/// Match `name = name (+|-) <integer literal>` and return `(name, signed step)`.
///
/// Returns `None` for any other statement shape, and also when negating the
/// literal for a subtraction would overflow `i64`.
fn skip_whitespace_carrier_step(stmt: &ASTNode) -> Option<(String, i64)> {
    let (target, value) = match stmt {
        ASTNode::Assignment { target, value, .. } => (target, value),
        _ => return None,
    };
    let target_name = match target.as_ref() {
        ASTNode::Variable { name, .. } => name,
        _ => return None,
    };
    let (operator, left, right) = match value.as_ref() {
        ASTNode::BinaryOp {
            operator,
            left,
            right,
            ..
        } => (operator, left, right),
        _ => return None,
    };
    // Phase 142 P0: accept both Add and Subtract; subtraction flips the sign of the step.
    let negate = match operator {
        BinaryOperator::Add => false,
        BinaryOperator::Subtract => true,
        _ => return None,
    };
    // Left operand must be the assigned variable itself.
    match left.as_ref() {
        ASTNode::Variable { name, .. } if name == target_name => {}
        _ => return None,
    }
    // Right operand must be an integer literal.
    let step = match right.as_ref() {
        ASTNode::Literal {
            value: LiteralValue::Integer(n),
            ..
        } => *n,
        _ => return None,
    };
    // checked_neg avoids an overflow panic for `x = x - i64::MIN`.
    let delta = if negate { step.checked_neg()? } else { step };
    Some((target_name.clone(), delta))
}