feat(phase-91): Step 2-A完了 - AST recognizer & re-export chain

## Step 2-A: AST Recognizer (detect_escape_skip_pattern)
- 変更ファイル: src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs
  - EscapeSkipPatternInfo 構造体定義
  - detect_escape_skip_pattern() メイン関数 (MVP実装)
  - Helper関数: find_break_in_if, find_escape_in_if, find_normal_increment等

## Step 2-B: Re-export Chain (SSOT準備)
- 以下の6ファイルを更新 (1〜5: re-export を追加、6: re-export された関数を import):
  1. src/mir/builder/control_flow/joinir/patterns/mod.rs
  2. src/mir/builder/control_flow/joinir/mod.rs
  3. src/mir/builder/control_flow/mod.rs
  4. src/mir/builder.rs
  5. src/mir/mod.rs
  6. src/mir/loop_canonicalizer/pattern_recognizer.rs

## Pattern Recognizer Wrapper
- try_extract_escape_skip_pattern() を pattern_recognizer.rs に追加
- 既存パターン(skip_whitespace等)に倣う設計

## Phase 91 MVP Design
- Quote/escape char は期待値("と\)にハードコード(Phase 91 MVP)
- Normal delta は常に1を期待
- Escape delta は AST から抽出

## Test Results
- cargo build --release: 成功
- cargo test --release --lib: 1061/1061 PASS
- 退行なし

## 次: Step 2-B本体 (Canonicalizer統合)
- canonicalizer.rs に detect_escape_skip_pattern() 統合
- LoopSkeleton & RoutingDecision を構築
- Pattern2Break に寄せる

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-12-16 14:36:32 +09:00
parent 42f8f4d088
commit 7db554a763
7 changed files with 271 additions and 0 deletions

View File

@ -38,6 +38,8 @@ pub(crate) use control_flow::{detect_continue_pattern, ContinuePatternInfo};
// Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer
pub(crate) use control_flow::{detect_parse_number_pattern, ParseNumberInfo};
pub(crate) use control_flow::{detect_parse_string_pattern, ParseStringInfo};
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
pub(crate) use control_flow::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
mod exprs_lambda; // lambda lowering
mod exprs_peek; // peek expression
mod exprs_qmark; // ?-propagate

View File

@ -25,3 +25,6 @@ pub(crate) use patterns::{detect_continue_pattern, ContinuePatternInfo};
// Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer
pub(crate) use patterns::{detect_parse_number_pattern, ParseNumberInfo};
pub(crate) use patterns::{detect_parse_string_pattern, ParseStringInfo};
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
pub(crate) use patterns::{detect_escape_skip_pattern, EscapeSkipPatternInfo};

View File

@ -1038,3 +1038,219 @@ pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespace
_ => None,
}
}
/// Phase 91 P5b: Escape Sequence Handling Pattern Info
///
/// Result of `detect_escape_skip_pattern`: the pieces of a loop body that
/// matched the "break on quote / skip on escape / increment" shape.
/// Stores counter name, normal delta, escape delta, and character constants.
pub struct EscapeSkipPatternInfo {
/// Counter variable name (e.g., "i"), taken from the top-level increment
/// statement that follows the escape `if`.
pub counter_name: String,
/// Delta of the top-level (normal) increment, e.g. 1 for `i = i + 1`.
/// The recognizer reports whatever constant it finds; it does not itself
/// enforce that this equals 1.
pub normal_delta: i64,
/// Delta of the increment found inside the escape `if` block only,
/// e.g. 1 for `i = i + 1`. It does NOT include the trailing normal
/// increment: the total advance on an escape is `escape_delta + normal_delta`.
pub escape_delta: i64,
/// Quote/boundary character. The Phase 91 MVP recognizer always sets this
/// to '"'; it is not yet extracted from the `if` condition.
pub quote_char: char,
/// Escape character. The Phase 91 MVP recognizer always sets this to '\\';
/// it is not yet extracted from the `if` condition.
pub escape_char: char,
/// Clones of the loop-body statements that precede the break-check `if`.
pub body_stmts: Vec<ASTNode>,
}
/// Detect escape sequence handling pattern in loop body
///
/// Phase 91 P5b: Pattern for string parsers with escape sequence support.
///
/// Pattern structure:
/// ```
/// loop(i < n) {
/// // ... optional body statements (Body)
/// if ch == "\"" {
/// break
/// }
/// if ch == "\\" {
/// i = i + escape_delta // e.g., +1 (total with final +1 = +2)
/// // optional: ch = s.substring(i, i+1)
/// }
/// out = out + ch // Accumulator pattern
/// i = i + 1 // Normal increment (always +1)
/// }
/// ```
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `Some(EscapeSkipPatternInfo)` if the pattern matches, `None` otherwise
///
/// # Notes
///
/// This is the recognizer for P5b (Escape Sequence Handling).
/// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing.
pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> {
if body.len() < 4 {
return None; // Need at least: break check, escape check, accumulator, normal increment
}
// Phase 91 P5b Strategy:
// This is a simplified recognizer for escape sequence handling in string parsers.
// For now, we detect the minimum viable pattern:
// 1. Break check: if ch == quote_char { break }
// 2. Escape check: if ch == escape_char { counter = counter + escape_delta }
// 3. Normal increment: counter = counter + 1 (outside if blocks)
//
// Note: We rely on the order and pattern matching. If a loop body
// matches this exact structure, we recognize it as P5b.
// Find break statement - scan for "if { ... break ... }"
let break_idx = find_break_in_if(body)?;
// Find escape check after break - scan for second "if" with increment
let escape_idx = find_escape_in_if(body, break_idx)?;
// Find normal increment after escape if
let (normal_incr_idx, counter_name, normal_delta) = find_normal_increment(body, escape_idx)?;
// For P5b, we need a consistent pattern. Extract minimal info:
// - counter_name (from normal increment)
// - normal_delta (should be 1)
// - escape_delta (from escape if block)
// - quote_char and escape_char (extracted from if conditions - for now, use defaults)
let escape_delta = extract_escape_delta_from_if(body, escape_idx)?;
// Extract body statements before break check
let body_stmts = body[..break_idx].to_vec();
Some(EscapeSkipPatternInfo {
counter_name,
normal_delta,
escape_delta,
quote_char: '"', // Default for JSON/CSV (Phase 91 MVP)
escape_char: '\\', // Default for JSON/CSV (Phase 91 MVP)
body_stmts,
})
}
/// Helper: locate the break check.
///
/// Returns the index of the first statement in `body` that is an `if` with no
/// `else` branch and whose then-body consists of exactly one `break`
/// statement. Returns `None` when no statement has that shape.
fn find_break_in_if(body: &[ASTNode]) -> Option<usize> {
    body.iter().position(|stmt| {
        matches!(
            stmt,
            ASTNode::If {
                then_body,
                else_body: None,
                ..
            } if then_body.len() == 1 && matches!(&then_body[0], ASTNode::Break { .. })
        )
    })
}
/// Helper: locate the escape check.
///
/// Scans the statements strictly after `after_idx` and returns the index (into
/// `body`) of the first `if` with no `else` branch whose then-body contains at
/// least one assignment accepted by `try_extract_increment_assignment`.
/// Returns `None` when there is no such statement.
///
/// `body` is sliced from `after_idx + 1`, so `after_idx` must be a valid index
/// into `body`.
fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<usize> {
    let start = after_idx + 1;
    body[start..]
        .iter()
        .position(|candidate| match candidate {
            ASTNode::If {
                then_body,
                else_body: None,
                ..
            } => then_body.iter().any(|inner| {
                matches!(
                    inner,
                    ASTNode::Assignment { target, value, .. }
                        if try_extract_increment_assignment(target, value).is_some()
                )
            }),
            _ => false,
        })
        .map(|offset| start + offset)
}
/// Helper: locate the normal increment.
///
/// Scans the top-level statements strictly after `after_idx` (statements nested
/// inside `if` blocks are not visited) and returns, for the first assignment
/// accepted by `try_extract_increment_assignment`, the tuple
/// `(index into body, counter name, delta)`. Returns `None` when no such
/// assignment exists.
///
/// `body` is sliced from `after_idx + 1`, so `after_idx` must be a valid index
/// into `body`.
fn find_normal_increment(
    body: &[ASTNode],
    after_idx: usize,
) -> Option<(usize, String, i64)> {
    let start = after_idx + 1;
    body[start..]
        .iter()
        .enumerate()
        .find_map(|(offset, candidate)| match candidate {
            ASTNode::Assignment { target, value, .. } => {
                try_extract_increment_assignment(target, value)
                    .map(|(counter, delta)| (start + offset, counter, delta))
            }
            _ => None,
        })
}
/// Helper: read the escape delta out of the `if` statement at `body[idx]`.
///
/// Returns the delta of the first assignment in the then-body that
/// `try_extract_increment_assignment` accepts. Returns `None` when `idx` is out
/// of range, when `body[idx]` is not an `if`, or when its then-body holds no
/// such assignment.
fn extract_escape_delta_from_if(body: &[ASTNode], idx: usize) -> Option<i64> {
    match body.get(idx) {
        Some(ASTNode::If { then_body, .. }) => then_body.iter().find_map(|inner| match inner {
            ASTNode::Assignment { target, value, .. } => {
                try_extract_increment_assignment(target, value).map(|(_, delta)| delta)
            }
            _ => None,
        }),
        _ => None,
    }
}
/// Helper: Try to extract increment assignment (counter = counter (+|-) const)
///
/// Recognizes an assignment of the exact shape `x = x + N` or `x = x - N`,
/// where the target and the left operand are the same variable and `N` is an
/// integer literal on the right-hand side of the operator.
///
/// # Arguments
///
/// * `target` - Left-hand side of the assignment
/// * `value` - Right-hand side of the assignment
///
/// # Returns
///
/// `Some((variable_name, delta))`, where `delta` is `N` for addition and `-N`
/// for subtraction. Returns `None` for any other shape, and also when negating
/// `N` would overflow `i64` (i.e. `x = x - i64::MIN`), instead of panicking in
/// debug builds or silently wrapping in release builds.
fn try_extract_increment_assignment(target: &ASTNode, value: &ASTNode) -> Option<(String, i64)> {
    let target_name = match target {
        ASTNode::Variable { name, .. } => name,
        _ => return None,
    };

    let (operator, left, right) = match value {
        ASTNode::BinaryOp {
            operator,
            left,
            right,
            ..
        } => (operator, left, right),
        _ => return None,
    };

    // The left operand must be the very variable being assigned to.
    match left.as_ref() {
        ASTNode::Variable { name, .. } if name == target_name => {}
        _ => return None,
    }

    let magnitude = match right.as_ref() {
        ASTNode::Literal {
            value: LiteralValue::Integer(n),
            ..
        } => *n,
        _ => return None,
    };

    let delta = match operator {
        BinaryOperator::Add => magnitude,
        // `checked_neg` rejects i64::MIN, whose negation is not representable.
        BinaryOperator::Subtract => magnitude.checked_neg()?,
        _ => return None,
    };

    Some((target_name.clone(), delta))
}

View File

@ -76,3 +76,6 @@ pub(crate) use ast_feature_extractor::{detect_parse_number_pattern, ParseNumberI
// Phase 143-P1: Re-export parse_string pattern detection for loop_canonicalizer
pub(crate) use ast_feature_extractor::{detect_parse_string_pattern, ParseStringInfo};
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
pub(crate) use ast_feature_extractor::{detect_escape_skip_pattern, EscapeSkipPatternInfo};

View File

@ -64,6 +64,9 @@ pub(crate) use joinir::{detect_continue_pattern, ContinuePatternInfo};
pub(crate) use joinir::{detect_parse_number_pattern, ParseNumberInfo};
pub(crate) use joinir::{detect_parse_string_pattern, ParseStringInfo};
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
pub(crate) use joinir::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
impl super::MirBuilder {
/// Control-flow: block
pub(super) fn cf_block(&mut self, statements: Vec<ASTNode>) -> Result<ValueId, String> {

View File

@ -8,6 +8,7 @@ use crate::mir::detect_continue_pattern;
use crate::mir::detect_parse_number_pattern as ast_detect_parse_number;
use crate::mir::detect_parse_string_pattern as ast_detect_parse_string;
use crate::mir::detect_skip_whitespace_pattern as ast_detect;
use crate::mir::detect_escape_skip_pattern as ast_detect_escape;
// ============================================================================
// Skip Whitespace Pattern (Phase 140-P4-B SSOT Wrapper)
@ -289,3 +290,44 @@ mod tests {
assert!(result.is_none());
}
}
// ============================================================================
// Escape Skip Pattern (Phase 91 P5b)
// ============================================================================
/// Try to extract escape skip pattern from loop
///
/// Phase 91 P5b: Pattern for string parsers with escape sequence support.
///
/// Pattern structure (pseudo-code of the analyzed language, not Rust):
/// ```text
/// loop(i < n) {
///     // ... optional body statements
///     if ch == "\"" { break }
///     if ch == "\\" { i = i + escape_delta; ... }
///     out = out + ch
///     i = i + 1
/// }
/// ```
///
/// # Arguments
///
/// * `body` - Loop body statements to analyze
///
/// # Returns
///
/// `Some((counter_name, normal_delta, escape_delta, quote_char, escape_char, body_stmts))`
/// if the pattern matches, `None` otherwise.
///
/// # Phase 91 P5b: Escape Sequence Pattern Detection
///
/// This function delegates to `ast_feature_extractor::detect_escape_skip_pattern`
/// for SSOT implementation and only flattens the returned info into a tuple.
pub fn try_extract_escape_skip_pattern(
    body: &[ASTNode],
) -> Option<(String, i64, i64, char, char, Vec<ASTNode>)> {
    let info = ast_detect_escape(body)?;
    Some((
        info.counter_name,
        info.normal_delta,
        info.escape_delta,
        info.quote_char,
        info.escape_char,
        info.body_stmts,
    ))
}

View File

@ -64,6 +64,8 @@ pub(crate) use builder::{detect_continue_pattern, ContinuePatternInfo};
pub(crate) use builder::{detect_parse_number_pattern, ParseNumberInfo};
// Phase 143-P1: Re-export parse_string pattern detection for loop_canonicalizer
pub(crate) use builder::{detect_parse_string_pattern, ParseStringInfo};
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
pub(crate) use builder::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
pub use cfg_extractor::extract_cfg_info; // Phase 154: CFG extraction
pub use definitions::{CallFlags, Callee, MirCall}; // Unified call definitions
pub use effect::{Effect, EffectMask};