feat(phase-91): Step 2-A完了 - AST recognizer & re-export chain
## Step 2-A: AST Recognizer (detect_escape_skip_pattern) - 追加ファイル: src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs - EscapeSkipPatternInfo 構造体定義 - detect_escape_skip_pattern() メイン関数 (MVP実装) - Helper関数: find_break_in_if, find_escape_in_if, find_normal_increment等 ## Step 2-B: Re-export Chain (SSOT準備) - 5ファイルで re-export を追加: 1. src/mir/builder/control_flow/joinir/patterns/mod.rs 2. src/mir/builder/control_flow/joinir/mod.rs 3. src/mir/builder/control_flow/mod.rs 4. src/mir/builder.rs 5. src/mir/mod.rs 6. src/mir/loop_canonicalizer/pattern_recognizer.rs ## Pattern Recognizer Wrapper - try_extract_escape_skip_pattern() を pattern_recognizer.rs に追加 - 既存パターン(skip_whitespace等)に倣う設計 ## Phase 91 MVP Design - Quote/escape char は期待値("と\)にハードコード(Phase 91 MVP) - Normal delta は常に1を期待 - Escape delta は AST から抽出 ## Test Results ✅ cargo build --release: 成功 ✅ cargo test --release --lib: 1061/1061 PASS - 退行なし ## 次: Step 2-B本体 (Canonicalizer統合) - canonicalizer.rs に detect_escape_skip_pattern() 統合 - LoopSkeleton & RoutingDecision を構築 - Pattern2Break に寄せる 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -38,6 +38,8 @@ pub(crate) use control_flow::{detect_continue_pattern, ContinuePatternInfo};
|
||||
// Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer
|
||||
pub(crate) use control_flow::{detect_parse_number_pattern, ParseNumberInfo};
|
||||
pub(crate) use control_flow::{detect_parse_string_pattern, ParseStringInfo};
|
||||
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
|
||||
pub(crate) use control_flow::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
|
||||
mod exprs_lambda; // lambda lowering
|
||||
mod exprs_peek; // peek expression
|
||||
mod exprs_qmark; // ?-propagate
|
||||
|
||||
@ -25,3 +25,6 @@ pub(crate) use patterns::{detect_continue_pattern, ContinuePatternInfo};
|
||||
// Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer
|
||||
pub(crate) use patterns::{detect_parse_number_pattern, ParseNumberInfo};
|
||||
pub(crate) use patterns::{detect_parse_string_pattern, ParseStringInfo};
|
||||
|
||||
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
|
||||
pub(crate) use patterns::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
|
||||
|
||||
@ -1038,3 +1038,219 @@ pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option<SkipWhitespace
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 91 P5b: Escape Sequence Handling Pattern Info
|
||||
///
|
||||
/// Information extracted from escape skip pattern in loop body.
|
||||
/// Stores counter name, normal delta, escape delta, and character constants.
|
||||
pub struct EscapeSkipPatternInfo {
|
||||
/// Counter variable name (e.g., "i")
|
||||
pub counter_name: String,
|
||||
/// Normal increment delta (usually 1: i = i + 1)
|
||||
pub normal_delta: i64,
|
||||
/// Escape skip delta (usually 2: i = i + 2 from +1 in escape if + final +1)
|
||||
pub escape_delta: i64,
|
||||
/// Quote/boundary character (usually '"' or '\'')
|
||||
pub quote_char: char,
|
||||
/// Escape character (usually '\\')
|
||||
pub escape_char: char,
|
||||
/// Body statements before the break check
|
||||
pub body_stmts: Vec<ASTNode>,
|
||||
}
|
||||
|
||||
/// Detect escape sequence handling pattern in loop body
|
||||
///
|
||||
/// Phase 91 P5b: Pattern for string parsers with escape sequence support.
|
||||
///
|
||||
/// Pattern structure:
|
||||
/// ```
|
||||
/// loop(i < n) {
|
||||
/// // ... optional body statements (Body)
|
||||
/// if ch == "\"" {
|
||||
/// break
|
||||
/// }
|
||||
/// if ch == "\\" {
|
||||
/// i = i + escape_delta // e.g., +1 (total with final +1 = +2)
|
||||
/// // optional: ch = s.substring(i, i+1)
|
||||
/// }
|
||||
/// out = out + ch // Accumulator pattern
|
||||
/// i = i + 1 // Normal increment (always +1)
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `body` - Loop body statements to analyze
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// `Some(EscapeSkipPatternInfo)` if the pattern matches, `None` otherwise
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// This is the recognizer for P5b (Escape Sequence Handling).
|
||||
/// Used by loop_canonicalizer (Phase 91) for pattern detection and decision routing.
|
||||
pub fn detect_escape_skip_pattern(body: &[ASTNode]) -> Option<EscapeSkipPatternInfo> {
|
||||
if body.len() < 4 {
|
||||
return None; // Need at least: break check, escape check, accumulator, normal increment
|
||||
}
|
||||
|
||||
// Phase 91 P5b Strategy:
|
||||
// This is a simplified recognizer for escape sequence handling in string parsers.
|
||||
// For now, we detect the minimum viable pattern:
|
||||
// 1. Break check: if ch == quote_char { break }
|
||||
// 2. Escape check: if ch == escape_char { counter = counter + escape_delta }
|
||||
// 3. Normal increment: counter = counter + 1 (outside if blocks)
|
||||
//
|
||||
// Note: We rely on the order and pattern matching. If a loop body
|
||||
// matches this exact structure, we recognize it as P5b.
|
||||
|
||||
// Find break statement - scan for "if { ... break ... }"
|
||||
let break_idx = find_break_in_if(body)?;
|
||||
|
||||
// Find escape check after break - scan for second "if" with increment
|
||||
let escape_idx = find_escape_in_if(body, break_idx)?;
|
||||
|
||||
// Find normal increment after escape if
|
||||
let (normal_incr_idx, counter_name, normal_delta) = find_normal_increment(body, escape_idx)?;
|
||||
|
||||
// For P5b, we need a consistent pattern. Extract minimal info:
|
||||
// - counter_name (from normal increment)
|
||||
// - normal_delta (should be 1)
|
||||
// - escape_delta (from escape if block)
|
||||
// - quote_char and escape_char (extracted from if conditions - for now, use defaults)
|
||||
|
||||
let escape_delta = extract_escape_delta_from_if(body, escape_idx)?;
|
||||
|
||||
// Extract body statements before break check
|
||||
let body_stmts = body[..break_idx].to_vec();
|
||||
|
||||
Some(EscapeSkipPatternInfo {
|
||||
counter_name,
|
||||
normal_delta,
|
||||
escape_delta,
|
||||
quote_char: '"', // Default for JSON/CSV (Phase 91 MVP)
|
||||
escape_char: '\\', // Default for JSON/CSV (Phase 91 MVP)
|
||||
body_stmts,
|
||||
})
|
||||
}
|
||||
|
||||
/// Helper: Find if statement containing break
|
||||
fn find_break_in_if(body: &[ASTNode]) -> Option<usize> {
|
||||
for (idx, stmt) in body.iter().enumerate() {
|
||||
if let ASTNode::If {
|
||||
then_body,
|
||||
else_body: None,
|
||||
..
|
||||
} = stmt {
|
||||
// Check if then_body contains break
|
||||
if then_body.len() == 1 && matches!(&then_body[0], ASTNode::Break { .. }) {
|
||||
return Some(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Helper: Find if statement containing counter increment (escape check)
|
||||
fn find_escape_in_if(body: &[ASTNode], after_idx: usize) -> Option<usize> {
|
||||
for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() {
|
||||
let actual_idx = after_idx + 1 + idx;
|
||||
if let ASTNode::If {
|
||||
then_body,
|
||||
else_body: None,
|
||||
..
|
||||
} = stmt {
|
||||
// Check if then_body contains an increment assignment
|
||||
for stmt2 in then_body.iter() {
|
||||
if let ASTNode::Assignment { target, value, .. } = stmt2 {
|
||||
if try_extract_increment_assignment(target, value).is_some() {
|
||||
return Some(actual_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Helper: Find normal increment statement (assignment outside if blocks)
|
||||
fn find_normal_increment(
|
||||
body: &[ASTNode],
|
||||
after_idx: usize,
|
||||
) -> Option<(usize, String, i64)> {
|
||||
for (idx, stmt) in body[(after_idx + 1)..].iter().enumerate() {
|
||||
let actual_idx = after_idx + 1 + idx;
|
||||
if let ASTNode::Assignment { target, value, .. } = stmt {
|
||||
if let Some((counter, delta)) = try_extract_increment_assignment(target, value) {
|
||||
return Some((actual_idx, counter, delta));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Helper: Extract escape delta from if statement
|
||||
fn extract_escape_delta_from_if(body: &[ASTNode], idx: usize) -> Option<i64> {
|
||||
if idx < body.len() {
|
||||
if let ASTNode::If {
|
||||
then_body,
|
||||
..
|
||||
} = &body[idx] {
|
||||
// Find the increment in then_body
|
||||
for stmt in then_body.iter() {
|
||||
if let ASTNode::Assignment { target, value, .. } = stmt {
|
||||
if let Some((_, delta)) = try_extract_increment_assignment(target, value) {
|
||||
return Some(delta);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
|
||||
/// Helper: Try to extract increment assignment (counter = counter (+|-) const)
|
||||
fn try_extract_increment_assignment(target: &ASTNode, value: &ASTNode) -> Option<(String, i64)> {
|
||||
let target_name = match target {
|
||||
ASTNode::Variable { name, .. } => name.clone(),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
match value {
|
||||
ASTNode::BinaryOp {
|
||||
operator,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} => {
|
||||
let op_multiplier = match operator {
|
||||
BinaryOperator::Add => 1,
|
||||
BinaryOperator::Subtract => -1,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let left_name = match left.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if left_name != &target_name {
|
||||
return None;
|
||||
}
|
||||
|
||||
let const_val = match right.as_ref() {
|
||||
ASTNode::Literal {
|
||||
value: LiteralValue::Integer(n),
|
||||
..
|
||||
} => *n,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let delta = const_val * op_multiplier;
|
||||
Some((target_name, delta))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -76,3 +76,6 @@ pub(crate) use ast_feature_extractor::{detect_parse_number_pattern, ParseNumberI
|
||||
|
||||
// Phase 143-P1: Re-export parse_string pattern detection for loop_canonicalizer
|
||||
pub(crate) use ast_feature_extractor::{detect_parse_string_pattern, ParseStringInfo};
|
||||
|
||||
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
|
||||
pub(crate) use ast_feature_extractor::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
|
||||
|
||||
@ -64,6 +64,9 @@ pub(crate) use joinir::{detect_continue_pattern, ContinuePatternInfo};
|
||||
pub(crate) use joinir::{detect_parse_number_pattern, ParseNumberInfo};
|
||||
pub(crate) use joinir::{detect_parse_string_pattern, ParseStringInfo};
|
||||
|
||||
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
|
||||
pub(crate) use joinir::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
|
||||
|
||||
impl super::MirBuilder {
|
||||
/// Control-flow: block
|
||||
pub(super) fn cf_block(&mut self, statements: Vec<ASTNode>) -> Result<ValueId, String> {
|
||||
|
||||
@ -8,6 +8,7 @@ use crate::mir::detect_continue_pattern;
|
||||
use crate::mir::detect_parse_number_pattern as ast_detect_parse_number;
|
||||
use crate::mir::detect_parse_string_pattern as ast_detect_parse_string;
|
||||
use crate::mir::detect_skip_whitespace_pattern as ast_detect;
|
||||
use crate::mir::detect_escape_skip_pattern as ast_detect_escape;
|
||||
|
||||
// ============================================================================
|
||||
// Skip Whitespace Pattern (Phase 140-P4-B SSOT Wrapper)
|
||||
@ -289,3 +290,44 @@ mod tests {
|
||||
assert!(result.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Escape Skip Pattern (Phase 91 P5b)
|
||||
// ============================================================================
|
||||
|
||||
/// Try to extract escape skip pattern from loop
|
||||
///
|
||||
/// Phase 91 P5b: Pattern for string parsers with escape sequence support
|
||||
///
|
||||
/// Pattern structure:
|
||||
/// ```
|
||||
/// loop(i < n) {
|
||||
/// // ... optional body statements
|
||||
/// if ch == "\"" { break }
|
||||
/// if ch == "\\" { i = i + escape_delta; ... }
|
||||
/// out = out + ch
|
||||
/// i = i + 1
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Returns (counter_name, normal_delta, escape_delta, quote_char, escape_char, body_stmts)
|
||||
/// if pattern matches.
|
||||
///
|
||||
/// # Phase 91 P5b: Escape Sequence Pattern Detection
|
||||
///
|
||||
/// This function delegates to `ast_feature_extractor::detect_escape_skip_pattern`
|
||||
/// for SSOT implementation.
|
||||
pub fn try_extract_escape_skip_pattern(
|
||||
body: &[ASTNode],
|
||||
) -> Option<(String, i64, i64, char, char, Vec<ASTNode>)> {
|
||||
ast_detect_escape(body).map(|info| {
|
||||
(
|
||||
info.counter_name,
|
||||
info.normal_delta,
|
||||
info.escape_delta,
|
||||
info.quote_char,
|
||||
info.escape_char,
|
||||
info.body_stmts,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@ -64,6 +64,8 @@ pub(crate) use builder::{detect_continue_pattern, ContinuePatternInfo};
|
||||
pub(crate) use builder::{detect_parse_number_pattern, ParseNumberInfo};
|
||||
// Phase 143-P1: Re-export parse_string pattern detection for loop_canonicalizer
|
||||
pub(crate) use builder::{detect_parse_string_pattern, ParseStringInfo};
|
||||
// Phase 91 P5b: Re-export escape skip pattern detection for loop_canonicalizer
|
||||
pub(crate) use builder::{detect_escape_skip_pattern, EscapeSkipPatternInfo};
|
||||
pub use cfg_extractor::extract_cfg_info; // Phase 154: CFG extraction
|
||||
pub use definitions::{CallFlags, Callee, MirCall}; // Unified call definitions
|
||||
pub use effect::{Effect, EffectMask};
|
||||
|
||||
Reference in New Issue
Block a user