feat(joinir): Phase 142 P2 Step 3-A - Pattern4 early return fail-fast

This commit is contained in:
nyash-codex
2025-12-16 13:48:30 +09:00
parent 42339ca77f
commit 2674e074b6
8 changed files with 1029 additions and 30 deletions

View File

@ -678,12 +678,13 @@ pub fn detect_parse_number_pattern(body: &[ASTNode]) -> Option<ParseNumberInfo>
}
// ============================================================================
// Phase 143-P1: Parse String Pattern Detection
// Phase 143-P1/P2: Parse String/Array Pattern Detection
// ============================================================================
/// Parse string pattern information
/// Parse string/array pattern information
///
/// This struct holds the extracted information from a recognized parse_string pattern.
/// This struct holds the extracted information from a recognized parse_string or parse_array pattern.
/// Both patterns share the same structure: continue + return exits with carrier updates.
#[derive(Debug, Clone, PartialEq)]
pub struct ParseStringInfo {
/// Carrier variable name (e.g., "p")
@ -694,11 +695,11 @@ pub struct ParseStringInfo {
pub body_stmts: Vec<ASTNode>,
}
/// Detect parse_string pattern in loop body
/// Detect parse_string or parse_array pattern in loop body
///
/// Phase 143-P1: Pattern with both continue (escape handling) AND return (quote found)
/// Phase 143-P1/P2: Pattern with both continue (escape/separator handling) AND return (stop condition)
///
/// Pattern structure:
/// Pattern structure (parse_string example):
/// ```
/// loop(p < len) {
/// local ch = s.substring(p, p + 1)
@ -725,10 +726,34 @@ pub struct ParseStringInfo {
/// }
/// ```
///
/// Pattern structure (parse_array example):
/// ```
/// loop(p < len) {
/// local ch = s.substring(p, p + 1)
///
/// // Check for array end (return)
/// if ch == "]" {
/// return result
/// }
///
/// // Check for separator (continue after processing)
/// if ch == "," {
/// arr.push(elem)
/// elem = ""
/// p = p + 1
/// continue
/// }
///
/// // Accumulate element
/// elem = elem + ch
/// p = p + 1
/// }
/// ```
///
/// Recognized characteristics:
/// - Has return statement (early exit on quote)
/// - Has continue statement (skip after escape processing)
/// - Variable step update (p++ normally, but p+=2 on escape)
/// - Has return statement (early exit on stop condition: quote for string, ']' for array)
/// - Has continue statement (skip after separator: escape for string, ',' for array)
/// - Variable step update (p++ normally, but p+=2 on escape for string)
///
/// # Arguments
///
@ -740,10 +765,11 @@ pub struct ParseStringInfo {
///
/// # Notes
///
/// This is more complex than parse_number or continue patterns due to:
/// This detector handles both parse_string and parse_array patterns as they share
/// the same structural characteristics:
/// - Multiple exit types (return AND continue)
/// - Variable step increment (conditional on escape sequence)
/// - Nested control flow (escape has nested if inside)
/// - Variable step increment (conditional on separator/escape)
/// - Nested control flow (separator/escape has nested if inside)
pub fn detect_parse_string_pattern(body: &[ASTNode]) -> Option<ParseStringInfo> {
if body.is_empty() {
return None;

View File

@ -39,6 +39,49 @@ use crate::mir::loop_pattern_detection::error_messages;
use crate::mir::ValueId;
use std::collections::BTreeMap;
/// Phase 142 P2: Detect return statements in loop body
///
/// This is a helper function for Fail-Fast behavior when return statements
/// are detected in Pattern4 (continue) loops, which are not yet fully supported.
///
/// # Arguments
///
/// * `body` - Loop body statements to scan
///
/// # Returns
///
/// `true` if at least one return statement is found in the body
fn has_return_in_body(body: &[ASTNode]) -> bool {
for stmt in body {
if has_return_node(stmt) {
return true;
}
}
false
}
/// Helper: decide whether `node` is a return statement or has one nested inside it.
///
/// Only `If` (then/else bodies) and `Loop` bodies are descended into; every
/// other node kind is treated as not containing a return.
fn has_return_node(node: &ASTNode) -> bool {
    /// Scan a list of child statements for a (possibly nested) return.
    fn any_return(stmts: &[ASTNode]) -> bool {
        stmts.iter().any(has_return_node)
    }

    match node {
        ASTNode::Return { .. } => true,
        ASTNode::If {
            then_body,
            else_body,
            ..
        } => {
            // The then-branch is checked first; the else-branch is only
            // scanned when the then-branch has no return.
            if any_return(then_body) {
                return true;
            }
            match else_body {
                Some(else_stmts) => any_return(else_stmts),
                None => false,
            }
        }
        // Nested loops are scanned too, even though they are uncommon in the
        // patterns handled here.
        ASTNode::Loop { body, .. } => any_return(body),
        _ => false,
    }
}
/// Phase 194+: Detection function for Pattern 4
///
/// Phase 192: Updated to use pattern_kind for consistency
@ -101,6 +144,16 @@ pub(crate) fn lower(
builder: &mut MirBuilder,
ctx: &super::router::LoopPatternContext,
) -> Result<Option<ValueId>, String> {
// Phase 142 P2: Check for return statements (not yet supported)
if has_return_in_body(ctx.body) {
return Err(
"[Pattern4] Early return is not yet supported in continue loops. \
This will be implemented in Phase 142 P2. \
Pattern: loop with both continue and return statements."
.to_string(),
);
}
// Phase 33-19: Connect stub to actual implementation
builder.cf_loop_pattern4_with_continue(ctx.condition, ctx.body, ctx.func_name, ctx.debug)
}

View File

@ -21,7 +21,7 @@ use super::skeleton_types::{
/// Canonicalize a loop AST into LoopSkeleton
///
/// Phase 143-P1: Now supports parse_string pattern in addition to skip_whitespace, parse_number, and continue
/// Phase 143-P2: Now supports parse_array pattern in addition to parse_string, skip_whitespace, parse_number, and continue
///
/// Supported patterns:
/// 1. Skip whitespace (break in ELSE clause):
@ -61,15 +61,15 @@ use super::skeleton_types::{
/// }
/// ```
///
/// 4. Parse string (both continue AND return):
/// 4. Parse string/array (both continue AND return):
/// ```
/// loop(cond) {
/// // ... body statements
/// if quote_cond {
/// if stop_cond { // quote for string, ']' for array
/// return result
/// }
/// if escape_cond {
/// // ... escape handling
/// if separator_cond { // escape for string, ',' for array
/// // ... separator handling
/// carrier = carrier + step
/// continue
/// }
@ -77,6 +77,9 @@ use super::skeleton_types::{
/// }
/// ```
///
/// Note: parse_string and parse_array share the same structural pattern
/// (continue + return exits) and are recognized by the same detector.
///
/// All other patterns return Fail-Fast with detailed reasoning.
///
/// # Arguments
@ -98,9 +101,10 @@ pub fn canonicalize_loop_expr(
_ => return Err(format!("Expected Loop node, got: {:?}", loop_expr)),
};
// Phase 143-P1: Try to extract parse_string pattern first (most specific)
// Phase 143-P1/P2: Try to extract parse_string/parse_array pattern first (most specific)
// Note: Both parse_string and parse_array share the same structure (continue + return)
if let Some((carrier_name, delta, body_stmts)) = try_extract_parse_string_pattern(body) {
// Build skeleton for parse_string pattern
// Build skeleton for parse_string/parse_array pattern
let mut skeleton = LoopSkeleton::new(span);
// Step 1: Header condition
@ -128,7 +132,7 @@ pub fn canonicalize_loop_expr(
update_kind: UpdateKind::ConstStep { delta },
});
// Set exit contract for parse_string pattern
// Set exit contract for parse_string/parse_array pattern
skeleton.exits = ExitContract {
has_break: false,
has_continue: true,
@ -307,7 +311,7 @@ pub fn canonicalize_loop_expr(
LoopSkeleton::new(span),
RoutingDecision::fail_fast(
vec![CapabilityTag::ConstStep],
"Phase 143-P1: Loop does not match skip_whitespace, parse_number, continue, or parse_string pattern"
"Phase 143-P2: Loop does not match skip_whitespace, parse_number, continue, parse_string, or parse_array pattern"
.to_string(),
),
))
@ -556,7 +560,7 @@ mod tests {
let (_, decision) = result.unwrap();
assert!(decision.is_fail_fast());
assert!(decision.notes[0].contains(
"does not match skip_whitespace, parse_number, continue, or parse_string pattern"
"does not match skip_whitespace, parse_number, continue, parse_string, or parse_array pattern"
));
}
@ -1087,6 +1091,356 @@ mod tests {
assert!(!skeleton.exits.break_has_value);
}
#[test]
fn test_parse_array_pattern_recognized() {
// Phase 143-P2: Test parse_array pattern (both continue AND return)
// Build: loop(p < len) {
// local ch = s.substring(p, p + 1)
// if ch == "]" { return 0 }
// if ch == "," { p = p + 1; continue }
// p = p + 1
// }
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Variable {
name: "len".to_string(),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: vec![
// Body statement: local ch = s.substring(p, p + 1)
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::FunctionCall {
name: "substring".to_string(),
arguments: vec![
ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
},
ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
},
],
span: Span::unknown(),
}),
span: Span::unknown(),
},
// Stop check: if ch == "]" { return 0 }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String("]".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![ASTNode::Return {
value: Some(Box::new(ASTNode::Literal {
value: LiteralValue::Integer(0),
span: Span::unknown(),
})),
span: Span::unknown(),
}],
else_body: None,
span: Span::unknown(),
},
// Separator check: if ch == "," { p = p + 1; continue }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String(",".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
},
ASTNode::Continue {
span: Span::unknown(),
},
],
else_body: None,
span: Span::unknown(),
},
// Regular update: p = p + 1
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
},
],
span: Span::unknown(),
};
let result = canonicalize_loop_expr(&loop_node);
assert!(result.is_ok());
let (skeleton, decision) = result.unwrap();
// Verify success
assert!(decision.is_success());
// chosen == Pattern4Continue (has both continue and return)
assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern4Continue));
// missing_caps == []
assert!(decision.missing_caps.is_empty());
// Verify skeleton structure
// HeaderCond + Body (ch assignment) + Update
assert!(skeleton.steps.len() >= 2);
assert!(matches!(skeleton.steps[0], SkeletonStep::HeaderCond { .. }));
// Verify carrier
assert_eq!(skeleton.carriers.len(), 1);
assert_eq!(skeleton.carriers[0].name, "p");
assert_eq!(skeleton.carriers[0].role, CarrierRole::Counter);
match &skeleton.carriers[0].update_kind {
UpdateKind::ConstStep { delta } => assert_eq!(*delta, 1),
_ => panic!("Expected ConstStep update"),
}
// Verify exit contract
assert!(!skeleton.exits.has_break);
assert!(skeleton.exits.has_continue);
assert!(skeleton.exits.has_return);
assert!(!skeleton.exits.break_has_value);
}
#[test]
fn test_parse_object_pattern_recognized() {
// Phase 143-P3: Test parse_object pattern (same structure as parse_array)
// Build: loop(p < len) {
// local ch = s.substring(p, p + 1)
// if ch == "}" { return 0 }
// if ch == "," { p = p + 1; continue }
// p = p + 1
// }
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Variable {
name: "len".to_string(),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: vec![
// Body statement: local ch = s.substring(p, p + 1)
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::FunctionCall {
name: "substring".to_string(),
arguments: vec![
ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
},
ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
},
],
span: Span::unknown(),
}),
span: Span::unknown(),
},
// Stop check: if ch == "}" { return 0 }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String("}".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![ASTNode::Return {
value: Some(Box::new(ASTNode::Literal {
value: LiteralValue::Integer(0),
span: Span::unknown(),
})),
span: Span::unknown(),
}],
else_body: None,
span: Span::unknown(),
},
// Separator check: if ch == "," { p = p + 1; continue }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String(",".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
},
ASTNode::Continue {
span: Span::unknown(),
},
],
else_body: None,
span: Span::unknown(),
},
// Regular update: p = p + 1
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
},
],
span: Span::unknown(),
};
let result = canonicalize_loop_expr(&loop_node);
assert!(result.is_ok());
let (skeleton, decision) = result.unwrap();
// Verify success
assert!(decision.is_success());
// chosen == Pattern4Continue (has both continue and return)
assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern4Continue));
// missing_caps == []
assert!(decision.missing_caps.is_empty());
// Verify skeleton structure
// HeaderCond + Body (ch assignment) + Update
assert!(skeleton.steps.len() >= 2);
assert!(matches!(skeleton.steps[0], SkeletonStep::HeaderCond { .. }));
// Verify carrier
assert_eq!(skeleton.carriers.len(), 1);
assert_eq!(skeleton.carriers[0].name, "p");
assert_eq!(skeleton.carriers[0].role, CarrierRole::Counter);
match &skeleton.carriers[0].update_kind {
UpdateKind::ConstStep { delta } => assert_eq!(*delta, 1),
_ => panic!("Expected ConstStep update"),
}
// Verify exit contract
assert!(!skeleton.exits.has_break);
assert!(skeleton.exits.has_continue);
assert!(skeleton.exits.has_return);
assert!(!skeleton.exits.break_has_value);
}
#[test]
fn test_parse_number_pattern_recognized() {
// Phase 143-P0: Test parse_number pattern (break in THEN clause)

View File

@ -77,34 +77,35 @@ pub fn try_extract_parse_number_pattern(
}
// ============================================================================
// Parse String Pattern (Phase 143-P1)
// Parse String/Array Pattern (Phase 143-P1/P2)
// ============================================================================
/// Try to extract parse_string pattern from loop
/// Try to extract parse_string or parse_array pattern from loop
///
/// Pattern structure:
/// ```
/// loop(cond) {
/// // ... body statements (ch computation)
/// if quote_cond {
/// if stop_cond { // quote for string, ']' for array
/// return result
/// }
/// if escape_cond {
/// // ... escape handling
/// if separator_cond { // escape for string, ',' for array
/// // ... separator handling
/// carrier = carrier + const
/// continue
/// }
/// // ... regular character handling
/// // ... regular processing
/// carrier = carrier + const
/// }
/// ```
///
/// Returns (carrier_name, delta, body_stmts) if pattern matches.
///
/// # Phase 143-P1: Parse String Pattern Detection
/// # Phase 143-P1/P2: Parse String/Array Pattern Detection
///
/// This function delegates to `ast_feature_extractor::detect_parse_string_pattern`
/// for SSOT implementation.
/// for SSOT implementation. The same detector handles both parse_string and
/// parse_array patterns as they share the same structural characteristics.
pub fn try_extract_parse_string_pattern(body: &[ASTNode]) -> Option<(String, i64, Vec<ASTNode>)> {
    // No match from the shared detector means no match here either.
    let info = ast_detect_parse_string(body)?;

    // Flatten the detector's result into the tuple shape callers expect.
    let extracted = (info.carrier_name, info.delta, info.body_stmts);
    Some(extracted)
}