feat(joinir): Phase 142 P2 Step 3-A - Pattern4 early return fail-fast

This commit is contained in:
nyash-codex
2025-12-16 13:48:30 +09:00
parent 42339ca77f
commit 2674e074b6
8 changed files with 1029 additions and 30 deletions

View File

@ -21,7 +21,7 @@ use super::skeleton_types::{
/// Canonicalize a loop AST into LoopSkeleton
///
/// Phase 143-P1: Now supports parse_string pattern in addition to skip_whitespace, parse_number, and continue
/// Phase 143-P2: Now supports parse_array pattern in addition to parse_string, skip_whitespace, parse_number, and continue
///
/// Supported patterns:
/// 1. Skip whitespace (break in ELSE clause):
@ -61,15 +61,15 @@ use super::skeleton_types::{
/// }
/// ```
///
/// 4. Parse string (both continue AND return):
/// 4. Parse string/array (both continue AND return):
/// ```
/// loop(cond) {
/// // ... body statements
/// if quote_cond {
/// if stop_cond { // quote for string, ']' for array
/// return result
/// }
/// if escape_cond {
/// // ... escape handling
/// if separator_cond { // escape for string, ',' for array
/// // ... separator handling
/// carrier = carrier + step
/// continue
/// }
@ -77,6 +77,9 @@ use super::skeleton_types::{
/// }
/// ```
///
/// Note: parse_string and parse_array share the same structural pattern
/// (continue + return exits) and are recognized by the same detector.
///
/// All other patterns return Fail-Fast with detailed reasoning.
///
/// # Arguments
@ -98,9 +101,10 @@ pub fn canonicalize_loop_expr(
_ => return Err(format!("Expected Loop node, got: {:?}", loop_expr)),
};
// Phase 143-P1: Try to extract parse_string pattern first (most specific)
// Phase 143-P1/P2: Try to extract parse_string/parse_array pattern first (most specific)
// Note: Both parse_string and parse_array share the same structure (continue + return)
if let Some((carrier_name, delta, body_stmts)) = try_extract_parse_string_pattern(body) {
// Build skeleton for parse_string pattern
// Build skeleton for parse_string/parse_array pattern
let mut skeleton = LoopSkeleton::new(span);
// Step 1: Header condition
@ -128,7 +132,7 @@ pub fn canonicalize_loop_expr(
update_kind: UpdateKind::ConstStep { delta },
});
// Set exit contract for parse_string pattern
// Set exit contract for parse_string/parse_array pattern
skeleton.exits = ExitContract {
has_break: false,
has_continue: true,
@ -307,7 +311,7 @@ pub fn canonicalize_loop_expr(
LoopSkeleton::new(span),
RoutingDecision::fail_fast(
vec![CapabilityTag::ConstStep],
"Phase 143-P1: Loop does not match skip_whitespace, parse_number, continue, or parse_string pattern"
"Phase 143-P2: Loop does not match skip_whitespace, parse_number, continue, parse_string, or parse_array pattern"
.to_string(),
),
))
@ -556,7 +560,7 @@ mod tests {
let (_, decision) = result.unwrap();
assert!(decision.is_fail_fast());
assert!(decision.notes[0].contains(
"does not match skip_whitespace, parse_number, continue, or parse_string pattern"
"does not match skip_whitespace, parse_number, continue, parse_string, or parse_array pattern"
));
}
@ -1087,6 +1091,356 @@ mod tests {
assert!(!skeleton.exits.break_has_value);
}
#[test]
fn test_parse_array_pattern_recognized() {
// Phase 143-P2: Test parse_array pattern (both continue AND return)
// Build: loop(p < len) {
// local ch = s.substring(p, p + 1)
// if ch == "]" { return 0 }
// if ch == "," { p = p + 1; continue }
// p = p + 1
// }
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Variable {
name: "len".to_string(),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: vec![
// Body statement: local ch = s.substring(p, p + 1)
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::FunctionCall {
name: "substring".to_string(),
arguments: vec![
ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
},
ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
},
],
span: Span::unknown(),
}),
span: Span::unknown(),
},
// Stop check: if ch == "]" { return 0 }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String("]".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![ASTNode::Return {
value: Some(Box::new(ASTNode::Literal {
value: LiteralValue::Integer(0),
span: Span::unknown(),
})),
span: Span::unknown(),
}],
else_body: None,
span: Span::unknown(),
},
// Separator check: if ch == "," { p = p + 1; continue }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String(",".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
},
ASTNode::Continue {
span: Span::unknown(),
},
],
else_body: None,
span: Span::unknown(),
},
// Regular update: p = p + 1
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
},
],
span: Span::unknown(),
};
let result = canonicalize_loop_expr(&loop_node);
assert!(result.is_ok());
let (skeleton, decision) = result.unwrap();
// Verify success
assert!(decision.is_success());
// chosen == Pattern4Continue (has both continue and return)
assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern4Continue));
// missing_caps == []
assert!(decision.missing_caps.is_empty());
// Verify skeleton structure
// HeaderCond + Body (ch assignment) + Update
assert!(skeleton.steps.len() >= 2);
assert!(matches!(skeleton.steps[0], SkeletonStep::HeaderCond { .. }));
// Verify carrier
assert_eq!(skeleton.carriers.len(), 1);
assert_eq!(skeleton.carriers[0].name, "p");
assert_eq!(skeleton.carriers[0].role, CarrierRole::Counter);
match &skeleton.carriers[0].update_kind {
UpdateKind::ConstStep { delta } => assert_eq!(*delta, 1),
_ => panic!("Expected ConstStep update"),
}
// Verify exit contract
assert!(!skeleton.exits.has_break);
assert!(skeleton.exits.has_continue);
assert!(skeleton.exits.has_return);
assert!(!skeleton.exits.break_has_value);
}
#[test]
fn test_parse_object_pattern_recognized() {
// Phase 143-P3: Test parse_object pattern (same structure as parse_array)
// Build: loop(p < len) {
// local ch = s.substring(p, p + 1)
// if ch == "}" { return 0 }
// if ch == "," { p = p + 1; continue }
// p = p + 1
// }
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Variable {
name: "len".to_string(),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: vec![
// Body statement: local ch = s.substring(p, p + 1)
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::FunctionCall {
name: "substring".to_string(),
arguments: vec![
ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
},
ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
},
],
span: Span::unknown(),
}),
span: Span::unknown(),
},
// Stop check: if ch == "}" { return 0 }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String("}".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![ASTNode::Return {
value: Some(Box::new(ASTNode::Literal {
value: LiteralValue::Integer(0),
span: Span::unknown(),
})),
span: Span::unknown(),
}],
else_body: None,
span: Span::unknown(),
},
// Separator check: if ch == "," { p = p + 1; continue }
ASTNode::If {
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::String(",".to_string()),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
},
ASTNode::Continue {
span: Span::unknown(),
},
],
else_body: None,
span: Span::unknown(),
},
// Regular update: p = p + 1
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
},
],
span: Span::unknown(),
};
let result = canonicalize_loop_expr(&loop_node);
assert!(result.is_ok());
let (skeleton, decision) = result.unwrap();
// Verify success
assert!(decision.is_success());
// chosen == Pattern4Continue (has both continue and return)
assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern4Continue));
// missing_caps == []
assert!(decision.missing_caps.is_empty());
// Verify skeleton structure
// HeaderCond + Body (ch assignment) + Update
assert!(skeleton.steps.len() >= 2);
assert!(matches!(skeleton.steps[0], SkeletonStep::HeaderCond { .. }));
// Verify carrier
assert_eq!(skeleton.carriers.len(), 1);
assert_eq!(skeleton.carriers[0].name, "p");
assert_eq!(skeleton.carriers[0].role, CarrierRole::Counter);
match &skeleton.carriers[0].update_kind {
UpdateKind::ConstStep { delta } => assert_eq!(*delta, 1),
_ => panic!("Expected ConstStep update"),
}
// Verify exit contract
assert!(!skeleton.exits.has_break);
assert!(skeleton.exits.has_continue);
assert!(skeleton.exits.has_return);
assert!(!skeleton.exits.break_has_value);
}
#[test]
fn test_parse_number_pattern_recognized() {
// Phase 143-P0: Test parse_number pattern (break in THEN clause)

View File

@ -77,34 +77,35 @@ pub fn try_extract_parse_number_pattern(
}
// ============================================================================
// Parse String Pattern (Phase 143-P1)
// Parse String/Array Pattern (Phase 143-P1/P2)
// ============================================================================
/// Try to extract the parse_string or parse_array pattern from a loop body.
///
/// Pattern structure (pseudo-code of the source language, not Rust — the
/// fence is tagged `text` so rustdoc does not try to compile it as a doctest):
/// ```text
/// loop(cond) {
///     // ... body statements (ch computation)
///     if stop_cond {          // quote for string, ']' for array
///         return result
///     }
///     if separator_cond {     // escape for string, ',' for array
///         // ... separator handling
///         carrier = carrier + const
///         continue
///     }
///     // ... regular processing
///     carrier = carrier + const
/// }
/// ```
///
/// Returns `Some((carrier_name, delta, body_stmts))` if the pattern matches,
/// and `None` when the underlying detector returns `None`.
///
/// # Phase 143-P1/P2: Parse String/Array Pattern Detection
///
/// This function delegates to `ast_feature_extractor::detect_parse_string_pattern`
/// for SSOT implementation. The same detector handles both parse_string and
/// parse_array patterns as they share the same structural characteristics.
pub fn try_extract_parse_string_pattern(body: &[ASTNode]) -> Option<(String, i64, Vec<ASTNode>)> {
    // Flatten the detector's info struct into the tuple shape callers expect.
    ast_detect_parse_string(body).map(|info| (info.carrier_name, info.delta, info.body_stmts))
}