feat(mir): Loop Canonicalizer Phase 3 - skip_whitespace pattern recognition
## Summary
skip_whitespace パターンを Skeleton→Decision で認識可能に。
dev-only 観測で chosen=Pattern3IfPhi / missing_caps=[] を固定。
## Changes
- src/mir/loop_canonicalizer/mod.rs:
  - try_extract_skip_whitespace_pattern() 追加
    - loop(cond) { ... if check { p = p + 1 } else { break } } パターン認識
    - carrier name, delta, body statements を抽出
  - canonicalize_loop_expr() 拡張(skip_whitespace 対応)
    - Pattern3IfPhi 成功時は RoutingDecision::success 返却
    - Skeleton に HeaderCond, Body, Update ステップ追加
    - CarrierSlot に Counter role 設定
    - ExitContract に has_break=true 設定
  - Phase 3 unit tests 追加
    - test_skip_whitespace_pattern_recognition: 基本パターン
    - test_skip_whitespace_with_body_statements: body 付きパターン
    - test_skip_whitespace_fails_without_else: else なし失敗
    - test_skip_whitespace_fails_with_wrong_delta: 減算パターン失敗
  - Phase 2 obsolete tests 削除
- src/mir/builder/control_flow/joinir/routing.rs:
  - Debug 出力拡張(chosen pattern 表示)
## Tests
- cargo test --release --lib loop_canonicalizer::tests: PASS(11 tests)
- cargo test --release --lib: PASS(1044 tests, 退行なし)
- HAKO_JOINIR_DEBUG=1 test_pattern3_skip_whitespace.hako:
  - chosen=Pattern3IfPhi ✅
  - missing_caps=[] ✅
## Validation
- ✅ dev-only 観測(HAKO_JOINIR_DEBUG=1)のときだけログ出力
- ✅ フラグ OFF 時は完全不変
- ✅ skip_whitespace パターンで SUCCESS 固定
- ✅ unit tests で全パターン固定
Phase 137-3 complete
This commit is contained in:
@ -326,14 +326,116 @@ impl std::fmt::Display for CarrierRole {
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 2: Canonicalization Entry Point
|
||||
// Phase 3: Pattern Recognition Helpers
|
||||
// ============================================================================
|
||||
|
||||
/// Canonicalize a loop AST into LoopSkeleton (Phase 2: Minimal Implementation)
|
||||
/// Try to extract skip_whitespace pattern from loop
|
||||
///
|
||||
/// Pattern structure:
|
||||
/// ```
|
||||
/// loop(cond) {
|
||||
/// // ... optional body statements (Body)
|
||||
/// if check_cond {
|
||||
/// carrier = carrier + const
|
||||
/// } else {
|
||||
/// break
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Returns (carrier_name, delta, body_stmts) if pattern matches.
|
||||
fn try_extract_skip_whitespace_pattern(
|
||||
body: &[ASTNode],
|
||||
) -> Option<(String, i64, Vec<ASTNode>)> {
|
||||
if body.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Last statement must be if-else with break
|
||||
let last_stmt = &body[body.len() - 1];
|
||||
|
||||
let (then_body, else_body) = match last_stmt {
|
||||
ASTNode::If {
|
||||
then_body,
|
||||
else_body: Some(else_body),
|
||||
..
|
||||
} => (then_body, else_body),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Then branch must be single assignment: carrier = carrier + const
|
||||
if then_body.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (carrier_name, delta) = match &then_body[0] {
|
||||
ASTNode::Assignment { target, value, .. } => {
|
||||
// Extract target variable name
|
||||
let target_name = match target.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name.clone(),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Value must be: target + const
|
||||
match value.as_ref() {
|
||||
ASTNode::BinaryOp {
|
||||
operator: crate::ast::BinaryOperator::Add,
|
||||
left,
|
||||
right,
|
||||
..
|
||||
} => {
|
||||
// Left must be same variable
|
||||
let left_name = match left.as_ref() {
|
||||
ASTNode::Variable { name, .. } => name,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if left_name != &target_name {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Right must be integer literal
|
||||
let delta = match right.as_ref() {
|
||||
ASTNode::Literal {
|
||||
value: crate::ast::LiteralValue::Integer(n),
|
||||
..
|
||||
} => *n,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
(target_name, delta)
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
// Else branch must be single break
|
||||
if else_body.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
match &else_body[0] {
|
||||
ASTNode::Break { .. } => {
|
||||
// Success! Extract body statements (all except last if)
|
||||
let body_stmts = body[..body.len() - 1].to_vec();
|
||||
Some((carrier_name, delta, body_stmts))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 3: Canonicalization Entry Point
|
||||
// ============================================================================
|
||||
|
||||
/// Canonicalize a loop AST into LoopSkeleton (Phase 3: skip_whitespace pattern recognition)
|
||||
///
|
||||
/// Currently supports only the skip_whitespace pattern:
|
||||
/// ```
|
||||
/// loop(cond) {
|
||||
/// // ... optional body statements
|
||||
/// if check_cond {
|
||||
/// carrier = carrier + step
|
||||
/// } else {
|
||||
@ -363,66 +465,55 @@ pub fn canonicalize_loop_expr(
|
||||
_ => return Err(format!("Expected Loop node, got: {:?}", loop_expr)),
|
||||
};
|
||||
|
||||
// Phase 2: Minimal implementation - detect skip_whitespace pattern only
|
||||
// Pattern: loop(cond) { if check { update } else { break } }
|
||||
// Phase 3: Try to extract skip_whitespace pattern
|
||||
if let Some((carrier_name, delta, body_stmts)) = try_extract_skip_whitespace_pattern(body) {
|
||||
// Build skeleton for skip_whitespace pattern
|
||||
let mut skeleton = LoopSkeleton::new(span);
|
||||
|
||||
// Check for minimal pattern: single if-else with break
|
||||
if body.len() != 1 {
|
||||
return Ok((
|
||||
LoopSkeleton::new(span),
|
||||
RoutingDecision::fail_fast(
|
||||
vec![capability_tags::CAP_MISSING_SINGLE_BREAK],
|
||||
format!("Phase 2: Only single-statement loops supported (got {} statements)", body.len()),
|
||||
),
|
||||
));
|
||||
// Step 1: Header condition
|
||||
skeleton.steps.push(SkeletonStep::HeaderCond {
|
||||
expr: Box::new(condition.clone()),
|
||||
});
|
||||
|
||||
// Step 2: Body statements (if any)
|
||||
if !body_stmts.is_empty() {
|
||||
skeleton.steps.push(SkeletonStep::Body {
|
||||
stmts: body_stmts,
|
||||
});
|
||||
}
|
||||
|
||||
// Step 3: Update step
|
||||
skeleton.steps.push(SkeletonStep::Update {
|
||||
carrier_name: carrier_name.clone(),
|
||||
update_kind: UpdateKind::ConstStep { delta },
|
||||
});
|
||||
|
||||
// Add carrier slot
|
||||
skeleton.carriers.push(CarrierSlot {
|
||||
name: carrier_name,
|
||||
role: CarrierRole::Counter,
|
||||
update_kind: UpdateKind::ConstStep { delta },
|
||||
});
|
||||
|
||||
// Set exit contract
|
||||
skeleton.exits = ExitContract {
|
||||
has_break: true,
|
||||
has_continue: false,
|
||||
has_return: false,
|
||||
break_has_value: false,
|
||||
};
|
||||
|
||||
// Success! Return Pattern3IfPhi
|
||||
let decision = RoutingDecision::success(LoopPatternKind::Pattern3IfPhi);
|
||||
return Ok((skeleton, decision));
|
||||
}
|
||||
|
||||
// Check if it's an if-else statement
|
||||
let _if_stmt = match &body[0] {
|
||||
ASTNode::If {
|
||||
condition: _if_cond,
|
||||
then_body: _then_body,
|
||||
else_body,
|
||||
..
|
||||
} => {
|
||||
// Must have else branch
|
||||
if else_body.is_none() {
|
||||
return Ok((
|
||||
LoopSkeleton::new(span),
|
||||
RoutingDecision::fail_fast(
|
||||
vec![capability_tags::CAP_MISSING_SINGLE_BREAK],
|
||||
"Phase 2: If statement must have else branch".to_string(),
|
||||
),
|
||||
));
|
||||
}
|
||||
// Phase 2: Just validate structure, don't extract components yet
|
||||
()
|
||||
}
|
||||
_ => {
|
||||
return Ok((
|
||||
LoopSkeleton::new(span),
|
||||
RoutingDecision::fail_fast(
|
||||
vec![capability_tags::CAP_MISSING_SINGLE_BREAK],
|
||||
"Phase 2: Loop body must be single if-else statement".to_string(),
|
||||
),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// Build minimal skeleton
|
||||
let mut skeleton = LoopSkeleton::new(span);
|
||||
|
||||
// Add header condition
|
||||
skeleton.steps.push(SkeletonStep::HeaderCond {
|
||||
expr: Box::new(condition.clone()),
|
||||
});
|
||||
|
||||
// For now, just mark as unsupported - full pattern detection will come in Phase 3
|
||||
// Pattern not recognized - fail fast
|
||||
Ok((
|
||||
skeleton,
|
||||
LoopSkeleton::new(span),
|
||||
RoutingDecision::fail_fast(
|
||||
vec![capability_tags::CAP_MISSING_CONST_STEP],
|
||||
"Phase 2: Pattern detection not yet implemented".to_string(),
|
||||
"Phase 3: Loop does not match skip_whitespace pattern".to_string(),
|
||||
),
|
||||
))
|
||||
}
|
||||
@ -534,22 +625,60 @@ mod tests {
|
||||
assert!(result.unwrap_err().contains("Expected Loop node"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_canonicalize_minimal_loop_structure() {
|
||||
use crate::ast::LiteralValue;
|
||||
// ============================================================================
|
||||
// Phase 3: skip_whitespace Pattern Tests
|
||||
// ============================================================================
|
||||
|
||||
// Build minimal loop: loop(true) { if true { } else { break } }
|
||||
#[test]
|
||||
fn test_skip_whitespace_pattern_recognition() {
|
||||
use crate::ast::{BinaryOperator, LiteralValue};
|
||||
|
||||
// Build skip_whitespace pattern: loop(p < len) { if is_ws == 1 { p = p + 1 } else { break } }
|
||||
let loop_node = ASTNode::Loop {
|
||||
condition: Box::new(ASTNode::Literal {
|
||||
value: LiteralValue::Bool(true),
|
||||
condition: Box::new(ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Less,
|
||||
left: Box::new(ASTNode::Variable {
|
||||
name: "p".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
right: Box::new(ASTNode::Variable {
|
||||
name: "len".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
body: vec![ASTNode::If {
|
||||
condition: Box::new(ASTNode::Literal {
|
||||
value: LiteralValue::Bool(true),
|
||||
condition: Box::new(ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Equal,
|
||||
left: Box::new(ASTNode::Variable {
|
||||
name: "is_ws".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
right: Box::new(ASTNode::Literal {
|
||||
value: LiteralValue::Integer(1),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
then_body: vec![],
|
||||
then_body: vec![ASTNode::Assignment {
|
||||
target: Box::new(ASTNode::Variable {
|
||||
name: "p".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
value: Box::new(ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Add,
|
||||
left: Box::new(ASTNode::Variable {
|
||||
name: "p".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
right: Box::new(ASTNode::Literal {
|
||||
value: LiteralValue::Integer(1),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
}],
|
||||
else_body: Some(vec![ASTNode::Break {
|
||||
span: Span::unknown(),
|
||||
}]),
|
||||
@ -562,37 +691,106 @@ mod tests {
|
||||
assert!(result.is_ok());
|
||||
|
||||
let (skeleton, decision) = result.unwrap();
|
||||
// Should have header condition step
|
||||
assert_eq!(skeleton.steps.len(), 1);
|
||||
|
||||
// Verify success
|
||||
assert!(decision.is_success());
|
||||
assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern3IfPhi));
|
||||
assert_eq!(decision.missing_caps.len(), 0);
|
||||
|
||||
// Verify skeleton structure
|
||||
assert_eq!(skeleton.steps.len(), 2); // HeaderCond + Update
|
||||
assert!(matches!(
|
||||
skeleton.steps[0],
|
||||
SkeletonStep::HeaderCond { .. }
|
||||
));
|
||||
assert!(matches!(
|
||||
skeleton.steps[1],
|
||||
SkeletonStep::Update { .. }
|
||||
));
|
||||
|
||||
// Phase 2: Should fail-fast (pattern detection not implemented)
|
||||
assert!(decision.is_fail_fast());
|
||||
assert!(decision.notes[0].contains("Pattern detection not yet implemented"));
|
||||
// Verify carrier
|
||||
assert_eq!(skeleton.carriers.len(), 1);
|
||||
assert_eq!(skeleton.carriers[0].name, "p");
|
||||
assert_eq!(skeleton.carriers[0].role, CarrierRole::Counter);
|
||||
match &skeleton.carriers[0].update_kind {
|
||||
UpdateKind::ConstStep { delta } => assert_eq!(*delta, 1),
|
||||
_ => panic!("Expected ConstStep update"),
|
||||
}
|
||||
|
||||
// Verify exit contract
|
||||
assert!(skeleton.exits.has_break);
|
||||
assert!(!skeleton.exits.has_continue);
|
||||
assert!(!skeleton.exits.has_return);
|
||||
assert!(!skeleton.exits.break_has_value);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_canonicalize_rejects_multi_statement_loop() {
|
||||
use crate::ast::LiteralValue;
|
||||
fn test_skip_whitespace_with_body_statements() {
|
||||
use crate::ast::{BinaryOperator, LiteralValue};
|
||||
|
||||
// Build loop with 2 statements
|
||||
// Build pattern with body statements before the if:
|
||||
// loop(p < len) {
|
||||
// local ch = get_char(p)
|
||||
// if is_ws { p = p + 1 } else { break }
|
||||
// }
|
||||
let loop_node = ASTNode::Loop {
|
||||
condition: Box::new(ASTNode::Literal {
|
||||
value: LiteralValue::Bool(true),
|
||||
condition: Box::new(ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Less,
|
||||
left: Box::new(ASTNode::Variable {
|
||||
name: "p".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
right: Box::new(ASTNode::Variable {
|
||||
name: "len".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
body: vec![
|
||||
ASTNode::Print {
|
||||
expression: Box::new(ASTNode::Literal {
|
||||
value: LiteralValue::String("test".to_string()),
|
||||
// Body statement
|
||||
ASTNode::Assignment {
|
||||
target: Box::new(ASTNode::Variable {
|
||||
name: "ch".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
value: Box::new(ASTNode::FunctionCall {
|
||||
name: "get_char".to_string(),
|
||||
arguments: vec![ASTNode::Variable {
|
||||
name: "p".to_string(),
|
||||
span: Span::unknown(),
|
||||
}],
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
},
|
||||
ASTNode::Break {
|
||||
// The if-else pattern
|
||||
ASTNode::If {
|
||||
condition: Box::new(ASTNode::Variable {
|
||||
name: "is_ws".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
then_body: vec![ASTNode::Assignment {
|
||||
target: Box::new(ASTNode::Variable {
|
||||
name: "p".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
value: Box::new(ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Add,
|
||||
left: Box::new(ASTNode::Variable {
|
||||
name: "p".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
right: Box::new(ASTNode::Literal {
|
||||
value: LiteralValue::Integer(1),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
}],
|
||||
else_body: Some(vec![ASTNode::Break {
|
||||
span: Span::unknown(),
|
||||
}]),
|
||||
span: Span::unknown(),
|
||||
},
|
||||
],
|
||||
@ -602,16 +800,36 @@ mod tests {
|
||||
let result = canonicalize_loop_expr(&loop_node);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let (_, decision) = result.unwrap();
|
||||
assert!(decision.is_fail_fast());
|
||||
assert!(decision.notes[0].contains("Only single-statement loops supported"));
|
||||
let (skeleton, decision) = result.unwrap();
|
||||
|
||||
// Verify success
|
||||
assert!(decision.is_success());
|
||||
assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern3IfPhi));
|
||||
|
||||
// Verify skeleton has Body step
|
||||
assert_eq!(skeleton.steps.len(), 3); // HeaderCond + Body + Update
|
||||
assert!(matches!(
|
||||
skeleton.steps[0],
|
||||
SkeletonStep::HeaderCond { .. }
|
||||
));
|
||||
assert!(matches!(skeleton.steps[1], SkeletonStep::Body { .. }));
|
||||
assert!(matches!(
|
||||
skeleton.steps[2],
|
||||
SkeletonStep::Update { .. }
|
||||
));
|
||||
|
||||
// Verify body contains 1 statement
|
||||
match &skeleton.steps[1] {
|
||||
SkeletonStep::Body { stmts } => assert_eq!(stmts.len(), 1),
|
||||
_ => panic!("Expected Body step"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_canonicalize_rejects_if_without_else() {
|
||||
use crate::ast::LiteralValue;
|
||||
fn test_skip_whitespace_fails_without_else() {
|
||||
use crate::ast::{BinaryOperator, LiteralValue};
|
||||
|
||||
// Build loop with if (no else)
|
||||
// Build pattern without else branch (should fail)
|
||||
let loop_node = ASTNode::Loop {
|
||||
condition: Box::new(ASTNode::Literal {
|
||||
value: LiteralValue::Bool(true),
|
||||
@ -622,7 +840,25 @@ mod tests {
|
||||
value: LiteralValue::Bool(true),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
then_body: vec![],
|
||||
then_body: vec![ASTNode::Assignment {
|
||||
target: Box::new(ASTNode::Variable {
|
||||
name: "p".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
value: Box::new(ASTNode::BinaryOp {
|
||||
operator: BinaryOperator::Add,
|
||||
left: Box::new(ASTNode::Variable {
|
||||
name: "p".to_string(),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
right: Box::new(ASTNode::Literal {
|
||||
value: LiteralValue::Integer(1),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
span: Span::unknown(),
|
||||
}],
|
||||
else_body: None, // No else branch
|
||||
span: Span::unknown(),
|
||||
}],
|
||||
@ -634,6 +870,58 @@ mod tests {
|
||||
|
||||
let (_, decision) = result.unwrap();
|
||||
assert!(decision.is_fail_fast());
|
||||
assert!(decision.notes[0].contains("must have else branch"));
|
||||
assert!(decision
|
||||
.notes[0]
|
||||
.contains("does not match skip_whitespace pattern"));
|
||||
}
|
||||
|
||||
#[test]
fn test_skip_whitespace_fails_with_wrong_delta() {
    use crate::ast::{BinaryOperator, LiteralValue};

    // The AST is assembled bottom-up from named pieces:
    //   loop(true) { if true { p = p - 1 } else { break } }
    // The update subtracts instead of adding, so the loop must be rejected.
    let minus_one = ASTNode::BinaryOp {
        operator: BinaryOperator::Subtract, // Wrong operator
        left: Box::new(ASTNode::Variable {
            name: "p".to_string(),
            span: Span::unknown(),
        }),
        right: Box::new(ASTNode::Literal {
            value: LiteralValue::Integer(1),
            span: Span::unknown(),
        }),
        span: Span::unknown(),
    };

    let update = ASTNode::Assignment {
        target: Box::new(ASTNode::Variable {
            name: "p".to_string(),
            span: Span::unknown(),
        }),
        value: Box::new(minus_one),
        span: Span::unknown(),
    };

    let branch = ASTNode::If {
        condition: Box::new(ASTNode::Literal {
            value: LiteralValue::Bool(true),
            span: Span::unknown(),
        }),
        then_body: vec![update],
        else_body: Some(vec![ASTNode::Break {
            span: Span::unknown(),
        }]),
        span: Span::unknown(),
    };

    let loop_node = ASTNode::Loop {
        condition: Box::new(ASTNode::Literal {
            value: LiteralValue::Bool(true),
            span: Span::unknown(),
        }),
        body: vec![branch],
        span: Span::unknown(),
    };

    // Canonicalization itself succeeds; the routing decision reports the rejection.
    let outcome = canonicalize_loop_expr(&loop_node);
    assert!(outcome.is_ok());

    let (_, decision) = outcome.unwrap();
    assert!(decision.is_fail_fast());
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user