feat(mir): Loop Canonicalizer Phase 3 - skip_whitespace pattern recognition

## Summary
skip_whitespace パターンを Skeleton→Decision で認識可能に。
dev-only 観測で chosen=Pattern3IfPhi / missing_caps=[] を固定。

## Changes
- src/mir/loop_canonicalizer/mod.rs:
  - try_extract_skip_whitespace_pattern() 追加
    - loop(cond) { ... if check { p = p + 1 } else { break } } パターン認識
    - carrier name, delta, body statements を抽出
  - canonicalize_loop_expr() 拡張(skip_whitespace 対応)
    - Pattern3IfPhi 成功時は RoutingDecision::success 返却
    - Skeleton に HeaderCond, Body, Update ステップ追加
    - CarrierSlot に Counter role 設定
    - ExitContract に has_break=true 設定
  - Phase 3 unit tests 追加
    - test_skip_whitespace_pattern_recognition: 基本パターン
    - test_skip_whitespace_with_body_statements: body 付きパターン
    - test_skip_whitespace_fails_without_else: else なし失敗
    - test_skip_whitespace_fails_with_wrong_delta: 減算パターン失敗
  - Phase 2 obsolete tests 削除
- src/mir/builder/control_flow/joinir/routing.rs:
  - Debug 出力拡張(chosen pattern 表示)

## Tests
- cargo test --release --lib loop_canonicalizer::tests: PASS(11 tests)
- cargo test --release --lib: PASS(1044 tests, 退行なし)
- HAKO_JOINIR_DEBUG=1 test_pattern3_skip_whitespace.hako:
  - chosen=Pattern3IfPhi 
  - missing_caps=[] 

## Validation
-  dev-only 観測(HAKO_JOINIR_DEBUG=1)のときだけログ出力
-  フラグ OFF 時は完全不変
-  skip_whitespace パターンで SUCCESS 固定
-  unit tests で全パターン固定

Phase 137-3 complete
This commit is contained in:
nyash-codex
2025-12-16 05:38:18 +09:00
parent e8d93f107c
commit a0009d474d
7 changed files with 405 additions and 108 deletions

View File

@ -326,14 +326,116 @@ impl std::fmt::Display for CarrierRole {
}
// ============================================================================
// Phase 2: Canonicalization Entry Point
// Phase 3: Pattern Recognition Helpers
// ============================================================================
/// Canonicalize a loop AST into LoopSkeleton (Phase 2: Minimal Implementation)
/// Try to extract the skip_whitespace pattern from a loop body.
///
/// Recognized shape (the `if`/`else` must be the last statement of the body):
/// ```text
/// loop(cond) {
///     // ... optional body statements (Body)
///     if check_cond {
///         carrier = carrier + const
///     } else {
///         break
///     }
/// }
/// ```
///
/// The match is purely structural:
/// - the then-branch must be exactly one assignment of the form
///   `carrier = carrier + <integer literal>` (same variable on both sides,
///   variable on the left of `+`, literal on the right);
/// - the else-branch must be exactly one `break`.
///
/// The `if` condition itself is not inspected, and the literal's value is
/// returned as-is without any sign or zero check.
///
/// Returns `Some((carrier_name, delta, body_stmts))` when the pattern matches,
/// where `body_stmts` is a clone of every statement preceding the trailing
/// `if`. Returns `None` for an empty body or any other shape.
fn try_extract_skip_whitespace_pattern(
    body: &[ASTNode],
) -> Option<(String, i64, Vec<ASTNode>)> {
    // Split off the trailing statement; an empty body yields `None` here.
    let (last_stmt, leading_stmts) = body.split_last()?;

    // The trailing statement must be an `if` that has an `else` branch.
    let (then_body, else_body) = match last_stmt {
        ASTNode::If {
            then_body,
            else_body: Some(else_body),
            ..
        } => (then_body, else_body),
        _ => return None,
    };

    // Then-branch: exactly one assignment.
    let (target, value) = match &then_body[..] {
        [ASTNode::Assignment { target, value, .. }] => (target, value),
        _ => return None,
    };

    // The assignment target must be a plain variable; it is the carrier.
    let carrier_name = match target.as_ref() {
        ASTNode::Variable { name, .. } => name,
        _ => return None,
    };

    // The assigned value must be an addition.
    let (left, right) = match value.as_ref() {
        ASTNode::BinaryOp {
            operator: crate::ast::BinaryOperator::Add,
            left,
            right,
            ..
        } => (left, right),
        _ => return None,
    };

    // Left operand must be the carrier variable itself.
    match left.as_ref() {
        ASTNode::Variable { name, .. } if name == carrier_name => {}
        _ => return None,
    }

    // Right operand must be an integer literal; its value is the step.
    let delta = match right.as_ref() {
        ASTNode::Literal {
            value: crate::ast::LiteralValue::Integer(n),
            ..
        } => *n,
        _ => return None,
    };

    // Else-branch: exactly one `break`.
    match &else_body[..] {
        // Only clone once the whole pattern is known to match.
        [ASTNode::Break { .. }] => Some((carrier_name.clone(), delta, leading_stmts.to_vec())),
        _ => None,
    }
}
// ============================================================================
// Phase 3: Canonicalization Entry Point
// ============================================================================
/// Canonicalize a loop AST into LoopSkeleton (Phase 3: skip_whitespace pattern recognition)
///
/// Currently supports only the skip_whitespace pattern:
/// ```text
/// loop(cond) {
/// // ... optional body statements
/// if check_cond {
/// carrier = carrier + step
/// } else {
@ -363,66 +465,55 @@ pub fn canonicalize_loop_expr(
_ => return Err(format!("Expected Loop node, got: {:?}", loop_expr)),
};
// Phase 2: Minimal implementation - detect skip_whitespace pattern only
// Pattern: loop(cond) { if check { update } else { break } }
// Phase 3: Try to extract skip_whitespace pattern
if let Some((carrier_name, delta, body_stmts)) = try_extract_skip_whitespace_pattern(body) {
// Build skeleton for skip_whitespace pattern
let mut skeleton = LoopSkeleton::new(span);
// Check for minimal pattern: single if-else with break
if body.len() != 1 {
return Ok((
LoopSkeleton::new(span),
RoutingDecision::fail_fast(
vec![capability_tags::CAP_MISSING_SINGLE_BREAK],
format!("Phase 2: Only single-statement loops supported (got {} statements)", body.len()),
),
));
// Step 1: Header condition
skeleton.steps.push(SkeletonStep::HeaderCond {
expr: Box::new(condition.clone()),
});
// Step 2: Body statements (if any)
if !body_stmts.is_empty() {
skeleton.steps.push(SkeletonStep::Body {
stmts: body_stmts,
});
}
// Step 3: Update step
skeleton.steps.push(SkeletonStep::Update {
carrier_name: carrier_name.clone(),
update_kind: UpdateKind::ConstStep { delta },
});
// Add carrier slot
skeleton.carriers.push(CarrierSlot {
name: carrier_name,
role: CarrierRole::Counter,
update_kind: UpdateKind::ConstStep { delta },
});
// Set exit contract
skeleton.exits = ExitContract {
has_break: true,
has_continue: false,
has_return: false,
break_has_value: false,
};
// Success! Return Pattern3IfPhi
let decision = RoutingDecision::success(LoopPatternKind::Pattern3IfPhi);
return Ok((skeleton, decision));
}
// Check if it's an if-else statement
let _if_stmt = match &body[0] {
ASTNode::If {
condition: _if_cond,
then_body: _then_body,
else_body,
..
} => {
// Must have else branch
if else_body.is_none() {
return Ok((
LoopSkeleton::new(span),
RoutingDecision::fail_fast(
vec![capability_tags::CAP_MISSING_SINGLE_BREAK],
"Phase 2: If statement must have else branch".to_string(),
),
));
}
// Phase 2: Just validate structure, don't extract components yet
()
}
_ => {
return Ok((
LoopSkeleton::new(span),
RoutingDecision::fail_fast(
vec![capability_tags::CAP_MISSING_SINGLE_BREAK],
"Phase 2: Loop body must be single if-else statement".to_string(),
),
));
}
};
// Build minimal skeleton
let mut skeleton = LoopSkeleton::new(span);
// Add header condition
skeleton.steps.push(SkeletonStep::HeaderCond {
expr: Box::new(condition.clone()),
});
// For now, just mark as unsupported - full pattern detection will come in Phase 3
// Pattern not recognized - fail fast
Ok((
skeleton,
LoopSkeleton::new(span),
RoutingDecision::fail_fast(
vec![capability_tags::CAP_MISSING_CONST_STEP],
"Phase 2: Pattern detection not yet implemented".to_string(),
"Phase 3: Loop does not match skip_whitespace pattern".to_string(),
),
))
}
@ -534,22 +625,60 @@ mod tests {
assert!(result.unwrap_err().contains("Expected Loop node"));
}
#[test]
fn test_canonicalize_minimal_loop_structure() {
use crate::ast::LiteralValue;
// ============================================================================
// Phase 3: skip_whitespace Pattern Tests
// ============================================================================
// Build minimal loop: loop(true) { if true { } else { break } }
#[test]
fn test_skip_whitespace_pattern_recognition() {
use crate::ast::{BinaryOperator, LiteralValue};
// Build skip_whitespace pattern: loop(p < len) { if is_ws == 1 { p = p + 1 } else { break } }
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::Literal {
value: LiteralValue::Bool(true),
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Variable {
name: "len".to_string(),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: vec![ASTNode::If {
condition: Box::new(ASTNode::Literal {
value: LiteralValue::Bool(true),
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(ASTNode::Variable {
name: "is_ws".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
then_body: vec![],
then_body: vec![ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
}],
else_body: Some(vec![ASTNode::Break {
span: Span::unknown(),
}]),
@ -562,37 +691,106 @@ mod tests {
assert!(result.is_ok());
let (skeleton, decision) = result.unwrap();
// Should have header condition step
assert_eq!(skeleton.steps.len(), 1);
// Verify success
assert!(decision.is_success());
assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern3IfPhi));
assert_eq!(decision.missing_caps.len(), 0);
// Verify skeleton structure
assert_eq!(skeleton.steps.len(), 2); // HeaderCond + Update
assert!(matches!(
skeleton.steps[0],
SkeletonStep::HeaderCond { .. }
));
assert!(matches!(
skeleton.steps[1],
SkeletonStep::Update { .. }
));
// Phase 2: Should fail-fast (pattern detection not implemented)
assert!(decision.is_fail_fast());
assert!(decision.notes[0].contains("Pattern detection not yet implemented"));
// Verify carrier
assert_eq!(skeleton.carriers.len(), 1);
assert_eq!(skeleton.carriers[0].name, "p");
assert_eq!(skeleton.carriers[0].role, CarrierRole::Counter);
match &skeleton.carriers[0].update_kind {
UpdateKind::ConstStep { delta } => assert_eq!(*delta, 1),
_ => panic!("Expected ConstStep update"),
}
// Verify exit contract
assert!(skeleton.exits.has_break);
assert!(!skeleton.exits.has_continue);
assert!(!skeleton.exits.has_return);
assert!(!skeleton.exits.break_has_value);
}
#[test]
fn test_canonicalize_rejects_multi_statement_loop() {
use crate::ast::LiteralValue;
fn test_skip_whitespace_with_body_statements() {
use crate::ast::{BinaryOperator, LiteralValue};
// Build loop with 2 statements
// Build pattern with body statements before the if:
// loop(p < len) {
// local ch = get_char(p)
// if is_ws { p = p + 1 } else { break }
// }
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::Literal {
value: LiteralValue::Bool(true),
condition: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Less,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Variable {
name: "len".to_string(),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
body: vec![
ASTNode::Print {
expression: Box::new(ASTNode::Literal {
value: LiteralValue::String("test".to_string()),
// Body statement
ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "ch".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::FunctionCall {
name: "get_char".to_string(),
arguments: vec![ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}],
span: Span::unknown(),
}),
span: Span::unknown(),
},
ASTNode::Break {
// The if-else pattern
ASTNode::If {
condition: Box::new(ASTNode::Variable {
name: "is_ws".to_string(),
span: Span::unknown(),
}),
then_body: vec![ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
}],
else_body: Some(vec![ASTNode::Break {
span: Span::unknown(),
}]),
span: Span::unknown(),
},
],
@ -602,16 +800,36 @@ mod tests {
let result = canonicalize_loop_expr(&loop_node);
assert!(result.is_ok());
let (_, decision) = result.unwrap();
assert!(decision.is_fail_fast());
assert!(decision.notes[0].contains("Only single-statement loops supported"));
let (skeleton, decision) = result.unwrap();
// Verify success
assert!(decision.is_success());
assert_eq!(decision.chosen, Some(LoopPatternKind::Pattern3IfPhi));
// Verify skeleton has Body step
assert_eq!(skeleton.steps.len(), 3); // HeaderCond + Body + Update
assert!(matches!(
skeleton.steps[0],
SkeletonStep::HeaderCond { .. }
));
assert!(matches!(skeleton.steps[1], SkeletonStep::Body { .. }));
assert!(matches!(
skeleton.steps[2],
SkeletonStep::Update { .. }
));
// Verify body contains 1 statement
match &skeleton.steps[1] {
SkeletonStep::Body { stmts } => assert_eq!(stmts.len(), 1),
_ => panic!("Expected Body step"),
}
}
#[test]
fn test_canonicalize_rejects_if_without_else() {
use crate::ast::LiteralValue;
fn test_skip_whitespace_fails_without_else() {
use crate::ast::{BinaryOperator, LiteralValue};
// Build loop with if (no else)
// Build pattern without else branch (should fail)
let loop_node = ASTNode::Loop {
condition: Box::new(ASTNode::Literal {
value: LiteralValue::Bool(true),
@ -622,7 +840,25 @@ mod tests {
value: LiteralValue::Bool(true),
span: Span::unknown(),
}),
then_body: vec![],
then_body: vec![ASTNode::Assignment {
target: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(ASTNode::Variable {
name: "p".to_string(),
span: Span::unknown(),
}),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: Span::unknown(),
}),
span: Span::unknown(),
}),
span: Span::unknown(),
}],
else_body: None, // No else branch
span: Span::unknown(),
}],
@ -634,6 +870,58 @@ mod tests {
let (_, decision) = result.unwrap();
assert!(decision.is_fail_fast());
assert!(decision.notes[0].contains("must have else branch"));
assert!(decision
.notes[0]
.contains("does not match skip_whitespace pattern"));
}
#[test]
fn test_skip_whitespace_fails_with_wrong_delta() {
    use crate::ast::{BinaryOperator, LiteralValue};

    // Despite the test name, the literal step is fine here; what is wrong is
    // the operator of the update: `p = p - 1` instead of `p = p + 1`.
    // Everything else has the shape of the skip_whitespace pattern, so this
    // isolates the operator check.
    let loop_node = ASTNode::Loop {
        condition: Box::new(ASTNode::Literal {
            value: LiteralValue::Bool(true),
            span: Span::unknown(),
        }),
        body: vec![ASTNode::If {
            condition: Box::new(ASTNode::Literal {
                value: LiteralValue::Bool(true),
                span: Span::unknown(),
            }),
            then_body: vec![ASTNode::Assignment {
                target: Box::new(ASTNode::Variable {
                    name: "p".to_string(),
                    span: Span::unknown(),
                }),
                value: Box::new(ASTNode::BinaryOp {
                    operator: BinaryOperator::Subtract, // Wrong operator
                    left: Box::new(ASTNode::Variable {
                        name: "p".to_string(),
                        span: Span::unknown(),
                    }),
                    right: Box::new(ASTNode::Literal {
                        value: LiteralValue::Integer(1),
                        span: Span::unknown(),
                    }),
                    span: Span::unknown(),
                }),
                span: Span::unknown(),
            }],
            else_body: Some(vec![ASTNode::Break {
                span: Span::unknown(),
            }]),
            span: Span::unknown(),
        }],
        span: Span::unknown(),
    };

    // An unrecognized loop is not an error: the call succeeds and the
    // decision reports fail-fast.
    let result = canonicalize_loop_expr(&loop_node);
    assert!(result.is_ok());

    let (_, decision) = result.unwrap();
    assert!(decision.is_fail_fast());
    // Pin the reason as well, so the test cannot pass because of some
    // unrelated failure path.
    assert!(decision
        .notes[0]
        .contains("does not match skip_whitespace pattern"));
}
}