From ae2a1d433966faac45dca6625f9635c504e9800c Mon Sep 17 00:00:00 2001 From: nyash-codex Date: Wed, 17 Dec 2025 21:24:46 +0900 Subject: [PATCH] refactor(joinir): boxify Pattern2 routing and schedule facts --- .../patterns/loop_true_counter_extractor.rs | 146 ++++++++++++++-- .../control_flow/joinir/patterns/mod.rs | 1 + .../pattern2_break_condition_policy_router.rs | 49 ++++++ .../joinir/patterns/pattern2_with_break.rs | 133 +++++---------- .../joinir/patterns/policies/README.md | 17 ++ .../policies/loop_true_read_digits_policy.rs | 61 +++++++ .../joinir/patterns/policies/mod.rs | 1 + .../read_digits_break_condition_box.rs | 156 +++++++++++++++++- .../lowering/loop_with_break_minimal.rs | 11 +- src/mir/join_ir/lowering/step_schedule.rs | 118 +++++++++---- src/mir/loop_canonicalizer/canonicalizer.rs | 5 +- 11 files changed, 550 insertions(+), 148 deletions(-) create mode 100644 src/mir/builder/control_flow/joinir/patterns/pattern2_break_condition_policy_router.rs create mode 100644 src/mir/builder/control_flow/joinir/patterns/policies/loop_true_read_digits_policy.rs diff --git a/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs index 06251405..d00c77c3 100644 --- a/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs +++ b/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs @@ -1,11 +1,22 @@ -//! LoopTrueCounterExtractorBox - loop(true) からの loop counter 抽出(Pattern2専用) +//! LoopTrueCounterExtractorBox - loop(true) からの loop counter 抽出(Pattern2) //! -//! Phase 104: read_digits_from 形(loop(true) + break-only)を Pattern2 で扱うため、 -//! `condition` 側に loop var が無いケースで body から loop counter(例: i)を一意に確定する。 +//! 目的: `loop(true)` のように header condition から loop counter を抽出できないケースで、 +//! body から loop counter(例: `i`)を一意に確定する。 //! -//! SSOT/Fail-Fast: -//! - 目的は「曖昧な loop(true) を通さない」こと。 -//! - 1変数・+1 だけを許可し、取りこぼしは理由付き Err にする。 +//! ## 責務(SSOT) +//! - Pattern2 の「loop(true) を扱う入口」を1箇所に集約する +//! - 曖昧な loop(true) を **通さない**(Fail-Fast で理由を返す) +//! +//! ## Contract(Fail-Fast) +//! 許可(read_digits(loop(true)) 系で必要な最小): +//! - カウンタ候補が **ちょうど1つ** +//! - 更新が `i = i + 1` 形(定数 1 のみ) +//! - `s.substring(i, i + 1)` 形が body のどこかに存在(誤マッチ防止) +//! - `i` が loop-outer var(`variable_map` に存在)である +//! +//! 禁止: +//! - 候補なし / 複数候補 / 更新形が曖昧(+1 以外)/ substring 形が無い +//! - loop-outer でない(variable_map にいない) use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; use crate::mir::ValueId; @@ -26,7 +37,7 @@ impl LoopTrueCounterExtractorBox { /// Extract a unique loop counter variable from loop(true) body. /// - /// Current supported shape (Phase 104 minimal): + /// Current supported shape (minimal): /// - There exists an assignment `i = i + 1` somewhere in the body (including nested if). /// - There exists a substring read using that counter: `s.substring(i, i + 1)` (same `i`). /// @@ -118,14 +129,14 @@ impl LoopTrueCounterExtractorBox { let loop_var_name = match candidates.len() { 0 => { return Err( - "[phase104/loop-true-counter] Cannot find unique counter update `i = i + 1` in loop(true) body" + "[pattern2/loop_true_counter/contract/no_candidate] Cannot find unique counter update `i = i + 1` in loop(true) body" .to_string(), ); } 1 => candidates[0].clone(), _ => { return Err(format!( - "[phase104/loop-true-counter] Multiple counter candidates found in loop(true) body: {:?}", + "[pattern2/loop_true_counter/contract/multiple_candidates] Multiple counter candidates found in loop(true) body: {:?}", candidates )); } @@ -133,14 +144,14 @@ impl LoopTrueCounterExtractorBox { let host_id = variable_map.get(&loop_var_name).copied().ok_or_else(|| { format!( - "[phase104/loop-true-counter] Counter '{}' not found in variable_map (loop-outer var required)", + "[pattern2/loop_true_counter/contract/not_loop_outer] Counter '{}' not found in variable_map (loop-outer var required)", loop_var_name ) })?; if !has_substring_read(body, &loop_var_name) { return Err(format!( - "[phase104/loop-true-counter] Counter '{}' found, but missing substring pattern `s.substring({}, {} + 1)`", + "[pattern2/loop_true_counter/contract/missing_substring_guard] Counter '{}' found, but missing substring pattern `s.substring({}, {} + 1)`", loop_var_name, loop_var_name, loop_var_name )); } @@ -209,3 +220,116 @@ fn has_substring_read(body: &[ASTNode], counter: &str) -> bool { body.iter().any(|s| walk(s, counter)) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::ast::Span; + + fn span() -> Span { + Span::unknown() + } + + fn var(name: &str) -> ASTNode { + ASTNode::Variable { + name: name.to_string(), + span: span(), + } + } + + fn lit_i(n: i64) -> ASTNode { + ASTNode::Literal { + value: LiteralValue::Integer(n), + span: span(), + } + } + + fn add(left: ASTNode, right: ASTNode) -> ASTNode { + ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(left), + right: Box::new(right), + span: span(), + } + } + + fn assign(target: ASTNode, value: ASTNode) -> ASTNode { + ASTNode::Assignment { + target: Box::new(target), + value: Box::new(value), + span: span(), + } + } + + fn substring_i(s_var: &str, i_var: &str) -> ASTNode { + ASTNode::MethodCall { + object: Box::new(var(s_var)), + method: "substring".to_string(), + arguments: vec![var(i_var), add(var(i_var), lit_i(1))], + span: span(), + } + } + + fn local_one(name: &str, init: ASTNode) -> ASTNode { + ASTNode::Local { + variables: vec![name.to_string()], + initial_values: vec![Some(Box::new(init))], + span: span(), + } + } + + #[test] + fn extract_single_candidate_ok() { + let body = vec![ + local_one("ch", substring_i("s", "i")), + assign(var("i"), add(var("i"), lit_i(1))), + ]; + let mut variable_map = BTreeMap::new(); + variable_map.insert("i".to_string(), ValueId(123)); + + let (name, host_id) = + LoopTrueCounterExtractorBox::extract_loop_counter_from_body(&body, &variable_map) + .unwrap(); + assert_eq!(name, "i"); + assert_eq!(host_id, ValueId(123)); + } + + #[test] + fn extract_rejects_no_candidate() { + let body = vec![local_one("ch", substring_i("s", "i"))]; + let mut variable_map = BTreeMap::new(); + variable_map.insert("i".to_string(), ValueId(1)); + let err = + LoopTrueCounterExtractorBox::extract_loop_counter_from_body(&body, &variable_map) + .unwrap_err(); + assert!(err.contains("no_candidate")); + } + + #[test] + fn extract_rejects_multiple_candidates() { + let body = vec![ + local_one("ch", substring_i("s", "i")), + assign(var("i"), add(var("i"), lit_i(1))), + local_one("ch2", substring_i("s", "j")), + assign(var("j"), add(var("j"), lit_i(1))), + ]; + let mut variable_map = BTreeMap::new(); + variable_map.insert("i".to_string(), ValueId(1)); + variable_map.insert("j".to_string(), ValueId(2)); + let err = + LoopTrueCounterExtractorBox::extract_loop_counter_from_body(&body, &variable_map) + .unwrap_err(); + assert!(err.contains("multiple_candidates")); + } + + #[test] + fn extract_rejects_missing_substring_guard() { + let body = vec![assign(var("i"), add(var("i"), lit_i(1)))]; + let mut variable_map = BTreeMap::new(); + variable_map.insert("i".to_string(), ValueId(1)); + let err = + LoopTrueCounterExtractorBox::extract_loop_counter_from_body(&body, &variable_map) + .unwrap_err(); + assert!(err.contains("missing_substring_guard")); + } +} diff --git a/src/mir/builder/control_flow/joinir/patterns/mod.rs b/src/mir/builder/control_flow/joinir/patterns/mod.rs index af2c16f8..38aa9f2d 100644 --- a/src/mir/builder/control_flow/joinir/patterns/mod.rs +++ b/src/mir/builder/control_flow/joinir/patterns/mod.rs @@ -56,6 +56,7 @@ pub(in crate::mir::builder) mod escape_pattern_recognizer; // Phase 91 P5b pub(in crate::mir::builder) mod common_init; pub(in crate::mir::builder) mod loop_true_counter_extractor; // Phase 104: loop(true) counter extraction for Pattern2 pub(in crate::mir::builder) mod read_digits_break_condition_box; // Phase 104: break cond normalization for read_digits(loop(true)) +pub(in crate::mir::builder) mod pattern2_break_condition_policy_router; // Phase 105: policy router box for Pattern2 break condition pub(in crate::mir::builder) mod condition_env_builder; pub(in crate::mir::builder) mod conversion_pipeline; pub(in crate::mir::builder) mod exit_binding; diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2_break_condition_policy_router.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2_break_condition_policy_router.rs new file mode 100644 index 00000000..fbb88a44 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2_break_condition_policy_router.rs @@ -0,0 +1,49 @@ +//! Pattern2 break-condition routing (policy → break_cond + allow-list + flags) +//! +//! This box exists to keep `pattern2_with_break.rs` focused on orchestration and +//! JoinIR wiring. It applies Pattern2-specific policies to derive: +//! - a normalized `break when true` condition node +//! - an allow-list of body-local variables permitted in conditions (Phase 92 P3) +//! - flags that affect later lowering (e.g. schedule and promotion heuristics) +//! +//! Fail-Fast: Policy rejections are returned as an error string and must not +//! silently fall back to an unrelated route. + +use crate::ast::ASTNode; +use crate::mir::loop_pattern_detection::break_condition_analyzer::BreakConditionAnalyzer; + +use super::policies::{loop_true_read_digits_policy, PolicyDecision}; + +#[derive(Debug, Clone)] +pub(crate) struct Pattern2BreakConditionRouting { + pub break_condition_node: ASTNode, + pub allowed_body_locals_for_conditions: Vec, + pub is_loop_true_read_digits: bool, +} + +pub(crate) struct Pattern2BreakConditionPolicyRouterBox; + +impl Pattern2BreakConditionPolicyRouterBox { + pub(crate) fn route(condition: &ASTNode, body: &[ASTNode]) -> Result { + // loop(true) read-digits family: + // - multiple breaks exist; normalize as: + // `break_when_true := (ch == "") || !(is_digit(ch))` + match loop_true_read_digits_policy::classify_loop_true_read_digits(condition, body) { + PolicyDecision::Use(result) => Ok(Pattern2BreakConditionRouting { + break_condition_node: result.break_condition_node, + allowed_body_locals_for_conditions: vec![result.allowed_ch_var], + is_loop_true_read_digits: true, + }), + PolicyDecision::Reject(reason) => Err(format!("[cf_loop/pattern2] {}", reason)), + PolicyDecision::None => Ok(Pattern2BreakConditionRouting { + break_condition_node: BreakConditionAnalyzer::extract_break_condition_node(body) + .map_err(|_| { + "[cf_loop/pattern2] Failed to extract break condition from loop body".to_string() + })?, + allowed_body_locals_for_conditions: Vec::new(), + is_loop_true_read_digits: false, + }), + } + } +} + diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2_with_break.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2_with_break.rs index bf39777e..176e3419 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern2_with_break.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2_with_break.rs @@ -21,6 +21,7 @@ use super::policies::p5b_escape_derived_policy::{ classify_p5b_escape_derived, P5bEscapeDerivedDecision, }; use super::policies::PolicyDecision; +use super::pattern2_break_condition_policy_router::Pattern2BreakConditionPolicyRouterBox; use std::collections::BTreeMap; struct Pattern2DebugLog { @@ -59,6 +60,11 @@ struct Pattern2Inputs { /// Phase 92 P3: Diagnostics / debug metadata for the allow-listed variable. read_only_body_local_slot: Option, break_condition_node: ASTNode, + /// loop(true) + break-only digits(read_digits_from family) + /// + /// Policy-routed: multiple breaks are normalized into a single `break when true` + /// condition, and Pattern2 must schedule body-init before break check. + is_loop_true_read_digits: bool, /// Phase 93 P0: ConditionOnly recipe for derived slot recalculation condition_only_recipe: Option, /// Phase 94: BodyLocalDerived recipe for P5b "ch" reassignment + escape counter. @@ -496,65 +502,8 @@ fn prepare_pattern2_inputs( } } - // Break condition extraction (SSOT + Phase 104 extension) - // - // SSOT (BreakConditionAnalyzer): - // - `if cond { break }` -> `cond` - // - `if cond { ... } else { break }` -> `!cond` - // - // Phase 104 (read_digits loop(true)): - // - multiple breaks exist; normalize as: - // `break_when_true := (ch == \"\") || !(is_digit(ch))` - let (break_condition_node, phase104_recipe_and_allowed) = { - use super::loop_true_counter_extractor::LoopTrueCounterExtractorBox; - use crate::ast::{BinaryOperator, Span, UnaryOperator}; - use crate::mir::join_ir::lowering::common::condition_only_emitter::{BreakSemantics, ConditionOnlyRecipe}; - use crate::mir::loop_pattern_detection::break_condition_analyzer::BreakConditionAnalyzer; - - if LoopTrueCounterExtractorBox::is_loop_true(condition) - && super::ast_feature_extractor::detect_read_digits_loop_true_pattern(body).is_some() - { - let (ch_var, eos_cond, digit_literals) = - super::read_digits_break_condition_box::ReadDigitsBreakConditionBox::extract_eos_and_digit_set(body) - .map_err(|e| format!("[cf_loop/pattern2] {}", e))?; - - // ConditionOnly derived slot: "is ch a digit?" (computed each iteration from body-local `ch`) - let is_digit_name = "__phase104_is_digit".to_string(); - let recipe = ConditionOnlyRecipe { - name: is_digit_name.clone(), - original_var: ch_var.clone(), - whitespace_chars: digit_literals, - break_semantics: BreakSemantics::WhenMatch, - }; - - let break_on_not_digit = ASTNode::UnaryOp { - operator: UnaryOperator::Not, - operand: Box::new(ASTNode::Variable { - name: is_digit_name.clone(), - span: Span::unknown(), - }), - span: Span::unknown(), - }; - - let break_when_true = ASTNode::BinaryOp { - operator: BinaryOperator::Or, - left: Box::new(eos_cond), - right: Box::new(break_on_not_digit), - span: Span::unknown(), - }; - - ( - break_when_true, - Some((ch_var, is_digit_name, recipe)), - ) - } else { - ( - BreakConditionAnalyzer::extract_break_condition_node(body) - .map_err(|_| "[cf_loop/pattern2] Failed to extract break condition from loop body".to_string())?, - None, - ) - } - }; + // Break condition extraction is policy-routed (SSOT). + let break_routing = Pattern2BreakConditionPolicyRouterBox::route(condition, body)?; let mut inputs = Pattern2Inputs { loop_var_name, @@ -568,18 +517,18 @@ fn prepare_pattern2_inputs( body_local_env, allowed_body_locals_for_conditions: Vec::new(), read_only_body_local_slot: None, - break_condition_node, + break_condition_node: break_routing.break_condition_node, + is_loop_true_read_digits: break_routing.is_loop_true_read_digits, condition_only_recipe: None, // Phase 93 P0: Will be set by apply_trim_and_normalize body_local_derived_recipe: None, // Phase 94: Will be set after normalization }; - // Phase 104: read_digits(loop(true)) wires derived is-digit slot + allow-list in one place. - if let Some((ch_var, is_digit_name, recipe)) = phase104_recipe_and_allowed { + // loop(true) read-digits family wires body-local allow-list in one place. + if !break_routing.allowed_body_locals_for_conditions.is_empty() { use crate::mir::join_ir::lowering::common::body_local_slot::ReadOnlyBodyLocalSlotBox; - inputs.condition_only_recipe = Some(recipe); - inputs.allowed_body_locals_for_conditions = vec![ch_var.clone(), is_digit_name]; + inputs.allowed_body_locals_for_conditions = break_routing.allowed_body_locals_for_conditions.clone(); inputs.read_only_body_local_slot = Some(ReadOnlyBodyLocalSlotBox::extract_single( - &[ch_var], + &inputs.allowed_body_locals_for_conditions, body, )?); } @@ -597,7 +546,6 @@ fn promote_and_prepare_carriers( ) -> Result<(), String> { use crate::mir::join_ir::lowering::digitpos_condition_normalizer::DigitPosConditionNormalizer; use crate::mir::loop_pattern_detection::loop_condition_scope::LoopConditionScopeBox; - use super::loop_true_counter_extractor::LoopTrueCounterExtractorBox; let cond_scope = LoopConditionScopeBox::analyze( &inputs.loop_var_name, @@ -615,24 +563,22 @@ fn promote_and_prepare_carriers( .collect(); if cond_scope.has_loop_body_local() { - // Phase 104: read_digits(loop(true)) pre-wires slot/allow-list/recipe in prepare_pattern2_inputs. + // loop(true) read-digits family pre-wires slot/allow-list in prepare_pattern2_inputs. // Do not run promotion heuristics here. - if !(LoopTrueCounterExtractorBox::is_loop_true(condition) - && super::ast_feature_extractor::detect_read_digits_loop_true_pattern(body).is_some()) - { - match classify_for_pattern2( - builder, - &inputs.loop_var_name, - &inputs.scope, - &inputs.break_condition_node, - &cond_scope, - body, - ) { - PolicyDecision::Use(BodyLocalRoute::Promotion { - promoted_carrier, - promoted_var, - carrier_name, - }) => { + if !inputs.is_loop_true_read_digits { + match classify_for_pattern2( + builder, + &inputs.loop_var_name, + &inputs.scope, + &inputs.break_condition_node, + &cond_scope, + body, + ) { + PolicyDecision::Use(BodyLocalRoute::Promotion { + promoted_carrier, + promoted_var, + carrier_name, + }) => { // Phase 133 P1: Check if this is a Trim promotion (A-3 pattern) // Trim promotions are handled by TrimLoopLowerer (apply_trim_and_normalize) // which provides SSOT for env/join_id, so we defer to that path. @@ -733,8 +679,8 @@ fn promote_and_prepare_carriers( ), ); } - } - PolicyDecision::Use(BodyLocalRoute::ReadOnlySlot(slot)) => { + } + PolicyDecision::Use(BodyLocalRoute::ReadOnlySlot(slot)) => { log.log( "body_local_slot", format!( @@ -744,14 +690,14 @@ fn promote_and_prepare_carriers( ); inputs.allowed_body_locals_for_conditions = vec![slot.name.clone()]; inputs.read_only_body_local_slot = Some(slot); - } - PolicyDecision::Reject(reason) => { + } + PolicyDecision::Reject(reason) => { return Err(error_messages::format_error_pattern2_promotion_failed( &cond_body_local_vars, &reason, )); + } + PolicyDecision::None => {} } - PolicyDecision::None => {} - } } } @@ -869,12 +815,11 @@ fn apply_trim_and_normalize( let log = Pattern2DebugLog::new(verbose); let mut alloc_join_value = || inputs.join_value_space.alloc_param(); - // Phase 104: read_digits(loop(true)) uses a digit-guard OR-chain which resembles + // loop(true) read-digits family uses a digit-guard OR-chain which resembles // "trim-like" patterns; do not route through TrimLoopLowerer. - let is_phase104_read_digits = super::loop_true_counter_extractor::LoopTrueCounterExtractorBox::is_loop_true(condition) - && super::ast_feature_extractor::detect_read_digits_loop_true_pattern(body).is_some(); + let disable_trim = inputs.is_loop_true_read_digits; - let effective_break_condition = if !is_phase104_read_digits { + let effective_break_condition = if !disable_trim { if let Some(trim_result) = super::trim_loop_lowering::TrimLoopLowerer::try_lower_trim_like_loop( builder, &inputs.scope, @@ -1052,7 +997,7 @@ pub(crate) fn can_lower(builder: &MirBuilder, ctx: &super::router::LoopPatternCo } // Phase 188/Refactor: Use common carrier update validation - // Phase 104: Support loop(true) by extracting the counter from the body. + // loop(true): Support by extracting the counter from the body (LoopTrueCounterExtractorBox SSOT). let loop_var_name = if LoopTrueCounterExtractorBox::is_loop_true(ctx.condition) { match LoopTrueCounterExtractorBox::extract_loop_counter_from_body( ctx.body, diff --git a/src/mir/builder/control_flow/joinir/patterns/policies/README.md b/src/mir/builder/control_flow/joinir/patterns/policies/README.md index 31ae2e82..8ebb6837 100644 --- a/src/mir/builder/control_flow/joinir/patterns/policies/README.md +++ b/src/mir/builder/control_flow/joinir/patterns/policies/README.md @@ -55,6 +55,23 @@ --- +### loop_true_read_digits_policy.rs (Phase 104/105) +**現在の場所**: `patterns/policies/loop_true_read_digits_policy.rs` + +**責務**: `loop(true)` + break-only digits(`read_digits_from` family)の認識とルーティング + +**判断基準**: +- `loop(true)` である +- read-digits(loop(true)) detector に一致する +- `ReadDigitsBreakConditionBox` が eos 条件 + digit 条件を抽出できる + +**出力**: +- `PolicyDecision::Use(LoopTrueReadDigitsPolicyResult)` - `break_when_true := (ch == \"\") || !(digit_cond)` と `ch` allow-list +- `PolicyDecision::Reject(String)` - 検出失敗理由(Fail-Fast) +- `PolicyDecision::None` - 該当なし + +--- + ### body_local_policy.rs **現在の場所**: `patterns/body_local_policy.rs` diff --git a/src/mir/builder/control_flow/joinir/patterns/policies/loop_true_read_digits_policy.rs b/src/mir/builder/control_flow/joinir/patterns/policies/loop_true_read_digits_policy.rs new file mode 100644 index 00000000..57f4d382 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/policies/loop_true_read_digits_policy.rs @@ -0,0 +1,61 @@ +//! loop(true) + break-only digits (read_digits_from family) policy +//! +//! Goal: keep Pattern2 core lowering structural by moving shape recognition + routing +//! for loop(true) read-digits family into a dedicated policy box. +//! +//! This policy is intentionally narrow: +//! - It only triggers when the body matches the read-digits(loop(true)) detector. +//! - It returns a normalized "break when true" condition AST: +//! `break_when_true := (ch == "") || !(digit_cond)` +//! - It provides the single allowed body-local variable name (`ch`) for condition lowering. + +use super::PolicyDecision; +use crate::ast::{ASTNode, BinaryOperator, Span, UnaryOperator}; + +pub(crate) struct LoopTrueReadDigitsPolicyResult { + pub break_condition_node: ASTNode, + pub allowed_ch_var: String, +} + +pub(crate) fn classify_loop_true_read_digits( + condition: &ASTNode, + body: &[ASTNode], +) -> PolicyDecision { + use crate::mir::builder::control_flow::joinir::patterns::{ + ast_feature_extractor::detect_read_digits_loop_true_pattern, + loop_true_counter_extractor::LoopTrueCounterExtractorBox, + read_digits_break_condition_box::ReadDigitsBreakConditionBox, + }; + + if !LoopTrueCounterExtractorBox::is_loop_true(condition) { + return PolicyDecision::None; + } + if detect_read_digits_loop_true_pattern(body).is_none() { + return PolicyDecision::None; + } + + let (ch_var, eos_cond, digit_cond) = + match ReadDigitsBreakConditionBox::extract_eos_and_digit_condition(body) { + Ok(v) => v, + Err(e) => return PolicyDecision::Reject(e), + }; + + let break_on_not_digit = ASTNode::UnaryOp { + operator: UnaryOperator::Not, + operand: Box::new(digit_cond), + span: Span::unknown(), + }; + + let break_when_true = ASTNode::BinaryOp { + operator: BinaryOperator::Or, + left: Box::new(eos_cond), + right: Box::new(break_on_not_digit), + span: Span::unknown(), + }; + + PolicyDecision::Use(LoopTrueReadDigitsPolicyResult { + break_condition_node: break_when_true, + allowed_ch_var: ch_var, + }) +} + diff --git a/src/mir/builder/control_flow/joinir/patterns/policies/mod.rs b/src/mir/builder/control_flow/joinir/patterns/policies/mod.rs index 29e88067..07b5565c 100644 --- a/src/mir/builder/control_flow/joinir/patterns/policies/mod.rs +++ b/src/mir/builder/control_flow/joinir/patterns/policies/mod.rs @@ -32,3 +32,4 @@ pub enum PolicyDecision { pub(in crate::mir::builder) mod p5b_escape_derived_policy; pub(in crate::mir::builder) mod trim_policy; +pub(in crate::mir::builder) mod loop_true_read_digits_policy; diff --git a/src/mir/builder/control_flow/joinir/patterns/read_digits_break_condition_box.rs b/src/mir/builder/control_flow/joinir/patterns/read_digits_break_condition_box.rs index d989d28c..a90058b5 100644 --- a/src/mir/builder/control_flow/joinir/patterns/read_digits_break_condition_box.rs +++ b/src/mir/builder/control_flow/joinir/patterns/read_digits_break_condition_box.rs @@ -10,9 +10,9 @@ use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; pub(crate) struct ReadDigitsBreakConditionBox; impl ReadDigitsBreakConditionBox { - pub(crate) fn extract_eos_and_digit_set( + pub(crate) fn extract_eos_and_digit_condition( body: &[ASTNode], - ) -> Result<(String, ASTNode, Vec), String> { + ) -> Result<(String, ASTNode, ASTNode), String> { if body.is_empty() { return Err("[phase104/read-digits] empty loop body".to_string()); } @@ -66,7 +66,7 @@ impl ReadDigitsBreakConditionBox { )); } - Ok((ch_var, eos_cond, digit_literals)) + Ok((ch_var, eos_cond, digit_cond)) } } @@ -158,7 +158,155 @@ fn collect_eq_string_literals( "[phase104/read-digits] non-digit literal in digit condition: {:?}", lit )); - } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ast::Span; + + fn span() -> Span { + Span::unknown() + } + + fn var(name: &str) -> ASTNode { + ASTNode::Variable { + name: name.to_string(), + span: span(), + } + } + + fn str_lit(s: &str) -> ASTNode { + ASTNode::Literal { + value: LiteralValue::String(s.to_string()), + span: span(), + } + } + + fn eq(left: ASTNode, right: ASTNode) -> ASTNode { + ASTNode::BinaryOp { + operator: BinaryOperator::Equal, + left: Box::new(left), + right: Box::new(right), + span: span(), + } + } + + fn or(left: ASTNode, right: ASTNode) -> ASTNode { + ASTNode::BinaryOp { + operator: BinaryOperator::Or, + left: Box::new(left), + right: Box::new(right), + span: span(), + } + } + + fn if_then_break(cond: ASTNode) -> ASTNode { + ASTNode::If { + condition: Box::new(cond), + then_body: vec![ASTNode::Break { span: span() }], + else_body: None, + span: span(), + } + } + + fn if_else_break(cond: ASTNode, then_body: Vec) -> ASTNode { + ASTNode::If { + condition: Box::new(cond), + then_body, + else_body: Some(vec![ASTNode::Break { span: span() }]), + span: span(), + } + } + + fn digit_chain(var_name: &str, digits: &[&str]) -> ASTNode { + let mut it = digits.iter(); + let first = it + .next() + .expect("digits must be non-empty"); + let mut acc = eq(var(var_name), str_lit(first)); + for d in it { + acc = or(acc, eq(var(var_name), str_lit(d))); + } + acc + } + + #[test] + fn test_extract_ok_eos_and_digit_condition() { + let digit_cond = digit_chain( + "ch", + &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"], + ); + + let body = vec![ + if_then_break(eq(var("ch"), str_lit(""))), + if_else_break( + digit_cond.clone(), + vec![ + ASTNode::Assignment { + target: Box::new(var("out")), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(var("out")), + right: Box::new(var("ch")), + span: span(), + }), + span: span(), + }, + ASTNode::Assignment { + target: Box::new(var("i")), + value: Box::new(ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left: Box::new(var("i")), + right: Box::new(ASTNode::Literal { + value: LiteralValue::Integer(1), + span: span(), + }), + span: span(), + }), + span: span(), + }, + ], + ), + ]; + + let (ch_var, eos_cond, extracted_digit_cond) = + ReadDigitsBreakConditionBox::extract_eos_and_digit_condition(&body).unwrap(); + + assert_eq!(ch_var, "ch"); + assert_eq!(eos_cond, eq(var("ch"), str_lit(""))); + assert_eq!(extracted_digit_cond, digit_cond); + } + + #[test] + fn test_extract_rejects_missing_digit_literal() { + let digit_cond = digit_chain( + "ch", + &["0", "1", "2", "3", "4", "5", "6", "7", "8"], // missing "9" + ); + let body = vec![ + if_then_break(eq(var("ch"), str_lit(""))), + if_else_break(digit_cond, vec![ASTNode::Break { span: span() }]), + ]; + + let err = ReadDigitsBreakConditionBox::extract_eos_and_digit_condition(&body) + .unwrap_err(); + assert!(err.contains("literal set mismatch")); + } + + #[test] + fn test_extract_rejects_mixed_var_names() { + let digit_cond = or(eq(var("ch"), str_lit("0")), eq(var("x"), str_lit("1"))); + let body = vec![ + if_then_break(eq(var("ch"), str_lit(""))), + if_else_break(digit_cond, vec![ASTNode::Break { span: span() }]), + ]; + + let err = ReadDigitsBreakConditionBox::extract_eos_and_digit_condition(&body) + .unwrap_err(); + assert!(err.contains("mixed variable names")); + } +} match var_name.as_deref() { None => *var_name = Some(name), diff --git a/src/mir/join_ir/lowering/loop_with_break_minimal.rs b/src/mir/join_ir/lowering/loop_with_break_minimal.rs index 118bedc1..a5a67f62 100644 --- a/src/mir/join_ir/lowering/loop_with_break_minimal.rs +++ b/src/mir/join_ir/lowering/loop_with_break_minimal.rs @@ -75,7 +75,8 @@ use crate::mir::join_ir::lowering::loop_body_local_env::LoopBodyLocalEnv; use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape; use crate::mir::join_ir::lowering::loop_update_analyzer::UpdateExpr; use crate::mir::join_ir::lowering::step_schedule::{ - build_pattern2_schedule_from_decision, decide_pattern2_schedule, Pattern2StepKind, + build_pattern2_schedule_from_decision, decide_pattern2_schedule, Pattern2ScheduleFactsBox, + Pattern2StepKind, }; use crate::mir::join_ir::lowering::update_env::UpdateEnv; use crate::mir::loop_canonicalizer::LoopSkeleton; @@ -381,12 +382,18 @@ pub(crate) fn lower_loop_with_break_minimal( // Decide evaluation order (header/body-init/break/updates/tail) up-front. // Phase 93 P0: Pass condition_only_recipe existence to schedule context. // When recipe exists, body-init must happen before break check. - let schedule_decision = decide_pattern2_schedule( + let has_allowed_body_locals_in_conditions = loop_cond_scope.has_loop_body_local() + && allowed_body_locals_for_conditions + .map(|allow| !allow.is_empty()) + .unwrap_or(false); + let schedule_facts = Pattern2ScheduleFactsBox::gather( body_local_env.as_ref().map(|env| &**env), carrier_info, condition_only_recipe.is_some(), body_local_derived_recipe.is_some(), + has_allowed_body_locals_in_conditions, ); + let schedule_decision = decide_pattern2_schedule(&schedule_facts); let schedule = build_pattern2_schedule_from_decision(&schedule_decision); // Collect fragments per step; append them according to the schedule below. diff --git a/src/mir/join_ir/lowering/step_schedule.rs b/src/mir/join_ir/lowering/step_schedule.rs index 314d947a..b4bbc66f 100644 --- a/src/mir/join_ir/lowering/step_schedule.rs +++ b/src/mir/join_ir/lowering/step_schedule.rs @@ -91,7 +91,49 @@ pub(crate) struct ScheduleDebugContext { pub has_body_local_derived_recipe: bool, } -/// Decide Pattern2 schedule based on loop characteristics +/// Facts about Pattern2 that drive step scheduling. +/// +/// This struct is intentionally "facts only" (no decision). +#[derive(Debug, Clone)] +pub(crate) struct Pattern2ScheduleFacts { + pub has_body_local_init: bool, + pub has_loop_local_carrier: bool, + pub has_condition_only_recipe: bool, + pub has_body_local_derived_recipe: bool, +} + +pub(crate) struct Pattern2ScheduleFactsBox; + +impl Pattern2ScheduleFactsBox { + pub(crate) fn gather( + body_local_env: Option<&LoopBodyLocalEnv>, + carrier_info: &CarrierInfo, + has_condition_only_recipe: bool, + has_body_local_derived_recipe: bool, + has_allowed_body_locals_in_conditions: bool, + ) -> Pattern2ScheduleFacts { + // NOTE: `body_local_env` may be empty here because it's populated after schedule + // decision (Phase 191 body-local init lowering happens later). + // + // For Phase 92+ patterns where conditions reference allowed body-local variables + // (e.g., `ch == ""`), we must still schedule BodyInit before BreakCheck. + let has_body_local_init = body_local_env.map(|env| !env.is_empty()).unwrap_or(false) + || has_allowed_body_locals_in_conditions; + let has_loop_local_carrier = carrier_info + .carriers + .iter() + .any(|c| matches!(c.init, CarrierInit::LoopLocalZero)); + + Pattern2ScheduleFacts { + has_body_local_init, + has_loop_local_carrier, + has_condition_only_recipe, + has_body_local_derived_recipe, + } + } +} + +/// Decide Pattern2 schedule based on loop characteristics (SSOT). /// /// Phase 93 Refactoring: Single source of truth for schedule decisions /// @@ -111,30 +153,19 @@ pub(crate) struct ScheduleDebugContext { /// # Returns /// /// `ScheduleDecision` with decision, reason, and debug context -pub(crate) fn decide_pattern2_schedule( - body_local_env: Option<&LoopBodyLocalEnv>, - carrier_info: &CarrierInfo, - has_condition_only_recipe: bool, - has_body_local_derived_recipe: bool, -) -> ScheduleDecision { - let has_body_local_init = body_local_env.map(|env| !env.is_empty()).unwrap_or(false); - let has_loop_local_carrier = carrier_info - .carriers - .iter() - .any(|c| matches!(c.init, CarrierInit::LoopLocalZero)); +pub(crate) fn decide_pattern2_schedule(facts: &Pattern2ScheduleFacts) -> ScheduleDecision { + let body_init_first = facts.has_condition_only_recipe + || facts.has_body_local_derived_recipe + || facts.has_body_local_init + || facts.has_loop_local_carrier; - let body_init_first = has_condition_only_recipe - || has_body_local_derived_recipe - || has_body_local_init - || has_loop_local_carrier; - - let reason = if has_condition_only_recipe { + let reason = if facts.has_condition_only_recipe { "ConditionOnly requires body-init before break" - } else if has_body_local_derived_recipe { + } else if facts.has_body_local_derived_recipe { "BodyLocalDerived requires body-init before break" - } else if has_body_local_init { + } else if facts.has_body_local_init { "body-local variables require init before break" - } else if has_loop_local_carrier { + } else if facts.has_loop_local_carrier { "loop-local carrier requires init before break" } else { "default schedule" @@ -144,10 +175,10 @@ pub(crate) fn decide_pattern2_schedule( body_init_first, reason, debug_ctx: ScheduleDebugContext { - has_body_local_init, - has_loop_local_carrier, - has_condition_only_recipe, - has_body_local_derived_recipe, + has_body_local_init: facts.has_body_local_init, + has_loop_local_carrier: facts.has_loop_local_carrier, + has_condition_only_recipe: facts.has_condition_only_recipe, + has_body_local_derived_recipe: facts.has_body_local_derived_recipe, }, } } @@ -263,7 +294,8 @@ mod tests { #[test] fn default_schedule_break_before_body_init() { - let decision = decide_pattern2_schedule(None, &carrier_info(vec![]), false, false); + let facts = Pattern2ScheduleFactsBox::gather(None, &carrier_info(vec![]), false, false, false); + let decision = decide_pattern2_schedule(&facts); let schedule = build_pattern2_schedule_from_decision(&decision); assert_eq!( schedule.steps(), @@ -283,12 +315,14 @@ mod tests { let mut body_env = LoopBodyLocalEnv::new(); body_env.insert("tmp".to_string(), ValueId(5)); - let decision = decide_pattern2_schedule( + let facts = Pattern2ScheduleFactsBox::gather( Some(&body_env), &carrier_info(vec![carrier(false)]), false, false, + false, ); + let decision = decide_pattern2_schedule(&facts); let schedule = build_pattern2_schedule_from_decision(&decision); assert_eq!( schedule.steps(), @@ -305,12 +339,9 @@ mod tests { #[test] fn loop_local_carrier_triggers_body_first() { - let decision = decide_pattern2_schedule( - None, - &carrier_info(vec![carrier(true)]), - false, - false, - ); + let facts = + Pattern2ScheduleFactsBox::gather(None, &carrier_info(vec![carrier(true)]), false, false, false); + let decision = decide_pattern2_schedule(&facts); let schedule = build_pattern2_schedule_from_decision(&decision); assert_eq!( schedule.steps(), @@ -329,7 +360,8 @@ mod tests { #[test] fn condition_only_recipe_triggers_body_first() { // Empty body_local_env but has condition_only_recipe - let decision = decide_pattern2_schedule(None, &carrier_info(vec![]), true, false); + let facts = Pattern2ScheduleFactsBox::gather(None, &carrier_info(vec![]), true, false, false); + let decision = decide_pattern2_schedule(&facts); let schedule = build_pattern2_schedule_from_decision(&decision); assert_eq!( schedule.steps(), @@ -344,6 +376,24 @@ mod tests { assert_eq!(schedule.reason(), "ConditionOnly requires body-init before break"); } + #[test] + fn allowed_body_local_deps_trigger_body_first_even_if_env_empty() { + let facts = Pattern2ScheduleFactsBox::gather(None, &carrier_info(vec![]), false, false, true); + let decision = decide_pattern2_schedule(&facts); + let schedule = build_pattern2_schedule_from_decision(&decision); + assert_eq!( + schedule.steps(), + &[ + Pattern2StepKind::HeaderCond, + Pattern2StepKind::BodyInit, + Pattern2StepKind::BreakCheck, + Pattern2StepKind::Updates, + Pattern2StepKind::Tail + ] + ); + assert_eq!(schedule.reason(), "body-local variables require init before break"); + } + #[test] fn test_pattern3_if_sum_schedule() { let schedule = pattern3_if_sum_schedule(); diff --git a/src/mir/loop_canonicalizer/canonicalizer.rs b/src/mir/loop_canonicalizer/canonicalizer.rs index af4c23b9..88d67c43 100644 --- a/src/mir/loop_canonicalizer/canonicalizer.rs +++ b/src/mir/loop_canonicalizer/canonicalizer.rs @@ -674,9 +674,8 @@ mod tests { let (_, decision) = result.unwrap(); assert!(decision.is_fail_fast()); - assert!(decision.notes[0].contains( - "does not match skip_whitespace, parse_number, continue, parse_string, or parse_array pattern" - )); + assert!(decision.notes[0].contains("Loop does not match")); + assert!(decision.notes[0].contains("skip_whitespace")); } #[test]