refactor(joinir): boxify Pattern2 routing and schedule facts

This commit is contained in:
nyash-codex
2025-12-17 21:24:46 +09:00
parent 950560a3d9
commit ae2a1d4339
11 changed files with 550 additions and 148 deletions

View File

@ -1,11 +1,22 @@
//! LoopTrueCounterExtractorBox - loop(true) からの loop counter 抽出Pattern2専用 //! LoopTrueCounterExtractorBox - loop(true) からの loop counter 抽出Pattern2
//! //!
//! Phase 104: read_digits_from 形loop(true) + break-onlyを Pattern2 で扱うため //! 目的: `loop(true)` のように header condition から loop counter を抽出できないケースで
//! `condition` 側に loop var が無いケースで body から loop counter例: i)を一意に確定する。 //! body から loop counter例: `i`)を一意に確定する。
//! //!
//! SSOT/Fail-Fast: //! ## 責務SSOT
//! - 目的は「曖昧な loop(true) を通さない」こと。 //! - Pattern2 の「loop(true) を扱う入口」を1箇所に集約する
//! - 1変数・+1 だけを許可し、取りこぼしは理由付き Err にする。 //! - 曖昧な loop(true) を **通さない**Fail-Fast で理由を返す)
//!
//! ## ContractFail-Fast
//! 許可read_digits(loop(true)) 系で必要な最小):
//! - カウンタ候補が **ちょうど1つ**
//! - 更新が `i = i + 1` 形(定数 1 のみ)
//! - `s.substring(i, i + 1)` 形が body のどこかに存在(誤マッチ防止)
//! - `i` が loop-outer var`variable_map` に存在)である
//!
//! 禁止:
//! - 候補なし / 複数候補 / 更新形が曖昧(+1 以外)/ substring 形が無い
//! - loop-outer でないvariable_map にいない)
use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; use crate::ast::{ASTNode, BinaryOperator, LiteralValue};
use crate::mir::ValueId; use crate::mir::ValueId;
@ -26,7 +37,7 @@ impl LoopTrueCounterExtractorBox {
/// Extract a unique loop counter variable from loop(true) body. /// Extract a unique loop counter variable from loop(true) body.
/// ///
/// Current supported shape (Phase 104 minimal): /// Current supported shape (minimal):
/// - There exists an assignment `i = i + 1` somewhere in the body (including nested if). /// - There exists an assignment `i = i + 1` somewhere in the body (including nested if).
/// - There exists a substring read using that counter: `s.substring(i, i + 1)` (same `i`). /// - There exists a substring read using that counter: `s.substring(i, i + 1)` (same `i`).
/// ///
@ -118,14 +129,14 @@ impl LoopTrueCounterExtractorBox {
let loop_var_name = match candidates.len() { let loop_var_name = match candidates.len() {
0 => { 0 => {
return Err( return Err(
"[phase104/loop-true-counter] Cannot find unique counter update `i = i + 1` in loop(true) body" "[pattern2/loop_true_counter/contract/no_candidate] Cannot find unique counter update `i = i + 1` in loop(true) body"
.to_string(), .to_string(),
); );
} }
1 => candidates[0].clone(), 1 => candidates[0].clone(),
_ => { _ => {
return Err(format!( return Err(format!(
"[phase104/loop-true-counter] Multiple counter candidates found in loop(true) body: {:?}", "[pattern2/loop_true_counter/contract/multiple_candidates] Multiple counter candidates found in loop(true) body: {:?}",
candidates candidates
)); ));
} }
@ -133,14 +144,14 @@ impl LoopTrueCounterExtractorBox {
let host_id = variable_map.get(&loop_var_name).copied().ok_or_else(|| { let host_id = variable_map.get(&loop_var_name).copied().ok_or_else(|| {
format!( format!(
"[phase104/loop-true-counter] Counter '{}' not found in variable_map (loop-outer var required)", "[pattern2/loop_true_counter/contract/not_loop_outer] Counter '{}' not found in variable_map (loop-outer var required)",
loop_var_name loop_var_name
) )
})?; })?;
if !has_substring_read(body, &loop_var_name) { if !has_substring_read(body, &loop_var_name) {
return Err(format!( return Err(format!(
"[phase104/loop-true-counter] Counter '{}' found, but missing substring pattern `s.substring({}, {} + 1)`", "[pattern2/loop_true_counter/contract/missing_substring_guard] Counter '{}' found, but missing substring pattern `s.substring({}, {} + 1)`",
loop_var_name, loop_var_name, loop_var_name loop_var_name, loop_var_name, loop_var_name
)); ));
} }
@ -209,3 +220,116 @@ fn has_substring_read(body: &[ASTNode], counter: &str) -> bool {
body.iter().any(|s| walk(s, counter)) body.iter().any(|s| walk(s, counter))
} }
#[cfg(test)]
mod tests {
use super::*;
use crate::ast::Span;
fn span() -> Span {
Span::unknown()
}
fn var(name: &str) -> ASTNode {
ASTNode::Variable {
name: name.to_string(),
span: span(),
}
}
fn lit_i(n: i64) -> ASTNode {
ASTNode::Literal {
value: LiteralValue::Integer(n),
span: span(),
}
}
fn add(left: ASTNode, right: ASTNode) -> ASTNode {
ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(left),
right: Box::new(right),
span: span(),
}
}
fn assign(target: ASTNode, value: ASTNode) -> ASTNode {
ASTNode::Assignment {
target: Box::new(target),
value: Box::new(value),
span: span(),
}
}
fn substring_i(s_var: &str, i_var: &str) -> ASTNode {
ASTNode::MethodCall {
object: Box::new(var(s_var)),
method: "substring".to_string(),
arguments: vec![var(i_var), add(var(i_var), lit_i(1))],
span: span(),
}
}
fn local_one(name: &str, init: ASTNode) -> ASTNode {
ASTNode::Local {
variables: vec![name.to_string()],
initial_values: vec![Some(Box::new(init))],
span: span(),
}
}
#[test]
fn extract_single_candidate_ok() {
let body = vec![
local_one("ch", substring_i("s", "i")),
assign(var("i"), add(var("i"), lit_i(1))),
];
let mut variable_map = BTreeMap::new();
variable_map.insert("i".to_string(), ValueId(123));
let (name, host_id) =
LoopTrueCounterExtractorBox::extract_loop_counter_from_body(&body, &variable_map)
.unwrap();
assert_eq!(name, "i");
assert_eq!(host_id, ValueId(123));
}
#[test]
fn extract_rejects_no_candidate() {
let body = vec![local_one("ch", substring_i("s", "i"))];
let mut variable_map = BTreeMap::new();
variable_map.insert("i".to_string(), ValueId(1));
let err =
LoopTrueCounterExtractorBox::extract_loop_counter_from_body(&body, &variable_map)
.unwrap_err();
assert!(err.contains("no_candidate"));
}
#[test]
fn extract_rejects_multiple_candidates() {
let body = vec![
local_one("ch", substring_i("s", "i")),
assign(var("i"), add(var("i"), lit_i(1))),
local_one("ch2", substring_i("s", "j")),
assign(var("j"), add(var("j"), lit_i(1))),
];
let mut variable_map = BTreeMap::new();
variable_map.insert("i".to_string(), ValueId(1));
variable_map.insert("j".to_string(), ValueId(2));
let err =
LoopTrueCounterExtractorBox::extract_loop_counter_from_body(&body, &variable_map)
.unwrap_err();
assert!(err.contains("multiple_candidates"));
}
#[test]
fn extract_rejects_missing_substring_guard() {
let body = vec![assign(var("i"), add(var("i"), lit_i(1)))];
let mut variable_map = BTreeMap::new();
variable_map.insert("i".to_string(), ValueId(1));
let err =
LoopTrueCounterExtractorBox::extract_loop_counter_from_body(&body, &variable_map)
.unwrap_err();
assert!(err.contains("missing_substring_guard"));
}
}

View File

@ -56,6 +56,7 @@ pub(in crate::mir::builder) mod escape_pattern_recognizer; // Phase 91 P5b
pub(in crate::mir::builder) mod common_init; pub(in crate::mir::builder) mod common_init;
pub(in crate::mir::builder) mod loop_true_counter_extractor; // Phase 104: loop(true) counter extraction for Pattern2 pub(in crate::mir::builder) mod loop_true_counter_extractor; // Phase 104: loop(true) counter extraction for Pattern2
pub(in crate::mir::builder) mod read_digits_break_condition_box; // Phase 104: break cond normalization for read_digits(loop(true)) pub(in crate::mir::builder) mod read_digits_break_condition_box; // Phase 104: break cond normalization for read_digits(loop(true))
pub(in crate::mir::builder) mod pattern2_break_condition_policy_router; // Phase 105: policy router box for Pattern2 break condition
pub(in crate::mir::builder) mod condition_env_builder; pub(in crate::mir::builder) mod condition_env_builder;
pub(in crate::mir::builder) mod conversion_pipeline; pub(in crate::mir::builder) mod conversion_pipeline;
pub(in crate::mir::builder) mod exit_binding; pub(in crate::mir::builder) mod exit_binding;

View File

@ -0,0 +1,49 @@
//! Pattern2 break-condition routing (policy → break_cond + allow-list + flags)
//!
//! This box exists to keep `pattern2_with_break.rs` focused on orchestration and
//! JoinIR wiring. It applies Pattern2-specific policies to derive:
//! - a normalized `break when true` condition node
//! - an allow-list of body-local variables permitted in conditions (Phase 92 P3)
//! - flags that affect later lowering (e.g. schedule and promotion heuristics)
//!
//! Fail-Fast: Policy rejections are returned as an error string and must not
//! silently fall back to an unrelated route.
use crate::ast::ASTNode;
use crate::mir::loop_pattern_detection::break_condition_analyzer::BreakConditionAnalyzer;
use super::policies::{loop_true_read_digits_policy, PolicyDecision};
#[derive(Debug, Clone)]
pub(crate) struct Pattern2BreakConditionRouting {
pub break_condition_node: ASTNode,
pub allowed_body_locals_for_conditions: Vec<String>,
pub is_loop_true_read_digits: bool,
}
pub(crate) struct Pattern2BreakConditionPolicyRouterBox;
impl Pattern2BreakConditionPolicyRouterBox {
pub(crate) fn route(condition: &ASTNode, body: &[ASTNode]) -> Result<Pattern2BreakConditionRouting, String> {
// loop(true) read-digits family:
// - multiple breaks exist; normalize as:
// `break_when_true := (ch == "") || !(is_digit(ch))`
match loop_true_read_digits_policy::classify_loop_true_read_digits(condition, body) {
PolicyDecision::Use(result) => Ok(Pattern2BreakConditionRouting {
break_condition_node: result.break_condition_node,
allowed_body_locals_for_conditions: vec![result.allowed_ch_var],
is_loop_true_read_digits: true,
}),
PolicyDecision::Reject(reason) => Err(format!("[cf_loop/pattern2] {}", reason)),
PolicyDecision::None => Ok(Pattern2BreakConditionRouting {
break_condition_node: BreakConditionAnalyzer::extract_break_condition_node(body)
.map_err(|_| {
"[cf_loop/pattern2] Failed to extract break condition from loop body".to_string()
})?,
allowed_body_locals_for_conditions: Vec::new(),
is_loop_true_read_digits: false,
}),
}
}
}

View File

@ -21,6 +21,7 @@ use super::policies::p5b_escape_derived_policy::{
classify_p5b_escape_derived, P5bEscapeDerivedDecision, classify_p5b_escape_derived, P5bEscapeDerivedDecision,
}; };
use super::policies::PolicyDecision; use super::policies::PolicyDecision;
use super::pattern2_break_condition_policy_router::Pattern2BreakConditionPolicyRouterBox;
use std::collections::BTreeMap; use std::collections::BTreeMap;
struct Pattern2DebugLog { struct Pattern2DebugLog {
@ -59,6 +60,11 @@ struct Pattern2Inputs {
/// Phase 92 P3: Diagnostics / debug metadata for the allow-listed variable. /// Phase 92 P3: Diagnostics / debug metadata for the allow-listed variable.
read_only_body_local_slot: Option<ReadOnlyBodyLocalSlot>, read_only_body_local_slot: Option<ReadOnlyBodyLocalSlot>,
break_condition_node: ASTNode, break_condition_node: ASTNode,
/// loop(true) + break-only digitsread_digits_from family
///
/// Policy-routed: multiple breaks are normalized into a single `break when true`
/// condition, and Pattern2 must schedule body-init before break check.
is_loop_true_read_digits: bool,
/// Phase 93 P0: ConditionOnly recipe for derived slot recalculation /// Phase 93 P0: ConditionOnly recipe for derived slot recalculation
condition_only_recipe: Option<crate::mir::join_ir::lowering::common::condition_only_emitter::ConditionOnlyRecipe>, condition_only_recipe: Option<crate::mir::join_ir::lowering::common::condition_only_emitter::ConditionOnlyRecipe>,
/// Phase 94: BodyLocalDerived recipe for P5b "ch" reassignment + escape counter. /// Phase 94: BodyLocalDerived recipe for P5b "ch" reassignment + escape counter.
@ -496,65 +502,8 @@ fn prepare_pattern2_inputs(
} }
} }
// Break condition extraction (SSOT + Phase 104 extension) // Break condition extraction is policy-routed (SSOT).
// let break_routing = Pattern2BreakConditionPolicyRouterBox::route(condition, body)?;
// SSOT (BreakConditionAnalyzer):
// - `if cond { break }` -> `cond`
// - `if cond { ... } else { break }` -> `!cond`
//
// Phase 104 (read_digits loop(true)):
// - multiple breaks exist; normalize as:
// `break_when_true := (ch == \"\") || !(is_digit(ch))`
let (break_condition_node, phase104_recipe_and_allowed) = {
use super::loop_true_counter_extractor::LoopTrueCounterExtractorBox;
use crate::ast::{BinaryOperator, Span, UnaryOperator};
use crate::mir::join_ir::lowering::common::condition_only_emitter::{BreakSemantics, ConditionOnlyRecipe};
use crate::mir::loop_pattern_detection::break_condition_analyzer::BreakConditionAnalyzer;
if LoopTrueCounterExtractorBox::is_loop_true(condition)
&& super::ast_feature_extractor::detect_read_digits_loop_true_pattern(body).is_some()
{
let (ch_var, eos_cond, digit_literals) =
super::read_digits_break_condition_box::ReadDigitsBreakConditionBox::extract_eos_and_digit_set(body)
.map_err(|e| format!("[cf_loop/pattern2] {}", e))?;
// ConditionOnly derived slot: "is ch a digit?" (computed each iteration from body-local `ch`)
let is_digit_name = "__phase104_is_digit".to_string();
let recipe = ConditionOnlyRecipe {
name: is_digit_name.clone(),
original_var: ch_var.clone(),
whitespace_chars: digit_literals,
break_semantics: BreakSemantics::WhenMatch,
};
let break_on_not_digit = ASTNode::UnaryOp {
operator: UnaryOperator::Not,
operand: Box::new(ASTNode::Variable {
name: is_digit_name.clone(),
span: Span::unknown(),
}),
span: Span::unknown(),
};
let break_when_true = ASTNode::BinaryOp {
operator: BinaryOperator::Or,
left: Box::new(eos_cond),
right: Box::new(break_on_not_digit),
span: Span::unknown(),
};
(
break_when_true,
Some((ch_var, is_digit_name, recipe)),
)
} else {
(
BreakConditionAnalyzer::extract_break_condition_node(body)
.map_err(|_| "[cf_loop/pattern2] Failed to extract break condition from loop body".to_string())?,
None,
)
}
};
let mut inputs = Pattern2Inputs { let mut inputs = Pattern2Inputs {
loop_var_name, loop_var_name,
@ -568,18 +517,18 @@ fn prepare_pattern2_inputs(
body_local_env, body_local_env,
allowed_body_locals_for_conditions: Vec::new(), allowed_body_locals_for_conditions: Vec::new(),
read_only_body_local_slot: None, read_only_body_local_slot: None,
break_condition_node, break_condition_node: break_routing.break_condition_node,
is_loop_true_read_digits: break_routing.is_loop_true_read_digits,
condition_only_recipe: None, // Phase 93 P0: Will be set by apply_trim_and_normalize condition_only_recipe: None, // Phase 93 P0: Will be set by apply_trim_and_normalize
body_local_derived_recipe: None, // Phase 94: Will be set after normalization body_local_derived_recipe: None, // Phase 94: Will be set after normalization
}; };
// Phase 104: read_digits(loop(true)) wires derived is-digit slot + allow-list in one place. // loop(true) read-digits family wires body-local allow-list in one place.
if let Some((ch_var, is_digit_name, recipe)) = phase104_recipe_and_allowed { if !break_routing.allowed_body_locals_for_conditions.is_empty() {
use crate::mir::join_ir::lowering::common::body_local_slot::ReadOnlyBodyLocalSlotBox; use crate::mir::join_ir::lowering::common::body_local_slot::ReadOnlyBodyLocalSlotBox;
inputs.condition_only_recipe = Some(recipe); inputs.allowed_body_locals_for_conditions = break_routing.allowed_body_locals_for_conditions.clone();
inputs.allowed_body_locals_for_conditions = vec![ch_var.clone(), is_digit_name];
inputs.read_only_body_local_slot = Some(ReadOnlyBodyLocalSlotBox::extract_single( inputs.read_only_body_local_slot = Some(ReadOnlyBodyLocalSlotBox::extract_single(
&[ch_var], &inputs.allowed_body_locals_for_conditions,
body, body,
)?); )?);
} }
@ -597,7 +546,6 @@ fn promote_and_prepare_carriers(
) -> Result<(), String> { ) -> Result<(), String> {
use crate::mir::join_ir::lowering::digitpos_condition_normalizer::DigitPosConditionNormalizer; use crate::mir::join_ir::lowering::digitpos_condition_normalizer::DigitPosConditionNormalizer;
use crate::mir::loop_pattern_detection::loop_condition_scope::LoopConditionScopeBox; use crate::mir::loop_pattern_detection::loop_condition_scope::LoopConditionScopeBox;
use super::loop_true_counter_extractor::LoopTrueCounterExtractorBox;
let cond_scope = LoopConditionScopeBox::analyze( let cond_scope = LoopConditionScopeBox::analyze(
&inputs.loop_var_name, &inputs.loop_var_name,
@ -615,24 +563,22 @@ fn promote_and_prepare_carriers(
.collect(); .collect();
if cond_scope.has_loop_body_local() { if cond_scope.has_loop_body_local() {
// Phase 104: read_digits(loop(true)) pre-wires slot/allow-list/recipe in prepare_pattern2_inputs. // loop(true) read-digits family pre-wires slot/allow-list in prepare_pattern2_inputs.
// Do not run promotion heuristics here. // Do not run promotion heuristics here.
if !(LoopTrueCounterExtractorBox::is_loop_true(condition) if !inputs.is_loop_true_read_digits {
&& super::ast_feature_extractor::detect_read_digits_loop_true_pattern(body).is_some()) match classify_for_pattern2(
{ builder,
match classify_for_pattern2( &inputs.loop_var_name,
builder, &inputs.scope,
&inputs.loop_var_name, &inputs.break_condition_node,
&inputs.scope, &cond_scope,
&inputs.break_condition_node, body,
&cond_scope, ) {
body, PolicyDecision::Use(BodyLocalRoute::Promotion {
) { promoted_carrier,
PolicyDecision::Use(BodyLocalRoute::Promotion { promoted_var,
promoted_carrier, carrier_name,
promoted_var, }) => {
carrier_name,
}) => {
// Phase 133 P1: Check if this is a Trim promotion (A-3 pattern) // Phase 133 P1: Check if this is a Trim promotion (A-3 pattern)
// Trim promotions are handled by TrimLoopLowerer (apply_trim_and_normalize) // Trim promotions are handled by TrimLoopLowerer (apply_trim_and_normalize)
// which provides SSOT for env/join_id, so we defer to that path. // which provides SSOT for env/join_id, so we defer to that path.
@ -733,8 +679,8 @@ fn promote_and_prepare_carriers(
), ),
); );
} }
} }
PolicyDecision::Use(BodyLocalRoute::ReadOnlySlot(slot)) => { PolicyDecision::Use(BodyLocalRoute::ReadOnlySlot(slot)) => {
log.log( log.log(
"body_local_slot", "body_local_slot",
format!( format!(
@ -744,14 +690,14 @@ fn promote_and_prepare_carriers(
); );
inputs.allowed_body_locals_for_conditions = vec![slot.name.clone()]; inputs.allowed_body_locals_for_conditions = vec![slot.name.clone()];
inputs.read_only_body_local_slot = Some(slot); inputs.read_only_body_local_slot = Some(slot);
} }
PolicyDecision::Reject(reason) => { PolicyDecision::Reject(reason) => {
return Err(error_messages::format_error_pattern2_promotion_failed( return Err(error_messages::format_error_pattern2_promotion_failed(
&cond_body_local_vars, &reason, &cond_body_local_vars, &reason,
)); ));
}
PolicyDecision::None => {}
} }
PolicyDecision::None => {}
}
} }
} }
@ -869,12 +815,11 @@ fn apply_trim_and_normalize(
let log = Pattern2DebugLog::new(verbose); let log = Pattern2DebugLog::new(verbose);
let mut alloc_join_value = || inputs.join_value_space.alloc_param(); let mut alloc_join_value = || inputs.join_value_space.alloc_param();
// Phase 104: read_digits(loop(true)) uses a digit-guard OR-chain which resembles // loop(true) read-digits family uses a digit-guard OR-chain which resembles
// "trim-like" patterns; do not route through TrimLoopLowerer. // "trim-like" patterns; do not route through TrimLoopLowerer.
let is_phase104_read_digits = super::loop_true_counter_extractor::LoopTrueCounterExtractorBox::is_loop_true(condition) let disable_trim = inputs.is_loop_true_read_digits;
&& super::ast_feature_extractor::detect_read_digits_loop_true_pattern(body).is_some();
let effective_break_condition = if !is_phase104_read_digits { let effective_break_condition = if !disable_trim {
if let Some(trim_result) = super::trim_loop_lowering::TrimLoopLowerer::try_lower_trim_like_loop( if let Some(trim_result) = super::trim_loop_lowering::TrimLoopLowerer::try_lower_trim_like_loop(
builder, builder,
&inputs.scope, &inputs.scope,
@ -1052,7 +997,7 @@ pub(crate) fn can_lower(builder: &MirBuilder, ctx: &super::router::LoopPatternCo
} }
// Phase 188/Refactor: Use common carrier update validation // Phase 188/Refactor: Use common carrier update validation
// Phase 104: Support loop(true) by extracting the counter from the body. // loop(true): Support by extracting the counter from the body (LoopTrueCounterExtractorBox SSOT).
let loop_var_name = if LoopTrueCounterExtractorBox::is_loop_true(ctx.condition) { let loop_var_name = if LoopTrueCounterExtractorBox::is_loop_true(ctx.condition) {
match LoopTrueCounterExtractorBox::extract_loop_counter_from_body( match LoopTrueCounterExtractorBox::extract_loop_counter_from_body(
ctx.body, ctx.body,

View File

@ -55,6 +55,23 @@
--- ---
### loop_true_read_digits_policy.rs (Phase 104/105)
**現在の場所**: `patterns/policies/loop_true_read_digits_policy.rs`
**責務**: `loop(true)` + break-only digits`read_digits_from` familyの認識とルーティング
**判断基準**:
- `loop(true)` である
- read-digits(loop(true)) detector に一致する
- `ReadDigitsBreakConditionBox` が eos 条件 + digit 条件を抽出できる
**出力**:
- `PolicyDecision::Use(LoopTrueReadDigitsPolicyResult)` - `break_when_true := (ch == \"\") || !(digit_cond)``ch` allow-list
- `PolicyDecision::Reject(String)` - 検出失敗理由Fail-Fast
- `PolicyDecision::None` - 該当なし
---
### body_local_policy.rs ### body_local_policy.rs
**現在の場所**: `patterns/body_local_policy.rs` **現在の場所**: `patterns/body_local_policy.rs`

View File

@ -0,0 +1,61 @@
//! loop(true) + break-only digits (read_digits_from family) policy
//!
//! Goal: keep Pattern2 core lowering structural by moving shape recognition + routing
//! for loop(true) read-digits family into a dedicated policy box.
//!
//! This policy is intentionally narrow:
//! - It only triggers when the body matches the read-digits(loop(true)) detector.
//! - It returns a normalized "break when true" condition AST:
//! `break_when_true := (ch == "") || !(digit_cond)`
//! - It provides the single allowed body-local variable name (`ch`) for condition lowering.
use super::PolicyDecision;
use crate::ast::{ASTNode, BinaryOperator, Span, UnaryOperator};
pub(crate) struct LoopTrueReadDigitsPolicyResult {
pub break_condition_node: ASTNode,
pub allowed_ch_var: String,
}
pub(crate) fn classify_loop_true_read_digits(
condition: &ASTNode,
body: &[ASTNode],
) -> PolicyDecision<LoopTrueReadDigitsPolicyResult> {
use crate::mir::builder::control_flow::joinir::patterns::{
ast_feature_extractor::detect_read_digits_loop_true_pattern,
loop_true_counter_extractor::LoopTrueCounterExtractorBox,
read_digits_break_condition_box::ReadDigitsBreakConditionBox,
};
if !LoopTrueCounterExtractorBox::is_loop_true(condition) {
return PolicyDecision::None;
}
if detect_read_digits_loop_true_pattern(body).is_none() {
return PolicyDecision::None;
}
let (ch_var, eos_cond, digit_cond) =
match ReadDigitsBreakConditionBox::extract_eos_and_digit_condition(body) {
Ok(v) => v,
Err(e) => return PolicyDecision::Reject(e),
};
let break_on_not_digit = ASTNode::UnaryOp {
operator: UnaryOperator::Not,
operand: Box::new(digit_cond),
span: Span::unknown(),
};
let break_when_true = ASTNode::BinaryOp {
operator: BinaryOperator::Or,
left: Box::new(eos_cond),
right: Box::new(break_on_not_digit),
span: Span::unknown(),
};
PolicyDecision::Use(LoopTrueReadDigitsPolicyResult {
break_condition_node: break_when_true,
allowed_ch_var: ch_var,
})
}

View File

@ -32,3 +32,4 @@ pub enum PolicyDecision<T> {
pub(in crate::mir::builder) mod p5b_escape_derived_policy; pub(in crate::mir::builder) mod p5b_escape_derived_policy;
pub(in crate::mir::builder) mod trim_policy; pub(in crate::mir::builder) mod trim_policy;
pub(in crate::mir::builder) mod loop_true_read_digits_policy;

View File

@ -10,9 +10,9 @@ use crate::ast::{ASTNode, BinaryOperator, LiteralValue};
pub(crate) struct ReadDigitsBreakConditionBox; pub(crate) struct ReadDigitsBreakConditionBox;
impl ReadDigitsBreakConditionBox { impl ReadDigitsBreakConditionBox {
pub(crate) fn extract_eos_and_digit_set( pub(crate) fn extract_eos_and_digit_condition(
body: &[ASTNode], body: &[ASTNode],
) -> Result<(String, ASTNode, Vec<String>), String> { ) -> Result<(String, ASTNode, ASTNode), String> {
if body.is_empty() { if body.is_empty() {
return Err("[phase104/read-digits] empty loop body".to_string()); return Err("[phase104/read-digits] empty loop body".to_string());
} }
@ -66,7 +66,7 @@ impl ReadDigitsBreakConditionBox {
)); ));
} }
Ok((ch_var, eos_cond, digit_literals)) Ok((ch_var, eos_cond, digit_cond))
} }
} }
@ -158,7 +158,155 @@ fn collect_eq_string_literals(
"[phase104/read-digits] non-digit literal in digit condition: {:?}", "[phase104/read-digits] non-digit literal in digit condition: {:?}",
lit lit
)); ));
} }
#[cfg(test)]
mod tests {
use super::*;
use crate::ast::Span;
fn span() -> Span {
Span::unknown()
}
fn var(name: &str) -> ASTNode {
ASTNode::Variable {
name: name.to_string(),
span: span(),
}
}
fn str_lit(s: &str) -> ASTNode {
ASTNode::Literal {
value: LiteralValue::String(s.to_string()),
span: span(),
}
}
fn eq(left: ASTNode, right: ASTNode) -> ASTNode {
ASTNode::BinaryOp {
operator: BinaryOperator::Equal,
left: Box::new(left),
right: Box::new(right),
span: span(),
}
}
fn or(left: ASTNode, right: ASTNode) -> ASTNode {
ASTNode::BinaryOp {
operator: BinaryOperator::Or,
left: Box::new(left),
right: Box::new(right),
span: span(),
}
}
fn if_then_break(cond: ASTNode) -> ASTNode {
ASTNode::If {
condition: Box::new(cond),
then_body: vec![ASTNode::Break { span: span() }],
else_body: None,
span: span(),
}
}
fn if_else_break(cond: ASTNode, then_body: Vec<ASTNode>) -> ASTNode {
ASTNode::If {
condition: Box::new(cond),
then_body,
else_body: Some(vec![ASTNode::Break { span: span() }]),
span: span(),
}
}
fn digit_chain(var_name: &str, digits: &[&str]) -> ASTNode {
let mut it = digits.iter();
let first = it
.next()
.expect("digits must be non-empty");
let mut acc = eq(var(var_name), str_lit(first));
for d in it {
acc = or(acc, eq(var(var_name), str_lit(d)));
}
acc
}
#[test]
fn test_extract_ok_eos_and_digit_condition() {
let digit_cond = digit_chain(
"ch",
&["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
);
let body = vec![
if_then_break(eq(var("ch"), str_lit(""))),
if_else_break(
digit_cond.clone(),
vec![
ASTNode::Assignment {
target: Box::new(var("out")),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(var("out")),
right: Box::new(var("ch")),
span: span(),
}),
span: span(),
},
ASTNode::Assignment {
target: Box::new(var("i")),
value: Box::new(ASTNode::BinaryOp {
operator: BinaryOperator::Add,
left: Box::new(var("i")),
right: Box::new(ASTNode::Literal {
value: LiteralValue::Integer(1),
span: span(),
}),
span: span(),
}),
span: span(),
},
],
),
];
let (ch_var, eos_cond, extracted_digit_cond) =
ReadDigitsBreakConditionBox::extract_eos_and_digit_condition(&body).unwrap();
assert_eq!(ch_var, "ch");
assert_eq!(eos_cond, eq(var("ch"), str_lit("")));
assert_eq!(extracted_digit_cond, digit_cond);
}
#[test]
fn test_extract_rejects_missing_digit_literal() {
let digit_cond = digit_chain(
"ch",
&["0", "1", "2", "3", "4", "5", "6", "7", "8"], // missing "9"
);
let body = vec![
if_then_break(eq(var("ch"), str_lit(""))),
if_else_break(digit_cond, vec![ASTNode::Break { span: span() }]),
];
let err = ReadDigitsBreakConditionBox::extract_eos_and_digit_condition(&body)
.unwrap_err();
assert!(err.contains("literal set mismatch"));
}
#[test]
fn test_extract_rejects_mixed_var_names() {
let digit_cond = or(eq(var("ch"), str_lit("0")), eq(var("x"), str_lit("1")));
let body = vec![
if_then_break(eq(var("ch"), str_lit(""))),
if_else_break(digit_cond, vec![ASTNode::Break { span: span() }]),
];
let err = ReadDigitsBreakConditionBox::extract_eos_and_digit_condition(&body)
.unwrap_err();
assert!(err.contains("mixed variable names"));
}
}
match var_name.as_deref() { match var_name.as_deref() {
None => *var_name = Some(name), None => *var_name = Some(name),

View File

@ -75,7 +75,8 @@ use crate::mir::join_ir::lowering::loop_body_local_env::LoopBodyLocalEnv;
use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape; use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape;
use crate::mir::join_ir::lowering::loop_update_analyzer::UpdateExpr; use crate::mir::join_ir::lowering::loop_update_analyzer::UpdateExpr;
use crate::mir::join_ir::lowering::step_schedule::{ use crate::mir::join_ir::lowering::step_schedule::{
build_pattern2_schedule_from_decision, decide_pattern2_schedule, Pattern2StepKind, build_pattern2_schedule_from_decision, decide_pattern2_schedule, Pattern2ScheduleFactsBox,
Pattern2StepKind,
}; };
use crate::mir::join_ir::lowering::update_env::UpdateEnv; use crate::mir::join_ir::lowering::update_env::UpdateEnv;
use crate::mir::loop_canonicalizer::LoopSkeleton; use crate::mir::loop_canonicalizer::LoopSkeleton;
@ -381,12 +382,18 @@ pub(crate) fn lower_loop_with_break_minimal(
// Decide evaluation order (header/body-init/break/updates/tail) up-front. // Decide evaluation order (header/body-init/break/updates/tail) up-front.
// Phase 93 P0: Pass condition_only_recipe existence to schedule context. // Phase 93 P0: Pass condition_only_recipe existence to schedule context.
// When recipe exists, body-init must happen before break check. // When recipe exists, body-init must happen before break check.
let schedule_decision = decide_pattern2_schedule( let has_allowed_body_locals_in_conditions = loop_cond_scope.has_loop_body_local()
&& allowed_body_locals_for_conditions
.map(|allow| !allow.is_empty())
.unwrap_or(false);
let schedule_facts = Pattern2ScheduleFactsBox::gather(
body_local_env.as_ref().map(|env| &**env), body_local_env.as_ref().map(|env| &**env),
carrier_info, carrier_info,
condition_only_recipe.is_some(), condition_only_recipe.is_some(),
body_local_derived_recipe.is_some(), body_local_derived_recipe.is_some(),
has_allowed_body_locals_in_conditions,
); );
let schedule_decision = decide_pattern2_schedule(&schedule_facts);
let schedule = build_pattern2_schedule_from_decision(&schedule_decision); let schedule = build_pattern2_schedule_from_decision(&schedule_decision);
// Collect fragments per step; append them according to the schedule below. // Collect fragments per step; append them according to the schedule below.

View File

@ -91,7 +91,49 @@ pub(crate) struct ScheduleDebugContext {
pub has_body_local_derived_recipe: bool, pub has_body_local_derived_recipe: bool,
} }
/// Decide Pattern2 schedule based on loop characteristics /// Facts about Pattern2 that drive step scheduling.
///
/// This struct is intentionally "facts only" (no decision).
#[derive(Debug, Clone)]
pub(crate) struct Pattern2ScheduleFacts {
pub has_body_local_init: bool,
pub has_loop_local_carrier: bool,
pub has_condition_only_recipe: bool,
pub has_body_local_derived_recipe: bool,
}
pub(crate) struct Pattern2ScheduleFactsBox;
impl Pattern2ScheduleFactsBox {
pub(crate) fn gather(
body_local_env: Option<&LoopBodyLocalEnv>,
carrier_info: &CarrierInfo,
has_condition_only_recipe: bool,
has_body_local_derived_recipe: bool,
has_allowed_body_locals_in_conditions: bool,
) -> Pattern2ScheduleFacts {
// NOTE: `body_local_env` may be empty here because it's populated after schedule
// decision (Phase 191 body-local init lowering happens later).
//
// For Phase 92+ patterns where conditions reference allowed body-local variables
// (e.g., `ch == ""`), we must still schedule BodyInit before BreakCheck.
let has_body_local_init = body_local_env.map(|env| !env.is_empty()).unwrap_or(false)
|| has_allowed_body_locals_in_conditions;
let has_loop_local_carrier = carrier_info
.carriers
.iter()
.any(|c| matches!(c.init, CarrierInit::LoopLocalZero));
Pattern2ScheduleFacts {
has_body_local_init,
has_loop_local_carrier,
has_condition_only_recipe,
has_body_local_derived_recipe,
}
}
}
/// Decide Pattern2 schedule based on loop characteristics (SSOT).
/// ///
/// Phase 93 Refactoring: Single source of truth for schedule decisions /// Phase 93 Refactoring: Single source of truth for schedule decisions
/// ///
@ -111,30 +153,19 @@ pub(crate) struct ScheduleDebugContext {
/// # Returns /// # Returns
/// ///
/// `ScheduleDecision` with decision, reason, and debug context /// `ScheduleDecision` with decision, reason, and debug context
pub(crate) fn decide_pattern2_schedule( pub(crate) fn decide_pattern2_schedule(facts: &Pattern2ScheduleFacts) -> ScheduleDecision {
body_local_env: Option<&LoopBodyLocalEnv>, let body_init_first = facts.has_condition_only_recipe
carrier_info: &CarrierInfo, || facts.has_body_local_derived_recipe
has_condition_only_recipe: bool, || facts.has_body_local_init
has_body_local_derived_recipe: bool, || facts.has_loop_local_carrier;
) -> ScheduleDecision {
let has_body_local_init = body_local_env.map(|env| !env.is_empty()).unwrap_or(false);
let has_loop_local_carrier = carrier_info
.carriers
.iter()
.any(|c| matches!(c.init, CarrierInit::LoopLocalZero));
let body_init_first = has_condition_only_recipe let reason = if facts.has_condition_only_recipe {
|| has_body_local_derived_recipe
|| has_body_local_init
|| has_loop_local_carrier;
let reason = if has_condition_only_recipe {
"ConditionOnly requires body-init before break" "ConditionOnly requires body-init before break"
} else if has_body_local_derived_recipe { } else if facts.has_body_local_derived_recipe {
"BodyLocalDerived requires body-init before break" "BodyLocalDerived requires body-init before break"
} else if has_body_local_init { } else if facts.has_body_local_init {
"body-local variables require init before break" "body-local variables require init before break"
} else if has_loop_local_carrier { } else if facts.has_loop_local_carrier {
"loop-local carrier requires init before break" "loop-local carrier requires init before break"
} else { } else {
"default schedule" "default schedule"
@ -144,10 +175,10 @@ pub(crate) fn decide_pattern2_schedule(
body_init_first, body_init_first,
reason, reason,
debug_ctx: ScheduleDebugContext { debug_ctx: ScheduleDebugContext {
has_body_local_init, has_body_local_init: facts.has_body_local_init,
has_loop_local_carrier, has_loop_local_carrier: facts.has_loop_local_carrier,
has_condition_only_recipe, has_condition_only_recipe: facts.has_condition_only_recipe,
has_body_local_derived_recipe, has_body_local_derived_recipe: facts.has_body_local_derived_recipe,
}, },
} }
} }
@ -263,7 +294,8 @@ mod tests {
#[test] #[test]
fn default_schedule_break_before_body_init() { fn default_schedule_break_before_body_init() {
let decision = decide_pattern2_schedule(None, &carrier_info(vec![]), false, false); let facts = Pattern2ScheduleFactsBox::gather(None, &carrier_info(vec![]), false, false, false);
let decision = decide_pattern2_schedule(&facts);
let schedule = build_pattern2_schedule_from_decision(&decision); let schedule = build_pattern2_schedule_from_decision(&decision);
assert_eq!( assert_eq!(
schedule.steps(), schedule.steps(),
@ -283,12 +315,14 @@ mod tests {
let mut body_env = LoopBodyLocalEnv::new(); let mut body_env = LoopBodyLocalEnv::new();
body_env.insert("tmp".to_string(), ValueId(5)); body_env.insert("tmp".to_string(), ValueId(5));
let decision = decide_pattern2_schedule( let facts = Pattern2ScheduleFactsBox::gather(
Some(&body_env), Some(&body_env),
&carrier_info(vec![carrier(false)]), &carrier_info(vec![carrier(false)]),
false, false,
false, false,
false,
); );
let decision = decide_pattern2_schedule(&facts);
let schedule = build_pattern2_schedule_from_decision(&decision); let schedule = build_pattern2_schedule_from_decision(&decision);
assert_eq!( assert_eq!(
schedule.steps(), schedule.steps(),
@ -305,12 +339,9 @@ mod tests {
#[test] #[test]
fn loop_local_carrier_triggers_body_first() { fn loop_local_carrier_triggers_body_first() {
let decision = decide_pattern2_schedule( let facts =
None, Pattern2ScheduleFactsBox::gather(None, &carrier_info(vec![carrier(true)]), false, false, false);
&carrier_info(vec![carrier(true)]), let decision = decide_pattern2_schedule(&facts);
false,
false,
);
let schedule = build_pattern2_schedule_from_decision(&decision); let schedule = build_pattern2_schedule_from_decision(&decision);
assert_eq!( assert_eq!(
schedule.steps(), schedule.steps(),
@ -329,7 +360,8 @@ mod tests {
#[test] #[test]
fn condition_only_recipe_triggers_body_first() { fn condition_only_recipe_triggers_body_first() {
// Empty body_local_env but has condition_only_recipe // Empty body_local_env but has condition_only_recipe
let decision = decide_pattern2_schedule(None, &carrier_info(vec![]), true, false); let facts = Pattern2ScheduleFactsBox::gather(None, &carrier_info(vec![]), true, false, false);
let decision = decide_pattern2_schedule(&facts);
let schedule = build_pattern2_schedule_from_decision(&decision); let schedule = build_pattern2_schedule_from_decision(&decision);
assert_eq!( assert_eq!(
schedule.steps(), schedule.steps(),
@ -344,6 +376,24 @@ mod tests {
assert_eq!(schedule.reason(), "ConditionOnly requires body-init before break"); assert_eq!(schedule.reason(), "ConditionOnly requires body-init before break");
} }
#[test]
fn allowed_body_local_deps_trigger_body_first_even_if_env_empty() {
let facts = Pattern2ScheduleFactsBox::gather(None, &carrier_info(vec![]), false, false, true);
let decision = decide_pattern2_schedule(&facts);
let schedule = build_pattern2_schedule_from_decision(&decision);
assert_eq!(
schedule.steps(),
&[
Pattern2StepKind::HeaderCond,
Pattern2StepKind::BodyInit,
Pattern2StepKind::BreakCheck,
Pattern2StepKind::Updates,
Pattern2StepKind::Tail
]
);
assert_eq!(schedule.reason(), "body-local variables require init before break");
}
#[test] #[test]
fn test_pattern3_if_sum_schedule() { fn test_pattern3_if_sum_schedule() {
let schedule = pattern3_if_sum_schedule(); let schedule = pattern3_if_sum_schedule();

View File

@ -674,9 +674,8 @@ mod tests {
let (_, decision) = result.unwrap(); let (_, decision) = result.unwrap();
assert!(decision.is_fail_fast()); assert!(decision.is_fail_fast());
assert!(decision.notes[0].contains( assert!(decision.notes[0].contains("Loop does not match"));
"does not match skip_whitespace, parse_number, continue, parse_string, or parse_array pattern" assert!(decision.notes[0].contains("skip_whitespace"));
));
} }
#[test] #[test]