diff --git a/apps/tests/phase103_if_only_early_return_min.hako b/apps/tests/phase103_if_only_early_return_min.hako index 14ca3245..01c14e3a 100644 --- a/apps/tests/phase103_if_only_early_return_min.hako +++ b/apps/tests/phase103_if_only_early_return_min.hako @@ -2,7 +2,7 @@ // Goal: one branch returns early (no join), other returns later. // Expect: two numeric lines "7" then "2". -box Main { +static box Main { g(flag) { if flag == 0 { return 7 @@ -12,9 +12,8 @@ box Main { } main() { - print(me.g(0)) - print(me.g(1)) + print(g(0)) + print(g(1)) return "OK" } } - diff --git a/apps/tests/phase103_if_only_merge_min.hako b/apps/tests/phase103_if_only_merge_min.hako index 2e9aa7b0..aadfc05b 100644 --- a/apps/tests/phase103_if_only_merge_min.hako +++ b/apps/tests/phase103_if_only_merge_min.hako @@ -2,7 +2,7 @@ // Goal: require merge (PHI-equivalent) in an if-only program (no loops), // and include one nested if to ensure nested merge stability. -box Main { +static box Main { main() { local x = 0 @@ -20,4 +20,3 @@ box Main { return "OK" } } - diff --git a/apps/tests/phase104_read_digits_loop_true_min.hako b/apps/tests/phase104_read_digits_loop_true_min.hako new file mode 100644 index 00000000..9c3fdb5d --- /dev/null +++ b/apps/tests/phase104_read_digits_loop_true_min.hako @@ -0,0 +1,35 @@ +// Phase 104: read_digits_from loop(true) + break-only minimal fixture +// Expect numeric output lines: 2 then 1 + +static box Main { + read_digits_min(s, pos) { + local i = pos + local out = "" + + loop(true) { + local ch = s.substring(i, i + 1) + + // end-of-string guard + if ch == "" { break } + + // digit check (match real-app shape; keep on one line to avoid ASI ambiguity) + if ch == "0" || ch == "1" || ch == "2" || ch == "3" || ch == "4" || ch == "5" || ch == "6" || ch == "7" || ch == "8" || ch == "9" { + out = out + ch + i = i + 1 + } else { + break + } + } + + return out + } + + main() { + local a = read_digits_min("12x", 0) + local b = read_digits_min("9", 0) + + 
print(a.length()) + print(b.length()) + return "OK" + } +} diff --git a/docs/development/current/main/01-JoinIR-Selfhost-INDEX.md b/docs/development/current/main/01-JoinIR-Selfhost-INDEX.md index 8fea0f20..b97b4c9f 100644 --- a/docs/development/current/main/01-JoinIR-Selfhost-INDEX.md +++ b/docs/development/current/main/01-JoinIR-Selfhost-INDEX.md @@ -61,8 +61,10 @@ JoinIR の箱構造と責務、ループ/if の lowering パターンを把握 - `docs/development/current/main/phases/phase-102/README.md` 14. Phase 103: if-only regression baseline(VM + LLVM EXE / plan) - `docs/development/current/main/phases/phase-103/README.md` +15. Phase 104: loop(true) break-only digits(VM + LLVM EXE) + - `docs/development/current/main/phases/phase-104/README.md` 6. MIR Builder(Context 分割の入口) - - `src/mir/builder/README.md` + - `src/mir/builder/README.md` 7. Scope/BindingId(shadowing・束縛同一性の段階移行) - `docs/development/current/main/phase73-scope-manager-design.md` - `docs/development/current/main/PHASE_74_SUMMARY.md` diff --git a/docs/development/current/main/10-Now.md b/docs/development/current/main/10-Now.md index 7232feb4..9f6408d6 100644 --- a/docs/development/current/main/10-Now.md +++ b/docs/development/current/main/10-Now.md @@ -1,5 +1,12 @@ # Self Current Task — Now (main) +## 2025-12-17:Phase 104 完了 ✅ + +**Phase 104: loop(true) + break-only digits(read_digits 系)** +- read_digits_from 形の `loop(true)` を Pattern2 で受理(loop var 抽出 + break cond 正規化) +- fixture: `apps/tests/phase104_read_digits_loop_true_min.hako`(expected: `2`, `1`) +- smoke: `tools/smokes/v2/profiles/integration/apps/phase104_read_digits_vm.sh` / `tools/smokes/v2/profiles/integration/apps/phase104_read_digits_llvm_exe.sh` + ## 2025-12-17:Phase 103 P0 完了 ✅ **Phase 103: if-only regression baseline** diff --git a/docs/development/current/main/phases/phase-104/README.md b/docs/development/current/main/phases/phase-104/README.md new file mode 100644 index 00000000..bc13368d --- /dev/null +++ b/docs/development/current/main/phases/phase-104/README.md @@ 
-0,0 +1,9 @@ +# Phase 104: loop(true) + break-only digits(read_digits 系) + +目的: `loop(true)` の break-only ループ(read_digits_from 形)を Pattern2 経路で VM/LLVM EXE parity 固定する。 +Fixture: `apps/tests/phase104_read_digits_loop_true_min.hako`(expected: `2`, `1`) +Smokes: `tools/smokes/v2/profiles/integration/apps/phase104_read_digits_vm.sh` / `tools/smokes/v2/profiles/integration/apps/phase104_read_digits_llvm_exe.sh` + +DONE: +- loop(true) counter 抽出: `LoopTrueCounterExtractorBox` +- break 条件(break when true)正規化 + digit set 固定: `ReadDigitsBreakConditionBox` diff --git a/src/mir/builder.rs b/src/mir/builder.rs index 7ad8682f..8b65c904 100644 --- a/src/mir/builder.rs +++ b/src/mir/builder.rs @@ -33,6 +33,8 @@ mod control_flow; // thin wrappers to centralize control-flow entrypoints // Phase 140-P4-A: Re-export for loop_canonicalizer SSOT (crate-wide visibility) pub(crate) use control_flow::{detect_skip_whitespace_pattern, SkipWhitespaceInfo}; +// Phase 104: Re-export read_digits(loop(true)) detection for loop_canonicalizer +pub(crate) use control_flow::{detect_read_digits_loop_true_pattern, ReadDigitsLoopTrueInfo}; // Phase 142-P1: Re-export continue pattern detection for loop_canonicalizer pub(crate) use control_flow::{detect_continue_pattern, ContinuePatternInfo}; // Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer diff --git a/src/mir/builder/control_flow/joinir/mod.rs b/src/mir/builder/control_flow/joinir/mod.rs index dec67dec..050a9614 100644 --- a/src/mir/builder/control_flow/joinir/mod.rs +++ b/src/mir/builder/control_flow/joinir/mod.rs @@ -19,6 +19,9 @@ pub(in crate::mir::builder) mod trace; // Phase 140-P4-A: Re-export for loop_canonicalizer SSOT (crate-wide visibility) pub(crate) use patterns::{detect_skip_whitespace_pattern, SkipWhitespaceInfo}; +// Phase 104: Re-export read_digits(loop(true)) detection for loop_canonicalizer +pub(crate) use patterns::{detect_read_digits_loop_true_pattern, ReadDigitsLoopTrueInfo}; + // Phase 142-P1: 
Re-export continue pattern detection for loop_canonicalizer pub(crate) use patterns::{detect_continue_pattern, ContinuePatternInfo}; diff --git a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs index d2b02dad..5a1ee6ef 100644 --- a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs +++ b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs @@ -1043,6 +1043,115 @@ pub fn detect_skip_whitespace_pattern(body: &[ASTNode]) -> Option, +} + +/// Detect read_digits_from-like pattern in loop body (loop(true) expected at callsite) +/// +/// Recognized minimal shape (JsonCursorBox/MiniJsonLoader): +/// ```text +/// loop(true) { +/// local ch = s.substring(i, i+1) +/// if ch == "" { break } +/// if is_digit(ch) { out = out + ch; i = i + 1 } else { break } +/// } +/// ``` +/// +/// Contract (Phase 104 minimal): +/// - Last statement is `if ... { ... } else { break }` +/// - Then branch contains an update `i = i + 1` +/// - Then branch may contain other updates (e.g., `out = out + ch`) +pub fn detect_read_digits_loop_true_pattern(body: &[ASTNode]) -> Option { + if body.is_empty() { + return None; + } + + // Last statement must be if-else with break + let last_stmt = &body[body.len() - 1]; + let (then_body, else_body) = match last_stmt { + ASTNode::If { + then_body, + else_body: Some(else_body), + .. + } => (then_body, else_body), + _ => return None, + }; + + // Else branch must be single break + if else_body.len() != 1 || !matches!(else_body[0], ASTNode::Break { .. }) { + return None; + } + + // Then branch must include `i = i + 1` (allow other statements too) + let mut carrier_name: Option = None; + let mut delta: Option = None; + for stmt in then_body { + let (name, d) = match stmt { + ASTNode::Assignment { target, value, .. } => { + let target_name = match target.as_ref() { + ASTNode::Variable { name, .. 
} => name.clone(), + _ => continue, + }; + match value.as_ref() { + ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left, + right, + .. + } => { + let left_name = match left.as_ref() { + ASTNode::Variable { name, .. } => name, + _ => continue, + }; + if left_name != &target_name { + continue; + } + let const_val = match right.as_ref() { + ASTNode::Literal { + value: LiteralValue::Integer(n), + .. + } => *n, + _ => continue, + }; + (target_name, const_val) + } + _ => continue, + } + } + _ => continue, + }; + + // Phase 104 minimal: only accept +1 step + if d == 1 { + carrier_name = Some(name); + delta = Some(1); + break; + } + } + + let carrier_name = carrier_name?; + let delta = delta?; + + let body_stmts = body[..body.len() - 1].to_vec(); + Some(ReadDigitsLoopTrueInfo { + carrier_name, + delta, + body_stmts, + }) +} + // ============================================================================ // Phase 91 P5b (Escape Sequence Handling) Pattern // ============================================================================ diff --git a/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs new file mode 100644 index 00000000..06251405 --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs @@ -0,0 +1,211 @@ +//! LoopTrueCounterExtractorBox - loop(true) からの loop counter 抽出(Pattern2専用) +//! +//! Phase 104: read_digits_from 形(loop(true) + break-only)を Pattern2 で扱うため、 +//! `condition` 側に loop var が無いケースで body から loop counter(例: i)を一意に確定する。 +//! +//! SSOT/Fail-Fast: +//! - 目的は「曖昧な loop(true) を通さない」こと。 +//! 
- 1変数・+1 だけを許可し、取りこぼしは理由付き Err にする。 + +use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; +use crate::mir::ValueId; +use std::collections::BTreeMap; + +pub(crate) struct LoopTrueCounterExtractorBox; + +impl LoopTrueCounterExtractorBox { + pub(crate) fn is_loop_true(condition: &ASTNode) -> bool { + matches!( + condition, + ASTNode::Literal { + value: LiteralValue::Bool(true), + .. + } + ) + } + + /// Extract a unique loop counter variable from loop(true) body. + /// + /// Current supported shape (Phase 104 minimal): + /// - There exists an assignment `i = i + 1` somewhere in the body (including nested if). + /// - There exists a substring read using that counter: `s.substring(i, i + 1)` (same `i`). + /// + /// Fail-Fast (returns Err) when: + /// - No counter candidate found + /// - Multiple different candidates found + /// - Candidate not found in `variable_map` (loop-outer var required) + /// - Substring pattern not found (guards against accidental matches) + pub(crate) fn extract_loop_counter_from_body( + body: &[ASTNode], + variable_map: &BTreeMap<String, ValueId>, + ) -> Result<(String, ValueId), String> { + let mut candidates: Vec<String> = Vec::new(); + + fn walk(node: &ASTNode, out: &mut Vec<String>) { + match node { + ASTNode::Assignment { target, value, .. } => { + if let (Some(name), true) = ( + extract_var_name(target.as_ref()), + is_self_plus_const_one(value.as_ref(), target.as_ref()), + ) { + out.push(name); + } + } + ASTNode::If { + condition: _, + then_body, + else_body, + .. + } => { + for s in then_body { + walk(s, out); + } + if let Some(eb) = else_body { + for s in eb { + walk(s, out); + } + } + } + ASTNode::Loop { body, .. } => { + for s in body { + walk(s, out); + } + } + _ => {} + } + } + + fn extract_var_name(n: &ASTNode) -> Option<String> { + match n { + ASTNode::Variable { name, .. 
} => Some(name.clone()), + _ => None, + } + } + + fn is_self_plus_const_one(value: &ASTNode, target: &ASTNode) -> bool { + let target_name = match extract_var_name(target) { + Some(n) => n, + None => return false, + }; + match value { + ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left, + right, + .. + } => { + let left_is_var = matches!(left.as_ref(), ASTNode::Variable { name, .. } if name == &target_name); + let right_is_one = matches!( + right.as_ref(), + ASTNode::Literal { + value: LiteralValue::Integer(1), + .. + } + ); + left_is_var && right_is_one + } + _ => false, + } + } + + for stmt in body { + walk(stmt, &mut candidates); + } + + candidates.sort(); + candidates.dedup(); + + let loop_var_name = match candidates.len() { + 0 => { + return Err( + "[phase104/loop-true-counter] Cannot find unique counter update `i = i + 1` in loop(true) body" + .to_string(), + ); + } + 1 => candidates[0].clone(), + _ => { + return Err(format!( + "[phase104/loop-true-counter] Multiple counter candidates found in loop(true) body: {:?}", + candidates + )); + } + }; + + let host_id = variable_map.get(&loop_var_name).copied().ok_or_else(|| { + format!( + "[phase104/loop-true-counter] Counter '{}' not found in variable_map (loop-outer var required)", + loop_var_name + ) + })?; + + if !has_substring_read(body, &loop_var_name) { + return Err(format!( + "[phase104/loop-true-counter] Counter '{}' found, but missing substring pattern `s.substring({}, {} + 1)`", + loop_var_name, loop_var_name, loop_var_name + )); + } + + Ok((loop_var_name, host_id)) + } +} + +fn has_substring_read(body: &[ASTNode], counter: &str) -> bool { + fn walk(node: &ASTNode, counter: &str) -> bool { + match node { + ASTNode::Assignment { value, .. } => walk(value.as_ref(), counter), + ASTNode::Local { + initial_values, .. + } => initial_values + .iter() + .filter_map(|v| v.as_ref()) + .any(|v| walk(v.as_ref(), counter)), + ASTNode::MethodCall { + object: _, + method, + arguments, + .. 
+ } => { + if method == "substring" && arguments.len() == 2 { + let a0 = &arguments[0]; + let a1 = &arguments[1]; + let a0_ok = matches!(a0, ASTNode::Variable { name, .. } if name == counter); + let a1_ok = matches!( + a1, + ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left, + right, + .. + } if matches!(left.as_ref(), ASTNode::Variable { name, .. } if name == counter) + && matches!(right.as_ref(), ASTNode::Literal { value: LiteralValue::Integer(1), .. }) + ); + if a0_ok && a1_ok { + return true; + } + } + // Search recursively in args + arguments.iter().any(|a| walk(a, counter)) + } + ASTNode::BinaryOp { left, right, .. } => walk(left.as_ref(), counter) || walk(right.as_ref(), counter), + ASTNode::If { + condition, + then_body, + else_body, + .. + } => { + walk(condition.as_ref(), counter) + || then_body.iter().any(|s| walk(s, counter)) + || else_body + .as_ref() + .map(|eb| eb.iter().any(|s| walk(s, counter))) + .unwrap_or(false) + } + ASTNode::Loop { body, condition, .. } => { + walk(condition.as_ref(), counter) || body.iter().any(|s| walk(s, counter)) + } + _ => false, + } + } + + body.iter().any(|s| walk(s, counter)) +} diff --git a/src/mir/builder/control_flow/joinir/patterns/mod.rs b/src/mir/builder/control_flow/joinir/patterns/mod.rs index f37cc31a..af2c16f8 100644 --- a/src/mir/builder/control_flow/joinir/patterns/mod.rs +++ b/src/mir/builder/control_flow/joinir/patterns/mod.rs @@ -54,6 +54,8 @@ pub(in crate::mir::builder) mod policies; // Phase 93/94: Pattern routing polici pub(in crate::mir::builder) mod body_local_policy; // Phase 92 P3: promotion vs slot routing pub(in crate::mir::builder) mod escape_pattern_recognizer; // Phase 91 P5b pub(in crate::mir::builder) mod common_init; +pub(in crate::mir::builder) mod loop_true_counter_extractor; // Phase 104: loop(true) counter extraction for Pattern2 +pub(in crate::mir::builder) mod read_digits_break_condition_box; // Phase 104: break cond normalization for read_digits(loop(true)) pub(in 
crate::mir::builder) mod condition_env_builder; pub(in crate::mir::builder) mod conversion_pipeline; pub(in crate::mir::builder) mod exit_binding; @@ -79,6 +81,9 @@ pub(in crate::mir::builder) use router::{route_loop_pattern, LoopPatternContext} // Phase 140-P4-A: Re-export for loop_canonicalizer SSOT (crate-wide visibility) pub(crate) use ast_feature_extractor::{detect_skip_whitespace_pattern, SkipWhitespaceInfo}; +// Phase 104: Re-export read_digits(loop(true)) detection for loop_canonicalizer +pub(crate) use ast_feature_extractor::{detect_read_digits_loop_true_pattern, ReadDigitsLoopTrueInfo}; + // Phase 142-P1: Re-export continue pattern detection for loop_canonicalizer pub(crate) use ast_feature_extractor::{detect_continue_pattern, ContinuePatternInfo}; diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2_with_break.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2_with_break.rs index 3b0f3f28..bf39777e 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern2_with_break.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2_with_break.rs @@ -496,16 +496,67 @@ fn prepare_pattern2_inputs( } } - // Break condition extraction + // Break condition extraction (SSOT + Phase 104 extension) // - // Use the analyzer SSOT to produce "break when is true" as an owned AST node: + // SSOT (BreakConditionAnalyzer): // - `if cond { break }` -> `cond` // - `if cond { ... 
} else { break }` -> `!cond` - use crate::mir::loop_pattern_detection::break_condition_analyzer::BreakConditionAnalyzer; - let break_condition_node = BreakConditionAnalyzer::extract_break_condition_node(body) - .map_err(|_| "[cf_loop/pattern2] Failed to extract break condition from loop body".to_string())?; + // + // Phase 104 (read_digits loop(true)): + // - multiple breaks exist; normalize as: + // `break_when_true := (ch == \"\") || !(is_digit(ch))` + let (break_condition_node, phase104_recipe_and_allowed) = { + use super::loop_true_counter_extractor::LoopTrueCounterExtractorBox; + use crate::ast::{BinaryOperator, Span, UnaryOperator}; + use crate::mir::join_ir::lowering::common::condition_only_emitter::{BreakSemantics, ConditionOnlyRecipe}; + use crate::mir::loop_pattern_detection::break_condition_analyzer::BreakConditionAnalyzer; - Ok(Pattern2Inputs { + if LoopTrueCounterExtractorBox::is_loop_true(condition) + && super::ast_feature_extractor::detect_read_digits_loop_true_pattern(body).is_some() + { + let (ch_var, eos_cond, digit_literals) = + super::read_digits_break_condition_box::ReadDigitsBreakConditionBox::extract_eos_and_digit_set(body) + .map_err(|e| format!("[cf_loop/pattern2] {}", e))?; + + // ConditionOnly derived slot: "is ch a digit?" 
(computed each iteration from body-local `ch`) + let is_digit_name = "__phase104_is_digit".to_string(); + let recipe = ConditionOnlyRecipe { + name: is_digit_name.clone(), + original_var: ch_var.clone(), + whitespace_chars: digit_literals, + break_semantics: BreakSemantics::WhenMatch, + }; + + let break_on_not_digit = ASTNode::UnaryOp { + operator: UnaryOperator::Not, + operand: Box::new(ASTNode::Variable { + name: is_digit_name.clone(), + span: Span::unknown(), + }), + span: Span::unknown(), + }; + + let break_when_true = ASTNode::BinaryOp { + operator: BinaryOperator::Or, + left: Box::new(eos_cond), + right: Box::new(break_on_not_digit), + span: Span::unknown(), + }; + + ( + break_when_true, + Some((ch_var, is_digit_name, recipe)), + ) + } else { + ( + BreakConditionAnalyzer::extract_break_condition_node(body) + .map_err(|_| "[cf_loop/pattern2] Failed to extract break condition from loop body".to_string())?, + None, + ) + } + }; + + let mut inputs = Pattern2Inputs { loop_var_name, loop_var_id, carrier_info, @@ -520,7 +571,20 @@ fn prepare_pattern2_inputs( break_condition_node, condition_only_recipe: None, // Phase 93 P0: Will be set by apply_trim_and_normalize body_local_derived_recipe: None, // Phase 94: Will be set after normalization - }) + }; + + // Phase 104: read_digits(loop(true)) wires derived is-digit slot + allow-list in one place. 
+ if let Some((ch_var, is_digit_name, recipe)) = phase104_recipe_and_allowed { + use crate::mir::join_ir::lowering::common::body_local_slot::ReadOnlyBodyLocalSlotBox; + inputs.condition_only_recipe = Some(recipe); + inputs.allowed_body_locals_for_conditions = vec![ch_var.clone(), is_digit_name]; + inputs.read_only_body_local_slot = Some(ReadOnlyBodyLocalSlotBox::extract_single( + &[ch_var], + body, + )?); + } + + Ok(inputs) } fn promote_and_prepare_carriers( @@ -533,6 +597,7 @@ fn promote_and_prepare_carriers( ) -> Result<(), String> { use crate::mir::join_ir::lowering::digitpos_condition_normalizer::DigitPosConditionNormalizer; use crate::mir::loop_pattern_detection::loop_condition_scope::LoopConditionScopeBox; + use super::loop_true_counter_extractor::LoopTrueCounterExtractorBox; let cond_scope = LoopConditionScopeBox::analyze( &inputs.loop_var_name, @@ -550,6 +615,11 @@ fn promote_and_prepare_carriers( .collect(); if cond_scope.has_loop_body_local() { + // Phase 104: read_digits(loop(true)) pre-wires slot/allow-list/recipe in prepare_pattern2_inputs. + // Do not run promotion heuristics here. + if !(LoopTrueCounterExtractorBox::is_loop_true(condition) + && super::ast_feature_extractor::detect_read_digits_loop_true_pattern(body).is_some()) + { match classify_for_pattern2( builder, &inputs.loop_var_name, @@ -682,6 +752,7 @@ fn promote_and_prepare_carriers( } PolicyDecision::None => {} } + } } log.log( @@ -798,8 +869,13 @@ fn apply_trim_and_normalize( let log = Pattern2DebugLog::new(verbose); let mut alloc_join_value = || inputs.join_value_space.alloc_param(); - let effective_break_condition = if let Some(trim_result) = - super::trim_loop_lowering::TrimLoopLowerer::try_lower_trim_like_loop( + // Phase 104: read_digits(loop(true)) uses a digit-guard OR-chain which resembles + // "trim-like" patterns; do not route through TrimLoopLowerer. 
+ let is_phase104_read_digits = super::loop_true_counter_extractor::LoopTrueCounterExtractorBox::is_loop_true(condition) + && super::ast_feature_extractor::detect_read_digits_loop_true_pattern(body).is_some(); + + let effective_break_condition = if !is_phase104_read_digits { + if let Some(trim_result) = super::trim_loop_lowering::TrimLoopLowerer::try_lower_trim_like_loop( builder, &inputs.scope, condition, @@ -834,6 +910,9 @@ fn apply_trim_and_normalize( ), ); trim_result.condition + } else { + inputs.break_condition_node.clone() + } } else { inputs.break_condition_node.clone() }; @@ -926,7 +1005,9 @@ fn collect_body_local_variables( /// - ConditionalStep updates are supported (if skeleton provides them) pub(crate) fn can_lower(builder: &MirBuilder, ctx: &super::router::LoopPatternContext) -> bool { use super::common_init::CommonPatternInitializer; + use super::loop_true_counter_extractor::LoopTrueCounterExtractorBox; use crate::mir::loop_pattern_detection::LoopPatternKind; + let trace_enabled = trace::trace().is_joinir_enabled() || crate::config::env::joinir_dev_enabled(); // Basic pattern check if ctx.pattern_kind != LoopPatternKind::Pattern2Break { @@ -971,17 +1052,44 @@ pub(crate) fn can_lower(builder: &MirBuilder, ctx: &super::router::LoopPatternCo } // Phase 188/Refactor: Use common carrier update validation - // Extracts loop variable for dummy carrier creation (not used but required by API) - let loop_var_name = match builder.extract_loop_variable_from_condition(ctx.condition) { - Ok(name) => name, - Err(_) => return false, + // Phase 104: Support loop(true) by extracting the counter from the body. 
+ let loop_var_name = if LoopTrueCounterExtractorBox::is_loop_true(ctx.condition) { + match LoopTrueCounterExtractorBox::extract_loop_counter_from_body( + ctx.body, + &builder.variable_ctx.variable_map, + ) { + Ok((name, _host_id)) => name, + Err(e) => { + if trace_enabled { + trace::trace().debug("pattern2/can_lower", &format!("reject loop(true): {}", e)); + } + return false; + } + } + } else { + match builder.extract_loop_variable_from_condition(ctx.condition) { + Ok(name) => name, + Err(e) => { + if trace_enabled { + trace::trace().debug("pattern2/can_lower", &format!("reject loop(cond): {}", e)); + } + return false; + } + } }; - CommonPatternInitializer::check_carrier_updates_allowed( + let ok = CommonPatternInitializer::check_carrier_updates_allowed( ctx.body, &loop_var_name, &builder.variable_ctx.variable_map, - ) + ); + if !ok && trace_enabled { + trace::trace().debug( + "pattern2/can_lower", + "reject: carrier updates contain complex RHS (UpdateRhs::Other)", + ); + } + ok } /// Phase 194: Lowering function for Pattern 2 diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern_pipeline.rs b/src/mir/builder/control_flow/joinir/patterns/pattern_pipeline.rs index c4434425..2acc8b01 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern_pipeline.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern_pipeline.rs @@ -45,6 +45,7 @@ use crate::mir::ValueId; use std::collections::{BTreeMap, BTreeSet}; // Phase 222.5-D: HashMap → BTreeMap for determinism use super::common_init::CommonPatternInitializer; +use super::loop_true_counter_extractor::LoopTrueCounterExtractorBox; use super::loop_scope_shape_builder::LoopScopeShapeBuilder; /// Phase 179-B: Unified Pattern Pipeline Context @@ -260,12 +261,26 @@ pub(crate) fn build_pattern_context( variant: PatternVariant, ) -> Result { // Step 1: Common initialization (all patterns) - let (loop_var_name, loop_var_id, carrier_info) = CommonPatternInitializer::initialize_pattern( - builder, - condition, 
- &builder.variable_ctx.variable_map, - None, // No exclusions for now (Pattern 2/4 will filter carriers later) - )?; + // + // Phase 104: Pattern2 now supports `loop(true)` by extracting the counter from the body. + // This path must be strict and conservative to avoid "accidental" routing. + let (loop_var_name, loop_var_id, carrier_info) = if variant == PatternVariant::Pattern2 + && LoopTrueCounterExtractorBox::is_loop_true(condition) + { + let (name, host_id) = LoopTrueCounterExtractorBox::extract_loop_counter_from_body( + body, + &builder.variable_ctx.variable_map, + )?; + let carrier_info = CarrierInfo::from_variable_map(name.clone(), &builder.variable_ctx.variable_map)?; + (name, host_id, carrier_info) + } else { + CommonPatternInitializer::initialize_pattern( + builder, + condition, + &builder.variable_ctx.variable_map, + None, // No exclusions for now (Pattern 2/4 will filter carriers later) + )? + }; // Step 2: Build LoopScopeShape let loop_scope = match variant { diff --git a/src/mir/builder/control_flow/joinir/patterns/read_digits_break_condition_box.rs b/src/mir/builder/control_flow/joinir/patterns/read_digits_break_condition_box.rs new file mode 100644 index 00000000..d989d28c --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/read_digits_break_condition_box.rs @@ -0,0 +1,179 @@ +//! ReadDigitsBreakConditionBox (Phase 104) +//! +//! Responsibility (analysis only): +//! - For `loop(true)` read_digits_from shape, extract: +//! - the EOS break condition (`if ch == "" { break }`) +//! - the digit literal set used in the final `if { ... 
} else { break }` + +use crate::ast::{ASTNode, BinaryOperator, LiteralValue}; + +pub(crate) struct ReadDigitsBreakConditionBox; + +impl ReadDigitsBreakConditionBox { + pub(crate) fn extract_eos_and_digit_set( + body: &[ASTNode], + ) -> Result<(String, ASTNode, Vec<String>), String> { + if body.is_empty() { + return Err("[phase104/read-digits] empty loop body".to_string()); + } + + let (digit_cond, has_else_break) = match &body[body.len() - 1] { + ASTNode::If { + condition, + else_body: Some(else_body), + .. + } => ( + condition.as_ref().clone(), + else_body.len() == 1 && matches!(else_body[0], ASTNode::Break { .. }), + ), + _ => return Err("[phase104/read-digits] last statement is not if-else".to_string()), + }; + if !has_else_break { + return Err("[phase104/read-digits] last if does not have `else { break }`".to_string()); + } + + let (ch_var, eos_cond) = find_eos_break_condition(body).ok_or_else(|| { + "[phase104/read-digits] missing `if ch == \"\" { break }` guard".to_string() + })?; + + let mut digit_literals: Vec<String> = Vec::new(); + let mut digit_var_name: Option<String> = None; + collect_eq_string_literals(&digit_cond, &mut digit_literals, &mut digit_var_name)?; + + let digit_var_name = digit_var_name.ok_or_else(|| { + "[phase104/read-digits] digit condition does not reference a variable".to_string() + })?; + if digit_var_name != ch_var { + return Err(format!( + "[phase104/read-digits] digit condition var '{}' != eos var '{}'", + digit_var_name, ch_var + )); + } + + digit_literals.sort(); + digit_literals.dedup(); + + if digit_literals.is_empty() { + return Err("[phase104/read-digits] digit condition has no string literals".to_string()); + } + + // Phase 104 minimal: require the canonical digit set. 
+ let expected: Vec<String> = (0..=9).map(|d| d.to_string()).collect(); + if digit_literals != expected { + return Err(format!( + "[phase104/read-digits] digit condition literal set mismatch: got={:?}, expected={:?}", + digit_literals, expected + )); + } + + Ok((ch_var, eos_cond, digit_literals)) + } +} + +fn find_eos_break_condition(body: &[ASTNode]) -> Option<(String, ASTNode)> { + for stmt in body { + let (cond, then_body, else_body) = match stmt { + ASTNode::If { + condition, + then_body, + else_body, + .. + } => (condition, then_body, else_body), + _ => continue, + }; + + if else_body.is_some() { + continue; + } + if then_body.len() != 1 || !matches!(then_body[0], ASTNode::Break { .. }) { + continue; + } + if let Some(var_name) = ch_eq_empty_var(cond.as_ref()) { + return Some((var_name, cond.as_ref().clone())); + } + } + None +} + +fn ch_eq_empty_var(cond: &ASTNode) -> Option<String> { + match cond { + ASTNode::BinaryOp { + operator: BinaryOperator::Equal, + left, + right, + .. + } => { + let name = match left.as_ref() { + ASTNode::Variable { name, .. } => name.clone(), + _ => return None, + }; + match right.as_ref() { + ASTNode::Literal { + value: LiteralValue::String(s), + .. + } if s.is_empty() => Some(name), + _ => None, + } + } + _ => None, + } +} + +fn collect_eq_string_literals( + cond: &ASTNode, + out: &mut Vec<String>, + var_name: &mut Option<String>, +) -> Result<(), String> { + match cond { + ASTNode::BinaryOp { + operator: BinaryOperator::Or, + left, + right, + .. + } => { + collect_eq_string_literals(left.as_ref(), out, var_name)?; + collect_eq_string_literals(right.as_ref(), out, var_name)?; + Ok(()) + } + ASTNode::BinaryOp { + operator: BinaryOperator::Equal, + left, + right, + .. + } => { + let (name, lit) = match (left.as_ref(), right.as_ref()) { + (ASTNode::Variable { name, .. }, ASTNode::Literal { value: LiteralValue::String(s), .. }) => { + (name.clone(), s.clone()) + } + (ASTNode::Literal { value: LiteralValue::String(s), .. }, ASTNode::Variable { name, .. 
}) => { + (name.clone(), s.clone()) + } + _ => { + return Err("[phase104/read-digits] digit condition must be OR of `ch == \"d\"`".to_string()); + } + }; + + if lit.len() != 1 || !lit.chars().all(|c| c.is_ascii_digit()) { + return Err(format!( + "[phase104/read-digits] non-digit literal in digit condition: {:?}", + lit + )); + } + + match var_name.as_deref() { + None => *var_name = Some(name), + Some(prev) if prev == name => {} + Some(prev) => { + return Err(format!( + "[phase104/read-digits] mixed variable names in digit condition: '{}' vs '{}'", + prev, name + )); + } + } + + out.push(lit); + Ok(()) + } + _ => Err("[phase104/read-digits] digit condition must be OR-chain of equality checks".to_string()), + } +} diff --git a/src/mir/builder/control_flow/mod.rs b/src/mir/builder/control_flow/mod.rs index d89d8652..cb00a27c 100644 --- a/src/mir/builder/control_flow/mod.rs +++ b/src/mir/builder/control_flow/mod.rs @@ -57,6 +57,9 @@ pub(in crate::mir::builder) mod utils; // Phase 140-P4-A: Re-export for loop_canonicalizer SSOT (crate-wide visibility) pub(crate) use joinir::{detect_skip_whitespace_pattern, SkipWhitespaceInfo}; +// Phase 104: Re-export read_digits(loop(true)) detection for loop_canonicalizer +pub(crate) use joinir::{detect_read_digits_loop_true_pattern, ReadDigitsLoopTrueInfo}; + // Phase 142-P1: Re-export continue pattern detection for loop_canonicalizer pub(crate) use joinir::{detect_continue_pattern, ContinuePatternInfo}; diff --git a/src/mir/loop_canonicalizer/canonicalizer.rs b/src/mir/loop_canonicalizer/canonicalizer.rs index dce6392c..af4c23b9 100644 --- a/src/mir/loop_canonicalizer/canonicalizer.rs +++ b/src/mir/loop_canonicalizer/canonicalizer.rs @@ -9,7 +9,8 @@ use crate::mir::loop_pattern_detection::LoopPatternKind; use super::capability_guard::{CapabilityTag, RoutingDecision}; use super::pattern_recognizer::{ try_extract_continue_pattern, try_extract_escape_skip_pattern, try_extract_parse_number_pattern, - try_extract_parse_string_pattern, 
try_extract_skip_whitespace_pattern, + try_extract_parse_string_pattern, try_extract_read_digits_loop_true_pattern, + try_extract_skip_whitespace_pattern, }; use super::skeleton_types::{ CarrierRole, CarrierSlot, ExitContract, LoopSkeleton, SkeletonStep, UpdateKind, @@ -203,6 +204,53 @@ pub fn canonicalize_loop_expr( return Ok((skeleton, decision)); } + // Phase 104: loop(true) + break-only digits (read_digits_from) + // + // Shape (JsonCursorBox.read_digits_from / MiniJsonLoader.read_digits_from): + // - loop(true) + // - last statement is `if is_digit { ... i = i + 1 } else { break }` + // - may have `if ch == "" { break }` and substring read before it + if matches!( + condition, + ASTNode::Literal { + value: crate::ast::LiteralValue::Bool(true), + .. + } + ) { + if let Some((carrier_name, delta, body_stmts)) = try_extract_read_digits_loop_true_pattern(body) { + let mut skeleton = LoopSkeleton::new(span); + + skeleton.steps.push(SkeletonStep::HeaderCond { + expr: Box::new(condition.clone()), + }); + + if !body_stmts.is_empty() { + skeleton.steps.push(SkeletonStep::Body { stmts: body_stmts }); + } + + skeleton.steps.push(SkeletonStep::Update { + carrier_name: carrier_name.clone(), + update_kind: UpdateKind::ConstStep { delta }, + }); + + skeleton.carriers.push(CarrierSlot { + name: carrier_name, + role: CarrierRole::Counter, + update_kind: UpdateKind::ConstStep { delta }, + }); + + skeleton.exits = ExitContract { + has_break: true, + has_continue: false, + has_return: false, + break_has_value: false, + }; + + let decision = RoutingDecision::success(LoopPatternKind::Pattern2Break); + return Ok((skeleton, decision)); + } + } + // Phase 143-P0: Try to extract parse_number pattern (break in THEN clause) if let Some((carrier_name, delta, body_stmts, rest_stmts)) = try_extract_parse_number_pattern(body) @@ -378,7 +426,7 @@ pub fn canonicalize_loop_expr( LoopSkeleton::new(span), RoutingDecision::fail_fast( vec![CapabilityTag::ConstStep], - "Phase 143-P2: Loop does not 
match skip_whitespace, parse_number, continue, parse_string, or parse_array pattern" + "Phase 143-P2: Loop does not match read_digits(loop(true)), skip_whitespace, parse_number, continue, parse_string, or parse_array pattern" .to_string(), ), )) diff --git a/src/mir/loop_canonicalizer/pattern_recognizer.rs b/src/mir/loop_canonicalizer/pattern_recognizer.rs index 8bc214d1..e1c77933 100644 --- a/src/mir/loop_canonicalizer/pattern_recognizer.rs +++ b/src/mir/loop_canonicalizer/pattern_recognizer.rs @@ -8,6 +8,7 @@ use crate::mir::detect_continue_pattern; use crate::mir::detect_parse_number_pattern as ast_detect_parse_number; use crate::mir::detect_parse_string_pattern as ast_detect_parse_string; use crate::mir::detect_skip_whitespace_pattern as ast_detect; +use crate::mir::detect_read_digits_loop_true_pattern as ast_detect_read_digits; use crate::mir::detect_escape_skip_pattern as ast_detect_escape; // ============================================================================ @@ -40,6 +41,19 @@ pub fn try_extract_skip_whitespace_pattern( ast_detect(body).map(|info| (info.carrier_name, info.delta, info.body_stmts)) } +// ============================================================================ +// Phase 104: Read Digits loop(true) Pattern +// ============================================================================ + +/// Try to extract read_digits_from-like pattern from loop(true) body. +/// +/// Returns (carrier_name, delta, body_stmts) if pattern matches. 
+pub fn try_extract_read_digits_loop_true_pattern( + body: &[ASTNode], +) -> Option<(String, i64, Vec<ASTNode>)> { + ast_detect_read_digits(body).map(|info| (info.carrier_name, info.delta, info.body_stmts)) +} + // ============================================================================ // Parse Number Pattern (Phase 143-P0) // ============================================================================ diff --git a/src/mir/mod.rs b/src/mir/mod.rs index ef9323bf..a7e29b89 100644 --- a/src/mir/mod.rs +++ b/src/mir/mod.rs @@ -58,6 +58,8 @@ pub use builder::MirBuilder; // Phase 140-P4-A: Re-export for loop_canonicalizer SSOT pub(crate) use builder::{detect_skip_whitespace_pattern, SkipWhitespaceInfo}; +// Phase 104: Re-export read_digits(loop(true)) detection for loop_canonicalizer +pub(crate) use builder::{detect_read_digits_loop_true_pattern, ReadDigitsLoopTrueInfo}; // Phase 142-P1: Re-export continue pattern detection for loop_canonicalizer pub(crate) use builder::{detect_continue_pattern, ContinuePatternInfo}; // Phase 143-P0: Re-export parse_number pattern detection for loop_canonicalizer diff --git a/tools/smokes/v2/profiles/integration/apps/phase104_read_digits_llvm_exe.sh b/tools/smokes/v2/profiles/integration/apps/phase104_read_digits_llvm_exe.sh new file mode 100644 index 00000000..7757e615 --- /dev/null +++ b/tools/smokes/v2/profiles/integration/apps/phase104_read_digits_llvm_exe.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# Phase 104: read_digits loop(true) + break-only (LLVM EXE parity) + +source "$(dirname "$0")/../../../lib/test_runner.sh" +export SMOKES_USE_PYVM=0 +require_env || exit 2 + +# LLVM availability checks (graceful SKIP) +if ! command -v llvm-config-18 &> /dev/null; then + test_skip "llvm-config-18 not found"; exit 0 +fi + +if ! "$NYASH_BIN" --help 2>&1 | grep -q "llvm"; then + test_skip "hakorune --backend llvm not available"; exit 0 +fi + +if ! 
python3 -c "import llvmlite" 2>/dev/null; then + test_skip "Python llvmlite not found"; exit 0 +fi + +# Phase 97/98/100 SSOT: plugin dlopen check → build only if needed → dlopen recheck. +FILEBOX_SO="$NYASH_ROOT/plugins/nyash-filebox-plugin/libnyash_filebox_plugin.so" +MAPBOX_SO="$NYASH_ROOT/plugins/nyash-map-plugin/libnyash_map_plugin.so" +STRINGBOX_SO="$NYASH_ROOT/plugins/nyash-string-plugin/libnyash_string_plugin.so" +CONSOLEBOX_SO="$NYASH_ROOT/plugins/nyash-console-plugin/libnyash_console_plugin.so" +INTEGERBOX_SO="$NYASH_ROOT/plugins/nyash-integer-plugin/libnyash_integer_plugin.so" + +check_plugins() { + python3 - "$FILEBOX_SO" "$MAPBOX_SO" "$STRINGBOX_SO" "$CONSOLEBOX_SO" "$INTEGERBOX_SO" <<'PY' +import ctypes +import os +import sys +names = ["FileBox", "MapBox", "StringBox", "ConsoleBox", "IntegerBox"] +paths = sys.argv[1:] +failures = [] +for name, path in zip(names, paths): + if not os.path.isfile(path): + failures.append(f"[plugin/missing] {name}: {path}") + continue + try: + ctypes.CDLL(path) + except Exception as e: # noqa: BLE001 + failures.append(f"[plugin/dlopen] {name}: {path} ({e})") +if failures: + print("\n".join(failures)) + sys.exit(1) +print("OK") +PY +} + +echo "[INFO] Checking plugin artifacts (FileBox/MapBox/StringBox/ConsoleBox/IntegerBox)" +if ! CHECK_OUTPUT=$(check_plugins 2>&1); then + echo "$CHECK_OUTPUT" + echo "[INFO] Missing/broken plugin detected, running build-all (core plugins)" + BUILD_LOG="/tmp/phase104_read_digits_plugin_build.log" + if ! bash "$NYASH_ROOT/tools/plugins/build-all.sh" \ + nyash-filebox-plugin nyash-map-plugin nyash-string-plugin nyash-console-plugin nyash-integer-plugin \ + >"$BUILD_LOG" 2>&1; then + echo "[FAIL] tools/plugins/build-all.sh failed for core plugins" + tail -n 80 "$BUILD_LOG" + exit 1 + fi + if ! 
CHECK_OUTPUT=$(check_plugins 2>&1); then + echo "$CHECK_OUTPUT" + echo "[FAIL] Plugin artifacts still missing or unloadable after build-all" + tail -n 80 "$BUILD_LOG" + exit 1 + fi +fi + +mkdir -p "$NYASH_ROOT/tmp" + +INPUT_HAKO="$NYASH_ROOT/apps/tests/phase104_read_digits_loop_true_min.hako" +OUTPUT_EXE="$NYASH_ROOT/tmp/phase104_read_digits_llvm_exe" + +echo "[INFO] Building: $INPUT_HAKO → $OUTPUT_EXE" + +BUILD_LOG="/tmp/phase104_read_digits_build.log" +if ! env NYASH_DISABLE_PLUGINS=0 "$NYASH_ROOT/tools/build_llvm.sh" "$INPUT_HAKO" -o "$OUTPUT_EXE" 2>&1 | tee "$BUILD_LOG"; then + echo "[FAIL] build_llvm.sh failed" + tail -n 80 "$BUILD_LOG" + exit 1 +fi + +if [ ! -x "$OUTPUT_EXE" ]; then + echo "[FAIL] Executable not created or not executable: $OUTPUT_EXE" + ls -la "$OUTPUT_EXE" 2>/dev/null || echo "File does not exist" + exit 1 +fi + +echo "[INFO] Executing: $OUTPUT_EXE" + +set +e +OUTPUT=$(timeout "${RUN_TIMEOUT_SECS:-10}" env NYASH_DISABLE_PLUGINS=0 "$OUTPUT_EXE" 2>&1) +EXIT_CODE=$? 
+set -e + +if [ "$EXIT_CODE" -ne 0 ]; then + echo "[FAIL] Execution failed with exit code $EXIT_CODE" + echo "$OUTPUT" | tail -n 80 + exit 1 +fi + +EXPECTED=$'2\n1' +CLEAN=$(printf "%s\n" "$OUTPUT" | grep -v '^\[' | grep -E '^-?[0-9]+$' | head -n 2 | paste -sd '\n' - | tr -d '\r') + +echo "[INFO] CLEAN output:" +echo "$CLEAN" + +if [ "$CLEAN" = "$EXPECTED" ]; then + test_pass "phase104_read_digits_llvm_exe: output matches expected (2, 1)" +else + echo "[FAIL] Output mismatch" + echo "[INFO] Raw output (tail):" + echo "$OUTPUT" | tail -n 80 + echo "[INFO] Expected:" + printf "%s\n" "$EXPECTED" + exit 1 +fi + diff --git a/tools/smokes/v2/profiles/integration/apps/phase104_read_digits_vm.sh b/tools/smokes/v2/profiles/integration/apps/phase104_read_digits_vm.sh new file mode 100644 index 00000000..94e3d5b1 --- /dev/null +++ b/tools/smokes/v2/profiles/integration/apps/phase104_read_digits_vm.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Phase 104: read_digits loop(true) + break-only (VM) + +source "$(dirname "$0")/../../../lib/test_runner.sh" +export SMOKES_USE_PYVM=0 +require_env || exit 2 + +PASS_COUNT=0 +FAIL_COUNT=0 +RUN_TIMEOUT_SECS=${RUN_TIMEOUT_SECS:-10} + +INPUT="$NYASH_ROOT/apps/tests/phase104_read_digits_loop_true_min.hako" + +echo "[INFO] Phase 104: read_digits loop(true) break-only (VM) - $INPUT" + +set +e +OUTPUT=$(timeout "$RUN_TIMEOUT_SECS" env \ + NYASH_DISABLE_PLUGINS=0 \ + HAKO_JOINIR_STRICT=1 \ + "$NYASH_BIN" --backend vm "$INPUT" 2>&1) +EXIT_CODE=$? 
+set -e + +if [ "$EXIT_CODE" -eq 124 ]; then + echo "[FAIL] hakorune timed out (>${RUN_TIMEOUT_SECS}s)" + FAIL_COUNT=$((FAIL_COUNT + 1)) +elif [ "$EXIT_CODE" -eq 0 ]; then + EXPECTED=$'2\n1' + CLEAN=$(printf "%s\n" "$OUTPUT" | grep -E '^-?[0-9]+$' | head -n 2 | paste -sd '\n' - | tr -d '\r') + if [ "$CLEAN" = "$EXPECTED" ]; then + echo "[PASS] Output verified: 2 then 1" + PASS_COUNT=$((PASS_COUNT + 1)) + else + echo "[FAIL] Unexpected output (expected lines: 2 then 1)" + echo "[INFO] output (tail):" + echo "$OUTPUT" | tail -n 60 || true + FAIL_COUNT=$((FAIL_COUNT + 1)) + fi +else + echo "[FAIL] hakorune failed with exit code $EXIT_CODE" + echo "[INFO] output (tail):" + echo "$OUTPUT" | tail -n 60 || true + FAIL_COUNT=$((FAIL_COUNT + 1)) +fi + +echo "[INFO] PASS: $PASS_COUNT, FAIL: $FAIL_COUNT" + +if [ "$FAIL_COUNT" -eq 0 ]; then + test_pass "phase104_read_digits_vm: All tests passed" + exit 0 +else + test_fail "phase104_read_digits_vm: $FAIL_COUNT test(s) failed" + exit 1 +fi