fix(joinir): Phase 96 next_non_ws break condition SSOT

This commit is contained in:
nyash-codex
2025-12-17 01:59:21 +09:00
parent db4453eb3c
commit bc1a09f2c3
6 changed files with 106 additions and 40 deletions

View File

@ -1,3 +1,3 @@
- Phase 96: MiniJsonLoader の next_non_ws ループを Trim policy/SSOT で固定。
- フィクスチャ: apps/tests/phase96_json_loader_next_non_ws_min.hako（現状は 0/0 出力で最小固定）
- フィクスチャ: apps/tests/phase96_json_loader_next_non_ws_min.hako（期待: `2`、`-1` の順に出力）
- smoke: tools/smokes/v2/profiles/integration/apps/phase96_json_loader_next_non_ws_vm.sh（VM, strict、出力一致チェック）

View File

@ -291,7 +291,7 @@ pub(crate) fn has_break_in_else_clause(body: &[ASTNode]) -> bool {
///
/// Searches for the first break pattern in an if statement:
/// - `if <condition> { break }` - returns <condition>
/// - `if <condition> { ... } else { break }` - returns `!<condition>` (negated)
/// - `if <condition> { ... } else { break }` - returns `<condition>` (caller must negate)
///
/// This is used to delegate break condition lowering to `condition_to_joinir`.
///
@ -301,7 +301,7 @@ pub(crate) fn has_break_in_else_clause(body: &[ASTNode]) -> bool {
///
/// # Returns
///
/// `Some(&ASTNode)` - The condition AST node (negated for else-break pattern)
/// `Some(&ASTNode)` - The condition AST node (not negated)
/// `None` - No break statement found or break is not in a simple if statement
///
/// # Examples
@ -316,9 +316,13 @@ pub(crate) fn has_break_in_else_clause(body: &[ASTNode]) -> bool {
/// // Pattern 2: if condition { ... } else { break }
/// loop(start < end) {
/// if ch == " " { start = start + 1 } else { break }
/// // <- Returns the "!(ch == " ")" condition (negated)
/// // <- Returns the "(ch == " ")" condition (caller must negate)
/// }
/// ```
///
/// If you need a normalized break condition as an owned AST node
/// ("break when <cond> is true"), use
/// `BreakConditionAnalyzer::extract_break_condition_node`.
pub(crate) fn extract_break_condition(body: &[ASTNode]) -> Option<&ASTNode> {
    // Thin delegate to the analyzer. Its error string is dropped here, so
    // "no break pattern found" surfaces to callers as `None`.
    let analyzed = BreakConditionAnalyzer::extract_break_condition(body);
    analyzed.ok()
}

View File

@ -229,23 +229,13 @@ fn prepare_pattern2_inputs(
}
// Break condition extraction
use super::ast_feature_extractor;
let break_condition_raw =
ast_feature_extractor::extract_break_condition(body).ok_or_else(|| {
"[cf_loop/pattern2] Failed to extract break condition from loop body".to_string()
})?;
let break_in_else = ast_feature_extractor::has_break_in_else_clause(body);
use crate::ast::UnaryOperator;
let break_condition_node = if break_in_else {
let span = crate::ast::Span::unknown();
ASTNode::UnaryOp {
operator: UnaryOperator::Not,
operand: Box::new(break_condition_raw.clone()),
span,
}
} else {
break_condition_raw.clone()
};
//
// Use the analyzer SSOT to produce "break when <cond> is true" as an owned AST node:
// - `if cond { break }` -> `cond`
// - `if cond { ... } else { break }` -> `!cond`
use crate::mir::loop_pattern_detection::break_condition_analyzer::BreakConditionAnalyzer;
let break_condition_node = BreakConditionAnalyzer::extract_break_condition_node(body)
.map_err(|_| "[cf_loop/pattern2] Failed to extract break condition from loop body".to_string())?;
Ok(Pattern2Inputs {
loop_var_name,

View File

@ -40,6 +40,7 @@ use crate::ast::ASTNode;
use crate::mir::builder::MirBuilder;
use crate::mir::join_ir::lowering::carrier_info::CarrierInfo;
use crate::mir::join_ir::lowering::condition_env::ConditionBinding;
use crate::mir::join_ir::lowering::common::condition_only_emitter::BreakSemantics;
use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape;
use crate::mir::loop_pattern_detection::loop_body_carrier_promoter::{
LoopBodyCarrierPromoter, PromotionRequest, PromotionResult,
@ -308,9 +309,19 @@ impl TrimLoopLowerer {
verbose,
);
// Step 6: Setup ConditionEnv bindings FIRST to determine if ConditionOnly
// Step 6: Setup ConditionEnv bindings FIRST to determine break semantics.
//
// IMPORTANT: derive semantics from the already-normalized `break_cond`
// (Pattern2 extracts "break when <cond> is true"), not from the raw body
// `if/else` structure which may be rewritten during earlier analyses.
let break_semantics = Self::infer_break_semantics_from_break_cond(break_cond);
let (condition_bindings, condition_only_recipe) =
Self::setup_condition_env_bindings(builder, trim_helper, alloc_join_value)?;
Self::setup_condition_env_bindings(
builder,
trim_helper,
break_semantics,
alloc_join_value,
)?;
trace.emit_if(
"trim",
@ -506,6 +517,7 @@ impl TrimLoopLowerer {
fn setup_condition_env_bindings(
builder: &mut MirBuilder,
trim_helper: &crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper,
break_semantics: BreakSemantics,
alloc_join_value: &mut dyn FnMut() -> ValueId,
) -> Result<(Vec<ConditionBinding>, Option<crate::mir::join_ir::lowering::common::condition_only_emitter::ConditionOnlyRecipe>), String> {
use crate::mir::builder::control_flow::joinir::patterns::trim_pattern_lowerer::TrimPatternLowerer;
@ -514,8 +526,11 @@ impl TrimLoopLowerer {
let verbose = crate::config::env::joinir_dev_enabled() || trace.is_joinir_enabled();
// Phase 93 P0: Do NOT add is_ch_match to ConditionBinding
// Phase 93 Refactoring: Use explicit factory method for ConditionOnly pattern
let recipe = ConditionOnlyRecipe::from_trim_helper_condition_only(trim_helper);
// Phase 93 Refactoring: Use explicit factory method based on loop shape.
let recipe = match break_semantics {
BreakSemantics::WhenMatch => ConditionOnlyRecipe::from_trim_helper_condition_only(trim_helper),
BreakSemantics::WhenNotMatch => ConditionOnlyRecipe::from_trim_helper_normal_trim(trim_helper),
};
trace.emit_if(
"trim",
@ -531,6 +546,23 @@ impl TrimLoopLowerer {
// Return empty bindings - the derived slot will be recalculated, not bound
Ok((Vec::new(), Some(recipe)))
}
/// Classifies a normalized break condition by its outermost operator.
///
/// `break_cond` is expected in the "break when <cond> is true" form that
/// Pattern2 passes in:
///
/// - find-first (ConditionOnly): break when the match is true, e.g. `is_match`
///   -> `BreakSemantics::WhenMatch`
/// - trim / skip-whitespace: break when the match is false, e.g. `!is_ws`
///   -> `BreakSemantics::WhenNotMatch`
///
/// Only the root node is inspected: a top-level logical NOT means
/// "break on non-match"; every other node shape means "break on match".
fn infer_break_semantics_from_break_cond(break_cond: &ASTNode) -> BreakSemantics {
    let negated_at_root = matches!(
        break_cond,
        ASTNode::UnaryOp {
            operator: crate::ast::UnaryOperator::Not,
            ..
        }
    );

    if negated_at_root {
        BreakSemantics::WhenNotMatch
    } else {
        BreakSemantics::WhenMatch
    }
}
}
#[cfg(test)]

View File

@ -63,19 +63,13 @@ impl BreakConditionAnalyzer {
} = stmt
{
// Pattern 1: Check if the then_body contains a break statement
if then_body
.iter()
.any(|node| matches!(node, ASTNode::Break { .. }))
{
if Self::has_break_in_stmts(then_body) {
return Ok(condition.as_ref());
}
// Pattern 2: Check if the else_body contains a break statement
if let Some(else_stmts) = else_body {
if else_stmts
.iter()
.any(|node| matches!(node, ASTNode::Break { .. }))
{
if Self::has_break_in_stmts(else_stmts) {
// For else-break pattern, return the condition
// Note: Caller must negate this condition
return Ok(condition.as_ref());
@ -86,6 +80,36 @@ impl BreakConditionAnalyzer {
Err("No if-else-break pattern found".to_string())
}
/// Build an owned, normalized break condition for lowering.
///
/// The result is always phrased as "break when <cond> is true":
/// - `if cond { break }`              -> `cond` (cloned)
/// - `if cond { ... } else { break }` -> `!cond` (via `negate_condition`)
///
/// The first `if` statement in `body` whose then- or else-branch contains a
/// break decides the result; the then-branch is checked before the
/// else-branch. Callers therefore do not need to re-check which branch held
/// the break.
///
/// # Errors
///
/// Returns `Err` with a fixed message when no statement in `body` is an `if`
/// containing a break.
pub fn extract_break_condition_node(body: &[ASTNode]) -> Result<ASTNode, String> {
    body.iter()
        .find_map(|stmt| match stmt {
            ASTNode::If {
                condition,
                then_body,
                else_body,
                ..
            } => {
                if Self::has_break_in_stmts(then_body) {
                    // then-break: the condition already reads "break when true".
                    return Some(condition.as_ref().clone());
                }
                match else_body {
                    // else-break: the loop exits when the condition is false.
                    Some(else_stmts) if Self::has_break_in_stmts(else_stmts) => {
                        Some(Self::negate_condition(condition.as_ref()))
                    }
                    _ => None,
                }
            }
            _ => None,
        })
        .ok_or_else(|| "No if-else-break pattern found".to_string())
}
/// Check if break exists in else clause
///
/// Helper function to determine if a break statement is in the else clause
@ -198,11 +222,27 @@ impl BreakConditionAnalyzer {
}
}
// Helper: Check if statements contain break
// Helper: Check if statements contain break (recursive)
fn has_break_in_stmts(stmts: &[ASTNode]) -> bool {
stmts
.iter()
.any(|stmt| matches!(stmt, ASTNode::Break { .. }))
stmts.iter().any(Self::has_break_node)
}
/// Reports whether `node` is, or contains, a `break` that belongs to the loop
/// whose body is being analyzed.
///
/// - `Break` nodes match directly.
/// - `If` nodes are searched recursively through both branches.
/// - Nested `Loop` nodes are deliberately NOT searched: a `break` inside an
///   inner loop exits that inner loop only, so reporting it would make the
///   enclosing `if` look like a break condition of the outer loop.
/// - Every other node kind is treated as break-free.
fn has_break_node(node: &ASTNode) -> bool {
    match node {
        ASTNode::Break { .. } => true,
        ASTNode::If {
            then_body,
            else_body,
            ..
        } => {
            then_body.iter().any(Self::has_break_node)
                || else_body
                    .as_ref()
                    .map_or(false, |e| e.iter().any(Self::has_break_node))
        }
        // A nested loop owns its breaks; they never terminate the outer loop.
        ASTNode::Loop { .. } => false,
        _ => false,
    }
}
// Helper: Recursively collect variables

View File

@ -27,13 +27,13 @@ if [ "$EXIT_CODE" -eq 124 ]; then
echo "[FAIL] hakorune timed out (>${RUN_TIMEOUT_SECS}s)"
FAIL_COUNT=$((FAIL_COUNT + 1))
elif [ "$EXIT_CODE" -eq 0 ]; then
EXPECTED=$'0\n0'
EXPECTED=$'2\n-1'
CLEAN=$(printf "%s\n" "$OUTPUT" | grep -E '^-?[0-9]+$' | head -n 2 | paste -sd '\n' - | tr -d '\r')
if [ "$CLEAN" = "$EXPECTED" ]; then
echo "[PASS] Output verified: 0 then 0"
echo "[PASS] Output verified: 2 then -1"
PASS_COUNT=$((PASS_COUNT + 1))
else
echo "[FAIL] Unexpected output (expected lines: 0 then 0)"
echo "[FAIL] Unexpected output (expected lines: 2 then -1)"
echo "[INFO] output (tail):"
echo "$OUTPUT" | tail -n 50 || true
FAIL_COUNT=$((FAIL_COUNT + 1))