Files
hakorune/src/mir/builder/control_flow/joinir/routing.rs
nyash-codex 1742f0412e feat(normalized): Phase 141 P1.5 - external env inputs + KnownIntrinsic SSOT
## Task B: External env input bug fix (Priority 1)

Fix: Suffix normalization couldn't access prefix-built local variables

**Problem**: `s.length()` failed because 's' (from prefix `s = "abc"`) was
not in available_inputs during suffix normalization.

**Root cause**: `AvailableInputsCollectorBox::collect()` only collected
function params and CapturedEnv, missing `builder.variable_map`.

**Solution**: Add `prefix_variables` parameter with 3-source merge:
1. Function params (highest priority)
2. Prefix variables (medium priority - NEW)
3. CapturedEnv (lowest priority)

**Changed files**:
- src/mir/control_tree/normalized_shadow/available_inputs_collector.rs
- src/mir/builder/control_flow/normalization/execute_box.rs
- src/mir/builder/control_flow/joinir/patterns/policies/normalized_shadow_suffix_router_box.rs
- src/mir/builder/control_flow/joinir/routing.rs
- src/mir/builder/stmts.rs
- src/mir/control_tree/normalized_shadow/dev_pipeline.rs
- docs/development/current/main/design/normalized-expr-lowering.md (Available Inputs SSOT section)

**Tests**: 3 new unit tests (prefix merge, priority order)

## Task A: Make KnownIntrinsic the SSOT (Priority 2)

Eliminate scattered string-literal matching by centralizing it in a registry.

**Problem**: Adding new intrinsics required editing if/match chains with
hard-coded string literals (`if method == KnownIntrinsic::Length0.method_name()`).

**Solution**: Create `KnownIntrinsicRegistryBox` as SSOT:
- `lookup(method, arity) -> Option<KnownIntrinsic>`
- `get_spec(intrinsic) -> KnownIntrinsicSpec`
- Adding new intrinsics now requires: (1) enum variant, (2) registry entry only

**Changed files**:
- src/mir/control_tree/normalized_shadow/common/known_intrinsics.rs (NEW)
- src/mir/control_tree/normalized_shadow/common/expr_lowerer_box.rs
- src/mir/control_tree/normalized_shadow/common/expr_lowering_contract.rs (deprecated methods removed)
- src/mir/control_tree/normalized_shadow/common/mod.rs
- docs/development/current/main/design/normalized-expr-lowering.md (Known Intrinsic SSOT section)

**Impact**: ~30% code reduction in intrinsic matching logic

## Task C: Better diagnostics (Priority 3)

Add `OutOfScopeReason::IntrinsicNotWhitelisted` for precise diagnostics.

**Changed files**:
- src/mir/control_tree/normalized_shadow/common/expr_lowering_contract.rs (enum variant)
- src/mir/control_tree/normalized_shadow/common/expr_lowerer_box.rs (diagnostic logic)

## Verification

 Build: `cargo build --release` - PASS
 Phase 97 regression: next_non_ws LLVM EXE - PASS
 Phase 131: loop(true) break-once VM - PASS
 Phase 136: return literal VM - PASS
 Phase 137: return x+2 VM - PASS

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-19 03:59:41 +09:00

502 lines
21 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! JoinIR routing logic for loop lowering
use super::trace;
use crate::ast::ASTNode;
use crate::mir::builder::MirBuilder;
use crate::mir::ValueId;
/// Single entry point (SSOT) for choosing the loop pattern kind.
///
/// Gathers the previously scattered selection logic in one place. The stated
/// plan is to delegate the choice to the Canonicalizer decision in the future.
///
/// - Phase 137-6-S1: the returned value still comes from the existing router
///   logic (based on `LoopFeatures`).
/// - Phase 137-6-S2: in dev mode only, the canonicalizer decision is obtained
///   as a proposal and compared against the router's choice.
///
/// # Panics
///
/// Only when both `joinir_dev_enabled()` and `joinir_dev::strict_enabled()`
/// hold: on a StepTree/extractor feature mismatch, or on a
/// router/canonicalizer mismatch.
pub(in crate::mir::builder) fn choose_pattern_kind(
    condition: &ASTNode,
    body: &[ASTNode],
) -> crate::mir::loop_pattern_detection::LoopPatternKind {
    use crate::ast::Span;
    use crate::mir::builder::control_flow::joinir::patterns::ast_feature_extractor as ast_features;
    use crate::mir::builder::control_flow::joinir::patterns::policies::balanced_depth_scan_policy_box::BalancedDepthScanPolicyBox;
    use crate::mir::builder::control_flow::joinir::patterns::policies::PolicyDecision;
    use crate::mir::control_tree::StepTreeBuilderBox;
    use crate::mir::loop_canonicalizer::canonicalize_loop_expr;
    use crate::mir::loop_pattern_detection;

    // Both dev-only checks below need the loop rebuilt as an AST node; each
    // call produces a fresh deep copy of the condition and body.
    let rebuild_loop_ast = || ASTNode::Loop {
        condition: Box::new(condition.clone()),
        body: body.to_vec(),
        span: Span::unknown(),
    };

    // Phase 107: Route balanced depth-scan (return-in-loop) to Pattern2 via policy.
    //
    // This keeps Pattern routing structural: no by-name dispatch, no silent fallback.
    let force_pattern2 = match BalancedDepthScanPolicyBox::decide(condition, body) {
        PolicyDecision::Use(_) => true,
        // In strict mode, treat "close-but-unsupported" as a fail-fast
        // Pattern2 route so the policy can surface the precise contract violation.
        PolicyDecision::Reject(_reason) => crate::config::env::joinir_dev::strict_enabled(),
        PolicyDecision::None => false,
    };
    if force_pattern2 {
        return loop_pattern_detection::LoopPatternKind::Pattern2Break;
    }

    // Phase 193: Use AST Feature Extractor Box for break/continue detection
    let has_continue = ast_features::detect_continue_in_body(body);
    let has_break = ast_features::detect_break_in_body(body);
    let has_return = ast_features::detect_return_in_body(body);

    // Phase 110: StepTree parity check (structure-only SSOT).
    //
    // This is dev-only; strict mode turns mismatch into a fail-fast.
    if crate::config::env::joinir_dev_enabled() {
        let loop_ast = rebuild_loop_ast();
        let tree = StepTreeBuilderBox::build_from_ast(&loop_ast);
        let flags_agree = tree.features.has_break == has_break
            && tree.features.has_continue == has_continue
            && tree.features.has_return == has_return;
        if !flags_agree {
            let msg = format!(
                "[choose_pattern_kind/STEPTREE_PARITY] step_tree(break={}, cont={}, ret={}) != extractor(break={}, cont={}, ret={})",
                tree.features.has_break,
                tree.features.has_continue,
                tree.features.has_return,
                has_break,
                has_continue,
                has_return
            );
            if crate::config::env::joinir_dev::strict_enabled() {
                panic!("{}", msg);
            }
            trace::trace().dev("choose_pattern_kind/step_tree_parity", &msg);
        }
    }

    // Phase 193: Extract features using modularized extractor
    let features = ast_features::extract_features(condition, body, has_continue, has_break);

    // Phase 192: Classify pattern based on features (the existing router result)
    let router_choice = loop_pattern_detection::classify(&features);

    // Phase 137-6-S2: dev-only, obtain the Canonicalizer's proposal
    if crate::config::env::joinir_dev_enabled() {
        let loop_ast = rebuild_loop_ast();
        // A canonicalizer error or a decision without a chosen pattern both
        // mean there is nothing to compare against.
        let proposal = canonicalize_loop_expr(&loop_ast)
            .ok()
            .and_then(|(_skeleton, decision)| decision.chosen);
        if let Some(canonical_choice) = proposal {
            // parity check
            if canonical_choice == router_choice {
                // Patterns match - success!
                trace::trace().dev(
                    "choose_pattern_kind/parity",
                    &format!(
                        "[choose_pattern_kind/PARITY] OK: canonical and actual agree on {:?}",
                        canonical_choice
                    ),
                );
            } else {
                let msg = format!(
                    "[choose_pattern_kind/PARITY] router={:?}, canonicalizer={:?}",
                    router_choice, canonical_choice
                );
                if crate::config::env::joinir_dev::strict_enabled() {
                    // strict mode: a mismatch is Fail-Fast
                    panic!("{}", msg);
                }
                // debug mode: log only
                trace::trace().dev("choose_pattern_kind/parity", &msg);
            }
            // TODO (Phase 137-6-S3): return canonical_choice here.
            // For now router_choice is kept (default behavior unchanged).
            //
            // Activation condition (future work):
            // 1. joinir_dev_enabled() && a new flag (e.g. canonicalizer_preferred())
            // 2. or use joinir_dev_enabled() as is
            //
            // Note: parity must be green for every Pattern before activation.
            //
            // Example code after activation:
            // ```rust
            // if crate::config::env::canonicalizer_preferred() {
            //     return canonical_choice;
            // }
            // ```
        }
    }

    router_choice
}
impl MirBuilder {
/// Phase 49: Entry point that tries to lower a loop through the JoinIR Frontend.
///
/// Returns `Ok(Some(value))` when the loop was lowered via JoinIR and
/// `Ok(None)` when it was not (either the function is not on the legacy
/// whitelist while structure-only routing is off, or the downstream
/// implementation found no route).
/// Phase 187-2: Legacy LoopBuilder removed - all loops must use JoinIR.
///
/// # Routing modes
///
/// - Structure-only (`joinir_structure_only_enabled()` is true): the function
///   name whitelist is skipped and every loop is forwarded.
/// - Otherwise: only functions named in the legacy whitelist are forwarded.
///
/// Earlier phase notes mention per-target dev flags
/// (`HAKO_JOINIR_PRINT_TOKENS_MAIN`, `HAKO_JOINIR_ARRAY_FILTER_MAIN`); this
/// function does not read them.
///
/// Note: Arity in function names does NOT include implicit `me` receiver.
/// - Instance method `print_tokens()` → `/0` (no explicit params)
/// - Static method `filter(arr, pred)` → `/2` (two params)
pub(in crate::mir::builder) fn try_cf_loop_joinir(
    &mut self,
    condition: &ASTNode,
    body: &[ASTNode],
) -> Result<Option<ValueId>, String> {
    // Phase 49-4 + Phase 80: Multi-target routing (legacy whitelist)
    // - JoinIR is always ON; the legacy LoopBuilder has been removed.
    // - The two representative targets (print_tokens / ArrayExt.filter) are
    //   also always attempted via JoinIR.
    // Note: Arity does NOT include implicit `me` receiver
    // Phase 188: Add "main" routing for loop pattern expansion
    // Phase 170: Add JsonParserBox methods for selfhost validation
    const LEGACY_JOINIR_TARGETS: &[&str] = &[
        "main",             // Phase 188-Impl-1: Enable JoinIR for main function (Pattern 1)
        "JoinIrMin.main/0", // Phase 188-Impl-2: Enable JoinIR for JoinIrMin.main/0 (Pattern 2)
        "JsonTokenizer.print_tokens/0",
        "ArrayExtBox.filter/2",
        // Phase 170-A-1: Enable JsonParserBox methods for JoinIR routing
        "JsonParserBox._trim/1",
        "JsonParserBox._skip_whitespace/2",
        "JsonParserBox._match_literal/3", // Phase 182: Fixed arity (s, pos, literal)
        "JsonParserBox._parse_string/2",
        "JsonParserBox._parse_array/2",
        "JsonParserBox._parse_object/2",
        // Phase 182: Add simple loop methods
        "JsonParserBox._parse_number/2", // P2 Break (s, pos)
        "JsonParserBox._atoi/1",         // P2 Break (s)
        // Phase 170-A-1: Test methods (simplified versions)
        "TrimTest.trim/1",
        "Main.trim/1",               // Phase 171-fix: Main box variant
        "Main.trim_string_simple/1", // Phase 33-13: Simple trim variant
        "TrimTest.main/0",           // Phase 170: TrimTest.main for loop pattern test
        // Phase 173: JsonParser P5 expansion test
        "JsonParserTest._skip_whitespace/3",
        "JsonParserTest.main/0",
        // Phase 174: JsonParser complex loop P5B extension test
        "JsonParserStringTest.parse_string_min/0",
        "JsonParserStringTest.main/0",
        // Phase 175: P5 multi-carrier support (2 carriers: pos + result)
        "JsonParserStringTest2.parse_string_min2/0",
        "JsonParserStringTest2.main/0",
    ];

    // Name of the function currently being built; falls back to the default
    // (empty) name when no function is active.
    let func_name = match self.scope_ctx.current_function.as_ref() {
        Some(current) => current.signature.name.clone(),
        None => Default::default(),
    };

    // Phase 195: Use unified trace
    trace::trace().routing("router", &func_name, "try_cf_loop_joinir called");

    // Phase 170-4: Structure-based routing option.
    // Phase 196 note: structure-first routing is described as the default now
    // that LoopBuilder is removed; NYASH_JOINIR_STRUCTURE_ONLY=0 is described
    // as the way back to whitelist-only behavior.
    if crate::config::env::joinir_structure_only_enabled() {
        trace::trace().routing(
            "router",
            &func_name,
            "Structure-only mode enabled, skipping whitelist",
        );
    } else if !LEGACY_JOINIR_TARGETS.contains(&func_name.as_str()) {
        // Whitelist mode and this function is not a target.
        return Ok(None);
    }

    // Debug log when routing through JoinIR Frontend
    // Phase 195: Check trace flags directly from JoinLoopTrace
    let tracer = trace::trace();
    let debug = tracer.is_loopform_enabled() || tracer.is_mainline_enabled();
    trace::trace().routing(
        "router",
        &func_name,
        "Routing through JoinIR Frontend mainline",
    );

    // Phase 49-3: Implement JoinIR Frontend integration
    self.cf_loop_joinir_impl(condition, body, &func_name, debug)
}
/// Phase 49-3: JoinIR Frontend integration implementation
///
/// Attempts the lowering routes in this order:
/// 1. Normalized shadow (Phase 131 P1) - dev-only for loop(true) break-once
/// 2. Pattern-based router (Phase 194+) - preferred for new patterns
/// 3. Legacy binding path (Phase 49-3) - for whitelisted functions only
///
/// When `joinir_dev_enabled()` is true, the Loop Canonicalizer is also run on
/// the reconstructed loop purely for observation (its result is only traced),
/// and router parity is verified once the pattern context exists.
///
/// # Parameters
///
/// - `condition` / `body`: the loop's condition and body statements.
/// - `func_name`: name of the enclosing function, used for tracing and passed
///   on to the pattern context and the legacy binding path.
/// - `debug`: forwarded unchanged to the downstream routes.
///
/// # Errors
///
/// Propagates errors from `try_normalized_shadow`, from
/// `verify_router_parity` (dev mode only) and from `route_loop_pattern`; the
/// result of `cf_loop_joinir_legacy_binding` is returned as is.
pub(in crate::mir::builder) fn cf_loop_joinir_impl(
    &mut self,
    condition: &ASTNode,
    body: &[ASTNode],
    func_name: &str,
    debug: bool,
) -> Result<Option<ValueId>, String> {
    // Phase 131 P1: Try Normalized shadow first (dev-only)
    if crate::config::env::joinir_dev_enabled() {
        if let Some(result) = self.try_normalized_shadow(condition, body, func_name, debug)? {
            return Ok(Some(result));
        }
    }

    // Phase 137-2/137-4: Dev-only observation via Loop Canonicalizer
    if crate::config::env::joinir_dev_enabled() {
        use crate::ast::Span;
        use crate::mir::loop_canonicalizer::canonicalize_loop_expr;

        // Every observation line shares the same trace tag.
        let dev_note = |line: String| trace::trace().dev("loop_canonicalizer", &line);

        // Reconstruct loop AST for canonicalizer
        let loop_ast = ASTNode::Loop {
            condition: Box::new(condition.clone()),
            body: body.to_vec(),
            span: Span::unknown(),
        };
        match canonicalize_loop_expr(&loop_ast) {
            Ok((skeleton, decision)) => {
                dev_note(format!("Function: {}", func_name));
                dev_note(format!(" Skeleton steps: {}", skeleton.steps.len()));
                dev_note(format!(" Carriers: {}", skeleton.carriers.len()));
                dev_note(format!(" Has exits: {}", skeleton.exits.has_any_exit()));
                dev_note(format!(
                    " Decision: {}",
                    if decision.is_success() {
                        "SUCCESS"
                    } else {
                        "FAIL_FAST"
                    }
                ));
                if let Some(pattern) = decision.chosen {
                    dev_note(format!(" Chosen pattern: {:?}", pattern));
                }
                dev_note(format!(" Missing caps: {:?}", decision.missing_caps));
                if decision.is_fail_fast() {
                    dev_note(format!(" Reason: {}", decision.notes.join("; ")));
                }
                // Phase 137-4: Router parity verification
                if let Some(canonical_pattern) = decision.chosen {
                    // The router's actual pattern is only known once the
                    // LoopPatternContext exists, so the comparison itself is
                    // deferred; here the canonical choice is just recorded.
                    trace::trace().debug(
                        "canonicalizer",
                        &format!(
                            "Phase 137-4: Canonical pattern chosen: {:?} (parity check pending)",
                            canonical_pattern
                        ),
                    );
                }
            }
            Err(e) => {
                dev_note(format!("Function: {}", func_name));
                dev_note(format!(" Error: {}", e));
            }
        }
    }

    // Phase 194: Use table-driven router instead of if/else chain
    use super::patterns::{route_loop_pattern, LoopPatternContext};

    // Phase 200-C: Pass fn_body_ast to LoopPatternContext if available
    // Clone fn_body_ast to avoid borrow checker issues
    // (the context borrows it while `self` is handed to the router mutably).
    let fn_body_clone = self.comp_ctx.fn_body_ast.clone();
    trace::trace().routing(
        "router",
        func_name,
        &format!(
            "fn_body_ast is {}",
            if fn_body_clone.is_some() {
                "SOME"
            } else {
                "NONE"
            }
        ),
    );

    // The context is never mutated after construction, so the binding is
    // immutable; `func_name` is already a `&str` and is passed through as is.
    let ctx = if let Some(ref fn_body) = fn_body_clone {
        trace::trace().routing(
            "router",
            func_name,
            &format!("Creating ctx with fn_body ({} nodes)", fn_body.len()),
        );
        LoopPatternContext::with_fn_body(condition, body, func_name, debug, fn_body)
    } else {
        LoopPatternContext::new(condition, body, func_name, debug)
    };

    // Phase 137-4: Router parity verification (after ctx is created)
    // Phase 92 P1-0: Skeleton setting removed - patterns retrieve skeleton internally if needed
    if crate::config::env::joinir_dev_enabled() {
        let (result, _skeleton_opt) = self.verify_router_parity(condition, body, func_name, &ctx);
        result?;
    }

    if let Some(result) = route_loop_pattern(self, &ctx)? {
        trace::trace().routing("router", func_name, "Pattern router succeeded");
        return Ok(Some(result));
    }

    // Phase 187-2: Pattern router failed, try legacy whitelist
    trace::trace().routing(
        "router",
        func_name,
        "Pattern router found no match, trying legacy whitelist",
    );

    // Phase 132-R0 Task 4: Delegate to legacy binding path (legacy/routing_legacy_binding.rs)
    self.cf_loop_joinir_legacy_binding(condition, body, func_name, debug)
}
/// Phase 131 P1: Attempt the Normalized shadow lowering for a loop (dev-only).
///
/// The loop is rebuilt as a single `ASTNode::Loop`, planned through
/// `NormalizationPlanBox` and, for loop-only plans, executed through
/// `NormalizationExecuteBox` (Phase 134 P0 unification).
///
/// Returns:
/// - `Ok(Some(value_id))`: lowered and merged via Normalized
/// - `Ok(None)`: out of scope (no plan, or a loop+post plan), or execution
///   failed while strict mode is off
/// - `Err(msg)`: planning failed, or execution failed in strict mode
///   (Fail-Fast, with a hint attached)
fn try_normalized_shadow(
    &mut self,
    condition: &ASTNode,
    body: &[ASTNode],
    func_name: &str,
    debug: bool,
) -> Result<Option<ValueId>, String> {
    use crate::ast::Span;
    use crate::mir::builder::control_flow::normalization::{
        NormalizationExecuteBox, NormalizationPlanBox, PlanKind,
    };

    // Routing note that is only emitted when `debug` is set.
    let note = |message: &str| {
        if debug {
            trace::trace().routing("router/normalized", func_name, message);
        }
    };

    // Phase 134 P0: Delegate pattern detection to NormalizationPlanBox (SSOT).
    // The planner works on a "remaining statements" slice, so the rebuilt loop
    // is wrapped in a single-element vector.
    let remaining = vec![ASTNode::Loop {
        condition: Box::new(condition.clone()),
        body: body.to_vec(),
        span: Span::unknown(),
    }];
    let planned = NormalizationPlanBox::plan_block_suffix(self, &remaining, func_name, debug)?;
    let plan = if let Some(plan) = planned {
        plan
    } else {
        note("NormalizationPlanBox returned None (not a normalized pattern)");
        return Ok(None);
    };

    // Only handle loop-only patterns here
    // (suffix patterns with post-statements go through suffix_router_box)
    let is_loop_only = match &plan.kind {
        PlanKind::LoopOnly => true,
        PlanKind::LoopWithPost { .. } => false,
    };
    if !is_loop_only {
        // This should not happen in try_normalized_shadow context
        // (post patterns should be caught by suffix_router_box earlier)
        note("Loop+post pattern in try_normalized_shadow (unexpected, using legacy)");
        return Ok(None);
    }
    note("Loop-only pattern detected, proceeding with normalization");

    // Phase 134 P0: Delegate execution to NormalizationExecuteBox (SSOT)
    // Phase 141 P1.5: Pass prefix_variables (using variable_map at this point)
    // Clone to avoid borrow checker conflict (self is borrowed mutably in execute)
    let prefix_var_map = self.variable_ctx.variable_map.clone();
    let outcome = NormalizationExecuteBox::execute(
        self,
        &plan,
        &remaining,
        func_name,
        debug,
        Some(&prefix_var_map),
    );
    let e = match outcome {
        Ok(value_id) => {
            note("Normalization succeeded");
            return Ok(Some(value_id));
        }
        Err(e) => e,
    };

    if !crate::config::env::joinir_dev::strict_enabled() {
        // Non-strict: record the failure and fall back to the other routes.
        trace::trace().routing("router/normalized/error", func_name, &e);
        return Ok(None);
    }

    // Strict: the plan claimed this loop, so a failed execution is Fail-Fast.
    use crate::mir::join_ir::lowering::error_tags;
    Err(error_tags::freeze_with_hint(
        "phase134/routing/normalized",
        &e,
        "Loop should be supported by Normalized but execution failed. \
         Check that condition is Bool(true) and body ends with break.",
    ))
}
}