Files
hakorune/src/mir/builder/control_flow/joinir/routing.rs
nyash-codex 7a9ebf7fc1 feat(joinir): Phase 170-4 Structure-based routing opt-in
Add NYASH_JOINIR_STRUCTURE_ONLY=1 environment variable to bypass
function name whitelist and route purely based on loop structure.

When enabled:
- Skips hardcoded function name whitelist (13 entries)
- Routes directly to pattern detection (LoopPatternContext)
- Falls back to legacy LoopBuilder if no pattern matches

This is Phase 1 of structure-based routing migration:
- Phase 1: Opt-in via env flag (this commit) 
- Phase 2: Make structure-based default (future)
- Phase 3: Remove whitelist entirely (future)

Verified:
- test_loop_return.hako → RC: 2 
- test_trim_loop.hako → RC: 3 

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-07 13:03:43 +09:00

352 lines
15 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! JoinIR routing logic for loop lowering
use crate::ast::ASTNode;
use crate::mir::builder::MirBuilder;
use crate::mir::ValueId;
use super::trace;
impl MirBuilder {
/// Phase 49: Try JoinIR Frontend for mainline integration
///
/// Returns `Ok(Some(value))` if the current function should use JoinIR Frontend,
/// `Ok(None)` to fall through to the legacy LoopBuilder path.
///
/// # Phase 49-4: Multi-target support
///
/// Targets are enabled via separate dev flags:
/// - `HAKO_JOINIR_PRINT_TOKENS_MAIN=1`: JsonTokenizer.print_tokens/0
/// - `HAKO_JOINIR_ARRAY_FILTER_MAIN=1`: ArrayExtBox.filter/2
///
/// Note: Arity in function names does NOT include implicit `me` receiver.
/// - Instance method `print_tokens()` → `/0` (no explicit params)
/// - Static method `filter(arr, pred)` → `/2` (two params)
pub(in crate::mir::builder) fn try_cf_loop_joinir(
&mut self,
condition: &ASTNode,
body: &[ASTNode],
) -> Result<Option<ValueId>, String> {
// Get current function name
let func_name = self
.current_function
.as_ref()
.map(|f| f.signature.name.clone())
.unwrap_or_default();
// Phase 195: Use unified trace
trace::trace().routing("router", &func_name, "try_cf_loop_joinir called");
// Phase 170-4: Structure-based routing option
// When NYASH_JOINIR_STRUCTURE_ONLY=1, skip function name whitelist
// and route purely based on loop structure analysis
let structure_only = std::env::var("NYASH_JOINIR_STRUCTURE_ONLY")
.ok()
.as_deref()
== Some("1");
if structure_only {
trace::trace().routing("router", &func_name, "Structure-only mode enabled, skipping whitelist");
} else {
// Phase 49-4 + Phase 80: Multi-target routing (legacy whitelist)
// - Core ON なら代表2本print_tokens / ArrayExt.filterは JoinIR を優先し、失敗したら LoopBuilder へフォールバック
// - Core OFF では従来通り dev フラグで opt-in
// Note: Arity does NOT include implicit `me` receiver
// Phase 188: Add "main" routing for loop pattern expansion
// Phase 170: Add JsonParserBox methods for selfhost validation
let core_on = crate::config::env::joinir_core_enabled();
let is_target = match func_name.as_str() {
"main" => true, // Phase 188-Impl-1: Enable JoinIR for main function (Pattern 1)
"JoinIrMin.main/0" => true, // Phase 188-Impl-2: Enable JoinIR for JoinIrMin.main/0 (Pattern 2)
"JsonTokenizer.print_tokens/0" => {
if core_on {
true
} else {
std::env::var("HAKO_JOINIR_PRINT_TOKENS_MAIN")
.ok()
.as_deref()
== Some("1")
}
}
"ArrayExtBox.filter/2" => {
if core_on {
true
} else {
std::env::var("HAKO_JOINIR_ARRAY_FILTER_MAIN")
.ok()
.as_deref()
== Some("1")
}
}
// Phase 170-A-1: Enable JsonParserBox methods for JoinIR routing
"JsonParserBox._trim/1" => true,
"JsonParserBox._skip_whitespace/2" => true,
"JsonParserBox._match_literal/2" => true,
"JsonParserBox._parse_string/2" => true,
"JsonParserBox._parse_array/2" => true,
"JsonParserBox._parse_object/2" => true,
// Phase 170-A-1: Test methods (simplified versions)
"TrimTest.trim/1" => true,
"Main.trim/1" => true, // Phase 171-fix: Main box variant
"Main.trim_string_simple/1" => true, // Phase 33-13: Simple trim variant
"TrimTest.main/0" => true, // Phase 170: TrimTest.main for loop pattern test
_ => false,
};
if !is_target {
return Ok(None);
}
}
// Debug log when routing through JoinIR Frontend
// Phase 195: Check trace flags directly from JoinLoopTrace
let debug = trace::trace().is_loopform_enabled() || trace::trace().is_mainline_enabled();
trace::trace().routing("router", &func_name, "Routing through JoinIR Frontend mainline");
// Phase 49-3: Implement JoinIR Frontend integration
self.cf_loop_joinir_impl(condition, body, &func_name, debug)
}
/// Phase 49-3: JoinIR Frontend integration implementation
///
/// # Pipeline
/// 1. Build Loop AST → JSON v0 format (with "defs" array)
/// 2. AstToJoinIrLowerer::lower_program_json() → JoinModule
/// 3. convert_join_module_to_mir_with_meta() → MirModule
/// 4. Merge MIR blocks into current_function
///
/// # Phase 49-4 Note
///
/// JoinIR Frontend expects a complete function definition with:
/// - local variable initializations
/// - loop body
/// - return statement
///
/// Since cf_loop only has access to the loop condition and body,
/// we construct a minimal JSON v0 wrapper with function name "simple"
/// to match the JoinIR Frontend's expected pattern.
pub(in crate::mir::builder) fn cf_loop_joinir_impl(
&mut self,
condition: &ASTNode,
body: &[ASTNode],
func_name: &str,
debug: bool,
) -> Result<Option<ValueId>, String> {
use super::super::super::loop_frontend_binding::LoopFrontendBinding;
use crate::mir::join_ir::frontend::{AstToJoinIrLowerer, JoinFuncMetaMap};
use crate::mir::join_ir_vm_bridge::convert_join_module_to_mir_with_meta;
use crate::mir::types::ConstValue;
use crate::mir::MirInstruction;
use crate::r#macro::ast_json::ast_to_json;
// Phase 194: Use table-driven router instead of if/else chain
// This makes adding new patterns trivial - just add an entry to LOOP_PATTERNS table
use super::patterns::{route_loop_pattern, LoopPatternContext};
let ctx = LoopPatternContext::new(condition, body, &func_name, debug);
if let Some(result) = route_loop_pattern(self, &ctx)? {
// Phase 195: Use unified trace
trace::trace().routing("router", func_name, "Pattern router succeeded");
return Ok(Some(result));
}
// Phase 195: Use unified trace
trace::trace().routing("router", func_name, "Pattern router found no match, continuing to legacy path");
// Phase 50: Create appropriate binding based on function name
let binding = match func_name {
"JsonTokenizer.print_tokens/0" => LoopFrontendBinding::for_print_tokens(),
"ArrayExtBox.filter/2" => LoopFrontendBinding::for_array_filter(),
_ => {
// Phase 195: Use unified trace
trace::trace().routing("router", func_name, "No binding defined, falling back");
return Ok(None);
}
};
// Phase 195: Use unified trace
trace::trace().debug(
"router",
&format!(
"Using binding for '{}': counter={}, acc={:?}, pattern={:?}",
func_name, binding.counter_var, binding.accumulator_var, binding.pattern
),
);
// Step 1: Convert condition and body to JSON
let condition_json = ast_to_json(condition);
let mut body_json: Vec<serde_json::Value> = body.iter().map(|s| ast_to_json(s)).collect();
// Phase 50: Rename variables in body (e.g., "out" → "acc" for filter)
binding.rename_body_variables(&mut body_json);
// Phase 50: Generate Local declarations from binding
let (i_local, acc_local, n_local) = binding.generate_local_declarations();
// Phase 52/56: Build params from external_refs
// Instance methods need `me`, static methods need their parameters (arr, pred, etc.)
let mut params: Vec<serde_json::Value> = Vec::new();
// Phase 52: Add 'me' for instance methods
if binding.needs_me_receiver() {
// Phase 195: Use unified trace
trace::trace().debug("router", "Adding 'me' to params (instance method)");
params.push(serde_json::json!("me"));
}
// Phase 56: Add external_refs as parameters (arr, pred for filter)
for ext_ref in &binding.external_refs {
// Skip "me" and "me.*" as they're handled above
if ext_ref == "me" || ext_ref.starts_with("me.") {
continue;
}
// Phase 195: Use unified trace
trace::trace().debug("router", &format!("Adding '{}' to params (external_ref)", ext_ref));
params.push(serde_json::json!(ext_ref));
}
// Step 2: Construct JSON v0 format with "defs" array
// The function is named "simple" to match JoinIR Frontend's pattern matching
// Phase 50: Include i/acc/n Local declarations to satisfy JoinIR Frontend expectations
let program_json = serde_json::json!({
"defs": [
{
"name": "simple",
"params": params,
"body": {
"type": "Block",
"body": [
// Phase 50: Inject i/acc/n Local declarations
i_local,
acc_local,
n_local,
{
"type": "Loop",
"cond": condition_json, // JoinIR Frontend expects "cond" not "condition"
"body": body_json
},
// Return the accumulator (or null for side-effect loops)
{
"type": "Return",
"value": { "kind": "Variable", "name": "acc" }
}
]
}
}
]
});
// Phase 195: Use unified trace
trace::trace().debug(
"router",
&format!(
"Generated JSON v0 for {}: {}",
func_name,
serde_json::to_string_pretty(&program_json).unwrap_or_default()
),
);
// Step 3: Lower to JoinIR
// Phase 49-4: Use catch_unwind for graceful fallback on unsupported patterns
// The JoinIR Frontend may panic if the loop doesn't match expected patterns
// (e.g., missing variable initializations like "i must be initialized")
let join_module = {
let json_clone = program_json.clone();
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
let mut lowerer = AstToJoinIrLowerer::new();
lowerer.lower_program_json(&json_clone)
}));
match result {
Ok(module) => module,
Err(e) => {
// Extract panic message for debugging
let panic_msg = if let Some(s) = e.downcast_ref::<&str>() {
s.to_string()
} else if let Some(s) = e.downcast_ref::<String>() {
s.clone()
} else {
"unknown panic".to_string()
};
// Phase 195: Use unified trace
trace::trace().debug(
"router",
&format!(
"JoinIR lowering failed for {}: {}, falling back to legacy",
func_name, panic_msg
),
);
// Return None to fall back to legacy LoopBuilder
return Ok(None);
}
}
};
// Phase 49-3 MVP: Use empty meta map (full if-analysis is Phase 40+ territory)
let join_meta = JoinFuncMetaMap::new();
// Phase 195: Use unified trace
trace::trace().joinir_stats(
"router",
join_module.functions.len(),
join_module
.functions
.values()
.map(|f| f.body.len())
.sum(),
);
// Step 4: Convert JoinModule to MIR
let mir_module = convert_join_module_to_mir_with_meta(&join_module, &join_meta)
.map_err(|e| format!("JoinIR→MIR conversion failed: {}", e.message))?;
// Phase 195: Use unified trace for MIR module stats
if trace::trace().is_joinir_enabled() {
trace::trace().debug(
"router",
&format!("MirModule has {} functions", mir_module.functions.len()),
);
for (name, func) in &mir_module.functions {
trace::trace().debug(
"router",
&format!(
" - {}: {} blocks, entry={:?}",
name,
func.blocks.len(),
func.entry_block
),
);
// Phase 189: Debug - show block contents
for (block_id, block) in &func.blocks {
trace::trace().blocks(
"router",
&format!("Block {:?}: {} instructions", block_id, block.instructions.len()),
);
for (i, inst) in block.instructions.iter().enumerate() {
trace::trace().instructions("router", &format!("[{}] {:?}", i, inst));
}
if let Some(ref term) = block.terminator {
trace::trace().instructions("router", &format!("terminator: {:?}", term));
}
}
}
}
// Step 5: Merge MIR blocks into current_function
// For Phase 49-3, we'll use a simplified approach:
// - Add generated blocks to current_function
// - Jump from current_block to the entry of generated loop
// - The loop exit becomes the new current_block
// Phase 188-Impl-3: Pass None for boundary (legacy path without boundary)
// Phase 189: Discard exit PHI result (legacy path doesn't need it)
let _ = self.merge_joinir_mir_blocks(&mir_module, None, debug)?;
// Return void for now (loop doesn't have a meaningful return value in this context)
let void_val = self.next_value_id();
self.emit_instruction(MirInstruction::Const {
dst: void_val,
value: ConstValue::Void,
})?;
Ok(Some(void_val))
}
}