Add NYASH_JOINIR_STRUCTURE_ONLY=1 environment variable to bypass function name whitelist and route purely based on loop structure.

When enabled:
- Skips the hardcoded function name whitelist (14 entries)
- Routes directly to pattern detection (LoopPatternContext)
- Falls back to legacy LoopBuilder if no pattern matches

This is Phase 1 of the structure-based routing migration:
- Phase 1: Opt-in via env flag (this commit) ✅
- Phase 2: Make structure-based routing the default (future)
- Phase 3: Remove the whitelist entirely (future)

Verified:
- test_loop_return.hako → RC: 2 ✅
- test_trim_loop.hako → RC: 3 ✅

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
352 lines
15 KiB
Rust
352 lines
15 KiB
Rust
//! JoinIR routing logic for loop lowering
|
||
|
||
use crate::ast::ASTNode;
|
||
use crate::mir::builder::MirBuilder;
|
||
use crate::mir::ValueId;
|
||
use super::trace;
|
||
|
||
impl MirBuilder {
|
||
/// Phase 49: Try JoinIR Frontend for mainline integration
|
||
///
|
||
/// Returns `Ok(Some(value))` if the current function should use JoinIR Frontend,
|
||
/// `Ok(None)` to fall through to the legacy LoopBuilder path.
|
||
///
|
||
/// # Phase 49-4: Multi-target support
|
||
///
|
||
/// Targets are enabled via separate dev flags:
|
||
/// - `HAKO_JOINIR_PRINT_TOKENS_MAIN=1`: JsonTokenizer.print_tokens/0
|
||
/// - `HAKO_JOINIR_ARRAY_FILTER_MAIN=1`: ArrayExtBox.filter/2
|
||
///
|
||
/// Note: Arity in function names does NOT include implicit `me` receiver.
|
||
/// - Instance method `print_tokens()` → `/0` (no explicit params)
|
||
/// - Static method `filter(arr, pred)` → `/2` (two params)
|
||
pub(in crate::mir::builder) fn try_cf_loop_joinir(
|
||
&mut self,
|
||
condition: &ASTNode,
|
||
body: &[ASTNode],
|
||
) -> Result<Option<ValueId>, String> {
|
||
// Get current function name
|
||
let func_name = self
|
||
.current_function
|
||
.as_ref()
|
||
.map(|f| f.signature.name.clone())
|
||
.unwrap_or_default();
|
||
|
||
// Phase 195: Use unified trace
|
||
trace::trace().routing("router", &func_name, "try_cf_loop_joinir called");
|
||
|
||
// Phase 170-4: Structure-based routing option
|
||
// When NYASH_JOINIR_STRUCTURE_ONLY=1, skip function name whitelist
|
||
// and route purely based on loop structure analysis
|
||
let structure_only = std::env::var("NYASH_JOINIR_STRUCTURE_ONLY")
|
||
.ok()
|
||
.as_deref()
|
||
== Some("1");
|
||
|
||
if structure_only {
|
||
trace::trace().routing("router", &func_name, "Structure-only mode enabled, skipping whitelist");
|
||
} else {
|
||
// Phase 49-4 + Phase 80: Multi-target routing (legacy whitelist)
|
||
// - Core ON なら代表2本(print_tokens / ArrayExt.filter)は JoinIR を優先し、失敗したら LoopBuilder へフォールバック
|
||
// - Core OFF では従来通り dev フラグで opt-in
|
||
// Note: Arity does NOT include implicit `me` receiver
|
||
// Phase 188: Add "main" routing for loop pattern expansion
|
||
// Phase 170: Add JsonParserBox methods for selfhost validation
|
||
let core_on = crate::config::env::joinir_core_enabled();
|
||
let is_target = match func_name.as_str() {
|
||
"main" => true, // Phase 188-Impl-1: Enable JoinIR for main function (Pattern 1)
|
||
"JoinIrMin.main/0" => true, // Phase 188-Impl-2: Enable JoinIR for JoinIrMin.main/0 (Pattern 2)
|
||
"JsonTokenizer.print_tokens/0" => {
|
||
if core_on {
|
||
true
|
||
} else {
|
||
std::env::var("HAKO_JOINIR_PRINT_TOKENS_MAIN")
|
||
.ok()
|
||
.as_deref()
|
||
== Some("1")
|
||
}
|
||
}
|
||
"ArrayExtBox.filter/2" => {
|
||
if core_on {
|
||
true
|
||
} else {
|
||
std::env::var("HAKO_JOINIR_ARRAY_FILTER_MAIN")
|
||
.ok()
|
||
.as_deref()
|
||
== Some("1")
|
||
}
|
||
}
|
||
// Phase 170-A-1: Enable JsonParserBox methods for JoinIR routing
|
||
"JsonParserBox._trim/1" => true,
|
||
"JsonParserBox._skip_whitespace/2" => true,
|
||
"JsonParserBox._match_literal/2" => true,
|
||
"JsonParserBox._parse_string/2" => true,
|
||
"JsonParserBox._parse_array/2" => true,
|
||
"JsonParserBox._parse_object/2" => true,
|
||
// Phase 170-A-1: Test methods (simplified versions)
|
||
"TrimTest.trim/1" => true,
|
||
"Main.trim/1" => true, // Phase 171-fix: Main box variant
|
||
"Main.trim_string_simple/1" => true, // Phase 33-13: Simple trim variant
|
||
"TrimTest.main/0" => true, // Phase 170: TrimTest.main for loop pattern test
|
||
_ => false,
|
||
};
|
||
|
||
if !is_target {
|
||
return Ok(None);
|
||
}
|
||
}
|
||
|
||
// Debug log when routing through JoinIR Frontend
|
||
// Phase 195: Check trace flags directly from JoinLoopTrace
|
||
let debug = trace::trace().is_loopform_enabled() || trace::trace().is_mainline_enabled();
|
||
trace::trace().routing("router", &func_name, "Routing through JoinIR Frontend mainline");
|
||
|
||
// Phase 49-3: Implement JoinIR Frontend integration
|
||
self.cf_loop_joinir_impl(condition, body, &func_name, debug)
|
||
}
|
||
|
||
/// Phase 49-3: JoinIR Frontend integration implementation
|
||
///
|
||
/// # Pipeline
|
||
/// 1. Build Loop AST → JSON v0 format (with "defs" array)
|
||
/// 2. AstToJoinIrLowerer::lower_program_json() → JoinModule
|
||
/// 3. convert_join_module_to_mir_with_meta() → MirModule
|
||
/// 4. Merge MIR blocks into current_function
|
||
///
|
||
/// # Phase 49-4 Note
|
||
///
|
||
/// JoinIR Frontend expects a complete function definition with:
|
||
/// - local variable initializations
|
||
/// - loop body
|
||
/// - return statement
|
||
///
|
||
/// Since cf_loop only has access to the loop condition and body,
|
||
/// we construct a minimal JSON v0 wrapper with function name "simple"
|
||
/// to match the JoinIR Frontend's expected pattern.
|
||
pub(in crate::mir::builder) fn cf_loop_joinir_impl(
|
||
&mut self,
|
||
condition: &ASTNode,
|
||
body: &[ASTNode],
|
||
func_name: &str,
|
||
debug: bool,
|
||
) -> Result<Option<ValueId>, String> {
|
||
use super::super::super::loop_frontend_binding::LoopFrontendBinding;
|
||
use crate::mir::join_ir::frontend::{AstToJoinIrLowerer, JoinFuncMetaMap};
|
||
use crate::mir::join_ir_vm_bridge::convert_join_module_to_mir_with_meta;
|
||
use crate::mir::types::ConstValue;
|
||
use crate::mir::MirInstruction;
|
||
use crate::r#macro::ast_json::ast_to_json;
|
||
|
||
// Phase 194: Use table-driven router instead of if/else chain
|
||
// This makes adding new patterns trivial - just add an entry to LOOP_PATTERNS table
|
||
use super::patterns::{route_loop_pattern, LoopPatternContext};
|
||
|
||
let ctx = LoopPatternContext::new(condition, body, &func_name, debug);
|
||
if let Some(result) = route_loop_pattern(self, &ctx)? {
|
||
// Phase 195: Use unified trace
|
||
trace::trace().routing("router", func_name, "Pattern router succeeded");
|
||
return Ok(Some(result));
|
||
}
|
||
|
||
// Phase 195: Use unified trace
|
||
trace::trace().routing("router", func_name, "Pattern router found no match, continuing to legacy path");
|
||
|
||
// Phase 50: Create appropriate binding based on function name
|
||
let binding = match func_name {
|
||
"JsonTokenizer.print_tokens/0" => LoopFrontendBinding::for_print_tokens(),
|
||
"ArrayExtBox.filter/2" => LoopFrontendBinding::for_array_filter(),
|
||
_ => {
|
||
// Phase 195: Use unified trace
|
||
trace::trace().routing("router", func_name, "No binding defined, falling back");
|
||
return Ok(None);
|
||
}
|
||
};
|
||
|
||
// Phase 195: Use unified trace
|
||
trace::trace().debug(
|
||
"router",
|
||
&format!(
|
||
"Using binding for '{}': counter={}, acc={:?}, pattern={:?}",
|
||
func_name, binding.counter_var, binding.accumulator_var, binding.pattern
|
||
),
|
||
);
|
||
|
||
// Step 1: Convert condition and body to JSON
|
||
let condition_json = ast_to_json(condition);
|
||
let mut body_json: Vec<serde_json::Value> = body.iter().map(|s| ast_to_json(s)).collect();
|
||
|
||
// Phase 50: Rename variables in body (e.g., "out" → "acc" for filter)
|
||
binding.rename_body_variables(&mut body_json);
|
||
|
||
// Phase 50: Generate Local declarations from binding
|
||
let (i_local, acc_local, n_local) = binding.generate_local_declarations();
|
||
|
||
// Phase 52/56: Build params from external_refs
|
||
// Instance methods need `me`, static methods need their parameters (arr, pred, etc.)
|
||
let mut params: Vec<serde_json::Value> = Vec::new();
|
||
|
||
// Phase 52: Add 'me' for instance methods
|
||
if binding.needs_me_receiver() {
|
||
// Phase 195: Use unified trace
|
||
trace::trace().debug("router", "Adding 'me' to params (instance method)");
|
||
params.push(serde_json::json!("me"));
|
||
}
|
||
|
||
// Phase 56: Add external_refs as parameters (arr, pred for filter)
|
||
for ext_ref in &binding.external_refs {
|
||
// Skip "me" and "me.*" as they're handled above
|
||
if ext_ref == "me" || ext_ref.starts_with("me.") {
|
||
continue;
|
||
}
|
||
// Phase 195: Use unified trace
|
||
trace::trace().debug("router", &format!("Adding '{}' to params (external_ref)", ext_ref));
|
||
params.push(serde_json::json!(ext_ref));
|
||
}
|
||
|
||
// Step 2: Construct JSON v0 format with "defs" array
|
||
// The function is named "simple" to match JoinIR Frontend's pattern matching
|
||
// Phase 50: Include i/acc/n Local declarations to satisfy JoinIR Frontend expectations
|
||
let program_json = serde_json::json!({
|
||
"defs": [
|
||
{
|
||
"name": "simple",
|
||
"params": params,
|
||
"body": {
|
||
"type": "Block",
|
||
"body": [
|
||
// Phase 50: Inject i/acc/n Local declarations
|
||
i_local,
|
||
acc_local,
|
||
n_local,
|
||
{
|
||
"type": "Loop",
|
||
"cond": condition_json, // JoinIR Frontend expects "cond" not "condition"
|
||
"body": body_json
|
||
},
|
||
// Return the accumulator (or null for side-effect loops)
|
||
{
|
||
"type": "Return",
|
||
"value": { "kind": "Variable", "name": "acc" }
|
||
}
|
||
]
|
||
}
|
||
}
|
||
]
|
||
});
|
||
|
||
// Phase 195: Use unified trace
|
||
trace::trace().debug(
|
||
"router",
|
||
&format!(
|
||
"Generated JSON v0 for {}: {}",
|
||
func_name,
|
||
serde_json::to_string_pretty(&program_json).unwrap_or_default()
|
||
),
|
||
);
|
||
|
||
// Step 3: Lower to JoinIR
|
||
// Phase 49-4: Use catch_unwind for graceful fallback on unsupported patterns
|
||
// The JoinIR Frontend may panic if the loop doesn't match expected patterns
|
||
// (e.g., missing variable initializations like "i must be initialized")
|
||
let join_module = {
|
||
let json_clone = program_json.clone();
|
||
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||
let mut lowerer = AstToJoinIrLowerer::new();
|
||
lowerer.lower_program_json(&json_clone)
|
||
}));
|
||
|
||
match result {
|
||
Ok(module) => module,
|
||
Err(e) => {
|
||
// Extract panic message for debugging
|
||
let panic_msg = if let Some(s) = e.downcast_ref::<&str>() {
|
||
s.to_string()
|
||
} else if let Some(s) = e.downcast_ref::<String>() {
|
||
s.clone()
|
||
} else {
|
||
"unknown panic".to_string()
|
||
};
|
||
|
||
// Phase 195: Use unified trace
|
||
trace::trace().debug(
|
||
"router",
|
||
&format!(
|
||
"JoinIR lowering failed for {}: {}, falling back to legacy",
|
||
func_name, panic_msg
|
||
),
|
||
);
|
||
// Return None to fall back to legacy LoopBuilder
|
||
return Ok(None);
|
||
}
|
||
}
|
||
};
|
||
// Phase 49-3 MVP: Use empty meta map (full if-analysis is Phase 40+ territory)
|
||
let join_meta = JoinFuncMetaMap::new();
|
||
|
||
// Phase 195: Use unified trace
|
||
trace::trace().joinir_stats(
|
||
"router",
|
||
join_module.functions.len(),
|
||
join_module
|
||
.functions
|
||
.values()
|
||
.map(|f| f.body.len())
|
||
.sum(),
|
||
);
|
||
|
||
// Step 4: Convert JoinModule to MIR
|
||
let mir_module = convert_join_module_to_mir_with_meta(&join_module, &join_meta)
|
||
.map_err(|e| format!("JoinIR→MIR conversion failed: {}", e.message))?;
|
||
|
||
// Phase 195: Use unified trace for MIR module stats
|
||
if trace::trace().is_joinir_enabled() {
|
||
trace::trace().debug(
|
||
"router",
|
||
&format!("MirModule has {} functions", mir_module.functions.len()),
|
||
);
|
||
for (name, func) in &mir_module.functions {
|
||
trace::trace().debug(
|
||
"router",
|
||
&format!(
|
||
" - {}: {} blocks, entry={:?}",
|
||
name,
|
||
func.blocks.len(),
|
||
func.entry_block
|
||
),
|
||
);
|
||
// Phase 189: Debug - show block contents
|
||
for (block_id, block) in &func.blocks {
|
||
trace::trace().blocks(
|
||
"router",
|
||
&format!("Block {:?}: {} instructions", block_id, block.instructions.len()),
|
||
);
|
||
for (i, inst) in block.instructions.iter().enumerate() {
|
||
trace::trace().instructions("router", &format!("[{}] {:?}", i, inst));
|
||
}
|
||
if let Some(ref term) = block.terminator {
|
||
trace::trace().instructions("router", &format!("terminator: {:?}", term));
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Step 5: Merge MIR blocks into current_function
|
||
// For Phase 49-3, we'll use a simplified approach:
|
||
// - Add generated blocks to current_function
|
||
// - Jump from current_block to the entry of generated loop
|
||
// - The loop exit becomes the new current_block
|
||
// Phase 188-Impl-3: Pass None for boundary (legacy path without boundary)
|
||
// Phase 189: Discard exit PHI result (legacy path doesn't need it)
|
||
let _ = self.merge_joinir_mir_blocks(&mir_module, None, debug)?;
|
||
|
||
// Return void for now (loop doesn't have a meaningful return value in this context)
|
||
let void_val = self.next_value_id();
|
||
self.emit_instruction(MirInstruction::Const {
|
||
dst: void_val,
|
||
value: ConstValue::Void,
|
||
})?;
|
||
|
||
Ok(Some(void_val))
|
||
}
|
||
}
|