feat(joinir): Phase 188 JoinInlineBoundary + Pattern 1 working! 🎉

Major milestone: loop_min_while.hako outputs "0 1 2" correctly!

## JoinInlineBoundary (Option D from ChatGPT Pro design review)
- New struct for clean SSA boundary between JoinIR and host function
- JoinIR uses local ValueIds (0,1,2...) - no host ValueId dependency
- Copy injection at entry block connects host → JoinIR values

## Pattern 1 Simple While Loop
- Refactored to use pure local ValueIds
- Removed Pattern1Context dependency on host ValueIds
- Clean separation: lowerer generates, merger connects

## Key Design Principles (Box Theory)
- Box A: JoinIR Frontend (host-agnostic)
- Box B: Join→MIR Bridge (independent functions)
- Box C: JoinInlineBoundary (boundary info only)
- Box D: JoinMirInlineMerger (Copy injection)

## Files Changed
- NEW: inline_boundary.rs - JoinInlineBoundary struct
- control_flow.rs - merge with boundary, void return fix
- simple_while_minimal.rs - pure local ValueIds
- mod.rs - module export

Test: NYASH_DISABLE_PLUGINS=1 ./target/release/hakorune apps/tests/loop_min_while.hako
Output: 0\n1\n2 

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-12-05 13:46:44 +09:00
parent a7f3200fba
commit d303d24b43
4 changed files with 363 additions and 66 deletions

View File

@ -358,7 +358,8 @@ impl super::MirBuilder {
// - Add generated blocks to current_function
// - Jump from current_block to the entry of generated loop
// - The loop exit becomes the new current_block
self.merge_joinir_mir_blocks(&mir_module, debug)?;
// Phase 188-Impl-3: Pass None for boundary (legacy path without boundary)
self.merge_joinir_mir_blocks(&mir_module, None, debug)?;
// Return void for now (loop doesn't have a meaningful return value in this context)
let void_val = self.next_value_id();
@ -484,26 +485,38 @@ impl super::MirBuilder {
}
// Merge JoinIR blocks into current function
self.merge_joinir_mir_blocks(&mir_module, debug)?;
// Phase 188-Impl-3: Create and pass JoinInlineBoundary for Pattern 1
let boundary = crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary::new_inputs_only(
vec![ValueId(0)], // JoinIR's main() parameter (loop variable)
vec![loop_var_id], // Host's loop variable
);
self.merge_joinir_mir_blocks(&mir_module, Some(&boundary), debug)?;
// Return void/0 as loop result (Pattern 1 loops return 0)
// Use the current block to emit the constant
let zero_val = self.value_gen.next();
use crate::mir::types::ConstValue;
let current_block = self.current_block.ok_or_else(|| {
"[cf_loop/joinir/pattern1] No current block available".to_string()
})?;
// Phase 188-Impl-4-FIX: Return Void instead of trying to emit Const
//
// PROBLEM: Emitting instructions after merge_joinir_mir_blocks is fragile because:
// 1. merge creates exit block and switches to it
// 2. We try to add Const to exit block
// 3. But subsequent code (return statement) might overwrite the block
//
// SOLUTION: Loops don't produce values - they return Void.
// The subsequent "return 0" statement will emit its own Const + Return.
//
// This is cleaner because:
// - Loop lowering doesn't need to know about the return value
// - The return statement handles its own code generation
// - No risk of instructions being lost due to block management issues
if let Some(ref mut func) = self.current_function {
if let Some(block) = func.get_block_mut(current_block) {
block.instructions.push(crate::mir::MirInstruction::Const {
dst: zero_val,
value: ConstValue::Integer(0),
});
}
let void_val = crate::mir::builder::emission::constant::emit_void(self);
if debug {
eprintln!(
"[cf_loop/joinir/pattern1] Loop complete, returning Void {:?}",
void_val
);
}
Ok(Some(zero_val))
Ok(Some(void_val))
}
/// Phase 49-3.2: Merge JoinIR-generated MIR blocks into current_function
@ -521,9 +534,25 @@ impl super::MirBuilder {
/// - Pattern 1 (Simple While) generates 3 functions: entry + loop_step + k_exit
/// - All functions are flattened into current_function with global ID remapping
/// - Single exit block receives all Return instructions from all functions
///
/// # Phase 188-Impl-3: JoinInlineBoundary Support
///
/// When `boundary` is provided, injects Copy instructions at the entry block
/// to connect host ValueIds to JoinIR local ValueIds:
///
/// ```text
/// entry_block:
/// // Injected by boundary
/// ValueId(100) = Copy ValueId(4) // join_input → host_input
/// // Original JoinIR instructions follow...
/// ```
///
/// This enables clean separation: JoinIR uses local IDs (0,1,2...),
/// host uses its own IDs, and Copy instructions bridge the gap.
fn merge_joinir_mir_blocks(
&mut self,
mir_module: &crate::mir::MirModule,
boundary: Option<&crate::mir::join_ir::lowering::inline_boundary::JoinInlineBoundary>,
debug: bool,
) -> Result<(), String> {
use crate::mir::{BasicBlock, BasicBlockId, MirInstruction, ValueId};
@ -588,14 +617,19 @@ impl super::MirBuilder {
// 3. Collect all ValueIds used across ALL functions (Phase 189)
// Also build a map of ValueId → function name for Call→Jump conversion
// Phase 188-Impl-3: Also collect function parameters for tail call conversion
if debug {
eprintln!("[cf_loop/joinir] Phase 189: Collecting value IDs from all functions");
}
let mut used_values: std::collections::BTreeSet<ValueId> =
std::collections::BTreeSet::new();
let mut value_to_func_name: HashMap<ValueId, String> = HashMap::new();
let mut function_params: HashMap<String, Vec<ValueId>> = HashMap::new();
for (func_name, func) in &mir_module.functions {
// Phase 188-Impl-3: Collect function parameters for tail call conversion
function_params.insert(func_name.clone(), func.params.clone());
for func in mir_module.functions.values() {
for block in func.blocks.values() {
Self::collect_values_in_block(block, &mut used_values);
// Phase 189: Track Const String instructions that define function names
@ -731,6 +765,7 @@ impl super::MirBuilder {
}
// Second pass: Insert parameter bindings for tail calls
// Phase 188-Impl-3: Use actual parameter ValueIds from target function
if let Some((target_block, args)) = tail_call_target {
if debug {
eprintln!(
@ -739,20 +774,36 @@ impl super::MirBuilder {
);
}
// Insert Copy instructions for parameter binding
for (i, arg_val_remapped) in args.iter().enumerate() {
let param_val_original = ValueId(i as u32);
if let Some(&param_val_remapped) = value_map.get(&param_val_original) {
new_block.instructions.push(MirInstruction::Copy {
dst: param_val_remapped,
src: *arg_val_remapped,
});
// Find the target function name from the target_block
// We need to reverse-lookup the function name from the entry block
let mut target_func_name: Option<String> = None;
for (fname, &entry_block) in &function_entry_map {
if entry_block == target_block {
target_func_name = Some(fname.clone());
break;
}
}
if debug {
eprintln!(
"[cf_loop/joinir] Param binding: arg {:?} → param {:?}",
arg_val_remapped, param_val_remapped
);
if let Some(target_func_name) = target_func_name {
if let Some(target_params) = function_params.get(&target_func_name) {
// Insert Copy instructions for parameter binding
for (i, arg_val_remapped) in args.iter().enumerate() {
if i < target_params.len() {
let param_val_original = target_params[i];
if let Some(&param_val_remapped) = value_map.get(&param_val_original) {
new_block.instructions.push(MirInstruction::Copy {
dst: param_val_remapped,
src: *arg_val_remapped,
});
if debug {
eprintln!(
"[cf_loop/joinir] Param binding: arg {:?} → param {:?}",
arg_val_remapped, param_val_remapped
);
}
}
}
}
}
}
@ -804,6 +855,62 @@ impl super::MirBuilder {
}
}
// Phase 188-Impl-3: Inject Copy instructions for boundary inputs
if let Some(boundary) = boundary {
// Get entry function's entry block (first function by convention)
let (entry_func_name, entry_func) = mir_module
.functions
.iter()
.next()
.ok_or("JoinIR module has no functions")?;
let entry_block_remapped = block_map[&(entry_func_name.clone(), entry_func.entry_block)];
if debug {
eprintln!(
"[cf_loop/joinir] Phase 188-Impl-3: Injecting {} Copy instructions at entry block {:?}",
boundary.join_inputs.len(),
entry_block_remapped
);
}
// Inject Copy instructions: join_input_remapped = Copy host_input
if let Some(ref mut current_func) = self.current_function {
if let Some(entry_block) = current_func.get_block_mut(entry_block_remapped) {
// Insert Copy instructions at the BEGINNING of the block
let mut copy_instructions = Vec::new();
for (join_in, host_in) in boundary.join_inputs.iter().zip(&boundary.host_inputs) {
// join_in is JoinIR's local ValueId (e.g., ValueId(0))
// host_in is host function's ValueId (e.g., ValueId(4))
// We need to remap join_in to the merged space
if let Some(&join_in_remapped) = value_map.get(join_in) {
copy_instructions.push(MirInstruction::Copy {
dst: join_in_remapped,
src: *host_in,
});
if debug {
eprintln!(
"[cf_loop/joinir] Copy boundary: {:?} (host) → {:?} (join_remapped)",
host_in, join_in_remapped
);
}
} else {
if debug {
eprintln!(
"[cf_loop/joinir] WARNING: join_input {:?} not found in value_map",
join_in
);
}
}
}
// Insert at beginning (reverse order so they appear in correct order)
for inst in copy_instructions.into_iter().rev() {
entry_block.instructions.insert(0, inst);
}
}
}
}
// 6. Create exit block (empty for now, will be populated after loop)
if let Some(ref mut func) = self.current_function {
let exit_block = BasicBlock::new(exit_block_id);

View File

@ -0,0 +1,168 @@
//! Phase 188-Impl-3: JoinInlineBoundary - Boundary information for JoinIR inlining
//!
//! This module defines the boundary between JoinIR fragments and the host MIR function.
//! It enables clean separation of concerns:
//!
//! - **Box A**: JoinIR Frontend (doesn't know about host ValueIds)
//! - **Box B**: Join→MIR Bridge (converts to MIR using local ValueIds)
//! - **Box C**: JoinInlineBoundary (stores boundary info - THIS FILE)
//! - **Box D**: JoinMirInlineMerger (injects Copy instructions at boundary)
//!
//! ## Design Philosophy
//!
//! The JoinIR lowerer should work with **local ValueIds** (0, 1, 2, ...) without
//! knowing anything about the host function's ValueId space. This ensures:
//!
//! 1. **Modularity**: JoinIR lowerers are pure transformers
//! 2. **Reusability**: Same lowerer can be used in different contexts
//! 3. **Testability**: JoinIR can be tested independently
//! 4. **Correctness**: SSA properties are maintained via explicit Copy instructions
//!
//! ## Example
//!
//! For `loop(i < 3) { print(i); i = i + 1 }`:
//!
//! ```text
//! Host Function:
//! ValueId(4) = Const 0 // i = 0 in host
//!
//! JoinIR Fragment (uses local IDs 0, 1, 2, ...):
//! ValueId(0) = param // i_param (local to JoinIR)
//! ValueId(1) = Const 3
//! ValueId(2) = Compare ...
//!
//! Boundary:
//! join_inputs: [ValueId(0)] // JoinIR's param slot
//! host_inputs: [ValueId(4)] // Host's `i` variable
//!
//! Merged MIR (with Copy injection):
//! entry:
//! ValueId(100) = Copy ValueId(4) // Connect host→JoinIR
//! ValueId(101) = Const 3
//! ...
//! ```
use crate::mir::ValueId;
/// Boundary information for inlining a JoinIR fragment into a host function
///
/// This structure captures the "interface" between a JoinIR fragment and the
/// host function, allowing the merger to inject necessary Copy instructions
/// to connect the two SSA value spaces.
///
/// # Design Note
///
/// This is a **pure data structure** with no logic. All transformation logic
/// lives in the merger (merge_joinir_mir_blocks).
#[derive(Debug, Clone)]
pub struct JoinInlineBoundary {
/// JoinIR-local ValueIds that act as "input slots"
///
/// These are the ValueIds used **inside** the JoinIR fragment to refer
/// to values that come from the host. They should be small sequential
/// IDs (0, 1, 2, ...) since JoinIR lowerers allocate locally.
///
/// Example: For a loop variable `i`, JoinIR uses ValueId(0) as the parameter.
pub join_inputs: Vec<ValueId>,
/// Host-function ValueIds that provide the input values
///
/// These are the ValueIds from the **host function** that correspond to
/// the join_inputs. The merger will inject Copy instructions to connect
/// host_inputs[i] → join_inputs[i].
///
/// Example: If host has `i` as ValueId(4), then host_inputs = [ValueId(4)].
pub host_inputs: Vec<ValueId>,
/// JoinIR-local ValueIds that represent outputs (if any)
///
/// For loops that produce values (e.g., loop result), these are the
/// JoinIR-local ValueIds that should be visible to the host after inlining.
///
/// Currently unused for Pattern 1 (Simple While), reserved for future patterns.
pub join_outputs: Vec<ValueId>,
/// Host-function ValueIds that receive the outputs
///
/// These are the destination ValueIds in the host function that should
/// receive the values from join_outputs.
///
/// Currently unused for Pattern 1 (Simple While), reserved for future patterns.
pub host_outputs: Vec<ValueId>,
}
impl JoinInlineBoundary {
/// Create a new boundary with input mappings only
///
/// This is the common case for loops like Pattern 1 where:
/// - Inputs: loop variables (e.g., `i` in `loop(i < 3)`)
/// - Outputs: none (loop returns void/0)
pub fn new_inputs_only(join_inputs: Vec<ValueId>, host_inputs: Vec<ValueId>) -> Self {
assert_eq!(
join_inputs.len(),
host_inputs.len(),
"join_inputs and host_inputs must have same length"
);
Self {
join_inputs,
host_inputs,
join_outputs: vec![],
host_outputs: vec![],
}
}
/// Create a new boundary with both inputs and outputs
///
/// Reserved for future loop patterns that produce values.
#[allow(dead_code)]
pub fn new_with_outputs(
join_inputs: Vec<ValueId>,
host_inputs: Vec<ValueId>,
join_outputs: Vec<ValueId>,
host_outputs: Vec<ValueId>,
) -> Self {
assert_eq!(
join_inputs.len(),
host_inputs.len(),
"join_inputs and host_inputs must have same length"
);
assert_eq!(
join_outputs.len(),
host_outputs.len(),
"join_outputs and host_outputs must have same length"
);
Self {
join_inputs,
host_inputs,
join_outputs,
host_outputs,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_boundary_inputs_only() {
let boundary = JoinInlineBoundary::new_inputs_only(
vec![ValueId(0)], // JoinIR uses ValueId(0) for loop var
vec![ValueId(4)], // Host has loop var at ValueId(4)
);
assert_eq!(boundary.join_inputs.len(), 1);
assert_eq!(boundary.host_inputs.len(), 1);
assert_eq!(boundary.join_outputs.len(), 0);
assert_eq!(boundary.host_outputs.len(), 0);
}
#[test]
#[should_panic(expected = "join_inputs and host_inputs must have same length")]
fn test_boundary_mismatched_inputs() {
JoinInlineBoundary::new_inputs_only(
vec![ValueId(0), ValueId(1)],
vec![ValueId(4)],
);
}
}

View File

@ -27,6 +27,7 @@ pub mod if_merge; // Phase 33-7
pub mod if_phi_context; // Phase 61-1
pub mod if_phi_spec; // Phase 61-2
pub mod if_select; // Phase 33
pub mod inline_boundary; // Phase 188-Impl-3: JoinIR→Host boundary
pub mod loop_form_intake;
pub mod loop_patterns; // Phase 188: Pattern-based loop lowering (3 patterns)
pub mod loop_scope_shape;

View File

@ -50,17 +50,21 @@ use crate::mir::join_ir::{
use crate::mir::ValueId;
/// Context passed from the host function to the Pattern 1 lowerer
///
/// Phase 188-Impl-3: This context is now REMOVED - JoinIR uses only local ValueIds
/// The boundary mapping is handled by JoinInlineBoundary instead.
#[deprecated(since = "188-Impl-3", note = "Use JoinInlineBoundary for host integration")]
pub struct Pattern1Context {
/// The loop variable ValueId from the host function (e.g., ValueId(6) for `i`)
/// DEPRECATED: This is no longer used, JoinIR uses local ValueIds
pub loop_var: ValueId,
/// ValueId allocator function
/// DEPRECATED: JoinIR allocates sequentially from 0
pub value_allocator: Box<dyn FnMut() -> ValueId>,
}
impl Pattern1Context {
/// Create a standalone context with hardcoded ValueIds (for backward compatibility)
pub fn standalone() -> Self {
let mut counter = 1000u32;
let mut counter = 0u32;
Self {
loop_var: ValueId(counter),
value_allocator: Box::new(move || {
@ -73,34 +77,51 @@ impl Pattern1Context {
/// Lower Pattern 1 (Simple While Loop) to JoinIR
///
/// This is a minimal implementation for loop_min_while.hako.
/// It generates JoinIR that integrates with the host function's variable bindings.
/// # Phase 188-Impl-3: Pure JoinIR Fragment Generation
///
/// # Phase 188-Impl-2: Host Variable Integration
/// This version generates JoinIR using **local ValueIds only** (0, 1, 2, ...).
/// It has NO knowledge of the host function's ValueId space. The boundary mapping
/// is handled separately via JoinInlineBoundary.
///
/// This version accepts the host's loop variable ValueId and allocates fresh IDs
/// for intermediate values. This ensures the generated JoinIR connects properly
/// to the host function's variable bindings.
/// ## Design Philosophy
///
/// If called without a context (from legacy code), it uses standalone mode with
/// hardcoded ValueIds for backward compatibility.
/// - **Box A**: JoinIR Frontend (doesn't know about host ValueIds)
/// - **Box B**: This function - converts to JoinIR with local IDs
/// - **Box C**: JoinInlineBoundary - stores boundary info
/// - **Box D**: merge_joinir_mir_blocks - injects Copy instructions
///
/// This clean separation ensures JoinIR lowerers are:
/// - Pure transformers (no side effects)
/// - Reusable (same lowerer works in any context)
/// - Testable (can test JoinIR independently)
///
/// # Arguments
///
/// * `_scope` - LoopScopeShape (reserved for future generic implementation)
/// * `ctx` - Pattern1Context containing host variable bindings (or None for standalone)
/// * `_ctx` - DEPRECATED: No longer used, kept for backward compatibility
///
/// # Returns
///
/// * `Some(JoinModule)` - Successfully lowered to JoinIR
/// * `None` - Pattern not matched (fallback to other lowerers)
///
/// # Boundary Contract
///
/// This function returns a JoinModule with:
/// - **Input slot**: ValueId(0) in loop_step function represents the loop variable
/// - **Caller responsibility**: Create JoinInlineBoundary to map ValueId(0) to host's loop var
pub fn lower_simple_while_minimal(
_scope: LoopScopeShape,
ctx: Option<Pattern1Context>,
_ctx: Option<Pattern1Context>,
) -> Option<JoinModule> {
let mut ctx = ctx.unwrap_or_else(Pattern1Context::standalone);
// Phase 188-Impl-1: Hardcoded JoinIR for loop_min_while.hako
// This establishes the infrastructure. Generic implementation in Phase 188-Impl-2+.
// Phase 188-Impl-3: Use local ValueId allocator (sequential from 0)
// JoinIR has NO knowledge of host ValueIds - boundary handled separately
let mut value_counter = 0u32;
let mut alloc_value = || {
let id = ValueId(value_counter);
value_counter += 1;
id
};
let mut join_module = JoinModule::new();
@ -112,32 +133,32 @@ pub fn lower_simple_while_minimal(
let k_exit_id = JoinFuncId::new(2);
// ==================================================================
// ValueId allocation (Phase 188-Impl-2: Use host variable + allocator)
// ValueId allocation (Phase 188-Impl-3: Sequential local IDs)
// ==================================================================
// Host's loop variable (e.g., ValueId(6) for `i`)
let i_init = ctx.loop_var;
// Allocate fresh IDs for local values
let loop_result = (ctx.value_allocator)();
let const_0_main = (ctx.value_allocator)();
// main() locals
let i_init = alloc_value(); // ValueId(0) - loop init value
let loop_result = alloc_value(); // ValueId(1) - result from loop_step
let const_0_main = alloc_value(); // ValueId(2) - return value
// loop_step locals
let i_param = (ctx.value_allocator)();
let const_3 = (ctx.value_allocator)();
let cmp_lt = (ctx.value_allocator)();
let exit_cond = (ctx.value_allocator)();
let const_1 = (ctx.value_allocator)();
let i_next = (ctx.value_allocator)();
let i_param = alloc_value(); // ValueId(3) - parameter
let const_3 = alloc_value(); // ValueId(4) - comparison constant
let cmp_lt = alloc_value(); // ValueId(5) - i < 3
let exit_cond = alloc_value(); // ValueId(6) - !(i < 3)
let const_1 = alloc_value(); // ValueId(7) - increment constant
let i_next = alloc_value(); // ValueId(8) - i + 1
// k_exit locals
let const_0_exit = alloc_value(); // ValueId(9) - exit return value
// ==================================================================
// main() function
// ==================================================================
let mut main_func = JoinFunction::new(main_id, "main".to_string(), vec![]);
// Phase 188-Impl-3: main() takes i as a parameter (boundary input)
// The host will inject a Copy instruction: i_init_local = Copy host_i
let mut main_func = JoinFunction::new(main_id, "main".to_string(), vec![i_init]);
// Phase 188-Impl-2: Skip i_init = 0 (host already initialized the variable)
// The host's ValueId (i_init) is already bound to 0 in the host function
// result = loop_step(i_init) ← Use host's i directly
// result = loop_step(i_init)
main_func.body.push(JoinInst::Call {
func: loop_step_id,
args: vec![i_init],
@ -244,7 +265,7 @@ pub fn lower_simple_while_minimal(
let mut k_exit_func = JoinFunction::new(k_exit_id, "k_exit".to_string(), vec![]);
// return 0 (Pattern 1 has no exit values)
let const_0_exit = ValueId(3000);
// Phase 188-Impl-3: Use pre-allocated const_0_exit (ValueId(9))
k_exit_func.body.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_0_exit,
value: ConstValue::Integer(0),