Files
hakorune/src/mir/builder/utils.rs
nyash-codex 32a91e31ac feat(joinir): Phase 200-B/C/D capture analysis + Phase 201-A reserved_value_ids infra
Phase 200-B: FunctionScopeCaptureAnalyzer implementation
- analyze_captured_vars_v2() with structural loop matching
- CapturedEnv for immutable function-scope variables
- ParamRole::Condition for condition-only variables

Phase 200-C: ConditionEnvBuilder extension
- build_with_captures() integrates CapturedEnv into ConditionEnv
- fn_body propagation through LoopPatternContext to Pattern 2

Phase 200-D: E2E verification
- capture detection working for base, limit, n etc.
- Test files: phase200d_capture_minimal.hako, phase200d_capture_in_condition.hako

Phase 201-A: MirBuilder reserved_value_ids infrastructure
- reserved_value_ids: HashSet<ValueId> field in MirBuilder
- next_value_id() skips reserved IDs
- merge/mod.rs sets/clears reserved IDs around JoinIR merge

Phase 201: JoinValueSpace design document
- Param/Local/PHI disjoint regions design
- API: alloc_param(), alloc_local(), reserve_phi()
- Migration plan for Pattern 1-4 lowerers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 18:32:03 +09:00

537 lines
22 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use super::{BasicBlock, BasicBlockId};
use crate::mir::{BarrierOp, SpannedInstruction, TypeOpKind, WeakRefOp};
use std::sync::atomic::{AtomicUsize, Ordering};
// include path resolver removed (using handles modules)
// Optional builder debug logging
/// Report whether builder debug logging is switched on.
///
/// Logging is considered enabled whenever the `NYASH_BUILDER_DEBUG` environment
/// variable can be read successfully, regardless of its value (even empty).
pub(super) fn builder_debug_enabled() -> bool {
    matches!(std::env::var("NYASH_BUILDER_DEBUG"), Ok(_))
}
/// Number of log attempts counted so far while a valid `NYASH_BUILDER_DEBUG_LIMIT` is in effect.
static BUILDER_DEBUG_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Write one builder debug line to stderr, prefixed with `[BUILDER]`.
///
/// Nothing is printed unless builder debugging is enabled. When
/// `NYASH_BUILDER_DEBUG_LIMIT=<N>` is set to a value that parses as `usize`, only the
/// first `N` counted calls produce output; an unset or unparsable limit means no cap
/// (and the counter is not advanced).
pub(super) fn builder_debug_log(msg: &str) {
    if !builder_debug_enabled() {
        return;
    }
    let limit = std::env::var("NYASH_BUILDER_DEBUG_LIMIT")
        .ok()
        .and_then(|raw| raw.parse::<usize>().ok());
    if let Some(limit) = limit {
        // The counter is bumped on every capped call, including suppressed ones.
        let already_logged = BUILDER_DEBUG_COUNT.fetch_add(1, Ordering::Relaxed);
        if already_logged >= limit {
            return;
        }
    }
    eprintln!("[BUILDER] {}", msg);
}
impl super::MirBuilder {
// ---- Value ID allocation (function-local or module-global) ----
/// Allocate a new ValueId in the appropriate context
/// - Inside function: uses function-local allocator
/// - Outside function: uses module-global allocator
///
/// Phase 201-A: Skips reserved ValueIds (PHI dsts from LoopHeaderPhiBuilder)
/// to prevent carrier value corruption in JoinIR loops.
#[inline]
pub(crate) fn next_value_id(&mut self) -> super::ValueId {
loop {
let candidate = if let Some(ref mut f) = self.current_function {
f.next_value_id() // Function context
} else {
self.value_gen.next() // Module context
};
// Phase 201-A: Skip reserved PHI dst ValueIds
if !self.reserved_value_ids.contains(&candidate) {
return candidate;
}
// Reserved ID - try next one (loop continues)
if std::env::var("NYASH_201A_DEBUG").is_ok() {
eprintln!("[201-A] next_value_id: Skipping reserved {:?}", candidate);
}
}
}
// ---- LocalSSA convenience (readability helpers) ----
/// LocalSSA helper for a call receiver: forwards `v` (with this builder) to
/// `ssa::local::recv` and returns the `ValueId` that function yields.
#[allow(dead_code)]
#[inline]
pub(crate) fn local_recv(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::recv(self, v)
}
/// LocalSSA helper for a call argument: forwards `v` (with this builder) to
/// `ssa::local::arg` and returns the `ValueId` that function yields.
#[allow(dead_code)]
#[inline]
pub(crate) fn local_arg(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::arg(self, v)
}
/// LocalSSA helper for a comparison operand: forwards `v` (with this builder) to
/// `ssa::local::cmp_operand` and returns the `ValueId` that function yields.
#[allow(dead_code)]
#[inline]
pub(crate) fn local_cmp_operand(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::cmp_operand(self, v)
}
/// LocalSSA helper for the base of a field access: forwards `v` (with this builder) to
/// `ssa::local::field_base` and returns the `ValueId` that function yields.
#[allow(dead_code)]
#[inline]
pub(crate) fn local_field_base(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::field_base(self, v)
}
/// LocalSSA helper for a branch condition: forwards `v` (with this builder) to
/// `ssa::local::cond` and returns the `ValueId` that function yields.
#[allow(dead_code)]
#[inline]
pub(crate) fn local_cond(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::cond(self, v)
}
/// Make sure `block_id` is present in the function currently being built.
///
/// A new empty `BasicBlock` is added only when the function has no block with that
/// id yet; an existing block is left untouched.
///
/// # Errors
/// Returns `Err("No current function")` when no function is being built.
pub(crate) fn ensure_block_exists(&mut self, block_id: BasicBlockId) -> Result<(), String> {
    let func = match self.current_function.as_mut() {
        Some(func) => func,
        None => return Err("No current function".to_string()),
    };
    let already_present = func.blocks.contains_key(&block_id);
    if !already_present {
        func.add_block(BasicBlock::new(block_id));
    }
    Ok(())
}
/// Start a new basic block and make it the current insertion point.
///
/// Creates the block in the current function if it does not exist yet, switches
/// `current_block` to it, clears the per-block caches (`local_ssa_map`,
/// `schedule_mat_map`) and resets the one-shot `suppress_pin_entry_copy_next` flag.
///
/// # Errors
/// Returns `Err("No current function")` when no function is being built; in that
/// case no builder state is modified.
pub(crate) fn start_new_block(&mut self, block_id: BasicBlockId) -> Result<(), String> {
    let function = match self.current_function.as_mut() {
        Some(function) => function,
        None => return Err("No current function".to_string()),
    };
    if !function.blocks.contains_key(&block_id) {
        function.add_block(BasicBlock::new(block_id));
    }
    self.current_block = Some(block_id);
    // Local SSA cache is per-block; clear on block switch
    self.local_ssa_map.clear();
    // BlockSchedule materialize cache is per-block as well
    self.schedule_mat_map.clear();
    // Entry materialization for pinned slots (`__pin$…` entries in variable_map):
    // no Copy instructions are emitted here, because they would land before the PHIs
    // of a merge block. Pinned slots live in variable_map, and LoopBuilder/IfBuilder
    // already update variable_map with PHI values for ALL variables, so the slots pick
    // up the correct values without extra work. Clearing the caches above is all that
    // is needed. (The former copy-based implementation was unreachable behind
    // `if false` and has been removed; see version control history.)

    // Reset suppression flag after use (one-shot)
    self.suppress_pin_entry_copy_next = false;
    Ok(())
}
}
impl super::MirBuilder {
/// Phase 87: infer the return type of a BoxCall method (CoreMethodId-based).
///
/// Responsibility: return the result type of built-in Box methods in a type-safe way.
/// - Replaces the Phase 84-4-B hard-coded table with a CoreMethodId lookup
/// - Covers methods that are not registered in `plugin_method_sigs`
/// - Supplies the base-definition type information that PhiTypeResolver depends on
///
/// Returns `None` when the receiver has no recorded type, when that type is neither
/// `Box(_)` nor `String`, or when `CoreBoxId::from_name` does not know the Box name.
/// Every other case yields `Some(..)`, falling back to `MirType::Unknown`.
fn infer_boxcall_return_type(
    &self,
    box_val: super::ValueId,
    method: &str,
) -> Option<super::MirType> {
    use crate::runtime::{CoreBoxId, CoreMethodId};

    // 1-2. Look up the receiver's type and derive its Box name
    //      (String is treated as StringBox).
    let box_name = match self.value_types.get(&box_val)? {
        super::MirType::Box(name) => name.as_str(),
        super::MirType::String => "StringBox",
        _ => return None,
    };

    // 3. Phase 87: type-safe inference through CoreBoxId / CoreMethodId.
    let box_id = CoreBoxId::from_name(box_name)?;
    if let Some(core_method) = CoreMethodId::from_box_and_method(box_id, method) {
        // Return type declared for this core method.
        let type_name = core_method.return_type_name();
        let inferred = match type_name {
            "StringBox" | "IntegerBox" | "BoolBox" | "ArrayBox" | "FileBox" => {
                super::MirType::Box(type_name.to_string())
            }
            "Void" => super::MirType::Void,
            // "Unknown" and any unlisted name both map to Unknown.
            _ => super::MirType::Unknown,
        };
        return Some(inferred);
    }

    match method {
        // 4. Methods not defined by CoreMethodId (special boxes such as Stage1Cli).
        "parse" | "compile" | "execute" if box_name == "Stage1CliBox" => {
            return Some(super::MirType::Unknown);
        }
        // 5. Generic methods of Result-like boxes (used by QMark).
        "isOk" => return Some(super::MirType::Box("BoolBox".to_string())),
        "getValue" => return Some(super::MirType::Unknown), // the T of Result<T>
        _ => {}
    }

    // 6. Unknown method: register as Unknown (returning None would leave
    //    PhiTypeResolver without a usable type).
    let debug = std::env::var("NYASH_BOXCALL_TYPE_DEBUG").ok();
    if debug.as_deref() == Some("1") {
        eprintln!(
            "[boxcall_type] unknown method {}.{} → Unknown",
            box_name, method
        );
    }
    Some(super::MirType::Unknown)
}
/// Emit a Box method call or plugin call (unified BoxCall).
///
/// Steps, in order:
/// 1. The receiver and every argument are passed through `local_recv` / `local_arg`.
/// 2. The receiver's box type is looked up in `value_origin_newbox`, falling back to
///    `value_types` (`String` is treated as `"StringBox"`).
/// 3. A route is chosen with `router::policy::choose_route`. When unified calls are
///    enabled, the route is `Route::Unified` and `in_unified_boxcall_fallback` is not
///    set, the call is handed to `emit_unified_call` and its result is returned.
/// 4. Otherwise a `MirInstruction::BoxCall` is emitted. If `dst` is present and the
///    receiver's box type is known, the result type is recorded in `value_types`:
///    from `plugin_method_sigs` when a signature is registered, else from
///    `infer_boxcall_return_type`.
///
/// # Errors
/// Propagates the error from `emit_instruction` (legacy path) or from
/// `emit_unified_call` (unified path).
pub(super) fn emit_box_or_plugin_call(
&mut self,
dst: Option<super::ValueId>,
box_val: super::ValueId,
method: String,
method_id: Option<u16>,
args: Vec<super::ValueId>,
effects: super::EffectMask,
) -> Result<(), String> {
// Ensure receiver has a definition in the current block to avoid undefined use across
// block boundaries (LoopForm/header, if-joins, etc.).
// LocalSSA: ensure receiver has an in-block definition (kind=0 = recv)
let box_val = self.local_recv(box_val);
// LocalSSA: ensure args are materialized in current block
let args: Vec<super::ValueId> = args.into_iter().map(|a| self.local_arg(a)).collect();
// Check environment variable for unified call usage, with safe overrides for core/user boxes
let use_unified_env = super::calls::call_unified::is_unified_call_enabled();
// First, try to determine the box type
let mut box_type: Option<String> = self.value_origin_newbox.get(&box_val).cloned();
if box_type.is_none() {
if let Some(t) = self.value_types.get(&box_val) {
match t {
super::MirType::String => box_type = Some("StringBox".to_string()),
super::MirType::Box(name) => box_type = Some(name.clone()),
_ => {}
}
}
}
// Route decision is centralized in RouterPolicyBox (specification unchanged)
let bx_name = box_type.clone().unwrap_or_else(|| "UnknownBox".to_string());
let route = crate::mir::builder::router::policy::choose_route(
&bx_name,
&method,
crate::mir::definitions::call_unified::TypeCertainty::Union,
args.len(),
);
// Optional decision trace, limited to a handful of method names.
if super::utils::builder_debug_enabled()
|| std::env::var("NYASH_LOCAL_SSA_TRACE").ok().as_deref() == Some("1")
{
if matches!(
method.as_str(),
"parse" | "substring" | "has_errors" | "length"
) {
eprintln!(
"[boxcall-decision] method={} bb={:?} recv=%{} class_hint={:?} prefer_legacy={}",
method,
self.current_block,
box_val.0,
box_type,
matches!(route, crate::mir::builder::router::policy::Route::BoxCall)
);
}
}
// Unified path from BoxCall helper is only allowed when we are not
// already in a BoxCall fallback originating from emit_unified_call.
// in_unified_boxcall_fallback is set by emit_unified_call's RouterPolicy
// guard when it has already decided that this call must be a BoxCall.
if use_unified_env
&& matches!(route, crate::mir::builder::router::policy::Route::Unified)
&& !self.in_unified_boxcall_fallback
{
let target = super::builder_calls::CallTarget::Method {
box_type,
method: method.clone(),
receiver: box_val,
};
return self.emit_unified_call(dst, target, args);
}
// Legacy implementation
self.emit_instruction(super::MirInstruction::BoxCall {
dst,
box_val,
method: method.clone(),
method_id,
args,
effects,
})?;
// Record the result type for `dst`, if there is one.
if let Some(d) = dst {
// The receiver's box type is resolved again here, after the instruction was emitted.
let mut recv_box: Option<String> = self.value_origin_newbox.get(&box_val).cloned();
if recv_box.is_none() {
if let Some(t) = self.value_types.get(&box_val) {
match t {
super::MirType::String => recv_box = Some("StringBox".to_string()),
super::MirType::Box(name) => recv_box = Some(name.clone()),
_ => {}
}
}
}
if let Some(bt) = recv_box {
if let Some(mt) = self.plugin_method_sigs.get(&(bt.clone(), method.clone())) {
self.value_types.insert(d, mt.clone());
} else {
// Phase 84-4-B: return-type inference for built-in Box methods.
// Fallback for methods that are not registered in plugin_method_sigs.
if let Some(ret_ty) = self.infer_boxcall_return_type(box_val, &method) {
self.value_types.insert(d, ret_ty.clone());
if std::env::var("NYASH_BOXCALL_TYPE_TRACE").ok().as_deref() == Some("1") {
eprintln!(
"[boxcall_type] registered %{} = BoxCall(%{}, {}) → {:?}",
d.0, box_val.0, method, ret_ty
);
}
}
}
}
}
Ok(())
}
/// Emit a `TypeOp` of kind `Check` that tests `value` against the Box type named
/// `expected_type`.
///
/// Returns the freshly allocated destination `ValueId` of the check, or the error
/// reported by `emit_instruction`.
#[allow(dead_code)]
pub(super) fn emit_type_check(
    &mut self,
    value: super::ValueId,
    expected_type: String,
) -> Result<super::ValueId, String> {
    let result = self.next_value_id();
    let check = super::MirInstruction::TypeOp {
        dst: result,
        op: TypeOpKind::Check,
        value,
        ty: super::MirType::Box(expected_type),
    };
    self.emit_instruction(check).map(|()| result)
}
/// Emit a `TypeOp` of kind `Cast` that converts `value` to `target_type`.
///
/// Returns the freshly allocated destination `ValueId` of the cast, or the error
/// reported by `emit_instruction`.
#[allow(dead_code)]
pub(super) fn emit_cast(
    &mut self,
    value: super::ValueId,
    target_type: super::MirType,
) -> Result<super::ValueId, String> {
    let dst = self.next_value_id();
    self.emit_instruction(super::MirInstruction::TypeOp {
        dst,
        op: TypeOpKind::Cast,
        value,
        // `target_type` is owned and not used again, so it is moved rather than cloned.
        ty: target_type,
    })?;
    Ok(dst)
}
/// Emit a `WeakRef` instruction with op `New` for `box_val`.
///
/// When `crate::config::env::mir_core13_pure()` is true, no instruction is emitted
/// and `box_val` itself is returned. Otherwise the destination `ValueId` of the new
/// instruction is returned, or the error reported by `emit_instruction`.
#[allow(dead_code)]
pub(super) fn emit_weak_new(
    &mut self,
    box_val: super::ValueId,
) -> Result<super::ValueId, String> {
    if crate::config::env::mir_core13_pure() {
        Ok(box_val)
    } else {
        let weak = self.next_value_id();
        let inst = super::MirInstruction::WeakRef {
            dst: weak,
            op: WeakRefOp::New,
            value: box_val,
        };
        self.emit_instruction(inst).map(|()| weak)
    }
}
/// Emit a `WeakRef` instruction with op `Load` for `weak_ref`.
///
/// When `crate::config::env::mir_core13_pure()` is true, no instruction is emitted
/// and `weak_ref` itself is returned. Otherwise the destination `ValueId` of the new
/// instruction is returned, or the error reported by `emit_instruction`.
#[allow(dead_code)]
pub(super) fn emit_weak_load(
    &mut self,
    weak_ref: super::ValueId,
) -> Result<super::ValueId, String> {
    if crate::config::env::mir_core13_pure() {
        Ok(weak_ref)
    } else {
        let loaded = self.next_value_id();
        let inst = super::MirInstruction::WeakRef {
            dst: loaded,
            op: WeakRefOp::Load,
            value: weak_ref,
        };
        self.emit_instruction(inst).map(|()| loaded)
    }
}
/// Emit a `Barrier` instruction with op `Read` for `ptr`.
///
/// Returns whatever `emit_instruction` returns.
#[allow(dead_code)]
pub(super) fn emit_barrier_read(&mut self, ptr: super::ValueId) -> Result<(), String> {
    let barrier = super::MirInstruction::Barrier {
        op: BarrierOp::Read,
        ptr,
    };
    self.emit_instruction(barrier)
}
/// Emit a `Barrier` instruction with op `Write` for `ptr`.
///
/// Returns whatever `emit_instruction` returns.
#[allow(dead_code)]
pub(super) fn emit_barrier_write(&mut self, ptr: super::ValueId) -> Result<(), String> {
    let barrier = super::MirInstruction::Barrier {
        op: BarrierOp::Write,
        ptr,
    };
    self.emit_instruction(barrier)
}
/// Pin a block-crossing ephemeral value into a pseudo local slot and register it in variable_map
/// so it participates in PHI merges across branches/blocks. Safe default for correctness-first.
pub(crate) fn pin_to_slot(
&mut self,
v: super::ValueId,
hint: &str,
) -> Result<super::ValueId, String> {
self.temp_slot_counter = self.temp_slot_counter.wrapping_add(1);
let slot_name = format!("__pin${}${}", self.temp_slot_counter, hint);
// Phase 25.1b: Use function-local ID allocator to avoid SSA verification failures
let dst = if let Some(ref mut f) = self.current_function {
f.next_value_id() // Function context: use local ID
} else {
self.value_gen.next() // Module context: use global ID
};
self.emit_instruction(super::MirInstruction::Copy { dst, src: v })?;
if super::utils::builder_debug_enabled()
|| std::env::var("NYASH_PIN_TRACE").ok().as_deref() == Some("1")
{
super::utils::builder_debug_log(&format!(
"pin slot={} src={} dst={}",
slot_name, v.0, dst.0
));
}
// Propagate lightweight metadata so downstream resolution/type inference remains stable
crate::mir::builder::metadata::propagate::propagate(self, v, dst);
// Remember pin slot name for both the original and the pinned value.
// LocalSSA uses this to redirect old pinned values to the latest slot value.
self.pin_slot_names.insert(v, slot_name.clone());
self.pin_slot_names.insert(dst, slot_name.clone());
self.variable_map.insert(slot_name, dst);
Ok(dst)
}
/// Ensure a value has a local definition in the current block by inserting a Copy.
#[allow(dead_code)]
pub(crate) fn materialize_local(
&mut self,
v: super::ValueId,
) -> Result<super::ValueId, String> {
// Phase 25.1b: Use function-local ID allocator to avoid SSA verification failures
let dst = if let Some(ref mut f) = self.current_function {
f.next_value_id() // Function context: use local ID
} else {
self.value_gen.next() // Module context: use global ID
};
self.emit_instruction(super::MirInstruction::Copy { dst, src: v })?;
// Propagate metadata (type/origin) from source to the new local copy
crate::mir::builder::metadata::propagate::propagate(self, v, dst);
Ok(dst)
}
/// Insert a Copy immediately after PHI nodes in the current block (position-stable).
#[allow(dead_code)]
pub(crate) fn insert_copy_after_phis(
&mut self,
dst: super::ValueId,
src: super::ValueId,
) -> Result<(), String> {
if let (Some(ref mut function), Some(bb)) = (&mut self.current_function, self.current_block)
{
if std::env::var("NYASH_SCHEDULE_TRACE").ok().as_deref() == Some("1") {
eprintln!(
"[utils/insert-copy-after-phis] bb={:?} dst=%{} src=%{} attempting...",
bb, dst.0, src.0
);
}
if let Some(block) = function.get_block_mut(bb) {
if std::env::var("NYASH_SCHEDULE_TRACE").ok().as_deref() == Some("1") {
eprintln!("[utils/insert-copy-after-phis] bb={:?} dst=%{} src=%{} phi_count={} SUCCESS",
bb, dst.0, src.0, block.phi_instructions().count());
}
// Propagate effects on the block
block.insert_spanned_after_phis(SpannedInstruction {
inst: super::MirInstruction::Copy { dst, src },
span: self.current_span,
});
// Lightweight metadata propagation (unified)
crate::mir::builder::metadata::propagate::propagate(self, src, dst);
return Ok(());
} else {
if std::env::var("NYASH_SCHEDULE_TRACE").ok().as_deref() == Some("1") {
eprintln!("[utils/insert-copy-after-phis] bb={:?} dst=%{} src=%{} FAILED: block not found",
bb, dst.0, src.0);
}
}
}
Err("No current function/block to insert copy".to_string())
}
/// Ensure a value is safe to use in the current block by slotifying (pinning) it.
/// Currently correctness-first: always pin to get a block-local def and PHI participation.
///
/// This is a direct delegate to `pin_to_slot(v, hint)`: it returns the `ValueId`
/// that call produces and passes its error through unchanged.
pub(crate) fn ensure_slotified_for_use(
&mut self,
v: super::ValueId,
hint: &str,
) -> Result<super::ValueId, String> {
self.pin_to_slot(v, hint)
}
/// Local SSA: ensure a value has a definition in the current block and cache it per-block.
///
/// `kind` selects the `LocalKind` passed to `ssa::local::ensure`:
/// - `0` = `Recv`
/// - `1` = `Arg`
/// - `2` = `CompareOperand`
/// - `4` = `Cond`
/// - any other value `x` = `Other(x)`
///
/// NOTE(review): `3` has no dedicated arm and therefore maps to `Other(3)` — confirm
/// this is intended.
pub(crate) fn local_ssa_ensure(&mut self, v: super::ValueId, kind: u8) -> super::ValueId {
    use super::ssa::local::{ensure, LocalKind};
    let local_kind = match kind {
        0 => LocalKind::Recv,
        1 => LocalKind::Arg,
        2 => LocalKind::CompareOperand,
        4 => LocalKind::Cond,
        other => LocalKind::Other(other),
    };
    ensure(self, v, local_kind)
}
}