// hakorune/src/mir/builder/utils.rs

use super::{BasicBlock, BasicBlockId};
use crate::mir::{BarrierOp, SpannedInstruction, TypeOpKind, WeakRefOp};
use std::sync::atomic::{AtomicUsize, Ordering};
// include path resolver removed (using handles modules)
// Optional builder debug logging
pub(super) fn builder_debug_enabled() -> bool {
std::env::var("NYASH_BUILDER_DEBUG").is_ok()
}
static BUILDER_DEBUG_COUNT: AtomicUsize = AtomicUsize::new(0);
pub(super) fn builder_debug_log(msg: &str) {
if builder_debug_enabled() {
// Optional cap: limit the number of builder debug lines to avoid flooding the terminal.
// Set via env: NYASH_BUILDER_DEBUG_LIMIT=<N> (default: unlimited)
if let Ok(cap_s) = std::env::var("NYASH_BUILDER_DEBUG_LIMIT") {
if let Ok(cap) = cap_s.parse::<usize>() {
let n = BUILDER_DEBUG_COUNT.fetch_add(1, Ordering::Relaxed);
if n >= cap {
return;
}
}
}
eprintln!("[BUILDER] {}", msg);
}
}
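// Illustrative use from builder code (a minimal sketch; `builder_debug_log` is the
// helper above, the message text is invented for the example):
//
//   builder_debug_log(&format!("lowering call: {}.{}", box_name, method));
//
// At runtime, set NYASH_BUILDER_DEBUG=1 to see the "[BUILDER] ..." lines and,
// optionally, NYASH_BUILDER_DEBUG_LIMIT=<N> to print only the first N of them.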
impl super::MirBuilder {
// ---- Value ID allocation (function-local or module-global) ----
/// Allocate a new ValueId in the appropriate context
/// - Inside function: uses function-local allocator
/// - Outside function: uses module-global allocator
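/// A standalone sketch of the same "innermost allocator wins" pattern, using
/// simplified stand-in types (not the real MirBuilder/MirFunction):
///
/// ```ignore
/// struct Counter(u32);
/// impl Counter {
///     fn next(&mut self) -> u32 { let id = self.0; self.0 += 1; id }
/// }
///
/// fn next_value_id(func: Option<&mut Counter>, module: &mut Counter) -> u32 {
///     match func {
///         Some(f) => f.next(),   // inside a function: function-local numbering
///         None => module.next(), // at module level: module-global numbering
///     }
/// }
/// ```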
#[inline]
pub(crate) fn next_value_id(&mut self) -> super::ValueId {
if let Some(ref mut f) = self.current_function {
f.next_value_id() // Function context
} else {
self.value_gen.next() // Module context
}
}
// ---- LocalSSA convenience (readability helpers) ----
#[allow(dead_code)]
#[inline]
pub(crate) fn local_recv(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::recv(self, v)
}
#[allow(dead_code)]
#[inline]
pub(crate) fn local_arg(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::arg(self, v)
}
#[allow(dead_code)]
#[inline]
pub(crate) fn local_cmp_operand(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::cmp_operand(self, v)
}
#[allow(dead_code)]
#[inline]
pub(crate) fn local_field_base(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::field_base(self, v)
}
#[allow(dead_code)]
#[inline]
pub(crate) fn local_cond(&mut self, v: super::ValueId) -> super::ValueId {
super::ssa::local::cond(self, v)
}
/// Ensure a basic block exists in the current function
pub(crate) fn ensure_block_exists(&mut self, block_id: BasicBlockId) -> Result<(), String> {
if let Some(ref mut function) = self.current_function {
if !function.blocks.contains_key(&block_id) {
let block = BasicBlock::new(block_id);
function.add_block(block);
}
Ok(())
} else {
Err("No current function".to_string())
}
}
/// Start a new basic block and set as current
pub(crate) fn start_new_block(&mut self, block_id: BasicBlockId) -> Result<(), String> {
if let Some(ref mut function) = self.current_function {
if !function.blocks.contains_key(&block_id) {
function.add_block(BasicBlock::new(block_id));
}
self.current_block = Some(block_id);
// Local SSA cache is per-block; clear on block switch
self.local_ssa_map.clear();
// BlockSchedule materialize cache is per-block as well
self.schedule_mat_map.clear();
// Entry materialization for pinned slots: re-read from variable_map after PHIs are emitted.
// This ensures pinned slots reflect the correct PHI values in merge blocks.
//
// Strategy: Instead of emitting Copy instructions (which would be before PHIs),
// we simply update the variable_map to point to the current block's values.
// LoopBuilder and IfBuilder already update variable_map with PHI values, so
// pinned slots will automatically pick up the correct values.
//
// No action needed here - just clear caches.
if !self.suppress_pin_entry_copy_next {
// Cache clearing is already done above, so nothing more to do here.
// The key insight: pinned slot variables are part of variable_map,
// and LoopBuilder/IfBuilder already manage PHIs for ALL variables in variable_map,
// including pinned slots.
}
if false && !self.suppress_pin_entry_copy_next {
// Keep old code for reference
// First pass: copy all pin slots and remember old->new mapping
let names: Vec<String> = self.variable_map.keys().cloned().collect();
let mut pin_renames: Vec<(super::ValueId, super::ValueId)> = Vec::new();
for name in names.iter() {
if !name.starts_with("__pin$") {
continue;
}
if let Some(&src) = self.variable_map.get(name) {
let dst = self.next_value_id();
self.emit_instruction(super::MirInstruction::Copy { dst, src })?;
crate::mir::builder::metadata::propagate::propagate(self, src, dst);
self.variable_map.insert(name.clone(), dst);
pin_renames.push((src, dst));
}
}
// Second pass: update any user variables that pointed to old pin ids to the new ones
if !pin_renames.is_empty() {
let snapshot: Vec<(String, super::ValueId)> = self
.variable_map
.iter()
.filter(|(k, _)| !k.starts_with("__pin$"))
.map(|(k, &v)| (k.clone(), v))
.collect();
for (k, v) in snapshot.into_iter() {
if let Some((_, newv)) = pin_renames.iter().find(|(oldv, _)| *oldv == v) {
self.variable_map.insert(k, *newv);
}
}
}
}
// Reset suppression flag after use (one-shot)
self.suppress_pin_entry_copy_next = false;
Ok(())
} else {
Err("No current function".to_string())
}
}
}
impl super::MirBuilder {
/// Emit a Box method call or plugin call (unified BoxCall)
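///
/// Decision flow (an illustrative summary of the code below, not a public API):
///
/// ```text
/// recv, args --LocalSSA--> in-block definitions
/// route = RouterPolicy::choose_route(box_type, method, ...)
/// if unified calls enabled && route == Unified && !in_unified_boxcall_fallback:
///     emit_unified_call(dst, Method { box_type, method, receiver }, args)
/// else:
///     emit MirInstruction::BoxCall { dst, box_val, method, method_id, args, effects }
/// ```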
pub(super) fn emit_box_or_plugin_call(
&mut self,
dst: Option<super::ValueId>,
box_val: super::ValueId,
method: String,
method_id: Option<u16>,
args: Vec<super::ValueId>,
effects: super::EffectMask,
) -> Result<(), String> {
// Ensure receiver has a definition in the current block to avoid undefined use across
// block boundaries (LoopForm/header, if-joins, etc.).
// LocalSSA: ensure receiver has an in-block definition (kind=0 = recv)
let box_val = self.local_recv(box_val);
// LocalSSA: ensure args are materialized in current block
let args: Vec<super::ValueId> = args.into_iter().map(|a| self.local_arg(a)).collect();
// Check environment variable for unified call usage, with safe overrides for core/user boxes
let use_unified_env = super::calls::call_unified::is_unified_call_enabled();
// First, try to determine the box type
let mut box_type: Option<String> = self.value_origin_newbox.get(&box_val).cloned();
if box_type.is_none() {
if let Some(t) = self.value_types.get(&box_val) {
match t {
super::MirType::String => box_type = Some("StringBox".to_string()),
super::MirType::Box(name) => box_type = Some(name.clone()),
_ => {}
}
}
}
// Route decision is centralized in RouterPolicyBox (spec unchanged)
let bx_name = box_type.clone().unwrap_or_else(|| "UnknownBox".to_string());
let route = crate::mir::builder::router::policy::choose_route(
&bx_name,
&method,
crate::mir::definitions::call_unified::TypeCertainty::Union,
args.len(),
);
if super::utils::builder_debug_enabled()
|| std::env::var("NYASH_LOCAL_SSA_TRACE").ok().as_deref() == Some("1")
{
if matches!(
method.as_str(),
"parse" | "substring" | "has_errors" | "length"
) {
eprintln!(
"[boxcall-decision] method={} bb={:?} recv=%{} class_hint={:?} prefer_legacy={}",
method,
self.current_block,
box_val.0,
box_type,
matches!(route, crate::mir::builder::router::policy::Route::BoxCall)
);
}
}
// Unified path from BoxCall helper is only allowed when we are not
// already in a BoxCall fallback originating from emit_unified_call.
// in_unified_boxcall_fallback is set by emit_unified_call's RouterPolicy
// guard when it has already decided that this call must be a BoxCall.
if use_unified_env
&& matches!(route, crate::mir::builder::router::policy::Route::Unified)
&& !self.in_unified_boxcall_fallback
{
let target = super::builder_calls::CallTarget::Method {
box_type,
method: method.clone(),
receiver: box_val,
};
return self.emit_unified_call(dst, target, args);
}
// Legacy implementation
self.emit_instruction(super::MirInstruction::BoxCall {
dst,
box_val,
method: method.clone(),
method_id,
args,
effects,
})?;
if let Some(d) = dst {
let mut recv_box: Option<String> = self.value_origin_newbox.get(&box_val).cloned();
if recv_box.is_none() {
if let Some(t) = self.value_types.get(&box_val) {
match t {
super::MirType::String => recv_box = Some("StringBox".to_string()),
super::MirType::Box(name) => recv_box = Some(name.clone()),
_ => {}
}
}
}
if let Some(bt) = recv_box {
if let Some(mt) = self.plugin_method_sigs.get(&(bt.clone(), method.clone())) {
self.value_types.insert(d, mt.clone());
} else {
// Phase 15.5: Unified plugin-based type resolution
// Former core boxes (StringBox, ArrayBox, MapBox) now use plugin_method_sigs only
// No special hardcoded inference - all boxes treated uniformly
}
}
}
Ok(())
}
#[allow(dead_code)]
pub(super) fn emit_type_check(
&mut self,
value: super::ValueId,
expected_type: String,
) -> Result<super::ValueId, String> {
let dst = self.next_value_id();
self.emit_instruction(super::MirInstruction::TypeOp {
dst,
op: TypeOpKind::Check,
value,
ty: super::MirType::Box(expected_type),
})?;
Ok(dst)
}
#[allow(dead_code)]
pub(super) fn emit_cast(
&mut self,
value: super::ValueId,
target_type: super::MirType,
) -> Result<super::ValueId, String> {
let dst = self.next_value_id();
self.emit_instruction(super::MirInstruction::TypeOp {
dst,
op: TypeOpKind::Cast,
value,
ty: target_type.clone(),
})?;
Ok(dst)
}
#[allow(dead_code)]
pub(super) fn emit_weak_new(
&mut self,
box_val: super::ValueId,
) -> Result<super::ValueId, String> {
if crate::config::env::mir_core13_pure() {
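// MIR core-13 "pure" mode: no WeakRef instruction is emitted; the original value is
// returned unchanged (assumption: weak refs are treated as no-ops in this reduced set).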
return Ok(box_val);
}
let dst = self.next_value_id();
self.emit_instruction(super::MirInstruction::WeakRef {
dst,
op: WeakRefOp::New,
value: box_val,
})?;
Ok(dst)
}
#[allow(dead_code)]
pub(super) fn emit_weak_load(
&mut self,
weak_ref: super::ValueId,
) -> Result<super::ValueId, String> {
if crate::config::env::mir_core13_pure() {
return Ok(weak_ref);
}
let dst = self.next_value_id();
self.emit_instruction(super::MirInstruction::WeakRef {
dst,
op: WeakRefOp::Load,
value: weak_ref,
})?;
Ok(dst)
}
#[allow(dead_code)]
pub(super) fn emit_barrier_read(&mut self, ptr: super::ValueId) -> Result<(), String> {
self.emit_instruction(super::MirInstruction::Barrier {
op: BarrierOp::Read,
ptr,
})
}
#[allow(dead_code)]
pub(super) fn emit_barrier_write(&mut self, ptr: super::ValueId) -> Result<(), String> {
self.emit_instruction(super::MirInstruction::Barrier {
op: BarrierOp::Write,
ptr,
})
}
/// Pin a block-crossing ephemeral value into a pseudo local slot and register it in variable_map
/// so it participates in PHI merges across branches/blocks. Safe default for correctness-first.
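/// Illustrative effect (a sketch; value and slot numbers are arbitrary):
///
/// ```text
/// // %7 is defined in a predecessor block but used across a loop/if boundary.
/// // pin_to_slot(%7, "recv") emits, in the current block:
/// //   %12 = copy %7
/// // and records variable_map["__pin$<counter>$recv"] = %12, so LoopBuilder /
/// // IfBuilder create PHIs for the slot like any other tracked variable.
/// ```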
pub(crate) fn pin_to_slot(
&mut self,
v: super::ValueId,
hint: &str,
) -> Result<super::ValueId, String> {
self.temp_slot_counter = self.temp_slot_counter.wrapping_add(1);
let slot_name = format!("__pin${}${}", self.temp_slot_counter, hint);
// Phase 25.1b: Use function-local ID allocator to avoid SSA verification failures
let dst = if let Some(ref mut f) = self.current_function {
f.next_value_id() // Function context: use local ID
} else {
self.value_gen.next() // Module context: use global ID
};
self.emit_instruction(super::MirInstruction::Copy { dst, src: v })?;
if super::utils::builder_debug_enabled()
|| std::env::var("NYASH_PIN_TRACE").ok().as_deref() == Some("1")
{
super::utils::builder_debug_log(&format!(
"pin slot={} src={} dst={}",
slot_name, v.0, dst.0
));
}
// Propagate lightweight metadata so downstream resolution/type inference remains stable
crate::mir::builder::metadata::propagate::propagate(self, v, dst);
// Remember pin slot name for both the original and the pinned value.
// LocalSSA uses this to redirect old pinned values to the latest slot value.
self.pin_slot_names.insert(v, slot_name.clone());
self.pin_slot_names.insert(dst, slot_name.clone());
self.variable_map.insert(slot_name, dst);
Ok(dst)
}
/// Ensure a value has a local definition in the current block by inserting a Copy.
#[allow(dead_code)]
pub(crate) fn materialize_local(
&mut self,
v: super::ValueId,
) -> Result<super::ValueId, String> {
// Phase 25.1b: Use function-local ID allocator to avoid SSA verification failures
let dst = if let Some(ref mut f) = self.current_function {
f.next_value_id() // Function context: use local ID
} else {
self.value_gen.next() // Module context: use global ID
};
self.emit_instruction(super::MirInstruction::Copy { dst, src: v })?;
// Propagate metadata (type/origin) from source to the new local copy
crate::mir::builder::metadata::propagate::propagate(self, v, dst);
Ok(dst)
}
/// Insert a Copy immediately after PHI nodes in the current block (position-stable).
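/// Why position matters (sketch): PHI nodes are kept grouped at the top of a block,
/// so a copy of a merged value must land after the whole PHI group, e.g.:
///
/// ```text
/// bb5:
///   %3 = phi [%1, bb3], [%2, bb4]
///   %9 = copy %3        ; inserted here, never before/between the PHIs
///   ...
/// ```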
#[allow(dead_code)]
pub(crate) fn insert_copy_after_phis(
&mut self,
dst: super::ValueId,
src: super::ValueId,
) -> Result<(), String> {
if let (Some(ref mut function), Some(bb)) = (&mut self.current_function, self.current_block)
{
if std::env::var("NYASH_SCHEDULE_TRACE").ok().as_deref() == Some("1") {
eprintln!(
"[utils/insert-copy-after-phis] bb={:?} dst=%{} src=%{} attempting...",
bb, dst.0, src.0
);
}
if let Some(block) = function.get_block_mut(bb) {
if std::env::var("NYASH_SCHEDULE_TRACE").ok().as_deref() == Some("1") {
eprintln!("[utils/insert-copy-after-phis] bb={:?} dst=%{} src=%{} phi_count={} SUCCESS",
bb, dst.0, src.0, block.phi_instructions().count());
}
// Insert the Copy immediately after the block's PHI group (position-stable)
block.insert_spanned_after_phis(SpannedInstruction {
inst: super::MirInstruction::Copy { dst, src },
span: self.current_span,
});
// Lightweight metadata propagation (unified)
crate::mir::builder::metadata::propagate::propagate(self, src, dst);
return Ok(());
} else {
if std::env::var("NYASH_SCHEDULE_TRACE").ok().as_deref() == Some("1") {
eprintln!("[utils/insert-copy-after-phis] bb={:?} dst=%{} src=%{} FAILED: block not found",
bb, dst.0, src.0);
}
}
}
Err("No current function/block to insert copy".to_string())
}
/// Ensure a value is safe to use in the current block by slotifying (pinning) it.
/// Currently correctness-first: always pin to get a block-local def and PHI participation.
pub(crate) fn ensure_slotified_for_use(
&mut self,
v: super::ValueId,
hint: &str,
) -> Result<super::ValueId, String> {
self.pin_to_slot(v, hint)
}
/// Local SSA: ensure a value has a definition in the current block and cache it per-block.
/// kind: 0 = recv, 1 = arg, 2 = compare operand, 4 = cond; any other value maps to LocalKind::Other
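/// Example (illustrative; the values are whatever the caller currently holds):
///
/// ```ignore
/// // materialize a receiver (kind 0) and an argument (kind 1) in the current block
/// let recv = self.local_ssa_ensure(recv, 0);
/// let arg0 = self.local_ssa_ensure(arg0, 1);
/// ```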
pub(crate) fn local_ssa_ensure(&mut self, v: super::ValueId, kind: u8) -> super::ValueId {
use super::ssa::local::{ensure, LocalKind};
let lk = match kind {
0 => LocalKind::Recv,
1 => LocalKind::Arg,
2 => LocalKind::CompareOperand,
4 => LocalKind::Cond,
x => LocalKind::Other(x),
};
ensure(self, v, lk)
}
}