diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index 6cf0735b..422d88aa 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -5,7 +5,50 @@ Summary - Keep fallbacks minimal; fix MIR annotations first. - ExternCall(console/debug) auto‑selects ptr/handle by IR type. - StringBox NewBox i8* fast path; print/log choose automatically. - - Implement multi-function lowering and Call lowering for MIR14. +- Implement multi-function lowering and Call lowering for MIR14. + +Update — 2025-09-12 (LLVM flow + BB naming) +- codegen/mod.rs match arms cleanup: + - BinOp: unify to instructions::lower_binop(...) + - BoxCall: delegate solely to instructions::lower_boxcall(...); removed unreachable legacy code +- BasicBlock naming made function‑scoped and unique: create_basic_blocks now prefixes with function label (e.g., Main_join_2_bb23) +- Terminator fallback: when a MIR block lacks a terminator, emit a conservative jump to the next block (or entry if last) +- Build: cargo build --features llvm passes +- AOT emit status: improved (bb name collision resolved), but verifier still flags a missing terminator in Main.esc_json/1 (e.g., Main_esc_json_1_bb88) + - Likely cause: flow lowering edge case; MIR dump is correct, LLVM lowering missed a terminator + - Plan below addresses this with hardened flow lowering and instrumentation + +Hot Update — 2025-09-12 (quick) +- Flow: moved function verify to post‑lower; added terminator fallback only when MIR lacks one +- Compare: allow ptr↔int comparisons for all ops via ptr→i64 bridge +- Strings: substring now accepts i64(handle) receiver (i2p); len/lastIndexOf stable +- Arrays: method_id未注入でも get/set/push/length を NyRT 経由で処理 +- BoxCall: println→env.console.log フォールバック、同モジュール関数呼び出し/名前指定 invoke_by_name 経路を追加 +- PHI: 文字列/Box等を含む場合は i8* を選択する型推定に改善 +- 現在のブロッカー: esc_json/1 で「phi incoming value missing」 + - 対応: emit_jump/emit_branch の incoming 配線をログ付きで点検し、値未定義箇所(by‑name/fast‑path戻り)を補完 + +Plan — PHI/SSA Hardening (Sealed SSA) +- Sealed SSA 入れ替え(安全に段階導入) + - Blockごとに `sealed: bool` と `incomplete_phis: Map` を保持 + - 値取得APIを一本化: `value_at_end_of_block(var, bb)` で vmap を再帰解決+仮PHI生成 + - `seal(bb)` で pred 確定後に incomplete_phis を埋め、`fold_trivial_phi` で単純化 +- 配線の方向を整理 + - emit_jump/emit_branch では直接 incoming を配線しない(to 側で必要時に解決) + - fast/slow 両レーンは同じ Var に書く(合流で拾えるように) +- 型の前処理を一箇所へ + - Bool は i1 固定(必要なら zext は PHI 外側) + - ptr/int 混在禁止。必要箇所で ptrtoint/inttoptr を生成し、PHI では等型を保証 +- 計測と検証 + - `seal(bb)` 時に incomplete_phis が残れば panic(場所特定) + - `[PHI] add incoming var=.. pred=.. ty=..` をデバッグ出力 + - verify は関数降下完了後に 1 回 +- フォールバックのゲート + - by-name invoke は環境変数で明示ON(デフォルトOFF)にする方針に切替(ノイズ低減) + +Refactor Policy +- 慌てず小さな箱(モジュール)を積む。必要なら随時リファクタリングOK。 +- 代替案(必要時のみ): llvmlite の薄層で最低限の命令面を実装し、dep_tree_min_string を先に通す。 Done (today) - BoxCall legacy block removed in LLVM codegen; delegation only. @@ -41,6 +84,16 @@ Refactor — LLVM codegen instructions modularized (done) Next (short, focused) - Call Lowering の分離(完了): `instructions/call.rs` に分割し、`mod.rs` から委譲。 - 多関数 Lower 検証: selfhost minimal(dep_tree_min_string)を LLVM で通す(必要なら型注釈の微調整)。 +- Flow lowering hardening (in progress next): + - Ensure every lowered block has a terminator; use builder.get_insert_block().get_terminator() guard before fallback + - Instrument per‑block lowering (bid, has terminator?, emitted kind) to isolate misses + - Keep fallback minimal and only when MIR.block.terminator is None and LLVM has no terminator +- MIR readable debug tools: + - Add --dump-mir-readable to print Nyash‑like pseudo code per function/block + - Optional DOT output (follow‑up) +- Debug hints in MIR (debug builds only): + - Add #[cfg(debug_assertions)] fields like MirInstruction::debug_hint and MirMetadata::block_sources + - Gate emission by env (NYASH_MIR_DEBUG=1) - Map コア専用エントリ化(env.box.new 特例整理)と代表スモークの常時化(CI) - types.rs の将来分割(任意): - `types/convert.rs`(i64<->ptr, f64→box), `types/classify.rs`, `types/map_types.rs` diff --git a/src/backend/llvm/compiler/codegen/instructions/arith.rs b/src/backend/llvm/compiler/codegen/instructions/arith.rs index 43a04e08..01cf09e7 100644 --- a/src/backend/llvm/compiler/codegen/instructions/arith.rs +++ b/src/backend/llvm/compiler/codegen/instructions/arith.rs @@ -148,48 +148,44 @@ pub(in super::super) fn lower_compare<'ctx>( } } else if let (BasicValueEnum::PointerValue(lp), BasicValueEnum::IntValue(ri)) = (lv, rv) { use CompareOp as C; - match op { - C::Eq | C::Ne => { - let i64t = codegen.context.i64_type(); - let li = codegen - .builder - .build_ptr_to_int(lp, i64t, "pi_l") - .map_err(|e| e.to_string())?; - let pred = if matches!(op, C::Eq) { - inkwell::IntPredicate::EQ - } else { - inkwell::IntPredicate::NE - }; - codegen - .builder - .build_int_compare(pred, li, ri, "pcmpi") - .map_err(|e| e.to_string())? - .into() - } - _ => return Err("unsupported pointer-int comparison (only Eq/Ne)".to_string()), - } + let i64t = codegen.context.i64_type(); + let li = codegen + .builder + .build_ptr_to_int(lp, i64t, "pi_l") + .map_err(|e| e.to_string())?; + let pred = match op { + C::Eq => inkwell::IntPredicate::EQ, + C::Ne => inkwell::IntPredicate::NE, + C::Lt => inkwell::IntPredicate::SLT, + C::Le => inkwell::IntPredicate::SLE, + C::Gt => inkwell::IntPredicate::SGT, + C::Ge => inkwell::IntPredicate::SGE, + }; + codegen + .builder + .build_int_compare(pred, li, ri, "pcmpi") + .map_err(|e| e.to_string())? + .into() } else if let (BasicValueEnum::IntValue(li), BasicValueEnum::PointerValue(rp)) = (lv, rv) { use CompareOp as C; - match op { - C::Eq | C::Ne => { - let i64t = codegen.context.i64_type(); - let ri = codegen - .builder - .build_ptr_to_int(rp, i64t, "pi_r") - .map_err(|e| e.to_string())?; - let pred = if matches!(op, C::Eq) { - inkwell::IntPredicate::EQ - } else { - inkwell::IntPredicate::NE - }; - codegen - .builder - .build_int_compare(pred, li, ri, "pcmpi") - .map_err(|e| e.to_string())? - .into() - } - _ => return Err("unsupported int-pointer comparison (only Eq/Ne)".to_string()), - } + let i64t = codegen.context.i64_type(); + let ri = codegen + .builder + .build_ptr_to_int(rp, i64t, "pi_r") + .map_err(|e| e.to_string())?; + let pred = match op { + C::Eq => inkwell::IntPredicate::EQ, + C::Ne => inkwell::IntPredicate::NE, + C::Lt => inkwell::IntPredicate::SLT, + C::Le => inkwell::IntPredicate::SLE, + C::Gt => inkwell::IntPredicate::SGT, + C::Ge => inkwell::IntPredicate::SGE, + }; + codegen + .builder + .build_int_compare(pred, li, ri, "pcmpi") + .map_err(|e| e.to_string())? + .into() } else { return Err("compare type mismatch".to_string()); }; diff --git a/src/backend/llvm/compiler/codegen/instructions/arrays.rs b/src/backend/llvm/compiler/codegen/instructions/arrays.rs index 6749aad0..0688fcfb 100644 --- a/src/backend/llvm/compiler/codegen/instructions/arrays.rs +++ b/src/backend/llvm/compiler/codegen/instructions/arrays.rs @@ -17,7 +17,8 @@ pub(super) fn try_handle_array_method<'ctx>( recv_h: inkwell::values::IntValue<'ctx>, ) -> Result { // Only when receiver is ArrayBox - let is_array = matches!(func.metadata.value_types.get(box_val), Some(crate::mir::MirType::Box(b)) if b == "ArrayBox"); + let is_array = matches!(func.metadata.value_types.get(box_val), Some(crate::mir::MirType::Box(b)) if b == "ArrayBox") + || matches!(method, "get" | "set" | "push" | "length"); if !is_array { return Ok(false); } @@ -127,4 +128,3 @@ pub(super) fn try_handle_array_method<'ctx>( _ => Ok(false), } } - diff --git a/src/backend/llvm/compiler/codegen/instructions/blocks.rs b/src/backend/llvm/compiler/codegen/instructions/blocks.rs index b9d9288a..24dcd2f4 100644 --- a/src/backend/llvm/compiler/codegen/instructions/blocks.rs +++ b/src/backend/llvm/compiler/codegen/instructions/blocks.rs @@ -3,6 +3,7 @@ use inkwell::values::{BasicValueEnum, FunctionValue, PhiValue}; use std::collections::HashMap; use crate::backend::llvm::context::CodegenContext; +use super::super::types::map_mirtype_to_basic; use crate::mir::{function::MirFunction, BasicBlockId, ValueId}; // Small, safe extraction: create LLVM basic blocks for a MIR function and @@ -59,7 +60,19 @@ pub(in super::super) fn precreate_phis<'ctx>( { if let crate::mir::instruction::MirInstruction::Phi { dst, inputs } = inst { let mut phi_ty: Option = None; + // Prefer pointer when any input (or dst) is String/Box/Array/Future/Unknown + let mut wants_ptr = false; if let Some(mt) = func.metadata.value_types.get(dst) { + wants_ptr |= matches!(mt, crate::mir::MirType::String | crate::mir::MirType::Box(_) | crate::mir::MirType::Array(_) | crate::mir::MirType::Future(_) | crate::mir::MirType::Unknown); + } + for (_, iv) in inputs.iter() { + if let Some(mt) = func.metadata.value_types.get(iv) { + wants_ptr |= matches!(mt, crate::mir::MirType::String | crate::mir::MirType::Box(_) | crate::mir::MirType::Array(_) | crate::mir::MirType::Future(_) | crate::mir::MirType::Unknown); + } + } + if wants_ptr { + phi_ty = Some(codegen.context.ptr_type(inkwell::AddressSpace::from(0)).into()); + } else if let Some(mt) = func.metadata.value_types.get(dst) { phi_ty = Some(map_mirtype_to_basic(codegen.context, mt)); } else if let Some((_, iv)) = inputs.first() { if let Some(mt) = func.metadata.value_types.get(iv) { diff --git a/src/backend/llvm/compiler/codegen/instructions/boxcall.rs b/src/backend/llvm/compiler/codegen/instructions/boxcall.rs index 11b501d4..3dc847d7 100644 --- a/src/backend/llvm/compiler/codegen/instructions/boxcall.rs +++ b/src/backend/llvm/compiler/codegen/instructions/boxcall.rs @@ -5,8 +5,10 @@ use inkwell::values::BasicValueEnum as BVE; use crate::backend::llvm::context::CodegenContext; mod fields; -mod invoke; +pub(crate) mod invoke; mod marshal; +use self::marshal as marshal_mod; +use self::invoke as invoke_mod; use crate::mir::{function::MirFunction, ValueId}; // BoxCall lowering (large): mirrors existing logic; kept in one function for now @@ -66,6 +68,11 @@ pub(in super::super) fn lower_boxcall<'ctx>( return Ok(()); } + // Console convenience: treat println as env.console.log + if method == "println" { + return super::externcall::lower_externcall(codegen, func, vmap, dst, &"env.console".to_string(), &"log".to_string(), args); + } + // getField/setField if fields::try_handle_field_method(codegen, vmap, dst, method, args, recv_h)? { return Ok(()); @@ -106,6 +113,107 @@ pub(in super::super) fn lower_boxcall<'ctx>( )?; return Ok(()); } else { + // Fallback: treat as direct call to a user function in the same module, if present. + // Compose candidate name like "./" (e.g., Main.esc_json/1) + let arity = args.len(); + let module_name = func + .signature + .name + .split('.') + .next() + .unwrap_or("") + .to_string(); + if !module_name.is_empty() { + let candidate = format!("{}.{}{}", module_name, method, format!("/{}", arity)); + // Sanitize symbol the same way as codegen/mod.rs does + let sym: String = { + let mut s = String::from("ny_f_"); + s.push_str(&candidate.replace('.', "_").replace('/', "_").replace('-', "_")); + s + }; + if let Some(callee) = codegen.module.get_function(&sym) { + let mut call_args: Vec = Vec::with_capacity(args.len()); + for a in args { + let v = *vmap.get(a).ok_or("boxcall func arg missing")?; + call_args.push(v.into()); + } + let call = codegen + .builder + .build_call(callee, &call_args, "user_meth_call") + .map_err(|e| e.to_string())?; + if let Some(d) = dst { + if let Some(rv) = call.try_as_basic_value().left() { + vmap.insert(*d, rv); + } + } + return Ok(()); + } + } + // Last resort: invoke plugin by name (host resolves method_id) + { + use crate::backend::llvm::compiler::codegen::instructions::boxcall::marshal::get_i64 as get_i64_any; + let i64t = codegen.context.i64_type(); + let argc = i64t.const_int(args.len() as u64, false); + let mname = codegen + .builder + .build_global_string_ptr(method, "meth_name") + .map_err(|e| e.to_string())?; + // up to 2 args for this minimal path + let a1 = if let Some(v0) = args.get(0) { get_i64_any(codegen, vmap, *v0)? } else { i64t.const_zero() }; + let a2 = if let Some(v1) = args.get(1) { get_i64_any(codegen, vmap, *v1)? } else { i64t.const_zero() }; + let fnty = i64t.fn_type( + &[ + i64t.into(), // recv handle + codegen.context.ptr_type(AddressSpace::from(0)).into(), // method cstr + i64t.into(), i64t.into(), i64t.into(), // argc, a1, a2 + ], + false, + ); + let callee = codegen + .module + .get_function("nyash.plugin.invoke_by_name_i64") + .unwrap_or_else(|| codegen.module.add_function("nyash.plugin.invoke_by_name_i64", fnty, None)); + let call = codegen + .builder + .build_call(callee, &[recv_h.into(), mname.as_pointer_value().into(), argc.into(), a1.into(), a2.into()], "pinvoke_by_name") + .map_err(|e| e.to_string())?; + if let Some(d) = dst { + let rv = call + .try_as_basic_value() + .left() + .ok_or("invoke_by_name returned void".to_string())?; + // Inline minimal return normalization similar to store_invoke_return() + if let Some(mt) = func.metadata.value_types.get(d) { + match mt { + crate::mir::MirType::Integer => { vmap.insert(*d, rv); } + crate::mir::MirType::Bool => { + if let BVE::IntValue(iv) = rv { + let i64t = codegen.context.i64_type(); + let zero = i64t.const_zero(); + let b1 = codegen.builder.build_int_compare(inkwell::IntPredicate::NE, iv, zero, "bool_i64_to_i1").map_err(|e| e.to_string())?; + vmap.insert(*d, b1.into()); + } else { vmap.insert(*d, rv); } + } + crate::mir::MirType::String => { + if let BVE::IntValue(iv) = rv { + let p = codegen.builder.build_int_to_ptr(iv, codegen.context.ptr_type(AddressSpace::from(0)), "str_h2p_ret").map_err(|e| e.to_string())?; + vmap.insert(*d, p.into()); + } else { vmap.insert(*d, rv); } + } + crate::mir::MirType::Box(_) | crate::mir::MirType::Array(_) | crate::mir::MirType::Future(_) | crate::mir::MirType::Unknown => { + if let BVE::IntValue(iv) = rv { + let p = codegen.builder.build_int_to_ptr(iv, codegen.context.ptr_type(AddressSpace::from(0)), "h2p_ret").map_err(|e| e.to_string())?; + vmap.insert(*d, p.into()); + } else { vmap.insert(*d, rv); } + } + _ => { vmap.insert(*d, rv); } + } + } else { + vmap.insert(*d, rv); + } + } + return Ok(()); + } Err(format!("BoxCall requires method_id for method '{}'. The method_id should be automatically injected during MIR compilation.", method)) } } diff --git a/src/backend/llvm/compiler/codegen/instructions/boxcall/invoke.rs b/src/backend/llvm/compiler/codegen/instructions/boxcall/invoke.rs index d779d959..269f297e 100644 --- a/src/backend/llvm/compiler/codegen/instructions/boxcall/invoke.rs +++ b/src/backend/llvm/compiler/codegen/instructions/boxcall/invoke.rs @@ -159,9 +159,23 @@ fn store_invoke_return<'ctx>( ) -> Result<(), String> { if let Some(mt) = func.metadata.value_types.get(&dst) { match mt { - crate::mir::MirType::Integer | crate::mir::MirType::Bool => { + crate::mir::MirType::Integer => { vmap.insert(dst, rv); } + crate::mir::MirType::Bool => { + // Normalize i64 bool (0/1) to i1 + if let BVE::IntValue(iv) = rv { + let i64t = codegen.context.i64_type(); + let zero = i64t.const_zero(); + let b1 = codegen + .builder + .build_int_compare(inkwell::IntPredicate::NE, iv, zero, "bool_i64_to_i1") + .map_err(|e| e.to_string())?; + vmap.insert(dst, b1.into()); + } else { + vmap.insert(dst, rv); + } + } crate::mir::MirType::String => { // keep as i64 handle vmap.insert(dst, rv); @@ -191,4 +205,3 @@ fn store_invoke_return<'ctx>( } Ok(()) } - diff --git a/src/backend/llvm/compiler/codegen/instructions/flow.rs b/src/backend/llvm/compiler/codegen/instructions/flow.rs index 180b6375..c9980628 100644 --- a/src/backend/llvm/compiler/codegen/instructions/flow.rs +++ b/src/backend/llvm/compiler/codegen/instructions/flow.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use crate::backend::llvm::context::CodegenContext; use crate::mir::{function::MirFunction, BasicBlockId, ValueId}; -use super::super::types::to_bool; +use super::super::types::{to_bool, map_mirtype_to_basic}; pub(in super::super) fn emit_return<'ctx>( codegen: &CodegenContext<'ctx>, @@ -20,9 +20,22 @@ pub(in super::super) fn emit_return<'ctx>( } (_t, Some(vid)) => { let v = *vmap.get(vid).ok_or("ret value missing")?; + // If function expects a pointer but we have an integer handle, convert i64 -> ptr + let expected = map_mirtype_to_basic(codegen.context, &func.signature.return_type); + use inkwell::types::BasicTypeEnum as BT; + let v_adj = match (expected, v) { + (BT::PointerType(pt), BasicValueEnum::IntValue(iv)) => { + codegen + .builder + .build_int_to_ptr(iv, pt, "ret_i2p") + .map_err(|e| e.to_string())? + .into() + } + _ => v, + }; codegen .builder - .build_return(Some(&v)) + .build_return(Some(&v_adj)) .map_err(|e| e.to_string())?; Ok(()) } diff --git a/src/backend/llvm/compiler/codegen/instructions/strings.rs b/src/backend/llvm/compiler/codegen/instructions/strings.rs index d5100fd1..4258aaab 100644 --- a/src/backend/llvm/compiler/codegen/instructions/strings.rs +++ b/src/backend/llvm/compiler/codegen/instructions/strings.rs @@ -16,15 +16,13 @@ pub(super) fn try_handle_string_method<'ctx>( args: &[ValueId], recv_v: BVE<'ctx>, ) -> Result { - // Only act if receiver is annotated as String or StringBox + // Act if receiver is annotated as String/StringBox, or if the actual value is an i8* (string literal path) let is_string_recv = match func.metadata.value_types.get(box_val) { Some(crate::mir::MirType::String) => true, Some(crate::mir::MirType::Box(b)) if b == "StringBox" => true, - _ => false, + _ => matches!(recv_v, BVE::PointerValue(_)), }; - if !is_string_recv { - return Ok(false); - } + // Do not early-return; allow method-specific checks below to validate types // concat fast-paths if method == "concat" { @@ -153,9 +151,13 @@ pub(super) fn try_handle_string_method<'ctx>( } let i64t = codegen.context.i64_type(); let i8p = codegen.context.ptr_type(AddressSpace::from(0)); - // receiver must be i8* for this fast path + // receiver preferably i8*; if it's a handle (i64), conservatively cast to i8* let recv_p = match recv_v { BVE::PointerValue(p) => p, + BVE::IntValue(iv) => codegen + .builder + .build_int_to_ptr(iv, codegen.context.ptr_type(AddressSpace::from(0)), "str_h2p_sub") + .map_err(|e| e.to_string())?, _ => return Ok(false), }; let a0 = *vmap.get(&args[0]).ok_or("substring start arg missing")?; diff --git a/src/backend/llvm/compiler/codegen/mod.rs b/src/backend/llvm/compiler/codegen/mod.rs index 1aa27c0e..85f35b77 100644 --- a/src/backend/llvm/compiler/codegen/mod.rs +++ b/src/backend/llvm/compiler/codegen/mod.rs @@ -319,14 +319,14 @@ impl LLVMCompiler { instructions::emit_jump(&codegen, *bid, &entry_first, &bb_map, &phis_by_block, &vmap)?; } } - // Verify per-function - if !llvm_func.verify(true) { - return Err(format!("Function verification failed: {}", name)); - } } - // Close the per-function lowering loop + // Verify the fully-lowered function once, after all blocks + if !llvm_func.verify(true) { + return Err(format!("Function verification failed: {}", name)); } - + + } + // End of per-function lowering loop // Build entry wrapper ny_main -> call entry function let i64t = codegen.context.i64_type();