From 45f13cf7a8d749fad62f30dbe775db6c2a345506 Mon Sep 17 00:00:00 2001 From: Selfhosting Dev Date: Fri, 12 Sep 2025 19:23:16 +0900 Subject: [PATCH] docs: Add LLVM Python harness plan to CURRENT_TASK - Added llvmlite verification harness strategy - Python as parallel verification path for PHI/SSA issues - Nyash ABI wrapper for LLVM emit abstraction - NYASH_LLVM_USE_HARNESS=1 flag for mode switching - Goal: Rust implementation in 1-2 days, Python for rapid verification Acknowledging reality: When stuck at minimal viable implementation, changing implementation language is a practical solution. 'Simple is Best' - the core Nyash philosophy. --- CURRENT_TASK.md | 85 +++++++++- .../compiler/codegen/instructions/arith.rs | 91 +++++----- .../codegen/instructions/arith_ops.rs | 155 ++++++------------ .../compiler/codegen/instructions/arrays.rs | 32 ++-- .../compiler/codegen/instructions/boxcall.rs | 57 ++++--- .../codegen/instructions/builder_cursor.rs | 10 +- .../compiler/codegen/instructions/call.rs | 48 +++--- .../instructions/externcall/console.rs | 37 ++--- .../codegen/instructions/externcall/env.rs | 98 +++++------ .../codegen/instructions/externcall/mod.rs | 18 +- .../compiler/codegen/instructions/flow.rs | 81 ++++++++- .../compiler/codegen/instructions/loopform.rs | 30 ++++ .../compiler/codegen/instructions/maps.rs | 52 +++--- .../llvm/compiler/codegen/instructions/mem.rs | 76 ++++----- .../llvm/compiler/codegen/instructions/mod.rs | 1 + .../compiler/codegen/instructions/newbox.rs | 52 +++--- .../compiler/codegen/instructions/strings.rs | 96 +++++------ src/backend/llvm/compiler/codegen/mod.rs | 70 +++++--- 18 files changed, 599 insertions(+), 490 deletions(-) diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index 518e7c85..4bc186c2 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -7,6 +7,23 @@ Summary - StringBox NewBox i8* fast path; print/log choose automatically. - Implement multi-function lowering and Call lowering for MIR14. +Compact Roadmap (2025‑09‑12) +- Focus: LLVM AOT → Flow hardening, PHI(sealed)安定化, LoopForm導入, BuilderCursor厳格化。 +- Now: + - Fallback terminator整備、PHI(sealed)はsnapshot参照へ、castはpred終端直前に限定。 + - LoopForm Step 2.5/3(検出2段/dispatch骨格)完了。非破壊(Break集約のみ)。 + - BuilderCursor: post‑terminator挿入を即panic(strings/arith_ops/memへ適用済)。 +- Next (short): + 1) BuilderCursor厳格化の適用拡大(externcall→newbox→arrays→maps→call)。 + 2) Sealed SSA を既定ONに一本化(finalize_phis停止、seal_blockで完結)。 + 3) LoopForm header PHI正規化の安定化(latch→header ON 時も verifier green)。 + 4) body→dispatchを単純ボディで常用化(段階ゲート)。 + 5) 計測: dispatch-only PHI/ゼロ合成減少、post‑terminator検知ゼロ継続。 +- Flags: + - `NYASH_ENABLE_LOOPFORM=1`(非破壊ON) + - `NYASH_LOOPFORM_BODY2DISPATCH=1`(実験: 単純ボディのbody→dispatch) + - `NYASH_LOOPFORM_LATCH2HEADER=1`(PHI正規化後に有効化) + Update — 2025-09-12 (LLVM flow + BB naming) - codegen/mod.rs match arms cleanup: - BinOp: unify to instructions::lower_binop(...) @@ -29,6 +46,14 @@ Hot Update — 2025-09-12 (quick) - 現在のブロッカー: esc_json/1 で「phi incoming value missing」 - 対応: emit_jump/emit_branch の incoming 配線をログ付きで点検し、値未定義箇所(by‑name/fast‑path戻り)を補完 +Hot Update — 2025‑09‑12 (Plan: LLVM wrapper via Nyash ABI) +- 背景: Rust/inkwell のビルド時間と反復速度が課題。LLVM生成を Nyash から呼べる ABI に抽象化し、将来 Nyash スクリプトで LLVM ビルダー実装へ移行する。 +- 方針: Rust 実装は当面維持(1–2日で dep_tree_min_string をグリーンに)。併走で llvmlite(Python) を「検証ハーネス」として導入し、PHI/Loop 形の仕様検証→ Rust へ反映。 +- 入口: Nyash ABI で LLVM emit をラップ。 + - モード切替フラグ: `NYASH_LLVM_USE_HARNESS=1`(ON時は llvmlite ハーネスに委譲)。 + - I/O 仕様: 入力=MIR(JSON/メモリ), 出力=.o(`NYASH_AOT_OBJECT_OUT` に書き出し)。 +- 受け入れ: harness ON/OFF で dep_tree_min_string の出力一致(機能同値)。 + Hot Repro — esc_json/1 PHI 配線(2025‑09‑12) - 対象: apps/selfhost/tools/dep_tree_min_string.nyash - 実行(LLVM): @@ -40,10 +65,12 @@ Hot Repro — esc_json/1 PHI 配線(2025‑09‑12) - 原因仮説: Sealed ON で `seal_block` が pred終端時点の値(value_at_end_of_block)ではなく関数作業用 vmap を参照しているため、未定義扱いになっている。 Next Steps(Sealed SSA 段階導入) -1) block_end_values を導入し、各BB降下完了時に vmap スナップショットを保存。`seal_block` は pred のスナップショットから in_vid を取得。 -2) Sealed=ON で apps/selfhost/tools/dep_tree_min_string.nyash を再確認(PHIログ=ON)。OFF/ON の一致を比較し、incoming が pred数で揃うことを検証。 -3) 足りない型整合(String/Box/Array→i8*)があれば `coerce_to_type` を拡張。 -4) グリーン後、Sealed をデフォルトONにする前にスモーク一式で回帰確認。 +1) block_end_values を導入し、各BB降下完了時に vmap スナップショットを保存。`seal_block` は pred のスナップショットから in_vid を取得。(完了) +2) Sealed=ON を既定にし、emit_* 側の配線を停止(`finalize_phis` 無効化)。(実装済/整備中) +3) BuilderCursor を lowering 全域に適用(externcall/newbox/arrays/maps/call)。 +4) Sealed=ON で apps/selfhost/tools/dep_tree_min_string.nyash を再確認(PHIログ=ON)。 +5) 足りない型整合(String/Box/Array→i8*)があれば `coerce_to_type` を拡張。 +6) グリーン後、LoopForm BODY→DISPATCH を単純ボディで常用化。 TODO — Sealed SSA 段階導入(実装タスク) - [x] block_end_values 追加(LLVM Lower 内の per-BB 終端スナップショット) @@ -155,8 +182,10 @@ Done (today) ディスパッチは `externcall/mod.rs` に集約(挙動差分なし・0‑diff)。 - String Fast‑Path 追加(LLVM/NYRT): `substring(start,end)` と `lastIndexOf(needle)` を実装。 - Compare: String/StringBox 注釈のときは内容比較(`nyash.string.eq_hh`)にブリッジ。 - - LLVM 多関数 Lower の骨格を実装: 全関数を事前宣言→順次Lower→`ny_main` ラッパで呼び出し正規化。 - - Call Lowering 追加(MIR14の `MirInstruction::Call`): callee 文字列定数を解決し、対応するLLVM関数を呼び出し(引数束縛・戻り値格納)。 +- LLVM 多関数 Lower の骨格を実装: 全関数を事前宣言→順次Lower→`ny_main` ラッパで呼び出し正規化。 +- Call Lowering 追加(MIR14の `MirInstruction::Call`): callee 文字列定数を解決し、対応するLLVM関数を呼び出し(引数束縛・戻り値格納)。 +- BuilderCursor 適用(第1弾): strings/arith_ops/mem を Cursor 経由に統一。post-terminator 挿入検知を強化。 +- Sealed SSA: `finalize_phis` を停止し、`seal_block` に一本化。LoopForm latch→header の header PHI 正規化を追加(ゲート付)。 Refactor — LLVM codegen instructions modularized (done) - Goal achieved: `instructions.rs` を段階分割し、責務ごとに再配置(0‑diff)。 @@ -211,6 +240,50 @@ Risks/Guards - Avoid broad implicit conversions; keep concat fallback gated by annotations only. - Ensure nyash.map.* との一致(core Map); plugin経路は環境変数で明示切替。 - Keep LLVM smokes green continuously; do not gate on VM/JIT. +- BuilderCursor 全域適用前は `codegen.builder` の直接使用が残存し、挿入点の撹乱によるドミナンス違反のリスクあり(対策: 全域 Cursor 化)。 + +Hot Update — 2025-09-12 (LoopForm Step 2.5/3) +- Context reset: コンテキスト問題でひらきなおし。LoopForm を安全な骨格から段階導入する。 +- While 検出強化(Step 2.5): then/else → header への back-edge を Jump 2 段まで許容し、短い側を body、他方を after に決定(ログに header/body/after/loop_id を表示)。 +- dispatch 骨格(Step 3 最小): dispatch に phi(tag:i8, payload:i64) を作り、switch(tag){ Next(0)→latch, Break(1)→exit } を実装。 + - いまは header(false)=Break のみを dispatch に供給(body→dispatch は既定OFFのまま・安全導入)。 + - latch は unreachable(header の pred を増やさず PHI 整合を保つ)。 +- BuilderCursor 強化(局所): at_end で終端検知/closed 初期化、emit_instr で post-terminator 挿入を即 panic。 +- 互換: MIR Const Void は i64(0) に無害化して Lower 継続性を向上。 + +LoopForm Flags(実験) +- `NYASH_ENABLE_LOOPFORM=1`: LoopForm 検出/配線を有効化(非破壊・Break 集約のみ)。 +- `NYASH_LOOPFORM_BODY2DISPATCH=1`: 単純ボディの Jump→header を dispatch へ差替え(tag=0/payload=0 を追加)。 +- `NYASH_LOOPFORM_LATCH2HEADER=1`: latch→header を有効化(現状は推奨OFF。header PHI 正規化後にONする)。 + +Next Flow(これからの流れ=段階導入) +1) BuilderCursor 厳格化の適用拡大(短期) + - 直叩き `build_*` を `emit_instr/emit_term/with_block` に段階置換(strings → arith_ops → mem → types)。 + - 軽量トラッカーで post-terminator 挿入を即検知(panic、犯人BB特定)。 +2) LoopForm 反復の本線(中期) + - header PHI 正規化(LoopForm 追加predを含めて「pred数=エントリ数」を保証)。 + - 実装: finalize_phis を LoopForm-aware に拡張(MIR由来pred + 追加pred(latch) をマージ)。 + - 受け渡し: pred 終端直前に局所 cast(既存の `coerce_to_type` を流用)。 + - 受入: `NYASH_LOOPFORM_LATCH2HEADER=1` をONにしても verifier green(PHI欠落なし)を確認。 +3) body→dispatch 導線の常用化(中期) + - 単純ボディから開始(終端が1つ=back-edge のみ)。 + - その後に複数出口/ネスト break/continue を段階解放(tag/payload で正規化)。 +4) 可視化と計測(並行) + - ループごとに dispatch-only PHI を確認(PHI個数/ゼロ合成の削減)。 + - post-terminator 挿入検知のカバレッジをログ化。 + +Acceptance(段階ごと) +- A1: LoopForm ON でも従来挙動と等価(Break 集約のみ・非破壊、smoke green)。 +- A2: BuilderCursor 厳格化で post-terminator が検知ゼロ(panic不発)が続く。 +- A3: header PHI 正規化後、latch→header 有効でも verifier green(PHI 欠落なし)。 +- A4: body→dispatch を単純ボディで常用化し、dispatch 以外に PHI が出ないことを確認。 +- A5: `NYASH_LLVM_USE_HARNESS=1`(llvmlite)と OFF(Rust)の出力が dep_tree_min_string で機能一致。 + +Execution Plan — Next 48h +1) BuilderCursor 全域適用(externcall/newbox/arrays/maps/call)。 +2) Sealed=ON で dep_tree_min_string をグリーン(PHI/ドミナンス違反ゼロ)。 +3) (並行)llvmlite 検証ハーネス追加(Nyash ABI 経由、ゲートで切替)。 +4) BODY→DISPATCH 常用化(単純ボディ)。 ## 🎉 LLVMプラグイン戻り値表示問題修正進行中(2025-09-10) diff --git a/src/backend/llvm/compiler/codegen/instructions/arith.rs b/src/backend/llvm/compiler/codegen/instructions/arith.rs index a47321c9..c917e6d8 100644 --- a/src/backend/llvm/compiler/codegen/instructions/arith.rs +++ b/src/backend/llvm/compiler/codegen/instructions/arith.rs @@ -3,16 +3,22 @@ use std::collections::HashMap; use inkwell::values::BasicValueEnum; use crate::backend::llvm::context::CodegenContext; -use crate::mir::{function::MirFunction, CompareOp, ValueId}; +use crate::mir::{function::MirFunction, BasicBlockId, CompareOp, ValueId}; +use super::builder_cursor::BuilderCursor; /// Compare lowering: return the resulting BasicValueEnum (i1) -pub(in super::super) fn lower_compare<'ctx>( +pub(in super::super) fn lower_compare<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, func: &MirFunction, vmap: &HashMap>, op: &CompareOp, lhs: &ValueId, rhs: &ValueId, + bb_map: &std::collections::HashMap>, + preds: &std::collections::HashMap>, + block_end_values: &std::collections::HashMap>>, ) -> Result, String> { use crate::backend::llvm::compiler::helpers::{as_float, as_int}; let lv = if let Some(v) = vmap.get(lhs).copied() { @@ -46,10 +52,10 @@ pub(in super::super) fn lower_compare<'ctx>( if l_is_str && r_is_str { let i64t = codegen.context.i64_type(); // Convert both sides to handles if needed - let to_handle = |v: BasicValueEnum<'ctx>| -> Result, String> { + let mut to_handle = |v: BasicValueEnum<'ctx>| -> Result, String> { match v { BasicValueEnum::IntValue(iv) => { - if iv.get_type() == i64t { Ok(iv) } else { codegen.builder.build_int_s_extend(iv, i64t, "i2i64").map_err(|e| e.to_string()) } + if iv.get_type() == i64t { Ok(iv) } else { cursor.emit_instr(cur_bid, |b| b.build_int_s_extend(iv, i64t, "i2i64")).map_err(|e| e.to_string()) } } BasicValueEnum::PointerValue(pv) => { let fnty = i64t.fn_type(&[codegen.context.ptr_type(inkwell::AddressSpace::from(0)).into()], false); @@ -57,9 +63,8 @@ pub(in super::super) fn lower_compare<'ctx>( .module .get_function("nyash.box.from_i8_string") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[pv.into()], "str_ptr_to_handle_cmp") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[pv.into()], "str_ptr_to_handle_cmp")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -77,9 +82,8 @@ pub(in super::super) fn lower_compare<'ctx>( .module .get_function("nyash.string.eq_hh") .unwrap_or_else(|| codegen.module.add_function("nyash.string.eq_hh", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[lh.into(), rh.into()], "str_eq_hh") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[lh.into(), rh.into()], "str_eq_hh")) .map_err(|e| e.to_string())?; let iv = call .try_as_basic_value() @@ -92,27 +96,29 @@ pub(in super::super) fn lower_compare<'ctx>( } else { inkwell::IntPredicate::EQ }; - let b = codegen - .builder - .build_int_compare(pred, iv, zero, "str_eq_to_bool") + let b = cursor + .emit_instr(cur_bid, |bd| bd.build_int_compare(pred, iv, zero, "str_eq_to_bool")) .map_err(|e| e.to_string())?; return Ok(b.into()); } } - let out = if let (Some(mut li), Some(mut ri)) = (as_int(lv), as_int(rv)) { + let out = if let (Some(_li0), Some(_ri0)) = (as_int(lv), as_int(rv)) { + // Localize integer operands into current block to satisfy dominance + let mut li = super::flow::localize_to_i64(codegen, cursor, cur_bid, *lhs, bb_map, preds, block_end_values, vmap) + .unwrap_or_else(|_| as_int(lv).unwrap()); + let mut ri = super::flow::localize_to_i64(codegen, cursor, cur_bid, *rhs, bb_map, preds, block_end_values, vmap) + .unwrap_or_else(|_| as_int(rv).unwrap()); // Normalize integer widths: extend the narrower to match the wider to satisfy LLVM let lw = li.get_type().get_bit_width(); let rw = ri.get_type().get_bit_width(); if lw != rw { if lw < rw { - li = codegen - .builder - .build_int_z_extend(li, ri.get_type(), "icmp_zext_l") + li = cursor + .emit_instr(cur_bid, |b| b.build_int_z_extend(li, ri.get_type(), "icmp_zext_l")) .map_err(|e| e.to_string())?; } else { - ri = codegen - .builder - .build_int_z_extend(ri, li.get_type(), "icmp_zext_r") + ri = cursor + .emit_instr(cur_bid, |b| b.build_int_z_extend(ri, li.get_type(), "icmp_zext_r")) .map_err(|e| e.to_string())?; } } @@ -125,9 +131,8 @@ pub(in super::super) fn lower_compare<'ctx>( C::Gt => inkwell::IntPredicate::SGT, C::Ge => inkwell::IntPredicate::SGE, }; - codegen - .builder - .build_int_compare(pred, li, ri, "icmp") + cursor + .emit_instr(cur_bid, |b| b.build_int_compare(pred, li, ri, "icmp")) .map_err(|e| e.to_string())? .into() } else if let (Some(lf), Some(rf)) = (as_float(lv), as_float(rv)) { @@ -140,9 +145,8 @@ pub(in super::super) fn lower_compare<'ctx>( C::Gt => inkwell::FloatPredicate::OGT, C::Ge => inkwell::FloatPredicate::OGE, }; - codegen - .builder - .build_float_compare(pred, lf, rf, "fcmp") + cursor + .emit_instr(cur_bid, |b| b.build_float_compare(pred, lf, rf, "fcmp")) .map_err(|e| e.to_string())? .into() } else if let (BasicValueEnum::PointerValue(lp), BasicValueEnum::PointerValue(rp)) = (lv, rv) { @@ -151,22 +155,19 @@ pub(in super::super) fn lower_compare<'ctx>( match op { C::Eq | C::Ne => { let i64t = codegen.context.i64_type(); - let li = codegen - .builder - .build_ptr_to_int(lp, i64t, "pi_l") + let li = cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(lp, i64t, "pi_l")) .map_err(|e| e.to_string())?; - let ri = codegen - .builder - .build_ptr_to_int(rp, i64t, "pi_r") + let ri = cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(rp, i64t, "pi_r")) .map_err(|e| e.to_string())?; let pred = if matches!(op, C::Eq) { inkwell::IntPredicate::EQ } else { inkwell::IntPredicate::NE }; - codegen - .builder - .build_int_compare(pred, li, ri, "pcmp") + cursor + .emit_instr(cur_bid, |b| b.build_int_compare(pred, li, ri, "pcmp")) .map_err(|e| e.to_string())? .into() } @@ -175,9 +176,8 @@ pub(in super::super) fn lower_compare<'ctx>( } else if let (BasicValueEnum::PointerValue(lp), BasicValueEnum::IntValue(ri)) = (lv, rv) { use CompareOp as C; let i64t = codegen.context.i64_type(); - let li = codegen - .builder - .build_ptr_to_int(lp, i64t, "pi_l") + let li = cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(lp, i64t, "pi_l")) .map_err(|e| e.to_string())?; let pred = match op { C::Eq => inkwell::IntPredicate::EQ, @@ -187,17 +187,15 @@ pub(in super::super) fn lower_compare<'ctx>( C::Gt => inkwell::IntPredicate::SGT, C::Ge => inkwell::IntPredicate::SGE, }; - codegen - .builder - .build_int_compare(pred, li, ri, "pcmpi") + cursor + .emit_instr(cur_bid, |b| b.build_int_compare(pred, li, ri, "pcmpi")) .map_err(|e| e.to_string())? .into() } else if let (BasicValueEnum::IntValue(li), BasicValueEnum::PointerValue(rp)) = (lv, rv) { use CompareOp as C; let i64t = codegen.context.i64_type(); - let ri = codegen - .builder - .build_ptr_to_int(rp, i64t, "pi_r") + let ri = cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(rp, i64t, "pi_r")) .map_err(|e| e.to_string())?; let pred = match op { C::Eq => inkwell::IntPredicate::EQ, @@ -207,9 +205,8 @@ pub(in super::super) fn lower_compare<'ctx>( C::Gt => inkwell::IntPredicate::SGT, C::Ge => inkwell::IntPredicate::SGE, }; - codegen - .builder - .build_int_compare(pred, li, ri, "pcmpi") + cursor + .emit_instr(cur_bid, |b| b.build_int_compare(pred, li, ri, "pcmpi")) .map_err(|e| e.to_string())? .into() } else { diff --git a/src/backend/llvm/compiler/codegen/instructions/arith_ops.rs b/src/backend/llvm/compiler/codegen/instructions/arith_ops.rs index 9cffa590..191ebe52 100644 --- a/src/backend/llvm/compiler/codegen/instructions/arith_ops.rs +++ b/src/backend/llvm/compiler/codegen/instructions/arith_ops.rs @@ -4,11 +4,14 @@ use inkwell::{values::BasicValueEnum, AddressSpace}; use crate::backend::llvm::compiler::codegen::types; use crate::backend::llvm::context::CodegenContext; -use crate::mir::{function::MirFunction, instruction::UnaryOp, BinaryOp, ValueId}; +use crate::mir::{function::MirFunction, instruction::UnaryOp, BasicBlockId, BinaryOp, ValueId}; +use super::builder_cursor::BuilderCursor; /// Lower UnaryOp and store into vmap (0-diff) -pub(in super::super) fn lower_unary<'ctx>( +pub(in super::super) fn lower_unary<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, vmap: &mut HashMap>, dst: ValueId, op: &UnaryOp, @@ -19,15 +22,15 @@ pub(in super::super) fn lower_unary<'ctx>( let out = match op { UnaryOp::Neg => { if let Some(iv) = as_int(v) { - codegen - .builder - .build_int_neg(iv, "ineg") + cursor + .emit_instr(cur_bid, |b| b + .build_int_neg(iv, "ineg")) .map_err(|e| e.to_string())? .into() } else if let Some(fv) = as_float(v) { - codegen - .builder - .build_float_neg(fv, "fneg") + cursor + .emit_instr(cur_bid, |b| b + .build_float_neg(fv, "fneg")) .map_err(|e| e.to_string())? .into() } else { @@ -36,9 +39,9 @@ pub(in super::super) fn lower_unary<'ctx>( } UnaryOp::Not | UnaryOp::BitNot => { if let Some(iv) = as_int(v) { - codegen - .builder - .build_not(iv, "inot") + cursor + .emit_instr(cur_bid, |b| b + .build_not(iv, "inot")) .map_err(|e| e.to_string())? .into() } else { @@ -51,8 +54,10 @@ pub(in super::super) fn lower_unary<'ctx>( } /// Lower BinOp and store into vmap (includes concat fallback) -pub(in super::super) fn lower_binop<'ctx>( +pub(in super::super) fn lower_binop<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, func: &MirFunction, vmap: &mut HashMap>, dst: ValueId, @@ -96,9 +101,9 @@ pub(in super::super) fn lower_binop<'ctx>( .module .get_function("nyash.string.concat_ss") .unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_ss", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[lp.into(), rp.into()], "concat_ss") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[lp.into(), rp.into()], "concat_ss")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -115,9 +120,9 @@ pub(in super::super) fn lower_binop<'ctx>( .module .get_function("nyash.box.from_i8_string") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty_conv, None)); - let call_c = codegen - .builder - .build_call(conv, &[lp.into()], "lhs_i8_to_handle") + let call_c = cursor + .emit_instr(cur_bid, |b| b + .build_call(conv, &[lp.into()], "lhs_i8_to_handle")) .map_err(|e| e.to_string())?; let lh = call_c .try_as_basic_value() @@ -129,9 +134,9 @@ pub(in super::super) fn lower_binop<'ctx>( .module .get_function("nyash.string.concat_hh") .unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_hh", fnty_hh, None)); - let call = codegen - .builder - .build_call(callee, &[lh.into(), ri.into()], "concat_hh") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[lh.into(), ri.into()], "concat_hh")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -180,9 +185,9 @@ pub(in super::super) fn lower_binop<'ctx>( .module .get_function("nyash.string.concat_hh") .unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_hh", fnty_hh, None)); - let call = codegen - .builder - .build_call(callee, &[li.into(), rh.into()], "concat_hh") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[li.into(), rh.into()], "concat_hh")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -197,9 +202,9 @@ pub(in super::super) fn lower_binop<'ctx>( .module .get_function("nyash.string.concat_is") .unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_is", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[li.into(), rp.into()], "concat_is") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[li.into(), rp.into()], "concat_is")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -219,97 +224,33 @@ pub(in super::super) fn lower_binop<'ctx>( let out = if let (Some(li), Some(ri)) = (as_int(lv), as_int(rv)) { use BinaryOp as B; match op { - B::Add => codegen - .builder - .build_int_add(li, ri, "iadd") - .map_err(|e| e.to_string())? - .into(), - B::Sub => codegen - .builder - .build_int_sub(li, ri, "isub") - .map_err(|e| e.to_string())? - .into(), - B::Mul => codegen - .builder - .build_int_mul(li, ri, "imul") - .map_err(|e| e.to_string())? - .into(), - B::Div => codegen - .builder - .build_int_signed_div(li, ri, "idiv") - .map_err(|e| e.to_string())? - .into(), - B::Mod => codegen - .builder - .build_int_signed_rem(li, ri, "imod") - .map_err(|e| e.to_string())? - .into(), - B::BitAnd => codegen - .builder - .build_and(li, ri, "iand") - .map_err(|e| e.to_string())? - .into(), - B::BitOr => codegen - .builder - .build_or(li, ri, "ior") - .map_err(|e| e.to_string())? - .into(), - B::BitXor => codegen - .builder - .build_xor(li, ri, "ixor") - .map_err(|e| e.to_string())? - .into(), - B::Shl => codegen - .builder - .build_left_shift(li, ri, "ishl") - .map_err(|e| e.to_string())? - .into(), - B::Shr => codegen - .builder - .build_right_shift(li, ri, false, "ishr") - .map_err(|e| e.to_string())? - .into(), + B::Add => cursor.emit_instr(cur_bid, |b| b.build_int_add(li, ri, "iadd")).map_err(|e| e.to_string())?.into(), + B::Sub => cursor.emit_instr(cur_bid, |b| b.build_int_sub(li, ri, "isub")).map_err(|e| e.to_string())?.into(), + B::Mul => cursor.emit_instr(cur_bid, |b| b.build_int_mul(li, ri, "imul")).map_err(|e| e.to_string())?.into(), + B::Div => cursor.emit_instr(cur_bid, |b| b.build_int_signed_div(li, ri, "idiv")).map_err(|e| e.to_string())?.into(), + B::Mod => cursor.emit_instr(cur_bid, |b| b.build_int_signed_rem(li, ri, "imod")).map_err(|e| e.to_string())?.into(), + B::BitAnd => cursor.emit_instr(cur_bid, |b| b.build_and(li, ri, "iand")).map_err(|e| e.to_string())?.into(), + B::BitOr => cursor.emit_instr(cur_bid, |b| b.build_or(li, ri, "ior")).map_err(|e| e.to_string())?.into(), + B::BitXor => cursor.emit_instr(cur_bid, |b| b.build_xor(li, ri, "ixor")).map_err(|e| e.to_string())?.into(), + B::Shl => cursor.emit_instr(cur_bid, |b| b.build_left_shift(li, ri, "ishl")).map_err(|e| e.to_string())?.into(), + B::Shr => cursor.emit_instr(cur_bid, |b| b.build_right_shift(li, ri, false, "ishr")).map_err(|e| e.to_string())?.into(), B::And | B::Or => { // Treat as logical on integers: convert to i1 and and/or let lb = types::to_bool(codegen.context, li.into(), &codegen.builder)?; let rb = types::to_bool(codegen.context, ri.into(), &codegen.builder)?; match op { - B::And => codegen - .builder - .build_and(lb, rb, "land") - .map_err(|e| e.to_string())? - .into(), - _ => codegen - .builder - .build_or(lb, rb, "lor") - .map_err(|e| e.to_string())? - .into(), + B::And => cursor.emit_instr(cur_bid, |b| b.build_and(lb, rb, "land")).map_err(|e| e.to_string())?.into(), + _ => cursor.emit_instr(cur_bid, |b| b.build_or(lb, rb, "lor")).map_err(|e| e.to_string())?.into(), } } } } else if let (Some(lf), Some(rf)) = (as_float(lv), as_float(rv)) { use BinaryOp as B; match op { - B::Add => codegen - .builder - .build_float_add(lf, rf, "fadd") - .map_err(|e| e.to_string())? - .into(), - B::Sub => codegen - .builder - .build_float_sub(lf, rf, "fsub") - .map_err(|e| e.to_string())? - .into(), - B::Mul => codegen - .builder - .build_float_mul(lf, rf, "fmul") - .map_err(|e| e.to_string())? - .into(), - B::Div => codegen - .builder - .build_float_div(lf, rf, "fdiv") - .map_err(|e| e.to_string())? - .into(), + B::Add => cursor.emit_instr(cur_bid, |b| b.build_float_add(lf, rf, "fadd")).map_err(|e| e.to_string())?.into(), + B::Sub => cursor.emit_instr(cur_bid, |b| b.build_float_sub(lf, rf, "fsub")).map_err(|e| e.to_string())?.into(), + B::Mul => cursor.emit_instr(cur_bid, |b| b.build_float_mul(lf, rf, "fmul")).map_err(|e| e.to_string())?.into(), + B::Div => cursor.emit_instr(cur_bid, |b| b.build_float_div(lf, rf, "fdiv")).map_err(|e| e.to_string())?.into(), B::Mod => return Err("fmod not supported yet".to_string()), _ => return Err("bit/logic ops on float".to_string()), } diff --git a/src/backend/llvm/compiler/codegen/instructions/arrays.rs b/src/backend/llvm/compiler/codegen/instructions/arrays.rs index 356524e4..aa521e1e 100644 --- a/src/backend/llvm/compiler/codegen/instructions/arrays.rs +++ b/src/backend/llvm/compiler/codegen/instructions/arrays.rs @@ -3,11 +3,14 @@ use std::collections::HashMap; use inkwell::values::BasicValueEnum as BVE; use crate::backend::llvm::context::CodegenContext; -use crate::mir::{function::MirFunction, ValueId}; +use crate::mir::{function::MirFunction, BasicBlockId, ValueId}; +use super::builder_cursor::BuilderCursor; /// Handle ArrayBox fast-paths. Returns true if handled. -pub(super) fn try_handle_array_method<'ctx>( +pub(super) fn try_handle_array_method<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, func: &MirFunction, vmap: &mut HashMap>, dst: &Option, @@ -42,9 +45,8 @@ pub(super) fn try_handle_array_method<'ctx>( .module .get_function("nyash_array_get_h") .unwrap_or_else(|| codegen.module.add_function("nyash_array_get_h", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_h.into(), idx_i.into()], "aget") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into(), idx_i.into()], "aget")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -79,9 +81,8 @@ pub(super) fn try_handle_array_method<'ctx>( .module .get_function("nyash_array_set_h") .unwrap_or_else(|| codegen.module.add_function("nyash_array_set_h", fnty, None)); - let _ = codegen - .builder - .build_call(callee, &[recv_h.into(), idx_i.into(), val_i.into()], "aset") + let _ = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into(), idx_i.into(), val_i.into()], "aset")) .map_err(|e| e.to_string())?; Ok(true) } @@ -95,9 +96,8 @@ pub(super) fn try_handle_array_method<'ctx>( let val_v = *vmap.get(&args[0]).ok_or("array.push value missing")?; let val_i = match val_v { BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "val_p2i") + BVE::PointerValue(pv) => cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "val_p2i")) .map_err(|e| e.to_string())?, _ => return Err("array.push value must be int or handle ptr".to_string()), }; @@ -106,9 +106,8 @@ pub(super) fn try_handle_array_method<'ctx>( .module .get_function("nyash_array_push_h") .unwrap_or_else(|| codegen.module.add_function("nyash_array_push_h", fnty, None)); - let _ = codegen - .builder - .build_call(callee, &[recv_h.into(), val_i.into()], "apush") + let _ = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into(), val_i.into()], "apush")) .map_err(|e| e.to_string())?; Ok(true) } @@ -124,9 +123,8 @@ pub(super) fn try_handle_array_method<'ctx>( .module .get_function("nyash_array_length_h") .unwrap_or_else(|| codegen.module.add_function("nyash_array_length_h", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_h.into()], "alen") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into()], "alen")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call diff --git a/src/backend/llvm/compiler/codegen/instructions/boxcall.rs b/src/backend/llvm/compiler/codegen/instructions/boxcall.rs index b3e69f0c..a603bcb0 100644 --- a/src/backend/llvm/compiler/codegen/instructions/boxcall.rs +++ b/src/backend/llvm/compiler/codegen/instructions/boxcall.rs @@ -9,11 +9,14 @@ pub(crate) mod invoke; mod marshal; use self::marshal as marshal_mod; use self::invoke as invoke_mod; -use crate::mir::{function::MirFunction, ValueId}; +use crate::mir::{function::MirFunction, BasicBlockId, ValueId}; +use super::builder_cursor::BuilderCursor; // BoxCall lowering (large): mirrors existing logic; kept in one function for now -pub(in super::super) fn lower_boxcall<'ctx>( +pub(in super::super) fn lower_boxcall<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, func: &MirFunction, vmap: &mut HashMap>, dst: &Option, @@ -23,6 +26,9 @@ pub(in super::super) fn lower_boxcall<'ctx>( args: &[ValueId], box_type_ids: &HashMap, entry_builder: &inkwell::builder::Builder<'ctx>, + bb_map: &std::collections::HashMap>, + preds: &std::collections::HashMap>, + block_end_values: &std::collections::HashMap>>, ) -> Result<(), String> { use crate::backend::llvm::compiler::helpers::{as_float, as_int}; use super::super::types::classify_tag; @@ -32,16 +38,14 @@ pub(in super::super) fn lower_boxcall<'ctx>( BVE::PointerValue(pv) => pv, BVE::IntValue(iv) => { let pty = codegen.context.ptr_type(AddressSpace::from(0)); - codegen - .builder - .build_int_to_ptr(iv, pty, "recv_i2p") + cursor + .emit_instr(cur_bid, |b| b.build_int_to_ptr(iv, pty, "recv_i2p")) .map_err(|e| e.to_string())? } _ => return Err("box receiver must be pointer or i64 handle".to_string()), }; - let recv_h = codegen - .builder - .build_ptr_to_int(recv_p, i64t, "recv_p2i") + let recv_h = cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(recv_p, i64t, "recv_p2i")) .map_err(|e| e.to_string())?; // Resolve type_id @@ -54,23 +58,26 @@ pub(in super::super) fn lower_boxcall<'ctx>( }; // Delegate String methods - if super::strings::try_handle_string_method(codegen, func, vmap, dst, box_val, method, args, recv_v)? { + if super::strings::try_handle_string_method( + codegen, cursor, cur_bid, func, vmap, dst, box_val, method, args, recv_v, + bb_map, preds, block_end_values, + )? { return Ok(()); } // Delegate Map methods first (to avoid Array fallback catching get/set ambiguously) - if super::maps::try_handle_map_method(codegen, func, vmap, dst, box_val, method, args, recv_h)? { + if super::maps::try_handle_map_method(codegen, cursor, cur_bid, func, vmap, dst, box_val, method, args, recv_h)? { return Ok(()); } // Delegate Array methods - if super::arrays::try_handle_array_method(codegen, func, vmap, dst, box_val, method, args, recv_h)? { + if super::arrays::try_handle_array_method(codegen, cursor, cur_bid, func, vmap, dst, box_val, method, args, recv_h)? { return Ok(()); } // Console convenience: treat println as env.console.log if method == "println" { - return super::externcall::lower_externcall(codegen, func, vmap, dst, &"env.console".to_string(), &"log".to_string(), args); + return super::externcall::lower_externcall(codegen, cursor, cur_bid, func, vmap, dst, &"env.console".to_string(), &"log".to_string(), args); } // getField/setField @@ -85,9 +92,8 @@ pub(in super::super) fn lower_boxcall<'ctx>( .module .get_function("nyash_array_length_h") .unwrap_or_else(|| codegen.module.add_function("nyash_array_length_h", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_h.into()], "alen_fallback") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into()], "alen_fallback")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -141,9 +147,8 @@ pub(in super::super) fn lower_boxcall<'ctx>( let tv = coerce_to_type(codegen, v, exp_tys[i])?; call_args.push(tv.into()); } - let call = codegen - .builder - .build_call(callee, &call_args, "user_meth_call") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &call_args, "user_meth_call")) .map_err(|e| e.to_string())?; if let Some(d) = dst { if let Some(rv) = call.try_as_basic_value().left() { @@ -158,9 +163,8 @@ pub(in super::super) fn lower_boxcall<'ctx>( use crate::backend::llvm::compiler::codegen::instructions::boxcall::marshal::get_i64 as get_i64_any; let i64t = codegen.context.i64_type(); let argc = i64t.const_int(args.len() as u64, false); - let mname = codegen - .builder - .build_global_string_ptr(method, "meth_name") + let mname = cursor + .emit_instr(cur_bid, |b| b.build_global_string_ptr(method, "meth_name")) .map_err(|e| e.to_string())?; // up to 2 args for this minimal path let a1 = if let Some(v0) = args.get(0) { get_i64_any(codegen, vmap, *v0)? } else { i64t.const_zero() }; @@ -177,9 +181,8 @@ pub(in super::super) fn lower_boxcall<'ctx>( .module .get_function("nyash.plugin.invoke_by_name_i64") .unwrap_or_else(|| codegen.module.add_function("nyash.plugin.invoke_by_name_i64", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_h.into(), mname.as_pointer_value().into(), argc.into(), a1.into(), a2.into()], "pinvoke_by_name") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into(), mname.as_pointer_value().into(), argc.into(), a1.into(), a2.into()], "pinvoke_by_name")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -194,19 +197,19 @@ pub(in super::super) fn lower_boxcall<'ctx>( if let BVE::IntValue(iv) = rv { let i64t = codegen.context.i64_type(); let zero = i64t.const_zero(); - let b1 = codegen.builder.build_int_compare(inkwell::IntPredicate::NE, iv, zero, "bool_i64_to_i1").map_err(|e| e.to_string())?; + let b1 = cursor.emit_instr(cur_bid, |bd| bd.build_int_compare(inkwell::IntPredicate::NE, iv, zero, "bool_i64_to_i1")).map_err(|e| e.to_string())?; vmap.insert(*d, b1.into()); } else { vmap.insert(*d, rv); } } crate::mir::MirType::String => { if let BVE::IntValue(iv) = rv { - let p = codegen.builder.build_int_to_ptr(iv, codegen.context.ptr_type(AddressSpace::from(0)), "str_h2p_ret").map_err(|e| e.to_string())?; + let p = cursor.emit_instr(cur_bid, |bd| bd.build_int_to_ptr(iv, codegen.context.ptr_type(AddressSpace::from(0)), "str_h2p_ret")).map_err(|e| e.to_string())?; vmap.insert(*d, p.into()); } else { vmap.insert(*d, rv); } } crate::mir::MirType::Box(_) | crate::mir::MirType::Array(_) | crate::mir::MirType::Future(_) | crate::mir::MirType::Unknown => { if let BVE::IntValue(iv) = rv { - let p = codegen.builder.build_int_to_ptr(iv, codegen.context.ptr_type(AddressSpace::from(0)), "h2p_ret").map_err(|e| e.to_string())?; + let p = cursor.emit_instr(cur_bid, |bd| bd.build_int_to_ptr(iv, codegen.context.ptr_type(AddressSpace::from(0)), "h2p_ret")).map_err(|e| e.to_string())?; vmap.insert(*d, p.into()); } else { vmap.insert(*d, rv); } } diff --git a/src/backend/llvm/compiler/codegen/instructions/builder_cursor.rs b/src/backend/llvm/compiler/codegen/instructions/builder_cursor.rs index 576152eb..eed102c0 100644 --- a/src/backend/llvm/compiler/codegen/instructions/builder_cursor.rs +++ b/src/backend/llvm/compiler/codegen/instructions/builder_cursor.rs @@ -54,7 +54,9 @@ impl<'ctx, 'b> BuilderCursor<'ctx, 'b> { pub fn at_end(&mut self, bid: BasicBlockId, bb: BasicBlock<'ctx>) { self.cur_bid = Some(bid); self.cur_llbb = Some(bb); - self.closed_by_bid.insert(bid, false); + // Mark closed if LLVM already has a terminator in this block + let has_term = unsafe { bb.get_terminator() }.is_some(); + self.closed_by_bid.insert(bid, has_term); self.builder.position_at_end(bb); } @@ -70,6 +72,12 @@ impl<'ctx, 'b> BuilderCursor<'ctx, 'b> { pub fn emit_instr(&mut self, bid: BasicBlockId, f: impl FnOnce(&Builder<'ctx>) -> T) -> T { self.assert_open(bid); + // Extra hard guard: check actual LLVM block state before inserting + if let Some(bb) = self.cur_llbb { + if unsafe { bb.get_terminator() }.is_some() { + panic!("post-terminator insert detected in bb {}", bid.as_u32()); + } + } f(self.builder) } diff --git a/src/backend/llvm/compiler/codegen/instructions/call.rs b/src/backend/llvm/compiler/codegen/instructions/call.rs index e6a2e76f..2a599c33 100644 --- a/src/backend/llvm/compiler/codegen/instructions/call.rs +++ b/src/backend/llvm/compiler/codegen/instructions/call.rs @@ -3,15 +3,18 @@ use std::collections::HashMap; use inkwell::{types::BasicMetadataTypeEnum as BMT, values::{BasicMetadataValueEnum, BasicValueEnum as BVE, FunctionValue}}; use crate::backend::llvm::context::CodegenContext; -use crate::mir::{function::MirFunction, ValueId}; +use crate::mir::{function::MirFunction, BasicBlockId, ValueId}; +use crate::backend::llvm::compiler::codegen::instructions::builder_cursor::BuilderCursor; /// Lower a direct Call where callee is provided as a const string ValueId in MIR14. /// /// Requirements: /// - `const_strs`: mapping from ValueId to the string literal value within the same function. /// - `llvm_funcs`: predeclared LLVM functions keyed by MIR function name (same string as const). -pub(in super::super) fn lower_call<'ctx>( +pub(in super::super) fn lower_call<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, _func: &MirFunction, vmap: &mut HashMap>, dst: &Option, @@ -43,12 +46,11 @@ pub(in super::super) fn lower_call<'ctx>( let v = *vmap .get(a) .ok_or_else(|| format!("call arg missing: {}", a.as_u32()))?; - let tv = coerce_to_type(codegen, v, exp_tys[i])?; + let tv = coerce_to_type_cursor(codegen, cursor, cur_bid, v, exp_tys[i])?; params.push(tv.into()); } - let call = codegen - .builder - .build_call(*target, ¶ms, "call") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(*target, ¶ms, "call")) .map_err(|e| e.to_string())?; if let Some(d) = dst { if let Some(rv) = call.try_as_basic_value().left() { @@ -58,8 +60,10 @@ pub(in super::super) fn lower_call<'ctx>( Ok(()) } -fn coerce_to_type<'ctx>( +fn coerce_to_type_cursor<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, val: BVE<'ctx>, target: BMT<'ctx>, ) -> Result, String> { @@ -71,40 +75,34 @@ fn coerce_to_type<'ctx>( if bw_src == bw_dst { Ok(iv.into()) } else if bw_src < bw_dst { - Ok(codegen - .builder - .build_int_z_extend(iv, it, "call_zext") + Ok(cursor + .emit_instr(cur_bid, |b| b.build_int_z_extend(iv, it, "call_zext")) .map_err(|e| e.to_string())? .into()) } else if bw_dst == 1 { Ok(super::super::types::to_bool(codegen.context, iv.into(), &codegen.builder)?.into()) } else { - Ok(codegen - .builder - .build_int_truncate(iv, it, "call_trunc") + Ok(cursor + .emit_instr(cur_bid, |b| b.build_int_truncate(iv, it, "call_trunc")) .map_err(|e| e.to_string())? .into()) } } - (BVE::PointerValue(pv), BMTy::IntType(it)) => Ok(codegen - .builder - .build_ptr_to_int(pv, it, "call_p2i") + (BVE::PointerValue(pv), BMTy::IntType(it)) => Ok(cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, it, "call_p2i")) .map_err(|e| e.to_string())? .into()), - (BVE::FloatValue(fv), BMTy::IntType(it)) => Ok(codegen - .builder - .build_float_to_signed_int(fv, it, "call_f2i") + (BVE::FloatValue(fv), BMTy::IntType(it)) => Ok(cursor + .emit_instr(cur_bid, |b| b.build_float_to_signed_int(fv, it, "call_f2i")) .map_err(|e| e.to_string())? .into()), - (BVE::IntValue(iv), BMTy::PointerType(pt)) => Ok(codegen - .builder - .build_int_to_ptr(iv, pt, "call_i2p") + (BVE::IntValue(iv), BMTy::PointerType(pt)) => Ok(cursor + .emit_instr(cur_bid, |b| b.build_int_to_ptr(iv, pt, "call_i2p")) .map_err(|e| e.to_string())? .into()), (BVE::PointerValue(pv), BMTy::PointerType(_)) => Ok(pv.into()), - (BVE::IntValue(iv), BMTy::FloatType(ft)) => Ok(codegen - .builder - .build_signed_int_to_float(iv, ft, "call_i2f") + (BVE::IntValue(iv), BMTy::FloatType(ft)) => Ok(cursor + .emit_instr(cur_bid, |b| b.build_signed_int_to_float(iv, ft, "call_i2f")) .map_err(|e| e.to_string())? .into()), (BVE::FloatValue(fv), BMTy::FloatType(_)) => Ok(fv.into()), diff --git a/src/backend/llvm/compiler/codegen/instructions/externcall/console.rs b/src/backend/llvm/compiler/codegen/instructions/externcall/console.rs index 8ff91bd6..3ec064cc 100644 --- a/src/backend/llvm/compiler/codegen/instructions/externcall/console.rs +++ b/src/backend/llvm/compiler/codegen/instructions/externcall/console.rs @@ -4,10 +4,13 @@ use inkwell::values::BasicValueEnum as BVE; use inkwell::AddressSpace; use crate::backend::llvm::context::CodegenContext; -use crate::mir::ValueId; +use crate::mir::{BasicBlockId, ValueId}; +use crate::backend::llvm::compiler::codegen::instructions::builder_cursor::BuilderCursor; -pub(super) fn lower_log_or_trace<'ctx>( +pub(super) fn lower_log_or_trace<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, vmap: &mut HashMap>, dst: &Option, iface_name: &str, @@ -36,9 +39,8 @@ pub(super) fn lower_log_or_trace<'ctx>( .module .get_function(fname) .unwrap_or_else(|| codegen.module.add_function(fname, fnty, None)); - let _ = codegen - .builder - .build_call(callee, &[pv.into()], "console_log_p") + let _ = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[pv.into()], "console_log_p")) .map_err(|e| e.to_string())?; if let Some(d) = dst { vmap.insert(*d, codegen.context.i64_type().const_zero().into()); @@ -50,16 +52,14 @@ pub(super) fn lower_log_or_trace<'ctx>( let arg_val = match av { BVE::IntValue(iv) => { if iv.get_type() == codegen.context.bool_type() { - codegen - .builder - .build_int_z_extend(iv, codegen.context.i64_type(), "bool2i64") + cursor + .emit_instr(cur_bid, |b| b.build_int_z_extend(iv, codegen.context.i64_type(), "bool2i64")) .map_err(|e| e.to_string())? } else if iv.get_type() == codegen.context.i64_type() { iv } else { - codegen - .builder - .build_int_s_extend(iv, codegen.context.i64_type(), "int2i64") + cursor + .emit_instr(cur_bid, |b| b.build_int_s_extend(iv, codegen.context.i64_type(), "int2i64")) .map_err(|e| e.to_string())? } } @@ -83,9 +83,8 @@ pub(super) fn lower_log_or_trace<'ctx>( .module .get_function(fname) .unwrap_or_else(|| codegen.module.add_function(fname, fnty, None)); - let _ = codegen - .builder - .build_call(callee, &[arg_val.into()], "console_log_h") + let _ = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[arg_val.into()], "console_log_h")) .map_err(|e| e.to_string())?; if let Some(d) = dst { vmap.insert(*d, codegen.context.i64_type().const_zero().into()); @@ -95,8 +94,10 @@ pub(super) fn lower_log_or_trace<'ctx>( } } -pub(super) fn lower_readline<'ctx>( +pub(super) fn lower_readline<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, vmap: &mut HashMap>, dst: &Option, args: &[ValueId], @@ -110,9 +111,8 @@ pub(super) fn lower_readline<'ctx>( .module .get_function("nyash.console.readline") .unwrap_or_else(|| codegen.module.add_function("nyash.console.readline", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[], "readline") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[], "readline")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -123,4 +123,3 @@ pub(super) fn lower_readline<'ctx>( } Ok(()) } - diff --git a/src/backend/llvm/compiler/codegen/instructions/externcall/env.rs b/src/backend/llvm/compiler/codegen/instructions/externcall/env.rs index 44a08261..a3c925e4 100644 --- a/src/backend/llvm/compiler/codegen/instructions/externcall/env.rs +++ b/src/backend/llvm/compiler/codegen/instructions/externcall/env.rs @@ -4,10 +4,13 @@ use inkwell::values::BasicValueEnum as BVE; use inkwell::AddressSpace; use crate::backend::llvm::context::CodegenContext; -use crate::mir::{function::MirFunction, ValueId}; +use crate::mir::{function::MirFunction, BasicBlockId, ValueId}; +use crate::backend::llvm::compiler::codegen::instructions::builder_cursor::BuilderCursor; -pub(super) fn lower_future_spawn_instance<'ctx>( +pub(super) fn lower_future_spawn_instance<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, vmap: &mut HashMap>, dst: &Option, args: &[ValueId], @@ -20,9 +23,8 @@ pub(super) fn lower_future_spawn_instance<'ctx>( let recv_v = *vmap.get(&args[0]).ok_or("recv missing")?; let recv_h = match recv_v { BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "recv_p2i") + BVE::PointerValue(pv) => cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "recv_p2i")) .map_err(|e| e.to_string())?, _ => return Err("spawn_instance recv must be int or ptr".to_string()), }; @@ -36,9 +38,8 @@ pub(super) fn lower_future_spawn_instance<'ctx>( .module .get_function("nyash.future.spawn_instance") .unwrap_or_else(|| codegen.module.add_function("nyash.future.spawn_instance", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_h.into(), name_p.into()], "spawn_instance") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into(), name_p.into()], "spawn_instance")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -50,8 +51,10 @@ pub(super) fn lower_future_spawn_instance<'ctx>( Ok(()) } -pub(super) fn lower_local_get<'ctx>( +pub(super) fn lower_local_get<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, func: &MirFunction, vmap: &mut HashMap>, dst: &Option, @@ -73,9 +76,8 @@ pub(super) fn lower_local_get<'ctx>( .module .get_function("nyash.env.local.get_h") .unwrap_or_else(|| codegen.module.add_function("nyash.env.local.get_h", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[name_p.into()], "local_get_h") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[name_p.into()], "local_get_h")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -98,9 +100,8 @@ pub(super) fn lower_local_get<'ctx>( | crate::mir::MirType::Unknown => { let h = rv.into_int_value(); let pty = codegen.context.ptr_type(AddressSpace::from(0)); - let ptr = codegen - .builder - .build_int_to_ptr(h, pty, "local_get_handle_to_ptr") + let ptr = cursor + .emit_instr(cur_bid, |b| b.build_int_to_ptr(h, pty, "local_get_handle_to_ptr")) .map_err(|e| e.to_string())?; vmap.insert(*d, ptr.into()); } @@ -115,8 +116,10 @@ pub(super) fn lower_local_get<'ctx>( Ok(()) } -pub(super) fn lower_box_new<'ctx>( +pub(super) fn lower_box_new<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, vmap: &mut HashMap>, dst: &Option, args: &[ValueId], @@ -137,18 +140,16 @@ pub(super) fn lower_box_new<'ctx>( .module .get_function("nyash.env.box.new") .unwrap_or_else(|| codegen.module.add_function("nyash.env.box.new", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[name_p.into()], "env_box_new") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[name_p.into()], "env_box_new")) .map_err(|e| e.to_string())?; let h = call .try_as_basic_value() .left() .ok_or("env.box.new returned void".to_string())? .into_int_value(); - let out_ptr = codegen - .builder - .build_int_to_ptr(h, i8p, "box_handle_to_ptr") + let out_ptr = cursor + .emit_instr(cur_bid, |b| b.build_int_to_ptr(h, i8p, "box_handle_to_ptr")) .map_err(|e| e.to_string())?; if let Some(d) = dst { vmap.insert(*d, out_ptr.into()); @@ -192,9 +193,8 @@ pub(super) fn lower_box_new<'ctx>( .module .get_function("nyash.box.from_f64") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_f64", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[fv.into()], "arg1_f64_to_box") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[fv.into()], "arg1_f64_to_box")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -208,9 +208,8 @@ pub(super) fn lower_box_new<'ctx>( .module .get_function("nyash.box.from_i8_string") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[pv.into()], "arg1_i8_to_box") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[pv.into()], "arg1_i8_to_box")) .map_err(|e| e.to_string())?; let rv = call.try_as_basic_value().left().ok_or("from_i8_string returned void".to_string())?; if let BVE::IntValue(h) = rv { h } else { return Err("from_i8_string ret expected i64".to_string()); } @@ -229,9 +228,8 @@ pub(super) fn lower_box_new<'ctx>( .module .get_function("nyash.box.from_f64") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_f64", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[fv.into()], "arg2_f64_to_box") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[fv.into()], "arg2_f64_to_box")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -245,9 +243,8 @@ pub(super) fn lower_box_new<'ctx>( .module .get_function("nyash.box.from_i8_string") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[pv.into()], "arg2_i8_to_box") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[pv.into()], "arg2_i8_to_box")) .map_err(|e| e.to_string())?; let rv = call.try_as_basic_value().left().ok_or("from_i8_string returned void".to_string())?; if let BVE::IntValue(h) = rv { h } else { return Err("from_i8_string ret expected i64".to_string()); } @@ -266,9 +263,8 @@ pub(super) fn lower_box_new<'ctx>( .module .get_function("nyash.box.from_f64") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_f64", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[fv.into()], "arg3_f64_to_box") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[fv.into()], "arg3_f64_to_box")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -282,9 +278,8 @@ pub(super) fn lower_box_new<'ctx>( .module .get_function("nyash.box.from_i8_string") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[pv.into()], "arg3_i8_to_box") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[pv.into()], "arg3_i8_to_box")) .map_err(|e| e.to_string())?; let rv = call.try_as_basic_value().left().ok_or("from_i8_string returned void".to_string())?; if let BVE::IntValue(h) = rv { h } else { return Err("from_i8_string ret expected i64".to_string()); } @@ -303,9 +298,8 @@ pub(super) fn lower_box_new<'ctx>( .module .get_function("nyash.box.from_f64") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_f64", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[fv.into()], "arg4_f64_to_box") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[fv.into()], "arg4_f64_to_box")) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() @@ -319,9 +313,8 @@ pub(super) fn lower_box_new<'ctx>( .module .get_function("nyash.box.from_i8_string") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[pv.into()], "arg4_i8_to_box") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[pv.into()], "arg4_i8_to_box")) .map_err(|e| e.to_string())?; let rv = call.try_as_basic_value().left().ok_or("from_i8_string returned void".to_string())?; if let BVE::IntValue(h) = rv { h } else { return Err("from_i8_string ret expected i64".to_string()); } @@ -329,22 +322,20 @@ pub(super) fn lower_box_new<'ctx>( _ => return Err("unsupported arg value for env.box.new".to_string()), }; } - let call = codegen - .builder - .build_call( + let call = cursor + .emit_instr(cur_bid, |b| b.build_call( callee, &[ty_ptr.into(), argc_val.into(), a1.into(), a2.into(), a3.into(), a4.into()], "env_box_new_i64x", - ) + )) .map_err(|e| e.to_string())?; let rv = call .try_as_basic_value() .left() .ok_or("env.box.new_i64 returned void".to_string())?; let i64v = if let BVE::IntValue(iv) = rv { iv } else { return Err("env.box.new_i64 ret expected i64".to_string()); }; - let out_ptr = codegen - .builder - .build_int_to_ptr(i64v, i8p, "box_handle_to_ptr") + let out_ptr = cursor + .emit_instr(cur_bid, |b| b.build_int_to_ptr(i64v, i8p, "box_handle_to_ptr")) .map_err(|e| e.to_string())?; if let Some(d) = dst { vmap.insert(*d, out_ptr.into()); @@ -353,4 +344,3 @@ pub(super) fn lower_box_new<'ctx>( } Err("env.box.new requires at least 1 arg".to_string()) } - diff --git a/src/backend/llvm/compiler/codegen/instructions/externcall/mod.rs b/src/backend/llvm/compiler/codegen/instructions/externcall/mod.rs index 1feddd0b..b1dff077 100644 --- a/src/backend/llvm/compiler/codegen/instructions/externcall/mod.rs +++ b/src/backend/llvm/compiler/codegen/instructions/externcall/mod.rs @@ -4,12 +4,15 @@ mod env; use std::collections::HashMap; use crate::backend::llvm::context::CodegenContext; -use crate::mir::{function::MirFunction, ValueId}; +use crate::mir::{function::MirFunction, BasicBlockId, ValueId}; use inkwell::values::BasicValueEnum as BVE; +use crate::backend::llvm::compiler::codegen::instructions::builder_cursor::BuilderCursor; /// Full ExternCall lowering dispatcher (console/debug/env.*) -pub(in super::super) fn lower_externcall<'ctx>( +pub(in super::super) fn lower_externcall<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, func: &MirFunction, vmap: &mut HashMap>, dst: &Option, @@ -22,21 +25,21 @@ pub(in super::super) fn lower_externcall<'ctx>( && matches!(method_name, "log" | "warn" | "error")) || (iface_name == "env.debug" && method_name == "trace") { - return console::lower_log_or_trace(codegen, vmap, dst, iface_name, method_name, args); + return console::lower_log_or_trace(codegen, cursor, cur_bid, vmap, dst, iface_name, method_name, args); } if iface_name == "env.console" && method_name == "readLine" { - return console::lower_readline(codegen, vmap, dst, args); + return console::lower_readline(codegen, cursor, cur_bid, vmap, dst, args); } // env.* if iface_name == "env.future" && method_name == "spawn_instance" { - return env::lower_future_spawn_instance(codegen, vmap, dst, args); + return env::lower_future_spawn_instance(codegen, cursor, cur_bid, vmap, dst, args); } if iface_name == "env.local" && method_name == "get" { - return env::lower_local_get(codegen, func, vmap, dst, args); + return env::lower_local_get(codegen, cursor, cur_bid, func, vmap, dst, args); } if iface_name == "env.box" && method_name == "new" { - return env::lower_box_new(codegen, vmap, dst, args); + return env::lower_box_new(codegen, cursor, cur_bid, vmap, dst, args); } Err(format!( @@ -44,4 +47,3 @@ pub(in super::super) fn lower_externcall<'ctx>( iface_name, method_name )) } - diff --git a/src/backend/llvm/compiler/codegen/instructions/flow.rs b/src/backend/llvm/compiler/codegen/instructions/flow.rs index a00a2f32..4fcbde5c 100644 --- a/src/backend/llvm/compiler/codegen/instructions/flow.rs +++ b/src/backend/llvm/compiler/codegen/instructions/flow.rs @@ -1,5 +1,5 @@ use inkwell::basic_block::BasicBlock; -use inkwell::values::{BasicValueEnum, PhiValue}; +use inkwell::values::{BasicValueEnum, IntValue, PhiValue}; use std::collections::HashMap; use crate::backend::llvm::context::CodegenContext; @@ -111,9 +111,19 @@ pub(in super::super) fn emit_branch<'ctx, 'b>( Vec<(ValueId, PhiValue<'ctx>, Vec<(BasicBlockId, ValueId)>)>, >, vmap: &HashMap>, + preds: &HashMap>, + block_end_values: &HashMap>>, ) -> Result<(), String> { + // Localize condition as i64 and convert to i1 via != 0 let cond_v = *vmap.get(condition).ok_or("cond missing")?; - let b = to_bool(codegen.context, cond_v, &codegen.builder)?; + let b = match cond_v { + BasicValueEnum::IntValue(_) | BasicValueEnum::PointerValue(_) | BasicValueEnum::FloatValue(_) => { + let ci = localize_to_i64(codegen, cursor, bid, *condition, bb_map, preds, block_end_values, vmap)?; + let zero = codegen.context.i64_type().const_zero(); + codegen.builder.build_int_compare(inkwell::IntPredicate::NE, ci, zero, "cond_nez").map_err(|e| e.to_string())? + } + _ => to_bool(codegen.context, cond_v, &codegen.builder)?, + }; let sealed = std::env::var("NYASH_LLVM_PHI_SEALED").ok().as_deref() == Some("1"); // then if !sealed { @@ -478,3 +488,70 @@ pub(in super::super) fn finalize_phis<'ctx, 'b>( } Ok(()) } + +/// Localize a MIR value as an i64 in the current block by creating a PHI that merges +/// predecessor snapshots. This avoids using values defined in non-dominating blocks. +/// Sealed SSA mode is assumed; when a predecessor snapshot is missing, synthesize zero. +pub(in super::super) fn localize_to_i64<'ctx, 'b>( + codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, + vid: ValueId, + bb_map: &std::collections::HashMap>, + preds: &std::collections::HashMap>, + block_end_values: &std::collections::HashMap>>, + vmap: &std::collections::HashMap>, +) -> Result, String> { + let i64t = codegen.context.i64_type(); + let cur_llbb = *bb_map.get(&cur_bid).ok_or("cur bb missing")?; + // If no predecessors, fallback to current vmap or zero + let pred_list = preds.get(&cur_bid).cloned().unwrap_or_default(); + if pred_list.is_empty() { + if let Some(v) = vmap.get(&vid).copied() { + return Ok(match v { + BasicValueEnum::IntValue(iv) => { + if iv.get_type() == i64t { iv } + else { codegen.builder.build_int_z_extend(iv, i64t, "loc_zext").map_err(|e| e.to_string())? } + } + BasicValueEnum::PointerValue(pv) => codegen.builder.build_ptr_to_int(pv, i64t, "loc_p2i").map_err(|e| e.to_string())?, + BasicValueEnum::FloatValue(fv) => codegen.builder.build_float_to_signed_int(fv, i64t, "loc_f2i").map_err(|e| e.to_string())?, + _ => i64t.const_zero(), + }); + } + return Ok(i64t.const_zero()); + } + // Build PHI at the top of current block (before any non-PHI), then restore insertion point + let saved_ip = codegen.builder.get_insert_block(); + if let Some(first) = cur_llbb.get_first_instruction() { + codegen.builder.position_before(&first); + } else { + codegen.builder.position_at_end(cur_llbb); + } + let phi = codegen.builder.build_phi(i64t, &format!("loc_i64_{}", vid.as_u32())).map_err(|e| e.to_string())?; + for p in &pred_list { + let pred_bb = *bb_map.get(p).ok_or("pred bb missing")?; + // Fetch snapshot at end of pred; if missing, synthesize zero + let mut val = block_end_values + .get(p) + .and_then(|m| m.get(&vid).copied()) + .unwrap_or_else(|| i64t.const_zero().into()); + // Coerce to i64 + use inkwell::types::BasicTypeEnum as BT; + val = match val { + BasicValueEnum::IntValue(iv) => { + if iv.get_type() == i64t { iv.into() } + else { codegen.builder.build_int_z_extend(iv, i64t, "loc_zext_p").map_err(|e| e.to_string())?.into() } + } + BasicValueEnum::PointerValue(pv) => codegen.builder.build_ptr_to_int(pv, i64t, "loc_p2i_p").map_err(|e| e.to_string())?.into(), + BasicValueEnum::FloatValue(fv) => codegen.builder.build_float_to_signed_int(fv, i64t, "loc_f2i_p").map_err(|e| e.to_string())?.into(), + _ => i64t.const_zero().into(), + }; + match val { + BasicValueEnum::IntValue(iv) => phi.add_incoming(&[(&iv, pred_bb)]), + _ => unreachable!(), + } + } + // Restore insertion point + if let Some(bb) = saved_ip { codegen.builder.position_at_end(bb); } + Ok(phi.as_basic_value().into_int_value()) +} diff --git a/src/backend/llvm/compiler/codegen/instructions/loopform.rs b/src/backend/llvm/compiler/codegen/instructions/loopform.rs index e1f407c7..4fd9f10f 100644 --- a/src/backend/llvm/compiler/codegen/instructions/loopform.rs +++ b/src/backend/llvm/compiler/codegen/instructions/loopform.rs @@ -161,3 +161,33 @@ pub fn lower_while_loopform<'ctx, 'b>( } Ok(true) } + +/// LoopForm header PHI normalization: when enabling latch→header, header gains an extra LLVM +/// predecessor (latch) that is not represented in MIR predecessors. To satisfy LLVM's verifier, +/// ensure every PHI in the header has an incoming for the latch. For Phase 1, we conservatively +/// wire a typed zero as the incoming value for the latch. +pub fn normalize_header_phis_for_latch<'ctx>( + codegen: &CodegenContext<'ctx>, + header_bid: BasicBlockId, + latch_bb: BasicBlock<'ctx>, + phis: &[(ValueId, PhiValue<'ctx>, Vec<(BasicBlockId, ValueId)>)], +) -> Result<(), String> { + use inkwell::types::BasicTypeEnum as BT; + let _ = header_bid; // reserved for future diagnostics + for (_dst, phi, _inputs) in phis { + let bt = phi.as_basic_value().get_type(); + let z = match bt { + BT::IntType(it) => it.const_zero().into(), + BT::FloatType(ft) => ft.const_zero().into(), + BT::PointerType(pt) => pt.const_zero().into(), + _ => return Err("unsupported phi type for latch incoming".to_string()), + }; + match z { + BasicValueEnum::IntValue(iv) => phi.add_incoming(&[(&iv, latch_bb)]), + BasicValueEnum::FloatValue(fv) => phi.add_incoming(&[(&fv, latch_bb)]), + BasicValueEnum::PointerValue(pv) => phi.add_incoming(&[(&pv, latch_bb)]), + _ => return Err("unsupported zero value kind for latch incoming".to_string()), + } + } + Ok(()) +} diff --git a/src/backend/llvm/compiler/codegen/instructions/maps.rs b/src/backend/llvm/compiler/codegen/instructions/maps.rs index 07575aaf..7495d644 100644 --- a/src/backend/llvm/compiler/codegen/instructions/maps.rs +++ b/src/backend/llvm/compiler/codegen/instructions/maps.rs @@ -3,11 +3,14 @@ use std::collections::HashMap; use inkwell::{values::BasicValueEnum as BVE, AddressSpace}; use crate::backend::llvm::context::CodegenContext; -use crate::mir::{function::MirFunction, ValueId}; +use crate::mir::{function::MirFunction, BasicBlockId, ValueId}; +use super::builder_cursor::BuilderCursor; /// Handle MapBox fast-paths (core-first). Returns true if handled. -pub(super) fn try_handle_map_method<'ctx>( +pub(super) fn try_handle_map_method<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, func: &MirFunction, vmap: &mut HashMap>, dst: &Option, @@ -35,9 +38,8 @@ pub(super) fn try_handle_map_method<'ctx>( .module .get_function("nyash.map.size_h") .unwrap_or_else(|| codegen.module.add_function("nyash.map.size_h", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_h.into()], "msize") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into()], "msize")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -58,9 +60,8 @@ pub(super) fn try_handle_map_method<'ctx>( let key_v = *vmap.get(&args[0]).ok_or("map.has key missing")?; let key_i = match key_v { BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "key_p2i") + BVE::PointerValue(pv) => cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "key_p2i")) .map_err(|e| e.to_string())?, _ => return Err("map.has key must be int or handle ptr".to_string()), }; @@ -69,9 +70,8 @@ pub(super) fn try_handle_map_method<'ctx>( .module .get_function("nyash.map.has_h") .unwrap_or_else(|| codegen.module.add_function("nyash.map.has_h", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_h.into(), key_i.into()], "mhas") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into(), key_i.into()], "mhas")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -97,9 +97,8 @@ pub(super) fn try_handle_map_method<'ctx>( .module .get_function("nyash.map.get_h") .unwrap_or_else(|| codegen.module.add_function("nyash.map.get_h", fnty, None)); - codegen - .builder - .build_call(callee, &[recv_h.into(), iv.into()], "mget") + cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into(), iv.into()], "mget")) .map_err(|e| e.to_string())? } BVE::PointerValue(pv) => { @@ -110,9 +109,8 @@ pub(super) fn try_handle_map_method<'ctx>( .module .get_function("nyash.box.from_i8_string") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty_conv, None)); - let kcall = codegen - .builder - .build_call(conv, &[pv.into()], "key_i8_to_handle") + let kcall = cursor + .emit_instr(cur_bid, |b| b.build_call(conv, &[pv.into()], "key_i8_to_handle")) .map_err(|e| e.to_string())?; let kh = kcall .try_as_basic_value() @@ -124,9 +122,8 @@ pub(super) fn try_handle_map_method<'ctx>( .module .get_function("nyash.map.get_hh") .unwrap_or_else(|| codegen.module.add_function("nyash.map.get_hh", fnty, None)); - codegen - .builder - .build_call(callee, &[recv_h.into(), kh.into()], "mget_hh") + cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into(), kh.into()], "mget_hh")) .map_err(|e| e.to_string())? } _ => return Err("map.get key must be int or pointer".to_string()), @@ -151,17 +148,15 @@ pub(super) fn try_handle_map_method<'ctx>( let val_v = *vmap.get(&args[1]).ok_or("map.set value missing")?; let key_i = match key_v { BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "key_p2i") + BVE::PointerValue(pv) => cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "key_p2i")) .map_err(|e| e.to_string())?, _ => return Err("map.set key must be int or handle ptr".to_string()), }; let val_i = match val_v { BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "val_p2i") + BVE::PointerValue(pv) => cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "val_p2i")) .map_err(|e| e.to_string())?, _ => return Err("map.set value must be int or handle ptr".to_string()), }; @@ -170,9 +165,8 @@ pub(super) fn try_handle_map_method<'ctx>( .module .get_function("nyash.map.set_h") .unwrap_or_else(|| codegen.module.add_function("nyash.map.set_h", fnty, None)); - let _ = codegen - .builder - .build_call(callee, &[recv_h.into(), key_i.into(), val_i.into()], "mset") + let _ = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[recv_h.into(), key_i.into(), val_i.into()], "mset")) .map_err(|e| e.to_string())?; Ok(true) } diff --git a/src/backend/llvm/compiler/codegen/instructions/mem.rs b/src/backend/llvm/compiler/codegen/instructions/mem.rs index 415bedef..66553c02 100644 --- a/src/backend/llvm/compiler/codegen/instructions/mem.rs +++ b/src/backend/llvm/compiler/codegen/instructions/mem.rs @@ -3,11 +3,14 @@ use std::collections::HashMap; use inkwell::values::BasicValueEnum; use crate::backend::llvm::context::CodegenContext; -use crate::mir::ValueId; +use crate::mir::{BasicBlockId, ValueId}; +use super::builder_cursor::BuilderCursor; // Lower Store: handle allocas with element type tracking and integer width adjust -pub(in super::super) fn lower_store<'ctx>( +pub(in super::super) fn lower_store<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, vmap: &HashMap>, allocas: &mut HashMap>, alloca_elem_types: &mut HashMap>, @@ -30,38 +33,31 @@ pub(in super::super) fn lower_store<'ctx>( let bw_src = iv.get_type().get_bit_width(); let bw_dst = t.get_bit_width(); if bw_src < bw_dst { - let adj = codegen - .builder - .build_int_z_extend(iv, t, "zext") + let adj = cursor + .emit_instr(cur_bid, |b| b.build_int_z_extend(iv, t, "zext")) .map_err(|e| e.to_string())?; - codegen - .builder - .build_store(existing, adj) + cursor + .emit_instr(cur_bid, |b| b.build_store(existing, adj)) .map_err(|e| e.to_string())?; } else if bw_src > bw_dst { - let adj = codegen - .builder - .build_int_truncate(iv, t, "trunc") + let adj = cursor + .emit_instr(cur_bid, |b| b.build_int_truncate(iv, t, "trunc")) .map_err(|e| e.to_string())?; - codegen - .builder - .build_store(existing, adj) + cursor + .emit_instr(cur_bid, |b| b.build_store(existing, adj)) .map_err(|e| e.to_string())?; } else { - codegen - .builder - .build_store(existing, iv) + cursor + .emit_instr(cur_bid, |b| b.build_store(existing, iv)) .map_err(|e| e.to_string())?; } } (BasicValueEnum::PointerValue(pv), BasicTypeEnum::PointerType(pt)) => { - let adj = codegen - .builder - .build_pointer_cast(pv, pt, "pcast") + let adj = cursor + .emit_instr(cur_bid, |b| b.build_pointer_cast(pv, pt, "pcast")) .map_err(|e| e.to_string())?; - codegen - .builder - .build_store(existing, adj) + cursor + .emit_instr(cur_bid, |b| b.build_store(existing, adj)) .map_err(|e| e.to_string())?; } (BasicValueEnum::FloatValue(fv), BasicTypeEnum::FloatType(ft)) => { @@ -69,27 +65,23 @@ pub(in super::super) fn lower_store<'ctx>( if fv.get_type() != ft { return Err("float width mismatch in store".to_string()); } - codegen - .builder - .build_store(existing, fv) + cursor + .emit_instr(cur_bid, |b| b.build_store(existing, fv)) .map_err(|e| e.to_string())?; } _ => return Err("store type mismatch".to_string()), } } else { - codegen - .builder - .build_store(existing, val) + cursor + .emit_instr(cur_bid, |b| b.build_store(existing, val)) .map_err(|e| e.to_string())?; } } else { - let slot = codegen - .builder - .build_alloca(elem_ty, &format!("slot_{}", ptr.as_u32())) + let slot = cursor + .emit_instr(cur_bid, |b| b.build_alloca(elem_ty, &format!("slot_{}", ptr.as_u32()))) .map_err(|e| e.to_string())?; - codegen - .builder - .build_store(slot, val) + cursor + .emit_instr(cur_bid, |b| b.build_store(slot, val)) .map_err(|e| e.to_string())?; allocas.insert(*ptr, slot); alloca_elem_types.insert(*ptr, elem_ty); @@ -97,8 +89,10 @@ pub(in super::super) fn lower_store<'ctx>( Ok(()) } -pub(in super::super) fn lower_load<'ctx>( +pub(in super::super) fn lower_load<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, vmap: &mut HashMap>, allocas: &mut HashMap>, alloca_elem_types: &mut HashMap>, @@ -112,17 +106,15 @@ pub(in super::super) fn lower_load<'ctx>( } else { // Default new slot as i64 for uninitialized loads let i64t = codegen.context.i64_type(); - let slot = codegen - .builder - .build_alloca(i64t, &format!("slot_{}", ptr.as_u32())) + let slot = cursor + .emit_instr(cur_bid, |b| b.build_alloca(i64t, &format!("slot_{}", ptr.as_u32()))) .map_err(|e| e.to_string())?; allocas.insert(*ptr, slot); alloca_elem_types.insert(*ptr, i64t.into()); (slot, i64t.into()) }; - let lv = codegen - .builder - .build_load(elem_ty, slot, &format!("load_{}", dst.as_u32())) + let lv = cursor + .emit_instr(cur_bid, |b| b.build_load(elem_ty, slot, &format!("load_{}", dst.as_u32()))) .map_err(|e| e.to_string())?; vmap.insert(*dst, lv); Ok(()) diff --git a/src/backend/llvm/compiler/codegen/instructions/mod.rs b/src/backend/llvm/compiler/codegen/instructions/mod.rs index 93212245..94df9e60 100644 --- a/src/backend/llvm/compiler/codegen/instructions/mod.rs +++ b/src/backend/llvm/compiler/codegen/instructions/mod.rs @@ -25,3 +25,4 @@ pub(super) use consts::lower_const; pub(super) use arith_ops::{lower_binop, lower_unary}; pub(super) use call::lower_call; pub(super) use loopform::{LoopFormContext, lower_while_loopform}; +pub(super) use loopform::normalize_header_phis_for_latch; diff --git a/src/backend/llvm/compiler/codegen/instructions/newbox.rs b/src/backend/llvm/compiler/codegen/instructions/newbox.rs index 0b742113..973f6312 100644 --- a/src/backend/llvm/compiler/codegen/instructions/newbox.rs +++ b/src/backend/llvm/compiler/codegen/instructions/newbox.rs @@ -4,11 +4,14 @@ use inkwell::AddressSpace; use inkwell::values::BasicValueEnum as BVE; use crate::backend::llvm::context::CodegenContext; -use crate::mir::ValueId; +use crate::mir::{BasicBlockId, ValueId}; +use super::builder_cursor::BuilderCursor; // NewBox lowering (subset consistent with existing code) -pub(in super::super) fn lower_newbox<'ctx>( +pub(in super::super) fn lower_newbox<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, vmap: &mut HashMap>, dst: ValueId, box_type: &str, @@ -37,9 +40,8 @@ pub(in super::super) fn lower_newbox<'ctx>( let v = *vmap.get(&args[0]).ok_or("newbox arg[0] missing")?; a1 = match v { BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "arg0_p2i") + BVE::PointerValue(pv) => cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "arg0_p2i")) .map_err(|e| e.to_string())?, _ => { return Err( @@ -53,9 +55,8 @@ pub(in super::super) fn lower_newbox<'ctx>( let v = *vmap.get(&args[1]).ok_or("newbox arg[1] missing")?; a2 = match v { BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "arg1_p2i") + BVE::PointerValue(pv) => cursor + .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "arg1_p2i")) .map_err(|e| e.to_string())?, _ => { return Err( @@ -66,9 +67,8 @@ pub(in super::super) fn lower_newbox<'ctx>( }; } let tid = i64t.const_int(type_id as u64, true); - let call = codegen - .builder - .build_call(callee, &[tid.into(), argc.into(), a1.into(), a2.into()], "birth_i64") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[tid.into(), argc.into(), a1.into(), a2.into()], "birth_i64")) .map_err(|e| e.to_string())?; let h = call .try_as_basic_value() @@ -76,9 +76,8 @@ pub(in super::super) fn lower_newbox<'ctx>( .ok_or("birth_i64 returned void".to_string())? .into_int_value(); let pty = codegen.context.ptr_type(AddressSpace::from(0)); - let ptr = codegen - .builder - .build_int_to_ptr(h, pty, "handle_to_ptr") + let ptr = cursor + .emit_instr(cur_bid, |b| b.build_int_to_ptr(h, pty, "handle_to_ptr")) .map_err(|e| e.to_string())?; vmap.insert(dst, ptr.into()); Ok(()) @@ -106,9 +105,8 @@ pub(in super::super) fn lower_newbox<'ctx>( .get_function("nyash.box.birth_h") .unwrap_or_else(|| codegen.module.add_function("nyash.box.birth_h", fn_ty, None)); let tid = i64t.const_int(type_id as u64, true); - let call = codegen - .builder - .build_call(callee, &[tid.into()], "birth") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[tid.into()], "birth")) .map_err(|e| e.to_string())?; let h_i64 = call .try_as_basic_value() @@ -116,9 +114,8 @@ pub(in super::super) fn lower_newbox<'ctx>( .ok_or("birth_h returned void".to_string())? .into_int_value(); let pty = codegen.context.ptr_type(AddressSpace::from(0)); - let ptr = codegen - .builder - .build_int_to_ptr(h_i64, pty, "handle_to_ptr") + let ptr = cursor + .emit_instr(cur_bid, |b| b.build_int_to_ptr(h_i64, pty, "handle_to_ptr")) .map_err(|e| e.to_string())?; vmap.insert(dst, ptr.into()); Ok(()) @@ -130,13 +127,11 @@ pub(in super::super) fn lower_newbox<'ctx>( .module .get_function("nyash.env.box.new") .unwrap_or_else(|| codegen.module.add_function("nyash.env.box.new", fn_ty, None)); - let tn = codegen - .builder - .build_global_string_ptr(box_type, "box_type_name") + let tn = cursor + .emit_instr(cur_bid, |b| b.build_global_string_ptr(box_type, "box_type_name")) .map_err(|e| e.to_string())?; - let call = codegen - .builder - .build_call(callee, &[tn.as_pointer_value().into()], "env_box_new") + let call = cursor + .emit_instr(cur_bid, |b| b.build_call(callee, &[tn.as_pointer_value().into()], "env_box_new")) .map_err(|e| e.to_string())?; let h_i64 = call .try_as_basic_value() @@ -144,9 +139,8 @@ pub(in super::super) fn lower_newbox<'ctx>( .ok_or("env.box.new returned void".to_string())? .into_int_value(); let pty = codegen.context.ptr_type(AddressSpace::from(0)); - let ptr = codegen - .builder - .build_int_to_ptr(h_i64, pty, "handle_to_ptr") + let ptr = cursor + .emit_instr(cur_bid, |b| b.build_int_to_ptr(h_i64, pty, "handle_to_ptr")) .map_err(|e| e.to_string())?; vmap.insert(dst, ptr.into()); Ok(()) diff --git a/src/backend/llvm/compiler/codegen/instructions/strings.rs b/src/backend/llvm/compiler/codegen/instructions/strings.rs index 7e01becb..100aada5 100644 --- a/src/backend/llvm/compiler/codegen/instructions/strings.rs +++ b/src/backend/llvm/compiler/codegen/instructions/strings.rs @@ -3,11 +3,15 @@ use std::collections::HashMap; use inkwell::{values::BasicValueEnum as BVE, AddressSpace}; use crate::backend::llvm::context::CodegenContext; -use crate::mir::{function::MirFunction, ValueId}; +use crate::mir::{function::MirFunction, BasicBlockId, ValueId}; +use super::builder_cursor::BuilderCursor; +use super::flow::localize_to_i64; /// Handle String-specific methods. Returns true if handled, false to let caller continue. -pub(super) fn try_handle_string_method<'ctx>( +pub(super) fn try_handle_string_method<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + cursor: &mut BuilderCursor<'ctx, 'b>, + cur_bid: BasicBlockId, func: &MirFunction, vmap: &mut HashMap>, dst: &Option, @@ -15,6 +19,9 @@ pub(super) fn try_handle_string_method<'ctx>( method: &str, args: &[ValueId], recv_v: BVE<'ctx>, + bb_map: &std::collections::HashMap>, + preds: &std::collections::HashMap>, + block_end_values: &std::collections::HashMap>>, ) -> Result { // Act if receiver is annotated as String/StringBox, or if the actual value is an i8* (string literal path) let is_string_recv = match func.metadata.value_types.get(box_val) { @@ -38,9 +45,9 @@ pub(super) fn try_handle_string_method<'ctx>( .module .get_function("nyash.string.concat_ss") .unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_ss", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[lp.into(), rp.into()], "concat_ss_call") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[lp.into(), rp.into()], "concat_ss_call")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -51,16 +58,18 @@ pub(super) fn try_handle_string_method<'ctx>( } return Ok(true); } - (BVE::PointerValue(lp), BVE::IntValue(ri)) => { + (BVE::PointerValue(lp), BVE::IntValue(_ri)) => { let i64t = codegen.context.i64_type(); + // Localize rhs integer in current block + let ri = localize_to_i64(codegen, cursor, cur_bid, args[0], bb_map, preds, block_end_values, vmap)?; let fnty = i8p.fn_type(&[i8p.into(), i64t.into()], false); let callee = codegen .module .get_function("nyash.string.concat_si") .unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_si", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[lp.into(), ri.into()], "concat_si_call") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[lp.into(), ri.into()], "concat_si_call")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -71,16 +80,18 @@ pub(super) fn try_handle_string_method<'ctx>( } return Ok(true); } - (BVE::IntValue(li), BVE::PointerValue(rp)) => { + (BVE::IntValue(_li), BVE::PointerValue(rp)) => { let i64t = codegen.context.i64_type(); + // Localize receiver integer in current block (box_val) + let li = localize_to_i64(codegen, cursor, cur_bid, *box_val, bb_map, preds, block_end_values, vmap)?; let fnty = i8p.fn_type(&[i64t.into(), i8p.into()], false); let callee = codegen .module .get_function("nyash.string.concat_is") .unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_is", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[li.into(), rp.into()], "concat_is_call") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[li.into(), rp.into()], "concat_is_call")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -107,9 +118,9 @@ pub(super) fn try_handle_string_method<'ctx>( .module .get_function("nyash.box.from_i8_string") .unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[p.into()], "str_ptr_to_handle") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[p.into()], "str_ptr_to_handle")) .map_err(|e| e.to_string())?; let rv = call @@ -130,9 +141,9 @@ pub(super) fn try_handle_string_method<'ctx>( .module .get_function("nyash.string.len_h") .unwrap_or_else(|| codegen.module.add_function("nyash.string.len_h", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_h.into()], "strlen_h") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[recv_h.into()], "strlen_h")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -154,46 +165,23 @@ pub(super) fn try_handle_string_method<'ctx>( // receiver preferably i8*; if it's a handle (i64), conservatively cast to i8* let recv_p = match recv_v { BVE::PointerValue(p) => p, - BVE::IntValue(iv) => codegen - .builder - .build_int_to_ptr(iv, codegen.context.ptr_type(AddressSpace::from(0)), "str_h2p_sub") + BVE::IntValue(iv) => cursor + .emit_instr(cur_bid, |b| b + .build_int_to_ptr(iv, codegen.context.ptr_type(AddressSpace::from(0)), "str_h2p_sub")) .map_err(|e| e.to_string())?, _ => return Ok(false), }; - let a0 = *vmap.get(&args[0]).ok_or("substring start arg missing")?; - let a1 = *vmap.get(&args[1]).ok_or("substring end arg missing")?; - let s = match a0 { - BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "substr_s_p2i") - .map_err(|e| e.to_string())?, - BVE::FloatValue(fv) => codegen - .builder - .build_float_to_signed_int(fv, i64t, "substr_s_f2i") - .map_err(|e| e.to_string())?, - _ => i64t.const_zero(), - }; - let e = match a1 { - BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "substr_e_p2i") - .map_err(|e| e.to_string())?, - BVE::FloatValue(fv) => codegen - .builder - .build_float_to_signed_int(fv, i64t, "substr_e_f2i") - .map_err(|e| e.to_string())?, - _ => i64t.const_zero(), - }; + // Localize start/end indices to current block via sealed snapshots (i64) + let s = localize_to_i64(codegen, cursor, cur_bid, args[0], bb_map, preds, block_end_values, vmap)?; + let e = localize_to_i64(codegen, cursor, cur_bid, args[1], bb_map, preds, block_end_values, vmap)?; let fnty = i8p.fn_type(&[i8p.into(), i64t.into(), i64t.into()], false); let callee = codegen .module .get_function("nyash.string.substring_sii") .unwrap_or_else(|| codegen.module.add_function("nyash.string.substring_sii", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_p.into(), s.into(), e.into()], "substring_call") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[recv_p.into(), s.into(), e.into()], "substring_call")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call @@ -227,9 +215,9 @@ pub(super) fn try_handle_string_method<'ctx>( .module .get_function("nyash.string.lastIndexOf_ss") .unwrap_or_else(|| codegen.module.add_function("nyash.string.lastIndexOf_ss", fnty, None)); - let call = codegen - .builder - .build_call(callee, &[recv_p.into(), needle_p.into()], "lastindexof_call") + let call = cursor + .emit_instr(cur_bid, |b| b + .build_call(callee, &[recv_p.into(), needle_p.into()], "lastindexof_call")) .map_err(|e| e.to_string())?; if let Some(d) = dst { let rv = call diff --git a/src/backend/llvm/compiler/codegen/mod.rs b/src/backend/llvm/compiler/codegen/mod.rs index d5c7a9e9..78ea4249 100644 --- a/src/backend/llvm/compiler/codegen/mod.rs +++ b/src/backend/llvm/compiler/codegen/mod.rs @@ -219,7 +219,7 @@ impl LLVMCompiler { for inst in &block.instructions { match inst { MirInstruction::NewBox { dst, box_type, args } => { - instructions::lower_newbox(&codegen, &mut vmap, *dst, box_type, args, &box_type_ids)?; + instructions::lower_newbox(&codegen, &mut cursor, *bid, &mut vmap, *dst, box_type, args, &box_type_ids)?; defined_in_block.insert(*dst); }, MirInstruction::Const { dst, value } => { @@ -236,7 +236,11 @@ impl LLVMCompiler { .const_int(*b as u64, false) .into(), ConstValue::String(s) => { - // Hoist string creation to entry block to dominate all uses + // Hoist string creation to entry block to dominate all uses. + // If the entry block already has a terminator, insert just before it. + let entry_term = unsafe { entry_bb.get_terminator() }; + if let Some(t) = entry_term { entry_builder.position_before(&t); } + else { entry_builder.position_at_end(entry_bb); } let gv = entry_builder .build_global_string_ptr(s, "str") .map_err(|e| e.to_string())?; @@ -270,7 +274,7 @@ impl LLVMCompiler { defined_in_block.insert(*dst); }, MirInstruction::Call { dst, func: callee, args, .. } => { - instructions::lower_call(&codegen, func, &mut vmap, dst, callee, args, &const_strs, &llvm_funcs)?; + instructions::lower_call(&codegen, &mut cursor, *bid, func, &mut vmap, dst, callee, args, &const_strs, &llvm_funcs)?; if let Some(d) = dst { defined_in_block.insert(*d); } } MirInstruction::BoxCall { @@ -284,6 +288,8 @@ impl LLVMCompiler { // Delegate to refactored lowering and skip legacy body instructions::lower_boxcall( &codegen, + &mut cursor, + *bid, func, &mut vmap, dst, @@ -293,31 +299,34 @@ impl LLVMCompiler { args, &box_type_ids, &entry_builder, + &bb_map, + &preds, + &block_end_values, )?; if let Some(d) = dst { defined_in_block.insert(*d); } }, MirInstruction::ExternCall { dst, iface_name, method_name, args, effects: _ } => { - instructions::lower_externcall(&codegen, func, &mut vmap, dst, iface_name, method_name, args)?; + instructions::lower_externcall(&codegen, &mut cursor, *bid, func, &mut vmap, dst, iface_name, method_name, args)?; if let Some(d) = dst { defined_in_block.insert(*d); } }, MirInstruction::UnaryOp { dst, op, operand } => { - instructions::lower_unary(&codegen, &mut vmap, *dst, op, operand)?; + instructions::lower_unary(&codegen, &mut cursor, *bid, &mut vmap, *dst, op, operand)?; defined_in_block.insert(*dst); }, MirInstruction::BinOp { dst, op, lhs, rhs } => { - instructions::lower_binop(&codegen, func, &mut vmap, *dst, op, lhs, rhs)?; + instructions::lower_binop(&codegen, &mut cursor, *bid, func, &mut vmap, *dst, op, lhs, rhs)?; defined_in_block.insert(*dst); }, MirInstruction::Compare { dst, op, lhs, rhs } => { - let out = instructions::lower_compare(&codegen, func, &vmap, op, lhs, rhs)?; + let out = instructions::lower_compare(&codegen, &mut cursor, *bid, func, &vmap, op, lhs, rhs, &bb_map, &preds, &block_end_values)?; vmap.insert(*dst, out); defined_in_block.insert(*dst); }, MirInstruction::Store { value, ptr } => { - instructions::lower_store(&codegen, &vmap, &mut allocas, &mut alloca_elem_types, value, ptr)?; + instructions::lower_store(&codegen, &mut cursor, *bid, &vmap, &mut allocas, &mut alloca_elem_types, value, ptr)?; }, MirInstruction::Load { dst, ptr } => { - instructions::lower_load(&codegen, &mut vmap, &mut allocas, &mut alloca_elem_types, dst, ptr)?; + instructions::lower_load(&codegen, &mut cursor, *bid, &mut vmap, &mut allocas, &mut alloca_elem_types, dst, ptr)?; defined_in_block.insert(*dst); }, MirInstruction::Phi { .. } => { @@ -430,7 +439,7 @@ impl LLVMCompiler { } } if !handled_by_loopform { - instructions::emit_branch(&codegen, &mut cursor, *bid, condition, then_bb, else_bb, &bb_map, &phis_by_block, &vmap)?; + instructions::emit_branch(&codegen, &mut cursor, *bid, condition, then_bb, else_bb, &bb_map, &phis_by_block, &vmap, &preds, &block_end_values)?; } } _ => { @@ -490,21 +499,25 @@ impl LLVMCompiler { if sealed_mode { instructions::flow::seal_block(&codegen, &mut cursor, func, *bid, &succs, &bb_map, &phis_by_block, &block_end_values, &vmap)?; sealed_blocks.insert(*bid); - // If all predecessors of a successor are sealed, finalize its PHIs - if let Some(succ_list) = succs.get(bid) { - for sb in succ_list { - if let Some(pre) = preds.get(sb) { - if pre.iter().all(|p| sealed_blocks.contains(p)) { - instructions::flow::finalize_phis(&codegen, &mut cursor, func, *sb, &preds, &bb_map, &phis_by_block, &block_end_values, &vmap)?; - } - } - } - } - // Note: LoopForm latch→header adds a new LLVM pred not represented in MIR. - // Header PHI normalization for this extra pred will be implemented later - // using a LoopForm-aware finalize that does not rely on MIR inputs. + // In sealed mode, we rely on seal_block to add incoming per pred when each pred is sealed. + // finalize_phis is intentionally skipped to avoid duplicate incoming entries. + // LoopForm latch→header is normalized in a separate post-pass below. } } + // LoopForm header PHI normalization when latch→header is enabled (post-pass per function) + if std::env::var("NYASH_ENABLE_LOOPFORM").ok().as_deref() == Some("1") && + std::env::var("NYASH_LOOPFORM_LATCH2HEADER").ok().as_deref() == Some("1") { + for (hdr_bid, (_dispatch_bb, _tag_phi, _payload_phi, latch_bb)) in &loopform_registry { + if let Some(phis) = phis_by_block.get(hdr_bid) { + instructions::normalize_header_phis_for_latch( + &codegen, + *hdr_bid, + *latch_bb, + phis, + )?; + } + } + } // Finalize function: ensure every basic block is closed with a terminator. // As a last resort, insert 'unreachable' into blocks that remain unterminated. for bb in llvm_func.get_basic_blocks() { @@ -515,6 +528,17 @@ impl LLVMCompiler { } // Verify the fully-lowered function once, after all blocks if !llvm_func.verify(true) { + if std::env::var("NYASH_LLVM_DUMP_ON_FAIL").ok().as_deref() == Some("1") { + let ir = codegen.module.print_to_string().to_string(); + let dump_dir = std::path::Path::new("tmp"); + let _ = std::fs::create_dir_all(dump_dir); + let dump_path = dump_dir.join(format!("llvm_fail_{}.ll", sanitize(name))); + if let Err(e) = std::fs::write(&dump_path, ir) { + eprintln!("[LLVM] failed to write IR dump: {}", e); + } else { + eprintln!("[LLVM] wrote IR dump: {}", dump_path.display()); + } + } return Err(format!("Function verification failed: {}", name)); }