🔍 Add extensive LLVM debug logging and builder position tracking

ChatGPT5's investigation revealed builder position management issues:
- Added verbose logging for block lowering and terminator emission
- Enhanced position_at_end calls before all terminator operations
- Added debug output for emit_jump/emit_branch operations
- Improved snapshot vs vmap fallback reporting in seal_block

Key findings:
- Sealed SSA snapshot mechanism is working correctly
- Block terminator issues persist due to builder position drift
- Main.has_in_stack/2 shows terminator missing after emit

Next steps:
- Add immediate terminator verification after each emit
- Track builder position changes in complex operations
- Investigate specific functions where builder drift occurs

This commit adds diagnostic infrastructure to pinpoint
where LLVM IR builder position gets misaligned.
This commit is contained in:
Selfhosting Dev
2025-09-12 13:20:59 +09:00
parent fc18a925fd
commit 696b282ae8
4 changed files with 121 additions and 6 deletions

View File

@ -44,6 +44,34 @@ Next StepsSealed SSA 段階導入)
3) 足りない型整合String/Box/Array→i8*)があれば `coerce_to_type` を拡張。 3) 足りない型整合String/Box/Array→i8*)があれば `coerce_to_type` を拡張。
4) グリーン後、Sealed をデフォルトONにする前にスモーク一式で回帰確認。 4) グリーン後、Sealed をデフォルトONにする前にスモーク一式で回帰確認。
TODO — Sealed SSA 段階導入(実装タスク)
- [ ] block_end_values 追加LLVM Lower 内の per-BB 終端スナップショット)
- 追加先: `src/backend/llvm/compiler/codegen/mod.rs`
- 形式: `HashMap<BasicBlockId, HashMap<ValueId, BasicValueEnum>>`
- タイミング: 各BBの命令をすべて Lower した「直後」、終端命令を発行する「直前」に `vmap.clone()` を保存
- 目的: `seal_block` で pred 終端時点の値を安定取得する(現在の vmap 直接参照をやめる)
- [ ] `seal_block` をスナップショット参照に切替
- 対象: `src/backend/llvm/compiler/codegen/instructions/flow.rs::seal_block`
- 取得: `block_end_values[bid].get(in_vid)` を用いて `val` を取得
- フォールバック: もしスナップショットが無ければ(例外ケース)従来の `vmap` を参照し、警告ログを出す
- ログ: `NYASH_LLVM_TRACE_PHI=1` 時に `[PHI] sealed add pred_bb=.. val=.. ty=.. (snapshot)` と明示
- [ ] 非 sealed 経路の維持(回帰防止)
- `emit_jump/emit_branch` は sealed=OFF の時のみ incoming を追加(現状仕様を維持)
- sealed=ON の時は incoming 配線は一切行わず、`seal_block` のみで完結
- [ ] 型整合coerceの継続強化
- 対象: `src/backend/llvm/compiler/codegen/instructions/flow.rs::coerce_to_type`
- 方針: PHI の型は i8* 優先String/Box/Array を含む場合。ptr/int 混在は明示 cast で橋渡し
- 検討: i1 ブリッジboolの zext/trunc の置き場所は PHI 外側に寄せる(必要時)
- [ ] 代表スモークの回帰
- 再現対象: `apps/selfhost/tools/dep_tree_min_string.nyash`
- 実行: `NYASH_LLVM_PHI_SEALED=1 NYASH_LLVM_TRACE_PHI=1 NYASH_DISABLE_PLUGINS=1 ./target/release/nyash --backend llvm apps/selfhost/tools/dep_tree_min_string.nyash`
- 期待: `PHINode should have one entry for each predecessor` が解消し、OFF/ON で等価な結果
補足(実装メモ)
- `block_end_values` の寿命はコード生成のライフタイムに束縛されるため、`BasicValueEnum<'ctx>` の所有は問題なし(`Context` が生きている間は有効)
- 収集は `compile_function` の BB ループ内で行い、`phis_by_block` と同スコープで管理すると取り回しが良い
- 将来の拡張として `value_at_end_of_block(var, bb)` ヘルパを導入し、sealed/unsealed を内部で吸収する API 化を検討
Plan — PHI/SSA Hardening (Sealed SSA) Plan — PHI/SSA Hardening (Sealed SSA)
- Sealed SSA 入れ替え(安全に段階導入) - Sealed SSA 入れ替え(安全に段階導入)
- Blockごとに `sealed: bool``incomplete_phis: Map<Var, Phi>` を保持 - Blockごとに `sealed: bool``incomplete_phis: Map<Var, Phi>` を保持

View File

@ -89,7 +89,23 @@ pub(in super::super) fn lower_compare<'ctx>(
return Ok(b.into()); return Ok(b.into());
} }
} }
let out = if let (Some(li), Some(ri)) = (as_int(lv), as_int(rv)) { let out = if let (Some(mut li), Some(mut ri)) = (as_int(lv), as_int(rv)) {
// Normalize integer widths: extend the narrower to match the wider to satisfy LLVM
let lw = li.get_type().get_bit_width();
let rw = ri.get_type().get_bit_width();
if lw != rw {
if lw < rw {
li = codegen
.builder
.build_int_z_extend(li, ri.get_type(), "icmp_zext_l")
.map_err(|e| e.to_string())?;
} else {
ri = codegen
.builder
.build_int_z_extend(ri, li.get_type(), "icmp_zext_r")
.map_err(|e| e.to_string())?;
}
}
use CompareOp as C; use CompareOp as C;
let pred = match op { let pred = match op {
C::Eq => inkwell::IntPredicate::EQ, C::Eq => inkwell::IntPredicate::EQ,

View File

@ -87,6 +87,9 @@ pub(in super::super) fn emit_jump<'ctx>(
} }
} }
let tbb = *bb_map.get(target).ok_or("target bb missing")?; let tbb = *bb_map.get(target).ok_or("target bb missing")?;
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] emit_jump: {} -> {}", bid.as_u32(), target.as_u32());
}
codegen codegen
.builder .builder
.build_unconditional_branch(tbb) .build_unconditional_branch(tbb)
@ -178,6 +181,9 @@ pub(in super::super) fn emit_branch<'ctx>(
} }
let tbb = *bb_map.get(then_bb).ok_or("then bb missing")?; let tbb = *bb_map.get(then_bb).ok_or("then bb missing")?;
let ebb = *bb_map.get(else_bb).ok_or("else bb missing")?; let ebb = *bb_map.get(else_bb).ok_or("else bb missing")?;
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] emit_branch: {} -> then {} / else {}", bid.as_u32(), then_bb.as_u32(), else_bb.as_u32());
}
codegen codegen
.builder .builder
.build_conditional_branch(b, tbb, ebb) .build_conditional_branch(b, tbb, ebb)
@ -252,6 +258,9 @@ pub(in super::super) fn seal_block<'ctx>(
BasicBlockId, BasicBlockId,
Vec<(ValueId, PhiValue<'ctx>, Vec<(BasicBlockId, ValueId)>)>, Vec<(ValueId, PhiValue<'ctx>, Vec<(BasicBlockId, ValueId)>)>,
>, >,
// Snapshot of value map at end of each predecessor block
block_end_values: &HashMap<BasicBlockId, HashMap<ValueId, BasicValueEnum<'ctx>>>,
// Fallback: current vmap (used only if snapshot missing)
vmap: &HashMap<ValueId, BasicValueEnum<'ctx>>, vmap: &HashMap<ValueId, BasicValueEnum<'ctx>>,
) -> Result<(), String> { ) -> Result<(), String> {
if let Some(slist) = succs.get(&bid) { if let Some(slist) = succs.get(&bid) {
@ -259,7 +268,24 @@ pub(in super::super) fn seal_block<'ctx>(
if let Some(pl) = phis_by_block.get(sb) { if let Some(pl) = phis_by_block.get(sb) {
for (_dst, phi, inputs) in pl { for (_dst, phi, inputs) in pl {
if let Some((_, in_vid)) = inputs.iter().find(|(pred, _)| pred == &bid) { if let Some((_, in_vid)) = inputs.iter().find(|(pred, _)| pred == &bid) {
let mut val = *vmap.get(in_vid).ok_or("phi incoming (seal) value missing")?; // Prefer the predecessor's block-end snapshot; fall back to current vmap
let snap_opt = block_end_values
.get(&bid)
.and_then(|m| m.get(in_vid).copied());
let mut val = if let Some(sv) = snap_opt {
sv
} else {
match vmap.get(in_vid).copied() {
Some(v) => v,
None => {
let msg = format!(
"phi incoming (seal) missing: pred={} succ_bb={} in_vid={} (no snapshot)",
bid.as_u32(), sb.as_u32(), in_vid.as_u32()
);
return Err(msg);
}
}
};
let pred_bb = *bb_map.get(&bid).ok_or("pred bb missing")?; let pred_bb = *bb_map.get(&bid).ok_or("pred bb missing")?;
val = coerce_to_type(codegen, phi, val)?; val = coerce_to_type(codegen, phi, val)?;
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") { if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
@ -269,10 +295,11 @@ pub(in super::super) fn seal_block<'ctx>(
.print_to_string() .print_to_string()
.to_string(); .to_string();
eprintln!( eprintln!(
"[PHI] sealed add pred_bb={} val={} ty={}", "[PHI] sealed add pred_bb={} val={} ty={}{}",
bid.as_u32(), bid.as_u32(),
in_vid.as_u32(), in_vid.as_u32(),
tys tys,
if snap_opt.is_some() { " (snapshot)" } else { " (vmap)" }
); );
} }
match val { match val {

View File

@ -122,6 +122,8 @@ impl LLVMCompiler {
crate::mir::BasicBlockId, crate::mir::BasicBlockId,
Vec<(ValueId, PhiValue, Vec<(crate::mir::BasicBlockId, ValueId)>)>, Vec<(ValueId, PhiValue, Vec<(crate::mir::BasicBlockId, ValueId)>)>,
> = HashMap::new(); > = HashMap::new();
// Snapshot of values at the end of each basic block (for sealed-SSA PHI wiring)
let mut block_end_values: HashMap<crate::mir::BasicBlockId, HashMap<ValueId, BasicValueEnum>> = HashMap::new();
// Build successors map (for optional sealed-SSA PHI wiring) // Build successors map (for optional sealed-SSA PHI wiring)
let mut succs: HashMap<crate::mir::BasicBlockId, Vec<crate::mir::BasicBlockId>> = HashMap::new(); let mut succs: HashMap<crate::mir::BasicBlockId, Vec<crate::mir::BasicBlockId>> = HashMap::new();
for (bid, block) in &func.blocks { for (bid, block) in &func.blocks {
@ -204,6 +206,9 @@ impl LLVMCompiler {
{ {
codegen.builder.position_at_end(bb); codegen.builder.position_at_end(bb);
} }
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] lowering bb={}", bid.as_u32());
}
let block = func.blocks.get(bid).unwrap(); let block = func.blocks.get(bid).unwrap();
for inst in &block.instructions { for inst in &block.instructions {
match inst { match inst {
@ -320,9 +325,16 @@ impl LLVMCompiler {
} }
_ => { /* ignore other ops for 11.1 */ }, _ => { /* ignore other ops for 11.1 */ },
} }
// Capture a snapshot of the value map at the end of this block's body
block_end_values.insert(*bid, vmap.clone());
} }
// Emit terminators and provide a conservative fallback when absent // Emit terminators and provide a conservative fallback when absent
if let Some(term) = &block.terminator { if let Some(term) = &block.terminator {
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] terminator present for bb={}", bid.as_u32());
}
// Ensure builder is positioned at current block before emitting terminator
codegen.builder.position_at_end(bb);
match term { match term {
MirInstruction::Return { value } => { MirInstruction::Return { value } => {
instructions::emit_return(&codegen, func, &vmap, value)?; instructions::emit_return(&codegen, func, &vmap, value)?;
@ -333,9 +345,30 @@ impl LLVMCompiler {
MirInstruction::Branch { condition, then_bb, else_bb } => { MirInstruction::Branch { condition, then_bb, else_bb } => {
instructions::emit_branch(&codegen, *bid, condition, then_bb, else_bb, &bb_map, &phis_by_block, &vmap)?; instructions::emit_branch(&codegen, *bid, condition, then_bb, else_bb, &bb_map, &phis_by_block, &vmap)?;
} }
_ => {} _ => {
// Ensure builder is at this block before fallback branch
codegen.builder.position_at_end(bb);
// Unknown/unhandled terminator: conservatively branch forward
if let Some(next_bid) = block_ids.get(bi + 1) {
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] unknown terminator fallback: bb={} -> next={}", bid.as_u32(), next_bid.as_u32());
}
instructions::emit_jump(&codegen, *bid, next_bid, &bb_map, &phis_by_block, &vmap)?;
} else {
let entry_first = func.entry_block;
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] unknown terminator fallback: bb={} -> entry={}", bid.as_u32(), entry_first.as_u32());
}
instructions::emit_jump(&codegen, *bid, &entry_first, &bb_map, &phis_by_block, &vmap)?;
}
}
} }
} else { } else {
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] no terminator in MIR for bb={} (fallback)", bid.as_u32());
}
// Ensure builder is at this block before fallback branch
codegen.builder.position_at_end(bb);
// Fallback: branch to the next block if any; otherwise loop to entry // Fallback: branch to the next block if any; otherwise loop to entry
if let Some(next_bid) = block_ids.get(bi + 1) { if let Some(next_bid) = block_ids.get(bi + 1) {
instructions::emit_jump(&codegen, *bid, next_bid, &bb_map, &phis_by_block, &vmap)?; instructions::emit_jump(&codegen, *bid, next_bid, &bb_map, &phis_by_block, &vmap)?;
@ -348,15 +381,26 @@ impl LLVMCompiler {
// Extra guard: if the current LLVM basic block still lacks a terminator for any reason, // Extra guard: if the current LLVM basic block still lacks a terminator for any reason,
// insert a conservative branch to the next block (or entry if last) to satisfy verifier. // insert a conservative branch to the next block (or entry if last) to satisfy verifier.
if unsafe { bb.get_terminator() }.is_none() { if unsafe { bb.get_terminator() }.is_none() {
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] extra guard inserting fallback for bb={}", bid.as_u32());
}
// Ensure the builder is positioned at the end of this block before inserting the fallback terminator
codegen.builder.position_at_end(bb);
if let Some(next_bid) = block_ids.get(bi + 1) { if let Some(next_bid) = block_ids.get(bi + 1) {
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] fallback terminator: bb={} -> next={}", bid.as_u32(), next_bid.as_u32());
}
instructions::emit_jump(&codegen, *bid, next_bid, &bb_map, &phis_by_block, &vmap)?; instructions::emit_jump(&codegen, *bid, next_bid, &bb_map, &phis_by_block, &vmap)?;
} else { } else {
let entry_first = func.entry_block; let entry_first = func.entry_block;
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[LLVM] fallback terminator: bb={} -> entry={}", bid.as_u32(), entry_first.as_u32());
}
instructions::emit_jump(&codegen, *bid, &entry_first, &bb_map, &phis_by_block, &vmap)?; instructions::emit_jump(&codegen, *bid, &entry_first, &bb_map, &phis_by_block, &vmap)?;
} }
} }
if sealed_mode { if sealed_mode {
instructions::flow::seal_block(&codegen, *bid, &succs, &bb_map, &phis_by_block, &vmap)?; instructions::flow::seal_block(&codegen, *bid, &succs, &bb_map, &phis_by_block, &block_end_values, &vmap)?;
} }
} }
// Verify the fully-lowered function once, after all blocks // Verify the fully-lowered function once, after all blocks