fix(mir): conservative PHI box for If/Loop and Stage1 resolver SSA

This commit is contained in:
nyash-codex
2025-11-18 09:26:39 +09:00
parent fa087eeeea
commit 8b37e9711d
6 changed files with 181 additions and 19 deletions

View File

@ -96,3 +96,20 @@ Status: planning構造バグ切り出しフェーズ・挙動は変えない
- 小さいテストを書く → verifier で赤を出す → LoopBuilder / IfForm / MirBuilder を直す → 緑になるまで繰り返す。 - 小さいテストを書く → verifier で赤を出す → LoopBuilder / IfForm / MirBuilder を直す → 緑になるまで繰り返す。
- これにより、StageB / Stage1 / selfhost の土台となる Rust MIR 層が安定し、その上に Nyash selfhost 側の MirBuilder を載せやすくする。 - これにより、StageB / Stage1 / selfhost の土台となる Rust MIR 層が安定し、その上に Nyash selfhost 側の MirBuilder を載せやすくする。
- なお、StageB 最小ハーネス(`stageb_min_sample.hako`については、Rust MIR builder 経由の直接 VM / MIR verify は既に緑であり、残っている stack overflow は `compiler_stageb.hako` 側の Nyash ボックス連鎖に起因するものと考えられる。Rust 層では `emit_unified_call` / BoxCall / legacy 警戒の再入防止フラグと再帰深度カウンタを導入済みであり、以降は Nyash 側に浅い再帰ガードを置いて原因ボックスを特定するフェーズへ引き継ぐ。 - なお、StageB 最小ハーネス(`stageb_min_sample.hako`については、Rust MIR builder 経由の直接 VM / MIR verify は既に緑であり、残っている stack overflow は `compiler_stageb.hako` 側の Nyash ボックス連鎖に起因するものと考えられる。Rust 層では `emit_unified_call` / BoxCall / legacy 警戒の再入防止フラグと再帰深度カウンタを導入済みであり、以降は Nyash 側に浅い再帰ガードを置いて原因ボックスを特定するフェーズへ引き継ぐ。
### 実績メモConservative PHI Box 完了)
- IfForm / LoopForm v2 の PHI 生成は「Conservative PHI Box」実装により根治済み:
- If については `merge_modified_vars``src/mir/builder/phi.rs`)が **全変数の union に対して PHI を張る保守的実装**に切り替わり、
- 片側の branch でしか定義されない変数(`then` のみ / `else` のみ)についても、
- pre_if スナップショットまたは `const void` を使った安全な SSA 定義に統一。
- Loop については LoopForm v2`LoopFormBuilder`)側で header/exit PHI の扱いを整理し、
- Carrier / Pinned / Invariant に分離したうえで exit PHI から `variable_map` を一貫して再束縛する構造に寄せた。
- Box 理論の観点では:
- Conservative Box: まず「安全側」に全変数に PHI を張る(正しさ優先)。
- Elimination Box: 将来の最適化フェーズで、使われない PHI を削る(効率最適化)。
- この Conservative PHI 実装により、Stage1 using resolver 一式の代表テスト:
- `mir_parserbox_parse_program2_harness_parses_minimal_source`
- `mir_stage1_using_resolver_min_fragment_verifies`
- `mir_stage1_using_resolver_full_collect_entries_verifies`
がすべて緑になっており、「片側 branch だけで定義された変数の nondominating use」系のバグは Rust 側では止血済み。***

View File

@ -53,17 +53,25 @@ pub(crate) fn depth(builder: &super::MirBuilder) -> usize {
} }
/// Add predecessor edge metadata to a basic block. /// Add predecessor edge metadata to a basic block.
/// 📦 Hotfix 6: Auto-create block if it doesn't exist yet
/// This ensures add_predecessor() works even before start_new_block() is called.
pub(crate) fn add_predecessor( pub(crate) fn add_predecessor(
builder: &mut MirBuilder, builder: &mut MirBuilder,
block: BasicBlockId, block: BasicBlockId,
pred: BasicBlockId, pred: BasicBlockId,
) -> Result<(), String> { ) -> Result<(), String> {
if let Some(ref mut function) = builder.current_function { if let Some(ref mut function) = builder.current_function {
// 📦 Hotfix 6: Ensure block exists (same as start_new_block logic)
// Create block if not present, without changing current_block
if !function.blocks.contains_key(&block) {
function.add_block(super::BasicBlock::new(block));
}
if let Some(bb) = function.get_block_mut(block) { if let Some(bb) = function.get_block_mut(block) {
bb.add_predecessor(pred); bb.add_predecessor(pred);
return Ok(()); return Ok(());
} }
return Err(format!("Block {} not found", block)); return Err(format!("Block {} not found (impossible after auto-create)", block));
} }
Err("No current function".to_string()) Err("No current function".to_string())
} }

View File

@ -20,26 +20,101 @@ impl MirBuilder {
else_map_end_opt: &Option<std::collections::HashMap<String, super::ValueId>>, else_map_end_opt: &Option<std::collections::HashMap<String, super::ValueId>>,
skip_var: Option<&str>, skip_var: Option<&str>,
) -> Result<(), String> { ) -> Result<(), String> {
// 📦 Conservative PHI Generation (Box Theory)
// Generate PHI for ALL variables present in ANY branch (union), not just modified ones.
// This ensures correctness: if a variable is defined in one branch but not the other,
// we use the predecessor value as fallback (or skip if undefined everywhere).
//
// Theory: Conservative ∘ Elimination = Minimal SSA
// - Conservative (this): correctness-first, generate all PHIs
// - Elimination (future): efficiency optimization, remove unused PHIs
use std::collections::HashSet;
// Collect all variables from all sources
let mut all_vars = HashSet::new();
all_vars.extend(pre_if_snapshot.keys().cloned());
all_vars.extend(then_map_end.keys().cloned());
if let Some(ref else_map) = else_map_end_opt {
all_vars.extend(else_map.keys().cloned());
}
// Keep track of which vars were changed (for debugging/hints)
let changed = crate::mir::phi_core::if_phi::compute_modified_names( let changed = crate::mir::phi_core::if_phi::compute_modified_names(
pre_if_snapshot, pre_if_snapshot,
then_map_end, then_map_end,
else_map_end_opt, else_map_end_opt,
); );
use std::collections::HashSet;
let changed_set: HashSet<String> = changed.iter().cloned().collect(); let changed_set: HashSet<String> = changed.iter().cloned().collect();
for name in changed {
// Debug: Conservative PHI trace
let trace_conservative = std::env::var("NYASH_CONSERVATIVE_PHI_TRACE").ok().as_deref() == Some("1");
if trace_conservative {
eprintln!("[Conservative PHI] all_vars count: {}", all_vars.len());
eprintln!("[Conservative PHI] pre_if_snapshot: {:?}", pre_if_snapshot.keys().collect::<Vec<_>>());
eprintln!("[Conservative PHI] then_map_end: {:?}", then_map_end.keys().collect::<Vec<_>>());
if let Some(ref else_map) = else_map_end_opt {
eprintln!("[Conservative PHI] else_map_end: {:?}", else_map.keys().collect::<Vec<_>>());
}
}
// Generate PHI for all variables (Conservative)
for name in all_vars {
if skip_var.map(|s| s == name).unwrap_or(false) { if skip_var.map(|s| s == name).unwrap_or(false) {
if trace_conservative {
eprintln!("[Conservative PHI] Skipping {}: matches skip_var", name);
}
continue; continue;
} }
let pre = match pre_if_snapshot.get(name.as_str()) {
Some(v) => *v, // 📦 Conservative PHI: Fallback to predecessor value if not defined in a branch
None => continue, // unknown before-if; skip // This handles variables defined in only one branch (e.g., bb16 defines %51, but bb15 doesn't)
}; let pre_val_opt = pre_if_snapshot.get(name.as_str()).copied();
let then_v = then_map_end.get(name.as_str()).copied().unwrap_or(pre);
let else_v = else_map_end_opt // Get values from each branch, falling back to predecessor value
let then_v_opt = then_map_end.get(name.as_str()).copied()
.or(pre_val_opt);
let else_v_opt = else_map_end_opt
.as_ref() .as_ref()
.and_then(|m| m.get(name.as_str()).copied()) .and_then(|m| m.get(name.as_str()).copied())
.unwrap_or(pre); .or(pre_val_opt);
// 📦 Conservative PHI: Handle variables defined in only ONE branch
// If variable exists in one branch but not the other (and not in predecessor),
// create a fresh "undefined" ValueId for the missing branch.
// This ensures all control flow paths have a definition at the merge point.
let (then_v, else_v) = match (then_v_opt, else_v_opt) {
(Some(tv), Some(ev)) => {
if trace_conservative {
eprintln!("[Conservative PHI] Generating PHI for {}: then={:?} else={:?}", name, tv, ev);
}
(tv, ev)
},
(Some(tv), None) => {
// Variable exists in then branch but not else or predecessor
// Emit a 'const void' instruction to represent undefined value
let undef = crate::mir::builder::emission::constant::emit_void(self);
if trace_conservative {
eprintln!("[Conservative PHI] One-branch variable {}: then={:?} else=void({:?})", name, tv, undef);
}
(tv, undef)
},
(None, Some(ev)) => {
// Variable exists in else branch but not then or predecessor
// Emit a 'const void' instruction to represent undefined value
let undef = crate::mir::builder::emission::constant::emit_void(self);
if trace_conservative {
eprintln!("[Conservative PHI] One-branch variable {}: then=void({:?}) else={:?}", name, undef, ev);
}
(undef, ev)
},
(None, None) => {
// Variable doesn't exist anywhere - skip
if trace_conservative {
eprintln!("[Conservative PHI] Skipping {}: undefined everywhere", name);
}
continue
}
};
// フェーズM: 常にPHI命令を使用no_phi_mode撤廃 // フェーズM: 常にPHI命令を使用no_phi_mode撤廃
// incoming の predecessor は "実際に merge に遷移してくる出口ブロック" を使用する // incoming の predecessor は "実際に merge に遷移してくる出口ブロック" を使用する
let mut inputs: Vec<(super::BasicBlockId, super::ValueId)> = Vec::new(); let mut inputs: Vec<(super::BasicBlockId, super::ValueId)> = Vec::new();

View File

@ -154,9 +154,11 @@ impl<'a> LoopBuilder<'a> {
body: Vec<ASTNode>, body: Vec<ASTNode>,
) -> Result<ValueId, String> { ) -> Result<ValueId, String> {
// Check feature flag for LoopForm PHI v2 // Check feature flag for LoopForm PHI v2
// 📦 Default to true (v2 is now stable and default)
// Set NYASH_LOOPFORM_PHI_V2=0 to use legacy version if needed
let use_loopform_v2 = std::env::var("NYASH_LOOPFORM_PHI_V2") let use_loopform_v2 = std::env::var("NYASH_LOOPFORM_PHI_V2")
.map(|v| v == "1" || v.to_lowercase() == "true") .map(|v| v == "1" || v.to_lowercase() == "true")
.unwrap_or(false); .unwrap_or(true);
if std::env::var("NYASH_LOOPFORM_DEBUG").is_ok() { if std::env::var("NYASH_LOOPFORM_DEBUG").is_ok() {
eprintln!("[build_loop] use_loopform_v2={}", use_loopform_v2); eprintln!("[build_loop] use_loopform_v2={}", use_loopform_v2);
@ -165,7 +167,8 @@ impl<'a> LoopBuilder<'a> {
if use_loopform_v2 { if use_loopform_v2 {
self.build_loop_with_loopform(condition, body) self.build_loop_with_loopform(condition, body)
} else { } else {
eprintln!("⚠️ WARNING: Using legacy loop builder! Set NYASH_LOOPFORM_PHI_V2=1"); eprintln!("⚠️ WARNING: Using legacy loop builder! LoopForm v2 is now default.");
eprintln!("⚠️ Legacy version may be deprecated in future releases.");
self.build_loop_legacy(condition, body) self.build_loop_legacy(condition, body)
} }
} }
@ -222,7 +225,10 @@ impl<'a> LoopBuilder<'a> {
let exit_id = self.new_block(); let exit_id = self.new_block();
// Jump from current block to preheader // Jump from current block to preheader
let entry_block = self.current_block()?;
self.emit_jump(preheader_id)?; self.emit_jump(preheader_id)?;
// 📦 Hotfix 6: Add CFG predecessor for preheader (same as legacy version)
crate::mir::builder::loops::add_predecessor(self.parent_builder, preheader_id, entry_block)?;
// Initialize LoopFormBuilder with preheader and header blocks // Initialize LoopFormBuilder with preheader and header blocks
let mut loopform = LoopFormBuilder::new(preheader_id, header_id); let mut loopform = LoopFormBuilder::new(preheader_id, header_id);
@ -259,6 +265,8 @@ impl<'a> LoopBuilder<'a> {
// Pass 2: Emit preheader (copies and jump to header) // Pass 2: Emit preheader (copies and jump to header)
loopform.emit_preheader(self)?; loopform.emit_preheader(self)?;
// 📦 Hotfix 6: Add CFG predecessor for header from preheader (same as legacy version)
crate::mir::builder::loops::add_predecessor(self.parent_builder, header_id, preheader_id)?;
// Pass 3: Emit header PHIs (incomplete, only preheader edge) // Pass 3: Emit header PHIs (incomplete, only preheader edge)
self.set_current_block(header_id)?; self.set_current_block(header_id)?;
@ -301,8 +309,23 @@ impl<'a> LoopBuilder<'a> {
} }
} }
self.emit_branch(cond_value, body_id, exit_id)?; self.emit_branch(cond_value, body_id, exit_id)?;
// 📦 Hotfix 6: Add CFG predecessors for branch targets (Cytron et al. 1991 requirement)
// This ensures exit_block.predecessors is populated before Exit PHI generation
if std::env::var("NYASH_LOOPFORM_DEBUG").is_ok() {
eprintln!("[loopform/condition] BEFORE add_predecessor: exit_id={:?}, branch_source={:?}", exit_id, branch_source_block);
}
crate::mir::builder::loops::add_predecessor(self.parent_builder, body_id, branch_source_block)?;
crate::mir::builder::loops::add_predecessor(self.parent_builder, exit_id, branch_source_block)?;
if std::env::var("NYASH_LOOPFORM_DEBUG").is_ok() { if std::env::var("NYASH_LOOPFORM_DEBUG").is_ok() {
eprintln!("[loopform/condition] AFTER emit_branch: current_block={:?}", self.current_block()?); eprintln!("[loopform/condition] AFTER emit_branch: current_block={:?}", self.current_block()?);
eprintln!("[loopform/condition] Added predecessors: body={:?} exit={:?} from={:?}",
body_id, exit_id, branch_source_block);
// Verify predecessors were added
if let Some(ref func) = self.parent_builder.current_function {
if let Some(exit_block) = func.blocks.get(&exit_id) {
eprintln!("[loopform/condition] exit_block.predecessors = {:?}", exit_block.predecessors);
}
}
} }
// Lower loop body // Lower loop body
@ -336,6 +359,8 @@ impl<'a> LoopBuilder<'a> {
} }
self.emit_jump(header_id)?; self.emit_jump(header_id)?;
// 📦 Hotfix 6: Add CFG predecessor for header from latch (same as legacy version)
crate::mir::builder::loops::add_predecessor(self.parent_builder, header_id, latch_id)?;
// Pass 4: Seal PHIs with latch values // Pass 4: Seal PHIs with latch values
loopform.seal_phis(self, actual_latch_id)?; loopform.seal_phis(self, actual_latch_id)?;
@ -1256,6 +1281,19 @@ impl<'a> LoopFormOps for LoopBuilder<'a> {
} }
} }
fn get_block_predecessors(&self, block: BasicBlockId) -> std::collections::HashSet<BasicBlockId> {
// 📦 Hotfix 6: Get actual CFG predecessors for PHI validation
if let Some(ref func) = self.parent_builder.current_function {
if let Some(bb) = func.blocks.get(&block) {
bb.predecessors.clone()
} else {
std::collections::HashSet::new() // Non-existent blocks have no predecessors
}
} else {
std::collections::HashSet::new()
}
}
fn is_parameter(&self, name: &str) -> bool { fn is_parameter(&self, name: &str) -> bool {
// A parameter is a true function parameter that doesn't change across iterations // A parameter is a true function parameter that doesn't change across iterations
// Pinned receivers (__pin$*$@*) are NOT parameters - they're carriers // Pinned receivers (__pin$*$@*) are NOT parameters - they're carriers

View File

@ -331,8 +331,15 @@ impl LoopFormBuilder {
.push((branch_source_block, carrier.header_phi)); .push((branch_source_block, carrier.header_phi));
} }
// 📦 Hotfix 6: Get actual CFG predecessors for exit block
let exit_preds = ops.get_block_predecessors(exit_id);
if debug {
eprintln!("[DEBUG/exit_phi] Exit block predecessors: {:?}", exit_preds);
}
// Add break snapshot values // Add break snapshot values
// 📦 Hotfix 2: Skip non-existent blocks (幽霊ブロック対策) // 📦 Hotfix 2: Skip non-existent blocks (幽霊ブロック対策)
// 📦 Hotfix 6: Skip blocks not in CFG predecessors (unreachable continuation対策)
for (block_id, snapshot) in exit_snapshots { for (block_id, snapshot) in exit_snapshots {
// Validate block existence before adding to PHI inputs // Validate block existence before adding to PHI inputs
if !ops.block_exists(*block_id) { if !ops.block_exists(*block_id) {
@ -342,6 +349,15 @@ impl LoopFormBuilder {
continue; // Skip ghost blocks continue; // Skip ghost blocks
} }
// Hotfix 6: Skip blocks not in actual CFG predecessors
// This catches unreachable continuation blocks created after break/continue
if !exit_preds.contains(block_id) {
if debug {
eprintln!("[DEBUG/exit_phi] ⚠️ Skipping block {:?} (not in CFG predecessors)", block_id);
}
continue; // Skip blocks not actually branching to exit
}
for (var_name, &value) in snapshot { for (var_name, &value) in snapshot {
all_vars all_vars
.entry(var_name.clone()) .entry(var_name.clone())
@ -409,6 +425,11 @@ pub trait LoopFormOps {
/// Used to skip non-existent blocks when building exit PHIs. /// Used to skip non-existent blocks when building exit PHIs.
fn block_exists(&self, block: BasicBlockId) -> bool; fn block_exists(&self, block: BasicBlockId) -> bool;
/// 📦 Get actual CFG predecessors for a block (Hotfix 6: PHI input validation)
/// Returns the set of blocks that actually branch to this block in the CFG.
/// Used to validate exit PHI inputs against actual control flow.
fn get_block_predecessors(&self, block: BasicBlockId) -> std::collections::HashSet<BasicBlockId>;
/// Check if a variable is a function parameter /// Check if a variable is a function parameter
fn is_parameter(&self, name: &str) -> bool; fn is_parameter(&self, name: &str) -> bool;
@ -520,6 +541,11 @@ mod tests {
true true
} }
fn get_block_predecessors(&self, _block: BasicBlockId) -> std::collections::HashSet<BasicBlockId> {
// MockOps: return empty set (no CFG in test)
std::collections::HashSet::new()
}
fn is_parameter(&self, name: &str) -> bool { fn is_parameter(&self, name: &str) -> bool {
self.params.iter().any(|p| p == name) self.params.iter().any(|p| p == name)
} }

View File

@ -68,14 +68,12 @@ static box Stage1UsingResolverMini {
let mut mc = MirCompiler::with_options(false); let mut mc = MirCompiler::with_options(false);
let cr = mc.compile(ast).expect("compile"); let cr = mc.compile(ast).expect("compile");
// DEBUG: Print MIR structure for _collect_using_entries // DEBUG: Print MIR structure for ALL functions (temporary)
for (fname, func) in &cr.module.functions { for (fname, func) in &cr.module.functions {
if fname.contains("_collect_using_entries") { eprintln!("\n=== MIR for {} ===", fname);
eprintln!("\n=== MIR for {} ===", fname); let printer = MirPrinter::new();
let printer = MirPrinter::new(); let mir_text = printer.print_function(func);
let mir_text = printer.print_function(func); eprintln!("{}", mir_text);
eprintln!("{}", mir_text);
}
} }
let mut verifier = MirVerifier::new(); let mut verifier = MirVerifier::new();