// hakorune/src/mir/verification/cfg.rs
// (Rust source; listed by the repository viewer as 266 lines, 11 KiB)

use crate::mir::function::MirFunction;
use crate::mir::verification::utils;
use crate::mir::verification_types::VerificationError;
use crate::mir::{BasicBlockId, MirInstruction, ValueId};
use std::collections::{HashMap, HashSet};
/// Verify per-block CFG bookkeeping and edge-argument consistency.
///
/// Every block of `function` is inspected, and a
/// `VerificationError::ControlFlowError` is recorded when:
/// - the cached `successors` differ from `successors_from_terminator()`;
/// - a cached successor is not a key of `function.blocks`;
/// - legacy jump_args are present but their layout is unavailable;
/// - a `Jump` carrying edge-args disagrees with the legacy jump_args in
///   values or in layout;
/// - a `Branch` carries edge-args on either edge while legacy jump_args are
///   present.
///
/// Returns `Ok(())` when nothing was recorded, otherwise `Err` holding every
/// recorded error in the order it was found. Reachability is deliberately not
/// checked (see the note near the end of the body).
pub fn check_control_flow(function: &MirFunction) -> Result<(), Vec<VerificationError>> {
    let mut errors = Vec::new();

    for (block_id, block) in &function.blocks {
        // Every error raised for this block has the same shape; only the
        // reason text varies.
        let cfg_error = |reason: String| VerificationError::ControlFlowError {
            block: *block_id,
            reason,
        };

        // The cached successor list must match what the terminator implies.
        let expected_successors = block.successors_from_terminator();
        if expected_successors != block.successors {
            errors.push(cfg_error(format!(
                "Successors cache mismatch: cached={:?}, expected={:?}",
                block.successors, expected_successors
            )));
        }

        // Every cached successor has to name a block that exists.
        for successor in &block.successors {
            if function.blocks.contains_key(successor) {
                continue;
            }
            errors.push(cfg_error(format!(
                "References non-existent block {}",
                successor
            )));
        }

        // Phase 260 P0: fail fast when terminator edge-args and legacy
        // jump_args diverge. Legacy jump_args without a layout are reported
        // regardless of the terminator kind.
        if block.has_legacy_jump_args() && block.legacy_jump_args_layout().is_none() {
            errors.push(cfg_error("Legacy jump_args layout missing".to_string()));
        }

        match &block.terminator {
            // Single-edge terminator: edge-args must mirror the legacy data.
            Some(MirInstruction::Jump {
                edge_args: Some(edge_args),
                ..
            }) if block.has_legacy_jump_args() => match block.legacy_jump_args_layout() {
                None => {
                    errors.push(cfg_error(
                        "Legacy jump_args layout missing with edge-args present".to_string(),
                    ));
                }
                Some(legacy_layout) => {
                    let legacy_values = block.legacy_jump_args_values().unwrap_or_default();
                    if edge_args.values.as_slice() != legacy_values {
                        errors.push(cfg_error(format!(
                            "Edge-args values mismatch: edge_args={:?}, legacy={:?}",
                            edge_args.values, legacy_values
                        )));
                    }
                    if edge_args.layout != legacy_layout {
                        errors.push(cfg_error(format!(
                            "Edge-args layout mismatch: edge_args={:?}, legacy={:?}",
                            edge_args.layout, legacy_layout
                        )));
                    }
                }
            },
            // Multi-edge terminator: legacy jump_args and edge-args must not
            // be present together.
            Some(MirInstruction::Branch {
                then_edge_args,
                else_edge_args,
                ..
            }) => {
                let branch_has_edge_args = then_edge_args.is_some() || else_edge_args.is_some();
                if block.has_legacy_jump_args() && branch_has_edge_args {
                    errors.push(cfg_error(
                        "Legacy jump_args present on multi-edge terminator with edge-args"
                            .to_string(),
                    ));
                }
            }
            _ => {}
        }
    }

    // Unreachable blocks are allowed in MIR, so no reachability check is made.
    // They are created on purpose by break/continue/return statements via
    // switch_to_unreachable_block_with_void() so SSA construction can carry on
    // after a control-flow terminator (standard practice; compare LLVM's
    // `unreachable`). A dead code elimination pass (TODO) is expected to
    // remove them during optimization.
    if !errors.is_empty() {
        return Err(errors);
    }
    Ok(())
}
/// Verify that merge blocks do not use predecessor-defined values directly (must go through Phi)
pub fn check_merge_uses(function: &MirFunction) -> Result<(), Vec<VerificationError>> {
if crate::config::env::verify_allow_no_phi() {
return Ok(());
}
let mut errors = Vec::new();
let preds = utils::compute_predecessors(function);
let def_block = utils::compute_def_blocks(function);
let dominators = utils::compute_dominators(function);
let mut phi_dsts_in_block: HashMap<BasicBlockId, HashSet<ValueId>> = HashMap::new();
for (bid, block) in &function.blocks {
let set = phi_dsts_in_block.entry(*bid).or_default();
for sp in block.all_spanned_instructions() {
if let crate::mir::MirInstruction::Phi { dst, .. } = sp.inst {
set.insert(*dst);
}
}
}
for (bid, block) in &function.blocks {
let Some(pred_list) = preds.get(bid) else {
continue;
};
if pred_list.len() < 2 {
continue;
}
let phi_dsts = phi_dsts_in_block.get(bid);
let doms_of_block = dominators.get(bid).unwrap();
for sp in block.all_spanned_instructions() {
if let crate::mir::MirInstruction::Phi { .. } = sp.inst {
continue;
}
for used in sp.inst.used_values() {
if let Some(&db) = def_block.get(&used) {
if !doms_of_block.contains(&db) {
let is_phi_dst = phi_dsts.map(|s| s.contains(&used)).unwrap_or(false);
if !is_phi_dst {
errors.push(VerificationError::MergeUsesPredecessorValue {
value: used,
merge_block: *bid,
pred_block: db,
});
}
}
}
}
}
}
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
/// Phase 257 P1-1: Verify that PHI inputs line up with the CFG predecessors.
///
/// For every PHI found in `block.instructions` of a reachable block:
/// 1. each PHI input must name a block that is an actual predecessor
///    (no phantom predecessors);
/// 2. each reachable predecessor must be covered by some PHI input
///    (no missing inputs).
///
/// A PHI in a block with no predecessor entry at all is reported as well.
/// Blocks not reachable according to `compute_reachable_blocks` are skipped.
///
/// Returns `Ok(())` when nothing was recorded, otherwise `Err` with every
/// recorded `VerificationError::InvalidPhi`.
pub(super) fn check_phi_predecessors(
    function: &crate::mir::MirFunction,
) -> Result<(), Vec<crate::mir::verification_types::VerificationError>> {
    use crate::mir::verification::utils::compute_predecessors;
    use crate::mir::verification_types::VerificationError;
    use crate::mir::MirInstruction;
    use std::collections::HashSet;

    let mut errors = Vec::new();
    let predecessor_map = compute_predecessors(function);
    // Unreachable blocks may carry incomplete PHIs, which is acceptable, so
    // reachability is used to filter both blocks and predecessors.
    let live_blocks = compute_reachable_blocks(function);

    for (block_id, block) in &function.blocks {
        if !live_blocks.contains(block_id) {
            continue;
        }

        for instr in &block.instructions {
            let MirInstruction::Phi { dst, inputs, .. } = instr else {
                continue;
            };

            let Some(actual_preds) = predecessor_map.get(block_id) else {
                errors.push(VerificationError::InvalidPhi {
                    phi_value: *dst,
                    block: *block_id,
                    reason: format!("Block bb{} has PHI but no predecessors", block_id.0),
                });
                continue;
            };

            // Blocks that this PHI has an input for.
            let covered_preds: HashSet<_> = inputs.iter().map(|(bb, _)| *bb).collect();

            // Check 1: no input may come from a block that is not a predecessor.
            for (source_block, _value) in inputs {
                if actual_preds.contains(source_block) {
                    continue;
                }
                errors.push(VerificationError::InvalidPhi {
                    phi_value: *dst,
                    block: *block_id,
                    reason: format!(
                        "PHI dst={:?} has input from non-predecessor bb{} (actual preds: {:?})",
                        dst, source_block.0, actual_preds
                    ),
                });
            }

            // Check 2: every reachable predecessor needs an input. This is the
            // critical one: it catches the "no input for predecessor" runtime
            // error before execution.
            for &pred in actual_preds {
                let needs_input = live_blocks.contains(&pred);
                if !needs_input || covered_preds.contains(&pred) {
                    continue;
                }
                errors.push(VerificationError::InvalidPhi {
                    phi_value: *dst,
                    block: *block_id,
                    reason: format!(
                        "PHI dst={:?} missing input from reachable predecessor bb{} (has inputs from: {:?})",
                        dst, pred.0, covered_preds
                    ),
                });
            }
        }
    }

    if !errors.is_empty() {
        return Err(errors);
    }
    Ok(())
}
/// Collect the ids of all blocks reachable from the entry block.
///
/// The walk is breadth-first over each block's cached `successors`, starting
/// at `BasicBlockId(0)`, which this function treats as the entry block. A
/// successor id with no matching entry in `function.blocks` is still included
/// in the result, but nothing is explored past it.
fn compute_reachable_blocks(
    function: &crate::mir::MirFunction,
) -> std::collections::HashSet<crate::mir::BasicBlockId> {
    use crate::mir::BasicBlockId;
    use std::collections::{HashSet, VecDeque};

    // Entry block is always bb0.
    let entry = BasicBlockId(0);
    let mut reachable = HashSet::from([entry]);
    let mut pending = VecDeque::from([entry]);

    while let Some(current) = pending.pop_front() {
        let Some(block) = function.blocks.get(&current) else {
            continue;
        };
        for &successor in &block.successors {
            // `insert` returns false for ids already seen, so each block is
            // enqueued at most once.
            if !reachable.insert(successor) {
                continue;
            }
            pending.push_back(successor);
        }
    }

    reachable
}