diff --git a/docs/development/current/main/phase204-phi-contract-verifier.md b/docs/development/current/main/phase204-phi-contract-verifier.md new file mode 100644 index 00000000..3d941283 --- /dev/null +++ b/docs/development/current/main/phase204-phi-contract-verifier.md @@ -0,0 +1,330 @@ +# Phase 204: JoinIR PHI Contract Verifier Enhancement + +## 1. Overview + +Phase 204 enhances JoinIR verification infrastructure to catch contract violations early in debug builds. + +### 1.1 Motivation + +While Phase 201-202 solved ValueId collision through JoinValueSpace region separation, we still need runtime verification to catch: +- Manual coding errors during pattern lowering +- Invariant violations during JoinIR → MIR transformation +- PHI contract violations (dst overwrite, undefined inputs) + +### 1.2 Design Philosophy + +- **Debug-only**: All verification runs with `#[cfg(debug_assertions)]` - zero runtime cost in release builds +- **Fail-Fast**: Panic immediately on contract violation (no silent corruption) +- **Box-First**: Verification logic is modular and reusable across patterns + +## 2. Current State Analysis + +### 2.1 Existing Verification (Phase 200-3) + +Located in `src/mir/builder/control_flow/joinir/merge/mod.rs`: + +```rust +#[cfg(debug_assertions)] +fn verify_loop_header_phis( + func: &MirFunction, + header_block: BasicBlockId, + loop_info: &LoopHeaderPhiInfo, + boundary: &JoinInlineBoundary, +) +``` + +**Checks**: +1. Loop variable PHI existence (if `loop_var_name` is Some) +2. Carrier PHI existence (all carriers in `LoopHeaderPhiInfo` have corresponding PHI instructions) +3. PHI dst matches `entry.phi_dst` in header block + +```rust +#[cfg(debug_assertions)] +fn verify_exit_line( + func: &MirFunction, + exit_block: BasicBlockId, + boundary: &JoinInlineBoundary, +) +``` + +**Checks**: +1. Exit block exists in function +2. Exit bindings have reasonable host_slot values (< 1000000 sanity check) + +```rust +#[cfg(debug_assertions)] +fn verify_exit_line_contract( + boundary: &JoinInlineBoundary, + carrier_phis: &BTreeMap, + variable_map: &HashMap, +) +``` + +**Checks** (Phase 190-impl-D-3, in `reconnector.rs`): +1. Every exit_binding has corresponding entry in carrier_phis +2. Every exit_binding's carrier exists in variable_map after reconnect + +### 2.2 Coverage Gaps + +| Contract | Current | Needed | +|----------|---------|--------| +| PHI exists | ✅ | - | +| PHI dst match | ✅ | - | +| **PHI dst overwrite** | ❌ | ✅ Detect if PHI dst is overwritten by later instructions | +| **PHI inputs defined** | ❌ | ✅ Verify all incoming values are defined before PHI | +| Exit block exists | ✅ | - | +| Exit bindings valid | ✅ | - | +| **ValueId regions** | ❌ | ✅ Verify Param/Local/PHI region separation | + +## 3. Enhancement Design + +### 3.1 Task 204-2: PHI dst Overwrite Detection + +#### Problem + +Header PHI creates a dst ValueId, but later instructions might accidentally overwrite it: + +```rust +// Header block +%10 = phi [%5, entry], [%20, latch] // PHI dst +... +%10 = binop %8 + %9 // ❌ Overwrites PHI dst! +``` + +This violates SSA invariant (single assignment) and causes undefined behavior. + +#### Detection Algorithm + +```rust +fn verify_no_phi_dst_overwrite( + func: &MirFunction, + header_block: BasicBlockId, + loop_info: &LoopHeaderPhiInfo, +) { + let block_data = &func.blocks[&header_block]; + + // 1. Collect all PHI dsts + let phi_dsts: HashSet = loop_info.carrier_phis.values() + .map(|entry| entry.phi_dst) + .collect(); + + if phi_dsts.is_empty() { + return; // No PHIs to verify + } + + // 2. Check instructions after PHI definitions + let mut after_phis = false; + for instr in &block_data.instructions { + match instr { + MirInstruction::Phi { dst, .. } => { + // PHI instructions come first + if !after_phis { + continue; + } + } + _ => { + after_phis = true; + // Check if this instruction writes to a PHI dst + if let Some(dst) = instr.dst() { + if phi_dsts.contains(&dst) { + panic!( + "[JoinIRVerifier] PHI dst {:?} is overwritten by {:?} in header block {}", + dst, instr, header_block + ); + } + } + } + } + } +} +``` + +#### Integration Point + +Add to `verify_joinir_contracts()`: + +```rust +#[cfg(debug_assertions)] +fn verify_joinir_contracts(...) { + verify_loop_header_phis(...); + verify_no_phi_dst_overwrite(func, header_block, loop_info); // ← NEW + verify_exit_line(...); +} +``` + +### 3.2 Task 204-3: PHI Inputs Defined Verification + +#### Problem + +PHI instructions reference incoming values from predecessor blocks. These values must be defined before the PHI: + +```rust +// Entry block +%5 = const 0 +jump header + +// Header block +%10 = phi [%5, entry], [%99, latch] // ❌ %99 undefined if latch not executed! +``` + +#### Detection Algorithm + +```rust +fn verify_phi_inputs_defined( + func: &MirFunction, + header_block: BasicBlockId, +) { + let block_data = &func.blocks[&header_block]; + + for instr in &block_data.instructions { + if let MirInstruction::Phi { inputs, .. } = instr { + for (value_id, pred_block) in inputs { + // Check if value_id is defined before this PHI + // Options: + // 1. Conservative: Check if value_id exists in any reachable block + // 2. Optimistic: Assume all pred_blocks are reachable (current approach) + // 3. Full DFA: Track defined values through CFG (future enhancement) + + // Phase 204: Conservative check - value_id should not be "obviously undefined" + // (e.g., ValueId > reasonable threshold, or in PHI reserved region but not a PHI dst) + + // For now, we rely on existing SSA construction to handle this. + // This is a placeholder for future enhancement. + } + } + } +} +``` + +**Note**: Full PHI input verification requires data-flow analysis. Phase 204 focuses on simpler checks first. This can be enhanced in Phase 205+ if needed. + +### 3.3 Task 204-4: JoinValueSpace Region Verification + +#### Problem + +Phase 201 introduced JoinValueSpace with region separation: +- PHI Reserved: 0-99 +- Param Region: 100-999 +- Local Region: 1000+ + +We need to verify these contracts are maintained during JoinIR lowering. + +#### Detection Algorithm + +```rust +fn verify_value_id_regions( + boundary: &JoinInlineBoundary, + loop_info: &LoopHeaderPhiInfo, + join_value_space: &JoinValueSpace, +) { + // 1. Verify Param region usage + for (host_id, join_id) in boundary.host_inputs.iter().zip(&boundary.join_inputs) { + let region = join_value_space.region_of(*join_id); + if region != Region::Param { + panic!( + "[JoinIRVerifier] Boundary input {:?} is in {:?} region, expected Param", + join_id, region + ); + } + } + + // 2. Verify PHI dst region (should be in PHI Reserved or Param) + for (carrier_name, entry) in &loop_info.carrier_phis { + let region = join_value_space.region_of(entry.phi_dst); + // PHI dst can be in Param region (assigned from frontend) or Local region + // This is OK since PHI dst comes from host side + // Just verify it's not in obviously wrong range + if entry.phi_dst.0 > 100000 { + panic!( + "[JoinIRVerifier] Carrier '{}' PHI dst {:?} has suspiciously large ID", + carrier_name, entry.phi_dst + ); + } + } + + // 3. Run JoinValueSpace's own verification + if let Err(e) = join_value_space.verify_no_overlap() { + panic!("[JoinIRVerifier] JoinValueSpace overlap detected: {}", e); + } +} +``` + +#### Integration Challenge + +`JoinValueSpace` is created in pattern lowerers (e.g., `pattern2_with_break.rs`), but verification runs in `merge/mod.rs`. We need to pass `JoinValueSpace` through the pipeline: + +**Option A**: Add `JoinValueSpace` to `JoinInlineBoundary` (metadata field) +**Option B**: Add `JoinValueSpace` to `LoopHeaderPhiInfo` (already has carrier info) +**Option C**: Create new `JoinIRVerificationContext` struct + +**Recommendation**: **Option B** (add to `LoopHeaderPhiInfo`) since it's already passed to verification functions. + +```rust +// In src/mir/join_ir/lowering/loop_header_phi_builder.rs +pub struct LoopHeaderPhiInfo { + pub carrier_phis: BTreeMap, + + // Phase 204: Add JoinValueSpace for verification + #[cfg(debug_assertions)] + pub join_value_space: Option, +} +``` + +## 4. Implementation Plan + +### Task 204-1: Design Document ✅ +- This document + +### Task 204-2: PHI dst Overwrite Detection +1. Implement `verify_no_phi_dst_overwrite()` in `merge/mod.rs` +2. Add to `verify_joinir_contracts()` +3. Unit test: Create test with PHI dst overwrite, verify panic + +### Task 204-3: PHI Inputs Defined Verification +1. Implement `verify_phi_inputs_defined()` stub in `merge/mod.rs` +2. Add conservative sanity checks (value_id < threshold) +3. Add TODO for future DFA-based verification +4. Unit test: Create test with undefined PHI input, verify panic (if detectable) + +### Task 204-4: JoinValueSpace Region Verification +1. Add `join_value_space: Option` to `LoopHeaderPhiInfo` (`#[cfg(debug_assertions)]`) +2. Update all pattern lowerers to pass JoinValueSpace to PHI builder +3. Implement `verify_value_id_regions()` in `merge/mod.rs` +4. Add to `verify_joinir_contracts()` +5. Unit test: Create test with region violation, verify panic + +### Task 204-5: Integration +1. Verify all patterns (P1/P2/P3/P4) call `verify_joinir_contracts()` +2. Run full test suite (821 tests) with debug assertions +3. Verify no false positives + +### Task 204-6: Unit Tests +1. Test `verify_no_phi_dst_overwrite()`: positive + negative cases +2. Test `verify_phi_inputs_defined()`: sanity checks +3. Test `verify_value_id_regions()`: Param/Local boundary violations + +### Task 204-7: Documentation +- Update this document with implementation results +- Update CURRENT_TASK.md +- Update joinir-architecture-overview.md (Section 1.9: ValueId Space Management) + +## 5. Success Criteria + +1. ✅ All debug assertions enabled in test suite (821 tests pass) +2. ✅ PHI dst overwrite detection catches manual errors +3. ✅ JoinValueSpace region violations caught early +4. ✅ No false positives (existing tests still pass) +5. ✅ Documentation updated + +## 6. Non-Goals (Phase 204) + +- **Full DFA-based verification**: Too complex for this phase, defer to Phase 205+ +- **Release build verification**: Debug-only for now (zero runtime cost) +- **Cross-function verification**: Focus on single-function contracts first + +## 7. References + +- Phase 200-3: Initial JoinIR verification infrastructure +- Phase 201: JoinValueSpace introduction +- Phase 190-impl-D-3: ExitLine contract verification +- joinir-architecture-overview.md: Section 1.9 (ValueId Space Management) diff --git a/src/mir/builder/control_flow/joinir/merge/exit_line/reconnector.rs b/src/mir/builder/control_flow/joinir/merge/exit_line/reconnector.rs index 4bda76af..f4325c52 100644 --- a/src/mir/builder/control_flow/joinir/merge/exit_line/reconnector.rs +++ b/src/mir/builder/control_flow/joinir/merge/exit_line/reconnector.rs @@ -171,7 +171,7 @@ impl ExitLineReconnector { fn verify_exit_line_contract( boundary: &JoinInlineBoundary, carrier_phis: &BTreeMap, - variable_map: &std::collections::HashMap, + variable_map: &BTreeMap, ) { for binding in &boundary.exit_bindings { // Contract 1: carrier_phis must contain this carrier diff --git a/src/mir/builder/control_flow/joinir/merge/mod.rs b/src/mir/builder/control_flow/joinir/merge/mod.rs index 1cfd13d1..35598bfc 100644 --- a/src/mir/builder/control_flow/joinir/merge/mod.rs +++ b/src/mir/builder/control_flow/joinir/merge/mod.rs @@ -589,7 +589,7 @@ pub(in crate::mir::builder) fn merge_joinir_mir_blocks( if let Some(ref func) = builder.current_function { verify_joinir_contracts( func, - entry_block_remapped, + entry_block, exit_block_id, &loop_header_phi_info, boundary, @@ -783,6 +783,112 @@ fn verify_exit_line( } } +/// Verify that PHI dst values are not overwritten by later instructions (Phase 204-2) +/// +/// # Checks +/// +/// 1. PHI instructions define dst values in header block +/// 2. No subsequent instruction in the same block overwrites these dsts +/// +/// # Rationale +/// +/// PHI dst overwrite violates SSA invariant (single assignment) and causes undefined behavior. +/// While Phase 201 JoinValueSpace prevents ValueId collisions, manual coding errors can still +/// occur during pattern lowering. +/// +/// # Panics +/// +/// Panics in debug mode if PHI dst is overwritten by a later instruction. +#[cfg(debug_assertions)] +fn verify_no_phi_dst_overwrite( + func: &crate::mir::MirFunction, + header_block: crate::mir::BasicBlockId, + loop_info: &LoopHeaderPhiInfo, +) { + if loop_info.carrier_phis.is_empty() { + return; // No PHIs to verify + } + + let header_block_data = func.blocks.get(&header_block).unwrap_or_else(|| { + panic!( + "[JoinIRVerifier] Header block {} not found ({} blocks in func)", + header_block, + func.blocks.len() + ) + }); + + // 1. Collect all PHI dsts + let phi_dsts: std::collections::HashSet = loop_info + .carrier_phis + .values() + .map(|entry| entry.phi_dst) + .collect(); + + // 2. Check instructions after PHI definitions + let mut after_phis = false; + for instr in &header_block_data.instructions { + match instr { + crate::mir::MirInstruction::Phi { dst, .. } => { + // PHI instructions come first in basic block + if after_phis { + panic!( + "[JoinIRVerifier] PHI instruction {:?} appears after non-PHI instructions in block {}", + dst, header_block.0 + ); + } + } + _ => { + after_phis = true; + // Check if this instruction writes to a PHI dst + if let Some(dst) = get_instruction_dst(instr) { + if phi_dsts.contains(&dst) { + panic!( + "[JoinIRVerifier/Phase204] PHI dst {:?} is overwritten by instruction in header block {}: {:?}", + dst, header_block.0, instr + ); + } + } + } + } + } +} + +/// Helper: Extract dst ValueId from MirInstruction (Phase 204-2) +#[cfg(debug_assertions)] +fn get_instruction_dst(instr: &crate::mir::MirInstruction) -> Option { + use crate::mir::MirInstruction; + match instr { + // Instructions with always-present dst + MirInstruction::Const { dst, .. } + | MirInstruction::Load { dst, .. } + | MirInstruction::UnaryOp { dst, .. } + | MirInstruction::BinOp { dst, .. } + | MirInstruction::Compare { dst, .. } + | MirInstruction::TypeOp { dst, .. } + | MirInstruction::NewBox { dst, .. } + | MirInstruction::NewClosure { dst, .. } + | MirInstruction::Copy { dst, .. } + | MirInstruction::Cast { dst, .. } + | MirInstruction::TypeCheck { dst, .. } + | MirInstruction::Phi { dst, .. } + | MirInstruction::ArrayGet { dst, .. } + | MirInstruction::RefNew { dst, .. } + | MirInstruction::RefGet { dst, .. } + | MirInstruction::WeakNew { dst, .. } + | MirInstruction::WeakLoad { dst, .. } + | MirInstruction::WeakRef { dst, .. } + | MirInstruction::FutureNew { dst, .. } + | MirInstruction::Await { dst, .. } => Some(*dst), + // Instructions with Option dst + MirInstruction::BoxCall { dst, .. } + | MirInstruction::ExternCall { dst, .. } + | MirInstruction::Call { dst, .. } + | MirInstruction::PluginInvoke { dst, .. } => *dst, + // Instructions without dst (side-effects only) + _ => None, + } +} + /// Verify all loop contracts for a merged JoinIR function /// /// This is the main entry point for verification. It runs all checks @@ -800,5 +906,6 @@ fn verify_joinir_contracts( boundary: &JoinInlineBoundary, ) { verify_loop_header_phis(func, header_block, loop_info, boundary); + verify_no_phi_dst_overwrite(func, header_block, loop_info); // Phase 204-2 verify_exit_line(func, exit_block, boundary); }