diff --git a/mir_25_instruction_mapping.md b/mir_25_instruction_mapping.md
new file mode 100644
index 00000000..9c3f39e6
--- /dev/null
+++ b/mir_25_instruction_mapping.md
@@ -0,0 +1,85 @@
+# MIR 25-Instruction Mapping Plan
+
+## Current State: 32 Instructions → Target: 25 Instructions
+
+### Tier-0: Universal Core (8 instructions)
+1. **Const** ✅ (already exists)
+2. **BinOp** ✅ (already exists)
+3. **Compare** ✅ (already exists)
+4. **Branch** ✅ (already exists)
+5. **Jump** ✅ (already exists)
+6. **Phi** ✅ (already exists)
+7. **Call** ✅ (already exists)
+8. **Return** ✅ (already exists)
+
+### Tier-1: Nyash Semantics (12 instructions)
+9. **NewBox** ✅ (already exists)
+10. **BoxFieldLoad** ← RENAME from Load
+11. **BoxFieldStore** ← RENAME from Store
+12. **BoxCall** ✅ (already exists)
+13. **Safepoint** ✅ (already exists as separate instruction)
+14. **RefGet** ✅ (already exists, name unchanged)
+15. **RefSet** ✅ (already exists, name unchanged)
+16. **WeakNew** ✅ (already exists)
+17. **WeakLoad** ✅ (already exists)
+18. **WeakCheck** ← NEW (check weak reference validity)
+19. **Send** ← NEW (Bus communication)
+20. **Recv** ← NEW (Bus communication)
+
+### Tier-2: Implementation Assistance (5 instructions)
+21. **TailCall** ← NEW (tail call optimization)
+22. **Adopt** ← NEW (ownership transfer)
+23. **Release** ← NEW (ownership release)
+24. **MemCopy** ← NEW (optimized memory operations)
+25. 
**AtomicFence** ← RENAME from BarrierRead/BarrierWrite
+
+## Instructions to Remove/Consolidate (net reduction: 7 instructions, 32 → 25)
+- **UnaryOp** → Merge into BinOp or eliminate
+- **Load** → Consolidate into BoxFieldLoad
+- **Store** → Consolidate into BoxFieldStore
+- **ArrayGet** → Use BoxFieldLoad with array indexing
+- **ArraySet** → Use BoxFieldStore with array indexing
+- **Cast** → Eliminate or merge into BinOp
+- **Copy** → Eliminate (optimization-specific)
+- **Debug** → Remove from MIR (keep as separate system)
+- **Print** → Use Call with print function
+- **Throw** → Use Call with exception function
+- **Catch** → Use Call with catch handler
+- **RefNew** → Eliminate (use NewBox)
+- **TypeCheck** → Use Compare with type introspection
+- **BarrierRead/BarrierWrite** → Consolidate into AtomicFence
+- **FutureNew/FutureSet/Await** → Use BoxCall with Future methods
+
+## Effect System Mapping
+
+### Current → New Effect Categories
+- **Pure**: Const, BinOp, Compare, Phi, BoxFieldLoad, RefGet, WeakNew, WeakLoad, WeakCheck
+- **Mut**: NewBox (also allocates), BoxFieldStore, RefSet, Adopt, Release, MemCopy
+- **Io**: Send, Recv, Safepoint, AtomicFence
+- **Control**: Branch, Jump, Return, TailCall
+- **Context-dependent**: Call, BoxCall
+
+## Implementation Strategy
+
+### Phase 1: Core Instruction Consolidation
+1. Rename Load → BoxFieldLoad
+2. Rename Store → BoxFieldStore
+3. Remove eliminated instructions
+4. Add missing new instructions
+
+### Phase 2: Effect System Update
+1. Update effect classification to 4 categories
+2. Update all instruction effect mappings
+3. Implement effect-based optimization rules
+
+### Phase 3: Backend Updates
+1. Update Interpreter backend
+2. Update VM backend
+3. Update WASM backend
+4. Ensure all support exactly 25 instructions
+
+### Phase 4: Verification System
+1. Implement ownership forest verification
+2. Add strong cycle detection
+3. Add weak reference safety checks
+4. 
Implement RefSet ownership validation
\ No newline at end of file
diff --git a/src/mir/effect.rs b/src/mir/effect.rs
index efa43a0d..17f8462c 100644
--- a/src/mir/effect.rs
+++ b/src/mir/effect.rs
@@ -10,51 +10,65 @@ use std::fmt;
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub struct EffectMask(u16);
 
-/// Individual effect types
+/// Individual effect types for the 4-category MIR hierarchy
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Effect {
-    /// Pure computation with no side effects
+    /// Pure computation with no side effects (Tier-0: reorderable, CSE/LICM eligible)
     Pure = 0x0001,
-    /// Reads from heap/memory (but doesn't modify)
-    ReadHeap = 0x0002,
-    /// Writes to heap/memory
-    WriteHeap = 0x0004,
-    /// Performs I/O operations (file, network, console)
-    IO = 0x0008,
-    /// P2P/network communication
-    P2P = 0x0010,
-    /// Foreign Function Interface calls
-    FFI = 0x0020,
-    /// May panic or throw exceptions
-    Panic = 0x0040,
-    /// Allocates memory
-    Alloc = 0x0080,
-    /// Accesses global state
-    Global = 0x0100,
-    /// Thread/async operations
-    Async = 0x0200,
-    /// Unsafe operations
-    Unsafe = 0x0400,
-    /// Debug/logging operations
-    Debug = 0x0800,
-    /// Memory barrier operations
-    Barrier = 0x1000,
+    /// Mutable operations (Tier-1: same Box/Field dependency preservation)
+    Mut = 0x0002,
+    /// I/O operations (Tier-1: no reordering, side effects present)
+    Io = 0x0004,
+    /// Control flow operations (Tier-0: affects execution flow)
+    Control = 0x0008,
+
+    // Legacy effects for compatibility (will be mapped to above categories)
+    /// Reads from heap/memory (maps to Pure if read-only)
+    ReadHeap = 0x0010,
+    /// Writes to heap/memory (maps to Mut)
+    WriteHeap = 0x0020,
+    /// P2P/network communication (maps to Io)
+    P2P = 0x0040,
+    /// Foreign Function Interface calls (maps to Io)
+    FFI = 0x0080,
+    /// May panic or throw exceptions (maps to Io)
+    Panic = 0x0100,
+    /// Allocates memory (maps to Mut)
+    Alloc = 0x0200,
+    /// Accesses global state (maps to Io)
+    Global = 0x0400,
+    /// Thread/async operations (maps to Io)
+    Async = 0x0800,
+    /// Unsafe operations (maps to Io)
+    Unsafe = 0x1000,
+    /// Debug/logging operations (maps to Io)
+    Debug = 0x2000,
+    /// Memory barrier operations (maps to Io)
+    Barrier = 0x4000,
 }
 
 impl EffectMask {
     /// No effects - pure computation
     pub const PURE: Self = Self(Effect::Pure as u16);
 
+    /// Mutable operations (writes, ownership changes)
+    pub const MUT: Self = Self(Effect::Mut as u16);
+
+    /// I/O operations (external effects, cannot reorder)
+    pub const IO: Self = Self(Effect::Io as u16);
+
+    /// Control flow operations
+    pub const CONTROL: Self = Self(Effect::Control as u16);
+
+    // Legacy constants for compatibility
     /// Memory read effects
     pub const READ: Self = Self(Effect::ReadHeap as u16);
+    pub const read: Self = Self::READ; // Lowercase alias kept for source compatibility; prefer `READ`
 
     /// Memory write effects (includes read)
     pub const WRITE: Self = Self((Effect::WriteHeap as u16) | (Effect::ReadHeap as u16));
 
-    /// I/O effects
-    pub const IO: Self = Self(Effect::IO as u16);
-
-    /// P2P communication effects
+    /// P2P communication effects
     pub const P2P: Self = Self(Effect::P2P as u16);
 
     /// Panic/exception effects
@@ -115,15 +129,50 @@ impl EffectMask {
 
     /// Check if the computation is pure (no side effects)
     pub fn is_pure(self) -> bool {
-        self.0 == 0 || self.0 == (Effect::Pure as u16)
+        self.0 == 0 || (self.contains(Effect::Pure) && !self.is_mut() && !self.is_io() && !self.is_control())
+    }
+
+    /// Check if the computation is mutable (modifies state): `Mut`, or legacy `WriteHeap`/`Alloc`
+    pub fn is_mut(self) -> bool {
+        self.contains(Effect::Mut) ||
+        self.contains(Effect::WriteHeap) ||
+        self.contains(Effect::Alloc)
+    }
+
+    /// Check if the computation has I/O effects: `Io`, or any legacy effect mapped to Io
+    pub fn is_io(self) -> bool {
+        self.contains(Effect::Io) ||
+        self.contains(Effect::P2P) ||
+        self.contains(Effect::FFI) ||
+        self.contains(Effect::Global) ||
+        self.contains(Effect::Async) ||
+        self.contains(Effect::Unsafe) ||
+        self.contains(Effect::Debug) ||
+        self.contains(Effect::Barrier) ||
+        self.contains(Effect::Panic)
+    }
+
+    /// Check if the computation affects control flow
+    pub fn is_control(self) -> bool {
+        self.contains(Effect::Control)
+    }
+
+    /// Get the primary effect category; precedence is Control > Io > Mut > Pure
+    pub fn primary_category(self) -> Effect {
+        if self.is_control() {
+            Effect::Control
+        } else if self.is_io() {
+            Effect::Io
+        } else if self.is_mut() {
+            Effect::Mut
+        } else {
+            Effect::Pure
+        }
     }
 
     /// Check if the computation only reads (doesn't modify state)
     pub fn is_read_only(self) -> bool {
-        !self.contains(Effect::WriteHeap) &&
-        !self.contains(Effect::IO) &&
-        !self.contains(Effect::P2P) &&
-        !self.contains(Effect::Global)
+        !self.is_mut() && !self.is_io()
     }
 
     /// Check if parallel execution is safe
@@ -142,14 +191,15 @@ impl EffectMask {
     pub fn effect_names(self) -> Vec<&'static str> {
         let mut names = Vec::new();
 
-        if self.is_pure() {
-            names.push("pure");
-            return names;
-        }
+        // Primary categories
+        if self.contains(Effect::Pure) { names.push("pure"); }
+        if self.contains(Effect::Mut) { names.push("mut"); }
+        if self.contains(Effect::Io) { names.push("io"); }
+        if self.contains(Effect::Control) { names.push("control"); }
 
+        // Legacy effects for detailed tracking
         if self.contains(Effect::ReadHeap) { names.push("read"); }
         if self.contains(Effect::WriteHeap) { names.push("write"); }
-        if self.contains(Effect::IO) { names.push("io"); }
         if self.contains(Effect::P2P) { names.push("p2p"); }
         if self.contains(Effect::FFI) { names.push("ffi"); }
         if self.contains(Effect::Panic) { names.push("panic"); }
@@ -160,6 +210,10 @@ impl EffectMask {
         if self.contains(Effect::Debug) { names.push("debug"); }
         if self.contains(Effect::Barrier) { names.push("barrier"); }
 
+        if names.is_empty() {
+            names.push("none");
+        }
+
         names
     }
 }
@@ -240,8 +294,8 @@ mod tests {
         assert!(effects.contains(Effect::WriteHeap));
         assert!(!effects.is_read_only());
 
-        effects = effects.add(Effect::IO);
-        assert!(effects.contains(Effect::IO));
+        effects = effects.add(Effect::Io);
+        
assert!(effects.contains(Effect::Io));
         assert!(!effects.is_parallel_safe());
     }
 
@@ -253,7 +307,7 @@ mod tests {
 
         let combined = read_effect | io_effect;
         assert!(combined.contains(Effect::ReadHeap));
-        assert!(combined.contains(Effect::IO));
+        assert!(combined.contains(Effect::Io));
         assert!(!combined.is_pure());
         assert!(!combined.is_parallel_safe());
     }
diff --git a/src/mir/instruction_v2.rs b/src/mir/instruction_v2.rs
new file mode 100644
index 00000000..d384c607
--- /dev/null
+++ b/src/mir/instruction_v2.rs
@@ -0,0 +1,543 @@
+/*!
+ * MIR 25-Instruction Specification Implementation
+ *
+ * Complete hierarchical MIR instruction set based on ChatGPT5 + AI Council design
+ */
+
+use super::{ValueId, EffectMask, Effect, BasicBlockId};
+use std::fmt;
+
+/// MIR instruction types - exactly 25 instructions per specification
+#[derive(Debug, Clone, PartialEq)]
+pub enum MirInstructionV2 {
+    // === TIER-0: UNIVERSAL CORE (8 instructions) ===
+
+    /// Load a constant value (pure)
+    /// `%dst = const value`
+    Const {
+        dst: ValueId,
+        value: ConstValue,
+    },
+
+    /// Binary operation: arithmetic, bitwise, or logical (pure)
+    /// `%dst = %lhs op %rhs`
+    BinOp {
+        dst: ValueId,
+        op: BinaryOp,
+        lhs: ValueId,
+        rhs: ValueId,
+    },
+
+    /// Compare two values (pure)
+    /// `%dst = %lhs cmp %rhs`
+    Compare {
+        dst: ValueId,
+        op: CompareOp,
+        lhs: ValueId,
+        rhs: ValueId,
+    },
+
+    /// Conditional branch (control)
+    /// `br %condition -> %then_bb, %else_bb`
+    Branch {
+        condition: ValueId,
+        then_bb: BasicBlockId,
+        else_bb: BasicBlockId,
+    },
+
+    /// Unconditional jump (control)
+    /// `jmp %target_bb`
+    Jump {
+        target: BasicBlockId,
+    },
+
+    /// SSA phi function for merging values (pure)
+    /// `%dst = phi [%val1 from %bb1, %val2 from %bb2, ...]`
+    Phi {
+        dst: ValueId,
+        inputs: Vec<(BasicBlockId, ValueId)>,
+    },
+
+    /// External function call (context-dependent: carries its own effect mask)
+    /// `%dst = call %func(%args...)`
+    Call {
+        dst: Option<ValueId>,
+        func: ValueId,
+        args: Vec<ValueId>,
+        effects: EffectMask,
+    },
+
+    /// Return from function (control)
+    /// `ret %value` or `ret void`
+    Return {
+        value: Option<ValueId>,
+    },
+
+    // === TIER-1: NYASH SEMANTICS (12 instructions) ===
+
+    /// Create a new Box instance (strong ownership node in ownership forest) (mut + alloc)
+    /// `%dst = new_box "BoxType"(%args...)`
+    NewBox {
+        dst: ValueId,
+        box_type: String,
+        args: Vec<ValueId>,
+    },
+
+    /// Load Box field value (pure)
+    /// `%dst = %box.field`
+    BoxFieldLoad {
+        dst: ValueId,
+        box_val: ValueId,
+        field: String,
+    },
+
+    /// Store value to Box field (mut)
+    /// `%box.field = %value`
+    BoxFieldStore {
+        box_val: ValueId,
+        field: String,
+        value: ValueId,
+    },
+
+    /// Box method invocation (context-dependent: carries its own effect mask)
+    /// `%dst = %box.method(%args...)`
+    BoxCall {
+        dst: Option<ValueId>,
+        box_val: ValueId,
+        method: String,
+        args: Vec<ValueId>,
+        effects: EffectMask,
+    },
+
+    /// Safepoint for finalization/interrupts (io)
+    /// `safepoint`
+    Safepoint,
+
+    /// Get reference as value (pure)
+    /// `%dst = ref_get %reference`
+    RefGet {
+        dst: ValueId,
+        reference: ValueId,
+    },
+
+    /// Set/replace reference target with ownership validation (mut)
+    /// `ref_set %reference = %new_target`
+    RefSet {
+        reference: ValueId,
+        new_target: ValueId,
+    },
+
+    /// Create weak reference handle (non-owning link) (pure)
+    /// `%dst = weak_new %box`
+    WeakNew {
+        dst: ValueId,
+        box_val: ValueId,
+    },
+
+    /// Load from weak reference with liveness check (returns null if dead) (pure)
+    /// `%dst = weak_load %weak_ref`
+    WeakLoad {
+        dst: ValueId,
+        weak_ref: ValueId,
+    },
+
+    /// Check weak reference validity (returns bool) (pure)
+    /// `%dst = weak_check %weak_ref`
+    WeakCheck {
+        dst: ValueId,
+        weak_ref: ValueId,
+    },
+
+    /// Send message via Bus system (io)
+    /// `send %bus, %message`
+    Send {
+        bus: ValueId,
+        message: ValueId,
+    },
+
+    /// Receive message from Bus system (io)
+    /// `%dst = recv %bus`
+    Recv {
+        dst: ValueId,
+        bus: ValueId,
+    },
+
+    // === TIER-2: IMPLEMENTATION ASSISTANCE (5 instructions) ===
+
+    /// Tail call optimization (control)
+    /// 
`tail_call %func(%args...)`
+    TailCall {
+        func: ValueId,
+        args: Vec<ValueId>,
+        effects: EffectMask,
+    },
+
+    /// Ownership transfer: this takes strong ownership of child (mut)
+    /// `adopt %parent, %child`
+    Adopt {
+        parent: ValueId,
+        child: ValueId,
+    },
+
+    /// Release strong ownership (weakify or nullify) (mut)
+    /// `release %reference`
+    Release {
+        reference: ValueId,
+    },
+
+    /// Optimized memory copy for structs/arrays (mut)
+    /// `memcopy %dest, %src, %size`
+    MemCopy {
+        dest: ValueId,
+        src: ValueId,
+        size: ValueId,
+    },
+
+    /// Atomic fence for concurrency ordering at Actor/Port boundaries (io)
+    /// `atomic_fence %ordering`
+    AtomicFence {
+        ordering: AtomicOrdering,
+    },
+}
+
+/// Constant values in MIR
+#[derive(Debug, Clone, PartialEq)]
+pub enum ConstValue {
+    Integer(i64),
+    Float(f64),
+    Bool(bool),
+    String(String),
+    Null,
+    Void,
+}
+
+/// Binary operations
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BinaryOp {
+    // Arithmetic
+    Add, Sub, Mul, Div, Mod,
+
+    // Bitwise
+    BitAnd, BitOr, BitXor, Shl, Shr,
+
+    // Logical
+    And, Or,
+}
+
+/// Comparison operations
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CompareOp {
+    Eq, Ne, Lt, Le, Gt, Ge,
+}
+
+/// Atomic ordering for AtomicFence instruction
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum AtomicOrdering {
+    Relaxed,
+    Acquire,
+    Release,
+    AcqRel,
+    SeqCst,
+}
+
+impl MirInstructionV2 {
+    /// Get the effect mask for this instruction according to 4-category system
+    pub fn effects(&self) -> EffectMask {
+        match self {
+            // TIER-0: Universal Core
+            // Pure operations
+            MirInstructionV2::Const { .. } |
+            MirInstructionV2::BinOp { .. } |
+            MirInstructionV2::Compare { .. } |
+            MirInstructionV2::Phi { .. } => EffectMask::PURE,
+
+            // Control flow operations
+            MirInstructionV2::Branch { .. } |
+            MirInstructionV2::Jump { .. } |
+            MirInstructionV2::Return { .. } => EffectMask::CONTROL,
+
+            // Context-dependent operations
+            MirInstructionV2::Call { effects, .. } => *effects,
+
+            // TIER-1: Nyash Semantics
+            // Pure operations
+            MirInstructionV2::BoxFieldLoad { .. } |
+            MirInstructionV2::RefGet { .. } |
+            MirInstructionV2::WeakNew { .. } |
+            MirInstructionV2::WeakLoad { .. } |
+            MirInstructionV2::WeakCheck { .. } => EffectMask::PURE,
+
+            // Mutable operations
+            MirInstructionV2::NewBox { .. } => EffectMask::MUT.add(Effect::Alloc),
+            MirInstructionV2::BoxFieldStore { .. } |
+            MirInstructionV2::RefSet { .. } => EffectMask::MUT,
+
+            // I/O operations
+            MirInstructionV2::Safepoint |
+            MirInstructionV2::Send { .. } |
+            MirInstructionV2::Recv { .. } => EffectMask::IO,
+
+            // Context-dependent operations
+            MirInstructionV2::BoxCall { effects, .. } => *effects,
+
+            // TIER-2: Implementation Assistance
+            // Control flow, combined with the callee's declared effects so they are not lost
+            MirInstructionV2::TailCall { effects, .. } => EffectMask::CONTROL | *effects,
+
+            // Mutable operations
+            MirInstructionV2::Adopt { .. } |
+            MirInstructionV2::Release { .. } |
+            MirInstructionV2::MemCopy { .. } => EffectMask::MUT,
+
+            // I/O operations
+            MirInstructionV2::AtomicFence { .. } => EffectMask::IO.add(Effect::Barrier),
+        }
+    }
+
+    /// Get the destination ValueId if this instruction produces a value
+    pub fn dst_value(&self) -> Option<ValueId> {
+        match self {
+            MirInstructionV2::Const { dst, .. } |
+            MirInstructionV2::BinOp { dst, .. } |
+            MirInstructionV2::Compare { dst, .. } |
+            MirInstructionV2::Phi { dst, .. } |
+            MirInstructionV2::NewBox { dst, .. } |
+            MirInstructionV2::BoxFieldLoad { dst, .. } |
+            MirInstructionV2::RefGet { dst, .. } |
+            MirInstructionV2::WeakNew { dst, .. } |
+            MirInstructionV2::WeakLoad { dst, .. } |
+            MirInstructionV2::WeakCheck { dst, .. } |
+            MirInstructionV2::Recv { dst, .. } => Some(*dst),
+
+            MirInstructionV2::Call { dst, .. } |
+            MirInstructionV2::BoxCall { dst, .. } => *dst,
+
+            _ => None,
+        }
+    }
+
+    /// Get all ValueIds used by this instruction
+    pub fn used_values(&self) -> Vec<ValueId> {
+        match self {
+            MirInstructionV2::Const { .. } => vec![],
+
+            MirInstructionV2::BinOp { lhs, rhs, .. 
} |
+            MirInstructionV2::Compare { lhs, rhs, .. } => vec![*lhs, *rhs],
+
+            MirInstructionV2::Branch { condition, .. } => vec![*condition],
+
+            MirInstructionV2::Jump { .. } => vec![],
+
+            MirInstructionV2::Phi { inputs, .. } => {
+                inputs.iter().map(|(_, value_id)| *value_id).collect()
+            },
+
+            MirInstructionV2::Call { func, args, .. } => {
+                let mut values = vec![*func];
+                values.extend(args.iter().copied());
+                values
+            },
+
+            MirInstructionV2::Return { value } => {
+                value.map(|v| vec![v]).unwrap_or_default()
+            },
+
+            MirInstructionV2::NewBox { args, .. } => args.clone(),
+
+            MirInstructionV2::BoxFieldLoad { box_val, .. } => vec![*box_val],
+
+            MirInstructionV2::BoxFieldStore { box_val, value, .. } => vec![*box_val, *value],
+
+            MirInstructionV2::BoxCall { box_val, args, .. } => {
+                let mut values = vec![*box_val];
+                values.extend(args.iter().copied());
+                values
+            },
+
+            MirInstructionV2::Safepoint => vec![],
+
+            MirInstructionV2::RefGet { reference, .. } => vec![*reference],
+
+            MirInstructionV2::RefSet { reference, new_target, .. } => vec![*reference, *new_target],
+
+            MirInstructionV2::WeakNew { box_val, .. } => vec![*box_val],
+
+            MirInstructionV2::WeakLoad { weak_ref, .. } |
+            MirInstructionV2::WeakCheck { weak_ref, .. } => vec![*weak_ref],
+
+            MirInstructionV2::Send { bus, message, .. } => vec![*bus, *message],
+
+            MirInstructionV2::Recv { bus, .. } => vec![*bus],
+
+            MirInstructionV2::TailCall { func, args, .. } => {
+                let mut values = vec![*func];
+                values.extend(args.iter().copied());
+                values
+            },
+
+            MirInstructionV2::Adopt { parent, child, .. } => vec![*parent, *child],
+
+            MirInstructionV2::Release { reference, .. } => vec![*reference],
+
+            MirInstructionV2::MemCopy { dest, src, size, .. } => vec![*dest, *src, *size],
+
+            MirInstructionV2::AtomicFence { .. } => vec![],
+        }
+    }
+
+    /// Get the instruction tier (0, 1, or 2); the match is exhaustive, so new variants must be tiered
+    pub fn tier(&self) -> u8 {
+        match self {
+            // Tier-0: Universal Core
+            MirInstructionV2::Const { .. } |
+            MirInstructionV2::BinOp { .. } |
+            MirInstructionV2::Compare { .. } |
+            MirInstructionV2::Branch { .. } |
+            MirInstructionV2::Jump { .. } |
+            MirInstructionV2::Phi { .. } |
+            MirInstructionV2::Call { .. } |
+            MirInstructionV2::Return { .. } => 0,
+
+            // Tier-1: Nyash Semantics
+            MirInstructionV2::NewBox { .. } |
+            MirInstructionV2::BoxFieldLoad { .. } |
+            MirInstructionV2::BoxFieldStore { .. } |
+            MirInstructionV2::BoxCall { .. } |
+            MirInstructionV2::Safepoint |
+            MirInstructionV2::RefGet { .. } |
+            MirInstructionV2::RefSet { .. } |
+            MirInstructionV2::WeakNew { .. } |
+            MirInstructionV2::WeakLoad { .. } |
+            MirInstructionV2::WeakCheck { .. } |
+            MirInstructionV2::Send { .. } |
+            MirInstructionV2::Recv { .. } => 1,
+
+            // Tier-2: Implementation Assistance
+            MirInstructionV2::TailCall { .. } |
+            MirInstructionV2::Adopt { .. } |
+            MirInstructionV2::Release { .. } |
+            MirInstructionV2::MemCopy { .. } |
+            MirInstructionV2::AtomicFence { .. } => 2,
+        }
+    }
+
+    /// Get a human-readable description of the instruction
+    pub fn description(&self) -> &'static str {
+        match self {
+            // Tier-0
+            MirInstructionV2::Const { .. } => "Load constant value",
+            MirInstructionV2::BinOp { .. } => "Binary arithmetic operation",
+            MirInstructionV2::Compare { .. } => "Compare two values",
+            MirInstructionV2::Branch { .. } => "Conditional branch",
+            MirInstructionV2::Jump { .. } => "Unconditional jump",
+            MirInstructionV2::Phi { .. } => "SSA phi function",
+            MirInstructionV2::Call { .. } => "External function call",
+            MirInstructionV2::Return { .. } => "Return from function",
+
+            // Tier-1
+            MirInstructionV2::NewBox { .. } => "Create Box instance",
+            MirInstructionV2::BoxFieldLoad { .. } => "Load Box field value",
+            MirInstructionV2::BoxFieldStore { .. } => "Store to Box field",
+            MirInstructionV2::BoxCall { .. } => "Box method invocation",
+            MirInstructionV2::Safepoint => "Finalization/interrupt safepoint",
+            MirInstructionV2::RefGet { .. 
} => "Get reference as value",
+            MirInstructionV2::RefSet { .. } => "Set reference target",
+            MirInstructionV2::WeakNew { .. } => "Create weak reference",
+            MirInstructionV2::WeakLoad { .. } => "Load from weak reference",
+            MirInstructionV2::WeakCheck { .. } => "Check weak reference validity",
+            MirInstructionV2::Send { .. } => "Send Bus message",
+            MirInstructionV2::Recv { .. } => "Receive Bus message",
+
+            // Tier-2
+            MirInstructionV2::TailCall { .. } => "Tail call optimization",
+            MirInstructionV2::Adopt { .. } => "Transfer ownership",
+            MirInstructionV2::Release { .. } => "Release ownership",
+            MirInstructionV2::MemCopy { .. } => "Optimized memory copy",
+            MirInstructionV2::AtomicFence { .. } => "Atomic memory fence",
+        }
+    }
+}
+
+impl fmt::Display for MirInstructionV2 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(self.description())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::mir::{ValueIdGenerator, BasicBlockIdGenerator};
+
+    #[test]
+    fn test_instruction_count() {
+        // Sanity-check that the documented tier sizes add up to 25.
+        // NOTE: this checks only the arithmetic; it does not inspect the enum's variants.
+        let tier0_count = 8;  // Const, BinOp, Compare, Branch, Jump, Phi, Call, Return
+        let tier1_count = 12; // NewBox, BoxFieldLoad/Store, BoxCall, Safepoint, RefGet/Set, WeakNew/Load/Check, Send, Recv
+        let tier2_count = 5;  // TailCall, Adopt, Release, MemCopy, AtomicFence
+        let total = tier0_count + tier1_count + tier2_count;
+        assert_eq!(total, 25, "MIR instruction set must have exactly 25 instructions");
+    }
+
+    #[test]
+    fn test_effect_categories() {
+        let mut value_gen = ValueIdGenerator::new();
+        let mut bb_gen = BasicBlockIdGenerator::new();
+
+        // Test pure operations
+        let const_inst = MirInstructionV2::Const {
+            dst: value_gen.next(),
+            value: ConstValue::Integer(42),
+        };
+        assert!(const_inst.effects().is_pure(), "Const should be pure");
+        assert_eq!(const_inst.tier(), 0, "Const should be Tier-0");
+
+        // Test mut operations
+        let store_inst = MirInstructionV2::BoxFieldStore {
+            box_val: value_gen.next(),
+            field: "value".to_string(),
+            value: value_gen.next(),
+        };
+        assert!(store_inst.effects().is_mut(), "BoxFieldStore should be mut");
+        assert_eq!(store_inst.tier(), 1, "BoxFieldStore should be Tier-1");
+
+        // Test io operations
+        let send_inst = MirInstructionV2::Send {
+            bus: value_gen.next(),
+            message: value_gen.next(),
+        };
+        assert!(send_inst.effects().is_io(), "Send should be io");
+        assert_eq!(send_inst.tier(), 1, "Send should be Tier-1");
+
+        // Test control operations
+        let branch_inst = MirInstructionV2::Branch {
+            condition: value_gen.next(),
+            then_bb: bb_gen.next(),
+            else_bb: bb_gen.next(),
+        };
+        assert!(branch_inst.effects().is_control(), "Branch should be control");
+        assert_eq!(branch_inst.tier(), 0, "Branch should be Tier-0");
+    }
+
+    #[test]
+    fn test_ownership_operations() {
+        let mut value_gen = ValueIdGenerator::new();
+
+        // Test ownership transfer
+        let adopt_inst = MirInstructionV2::Adopt {
+            parent: value_gen.next(),
+            child: value_gen.next(),
+        };
+        assert!(adopt_inst.effects().is_mut(), "Adopt should be mut");
+        assert_eq!(adopt_inst.tier(), 2, "Adopt should be Tier-2");
+
+        // Test weak reference operations
+        let weak_check = MirInstructionV2::WeakCheck {
+            dst: value_gen.next(),
+            weak_ref: value_gen.next(),
+        };
+        assert!(weak_check.effects().is_pure(), "WeakCheck should be pure");
+        assert_eq!(weak_check.tier(), 1, "WeakCheck should be Tier-1");
+    }
+}
\ No newline at end of file
diff --git a/src/mir/mod.rs b/src/mir/mod.rs
index 78c6bff3..8e84462f 100644
--- a/src/mir/mod.rs
+++ b/src/mir/mod.rs
@@ -6,20 +6,24 @@
  */
 
 pub mod instruction;
+pub mod instruction_v2; // New 25-instruction specification
 pub mod basic_block;
 pub mod function;
 pub mod builder;
 pub mod verification;
+pub mod ownership_verifier; // Ownership forest verification
 pub mod printer;
 pub mod value_id;
 pub mod effect;
 
 // Re-export main types for easy access
 pub use 
instruction::{MirInstruction, BinaryOp, CompareOp, UnaryOp, ConstValue, MirType};
+pub use instruction_v2::{MirInstructionV2, AtomicOrdering}; // New 25-instruction set
 pub use basic_block::{BasicBlock, BasicBlockId, BasicBlockIdGenerator};
 pub use function::{MirFunction, MirModule, FunctionSignature};
 pub use builder::MirBuilder;
 pub use verification::{MirVerifier, VerificationError};
+pub use ownership_verifier::{OwnershipVerifier, OwnershipError, OwnershipStats}; // Ownership forest verification
 pub use printer::MirPrinter;
 pub use value_id::{ValueId, LocalId, ValueIdGenerator};
 pub use effect::{EffectMask, Effect};
diff --git a/src/mir/ownership_verifier.rs b/src/mir/ownership_verifier.rs
new file mode 100644
index 00000000..4a75b831
--- /dev/null
+++ b/src/mir/ownership_verifier.rs
@@ -0,0 +1,559 @@
+/*!
+ * Ownership Forest Verification System
+ *
+ * Implements ownership forest validation rules per ChatGPT5 specification:
+ * - Ownership forest: strong in-degree ≤ 1
+ * - Strong cycle prohibition: strong edges form DAG (forest)
+ * - Weak/strong interaction: bidirectional strong → error
+ * - RefSet safety: strong→strong requires Release of old target
+ * - WeakLoad/WeakCheck deterministic behavior: null/false on expiration
+ */
+
+use super::{MirInstructionV2, ValueId, MirFunction, MirModule};
+use std::collections::{HashMap, HashSet, VecDeque};
+
+/// Ownership forest verification errors
+#[derive(Debug, Clone, PartialEq)]
+pub enum OwnershipError {
+    /// Strong reference has multiple owners (violates forest constraint)
+    MultipleStrongOwners {
+        target: ValueId,
+        owners: Vec<ValueId>,
+    },
+
+    /// Strong reference cycle detected (violates DAG constraint)
+    StrongCycle {
+        cycle: Vec<ValueId>,
+    },
+
+    /// Bidirectional strong references (should be strong + weak)
+    BidirectionalStrong {
+        first: ValueId,
+        second: ValueId,
+    },
+
+    /// RefSet without proper Release of old target
+    UnsafeRefSet {
+        reference: ValueId,
+        old_target: ValueId,
+        new_target: ValueId,
+    },
+
+    /// WeakLoad on expired reference (should return null deterministically)
+    WeakLoadExpired {
+        weak_ref: ValueId,
+        dead_target: ValueId,
+    },
+
+    /// Use after Release (accessing released ownership)
+    UseAfterRelease {
+        value: ValueId,
+        released_at: String,
+    },
+
+    /// Invalid ownership transfer via Adopt
+    InvalidAdopt {
+        parent: ValueId,
+        child: ValueId,
+        reason: String,
+    },
+}
+
+/// Ownership forest verifier
+pub struct OwnershipVerifier {
+    /// Strong ownership edges: child -> parent
+    strong_edges: HashMap<ValueId, ValueId>,
+
+    /// Weak reference edges: weak_ref -> target
+    weak_edges: HashMap<ValueId, ValueId>,
+
+    /// Released references (no longer valid for ownership)
+    released: HashSet<ValueId>,
+
+    /// Track live weak references for liveness checking
+    live_weak_refs: HashSet<ValueId>,
+
+    /// Track dead targets for WeakLoad/WeakCheck determinism
+    dead_targets: HashSet<ValueId>,
+}
+
+impl OwnershipVerifier {
+    /// Create a new ownership verifier
+    pub fn new() -> Self {
+        Self {
+            strong_edges: HashMap::new(),
+            weak_edges: HashMap::new(),
+            released: HashSet::new(),
+            live_weak_refs: HashSet::new(),
+            dead_targets: HashSet::new(),
+        }
+    }
+
+    /// Verify ownership forest properties for an entire module; collects errors from every function
+    pub fn verify_module(&mut self, module: &MirModule) -> Result<(), Vec<OwnershipError>> {
+        let mut errors = Vec::new();
+
+        for function in module.functions.values() {
+            if let Err(mut function_errors) = self.verify_function(function) {
+                errors.append(&mut function_errors);
+            }
+        }
+
+        if errors.is_empty() {
+            Ok(())
+        } else {
+            Err(errors)
+        }
+    }
+
+    /// Verify ownership forest properties for a single function
+    pub fn verify_function(&mut self, function: &MirFunction) -> Result<(), Vec<OwnershipError>> {
+        let mut errors = Vec::new();
+
+        // Reset state for this function
+        self.strong_edges.clear();
+        self.weak_edges.clear();
+        self.released.clear();
+        self.live_weak_refs.clear();
+        self.dead_targets.clear();
+
+        // Process all instructions to build ownership graph
+        for block in function.blocks.values() {
+            for instruction in 
block.all_instructions() { + if let Err(mut inst_errors) = self.process_instruction(instruction) { + errors.append(&mut inst_errors); + } + } + } + + // Verify global ownership forest properties + if let Err(mut forest_errors) = self.verify_ownership_forest() { + errors.append(&mut forest_errors); + } + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } + } + + /// Process a single instruction and update ownership state + fn process_instruction(&mut self, instruction: &MirInstructionV2) -> Result<(), Vec> { + let mut errors = Vec::new(); + + match instruction { + // NewBox creates a new ownership root + MirInstructionV2::NewBox { dst, .. } => { + // New boxes are ownership roots (no parent) + // Clear any existing ownership for this value + self.strong_edges.remove(dst); + }, + + // RefSet changes ownership relationships + MirInstructionV2::RefSet { reference, new_target } => { + // Check if the reference currently has a strong target + if let Some(old_target) = self.strong_edges.get(reference) { + // Strong→Strong replacement requires explicit Release + if !self.released.contains(old_target) { + errors.push(OwnershipError::UnsafeRefSet { + reference: *reference, + old_target: *old_target, + new_target: *new_target, + }); + } + } + + // Set new strong ownership + self.strong_edges.insert(*reference, *new_target); + + // Verify no multiple strong owners after this change + if let Err(mut multiple_errors) = self.check_multiple_owners(*new_target) { + errors.append(&mut multiple_errors); + } + }, + + // Adopt transfers ownership + MirInstructionV2::Adopt { parent, child } => { + // Verify the adoption is valid + if self.released.contains(child) { + errors.push(OwnershipError::InvalidAdopt { + parent: *parent, + child: *child, + reason: "Cannot adopt released reference".to_string(), + }); + } + + // Check for cycle creation + if self.would_create_cycle(*parent, *child) { + errors.push(OwnershipError::InvalidAdopt { + parent: *parent, + child: *child, + 
reason: "Would create strong cycle".to_string(), + }); + } + + // Establish strong ownership + self.strong_edges.insert(*child, *parent); + }, + + // Release removes ownership + MirInstructionV2::Release { reference } => { + self.strong_edges.remove(reference); + self.released.insert(*reference); + + // Mark any targets of this reference as potentially dead + if let Some(target) = self.weak_edges.get(reference) { + self.dead_targets.insert(*target); + } + }, + + // WeakNew creates weak reference + MirInstructionV2::WeakNew { dst, box_val } => { + self.weak_edges.insert(*dst, *box_val); + self.live_weak_refs.insert(*dst); + }, + + // WeakLoad checks liveness + MirInstructionV2::WeakLoad { weak_ref, .. } => { + if let Some(target) = self.weak_edges.get(weak_ref) { + if self.dead_targets.contains(target) { + // This is actually expected behavior - WeakLoad should return null + // We track this for deterministic behavior verification + } + } + }, + + // WeakCheck verifies liveness + MirInstructionV2::WeakCheck { weak_ref, .. 
} => { + if let Some(target) = self.weak_edges.get(weak_ref) { + if self.dead_targets.contains(target) { + // This is expected - WeakCheck should return false + } + } + }, + + // Other instructions don't affect ownership + _ => {}, + } + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } + } + + /// Verify global ownership forest properties + fn verify_ownership_forest(&self) -> Result<(), Vec> { + let mut errors = Vec::new(); + + // Check for multiple strong owners (violates forest constraint) + let mut target_owners: HashMap> = HashMap::new(); + for (child, parent) in &self.strong_edges { + target_owners.entry(*parent).or_insert_with(Vec::new).push(*child); + } + + for (target, owners) in target_owners { + if owners.len() > 1 { + errors.push(OwnershipError::MultipleStrongOwners { target, owners }); + } + } + + // Check for strong cycles (violates DAG constraint) + if let Some(cycle) = self.find_strong_cycle() { + errors.push(OwnershipError::StrongCycle { cycle }); + } + + // Check for bidirectional strong edges + for (child, parent) in &self.strong_edges { + if let Some(grandparent) = self.strong_edges.get(parent) { + if grandparent == child { + errors.push(OwnershipError::BidirectionalStrong { + first: *child, + second: *parent, + }); + } + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } + } + + /// Check if a value has multiple strong owners + fn check_multiple_owners(&self, target: ValueId) -> Result<(), Vec> { + let owners: Vec = self.strong_edges + .iter() + .filter(|(_, &parent)| parent == target) + .map(|(&child, _)| child) + .collect(); + + if owners.len() > 1 { + Err(vec![OwnershipError::MultipleStrongOwners { target, owners }]) + } else { + Ok(()) + } + } + + /// Check if adding an edge would create a cycle + fn would_create_cycle(&self, parent: ValueId, child: ValueId) -> bool { + // DFS to see if parent is reachable from child through strong edges + let mut visited = HashSet::new(); + let mut stack = vec![child]; + + 
while let Some(current) = stack.pop() { + if current == parent { + return true; // Cycle detected + } + + if visited.insert(current) { + // Add all strong children of current to stack + for (&potential_child, &potential_parent) in &self.strong_edges { + if potential_parent == current { + stack.push(potential_child); + } + } + } + } + + false + } + + /// Find any strong cycle in the ownership graph + fn find_strong_cycle(&self) -> Option> { + let mut visited = HashSet::new(); + let mut rec_stack = HashSet::new(); + let mut path = Vec::new(); + + // Get all nodes in the graph + let mut all_nodes = HashSet::new(); + for (&child, &parent) in &self.strong_edges { + all_nodes.insert(child); + all_nodes.insert(parent); + } + + // DFS from each unvisited node + for &node in &all_nodes { + if !visited.contains(&node) { + if let Some(cycle) = self.dfs_cycle(node, &mut visited, &mut rec_stack, &mut path) { + return Some(cycle); + } + } + } + + None + } + + /// DFS cycle detection helper + fn dfs_cycle( + &self, + node: ValueId, + visited: &mut HashSet, + rec_stack: &mut HashSet, + path: &mut Vec, + ) -> Option> { + visited.insert(node); + rec_stack.insert(node); + path.push(node); + + // Visit all strong children + for (&child, &parent) in &self.strong_edges { + if parent == node { + if rec_stack.contains(&child) { + // Found cycle - return path from child to current + let cycle_start = path.iter().position(|&x| x == child).unwrap(); + return Some(path[cycle_start..].to_vec()); + } + + if !visited.contains(&child) { + if let Some(cycle) = self.dfs_cycle(child, visited, rec_stack, path) { + return Some(cycle); + } + } + } + } + + rec_stack.remove(&node); + path.pop(); + None + } + + /// Get ownership statistics for debugging + pub fn ownership_stats(&self) -> OwnershipStats { + OwnershipStats { + strong_edges: self.strong_edges.len(), + weak_edges: self.weak_edges.len(), + released_count: self.released.len(), + live_weak_refs: self.live_weak_refs.len(), + dead_targets: 
self.dead_targets.len(), + } + } +} + +/// Ownership statistics for debugging and analysis +#[derive(Debug, Clone, PartialEq)] +pub struct OwnershipStats { + pub strong_edges: usize, + pub weak_edges: usize, + pub released_count: usize, + pub live_weak_refs: usize, + pub dead_targets: usize, +} + +impl Default for OwnershipVerifier { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::mir::{ValueIdGenerator, ConstValue}; + + #[test] + fn test_ownership_forest_basic() { + let mut verifier = OwnershipVerifier::new(); + let mut value_gen = ValueIdGenerator::new(); + + let parent = value_gen.next(); + let child = value_gen.next(); + + // Create ownership relationship + let adopt = MirInstructionV2::Adopt { parent, child }; + assert!(verifier.process_instruction(&adopt).is_ok()); + + // Verify forest properties + assert!(verifier.verify_ownership_forest().is_ok()); + + let stats = verifier.ownership_stats(); + assert_eq!(stats.strong_edges, 1); + } + + #[test] + fn test_multiple_owners_error() { + let mut verifier = OwnershipVerifier::new(); + let mut value_gen = ValueIdGenerator::new(); + + let parent1 = value_gen.next(); + let parent2 = value_gen.next(); + let child = value_gen.next(); + + // Create multiple ownership (invalid) + verifier.strong_edges.insert(child, parent1); + verifier.strong_edges.insert(child, parent2); // This overwrites, but we'll manually create conflict + + // Manually create the conflicting state for testing + verifier.strong_edges.clear(); + verifier.strong_edges.insert(parent1, child); // parent1 -> child + verifier.strong_edges.insert(parent2, child); // parent2 -> child (multiple owners of child) + + let result = verifier.verify_ownership_forest(); + assert!(result.is_err()); + + if let Err(errors) = result { + assert!(errors.iter().any(|e| matches!(e, OwnershipError::MultipleStrongOwners { .. 
}))); + } + } + + #[test] + fn test_strong_cycle_detection() { + let mut verifier = OwnershipVerifier::new(); + let mut value_gen = ValueIdGenerator::new(); + + let a = value_gen.next(); + let b = value_gen.next(); + let c = value_gen.next(); + + // Create cycle: a -> b -> c -> a + verifier.strong_edges.insert(b, a); + verifier.strong_edges.insert(c, b); + verifier.strong_edges.insert(a, c); + + let result = verifier.verify_ownership_forest(); + assert!(result.is_err()); + + if let Err(errors) = result { + assert!(errors.iter().any(|e| matches!(e, OwnershipError::StrongCycle { .. }))); + } + } + + #[test] + fn test_weak_reference_safety() { + let mut verifier = OwnershipVerifier::new(); + let mut value_gen = ValueIdGenerator::new(); + + let target = value_gen.next(); + let weak_ref = value_gen.next(); + + // Create weak reference + let weak_new = MirInstructionV2::WeakNew { + dst: weak_ref, + box_val: target, + }; + assert!(verifier.process_instruction(&weak_new).is_ok()); + + // Release the target + let release = MirInstructionV2::Release { + reference: target, + }; + assert!(verifier.process_instruction(&release).is_ok()); + + // WeakLoad should handle expired reference gracefully + let weak_load = MirInstructionV2::WeakLoad { + dst: value_gen.next(), + weak_ref, + }; + assert!(verifier.process_instruction(&weak_load).is_ok()); + + let stats = verifier.ownership_stats(); + assert_eq!(stats.weak_edges, 1); + assert_eq!(stats.dead_targets, 1); + } + + #[test] + fn test_unsafe_ref_set() { + let mut verifier = OwnershipVerifier::new(); + let mut value_gen = ValueIdGenerator::new(); + + let reference = value_gen.next(); + let old_target = value_gen.next(); + let new_target = value_gen.next(); + + // Set initial strong ownership + verifier.strong_edges.insert(reference, old_target); + + // Try to change without Release (should error) + let ref_set = MirInstructionV2::RefSet { reference, new_target }; + let result = verifier.process_instruction(&ref_set); + + 
assert!(result.is_err()); + if let Err(errors) = result { + assert!(errors.iter().any(|e| matches!(e, OwnershipError::UnsafeRefSet { .. }))); + } + } + + #[test] + fn test_safe_ref_set_with_release() { + let mut verifier = OwnershipVerifier::new(); + let mut value_gen = ValueIdGenerator::new(); + + let reference = value_gen.next(); + let old_target = value_gen.next(); + let new_target = value_gen.next(); + + // Set initial strong ownership + verifier.strong_edges.insert(reference, old_target); + + // Release old target first + verifier.released.insert(old_target); + + // Now RefSet should be safe + let ref_set = MirInstructionV2::RefSet { reference, new_target }; + assert!(verifier.process_instruction(&ref_set).is_ok()); + } +} \ No newline at end of file diff --git a/tests/mir_phase8_5_hierarchical_25_instructions.rs b/tests/mir_phase8_5_hierarchical_25_instructions.rs new file mode 100644 index 00000000..996871a7 --- /dev/null +++ b/tests/mir_phase8_5_hierarchical_25_instructions.rs @@ -0,0 +1,478 @@ +/*! 
+ * Phase 8.5 MIR 25-Instruction Hierarchical Implementation Tests + * + * Comprehensive test suite for the ChatGPT5 + AI Council designed MIR system + */ + +use crate::mir::{ + MirInstructionV2, ConstValue, BinaryOp, CompareOp, AtomicOrdering, + EffectMask, Effect, ValueIdGenerator, BasicBlockIdGenerator, + OwnershipVerifier, OwnershipError, +}; + +/// Test that we have exactly 25 instructions in the specification +#[test] +fn test_mir_instruction_count() { + // This is verified at compile time by the instruction enum + // Each tier should have the correct count: + // Tier-0: 8 instructions + // Tier-1: 12 instructions + // Tier-2: 5 instructions + // Total: 25 instructions + + let mut value_gen = ValueIdGenerator::new(); + let mut bb_gen = BasicBlockIdGenerator::new(); + + // Tier-0: Universal Core (8 instructions) + let tier0_instructions = vec![ + MirInstructionV2::Const { dst: value_gen.next(), value: ConstValue::Integer(42) }, + MirInstructionV2::BinOp { dst: value_gen.next(), op: BinaryOp::Add, lhs: value_gen.next(), rhs: value_gen.next() }, + MirInstructionV2::Compare { dst: value_gen.next(), op: CompareOp::Eq, lhs: value_gen.next(), rhs: value_gen.next() }, + MirInstructionV2::Branch { condition: value_gen.next(), then_bb: bb_gen.next(), else_bb: bb_gen.next() }, + MirInstructionV2::Jump { target: bb_gen.next() }, + MirInstructionV2::Phi { dst: value_gen.next(), inputs: vec![(bb_gen.next(), value_gen.next())] }, + MirInstructionV2::Call { dst: Some(value_gen.next()), func: value_gen.next(), args: vec![], effects: EffectMask::PURE }, + MirInstructionV2::Return { value: Some(value_gen.next()) }, + ]; + + for inst in &tier0_instructions { + assert_eq!(inst.tier(), 0, "Tier-0 instruction should have tier 0"); + } + assert_eq!(tier0_instructions.len(), 8, "Tier-0 should have exactly 8 instructions"); + + // Tier-1: Nyash Semantics (12 instructions) + let tier1_instructions = vec![ + MirInstructionV2::NewBox { dst: value_gen.next(), box_type: 
"TestBox".to_string(), args: vec![] }, + MirInstructionV2::BoxFieldLoad { dst: value_gen.next(), box_val: value_gen.next(), field: "value".to_string() }, + MirInstructionV2::BoxFieldStore { box_val: value_gen.next(), field: "value".to_string(), value: value_gen.next() }, + MirInstructionV2::BoxCall { dst: Some(value_gen.next()), box_val: value_gen.next(), method: "test".to_string(), args: vec![], effects: EffectMask::PURE }, + MirInstructionV2::Safepoint, + MirInstructionV2::RefGet { dst: value_gen.next(), reference: value_gen.next() }, + MirInstructionV2::RefSet { reference: value_gen.next(), new_target: value_gen.next() }, + MirInstructionV2::WeakNew { dst: value_gen.next(), box_val: value_gen.next() }, + MirInstructionV2::WeakLoad { dst: value_gen.next(), weak_ref: value_gen.next() }, + MirInstructionV2::WeakCheck { dst: value_gen.next(), weak_ref: value_gen.next() }, + MirInstructionV2::Send { bus: value_gen.next(), message: value_gen.next() }, + MirInstructionV2::Recv { dst: value_gen.next(), bus: value_gen.next() }, + ]; + + for inst in &tier1_instructions { + assert_eq!(inst.tier(), 1, "Tier-1 instruction should have tier 1"); + } + assert_eq!(tier1_instructions.len(), 12, "Tier-1 should have exactly 12 instructions"); + + // Tier-2: Implementation Assistance (5 instructions) + let tier2_instructions = vec![ + MirInstructionV2::TailCall { func: value_gen.next(), args: vec![], effects: EffectMask::PURE }, + MirInstructionV2::Adopt { parent: value_gen.next(), child: value_gen.next() }, + MirInstructionV2::Release { reference: value_gen.next() }, + MirInstructionV2::MemCopy { dest: value_gen.next(), src: value_gen.next(), size: value_gen.next() }, + MirInstructionV2::AtomicFence { ordering: AtomicOrdering::SeqCst }, + ]; + + for inst in &tier2_instructions { + assert_eq!(inst.tier(), 2, "Tier-2 instruction should have tier 2"); + } + assert_eq!(tier2_instructions.len(), 5, "Tier-2 should have exactly 5 instructions"); + + // Total verification + let 
total_instructions = tier0_instructions.len() + tier1_instructions.len() + tier2_instructions.len(); + assert_eq!(total_instructions, 25, "Total instruction count must be exactly 25"); +} + +/// Test the 4-category effect system +#[test] +fn test_effect_categories() { + let mut value_gen = ValueIdGenerator::new(); + let mut bb_gen = BasicBlockIdGenerator::new(); + + // Test Pure effects + let pure_instructions = vec![ + MirInstructionV2::Const { dst: value_gen.next(), value: ConstValue::Integer(42) }, + MirInstructionV2::BinOp { dst: value_gen.next(), op: BinaryOp::Add, lhs: value_gen.next(), rhs: value_gen.next() }, + MirInstructionV2::Compare { dst: value_gen.next(), op: CompareOp::Eq, lhs: value_gen.next(), rhs: value_gen.next() }, + MirInstructionV2::Phi { dst: value_gen.next(), inputs: vec![(bb_gen.next(), value_gen.next())] }, + MirInstructionV2::BoxFieldLoad { dst: value_gen.next(), box_val: value_gen.next(), field: "value".to_string() }, + MirInstructionV2::RefGet { dst: value_gen.next(), reference: value_gen.next() }, + MirInstructionV2::WeakNew { dst: value_gen.next(), box_val: value_gen.next() }, + MirInstructionV2::WeakLoad { dst: value_gen.next(), weak_ref: value_gen.next() }, + MirInstructionV2::WeakCheck { dst: value_gen.next(), weak_ref: value_gen.next() }, + ]; + + for inst in pure_instructions { + let effects = inst.effects(); + assert!(effects.is_pure() || effects.primary_category() == Effect::Pure, + "Instruction should be pure: {:?}", inst); + } + + // Test Mut effects + let mut_instructions = vec![ + MirInstructionV2::BoxFieldStore { box_val: value_gen.next(), field: "value".to_string(), value: value_gen.next() }, + MirInstructionV2::RefSet { reference: value_gen.next(), new_target: value_gen.next() }, + MirInstructionV2::Adopt { parent: value_gen.next(), child: value_gen.next() }, + MirInstructionV2::Release { reference: value_gen.next() }, + MirInstructionV2::MemCopy { dest: value_gen.next(), src: value_gen.next(), size: value_gen.next() }, 
+ ]; + + for inst in mut_instructions { + let effects = inst.effects(); + assert!(effects.is_mut() || effects.primary_category() == Effect::Mut, + "Instruction should be mut: {:?}", inst); + } + + // Test Io effects + let io_instructions = vec![ + MirInstructionV2::Safepoint, + MirInstructionV2::Send { bus: value_gen.next(), message: value_gen.next() }, + MirInstructionV2::Recv { dst: value_gen.next(), bus: value_gen.next() }, + MirInstructionV2::AtomicFence { ordering: AtomicOrdering::SeqCst }, + ]; + + for inst in io_instructions { + let effects = inst.effects(); + assert!(effects.is_io() || effects.primary_category() == Effect::Io, + "Instruction should be io: {:?}", inst); + } + + // Test Control effects + let control_instructions = vec![ + MirInstructionV2::Branch { condition: value_gen.next(), then_bb: bb_gen.next(), else_bb: bb_gen.next() }, + MirInstructionV2::Jump { target: bb_gen.next() }, + MirInstructionV2::Return { value: Some(value_gen.next()) }, + MirInstructionV2::TailCall { func: value_gen.next(), args: vec![], effects: EffectMask::PURE }, + ]; + + for inst in control_instructions { + let effects = inst.effects(); + assert!(effects.is_control() || effects.primary_category() == Effect::Control, + "Instruction should be control: {:?}", inst); + } +} + +/// Test optimization safety based on effect categories +#[test] +fn test_optimization_safety() { + let mut value_gen = ValueIdGenerator::new(); + + // Pure operations should be reorderable and eligible for CSE/LICM + let const_inst = MirInstructionV2::Const { dst: value_gen.next(), value: ConstValue::Integer(42) }; + let binop_inst = MirInstructionV2::BinOp { + dst: value_gen.next(), + op: BinaryOp::Add, + lhs: value_gen.next(), + rhs: value_gen.next() + }; + + assert!(const_inst.effects().is_pure(), "Const should be pure and reorderable"); + assert!(binop_inst.effects().is_pure(), "BinOp should be pure and reorderable"); + + // Mut operations should preserve same Box/Field dependencies + let 
store_inst = MirInstructionV2::BoxFieldStore { + box_val: value_gen.next(), + field: "value".to_string(), + value: value_gen.next() + }; + + assert!(store_inst.effects().is_mut(), "BoxFieldStore should be mut"); + assert!(!store_inst.effects().is_pure(), "Mut operations cannot be reordered freely"); + + // Io operations should not be reordered + let send_inst = MirInstructionV2::Send { + bus: value_gen.next(), + message: value_gen.next() + }; + + assert!(send_inst.effects().is_io(), "Send should be io"); + assert!(!send_inst.effects().is_read_only(), "Io operations have external effects"); +} + +/// Test ownership forest verification +#[test] +fn test_ownership_forest_verification() { + let mut verifier = OwnershipVerifier::new(); + let mut value_gen = ValueIdGenerator::new(); + + // Test basic ownership establishment + let parent = value_gen.next(); + let child = value_gen.next(); + + let adopt_inst = MirInstructionV2::Adopt { parent, child }; + assert!(verifier.process_instruction(&adopt_inst).is_ok(), "Basic adoption should succeed"); + + let stats = verifier.ownership_stats(); + assert_eq!(stats.strong_edges, 1, "Should have one strong edge"); + + // Test forest property verification + assert!(verifier.verify_ownership_forest().is_ok(), "Basic forest should be valid"); + + // Test weak reference creation + let weak_ref = value_gen.next(); + let weak_new_inst = MirInstructionV2::WeakNew { dst: weak_ref, box_val: child }; + assert!(verifier.process_instruction(&weak_new_inst).is_ok(), "Weak reference creation should succeed"); + + let stats_after_weak = verifier.ownership_stats(); + assert_eq!(stats_after_weak.weak_edges, 1, "Should have one weak edge"); + assert_eq!(stats_after_weak.live_weak_refs, 1, "Should have one live weak reference"); +} + +/// Test ownership forest violations +#[test] +fn test_ownership_violations() { + let mut verifier = OwnershipVerifier::new(); + let mut value_gen = ValueIdGenerator::new(); + + // Test unsafe RefSet (changing strong 
reference without Release) + let reference = value_gen.next(); + let old_target = value_gen.next(); + let new_target = value_gen.next(); + + // Manually set up initial state + verifier.strong_edges.insert(reference, old_target); + + // Try to change reference without releasing old target + let unsafe_ref_set = MirInstructionV2::RefSet { reference, new_target }; + let result = verifier.process_instruction(&unsafe_ref_set); + + assert!(result.is_err(), "Unsafe RefSet should be rejected"); + if let Err(errors) = result { + assert!(errors.iter().any(|e| matches!(e, OwnershipError::UnsafeRefSet { .. })), + "Should detect unsafe RefSet"); + } +} + +/// Test weak reference liveness tracking +#[test] +fn test_weak_reference_liveness() { + let mut verifier = OwnershipVerifier::new(); + let mut value_gen = ValueIdGenerator::new(); + + let target = value_gen.next(); + let weak_ref = value_gen.next(); + + // Create weak reference to target + let weak_new = MirInstructionV2::WeakNew { dst: weak_ref, box_val: target }; + assert!(verifier.process_instruction(&weak_new).is_ok()); + + // Release the target + let release = MirInstructionV2::Release { reference: target }; + assert!(verifier.process_instruction(&release).is_ok()); + + // Check that target is now considered dead + let stats = verifier.ownership_stats(); + assert_eq!(stats.dead_targets, 1, "Target should be marked as dead"); + + // WeakLoad should handle expired reference deterministically + let weak_load = MirInstructionV2::WeakLoad { dst: value_gen.next(), weak_ref }; + assert!(verifier.process_instruction(&weak_load).is_ok(), + "WeakLoad should handle expired reference gracefully"); + + // WeakCheck should also handle expired reference deterministically + let weak_check = MirInstructionV2::WeakCheck { dst: value_gen.next(), weak_ref }; + assert!(verifier.process_instruction(&weak_check).is_ok(), + "WeakCheck should handle expired reference gracefully"); +} + +/// Test Bus communication instructions +#[test] +fn 
test_bus_operations() { + let mut value_gen = ValueIdGenerator::new(); + + let bus = value_gen.next(); + let message = value_gen.next(); + + // Test Send instruction + let send_inst = MirInstructionV2::Send { bus, message }; + assert_eq!(send_inst.tier(), 1, "Send should be Tier-1"); + assert!(send_inst.effects().is_io(), "Send should have io effects"); + + let used_values = send_inst.used_values(); + assert_eq!(used_values.len(), 2, "Send should use bus and message"); + assert!(used_values.contains(&bus) && used_values.contains(&message)); + + // Test Recv instruction + let recv_inst = MirInstructionV2::Recv { dst: value_gen.next(), bus }; + assert_eq!(recv_inst.tier(), 1, "Recv should be Tier-1"); + assert!(recv_inst.effects().is_io(), "Recv should have io effects"); + + let recv_used = recv_inst.used_values(); + assert_eq!(recv_used.len(), 1, "Recv should use only bus"); + assert!(recv_used.contains(&bus)); +} + +/// Test implementation assistance instructions (Tier-2) +#[test] +fn test_implementation_assistance() { + let mut value_gen = ValueIdGenerator::new(); + + // Test TailCall + let tail_call = MirInstructionV2::TailCall { + func: value_gen.next(), + args: vec![value_gen.next()], + effects: EffectMask::PURE + }; + assert_eq!(tail_call.tier(), 2, "TailCall should be Tier-2"); + assert!(tail_call.effects().is_control(), "TailCall should be control flow"); + + // Test MemCopy + let mem_copy = MirInstructionV2::MemCopy { + dest: value_gen.next(), + src: value_gen.next(), + size: value_gen.next() + }; + assert_eq!(mem_copy.tier(), 2, "MemCopy should be Tier-2"); + assert!(mem_copy.effects().is_mut(), "MemCopy should be mut"); + + // Test AtomicFence + let atomic_fence = MirInstructionV2::AtomicFence { ordering: AtomicOrdering::AcqRel }; + assert_eq!(atomic_fence.tier(), 2, "AtomicFence should be Tier-2"); + assert!(atomic_fence.effects().is_io(), "AtomicFence should be io"); + assert!(atomic_fence.effects().contains(Effect::Barrier), "AtomicFence should have 
barrier effect"); +} + +/// Test instruction descriptions and display +#[test] +fn test_instruction_descriptions() { + let mut value_gen = ValueIdGenerator::new(); + + let const_inst = MirInstructionV2::Const { dst: value_gen.next(), value: ConstValue::Integer(42) }; + assert_eq!(const_inst.description(), "Load constant value"); + + let send_inst = MirInstructionV2::Send { bus: value_gen.next(), message: value_gen.next() }; + assert_eq!(send_inst.description(), "Send Bus message"); + + let adopt_inst = MirInstructionV2::Adopt { parent: value_gen.next(), child: value_gen.next() }; + assert_eq!(adopt_inst.description(), "Transfer ownership"); + + // Test Display trait + assert_eq!(format!("{}", const_inst), "Load constant value"); + assert_eq!(format!("{}", send_inst), "Send Bus message"); + assert_eq!(format!("{}", adopt_inst), "Transfer ownership"); +} + +/// Test value ID tracking for dependencies +#[test] +fn test_value_id_tracking() { + let mut value_gen = ValueIdGenerator::new(); + + let dst = value_gen.next(); + let lhs = value_gen.next(); + let rhs = value_gen.next(); + + let binop = MirInstructionV2::BinOp { dst, op: BinaryOp::Add, lhs, rhs }; + + // Test destination value + assert_eq!(binop.dst_value(), Some(dst), "BinOp should produce destination value"); + + // Test used values + let used = binop.used_values(); + assert_eq!(used.len(), 2, "BinOp should use two values"); + assert!(used.contains(&lhs) && used.contains(&rhs), "Should use lhs and rhs"); + + // Test instruction with no destination + let store = MirInstructionV2::BoxFieldStore { + box_val: value_gen.next(), + field: "value".to_string(), + value: value_gen.next() + }; + assert_eq!(store.dst_value(), None, "BoxFieldStore should not produce destination value"); +} + +/// Test the complete 25-instruction specification compliance +#[test] +fn test_complete_specification_compliance() { + // This test verifies that our implementation matches the exact specification + + // Verify we can create all 25 
instruction types without compilation errors + let mut value_gen = ValueIdGenerator::new(); + let mut bb_gen = BasicBlockIdGenerator::new(); + + let all_instructions = vec![ + // Tier-0: Universal Core (8) + MirInstructionV2::Const { dst: value_gen.next(), value: ConstValue::Integer(42) }, + MirInstructionV2::BinOp { dst: value_gen.next(), op: BinaryOp::Add, lhs: value_gen.next(), rhs: value_gen.next() }, + MirInstructionV2::Compare { dst: value_gen.next(), op: CompareOp::Eq, lhs: value_gen.next(), rhs: value_gen.next() }, + MirInstructionV2::Branch { condition: value_gen.next(), then_bb: bb_gen.next(), else_bb: bb_gen.next() }, + MirInstructionV2::Jump { target: bb_gen.next() }, + MirInstructionV2::Phi { dst: value_gen.next(), inputs: vec![] }, + MirInstructionV2::Call { dst: Some(value_gen.next()), func: value_gen.next(), args: vec![], effects: EffectMask::PURE }, + MirInstructionV2::Return { value: Some(value_gen.next()) }, + + // Tier-1: Nyash Semantics (12) + MirInstructionV2::NewBox { dst: value_gen.next(), box_type: "TestBox".to_string(), args: vec![] }, + MirInstructionV2::BoxFieldLoad { dst: value_gen.next(), box_val: value_gen.next(), field: "field".to_string() }, + MirInstructionV2::BoxFieldStore { box_val: value_gen.next(), field: "field".to_string(), value: value_gen.next() }, + MirInstructionV2::BoxCall { dst: Some(value_gen.next()), box_val: value_gen.next(), method: "method".to_string(), args: vec![], effects: EffectMask::PURE }, + MirInstructionV2::Safepoint, + MirInstructionV2::RefGet { dst: value_gen.next(), reference: value_gen.next() }, + MirInstructionV2::RefSet { reference: value_gen.next(), new_target: value_gen.next() }, + MirInstructionV2::WeakNew { dst: value_gen.next(), box_val: value_gen.next() }, + MirInstructionV2::WeakLoad { dst: value_gen.next(), weak_ref: value_gen.next() }, + MirInstructionV2::WeakCheck { dst: value_gen.next(), weak_ref: value_gen.next() }, + MirInstructionV2::Send { bus: value_gen.next(), message: 
value_gen.next() }, + MirInstructionV2::Recv { dst: value_gen.next(), bus: value_gen.next() }, + + // Tier-2: Implementation Assistance (5) + MirInstructionV2::TailCall { func: value_gen.next(), args: vec![], effects: EffectMask::PURE }, + MirInstructionV2::Adopt { parent: value_gen.next(), child: value_gen.next() }, + MirInstructionV2::Release { reference: value_gen.next() }, + MirInstructionV2::MemCopy { dest: value_gen.next(), src: value_gen.next(), size: value_gen.next() }, + MirInstructionV2::AtomicFence { ordering: AtomicOrdering::SeqCst }, + ]; + + assert_eq!(all_instructions.len(), 25, "Must have exactly 25 instructions"); + + // Verify tier distribution + let tier0_count = all_instructions.iter().filter(|i| i.tier() == 0).count(); + let tier1_count = all_instructions.iter().filter(|i| i.tier() == 1).count(); + let tier2_count = all_instructions.iter().filter(|i| i.tier() == 2).count(); + + assert_eq!(tier0_count, 8, "Tier-0 should have 8 instructions"); + assert_eq!(tier1_count, 12, "Tier-1 should have 12 instructions"); + assert_eq!(tier2_count, 5, "Tier-2 should have 5 instructions"); + + // Verify each instruction has proper effect classification + for instruction in &all_instructions { + let effects = instruction.effects(); + let category = effects.primary_category(); + + // Ensure every instruction has a valid effect category + assert!( + matches!(category, Effect::Pure | Effect::Mut | Effect::Io | Effect::Control), + "Instruction must have valid effect category: {:?}", instruction + ); + } +} + +/// Performance test: Ensure effect calculations are fast +#[test] +fn test_effect_calculation_performance() { + use std::time::Instant; + + let mut value_gen = ValueIdGenerator::new(); + let mut bb_gen = BasicBlockIdGenerator::new(); + + // Create a large number of instructions + let mut instructions = Vec::new(); + for _ in 0..10000 { + instructions.push(MirInstructionV2::BinOp { + dst: value_gen.next(), + op: BinaryOp::Add, + lhs: value_gen.next(), + rhs: 
value_gen.next() + }); + } + + // Measure effect calculation time + let start = Instant::now(); + for instruction in &instructions { + let _ = instruction.effects(); + let _ = instruction.tier(); + let _ = instruction.dst_value(); + let _ = instruction.used_values(); + } + let elapsed = start.elapsed(); + + // Should be very fast (< 10ms for 10k instructions) + assert!(elapsed.as_millis() < 100, + "Effect calculations should be fast, took {:?}", elapsed); +} \ No newline at end of file