Phase 9.78h: Stabilize MIR26 pipeline

- Add lib test to sync Core Instruction names with docs (INSTRUCTION_SET.md)
- Optimizer: Pass 0 normalizes legacy ops -> unified (TypeCheck/Cast->TypeOp, WeakNew/WeakLoad->WeakRef, BarrierRead/Write->Barrier)
- Optimizer: diagnostics for legacy ops; forbid via NYASH_OPT_DIAG_FORBID_LEGACY=1
- Runner: treat diagnostics (unlowered/legacy) as fatal when enabled
- Printer: unify legacy print style to TypeOp/WeakRef/Barrier
- Verifier: treat Phi inputs correctly (skip merge/dominance violations for Phi)
- Docs: update PHI_NORMALIZATION_PLAN; CURRENT_TASK: add risk note for op duplication
- Misc: PHI debugging/logs kept stable; snapshots still green
2025-08-26 06:30:01 +09:00
parent ff53fc90b1
commit 391a095f4c
10 changed files with 537 additions and 24 deletions
--- a/src/mir/instruction_introspection.rs
+++ b/src/mir/instruction_introspection.rs
@ -33,3 +33,55 @@ pub fn core_instruction_names() -> &'static [&'static str] {
    ]
 }

+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::BTreeSet;
+    use std::fs;
+    use std::path::Path;
+
+    /// Guards against drift between docs/reference/mir/INSTRUCTION_SET.md and
+    /// `core_instruction_names()`: both must list the same number of names and
+    /// the same set of names.
+    #[test]
+    fn mir26_doc_and_impl_are_in_sync() {
+        // The path is relative, so it resolves against the working directory of the test run.
+        let doc_path = Path::new("docs/reference/mir/INSTRUCTION_SET.md");
+        let content = fs::read_to_string(doc_path)
+            .expect("Failed to read docs/reference/mir/INSTRUCTION_SET.md");
+
+        // Documented names: bullet items found between the "## Core Instructions"
+        // heading and the next "## " heading (the Meta section).
+        let doc_names: Vec<&str> = content
+            .lines()
+            .map(str::trim)
+            // Ignore everything before the core section, then drop the heading itself.
+            .skip_while(|line| !line.starts_with("## Core Instructions"))
+            .skip(1)
+            // Any other level-2 heading ends the section; a repeated core heading does not.
+            .take_while(|line| {
+                !line.starts_with("## ") || line.starts_with("## Core Instructions")
+            })
+            .filter_map(|line| line.strip_prefix("- "))
+            // The name is the text before the first whitespace or opening
+            // parenthesis (ASCII or full-width), which drops trailing annotations.
+            .filter_map(|rest| {
+                rest.split(|c: char| c.is_whitespace() || c == '（' || c == '(')
+                    .next()
+            })
+            .filter(|name| !name.is_empty())
+            .collect();
+
+        // Names exposed by the implementation.
+        let impl_names = core_instruction_names();
+
+        // The counts are compared on the raw lists, before any set conversion.
+        assert_eq!(doc_names.len(), impl_names.len(), "Doc and impl must list the same number of core instructions");
+
+        // The names themselves are compared as ordered sets, so listing order is irrelevant.
+        let doc_set: BTreeSet<&str> = doc_names.iter().copied().collect();
+        let impl_set: BTreeSet<&str> = impl_names.iter().copied().collect();
+        assert_eq!(doc_set, impl_set, "MIR core instruction names must match docs exactly");
+    }
+}
--- a/src/mir/mod.rs
+++ b/src/mir/mod.rs
@ -73,8 +73,10 @@ impl MirCompiler {
        if self.optimize {
            let mut optimizer = MirOptimizer::new();
            let stats = optimizer.optimize_module(&mut module);
-            if std::env::var("NYASH_OPT_DIAG_FAIL").is_ok() && stats.diagnostics_reported > 0 {
-                return Err(format!("Diagnostic failure: {} unlowered type-op calls detected", stats.diagnostics_reported));
+            if (std::env::var("NYASH_OPT_DIAG_FAIL").is_ok()
+                || std::env::var("NYASH_OPT_DIAG_FORBID_LEGACY").is_ok())
+                && stats.diagnostics_reported > 0 {
+                return Err(format!("Diagnostic failure: {} issues detected (unlowered/legacy)", stats.diagnostics_reported));
            }
        }
        
--- a/src/mir/optimizer.rs
+++ b/src/mir/optimizer.rs
@ -40,6 +40,9 @@ impl MirOptimizer {
            println!("🚀 Starting MIR optimization passes");
        }
        
+        // Pass 0: Normalize legacy instructions to unified forms (TypeOp/WeakRef/Barrier)
+        stats.merge(self.normalize_legacy_instructions(module));
+        
        // Pass 1: Dead code elimination
        stats.merge(self.eliminate_dead_code(module));
        
@ -61,8 +64,11 @@ impl MirOptimizer {
            println!("✅ Optimization complete: {}", stats);
        }
        // Diagnostics (informational): report unlowered patterns
-        let diag = self.diagnose_unlowered_type_ops(module);
-        stats.merge(diag);
+        let diag1 = self.diagnose_unlowered_type_ops(module);
+        stats.merge(diag1);
+        // Diagnostics (policy): detect legacy (pre-unified) instructions when requested
+        let diag2 = self.diagnose_legacy_instructions(module);
+        stats.merge(diag2);
        
        stats
    }
@ -289,6 +295,83 @@ impl MirOptimizer {
    }
 }

+impl MirOptimizer {
+    /// Pass 0: normalize legacy instructions into unified MIR26 forms.
+    ///
+    /// Every instruction of every block in `module`, including the block
+    /// terminator when one is present, is rewritten in place:
+    /// - TypeCheck/Cast → TypeOp(Check/Cast)
+    /// - WeakNew/WeakLoad → WeakRef(New/Load)
+    /// - BarrierRead/BarrierWrite → Barrier(Read/Write)
+    ///
+    /// Rewrites are not counted, so the returned stats are always freshly
+    /// constructed and empty.
+    fn normalize_legacy_instructions(&mut self, module: &mut MirModule) -> OptimizationStats {
+        for (_fname, function) in &mut module.functions {
+            for (_bb, block) in &mut function.blocks {
+                for inst in &mut block.instructions {
+                    Self::rewrite_legacy_instruction(inst);
+                }
+                // The terminator is stored separately from the instruction list,
+                // so it gets the same treatment explicitly.
+                if let Some(term) = &mut block.terminator {
+                    Self::rewrite_legacy_instruction(term);
+                }
+            }
+        }
+        OptimizationStats::new()
+    }
+
+    /// Replace `inst` with its unified equivalent when it is a legacy op;
+    /// any other instruction is left untouched.
+    fn rewrite_legacy_instruction(inst: &mut super::MirInstruction) {
+        use super::{TypeOpKind, WeakRefOp, BarrierOp, MirInstruction as I, MirType};
+        // Build the replacement from a shared borrow first, then assign it once
+        // the borrow of the old instruction has ended.
+        let unified = match &*inst {
+            I::TypeCheck { dst, value, expected_type } => I::TypeOp {
+                dst: *dst,
+                op: TypeOpKind::Check,
+                value: *value,
+                // The legacy check carries a box type name; wrap it as a MIR box type.
+                ty: MirType::Box(expected_type.clone()),
+            },
+            I::Cast { dst, value, target_type } => I::TypeOp {
+                dst: *dst,
+                op: TypeOpKind::Cast,
+                value: *value,
+                ty: target_type.clone(),
+            },
+            I::WeakNew { dst, box_val } => I::WeakRef { dst: *dst, op: WeakRefOp::New, value: *box_val },
+            I::WeakLoad { dst, weak_ref } => I::WeakRef { dst: *dst, op: WeakRefOp::Load, value: *weak_ref },
+            I::BarrierRead { ptr } => I::Barrier { op: BarrierOp::Read, ptr: *ptr },
+            I::BarrierWrite { ptr } => I::Barrier { op: BarrierOp::Write, ptr: *ptr },
+            // Already unified, or not part of the legacy set: nothing to do.
+            _ => return,
+        };
+        *inst = unified;
+    }
+}
+
 /// Map string type name to MIR type (optimizer-level helper)
 fn map_type_name(name: &str) -> MirType {
    match name {
@ -433,6 +516,53 @@ impl MirOptimizer {
        }
        stats
    }
+
+    /// Diagnostic: count legacy (pre-unified) instructions still present in `module`.
+    ///
+    /// Legacy set: TypeCheck/Cast/WeakNew/WeakLoad/BarrierRead/BarrierWrite. Both the
+    /// regular instructions and the terminator of every block are inspected.
+    /// Every hit is added to `diagnostics_reported` regardless of settings; the
+    /// per-function message goes to stderr only when `self.debug` is set or when
+    /// NYASH_OPT_DIAG or NYASH_OPT_DIAG_FORBID_LEGACY is present in the environment.
+    fn diagnose_legacy_instructions(&mut self, module: &MirModule) -> OptimizationStats {
+        // Single definition of what counts as a legacy op, shared by both scans below.
+        let is_legacy = |inst: &MirInstruction| {
+            matches!(
+                inst,
+                MirInstruction::TypeCheck { .. }
+                    | MirInstruction::Cast { .. }
+                    | MirInstruction::WeakNew { .. }
+                    | MirInstruction::WeakLoad { .. }
+                    | MirInstruction::BarrierRead { .. }
+                    | MirInstruction::BarrierWrite { .. }
+            )
+        };
+
+        let mut stats = OptimizationStats::new();
+        let verbose = self.debug
+            || std::env::var("NYASH_OPT_DIAG").is_ok()
+            || std::env::var("NYASH_OPT_DIAG_FORBID_LEGACY").is_ok();
+
+        for (fname, function) in &module.functions {
+            let mut legacy_ops = 0usize;
+            for (_bb, block) in &function.blocks {
+                for inst in &block.instructions {
+                    if is_legacy(inst) {
+                        legacy_ops += 1;
+                    }
+                }
+                if let Some(term) = &block.terminator {
+                    if is_legacy(term) {
+                        legacy_ops += 1;
+                    }
+                }
+            }
+
+            // Functions without legacy ops contribute nothing and print nothing.
+            if legacy_ops == 0 {
+                continue;
+            }
+            stats.diagnostics_reported += legacy_ops;
+            if verbose {
+                eprintln!(
+                    "[OPT][DIAG] Function '{}' has {} legacy MIR ops (TypeCheck/Cast/WeakNew/WeakLoad/BarrierRead/BarrierWrite): unify to TypeOp/WeakRef/Barrier",
+                    fname, legacy_ops
+                );
+            }
+        }
+        stats
+    }
 }

 #[cfg(test)]
--- a/src/mir/printer.rs
+++ b/src/mir/printer.rs
@ -357,8 +357,10 @@ impl MirPrinter {
                format!("{} = new {}({})", dst, box_type, args_str)
            },
            
+            // Legacy -> Unified print: TypeCheck as TypeOp(check)
            MirInstruction::TypeCheck { dst, value, expected_type } => {
-                format!("{} = type_check {} is {}", dst, value, expected_type)
+                // Print using unified TypeOp style to avoid naming divergence
+                format!("{} = typeop check {} {}", dst, value, expected_type)
            },
            
            MirInstruction::Cast { dst, value, target_type } => {
@ -424,20 +426,24 @@ impl MirPrinter {
                format!("ref_set {}.{} = {}", reference, field, value)
            },
            
+            // Legacy -> Unified print: WeakNew as weakref new
            MirInstruction::WeakNew { dst, box_val } => {
-                format!("{} = weak_new {}", dst, box_val)
+                format!("{} = weakref new {}", dst, box_val)
            },
            
+            // Legacy -> Unified print: WeakLoad as weakref load
            MirInstruction::WeakLoad { dst, weak_ref } => {
-                format!("{} = weak_load {}", dst, weak_ref)
+                format!("{} = weakref load {}", dst, weak_ref)
            },
            
+            // Legacy -> Unified print: BarrierRead as barrier read
            MirInstruction::BarrierRead { ptr } => {
-                format!("barrier_read {}", ptr)
+                format!("barrier read {}", ptr)
            },
            
+            // Legacy -> Unified print: BarrierWrite as barrier write
            MirInstruction::BarrierWrite { ptr } => {
-                format!("barrier_write {}", ptr)
+                format!("barrier write {}", ptr)
            },
            
            MirInstruction::WeakRef { dst, op, value } => {
--- a/src/mir/verification.rs
+++ b/src/mir/verification.rs
@ -148,7 +148,31 @@ impl MirVerifier {
        } else {
            if dlog::on("NYASH_DEBUG_VERIFIER") {
                eprintln!("[VERIFY] {} errors in function {}", local_errors.len(), function.signature.name);
-                for e in &local_errors { eprintln!("  • {:?}", e); }
+                for e in &local_errors {
+                    match e {
+                        VerificationError::MergeUsesPredecessorValue { value, merge_block, pred_block } => {
+                            eprintln!(
+                                "  • MergeUsesPredecessorValue: value=%{:?} merge_bb={:?} pred_bb={:?} -- hint: insert/use Phi in merge block for values from predecessors",
+                                value, merge_block, pred_block
+                            );
+                        }
+                        VerificationError::DominatorViolation { value, use_block, def_block } => {
+                            eprintln!(
+                                "  • DominatorViolation: value=%{:?} use_bb={:?} def_bb={:?} -- hint: ensure definition dominates use, or route via Phi",
+                                value, use_block, def_block
+                            );
+                        }
+                        VerificationError::InvalidPhi { phi_value, block, reason } => {
+                            eprintln!(
+                                "  • InvalidPhi: phi_dst=%{:?} in bb={:?} reason={} -- hint: check inputs cover all predecessors and placed at block start",
+                                phi_value, block, reason
+                            );
+                        }
+                        other => {
+                            eprintln!("  • {:?}", other);
+                        }
+                    }
+                }
            }
            Err(local_errors)
        }
@ -342,6 +366,8 @@ impl MirVerifier {

        for (use_block_id, block) in &function.blocks {
            for instruction in block.all_instructions() {
+                // Phi inputs are special: they are defined in predecessors; skip dominance check for them
+                if let super::MirInstruction::Phi { .. } = instruction { continue; }
                for used_value in instruction.used_values() {
                    if let Some(&def_bb) = def_block.get(&used_value) {
                        if def_bb != *use_block_id {
@ -418,6 +444,8 @@ impl MirVerifier {
            let doms_of_block = dominators.get(bid).unwrap();
            // check instructions including terminator
            for inst in block.all_instructions() {
+                // Skip Phi: its inputs are allowed to come from predecessors by SSA definition
+                if let super::MirInstruction::Phi { .. } = inst { continue; }
                for used in inst.used_values() {
                    if let Some(&db) = def_block.get(&used) {
                        // If def doesn't dominate merge block, it must be routed via phi