Phase 9.78h: Stabilize MIR26 pipeline
- Add lib test to sync Core Instruction names with docs (INSTRUCTION_SET.md)
- Optimizer: Pass 0 normalizes legacy ops -> unified (TypeCheck/Cast->TypeOp, WeakNew/WeakLoad->WeakRef, BarrierRead/Write->Barrier)
- Optimizer: diagnostics for legacy ops; forbid via NYASH_OPT_DIAG_FORBID_LEGACY=1
- Runner: treat diagnostics (unlowered/legacy) as fatal when enabled
- Printer: unify legacy print style to TypeOp/WeakRef/Barrier
- Verifier: treat Phi inputs correctly (skip merge/dominance violations for Phi)
- Docs: update PHI_NORMALIZATION_PLAN; CURRENT_TASK: add risk note for op duplication
- Misc: PHI debugging/logs kept stable; snapshots still green
This commit is contained in:
@ -33,3 +33,55 @@ pub fn core_instruction_names() -> &'static [&'static str] {
|
||||
]
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;
    use std::fs;
    use std::path::Path;

    /// Extracts the instruction names listed as `- Name ...` bullets under the
    /// `## Core Instructions` heading of the given markdown text.
    ///
    /// Scanning starts after the heading and stops at the next `## ` heading.
    /// For each bullet, the name is everything up to the first whitespace or
    /// opening parenthesis, so trailing annotations are dropped.
    fn core_names_from_doc(content: &str) -> Vec<String> {
        content
            .lines()
            .map(str::trim)
            .skip_while(|line| !line.starts_with("## Core Instructions"))
            .skip(1) // the heading line itself
            .take_while(|line| !line.starts_with("## ")) // stop at next section (Meta)
            .filter_map(|line| line.strip_prefix("- "))
            .filter_map(|rest| {
                // Tolerate extra spaces after the bullet marker, then cut at the
                // first whitespace or opening parenthesis. Both the ASCII '(' and
                // the full-width U+FF08 are accepted.
                // NOTE(review): the original listed '(' twice; presumably one of
                // them was the full-width form — confirm against the docs file.
                rest.trim_start()
                    .split(|c: char| c.is_whitespace() || c == '(' || c == '\u{FF08}')
                    .next()
            })
            .filter(|name| !name.is_empty())
            .map(str::to_string)
            .collect()
    }

    // Ensure docs/reference/mir/INSTRUCTION_SET.md and implementation list stay in perfect sync (26 items)
    #[test]
    fn mir26_doc_and_impl_are_in_sync() {
        // 1) Read the canonical list from docs.
        //    The path is relative, so this relies on the test's working directory
        //    being the directory that contains `docs/`.
        let doc_path = Path::new("docs/reference/mir/INSTRUCTION_SET.md");
        let content = fs::read_to_string(doc_path)
            .expect("Failed to read docs/reference/mir/INSTRUCTION_SET.md");
        let doc_names = core_names_from_doc(&content);

        // 2) Implementation list
        let impl_names = core_instruction_names();

        // 3) Compare as sets first (order agnostic): on failure this shows exactly
        //    which names differ, which a bare count mismatch would not.
        let doc_set: BTreeSet<&str> = doc_names.iter().map(String::as_str).collect();
        let impl_set: BTreeSet<&str> = impl_names.iter().copied().collect();
        assert_eq!(doc_set, impl_set, "MIR core instruction names must match docs exactly");

        // 4) Counts must match too; with equal sets this catches duplicated entries.
        assert_eq!(
            doc_names.len(),
            impl_names.len(),
            "Doc and impl must list the same number of core instructions"
        );
    }
}
|
||||
|
||||
@ -73,8 +73,10 @@ impl MirCompiler {
|
||||
if self.optimize {
|
||||
let mut optimizer = MirOptimizer::new();
|
||||
let stats = optimizer.optimize_module(&mut module);
|
||||
if std::env::var("NYASH_OPT_DIAG_FAIL").is_ok() && stats.diagnostics_reported > 0 {
|
||||
return Err(format!("Diagnostic failure: {} unlowered type-op calls detected", stats.diagnostics_reported));
|
||||
if (std::env::var("NYASH_OPT_DIAG_FAIL").is_ok()
|
||||
|| std::env::var("NYASH_OPT_DIAG_FORBID_LEGACY").is_ok())
|
||||
&& stats.diagnostics_reported > 0 {
|
||||
return Err(format!("Diagnostic failure: {} issues detected (unlowered/legacy)", stats.diagnostics_reported));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -40,6 +40,9 @@ impl MirOptimizer {
|
||||
println!("🚀 Starting MIR optimization passes");
|
||||
}
|
||||
|
||||
// Pass 0: Normalize legacy instructions to unified forms (TypeOp/WeakRef/Barrier)
|
||||
stats.merge(self.normalize_legacy_instructions(module));
|
||||
|
||||
// Pass 1: Dead code elimination
|
||||
stats.merge(self.eliminate_dead_code(module));
|
||||
|
||||
@ -61,8 +64,11 @@ impl MirOptimizer {
|
||||
println!("✅ Optimization complete: {}", stats);
|
||||
}
|
||||
// Diagnostics (informational): report unlowered patterns
|
||||
let diag = self.diagnose_unlowered_type_ops(module);
|
||||
stats.merge(diag);
|
||||
let diag1 = self.diagnose_unlowered_type_ops(module);
|
||||
stats.merge(diag1);
|
||||
// Diagnostics (policy): detect legacy (pre-unified) instructions when requested
|
||||
let diag2 = self.diagnose_legacy_instructions(module);
|
||||
stats.merge(diag2);
|
||||
|
||||
stats
|
||||
}
|
||||
@ -289,6 +295,83 @@ impl MirOptimizer {
|
||||
}
|
||||
}
|
||||
|
||||
impl MirOptimizer {
|
||||
/// Normalize legacy instructions into unified MIR26 forms.
|
||||
/// - TypeCheck/Cast → TypeOp(Check/Cast)
|
||||
/// - WeakNew/WeakLoad → WeakRef(New/Load)
|
||||
/// - BarrierRead/BarrierWrite → Barrier(Read/Write)
|
||||
fn normalize_legacy_instructions(&mut self, module: &mut MirModule) -> OptimizationStats {
|
||||
use super::{TypeOpKind, WeakRefOp, BarrierOp, MirInstruction as I, MirType};
|
||||
let mut stats = OptimizationStats::new();
|
||||
for (_fname, function) in &mut module.functions {
|
||||
for (_bb, block) in &mut function.blocks {
|
||||
// Rewrite in-place for normal instructions
|
||||
for inst in &mut block.instructions {
|
||||
match inst {
|
||||
I::TypeCheck { dst, value, expected_type } => {
|
||||
let ty = MirType::Box(expected_type.clone());
|
||||
*inst = I::TypeOp { dst: *dst, op: TypeOpKind::Check, value: *value, ty };
|
||||
stats.reorderings += 0; // no-op; keep stats structure alive
|
||||
}
|
||||
I::Cast { dst, value, target_type } => {
|
||||
let ty = target_type.clone();
|
||||
*inst = I::TypeOp { dst: *dst, op: TypeOpKind::Cast, value: *value, ty };
|
||||
}
|
||||
I::WeakNew { dst, box_val } => {
|
||||
let val = *box_val;
|
||||
*inst = I::WeakRef { dst: *dst, op: WeakRefOp::New, value: val };
|
||||
}
|
||||
I::WeakLoad { dst, weak_ref } => {
|
||||
let val = *weak_ref;
|
||||
*inst = I::WeakRef { dst: *dst, op: WeakRefOp::Load, value: val };
|
||||
}
|
||||
I::BarrierRead { ptr } => {
|
||||
let val = *ptr;
|
||||
*inst = I::Barrier { op: BarrierOp::Read, ptr: val };
|
||||
}
|
||||
I::BarrierWrite { ptr } => {
|
||||
let val = *ptr;
|
||||
*inst = I::Barrier { op: BarrierOp::Write, ptr: val };
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
// Rewrite terminator, if any
|
||||
if let Some(term) = &mut block.terminator {
|
||||
match term {
|
||||
I::TypeCheck { dst, value, expected_type } => {
|
||||
let ty = MirType::Box(expected_type.clone());
|
||||
*term = I::TypeOp { dst: *dst, op: TypeOpKind::Check, value: *value, ty };
|
||||
}
|
||||
I::Cast { dst, value, target_type } => {
|
||||
let ty = target_type.clone();
|
||||
*term = I::TypeOp { dst: *dst, op: TypeOpKind::Cast, value: *value, ty };
|
||||
}
|
||||
I::WeakNew { dst, box_val } => {
|
||||
let val = *box_val;
|
||||
*term = I::WeakRef { dst: *dst, op: WeakRefOp::New, value: val };
|
||||
}
|
||||
I::WeakLoad { dst, weak_ref } => {
|
||||
let val = *weak_ref;
|
||||
*term = I::WeakRef { dst: *dst, op: WeakRefOp::Load, value: val };
|
||||
}
|
||||
I::BarrierRead { ptr } => {
|
||||
let val = *ptr;
|
||||
*term = I::Barrier { op: BarrierOp::Read, ptr: val };
|
||||
}
|
||||
I::BarrierWrite { ptr } => {
|
||||
let val = *ptr;
|
||||
*term = I::Barrier { op: BarrierOp::Write, ptr: val };
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stats
|
||||
}
|
||||
}
|
||||
|
||||
/// Map string type name to MIR type (optimizer-level helper)
|
||||
fn map_type_name(name: &str) -> MirType {
|
||||
match name {
|
||||
@ -433,6 +516,53 @@ impl MirOptimizer {
|
||||
}
|
||||
stats
|
||||
}
|
||||
|
||||
/// Diagnostic: detect legacy instructions that should be unified into the canonical 26
|
||||
/// Legacy set: TypeCheck/Cast/WeakNew/WeakLoad/BarrierRead/BarrierWrite
|
||||
/// When NYASH_OPT_DIAG or NYASH_OPT_DIAG_FORBID_LEGACY is set, prints diagnostics.
|
||||
fn diagnose_legacy_instructions(&mut self, module: &MirModule) -> OptimizationStats {
|
||||
let mut stats = OptimizationStats::new();
|
||||
let diag_on = self.debug
|
||||
|| std::env::var("NYASH_OPT_DIAG").is_ok()
|
||||
|| std::env::var("NYASH_OPT_DIAG_FORBID_LEGACY").is_ok();
|
||||
for (fname, function) in &module.functions {
|
||||
let mut count = 0usize;
|
||||
for (_bb, block) in &function.blocks {
|
||||
for inst in &block.instructions {
|
||||
match inst {
|
||||
MirInstruction::TypeCheck { .. }
|
||||
| MirInstruction::Cast { .. }
|
||||
| MirInstruction::WeakNew { .. }
|
||||
| MirInstruction::WeakLoad { .. }
|
||||
| MirInstruction::BarrierRead { .. }
|
||||
| MirInstruction::BarrierWrite { .. } => { count += 1; }
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if let Some(term) = &block.terminator {
|
||||
match term {
|
||||
MirInstruction::TypeCheck { .. }
|
||||
| MirInstruction::Cast { .. }
|
||||
| MirInstruction::WeakNew { .. }
|
||||
| MirInstruction::WeakLoad { .. }
|
||||
| MirInstruction::BarrierRead { .. }
|
||||
| MirInstruction::BarrierWrite { .. } => { count += 1; }
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
if count > 0 {
|
||||
stats.diagnostics_reported += count;
|
||||
if diag_on {
|
||||
eprintln!(
|
||||
"[OPT][DIAG] Function '{}' has {} legacy MIR ops (TypeCheck/Cast/WeakNew/WeakLoad/BarrierRead/BarrierWrite): unify to TypeOp/WeakRef/Barrier",
|
||||
fname, count
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
stats
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@ -357,8 +357,10 @@ impl MirPrinter {
|
||||
format!("{} = new {}({})", dst, box_type, args_str)
|
||||
},
|
||||
|
||||
// Legacy -> Unified print: TypeCheck as TypeOp(check)
|
||||
MirInstruction::TypeCheck { dst, value, expected_type } => {
|
||||
format!("{} = type_check {} is {}", dst, value, expected_type)
|
||||
// Print using unified TypeOp style to avoid naming divergence
|
||||
format!("{} = typeop check {} {}", dst, value, expected_type)
|
||||
},
|
||||
|
||||
MirInstruction::Cast { dst, value, target_type } => {
|
||||
@ -424,20 +426,24 @@ impl MirPrinter {
|
||||
format!("ref_set {}.{} = {}", reference, field, value)
|
||||
},
|
||||
|
||||
// Legacy -> Unified print: WeakNew as weakref new
|
||||
MirInstruction::WeakNew { dst, box_val } => {
|
||||
format!("{} = weak_new {}", dst, box_val)
|
||||
format!("{} = weakref new {}", dst, box_val)
|
||||
},
|
||||
|
||||
// Legacy -> Unified print: WeakLoad as weakref load
|
||||
MirInstruction::WeakLoad { dst, weak_ref } => {
|
||||
format!("{} = weak_load {}", dst, weak_ref)
|
||||
format!("{} = weakref load {}", dst, weak_ref)
|
||||
},
|
||||
|
||||
// Legacy -> Unified print: BarrierRead as barrier read
|
||||
MirInstruction::BarrierRead { ptr } => {
|
||||
format!("barrier_read {}", ptr)
|
||||
format!("barrier read {}", ptr)
|
||||
},
|
||||
|
||||
// Legacy -> Unified print: BarrierWrite as barrier write
|
||||
MirInstruction::BarrierWrite { ptr } => {
|
||||
format!("barrier_write {}", ptr)
|
||||
format!("barrier write {}", ptr)
|
||||
},
|
||||
|
||||
MirInstruction::WeakRef { dst, op, value } => {
|
||||
|
||||
@ -148,7 +148,31 @@ impl MirVerifier {
|
||||
} else {
|
||||
if dlog::on("NYASH_DEBUG_VERIFIER") {
|
||||
eprintln!("[VERIFY] {} errors in function {}", local_errors.len(), function.signature.name);
|
||||
for e in &local_errors { eprintln!(" • {:?}", e); }
|
||||
for e in &local_errors {
|
||||
match e {
|
||||
VerificationError::MergeUsesPredecessorValue { value, merge_block, pred_block } => {
|
||||
eprintln!(
|
||||
" • MergeUsesPredecessorValue: value=%{:?} merge_bb={:?} pred_bb={:?} -- hint: insert/use Phi in merge block for values from predecessors",
|
||||
value, merge_block, pred_block
|
||||
);
|
||||
}
|
||||
VerificationError::DominatorViolation { value, use_block, def_block } => {
|
||||
eprintln!(
|
||||
" • DominatorViolation: value=%{:?} use_bb={:?} def_bb={:?} -- hint: ensure definition dominates use, or route via Phi",
|
||||
value, use_block, def_block
|
||||
);
|
||||
}
|
||||
VerificationError::InvalidPhi { phi_value, block, reason } => {
|
||||
eprintln!(
|
||||
" • InvalidPhi: phi_dst=%{:?} in bb={:?} reason={} -- hint: check inputs cover all predecessors and placed at block start",
|
||||
phi_value, block, reason
|
||||
);
|
||||
}
|
||||
other => {
|
||||
eprintln!(" • {:?}", other);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(local_errors)
|
||||
}
|
||||
@ -342,6 +366,8 @@ impl MirVerifier {
|
||||
|
||||
for (use_block_id, block) in &function.blocks {
|
||||
for instruction in block.all_instructions() {
|
||||
// Phi inputs are special: they are defined in predecessors; skip dominance check for them
|
||||
if let super::MirInstruction::Phi { .. } = instruction { continue; }
|
||||
for used_value in instruction.used_values() {
|
||||
if let Some(&def_bb) = def_block.get(&used_value) {
|
||||
if def_bb != *use_block_id {
|
||||
@ -418,6 +444,8 @@ impl MirVerifier {
|
||||
let doms_of_block = dominators.get(bid).unwrap();
|
||||
// check instructions including terminator
|
||||
for inst in block.all_instructions() {
|
||||
// Skip Phi: its inputs are allowed to come from predecessors by SSA definition
|
||||
if let super::MirInstruction::Phi { .. } = inst { continue; }
|
||||
for used in inst.used_values() {
|
||||
if let Some(&db) = def_block.get(&used) {
|
||||
// If def doesn't dominate merge block, it must be routed via phi
|
||||
|
||||
Reference in New Issue
Block a user