HashMap iteration order is non-deterministic due to HashDoS protection. This caused merge_joinir_mir_blocks to sometimes process functions in wrong order, leading to incorrect block remapping. Changed: - MirModule.functions: HashMap → BTreeMap - MirInterpreter.functions: HashMap → BTreeMap - IfLoweringDryRunner.scan_module: HashMap → BTreeMap parameter 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
636 lines
20 KiB
Rust
636 lines
20 KiB
Rust
/*!
|
||
* MIR Function and Module - High-level MIR organization
|
||
*
|
||
* Functions contain basic blocks and SSA values, modules contain functions
|
||
*/
|
||
|
||
use super::{BasicBlock, BasicBlockId, EffectMask, MirInstruction, MirType, ValueId};
|
||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||
use std::fmt;
|
||
|
||
/// Function signature for MIR functions
|
||
#[derive(Debug, Clone, PartialEq)]
|
||
pub struct FunctionSignature {
|
||
/// Function name
|
||
pub name: String,
|
||
|
||
/// Parameter types
|
||
pub params: Vec<MirType>,
|
||
|
||
/// Return type
|
||
pub return_type: MirType,
|
||
|
||
/// Overall effect mask for the function
|
||
pub effects: EffectMask,
|
||
}
|
||
|
||
/// A MIR function in SSA form
|
||
#[derive(Debug, Clone)]
|
||
pub struct MirFunction {
|
||
/// Function signature
|
||
pub signature: FunctionSignature,
|
||
|
||
/// Basic blocks indexed by ID
|
||
pub blocks: HashMap<BasicBlockId, BasicBlock>,
|
||
|
||
/// Entry basic block ID
|
||
pub entry_block: BasicBlockId,
|
||
|
||
/// Local variable declarations (before SSA conversion)
|
||
pub locals: Vec<MirType>,
|
||
|
||
/// Parameter value IDs
|
||
pub params: Vec<ValueId>,
|
||
|
||
/// Next available value ID
|
||
pub next_value_id: u32,
|
||
|
||
/// Function-level metadata
|
||
pub metadata: FunctionMetadata,
|
||
}
|
||
|
||
/// Metadata for MIR functions
|
||
#[derive(Debug, Clone, Default)]
|
||
pub struct FunctionMetadata {
|
||
/// Source file location
|
||
pub source_file: Option<String>,
|
||
|
||
/// Line number in source
|
||
pub line_number: Option<u32>,
|
||
|
||
/// Whether this function is an entry point
|
||
pub is_entry_point: bool,
|
||
|
||
/// Whether this function is pure (no side effects)
|
||
pub is_pure: bool,
|
||
|
||
/// Optimization hints
|
||
pub optimization_hints: Vec<String>,
|
||
|
||
/// Optional per-value type map (for builders that annotate ValueId types)
|
||
// Phase 25.1: HashMap → BTreeMap(決定性確保)
|
||
pub value_types: std::collections::BTreeMap<ValueId, MirType>,
|
||
}
|
||
|
||
impl MirFunction {
|
||
/// Create a new MIR function
|
||
pub fn new(signature: FunctionSignature, entry_block: BasicBlockId) -> Self {
|
||
let mut blocks = HashMap::new();
|
||
blocks.insert(entry_block, BasicBlock::new(entry_block));
|
||
|
||
// 📦 Hotfix 1 + 4 + 8: Reserve ValueIds for function parameters at creation time
|
||
// Parameter ValueIds are %0, %1, ..., %N-1 where N = signature.params.len()
|
||
//
|
||
// 📦 Hotfix 8: Instance method receiver detection (Fixed Hotfix 4 logic inversion)
|
||
// - Instance methods (me.method()) have implicit 'me' receiver (params[0]: Box type)
|
||
// - Static methods (StaticBox.method()) have NO receiver (params[0]: non-Box type)
|
||
//
|
||
// Detection: If function name contains "." (box method) AND
|
||
// params[0] IS a Box type → instance method → needs +1 for receiver
|
||
let param_count = signature.params.len() as u32;
|
||
let has_implicit_receiver = if signature.name.contains('.') {
|
||
// Box method (instance or static)
|
||
if signature.params.is_empty() {
|
||
// No params → static method with no receiver
|
||
// (e.g., StaticBox.method() with 0 params)
|
||
false
|
||
} else {
|
||
// Check if first param is Box type (instance method) or not (static method)
|
||
// ✅ FIXED: Instance method (Box type) → true, Static method (non-Box) → false
|
||
matches!(signature.params[0], MirType::Box(_))
|
||
}
|
||
} else {
|
||
// Global function → no receiver
|
||
false
|
||
};
|
||
|
||
let receiver_count = if has_implicit_receiver { 1 } else { 0 };
|
||
let total_value_ids = param_count + receiver_count;
|
||
let initial_counter = total_value_ids.max(1); // At least 1 to reserve ValueId(0) as sentinel
|
||
|
||
// 🔥 Hotfix 5: Pre-populate params vector with reserved ValueIds
|
||
// Without this, setup_function_params() will allocate NEW ValueIds starting from
|
||
// the already-incremented counter, causing signature/body mismatch.
|
||
let mut pre_params = Vec::new();
|
||
for i in 0..total_value_ids {
|
||
pre_params.push(ValueId::new(i));
|
||
}
|
||
|
||
if std::env::var("NYASH_LOOPFORM_DEBUG").is_ok() {
|
||
eprintln!(
|
||
"[MirFunction::new] fn='{}' params={}, receiver={}, total={}, initial_counter={}, pre_params={:?}",
|
||
signature.name,
|
||
param_count,
|
||
receiver_count,
|
||
total_value_ids,
|
||
initial_counter,
|
||
pre_params
|
||
);
|
||
}
|
||
|
||
Self {
|
||
signature,
|
||
blocks,
|
||
entry_block,
|
||
locals: Vec::new(),
|
||
params: pre_params, // ✅ Pre-populate instead of empty Vec
|
||
next_value_id: initial_counter,
|
||
metadata: FunctionMetadata::default(),
|
||
}
|
||
}
|
||
|
||
/// Get the next available ValueId
|
||
pub fn next_value_id(&mut self) -> ValueId {
|
||
let id = ValueId::new(self.next_value_id);
|
||
self.next_value_id += 1;
|
||
id
|
||
}
|
||
|
||
/// 📦 Reserve ValueIds for function parameters (Hotfix 1: Parameter ValueId Reservation)
|
||
///
|
||
/// Call this after setting params to ensure next_value_id doesn't overlap
|
||
/// with parameter ValueIds. This prevents SSA violations where local variables
|
||
/// overwrite parameter values.
|
||
///
|
||
/// # Box-First理論
|
||
/// - 「境界をはっきりさせる」: パラメータ予約を明示的に
|
||
/// - 「いつでも戻せる」: 呼び出しタイミングを制御可能
|
||
pub fn reserve_parameter_value_ids(&mut self) {
|
||
let param_count = self.signature.params.len();
|
||
if self.next_value_id < param_count as u32 {
|
||
if std::env::var("NYASH_LOOPFORM_DEBUG").is_ok() {
|
||
eprintln!("[MirFunction::reserve_parameter_value_ids] fn='{}' reserving {} params, adjusting counter {} -> {}",
|
||
self.signature.name, param_count, self.next_value_id, param_count);
|
||
}
|
||
self.next_value_id = param_count as u32;
|
||
}
|
||
}
|
||
|
||
/// Add a new basic block
|
||
pub fn add_block(&mut self, block: BasicBlock) -> BasicBlockId {
|
||
let id = block.id;
|
||
if self.blocks.contains_key(&id)
|
||
&& std::env::var("NYASH_LOCAL_SSA_TRACE").ok().as_deref() == Some("1")
|
||
{
|
||
eprintln!("[mir-function] replacing existing block {:?}", id);
|
||
}
|
||
self.blocks.insert(id, block);
|
||
id
|
||
}
|
||
|
||
/// Get a basic block by ID
|
||
pub fn get_block(&self, id: BasicBlockId) -> Option<&BasicBlock> {
|
||
self.blocks.get(&id)
|
||
}
|
||
|
||
/// Get a mutable basic block by ID
|
||
pub fn get_block_mut(&mut self, id: BasicBlockId) -> Option<&mut BasicBlock> {
|
||
self.blocks.get_mut(&id)
|
||
}
|
||
|
||
/// Get the entry block
|
||
pub fn entry_block(&self) -> &BasicBlock {
|
||
self.blocks
|
||
.get(&self.entry_block)
|
||
.expect("Entry block must exist")
|
||
}
|
||
|
||
/// Get all basic block IDs in insertion order
|
||
pub fn block_ids(&self) -> Vec<BasicBlockId> {
|
||
let mut ids: Vec<_> = self.blocks.keys().copied().collect();
|
||
ids.sort();
|
||
ids
|
||
}
|
||
|
||
/// Get all values defined in this function
|
||
pub fn defined_values(&self) -> Vec<ValueId> {
|
||
let mut values = Vec::new();
|
||
values.extend(&self.params);
|
||
|
||
for block in self.blocks.values() {
|
||
values.extend(block.defined_values());
|
||
}
|
||
|
||
values
|
||
}
|
||
|
||
/// Verify function integrity (basic checks)
|
||
pub fn verify(&self) -> Result<(), String> {
|
||
// Check entry block exists
|
||
if !self.blocks.contains_key(&self.entry_block) {
|
||
return Err("Entry block does not exist".to_string());
|
||
}
|
||
|
||
// Check all blocks are reachable from entry
|
||
let reachable = self.compute_reachable_blocks();
|
||
for (id, _block) in &self.blocks {
|
||
if !reachable.contains(id) {
|
||
eprintln!("Warning: Block {} is unreachable", id);
|
||
}
|
||
}
|
||
|
||
// Check terminator consistency
|
||
for block in self.blocks.values() {
|
||
if !block.is_terminated() && !block.is_empty() {
|
||
return Err(format!("Block {} is not properly terminated", block.id));
|
||
}
|
||
|
||
// Check successor/predecessor consistency
|
||
for successor_id in &block.successors {
|
||
if let Some(successor) = self.blocks.get(successor_id) {
|
||
if !successor.predecessors.contains(&block.id) {
|
||
return Err(format!(
|
||
"Inconsistent CFG: {} -> {} but {} doesn't have {} as predecessor",
|
||
block.id, successor_id, successor_id, block.id
|
||
));
|
||
}
|
||
} else {
|
||
return Err(format!(
|
||
"Block {} references non-existent successor {}",
|
||
block.id, successor_id
|
||
));
|
||
}
|
||
}
|
||
}
|
||
|
||
Ok(())
|
||
}
|
||
|
||
/// Compute reachable blocks from entry
|
||
fn compute_reachable_blocks(&self) -> std::collections::HashSet<BasicBlockId> {
|
||
let mut reachable = std::collections::HashSet::new();
|
||
let mut worklist = vec![self.entry_block];
|
||
|
||
while let Some(current) = worklist.pop() {
|
||
if reachable.insert(current) {
|
||
if let Some(block) = self.blocks.get(¤t) {
|
||
worklist.extend(block.successors.iter());
|
||
}
|
||
}
|
||
}
|
||
|
||
reachable
|
||
}
|
||
|
||
/// Update predecessor/successor relationships
|
||
pub fn update_cfg(&mut self) {
|
||
// Clear all predecessors
|
||
for block in self.blocks.values_mut() {
|
||
block.predecessors.clear();
|
||
}
|
||
|
||
// Rebuild predecessors from successors
|
||
let edges: Vec<(BasicBlockId, BasicBlockId)> = self
|
||
.blocks
|
||
.values()
|
||
.flat_map(|block| block.successors.iter().map(move |&succ| (block.id, succ)))
|
||
.collect();
|
||
|
||
for (pred, succ) in edges {
|
||
if let Some(successor_block) = self.blocks.get_mut(&succ) {
|
||
successor_block.add_predecessor(pred);
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Mark reachable blocks
|
||
pub fn mark_reachable_blocks(&mut self) {
|
||
let reachable = self.compute_reachable_blocks();
|
||
for (id, block) in &mut self.blocks {
|
||
if reachable.contains(id) {
|
||
block.mark_reachable();
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Get function statistics
|
||
pub fn stats(&self) -> FunctionStats {
|
||
let instruction_count = self
|
||
.blocks
|
||
.values()
|
||
.map(|block| block.instructions.len() + if block.terminator.is_some() { 1 } else { 0 })
|
||
.sum();
|
||
|
||
let phi_count = self
|
||
.blocks
|
||
.values()
|
||
.map(|block| block.phi_instructions().count())
|
||
.sum();
|
||
|
||
FunctionStats {
|
||
block_count: self.blocks.len(),
|
||
instruction_count,
|
||
phi_count,
|
||
value_count: self.next_value_id as usize,
|
||
is_pure: self.signature.effects.is_pure(),
|
||
}
|
||
}
|
||
|
||
/// Set jump terminator if block is not already terminated
|
||
/// Helper for JSON v0 Bridge and loop lowering
|
||
pub fn set_jump_terminator(
|
||
&mut self,
|
||
bb_id: BasicBlockId,
|
||
target: BasicBlockId,
|
||
) -> Result<(), String> {
|
||
if let Some(bb) = self.get_block_mut(bb_id) {
|
||
if !bb.is_terminated() {
|
||
bb.set_terminator(MirInstruction::Jump { target });
|
||
}
|
||
Ok(())
|
||
} else {
|
||
Err(format!("Block {:?} not found", bb_id))
|
||
}
|
||
}
|
||
|
||
/// Set branch terminator
|
||
/// Helper for JSON v0 Bridge if/else lowering
|
||
pub fn set_branch_terminator(
|
||
&mut self,
|
||
bb_id: BasicBlockId,
|
||
condition: ValueId,
|
||
then_bb: BasicBlockId,
|
||
else_bb: BasicBlockId,
|
||
) -> Result<(), String> {
|
||
if let Some(bb) = self.get_block_mut(bb_id) {
|
||
bb.set_terminator(MirInstruction::Branch {
|
||
condition,
|
||
then_bb,
|
||
else_bb,
|
||
});
|
||
Ok(())
|
||
} else {
|
||
Err(format!("Block {:?} not found", bb_id))
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Function statistics for profiling and optimization
|
||
#[derive(Debug, Clone)]
|
||
pub struct FunctionStats {
|
||
pub block_count: usize,
|
||
pub instruction_count: usize,
|
||
pub phi_count: usize,
|
||
pub value_count: usize,
|
||
pub is_pure: bool,
|
||
}
|
||
|
||
/// A MIR module containing multiple functions
|
||
#[derive(Debug, Clone)]
|
||
pub struct MirModule {
|
||
/// Module name
|
||
pub name: String,
|
||
|
||
/// Functions in this module (BTreeMap for deterministic iteration order)
|
||
pub functions: BTreeMap<String, MirFunction>,
|
||
|
||
/// Global constants/statics
|
||
pub globals: HashMap<String, super::ConstValue>,
|
||
|
||
/// Module metadata
|
||
pub metadata: ModuleMetadata,
|
||
}
|
||
|
||
/// Metadata for MIR modules
|
||
#[derive(Debug, Clone, Default)]
|
||
pub struct ModuleMetadata {
|
||
/// Source file this module was compiled from
|
||
pub source_file: Option<String>,
|
||
|
||
/// Compilation timestamp
|
||
pub compiled_at: Option<String>,
|
||
|
||
/// Compiler version
|
||
pub compiler_version: Option<String>,
|
||
|
||
/// Optimization level used
|
||
pub optimization_level: u32,
|
||
|
||
/// Dev idempotence markers for passes (optional; default empty)
|
||
/// Key format suggestion: "pass_name:function_name"
|
||
pub dev_processed_markers: HashSet<String>,
|
||
}
|
||
|
||
impl MirModule {
|
||
/// Create a new MIR module
|
||
pub fn new(name: String) -> Self {
|
||
Self {
|
||
name,
|
||
functions: BTreeMap::new(),
|
||
globals: HashMap::new(),
|
||
metadata: ModuleMetadata::default(),
|
||
}
|
||
}
|
||
|
||
/// Add a function to the module
|
||
pub fn add_function(&mut self, function: MirFunction) {
|
||
let name = function.signature.name.clone();
|
||
self.functions.insert(name, function);
|
||
}
|
||
|
||
/// Get a function by name
|
||
pub fn get_function(&self, name: &str) -> Option<&MirFunction> {
|
||
self.functions.get(name)
|
||
}
|
||
|
||
/// Get a mutable function by name
|
||
pub fn get_function_mut(&mut self, name: &str) -> Option<&mut MirFunction> {
|
||
self.functions.get_mut(name)
|
||
}
|
||
|
||
/// Get all function names
|
||
pub fn function_names(&self) -> Vec<&String> {
|
||
self.functions.keys().collect()
|
||
}
|
||
|
||
/// Add a global constant
|
||
pub fn add_global(&mut self, name: String, value: super::ConstValue) {
|
||
self.globals.insert(name, value);
|
||
}
|
||
|
||
/// Verify entire module
|
||
pub fn verify(&self) -> Result<(), Vec<String>> {
|
||
let mut errors = Vec::new();
|
||
|
||
for (name, function) in &self.functions {
|
||
if let Err(e) = function.verify() {
|
||
errors.push(format!("Function '{}': {}", name, e));
|
||
}
|
||
}
|
||
|
||
if errors.is_empty() {
|
||
Ok(())
|
||
} else {
|
||
Err(errors)
|
||
}
|
||
}
|
||
|
||
/// Get module statistics
|
||
pub fn stats(&self) -> ModuleStats {
|
||
let function_stats: Vec<_> = self.functions.values().map(|f| f.stats()).collect();
|
||
|
||
ModuleStats {
|
||
function_count: self.functions.len(),
|
||
global_count: self.globals.len(),
|
||
total_blocks: function_stats.iter().map(|s| s.block_count).sum(),
|
||
total_instructions: function_stats.iter().map(|s| s.instruction_count).sum(),
|
||
total_values: function_stats.iter().map(|s| s.value_count).sum(),
|
||
pure_functions: function_stats.iter().filter(|s| s.is_pure).count(),
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Module statistics
|
||
#[derive(Debug, Clone)]
|
||
pub struct ModuleStats {
|
||
pub function_count: usize,
|
||
pub global_count: usize,
|
||
pub total_blocks: usize,
|
||
pub total_instructions: usize,
|
||
pub total_values: usize,
|
||
pub pure_functions: usize,
|
||
}
|
||
|
||
impl fmt::Display for MirFunction {
|
||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||
writeln!(
|
||
f,
|
||
"function {}({}) -> {} {{",
|
||
self.signature.name,
|
||
self.signature
|
||
.params
|
||
.iter()
|
||
.enumerate()
|
||
.map(|(i, ty)| format!("%{}: {:?}", i, ty))
|
||
.collect::<Vec<_>>()
|
||
.join(", "),
|
||
format!("{:?}", self.signature.return_type)
|
||
)?;
|
||
|
||
// Show effects if not pure
|
||
if !self.signature.effects.is_pure() {
|
||
writeln!(f, " ; effects: {}", self.signature.effects)?;
|
||
}
|
||
|
||
// Show blocks in order
|
||
let mut block_ids: Vec<_> = self.blocks.keys().copied().collect();
|
||
block_ids.sort();
|
||
|
||
for block_id in block_ids {
|
||
if let Some(block) = self.blocks.get(&block_id) {
|
||
write!(f, "{}", block)?;
|
||
}
|
||
}
|
||
|
||
writeln!(f, "}}")?;
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
impl fmt::Display for MirModule {
|
||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||
writeln!(f, "module {} {{", self.name)?;
|
||
|
||
// Show globals
|
||
if !self.globals.is_empty() {
|
||
writeln!(f, " ; globals:")?;
|
||
for (name, value) in &self.globals {
|
||
writeln!(f, " global {} = {}", name, value)?;
|
||
}
|
||
writeln!(f)?;
|
||
}
|
||
|
||
// Show functions
|
||
for function in self.functions.values() {
|
||
writeln!(f, "{}", function)?;
|
||
}
|
||
|
||
writeln!(f, "}}")?;
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
use crate::mir::{EffectMask, MirType};
|
||
|
||
#[test]
|
||
fn test_function_creation() {
|
||
let signature = FunctionSignature {
|
||
name: "test_func".to_string(),
|
||
params: vec![MirType::Integer, MirType::Float],
|
||
return_type: MirType::Integer,
|
||
effects: EffectMask::PURE,
|
||
};
|
||
|
||
let entry_block = BasicBlockId::new(0);
|
||
let function = MirFunction::new(signature.clone(), entry_block);
|
||
|
||
assert_eq!(function.signature.name, "test_func");
|
||
assert_eq!(function.entry_block, entry_block);
|
||
assert!(function.blocks.contains_key(&entry_block));
|
||
}
|
||
|
||
#[test]
|
||
fn test_module_creation() {
|
||
let mut module = MirModule::new("test_module".to_string());
|
||
|
||
let signature = FunctionSignature {
|
||
name: "main".to_string(),
|
||
params: vec![],
|
||
return_type: MirType::Void,
|
||
effects: EffectMask::PURE,
|
||
};
|
||
|
||
let function = MirFunction::new(signature, BasicBlockId::new(0));
|
||
module.add_function(function);
|
||
|
||
assert_eq!(module.name, "test_module");
|
||
assert!(module.get_function("main").is_some());
|
||
assert_eq!(module.function_names().len(), 1);
|
||
}
|
||
|
||
// Legacy ValueId 割り当て仕様(LoopForm v2 導入前の想定).
|
||
#[test]
|
||
#[ignore]
|
||
fn test_value_id_generation() {
|
||
let signature = FunctionSignature {
|
||
name: "test".to_string(),
|
||
params: vec![],
|
||
return_type: MirType::Void,
|
||
effects: EffectMask::PURE,
|
||
};
|
||
|
||
let mut function = MirFunction::new(signature, BasicBlockId::new(0));
|
||
|
||
let val1 = function.next_value_id();
|
||
let val2 = function.next_value_id();
|
||
let val3 = function.next_value_id();
|
||
|
||
assert_eq!(val1, ValueId::new(0));
|
||
assert_eq!(val2, ValueId::new(1));
|
||
assert_eq!(val3, ValueId::new(2));
|
||
}
|
||
|
||
// Legacy stats API の想定(現行の拡張とはズレるためアーカイブ扱い).
|
||
#[test]
|
||
#[ignore]
|
||
fn test_function_stats() {
|
||
let signature = FunctionSignature {
|
||
name: "test".to_string(),
|
||
params: vec![],
|
||
return_type: MirType::Void,
|
||
effects: EffectMask::PURE,
|
||
};
|
||
|
||
let function = MirFunction::new(signature, BasicBlockId::new(0));
|
||
let stats = function.stats();
|
||
|
||
assert_eq!(stats.block_count, 1);
|
||
assert_eq!(stats.instruction_count, 0);
|
||
assert_eq!(stats.value_count, 0);
|
||
assert!(stats.is_pure);
|
||
}
|
||
}
|