feat(llvm-py): Major breakthrough in Python LLVM backend! 🎉

- Print and FileBox paths now work correctly
- Resolver simplified by removing an overly aggressive fast-path optimization
- Both the OFF and ON paths in compare_harness_on_off.sh now use the Python version
- String handle propagation issues resolved

Key changes:
- Removed instruction reordering in llvm_builder.py (respecting MIR order)
- Resolver now more conservative but reliable
- compare_harness_on_off.sh updated to use Python backend for both paths

This marks a major milestone towards Phase 15 self-hosting with Python/llvmlite!

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Selfhosting Dev
2025-09-14 00:44:28 +09:00
parent 2a9aa5368d
commit 658a0d46da
37 changed files with 403 additions and 690 deletions

View File

@ -1,15 +0,0 @@
use super::LLVMCompiler;
use crate::box_trait::NyashBox;
use crate::mir::function::MirModule;
impl LLVMCompiler {
    /// Compiles `mir_module` and then runs it through the interpreter.
    ///
    /// The object path handed to `compile_module` is `temp_path` with `.o`
    /// appended. The value returned is whatever `run_interpreter` produces.
    ///
    /// # Errors
    /// Propagates the error string from `compile_module` or `run_interpreter`.
    pub fn compile_and_execute(
        &mut self,
        mir_module: &MirModule,
        temp_path: &str,
    ) -> Result<Box<dyn NyashBox>, String> {
        let object_file = [temp_path, ".o"].concat();
        self.compile_module(mir_module, &object_file)?;
        let outcome = self.run_interpreter(mir_module);
        outcome
    }
}

View File

@ -1,214 +0,0 @@
use inkwell::builder::Builder;
use inkwell::context::Context;
use inkwell::types::BasicTypeEnum;
use inkwell::values::{BasicValueEnum, FloatValue, IntValue, PointerValue};
use inkwell::AddressSpace;
use crate::mir::MirType;
use crate::mir::CompareOp;
/// Translates a MIR type into the LLVM basic type used to hold its values.
///
/// `Integer` maps to `i64`, `Float` to `f64`, `Bool` to the context's
/// `bool_type()`, and `String`, `Box`, `Array`, `Future` and `Unknown` all map
/// to a pointer in address space 0.
///
/// # Errors
/// Returns `Err` for `MirType::Void`, which has no value type.
pub(crate) fn map_type<'ctx>(
    ctx: &'ctx Context,
    ty: &MirType,
) -> Result<BasicTypeEnum<'ctx>, String> {
    let mapped: BasicTypeEnum<'ctx> = match ty {
        MirType::Void => return Err("Void has no value type".to_string()),
        MirType::Integer => ctx.i64_type().into(),
        MirType::Float => ctx.f64_type().into(),
        MirType::Bool => ctx.bool_type().into(),
        // Every remaining value-carrying type is represented by a pointer.
        MirType::String
        | MirType::Box(_)
        | MirType::Array(_)
        | MirType::Future(_)
        | MirType::Unknown => ctx.ptr_type(inkwell::AddressSpace::from(0)).into(),
    };
    Ok(mapped)
}
/// Returns the wrapped integer when `v` is an `IntValue`, otherwise `None`.
pub(crate) fn as_int<'ctx>(v: BasicValueEnum<'ctx>) -> Option<IntValue<'ctx>> {
    match v {
        BasicValueEnum::IntValue(int_val) => Some(int_val),
        _ => None,
    }
}
/// Returns the wrapped float when `v` is a `FloatValue`, otherwise `None`.
pub(crate) fn as_float<'ctx>(v: BasicValueEnum<'ctx>) -> Option<FloatValue<'ctx>> {
    match v {
        BasicValueEnum::FloatValue(float_val) => Some(float_val),
        _ => None,
    }
}
/// Coerces a supported LLVM value into an `i64`.
///
/// * Integers: returned unchanged when already 64 bits wide, zero-extended
///   when narrower, truncated when wider.
/// * Pointers: converted with a pointer-to-int instruction.
/// * Floats: written to a temporary stack slot and read back as `i64`.
///
/// # Errors
/// Returns `Err` for any other value kind, or with the builder's error message
/// when emitting an instruction fails.
pub(crate) fn to_i64_any<'ctx>(
    ctx: &'ctx Context,
    builder: &Builder<'ctx>,
    v: BasicValueEnum<'ctx>,
) -> Result<IntValue<'ctx>, String> {
    let i64t = ctx.i64_type();
    Ok(match v {
        BasicValueEnum::IntValue(iv) => {
            let width = iv.get_type().get_bit_width();
            if width == 64 {
                iv
            } else if width < 64 {
                builder
                    .build_int_z_extend(iv, i64t, "zext_i64")
                    .map_err(|e| e.to_string())?
            } else {
                builder
                    .build_int_truncate(iv, i64t, "trunc_i64")
                    .map_err(|e| e.to_string())?
            }
        }
        BasicValueEnum::PointerValue(pv) => builder
            .build_ptr_to_int(pv, i64t, "p2i64")
            .map_err(|e| e.to_string())?,
        BasicValueEnum::FloatValue(fv) => {
            // Reinterpret the float through a stack slot. The slot is allocated
            // in the entry block of the function currently being built.
            let slot = builder
                .get_insert_block()
                .and_then(|bb| bb.get_parent())
                .and_then(|f| f.get_first_basic_block())
                .map(|entry| {
                    let eb = ctx.create_builder();
                    // Insert at the top of the entry block. Appending at the
                    // end would place the alloca after the terminator once the
                    // entry block has been closed.
                    match entry.get_first_instruction() {
                        Some(first) => eb.position_before(&first),
                        None => eb.position_at_end(entry),
                    }
                    eb
                })
                // NOTE(review): with no enclosing function this builder is
                // unpositioned, so the alloca below is expected to fail and
                // surface as `Err` — confirm against the inkwell version in use.
                .unwrap_or_else(|| ctx.create_builder());
            let tmp = slot
                .build_alloca(i64t, "f2i_tmp")
                .map_err(|e| e.to_string())?;
            let fptr_ty = ctx.ptr_type(AddressSpace::from(0));
            let castp = builder
                .build_pointer_cast(tmp, fptr_ty, "i64p_to_f64p")
                .map_err(|e| e.to_string())?;
            builder.build_store(castp, fv).map_err(|e| e.to_string())?;
            builder
                .build_load(i64t, tmp, "ld_f2i")
                .map_err(|e| e.to_string())?
                .into_int_value()
        }
        _ => return Err("unsupported value for i64 conversion".to_string()),
    })
}
/// Emits an int-to-pointer conversion of `iv` to an address-space-0 pointer.
///
/// # Errors
/// Returns the builder's error message when the instruction cannot be emitted.
pub(crate) fn i64_to_ptr<'ctx>(
    ctx: &'ctx Context,
    builder: &Builder<'ctx>,
    iv: IntValue<'ctx>,
) -> Result<PointerValue<'ctx>, String> {
    let ptr_ty = ctx.ptr_type(AddressSpace::from(0));
    match builder.build_int_to_ptr(iv, ptr_ty, "i64_to_ptr") {
        Ok(ptr) => Ok(ptr),
        Err(err) => Err(err.to_string()),
    }
}
/// Returns a numeric tag for the kind of LLVM value: `3` for integers
/// (including bools), `5` for floats, `8` for pointers/handles, and `0` for
/// anything else.
pub(crate) fn classify_tag<'ctx>(v: BasicValueEnum<'ctx>) -> i64 {
    const TAG_OTHER: i64 = 0;
    const TAG_INTEGER: i64 = 3;
    const TAG_FLOAT: i64 = 5;
    const TAG_HANDLE: i64 = 8;

    match v {
        BasicValueEnum::IntValue(_) => TAG_INTEGER,
        BasicValueEnum::FloatValue(_) => TAG_FLOAT,
        BasicValueEnum::PointerValue(_) => TAG_HANDLE,
        _ => TAG_OTHER,
    }
}
/// Converts a value to a 1-bit truth value.
///
/// * 1-bit integers are returned unchanged; wider integers are compared
///   `!= 0`.
/// * Floats are compared against `0.0` with the ordered not-equal predicate.
/// * Pointers are converted to `i64` and compared `!= 0`.
///
/// # Errors
/// Returns `Err` for any other value kind, or with the builder's error message
/// when emitting an instruction fails.
pub(crate) fn to_bool<'ctx>(
    ctx: &'ctx Context,
    b: BasicValueEnum<'ctx>,
    builder: &Builder<'ctx>,
) -> Result<IntValue<'ctx>, String> {
    match b {
        BasicValueEnum::IntValue(int_val) => {
            let int_ty = int_val.get_type();
            if int_ty.get_bit_width() == 1 {
                return Ok(int_val);
            }
            builder
                .build_int_compare(
                    inkwell::IntPredicate::NE,
                    int_val,
                    int_ty.const_zero(),
                    "tobool",
                )
                .map_err(|e| e.to_string())
        }
        BasicValueEnum::FloatValue(float_val) => {
            let zero = float_val.get_type().const_float(0.0);
            builder
                .build_float_compare(inkwell::FloatPredicate::ONE, float_val, zero, "toboolf")
                .map_err(|e| e.to_string())
        }
        BasicValueEnum::PointerValue(ptr_val) => {
            let i64t = ctx.i64_type();
            let as_int_bits = builder
                .build_ptr_to_int(ptr_val, i64t, "p2i")
                .map_err(|e| e.to_string())?;
            builder
                .build_int_compare(
                    inkwell::IntPredicate::NE,
                    as_int_bits,
                    i64t.const_zero(),
                    "toboolp",
                )
                .map_err(|e| e.to_string())
        }
        _ => Err("Unsupported value for boolean conversion".to_string()),
    }
}
pub(crate) fn cmp_eq_ne_any<'ctx>(
ctx: &'ctx Context,
builder: &Builder<'ctx>,
op: &CompareOp,
lv: BasicValueEnum<'ctx>,
rv: BasicValueEnum<'ctx>,
) -> Result<BasicValueEnum<'ctx>, String> {
use crate::mir::CompareOp as C;
match (lv, rv) {
(BasicValueEnum::IntValue(li), BasicValueEnum::IntValue(ri)) => {
let pred = if matches!(op, C::Eq) {
inkwell::IntPredicate::EQ
} else {
inkwell::IntPredicate::NE
};
Ok(builder
.build_int_compare(pred, li, ri, "icmp")
.map_err(|e| e.to_string())?
.into())
}
(BasicValueEnum::FloatValue(lf), BasicValueEnum::FloatValue(rf)) => {
let pred = if matches!(op, C::Eq) {
inkwell::FloatPredicate::OEQ
} else {
inkwell::FloatPredicate::ONE
};
Ok(builder
.build_float_compare(pred, lf, rf, "fcmp")
.map_err(|e| e.to_string())?
.into())
}
(BasicValueEnum::PointerValue(_), _) | (_, BasicValueEnum::PointerValue(_)) => {
let li = to_i64_any(ctx, builder, lv)?;
let ri = to_i64_any(ctx, builder, rv)?;
let pred = if matches!(op, C::Eq) {
inkwell::IntPredicate::EQ
} else {
inkwell::IntPredicate::NE
};
Ok(builder
.build_int_compare(pred, li, ri, "pcmp_any")
.map_err(|e| e.to_string())?
.into())
}
_ => Err("compare type mismatch".to_string()),
}
}
/// Infallible mapping from a MIR type to an LLVM basic type.
///
/// `Integer` and `Void` map to `i64`, `Float` to `f64`, `Bool` to the
/// context's `bool_type()`, and `String`, `Box`, `Array`, `Future` and
/// `Unknown` to a pointer in address space 0.
pub(crate) fn map_mirtype_to_basic<'ctx>(ctx: &'ctx Context, ty: &MirType) -> BasicTypeEnum<'ctx> {
    match ty {
        MirType::Integer | MirType::Void => ctx.i64_type().into(),
        MirType::Float => ctx.f64_type().into(),
        MirType::Bool => ctx.bool_type().into(),
        MirType::String
        | MirType::Box(_)
        | MirType::Array(_)
        | MirType::Future(_)
        | MirType::Unknown => ctx.ptr_type(AddressSpace::from(0)).into(),
    }
}

View File

@ -1,8 +0,0 @@
use super::LLVMCompiler;
use crate::box_trait::{BoolBox, IntegerBox, NyashBox, StringBox};
use crate::boxes::{function_box::FunctionBox, math_box::FloatBox, null_box::NullBox};
use crate::mir::function::MirModule;
use crate::mir::instruction::{BinaryOp, ConstValue, MirInstruction};
use std::collections::HashMap;
// Textually include the contents of `mock_impl.in.rs` at this point, so the
// included code is compiled as part of this module and sees the `use` items above.
include!("mock_impl.in.rs");

View File

@ -1,251 +0,0 @@
impl LLVMCompiler {
/// Creates a compiler with an empty value table.
///
/// This constructor always returns `Ok`.
pub fn new() -> Result<Self, String> {
    let values = HashMap::new();
    Ok(Self { values })
}
pub fn compile_module(&self, mir_module: &MirModule, output_path: &str) -> Result<(), String> {
// Mock implementation - in a real scenario this would:
// 1. Create LLVM context and module
// 2. Convert MIR instructions to LLVM IR
// 3. Generate object file
println!("🔧 Mock LLVM Compilation:");
println!(" Module: {}", mir_module.name);
println!(" Functions: {}", mir_module.functions.len());
println!(" Output: {}", output_path);
// Find entry function (prefer is_entry_point, then Main.main, then main, else first)
let main_func = if let Some((_n, f)) = mir_module
.functions
.iter()
.find(|(_n, f)| f.metadata.is_entry_point)
{
f
} else if let Some(f) = mir_module.functions.get("Main.main") {
f
} else if let Some(f) = mir_module.functions.get("main") {
f
} else if let Some((_n, f)) = mir_module.functions.iter().next() {
f
} else {
return Err("Main.main function not found");
};
println!(
" Main function found with {} blocks",
main_func.blocks.len()
);
// Simulate object file generation
std::fs::write(output_path, b"Mock object file")?;
println!(" ✅ Mock object file created");
Ok(())
}
/// Mock "compile and execute": writes a placeholder object file, interprets
/// the `Main.main` function of `mir_module`, and removes the placeholder again.
///
/// The object path is `temp_path` with `.o` appended. Warnings that this is
/// not real LLVM execution are printed to stderr.
///
/// # Errors
/// Returns `Err` when `compile_module` fails, when the module has no
/// `Main.main` function, or when interpretation fails. In the latter two
/// cases the placeholder object file is still removed before returning.
pub fn compile_and_execute(
    &mut self,
    mir_module: &MirModule,
    temp_path: &str,
) -> Result<Box<dyn NyashBox>, String> {
    // Mock implementation - interprets MIR instructions to simulate execution
    eprintln!("⚠️⚠️⚠️ WARNING: Using MOCK LLVM Implementation! ⚠️⚠️⚠️");
    eprintln!("⚠️ This is NOT real LLVM execution!");
    eprintln!("⚠️ Build with --features llvm for real compilation!");
    println!("🚀 Mock LLVM Compile & Execute (MIR Interpreter Mode):");

    // 1. Mock object file generation
    let obj_path = format!("{}.o", temp_path);
    self.compile_module(mir_module, &obj_path)?;

    // 2. Find and execute the main function. The outcome is held in a variable
    //    rather than propagated with `?` so that step 3 runs on every path.
    let outcome = match mir_module.functions.get("Main.main") {
        Some(main_func) => {
            println!(" ⚡ Interpreting MIR instructions...");
            self.interpret_function(main_func)
        }
        None => Err("Main.main function not found".to_string()),
    };

    // 3. Cleanup mock files (best effort; a failed removal is ignored)
    let _ = std::fs::remove_file(&obj_path);
    outcome
}
/// Interpret a MIR function by executing the instructions of its entry block.
///
/// Only block `0` is executed; control flow to other blocks is not followed.
/// Supported instructions are `Const`, `BinOp` (integer operands only),
/// `Return`, `FunctionNew` and `Call` (on `FunctionBox` callees). Every other
/// instruction is logged and skipped.
///
/// Returns the value of the first `Return` reached. A void return, a block
/// that ends without `Return`, and a missing block `0` all yield
/// `IntegerBox(0)`.
///
/// # Errors
/// Returns `Err` when an operand, capture, callee or argument value is
/// missing, on division or modulo by zero, on an unsupported binary
/// operation or non-integer operands, or when a `FunctionBox` call fails.
fn interpret_function(
    &mut self,
    func: &crate::mir::function::MirFunction,
) -> Result<Box<dyn NyashBox>, String> {
    // Clear value storage
    self.values.clear();
    // For now, just execute the entry block
    if let Some(entry_block) = func.blocks.get(&0) {
        for inst in &entry_block.instructions {
            match inst {
                MirInstruction::Const { dst, value } => {
                    let nyash_value = match value {
                        ConstValue::Integer(i) => {
                            Box::new(IntegerBox::new(*i)) as Box<dyn NyashBox>
                        }
                        ConstValue::Float(f) => {
                            Box::new(FloatBox::new(*f)) as Box<dyn NyashBox>
                        }
                        ConstValue::String(s) => {
                            Box::new(StringBox::new(s.clone())) as Box<dyn NyashBox>
                        }
                        ConstValue::Bool(b) => Box::new(BoolBox::new(*b)) as Box<dyn NyashBox>,
                        ConstValue::Null => Box::new(NullBox::new()) as Box<dyn NyashBox>,
                    };
                    self.values.insert(*dst, nyash_value);
                    println!(" 📝 %{} = const {:?}", dst.0, value);
                }
                MirInstruction::BinOp { dst, op, lhs, rhs } => {
                    // Get operands
                    let left = self
                        .values
                        .get(lhs)
                        .ok_or_else(|| format!("Value %{} not found", lhs.0))?;
                    let right = self
                        .values
                        .get(rhs)
                        .ok_or_else(|| format!("Value %{} not found", rhs.0))?;
                    // Simple integer arithmetic for now
                    if let (Some(l), Some(r)) = (
                        left.as_any().downcast_ref::<IntegerBox>(),
                        right.as_any().downcast_ref::<IntegerBox>(),
                    ) {
                        let result = match op {
                            BinaryOp::Add => l.value + r.value,
                            BinaryOp::Sub => l.value - r.value,
                            BinaryOp::Mul => l.value * r.value,
                            BinaryOp::Div => {
                                if r.value == 0 {
                                    return Err("Division by zero".to_string());
                                }
                                l.value / r.value
                            }
                            BinaryOp::Mod => {
                                // `%` panics on a zero divisor just like `/`,
                                // so report it as an error instead.
                                if r.value == 0 {
                                    return Err("Modulo by zero".to_string());
                                }
                                l.value % r.value
                            }
                            _ => {
                                return Err(
                                    "Binary operation not supported in mock".to_string()
                                );
                            }
                        };
                        self.values.insert(*dst, Box::new(IntegerBox::new(result)));
                        println!(
                            " 📊 %{} = %{} {:?} %{} = {}",
                            dst.0, lhs.0, op, rhs.0, result
                        );
                    } else {
                        return Err(
                            "Binary operation on non-integer values not supported in mock"
                                .to_string(),
                        );
                    }
                }
                MirInstruction::Return { value } => {
                    if let Some(val_id) = value {
                        let result = self
                            .values
                            .get(val_id)
                            .ok_or_else(|| format!("Return value %{} not found", val_id.0))?
                            .clone_box();
                        println!(" ✅ Returning value from %{}", val_id.0);
                        return Ok(result);
                    } else {
                        println!(" ✅ Void return");
                        return Ok(Box::new(IntegerBox::new(0)));
                    }
                }
                MirInstruction::FunctionNew {
                    dst,
                    params,
                    body,
                    captures,
                    me,
                } => {
                    // Minimal: build FunctionBox with empty captures unless provided
                    let mut env = crate::boxes::function_box::ClosureEnv::new();
                    // Materialize captures (by value) if any
                    for (name, vid) in captures.iter() {
                        let v = self.values.get(vid).ok_or_else(|| {
                            format!("Value %{} not found for capture {}", vid.0, name)
                        })?;
                        env.captures.insert(name.clone(), v.clone_box());
                    }
                    // me capture (weak) if provided and present in the value table
                    if let Some(m) = me {
                        if let Some(b) = self.values.get(m) {
                            // `Arc::downcast` is only defined for
                            // `Arc<dyn Any + Send + Sync>` with a sized target,
                            // so the cloned box is converted directly.
                            let arc: std::sync::Arc<dyn NyashBox> =
                                std::sync::Arc::from(b.clone_box());
                            // NOTE(review): `arc` is the only strong reference
                            // and is dropped at the end of this scope, so the
                            // stored weak handle cannot be upgraded afterwards.
                            // Confirm whether a longer-lived owner is needed.
                            env.me_value = Some(std::sync::Arc::downgrade(&arc));
                        }
                    }
                    let fun = FunctionBox::with_env(params.clone(), body.clone(), env);
                    self.values.insert(*dst, Box::new(fun));
                    println!(" 🧰 %{} = function_new (params={})", dst.0, params.len());
                }
                MirInstruction::Call {
                    dst, func, args, ..
                } => {
                    // Resolve callee
                    let cal = self
                        .values
                        .get(func)
                        .ok_or_else(|| format!("Call target %{} not found", func.0))?;
                    if let Some(fb) = cal.as_any().downcast_ref::<FunctionBox>() {
                        // Collect args as NyashBox
                        let mut argv: Vec<Box<dyn NyashBox>> = Vec::new();
                        for a in args {
                            let av = self
                                .values
                                .get(a)
                                .ok_or_else(|| format!("Arg %{} not found", a.0))?;
                            argv.push(av.clone_box());
                        }
                        let out = crate::interpreter::run_function_box(fb, argv)
                            .map_err(|e| format!("FunctionBox call failed: {:?}", e))?;
                        if let Some(d) = dst {
                            self.values.insert(*d, out);
                        }
                        // `u32::MAX` is printed when the call has no destination.
                        println!(
                            " 📞 call %{} -> {}",
                            func.0,
                            dst.map(|v| v.0).unwrap_or(u32::MAX)
                        );
                    } else {
                        println!(" ⚠️ Skipping call: callee not FunctionBox");
                    }
                }
                _ => {
                    // Other instructions not yet implemented
                    println!(" ⚠️ Skipping instruction: {:?}", inst);
                }
            }
        }
    }
    // Default return
    Ok(Box::new(IntegerBox::new(0)))
}
}

View File

@ -1,33 +0,0 @@
use crate::box_trait::NyashBox;
use crate::mir::ValueId;
use std::collections::HashMap;
/// Compiler state shared by the backend implementations declared in this module.
pub struct LLVMCompiler {
// Boxed runtime values keyed by the MIR `ValueId` they belong to.
values: HashMap<ValueId, Box<dyn NyashBox>>,
}
// Without the `llvm` feature: compile the `mock` module and re-export
// everything public from it.
#[cfg(not(feature = "llvm"))]
mod mock;
#[cfg(not(feature = "llvm"))]
pub use mock::*;
// With the `llvm` feature: compile the `aot`, `codegen`, `helpers` and
// `interpreter` modules; only `aot`'s public items are re-exported.
#[cfg(feature = "llvm")]
mod aot;
#[cfg(feature = "llvm")]
mod codegen;
#[cfg(feature = "llvm")]
mod helpers;
#[cfg(feature = "llvm")]
mod interpreter;
#[cfg(feature = "llvm")]
pub use aot::*;
// Unit tests for this module (compiled only under `cfg(test)`).
#[cfg(test)]
mod tests {
use super::*;
// Placeholder: asserts a constant and exercises no compiler code.
// TODO: replace with a test that drives `LLVMCompiler`.
#[test]
fn test_llvm_module_creation() {
assert!(true);
}
}