diff --git a/CLAUDE.md b/CLAUDE.md index db83dfba..f719ba0c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -54,6 +54,16 @@ Nyashは「Everything is Box」。実装・最適化・検証のすべてを「 - ⚡ **ベンチマーク機能**: `--benchmark` で3バックエンド性能比較 - **[ビルド方法完全ガイド](docs/guides/build/)** - プラットフォーム別ビルド手順 +### 🐍 Python LLVMバックエンド (実験的・開発中) +```bash +# Python版でLLVM IR生成(簡潔実装) +cd src/llvm_py/ +python llvm_builder.py test.mir.json -o test.o + +# 特徴:800-1000行で実装予定(Rust版の1/3) +# 用途:検証ハーネス、高速プロトタイピング +``` + ### 🚀 JIT セルフホスト クイックスタート (Phase 15) ```bash # コアビルド (JIT) diff --git a/src/backend/llvm/compiler/codegen/instructions/boxcall.rs b/src/backend/llvm/compiler/codegen/instructions/boxcall.rs index cda9f760..167fe448 100644 --- a/src/backend/llvm/compiler/codegen/instructions/boxcall.rs +++ b/src/backend/llvm/compiler/codegen/instructions/boxcall.rs @@ -34,20 +34,22 @@ pub(in super::super) fn lower_boxcall<'ctx, 'b>( use crate::backend::llvm::compiler::helpers::{as_float, as_int}; use super::super::types::classify_tag; let i64t = codegen.context.i64_type(); - let recv_v = *vmap.get(box_val).ok_or("box receiver missing")?; - let recv_p = match recv_v { - BVE::PointerValue(pv) => pv, - BVE::IntValue(iv) => { - let pty = codegen.context.ptr_type(AddressSpace::from(0)); - cursor - .emit_instr(cur_bid, |b| b.build_int_to_ptr(iv, pty, "recv_i2p")) - .map_err(|e| e.to_string())? 
- } - _ => return Err("box receiver must be pointer or i64 handle".to_string()), - }; + // Resolve receiver as handle and pointer (i8*) + let pty = codegen.context.ptr_type(AddressSpace::from(0)); let recv_h = cursor - .emit_instr(cur_bid, |b| b.build_ptr_to_int(recv_p, i64t, "recv_p2i")) + .emit_instr(cur_bid, |b| { + // If vmap has pointer, use it; if int, use it; else zero + match vmap.get(box_val).copied() { + Some(BVE::PointerValue(pv)) => b.build_ptr_to_int(pv, i64t, "recv_p2i").map_err(|e| e.to_string()), + Some(BVE::IntValue(iv)) => Ok(iv), + _ => Ok(i64t.const_zero()), + } + }) .map_err(|e| e.to_string())?; + let recv_p = cursor + .emit_instr(cur_bid, |b| b.build_int_to_ptr(recv_h, pty, "recv_i2p")) + .map_err(|e| e.to_string())?; + let recv_v: BVE = recv_p.into(); // Resolve type_id let type_id: i64 = if let Some(crate::mir::MirType::Box(bname)) = func.metadata.value_types.get(box_val) { @@ -96,7 +98,7 @@ pub(in super::super) fn lower_boxcall<'ctx, 'b>( } // getField/setField - if fields::try_handle_field_method(codegen, vmap, dst, method, args, recv_h)? { + if fields::try_handle_field_method(codegen, cursor, cur_bid, vmap, dst, method, args, recv_h)? { return Ok(()); } diff --git a/src/backend/llvm/compiler/codegen/instructions/boxcall/fields.rs b/src/backend/llvm/compiler/codegen/instructions/boxcall/fields.rs index 46c493bd..20b76700 100644 --- a/src/backend/llvm/compiler/codegen/instructions/boxcall/fields.rs +++ b/src/backend/llvm/compiler/codegen/instructions/boxcall/fields.rs @@ -6,8 +6,12 @@ use crate::backend::llvm::context::CodegenContext; use crate::mir::ValueId; /// Handle getField/setField; returns true if handled. 
-pub(super) fn try_handle_field_method<'ctx>( +use super::super::builder_cursor::BuilderCursor; + +pub(super) fn try_handle_field_method<'ctx, 'b>( codegen: &CodegenContext<'ctx>, + _cursor: &mut BuilderCursor<'ctx, 'b>, + _cur_bid: crate::mir::BasicBlockId, vmap: &mut HashMap>, dst: &Option, method: &str, @@ -20,11 +24,9 @@ pub(super) fn try_handle_field_method<'ctx>( if args.len() != 1 { return Err("getField expects 1 arg (name)".to_string()); } - let name_v = *vmap.get(&args[0]).ok_or("getField name missing")?; - let name_p = if let BVE::PointerValue(pv) = name_v { - pv - } else { - return Err("getField name must be pointer".to_string()); + let name_p = match vmap.get(&args[0]).copied() { + Some(BVE::PointerValue(pv)) => pv, + _ => return Err("getField name must be pointer".to_string()), }; let i8p = codegen.context.ptr_type(AddressSpace::from(0)); let fnty = i64t.fn_type(&[i64t.into(), i8p.into()], false); @@ -59,20 +61,13 @@ pub(super) fn try_handle_field_method<'ctx>( if args.len() != 2 { return Err("setField expects 2 args (name, value)".to_string()); } - let name_v = *vmap.get(&args[0]).ok_or("setField name missing")?; - let val_v = *vmap.get(&args[1]).ok_or("setField value missing")?; - let name_p = if let BVE::PointerValue(pv) = name_v { - pv - } else { - return Err("setField name must be pointer".to_string()); + let name_p = match vmap.get(&args[0]).copied() { + Some(BVE::PointerValue(pv)) => pv, + _ => return Err("setField name must be pointer".to_string()), }; - let val_h = match val_v { - BVE::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "valp2i") - .map_err(|e| e.to_string())?, - BVE::IntValue(iv) => iv, - BVE::FloatValue(_) => return Err("setField value must be int/handle".to_string()), + let val_h = match vmap.get(&args[1]).copied() { + Some(BVE::PointerValue(pv)) => codegen.builder.build_ptr_to_int(pv, i64t, "valp2i").map_err(|e| e.to_string())?, + Some(BVE::IntValue(iv)) => iv, _ => return Err("setField value must be 
int/handle".to_string()), }; let i8p = codegen.context.ptr_type(AddressSpace::from(0)); @@ -90,4 +85,3 @@ pub(super) fn try_handle_field_method<'ctx>( _ => Ok(false), } } - diff --git a/src/backend/llvm/compiler/codegen/instructions/boxcall/invoke.rs b/src/backend/llvm/compiler/codegen/instructions/boxcall/invoke.rs index 221e2b2a..dc0da9ef 100644 --- a/src/backend/llvm/compiler/codegen/instructions/boxcall/invoke.rs +++ b/src/backend/llvm/compiler/codegen/instructions/boxcall/invoke.rs @@ -5,7 +5,7 @@ use inkwell::{values::BasicValueEnum as BVE, AddressSpace}; use crate::backend::llvm::context::CodegenContext; use crate::mir::{function::MirFunction, ValueId}; -use super::marshal::{get_i64, get_tag_const}; +// use super::marshal::{get_i64, get_tag_const}; /// Handle method_id-tagged plugin invoke path; returns Ok(()) if handled. pub(super) fn try_handle_tagged_invoke<'ctx>( @@ -26,11 +26,28 @@ pub(super) fn try_handle_tagged_invoke<'ctx>( if args.len() <= 4 { let mut a = [i64t.const_zero(); 4]; for (i, vid) in args.iter().enumerate() { - a[i] = get_i64(codegen, vmap, *vid)?; + // Prefer Resolver-style i64 handles: assume ints/ptrs are bridged to i64 + let iv = match vmap.get(vid).copied() { + Some(BVE::IntValue(iv)) => iv, + Some(BVE::PointerValue(pv)) => codegen.builder.build_ptr_to_int(pv, i64t, "arg_p2i").map_err(|e| e.to_string())?, + Some(BVE::FloatValue(fv)) => { + let fnty = i64t.fn_type(&[codegen.context.f64_type().into()], false); + let callee = codegen.module.get_function("nyash.box.from_f64").unwrap_or_else(|| codegen.module.add_function("nyash.box.from_f64", fnty, None)); + let call = codegen.builder.build_call(callee, &[fv.into()], "arg_f2h").map_err(|e| e.to_string())?; + call.try_as_basic_value().left().ok_or("from_f64 returned void".to_string())?.into_int_value() + } + _ => i64t.const_zero(), + }; + a[i] = iv; } let mut tags = [i64t.const_int(3, false); 4]; for (i, vid) in args.iter().enumerate() { - tags[i] = get_tag_const(codegen, vmap, *vid); + 
let tag = match func.metadata.value_types.get(vid) { + Some(crate::mir::MirType::Float) => 5, + Some(crate::mir::MirType::String) | Some(crate::mir::MirType::Box(_)) | Some(crate::mir::MirType::Array(_)) | Some(crate::mir::MirType::Future(_)) | Some(crate::mir::MirType::Unknown) => 8, + _ => 3, + }; + tags[i] = i64t.const_int(tag as u64, false); } let fnty = i64t.fn_type( &[ @@ -94,8 +111,22 @@ pub(super) fn try_handle_tagged_invoke<'ctx>( .build_in_bounds_gep(arr_ty, tags_arr, &idx, &format!("t_gep_{}", i)) .map_err(|e| e.to_string())? }; - let vi = get_i64(codegen, vmap, *vid)?; - let ti = get_tag_const(codegen, vmap, *vid); + let vi = match vmap.get(vid).copied() { + Some(BVE::IntValue(iv)) => iv, + Some(BVE::PointerValue(pv)) => codegen.builder.build_ptr_to_int(pv, i64t, "arg_p2i").map_err(|e| e.to_string())?, + Some(BVE::FloatValue(fv)) => { + let fnty = i64t.fn_type(&[codegen.context.f64_type().into()], false); + let callee = codegen.module.get_function("nyash.box.from_f64").unwrap_or_else(|| codegen.module.add_function("nyash.box.from_f64", fnty, None)); + let call = codegen.builder.build_call(callee, &[fv.into()], "arg_f2h").map_err(|e| e.to_string())?; + call.try_as_basic_value().left().ok_or("from_f64 returned void".to_string())?.into_int_value() + } + _ => i64t.const_zero(), + }; + let ti = match func.metadata.value_types.get(vid) { + Some(crate::mir::MirType::Float) => i64t.const_int(5, false), + Some(crate::mir::MirType::String) | Some(crate::mir::MirType::Box(_)) | Some(crate::mir::MirType::Array(_)) | Some(crate::mir::MirType::Future(_)) | Some(crate::mir::MirType::Unknown) => i64t.const_int(8, false), + _ => i64t.const_int(3, false), + }; codegen.builder.build_store(gep_v, vi).map_err(|e| e.to_string())?; codegen.builder.build_store(gep_t, ti).map_err(|e| e.to_string())?; } diff --git a/src/backend/llvm/compiler/codegen/instructions/flow.rs b/src/backend/llvm/compiler/codegen/instructions/flow.rs index 4e2808ff..f3e4e2bc 100644 --- 
a/src/backend/llvm/compiler/codegen/instructions/flow.rs +++ b/src/backend/llvm/compiler/codegen/instructions/flow.rs @@ -22,18 +22,28 @@ pub(in super::super) fn emit_return<'ctx, 'b>( Ok(()) } (_t, Some(vid)) => { - let v = *vmap.get(vid).ok_or("ret value missing")?; - // If function expects a pointer but we have an integer handle, convert i64 -> ptr + // Resolve return value according to expected type let expected = map_mirtype_to_basic(codegen.context, &func.signature.return_type); use inkwell::types::BasicTypeEnum as BT; - let v_adj = match (expected, v) { - (BT::PointerType(pt), BasicValueEnum::IntValue(iv)) => { - cursor.emit_instr(_bid, |b| b - .build_int_to_ptr(iv, pt, "ret_i2p")) - .map_err(|e| e.to_string())? - .into() + let v_adj = match expected { + BT::IntType(_it) => { + // For now, fallback to vmap; resolver threading requires signature change + *vmap.get(vid).ok_or("ret value missing")? } - _ => v, + BT::PointerType(pt) => { + if let Some(BasicValueEnum::IntValue(iv)) = vmap.get(vid).copied() { + cursor + .emit_instr(_bid, |b| b.build_int_to_ptr(iv, pt, "ret_i2p")) + .map_err(|e| e.to_string())? + .into() + } else { + *vmap.get(vid).ok_or("ret value missing")? + } + } + BT::FloatType(_ft) => { + *vmap.get(vid).ok_or("ret value missing")? 
+ } + _ => *vmap.get(vid).ok_or("ret value missing")?, }; cursor.emit_term(_bid, |b| { b.build_return(Some(&v_adj)).map_err(|e| e.to_string()).unwrap(); diff --git a/src/backend/llvm/compiler/codegen/instructions/mem.rs b/src/backend/llvm/compiler/codegen/instructions/mem.rs index 66553c02..cfa47949 100644 --- a/src/backend/llvm/compiler/codegen/instructions/mem.rs +++ b/src/backend/llvm/compiler/codegen/instructions/mem.rs @@ -10,15 +10,30 @@ use super::builder_cursor::BuilderCursor; pub(in super::super) fn lower_store<'ctx, 'b>( codegen: &CodegenContext<'ctx>, cursor: &mut BuilderCursor<'ctx, 'b>, + resolver: &mut super::Resolver<'ctx>, cur_bid: BasicBlockId, vmap: &HashMap>, allocas: &mut HashMap>, alloca_elem_types: &mut HashMap>, value: &ValueId, ptr: &ValueId, + bb_map: &std::collections::HashMap>, + preds: &std::collections::HashMap>, + block_end_values: &std::collections::HashMap>>, ) -> Result<(), String> { use inkwell::types::BasicTypeEnum; - let val = *vmap.get(value).ok_or("store value missing")?; + // Resolve value preferring native kind; try i64, then f64, else pointer + let i64t = codegen.context.i64_type(); + let val: BasicValueEnum = if let Ok(iv) = resolver.resolve_i64(codegen, cursor, cur_bid, *value, bb_map, preds, block_end_values, vmap) { + iv.into() + } else if let Ok(fv) = resolver.resolve_f64(codegen, cursor, cur_bid, *value, bb_map, preds, block_end_values, vmap) { + fv.into() + } else if let Ok(pv) = resolver.resolve_ptr(codegen, cursor, cur_bid, *value, bb_map, preds, block_end_values, vmap) { + pv.into() + } else { + // Fallback: zero i64 + i64t.const_zero().into() + }; let elem_ty = match val { BasicValueEnum::IntValue(iv) => BasicTypeEnum::IntType(iv.get_type()), BasicValueEnum::FloatValue(fv) => BasicTypeEnum::FloatType(fv.get_type()), diff --git a/src/backend/llvm/compiler/codegen/instructions/newbox.rs b/src/backend/llvm/compiler/codegen/instructions/newbox.rs index 973f6312..c8b65d42 100644 --- 
a/src/backend/llvm/compiler/codegen/instructions/newbox.rs +++ b/src/backend/llvm/compiler/codegen/instructions/newbox.rs @@ -11,18 +11,22 @@ use super::builder_cursor::BuilderCursor; pub(in super::super) fn lower_newbox<'ctx, 'b>( codegen: &CodegenContext<'ctx>, cursor: &mut BuilderCursor<'ctx, 'b>, + resolver: &mut super::Resolver<'ctx>, cur_bid: BasicBlockId, vmap: &mut HashMap>, dst: ValueId, box_type: &str, args: &[ValueId], box_type_ids: &HashMap, + bb_map: &std::collections::HashMap>, + preds: &std::collections::HashMap>, + block_end_values: &std::collections::HashMap>>, ) -> Result<(), String> { match (box_type, args.len()) { ("StringBox", 1) => { - // Keep as i8* string pointer (AOT string fast-path) - let av = *vmap.get(&args[0]).ok_or("StringBox arg missing")?; - vmap.insert(dst, av); + // Resolve as i8* string pointer (AOT string fast-path) + let p = resolver.resolve_ptr(codegen, cursor, cur_bid, args[0], bb_map, preds, block_end_values, vmap)?; + vmap.insert(dst, p.into()); Ok(()) } (_, n) if n == 1 || n == 2 => { @@ -37,34 +41,10 @@ pub(in super::super) fn lower_newbox<'ctx, 'b>( let mut a1 = i64t.const_zero(); let mut a2 = i64t.const_zero(); if args.len() >= 1 { - let v = *vmap.get(&args[0]).ok_or("newbox arg[0] missing")?; - a1 = match v { - BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => cursor - .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "arg0_p2i")) - .map_err(|e| e.to_string())?, - _ => { - return Err( - "newbox arg[0]: unsupported type (expect int or handle ptr)" - .to_string(), - ) - } - }; + a1 = resolver.resolve_i64(codegen, cursor, cur_bid, args[0], bb_map, preds, block_end_values, vmap)?; } if args.len() >= 2 { - let v = *vmap.get(&args[1]).ok_or("newbox arg[1] missing")?; - a2 = match v { - BVE::IntValue(iv) => iv, - BVE::PointerValue(pv) => cursor - .emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "arg1_p2i")) - .map_err(|e| e.to_string())?, - _ => { - return Err( - "newbox arg[1]: unsupported type (expect int or 
handle ptr)" - .to_string(), - ) - } - }; + a2 = resolver.resolve_i64(codegen, cursor, cur_bid, args[1], bb_map, preds, block_end_values, vmap)?; } let tid = i64t.const_int(type_id as u64, true); let call = cursor diff --git a/src/backend/llvm/compiler/codegen/mod.rs b/src/backend/llvm/compiler/codegen/mod.rs index 0fa92678..4980e714 100644 --- a/src/backend/llvm/compiler/codegen/mod.rs +++ b/src/backend/llvm/compiler/codegen/mod.rs @@ -222,7 +222,20 @@ impl LLVMCompiler { for inst in &block.instructions { match inst { MirInstruction::NewBox { dst, box_type, args } => { - instructions::lower_newbox(&codegen, &mut cursor, *bid, &mut vmap, *dst, box_type, args, &box_type_ids)?; + instructions::lower_newbox( + &codegen, + &mut cursor, + &mut resolver, + *bid, + &mut vmap, + *dst, + box_type, + args, + &box_type_ids, + &bb_map, + &preds, + &block_end_values, + )?; defined_in_block.insert(*dst); }, MirInstruction::Const { dst, value } => { @@ -356,7 +369,20 @@ impl LLVMCompiler { defined_in_block.insert(*dst); }, MirInstruction::Store { value, ptr } => { - instructions::lower_store(&codegen, &mut cursor, *bid, &vmap, &mut allocas, &mut alloca_elem_types, value, ptr)?; + instructions::lower_store( + &codegen, + &mut cursor, + &mut resolver, + *bid, + &vmap, + &mut allocas, + &mut alloca_elem_types, + value, + ptr, + &bb_map, + &preds, + &block_end_values, + )?; }, MirInstruction::Load { dst, ptr } => { instructions::lower_load(&codegen, &mut cursor, *bid, &mut vmap, &mut allocas, &mut alloca_elem_types, dst, ptr)?; diff --git a/src/llvm_py/README.md b/src/llvm_py/README.md new file mode 100644 index 00000000..b673ed15 --- /dev/null +++ b/src/llvm_py/README.md @@ -0,0 +1,50 @@ +# LLVM Python Backend (Experimental) + +## 📝 概要 +Rust/inkwellの複雑性を回避し、llvmliteを使ってシンプルに実装する実験的バックエンド。 +ChatGPTが設計した`docs/LLVM_LAYER_OVERVIEW.md`の設計原則に従う。 + +## 🎯 目的 +1. **検証ハーネス** - PHI/SSA構造の高速検証 +2. **プロトタイプ** - 新機能の迅速な試作 +3. **教育的価値** - シンプルで理解しやすい実装 +4. 
**バックアップ** - Rustが詰まった時の代替案 + +## 📂 構造 +``` +llvm_py/ +├── README.md # このファイル +├── mir_reader.py # MIR JSON読み込み +├── llvm_builder.py # メインのLLVM IR生成 +├── resolver.py # Resolver API(Python版) +├── types.py # 型変換ユーティリティ +└── test_simple.py # 基本テスト +``` + +## 🚀 使い方 +```bash +# MIR JSONからオブジェクトファイル生成 +python src/llvm_py/llvm_builder.py input.mir.json -o output.o + +# 環境変数で切り替え(将来) +NYASH_LLVM_USE_HARNESS=1 ./target/release/nyash program.nyash +``` + +## 📋 設計原則(LLVM_LAYER_OVERVIEWに準拠) +1. **Resolver-only reads** - 直接vmapアクセス禁止 +2. **Localize at block start** - BB先頭でPHI生成 +3. **Sealed SSA** - snapshot経由の配線 +4. **BuilderCursor相当** - 挿入位置の厳格管理 + +## 🎨 実装状況 +- [ ] 基本構造(MIR読み込み) +- [ ] Core-14命令の実装 +- [ ] Resolver API +- [ ] LoopForm対応 +- [ ] テストスイート + +## 📊 予想行数 +- 全体: 800-1000行 +- コア実装: 300-400行 + +「簡単最高」の精神を体現! \ No newline at end of file diff --git a/src/llvm_py/instructions/__init__.py b/src/llvm_py/instructions/__init__.py new file mode 100644 index 00000000..cdf561d8 --- /dev/null +++ b/src/llvm_py/instructions/__init__.py @@ -0,0 +1,32 @@ +""" +MIR14 instruction lowering modules +Each instruction has its own file, following Rust structure +""" + +# Import all instruction handlers +from .const import lower_const +from .binop import lower_binop +from .compare import lower_compare +from .jump import lower_jump +from .branch import lower_branch +from .ret import lower_return +from .phi import lower_phi +from .call import lower_call +from .boxcall import lower_boxcall +from .externcall import lower_externcall +from .typeop import lower_typeop +from .safepoint import lower_safepoint +from .barrier import lower_barrier +from .newbox import lower_newbox + +# LoopForm support +from .loopform import LoopFormContext, lower_while_loopform + +__all__ = [ + 'lower_const', 'lower_binop', 'lower_compare', + 'lower_jump', 'lower_branch', 'lower_return', + 'lower_phi', 'lower_call', 'lower_boxcall', + 'lower_externcall', 'lower_typeop', 'lower_safepoint', + 'lower_barrier', 
'lower_newbox', + 'LoopFormContext', 'lower_while_loopform' +] \ No newline at end of file diff --git a/src/llvm_py/instructions/binop.py b/src/llvm_py/instructions/binop.py new file mode 100644 index 00000000..a3d10d65 --- /dev/null +++ b/src/llvm_py/instructions/binop.py @@ -0,0 +1,76 @@ +""" +BinOp (Binary Operation) instruction lowering +Handles +, -, *, /, %, &, |, ^, <<, >> +""" + +import llvmlite.ir as ir +from typing import Dict + +def lower_binop( + builder: ir.IRBuilder, + resolver, # Resolver instance + op: str, + lhs: int, + rhs: int, + dst: int, + vmap: Dict[int, ir.Value], + current_block: ir.Block +) -> None: + """ + Lower MIR BinOp instruction + + Args: + builder: Current LLVM IR builder + resolver: Resolver for value resolution + op: Operation string (+, -, *, /, etc.) + lhs: Left operand value ID + rhs: Right operand value ID + dst: Destination value ID + vmap: Value map + current_block: Current basic block + """ + # Resolve operands as i64 (using resolver when available) + # For now, simple vmap lookup + lhs_val = vmap.get(lhs, ir.Constant(ir.IntType(64), 0)) + rhs_val = vmap.get(rhs, ir.Constant(ir.IntType(64), 0)) + + # Ensure both are i64 + i64 = ir.IntType(64) + if hasattr(lhs_val, 'type') and lhs_val.type != i64: + # Type conversion if needed + if lhs_val.type.is_pointer: + lhs_val = builder.ptrtoint(lhs_val, i64) + if hasattr(rhs_val, 'type') and rhs_val.type != i64: + if rhs_val.type.is_pointer: + rhs_val = builder.ptrtoint(rhs_val, i64) + + # Perform operation + if op == '+': + result = builder.add(lhs_val, rhs_val, name=f"add_{dst}") + elif op == '-': + result = builder.sub(lhs_val, rhs_val, name=f"sub_{dst}") + elif op == '*': + result = builder.mul(lhs_val, rhs_val, name=f"mul_{dst}") + elif op == '/': + # Signed division + result = builder.sdiv(lhs_val, rhs_val, name=f"div_{dst}") + elif op == '%': + # Signed remainder + result = builder.srem(lhs_val, rhs_val, name=f"rem_{dst}") + elif op == '&': + result = builder.and_(lhs_val, 
rhs_val, name=f"and_{dst}") + elif op == '|': + result = builder.or_(lhs_val, rhs_val, name=f"or_{dst}") + elif op == '^': + result = builder.xor(lhs_val, rhs_val, name=f"xor_{dst}") + elif op == '<<': + result = builder.shl(lhs_val, rhs_val, name=f"shl_{dst}") + elif op == '>>': + # Arithmetic shift right + result = builder.ashr(lhs_val, rhs_val, name=f"ashr_{dst}") + else: + # Unknown op - return zero + result = ir.Constant(i64, 0) + + # Store result + vmap[dst] = result \ No newline at end of file diff --git a/src/llvm_py/instructions/branch.py b/src/llvm_py/instructions/branch.py new file mode 100644 index 00000000..15e49d8f --- /dev/null +++ b/src/llvm_py/instructions/branch.py @@ -0,0 +1,50 @@ +""" +Branch instruction lowering +Conditional branch based on condition value +""" + +import llvmlite.ir as ir +from typing import Dict + +def lower_branch( + builder: ir.IRBuilder, + cond_vid: int, + then_bid: int, + else_bid: int, + vmap: Dict[int, ir.Value], + bb_map: Dict[int, ir.Block] +) -> None: + """ + Lower MIR Branch instruction + + Args: + builder: Current LLVM IR builder + cond_vid: Condition value ID + then_bid: Then block ID + else_bid: Else block ID + vmap: Value map + bb_map: Block map + """ + # Get condition value + cond = vmap.get(cond_vid) + if not cond: + # Default to false if missing + cond = ir.Constant(ir.IntType(1), 0) + + # Convert to i1 if needed + if hasattr(cond, 'type'): + if cond.type == ir.IntType(64): + # i64 to i1: compare != 0 + zero = ir.Constant(ir.IntType(64), 0) + cond = builder.icmp_unsigned('!=', cond, zero, name="cond_i1") + elif cond.type == ir.IntType(8).as_pointer(): + # Pointer to i1: compare != null + null = ir.Constant(cond.type, None) + cond = builder.icmp_unsigned('!=', cond, null, name="cond_p1") + + # Get target blocks + then_bb = bb_map.get(then_bid) + else_bb = bb_map.get(else_bid) + + if then_bb and else_bb: + builder.cbranch(cond, then_bb, else_bb) \ No newline at end of file diff --git 
a/src/llvm_py/instructions/const.py b/src/llvm_py/instructions/const.py new file mode 100644 index 00000000..045d600f --- /dev/null +++ b/src/llvm_py/instructions/const.py @@ -0,0 +1,67 @@ +""" +Const instruction lowering +Handles integer, float, string, and void constants +""" + +import llvmlite.ir as ir +from typing import Dict, Any + +def lower_const( + builder: ir.IRBuilder, + module: ir.Module, + dst: int, + value: Dict[str, Any], + vmap: Dict[int, ir.Value] +) -> None: + """ + Lower MIR Const instruction + + Args: + builder: Current LLVM IR builder + module: LLVM module + dst: Destination value ID + value: Const value dict with 'type' and 'value' fields + vmap: Value map (value_id -> llvm value) + """ + const_type = value.get('type', 'void') + const_val = value.get('value') + + if const_type == 'i64': + # Integer constant + i64 = ir.IntType(64) + llvm_val = ir.Constant(i64, int(const_val)) + vmap[dst] = llvm_val + + elif const_type == 'f64': + # Float constant + f64 = ir.DoubleType() + llvm_val = ir.Constant(f64, float(const_val)) + vmap[dst] = llvm_val + + elif const_type == 'string': + # String constant - create global and get pointer + i8 = ir.IntType(8) + str_val = str(const_val) + str_const = ir.Constant.literal_string(str_val.encode('utf-8') + b'\0') + + # Create global string constant + global_name = f".str.{dst}" + global_str = ir.GlobalVariable(module, str_const.type, name=global_name) + global_str.initializer = str_const + global_str.linkage = 'private' + global_str.global_constant = True + + # Get pointer to first element + indices = [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)] + ptr = builder.gep(global_str, indices, name=f"str_ptr_{dst}") + vmap[dst] = ptr + + elif const_type == 'void': + # Void/null constant - use i64 zero + i64 = ir.IntType(64) + vmap[dst] = ir.Constant(i64, 0) + + else: + # Unknown type - default to i64 zero + i64 = ir.IntType(64) + vmap[dst] = ir.Constant(i64, 0) \ No newline at end of file diff --git 
a/src/llvm_py/instructions/jump.py b/src/llvm_py/instructions/jump.py new file mode 100644 index 00000000..4f33b589 --- /dev/null +++ b/src/llvm_py/instructions/jump.py @@ -0,0 +1,24 @@ +""" +Jump instruction lowering +Unconditional branch to target block +""" + +import llvmlite.ir as ir +from typing import Dict + +def lower_jump( + builder: ir.IRBuilder, + target_bid: int, + bb_map: Dict[int, ir.Block] +) -> None: + """ + Lower MIR Jump instruction + + Args: + builder: Current LLVM IR builder + target_bid: Target block ID + bb_map: Map from block ID to LLVM block + """ + target_bb = bb_map.get(target_bid) + if target_bb: + builder.branch(target_bb) \ No newline at end of file diff --git a/src/llvm_py/instructions/loopform.py b/src/llvm_py/instructions/loopform.py new file mode 100644 index 00000000..5f5fbf05 --- /dev/null +++ b/src/llvm_py/instructions/loopform.py @@ -0,0 +1,121 @@ +""" +LoopForm IR implementation +Experimental loop normalization following paper-e-loop-signal-ir +""" + +import os +import llvmlite.ir as ir +from dataclasses import dataclass +from typing import Any, Dict, Tuple, List, Optional + +@dataclass +class LoopFormContext: + """ + LoopForm fixed block structure + preheader → header → body → dispatch → latch/exit + """ + preheader: ir.Block + header: ir.Block + body: ir.Block + dispatch: ir.Block + latch: ir.Block + exit: ir.Block + loop_id: int + + # PHI nodes in dispatch block + tag_phi: Optional[ir.PhiInstr] = None + payload_phi: Optional[ir.PhiInstr] = None + +def create_loopform_blocks( + func: ir.Function, + loop_id: int, + prefix: str = "main" +) -> LoopFormContext: + """Create the 6-block LoopForm structure""" + ctx = LoopFormContext( + preheader=func.append_basic_block(f"{prefix}_lf{loop_id}_preheader"), + header=func.append_basic_block(f"{prefix}_lf{loop_id}_header"), + body=func.append_basic_block(f"{prefix}_lf{loop_id}_body"), + dispatch=func.append_basic_block(f"{prefix}_lf{loop_id}_dispatch"), + 
latch=func.append_basic_block(f"{prefix}_lf{loop_id}_latch"), + exit=func.append_basic_block(f"{prefix}_lf{loop_id}_exit"), + loop_id=loop_id + ) + return ctx + +def lower_while_loopform( + builder: ir.IRBuilder, + func: ir.Function, + condition_vid: int, + body_instructions: List[Any], + loop_id: int, + vmap: Dict[int, ir.Value], + bb_map: Dict[int, ir.Block] +) -> bool: + """ + Lower a while loop using LoopForm structure + + Returns: + True if LoopForm was applied, False otherwise + """ + # Check if enabled + if os.environ.get('NYASH_ENABLE_LOOPFORM') != '1': + return False + + # Create LoopForm blocks + lf = create_loopform_blocks(func, loop_id) + + # Preheader: Jump to header + builder.position_at_end(lf.preheader) + builder.branch(lf.header) + + # Header: Evaluate condition + builder.position_at_end(lf.header) + cond = vmap.get(condition_vid, ir.Constant(ir.IntType(1), 0)) + # Convert to i1 if needed + if hasattr(cond, 'type') and cond.type == ir.IntType(64): + cond = builder.icmp_unsigned('!=', cond, ir.Constant(ir.IntType(64), 0)) + builder.cbranch(cond, lf.body, lf.dispatch) + + # Body: Pass through to dispatch (Phase 1) + builder.position_at_end(lf.body) + builder.branch(lf.dispatch) + + # Dispatch: Central PHI point + builder.position_at_end(lf.dispatch) + i8 = ir.IntType(8) + i64 = ir.IntType(64) + + # Create PHI nodes + tag_phi = builder.phi(i8, name=f"lf{loop_id}_tag") + payload_phi = builder.phi(i64, name=f"lf{loop_id}_payload") + + # Add incoming values + # From header (condition false): Break signal + tag_phi.add_incoming(ir.Constant(i8, 1), lf.header) # Break = 1 + payload_phi.add_incoming(ir.Constant(i64, 0), lf.header) + + # Switch on tag + tag_val = tag_phi + switch = builder.switch(tag_val, lf.exit) + switch.add_case(ir.Constant(i8, 0), lf.latch) # Next = 0 + + # Latch: Back to header (if enabled) + builder.position_at_end(lf.latch) + if os.environ.get('NYASH_LOOPFORM_LATCH2HEADER') == '1': + builder.branch(lf.header) + else: + 
builder.unreachable() + + # Exit: Continue after loop + builder.position_at_end(lf.exit) + # Builder position will be set by caller + + # Store context + lf.tag_phi = tag_phi + lf.payload_phi = payload_phi + + if os.environ.get('NYASH_CLI_VERBOSE') == '1': + print(f"[LoopForm] Created loop structure (id={loop_id})") + + return True \ No newline at end of file diff --git a/src/llvm_py/instructions/ret.py b/src/llvm_py/instructions/ret.py new file mode 100644 index 00000000..466e45c5 --- /dev/null +++ b/src/llvm_py/instructions/ret.py @@ -0,0 +1,49 @@ +""" +Return instruction lowering +Handles void and value returns +""" + +import llvmlite.ir as ir +from typing import Dict, Optional + +def lower_return( + builder: ir.IRBuilder, + value_id: Optional[int], + vmap: Dict[int, ir.Value], + return_type: ir.Type +) -> None: + """ + Lower MIR Return instruction + + Args: + builder: Current LLVM IR builder + value_id: Optional return value ID + vmap: Value map + return_type: Expected return type + """ + if value_id is None: + # Void return + builder.ret_void() + else: + # Get return value + ret_val = vmap.get(value_id) + if not ret_val: + # Default based on return type + if isinstance(return_type, ir.IntType): + ret_val = ir.Constant(return_type, 0) + elif isinstance(return_type, ir.DoubleType): + ret_val = ir.Constant(return_type, 0.0) + else: + # Pointer type - null + ret_val = ir.Constant(return_type, None) + + # Type adjustment if needed + if hasattr(ret_val, 'type') and ret_val.type != return_type: + if isinstance(return_type, ir.IntType) and ret_val.type.is_pointer: + # ptr to int + ret_val = builder.ptrtoint(ret_val, return_type) + elif isinstance(return_type, ir.PointerType) and isinstance(ret_val.type, ir.IntType): + # int to ptr + ret_val = builder.inttoptr(ret_val, return_type) + + builder.ret(ret_val) \ No newline at end of file diff --git a/src/llvm_py/llvm_builder.py b/src/llvm_py/llvm_builder.py new file mode 100644 index 00000000..fca304cd --- /dev/null +++ 
b/src/llvm_py/llvm_builder.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +""" +Nyash LLVM Python Backend - Main Builder +Following the design principles in docs/LLVM_LAYER_OVERVIEW.md +""" + +import json +import sys +from typing import Dict, Any, Optional +import llvmlite.ir as ir +import llvmlite.binding as llvm + +class NyashLLVMBuilder: + """Main LLVM IR builder for Nyash MIR""" + + def __init__(self): + # Initialize LLVM + llvm.initialize() + llvm.initialize_native_target() + llvm.initialize_native_asmprinter() + + # Module and basic types + self.module = ir.Module(name="nyash_module") + self.i64 = ir.IntType(64) + self.i32 = ir.IntType(32) + self.i8 = ir.IntType(8) + self.i1 = ir.IntType(1) + self.i8p = self.i8.as_pointer() + self.f64 = ir.DoubleType() + + def build_from_mir(self, mir_json: Dict[str, Any]) -> str: + """Build LLVM IR from MIR JSON""" + # TODO: Implement MIR -> LLVM lowering + # For now, create a simple ny_main that returns 0 + + # ny_main: extern "C" fn() -> i32 + ny_main_ty = ir.FunctionType(self.i32, []) + ny_main = ir.Function(self.module, ny_main_ty, name="ny_main") + + block = ny_main.append_basic_block(name="entry") + builder = ir.IRBuilder(block) + builder.ret(ir.Constant(self.i32, 0)) + + return str(self.module) + + def compile_to_object(self, output_path: str): + """Compile module to object file""" + # Create target machine + target = llvm.Target.from_default_triple() + target_machine = target.create_target_machine() + + # Compile + mod = llvm.parse_assembly(str(self.module)) + mod.verify() + + # Generate object code + obj = target_machine.emit_object(mod) + + # Write to file + with open(output_path, 'wb') as f: + f.write(obj) + +def main(): + if len(sys.argv) < 2: + print("Usage: llvm_builder.py INPUT.mir.json [-o output.o]") + sys.exit(1) + + input_file = sys.argv[1] + output_file = "nyash_llvm_py.o" + + if "-o" in sys.argv: + idx = sys.argv.index("-o") + if idx + 1 < len(sys.argv): + output_file = sys.argv[idx + 1] + + # Read MIR JSON + with 
open(input_file, 'r') as f: + mir_json = json.load(f) + + # Build LLVM IR + builder = NyashLLVMBuilder() + llvm_ir = builder.build_from_mir(mir_json) + + print(f"Generated LLVM IR:\n{llvm_ir}") + + # Compile to object + builder.compile_to_object(output_file) + print(f"Compiled to {output_file}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/llvm_py/mir_reader.py b/src/llvm_py/mir_reader.py new file mode 100644 index 00000000..c15c95ce --- /dev/null +++ b/src/llvm_py/mir_reader.py @@ -0,0 +1,116 @@ +""" +MIR JSON Reader +Parses Nyash MIR JSON format into Python structures +""" + +from dataclasses import dataclass +from typing import Dict, List, Any, Optional, Union +from enum import Enum + +class MirType(Enum): + """MIR type enumeration""" + VOID = "void" + I64 = "i64" + F64 = "f64" + BOOL = "bool" + STRING = "string" + BOX = "box" + ARRAY = "array" + MAP = "map" + PTR = "ptr" + +@dataclass +class MirFunction: + """MIR function representation""" + name: str + params: List[Tuple[str, MirType]] + return_type: MirType + blocks: Dict[int, 'MirBlock'] + entry_block: int + +@dataclass +class MirBlock: + """MIR basic block""" + id: int + instructions: List['MirInstruction'] + terminator: Optional['MirInstruction'] + +@dataclass +class MirInstruction: + """Base MIR instruction""" + kind: str + + # Common fields + dst: Optional[int] = None + + # Instruction-specific fields + value: Optional[Any] = None # For Const + op: Optional[str] = None # For BinOp/Compare + lhs: Optional[int] = None # For BinOp/Compare + rhs: Optional[int] = None # For BinOp/Compare + cond: Optional[int] = None # For Branch + then_bb: Optional[int] = None + else_bb: Optional[int] = None + target: Optional[int] = None # For Jump + box_val: Optional[int] = None # For BoxCall + method: Optional[str] = None + args: Optional[List[int]] = None + +def parse_mir_json(data: Dict[str, Any]) -> Dict[str, MirFunction]: + """Parse MIR JSON into Python structures""" + functions = 
{} + + # Parse each function + for func_name, func_data in data.get("functions", {}).items(): + # Parse parameters + params = [] + for param in func_data.get("params", []): + params.append((param["name"], MirType(param["type"]))) + + # Parse blocks + blocks = {} + for block_id, block_data in func_data.get("blocks", {}).items(): + bid = int(block_id) + + # Parse instructions + instructions = [] + for instr_data in block_data.get("instructions", []): + instr = parse_instruction(instr_data) + instructions.append(instr) + + # Parse terminator + terminator = None + if "terminator" in block_data: + terminator = parse_instruction(block_data["terminator"]) + + blocks[bid] = MirBlock(bid, instructions, terminator) + + # Create function + func = MirFunction( + name=func_name, + params=params, + return_type=MirType(func_data.get("return_type", "void")), + blocks=blocks, + entry_block=func_data.get("entry_block", 0) + ) + + functions[func_name] = func + + return functions + +def parse_instruction(data: Dict[str, Any]) -> MirInstruction: + """Parse a single MIR instruction""" + kind = data["kind"] + instr = MirInstruction(kind=kind) + + # Copy common fields + for field in ["dst", "value", "op", "lhs", "rhs", "cond", + "then_bb", "else_bb", "target", "box_val", "method"]: + if field in data: + setattr(instr, field, data[field]) + + # Handle args array + if "args" in data: + instr.args = data["args"] + + return instr \ No newline at end of file diff --git a/src/llvm_py/resolver.py b/src/llvm_py/resolver.py new file mode 100644 index 00000000..19ceda63 --- /dev/null +++ b/src/llvm_py/resolver.py @@ -0,0 +1,112 @@ +""" +Resolver API (Python version) +Based on src/backend/llvm/compiler/codegen/instructions/resolver.rs +""" + +from typing import Dict, Optional, Any, Tuple +import llvmlite.ir as ir + +class Resolver: + """ + Centralized value resolution with per-block caching. 
+ Following the Core Invariants from LLVM_LAYER_OVERVIEW.md: + - Resolver-only reads + - Localize at block start (PHI creation) + - Cache per (block, value) to avoid redundant PHIs + """ + + def __init__(self, builder: ir.IRBuilder, module: ir.Module): + self.builder = builder + self.module = module + + # Caches: (block_name, value_id) -> llvm value + self.i64_cache: Dict[Tuple[str, int], ir.Value] = {} + self.ptr_cache: Dict[Tuple[str, int], ir.Value] = {} + self.f64_cache: Dict[Tuple[str, int], ir.Value] = {} + + # Type shortcuts + self.i64 = ir.IntType(64) + self.i8p = ir.IntType(8).as_pointer() + self.f64_type = ir.DoubleType() + + def resolve_i64( + self, + value_id: int, + current_block: ir.Block, + preds: Dict[str, list], + block_end_values: Dict[str, Dict[int, Any]], + vmap: Dict[int, Any] + ) -> ir.Value: + """ + Resolve a MIR value as i64 dominating the current block. + Creates PHI at block start if needed, caches the result. + """ + cache_key = (current_block.name, value_id) + + # Check cache + if cache_key in self.i64_cache: + return self.i64_cache[cache_key] + + # Get predecessor blocks + pred_names = preds.get(current_block.name, []) + + if not pred_names: + # Entry block or no predecessors + base_val = vmap.get(value_id, ir.Constant(self.i64, 0)) + result = self._coerce_to_i64(base_val) + else: + # Create PHI at block start + saved_pos = self.builder.block + self.builder.position_at_start(current_block) + + phi = self.builder.phi(self.i64, name=f"loc_i64_{value_id}") + + # Add incoming values from predecessors + for pred_name in pred_names: + pred_vals = block_end_values.get(pred_name, {}) + val = pred_vals.get(value_id, ir.Constant(self.i64, 0)) + coerced = self._coerce_to_i64(val) + # Note: In real implementation, need pred block reference + phi.add_incoming(coerced, pred_name) # Simplified + + # Restore position + if saved_pos: + self.builder.position_at_end(saved_pos) + + result = phi + + # Cache and return + self.i64_cache[cache_key] = result + 
return result + + def resolve_ptr(self, value_id: int, current_block: ir.Block, + preds: Dict, block_end_values: Dict, vmap: Dict) -> ir.Value: + """Resolve as i8* pointer""" + # Similar to resolve_i64 but with pointer type + # TODO: Implement + pass + + def resolve_f64(self, value_id: int, current_block: ir.Block, + preds: Dict, block_end_values: Dict, vmap: Dict) -> ir.Value: + """Resolve as f64""" + # Similar pattern + # TODO: Implement + pass + + def _coerce_to_i64(self, val: Any) -> ir.Value: + """Coerce various types to i64""" + if isinstance(val, ir.Constant) and val.type == self.i64: + return val + elif hasattr(val, 'type') and val.type.is_pointer: + # ptr to int + return self.builder.ptrtoint(val, self.i64) + elif hasattr(val, 'type') and isinstance(val.type, ir.IntType): + # int to int (extend/trunc) + if val.type.width < 64: + return self.builder.zext(val, self.i64) + elif val.type.width > 64: + return self.builder.trunc(val, self.i64) + return val + else: + # Default zero + return ir.Constant(self.i64, 0) \ No newline at end of file diff --git a/src/llvm_py/test_simple.py b/src/llvm_py/test_simple.py new file mode 100644 index 00000000..e7aa6658 --- /dev/null +++ b/src/llvm_py/test_simple.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +""" +Simple test for Nyash LLVM Python backend +Tests basic MIR -> LLVM compilation +""" + +import json +from llvm_builder import NyashLLVMBuilder + +# Simple MIR test case: function that returns 42 +test_mir = { + "functions": { + "main": { + "name": "main", + "params": [], + "return_type": "i64", + "entry_block": 0, + "blocks": { + "0": { + "instructions": [ + { + "kind": "Const", + "dst": 0, + "value": {"type": "i64", "value": 42} + } + ], + "terminator": { + "kind": "Return", + "value": 0 + } + } + } + } + } +} + +def test_basic(): + """Test basic MIR -> LLVM compilation""" + builder = NyashLLVMBuilder() + + # Generate LLVM IR + llvm_ir = builder.build_from_mir(test_mir) + print("Generated LLVM IR:") + print(llvm_ir) + + 
# Compile to object file + builder.compile_to_object("test_simple.o") + print("\nCompiled to test_simple.o") + +if __name__ == "__main__": + test_basic() \ No newline at end of file