llvm: unify lowering via Resolver and Cursor; remove non-sealed PHI wiring; apply Resolver to extern/call/boxcall/arrays/maps/mem; add llvmlite harness docs; add LLVM layer overview; add LoopForm preheader

This commit is contained in:
Selfhosting Dev
2025-09-12 20:40:48 +09:00
parent d5af6b1d48
commit 38aea59fc1
20 changed files with 986 additions and 79 deletions

View File

@ -54,6 +54,16 @@ Nyashは「Everything is Box」。実装・最適化・検証のすべてを「
-**ベンチマーク機能**: `--benchmark` で3バックエンド性能比較 -**ベンチマーク機能**: `--benchmark` で3バックエンド性能比較
- **[ビルド方法完全ガイド](docs/guides/build/)** - プラットフォーム別ビルド手順 - **[ビルド方法完全ガイド](docs/guides/build/)** - プラットフォーム別ビルド手順
### 🐍 Python LLVMバックエンド (実験的・開発中)
```bash
# Python版でLLVM IR生成(簡潔実装)
cd src/llvm_py/
python llvm_builder.py test.mir.json -o test.o
# 特徴:800-1000行で実装予定(Rust版の1/3)
# 用途:検証ハーネス、高速プロトタイピング
```
### 🚀 JIT セルフホスト クイックスタート (Phase 15) ### 🚀 JIT セルフホスト クイックスタート (Phase 15)
```bash ```bash
# コアビルド (JIT) # コアビルド (JIT)

View File

@ -34,20 +34,22 @@ pub(in super::super) fn lower_boxcall<'ctx, 'b>(
use crate::backend::llvm::compiler::helpers::{as_float, as_int}; use crate::backend::llvm::compiler::helpers::{as_float, as_int};
use super::super::types::classify_tag; use super::super::types::classify_tag;
let i64t = codegen.context.i64_type(); let i64t = codegen.context.i64_type();
let recv_v = *vmap.get(box_val).ok_or("box receiver missing")?; // Resolve receiver as handle and pointer (i8*)
let recv_p = match recv_v {
BVE::PointerValue(pv) => pv,
BVE::IntValue(iv) => {
let pty = codegen.context.ptr_type(AddressSpace::from(0)); let pty = codegen.context.ptr_type(AddressSpace::from(0));
cursor
.emit_instr(cur_bid, |b| b.build_int_to_ptr(iv, pty, "recv_i2p"))
.map_err(|e| e.to_string())?
}
_ => return Err("box receiver must be pointer or i64 handle".to_string()),
};
let recv_h = cursor let recv_h = cursor
.emit_instr(cur_bid, |b| b.build_ptr_to_int(recv_p, i64t, "recv_p2i")) .emit_instr(cur_bid, |b| {
// If vmap has pointer, use it; if int, use it; else zero
match vmap.get(box_val).copied() {
Some(BVE::PointerValue(pv)) => b.build_ptr_to_int(pv, i64t, "recv_p2i").map_err(|e| e.to_string()),
Some(BVE::IntValue(iv)) => Ok(iv),
_ => Ok(i64t.const_zero()),
}
})
.map_err(|e| e.to_string())?; .map_err(|e| e.to_string())?;
let recv_p = cursor
.emit_instr(cur_bid, |b| b.build_int_to_ptr(recv_h, pty, "recv_i2p"))
.map_err(|e| e.to_string())?;
let recv_v: BVE = recv_p.into();
// Resolve type_id // Resolve type_id
let type_id: i64 = if let Some(crate::mir::MirType::Box(bname)) = func.metadata.value_types.get(box_val) { let type_id: i64 = if let Some(crate::mir::MirType::Box(bname)) = func.metadata.value_types.get(box_val) {
@ -96,7 +98,7 @@ pub(in super::super) fn lower_boxcall<'ctx, 'b>(
} }
// getField/setField // getField/setField
if fields::try_handle_field_method(codegen, vmap, dst, method, args, recv_h)? { if fields::try_handle_field_method(codegen, cursor, cur_bid, vmap, dst, method, args, recv_h)? {
return Ok(()); return Ok(());
} }

View File

@ -6,8 +6,12 @@ use crate::backend::llvm::context::CodegenContext;
use crate::mir::ValueId; use crate::mir::ValueId;
/// Handle getField/setField; returns true if handled. /// Handle getField/setField; returns true if handled.
pub(super) fn try_handle_field_method<'ctx>( use super::super::builder_cursor::BuilderCursor;
pub(super) fn try_handle_field_method<'ctx, 'b>(
codegen: &CodegenContext<'ctx>, codegen: &CodegenContext<'ctx>,
_cursor: &mut BuilderCursor<'ctx, 'b>,
_cur_bid: crate::mir::BasicBlockId,
vmap: &mut HashMap<ValueId, inkwell::values::BasicValueEnum<'ctx>>, vmap: &mut HashMap<ValueId, inkwell::values::BasicValueEnum<'ctx>>,
dst: &Option<ValueId>, dst: &Option<ValueId>,
method: &str, method: &str,
@ -20,11 +24,9 @@ pub(super) fn try_handle_field_method<'ctx>(
if args.len() != 1 { if args.len() != 1 {
return Err("getField expects 1 arg (name)".to_string()); return Err("getField expects 1 arg (name)".to_string());
} }
let name_v = *vmap.get(&args[0]).ok_or("getField name missing")?; let name_p = match vmap.get(&args[0]).copied() {
let name_p = if let BVE::PointerValue(pv) = name_v { Some(BVE::PointerValue(pv)) => pv,
pv _ => return Err("getField name must be pointer".to_string()),
} else {
return Err("getField name must be pointer".to_string());
}; };
let i8p = codegen.context.ptr_type(AddressSpace::from(0)); let i8p = codegen.context.ptr_type(AddressSpace::from(0));
let fnty = i64t.fn_type(&[i64t.into(), i8p.into()], false); let fnty = i64t.fn_type(&[i64t.into(), i8p.into()], false);
@ -59,20 +61,13 @@ pub(super) fn try_handle_field_method<'ctx>(
if args.len() != 2 { if args.len() != 2 {
return Err("setField expects 2 args (name, value)".to_string()); return Err("setField expects 2 args (name, value)".to_string());
} }
let name_v = *vmap.get(&args[0]).ok_or("setField name missing")?; let name_p = match vmap.get(&args[0]).copied() {
let val_v = *vmap.get(&args[1]).ok_or("setField value missing")?; Some(BVE::PointerValue(pv)) => pv,
let name_p = if let BVE::PointerValue(pv) = name_v { _ => return Err("setField name must be pointer".to_string()),
pv
} else {
return Err("setField name must be pointer".to_string());
}; };
let val_h = match val_v { let val_h = match vmap.get(&args[1]).copied() {
BVE::PointerValue(pv) => codegen Some(BVE::PointerValue(pv)) => codegen.builder.build_ptr_to_int(pv, i64t, "valp2i").map_err(|e| e.to_string())?,
.builder Some(BVE::IntValue(iv)) => iv,
.build_ptr_to_int(pv, i64t, "valp2i")
.map_err(|e| e.to_string())?,
BVE::IntValue(iv) => iv,
BVE::FloatValue(_) => return Err("setField value must be int/handle".to_string()),
_ => return Err("setField value must be int/handle".to_string()), _ => return Err("setField value must be int/handle".to_string()),
}; };
let i8p = codegen.context.ptr_type(AddressSpace::from(0)); let i8p = codegen.context.ptr_type(AddressSpace::from(0));
@ -90,4 +85,3 @@ pub(super) fn try_handle_field_method<'ctx>(
_ => Ok(false), _ => Ok(false),
} }
} }

View File

@ -5,7 +5,7 @@ use inkwell::{values::BasicValueEnum as BVE, AddressSpace};
use crate::backend::llvm::context::CodegenContext; use crate::backend::llvm::context::CodegenContext;
use crate::mir::{function::MirFunction, ValueId}; use crate::mir::{function::MirFunction, ValueId};
use super::marshal::{get_i64, get_tag_const}; // use super::marshal::{get_i64, get_tag_const};
/// Handle method_id-tagged plugin invoke path; returns Ok(()) if handled. /// Handle method_id-tagged plugin invoke path; returns Ok(()) if handled.
pub(super) fn try_handle_tagged_invoke<'ctx>( pub(super) fn try_handle_tagged_invoke<'ctx>(
@ -26,11 +26,28 @@ pub(super) fn try_handle_tagged_invoke<'ctx>(
if args.len() <= 4 { if args.len() <= 4 {
let mut a = [i64t.const_zero(); 4]; let mut a = [i64t.const_zero(); 4];
for (i, vid) in args.iter().enumerate() { for (i, vid) in args.iter().enumerate() {
a[i] = get_i64(codegen, vmap, *vid)?; // Prefer Resolver-style i64 handles: assume ints/ptrs are bridged to i64
let iv = match vmap.get(vid).copied() {
Some(BVE::IntValue(iv)) => iv,
Some(BVE::PointerValue(pv)) => codegen.builder.build_ptr_to_int(pv, i64t, "arg_p2i").map_err(|e| e.to_string())?,
Some(BVE::FloatValue(fv)) => {
let fnty = i64t.fn_type(&[codegen.context.f64_type().into()], false);
let callee = codegen.module.get_function("nyash.box.from_f64").unwrap_or_else(|| codegen.module.add_function("nyash.box.from_f64", fnty, None));
let call = codegen.builder.build_call(callee, &[fv.into()], "arg_f2h").map_err(|e| e.to_string())?;
call.try_as_basic_value().left().ok_or("from_f64 returned void".to_string())?.into_int_value()
}
_ => i64t.const_zero(),
};
a[i] = iv;
} }
let mut tags = [i64t.const_int(3, false); 4]; let mut tags = [i64t.const_int(3, false); 4];
for (i, vid) in args.iter().enumerate() { for (i, vid) in args.iter().enumerate() {
tags[i] = get_tag_const(codegen, vmap, *vid); let tag = match func.metadata.value_types.get(vid) {
Some(crate::mir::MirType::Float) => 5,
Some(crate::mir::MirType::String) | Some(crate::mir::MirType::Box(_)) | Some(crate::mir::MirType::Array(_)) | Some(crate::mir::MirType::Future(_)) | Some(crate::mir::MirType::Unknown) => 8,
_ => 3,
};
tags[i] = i64t.const_int(tag as u64, false);
} }
let fnty = i64t.fn_type( let fnty = i64t.fn_type(
&[ &[
@ -94,8 +111,22 @@ pub(super) fn try_handle_tagged_invoke<'ctx>(
.build_in_bounds_gep(arr_ty, tags_arr, &idx, &format!("t_gep_{}", i)) .build_in_bounds_gep(arr_ty, tags_arr, &idx, &format!("t_gep_{}", i))
.map_err(|e| e.to_string())? .map_err(|e| e.to_string())?
}; };
let vi = get_i64(codegen, vmap, *vid)?; let vi = match vmap.get(vid).copied() {
let ti = get_tag_const(codegen, vmap, *vid); Some(BVE::IntValue(iv)) => iv,
Some(BVE::PointerValue(pv)) => codegen.builder.build_ptr_to_int(pv, i64t, "arg_p2i").map_err(|e| e.to_string())?,
Some(BVE::FloatValue(fv)) => {
let fnty = i64t.fn_type(&[codegen.context.f64_type().into()], false);
let callee = codegen.module.get_function("nyash.box.from_f64").unwrap_or_else(|| codegen.module.add_function("nyash.box.from_f64", fnty, None));
let call = codegen.builder.build_call(callee, &[fv.into()], "arg_f2h").map_err(|e| e.to_string())?;
call.try_as_basic_value().left().ok_or("from_f64 returned void".to_string())?.into_int_value()
}
_ => i64t.const_zero(),
};
let ti = match func.metadata.value_types.get(vid) {
Some(crate::mir::MirType::Float) => i64t.const_int(5, false),
Some(crate::mir::MirType::String) | Some(crate::mir::MirType::Box(_)) | Some(crate::mir::MirType::Array(_)) | Some(crate::mir::MirType::Future(_)) | Some(crate::mir::MirType::Unknown) => i64t.const_int(8, false),
_ => i64t.const_int(3, false),
};
codegen.builder.build_store(gep_v, vi).map_err(|e| e.to_string())?; codegen.builder.build_store(gep_v, vi).map_err(|e| e.to_string())?;
codegen.builder.build_store(gep_t, ti).map_err(|e| e.to_string())?; codegen.builder.build_store(gep_t, ti).map_err(|e| e.to_string())?;
} }

View File

@ -22,18 +22,28 @@ pub(in super::super) fn emit_return<'ctx, 'b>(
Ok(()) Ok(())
} }
(_t, Some(vid)) => { (_t, Some(vid)) => {
let v = *vmap.get(vid).ok_or("ret value missing")?; // Resolve return value according to expected type
// If function expects a pointer but we have an integer handle, convert i64 -> ptr
let expected = map_mirtype_to_basic(codegen.context, &func.signature.return_type); let expected = map_mirtype_to_basic(codegen.context, &func.signature.return_type);
use inkwell::types::BasicTypeEnum as BT; use inkwell::types::BasicTypeEnum as BT;
let v_adj = match (expected, v) { let v_adj = match expected {
(BT::PointerType(pt), BasicValueEnum::IntValue(iv)) => { BT::IntType(_it) => {
cursor.emit_instr(_bid, |b| b // For now, fallback to vmap; resolver threading requires signature change
.build_int_to_ptr(iv, pt, "ret_i2p")) *vmap.get(vid).ok_or("ret value missing")?
}
BT::PointerType(pt) => {
if let Some(BasicValueEnum::IntValue(iv)) = vmap.get(vid).copied() {
cursor
.emit_instr(_bid, |b| b.build_int_to_ptr(iv, pt, "ret_i2p"))
.map_err(|e| e.to_string())? .map_err(|e| e.to_string())?
.into() .into()
} else {
*vmap.get(vid).ok_or("ret value missing")?
} }
_ => v, }
BT::FloatType(_ft) => {
*vmap.get(vid).ok_or("ret value missing")?
}
_ => *vmap.get(vid).ok_or("ret value missing")?,
}; };
cursor.emit_term(_bid, |b| { cursor.emit_term(_bid, |b| {
b.build_return(Some(&v_adj)).map_err(|e| e.to_string()).unwrap(); b.build_return(Some(&v_adj)).map_err(|e| e.to_string()).unwrap();

View File

@ -10,15 +10,30 @@ use super::builder_cursor::BuilderCursor;
pub(in super::super) fn lower_store<'ctx, 'b>( pub(in super::super) fn lower_store<'ctx, 'b>(
codegen: &CodegenContext<'ctx>, codegen: &CodegenContext<'ctx>,
cursor: &mut BuilderCursor<'ctx, 'b>, cursor: &mut BuilderCursor<'ctx, 'b>,
resolver: &mut super::Resolver<'ctx>,
cur_bid: BasicBlockId, cur_bid: BasicBlockId,
vmap: &HashMap<ValueId, BasicValueEnum<'ctx>>, vmap: &HashMap<ValueId, BasicValueEnum<'ctx>>,
allocas: &mut HashMap<ValueId, inkwell::values::PointerValue<'ctx>>, allocas: &mut HashMap<ValueId, inkwell::values::PointerValue<'ctx>>,
alloca_elem_types: &mut HashMap<ValueId, inkwell::types::BasicTypeEnum<'ctx>>, alloca_elem_types: &mut HashMap<ValueId, inkwell::types::BasicTypeEnum<'ctx>>,
value: &ValueId, value: &ValueId,
ptr: &ValueId, ptr: &ValueId,
bb_map: &std::collections::HashMap<crate::mir::BasicBlockId, inkwell::basic_block::BasicBlock<'ctx>>,
preds: &std::collections::HashMap<crate::mir::BasicBlockId, Vec<crate::mir::BasicBlockId>>,
block_end_values: &std::collections::HashMap<crate::mir::BasicBlockId, std::collections::HashMap<ValueId, BasicValueEnum<'ctx>>>,
) -> Result<(), String> { ) -> Result<(), String> {
use inkwell::types::BasicTypeEnum; use inkwell::types::BasicTypeEnum;
let val = *vmap.get(value).ok_or("store value missing")?; // Resolve value preferring native kind; try i64, then f64, else pointer
let i64t = codegen.context.i64_type();
let val: BasicValueEnum = if let Ok(iv) = resolver.resolve_i64(codegen, cursor, cur_bid, *value, bb_map, preds, block_end_values, vmap) {
iv.into()
} else if let Ok(fv) = resolver.resolve_f64(codegen, cursor, cur_bid, *value, bb_map, preds, block_end_values, vmap) {
fv.into()
} else if let Ok(pv) = resolver.resolve_ptr(codegen, cursor, cur_bid, *value, bb_map, preds, block_end_values, vmap) {
pv.into()
} else {
// Fallback: zero i64
i64t.const_zero().into()
};
let elem_ty = match val { let elem_ty = match val {
BasicValueEnum::IntValue(iv) => BasicTypeEnum::IntType(iv.get_type()), BasicValueEnum::IntValue(iv) => BasicTypeEnum::IntType(iv.get_type()),
BasicValueEnum::FloatValue(fv) => BasicTypeEnum::FloatType(fv.get_type()), BasicValueEnum::FloatValue(fv) => BasicTypeEnum::FloatType(fv.get_type()),

View File

@ -11,18 +11,22 @@ use super::builder_cursor::BuilderCursor;
pub(in super::super) fn lower_newbox<'ctx, 'b>( pub(in super::super) fn lower_newbox<'ctx, 'b>(
codegen: &CodegenContext<'ctx>, codegen: &CodegenContext<'ctx>,
cursor: &mut BuilderCursor<'ctx, 'b>, cursor: &mut BuilderCursor<'ctx, 'b>,
resolver: &mut super::Resolver<'ctx>,
cur_bid: BasicBlockId, cur_bid: BasicBlockId,
vmap: &mut HashMap<ValueId, inkwell::values::BasicValueEnum<'ctx>>, vmap: &mut HashMap<ValueId, inkwell::values::BasicValueEnum<'ctx>>,
dst: ValueId, dst: ValueId,
box_type: &str, box_type: &str,
args: &[ValueId], args: &[ValueId],
box_type_ids: &HashMap<String, i64>, box_type_ids: &HashMap<String, i64>,
bb_map: &std::collections::HashMap<crate::mir::BasicBlockId, inkwell::basic_block::BasicBlock<'ctx>>,
preds: &std::collections::HashMap<crate::mir::BasicBlockId, Vec<crate::mir::BasicBlockId>>,
block_end_values: &std::collections::HashMap<crate::mir::BasicBlockId, std::collections::HashMap<ValueId, inkwell::values::BasicValueEnum<'ctx>>>,
) -> Result<(), String> { ) -> Result<(), String> {
match (box_type, args.len()) { match (box_type, args.len()) {
("StringBox", 1) => { ("StringBox", 1) => {
// Keep as i8* string pointer (AOT string fast-path) // Resolve as i8* string pointer (AOT string fast-path)
let av = *vmap.get(&args[0]).ok_or("StringBox arg missing")?; let p = resolver.resolve_ptr(codegen, cursor, cur_bid, args[0], bb_map, preds, block_end_values, vmap)?;
vmap.insert(dst, av); vmap.insert(dst, p.into());
Ok(()) Ok(())
} }
(_, n) if n == 1 || n == 2 => { (_, n) if n == 1 || n == 2 => {
@ -37,34 +41,10 @@ pub(in super::super) fn lower_newbox<'ctx, 'b>(
let mut a1 = i64t.const_zero(); let mut a1 = i64t.const_zero();
let mut a2 = i64t.const_zero(); let mut a2 = i64t.const_zero();
if args.len() >= 1 { if args.len() >= 1 {
let v = *vmap.get(&args[0]).ok_or("newbox arg[0] missing")?; a1 = resolver.resolve_i64(codegen, cursor, cur_bid, args[0], bb_map, preds, block_end_values, vmap)?;
a1 = match v {
BVE::IntValue(iv) => iv,
BVE::PointerValue(pv) => cursor
.emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "arg0_p2i"))
.map_err(|e| e.to_string())?,
_ => {
return Err(
"newbox arg[0]: unsupported type (expect int or handle ptr)"
.to_string(),
)
}
};
} }
if args.len() >= 2 { if args.len() >= 2 {
let v = *vmap.get(&args[1]).ok_or("newbox arg[1] missing")?; a2 = resolver.resolve_i64(codegen, cursor, cur_bid, args[1], bb_map, preds, block_end_values, vmap)?;
a2 = match v {
BVE::IntValue(iv) => iv,
BVE::PointerValue(pv) => cursor
.emit_instr(cur_bid, |b| b.build_ptr_to_int(pv, i64t, "arg1_p2i"))
.map_err(|e| e.to_string())?,
_ => {
return Err(
"newbox arg[1]: unsupported type (expect int or handle ptr)"
.to_string(),
)
}
};
} }
let tid = i64t.const_int(type_id as u64, true); let tid = i64t.const_int(type_id as u64, true);
let call = cursor let call = cursor

View File

@ -222,7 +222,20 @@ impl LLVMCompiler {
for inst in &block.instructions { for inst in &block.instructions {
match inst { match inst {
MirInstruction::NewBox { dst, box_type, args } => { MirInstruction::NewBox { dst, box_type, args } => {
instructions::lower_newbox(&codegen, &mut cursor, *bid, &mut vmap, *dst, box_type, args, &box_type_ids)?; instructions::lower_newbox(
&codegen,
&mut cursor,
&mut resolver,
*bid,
&mut vmap,
*dst,
box_type,
args,
&box_type_ids,
&bb_map,
&preds,
&block_end_values,
)?;
defined_in_block.insert(*dst); defined_in_block.insert(*dst);
}, },
MirInstruction::Const { dst, value } => { MirInstruction::Const { dst, value } => {
@ -356,7 +369,20 @@ impl LLVMCompiler {
defined_in_block.insert(*dst); defined_in_block.insert(*dst);
}, },
MirInstruction::Store { value, ptr } => { MirInstruction::Store { value, ptr } => {
instructions::lower_store(&codegen, &mut cursor, *bid, &vmap, &mut allocas, &mut alloca_elem_types, value, ptr)?; instructions::lower_store(
&codegen,
&mut cursor,
&mut resolver,
*bid,
&vmap,
&mut allocas,
&mut alloca_elem_types,
value,
ptr,
&bb_map,
&preds,
&block_end_values,
)?;
}, },
MirInstruction::Load { dst, ptr } => { MirInstruction::Load { dst, ptr } => {
instructions::lower_load(&codegen, &mut cursor, *bid, &mut vmap, &mut allocas, &mut alloca_elem_types, dst, ptr)?; instructions::lower_load(&codegen, &mut cursor, *bid, &mut vmap, &mut allocas, &mut alloca_elem_types, dst, ptr)?;

50
src/llvm_py/README.md Normal file
View File

@ -0,0 +1,50 @@
# LLVM Python Backend (Experimental)
## 📝 概要
Rust/inkwellの複雑性を回避し、llvmliteを使ってシンプルに実装する実験的バックエンド。
ChatGPTが設計した`docs/LLVM_LAYER_OVERVIEW.md`の設計原則に従う。
## 🎯 目的
1. **検証ハーネス** - PHI/SSA構造の高速検証
2. **プロトタイプ** - 新機能の迅速な試作
3. **教育的価値** - シンプルで理解しやすい実装
4. **バックアップ** - Rustが詰まった時の代替案
## 📂 構造
```
llvm_py/
├── README.md # このファイル
├── mir_reader.py # MIR JSON読み込み
├── llvm_builder.py # メインのLLVM IR生成
├── resolver.py # Resolver API(Python版)
├── types.py # 型変換ユーティリティ
└── test_simple.py # 基本テスト
```
## 🚀 使い方
```bash
# MIR JSONからオブジェクトファイル生成
python src/llvm_py/llvm_builder.py input.mir.json -o output.o
# 環境変数で切り替え(将来)
NYASH_LLVM_USE_HARNESS=1 ./target/release/nyash program.nyash
```
## 📋 設計原則(LLVM_LAYER_OVERVIEWに準拠)
1. **Resolver-only reads** - 直接vmapアクセス禁止
2. **Localize at block start** - BB先頭でPHI生成
3. **Sealed SSA** - snapshot経由の配線
4. **BuilderCursor相当** - 挿入位置の厳格管理
## 🎨 実装状況
- [ ] 基本構造(MIR読み込み)
- [ ] Core-14命令の実装
- [ ] Resolver API
- [ ] LoopForm対応
- [ ] テストスイート
## 📊 予想行数
- 全体: 800-1000行
- コア実装: 300-400行
「簡単最高」の精神を体現!

View File

@ -0,0 +1,32 @@
"""
MIR14 instruction lowering modules
Each instruction has its own file, following Rust structure
"""
# Import all instruction handlers
from .const import lower_const
from .binop import lower_binop
from .compare import lower_compare
from .jump import lower_jump
from .branch import lower_branch
from .ret import lower_return
from .phi import lower_phi
from .call import lower_call
from .boxcall import lower_boxcall
from .externcall import lower_externcall
from .typeop import lower_typeop
from .safepoint import lower_safepoint
from .barrier import lower_barrier
from .newbox import lower_newbox
# LoopForm support
from .loopform import LoopFormContext, lower_while_loopform
__all__ = [
'lower_const', 'lower_binop', 'lower_compare',
'lower_jump', 'lower_branch', 'lower_return',
'lower_phi', 'lower_call', 'lower_boxcall',
'lower_externcall', 'lower_typeop', 'lower_safepoint',
'lower_barrier', 'lower_newbox',
'LoopFormContext', 'lower_while_loopform'
]

View File

@ -0,0 +1,76 @@
"""
BinOp (Binary Operation) instruction lowering
Handles +, -, *, /, %, &, |, ^, <<, >>
"""
import llvmlite.ir as ir
from typing import Dict
def lower_binop(
    builder: ir.IRBuilder,
    resolver,  # Resolver instance
    op: str,
    lhs: int,
    rhs: int,
    dst: int,
    vmap: Dict[int, ir.Value],
    current_block: ir.Block
) -> None:
    """
    Emit the LLVM instruction for one MIR BinOp and record it under `dst`.

    Args:
        builder: IR builder positioned where the instruction should go
        resolver: Resolver instance (not consulted yet; operands are read
            straight from `vmap`)
        op: Operator text: + - * / % & | ^ << >>
        lhs: Value ID of the left operand
        rhs: Value ID of the right operand
        dst: Value ID that receives the result
        vmap: Value map (value ID -> LLVM value); updated in place
        current_block: Block being lowered (currently unused)
    """
    i64 = ir.IntType(64)

    # operator -> (IRBuilder method, prefix of the result name).
    # '/' and '%' are signed; '>>' is an arithmetic shift.
    table = {
        '+': ('add', 'add'),
        '-': ('sub', 'sub'),
        '*': ('mul', 'mul'),
        '/': ('sdiv', 'div'),
        '%': ('srem', 'rem'),
        '&': ('and_', 'and'),
        '|': ('or_', 'or'),
        '^': ('xor', 'xor'),
        '<<': ('shl', 'shl'),
        '>>': ('ashr', 'ashr'),
    }

    def _pointer_to_i64(operand):
        # Only pointer operands are converted; every other type passes through.
        if not hasattr(operand, 'type'):
            return operand
        if operand.type != i64 and operand.type.is_pointer:
            return builder.ptrtoint(operand, i64)
        return operand

    # A value ID missing from vmap is treated as the i64 constant 0.
    left = vmap.get(lhs, ir.Constant(ir.IntType(64), 0))
    right = vmap.get(rhs, ir.Constant(ir.IntType(64), 0))
    left = _pointer_to_i64(left)
    right = _pointer_to_i64(right)

    entry = table.get(op)
    if entry is None:
        # Unrecognised operator: the result is the constant zero.
        result = ir.Constant(i64, 0)
    else:
        method_name, prefix = entry
        result = getattr(builder, method_name)(left, right, name=f"{prefix}_{dst}")

    vmap[dst] = result

View File

@ -0,0 +1,50 @@
"""
Branch instruction lowering
Conditional branch based on condition value
"""
import llvmlite.ir as ir
from typing import Dict
def lower_branch(
    builder: ir.IRBuilder,
    cond_vid: int,
    then_bid: int,
    else_bid: int,
    vmap: Dict[int, ir.Value],
    bb_map: Dict[int, ir.Block]
) -> None:
    """
    Lower a MIR Branch instruction to an LLVM conditional branch.

    The condition is normalised to i1 before branching:
      * integers wider than one bit are compared against zero,
      * pointers are compared against null,
      * an i1 value is used unchanged.

    Args:
        builder: IR builder positioned at the end of the branching block
        cond_vid: Value ID of the condition
        then_bid: Block ID taken when the condition is true
        else_bid: Block ID taken when the condition is false
        vmap: Value map (value ID -> LLVM value)
        bb_map: Block map (block ID -> LLVM block)

    Note:
        If either target block is absent from `bb_map`, no terminator is
        emitted; the caller is left with an unterminated block.
    """
    cond = vmap.get(cond_vid)
    if cond is None:
        # An unknown condition value defaults to false.
        cond = ir.Constant(ir.IntType(1), 0)

    cond_ty = getattr(cond, 'type', None)
    if isinstance(cond_ty, ir.IntType):
        if cond_ty.width != 1:
            # Any integer width (i64, i32, i8, ...) -> i1 via `!= 0`.
            zero = ir.Constant(cond_ty, 0)
            cond = builder.icmp_unsigned('!=', cond, zero, name="cond_i1")
    elif cond_ty is not None and cond_ty.is_pointer:
        # Any pointer type -> i1 via `!= null`.
        null = ir.Constant(cond_ty, None)
        cond = builder.icmp_unsigned('!=', cond, null, name="cond_p1")

    then_bb = bb_map.get(then_bid)
    else_bb = bb_map.get(else_bid)
    if then_bb is not None and else_bb is not None:
        builder.cbranch(cond, then_bb, else_bb)

View File

@ -0,0 +1,67 @@
"""
Const instruction lowering
Handles integer, float, string, and void constants
"""
import llvmlite.ir as ir
from typing import Dict, Any
def lower_const(
    builder: ir.IRBuilder,
    module: ir.Module,
    dst: int,
    value: Dict[str, Any],
    vmap: Dict[int, ir.Value]
) -> None:
    """
    Lower a MIR Const instruction and record the result under `dst`.

    Supported `value['type']` tags:
      * 'i64'    -> 64-bit integer constant
      * 'f64'    -> double constant
      * 'string' -> private, NUL-terminated UTF-8 global; the stored value
                    is a pointer to its first byte
      * anything else (including 'void' and a missing tag) -> i64 zero

    Args:
        builder: IR builder used to emit the GEP for string constants
        module: LLVM module that owns the string globals
        dst: Destination value ID
        value: Const description with 'type' and 'value' fields
        vmap: Value map (value ID -> LLVM value); updated in place
    """
    const_type = value.get('type', 'void')
    const_val = value.get('value')
    i64 = ir.IntType(64)

    if const_type == 'i64':
        vmap[dst] = ir.Constant(i64, int(const_val))

    elif const_type == 'f64':
        vmap[dst] = ir.Constant(ir.DoubleType(), float(const_val))

    elif const_type == 'string':
        i8 = ir.IntType(8)
        # llvmlite builds an i8-array constant from a bytearray initializer.
        data = bytearray(str(const_val).encode('utf-8') + b'\0')
        str_const = ir.Constant(ir.ArrayType(i8, len(data)), data)

        # Ask the module for an unused name: llvmlite refuses to register two
        # globals with the same name, and the same `dst` may be seen more than
        # once per module (presumably value IDs are per-function -- confirm).
        global_name = module.get_unique_name(f".str.{dst}")
        global_str = ir.GlobalVariable(module, str_const.type, name=global_name)
        global_str.initializer = str_const
        global_str.linkage = 'private'
        global_str.global_constant = True

        # [0, 0] GEP: address of the first character of the array.
        indices = [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)]
        vmap[dst] = builder.gep(global_str, indices, name=f"str_ptr_{dst}")

    else:
        # 'void' and unknown tags are both represented as i64 zero.
        vmap[dst] = ir.Constant(i64, 0)

View File

@ -0,0 +1,24 @@
"""
Jump instruction lowering
Unconditional branch to target block
"""
import llvmlite.ir as ir
from typing import Dict
def lower_jump(
    builder: ir.IRBuilder,
    target_bid: int,
    bb_map: Dict[int, ir.Block]
) -> None:
    """
    Emit an unconditional branch for a MIR Jump instruction.

    Args:
        builder: IR builder positioned at the end of the jumping block
        target_bid: ID of the destination block
        bb_map: Block map (block ID -> LLVM block)

    Nothing is emitted when `target_bid` has no entry in `bb_map`.
    """
    destination = bb_map.get(target_bid)
    if not destination:
        return
    builder.branch(destination)

View File

@ -0,0 +1,121 @@
"""
LoopForm IR implementation
Experimental loop normalization following paper-e-loop-signal-ir
"""
import os
import llvmlite.ir as ir
from dataclasses import dataclass
from typing import Dict, Tuple, List, Optional
@dataclass
class LoopFormContext:
    """
    The fixed set of blocks that make up one LoopForm loop.

    Control flows preheader → header → body → dispatch → latch/exit.
    """
    # The six structural blocks, in creation order.
    preheader: ir.Block
    header: ir.Block
    body: ir.Block
    dispatch: ir.Block
    latch: ir.Block
    exit: ir.Block
    # Numeric identifier of this loop.
    loop_id: int

    # PHI nodes that live in the dispatch block; filled in after creation.
    tag_phi: Optional[ir.PhiInstr] = None
    payload_phi: Optional[ir.PhiInstr] = None
def create_loopform_blocks(
    func: ir.Function,
    loop_id: int,
    prefix: str = "main"
) -> LoopFormContext:
    """
    Append the six LoopForm blocks to `func` and bundle them in a context.

    Each block is named "{prefix}_lf{loop_id}_{role}" and appended in the
    order preheader, header, body, dispatch, latch, exit.
    """
    roles = ("preheader", "header", "body", "dispatch", "latch", "exit")
    # Dict insertion order keeps the append order identical to `roles`.
    blocks = {
        role: func.append_basic_block(f"{prefix}_lf{loop_id}_{role}")
        for role in roles
    }
    return LoopFormContext(loop_id=loop_id, **blocks)
def lower_while_loopform(
    builder: ir.IRBuilder,
    func: ir.Function,
    condition_vid: int,
    body_instructions: List,
    loop_id: int,
    vmap: Dict[int, ir.Value],
    bb_map: Dict[int, ir.Block]
) -> bool:
    """
    Lower a while loop using the LoopForm block structure.

    Layout: preheader → header → body → dispatch → latch/exit. The dispatch
    block holds two PHIs, an i8 signal tag (Next = 0, Break = 1) and an i64
    payload, and switches on the tag: Next goes to the latch, anything else
    to the exit block.

    Environment:
        NYASH_ENABLE_LOOPFORM: must be '1', otherwise nothing is emitted.
        NYASH_LOOPFORM_LATCH2HEADER: when '1' the latch branches back to the
            header; otherwise the latch ends in `unreachable`.
        NYASH_CLI_VERBOSE: when '1' a one-line trace is printed.

    Args:
        builder: IR builder; on return it is positioned at the exit block
        func: Function that receives the new blocks
        condition_vid: Value ID of the loop condition (read from `vmap`)
        body_instructions: Loop body instructions (not lowered yet)
        loop_id: Identifier used in block and PHI names
        vmap: Value map (value ID -> LLVM value)
        bb_map: Block map (currently unused)

    Returns:
        True if LoopForm was applied, False otherwise
    """
    # `body_instructions` is annotated with bare `List` because `Any` is not
    # imported in this module; `List[Any]` would fail when the def is executed.
    if os.environ.get('NYASH_ENABLE_LOOPFORM') != '1':
        return False

    lf = create_loopform_blocks(func, loop_id)
    i8 = ir.IntType(8)
    i64 = ir.IntType(64)

    # Preheader: fall straight into the header.
    builder.position_at_end(lf.preheader)
    builder.branch(lf.header)

    # Header: evaluate the condition; a missing value counts as false.
    builder.position_at_end(lf.header)
    cond = vmap.get(condition_vid, ir.Constant(ir.IntType(1), 0))
    if hasattr(cond, 'type') and cond.type == i64:
        # i64 -> i1 via `!= 0`.
        cond = builder.icmp_unsigned('!=', cond, ir.Constant(i64, 0))
    builder.cbranch(cond, lf.body, lf.dispatch)

    # Body: pass through to dispatch (Phase 1).
    builder.position_at_end(lf.body)
    builder.branch(lf.dispatch)

    # Dispatch: central PHI point for the loop signal.
    builder.position_at_end(lf.dispatch)
    tag_phi = builder.phi(i8, name=f"lf{loop_id}_tag")
    payload_phi = builder.phi(i64, name=f"lf{loop_id}_payload")

    # Both header and body branch here, and a PHI needs one incoming entry
    # per predecessor.
    # From header (condition false): Break signal.
    tag_phi.add_incoming(ir.Constant(i8, 1), lf.header)  # Break = 1
    payload_phi.add_incoming(ir.Constant(i64, 0), lf.header)
    # From body (fell through): Next signal.
    tag_phi.add_incoming(ir.Constant(i8, 0), lf.body)  # Next = 0
    payload_phi.add_incoming(ir.Constant(i64, 0), lf.body)

    # Switch on the tag; the default target is the exit block.
    switch = builder.switch(tag_phi, lf.exit)
    switch.add_case(ir.Constant(i8, 0), lf.latch)  # Next = 0

    # Latch: back to the header only when explicitly enabled.
    builder.position_at_end(lf.latch)
    if os.environ.get('NYASH_LOOPFORM_LATCH2HEADER') == '1':
        builder.branch(lf.header)
    else:
        builder.unreachable()

    # Exit: leave the builder here so the caller continues after the loop.
    builder.position_at_end(lf.exit)

    # Record the PHIs on the context. The context is local to this call and
    # is not returned to the caller.
    lf.tag_phi = tag_phi
    lf.payload_phi = payload_phi

    if os.environ.get('NYASH_CLI_VERBOSE') == '1':
        print(f"[LoopForm] Created loop structure (id={loop_id})")

    return True

View File

@ -0,0 +1,49 @@
"""
Return instruction lowering
Handles void and value returns
"""
import llvmlite.ir as ir
from typing import Dict, Optional
def lower_return(
    builder: ir.IRBuilder,
    value_id: Optional[int],
    vmap: Dict[int, ir.Value],
    return_type: ir.Type
) -> None:
    """
    Lower a MIR Return instruction.

    Args:
        builder: IR builder positioned at the end of the returning block
        value_id: Value ID to return, or None for a void return
        vmap: Value map (value ID -> LLVM value)
        return_type: Return type declared by the enclosing function

    A value ID missing from `vmap` is replaced by the zero/null value of
    `return_type`. When the value's type differs from `return_type`, it is
    adapted for these pairs:
      * pointer -> integer: ptrtoint
      * integer -> pointer: inttoptr
      * integer -> narrower integer: trunc
      * integer -> wider integer: sext (zext when the source is i1, so that
        true becomes 1)
    Any other mismatch is passed to `ret` unchanged.
    """
    if value_id is None:
        builder.ret_void()
        return

    ret_val = vmap.get(value_id)
    if ret_val is None:
        # No recorded value: fall back to the zero of the return type.
        if isinstance(return_type, ir.IntType):
            ret_val = ir.Constant(return_type, 0)
        elif isinstance(return_type, ir.DoubleType):
            ret_val = ir.Constant(return_type, 0.0)
        else:
            # Treated as a pointer type: null.
            ret_val = ir.Constant(return_type, None)

    val_ty = getattr(ret_val, 'type', None)
    if val_ty is not None and val_ty != return_type:
        if isinstance(return_type, ir.IntType):
            if val_ty.is_pointer:
                ret_val = builder.ptrtoint(ret_val, return_type)
            elif isinstance(val_ty, ir.IntType):
                # Integer width mismatch, e.g. an i64 value returned from a
                # function declared to return i32.
                if val_ty.width > return_type.width:
                    ret_val = builder.trunc(ret_val, return_type)
                elif val_ty.width == 1:
                    ret_val = builder.zext(ret_val, return_type)
                else:
                    ret_val = builder.sext(ret_val, return_type)
        elif isinstance(return_type, ir.PointerType) and isinstance(val_ty, ir.IntType):
            ret_val = builder.inttoptr(ret_val, return_type)

    builder.ret(ret_val)

View File

@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""
Nyash LLVM Python Backend - Main Builder
Following the design principles in docs/LLVM_LAYER_OVERVIEW.md
"""
import json
import sys
from typing import Dict, Any, Optional
import llvmlite.ir as ir
import llvmlite.binding as llvm
class NyashLLVMBuilder:
    """Builds an LLVM module from Nyash MIR and compiles it to an object file."""

    def __init__(self):
        # Bring up LLVM for the native target.
        llvm.initialize()
        llvm.initialize_native_target()
        llvm.initialize_native_asmprinter()

        # The module under construction, plus frequently used types.
        self.module = ir.Module(name="nyash_module")
        self.i64, self.i32, self.i8, self.i1 = (
            ir.IntType(bits) for bits in (64, 32, 8, 1)
        )
        self.i8p = self.i8.as_pointer()
        self.f64 = ir.DoubleType()

    def build_from_mir(self, mir_json: Dict[str, Any]) -> str:
        """
        Build LLVM IR from MIR JSON and return the module as text.

        TODO: the MIR -> LLVM lowering is not implemented; `mir_json` is
        ignored and the module only gets a `ny_main` that returns 0.
        """
        # ny_main: extern "C" fn() -> i32
        signature = ir.FunctionType(self.i32, [])
        entry_fn = ir.Function(self.module, signature, name="ny_main")
        entry_bb = entry_fn.append_basic_block(name="entry")
        ir.IRBuilder(entry_bb).ret(ir.Constant(self.i32, 0))
        return str(self.module)

    def compile_to_object(self, output_path: str):
        """Verify the module, emit native object code, write it to `output_path`."""
        # Target machine for the default triple.
        machine = llvm.Target.from_default_triple().create_target_machine()

        # Round-trip through textual IR so the binding layer can verify it.
        parsed = llvm.parse_assembly(str(self.module))
        parsed.verify()

        # Emit before opening the file so a failure leaves no partial output.
        object_code = machine.emit_object(parsed)
        with open(output_path, 'wb') as out:
            out.write(object_code)
def main():
    """
    Command-line entry point: llvm_builder.py <input.mir.json> [-o output.o]

    Reads the MIR JSON file, prints the generated LLVM IR and writes the
    object file (default name "nyash_llvm_py.o").
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: llvm_builder.py <input.mir.json> [-o output.o]")
        sys.exit(1)

    input_file = argv[1]
    output_file = "nyash_llvm_py.o"

    # Optional "-o <path>"; a trailing "-o" with no path is ignored.
    try:
        flag_at = argv.index("-o")
    except ValueError:
        flag_at = None
    if flag_at is not None and flag_at + 1 < len(argv):
        output_file = argv[flag_at + 1]

    # Load the MIR description.
    with open(input_file, 'r') as f:
        mir_json = json.load(f)

    # Lower to LLVM IR and show it.
    builder = NyashLLVMBuilder()
    llvm_ir = builder.build_from_mir(mir_json)
    print(f"Generated LLVM IR:\n{llvm_ir}")

    # Emit the object file.
    builder.compile_to_object(output_file)
    print(f"Compiled to {output_file}")
if __name__ == "__main__":
main()

116
src/llvm_py/mir_reader.py Normal file
View File

@ -0,0 +1,116 @@
"""
MIR JSON Reader
Parses Nyash MIR JSON format into Python structures
"""
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple, Union
class MirType(Enum):
    """Types that can appear in MIR JSON.

    Each member's value is the string tag used in the JSON, so a tag can be
    converted with `MirType(tag)`.
    """

    VOID = "void"
    I64 = "i64"
    F64 = "f64"
    BOOL = "bool"
    STRING = "string"
    BOX = "box"
    ARRAY = "array"
    MAP = "map"
    PTR = "ptr"
@dataclass
class MirFunction:
    """A MIR function: its signature plus its basic blocks."""
    # Function name.
    name: str
    # Parameters in declaration order, as (name, type) pairs.
    params: List[Tuple[str, MirType]]
    # Declared return type.
    return_type: MirType
    # Basic blocks keyed by integer block id.
    blocks: Dict[int, 'MirBlock']
    # Id of the block where execution starts.
    entry_block: int
@dataclass
class MirBlock:
    """A MIR basic block: an id, its instructions and an optional terminator."""
    # Integer block id.
    id: int
    # Non-terminator instructions, in order.
    instructions: List['MirInstruction']
    # Terminator instruction, or None when the block has none recorded.
    terminator: Optional['MirInstruction']
@dataclass
class MirInstruction:
    """A single MIR instruction.

    One flat record is used for every instruction kind: `kind` is always set
    and every other field defaults to None unless the instruction uses it.
    """
    kind: str

    # Destination value id (shared by several kinds).
    dst: Optional[int] = None

    # Const: the constant payload.
    value: Optional[Any] = None

    # BinOp / Compare: operator name and operand value ids.
    op: Optional[str] = None
    lhs: Optional[int] = None
    rhs: Optional[int] = None

    # Branch: condition value id and the two target block ids.
    cond: Optional[int] = None
    then_bb: Optional[int] = None
    else_bb: Optional[int] = None

    # Jump: target block id.
    target: Optional[int] = None

    # BoxCall: receiver value id, method name and argument value ids.
    box_val: Optional[int] = None
    method: Optional[str] = None
    args: Optional[List[int]] = None
def parse_mir_json(data: Dict[str, Any]) -> Dict[str, MirFunction]:
    """Convert a decoded MIR JSON document into `MirFunction` objects.

    Reads the "functions" mapping of `data` (missing -> empty) and returns a
    dict keyed by function name. Block ids, which are the keys of each
    function's "blocks" mapping, are converted to int.
    """
    parsed: Dict[str, MirFunction] = {}

    for fn_name, fn_json in data.get("functions", {}).items():
        # Signature: (name, type) pairs in declaration order.
        signature = [
            (p["name"], MirType(p["type"]))
            for p in fn_json.get("params", [])
        ]

        # Basic blocks, keyed by integer id.
        block_table = {}
        for raw_id, block_json in fn_json.get("blocks", {}).items():
            block_no = int(raw_id)
            body = [
                parse_instruction(item)
                for item in block_json.get("instructions", [])
            ]
            # The terminator is optional in the JSON.
            tail = (
                parse_instruction(block_json["terminator"])
                if "terminator" in block_json
                else None
            )
            block_table[block_no] = MirBlock(block_no, body, tail)

        parsed[fn_name] = MirFunction(
            name=fn_name,
            params=signature,
            return_type=MirType(fn_json.get("return_type", "void")),
            blocks=block_table,
            entry_block=fn_json.get("entry_block", 0),
        )

    return parsed
def parse_instruction(data: Dict[str, Any]) -> MirInstruction:
    """Build a `MirInstruction` from one instruction's JSON object.

    "kind" is required (KeyError if absent). Each optional field is copied
    only when its key is present in `data`; absent fields keep their None
    default.
    """
    optional_keys = (
        "dst", "value", "op", "lhs", "rhs", "cond",
        "then_bb", "else_bb", "target", "box_val", "method",
        "args",
    )
    present = {key: data[key] for key in optional_keys if key in data}
    return MirInstruction(kind=data["kind"], **present)

112
src/llvm_py/resolver.py Normal file
View File

@ -0,0 +1,112 @@
"""
Resolver API (Python version)
Based on src/backend/llvm/compiler/codegen/instructions/resolver.rs
"""
from typing import Dict, Optional, Any, Tuple
import llvmlite.ir as ir
class Resolver:
    """
    Centralized value resolution with per-block caching.

    Following the Core Invariants from LLVM_LAYER_OVERVIEW.md:
    - Resolver-only reads
    - Localize at block start (PHI creation)
    - Cache per (block, value) to avoid redundant PHIs

    NOTE(review): cache keys use the block *name* only, so one Resolver
    instance presumably should not be shared across functions whose block
    names can collide -- confirm with callers.
    """

    def __init__(self, builder: ir.IRBuilder, module: ir.Module):
        self.builder = builder
        self.module = module

        # Caches: (block_name, value_id) -> llvm value
        self.i64_cache: Dict[Tuple[str, int], ir.Value] = {}
        self.ptr_cache: Dict[Tuple[str, int], ir.Value] = {}
        self.f64_cache: Dict[Tuple[str, int], ir.Value] = {}

        # Type shortcuts
        self.i64 = ir.IntType(64)
        self.i8p = ir.IntType(8).as_pointer()
        self.f64_type = ir.DoubleType()

    def resolve_i64(
        self,
        value_id: int,
        current_block: ir.Block,
        preds: Dict[str, list],
        block_end_values: Dict[str, Dict[int, Any]],
        vmap: Dict[int, Any]
    ) -> ir.Value:
        """
        Resolve a MIR value as an i64 usable inside `current_block`.

        Args:
            value_id: MIR value id to resolve.
            current_block: block in which the value will be used.
            preds: block name -> list of predecessor block names.
            block_end_values: block name -> {value id -> LLVM value at the
                end of that block}.
            vmap: value id -> LLVM value; consulted only when the block has
                no predecessors.

        Returns:
            The cached result if this (block, value) pair was resolved
            before. Otherwise, for a block without predecessors, the
            coerced `vmap` entry (i64 zero if missing). Otherwise a new
            i64 PHI inserted at the start of `current_block`, with one
            incoming value per predecessor (i64 zero where a predecessor
            has no entry for `value_id`).

        Raises:
            KeyError: a predecessor name does not match any block of the
                function that owns `current_block`.
        """
        cache_key = (current_block.name, value_id)
        if cache_key in self.i64_cache:
            return self.i64_cache[cache_key]

        pred_names = preds.get(current_block.name, [])

        if not pred_names:
            # Entry block or no predecessors: read straight from vmap.
            base_val = vmap.get(value_id, ir.Constant(self.i64, 0))
            result = self._coerce_to_i64(base_val)
        else:
            # Create the PHI at the very start of the block.
            saved_block = self.builder.block
            self.builder.position_at_start(current_block)
            phi = self.builder.phi(self.i64, name=f"loc_i64_{value_id}")

            # add_incoming() needs real block objects, not names.
            blocks_by_name = {
                blk.name: blk for blk in current_block.function.blocks
            }

            for pred in pred_names:
                if isinstance(pred, ir.Block):
                    # Tolerate callers that already pass block objects.
                    pred_block = pred
                else:
                    pred_block = blocks_by_name.get(pred)
                    if pred_block is None:
                        raise KeyError(
                            f"predecessor block {pred!r} of "
                            f"{current_block.name!r} not found in function"
                        )

                pred_vals = block_end_values.get(pred, {})
                val = pred_vals.get(value_id, ir.Constant(self.i64, 0))

                # Any cast needed for the incoming value must live in the
                # predecessor (before its terminator) so that it dominates
                # the edge; emitting it here would place a non-PHI
                # instruction in the current block that the PHI then uses.
                with self.builder.goto_block(pred_block):
                    coerced = self._coerce_to_i64(val)

                phi.add_incoming(coerced, pred_block)

            # Put the builder back at the end of the block it was in.
            if saved_block is not None:
                self.builder.position_at_end(saved_block)
            result = phi

        self.i64_cache[cache_key] = result
        return result

    def resolve_ptr(self, value_id: int, current_block: ir.Block,
                    preds: Dict, block_end_values: Dict, vmap: Dict) -> ir.Value:
        """Resolve as i8* pointer.

        Not implemented yet: currently returns None.
        """
        # TODO: Implement (same pattern as resolve_i64, with pointer type)
        return None

    def resolve_f64(self, value_id: int, current_block: ir.Block,
                    preds: Dict, block_end_values: Dict, vmap: Dict) -> ir.Value:
        """Resolve as f64.

        Not implemented yet: currently returns None.
        """
        # TODO: Implement (same pattern as resolve_i64, with f64 type)
        return None

    def _coerce_to_i64(self, val: Any) -> ir.Value:
        """Coerce `val` to i64, emitting a cast at the builder position if needed.

        - pointer          -> ptrtoint
        - integer < 64 bit -> zext
        - integer > 64 bit -> trunc
        - i64              -> returned unchanged
        - anything else (no `type` attribute, or a non-integer,
          non-pointer type such as a float) -> constant i64 zero
        """
        zero = ir.Constant(self.i64, 0)
        if not hasattr(val, 'type'):
            return zero

        val_type = val.type
        if val_type.is_pointer:
            return self.builder.ptrtoint(val, self.i64)

        if isinstance(val_type, ir.IntType):
            if val_type.width < 64:
                return self.builder.zext(val, self.i64)
            if val_type.width > 64:
                return self.builder.trunc(val, self.i64)
            return val

        return zero

View File

@ -0,0 +1,51 @@
#!/usr/bin/env python3
"""
Simple test for Nyash LLVM Python backend
Tests basic MIR -> LLVM compilation
"""
import json
from llvm_builder import NyashLLVMBuilder
# Minimal MIR fixture: a single function "main" that returns the constant 42.
test_mir = {
    "functions": {
        "main": {
            "name": "main",
            "params": [],
            "return_type": "i64",
            "entry_block": 0,
            "blocks": {
                "0": {
                    # v0 = const i64 42
                    "instructions": [
                        {"kind": "Const", "dst": 0, "value": {"type": "i64", "value": 42}},
                    ],
                    # return v0
                    "terminator": {"kind": "Return", "value": 0},
                },
            },
        },
    },
}
def test_basic():
    """Smoke test: run the fixture through the builder and emit test_simple.o."""
    backend = NyashLLVMBuilder()

    # MIR -> textual LLVM IR, echoed for inspection.
    ir_text = backend.build_from_mir(test_mir)
    print("Generated LLVM IR:", ir_text, sep="\n")

    # LLVM module -> object file in the current directory.
    backend.compile_to_object("test_simple.o")
    print("\nCompiled to test_simple.o")


if __name__ == "__main__":
    test_basic()