Python LLVM backend implementation (experimental)

- Created llvmlite-based LLVM backend in src/llvm_py/
- Implemented all MIR14 instructions (const, binop, jump, branch, ret, compare, phi, call, boxcall, externcall, typeop, newbox, safepoint, barrier)
- Experimental LoopForm support
- ~2000 lines of clean Python code vs complex Rust/inkwell
- Useful for PHI/SSA validation and rapid prototyping
- Added documentation to CLAUDE.md

This was created while waiting for ChatGPT's investigation of BuilderCursor issues.
This commit is contained in:
Selfhosting Dev
2025-09-12 20:55:13 +09:00
parent 38aea59fc1
commit ef44801fa6
17 changed files with 1368 additions and 10 deletions

View File

@ -481,6 +481,29 @@ gemini -p "Nyashの実装で困っています..."
codex exec "質問内容" codex exec "質問内容"
``` ```
### 🐍 Python LLVM バックエンド (実験的実装)
**場所**: `/src/llvm_py/`
ChatGPTの調査待ち中に作成した、llvmliteベースのLLVMバックエンド実装にゃ。
Rust/inkwellの複雑さを回避して、シンプルに2000行程度でMIR14→LLVM変換を実現。
#### 実行方法
```bash
cd src/llvm_py
python3 -m venv venv
./venv/bin/pip install llvmlite
./venv/bin/python llvm_builder.py test_minimal.json -o output.o
```
#### 実装済み命令
- ✅ const, binop, jump, branch, ret, compare
- ✅ phi, call, boxcall, externcall
- ✅ typeop, newbox, safepoint, barrier
- ✅ loopform (実験的)
**利点**: シンプル、高速プロトタイピング、llvmliteの安定性
**用途**: PHI/SSA検証、LoopForm実験、LLVM IR生成テスト
### 🔄 Codex非同期ワークフロー並列作業 ### 🔄 Codex非同期ワークフロー並列作業
```bash ```bash
# 基本実行(同期) # 基本実行(同期)

View File

@ -0,0 +1,117 @@
"""
Barrier instruction lowering
Memory barriers for thread safety and memory ordering
"""
import llvmlite.ir as ir
from typing import Dict, Optional
def lower_barrier(
    builder: ir.IRBuilder,
    barrier_type: str,
    ordering: Optional[str] = None
) -> None:
    """
    Lower MIR Barrier instruction to an LLVM ``fence``.

    Barrier types:
    - memory: Full memory fence (emitted as seq_cst)
    - acquire: Acquire semantics
    - release: Release semantics
    - acq_rel: Acquire-release
    - seq_cst: Sequential consistency

    Args:
        builder: Current LLVM IR builder
        barrier_type: Type of barrier; unknown names fall back to seq_cst
        ordering: Optional explicit memory ordering. When it names an
            ordering that is legal on a fence it takes precedence over
            the ordering implied by ``barrier_type``; otherwise it is
            ignored.
    """
    # Orderings that are legal on a fence instruction
    fence_orderings = ("acquire", "release", "acq_rel", "seq_cst")

    # Map barrier types to LLVM atomic ordering
    ordering_map = {name: name for name in fence_orderings}
    ordering_map["memory"] = "seq_cst"  # Full fence

    if ordering in fence_orderings:
        # Caller asked for a specific ordering - honour it
        llvm_ordering = ordering
    else:
        llvm_ordering = ordering_map.get(barrier_type, "seq_cst")

    # Insert fence instruction
    builder.fence(llvm_ordering)
def lower_atomic_op(
    builder: ir.IRBuilder,
    op: str,  # "load", "store", "add", "cas"
    ptr_vid: int,
    val_vid: Optional[int],
    dst_vid: Optional[int],
    vmap: Dict[int, ir.Value],
    ordering: str = "seq_cst"
) -> None:
    """
    Lower atomic operations

    Args:
        builder: Current LLVM IR builder
        op: Atomic operation type ("load", "store", "add", "cas")
        ptr_vid: Pointer value ID
        val_vid: Value ID for store/rmw operations; when None, "store"
            and "add" emit nothing
        dst_vid: Destination ID for load/rmw operations
        vmap: Value map (updated in place)
        ordering: Memory ordering

    Note:
        "cas" is not implemented yet and, like any unrecognised ``op``,
        emits no instruction.
    """
    i64 = ir.IntType(64)

    # Get pointer; an absent entry is detected by identity, not truthiness
    ptr = vmap.get(ptr_vid)
    if ptr is None:
        # Create dummy pointer so the operation still has a target
        ptr = builder.alloca(i64, name="atomic_ptr")
        vmap[ptr_vid] = ptr

    if op == "load":
        # Atomic load
        result = builder.load_atomic(ptr, ordering=ordering, align=8)
        if dst_vid is not None:
            vmap[dst_vid] = result

    elif op == "store":
        # Atomic store
        if val_vid is not None:
            val = vmap.get(val_vid, ir.Constant(i64, 0))
            builder.store_atomic(val, ptr, ordering=ordering, align=8)

    elif op == "add":
        # Atomic add (fetch_add)
        if val_vid is not None:
            val = vmap.get(val_vid, ir.Constant(i64, 1))
            result = builder.atomic_rmw("add", ptr, val, ordering=ordering)
            # Only publish a result when the RMW was actually emitted
            if dst_vid is not None:
                vmap[dst_vid] = result

    elif op == "cas":
        # Compare and swap
        # TODO: Needs expected and new values
        pass
def insert_thread_fence(
    builder: ir.IRBuilder,
    module: ir.Module,
    fence_type: str = "full"
) -> None:
    """
    Emit a fence whose ordering is selected by ``fence_type``.

    Args:
        builder: Current LLVM IR builder
        module: LLVM module (not used by this function)
        fence_type: "full" -> seq_cst, "read" -> acquire,
            "write" -> release; anything else -> seq_cst
    """
    ordering_by_kind = {
        "full": "seq_cst",
        "read": "acquire",
        "write": "release",
    }
    chosen = ordering_by_kind.get(fence_type, "seq_cst")
    builder.fence(chosen)

View File

@ -0,0 +1,104 @@
"""
BoxCall instruction lowering
Core of Nyash's "Everything is Box" philosophy
"""
import llvmlite.ir as ir
from typing import Dict, List, Optional
def lower_boxcall(
    builder: ir.IRBuilder,
    module: ir.Module,
    box_vid: int,
    method_name: str,
    args: List[int],
    dst_vid: Optional[int],
    vmap: Dict[int, ir.Value],
    resolver=None
) -> None:
    """
    Lower MIR BoxCall instruction

    Emits a call to ``ny_boxcall_by_id(handle: i64, method_id: i64,
    args: i8*) -> i64``, declaring it in the module on first use.

    Args:
        builder: Current LLVM IR builder
        module: LLVM module
        box_vid: Box instance value ID (handle)
        method_name: Method name to call
        args: List of argument value IDs
        dst_vid: Optional destination for return value
        vmap: Value map (updated in place when dst_vid is given)
        resolver: Optional resolver for type handling (currently unused)
    """
    # Local import keeps this block self-contained
    import zlib

    i8 = ir.IntType(8)
    i64 = ir.IntType(64)
    i8_ptr = i8.as_pointer()

    # Get box handle (i64)
    box_handle = vmap.get(box_vid, ir.Constant(i64, 0))

    # Ensure handle is i64
    if hasattr(box_handle, 'type') and box_handle.type.is_pointer:
        box_handle = builder.ptrtoint(box_handle, i64)

    # Method ID dispatch for plugin boxes.
    # The builtin hash() of a str is salted per process, so it would give a
    # different ID on every compiler run; CRC32 is stable across runs.
    method_id = zlib.crc32(str(method_name).encode('utf-8')) & 0xFFFF

    # Look up or create ny_boxcall_by_id function
    boxcall_func = next(
        (f for f in module.functions if f.name == "ny_boxcall_by_id"),
        None,
    )
    if boxcall_func is None:
        # Declare ny_boxcall_by_id(handle: i64, method_id: i64, args: i8*) -> i64
        func_type = ir.FunctionType(i64, [i64, i64, i8_ptr])
        boxcall_func = ir.Function(module, func_type, name="ny_boxcall_by_id")

    # Prepare arguments array
    if args:
        # Allocate one i64 slot per argument. Allocating as i64 (rather than
        # as raw bytes) gives the buffer the alignment the i64 stores need.
        args_i64_ptr = builder.alloca(i64, size=len(args), name="boxcall_args")

        # Store each argument
        for i, arg_id in enumerate(args):
            arg_val = vmap.get(arg_id, ir.Constant(i64, 0))

            # Ensure i64
            if hasattr(arg_val, 'type'):
                if arg_val.type.is_pointer:
                    arg_val = builder.ptrtoint(arg_val, i64)
                elif arg_val.type != i64:
                    # TODO: Handle other conversions
                    pass

            # Calculate slot address and store
            idx = ir.Constant(ir.IntType(32), i)
            slot = builder.gep(args_i64_ptr, [idx])
            builder.store(arg_val, slot)

        # Runtime takes an opaque byte pointer
        call_args_ptr = builder.bitcast(args_i64_ptr, i8_ptr)
    else:
        # No arguments - pass null
        call_args_ptr = ir.Constant(i8_ptr, None)

    # Make the boxcall
    method_id_val = ir.Constant(i64, method_id)
    result = builder.call(boxcall_func, [box_handle, method_id_val, call_args_ptr],
                          name=f"boxcall_{method_name}")

    # Store result if needed
    if dst_vid is not None:
        vmap[dst_vid] = result

View File

@ -0,0 +1,80 @@
"""
Call instruction lowering
Handles regular function calls (not BoxCall or ExternCall)
"""
import llvmlite.ir as ir
from typing import Dict, List, Optional
def lower_call(
    builder: ir.IRBuilder,
    module: ir.Module,
    func_name: str,
    args: List[int],
    dst_vid: Optional[int],
    vmap: Dict[int, ir.Value],
    resolver=None
) -> None:
    """
    Lower MIR Call instruction

    If ``func_name`` is not yet present in the module it is declared with
    the default ``i64(i64, ...)`` signature, one i64 parameter per argument.

    Args:
        builder: Current LLVM IR builder
        module: LLVM module
        func_name: Function name to call
        args: List of argument value IDs
        dst_vid: Optional destination for return value
        vmap: Value map (updated in place when dst_vid is given)
        resolver: Optional resolver for type handling (currently unused)
    """
    i64 = ir.IntType(64)

    # Look up function in module
    func = next((f for f in module.functions if f.name == func_name), None)
    if func is None:
        # Function not found - create declaration
        # Default: i64(i64, ...) signature
        func_type = ir.FunctionType(i64, [i64] * len(args))
        func = ir.Function(module, func_type, name=func_name)

    # Prepare arguments
    call_args = []
    for i, arg_id in enumerate(args):
        # Declared parameter type, if the callee has a parameter at this index
        expected_type = func.args[i].type if i < len(func.args) else None

        arg_val = vmap.get(arg_id)
        if arg_val is None:
            # Unknown value ID - substitute a zero/null of the expected type
            default_type = expected_type if expected_type is not None else i64
            if isinstance(default_type, ir.IntType):
                arg_val = ir.Constant(default_type, 0)
            elif isinstance(default_type, ir.DoubleType):
                arg_val = ir.Constant(default_type, 0.0)
            else:
                arg_val = ir.Constant(default_type, None)

        # Type conversion if needed (int <-> pointer only)
        if (expected_type is not None and hasattr(arg_val, 'type')
                and arg_val.type != expected_type):
            if expected_type.is_pointer and isinstance(arg_val.type, ir.IntType):
                arg_val = builder.inttoptr(arg_val, expected_type)
            elif isinstance(expected_type, ir.IntType) and arg_val.type.is_pointer:
                arg_val = builder.ptrtoint(arg_val, expected_type)

        call_args.append(arg_val)

    # Make the call
    result = builder.call(func, call_args, name=f"call_{func_name}")

    # Store result if needed
    if dst_vid is not None:
        vmap[dst_vid] = result

View File

@ -0,0 +1,104 @@
"""
Compare instruction lowering
Handles comparison operations (<, >, <=, >=, ==, !=)
"""
import llvmlite.ir as ir
from typing import Dict
def lower_compare(
    builder: ir.IRBuilder,
    op: str,
    lhs: int,
    rhs: int,
    dst: int,
    vmap: Dict[int, ir.Value]
) -> None:
    """
    Lower MIR Compare instruction (signed integer comparison)

    The i1 comparison result is zero-extended to i64 (0 or 1) and stored
    in ``vmap[dst]``.

    Args:
        builder: Current LLVM IR builder
        op: Comparison operation (<, >, <=, >=, ==, !=); any other value
            is treated as ==
        lhs: Left operand value ID (missing IDs read as i64 0)
        rhs: Right operand value ID (missing IDs read as i64 0)
        dst: Destination value ID
        vmap: Value map (updated in place)
    """
    i64 = ir.IntType(64)

    # Get operands
    lhs_val = vmap.get(lhs, ir.Constant(i64, 0))
    rhs_val = vmap.get(rhs, ir.Constant(i64, 0))

    # Ensure both are i64 (pointers are compared by address)
    if hasattr(lhs_val, 'type') and lhs_val.type.is_pointer:
        lhs_val = builder.ptrtoint(lhs_val, i64)
    if hasattr(rhs_val, 'type') and rhs_val.type.is_pointer:
        rhs_val = builder.ptrtoint(rhs_val, i64)

    # llvmlite's icmp_signed takes the Python-style operator itself and
    # derives the LLVM predicate (slt, sgt, eq, ...) internally; handing it
    # a raw predicate name such as 'slt' raises ValueError.
    supported_ops = ('<', '>', '<=', '>=', '==', '!=')
    cmpop = op if op in supported_ops else '=='

    # Perform comparison (returns i1)
    cmp_result = builder.icmp_signed(cmpop, lhs_val, rhs_val, name=f"cmp_{dst}")

    # Convert i1 to i64 (0 or 1)
    result = builder.zext(cmp_result, i64, name=f"cmp_i64_{dst}")

    # Store result
    vmap[dst] = result
def lower_fcmp(
    builder: ir.IRBuilder,
    op: str,
    lhs: int,
    rhs: int,
    dst: int,
    vmap: Dict[int, ir.Value]
) -> None:
    """
    Emit an ordered floating point comparison and widen it to i64.

    Args:
        builder: Current LLVM IR builder
        op: Comparison operation (<, >, <=, >=, ==, !=); anything else
            is compared with 'oeq'
        lhs: Left operand value ID (missing IDs read as double 0.0)
        rhs: Right operand value ID (missing IDs read as double 0.0)
        dst: Destination value ID
        vmap: Value map (updated in place)
    """
    double_ty = ir.DoubleType()
    left = vmap.get(lhs, ir.Constant(double_ty, 0.0))
    right = vmap.get(rhs, ir.Constant(double_ty, 0.0))

    # Source operator -> ordered LLVM predicate
    ordered_predicates = {
        '<': 'olt',
        '>': 'ogt',
        '<=': 'ole',
        '>=': 'oge',
        '==': 'oeq',
        '!=': 'one',
    }
    predicate = ordered_predicates.get(op, 'oeq')

    # i1 flag first, then zero-extend so the stored value is an i64
    flag = builder.fcmp_ordered(predicate, left, right, name=f"fcmp_{dst}")
    vmap[dst] = builder.zext(flag, ir.IntType(64), name=f"fcmp_i64_{dst}")

View File

@ -42,7 +42,10 @@ def lower_const(
# String constant - create global and get pointer # String constant - create global and get pointer
i8 = ir.IntType(8) i8 = ir.IntType(8)
str_val = str(const_val) str_val = str(const_val)
str_const = ir.Constant.literal_string(str_val.encode('utf-8') + b'\0') # Create array constant for the string
str_bytes = str_val.encode('utf-8') + b'\0'
str_const = ir.Constant(ir.ArrayType(i8, len(str_bytes)),
bytearray(str_bytes))
# Create global string constant # Create global string constant
global_name = f".str.{dst}" global_name = f".str.{dst}"

View File

@ -0,0 +1,149 @@
"""
ExternCall instruction lowering
Handles the minimal 5 runtime functions: print, error, panic, exit, now
"""
import llvmlite.ir as ir
from typing import Dict, List, Optional
# Table of the five runtime functions reachable through ExternCall.
# Each entry records the return type, the parameter type list and the
# symbol name used in the generated LLVM module (always "ny_" + name).
EXTERN_FUNCS = {
    name: {"ret": ret, "args": list(params), "llvm_name": f"ny_{name}"}
    for name, ret, params in (
        ("print", "void", ("i8*",)),   # String pointer
        ("error", "void", ("i8*",)),   # Error message
        ("panic", "void", ("i8*",)),   # Panic message
        ("exit", "void", ("i64",)),    # Exit code
        ("now", "i64", ()),            # No arguments
    )
}
def lower_externcall(
    builder: ir.IRBuilder,
    module: ir.Module,
    func_name: str,
    args: List[int],
    dst_vid: Optional[int],
    vmap: Dict[int, ir.Value],
    resolver=None
) -> None:
    """
    Lower MIR ExternCall instruction

    Only the functions listed in EXTERN_FUNCS are supported; any other
    name prints a warning and emits nothing. The emitted call always has
    exactly as many arguments as the extern declares: surplus MIR
    arguments are dropped and missing ones are filled with null/zero.

    Args:
        builder: Current LLVM IR builder
        module: LLVM module
        func_name: External function name
        args: List of argument value IDs
        dst_vid: Optional destination for return value (set to i64 0
            for void externs)
        vmap: Value map (updated in place when dst_vid is given)
        resolver: Optional resolver for type handling (currently unused)
    """
    if func_name not in EXTERN_FUNCS:
        # Unknown extern function - treat as void()
        print(f"Warning: Unknown extern function: {func_name}")
        return

    extern_info = EXTERN_FUNCS[func_name]
    llvm_name = extern_info["llvm_name"]

    i64 = ir.IntType(64)
    i8_ptr = ir.IntType(8).as_pointer()
    # Type names used in EXTERN_FUNCS -> LLVM types
    type_table = {"i8*": i8_ptr, "i64": i64}

    # Look up or declare function
    func = next((f for f in module.functions if f.name == llvm_name), None)
    if func is None:
        ret_type = i64 if extern_info["ret"] == "i64" else ir.VoidType()
        arg_types = [type_table[t] for t in extern_info["args"] if t in type_table]
        func_type = ir.FunctionType(ret_type, arg_types)
        func = ir.Function(module, func_type, name=llvm_name)

    # Prepare arguments. Iterating over the *declared* parameters keeps the
    # call arity in sync with the declaration whatever the MIR supplied.
    call_args = []
    for i, expected_type_str in enumerate(extern_info["args"]):
        if expected_type_str not in type_table:
            # Not part of the declared signature either
            continue

        arg_val = vmap.get(args[i]) if i < len(args) else None

        if expected_type_str == "i8*":
            # Need string pointer
            if arg_val is None:
                # Null string
                arg_val = ir.Constant(i8_ptr, None)
            elif hasattr(arg_val, 'type'):
                if isinstance(arg_val.type, ir.IntType):
                    # int to ptr
                    arg_val = builder.inttoptr(arg_val, i8_ptr)
                elif arg_val.type.is_pointer:
                    if arg_val.type != i8_ptr:
                        # e.g. pointer to a char array - reinterpret as i8*
                        arg_val = builder.bitcast(arg_val, i8_ptr)
                else:
                    # Not convertible to a pointer - pass null
                    arg_val = ir.Constant(i8_ptr, None)
        else:
            # Need i64
            if arg_val is None:
                arg_val = ir.Constant(i64, 0)
            elif hasattr(arg_val, 'type'):
                if arg_val.type.is_pointer:
                    arg_val = builder.ptrtoint(arg_val, i64)
                elif arg_val.type != i64:
                    # TODO: Handle other conversions
                    pass

        call_args.append(arg_val)

    # Make the call
    if extern_info["ret"] == "void":
        builder.call(func, call_args)
        if dst_vid is not None:
            # Void return - store 0
            vmap[dst_vid] = ir.Constant(i64, 0)
    else:
        result = builder.call(func, call_args, name=f"extern_{func_name}")
        if dst_vid is not None:
            vmap[dst_vid] = result

View File

@ -6,7 +6,7 @@ Experimental loop normalization following paper-e-loop-signal-ir
import os import os
import llvmlite.ir as ir import llvmlite.ir as ir
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, Tuple, List, Optional from typing import Dict, Tuple, List, Optional, Any
@dataclass @dataclass
class LoopFormContext: class LoopFormContext:

View File

@ -0,0 +1,116 @@
"""
NewBox instruction lowering
Handles box creation (new StringBox(), new IntegerBox(), etc.)
"""
import llvmlite.ir as ir
from typing import Dict, List, Optional
def lower_newbox(
    builder: ir.IRBuilder,
    module: ir.Module,
    box_type: str,
    args: List[int],
    dst_vid: int,
    vmap: Dict[int, ir.Value],
    resolver=None
) -> None:
    """
    Lower MIR NewBox instruction

    Calls ``ny_create_<box_type>`` and stores the returned i64 handle in
    ``vmap[dst_vid]``. The constructor is declared on first use with one
    parameter per supplied argument (i8* for StringBox, i64 otherwise) so
    that the declaration and the call agree. When a declaration already
    exists, the call is shaped to its parameter list: surplus arguments
    are dropped and missing ones are filled with null/zero.

    Args:
        builder: Current LLVM IR builder
        module: LLVM module
        box_type: Box type name (e.g., "StringBox", "IntegerBox")
        args: Constructor arguments (value IDs)
        dst_vid: Destination value ID for box handle
        vmap: Value map (updated in place)
        resolver: Optional resolver for type handling (currently unused)
    """
    i64 = ir.IntType(64)
    i8_ptr = ir.IntType(8).as_pointer()

    # Look up or declare the box creation function
    create_func_name = f"ny_create_{box_type}"
    create_func = next(
        (f for f in module.functions if f.name == create_func_name),
        None,
    )
    if create_func is None:
        # StringBox constructors take string pointers, everything else i64
        param_type = i8_ptr if box_type == "StringBox" else i64
        func_type = ir.FunctionType(i64, [param_type] * len(args))
        create_func = ir.Function(module, func_type, name=create_func_name)

    # Prepare arguments, one per declared parameter
    call_args = []
    for i, param in enumerate(create_func.args):
        expected_type = param.type
        arg_val = vmap.get(args[i]) if i < len(args) else None

        if arg_val is None:
            # Missing value: null for pointers, zero otherwise
            arg_val = ir.Constant(expected_type,
                                  None if expected_type.is_pointer else 0)
        elif hasattr(arg_val, 'type') and arg_val.type != expected_type:
            # Type conversion if needed
            if expected_type.is_pointer and isinstance(arg_val.type, ir.IntType):
                # int to ptr
                arg_val = builder.inttoptr(arg_val, expected_type)
            elif isinstance(expected_type, ir.IntType) and arg_val.type.is_pointer:
                # ptr to int
                arg_val = builder.ptrtoint(arg_val, expected_type)
            elif expected_type.is_pointer and arg_val.type.is_pointer:
                # ptr to differently-typed ptr
                arg_val = builder.bitcast(arg_val, expected_type)

        call_args.append(arg_val)

    # Create the box
    handle = builder.call(create_func, call_args, name=f"new_{box_type}")

    # Store handle
    vmap[dst_vid] = handle
def lower_newbox_generic(
    builder: ir.IRBuilder,
    module: ir.Module,
    dst_vid: int,
    vmap: Dict[int, ir.Value]
) -> None:
    """
    Allocate a box through ``ny_alloc_box`` with a fixed size of 64.

    The runtime function ``ny_alloc_box(size: i64) -> i64`` is declared in
    the module on first use; its result is stored in ``vmap[dst_vid]``.
    """
    word = ir.IntType(64)

    # Reuse an existing declaration when the module already has one
    allocator = next(
        (fn for fn in module.functions if fn.name == "ny_alloc_box"),
        None,
    )
    if not allocator:
        signature = ir.FunctionType(word, [word])
        allocator = ir.Function(module, signature, name="ny_alloc_box")

    # Default box size passed to the runtime
    box_size = ir.Constant(word, 64)
    vmap[dst_vid] = builder.call(allocator, [box_size], name="new_box")

View File

@ -0,0 +1,114 @@
"""
PHI instruction lowering
Critical for SSA form - handles value merging from different control flow paths
"""
import llvmlite.ir as ir
from typing import Dict, List, Tuple, Optional
def lower_phi(
    builder: ir.IRBuilder,
    dst_vid: int,
    incoming: List[Tuple[int, int]],  # [(value_id, block_id), ...]
    vmap: Dict[int, ir.Value],
    bb_map: Dict[int, ir.Block],
    current_block: ir.Block,
    resolver=None  # Resolver instance (optional)
) -> None:
    """
    Lower MIR PHI instruction

    The PHI takes the type of the first incoming value (i64 when that
    value is unknown). Incoming values of a different type are cast in
    their predecessor block, ahead of that block's terminator if it has
    one. Edges whose block ID is not in ``bb_map`` are skipped.

    Args:
        builder: Current LLVM IR builder (the PHI is inserted at its
            current position)
        dst_vid: Destination value ID
        incoming: List of (value_id, block_id) pairs
        vmap: Value map (updated in place)
        bb_map: Block map
        current_block: Current basic block (not used by this function)
        resolver: Optional resolver for advanced type handling (unused)
    """
    if not incoming:
        # No incoming edges - use zero
        vmap[dst_vid] = ir.Constant(ir.IntType(64), 0)
        return

    # Determine PHI type from first incoming value
    first_val = vmap.get(incoming[0][0])
    if first_val is not None and hasattr(first_val, 'type'):
        phi_type = first_val.type
    else:
        # Default to i64
        phi_type = ir.IntType(64)

    # Create PHI instruction
    phi = builder.phi(phi_type, name=f"phi_{dst_vid}")

    # Add incoming values
    for val_id, block_id in incoming:
        val = vmap.get(val_id)
        block = bb_map.get(block_id)

        if val is None:
            # Create default value based on type
            if isinstance(phi_type, ir.IntType):
                val = ir.Constant(phi_type, 0)
            elif isinstance(phi_type, ir.DoubleType):
                val = ir.Constant(phi_type, 0.0)
            else:
                # Pointer type - null
                val = ir.Constant(phi_type, None)

        if block is None:
            # Skip if block not found
            continue

        # Type conversion if needed
        if hasattr(val, 'type') and val.type != phi_type:
            # Save current position
            saved_block = builder.block
            saved_pos = getattr(builder, '_anchor', None)

            # The cast must execute on the incoming edge, so it lives in the
            # predecessor. A terminator has to stay the last instruction of
            # its block, hence insert before it when one already exists.
            terminator = block.terminator
            if terminator is not None:
                builder.position_before(terminator)
            else:
                builder.position_at_end(block)

            # Convert types
            if isinstance(phi_type, ir.IntType) and val.type.is_pointer:
                val = builder.ptrtoint(val, phi_type, name=f"cast_p2i_{val_id}")
            elif phi_type.is_pointer and isinstance(val.type, ir.IntType):
                val = builder.inttoptr(val, phi_type, name=f"cast_i2p_{val_id}")
            elif isinstance(phi_type, ir.IntType) and isinstance(val.type, ir.IntType):
                # Int to int
                if phi_type.width > val.type.width:
                    val = builder.zext(val, phi_type, name=f"zext_{val_id}")
                else:
                    val = builder.trunc(val, phi_type, name=f"trunc_{val_id}")

            # Restore position (index 0 is a valid anchor, so test for None)
            builder.position_at_end(saved_block)
            if saved_pos is not None and hasattr(builder, '_anchor'):
                builder._anchor = saved_pos

        # Add to PHI
        phi.add_incoming(val, block)

    # Store PHI result
    vmap[dst_vid] = phi
def defer_phi_wiring(
    dst_vid: int,
    incoming: List[Tuple[int, int]],
    phi_deferrals: List[Tuple[int, List[Tuple[int, int]]]]
) -> None:
    """
    Queue a PHI so that it can be wired later (sealed block approach).

    Args:
        dst_vid: Destination value ID
        incoming: Incoming edges, recorded as given (not copied)
        phi_deferrals: Queue that receives the ``(dst_vid, incoming)`` entry
    """
    pending = (dst_vid, incoming)
    phi_deferrals.append(pending)

View File

@ -45,5 +45,13 @@ def lower_return(
elif isinstance(return_type, ir.PointerType) and isinstance(ret_val.type, ir.IntType): elif isinstance(return_type, ir.PointerType) and isinstance(ret_val.type, ir.IntType):
# int to ptr # int to ptr
ret_val = builder.inttoptr(ret_val, return_type) ret_val = builder.inttoptr(ret_val, return_type)
elif isinstance(return_type, ir.IntType) and isinstance(ret_val.type, ir.IntType):
# int to int conversion
if return_type.width < ret_val.type.width:
# Truncate
ret_val = builder.trunc(ret_val, return_type)
elif return_type.width > ret_val.type.width:
# Zero extend
ret_val = builder.zext(ret_val, return_type)
builder.ret(ret_val) builder.ret(ret_val)

View File

@ -0,0 +1,107 @@
"""
Safepoint instruction lowering
GC safepoints where runtime can safely collect garbage
"""
import llvmlite.ir as ir
from typing import Dict, List, Optional
def lower_safepoint(
    builder: ir.IRBuilder,
    module: ir.Module,
    live_values: List[int],
    vmap: Dict[int, ir.Value],
    safepoint_id: Optional[int] = None
) -> None:
    """
    Lower MIR Safepoint instruction

    Live handles are spilled to a stack array, ``ny_safepoint(count,
    array)`` is called, and every spilled value is reloaded afterwards
    (the runtime may have rewritten the slots). A reloaded value is
    converted back to the type it had before the spill, so pointer-typed
    entries in ``vmap`` stay pointer-typed.

    Only values that are i64 or pointers can be spilled into the i64
    array; live values of any other type are left untouched.

    Args:
        builder: Current LLVM IR builder
        module: LLVM module
        live_values: List of value IDs that are live across safepoint
        vmap: Value map (spilled entries are replaced by their reloads)
        safepoint_id: Optional safepoint identifier (currently unused)
    """
    i64 = ir.IntType(64)
    i32 = ir.IntType(32)

    # Look up or declare safepoint function
    safepoint_func = next(
        (f for f in module.functions if f.name == "ny_safepoint"),
        None,
    )
    if safepoint_func is None:
        # Declare ny_safepoint(live_count: i64, live_values: i64*) -> void
        func_type = ir.FunctionType(ir.VoidType(), [i64, i64.as_pointer()])
        safepoint_func = ir.Function(module, func_type, name="ny_safepoint")

    # Collect (value id, value as i64, original type) for spillable values
    tracked = []
    for vid in live_values:
        val = vmap.get(vid, ir.Constant(i64, 0))
        original_type = val.type
        if original_type.is_pointer:
            tracked.append((vid, builder.ptrtoint(val, i64), original_type))
        elif original_type == i64:
            tracked.append((vid, val, original_type))
        # Anything else cannot be stored into an i64 slot - not tracked

    if not tracked:
        # No live values
        zero = ir.Constant(i64, 0)
        null = ir.Constant(i64.as_pointer(), None)
        builder.call(safepoint_func, [zero, null])
        return

    # Allocate array for live values
    live_array = builder.alloca(i64, size=len(tracked), name="live_vals")

    # Store each live value
    for i, (_vid, val, _ty) in enumerate(tracked):
        slot = builder.gep(live_array, [ir.Constant(i32, i)])
        builder.store(val, slot)

    # Call safepoint
    count = ir.Constant(i64, len(tracked))
    builder.call(safepoint_func, [count, live_array])

    # After safepoint, reload values (they may have moved)
    for i, (vid, _val, original_type) in enumerate(tracked):
        slot = builder.gep(live_array, [ir.Constant(i32, i)])
        new_val = builder.load(slot, name=f"reload_{vid}")
        if original_type.is_pointer:
            # Give later users the same type they saw before the safepoint
            new_val = builder.inttoptr(new_val, original_type)
        vmap[vid] = new_val
def insert_automatic_safepoint(
    builder: ir.IRBuilder,
    module: ir.Module,
    location: str  # "loop_header", "function_call", etc.
) -> None:
    """
    Emit a call to ``ny_check_safepoint()`` at the builder's position.

    No live values are passed; the function takes no arguments and
    returns void. It is declared in the module on first use.

    Args:
        builder: Current LLVM IR builder
        module: LLVM module
        location: Location label, used only in the call's name
    """
    poll_fn = next(
        (fn for fn in module.functions if fn.name == "ny_check_safepoint"),
        None,
    )
    if not poll_fn:
        # First use in this module: declare ny_check_safepoint() -> void
        signature = ir.FunctionType(ir.VoidType(), [])
        poll_fn = ir.Function(module, signature, name="ny_check_safepoint")

    builder.call(poll_fn, [], name=f"safepoint_{location}")

View File

@ -0,0 +1,125 @@
"""
TypeOp instruction lowering
Handles type conversions and type checks
"""
import llvmlite.ir as ir
from typing import Dict, Optional
def lower_typeop(
    builder: ir.IRBuilder,
    op: str,
    src_vid: int,
    dst_vid: int,
    target_type: Optional[str],
    vmap: Dict[int, ir.Value],
    resolver=None
) -> None:
    """
    Lower MIR TypeOp instruction

    Operations:
    - cast / as: the source value is forwarded unchanged
    - is: i64 1 or 0; only "IntegerBox" is checked (an i64 source that is
      non-zero passes), every other target type yields 0
    - anything else: i64 0

    Args:
        builder: Current LLVM IR builder
        op: Operation type (cast, is, as)
        src_vid: Source value ID (missing IDs read as i64 0)
        dst_vid: Destination value ID
        target_type: Target type name (e.g., "StringBox", "IntegerBox")
        vmap: Value map (updated in place)
        resolver: Optional resolver for type handling (currently unused)
    """
    word = ir.IntType(64)
    source = vmap.get(src_vid, ir.Constant(word, 0))

    # cast and as are both plain pass-through for now
    if op in ("cast", "as"):
        vmap[dst_vid] = source
        return

    # Unknown operation
    if op != "is":
        vmap[dst_vid] = ir.Constant(word, 0)
        return

    # Type check: simplified to "non-zero i64 handle" for IntegerBox
    checkable = (
        target_type == "IntegerBox"
        and hasattr(source, 'type')
        and source.type == word
    )
    if checkable:
        nonzero = builder.icmp_unsigned('!=', source, ir.Constant(word, 0))
        # Convert i1 to i64
        verdict = builder.zext(nonzero, word)
    else:
        # Other types would need runtime type info
        verdict = ir.Constant(word, 0)
    vmap[dst_vid] = verdict
def lower_convert(
    builder: ir.IRBuilder,
    src_vid: int,
    dst_vid: int,
    from_type: str,
    to_type: str,
    vmap: Dict[int, ir.Value]
) -> None:
    """
    Convert a value between primitive types and store it in ``vmap``.

    Supported pairs: i64<->f64, i64<->ptr, i32->i64 (sign extend) and
    i64->i32 (truncate). Any other pair forwards the source unchanged.
    When the source value is missing, a zero/null constant of the target
    type is stored instead (i64 0 for anything but "f64" and "ptr").

    Args:
        builder: Current LLVM IR builder
        src_vid: Source value ID
        dst_vid: Destination value ID
        from_type: Source type (i32, i64, f64, ptr)
        to_type: Target type
        vmap: Value map (updated in place)
    """
    source = vmap.get(src_vid)

    if not source:
        # Nothing to convert - fall back to a default of the target type
        if to_type == "f64":
            fallback = ir.Constant(ir.DoubleType(), 0.0)
        elif to_type == "ptr":
            fallback = ir.Constant(ir.IntType(8).as_pointer(), None)
        else:
            fallback = ir.Constant(ir.IntType(64), 0)
        vmap[dst_vid] = fallback
        return

    # (from, to) -> emitter; looked up lazily so only one cast is emitted
    emitters = {
        ("i64", "f64"): lambda v: builder.sitofp(v, ir.DoubleType()),
        ("f64", "i64"): lambda v: builder.fptosi(v, ir.IntType(64)),
        ("i64", "ptr"): lambda v: builder.inttoptr(v, ir.IntType(8).as_pointer()),
        ("ptr", "i64"): lambda v: builder.ptrtoint(v, ir.IntType(64)),
        ("i32", "i64"): lambda v: builder.sext(v, ir.IntType(64)),
        ("i64", "i32"): lambda v: builder.trunc(v, ir.IntType(32)),
    }
    emit = emitters.get((from_type, to_type))

    # Unknown conversion - pass through
    vmap[dst_vid] = emit(source) if emit is not None else source

View File

@ -6,10 +6,30 @@ Following the design principles in docs/LLVM_LAYER_OVERVIEW.md
import json import json
import sys import sys
from typing import Dict, Any, Optional import os
from typing import Dict, Any, Optional, List, Tuple
import llvmlite.ir as ir import llvmlite.ir as ir
import llvmlite.binding as llvm import llvmlite.binding as llvm
# Import instruction handlers
from instructions.const import lower_const
from instructions.binop import lower_binop
from instructions.jump import lower_jump
from instructions.branch import lower_branch
from instructions.ret import lower_return
from instructions.phi import lower_phi, defer_phi_wiring
from instructions.call import lower_call
from instructions.boxcall import lower_boxcall
from instructions.externcall import lower_externcall
from instructions.typeop import lower_typeop, lower_convert
from instructions.newbox import lower_newbox
from instructions.safepoint import lower_safepoint, insert_automatic_safepoint
from instructions.barrier import lower_barrier
from instructions.loopform import lower_while_loopform
from resolver import Resolver
from mir_reader import MIRReader
class NyashLLVMBuilder: class NyashLLVMBuilder:
"""Main LLVM IR builder for Nyash MIR""" """Main LLVM IR builder for Nyash MIR"""
@ -27,22 +47,196 @@ class NyashLLVMBuilder:
self.i1 = ir.IntType(1) self.i1 = ir.IntType(1)
self.i8p = self.i8.as_pointer() self.i8p = self.i8.as_pointer()
self.f64 = ir.DoubleType() self.f64 = ir.DoubleType()
self.void = ir.VoidType()
# Value and block maps
self.vmap: Dict[int, ir.Value] = {} # value_id -> LLVM value
self.bb_map: Dict[int, ir.Block] = {} # block_id -> LLVM block
# PHI deferrals for sealed block approach
self.phi_deferrals: List[Tuple[int, List[Tuple[int, int]]]] = []
# Resolver for unified value resolution
self.resolver = Resolver(self.vmap, self.bb_map)
# Statistics
self.loop_count = 0
def build_from_mir(self, mir_json: Dict[str, Any]) -> str: def build_from_mir(self, mir_json: Dict[str, Any]) -> str:
"""Build LLVM IR from MIR JSON""" """Build LLVM IR from MIR JSON"""
# TODO: Implement MIR -> LLVM lowering # Parse MIR
# For now, create a simple ny_main that returns 0 reader = MIRReader(mir_json)
functions = reader.get_functions()
# ny_main: extern "C" fn() -> i32 if not functions:
# No functions - create dummy ny_main
return self._create_dummy_main()
# Process each function
for func_data in functions:
self.lower_function(func_data)
# Wire deferred PHIs
self._wire_deferred_phis()
return str(self.module)
def _create_dummy_main(self) -> str:
"""Create dummy ny_main that returns 0"""
ny_main_ty = ir.FunctionType(self.i32, []) ny_main_ty = ir.FunctionType(self.i32, [])
ny_main = ir.Function(self.module, ny_main_ty, name="ny_main") ny_main = ir.Function(self.module, ny_main_ty, name="ny_main")
block = ny_main.append_basic_block(name="entry") block = ny_main.append_basic_block(name="entry")
builder = ir.IRBuilder(block) builder = ir.IRBuilder(block)
builder.ret(ir.Constant(self.i32, 0)) builder.ret(ir.Constant(self.i32, 0))
return str(self.module) return str(self.module)
def lower_function(self, func_data: Dict[str, Any]):
"""Lower a single MIR function to LLVM IR

Reads "name", "params" and "blocks" from func_data, creates the LLVM
function in self.module, registers one basic block per MIR block in
self.bb_map and then lowers every block via self.lower_block.
"""
name = func_data.get("name", "unknown")
params = func_data.get("params", [])
blocks = func_data.get("blocks", [])
# Determine function signature
if name == "ny_main":
# Special case: ny_main returns i32
func_ty = ir.FunctionType(self.i32, [])
else:
# Default: i64(i64, ...) signature
# Only the number of params is used here.
# NOTE(review): nothing in this method binds func.args to value IDs in
# self.vmap - confirm that parameters are mapped elsewhere.
param_types = [self.i64] * len(params)
func_ty = ir.FunctionType(self.i64, param_types)
# Create function
func = ir.Function(self.module, func_ty, name=name)
# Create all blocks first
# (so that every block ID is already in bb_map when instructions are lowered)
for block_data in blocks:
bid = block_data.get("id", 0)
block_name = f"bb{bid}"
bb = func.append_basic_block(block_name)
# NOTE(review): bb_map is keyed by block ID alone, so an ID reused by a
# later function overwrites this entry - confirm IDs are unique per module.
self.bb_map[bid] = bb
# Process each block
for block_data in blocks:
bid = block_data.get("id", 0)
bb = self.bb_map[bid]
self.lower_block(bb, block_data, func)
def lower_block(self, bb: ir.Block, block_data: Dict[str, Any], func: ir.Function):
"""Lower a single basic block

Creates an IRBuilder on bb and passes each entry of
block_data["instructions"] (empty list when absent) to
self.lower_instruction, in order.
"""
builder = ir.IRBuilder(bb)
instructions = block_data.get("instructions", [])
# Process each instruction
for inst in instructions:
# func is forwarded so the instruction handlers can consult the enclosing function
self.lower_instruction(builder, inst, func)
def lower_instruction(self, builder: ir.IRBuilder, inst: Dict[str, Any], func: ir.Function):
    """Lower one MIR instruction by dispatching on its ``"op"`` field.

    Args:
        builder: IR builder positioned in the block being lowered.
        inst: MIR instruction dict; the fields read depend on ``"op"``.
        func: LLVM function currently being lowered.

    Ops without a matching branch are ignored; a diagnostic line is
    printed only when the ``NYASH_CLI_VERBOSE`` environment variable
    equals ``"1"``.

    NOTE(review): no branch for a ``"compare"`` op is visible in this
    dispatcher, so such an instruction would take the unknown-op path.
    Confirm whether that is intended.
    """
    op = inst.get("op")
    field = inst.get  # short alias for reading instruction fields

    if op == "const":
        lower_const(builder, self.module, field("dst"), field("value"), self.vmap)
        return

    if op == "binop":
        operation = field("operation")
        lhs, rhs = field("lhs"), field("rhs")
        dst = field("dst")
        lower_binop(builder, self.resolver, operation, lhs, rhs, dst,
                    self.vmap, builder.block)
        return

    if op == "jump":
        lower_jump(builder, field("target"), self.bb_map)
        return

    if op == "branch":
        lower_branch(builder, field("cond"), field("then"), field("else"),
                     self.vmap, self.bb_map)
        return

    if op == "ret":
        lower_return(builder, field("value"), self.vmap, func.return_value.type)
        return

    if op == "phi":
        # PHI nodes are only recorded here; wiring happens later.
        defer_phi_wiring(field("dst"), field("incoming", []), self.phi_deferrals)
        return

    if op == "call":
        callee = field("func")
        call_args = field("args", [])
        lower_call(builder, self.module, callee, call_args, field("dst"),
                   self.vmap, self.resolver)
        return

    if op == "boxcall":
        receiver = field("box")
        method = field("method")
        call_args = field("args", [])
        lower_boxcall(builder, self.module, receiver, method, call_args,
                      field("dst"), self.vmap, self.resolver)
        return

    if op == "externcall":
        callee = field("func")
        call_args = field("args", [])
        lower_externcall(builder, self.module, callee, call_args, field("dst"),
                         self.vmap, self.resolver)
        return

    if op == "newbox":
        box_type = field("type")
        ctor_args = field("args", [])
        lower_newbox(builder, self.module, box_type, ctor_args, field("dst"),
                     self.vmap, self.resolver)
        return

    if op == "typeop":
        operation = field("operation")
        src = field("src")
        dst = field("dst")
        lower_typeop(builder, operation, src, dst, field("target_type"),
                     self.vmap, self.resolver)
        return

    if op == "safepoint":
        lower_safepoint(builder, self.module, field("live", []), self.vmap)
        return

    if op == "barrier":
        lower_barrier(builder, field("type", "memory"))
        return

    if op == "while":
        # Experimental LoopForm lowering; each attempt gets a fresh loop id.
        cond = field("cond")
        body = field("body", [])
        self.loop_count += 1
        lowered = lower_while_loopform(builder, func, cond, body,
                                       self.loop_count, self.vmap, self.bb_map)
        if not lowered:
            # LoopForm lowering reported failure: use the regular fallback.
            self._lower_while_regular(builder, inst, func)
        return

    # Unknown op: skipped, reported only in verbose mode.
    if os.environ.get('NYASH_CLI_VERBOSE') == '1':
        print(f"[Python LLVM] Unknown instruction: {op}")
def _lower_while_regular(self, builder: ir.IRBuilder, inst: Dict[str, Any], func: ir.Function):
    """Fallback lowering for a ``while`` instruction (placeholder).

    Currently emits nothing and returns ``None``; all arguments are unused.
    """
    # TODO: Implement regular while lowering
    return None
def _wire_deferred_phis(self):
"""Wire all deferred PHI nodes"""
# TODO: Implement PHI wiring after all blocks are created
for dst_vid, incoming in self.phi_deferrals:
# Find the block containing this PHI
# Wire the incoming edges
pass
def compile_to_object(self, output_path: str): def compile_to_object(self, output_path: str):
"""Compile module to object file""" """Compile module to object file"""
# Create target machine # Create target machine

View File

@ -4,7 +4,7 @@ Parses Nyash MIR JSON format into Python structures
""" """
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Any, Optional, Union from typing import Dict, List, Any, Optional, Union, Tuple
from enum import Enum from enum import Enum
class MirType(Enum): class MirType(Enum):
@ -114,3 +114,29 @@ def parse_instruction(data: Dict[str, Any]) -> MirInstruction:
instr.args = data["args"] instr.args = data["args"]
return instr return instr
class MIRReader:
    """Wrapper that exposes the functions of a MIR JSON document as a list.

    The ``"functions"`` entry of the document may be either a list of
    function dicts or a dict mapping function name to function dict.
    ``get_functions`` normalizes both shapes to a list.
    """

    def __init__(self, mir_json: Dict[str, Any]):
        """Store the parsed MIR JSON document; no processing happens here."""
        self.mir_json = mir_json
        # Normalized function list, built on the first get_functions() call.
        self.functions: Optional[List[Dict[str, Any]]] = None

    def get_functions(self) -> List[Dict[str, Any]]:
        """Return the document's functions as a list of dicts.

        The result is computed once and cached on ``self.functions``.

        - List form: the list from the document is returned as-is.
        - Dict form: one dict per entry is returned, each carrying the
          mapping key under ``"name"`` (replacing any existing ``"name"``).
          The entries are shallow copies, so the caller's document is not
          modified.
        - Missing or any other type: an empty list.
        """
        if self.functions is not None:
            return self.functions

        funcs = self.mir_json.get("functions", [])
        if isinstance(funcs, list):
            # Already in the expected format.
            self.functions = funcs
        elif isinstance(funcs, dict):
            # Copy instead of assigning func_data["name"] in place, which
            # would mutate the input document as a side effect of reading it.
            self.functions = [
                {**func_data, "name": name} for name, func_data in funcs.items()
            ]
        else:
            self.functions = []
        return self.functions

View File

@ -0,0 +1,61 @@
{
"functions": [
{
"name": "ny_main",
"params": [],
"blocks": [
{
"id": 0,
"instructions": [
{
"op": "newbox",
"type": "StringBox",
"args": [],
"dst": 1
},
{
"op": "const",
"dst": 2,
"value": {
"type": "string",
"value": "Hello Nyash!"
}
},
{
"op": "boxcall",
"box": 1,
"method": "set",
"args": [2],
"dst": 3
},
{
"op": "boxcall",
"box": 1,
"method": "get",
"args": [],
"dst": 4
},
{
"op": "externcall",
"func": "print",
"args": [4],
"dst": null
},
{
"op": "const",
"dst": 5,
"value": {
"type": "i64",
"value": 0
}
},
{
"op": "ret",
"value": 5
}
]
}
]
}
]
}

View File

@ -0,0 +1,27 @@
{
"functions": [
{
"name": "ny_main",
"params": [],
"blocks": [
{
"id": 0,
"instructions": [
{
"op": "const",
"dst": 1,
"value": {
"type": "i64",
"value": 42
}
},
{
"op": "ret",
"value": 1
}
]
}
]
}
]
}