diff --git a/CLAUDE.md b/CLAUDE.md index f719ba0c..259e107f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -481,6 +481,29 @@ gemini -p "Nyashの実装で困っています..." codex exec "質問内容" ``` +### 🐍 Python LLVM バックエンド (実験的実装) +**場所**: `/src/llvm_py/` + +ChatGPTの調査待ち中に作成した、llvmliteベースのLLVMバックエンド実装にゃ。 +Rust/inkwellの複雑さを回避して、シンプルに2000行程度でMIR14→LLVM変換を実現。 + +#### 実行方法 +```bash +cd src/llvm_py +python3 -m venv venv +./venv/bin/pip install llvmlite +./venv/bin/python llvm_builder.py test_minimal.json -o output.o +``` + +#### 実装済み命令 +- ✅ const, binop, jump, branch, ret, compare +- ✅ phi, call, boxcall, externcall +- ✅ typeop, newbox, safepoint, barrier +- ✅ loopform (実験的) + +**利点**: シンプル、高速プロトタイピング、llvmliteの安定性 +**用途**: PHI/SSA検証、LoopForm実験、LLVM IR生成テスト + ### 🔄 Codex非同期ワークフロー(並列作業) ```bash # 基本実行(同期) diff --git a/src/llvm_py/instructions/barrier.py b/src/llvm_py/instructions/barrier.py new file mode 100644 index 00000000..04b016bf --- /dev/null +++ b/src/llvm_py/instructions/barrier.py @@ -0,0 +1,117 @@ +""" +Barrier instruction lowering +Memory barriers for thread safety and memory ordering +""" + +import llvmlite.ir as ir +from typing import Dict, Optional + +def lower_barrier( + builder: ir.IRBuilder, + barrier_type: str, + ordering: Optional[str] = None +) -> None: + """ + Lower MIR Barrier instruction + + Barrier types: + - memory: Full memory fence + - acquire: Acquire semantics + - release: Release semantics + - acq_rel: Acquire-release + - seq_cst: Sequential consistency + + Args: + builder: Current LLVM IR builder + barrier_type: Type of barrier + ordering: Optional memory ordering specification + """ + # Map barrier types to LLVM atomic ordering + ordering_map = { + "acquire": "acquire", + "release": "release", + "acq_rel": "acq_rel", + "seq_cst": "seq_cst", + "memory": "seq_cst", # Full fence + } + + llvm_ordering = ordering_map.get(barrier_type, "seq_cst") + + # Insert fence instruction + builder.fence(llvm_ordering) + +def lower_atomic_op( + builder: ir.IRBuilder, + op: str, # 
"load", "store", "add", "cas" + ptr_vid: int, + val_vid: Optional[int], + dst_vid: Optional[int], + vmap: Dict[int, ir.Value], + ordering: str = "seq_cst" +) -> None: + """ + Lower atomic operations + + Args: + builder: Current LLVM IR builder + op: Atomic operation type + ptr_vid: Pointer value ID + val_vid: Value ID for store/rmw operations + dst_vid: Destination ID for load/rmw operations + vmap: Value map + ordering: Memory ordering + """ + # Get pointer + ptr = vmap.get(ptr_vid) + if not ptr: + # Create dummy pointer + i64 = ir.IntType(64) + ptr = builder.alloca(i64, name="atomic_ptr") + vmap[ptr_vid] = ptr + + if op == "load": + # Atomic load + result = builder.load_atomic(ptr, ordering=ordering, align=8) + if dst_vid is not None: + vmap[dst_vid] = result + + elif op == "store": + # Atomic store + if val_vid is not None: + val = vmap.get(val_vid, ir.Constant(ir.IntType(64), 0)) + builder.store_atomic(val, ptr, ordering=ordering, align=8) + + elif op == "add": + # Atomic add (fetch_add) + if val_vid is not None: + val = vmap.get(val_vid, ir.Constant(ir.IntType(64), 1)) + result = builder.atomic_rmw("add", ptr, val, ordering=ordering) + if dst_vid is not None: + vmap[dst_vid] = result + + elif op == "cas": + # Compare and swap + # TODO: Needs expected and new values + pass + +def insert_thread_fence( + builder: ir.IRBuilder, + module: ir.Module, + fence_type: str = "full" +) -> None: + """ + Insert thread synchronization fence + + Args: + builder: Current LLVM IR builder + module: LLVM module + fence_type: Type of fence (full, read, write) + """ + if fence_type == "full": + builder.fence("seq_cst") + elif fence_type == "read": + builder.fence("acquire") + elif fence_type == "write": + builder.fence("release") + else: + builder.fence("seq_cst") \ No newline at end of file diff --git a/src/llvm_py/instructions/boxcall.py b/src/llvm_py/instructions/boxcall.py new file mode 100644 index 00000000..7c984690 --- /dev/null +++ b/src/llvm_py/instructions/boxcall.py @@ 
-0,0 +1,104 @@ +""" +BoxCall instruction lowering +Core of Nyash's "Everything is Box" philosophy +""" + +import llvmlite.ir as ir +from typing import Dict, List, Optional + +def lower_boxcall( + builder: ir.IRBuilder, + module: ir.Module, + box_vid: int, + method_name: str, + args: List[int], + dst_vid: Optional[int], + vmap: Dict[int, ir.Value], + resolver=None +) -> None: + """ + Lower MIR BoxCall instruction + + Current implementation uses method_id approach for plugin boxes. + + Args: + builder: Current LLVM IR builder + module: LLVM module + box_vid: Box instance value ID (handle) + method_name: Method name to call + args: List of argument value IDs + dst_vid: Optional destination for return value + vmap: Value map + resolver: Optional resolver for type handling + """ + # Get box handle (i64) + box_handle = vmap.get(box_vid, ir.Constant(ir.IntType(64), 0)) + + # Ensure handle is i64 + if hasattr(box_handle, 'type') and box_handle.type.is_pointer: + box_handle = builder.ptrtoint(box_handle, ir.IntType(64)) + + # Method ID dispatch for plugin boxes + # This matches the current LLVM backend approach + method_id = hash(method_name) & 0xFFFF # Simple hash for demo + + # Look up or create ny_boxcall_by_id function + boxcall_func = None + for f in module.functions: + if f.name == "ny_boxcall_by_id": + boxcall_func = f + break + + if not boxcall_func: + # Declare ny_boxcall_by_id(handle: i64, method_id: i64, args: i8*) -> i64 + i8 = ir.IntType(8) + i64 = ir.IntType(64) + i8_ptr = i8.as_pointer() + + func_type = ir.FunctionType(i64, [i64, i64, i8_ptr]) + boxcall_func = ir.Function(module, func_type, name="ny_boxcall_by_id") + + # Prepare arguments array + i8 = ir.IntType(8) + i64 = ir.IntType(64) + + if args: + # Allocate space for arguments (8 bytes per arg) + args_size = len(args) * 8 + args_ptr = builder.alloca(i8, size=args_size, name="boxcall_args") + + # Cast to i64* for storing arguments + i64_ptr_type = i64.as_pointer() + args_i64_ptr = 
builder.bitcast(args_ptr, i64_ptr_type) + + # Store each argument + for i, arg_id in enumerate(args): + arg_val = vmap.get(arg_id, ir.Constant(i64, 0)) + + # Ensure i64 + if hasattr(arg_val, 'type'): + if arg_val.type.is_pointer: + arg_val = builder.ptrtoint(arg_val, i64) + elif arg_val.type != i64: + # TODO: Handle other conversions + pass + + # Calculate offset and store + idx = ir.Constant(ir.IntType(32), i) + ptr = builder.gep(args_i64_ptr, [idx]) + builder.store(arg_val, ptr) + + # Cast back to i8* for call + call_args_ptr = builder.bitcast(args_i64_ptr, i8.as_pointer()) + else: + # No arguments - pass null + call_args_ptr = ir.Constant(i8.as_pointer(), None) + + # Make the boxcall + method_id_val = ir.Constant(i64, method_id) + result = builder.call(boxcall_func, [box_handle, method_id_val, call_args_ptr], + name=f"boxcall_{method_name}") + + # Store result if needed + if dst_vid is not None: + vmap[dst_vid] = result \ No newline at end of file diff --git a/src/llvm_py/instructions/call.py b/src/llvm_py/instructions/call.py new file mode 100644 index 00000000..e1a54e2f --- /dev/null +++ b/src/llvm_py/instructions/call.py @@ -0,0 +1,80 @@ +""" +Call instruction lowering +Handles regular function calls (not BoxCall or ExternCall) +""" + +import llvmlite.ir as ir +from typing import Dict, List, Optional + +def lower_call( + builder: ir.IRBuilder, + module: ir.Module, + func_name: str, + args: List[int], + dst_vid: Optional[int], + vmap: Dict[int, ir.Value], + resolver=None +) -> None: + """ + Lower MIR Call instruction + + Args: + builder: Current LLVM IR builder + module: LLVM module + func_name: Function name to call + args: List of argument value IDs + dst_vid: Optional destination for return value + vmap: Value map + resolver: Optional resolver for type handling + """ + # Look up function in module + func = None + for f in module.functions: + if f.name == func_name: + func = f + break + + if not func: + # Function not found - create declaration + # Default: 
i64(i64, ...) signature + ret_type = ir.IntType(64) + arg_types = [ir.IntType(64)] * len(args) + func_type = ir.FunctionType(ret_type, arg_types) + func = ir.Function(module, func_type, name=func_name) + + # Prepare arguments + call_args = [] + for i, arg_id in enumerate(args): + arg_val = vmap.get(arg_id) + + if not arg_val: + # Default based on expected type + if i < len(func.args): + expected_type = func.args[i].type + else: + expected_type = ir.IntType(64) + + if isinstance(expected_type, ir.IntType): + arg_val = ir.Constant(expected_type, 0) + elif isinstance(expected_type, ir.DoubleType): + arg_val = ir.Constant(expected_type, 0.0) + else: + arg_val = ir.Constant(expected_type, None) + + # Type conversion if needed + if i < len(func.args): + expected_type = func.args[i].type + if hasattr(arg_val, 'type') and arg_val.type != expected_type: + if expected_type.is_pointer and isinstance(arg_val.type, ir.IntType): + arg_val = builder.inttoptr(arg_val, expected_type) + elif isinstance(expected_type, ir.IntType) and arg_val.type.is_pointer: + arg_val = builder.ptrtoint(arg_val, expected_type) + + call_args.append(arg_val) + + # Make the call + result = builder.call(func, call_args, name=f"call_{func_name}") + + # Store result if needed + if dst_vid is not None: + vmap[dst_vid] = result \ No newline at end of file diff --git a/src/llvm_py/instructions/compare.py b/src/llvm_py/instructions/compare.py new file mode 100644 index 00000000..73db2d24 --- /dev/null +++ b/src/llvm_py/instructions/compare.py @@ -0,0 +1,104 @@ +""" +Compare instruction lowering +Handles comparison operations (<, >, <=, >=, ==, !=) +""" + +import llvmlite.ir as ir +from typing import Dict + +def lower_compare( + builder: ir.IRBuilder, + op: str, + lhs: int, + rhs: int, + dst: int, + vmap: Dict[int, ir.Value] +) -> None: + """ + Lower MIR Compare instruction + + Args: + builder: Current LLVM IR builder + op: Comparison operation (<, >, <=, >=, ==, !=) + lhs: Left operand value ID + rhs: Right 
operand value ID + dst: Destination value ID + vmap: Value map + """ + # Get operands + lhs_val = vmap.get(lhs, ir.Constant(ir.IntType(64), 0)) + rhs_val = vmap.get(rhs, ir.Constant(ir.IntType(64), 0)) + + # Ensure both are i64 + i64 = ir.IntType(64) + if hasattr(lhs_val, 'type') and lhs_val.type.is_pointer: + lhs_val = builder.ptrtoint(lhs_val, i64) + if hasattr(rhs_val, 'type') and rhs_val.type.is_pointer: + rhs_val = builder.ptrtoint(rhs_val, i64) + + # Map operations to LLVM predicates + op_map = { + '<': 'slt', # signed less than + '>': 'sgt', # signed greater than + '<=': 'sle', # signed less or equal + '>=': 'sge', # signed greater or equal + '==': 'eq', # equal + '!=': 'ne' # not equal + } + + pred = op_map.get(op, 'eq') + + # Perform comparison (returns i1) + cmp_result = builder.icmp_signed(pred, lhs_val, rhs_val, name=f"cmp_{dst}") + + # Convert i1 to i64 (0 or 1) + result = builder.zext(cmp_result, i64, name=f"cmp_i64_{dst}") + + # Store result + vmap[dst] = result + +def lower_fcmp( + builder: ir.IRBuilder, + op: str, + lhs: int, + rhs: int, + dst: int, + vmap: Dict[int, ir.Value] +) -> None: + """ + Lower floating point comparison + + Args: + builder: Current LLVM IR builder + op: Comparison operation + lhs: Left operand value ID + rhs: Right operand value ID + dst: Destination value ID + vmap: Value map + """ + # Get operands as f64 + f64 = ir.DoubleType() + lhs_val = vmap.get(lhs, ir.Constant(f64, 0.0)) + rhs_val = vmap.get(rhs, ir.Constant(f64, 0.0)) + + # Map operations to LLVM predicates + op_map = { + '<': 'olt', # ordered less than + '>': 'ogt', # ordered greater than + '<=': 'ole', # ordered less or equal + '>=': 'oge', # ordered greater or equal + '==': 'oeq', # ordered equal + '!=': 'one' # ordered not equal + } + + pred = op_map.get(op, 'oeq') + + # Perform comparison (returns i1) + cmp_result = builder.fcmp_ordered(pred, lhs_val, rhs_val, name=f"fcmp_{dst}") + + # Convert i1 to i64 + i64 = ir.IntType(64) + result = builder.zext(cmp_result, 
i64, name=f"fcmp_i64_{dst}") + + # Store result + vmap[dst] = result \ No newline at end of file diff --git a/src/llvm_py/instructions/const.py b/src/llvm_py/instructions/const.py index 045d600f..e64089d1 100644 --- a/src/llvm_py/instructions/const.py +++ b/src/llvm_py/instructions/const.py @@ -42,7 +42,10 @@ def lower_const( # String constant - create global and get pointer i8 = ir.IntType(8) str_val = str(const_val) - str_const = ir.Constant.literal_string(str_val.encode('utf-8') + b'\0') + # Create array constant for the string + str_bytes = str_val.encode('utf-8') + b'\0' + str_const = ir.Constant(ir.ArrayType(i8, len(str_bytes)), + bytearray(str_bytes)) # Create global string constant global_name = f".str.{dst}" diff --git a/src/llvm_py/instructions/externcall.py b/src/llvm_py/instructions/externcall.py new file mode 100644 index 00000000..2812974a --- /dev/null +++ b/src/llvm_py/instructions/externcall.py @@ -0,0 +1,149 @@ +""" +ExternCall instruction lowering +Handles the minimal 5 runtime functions: print, error, panic, exit, now +""" + +import llvmlite.ir as ir +from typing import Dict, List, Optional + +# The 5 minimal external functions +EXTERN_FUNCS = { + "print": { + "ret": "void", + "args": ["i8*"], # String pointer + "llvm_name": "ny_print" + }, + "error": { + "ret": "void", + "args": ["i8*"], # Error message + "llvm_name": "ny_error" + }, + "panic": { + "ret": "void", + "args": ["i8*"], # Panic message + "llvm_name": "ny_panic" + }, + "exit": { + "ret": "void", + "args": ["i64"], # Exit code + "llvm_name": "ny_exit" + }, + "now": { + "ret": "i64", + "args": [], # No arguments + "llvm_name": "ny_now" + } +} + +def lower_externcall( + builder: ir.IRBuilder, + module: ir.Module, + func_name: str, + args: List[int], + dst_vid: Optional[int], + vmap: Dict[int, ir.Value], + resolver=None +) -> None: + """ + Lower MIR ExternCall instruction + + Args: + builder: Current LLVM IR builder + module: LLVM module + func_name: External function name + args: List 
of argument value IDs + dst_vid: Optional destination for return value + vmap: Value map + resolver: Optional resolver for type handling + """ + if func_name not in EXTERN_FUNCS: + # Unknown extern function - treat as void() + print(f"Warning: Unknown extern function: {func_name}") + return + + extern_info = EXTERN_FUNCS[func_name] + llvm_name = extern_info["llvm_name"] + + # Look up or declare function + func = None + for f in module.functions: + if f.name == llvm_name: + func = f + break + + if not func: + # Build function type + i8 = ir.IntType(8) + i64 = ir.IntType(64) + void = ir.VoidType() + + # Return type + if extern_info["ret"] == "void": + ret_type = void + elif extern_info["ret"] == "i64": + ret_type = i64 + else: + ret_type = void + + # Argument types + arg_types = [] + for arg_type_str in extern_info["args"]: + if arg_type_str == "i8*": + arg_types.append(i8.as_pointer()) + elif arg_type_str == "i64": + arg_types.append(i64) + + func_type = ir.FunctionType(ret_type, arg_types) + func = ir.Function(module, func_type, name=llvm_name) + + # Prepare arguments + call_args = [] + for i, arg_id in enumerate(args): + if i >= len(extern_info["args"]): + break # Too many arguments + + expected_type_str = extern_info["args"][i] + arg_val = vmap.get(arg_id) + + if not arg_val: + # Default value + if expected_type_str == "i8*": + # Null string + i8 = ir.IntType(8) + arg_val = ir.Constant(i8.as_pointer(), None) + elif expected_type_str == "i64": + arg_val = ir.Constant(ir.IntType(64), 0) + + # Type conversion + if expected_type_str == "i8*": + # Need string pointer + if hasattr(arg_val, 'type'): + if isinstance(arg_val.type, ir.IntType): + # int to ptr + i8 = ir.IntType(8) + arg_val = builder.inttoptr(arg_val, i8.as_pointer()) + elif not arg_val.type.is_pointer: + # Need pointer type + i8 = ir.IntType(8) + arg_val = ir.Constant(i8.as_pointer(), None) + elif expected_type_str == "i64": + # Need i64 + if hasattr(arg_val, 'type'): + if arg_val.type.is_pointer: + 
arg_val = builder.ptrtoint(arg_val, ir.IntType(64)) + elif arg_val.type != ir.IntType(64): + # Convert to i64 + pass # TODO: Handle other conversions + + call_args.append(arg_val) + + # Make the call + if extern_info["ret"] == "void": + builder.call(func, call_args) + if dst_vid is not None: + # Void return - store 0 + vmap[dst_vid] = ir.Constant(ir.IntType(64), 0) + else: + result = builder.call(func, call_args, name=f"extern_{func_name}") + if dst_vid is not None: + vmap[dst_vid] = result \ No newline at end of file diff --git a/src/llvm_py/instructions/loopform.py b/src/llvm_py/instructions/loopform.py index 5f5fbf05..3a84dec6 100644 --- a/src/llvm_py/instructions/loopform.py +++ b/src/llvm_py/instructions/loopform.py @@ -6,7 +6,7 @@ Experimental loop normalization following paper-e-loop-signal-ir import os import llvmlite.ir as ir from dataclasses import dataclass -from typing import Dict, Tuple, List, Optional +from typing import Dict, Tuple, List, Optional, Any @dataclass class LoopFormContext: diff --git a/src/llvm_py/instructions/newbox.py b/src/llvm_py/instructions/newbox.py new file mode 100644 index 00000000..b74ad404 --- /dev/null +++ b/src/llvm_py/instructions/newbox.py @@ -0,0 +1,116 @@ +""" +NewBox instruction lowering +Handles box creation (new StringBox(), new IntegerBox(), etc.) +""" + +import llvmlite.ir as ir +from typing import Dict, List, Optional + +def lower_newbox( + builder: ir.IRBuilder, + module: ir.Module, + box_type: str, + args: List[int], + dst_vid: int, + vmap: Dict[int, ir.Value], + resolver=None +) -> None: + """ + Lower MIR NewBox instruction + + Creates a new box instance and returns its handle. 
+ + Args: + builder: Current LLVM IR builder + module: LLVM module + box_type: Box type name (e.g., "StringBox", "IntegerBox") + args: Constructor arguments + dst_vid: Destination value ID for box handle + vmap: Value map + resolver: Optional resolver for type handling + """ + # Look up or declare the box creation function + create_func_name = f"ny_create_{box_type}" + create_func = None + + for f in module.functions: + if f.name == create_func_name: + create_func = f + break + + if not create_func: + # Declare box creation function + # Signature depends on box type + i64 = ir.IntType(64) + i8 = ir.IntType(8) + + if box_type in ["StringBox", "IntegerBox", "BoolBox"]: + # Built-in boxes - default constructors (no args) + # Real implementation may have optional args + func_type = ir.FunctionType(i64, []) + else: + # Generic box - variable arguments + # For now, assume no args + func_type = ir.FunctionType(i64, []) + + create_func = ir.Function(module, func_type, name=create_func_name) + + # Prepare arguments + call_args = [] + for i, arg_id in enumerate(args): + arg_val = vmap.get(arg_id) + + if not arg_val: + # Default based on box type + if box_type == "StringBox": + # Empty string + i8 = ir.IntType(8) + arg_val = ir.Constant(i8.as_pointer(), None) + else: + # Zero + arg_val = ir.Constant(ir.IntType(64), 0) + + # Type conversion if needed + if box_type == "StringBox" and hasattr(arg_val, 'type'): + if isinstance(arg_val.type, ir.IntType): + # int to string ptr + i8 = ir.IntType(8) + arg_val = builder.inttoptr(arg_val, i8.as_pointer()) + + call_args.append(arg_val) + + # Create the box + handle = builder.call(create_func, call_args, name=f"new_{box_type}") + + # Store handle + vmap[dst_vid] = handle + +def lower_newbox_generic( + builder: ir.IRBuilder, + module: ir.Module, + dst_vid: int, + vmap: Dict[int, ir.Value] +) -> None: + """ + Create a generic box with runtime allocation + + This is used when box type is not statically known. 
+ """ + # Look up generic allocation function + alloc_func = None + for f in module.functions: + if f.name == "ny_alloc_box": + alloc_func = f + break + + if not alloc_func: + # Declare ny_alloc_box(size: i64) -> i64 + i64 = ir.IntType(64) + func_type = ir.FunctionType(i64, [i64]) + alloc_func = ir.Function(module, func_type, name="ny_alloc_box") + + # Default box size (e.g., 64 bytes) + size = ir.Constant(ir.IntType(64), 64) + handle = builder.call(alloc_func, [size], name="new_box") + + vmap[dst_vid] = handle \ No newline at end of file diff --git a/src/llvm_py/instructions/phi.py b/src/llvm_py/instructions/phi.py new file mode 100644 index 00000000..fb77e158 --- /dev/null +++ b/src/llvm_py/instructions/phi.py @@ -0,0 +1,114 @@ +""" +PHI instruction lowering +Critical for SSA form - handles value merging from different control flow paths +""" + +import llvmlite.ir as ir +from typing import Dict, List, Tuple, Optional + +def lower_phi( + builder: ir.IRBuilder, + dst_vid: int, + incoming: List[Tuple[int, int]], # [(value_id, block_id), ...] 
+ vmap: Dict[int, ir.Value], + bb_map: Dict[int, ir.Block], + current_block: ir.Block, + resolver=None # Resolver instance (optional) +) -> None: + """ + Lower MIR PHI instruction + + Args: + builder: Current LLVM IR builder + dst_vid: Destination value ID + incoming: List of (value_id, block_id) pairs + vmap: Value map + bb_map: Block map + current_block: Current basic block + resolver: Optional resolver for advanced type handling + """ + if not incoming: + # No incoming edges - use zero + vmap[dst_vid] = ir.Constant(ir.IntType(64), 0) + return + + # Determine PHI type from first incoming value + first_val_id = incoming[0][0] + first_val = vmap.get(first_val_id) + + if first_val and hasattr(first_val, 'type'): + phi_type = first_val.type + else: + # Default to i64 + phi_type = ir.IntType(64) + + # Create PHI instruction + phi = builder.phi(phi_type, name=f"phi_{dst_vid}") + + # Add incoming values + for val_id, block_id in incoming: + val = vmap.get(val_id) + block = bb_map.get(block_id) + + if not val: + # Create default value based on type + if isinstance(phi_type, ir.IntType): + val = ir.Constant(phi_type, 0) + elif isinstance(phi_type, ir.DoubleType): + val = ir.Constant(phi_type, 0.0) + else: + # Pointer type - null + val = ir.Constant(phi_type, None) + + if not block: + # Skip if block not found + continue + + # Type conversion if needed + if hasattr(val, 'type') and val.type != phi_type: + # Save current position + saved_block = builder.block + saved_pos = None + if hasattr(builder, '_anchor'): + saved_pos = builder._anchor + + # Position at end of predecessor block + builder.position_at_end(block) + + # Convert types + if isinstance(phi_type, ir.IntType) and val.type.is_pointer: + val = builder.ptrtoint(val, phi_type, name=f"cast_p2i_{val_id}") + elif phi_type.is_pointer and isinstance(val.type, ir.IntType): + val = builder.inttoptr(val, phi_type, name=f"cast_i2p_{val_id}") + elif isinstance(phi_type, ir.IntType) and isinstance(val.type, ir.IntType): + # 
Int to int + if phi_type.width > val.type.width: + val = builder.zext(val, phi_type, name=f"zext_{val_id}") + else: + val = builder.trunc(val, phi_type, name=f"trunc_{val_id}") + + # Restore position + builder.position_at_end(saved_block) + if saved_pos and hasattr(builder, '_anchor'): + builder._anchor = saved_pos + + # Add to PHI + phi.add_incoming(val, block) + + # Store PHI result + vmap[dst_vid] = phi + +def defer_phi_wiring( + dst_vid: int, + incoming: List[Tuple[int, int]], + phi_deferrals: List[Tuple[int, List[Tuple[int, int]]]] +) -> None: + """ + Defer PHI wiring for sealed block approach + + Args: + dst_vid: Destination value ID + incoming: Incoming edges + phi_deferrals: List to store deferred PHIs + """ + phi_deferrals.append((dst_vid, incoming)) \ No newline at end of file diff --git a/src/llvm_py/instructions/ret.py b/src/llvm_py/instructions/ret.py index 466e45c5..1a360b0d 100644 --- a/src/llvm_py/instructions/ret.py +++ b/src/llvm_py/instructions/ret.py @@ -45,5 +45,13 @@ def lower_return( elif isinstance(return_type, ir.PointerType) and isinstance(ret_val.type, ir.IntType): # int to ptr ret_val = builder.inttoptr(ret_val, return_type) + elif isinstance(return_type, ir.IntType) and isinstance(ret_val.type, ir.IntType): + # int to int conversion + if return_type.width < ret_val.type.width: + # Truncate + ret_val = builder.trunc(ret_val, return_type) + elif return_type.width > ret_val.type.width: + # Zero extend + ret_val = builder.zext(ret_val, return_type) builder.ret(ret_val) \ No newline at end of file diff --git a/src/llvm_py/instructions/safepoint.py b/src/llvm_py/instructions/safepoint.py new file mode 100644 index 00000000..42b4a6d8 --- /dev/null +++ b/src/llvm_py/instructions/safepoint.py @@ -0,0 +1,107 @@ +""" +Safepoint instruction lowering +GC safepoints where runtime can safely collect garbage +""" + +import llvmlite.ir as ir +from typing import Dict, List, Optional + +def lower_safepoint( + builder: ir.IRBuilder, + module: ir.Module, + 
live_values: List[int], + vmap: Dict[int, ir.Value], + safepoint_id: Optional[int] = None +) -> None: + """ + Lower MIR Safepoint instruction + + Safepoints are places where GC can safely run. + Live values must be tracked for potential relocation. + + Args: + builder: Current LLVM IR builder + module: LLVM module + live_values: List of value IDs that are live across safepoint + vmap: Value map + safepoint_id: Optional safepoint identifier + """ + # Look up or declare safepoint function + safepoint_func = None + for f in module.functions: + if f.name == "ny_safepoint": + safepoint_func = f + break + + if not safepoint_func: + # Declare ny_safepoint(live_count: i64, live_values: i64*) -> void + i64 = ir.IntType(64) + void = ir.VoidType() + func_type = ir.FunctionType(void, [i64, i64.as_pointer()]) + safepoint_func = ir.Function(module, func_type, name="ny_safepoint") + + # Prepare live values array + i64 = ir.IntType(64) + if live_values: + # Allocate array for live values + array_size = len(live_values) + live_array = builder.alloca(i64, size=array_size, name="live_vals") + + # Store each live value + for i, vid in enumerate(live_values): + val = vmap.get(vid, ir.Constant(i64, 0)) + + # Ensure i64 (handles are i64) + if hasattr(val, 'type') and val.type.is_pointer: + val = builder.ptrtoint(val, i64) + + idx = ir.Constant(ir.IntType(32), i) + ptr = builder.gep(live_array, [idx]) + builder.store(val, ptr) + + # Call safepoint + count = ir.Constant(i64, array_size) + builder.call(safepoint_func, [count, live_array]) + + # After safepoint, reload values (they may have moved) + for i, vid in enumerate(live_values): + idx = ir.Constant(ir.IntType(32), i) + ptr = builder.gep(live_array, [idx]) + new_val = builder.load(ptr, name=f"reload_{vid}") + vmap[vid] = new_val + else: + # No live values + zero = ir.Constant(i64, 0) + null = ir.Constant(i64.as_pointer(), None) + builder.call(safepoint_func, [zero, null]) + +def insert_automatic_safepoint( + builder: ir.IRBuilder, + 
module: ir.Module, + location: str # "loop_header", "function_call", etc. +) -> None: + """ + Insert automatic safepoint at strategic locations + + Args: + builder: Current LLVM IR builder + module: LLVM module + location: Location type for debugging + """ + # Simple safepoint without tracking specific values + # Runtime will scan stack/registers + + check_func = None + for f in module.functions: + if f.name == "ny_check_safepoint": + check_func = f + break + + if not check_func: + # Declare ny_check_safepoint() -> void + void = ir.VoidType() + func_type = ir.FunctionType(void, []) + check_func = ir.Function(module, func_type, name="ny_check_safepoint") + + # Insert safepoint check + builder.call(check_func, [], name=f"safepoint_{location}") \ No newline at end of file diff --git a/src/llvm_py/instructions/typeop.py b/src/llvm_py/instructions/typeop.py new file mode 100644 index 00000000..1afb1907 --- /dev/null +++ b/src/llvm_py/instructions/typeop.py @@ -0,0 +1,125 @@ +""" +TypeOp instruction lowering +Handles type conversions and type checks +""" + +import llvmlite.ir as ir +from typing import Dict, Optional + +def lower_typeop( + builder: ir.IRBuilder, + op: str, + src_vid: int, + dst_vid: int, + target_type: Optional[str], + vmap: Dict[int, ir.Value], + resolver=None +) -> None: + """ + Lower MIR TypeOp instruction + + Operations: + - cast: Type conversion + - is: Type check + - as: Safe cast + + Args: + builder: Current LLVM IR builder + op: Operation type (cast, is, as) + src_vid: Source value ID + dst_vid: Destination value ID + target_type: Target type name (e.g., "StringBox", "IntegerBox") + vmap: Value map + resolver: Optional resolver for type handling + """ + src_val = vmap.get(src_vid, ir.Constant(ir.IntType(64), 0)) + + if op == "cast": + # Type casting - for now just pass through + # In real implementation, would check/convert box types + vmap[dst_vid] = src_val + + elif op == "is": + # Type check - returns boolean (i64: 1 or 0) + # For now, 
simplified implementation + if target_type == "IntegerBox": + # Check if it's a valid integer box handle + # Simplified: non-zero value + if hasattr(src_val, 'type') and src_val.type == ir.IntType(64): + zero = ir.Constant(ir.IntType(64), 0) + result = builder.icmp_unsigned('!=', src_val, zero) + # Convert i1 to i64 + result = builder.zext(result, ir.IntType(64)) + else: + result = ir.Constant(ir.IntType(64), 0) + else: + # For other types, would need runtime type info + result = ir.Constant(ir.IntType(64), 0) + + vmap[dst_vid] = result + + elif op == "as": + # Safe cast - returns value or null/0 + # For now, same as cast + vmap[dst_vid] = src_val + + else: + # Unknown operation + vmap[dst_vid] = ir.Constant(ir.IntType(64), 0) + +def lower_convert( + builder: ir.IRBuilder, + src_vid: int, + dst_vid: int, + from_type: str, + to_type: str, + vmap: Dict[int, ir.Value] +) -> None: + """ + Lower type conversion between primitive types + + Args: + builder: Current LLVM IR builder + src_vid: Source value ID + dst_vid: Destination value ID + from_type: Source type (i32, i64, f64, ptr) + to_type: Target type + vmap: Value map + """ + src_val = vmap.get(src_vid) + if not src_val: + # Default based on target type + if to_type == "f64": + vmap[dst_vid] = ir.Constant(ir.DoubleType(), 0.0) + elif to_type == "ptr": + i8 = ir.IntType(8) + vmap[dst_vid] = ir.Constant(i8.as_pointer(), None) + else: + vmap[dst_vid] = ir.Constant(ir.IntType(64), 0) + return + + # Perform conversion + if from_type == "i64" and to_type == "f64": + # int to float + result = builder.sitofp(src_val, ir.DoubleType()) + elif from_type == "f64" and to_type == "i64": + # float to int + result = builder.fptosi(src_val, ir.IntType(64)) + elif from_type == "i64" and to_type == "ptr": + # int to pointer + i8 = ir.IntType(8) + result = builder.inttoptr(src_val, i8.as_pointer()) + elif from_type == "ptr" and to_type == "i64": + # pointer to int + result = builder.ptrtoint(src_val, ir.IntType(64)) + elif from_type == 
"i32" and to_type == "i64": + # sign extend + result = builder.sext(src_val, ir.IntType(64)) + elif from_type == "i64" and to_type == "i32": + # truncate + result = builder.trunc(src_val, ir.IntType(32)) + else: + # Unknown conversion - pass through + result = src_val + + vmap[dst_vid] = result \ No newline at end of file diff --git a/src/llvm_py/llvm_builder.py b/src/llvm_py/llvm_builder.py index fca304cd..0f8a0cd4 100644 --- a/src/llvm_py/llvm_builder.py +++ b/src/llvm_py/llvm_builder.py @@ -6,10 +6,30 @@ Following the design principles in docs/LLVM_LAYER_OVERVIEW.md import json import sys -from typing import Dict, Any, Optional +import os +from typing import Dict, Any, Optional, List, Tuple import llvmlite.ir as ir import llvmlite.binding as llvm +# Import instruction handlers +from instructions.const import lower_const +from instructions.binop import lower_binop +from instructions.jump import lower_jump +from instructions.branch import lower_branch +from instructions.ret import lower_return +from instructions.phi import lower_phi, defer_phi_wiring +from instructions.call import lower_call +from instructions.boxcall import lower_boxcall +from instructions.externcall import lower_externcall +from instructions.typeop import lower_typeop, lower_convert +from instructions.newbox import lower_newbox +from instructions.safepoint import lower_safepoint, insert_automatic_safepoint +from instructions.barrier import lower_barrier +from instructions.loopform import lower_while_loopform + +from resolver import Resolver +from mir_reader import MIRReader + class NyashLLVMBuilder: """Main LLVM IR builder for Nyash MIR""" @@ -27,22 +47,196 @@ class NyashLLVMBuilder: self.i1 = ir.IntType(1) self.i8p = self.i8.as_pointer() self.f64 = ir.DoubleType() + self.void = ir.VoidType() + + # Value and block maps + self.vmap: Dict[int, ir.Value] = {} # value_id -> LLVM value + self.bb_map: Dict[int, ir.Block] = {} # block_id -> LLVM block + + # PHI deferrals for sealed block approach + 
# Methods of NyashLLVMBuilder (MIR JSON -> LLVM IR lowering driver).

def build_from_mir(self, mir_json: Dict[str, Any]) -> str:
    """Lower every function in the MIR JSON and return the module as IR text.

    When the reader yields no functions, a dummy ``ny_main`` is emitted
    instead and its module text is returned.
    """
    functions = MIRReader(mir_json).get_functions()
    if functions:
        for func_data in functions:
            self.lower_function(func_data)

        # PHIs were only recorded during lowering; wire them afterwards
        self._wire_deferred_phis()
        return str(self.module)

    # Nothing to lower - fall back to the placeholder entry point
    return self._create_dummy_main()

def _create_dummy_main(self) -> str:
    """Emit ``ny_main() -> i32`` that returns 0 and return the module text."""
    signature = ir.FunctionType(self.i32, [])
    ny_main = ir.Function(self.module, signature, name="ny_main")
    entry = ny_main.append_basic_block(name="entry")
    ir.IRBuilder(entry).ret(ir.Constant(self.i32, 0))
    return str(self.module)

def lower_function(self, func_data: Dict[str, Any]):
    """Create the LLVM function for one MIR function and lower its blocks.

    ``ny_main`` gets the signature ``i32()``; every other function gets
    ``i64(i64, ...)`` with one i64 parameter per entry in ``params``.
    """
    name = func_data.get("name", "unknown")
    params = func_data.get("params", [])
    blocks = func_data.get("blocks", [])

    if name != "ny_main":
        func_ty = ir.FunctionType(self.i64, [self.i64] * len(params))
    else:
        func_ty = ir.FunctionType(self.i32, [])

    func = ir.Function(self.module, func_ty, name=name)

    # Pass 1: create every basic block so that all block IDs are
    # registered in bb_map before any instruction is lowered.
    for block_data in blocks:
        block_id = block_data.get("id", 0)
        self.bb_map[block_id] = func.append_basic_block(f"bb{block_id}")

    # Pass 2: lower the instructions of each block.
    for block_data in blocks:
        llvm_block = self.bb_map[block_data.get("id", 0)]
        self.lower_block(llvm_block, block_data, func)

def lower_block(self, bb: ir.Block, block_data: Dict[str, Any], func: ir.Function):
    """Lower the instructions of one basic block, in order, into ``bb``."""
    builder = ir.IRBuilder(bb)
    for inst in block_data.get("instructions", []):
        self.lower_instruction(builder, inst, func)

def lower_instruction(self, builder: ir.IRBuilder, inst: Dict[str, Any], func: ir.Function):
    """Dispatch one MIR instruction (selected by its "op" field) to its handler.

    Unknown ops are ignored; they are reported on stdout only when the
    environment variable NYASH_CLI_VERBOSE is "1".
    """
    op = inst.get("op")
    field = inst.get

    if op == "const":
        lower_const(builder, self.module, field("dst"), field("value"), self.vmap)
        return

    if op == "binop":
        operation = field("operation")
        lhs, rhs, dst = field("lhs"), field("rhs"), field("dst")
        lower_binop(builder, self.resolver, operation, lhs, rhs, dst,
                    self.vmap, builder.block)
        return

    if op == "jump":
        lower_jump(builder, field("target"), self.bb_map)
        return

    if op == "branch":
        lower_branch(builder, field("cond"), field("then"), field("else"),
                     self.vmap, self.bb_map)
        return

    if op == "ret":
        lower_return(builder, field("value"), self.vmap, func.return_value.type)
        return

    if op == "phi":
        # Only record the PHI here; wiring happens after all functions
        # have been lowered.
        defer_phi_wiring(field("dst"), field("incoming", []), self.phi_deferrals)
        return

    if op == "call":
        callee, args, dst = field("func"), field("args", []), field("dst")
        lower_call(builder, self.module, callee, args, dst, self.vmap, self.resolver)
        return

    if op == "boxcall":
        receiver, method = field("box"), field("method")
        args, dst = field("args", []), field("dst")
        lower_boxcall(builder, self.module, receiver, method, args, dst,
                      self.vmap, self.resolver)
        return

    if op == "externcall":
        callee, args, dst = field("func"), field("args", []), field("dst")
        lower_externcall(builder, self.module, callee, args, dst,
                         self.vmap, self.resolver)
        return

    if op == "newbox":
        box_type, args, dst = field("type"), field("args", []), field("dst")
        lower_newbox(builder, self.module, box_type, args, dst,
                     self.vmap, self.resolver)
        return

    if op == "typeop":
        operation, src = field("operation"), field("src")
        dst, target_type = field("dst"), field("target_type")
        lower_typeop(builder, operation, src, dst, target_type,
                     self.vmap, self.resolver)
        return

    if op == "safepoint":
        lower_safepoint(builder, self.module, field("live", []), self.vmap)
        return

    if op == "barrier":
        lower_barrier(builder, field("type", "memory"))
        return

    if op == "while":
        # Experimental LoopForm lowering; the counter is bumped before the
        # attempt and passed to the handler.
        cond, body = field("cond"), field("body", [])
        self.loop_count += 1
        handled = lower_while_loopform(builder, func, cond, body,
                                       self.loop_count, self.vmap, self.bb_map)
        if not handled:
            # Fallback to regular while
            self._lower_while_regular(builder, inst, func)
        return

    if os.environ.get('NYASH_CLI_VERBOSE') == '1':
        print(f"[Python LLVM] Unknown instruction: {op}")

def _lower_while_regular(self, builder: ir.IRBuilder, inst: Dict[str, Any], func: ir.Function):
    """Fallback lowering for ``while``; currently a no-op placeholder."""
    # TODO: Implement regular while lowering
    return None

def _wire_deferred_phis(self):
    """Walk the deferred PHI records; the wiring itself is not implemented yet."""
    # TODO: Implement PHI wiring after all blocks are created
    for _dst_vid, _incoming in self.phi_deferrals:
        # Placeholder: find the block containing this PHI and wire its
        # incoming edges.
        continue
class MIRReader:
    """Wrapper that exposes the functions of a MIR JSON document as a list."""

    def __init__(self, mir_json: Dict[str, Any]):
        """Store the MIR JSON document; nothing is parsed until requested."""
        self.mir_json = mir_json
        # Cached result of get_functions(); None until the first call.
        self.functions = None

    def get_functions(self) -> List[Dict[str, Any]]:
        """Return the functions in the list format llvm_builder expects.

        The "functions" entry of the MIR JSON may be:
        - a list: returned as-is;
        - a dict mapping name -> function data: converted to a list in which
          each entry is a shallow copy of the function data with its "name"
          key set to the dict key.
        Any other value (or a missing key) yields an empty list.

        The result is computed once and cached on the instance.
        """
        if self.functions is not None:
            return self.functions

        funcs = self.mir_json.get("functions", [])
        if isinstance(funcs, list):
            # Already in list format
            self.functions = funcs
        elif isinstance(funcs, dict):
            # Copy each entry instead of writing "name" into the caller's
            # data, so reading the MIR never mutates the input document.
            self.functions = [
                {**func_data, "name": name}
                for name, func_data in funcs.items()
            ]
        else:
            self.functions = []

        return self.functions
"value": "Hello Nyash!" + } + }, + { + "op": "boxcall", + "box": 1, + "method": "set", + "args": [2], + "dst": 3 + }, + { + "op": "boxcall", + "box": 1, + "method": "get", + "args": [], + "dst": 4 + }, + { + "op": "externcall", + "func": "print", + "args": [4], + "dst": null + }, + { + "op": "const", + "dst": 5, + "value": { + "type": "i64", + "value": 0 + } + }, + { + "op": "ret", + "value": 5 + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/src/llvm_py/test_minimal.json b/src/llvm_py/test_minimal.json new file mode 100644 index 00000000..844397c8 --- /dev/null +++ b/src/llvm_py/test_minimal.json @@ -0,0 +1,27 @@ +{ + "functions": [ + { + "name": "ny_main", + "params": [], + "blocks": [ + { + "id": 0, + "instructions": [ + { + "op": "const", + "dst": 1, + "value": { + "type": "i64", + "value": 42 + } + }, + { + "op": "ret", + "value": 1 + } + ] + } + ] + } + ] +} \ No newline at end of file