llvm: unify lowering via Resolver and Cursor; remove non-sealed PHI wiring; apply Resolver to extern/call/boxcall/arrays/maps/mem; add llvmlite harness docs; add LLVM layer overview; add LoopForm preheader

This commit is contained in:
Selfhosting Dev
2025-09-12 20:40:48 +09:00
parent d5af6b1d48
commit 38aea59fc1
20 changed files with 986 additions and 79 deletions

View File

@ -0,0 +1,32 @@
"""
MIR14 instruction lowering modules
Each instruction is lowered in its own module, mirroring the structure of the Rust code
"""
# Import all instruction handlers
from .const import lower_const
from .binop import lower_binop
from .compare import lower_compare
from .jump import lower_jump
from .branch import lower_branch
from .ret import lower_return
from .phi import lower_phi
from .call import lower_call
from .boxcall import lower_boxcall
from .externcall import lower_externcall
from .typeop import lower_typeop
from .safepoint import lower_safepoint
from .barrier import lower_barrier
from .newbox import lower_newbox
# LoopForm support
from .loopform import LoopFormContext, lower_while_loopform
# Names re-exported by this package: the lowering entry points imported above,
# followed by the LoopForm helpers.
__all__ = [
    'lower_const',
    'lower_binop',
    'lower_compare',
    'lower_jump',
    'lower_branch',
    'lower_return',
    'lower_phi',
    'lower_call',
    'lower_boxcall',
    'lower_externcall',
    'lower_typeop',
    'lower_safepoint',
    'lower_barrier',
    'lower_newbox',
    # LoopForm support
    'LoopFormContext',
    'lower_while_loopform',
]

View File

@ -0,0 +1,76 @@
"""
BinOp (Binary Operation) instruction lowering
Handles +, -, *, /, %, &, |, ^, <<, >>
"""
import llvmlite.ir as ir
from typing import Dict
# MIR operator -> (IRBuilder method name, prefix of the emitted value name).
_BINOP_EMITTERS = {
    '+': ('add', 'add'),
    '-': ('sub', 'sub'),
    '*': ('mul', 'mul'),
    '/': ('sdiv', 'div'),    # signed division
    '%': ('srem', 'rem'),    # signed remainder
    '&': ('and_', 'and'),
    '|': ('or_', 'or'),
    '^': ('xor', 'xor'),
    '<<': ('shl', 'shl'),
    '>>': ('ashr', 'ashr'),  # arithmetic shift right
}


def _binop_operand_to_i64(builder, value):
    """Coerce a pointer or non-64-bit integer operand to i64; pass anything else through."""
    i64 = ir.IntType(64)
    vtype = getattr(value, 'type', None)
    if vtype is None or vtype == i64:
        return value
    if vtype.is_pointer:
        return builder.ptrtoint(value, i64)
    if isinstance(vtype, ir.IntType):
        if vtype.width > 64:
            return builder.trunc(value, i64)
        if vtype.width == 1:
            # A boolean widens to 0/1, never to -1.
            return builder.zext(value, i64)
        # The division/remainder/shift ops below are signed, so keep the sign.
        return builder.sext(value, i64)
    # NOTE(review): non-integer, non-pointer operands (e.g. double) are passed
    # through unchanged, as before; the integer ops below are not valid for
    # them - confirm whether float binops need separate lowering.
    return value


def lower_binop(
    builder: ir.IRBuilder,
    resolver,  # Resolver instance
    op: str,
    lhs: int,
    rhs: int,
    dst: int,
    vmap: Dict[int, ir.Value],
    current_block: ir.Block
) -> None:
    """
    Lower MIR BinOp instruction

    Both operands are looked up in ``vmap`` (a missing ID becomes the i64
    constant 0), coerced to i64, combined with the LLVM instruction matching
    ``op``, and the result is stored in ``vmap[dst]``.

    Args:
        builder: Current LLVM IR builder
        resolver: Resolver for value resolution (accepted but not used here;
            operands come straight from ``vmap``)
        op: Operation string (+, -, *, /, %, &, |, ^, <<, >>)
        lhs: Left operand value ID
        rhs: Right operand value ID
        dst: Destination value ID
        vmap: Value map, read for the operands and updated with the result
        current_block: Current basic block (accepted but not used here)
    """
    i64 = ir.IntType(64)

    # Fetch both operands first, then coerce lhs before rhs so that any
    # conversion instructions are emitted in operand order.
    lhs_val = vmap.get(lhs, ir.Constant(i64, 0))
    rhs_val = vmap.get(rhs, ir.Constant(i64, 0))
    lhs_val = _binop_operand_to_i64(builder, lhs_val)
    rhs_val = _binop_operand_to_i64(builder, rhs_val)

    emitter = _BINOP_EMITTERS.get(op)
    if emitter is None:
        # Unknown op - result is zero (best-effort, no exception)
        result = ir.Constant(i64, 0)
    else:
        method_name, name_prefix = emitter
        emit = getattr(builder, method_name)
        result = emit(lhs_val, rhs_val, name=f"{name_prefix}_{dst}")

    # Store result
    vmap[dst] = result

View File

@ -0,0 +1,50 @@
"""
Branch instruction lowering
Conditional branch based on condition value
"""
import llvmlite.ir as ir
from typing import Dict
def lower_branch(
    builder: ir.IRBuilder,
    cond_vid: int,
    then_bid: int,
    else_bid: int,
    vmap: Dict[int, ir.Value],
    bb_map: Dict[int, ir.Block]
) -> None:
    """
    Lower MIR Branch instruction

    Looks up the condition in ``vmap``, normalizes it to an i1, and emits a
    conditional branch to the two target blocks.

    Args:
        builder: Current LLVM IR builder
        cond_vid: Condition value ID; a missing ID is treated as false
        then_bid: Then block ID
        else_bid: Else block ID
        vmap: Value map
        bb_map: Block map

    If either target block ID is absent from ``bb_map`` nothing is emitted,
    which leaves the current block without a terminator.
    """
    i1 = ir.IntType(1)

    cond = vmap.get(cond_vid)
    if cond is None:
        # Default to false if missing
        cond = ir.Constant(i1, 0)

    # Normalize the condition to i1.
    cond_type = getattr(cond, 'type', None)
    if isinstance(cond_type, ir.IntType) and cond_type != i1:
        # Any wider integer (i64 in the common case): compare != 0
        zero = ir.Constant(cond_type, 0)
        cond = builder.icmp_unsigned('!=', cond, zero, name="cond_i1")
    elif cond_type is not None and cond_type.is_pointer:
        # Any pointer type, not only i8*: compare != null
        null = ir.Constant(cond_type, None)
        cond = builder.icmp_unsigned('!=', cond, null, name="cond_p1")

    # Get target blocks
    then_bb = bb_map.get(then_bid)
    else_bb = bb_map.get(else_bid)
    if then_bb is None or else_bb is None:
        # Best-effort: skip the branch rather than raise.
        return
    builder.cbranch(cond, then_bb, else_bb)

View File

@ -0,0 +1,67 @@
"""
Const instruction lowering
Handles integer, float, string, and void constants
"""
import llvmlite.ir as ir
from typing import Dict, Any
def lower_const(
    builder: ir.IRBuilder,
    module: ir.Module,
    dst: int,
    value: Dict[str, Any],
    vmap: Dict[int, ir.Value]
) -> None:
    """
    Lower MIR Const instruction

    Args:
        builder: Current LLVM IR builder
        module: LLVM module (receives the global for string constants)
        dst: Destination value ID
        value: Const value dict with 'type' and 'value' fields
        vmap: Value map (value_id -> llvm value); ``vmap[dst]`` is set

    Mapping by ``value['type']`` (defaults to 'void' when absent):
        'i64'    -> i64 constant
        'f64'    -> double constant
        'string' -> pointer to the first byte of a private, NUL-terminated
                    UTF-8 global
        other    -> i64 zero (this covers 'void' and unknown type tags)
    """
    const_type = value.get('type', 'void')
    const_val = value.get('value')

    if const_type == 'i64':
        # Integer constant
        vmap[dst] = ir.Constant(ir.IntType(64), int(const_val))
    elif const_type == 'f64':
        # Float constant
        vmap[dst] = ir.Constant(ir.DoubleType(), float(const_val))
    elif const_type == 'string':
        # Build the [N x i8] initializer directly from a bytearray, the form
        # llvmlite accepts for byte-string constants.
        # NOTE(review): the previous code called ir.Constant.literal_string,
        # which is not among llvmlite's documented Constant constructors
        # (literal_array / literal_struct) - confirm against the pinned version.
        data = bytearray(str(const_val).encode('utf-8') + b'\0')
        array_type = ir.ArrayType(ir.IntType(8), len(data))
        str_const = ir.Constant(array_type, data)

        # Ask the module for a free name: a fixed ".str.<dst>" is rejected as
        # a duplicate if the same value ID is lowered twice into one module
        # (presumably possible when several functions share a module).
        global_name = module.get_unique_name(f".str.{dst}")
        global_str = ir.GlobalVariable(module, array_type, name=global_name)
        global_str.initializer = str_const
        global_str.linkage = 'private'
        global_str.global_constant = True

        # Get pointer to first element: GEP [0, 0] turns [N x i8]* into i8*
        zero32 = ir.Constant(ir.IntType(32), 0)
        vmap[dst] = builder.gep(global_str, [zero32, zero32], name=f"str_ptr_{dst}")
    else:
        # Void/null constant or unknown type tag - use i64 zero
        vmap[dst] = ir.Constant(ir.IntType(64), 0)

View File

@ -0,0 +1,24 @@
"""
Jump instruction lowering
Unconditional branch to target block
"""
import llvmlite.ir as ir
from typing import Dict
def lower_jump(
    builder: ir.IRBuilder,
    target_bid: int,
    bb_map: Dict[int, ir.Block]
) -> None:
    """
    Lower MIR Jump instruction

    Emits an unconditional branch to the LLVM block registered for
    ``target_bid``. When no block is registered, nothing is emitted.

    Args:
        builder: Current LLVM IR builder
        target_bid: Target block ID
        bb_map: Map from block ID to LLVM block
    """
    destination = bb_map.get(target_bid)
    if not destination:
        # No usable target: leave the builder untouched.
        return
    builder.branch(destination)

View File

@ -0,0 +1,121 @@
"""
LoopForm IR implementation
Experimental loop normalization following paper-e-loop-signal-ir
"""
import os
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

import llvmlite.ir as ir
@dataclass
class LoopFormContext:
    """
    LoopForm fixed block structure
    preheader → header → body → dispatch → latch/exit

    Bundles the six basic blocks created for one loop, the loop's ID, and the
    two PHI nodes that live in the dispatch block. The PHI fields start as
    None and are filled in once the dispatch block has been populated.
    """
    # The six blocks, in the order they are appended to the function.
    preheader: ir.Block
    header: ir.Block
    body: ir.Block
    dispatch: ir.Block
    latch: ir.Block
    exit: ir.Block
    # Numeric ID used in the block and PHI names of this loop.
    loop_id: int
    # PHI nodes in dispatch block
    tag_phi: Optional[ir.PhiInstr] = None
    payload_phi: Optional[ir.PhiInstr] = None
def create_loopform_blocks(
    func: ir.Function,
    loop_id: int,
    prefix: str = "main"
) -> LoopFormContext:
    """
    Append the six LoopForm blocks to ``func`` and bundle them in a context.

    Blocks are named ``{prefix}_lf{loop_id}_{role}`` and appended in the order
    preheader, header, body, dispatch, latch, exit. The PHI fields of the
    returned context are left at their None defaults.
    """
    stem = f"{prefix}_lf{loop_id}"
    # Tuple order is the order in which the blocks are appended to func.
    roles = ("preheader", "header", "body", "dispatch", "latch", "exit")
    blocks = {role: func.append_basic_block(f"{stem}_{role}") for role in roles}
    return LoopFormContext(loop_id=loop_id, **blocks)
def lower_while_loopform(
    builder: ir.IRBuilder,
    func: ir.Function,
    condition_vid: int,
    body_instructions: List[Any],
    loop_id: int,
    vmap: Dict[int, ir.Value],
    bb_map: Dict[int, ir.Block]
) -> bool:
    """
    Lower a while loop using LoopForm structure

    Emits the fixed skeleton preheader -> header -> body -> dispatch ->
    latch/exit. The header branches on the condition; the dispatch block
    merges a (tag, payload) signal through two PHI nodes and switches on the
    tag (0 = Next -> latch, anything else -> exit).

    Args:
        builder: Current LLVM IR builder; on a True return it is positioned
            at the end of the exit block
        func: Function that receives the new blocks
        condition_vid: Value ID of the loop condition; a missing ID is
            treated as false
        body_instructions: Loop body instructions (not lowered by this
            implementation; the body block only jumps to dispatch)
        loop_id: Numeric ID used in block and PHI names
        vmap: Value map
        bb_map: Block map (accepted but not used here)

    Environment:
        NYASH_ENABLE_LOOPFORM: must be '1', otherwise nothing is emitted
        NYASH_LOOPFORM_LATCH2HEADER: '1' makes the latch branch back to the
            header; otherwise the latch ends in ``unreachable``
        NYASH_CLI_VERBOSE: '1' prints a one-line trace

    Returns:
        True if LoopForm was applied, False otherwise
    """
    # Check if enabled
    if os.environ.get('NYASH_ENABLE_LOOPFORM') != '1':
        return False

    i8 = ir.IntType(8)
    i64 = ir.IntType(64)
    tag_next = ir.Constant(i8, 0)   # Next = 0
    tag_break = ir.Constant(i8, 1)  # Break = 1
    no_payload = ir.Constant(i64, 0)

    # Create LoopForm blocks
    lf = create_loopform_blocks(func, loop_id)

    # Preheader: Jump to header
    # NOTE(review): the block that was current on entry is not branched into
    # the preheader here - confirm that the caller wires that edge.
    builder.position_at_end(lf.preheader)
    builder.branch(lf.header)

    # Header: Evaluate condition
    builder.position_at_end(lf.header)
    cond = vmap.get(condition_vid, ir.Constant(ir.IntType(1), 0))
    # Convert to i1 if needed
    if hasattr(cond, 'type') and cond.type == i64:
        cond = builder.icmp_unsigned('!=', cond, ir.Constant(i64, 0))
    builder.cbranch(cond, lf.body, lf.dispatch)

    # Body: Pass through to dispatch (Phase 1)
    builder.position_at_end(lf.body)
    builder.branch(lf.dispatch)

    # Dispatch: Central PHI point
    builder.position_at_end(lf.dispatch)
    tag_phi = builder.phi(i8, name=f"lf{loop_id}_tag")
    payload_phi = builder.phi(i64, name=f"lf{loop_id}_payload")

    # Dispatch has two predecessors (header and body), and a PHI needs one
    # incoming value per predecessor.
    # From header (condition false): Break signal
    tag_phi.add_incoming(tag_break, lf.header)
    payload_phi.add_incoming(no_payload, lf.header)
    # From body (fell through): Next signal. Without this edge the PHIs are
    # malformed.
    tag_phi.add_incoming(tag_next, lf.body)
    payload_phi.add_incoming(no_payload, lf.body)

    # Switch on tag: Next goes to the latch, everything else exits
    switch = builder.switch(tag_phi, lf.exit)
    switch.add_case(tag_next, lf.latch)

    # Latch: Back to header (if enabled)
    builder.position_at_end(lf.latch)
    if os.environ.get('NYASH_LOOPFORM_LATCH2HEADER') == '1':
        builder.branch(lf.header)
    else:
        builder.unreachable()

    # Exit: Continue after loop
    builder.position_at_end(lf.exit)
    # Builder position will be set by caller

    # Store context
    # NOTE(review): lf is local and not returned, so these PHI handles are
    # not visible to the caller - confirm whether it should be exposed.
    lf.tag_phi = tag_phi
    lf.payload_phi = payload_phi

    if os.environ.get('NYASH_CLI_VERBOSE') == '1':
        print(f"[LoopForm] Created loop structure (id={loop_id})")
    return True

View File

@ -0,0 +1,49 @@
"""
Return instruction lowering
Handles void and value returns
"""
import llvmlite.ir as ir
from typing import Dict, Optional
def _fit_int_to_return_type(builder, value, return_type):
    """Convert an integer value to the integer return type of a different width."""
    if return_type.width == 1:
        # Boolean return: any non-zero value is true (same rule as branch conditions).
        return builder.icmp_unsigned('!=', value, ir.Constant(value.type, 0))
    if return_type.width < value.type.width:
        return builder.trunc(value, return_type)
    if value.type.width == 1:
        # A boolean widens to 0/1, never to -1.
        return builder.zext(value, return_type)
    return builder.sext(value, return_type)


def lower_return(
    builder: ir.IRBuilder,
    value_id: Optional[int],
    vmap: Dict[int, ir.Value],
    return_type: ir.Type
) -> None:
    """
    Lower MIR Return instruction

    Emits a ``ret`` that agrees with ``return_type``:
      * void return type -> ``ret void`` (any value is dropped)
      * no value_id but a non-void return type -> zero/null of that type
      * value_id missing from ``vmap`` -> zero/null of the return type
      * pointer <-> integer and integer-width mismatches are converted

    Args:
        builder: Current LLVM IR builder
        value_id: Optional return value ID
        vmap: Value map
        return_type: Expected return type
    """
    returns_value = (
        isinstance(return_type, ir.Type)
        and not isinstance(return_type, ir.VoidType)
    )

    if isinstance(return_type, ir.VoidType):
        # A void function cannot return an operand.
        builder.ret_void()
        return

    if value_id is None:
        if returns_value:
            # 'ret void' is invalid in a non-void function; return the same
            # default that a missing value would get.
            builder.ret(ir.Constant(return_type, None))
        else:
            # return_type is not a usable LLVM type: plain void return
            builder.ret_void()
        return

    # Get return value
    ret_val = vmap.get(value_id)
    if ret_val is None:
        # Default based on return type
        if isinstance(return_type, ir.IntType):
            ret_val = ir.Constant(return_type, 0)
        elif isinstance(return_type, ir.DoubleType):
            ret_val = ir.Constant(return_type, 0.0)
        else:
            # Pointer type - null
            ret_val = ir.Constant(return_type, None)

    # Type adjustment if needed
    val_type = getattr(ret_val, 'type', None)
    if val_type is not None and val_type != return_type:
        if isinstance(return_type, ir.IntType):
            if val_type.is_pointer:
                # ptr to int
                ret_val = builder.ptrtoint(ret_val, return_type)
            elif isinstance(val_type, ir.IntType):
                # int to int of another width
                ret_val = _fit_int_to_return_type(builder, ret_val, return_type)
        elif isinstance(return_type, ir.PointerType) and isinstance(val_type, ir.IntType):
            # int to ptr
            ret_val = builder.inttoptr(ret_val, return_type)

    builder.ret(ret_val)