Files
hakorune/src/llvm_py/builders/function_lower.py
nyash-codex f6c5dc9e43 Phase 22.x WIP: LLVM backend improvements + MIR builder enhancements
LLVM backend improvements:
- Add native LLVM backend support (NYASH_LLVM_BACKEND=native)
- Add crate backend selector with priority (crate > llvmlite)
- Add native_llvm_builder.py for native IR generation
- Add NYASH_LLVM_NATIVE_TRACE=1 for IR dump

MIR builder enhancements:
- Refactor lower_if_compare_* boxes for better code generation
- Refactor lower_return_* boxes for optimized returns
- Refactor lower_loop_* boxes for loop handling
- Refactor lower_method_* boxes for method calls
- Update pattern_util_box for better pattern matching

Smoke tests:
- Add phase2100 S3 backend selector tests (17 new tests)
- Add phase2120 native backend tests (4 new tests)
- Add phase2034 MIR builder internal tests (2 new tests)
- Add phase2211 TLV shim parity test

Documentation:
- Update ENV_VARS.md with LLVM backend variables
- Update CURRENT_TASK.md with progress
- Update README.md and CHANGELOG.md

Config:
- Add NYASH_LLVM_BACKEND env support in src/config/env.rs
- Update ny_mir_builder.sh for backend selection
- Update dispatch.rs for backend routing

Tools:
- Add tools/native_llvm_builder.py
- Update smokes/v2/profiles/quick/core/phase2100/run_all.sh

Known: Many Hako builder internal files modified for optimization
2025-11-09 23:40:36 +09:00

329 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from typing import Dict, Any, List
from llvmlite import ir
from trace import debug as trace_debug
from prepass.if_merge import plan_ret_phi_predeclare
from prepass.loops import detect_simple_while
from phi_wiring import (
setup_phi_placeholders as _setup_phi_placeholders,
finalize_phis as _finalize_phis,
build_succs as _build_succs,
)
def lower_function(builder, func_data: Dict[str, Any]):
"""Lower a single MIR function to LLVM IR using the given builder context.
This is a faithful extraction of NyashLLVMBuilder.lower_function.
"""
import os, re
name = func_data.get("name", "unknown")
builder.current_function_name = name
params = func_data.get("params", [])
blocks = func_data.get("blocks", [])
# Determine function signature
if name == "ny_main":
# Special case: ny_main returns i64 to match runtime (nyrt) expectations
func_ty = ir.FunctionType(builder.i64, [])
else:
# Default: i64(i64, ...) signature; derive arity from '/N' suffix when params missing
m = re.search(r"/(\d+)$", name)
arity = int(m.group(1)) if m else len(params)
param_types = [builder.i64] * arity
func_ty = ir.FunctionType(builder.i64, param_types)
# Reset per-function maps and resolver caches to avoid cross-function collisions
try:
builder.vmap.clear()
except Exception:
builder.vmap = {}
try:
builder.bb_map.clear()
except Exception:
builder.bb_map = {}
try:
# Reset resolver caches keyed by block names
builder.resolver.i64_cache.clear()
builder.resolver.ptr_cache.clear()
builder.resolver.f64_cache.clear()
if hasattr(builder.resolver, '_end_i64_cache'):
builder.resolver._end_i64_cache.clear()
if hasattr(builder.resolver, 'string_ids'):
builder.resolver.string_ids.clear()
if hasattr(builder.resolver, 'string_literals'):
builder.resolver.string_literals.clear()
if hasattr(builder.resolver, 'string_ptrs'):
builder.resolver.string_ptrs.clear()
except Exception:
pass
# Create or reuse function
func = None
for f in builder.module.functions:
if f.name == name:
func = f
break
if func is None:
func = ir.Function(builder.module, func_ty, name=name)
# Map parameters to vmap (value_id: 0..arity-1)
try:
arity = len(func.args)
for i in range(arity):
builder.vmap[i] = func.args[i]
except Exception:
pass
# Build predecessor map from control-flow edges
builder.preds = {}
for block_data in blocks:
bid = block_data.get("id", 0)
builder.preds.setdefault(bid, [])
for block_data in blocks:
src = block_data.get("id", 0)
for inst in block_data.get("instructions", []):
op = inst.get("op")
if op == "jump":
t = inst.get("target")
if t is not None:
builder.preds.setdefault(t, []).append(src)
elif op == "branch":
th = inst.get("then")
el = inst.get("else")
if th is not None:
builder.preds.setdefault(th, []).append(src)
if el is not None:
builder.preds.setdefault(el, []).append(src)
# Create all blocks first
for block_data in blocks:
bid = block_data.get("id", 0)
block_name = f"bb{bid}"
bb = func.append_basic_block(block_name)
builder.bb_map[bid] = bb
# Build quick lookup for blocks by id
block_by_id: Dict[int, Dict[str, Any]] = {}
for block_data in blocks:
block_by_id[block_data.get("id", 0)] = block_data
# Determine entry block: first with no predecessors; fallback to first block
entry_bid = None
for bid, preds in builder.preds.items():
if len(preds) == 0:
entry_bid = bid
break
if entry_bid is None and blocks:
entry_bid = blocks[0].get("id", 0)
# Compute approx preds-first order
visited = set()
order: List[int] = []
def visit(bid: int):
if bid in visited:
return
visited.add(bid)
for p in builder.preds.get(bid, []):
visit(p)
order.append(bid)
if entry_bid is not None:
visit(entry_bid)
for bid in block_by_id.keys():
if bid not in visited:
visit(bid)
# Prepass: collect PHI metadata and placeholders
_setup_phi_placeholders(builder, blocks)
# Optional: if-merge prepass (gate NYASH_LLVM_PREPASS_IFMERGE)
try:
if os.environ.get('NYASH_LLVM_PREPASS_IFMERGE') == '1':
plan = plan_ret_phi_predeclare(block_by_id)
if plan:
if not hasattr(builder, 'block_phi_incomings') or builder.block_phi_incomings is None:
builder.block_phi_incomings = {}
for bbid, ret_vid in plan.items():
try:
preds_raw = [p for p in builder.preds.get(bbid, []) if p != bbid]
except Exception:
preds_raw = []
seen = set(); preds_list = []
for p in preds_raw:
if p not in seen:
preds_list.append(p); seen.add(p)
try:
builder.block_phi_incomings.setdefault(int(bbid), {})[int(ret_vid)] = [
(int(p), int(ret_vid)) for p in preds_list
]
except Exception:
pass
try:
trace_debug(f"[prepass] if-merge: plan metadata at bb{bbid} for v{ret_vid} preds={preds_list}")
except Exception:
pass
except Exception:
pass
# Predeclare PHIs for used-in-block values defined in predecessors (multi-pred only)
try:
from cfg.utils import build_preds_succs
local_preds, _ = build_preds_succs(block_by_id)
def _collect_defs(block):
defs = set()
for ins in block.get('instructions') or []:
try:
dstv = ins.get('dst')
if isinstance(dstv, int):
defs.add(int(dstv))
except Exception:
pass
return defs
def _collect_uses(block):
uses = set()
for ins in block.get('instructions') or []:
for k in ('lhs','rhs','value','cond','box_val'):
try:
v = ins.get(k)
if isinstance(v, int):
uses.add(int(v))
except Exception:
pass
return uses
if not hasattr(builder, 'block_phi_incomings') or builder.block_phi_incomings is None:
builder.block_phi_incomings = {}
for bid, blk in block_by_id.items():
try:
preds_raw = [p for p in local_preds.get(int(bid), []) if p != int(bid)]
except Exception:
preds_raw = []
seen = set(); preds_list = []
for p in preds_raw:
if p not in seen:
preds_list.append(p); seen.add(p)
if len(preds_list) <= 1:
continue
defs = _collect_defs(blk)
uses = _collect_uses(blk)
need = [u for u in uses if u not in defs]
if not need:
continue
for vid in need:
try:
builder.block_phi_incomings.setdefault(int(bid), {}).setdefault(int(vid), [])
builder.block_phi_incomings[int(bid)][int(vid)] = [(int(p), int(vid)) for p in preds_list]
except Exception:
pass
try:
builder.resolver.block_phi_incomings = builder.block_phi_incomings
except Exception:
pass
except Exception:
pass
# Optional: simple loop prepass
loop_plan = None
try:
if os.environ.get('NYASH_LLVM_PREPASS_LOOP') == '1':
loop_plan = detect_simple_while(block_by_id)
if loop_plan is not None:
trace_debug(f"[prepass] detect loop header=bb{loop_plan['header']} then=bb{loop_plan['then']} latch=bb{loop_plan['latch']} exit=bb{loop_plan['exit']}")
except Exception:
loop_plan = None
from builders.block_lower import lower_blocks as _lower_blocks
_lower_blocks(builder, func, block_by_id, order, loop_plan)
# Optional: capture lowering ctx for downstream helpers
try:
builder.ctx = dict(
module=builder.module,
i64=builder.i64,
i32=builder.i32,
i8=builder.i8,
i1=builder.i1,
i8p=builder.i8p,
vmap=builder.vmap,
bb_map=builder.bb_map,
preds=builder.preds,
block_end_values=builder.block_end_values,
resolver=builder.resolver,
trace_phi=os.environ.get('NYASH_LLVM_TRACE_PHI') == '1',
verbose=os.environ.get('NYASH_CLI_VERBOSE') == '1',
)
builder.resolver.ctx = builder.ctx
except Exception:
pass
# Finalize PHIs for this function
_finalize_phis(builder)
# Safety pass: ensure every basic block ends with a terminator.
# This avoids llvmlite IR parse errors like "expected instruction opcode" on empty blocks.
try:
_enforce_terminators(builder, func, block_by_id)
except Exception:
# Non-fatal in bring-up; better to emit IR than crash
pass
def _enforce_terminators(builder, func: ir.Function, block_by_id: Dict[int, Dict[str, Any]]):
import re
succs = _build_succs(getattr(builder, 'preds', {}) or {})
for bb in func.blocks:
try:
if bb.terminator is not None:
continue
except Exception:
# If property access fails, try to add a branch/ret anyway
pass
# Parse block id from name like "bb123"
bid = None
try:
m = re.match(r"bb(\d+)$", str(bb.name))
bid = int(m.group(1)) if m else None
except Exception:
bid = None
# Choose a reasonable successor if any
target_bb = None
if bid is not None:
for s in (succs.get(int(bid), []) or []):
try:
cand = builder.bb_map.get(int(s))
except Exception:
cand = None
if cand is not None and cand is not bb:
target_bb = cand
break
ib = ir.IRBuilder(bb)
if target_bb is not None:
try:
ib.position_at_end(bb)
except Exception:
pass
ib.branch(target_bb)
try:
trace_debug(f"[llvm-py] enforce_terminators: br from {bb.name} -> {target_bb.name}")
except Exception:
pass
continue
# Fallback: insert a return of 0 matching function return type (i32 for ny_main, else i64)
try:
rty = func.function_type.return_type
if str(rty) == str(builder.i32):
ib.ret(ir.Constant(builder.i32, 0))
elif str(rty) == str(builder.i64):
ib.ret(ir.Constant(builder.i64, 0))
else:
# Unknown/void synthesize a dummy br to self to keep parser happy (unreachable in practice)
ib.branch(bb)
try:
trace_debug(f"[llvm-py] enforce_terminators: ret/br injected in {bb.name}")
except Exception:
pass
except Exception:
# Last resort: do nothing
pass