Phase 22.x WIP: LLVM backend improvements + MIR builder enhancements
LLVM backend improvements: - Add native LLVM backend support (NYASH_LLVM_BACKEND=native) - Add crate backend selector with priority (crate > llvmlite) - Add native_llvm_builder.py for native IR generation - Add NYASH_LLVM_NATIVE_TRACE=1 for IR dump MIR builder enhancements: - Refactor lower_if_compare_* boxes for better code generation - Refactor lower_return_* boxes for optimized returns - Refactor lower_loop_* boxes for loop handling - Refactor lower_method_* boxes for method calls - Update pattern_util_box for better pattern matching Smoke tests: - Add phase2100 S3 backend selector tests (17 new tests) - Add phase2120 native backend tests (4 new tests) - Add phase2034 MIR builder internal tests (2 new tests) - Add phase2211 TLV shim parity test Documentation: - Update ENV_VARS.md with LLVM backend variables - Update CURRENT_TASK.md with progress - Update README.md and CHANGELOG.md Config: - Add NYASH_LLVM_BACKEND env support in src/config/env.rs - Update ny_mir_builder.sh for backend selection - Update dispatch.rs for backend routing Tools: - Add tools/native_llvm_builder.py - Update smokes/v2/profiles/quick/core/phase2100/run_all.sh Known: Many Hako builder internal files modified for optimization
This commit is contained in:
241
tools/native_llvm_builder.py
Normal file
241
tools/native_llvm_builder.py
Normal file
@ -0,0 +1,241 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Native LLVM Builder (bootstrap)
|
||||
|
||||
Goal: minimal Python-only emitter that generates LLVM IR text from a tiny
|
||||
subset of Nyash MIR JSON and compiles it to an object via `llc`.
|
||||
|
||||
Supported (MVP):
|
||||
- schema_version v1 or tolerant shapes
|
||||
- Single function: ny_main(): i64
|
||||
- Instructions: const(i64), binop(add/sub/mul/div/mod/&/|/^/<</>>), compare(==, !=, <), branch, jump
|
||||
- ret(value)
|
||||
|
||||
Usage:
|
||||
python3 tools/native_llvm_builder.py --in in.json --emit obj --out out.o
|
||||
|
||||
Notes:
|
||||
- No external Python packages required. Assumes `llc` is in PATH.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _normalize_canary(v: dict) -> dict:
    """Normalize tolerant MIR JSON shapes into the layout ``build_ir`` reads.

    The input mapping is modified in place and also returned.

    Normalizations applied:
      * ``schema_version`` given as the integer ``1`` or the string ``"1"``
        becomes ``"1.0"``.
      * A block's ``inst`` list is renamed to ``instructions`` (only when the
        block does not already have an ``instructions`` key).
      * A ``const`` instruction whose ``value`` is not already a dict carrying
        a ``type`` key is rewritten to ``{"type": ..., "value": ...}``. The
        type comes from the instruction's ``ty`` key (default ``"i64"``) and
        the payload from ``value`` (default ``0``). A ``value`` dict that has
        a ``value`` key but no ``type`` keeps its payload and only gains the
        type, instead of being wrapped a second time.

    Entries of ``functions``, ``blocks`` or ``instructions`` that are not
    dicts are left untouched rather than raising.

    Args:
        v: Parsed MIR JSON document.

    Returns:
        The same ``v`` object after normalization.
    """
    sv = v.get("schema_version")
    if (isinstance(sv, int) and sv == 1) or (isinstance(sv, str) and sv == "1"):
        v["schema_version"] = "1.0"

    funs = v.get("functions")
    if not isinstance(funs, list):
        return v

    for f in funs:
        blks = f.get("blocks") if isinstance(f, dict) else None
        if not isinstance(blks, list):
            continue
        for b in blks:
            if not isinstance(b, dict):
                continue
            # Accept the short key "inst" as an alias of "instructions".
            if "inst" in b and "instructions" not in b:
                b["instructions"] = b.pop("inst")
            ins = b.get("instructions")
            if not isinstance(ins, list):
                continue
            for insn in ins:
                if not isinstance(insn, dict) or insn.get("op") != "const":
                    continue
                value = insn.get("value")
                if isinstance(value, dict) and "type" in value:
                    # Already in canonical {"type", "value"} form.
                    continue
                ty = insn.pop("ty", "i64")
                if isinstance(value, dict) and "value" in value:
                    # Payload is already wrapped; only the type is missing.
                    insn["value"] = {"type": ty, "value": value["value"]}
                else:
                    insn["value"] = {"type": ty, "value": insn.pop("value", 0)}
    return v
|
||||
|
||||
|
||||
# Spelled-out MIR binop names mapped to their operator symbol. Operator
# symbols themselves pass through the lookup unchanged.
_BINOP_ALIASES = {
    'add': '+', 'plus': '+', 'sub': '-', 'minus': '-', 'mul': '*', 'times': '*',
    'div': '/', 'mod': '%', 'rem': '%', 'band': '&', 'bitand': '&', 'bor': '|', 'bitor': '|',
    'bxor': '^', 'xor': '^', 'shl': '<<', 'shr': '>>', 'ashr': '>>',
}

# Operator symbol -> LLVM instruction. Division, remainder and right shift use
# the signed forms (sdiv / srem / ashr).
_BINOP_OPCODES = {
    '+': 'add', '-': 'sub', '*': 'mul', '/': 'sdiv', '%': 'srem',
    '&': 'and', '|': 'or', '^': 'xor', '<<': 'shl', '>>': 'ashr',
}

# Comparison spelling (looked up lowercased) -> signed icmp predicate.
_ICMP_PREDICATES = {
    '==': 'eq', 'eq': 'eq',
    '!=': 'ne', 'ne': 'ne',
    '<': 'slt', 'lt': 'slt',
    '<=': 'sle', 'le': 'sle',
    '>': 'sgt', 'gt': 'sgt',
    '>=': 'sge', 'ge': 'sge',
}


def build_ir(ny_json: dict) -> str:
    """Lower the ``ny_main`` function of a MIR JSON module to LLVM IR text.

    The document is first passed through ``_normalize_canary`` (which mutates
    it in place). Every block of ``ny_main`` is emitted under the label
    ``bb<id>`` and its instructions are lowered one by one:

      * ``const``   - recorded as an immediate; non-``i64`` constants become 0.
      * ``binop``   - ``add/sub/mul/sdiv/srem/and/or/xor/shl/ashr`` on ``i64``.
        An unrecognized operation is emitted as ``add i64 0, 0``.
      * ``compare`` - ``icmp`` with ``eq/ne/slt/sle/sgt/sge``. An unrecognized
        comparison falls back to ``ne``.
      * ``branch``  - conditional ``br``; an ``i64`` condition is first tested
        against zero.
      * ``jump``    - unconditional ``br``.
      * ``ret``     - ``ret i64``; an ``i1`` value is zero-extended first.

    Instructions with any other ``op`` are ignored. A value id that was never
    defined reads as the constant ``0``. Compare results are ``i1``; whenever
    one is used where an ``i64`` is required (binop/compare operand, return
    value) a ``zext`` is inserted so the emitted IR stays well-typed.

    Args:
        ny_json: Parsed MIR JSON document.

    Returns:
        LLVM IR module text defining ``i64 @ny_main()``.

    Raises:
        ValueError: No function named ``ny_main`` is present.
    """
    ny = _normalize_canary(ny_json)
    fn = next(
        (f for f in ny.get("functions", [])
         if isinstance(f, dict) and f.get("name") == "ny_main"),
        None,
    )
    if fn is None:
        raise ValueError("ny_main not found")

    header = "; ModuleID = \"nyash_native\""
    blocks = fn.get("blocks", [])
    if not blocks:
        # Nothing to lower: emit a function that just returns 0.
        return header + "\ndefine i64 @ny_main(){\n ret i64 0\n}\n"

    # Keep IR minimal; let llc choose target triple/datalayout.
    lines = [header, "", "define i64 @ny_main(){"]

    const_map = {}   # value id -> immediate integer
    ssa_map = {}     # value id -> emitted SSA name (without the leading '%')
    is_i1 = set()    # value ids whose SSA value has type i1 (compare results)
    tmp_idx = 0

    def fresh(name):
        """Return a new unique SSA name using ``name`` as prefix."""
        nonlocal tmp_idx
        tmp_idx += 1
        return f"{name}_{tmp_idx}"

    def i64_operand(vid):
        """Return operand text for ``vid`` as an i64, widening i1 if needed."""
        if vid in ssa_map:
            if vid in is_i1:
                widened = fresh("zext")
                lines.append(f" %{widened} = zext i1 %{ssa_map[vid]} to i64")
                return f"%{widened}"
            return f"%{ssa_map[vid]}"
        # Known constant, or default zero for an undefined id.
        return str(const_map.get(vid, 0))

    for b in blocks:
        lines.append(f"bb{b.get('id')}:")
        for insn in b.get("instructions", []):
            op = insn.get("op")

            if op == "const":
                v = insn.get("value", {})
                ty = v.get("type", insn.get("ty", "i64"))
                val = v.get("value", 0) if ty == "i64" else 0
                const_map[insn.get("dst")] = int(val)

            elif op == "binop":
                opx = (insn.get("operation") or '').lower()
                opcode = _BINOP_OPCODES.get(_BINOP_ALIASES.get(opx, opx))
                if opcode is None:
                    # Unknown operation: keep the placeholder result.
                    name = fresh("bin")
                    lines.append(f" %{name} = add i64 0, 0")
                else:
                    lv = i64_operand(insn.get("lhs"))
                    rv = i64_operand(insn.get("rhs"))
                    name = fresh("bin")
                    lines.append(f" %{name} = {opcode} i64 {lv}, {rv}")
                ssa_map[insn.get("dst")] = name

            elif op == "compare":
                dst = insn.get("dst")
                opx = insn.get("operation") or insn.get("cmp") or '=='
                # Unknown spellings keep the historical fallback of 'ne'.
                pred = _ICMP_PREDICATES.get(str(opx).lower(), 'ne')
                lv = i64_operand(insn.get("lhs"))
                rv = i64_operand(insn.get("rhs"))
                name = fresh("cmp")
                lines.append(f" %{name} = icmp {pred} i64 {lv}, {rv}")
                ssa_map[dst] = name
                is_i1.add(dst)

            elif op == "branch":
                # Conditional branch: {cond, then, else}
                cond = insn.get("cond")
                then_id = insn.get("then")
                else_id = insn.get("else")
                if cond in ssa_map and cond in is_i1:
                    cond_name = f"%{ssa_map[cond]}"
                else:
                    # Build i1 from i64 via icmp ne 0
                    cv = i64_operand(cond)
                    name = fresh("cnd")
                    lines.append(f" %{name} = icmp ne i64 {cv}, 0")
                    cond_name = f"%{name}"
                lines.append(f" br i1 {cond_name}, label %bb{then_id}, label %bb{else_id}")

            elif op == "jump":
                lines.append(f" br label %bb{insn.get('target')}")

            elif op == "ret":
                lines.append(f" ret i64 {i64_operand(insn.get('value'))}")

    lines.append("}")
    return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def compile_ir_to_obj(ir_text: str, out_obj: Path) -> None:
    """Compile LLVM IR text to an object file by running ``llc``.

    The IR is written to a uniquely named temporary ``.ll`` file, which is
    removed again whether or not ``llc`` succeeds. ``llc`` output is
    suppressed on success; if ``llc`` fails, its diagnostics are forwarded to
    this process's stderr before the error is raised.

    Args:
        ir_text: LLVM IR module text.
        out_obj: Destination path passed to ``llc -o``.

    Raises:
        subprocess.CalledProcessError: ``llc`` exited with a non-zero status.
        OSError: ``llc`` could not be started, or the temporary file could
            not be written.
    """
    import tempfile  # function-scope import: only this helper needs it

    # mkstemp creates the file exclusively under an unpredictable name, unlike
    # a fixed "/tmp/native_ir_<pid>.ll" path.
    fd, tmp_name = tempfile.mkstemp(prefix="native_ir_", suffix=".ll")
    tmp = Path(tmp_name)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            fh.write(ir_text)
        cmd = ["llc", "-filetype=obj", "-o", str(out_obj), str(tmp)]
        proc = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
        )
        if proc.returncode != 0:
            if proc.stderr:
                sys.stderr.write(proc.stderr)
            raise subprocess.CalledProcessError(
                proc.returncode, cmd, stderr=proc.stderr
            )
    finally:
        try:
            tmp.unlink()
        except OSError:
            # Best-effort cleanup; never mask the real error.
            pass
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: MIR JSON in, LLVM IR text or object file out.

    Options:
        --in   Path of the MIR JSON input file (required).
        --emit Output kind: ``obj`` (default) or ``ll``.
        --out  Output path (required).

    When the environment variable ``NYASH_LLVM_NATIVE_TRACE`` is set to one of
    ``1``, ``true``, ``on`` or ``yes`` (compared case-insensitively), the
    generated IR is also printed to stderr.

    Exits with status 2 if ``--emit`` names an unsupported kind.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--in", dest="infile", required=True)
    ap.add_argument("--emit", dest="emit", default="obj")
    ap.add_argument("--out", dest="out", required=True)
    args = ap.parse_args()

    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(args.infile, 'r', encoding='utf-8') as f:
        ny = json.load(f)

    ir = build_ir(ny)

    trace = os.environ.get('NYASH_LLVM_NATIVE_TRACE', '')
    if trace.strip().lower() in ('1', 'true', 'on', 'yes'):
        print(ir, file=sys.stderr)

    if args.emit == 'll':
        Path(args.out).write_text(ir, encoding='utf-8')
        print(f"[native] ll written: {args.out}")
        return
    if args.emit == 'obj':
        compile_ir_to_obj(ir, Path(args.out))
        print(f"[native] obj written: {args.out}")
        return

    print("error: unsupported emit kind", file=sys.stderr)
    sys.exit(2)
|
||||
|
||||
|
||||
# Script entry: run main() and turn any uncaught error into a one-line
# "[native] error: ..." message on stderr with exit status 1.
if __name__ == "__main__":
    try:
        main()
    except Exception as err:
        sys.stderr.write(f"[native] error: {err}\n")
        raise SystemExit(1)
|
||||
Reference in New Issue
Block a user