Files
hakorune/tools/native_llvm_builder.py

242 lines
8.9 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Native LLVM Builder (bootstrap)
Goal: minimal Python-only emitter that generates LLVM IR text from a tiny
subset of Nyash MIR JSON and compiles it to an object via `llc`.
Supported (MVP):
- schema_version v1 or tolerant shapes
- Single function: ny_main(): i64
- Instructions: const(i64), binop(add/sub/mul/div/mod/&/|/^/<</>>), compare(==, !=, <)
- Control flow: branch(cond, then, else), jump(target), ret(value)
Usage:
python3 tools/native_llvm_builder.py --in in.json --emit obj --out out.o
Notes:
- No external Python packages required. Assumes `llc` is in PATH.
"""
import argparse
import json
import os
import subprocess
import sys
from pathlib import Path
def _normalize_canary(v: dict) -> dict:
    """Normalize tolerant ("canary") MIR JSON shapes into the canonical form.

    The document is modified in place and the same object is returned.

    Normalizations applied:
    - ``schema_version`` given as the int ``1`` or the string ``"1"`` becomes
      the string ``"1.0"``.
    - A block's ``inst`` list is renamed to ``instructions`` (only when the
      block has no ``instructions`` key yet).
    - A ``const`` instruction whose ``value`` is not already a dict carrying a
      ``type`` key is rewritten from the flat ``ty`` / ``value`` form into
      ``{"value": {"type": ty, "value": value}}``; a missing ``ty`` defaults
      to ``"i64"`` and a missing ``value`` to ``0``.

    Functions, blocks and instructions that are not JSON objects (for example
    ``null`` entries) are skipped instead of raising ``AttributeError``.

    Args:
        v: Parsed MIR JSON document.

    Returns:
        ``v`` itself, after in-place normalization.
    """
    # Coerce schema_version
    sv = v.get("schema_version")
    if (isinstance(sv, int) and sv == 1) or (isinstance(sv, str) and sv == "1"):
        v["schema_version"] = "1.0"

    funs = v.get("functions")
    if not isinstance(funs, list):
        return v

    for f in funs:
        if not isinstance(f, dict):
            continue
        blks = f.get("blocks")
        if not isinstance(blks, list):
            continue
        for b in blks:
            if not isinstance(b, dict):
                continue
            # Normalize blocks.inst -> instructions
            if "inst" in b and "instructions" not in b:
                b["instructions"] = b.pop("inst")
            ins = b.get("instructions")
            if not isinstance(ins, list):
                continue
            for insn in ins:
                if not isinstance(insn, dict) or insn.get("op") != "const":
                    continue
                value = insn.get("value")
                if isinstance(value, dict) and "type" in value:
                    # Already in the canonical typed-value form.
                    continue
                ty = insn.pop("ty", "i64")
                val = insn.pop("value", 0)
                insn["value"] = {"type": ty, "value": val}
    return v
# Binop spellings accepted in MIR JSON (after lower-casing) -> operator symbol.
_BINOP_ALIASES = {
    'add': '+', 'plus': '+', 'sub': '-', 'minus': '-', 'mul': '*', 'times': '*',
    'div': '/', 'mod': '%', 'rem': '%', 'band': '&', 'bitand': '&', 'bor': '|', 'bitor': '|',
    'bxor': '^', 'xor': '^', 'shl': '<<', 'shr': '>>', 'ashr': '>>',
}

# Operator symbol -> LLVM i64 opcode (division, remainder and shift are signed).
_BINOP_OPCODES = {
    '+': 'add', '-': 'sub', '*': 'mul', '/': 'sdiv', '%': 'srem',
    '&': 'and', '|': 'or', '^': 'xor', '<<': 'shl', '>>': 'ashr',
}

# Compare operation (after lower-casing) -> signed icmp predicate.
_CMP_PREDICATES = {
    '==': 'eq', 'eq': 'eq',
    '!=': 'ne', 'ne': 'ne',
    '<': 'slt', 'lt': 'slt',
    '<=': 'sle', 'le': 'sle',
    '>': 'sgt', 'gt': 'sgt',
    '>=': 'sge', 'ge': 'sge',
}


def build_ir(ny_json: dict) -> str:
    """Lower the ``ny_main`` function of a MIR JSON document to LLVM IR text.

    The document is first passed through ``_normalize_canary`` (which mutates
    it in place). Every block is emitted under an explicit ``bb<id>:`` label
    and its instructions are lowered as follows:

    - ``const``: recorded as an immediate (any type other than ``i64`` is
      recorded as ``0``); nothing is emitted until the value is used.
    - ``binop``: one i64 instruction chosen from the operation name or symbol;
      an unrecognized operation is emitted as ``add i64 0, 0``.
    - ``compare``: an ``icmp`` producing an i1. ``==``, ``!=``, ``<``, ``<=``,
      ``>``, ``>=`` and the names ``Eq``/``Ne``/``Lt``/``Le``/``Gt``/``Ge``
      (any case) are recognized; anything else falls back to ``ne``.
    - ``branch``: ``br i1`` on the condition; an i64 condition is first tested
      against zero with ``icmp ne``.
    - ``jump``: unconditional ``br``.
    - ``ret``: ``ret i64``; an i1 value is widened with ``zext`` first.

    Instructions with any other ``op`` are ignored. A value id that was never
    defined reads as ``0``. i1 values used as binop/compare operands are
    widened to i64 with ``zext`` so the emitted IR stays type-correct.

    Args:
        ny_json: Parsed MIR JSON document.

    Returns:
        The LLVM IR module text, terminated by a newline.

    Raises:
        ValueError: No function named ``ny_main`` exists in the document.
    """
    ny = _normalize_canary(ny_json)

    fn = None
    # `or []` tolerates an explicit null; non-object entries are skipped.
    for f in ny.get("functions") or []:
        if isinstance(f, dict) and f.get("name") == "ny_main":
            fn = f
            break
    if fn is None:
        raise ValueError("ny_main not found")

    blocks = fn.get("blocks", [])
    if not blocks:
        # trivial
        return (
            "; ModuleID = \"nyash_native\"\n"
            "define i64 @ny_main(){\n ret i64 0\n}\n"
        )

    # Keep IR minimal; let llc choose target triple/datalayout
    lines = [
        "; ModuleID = \"nyash_native\"",
        "",
        "define i64 @ny_main(){",
    ]

    const_map = {}   # value id -> immediate int
    ssa_map = {}     # value id -> emitted SSA name (without the leading '%')
    is_i1 = set()    # value ids whose SSA value has type i1 (compare results)
    tmp_idx = 0

    def fresh(name):
        """Return a unique SSA name built from the prefix ``name``."""
        nonlocal tmp_idx
        tmp_idx += 1
        return f"{name}_{tmp_idx}"

    def operand_i64(vid):
        """Return an i64 operand for ``vid``, emitting a zext for i1 values."""
        if vid in ssa_map:
            if vid in is_i1:
                wide = fresh("zext")
                lines.append(f" %{wide} = zext i1 %{ssa_map[vid]} to i64")
                return f"%{wide}"
            return f"%{ssa_map[vid]}"
        if vid in const_map:
            return str(const_map[vid])
        # default zero
        return "0"

    # Emit each block with an explicit label: bb<id>:
    for b in blocks:
        if not isinstance(b, dict):
            continue
        lines.append(f"bb{b.get('id')}:")
        for insn in b.get("instructions") or []:
            if not isinstance(insn, dict):
                continue
            op = insn.get("op")
            if op == "const":
                v = insn.get("value", {})
                ty = v.get("type", insn.get("ty", "i64"))
                val = v.get("value", 0)
                if ty != "i64":
                    val = 0
                const_map[insn.get("dst")] = int(val)
            elif op == "binop":
                opx = (insn.get("operation") or '').lower()
                sym = _BINOP_ALIASES.get(opx, opx)
                lhs_ref = operand_i64(insn.get("lhs"))
                rhs_ref = operand_i64(insn.get("rhs"))
                name = fresh("bin")
                opcode = _BINOP_OPCODES.get(sym)
                if opcode is None:
                    # Unknown operation: best-effort placeholder yielding 0.
                    lines.append(f" %{name} = add i64 0, 0")
                else:
                    lines.append(f" %{name} = {opcode} i64 {lhs_ref}, {rhs_ref}")
                ssa_map[insn.get("dst")] = name
            elif op == "compare":
                dst = insn.get("dst")
                opx = insn.get("operation") or insn.get("cmp") or '=='
                lhs_ref = operand_i64(insn.get("lhs"))
                rhs_ref = operand_i64(insn.get("rhs"))
                name = fresh("cmp")
                # Unrecognized operations keep the historical 'ne' fallback.
                pred = _CMP_PREDICATES.get(str(opx).lower(), 'ne')
                lines.append(f" %{name} = icmp {pred} i64 {lhs_ref}, {rhs_ref}")
                ssa_map[dst] = name
                is_i1.add(dst)
            elif op == "branch":
                # Conditional branch: {cond, then, else}
                cond = insn.get("cond")
                then_id = insn.get("then")
                else_id = insn.get("else")
                if cond in ssa_map and cond in is_i1:
                    flag = f"%{ssa_map[cond]}"
                else:
                    # Build i1 from i64 via icmp ne 0
                    flag = "%" + fresh("cnd")
                    lines.append(f" {flag} = icmp ne i64 {operand_i64(cond)}, 0")
                lines.append(f" br i1 {flag}, label %bb{then_id}, label %bb{else_id}")
            elif op == "jump":
                lines.append(f" br label %bb{insn.get('target')}")
            elif op == "ret":
                # operand_i64 widens i1 results and defaults unknown ids to 0.
                lines.append(f" ret i64 {operand_i64(insn.get('value'))}")
    lines.append("}")
    return "\n".join(lines) + "\n"
def compile_ir_to_obj(ir_text: str, out_obj: Path) -> None:
    """Compile LLVM IR text to an object file by running ``llc``.

    The IR is written to a uniquely named temporary ``.ll`` file, ``llc
    -filetype=obj`` is run on it with its stdout/stderr discarded, and the
    temporary file is removed afterwards whether or not compilation succeeded.

    Args:
        ir_text: LLVM IR module text.
        out_obj: Destination path of the object file.

    Raises:
        subprocess.CalledProcessError: ``llc`` exited with a non-zero status.
        OSError: The temporary file could not be written, or ``llc`` could not
            be launched (e.g. it is not in PATH).
    """
    import tempfile  # function-scope import keeps this block self-contained

    # mkstemp creates the file atomically with an unpredictable name, unlike a
    # fixed /tmp/<pid> path, and works on platforms without /tmp.
    fd, tmp_name = tempfile.mkstemp(prefix="native_ir_", suffix=".ll")
    try:
        with os.fdopen(fd, "w") as fh:
            fh.write(ir_text)
        subprocess.check_call(
            ["llc", "-filetype=obj", "-o", str(out_obj), tmp_name],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    finally:
        # Best-effort cleanup: a failed unlink must not mask the real error.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
def main():
    """Command-line entry point: read MIR JSON, build IR, emit ``ll`` or ``obj``.

    Options:
        --in    Path of the MIR JSON input (required).
        --emit  ``obj`` (default) to compile via ``llc`` or ``ll`` to write the
                IR text.
        --out   Output path (required).

    When the environment variable ``NYASH_LLVM_NATIVE_TRACE`` is set to
    ``1``, ``true``, ``on`` or ``yes`` (any case), the generated IR is also
    printed to stderr.

    Exits with status 2 when ``--emit`` names an unsupported kind. Other
    failures propagate as exceptions to the caller.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--in", dest="infile", required=True)
    ap.add_argument("--emit", dest="emit", default="obj")
    ap.add_argument("--out", dest="out", required=True)
    args = ap.parse_args()

    # JSON interchange is UTF-8; do not depend on the locale's default encoding.
    with open(args.infile, 'r', encoding='utf-8') as f:
        ny = json.load(f)
    ir = build_ir(ny)

    trace = os.environ.get('NYASH_LLVM_NATIVE_TRACE') or ''
    if trace.lower() in ('1', 'true', 'on', 'yes'):
        print(ir, file=sys.stderr)

    if args.emit == 'll':
        Path(args.out).write_text(ir)
        print(f"[native] ll written: {args.out}")
        return
    if args.emit == 'obj':
        compile_ir_to_obj(ir, Path(args.out))
        print(f"[native] obj written: {args.out}")
        return
    print("error: unsupported emit kind", file=sys.stderr)
    sys.exit(2)
if __name__ == "__main__":
    # Script boundary: report any failure from main() on stderr and exit with
    # status 1. SystemExit raised inside main() is not an Exception and passes
    # through unchanged.
    try:
        main()
    except Exception as exc:
        print(f"[native] error: {exc}", file=sys.stderr)
        sys.exit(1)