Phase 22.x WIP: LLVM backend improvements + MIR builder enhancements

LLVM backend improvements:
- Add native LLVM backend support (NYASH_LLVM_BACKEND=native)
- Add crate backend selector with priority (crate > llvmlite)
- Add native_llvm_builder.py for native IR generation
- Add NYASH_LLVM_NATIVE_TRACE=1 for IR dump

MIR builder enhancements:
- Refactor lower_if_compare_* boxes for better code generation
- Refactor lower_return_* boxes for optimized returns
- Refactor lower_loop_* boxes for loop handling
- Refactor lower_method_* boxes for method calls
- Update pattern_util_box for better pattern matching

Smoke tests:
- Add phase2100 S3 backend selector tests (17 new tests)
- Add phase2120 native backend tests (4 new tests)
- Add phase2034 MIR builder internal tests (2 new tests)
- Add phase2211 TLV shim parity test

Documentation:
- Update ENV_VARS.md with LLVM backend variables
- Update CURRENT_TASK.md with progress
- Update README.md and CHANGELOG.md

Config:
- Add NYASH_LLVM_BACKEND env support in src/config/env.rs
- Update ny_mir_builder.sh for backend selection
- Update dispatch.rs for backend routing

Tools:
- Add tools/native_llvm_builder.py
- Update smokes/v2/profiles/quick/core/phase2100/run_all.sh

Note: many internal Hako builder files were also modified for optimization.
This commit is contained in:
nyash-codex
2025-11-09 23:40:36 +09:00
parent fb6129183d
commit f6c5dc9e43
65 changed files with 1965 additions and 434 deletions

View File

@ -6,6 +6,7 @@ use std::process::Command;
use anyhow::{bail, Context, Result};
use clap::{ArgAction, Parser};
use serde_json::Value as JsonValue;
#[derive(Parser, Debug)]
#[command(
@ -86,29 +87,59 @@ fn main() -> Result<()> {
return Ok(());
}
// Prepare input JSON path: either from file or stdin -> temp file
// Prepare input JSON path: either from file or stdin -> temp file.
// Optionally normalize canary JSON into the shape expected by the Python builder
// when HAKO_LLVM_CANARY_NORMALIZE=1 (no default behavior change).
let mut temp_path: Option<PathBuf> = None;
let canary_norm = env::var("HAKO_LLVM_CANARY_NORMALIZE").ok().as_deref() == Some("1");
let input_path = if args.infile == "-" {
let mut buf = String::new();
std::io::stdin()
.read_to_string(&mut buf)
.context("reading MIR JSON from stdin")?;
// Basic sanity check that it's JSON
let _: serde_json::Value =
let mut val: serde_json::Value =
serde_json::from_str(&buf).context("stdin does not contain valid JSON")?;
if canary_norm {
val = normalize_canary_json(val);
}
let tmp = std::env::temp_dir().join("ny_llvmc_stdin.json");
let mut f = File::create(&tmp).context("create temp json file")?;
f.write_all(buf.as_bytes()).context("write temp json")?;
let out = serde_json::to_vec(&val).context("serialize normalized json")?;
f.write_all(&out).context("write temp json")?;
temp_path = Some(tmp.clone());
tmp
} else {
PathBuf::from(&args.infile)
let p = PathBuf::from(&args.infile);
if canary_norm {
// Read file, normalize, and write to a temp path
let mut buf = String::new();
File::open(&p).and_then(|mut f| f.read_to_string(&mut buf)).context("read input json")?;
let mut val: serde_json::Value = serde_json::from_str(&buf).context("input is not valid JSON")?;
val = normalize_canary_json(val);
let tmp = std::env::temp_dir().join("ny_llvmc_in.json");
let mut f = File::create(&tmp).context("create temp json file")?;
let out = serde_json::to_vec(&val).context("serialize normalized json")?;
f.write_all(&out).context("write temp json")?;
temp_path = Some(tmp.clone());
tmp
} else {
p
}
};
if !input_path.exists() {
bail!("input JSON not found: {}", input_path.display());
}
// Optional: preflight shape/hints (best-effort; no behavior change)
if let Ok(s) = std::fs::read_to_string(&input_path) {
if let Ok(val) = serde_json::from_str::<JsonValue>(&s) {
if let Some(hint) = shape_hint(&val) {
eprintln!("[ny-llvmc/hint] {}", hint);
}
}
}
// Produce object first
let obj_path = if emit_exe {
let mut p = args.out.clone();
@ -118,6 +149,20 @@ fn main() -> Result<()> {
args.out.clone()
};
// Optional: print concise shape hint in verbose mode when not normalizing
if env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") && env::var("HAKO_LLVM_CANARY_NORMALIZE").ok().as_deref() != Some("1") {
if let Ok(mut f) = File::open(&input_path) {
let mut buf = String::new();
if f.read_to_string(&mut buf).is_ok() {
if let Ok(val) = serde_json::from_str::<serde_json::Value>(&buf) {
if let Some(h) = shape_hint(&val) {
eprintln!("[ny-llvmc/hint] {}", h);
}
}
}
}
}
run_harness_in(&harness_path, &input_path, &obj_path).with_context(|| {
format!(
"failed to compile MIR JSON via harness: {}",
@ -144,6 +189,93 @@ fn main() -> Result<()> {
Ok(())
}
/// Return a concise hint if the MIR JSON likely has a schema/shape mismatch for the Python harness.
fn shape_hint(v: &JsonValue) -> Option<String> {
// Accept both v0/v1 tolerant; only emit hint on common canary shapes
// 1) schema_version numeric 1 rather than string "1.0"
if let Some(sv) = v.get("schema_version") {
if sv.is_number() {
if sv.as_i64() == Some(1) {
return Some("schema_version=1 detected; set to \"1.0\" or enable HAKO_LLVM_CANARY_NORMALIZE=1".into());
}
} else if sv.as_str() == Some("1") {
return Some("schema_version=\"1\" detected; prefer \"1.0\" or enable HAKO_LLVM_CANARY_NORMALIZE=1".into());
}
}
// 2) blocks use 'inst' instead of 'instructions'
if let Some(funcs) = v.get("functions") {
if let Some(arr) = funcs.as_array() {
for f in arr {
if let Some(blocks) = f.get("blocks").and_then(|b| b.as_array()) {
for b in blocks {
if b.get("inst").is_some() && b.get("instructions").is_none() {
return Some("block key 'inst' found; rename to 'instructions' or enable HAKO_LLVM_CANARY_NORMALIZE=1".into());
}
}
}
}
}
}
None
}
/// Normalize a very small canary JSON into the shape expected by the Python harness.
///
/// - `schema_version`: the integer `1` or the string `"1"` is rewritten to `"1.0"`;
///   any other value is left as is.
/// - For every object in `functions[*].blocks[*]`, the key `inst` is renamed to
///   `instructions`.
/// - For every `const` instruction that has BOTH a `ty` key and a flat `value` key,
///   `{"ty": T, "value": N}` becomes `{"value": {"type": T, "value": N}}`.
///
/// Anything that does not match these shapes (non-array `functions`, non-object
/// blocks, other ops, ...) is passed through unchanged. The document is consumed
/// and returned by value.
fn normalize_canary_json(mut v: serde_json::Value) -> serde_json::Value {
    use serde_json::Value;

    // `as_i64` is `Some` only for integer numbers, `as_str` only for strings.
    if let Some(version) = v.get_mut("schema_version") {
        if version.as_i64() == Some(1) || version.as_str() == Some("1") {
            *version = Value::String("1.0".to_string());
        }
    }

    let functions = v.get_mut("functions").and_then(Value::as_array_mut);
    for func in functions.into_iter().flatten() {
        // `get_mut` with a string key yields `None` for non-object functions.
        let blocks = func.get_mut("blocks").and_then(Value::as_array_mut);
        for block in blocks.into_iter().flatten() {
            if let Some(block_map) = block.as_object_mut() {
                normalize_canary_block(block_map);
            }
        }
    }
    v
}

/// Rename `inst` to `instructions` in one block object and normalize its instructions.
fn normalize_canary_block(block: &mut serde_json::Map<String, serde_json::Value>) {
    // If both keys are present, the `inst` payload replaces the existing `instructions`.
    if let Some(insts) = block.remove("inst") {
        block.insert("instructions".to_string(), insts);
    }
    let instructions = block
        .get_mut("instructions")
        .and_then(serde_json::Value::as_array_mut);
    for ins in instructions.into_iter().flatten() {
        if let Some(ins_map) = ins.as_object_mut() {
            wrap_flat_const_value(ins_map);
        }
    }
}

/// Wrap a flat `const` payload (`ty` + `value`) into the typed `value` object.
fn wrap_flat_const_value(ins: &mut serde_json::Map<String, serde_json::Value>) {
    use serde_json::{Map, Value};

    if ins.get("op").and_then(Value::as_str) != Some("const") {
        return;
    }
    // Check for both keys BEFORE removing anything: removing eagerly would drop
    // the `value` of an already-normalized const (no `ty`) or a lone `ty`.
    if !(ins.contains_key("ty") && ins.contains_key("value")) {
        return;
    }
    if let (Some(ty), Some(val)) = (ins.remove("ty"), ins.remove("value")) {
        let mut typed = Map::new();
        typed.insert("type".to_string(), ty);
        typed.insert("value".to_string(), val);
        ins.insert("value".to_string(), Value::Object(typed));
    }
}
fn run_harness_dummy(harness: &Path, out: &Path) -> Result<()> {
ensure_python()?;
let mut cmd = Command::new("python3");
@ -182,10 +314,13 @@ fn ensure_python() -> Result<()> {
}
fn propagate_opt_level(cmd: &mut Command) {
let level = env::var("HAKO_LLVM_OPT_LEVEL")
.ok()
.or_else(|| env::var("NYASH_LLVM_OPT_LEVEL").ok());
let hako = env::var("HAKO_LLVM_OPT_LEVEL").ok();
let nyash = env::var("NYASH_LLVM_OPT_LEVEL").ok();
let level = nyash.clone().or(hako.clone());
if let Some(level) = level {
if hako.is_some() && nyash.is_none() {
eprintln!("[deprecate/env] 'HAKO_LLVM_OPT_LEVEL' is deprecated; use 'NYASH_LLVM_OPT_LEVEL'");
}
cmd.env("HAKO_LLVM_OPT_LEVEL", &level);
cmd.env("NYASH_LLVM_OPT_LEVEL", &level);
}