Phase 22.x WIP: LLVM backend improvements + MIR builder enhancements
LLVM backend improvements:
- Add native LLVM backend support (NYASH_LLVM_BACKEND=native)
- Add crate backend selector with priority (crate > llvmlite)
- Add native_llvm_builder.py for native IR generation
- Add NYASH_LLVM_NATIVE_TRACE=1 for IR dump

MIR builder enhancements:
- Refactor lower_if_compare_* boxes for better code generation
- Refactor lower_return_* boxes for optimized returns
- Refactor lower_loop_* boxes for loop handling
- Refactor lower_method_* boxes for method calls
- Update pattern_util_box for better pattern matching

Smoke tests:
- Add phase2100 S3 backend selector tests (17 new tests)
- Add phase2120 native backend tests (4 new tests)
- Add phase2034 MIR builder internal tests (2 new tests)
- Add phase2211 TLV shim parity test

Documentation:
- Update ENV_VARS.md with LLVM backend variables
- Update CURRENT_TASK.md with progress
- Update README.md and CHANGELOG.md

Config:
- Add NYASH_LLVM_BACKEND env support in src/config/env.rs
- Update ny_mir_builder.sh for backend selection
- Update dispatch.rs for backend routing

Tools:
- Add tools/native_llvm_builder.py
- Update smokes/v2/profiles/quick/core/phase2100/run_all.sh

Known: Many Hako builder internal files modified for optimization
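Backend selection in this commit is driven entirely by environment variables: NYASH_LLVM_BACKEND chooses a backend explicitly, and otherwise the crate backend takes priority over llvmlite. The Rust sketch below is a minimal illustration of that resolution order; the LlvmBackend enum, resolve_backend helper, and crate_available flag are hypothetical names for illustration and do not reproduce the actual code in src/config/env.rs or dispatch.rs.

use std::env;

// Hypothetical types for illustration; the real selector in src/config/env.rs
// and dispatch.rs may be shaped differently.
#[derive(Debug, Clone, Copy)]
enum LlvmBackend {
    Native,   // forced via NYASH_LLVM_BACKEND=native
    Crate,    // in-process crate backend (preferred when available)
    Llvmlite, // Python llvmlite harness (fallback)
}

// Resolve the backend: an explicit NYASH_LLVM_BACKEND wins; otherwise fall back
// by the priority stated in the commit message (crate > llvmlite).
fn resolve_backend(crate_available: bool) -> LlvmBackend {
    match env::var("NYASH_LLVM_BACKEND").ok().as_deref() {
        Some("native") => LlvmBackend::Native,
        Some("crate") => LlvmBackend::Crate,
        Some("llvmlite") => LlvmBackend::Llvmlite,
        _ if crate_available => LlvmBackend::Crate,
        _ => LlvmBackend::Llvmlite,
    }
}

fn main() {
    let backend = resolve_backend(true);
    // NYASH_LLVM_NATIVE_TRACE=1 is described as dumping the generated IR on the
    // native path; it is read here only to show where such a toggle would sit.
    let trace_ir = env::var("NYASH_LLVM_NATIVE_TRACE").ok().as_deref() == Some("1");
    println!("backend={:?} trace_ir={}", backend, trace_ir);
}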
@@ -6,6 +6,7 @@ use std::process::Command;
 
 use anyhow::{bail, Context, Result};
 use clap::{ArgAction, Parser};
+use serde_json::Value as JsonValue;
 
 #[derive(Parser, Debug)]
 #[command(
@@ -86,29 +87,59 @@ fn main() -> Result<()> {
         return Ok(());
     }
 
-    // Prepare input JSON path: either from file or stdin -> temp file
+    // Prepare input JSON path: either from file or stdin -> temp file.
+    // Optionally normalize canary JSON into the shape expected by the Python builder
+    // when HAKO_LLVM_CANARY_NORMALIZE=1 (no default behavior change).
     let mut temp_path: Option<PathBuf> = None;
+    let canary_norm = env::var("HAKO_LLVM_CANARY_NORMALIZE").ok().as_deref() == Some("1");
     let input_path = if args.infile == "-" {
         let mut buf = String::new();
         std::io::stdin()
             .read_to_string(&mut buf)
             .context("reading MIR JSON from stdin")?;
-        // Basic sanity check that it's JSON
-        let _: serde_json::Value =
+        let mut val: serde_json::Value =
             serde_json::from_str(&buf).context("stdin does not contain valid JSON")?;
+        if canary_norm {
+            val = normalize_canary_json(val);
+        }
         let tmp = std::env::temp_dir().join("ny_llvmc_stdin.json");
         let mut f = File::create(&tmp).context("create temp json file")?;
-        f.write_all(buf.as_bytes()).context("write temp json")?;
+        let out = serde_json::to_vec(&val).context("serialize normalized json")?;
+        f.write_all(&out).context("write temp json")?;
         temp_path = Some(tmp.clone());
         tmp
     } else {
-        PathBuf::from(&args.infile)
+        let p = PathBuf::from(&args.infile);
+        if canary_norm {
+            // Read file, normalize, and write to a temp path
+            let mut buf = String::new();
+            File::open(&p).and_then(|mut f| f.read_to_string(&mut buf)).context("read input json")?;
+            let mut val: serde_json::Value = serde_json::from_str(&buf).context("input is not valid JSON")?;
+            val = normalize_canary_json(val);
+            let tmp = std::env::temp_dir().join("ny_llvmc_in.json");
+            let mut f = File::create(&tmp).context("create temp json file")?;
+            let out = serde_json::to_vec(&val).context("serialize normalized json")?;
+            f.write_all(&out).context("write temp json")?;
+            temp_path = Some(tmp.clone());
+            tmp
+        } else {
+            p
+        }
     };
 
     if !input_path.exists() {
         bail!("input JSON not found: {}", input_path.display());
     }
 
+    // Optional: preflight shape/hints (best-effort; no behavior change)
+    if let Ok(s) = std::fs::read_to_string(&input_path) {
+        if let Ok(val) = serde_json::from_str::<JsonValue>(&s) {
+            if let Some(hint) = shape_hint(&val) {
+                eprintln!("[ny-llvmc/hint] {}", hint);
+            }
+        }
+    }
+
     // Produce object first
     let obj_path = if emit_exe {
         let mut p = args.out.clone();
@@ -118,6 +149,20 @@ fn main() -> Result<()> {
         args.out.clone()
     };
 
+    // Optional: print concise shape hint in verbose mode when not normalizing
+    if env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") && env::var("HAKO_LLVM_CANARY_NORMALIZE").ok().as_deref() != Some("1") {
+        if let Ok(mut f) = File::open(&input_path) {
+            let mut buf = String::new();
+            if f.read_to_string(&mut buf).is_ok() {
+                if let Ok(val) = serde_json::from_str::<serde_json::Value>(&buf) {
+                    if let Some(h) = shape_hint(&val) {
+                        eprintln!("[ny-llvmc/hint] {}", h);
+                    }
+                }
+            }
+        }
+    }
+
     run_harness_in(&harness_path, &input_path, &obj_path).with_context(|| {
         format!(
             "failed to compile MIR JSON via harness: {}",
@@ -144,6 +189,93 @@ fn main() -> Result<()> {
     Ok(())
 }
 
+/// Return a concise hint if the MIR JSON likely has a schema/shape mismatch for the Python harness.
+fn shape_hint(v: &JsonValue) -> Option<String> {
+    // Accept both v0/v1 tolerant; only emit hint on common canary shapes
+    // 1) schema_version numeric 1 rather than string "1.0"
+    if let Some(sv) = v.get("schema_version") {
+        if sv.is_number() {
+            if sv.as_i64() == Some(1) {
+                return Some("schema_version=1 detected; set to \"1.0\" or enable HAKO_LLVM_CANARY_NORMALIZE=1".into());
+            }
+        } else if sv.as_str() == Some("1") {
+            return Some("schema_version=\"1\" detected; prefer \"1.0\" or enable HAKO_LLVM_CANARY_NORMALIZE=1".into());
+        }
+    }
+    // 2) blocks use 'inst' instead of 'instructions'
+    if let Some(funcs) = v.get("functions") {
+        if let Some(arr) = funcs.as_array() {
+            for f in arr {
+                if let Some(blocks) = f.get("blocks").and_then(|b| b.as_array()) {
+                    for b in blocks {
+                        if b.get("inst").is_some() && b.get("instructions").is_none() {
+                            return Some("block key 'inst' found; rename to 'instructions' or enable HAKO_LLVM_CANARY_NORMALIZE=1".into());
+                        }
+                    }
+                }
+            }
+        }
+    }
+    None
+}
+
+/// Normalize a very small canary JSON into the shape expected by the Python harness.
+/// - Accepts schema_version as number or string; coerces to "1.0" when 1.
+/// - Renames block key 'inst' -> 'instructions'.
+/// - Converts const {"ty":"i64","value":N} into {"value":{"type":"i64","value":N}}
+fn normalize_canary_json(mut v: serde_json::Value) -> serde_json::Value {
+    use serde_json::{Map, Value};
+    // schema_version: number 1 -> string "1.0"
+    match v.get_mut("schema_version") {
+        Some(Value::Number(n)) if n.as_i64() == Some(1) => {
+            *v.get_mut("schema_version").unwrap() = Value::String("1.0".to_string());
+        }
+        Some(Value::String(s)) if s == "1" => {
+            *v.get_mut("schema_version").unwrap() = Value::String("1.0".to_string());
+        }
+        _ => {}
+    }
+    // functions as array
+    if let Some(funcs) = v.get_mut("functions") {
+        if let Value::Array(ref mut arr) = funcs {
+            for func in arr.iter_mut() {
+                if let Value::Object(ref mut fm) = func {
+                    if let Some(blocks_v) = fm.get_mut("blocks") {
+                        if let Value::Array(ref mut blks) = blocks_v {
+                            for blk in blks.iter_mut() {
+                                if let Value::Object(ref mut bm) = blk {
+                                    // Rename 'inst' -> 'instructions'
+                                    if let Some(insts) = bm.remove("inst") {
+                                        bm.insert("instructions".to_string(), insts);
+                                    }
+                                    // Normalize instructions
+                                    if let Some(Value::Array(ref mut ins_arr)) = bm.get_mut("instructions") {
+                                        for ins in ins_arr.iter_mut() {
+                                            if let Value::Object(ref mut im) = ins {
+                                                if im.get("op").and_then(|x| x.as_str()) == Some("const") {
+                                                    // if 'ty' and flat 'value' exist, wrap into typed value
+                                                    if let (Some(ty), Some(val)) = (im.remove("ty"), im.remove("value")) {
+                                                        let mut val_obj = Map::new();
+                                                        if let Value::String(ts) = ty { val_obj.insert("type".to_string(), Value::String(ts)); }
+                                                        else { val_obj.insert("type".to_string(), ty); }
+                                                        val_obj.insert("value".to_string(), val);
+                                                        im.insert("value".to_string(), Value::Object(val_obj));
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    v
+}
+
 fn run_harness_dummy(harness: &Path, out: &Path) -> Result<()> {
     ensure_python()?;
     let mut cmd = Command::new("python3");
@@ -182,10 +314,13 @@ fn ensure_python() -> Result<()> {
 }
 
 fn propagate_opt_level(cmd: &mut Command) {
-    let level = env::var("HAKO_LLVM_OPT_LEVEL")
-        .ok()
-        .or_else(|| env::var("NYASH_LLVM_OPT_LEVEL").ok());
+    let hako = env::var("HAKO_LLVM_OPT_LEVEL").ok();
+    let nyash = env::var("NYASH_LLVM_OPT_LEVEL").ok();
+    let level = nyash.clone().or(hako.clone());
     if let Some(level) = level {
+        if hako.is_some() && nyash.is_none() {
+            eprintln!("[deprecate/env] 'HAKO_LLVM_OPT_LEVEL' is deprecated; use 'NYASH_LLVM_OPT_LEVEL'");
+        }
         cmd.env("HAKO_LLVM_OPT_LEVEL", &level);
         cmd.env("NYASH_LLVM_OPT_LEVEL", &level);
     }
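As an illustration of what the new normalize_canary_json helper does (coerce schema_version to "1.0", rename the block key 'inst' to 'instructions', and wrap a flat const {"ty","value"} pair into a typed value object), the test-style sketch below feeds it one minimal canary document and checks the resulting shape. The module and test names are illustrative and not part of the commit; only field names that appear in the diff are used.

#[cfg(test)]
mod canary_normalize_example {
    use super::normalize_canary_json;
    use serde_json::json;

    #[test]
    fn const_value_is_wrapped_and_keys_are_renamed() {
        // Minimal canary-style input using only the keys named in the diff.
        let input = json!({
            "schema_version": 1,
            "functions": [{
                "blocks": [{
                    "inst": [{ "op": "const", "ty": "i64", "value": 42 }]
                }]
            }]
        });

        let out = normalize_canary_json(input);

        // schema_version 1 (number) is coerced to the string "1.0".
        assert_eq!(out["schema_version"], json!("1.0"));
        // 'inst' is renamed to 'instructions' and the const gains a typed value object.
        let ins = &out["functions"][0]["blocks"][0]["instructions"][0];
        assert_eq!(ins["op"], json!("const"));
        assert_eq!(ins["value"], json!({ "type": "i64", "value": 42 }));
    }
}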