2025-10-31 23:16:27 +09:00
use std ::env ;
2025-09-17 20:33:19 +09:00
use std ::fs ::File ;
use std ::io ::{ Read , Write } ;
use std ::path ::{ Path , PathBuf } ;
use std ::process ::Command ;
use anyhow ::{ bail , Context , Result } ;
use clap ::{ ArgAction , Parser } ;
2025-11-09 23:40:36 +09:00
use serde_json ::Value as JsonValue ;
2025-09-17 20:33:19 +09:00
#[ derive(Parser, Debug) ]
2025-09-18 03:57:25 +09:00
#[ command(
name = " ny-llvmc " ,
about = " Nyash LLVM compiler (llvmlite harness wrapper) "
) ]
2025-09-17 20:33:19 +09:00
struct Args {
/// MIR JSON input file path (use '-' to read from stdin). When omitted with --dummy, a dummy ny_main is emitted.
#[ arg(long = " in " , value_name = " FILE " , default_value = " - " ) ]
infile : String ,
2025-09-18 03:57:25 +09:00
/// Output path. For `--emit obj`, this is an object (.o). For `--emit exe`, this is an executable path.
2025-09-17 20:33:19 +09:00
#[ arg(long, value_name = " FILE " ) ]
out : PathBuf ,
/// Generate a dummy object (ny_main -> i32 0). Ignores --in when set.
#[ arg(long, action = ArgAction::SetTrue) ]
dummy : bool ,
/// Path to Python harness script (defaults to tools/llvmlite_harness.py in CWD)
#[ arg(long, value_name = " FILE " ) ]
harness : Option < PathBuf > ,
2025-09-18 03:57:25 +09:00
/// Emit kind: 'obj' (default) or 'exe'.
#[ arg(long, value_name = " {obj|exe} " , default_value = " obj " ) ]
emit : String ,
2025-09-24 12:57:33 +09:00
/// Path to directory containing libnyash_kernel.a when emitting an executable. If omitted, searches target/release then crates/nyash_kernel/target/release.
2025-09-18 03:57:25 +09:00
#[ arg(long, value_name = " DIR " ) ]
nyrt : Option < PathBuf > ,
/// Extra linker libs/flags appended when emitting an executable (single string, space-separated).
#[ arg(long, value_name = " FLAGS " ) ]
libs : Option < String > ,
2025-09-17 20:33:19 +09:00
}
fn main ( ) -> Result < ( ) > {
let args = Args ::parse ( ) ;
// Ensure parent dir exists
if let Some ( parent ) = args . out . parent ( ) {
std ::fs ::create_dir_all ( parent ) . ok ( ) ;
}
// Resolve harness path
let harness_path = if let Some ( p ) = args . harness . clone ( ) {
p
} else {
PathBuf ::from ( " tools/llvmlite_harness.py " )
} ;
2025-09-18 03:57:25 +09:00
// Determine emit kind
let emit_exe = matches! ( args . emit . as_str ( ) , " exe " | " EXE " ) ;
2025-09-17 20:33:19 +09:00
if args . dummy {
2025-09-18 03:57:25 +09:00
// Dummy ny_main: always go through harness to produce an object then link if requested
let obj_path = if emit_exe {
// derive a temporary .o path next to output
let mut p = args . out . clone ( ) ;
p . set_extension ( " o " ) ;
p
} else {
args . out . clone ( )
} ;
run_harness_dummy ( & harness_path , & obj_path )
2025-09-17 20:33:19 +09:00
. with_context ( | | " failed to run harness in dummy mode " ) ? ;
2025-09-18 03:57:25 +09:00
if emit_exe {
2025-09-25 01:09:48 +09:00
link_executable (
& obj_path ,
& args . out ,
args . nyrt . as_ref ( ) ,
args . libs . as_deref ( ) ,
) ? ;
2025-09-18 03:57:25 +09:00
println! ( " [ny-llvmc] executable written: {} " , args . out . display ( ) ) ;
} else {
println! ( " [ny-llvmc] dummy object written: {} " , obj_path . display ( ) ) ;
}
2025-09-17 20:33:19 +09:00
return Ok ( ( ) ) ;
}
2025-11-09 23:40:36 +09:00
// Prepare input JSON path: either from file or stdin -> temp file.
// Optionally normalize canary JSON into the shape expected by the Python builder
// when HAKO_LLVM_CANARY_NORMALIZE=1 (no default behavior change).
2025-09-17 20:33:19 +09:00
let mut temp_path : Option < PathBuf > = None ;
2025-11-09 23:40:36 +09:00
let canary_norm = env ::var ( " HAKO_LLVM_CANARY_NORMALIZE " ) . ok ( ) . as_deref ( ) = = Some ( " 1 " ) ;
2025-09-17 20:33:19 +09:00
let input_path = if args . infile = = " - " {
let mut buf = String ::new ( ) ;
std ::io ::stdin ( )
. read_to_string ( & mut buf )
. context ( " reading MIR JSON from stdin " ) ? ;
2025-11-09 23:40:36 +09:00
let mut val : serde_json ::Value =
2025-09-18 03:57:25 +09:00
serde_json ::from_str ( & buf ) . context ( " stdin does not contain valid JSON " ) ? ;
2025-11-09 23:40:36 +09:00
if canary_norm {
val = normalize_canary_json ( val ) ;
}
2025-09-17 20:33:19 +09:00
let tmp = std ::env ::temp_dir ( ) . join ( " ny_llvmc_stdin.json " ) ;
let mut f = File ::create ( & tmp ) . context ( " create temp json file " ) ? ;
2025-11-09 23:40:36 +09:00
let out = serde_json ::to_vec ( & val ) . context ( " serialize normalized json " ) ? ;
f . write_all ( & out ) . context ( " write temp json " ) ? ;
2025-09-17 20:33:19 +09:00
temp_path = Some ( tmp . clone ( ) ) ;
tmp
} else {
2025-11-09 23:40:36 +09:00
let p = PathBuf ::from ( & args . infile ) ;
if canary_norm {
// Read file, normalize, and write to a temp path
let mut buf = String ::new ( ) ;
File ::open ( & p ) . and_then ( | mut f | f . read_to_string ( & mut buf ) ) . context ( " read input json " ) ? ;
let mut val : serde_json ::Value = serde_json ::from_str ( & buf ) . context ( " input is not valid JSON " ) ? ;
val = normalize_canary_json ( val ) ;
let tmp = std ::env ::temp_dir ( ) . join ( " ny_llvmc_in.json " ) ;
let mut f = File ::create ( & tmp ) . context ( " create temp json file " ) ? ;
let out = serde_json ::to_vec ( & val ) . context ( " serialize normalized json " ) ? ;
f . write_all ( & out ) . context ( " write temp json " ) ? ;
temp_path = Some ( tmp . clone ( ) ) ;
tmp
} else {
p
}
2025-09-17 20:33:19 +09:00
} ;
if ! input_path . exists ( ) {
bail! ( " input JSON not found: {} " , input_path . display ( ) ) ;
}
2025-11-13 16:40:58 +09:00
// Optional: dump incoming MIR JSON for diagnostics (AotPrep 後の入力を観測)
if let Ok ( dump_path ) = env ::var ( " NYASH_LLVM_DUMP_MIR_IN " ) {
let _ = std ::fs ::copy ( & input_path , & dump_path ) ;
eprintln! ( " [ny-llvmc] dumped MIR input to {} " , dump_path ) ;
}
2025-11-09 23:40:36 +09:00
// Optional: preflight shape/hints (best-effort; no behavior change)
if let Ok ( s ) = std ::fs ::read_to_string ( & input_path ) {
if let Ok ( val ) = serde_json ::from_str ::< JsonValue > ( & s ) {
if let Some ( hint ) = shape_hint ( & val ) {
eprintln! ( " [ny-llvmc/hint] {} " , hint ) ;
}
}
}
2025-09-18 03:57:25 +09:00
// Produce object first
let obj_path = if emit_exe {
let mut p = args . out . clone ( ) ;
p . set_extension ( " o " ) ;
p
} else {
args . out . clone ( )
} ;
2025-11-09 23:40:36 +09:00
// Optional: print concise shape hint in verbose mode when not normalizing
if env ::var ( " NYASH_CLI_VERBOSE " ) . ok ( ) . as_deref ( ) = = Some ( " 1 " ) & & env ::var ( " HAKO_LLVM_CANARY_NORMALIZE " ) . ok ( ) . as_deref ( ) ! = Some ( " 1 " ) {
if let Ok ( mut f ) = File ::open ( & input_path ) {
let mut buf = String ::new ( ) ;
if f . read_to_string ( & mut buf ) . is_ok ( ) {
if let Ok ( val ) = serde_json ::from_str ::< serde_json ::Value > ( & buf ) {
if let Some ( h ) = shape_hint ( & val ) {
eprintln! ( " [ny-llvmc/hint] {} " , h ) ;
}
}
}
}
}
2025-09-18 03:57:25 +09:00
run_harness_in ( & harness_path , & input_path , & obj_path ) . with_context ( | | {
format! (
" failed to compile MIR JSON via harness: {} " ,
input_path . display ( )
)
} ) ? ;
if emit_exe {
2025-09-25 01:09:48 +09:00
link_executable (
& obj_path ,
& args . out ,
args . nyrt . as_ref ( ) ,
args . libs . as_deref ( ) ,
) ? ;
2025-09-18 03:57:25 +09:00
println! ( " [ny-llvmc] executable written: {} " , args . out . display ( ) ) ;
} else {
println! ( " [ny-llvmc] object written: {} " , obj_path . display ( ) ) ;
}
2025-09-17 20:33:19 +09:00
// Cleanup temp file if used
if let Some ( p ) = temp_path {
let _ = std ::fs ::remove_file ( p ) ;
}
Ok ( ( ) )
}
2025-11-09 23:40:36 +09:00
/// Return a concise hint if the MIR JSON likely has a schema/shape mismatch for the Python harness.
fn shape_hint ( v : & JsonValue ) -> Option < String > {
// Accept both v0/v1 tolerant; only emit hint on common canary shapes
// 1) schema_version numeric 1 rather than string "1.0"
if let Some ( sv ) = v . get ( " schema_version " ) {
if sv . is_number ( ) {
if sv . as_i64 ( ) = = Some ( 1 ) {
return Some ( " schema_version=1 detected; set to \" 1.0 \" or enable HAKO_LLVM_CANARY_NORMALIZE=1 " . into ( ) ) ;
}
} else if sv . as_str ( ) = = Some ( " 1 " ) {
return Some ( " schema_version= \" 1 \" detected; prefer \" 1.0 \" or enable HAKO_LLVM_CANARY_NORMALIZE=1 " . into ( ) ) ;
}
}
// 2) blocks use 'inst' instead of 'instructions'
if let Some ( funcs ) = v . get ( " functions " ) {
if let Some ( arr ) = funcs . as_array ( ) {
for f in arr {
if let Some ( blocks ) = f . get ( " blocks " ) . and_then ( | b | b . as_array ( ) ) {
for b in blocks {
if b . get ( " inst " ) . is_some ( ) & & b . get ( " instructions " ) . is_none ( ) {
return Some ( " block key 'inst' found; rename to 'instructions' or enable HAKO_LLVM_CANARY_NORMALIZE=1 " . into ( ) ) ;
}
}
}
}
}
}
None
}
/// Normalize a very small canary JSON into the shape expected by the Python harness.
///
/// - `schema_version`: the number `1` or the string `"1"` is coerced to the string `"1.0"`;
///   any other value is left alone.
/// - Block key `inst` is renamed to `instructions` (if both exist, `inst` wins).
/// - `const` instructions carrying both a flat `ty` and a flat `value`
///   (`{"ty":"i64","value":N}`) are rewritten to `{"value":{"type":"i64","value":N}}`.
///
/// Anything that does not match these shapes (non-array `functions`, non-object blocks,
/// const instructions with only one of the two keys, ...) is passed through unchanged.
fn normalize_canary_json(mut v: serde_json::Value) -> serde_json::Value {
    use serde_json::{Map, Value};

    // schema_version: number 1 / string "1" -> string "1.0"
    if let Some(sv) = v.get_mut("schema_version") {
        if sv.as_i64() == Some(1) || sv.as_str() == Some("1") {
            *sv = Value::String("1.0".to_string());
        }
    }

    // Every object found under functions[].blocks[].
    let blocks = v
        .get_mut("functions")
        .and_then(Value::as_array_mut)
        .into_iter()
        .flatten()
        .filter_map(|func| func.get_mut("blocks").and_then(Value::as_array_mut))
        .flatten()
        .filter_map(Value::as_object_mut);

    for block in blocks {
        // Rename 'inst' -> 'instructions'
        if let Some(insts) = block.remove("inst") {
            block.insert("instructions".to_string(), insts);
        }

        let instructions = block
            .get_mut("instructions")
            .and_then(Value::as_array_mut)
            .into_iter()
            .flatten()
            .filter_map(Value::as_object_mut);

        for ins in instructions {
            if ins.get("op").and_then(Value::as_str) != Some("const") {
                continue;
            }
            // Check for BOTH keys before removing anything. Removing first would drop the
            // `value` of a const that is already in the typed form (a `value` object with
            // no sibling `ty`), or drop a lone `ty`.
            if !(ins.contains_key("ty") && ins.contains_key("value")) {
                continue;
            }
            if let (Some(ty), Some(val)) = (ins.remove("ty"), ins.remove("value")) {
                let mut typed = Map::new();
                typed.insert("type".to_string(), ty);
                typed.insert("value".to_string(), val);
                ins.insert("value".to_string(), Value::Object(typed));
            }
        }
    }
    v
}
2025-09-17 20:33:19 +09:00
fn run_harness_dummy ( harness : & Path , out : & Path ) -> Result < ( ) > {
ensure_python ( ) ? ;
2025-10-31 23:16:27 +09:00
let mut cmd = Command ::new ( " python3 " ) ;
cmd . arg ( harness ) . arg ( " --out " ) . arg ( out ) ;
propagate_opt_level ( & mut cmd ) ;
let status = cmd
2025-09-17 20:33:19 +09:00
. status ( )
. context ( " failed to execute python harness (dummy) " ) ? ;
if ! status . success ( ) {
bail! ( " harness exited with status: {:?} " , status . code ( ) ) ;
}
Ok ( ( ) )
}
fn run_harness_in ( harness : & Path , input : & Path , out : & Path ) -> Result < ( ) > {
ensure_python ( ) ? ;
2025-10-31 23:16:27 +09:00
let mut cmd = Command ::new ( " python3 " ) ;
cmd . arg ( harness )
2025-09-17 20:33:19 +09:00
. arg ( " --in " )
. arg ( input )
. arg ( " --out " )
2025-10-31 23:16:27 +09:00
. arg ( out ) ;
propagate_opt_level ( & mut cmd ) ;
2025-11-04 16:33:04 +09:00
let status = cmd . status ( ) . context ( " failed to execute python harness " ) ? ;
2025-09-17 20:33:19 +09:00
if ! status . success ( ) {
bail! ( " harness exited with status: {:?} " , status . code ( ) ) ;
}
Ok ( ( ) )
}
fn ensure_python ( ) -> Result < ( ) > {
match Command ::new ( " python3 " ) . arg ( " --version " ) . output ( ) {
Ok ( out ) if out . status . success ( ) = > Ok ( ( ) ) ,
_ = > bail! ( " python3 not found in PATH (required for llvmlite harness) " ) ,
}
}
2025-10-31 23:16:27 +09:00
/// Forwards the LLVM optimization level to the child process `cmd`.
///
/// The level is read from `NYASH_LLVM_OPT_LEVEL`, falling back to the deprecated
/// `HAKO_LLVM_OPT_LEVEL` (a deprecation notice is printed to stderr when only the latter
/// is set). When a level is found it is exported to the child under BOTH names; when
/// neither variable is set, `cmd` is left untouched.
fn propagate_opt_level(cmd: &mut Command) {
    let nyash = env::var("NYASH_LLVM_OPT_LEVEL").ok();
    let hako = env::var("HAKO_LLVM_OPT_LEVEL").ok();

    if nyash.is_none() && hako.is_some() {
        eprintln!("[deprecate/env] 'HAKO_LLVM_OPT_LEVEL' is deprecated; use 'NYASH_LLVM_OPT_LEVEL'");
    }

    // NYASH_* takes precedence over the legacy HAKO_* name.
    if let Some(level) = nyash.or(hako) {
        cmd.env("HAKO_LLVM_OPT_LEVEL", &level);
        cmd.env("NYASH_LLVM_OPT_LEVEL", &level);
    }
}
2025-09-25 01:09:48 +09:00
fn link_executable (
obj : & Path ,
out_exe : & Path ,
nyrt_dir_opt : Option < & PathBuf > ,
extra_libs : Option < & str > ,
) -> Result < ( ) > {
2025-09-18 03:57:25 +09:00
// Resolve nyRT static lib
let nyrt_dir = if let Some ( dir ) = nyrt_dir_opt {
dir . clone ( )
} else {
2025-09-24 12:57:33 +09:00
// try target/release then crates/nyash_kernel/target/release
2025-09-18 03:57:25 +09:00
let a = PathBuf ::from ( " target/release " ) ;
2025-09-24 12:57:33 +09:00
let b = PathBuf ::from ( " crates/nyash_kernel/target/release " ) ;
2025-09-25 01:09:48 +09:00
if a . join ( " libnyash_kernel.a " ) . exists ( ) {
a
} else {
b
}
2025-09-18 03:57:25 +09:00
} ;
2025-09-24 12:57:33 +09:00
let libnyrt = nyrt_dir . join ( " libnyash_kernel.a " ) ;
2025-09-18 03:57:25 +09:00
if ! libnyrt . exists ( ) {
2025-09-25 01:09:48 +09:00
bail! (
" libnyash_kernel.a not found in {} (use --nyrt to specify) " ,
nyrt_dir . display ( )
) ;
2025-09-18 03:57:25 +09:00
}
// Choose a C linker
2025-09-25 01:09:48 +09:00
let linker = [ " cc " , " clang " , " gcc " ]
. into_iter ( )
. find ( | c | {
Command ::new ( c )
. arg ( " --version " )
. output ( )
. map ( | o | o . status . success ( ) )
. unwrap_or ( false )
} )
. unwrap_or ( " cc " ) ;
2025-09-18 03:57:25 +09:00
let mut cmd = Command ::new ( linker ) ;
cmd . arg ( " -o " ) . arg ( out_exe ) ;
cmd . arg ( obj ) ;
2025-09-24 12:57:33 +09:00
// Whole-archive libnyash_kernel to ensure all objects are linked
2025-09-25 01:09:48 +09:00
cmd . arg ( " -Wl,--whole-archive " )
. arg ( & libnyrt )
. arg ( " -Wl,--no-whole-archive " ) ;
2025-09-18 03:57:25 +09:00
// Common libs on Linux
cmd . arg ( " -ldl " ) . arg ( " -lpthread " ) . arg ( " -lm " ) ;
if let Some ( extras ) = extra_libs {
for tok in extras . split_whitespace ( ) {
cmd . arg ( tok ) ;
}
}
2025-11-13 16:40:58 +09:00
// Run linker and capture diagnostics for better error reporting
let output = cmd
. output ( )
. with_context ( | | format! ( " failed to invoke system linker: {} " , linker ) ) ? ;
if ! output . status . success ( ) {
eprintln! ( " [ny-llvmc/link] command: {} " , linker ) ;
// Show args (for debugging)
// Note: std::process::Command doesn't expose argv back; re-emit essential parts
eprintln! (
" [ny-llvmc/link] args: -o {} {} -Wl,--whole-archive {} -Wl,--no-whole-archive -ldl -lpthread -lm {} " ,
out_exe . display ( ) ,
obj . display ( ) ,
libnyrt . display ( ) ,
extra_libs . unwrap_or ( " " )
) ;
let stderr = String ::from_utf8_lossy ( & output . stderr ) ;
let stdout = String ::from_utf8_lossy ( & output . stdout ) ;
eprintln! ( " [ny-llvmc/link:stdout] \n {} " , stdout ) ;
eprintln! ( " [ny-llvmc/link:stderr] \n {} " , stderr ) ;
bail! ( " linker exited with status: {:?} " , output . status . code ( ) ) ;
2025-09-18 03:57:25 +09:00
}
Ok ( ( ) )
}