Files
hakorune/src/runner/selfhost.rs

564 lines
30 KiB
Rust

/*!
 * Runner selfhost helpers — Ny compiler pipeline (Ny -> JSON v0)
 *
 * Provides a stable entry point for callers
 * (`NyashRunner::try_run_selfhost_pipeline`). The pipeline implementation
 * lives in this file; it still relies on shared helpers under
 * `modes::common_util` (spawning a process with a timeout, and the external
 * compiler-EXE JSON parser).
 */
use super::*;
use nyash_rust::parser::NyashParser;
use std::io::Read;
use std::process::Stdio;
use std::thread::sleep;
use std::time::{Duration, Instant};
use std::{fs, process};
impl NyashRunner {
    /// Selfhost (Ny -> JSON v0) pipeline, including the EXE / VM / Python fallbacks.
    ///
    /// Steps:
    /// 1. Read `filename`; when `using` support is enabled, strip `using` lines and
    ///    register the referenced modules in the modules registry.
    /// 2. Write the (possibly stripped) source to `tmp/ny_parser_input.ny`, unless the
    ///    "use tmp only" switch asks to reuse the existing file.
    /// 3. Try the JSON v0 producers in order, stopping at the first one that decides
    ///    the outcome:
    ///    - the Ny selfhost compiler program run in a child VM,
    ///    - the Python MVP parser harness,
    ///    - the external compiler EXE (only when `NYASH_USE_NY_COMPILER_EXE=1`),
    ///    - an inline wrapper program that embeds the source as a string literal.
    ///
    /// Returns `true` only when a MIR module was produced and handed to
    /// `execute_mir_module`. Returns `false` on failure and also when emit-only mode
    /// is active. When a PyVM run is selected, the process exits with PyVM's status
    /// code instead of returning.
    pub(crate) fn try_run_selfhost_pipeline(&self, filename: &str) -> bool {
        use std::io::Write;

        // Read input source
        let code = match std::fs::read_to_string(filename) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("[ny-compiler] read error: {}", e);
                return false;
            }
        };

        // Optional Phase-15: strip `using` lines and register modules
        // (same policy as execute_nyash_file)
        let code_ref: std::borrow::Cow<'_, str> = if crate::config::env::enable_using() {
            std::borrow::Cow::Owned(strip_using_and_register(&code))
        } else {
            std::borrow::Cow::Borrowed(code.as_str())
        };

        // Write to tmp/ny_parser_input.ny (as expected by Ny parser v0), unless forced
        // to reuse the existing tmp file.
        let use_tmp_only = crate::config::env::ny_compiler_use_tmp_only();
        let tmp_dir = std::path::Path::new("tmp");
        if let Err(e) = std::fs::create_dir_all(tmp_dir) {
            eprintln!("[ny-compiler] mkdir tmp failed: {}", e);
            return false;
        }
        let tmp_path = tmp_dir.join("ny_parser_input.ny");
        if !use_tmp_only {
            match std::fs::File::create(&tmp_path) {
                Ok(mut f) => {
                    if let Err(e) = f.write_all(code_ref.as_bytes()) {
                        eprintln!("[ny-compiler] write tmp failed: {}", e);
                        return false;
                    }
                }
                Err(e) => {
                    eprintln!("[ny-compiler] open tmp failed: {}", e);
                    return false;
                }
            }
        }

        // Each strategy yields `Some(result)` when it decided the outcome and `None`
        // when the next strategy should be tried.
        if let Some(done) = try_child_compiler(self) {
            return done;
        }
        if let Some(done) = try_python_harness(self, &tmp_path) {
            return done;
        }
        if let Some(done) = try_compiler_exe(self, filename) {
            return done;
        }
        run_inline_fallback(self, &code_ref)
    }
}

/// Removes `using ...` lines from `code` and registers each referenced module in the
/// modules registry; returns the remaining source with `\n` line endings.
fn strip_using_and_register(code: &str) -> String {
    let mut out = String::with_capacity(code.len());
    let mut used_names: Vec<(String, Option<String>)> = Vec::new();
    for line in code.lines() {
        let rest = match line.trim_start().strip_prefix("using ") {
            Some(rest) => rest,
            None => {
                out.push_str(line);
                out.push('\n');
                continue;
            }
        };
        if crate::config::env::cli_verbose() {
            eprintln!("[using] stripped(line→selfhost): {}", line);
        }
        let rest = rest.trim();
        let rest = rest.strip_suffix(';').unwrap_or(rest).trim();
        let (target, alias) = match rest.find(" as ") {
            Some(pos) => (
                rest[..pos].trim().to_string(),
                Some(rest[pos + 4..].trim().to_string()),
            ),
            None => (rest.to_string(), None),
        };
        let is_path = target.starts_with('"')
            || target.starts_with("./")
            || target.starts_with('/')
            || target.ends_with(".nyash");
        if is_path {
            let path = target.trim_matches('"').to_string();
            // Without an explicit alias the module is named after the file stem.
            let name = alias.unwrap_or_else(|| {
                std::path::Path::new(&path)
                    .file_stem()
                    .and_then(|s| s.to_str())
                    .unwrap_or("module")
                    .to_string()
            });
            used_names.push((name, Some(path)));
        } else {
            // NOTE(review): for `using ns as alias` the alias ends up in the slot that
            // is registered as the module's path below — confirm this is intended.
            used_names.push((target, alias));
        }
    }

    // Register modules into the minimal registry with best-effort path resolution.
    for (ns_or_alias, alias_or_path) in used_names {
        let value = match alias_or_path {
            Some(path) => path,
            None => {
                let rel = format!("apps/{}.nyash", ns_or_alias.replace('.', "/"));
                if std::path::Path::new(&rel).exists() {
                    rel
                } else {
                    ns_or_alias.clone()
                }
            }
        };
        let sb = crate::box_trait::StringBox::new(value);
        crate::runtime::modules_registry::set(ns_or_alias, Box::new(sb));
    }
    out
}

/// Returns the first trimmed line of `text` that looks like a JSON v0 program
/// (starts with `{` and mentions both `"version"` and `"kind"`).
fn find_json_v0_line(text: &str) -> Option<&str> {
    text.lines()
        .map(str::trim)
        .find(|t| t.starts_with('{') && t.contains("\"version\"") && t.contains("\"kind\""))
}

/// First 200 characters of `bytes` (lossy UTF-8) with newlines shown as `\n`, for diagnostics.
fn stdout_head(bytes: &[u8]) -> String {
    String::from_utf8_lossy(bytes)
        .chars()
        .take(200)
        .collect::<String>()
        .replace('\n', "\\n")
}

/// Reads `NYASH_NY_COMPILER_TIMEOUT_MS`; falls back to 2000 ms when unset or unparsable.
fn compiler_timeout_ms_from_env() -> u64 {
    std::env::var("NYASH_NY_COMPILER_TIMEOUT_MS")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(2000)
}

/// Reads `NYASH_NY_COMPILER_EMIT_ONLY`; emit-only is the default when the variable is unset.
fn emit_only_from_env() -> bool {
    std::env::var("NYASH_NY_COMPILER_EMIT_ONLY")
        .map(|v| v == "1")
        .unwrap_or(true)
}

/// True when `NYASH_VM_USE_PY=1`.
fn vm_use_py_from_env() -> bool {
    std::env::var("NYASH_VM_USE_PY").ok().as_deref() == Some("1")
}

/// True when `NYASH_CLI_VERBOSE=1`.
fn cli_verbose_from_env() -> bool {
    std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1")
}

/// Picks the PyVM entry function: `Main.main` if present, else `main` if present,
/// else `Main.main` as the default.
fn pyvm_entry_name(has_function: impl Fn(&str) -> bool) -> &'static str {
    if !has_function("Main.main") && has_function("main") {
        "main"
    } else {
        "Main.main"
    }
}

/// Ensures `tmp/` exists (best effort) and returns the path PyVM reads the MIR JSON from.
fn prepare_pyvm_mir_path() -> std::path::PathBuf {
    let tmp_dir = std::path::Path::new("tmp");
    let _ = std::fs::create_dir_all(tmp_dir);
    tmp_dir.join("nyash_pyvm_mir.json")
}

/// Logs which MIR JSON file is handed to PyVM when `NYASH_CLI_VERBOSE=1`.
fn log_pyvm_bridge(mode: &str, mir_json_path: &std::path::Path) {
    if cli_verbose_from_env() {
        eprintln!("[Bridge] using PyVM ({}) → {}", mode, mir_json_path.display());
    }
}

/// Runs `tools/pyvm_runner.py` on `mir_json_path`, prints `Result: <code>` and exits the
/// process with that code. A spawn failure is reported and exits with status 1.
///
/// `failure_label`, when given, names the mode in the verbose-only failure message.
fn run_pyvm_and_exit(
    py3: &std::path::Path,
    mir_json_path: &std::path::Path,
    entry: &str,
    failure_label: Option<&str>,
) -> ! {
    let mir_arg = mir_json_path.display().to_string();
    let status = match std::process::Command::new(py3)
        .args([
            "tools/pyvm_runner.py",
            "--in",
            mir_arg.as_str(),
            "--entry",
            entry,
        ])
        .status()
    {
        Ok(status) => status,
        Err(e) => {
            // Spawning the interpreter is plain I/O: report it instead of panicking.
            eprintln!("❌ spawn pyvm: {}", e);
            std::process::exit(1);
        }
    };
    let code = status.code().unwrap_or(1);
    if !status.success() {
        if let Some(label) = failure_label {
            if cli_verbose_from_env() {
                eprintln!("❌ PyVM ({}) failed (status={})", label, code);
            }
        }
    }
    // Harmonize with interpreter path for smokes
    println!("Result: {}", code);
    std::process::exit(code);
}

/// Waits for `child` for up to `timeout_ms`, killing it on expiry, and returns its
/// captured stdout together with a flag telling whether the timeout fired.
///
/// Both pipes are drained on background threads while polling; otherwise a child that
/// writes more than the OS pipe buffer would block until the timeout killed it.
fn wait_capturing_stdout(child: &mut std::process::Child, timeout_ms: u64) -> (Vec<u8>, bool) {
    use std::io::Read;

    let stdout_reader = child.stdout.take().map(|mut pipe| {
        std::thread::spawn(move || {
            let mut buf = Vec::new();
            let _ = pipe.read_to_end(&mut buf);
            buf
        })
    });
    // stderr content is not used, but it must still be consumed so the child cannot stall.
    let stderr_drain = child.stderr.take().map(|mut pipe| {
        std::thread::spawn(move || {
            let _ = std::io::copy(&mut pipe, &mut std::io::sink());
        })
    });

    let limit = std::time::Duration::from_millis(timeout_ms);
    let start = std::time::Instant::now();
    let mut timed_out = false;
    loop {
        match child.try_wait() {
            Ok(Some(_)) => break,
            Ok(None) => {
                if start.elapsed() >= limit {
                    let _ = child.kill();
                    let _ = child.wait();
                    timed_out = true;
                    break;
                }
                std::thread::sleep(std::time::Duration::from_millis(10));
            }
            Err(_) => break,
        }
    }

    let out_buf = stdout_reader
        .and_then(|handle| handle.join().ok())
        .unwrap_or_default();
    if let Some(handle) = stderr_drain {
        let _ = handle.join();
    }
    (out_buf, timed_out)
}

/// Strategy 1: run the Ny selfhost compiler program
/// (`apps/selfhost-compiler/compiler.nyash`) in a child VM and consume its JSON v0 output.
/// This avoids inline embedding pitfalls and supports Stage-3 gating via args.
///
/// Returns `None` when the program is missing, cannot be spawned, or yields no usable JSON.
fn try_child_compiler(runner: &NyashRunner) -> Option<bool> {
    let exe = std::env::current_exe()
        .unwrap_or_else(|_| std::path::PathBuf::from("target/release/nyash"));
    let parser_prog = std::path::Path::new("apps/selfhost-compiler/compiler.nyash");
    if !parser_prog.exists() {
        return None;
    }

    let mut cmd = std::process::Command::new(&exe);
    cmd.arg("--backend").arg("vm").arg(parser_prog);
    // Forward minimal args to child parser program.
    // NOTE(review): each forwarded flag is preceded by its own `--`, so the child can
    // receive several `--` separators — confirm the child CLI expects that.
    if crate::config::env::ny_compiler_min_json() {
        cmd.arg("--").arg("--min-json");
    }
    // Always feed input via tmp file written by the parent pipeline
    cmd.arg("--").arg("--read-tmp");
    if crate::config::env::ny_compiler_stage3() {
        cmd.arg("--").arg("--stage3");
    }
    // Suppress parent noise and keep only JSON from child
    cmd.env_remove("NYASH_USE_NY_COMPILER");
    cmd.env_remove("NYASH_CLI_VERBOSE");
    cmd.env("NYASH_JSON_ONLY", "1");
    let timeout_ms: u64 = crate::config::env::ny_compiler_timeout_ms();
    cmd.stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped());

    let mut child = match cmd.spawn() {
        Ok(child) => child,
        Err(_) => return None,
    };
    let (out_buf, timed_out) = wait_capturing_stdout(&mut child, timeout_ms);
    if timed_out {
        eprintln!(
            "[ny-compiler] child timeout after {} ms; stdout(head)='{}'",
            timeout_ms,
            stdout_head(&out_buf)
        );
    }

    // Whatever was captured is still scanned, even after a timeout.
    let stdout = String::from_utf8_lossy(&out_buf);
    let json_line = find_json_v0_line(&stdout)?;
    let module = match super::json_v0_bridge::parse_json_v0_to_module(json_line) {
        Ok(module) => module,
        Err(e) => {
            eprintln!("[ny-compiler] json parse error (child): {}", e);
            return None;
        }
    };
    super::json_v0_bridge::maybe_dump_mir(&module);
    if crate::config::env::ny_compiler_emit_only() {
        return Some(false);
    }
    // Prefer PyVM path when requested
    if crate::config::env::vm_use_py() {
        if let Ok(py3) = which::which("python3") {
            if std::path::Path::new("tools/pyvm_runner.py").exists() {
                let mir_json_path = prepare_pyvm_mir_path();
                if let Err(e) = crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(
                    &module,
                    &mir_json_path,
                ) {
                    eprintln!("❌ PyVM MIR JSON emit error: {}", e);
                    std::process::exit(1);
                }
                let entry = pyvm_entry_name(|name| module.functions.contains_key(name));
                run_pyvm_and_exit(&py3, &mir_json_path, entry, None);
            }
        }
    }
    runner.execute_mir_module(&module);
    Some(true)
}

/// Strategy 2: Python MVP harness (`tools/ny_parser_mvp.py`) producing JSON v0 from the
/// tmp input file. Skipped when `NYASH_NY_COMPILER_SKIP_PY=1`.
///
/// Returns `None` when python3 or the harness is unavailable, the run timed out, or the
/// first stdout line is not JSON v0.
fn try_python_harness(runner: &NyashRunner, tmp_path: &std::path::Path) -> Option<bool> {
    if std::env::var("NYASH_NY_COMPILER_SKIP_PY").ok().as_deref() == Some("1") {
        return None;
    }
    let py3 = which::which("python3").ok()?;
    let py = std::path::Path::new("tools/ny_parser_mvp.py");
    if !py.exists() {
        return None;
    }

    let mut cmd = std::process::Command::new(&py3);
    cmd.arg(py).arg(tmp_path);
    let timeout_ms = compiler_timeout_ms_from_env();
    let out = match super::modes::common_util::io::spawn_with_timeout(cmd, timeout_ms) {
        Ok(o) => o,
        Err(e) => {
            eprintln!("[ny-compiler] python harness failed: {}", e);
            return Some(false);
        }
    };
    if out.timed_out {
        // Report the timeout like the other strategies do, then try the next one.
        eprintln!("[ny-compiler] python harness timeout after {} ms", timeout_ms);
        return None;
    }

    // Only the first stdout line is considered by this strategy.
    let text = String::from_utf8(out.stdout).ok()?;
    let line = text.lines().next().unwrap_or("");
    if !(line.contains("\"version\"") && line.contains("\"kind\"")) {
        return None;
    }
    let module = match super::json_v0_bridge::parse_json_v0_to_module(line) {
        Ok(module) => module,
        Err(e) => {
            eprintln!("[ny-compiler] json parse error: {}", e);
            return Some(false);
        }
    };
    super::json_v0_bridge::maybe_dump_mir(&module);
    if emit_only_from_env() {
        return Some(false);
    }
    // Prefer PyVM for selfhost pipeline (parity reference)
    if vm_use_py_from_env() {
        let mir_json_path = prepare_pyvm_mir_path();
        if let Err(e) =
            crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(&module, &mir_json_path)
        {
            eprintln!("❌ PyVM MIR JSON emit error: {}", e);
            std::process::exit(1);
        }
        log_pyvm_bridge("selfhost-py", &mir_json_path);
        let entry = pyvm_entry_name(|name| module.functions.contains_key(name));
        run_pyvm_and_exit(&py3, &mir_json_path, entry, Some("selfhost-py"));
    }
    runner.execute_mir_module(&module);
    Some(true)
}

/// Strategy 3: external parser EXE (`nyash_compiler`), only when
/// `NYASH_USE_NY_COMPILER_EXE=1`.
///
/// Returns `None` when not requested or when no EXE can be located; once the EXE exists
/// this strategy always decides the outcome.
fn try_compiler_exe(runner: &NyashRunner, filename: &str) -> Option<bool> {
    if std::env::var("NYASH_USE_NY_COMPILER_EXE").ok().as_deref() != Some("1") {
        return None;
    }
    // Resolve parser EXE path: explicit override, then dist/, then PATH.
    let exe_path = match std::env::var("NYASH_NY_COMPILER_EXE_PATH") {
        Ok(p) => std::path::PathBuf::from(p),
        Err(_) => {
            let mut p = std::path::PathBuf::from("dist/nyash_compiler");
            p.push(if cfg!(windows) {
                "nyash_compiler.exe"
            } else {
                "nyash_compiler"
            });
            if p.exists() {
                p
            } else {
                which::which("nyash_compiler").unwrap_or(p)
            }
        }
    };
    if !exe_path.exists() {
        return None;
    }

    let timeout_ms = compiler_timeout_ms_from_env();
    // NOTE(review): `exe_path` only gates this branch; it is not passed to the helper,
    // which presumably resolves the EXE on its own — confirm.
    let module = match super::modes::common_util::selfhost_exe::exe_try_parse_json_v0(
        filename, timeout_ms,
    ) {
        Some(module) => module,
        None => return Some(false),
    };
    super::json_v0_bridge::maybe_dump_mir(&module);
    if emit_only_from_env() {
        return Some(false);
    }
    // Prefer PyVM when requested (reference semantics)
    if vm_use_py_from_env() {
        if let Ok(py3) = which::which("python3") {
            if std::path::Path::new("tools/pyvm_runner.py").exists() {
                let mir_json_path = prepare_pyvm_mir_path();
                if let Err(e) = crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(
                    &module,
                    &mir_json_path,
                ) {
                    eprintln!("❌ PyVM MIR JSON emit error: {}", e);
                    std::process::exit(1);
                }
                log_pyvm_bridge("selfhost", &mir_json_path);
                let entry = pyvm_entry_name(|name| module.functions.contains_key(name));
                run_pyvm_and_exit(&py3, &mir_json_path, entry, None);
            }
        }
    }
    runner.execute_mir_module(&module);
    Some(true)
}

/// Escapes `src` for embedding inside a double-quoted string literal of the inline
/// wrapper program: backslashes and quotes are escaped, carriage returns are dropped.
fn escape_for_inline_literal(src: &str) -> String {
    let mut esc = String::with_capacity(src.len());
    for ch in src.chars() {
        match ch {
            '\\' => esc.push_str("\\\\"),
            '"' => esc.push_str("\\\""),
            // NOTE(review): newlines are embedded verbatim (not as the two characters
            // `\n`), so the literal spans several lines — confirm this is intended.
            '\n' => esc.push('\n'),
            '\r' => {}
            _ => esc.push(ch),
        }
    }
    esc
}

/// Strategy 4 (fallback): inline VM run. Embeds the source into a tiny wrapper program
/// that prints JSON, which avoids CLI arg forwarding complexity and does not require
/// FileBox. Always decides the outcome.
fn run_inline_fallback(runner: &NyashRunner, source: &str) -> bool {
    let esc = escape_for_inline_literal(source);
    let inline_path = std::path::Path::new("tmp").join("inline_selfhost_emit.nyash");
    let inline_code = format!(
        "include \"apps/selfhost-compiler/boxes/parser_box.nyash\"\ninclude \"apps/selfhost-compiler/boxes/emitter_box.nyash\"\nstatic box Main {{\n main(args) {{\n local s = \"{}\"\n local p = new ParserBox()\n p.stage3_enable(1)\n local json = p.parse_program2(s)\n local e = new EmitterBox()\n json = e.emit_program(json, \"[]\")\n print(json)\n return 0\n }}\n}}\n",
        esc
    );
    if let Err(e) = std::fs::write(&inline_path, inline_code) {
        eprintln!("[ny-compiler] write inline failed: {}", e);
        return false;
    }

    let exe = std::env::current_exe()
        .unwrap_or_else(|_| std::path::PathBuf::from("target/release/nyash"));
    let mut cmd = std::process::Command::new(exe);
    cmd.arg("--backend").arg("vm").arg(&inline_path);
    cmd.env_remove("NYASH_USE_NY_COMPILER");
    cmd.env_remove("NYASH_CLI_VERBOSE");
    cmd.env("NYASH_JSON_ONLY", "1");
    let timeout_ms = compiler_timeout_ms_from_env();
    let out = match super::modes::common_util::io::spawn_with_timeout(cmd, timeout_ms) {
        Ok(o) => o,
        Err(e) => {
            eprintln!("[ny-compiler] spawn inline vm failed: {}", e);
            return false;
        }
    };
    if out.timed_out {
        eprintln!(
            "[ny-compiler] inline timeout after {} ms; stdout(head)='{}'",
            timeout_ms,
            stdout_head(&out.stdout)
        );
    }

    let raw = String::from_utf8_lossy(&out.stdout);
    let json_line = match find_json_v0_line(&raw) {
        Some(line) => line,
        None => return false,
    };
    match super::json_v0_bridge::parse_json_v0_to_module(json_line) {
        Ok(module) => {
            super::json_v0_bridge::maybe_dump_mir(&module);
            if emit_only_from_env() {
                return false;
            }
            // Phase-15 policy: when NYASH_VM_USE_PY=1, prefer PyVM as reference executor
            // regardless of BoxCall presence to ensure semantics parity (e.g., PHI merges).
            let prefer_pyvm = vm_use_py_from_env();
            // Backward compatibility: if not preferring PyVM explicitly, still
            // auto-enable it when BoxCalls exist.
            let needs_pyvm = !prefer_pyvm
                && module.functions.values().any(|f| {
                    f.blocks.values().any(|bb| {
                        bb.instructions.iter().any(|inst| {
                            matches!(inst, crate::mir::MirInstruction::BoxCall { .. })
                        })
                    })
                });
            if prefer_pyvm || needs_pyvm {
                if let Ok(py3) = which::which("python3") {
                    if std::path::Path::new("tools/pyvm_runner.py").exists() {
                        let mir_json_path = prepare_pyvm_mir_path();
                        if let Err(e) =
                            crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(
                                &module,
                                &mir_json_path,
                            )
                        {
                            eprintln!("❌ PyVM MIR JSON emit error: {}", e);
                            std::process::exit(1);
                        }
                        let mode = if prefer_pyvm {
                            "selfhost"
                        } else {
                            "selfhost-fallback"
                        };
                        log_pyvm_bridge(mode, &mir_json_path);
                        let entry = pyvm_entry_name(|name| module.functions.contains_key(name));
                        run_pyvm_and_exit(
                            &py3,
                            &mir_json_path,
                            entry,
                            Some("selfhost-fallback"),
                        );
                    }
                }
            }
            runner.execute_mir_module(&module);
            true
        }
        Err(e) => {
            eprintln!("❌ JSON v0 bridge error: {}", e);
            false
        }
    }
}