Files
hakorune/src/runner/selfhost.rs
nyash-codex 22575aa1db refactor(selfhost): clean up selfhost.rs - remove duplicates, unify env access
## Changes

### Duplicate code removal
- Remove nested double cli_verbose() checks (2 places)
- Remove duplicate pre_run_reset_oob_if_strict() calls
- Remove duplicate OOB strict check blocks

### Environment variable access unification
- All raw std::env::var() calls replaced with config::env functions
- Added new config::env functions:
  - ny_compiler_use_py()
  - macro_selfhost_pre_expand()
  - scopebox_enable()
  - loopform_normalize()
  - selfhost_inline_force()

### Common helper extraction
- maybe_dump_mir_verbose(): MIR dump with verbose check
- check_oob_strict_exit(): OOB strict mode check and exit
- execute_with_oob_check(): Combined run + OOB check

## Result
- Net ~11 lines reduction
- Much better code structure and maintainability
- Consistent environment variable access through config::env

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 07:18:29 +09:00

529 lines
26 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*!
* Runner selfhost helpers — Ny compiler pipeline (Ny -> JSON v0)
*
* Transitional shim: provides a stable entrypoint from callers, while the
* heavy implementation currently lives in modes/common.rs. Next step will
* migrate the full implementation here.
*/
use super::*;
use nyash_rust::{mir::MirCompiler, parser::NyashParser};
use std::{fs, process};
// ============================================================================
// Selfhost pipeline helpers
// ============================================================================
/// Forward `module` to the JSON v0 bridge's MIR dumper, but only when the
/// CLI verbose flag is set; otherwise this is a no-op.
fn maybe_dump_mir_verbose(module: &crate::mir::MirModule) {
    let verbose = crate::config::env::cli_verbose();
    if !verbose {
        return;
    }
    super::json_v0_bridge::maybe_dump_mir(module);
}
/// Terminate the process with status 1 when strict OOB mode is enabled and an
/// out-of-bounds access has been observed; otherwise return normally.
fn check_oob_strict_exit() {
    // The observation flag is only consulted once strict mode is confirmed,
    // mirroring a short-circuiting `&&`.
    if !crate::config::env::oob_strict_fail() {
        return;
    }
    if !crate::runtime::observe::oob_seen() {
        return;
    }
    eprintln!("[selfhost][oob-strict] Out-of-bounds observed → exit(1)");
    std::process::exit(1);
}
/// Run module and check OOB, with pre-run reset.
fn execute_with_oob_check(runner: &NyashRunner, module: &crate::mir::MirModule) {
crate::runner::child_env::pre_run_reset_oob_if_strict();
runner.execute_mir_module(module);
check_oob_strict_exit();
}
impl NyashRunner {
    /// Selfhost (Ny -> JSON v0) pipeline, including the EXE/VM/Python fallbacks.
    ///
    /// Routes are attempted in this order:
    /// 1. optional macro pre-expand (parse + MIR compile in-process, PyVM only),
    /// 2. the Ny compiler child program (`lang/src/compiler/entry/compiler.hako`),
    /// 3. the optional Python MVP harness (`tools/ny_parser_mvp.py`),
    /// 4. the optional external parser EXE (`nyash_compiler`),
    /// 5. the dev-only forced inline route.
    ///
    /// # Returns
    /// * `true`  — a route produced a MIR module and executed it in-process.
    /// * `false` — the source was skipped, a step failed, emit-only mode was
    ///   requested, or no route applied; the caller should fall back to the
    ///   default Rust runner.
    ///
    /// # Process exit
    /// This function does not always return: every PyVM route prints
    /// `Result: <code>` and calls `process::exit` with that code, and the
    /// strict OOB check exits with status 1 after an in-process run when an
    /// out-of-bounds access was observed.
    pub(crate) fn try_run_selfhost_pipeline(&self, filename: &str) -> bool {
        use std::io::Write;

        // Phase 25.1b: guard selfhost pipeline to Ny-only sources.
        // `.hako` / other extensions are the responsibility of StageB / the
        // JSON v0 bridge, so anything without a Ny/Nyash extension is skipped
        // immediately here.
        let path = std::path::Path::new(filename);
        if let Some(ext) = path.extension().and_then(|s| s.to_str()) {
            match ext {
                "ny" | "nyash" => { /* continue */ }
                "hako" => {
                    // Opt-in: allow .hako to flow through Ny compiler when explicitly requested.
                    if !crate::config::env::use_ny_compiler() {
                        crate::cli_v!(
                            "[ny-compiler] skip selfhost pipeline for .hako (ext={}); enable NYASH_USE_NY_COMPILER=1 to force",
                            ext
                        );
                        return false;
                    }
                }
                _ => {
                    crate::cli_v!(
                        "[ny-compiler] skip selfhost pipeline for non-Ny source: {} (ext={})",
                        filename,
                        ext
                    );
                    return false;
                }
            }
        } else {
            // No extension: treat as non-Ny for safety
            crate::cli_v!(
                "[ny-compiler] skip selfhost pipeline for source without extension: {}",
                filename
            );
            return false;
        }

        // Read input source
        let code = match fs::read_to_string(filename) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("[ny-compiler] read error: {}", e);
                return false;
            }
        };

        // Optional Phase-15: using prelude merge (text-based for speed).
        // `code_ref` starts as a borrow of the file contents and only becomes
        // owned when one of the rewriting steps below produces new text.
        let mut code_ref: std::borrow::Cow<'_, str> = std::borrow::Cow::Borrowed(&code);
        if crate::config::env::enable_using() {
            let using_ast = crate::config::env::using_ast_enabled();
            if using_ast {
                // Text-based merge: faster for inline/selfhost execution
                match crate::runner::modes::common_util::resolve::merge_prelude_text(
                    self, &code, filename,
                ) {
                    Ok(merged) => {
                        code_ref = std::borrow::Cow::Owned(merged);
                    }
                    Err(e) => {
                        eprintln!("[ny-compiler] using text merge error: {}", e);
                        return false;
                    }
                }
            } else {
                // Legacy: strip only (no prelude merge)
                match crate::runner::modes::common_util::resolve::resolve_prelude_paths_profiled(
                    self, &code, filename,
                ) {
                    Ok((clean, paths)) => {
                        // Any resolved prelude path means the source relies on
                        // `using`, which this profile cannot merge.
                        if !paths.is_empty() {
                            eprintln!("[ny-compiler] using: AST prelude merge is disabled in this profile. Enable NYASH_USING_AST=1 or remove 'using' lines.");
                            return false;
                        }
                        code_ref = std::borrow::Cow::Owned(clean);
                    }
                    Err(e) => {
                        eprintln!("[ny-compiler] {}", e);
                        return false;
                    }
                }
            }
        }

        // Promote dev sugar to standard: pre-expand line-head '@name[:T] = expr' to 'local name[:T] = expr'
        {
            let expanded =
                crate::runner::modes::common_util::resolve::preexpand_at_local(code_ref.as_ref());
            code_ref = std::borrow::Cow::Owned(expanded);
        }

        // Write to tmp/ny_parser_input.ny (as expected by Ny parser v0), unless forced to reuse existing tmp
        let use_tmp_only = crate::config::env::ny_compiler_use_tmp_only();
        let tmp_dir = std::path::Path::new("tmp");
        if let Err(e) = std::fs::create_dir_all(tmp_dir) {
            eprintln!("[ny-compiler] mkdir tmp failed: {}", e);
            return false;
        }

        // Optional macro preexpand path for selfhost
        // Default: auto when macro engine is enabled (safe: PyVM only)
        // Gate: NYASH_MACRO_SELFHOST_PRE_EXPAND={1|auto|0}
        {
            let preenv = crate::config::env::macro_selfhost_pre_expand().or_else(|| {
                if crate::r#macro::enabled() {
                    Some("auto".to_string())
                } else {
                    None
                }
            });
            let do_pre = match preenv.as_deref() {
                Some("1") => true,
                Some("auto") => crate::r#macro::enabled() && crate::config::env::vm_use_py(),
                _ => false,
            };
            // The extra `enabled()` check matters for mode "1", which does not
            // test the macro engine itself.
            if do_pre && crate::r#macro::enabled() {
                crate::cli_v!(
                    "[ny-compiler] selfhost macro pre-expand: engaging (mode={:?})",
                    preenv
                );
                match NyashParser::parse_from_string(code_ref.as_ref()) {
                    Ok(ast0) => {
                        let ast = crate::r#macro::maybe_expand_and_dump(&ast0, false);
                        // Compile to MIR and execute (respect VM/PyVM policy similar to vm mode)
                        let mut mir_compiler = MirCompiler::with_options(true);
                        match mir_compiler.compile(ast) {
                            Ok(result) => {
                                let prefer_pyvm = crate::config::env::vm_use_py();
                                if prefer_pyvm {
                                    if let Ok(code) = crate::runner::modes::common_util::pyvm::run_pyvm_harness_lib(&result.module, "selfhost-preexpand") {
                                        println!("Result: {}", code);
                                        std::process::exit(code);
                                    } else {
                                        eprintln!("❌ PyVM error (selfhost-preexpand)");
                                        std::process::exit(1);
                                    }
                                } else {
                                    // For now, only PyVM path is supported in pre-expand mode; fall back otherwise.
                                    crate::cli_v!("[ny-compiler] pre-expand path requires NYASH_VM_USE_PY=1; falling back to default selfhost");
                                    return false;
                                }
                            }
                            Err(e) => {
                                eprintln!("[ny-compiler] pre-expand compile error: {}", e);
                                return false;
                            }
                        }
                    }
                    Err(e) => {
                        eprintln!("[ny-compiler] pre-expand parse error: {}", e);
                        return false;
                    }
                }
            }
        }

        let tmp_path = tmp_dir.join("ny_parser_input.ny");
        if !use_tmp_only {
            match std::fs::File::create(&tmp_path) {
                Ok(mut f) => {
                    if let Err(e) = f.write_all(code_ref.as_bytes()) {
                        eprintln!("[ny-compiler] write tmp failed: {}", e);
                        return false;
                    }
                }
                Err(e) => {
                    eprintln!("[ny-compiler] open tmp failed: {}", e);
                    return false;
                }
            }
        }

        // Preferred: run Ny selfhost compiler program (lang/src/compiler/entry/compiler.hako)
        // This avoids inline embedding pitfalls and supports Stage-3 gating via args.
        {
            use crate::runner::modes::common_util::selfhost::{child, json};
            let verbose_level = crate::config::env::dump::cli_verbose_level();
            let exe = std::env::current_exe()
                .unwrap_or_else(|_| std::path::PathBuf::from("target/release/nyash"));
            // Phase 28.2: selfhost compiler entry moved under lang/src/compiler/entry
            let parser_prog = std::path::Path::new("lang/src/compiler/entry/compiler.hako");
            if parser_prog.exists() {
                // Phase 28.2: observation log (NYASH_CLI_VERBOSE>=2)
                if verbose_level >= 2 {
                    eprintln!("[selfhost/ny] spawning Ny compiler child process: {}", parser_prog.display());
                }
                // Build extra args forwarded to child program.
                // Each option is emitted as its own `--`, `<flag>` pair.
                let mut extra_owned: Vec<String> = Vec::new();
                if crate::config::env::ny_compiler_min_json() {
                    extra_owned.push("--".to_string());
                    extra_owned.push("--min-json".to_string());
                }
                extra_owned.push("--".to_string());
                extra_owned.push("--read-tmp".to_string());
                if crate::config::env::ny_compiler_stage3() {
                    extra_owned.push("--".to_string());
                    extra_owned.push("--stage3".to_string());
                }
                // Optional: map env toggles to child args (prepasses)
                if crate::config::env::scopebox_enable() {
                    extra_owned.push("--".to_string());
                    extra_owned.push("--scopebox".to_string());
                }
                if crate::config::env::loopform_normalize() {
                    extra_owned.push("--".to_string());
                    extra_owned.push("--loopform".to_string());
                }
                // Optional: developer-provided child args passthrough (space-separated)
                if let Some(raw) = crate::config::env::ny_compiler_child_args() {
                    let items: Vec<String> = raw
                        .split(' ')
                        .filter(|s| !s.trim().is_empty())
                        .map(|s| s.to_string())
                        .collect();
                    if !items.is_empty() {
                        extra_owned.push("--".to_string());
                        extra_owned.extend(items);
                    }
                }
                let extra: Vec<&str> = extra_owned.iter().map(|s| s.as_str()).collect();
                let timeout_ms: u64 = crate::config::env::ny_compiler_timeout_ms();
                if let Some(line) = child::run_ny_program_capture_json(
                    &exe,
                    parser_prog,
                    timeout_ms,
                    &extra,
                    &["NYASH_USE_NY_COMPILER", "NYASH_CLI_VERBOSE"],
                    &[
                        ("NYASH_JSON_ONLY", "1"),
                        ("NYASH_DISABLE_PLUGINS", "1"),
                        ("NYASH_SKIP_TOML_ENV", "1"),
                        ("NYASH_USING_AST", "0"),
                        ("NYASH_ALLOW_USING_FILE", "0"),
                        ("HAKO_ALLOW_USING_FILE", "0"),
                    ],
                ) {
                    // Phase 28.2: observation log - JSON received
                    if verbose_level >= 2 {
                        eprintln!("[selfhost/ny] received Program(JSON v0), size={} bytes", line.len());
                    }
                    match json::parse_json_v0_line(&line) {
                        Ok(module) => {
                            // Phase 28.2: observation log - before maybe_dump_mir
                            if verbose_level >= 2 {
                                let dump_path = crate::config::env::dump::rust_mir_dump_path();
                                eprintln!(
                                    "[selfhost/ny] lowering Program(JSON v0) → MIR via json_v0_bridge (funcs={})",
                                    module.functions.len()
                                );
                                eprintln!(
                                    "[selfhost/ny] calling maybe_dump_mir (RUST_MIR_DUMP_PATH={:?}, cli_verbose={})",
                                    dump_path.as_deref().unwrap_or("<unset>"),
                                    crate::config::env::cli_verbose()
                                );
                            }
                            // Called without the verbose gate on this route (unlike the
                            // later routes), as the log line above indicates.
                            super::json_v0_bridge::maybe_dump_mir(&module);
                            // Phase 28.2: observation log - after maybe_dump_mir
                            if verbose_level >= 2 {
                                if let Some(ref path) = crate::config::env::dump::rust_mir_dump_path() {
                                    if std::path::Path::new(path).exists() {
                                        eprintln!("[selfhost/ny] ✅ MIR dump file created: {}", path);
                                    } else {
                                        eprintln!("[selfhost/ny] ⚠️ MIR dump file NOT created: {}", path);
                                    }
                                }
                            }
                            let emit_only = crate::config::env::ny_compiler_emit_only();
                            if emit_only {
                                return false;
                            }
                            // Prefer PyVM path when requested
                            if crate::config::env::vm_use_py() {
                                if let Some(code) = json::run_pyvm_module(&module, "selfhost") {
                                    println!("Result: {}", code);
                                    std::process::exit(code);
                                }
                            }
                            // Run through the shared helper so strict OOB mode is
                            // enforced here exactly as on the Python/EXE/inline routes.
                            execute_with_oob_check(self, &module);
                            return true;
                        }
                        Err(e) => {
                            // Not fatal: fall through to the next route.
                            eprintln!("[ny-compiler] json parse error (child): {}", e);
                        }
                    }
                }
            }
        }

        // Python MVP (optional): lightweight harness to produce JSON v0.
        // Phase 25.1b: default OFF (enabled only when NYASH_NY_COMPILER_USE_PY=1).
        if crate::config::env::ny_compiler_use_py() {
            if let Ok(py3) = which::which("python3") {
                let py = std::path::Path::new("tools/ny_parser_mvp.py");
                if py.exists() {
                    let mut cmd = std::process::Command::new(&py3);
                    // Phase 25.1b: Use selfhost compiler env for consistency
                    crate::runner::child_env::apply_selfhost_compiler_env(&mut cmd);
                    cmd.arg(py).arg(&tmp_path);
                    let timeout_ms = crate::config::env::ny_compiler_timeout_ms();
                    let out =
                        match super::modes::common_util::io::spawn_with_timeout(cmd, timeout_ms) {
                            Ok(o) => o,
                            Err(e) => {
                                eprintln!("[ny-compiler] python harness failed: {}", e);
                                return false;
                            }
                        };
                    // NOTE(review): a timeout, non-UTF-8 output, or output without a
                    // JSON v0 line silently falls through to the next route.
                    if !out.timed_out {
                        if let Ok(s) = String::from_utf8(out.stdout) {
                            if let Some(line) = crate::runner::modes::common_util::selfhost::json::first_json_v0_line(&s) {
                                match super::json_v0_bridge::parse_json_v0_to_module(&line) {
                                    Ok(module) => {
                                        maybe_dump_mir_verbose(&module);
                                        if crate::config::env::ny_compiler_emit_only() {
                                            return false;
                                        }
                                        // Prefer PyVM for selfhost pipeline (parity reference)
                                        if crate::config::env::vm_use_py() {
                                            let code = match crate::runner::modes::common_util::pyvm::run_pyvm_harness(&module, "selfhost-py") {
                                                Ok(c) => c,
                                                Err(e) => { eprintln!("❌ PyVM error: {}", e); 1 }
                                            };
                                            println!("Result: {}", code);
                                            std::process::exit(code);
                                        }
                                        execute_with_oob_check(self, &module);
                                        return true;
                                    }
                                    Err(e) => {
                                        eprintln!("[ny-compiler] json parse error: {}", e);
                                        return false;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        // EXE-first: if requested, try external parser EXE (nyash_compiler)
        if crate::config::env::use_ny_compiler_exe() {
            // Resolve parser EXE path: explicit override, then dist/, then PATH.
            let exe_path = if let Some(p) = crate::config::env::ny_compiler_exe_path() {
                std::path::PathBuf::from(p)
            } else {
                let mut p = std::path::PathBuf::from("dist/nyash_compiler");
                #[cfg(windows)]
                {
                    p.push("nyash_compiler.exe");
                }
                #[cfg(not(windows))]
                {
                    p.push("nyash_compiler");
                }
                if !p.exists() {
                    // Try PATH
                    if let Ok(w) = which::which("nyash_compiler") {
                        w
                    } else {
                        p
                    }
                } else {
                    p
                }
            };
            if exe_path.exists() {
                let timeout_ms = crate::config::env::ny_compiler_timeout_ms();
                if let Some(module) = super::modes::common_util::selfhost_exe::exe_try_parse_json_v0(
                    filename, timeout_ms,
                ) {
                    maybe_dump_mir_verbose(&module);
                    if crate::config::env::ny_compiler_emit_only() {
                        return false;
                    }
                    // Prefer PyVM when requested (reference semantics)
                    if crate::config::env::vm_use_py() {
                        if let Ok(py3) = which::which("python3") {
                            let runner_script = std::path::Path::new("tools/pyvm_runner.py");
                            if runner_script.exists() {
                                let tmp_dir = std::path::Path::new("tmp");
                                // Best effort: a missing directory surfaces as an emit error below.
                                let _ = std::fs::create_dir_all(tmp_dir);
                                let mir_json_path = tmp_dir.join("nyash_pyvm_mir.json");
                                if let Err(e) =
                                    crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(
                                        &module,
                                        &mir_json_path,
                                    )
                                {
                                    eprintln!("❌ PyVM MIR JSON emit error: {}", e);
                                    process::exit(1);
                                }
                                crate::cli_v!(
                                    "[Bridge] using PyVM (selfhost) → {}",
                                    mir_json_path.display()
                                );
                                // Entry selection: Main.main first, then top-level main
                                // (with a warning unless explicitly allowed).
                                let allow_top = crate::config::env::entry_allow_toplevel_main();
                                let entry = if module.functions.contains_key("Main.main") {
                                    "Main.main"
                                } else if allow_top && module.functions.contains_key("main") {
                                    "main"
                                } else if module.functions.contains_key("main") {
                                    eprintln!("[entry] Warning: using top-level 'main' without explicit allow; set NYASH_ENTRY_ALLOW_TOPLEVEL_MAIN=1 to silence.");
                                    "main"
                                } else {
                                    "Main.main"
                                };
                                let mut cmd = std::process::Command::new(py3);
                                crate::runner::child_env::apply_core_wrapper_env(&mut cmd);
                                let mir_json_arg = mir_json_path.display().to_string();
                                // A spawn failure is an ordinary runtime error: report it and
                                // exit with status 1 instead of panicking.
                                let status = match cmd
                                    .args([
                                        "tools/pyvm_runner.py",
                                        "--in",
                                        mir_json_arg.as_str(),
                                        "--entry",
                                        entry,
                                    ])
                                    .status()
                                {
                                    Ok(st) => st,
                                    Err(e) => {
                                        eprintln!("❌ spawn pyvm: {}", e);
                                        process::exit(1);
                                    }
                                };
                                // No exit code (e.g. killed by a signal) is reported as 1.
                                let code = status.code().unwrap_or(1);
                                println!("Result: {}", code);
                                std::process::exit(code);
                            }
                        }
                    }
                    execute_with_oob_check(self, &module);
                    return true;
                } else {
                    return false;
                }
            }
        }

        // Fallback: inline VM run (embed source into a tiny wrapper that prints JSON)
        // Phase 25.1b: this route existed for Ny selfhost experiments, but it is
        // currently unstable and gets in the way of debugging the .hako-side
        // selfhost builder, so it is disabled by default. When Ny selfhost is
        // needed, the expectation is to use a separate .sh-based pipeline
        // (tools/ny_selfhost_inline.sh, etc.); here we always fall back to the
        // default Rust path.
        crate::cli_v!("[ny-compiler] inline selfhost pipeline disabled (Phase 25.1b); falling back to default path");
        // Dev-only escape hatch: allow forcing the old inline path when explicitly requested.
        if crate::config::env::selfhost_inline_force() {
            // NOTE(review): the bridge is handed an empty string here, so this
            // presumably always lands in the error arm — confirm whether the
            // inline JSON producer was meant to be wired back in.
            match super::json_v0_bridge::parse_json_v0_to_module("") {
                Ok(module) => {
                    maybe_dump_mir_verbose(&module);
                    if crate::config::env::ny_compiler_emit_only() {
                        return false;
                    }
                    // Phase-15 policy: when NYASH_VM_USE_PY=1, prefer PyVM as reference executor
                    // regardless of BoxCall presence to ensure semantics parity (e.g., PHI merges).
                    let prefer_pyvm = crate::config::env::vm_use_py();
                    // Backward compatibility: if not preferring PyVM explicitly, still auto-enable when BoxCalls exist.
                    let needs_pyvm = !prefer_pyvm
                        && module.functions.values().any(|f| {
                            f.blocks.values().any(|bb| {
                                bb.instructions.iter().any(|inst| {
                                    matches!(inst, crate::mir::MirInstruction::BoxCall { .. })
                                })
                            })
                        });
                    if prefer_pyvm || needs_pyvm {
                        let label = if prefer_pyvm {
                            "selfhost"
                        } else {
                            "selfhost-fallback"
                        };
                        if let Some(code) =
                            crate::runner::modes::common_util::selfhost::json::run_pyvm_module(
                                &module, label,
                            )
                        {
                            println!("Result: {}", code);
                            std::process::exit(code);
                        }
                    }
                    execute_with_oob_check(self, &module);
                    return true;
                }
                Err(e) => {
                    eprintln!("❌ JSON v0 bridge error: {}", e);
                    return false;
                }
            }
        }

        // Default path: always fall back to existing Rust runner.
        false
    }
}