Files
hakorune/src/runner/selfhost.rs
nyash-codex d3cbc71c9b feat(mir): Phase 25.1f完了 - Conservative PHI + ControlForm観測レイヤー
🎉 Conservative PHI Box理論による完全SSA構築

**Phase 7-B: Conservative PHI実装**
- 片方branchのみ定義変数に対応(emit_void使用)
- 全変数にPHI生成(Conservative Box理論)
- Stage-1 resolver全テスト緑化(3/3 PASS)

**Phase 25.1f: ControlForm観測レイヤー**
- LoopShape/IfShape/ControlForm構造定義
- Loop/If統一インターフェース実装
- debug_dump/debug_validate機能追加
- NYASH_CONTROL_FORM_TRACE環境変数対応

**主な変更**:
- src/mir/builder/phi.rs: Conservative PHI実装
- src/mir/control_form.rs: ControlForm構造(NEW)
- src/mir/loop_builder.rs: LoopForm v2デフォルト化

**テスト結果**:
 mir_stage1_using_resolver_min_fragment_verifies
 mir_stage1_using_resolver_full_collect_entries_verifies
 mir_parserbox_parse_program2_harness_parses_minimal_source

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-18 18:56:35 +09:00

460 lines
24 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*!
* Runner selfhost helpers — Ny compiler pipeline (Ny -> JSON v0)
*
* Transitional shim: provides a stable entrypoint from callers, while the
* heavy implementation currently lives in modes/common.rs. Next step will
* migrate the full implementation here.
*/
use super::*;
use nyash_rust::{mir::MirCompiler, parser::NyashParser};
use std::{fs, process};
impl NyashRunner {
    /// Selfhost (Ny -> JSON v0) pipeline, including the EXE/VM/Python fallbacks.
    ///
    /// Stages, in order:
    /// 1. Guard: only files with a `.ny` / `.nyash` extension are accepted.
    /// 2. Read the source, apply `using` handling when enabled, and pre-expand the
    ///    line-head `@name[:T] = expr` sugar.
    /// 3. Optional macro pre-expand path (`NYASH_MACRO_SELFHOST_PRE_EXPAND`), which
    ///    compiles to MIR directly and hands the module to PyVM.
    /// 4. Write the processed source to `tmp/ny_parser_input.ny` (skipped when the
    ///    "tmp only" toggle is set).
    /// 5. Try the JSON v0 producers one after another:
    ///    - the Ny compiler program `apps/selfhost/compiler/compiler.hako` (if present),
    ///    - the Python harness `tools/ny_parser_mvp.py` (`NYASH_NY_COMPILER_USE_PY=1`),
    ///    - the external parser EXE (`NYASH_USE_NY_COMPILER_EXE=1`),
    ///    - the legacy inline path (`NYASH_SELFHOST_INLINE_FORCE=1`).
    ///
    /// # Returns
    /// `true` only after a module has been executed in-process through
    /// `execute_mir_module`. `false` means "not handled here" (skipped, failed, or
    /// emit-only) and the caller is expected to fall back to the default Rust path.
    ///
    /// # Process exit
    /// Several branches never return: the PyVM branches call `std::process::exit`
    /// with the PyVM result code, and the OOB-strict check exits with status 1.
    pub(crate) fn try_run_selfhost_pipeline(&self, filename: &str) -> bool {
        use std::io::Write;

        // Local helpers for the raw environment toggles that are read below.
        let env_is_one = |key: &str| std::env::var(key).ok().as_deref() == Some("1");
        // Emit-only counts as enabled when the variable is unset or unreadable.
        let emit_only_from_env = || {
            std::env::var("NYASH_NY_COMPILER_EMIT_ONLY")
                .map(|v| v == "1")
                .unwrap_or(true)
        };

        // Phase 25.1b: guard selfhost pipeline to Ny-only sources.
        // `.hako` and other extensions are the responsibility of the StageB / JSON v0
        // bridge, so anything other than a Ny/Nyash extension is skipped right away.
        match std::path::Path::new(filename)
            .extension()
            .and_then(|s| s.to_str())
        {
            Some("ny") | Some("nyash") => { /* continue */ }
            Some(ext) => {
                crate::cli_v!(
                    "[ny-compiler] skip selfhost pipeline for non-Ny source: {} (ext={})",
                    filename,
                    ext
                );
                return false;
            }
            None => {
                // No extension: treat as non-Ny for safety
                crate::cli_v!(
                    "[ny-compiler] skip selfhost pipeline for source without extension: {}",
                    filename
                );
                return false;
            }
        }

        // Read input source
        let code = match fs::read_to_string(filename) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("[ny-compiler] read error: {}", e);
                return false;
            }
        };

        // Optional Phase-15: using prelude merge (text-based for speed)
        let mut code_ref: std::borrow::Cow<'_, str> = std::borrow::Cow::Borrowed(&code);
        if crate::config::env::enable_using() {
            if crate::config::env::using_ast_enabled() {
                // Text-based merge: faster for inline/selfhost execution
                match crate::runner::modes::common_util::resolve::merge_prelude_text(
                    self, &code, filename,
                ) {
                    Ok(merged) => {
                        code_ref = std::borrow::Cow::Owned(merged);
                    }
                    Err(e) => {
                        eprintln!("[ny-compiler] using text merge error: {}", e);
                        return false;
                    }
                }
            } else {
                // Legacy: strip only (no prelude merge)
                match crate::runner::modes::common_util::resolve::resolve_prelude_paths_profiled(
                    self, &code, filename,
                ) {
                    Ok((clean, paths)) => {
                        // Any resolved prelude path cannot be honoured in this profile.
                        if !paths.is_empty() {
                            eprintln!("[ny-compiler] using: AST prelude merge is disabled in this profile. Enable NYASH_USING_AST=1 or remove 'using' lines.");
                            return false;
                        }
                        code_ref = std::borrow::Cow::Owned(clean);
                    }
                    Err(e) => {
                        eprintln!("[ny-compiler] {}", e);
                        return false;
                    }
                }
            }
        }

        // Promote dev sugar to standard: pre-expand line-head '@name[:T] = expr' to 'local name[:T] = expr'
        {
            let expanded =
                crate::runner::modes::common_util::resolve::preexpand_at_local(code_ref.as_ref());
            code_ref = std::borrow::Cow::Owned(expanded);
        }

        // Write to tmp/ny_parser_input.ny (as expected by Ny parser v0), unless forced to reuse existing tmp
        let use_tmp_only = crate::config::env::ny_compiler_use_tmp_only();
        let tmp_dir = std::path::Path::new("tmp");
        if let Err(e) = std::fs::create_dir_all(tmp_dir) {
            eprintln!("[ny-compiler] mkdir tmp failed: {}", e);
            return false;
        }

        // Optional macro preexpand path for selfhost
        // Default: auto when macro engine is enabled (safe: PyVM only)
        // Gate: NYASH_MACRO_SELFHOST_PRE_EXPAND={1|auto|0}
        {
            let preenv = std::env::var("NYASH_MACRO_SELFHOST_PRE_EXPAND")
                .ok()
                .or_else(|| {
                    if crate::r#macro::enabled() {
                        Some("auto".to_string())
                    } else {
                        None
                    }
                });
            let do_pre = match preenv.as_deref() {
                Some("1") => true,
                Some("auto") => crate::r#macro::enabled() && crate::config::env::vm_use_py(),
                _ => false,
            };
            if do_pre && crate::r#macro::enabled() {
                crate::cli_v!(
                    "[ny-compiler] selfhost macro pre-expand: engaging (mode={:?})",
                    preenv
                );
                match NyashParser::parse_from_string(code_ref.as_ref()) {
                    Ok(ast0) => {
                        let ast = crate::r#macro::maybe_expand_and_dump(&ast0, false);
                        // Compile to MIR and execute (respect VM/PyVM policy similar to vm mode)
                        let mut mir_compiler = MirCompiler::with_options(true);
                        match mir_compiler.compile(ast) {
                            Ok(result) => {
                                if crate::config::env::vm_use_py() {
                                    // Both outcomes terminate the process.
                                    match crate::runner::modes::common_util::pyvm::run_pyvm_harness_lib(
                                        &result.module,
                                        "selfhost-preexpand",
                                    ) {
                                        Ok(exit_code) => {
                                            println!("Result: {}", exit_code);
                                            std::process::exit(exit_code)
                                        }
                                        Err(_) => {
                                            eprintln!("❌ PyVM error (selfhost-preexpand)");
                                            std::process::exit(1)
                                        }
                                    }
                                } else {
                                    // For now, only PyVM path is supported in pre-expand mode; fall back otherwise.
                                    crate::cli_v!("[ny-compiler] pre-expand path requires NYASH_VM_USE_PY=1; falling back to default selfhost");
                                    return false;
                                }
                            }
                            Err(e) => {
                                eprintln!("[ny-compiler] pre-expand compile error: {}", e);
                                return false;
                            }
                        }
                    }
                    Err(e) => {
                        eprintln!("[ny-compiler] pre-expand parse error: {}", e);
                        return false;
                    }
                }
            }
        }

        let tmp_path = tmp_dir.join("ny_parser_input.ny");
        if !use_tmp_only {
            match std::fs::File::create(&tmp_path) {
                Ok(mut f) => {
                    if let Err(e) = f.write_all(code_ref.as_bytes()) {
                        eprintln!("[ny-compiler] write tmp failed: {}", e);
                        return false;
                    }
                }
                Err(e) => {
                    eprintln!("[ny-compiler] open tmp failed: {}", e);
                    return false;
                }
            }
        }

        // Preferred: run Ny selfhost compiler program (apps/selfhost/compiler/compiler.hako)
        // This avoids inline embedding pitfalls and supports Stage-3 gating via args.
        {
            use crate::runner::modes::common_util::selfhost::{child, json};
            let exe = std::env::current_exe()
                .unwrap_or_else(|_| std::path::PathBuf::from("target/release/nyash"));
            let parser_prog = std::path::Path::new("apps/selfhost/compiler/compiler.hako");
            if parser_prog.exists() {
                // Build extra args forwarded to child program.
                // NOTE(review): every option group is preceded by its own "--"; this is
                // kept exactly as-is — confirm the child's argument parser expects it.
                let mut extra_owned: Vec<String> = Vec::new();
                if crate::config::env::ny_compiler_min_json() {
                    extra_owned.push("--".to_string());
                    extra_owned.push("--min-json".to_string());
                }
                extra_owned.push("--".to_string());
                extra_owned.push("--read-tmp".to_string());
                if crate::config::env::ny_compiler_stage3() {
                    extra_owned.push("--".to_string());
                    extra_owned.push("--stage3".to_string());
                }
                // Optional: map env toggles to child args (prepasses)
                if env_is_one("NYASH_SCOPEBOX_ENABLE") {
                    extra_owned.push("--".to_string());
                    extra_owned.push("--scopebox".to_string());
                }
                if env_is_one("NYASH_LOOPFORM_NORMALIZE") {
                    extra_owned.push("--".to_string());
                    extra_owned.push("--loopform".to_string());
                }
                // Optional: developer-provided child args passthrough (space-separated)
                if let Ok(raw) = std::env::var("NYASH_SELFHOST_CHILD_ARGS") {
                    let items: Vec<String> = raw
                        .split(' ')
                        .filter(|s| !s.trim().is_empty())
                        .map(|s| s.to_string())
                        .collect();
                    if !items.is_empty() {
                        extra_owned.push("--".to_string());
                        extra_owned.extend(items);
                    }
                }
                let extra: Vec<&str> = extra_owned.iter().map(|s| s.as_str()).collect();
                let timeout_ms: u64 = crate::config::env::ny_compiler_timeout_ms();
                if let Some(line) = child::run_ny_program_capture_json(
                    &exe,
                    parser_prog,
                    timeout_ms,
                    &extra,
                    &["NYASH_USE_NY_COMPILER", "NYASH_CLI_VERBOSE"],
                    &[
                        ("NYASH_JSON_ONLY", "1"),
                        ("NYASH_DISABLE_PLUGINS", "1"),
                        ("NYASH_SKIP_TOML_ENV", "1"),
                        ("NYASH_USING_AST", "0"),
                        ("NYASH_ALLOW_USING_FILE", "0"),
                        ("HAKO_ALLOW_USING_FILE", "0"),
                    ],
                ) {
                    match json::parse_json_v0_line(&line) {
                        Ok(module) => {
                            if crate::config::env::cli_verbose() {
                                super::json_v0_bridge::maybe_dump_mir(&module);
                            }
                            if crate::config::env::ny_compiler_emit_only() {
                                return false;
                            }
                            // Prefer PyVM path when requested; if it yields no exit code,
                            // fall through to in-process execution.
                            if crate::config::env::vm_use_py() {
                                if let Some(exit_code) =
                                    crate::runner::modes::common_util::selfhost::json::run_pyvm_module(
                                        &module, "selfhost",
                                    )
                                {
                                    println!("Result: {}", exit_code);
                                    std::process::exit(exit_code);
                                }
                            }
                            // NOTE(review): unlike the Python/EXE/inline paths below, this
                            // path does not apply the OOB-strict reset/check — confirm intended.
                            self.execute_mir_module(&module);
                            return true;
                        }
                        Err(e) => {
                            // Not fatal: the remaining producers below still get a chance.
                            eprintln!("[ny-compiler] json parse error (child): {}", e);
                        }
                    }
                }
            }
        }

        // Python MVP (optional): lightweight harness to produce JSON v0.
        // Phase 25.1b: off by default (only enabled when NYASH_NY_COMPILER_USE_PY=1).
        if env_is_one("NYASH_NY_COMPILER_USE_PY") {
            if let Ok(py3) = which::which("python3") {
                let py = std::path::Path::new("tools/ny_parser_mvp.py");
                if py.exists() {
                    let mut cmd = std::process::Command::new(&py3);
                    // Phase 25.1b: Use selfhost compiler env for consistency
                    crate::runner::child_env::apply_selfhost_compiler_env(&mut cmd);
                    cmd.arg(py).arg(&tmp_path);
                    let timeout_ms: u64 = std::env::var("NYASH_NY_COMPILER_TIMEOUT_MS")
                        .ok()
                        .and_then(|s| s.parse().ok())
                        .unwrap_or(60000); // Phase 25.1b: Increased to 60000ms (60s) for consistency
                    let out = match super::modes::common_util::io::spawn_with_timeout(
                        cmd, timeout_ms,
                    ) {
                        Ok(o) => o,
                        Err(e) => {
                            eprintln!("[ny-compiler] python harness failed: {}", e);
                            return false;
                        }
                    };
                    // A timeout, non-UTF-8 output, or output without a JSON v0 line all
                    // fall through silently to the next producer.
                    if !out.timed_out {
                        if let Ok(s) = String::from_utf8(out.stdout) {
                            if let Some(line) =
                                crate::runner::modes::common_util::selfhost::json::first_json_v0_line(&s)
                            {
                                match super::json_v0_bridge::parse_json_v0_to_module(&line) {
                                    Ok(module) => {
                                        if crate::config::env::cli_verbose() {
                                            super::json_v0_bridge::maybe_dump_mir(&module);
                                        }
                                        if emit_only_from_env() {
                                            return false;
                                        }
                                        // Prefer PyVM for selfhost pipeline (parity reference)
                                        if env_is_one("NYASH_VM_USE_PY") {
                                            let exit_code = match crate::runner::modes::common_util::pyvm::run_pyvm_harness(
                                                &module,
                                                "selfhost-py",
                                            ) {
                                                Ok(c) => c,
                                                Err(e) => {
                                                    eprintln!("❌ PyVM error: {}", e);
                                                    1
                                                }
                                            };
                                            println!("Result: {}", exit_code);
                                            std::process::exit(exit_code);
                                        }
                                        crate::runner::child_env::pre_run_reset_oob_if_strict();
                                        self.execute_mir_module(&module);
                                        if crate::config::env::oob_strict_fail()
                                            && crate::runtime::observe::oob_seen()
                                        {
                                            eprintln!("[selfhost][oob-strict] Out-of-bounds observed → exit(1)");
                                            std::process::exit(1);
                                        }
                                        return true;
                                    }
                                    Err(e) => {
                                        eprintln!("[ny-compiler] json parse error: {}", e);
                                        return false;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        // EXE-first: if requested, try external parser EXE (nyash_compiler)
        if env_is_one("NYASH_USE_NY_COMPILER_EXE") {
            // Resolve parser EXE path: explicit env override, then dist/, then PATH.
            let exe_path = if let Ok(p) = std::env::var("NYASH_NY_COMPILER_EXE_PATH") {
                std::path::PathBuf::from(p)
            } else {
                let mut p = std::path::PathBuf::from("dist/nyash_compiler");
                #[cfg(windows)]
                {
                    p.push("nyash_compiler.exe");
                }
                #[cfg(not(windows))]
                {
                    p.push("nyash_compiler");
                }
                if p.exists() {
                    p
                } else {
                    // Try PATH; keep the (missing) dist path when that fails too.
                    which::which("nyash_compiler").unwrap_or(p)
                }
            };
            if exe_path.exists() {
                let timeout_ms: u64 = std::env::var("NYASH_NY_COMPILER_TIMEOUT_MS")
                    .ok()
                    .and_then(|s| s.parse().ok())
                    .unwrap_or(2000);
                let module = match super::modes::common_util::selfhost_exe::exe_try_parse_json_v0(
                    filename, timeout_ms,
                ) {
                    Some(m) => m,
                    None => return false,
                };
                if crate::config::env::cli_verbose() {
                    super::json_v0_bridge::maybe_dump_mir(&module);
                }
                if emit_only_from_env() {
                    return false;
                }
                // Prefer PyVM when requested (reference semantics). If python3 or the
                // runner script is missing, fall through to in-process execution.
                if env_is_one("NYASH_VM_USE_PY") {
                    if let Ok(py3) = which::which("python3") {
                        let runner = std::path::Path::new("tools/pyvm_runner.py");
                        if runner.exists() {
                            let _ = std::fs::create_dir_all(tmp_dir);
                            let mir_json_path = tmp_dir.join("nyash_pyvm_mir.json");
                            if let Err(e) =
                                crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(
                                    &module,
                                    &mir_json_path,
                                )
                            {
                                eprintln!("❌ PyVM MIR JSON emit error: {}", e);
                                process::exit(1);
                            }
                            crate::cli_v!(
                                "[Bridge] using PyVM (selfhost) → {}",
                                mir_json_path.display()
                            );
                            // Entry selection: `Main.main` wins; a top-level `main` is
                            // used otherwise, with a warning unless explicitly allowed.
                            let allow_top = crate::config::env::entry_allow_toplevel_main();
                            let entry = if module.functions.contains_key("Main.main") {
                                "Main.main"
                            } else if allow_top && module.functions.contains_key("main") {
                                "main"
                            } else if module.functions.contains_key("main") {
                                eprintln!("[entry] Warning: using top-level 'main' without explicit allow; set NYASH_ENTRY_ALLOW_TOPLEVEL_MAIN=1 to silence.");
                                "main"
                            } else {
                                "Main.main"
                            };
                            let mir_json_arg = mir_json_path.display().to_string();
                            let mut cmd = std::process::Command::new(py3);
                            crate::runner::child_env::apply_core_wrapper_env(&mut cmd);
                            // A spawn failure is reported and turned into exit(1) instead
                            // of panicking, like the MIR JSON emit failure above.
                            let status = match cmd
                                .args([
                                    "tools/pyvm_runner.py",
                                    "--in",
                                    &mir_json_arg,
                                    "--entry",
                                    entry,
                                ])
                                .status()
                            {
                                Ok(s) => s,
                                Err(e) => {
                                    eprintln!("❌ PyVM spawn error: spawn pyvm: {}", e);
                                    process::exit(1)
                                }
                            };
                            // No exit code available is reported as failure (1).
                            let exit_code = status.code().unwrap_or(1);
                            println!("Result: {}", exit_code);
                            std::process::exit(exit_code);
                        }
                    }
                }
                crate::runner::child_env::pre_run_reset_oob_if_strict();
                self.execute_mir_module(&module);
                if crate::config::env::oob_strict_fail() && crate::runtime::observe::oob_seen() {
                    eprintln!("[selfhost][oob-strict] Out-of-bounds observed → exit(1)");
                    std::process::exit(1);
                }
                return true;
            }
        }

        // Fallback: inline VM run (embed source into a tiny wrapper that prints JSON)
        // Phase 25.1b: this route was meant for Ny selfhost experiments, but it is
        // currently unstable and gets in the way of debugging the .hako-side selfhost
        // builder, so it is disabled by default. When Ny selfhost is needed, a separate
        // .sh-based pipeline (tools/ny_selfhost_inline.sh etc.) is expected to be used;
        // here we always fall back to the default Rust path.
        crate::cli_v!("[ny-compiler] inline selfhost pipeline disabled (Phase 25.1b); falling back to default path");
        // Dev-only escape hatch: allow forcing the old inline path when explicitly requested.
        if env_is_one("NYASH_SELFHOST_INLINE_FORCE") {
            // NOTE(review): the bridge is fed an empty string here; presumably this
            // always lands in the error arm — confirm whether the inline JSON
            // producer was removed on purpose.
            match super::json_v0_bridge::parse_json_v0_to_module("") {
                Ok(module) => {
                    if crate::config::env::cli_verbose() {
                        super::json_v0_bridge::maybe_dump_mir(&module);
                    }
                    if emit_only_from_env() {
                        return false;
                    }
                    // Phase-15 policy: when NYASH_VM_USE_PY=1, prefer PyVM as reference executor
                    // regardless of BoxCall presence to ensure semantics parity (e.g., PHI merges).
                    let prefer_pyvm = env_is_one("NYASH_VM_USE_PY");
                    // Backward compatibility: if not preferring PyVM explicitly, still auto-enable when BoxCalls exist.
                    let needs_pyvm = !prefer_pyvm
                        && module.functions.values().any(|f| {
                            f.blocks.values().any(|bb| {
                                bb.instructions.iter().any(|inst| {
                                    matches!(inst, crate::mir::MirInstruction::BoxCall { .. })
                                })
                            })
                        });
                    if prefer_pyvm || needs_pyvm {
                        let label = if prefer_pyvm {
                            "selfhost"
                        } else {
                            "selfhost-fallback"
                        };
                        if let Some(exit_code) =
                            crate::runner::modes::common_util::selfhost::json::run_pyvm_module(
                                &module, label,
                            )
                        {
                            println!("Result: {}", exit_code);
                            std::process::exit(exit_code);
                        }
                    }
                    crate::runner::child_env::pre_run_reset_oob_if_strict();
                    self.execute_mir_module(&module);
                    if crate::config::env::oob_strict_fail() && crate::runtime::observe::oob_seen()
                    {
                        eprintln!("[selfhost][oob-strict] Out-of-bounds observed → exit(1)");
                        std::process::exit(1);
                    }
                    return true;
                }
                Err(e) => {
                    eprintln!("❌ JSON v0 bridge error: {}", e);
                    return false;
                }
            }
        }

        // Default path: always fall back to existing Rust runner.
        false
    }
}