diff --git a/.github/workflows/selfhost-bootstrap.yml b/.github/workflows/selfhost-bootstrap.yml new file mode 100644 index 00000000..59108458 --- /dev/null +++ b/.github/workflows/selfhost-bootstrap.yml @@ -0,0 +1,50 @@ +name: Selfhost Bootstrap Smoke + +on: + push: + paths: + - 'src/**' + - 'apps/**' + - 'tools/**' + - 'docs/**' + - 'Cargo.toml' + - 'Cargo.lock' + - '.github/workflows/selfhost-bootstrap.yml' + pull_request: + paths: + - 'src/**' + - 'apps/**' + - 'tools/**' + - 'docs/**' + +jobs: + selfhost-bootstrap: + runs-on: ubuntu-latest + timeout-minutes: 10 + env: + CARGO_TERM_COLOR: always + NYASH_DISABLE_PLUGINS: '1' + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Rust (stable) + uses: dtolnay/rust-toolchain@stable + + - name: Cache cargo registry and build + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Build (release, cranelift-jit) + run: cargo build --release --features cranelift-jit -j 2 + + - name: Run bootstrap selfhost smoke + run: timeout -s KILL 40s bash tools/bootstrap_selfhost_smoke.sh + diff --git a/.github/workflows/selfhost-exe-first.yml b/.github/workflows/selfhost-exe-first.yml new file mode 100644 index 00000000..756eafb1 --- /dev/null +++ b/.github/workflows/selfhost-exe-first.yml @@ -0,0 +1,57 @@ +name: Selfhost EXE-first (Optional) + +on: + workflow_dispatch: + schedule: + - cron: '0 7 * * *' + +jobs: + selfhost-exe-first: + runs-on: ubuntu-latest + timeout-minutes: 25 + env: + CARGO_TERM_COLOR: always + NYASH_DISABLE_PLUGINS: '1' + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Rust (stable) + uses: dtolnay/rust-toolchain@stable + + - name: Cache cargo registry and build + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + 
restore-keys: | + ${{ runner.os }}-cargo- + + - name: Install LLVM 18 (llvm-config-18) + run: | + sudo apt-get update + sudo apt-get install -y curl ca-certificates lsb-release wget gnupg python3-pip + curl -fsSL https://apt.llvm.org/llvm.sh -o llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 18 + llvm-config-18 --version + + - name: Install llvmlite (Python) + run: | + python3 -m pip install --upgrade pip + python3 -m pip install llvmlite jsonschema + + - name: Build nyash (release) + run: cargo build --release -j 2 + + - name: ny-llvmc dummy smoke + run: | + cargo build --release -p nyash-llvm-compiler -j 2 + ./target/release/ny-llvmc --dummy --out /tmp/dummy.o + file /tmp/dummy.o || true + + - name: Run EXE-first smoke (parser EXE + bridge) + run: timeout -s KILL 10m bash tools/exe_first_smoke.sh diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index 349cb8cf..f30250e0 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -3,6 +3,7 @@ Summary - Default execution is MIR13 (PHI‑off). Bridge/Builder do not emit PHIs; llvmlite synthesizes PHIs when needed. MIR14 (PHI‑on) remains experimental for targeted tests. - PyVM is the semantic reference engine; llvmlite is used for AOT and parity checks. 
+ - GC: user modes defined; controller実装(rc+cycle skeleton + metrics/diagnostics)に移行。LLVM safepoint輸出/NyRT配線と自動挿入(envゲートON)を完了。 What Changed (recent) - MIR13 default enabled @@ -16,30 +17,56 @@ What Changed (recent) - Selfhost/PyVM スモークを通して E2E 確認(peek/ternary)。 - llvmlite stability for MIR13(bring‑up進行中) - Control‑flow 分離: `instructions/controlflow/{branch,jump,while_.py}` を導入し、`llvm_builder.py` の責務を縮小。 - - プリパス導入(環境変数で有効化): `NYASH_LLVM_PREPASS_LOOP=1` + - プリパス(環境変数で有効化): `NYASH_LLVM_PREPASS_LOOP=1`, `NYASH_LLVM_PREPASS_IFMERGE=1` - ループ検出(単純 while 形)→ 構造化 lower(LoopForm失敗時は regular while) - - CFG ユーティリティ: `cfg/utils.py`(preds/succs) - - 値解決ポリシー共通化: `utils/values.py`(prefer same‑block SSA → resolver) - - vmap の per‑block 化: `lower_block` 内で `vmap_cur` を用意し、ブロック末に `block_end_values` へスナップショット。cross‑block 汚染を抑制。 - - Resolver 強化: end‑of‑block解決で他ブロックのPHIを安易に採用しない(自己参照/非支配回避)。 + - if‑merge(ret‑merge)前処理: ret 値 PHI の前宣言と finalize 配線の一意化 + - CFG ユーティリティ: `src/llvm_py/cfg/utils.py`(preds/succs) + - PHI 配線の分離: `src/llvm_py/phi_wiring.py` に placeholder/finalize を移管(builder 薄化) + - 値解決ポリシー共通化: `src/llvm_py/utils/values.py`(prefer same‑block SSA → resolver) + - vmap の per‑block 化: `vmap_cur` を用意し、ブロック末に `block_end_values` へスナップショット。cross‑block 汚染を抑制。 + - Resolver 強化: end‑of‑block 解決で他ブロック PHI を安易に採用しない(自己参照/非支配を回避)。 + - BuildCtx 導入: `src/llvm_py/build_ctx.py` で lowering 引数を集約(compare/ret/call/boxcall/externcall/typeop/newbox/safepoint が ctx 対応) + - トレース統一: `src/llvm_py/trace.py` を追加し、`NYASH_CLI_VERBOSE`/`NYASH_LLVM_TRACE_PHI`/`NYASH_LLVM_TRACE_VALUES` を一元管理 + - curated スモーク拡張: `tools/smokes/curated_llvm.sh --with-if-merge` を追加(if‑merge ケース含む) - Parity runner pragmatics - `tools/pyvm_vs_llvmlite.sh` compares exit code by default; use `CMP_STRICT=1` for stdout+exit. 
- Stage‑2 smokes更新: `tools/selfhost_stage2_smoke.sh` に "Peek basic" を追加。 +- GC controller/metrics(Phase‑15) + - `GcController`(統合フック)導入+CLI `--gc`(`NYASH_GC_MODE`)。CountingGcは互換ラッパに縮退。 + - NyRT exports: `ny_safepoint` / `ny_check_safepoint` / `nyash.gc.barrier_write` → runtime hooks 連携。 + - LLVM:自動 safepoint 挿入(loop header / call / externcall / boxcall)。`NYASH_LLVM_AUTO_SAFEPOINT` で制御(既定=1)。 + - メトリクス:text/JSON(`NYASH_GC_METRICS{,_JSON}=1`)。JSONに alloc_count/alloc_bytes/trial_nodes/edges/collections/last_ms/reason_bits/thresholds を含む。 + - 診断:`NYASH_GC_LEAK_DIAG=1` でハンドルTop‑K(残存)出力。Array/Map 到達集合の試走(gc_trace)。 + +- CI/DevOps(Self‑Hosting パイロット強化) + - 追加: `.github/workflows/selfhost-bootstrap.yml`(常時) — `tools/bootstrap_selfhost_smoke.sh` を40s timeoutで実行。 + - 追加: `.github/workflows/selfhost-exe-first.yml`(任意/cron) — LLVM18 + llvmlite をセットアップし `tools/exe_first_smoke.sh` を実行。 + - スモーク堅牢化: `tools/bootstrap_selfhost_smoke.sh`/`tools/exe_first_smoke.sh` に timeout を付与。 + - JSON v0 スキーマ追加: `docs/reference/mir/json_v0.schema.json` と検証ツール `tools/validate_mir_json.py`。EXE‑first スモークに組み込み。 + +- LLVM crate 分離の足場(Phase‑15.6 向け) + - 新規クレート(スキャフォールド): `crates/nyash-llvm-compiler`(CLI名: `ny-llvmc`)。 + - `--dummy --out ` でダミー `.o` を生成。 + - `--in --out ` で MIR(JSON)→`.o` を llvmlite ハーネス経由で生成(`tools/llvmlite_harness.py`)。 + - ドキュメント追記: `docs/LLVM_HARNESS.md` に `ny-llvmc` とスキーマ検証の項を追加。 + +- Nyash ABI v2(TypeBox)検出の足場 + - ローダに `nyash_typebox_` シンボル検出を追加(`abi_tag='TYBX'`/`version`/`invoke_id`)し、Boxスペックへ保持(まだ実行には未使用)。 Current Status - Self‑hosting Bridge → PyVM smokes: PASS(Stage‑2 代表: array/string/logic/if/loop/ternary/peek/dot-chain) - PyVM core fixes applied: compare(None,x) の安全化、Copy 命令サポート、最大ステップ上限(NYASH_PYVM_MAX_STEPS) - MIR13(PHI‑off): if/ternary/loop の合流で Copy が正しく JSON に出るよう修正(emit_mir_json + builder no‑phi 合流) - Curated LLVM(PHI‑off 既定): 継続(個別ケースの IR 生成不備は未着手) -- LLVM ハーネス(llvmlite): +LLVM ハーネス(llvmlite/AOT): - `loop_if_phi`: プリパスON+構造化whileで EXE 退出コード 0(緑)。 - - `ternary_nested`: 
vmap per‑block で安定度向上。残タスク: merge(ret) の PHI 配線をプリパス/resolve 側で確定・重複排除。 + - `ternary_nested`: if‑merge プリパス+phi_wiring で ret‑merge を構造化し、退出コード一致(緑)。 Next (short plan) 0) Refactor/Structure(継続) - - controlflow の切出し完了(branch/jump/while)。binop/compare/copy の前処理を `utils/values.resolve_i64_strict` に集約(完了)。 - - vmap per‑block 化(完了)。builder の責務縮小と prepass/cfg/util への移譲(進行中)。 - - if‑merge プリパス実装: ret‑merge の構造化/PHI確定(予定)。 + - BuildCtx 展開を完了(barrier/atomic/loopform も ctx 主経路に) + - trace 化の残り掃除(環境直読み print を削減) + - phi_wiring を関数分割(解析/配線/タグ付け)→ ユニットテスト追加 1) Legacy Interpreter/VM offboarding (phase‑A): - ✅ Introduced `vm-legacy` feature (default OFF) to gate old VM execution層。 - ✅ 抽出: JIT が参照する最小型(例: `VMValue`)を薄い共通モジュールへ切替(`vm_types`)。 @@ -50,23 +77,48 @@ Next (short plan) 2) Legacy Interpreter/VM offboarding (phase‑B): - 物理移動: `src/archive/{interpreter_legacy,vm_legacy}/` へ移設(ドキュメント更新)。 3) LLVM/llvmlite 整備(優先中): - - MIR13 の Copy 合流を LLVM IR に等価反映(pred‑localize or PHI 合成): per‑block vmap 完了、resolver 強化済。 + - MIR13 の Copy 合流を LLVM IR に等価反映(pred‑localize or PHI 合成): per‑block vmap 完了、resolver/phi_wiring 強化済。 - 代表ケース: - `apps/tests/loop_if_phi.nyash`: プリパスONで緑(退出コード一致)。 - - `apps/tests/ternary_nested.nyash`: if‑merge プリパスでの構造化/PHI 確定を実装 → IR 検証通過・退出コード一致まで。 + - `apps/tests/ternary_nested.nyash`: if‑merge + phi_wiring で退出コード一致を継続。 - `tools/pyvm_vs_llvmlite.sh` で PyVM と EXE の退出コード一致(必要に応じて CMP_STRICT=1)。 4) PHI‑on lane(任意): `loop_if_phi` 支配関係を finalize/resolve の順序強化で観察(低優先)。 5) Runner refactor(小PR): - `selfhost/{child.rs,json.rs}` 分離; `modes/common/{io,resolve,exec}.rs` 分割; `runner/mod.rs`の表面削減。 6) Optimizer/Verifier thin‑hub cleanup(非機能): orchestrator最小化とパス境界の明確化。 +7) GC(controller)観測の磨き込み + - JSON: running averages / roots要約(任意) / 理由タグ拡張 + - 収集頻度のサンプリング支援 + - plugin/FFI は非移動のまま、ハンドル間接を継続 +8) LLVM crate split(EXE‑first) + - LLVM harness/builder を `nyash-llvm-compiler` crate と CLI(`ny-llvmc`)に分離(入力: MIR JSON v0 / 出力: .o/.exe) + - `tools/build_llvm.sh` 内部を新crate 
APIに寄せ、Runnerからも呼べるよう段階移行 + - CI: selfhost smokes と LLVM EXE smokes を分離しアーティファクト配布線を評価 + +9) Nyash ABI v2 統一(後方互換なし) + - 方針: 既存 Type‑C ABI(library‑level `nyash_plugin_invoke`)を撤退し、Box単位の TypeBox へ一本化。 + - ローダ: `nyash_typebox_` の `invoke_id(instance_id, method_id, ...)` を実行ポインタとして保持し、birth/fini も含めて統一。 + - プラグイン: 公式プラグイン(String/File/Array/Map/Console/Integer)を順次 v2 へ移行。`resolve(name)->method_id` 実装。 + - 仕様: エラー規約(OK/E_SHORT/E_ARGS/E_TYPE/E_METHOD/E_HANDLE/E_PLUGIN)・TLVタグ一覧を docs に凍結、Cヘッダ雛形(`nyash_abi.h`)を配布。 + - CI: v2専用スモークを常時化(Linux)。Windows/macOS は任意ジョブで追随。 How to Run - PyVM reference smokes: `tools/pyvm_stage2_smoke.sh` - Bridge → PyVM smokes: `tools/selfhost_stage2_bridge_smoke.sh` - LLVM curated (PHI‑off default): `tools/smokes/curated_llvm.sh` - LLVM PHI‑on (experimental): `tools/smokes/curated_llvm.sh --phi-on` +- LLVM curated with if‑merge prepass: `tools/smokes/curated_llvm.sh --with-if-merge` - Parity (AOT vs PyVM): `tools/pyvm_vs_llvmlite.sh ` (`CMP_STRICT=1` to enable stdout check) - 開発時の補助: `NYASH_LLVM_PREPASS_LOOP=1` を併用(loop/if‑merge のプリパス有効化)。 + - GC modes/metrics: see `docs/reference/runtime/gc.md`(`--gc` / 自動 safepoint / 収集トリガ / JSONメトリクス) + +Self‑Hosting CI +- Bootstrap(常時): `.github/workflows/selfhost-bootstrap.yml` +- EXE‑first(任意): `.github/workflows/selfhost-exe-first.yml` + +LLVM Crate(試用) +- ダミー: `cargo build -p nyash-llvm-compiler --release && ./target/release/ny-llvmc --dummy --out /tmp/dummy.o` +- JSON→.o: `./target/release/ny-llvmc --in mir.json --out out.o` Operational Notes - 環境変数 @@ -93,9 +145,13 @@ Key Flags - `NYASH_VERIFY_ALLOW_NO_PHI` (default 1): relax verifier for PHI‑less MIR. - `NYASH_LLVM_USE_HARNESS=1`: route AOT through llvmlite harness. - `NYASH_LLVM_TRACE_PHI=1`: trace PHI resolution/wiring. 
+- `NYASH_LLVM_PREPASS_LOOP=1`: enable loop prepass (while detection/structure) +- `NYASH_LLVM_PREPASS_IFMERGE=1`: enable if‑merge (ret‑merge) prepass +- `NYASH_LLVM_TRACE_VALUES=1`: trace value resolution path Notes / Policies - Focus is self‑hosting stability. JIT/Cranelift is out of scope (safety fixes only). - PHI generation remains centralized in llvmlite; Bridge/Builder keep PHI‑off by default. -- No full tracing GC yet; handles/Arc lifetimes govern object retention. Safepoint/barrier/roots are staging utilities. +- No full tracing/moving GC yet; handles/Arc lifetimes govern object retention. Safepoint/barrier/roots are staging utilities. + - GC mode UX: keep user‑facing modes minimal (rc+cycle, minorgen); advanced modes are opt‑in for language dev. - Legacy Interpreter/VM は段階的にアーカイブへ。日常の意味論確認は PyVM を基準として継続。 diff --git a/app_async b/app_async new file mode 100644 index 00000000..37a1aab9 Binary files /dev/null and b/app_async differ diff --git a/app_gc_smoke b/app_gc_smoke new file mode 100644 index 00000000..3166e26e Binary files /dev/null and b/app_gc_smoke differ diff --git a/crates/nyash-llvm-compiler/Cargo.toml b/crates/nyash-llvm-compiler/Cargo.toml new file mode 100644 index 00000000..20c152bf --- /dev/null +++ b/crates/nyash-llvm-compiler/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "nyash-llvm-compiler" +version = "0.1.0" +edition = "2021" +description = "Nyash LLVM compiler CLI (harness wrapper). Compiles MIR(JSON) -> object (.o) or dummy." 
+
+[dependencies]
+anyhow = "1.0"
+clap = { version = "4.5", features = ["derive"] }
+serde_json = "1.0"
+
diff --git a/crates/nyash-llvm-compiler/src/main.rs b/crates/nyash-llvm-compiler/src/main.rs
new file mode 100644
index 00000000..aca7d1b4
--- /dev/null
+++ b/crates/nyash-llvm-compiler/src/main.rs
@@ -0,0 +1,148 @@
+use std::fs::File;
+use std::io::{Read, Write};
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use anyhow::{bail, Context, Result};
+use clap::{ArgAction, Parser};
+
+// Command-line arguments for `ny-llvmc`.
+// (Plain `//` comments here so the clap-generated help text stays unchanged.)
+#[derive(Parser, Debug)]
+#[command(name = "ny-llvmc", about = "Nyash LLVM compiler (llvmlite harness wrapper)")]
+struct Args {
+    /// MIR JSON input file path (use '-' to read from stdin). When omitted with --dummy, a dummy ny_main is emitted.
+    #[arg(long = "in", value_name = "FILE", default_value = "-")]
+    infile: String,
+
+    /// Output object file (.o)
+    #[arg(long, value_name = "FILE")]
+    out: PathBuf,
+
+    /// Generate a dummy object (ny_main -> i32 0). Ignores --in when set.
+    #[arg(long, action = ArgAction::SetTrue)]
+    dummy: bool,
+
+    /// Path to Python harness script (defaults to tools/llvmlite_harness.py in CWD)
+    #[arg(long, value_name = "FILE")]
+    harness: Option<PathBuf>,
+}
+
+/// Entry point: parse arguments, then drive the Python llvmlite harness in
+/// either dummy mode (`--dummy`) or MIR JSON -> object mode.
+fn main() -> Result<()> {
+    let args = Args::parse();
+
+    // Best-effort creation of the output directory; the result is deliberately
+    // ignored, as in the original code.
+    if let Some(parent) = args.out.parent() {
+        std::fs::create_dir_all(parent).ok();
+    }
+
+    // Resolve harness path (the relative default is resolved against the CWD)
+    let harness_path = args
+        .harness
+        .clone()
+        .unwrap_or_else(|| PathBuf::from("tools/llvmlite_harness.py"));
+
+    if args.dummy {
+        run_harness_dummy(&harness_path, &args.out)
+            .with_context(|| "failed to run harness in dummy mode")?;
+        println!("[ny-llvmc] dummy object written: {}", args.out.display());
+        return Ok(());
+    }
+
+    // Prepare input JSON path: either the given file, or stdin spooled to a temp file
+    let temp_path: Option<PathBuf> = if args.infile == "-" {
+        Some(spool_stdin_json()?)
+    } else {
+        None
+    };
+    let input_path = temp_path
+        .clone()
+        .unwrap_or_else(|| PathBuf::from(&args.infile));
+
+    if !input_path.exists() {
+        bail!("input JSON not found: {}", input_path.display());
+    }
+
+    let result = run_harness_in(&harness_path, &input_path, &args.out).with_context(|| {
+        format!("failed to compile MIR JSON via harness: {}", input_path.display())
+    });
+
+    // Remove the spooled stdin copy whether the harness succeeded or failed;
+    // previously an early `?` return left the file behind on failure.
+    if let Some(p) = temp_path {
+        let _ = std::fs::remove_file(p);
+    }
+    result?;
+
+    println!("[ny-llvmc] object written: {}", args.out.display());
+    Ok(())
+}
+
+/// Read MIR JSON from stdin, check that it parses as JSON, and write it to a
+/// temporary file whose path is returned. The caller removes the file.
+fn spool_stdin_json() -> Result<PathBuf> {
+    let mut buf = String::new();
+    std::io::stdin()
+        .read_to_string(&mut buf)
+        .context("reading MIR JSON from stdin")?;
+    // Basic sanity check that it's JSON
+    let _: serde_json::Value =
+        serde_json::from_str(&buf).context("stdin does not contain valid JSON")?;
+    // The process id is part of the name so that concurrent ny-llvmc runs do
+    // not overwrite each other's input (the old name was a fixed string).
+    let tmp = std::env::temp_dir().join(format!("ny_llvmc_stdin_{}.json", std::process::id()));
+    let mut f = File::create(&tmp).context("create temp json file")?;
+    if let Err(e) = f.write_all(buf.as_bytes()) {
+        // Do not leave a partially written file behind.
+        let _ = std::fs::remove_file(&tmp);
+        return Err(anyhow::Error::new(e).context("write temp json"));
+    }
+    Ok(tmp)
+}
+
+/// Run the harness with only `--out`.
+/// NOTE(review): presumably the harness emits the dummy object when `--in`
+/// is absent; confirm against tools/llvmlite_harness.py.
+fn run_harness_dummy(harness: &Path, out: &Path) -> Result<()> {
+    ensure_python()?;
+    let status = Command::new("python3")
+        .arg(harness)
+        .arg("--out")
+        .arg(out)
+        .status()
+        .context("failed to execute python harness (dummy)")?;
+    if !status.success() {
+        bail!("harness exited with status: {:?}", status.code());
+    }
+    Ok(())
+}
+
+/// Run the harness as `python3 <harness> --in <input> --out <out>` and fail
+/// if the process cannot be started or exits unsuccessfully.
+fn run_harness_in(harness: &Path, input: &Path, out: &Path) -> Result<()> {
+    ensure_python()?;
+    let status = Command::new("python3")
+        .arg(harness)
+        .arg("--in")
+        .arg(input)
+        .arg("--out")
+        .arg(out)
+        .status()
+        .context("failed to execute python harness")?;
+    if !status.success() {
+        bail!("harness exited with status: {:?}", status.code());
+    }
+    Ok(())
+}
+
+/// Check that `python3 --version` can be run successfully from PATH.
+fn ensure_python() -> Result<()> {
+    match Command::new("python3").arg("--version").output() {
+        Ok(out) if out.status.success() => Ok(()),
+        _ => bail!("python3 not found in PATH (required for llvmlite harness)"),
+    }
+}
+
diff --git a/crates/nyrt/src/lib.rs b/crates/nyrt/src/lib.rs
index 2cd05e59..f9b5a033 100644
--- a/crates/nyrt/src/lib.rs
+++ b/crates/nyrt/src/lib.rs
@@ -78,6 +78,7 @@ pub extern "C" fn
nyash_string_concat_hh_export(a_h: i64, b_h: i64) -> i64 { String::new() }; let s = format!("{}{}", to_s(a_h), to_s(b_h)); + nyash_rust::runtime::global_hooks::gc_alloc(s.len() as u64); let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(s)); let h = handles::to_handle(arc) as i64; eprintln!("[TRACE] concat_hh -> {}", h); @@ -134,6 +135,7 @@ pub extern "C" fn nyash_string_substring_hii_export(h: i64, start: i64, end: i64 let (st_u, en_u) = (st as usize, en as usize); let sub = s.get(st_u.min(s.len())..en_u.min(s.len())).unwrap_or(""); let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(sub.to_string())); + nyash_rust::runtime::global_hooks::gc_alloc(sub.len() as u64); let nh = handles::to_handle(arc) as i64; eprintln!("[TRACE] substring_hii -> {}", nh); nh @@ -196,7 +198,8 @@ pub extern "C" fn nyash_box_from_i8_string(ptr: *const i8) -> i64 { Ok(v) => v.to_string(), Err(_) => return 0, }; - let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(s)); + let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(s.clone())); + nyash_rust::runtime::global_hooks::gc_alloc(s.len() as u64); let h = handles::to_handle(arc) as i64; eprintln!("[TRACE] from_i8_string -> {}", h); h @@ -208,6 +211,7 @@ pub extern "C" fn nyash_box_from_i8_string(ptr: *const i8) -> i64 { pub extern "C" fn nyash_box_from_f64(val: f64) -> i64 { use nyash_rust::{box_trait::NyashBox, boxes::FloatBox, jit::rt::handles}; let arc: std::sync::Arc = std::sync::Arc::new(FloatBox::new(val)); + nyash_rust::runtime::global_hooks::gc_alloc(8); handles::to_handle(arc) as i64 } @@ -220,6 +224,7 @@ pub extern "C" fn nyash_box_from_i64(val: i64) -> i64 { jit::rt::handles, }; let arc: std::sync::Arc = std::sync::Arc::new(IntegerBox::new(val)); + nyash_rust::runtime::global_hooks::gc_alloc(8); handles::to_handle(arc) as i64 } @@ -487,7 +492,8 @@ pub extern "C" fn nyash_string_from_u64x2_export(lo: i64, hi: i64, len: i64) -> bytes.push(((hi_u >> (8 * i)) & 0xff) as u8); } let s = 
String::from_utf8_lossy(&bytes).to_string(); - let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(s)); + let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(s.clone())); + nyash_rust::runtime::global_hooks::gc_alloc(s.len() as u64); handles::to_handle(arc) as i64 } @@ -542,9 +548,25 @@ pub extern "C" fn nyash_gc_barrier_write_export(handle_or_ptr: i64) -> i64 { if std::env::var("NYASH_GC_BARRIER_TRACE").ok().as_deref() == Some("1") { eprintln!("[nyrt] nyash.gc.barrier_write h=0x{:x}", handle_or_ptr); } + // Forward to runtime GC hooks when available (Write barrier) + nyash_rust::runtime::global_hooks::gc_barrier(nyash_rust::runtime::BarrierKind::Write); 0 } +// LLVM safepoint exports (llvmlite harness) +// export: ny_safepoint(live_count: i64, live_values: i64*) -> void +#[no_mangle] +pub extern "C" fn ny_safepoint(_live_count: i64, _live_values: *const i64) { + // For now we ignore live-values; runtime uses cooperative safepoint + poll + nyash_rust::runtime::global_hooks::safepoint_and_poll(); +} + +// export: ny_check_safepoint() -> void +#[no_mangle] +pub extern "C" fn ny_check_safepoint() { + nyash_rust::runtime::global_hooks::safepoint_and_poll(); +} + #[export_name = "nyash.string.birth_h"] pub extern "C" fn nyash_string_birth_h_export() -> i64 { // Create a new StringBox via unified plugin host; return runtime handle as i64 @@ -552,6 +574,7 @@ pub extern "C" fn nyash_string_birth_h_export() -> i64 { if let Ok(b) = host_g.create_box("StringBox", &[]) { let arc: std::sync::Arc = std::sync::Arc::from(b); let h = nyash_rust::jit::rt::handles::to_handle(arc); + nyash_rust::runtime::global_hooks::gc_alloc(0); return h as i64; } } @@ -564,6 +587,7 @@ pub extern "C" fn nyash_integer_birth_h_export() -> i64 { if let Ok(b) = host_g.create_box("IntegerBox", &[]) { let arc: std::sync::Arc = std::sync::Arc::from(b); let h = nyash_rust::jit::rt::handles::to_handle(arc); + nyash_rust::runtime::global_hooks::gc_alloc(0); return h as i64; } } @@ -576,6 
+600,7 @@ pub extern "C" fn nyash_console_birth_h_export() -> i64 { if let Ok(b) = host_g.create_box("ConsoleBox", &[]) { let arc: std::sync::Arc = std::sync::Arc::from(b); let h = nyash_rust::jit::rt::handles::to_handle(arc); + nyash_rust::runtime::global_hooks::gc_alloc(0); return h as i64; } } @@ -587,6 +612,7 @@ pub extern "C" fn nyash_console_birth_h_export() -> i64 { pub extern "C" fn nyash_array_birth_h_export() -> i64 { let arc: std::sync::Arc = std::sync::Arc::new(nyash_rust::boxes::array::ArrayBox::new()); + nyash_rust::runtime::global_hooks::gc_alloc(0); nyash_rust::jit::rt::handles::to_handle(arc) as i64 } @@ -595,6 +621,7 @@ pub extern "C" fn nyash_array_birth_h_export() -> i64 { pub extern "C" fn nyash_map_birth_h_export() -> i64 { let arc: std::sync::Arc = std::sync::Arc::new(nyash_rust::boxes::map_box::MapBox::new()); + nyash_rust::runtime::global_hooks::gc_alloc(0); nyash_rust::jit::rt::handles::to_handle(arc) as i64 } // ---- Process entry (driver) ---- @@ -646,6 +673,15 @@ pub extern "C" fn main() -> i32 { } } } + // Initialize a minimal runtime to back global hooks (GC/scheduler) for safepoints + // Choose GC hooks based on env (default dev: Counting for observability unless explicitly off) + let mut rt_builder = nyash_rust::runtime::NyashRuntimeBuilder::new(); + let gc_mode = nyash_rust::runtime::gc_mode::GcMode::from_env(); + let controller = std::sync::Arc::new(nyash_rust::runtime::gc_controller::GcController::new(gc_mode)); + rt_builder = rt_builder.with_gc_hooks(controller); + let rt_hooks = rt_builder.build(); + nyash_rust::runtime::global_hooks::set_from_runtime(&rt_hooks); + let mut inited = false; if let Some(dir) = &exe_dir { let candidate = dir.join("nyash.toml"); @@ -680,6 +716,70 @@ pub extern "C" fn main() -> i32 { let v = ny_main(); // Print standardized result line for golden comparisons println!("Result: {}", v); + // Optional GC metrics after program completes + let want_json = 
std::env::var("NYASH_GC_METRICS_JSON").ok().as_deref() == Some("1"); + let want_text = std::env::var("NYASH_GC_METRICS").ok().as_deref() == Some("1"); + if want_json || want_text { + let (sp, br, bw) = rt_hooks + .gc + .snapshot_counters() + .unwrap_or((0, 0, 0)); + let handles = nyash_rust::jit::rt::handles::len(); + let gc_mode_s = gc_mode.as_str(); + // Include allocation totals if controller is used + let any_gc: &dyn std::any::Any = &*rt_hooks.gc; + let (alloc_count, alloc_bytes, trial_nodes, trial_edges, collect_total, collect_sp, collect_alloc, last_ms, last_reason) = if let Some(ctrl) = any_gc + .downcast_ref::() + { + let (ac, ab) = ctrl.alloc_totals(); + let (tn, te) = ctrl.trial_reachability_last(); + let (ct, csp, calloc) = ctrl.collection_totals(); + let lms = ctrl.trial_duration_last_ms(); + let lrf = ctrl.trial_reason_last_bits(); + (ac, ab, tn, te, ct, csp, calloc, lms, lrf) + } else { + (0, 0, 0, 0, 0, 0, 0, 0, 0) + }; + // Settings snapshot (env) + let sp_interval = std::env::var("NYASH_GC_COLLECT_SP").ok().and_then(|s| s.parse::().ok()).unwrap_or(0); + let alloc_thresh = std::env::var("NYASH_GC_COLLECT_ALLOC").ok().and_then(|s| s.parse::().ok()).unwrap_or(0); + let auto_sp = std::env::var("NYASH_LLVM_AUTO_SAFEPOINT").ok().map(|v| v == "1").unwrap_or(true); + if want_json { + // Minimal JSON assembly to avoid extra deps in nyrt + println!( + "{{\"kind\":\"gc_metrics\",\"safepoints\":{},\"barrier_reads\":{},\"barrier_writes\":{},\"jit_handles\":{},\"alloc_count\":{},\"alloc_bytes\":{},\"trial_nodes\":{},\"trial_edges\":{},\"collections\":{},\"collect_by_sp\":{},\"collect_by_alloc\":{},\"last_collect_ms\":{},\"last_reason_bits\":{},\"sp_interval\":{},\"alloc_threshold\":{},\"auto_safepoint\":{},\"gc_mode\":\"{}\"}}", + sp, br, bw, handles, alloc_count, alloc_bytes, trial_nodes, trial_edges, collect_total, collect_sp, collect_alloc, last_ms, last_reason, sp_interval, alloc_thresh, if auto_sp {1} else {0}, gc_mode_s + ); + } else if want_text { + 
eprintln!( + "[GC] metrics: safepoints={} read_barriers={} write_barriers={} jit_handles={} allocs={} bytes={} collections={} (sp={} alloc={}) last_ms={} mode={}", + sp, br, bw, handles, alloc_count, alloc_bytes, collect_total, collect_sp, collect_alloc, last_ms, gc_mode_s + ); + } + // Threshold warning + if let Ok(s) = std::env::var("NYASH_GC_ALLOC_THRESHOLD") { + if let Ok(th) = s.parse::() { + if alloc_bytes > th { + eprintln!( + "[GC][warn] allocation bytes {} exceeded threshold {}", + alloc_bytes, th + ); + } + } + } + } + + // Leak diagnostics: report remaining JIT handles by type (Top-10) + if std::env::var("NYASH_GC_LEAK_DIAG").ok().as_deref() == Some("1") { + let tally = nyash_rust::jit::rt::handles::type_tally(); + let total = tally.iter().map(|(_, n)| *n as u64).sum::(); + if total > 0 { + eprintln!("[leak] Remaining handles by type (top 10):"); + for (i, (ty, n)) in tally.into_iter().take(10).enumerate() { + eprintln!(" {}. {} x{}", i + 1, ty, n); + } + } + } v as i32 } } diff --git a/crates/nyrt/src/plugin/console.rs b/crates/nyrt/src/plugin/console.rs index 8c0a1d10..bb7a95d0 100644 --- a/crates/nyrt/src/plugin/console.rs +++ b/crates/nyrt/src/plugin/console.rs @@ -131,7 +131,7 @@ pub extern "C" fn nyash_console_readline_export() -> *mut i8 { // Use read_to_end if stdin is not a TTY? 
Simpler: read_line through BufRead // For simplicity, read from stdin into buffer until newline or EOF let mut buf = String::new(); - let mut handle = io::stdin(); + // Note: use std::io::stdin() directly without an unused handle binding // On failure or EOF, return empty string match io::stdin().read_line(&mut buf) { Ok(_n) => { diff --git a/crates/nyrt/src/plugin/future.rs b/crates/nyrt/src/plugin/future.rs index 6ac8ed6e..1495fe10 100644 --- a/crates/nyrt/src/plugin/future.rs +++ b/crates/nyrt/src/plugin/future.rs @@ -114,7 +114,7 @@ pub extern "C" fn nyash_future_spawn_method_h( let handle = nyash_rust::jit::rt::handles::to_handle(fut_box.clone() as std::sync::Arc); // Copy data for async task - let mut cap: usize = 512; + let cap: usize = 512; let tlv = buf.clone(); let inv = invoke.unwrap(); nyash_rust::runtime::global_hooks::spawn_task( diff --git a/crates/nyrt/src/plugin/invoke.rs b/crates/nyrt/src/plugin/invoke.rs index 1422968a..226ab575 100644 --- a/crates/nyrt/src/plugin/invoke.rs +++ b/crates/nyrt/src/plugin/invoke.rs @@ -1,4 +1,5 @@ use crate::encode::{nyrt_encode_arg_or_legacy, nyrt_encode_from_legacy_at}; +use crate::plugin::invoke_core; #[no_mangle] pub extern "C" fn nyash_plugin_invoke3_i64( type_id: i64, @@ -8,39 +9,14 @@ pub extern "C" fn nyash_plugin_invoke3_i64( a1: i64, a2: i64, ) -> i64 { - use nyash_rust::runtime::plugin_loader_v2::PluginBoxV2; - // Resolve receiver instance from handle first; fallback to legacy VM args (param index) - let mut instance_id: u32 = 0; - let mut real_type_id: u32 = 0; - let mut invoke: Option< - unsafe extern "C" fn(u32, u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32, - > = None; - if a0 > 0 { - if let Some(obj) = nyash_rust::jit::rt::handles::get(a0 as u64) { - if let Some(p) = obj.as_any().downcast_ref::() { - instance_id = p.instance_id(); - real_type_id = p.inner.type_id; - invoke = Some(p.inner.invoke_fn); - } - } - } - if invoke.is_none() - && a0 >= 0 - && 
std::env::var("NYASH_JIT_ARGS_HANDLE_ONLY").ok().as_deref() != Some("1") - { - nyash_rust::jit::rt::with_legacy_vm_args(|args| { - let idx = a0 as usize; - if let Some(nyash_rust::backend::vm::VMValue::BoxRef(b)) = args.get(idx) { - if let Some(p) = b.as_any().downcast_ref::() { - instance_id = p.instance_id(); - invoke = Some(p.inner.invoke_fn); - } - } - }); - } - if invoke.is_none() { - return 0; - } + // Resolve receiver via shared core helper + let recv = match invoke_core::resolve_receiver_for_a0(a0) { + Some(r) => r, + None => return 0, + }; + let instance_id: u32 = recv.instance_id; + let _real_type_id: u32 = recv.real_type_id; + let invoke = recv.invoke; // Build TLV args from a1/a2 if present. Prefer handles/StringBox/IntegerBox via runtime host. use nyash_rust::{backend::vm::VMValue, jit::rt::handles}; // argc from LLVM lowering is explicit arg count (excludes receiver) @@ -48,164 +24,11 @@ pub extern "C" fn nyash_plugin_invoke3_i64( let mut buf = nyash_rust::runtime::plugin_ffi_common::encode_tlv_header(nargs as u16); // Encode legacy VM arg at position into provided buffer (avoid capturing &mut buf) let mut encode_from_legacy_into = |dst: &mut Vec, arg_pos: usize| { - nyash_rust::jit::rt::with_legacy_vm_args(|args| { - if let Some(v) = args.get(arg_pos) { - match v { - VMValue::String(s) => { - nyash_rust::runtime::plugin_ffi_common::encode::string(dst, s) - } - VMValue::Integer(i) => { - nyash_rust::runtime::plugin_ffi_common::encode::i64(dst, *i) - } - VMValue::Float(f) => { - nyash_rust::runtime::plugin_ffi_common::encode::f64(dst, *f) - } - VMValue::Bool(b) => { - nyash_rust::runtime::plugin_ffi_common::encode::bool(dst, *b) - } - VMValue::BoxRef(b) => { - // BufferBox → TLV bytes - if let Some(bufbox) = b - .as_any() - .downcast_ref::() - { - nyash_rust::runtime::plugin_ffi_common::encode::bytes( - dst, - &bufbox.to_vec(), - ); - return; - } - if let Some(p) = b.as_any().downcast_ref::() { - // Prefer StringBox/IntegerBox primitives when possible 
- let host = nyash_rust::runtime::get_global_plugin_host(); - if let Ok(hg) = host.read() { - if p.box_type == "StringBox" { - if let Ok(Some(sb)) = hg.invoke_instance_method( - "StringBox", - "toUtf8", - p.instance_id(), - &[], - ) { - if let Some(s) = sb - .as_any() - .downcast_ref::() - { - nyash_rust::runtime::plugin_ffi_common::encode::string( - dst, &s.value, - ); - return; - } - } - } else if p.box_type == "IntegerBox" { - if let Ok(Some(ibx)) = hg.invoke_instance_method( - "IntegerBox", - "get", - p.instance_id(), - &[], - ) { - if let Some(i) = - ibx.as_any() - .downcast_ref::() - { - nyash_rust::runtime::plugin_ffi_common::encode::i64( - dst, i.value, - ); - return; - } - } - } - } - // Fallback: pass handle as plugin-handle TLV - nyash_rust::runtime::plugin_ffi_common::encode::plugin_handle( - dst, - p.inner.type_id, - p.instance_id(), - ); - } else { - // Stringify unknown boxes - let s = b.to_string_box().value; - nyash_rust::runtime::plugin_ffi_common::encode::string(dst, &s) - } - } - _ => {} - } - } - }); + nyrt_encode_from_legacy_at(dst, arg_pos) }; // Encode argument value or fallback to legacy slot (avoid capturing &mut buf) let mut encode_arg_into = |dst: &mut Vec, val: i64, pos: usize| { - let mut appended = false; - // Try handle first - if val > 0 { - if let Some(obj) = handles::get(val as u64) { - // BufferBox handle → TLV bytes - if let Some(bufbox) = obj - .as_any() - .downcast_ref::() - { - nyash_rust::runtime::plugin_ffi_common::encode::bytes(dst, &bufbox.to_vec()); - appended = true; - return; - } - if let Some(p) = obj.as_any().downcast_ref::() { - let host = nyash_rust::runtime::get_global_plugin_host(); - if let Ok(hg) = host.read() { - if p.box_type == "StringBox" { - if let Ok(Some(sb)) = hg.invoke_instance_method( - "StringBox", - "toUtf8", - p.instance_id(), - &[], - ) { - if let Some(s) = sb - .as_any() - .downcast_ref::() - { - nyash_rust::runtime::plugin_ffi_common::encode::string( - dst, &s.value, - ); - appended = true; - 
return; - } - } - } else if p.box_type == "IntegerBox" { - if let Ok(Some(ibx)) = - hg.invoke_instance_method("IntegerBox", "get", p.instance_id(), &[]) - { - if let Some(i) = ibx - .as_any() - .downcast_ref::() - { - nyash_rust::runtime::plugin_ffi_common::encode::i64( - dst, i.value, - ); - appended = true; - return; - } - } - } - } - // Otherwise, pass as handle TLV - nyash_rust::runtime::plugin_ffi_common::encode::plugin_handle( - dst, - p.inner.type_id, - p.instance_id(), - ); - appended = true; - return; - } - } - } - // Legacy VM args by positional index (1-based for a1) - let before = dst.len(); - encode_from_legacy_into(dst, pos); - if dst.len() != before { - appended = true; - } - // If still nothing appended (no-op), fallback to raw i64 - if !appended { - nyash_rust::runtime::plugin_ffi_common::encode::i64(dst, val); - } + nyrt_encode_arg_or_legacy(dst, val, pos) }; if nargs >= 1 { encode_arg_into(&mut buf, a1, 1); @@ -219,123 +42,20 @@ pub extern "C" fn nyash_plugin_invoke3_i64( encode_from_legacy_into(&mut buf, pos); } } - // Prepare output buffer (dynamic growth on short buffer) - let mut cap: usize = 256; - let (mut tag_ret, mut sz_ret, mut payload_ret): (u8, usize, Vec) = (0, 0, Vec::new()); - loop { - let mut out = vec![0u8; cap]; - let mut out_len: usize = out.len(); - let rc = unsafe { - invoke.unwrap()( - type_id as u32, - method_id as u32, - instance_id, - buf.as_ptr(), - buf.len(), - out.as_mut_ptr(), - &mut out_len, - ) - }; - if rc != 0 { - // Retry on short buffer hint (-1) or when plugin wrote beyond capacity (len > cap) - if rc == -1 || out_len > cap { - cap = cap.saturating_mul(2).max(out_len + 16); - if cap > 1 << 20 { - break; - } - continue; - } - return 0; - } - let slice = &out[..out_len]; - if let Some((t, s, p)) = nyash_rust::runtime::plugin_ffi_common::decode::tlv_first(slice) { - tag_ret = t; - sz_ret = s; - payload_ret = p.to_vec(); - } - break; - } - if payload_ret.is_empty() { - return 0; - } + // Call invoke with dynamic 
buffer logic centralized + let (tag_ret, sz_ret, payload_ret): (u8, usize, Vec) = match invoke_core::plugin_invoke_call( + invoke, + type_id as u32, + method_id as u32, + instance_id, + &buf, + ) { + Some((t, s, p)) => (t, s, p), + None => return 0, + }; if let Some((tag, sz, payload)) = Some((tag_ret, sz_ret, payload_ret.as_slice())) { - match tag { - 2 => { - // I32 - if let Some(v) = nyash_rust::runtime::plugin_ffi_common::decode::i32(payload) { - return v as i64; - } - } - 3 => { - // I64 - if let Some(v) = nyash_rust::runtime::plugin_ffi_common::decode::i32(payload) { - return v as i64; - } - if payload.len() == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - return i64::from_le_bytes(b); - } - } - 6 | 7 => { - // String/Bytes -> register StringBox handle - use nyash_rust::box_trait::{NyashBox, StringBox}; - let s = nyash_rust::runtime::plugin_ffi_common::decode::string(payload); - let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(s)); - let h = nyash_rust::jit::rt::handles::to_handle(arc); - return h as i64; - } - 8 => { - // Handle(tag=8) -> register and return handle id (i64) - if sz == 8 { - let mut t = [0u8; 4]; - t.copy_from_slice(&payload[0..4]); - let mut i = [0u8; 4]; - i.copy_from_slice(&payload[4..8]); - let r_type = u32::from_le_bytes(t); - let r_inst = u32::from_le_bytes(i); - // Build PluginBoxV2 and register into handle-registry - let meta_opt = - nyash_rust::runtime::plugin_loader_v2::metadata_for_type_id(r_type); - let (box_type_name, invoke_ptr) = if let Some(meta) = meta_opt { - (meta.box_type.clone(), meta.invoke_fn) - } else { - ("PluginBox".to_string(), invoke.unwrap()) - }; - let pb = nyash_rust::runtime::plugin_loader_v2::make_plugin_box_v2( - box_type_name.clone(), - r_type, - r_inst, - invoke_ptr, - ); - let arc: std::sync::Arc = - std::sync::Arc::new(pb); - let h = nyash_rust::jit::rt::handles::to_handle(arc); - return h as i64; - } - } - 1 => { - // Bool - return if 
nyash_rust::runtime::plugin_ffi_common::decode::bool(payload) - .unwrap_or(false) - { - 1 - } else { - 0 - }; - } - 5 => { - // F64 → optional conversion to i64 - if std::env::var("NYASH_JIT_NATIVE_F64").ok().as_deref() == Some("1") { - if sz == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - let f = f64::from_le_bytes(b); - return f as i64; - } - } - } - _ => {} + if let Some(v) = invoke_core::decode_entry_to_i64(tag, sz, payload, invoke) { + return v; } } 0 @@ -484,77 +204,7 @@ pub extern "C" fn nyash_plugin_invoke3_f64( }); }; let mut encode_arg = |val: i64, pos: usize| { - let mut appended = false; - if val > 0 { - if let Some(obj) = handles::get(val as u64) { - if let Some(bufbox) = obj - .as_any() - .downcast_ref::() - { - nyash_rust::runtime::plugin_ffi_common::encode::bytes( - &mut buf, - &bufbox.to_vec(), - ); - appended = true; - return; - } - if let Some(p) = obj.as_any().downcast_ref::() { - let host = nyash_rust::runtime::get_global_plugin_host(); - if let Ok(hg) = host.read() { - if p.box_type == "StringBox" { - if let Ok(Some(sb)) = hg.invoke_instance_method( - "StringBox", - "toUtf8", - p.instance_id(), - &[], - ) { - if let Some(s) = sb - .as_any() - .downcast_ref::() - { - nyash_rust::runtime::plugin_ffi_common::encode::string( - &mut buf, &s.value, - ); - appended = true; - return; - } - } - } else if p.box_type == "IntegerBox" { - if let Ok(Some(ibx)) = - hg.invoke_instance_method("IntegerBox", "get", p.instance_id(), &[]) - { - if let Some(i) = ibx - .as_any() - .downcast_ref::() - { - nyash_rust::runtime::plugin_ffi_common::encode::i64( - &mut buf, i.value, - ); - appended = true; - return; - } - } - } - } - nyash_rust::runtime::plugin_ffi_common::encode::plugin_handle( - &mut buf, - p.inner.type_id, - p.instance_id(), - ); - appended = true; - return; - } - } - } - let before = buf.len(); - // Use global helper to avoid nested mutable borrows on buf - nyrt_encode_from_legacy_at(&mut buf, pos); - if buf.len() != before { - appended 
= true; - } - if !appended { - nyash_rust::runtime::plugin_ffi_common::encode::i64(&mut buf, val); - } + crate::encode::nyrt_encode_arg_or_legacy(&mut buf, val, pos) }; if nargs >= 1 { encode_arg(a1, 1); @@ -567,77 +217,20 @@ pub extern "C" fn nyash_plugin_invoke3_f64( nyrt_encode_from_legacy_at(&mut buf, pos); } } - // Prepare output buffer (dynamic growth on short buffer) - let mut cap: usize = 256; - let (mut tag_ret, mut sz_ret, mut payload_ret): (u8, usize, Vec) = (0, 0, Vec::new()); - loop { - let mut out = vec![0u8; cap]; - let mut out_len: usize = out.len(); - let rc = unsafe { - invoke.unwrap()( - type_id as u32, - method_id as u32, - instance_id, - buf.as_ptr(), - buf.len(), - out.as_mut_ptr(), - &mut out_len, - ) - }; - if rc != 0 { - // Retry on short buffer (-1) or when plugin wrote beyond capacity - if rc == -1 || out_len > cap { - cap = cap.saturating_mul(2).max(out_len + 16); - if cap > 1 << 20 { - break; - } - continue; - } - return 0.0; - } - let slice = &out[..out_len]; - if let Some((t, s, p)) = nyash_rust::runtime::plugin_ffi_common::decode::tlv_first(slice) { - tag_ret = t; - sz_ret = s; - payload_ret = p.to_vec(); - } - break; - } - if payload_ret.is_empty() { - return 0.0; - } + // Invoke via shared helper + let (mut tag_ret, mut sz_ret, mut payload_ret): (u8, usize, Vec) = match invoke_core::plugin_invoke_call( + invoke.unwrap(), + type_id as u32, + method_id as u32, + instance_id, + &buf, + ) { + Some((t, s, p)) => (t, s, p), + None => return 0.0, + }; if let Some((tag, sz, payload)) = Some((tag_ret, sz_ret, payload_ret.as_slice())) { - match tag { - 5 => { - // F64 - if sz == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - return f64::from_le_bytes(b); - } - } - 3 => { - // I64 -> f64 - if let Some(v) = nyash_rust::runtime::plugin_ffi_common::decode::i32(payload) { - return v as f64; - } - if payload.len() == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - return (i64::from_le_bytes(b)) as f64; - } - } - 1 => { - 
// Bool -> f64 - return if nyash_rust::runtime::plugin_ffi_common::decode::bool(payload) - .unwrap_or(false) - { - 1.0 - } else { - 0.0 - }; - } - _ => {} + if let Some(f) = invoke_core::decode_entry_to_f64(tag, sz, payload) { + return f; } } 0.0 @@ -832,42 +425,10 @@ fn nyash_plugin_invoke_name_common_i64(method: &str, argc: i64, a0: i64, a1: i64 &mut out_len, ) }; - if rc != 0 { - return 0; - } + if rc != 0 { return 0; } let out_slice = &out[..out_len]; - if let Some((tag, _sz, payload)) = - nyash_rust::runtime::plugin_ffi_common::decode::tlv_first(out_slice) - { - match tag { - 3 => { - if payload.len() == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - return i64::from_le_bytes(b); - } - } - 1 => { - return if nyash_rust::runtime::plugin_ffi_common::decode::bool(payload) - .unwrap_or(false) - { - 1 - } else { - 0 - }; - } - 5 => { - if std::env::var("NYASH_JIT_NATIVE_F64").ok().as_deref() == Some("1") { - if payload.len() == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - let f = f64::from_le_bytes(b); - return f as i64; - } - } - } - _ => {} - } + if let Some((tag, sz, payload)) = nyash_rust::runtime::plugin_ffi_common::decode::tlv_first(out_slice) { + if let Some(v) = super::invoke_core::decode_entry_to_i64(tag, sz, payload, invoke.unwrap()) { return v; } } 0 } @@ -1106,73 +667,8 @@ pub extern "C" fn nyash_plugin_invoke3_tagged_i64( if rc != 0 { return 0; } - if let Some((tag, _sz, payload)) = - nyash_rust::runtime::plugin_ffi_common::decode::tlv_first(&out[..out_len]) - { - match tag { - 2 => { - if let Some(v) = nyash_rust::runtime::plugin_ffi_common::decode::i32(payload) { - return v as i64; - } - } - 3 => { - if payload.len() == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - return i64::from_le_bytes(b); - } - if let Some(v) = nyash_rust::runtime::plugin_ffi_common::decode::i32(payload) { - return v as i64; - } - } - 6 | 7 => { - use nyash_rust::box_trait::{NyashBox, StringBox}; - let s = 
nyash_rust::runtime::plugin_ffi_common::decode::string(payload); - let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(s)); - let h = nyash_rust::jit::rt::handles::to_handle(arc); - return h as i64; - } - 1 => { - return if nyash_rust::runtime::plugin_ffi_common::decode::bool(payload) - .unwrap_or(false) - { - 1 - } else { - 0 - }; - } - 8 => { - if payload.len() == 8 { - let mut t = [0u8; 4]; - t.copy_from_slice(&payload[0..4]); - let mut i = [0u8; 4]; - i.copy_from_slice(&payload[4..8]); - let r_type = u32::from_le_bytes(t); - let r_inst = u32::from_le_bytes(i); - let pb = nyash_rust::runtime::plugin_loader_v2::make_plugin_box_v2( - "PluginBox".into(), - r_type, - r_inst, - invoke.unwrap(), - ); - let arc: std::sync::Arc = - std::sync::Arc::new(pb); - let h = nyash_rust::jit::rt::handles::to_handle(arc); - return h as i64; - } - } - 5 => { - if std::env::var("NYASH_JIT_NATIVE_F64").ok().as_deref() == Some("1") { - if payload.len() == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - let f = f64::from_le_bytes(b); - return f as i64; - } - } - } - _ => {} - } + if let Some((tag, sz, payload)) = nyash_rust::runtime::plugin_ffi_common::decode::tlv_first(&out[..out_len]) { + if let Some(v) = invoke_core::decode_entry_to_i64(tag, sz, payload, invoke.unwrap()) { return v; } } 0 } @@ -1263,73 +759,8 @@ pub extern "C" fn nyash_plugin_invoke_tagged_v_i64( if rc != 0 { return 0; } - if let Some((tag, _sz, payload)) = - nyash_rust::runtime::plugin_ffi_common::decode::tlv_first(&out[..out_len]) - { - match tag { - 2 => { - if let Some(v) = nyash_rust::runtime::plugin_ffi_common::decode::i32(payload) { - return v as i64; - } - } - 3 => { - if payload.len() == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - return i64::from_le_bytes(b); - } - if let Some(v) = nyash_rust::runtime::plugin_ffi_common::decode::i32(payload) { - return v as i64; - } - } - 6 | 7 => { - use nyash_rust::box_trait::{NyashBox, StringBox}; - let s = 
nyash_rust::runtime::plugin_ffi_common::decode::string(payload); - let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(s)); - let h = nyash_rust::jit::rt::handles::to_handle(arc); - return h as i64; - } - 1 => { - return if nyash_rust::runtime::plugin_ffi_common::decode::bool(payload) - .unwrap_or(false) - { - 1 - } else { - 0 - }; - } - 8 => { - if payload.len() == 8 { - let mut t = [0u8; 4]; - t.copy_from_slice(&payload[0..4]); - let mut i = [0u8; 4]; - i.copy_from_slice(&payload[4..8]); - let r_type = u32::from_le_bytes(t); - let r_inst = u32::from_le_bytes(i); - let pb = nyash_rust::runtime::plugin_loader_v2::make_plugin_box_v2( - "PluginBox".into(), - r_type, - r_inst, - invoke.unwrap(), - ); - let arc: std::sync::Arc = - std::sync::Arc::new(pb); - let h = nyash_rust::jit::rt::handles::to_handle(arc); - return h as i64; - } - } - 5 => { - if std::env::var("NYASH_JIT_NATIVE_F64").ok().as_deref() == Some("1") { - if payload.len() == 8 { - let mut b = [0u8; 8]; - b.copy_from_slice(payload); - let f = f64::from_le_bytes(b); - return f as i64; - } - } - } - _ => {} - } + if let Some((tag, sz, payload)) = nyash_rust::runtime::plugin_ffi_common::decode::tlv_first(&out[..out_len]) { + if let Some(v) = invoke_core::decode_entry_to_i64(tag, sz, payload, invoke.unwrap()) { return v; } } 0 } diff --git a/crates/nyrt/src/plugin/invoke_core.rs b/crates/nyrt/src/plugin/invoke_core.rs new file mode 100644 index 00000000..63426449 --- /dev/null +++ b/crates/nyrt/src/plugin/invoke_core.rs @@ -0,0 +1,199 @@ +use nyash_rust::runtime::plugin_loader_v2::PluginBoxV2; + +/// Thin shared helpers for plugin invoke shims (i64/f64) +/// +/// Goal: centralize receiver resolution and the dynamic buffer call loop, +/// keeping extern functions in invoke.rs small and consistent. 
+
+/// Resolved plugin receiver: the instance to call, its concrete type id, and the
+/// plugin's C-ABI invoke entry point.
+pub struct Receiver {
+    pub instance_id: u32,
+    pub real_type_id: u32,
+    pub invoke:
+        unsafe extern "C" fn(u32, u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32,
+}
+
+/// Build a `Receiver` from a plugin box (shared by both lookup paths below).
+fn receiver_from_plugin_box(p: &PluginBoxV2) -> Receiver {
+    Receiver {
+        instance_id: p.instance_id(),
+        real_type_id: p.inner.type_id,
+        invoke: p.inner.invoke_fn,
+    }
+}
+
+/// Resolve receiver from a0: prefer handle registry; fallback to legacy VM args when allowed.
+///
+/// Returns `None` when `a0` does not name a `PluginBoxV2` in the handle registry and the
+/// legacy lookup is either disabled (`NYASH_JIT_ARGS_HANDLE_ONLY=1`), not applicable
+/// (`a0 < 0`), or finds no `PluginBoxV2` at index `a0`.
+pub fn resolve_receiver_for_a0(a0: i64) -> Option<Receiver> {
+    // 1) Handle registry (preferred)
+    if a0 > 0 {
+        if let Some(obj) = nyash_rust::jit::rt::handles::get(a0 as u64) {
+            if let Some(p) = obj.as_any().downcast_ref::<PluginBoxV2>() {
+                return Some(receiver_from_plugin_box(p));
+            }
+        }
+    }
+    // 2) Legacy VM args (index by a0) unless handle-only is enforced.
+    //    The env var is only consulted for non-negative a0, as before.
+    if a0 < 0 || std::env::var("NYASH_JIT_ARGS_HANDLE_ONLY").ok().as_deref() == Some("1") {
+        return None;
+    }
+    nyash_rust::jit::rt::with_legacy_vm_args(|args| match args.get(a0 as usize) {
+        Some(nyash_rust::backend::vm::VMValue::BoxRef(b)) => b
+            .as_any()
+            .downcast_ref::<PluginBoxV2>()
+            .map(receiver_from_plugin_box),
+        _ => None,
+    })
+}
+
+/// Call plugin invoke with dynamic buffer growth, returning first TLV entry on success.
+pub fn plugin_invoke_call( + invoke: unsafe extern "C" fn(u32, u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32, + type_id: u32, + method_id: u32, + instance_id: u32, + tlv_args: &[u8], +) -> Option<(u8, usize, Vec)> { + let mut cap: usize = 256; + let mut tag_ret: u8 = 0; + let mut sz_ret: usize = 0; + let mut payload_ret: Vec = Vec::new(); + loop { + let mut out = vec![0u8; cap]; + let mut out_len: usize = out.len(); + let rc = unsafe { + invoke( + type_id, + method_id, + instance_id, + tlv_args.as_ptr(), + tlv_args.len(), + out.as_mut_ptr(), + &mut out_len, + ) + }; + if rc != 0 { + // Retry on short buffer hint (-1) or when plugin wrote beyond capacity (len > cap) + if rc == -1 || out_len > cap { + cap = cap.saturating_mul(2).max(out_len + 16); + if cap > 1 << 20 { + break; + } + continue; + } + return None; + } + let slice = &out[..out_len]; + if let Some((t, s, p)) = nyash_rust::runtime::plugin_ffi_common::decode::tlv_first(slice) { + tag_ret = t; + sz_ret = s; + payload_ret = p.to_vec(); + } + break; + } + if payload_ret.is_empty() { + return None; + } + Some((tag_ret, sz_ret, payload_ret)) +} + +/// Decode a single TLV entry to i64 with side-effects (handle registration) when applicable. 
+pub fn decode_entry_to_i64( + tag: u8, + sz: usize, + payload: &[u8], + fallback_invoke: unsafe extern "C" fn(u32, u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32, +) -> Option { + match tag { + 2 => nyash_rust::runtime::plugin_ffi_common::decode::i32(payload).map(|v| v as i64), + 3 => { + if let Some(v) = nyash_rust::runtime::plugin_ffi_common::decode::i32(payload) { + return Some(v as i64); + } + if payload.len() == 8 { + let mut b = [0u8; 8]; + b.copy_from_slice(payload); + return Some(i64::from_le_bytes(b)); + } + None + } + 6 | 7 => { + use nyash_rust::box_trait::{NyashBox, StringBox}; + let s = nyash_rust::runtime::plugin_ffi_common::decode::string(payload); + let arc: std::sync::Arc = std::sync::Arc::new(StringBox::new(s)); + let h = nyash_rust::jit::rt::handles::to_handle(arc); + Some(h as i64) + } + 8 => { + if sz == 8 { + let mut t = [0u8; 4]; + t.copy_from_slice(&payload[0..4]); + let mut i = [0u8; 4]; + i.copy_from_slice(&payload[4..8]); + let r_type = u32::from_le_bytes(t); + let r_inst = u32::from_le_bytes(i); + // Use metadata if available to set box_type/invoke_fn + let meta_opt = nyash_rust::runtime::plugin_loader_v2::metadata_for_type_id(r_type); + let (box_type_name, invoke_ptr) = if let Some(meta) = meta_opt { + (meta.box_type.clone(), meta.invoke_fn) + } else { + ("PluginBox".to_string(), fallback_invoke) + }; + let pb = nyash_rust::runtime::plugin_loader_v2::make_plugin_box_v2( + box_type_name, + r_type, + r_inst, + invoke_ptr, + ); + let arc: std::sync::Arc = + std::sync::Arc::new(pb); + let h = nyash_rust::jit::rt::handles::to_handle(arc); + return Some(h as i64); + } + None + } + 1 => nyash_rust::runtime::plugin_ffi_common::decode::bool(payload) + .map(|b| if b { 1 } else { 0 }), + 5 => { + if std::env::var("NYASH_JIT_NATIVE_F64").ok().as_deref() == Some("1") && sz == 8 { + let mut b = [0u8; 8]; + b.copy_from_slice(payload); + let f = f64::from_le_bytes(b); + return Some(f as i64); + } + None + } + _ => None, + } +} + +/// Decode a 
single TLV entry to f64 when possible.
+///
+/// Handles F64 (tag 5), I64/I32-sized integers (tag 3) and Bool (tag 1); every other
+/// tag, and any payload whose length does not match its tag, yields `None`.
+pub fn decode_entry_to_f64(tag: u8, sz: usize, payload: &[u8]) -> Option<f64> {
+    use nyash_rust::runtime::plugin_ffi_common::decode;
+    match tag {
+        // F64: copy_from_slice needs exactly 8 bytes, so check the payload itself too.
+        5 if sz == 8 && payload.len() == 8 => {
+            let mut b = [0u8; 8];
+            b.copy_from_slice(payload);
+            Some(f64::from_le_bytes(b))
+        }
+        // I64 -> f64: prefer the full 8-byte value, fall back to the i32 decoder.
+        3 => {
+            if payload.len() == 8 {
+                let mut b = [0u8; 8];
+                b.copy_from_slice(payload);
+                return Some(i64::from_le_bytes(b) as f64);
+            }
+            decode::i32(payload).map(|v| v as f64)
+        }
+        // Bool -> f64
+        1 => decode::bool(payload).map(|b| if b { 1.0 } else { 0.0 }),
+        _ => None,
+    }
+}
diff --git a/crates/nyrt/src/plugin/mod.rs b/crates/nyrt/src/plugin/mod.rs index 0b3f9599..d382ed3e 100644 --- a/crates/nyrt/src/plugin/mod.rs +++ b/crates/nyrt/src/plugin/mod.rs @@ -4,6 +4,7 @@ pub mod console; pub mod future; pub mod instance; pub mod invoke; +pub mod invoke_core; pub mod map; pub mod semantics; pub mod string; @@ -14,6 +15,7 @@ pub use console::*; pub use future::*; pub use instance::*; pub use invoke::*; +pub use invoke_core::*; pub use map::*; pub use semantics::*; pub use string::*; diff --git a/docs/LLVM_HARNESS.md b/docs/LLVM_HARNESS.md index b207f7bc..844dc452 100644 --- a/docs/LLVM_HARNESS.md +++ b/docs/LLVM_HARNESS.md @@ -16,6 +16,12 @@ Protocol - Output: `.o` オブジェクト(既定: `NYASH_AOT_OBJECT_OUT` または `NYASH_LLVM_OBJ_OUT`)。 - 入口: `ny_main() -> i64`(戻り値は exit code 相当。必要時 handle 正規化を行う)。 +CLI(crate) +- `crates/nyash-llvm-compiler` 提供の `ny-llvmc` は llvmlite ハーネスの薄ラッパーだよ。 + - ダミー: `./target/release/ny-llvmc --dummy --out /tmp/dummy.o` + - JSON から: `./target/release/ny-llvmc --in mir.json --out out.o` + - 既定のハーネスパスは `tools/llvmlite_harness.py`。変更は `--harness ` で上書き可。 + Quick Start - 依存: `python3 -m pip install llvmlite` - ダミー生成(配線検証): @@ -41,5 +47,9 @@ Notes - 初版は固定 `ny_main` から開始してもよい(配線確認)。以降、MIR 命令を順次対応。 - ハーネスは自律(外部状態に依存しない)。エラーは即 stderr に詳細を出す。 +Schema Validation(任意) +- JSON v0 のスキーマは
`docs/reference/mir/json_v0.schema.json` にあるよ。 +- 検証: `python3 tools/validate_mir_json.py `(要: `python3 -m pip install jsonschema`)。 + Appendix: 静的リンクについて - 生成 EXE は NyRT(libnyrt.a)を静的リンク。完全静的(-static)は musl 推奨(dlopen 不可になるため動的プラグインは使用不可)。 diff --git a/docs/README.md b/docs/README.md index 8e0d1d3c..3d1904df 100644 --- a/docs/README.md +++ b/docs/README.md @@ -55,6 +55,7 @@ - [言語リファレンス](reference/language/LANGUAGE_REFERENCE_2025.md) - [アーキテクチャ概要](reference/architecture/TECHNICAL_ARCHITECTURE_2025.md) - [実行バックエンド](reference/architecture/execution-backends.md) +- [GC モードと運用](reference/runtime/gc.md) - [プラグインシステム](reference/plugin-system/) - [CLIオプション早見表](tools/cli-options.md) diff --git a/docs/guides/selfhost-pilot.md b/docs/guides/selfhost-pilot.md new file mode 100644 index 00000000..ab5ce8c7 --- /dev/null +++ b/docs/guides/selfhost-pilot.md @@ -0,0 +1,36 @@ +Self‑Hosting Pilot — Quick Guide (Phase‑15) + +Overview +- Goal: Run Ny→JSON v0 via the selfhost compiler path and execute with PyVM/LLVM. +- Default remains env‑gated for safety; CI runs smokes to build confidence. + +Recommended Flows +- Runner (pilot): `NYASH_USE_NY_COMPILER=1 ./target/release/nyash --backend vm apps/examples/string_p0.nyash` +- Emit‑only: `NYASH_USE_NY_COMPILER=1 NYASH_NY_COMPILER_EMIT_ONLY=1 ...` +- EXE‑first (parser EXE): `tools/build_compiler_exe.sh && NYASH_USE_NY_COMPILER=1 NYASH_USE_NY_COMPILER_EXE=1 ./target/release/nyash --backend vm apps/examples/string_p0.nyash` +- LLVM AOT: `NYASH_LLVM_USE_HARNESS=1 tools/build_llvm.sh apps/... -o app && ./app` + +CI Workflows +- Selfhost Bootstrap (always): `.github/workflows/selfhost-bootstrap.yml` + - Builds nyash (`cranelift-jit`) and runs `tools/bootstrap_selfhost_smoke.sh`. +- Selfhost EXE‑first (optional): `.github/workflows/selfhost-exe-first.yml` + - Installs LLVM 18 + llvmlite, then runs `tools/exe_first_smoke.sh`. + +Useful Env Flags +- `NYASH_USE_NY_COMPILER=1`: Enable selfhost compiler pipeline. 
+- `NYASH_NY_COMPILER_EMIT_ONLY=1`: Print JSON v0 only (no execution). +- `NYASH_NY_COMPILER_TIMEOUT_MS=4000`: Child timeout (ms). Default 2000. +- `NYASH_USE_NY_COMPILER_EXE=1`: Prefer external parser EXE. +- `NYASH_NY_COMPILER_EXE_PATH=`: Override EXE path. +- `NYASH_SELFHOST_READ_TMP=1`: Child reads `tmp/ny_parser_input.ny` when supported. + +Troubleshooting (short) +- No Python found: install `python3` (PyVM / harness). +- No `llvm-config-18`: install LLVM 18 dev (see EXE‑first workflow). +- llvmlite import error: `python3 -m pip install llvmlite`. +- Parser child timeout: raise `NYASH_NY_COMPILER_TIMEOUT_MS`. +- EXE‑first bridge mismatch: re‑run with `NYASH_CLI_VERBOSE=1` and keep `dist/nyash_compiler/sample.json` for inspection. + +Notes +- JSON v0 schema is stable but not yet versioned; validation is planned. +- Default backend `vm` maps to PyVM unless legacy VM features are enabled. diff --git a/docs/reference/mir/json_v0.schema.json b/docs/reference/mir/json_v0.schema.json new file mode 100644 index 00000000..2018c056 --- /dev/null +++ b/docs/reference/mir/json_v0.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://nyash.dev/schema/mir/json_v0.schema.json", + "title": "Nyash MIR JSON v0", + "type": "object", + "additionalProperties": true, + "properties": { + "schema_version": { "type": ["integer", "string" ] }, + "functions": { + "oneOf": [ + { "$ref": "#/definitions/functionList" }, + { "$ref": "#/definitions/functionMap" } + ] + } + }, + "required": ["functions"], + "definitions": { + "functionList": { + "type": "array", + "items": { "$ref": "#/definitions/function" } + }, + "functionMap": { + "type": "object", + "additionalProperties": { "$ref": "#/definitions/functionBody" } + }, + "function": { + "type": "object", + "additionalProperties": true, + "properties": { + "name": { "type": "string" }, + "params": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true, + 
"properties": { + "name": { "type": "string" }, + "type": { "type": "string" } + }, + "required": ["name", "type"] + } + }, + "return_type": { "type": "string" }, + "entry_block": { "type": ["integer", "string"] }, + "blocks": { "$ref": "#/definitions/blocks" } + }, + "required": ["name", "blocks"] + }, + "functionBody": { + "type": "object", + "additionalProperties": true, + "properties": { + "params": { "$ref": "#/definitions/function/properties/params" }, + "return_type": { "type": "string" }, + "entry_block": { "type": ["integer", "string"] }, + "blocks": { "$ref": "#/definitions/blocks" } + }, + "required": ["blocks"] + }, + "blocks": { + "oneOf": [ + { + "type": "array", + "items": { "$ref": "#/definitions/block" } + }, + { + "type": "object", + "additionalProperties": { "$ref": "#/definitions/block" } + } + ] + }, + "block": { + "type": "object", + "additionalProperties": true, + "properties": { + "id": { "type": ["integer", "string"] }, + "instructions": { "$ref": "#/definitions/instructions" }, + "terminator": { "$ref": "#/definitions/instruction" } + }, + "required": ["id", "instructions"] + }, + "instructions": { + "type": "array", + "items": { "$ref": "#/definitions/instruction" } + }, + "instruction": { + "type": "object", + "additionalProperties": true, + "properties": { + "kind": { "type": "string" } + }, + "required": ["kind"] + } + } +} + diff --git a/docs/reference/runtime/gc.md b/docs/reference/runtime/gc.md new file mode 100644 index 00000000..d37b72d2 --- /dev/null +++ b/docs/reference/runtime/gc.md @@ -0,0 +1,62 @@ +Nyash GC Modes — Design and Usage + +Overview +- Nyash adopts a pragmatic GC strategy that balances safety, performance, and simplicity. +- Default is reference counting with a periodic cycle collector; advanced modes exist for tuning and debugging. + +User‑Facing Modes (recommended) +- rc+cycle (default, safe) + - Reference counting with periodic cycle detection/collection. 
+ - Recommended for most applications; memory leaks from cycles are handled. +- minorgen (high‑performance) + - Lightweight generational GC: moving nursery (Gen‑0), non‑moving upper generations. + - Write barrier (old→new) is minimal; plugin/FFI objects remain non‑moving via handle indirection. + +Advanced Modes (for language dev/debug) +- stw (debug/verification) + - Non‑moving stop‑the‑world mark‑and‑sweep. Useful for strict correctness checks and leak cause isolation. +- rc (baseline for comparisons) + - rc+cycle with cycle detection disabled. For performance comparisons or targeted debugging. +- off (expert, self‑responsibility) + - Cycle detection and tracing off. Use only when cycles are guaranteed not to occur. Not recommended for long‑running services. + +Selection & Precedence +- CLI: `--gc {auto,rc+cycle,minorgen,stw,rc,off}` (auto = rc+cycle) +- ENV: `NYASH_GC_MODE` (overridden by CLI) +- nyash.toml [env] applies last + +Instrumentation & Diagnostics +- `NYASH_GC_METRICS=1`: print brief metrics (allocs/bytes/cycles/pauses) +- `NYASH_GC_METRICS_JSON=1`: emit JSON metrics for CI/aggregation +- `NYASH_GC_LEAK_DIAG=1`: on exit, dump suspected unreleased objects (Top‑K by type/site) +- `NYASH_GC_ALLOC_THRESHOLD=`: warn or fail when allocations/bytes exceed threshold + +Operational Guidance +- Default: rc+cycle for stable operations. +- Try minorgen when throughput/latency matter; it will fall back to rc+cycle on unsupported platforms or when plugin objects are declared non‑moving. +- off/rc are for special cases only; prefer enabling leak diagnostics when using them in development. + +Implementation Roadmap (Step‑wise) +1) Wiring & Observability + - Introduce `GcMode`, `GcController`, unify roots (handles, globals, frames) and safepoints. + - Add `LeakRegistry` (allocation ledger) and exit‑time dump. + - Ship rc+cycle (trial deletion) behind the controller (dev default can be rc+cycle). 
+2) minorgen (nursery) + - Moving Gen‑0 with simple promotion; upper generations non‑moving mark‑sweep. + - Minimal write barrier (old→new card marking). Plugin/FFI remain non‑moving. +3) stw (dev verify) + - Non‑moving STW mark‑and‑sweep for correctness checks. + +Notes +- Safepoint and barrier MIR ops already exist and are reused as GC coordination hooks. +- Handle indirection keeps future moving GCs compatible with plugin/FFI boundaries. + +LLVM Safepoints +- Automatic safepoint insertion can be toggled for the LLVM harness/backend: + - NYASH_LLVM_AUTO_SAFEPOINT=1 enables insertion (default 1) + - Injection points: loop headers, function calls, externcalls, and selected boxcalls. + - Safepoints call ny_check_safepoint/ny_safepoint in NyRT, which forwards to runtime hooks (GC.safepoint + scheduler poll). + +Controller & Metrics +- The unified GcController implements GcHooks and aggregates metrics (safepoints/read/write/alloc). +- CountingGc is a thin wrapper around GcController for compatibility. 
diff --git a/docs/tools/cli-options.md b/docs/tools/cli-options.md index 5fc773e1..2aaee34f 100644 --- a/docs/tools/cli-options.md +++ b/docs/tools/cli-options.md @@ -16,6 +16,19 @@ - `--vm-stats`: VM命令統計を有効化(`NYASH_VM_STATS=1`) - `--vm-stats-json`: VM統計をJSONで出力(`NYASH_VM_STATS_JSON=1`) +## GC +- `--gc {auto|rc+cycle|minorgen|stw|rc|off}`: GCモード(既定: `auto` → rc+cycle) + - `rc+cycle`: 参照カウント + 循環回収(推奨・安定) + - `minorgen`: 高速向けの軽量世代別(Gen‑0移動、上位非移動) + - `stw`: 検証用の非移動Mark‑Sweep(開発者向け) + - `rc`: 循環回収なしのRC(比較用) + - `off`: 自己責任モード(循環はリーク) +- 関連ENV + - `NYASH_GC_MODE`(CLIが優先) + - `NYASH_GC_METRICS` / `NYASH_GC_METRICS_JSON` + - `NYASH_GC_LEAK_DIAG` / `NYASH_GC_ALLOC_THRESHOLD` + - 詳細: `docs/reference/runtime/gc.md` + ## WASM/AOT - `--compile-wasm`: WATを出力 - `--compile-native` / `--aot`: AOT実行ファイル出力(要wasm-backend) diff --git a/src/cli.rs b/src/cli.rs index 9406c76d..07996f4c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -58,6 +58,8 @@ pub struct CliConfig { // Phase-15: JSON IR v0 bridge pub ny_parser_pipe: bool, pub json_file: Option, + // GC mode (dev; forwarded to env as NYASH_GC_MODE) + pub gc_mode: Option, // Build system (MVP) pub build_path: Option, pub build_app: Option, @@ -105,6 +107,12 @@ impl CliConfig { .value_name("FILE") .index(1) ) + .arg( + Arg::new("gc") + .long("gc") + .value_name("{auto,rc+cycle,minorgen,stw,rc,off}") + .help("Select GC mode (default: rc+cycle)") + ) .arg( Arg::new("parser") .long("parser") @@ -456,6 +464,7 @@ impl CliConfig { cli_verbose: matches.get_flag("verbose"), run_task: matches.get_one::("run-task").cloned(), load_ny_plugins: matches.get_flag("load-ny-plugins"), + gc_mode: matches.get_one::("gc").cloned(), parser_ny: matches .get_one::("parser") .map(|s| s == "ny") @@ -516,6 +525,7 @@ impl Default for CliConfig { cli_verbose: false, run_task: None, load_ny_plugins: false, + gc_mode: None, parser_ny: false, ny_parser_pipe: false, json_file: None, diff --git a/src/config/env.rs b/src/config/env.rs index aec82587..26c8ac03 100644 --- 
a/src/config/env.rs +++ b/src/config/env.rs @@ -211,6 +211,41 @@ pub fn gc_trace_level() -> u8 { } }
+// ---- GC mode and instrumentation ----
+/// Return the GC mode string from `NYASH_GC_MODE`, or "rc+cycle" when the variable is
+/// unset or blank.
+/// Allowed: "auto", "rc+cycle", "minorgen", "stw", "rc", "off"
+///
+/// The value is returned as given: it is not trimmed, not checked against the list
+/// above, and "auto" is passed through rather than mapped to "rc+cycle" here.
+// NOTE(review): callers presumably resolve "auto" themselves -- confirm.
+pub fn gc_mode() -> String {
+    match std::env::var("NYASH_GC_MODE").ok() {
+        Some(m) if !m.trim().is_empty() => m,
+        _ => "rc+cycle".to_string(),
+    }
+}
+/// Brief metrics emission (text); true only when `NYASH_GC_METRICS` is exactly "1".
+pub fn gc_metrics() -> bool {
+    std::env::var("NYASH_GC_METRICS").ok().as_deref() == Some("1")
+}
+/// JSON metrics emission (single line); true only when `NYASH_GC_METRICS_JSON` is exactly "1".
+pub fn gc_metrics_json() -> bool {
+    std::env::var("NYASH_GC_METRICS_JSON").ok().as_deref() == Some("1")
+}
+/// Leak diagnostics on exit; true only when `NYASH_GC_LEAK_DIAG` is exactly "1".
+pub fn gc_leak_diag() -> bool {
+    std::env::var("NYASH_GC_LEAK_DIAG").ok().as_deref() == Some("1")
+}
+/// Optional allocation threshold; if Some(n) and exceeded, print warning.
+/// Reads `NYASH_GC_ALLOC_THRESHOLD`; unset or unparsable values give `None`.
+// NOTE(review): the type argument of `Option` is missing in this copy of the patch
+// (same for the two functions below) -- restore it from the real file.
+pub fn gc_alloc_threshold() -> Option {
+    std::env::var("NYASH_GC_ALLOC_THRESHOLD").ok()?.parse().ok()
+}
+
+/// Run a collection every N safepoints (if Some).
+/// Reads `NYASH_GC_COLLECT_SP`; unset or unparsable values give `None`.
+pub fn gc_collect_sp_interval() -> Option {
+    std::env::var("NYASH_GC_COLLECT_SP").ok()?.parse().ok()
+}
+/// Run a collection when allocated bytes since last >= N (if Some).
+/// Reads `NYASH_GC_COLLECT_ALLOC`; unset or unparsable values give `None`.
+pub fn gc_collect_alloc_bytes() -> Option {
+    std::env::var("NYASH_GC_COLLECT_ALLOC").ok()?.parse().ok()
+}
+
 // ---- Rewriter flags (optimizer transforms) pub fn rewrite_debug() -> bool { std::env::var("NYASH_REWRITE_DEBUG").ok().as_deref() == Some("1") diff --git a/src/jit/rt.rs b/src/jit/rt.rs index a7b8f23f..b449d315 100644 --- a/src/jit/rt.rs +++ b/src/jit/rt.rs @@ -162,6 +162,28 @@ pub mod handles { pub fn len() -> usize { REG.with(|cell| cell.borrow().map.len()) } + /// Tally handles by NyashBox type name (best-effort) + pub fn type_tally() -> Vec<(String, usize)> { + use std::collections::HashMap; + REG.with(|cell| { + let reg = cell.borrow(); + let mut map: HashMap = HashMap::new(); + for (_h, obj) in reg.map.iter() { + let tn
= obj.type_name().to_string(); + *map.entry(tn).or_insert(0) += 1; + } + let mut v: Vec<(String, usize)> = map.into_iter().collect(); + v.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0))); + v + }) + } + /// Snapshot current handle objects (Arc clones) + pub fn snapshot_arcs() -> Vec> { + REG.with(|cell| { + let reg = cell.borrow(); + reg.map.values().cloned().collect() + }) + } // Scope management: track and clear handles created within a JIT call pub fn begin_scope() { diff --git a/src/llvm_py/instructions/barrier.py b/src/llvm_py/instructions/barrier.py index 61418399..f1009766 100644 --- a/src/llvm_py/instructions/barrier.py +++ b/src/llvm_py/instructions/barrier.py @@ -51,7 +51,8 @@ def lower_atomic_op( resolver=None, preds=None, block_end_values=None, - bb_map=None + bb_map=None, + ctx=None, ) -> None: """ Lower atomic operations @@ -66,7 +67,12 @@ def lower_atomic_op( ordering: Memory ordering """ # Get pointer - if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None: + if ctx is not None: + try: + ptr = ctx.resolver.resolve_ptr(ptr_vid, builder.block, ctx.preds, ctx.block_end_values, ctx.vmap) + except Exception: + ptr = vmap.get(ptr_vid) + elif resolver is not None and preds is not None and block_end_values is not None and bb_map is not None: ptr = resolver.resolve_ptr(ptr_vid, builder.block, preds, block_end_values, vmap) else: ptr = vmap.get(ptr_vid) @@ -85,7 +91,12 @@ def lower_atomic_op( elif op == "store": # Atomic store if val_vid is not None: - if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None: + if ctx is not None: + try: + val = ctx.resolver.resolve_i64(val_vid, builder.block, ctx.preds, ctx.block_end_values, ctx.vmap, ctx.bb_map) + except Exception: + val = vmap.get(val_vid, ir.Constant(ir.IntType(64), 0)) + elif resolver is not None and preds is not None and block_end_values is not None and bb_map is not None: val = 
resolver.resolve_i64(val_vid, builder.block, preds, block_end_values, vmap, bb_map) else: val = vmap.get(val_vid, ir.Constant(ir.IntType(64), 0)) @@ -94,7 +105,12 @@ def lower_atomic_op( elif op == "add": # Atomic add (fetch_add) if val_vid is not None: - if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None: + if ctx is not None: + try: + val = ctx.resolver.resolve_i64(val_vid, builder.block, ctx.preds, ctx.block_end_values, ctx.vmap, ctx.bb_map) + except Exception: + val = ir.Constant(ir.IntType(64), 1) + elif resolver is not None and preds is not None and block_end_values is not None and bb_map is not None: val = resolver.resolve_i64(val_vid, builder.block, preds, block_end_values, vmap, bb_map) else: val = ir.Constant(ir.IntType(64), 1) diff --git a/src/llvm_py/instructions/boxcall.py b/src/llvm_py/instructions/boxcall.py index c90df67f..3da8a5c4 100644 --- a/src/llvm_py/instructions/boxcall.py +++ b/src/llvm_py/instructions/boxcall.py @@ -5,6 +5,7 @@ Core of Nyash's "Everything is Box" philosophy import llvmlite.ir as ir from typing import Dict, List, Optional, Any +from instructions.safepoint import insert_automatic_safepoint def _declare(module: ir.Module, name: str, ret, args): for f in module.functions: @@ -68,6 +69,13 @@ def lower_boxcall( i64 = ir.IntType(64) i8 = ir.IntType(8) i8p = i8.as_pointer() + # Insert a safepoint around potential heavy boxcall sites (pre-call) + try: + import os + if os.environ.get('NYASH_LLVM_AUTO_SAFEPOINT', '1') == '1': + insert_automatic_safepoint(builder, module, "boxcall") + except Exception: + pass # Short-hands with ctx (backward-compatible fallback) r = resolver diff --git a/src/llvm_py/instructions/call.py b/src/llvm_py/instructions/call.py index e43dda25..71c6d1ab 100644 --- a/src/llvm_py/instructions/call.py +++ b/src/llvm_py/instructions/call.py @@ -6,6 +6,7 @@ Handles regular function calls (not BoxCall or ExternCall) import llvmlite.ir as ir from typing import Dict, 
List, Optional, Any from trace import debug as trace_debug +from instructions.safepoint import insert_automatic_safepoint def lower_call( builder: ir.IRBuilder, @@ -45,6 +46,13 @@ def lower_call( bb_map = ctx.bb_map except Exception: pass + # Insert an automatic safepoint after the function call + try: + import os + if os.environ.get('NYASH_LLVM_AUTO_SAFEPOINT', '1') == '1': + insert_automatic_safepoint(builder, module, "function_call") + except Exception: + pass # Short-hands with ctx (backward-compatible fallback) r = resolver p = preds diff --git a/src/llvm_py/instructions/controlflow/while_.py b/src/llvm_py/instructions/controlflow/while_.py index b3071382..d4972f11 100644 --- a/src/llvm_py/instructions/controlflow/while_.py +++ b/src/llvm_py/instructions/controlflow/while_.py @@ -4,6 +4,7 @@ Lowering helpers for while-control flow (regular structured) from typing import List, Dict, Any import llvmlite.ir as ir +from instructions.safepoint import insert_automatic_safepoint def lower_while_regular( builder: ir.IRBuilder, @@ -57,6 +58,13 @@ def lower_while_regular( else: cond_val = ir.Constant(i1, 0) + # Insert a safepoint at loop header to allow cooperative GC + try: + import os + if os.environ.get('NYASH_LLVM_AUTO_SAFEPOINT', '1') == '1': + insert_automatic_safepoint(cbuild, builder.block.parent.module, "loop_header") + except Exception: + pass cbuild.cbranch(cond_val, body_bb, exit_bb) # Body block @@ -77,4 +85,3 @@ def lower_while_regular( # Continue at exit builder.position_at_end(exit_bb) - diff --git a/src/llvm_py/instructions/externcall.py b/src/llvm_py/instructions/externcall.py index 7efc0208..e84455ec 100644 --- a/src/llvm_py/instructions/externcall.py +++ b/src/llvm_py/instructions/externcall.py @@ -5,6 +5,7 @@ Minimal mapping for NyRT-exported symbols (console/log family等) import llvmlite.ir as ir from typing import Dict, List, Optional, Any +from instructions.safepoint import insert_automatic_safepoint def lower_externcall( builder: ir.IRBuilder, @@ 
-197,3 +198,10 @@ def lower_externcall( vmap[dst_vid] = ir.Constant(i64, 0) else: vmap[dst_vid] = result + # Insert an automatic safepoint after externcall + try: + import os + if os.environ.get('NYASH_LLVM_AUTO_SAFEPOINT', '1') == '1': + insert_automatic_safepoint(builder, module, "extern_call") + except Exception: + pass diff --git a/src/llvm_py/instructions/loopform.py b/src/llvm_py/instructions/loopform.py index 55526cdd..0bc1bda2 100644 --- a/src/llvm_py/instructions/loopform.py +++ b/src/llvm_py/instructions/loopform.py @@ -7,6 +7,7 @@ import os import llvmlite.ir as ir from dataclasses import dataclass from typing import Dict, Tuple, List, Optional, Any +from instructions.safepoint import insert_automatic_safepoint @dataclass class LoopFormContext: @@ -53,7 +54,8 @@ def lower_while_loopform( bb_map: Dict[int, ir.Block], resolver=None, preds=None, - block_end_values=None + block_end_values=None, + ctx=None, ) -> bool: """ Lower a while loop using LoopForm structure @@ -72,9 +74,22 @@ def lower_while_loopform( builder.position_at_end(lf.preheader) builder.branch(lf.header) - # Header: Evaluate condition + # Header: Evaluate condition (insert a safepoint at loop header) builder.position_at_end(lf.header) - if resolver is not None and preds is not None and block_end_values is not None: + try: + import os + if os.environ.get('NYASH_LLVM_AUTO_SAFEPOINT', '1') == '1': + insert_automatic_safepoint(builder, func.module, "loop_header") + except Exception: + pass + if ctx is not None: + try: + cond64 = ctx.resolver.resolve_i64(condition_vid, builder.block, ctx.preds, ctx.block_end_values, ctx.vmap, ctx.bb_map) + zero64 = ir.IntType(64)(0) + cond = builder.icmp_unsigned('!=', cond64, zero64) + except Exception: + cond = vmap.get(condition_vid, ir.Constant(ir.IntType(1), 0)) + elif resolver is not None and preds is not None and block_end_values is not None: cond64 = resolver.resolve_i64(condition_vid, builder.block, preds, block_end_values, vmap, bb_map) zero64 = 
ir.IntType(64)(0) cond = builder.icmp_unsigned('!=', cond64, zero64) diff --git a/src/llvm_py/llvm_builder.py b/src/llvm_py/llvm_builder.py index 3ae5cc71..86642948 100644 --- a/src/llvm_py/llvm_builder.py +++ b/src/llvm_py/llvm_builder.py @@ -776,7 +776,7 @@ class NyashLLVMBuilder: for inst in term_ops: try: import os - trace_debug(f"[llvm-py] term op: {inst.get('op')} dst={inst.get('dst')} cond={inst.get('cond')}") + trace_debug(f"[llvm-py] term op: {inst.get('op')} dst={inst.get('dst')} cond={inst.get('cond')}") except Exception: pass try: @@ -950,7 +950,8 @@ class NyashLLVMBuilder: self.loop_count += 1 if not lower_while_loopform(builder, func, cond, body, self.loop_count, self.vmap, self.bb_map, - self.resolver, self.preds, self.block_end_values): + self.resolver, self.preds, self.block_end_values, + getattr(self, 'ctx', None)): # Fallback to regular while (structured) try: self.resolver._owner_lower_instruction = self.lower_instruction @@ -975,58 +976,10 @@ class NyashLLVMBuilder: except Exception: pass - def _lower_while_regular(self, builder: ir.IRBuilder, inst: Dict[str, Any], func: ir.Function): - """Fallback regular while lowering""" - # Create basic blocks: cond -> body -> cond, and exit - cond_vid = inst.get("cond") - body_insts = inst.get("body", []) - - cur_bb = builder.block - cond_bb = func.append_basic_block(name=f"while{self.loop_count}_cond") - body_bb = func.append_basic_block(name=f"while{self.loop_count}_body") - exit_bb = func.append_basic_block(name=f"while{self.loop_count}_exit") - - # Jump from current to cond - builder.branch(cond_bb) - - # Cond block - cbuild = ir.IRBuilder(cond_bb) - try: - # Resolve against the condition block to localize dominance - cond_val = self.resolver.resolve_i64(cond_vid, cbuild.block, self.preds, self.block_end_values, self.vmap, self.bb_map) - except Exception: - cond_val = self.vmap.get(cond_vid) - if cond_val is None: - cond_val = ir.Constant(self.i1, 0) - # Normalize to i1 - if hasattr(cond_val, 'type'): 
- if isinstance(cond_val.type, ir.IntType) and cond_val.type.width == 64: - zero64 = ir.Constant(self.i64, 0) - cond_val = cbuild.icmp_unsigned('!=', cond_val, zero64, name="while_cond_i1") - elif isinstance(cond_val.type, ir.PointerType): - nullp = ir.Constant(cond_val.type, None) - cond_val = cbuild.icmp_unsigned('!=', cond_val, nullp, name="while_cond_p1") - elif isinstance(cond_val.type, ir.IntType) and cond_val.type.width == 1: - # already i1 - pass - else: - # Fallback: treat as false - cond_val = ir.Constant(self.i1, 0) - else: - cond_val = ir.Constant(self.i1, 0) - - cbuild.cbranch(cond_val, body_bb, exit_bb) - - # Body block - bbuild = ir.IRBuilder(body_bb) - # Allow nested lowering of body instructions within this block - self._lower_instruction_list(bbuild, body_insts, func) - # Ensure terminator: if not terminated, branch back to cond - if bbuild.block.terminator is None: - bbuild.branch(cond_bb) - - # Continue at exit - builder.position_at_end(exit_bb) + # NOTE: regular while lowering is implemented in + # instructions/controlflow/while_.py::lower_while_regular and invoked + # from NyashLLVMBuilder.lower_instruction(). This legacy helper is removed + # to avoid divergence between two implementations. def _lower_instruction_list(self, builder: ir.IRBuilder, insts: List[Dict[str, Any]], func: ir.Function): """Lower a flat list of instructions using current builder and function.""" diff --git a/src/llvm_py/phi_wiring.py b/src/llvm_py/phi_wiring.py index 2413a96e..b6e6ac04 100644 --- a/src/llvm_py/phi_wiring.py +++ b/src/llvm_py/phi_wiring.py @@ -5,11 +5,161 @@ PHI wiring helpers - finalize_phis: Wire PHI incomings using end-of-block snapshots and resolver These operate on the NyashLLVMBuilder instance to keep changes minimal. + +Refactor note: core responsibilities are split into smaller helpers so they +can be unit-tested in isolation. 
""" -from typing import Dict, List, Any +from typing import Dict, List, Any, Optional, Tuple import llvmlite.ir as ir +# ---- Small helpers (analyzable/testable) ---- + +def _collect_produced_stringish(blocks: List[Dict[str, Any]]) -> Dict[int, bool]: + produced_str: Dict[int, bool] = {} + for block_data in blocks: + for inst in block_data.get("instructions", []) or []: + try: + opx = inst.get("op") + dstx = inst.get("dst") + if dstx is None: + continue + is_str = False + if opx == "const": + v = inst.get("value", {}) or {} + t = v.get("type") + if t == "string" or (isinstance(t, dict) and t.get("kind") in ("handle", "ptr") and t.get("box_type") == "StringBox"): + is_str = True + elif opx in ("binop", "boxcall", "externcall"): + t = inst.get("dst_type") + if isinstance(t, dict) and t.get("kind") == "handle" and t.get("box_type") == "StringBox": + is_str = True + if is_str: + produced_str[int(dstx)] = True + except Exception: + pass + return produced_str + +def analyze_incomings(blocks: List[Dict[str, Any]]) -> Dict[int, Dict[int, List[Tuple[int, int]]]]: + """Return block_phi_incomings map: block_id -> { dst_vid -> [(decl_b, v_src), ...] 
}""" + result: Dict[int, Dict[int, List[Tuple[int, int]]]] = {} + for block_data in blocks: + bid0 = block_data.get("id", 0) + for inst in block_data.get("instructions", []) or []: + if inst.get("op") == "phi": + try: + dst0 = int(inst.get("dst")) + incoming0 = inst.get("incoming", []) or [] + except Exception: + dst0 = None; incoming0 = [] + if dst0 is None: + continue + try: + result.setdefault(int(bid0), {})[dst0] = [(int(b), int(v)) for (v, b) in incoming0] + except Exception: + pass + return result + +def ensure_phi(builder, block_id: int, dst_vid: int, bb: ir.Block) -> ir.Instruction: + """Ensure a PHI placeholder exists at the block head for dst_vid and return it.""" + b = ir.IRBuilder(bb) + try: + b.position_at_start(bb) + except Exception: + pass + # Prefer predeclared PHIs (e.g., from if-merge prepass) + predecl = getattr(builder, 'predeclared_ret_phis', {}) if hasattr(builder, 'predeclared_ret_phis') else {} + phi = predecl.get((int(block_id), int(dst_vid))) if predecl else None + if phi is not None: + builder.vmap[dst_vid] = phi + return phi + # Reuse current if it is a PHI in the correct block + cur = builder.vmap.get(dst_vid) + try: + if cur is not None and hasattr(cur, 'add_incoming') and getattr(getattr(cur, 'basic_block', None), 'name', None) == bb.name: + return cur + except Exception: + pass + # Create a new placeholder + ph = b.phi(builder.i64, name=f"phi_{dst_vid}") + builder.vmap[dst_vid] = ph + return ph + +def _build_succs(preds: Dict[int, List[int]]) -> Dict[int, List[int]]: + succs: Dict[int, List[int]] = {} + for to_bid, from_list in (preds or {}).items(): + for fr in from_list: + succs.setdefault(fr, []).append(to_bid) + return succs + +def _nearest_pred_on_path(succs: Dict[int, List[int]], preds_list: List[int], decl_b: int, target_bid: int) -> Optional[int]: + from collections import deque + q = deque([decl_b]) + visited = set([decl_b]) + parent: Dict[int, Any] = {decl_b: None} + while q: + cur = q.popleft() + if cur == target_bid: + 
par = parent.get(target_bid) + return par if par in preds_list else None + for nx in succs.get(cur, []): + if nx not in visited: + visited.add(nx) + parent[nx] = cur + q.append(nx) + return None + +def wire_incomings(builder, block_id: int, dst_vid: int, incoming: List[Tuple[int, int]]): + """Wire PHI incoming edges for (block_id, dst_vid) using declared (decl_b, v_src) pairs.""" + bb = builder.bb_map.get(block_id) + if bb is None: + return + phi = ensure_phi(builder, block_id, dst_vid, bb) + # Normalize predecessor list + preds_raw = [p for p in builder.preds.get(block_id, []) if p != block_id] + seen = set() + preds_list: List[int] = [] + for p in preds_raw: + if p not in seen: + preds_list.append(p) + seen.add(p) + succs = _build_succs(builder.preds) + # Precompute a non-self initial source for self-carry + init_src_vid = None + for (_bd0, vs0) in incoming: + try: + vi = int(vs0) + except Exception: + continue + if vi != int(dst_vid): + init_src_vid = vi + break + chosen: Dict[int, ir.Value] = {} + for (b_decl, v_src) in incoming: + try: + bd = int(b_decl); vs = int(v_src) + except Exception: + continue + pred_match = _nearest_pred_on_path(succs, preds_list, bd, block_id) + if pred_match is None: + continue + if vs == int(dst_vid) and init_src_vid is not None: + vs = int(init_src_vid) + try: + val = builder.resolver._value_at_end_i64(vs, pred_match, builder.preds, builder.block_end_values, builder.vmap, builder.bb_map) + except Exception: + val = None + if val is None: + val = ir.Constant(builder.i64, 0) + chosen[pred_match] = val + for pred_bid, val in chosen.items(): + pred_bb = builder.bb_map.get(pred_bid) + if pred_bb is None: + continue + phi.add_incoming(val, pred_bb) + +# ---- Public API (used by llvm_builder) ---- + def setup_phi_placeholders(builder, blocks: List[Dict[str, Any]]): """Predeclare PHIs and collect incoming metadata for finalize_phis. 
@@ -18,183 +168,55 @@ def setup_phi_placeholders(builder, blocks: List[Dict[str, Any]]): values eagerly to help downstream resolvers choose correct intrinsics. """ try: - # Pass A: collect producer stringish hints per value-id - produced_str: Dict[int, bool] = {} - for block_data in blocks: - for inst in block_data.get("instructions", []) or []: - try: - opx = inst.get("op") - dstx = inst.get("dst") - if dstx is None: - continue - is_str = False - if opx == "const": - v = inst.get("value", {}) or {} - t = v.get("type") - if t == "string" or (isinstance(t, dict) and t.get("kind") in ("handle","ptr") and t.get("box_type") == "StringBox"): - is_str = True - elif opx in ("binop","boxcall","externcall"): - t = inst.get("dst_type") - if isinstance(t, dict) and t.get("kind") == "handle" and t.get("box_type") == "StringBox": - is_str = True - if is_str: - produced_str[int(dstx)] = True - except Exception: - pass - # Pass B: materialize PHI placeholders and record incoming metadata - builder.block_phi_incomings = {} + produced_str = _collect_produced_stringish(blocks) + builder.block_phi_incomings = analyze_incomings(blocks) + # Materialize placeholders and propagate stringish tags for block_data in blocks: bid0 = block_data.get("id", 0) bb0 = builder.bb_map.get(bid0) for inst in block_data.get("instructions", []) or []: - if inst.get("op") == "phi": - try: - dst0 = int(inst.get("dst")) - incoming0 = inst.get("incoming", []) or [] - except Exception: - dst0 = None; incoming0 = [] - if dst0 is None: - continue - # Record incoming metadata for finalize_phis - try: - builder.block_phi_incomings.setdefault(bid0, {})[dst0] = [ - (int(b), int(v)) for (v, b) in incoming0 - ] - except Exception: - pass - # Ensure placeholder exists at block head - if bb0 is not None: - b0 = ir.IRBuilder(bb0) - try: - b0.position_at_start(bb0) - except Exception: - pass - existing = builder.vmap.get(dst0) - is_phi = False - try: - is_phi = hasattr(existing, 'add_incoming') - except Exception: - 
is_phi = False - if not is_phi: - ph0 = b0.phi(builder.i64, name=f"phi_{dst0}") - builder.vmap[dst0] = ph0 - # Tag propagation: if explicit dst_type marks string or any incoming was produced as string-ish, tag dst - try: - dst_type0 = inst.get("dst_type") - mark_str = isinstance(dst_type0, dict) and dst_type0.get("kind") == "handle" and dst_type0.get("box_type") == "StringBox" - if not mark_str: - for (_b_decl_i, v_src_i) in incoming0: - try: - if produced_str.get(int(v_src_i)): - mark_str = True; break - except Exception: - pass - if mark_str and hasattr(builder.resolver, 'mark_string'): - builder.resolver.mark_string(int(dst0)) - except Exception: - pass + if inst.get("op") != "phi": + continue + try: + dst0 = int(inst.get("dst")) + incoming0 = inst.get("incoming", []) or [] + except Exception: + dst0 = None; incoming0 = [] + if dst0 is None or bb0 is None: + continue + _ = ensure_phi(builder, bid0, dst0, bb0) + # Tag propagation + try: + dst_type0 = inst.get("dst_type") + mark_str = isinstance(dst_type0, dict) and dst_type0.get("kind") == "handle" and dst_type0.get("box_type") == "StringBox" + if not mark_str: + for (_b_decl_i, v_src_i) in incoming0: + try: + if produced_str.get(int(v_src_i)): + mark_str = True; break + except Exception: + pass + if mark_str and hasattr(builder.resolver, 'mark_string'): + builder.resolver.mark_string(int(dst0)) + except Exception: + pass + # Definition hint: PHI defines dst in this block + try: + builder.def_blocks.setdefault(int(dst0), set()).add(int(bid0)) + except Exception: + pass + # Sync to resolver + try: + builder.resolver.block_phi_incomings = builder.block_phi_incomings + except Exception: + pass except Exception: pass def finalize_phis(builder): """Finalize PHIs declared in JSON by wiring incoming edges at block heads. 
- Uses resolver._value_at_end_i64 to materialize values at predecessor ends, - ensuring casts/boxing are inserted in predecessor blocks (dominance-safe).""" - # Build succ map for nearest-predecessor mapping - succs: Dict[int, List[int]] = {} - for to_bid, from_list in (builder.preds or {}).items(): - for fr in from_list: - succs.setdefault(fr, []).append(to_bid) + Uses resolver._value_at_end_i64 to materialize values at predecessor ends. + """ for block_id, dst_map in (getattr(builder, 'block_phi_incomings', {}) or {}).items(): - bb = builder.bb_map.get(block_id) - if bb is None: - continue - b = ir.IRBuilder(bb) - try: - b.position_at_start(bb) - except Exception: - pass for dst_vid, incoming in (dst_map or {}).items(): - # Ensure placeholder exists at block head - # Prefer predeclared ret-phi when available and force using it. - predecl = getattr(builder, 'predeclared_ret_phis', {}) if hasattr(builder, 'predeclared_ret_phis') else {} - phi = predecl.get((int(block_id), int(dst_vid))) if predecl else None - if phi is not None: - builder.vmap[dst_vid] = phi - else: - phi = builder.vmap.get(dst_vid) - need_local_phi = False - try: - if not (phi is not None and hasattr(phi, 'add_incoming')): - need_local_phi = True - else: - bb_of_phi = getattr(getattr(phi, 'basic_block', None), 'name', None) - if bb_of_phi != bb.name: - need_local_phi = True - except Exception: - need_local_phi = True - if need_local_phi: - phi = b.phi(builder.i64, name=f"phi_{dst_vid}") - builder.vmap[dst_vid] = phi - # Wire incoming per CFG predecessor; map src_vid when provided - preds_raw = [p for p in builder.preds.get(block_id, []) if p != block_id] - # Deduplicate while preserving order - seen = set() - preds_list: List[int] = [] - for p in preds_raw: - if p not in seen: - preds_list.append(p) - seen.add(p) - # Helper: find the nearest immediate predecessor on a path decl_b -> ... 
-> block_id - def nearest_pred_on_path(decl_b: int): - from collections import deque - q = deque([decl_b]) - visited = set([decl_b]) - parent: Dict[int, Any] = {decl_b: None} - while q: - cur = q.popleft() - if cur == block_id: - par = parent.get(block_id) - return par if par in preds_list else None - for nx in succs.get(cur, []): - if nx not in visited: - visited.add(nx) - parent[nx] = cur - q.append(nx) - return None - # Precompute a non-self initial source (if present) to use for self-carry cases - init_src_vid = None - for (b_decl0, v_src0) in incoming: - try: - vs0 = int(v_src0) - except Exception: - continue - if vs0 != int(dst_vid): - init_src_vid = vs0 - break - # Pre-resolve declared incomings to nearest immediate predecessors - chosen: Dict[int, ir.Value] = {} - for (b_decl, v_src) in incoming: - try: - bd = int(b_decl); vs = int(v_src) - except Exception: - continue - pred_match = nearest_pred_on_path(bd) - if pred_match is None: - continue - if vs == int(dst_vid) and init_src_vid is not None: - vs = int(init_src_vid) - try: - val = builder.resolver._value_at_end_i64(vs, pred_match, builder.preds, builder.block_end_values, builder.vmap, builder.bb_map) - except Exception: - val = None - if val is None: - # As a last resort, zero - val = ir.Constant(builder.i64, 0) - chosen[pred_match] = val - # Finally add incomings - for pred_bid, val in chosen.items(): - pred_bb = builder.bb_map.get(pred_bid) - if pred_bb is None: - continue - phi.add_incoming(val, pred_bb) + wire_incomings(builder, int(block_id), int(dst_vid), incoming) diff --git a/src/llvm_py/tests/test_phi_wiring.py b/src/llvm_py/tests/test_phi_wiring.py new file mode 100644 index 00000000..36e66d59 --- /dev/null +++ b/src/llvm_py/tests/test_phi_wiring.py @@ -0,0 +1,65 @@ +""" +Unit tests for phi_wiring helpers + +These tests construct a minimal function with two blocks and a PHI in the +second block. 
We verify that placeholders are created and incoming edges +are wired from the correct predecessor, using end-of-block snapshots. +""" + +import sys +from pathlib import Path + +# Ensure 'src' is importable when running this test directly +TEST_DIR = Path(__file__).resolve().parent +PKG_DIR = TEST_DIR.parent # src/llvm_py +ROOT = PKG_DIR.parent # src +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) +if str(PKG_DIR) not in sys.path: + sys.path.insert(0, str(PKG_DIR)) + +import llvmlite.ir as ir # type: ignore + +from phi_wiring import setup_phi_placeholders, finalize_phis # type: ignore +import llvm_builder # type: ignore + + +def _simple_mir_with_phi(): + """ + Build a minimal MIR JSON that compiles to: + bb0: const v1=42; jump bb1 + bb1: phi v2=[(bb0,v1)] ; ret v2 + """ + return { + "functions": [ + { + "name": "main", + "params": [], + "blocks": [ + {"id": 0, "instructions": [ + {"op": "const", "dst": 1, "value": {"type": "int", "value": 42}}, + {"op": "jump", "target": 1} + ]}, + {"id": 1, "instructions": [ + {"op": "phi", "dst": 2, "incoming": [[1, 0]]}, + {"op": "ret", "value": 2} + ]} + ] + } + ] + } + + +def test_phi_placeholders_and_finalize_basic(): + mir = _simple_mir_with_phi() + b = llvm_builder.NyashLLVMBuilder() + # Build once to create function, blocks, preds; stop before finalize by calling internals like lower_function + reader_functions = mir["functions"] + assert reader_functions + b.lower_function(reader_functions[0]) + # After lowering a function, finalize_phis is already called at the end of lower_function. + # Verify via IR text that a PHI exists in bb1 with an incoming from bb0. 
+ ir_text = str(b.module) + assert 'bb1' in ir_text + assert 'phi i64' in ir_text + assert '[0, %"bb0"]' in ir_text or '[ i64 0, %"bb0"]' in ir_text diff --git a/src/mir/optimizer.rs b/src/mir/optimizer.rs index be9e57ea..3809998f 100644 --- a/src/mir/optimizer.rs +++ b/src/mir/optimizer.rs @@ -105,7 +105,7 @@ impl MirOptimizer { // Pass 7 (optional): Core-13 pure normalization if crate::config::env::mir_core13_pure() { - stats.merge(self.normalize_pure_core13(module)); + stats.merge(crate::mir::optimizer_passes::normalize_core13_pure::normalize_pure_core13(self, module)); } if self.debug { @@ -147,200 +147,7 @@ impl MirOptimizer { /// Neg x => BinOp(Sub, Const 0, x) /// Not x => Compare(Eq, x, Const false) /// BitNot x => BinOp(BitXor, x, Const(-1)) - fn normalize_pure_core13(&mut self, module: &mut MirModule) -> OptimizationStats { - use super::instruction::ConstValue; - use super::{BinaryOp, CompareOp, MirInstruction as I}; - let mut stats = OptimizationStats::new(); - for (_fname, function) in &mut module.functions { - for (_bb, block) in &mut function.blocks { - let mut out: Vec = Vec::with_capacity(block.instructions.len() + 8); - let old = std::mem::take(&mut block.instructions); - for inst in old.into_iter() { - match inst { - I::Load { dst, ptr } => { - out.push(I::ExternCall { - dst: Some(dst), - iface_name: "env.local".to_string(), - method_name: "get".to_string(), - args: vec![ptr], - effects: super::EffectMask::READ, - }); - stats.intrinsic_optimizations += 1; - } - I::Store { value, ptr } => { - out.push(I::ExternCall { - dst: None, - iface_name: "env.local".to_string(), - method_name: "set".to_string(), - args: vec![ptr, value], - effects: super::EffectMask::WRITE, - }); - stats.intrinsic_optimizations += 1; - } - I::NewBox { - dst, - box_type, - mut args, - } => { - // prepend type name as Const String - let ty_id = super::ValueId::new(function.next_value_id); - function.next_value_id += 1; - out.push(I::Const { - dst: ty_id, - value: 
ConstValue::String(box_type), - }); - let mut call_args = Vec::with_capacity(1 + args.len()); - call_args.push(ty_id); - call_args.append(&mut args); - out.push(I::ExternCall { - dst: Some(dst), - iface_name: "env.box".to_string(), - method_name: "new".to_string(), - args: call_args, - effects: super::EffectMask::PURE, // constructor is logically alloc; conservatively PURE here - }); - stats.intrinsic_optimizations += 1; - } - I::UnaryOp { dst, op, operand } => { - match op { - super::UnaryOp::Neg => { - let zero = super::ValueId::new(function.next_value_id); - function.next_value_id += 1; - out.push(I::Const { - dst: zero, - value: ConstValue::Integer(0), - }); - out.push(I::BinOp { - dst, - op: BinaryOp::Sub, - lhs: zero, - rhs: operand, - }); - } - super::UnaryOp::Not => { - let f = super::ValueId::new(function.next_value_id); - function.next_value_id += 1; - out.push(I::Const { - dst: f, - value: ConstValue::Bool(false), - }); - out.push(I::Compare { - dst, - op: CompareOp::Eq, - lhs: operand, - rhs: f, - }); - } - super::UnaryOp::BitNot => { - let all1 = super::ValueId::new(function.next_value_id); - function.next_value_id += 1; - out.push(I::Const { - dst: all1, - value: ConstValue::Integer(-1), - }); - out.push(I::BinOp { - dst, - op: BinaryOp::BitXor, - lhs: operand, - rhs: all1, - }); - } - } - stats.intrinsic_optimizations += 1; - } - other => out.push(other), - } - } - block.instructions = out; - if let Some(term) = block.terminator.take() { - block.terminator = Some(match term { - I::Load { dst, ptr } => I::ExternCall { - dst: Some(dst), - iface_name: "env.local".to_string(), - method_name: "get".to_string(), - args: vec![ptr], - effects: super::EffectMask::READ, - }, - I::Store { value, ptr } => I::ExternCall { - dst: None, - iface_name: "env.local".to_string(), - method_name: "set".to_string(), - args: vec![ptr, value], - effects: super::EffectMask::WRITE, - }, - I::NewBox { - dst, - box_type, - mut args, - } => { - let ty_id = 
super::ValueId::new(function.next_value_id); - function.next_value_id += 1; - block.instructions.push(I::Const { - dst: ty_id, - value: ConstValue::String(box_type), - }); - let mut call_args = Vec::with_capacity(1 + args.len()); - call_args.push(ty_id); - call_args.append(&mut args); - I::ExternCall { - dst: Some(dst), - iface_name: "env.box".to_string(), - method_name: "new".to_string(), - args: call_args, - effects: super::EffectMask::PURE, - } - } - I::UnaryOp { dst, op, operand } => match op { - super::UnaryOp::Neg => { - let zero = super::ValueId::new(function.next_value_id); - function.next_value_id += 1; - block.instructions.push(I::Const { - dst: zero, - value: ConstValue::Integer(0), - }); - I::BinOp { - dst, - op: BinaryOp::Sub, - lhs: zero, - rhs: operand, - } - } - super::UnaryOp::Not => { - let f = super::ValueId::new(function.next_value_id); - function.next_value_id += 1; - block.instructions.push(I::Const { - dst: f, - value: ConstValue::Bool(false), - }); - I::Compare { - dst, - op: CompareOp::Eq, - lhs: operand, - rhs: f, - } - } - super::UnaryOp::BitNot => { - let all1 = super::ValueId::new(function.next_value_id); - function.next_value_id += 1; - block.instructions.push(I::Const { - dst: all1, - value: ConstValue::Integer(-1), - }); - I::BinOp { - dst, - op: BinaryOp::BitXor, - lhs: operand, - rhs: all1, - } - } - }, - other => other, - }); - } - } - } - stats - } + // normalize_pure_core13 moved to optimizer_passes::normalize_core13_pure /// Eliminate dead code in a single function fn eliminate_dead_code_in_function(&mut self, function: &mut MirFunction) -> usize { diff --git a/src/mir/optimizer_passes/mod.rs b/src/mir/optimizer_passes/mod.rs index 9af61a65..45890b04 100644 --- a/src/mir/optimizer_passes/mod.rs +++ b/src/mir/optimizer_passes/mod.rs @@ -3,3 +3,5 @@ pub mod diagnostics; pub mod intrinsics; pub mod normalize; pub mod reorder; +pub mod normalize_core13_pure; +pub mod normalize_legacy_all; diff --git 
a/src/mir/optimizer_passes/normalize_core13_pure.rs b/src/mir/optimizer_passes/normalize_core13_pure.rs new file mode 100644 index 00000000..8930acc4 --- /dev/null +++ b/src/mir/optimizer_passes/normalize_core13_pure.rs @@ -0,0 +1,145 @@ +use crate::mir::optimizer::MirOptimizer; +use crate::mir::optimizer_stats::OptimizationStats; +use crate::mir::{BinaryOp, CompareOp, EffectMask, MirInstruction as I, MirModule, MirType, ValueId}; + +/// Core-13 "pure" normalization: rewrite a few non-13 ops to allowed forms. +/// - Load(dst, ptr) => ExternCall(Some dst, env.local.get, [ptr]) +/// - Store(val, ptr) => ExternCall(None, env.local.set, [ptr, val]) +/// - NewBox(dst, T, args...) => ExternCall(Some dst, env.box.new, [Const String(T), args...]) +/// - UnaryOp: +/// Neg x => BinOp(Sub, Const 0, x) +/// Not x => Compare(Eq, x, Const false) +/// BitNot x => BinOp(BitXor, x, Const(-1)) +pub fn normalize_pure_core13(_opt: &mut MirOptimizer, module: &mut MirModule) -> OptimizationStats { + use crate::mir::instruction::ConstValue; + let mut stats = OptimizationStats::new(); + for (_fname, function) in &mut module.functions { + for (_bb, block) in &mut function.blocks { + let mut out: Vec = Vec::with_capacity(block.instructions.len() + 8); + let old = std::mem::take(&mut block.instructions); + for inst in old.into_iter() { + match inst { + I::Load { dst, ptr } => { + out.push(I::ExternCall { + dst: Some(dst), + iface_name: "env.local".to_string(), + method_name: "get".to_string(), + args: vec![ptr], + effects: EffectMask::READ, + }); + stats.intrinsic_optimizations += 1; + } + I::Store { value, ptr } => { + out.push(I::ExternCall { + dst: None, + iface_name: "env.local".to_string(), + method_name: "set".to_string(), + args: vec![ptr, value], + effects: EffectMask::WRITE, + }); + stats.intrinsic_optimizations += 1; + } + I::NewBox { dst, box_type, mut args } => { + // prepend type name as Const String + let ty_id = ValueId::new(function.next_value_id); + function.next_value_id += 
1; + out.push(I::Const { dst: ty_id, value: ConstValue::String(box_type) }); + let mut call_args = Vec::with_capacity(1 + args.len()); + call_args.push(ty_id); + call_args.append(&mut args); + out.push(I::ExternCall { + dst: Some(dst), + iface_name: "env.box".to_string(), + method_name: "new".to_string(), + args: call_args, + effects: EffectMask::PURE, // constructor is logically alloc; conservatively PURE here + }); + stats.intrinsic_optimizations += 1; + } + I::UnaryOp { dst, op, operand } => { + match op { + crate::mir::UnaryOp::Neg => { + let zero = ValueId::new(function.next_value_id); + function.next_value_id += 1; + out.push(I::Const { dst: zero, value: ConstValue::Integer(0) }); + out.push(I::BinOp { dst, op: BinaryOp::Sub, lhs: zero, rhs: operand }); + } + crate::mir::UnaryOp::Not => { + let f = ValueId::new(function.next_value_id); + function.next_value_id += 1; + out.push(I::Const { dst: f, value: ConstValue::Bool(false) }); + out.push(I::Compare { dst, op: CompareOp::Eq, lhs: operand, rhs: f }); + } + crate::mir::UnaryOp::BitNot => { + let all1 = ValueId::new(function.next_value_id); + function.next_value_id += 1; + out.push(I::Const { dst: all1, value: ConstValue::Integer(-1) }); + out.push(I::BinOp { dst, op: BinaryOp::BitXor, lhs: operand, rhs: all1 }); + } + } + stats.intrinsic_optimizations += 1; + } + other => out.push(other), + } + } + block.instructions = out; + + if let Some(term) = block.terminator.take() { + block.terminator = Some(match term { + I::Load { dst, ptr } => I::ExternCall { + dst: Some(dst), + iface_name: "env.local".to_string(), + method_name: "get".to_string(), + args: vec![ptr], + effects: EffectMask::READ, + }, + I::Store { value, ptr } => I::ExternCall { + dst: None, + iface_name: "env.local".to_string(), + method_name: "set".to_string(), + args: vec![ptr, value], + effects: EffectMask::WRITE, + }, + I::NewBox { dst, box_type, mut args } => { + let ty_id = ValueId::new(function.next_value_id); + function.next_value_id += 1; + 
block.instructions.push(I::Const { dst: ty_id, value: ConstValue::String(box_type) }); + let mut call_args = Vec::with_capacity(1 + args.len()); + call_args.push(ty_id); + call_args.append(&mut args); + I::ExternCall { + dst: Some(dst), + iface_name: "env.box".to_string(), + method_name: "new".to_string(), + args: call_args, + effects: EffectMask::PURE, + } + } + I::UnaryOp { dst, op, operand } => match op { + crate::mir::UnaryOp::Neg => { + let zero = ValueId::new(function.next_value_id); + function.next_value_id += 1; + block.instructions.push(I::Const { dst: zero, value: ConstValue::Integer(0) }); + I::BinOp { dst, op: BinaryOp::Sub, lhs: zero, rhs: operand } + } + crate::mir::UnaryOp::Not => { + let f = ValueId::new(function.next_value_id); + function.next_value_id += 1; + block.instructions.push(I::Const { dst: f, value: ConstValue::Bool(false) }); + I::Compare { dst, op: CompareOp::Eq, lhs: operand, rhs: f } + } + crate::mir::UnaryOp::BitNot => { + let all1 = ValueId::new(function.next_value_id); + function.next_value_id += 1; + block.instructions.push(I::Const { dst: all1, value: ConstValue::Integer(-1) }); + I::BinOp { dst, op: BinaryOp::BitXor, lhs: operand, rhs: all1 } + } + }, + other => other, + }); + } + } + } + stats +} + diff --git a/src/mir/optimizer_passes/normalize_legacy_all.rs b/src/mir/optimizer_passes/normalize_legacy_all.rs new file mode 100644 index 00000000..a5358b87 --- /dev/null +++ b/src/mir/optimizer_passes/normalize_legacy_all.rs @@ -0,0 +1,8 @@ +use crate::mir::optimizer::MirOptimizer; +use crate::mir::optimizer_stats::OptimizationStats; + +/// Delegate: legacy normalization (moved from optimizer.rs) +pub fn normalize_legacy_instructions(opt: &mut MirOptimizer, module: &mut crate::mir::MirModule) -> OptimizationStats { + crate::mir::optimizer_passes::normalize::normalize_legacy_instructions(opt, module) +} + diff --git a/src/runner/mod.rs b/src/runner/mod.rs index 4122e3c0..643d39e3 100644 --- a/src/runner/mod.rs +++ 
b/src/runner/mod.rs @@ -143,6 +143,12 @@ impl NyashRunner { if self.config.cli_verbose { std::env::set_var("NYASH_CLI_VERBOSE", "1"); } + // GC mode forwarding: map CLI --gc to NYASH_GC_MODE for downstream runtimes + if let Some(ref m) = self.config.gc_mode { + if !m.trim().is_empty() { + std::env::set_var("NYASH_GC_MODE", m); + } + } // Script-level env directives (special comments) — parse early // Supported: // // @env KEY=VALUE diff --git a/src/runner/modes/common.rs b/src/runner/modes/common.rs index ad5f3827..99202401 100644 --- a/src/runner/modes/common.rs +++ b/src/runner/modes/common.rs @@ -11,8 +11,6 @@ use std::thread::sleep; use crate::runner::pipeline::{suggest_in_base, resolve_using_target}; use crate::runner::trace::cli_verbose; use crate::cli_v; -use crate::runner::trace::cli_verbose; -use crate::cli_v; // (moved) suggest_in_base is now in runner/pipeline.rs @@ -131,97 +129,13 @@ impl NyashRunner { /// Helper: run PyVM harness over a MIR module, returning the exit code fn run_pyvm_harness(&self, module: &nyash_rust::mir::MirModule, tag: &str) -> Result { - let py3 = which::which("python3").map_err(|e| format!("python3 not found: {}", e))?; - let runner = std::path::Path::new("tools/pyvm_runner.py"); - if !runner.exists() { return Err(format!("PyVM runner not found: {}", runner.display())); } - let tmp_dir = std::path::Path::new("tmp"); - let _ = std::fs::create_dir_all(tmp_dir); - let mir_json_path = tmp_dir.join("nyash_pyvm_mir.json"); - crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(module, &mir_json_path) - .map_err(|e| format!("PyVM MIR JSON emit error: {}", e))?; - cli_v!("[ny-compiler] using PyVM ({} ) → {}", tag, mir_json_path.display()); - // Determine entry function hint (prefer Main.main if present) - let entry = if module.functions.contains_key("Main.main") { "Main.main" } - else if module.functions.contains_key("main") { "main" } else { "Main.main" }; - let status = std::process::Command::new(py3) - .args([ - 
runner.to_string_lossy().as_ref(), - "--in", - &mir_json_path.display().to_string(), - "--entry", - entry, - ]) - .status() - .map_err(|e| format!("spawn pyvm: {}", e))?; - let code = status.code().unwrap_or(1); - if !status.success() { cli_v!("❌ PyVM ({}) failed (status={})", tag, code); } - Ok(code) + super::common_util::pyvm::run_pyvm_harness(module, tag) } /// Helper: try external selfhost compiler EXE to parse Ny -> JSON v0 and return MIR module /// Returns Some(module) on success, None on failure (timeout/invalid output/missing exe) fn exe_try_parse_json_v0(&self, filename: &str, timeout_ms: u64) -> Option { - // Resolve parser EXE path - let exe_path = if let Ok(p) = std::env::var("NYASH_NY_COMPILER_EXE_PATH") { - std::path::PathBuf::from(p) - } else { - let mut p = std::path::PathBuf::from("dist/nyash_compiler"); - #[cfg(windows)] - { p.push("nyash_compiler.exe"); } - #[cfg(not(windows))] - { p.push("nyash_compiler"); } - if !p.exists() { - if let Ok(w) = which::which("nyash_compiler") { w } else { p } - } else { p } - }; - if !exe_path.exists() { cli_v!("[ny-compiler] exe not found at {}", exe_path.display()); return None; } - - // Build command - let mut cmd = std::process::Command::new(&exe_path); - cmd.arg(filename); - if crate::config::env::ny_compiler_min_json() { cmd.arg("--min-json"); } - if crate::config::env::selfhost_read_tmp() { cmd.arg("--read-tmp"); } - if let Some(raw) = crate::config::env::ny_compiler_child_args() { for tok in raw.split_whitespace() { cmd.arg(tok); } } - let mut cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); - let mut child = match cmd.spawn() { Ok(c) => c, Err(e) => { eprintln!("[ny-compiler] exe spawn failed: {}", e); return None; } }; - let mut ch_stdout = child.stdout.take(); - let mut ch_stderr = child.stderr.take(); - let start = Instant::now(); - let mut timed_out = false; - loop { - match child.try_wait() { - Ok(Some(_)) => break, - Ok(None) => { - if start.elapsed() >= Duration::from_millis(timeout_ms) { 
let _ = child.kill(); let _ = child.wait(); timed_out = true; break; } - sleep(Duration::from_millis(10)); - } - Err(e) => { eprintln!("[ny-compiler] exe wait error: {}", e); return None; } - } - } - let mut out_buf = Vec::new(); - let mut err_buf = Vec::new(); - if let Some(mut s) = ch_stdout { let _ = s.read_to_end(&mut out_buf); } - if let Some(mut s) = ch_stderr { let _ = s.read_to_end(&mut err_buf); } - if timed_out { - let head = String::from_utf8_lossy(&out_buf).chars().take(200).collect::(); - eprintln!("[ny-compiler] exe timeout after {} ms; stdout(head)='{}'", timeout_ms, head.replace('\n', "\\n")); - return None; - } - let stdout = match String::from_utf8(out_buf) { Ok(s) => s, Err(_) => String::new() }; - let mut json_line = String::new(); - for line in stdout.lines() { let t = line.trim(); if t.starts_with('{') && t.contains("\"version\"") && t.contains("\"kind\"") { json_line = t.to_string(); break; } } - if json_line.is_empty() { - if cli_verbose() { - let head: String = stdout.chars().take(200).collect(); - let errh: String = String::from_utf8_lossy(&err_buf).chars().take(200).collect(); - cli_v!("[ny-compiler] exe produced no JSON; stdout(head)='{}' stderr(head)='{}'", head.replace('\n', "\\n"), errh.replace('\n', "\\n")); - } - return None; - } - match json_v0_bridge::parse_json_v0_to_module(&json_line) { - Ok(module) => Some(module), - Err(e) => { eprintln!("[ny-compiler] JSON parse failed (exe): {}", e); None } - } + super::common_util::selfhost_exe::exe_try_parse_json_v0(filename, timeout_ms) } /// Phase-15.3: Attempt Ny compiler pipeline (Ny -> JSON v0 via Ny program), then execute MIR @@ -324,38 +238,22 @@ impl NyashRunner { if crate::config::env::selfhost_read_tmp() { cmd.arg("--read-tmp"); } if let Some(raw) = crate::config::env::ny_compiler_child_args() { for tok in raw.split_whitespace() { cmd.arg(tok); } } let timeout_ms: u64 = crate::config::env::ny_compiler_timeout_ms(); - let mut cmd = 
cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); - let mut child = match cmd.spawn() { Ok(c) => c, Err(e) => { eprintln!("[ny-compiler] exe spawn failed: {}", e); return false; } }; - let mut ch_stdout = child.stdout.take(); - let mut ch_stderr = child.stderr.take(); - let start = Instant::now(); - let mut timed_out = false; - loop { - match child.try_wait() { - Ok(Some(_status)) => { break; } - Ok(None) => { - if start.elapsed() >= Duration::from_millis(timeout_ms) { let _ = child.kill(); let _ = child.wait(); timed_out = true; break; } - sleep(Duration::from_millis(10)); - } - Err(e) => { eprintln!("[ny-compiler] exe wait error: {}", e); return false; } - } - } - let mut out_buf = Vec::new(); - let mut err_buf = Vec::new(); - if let Some(mut s) = ch_stdout { let _ = s.read_to_end(&mut out_buf); } - if let Some(mut s) = ch_stderr { let _ = s.read_to_end(&mut err_buf); } - if timed_out { - let head = String::from_utf8_lossy(&out_buf).chars().take(200).collect::(); + let out = match super::common_util::io::spawn_with_timeout(cmd, timeout_ms) { + Ok(o) => o, + Err(e) => { eprintln!("[ny-compiler] exe spawn failed: {}", e); return false; } + }; + if out.timed_out { + let head = String::from_utf8_lossy(&out.stdout).chars().take(200).collect::(); eprintln!("[ny-compiler] exe timeout after {} ms; stdout(head)='{}'", timeout_ms, head.replace('\n', "\\n")); return false; } - let stdout = match String::from_utf8(out_buf) { Ok(s) => s, Err(_) => String::new() }; + let stdout = match String::from_utf8(out.stdout) { Ok(s) => s, Err(_) => String::new() }; let mut json_line = String::new(); for line in stdout.lines() { let t = line.trim(); if t.starts_with('{') && t.contains("\"version\"") && t.contains("\"kind\"") { json_line = t.to_string(); break; } } if json_line.is_empty() { if crate::config::env::cli_verbose() { let head: String = stdout.chars().take(200).collect(); - let errh: String = String::from_utf8_lossy(&err_buf).chars().take(200).collect(); + let errh: String = 
String::from_utf8_lossy(&out.stderr).chars().take(200).collect(); eprintln!("[ny-compiler] exe produced no JSON; stdout(head)='{}' stderr(head)='{}'", head.replace('\n', "\\n"), errh.replace('\n', "\\n")); } return false; @@ -450,40 +348,15 @@ impl NyashRunner { .ok() .and_then(|s| s.parse::().ok()) .unwrap_or(2000); - let mut cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); - let mut child = match cmd.spawn() { - Ok(c) => c, + let out = match super::common_util::io::spawn_with_timeout(cmd, timeout_ms) { + Ok(o) => o, Err(e) => { eprintln!("[ny-compiler] spawn failed: {}", e); return false; } }; - let mut ch_stdout = child.stdout.take(); - let mut ch_stderr = child.stderr.take(); - let start = Instant::now(); - let mut timed_out = false; - loop { - match child.try_wait() { - Ok(Some(_status)) => { break; } - Ok(None) => { - if start.elapsed() >= Duration::from_millis(timeout_ms) { - let _ = child.kill(); - let _ = child.wait(); - timed_out = true; - break; - } - sleep(Duration::from_millis(10)); - } - Err(e) => { eprintln!("[ny-compiler] wait error: {}", e); return false; } - } - } - // Collect any available output - let mut out_buf = Vec::new(); - let mut err_buf = Vec::new(); - if let Some(mut s) = ch_stdout { let _ = s.read_to_end(&mut out_buf); } - if let Some(mut s) = ch_stderr { let _ = s.read_to_end(&mut err_buf); } - if timed_out { - let head = String::from_utf8_lossy(&out_buf).chars().take(200).collect::(); + if out.timed_out { + let head = String::from_utf8_lossy(&out.stdout).chars().take(200).collect::(); eprintln!("[ny-compiler] child timeout after {} ms; stdout(head)='{}'", timeout_ms, head.replace('\n', "\\n")); } - let stdout = match String::from_utf8(out_buf) { Ok(s) => s, Err(_) => String::new() }; + let stdout = match String::from_utf8(out.stdout.clone()) { Ok(s) => s, Err(_) => String::new() }; if timed_out { // Fall back path will be taken below when json_line remains empty } else if let Ok(s) = String::from_utf8(err_buf.clone()) { diff 
// ---- src/runner/modes/common_util/io.rs ----
use std::io::Read;
use std::process::{Command, Stdio};
use std::thread::sleep;
use std::time::{Duration, Instant};

/// Captured result of a child process run through [`spawn_with_timeout`].
pub struct ChildOutput {
    /// Bytes the child wrote to stdout before it exited or was killed.
    pub stdout: Vec<u8>,
    /// Bytes the child wrote to stderr before it exited or was killed.
    pub stderr: Vec<u8>,
    /// `true` only when the child exited by itself with a success status.
    pub status_ok: bool,
    /// Exit code when the child exited by itself and the OS reported one;
    /// `None` after a timeout.
    pub exit_code: Option<i32>,
    /// `true` when the deadline passed and the child was killed.
    pub timed_out: bool,
}

/// Spawn command with timeout (ms), capture stdout/stderr, and return ChildOutput.
///
/// stdout and stderr are redirected to pipes and drained on background
/// threads *while* the child runs. Reading only after exit (as this helper
/// used to) blocks any child that writes more than one pipe buffer of output:
/// it can never exit, and is then misreported as a timeout.
///
/// The child is polled every 10 ms. When `timeout_ms` elapses it is killed
/// and reaped, and the result has `timed_out == true`, `status_ok == false`
/// and `exit_code == None`; whatever output was produced so far is returned.
///
/// # Errors
/// Returns the I/O error if the process cannot be spawned or polled. On a
/// poll error the child is killed and reaped before returning.
///
/// Note: if the child hands its pipe ends to a process that outlives it,
/// collecting the output waits until that process closes them.
pub fn spawn_with_timeout(mut cmd: Command, timeout_ms: u64) -> std::io::Result<ChildOutput> {
    cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
    let mut child = cmd.spawn()?;

    // Start draining immediately so the child never blocks on a full pipe.
    let stdout_reader = child.stdout.take().map(drain_in_background);
    let stderr_reader = child.stderr.take().map(drain_in_background);

    let limit = Duration::from_millis(timeout_ms);
    let started = Instant::now();
    let exit_status = loop {
        match child.try_wait() {
            Ok(Some(status)) => break Some(status),
            Ok(None) if started.elapsed() >= limit => {
                let _ = child.kill();
                let _ = child.wait();
                break None;
            }
            Ok(None) => sleep(Duration::from_millis(10)),
            Err(e) => {
                // Do not leave a running, unreaped child behind.
                let _ = child.kill();
                let _ = child.wait();
                return Err(e);
            }
        }
    };

    let stdout = collect_drained(stdout_reader);
    let stderr = collect_drained(stderr_reader);
    let timed_out = exit_status.is_none();
    let (status_ok, exit_code) = match exit_status {
        Some(st) => (st.success(), st.code()),
        None => (false, None),
    };
    Ok(ChildOutput {
        stdout,
        stderr,
        status_ok,
        exit_code,
        timed_out,
    })
}

/// Read `pipe` to end-of-file on a new thread and hand back the bytes.
fn drain_in_background<R>(mut pipe: R) -> std::thread::JoinHandle<Vec<u8>>
where
    R: Read + Send + 'static,
{
    std::thread::spawn(move || {
        let mut buf = Vec::new();
        // Best effort: keep whatever was read before an error.
        let _ = pipe.read_to_end(&mut buf);
        buf
    })
}

/// Wait for a drain thread and return its bytes (empty if absent or panicked).
fn collect_drained(reader: Option<std::thread::JoinHandle<Vec<u8>>>) -> Vec<u8> {
    reader.and_then(|h| h.join().ok()).unwrap_or_default()
}
+ */ + +pub mod pyvm; +pub mod selfhost_exe; +pub mod io; diff --git a/src/runner/modes/common_util/pyvm.rs b/src/runner/modes/common_util/pyvm.rs new file mode 100644 index 00000000..622d10ba --- /dev/null +++ b/src/runner/modes/common_util/pyvm.rs @@ -0,0 +1,39 @@ +use std::process::Stdio; + +/// Run PyVM harness over a MIR module, returning the exit code +pub fn run_pyvm_harness(module: &crate::mir::MirModule, tag: &str) -> Result { + let py3 = which::which("python3").map_err(|e| format!("python3 not found: {}", e))?; + let runner = std::path::Path::new("tools/pyvm_runner.py"); + if !runner.exists() { + return Err(format!("PyVM runner not found: {}", runner.display())); + } + let tmp_dir = std::path::Path::new("tmp"); + let _ = std::fs::create_dir_all(tmp_dir); + let mir_json_path = tmp_dir.join("nyash_pyvm_mir.json"); + crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(module, &mir_json_path) + .map_err(|e| format!("PyVM MIR JSON emit error: {}", e))?; + crate::cli_v!("[ny-compiler] using PyVM ({} ) → {}", tag, mir_json_path.display()); + // Determine entry function hint (prefer Main.main if present) + let entry = if module.functions.contains_key("Main.main") { + "Main.main" + } else if module.functions.contains_key("main") { + "main" + } else { + "Main.main" + }; + let status = std::process::Command::new(py3) + .args([ + runner.to_string_lossy().as_ref(), + "--in", + &mir_json_path.display().to_string(), + "--entry", + entry, + ]) + .status() + .map_err(|e| format!("spawn pyvm: {}", e))?; + let code = status.code().unwrap_or(1); + if !status.success() { + crate::cli_v!("❌ PyVM ({}) failed (status={})", tag, code); + } + Ok(code) +} diff --git a/src/runner/modes/common_util/selfhost_exe.rs b/src/runner/modes/common_util/selfhost_exe.rs new file mode 100644 index 00000000..71a3b542 --- /dev/null +++ b/src/runner/modes/common_util/selfhost_exe.rs @@ -0,0 +1,131 @@ +use std::io::Read; +use std::process::Stdio; +use std::thread::sleep; +use 
std::time::{Duration, Instant}; + +/// Try external selfhost compiler EXE to parse Ny -> JSON v0 and return MIR module. +/// Returns Some(module) on success, None on failure (timeout/invalid output/missing exe) +pub fn exe_try_parse_json_v0(filename: &str, timeout_ms: u64) -> Option { + // Resolve parser EXE path + let exe_path = if let Ok(p) = std::env::var("NYASH_NY_COMPILER_EXE_PATH") { + std::path::PathBuf::from(p) + } else { + let mut p = std::path::PathBuf::from("dist/nyash_compiler"); + #[cfg(windows)] + { + p.push("nyash_compiler.exe"); + } + #[cfg(not(windows))] + { + p.push("nyash_compiler"); + } + if !p.exists() { + if let Ok(w) = which::which("nyash_compiler") { + w + } else { + p + } + } else { + p + } + }; + if !exe_path.exists() { + crate::cli_v!("[ny-compiler] exe not found at {}", exe_path.display()); + return None; + } + // Build command + let mut cmd = std::process::Command::new(&exe_path); + cmd.arg(filename); + if crate::config::env::ny_compiler_min_json() { + cmd.arg("--min-json"); + } + if crate::config::env::selfhost_read_tmp() { + cmd.arg("--read-tmp"); + } + if let Some(raw) = crate::config::env::ny_compiler_child_args() { + for tok in raw.split_whitespace() { + cmd.arg(tok); + } + } + let mut cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); + let mut child = match cmd.spawn() { + Ok(c) => c, + Err(e) => { + eprintln!("[ny-compiler] exe spawn failed: {}", e); + return None; + } + }; + let mut ch_stdout = child.stdout.take(); + let mut ch_stderr = child.stderr.take(); + let start = Instant::now(); + let mut timed_out = false; + loop { + match child.try_wait() { + Ok(Some(_)) => break, + Ok(None) => { + if start.elapsed() >= Duration::from_millis(timeout_ms) { + let _ = child.kill(); + let _ = child.wait(); + timed_out = true; + break; + } + sleep(Duration::from_millis(10)); + } + Err(e) => { + eprintln!("[ny-compiler] exe wait error: {}", e); + return None; + } + } + } + let mut out_buf = Vec::new(); + let mut err_buf = Vec::new(); 
+ if let Some(mut s) = ch_stdout { + let _ = s.read_to_end(&mut out_buf); + } + if let Some(mut s) = ch_stderr { + let _ = s.read_to_end(&mut err_buf); + } + if timed_out { + let head = String::from_utf8_lossy(&out_buf) + .chars() + .take(200) + .collect::(); + eprintln!( + "[ny-compiler] exe timeout after {} ms; stdout(head)='{}'", + timeout_ms, + head.replace('\n', "\\n") + ); + return None; + } + let stdout = match String::from_utf8(out_buf) { + Ok(s) => s, + Err(_) => String::new(), + }; + let mut json_line = String::new(); + for line in stdout.lines() { + let t = line.trim(); + if t.starts_with('{') && t.contains("\"version\"") && t.contains("\"kind\"") { + json_line = t.to_string(); + break; + } + } + if json_line.is_empty() { + if crate::config::env::cli_verbose() { + let head: String = stdout.chars().take(200).collect(); + let errh: String = String::from_utf8_lossy(&err_buf).chars().take(200).collect(); + crate::cli_v!( + "[ny-compiler] exe produced no JSON; stdout(head)='{}' stderr(head)='{}'", + head.replace('\n', "\\n"), + errh.replace('\n', "\\n") + ); + } + return None; + } + match crate::runner::json_v0_bridge::parse_json_v0_to_module(&json_line) { + Ok(module) => Some(module), + Err(e) => { + eprintln!("[ny-compiler] JSON parse failed: {}", e); + None + } + } +} diff --git a/src/runner/modes/llvm.rs b/src/runner/modes/llvm.rs index 1e233ec4..4c2efe11 100644 --- a/src/runner/modes/llvm.rs +++ b/src/runner/modes/llvm.rs @@ -84,21 +84,22 @@ impl NyashRunner { ); } // 2) Run harness with --in/--out(失敗時は即エラー) - let status = std::process::Command::new(py3) - .args([ - harness.to_string_lossy().as_ref(), - "--in", - &mir_json_path.display().to_string(), - "--out", - &_out_path, - ]) - .status() + let mut cmd = std::process::Command::new(py3); + cmd.args([ + harness.to_string_lossy().as_ref(), + "--in", + &mir_json_path.display().to_string(), + "--out", + &_out_path, + ]); + let out = crate::runner::modes::common_util::io::spawn_with_timeout(cmd, 20_000) 
.map_err(|e| format!("spawn harness: {}", e)) .unwrap(); - if !status.success() { + if out.timed_out || !out.status_ok { eprintln!( - "❌ llvmlite harness failed (status={})", - status.code().unwrap_or(-1) + "❌ llvmlite harness failed (timeout={} code={:?})", + out.timed_out, + out.exit_code ); process::exit(1); } diff --git a/src/runner/modes/mod.rs b/src/runner/modes/mod.rs index ecc11d0f..03bfedad 100644 --- a/src/runner/modes/mod.rs +++ b/src/runner/modes/mod.rs @@ -8,5 +8,8 @@ pub mod mir; pub mod vm; pub mod pyvm; +// Shared helpers extracted from common.rs (in progress) +pub mod common_util; + #[cfg(feature = "cranelift-jit")] pub mod aot; diff --git a/src/runner/modes/pyvm.rs b/src/runner/modes/pyvm.rs index 65c10719..1bb84979 100644 --- a/src/runner/modes/pyvm.rs +++ b/src/runner/modes/pyvm.rs @@ -76,22 +76,20 @@ pub fn execute_pyvm_only(_runner: &NyashRunner, filename: &str) { mir_json_path.display() ); } - let status = std::process::Command::new(py3) - .args([ - runner.to_string_lossy().as_ref(), - "--in", - &mir_json_path.display().to_string(), - "--entry", - entry, - ]) - .status() + let mut cmd = std::process::Command::new(py3); + cmd.args([ + runner.to_string_lossy().as_ref(), + "--in", + &mir_json_path.display().to_string(), + "--entry", + entry, + ]); + let out = crate::runner::modes::common_util::io::spawn_with_timeout(cmd, 10_000) .map_err(|e| format!("spawn pyvm: {}", e)) .unwrap(); - let code = status.code().unwrap_or(1); - if !status.success() { - if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") { - eprintln!("❌ PyVM failed (status={})", code); - } + let code = if out.timed_out { 1 } else { out.exit_code.unwrap_or(1) }; + if out.timed_out && std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") { + eprintln!("❌ PyVM timeout"); } process::exit(code); } else { @@ -99,4 +97,3 @@ pub fn execute_pyvm_only(_runner: &NyashRunner, filename: &str) { process::exit(1); } } - diff --git a/src/runner/modes/vm.rs 
b/src/runner/modes/vm.rs index 58be5833..3ade446f 100644 --- a/src/runner/modes/vm.rs +++ b/src/runner/modes/vm.rs @@ -207,23 +207,20 @@ impl NyashRunner { "Main.main" }; // Spawn runner - let status = std::process::Command::new(py3) - .args([ - runner.to_string_lossy().as_ref(), - "--in", - &mir_json_path.display().to_string(), - "--entry", - entry, - ]) - .status() + let mut cmd = std::process::Command::new(py3); + cmd.args([ + runner.to_string_lossy().as_ref(), + "--in", + &mir_json_path.display().to_string(), + "--entry", + entry, + ]); + let out = super::common_util::io::spawn_with_timeout(cmd, 10_000) .map_err(|e| format!("spawn pyvm: {}", e)) .unwrap(); - // Always propagate PyVM exit code to match llvmlite semantics - let code = status.code().unwrap_or(1); - if !status.success() { - if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") { - eprintln!("❌ PyVM failed (status={})", code); - } + let code = if out.timed_out { 1 } else { out.exit_code.unwrap_or(1) }; + if out.timed_out && std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") { + eprintln!("❌ PyVM timeout"); } process::exit(code); } else { diff --git a/src/runner/selfhost.rs b/src/runner/selfhost.rs index f18d2f6c..40f74e0c 100644 --- a/src/runner/selfhost.rs +++ b/src/runner/selfhost.rs @@ -248,14 +248,15 @@ impl NyashRunner { if py.exists() { let mut cmd = std::process::Command::new(&py3); cmd.arg(py).arg(&tmp_path); - let out = match cmd.output() { + let timeout_ms: u64 = std::env::var("NYASH_NY_COMPILER_TIMEOUT_MS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(2000); + let out = match super::modes::common_util::io::spawn_with_timeout(cmd, timeout_ms) { Ok(o) => o, - Err(e) => { - eprintln!("[ny-compiler] python harness failed to spawn: {}", e); - return false; - } + Err(e) => { eprintln!("[ny-compiler] python harness failed: {}", e); return false; } }; - if out.status.success() { + if !out.timed_out { if let Ok(line) = String::from_utf8(out.stdout) .map(|s| 
s.lines().next().unwrap_or("").to_string()) { @@ -366,179 +367,48 @@ impl NyashRunner { } }; if exe_path.exists() { - let mut cmd = std::process::Command::new(&exe_path); - // Prefer passing the original filename directly (parser EXE accepts positional path) - cmd.arg(filename); - // Gates - if std::env::var("NYASH_NY_COMPILER_MIN_JSON").ok().as_deref() == Some("1") { - cmd.arg("--min-json"); - } - if std::env::var("NYASH_SELFHOST_READ_TMP").ok().as_deref() == Some("1") { - cmd.arg("--read-tmp"); - } - if std::env::var("NYASH_NY_COMPILER_STAGE3").ok().as_deref() == Some("1") { - cmd.arg("--stage3"); - } - if let Ok(raw) = std::env::var("NYASH_NY_COMPILER_CHILD_ARGS") { - for tok in raw.split_whitespace() { - cmd.arg(tok); - } - } let timeout_ms: u64 = std::env::var("NYASH_NY_COMPILER_TIMEOUT_MS") .ok() .and_then(|s| s.parse().ok()) .unwrap_or(2000); - let mut cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); - let mut child = match cmd.spawn() { - Ok(c) => c, - Err(e) => { - eprintln!("[ny-compiler] exe spawn failed: {}", e); - return false; - } - }; - let mut ch_stdout = child.stdout.take(); - let mut ch_stderr = child.stderr.take(); - let start = Instant::now(); - let mut timed_out = false; - loop { - match child.try_wait() { - Ok(Some(_status)) => { - break; - } - Ok(None) => { - if start.elapsed() >= Duration::from_millis(timeout_ms) { - let _ = child.kill(); - let _ = child.wait(); - timed_out = true; - break; - } - sleep(Duration::from_millis(10)); - } - Err(e) => { - eprintln!("[ny-compiler] exe wait error: {}", e); - return false; - } - } - } - let mut out_buf = Vec::new(); - let mut err_buf = Vec::new(); - if let Some(mut s) = ch_stdout { - let _ = s.read_to_end(&mut out_buf); - } - if let Some(mut s) = ch_stderr { - let _ = s.read_to_end(&mut err_buf); - } - if timed_out { - let head = String::from_utf8_lossy(&out_buf) - .chars() - .take(200) - .collect::(); - eprintln!( - "[ny-compiler] exe timeout after {} ms; stdout(head)='{}'", - timeout_ms, - 
head.replace('\n', "\\n") - ); - return false; - } - let stdout = match String::from_utf8(out_buf) { - Ok(s) => s, - Err(_) => String::new(), - }; - let mut json_line = String::new(); - for line in stdout.lines() { - let t = line.trim(); - if t.starts_with('{') && t.contains("\"version\"") && t.contains("\"kind\"") { - json_line = t.to_string(); - break; - } - } - if json_line.is_empty() { - if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") { - let head: String = stdout.chars().take(200).collect(); - let errh: String = String::from_utf8_lossy(&err_buf) - .chars() - .take(200) - .collect(); - eprintln!("[ny-compiler] exe produced no JSON; stdout(head)='{}' stderr(head)='{}'", head.replace('\n', "\\n"), errh.replace('\n', "\\n")); - } - return false; - } - // Parse JSON v0 → MIR module - match super::json_v0_bridge::parse_json_v0_to_module(&json_line) { - Ok(module) => { - println!("🚀 Ny compiler EXE path (ny→json_v0) ON"); - super::json_v0_bridge::maybe_dump_mir(&module); - let emit_only = std::env::var("NYASH_NY_COMPILER_EMIT_ONLY") - .unwrap_or_else(|_| "1".to_string()) - == "1"; - if emit_only { - return false; - } else { - // Prefer PyVM when requested (reference semantics), regardless of BoxCall presence - let prefer_pyvm = - std::env::var("NYASH_VM_USE_PY").ok().as_deref() == Some("1"); - if prefer_pyvm { - if let Ok(py3) = which::which("python3") { - let runner = std::path::Path::new("tools/pyvm_runner.py"); - if runner.exists() { - let tmp_dir = std::path::Path::new("tmp"); - let _ = std::fs::create_dir_all(tmp_dir); - let mir_json_path = tmp_dir.join("nyash_pyvm_mir.json"); - if let Err(e) = crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(&module, &mir_json_path) { - eprintln!("❌ PyVM MIR JSON emit error: {}", e); - process::exit(1); - } - if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() - == Some("1") - { - eprintln!( - "[Bridge] using PyVM (selfhost) → {}", - mir_json_path.display() - ); - } - let entry = if 
module.functions.contains_key("Main.main") { - "Main.main" - } else if module.functions.contains_key("main") { - "main" - } else { - "Main.main" - }; - let status = std::process::Command::new(py3) - .args([ - "tools/pyvm_runner.py", - "--in", - &mir_json_path.display().to_string(), - "--entry", - entry, - ]) - .status() - .map_err(|e| format!("spawn pyvm: {}", e)) - .unwrap(); - let code = status.code().unwrap_or(1); - if !status.success() { - if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() - == Some("1") - { - eprintln!( - "❌ PyVM (selfhost) failed (status={})", - code - ); - } - } - // Harmonize with interpreter path for smokes: print Result then exit code - println!("Result: {}", code); - std::process::exit(code); - } + if let Some(module) = super::modes::common_util::selfhost_exe::exe_try_parse_json_v0(filename, timeout_ms) { + super::json_v0_bridge::maybe_dump_mir(&module); + let emit_only = std::env::var("NYASH_NY_COMPILER_EMIT_ONLY") + .unwrap_or_else(|_| "1".to_string()) + == "1"; + if emit_only { return false; } + // Prefer PyVM when requested (reference semantics) + if std::env::var("NYASH_VM_USE_PY").ok().as_deref() == Some("1") { + if let Ok(py3) = which::which("python3") { + let runner = std::path::Path::new("tools/pyvm_runner.py"); + if runner.exists() { + let tmp_dir = std::path::Path::new("tmp"); + let _ = std::fs::create_dir_all(tmp_dir); + let mir_json_path = tmp_dir.join("nyash_pyvm_mir.json"); + if let Err(e) = crate::runner::mir_json_emit::emit_mir_json_for_harness_bin(&module, &mir_json_path) { + eprintln!("❌ PyVM MIR JSON emit error: {}", e); + process::exit(1); } + if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") { + eprintln!("[Bridge] using PyVM (selfhost) → {}", mir_json_path.display()); + } + let entry = if module.functions.contains_key("Main.main") { "Main.main" } + else if module.functions.contains_key("main") { "main" } else { "Main.main" }; + let status = std::process::Command::new(py3) + 
.args(["tools/pyvm_runner.py", "--in", &mir_json_path.display().to_string(), "--entry", entry]) + .status() + .map_err(|e| format!("spawn pyvm: {}", e)) + .unwrap(); + let code = status.code().unwrap_or(1); + println!("Result: {}", code); + std::process::exit(code); } - self.execute_mir_module(&module); - return true; } } - Err(e) => { - eprintln!("[ny-compiler] json parse error: {}", e); - return false; - } + self.execute_mir_module(&module); + return true; + } else { + return false; } } } @@ -578,52 +448,15 @@ impl NyashRunner { .ok() .and_then(|s| s.parse().ok()) .unwrap_or(2000); - let mut cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); - let mut child = match cmd.spawn() { - Ok(c) => c, - Err(e) => { - eprintln!("[ny-compiler] spawn inline vm failed: {}", e); - return false; - } + let out = match super::modes::common_util::io::spawn_with_timeout(cmd, timeout_ms) { + Ok(o) => o, + Err(e) => { eprintln!("[ny-compiler] spawn inline vm failed: {}", e); return false; } }; - let mut ch_stdout = child.stdout.take(); - let mut ch_stderr = child.stderr.take(); - let start = Instant::now(); - let mut timed_out = false; - loop { - match child.try_wait() { - Ok(Some(_)) => break, - Ok(None) => { - if start.elapsed() >= Duration::from_millis(timeout_ms) { - let _ = child.kill(); - let _ = child.wait(); - timed_out = true; - break; - } - sleep(Duration::from_millis(10)); - } - Err(e) => { - eprintln!("[ny-compiler] inline wait error: {}", e); - break; - } - } + if out.timed_out { + let head = String::from_utf8_lossy(&out.stdout).chars().take(200).collect::(); + eprintln!("[ny-compiler] inline timeout after {} ms; stdout(head)='{}'", timeout_ms, head.replace('\n', "\\n")); } - let mut out_buf = Vec::new(); - if let Some(mut s) = ch_stdout { - let _ = s.read_to_end(&mut out_buf); - } - if timed_out { - let head = String::from_utf8_lossy(&out_buf) - .chars() - .take(200) - .collect::(); - eprintln!( - "[ny-compiler] inline timeout after {} ms; stdout(head)='{}'", - 
timeout_ms, - head.replace('\n', "\\n") - ); - } - raw = String::from_utf8_lossy(&out_buf).to_string(); + raw = String::from_utf8_lossy(&out.stdout).to_string(); } let mut json_line = String::new(); for line in raw.lines() { diff --git a/src/runtime/gc.rs b/src/runtime/gc.rs index e2d8ec66..38a8c9c6 100644 --- a/src/runtime/gc.rs +++ b/src/runtime/gc.rs @@ -11,11 +11,13 @@ pub enum BarrierKind { /// GC hooks that execution engines may call at key points. /// Implementations must be Send + Sync for multi-thread preparation. -pub trait GcHooks: Send + Sync { +pub trait GcHooks: Send + Sync + std::any::Any { /// Safe point for cooperative GC (e.g., poll or yield). fn safepoint(&self) {} /// Memory barrier hint for loads/stores. fn barrier(&self, _kind: BarrierKind) {} + /// Allocation accounting (bytes are best-effort; may be 0 when unknown) + fn alloc(&self, _bytes: u64) {} /// Optional counters snapshot for diagnostics. Default: None. fn snapshot_counters(&self) -> Option<(u64, u64, u64)> { None @@ -27,48 +29,35 @@ pub struct NullGc; impl GcHooks for NullGc {} -use std::sync::atomic::{AtomicU64, Ordering}; - -/// Simple counting GC (PoC): counts safepoints and barriers. -/// Useful to validate hook frequency without affecting semantics. +/// CountingGc is now a thin wrapper around the unified GcController. 
pub struct CountingGc { - pub safepoints: AtomicU64, - pub barrier_reads: AtomicU64, - pub barrier_writes: AtomicU64, + inner: crate::runtime::gc_controller::GcController, } impl CountingGc { pub fn new() -> Self { + // Default to rc+cycle mode for development metrics + let mode = crate::runtime::gc_mode::GcMode::RcCycle; Self { - safepoints: AtomicU64::new(0), - barrier_reads: AtomicU64::new(0), - barrier_writes: AtomicU64::new(0), + inner: crate::runtime::gc_controller::GcController::new(mode), } } pub fn snapshot(&self) -> (u64, u64, u64) { - ( - self.safepoints.load(Ordering::Relaxed), - self.barrier_reads.load(Ordering::Relaxed), - self.barrier_writes.load(Ordering::Relaxed), - ) + self.inner.snapshot() } } impl GcHooks for CountingGc { fn safepoint(&self) { - self.safepoints.fetch_add(1, Ordering::Relaxed); + self.inner.safepoint(); } fn barrier(&self, kind: BarrierKind) { - match kind { - BarrierKind::Read => { - self.barrier_reads.fetch_add(1, Ordering::Relaxed); - } - BarrierKind::Write => { - self.barrier_writes.fetch_add(1, Ordering::Relaxed); - } - } + self.inner.barrier(kind); + } + fn alloc(&self, bytes: u64) { + self.inner.alloc(bytes); } fn snapshot_counters(&self) -> Option<(u64, u64, u64)> { - Some(self.snapshot()) + Some(self.inner.snapshot()) } } diff --git a/src/runtime/gc_controller.rs b/src/runtime/gc_controller.rs new file mode 100644 index 00000000..a38b0cec --- /dev/null +++ b/src/runtime/gc_controller.rs @@ -0,0 +1,207 @@ +//! Unified GC controller (skeleton) +//! Implements GcHooks and centralizes mode selection and metrics. 
+ +use std::sync::atomic::{AtomicU64, Ordering}; + +use super::gc::{BarrierKind, GcHooks}; +use super::gc_mode::GcMode; +use crate::config::env; +use crate::runtime::gc_trace; +use std::collections::{HashSet, VecDeque}; + +pub struct GcController { + mode: GcMode, + safepoints: AtomicU64, + barrier_reads: AtomicU64, + barrier_writes: AtomicU64, + alloc_bytes: AtomicU64, + alloc_count: AtomicU64, + sp_since_last: AtomicU64, + bytes_since_last: AtomicU64, + collect_sp_interval: Option, + collect_alloc_bytes: Option, + // Diagnostics: last trial reachability counters + trial_nodes_last: AtomicU64, + trial_edges_last: AtomicU64, + // Diagnostics: collection counters and last duration/flags + collect_count_total: AtomicU64, + collect_by_sp: AtomicU64, + collect_by_alloc: AtomicU64, + trial_duration_last_ms: AtomicU64, + trial_reason_last: AtomicU64, // bitflags: 1=sp, 2=alloc +} + +impl GcController { + pub fn new(mode: GcMode) -> Self { + Self { + mode, + safepoints: AtomicU64::new(0), + barrier_reads: AtomicU64::new(0), + barrier_writes: AtomicU64::new(0), + alloc_bytes: AtomicU64::new(0), + alloc_count: AtomicU64::new(0), + sp_since_last: AtomicU64::new(0), + bytes_since_last: AtomicU64::new(0), + collect_sp_interval: env::gc_collect_sp_interval(), + collect_alloc_bytes: env::gc_collect_alloc_bytes(), + trial_nodes_last: AtomicU64::new(0), + trial_edges_last: AtomicU64::new(0), + collect_count_total: AtomicU64::new(0), + collect_by_sp: AtomicU64::new(0), + collect_by_alloc: AtomicU64::new(0), + trial_duration_last_ms: AtomicU64::new(0), + trial_reason_last: AtomicU64::new(0), + } + } + pub fn mode(&self) -> GcMode { + self.mode + } + pub fn snapshot(&self) -> (u64, u64, u64) { + ( + self.safepoints.load(Ordering::Relaxed), + self.barrier_reads.load(Ordering::Relaxed), + self.barrier_writes.load(Ordering::Relaxed), + ) + } +} + +impl GcHooks for GcController { + fn safepoint(&self) { + // Off mode: minimal overhead but still callable + if self.mode != GcMode::Off { + 
self.safepoints.fetch_add(1, Ordering::Relaxed); + let sp = self.sp_since_last.fetch_add(1, Ordering::Relaxed) + 1; + // Opportunistic collection trigger + let sp_hit = self + .collect_sp_interval + .map(|n| n > 0 && sp >= n) + .unwrap_or(false); + let bytes = self.bytes_since_last.load(Ordering::Relaxed); + let bytes_hit = self + .collect_alloc_bytes + .map(|n| n > 0 && bytes >= n) + .unwrap_or(false); + if sp_hit || bytes_hit { + // Record reason flags for diagnostics + let mut flags: u64 = 0; + if sp_hit { flags |= 1; self.collect_by_sp.fetch_add(1, Ordering::Relaxed); } + if bytes_hit { flags |= 2; self.collect_by_alloc.fetch_add(1, Ordering::Relaxed); } + self.trial_reason_last.store(flags, Ordering::Relaxed); + self.run_trial_collection(); + } + } + // Future: per-mode collection/cooperation hooks + } + fn barrier(&self, kind: BarrierKind) { + if self.mode == GcMode::Off { + return; + } + match kind { + BarrierKind::Read => { + self.barrier_reads.fetch_add(1, Ordering::Relaxed); + } + BarrierKind::Write => { + self.barrier_writes.fetch_add(1, Ordering::Relaxed); + } + } + } + fn snapshot_counters(&self) -> Option<(u64, u64, u64)> { + Some(self.snapshot()) + } + fn alloc(&self, bytes: u64) { + if self.mode == GcMode::Off { + return; + } + self.alloc_count.fetch_add(1, Ordering::Relaxed); + self.alloc_bytes.fetch_add(bytes, Ordering::Relaxed); + self.bytes_since_last + .fetch_add(bytes, Ordering::Relaxed); + } +} + +impl GcController { + pub fn alloc_totals(&self) -> (u64, u64) { + ( + self.alloc_count.load(Ordering::Relaxed), + self.alloc_bytes.load(Ordering::Relaxed), + ) + } +} + +impl GcController { + fn run_trial_collection(&self) { + // Reset windows + self.sp_since_last.store(0, Ordering::Relaxed); + self.bytes_since_last.store(0, Ordering::Relaxed); + // PoC: no object graph; report current handles as leak candidates and return. + if self.mode == GcMode::Off { + return; + } + // Only run for rc/rc+cycle/stw; rc+cycle is default. 
+ match self.mode { + GcMode::Rc | GcMode::RcCycle | GcMode::STW => { + let started = std::time::Instant::now(); + // Roots: JIT/AOT handle registry snapshot + let roots = crate::jit::rt::handles::snapshot_arcs(); + let mut visited: HashSet = HashSet::new(); + let mut q: VecDeque> = + VecDeque::new(); + for r in roots.into_iter() { + let id = r.box_id(); + if visited.insert(id) { + q.push_back(r); + } + } + let mut nodes: u64 = visited.len() as u64; + let mut edges: u64 = 0; + while let Some(cur) = q.pop_front() { + gc_trace::trace_children(&*cur, &mut |child| { + edges += 1; + let id = child.box_id(); + if visited.insert(id) { + nodes += 1; + q.push_back(child); + } + }); + } + // Store last diagnostics (available for JSON metrics) + self.trial_nodes_last.store(nodes, Ordering::Relaxed); + self.trial_edges_last.store(edges, Ordering::Relaxed); + if (nodes + edges) > 0 && crate::config::env::gc_metrics() { + eprintln!( + "[GC] trial: reachable nodes={} edges={} (roots=jit_handles)", + nodes, edges + ); + } + // Update counters + let dur = started.elapsed(); + let ms = dur.as_millis() as u64; + self.trial_duration_last_ms.store(ms, Ordering::Relaxed); + self.collect_count_total.fetch_add(1, Ordering::Relaxed); + // Reason flags derive from current env thresholds vs last windows reaching triggers + // Note: we set flags in safepoint() where triggers were decided. 
+ } + _ => {} + } + } +} + +impl GcController { + pub fn trial_reachability_last(&self) -> (u64, u64) { + ( + self.trial_nodes_last.load(Ordering::Relaxed), + self.trial_edges_last.load(Ordering::Relaxed), + ) + } + pub fn collection_totals(&self) -> (u64, u64, u64) { + ( + self.collect_count_total.load(Ordering::Relaxed), + self.collect_by_sp.load(Ordering::Relaxed), + self.collect_by_alloc.load(Ordering::Relaxed), + ) + } + pub fn trial_duration_last_ms(&self) -> u64 { + self.trial_duration_last_ms.load(Ordering::Relaxed) + } + pub fn trial_reason_last_bits(&self) -> u64 { self.trial_reason_last.load(Ordering::Relaxed) } +} diff --git a/src/runtime/gc_mode.rs b/src/runtime/gc_mode.rs new file mode 100644 index 00000000..4d0a714d --- /dev/null +++ b/src/runtime/gc_mode.rs @@ -0,0 +1,34 @@ +//! GC mode selection (user-facing) +use crate::config::env; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum GcMode { + RcCycle, + Minorgen, + STW, + Rc, + Off, +} + +impl GcMode { + pub fn from_env() -> Self { + match env::gc_mode().to_ascii_lowercase().as_str() { + "auto" | "rc+cycle" => GcMode::RcCycle, + "minorgen" => GcMode::Minorgen, + "stw" => GcMode::STW, + "rc" => GcMode::Rc, + "off" => GcMode::Off, + _ => GcMode::RcCycle, + } + } + pub fn as_str(&self) -> &'static str { + match self { + GcMode::RcCycle => "rc+cycle", + GcMode::Minorgen => "minorgen", + GcMode::STW => "stw", + GcMode::Rc => "rc", + GcMode::Off => "off", + } + } +} + diff --git a/src/runtime/gc_trace.rs b/src/runtime/gc_trace.rs new file mode 100644 index 00000000..2dbab311 --- /dev/null +++ b/src/runtime/gc_trace.rs @@ -0,0 +1,35 @@ +//! Minimal GC tracing helpers (skeleton) +//! +//! Downcast-based child edge enumeration for builtin containers. +//! This is a non-invasive helper to support diagnostics and future collectors. + +use std::sync::Arc; + +use crate::box_trait::NyashBox; + +/// Visit child boxes of a given object and invoke `visit(child)` for each. 
+/// This function recognizes builtin containers (ArrayBox/MapBox) and is a no-op otherwise. +pub fn trace_children(obj: &dyn NyashBox, visit: &mut dyn FnMut(Arc)) { + // ArrayBox + if let Some(arr) = obj.as_any().downcast_ref::() { + if let Ok(items) = arr.items.read() { + for it in items.iter() { + // Convert Box to Arc + let arc: Arc = Arc::from(it.clone_box()); + visit(arc); + } + } + return; + } + // MapBox + if let Some(map) = obj.as_any().downcast_ref::() { + if let Ok(data) = map.get_data().read() { + for (_k, v) in data.iter() { + let arc: Arc = Arc::from(v.clone_box()); + visit(arc); + } + } + return; + } +} + diff --git a/src/runtime/global_hooks.rs b/src/runtime/global_hooks.rs index 5ef620c8..104c31b7 100644 --- a/src/runtime/global_hooks.rs +++ b/src/runtime/global_hooks.rs @@ -4,97 +4,74 @@ use once_cell::sync::OnceCell; use std::sync::{Arc, RwLock}; use super::scheduler::CancellationToken; -use super::{gc::GcHooks, scheduler::Scheduler}; +use super::{gc::BarrierKind, gc::GcHooks, scheduler::Scheduler}; -static GLOBAL_GC: OnceCell>>> = OnceCell::new(); -static GLOBAL_SCHED: OnceCell>>> = OnceCell::new(); -// Phase 2 scaffold: current task group's cancellation token (no-op default) -static GLOBAL_CUR_TOKEN: OnceCell>> = OnceCell::new(); -// Phase 2 scaffold: current group's child futures registry (best-effort) -static GLOBAL_GROUP_FUTURES: OnceCell>> = - OnceCell::new(); -// Strong ownership list for implicit group (pre-TaskGroup actualization) -static GLOBAL_GROUP_STRONG: OnceCell>> = - OnceCell::new(); -// Simple scope depth counter for implicit group (join-at-scope-exit footing) -static TASK_SCOPE_DEPTH: OnceCell> = OnceCell::new(); -// TaskGroup scope stack (explicit group ownership per function scope) -static TASK_GROUP_STACK: OnceCell< - RwLock>>, -> = OnceCell::new(); +// Unified global runtime hooks state (single lock for consistency) +struct GlobalHooksState { + gc: Option>, + sched: Option>, + cur_token: Option, + futures: Vec, + strong: 
Vec, + scope_depth: usize, + group_stack: Vec>, +} -fn gc_cell() -> &'static RwLock>> { - GLOBAL_GC.get_or_init(|| RwLock::new(None)) +impl GlobalHooksState { + fn new() -> Self { + Self { + gc: None, + sched: None, + cur_token: None, + futures: Vec::new(), + strong: Vec::new(), + scope_depth: 0, + group_stack: Vec::new(), + } + } } -fn sched_cell() -> &'static RwLock>> { - GLOBAL_SCHED.get_or_init(|| RwLock::new(None)) -} -fn token_cell() -> &'static RwLock> { - GLOBAL_CUR_TOKEN.get_or_init(|| RwLock::new(None)) -} -fn futures_cell() -> &'static RwLock> { - GLOBAL_GROUP_FUTURES.get_or_init(|| RwLock::new(Vec::new())) -} -fn strong_cell() -> &'static RwLock> { - GLOBAL_GROUP_STRONG.get_or_init(|| RwLock::new(Vec::new())) -} -fn scope_depth_cell() -> &'static RwLock { - TASK_SCOPE_DEPTH.get_or_init(|| RwLock::new(0)) -} -fn group_stack_cell( -) -> &'static RwLock>> { - TASK_GROUP_STACK.get_or_init(|| RwLock::new(Vec::new())) + +static GLOBAL_STATE: OnceCell> = OnceCell::new(); + +fn state() -> &'static RwLock { + GLOBAL_STATE.get_or_init(|| RwLock::new(GlobalHooksState::new())) } pub fn set_from_runtime(rt: &crate::runtime::nyash_runtime::NyashRuntime) { - if let Ok(mut g) = gc_cell().write() { - *g = Some(rt.gc.clone()); - } - if let Ok(mut s) = sched_cell().write() { - *s = rt.scheduler.as_ref().cloned(); - } - // Optional: initialize a fresh token for the runtime's root group (Phase 2 wiring) - if let Ok(mut t) = token_cell().write() { - if t.is_none() { - *t = Some(CancellationToken::new()); + if let Ok(mut st) = state().write() { + st.gc = Some(rt.gc.clone()); + st.sched = rt.scheduler.as_ref().cloned(); + if st.cur_token.is_none() { + st.cur_token = Some(CancellationToken::new()); } - } - // Reset group futures registry on new runtime - if let Ok(mut f) = futures_cell().write() { - f.clear(); - } - if let Ok(mut s) = strong_cell().write() { - s.clear(); - } - if let Ok(mut d) = scope_depth_cell().write() { - *d = 0; - } - if let Ok(mut st) = 
group_stack_cell().write() { - st.clear(); + st.futures.clear(); + st.strong.clear(); + st.scope_depth = 0; + st.group_stack.clear(); } } pub fn set_gc(gc: Arc) { - if let Ok(mut g) = gc_cell().write() { - *g = Some(gc); + if let Ok(mut st) = state().write() { + st.gc = Some(gc); } } pub fn set_scheduler(s: Arc) { - if let Ok(mut w) = sched_cell().write() { - *w = Some(s); + if let Ok(mut st) = state().write() { + st.sched = Some(s); } } /// Set the current task group's cancellation token (scaffold). pub fn set_current_group_token(tok: CancellationToken) { - if let Ok(mut w) = token_cell().write() { - *w = Some(tok); + if let Ok(mut st) = state().write() { + st.cur_token = Some(tok); } } /// Get the current task group's cancellation token (no-op default). pub fn current_group_token() -> CancellationToken { - if let Ok(r) = token_cell().read() { - if let Some(t) = r.as_ref() { + if let Ok(st) = state().read() { + if let Some(t) = st.cur_token.as_ref() { return t.clone(); } } @@ -103,21 +80,17 @@ pub fn current_group_token() -> CancellationToken { /// Register a Future into the current group's registry (best-effort; clones share state) pub fn register_future_to_current_group(fut: &crate::boxes::future::FutureBox) { - // Prefer explicit current TaskGroup at top of stack - if let Ok(st) = group_stack_cell().read() { - if let Some(inner) = st.last() { + if let Ok(mut st) = state().write() { + // Prefer explicit current TaskGroup at top of stack + if let Some(inner) = st.group_stack.last() { if let Ok(mut v) = inner.strong.lock() { v.push(fut.clone()); return; } } - } - // Fallback to implicit global group - if let Ok(mut list) = futures_cell().write() { - list.push(fut.downgrade()); - } - if let Ok(mut s) = strong_cell().write() { - s.push(fut.clone()); + // Fallback to implicit global group + st.futures.push(fut.downgrade()); + st.strong.push(fut.clone()); } } @@ -127,26 +100,15 @@ pub fn join_all_registered_futures(timeout_ms: u64) { let deadline = Instant::now() + 
Duration::from_millis(timeout_ms); loop { let mut all_ready = true; - // purge list of dropped or completed futures opportunistically - { - // purge weak list: keep only upgradeable futures - if let Ok(mut list) = futures_cell().write() { - list.retain(|fw| fw.is_ready().is_some()); - } - // purge strong list: remove completed futures to reduce retention - if let Ok(mut s) = strong_cell().write() { - s.retain(|f| !f.ready()); - } - } - // check readiness - { - if let Ok(list) = futures_cell().read() { - for fw in list.iter() { - if let Some(ready) = fw.is_ready() { - if !ready { - all_ready = false; - break; - } + // purge + readiness check under single state lock (short critical sections) + if let Ok(mut st) = state().write() { + st.futures.retain(|fw| fw.is_ready().is_some()); + st.strong.retain(|f| !f.ready()); + for fw in st.futures.iter() { + if let Some(ready) = fw.is_ready() { + if !ready { + all_ready = false; + break; } } } @@ -161,22 +123,18 @@ pub fn join_all_registered_futures(timeout_ms: u64) { std::thread::yield_now(); } // Final sweep - if let Ok(mut s) = strong_cell().write() { - s.retain(|f| !f.ready()); - } - if let Ok(mut list) = futures_cell().write() { - list.retain(|fw| matches!(fw.is_ready(), Some(false))); + if let Ok(mut st) = state().write() { + st.strong.retain(|f| !f.ready()); + st.futures.retain(|fw| matches!(fw.is_ready(), Some(false))); } } /// Push a task scope (footing). On pop of the outermost scope, perform a best-effort join. 
pub fn push_task_scope() { - if let Ok(mut d) = scope_depth_cell().write() { - *d += 1; - } - // Push a new explicit TaskGroup for this scope - if let Ok(mut st) = group_stack_cell().write() { - st.push(std::sync::Arc::new( + if let Ok(mut st) = state().write() { + st.scope_depth += 1; + // Push a new explicit TaskGroup for this scope + st.group_stack.push(std::sync::Arc::new( crate::boxes::task_group_box::TaskGroupInner { strong: std::sync::Mutex::new(Vec::new()), }, @@ -190,19 +148,13 @@ pub fn push_task_scope() { pub fn pop_task_scope() { let mut do_join = false; let mut popped: Option> = None; - { - if let Ok(mut d) = scope_depth_cell().write() { - if *d > 0 { - *d -= 1; - } - if *d == 0 { - do_join = true; - } + if let Ok(mut st) = state().write() { + if st.scope_depth > 0 { + st.scope_depth -= 1; } - } - // Pop explicit group for this scope - if let Ok(mut st) = group_stack_cell().write() { - popped = st.pop(); + if st.scope_depth == 0 { do_join = true; } + // Pop explicit group for this scope + popped = st.group_stack.pop(); } if do_join { let ms: u64 = std::env::var("NYASH_TASK_SCOPE_JOIN_MS") @@ -240,13 +192,11 @@ pub fn pop_task_scope() { /// Perform a runtime safepoint and poll the scheduler if available. pub fn safepoint_and_poll() { - if let Ok(g) = gc_cell().read() { - if let Some(gc) = g.as_ref() { + if let Ok(st) = state().read() { + if let Some(gc) = st.gc.as_ref() { gc.safepoint(); } - } - if let Ok(s) = sched_cell().read() { - if let Some(sched) = s.as_ref() { + if let Some(sched) = st.sched.as_ref() { sched.poll(); } } @@ -255,8 +205,8 @@ pub fn safepoint_and_poll() { /// Try to schedule a task on the global scheduler. Returns true if scheduled. pub fn spawn_task(name: &str, f: Box) -> bool { // If a scheduler is registered, enqueue the task; otherwise run inline. 
- if let Ok(s) = sched_cell().read() { - if let Some(sched) = s.as_ref() { + if let Ok(st) = state().read() { + if let Some(sched) = st.sched.as_ref() { sched.spawn(name, f); return true; } @@ -272,8 +222,8 @@ pub fn spawn_task_with_token( token: crate::runtime::scheduler::CancellationToken, f: Box, ) -> bool { - if let Ok(s) = sched_cell().read() { - if let Some(sched) = s.as_ref() { + if let Ok(st) = state().read() { + if let Some(sched) = st.sched.as_ref() { sched.spawn_with_token(name, token, f); return true; } @@ -284,8 +234,8 @@ pub fn spawn_task_with_token( /// Spawn a delayed task via scheduler if available; returns true if scheduled. pub fn spawn_task_after(delay_ms: u64, name: &str, f: Box) -> bool { - if let Ok(s) = sched_cell().read() { - if let Some(sched) = s.as_ref() { + if let Ok(st) = state().read() { + if let Some(sched) = st.sched.as_ref() { sched.spawn_after(delay_ms, name, f); return true; } @@ -297,3 +247,21 @@ pub fn spawn_task_after(delay_ms: u64, name: &str, f: Box bool { std::env::var("NYASH_DEBUG_PLUGIN").unwrap_or_default() == "1" } +type BoxInvokeFn = extern "C" fn(u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32; + #[derive(Debug, Clone, Default)] struct LoadedBoxSpec { type_id: Option, methods: HashMap, fini_method_id: Option, + // Optional Nyash ABI v2 per-box invoke entry (not yet used for calls) + invoke_id: Option, } #[derive(Debug, Clone, Copy)] struct MethodSpec { @@ -124,7 +128,7 @@ impl PluginLoaderV2 { let lib = unsafe { Library::new(&lib_path) }.map_err(|_| BidError::PluginError)?; let lib_arc = Arc::new(lib); - // Resolve required invoke symbol (TypeBox v2: nyash_plugin_invoke) + // Resolve required invoke symbol (legacy library-level): nyash_plugin_invoke unsafe { let invoke_sym: Symbol< unsafe extern "C" fn(u32, u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32, @@ -152,6 +156,35 @@ impl PluginLoaderV2 { .insert(lib_name.to_string(), Arc::new(loaded)); } + // Try to resolve Nyash ABI v2 per-box TypeBox 
symbols and record invoke_id + // Symbol pattern: nyash_typebox_ + for box_type in &lib_def.boxes { + let sym_name = format!("nyash_typebox_{}\0", box_type); + unsafe { + if let Ok(tb_sym) = lib_arc.get::>(sym_name.as_bytes()) { + let st: &NyashTypeBoxFfi = &*tb_sym; + // Validate ABI tag 'TYBX' (0x54594258) and basic invariants + let abi_ok = st.abi_tag == 0x5459_4258 + && st.struct_size as usize >= std::mem::size_of::(); + if !abi_ok { + continue; + } + // Remember invoke_id in box_specs for (lib_name, box_type) + if let Some(invoke_id) = st.invoke_id { + let key = (lib_name.to_string(), box_type.to_string()); + let mut map = self.box_specs.write().map_err(|_| BidError::PluginError)?; + let entry = map.entry(key).or_insert(LoadedBoxSpec { + type_id: None, + methods: HashMap::new(), + fini_method_id: None, + invoke_id: None, + }); + entry.invoke_id = Some(invoke_id); + } + } + } + } + Ok(()) } diff --git a/tools/bootstrap_selfhost_smoke.sh b/tools/bootstrap_selfhost_smoke.sh index 78de4da0..2a89e61a 100644 --- a/tools/bootstrap_selfhost_smoke.sh +++ b/tools/bootstrap_selfhost_smoke.sh @@ -13,13 +13,16 @@ fi echo "[bootstrap] c0 (rust) → c1 (ny) → c1' parity (JIT-only)" >&2 # c0: baseline run (rust path) -NYASH_DISABLE_PLUGINS=1 NYASH_CLI_VERBOSE=1 "$BIN" --backend vm "$ROOT_DIR/apps/examples/string_p0.nyash" > /tmp/nyash-c0.out +timeout -s KILL 20s env NYASH_DISABLE_PLUGINS=1 NYASH_CLI_VERBOSE=1 \ + "$BIN" --backend vm "$ROOT_DIR/apps/examples/string_p0.nyash" > /tmp/nyash-c0.out # c1: try Ny compiler path (flagged); tolerate fallback to rust path -NYASH_DISABLE_PLUGINS=1 NYASH_USE_NY_COMPILER=1 NYASH_CLI_VERBOSE=1 "$BIN" --backend vm "$ROOT_DIR/apps/examples/string_p0.nyash" > /tmp/nyash-c1.out || true +timeout -s KILL 20s env NYASH_DISABLE_PLUGINS=1 NYASH_USE_NY_COMPILER=1 NYASH_CLI_VERBOSE=1 \ + "$BIN" --backend vm "$ROOT_DIR/apps/examples/string_p0.nyash" > /tmp/nyash-c1.out || true # c1': re-run (simulated second pass) -NYASH_DISABLE_PLUGINS=1 
NYASH_USE_NY_COMPILER=1 NYASH_CLI_VERBOSE=1 "$BIN" --backend vm "$ROOT_DIR/apps/examples/string_p0.nyash" > /tmp/nyash-c1p.out || true +timeout -s KILL 20s env NYASH_DISABLE_PLUGINS=1 NYASH_USE_NY_COMPILER=1 NYASH_CLI_VERBOSE=1 \ + "$BIN" --backend vm "$ROOT_DIR/apps/examples/string_p0.nyash" > /tmp/nyash-c1p.out || true H0=$(rg -n '^Result:\s*' /tmp/nyash-c0.out | sed 's/\s\+/ /g') H1=$(rg -n '^Result:\s*' /tmp/nyash-c1.out | sed 's/\s\+/ /g' || true) diff --git a/tools/exe_first_smoke.sh b/tools/exe_first_smoke.sh index 6946baf7..4966ca01 100644 --- a/tools/exe_first_smoke.sh +++ b/tools/exe_first_smoke.sh @@ -15,7 +15,10 @@ mkdir -p dist/nyash_compiler/tmp echo 'return 1+2*3' > dist/nyash_compiler/tmp/sample_exe_smoke.nyash echo "[3/4] Running parser EXE → JSON ..." -(cd dist/nyash_compiler && ./nyash_compiler tmp/sample_exe_smoke.nyash > sample.json) +(cd dist/nyash_compiler && timeout -s KILL 60s ./nyash_compiler tmp/sample_exe_smoke.nyash > sample.json) + +echo "[3.5/4] Validating JSON schema ..." +python3 tools/validate_mir_json.py dist/nyash_compiler/sample.json if ! head -n1 dist/nyash_compiler/sample.json | grep -q '"kind":"Program"'; then echo "error: JSON does not look like a Program" >&2 @@ -26,7 +29,7 @@ echo "[4/4] Executing via bridge (pipe) to verify semantics ..." # Keep core minimal and deterministic export NYASH_DISABLE_PLUGINS=1 set +e -cat dist/nyash_compiler/sample.json | ./target/release/nyash --ny-parser-pipe --backend vm >/dev/null +timeout -s KILL 60s bash -c 'cat dist/nyash_compiler/sample.json | ./target/release/nyash --ny-parser-pipe --backend vm >/dev/null' RC=$? 
set -e if [[ "$RC" -ne 7 ]]; then @@ -36,4 +39,3 @@ fi echo "✅ EXE-first smoke passed (parser EXE + bridge run)" exit 0 - diff --git a/tools/validate_mir_json.py b/tools/validate_mir_json.py new file mode 100644 index 00000000..f9773d54 --- /dev/null +++ b/tools/validate_mir_json.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Validate a MIR JSON file against the Nyash JSON v0 schema. + +Usage: + python3 tools/validate_mir_json.py [--schema docs/reference/mir/json_v0.schema.json] + +Requires the 'jsonschema' Python package. Install via: + python3 -m pip install jsonschema +""" + +import argparse +import json +import sys +from pathlib import Path + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument('json_file', help='MIR JSON file path') + ap.add_argument('--schema', default='docs/reference/mir/json_v0.schema.json', help='Schema JSON path') + args = ap.parse_args() + + try: + import jsonschema # type: ignore + except Exception: + print('[schema] error: Python package "jsonschema" not found.\n' + 'Install with: python3 -m pip install jsonschema', file=sys.stderr) + return 2 + + try: + with open(args.json_file, 'r', encoding='utf-8') as f: + data = json.load(f) + except Exception as e: + print(f'[schema] error: failed to read JSON: {e}', file=sys.stderr) + return 3 + + try: + with open(args.schema, 'r', encoding='utf-8') as f: + schema = json.load(f) + except Exception as e: + print(f'[schema] error: failed to read schema: {e}', file=sys.stderr) + return 4 + + try: + jsonschema.validate(instance=data, schema=schema) + except jsonschema.ValidationError as e: # type: ignore + # Show human-friendly context + path = '/'.join([str(p) for p in e.path]) + print(f'[schema] validation failed at $.{path}: {e.message}', file=sys.stderr) + return 5 + + print('[schema] validation OK') + return 0 + +if __name__ == '__main__': + sys.exit(main()) +