From cc8b27a1aa3a2fc81969a6058a329341ff88fb6e Mon Sep 17 00:00:00 2001 From: tomoaki Date: Wed, 24 Dec 2025 03:17:30 +0900 Subject: [PATCH] feat(weak): Phase 285A1 - Weak Field Contract (Strict Type Enforcement) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove automatic WeakNew conversion and enforce strict compile-time type checking for weak field assignments. Only 3 assignment types allowed: 1. Result of weak(x) call (WeakRef type) 2. Existing WeakRef variable (e.g., me.parent = other.parent) 3. Void/null (clear operation) **Implementation**: - Added MirType::WeakRef to type system (src/mir/types.rs) - Track WeakRef type in emit_weak_new() even in pure mode - Weak field reads return WeakRef without auto-upgrade - Removed automatic WeakNew conversion from field writes - Implemented check_weak_field_assignment() with actionable errors - Fixed null literal type tracking (Phase 285A1.1: Unknown → Void) **Testing**: - 5 test fixtures (3 OK, 2 NG cases) - all passing - Smoke test: phase285_weak_field_vm.sh - Error messages guide users to use weak() or null **Documentation**: - Updated lifecycle.md SSOT with weak field contract 🤖 Generated with Claude Code Co-Authored-By: Claude Sonnet 4.5 --- apps/tests/phase285_weak_field_ng_boxref.hako | 12 + .../phase285_weak_field_ng_primitive.hako | 11 + .../phase285_weak_field_ok_explicit.hako | 13 + .../phase285_weak_field_ok_transfer.hako | 14 + apps/tests/phase285_weak_field_ok_void.hako | 12 + .../phases/phase-285/CLAUDE_CODE_RUNBOOK.md | 143 ++++++++ .../main/phases/phase-285/P0-INSTRUCTIONS.md | 22 +- .../current/main/phases/phase-285/README.md | 46 ++- docs/reference/language/lifecycle.md | 335 ++++++++++++++++++ .../mir_interpreter/handlers/type_ops.rs | 1 + src/mir/builder/emission/constant.rs | 6 +- src/mir/builder/fields.rs | 65 +++- src/mir/builder/utils.rs | 15 +- src/mir/printer_helpers.rs | 1 + src/mir/region/mod.rs | 1 + src/mir/types.rs | 1 + 
.../lifecycle/phase285_leak_report_vm.sh | 49 +++ .../lifecycle/phase285_weak_basic_llvm.sh | 10 + .../quick/lifecycle/phase285_weak_basic_vm.sh | 39 ++ .../quick/lifecycle/phase285_weak_field_vm.sh | 35 ++ 20 files changed, 802 insertions(+), 29 deletions(-) create mode 100644 apps/tests/phase285_weak_field_ng_boxref.hako create mode 100644 apps/tests/phase285_weak_field_ng_primitive.hako create mode 100644 apps/tests/phase285_weak_field_ok_explicit.hako create mode 100644 apps/tests/phase285_weak_field_ok_transfer.hako create mode 100644 apps/tests/phase285_weak_field_ok_void.hako create mode 100644 docs/development/current/main/phases/phase-285/CLAUDE_CODE_RUNBOOK.md create mode 100644 docs/reference/language/lifecycle.md create mode 100644 tools/smokes/v2/profiles/quick/lifecycle/phase285_leak_report_vm.sh create mode 100644 tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_basic_llvm.sh create mode 100644 tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_basic_vm.sh create mode 100644 tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_field_vm.sh diff --git a/apps/tests/phase285_weak_field_ng_boxref.hako b/apps/tests/phase285_weak_field_ng_boxref.hako new file mode 100644 index 00000000..86c889a0 --- /dev/null +++ b/apps/tests/phase285_weak_field_ng_boxref.hako @@ -0,0 +1,12 @@ +box Node { + init { weak parent } +} + +static box Main { + main() { + local n1 = new Node() + local n2 = new Node() + n1.parent = n2 // ❌ Direct BoxRef + return 0 + } +} diff --git a/apps/tests/phase285_weak_field_ng_primitive.hako b/apps/tests/phase285_weak_field_ng_primitive.hako new file mode 100644 index 00000000..4a479b6e --- /dev/null +++ b/apps/tests/phase285_weak_field_ng_primitive.hako @@ -0,0 +1,11 @@ +box Node { + init { weak parent } +} + +static box Main { + main() { + local n = new Node() + n.parent = 42 // ❌ Integer + return 0 + } +} diff --git a/apps/tests/phase285_weak_field_ok_explicit.hako b/apps/tests/phase285_weak_field_ok_explicit.hako new file mode 
100644 index 00000000..50e4b19f --- /dev/null +++ b/apps/tests/phase285_weak_field_ok_explicit.hako @@ -0,0 +1,13 @@ +box Node { + init { weak parent } +} + +static box Main { + main() { + local n1 = new Node() + local n2 = new Node() + n1.parent = weak(n2) // ✅ Explicit weak() + print("OK: explicit weak()") + return 0 + } +} diff --git a/apps/tests/phase285_weak_field_ok_transfer.hako b/apps/tests/phase285_weak_field_ok_transfer.hako new file mode 100644 index 00000000..b6c0f1ac --- /dev/null +++ b/apps/tests/phase285_weak_field_ok_transfer.hako @@ -0,0 +1,14 @@ +box Node { + init { weak parent, weak sibling } +} + +static box Main { + main() { + local n1 = new Node() + local n2 = new Node() + n1.parent = weak(n2) + n1.sibling = n1.parent // ✅ WeakRef → WeakRef + print("OK: weak field transfer") + return 0 + } +} diff --git a/apps/tests/phase285_weak_field_ok_void.hako b/apps/tests/phase285_weak_field_ok_void.hako new file mode 100644 index 00000000..eebbba34 --- /dev/null +++ b/apps/tests/phase285_weak_field_ok_void.hako @@ -0,0 +1,12 @@ +box Node { + init { weak parent } +} + +static box Main { + main() { + local n = new Node() + n.parent = null // ✅ Clear (null keyword, equivalent to Void) + print("OK: Void clear") + return 0 + } +} diff --git a/docs/development/current/main/phases/phase-285/CLAUDE_CODE_RUNBOOK.md b/docs/development/current/main/phases/phase-285/CLAUDE_CODE_RUNBOOK.md new file mode 100644 index 00000000..b1b23576 --- /dev/null +++ b/docs/development/current/main/phases/phase-285/CLAUDE_CODE_RUNBOOK.md @@ -0,0 +1,143 @@ +# Claude Code Runbook (Phase 285): VM↔LLVM lifecycle conformance + +This file is an instruction sheet for an implementation agent (Claude Code) to make the language SSOT pass end-to-end across backends. + +Language SSOT: +- Lifecycle/weak/fini/GC policy: `docs/reference/language/lifecycle.md` +- Truthiness + `null`/`void`: `docs/reference/language/types.md` + +Non-goal: changing language semantics. 
Any backend drift must be fixed as an implementation bug or explicitly tracked as “unsupported”. + +## What to implement (in order) + +### 0) Preflight (must-pass before any weak smokes) + +Confirm the following are implemented; if any are missing, do **not** run weak fixtures yet: +- `weak(x)` can be parsed and lowered into MIR (WeakRef/WeakNew). +- VM has a handler for MIR `WeakRef/WeakNew/WeakLoad` (no panic/unimplemented). +- `WeakRef.weak_to_strong()` exists at the language surface. + +If any are missing, choose one: +- **Option A**: build the missing weak infrastructure first. +- **Option B**: temporarily scope to exit-time leak report only (skip weak smokes, document as “unsupported”). + +### 1) WeakRef semantics (VM + LLVM) + +Required behavior: +- `weak(x)` creates a non-owning WeakRef. +- `w.weak_to_strong()` returns a strong BoxRef when the target is usable; otherwise returns `null` (runtime `Void`). +- WeakRef does not auto-upgrade on field access (field read returns WeakRef). +- WeakRef equality uses a stable token (do not make `dropped==dropped` true for unrelated targets). + +Conformance checks: +- VM: weak works in real execution (not just `toString()`). +- LLVM (harness): must match VM behavior for the same program output/exit code. +- WASM: if unsupported, keep it explicitly documented as unsupported; do not pretend it is correct by copying strong refs. + +### 2) Exit-time “roots still held” report (diagnostic, default-off) + +Goal: when a program ends while strong references are still held in global roots, print a report so developers can see leaks/cycles. + +Requirements: +- Must be default-off. +- Must not change program meaning (only prints when enabled). +- Should report “what roots still hold strong references”, not attempt to “fix” them. 
+ +Suggested interface (choose one and document it): +- Env: `NYASH_LEAK_LOG={1|2}` + - `1`: summary counts + - `2`: verbose (print up to N names/entries, with truncation) + +Root candidates to include (best-effort): +- `env.modules` registry +- plugin singletons / plugin registry +- host handles / external handles registry + +Output stability: +- Use stable tags like `[leak]` or `[lifecycle/leak]` so smokes can match logs. +- Truncate long lists deterministically (e.g., first N sorted entries). + +### 3) Cross-backend smokes (VM + LLVM) + +Add smokes under `tools/smokes/v2/` (preferred) to lock behavior. +Keep tests fast and deterministic. + +Recommended fixtures (as `.hako` or inline sources in the smoke): + +**A. Weak weak_to_strong success/fail** +```nyash +box SomeBox { x } +static box Main { + main() { + local x = new SomeBox() + local w = weak(x) + x = null + local y = w.weak_to_strong() + if y == null { print("ok: dropped") } + return 0 + } +} +``` +Expected (VM and LLVM): prints `ok: dropped`, exit 0. + +**B. Strong cycle + leak report** +```nyash +box Node { other } +static box Main { + main() { + local a = new Node() + local b = new Node() + a.other = b + b.other = a + print("ok: cycle-created") + return 0 + } +} +``` +Expected: +- Program output stays `ok: cycle-created`. +- With leak report enabled, a report appears at exit (VM at minimum; LLVM if feasible). + +**C. Weak breaks cycle (no strong-cycle leak)** +```nyash +box Node { other_weak } +static box Main { + main() { + local a = new Node() + local b = new Node() + a.other_weak = weak(b) + b.other_weak = weak(a) + print("ok: weak-cycle") + return 0 + } +} +``` +Expected: +- Program output stays `ok: weak-cycle`. +- Leak report should not claim an obvious strong-cycle root for these nodes (best-effort; depends on what is rooted globally). 
+ +### 4) Update docs after implementation + +When the above is implemented: +- Add the chosen env var to `docs/reference/environment-variables.md` (avoid env var sprawl; keep it in the diagnostics table). +- If any backend remains unsupported, update `docs/reference/language/lifecycle.md` “Implementation status” with an explicit note and link. + +## Commands (suggested) + +Build: +- `cargo build --release --features llvm` + +Run VM: +- `./target/release/hakorune --backend vm local_tests/phase285_weak_basic.hako` + +Run LLVM: +- `NYASH_LLVM_USE_HARNESS=1 ./target/release/hakorune --backend llvm local_tests/phase285_weak_basic.hako` + +Leak report (example if env var chosen): +- `NYASH_LEAK_LOG=1 ./target/release/hakorune --backend vm local_tests/phase285_cycle.hako` + +## Done criteria (acceptance) + +- VM and LLVM outputs match for weak fixtures (success/fail). +- Strong-cycle fixture produces a visible exit-time report when the diagnostic is enabled (and produces no report when disabled). +- Weak-cycle fixture does not falsely report a strong-cycle “leak” for the nodes (within the documented root scope). diff --git a/docs/development/current/main/phases/phase-285/P0-INSTRUCTIONS.md b/docs/development/current/main/phases/phase-285/P0-INSTRUCTIONS.md index 7679480c..84905f0c 100644 --- a/docs/development/current/main/phases/phase-285/P0-INSTRUCTIONS.md +++ b/docs/development/current/main/phases/phase-285/P0-INSTRUCTIONS.md @@ -5,23 +5,27 @@ ## 1. 
このP0でやること(コード変更なし) 1) 仕様SSOTを 1 ファイルにまとめる - - `docs/development/current/main/phases/phase-285/README.md` を入口SSOTとして育てる。 + - 言語レベルの SSOT は `docs/reference/language/lifecycle.md`(lifecycle/weak/fini/GC)と `docs/reference/language/types.md`(truthiness と `null`/`void`)に集約する。 + - Phase 285 は「実装の棚卸し・差分追跡・受け入れ条件」を書く(言語SSOTを書き換えない)。 -2) 用語と境界を固定する - - strong/weak/roots/finalizer/collection の定義 - - weakref の API(upgrade/生存判定) - - finalizer の禁止事項(再入・例外・順序) +2) 用語と境界を固定する + - strong/weak/roots/finalizer/collection の定義 + - weakref の API(weak_to_strong/生存判定) + - finalizer の禁止事項(再入・例外・順序) -3) LLVM harness の扱いを明文化する - - 未対応なら “未対応” を仕様として書く(差分を隠さない)。 +3) LLVM harness の扱いを明文化する + - 未対応なら “未対応” を差分として書く(差分を隠さない)。 + - 差分は「仕様差」ではなく「未実装/バグ/保留」として分類する(言語SSOTは揺らさない)。 ## 2. README に必ず書く事項(チェックリスト) - [ ] “roots” は何か(stack/local/global/handle/plugin 等) -- [ ] strong/weak の意味(upgrade の成否条件) +- [ ] strong/weak の意味(weak_to_strong の成否条件) +- [ ] strong/weak の意味(weak_to_strong の成否条件) - [ ] finalizer はあるか/いつ発火するか/何が禁止か - [ ] GC/解放のトリガ(自動/手動/閾値/テスト用) - [ ] VM と LLVM harness の差分(未対応の場合の方針) + - 分類: (A) 仕様通り / (B) 未実装 / (C) 既知バグ / (D) 仕様外(禁止) ## 3. 
次(P1/P2)への導線(箇条書きでOK) @@ -34,7 +38,7 @@ - `src/value.rs` - `NyashValue::WeakBox` の生成箇所(weak をどう作るか) - - `upgrade()` 失敗時の観測方法(文字列化/判定API) + - `weak_to_strong()` 失敗時の観測方法(文字列化/判定API) - unit test: `test_weak_reference_drop` の仕様(何を固定しているか) - `src/finalization.rs` - finalizer の存在(あれば: 登録、呼び出しタイミング、順序) diff --git a/docs/development/current/main/phases/phase-285/README.md b/docs/development/current/main/phases/phase-285/README.md index b62222f5..07ac9539 100644 --- a/docs/development/current/main/phases/phase-285/README.md +++ b/docs/development/current/main/phases/phase-285/README.md @@ -1,4 +1,4 @@ -# Phase 285: Box lifecycle / weakref / finalization / GC SSOT +# Phase 285: Box lifecycle / weakref / finalization / GC conformance Status: Planned (design-first) @@ -6,6 +6,12 @@ Status: Planned (design-first) Box の生存期間(強参照/弱参照/解放/最終化/GC)を SSOT として固定し、移行期間でも意味論が割れない状態にする。 +Language-level SSOT: +- Lifecycle/weak/fini/GC policy: `docs/reference/language/lifecycle.md` +- Truthiness + `null`/`void`: `docs/reference/language/types.md` + +This Phase document is not the language SSOT; it tracks implementation status, backend gaps, and acceptance criteria. 
+ ## Why now - JoinIR/Plan/compose の収束が進むほど、実行時の “値の寿命” の揺れが目立つ。 @@ -22,26 +28,41 @@ Box の生存期間(強参照/弱参照/解放/最終化/GC)を SSOT とし ## Snapshot(今わかっていること) - weakref は `Weak>` で保持される(`NyashValue::WeakBox`) -- `WeakBox` の `to_string()` は `upgrade()` を試み、`WeakRef(null)` 表示になりうる(観測可能) +- `WeakBox` の `to_string()` は `weak_to_strong()` を試み、`WeakRef(null)` 表示になりうる(観測可能) - `src/value.rs` に weakref の drop 挙動を固定する unit test がある(`test_weak_reference_drop`) ## Responsibility Map(どこが仕様を決めるか) -- **SSOT(意味)**: Rust VM 実装(`src/value.rs`, `src/finalization.rs` 周辺) -- **SSOT(観測)**: fixture/smoke(Phase 285 P2 で作る) -- **LLVM harness**: まずは “差分を仕様として明文化” が優先(未対応なら SKIP を SSOT 化する) +- **SSOT(意味)**: `docs/reference/language/*`(言語レベルのSSOT) +- **Conformance**: Rust VM / LLVM harness / WASM / JIT など各バックエンド実装 +- **観測の固定**: fixture/smoke(Phase 285 P2 で作る) ## 用語(P0で固定する) - **Strong reference**: 所有参照(`Arc` 等で Box を保持) -- **Weak reference**: 非所有参照(`Weak` / `upgrade()` が失敗しうる) -- **Upgrade**: weak → strong の昇格(成功/失敗が意味論) +- **Weak reference**: 非所有参照(`Weak` / `weak_to_strong()` が失敗しうる) +- **Weak-to-strong**: weak → strong の昇格(成功/失敗が意味論) - **Roots**: 解放/GC から保護される参照集合(stack/local/global/handle/plugin) - **Finalizer**: 解放に伴う最終化処理(もし存在するなら) +## P0 decisions (docs-only) + +- Weak の観測は `weak_to_strong()` で行い、失敗値は `null`(= runtime `Void` の別名)。 +- `cleanup`(Stage‑3 block-postfix)が「出口で必ず走る」決定的 cleanup を保証する(`catch` の有無に関係なく、常に実行)。 +- GC は意味論ではなく補助(GC off で cycle はリークしうる)。 +- ByRef (`RefGet/RefSet`) は non-owning / non-escaping(寿命・弱参照・GC の道具にしない)。 + +## RUNBOOK caveat (implementation reality) + +The runbook assumes WeakRef infrastructure exists in the VM and lowering. 
+If any of the following are missing, treat weak smokes as **unsupported** and scope to exit-time leak report first: +- `weak(x)` parse/lower +- VM handler for MIR WeakRef/WeakNew/WeakLoad +- language-surface `weak_to_strong()` on WeakRef + ## Questions to Answer (P0/P1) -- weakref の “生存判定” は何で観測できるか(`toString` / `is_alive` / `upgrade` API など) +- weakref の “生存判定” は何で観測できるか(`toString` / `is_alive` / `weak_to_strong` API など) - finalizer は存在するか / いつ発火するか(drop 時?GC 時?明示 API?) - finalizer 内での禁止事項(再入、例外、I/O、allocation)をどうするか - LLVM harness の扱い(現状未対応なら “未対応として SSOT 化”) @@ -51,7 +72,7 @@ Box の生存期間(強参照/弱参照/解放/最終化/GC)を SSOT とし ### P0(docs-only) - 用語の固定(strong/weak/roots/finalizer/collection) -- 仕様の固定(weakref の upgrade 成否、finalizer の発火条件、禁止事項) +- 仕様の固定(weakref の weak_to_strong 成否、finalizer の発火条件、禁止事項) - “LLVM harness の扱い” を明文化(未対応なら未対応として SSOT に書く) ### P1(investigation) @@ -69,3 +90,10 @@ Box の生存期間(強参照/弱参照/解放/最終化/GC)を SSOT とし - GC アルゴリズム刷新(RC→tracing 等の設計変更) - LLVM harness に同等機能を “一気に” 実装(差分の記録→段階導入を優先) + +## Acceptance criteria (P2+) + +- VM と LLVM で、weak が仕様通り動作する(`weak_to_strong()` 成功/失敗が一致、失敗は `null`)。 +- 強参照サイクルを意図的に作ったとき、(GC off なら)回収されないことが観測できる。 +- 終了時に「強参照が残っている root」をデバッグ出力できる(default-off の診断フラグ)。 + - これは意味論ではなく診断であり、ON/OFF でプログラムの意味を変えない。 diff --git a/docs/reference/language/lifecycle.md b/docs/reference/language/lifecycle.md new file mode 100644 index 00000000..055a784f --- /dev/null +++ b/docs/reference/language/lifecycle.md @@ -0,0 +1,335 @@ +# Box Lifecycle and Finalization (SSOT) + +Status: SSOT (language-level), with implementation status notes. + +This document defines the Nyash object lifecycle model: lexical scope, ownership (strong/weak), finalization (`fini()`), and what is (and is not) guaranteed across backends. + +## Terms + +- **Binding**: a local variable slot (created by `local`) that points to a value. +- **Box value**: an object reference (user-defined / builtin / plugin). 
+- **Strong reference**: an owning reference that contributes to keeping the object alive. +- **Weak reference**: a non-owning reference; it does not keep the object alive and may become dead. +- **Finalization (`fini`)**: a logical end-of-life hook. It is not “physical deallocation”. + +## 0) Two-layer model (resource vs memory) + +Nyash separates two concerns: + +- **Resource lifecycle (deterministic)**: `fini()` defines *logical* end-of-life and must be safe and explicit. +- **Heap memory reclamation (non-deterministic)**: physical memory is reclaimed by the runtime implementation (typically reference counting). Timing is not part of the language semantics. + +This split lets Nyash keep “箱理論” simple: +- Programs must use `fini()` (or sugar that guarantees it) to deterministically release external resources (fd/socket/native handles). +- Programs must not rely on GC timing for correctness. + +## 1) Scope model (locals) + +- `local` is block-scoped: the binding exists from its declaration to the end of the lexical block (`{ ... }`). +- Leaving a block drops its bindings immediately (including inner `{}` blocks). +- Dropping a binding reduces strong ownership held by that binding. It may or may not physically deallocate the object (depends on other strong references). + +This is the “variable lifetime” rule. Object lifetime is defined below. + +## 2) Object lifetime (strong / weak) + +### Strong ownership + +- A strong reference keeps the object alive. +- When the last strong reference to an object disappears, the object becomes eligible for physical destruction by the runtime. + - In typical implementations this is immediate (reference-counted drop) for acyclic graphs, but the language does not require immediacy. + +### Weak references + +Weak references exist to avoid cycles and to represent back-pointers safely. + +Language-level guidance: +- Locals and return values are typically strong. 
+- Back-pointers / caches / parent links that would create cycles should be weak. + +Required property: +- A weak reference never keeps the object alive. + +Observable operations (surface-level; exact API depends on the box type): +- “Is alive?” check. +- Weak-to-strong conversion (may fail): `weak_to_strong()`. + +## 3) Finalization (`fini`) — what it means + +`fini()` is a **logical** termination hook: +- After `fini()` has executed successfully for an object, the object must be treated as unusable (use-after-fini is an error). +- `fini()` must be **idempotent** (calling it multiple times is allowed and must not double-free resources). + - This supports “external force fini” and best-effort cleanup paths safely. + +### Fail-fast after `fini` + +After an object is finalized, operations must fail fast (use-after-fini). +Permitted exceptions (optional, per type) are strictly observational operations such as identity / debug string. + +### Object states (Alive / Dead / Freed) + +Nyash distinguishes: + +- **Alive**: normal state; methods/fields are usable. +- **Dead**: finalized by `fini()`; object identity may still exist but is not usable. +- **Freed**: physically destroyed by the runtime (implementation detail). + +State transitions (conceptual): + +- `Alive --fini()--> Dead --(runtime)--> Freed` +- `Alive --(runtime)--> Freed` + +SSOT rule: +- `fini()` is the only operation that creates the **Dead** state. +- Runtime reclamation does not imply `fini()` was executed. + +### Dead: allowed vs forbidden operations + +Allowed on **Dead** (minimal set): +- Debug/observation: `toString`, `typeName`, `id` (if provided) +- Identity checks: `==` (identity only), and identity-based hashing if the type supports hashing + +Forbidden on **Dead** (Fail-Fast, UseAfterFini): +- Field read/write +- Method calls +- ByRef (`RefGet/RefSet`) operations +- Conversions / truthiness (`if dead_box { ... 
}` is an error) +- Creating new weak references from a dead object (`weak(dead)` is an error) + +### Finalization precedence + +When finalization is triggered (by explicit call or by an owning context; see below): +1) If the object is already finalized, do nothing (idempotent). +2) Run user-defined `fini()` if present. +3) Run automatic cascade finalization for remaining **strong-owned fields** (weak fields are skipped). +4) Clear fields / invalidate internal state. + +### Weak references are non-owning + +Weak references are values (`WeakRef`) that can be stored in locals or fields: +- They are **not** part of ownership. +- Automatic cascade finalization must not follow weak references. +- Calling `fini()` “through” a weak reference is invalid (non-owning references cannot decide the target’s lifetime). + +## 4) Ownership and “escaping” out of a scope + +Nyash distinguishes “dropping a binding” from “finalizing an object”. + +Finalization is tied to **ownership**, not merely being in scope. + +### Owning contexts + +An object is considered owned by one of these contexts: +- A local binding (typical case). +- A strong-owned field of another object. +- A module/global registry entry (e.g., `env.modules`). +- A runtime host handle / singleton registry (typical for plugins). + +### Escapes (ownership moves) + +If a value is moved into a longer-lived owning context before the current scope ends, then the current scope must not finalize it. + +Common escape paths: +- Assigning into an enclosing-scope binding (updates the owner). +- Returning via `outbox` (ownership moves to the caller). +- Storing into a strong-owned field of an object that outlives the scope. +- Publishing into global/module registries. + +This rule is what keeps “scope finalization” from breaking shared references. 
+ +## 4.1) What is guaranteed to run automatically + +Language guarantee (deterministic): +- Only **explicit cleanup constructs** guarantee cleanup execution for all exits (return/break/continue/error). + +Recommended SSOT surface: +- `cleanup` blocks (Stage‑3): attach cleanup code structurally. +- Future sugar may exist (`defer`, RAII-style `using`), but it must lower to `cleanup` semantics. + +Non-guarantees: +- “Leaving a block” does not by itself guarantee `fini()` execution for an object, because aliasing/escaping is allowed. +- GC must not call `fini()` as part of meaning. + +### `cleanup` (block-postfix) — the deterministic “defer” + +The primary guaranteed cleanup construct is block-postfix `cleanup` (Stage‑3): + +```nyash +{ + local f = open(path) + do_work(f) +} cleanup { + f.fini() +} +``` + +SSOT semantics: +- The `cleanup` block runs exactly once on every exit path from the attached block (normal fallthrough, `return`, `break`, `continue`, and errors). +- The `cleanup` block executes *before* the block’s locals are dropped, and can reference locals from that block. +- `cleanup` must not change the meaning of the program aside from running its code; it is not implicit GC/finalization. +Note: +- `cleanup` may appear with or without `catch`. It always runs after `catch` (if present). + +## 4.2) Weak references (surface model) + +Weak references exist to avoid strong cycles and to model back-pointers. + +SSOT operations: +- `weak(x)` produces a `WeakRef` to `x` (x must be Alive). +- `weakRef.weak_to_strong()` returns the target box if it is usable, otherwise `null` (none). + - It returns `null` if the target is **Dead** (finalized) or **Freed** (collected). + - Note: `null` and `void` are equivalent at runtime (SSOT: `docs/reference/language/types.md`). + +WeakRef in fields: +- Reading a field that stores a `WeakRef` yields a `WeakRef`. It does not auto-upgrade. + +Recommended usage pattern: +```nyash +local x = w.weak_to_strong() +if x != null { + ... 
+} +``` + +WeakRef equality: +- `WeakRef` carries a stable target token (conceptually: `WeakToken`). +- `w1 == w2` compares tokens. This is independent of Alive/Dead/Freed. + - "dead==dead" is true only when both weakrefs point to the same original target token. + +### Weak Field Assignment Contract (Phase 285A1) + +Weak fields enforce strict type requirements at compile time: + +**Allowed assignments** (3 cases): +1. **Explicit weak reference**: `me.parent = weak(p)` +2. **WeakRef variable**: `me.parent = other.parent` (where `other.parent` is weak field) +3. **Void**: `me.parent = Void` (clear operation; null is sugar for Void) + +**Forbidden assignments** (Fail-Fast compile error): +- Direct BoxRef: `me.parent = p` where `p` is BoxRef +- Primitives: `me.parent = 42` +- Any non-WeakRef type without explicit `weak()` + +**Error message example**: +``` +Cannot assign Box (NodeBox) to weak field 'Tree.parent'. +Use weak(...) to create weak reference: me.parent = weak(value) +``` + +**Rationale**: Explicit `weak()` calls make the semantic difference between strong and weak references visible. This prevents: +- Accidental strong references in weak fields (reference cycles) +- Confusion about object lifetime and ownership +- Silent bugs from automatic conversions + +**Example**: +```nyash +box Node { + weak parent + + set_parent(p) { + // ❌ me.parent = p // Compile error + // ✅ me.parent = weak(p) // Explicit weak() + // ✅ me.parent = Void // Clear operation (SSOT: Void primary) + } + + copy_parent(other: Node) { + // ✅ me.parent = other.parent // WeakRef → WeakRef + } +} +``` + +## 5) Cycles and GC (language-level policy) + +### Cycles + +Nyash allows object graphs; strong cycles can exist unless the program avoids them. + +Policy: +- Programs should use **weak** references for back-pointers / parent links to avoid strong cycles. +- If a strong cycle exists, memory reclamation is not guaranteed (it may leak). This is allowed behavior in “no cycle collector” mode. 
+ +Important: weak references themselves do not require tracing GC. +- They require a runtime liveness mechanism (e.g., an `Rc/Weak`-style control block) so that “weak_to_strong” can succeed/fail safely. + +### GC modes + +GC is treated as an optimization/diagnostics facility, not as a semantic requirement. In practice, this means “cycle collection / tracing”, not “basic refcount drop”. + +- **GC off**: reference-counted reclamation still applies for non-cyclic ownership graphs; strong cycles may leak. +- **GC on**: the runtime may additionally reclaim unreachable cycles eventually; timing is not guaranteed. + +Invariant: +- Whether GC is on or off must not change *program meaning*, except for observability related to resource/memory timing (which must not be relied upon for correctness). + +## 6) ByRef (`RefGet/RefSet`) — borrowed slot references (non-owning) + +Nyash has an internal “ByRef” concept (MIR `RefGet/RefSet`) used to access and mutate fields through a **borrowed reference to a storage slot**. + +Intended use cases: +- Field get/set lowering with visibility checks (public/private) and delegation (from/override). +- Passing a “mutable reference” to runtime helpers or plugin calls without copying large values. + +SSOT constraints: +- ByRef is **non-owning**: it does not keep the target alive and does not affect strong/weak counts. +- ByRef is **non-escaping**: it must not be stored in fields/arrays/maps, returned, captured by closures, or placed into global registries. +- ByRef is **scope-bound**: it is only valid within the dynamic extent where it was produced (typically a single statement or call lowering). +- Using ByRef on **Dead/Freed** targets is an error (UseAfterFini / dangling ByRef). + +These constraints keep “箱理論” simple: ownership is strong/weak; ByRef is a temporary access mechanism only. 
+ +## 7) Diagnostics (non-normative) + +Runtimes may provide diagnostics to help validate lifecycle rules (example: reporting remaining strong roots or non-finalized objects at process exit). These diagnostics are not part of language semantics and must be default-off. + +## 8) Implementation status (non-normative) + +This section documents current backend reality so we can detect drift as bugs. + +### Feature Matrix (Phase 285A0 update) + +| Feature | VM | LLVM | WASM | +|---------|-----|------|------| +| WeakRef (`weak(x)`, `weak_to_strong()`) | ✅ | ❌ unsupported (285A1) | ❌ unsupported | +| Leak Report (`NYASH_LEAK_LOG`) | ✅ | ⚠️ partial (not yet) | ❌ | + +### Notes + +- **Block-scoped locals** are the language model (`local` drops at `}`), but the *observable* effects depend on where the last strong reference is held. +- **WeakRef** (Phase 285A0): VM backend fully supports `weak(x)` and `weak_to_strong()`. LLVM harness support is planned for Phase 285A1. +- **WASM backend** currently treats MIR `WeakNew/WeakLoad` as plain copies (weak behaves like strong). This does not satisfy the SSOT weak semantics yet (see also: `docs/guides/wasm-guide/planning/unsupported_features.md`). +- **Leak Report** (Phase 285): `NYASH_LEAK_LOG={1|2}` prints exit-time diagnostics showing global roots still held (modules, host_handles, plugin_boxes). See `docs/reference/environment-variables.md`. +- Conformance gaps (any backend differences from this document) must be treated as bugs and tracked explicitly; do not "paper over" differences by changing this SSOT without a decision. 
+ +See also: +- `docs/reference/language/variables-and-scope.md` (binding scoping and assignment resolution) +- `docs/reference/boxes-system/memory-finalization.md` (design notes; must not contradict this SSOT) + +## 9) Validation recipes (non-normative) + +WeakRef behavior (weak_to_strong must fail safely): +```nyash +box SomeBox { } +static box Main { + main() { + local x = new SomeBox() + local w = weak(x) + x = null + local y = w.weak_to_strong() + if y == null { print("ok: dropped") } + } +} +``` + +Cycle avoidance (use weak for back-pointers): +```nyash +box Node { next_weak } +static box Main { + main() { + local a = new Node() + local b = new Node() + a.next_weak = weak(b) + b.next_weak = weak(a) + return 0 + } +} +``` diff --git a/src/backend/mir_interpreter/handlers/type_ops.rs b/src/backend/mir_interpreter/handlers/type_ops.rs index f9900cbb..0d297bcc 100644 --- a/src/backend/mir_interpreter/handlers/type_ops.rs +++ b/src/backend/mir_interpreter/handlers/type_ops.rs @@ -38,6 +38,7 @@ fn matches_mir_type(value: &VMValue, ty: &MirType) -> bool { } _ => false, }, + MirType::WeakRef => matches!(value, VMValue::WeakBox(_)), // Phase 285A1 MirType::Future(_) => matches!(value, VMValue::Future(_)), MirType::Array(_) => { // Current VM representation is BoxRef(ArrayBox) (not a distinct VMValue variant). 
diff --git a/src/mir/builder/emission/constant.rs b/src/mir/builder/emission/constant.rs index eb5fe8ea..526e63ce 100644 --- a/src/mir/builder/emission/constant.rs +++ b/src/mir/builder/emission/constant.rs @@ -73,11 +73,11 @@ pub fn emit_null(b: &mut MirBuilder) -> ValueId { dst, value: ConstValue::Null, }); - // Phase 84-1: Null constant type annotation - // Note: MirType has no Null variant, using Unknown as fallback + // Phase 285A1.1: Null constant type annotation + // Null is syntactic sugar for Void (SSOT: lifecycle.md) b.type_ctx .value_types - .insert(dst, crate::mir::MirType::Unknown); + .insert(dst, crate::mir::MirType::Void); dst } diff --git a/src/mir/builder/fields.rs b/src/mir/builder/fields.rs index f45bae7c..3240037e 100644 --- a/src/mir/builder/fields.rs +++ b/src/mir/builder/fields.rs @@ -119,9 +119,17 @@ impl super::MirBuilder { if let Some(class_name) = inferred_class { if let Some(weak_set) = self.comp_ctx.weak_fields_by_box.get(&class_name) { if weak_set.contains(&field) { - let loaded = self.emit_weak_load(field_val)?; - let _ = self.emit_barrier_read(loaded); - return Ok(loaded); + // Phase 285A1: Read weak field returns WeakRef (no auto-upgrade) + // field_val is the result of getField, which we treat as WeakRef + let dst = field_val; // The load result is already our return value + + // Phase 285A1: Mark the result as WeakRef type + self.type_ctx + .value_types + .insert(dst, crate::mir::types::MirType::WeakRef); + + let _ = self.emit_barrier_read(dst); + return Ok(dst); // Return WeakRef directly (no WeakLoad) } } } @@ -145,7 +153,7 @@ impl super::MirBuilder { // LocalSSA: argument in-block (optional safety) value_result = self.local_arg(value_result); - // If base is known and field is weak, create WeakRef before store + // Phase 285A1: If field is weak, enforce type contract (3 allowed cases) if let Some(class_name) = self .type_ctx .value_origin_newbox @@ -154,7 +162,8 @@ impl super::MirBuilder { { if let Some(weak_set) = 
self.comp_ctx.weak_fields_by_box.get(&class_name) { if weak_set.contains(&field) { - value_result = self.emit_weak_new(value_result)?; + // Phase 285A1: Strict type check (no automatic conversion) + self.check_weak_field_assignment(&class_name, &field, value_result)?; } } } @@ -219,4 +228,50 @@ impl super::MirBuilder { Ok(value_result) } + + /// Phase 285A1: Enforce weak field assignment contract (Ok(()) if allowed, Err(message) otherwise) + /// + /// Allowed assignments: + /// 1. WeakRef (from weak() or weak field read) + /// 2. Void (clear operation) + /// + /// Forbidden (Fail-Fast): + /// - BoxRef without weak() + /// - Primitives + /// - Unknown/untracked values + fn check_weak_field_assignment( + &self, + box_name: &str, + field_name: &str, + value_id: ValueId, + ) -> Result<(), String> { + // Look up the type recorded for the assigned value (None if nothing was recorded) + let value_type = self.type_ctx.value_types.get(&value_id); + + match value_type { + // Allowed 1: WeakRef + Some(crate::mir::types::MirType::WeakRef) => Ok(()), + + // Allowed 2: Void (clear) + Some(crate::mir::types::MirType::Void) => Ok(()), + + // Forbidden: None/Unknown (guards against gaps in type tracking) + None | Some(crate::mir::types::MirType::Unknown) => Err(format!( + "Cannot assign untracked value to weak field '{}.{}'. Use weak(...) or Void explicitly.", + box_name, field_name + )), + + // Forbidden: BoxRef + Some(crate::mir::types::MirType::Box(box_type)) => Err(format!( + "Cannot assign Box ({}) to weak field '{}.{}'. Use weak(...) to create weak reference: me.{} = weak(value)", + box_type, box_name, field_name, field_name + )), + + // Forbidden: Primitives and others + Some(other_type) => Err(format!( + "Cannot assign {:?} to weak field '{}.{}'. Weak fields require WeakRef type. Use weak(...) 
or Void.", + other_type, box_name, field_name + )), + } + } } diff --git a/src/mir/builder/utils.rs b/src/mir/builder/utils.rs index 24858bbd..445cf2d1 100644 --- a/src/mir/builder/utils.rs +++ b/src/mir/builder/utils.rs @@ -391,10 +391,19 @@ impl super::MirBuilder { &mut self, box_val: super::ValueId, ) -> Result { - if crate::config::env::mir_core13_pure() { - return Ok(box_val); - } let dst = self.next_value_id(); + + // Phase 285A1: Track WeakRef type (even in pure mode) + self.type_ctx + .value_types + .insert(dst, crate::mir::types::MirType::WeakRef); + + // Phase 285A1: WeakRef type must be tracked even in pure mode + if crate::config::env::mir_core13_pure() { + // Pure mode: still track type, but skip instruction. NOTE(review): dst then has no defining instruction emitted here (the old code returned box_val) - confirm pure-mode consumers never read it + return Ok(dst); + } + self.emit_instruction(super::MirInstruction::WeakRef { dst, op: WeakRefOp::New, diff --git a/src/mir/printer_helpers.rs b/src/mir/printer_helpers.rs index b53baf39..9e3f1b61 100644 --- a/src/mir/printer_helpers.rs +++ b/src/mir/printer_helpers.rs @@ -12,6 +12,7 @@ pub fn format_type(mir_type: &MirType) -> String { MirType::Future(inner_type) => { format!("future<{}>", format_type(inner_type)) } + MirType::WeakRef => "weakref".to_string(), // Phase 285A1 MirType::Void => "void".to_string(), MirType::Unknown => "?".to_string(), } diff --git a/src/mir/region/mod.rs b/src/mir/region/mod.rs index 2c21e410..d0a89d5e 100644 --- a/src/mir/region/mod.rs +++ b/src/mir/region/mod.rs @@ -62,6 +62,7 @@ impl Region { pub fn classify_ref_kind(ty: &MirType) -> RefSlotKind { match ty { MirType::Box(_) | MirType::Array(_) | MirType::Future(_) => RefSlotKind::StrongRoot, + MirType::WeakRef => RefSlotKind::WeakRoot, // Phase 285A1 MirType::Integer | MirType::Float | MirType::Bool | MirType::String => { RefSlotKind::NonRef } diff --git a/src/mir/types.rs b/src/mir/types.rs index 42be1e05..82db5a40 100644 --- 
a/src/mir/types.rs +++ b/src/mir/types.rs @@ -84,6 +84,7 @@ pub enum MirType { Box(String), // Box type with name Array(Box), Future(Box), 
// Future containing a type + WeakRef, // Phase 285A1: Weak reference type Void, Unknown, } diff --git a/tools/smokes/v2/profiles/quick/lifecycle/phase285_leak_report_vm.sh b/tools/smokes/v2/profiles/quick/lifecycle/phase285_leak_report_vm.sh new file mode 100644 index 00000000..ba5db335 --- /dev/null +++ b/tools/smokes/v2/profiles/quick/lifecycle/phase285_leak_report_vm.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# phase285_leak_report_vm.sh - Phase 285: Exit-time leak report smoke test +# +# Verifies NYASH_LEAK_LOG={1,2} produces [lifecycle/leak] output at exit, and that no such output appears when it is not set. + +source "$(dirname "$0")/../../../lib/test_runner.sh" +export SMOKES_USE_PYVM=0 +require_env || exit 2 + +FIXTURE="$NYASH_ROOT/apps/tests/phase285_leak_report.hako" + +# Test 1: Without NYASH_LEAK_LOG - no leak output (NOTE(review): the variable is not unset here; assumes the caller has not exported it) +output_no_log=$(NYASH_DISABLE_PLUGINS=1 "$NYASH_BIN" "$FIXTURE" 2>&1) +if echo "$output_no_log" | grep -q "\[lifecycle/leak\]"; then + log_error "phase285_leak_no_log: [lifecycle/leak] should NOT appear without NYASH_LEAK_LOG" + exit 1 +fi +if ! echo "$output_no_log" | grep -q "ok: cycle-created"; then + log_error "phase285_leak_no_log: Expected 'ok: cycle-created' output" + exit 1 +fi +log_success "phase285_leak_no_log: No leak output when NYASH_LEAK_LOG is unset" + +# Test 2: With NYASH_LEAK_LOG=1 - summary leak output (roots header and modules count line) +output_log1=$(NYASH_LEAK_LOG=1 NYASH_DISABLE_PLUGINS=1 "$NYASH_BIN" "$FIXTURE" 2>&1) +if ! echo "$output_log1" | grep -q "\[lifecycle/leak\] Roots still held at exit:"; then + log_error "phase285_leak_log1: Expected '[lifecycle/leak] Roots still held at exit:' with NYASH_LEAK_LOG=1" + exit 1 +fi +if ! echo "$output_log1" | grep -q "\[lifecycle/leak\].*modules:"; then + log_error "phase285_leak_log1: Expected '[lifecycle/leak] modules: N' with NYASH_LEAK_LOG=1" + exit 1 +fi +if ! 
echo "$output_log1" | grep -q "ok: cycle-created"; then + log_error "phase285_leak_log1: Expected 'ok: cycle-created' output" + exit 1 +fi +log_success "phase285_leak_log1: Summary leak output with NYASH_LEAK_LOG=1" + +# Test 3: With NYASH_LEAK_LOG=2 - verbose leak output (only the 'module names:' line is checked) +output_log2=$(NYASH_LEAK_LOG=2 NYASH_DISABLE_PLUGINS=1 "$NYASH_BIN" "$FIXTURE" 2>&1) +if ! echo "$output_log2" | grep -q "\[lifecycle/leak\].*module names:"; then + log_error "phase285_leak_log2: Expected '[lifecycle/leak] module names:' with NYASH_LEAK_LOG=2" + exit 1 +fi +log_success "phase285_leak_log2: Verbose leak output with NYASH_LEAK_LOG=2" + +log_success "phase285_leak_report_vm: All tests passed" +exit 0 diff --git a/tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_basic_llvm.sh b/tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_basic_llvm.sh new file mode 100644 index 00000000..a7e0ce85 --- /dev/null +++ b/tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_basic_llvm.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# phase285_weak_basic_llvm.sh - Phase 285A0.1: WeakRef basic smoke test (LLVM) +# +# SKIP: WeakRef (weak/weak_to_strong) not yet supported in LLVM harness (Phase 285A1) + +source "$(dirname "$0")/../../../lib/test_runner.sh" +require_env || exit 2 + +test_skip "phase285_weak_basic_llvm" "WeakRef (weak/weak_to_strong) not yet supported in LLVM harness (Phase 285A1)" +exit 0 diff --git a/tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_basic_vm.sh b/tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_basic_vm.sh new file mode 100644 index 00000000..0859254b --- /dev/null +++ b/tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_basic_vm.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# phase285_weak_basic_vm.sh - Phase 285A0.1: WeakRef basic smoke test (VM) +# +# Verifies weak(x) and weak_to_strong() work correctly in VM backend. 
+# Note: Full drop semantics test deferred (needs GC/scope analysis) +# SSOT: docs/reference/language/lifecycle.md:179 + +source "$(dirname "$0")/../../../lib/test_runner.sh" +export SMOKES_USE_PYVM=0 +require_env || exit 2 + +FIXTURE="$NYASH_ROOT/apps/tests/phase285_weak_basic.hako" + +output=$(NYASH_DISABLE_PLUGINS=1 "$NYASH_BIN" "$FIXTURE" 2>&1) +exit_code=$? + +# Check for success marker +if ! echo "$output" | grep -q "ok: weak and weak_to_strong work correctly"; then + log_error "phase285_weak_basic_vm: Expected 'ok: weak and weak_to_strong work correctly'" + echo "$output" + exit 1 +fi + +# Check for failure markers +if echo "$output" | grep -q "ng:"; then + log_error "phase285_weak_basic_vm: Found 'ng:' in output (test failure)" + echo "$output" + exit 1 +fi + +# Check exit code +if [ "$exit_code" -ne 0 ]; then + log_error "phase285_weak_basic_vm: Non-zero exit code: $exit_code" + echo "$output" + exit 1 +fi + +log_success "phase285_weak_basic_vm: WeakRef basic test passed" +exit 0 diff --git a/tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_field_vm.sh b/tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_field_vm.sh new file mode 100644 index 00000000..9d3cc439 --- /dev/null +++ b/tools/smokes/v2/profiles/quick/lifecycle/phase285_weak_field_vm.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# phase285_weak_field_vm.sh - Phase 285A1: Weak Field Contract smoke test (VM) + +source "$(dirname "$0")/../../../lib/test_runner.sh" +require_env || exit 2 + +# Test 1: OK cases (should compile and run) +for fixture in explicit transfer void; do + FIXTURE="$NYASH_ROOT/apps/tests/phase285_weak_field_ok_${fixture}.hako" + + if ! 
output=$(NYASH_DISABLE_PLUGINS=1 "$NYASH_BIN" "$FIXTURE" 2>&1); then + log_error "phase285_weak_field_vm: OK case '${fixture}' failed to compile" + echo "$output" + exit 1 + fi +done + +# Test 2: NG cases (should fail to compile with a weak field contract error; the fixture path itself contains 'weak', so match the error phrase instead) +for fixture in boxref primitive; do + FIXTURE="$NYASH_ROOT/apps/tests/phase285_weak_field_ng_${fixture}.hako" + + if output=$(NYASH_DISABLE_PLUGINS=1 "$NYASH_BIN" "$FIXTURE" 2>&1); then + log_error "phase285_weak_field_vm: NG case '${fixture}' should have failed" + exit 1 + fi + + if ! echo "$output" | grep -qF "to weak field '"; then + log_error "phase285_weak_field_vm: NG case '${fixture}' missing 'to weak field' in error" + echo "$output" + exit 1 + fi +done + +log_success "phase285_weak_field_vm: All weak field contract tests passed" +exit 0