fix: guard unified BoxCall recursion and document Stage-B stack overflow status

2025-11-17 17:53:40 +09:00
parent 4f3831c07b
commit e5b9b84aca
5 changed files with 78 additions and 9 deletions
--- a/CURRENT_TASK.md
+++ b/CURRENT_TASK.md
@ -235,9 +235,10 @@ Update (2025-11-16 — Phase 25.1b: selfhost builder multi-carrier & BoxTypeInsp
    - `src/runner/modes/vm.rs` 側では、`static_box_decls` を InlineUserBoxFactory の `decls` にも統合し、`NewBox` から静的 Box（HakoCli など）も user factory 経由で生成できるようにした（plugins disabled でも HakoCli の NewBox 自体は成功する）。
    - If‑block 側の PHI 再割り当て問題についても、`src/mir/utils/phi_helpers.rs` の `insert_phi()` がグローバルアロケーター（`value_gen.next()`）を直接叩いていた箇所を関数ローカルアロケーター（`MirFunction::next_value_id()` 経由）に切り替えることで、`%0` など既存 ValueId との衝突／再定義が発生しないよう修正済み。これにより「if 内でメソッド呼び出し前に PHI が既存 ValueId を上書きする」タイプの SSA 破綻も消えている。
    - さらに Rust VM 側の `MirInterpreter::reg_load` に開発用の追加情報を付けたことで、`Invalid value: use of undefined value ValueId(N)` が発生した際に `fn` / `last_block` / `last_inst` がエラーメッセージに含まれるようになり、Stage‑B / Stage‑B 用最小ハーネス内の `ParserBox.length()` 呼び出しが recv 未定義で落ちていることを特定できるようになった（NYASH_VM_TRACE/NYASH_VM_TRACE_EXEC 未設定時でも場所が分かる）。
-  - なお、Stage‑B を selfhost CLI サンプルに対して実行した際に現時点で見えている残存課題は次の 2 点:
+  - なお、Stage‑B を selfhost CLI サンプルに対して実行した際に現時点で見えている残存課題は次の 3 点:
    - 1) `if args { ... }` まわりの truthy 判定（ArrayBox を boolean 条件に使っている部分）の扱いに起因する型エラーであり、これは SSA ではなく「条件式の型／truthy 規約」をどう定義するかという別問題として扱う（Phase 25.1c 以降の型システム整理タスクで扱う想定）。
-    - 2) Stage‑B 用最小ハーネス（`lang/src/compiler/tests/stageb_min_sample.hako` + `tools/test_stageb_min.sh` の Test2）を Stage‑B 経由で実行した際に、依然として `❌ VM error: Invalid value: use of undefined value ValueId(21)` が報告されるケースが残っており、これは loop/if とは別に「Method recv の materialization（pin_to_slot で割り当てたレシーバー ID に対して実際の Copy が emit されていない）」経路の問題であることが分かってきている。`NYASH_LOCAL_SSA_TRACE=1` で見ると LocalSSA::recv 自体は `%19 -> %20 -> %21` の Copy を `bb3418` で emit しており `emit_instruction` までは届いているが、最終 MIR ダンプ（`NYASH_VM_DUMP_MIR=1`）では `bb3417/3418` にその Copy が存在せず、Call の `receiver` だけが `%21` のまま残っている。すなわち、Copy を差し込んだ後に `MirFunction::blocks` のどこかでブロック内容が別のものに置き換わっている疑いが濃厚。Rust MirBuilder 側で「Call emit ブロックが変わっても recv Copy が消えない」ことを構造的に保証しつつ、selfhost (Nyash) 側 `LowerReturnMethodArrayMapBox` の receiver=0 ハードコードも別タスクとして撤去する、という二本立ての追跡が必要になっている。
+    - 2) Stage‑B 用最小ハーネス（`lang/src/compiler/tests/stageb_min_sample.hako` + `tools/test_stageb_min.sh` の Test2）を Stage‑B 経由で実行した際に、以前は `❌ VM error: Invalid value: use of undefined value ValueId(21)` が報告されていたが、Rust MIR builder 側の BoxCompilationContext 導入と ValueId 割り当て修正により、この Undefined value 系は解消済み。ただし現在も `compiler_stageb.hako` を経由する Test2 では stack overflow（`thread 'main' has overflowed its stack`）が発生しており、これは emit_unified_call ↔ BoxCall の再帰ルートではなく、Stage‑B Nyash ボックス側の自己再帰/循環に起因する可能性が高い。Phase 25.1e 以降で Nyash 側に浅い再帰ガード（depth カウンタ）を入れ、どの Box.method が再帰しているかを特定するタスクとして扱う。
+    - 3) Rust 側では emit_unified_call / emit_box_or_plugin_call / emit_legacy_call に対して再入防止フラグ（`in_unified_boxcall_fallback`）を、build_expression / build_method_call / emit_unified_call に対して共有の再帰深度カウンタ（`recursion_depth`）を導入済みであり、call 系の相互再帰はフラグで遮断され、それ以外の暴走再帰も深度上限超過のエラーとして検出される。Stage‑B 経路の stack overflow 調査は、これを前提に Nyash 側の構造へフォーカスする。
 - Next tasks (Phase 25.1b → 25.1c handoff / Codex):
  1. Rust 層 Call/ExternCall 契約のドキュメント固定（Step 4.1）
     - `src/mir/builder/builder_calls.rs` / `src/backend/mir_interpreter/handlers/{calls,externs,extern_provider}.rs` / `src/runtime/plugin_loader_v2/enabled/extern_functions.rs` をベースに、「MethodCall/ExternCall/hostbridge.extern_invoke/ env.codegen/env.mirbuilder」の SSOT を Phase 25.1b README に記録（実施済み）。
--- a/docs/development/roadmap/phases/phase-25.1d/README.md
+++ b/docs/development/roadmap/phases/phase-25.1d/README.md
@ -70,4 +70,4 @@ Status: planning（構造バグ切り出しフェーズ・挙動は変えない
 - やることは単純で、やる量は多い:
  - 小さいテストを書く → verifier で赤を出す → LoopBuilder / IfForm / MirBuilder を直す → 緑になるまで繰り返す。
 - これにより、Stage‑B / Stage‑1 / selfhost の土台となる Rust MIR 層が安定し、その上に Nyash selfhost 側の MirBuilder を載せやすくする。
-
+- なお、Stage‑B 最小ハーネス（`stageb_min_sample.hako`）については、Rust MIR builder 経由の直接 VM / MIR verify は既に緑であり、残っている stack overflow は `compiler_stageb.hako` 側の Nyash ボックス連鎖に起因するものと考えられる。Rust 層では `emit_unified_call` / BoxCall / legacy 経路の再入防止フラグと再帰深度カウンタを導入済みであり、以降は Nyash 側に浅い再帰ガードを置いて原因ボックスを特定するフェーズへ引き継ぐ。
--- a/src/mir/builder.rs
+++ b/src/mir/builder.rs
@ -188,6 +188,10 @@ pub struct MirBuilder {
    /// infinite recursion (emit_unified_call → emit_box_or_plugin_call →
    /// emit_unified_call …) can occur when routing decisions disagree.
    pub(super) in_unified_boxcall_fallback: bool,
+
+    /// Recursion depth counter for debugging stack overflow.
+    /// Shared by build_expression / build_method_call / emit_unified_call to detect runaway recursion.
+    pub(super) recursion_depth: usize,
 }

 impl MirBuilder {
@ -242,6 +246,7 @@ impl MirBuilder {
            schedule_mat_map: HashMap::new(),

            in_unified_boxcall_fallback: false,
+            recursion_depth: 0,
        }
    }

@ -371,7 +376,21 @@ impl MirBuilder {
    /// Build an expression and return its value ID
    pub(super) fn build_expression(&mut self, ast: ASTNode) -> Result<ValueId, String> {
        // Delegated to exprs.rs to keep this file lean
-        self.build_expression_impl(ast)
+        // Debug: bound nesting depth; test before incrementing so the error path leaves the counter balanced
+        const MAX_RECURSION_DEPTH: usize = 200;
+        if self.recursion_depth >= MAX_RECURSION_DEPTH {
+            let depth = self.recursion_depth + 1; // depth this call would have reached
+            eprintln!("\n[FATAL] ============================================");
+            eprintln!("[FATAL] Recursion depth exceeded {} in build_expression", MAX_RECURSION_DEPTH);
+            eprintln!("[FATAL] Current depth: {}", depth);
+            eprintln!("[FATAL] AST node type: {:?}", std::mem::discriminant(&ast));
+            eprintln!("[FATAL] ============================================\n");
+            return Err(format!("Recursion depth exceeded: {} (possible infinite loop)", depth));
+        }
+        self.recursion_depth += 1;
+        let result = self.build_expression_impl(ast);
+        self.recursion_depth -= 1; // always paired with the increment above
+        result
    }


--- a/src/mir/builder/calls/build.rs
+++ b/src/mir/builder/calls/build.rs
@ -66,6 +66,27 @@ impl MirBuilder {
        object: ASTNode,
        method: String,
        arguments: Vec<ASTNode>,
+    ) -> Result<ValueId, String> {
+        // Debug: Check recursion depth
+        const MAX_METHOD_DEPTH: usize = 100;
+        self.recursion_depth += 1;
+        if self.recursion_depth > MAX_METHOD_DEPTH {
+            eprintln!("[FATAL] build_method_call recursion depth exceeded {}", MAX_METHOD_DEPTH);
+            eprintln!("[FATAL] Current depth: {}", self.recursion_depth);
+            eprintln!("[FATAL] Method: {}", method);
+            return Err(format!("build_method_call recursion depth exceeded: {}", self.recursion_depth));
+        }
+
+        let result = self.build_method_call_impl(object, method, arguments);
+        self.recursion_depth -= 1;
+        result
+    }
+
+    fn build_method_call_impl(
+        &mut self,
+        object: ASTNode,
+        method: String,
+        arguments: Vec<ASTNode>,
    ) -> Result<ValueId, String> {
        if std::env::var("NYASH_STATIC_CALL_TRACE").ok().as_deref() == Some("1") {
            let kind = match &object {
--- a/src/mir/builder/calls/emit.rs
+++ b/src/mir/builder/calls/emit.rs
@ -18,12 +18,34 @@ impl MirBuilder {
        target: CallTarget,
        args: Vec<ValueId>,
    ) -> Result<(), String> {
-        // Check environment variable for unified call usage
-        if !call_unified::is_unified_call_enabled() {
-            // Fall back to legacy implementation
-            return self.emit_legacy_call(dst, target, args);
+        // Debug: Check recursion depth
+        const MAX_EMIT_DEPTH: usize = 100;
+        self.recursion_depth += 1;
+        if self.recursion_depth > MAX_EMIT_DEPTH {
+            eprintln!("[FATAL] emit_unified_call recursion depth exceeded {}", MAX_EMIT_DEPTH);
+            eprintln!("[FATAL] Current depth: {}", self.recursion_depth);
+            eprintln!("[FATAL] Target: {:?}", target);
+            return Err(format!("emit_unified_call recursion depth exceeded: {}", self.recursion_depth));
        }

+        // Check environment variable for unified call usage
+        let result = if !call_unified::is_unified_call_enabled() {
+            // Fall back to legacy implementation
+            self.emit_legacy_call(dst, target, args)
+        } else {
+            self.emit_unified_call_impl(dst, target, args)
+        };
+        self.recursion_depth -= 1;
+        result
+    }
+
+    fn emit_unified_call_impl(
+        &mut self,
+        dst: Option<ValueId>,
+        target: CallTarget,
+        args: Vec<ValueId>,
+    ) -> Result<(), String> {
+
        // Emit resolve.try for method targets (dev-only; default OFF)
        let arity_for_try = args.len();
        if let CallTarget::Method { ref box_type, ref method, receiver } = target {
@ -166,7 +188,13 @@ impl MirBuilder {
                // LEGACY PATH (after unified migration):
                // Instance→Function rewrite is centralized in unified call path.
                // Legacy path no longer functionizes; always use Box/Plugin call here.
-                self.emit_box_or_plugin_call(dst, receiver, method, None, args, EffectMask::IO)
+                // CRITICAL FIX: Prevent bouncing back to emit_unified_call
+                // Set flag to prevent emit_box_or_plugin_call from calling emit_unified_call
+                let prev_flag = self.in_unified_boxcall_fallback;
+                self.in_unified_boxcall_fallback = true;
+                let result = self.emit_box_or_plugin_call(dst, receiver, method, None, args, EffectMask::IO);
+                self.in_unified_boxcall_fallback = prev_flag;
+                result
            },
            CallTarget::Constructor(box_type) => {
                // Use existing NewBox