From 5e3d9e7ae48a9bebda4168f48f751270563c3b42 Mon Sep 17 00:00:00 2001 From: nyash-codex Date: Fri, 31 Oct 2025 20:18:39 +0900 Subject: [PATCH] =?UTF-8?q?restore(lang/compiler):=20bring=20back=20lang/s?= =?UTF-8?q?rc/compiler=20from=20e917d400;=20add=20Hako=20index=20canaries?= =?UTF-8?q?=20and=20docs;=20implement=20Rust-side=20index=20operator=20(Ar?= =?UTF-8?q?ray/Map=20get/set)=20with=20Fail=E2=80=91Fast=20diagnostics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - restore: lang/src/compiler/** (parser/emit/builder/pipeline_v2) from e917d400 - docs: docs/development/selfhosting/index-operator-hako.md - smokes(hako): tools/smokes/v2/profiles/quick/core/index_operator_hako.sh (opt-in) - smokes(vm): adjust index_operator_vm.sh for semicolon gate + stable error text - rust/parser: allow IndexExpr and assignment LHS=Index; postfix parse LBRACK chain - rust/builder: lower arr/map index to BoxCall get/set; annotate array/map literals; Fail‑Fast for unsupported types - CURRENT_TASK: mark Rust side done; add Hako tasks checklist Note: files disappeared likely due to branch FF to a lineage without lang/src/compiler; no explicit delete commit found. Added anchor checks and suggested CI guard in follow-up. --- CURRENT_TASK.md | 17 +- crates/nyash_kernel/src/lib.rs | 6 +- crates/nyash_kernel/src/plugin/future.rs | 14 +- crates/nyash_kernel/src/plugin/invoke.rs | 8 +- crates/nyash_kernel/src/plugin/invoke_core.rs | 2 +- .../selfhosting/index-operator-hako.md | 31 + lang/src/compiler/README.md | 14 + lang/src/compiler/builder/mod.hako | 23 + lang/src/compiler/builder/rewrite/known.hako | 105 ++++ .../src/compiler/builder/rewrite/special.hako | 8 + .../compiler/builder/ssa/cond_inserter.hako | 196 ++++++ lang/src/compiler/builder/ssa/local.hako | 98 +++ lang/src/compiler/builder/ssa/loopssa.hako | 8 + lang/src/compiler/debug/debug_box.hako | 39 ++ .../compiler/emit/common/call_emit_box.hako | 51 ++ .../compiler/emit/common/header_emit_box.hako | 23 + .../compiler/emit/common/json_emit_box.hako | 12 + .../compiler/emit/common/mir_emit_box.hako | 14 + .../compiler/emit/common/newbox_emit_box.hako | 35 ++ lang/src/compiler/emit/mir_emitter_box.hako | 179 ++++++ lang/src/compiler/entry/compiler.hako | 25 + lang/src/compiler/hako_module.toml | 37 ++ .../compiler/parser/expr/parser_expr_box.hako | 355 +++++++++++ .../parser/expr/parser_literal_box.hako | 119 ++++ .../compiler/parser/expr/parser_peek_box.hako | 104 ++++ lang/src/compiler/parser/parser_box.hako | 239 ++++++++ .../parser/scan/parser_common_utils_box.hako | 72 +++ .../parser/scan/parser_ident_scan_box.hako | 22 + .../parser/scan/parser_number_scan_box.hako | 26 + .../parser/scan/parser_string_scan_box.hako | 50 ++ .../parser/scan/parser_string_utils_box.hako | 20 + .../parser/stmt/parser_control_box.hako | 173 ++++++ .../parser/stmt/parser_exception_box.hako | 152 +++++ .../compiler/parser/stmt/parser_stmt_box.hako | 202 +++++++ .../parser/using/using_collector_box.hako | 80 +++ lang/src/compiler/pipeline_v2/README.md | 60 ++ .../pipeline_v2/README_using_resolver.md | 33 ++ .../pipeline_v2/alias_preflight_box.hako | 28 + .../src/compiler/pipeline_v2/backend_box.hako | 11 + .../pipeline_v2/call_extract_box.hako | 12 + .../pipeline_v2/compare_extract_box.hako | 119 ++++ .../compiler/pipeline_v2/emit_binop_box.hako | 33 ++ .../compiler/pipeline_v2/emit_call_box.hako | 89 +++ .../pipeline_v2/emit_compare_box.hako | 84 +++ .../compiler/pipeline_v2/emit_method_box.hako | 84 +++ 
.../compiler/pipeline_v2/emit_mir_flow.hako | 111 ++++ .../pipeline_v2/emit_mir_flow_map.hako | 110 ++++ .../compiler/pipeline_v2/emit_newbox_box.hako | 36 ++ .../compiler/pipeline_v2/emit_return_box.hako | 14 + .../pipeline_v2/execution_pipeline_box.hako | 49 ++ lang/src/compiler/pipeline_v2/flow_entry.hako | 26 + .../compiler/pipeline_v2/header_emit_box.hako | 16 + .../compiler/pipeline_v2/json_minify_box.hako | 37 ++ .../compiler/pipeline_v2/local_ssa_box.hako | 188 ++++++ .../compiler/pipeline_v2/map_helpers_box.hako | 43 ++ .../pipeline_v2/method_extract_box.hako | 12 + .../compiler/pipeline_v2/mir_builder_box.hako | 35 ++ .../compiler/pipeline_v2/mir_call_box.hako | 91 +++ .../pipeline_v2/name_resolve_box.hako | 29 + .../compiler/pipeline_v2/namespace_box.hako | 61 ++ .../compiler/pipeline_v2/new_extract_box.hako | 12 + .../compiler/pipeline_v2/normalizer_box.hako | 90 +++ lang/src/compiler/pipeline_v2/pipeline.hako | 558 ++++++++++++++++++ .../pipeline_v2/pipeline_emit_box.hako | 15 + .../pipeline_v2/pipeline_helpers_box.hako | 65 ++ .../pipeline_v2/readonly_map_view.hako | 31 + lang/src/compiler/pipeline_v2/regex_flow.hako | 95 +++ .../pipeline_v2/signature_verifier_box.hako | 111 ++++ .../pipeline_v2/stage1_args_parser_box.hako | 67 +++ .../pipeline_v2/stage1_extract_flow.hako | 209 +++++++ .../stage1_int_args_extract_box.hako | 127 ++++ .../pipeline_v2/stage1_json_scanner_box.hako | 99 ++++ .../stage1_name_args_normalizer_box.hako | 60 ++ .../pipeline_v2/terminator_guard_box.hako | 37 ++ .../pipeline_v2/using_resolver_box.hako | 70 +++ lang/src/compiler/stage1/emitter_box.hako | 10 + .../src/compiler/stage1/json_program_box.hako | 326 ++++++++++ src/ast.rs | 7 + src/ast/utils.rs | 6 + src/mir/builder/exprs.rs | 123 ++++ src/mir/builder/vars.rs | 4 + src/parser/expr/call.rs | 10 + src/parser/expr_cursor.rs | 13 + src/parser/mod.rs | 4 +- .../quick/core/index_operator_hako.sh | 72 +++ .../profiles/quick/core/index_operator_vm.sh | 43 ++ 86 files changed, 6214 insertions(+), 20 deletions(-) create mode 100644 docs/development/selfhosting/index-operator-hako.md create mode 100644 lang/src/compiler/README.md create mode 100644 lang/src/compiler/builder/mod.hako create mode 100644 lang/src/compiler/builder/rewrite/known.hako create mode 100644 lang/src/compiler/builder/rewrite/special.hako create mode 100644 lang/src/compiler/builder/ssa/cond_inserter.hako create mode 100644 lang/src/compiler/builder/ssa/local.hako create mode 100644 lang/src/compiler/builder/ssa/loopssa.hako create mode 100644 lang/src/compiler/debug/debug_box.hako create mode 100644 lang/src/compiler/emit/common/call_emit_box.hako create mode 100644 lang/src/compiler/emit/common/header_emit_box.hako create mode 100644 lang/src/compiler/emit/common/json_emit_box.hako create mode 100644 lang/src/compiler/emit/common/mir_emit_box.hako create mode 100644 lang/src/compiler/emit/common/newbox_emit_box.hako create mode 100644 lang/src/compiler/emit/mir_emitter_box.hako create mode 100644 lang/src/compiler/entry/compiler.hako create mode 100644 lang/src/compiler/hako_module.toml create mode 100644 lang/src/compiler/parser/expr/parser_expr_box.hako create mode 100644 lang/src/compiler/parser/expr/parser_literal_box.hako create mode 100644 lang/src/compiler/parser/expr/parser_peek_box.hako create mode 100644 lang/src/compiler/parser/parser_box.hako create mode 100644 lang/src/compiler/parser/scan/parser_common_utils_box.hako create mode 100644 lang/src/compiler/parser/scan/parser_ident_scan_box.hako create mode 100644 
lang/src/compiler/parser/scan/parser_number_scan_box.hako create mode 100644 lang/src/compiler/parser/scan/parser_string_scan_box.hako create mode 100644 lang/src/compiler/parser/scan/parser_string_utils_box.hako create mode 100644 lang/src/compiler/parser/stmt/parser_control_box.hako create mode 100644 lang/src/compiler/parser/stmt/parser_exception_box.hako create mode 100644 lang/src/compiler/parser/stmt/parser_stmt_box.hako create mode 100644 lang/src/compiler/parser/using/using_collector_box.hako create mode 100644 lang/src/compiler/pipeline_v2/README.md create mode 100644 lang/src/compiler/pipeline_v2/README_using_resolver.md create mode 100644 lang/src/compiler/pipeline_v2/alias_preflight_box.hako create mode 100644 lang/src/compiler/pipeline_v2/backend_box.hako create mode 100644 lang/src/compiler/pipeline_v2/call_extract_box.hako create mode 100644 lang/src/compiler/pipeline_v2/compare_extract_box.hako create mode 100644 lang/src/compiler/pipeline_v2/emit_binop_box.hako create mode 100644 lang/src/compiler/pipeline_v2/emit_call_box.hako create mode 100644 lang/src/compiler/pipeline_v2/emit_compare_box.hako create mode 100644 lang/src/compiler/pipeline_v2/emit_method_box.hako create mode 100644 lang/src/compiler/pipeline_v2/emit_mir_flow.hako create mode 100644 lang/src/compiler/pipeline_v2/emit_mir_flow_map.hako create mode 100644 lang/src/compiler/pipeline_v2/emit_newbox_box.hako create mode 100644 lang/src/compiler/pipeline_v2/emit_return_box.hako create mode 100644 lang/src/compiler/pipeline_v2/execution_pipeline_box.hako create mode 100644 lang/src/compiler/pipeline_v2/flow_entry.hako create mode 100644 lang/src/compiler/pipeline_v2/header_emit_box.hako create mode 100644 lang/src/compiler/pipeline_v2/json_minify_box.hako create mode 100644 lang/src/compiler/pipeline_v2/local_ssa_box.hako create mode 100644 lang/src/compiler/pipeline_v2/map_helpers_box.hako create mode 100644 lang/src/compiler/pipeline_v2/method_extract_box.hako create mode 100644 lang/src/compiler/pipeline_v2/mir_builder_box.hako create mode 100644 lang/src/compiler/pipeline_v2/mir_call_box.hako create mode 100644 lang/src/compiler/pipeline_v2/name_resolve_box.hako create mode 100644 lang/src/compiler/pipeline_v2/namespace_box.hako create mode 100644 lang/src/compiler/pipeline_v2/new_extract_box.hako create mode 100644 lang/src/compiler/pipeline_v2/normalizer_box.hako create mode 100644 lang/src/compiler/pipeline_v2/pipeline.hako create mode 100644 lang/src/compiler/pipeline_v2/pipeline_emit_box.hako create mode 100644 lang/src/compiler/pipeline_v2/pipeline_helpers_box.hako create mode 100644 lang/src/compiler/pipeline_v2/readonly_map_view.hako create mode 100644 lang/src/compiler/pipeline_v2/regex_flow.hako create mode 100644 lang/src/compiler/pipeline_v2/signature_verifier_box.hako create mode 100644 lang/src/compiler/pipeline_v2/stage1_args_parser_box.hako create mode 100644 lang/src/compiler/pipeline_v2/stage1_extract_flow.hako create mode 100644 lang/src/compiler/pipeline_v2/stage1_int_args_extract_box.hako create mode 100644 lang/src/compiler/pipeline_v2/stage1_json_scanner_box.hako create mode 100644 lang/src/compiler/pipeline_v2/stage1_name_args_normalizer_box.hako create mode 100644 lang/src/compiler/pipeline_v2/terminator_guard_box.hako create mode 100644 lang/src/compiler/pipeline_v2/using_resolver_box.hako create mode 100644 lang/src/compiler/stage1/emitter_box.hako create mode 100644 lang/src/compiler/stage1/json_program_box.hako create mode 100644 
tools/smokes/v2/profiles/quick/core/index_operator_hako.sh create mode 100644 tools/smokes/v2/profiles/quick/core/index_operator_vm.sh diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index 005677d1..5fffc7d6 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -147,11 +147,18 @@ Index Operator Bring‑up(Phase‑20.31 内の小粒対応) - 文字列 index/range は後続(Phase‑2) - 未対応型は Fail‑Fast: "index operator is only supported for Array/Map" -実装計画 -1) AST: IndexExpr と Assign(IndexExpr, …)(Rust パーサー) -2) MIR Lowering: Array/Map の get/set に正規化 -3) スモーク(quick): arr_read / arr_write / map_rw / negative_string -4) ドキュメント: docs/specs/language/index-operator.md +実装状況(Rust 側) +- [x] AST: IndexExpr と Assign(IndexExpr, …)(Rust パーサー) +- [x] MIR Lowering: Array/Map の get/set に正規化(Unsupported 型は compile-time Fail-Fast) +- [x] スモーク(quick): arr_read / arr_write / map_rw / negative_string +- [x] ドキュメント: docs/specs/language/index-operator.md + +Hakorune コンパイラ(Hako 側) +- [ ] Parser: IndexExpr + Assign(LHS=IndexExpr) +- [ ] Lowering: Array/Map → BoxCall("get"/"set")(AOT は従来の dotted extern を踏襲) +- [ ] 診断: 未対応型は Fail‑Fast(安定文言) +- [ ] スモーク: tools/smokes/v2/profiles/quick/core/index_operator_hako.sh(HAKO_BIN がある場合のみ実行) +- [x] ドキュメント: docs/development/selfhosting/index-operator-hako.md ロールアウト - 必要なら dev フラグ(HAKO_INDEX_OPERATOR_DEV=1)で段階導入(dev=ON, prod=OFF)。 diff --git a/crates/nyash_kernel/src/lib.rs b/crates/nyash_kernel/src/lib.rs index fe8aecc9..0369a89b 100644 --- a/crates/nyash_kernel/src/lib.rs +++ b/crates/nyash_kernel/src/lib.rs @@ -234,7 +234,7 @@ pub extern "C" fn nyash_box_from_i64(val: i64) -> i64 { pub extern "C" fn nyash_env_box_new(type_name: *const i8) -> i64 { use nyash_rust::{ box_trait::NyashBox, - runtime::{host_handles as handles, box_registry::get_global_registry}, + runtime::{box_registry::get_global_registry, host_handles as handles}, }; use std::ffi::CStr; if type_name.is_null() { @@ -283,7 +283,7 @@ pub extern "C" fn nyash_env_box_new_i64x( ) -> i64 { use nyash_rust::{ box_trait::{IntegerBox, NyashBox}, - runtime::{host_handles as handles, box_registry::get_global_registry}, + runtime::{box_registry::get_global_registry, host_handles as handles}, }; use std::ffi::CStr; if type_name.is_null() { @@ -353,7 +353,7 @@ pub extern "C" fn nyash_any_length_h_export(handle: i64) -> i64 { use nyash_rust::runtime::host_handles as handles; if std::env::var("NYASH_JIT_TRACE_LEN").ok().as_deref() == Some("1") { let present = if handle > 0 { - handles::get(handle as u64).is_some() + handles::get(handle as u64).is_some() } else { false }; diff --git a/crates/nyash_kernel/src/plugin/future.rs b/crates/nyash_kernel/src/plugin/future.rs index ea265d92..7748f14d 100644 --- a/crates/nyash_kernel/src/plugin/future.rs +++ b/crates/nyash_kernel/src/plugin/future.rs @@ -112,8 +112,9 @@ pub extern "C" fn nyash_future_spawn_method_h( } // Prepare FutureBox and register handle let fut_box = std::sync::Arc::new(nyash_rust::boxes::future::FutureBox::new()); - let handle = - nyash_rust::runtime::host_handles::to_handle_arc(fut_box.clone() as std::sync::Arc); + let handle = nyash_rust::runtime::host_handles::to_handle_arc( + fut_box.clone() as std::sync::Arc + ); // Copy data for async task let cap: usize = 512; let tlv = buf.clone(); @@ -245,7 +246,7 @@ pub extern "C" fn nyash_future_spawn_instance3_i64(a0: i64, a1: i64, a2: i64, ar } // Resolve receiver invoke and type id/name let (instance_id, real_type_id, invoke) = - if let Some(obj) = nyash_rust::runtime::host_handles::get(a0 as u64) { + if let Some(obj) = 
nyash_rust::runtime::host_handles::get(a0 as u64) { if let Some(p) = obj.as_any().downcast_ref::() { (p.instance_id(), p.inner.type_id, Some(p.inner.invoke_fn)) } else { @@ -265,7 +266,7 @@ pub extern "C" fn nyash_future_spawn_instance3_i64(a0: i64, a1: i64, a2: i64, ar // Determine method name string (from a1 handle→StringBox, or a1 as C string pointer, or legacy VM args) let mut method_name: Option = None; if a1 > 0 { - if let Some(obj) = nyash_rust::runtime::host_handles::get(a1 as u64) { + if let Some(obj) = nyash_rust::runtime::host_handles::get(a1 as u64) { if let Some(p) = obj.as_any().downcast_ref::() { if p.box_type == "StringBox" { // Limit the lifetime of the read guard to this inner block by avoiding an outer binding @@ -388,8 +389,9 @@ pub extern "C" fn nyash_future_spawn_instance3_i64(a0: i64, a1: i64, a2: i64, ar } // Create Future and schedule async invoke let fut_box = std::sync::Arc::new(nyash_rust::boxes::future::FutureBox::new()); - let handle = - nyash_rust::runtime::host_handles::to_handle_arc(fut_box.clone() as std::sync::Arc); + let handle = nyash_rust::runtime::host_handles::to_handle_arc( + fut_box.clone() as std::sync::Arc + ); let tlv = buf.clone(); nyash_rust::runtime::global_hooks::spawn_task( "nyash.future.spawn_instance3_i64", diff --git a/crates/nyash_kernel/src/plugin/invoke.rs b/crates/nyash_kernel/src/plugin/invoke.rs index 4d4e9f40..5c529e4e 100644 --- a/crates/nyash_kernel/src/plugin/invoke.rs +++ b/crates/nyash_kernel/src/plugin/invoke.rs @@ -77,7 +77,7 @@ pub extern "C" fn nyash_plugin_invoke3_f64( unsafe extern "C" fn(u32, u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32, > = None; if a0 > 0 { - if let Some(obj) = nyash_rust::runtime::host_handles::get(a0 as u64) { + if let Some(obj) = nyash_rust::runtime::host_handles::get(a0 as u64) { if let Some(p) = obj.as_any().downcast_ref::() { instance_id = p.instance_id(); invoke = Some(p.inner.invoke_fn); @@ -160,7 +160,7 @@ fn nyash_plugin_invoke_name_common_i64(method: &str, argc: i64, a0: i64, a1: i64 unsafe extern "C" fn(u32, u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32, > = None; if a0 > 0 { - if let Some(obj) = nyash_rust::runtime::host_handles::get(a0 as u64) { + if let Some(obj) = nyash_rust::runtime::host_handles::get(a0 as u64) { if let Some(p) = obj.as_any().downcast_ref::() { instance_id = p.instance_id(); type_id = p.inner.type_id; @@ -263,7 +263,7 @@ pub extern "C" fn nyash_plugin_invoke_by_name_i64( unsafe extern "C" fn(u32, u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32, > = None; if recv_handle > 0 { - if let Some(obj) = nyash_rust::runtime::host_handles::get(recv_handle as u64) { + if let Some(obj) = nyash_rust::runtime::host_handles::get(recv_handle as u64) { if let Some(p) = obj.as_any().downcast_ref::() { instance_id = p.instance_id(); type_id = p.inner.type_id; @@ -401,7 +401,7 @@ pub extern "C" fn nyash_plugin_invoke3_tagged_i64( unsafe extern "C" fn(u32, u32, u32, *const u8, usize, *mut u8, *mut usize) -> i32, > = None; if a0 > 0 { - if let Some(obj) = nyash_rust::runtime::host_handles::get(a0 as u64) { + if let Some(obj) = nyash_rust::runtime::host_handles::get(a0 as u64) { if let Some(p) = obj.as_any().downcast_ref::() { instance_id = p.instance_id(); real_type_id = p.inner.type_id; diff --git a/crates/nyash_kernel/src/plugin/invoke_core.rs b/crates/nyash_kernel/src/plugin/invoke_core.rs index 0412f174..89d73a36 100644 --- a/crates/nyash_kernel/src/plugin/invoke_core.rs +++ b/crates/nyash_kernel/src/plugin/invoke_core.rs @@ -15,7 +15,7 @@ pub struct 
Receiver { pub fn resolve_receiver_for_a0(a0: i64) -> Option { // 1) Handle registry (preferred) if a0 > 0 { - if let Some(obj) = nyash_rust::runtime::host_handles::get(a0 as u64) { + if let Some(obj) = nyash_rust::runtime::host_handles::get(a0 as u64) { if let Some(p) = obj.as_any().downcast_ref::() { return Some(Receiver { instance_id: p.instance_id(), diff --git a/docs/development/selfhosting/index-operator-hako.md b/docs/development/selfhosting/index-operator-hako.md new file mode 100644 index 00000000..d9ac8168 --- /dev/null +++ b/docs/development/selfhosting/index-operator-hako.md @@ -0,0 +1,31 @@ +Index Operator in Hakorune Compiler (selfhost) + +Scope +- Bring Hako-side behavior up to parity with Rust parser/MIR builder for Phase‑20.31. + +User-facing spec (Phase‑1) +- Read: expr[index] for Array/Map +- Write: expr[index] = value for Array/Map +- String indexing/ranges: out of scope in Phase‑1 +- Unsupported receiver: Fail‑Fast with a stable diagnostic + +Required changes (Hako compiler) +- Parser + - Add IndexExpr(target, index) + - Permit Assign(IndexExpr, value) on LHS +- Lowering (MIR emit) + - Array: index read/write → BoxCall("get"/"set") on ArrayBox + - Map: index read/write → BoxCall("get"/"set") on MapBox + - Optional (AOT): dotted extern mapping remains as today (nyash.array.get_h, nyash.map.set_hh …) +- Diagnostics + - If receiver type cannot be resolved to ArrayBox/MapBox, emit: "index operator is only supported for Array/Map" + +Smokes (opt‑in, external HAKO_BIN) +- tools/smokes/v2/profiles/quick/core/index_operator_hako.sh + - Requires HAKO_BIN; skips with WARN when missing + - Canaries: array read/write, map rw, negative string + +Rollout +- No flags are required; follow Rust side semantics. +- Keep Phase‑2 (String/range) for later work. + diff --git a/lang/src/compiler/README.md b/lang/src/compiler/README.md new file mode 100644 index 00000000..af7c2a09 --- /dev/null +++ b/lang/src/compiler/README.md @@ -0,0 +1,14 @@ +# Hakorune Compiler — Layout and Responsibilities + +Structure (target) +- emit/ + - mir_emitter_box.hako — high-level MIR emitter entry + - common/ — shared emit helpers (mir_emit/json_emit/call_emit/header_emit/newbox_emit) +- parser/ — lexer/parser (to be moved from apps/* in later steps) +- builder/, ssa/, rewrite/, pipeline_v2/ — existing compiler stages (move gradually) + +Policy +- Compiler lives under `lang/src/compiler/`. +- VM engines live under `lang/src/vm/engines/` (Hakorune/Mini), with shared helpers in `vm/boxes/`. +- Keep imports across these boundaries minimal and documented. + diff --git a/lang/src/compiler/builder/mod.hako b/lang/src/compiler/builder/mod.hako new file mode 100644 index 00000000..2985363a --- /dev/null +++ b/lang/src/compiler/builder/mod.hako @@ -0,0 +1,23 @@ +// Moved from apps/selfhost-compiler/builder/mod.hako +// builder/mod.hako — Aggregator for compiler-track passes (scaffold) + +using lang.compiler.builder.ssa.local as LocalSSA +using lang.compiler.builder.ssa.loop as LoopSSA +using lang.compiler.builder.rewrite.special as RewriteSpecial +using lang.compiler.builder.rewrite.known as RewriteKnown + +static box CompilerBuilder { + // Apply passes in safe order; currently no-ops (behavior-neutral). 
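// Usage sketch (assumes a Stage-1 JSON string produced by the parser stage):
//   local out = CompilerBuilder.apply_all(stage1_json)
// While every pass below remains a behavior-neutral scaffold, `out` equals the input string.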
+ apply_all(stage1_json) { + local j = stage1_json + j = RewriteSpecial.apply(j) + j = RewriteKnown.try_apply(j) + j = LocalSSA.ensure_recv(j) + j = LocalSSA.ensure_args(j) + j = LocalSSA.ensure_cond(j) + j = LocalSSA.ensure_cmp(j) + j = LoopSSA.stabilize_merges(j) + return j + } +} + diff --git a/lang/src/compiler/builder/rewrite/known.hako b/lang/src/compiler/builder/rewrite/known.hako new file mode 100644 index 00000000..ab17803f --- /dev/null +++ b/lang/src/compiler/builder/rewrite/known.hako @@ -0,0 +1,105 @@ +// Moved from apps/selfhost-compiler/builder/rewrite/known.hako — Stage‑1 JSON minimal canonicalization +// Phase‑31.C.1 scope: ModuleFunction name → `Box.method/arity` のみ付与 +// 前提: args は整数IDの配列([1,2,3])で、入れ子や文字列は含まない + +static box RewriteKnown { + try_apply(stage1_json) { + if stage1_json == null { return null } + local s = "" + stage1_json + local i = 0 + local out = "" + loop(true) { + local p = s.indexOf("\"type\":\"ModuleFunction\"") + if p < 0 { break } + // write through up to this point + out = out + s.substring(i, p) + + // Find name start + local name_key = "\"name\":\"" + local np = s.indexOf(name_key, p) + if np < 0 { out = out + s.substring(p, s.size()) i = s.size() break } + local name_start = np + name_key.size() + // Find name end quote + local name_end = s.indexOf("\"", name_start) + if name_end < 0 { out = out + s.substring(p, s.size()) i = s.size() break } + local name = s.substring(name_start, name_end) + + // If already canonical, just append segment as-is + if name.indexOf("/") >= 0 { + out = out + s.substring(p, name_end) + i = name_end + continue + } + + // Find args bracket after name + local args_key = "\"args\":[" + local ap = s.indexOf(args_key, name_end) + if ap < 0 { out = out + s.substring(p, name_end) i = name_end continue } + local lb = ap + args_key.size() - 1 // points to '[' + // Find closing bracket + local rb = s.indexOf("]", lb + 1) + if rb < 0 { out = out + s.substring(p, name_end) i = name_end continue } + + // Count simple elements (digits, commas, spaces) + local body = s.substring(lb + 1, rb) + local trimmed = me._trim(body) + local arity = 0 + if trimmed.size() == 0 { + arity = 0 + } else { + // guard: if body contains non-digit/comma/space, skip rewrite (fail-safe) + if me._is_simple_ids(trimmed) == false { + out = out + s.substring(p, name_end) + i = name_end + continue + } + arity = me._count_commas(trimmed) + 1 + } + + // Emit up to name_start, then canonical name, then continue from name_end + out = out + s.substring(p, name_start) + name + "/" + me._itoa(arity) + i = name_end + } + // Append the tail + out = out + s.substring(i, s.size()) + return out + } + + _trim(text) { + local a = 0 + local b = text.size() + loop(a < b && me._is_space(text.substring(a,a+1))) { a = a + 1 } + loop(b > a && me._is_space(text.substring(b-1,b))) { b = b - 1 } + return text.substring(a,b) + } + _is_space(ch) { return ch == " " || ch == "\n" || ch == "\t" || ch == "\r" } + _is_digit(ch) { return ch >= "0" && ch <= "9" } + _is_simple_ids(text) { + local i = 0 + loop(i < text.size()) { + local ch = text.substring(i,i+1) + if !(me._is_space(ch) || ch == "," || me._is_digit(ch)) { return false } + i = i + 1 + } + return true + } + _count_commas(text) { + local i = 0 + local n = 0 + loop(i < text.size()) { if text.substring(i,i+1) == "," { n = n + 1 } i = i + 1 } + return n + } + _itoa(n) { + // simple positive int to string + if n == 0 { return "0" } + local s = "" + local x = n + loop(x > 0) { + local d = x % 10 + s = (d == 0 ? "0" : d == 1 ? 
"1" : d == 2 ? "2" : d == 3 ? "3" : d == 4 ? "4" : d == 5 ? "5" : d == 6 ? "6" : d == 7 ? "7" : d == 8 ? "8" : "9") + s + x = (x - d) / 10 + } + return s + } +} + diff --git a/lang/src/compiler/builder/rewrite/special.hako b/lang/src/compiler/builder/rewrite/special.hako new file mode 100644 index 00000000..0ce9bf14 --- /dev/null +++ b/lang/src/compiler/builder/rewrite/special.hako @@ -0,0 +1,8 @@ +// Moved from apps/selfhost-compiler/builder/rewrite/special.hako — Early normalization (no-op scaffold) + +static box RewriteSpecial { + // Normalize to str()/equals() forms where applicable (behavior-preserving). + // Scaffold returns input unchanged; actual rewrite to be added under guard. + apply(stage1_json) { return stage1_json } +} + diff --git a/lang/src/compiler/builder/ssa/cond_inserter.hako b/lang/src/compiler/builder/ssa/cond_inserter.hako new file mode 100644 index 00000000..20195b2c --- /dev/null +++ b/lang/src/compiler/builder/ssa/cond_inserter.hako @@ -0,0 +1,196 @@ +// cond_inserter.hako — minimal JSON-string helper to ensure branch(cond) has copy +// Scope: smoke/dev helper(文字列JSON限定、構造的なLocalSSAとは別ライン) + +using "lang/src/shared/json/json_cursor.hako" as JsonCursorBox + +static box CondInserter { + ensure_cond(mjson) { + if mjson == null { return "" } + local out = me._ensure_copy( + mjson, + "\"op\":\"branch\"", + "\"cond\":", + "\"instructions\":[", + "\"op\":\"copy\"", + "\"", + "\"", + "\"" + ) + if out != null { return out } + out = me._ensure_copy( + mjson, + "\\\"op\\\":\\\"branch\\\"", + "\\\"cond\\\":", + "\\\"instructions\\\":[", + "\\\"op\\\":\\\"copy\\\"", + "\\\"", + "\\\"", + "\\\"" + ) + if out != null { return out } + return mjson + } + + _ensure_copy(mjson, branch_pat, cond_pat, instr_pat, copy_pat, quote, key_quote, value_quote) { + local branch_idx = me._index_of_from(mjson, branch_pat, 0) + if branch_idx < 0 { return null } + local cond_idx = me._index_of_from(mjson, cond_pat, branch_idx) + if cond_idx < 0 { return null } + local cond_start + cond_start = cond_idx + 7 + local cond_digits + cond_digits = me._digits_from(mjson, cond_start) + if cond_digits == "" || cond_digits == null { return null } + local cond_value = me._digits_to_int(cond_digits) + local inst_idx = me._last_index_before(mjson, instr_pat, branch_idx) + if inst_idx < 0 { return null } + local segment = me._substr(mjson, inst_idx, branch_idx) + if me._index_of_from(segment, copy_pat, 0) >= 0 { return mjson } + local insert_pos + insert_pos = inst_idx + 17 + local copy_entry = "{" + key_quote + "op" + key_quote + ":" + value_quote + "copy" + value_quote + "," + + key_quote + "dst" + key_quote + ":" + me._int_to_str(cond_value) + "," + + key_quote + "src" + key_quote + ":" + me._int_to_str(cond_value) + "}" + local glue = "" + local next_ch = me._peek_non_ws(mjson, insert_pos) + if next_ch != "]" && next_ch != "" { glue = "," } + return me._substr(mjson, 0, insert_pos) + copy_entry + glue + me._substr(mjson, insert_pos, 999999) + } + + _index_of_from(text, needle, pos) { + if text == null || needle == null { return -1 } + local i + i = pos + loop(i < 1000) { + local text_ch + text_ch = me._substr(text, i, i + 1) + if text_ch == "" { return -1 } + local j + j = 0 + local found + found = 1 + loop(j < 50) { + local nc + nc = me._substr(needle, j, j + 1) + if nc == "" { break } + local tc + tc = me._substr(text, i + j, i + j + 1) + if tc != nc { + found = 0 + break + } + j = j + 1 + } + if found == 1 { return i } + i = i + 1 + } + return -1 + } + + _last_index_before(text, needle, limit) { + local 
last = -1 + local start = 0 + loop(true) { + local pos = me._index_of_from(text, needle, start) + if pos < 0 || pos >= limit { break } + last = pos + start = pos + 1 + } + return last + } + + _peek_non_ws(text, pos) { + local i = pos + local limit = pos + 100 + loop(i < limit) { + local ch = me._substr(text, i, i + 1) + if ch == "" { return "" } + if ch == " " || ch == "\n" || ch == "\r" || ch == "\t" { i = i + 1 } else { return ch } + } + return "" + } + + _digits_to_int(digits) { + local n = me._strlen(digits) + if n == 0 { return 0 } + local i = 0 + local acc = 0 + loop(i < n) { + local ch = me._substr(digits, i, i + 1) + acc = acc * 10 + me._digit_value(ch) + i = i + 1 + } + return acc + } + + _digit_value(ch) { + if ch == "0" { return 0 } + if ch == "1" { return 1 } + if ch == "2" { return 2 } + if ch == "3" { return 3 } + if ch == "4" { return 4 } + if ch == "5" { return 5 } + if ch == "6" { return 6 } + if ch == "7" { return 7 } + if ch == "8" { return 8 } + return 9 + } + + _int_to_str(n) { + if n == 0 { return "0" } + local value = n + local sign = "" + if value < 0 { sign = "-" value = 0 - value } + local out = "" + loop(value > 0) { + local digit = value % 10 + out = me._digit_char(digit) + out + value = value / 10 + } + return sign + out + } + + _digit_char(d) { + if d == 0 { return "0" } + if d == 1 { return "1" } + if d == 2 { return "2" } + if d == 3 { return "3" } + if d == 4 { return "4" } + if d == 5 { return "5" } + if d == 6 { return "6" } + if d == 7 { return "7" } + if d == 8 { return "8" } + return "9" + } + + _digits_from(text, start_pos) { + if text == null { return "" } + local ch + ch = me._substr(text, start_pos, start_pos + 1) + if ch >= "0" && ch <= "9" { return ch } + if ch == "-" { + local ch2 + ch2 = me._substr(text, start_pos + 1, start_pos + 2) + if ch2 >= "0" && ch2 <= "9" { return ch + ch2 } + } + return "" + } + + _strlen(text) { + if text == null { return 0 } + local i = 0 + loop(me._substr(text, i, i + 1) != "") { + i = i + 1 + } + return i + } + + _substr(text, start, finish) { + if text == null { return "" } + local s = start + local e = finish + if s < 0 { s = 0 } + if e < s { e = s } + return call("String.substring/2", text, s, e) + } +} diff --git a/lang/src/compiler/builder/ssa/local.hako b/lang/src/compiler/builder/ssa/local.hako new file mode 100644 index 00000000..e11ffd3d --- /dev/null +++ b/lang/src/compiler/builder/ssa/local.hako @@ -0,0 +1,98 @@ +// Moved from apps/selfhost-compiler/builder/ssa/local.hako — LocalSSA minimal(安全:挙動不変) +// 目的: +// - Stage‑1 JSON / MIR(JSON v0) のうち、既知の最小パターンで +// 「ブロック内にオペランドが材化されている(LocalSSA)」ことを軽く検証・整形する。 +// - 既定では再配線(Copy挿入)等は行わず、将来の拡張に備えた安全な足場のみを提供する。 + +using "lang/src/shared/common/string_helpers.hako" as StringHelpers +static box LocalSSA { + // --- 内部ヘルパ --- + // trace fields are created on demand (dynamic properties) + _index_of(hay, needle) { return hay.indexOf(needle) } + _last_index_of(hay, needle) { return hay.lastIndexOf(needle) } + _index_of_from(hay, needle, pos) { + // Delegate to StringHelpers with adjusted argument order + return StringHelpers.index_of(hay, pos, needle) + } + _read_digits(text, pos) { return StringHelpers.read_digits(text, pos) } + _count_occurs(hay, needle) { + if hay == null { return 0 } + if needle == null { return 0 } + local n = 0 + local i = 0 + loop(true) { + local p = hay.indexOf(needle) + if p < 0 { break } + n = n + 1 + hay = hay.substring(p + needle.size(), hay.size()) + } + return n + } + _to_int(digits) { + // Delegate to StringHelpers (handles digits-only 
strings correctly) + return StringHelpers.to_i64(digits) + } + _seek_obj_start(text, from_pos) { + local i = from_pos + local start = from_pos + loop(i >= 0) { + local ch = text.substring(i, i+1) + if ch == "{" { start = i break } + i = i - 1 + } + return start + } + _seek_obj_end(text, obj_start) { + local i = obj_start + local depth = 0 + loop(true) { + local ch = text.substring(i, i+1) + if ch == "" { break } + if ch == "{" { depth = depth + 1 } else { if ch == "}" { depth = depth - 1 } } + if depth == 0 { return i + 1 } + i = i + 1 + } + return i + } + _block_insts_start(mjson, before_pos) { + // find the nearest instructions array start that encloses before_pos + local prefix = mjson.substring(0, before_pos) + local p = prefix.lastIndexOf("\"instructions\":[") + if p < 0 { return -1 } + return p + 16 // len of "\"instructions\":[" + } + _block_insts_end(mjson, insts_start) { + // matching bracket for this instructions array + local i = insts_start + local depth = 1 + loop(true) { + local ch = mjson.substring(i, i+1) + if ch == "" { break } + if ch == "[" { depth = depth + 1 } else { if ch == "]" { depth = depth - 1 } } + if depth == 0 { return i } + i = i + 1 + } + return i + } + _max_dst_id(mjson) { + local pos = 0 + local maxv = 0 + loop(true) { + local p = me._index_of_from(mjson, "\"dst\":", pos) + if p < 0 { break } + local digits = me._read_digits(mjson, p + 6) + if digits != "" { + local acc = me._to_int(digits) + if acc > maxv { maxv = acc } + } + pos = p + 6 + } + return maxv + } + // --- 公開API(安全No-Opルール) --- + ensure_recv(stage1_json) { return stage1_json } + ensure_args(stage1_json) { return stage1_json } + ensure_cmp(stage1_json) { return stage1_json } + ensure_cond(stage1_json) { return stage1_json } + ensure_calls(stage1_json) { return stage1_json } +} diff --git a/lang/src/compiler/builder/ssa/loopssa.hako b/lang/src/compiler/builder/ssa/loopssa.hako new file mode 100644 index 00000000..c3173248 --- /dev/null +++ b/lang/src/compiler/builder/ssa/loopssa.hako @@ -0,0 +1,8 @@ +// Moved from apps/selfhost-compiler/builder/ssa/loopssa.hako — Loop SSA scaffold (no-op) + +static box LoopSSA { + // Guard PHI-like merges at loop headers/exits (future work). + // For now, pass-through to keep behavior unchanged. 
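// Usage sketch (mirrors the call site in builder/mod.hako):
//   j = LoopSSA.stabilize_merges(j)   // pass-through until merge stabilization lands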
+ stabilize_merges(stage1_json) { return stage1_json } +} + diff --git a/lang/src/compiler/debug/debug_box.hako b/lang/src/compiler/debug/debug_box.hako new file mode 100644 index 00000000..ed422e66 --- /dev/null +++ b/lang/src/compiler/debug/debug_box.hako @@ -0,0 +1,39 @@ +// DebugBox — conditional debug output aggregator (migrated to lang/src/compiler) +box DebugBox { + enabled + birth() { + me.enabled = 0 + return 0 + } + set_enabled(v) { + me.enabled = v + return 0 + } + log(msg) { + if me.enabled { + local c = new ConsoleBox() + c.println("[DEBUG] " + msg) + } + return 0 + } + info(msg) { + if me.enabled { + local c = new ConsoleBox() + c.println("[INFO] " + msg) + } + return 0 + } + error(msg) { + if me.enabled { + local c = new ConsoleBox() + c.println("[ERROR] " + msg) + } + return 0 + } +} + +// Include stub to satisfy current include lowering (expects a static box) +static box DebugStub { + main(args) { return 0 } +} + diff --git a/lang/src/compiler/emit/common/call_emit_box.hako b/lang/src/compiler/emit/common/call_emit_box.hako new file mode 100644 index 00000000..0a15d049 --- /dev/null +++ b/lang/src/compiler/emit/common/call_emit_box.hako @@ -0,0 +1,51 @@ +// call_emit_box.hako — CallEmitBox: construct MIR(JSON v0) nodes for call-family ops +// Responsibility: return MapBox nodes for call/boxcall/mir_call variants. +// Notes: +// - Args are expected as an ArrayBox of integer register ids. +// - Callers may optionally attach (string like "[1,2]") for legacy rebuild paths. + +using "lang/src/shared/common/string_helpers.hako" as StringHelpers + +static box CallEmitBox { + make_call(name, arg_ids, dst) { + return {op: "call", name: name, args: arg_ids, dst: dst} + } + + make_boxcall(method, recv_id, arg_ids, dst) { + return {op: "boxcall", method: method, recv: recv_id, args: arg_ids, dst: dst} + } + + // MIR v1 experimental variants + make_mir_call_global(name, arg_ids, dst) { + local callee = {type: "Global", name: name} + return {op: "mir_call", dst: dst, callee: callee, args: arg_ids} + } + make_mir_call_extern(name, arg_ids, dst) { + local callee = {type: "Extern", name: name} + return {op: "mir_call", dst: dst, callee: callee, args: arg_ids} + } + + make_mir_call_method(method, recv_id, arg_ids, dst) { + local callee = {type: "Method", method: method, receiver: recv_id} + return {op: "mir_call", dst: dst, callee: callee, args: arg_ids} + } + + make_mir_call_module(name, arg_ids, dst) { + local canon = me._canonical_module_name(name, arg_ids.size()) + local callee = {type: "ModuleFunction", name: canon} + return {op: "mir_call", dst: dst, callee: callee, args: arg_ids} + } + + make_mir_call_constructor(box_type, arg_ids, dst) { + local callee = {type: "Constructor", box_type: box_type} + return {op: "mir_call", dst: dst, callee: callee, args: arg_ids} + } + + _canonical_module_name(name, arity) { + if name == null { return "" } + if name.indexOf("/") >= 0 { return name } + return name + "/" + StringHelpers.int_to_str(arity) + } +} + +static box CallEmitBoxStub { main(args) { return 0 } } diff --git a/lang/src/compiler/emit/common/header_emit_box.hako b/lang/src/compiler/emit/common/header_emit_box.hako new file mode 100644 index 00000000..ce023e0d --- /dev/null +++ b/lang/src/compiler/emit/common/header_emit_box.hako @@ -0,0 +1,23 @@ +// header_emit_box.hako — HeaderEmitBox: thin helpers to build MIR(JSON v0) headers +// Responsibility: construct minimal Maps for block/function/module headers consistently. 
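// Shape sketch (derived from the constructors below; example values are illustrative only):
//   HeaderEmitBox.make_block(0, insts)             → { id: 0, instructions: [...] }
//   HeaderEmitBox.make_function_main(blocks)       → { name: "main", params: [], blocks: [...] }
//   HeaderEmitBox.make_module_with_functions(fns)  → { functions: [...] }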
+ +static box HeaderEmitBox { + make_block(id, insts) { + if id == null { id = 0 } + if insts == null { insts = new ArrayBox() } + return {id: id, instructions: insts} + } + + make_function_main(blocks) { + if blocks == null { blocks = new ArrayBox() } + return {name: "main", params: [], blocks: blocks} + } + + make_module_with_functions(fns) { + if fns == null { fns = new ArrayBox() } + return {functions: fns} + } +} + +static box HeaderEmitBoxStub { main(args) { return 0 } } + diff --git a/lang/src/compiler/emit/common/json_emit_box.hako b/lang/src/compiler/emit/common/json_emit_box.hako new file mode 100644 index 00000000..b697b224 --- /dev/null +++ b/lang/src/compiler/emit/common/json_emit_box.hako @@ -0,0 +1,12 @@ +// json_emit_box.hako — JsonEmitBox: minimal helpers for JSON emission +// Responsibility: provide a thin, stable facade for JSON.stringify and small append patterns. + +static box JsonEmitBox { + to_json(node) { + // First‑class JSON stringify (normalized in builder). Fallback to .toJSON when available. + return JSON.stringify(node) + } +} + +static box JsonEmitBoxStub { main(args) { return 0 } } + diff --git a/lang/src/compiler/emit/common/mir_emit_box.hako b/lang/src/compiler/emit/common/mir_emit_box.hako new file mode 100644 index 00000000..8b963f33 --- /dev/null +++ b/lang/src/compiler/emit/common/mir_emit_box.hako @@ -0,0 +1,14 @@ +// mir_emit_box.hako — MirEmitBox: construct minimal MIR(JSON v0) instruction maps +// Responsibility: return MapBox-style literals for common MIR ops. + +static box MirEmitBox { + make_const(dst, val) { return { op:"const", dst:dst, value:{ type:"i64", value:val } } } + make_compare(kind, lhs, rhs, dst) { return { op:"compare", cmp:kind, lhs:lhs, rhs:rhs, dst:dst } } + make_copy(dst, src) { return { op:"copy", dst:dst, src:src } } + make_branch(cond, then_id, else_id) { return { op:"branch", cond:cond, then:then_id, else_id:else_id } } + make_jump(target) { return { op:"jump", target:target } } + make_ret(val) { return { op:"ret", value:val } } +} + +static box MirEmitBoxStub { main(args) { return 0 } } + diff --git a/lang/src/compiler/emit/common/newbox_emit_box.hako b/lang/src/compiler/emit/common/newbox_emit_box.hako new file mode 100644 index 00000000..b8730e82 --- /dev/null +++ b/lang/src/compiler/emit/common/newbox_emit_box.hako @@ -0,0 +1,35 @@ +// newbox_emit_box.hako — NewBoxEmitBox: construct MIR(JSON v0) node for newbox +// Responsibility: return MapBox node for { op: newbox, box_type, args, dst }. 
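// Shape sketch ("ArrayBox" and the register ids are illustrative sample values):
//   NewBoxEmitBox.make_new("ArrayBox", [1, 2], 3)
//     → { op: "newbox", box_type: "ArrayBox", args: [1, 2], dst: 3 }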
+using "apps/lib/json_native/stringify.hako" as JSON + +static box NewBoxEmitBox { + make_new(box_type, arg_ids, dst) { + return {op: "newbox", box_type: box_type, args: arg_ids, dst: dst} + } + + // Optional: attach original ids_text snapshot for rebuild compatibility + set_args_text(node, args_text) { + if node == null { return null } + node.set("args_text", args_text) + return node + } + + // Overload: accept args as ArrayBox and also record args_text as JSON string + with_args_array(node, arg_ids) { + if node == null { return null } + if arg_ids == null { arg_ids = new ArrayBox() } + node.set("args", arg_ids) + node.set("args_text", JSON.stringify_array(arg_ids)) + return node + } + + // Overload: accept args as text; keep args as-is + with_args_text(node, args_text) { + if node == null { return null } + node.set("args_text", args_text) + return node + } +} + +static box NewBoxEmitBoxStub { main(args) { return 0 } } + diff --git a/lang/src/compiler/emit/mir_emitter_box.hako b/lang/src/compiler/emit/mir_emitter_box.hako new file mode 100644 index 00000000..c1b1903f --- /dev/null +++ b/lang/src/compiler/emit/mir_emitter_box.hako @@ -0,0 +1,179 @@ +// MirEmitterBox — Minimal MIR JSON v0 emitter (M2 MVP) +// Scope: Return(Int) only (const + ret). Safe default to 0 when not found. +// Future: add Binary/Compare/ExternCall/BoxCall lowering incrementally. + +using selfhost.common.json.mir_builder_min as MirJsonBuilderMin +using "apps/lib/json_native/stringify.hako" as JSON +using "lang/src/shared/common/string_helpers.hako" as StringHelpers + +static box MirEmitterBox { + _index_of(hay, needle) { return hay.indexOf(needle) } + _index_of_from(hay, needle, pos) { + // Delegate to StringHelpers with adjusted argument order + return StringHelpers.index_of(hay, pos, needle) + } + _read_digits(text, pos) { return StringHelpers.read_digits(text, pos) } + _str_to_int(s) { return StringHelpers.to_i64(s) } + + _map_binop(op) { + if op == "Add" || op == "+" { return "Add" } + if op == "Sub" || op == "-" { return "Sub" } + if op == "Mul" || op == "*" { return "Mul" } + if op == "Div" || op == "/" { return "Div" } + if op == "Mod" || op == "%" { return "Mod" } + return "" + } + _map_cmp(op) { + if op == "Eq" || op == "==" { return "Eq" } + if op == "Ne" || op == "!=" { return "Ne" } + if op == "Lt" || op == "<" { return "Lt" } + if op == "Le" || op == "<=" { return "Le" } + if op == "Gt" || op == ">" { return "Gt" } + if op == "Ge" || op == ">=" { return "Ge" } + return "" + } + + // Try to synthesize MIR from a tiny subset of Stage‑1 JSON patterns. + // Supported (best-effort): Return(BinOp(Int,Int)), Return(Compare(Int,Int)) + // Note: Compare emits minimal control-flow (branch/jump) to exercise CFG. 
+ _emit_from_stage1_try(ast_json) { + if ast_json == null { return null } + // Return(BinOp(...)) + local p = me._index_of(ast_json, "\"type\":\"Return\"") + if p >= 0 { + local q = me._index_of_from(ast_json, "\"type\":\"BinOp\"", p) + if q >= 0 { + // op + local opk_pos = me._index_of_from(ast_json, "\"op\":\"", q) + if opk_pos >= 0 { + local opk_end = me._index_of_from(ast_json, "\"", opk_pos + 6) + local opk = ast_json.substring(opk_pos + 6, opk_end) + local kind = me._map_binop(opk) + if kind != "" { + // lhs/rhs int (seek dynamic start after '"value":') + local lhsp = me._index_of_from(ast_json, "\"lhs\":{\"type\":\"Int\",\"value\":", q) + local rhsp = me._index_of_from(ast_json, "\"rhs\":{\"type\":\"Int\",\"value\":", q) + if lhsp >= 0 && rhsp >= 0 { + local lval = me._index_of_from(ast_json, "\"value\":", lhsp) + local rval = me._index_of_from(ast_json, "\"value\":", rhsp) + if lval >= 0 && rval >= 0 { + local lhs_digits = me._read_digits(ast_json, lval + 8) + local rhs_digits = me._read_digits(ast_json, rval + 8) + if lhs_digits != "" && rhs_digits != "" { + local a = me._str_to_int(lhs_digits) + local b = me._str_to_int(rhs_digits) + // MIR via builder: const a -> 1, const b -> 2, binop kind 1,2 -> 3, ret 3 + local j = MirJsonBuilderMin.make() + |> MirJsonBuilderMin.start_module() + |> MirJsonBuilderMin.start_function("main") + |> MirJsonBuilderMin.start_block(0) + |> MirJsonBuilderMin.add_const(1, a) + |> MirJsonBuilderMin.add_const(2, b) + |> MirJsonBuilderMin.add_binop(kind, 1, 2, 3) + |> MirJsonBuilderMin.add_ret(3) + |> MirJsonBuilderMin.end_all() + |> MirJsonBuilderMin.to_string() + return j + } + } + } + } + } + // Return(Compare(...)) → CFG (entry→then/else→merge) + q = me._index_of_from(ast_json, "\"type\":\"Compare\"", p) + if q >= 0 { + local opk_pos = me._index_of_from(ast_json, "\"op\":\"", q) + if opk_pos >= 0 { + local opk_end = me._index_of_from(ast_json, "\"", opk_pos + 6) + local opk = ast_json.substring(opk_pos + 6, opk_end) + local kind = me._map_cmp(opk) + if kind != "" { + local lhsp = me._index_of_from(ast_json, "\"lhs\":{\"type\":\"Int\",\"value\":", q) + local rhsp = me._index_of_from(ast_json, "\"rhs\":{\"type\":\"Int\",\"value\":", q) + if lhsp >= 0 && rhsp >= 0 { + local lval = me._index_of_from(ast_json, "\"value\":", lhsp) + local rval = me._index_of_from(ast_json, "\"value\":", rhsp) + if lval >= 0 && rval >= 0 { + local lhs_digits = me._read_digits(ast_json, lval + 8) + local rhs_digits = me._read_digits(ast_json, rval + 8) + if lhs_digits != "" && rhs_digits != "" { + local a = me._str_to_int(lhs_digits) + local b = me._str_to_int(rhs_digits) + // MIR CFG via declarative Map/Array + stringify + local mir = { + functions: [{ + name: "main", + params: [], + blocks: [ + { id: 0, instructions: [ + { op:"const", dst:1, value:{ type:"i64", value:a } }, + { op:"const", dst:2, value:{ type:"i64", value:b } }, + { op:"compare", cmp: kind, lhs:1, rhs:2, dst:3 }, + { op:"branch", cond:3, "then":1, "else":2 } + ] + }, + { id: 1, instructions: [ + { op:"const", dst:6, value:{ type:"i64", value:1 } }, + { op:"jump", target:3 } + ] + }, + { id: 2, instructions: [ + { op:"const", dst:6, value:{ type:"i64", value:0 } }, + { op:"jump", target:3 } + ] + }, + { id: 3, instructions: [ { op:"ret", value:6 } ] } + ] + }] + } + return JSON.stringify_map(mir) + } + } + } + } + } + } + } + return null + } + // Extract first Return(Int) value from Stage-1 JSON (very small string scan) + _extract_return_int(ast_json) { + if ast_json == null { return 0 } + // Look for 
'"type":"Return"' + local p = ast_json.lastIndexOf("\"type\":\"Return\"") + if p < 0 { p = ast_json.indexOf("\"type\":\"Return\"") } + if p < 0 { return 0 } + // From there, search for '"type":"Int","value":' + local q = ast_json.indexOf("\"type\":\"Int\",\"value\":", p) + if q < 0 { return 0 } + q = q + 23 // length of the marker + // Use StringHelpers for digit reading and parsing + local digits = StringHelpers.read_digits(ast_json, q) + if digits.size() == 0 { return 0 } + return StringHelpers.to_i64(digits) + } + + // Build minimal MIR JSON v0: main with const -> ret + emit_mir_min(ast_json) { + // Try richer patterns first; fallback to Return(Int) + local try_rich = me._emit_from_stage1_try(ast_json) + if try_rich != null { return try_rich } + // Extract Return(Int) value (DRY: delegate to helper) + local retv = me._extract_return_int(ast_json) + // Declarative: Map/Array リテラル + JSON.stringify(読みやすさ優先・挙動同一) + local mir = { + functions: [{ + name: "main", + params: [], + blocks: [{ + id: 0, + instructions: [ + { op: "const", dst: 1, value: { type: "i64", value: retv } }, + { op: "ret", value: 1 } + ] + }] + }] + } + return JSON.stringify_map(mir) + } +} diff --git a/lang/src/compiler/entry/compiler.hako b/lang/src/compiler/entry/compiler.hako new file mode 100644 index 00000000..31117c23 --- /dev/null +++ b/lang/src/compiler/entry/compiler.hako @@ -0,0 +1,25 @@ +// Compiler entry (MVP) +// - When invoked with --min-json, emit minimal Program JSON v0 to stdout +// - Otherwise, act as a silent placeholder (return 0) + +static box CompilerEntry { + main(args) { + // Detect --min-json flag + local emit = 0 + if args != null { + local n = args.length() + local i = 0 + loop(i < n) { + local a = args.get(i) + // Robust compare: coerce to string before equality + local s = "" + a + if s == "--min-json" { emit = 1 break } + i = i + 1 + } + } + if emit == 1 { + print("{\\\"version\\\":0,\\\"kind\\\":\\\"Program\\\",\\\"body\\\":[{\\\"type\\\":\\\"Return\\\",\\\"expr\\\":{\\\"type\\\":\\\"Int\\\",\\\"value\\\":7}}]}") + } + return 0 + } +} diff --git a/lang/src/compiler/hako_module.toml b/lang/src/compiler/hako_module.toml new file mode 100644 index 00000000..3f738ac0 --- /dev/null +++ b/lang/src/compiler/hako_module.toml @@ -0,0 +1,37 @@ +[module] +name = "lang.compiler" +version = "1.0.0" + +[exports] +# Parser +parser.box = "parser/parser_box.hako" +parser.scan.parser_string_utils_box = "parser/scan/parser_string_utils_box.hako" +parser.scan.parser_ident_scan_box = "parser/scan/parser_ident_scan_box.hako" +parser.scan.parser_number_scan_box = "parser/scan/parser_number_scan_box.hako" +parser.scan.parser_string_scan_box = "parser/scan/parser_string_scan_box.hako" +parser.using.using_collector_box = "parser/using/using_collector_box.hako" +parser.expr.parser_expr_box = "parser/expr/parser_expr_box.hako" +parser.expr.parser_peek_box = "parser/expr/parser_peek_box.hako" +parser.expr.parser_literal_box = "parser/expr/parser_literal_box.hako" +parser.stmt.parser_stmt_box = "parser/stmt/parser_stmt_box.hako" +parser.stmt.parser_control_box = "parser/stmt/parser_control_box.hako" +parser.stmt.parser_exception_box = "parser/stmt/parser_exception_box.hako" + +# Stage‑1 emit helpers +stage1.json_program_box = "stage1/json_program_box.hako" +stage1.emitter_box = "stage1/emitter_box.hako" + +# Pipeline v2 (emit only) +pipeline_v2.flow_entry = "pipeline_v2/flow_entry.hako" +pipeline_v2.pipeline = "pipeline_v2/pipeline.hako" +pipeline_v2.using_resolver = "pipeline_v2/using_resolver_box.hako" + +# Builder / 
SSA / Rewrite (scaffolds) +builder.ssa.local = "builder/ssa/local.hako" +builder.ssa.loop = "builder/ssa/loopssa.hako" +builder.ssa.cond_inserter = "builder/ssa/cond_inserter.hako" +builder.rewrite.special = "builder/rewrite/special.hako" +builder.rewrite.known = "builder/rewrite/known.hako" + +[dependencies] +"selfhost.shared" = "^1.0.0" diff --git a/lang/src/compiler/parser/expr/parser_expr_box.hako b/lang/src/compiler/parser/expr/parser_expr_box.hako new file mode 100644 index 00000000..776202ce --- /dev/null +++ b/lang/src/compiler/parser/expr/parser_expr_box.hako @@ -0,0 +1,355 @@ +// Moved from apps/selfhost-compiler/boxes/parser/expr/parser_expr_box.hako +// ParserExprBox — expression parser coordinator +// Responsibility: Parse expressions and delegate to specialized boxes +// API: parse(src, i, ctx) -> JSON (delegates to parse_expr2) + +using lang.compiler.parser.scan.parser_number_scan_box +using lang.compiler.parser.expr.parser_peek_box +using lang.compiler.parser.expr.parser_literal_box + +static box ParserExprBox { + parse_number2(src, i, ctx) { + local pair = ParserNumberScanBox.scan_int(src, i) + local at = pair.lastIndexOf("@") + local json = pair.substring(0, at) + local pos = i + if at >= 0 { pos = ctx.to_int(pair.substring(at+1, pair.size())) } + ctx.gpos_set(pos) + return json + } + + parse_string2(src, i, ctx) { + local n = src.size() + local j = i + 1 + local out = "" + local guard = 0 + local max = 200000 + + loop(j < n) { + if guard > max { break } + guard = guard + 1 + local ch = src.substring(j, j+1) + + if ch == "\"" { + j = j + 1 + ctx.gpos_set(j) + return "{\"type\":\"Str\",\"value\":\"" + ctx.esc_json(out) + "\"}" + } + + if ch == "\\" && j + 1 < n { + local nx = src.substring(j+1, j+2) + if nx == "\"" { out = out + "\"" j = j + 2 } + else { if nx == "\\" { out = out + "\\" j = j + 2 } + else { if nx == "n" { out = out + "\n" j = j + 2 } + else { if nx == "r" { out = out + "\r" j = j + 2 } + else { if nx == "t" { out = out + "\t" j = j + 2 } + else { if nx == "u" && j + 5 < n { out = out + src.substring(j, j+6) j = j + 6 } + else { out = out + nx j = j + 2 } } } } } } + } else { + out = out + ch + j = j + 1 + } + } + + ctx.gpos_set(j) + return "{\"type\":\"Str\",\"value\":\"" + ctx.esc_json(out) + "\"}" + } + + parse_factor2(src, i, ctx) { + local j = ctx.skip_ws(src, i) + if j >= src.size() { + ctx.gpos_set(j) + return "{\"type\":\"Int\",\"value\":0}" + } + + if ctx.starts_with_kw(src, j, "true") == 1 { + ctx.gpos_set(j + 4) + return "{\"type\":\"Bool\",\"value\":true}" + } + + if ctx.starts_with_kw(src, j, "false") == 1 { + ctx.gpos_set(j + 5) + return "{\"type\":\"Bool\",\"value\":false}" + } + + if ctx.starts_with_kw(src, j, "null") == 1 { + ctx.gpos_set(j + 4) + return "{\"type\":\"Null\"}" + } + + // Peek expression: delegate to ParserPeekBox + if ctx.starts_with_kw(src, j, "peek") == 1 { + j = j + 4 + return ParserPeekBox.parse(src, j, ctx) + } + + local ch = src.substring(j, j+1) + + // Parenthesized + if ch == "(" { + local inner = me.parse_expr2(src, j + 1, ctx) + local k = ctx.gpos_get() + k = ctx.skip_ws(src, k) + if src.substring(k, k+1) == ")" { k = k + 1 } + ctx.gpos_set(k) + return inner + } + + // String literal + if ch == "\"" { + return me.parse_string2(src, j, ctx) + } + + // Map literal: delegate to ParserLiteralBox + if ch == "{" { + return ParserLiteralBox.parse_map(src, j, ctx) + } + + // Array literal: delegate to ParserLiteralBox + if ch == "[" { + return ParserLiteralBox.parse_array(src, j, ctx) + } + + // new Class(args) + if 
ctx.starts_with_kw(src, j, "new") == 1 { + local p = ctx.skip_ws(src, j + 3) + local idp = ctx.read_ident2(src, p) + local at = idp.lastIndexOf("@") + local cls = idp.substring(0, at) + local k = ctx.to_int(idp.substring(at+1, idp.size())) + k = ctx.skip_ws(src, k) + if src.substring(k, k+1) == "(" { k = k + 1 } + local args_and_pos = me.parse_args2(src, k, ctx) + local at2 = args_and_pos.lastIndexOf("@") + local args_json = args_and_pos.substring(0, at2) + k = ctx.to_int(args_and_pos.substring(at2+1, args_and_pos.size())) + k = ctx.skip_ws(src, k) + if src.substring(k, k+1) == ")" { k = k + 1 } + ctx.gpos_set(k) + return "{\"type\":\"New\",\"class\":\"" + cls + "\",\"args\":" + args_json + "}" + } + + // Identifier / Call / Method chain + if ctx.is_alpha(ch) { + local idp = ctx.read_ident2(src, j) + local at = idp.lastIndexOf("@") + local name = idp.substring(0, at) + local k = ctx.to_int(idp.substring(at+1, idp.size())) + local node = "{\"type\":\"Var\",\"name\":\"" + name + "\"}" + local cont2 = 1 + + loop(cont2 == 1) { + k = ctx.skip_ws(src, k) + local tch = src.substring(k, k+1) + + if tch == "(" { + k = k + 1 + local args_and_pos = me.parse_args2(src, k, ctx) + local at2 = args_and_pos.lastIndexOf("@") + local args_json = args_and_pos.substring(0, at2) + k = ctx.to_int(args_and_pos.substring(at2+1, args_and_pos.size())) + k = ctx.skip_ws(src, k) + if src.substring(k, k+1) == ")" { k = k + 1 } + node = "{\"type\":\"Call\",\"name\":\"" + name + "\",\"args\":" + args_json + "}" + } else { + if tch == "." { + k = k + 1 + k = ctx.skip_ws(src, k) + local midp = ctx.read_ident2(src, k) + local at3 = midp.lastIndexOf("@") + local mname = midp.substring(0, at3) + k = ctx.to_int(midp.substring(at3+1, midp.size())) + k = ctx.skip_ws(src, k) + if src.substring(k, k+1) == "(" { k = k + 1 } + local args2 = me.parse_args2(src, k, ctx) + local at4 = args2.lastIndexOf("@") + local args_json2 = args2.substring(0, at4) + k = ctx.to_int(args2.substring(at4+1, args2.size())) + k = ctx.skip_ws(src, k) + if src.substring(k, k+1) == ")" { k = k + 1 } + node = "{\"type\":\"Method\",\"recv\":" + node + ",\"method\":\"" + mname + "\",\"args\":" + args_json2 + "}" + } else { + cont2 = 0 + } + } + } + + ctx.gpos_set(k) + return node + } + + // Fallback: number + return me.parse_number2(src, j, ctx) + } + + parse_unary2(src, i, ctx) { + local j = ctx.skip_ws(src, i) + if src.substring(j, j+1) == "-" { + local rhs = me.parse_factor2(src, j + 1, ctx) + j = ctx.gpos_get() + local zero = "{\"type\":\"Int\",\"value\":0}" + ctx.gpos_set(j) + return "{\"type\":\"Binary\",\"op\":\"-\",\"lhs\":" + zero + ",\"rhs\":" + rhs + "}" + } + return me.parse_factor2(src, j, ctx) + } + + parse_term2(src, i, ctx) { + local lhs = me.parse_unary2(src, i, ctx) + local j = ctx.gpos_get() + local cont = 1 + + loop(cont == 1) { + j = ctx.skip_ws(src, j) + if j >= src.size() { + cont = 0 + } else { + local op = src.substring(j, j+1) + if op != "*" && op != "/" { + cont = 0 + } else { + local rhs = me.parse_unary2(src, j+1, ctx) + j = ctx.gpos_get() + lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}" + } + } + } + + ctx.gpos_set(j) + return lhs + } + + parse_sum2(src, i, ctx) { + local lhs = me.parse_term2(src, i, ctx) + local j = ctx.gpos_get() + local cont = 1 + + loop(cont == 1) { + j = ctx.skip_ws(src, j) + if j >= src.size() { + cont = 0 + } else { + local op = src.substring(j, j+1) + if op != "+" && op != "-" { + cont = 0 + } else { + local rhs = me.parse_term2(src, j+1, ctx) + j = 
ctx.gpos_get() + lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}" + } + } + } + + ctx.gpos_set(j) + return lhs + } + + parse_compare2(src, i, ctx) { + local lhs = me.parse_sum2(src, i, ctx) + local j = ctx.gpos_get() + j = ctx.skip_ws(src, j) + local two = src.substring(j, j+2) + local one = src.substring(j, j+1) + local op = "" + + if two == "==" || two == "!=" || two == "<=" || two == ">=" { + op = two + j = j + 2 + } else { + if one == "<" || one == ">" { + op = one + j = j + 1 + } + } + + if op == "" { + ctx.gpos_set(j) + return lhs + } + + local rhs = me.parse_sum2(src, j, ctx) + j = ctx.gpos_get() + ctx.gpos_set(j) + return "{\"type\":\"Compare\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}" + } + + parse_expr2(src, i, ctx) { + local lhs = me.parse_compare2(src, i, ctx) + local j = ctx.gpos_get() + local cont = 1 + + loop(cont == 1) { + j = ctx.skip_ws(src, j) + local two = src.substring(j, j+2) + if two != "&&" && two != "||" { + cont = 0 + } else { + local rhs = me.parse_compare2(src, j+2, ctx) + j = ctx.gpos_get() + lhs = "{\"type\":\"Logical\",\"op\":\"" + two + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}" + } + } + + j = ctx.skip_ws(src, j) + if src.substring(j, j+1) == "?" { + j = j + 1 + j = ctx.skip_ws(src, j) + local then_expr = me.parse_expr2(src, j, ctx) + j = ctx.gpos_get() + j = ctx.skip_ws(src, j) + if src.substring(j, j+1) == ":" { j = j + 1 } + j = ctx.skip_ws(src, j) + local else_expr = me.parse_expr2(src, j, ctx) + j = ctx.gpos_get() + if else_expr.size() == 0 { else_expr = "{\"type\":\"Int\",\"value\":0}" } + ctx.gpos_set(j) + return "{\"type\":\"Ternary\",\"cond\":" + lhs + ",\"then\":" + then_expr + ",\"else\":" + else_expr + "}" + } + + ctx.gpos_set(j) + return lhs + } + + parse_args2(src, i, ctx) { + local j = ctx.skip_ws(src, i) + local n = src.size() + local out = "[" + j = ctx.skip_ws(src, j) + + if j < n && src.substring(j, j+1) == ")" { + return "[]@" + ctx.i2s(j) + } + + // first argument + local e = me.parse_expr2(src, j, ctx) + j = ctx.gpos_get() + out = out + e + + // subsequent arguments with guard + local cont_args = 1 + local guard = 0 + local max = 100000 + + loop(cont_args == 1) { + if guard > max { cont_args = 0 } else { guard = guard + 1 } + local before = j + j = ctx.skip_ws(src, j) + + if j < n && src.substring(j, j+1) == "," { + j = j + 1 + j = ctx.skip_ws(src, j) + e = me.parse_expr2(src, j, ctx) + j = ctx.gpos_get() + out = out + "," + e + } else { + cont_args = 0 + } + + if j == before { cont_args = 0 } + } + + out = out + "]" + return out + "@" + ctx.i2s(j) + } +} + diff --git a/lang/src/compiler/parser/expr/parser_literal_box.hako b/lang/src/compiler/parser/expr/parser_literal_box.hako new file mode 100644 index 00000000..d0a9c382 --- /dev/null +++ b/lang/src/compiler/parser/expr/parser_literal_box.hako @@ -0,0 +1,119 @@ +// Moved from apps/selfhost-compiler/boxes/parser/expr/parser_literal_box.hako +// ParserLiteralBox — Map/Array literal parser +// Responsibility: Parse Map {"k": v, ...} and Array [e1, e2, ...] 
literals +// API: parse_map(src, i, ctx) -> JSON, parse_array(src, i, ctx) -> JSON + +static box ParserLiteralBox { + // Map literal: {"k": v, ...} (string keys only) → Call{name:"map.of", args:[Str(k1), v1, Str(k2), v2, ...]} + parse_map(src, i, ctx) { + local n = src.size() + local j = i + 1 // skip opening '{' + local out = "[" + local first = 1 + local cont = 1 + local guard = 0 + local max = 400000 + + loop(cont == 1) { + if guard > max { cont = 0 } else { guard = guard + 1 } + j = ctx.skip_ws(src, j) + + if j >= n { + cont = 0 + } else { + if src.substring(j, j+1) == "}" { + j = j + 1 + cont = 0 + } else { + // key (string only for Stage-2) + if src.substring(j, j+1) != "\"" { + // degrade by skipping one char to avoid infinite loop + j = j + 1 + continue + } + + local key_raw = ctx.read_string_lit(src, j) + j = ctx.gpos_get() + j = ctx.skip_ws(src, j) + if src.substring(j, j+1) == ":" { j = j + 1 } + j = ctx.skip_ws(src, j) + local val_json = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + local key_json = "{\"type\":\"Str\",\"value\":\"" + ctx.esc_json(key_raw) + "\"}" + + if first == 1 { + out = out + key_json + "," + val_json + first = 0 + } else { + out = out + "," + key_json + "," + val_json + } + + // optional comma + local before2 = j + j = ctx.skip_ws(src, j) + if j < n && src.substring(j, j+1) == "," { j = j + 1 } + + // progress guard (in case of malformed input) + if j <= before2 { + if j < n { j = j + 1 } else { j = n } + } + } + } + } + + out = out + "]" + ctx.gpos_set(j) + return "{\"type\":\"Call\",\"name\":\"map.of\",\"args\":" + out + "}" + } + + // Array literal: [e1, e2, ...] → Call{name:"array.of", args:[...]} + parse_array(src, i, ctx) { + local n = src.size() + local j = i + 1 // skip opening '[' + local out = "[" + local first = 1 + local cont = 1 + local guard = 0 + local max = 400000 + + loop(cont == 1) { + if guard > max { cont = 0 } else { guard = guard + 1 } + j = ctx.skip_ws(src, j) + + if j >= n { + cont = 0 + } else { + if src.substring(j, j+1) == "]" { + j = j + 1 + cont = 0 + } else { + local before = j + local ej = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + + if first == 1 { + out = out + ej + first = 0 + } else { + out = out + "," + ej + } + + // optional comma+whitespace + local before2 = j + j = ctx.skip_ws(src, j) + if j < n && src.substring(j, j+1) == "," { j = j + 1 } + + // progress guard + if j <= before { + if j < n { j = j + 1 } else { j = n } + } + } + } + } + + out = out + "]" + ctx.gpos_set(j) + return "{\"type\":\"Call\",\"name\":\"array.of\",\"args\":" + out + "}" + } +} + diff --git a/lang/src/compiler/parser/expr/parser_peek_box.hako b/lang/src/compiler/parser/expr/parser_peek_box.hako new file mode 100644 index 00000000..2d78aca8 --- /dev/null +++ b/lang/src/compiler/parser/expr/parser_peek_box.hako @@ -0,0 +1,104 @@ +// Moved from apps/selfhost-compiler/boxes/parser/expr/parser_peek_box.hako +// ParserPeekBox — peek expression parser (peek { "label" => , ..., else => }) +// Responsibility: Parse peek expressions (pattern-matching syntax) +// API: parse(src, i, ctx) -> JSON string + +static box ParserPeekBox { + parse(src, i, ctx) { + // ctx is ParserBox for delegation + local j = i + local n = src.size() + + // Parse scrutinee expression + local scr = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + j = ctx.skip_ws(src, j) + + // Enter arms block + if src.substring(j, j+1) == "{" { j = j + 1 } + j = ctx.skip_ws(src, j) + + local arms_json = "[" + local first_arm = 1 + local else_json = null + local contp = 1 + local guardp = 0 + 
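// Illustrative shape of the node this parser returns (derived from the return statement below; the source snippet is hypothetical):
//   peek x { "a" => 1 else => 0 }
// is expected to yield the Stage-1 JSON
//   {"type":"Peek","scrutinee":{"type":"Var","name":"x"},
//    "arms":[{"label":"a","expr":{"type":"Int","value":1}}],
//    "else":{"type":"Int","value":0}}
// assuming ctx.parse_expr2 lowers the identifier to a Var node and the bare integers to Int nodes.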
local maxp = 400000 + + loop(contp == 1) { + if guardp > maxp { contp = 0 } else { guardp = guardp + 1 } + j = ctx.skip_ws(src, j) + + if j >= n { + contp = 0 + } else { + if src.substring(j, j+1) == "}" { + j = j + 1 + contp = 0 + } else { + // else arm or labeled arm + if ctx.starts_with_kw(src, j, "else") == 1 { + j = j + 4 + j = ctx.skip_ws(src, j) + if src.substring(j, j+2) == "=>" { j = j + 2 } + j = ctx.skip_ws(src, j) + + // else body may be a block or bare expr + if src.substring(j, j+1) == "{" { + j = j + 1 + j = ctx.skip_ws(src, j) + else_json = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + j = ctx.skip_ws(src, j) + if src.substring(j, j+1) == "}" { j = j + 1 } + } else { + else_json = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + } + } else { + // labeled arm: string literal label + if src.substring(j, j+1) != "\"" { + // degrade safely to avoid infinite loop + j = j + 1 + continue + } + + local label_raw = ctx.read_string_lit(src, j) + j = ctx.gpos_get() + j = ctx.skip_ws(src, j) + if src.substring(j, j+2) == "=>" { j = j + 2 } + j = ctx.skip_ws(src, j) + + // arm expr: block or bare expr + local expr_json = "{\"type\":\"Int\",\"value\":0}" + if src.substring(j, j+1) == "{" { + j = j + 1 + j = ctx.skip_ws(src, j) + expr_json = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + j = ctx.skip_ws(src, j) + if src.substring(j, j+1) == "}" { j = j + 1 } + } else { + expr_json = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + } + + local arm_json = "{\"label\":\"" + ctx.esc_json(label_raw) + "\",\"expr\":" + expr_json + "}" + if first_arm == 1 { + arms_json = arms_json + arm_json + first_arm = 0 + } else { + arms_json = arms_json + "," + arm_json + } + } + } + } + } + + arms_json = arms_json + "]" + if else_json == null { else_json = "{\"type\":\"Null\"}" } + ctx.gpos_set(j) + return "{\"type\":\"Peek\",\"scrutinee\":" + scr + ",\"arms\":" + arms_json + ",\"else\":" + else_json + "}" + } +} + diff --git a/lang/src/compiler/parser/parser_box.hako b/lang/src/compiler/parser/parser_box.hako new file mode 100644 index 00000000..3ea7300e --- /dev/null +++ b/lang/src/compiler/parser/parser_box.hako @@ -0,0 +1,239 @@ +// Moved from apps/selfhost-compiler/boxes/parser/parser_box.hako +// ParserBox — Stage‑1 JSON v0 generator (coordinator, delegates to specialized boxes) +// Responsibility: Coordinate parsing, manage state, delegate to specialized boxes +// API: parse_program2(src) -> JSON + +using lang.compiler.parser.scan.parser_string_utils_box +using lang.compiler.parser.scan.parser_ident_scan_box +using lang.compiler.parser.scan.parser_string_scan_box +using lang.compiler.parser.using.using_collector_box +using lang.compiler.parser.expr.parser_expr_box +using lang.compiler.parser.stmt.parser_stmt_box +using lang.compiler.parser.stmt.parser_control_box + +box ParserBox { + gpos + usings_json + stage3 + + birth() { + me.gpos = 0 + me.usings_json = "[]" + me.stage3 = 0 + return 0 + } + + stage3_enable(flag) { + if flag == null { flag = 0 } + if flag == 0 { me.stage3 = 0 } else { me.stage3 = 1 } + return 0 + } + + stage3_enabled() { + if me.stage3 == 1 { return 1 } + return 0 + } + + // === State management === + gpos_set(i) { me.gpos = i return 0 } + gpos_get() { return me.gpos } + + // === JSON utilities === + esc_json(s) { + local out = "" + local i = 0 + local n = s.size() + loop(i < n) { + local ch = s.substring(i, i+1) + if ch == "\\" { out = out + "\\\\" } + else { if ch == "\"" { out = out + "\\\"" } + else { out = out + ch } } + i = i + 1 + } + return out + } + + // === Delegation to 
ParserStringUtilsBox === + is_digit(ch) { return ParserStringUtilsBox.is_digit(ch) } + + is_space(ch) { return ParserStringUtilsBox.is_space(ch) } + + is_alpha(ch) { return ParserStringUtilsBox.is_alpha(ch) } + + starts_with(src, i, pat) { return ParserStringUtilsBox.starts_with(src, i, pat) } + + index_of(src, i, pat) { return ParserStringUtilsBox.index_of(src, i, pat) } + + trim(s) { return ParserStringUtilsBox.trim(s) } + + starts_with_kw(src, i, kw) { return ParserStringUtilsBox.starts_with_kw(src, i, kw) } + + i2s(v) { return ParserStringUtilsBox.i2s(v) } + + to_int(s) { return ParserStringUtilsBox.to_int(s) } + + skip_ws(src, i) { return ParserStringUtilsBox.skip_ws(src, i) } + + // === Delegation to scanner boxes === + read_ident2(src, i) { return ParserIdentScanBox.scan_ident(src, i) } + + read_string_lit(src, i) { + local pair = ParserStringScanBox.scan(src, i) + local at = pair.lastIndexOf("@") + local content = pair.substring(0, at) + local pos = 0 + if at >= 0 { pos = me.to_int(pair.substring(at+1, pair.size())) } + else { pos = i } + me.gpos_set(pos) + return content + } + + // === using system === + add_using(kind, target, alias) { + local cur = me.usings_json + if cur == null || cur.size() == 0 { cur = "[]" } + + local name = "" + local path = null + + if kind == "path" { + path = target + if alias != null { + name = alias + } else { + local p = target + local idx = -1 + local t = 0 + loop(t < p.size()) { + if p.substring(t,t+1) == "/" { idx = t } + t = t + 1 + } + if idx >= 0 { p = p.substring(idx+1, p.size()) } + + if p.size() > 5 && me.starts_with(p, p.size()-5, ".hako") == 1 { + p = p.substring(0, p.size()-5) + } else { + if p.size() > 6 && me.starts_with(p, p.size()-6, ".nyash") == 1 { + p = p.substring(0, p.size()-6) + } + } + name = p + } + } else { + name = target + if alias != null { name = alias } + } + + local entry = "{\"name\":\"" + me.esc_json(name) + "\"" + if path != null { entry = entry + ",\"path\":\"" + me.esc_json(path) + "\"" } + entry = entry + "}" + + if cur == "[]" { + me.usings_json = "[" + entry + "]" + return 0 + } + + local pos = cur.lastIndexOf("]") + if pos < 0 { + me.usings_json = "[" + entry + "]" + return 0 + } + + me.usings_json = cur.substring(0, pos) + "," + entry + "]" + return 0 + } + + extract_usings(src) { + me.usings_json = UsingCollectorBox.collect(src) + return 0 + } + + get_usings_json() { + return me.usings_json + } + + // === Delegation to ParserExprBox === + parse_expr2(src, i) { + local expr = new ParserExprBox() + return expr.parse_expr2(src, i, me) + } + + // === Delegation to ParserStmtBox === + parse_stmt2(src, i) { + local stmt = new ParserStmtBox() + return stmt.parse(src, i, me) + } + + // === Delegation to ParserControlBox === + parse_block2(src, i) { + local ctrl = new ParserControlBox() + return ctrl.parse_block(src, i, me) + } + + // === Top-level program parser === + parse_program2(src) { + local i = me.skip_ws(src, 0) + local body = "[" + local first = 1 + local cont_prog = 1 + + loop(cont_prog == 1) { + i = me.skip_ws(src, i) + + if i >= src.size() { + cont_prog = 0 + } else { + local start_i = i + local s = me.parse_stmt2(src, i) + i = me.gpos_get() + + // Progress guard + if i <= start_i { + if i < src.size() { i = i + 1 } + else { i = src.size() } + me.gpos_set(i) + } + + // consume optional semicolons + local done2 = 0 + local guard2 = 0 + local max2 = 100000 + + loop(done2 == 0) { + if guard2 > max2 { done2 = 1 } + else { guard2 = guard2 + 1 } + + local before2 = i + i = me.skip_ws(src, i) + + if i < 
src.size() && src.substring(i, i+1) == ";" { + i = i + 1 + } else { + done2 = 1 + } + + if i == before2 { done2 = 1 } + } + + if s.size() > 0 { + if first == 1 { + body = body + s + first = 0 + } else { + body = body + "," + s + } + } + } + } + + body = body + "]" + return "{\"version\":0,\"kind\":\"Program\",\"body\":" + body + "}" + } +} + +static box ParserStub { + main(args) { + return 0 + } +} + diff --git a/lang/src/compiler/parser/scan/parser_common_utils_box.hako b/lang/src/compiler/parser/scan/parser_common_utils_box.hako new file mode 100644 index 00000000..3b6d03f7 --- /dev/null +++ b/lang/src/compiler/parser/scan/parser_common_utils_box.hako @@ -0,0 +1,72 @@ +// Moved from apps/selfhost-compiler/boxes/parser/scan/parser_common_utils_box.hako +// ParserCommonUtilsBox — shared utility functions for parser boxes +// Responsibility: Provide common string/character operations used across parser components +// Notes: Pure utility functions; no state, no dependencies + +static box ParserCommonUtilsBox { + // ===== 数値・文字列変換 ===== + + i2s(v) { return "" + v } + + // ===== 文字判定 ===== + + is_alpha(ch) { + return (ch >= "A" && ch <= "Z") || (ch >= "a" && ch <= "z") || ch == "_" + } + + is_digit(ch) { + return ch >= "0" && ch <= "9" + } + + // ===== 文字列検索・操作 ===== + + dq() { return "\"" } + + starts_with(src, i, pat) { + local n = src.size() + local m = pat.size() + if i + m > n { return 0 } + local k = 0 + loop(k < m) { + if src.substring(i + k, i + k + 1) != pat.substring(k, k + 1) { return 0 } + k = k + 1 + } + return 1 + } + + index_of(src, i, pat) { + local n = src.size() + local m = pat.size() + if m == 0 { return i } + local j = i + loop(j + m <= n) { + if me.starts_with(src, j, pat) { return j } + j = j + 1 + } + return -1 + } + + trim(s) { + local i = 0 + local n = s.size() + loop(i < n && (s.substring(i,i+1) == " " || s.substring(i,i+1) == "\t")) { i = i + 1 } + local j = n + loop(j > i && (s.substring(j-1,j) == " " || s.substring(j-1,j) == "\t" || s.substring(j-1,j) == ";")) { j = j - 1 } + return s.substring(i, j) + } + + esc_json(s) { + local out = "" + local i = 0 + local n = s.size() + loop(i < n) { + local ch = s.substring(i, i+1) + if ch == "\\" { out = out + "\\\\" } + else { if ch == "\"" { out = out + "\\\"" } + else { out = out + ch } } + i = i + 1 + } + return out + } +} + diff --git a/lang/src/compiler/parser/scan/parser_ident_scan_box.hako b/lang/src/compiler/parser/scan/parser_ident_scan_box.hako new file mode 100644 index 00000000..1f27ccc5 --- /dev/null +++ b/lang/src/compiler/parser/scan/parser_ident_scan_box.hako @@ -0,0 +1,22 @@ +// Moved from apps/selfhost-compiler/boxes/parser/scan/parser_ident_scan_box.hako +using lang.compiler.parser.scan.parser_string_utils_box as ParserStringUtilsBox + +static box ParserIdentScanBox { + scan_ident(src, i) { + local j = i + local n = src.size() + if j >= n { return "@" + ParserStringUtilsBox.i2s(i) } + // first char: alpha or '_' + local ch = src.substring(j, j+1) + if !(ParserStringUtilsBox.is_alpha(ch) || ch == "_") { return "@" + ParserStringUtilsBox.i2s(i) } + j = j + 1 + loop(j < n) { + local ch2 = src.substring(j, j+1) + if ParserStringUtilsBox.is_alpha(ch2) || ParserStringUtilsBox.is_digit(ch2) || ch2 == "_" { j = j + 1 } + else { break } + } + local s = src.substring(i, j) + return s + "@" + ParserStringUtilsBox.i2s(j) + } +} + diff --git a/lang/src/compiler/parser/scan/parser_number_scan_box.hako b/lang/src/compiler/parser/scan/parser_number_scan_box.hako new file mode 100644 index 00000000..0278fdf3 --- /dev/null 
+++ b/lang/src/compiler/parser/scan/parser_number_scan_box.hako @@ -0,0 +1,26 @@ +// Moved from apps/selfhost-compiler/boxes/parser/scan/parser_number_scan_box.hako +// ParserNumberScanBox — scan integer literal starting at index i +// Returns: "{\"type\":\"Int\",\"value\":}@" + +using lang.compiler.parser.scan.parser_common_utils_box as Utils + +static box ParserNumberScanBox { + scan_int(src, i) { + if src == null { return "{\"type\":\"Int\",\"value\":0}@" + Utils.i2s(i) } + local n = src.size() + local j = i + local cont = 1 + local guard = 0 + local max = 100000 + loop(cont == 1) { + if guard > max { cont = 0 } else { guard = guard + 1 } + if j < n { + if Utils.is_digit(src.substring(j, j+1)) { j = j + 1 } else { cont = 0 } + } else { cont = 0 } + } + local s = src.substring(i, j) + if s.size() == 0 { s = "0" } + return "{\"type\":\"Int\",\"value\":" + s + "}@" + Utils.i2s(j) + } +} + diff --git a/lang/src/compiler/parser/scan/parser_string_scan_box.hako b/lang/src/compiler/parser/scan/parser_string_scan_box.hako new file mode 100644 index 00000000..528d9f94 --- /dev/null +++ b/lang/src/compiler/parser/scan/parser_string_scan_box.hako @@ -0,0 +1,50 @@ +// Moved from apps/selfhost-compiler/boxes/parser/scan/parser_string_scan_box.hako +// ParserStringScanBox — string literal scanner (escape-aware) +// Responsibility: read a Ny string literal starting at index i (i points to '"'). +// Returns: "@" where is the index after the closing quote. +// Notes: pure string scanning; no external deps. + +using lang.compiler.parser.scan.parser_common_utils_box as Utils + +static box ParserStringScanBox { + scan(src, i) { + if src == null { return "@" + Utils.i2s(i) } + local n = src.size() + local j = i + if j >= n || src.substring(j, j+1) != "\"" { return "@" + Utils.i2s(i) } + j = j + 1 + local out = "" + local guard = 0 + local max = 200000 + loop(j < n) { + if guard > max { break } else { guard = guard + 1 } + local ch = src.substring(j, j+1) + if ch == "\"" { + j = j + 1 + return out + "@" + Utils.i2s(j) + } + if ch == "\\" && j + 1 < n { + local nx = src.substring(j+1, j+2) + if nx == "\"" { out = out + "\"" j = j + 2 } + else { + if nx == "\\" { out = out + "\\" j = j + 2 } else { + if nx == "n" { out = out + "\n" j = j + 2 } else { + if nx == "r" { out = out + "\n" j = j + 2 } else { + if nx == "t" { out = out + "\t" j = j + 2 } else { + if nx == "u" && j + 5 < n { out = out + src.substring(j, j+6) j = j + 6 } + else { out = out + nx j = j + 2 } + } + } + } + } + } + } else { + out = out + ch + j = j + 1 + } + } + // if unterminated, return what we have and the last pos to avoid infinite loops + return out + "@" + Utils.i2s(j) + } +} + diff --git a/lang/src/compiler/parser/scan/parser_string_utils_box.hako b/lang/src/compiler/parser/scan/parser_string_utils_box.hako new file mode 100644 index 00000000..acdd2091 --- /dev/null +++ b/lang/src/compiler/parser/scan/parser_string_utils_box.hako @@ -0,0 +1,20 @@ +// Moved from apps/selfhost-compiler/boxes/parser/scan/parser_string_utils_box.hako +// ParserStringUtilsBox — Delegation to StringHelpers (unified string utilities) +// Responsibility: Backward compatibility wrapper for parser code +// Notes: All functionality now provided by apps/selfhost/common/string_helpers.hako + +using "lang/src/shared/common/string_helpers.hako" as StringHelpers + +static box ParserStringUtilsBox { + // Delegate all methods to StringHelpers (centralized implementation) + i2s(v) { return StringHelpers.int_to_str(v) } + is_digit(ch) { return 
StringHelpers.is_digit(ch) } + is_space(ch) { return StringHelpers.is_space(ch) } + is_alpha(ch) { return StringHelpers.is_alpha(ch) } + starts_with(src, i, pat) { return StringHelpers.starts_with(src, i, pat) } + starts_with_kw(src, i, kw) { return StringHelpers.starts_with_kw(src, i, kw) } + index_of(src, i, pat) { return StringHelpers.index_of(src, i, pat) } + trim(s) { return StringHelpers.trim(s) } + to_int(s) { return StringHelpers.to_i64(s) } + skip_ws(src, i) { return StringHelpers.skip_ws(src, i) } +} diff --git a/lang/src/compiler/parser/stmt/parser_control_box.hako b/lang/src/compiler/parser/stmt/parser_control_box.hako new file mode 100644 index 00000000..0711e827 --- /dev/null +++ b/lang/src/compiler/parser/stmt/parser_control_box.hako @@ -0,0 +1,173 @@ +// Moved from apps/selfhost-compiler/boxes/parser/stmt/parser_control_box.hako +// ParserControlBox — if/loop/break/continue parser +// Responsibility: Parse control flow statements +// API: parse_if, parse_loop, parse_break, parse_continue, parse_block + +static box ParserControlBox { + // Parse: if (cond) { ... } else { ... } + parse_if(src, i, stmt_start, ctx) { + local j = i + 2 // skip "if" + j = ctx.skip_ws(src, j) + local paren = 0 + if src.substring(j, j+1) == "(" { paren = 1 j = j + 1 } + + local cond = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + + if paren == 1 { + j = ctx.skip_ws(src, j) + if src.substring(j, j+1) == ")" { j = j + 1 } + } + + j = ctx.skip_ws(src, j) + local then_res = me.parse_block(src, j, ctx) + local at1 = then_res.lastIndexOf("@") + local then_json = then_res.substring(0, at1) + j = ctx.to_int(then_res.substring(at1+1, then_res.size())) + j = ctx.skip_ws(src, j) + + local else_json = null + if ctx.starts_with_kw(src, j, "else") == 1 { + j = j + 4 + j = ctx.skip_ws(src, j) + local else_res = me.parse_block(src, j, ctx) + local at2 = else_res.lastIndexOf("@") + else_json = else_res.substring(0, at2) + j = ctx.to_int(else_res.substring(at2+1, else_res.size())) + } + + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + + if else_json == null { + return "{\"type\":\"If\",\"cond\":" + cond + ",\"then\":" + then_json + "}" + } else { + return "{\"type\":\"If\",\"cond\":" + cond + ",\"then\":" + then_json + ",\"else\":" + else_json + "}" + } + } + + // Parse: loop(cond) { ... 
} + parse_loop(src, i, stmt_start, ctx) { + local j = i + 4 // skip "loop" + j = ctx.skip_ws(src, j) + if src.substring(j, j+1) == "(" { j = j + 1 } + + local cond = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + j = ctx.skip_ws(src, j) + if src.substring(j, j+1) == ")" { j = j + 1 } + j = ctx.skip_ws(src, j) + + local body_res = me.parse_block(src, j, ctx) + local at3 = body_res.lastIndexOf("@") + local body_json = body_res.substring(0, at3) + j = ctx.to_int(body_res.substring(at3+1, body_res.size())) + + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Loop\",\"cond\":" + cond + ",\"body\":" + body_json + "}" + } + + // Parse: break → {type:"Break"} (Stage-3) or no-op + parse_break(src, i, stmt_start, ctx) { + local j = i + 5 // skip "break" + + if ctx.stage3_enabled() == 1 { + j = ctx.skip_ws(src, j) + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Break\"}" + } + + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}" + } + + // Parse: continue → {type:"Continue"} (Stage-3) or no-op + parse_continue(src, i, stmt_start, ctx) { + local j = i + 8 // skip "continue" + + if ctx.stage3_enabled() == 1 { + j = ctx.skip_ws(src, j) + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Continue\"}" + } + + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}" + } + + // Parse: { stmt1; stmt2; ... } + parse_block(src, i, ctx) { + local j = ctx.skip_ws(src, i) + if src.substring(j, j+1) != "{" { return "[]@" + ctx.i2s(j) } + j = j + 1 + + local body = "[" + local first = 1 + local cont_block = 1 + + loop(cont_block == 1) { + j = ctx.skip_ws(src, j) + + if j >= src.size() { + cont_block = 0 + } else { + if src.substring(j, j+1) == "}" { + j = j + 1 + cont_block = 0 + } else { + local start_j = j + local s = ctx.parse_stmt2(src, j) + j = ctx.gpos_get() + + // Progress guard: ensure forward movement to avoid infinite loop on malformed input + if j <= start_j { + if j < src.size() { j = j + 1 } else { j = src.size() } + ctx.gpos_set(j) + } + + // consume optional semicolons (ASI minimal) + local done = 0 + local guard = 0 + local max = 100000 + loop(done == 0) { + if guard > max { done = 1 } else { guard = guard + 1 } + local before = j + j = ctx.skip_ws(src, j) + if j < src.size() && src.substring(j, j+1) == ";" { j = j + 1 } else { done = 1 } + if j == before { done = 1 } + } + + if s.size() > 0 { + if first == 1 { + body = body + s + first = 0 + } else { + body = body + "," + s + } + } + } + } + } + + body = body + "]" + return body + "@" + ctx.i2s(j) + } +} + diff --git a/lang/src/compiler/parser/stmt/parser_exception_box.hako b/lang/src/compiler/parser/stmt/parser_exception_box.hako new file mode 100644 index 00000000..a3e45f57 --- /dev/null +++ b/lang/src/compiler/parser/stmt/parser_exception_box.hako @@ -0,0 +1,152 @@ +// Moved from apps/selfhost-compiler/boxes/parser/stmt/parser_exception_box.hako +// ParserExceptionBox — try/catch/throw parser +// Responsibility: Parse exception handling constructs +// API: parse_try(src, i, ctx) -> JSON, parse_throw(src, i, ctx) -> JSON + +static box ParserExceptionBox { + // Parse: throw expr → {type:"Throw", 
expr:...} (Stage-3) or {type:"Expr", expr:...} (fallback) + parse_throw(src, i, stmt_start, ctx) { + local j = i + 5 // skip "throw" + j = ctx.skip_ws(src, j) + local e_throw = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + + if ctx.stage3_enabled() == 1 { + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Throw\",\"expr\":" + e_throw + "}" + } + + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Expr\",\"expr\":" + e_throw + "}" + } + + // Parse: try { ... } (catch ...)* (cleanup { ... })? → {type:"Try", ...} (Stage-3) or no-op + parse_try(src, i, stmt_start, ctx) { + local j = i + 3 // skip "try" + j = ctx.skip_ws(src, j) + + // parse try block + local try_res = ctx.parse_block2(src, j) + local at_t = try_res.lastIndexOf("@") + local try_json = try_res.substring(0, at_t) + j = ctx.to_int(try_res.substring(at_t+1, try_res.size())) + + local catches_json = "[" + local catch_first = 1 + + // zero or more catch + local guard_ct = 0 + local max_ct = 100 + local cont_ct = 1 + loop(cont_ct == 1) { + if guard_ct > max_ct { cont_ct = 0 } else { guard_ct = guard_ct + 1 } + j = ctx.skip_ws(src, j) + + if ctx.starts_with_kw(src, j, "catch") == 1 { + j = j + 5 + j = ctx.skip_ws(src, j) + local catch_type = null + local catch_param = null + + if src.substring(j, j+1) == "(" { + j = j + 1 + j = ctx.skip_ws(src, j) + + // optional type + name + if ctx.is_alpha(src.substring(j, j+1)) { + local id1 = ctx.read_ident2(src, j) + local at1 = id1.lastIndexOf("@") + catch_type = id1.substring(0, at1) + j = ctx.to_int(id1.substring(at1+1, id1.size())) + j = ctx.skip_ws(src, j) + } + + if ctx.is_alpha(src.substring(j, j+1)) { + local id2 = ctx.read_ident2(src, j) + local at2 = id2.lastIndexOf("@") + catch_param = id2.substring(0, at2) + j = ctx.to_int(id2.substring(at2+1, id2.size())) + j = ctx.skip_ws(src, j) + } + + if src.substring(j, j+1) == ")" { j = j + 1 } + } + + j = ctx.skip_ws(src, j) + + // catch body + local c_res = ctx.parse_block2(src, j) + local atc = c_res.lastIndexOf("@") + j = ctx.to_int(c_res.substring(atc+1, c_res.size())) + + if ctx.stage3_enabled() == 1 { + local entry = "{" + local wrote = 0 + + if catch_param != null && catch_param.size() > 0 { + entry = entry + "\"param\":\"" + ctx.esc_json(catch_param) + "\"" + wrote = 1 + } + + if catch_type != null && catch_type.size() > 0 { + if wrote == 1 { entry = entry + "," } + entry = entry + "\"typeHint\":\"" + ctx.esc_json(catch_type) + "\"" + wrote = 1 + } + + local body_json = c_res.substring(0, atc) + if wrote == 1 { entry = entry + "," } + entry = entry + "\"body\":" + body_json + "}" + + if catch_first == 0 { + catches_json = catches_json + "," + entry + } else { + catches_json = catches_json + entry + catch_first = 0 + } + } + } else { + cont_ct = 0 + } + } + + catches_json = catches_json + "]" + + // optional cleanup + j = ctx.skip_ws(src, j) + local finally_json = null + + if ctx.starts_with_kw(src, j, "cleanup") == 1 { + j = j + 7 + j = ctx.skip_ws(src, j) + local f_res = ctx.parse_block2(src, j) + local atf = f_res.lastIndexOf("@") + j = ctx.to_int(f_res.substring(atf+1, f_res.size())) + finally_json = f_res.substring(0, atf) + } + + if ctx.stage3_enabled() == 1 { + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + local node = "{\"type\":\"Try\",\"try\":" + try_json + ",\"catches\":" + catches_json + if finally_json != null { node = 
node + ",\"finally\":" + finally_json } + node = node + "}" + return node + } + + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Expr\",\"expr\":{\"type\":\"Int\",\"value\":0}}" + } +} + diff --git a/lang/src/compiler/parser/stmt/parser_stmt_box.hako b/lang/src/compiler/parser/stmt/parser_stmt_box.hako new file mode 100644 index 00000000..db415f7b --- /dev/null +++ b/lang/src/compiler/parser/stmt/parser_stmt_box.hako @@ -0,0 +1,202 @@ +// Moved from apps/selfhost-compiler/boxes/parser/stmt/parser_stmt_box.hako +// ParserStmtBox — statement parser coordinator +// Responsibility: Parse statements and delegate to specialized boxes +// API: parse(src, i, ctx) -> JSON + +using lang.compiler.parser.stmt.parser_control_box +using lang.compiler.parser.stmt.parser_exception_box + +static box ParserStmtBox { + parse(src, i, ctx) { + local j = ctx.skip_ws(src, i) + local stmt_start = j + + // using statement + if ctx.starts_with_kw(src, j, "using") == 1 { + return me.parse_using(src, j, stmt_start, ctx) + } + + // assignment: IDENT '=' expr + if j < src.size() && ctx.is_alpha(src.substring(j, j+1)) { + local idp0 = ctx.read_ident2(src, j) + local at0 = idp0.lastIndexOf("@") + if at0 > 0 { + local name0 = idp0.substring(0, at0) + local k0 = ctx.to_int(idp0.substring(at0+1, idp0.size())) + k0 = ctx.skip_ws(src, k0) + if k0 < src.size() && src.substring(k0, k0+1) == "=" { + local eq_two = "=" + if k0 + 1 < src.size() { eq_two = src.substring(k0, k0+2) } + if eq_two != "==" { + k0 = k0 + 1 + k0 = ctx.skip_ws(src, k0) + local default_local = "{\"type\":\"Int\",\"value\":0}" + local expr_json0 = default_local + local end_pos0 = k0 + if k0 < src.size() { + local ahead = src.substring(k0, k0+1) + if ahead != "}" && ahead != ";" { + expr_json0 = ctx.parse_expr2(src, k0) + end_pos0 = ctx.gpos_get() + } + } + k0 = end_pos0 + if k0 <= stmt_start { + if k0 < src.size() { k0 = k0 + 1 } else { k0 = src.size() } + } + ctx.gpos_set(k0) + return "{\"type\":\"Local\",\"name\":\"" + name0 + "\",\"expr\":" + expr_json0 + "}" + } + } + } + } + + // return statement + if ctx.starts_with_kw(src, j, "return") == 1 { + j = j + 6 + j = ctx.skip_ws(src, j) + local default_ret = "{\"type\":\"Int\",\"value\":0}" + local expr_json_ret = default_ret + local end_pos_ret = j + if j < src.size() { + local ahead_ret = src.substring(j, j+1) + if ahead_ret != "}" && ahead_ret != ";" { + expr_json_ret = ctx.parse_expr2(src, j) + end_pos_ret = ctx.gpos_get() + } + } + j = end_pos_ret + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Return\",\"expr\":" + expr_json_ret + "}" + } + + // local declaration + if ctx.starts_with_kw(src, j, "local") == 1 { + j = j + 5 + j = ctx.skip_ws(src, j) + local idp = ctx.read_ident2(src, j) + local at = idp.lastIndexOf("@") + local name = idp.substring(0, at) + j = ctx.to_int(idp.substring(at+1, idp.size())) + j = ctx.skip_ws(src, j) + if j < src.size() && src.substring(j, j+1) == "=" { j = j + 1 } + j = ctx.skip_ws(src, j) + local default_local = "{\"type\":\"Int\",\"value\":0}" + local expr_json_local = default_local + local end_pos_local = j + if j < src.size() { + local ahead_local = src.substring(j, j+1) + if ahead_local != "}" && ahead_local != ";" { + expr_json_local = ctx.parse_expr2(src, j) + end_pos_local = ctx.gpos_get() + } + } + j = end_pos_local + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + 
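// Illustrative example of the Local node built here (hypothetical source): the statement
//   local x = 1 + 2
// is expected to lower to
//   {"type":"Local","name":"x","expr":{"type":"Binary","op":"+",
//    "lhs":{"type":"Int","value":1},"rhs":{"type":"Int","value":2}}}
// with the Binary shape coming from ParserExprBox.parse_sum2.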
ctx.gpos_set(j) + return "{\"type\":\"Local\",\"name\":\"" + name + "\",\"expr\":" + expr_json_local + "}" + } + + // Delegate to specialized boxes + if ctx.starts_with_kw(src, j, "if") == 1 { + return ParserControlBox.parse_if(src, j, stmt_start, ctx) + } + + if ctx.starts_with_kw(src, j, "loop") == 1 { + return ParserControlBox.parse_loop(src, j, stmt_start, ctx) + } + + if ctx.starts_with_kw(src, j, "break") == 1 { + return ParserControlBox.parse_break(src, j, stmt_start, ctx) + } + + if ctx.starts_with_kw(src, j, "continue") == 1 { + return ParserControlBox.parse_continue(src, j, stmt_start, ctx) + } + + if ctx.starts_with_kw(src, j, "throw") == 1 { + return ParserExceptionBox.parse_throw(src, j, stmt_start, ctx) + } + + if ctx.starts_with_kw(src, j, "try") == 1 { + return ParserExceptionBox.parse_try(src, j, stmt_start, ctx) + } + + // Fallback: expression or unknown token + local expr_start = j + local e = ctx.parse_expr2(src, j) + j = ctx.gpos_get() + if j <= expr_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "{\"type\":\"Expr\",\"expr\":" + e + "}" + } + + // Parse using statement + parse_using(src, i, stmt_start, ctx) { + local j = i + 5 // skip "using" + j = ctx.skip_ws(src, j) + + if src.substring(j, j+1) == "\"" { + local p = ctx.read_string_lit(src, j) + j = ctx.gpos_get() + j = ctx.skip_ws(src, j) + local alias = null + if ctx.starts_with_kw(src, j, "as") == 1 { + j = j + 2 + j = ctx.skip_ws(src, j) + local idp = ctx.read_ident2(src, j) + local at = idp.lastIndexOf("@") + alias = idp.substring(0, at) + j = ctx.to_int(idp.substring(at+1, idp.size())) + } + ctx.add_using("path", p, alias) + } else { + if ctx.is_alpha(src.substring(j, j+1)) { + local idp = ctx.read_ident2(src, j) + local at = idp.lastIndexOf("@") + local name = idp.substring(0, at) + j = ctx.to_int(idp.substring(at+1, idp.size())) + local cont = 1 + loop(cont == 1) { + j = ctx.skip_ws(src, j) + if src.substring(j, j+1) == "." { + j = j + 1 + j = ctx.skip_ws(src, j) + idp = ctx.read_ident2(src, j) + at = idp.lastIndexOf("@") + name = name + "." + idp.substring(0, at) + j = ctx.to_int(idp.substring(at+1, idp.size())) + } else { + cont = 0 + } + } + j = ctx.skip_ws(src, j) + local alias2 = null + if ctx.starts_with_kw(src, j, "as") == 1 { + j = j + 2 + j = ctx.skip_ws(src, j) + idp = ctx.read_ident2(src, j) + at = idp.lastIndexOf("@") + alias2 = idp.substring(0, at) + j = ctx.to_int(idp.substring(at+1, idp.size())) + } + ctx.add_using("ns", name, alias2) + } + } + + // ensure progress + if j <= stmt_start { + if j < src.size() { j = j + 1 } else { j = src.size() } + } + ctx.gpos_set(j) + return "" + } +} + diff --git a/lang/src/compiler/parser/using/using_collector_box.hako b/lang/src/compiler/parser/using/using_collector_box.hako new file mode 100644 index 00000000..5d9a78b8 --- /dev/null +++ b/lang/src/compiler/parser/using/using_collector_box.hako @@ -0,0 +1,80 @@ +// Moved from apps/selfhost-compiler/boxes/parser/using/using_collector_box.hako +// UsingCollectorBox — line-based `using` extractor (Path/Namespace → JSON array) +// Responsibility: Parse source text line-by-line and produce a JSON array +// [{"name":"AliasOrBase","path":"path/if/any"}, ...] 
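// Illustrative example (hypothetical paths): for the two source lines
//   using "apps/lib/string_helpers.hako" as SH
//   using selfhost.vm.mir_min
// collect() is expected to return
//   [{"name":"SH","path":"apps/lib/string_helpers.hako"},{"name":"selfhost.vm.mir_min"}]
// (path entries keep the quoted path and use the alias or basename as "name"; namespace entries carry only "name").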
+// Notes: +// - ParserBox.extract_usings delegates to this box (Phase 2 split) +// - Pure string scan(依存ゼロ)。Fail‑Fastはせず、安全にスキップでループを進める + +using lang.compiler.parser.scan.parser_common_utils_box as Utils + +static box UsingCollectorBox { + + // Public API: collect line-based using declarations to JSON array string + collect(src) { + if src == null { return "[]" } + local n = src.size() + local i = 0 + local first = 1 + local out = "[" + loop(i < n) { + // line slice [i, j) + local j = i + loop(j < n && src.substring(j, j+1) != "\n") { j = j + 1 } + local line = src.substring(i, j) + // trim left spaces/tabs + local k = 0 + loop(k < line.size() && (line.substring(k,k+1) == " " || line.substring(k,k+1) == "\t")) { k = k + 1 } + if Utils.starts_with(line, k, "using ") == 1 { + local rest = Utils.trim(line.substring(k + 6, line.size())) + // split on ' as ' + local as_pos = Utils.index_of(rest, 0, " as ") + local target = rest + local alias = null + if as_pos >= 0 { target = Utils.trim(rest.substring(0, as_pos)) alias = Utils.trim(rest.substring(as_pos + 4, rest.size())) } + // path or namespace + local is_path = 0 + if target.size() > 0 { + if Utils.starts_with(target, 0, Utils.dq()) == 1 { is_path = 1 } + if Utils.starts_with(target, 0, "./") == 1 { is_path = 1 } + if Utils.starts_with(target, 0, "/") == 1 { is_path = 1 } + if target.size() >= 5 && Utils.starts_with(target, target.size()-5, ".hako") == 1 { is_path = 1 } + if target.size() >= 6 && Utils.starts_with(target, target.size()-6, ".nyash") == 1 { is_path = 1 } + } + local name = "" + local path = null + if is_path == 1 { + // strip quotes + if Utils.starts_with(target, 0, Utils.dq()) == 1 { + target = target.substring(1, target.size()) + if target.size() > 0 && target.substring(target.size()-1, target.size()) == Utils.dq() { target = target.substring(0, target.size()-1) } + } + path = target + if alias != null { name = alias } else { + // basename + local p = target + local idx = -1 + local t = 0 + loop(t < p.size()) { if p.substring(t,t+1) == "/" { idx = t } t = t + 1 } + if idx >= 0 { p = p.substring(idx+1, p.size()) } + // strip extension + if p.size() > 5 && Utils.starts_with(p, p.size()-5, ".hako") == 1 { p = p.substring(0, p.size()-5) } + else { if p.size() > 6 && Utils.starts_with(p, p.size()-6, ".nyash") == 1 { p = p.substring(0, p.size()-6) } } + name = p + } + } else { + name = target + } + // append entry + if first == 0 { out = out + "," } else { first = 0 } + out = out + "{" + Utils.dq() + "name" + Utils.dq() + ":" + Utils.dq() + Utils.esc_json(name) + Utils.dq() + if path != null { out = out + "," + Utils.dq() + "path" + Utils.dq() + ":" + Utils.dq() + Utils.esc_json(path) + Utils.dq() } + out = out + "}" + } + i = j + 1 + } + out = out + "]" + return out + } +} + diff --git a/lang/src/compiler/pipeline_v2/README.md b/lang/src/compiler/pipeline_v2/README.md new file mode 100644 index 00000000..9e6a15f2 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/README.md @@ -0,0 +1,60 @@ +Pipeline V2 — Box‑First Extract→Emit + +Scope +- Selfhost compilerの emit‑only 経路(Stage‑1 JSON → MIR(JSON v0/v1))を、箱の責務で明確化する。 +- Parser/Resolver/Runtime には影響しない。既定挙動は不変(devフラグ/引数でのみ起動)。 + +Modules(責務) +- compare_extract_box.hako + - 目的: Stage‑1 JSON から Compare(lhs, rhs, op) を堅牢に抽出(整数のみ)。 + - API: + - extract_return_compare_ints(ast_json) -> {cmp,lhs,rhs} | null + - extract_if_compare_ints(ast_json) -> {cmp,lhs,rhs} | null + - 失敗時: null(呼び出し側でフォールバック)。 + +- emit_compare_box.hako + - 目的: Compare の MIR(JSON v0) 生成。 + - API: + - 
emit_compare_ret(lhs,rhs,cmp,trace) -> JSON v0(compare→ret) + - emit_compare_cfg3(lhs,rhs,cmp,materialize,trace) -> JSON v0(branch/jump/ret; materialize=2でcopy材化想定) + - 失敗時: なし(入力は抽出済み前提)。 + +- pipeline.hako(flow PipelineV2) + - 役割: Extract系→Emit系の配線。Call/Method/New/Compare/If(Compare) を段階的に対応。 + - フラグ: + - prefer_cfg=0: Return‑only(compare→ret) + - prefer_cfg=1: CFG(branch/jump/ret) + - prefer_cfg=2: CFG+材化(将来の copy after PHI を想定; 現状は等価分岐) + - trace: 1で最小トレース([trace])を出力。既定は0(静音)。 + +I/O(最小仕様) +- 入力: Stage‑1 JSON(Return/If/Call/Method/New の最小形)。負数/空白は RegexFlow で吸収。 +- Call/Method/New の `args` 配列は `{"type":"Int","value":…}` のみを許容する。NamedArg/DefaultArg/VarArg など Int 以外が混在した場合は Null を返し、呼び出し側で Fail‑Fast(Stage‑0/Resolver 側で脱糖すること)。 +- 出力: MIR(JSON v0)。将来 v1(MirCall) への直出力は lower_stage1_to_mir_v1 を併設(dev用途)。 + +Fail‑Fast & Fallback +- 抽出箱は見つからない場合 null を返す。pipeline は legacy extractor(Stage1ExtractFlow)でフォールバックする。 +- 既定ONは変えない(dev引数でのみ有効)。 + +Stage Guard(Stage‑2 / Stage‑3) +- Stage‑2: Call/Method/New 向けの emit 手前ガード。Stage‑1 で弾いた NamedArg / DefaultArg / VarArg などが混入した場合は Null で Fail‑Fast し、呼び出し側に返す。 +- Stage‑3: MIR(JSON) 生成器。Stage‑2 の整形結果のみを受理し、`PipelineV2.lower_stage1_to_mir` が null を返した場合は Emit を実行しない。 +- 代表スモーク + - Stage‑1 ガード: `selfhost_pipeline_v2_stage1_invalid_args_fail_vm.sh` / `..._named_default_fail_vm.sh` / `..._vararg_fail_vm.sh` + - Stage‑2/3 正常系: `selfhost_pipeline_v2_call_exec_vm.sh`, `selfhost_pipeline_v2_method_exec_vm.sh`, `selfhost_pipeline_v2_newbox_exec_vm.sh` + +Testing +- quick/selfhost に compare/binop/call/method/new の代表スモークがある。Compare系は Return‑only と CFG をそれぞれ確認。 +- Mini‑VM(apps/selfhost/vm/boxes/mir_vm_min.nyash)は最小仕様。算術/比較/CFGのみのスモークで品質を担保。 + +Notes +- 追加の Extract 箱(Call/Method/New)を段階導入し、Stage1ExtractFlow の責務を縮小する計画。 +- trace は既定OFF。--emit-trace 指定時のみ出力する。CI/quick は既定で静音。 + +WASM 開発ラインとの取り込み方針(注意) +- wasm-development ブランチは独立開発ライン。Selfhost 側には以下のみ注意して取り込む。 + - 共有仕様(MIR JSON 形状、PHI invariants、v1/v0 変換ポリシ)に関するドキュメントの同期。 + - Python LLVM ハーネスの仕様更新点(値配線/PHI 正規化)を docs に反映し、実装取り込みは最小・可逆。 + - Selfhost(本フォルダ)の箱 API/入出力は変更せず、adapter で吸収(MirJsonV1Adapter など)。 + - 実装取り込みは小粒・局所・既定OFFのフラグ配下。quick が緑のままになる粒度で実施。 + - 互換チェックは quick/integration の代表スモークで行い、重い検証は wasm ライン側で継続。 diff --git a/lang/src/compiler/pipeline_v2/README_using_resolver.md b/lang/src/compiler/pipeline_v2/README_using_resolver.md new file mode 100644 index 00000000..ac7f5f99 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/README_using_resolver.md @@ -0,0 +1,33 @@ +UsingResolverBox — Pre‑Resolve Using Declarations + +Scope +- Manage alias→path and alias→namespace name maps for compiler pipeline (P2‑A). +- Consume lightweight JSON emitted by parser (UsingCollectorBox or ParserBox.add_using). +- Provide stable getters for later NamespaceBox (P2‑B) and Emit boxes. + +Responsibilities +- Input forms: + - Path using: [{"name":"Alias","path":"apps/..../file.hako"}, ...] + - Namespace using: [{"name":"selfhost.vm.mir_min"}, ...] or [{"name":"Alias"}] when ParserBox.add_using(ns, alias) is used. +- Output/context: + - alias_paths: Alias → file path + - alias_names: Alias → namespace name (best effort; Alias itself when only ns is known) + - modules_map: NsName → file path (provided by caller; no file IO here) + +Non‑Responsibilities +- Reading hako.toml or filesystem. +- Runtime using resolution. This is compiler‑only pre‑resolution. 
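Example (illustrative)
- Input (hypothetical paths): usings_json = [{"name":"SH","path":"apps/lib/string_helpers.hako"},{"name":"selfhost.vm.mir_min"}]
- Expected effect of load_usings_json: alias_paths["SH"] = "apps/lib/string_helpers.hako"; alias_names["selfhost.vm.mir_min"] = "selfhost.vm.mir_min" (the full namespace name doubles as its own alias until add_ns overrides it).
- resolve_path_alias("SH") then returns the file path, and resolve_namespace_alias("selfhost.vm.mir_min") returns the namespace name.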
+ +API (box methods) +- load_usings_json(usings_json) +- load_modules_json(modules_json) +- add_ns(alias, ns_name) / add_module(ns_name, path) / add_path(alias, path) +- resolve_path_alias(alias) -> path | null +- resolve_namespace_alias(alias) -> ns_name | null +- resolve_module_path_from_alias(alias) -> path | null +- to_context_json() -> {alias_paths,namespaces,modules} + +Notes +- When UsingCollectorBox is used, namespace entries contain the full name in "name" and no alias. In that case alias_names[alias] will use the same string; callers may still override via add_ns(alias, ns). +- Keep this box pure and side‑effect free for easy testing. + diff --git a/lang/src/compiler/pipeline_v2/alias_preflight_box.hako b/lang/src/compiler/pipeline_v2/alias_preflight_box.hako new file mode 100644 index 00000000..aabf7a7a --- /dev/null +++ b/lang/src/compiler/pipeline_v2/alias_preflight_box.hako @@ -0,0 +1,28 @@ +// alias_preflight_box.hako — AliasPreflightBox +// Responsibility: Early check for using-alias heads in Stage‑1 names. +// Input: raw dotted label (e.g., "alias.something.method"), UsingResolverBox instance. +// Behavior: If dotted, verify alias head is known; otherwise print a stable one-line error and return 0. +// Non‑Responsibility: Namespace normalization or emit. + +using "lang/src/compiler/pipeline_v2/using_resolver_box.hako" as UsingResolverBox +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow + +static box AliasPreflightBox { + // Returns 1 if OK, 0 if alias head is unresolved (prints error). + check_head(raw_label, r_state) { + if raw_label == null { return 1 } + if r_state == null { return 1 } + local s = "" + raw_label + local dot = RegexFlow.find_from(s, ".", 0) + if dot < 0 { return 1 } + local head = s.substring(0, dot) + if head == null || head == "" { return 1 } + if UsingResolverBox.resolve_namespace_alias(r_state, head) == null { + print("[ERROR] Unresolved using alias: " + head) + return 0 + } + return 1 + } +} + +static box AliasPreflightMain { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/backend_box.hako b/lang/src/compiler/pipeline_v2/backend_box.hako new file mode 100644 index 00000000..8b93d02c --- /dev/null +++ b/lang/src/compiler/pipeline_v2/backend_box.hako @@ -0,0 +1,11 @@ +// BackendBox — backend tag holder (no execution in Phase 15.7) +box BackendBox { + name + birth(n) { if n == null { n = "vm" } me.name = n return 0 } + get_name() { return me.name } + // Stub execute: reserved for future; returns 0 for now + execute(_mir_json) { return 0 } +} + +static box BackendStub { main(args) { return 0 } } + diff --git a/lang/src/compiler/pipeline_v2/call_extract_box.hako b/lang/src/compiler/pipeline_v2/call_extract_box.hako new file mode 100644 index 00000000..f45f28ea --- /dev/null +++ b/lang/src/compiler/pipeline_v2/call_extract_box.hako @@ -0,0 +1,12 @@ +// CallExtractBox — Stage‑1 JSON から Return(Call name(args...)) を抽出(整数引数のみ) +// Delegation to Stage1IntArgsExtractBox (unified implementation) +using "lang/src/compiler/pipeline_v2/stage1_int_args_extract_box.hako" as Unified + +static box CallExtractBox { + // Returns { name: String, args: [Int,...] 
} or null + extract_return_call_ints(ast_json) { + return Unified.extract_return_call_ints(ast_json) + } +} + +static box CallExtractStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/compare_extract_box.hako b/lang/src/compiler/pipeline_v2/compare_extract_box.hako new file mode 100644 index 00000000..867a2327 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/compare_extract_box.hako @@ -0,0 +1,119 @@ +// CompareExtractBox — Stage‑1 JSON から Compare(lhs,rhs,op) を堅牢に抽出(整数のみ) +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers + +static box CompareExtractBox { + // --- internal helpers (range & safe find) --- + _find_brace_range(s, around_pos) { + // Find object range that contains around_pos: search '{' backwards, then match '}' by depth + if around_pos < 0 { return null } + local start = around_pos + loop (start >= 0) { + local ch = s.substring(start, start+1) + if ch == "{" { break } + start = start - 1 + } + if start < 0 { return null } + local i = start + local depth = 0 + loop(true) { + local ch2 = s.substring(i, i+1) + if ch2 == "" { break } + if ch2 == "{" { depth = depth + 1 } else { if ch2 == "}" { depth = depth - 1 } } + if depth == 0 { return { "from": start, "to": i + 1 } } + i = i + 1 + } + return null + } + _find_in_range(s, needle, start, endp) { + local p = RegexFlow.find_from(s, needle, start) + if p < 0 { return -1 } + if p >= endp { return -1 } + return p + } + // Returns MapBox {"cmp": String, "lhs": Int, "rhs": Int } or null when not found + extract_return_compare_ints(ast_json) { + if ast_json == null { return null } + // Find Return then Compare (whitespace耐性は RegexFlow に委譲) + local rq = RegexFlow.find_from(ast_json, "\"type\":\"Return\"", 0) + if rq < 0 { return null } + local cq = RegexFlow.find_from(ast_json, "\"type\":\"Compare\"", rq) + if cq < 0 { return null } + // op + local opk_pos = RegexFlow.find_from(ast_json, "\"op\":\"", cq) + if opk_pos < 0 { return null } + local opk_end = RegexFlow.find_from(ast_json, "\"", opk_pos + 6) + if opk_end < 0 { return null } + local cmp = ast_json.substring(opk_pos + 6, opk_end) + // lhs/rhs → 各 value の digits を抽出 + local lhsp = RegexFlow.find_from(ast_json, "\"lhs\"", cq) + local rhsp = RegexFlow.find_from(ast_json, "\"rhs\"", cq) + if lhsp < 0 || rhsp < 0 { return null } + local lv = -1 + local rv = -1 + { + local vpos = RegexFlow.find_from(ast_json, "\"value\":", lhsp) + if vpos >= 0 { + local ds = RegexFlow.digits_from(ast_json, vpos + 8) + if ds != "" { lv = RegexFlow.to_int(ds) } + } + } + { + local vpos2 = RegexFlow.find_from(ast_json, "\"value\":", rhsp) + if vpos2 >= 0 { + local ds2 = RegexFlow.digits_from(ast_json, vpos2 + 8) + if ds2 != "" { rv = RegexFlow.to_int(ds2) } + } + } + if lv < 0 || rv < 0 { return null } + // Pack cmp/lhs/rhs into ArrayBox [cmp, lhs, rhs] + return [lv, rv, cmp] + } + + // If(cond=Compare(lhs,rhs,op), then=[Return(Int 1)], else=[Return(Int 0)]) → MapBox {cmp,lhs,rhs} or null + extract_if_compare_ints(ast_json) { + if ast_json == null { return null } + // Find If first + local ip = RegexFlow.find_from(ast_json, "\"type\":\"If\"", 0) + if ip < 0 { return null } + // Find Compare after If + local cq = RegexFlow.find_from(ast_json, "\"type\":\"Compare\"", ip) + if cq < 0 { return null } + // Restrict to the Compare object range to avoid accidental matches + local range = me._find_brace_range(ast_json, cq) + if range == null { return null } + local rs = BoxHelpers.map_get(range, "from") + 
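// Illustrative note: as in extract_return_compare_ints above, the extracted values are packed into a plain array [lhs, rhs, cmp].
// For the hypothetical Stage-1 input
//   {"type":"Return","expr":{"type":"Compare","op":"Lt","lhs":{"type":"Int","value":1},"rhs":{"type":"Int","value":2}}}
// extract_return_compare_ints is expected to return [1, 2, "Lt"], with the "op" string copied verbatim.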
local re = BoxHelpers.map_get(range, "to") + // TEMP debug + print("DBG:cmp-range=" + ast_json.substring(rs, re)) + // op + local opk_pos = me._find_in_range(ast_json, "\"op\":\"", rs, re) + if opk_pos < 0 { return null } + local opv_start = opk_pos + 6 + local opk_end = me._find_in_range(ast_json, "\"", opv_start, re) + if opk_end < 0 { return null } + local cmp = ast_json.substring(opv_start, opk_end) + print("DBG:cmp-op=" + cmp) + // lhs + local lhsp = me._find_in_range(ast_json, "\"lhs\"", rs, re) + if lhsp < 0 { return null } + local vpos = me._find_in_range(ast_json, "\"value\":", lhsp, re) + if vpos < 0 { return null } + local ds = RegexFlow.digits_from(ast_json, vpos + 8) + if ds == "" { return null } + local lv = RegexFlow.to_int(ds) + print("DBG:lhs=" + (""+lv)) + // rhs + local rhsp = me._find_in_range(ast_json, "\"rhs\"", rs, re) + if rhsp < 0 { return null } + local vpos2 = me._find_in_range(ast_json, "\"value\":", rhsp, re) + if vpos2 < 0 { return null } + local ds2 = RegexFlow.digits_from(ast_json, vpos2 + 8) + if ds2 == "" { return null } + local rv = RegexFlow.to_int(ds2) + print("DBG:rhs=" + (""+rv)) + return [lv, rv, cmp] + } +} + +static box CompareExtractStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/emit_binop_box.hako b/lang/src/compiler/pipeline_v2/emit_binop_box.hako new file mode 100644 index 00000000..e9258aa3 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/emit_binop_box.hako @@ -0,0 +1,33 @@ +// EmitBinopBox — binop の最小 MIR(JSON v0) 生成 +using "lang/src/shared/json/mir_builder_min.hako" as MirJsonBuilderMin + +static box EmitBinopBox { + _map_binop(opk) { + if opk == "+" { return "Add" } + if opk == "-" { return "Sub" } + if opk == "*" { return "Mul" } + if opk == "/" { return "Div" } + if opk == "%" { return "Mod" } + return opk + } + + emit_binop(lhs, rhs, opk) { return EmitBinopBox.emit_binop2(lhs, rhs, opk, 0) } + + emit_binop2(lhs, rhs, opk, trace) { + // dev-trace (minimal, env-gate想定) + if trace == 1 { print("[emit] binop lhs=" + lhs + " rhs=" + rhs + " op=" + opk) } + local kind = EmitBinopBox._map_binop(opk) + local b = MirJsonBuilderMin.make() + |> MirJsonBuilderMin.start_module() + |> MirJsonBuilderMin.start_function("main") + |> MirJsonBuilderMin.start_block(0) + |> MirJsonBuilderMin.add_const(1, lhs) + |> MirJsonBuilderMin.add_const(2, rhs) + |> MirJsonBuilderMin.add_binop(kind, 1, 2, 3) + |> MirJsonBuilderMin.add_ret(3) + |> MirJsonBuilderMin.end_all() + return MirJsonBuilderMin.to_string(b) + } +} + +static box EmitBinopStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/emit_call_box.hako b/lang/src/compiler/pipeline_v2/emit_call_box.hako new file mode 100644 index 00000000..32ed0b72 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/emit_call_box.hako @@ -0,0 +1,89 @@ +// EmitCallBox — Return(Call name(int_args...)) を MIR(JSON v0) に最小変換 +// 仕様: 各引数を const i64 として材化し、call を発行、dst を ret する。 + +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow +using lang.compiler.emit.common.header_emit as HeaderEmitBox +using "lang/src/shared/mir/mir_schema_box.hako" as MirSchemaBox + +static box EmitCallBox { + _to_str(n) { + local v = n + if v == 0 { return "0" } + if v < 0 { return "-" + EmitCallBox._to_str(0 - v) } + local out = ""; local digits = "0123456789" + loop (v > 0) { + local d = v % 10 + local ch = digits.substring(d, d+1) + out = ch + out + v = v / 10 + } + return out + } + _quote(s) { + if s == null { return "\"\"" } + local out = ""; local i = 0; local n = s.size() + loop (i < 
n) { + local ch = call("String.substring/2", s, i, i+1) + if ch == "\\" { out = out + "\\\\" } + else { if ch == "\"" { out = out + "\\\"" } else { + if ch == "\n" { out = out + "\\n" } else { + if ch == "\r" { out = out + "\\r" } else { + if ch == "\t" { out = out + "\\t" } else { out = out + ch } + } + } + }} + i = i + 1 + } + return "\"" + out + "\"" + } + emit_call_int_args(name, args) { + name = match name { null => "", _ => name } + args = match args { null => [], _ => args } + // JSON v0 shape (HeaderEmitBox contract): {functions:[{name,params,blocks:[{id,instructions}]}]} + // Materialize immediate int args: r1..rN; mir_call Extern(name)(r1..rN)->rK; ret rK + local s = "" + args + local pos = 0 + local n = 0 + // Build instruction JSON (string) pieces (avoid arrays so this path also works with plugins OFF) + local body = "[" + local first = 1 + // const r1..rN + loop(true) { + local ds = RegexFlow.digits_from(s, pos) + if ds == "" { pos = pos + 1 } else { + local vid = 1 + n + local vv = RegexFlow.to_int(ds) + if first == 1 { first = 0 } else { body = body + "," } + body = body + "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":" + EmitCallBox._to_str(vid) + ",\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + EmitCallBox._to_str(vv) + "}}" + n = n + 1 + pos = pos + ds.size() + } + if pos >= s.size() { break } + } + local dst = n + 1 + // mir_call (Extern) + { + // args JSON: [1,2,...] + local args_s = "["; { local i=0; loop(i < n) { if i > 0 { args_s = args_s + "," } args_s = args_s + EmitCallBox._to_str(1+i); i=i+1 } } args_s = args_s + "]" + local name_q = EmitCallBox._quote("" + name) + local call_json = "{\\\"op\\\":\\\"mir_call\\\",\\\"dst\\\":" + EmitCallBox._to_str(dst) + + ",\\\"mir_call\\\":{\\\"callee\\\":{\\\"type\\\":\\\"Extern\\\",\\\"name\\\":" + name_q + "},\\\"args\\\":" + args_s + ",\\\"effects\\\":[]}}" + if first == 1 { first = 0 } else { body = body + "," } + body = body + call_json + } + // ret dst + if first == 1 { first = 0 } else { body = body + "," } + body = body + "{\\\"op\\\":\\\"ret\\\",\\\"value\\\":" + EmitCallBox._to_str(dst) + "}" + body = body + "]" + local module_json = "{\"kind\":\"MIR\",\"schema_version\":\"1.0\",\"functions\":[{\"name\":\"main\",\"params\":[],\"blocks\":[{\"id\":0,\"instructions\":" + body + "}]}]}" + return module_json + } + + // JSON v1 (MirCall) emission — shape equivalent(CallEmitBox + HeaderEmitBox) + emit_call_int_args_v1(name, args) { + // v1 path is shape-equivalent for now — delegate to v0 builder above + return EmitCallBox.emit_call_int_args(name, args) + } +} + +static box EmitCallStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/emit_compare_box.hako b/lang/src/compiler/pipeline_v2/emit_compare_box.hako new file mode 100644 index 00000000..04ddf6d8 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/emit_compare_box.hako @@ -0,0 +1,84 @@ +// EmitCompareBox — minimal MIR (JSON v0) emission for compare/branch/jump/ret (direct string assembly) +using "lang/src/compiler/pipeline_v2/local_ssa_box.hako" as LocalSSABox +using lang.compiler.emit.common.mir_emit as MirEmitBox +using lang.compiler.emit.common.header_emit as HeaderEmitBox + +static box EmitCompareBox { + _to_str(n) { + local v = n + if v == 0 { return "0" } + if v < 0 { return "-" + EmitCompareBox._to_str(0 - v) } + local out = ""; local digits = "0123456789" + loop (v > 0) { local d = v % 10; local ch = digits.substring(d, d+1); out = ch + out; v = v / 10 } + return out + } + _quote(s) { + if s == null { return "\"\"" } + local out = ""; local i = 0; local n = s.size() + loop (i < n) { + local ch =
call("String.substring/2", s, i, i+1) + if ch == "\\" { out = out + "\\\\" } + else { if ch == "\"" { out = out + "\\\"" } else { + if ch == "\n" { out = out + "\\n" } else { + if ch == "\r" { out = out + "\\r" } else { + if ch == "\t" { out = out + "\\t" } else { out = out + ch } + } + } + }} + i = i + 1 + } + return "\"" + out + "\"" + } + // Return-only variant (prefer_cfg = 0): emit compare → ret directly + emit_compare(lhs, rhs, cmp) { return EmitCompareBox.emit_compare_ret(lhs, rhs, cmp, 0) } + + emit_compare_ret(lhs, rhs, cmp, trace) { + // dev-trace(最小) + if trace == 1 { print("[emit] compare-ret lhs=" + lhs + " rhs=" + rhs + " cmp=" + cmp) } + // entry: const lhs/rhs; compare→r3; ret r3(string直組み) + local lhs_s = EmitCompareBox._to_str(lhs) + local rhs_s = EmitCompareBox._to_str(rhs) + local cmp_q = EmitCompareBox._quote("" + cmp) + local body = "[" + + "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":1,\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + lhs_s + "}}," + + "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":2,\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + rhs_s + "}}," + + "{\\\"op\\\":\\\"compare\\\",\\\"cmp\\\":" + cmp_q + ",\\\"lhs\\\":1,\\\"rhs\\\":2,\\\"dst\\\":3}," + + "{\\\"op\\\":\\\"ret\\\",\\\"value\\\":3}" + + "]" + return "{\"kind\":\"MIR\",\"schema_version\":\"1.0\",\"functions\":[{\"name\":\"main\",\"params\":[],\"blocks\":[{\"id\":0,\"instructions\":" + body + "}]}]}" + } + + emit_compare_cfg(lhs, rhs, cmp) { return EmitCompareBox.emit_compare_cfg2(lhs, rhs, cmp, 0) } + + emit_compare_cfg2(lhs, rhs, cmp, materialize) { return EmitCompareBox.emit_compare_cfg3(lhs, rhs, cmp, materialize, 0) } + + emit_compare_cfg3(lhs, rhs, cmp, materialize, trace) { + if trace == 1 { print("[emit] compare lhs=" + lhs + " rhs=" + rhs + " cmp=" + cmp + " mat=" + materialize) } + // normalize cmp via match + cmp = match cmp { null => "Gt", "" => "Gt", _ => cmp } + // string直組み + local lhs_s = EmitCompareBox._to_str(lhs) + local rhs_s = EmitCompareBox._to_str(rhs) + local cmp_q = EmitCompareBox._quote("" + cmp) + // Block 0 instructions + local b0 = "[" + + "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":1,\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + lhs_s + "}}," + + "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":2,\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + rhs_s + "}}," + + "{\\\"op\\\":\\\"compare\\\",\\\"cmp\\\":" + cmp_q + ",\\\"lhs\\\":1,\\\"rhs\\\":2,\\\"dst\\\":3}" + if materialize == 1 { b0 = b0 + ",{\\\"op\\\":\\\"copy\\\",\\\"dst\\\":4,\\\"src\\\":3}" } + // branch uses key "else" (not else_id) to match reader + b0 = b0 + ",{\\\"op\\\":\\\"branch\\\",\\\"cond\\\":3,\\\"then\\\":1,\\\"else\\\":2}]" + + // Block 1 (then): set r6=1; jump -> 3 + local b1 = "[{\"op\":\"const\",\"dst\":6,\"value\":{\"type\":\"i64\",\"value\":1}},{\"op\":\"jump\",\"target\":3}]" + // Block 2 (else): set r6=0; jump -> 3 + local b2 = "[{\"op\":\"const\",\"dst\":6,\"value\":{\"type\":\"i64\",\"value\":0}},{\"op\":\"jump\",\"target\":3}]" + // Block 3 (merge): ret r6 + local b3 = "[{\"op\":\"ret\",\"value\":6}]" + + local blocks = "[{\"id\":0,\"instructions\":" + b0 + "},{\"id\":1,\"instructions\":" + b1 + "},{\"id\":2,\"instructions\":" + b2 + "},{\"id\":3,\"instructions\":" + b3 + "}]" + return "{\"kind\":\"MIR\",\"schema_version\":\"1.0\",\"functions\":[{\"name\":\"main\",\"params\":[],\"blocks\":" + blocks + "}]}" +} +} + +static box EmitCompareStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/emit_method_box.hako 
b/lang/src/compiler/pipeline_v2/emit_method_box.hako new file mode 100644 index 00000000..1caa7db0 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/emit_method_box.hako @@ -0,0 +1,84 @@ +// EmitMethodBox — Return(Method recv, method, args[int...]) → MIR(JSON v0) +// 最小形: const recv→r1; 各引数を r2..rN; boxcall(method, recv=r1, args=r2..) → rK; ret rK + +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow +using lang.compiler.emit.common.header_emit as HeaderEmitBox +using "lang/src/shared/mir/mir_schema_box.hako" as MirSchemaBox + +static box EmitMethodBox { + _to_str(n) { + local v = n + if v == 0 { return "0" } + if v < 0 { return "-" + EmitMethodBox._to_str(0 - v) } + local out = ""; local digits = "0123456789" + loop (v > 0) { local d = v % 10; local ch = digits.substring(d, d+1); out = ch + out; v = v / 10 } + return out + } + _quote(s) { + if s == null { return "\"\"" } + local out = ""; local i = 0; local n = s.size() + loop (i < n) { + local ch = call("String.substring/2", s, i, i+1) + if ch == "\\" { out = out + "\\\\" } + else { if ch == "\"" { out = out + "\\\"" } else { + if ch == "\n" { out = out + "\\n" } else { + if ch == "\r" { out = out + "\\r" } else { + if ch == "\t" { out = out + "\\t" } else { out = out + ch } + } + } + }} + i = i + 1 + } + return "\"" + out + "\"" + } + emit_method_int_args(method, recv_val, args) { + method = match method { null => "", _ => method } + args = match args { null => [], _ => args } + // Shape: const recv->r1; const args r2..rN; mir_call Method(method, r1, r2..)->rK; ret rK + local s = "" + args + local pos = 0 + local n = 0 + // Build instruction JSON (string) pieces(配列を使わない) + local body = "[" + local first = 1 + // recv first + body = body + "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":1,\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + EmitMethodBox._to_str(recv_val) + "}}" + first = 0 + // materialize args r2..r(n+1) + loop(true) { + local ds = RegexFlow.digits_from(s, pos) + if ds == "" { pos = pos + 1 } else { + local vid = 2 + n + local vv = RegexFlow.to_int(ds) + body = body + "," + "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":" + EmitMethodBox._to_str(vid) + ",\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + EmitMethodBox._to_str(vv) + "}}" + n = n + 1 + pos = pos + ds.size() + } + if pos >= s.size() { break } + } + local dst = n + 2 + // mir_call (Method) + { + // args JSON: [2,3,...] 
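+ // Illustrative shape only (method name and argument values below are example placeholders):
+ // emit_method_int_args("size", 0, "[]") is intended to describe the sequence
+ //   const r1 = 0 (receiver); mir_call Method("size", receiver=1, args=[]) -> r2; ret r2
+ // and with a single int arg (e.g. "[7]") the args JSON assembled below becomes "[2]",
+ // pointing at the const materialized into r2.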
+ local args_s = "["; { local i=0; loop(i0 { args_s = args_s + "," } args_s = args_s + EmitMethodBox._to_str(2+i); i=i+1 } } args_s = args_s + "]" + local method_q = EmitMethodBox._quote("" + method) + // receiver id is 1 + local call_json = "{\\\"op\\\":\\\"mir_call\\\",\\\"dst\\\":" + EmitMethodBox._to_str(dst) + + ",\\\"mir_call\\\":{\\\"callee\\\":{\\\"type\\\":\\\"Method\\\",\\\"method\\\":" + method_q + ",\\\"receiver\\\":1},\\\"args\\\":" + args_s + ",\\\"effects\\\":[]}}" + body = body + "," + call_json + } + // ret dst + body = body + "," + "{\\\"op\\\":\\\"ret\\\",\\\"value\\\":" + EmitMethodBox._to_str(dst) + "}" + body = body + "]" + local module_json = "{\"kind\":\"MIR\",\"schema_version\":\"1.0\",\"functions\":[{\"name\":\"main\",\"params\":[],\"blocks\":[{\"id\":0,\"instructions\":" + body + "}]}]}" + return module_json + } + + // JSON v1 (MirCall) emission — shape equivalent(CallEmitBox + HeaderEmitBox) + emit_method_int_args_v1(method, recv_val, args) { + // v1 path is shape-equivalent for now — delegate to v0 builder above + return EmitMethodBox.emit_method_int_args(method, recv_val, args) + } +} + +static box EmitMethodStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/emit_mir_flow.hako b/lang/src/compiler/pipeline_v2/emit_mir_flow.hako new file mode 100644 index 00000000..e4c09214 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/emit_mir_flow.hako @@ -0,0 +1,111 @@ +using "lang/src/shared/mir/json_emit_box.hako" as JsonEmitBox +// Shared MIR helpers (P1/P2) +using "lang/src/shared/mir/mir_schema_box.hako" as MirSchema +using "lang/src/shared/mir/block_builder_box.hako" as BlockBuilder +using "lang/src/compiler/pipeline_v2/local_ssa_box.hako" as LocalSSABox +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers + +flow EmitMirFlow { + _int(n) { return "" + n } + + emit_return_int(v) { + local mod_full = BlockBuilder.const_ret(v) + return JsonEmitBox.to_json(mod_full) + } + + _map_binop(opk) { + if opk == "+" { return "Add" } + if opk == "-" { return "Sub" } + if opk == "*" { return "Mul" } + if opk == "/" { return "Div" } + if opk == "%" { return "Mod" } + return opk + } + + emit_binop(lhs, rhs, opk) { + local kind = EmitMirFlow._map_binop(opk) + local mod_full = BlockBuilder.binop(lhs, rhs, kind) + return JsonEmitBox.to_json(mod_full) + } + + emit_compare_cfg(lhs, rhs, cmp) { + return EmitMirFlow.emit_compare_cfg2(lhs, rhs, cmp, 0) + } + + // emit_compare_cfg2: materialize=1 なら cond を copy して branch する + emit_compare_cfg2(lhs, rhs, cmp, materialize) { + local mod_full = BlockBuilder.compare_branch(lhs, rhs, cmp) + if materialize != 0 { + // functions[0].blocks[0].instructions に cond=3 材化(copy 5<-3) を挿入 + local fns = BoxHelpers.map_get(mod_full, "functions") + local fns_len = BoxHelpers.array_len(fns) + if fns_len > 0 { + local blocks = BoxHelpers.map_get(BoxHelpers.array_get(fns, 0), "blocks") + local blocks_len = BoxHelpers.array_len(blocks) + if blocks_len > 0 { + local insts = BoxHelpers.map_get(BoxHelpers.array_get(blocks, 0), "instructions") + if insts != null { LocalSSABox.ensure_after_last_def_copy(insts, 3, 5) } + } + } + } + return JsonEmitBox.to_json(mod_full) + } + + // emit_loop_counter: while (i < limit) { i = i + 1 } ; return i + // Minimal loop CFG without PHI by updating the same dst id for 'i'. 
+ // Blocks: + // 0: const i=0 -> r1; const limit -> r2; const one=1 -> r4; jump 1 + // 1: compare lt r1 r2 -> r3; branch r3 then:2 else:3 + // 2: binop add r1 r4 -> r1; jump 1 + // 3: ret r1 + emit_loop_counter(limit) { + local mod_full = BlockBuilder.loop_counter(limit) + return JsonEmitBox.to_json(mod_full) + } + + // P4: emit extern call for op_eq via shared BlockBuilder (immediate values) + // Shape: const 1=lhs; const 2=rhs; mir_call Extern(op_eq)(1,2)->r3; ret r3 + emit_op_eq(lhs, rhs) { + local vals = new ArrayBox() + vals.push(lhs) + vals.push(rhs) + local mod_full = BlockBuilder.extern_call_ival_ret("nyrt.ops.op_eq", vals, "nyrt.ops.op_eq") + return JsonEmitBox.to_json(mod_full) + } + + // P4: generic extern/global/method/constructor emitters (immediate values) + emit_extern_call(name, arg_vals) { + local mod_full = BlockBuilder.extern_call_ival_ret(name, arg_vals, name) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } + emit_global_call(name, arg_vals) { + local mod_full = BlockBuilder.global_call_ival_ret(name, arg_vals, name) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } + emit_method_call(method, recv_val, arg_vals) { + local mod_full = BlockBuilder.method_call_ival_ret(method, recv_val, arg_vals, method) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } + emit_constructor(box_type, arg_vals) { + local mod_full = BlockBuilder.constructor_call_ival_ret(box_type, arg_vals, box_type) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } + + // P5: ctor(ArrayBox)→size()→ret via shared builder + emit_array_ctor_then_size() { + local mod_full = BlockBuilder.ctor_then_size_ret() + return JsonEmitBox.to_json(mod_full) + } + + + // Internal: LocalSSA materialize last ret value with a copy + _ssa_materialize_last_result(mod_full) { + // Delegate to LocalSSABox for structural materialization policy. 
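+ // Illustrative effect (register numbers are examples): a block ending in
+ //   [ ..., mir_call ... -> r3, ret r3 ]
+ // is expected to become
+ //   [ ..., mir_call ... -> r3, copy r4 <- r3, ret r3 ]
+ // i.e. a copy of the returned value is inserted right after its last definition,
+ // never past the block terminator.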
+ LocalSSABox.ensure_materialize_last_ret(mod_full) + } + +} diff --git a/lang/src/compiler/pipeline_v2/emit_mir_flow_map.hako b/lang/src/compiler/pipeline_v2/emit_mir_flow_map.hako new file mode 100644 index 00000000..6468b2a9 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/emit_mir_flow_map.hako @@ -0,0 +1,110 @@ +using "lang/src/shared/mir/json_emit_box.hako" as JsonEmitBox +// Shared MIR helpers (P1) +using "lang/src/shared/mir/mir_schema_box.hako" as MirSchema +using "lang/src/shared/mir/block_builder_box.hako" as BlockBuilder +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers +using "lang/src/compiler/pipeline_v2/local_ssa_box.hako" as LocalSSABox + +flow EmitMirFlowMap { + _array_len(arr) { return BoxHelpers.array_len(arr) } + _array_get(arr, idx) { return BoxHelpers.array_get(arr, idx) } + _map_get(obj, key) { return BoxHelpers.map_get(obj, key) } + _i(n) { return n } + + _empty_arr() { return new ArrayBox() } + + _instr_const(dst, val) { + local v = {type: "i64", value: EmitMirFlowMap._i(val)} + return {op: "const", dst: EmitMirFlowMap._i(dst), value: v} + } + _instr_ret(val) { + return {op: "ret", value: EmitMirFlowMap._i(val)} + } + _instr_binop(kind, lhs, rhs, dst) { + return {op: "binop", op_kind: kind, lhs: EmitMirFlowMap._i(lhs), rhs: EmitMirFlowMap._i(rhs), dst: EmitMirFlowMap._i(dst)} + } + _instr_compare(cmp, lhs, rhs, dst) { + return {op: "compare", cmp: cmp, lhs: EmitMirFlowMap._i(lhs), rhs: EmitMirFlowMap._i(rhs), dst: EmitMirFlowMap._i(dst)} + } + _instr_branch(cond, then_id, else_id) { + return {op: "branch", cond: EmitMirFlowMap._i(cond), then: EmitMirFlowMap._i(then_id), "else": EmitMirFlowMap._i(else_id)} + } + _instr_jump(target) { + return {op: "jump", target: EmitMirFlowMap._i(target)} + } + _block(id, insts) { + return {id: EmitMirFlowMap._i(id), instructions: insts} + } + _single_function_main(blocks) { + local f = {name: "main", params: [], blocks: blocks} + local fns = [f] + return {functions: fns} + } + + emit_return_int(v) { + // Route via shared BlockBuilder (P1). Keep output shape identical to legacy by + // returning only the { functions: [...] } wrapper. 
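+ // For reference, the legacy wrapper mentioned above matches what the helpers in this box build,
+ // e.g. with v = 42 (value shown for illustration only):
+ //   {"functions":[{"name":"main","params":[],"blocks":[{"id":0,"instructions":[
+ //     {"op":"const","dst":1,"value":{"type":"i64","value":42}},{"op":"ret","value":1}]}]}]}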
+ local mod_full = BlockBuilder.const_ret(v) + return JsonEmitBox.to_json(mod_full) + } + + emit_binop(lhs, rhs, opk) { + // Shared builder (P2 prep) + local mod_full = BlockBuilder.binop(lhs, rhs, opk) + return JsonEmitBox.to_json(mod_full) + } + + // materialize: 0/1 (builder parity) + emit_compare_cfg2(lhs, rhs, cmp, materialize) { + local mod_full = BlockBuilder.compare_branch(lhs, rhs, cmp) + if materialize != 0 { + local fns = me._map_get(mod_full, "functions") + local fns_len = me._array_len(fns) + if fns_len > 0 { + local blocks = me._map_get(me._array_get(fns, 0), "blocks") + local blocks_len = me._array_len(blocks) + if blocks_len > 0 { + local insts = me._map_get(me._array_get(blocks, 0), "instructions") + if insts != null { LocalSSABox.ensure_after_last_def_copy(insts, 3, 5) } + } + } + } + return JsonEmitBox.to_json(mod_full) + } + + emit_loop_counter(limit) { + // Shared builder (P2 prep) + local mod_full = BlockBuilder.loop_counter(limit) + return JsonEmitBox.to_json(mod_full) + } + + + // P4: generic extern/global/method/constructor emitters (immediate values) + emit_extern_call(name, arg_vals) { + local mod_full = BlockBuilder.extern_call_ival_ret(name, arg_vals, name) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } + emit_global_call(name, arg_vals) { + local mod_full = BlockBuilder.global_call_ival_ret(name, arg_vals, name) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } + emit_method_call(method, recv_val, arg_vals) { + local mod_full = BlockBuilder.method_call_ival_ret(method, recv_val, arg_vals, method) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } + emit_constructor(box_type, arg_vals) { + local mod_full = BlockBuilder.constructor_call_ival_ret(box_type, arg_vals, box_type) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } + + // P5: ctor(ArrayBox)→size()→ret via shared builder + emit_array_ctor_then_size() { + local mod_full = BlockBuilder.ctor_then_size_ret() + return JsonEmitBox.to_json(mod_full) + } + +} diff --git a/lang/src/compiler/pipeline_v2/emit_newbox_box.hako b/lang/src/compiler/pipeline_v2/emit_newbox_box.hako new file mode 100644 index 00000000..6f8c4a64 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/emit_newbox_box.hako @@ -0,0 +1,36 @@ +// EmitNewBoxBox — Return(New class, args[int...]) → MIR(JSON v0) +// 最小形: 各引数を r1..rN; newbox(class, args=r1..rN) → rK; ret rK + +using "lang/src/shared/json/mir_builder_min.hako" as MirJsonBuilderMin +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow +using "lang/src/compiler/pipeline_v2/stage1_args_parser_box.hako" as Stage1ArgsParserBox +using "lang/src/shared/mir/json_emit_box.hako" as JsonEmitBox +using "lang/src/shared/mir/block_builder_box.hako" as BlockBuilder +using "lang/src/compiler/pipeline_v2/local_ssa_box.hako" as LocalSSABox + +static box EmitNewBoxBox { + emit_newbox_int_args(class_name, args) { + class_name = match class_name { null => "", _ => class_name } + args = match args { null => [], _ => args } + // ArgsParserBox 正規化 → BlockBuilder 直結 + local vals = Stage1ArgsParserBox.parse_ints(args) + if vals == null { return null } + local mod_full = BlockBuilder.constructor_call_ival_ret(class_name, vals, class_name) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } + + // JSON v1 (MirCall) emission — experimental, shape-only + 
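+ // Usage sketch (class name and int args are illustrative placeholders):
+ //   local j = EmitNewBoxBox.emit_newbox_int_args("SomeBox", "[1,2]")
+ // should yield MIR(JSON) for: const r1=1; const r2=2; newbox SomeBox(r1,r2) -> rK; ret rK.
+ // The _v1 variant below currently shares the same builder path (shape-equivalent output).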
emit_newbox_int_args_v1(class_name, args) { + class_name = match class_name { null => "", _ => class_name } + args = match args { null => [], _ => args } + // 同形出力(shared builder に一本化) + local vals = Stage1ArgsParserBox.parse_ints(args) + if vals == null { return null } + local mod_full = BlockBuilder.constructor_call_ival_ret(class_name, vals, class_name) + LocalSSABox.ensure_materialize_last_ret(mod_full) + return JsonEmitBox.to_json(mod_full) + } +} + +static box EmitNewBoxStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/emit_return_box.hako b/lang/src/compiler/pipeline_v2/emit_return_box.hako new file mode 100644 index 00000000..d343ee84 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/emit_return_box.hako @@ -0,0 +1,14 @@ +// EmitReturnBox — return(Int) の最小 MIR(JSON v0) 生成(依存最小・文字列直組み) + +static box EmitReturnBox { + emit_return_int(v) { return EmitReturnBox.emit_return_int2(v, 0) } + + emit_return_int2(v, trace) { + if trace == 1 { print("[emit] return v=" + v) } + local sv = "" + v + // Minimal JSON v0: const→ret + return "{\"kind\":\"MIR\",\"schema_version\":\"1.0\",\"functions\":[{\"name\":\"main\",\"params\":[],\"blocks\":[{\"id\":0,\"instructions\":[{\"op\":\"const\",\"dst\":1,\"value\":{\"type\":\"i64\",\"value\":" + sv + "}},{\"op\":\"ret\",\"value\":1}]}]}]}" + } +} + +static box EmitReturnStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/execution_pipeline_box.hako b/lang/src/compiler/pipeline_v2/execution_pipeline_box.hako new file mode 100644 index 00000000..9c671601 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/execution_pipeline_box.hako @@ -0,0 +1,49 @@ +// ExecutionPipelineBox — Orchestrate Parser → Emit (emit-only; no execution) +using lang.compiler.parser.box as ParserBoxMod +// Parser dependencies (Phase 2 refactoring: hierarchical structure) +using lang.compiler.parser.scan.parser_string_utils_box +using lang.compiler.parser.scan.parser_ident_scan_box +using lang.compiler.parser.scan.parser_number_scan_box +using lang.compiler.parser.scan.parser_string_scan_box +using lang.compiler.parser.using.using_collector_box +using lang.compiler.parser.expr.parser_expr_box +using lang.compiler.parser.expr.parser_peek_box +using lang.compiler.parser.expr.parser_literal_box +using lang.compiler.parser.stmt.parser_stmt_box +using lang.compiler.parser.stmt.parser_control_box +using lang.compiler.parser.stmt.parser_exception_box +using lang.compiler.stage1.json_program_box +using lang.compiler.stage1.emitter_box as EmitterBoxMod +using "lang/src/compiler/pipeline_v2/backend_box.hako" as BackendBoxMod + +box ExecutionPipelineBox { + backend_name + backend + + birth(name) { + // Optional backend tag (no execution here) + if name == null { name = "vm" } + me.backend_name = name + me.backend = new BackendBox(name) + return 0 + } + + // Run with source text; stage3_flag=1 enables Stage‑3 acceptance in parser + run_source(src, stage3_flag) { + if src == null { src = "return 0" } + if stage3_flag == null { stage3_flag = 0 } + // Parse + local p = new ParserBox() + if stage3_flag == 1 { p.stage3_enable(1) } + p.extract_usings(src) + local usings = p.get_usings_json() + local ast = p.parse_program2(src) + // Emit Stage‑1 JSON with meta.usings + local json = EmitterBox.emit_program(ast, usings) + if json == null || json.size() == 0 { return 1 } + print(json) + return 0 + } +} + +static box ExecutionPipelineStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/flow_entry.hako 
b/lang/src/compiler/pipeline_v2/flow_entry.hako new file mode 100644 index 00000000..ed1d5d01 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/flow_entry.hako @@ -0,0 +1,26 @@ +// flow_entry.hako — Pipeline v2 entry box (emit-only) +// Guard: This box performs no execution. Returns MIR(JSON) as text. + +using "lang/src/compiler/pipeline_v2/pipeline.hako" as PipelineV2 + +static box FlowEntryBox { + // Emit legacy v0 JSON (call/boxcall/newbox). Minimal input: a Stage‑1 JSON string + emit_v0_from_ast(ast_json, prefer_cfg) { + return PipelineV2.lower_stage1_to_mir(ast_json, prefer_cfg) + } + // Emit v0 with using context (alias/module maps) — prefer this when names need resolution + emit_v0_from_ast_with_usings(ast_json, prefer_cfg, usings_json, modules_json) { + print("[DEBUG FlowEntry] emit_v0_from_ast_with_usings called") + local result = PipelineV2.lower_stage1_to_mir_with_usings(ast_json, prefer_cfg, usings_json, modules_json) + print("[DEBUG FlowEntry] result=" + result) + return result + } + + // Emit v1 → v0 compatible JSON (emit the unified mir_call form once, then adapt). Intended for self-hosted execution + emit_v1_compat_from_ast(ast_json, prefer_cfg) { + return PipelineV2.lower_stage1_to_mir_v1_compat(ast_json, prefer_cfg) + } + + // No-op entry (box guard) + main(args) { return 0 } +} diff --git a/lang/src/compiler/pipeline_v2/header_emit_box.hako b/lang/src/compiler/pipeline_v2/header_emit_box.hako new file mode 100644 index 00000000..cbce4da2 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/header_emit_box.hako @@ -0,0 +1,16 @@ +// HeaderEmitBox — emit-only helper for minimal Stage-1 JSON header +// Responsibility +// - Provide a single place to emit the minimal JSON v0 header used by +// quiet child pipelines (e.g., --min-json). No parsing or lowering here. +// Input/Output +// - Input: none +// - Output: prints a single JSON line: {"version":0,"kind":"Program"} +// Non-goals +// - This box does not attempt to inject body or metadata; callers may compose.
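+// Usage sketch: a quiet child pipeline (for example one driven with --min-json) calls
+//   HeaderEmitBox.emit_min_json_header()
+// which prints exactly one line, {"version":0,"kind":"Program"}, and returns 0.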
+ +static box HeaderEmitBox { + emit_min_json_header() { + print("{\"version\":0,\"kind\":\"Program\"}") + return 0 + } +} diff --git a/lang/src/compiler/pipeline_v2/json_minify_box.hako b/lang/src/compiler/pipeline_v2/json_minify_box.hako new file mode 100644 index 00000000..a464d870 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/json_minify_box.hako @@ -0,0 +1,37 @@ +// json_minify_box.hako — strip insignificant whitespace from JSON text +// Note: preserves characters inside string literals, including escaped quotes + +static box JsonMinifyBox { + minify(text) { + if text == null { return null } + local s = "" + text + local out = "" + local i = 0 + local n = s.size() + local in_str = 0 + loop(i < n) { + local ch = s.substring(i, i+1) + if in_str == 1 { + if ch == "\\" { + out = out + ch + i = i + 1 + if i < n { out = out + s.substring(i, i+1) } + } else { + out = out + ch + if ch == "\"" { in_str = 0 } + } + } else { + if ch == "\"" { in_str = 1 out = out + ch } + else { + // skip whitespace outside strings + if ch == " " || ch == "\n" || ch == "\r" || ch == "\t" { } + else { out = out + ch } + } + } + i = i + 1 + } + return out + } +} + +static box JsonMinifyStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/local_ssa_box.hako b/lang/src/compiler/pipeline_v2/local_ssa_box.hako new file mode 100644 index 00000000..08fabe26 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/local_ssa_box.hako @@ -0,0 +1,188 @@ +// LocalSSABox — 材化(materialize)/Copy 挿入の最小ポリシーを集約 +// Phase 15.7: 最小実装。将来 PHI/Call 前の規約をここに集約して拡張する。 + +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers + +static box LocalSSABox { + _maybe_unwrap_instructions(insts) { + if insts == null { return null } + local repr = "" + insts + if repr.indexOf("MapBox(") == 0 { + local inner = BoxHelpers.map_get(insts, "instructions") + if inner != null { return inner } + } + return insts + } + + // 汎用: copy 命令を insts(ArrayBox of Map)末尾に追加 + add_copy(insts, dst, src) { + if insts == null { return 1 } + insts = me._maybe_unwrap_instructions(insts) + if insts == null { return 1 } + call("ArrayBox.push/2", insts, { op:"copy", dst: dst, src: src }) + return 0 + } + + // PHI直後の材化(最小): いまは add_copy と同じ。将来 PHI 群スキップを実装 + ensure_after_phis_copy(insts, src, dst) { + if insts == null { return 1 } + insts = me._maybe_unwrap_instructions(insts) + if insts == null { return 1 } + if BoxHelpers.is_array(insts) == 0 { + return me.add_copy(insts, dst, src) + } + local n = BoxHelpers.array_len(insts) + local i = 0 + loop (i < n) { + local ins = BoxHelpers.array_get(insts, i) + if ins == null { break } + local op = BoxHelpers.map_get(ins, "op") + if op == null || op != "phi" { break } + i = i + 1 + } + local insert_at = i // phi 直後 + local node = { op:"copy", dst: dst, src: src } + if insert_at >= n { + call("ArrayBox.push/2", insts, node) + return 0 + } + if n > 0 { + call("ArrayBox.push/2", insts, BoxHelpers.array_get(insts, n - 1)) + local j = n - 1 + loop (j >= insert_at) { + call("ArrayBox.set/3", insts, j + 1, BoxHelpers.array_get(insts, j)) + j = j - 1 + } + call("ArrayBox.set/3", insts, insert_at, node) + return 0 + } + call("ArrayBox.push/2", insts, node) + return 0 + } + + // 新規: 定義直後に copy を挿入(安全・最小) + ensure_after_last_def_copy(insts, src, dst) { + if insts == null { return 1 } + if BoxHelpers.is_array(insts) == 0 { return 1 } + local n = BoxHelpers.array_len(insts) + local insert_at = n + // Find the first terminator index (ret/branch/jump/throw) to ensure block ends with a terminator + local term_at = n + { + 
local i = 0 + loop(i < n) { + local ins = BoxHelpers.array_get(insts, i) + if ins != null { + local op = BoxHelpers.map_get(ins, "op") + if op == "ret" || op == "branch" || op == "jump" || op == "throw" { term_at = i break } + } + i = i + 1 + } + } + // 探索: 最後に dst=src を定義した位置 + { + local i = 0 + loop (i < n) { + local ins = BoxHelpers.array_get(insts, i) + if ins != null { + print("[LS] checking def ins=" + ("" + ins)) + local d_raw = BoxHelpers.map_get(ins, "dst") + print("[LS] dst raw=" + ("" + d_raw)) + local d = BoxHelpers.value_i64(d_raw) + if d != null && d == src { insert_at = i + 1 } + } + i = i + 1 + } + } + // Do not cross terminator: insert before the first terminator if present + if insert_at > term_at { insert_at = term_at } + local node = { op:"copy", dst: dst, src: src } + if insert_at >= n { call("ArrayBox.push/2", insts, node) return 0 } + // 1つ末尾に空きを作る(末尾要素を複製して押し出す) + if n > 0 { + call("ArrayBox.push/2", insts, BoxHelpers.array_get(insts, n - 1)) + local j = n - 1 + loop (j >= insert_at) { + call("ArrayBox.set/3", insts, j + 1, BoxHelpers.array_get(insts, j)) + j = j - 1 + } + call("ArrayBox.set/3", insts, insert_at, node) + return 0 + } + call("ArrayBox.push/2", insts, node) + return 0 + } + + // calls の最小材化: 最後の ret が参照する値 src を探し、src の直後に copy(dst=src+1) を挿入する。 + // mod_full: { functions: [ { blocks: [ { instructions: [...] } ] } ] } + ensure_materialize_last_ret(mod_full) { + if mod_full == null { return 1 } + local fns = BoxHelpers.map_get(mod_full, "functions") + if fns == null || BoxHelpers.array_len(fns) == 0 { return 1 } + local blocks = BoxHelpers.map_get(BoxHelpers.array_get(fns, 0), "blocks") + if blocks == null || BoxHelpers.array_len(blocks) == 0 { return 1 } + local insts = BoxHelpers.map_get(BoxHelpers.array_get(blocks, 0), "instructions") + if insts == null || BoxHelpers.array_len(insts) == 0 { return 1 } + // ret の引数を探索 + local n = BoxHelpers.array_len(insts) + local src = null + { + local i = 0 + loop(i < n) { + local ins = BoxHelpers.array_get(insts, i) + if ins != null { + local op = BoxHelpers.map_get(ins, "op") + if op == "ret" { + src = BoxHelpers.value_i64(BoxHelpers.map_get(ins, "value")) + } + } + i = i + 1 + } + } + if src == null { return 1 } + return me.ensure_after_last_def_copy(insts, src, src + 1) + } + + // ensure_cond: 最初の branch の cond を検出し、その定義直後に copy を1つ挿入(最小) + // - 形状期待: いずれかの block.instructions に { op:"branch", cond:, ... 
} + ensure_cond(mod_full) { + if mod_full == null { return 1 } + local fns = BoxHelpers.map_get(mod_full, "functions") + if fns == null { return 1 } + local bn = BoxHelpers.array_len(fns) + if bn == 0 { return 1 } + local blocks = BoxHelpers.map_get(BoxHelpers.array_get(fns, 0), "blocks") + if blocks == null || BoxHelpers.array_len(blocks) == 0 { return 1 } + // 探索: 各ブロックの instructions から最初の branch を見つけ、そのブロックで材化 + local bi = 0 + local block_count = BoxHelpers.array_len(blocks) + loop (bi < block_count) { + local insts = BoxHelpers.map_get(BoxHelpers.array_get(blocks, bi), "instructions") + local insts_len = BoxHelpers.array_len(insts) + if insts_len > 0 { + local n = insts_len + local cond = null + { + local i = 0 + loop(i < n) { + local ins = BoxHelpers.array_get(insts, i) + if ins != null { + local op = BoxHelpers.map_get(ins, "op") + if op == "branch" { + cond = BoxHelpers.value_i64(BoxHelpers.map_get(ins, "cond")) + break + } + } + i = i + 1 + } + } + if cond != null { return me.ensure_after_last_def_copy(insts, cond, cond + 2) } + } + bi = bi + 1 + } + return 1 + } + +} + +static box LocalSSAStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/map_helpers_box.hako b/lang/src/compiler/pipeline_v2/map_helpers_box.hako new file mode 100644 index 00000000..a5243bfe --- /dev/null +++ b/lang/src/compiler/pipeline_v2/map_helpers_box.hako @@ -0,0 +1,43 @@ +// map_helpers_box.hako — Pipeline用の軽量ヘルパ(MapBoxの型付き取得) + +using "lang/src/shared/common/string_helpers.hako" as StringHelpers +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers +static box MapHelpersBox { + _raw_get(m, key) { + return BoxHelpers.map_get(m, key) + } + + _to_i64(v) { return StringHelpers.to_i64(v) } + + get_str(m, key) { + local v = me._raw_get(m, key) + if v == null { return "" } + if BoxHelpers.is_map(v) == 1 { + local inner = BoxHelpers.map_get(v, "value") + if inner != null { return "" + inner } + } + return "" + v + } + + get_i64(m, key) { + local v = me._raw_get(m, key) + return BoxHelpers.value_i64(v) + } + + expect_str(m, key) { return me.get_str(m, key) } + expect_i64(m, key) { return me.get_i64(m, key) } + + opt_str(m, key, def) { + local s = me.get_str(m, key) + if s == null || s == "" { return def } + return s + } + + opt_i64(m, key, def) { + local n = me.get_i64(m, key) + if n == null { return def } + return n + } +} + +static box MapHelpersStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/method_extract_box.hako b/lang/src/compiler/pipeline_v2/method_extract_box.hako new file mode 100644 index 00000000..dcad49aa --- /dev/null +++ b/lang/src/compiler/pipeline_v2/method_extract_box.hako @@ -0,0 +1,12 @@ +// MethodExtractBox — Stage‑1 JSON から Return(Method recv, method, args) を抽出(整数引数のみ; recvは無視) +// Delegation to Stage1IntArgsExtractBox (unified implementation) +using "lang/src/compiler/pipeline_v2/stage1_int_args_extract_box.hako" as Unified + +static box MethodExtractBox { + // Returns { method: String, args: [Int,...] 
} or null + extract_return_method_ints(ast_json) { + return Unified.extract_return_method_ints(ast_json) + } +} + +static box MethodExtractStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/mir_builder_box.hako b/lang/src/compiler/pipeline_v2/mir_builder_box.hako new file mode 100644 index 00000000..bb1a597e --- /dev/null +++ b/lang/src/compiler/pipeline_v2/mir_builder_box.hako @@ -0,0 +1,35 @@ +// MirBuilderBox — minimal Ny→MIR(JSON v0) lowering entry (dev) +// Phase 15.7: kept optional; pipeline.v2 uses Emit flow directly. +using "lang/src/compiler/pipeline_v2/stage1_extract_flow.hako" as Stage1ExtractFlow +using "lang/src/compiler/pipeline_v2/emit_return_box.hako" as EmitReturnBox +using "lang/src/compiler/pipeline_v2/emit_binop_box.hako" as EmitBinopBox +using "lang/src/compiler/pipeline_v2/emit_compare_box.hako" as EmitCompareBox +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers + +box MirBuilderBox { + // Placeholder for optimizer toggle + optimize_flag + birth() { me.optimize_flag = 0 return 0 } + set_optimize(v) { if v == null { v = 0 } me.optimize_flag = v return 0 } + + // Accept Stage‑1 AST JSON and emit minimal MIR(JSON v0) + build(ast_json) { + if ast_json == null { return EmitReturnBox.emit_return_int2(0, 0) } + // If(cond=Compare) → CFG (branch/jump/ret) + if call("String.indexOf/2", ast_json, "\"type\":\"If\"") >= 0 { + local ic = Stage1ExtractFlow.extract_if_compare(ast_json) + if ic != null { return EmitCompareBox.emit_compare_cfg3(BoxHelpers.map_get(ic, "lhs"), BoxHelpers.map_get(ic, "rhs"), BoxHelpers.map_get(ic, "cmp"), 0, 0) } + } + // Return(Compare) + local c = Stage1ExtractFlow.extract_return_compare(ast_json) + if c != null { return EmitCompareBox.emit_compare_cfg3(BoxHelpers.map_get(c, "lhs"), BoxHelpers.map_get(c, "rhs"), BoxHelpers.map_get(c, "cmp"), 0, 0) } + // Return(BinOp) + local b = Stage1ExtractFlow.extract_return_binop(ast_json) + if b != null { return EmitBinopBox.emit_binop2(BoxHelpers.map_get(b, "lhs"), BoxHelpers.map_get(b, "rhs"), BoxHelpers.map_get(b, "kind"), 0) } + // Fallback: Return(Int) + local v = Stage1ExtractFlow.extract_return_int(ast_json) + return EmitReturnBox.emit_return_int2(v, 0) + } +} + +static box MirBuilderStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/mir_call_box.hako b/lang/src/compiler/pipeline_v2/mir_call_box.hako new file mode 100644 index 00000000..4780ee10 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/mir_call_box.hako @@ -0,0 +1,91 @@ +// MirCallBox — JSON v1 unified call emitters(薄い箱) +// 目的: v1(mir_call)を第一級に扱う最小APIを集中させる。実行は含まない(emit-only)。 + +using "lang/src/shared/json/mir_builder_min.hako" as MirJsonBuilderMin +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow +using lang.compiler.emit.common.mir_emit as MirEmitBox +using lang.compiler.emit.common.call_emit as CallEmitBox +using lang.compiler.emit.common.header_emit as HeaderEmitBox +using "lang/src/shared/mir/json_emit_box.hako" as JsonEmitBox + +static box MirCallBox { + // Global(name, args:int[]) + emit_call_v1(name, args) { + name = match name { null => "", _ => name } + args = match args { null => [], _ => args } + local s = "" + args + local pos = 0 + local n = 0 + local insts = [] + // materialize const args r1..rN + loop (true) { + local ds = RegexFlow.digits_from(s, pos) + if ds == "" { pos = pos + 1 } else { insts.push(MirEmitBox.make_const(1 + n, RegexFlow.to_int(ds))) n = n + 1 pos = pos + ds.size() } + if pos >= s.size() { break } + } + local dst = n + 1 + // args 
1..n + local arg_ids = new ArrayBox() + { local i=0 loop(i "", _ => method } + args = match args { null => [], _ => args } + local s = "" + args + local pos = 0 + local n = 0 + local insts = [] + insts.push(MirEmitBox.make_const(1, recv_val)) + // materialize args r2..r(n+1) + { local i = 0 loop(true) { + local ds = RegexFlow.digits_from(s, pos) + if ds == "" { pos = pos + 1 } else { insts.push(MirEmitBox.make_const(2 + i, RegexFlow.to_int(ds))) i = i + 1 n = i pos = pos + ds.size() } + if pos >= s.size() { break } + } + } + local dst = n + 2 + // args 2..(n+1) + local arg_ids = new ArrayBox() + { local i=0 loop(i "", _ => class_name } + args = match args { null => [], _ => args } + local s = "" + args + local pos = 0 + local n = 0 + local insts = [] + // materialize args r1..rN + { local i = 0 loop(true) { + local ds = RegexFlow.digits_from(s, pos) + if ds == "" { pos = pos + 1 } else { insts.push(MirEmitBox.make_const(1 + i, RegexFlow.to_int(ds))) i = i + 1 n = i pos = pos + ds.size() } + if pos >= s.size() { break } + } + } + local dst = n + 1 + // args 1..n + local arg_ids = new ArrayBox() + { local i=0 loop(i") + normalize_call_ints(raw) { + if raw == null { return null } + local out = new MapBox() + local name = "" + BoxHelpers.map_get(raw, "name") + if name == null || name == "" { return null } + BoxHelpers.map_set(out, "name", name) + // args: accept array-like; fallback to empty + local arr = new ArrayBox() + local src = BoxHelpers.map_get(raw, "args") + if src != null { + local n = BoxHelpers.array_len(src) + local i = 0 + loop (i < n) { + arr.push(StringHelpers.to_i64(BoxHelpers.array_get(src, i))) + i = i + 1 + } + } + BoxHelpers.map_set(out, "args", arr) + return out + } + + // Normalize Method: { method:String, args:[Int...] } → MapBox("method","args:Array") + normalize_method_ints(raw) { + if raw == null { return null } + local out = new MapBox() + local m = "" + BoxHelpers.map_get(raw, "method") + if m == null || m == "" { return null } + BoxHelpers.map_set(out, "method", m) + local arr = new ArrayBox() + local src = BoxHelpers.map_get(raw, "args") + if src != null { + local n = BoxHelpers.array_len(src) + local i = 0 + loop (i < n) { + arr.push(StringHelpers.to_i64(BoxHelpers.array_get(src, i))) + i = i + 1 + } + } + BoxHelpers.map_set(out, "args", arr) + return out + } + + // Normalize New: { class:String, args:[Int...] 
} → MapBox("class","args:Array") + normalize_new_ints(raw) { + if raw == null { return null } + local out = new MapBox() + local c = "" + BoxHelpers.map_get(raw, "class") + if c == null || c == "" { return null } + BoxHelpers.map_set(out, "class", c) + local arr = new ArrayBox() + local src = BoxHelpers.map_get(raw, "args") + if src != null { + local n = BoxHelpers.array_len(src) + local i = 0 + loop (i < n) { + arr.push(StringHelpers.to_i64(BoxHelpers.array_get(src, i))) + i = i + 1 + } + } + BoxHelpers.map_set(out, "args", arr) + return out + } +} + +static box NormalizerStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/pipeline.hako b/lang/src/compiler/pipeline_v2/pipeline.hako new file mode 100644 index 00000000..40a82c0f --- /dev/null +++ b/lang/src/compiler/pipeline_v2/pipeline.hako @@ -0,0 +1,558 @@ +using "lang/src/compiler/pipeline_v2/stage1_extract_flow.hako" as Stage1ExtractFlow +using "lang/src/compiler/pipeline_v2/emit_return_box.hako" as EmitReturnBox +using "lang/src/compiler/pipeline_v2/emit_binop_box.hako" as EmitBinopBox +using "lang/src/compiler/pipeline_v2/emit_compare_box.hako" as EmitCompareBox +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow +using lang.compiler.builder.ssa.local as LocalSSA +using "lang/src/compiler/pipeline_v2/emit_call_box.hako" as EmitCallBox +using "lang/src/compiler/pipeline_v2/emit_method_box.hako" as EmitMethodBox +using "lang/src/compiler/pipeline_v2/emit_newbox_box.hako" as EmitNewBoxBox +using "lang/src/compiler/pipeline_v2/mir_call_box.hako" as MirCallBox +using "lang/src/shared/json/mir_v1_adapter.hako" as MirJsonV1Adapter +using "lang/src/compiler/pipeline_v2/compare_extract_box.hako" as CompareExtractBox +using "lang/src/compiler/pipeline_v2/normalizer_box.hako" as NormalizerBox +using "lang/src/compiler/pipeline_v2/map_helpers_box.hako" as MapHelpersBox +using "lang/src/compiler/pipeline_v2/call_extract_box.hako" as CallExtractBox +using "lang/src/compiler/pipeline_v2/method_extract_box.hako" as MethodExtractBox +using "lang/src/compiler/pipeline_v2/new_extract_box.hako" as NewExtractBox +using "lang/src/shared/common/string_helpers.hako" as StringHelpers +using "lang/src/compiler/pipeline_v2/using_resolver_box.hako" as UsingResolverBox +using "lang/src/compiler/pipeline_v2/namespace_box.hako" as NamespaceBox +using "lang/src/compiler/pipeline_v2/signature_verifier_box.hako" as SignatureVerifierBox +using "lang/src/compiler/pipeline_v2/stage1_json_scanner_box.hako" as Stage1JsonScannerBox +using "lang/src/compiler/pipeline_v2/stage1_name_args_normalizer_box.hako" as NameArgsNormBox +using "lang/src/compiler/pipeline_v2/alias_preflight_box.hako" as AliasPreflightBox +using "lang/src/compiler/pipeline_v2/stage1_args_parser_box.hako" as Stage1ArgsParserBox +using "lang/src/compiler/pipeline_v2/pipeline_helpers_box.hako" as PipelineHelpersBox + +flow PipelineV2 { + lower_stage1_to_mir(ast_json, prefer_cfg) { + // Backward-compatible entry (trace=0) + return PipelineV2.lower_stage1_to_mir_trace(ast_json, prefer_cfg, 0) + } + + // Experimental: emit JSON v1 (MirCall) directly (no adapter). Shape-only focus. 
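+ // Entry points at a glance (ast_json is Stage-1 JSON text; prefer_cfg as in the rest of this flow):
+ //   PipelineV2.lower_stage1_to_mir(ast_json, prefer_cfg)            // legacy v0 emit
+ //   PipelineV2.lower_stage1_to_mir_v1(ast_json, prefer_cfg)         // v1-flavoured emit (this path)
+ //   PipelineV2.lower_stage1_to_mir_v1_compat(ast_json, prefer_cfg)  // v1 emit downgraded to v0 via MirJsonV1Adapter.to_v0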
+ lower_stage1_to_mir_v1(ast_json, prefer_cfg) { + if ast_json == null { return EmitReturnBox.emit_return_int2(0, 0) } + local r = UsingResolverBox.state_new() + // Call (prefer extract box) + { + local kq = RegexFlow.find_from(ast_json, "\"type\":\"Call\"", 0) + if kq >= 0 { + // Strict preflight (scanner): read name and enforce using alias resolution + { + local scan = Stage1JsonScannerBox.extract_name_args(ast_json, kq) + if scan != null { + if AliasPreflightBox.check_head(scan.get("name"), r) != 1 { return null } + } + } + + local kc = CallExtractBox.extract_return_call_ints(ast_json) + if kc != null { + // Normalize to fix key/types at entry + local kn = NormalizerBox.normalize_call_ints(kc) + if kn == null { return null } + if SignatureVerifierBox.verify_call_name_arity(kn.get("name"), kn.get("args")) != 1 { return null } + local out_call = EmitCallBox.emit_call_int_args_v1(kn.get("name"), kn.get("args")) + if out_call == null { return null } + return out_call + } + // Tolerant scanner fallback → v1 emit (raw; no resolver context here) + { + local pos_body = Stage1JsonScannerBox.find_body_start(ast_json) + if pos_body < 0 { pos_body = 0 } + local pair = Stage1JsonScannerBox.extract_name_args(ast_json, pos_body) + if pair != null { + local out_scan = EmitCallBox.emit_call_int_args_v1(pair.get("name"), pair.get("args_text")) + if out_scan == null { return null } + return out_scan + } + } + // Fallback legacy + local k = Stage1ExtractFlow.extract_return_call(ast_json) + if k != null { + local kn2 = NormalizerBox.normalize_call_ints(k) + if kn2 == null { return null } + if SignatureVerifierBox.verify_call_name_arity(kn2.get("name"), kn2.get("args")) != 1 { return null } + local out_legacy = EmitCallBox.emit_call_int_args_v1(kn2.get("name"), kn2.get("args")) + if out_legacy == null { return null } + return out_legacy + } + } + } + // Method (recv:0 placeholder; prefer extract box) + { + local mq = RegexFlow.find_from(ast_json, "\"type\":\"Method\"", 0) + if mq >= 0 { + local mb = MethodExtractBox.extract_return_method_ints(ast_json) + if mb != null { + local mn = NormalizerBox.normalize_method_ints(mb) + if mn == null { return null } + local out_method = EmitMethodBox.emit_method_int_args_v1(mn.get("method"), 0, mn.get("args")) + if out_method == null { return null } + return out_method + } + // Tolerant scanner fallback (method/args only) + { + local pos_body = Stage1JsonScannerBox.find_body_start(ast_json) + if pos_body < 0 { pos_body = 0 } + local pair = Stage1JsonScannerBox.extract_label_args(ast_json, "method", pos_body) + if pair != null { + local out_method_scan = EmitMethodBox.emit_method_int_args_v1(pair.get("label"), 0, pair.get("args_text")) + if out_method_scan == null { return null } + return out_method_scan + } + } + local m = Stage1ExtractFlow.extract_return_method(ast_json) + if m != null { + local mn2 = NormalizerBox.normalize_method_ints(m) + if mn2 == null { return null } + local out_method_legacy = EmitMethodBox.emit_method_int_args_v1(mn2.get("method"), 0, mn2.get("args")) + if out_method_legacy == null { return null } + return out_method_legacy + } + } + } + // New (prefer extract box) + { + local nq = RegexFlow.find_from(ast_json, "\"type\":\"New\"", 0) + if nq >= 0 { + local nb = NewExtractBox.extract_return_new_ints(ast_json) + if nb != null { + local nn = NormalizerBox.normalize_new_ints(nb) + if nn == null { return null } + local out_new = EmitNewBoxBox.emit_newbox_int_args_v1(nn.get("class"), nn.get("args")) + if out_new == null { return null } + return out_new 
+ } + // Tolerant scanner fallback (class/args only) + { + local pos_body = Stage1JsonScannerBox.find_body_start(ast_json) + if pos_body < 0 { pos_body = 0 } + local pair = Stage1JsonScannerBox.extract_label_args(ast_json, "class", pos_body) + if pair != null { + local out_new_scan = EmitNewBoxBox.emit_newbox_int_args_v1(pair.get("label"), pair.get("args_text")) + if out_new_scan == null { return null } + return out_new_scan + } + } + local n = Stage1ExtractFlow.extract_return_new(ast_json) + if n != null { + local nn2 = NormalizerBox.normalize_new_ints(n) + if nn2 == null { return null } + local out_new_legacy = EmitNewBoxBox.emit_newbox_int_args_v1(nn2.get("class"), nn2.get("args")) + if out_new_legacy == null { return null } + return out_new_legacy + } + } + } + // Fallback to v0 path for remaining cases + return PipelineV2.lower_stage1_to_mir(ast_json, prefer_cfg) + } + + // Experimental helper: emit v1 then downgrade to v0 for Mini‑VM exec + lower_stage1_to_mir_v1_compat(ast_json, prefer_cfg) { + local j1 = PipelineV2.lower_stage1_to_mir_v1(ast_json, prefer_cfg) + return MirJsonV1Adapter.to_v0(j1) + } + + lower_stage1_to_mir_trace(ast_json, prefer_cfg, trace) { + local prefer = PipelineHelpersBox.to_i64(prefer_cfg) + if ast_json == null { return EmitReturnBox.emit_return_int2(0, trace) } + if trace == 1 { print("[pipe] prefer_cfg=" + prefer_cfg + " prefer=" + prefer) } + local r = UsingResolverBox.state_new() + // Early fast-path: any Compare in program (Int-only) + { + local cq0 = RegexFlow.find_from(ast_json, "\"type\":\"Compare\"", 0) + if cq0 >= 0 { + local opk_pos0 = RegexFlow.find_from(ast_json, "\"op\":\"", cq0) + local cmp0 = "" + if opk_pos0 >= 0 { + local opk_end0 = RegexFlow.find_from(ast_json, "\"", opk_pos0 + 6) + if opk_end0 >= 0 { cmp0 = ast_json.substring(opk_pos0 + 6, opk_end0) } + } + local lhsp0 = RegexFlow.find_from(ast_json, "\"lhs\"", cq0) + local rhsp0 = RegexFlow.find_from(ast_json, "\"rhs\"", cq0) + if lhsp0 >= 0 && rhsp0 >= 0 { + local lv0 = 0 + local rv0 = 0 + local vpos0 = RegexFlow.find_from(ast_json, "\"value\":", lhsp0) + if vpos0 >= 0 { + local ds0 = RegexFlow.digits_from(ast_json, vpos0 + 8) + if ds0 != "" { lv0 = RegexFlow.to_int(ds0) } + } + local vpos02 = RegexFlow.find_from(ast_json, "\"value\":", rhsp0) + if vpos02 >= 0 { + local ds02 = RegexFlow.digits_from(ast_json, vpos02 + 8) + if ds02 != "" { rv0 = RegexFlow.to_int(ds02) } + } + if cmp0 == null || cmp0 == "" { cmp0 = "Gt" } + if prefer >= 1 { + local mat0 = 0 + if prefer >= 2 { mat0 = 1 } + if trace == 1 { print("[pipe] any-fast cfg cmp=" + cmp0 + " lhs=" + (""+lv0) + " rhs=" + (""+rv0) + " mat=" + (""+mat0)) } + local jfast0 = EmitCompareBox.emit_compare_cfg3(lv0, rv0, cmp0, mat0, trace) + return LocalSSA.ensure_cond(jfast0) + } else { + if trace == 1 { print("[pipe] any-fast ret cmp=" + cmp0 + " lhs=" + (""+lv0) + " rhs=" + (""+rv0)) } + return EmitCompareBox.emit_compare_ret(lv0, rv0, cmp0, trace) + } + } + } + } + // If(...) 
pattern (prefer CFG form) + { + local iq = RegexFlow.find_from(ast_json, "\"type\":\"If\"", 0) + if iq >= 0 { + // Fast-path: directly parse Compare within If for primitives (Int-only) + { + local cv = PipelineHelpersBox.parse_compare_values(ast_json, iq) + if cv != null { + local mat = 1 + local lfast = cv.get(0) + local rfast = cv.get(1) + local cmpfast = cv.get(2) + if cmpfast == null || cmpfast == "" { cmpfast = "Gt" } + if trace == 1 { print("[pipe] if-fast cmp=" + cmpfast + " lhs=" + (""+lfast) + " rhs=" + (""+rfast) + " mat=" + (""+mat)) } + local jfast = EmitCompareBox.emit_compare_cfg3(lfast, rfast, cmpfast, mat, trace) + return LocalSSA.ensure_cond(jfast) + } + } + // Prefer robust extractor box + local icb = CompareExtractBox.extract_if_compare_ints(ast_json) + if icb != null { + local mat = 0 + if prefer >= 2 { mat = 1 } + local l = icb.get(0) + local r = icb.get(1) + local c = icb.get(2) + local j0 = EmitCompareBox.emit_compare_cfg3(l, r, c, mat, trace) + return LocalSSA.ensure_cond(j0) + } + // Fallback legacy extractor + local ic = Stage1ExtractFlow.extract_if_compare(ast_json) + if ic != null { + local mat = 0 + if prefer >= 2 { mat = 1 } + local l2 = ic.get("lhs") + 0 + local r2 = ic.get("rhs") + 0 + local c2 = "" + ic.get("cmp") + if c2 == null || c2 == "" { c2 = "Gt" } + local j0 = EmitCompareBox.emit_compare_cfg3(l2, r2, c2, mat, trace) + return LocalSSA.ensure_cond(j0) + } + } + } + // Next Call (Return(Call ...)) — prefer extract box + { + local kq = RegexFlow.find_from(ast_json, "\"type\":\"Call\"", 0) + if kq >= 0 { + // Strict preflight via tolerant scanner: read raw name and enforce using alias resolution + { + local scan0 = Stage1JsonScannerBox.extract_name_args(ast_json, kq) + if scan0 != null { + if AliasPreflightBox.check_head(scan0.get("name"), r) != 1 { return null } + } + } + + local kc = CallExtractBox.extract_return_call_ints(ast_json) + if kc != null { + local kn = NormalizerBox.normalize_call_ints(kc) + if kn == null { return null } + if SignatureVerifierBox.verify_call_name_arity(kn.get("name"), kn.get("args")) != 1 { return null } + local j4 = EmitCallBox.emit_call_int_args(kn.get("name"), kn.get("args")) + if j4 == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j4)) + } + // Fallback: scanner → normalizer → emit + { + local pos_body = Stage1JsonScannerBox.find_body_start(ast_json) + if pos_body < 0 { pos_body = 0 } + local pair = Stage1JsonScannerBox.extract_name_args(ast_json, pos_body) + if pair != null { + local norm = NameArgsNormBox.normalize_call(pair, r) + if norm != null { + local j4b = EmitCallBox.emit_call_int_args(norm.get("name"), norm.get("args_text")) + if j4b == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j4b)) + } + } + } + + local k = Stage1ExtractFlow.extract_return_call(ast_json) + if k != null { + local kn2 = NormalizerBox.normalize_call_ints(k) + if kn2 == null { return null } + if SignatureVerifierBox.verify_call_name_arity(kn2.get("name"), kn2.get("args")) != 1 { return null } + local j4 = EmitCallBox.emit_call_int_args(kn2.get("name"), kn2.get("args")) + if j4 == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j4)) + } + // (scanner fallback handled above) + + } + } + // Next Method (Return(Method ...)) — prefer extract box; recv is 0 (placeholder) + { + local mq = RegexFlow.find_from(ast_json, "\"type\":\"Method\"", 0) + if mq >= 0 { + local mb = MethodExtractBox.extract_return_method_ints(ast_json) + if mb != null { + local mn = 
NormalizerBox.normalize_method_ints(mb) + if mn == null { return null } + // Compile-time arity check (best-effort by method name) + if SignatureVerifierBox.verify_from_args(mn.get("method"), mn.get("args")) != 1 { return null } + local j5 = EmitMethodBox.emit_method_int_args(mn.get("method"), 0, mn.get("args")) + if j5 == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j5)) + } + // Lightweight Stage1 scanner as tolerant fallback (method/args only) + { + local pos_body = Stage1JsonScannerBox.find_body_start(ast_json) + if pos_body < 0 { pos_body = 0 } + local pair = Stage1JsonScannerBox.extract_label_args(ast_json, "method", pos_body) + if pair != null { + local raw2 = pair.get("label") + local args2 = pair.get("args_text") + local fq2 = NamespaceBox.normalize_global_name(raw2, r) + if fq2 != null { + if SignatureVerifierBox.verify_from_args(fq2, args2) == 1 { + local j5b = EmitMethodBox.emit_method_int_args(fq2, 0, args2) + if j5b == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j5b)) + } + } + } + } + local m = Stage1ExtractFlow.extract_return_method(ast_json) + if m != null { + local mn2 = NormalizerBox.normalize_method_ints(m) + if mn2 == null { return null } + if SignatureVerifierBox.verify_from_args(mn2.get("method"), mn2.get("args")) != 1 { return null } + local j5 = EmitMethodBox.emit_method_int_args(mn2.get("method"), 0, mn2.get("args")) + if j5 == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j5)) + } + } + } + // Next New (Return(New ...)) — prefer extract box + { + local nq = RegexFlow.find_from(ast_json, "\"type\":\"New\"", 0) + if nq >= 0 { + local nb = NewExtractBox.extract_return_new_ints(ast_json) + if nb != null { + local nn = NormalizerBox.normalize_new_ints(nb) + if nn == null { return null } + local j6 = EmitNewBoxBox.emit_newbox_int_args(nn.get("class"), nn.get("args")) + if j6 == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j6)) + } + local n = Stage1ExtractFlow.extract_return_new(ast_json) + if n != null { + local nn2 = NormalizerBox.normalize_new_ints(n) + if nn2 == null { return null } + local j6 = EmitNewBoxBox.emit_newbox_int_args(nn2.get("class"), nn2.get("args")) + if j6 == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j6)) + } + // Tolerant scanner for New: class/args only + { + local pos_body = Stage1JsonScannerBox.find_body_start(ast_json) + if pos_body < 0 { pos_body = 0 } + local pairN = Stage1JsonScannerBox.extract_label_args(ast_json, "class", pos_body) + if pairN != null { + local rawC = pairN.get("label") + local argsN = pairN.get("args_text") + // No namespace resolution here (non-using path): emit directly + local j6b = EmitNewBoxBox.emit_newbox_int_args(rawC, argsN) + if j6b == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j6b)) + } + } + } + } + // Try Compare first (avoid mis-detection by 'op' field inside Compare) + // Guard by raw scan to avoid calling methods on null-like values + { + local cq = RegexFlow.find_from(ast_json, "\"type\":\"Compare\"", 0) + if cq >= 0 { + // Prefer robust extractor box (returns ArrayBox [lv, rv, cmp]) + local ce = CompareExtractBox.extract_return_compare_ints(ast_json) + if ce != null { + local final_lhs = ce.get(0) // ArrayBox: lhs at index 0 + local final_rhs = ce.get(1) // ArrayBox: rhs at index 1 + local cmp = ce.get(2) // ArrayBox: cmp at index 2 + if trace == 1 { print("[trace] compare via box lhs=" + final_lhs + " rhs=" + final_rhs + " cmp=" + cmp) 
} + if prefer_cfg >= 1 { + local mat = 0 + if prefer >= 2 { mat = 1 } + // Direct emit (no MapBox/Normalizer needed - values already normalized) + local j1 = EmitCompareBox.emit_compare_cfg3(final_lhs, final_rhs, cmp, mat, trace) + return LocalSSA.ensure_cond(j1) + } else { + // Direct emit (no MapBox/Normalizer needed - values already normalized) + local j0 = EmitCompareBox.emit_compare_ret(final_lhs, final_rhs, cmp, trace) + return LocalSSA.ensure_cond(j0) + } + } + // Fallback to legacy extractor (should rarely be used) + local c = Stage1ExtractFlow.extract_return_compare(ast_json) + if c != null { + local lhs = c.get("lhs") + local rhs = c.get("rhs") + local cmp2 = c.get("cmp") + if trace == 1 { print("[trace] compare via legacy lhs=" + lhs + " rhs=" + rhs + " cmp=" + cmp2) } + if prefer_cfg >= 1 { + local mat = 0 + if prefer >= 2 { mat = 1 } + local raw3 = {cmp: cmp2, lhs: lhs, rhs: rhs} + local nn3 = NormalizerBox.normalize_cmp(raw3) + if nn3 == null { return null } + local j1 = EmitCompareBox.emit_compare_cfg3(nn3.get("lhs"), nn3.get("rhs"), nn3.get("cmp"), mat, trace) + return LocalSSA.ensure_cond(j1) + } else { + local raw4 = {cmp: cmp2, lhs: lhs, rhs: rhs} + local nn4 = NormalizerBox.normalize_cmp(raw4) + if nn4 == null { return null } + local j0 = EmitCompareBox.emit_compare_ret(nn4.get("lhs"), nn4.get("rhs"), nn4.get("cmp"), trace) + return LocalSSA.ensure_cond(j0) + } + } + } + } + // Then BinOp (guard by raw scan) + { + local bq = RegexFlow.find_from(ast_json, "\"type\":\"BinOp\"", 0) + if bq >= 0 { + local b = Stage1ExtractFlow.extract_return_binop(ast_json) + local lhs = b.get("lhs") + local rhs = b.get("rhs") + local kind = b.get("kind") + local j2 = EmitBinopBox.emit_binop2(lhs, rhs, kind, trace) + return LocalSSA.ensure_cond(j2) + } + } + // Fallback Return(Int) + local v = Stage1ExtractFlow.extract_return_int(ast_json) + local j3 = EmitReturnBox.emit_return_int2(v, trace) + return LocalSSA.ensure_cond(j3) + } + // Overload: resolve names via UsingResolverBox before emit + lower_stage1_to_mir_with_usings(ast_json, prefer_cfg, usings_json, modules_json) { + print("[DEBUG] === lower_stage1_to_mir_with_usings ENTRY ===") + print("[DEBUG] ast_json length=" + ast_json.length()) + print("[DEBUG] usings_json=" + usings_json) + print("[DEBUG] modules_json=" + modules_json) + if ast_json == null { return PipelineV2.lower_stage1_to_mir(ast_json, prefer_cfg) } + // Build resolver context + local r = UsingResolverBox.state_new() + if usings_json != null { UsingResolverBox.load_usings_json(r, usings_json) } + if modules_json != null { UsingResolverBox.load_modules_json(r, modules_json) } + // Upgrade alias→namespace mapping now that modules are loaded (Module‑First) + UsingResolverBox.upgrade_aliases(r) + print("[DEBUG] upgrade_aliases complete") + + // Prefer Call/Method/New branches with name normalization; otherwise fallback to default + { + local kq = RegexFlow.find_from(ast_json, "\"type\":\"Call\"", 0) + if kq >= 0 { + // Strict preflight via tolerant scanner: read raw name and enforce using alias resolution + { + local scan0 = Stage1JsonScannerBox.extract_name_args(ast_json, kq) + if scan0 != null { + local raw_head = scan0.get("name") + print("[DEBUG] preflight raw_head=" + raw_head) + local dot = RegexFlow.find_from(raw_head, ".", 0) + if dot >= 0 { + local head = raw_head.substring(0, dot) + local resolved = UsingResolverBox.resolve_namespace_alias(r, head) + print("[DEBUG] preflight head=" + head + " resolved=" + resolved) + if resolved == null { + print("[ERROR] 
Unresolved using alias: " + head) + return null + } + } + } + } + + // Prefer lightweight Stage1 scanner first(plugins OFFでも安全) + { + local pos_body = Stage1JsonScannerBox.find_body_start(ast_json) + if pos_body < 0 { pos_body = 0 } + local pair = Stage1JsonScannerBox.extract_name_args(ast_json, pos_body) + if pair != null { + local raw2 = pair.get("name") + local args2 = pair.get("args_text") + local fq2 = NamespaceBox.normalize_global_name(raw2, r) + if fq2 != null { + local j4b = EmitCallBox.emit_call_int_args(fq2, args2) + if j4b == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j4b)) + } + } + } + // Structured extract(配列/Map依存。plugins ON時の静的パス) + local kc = CallExtractBox.extract_return_call_ints(ast_json) + if kc != null { + local kn = NormalizerBox.normalize_call_ints(kc) + if kn == null { + print("[DEBUG] normalize_call_ints returned null") + return null + } + local raw_name = kn.get("name") + print("[DEBUG] raw_name before normalize=" + raw_name) + if SignatureVerifierBox.verify_call_name_arity(raw_name, kn.get("args")) != 1 { + print("[DEBUG] verify_call_name_arity failed for " + raw_name) + return null + } + local fq_name = NamespaceBox.normalize_global_name(raw_name, r) + print("[DEBUG] fq_name after normalize=" + fq_name) + if fq_name == null { + print("[DEBUG] normalize_global_name returned null for raw_name=" + raw_name) + return null + } + local j4 = EmitCallBox.emit_call_int_args(fq_name, kn.get("args")) + if j4 == null { + print("[DEBUG] emit_call_int_args returned null") + return null + } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j4)) + } + + } + } + { + local nq = RegexFlow.find_from(ast_json, "\"type\":\"New\"", 0) + if nq >= 0 { + local nb = NewExtractBox.extract_return_new_ints(ast_json) + if nb != null { + local nn = NormalizerBox.normalize_new_ints(nb) + if nn == null { return null } + local raw_c = nn.get("class") + local fq_c = NamespaceBox.normalize_class_name(raw_c, r) + if fq_c == null { return null } + local j6 = EmitNewBoxBox.emit_newbox_int_args(fq_c, nn.get("args")) + if j6 == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j6)) + } + // Scanner fallback: class + args → normalizer → emit + { + local pos_body = Stage1JsonScannerBox.find_body_start(ast_json) + if pos_body < 0 { pos_body = 0 } + local nscan = Stage1JsonScannerBox.extract_label_args(ast_json, "class", pos_body) + if nscan != null { + local normN = NameArgsNormBox.normalize_class(nscan, r) + if normN != null { + local j6b = EmitNewBoxBox.emit_newbox_int_args(normN.get("class"), normN.get("args_text")) + if j6b == null { return null } + return LocalSSA.ensure_calls(LocalSSA.ensure_cond(j6b)) + } + } + } + } + } + // Fallback to default lowering (includes Compare/Method and other forms) + return PipelineV2.lower_stage1_to_mir(ast_json, prefer_cfg) + } + +} diff --git a/lang/src/compiler/pipeline_v2/pipeline_emit_box.hako b/lang/src/compiler/pipeline_v2/pipeline_emit_box.hako new file mode 100644 index 00000000..dc8a4f95 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/pipeline_emit_box.hako @@ -0,0 +1,15 @@ +// pipeline_emit_box.hako — PipelineEmitBox +// Responsibility: small wrappers that emit MIR(JSON v0) and apply LocalSSA ensures + +using "lang/src/compiler/pipeline_v2/emit_call_box.hako" as EmitCallBox +using "lang/src/compiler/pipeline_v2/local_ssa_box.hako" as LocalSSABox + +static box PipelineEmitBox { + // Emit Call(name, int-args) → JSON v0, wrapped with LocalSSA ensures + emit_call_int_args_v0(name, args) { + local j = 
EmitCallBox.emit_call_int_args(name, args) + return LocalSSABox.ensure_calls(LocalSSABox.ensure_cond(j)) + } +} + +static box PipelineEmitMain { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/pipeline_helpers_box.hako b/lang/src/compiler/pipeline_v2/pipeline_helpers_box.hako new file mode 100644 index 00000000..6957c21f --- /dev/null +++ b/lang/src/compiler/pipeline_v2/pipeline_helpers_box.hako @@ -0,0 +1,65 @@ +// pipeline_helpers_box.hako — Pipeline解析ヘルパの独立Box +// 責務: JSON/数値解析の共通ユーティリティ +// Extracted from PipelineV2 flow (556行 → 506行、-50行削減) + +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow +using "lang/src/shared/common/string_helpers.hako" as StringHelpers + +static box PipelineHelpersBox { + // Parse integer at specific position (whitespace-aware, sign-aware) + parse_int_at(s, idx) { + if s == null { return null } + local i = idx + loop(true) { + local ch = s.substring(i, i+1) + if ch == "" { return null } + if ch == " " || ch == "\t" || ch == "\n" || ch == "\r" { i = i + 1 continue } + break + } + local neg = 0 + local ch = s.substring(i, i+1) + if ch == "-" { neg = 1 i = i + 1 } + local acc = 0 + local matched = 0 + loop(true) { + ch = s.substring(i, i+1) + if ch == "" { break } + local digit_idx = "0123456789".indexOf(ch) + if digit_idx >= 0 { + matched = 1 + acc = acc * 10 + digit_idx + i = i + 1 + } else { break } + } + if matched == 0 { return null } + if neg == 1 { acc = 0 - acc } + return acc + } + + // Parse integer after prefix string (search + extract pattern) + parse_int_after_prefix(s, prefix, search_pos) { + local p = RegexFlow.find_from(s, prefix, search_pos) + if p < 0 { return null } + local res = PipelineHelpersBox.parse_int_at(s, p + prefix.size()) + return res + } + + // Parse Compare values from Stage-1 JSON (returns [lhs, rhs, cmp]) + parse_compare_values(ast_json, start_pos) { + if ast_json == null { return null } + local op_pos = RegexFlow.find_from(ast_json, "\"op\":\"", start_pos) + if op_pos < 0 { return null } + local cmp_start = op_pos + 6 + local cmp_end = RegexFlow.find_from(ast_json, "\"", cmp_start) + if cmp_end < 0 { return null } + local cmp = ast_json.substring(cmp_start, cmp_end) + local lhs_val = PipelineHelpersBox.parse_int_after_prefix(ast_json, "\"lhs\":{\"type\":\"Int\",\"value\":", start_pos) + if lhs_val == null { return null } + local rhs_val = PipelineHelpersBox.parse_int_after_prefix(ast_json, "\"rhs\":{\"type\":\"Int\",\"value\":", start_pos) + if rhs_val == null { return null } + return [lhs_val, rhs_val, cmp] + } + + // Delegate to StringHelpers for type conversion + to_i64(v) { return StringHelpers.to_i64(v) } +} diff --git a/lang/src/compiler/pipeline_v2/readonly_map_view.hako b/lang/src/compiler/pipeline_v2/readonly_map_view.hako new file mode 100644 index 00000000..d3ec97c9 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/readonly_map_view.hako @@ -0,0 +1,31 @@ +// readonly_map_view.hako — 読み取り専用ビュー(誤更新防止用) + +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers + +static box ReadOnlyMapView { + _m + + of(m) { + local v = new ReadOnlyMapView() + v._m = m + return v + } + + has(key) { return call("MapBox.has/2", me._m, key) } + get(key) { return BoxHelpers.map_get(me._m, key) } + + set(key, val) { + print("[ReadOnlyMapView] set forbidden: key=" + (""+key)) + return 0 + } + delete(key) { + print("[ReadOnlyMapView] delete forbidden: key=" + (""+key)) + return 0 + } + clear() { + print("[ReadOnlyMapView] clear forbidden") + return 0 + } +} + +static box ReadOnlyMapViewStub { 
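+  // Illustrative (hypothetical) usage of ReadOnlyMapView above — documentation only:
+  //   local view = ReadOnlyMapView.of(m)
+  //   view.get("k")      // reads delegate to BoxHelpers.map_get
+  //   view.set("k", 1)   // logs "[ReadOnlyMapView] set forbidden: ..." and returns 0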
main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/regex_flow.hako b/lang/src/compiler/pipeline_v2/regex_flow.hako new file mode 100644 index 00000000..a4a9cd58 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/regex_flow.hako @@ -0,0 +1,95 @@ +using "lang/src/shared/common/string_helpers.hako" as StringHelpers + +flow RegexFlow { + // Minimal regex-like helpers focused on readability and determinism. + // Supported primitives: digits (\d+), identifier ([A-Za-z_][A-Za-z0-9_]*), + // literal substring find, read_digits from position. + + // Read optional whitespace + optional '-' + digits + digits_from(s, pos) { + if s == null { return "" } + if pos < 0 { pos = 0 } + local out = "" + local i = pos + // skip ASCII whitespaces + loop(true) { + local ch0 = s.substring(i, i+1) + if ch0 == "" { break } + if ch0 == " " || ch0 == "\t" || ch0 == "\n" || ch0 == "\r" { i = i + 1 continue } + break + } + // optional minus sign + local ch = s.substring(i, i+1) + if ch == "-" { out = out + ch i = i + 1 } + // digits + loop(true) { + ch = s.substring(i, i+1) + if ch == "" { break } + if ch >= "0" && ch <= "9" { out = out + ch i = i + 1 } else { break } + } + return out + } + + ident_from(s, pos) { + // [A-Za-z_][A-Za-z0-9_]* + if s == null { return "" } + if pos < 0 { pos = 0 } + local out = "" + local i = pos + local ch = s.substring(i, i+1) + if !((ch >= "A" && ch <= "Z") || (ch >= "a" && ch <= "z") || ch == "_") { return "" } + out = out + ch + i = i + 1 + loop(true) { + ch = s.substring(i, i+1) + if ch == "" { break } + if ((ch >= "A" && ch <= "Z") || (ch >= "a" && ch <= "z") || (ch >= "0" && ch <= "9") || ch == "_") { + out = out + ch + i = i + 1 + } else { break } + } + return out + } + + find_from(s, needle, pos) { + if s == null { return -1 } + if needle == null { return -1 } + if pos < 0 { pos = 0 } + local n = s.size() + local m = needle.size() + if m == 0 { return pos } + local i = pos + local limit = n - m + loop(i <= limit) { + local seg = s.substring(i, i+m) + if seg == needle { return i } + i = i + 1 + } + return -1 + } + + last_index_of(s, needle) { + // Delegate to StringHelpers (more efficient backward search) + return StringHelpers.last_index_of(s, needle) + } + + to_int(digits) { + if digits == null { return 0 } + local n = digits.size() + if n == 0 { return 0 } + local i = 0 + local neg = 0 + if digits.substring(0,1) == "-" { neg = 1 i = 1 } + local acc = 0 + loop(i < n) { + local ch = digits.substring(i, i+1) + if ch < "0" || ch > "9" { break } + local d = "0123456789".indexOf(ch) + if d < 0 { break } + acc = acc * 10 + d + i = i + 1 + } + if neg == 1 { return 0 - acc } + return acc + } +} diff --git a/lang/src/compiler/pipeline_v2/signature_verifier_box.hako b/lang/src/compiler/pipeline_v2/signature_verifier_box.hako new file mode 100644 index 00000000..9df190fe --- /dev/null +++ b/lang/src/compiler/pipeline_v2/signature_verifier_box.hako @@ -0,0 +1,111 @@ +// signature_verifier_box.hako — SignatureVerifierBox +// Responsibility: compile-time arity checks for common built-in methods. +// Notes: +// - Stage‑1 JSON at this phase does not carry receiver type, so we verify +// by method name only for known methods where arity is uniform across boxes +// (e.g., indexOf=1 on String/Array, push=1 on Array, etc.). +// - Unknown methods are allowed (return 1) to avoid over-blocking during bring-up. + +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow +// Method registry is optional; allow unknowns by design in bring-up. 
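+// Illustrative examples (hypothetical inputs, documentation only; results follow the
+// permissive bring-up policy implemented below):
+//   verify_method("substring", 2)       -> 1  (known name with matching arity)
+//   verify_method("someUnknownName", 5) -> 1  (unknown names are allowed for now)
+//   _count_int_args("[{\"type\":\"Int\",\"value\":12},{\"type\":\"Int\",\"value\":3}]") -> 2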
+ +static box SignatureVerifierBox { + // Return 1 if (method, argc) is acceptable; 0 otherwise. + verify_method(method, argc) { + if method == null { return 1 } + // Registry check skipped (optional). Unknown names are allowed during bring-up. + // Fallback legacy (kept for compatibility) + local m = "" + method + local a = ("" + argc) + "" + // String‑like + if m == "len" && a == "0" { return 1 } + if m == "length" && a == "0" { return 1 } + if m == "substring" && a == "2" { return 1 } + if m == "concat" && a == "1" { return 1 } + if m == "indexOf" && a == "1" { return 1 } + if m == "replace" && a == "2" { return 1 } + if m == "trim" && a == "0" { return 1 } + if m == "toUpper" && a == "0" { return 1 } + if m == "toLower" && a == "0" { return 1 } + // Array‑like + if m == "get" && a == "1" { return 1 } + if m == "set" && a == "2" { return 1 } + if m == "push" && a == "1" { return 1 } + if m == "pop" && a == "0" { return 1 } + if m == "clear" && a == "0" { return 1 } + if m == "contains" && a == "1" { return 1 } + if m == "join" && a == "1" { return 1 } + if m == "slice" && a == "2" { return 1 } + // Map‑like + if m == "size" && a == "0" { return 1 } + if m == "has" && a == "1" { return 1 } + if m == "delete" && a == "1" { return 1 } + if m == "remove" && a == "1" { return 1 } + if m == "keys" && a == "0" { return 1 } + if m == "values" && a == "0" { return 1 } + if m == "toJSON" && a == "0" { return 1 } + // Unknown method: allow (return 1) for now + return 1 + } + + // Count integer args from Stage‑1 args JSON (shape: [ {"type":"Int","value":N}, ... ]) + _count_int_args(args) { + if args == null { return 0 } + local s = "" + args + local pos = 0 + local n = 0 + loop(true) { + local ds = RegexFlow.digits_from(s, pos) + if ds == "" { + pos = pos + 1 + } else { + n = n + 1 + // advance to the end of this digit run + local p2 = RegexFlow.find_from(s, ds, pos) + if p2 < 0 { pos = pos + ds.size() } else { pos = p2 + ds.size() } + } + if pos >= s.size() { break } + } + return n + } + + // Convenience: verify from args JSON directly; prints an error on mismatch. + // Returns 1 if OK, 0 if mismatch. + verify_from_args(method, args) { + local argc = SignatureVerifierBox._count_int_args(args) + local ok = SignatureVerifierBox.verify_method(method, argc) + if ok != 1 { + print("[ERROR] No matching method by arity: " + method + "/" + (""+argc)) + return 0 + } + return 1 + } + // Name-aware call verifier for global functions like "StringBox.indexOf". + // If name pattern matches a known built-in box, validate by registry; otherwise allow. + verify_call_name_arity(name, args) { + if name == null { return 1 } + local argc = SignatureVerifierBox._count_int_args(args) + local s = "" + name + // Split at last '.' 
for method name + local last = RegexFlow.last_index_of(s, ".") + if last < 0 { return 1 } + local method = s.substring(last + 1, s.size()) + // Determine class token just before method (penultimate segment) + local head_all = s.substring(0, last) + local prev = RegexFlow.last_index_of(head_all, ".") + local head = head_all + if prev >= 0 { + head = head_all.substring(prev + 1, head_all.size()) + } + // Normalize head to Box name + local bxname = head + if head == "String" { bxname = "StringBox" } + if head == "Array" { bxname = "ArrayBox" } + if head == "Map" { bxname = "MapBox" } + if bxname == "StringBox" || bxname == "ArrayBox" || bxname == "MapBox" { return 1 } + return 1 + } + +} + +static box SignatureVerifierMain { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/stage1_args_parser_box.hako b/lang/src/compiler/pipeline_v2/stage1_args_parser_box.hako new file mode 100644 index 00000000..58b450f7 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/stage1_args_parser_box.hako @@ -0,0 +1,67 @@ +// stage1_args_parser_box.hako — Stage1ArgsParserBox +// Responsibility: Parse Stage‑1 args JSON text into integers (MVP: Int only). +// Non‑Responsibility: MIR emit or namespace. + +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow +using "lang/src/shared/json/json_cursor.hako" as JsonCursorBox + +static box Stage1ArgsParserBox { + // Count Int occurrences in Stage‑1 args JSON text. + count_ints(args_text) { + if args_text == null { return 0 } + local s = "" + args_text + local pos = 0 + local n = 0 + loop(true) { + local ds = RegexFlow.digits_from(s, pos) + if ds == "" { pos = pos + 1 } else { n = n + 1 pos = pos + ds.size() } + if pos >= s.size() { break } + } + return n + } + // Parse ints from args_text and return ArrayBox [i64,...]. 
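+  // Illustrative examples (hypothetical inputs, documentation only):
+  //   parse_ints("[{\"type\":\"Int\",\"value\":4},{\"type\":\"Int\",\"value\":7}]") -> [4, 7]
+  //   parse_ints("[{\"type\":\"Str\",\"value\":\"x\"}]") -> null (non-Int node rejected by _only_int_nodes)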
+ parse_ints(args_text) { + if args_text == null { return [] } + local s = "" + args_text + if me._only_int_nodes(s) != 1 { return null } + local pos = 0 + local out = [] + loop(true) { + local ds = RegexFlow.digits_from(s, pos) + if ds == "" { + pos = pos + 1 + } else { + out.push(RegexFlow.to_int(ds)) + // advance to end of this token to avoid re-matching + local p2 = RegexFlow.find_from(s, ds, pos) + if p2 < 0 { pos = pos + ds.size() } else { pos = p2 + ds.size() } + } + if pos >= s.size() { break } + } + return out + } + + _only_int_nodes(s) { + if s == null { return 1 } + local lb = JsonCursorBox.find_from(s, "[", 0) + if lb < 0 { return 1 } + local rb = JsonCursorBox.seek_array_end(s, lb) + if rb < 0 { rb = s.size() } + local chk = RegexFlow.find_from(s, "\"type\":\"", lb) + if chk < 0 || chk >= rb { return 1 } + local pos = chk + loop(true) { + if pos < 0 || pos >= rb { break } + local ty_start = pos + 8 + local ty_end = RegexFlow.find_from(s, "\"", ty_start) + if ty_end < 0 || ty_end >= rb { return 0 } + local ty = s.substring(ty_start, ty_end) + if ty != "Int" { return 0 } + pos = RegexFlow.find_from(s, "\"type\":\"", ty_end + 1) + if pos < 0 || pos >= rb { break } + } + return 1 + } +} + +static box Stage1ArgsParserMain { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/stage1_extract_flow.hako b/lang/src/compiler/pipeline_v2/stage1_extract_flow.hako new file mode 100644 index 00000000..fc98e459 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/stage1_extract_flow.hako @@ -0,0 +1,209 @@ +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow + +flow Stage1ExtractFlow { + // Extract minimal info from Stage‑1 JSON (Return Int / BinOp / Compare) + + _idx(s, needle) { return RegexFlow.find_from(s, needle, 0) } + _idx_from(s, needle, pos) { return RegexFlow.find_from(s, needle, pos) } + + extract_return_int(ast_json) { + if ast_json == null { return 0 } + local ret_key = "\"type\":\"Return\"" + local p = RegexFlow.last_index_of(ast_json, ret_key) + if p < 0 { p = Stage1ExtractFlow._idx(ast_json, ret_key) } + if p < 0 { return 0 } + // whitespace tolerant: find 'type:"Int"' then subsequent 'value:' + local tpos = Stage1ExtractFlow._idx_from(ast_json, "\"type\":\"Int\"", p) + if tpos < 0 { return 0 } + local vpos = Stage1ExtractFlow._idx_from(ast_json, "\"value\":", tpos) + if vpos < 0 { return 0 } + local ds = RegexFlow.digits_from(ast_json, vpos + 8) + return RegexFlow.to_int(ds) + } + + extract_return_binop(ast_json) { + // Return(BinOp) → {kind, lhs, rhs} or null + if ast_json == null { return null } + local p = Stage1ExtractFlow._idx(ast_json, "\"type\":\"Return\"") + if p < 0 { return null } + local q = Stage1ExtractFlow._idx_from(ast_json, "\"type\":\"BinOp\"", p) + if q < 0 { return null } + local opk_pos = Stage1ExtractFlow._idx_from(ast_json, "\"op\":\"", q) + if opk_pos < 0 { return null } + local opk_end = Stage1ExtractFlow._idx_from(ast_json, "\"", opk_pos + 6) + if opk_end < 0 { return null } + local opk = ast_json.substring(opk_pos + 6, opk_end) + // lhs/rhs int (whitespace tolerant) + local lhsp = Stage1ExtractFlow._idx_from(ast_json, "\"lhs\"", q) + local rhsp = Stage1ExtractFlow._idx_from(ast_json, "\"rhs\"", q) + if lhsp < 0 || rhsp < 0 { return null } + local lval = Stage1ExtractFlow._idx_from(ast_json, "\"value\":", lhsp) + local rval = Stage1ExtractFlow._idx_from(ast_json, "\"value\":", rhsp) + if lval < 0 || rval < 0 { return null } + local lhs_ds = RegexFlow.digits_from(ast_json, lval + 8) + local rhs_ds = 
RegexFlow.digits_from(ast_json, rval + 8) + if lhs_ds == "" || rhs_ds == "" { return null } + local lhs = RegexFlow.to_int(lhs_ds) + local rhs = RegexFlow.to_int(rhs_ds) + local kind = opk + return { kind: kind, lhs: lhs, rhs: rhs } + } + + extract_return_compare(ast_json) { + // Return(Compare) → {cmp, lhs, rhs} or null + if ast_json == null { return null } + local p = Stage1ExtractFlow._idx(ast_json, "\"type\":\"Return\"") + if p < 0 { return null } + local q = Stage1ExtractFlow._idx_from(ast_json, "\"type\":\"Compare\"", p) + if q < 0 { return null } + local opk_pos = Stage1ExtractFlow._idx_from(ast_json, "\"op\":\"", q) + if opk_pos < 0 { return null } + local opk_end = Stage1ExtractFlow._idx_from(ast_json, "\"", opk_pos + 6) + if opk_end < 0 { return null } + local opk = ast_json.substring(opk_pos + 6, opk_end) + // lhs/rhs int (whitespace tolerant) + local lhsp = Stage1ExtractFlow._idx_from(ast_json, "\"lhs\"", q) + local rhsp = Stage1ExtractFlow._idx_from(ast_json, "\"rhs\"", q) + if lhsp < 0 || rhsp < 0 { return null } + local lval = Stage1ExtractFlow._idx_from(ast_json, "\"value\":", lhsp) + local rval = Stage1ExtractFlow._idx_from(ast_json, "\"value\":", rhsp) + if lval < 0 || rval < 0 { return null } + local lhs_ds = RegexFlow.digits_from(ast_json, lval + 8) + local rhs_ds = RegexFlow.digits_from(ast_json, rval + 8) + if lhs_ds == "" || rhs_ds == "" { return null } + local lhs = RegexFlow.to_int(lhs_ds) + local rhs = RegexFlow.to_int(rhs_ds) + return { cmp: opk, lhs: lhs, rhs: rhs } + } + + // If(cond=Compare(lhs,rhs,op), then=[Return(Int 1)], else=[Return(Int 0)]) → {cmp, lhs, rhs} or null + extract_if_compare(ast_json) { + if ast_json == null { return null } + // Find If node first (tolerant to whitespace) + local ip = RegexFlow.find_from(ast_json, "\"type\":\"If\"", 0) + if ip < 0 { return null } + // Find Compare under/after If + local q = RegexFlow.find_from(ast_json, "\"type\":\"Compare\"", ip) + if q < 0 { return null } + // Extract op + local opk_pos = RegexFlow.find_from(ast_json, "\"op\":\"", q) + if opk_pos < 0 { return null } + local opk_end = RegexFlow.find_from(ast_json, "\"", opk_pos + 6) + if opk_end < 0 { return null } + local opk = ast_json.substring(opk_pos + 6, opk_end) + // Extract lhs/rhs integer values + local lhsp = RegexFlow.find_from(ast_json, "\"lhs\"", q) + local rhsp = RegexFlow.find_from(ast_json, "\"rhs\"", q) + if lhsp < 0 || rhsp < 0 { return null } + local lval = RegexFlow.find_from(ast_json, "\"value\":", lhsp) + local rval = RegexFlow.find_from(ast_json, "\"value\":", rhsp) + if lval < 0 || rval < 0 { return null } + local lhs_ds = RegexFlow.digits_from(ast_json, lval + 8) + local rhs_ds = RegexFlow.digits_from(ast_json, rval + 8) + if lhs_ds == "" || rhs_ds == "" { return null } + local lhs = RegexFlow.to_int(lhs_ds) + local rhs = RegexFlow.to_int(rhs_ds) + return { cmp: opk, lhs: lhs, rhs: rhs } + } + + // Return(Method recv, method, args) → {method, args:[int,...]} or null + extract_return_method(ast_json) { + if ast_json == null { return null } + local p = Stage1ExtractFlow._idx(ast_json, "\"type\":\"Return\"") + if p < 0 { return null } + local q = Stage1ExtractFlow._idx_from(ast_json, "\"type\":\"Method\"", p) + if q < 0 { return null } + if Stage1IntArgsExtractBox.validate_only_int_args(ast_json, q) != 1 { return null } + // method name + local mk = Stage1ExtractFlow._idx_from(ast_json, "\"method\":\"", q) + if mk < 0 { return null } + local mk_end = Stage1ExtractFlow._idx_from(ast_json, "\"", mk + 10) + if mk_end < 0 { return null } + 
local mname = ast_json.substring(mk + 10, mk_end) + // args (integers only; recv は無視) + local ak = Stage1ExtractFlow._idx_from(ast_json, "\"args\":[", q) + local args = [] + if ak >= 0 { + local rb = Stage1ExtractFlow._idx_from(ast_json, "]", ak) + if rb < 0 { rb = ast_json.size() } + local i = ak + loop(true) { + local tpos = Stage1ExtractFlow._idx_from(ast_json, "\"type\":\"Int\"", i) + if tpos < 0 || tpos >= rb { break } + local vpos = Stage1ExtractFlow._idx_from(ast_json, "\"value\":", tpos) + if vpos < 0 || vpos >= rb { i = tpos + 1 continue } + local ds = RegexFlow.digits_from(ast_json, vpos + 8) + if ds != "" { args.push(RegexFlow.to_int(ds)) } + i = vpos + 8 + ds.size() + } + } + return { method: mname, args: args } + } + + // Return(New class(args)) → {class, args:[int,...]} or null + extract_return_new(ast_json) { + if ast_json == null { return null } + local p = Stage1ExtractFlow._idx(ast_json, "\"type\":\"Return\"") + if p < 0 { return null } + local q = Stage1ExtractFlow._idx_from(ast_json, "\"type\":\"New\"", p) + if q < 0 { return null } + local ck = Stage1ExtractFlow._idx_from(ast_json, "\"class\":\"", q) + if ck < 0 { return null } + local ck_end = Stage1ExtractFlow._idx_from(ast_json, "\"", ck + 9) + if ck_end < 0 { return null } + local cname = ast_json.substring(ck + 9, ck_end) + if Stage1IntArgsExtractBox.validate_only_int_args(ast_json, q) != 1 { return null } + // args integers only + local ak = Stage1ExtractFlow._idx_from(ast_json, "\"args\":[", q) + local args = [] + if ak >= 0 { + local rb = Stage1ExtractFlow._idx_from(ast_json, "]", ak) + if rb < 0 { rb = ast_json.size() } + local i = ak + loop(true) { + local tpos = Stage1ExtractFlow._idx_from(ast_json, "\"type\":\"Int\"", i) + if tpos < 0 || tpos >= rb { break } + local vpos = Stage1ExtractFlow._idx_from(ast_json, "\"value\":", tpos) + if vpos < 0 || vpos >= rb { i = tpos + 1 continue } + local ds = RegexFlow.digits_from(ast_json, vpos + 8) + if ds != "" { args.push(RegexFlow.to_int(ds)) } + i = vpos + 8 + ds.size() + } + } + return { class: cname, args: args } + } + + // Return(Call name(args...)) → {name, args:[int,...]} or null + extract_return_call(ast_json) { + if ast_json == null { return null } + // Return → Call 検出 + local p = Stage1ExtractFlow._idx(ast_json, "\"type\":\"Return\"") + if p < 0 { return null } + local q = Stage1ExtractFlow._idx_from(ast_json, "\"type\":\"Call\"", p) + if q < 0 { return null } + if Stage1IntArgsExtractBox.validate_only_int_args(ast_json, q) != 1 { return null } + // name 抽出 + local nk = Stage1ExtractFlow._idx_from(ast_json, "\"name\":\"", q) + if nk < 0 { return null } + local nk_end = Stage1ExtractFlow._idx_from(ast_json, "\"", nk + 8) + if nk_end < 0 { return null } + local name = ast_json.substring(nk + 8, nk_end) + // args 抽出(整数に限定): args 配列内の Int ノードのみを拾う + local ak = Stage1ExtractFlow._idx_from(ast_json, "\"args\":[", q) + if ak < 0 { return { name: name, args: [] } } + local rb = Stage1ExtractFlow._idx_from(ast_json, "]", ak) + if rb < 0 { rb = ast_json.size() } + local args = [] + local i = ak + loop(true) { + local tpos = Stage1ExtractFlow._idx_from(ast_json, "\"type\":\"Int\"", i) + if tpos < 0 || tpos >= rb { break } + local vpos = Stage1ExtractFlow._idx_from(ast_json, "\"value\":", tpos) + if vpos < 0 || vpos >= rb { i = tpos + 1 continue } + local ds = RegexFlow.digits_from(ast_json, vpos + 8) + if ds != "" { args.push(RegexFlow.to_int(ds)) } + i = vpos + 8 + ds.size() + } + return { name: name, args: args } + } +} diff --git 
a/lang/src/compiler/pipeline_v2/stage1_int_args_extract_box.hako b/lang/src/compiler/pipeline_v2/stage1_int_args_extract_box.hako new file mode 100644 index 00000000..f07ec4aa --- /dev/null +++ b/lang/src/compiler/pipeline_v2/stage1_int_args_extract_box.hako @@ -0,0 +1,127 @@ +// Stage1IntArgsExtractBox — 統合Extract: Return(Call/Method/New) から名前と整数引数を抽出 +// Responsibility: 3つの類似ExtractBoxを統合(call_extract/method_extract/new_extract) +// Notes: 85%重複コードを共通化、args抽出ロジックを一元管理 + +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow + +static box Stage1IntArgsExtractBox { + // ===== 汎用Extract(内部実装) ===== + + extract_return_expr_ints(ast_json, expr_type, name_key, name_offset) { + if ast_json == null { return null } + + // Step 1: Return 探索 + local rq = RegexFlow.find_from(ast_json, "\"type\":\"Return\"", 0) + if rq < 0 { return null } + + // Step 2: Expr type 探索(Call/Method/New) + local q = RegexFlow.find_from(ast_json, "\"type\":\"" + expr_type + "\"", rq) + if q < 0 { return null } + + // Step 3: 名前抽出 + local nk = RegexFlow.find_from(ast_json, name_key, q) + if nk < 0 { return null } + local nk_end = RegexFlow.find_from(ast_json, "\"", nk + name_offset) + if nk_end < 0 { return null } + local name = ast_json.substring(nk + name_offset, nk_end) + + // Step 4: args抽出(Int only、共通ロジック) + local args = me._extract_int_args(ast_json, q) + + return { name: name, args: args } + } + + // ===== 内部ヘルパー:Int引数抽出(共通化) ===== + + _extract_int_args(ast_json, start_pos) { + local ak = RegexFlow.find_from(ast_json, "\"args\":[", start_pos) + local vals = [] + if ak < 0 { return vals } + + // bracket-aware end detection + local lb = RegexFlow.find_from(ast_json, "[", ak) + local rb = ast_json.size() + if lb >= 0 { + local i2 = lb + 1 + local depth = 1 + loop(true) { + local ch = ast_json.substring(i2, i2+1) + if ch == "" { break } + if ch == "[" { depth = depth + 1 } else { if ch == "]" { depth = depth - 1 } } + if depth == 0 { rb = i2 break } + i2 = i2 + 1 + } + if me._ensure_only_int_types(ast_json, lb, rb) != 1 { return null } + } + + // scan ints within ak..rb + local i = ak + loop(true) { + local tpos = RegexFlow.find_from(ast_json, "\"type\":\"Int\"", i) + if tpos < 0 || tpos >= rb { break } + local vpos = RegexFlow.find_from(ast_json, "\"value\":", tpos) + if vpos < 0 || vpos >= rb { i = tpos + 1 continue } + local ds = RegexFlow.digits_from(ast_json, vpos + 8) + if ds != "" { vals.push(RegexFlow.to_int(ds)) } + i = vpos + 8 + ds.size() + } + return vals + } + + _ensure_only_int_types(ast_json, array_start, array_end) { + local check = array_start + loop(true) { + local tpos = RegexFlow.find_from(ast_json, "\"type\":\"", check) + if tpos < 0 || tpos >= array_end { break } + local ty_start = tpos + 8 + local ty_end = RegexFlow.find_from(ast_json, "\"", ty_start) + if ty_end < 0 || ty_end >= array_end { return 0 } + local ty = ast_json.substring(ty_start, ty_end) + if ty != "Int" { + return 0 + } + check = ty_end + 1 + } + return 1 + } + + validate_only_int_args(ast_json, expr_pos) { + local ak = RegexFlow.find_from(ast_json, "\"args\":[", expr_pos) + if ak < 0 { return 1 } + local lb = RegexFlow.find_from(ast_json, "[", ak) + if lb < 0 { return 1 } + local rb = ast_json.size() + local i2 = lb + 1 + local depth = 1 + loop(true) { + local ch = ast_json.substring(i2, i2+1) + if ch == "" { break } + if ch == "[" { depth = depth + 1 } else { if ch == "]" { depth = depth - 1 } } + if depth == 0 { rb = i2 break } + i2 = i2 + 1 + } + return me._ensure_only_int_types(ast_json, lb, rb) + } + + // ===== 
公開API(後方互換) ===== + + extract_return_call_ints(ast_json) { + return me.extract_return_expr_ints(ast_json, "Call", "\"name\":\"", 8) + } + + extract_return_method_ints(ast_json) { + local result = me.extract_return_expr_ints(ast_json, "Method", "\"method\":\"", 10) + if result == null { return null } + // method key for backward compatibility + return { method: result.name, args: result.args } + } + + extract_return_new_ints(ast_json) { + local result = me.extract_return_expr_ints(ast_json, "New", "\"class\":\"", 9) + if result == null { return null } + // class key for backward compatibility + return { class: result.name, args: result.args } + } +} + +static box Stage1IntArgsExtractStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/stage1_json_scanner_box.hako b/lang/src/compiler/pipeline_v2/stage1_json_scanner_box.hako new file mode 100644 index 00000000..388b6993 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/stage1_json_scanner_box.hako @@ -0,0 +1,99 @@ +// stage1_json_scanner_box.hako — Stage1JsonScannerBox +// Responsibility: Provide common scans for Stage‑1 JSON strings used by PipelineV2 +// - body anchor detection +// - key search (plain/escaped/": " tolerant) +// - value start after key +// - name/args basic extraction +// Non‑Responsibility: MIR emit, namespace resolution, IO + +using "lang/src/shared/json/json_cursor.hako" as JsonCursorBox +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers + +static box Stage1JsonScannerBox { + // Find the start position of Program.body array key + find_body_start(ast_json) { + if ast_json == null { return -1 } + local s = "" + ast_json + local pos = JsonCursorBox.find_key_dual(s, "\"body\":[", "\\\"body\\\":[", 0) + return pos + } + // Tolerant key finder: exact/escaped/colon+space + find_key(ast_json, key, start_pos) { + if ast_json == null { return -1 } + local s = "" + ast_json + local plain = "\"" + key + "\":\"" + local escaped = "\\\"" + key + "\\\":\\\"" + local p1 = JsonCursorBox.find_key_dual(s, plain, escaped, start_pos) + if p1 >= 0 { return p1 } + if plain.size() >= 2 { + local head = plain.substring(0, plain.size() - 1) + local last = plain.substring(plain.size() - 1, plain.size()) + local spaced = head + " " + last + local p2 = JsonCursorBox.find_from(s, spaced, start_pos) + if p2 >= 0 { return p2 } + // Escaped + spaced: tolerate JSON embedded as string with colon-space + if escaped.size() >= 2 { + local ehead = escaped.substring(0, escaped.size() - 1) + local elast = escaped.substring(escaped.size() - 1, escaped.size()) + local espaced = ehead + " " + elast + local p3 = JsonCursorBox.find_from(s, espaced, start_pos) + if p3 >= 0 { return p3 } + } + } + return -1 + } + // Compute the index of first char of a JSON string value following a key at key_pos + value_start_after_key_pos(s, key_pos) { + if s == null { return -1 } + local i = key_pos + local n = s.size() + loop(i < n) { + local ch = s.substring(i,i+1) + if ch == ":" { i = i + 1 break } + i = i + 1 + } + loop(i < n) { + local ch2 = s.substring(i,i+1) + if ch2 == " " || ch2 == "\t" { i = i + 1 continue } + break + } + if i < n { + local ch0 = s.substring(i, i+1) + if ch0 == "\\" { if i + 1 < n && s.substring(i+1,i+2) == "\"" { return i + 2 } else { return i } } + if ch0 == "\"" { return i + 1 } else { return i } + } + return -1 + } + // Extract label (generic) and args slice starting at/after start_pos; returns map or null + extract_label_args(ast_json, label_key, start_pos) { + if ast_json == null { return null } + local s = "" + 
ast_json + local npos = me.find_key(s, label_key, start_pos) + local apos = me.find_key(s, "args", start_pos) + if npos < 0 || apos < 0 { return null } + local vstart = me.value_start_after_key_pos(s, npos) + if vstart < 0 { return null } + local nend = JsonCursorBox.find_from(s, "\"", vstart) + if nend <= vstart { return null } + local label = s.substring(vstart, nend) + local lb = JsonCursorBox.find_from(s, "[", apos) + local rb = s.size() + if lb >= 0 { + // Use JsonCursorBox for escape-aware array end seeking + local rb_result = JsonCursorBox.seek_array_end(s, lb) + if rb_result >= lb { rb = rb_result } + } + local args_text = s.substring(apos, rb) + return map({ label: label, args_text: args_text, label_pos: npos, args_pos: apos, label_key: label_key }) + } + + // Backward compatible helper for Call (label_key = "name") + extract_name_args(ast_json, start_pos) { + local m = me.extract_label_args(ast_json, "name", start_pos) + if m == null { return null } + call("MapBox.set/3", m, "name", BoxHelpers.map_get(m, "label")) + return m + } +} + +static box Stage1JsonScannerMain { main(args){ return 0 } } diff --git a/lang/src/compiler/pipeline_v2/stage1_name_args_normalizer_box.hako b/lang/src/compiler/pipeline_v2/stage1_name_args_normalizer_box.hako new file mode 100644 index 00000000..8622ca53 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/stage1_name_args_normalizer_box.hako @@ -0,0 +1,60 @@ +// stage1_name_args_normalizer_box.hako — Stage1NameArgsNormalizerBox +// Responsibility: Normalize (label, args_text) from Stage‑1 scanner into names ready for emit. +// - Call: name normalization via UsingResolverBox/NamespaceBox, arity sanity via SignatureVerifierBox +// - Method: same as Call but for method label +// - New: class normalization via UsingResolverBox/NamespaceBox +// Non‑Responsibility: MIR emit or IO + +using "lang/src/compiler/pipeline_v2/namespace_box.hako" as NamespaceBox +using "lang/src/compiler/pipeline_v2/signature_verifier_box.hako" as SignatureVerifierBox +using "lang/src/compiler/pipeline_v2/alias_preflight_box.hako" as AliasPreflightBox +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers + +static box Stage1NameArgsNormalizerBox { + _label_or_name(scan) { + if scan == null { return null } + local n = BoxHelpers.map_get(scan, "name") + if n != null { return n } + return BoxHelpers.map_get(scan, "label") + } + + // Returns { name, args_text } or null + normalize_call(scan, r) { + if scan == null { return null } + local raw = me._label_or_name(scan) + if raw == null || raw == "" { return null } + if AliasPreflightBox.check_head(raw, r) != 1 { return null } + local fq = NamespaceBox.normalize_global_name(raw, r) + if fq == null { return null } + local args = BoxHelpers.map_get(scan, "args_text") + // Best-effort arity check for globals + if SignatureVerifierBox.verify_call_name_arity(fq, args) != 1 { return null } + return { name: fq, args_text: args } + } + + // Returns { method, args_text } or null + normalize_method(scan, r) { + if scan == null { return null } + local raw = me._label_or_name(scan) + if raw == null || raw == "" { return null } + if AliasPreflightBox.check_head(raw, r) != 1 { return null } + local fq = NamespaceBox.normalize_global_name(raw, r) + if fq == null { return null } + local args = BoxHelpers.map_get(scan, "args_text") + if SignatureVerifierBox.verify_from_args(fq, args) != 1 { return null } + return { method: fq, args_text: args } + } + + // Returns { class, args_text } or null + normalize_class(scan, r) { + if scan == null { return 
null } + local raw = me._label_or_name(scan) + if raw == null || raw == "" { return null } + // class names are resolved through class resolver (no alias head check needed but harmless) + local fq = NamespaceBox.normalize_class_name(raw, r) + if fq == null { return null } + return { class: fq, args_text: BoxHelpers.map_get(scan, "args_text") } +} +} + +static box Stage1NameArgsNormalizerMain { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/terminator_guard_box.hako b/lang/src/compiler/pipeline_v2/terminator_guard_box.hako new file mode 100644 index 00000000..dc27420b --- /dev/null +++ b/lang/src/compiler/pipeline_v2/terminator_guard_box.hako @@ -0,0 +1,37 @@ +// terminator_guard_box.hako — TerminatorGuardBox +// Responsibility: Provide a thin, unified guard to prevent emit after terminator. +// Notes: Operates on an instructions ArrayBox of Map nodes with key "op". + +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers + +static box TerminatorGuardBox { + // Return 1 if the given instructions array ends with ret/throw + block_terminated_insts(insts) { + if insts == null { return 0 } + local n = BoxHelpers.array_len(insts) + if n <= 0 { return 0 } + local last = BoxHelpers.array_get(insts, n - 1) + if last == null { return 0 } + local op = BoxHelpers.map_get(last, "op") + if op == null { return 0 } + if op == "ret" || op == "throw" { return 1 } + return 0 + } + + // Unified message; kept stable for tests + print_error(opname) { + if opname == null { opname = "op" } + print("[ERROR] TerminatorGuard: emit after terminator forbidden (attempt=" + opname + ")") + } + + // Check before emit; returns 1 if allowed, 0 if blocked (and prints message) + guard_before_emit_insts(insts, opname) { + if me.block_terminated_insts(insts) == 1 { + me.print_error(opname) + return 0 + } + return 1 + } +} + +static box TerminatorGuardStub { main(args) { return 0 } } diff --git a/lang/src/compiler/pipeline_v2/using_resolver_box.hako b/lang/src/compiler/pipeline_v2/using_resolver_box.hako new file mode 100644 index 00000000..e3ee09c9 --- /dev/null +++ b/lang/src/compiler/pipeline_v2/using_resolver_box.hako @@ -0,0 +1,70 @@ +// UsingResolverBox — static, stateful resolver helpers(インスタンス禁止・VM互換) +// State layout (Map): { +// alias_paths: Map, alias_names: Map, alias_keys: Array, +// modules_map: Map, modules_keys: Array +// } + +using "lang/src/shared/common/string_helpers.hako" as StringHelpers +using "lang/src/shared/common/box_helpers.hako" as BoxHelpers +using "lang/src/compiler/pipeline_v2/regex_flow.hako" as RegexFlow + +static box UsingResolverBox { + // Lightweight state as String: holds modules_json only + state_new() { return "" } + + load_usings_json(state, usings_json) { return state } + + load_modules_json(state, mod_json) { return ("" + mod_json) } + + resolve_path_alias(state, alias) { return null } + + resolve_namespace_alias(state, alias) { + if alias == null { return null } + local s = "" + state + // Prefer unique tail match by last segment + local i = 0 + local start = 0 + local found = null + loop(true) { + local kpos = RegexFlow.find_from(s, "\"", start) + if kpos < 0 { break } + local kend = RegexFlow.find_from(s, "\"", kpos + 1) + if kend < 0 { break } + local key = s.substring(kpos + 1, kend) + local dot = RegexFlow.last_index_of(key, ".") + local last = key + if dot >= 0 { last = key.substring(dot + 1, key.size()) } + if last == alias { + if found == null { found = key } else { return null } + } else { + // first-letter case-insensitive match + if last.size() 
== alias.size() && last.size() > 0 { + local l0 = last.substring(0,1) + local a0 = alias.substring(0,1) + local restl = last.substring(1, last.size()) + local resta = alias.substring(1, alias.size()) + if restl == resta { + local U = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; local L = "abcdefghijklmnopqrstuvwxyz" + local idxL = L.indexOf(l0); local idxU = U.indexOf(l0) + if (idxL >= 0 && U.substring(idxL, idxL+1) == a0) || (idxU >= 0 && L.substring(idxU, idxU+1) == a0) { + if found == null { found = key } else { return null } + } + } + } + } + start = kend + 1 + } + return found + } + + resolve_module_path_from_alias(state, alias) { return null } + + guess_namespace_from_tail(state, tail) { return me.resolve_namespace_alias(state, tail) } + + upgrade_aliases(state) { return 0 } + + to_context_json(state) { return "{}" } + map_to_json(m) { return "{}" } +} + +static box UsingResolverBoxMain { main(args) { return 0 } } diff --git a/lang/src/compiler/stage1/emitter_box.hako b/lang/src/compiler/stage1/emitter_box.hako new file mode 100644 index 00000000..d843f1f4 --- /dev/null +++ b/lang/src/compiler/stage1/emitter_box.hako @@ -0,0 +1,10 @@ +// Moved from apps/selfhost-compiler/boxes/emitter_box.hako +// EmitterBox — thin wrapper to emit JSON v0 (extracted) +using "lang/src/compiler/stage1/json_program_box.hako" as JsonProg + +static box EmitterBox { + emit_program(json, usings_json) { + if json == null { return json } + return JsonProg.normalize(json, usings_json) + } +} diff --git a/lang/src/compiler/stage1/json_program_box.hako b/lang/src/compiler/stage1/json_program_box.hako new file mode 100644 index 00000000..2251d6a3 --- /dev/null +++ b/lang/src/compiler/stage1/json_program_box.hako @@ -0,0 +1,326 @@ +// Moved from apps/selfhost-compiler/boxes/json_program_box.hako +// JsonProgramBox — JSON v0 正規化の最小箱 +// 責務: Programヘッダの補正・meta.usings注入・主要Stmt/Exprのキー順安定化 +using "lang/src/shared/common/string_helpers.hako" as StringHelpers +using "lang/src/shared/json/json_utils.hako" as JsonUtilsBox + +static box JsonProgramBox { + normalize(json, usings_json) { + local normalized = me.normalize_program(json) + // 一括正規化: 配列フィールドの null を [] に丸める(Loop.body / If.then/else / Call.args) + normalized = me.fix_null_arrays(normalized) + normalized = me.compact_array_ws(normalized) + return me.ensure_meta(normalized, usings_json) + } + + ensure_meta(json, usings_json) { + local payload = usings_json + if payload == null { payload = "[]" } + if payload.size() == 0 { payload = "[]" } + + if json == null { + return "{\"version\":0,\"kind\":\"Program\",\"body\":[],\"meta\":{\"usings\":" + payload + "}}" + } + + local n = json.lastIndexOf("}") + if n < 0 { return json } + local head = json.substring(0, n) + local tail = json.substring(n, json.size()) + local needs_comma = 1 + if head.size() == 0 { needs_comma = 0 } + else { + local last = head.substring(head.size() - 1, head.size()) + if last == "{" || last == "," { needs_comma = 0 } + } + if needs_comma == 1 { head = head + "," } + return head + "\"meta\":{\"usings\":" + payload + "}" + tail + } + + normalize_program(json) { + if json == null { return json } + local trimmed = me.trim(json) + if trimmed == null { return json } + if me.index_of(trimmed, 0, "\"body\"") < 0 { return json } + + local version = JsonUtilsBox.extract_value(trimmed, "version") + if version == null { version = "0" } + local kind_name = JsonUtilsBox.extract_string_value(trimmed, "kind", "Program") + local body_raw = JsonUtilsBox.extract_value(trimmed, "body") + if body_raw == null { body_raw = "[]" } + 
local body_norm = me.normalize_stmt_array(body_raw) + + return "{\"version\":" + version + ",\"kind\":" + me.quote(kind_name) + ",\"body\":" + body_norm + "}" + } + + // fix_null_arrays: よくある null 配列を [] に正規化(string ベース、保守的) + fix_null_arrays(json) { + if json == null { return json } + local s = json + s = me._replace_all(s, "\"args\":null", "\"args\":[]") + s = me._replace_all(s, "\"body\":null", "\"body\":[]") + s = me._replace_all(s, "\"then\":null", "\"then\":[]") + s = me._replace_all(s, "\"else\":null", "\"else\":[]") + return s + } + + compact_array_ws(json) { + if json == null { return json } + local s = json + s = me._replace_all(s, "[ {", "[{") + s = me._replace_all(s, "[ {", "[{") + s = me._replace_all(s, "} ]", "}]") + s = me._replace_all(s, "} ]", "}]") + return s + } + + _replace_all(text, pat, rep) { + if text == null { return text } + local m = pat.size() + if m == 0 { return text } + local out = "" + local i = 0 + local n = text.size() + loop(i < n) { + if StringHelpers.starts_with(text, i, pat) == 1 { + out = out + rep + i = i + m + } else { + local ch = text.substring(i, i + 1) + if ch == null { ch = "" } + out = out + ch + i = i + 1 + } + } + return out + } + + normalize_stmt_array(array_json) { + if array_json == null { return "[]" } + local trimmed = me.trim(array_json) + if trimmed.size() == 0 { return "[]" } + if trimmed == "null" { return "[]" } + if trimmed.size() < 2 { return "[]" } + if trimmed.substring(0, 1) != "[" { return trimmed } + if trimmed == "[]" { return "[]" } + + local parts = JsonUtilsBox.split_top_level(trimmed) + local out = new ArrayBox() + local i = 0 + loop(i < parts.size()) { + local item = me.trim(parts.get(i)) + if item.size() > 0 { + out.push(me.normalize_stmt(item)) + } + i = i + 1 + } + return "[" + me.join(out) + "]" + } + + default_int_expr() { return "{\"type\":\"Int\",\"value\":0}" } + + default_bool_expr() { return "{\"type\":\"Bool\",\"value\":false}" } + + default_recv_expr() { return "{\"type\":\"Var\",\"name\":\"me\"}" } + + normalize_stmt(stmt_json) { + if stmt_json == null { return "{}" } + local trimmed = me.trim(stmt_json) + local type_name = JsonUtilsBox.extract_string_value(trimmed, "type", null) + if type_name == null { return trimmed } + + if type_name == "Return" { + local expr_raw = JsonUtilsBox.extract_value(trimmed, "expr") + local expr_norm = me.normalize_expr(expr_raw) + if expr_norm == null { expr_norm = me.default_int_expr() } + return "{\"type\":\"Return\",\"expr\":" + expr_norm + "}" + } + if type_name == "If" { + local cond_raw = JsonUtilsBox.extract_value(trimmed, "cond") + local cond_norm = me.normalize_expr(cond_raw) + if cond_norm == null { cond_norm = me.default_bool_expr() } + local then_raw = JsonUtilsBox.extract_value(trimmed, "then") + local then_norm = me.normalize_stmt_array(then_raw) + local else_raw = JsonUtilsBox.extract_value(trimmed, "else") + local out = "{\"type\":\"If\",\"cond\":" + cond_norm + ",\"then\":" + then_norm + if else_raw != null { + local else_trim = me.trim(else_raw) + if else_trim != "null" { + local else_norm = me.normalize_stmt_array(else_trim) + out = out + ",\"else\":" + else_norm + } + } + return out + "}" + } + if type_name == "Loop" { + local cond_raw2 = JsonUtilsBox.extract_value(trimmed, "cond") + local cond_norm2 = me.normalize_expr(cond_raw2) + if cond_norm2 == null { cond_norm2 = me.default_bool_expr() } + local body_raw = JsonUtilsBox.extract_value(trimmed, "body") + local body_norm = me.normalize_stmt_array(body_raw) + return "{\"type\":\"Loop\",\"cond\":" + 
cond_norm2 + ",\"body\":" + body_norm + "}" + } + if type_name == "Local" || type_name == "Const" { + local name = JsonUtilsBox.extract_string_value(trimmed, "name", "tmp") + local expr_raw3 = JsonUtilsBox.extract_value(trimmed, "expr") + if expr_raw3 == null { expr_raw3 = JsonUtilsBox.extract_value(trimmed, "value") } + local expr_norm3 = me.normalize_expr(expr_raw3) + if expr_norm3 == null { expr_norm3 = me.default_int_expr() } + local tag = "Local" + if type_name == "Const" { tag = "Const" } + return "{\"type\":\"" + tag + "\",\"name\":" + me.quote(name) + ",\"expr\":" + expr_norm3 + "}" + } + if type_name == "Expr" { + local expr_raw4 = JsonUtilsBox.extract_value(trimmed, "expr") + local expr_norm4 = me.normalize_expr(expr_raw4) + if expr_norm4 == null { expr_norm4 = me.default_int_expr() } + return "{\"type\":\"Expr\",\"expr\":" + expr_norm4 + "}" + } + + return trimmed + } + + normalize_expr_array(array_json) { + if array_json == null { return "[]" } + local trimmed = me.trim(array_json) + if trimmed.size() == 0 { return "[]" } + if trimmed == "null" { return "[]" } + if trimmed.size() < 2 { return "[]" } + if trimmed.substring(0, 1) != "[" { return trimmed } + if trimmed == "[]" { return "[]" } + + local parts = JsonUtilsBox.split_top_level(trimmed) + local out = new ArrayBox() + local i = 0 + loop(i < parts.size()) { + local item = me.trim(parts.get(i)) + if item.size() > 0 { + local norm = me.normalize_expr(item) + if norm == null { norm = item } + out.push(norm) + } + i = i + 1 + } + return "[" + me.join(out) + "]" + } + + normalize_expr(expr_json) { + if expr_json == null { return null } + local trimmed = me.trim(expr_json) + local type_name = JsonUtilsBox.extract_string_value(trimmed, "type", null) + if type_name == null { return trimmed } + + if type_name == "Int" { + local value_raw = JsonUtilsBox.extract_value(trimmed, "value") + if value_raw == null { value_raw = "0" } + return "{\"type\":\"Int\",\"value\":" + me.trim(value_raw) + "}" + } + if type_name == "Str" { + local value_raw2 = JsonUtilsBox.extract_value(trimmed, "value") + if value_raw2 == null { value_raw2 = "\"\"" } + return "{\"type\":\"Str\",\"value\":" + me.trim(value_raw2) + "}" + } + if type_name == "Bool" { + local value_raw3 = JsonUtilsBox.extract_value(trimmed, "value") + if value_raw3 == null { value_raw3 = "false" } + return "{\"type\":\"Bool\",\"value\":" + me.trim(value_raw3) + "}" + } + if type_name == "Null" { + return "{\"type\":\"Null\"}" + } + if type_name == "Var" { + local name = JsonUtilsBox.extract_string_value(trimmed, "name", "") + return "{\"type\":\"Var\",\"name\":" + me.quote(name) + "}" + } + if type_name == "Call" { + local name2 = JsonUtilsBox.extract_string_value(trimmed, "name", "") + local args_raw = JsonUtilsBox.extract_value(trimmed, "args") + local args_norm = me.normalize_expr_array(args_raw) + return "{\"type\":\"Call\",\"name\":" + me.quote(name2) + ",\"args\":" + args_norm + "}" + } + if type_name == "Method" { + local recv_raw = JsonUtilsBox.extract_value(trimmed, "recv") + local recv_norm = me.normalize_expr(recv_raw) + if recv_norm == null { recv_norm = me.default_recv_expr() } + local method_name = JsonUtilsBox.extract_string_value(trimmed, "method", "") + local args_raw2 = JsonUtilsBox.extract_value(trimmed, "args") + local args_norm2 = me.normalize_expr_array(args_raw2) + return "{\"type\":\"Method\",\"recv\":" + recv_norm + ",\"method\":" + me.quote(method_name) + ",\"args\":" + args_norm2 + "}" + } + if type_name == "Binary" { + local op = 
JsonUtilsBox.extract_string_value(trimmed, "op", "+") + local lhs_raw = JsonUtilsBox.extract_value(trimmed, "lhs") + local rhs_raw = JsonUtilsBox.extract_value(trimmed, "rhs") + local lhs_norm = me.normalize_expr(lhs_raw) + if lhs_norm == null { lhs_norm = me.default_int_expr() } + local rhs_norm = me.normalize_expr(rhs_raw) + if rhs_norm == null { rhs_norm = me.default_int_expr() } + return "{\"type\":\"Binary\",\"op\":" + me.quote(op) + ",\"lhs\":" + lhs_norm + ",\"rhs\":" + rhs_norm + "}" + } + if type_name == "Compare" { + local op2 = JsonUtilsBox.extract_string_value(trimmed, "op", "==") + local lhs_raw2 = JsonUtilsBox.extract_value(trimmed, "lhs") + local rhs_raw2 = JsonUtilsBox.extract_value(trimmed, "rhs") + local lhs_norm2 = me.normalize_expr(lhs_raw2) + if lhs_norm2 == null { lhs_norm2 = me.default_int_expr() } + local rhs_norm2 = me.normalize_expr(rhs_raw2) + if rhs_norm2 == null { rhs_norm2 = me.default_int_expr() } + return "{\"type\":\"Compare\",\"op\":" + me.quote(op2) + ",\"lhs\":" + lhs_norm2 + ",\"rhs\":" + rhs_norm2 + "}" + } + if type_name == "Logical" { + local op3 = JsonUtilsBox.extract_string_value(trimmed, "op", "&&") + local lhs_raw3 = JsonUtilsBox.extract_value(trimmed, "lhs") + local rhs_raw3 = JsonUtilsBox.extract_value(trimmed, "rhs") + local lhs_norm3 = me.normalize_expr(lhs_raw3) + if lhs_norm3 == null { lhs_norm3 = me.default_bool_expr() } + local rhs_norm3 = me.normalize_expr(rhs_raw3) + if rhs_norm3 == null { rhs_norm3 = me.default_bool_expr() } + return "{\"type\":\"Logical\",\"op\":" + me.quote(op3) + ",\"lhs\":" + lhs_norm3 + ",\"rhs\":" + rhs_norm3 + "}" + } + + return trimmed + } + + // String operations (delegated to StringHelpers) + index_of(s, start, pat) { return StringHelpers.index_of(s, start, pat) } + last_index_of(s, pat) { return StringHelpers.last_index_of(s, pat) } + skip_ws(s, idx) { return StringHelpers.skip_ws(s, idx) } + trim(s) { + if s == null { return "" } + return me._trim_all(s) + } + + _trim_all(text) { + if text == null { return "" } + local n = text.size() + local start = 0 + loop(start < n) { + local ch = call("String.substring/2", text, start, start + 1) + if ch == " " || ch == "\t" || ch == "\n" || ch == "\r" { start = start + 1 } else { break } + } + local end_idx = n + loop(end_idx > start) { + local ch2 = call("String.substring/2", text, end_idx - 1, end_idx) + if ch2 == " " || ch2 == "\t" || ch2 == "\n" || ch2 == "\r" || ch2 == ";" { end_idx = end_idx - 1 } else { break } + } + if end_idx <= start { return "" } + local part = call("String.substring/2", text, start, end_idx) + if part == null { return "" } + return part + } + quote(s) { return StringHelpers.json_quote(s) } + i2s(v) { return StringHelpers.int_to_str(v) } + + // Array joining (normalization-specific) + join(parts) { + if parts == null { return "" } + local out = "" + local i = 0 + local n = parts.size() + loop(i < n) { + local item = parts.get(i) + if i == 0 { out = out + item } else { out = out + "," + item } + i = i + 1 + } + return out + } +} diff --git a/src/ast.rs b/src/ast.rs index cb6cad91..e0281125 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -521,6 +521,13 @@ pub enum ASTNode { span: Span, }, + /// 添字アクセス: target[index] + Index { + target: Box, + index: Box, + span: Span, + }, + /// コンストラクタ呼び出し: new ClassName(arguments) New { class: String, diff --git a/src/ast/utils.rs b/src/ast/utils.rs index 2a51717c..d364ad1e 100644 --- a/src/ast/utils.rs +++ b/src/ast/utils.rs @@ -26,6 +26,7 @@ impl ASTNode { ASTNode::BinaryOp { .. 
} => "BinaryOp", ASTNode::MethodCall { .. } => "MethodCall", ASTNode::FieldAccess { .. } => "FieldAccess", + ASTNode::Index { .. } => "Index", ASTNode::New { .. } => "New", ASTNode::This { .. } => "This", ASTNode::Me { .. } => "Me", @@ -78,6 +79,7 @@ impl ASTNode { ASTNode::FromCall { .. } => ASTNodeType::Expression, ASTNode::ThisField { .. } => ASTNodeType::Expression, ASTNode::MeField { .. } => ASTNodeType::Expression, + ASTNode::Index { .. } => ASTNodeType::Expression, ASTNode::MatchExpr { .. } => ASTNodeType::Expression, ASTNode::QMarkPropagate { .. } => ASTNodeType::Expression, ASTNode::Lambda { .. } => ASTNodeType::Expression, @@ -312,6 +314,9 @@ impl ASTNode { ASTNode::MapLiteral { entries, .. } => { format!("MapLiteral({} entries)", entries.len()) } + ASTNode::Index { target, index, .. } => { + format!("Index(target={:?}, index={:?})", target, index) + } ASTNode::ScopeBox { .. } => "ScopeBox".to_string(), } } @@ -342,6 +347,7 @@ impl ASTNode { ASTNode::BinaryOp { span, .. } => *span, ASTNode::MethodCall { span, .. } => *span, ASTNode::FieldAccess { span, .. } => *span, + ASTNode::Index { span, .. } => *span, ASTNode::New { span, .. } => *span, ASTNode::This { span, .. } => *span, ASTNode::Me { span, .. } => *span, diff --git a/src/mir/builder/exprs.rs b/src/mir/builder/exprs.rs index b45face2..2b10824d 100644 --- a/src/mir/builder/exprs.rs +++ b/src/mir/builder/exprs.rs @@ -66,6 +66,8 @@ impl super::MirBuilder { let stmt = AssignStmt::try_from(node).expect("ASTNode::Assignment must convert"); if let ASTNode::FieldAccess { object, field, .. } = stmt.target.as_ref() { self.build_field_assignment(*object.clone(), field.clone(), *stmt.value.clone()) + } else if let ASTNode::Index { target, index, .. } = stmt.target.as_ref() { + self.build_index_assignment(*target.clone(), *index.clone(), *stmt.value.clone()) } else if let ASTNode::Variable { name, .. } = stmt.target.as_ref() { self.build_assignment(name.clone(), *stmt.value.clone()) } else { @@ -73,6 +75,10 @@ impl super::MirBuilder { } } + ASTNode::Index { target, index, .. } => { + self.build_index_expression(*target.clone(), *index.clone()) + } + node @ ASTNode::FunctionCall { .. 
} => { let c = CallExpr::try_from(node).expect("ASTNode::FunctionCall must convert"); self.build_function_call(c.name, c.arguments) @@ -213,6 +219,11 @@ impl super::MirBuilder { box_type: "ArrayBox".to_string(), args: vec![], })?; + self.value_origin_newbox + .insert(arr_id, "ArrayBox".to_string()); + self + .value_types + .insert(arr_id, super::MirType::Box("ArrayBox".to_string())); for e in elements { let v = self.build_expression_impl(e)?; self.emit_instruction(MirInstruction::BoxCall { @@ -233,6 +244,12 @@ impl super::MirBuilder { box_type: "MapBox".to_string(), args: vec![], })?; + self + .value_origin_newbox + .insert(map_id, "MapBox".to_string()); + self + .value_types + .insert(map_id, super::MirType::Box("MapBox".to_string())); for (k, expr) in entries { // const string key let k_id = crate::mir::builder::emission::constant::emit_string(self, k); @@ -310,4 +327,110 @@ impl super::MirBuilder { _ => Err(format!("Unsupported AST node type: {:?}", ast)), } } + + fn infer_index_target_class(&self, target_val: ValueId) -> Option { + if let Some(cls) = self.value_origin_newbox.get(&target_val) { + return Some(cls.clone()); + } + self.value_types.get(&target_val).and_then(|ty| match ty { + super::MirType::Box(name) => Some(name.clone()), + super::MirType::String => Some("String".to_string()), + super::MirType::Integer => Some("Integer".to_string()), + super::MirType::Float => Some("Float".to_string()), + _ => None, + }) + } + + fn format_index_target_kind(class_hint: Option<&String>) -> String { + class_hint + .map(|s| s.as_str()) + .filter(|s| !s.is_empty()) + .unwrap_or("unknown") + .to_string() + } + + pub(super) fn build_index_expression( + &mut self, + target: ASTNode, + index: ASTNode, + ) -> Result { + let target_val = self.build_expression(target)?; + let class_hint = self.infer_index_target_class(target_val); + + match class_hint.as_deref() { + Some("ArrayBox") => { + let index_val = self.build_expression(index)?; + let dst = self.value_gen.next(); + self.emit_box_or_plugin_call( + Some(dst), + target_val, + "get".to_string(), + None, + vec![index_val], + super::EffectMask::READ, + )?; + Ok(dst) + } + Some("MapBox") => { + let index_val = self.build_expression(index)?; + let dst = self.value_gen.next(); + self.emit_box_or_plugin_call( + Some(dst), + target_val, + "get".to_string(), + None, + vec![index_val], + super::EffectMask::READ, + )?; + Ok(dst) + } + _ => Err(format!( + "index operator is only supported for Array/Map (found {})", + Self::format_index_target_kind(class_hint.as_ref()) + )), + } + } + + pub(super) fn build_index_assignment( + &mut self, + target: ASTNode, + index: ASTNode, + value: ASTNode, + ) -> Result { + let target_val = self.build_expression(target)?; + let class_hint = self.infer_index_target_class(target_val); + + match class_hint.as_deref() { + Some("ArrayBox") => { + let index_val = self.build_expression(index)?; + let value_val = self.build_expression(value)?; + self.emit_box_or_plugin_call( + None, + target_val, + "set".to_string(), + None, + vec![index_val, value_val], + super::EffectMask::MUT, + )?; + Ok(value_val) + } + Some("MapBox") => { + let index_val = self.build_expression(index)?; + let value_val = self.build_expression(value)?; + self.emit_box_or_plugin_call( + None, + target_val, + "set".to_string(), + None, + vec![index_val, value_val], + super::EffectMask::MUT, + )?; + Ok(value_val) + } + _ => Err(format!( + "index assignment is only supported for Array/Map (found {})", + Self::format_index_target_kind(class_hint.as_ref()) + )), + } + 
} } diff --git a/src/mir/builder/vars.rs b/src/mir/builder/vars.rs index 643a17df..60691d9e 100644 --- a/src/mir/builder/vars.rs +++ b/src/mir/builder/vars.rs @@ -55,6 +55,10 @@ pub(super) fn collect_free_vars( ASTNode::FieldAccess { object, .. } => { collect_free_vars(object, used, locals); } + ASTNode::Index { target, index, .. } => { + collect_free_vars(target, used, locals); + collect_free_vars(index, used, locals); + } ASTNode::New { arguments, .. } => { for a in arguments { collect_free_vars(a, used, locals); diff --git a/src/parser/expr/call.rs b/src/parser/expr/call.rs index 45701ec3..bf70b685 100644 --- a/src/parser/expr/call.rs +++ b/src/parser/expr/call.rs @@ -192,6 +192,16 @@ impl NyashParser { span: Span::unknown(), }; } + } else if self.match_token(&TokenType::LBRACK) { + self.advance(); // consume '[' + must_advance!(self, _unused, "index expression parsing"); + let index_expr = self.parse_expression()?; + self.consume(TokenType::RBRACK)?; + expr = ASTNode::Index { + target: Box::new(expr), + index: Box::new(index_expr), + span: Span::unknown(), + }; } else if self.match_token(&TokenType::QUESTION) { let nt = self.peek_token(); let is_ender = matches!( diff --git a/src/parser/expr_cursor.rs b/src/parser/expr_cursor.rs index 0c2af1e0..52eda1c5 100644 --- a/src/parser/expr_cursor.rs +++ b/src/parser/expr_cursor.rs @@ -246,6 +246,19 @@ impl ExprParserWithCursor { continue; } + // 添字アクセス target[index] + if cursor.match_token(&TokenType::LBRACK) { + cursor.advance(); // consume '[' + let index_expr = Self::parse_expression(cursor)?; + cursor.consume(TokenType::RBRACK)?; + expr = ASTNode::Index { + target: Box::new(expr), + index: Box::new(index_expr), + span: Span::unknown(), + }; + continue; + } + break; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 493542af..9032d31a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -276,7 +276,9 @@ impl NyashParser { // 左辺が代入可能な形式かチェック match &expr { - ASTNode::Variable { .. } | ASTNode::FieldAccess { .. } => Ok(ASTNode::Assignment { + ASTNode::Variable { .. } + | ASTNode::FieldAccess { .. } + | ASTNode::Index { .. } => Ok(ASTNode::Assignment { target: Box::new(expr), value, span: Span::unknown(), diff --git a/tools/smokes/v2/profiles/quick/core/index_operator_hako.sh b/tools/smokes/v2/profiles/quick/core/index_operator_hako.sh new file mode 100644 index 00000000..a0507bb3 --- /dev/null +++ b/tools/smokes/v2/profiles/quick/core/index_operator_hako.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# index_operator_hako.sh — Hako-side index operator canaries (opt-in) + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# Try to detect repo root via git; fallback by climbing to tools directory +if ROOT_GIT=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel 2>/dev/null); then + ROOT="$ROOT_GIT" +else + ROOT="$(cd "$SCRIPT_DIR/../../../../.." && pwd)" +fi +HAKO_BIN_DEFAULT="$ROOT/tools/bin/hako" +HAKO_BIN="${HAKO_BIN:-$HAKO_BIN_DEFAULT}" + +warn() { echo -e "[WARN] $*" >&2; } +info() { echo -e "[INFO] $*" >&2; } +fail() { echo -e "[FAIL] $*" >&2; return 1; } +pass() { echo -e "[PASS] $*" >&2; } + +require_hako() { + if [ ! 
-x "$HAKO_BIN" ]; then + warn "Hako binary not found: $HAKO_BIN (set HAKO_BIN to override)" + warn "Skipping Hako index canaries" + exit 0 + fi +} + +run_hako() { + local code="$1" + local tmp="/tmp/hako_idx_$$.hako" + printf "%s\n" "$code" > "$tmp" + # Keep output quiet; rely on program output only + NYASH_PARSER_ALLOW_SEMICOLON=1 \ + NYASH_SYNTAX_SUGAR_LEVEL=full \ + NYASH_ENABLE_ARRAY_LITERAL=1 \ + "$HAKO_BIN" --backend vm "$tmp" 2>&1 + local rc=$? + rm -f "$tmp" + return $rc +} + +check_exact() { + local expect="$1"; shift + local got="$1"; shift + local name="$1"; shift + if [ "$got" = "$expect" ]; then pass "$name"; return 0; fi + printf "Expected: %s\nActual: %s\n" "$expect" "$got" >&2 + fail "$name" +} + +require_hako + +info "Hako index canary: array read" +out=$(run_hako 'box Main { static method main() { local a=[1,2,3]; print(a[0]); } }') +check_exact "1" "$out" "hako_index_array_read" || exit 1 + +info "Hako index canary: array write" +out=$(run_hako 'box Main { static method main() { local a=[1,2]; a[1]=9; print(a[1]); } }') +check_exact "9" "$out" "hako_index_array_write" || exit 1 + +info "Hako index canary: map rw" +out=$(run_hako 'box Main { static method main() { local m={"a":1}; m["b"]=7; print(m["b"]); } }') +check_exact "7" "$out" "hako_index_map_rw" || exit 1 + +info "Hako index canary: string unsupported (expect failure)" +run_hako 'box Main { static method main() { local s="hey"; print(s[0]); } }' >/tmp/hako_idx_err.txt 2>&1 && { + fail "hako_index_string_unsupported (expected failure)"; exit 1; +} +pass "hako_index_string_unsupported" + +exit 0 diff --git a/tools/smokes/v2/profiles/quick/core/index_operator_vm.sh b/tools/smokes/v2/profiles/quick/core/index_operator_vm.sh new file mode 100644 index 00000000..84a4ef2e --- /dev/null +++ b/tools/smokes/v2/profiles/quick/core/index_operator_vm.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# index_operator_vm.sh - Array/Map indexing support tests + +source "$(dirname "$0")/../../../lib/test_runner.sh" +source "$(dirname "$0")/../../../lib/result_checker.sh" + +require_env || exit 2 +preflight_plugins || exit 2 + +test_index_array_read() { + local output + output=$(NYASH_PARSER_ALLOW_SEMICOLON=1 run_nyash_vm -c 'local arr = [1, 2, 3]; print(arr[0]);' 2>&1) + check_exact "1" "$output" "index_array_read" +} + +test_index_array_write() { + local output + output=$(NYASH_PARSER_ALLOW_SEMICOLON=1 run_nyash_vm -c 'local arr = [1, 2]; arr[1] = 9; print(arr[1]);' 2>&1) + check_exact "9" "$output" "index_array_write" +} + +test_index_map_read_write() { + local output + output=$(NYASH_PARSER_ALLOW_SEMICOLON=1 run_nyash_vm -c 'local m = { "a": 1 }; m["b"] = 7; print(m["b"]);' 2>&1) + check_exact "7" "$output" "index_map_rw" +} + +test_index_string_unsupported() { + local output + local status + output=$(NYASH_PARSER_ALLOW_SEMICOLON=1 run_nyash_vm -c 'local s = "hey"; print(s[0]);' 2>&1) && status=0 || status=$? + if [ "$status" -eq 0 ]; then + echo "[FAIL] index_string_unsupported: expected failure" >&2 + return 1 + fi + # Expect builder to fail-fast with explicit diagnostic + check_regex "index operator is only supported" "$output" "index_string_unsupported" +} + +run_test "index_array_read" test_index_array_read +run_test "index_array_write" test_index_array_write +run_test "index_map_rw" test_index_map_read_write +run_test "index_string_unsupported" test_index_string_unsupported
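+
+# Maintainer note: the check_regex pattern above ("index operator is only supported")
+# matches the Fail-Fast diagnostic emitted by build_index_expression in
+# src/mir/builder/exprs.rs. If that builder message changes, update the pattern
+# here accordingly.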