feat(phase21.5/22.1): MirBuilder JsonFrag refactor + FileBox ring-1 + registry tests
Phase 21.5 (AOT/LLVM Optimization Prep)
- FileBox ring-1 (core-ro) provider: priority=-100, always available, no panic path
- src/runner/modes/common_util/provider_registry.rs: CoreRoFileProviderFactory
- Auto-registers at startup, eliminates fallback panic structurally
- StringBox fast path prototypes (length/size optimization)
- Performance benchmarks (C/Python/Hako comparison baseline)

Phase 22.1 (JsonFrag Unification)
- JsonFrag.last_index_of_from() for backward search (VM fallback)
- Replace hand-written lastIndexOf in lower_loop_sum_bc_box.hako
- SentinelExtractorBox for Break/Continue pattern extraction

MirBuilder Refactor (Box → JsonFrag Migration)
- 20+ lower_*_box.hako: Box-heavy → JsonFrag text assembly
- MirBuilderMinBox: lightweight using set for dev env
- Registry-only fast path with [registry:*] tag observation
- pattern_util_box.hako: enhanced pattern matching

Dev Environment & Testing
- Dev toggles: SMOKES_DEV_PREINCLUDE=1 (point-enable), HAKO_MIR_BUILDER_SKIP_LOOPS=1
- phase2160: registry opt-in tests (array/map get/set/push/len) - content verification
- phase2034: rc-dependent → token grep (grep -F based validation)
- run_quick.sh: fast smoke testing harness
- ENV documentation: docs/ENV_VARS.md

Test Results
✅ quick phase2034: ALL GREEN (MirBuilder internal patterns)
✅ registry phase2160: ALL GREEN (array/map get/set/push/len)
✅ rc-dependent tests → content token verification complete
✅ PREINCLUDE policy: default OFF, point-enable only where needed

Technical Notes
- No INCLUDE by default (maintain minimalism)
- FAIL_FAST=0 in Bring-up contexts only (explicit dev toggles)
- Tag-based route observation ([mirbuilder/min:*], [registry:*])
- MIR structure validation (not just rc parity)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -1,7 +1,16 @@
|
||||
// Stage-B compiler entry — ParserBox → FlowEntry emit-only
|
||||
// Notes (Dev/Doc):
|
||||
// - Scope: Hako だけで Program(JSON v0) を生成する自己ホスト入口。本文抽出→コメント除去→正規化→parse_program2 で一行JSONを出力。
|
||||
// - Robustness: 本文抽出は "method main" なしでも `box Main { main(...) { ... } }` からフォールバック抽出する。
|
||||
// 文字列を正しくスキップ(エスケープ対応)しつつ () と {} の対応を検出。// と /* */ のコメントは除去。
|
||||
// 前後の空白/改行もトリムして JSON 揺れを低減する。
|
||||
// - Known limits: VM の include は未対応。MirBuilder を VM 直で実行する経路は既定OFF(opt‑in検証のみ)。
|
||||
// - Policy: Program(JSON) → MIR(JSON) は Rust CLI 変換(Gate‑C)を既定とし、Hako ビルダーは opt‑in で段階導入する。
|
||||
// - Toggles around: HAKO_MIR_BUILDER_{INTERNAL,REGISTRY,DELEGATE}(MirBuilder側)。ここでは JSON 生成に専念する。
|
||||
// - Recommended: Dev/CI は wrapper(tools/hakorune_emit_mir.sh)経由で Program→MIR を行い、失敗時は Gate‑C に自動委譲する。
|
||||
|
||||
using sh_core as StringHelpers // Required: ParserStringUtilsBox depends on this (using chain unresolved)
|
||||
include "lang/src/compiler/entry/bundle_resolver.hako"
|
||||
using "hako.compiler.entry.bundle_resolver" as BundleResolver
|
||||
using lang.compiler.parser.box as ParserBox
|
||||
|
||||
// Note: Runner resolves entry as Main.main by default.
|
||||
@ -41,7 +50,7 @@ static box Main {
|
||||
{
|
||||
// naive search for "method main" → '(' → ')' → '{' ... balanced '}'
|
||||
local s = src
|
||||
// naive substring search for "method main"
|
||||
// naive substring search for "method main"; fallback to "main(" inside box Main
|
||||
local k0 = -1
|
||||
{
|
||||
local pat = "method main"
|
||||
@ -53,39 +62,127 @@ static box Main {
|
||||
i = i + 1
|
||||
}
|
||||
}
|
||||
if k0 < 0 {
|
||||
// Fallback: find "box Main" (with or without leading 'static') then locate "main(" after it
|
||||
local kbox = -1
|
||||
{
|
||||
local pat = "box Main"
|
||||
local m = pat.length()
|
||||
local i = 0
|
||||
local n = s.length()
|
||||
loop(i + m <= n) {
|
||||
if s.substring(i, i + m) == pat { kbox = i break }
|
||||
i = i + 1
|
||||
}
|
||||
}
|
||||
if kbox >= 0 {
|
||||
// search for "main(" starting at kbox
|
||||
local i = kbox
|
||||
local n = s.length()
|
||||
loop(i + 5 <= n) { // len("main(") = 5
|
||||
if s.substring(i, i + 5) == "main(" { k0 = i break }
|
||||
i = i + 1
|
||||
}
|
||||
} else {
|
||||
// last resort: global search for "main(" (may overmatch, but better than using the full file as the body)
|
||||
local i = 0
|
||||
local n = s.length()
|
||||
loop(i + 5 <= n) {
|
||||
if s.substring(i, i + 5) == "main(" { k0 = i break }
|
||||
i = i + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
if k0 >= 0 {
|
||||
// find '(' after k0
|
||||
// find '(' after k0 (skip inside strings)
|
||||
local k1 = -1
|
||||
{
|
||||
local j = k0
|
||||
local n = s.length()
|
||||
loop(j < n) { if s.substring(j, j + 1) == "(" { k1 = j break } j = j + 1 }
|
||||
local in_str = 0
|
||||
local esc = 0
|
||||
loop(j < n) {
|
||||
local ch = s.substring(j, j + 1)
|
||||
if in_str == 1 {
|
||||
if esc == 1 { esc = 0 j = j + 1 continue }
|
||||
if ch == "\\" { esc = 1 j = j + 1 continue }
|
||||
if ch == "\"" { in_str = 0 j = j + 1 continue }
|
||||
j = j + 1
|
||||
continue
|
||||
}
|
||||
if ch == "\"" { in_str = 1 j = j + 1 continue }
|
||||
if ch == "(" { k1 = j break }
|
||||
j = j + 1
|
||||
}
|
||||
}
|
||||
if k1 >= 0 {
|
||||
// find ')' after k1
|
||||
// find ')' after k1 (skip inside strings)
|
||||
local k2 = -1
|
||||
{
|
||||
local j = k1
|
||||
local n = s.length()
|
||||
loop(j < n) { if s.substring(j, j + 1) == ")" { k2 = j break } j = j + 1 }
|
||||
local in_str = 0
|
||||
local esc = 0
|
||||
loop(j < n) {
|
||||
local ch = s.substring(j, j + 1)
|
||||
if in_str == 1 {
|
||||
if esc == 1 { esc = 0 j = j + 1 continue }
|
||||
if ch == "\\" { esc = 1 j = j + 1 continue }
|
||||
if ch == "\"" { in_str = 0 j = j + 1 continue }
|
||||
j = j + 1
|
||||
continue
|
||||
}
|
||||
if ch == "\"" { in_str = 1 j = j + 1 continue }
|
||||
if ch == ")" { k2 = j break }
|
||||
j = j + 1
|
||||
}
|
||||
}
|
||||
if k2 >= 0 {
|
||||
// Find opening '{' following ')'
|
||||
// Find opening '{' following ')' (skip inside strings)
|
||||
local k3 = -1
|
||||
{
|
||||
local j = k2
|
||||
local n = s.length()
|
||||
loop(j < n) { if s.substring(j, j + 1) == "{" { k3 = j break } j = j + 1 }
|
||||
local in_str = 0
|
||||
local esc = 0
|
||||
loop(j < n) {
|
||||
local ch = s.substring(j, j + 1)
|
||||
if in_str == 1 {
|
||||
if esc == 1 { esc = 0 j = j + 1 continue }
|
||||
if ch == "\\" { esc = 1 j = j + 1 continue }
|
||||
if ch == "\"" { in_str = 0 j = j + 1 continue }
|
||||
j = j + 1
|
||||
continue
|
||||
}
|
||||
if ch == "\"" { in_str = 1 j = j + 1 continue }
|
||||
if ch == "{" { k3 = j break }
|
||||
j = j + 1
|
||||
}
|
||||
}
|
||||
if k3 >= 0 {
|
||||
// Balanced scan for matching '}'
|
||||
local depth = 0
|
||||
local i = k3
|
||||
local n = s.length()
|
||||
local in_str = 0
|
||||
local esc = 0
|
||||
loop(i < n) {
|
||||
local ch = s.substring(i, i + 1)
|
||||
if ch == "{" { depth = depth + 1 }
|
||||
else { if ch == "}" { depth = depth - 1 if depth == 0 { i = i + 1 break } } }
|
||||
if in_str == 1 {
|
||||
if esc == 1 { esc = 0 i = i + 1 continue }
|
||||
if ch == "\\" { esc = 1 i = i + 1 continue }
|
||||
if ch == "\"" { in_str = 0 i = i + 1 continue }
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
if ch == "\"" { in_str = 1 i = i + 1 continue }
|
||||
if ch == "{" { depth = depth + 1 i = i + 1 continue }
|
||||
if ch == "}" {
|
||||
depth = depth - 1
|
||||
i = i + 1
|
||||
if depth == 0 { break }
|
||||
continue
|
||||
}
|
||||
i = i + 1
|
||||
}
|
||||
if depth == 0 {
|
||||
@ -100,6 +197,49 @@ static box Main {
|
||||
|
||||
if body_src == null { body_src = src }
|
||||
|
||||
// 4.7) Strip comments from body_src to avoid stray tokens in Program(JSON) (note: runs before the 4.5 bundling step below)
|
||||
{
|
||||
local s = body_src
|
||||
local out = ""
|
||||
local i = 0
|
||||
local n = s.length()
|
||||
local in_str = 0
|
||||
local esc = 0
|
||||
local in_line = 0
|
||||
local in_block = 0
|
||||
loop(i < n) {
|
||||
local ch = s.substring(i, i + 1)
|
||||
if in_line == 1 {
|
||||
if ch == "\n" { in_line = 0 out = out + ch }
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
if in_block == 1 {
|
||||
if ch == "*" && i + 1 < n && s.substring(i + 1, i + 2) == "/" { in_block = 0 i = i + 2 continue }
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
if in_str == 1 {
|
||||
if esc == 1 { out = out + ch esc = 0 i = i + 1 continue }
|
||||
if ch == "\\" { out = out + ch esc = 1 i = i + 1 continue }
|
||||
if ch == "\"" { out = out + ch in_str = 0 i = i + 1 continue }
|
||||
out = out + ch
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
// Not in string/comment
|
||||
if ch == "\"" { out = out + ch in_str = 1 i = i + 1 continue }
|
||||
if ch == "/" && i + 1 < n {
|
||||
local ch2 = s.substring(i + 1, i + 2)
|
||||
if ch2 == "/" { in_line = 1 i = i + 2 continue }
|
||||
if ch2 == "*" { in_block = 1 i = i + 2 continue }
|
||||
}
|
||||
out = out + ch
|
||||
i = i + 1
|
||||
}
|
||||
body_src = out
|
||||
}
|
||||
|
||||
// 4.5) Optional: bundle extra module sources provided via repeated --bundle-src args
|
||||
// This is a minimal concatenation bundler (no I/O, no resolver). It simply places
|
||||
// provided module snippets before the main body for Stage‑B parser to accept.
|
||||
@ -176,7 +316,26 @@ static box Main {
|
||||
body_src = merged_prefix + body_src
|
||||
}
|
||||
|
||||
// 5) Parse and emit Stage‑1 JSON v0 (Program)
|
||||
// 5) Normalize body: trim leading/trailing whitespace and newlines
|
||||
{
|
||||
local s = body_src
|
||||
local n = s.length()
|
||||
local b = 0
|
||||
// left trim (space, tab, CR, LF)
|
||||
loop(b < n) {
|
||||
local ch = s.substring(b, b + 1)
|
||||
if ch == " " || ch == "\t" || ch == "\r" || ch == "\n" { b = b + 1 } else { break }
|
||||
}
|
||||
// right trim
|
||||
local e = n
|
||||
loop(e > b) {
|
||||
local ch = s.substring(e - 1, e)
|
||||
if ch == " " || ch == "\t" || ch == "\r" || ch == "\n" { e = e - 1 } else { break }
|
||||
}
|
||||
if e > b { body_src = s.substring(b, e) } else { body_src = "" }
|
||||
}
|
||||
|
||||
// 6) Parse and emit Stage‑1 JSON v0 (Program)
|
||||
// Bridge(JSON v0) が Program v0 を受け取り MIR に lowering するため、ここでは AST(JSON v0) を出力する。
|
||||
// 既定で MIR 直出力は行わない(重い経路を避け、一行出力を保証)。
|
||||
local ast_json = p.parse_program2(body_src)
|
||||
|
||||
Reference in New Issue
Block a user