feat(phase21.5/22.1): MirBuilder JsonFrag refactor + FileBox ring-1 + registry tests

Phase 21.5 (AOT/LLVM Optimization Prep)
- FileBox ring-1 (core-ro) provider: priority=-100, always available, no panic path
  - src/runner/modes/common_util/provider_registry.rs: CoreRoFileProviderFactory
  - Auto-registers at startup, eliminates fallback panic structurally
- StringBox fast path prototypes (length/size optimization)
- Performance benchmarks (C/Python/Hako comparison baseline)

Phase 22.1 (JsonFrag Unification)
- JsonFrag.last_index_of_from() for backward search (VM fallback)
- Replace hand-written lastIndexOf in lower_loop_sum_bc_box.hako
- SentinelExtractorBox for Break/Continue pattern extraction

MirBuilder Refactor (Box → JsonFrag Migration)
- 20+ lower_*_box.hako: Box-heavy → JsonFrag text assembly
- MirBuilderMinBox: lightweight using set for dev env
- Registry-only fast path with [registry:*] tag observation
- pattern_util_box.hako: enhanced pattern matching

Dev Environment & Testing
- Dev toggles: SMOKES_DEV_PREINCLUDE=1 (point-enable), HAKO_MIR_BUILDER_SKIP_LOOPS=1
- phase2160: registry opt-in tests (array/map get/set/push/len) - content verification
- phase2034: rc-dependent → token grep (grep -F based validation)
- run_quick.sh: fast smoke testing harness
- ENV documentation: docs/ENV_VARS.md

Test Results
 quick phase2034: ALL GREEN (MirBuilder internal patterns)
 registry phase2160: ALL GREEN (array/map get/set/push/len)
 rc-dependent tests → content token verification complete
 PREINCLUDE policy: default OFF, point-enable only where needed

Technical Notes
- No INCLUDE by default (maintain minimalism)
- FAIL_FAST=0 in Bring-up contexts only (explicit dev toggles)
- Tag-based route observation ([mirbuilder/min:*], [registry:*])
- MIR structure validation (not just rc parity)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-11-10 19:42:42 +09:00
parent fc5706e3f2
commit 6055d53eff
135 changed files with 3983 additions and 1150 deletions

View File

@ -1,7 +1,16 @@
// Stage-B compiler entry — ParserBox → FlowEntry emit-only
// Notes (Dev/Doc):
// - Scope: Hako だけで Program(JSON v0) を生成する自己ホスト入口。本文抽出→コメント除去→正規化→parse_program2 で一行JSONを出力。
// - Robustness: 本文抽出は "method main" なしでも `box Main { main(...) { ... } }` からフォールバック抽出する。
// 文字列を正しくスキップ(エスケープ対応)しつつ () と {} の対応を検出。// と /* */ のコメントは除去。
// 前後の空白/改行もトリムして JSON 揺れを低減する。
// - Known limits: VM の include は未対応。MirBuilder を VM 直で実行する経路は既定OFF(opt-in 検証のみ)。
// - Policy: Program(JSON) → MIR(JSON) は Rust CLI 変換(Gate-C)を既定とし、Hako ビルダーは opt-in で段階導入する。
// - Toggles around: HAKO_MIR_BUILDER_{INTERNAL,REGISTRY,DELEGATE}(MirBuilder側)。ここでは JSON 生成に専念する。
// - Recommended: Dev/CI は wrapper(tools/hakorune_emit_mir.sh)経由で Program→MIR を行い、失敗時は Gate-C に自動委譲する。
using sh_core as StringHelpers // Required: ParserStringUtilsBox depends on this (using chain unresolved)
include "lang/src/compiler/entry/bundle_resolver.hako"
using "hako.compiler.entry.bundle_resolver" as BundleResolver
using lang.compiler.parser.box as ParserBox
// Note: Runner resolves entry as Main.main by default.
@ -41,7 +50,7 @@ static box Main {
{
// naive search for "method main" → '(' → ')' → '{' ... balanced '}'
local s = src
// naive substring search for "method main"
// naive substring search for "method main"; fallback to "main(" inside box Main
local k0 = -1
{
local pat = "method main"
@ -53,39 +62,127 @@ static box Main {
i = i + 1
}
}
if k0 < 0 {
  // "method main" was not found. Fall back to the short form
  // `box Main { main(...) { ... } }`: locate the first "box Main" (this also
  // matches "static box Main") and then the first "main(" at or after it.
  local src_len = s.length()
  // Offset of the first "box Main" occurrence, or -1 when there is none.
  local kbox = -1
  {
    local needle = "box Main"
    local width = needle.length()
    local at = 0
    loop(at + width <= src_len) {
      if s.substring(at, at + width) == needle { kbox = at break }
      at = at + 1
    }
  }
  // Start the "main(" scan at the box header when one was found. Otherwise
  // scan the whole source as a last resort (may overmatch, but that is still
  // better than treating the full file as the body).
  local probe = 0
  if kbox >= 0 { probe = kbox }
  loop(probe + 5 <= src_len) { // 5 == length of "main("
    if s.substring(probe, probe + 5) == "main(" { k0 = probe break }
    probe = probe + 1
  }
}
if k0 >= 0 {
// find '(' after k0
// find '(' after k0 (skip inside strings)
local k1 = -1
{
local j = k0
local n = s.length()
loop(j < n) { if s.substring(j, j + 1) == "(" { k1 = j break } j = j + 1 }
local in_str = 0
local esc = 0
loop(j < n) {
local ch = s.substring(j, j + 1)
if in_str == 1 {
if esc == 1 { esc = 0 j = j + 1 continue }
if ch == "\\" { esc = 1 j = j + 1 continue }
if ch == "\"" { in_str = 0 j = j + 1 continue }
j = j + 1
continue
}
if ch == "\"" { in_str = 1 j = j + 1 continue }
if ch == "(" { k1 = j break }
j = j + 1
}
}
if k1 >= 0 {
// find ')' after k1
// find ')' after k1 (skip inside strings)
local k2 = -1
{
local j = k1
local n = s.length()
loop(j < n) { if s.substring(j, j + 1) == ")" { k2 = j break } j = j + 1 }
local in_str = 0
local esc = 0
loop(j < n) {
local ch = s.substring(j, j + 1)
if in_str == 1 {
if esc == 1 { esc = 0 j = j + 1 continue }
if ch == "\\" { esc = 1 j = j + 1 continue }
if ch == "\"" { in_str = 0 j = j + 1 continue }
j = j + 1
continue
}
if ch == "\"" { in_str = 1 j = j + 1 continue }
if ch == ")" { k2 = j break }
j = j + 1
}
}
if k2 >= 0 {
// Find opening '{' following ')'
// Find opening '{' following ')' (skip inside strings)
local k3 = -1
{
local j = k2
local n = s.length()
loop(j < n) { if s.substring(j, j + 1) == "{" { k3 = j break } j = j + 1 }
local in_str = 0
local esc = 0
loop(j < n) {
local ch = s.substring(j, j + 1)
if in_str == 1 {
if esc == 1 { esc = 0 j = j + 1 continue }
if ch == "\\" { esc = 1 j = j + 1 continue }
if ch == "\"" { in_str = 0 j = j + 1 continue }
j = j + 1
continue
}
if ch == "\"" { in_str = 1 j = j + 1 continue }
if ch == "{" { k3 = j break }
j = j + 1
}
}
if k3 >= 0 {
// Balanced scan for matching '}'
local depth = 0
local i = k3
local n = s.length()
local in_str = 0
local esc = 0
loop(i < n) {
local ch = s.substring(i, i + 1)
if ch == "{" { depth = depth + 1 }
else { if ch == "}" { depth = depth - 1 if depth == 0 { i = i + 1 break } } }
if in_str == 1 {
if esc == 1 { esc = 0 i = i + 1 continue }
if ch == "\\" { esc = 1 i = i + 1 continue }
if ch == "\"" { in_str = 0 i = i + 1 continue }
i = i + 1
continue
}
if ch == "\"" { in_str = 1 i = i + 1 continue }
if ch == "{" { depth = depth + 1 i = i + 1 continue }
if ch == "}" {
depth = depth - 1
i = i + 1
if depth == 0 { break }
continue
}
i = i + 1
}
if depth == 0 {
@ -100,6 +197,49 @@ static box Main {
if body_src == null { body_src = src }
// 4.7) Strip comments from body_src to avoid stray tokens in Program(JSON)
// Single left-to-right pass over body_src. Each character is handled in
// exactly one of four states: inside a // comment, inside a /* */ comment,
// inside a "..." string literal, or plain code.
// - // comments are dropped up to, but not including, the terminating "\n".
// - /* */ comments are dropped and replaced by ONE space, so the tokens on
//   either side of the comment cannot fuse (`a/**/b` yields `a b`, not `ab`).
// - String literals are copied verbatim, honoring backslash escapes, so
//   comment markers inside a string are never treated as comments.
{
  local s = body_src
  local out = ""
  local i = 0
  local n = s.length()
  local in_str = 0   // 1 while inside a "..." literal
  local esc = 0      // 1 when the previous character in a string was a backslash
  local in_line = 0  // 1 while inside a // comment
  local in_block = 0 // 1 while inside a /* */ comment
  loop(i < n) {
    local ch = s.substring(i, i + 1)
    if in_line == 1 {
      // Discard comment text; keep the newline that ends the comment.
      if ch == "\n" { in_line = 0 out = out + ch }
      i = i + 1
      continue
    }
    if in_block == 1 {
      if ch == "*" && i + 1 < n && s.substring(i + 1, i + 2) == "/" {
        // End of block comment: leave a single space as a token separator.
        in_block = 0
        out = out + " "
        i = i + 2
        continue
      }
      i = i + 1
      continue
    }
    if in_str == 1 {
      // The character after a backslash is copied as-is and never ends the string.
      if esc == 1 { out = out + ch esc = 0 i = i + 1 continue }
      if ch == "\\" { out = out + ch esc = 1 i = i + 1 continue }
      if ch == "\"" { out = out + ch in_str = 0 i = i + 1 continue }
      out = out + ch
      i = i + 1
      continue
    }
    // Not in string/comment
    if ch == "\"" { out = out + ch in_str = 1 i = i + 1 continue }
    if ch == "/" && i + 1 < n {
      // Look one character ahead to tell a comment opener from a lone '/'.
      local ch2 = s.substring(i + 1, i + 2)
      if ch2 == "/" { in_line = 1 i = i + 2 continue }
      if ch2 == "*" { in_block = 1 i = i + 2 continue }
    }
    out = out + ch
    i = i + 1
  }
  body_src = out
}
// 4.5) Optional: bundle extra module sources provided via repeated --bundle-src args
// This is a minimal concatenation bundler (no I/O, no resolver). It simply places
// provided module snippets before the main body for Stage-B parser to accept.
@ -176,7 +316,26 @@ static box Main {
body_src = merged_prefix + body_src
}
// 5) Parse and emit Stage1 JSON v0 (Program)
// 5) Normalize body: trim leading/trailing whitespace/newlines
// Characters treated as whitespace: space, tab, CR, LF. A body that consists
// only of such characters becomes the empty string.
{
  local text = body_src
  local len = text.length()
  // head: index of the first character to keep
  local head = 0
  loop(head < len) {
    local c = text.substring(head, head + 1)
    if c == " " || c == "\t" || c == "\r" || c == "\n" { head = head + 1 continue }
    break
  }
  // tail: one past the last character to keep; never moves below head
  local tail = len
  loop(tail > head) {
    local c = text.substring(tail - 1, tail)
    if c == " " || c == "\t" || c == "\r" || c == "\n" { tail = tail - 1 continue }
    break
  }
  if tail <= head { body_src = "" } else { body_src = text.substring(head, tail) }
}
// 6) Parse and emit Stage1 JSON v0 (Program)
// Bridge(JSON v0) が Program v0 を受け取り MIR に lowering するため、ここでは AST(JSON v0) を出力する。
// 既定で MIR 直出力は行わない(重い経路を避け、一行出力を保証)。
local ast_json = p.parse_program2(body_src)