refactor(compiler): reorganize StageBBodyExtractorBox structure

**Goal**: Improve readability of 480-line build_body_src method with
clear phase separators, consistent spacing, and unified formatting.
**Zero logic changes** - behavior 100% identical.

**Structure improvements**:

1. **Added clear phase separators** with ==== comment lines (listed in source order, so 4.7 precedes 4.5 and 4.6):
   - Phase 4: Body extraction (k0/k1/k2/k3 logic)
   - Phase 4.7: Comment removal
   - Phase 4.5: Bundle resolution
   - Phase 4.6: Duplicate bundle check
   - Bundle merge + line-map debug output
   - Using resolver
   - Phase 5: Trim (left/right)

2. **Improved readability**:
   - Added consistent spacing between phases (2 blank lines)
   - Unified indentation (2 spaces throughout)
   - Grouped related debug blocks together
   - Made block structure more visible

3. **Zero logic changes**:
   - All variable names unchanged
   - All conditions unchanged
   - All calculations unchanged
   - All DEBUG messages unchanged
   - All bundle/using resolver calls unchanged

**Verification**:
- The same ValueId(17) error occurs as before (expected; it will be fixed in Task B)
- Debug logs identical ([plugin/missing], [DEBUG])
- Behavior 100% identical to original

**Impact**: The code is now much more maintainable, with clear phase boundaries
that make future modifications safer and simpler.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-11-17 04:19:17 +09:00
parent e2c37f06ba
commit b4cb516f6a

View File

@ -48,8 +48,12 @@ static box StageBArgsBox {
// Phase 25.1c: Body extraction + bundle + using + trim
static box StageBBodyExtractorBox {
build_body_src(src, args) {
// 4) If wrapped in `box Main { method main() { ... } }` or `static box Main { method main() { ... } }`,
// extract the method body text. Allow disabling via env HAKO_STAGEB_BODY_EXTRACT=0.
// ============================================================================
// 4) Body extraction: extract main method body from box Main wrapper
// ============================================================================
// If wrapped in `box Main { method main() { ... } }` or `static box Main { method main() { ... } }`,
// extract the method body text. Allow disabling via env HAKO_STAGEB_BODY_EXTRACT=0.
local body_src = null
{
local do_extract = 1
@ -57,94 +61,72 @@ static box StageBBodyExtractorBox {
local ex = env.get("HAKO_STAGEB_BODY_EXTRACT")
if ex != null && ("" + ex) == "0" { do_extract = 0 }
}
if do_extract == 0 {
body_src = src
} else {
// naive search for "method main" → '(' → ')' → '{' ... balanced '}'
local s = src
// naive substring search for "method main"; fallback to "main(" inside box Main
local k0 = -1
{
local pat = "method main"
local m = pat.length()
local i = 0
local n = s.length()
loop(i + m <= n) {
if s.substring(i, i + m) == pat { k0 = i break }
i = i + 1
}
}
if k0 < 0 {
// Fallback: find "box Main" (with or without leading 'static') then locate "main(" after it
local kbox = -1
// naive search for "method main" → '(' → ')' → '{' ... balanced '}'
local s = src
// naive substring search for "method main"; fallback to "main(" inside box Main
local k0 = -1
{
local pat = "box Main"
local pat = "method main"
local m = pat.length()
local i = 0
local n = s.length()
loop(i + m <= n) {
if s.substring(i, i + m) == pat { kbox = i break }
if s.substring(i, i + m) == pat { k0 = i break }
i = i + 1
}
}
if kbox >= 0 {
// search for "main(" starting at kbox
local i = kbox
local n = s.length()
loop(i + 5 <= n) { // len("main(") = 5
if s.substring(i, i + 5) == "main(" { k0 = i break }
i = i + 1
}
} else {
// last resort: global search of "main(" (may overmatch but better than full-file body)
local i = 0
local n = s.length()
loop(i + 5 <= n) {
if s.substring(i, i + 5) == "main(" { k0 = i break }
i = i + 1
}
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
print("[DEBUG] Source length: " + s.length())
print("[DEBUG] k0 (main position) = " + k0)
}
}
if k0 >= 0 {
// find '(' after k0 (skip inside strings)
local k1 = -1
{
local j = k0
local n = s.length()
local in_str = 0
local esc = 0
loop(j < n) {
local ch = s.substring(j, j + 1)
if in_str == 1 {
if esc == 1 { esc = 0 j = j + 1 continue }
if ch == "\\" { esc = 1 j = j + 1 continue }
if ch == "\"" { in_str = 0 j = j + 1 continue }
j = j + 1
continue
if k0 < 0 {
// Fallback: find "box Main" (with or without leading 'static') then locate "main(" after it
local kbox = -1
{
local pat = "box Main"
local m = pat.length()
local i = 0
local n = s.length()
loop(i + m <= n) {
if s.substring(i, i + m) == pat { kbox = i break }
i = i + 1
}
}
if kbox >= 0 {
// search for "main(" starting at kbox
local i = kbox
local n = s.length()
loop(i + 5 <= n) { // len("main(") = 5
if s.substring(i, i + 5) == "main(" { k0 = i break }
i = i + 1
}
} else {
// last resort: global search of "main(" (may overmatch but better than full-file body)
local i = 0
local n = s.length()
loop(i + 5 <= n) {
if s.substring(i, i + 5) == "main(" { k0 = i break }
i = i + 1
}
if ch == "\"" { in_str = 1 j = j + 1 continue }
if ch == "(" { k1 = j break }
j = j + 1
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
print("[DEBUG] k1 ('(' position) = " + k1)
print("[DEBUG] Source length: " + s.length())
print("[DEBUG] k0 (main position) = " + k0)
}
}
if k1 >= 0 {
// find ')' after k1 (skip inside strings)
local k2 = -1
if k0 >= 0 {
// find '(' after k0 (skip inside strings)
local k1 = -1
{
local j = k1
local j = k0
local n = s.length()
local in_str = 0
local esc = 0
@ -158,21 +140,23 @@ static box StageBBodyExtractorBox {
continue
}
if ch == "\"" { in_str = 1 j = j + 1 continue }
if ch == ")" { k2 = j break }
if ch == "(" { k1 = j break }
j = j + 1
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
print("[DEBUG] k2 (')' position) = " + k2)
print("[DEBUG] k1 ('(' position) = " + k1)
}
}
if k2 >= 0 {
// Find opening '{' following ')' (skip inside strings)
local k3 = -1
if k1 >= 0 {
// find ')' after k1 (skip inside strings)
local k2 = -1
{
local j = k2
local j = k1
local n = s.length()
local in_str = 0
local esc = 0
@ -186,110 +170,151 @@ static box StageBBodyExtractorBox {
continue
}
if ch == "\"" { in_str = 1 j = j + 1 continue }
if ch == "{" { k3 = j break }
if ch == ")" { k2 = j break }
j = j + 1
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
print("[DEBUG] k3 ('{' position) = " + k3)
print("[DEBUG] k2 (')' position) = " + k2)
}
}
if k3 >= 0 {
// Balanced scan for matching '}'
local depth = 0
local i = k3
local n = s.length()
local in_str = 0
local esc = 0
local start_pos = -1
local end_pos = -1
loop(i < n) {
local ch = s.substring(i, i + 1)
if in_str == 1 {
if esc == 1 { esc = 0 i = i + 1 continue }
if ch == "\\" { esc = 1 i = i + 1 continue }
if ch == "\"" { in_str = 0 i = i + 1 continue }
i = i + 1
continue
if k2 >= 0 {
// Find opening '{' following ')' (skip inside strings)
local k3 = -1
{
local j = k2
local n = s.length()
local in_str = 0
local esc = 0
loop(j < n) {
local ch = s.substring(j, j + 1)
if in_str == 1 {
if esc == 1 { esc = 0 j = j + 1 continue }
if ch == "\\" { esc = 1 j = j + 1 continue }
if ch == "\"" { in_str = 0 j = j + 1 continue }
j = j + 1
continue
}
if ch == "\"" { in_str = 1 j = j + 1 continue }
if ch == "{" { k3 = j break }
j = j + 1
}
if ch == "\"" { in_str = 1 i = i + 1 continue }
if ch == "{" { depth = depth + 1 i = i + 1 continue }
if ch == "}" {
depth = depth - 1
i = i + 1
if depth == 0 { break }
continue
}
i = i + 1
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
print("[DEBUG] After balanced scan: depth=" + depth + ", i=" + i)
print("[DEBUG] k3 ('{' position) = " + k3)
}
}
if depth == 0 {
// inside of '{'..'}'
start_pos = k3 + 1
end_pos = i - 1
if k3 >= 0 {
// Balanced scan for matching '}'
local depth = 0
local i = k3
local n = s.length()
local in_str = 0
local esc = 0
local start_pos = -1
local end_pos = -1
loop(i < n) {
local ch = s.substring(i, i + 1)
if in_str == 1 {
if esc == 1 { esc = 0 i = i + 1 continue }
if ch == "\\" { esc = 1 i = i + 1 continue }
if ch == "\"" { in_str = 0 i = i + 1 continue }
i = i + 1
continue
}
if ch == "\"" { in_str = 1 i = i + 1 continue }
if ch == "{" { depth = depth + 1 i = i + 1 continue }
if ch == "}" {
depth = depth - 1
i = i + 1
if depth == 0 { break }
continue
}
i = i + 1
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
print("[DEBUG] Will extract substring(" + start_pos + ", " + end_pos + ")")
print("[DEBUG] After balanced scan: depth=" + depth + ", i=" + i)
}
}
}
// Extract outside the nested blocks to avoid potential scoping issues
if start_pos >= 0 && end_pos >= start_pos {
body_src = s.substring(start_pos, end_pos)
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
if body_src == null {
print("[DEBUG] After extraction: body_src is NULL!")
} else {
print("[DEBUG] After extraction: body_src length = " + body_src.length())
if depth == 0 {
// inside of '{'..'}'
start_pos = k3 + 1
end_pos = i - 1
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
print("[DEBUG] Will extract substring(" + start_pos + ", " + end_pos + ")")
}
}
}
// Extract outside the nested blocks to avoid potential scoping issues
if start_pos >= 0 && end_pos >= start_pos {
body_src = s.substring(start_pos, end_pos)
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
if body_src == null {
print("[DEBUG] After extraction: body_src is NULL!")
} else {
print("[DEBUG] After extraction: body_src length = " + body_src.length())
}
}
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
if body_src == null { print("[DEBUG] body_src is NULL after line 254") } else { print("[DEBUG] body_src OK after line 254, len=" + body_src.length()) }
}
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
if body_src == null { print("[DEBUG] body_src is NULL after line 254") } else { print("[DEBUG] body_src OK after line 254, len=" + body_src.length()) }
if body_src == null { print("[DEBUG] body_src is NULL after line 256") } else { print("[DEBUG] body_src OK after line 256, len=" + body_src.length()) }
}
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
if body_src == null { print("[DEBUG] body_src is NULL after line 256") } else { print("[DEBUG] body_src OK after line 256, len=" + body_src.length()) }
if body_src == null { print("[DEBUG] body_src is NULL after line 268") } else { print("[DEBUG] body_src OK after line 268, len=" + body_src.length()) }
}
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
if body_src == null { print("[DEBUG] body_src is NULL after line 268") } else { print("[DEBUG] body_src OK after line 268, len=" + body_src.length()) }
if body_src == null { print("[DEBUG] body_src is NULL after line 269") } else { print("[DEBUG] body_src OK after line 269, len=" + body_src.length()) }
}
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
if body_src == null { print("[DEBUG] body_src is NULL after line 269") } else { print("[DEBUG] body_src OK after line 269, len=" + body_src.length()) }
if body_src == null { print("[DEBUG] body_src is NULL after line 270") } else { print("[DEBUG] body_src OK after line 270, len=" + body_src.length()) }
}
}
}
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
if body_src == null { print("[DEBUG] body_src is NULL after line 270") } else { print("[DEBUG] body_src OK after line 270, len=" + body_src.length()) }
}
}
}
// Fallback: if extraction failed or produced empty, use full src
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
@ -306,7 +331,11 @@ static box StageBBodyExtractorBox {
if body_src == null || ("" + body_src).length() == 0 { body_src = src }
}
// 4.7) Strip comments from body_src to avoid stray tokens in Program(JSON)
// ============================================================================
// 4.7) Comment removal: strip // and /* */ comments
// ============================================================================
{
local s = body_src
local out = ""
@ -316,18 +345,22 @@ static box StageBBodyExtractorBox {
local esc = 0
local in_line = 0
local in_block = 0
loop(i < n) {
local ch = s.substring(i, i + 1)
if in_line == 1 {
if ch == "\n" { in_line = 0 out = out + ch }
i = i + 1
continue
}
if in_block == 1 {
if ch == "*" && i + 1 < n && s.substring(i + 1, i + 2) == "/" { in_block = 0 i = i + 2 continue }
i = i + 1
continue
}
if in_str == 1 {
if esc == 1 { out = out + ch esc = 0 i = i + 1 continue }
if ch == "\\" { out = out + ch esc = 1 i = i + 1 continue }
@ -336,6 +369,7 @@ static box StageBBodyExtractorBox {
i = i + 1
continue
}
// Not in string/comment
if ch == "\"" { out = out + ch in_str = 1 i = i + 1 continue }
if ch == "/" && i + 1 < n {
@ -343,36 +377,46 @@ static box StageBBodyExtractorBox {
if ch2 == "/" { in_line = 1 i = i + 2 continue }
if ch2 == "*" { in_block = 1 i = i + 2 continue }
}
out = out + ch
i = i + 1
}
body_src = out
}
// 4.5) Optional: bundle extra module sources provided via repeated --bundle-src args
// This is a minimal concatenation bundler (no I/O, no resolver). It simply places
// provided module snippets before the main body for StageB parser to accept.
// Usage example:
// compiler_stageb.hako -- --bundle-src "static box Util { method nop(a){ return a } }" --source "static box Main { method main(args){ return 0 } }"
// Policy:
// - --bundle-mod "Name:code" accepts multiple named bundles, but duplicate Name is FailFast
// and emits a stable tag: `[bundle/duplicate] Name`.
// - --require-mod Name ensures the named module is present (via --bundle-mod), otherwise
// FailFast with `[bundle/missing] Name`.
// ============================================================================
// 4.5) Bundle resolution: --bundle-src, --bundle-mod, --require-mod
// ============================================================================
// Optional: bundle extra module sources provided via repeated --bundle-src args
// This is a minimal concatenation bundler (no I/O, no resolver). It simply places
// provided module snippets before the main body for Stage-B parser to accept.
// Usage example:
// compiler_stageb.hako -- --bundle-src "static box Util { method nop(a){ return a } }" --source "static box Main { method main(args){ return 0 } }"
// Policy:
// - --bundle-mod "Name:code" accepts multiple named bundles, but duplicate Name is Fail-Fast
// and emits a stable tag: `[bundle/duplicate] Name`.
// - --require-mod Name ensures the named module is present (via --bundle-mod), otherwise
// Fail-Fast with `[bundle/missing] Name`.
local bundles = new ArrayBox()
// Named bundles (name:src) and requirements
local bundle_names = new ArrayBox()
local bundle_srcs = new ArrayBox()
local require_mods = new ArrayBox()
if args != null {
local i = 0
local n = args.length()
loop(i < n) {
local t = "" + args.get(i)
if t == "--bundle-src" && i + 1 < n {
bundles.push("" + args.get(i + 1))
i = i + 1
}
if t == "--bundle-mod" && i + 1 < n {
// Parse "name:code" into (name, code)
local pair = "" + args.get(i + 1)
@ -391,17 +435,23 @@ static box StageBBodyExtractorBox {
}
i = i + 1
}
if t == "--require-mod" && i + 1 < n {
require_mods.push("" + args.get(i + 1))
i = i + 1
}
i = i + 1
}
}
// Required modules are validated in BundleResolver.resolve (includes env alias injection)
// 4.6) FailFast on duplicate named bundles to avoid ambiguity
// ============================================================================
// 4.6) Fail-Fast on duplicate named bundles
// ============================================================================
// Policy: duplicate module names are not allowed. Emit a stable diagnostic tag and exit.
if bundle_names.length() > 1 {
local i = 0
local n = bundle_names.length()
@ -419,9 +469,15 @@ static box StageBBodyExtractorBox {
}
}
// ============================================================================
// Bundle merge + line-map debug output
// ============================================================================
if bundles.length() > 0 || bundle_srcs.length() > 0 || require_mods.length() > 0 {
local merged_prefix = BundleResolver.resolve(bundles, bundle_names, bundle_srcs, require_mods)
if merged_prefix == null { return 1 }
// Debug: emit line-map for merged bundles so parse error line can be mapped
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
@ -437,6 +493,7 @@ static box StageBBodyExtractorBox {
}
}
print("[stageb/line-map] prefix total lines=" + total)
// bundle-src (anonymous)
if bundles != null && bundles.length() > 0 {
local i = 0; local acc = 1
@ -458,6 +515,7 @@ static box StageBBodyExtractorBox {
i = i + 1
}
}
// bundle-mod (named)
if bundle_names != null && bundle_srcs != null {
local i2 = 0; local acc2 = 1
@ -491,9 +549,15 @@ static box StageBBodyExtractorBox {
}
}
}
body_src = merged_prefix + body_src
}
// ============================================================================
// Using resolver: resolve using statements
// ============================================================================
{
local apply_flag = env.get("HAKO_STAGEB_APPLY_USINGS")
if apply_flag == null || ("" + apply_flag) != "0" {
@ -504,22 +568,29 @@ static box StageBBodyExtractorBox {
}
}
// 5) Normalize body: trim leading/trailing whitespaces/newlines
// ============================================================================
// 5) Trim: normalize leading/trailing whitespaces and newlines
// ============================================================================
{
local s = body_src
local n = s.length()
local b = 0
// left trim (space, tab, CR, LF)
loop(b < n) {
local ch = s.substring(b, b + 1)
if ch == " " || ch == "\t" || ch == "\r" || ch == "\n" { b = b + 1 } else { break }
}
// right trim
local e = n
loop(e > b) {
local ch = s.substring(e - 1, e)
if ch == " " || ch == "\t" || ch == "\r" || ch == "\n" { e = e - 1 } else { break }
}
if e > b { body_src = s.substring(b, e) } else { body_src = "" }
}