diff --git a/lang/src/compiler/entry/compiler_stageb.hako b/lang/src/compiler/entry/compiler_stageb.hako index 74277b68..b0b2eb8b 100644 --- a/lang/src/compiler/entry/compiler_stageb.hako +++ b/lang/src/compiler/entry/compiler_stageb.hako @@ -48,8 +48,12 @@ static box StageBArgsBox { // Phase 25.1c: Body extraction + bundle + using + trim static box StageBBodyExtractorBox { build_body_src(src, args) { - // 4) If wrapped in `box Main { method main() { ... } }` or `static box Main { method main() { ... } }`, - // extract the method body text. Allow disabling via env HAKO_STAGEB_BODY_EXTRACT=0. + // ============================================================================ + // 4) Body extraction: extract main method body from box Main wrapper + // ============================================================================ + // If wrapped in `box Main { method main() { ... } }` or `static box Main { method main() { ... } }`, + // extract the method body text. Allow disabling via env HAKO_STAGEB_BODY_EXTRACT=0. + local body_src = null { local do_extract = 1 @@ -57,94 +61,72 @@ static box StageBBodyExtractorBox { local ex = env.get("HAKO_STAGEB_BODY_EXTRACT") if ex != null && ("" + ex) == "0" { do_extract = 0 } } + if do_extract == 0 { body_src = src } else { - // naive search for "method main" → '(' → ')' → '{' ... balanced '}' - local s = src - // naive substring search for "method main"; fallback to "main(" inside box Main - local k0 = -1 - { - local pat = "method main" - local m = pat.length() - local i = 0 - local n = s.length() - loop(i + m <= n) { - if s.substring(i, i + m) == pat { k0 = i break } - i = i + 1 - } - } - if k0 < 0 { - // Fallback: find "box Main" (with or without leading 'static') then locate "main(" after it - local kbox = -1 + // naive search for "method main" → '(' → ')' → '{' ... balanced '}' + local s = src + + // naive substring search for "method main"; fallback to "main(" inside box Main + local k0 = -1 { - local pat = "box Main" + local pat = "method main" local m = pat.length() local i = 0 local n = s.length() loop(i + m <= n) { - if s.substring(i, i + m) == pat { kbox = i break } + if s.substring(i, i + m) == pat { k0 = i break } i = i + 1 } } - if kbox >= 0 { - // search for "main(" starting at kbox - local i = kbox - local n = s.length() - loop(i + 5 <= n) { // len("main(") = 5 - if s.substring(i, i + 5) == "main(" { k0 = i break } - i = i + 1 - } - } else { - // last resort: global search of "main(" (may overmatch but better than full-file body) - local i = 0 - local n = s.length() - loop(i + 5 <= n) { - if s.substring(i, i + 5) == "main(" { k0 = i break } - i = i + 1 - } - } - } - { - local dbg = env.get("HAKO_STAGEB_DEBUG") - if dbg != null && ("" + dbg) == "1" { - print("[DEBUG] Source length: " + s.length()) - print("[DEBUG] k0 (main position) = " + k0) - } - } - if k0 >= 0 { - // find '(' after k0 (skip inside strings) - local k1 = -1 - { - local j = k0 - local n = s.length() - local in_str = 0 - local esc = 0 - loop(j < n) { - local ch = s.substring(j, j + 1) - if in_str == 1 { - if esc == 1 { esc = 0 j = j + 1 continue } - if ch == "\\" { esc = 1 j = j + 1 continue } - if ch == "\"" { in_str = 0 j = j + 1 continue } - j = j + 1 - continue + + if k0 < 0 { + // Fallback: find "box Main" (with or without leading 'static') then locate "main(" after it + local kbox = -1 + { + local pat = "box Main" + local m = pat.length() + local i = 0 + local n = s.length() + loop(i + m <= n) { + if s.substring(i, i + m) == pat { kbox = i break } + i = i + 1 + } + } + + if kbox >= 0 { + // search for "main(" starting at kbox + local i = kbox + local n = s.length() + loop(i + 5 <= n) { // len("main(") = 5 + if s.substring(i, i + 5) == "main(" { k0 = i break } + i = i + 1 + } + } else { + // last resort: global search of "main(" (may overmatch but better than full-file body) + local i = 0 + local n = s.length() + loop(i + 5 <= n) { + if s.substring(i, i + 5) == "main(" { k0 = i break } + i = i + 1 } - if ch == "\"" { in_str = 1 j = j + 1 continue } - if ch == "(" { k1 = j break } - j = j + 1 } } + { local dbg = env.get("HAKO_STAGEB_DEBUG") if dbg != null && ("" + dbg) == "1" { - print("[DEBUG] k1 ('(' position) = " + k1) + print("[DEBUG] Source length: " + s.length()) + print("[DEBUG] k0 (main position) = " + k0) } } - if k1 >= 0 { - // find ')' after k1 (skip inside strings) - local k2 = -1 + + if k0 >= 0 { + // find '(' after k0 (skip inside strings) + local k1 = -1 { - local j = k1 + local j = k0 local n = s.length() local in_str = 0 local esc = 0 @@ -158,21 +140,23 @@ static box StageBBodyExtractorBox { continue } if ch == "\"" { in_str = 1 j = j + 1 continue } - if ch == ")" { k2 = j break } + if ch == "(" { k1 = j break } j = j + 1 } } + { local dbg = env.get("HAKO_STAGEB_DEBUG") if dbg != null && ("" + dbg) == "1" { - print("[DEBUG] k2 (')' position) = " + k2) + print("[DEBUG] k1 ('(' position) = " + k1) } } - if k2 >= 0 { - // Find opening '{' following ')' (skip inside strings) - local k3 = -1 + + if k1 >= 0 { + // find ')' after k1 (skip inside strings) + local k2 = -1 { - local j = k2 + local j = k1 local n = s.length() local in_str = 0 local esc = 0 @@ -186,110 +170,151 @@ static box StageBBodyExtractorBox { continue } if ch == "\"" { in_str = 1 j = j + 1 continue } - if ch == "{" { k3 = j break } + if ch == ")" { k2 = j break } j = j + 1 } } + { local dbg = env.get("HAKO_STAGEB_DEBUG") if dbg != null && ("" + dbg) == "1" { - print("[DEBUG] k3 ('{' position) = " + k3) + print("[DEBUG] k2 (')' position) = " + k2) } } - if k3 >= 0 { - // Balanced scan for matching '}' - local depth = 0 - local i = k3 - local n = s.length() - local in_str = 0 - local esc = 0 - local start_pos = -1 - local end_pos = -1 - loop(i < n) { - local ch = s.substring(i, i + 1) - if in_str == 1 { - if esc == 1 { esc = 0 i = i + 1 continue } - if ch == "\\" { esc = 1 i = i + 1 continue } - if ch == "\"" { in_str = 0 i = i + 1 continue } - i = i + 1 - continue + + if k2 >= 0 { + // Find opening '{' following ')' (skip inside strings) + local k3 = -1 + { + local j = k2 + local n = s.length() + local in_str = 0 + local esc = 0 + loop(j < n) { + local ch = s.substring(j, j + 1) + if in_str == 1 { + if esc == 1 { esc = 0 j = j + 1 continue } + if ch == "\\" { esc = 1 j = j + 1 continue } + if ch == "\"" { in_str = 0 j = j + 1 continue } + j = j + 1 + continue + } + if ch == "\"" { in_str = 1 j = j + 1 continue } + if ch == "{" { k3 = j break } + j = j + 1 } - if ch == "\"" { in_str = 1 i = i + 1 continue } - if ch == "{" { depth = depth + 1 i = i + 1 continue } - if ch == "}" { - depth = depth - 1 - i = i + 1 - if depth == 0 { break } - continue - } - i = i + 1 } + { local dbg = env.get("HAKO_STAGEB_DEBUG") if dbg != null && ("" + dbg) == "1" { - print("[DEBUG] After balanced scan: depth=" + depth + ", i=" + i) + print("[DEBUG] k3 ('{' position) = " + k3) } } - if depth == 0 { - // inside of '{'..'}' - start_pos = k3 + 1 - end_pos = i - 1 + + if k3 >= 0 { + // Balanced scan for matching '}' + local depth = 0 + local i = k3 + local n = s.length() + local in_str = 0 + local esc = 0 + local start_pos = -1 + local end_pos = -1 + loop(i < n) { + local ch = s.substring(i, i + 1) + if in_str == 1 { + if esc == 1 { esc = 0 i = i + 1 continue } + if ch == "\\" { esc = 1 i = i + 1 continue } + if ch == "\"" { in_str = 0 i = i + 1 continue } + i = i + 1 + continue + } + if ch == "\"" { in_str = 1 i = i + 1 continue } + if ch == "{" { depth = depth + 1 i = i + 1 continue } + if ch == "}" { + depth = depth - 1 + i = i + 1 + if depth == 0 { break } + continue + } + i = i + 1 + } + { local dbg = env.get("HAKO_STAGEB_DEBUG") if dbg != null && ("" + dbg) == "1" { - print("[DEBUG] Will extract substring(" + start_pos + ", " + end_pos + ")") + print("[DEBUG] After balanced scan: depth=" + depth + ", i=" + i) } } - } - // Extract outside the nested blocks to avoid potential scoping issues - if start_pos >= 0 && end_pos >= start_pos { - body_src = s.substring(start_pos, end_pos) - { - local dbg = env.get("HAKO_STAGEB_DEBUG") - if dbg != null && ("" + dbg) == "1" { - if body_src == null { - print("[DEBUG] After extraction: body_src is NULL!") - } else { - print("[DEBUG] After extraction: body_src length = " + body_src.length()) + + if depth == 0 { + // inside of '{'..'}' + start_pos = k3 + 1 + end_pos = i - 1 + { + local dbg = env.get("HAKO_STAGEB_DEBUG") + if dbg != null && ("" + dbg) == "1" { + print("[DEBUG] Will extract substring(" + start_pos + ", " + end_pos + ")") } } } + + // Extract outside the nested blocks to avoid potential scoping issues + if start_pos >= 0 && end_pos >= start_pos { + body_src = s.substring(start_pos, end_pos) + { + local dbg = env.get("HAKO_STAGEB_DEBUG") + if dbg != null && ("" + dbg) == "1" { + if body_src == null { + print("[DEBUG] After extraction: body_src is NULL!") + } else { + print("[DEBUG] After extraction: body_src length = " + body_src.length()) + } + } + } + } + + { + local dbg = env.get("HAKO_STAGEB_DEBUG") + if dbg != null && ("" + dbg) == "1" { + if body_src == null { print("[DEBUG] body_src is NULL after line 254") } else { print("[DEBUG] body_src OK after line 254, len=" + body_src.length()) } + } + } } + { local dbg = env.get("HAKO_STAGEB_DEBUG") if dbg != null && ("" + dbg) == "1" { - if body_src == null { print("[DEBUG] body_src is NULL after line 254") } else { print("[DEBUG] body_src OK after line 254, len=" + body_src.length()) } + if body_src == null { print("[DEBUG] body_src is NULL after line 256") } else { print("[DEBUG] body_src OK after line 256, len=" + body_src.length()) } } } } + { local dbg = env.get("HAKO_STAGEB_DEBUG") if dbg != null && ("" + dbg) == "1" { - if body_src == null { print("[DEBUG] body_src is NULL after line 256") } else { print("[DEBUG] body_src OK after line 256, len=" + body_src.length()) } + if body_src == null { print("[DEBUG] body_src is NULL after line 268") } else { print("[DEBUG] body_src OK after line 268, len=" + body_src.length()) } } } } + { local dbg = env.get("HAKO_STAGEB_DEBUG") if dbg != null && ("" + dbg) == "1" { - if body_src == null { print("[DEBUG] body_src is NULL after line 268") } else { print("[DEBUG] body_src OK after line 268, len=" + body_src.length()) } + if body_src == null { print("[DEBUG] body_src is NULL after line 269") } else { print("[DEBUG] body_src OK after line 269, len=" + body_src.length()) } } } } + { local dbg = env.get("HAKO_STAGEB_DEBUG") if dbg != null && ("" + dbg) == "1" { - if body_src == null { print("[DEBUG] body_src is NULL after line 269") } else { print("[DEBUG] body_src OK after line 269, len=" + body_src.length()) } + if body_src == null { print("[DEBUG] body_src is NULL after line 270") } else { print("[DEBUG] body_src OK after line 270, len=" + body_src.length()) } } } } - { - local dbg = env.get("HAKO_STAGEB_DEBUG") - if dbg != null && ("" + dbg) == "1" { - if body_src == null { print("[DEBUG] body_src is NULL after line 270") } else { print("[DEBUG] body_src OK after line 270, len=" + body_src.length()) } - } - } - } + // Fallback: if extraction failed or produced empty, use full src { local dbg = env.get("HAKO_STAGEB_DEBUG") @@ -306,7 +331,11 @@ static box StageBBodyExtractorBox { if body_src == null || ("" + body_src).length() == 0 { body_src = src } } - // 4.7) Strip comments from body_src to avoid stray tokens in Program(JSON) + + // ============================================================================ + // 4.7) Comment removal: strip // and /* */ comments + // ============================================================================ + { local s = body_src local out = "" @@ -316,18 +345,22 @@ static box StageBBodyExtractorBox { local esc = 0 local in_line = 0 local in_block = 0 + loop(i < n) { local ch = s.substring(i, i + 1) + if in_line == 1 { if ch == "\n" { in_line = 0 out = out + ch } i = i + 1 continue } + if in_block == 1 { if ch == "*" && i + 1 < n && s.substring(i + 1, i + 2) == "/" { in_block = 0 i = i + 2 continue } i = i + 1 continue } + if in_str == 1 { if esc == 1 { out = out + ch esc = 0 i = i + 1 continue } if ch == "\\" { out = out + ch esc = 1 i = i + 1 continue } @@ -336,6 +369,7 @@ static box StageBBodyExtractorBox { i = i + 1 continue } + // Not in string/comment if ch == "\"" { out = out + ch in_str = 1 i = i + 1 continue } if ch == "/" && i + 1 < n { @@ -343,36 +377,46 @@ static box StageBBodyExtractorBox { if ch2 == "/" { in_line = 1 i = i + 2 continue } if ch2 == "*" { in_block = 1 i = i + 2 continue } } + out = out + ch i = i + 1 } + body_src = out } - // 4.5) Optional: bundle extra module sources provided via repeated --bundle-src args - // This is a minimal concatenation bundler (no I/O, no resolver). It simply places - // provided module snippets before the main body for Stage‑B parser to accept. - // Usage example: - // compiler_stageb.hako -- --bundle-src "static box Util { method nop(a){ return a } }" --source "static box Main { method main(args){ return 0 } }" - // Policy: - // - --bundle-mod "Name:code" accepts multiple named bundles, but duplicate Name is Fail‑Fast - // and emits a stable tag: `[bundle/duplicate] Name`. - // - --require-mod Name ensures the named module is present (via --bundle-mod), otherwise - // Fail‑Fast with `[bundle/missing] Name`. + + // ============================================================================ + // 4.5) Bundle resolution: --bundle-src, --bundle-mod, --require-mod + // ============================================================================ + // Optional: bundle extra module sources provided via repeated --bundle-src args + // This is a minimal concatenation bundler (no I/O, no resolver). It simply places + // provided module snippets before the main body for Stage-B parser to accept. + // Usage example: + // compiler_stageb.hako -- --bundle-src "static box Util { method nop(a){ return a } }" --source "static box Main { method main(args){ return 0 } }" + // Policy: + // - --bundle-mod "Name:code" accepts multiple named bundles, but duplicate Name is Fail-Fast + // and emits a stable tag: `[bundle/duplicate] Name`. + // - --require-mod Name ensures the named module is present (via --bundle-mod), otherwise + // Fail-Fast with `[bundle/missing] Name`. + local bundles = new ArrayBox() // Named bundles (name:src) and requirements local bundle_names = new ArrayBox() local bundle_srcs = new ArrayBox() local require_mods = new ArrayBox() + if args != null { local i = 0 local n = args.length() loop(i < n) { local t = "" + args.get(i) + if t == "--bundle-src" && i + 1 < n { bundles.push("" + args.get(i + 1)) i = i + 1 } + if t == "--bundle-mod" && i + 1 < n { // Parse "name:code" into (name, code) local pair = "" + args.get(i + 1) @@ -391,17 +435,23 @@ static box StageBBodyExtractorBox { } i = i + 1 } + if t == "--require-mod" && i + 1 < n { require_mods.push("" + args.get(i + 1)) i = i + 1 } + i = i + 1 } } // Required modules are validated in BundleResolver.resolve (includes env alias injection) - // 4.6) Fail‑Fast on duplicate named bundles to avoid ambiguity + + // ============================================================================ + // 4.6) Fail-Fast on duplicate named bundles + // ============================================================================ // Policy: duplicate module names are not allowed. Emit a stable diagnostic tag and exit. + if bundle_names.length() > 1 { local i = 0 local n = bundle_names.length() @@ -419,9 +469,15 @@ static box StageBBodyExtractorBox { } } + + // ============================================================================ + // Bundle merge + line-map debug output + // ============================================================================ + if bundles.length() > 0 || bundle_srcs.length() > 0 || require_mods.length() > 0 { local merged_prefix = BundleResolver.resolve(bundles, bundle_names, bundle_srcs, require_mods) if merged_prefix == null { return 1 } + // Debug: emit line-map for merged bundles so parse error line can be mapped { local dbg = env.get("HAKO_STAGEB_DEBUG") @@ -437,6 +493,7 @@ static box StageBBodyExtractorBox { } } print("[stageb/line-map] prefix total lines=" + total) + // bundle-src (anonymous) if bundles != null && bundles.length() > 0 { local i = 0; local acc = 1 @@ -458,6 +515,7 @@ static box StageBBodyExtractorBox { i = i + 1 } } + // bundle-mod (named) if bundle_names != null && bundle_srcs != null { local i2 = 0; local acc2 = 1 @@ -491,9 +549,15 @@ static box StageBBodyExtractorBox { } } } + body_src = merged_prefix + body_src } + + // ============================================================================ + // Using resolver: resolve using statements + // ============================================================================ + { local apply_flag = env.get("HAKO_STAGEB_APPLY_USINGS") if apply_flag == null || ("" + apply_flag) != "0" { @@ -504,22 +568,29 @@ static box StageBBodyExtractorBox { } } - // 5) Normalize body: trim leading/trailing whitespaces/newlines + + // ============================================================================ + // 5) Trim: normalize leading/trailing whitespaces and newlines + // ============================================================================ + { local s = body_src local n = s.length() local b = 0 + // left trim (space, tab, CR, LF) loop(b < n) { local ch = s.substring(b, b + 1) if ch == " " || ch == "\t" || ch == "\r" || ch == "\n" { b = b + 1 } else { break } } + // right trim local e = n loop(e > b) { local ch = s.substring(e - 1, e) if ch == " " || ch == "\t" || ch == "\r" || ch == "\n" { e = e - 1 } else { break } } + if e > b { body_src = s.substring(b, e) } else { body_src = "" } }