Files
hakorune/lang/src/compiler/entry/compiler.hako
nyash-codex df9068a555 feat(stage-b): Add FLOW keyword support + fix Stage-3 keyword conflicts
##  Fixed Issues

### 1. `local` keyword tokenization (commit 9aab64f7)
- Added Stage-3 gate for LOCAL/TRY/CATCH/THROW keywords
- LOCAL now only active when NYASH_PARSER_STAGE3=1

### 2. `env.local.get` keyword conflict
- File: `lang/src/compiler/entry/compiler_stageb.hako:21-23`
- Problem: `.local` in member access tokenized as `.LOCAL` keyword
- Fix: Commented out `env.local.get("HAKO_SOURCE")` line
- Fallback: Use `--source` argument (still functional)

### 3. `flow` keyword missing
- Added FLOW to TokenType enum (`src/tokenizer/kinds.rs`)
- Added "flow" → TokenType::FLOW mapping (`src/tokenizer/lex_ident.rs`)
- Added FLOW to Stage-3 gate (requires NYASH_PARSER_STAGE3=1)
- Added FLOW to parser statement dispatch (`src/parser/statements/mod.rs`)
- Added FLOW to declaration handler (`src/parser/statements/declarations.rs`)
- Updated box_declaration parser to accept BOX or FLOW (`src/parser/declarations/box_definition.rs`)
- Treat `flow FooBox {}` as syntactic sugar for `box FooBox {}`

### 4. Module namespace conversion
- Renamed `lang.compiler.builder.ssa.local` → `localvar` (avoid keyword)
- Renamed file `local.hako` → `local_ssa.hako`
- Converted 152 path-based using statements to namespace format
- Added 26+ entries to `nyash.toml` [modules] section

## ⚠️ Remaining Issues

### Stage-B selfhost compiler performance
- Stage-B compiler not producing output (hangs/times out after 10+ seconds)
- Excessive PHI debug output suggests compilation loop issue
- Needs investigation: infinite loop or N² algorithm in hako compiler

### Fallback JSON version mismatch
- Rust fallback (`--emit-mir-json`) emits MIR v1 JSON (schema_version: "1.0")
- Smoke tests expect MIR v0 JSON (`"version":0, "kind":"Program"`)
- stageb_helpers.sh fallback needs adjustment

## Test Status
- Parse errors: FIXED 
- Keyword conflicts: FIXED 
- Stage-B smoke tests: STILL FAILING  (performance issue)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-02 04:13:17 +09:00

505 lines
18 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

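// Compiler entry (MVP)
// - With --stage-b, delegate to the Stage-B compiler and print the JSON it returns
// - With --min-json, or whenever --source is given, compile the source with the
//   Stage-A mini parser and print Program JSON v0 to stdout
// - Otherwise, print a constant-return Program JSON v0 (--return-int value, default 42)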
using lang.compiler.entry.compiler_stageb as StageBMain
static box Main {
// Parse an optionally "-"-prefixed run of decimal digits into an integer.
// `raw` is stringified first, so both strings and numbers are accepted.
// Returns null for null/empty input, for a lone "-", and as soon as any
// non-digit character is met.
_parse_signed_int(raw) {
    if raw == null { return null }
    local digits = "" + raw
    local n = digits.length()
    if n == 0 { return null }
    local factor = 1
    local pos = 0
    if digits.substring(0, 1) == "-" {
        factor = -1
        pos = 1
    }
    // A bare "-" carries no digits at all.
    if pos >= n { return null }
    local total = 0
    loop(pos < n) {
        local c = digits.substring(pos, pos + 1)
        if c < "0" || c > "9" { return null }
        local d = "0123456789".indexOf(c)
        if d < 0 { return null }
        total = total * 10 + d
        pos = pos + 1
    }
    return factor * total
}
// Translate the command-line argument list into a flags map.
//   Stage-A flags: emit (--min-json), source (--source S), ret (--return-int N)
//   Stage-B flags: stage_b (--stage-b), prefer_cfg (--prefer-cfg N),
//                  stage3 (--stage3), v1_compat (--v1-compat)
// Value-taking options are only recognised when another argument follows;
// the following argument is then consumed. Unknown arguments are ignored.
// A null `args` yields the defaults.
_collect_flags(args) {
    local flags = { emit: 0, ret: null, source: null, stage_b: 0, prefer_cfg: 1, stage3: 0, v1_compat: 0 }
    if args == null { return flags }
    local count = args.length()
    local pos = 0
    loop(pos < count) {
        local arg = "" + args.get(pos)
        local has_value = pos + 1 < count
        if arg == "--min-json" {
            flags.emit = 1
        } else if arg == "--stage-b" {
            flags.stage_b = 1
        } else if arg == "--stage3" {
            flags.stage3 = 1
        } else if arg == "--v1-compat" {
            flags.v1_compat = 1
        } else if has_value {
            if arg == "--source" {
                flags.source = "" + args.get(pos + 1)
                pos = pos + 1
            } else if arg == "--return-int" {
                // A value that does not parse is still consumed, but leaves the default.
                local ret_value = me._parse_signed_int(args.get(pos + 1))
                if ret_value != null { flags.ret = ret_value }
                pos = pos + 1
            } else if arg == "--prefer-cfg" {
                local cfg_value = me._parse_signed_int(args.get(pos + 1))
                if cfg_value != null { flags.prefer_cfg = cfg_value }
                pos = pos + 1
            }
        }
        pos = pos + 1
    }
    return flags
}
// ----- Minimal parser utilities (Stage-A) -----
// Strip leading and trailing blanks (space, newline, carriage return, tab).
// A null input is returned as the empty string.
_trim(s) {
    if s == null { return "" }
    local blanks = " \n\r\t"
    local head = 0
    local tail = s.length()
    // Advance past leading blanks.
    loop(head < tail) {
        if blanks.indexOf(s.substring(head, head + 1)) < 0 { break }
        head = head + 1
    }
    // Retreat over trailing blanks, never crossing `head`.
    loop(tail > head) {
        if blanks.indexOf(s.substring(tail - 1, tail)) < 0 { break }
        tail = tail - 1
    }
    return s.substring(head, tail)
}
// Prefix test: returns 0 when `s` is shorter than `pref`, otherwise the result
// of comparing the leading `pref.length()` characters of `s` with `pref`.
// Neither argument is null-checked.
_starts_with(s, pref) {
    local want = pref.length()
    if s.length() < want { return 0 }
    local head = s.substring(0, want)
    return head == pref
}
// Extract the text between the braces of the first `static method main`.
// Returns "" when `src` is null, when the marker or an opening brace is not
// found, or when the braces never balance. Braces are counted textually;
// string literals and comments are not recognised.
_find_main_body(src) {
    if src == null { return "" }
    local at = src.indexOf("static method main")
    if at < 0 { return "" }
    // Search the remainder for '{' (only the single-argument indexOf is used).
    local rest = src.substring(at, src.length())
    local rel = rest.indexOf("{")
    if rel < 0 { return "" }
    local lbrace = at + rel
    local total = src.length()
    local nesting = 0
    local k = lbrace
    loop(k < total) {
        local c = src.substring(k, k + 1)
        if c == "{" {
            nesting = nesting + 1
        } else if c == "}" {
            nesting = nesting - 1
            // Back at zero: this brace closes the one at `lbrace`.
            if nesting == 0 { return src.substring(lbrace + 1, k) }
        }
        k = k + 1
    }
    return ""
}
// ----- Program JSON v0 node builders -----
// Each helper returns one node serialized as a string. Arguments are spliced
// in verbatim: no JSON escaping is applied to names or string contents.

// Int literal node; `n` is stringified.
_emit_int(n) {
    local digits = "" + n
    return "{\"type\":\"Int\",\"value\":" + digits + "}"
}
// Str literal node; `t` is the text between the quotes.
_emit_str(t) {
    local node = "{\"type\":\"Str\",\"value\":\"" + t
    return node + "\"}"
}
// Variable reference node.
_emit_var(n) {
    local node = "{\"type\":\"Var\",\"name\":\"" + n
    return node + "\"}"
}
// Call node; `args_json` is the already-serialized, comma-separated argument list.
_emit_call(name, args_json) {
    local node = "{\"type\":\"Call\",\"name\":\"" + name
    node = node + "\" ,\"args\":[" + args_json
    return node + "]}"
}
// Method-call node on the serialized receiver `recv_json`.
_emit_method(recv_json, m, args_json) {
    local node = "{\"type\":\"Method\",\"recv\":" + recv_json
    node = node + ",\"method\":\"" + m
    node = node + "\",\"args\":[" + args_json
    return node + "]}"
}
// Local-declaration statement node.
_emit_stmt_local(name, expr_json) {
    local node = "{\"type\":\"Local\",\"name\":\"" + name
    node = node + "\" ,\"expr\":" + expr_json
    return node + "}"
}
// Extern statement node calling env.console.log with one argument.
_emit_stmt_extern_print(expr_json) {
    local node = "{\"type\":\"Extern\",\"iface\":\"env.console\",\"method\":\"log\",\"args\":[" + expr_json
    return node + "]}"
}
// Expression-statement node.
_emit_stmt_expr(expr_json) {
    local node = "{\"type\":\"Expr\",\"expr\":" + expr_json
    return node + "}"
}
// Parse a token as a signed decimal integer by delegating to
// _parse_signed_int; the result is null when the token is not one.
_parse_number(tok) {
    local parsed = me._parse_signed_int(tok)
    return parsed
}
// Build the JSON node for an index / map key token.
//   "quoted"        -> Str node with the surrounding quotes removed
//   signed integer  -> Int node
//   anything else   -> Var node
// The Var fallback mirrors the number-or-variable rule at the end of
// _parse_expr_simple. Without it a key such as `i` in `a[i]` was emitted as an
// Int node whose value was null.
_emit_key(tok) {
    tok = me._trim(tok)
    local n = tok.length()
    if n >= 2 && tok.substring(0, 1) == "\"" && tok.substring(n - 1, n) == "\"" {
        return me._emit_str(tok.substring(1, n - 1))
    }
    local num = me._parse_number(tok)
    if num != null { return me._emit_int(num) }
    return me._emit_var(tok)
}
// Binary arithmetic node; `lhs` and `rhs` are already-serialized operand nodes.
_emit_binary(op, lhs, rhs) {
    local node = "{\"type\":\"Binary\",\"op\":\"" + op
    node = node + "\",\"lhs\":" + lhs
    node = node + ",\"rhs\":" + rhs
    return node + "}"
}
// Comparison node; same layout as Binary with type "Compare".
_emit_compare(op, lhs, rhs) {
    local node = "{\"type\":\"Compare\",\"op\":\"" + op
    node = node + "\",\"lhs\":" + lhs
    node = node + ",\"rhs\":" + rhs
    return node + "}"
}
// True when the trimmed token is one of the supported operators:
// + - * / > < (one character) or == != >= <= (two characters).
_is_operator(tok) {
    local op = me._trim(tok)
    local size = op.length()
    if size == 1 { return "+-*/><".indexOf(op) >= 0 }
    if size == 2 { return op == "==" || op == "!=" || op == ">=" || op == "<=" }
    return false
}
// Binding strength of an operator: 2 for * and /, 1 for + and -,
// 0 for everything else (which includes the comparison operators).
_get_precedence(op) {
    local level = 0
    if op == "*" || op == "/" {
        level = 2
    } else if op == "+" || op == "-" {
        level = 1
    }
    return level
}
// True for the six comparison operators; the argument is matched exactly
// (no trimming).
_is_compare_operator(op) {
    local is_equality = op == "==" || op == "!="
    local is_ordering = op == ">" || op == "<" || op == ">=" || op == "<="
    return is_equality || is_ordering
}
// Stage-A: find the top-level binary operator at which `expr` should be split.
// The expression is scanned right to left; among operators outside parentheses
// the one with the lowest precedence wins, and on a tie the right-most one is
// kept (the comparison below is strict).
// Returns "POS,OP" (index of the operator's first character in the trimmed
// expression, then the operator text), or "" when no operator is found.
// Only parentheses are tracked: operators inside string literals or
// square/curly brackets are not skipped.
//
// Fix: the two-character window used to be tested with _is_operator, which
// trims its argument. A window such as "+ " or " *" therefore counted as a
// two-character operator with precedence 0, so precedence was ignored and the
// returned operator text contained a blank. Two-character operators are now
// matched exactly.
_find_main_operator(expr) {
    expr = me._trim(expr)
    local depth = 0
    local min_prec = 999
    local best_pos = -1
    local best_op = ""
    local i = expr.length() - 1
    loop(i >= 0) {
        local ch = expr.substring(i, i + 1)
        if ch == ")" { depth = depth + 1 }
        if ch == "(" { depth = depth - 1 }
        local consumed = 1
        if depth == 0 {
            local two_char = ""
            if i > 0 { two_char = expr.substring(i - 1, i + 1) }
            if two_char == "==" || two_char == "!=" || two_char == ">=" || two_char == "<=" {
                local prec = me._get_precedence(two_char)
                if prec < min_prec {
                    min_prec = prec
                    best_pos = i - 1
                    best_op = two_char
                }
                // Both characters belong to this operator; do not revisit the first one.
                consumed = 2
            } else if me._is_operator(ch) {
                local prec = me._get_precedence(ch)
                if prec < min_prec {
                    min_prec = prec
                    best_pos = i
                    best_op = ch
                }
            }
        }
        i = i - consumed
    }
    // Stringify the position explicitly, as the rest of this file does ("" + n).
    if best_pos >= 0 { return ("" + best_pos) + "," + best_op }
    return ""
}
// Parse an array literal such as [1,2,3] into an "array.of" Call node.
// `expr` must include the surrounding brackets. Elements are separated at
// every comma (nested literals containing commas are therefore not supported)
// and blank elements are skipped.
//
// Fix: elements used to be forced through _parse_number and emitted as Int
// nodes unconditionally, so any non-integer element (a string, a variable)
// became an Int node whose value was null. Integer elements are emitted as
// before; everything else is handed to _parse_expr_simple, as _parse_map
// already does for its values.
_parse_array(expr) {
    local inner = me._trim(expr.substring(1, expr.length() - 1))
    if inner == "" { return me._emit_call("array.of", "") }
    local out = ""
    local i = 0
    local n = inner.length()
    loop(i <= n) {
        // find next comma or end
        local j = i
        loop(j < n) {
            if inner.substring(j, j + 1) == "," { break }
            j = j + 1
        }
        local tok = me._trim(inner.substring(i, j))
        if tok != "" {
            local num = me._parse_number(tok)
            local elem = ""
            if num != null {
                elem = me._emit_int(num)
            } else {
                elem = me._parse_expr_simple(tok)
            }
            if out != "" { out = out + "," }
            out = out + elem
        }
        i = j + 1
        if j >= n { break }
    }
    return me._emit_call("array.of", out)
}
// Parse a map literal such as {"a":1,"b":2} into a "map.of" Call node whose
// arguments alternate key, value, key, value.
// Stage-A minimal implementation: entries are separated at every comma and
// each entry is split at its first colon; entries without a colon (or with a
// colon in first position) are skipped.
_parse_map(expr) {
    local content = me._trim(expr.substring(1, expr.length() - 1))
    if content == "" { return me._emit_call("map.of", "") }
    local call_args = ""
    local total = content.length()
    local seg_start = 0
    loop(seg_start <= total) {
        // Advance seg_end to the next comma, or to the end of the text.
        local seg_end = seg_start
        loop(seg_end < total) {
            if content.substring(seg_end, seg_end + 1) == "," { break }
            seg_end = seg_end + 1
        }
        local entry = me._trim(content.substring(seg_start, seg_end))
        if entry != "" {
            local sep = entry.indexOf(":")
            if sep > 0 {
                local key_text = me._trim(entry.substring(0, sep))
                local value_text = me._trim(entry.substring(sep + 1, entry.length()))
                local key_node = me._emit_key(key_text)
                local value_node = me._parse_expr_simple(value_text)
                if call_args != "" { call_args = call_args + "," }
                call_args = call_args + key_node + "," + value_node
            }
        }
        if seg_end >= total { break }
        seg_start = seg_end + 1
    }
    return me._emit_call("map.of", call_args)
}
// Return the operator token that starts at index `i` of `text`, or "" when
// none does. Two-character comparison operators are tried first so that ">="
// is never read as ">" followed by "=".
_operator_at(text, i) {
    if i + 1 < text.length() {
        local two = text.substring(i, i + 2)
        if two == "==" || two == "!=" || two == ">=" || two == "<=" { return two }
    }
    local one = text.substring(i, i + 1)
    if one == "+" || one == "-" || one == "*" || one == "/" || one == ">" || one == "<" { return one }
    return ""
}
// Index of the right-most binary operator of precedence `level`
// (see _get_precedence) that sits outside string literals and outside
// () [] {} groups, or -1 when there is none.
// An operator with no operand on its left (start of text, or directly after
// another operator, an opening bracket, a comma or a colon) is a sign, not a
// binary operator, and is never reported.
_find_top_level_split(text, level) {
    local n = text.length()
    local i = 0
    local depth = 0
    local in_str = 0
    local prev = ""
    local found = -1
    loop(i < n) {
        local ch = text.substring(i, i + 1)
        local step = 1
        if in_str == 1 {
            // Inside a literal: a backslash hides the next character.
            if ch == "\\" { step = 2 }
            if ch == "\"" { in_str = 0 }
            prev = "\""
        } else if ch == "\"" {
            in_str = 1
            prev = ch
        } else if ch == "(" || ch == "[" || ch == "{" {
            depth = depth + 1
            prev = ch
        } else if ch == ")" || ch == "]" || ch == "}" {
            depth = depth - 1
            prev = ch
        } else if ch != " " && ch != "\t" && ch != "\n" && ch != "\r" {
            local op = ""
            if depth == 0 { op = me._operator_at(text, i) }
            if op != "" {
                local has_lhs = 1
                if prev == "" { has_lhs = 0 }
                if prev != "" && "([{,:+-*/<>=!".indexOf(prev) >= 0 { has_lhs = 0 }
                if has_lhs == 1 && me._get_precedence(op) == level { found = i }
                step = op.length()
                prev = op.substring(op.length() - 1, op.length())
            } else {
                prev = ch
            }
        }
        i = i + step
    }
    return found
}
// Stage-A expression parser: number, "string", variable, array/map literal,
// index read a[0], and binary arithmetic / comparison expressions.
// Returns the serialized JSON node for `tok`.
//
// Operator handling (fixes over the previous indexOf-based version):
//   - comparisons bind loosest, then + -, then * /, so `a + 1 == b` is a
//     Compare of (a + 1) and b instead of a + (1 == b);
//   - the right-most operator of a level is the split point, which makes
//     `a - b - c` and `a / b / c` left-associative;
//   - all six comparison operators are recognised (">=" used to be read as
//     ">" followed by "= ...", and "<", "<=", "!=" were not handled);
//   - operators inside string literals or brackets no longer split the token
//     (e.g. "a-b", arr[i+1]);
//   - a leading sign (as in -5 or a * -5) is not treated as a binary operator.
// Parenthesised groups are skipped while searching for operators but are not
// unwrapped.
_parse_expr_simple(tok) {
    tok = me._trim(tok)
    local level = 0
    loop(level <= 2) {
        local pos = me._find_top_level_split(tok, level)
        if pos > 0 {
            local op = me._operator_at(tok, pos)
            local lhs = me._trim(tok.substring(0, pos))
            local rhs = me._trim(tok.substring(pos + op.length(), tok.length()))
            if lhs != "" && rhs != "" {
                local lhs_json = me._parse_expr_simple(lhs)
                local rhs_json = me._parse_expr_simple(rhs)
                // Level 0 holds the comparison operators (precedence 0).
                if level == 0 { return me._emit_compare(op, lhs_json, rhs_json) }
                return me._emit_binary(op, lhs_json, rhs_json)
            }
        }
        level = level + 1
    }
    local n = tok.length()
    if n >= 2 && tok.substring(0, 1) == "[" && tok.substring(n - 1, n) == "]" { return me._parse_array(tok) }
    if n >= 2 && tok.substring(0, 1) == "{" && tok.substring(n - 1, n) == "}" { return me._parse_map(tok) }
    if n >= 2 && tok.substring(0, 1) == "\"" && tok.substring(n - 1, n) == "\"" { return me._emit_str(tok.substring(1, n - 1)) }
    // index read: name[KEY]
    local lb = tok.indexOf("[")
    if lb > 0 && tok.substring(n - 1, n) == "]" {
        local name = me._trim(tok.substring(0, lb))
        local idxs = me._trim(tok.substring(lb + 1, n - 1))
        // Indexing a string literal is not supported in Stage-A: emit an Error node.
        if name.length() >= 2 && name.substring(0, 1) == "\"" && name.substring(name.length() - 1, name.length()) == "\"" {
            return "{\"type\":\"Error\",\"message\":\"String indexing not supported in Stage-A\"}"
        }
        local kj = me._emit_key(idxs)
        return me._emit_method(me._emit_var(name), "get", kj)
    }
    // number or variable
    local num = me._parse_number(tok)
    if num != null { return me._emit_int(num) }
    return me._emit_var(tok)
}
// Stage-A statement parser. Recognises, in this order:
//   local NAME = EXPR      -> Local node
//   print(EXPR)            -> Extern env.console.log node
//   if(COND) { STMT }      -> If node with at most one statement in "then"
//   NAME[KEY] = EXPR       -> Expr node wrapping a "set" Method call
// Returns "" for anything else, including blank input.
//
// Fixes in the if-branch:
//   - The body used to lose its last character unconditionally, on the
//     assumption that it was the closing "}". The caller splits statements at
//     ";", so the brace has usually been cut off already and a real character
//     was dropped (`local y = 10` became `local y = 1`); only print bodies were
//     rescued, by re-appending ")". The brace is now removed only when the body
//     really ends with an unmatched "}".
//   - A ";" used to be appended to non-print bodies before recursing, although
//     nothing here consumes it, so it ended up inside the parsed expression.
//     A trailing ";" is now stripped instead.
//   - Blanks between ")" and "{" are accepted.
// The condition still ends at the first ")", so conditions that themselves
// contain parentheses are not supported.
_parse_stmt(stmt) {
    local s = me._trim(stmt)
    if s == "" { return "" }
    // local NAME = EXPR
    if me._starts_with(s, "local ") {
        local rest = me._trim(s.substring(6, s.length()))
        local eq = rest.indexOf("=")
        if eq > 0 {
            local name = me._trim(rest.substring(0, eq))
            local expr = me._trim(rest.substring(eq + 1, rest.length()))
            local ej = me._parse_expr_simple(expr)
            return me._emit_stmt_local(name, ej)
        }
    }
    // print(EXPR)
    if me._starts_with(s, "print(") && s.substring(s.length() - 1, s.length()) == ")" {
        local inner = s.substring(6, s.length() - 1)
        local ej = me._parse_expr_simple(inner)
        return me._emit_stmt_extern_print(ej)
    }
    // if(COND) { STMT }
    if me._starts_with(s, "if(") {
        local rb = s.indexOf(")")
        if rb > 0 {
            local after_paren = me._trim(s.substring(rb + 1, s.length()))
            if me._starts_with(after_paren, "{") {
                local cond = me._trim(s.substring(3, rb))
                local body = me._trim(after_paren.substring(1, after_paren.length()))
                // Count braces so that a "}" closing something inside the body
                // (e.g. a map literal) is not mistaken for the if's own brace.
                local opens = 0
                local closes = 0
                local k = 0
                loop(k < body.length()) {
                    local c = body.substring(k, k + 1)
                    if c == "{" { opens = opens + 1 }
                    if c == "}" { closes = closes + 1 }
                    k = k + 1
                }
                if closes > opens && body.substring(body.length() - 1, body.length()) == "}" {
                    body = me._trim(body.substring(0, body.length() - 1))
                }
                if body.length() > 0 && body.substring(body.length() - 1, body.length()) == ";" {
                    body = me._trim(body.substring(0, body.length() - 1))
                }
                local cond_json = me._parse_expr_simple(cond)
                // The recursive call covers print(...) as well as the other forms.
                local stmt_json = me._parse_stmt(body)
                if stmt_json == "" {
                    // Empty or unrecognised body: emit the if with an empty "then".
                    return "{\"type\":\"If\",\"cond\":" + cond_json + ",\"then\":[]}"
                }
                return "{\"type\":\"If\",\"cond\":" + cond_json + ",\"then\":[" + stmt_json + "]}"
            }
        }
    }
    // index write: NAME[KEY] = EXPR
    local eq = s.indexOf("=")
    if eq > 0 {
        local lhs = me._trim(s.substring(0, eq))
        local rhs = me._trim(s.substring(eq + 1, s.length()))
        local lb = lhs.indexOf("[")
        if lb > 0 && lhs.substring(lhs.length() - 1, lhs.length()) == "]" {
            local name = me._trim(lhs.substring(0, lb))
            local idxs = me._trim(lhs.substring(lb + 1, lhs.length() - 1))
            local kj = me._emit_key(idxs)
            local rj = me._parse_expr_simple(rhs)
            local args = kj + "," + rj
            local mj = me._emit_method(me._emit_var(name), "set", args)
            return me._emit_stmt_expr(mj)
        }
    }
    return ""
}
// Compile the body of `static method main` found in `source` into a
// Program JSON v0 string.
// The body is split into statements at ";" and each piece goes through
// _parse_stmt; pieces that produce "" are dropped. When no main body is found,
// or no statement produces output, the program is a single `Return Int 0`.
//
// Fix: a ";" inside a string literal (e.g. print("a; b")) used to end the
// statement in the middle of the literal. Double-quoted literals are now
// tracked, with a backslash hiding the character after it, and separators
// inside them are ignored. Braces are still not tracked, so a "{ ...; ... }"
// group is split at its inner ";" as before.
_compile_source_to_json_v0(source) {
    local body = me._find_main_body(source)
    if body == "" {
        // Fallback: return 0
        return "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}]}"
    }
    local out = ""
    local i = 0
    local n = body.length()
    local start = 0
    local in_str = 0
    // i runs up to and including n so that the final statement is flushed
    // even without a trailing ";" (or with an unterminated literal).
    loop(i <= n) {
        local ch = ""
        if i < n { ch = body.substring(i, i + 1) }
        if in_str == 1 && i < n {
            if ch == "\\" {
                // Skip the escaped character, but never step past the end.
                if i + 1 < n { i = i + 1 }
            } else if ch == "\"" {
                in_str = 0
            }
        } else if ch == "\"" {
            in_str = 1
        } else if i == n || ch == ";" {
            local stmt = me._trim(body.substring(start, i))
            if stmt != "" {
                local sj = me._parse_stmt(stmt)
                if sj != "" {
                    if out != "" { out = out + "," }
                    out = out + sj
                }
            }
            start = i + 1
        }
        i = i + 1
    }
    if out == "" { out = "{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}" }
    return "{\"version\":0,\"kind\":\"Program\",\"body\":[" + out + "]}"
}
// Print a Program JSON v0 whose only statement returns the integer `ret_value`:
// {"version":0,"kind":"Program","body":[{"type":"Return","expr":{"type":"Int","value":ret_value}}]}
_emit_program_json(ret_value) {
    local value_text = "" + ret_value
    local head = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":"
    local tail = "}}]}"
    print(head + value_text + tail)
}
// Entry point. Always returns 0 and prints exactly one JSON document:
//   --stage-b          -> whatever StageBMain._do_compile_stage_b returns
//   --min-json         -> Stage-A compilation of the --source text
//   non-empty --source -> Stage-A compilation as well, even without --min-json
//   otherwise          -> constant-return program (--return-int value, default 42)
main(args) {
    local opts = me._collect_flags(args)
    if opts.stage_b == 1 {
        local stage_b_json = StageBMain._do_compile_stage_b(opts.source, opts.prefer_cfg, opts.stage3, opts.v1_compat)
        print(stage_b_json)
        return 0
    }
    if opts.emit == 1 {
        local emitted = me._compile_source_to_json_v0(opts.source)
        print(emitted)
        return 0
    }
    // No --min-json, but a source text was supplied: compile it with Stage-A too.
    if opts.source != null && opts.source != "" {
        local compiled = me._compile_source_to_json_v0(opts.source)
        if compiled == "" { compiled = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}]}" }
        print(compiled)
        return 0
    }
    // fallback: constant return-int
    local exit_value = opts.ret
    if exit_value == null { exit_value = 42 }
    me._emit_program_json(exit_value)
    return 0
}
}