Files
hakorune/lang/src/compiler/entry/compiler.hako
nyash-codex 5208491e6e hako( compiler): Stage-A enhancements - map literals, binary/compare operators, if statements, and error diagnostics
- Implement map literal parsing with basic key/value pairs: {a:1,b:2}
- Add binary operators (+, -, *, /) with precedence handling
- Add comparison operators (>, <, ==, !=, >=, <=) for if statements
- Implement minimal if statement parsing: if(condition){statement}
- Add string indexing error diagnostic for unsupported Stage-A features
- Create new smoke tests: hako_min_binop_vm.sh and hako_min_if_vm.sh
- Enhance JSON v0 output with proper ExprV0.Binary and ExprV0.Compare structures

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
2025-10-31 22:48:46 +09:00

407 lines
14 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Compiler entry (MVP)
// - With --min-json or --source <text>: compile the source's main body and print Program JSON v0 to stdout
// - Otherwise: print a constant-return Program JSON v0 (value from --return-int, default 42)
static box Main {
// Parse an optionally '-'-prefixed run of decimal digits into an integer.
// `raw` is stringified first, so non-string values are accepted.
// Returns null for null input, an empty string, a lone "-", or any non-digit character.
_parse_signed_int(raw) {
    if raw == null { return null }
    local s = "" + raw
    local len = s.length()
    if len == 0 { return null }
    local negative = 0
    local pos = 0
    if s.substring(0, 1) == "-" {
        negative = 1
        pos = 1
    }
    // A bare "-" carries no digits.
    if pos >= len { return null }
    local value = 0
    loop(pos < len) {
        local c = s.substring(pos, pos + 1)
        if c < "0" || c > "9" { return null }
        local d = "0123456789".indexOf(c)
        if d < 0 { return null }
        value = value * 10 + d
        pos = pos + 1
    }
    if negative == 1 { return 0 - value }
    return value
}
// Scan the argument list and collect the recognised flags into a map:
//   emit   - 1 when "--min-json" is present, otherwise 0
//   source - text following "--source", otherwise null
//   ret    - integer following "--return-int" when it parses, otherwise null
// A value-taking flag in the last position (no value after it) is ignored.
_collect_flags(args) {
    local flags = { emit: 0, ret: null, source: null }
    if args == null { return flags }
    local count = args.length()
    local pos = 0
    loop(pos < count) {
        local arg = "" + args.get(pos)
        local has_value = 0
        if pos + 1 < count { has_value = 1 }
        if arg == "--min-json" {
            flags.emit = 1
        } else if arg == "--source" && has_value == 1 {
            flags.source = "" + args.get(pos + 1)
            // consume the value as well
            pos = pos + 1
        } else if arg == "--return-int" && has_value == 1 {
            local value = me._parse_signed_int(args.get(pos + 1))
            if value != null { flags.ret = value }
            // the value is consumed even when it does not parse
            pos = pos + 1
        }
        pos = pos + 1
    }
    return flags
}
// ----- Minimal parser utilities (Stage-A) -----
// Strip leading and trailing blanks (space, \n, \r, \t) from `s`.
// A null input yields "".
_trim(s) {
    if s == null { return "" }
    local first = 0
    local last = s.length()
    // advance past leading whitespace
    loop(first < last) {
        local c = s.substring(first, first + 1)
        if c != " " && c != "\n" && c != "\r" && c != "\t" { break }
        first = first + 1
    }
    // retreat past trailing whitespace
    loop(last > first) {
        local c = s.substring(last - 1, last)
        if c != " " && c != "\n" && c != "\r" && c != "\t" { break }
        last = last - 1
    }
    return s.substring(first, last)
}
// Report whether `s` begins with `pref`.
// Returns 0 when `s` is shorter than `pref`, otherwise the result of the prefix comparison.
_starts_with(s, pref) {
    local plen = pref.length()
    if s.length() < plen { return 0 }
    local head = s.substring(0, plen)
    return head == pref
}
// Extract the text between the braces of the first "static method main" in `src`.
// Braces are counted character by character (string literals are not special-cased).
// Returns "" when src is null, the marker or its '{' is missing, or the braces never balance.
_find_main_body(src) {
    if src == null { return "" }
    local at = src.indexOf("static method main")
    if at < 0 { return "" }
    // Search for '{' in the remainder so only the one-argument indexOf is needed.
    local rest = src.substring(at, src.length())
    local off = rest.indexOf("{")
    if off < 0 { return "" }
    local brace_at = at + off
    local total = src.length()
    local level = 0
    local pos = brace_at
    loop(pos < total) {
        local c = src.substring(pos, pos + 1)
        if c == "{" {
            level = level + 1
        } else if c == "}" {
            level = level - 1
            // back at level 0: this is the brace closing main's body
            if level == 0 { return src.substring(brace_at + 1, pos) }
        }
        pos = pos + 1
    }
    return ""
}
// Build an Int expression node; `n` is stringified as-is.
_emit_int(n) {
    local digits = "" + n
    return "{\"type\":\"Int\",\"value\":" + digits + "}"
}
// Build a Str expression node. `t` is inserted verbatim (no JSON escaping is applied).
_emit_str(t) {
    local head = "{\"type\":\"Str\",\"value\":\""
    local tail = "\"}"
    return head + t + tail
}
// Build a Var expression node referring to the variable named `n` (inserted verbatim).
_emit_var(n) {
    local head = "{\"type\":\"Var\",\"name\":\""
    local tail = "\"}"
    return head + n + tail
}
// Build a Call expression node. `args_json` is the already-serialised,
// comma-separated argument list ("" for no arguments).
_emit_call(name, args_json) {
    local head = "{\"type\":\"Call\",\"name\":\"" + name
    local args = "\" ,\"args\":[" + args_json
    return head + args + "]}"
}
// Build a Method expression node: receiver JSON, method name, and the
// already-serialised, comma-separated argument list.
_emit_method(recv_json, m, args_json) {
    local head = "{\"type\":\"Method\",\"recv\":" + recv_json
    local name = ",\"method\":\"" + m
    local args = "\",\"args\":[" + args_json
    return head + name + args + "]}"
}
// Build a Local statement node binding `name` to the serialised expression `expr_json`.
_emit_stmt_local(name, expr_json) {
    local head = "{\"type\":\"Local\",\"name\":\"" + name
    local init = "\" ,\"expr\":" + expr_json
    return head + init + "}"
}
// Build an Extern statement node that calls env.console.log with one serialised argument.
_emit_stmt_extern_print(expr_json) {
    local head = "{\"type\":\"Extern\",\"iface\":\"env.console\",\"method\":\"log\",\"args\":["
    return head + expr_json + "]}"
}
// Wrap a serialised expression in an Expr statement node.
_emit_stmt_expr(expr_json) {
    local head = "{\"type\":\"Expr\",\"expr\":"
    return head + expr_json + "}"
}
// Parse `tok` as a signed decimal integer; null when it is not one.
// Thin alias for _parse_signed_int.
_parse_number(tok) {
    local value = me._parse_signed_int(tok)
    return value
}
// Serialise an index/key token used by NAME[KEY] reads and writes and by map literals.
//   "text"  -> Str node (quotes stripped)
//   123/-4  -> Int node
//   other   -> Var node
// The Var fallback mirrors the final fallback in _parse_expr_simple. Previously a
// non-numeric token such as `i` in `a[i]` produced an Int node whose value was null.
_emit_key(tok) {
    tok = me._trim(tok)
    local len = tok.length()
    if len >= 2 && tok.substring(0, 1) == "\"" && tok.substring(len - 1, len) == "\"" {
        return me._emit_str(tok.substring(1, len - 1))
    }
    local n = me._parse_number(tok)
    if n != null { return me._emit_int(n) }
    // Not a literal: treat the token as a variable reference.
    return me._emit_var(tok)
}
// Build a Binary expression node from an operator and two serialised operands.
_emit_binary(op, lhs, rhs) {
    local head = "{\"type\":\"Binary\",\"op\":\"" + op
    local left = "\",\"lhs\":" + lhs
    local right = ",\"rhs\":" + rhs
    return head + left + right + "}"
}
// Build a Compare expression node from an operator and two serialised operands.
_emit_compare(op, lhs, rhs) {
    local head = "{\"type\":\"Compare\",\"op\":\"" + op
    local left = "\",\"lhs\":" + lhs
    local right = ",\"rhs\":" + rhs
    return head + left + right + "}"
}
// True when `tok`, after trimming surrounding whitespace, is one of the Stage-A
// arithmetic (+ - * /) or comparison (> < == != >= <=) operators.
_is_operator(tok) {
    local op = me._trim(tok)
    // arithmetic
    if op == "+" || op == "-" || op == "*" || op == "/" { return true }
    // two-character comparisons
    if op == "==" || op == "!=" || op == ">=" || op == "<=" { return true }
    // single-character comparisons
    if op == ">" || op == "<" { return true }
    return false
}
// Binding strength of an operator: 2 for * and /, 1 for + and -,
// 0 for everything else (which includes the comparison operators).
_get_precedence(op) {
    if op == "+" || op == "-" { return 1 }
    if op == "*" || op == "/" { return 2 }
    return 0
}
// True when `op` is exactly one of the comparison operators (no trimming is done).
_is_compare_operator(op) {
    if op == "==" || op == "!=" { return true }
    if op == ">=" || op == "<=" { return true }
    if op == ">" || op == "<" { return true }
    return false
}
// Stage-A: locate the operator at which an expression should be split.
//
// The trimmed expression is scanned right-to-left and the right-most operator with
// the lowest precedence wins, so chains of equal precedence split left-associatively.
// Comparison operators get precedence 0 from _get_precedence and so bind loosest.
//
// Skipped while scanning:
//   - anything nested inside (), [] or {}
//   - anything inside a "..." string literal (a backslash-escaped quote does not end it)
//   - a '-' with no operand on its left (start of expression, or directly after
//     another operator), which is a sign rather than a subtraction
//
// Two-character operators are matched exactly. The previous version passed the
// two-character window through _is_operator, which trims its argument, so "+ " was
// accepted as an operator with precedence 0; that broke precedence for spaced
// input and leaked the blank into the emitted op.
//
// Returns "POS,OP" where POS is the operator's start index in the trimmed
// expression, or "" when no top-level operator exists.
_find_main_operator(expr) {
    expr = me._trim(expr)
    local depth = 0
    local in_str = 0
    local min_prec = 999
    local best_pos = -1
    local best_op = ""
    local i = expr.length() - 1
    loop(i >= 0) {
        local ch = expr.substring(i, i + 1)
        // operator ending at index i, "" when there is none
        local op = ""
        if ch == "\"" {
            local escaped = 0
            if i > 0 {
                if expr.substring(i - 1, i) == "\\" { escaped = 1 }
            }
            if escaped == 0 { in_str = 1 - in_str }
        } else if in_str == 0 {
            // Scanning backwards: closers open a nesting level, openers close it.
            if ch == ")" || ch == "]" || ch == "}" { depth = depth + 1 }
            if ch == "(" || ch == "[" || ch == "{" { depth = depth - 1 }
            if depth == 0 {
                if i > 0 {
                    local pair = expr.substring(i - 1, i + 1)
                    if pair == "==" || pair == "!=" || pair == ">=" || pair == "<=" { op = pair }
                }
                if op == "" {
                    if me._is_operator(ch) { op = ch }
                }
            }
        }
        if op == "-" {
            // Find the nearest non-blank character to the left of the '-'.
            local k = i - 1
            loop(k >= 0) {
                if me._trim(expr.substring(k, k + 1)) != "" { break }
                k = k - 1
            }
            if k < 0 {
                op = ""
            } else {
                local prev = expr.substring(k, k + 1)
                // "=" covers the tail of ==, !=, >= and <=
                if me._is_operator(prev) || prev == "=" { op = "" }
            }
        }
        if op != "" {
            local start = i - op.length() + 1
            local prec = me._get_precedence(op)
            if prec < min_prec {
                min_prec = prec
                best_pos = start
                best_op = op
            }
            // Resume left of the operator so its first character is not rescanned.
            i = start
        }
        i = i - 1
    }
    if best_pos >= 0 { return best_pos + "," + best_op }
    return ""
}
// Stage-A: serialise an array literal such as [1,2,3] into a Call to "array.of".
//
// `expr` must include the surrounding brackets. Elements are separated at every
// ',' (commas inside nested literals or strings are not handled) and empty
// elements are skipped. Each element goes through _parse_expr_simple, the same
// path _parse_map uses for values, so strings and variables are emitted as
// Str/Var nodes. Previously every element was forced through _emit_int and a
// non-integer element became an Int node with a null value.
_parse_array(expr) {
    local inner = me._trim(expr.substring(1, expr.length() - 1))
    if inner == "" { return me._emit_call("array.of", "") }
    local out = ""
    local n = inner.length()
    local start = 0
    loop(start <= n) {
        // advance `stop` to the next comma or to the end of the literal
        local stop = start
        loop(stop < n) {
            if inner.substring(stop, stop + 1) == "," { break }
            stop = stop + 1
        }
        local tok = me._trim(inner.substring(start, stop))
        if tok != "" {
            if out != "" { out = out + "," }
            out = out + me._parse_expr_simple(tok)
        }
        if stop >= n { break }
        start = stop + 1
    }
    return me._emit_call("array.of", out)
}
// Stage-A: serialise a map literal such as {"a":1,"b":2} or {a:1,b:2} into a Call
// to "map.of" whose arguments alternate key, value, key, value, ...
//
// `expr` must include the surrounding braces. Pairs are separated at every ','
// and split at their first ':' (separators inside nested literals or strings are
// not handled). Pairs without a key before the ':' are skipped.
//
// Keys: quoted text and integers go through _emit_key. A bare identifier key is
// emitted as a Str node holding the identifier's name; previously it became an
// Int node with a null value.
// NOTE(review): this assumes bare keys name string fields, as in the
// `{ emit: 0, ... }` literal used by _collect_flags - confirm against the language spec.
_parse_map(expr) {
    local inner = me._trim(expr.substring(1, expr.length() - 1))
    if inner == "" { return me._emit_call("map.of", "") }
    local out = ""
    local n = inner.length()
    local start = 0
    loop(start <= n) {
        // advance `stop` to the next comma or to the end of the literal
        local stop = start
        loop(stop < n) {
            if inner.substring(stop, stop + 1) == "," { break }
            stop = stop + 1
        }
        local pair = me._trim(inner.substring(start, stop))
        if pair != "" {
            local colon = pair.indexOf(":")
            if colon > 0 {
                // `pair` is trimmed and colon > 0, so `key` is never empty here.
                local key = me._trim(pair.substring(0, colon))
                local value = me._trim(pair.substring(colon + 1, pair.length()))
                local key_json = ""
                if key.substring(0, 1) != "\"" && me._parse_number(key) == null {
                    key_json = me._emit_str(key)
                } else {
                    key_json = me._emit_key(key)
                }
                local val_json = me._parse_expr_simple(value)
                if out != "" { out = out + "," }
                out = out + key_json + "," + val_json
            }
        }
        if stop >= n { break }
        start = stop + 1
    }
    return me._emit_call("map.of", out)
}
// Stage-A expression parser. Returns the JSON for one expression node.
//
// Forms are tried in this order:
//   1. LHS OP RHS   split at the operator reported by _find_main_operator;
//                   comparison operators give Compare, the rest give Binary
//   2. (EXPR)       parentheses are unwrapped and the inside is parsed
//   3. [..]         array literal (_parse_array)
//   4. {..}         map literal (_parse_map)
//   5. "text"       Str node
//   6. NAME[KEY]    Method "get" on Var NAME; a string-literal receiver yields
//                   an Error node, since string indexing is unsupported
//   7. integer      Int node
//   8. otherwise    Var node named by the whole token
//
// Step 2 is needed because _find_main_operator ignores operators inside
// parentheses: for "(1+2)*3" the left operand arrives here as "(1+2)", which
// previously fell through to step 8 and became a Var named "(1+2)".
_parse_expr_simple(tok) {
    tok = me._trim(tok)
    local len = tok.length()
    // 1. binary / compare
    local op_info = me._find_main_operator(tok)
    if op_info != "" {
        // op_info is "POS,OP"
        local comma = op_info.indexOf(",")
        local pos = me._parse_number(op_info.substring(0, comma))
        local op = op_info.substring(comma + 1, op_info.length())
        if pos != null && pos >= 0 {
            local lhs = me._trim(tok.substring(0, pos))
            local rhs = me._trim(tok.substring(pos + op.length(), len))
            // An empty side (e.g. the sign in "-5") is not a binary expression.
            if lhs != "" && rhs != "" {
                local lhs_json = me._parse_expr_simple(lhs)
                local rhs_json = me._parse_expr_simple(rhs)
                if me._is_compare_operator(op) {
                    return me._emit_compare(op, lhs_json, rhs_json)
                } else {
                    return me._emit_binary(op, lhs_json, rhs_json)
                }
            }
        }
    }
    if len >= 2 {
        local first = tok.substring(0, 1)
        local last = tok.substring(len - 1, len)
        // 2. parenthesised expression
        if first == "(" && last == ")" { return me._parse_expr_simple(tok.substring(1, len - 1)) }
        // 3-5. literals
        if first == "[" && last == "]" { return me._parse_array(tok) }
        if first == "{" && last == "}" { return me._parse_map(tok) }
        if first == "\"" && last == "\"" { return me._emit_str(tok.substring(1, len - 1)) }
    }
    // 6. index read: name[KEY]
    local lb = tok.indexOf("[")
    if lb > 0 && tok.substring(len - 1, len) == "]" {
        local name = me._trim(tok.substring(0, lb))
        local idxs = me._trim(tok.substring(lb + 1, len - 1))
        // String indexing is unsupported in Stage-A: report it as an Error node.
        if name.length() >= 2 && name.substring(0, 1) == "\"" && name.substring(name.length() - 1, name.length()) == "\"" {
            return "{\"type\":\"Error\",\"message\":\"String indexing not supported in Stage-A\"}"
        }
        local kj = me._emit_key(idxs)
        return me._emit_method(me._emit_var(name), "get", kj)
    }
    // 7-8. number or variable
    local n = me._parse_number(tok)
    if n != null { return me._emit_int(n) }
    return me._emit_var(tok)
}
// Stage-A statement parser. Returns the JSON for one statement node, or "" when
// the text is empty or matches none of the supported forms:
//   local NAME = EXPR        -> Local
//   print(EXPR)              -> Extern env.console.log
//   if(COND) { STATEMENT }   -> If with a single-statement "then" list
//   NAME[KEY] = EXPR         -> Expr wrapping Method "set"
//
// Changes in the `if` form compared with the previous version:
//   - the condition ends at the ')' matching the opening '(' (parentheses and
//     string literals inside the condition are skipped), not at the first ')'
//   - blanks between ')' and '{' are accepted
//   - the closing '}' is stripped only when present; the caller splits
//     statements at ';', so "if(c){stmt;}" arrives here as "if(c){stmt" and
//     its last character must not be chopped off blindly
//   - one trailing ';' inside the body is ignored
//   - only the two-argument substring is used, like the rest of the file
// A body holding several statements is still not supported.
_parse_stmt(stmt) {
    local s = me._trim(stmt)
    if s == "" { return "" }
    local slen = s.length()
    // local NAME = EXPR
    if me._starts_with(s, "local ") {
        local rest = me._trim(s.substring(6, slen))
        local assign = rest.indexOf("=")
        if assign > 0 {
            local var_name = me._trim(rest.substring(0, assign))
            local init = me._trim(rest.substring(assign + 1, rest.length()))
            return me._emit_stmt_local(var_name, me._parse_expr_simple(init))
        }
    }
    // print(EXPR)
    if me._starts_with(s, "print(") && s.substring(slen - 1, slen) == ")" {
        local inner = s.substring(6, slen - 1)
        return me._emit_stmt_extern_print(me._parse_expr_simple(inner))
    }
    // if(COND) { STATEMENT }
    if me._starts_with(s, "if(") {
        // Find the ')' that closes the '(' at index 2.
        local rb = -1
        local depth = 0
        local in_str = 0
        local k = 2
        loop(k < slen) {
            local c = s.substring(k, k + 1)
            if c == "\"" {
                // k >= 2 here, so k - 1 is always a valid index
                if s.substring(k - 1, k) != "\\" { in_str = 1 - in_str }
            } else if in_str == 0 {
                if c == "(" { depth = depth + 1 }
                if c == ")" {
                    depth = depth - 1
                    if depth == 0 {
                        rb = k
                        break
                    }
                }
            }
            k = k + 1
        }
        if rb > 0 {
            local tail = me._trim(s.substring(rb + 1, slen))
            if me._starts_with(tail, "{") {
                local cond = me._trim(s.substring(3, rb))
                local body = me._trim(tail.substring(1, tail.length()))
                if body.length() > 0 && body.substring(body.length() - 1, body.length()) == "}" {
                    body = me._trim(body.substring(0, body.length() - 1))
                }
                if body.length() > 0 && body.substring(body.length() - 1, body.length()) == ";" {
                    body = me._trim(body.substring(0, body.length() - 1))
                }
                local cond_json = me._parse_expr_simple(cond)
                local stmt_json = me._parse_stmt(body)
                return "{\"type\":\"If\",\"cond\":" + cond_json + ",\"then\":[" + stmt_json + "]}"
            }
        }
    }
    // index write: NAME[KEY] = EXPR
    local eq = s.indexOf("=")
    if eq > 0 {
        local lhs = me._trim(s.substring(0, eq))
        local rhs = me._trim(s.substring(eq + 1, slen))
        local lb = lhs.indexOf("[")
        if lb > 0 && lhs.substring(lhs.length() - 1, lhs.length()) == "]" {
            local target = me._trim(lhs.substring(0, lb))
            local idxs = me._trim(lhs.substring(lb + 1, lhs.length() - 1))
            local kj = me._emit_key(idxs)
            local rj = me._parse_expr_simple(rhs)
            local mj = me._emit_method(me._emit_var(target), "set", kj + "," + rj)
            return me._emit_stmt_expr(mj)
        }
    }
    return ""
}
// Compile the body of `static method main` found in `source` into Program JSON v0.
//
// The body is split into statements at every ';' that lies outside a "..." string
// literal (previously a ';' inside a string, e.g. print("a;b"), cut the statement
// in two). Each piece is handed to _parse_stmt and pieces it does not recognise
// are dropped. Braces are not tracked, so a ';' inside an if-body still splits.
//
// When no main body is found, or no statement is recognised, the program
// consists of a single `Return 0` statement.
_compile_source_to_json_v0(source) {
    local return_zero = "{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
    local body = me._find_main_body(source)
    if body == "" {
        // Fallback: return 0
        return "{\"version\":0,\"kind\":\"Program\",\"body\":[" + return_zero + "]}"
    }
    local out = ""
    local n = body.length()
    local start = 0
    local in_str = 0
    local i = 0
    // i runs up to n inclusive so the text after the last ';' is flushed too
    loop(i <= n) {
        local at_split = 0
        if i == n {
            at_split = 1
        } else {
            local ch = body.substring(i, i + 1)
            if ch == "\"" {
                // a backslash-escaped quote does not open or close a literal
                local escaped = 0
                if i > 0 {
                    if body.substring(i - 1, i) == "\\" { escaped = 1 }
                }
                if escaped == 0 { in_str = 1 - in_str }
            } else if ch == ";" && in_str == 0 {
                at_split = 1
            }
        }
        if at_split == 1 {
            local stmt = me._trim(body.substring(start, i))
            if stmt != "" {
                local sj = me._parse_stmt(stmt)
                if sj != "" {
                    if out != "" { out = out + "," }
                    out = out + sj
                }
            }
            start = i + 1
        }
        i = i + 1
    }
    if out == "" { out = return_zero }
    return "{\"version\":0,\"kind\":\"Program\",\"body\":[" + out + "]}"
}
// Print a Program JSON v0 whose only statement returns the integer `ret_value`:
// {"version":0,"kind":"Program","body":[{"type":"Return","expr":{"type":"Int","value":ret_value}}]}
_emit_program_json(ret_value) {
    local value_text = "" + ret_value
    local head = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":"
    print(head + value_text + "}}]}")
}
// Entry point. Always prints one Program JSON v0 document and returns 0.
//   --min-json, or a non-empty --source TEXT : compile TEXT (a missing source
//       compiles to a program that returns 0)
//   otherwise : emit a constant-return program using --return-int N (default 42)
//
// The --min-json path used a bare `return` while every other path returned 0;
// all paths now return 0. The two compile branches did the same work and are
// merged, and the check for an empty compile result was removed because
// _compile_source_to_json_v0 never returns "".
main(args) {
    local flags = me._collect_flags(args)
    local has_source = 0
    if flags.source != null && flags.source != "" { has_source = 1 }
    if flags.emit == 1 || has_source == 1 {
        print(me._compile_source_to_json_v0(flags.source))
        return 0
    }
    // fallback: constant return-int
    local ret = flags.ret
    if ret == null { ret = 42 }
    me._emit_program_json(ret)
    return 0
}
}