- Implement map literal parsing with basic key/value pairs: {a:1,b:2}
- Add binary operators (+, -, *, /) with precedence handling
- Add comparison operators (>, <, ==, !=, >=, <=) for if statements
- Implement minimal if statement parsing: if(condition){statement}
- Add string indexing error diagnostic for unsupported Stage-A features
- Create new smoke tests: hako_min_binop_vm.sh and hako_min_if_vm.sh
- Enhance JSON v0 output with proper ExprV0.Binary and ExprV0.Compare structures
Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
407 lines
14 KiB
Plaintext
407 lines
14 KiB
Plaintext
// Compiler entry (MVP)
|
||
// - When invoked with --min-json, emit minimal Program JSON v0 to stdout
|
||
// - Otherwise: compile --source when one is given, else print a constant-return Program JSON (--return-int value, default 42)
|
||
|
||
static box Main {
|
||
_parse_signed_int(raw) {
    // Parse an optionally "-"-prefixed run of decimal digits.
    // Returns the integer value, or null when raw is null, empty, a lone "-",
    // or contains any character that is not one of 0-9.
    if raw == null { return null }
    local digits = "" + raw
    local total = digits.length()
    if total == 0 { return null }

    // A leading "-" marks the value as negative and is skipped.
    local negative = 0
    local pos = 0
    if digits.substring(0, 1) == "-" {
        negative = 1
        pos = 1
    }
    // Nothing left after the sign: not a number.
    if pos >= total { return null }

    local value = 0
    loop(pos < total) {
        local c = digits.substring(pos, pos + 1)
        if c < "0" || c > "9" { return null }
        // The digit's numeric value is its position in the digit table.
        local d = "0123456789".indexOf(c)
        if d < 0 { return null }
        value = value * 10 + d
        pos = pos + 1
    }
    if negative == 1 { return -1 * value }
    return value
}
|
||
|
||
_collect_flags(args) {
    // Scan the argument list and collect the switches this entry point knows:
    //   --min-json          -> emit = 1
    //   --source TEXT       -> source = TEXT (stringified)
    //   --return-int N      -> ret = N when N parses as a signed integer
    // Unknown tokens are skipped. A value-taking switch that is the last
    // argument is ignored. Returns the defaults when args is null.
    local result = { emit: 0, ret: null, source: null }
    if args == null { return result }

    local count = args.length()
    local at = 0
    loop(at < count) {
        local arg = "" + args.get(at)
        // 1 when there is a following argument that can serve as a value.
        local has_value = 0
        if at + 1 < count { has_value = 1 }

        if arg == "--min-json" {
            result.emit = 1
        } else if arg == "--source" && has_value == 1 {
            result.source = "" + args.get(at + 1)
            at = at + 1
        } else if arg == "--return-int" && has_value == 1 {
            local value = me._parse_signed_int(args.get(at + 1))
            if value != null { result.ret = value }
            // The value slot is consumed even when it failed to parse.
            at = at + 1
        }
        at = at + 1
    }
    return result
}
|
||
|
||
// ----- Minimal parser utilities (Stage-A) -----
|
||
_trim(s) {
    // Strip leading and trailing space, newline, carriage return and tab.
    // A null input yields the empty string.
    if s == null { return "" }
    local left = 0
    local right = s.length()

    // Move the left edge past leading whitespace.
    loop(left < right) {
        local c = s.substring(left, left + 1)
        if c != " " && c != "\n" && c != "\r" && c != "\t" { break }
        left = left + 1
    }
    // Pull the right edge back over trailing whitespace.
    loop(right > left) {
        local c = s.substring(right - 1, right)
        if c != " " && c != "\n" && c != "\r" && c != "\t" { break }
        right = right - 1
    }
    return s.substring(left, right)
}
|
||
|
||
_starts_with(s, pref) {
    // True when string s begins with pref, false otherwise.
    // Always returns a boolean (the short-input path used to return the
    // integer 0 while the other path returned a boolean). Null arguments
    // are treated as "no match" instead of failing on .length().
    if s == null || pref == null { return false }
    local plen = pref.length()
    if s.length() < plen { return false }
    return s.substring(0, plen) == pref
}
|
||
|
||
_find_main_body(src) {
    // Return the text between the braces of the first "static method main"
    // found in src (outer braces excluded). Returns "" when src is null, the
    // marker or its opening brace is missing, or the braces never balance.
    // Braces are counted textually; string literals are not recognised.
    if src == null { return "" }
    local marker = "static method main"
    local at = src.indexOf(marker)
    if at < 0 { return "" }

    // Only single-argument indexOf is used, so search the suffix that starts
    // at the marker and translate the hit back to an absolute index.
    local total = src.length()
    local rest = src.substring(at, total)
    local rel = rest.indexOf("{")
    if rel < 0 { return "" }
    local brace_at = at + rel

    // Walk forward until the opening brace is balanced again.
    local nesting = 0
    local k = brace_at
    loop(k < total) {
        local c = src.substring(k, k + 1)
        if c == "{" {
            nesting = nesting + 1
        } else if c == "}" {
            nesting = nesting - 1
            if nesting == 0 { return src.substring(brace_at + 1, k) }
        }
        k = k + 1
    }
    return ""
}
|
||
|
||
_emit_int(n) {
    // Int literal node; n is stringified by concatenation.
    local text = "" + n
    return "{\"type\":\"Int\",\"value\":" + text + "}"
}

_emit_str(t) {
    // Str literal node; t is inserted verbatim between the quotes.
    local head = "{\"type\":\"Str\",\"value\":\""
    return head + t + "\"}"
}

_emit_var(n) {
    // Variable reference node.
    local head = "{\"type\":\"Var\",\"name\":\""
    return head + n + "\"}"
}

_emit_call(name, args_json) {
    // Call node; args_json is an already comma-joined list of expression nodes.
    local head = "{\"type\":\"Call\",\"name\":\"" + name
    return head + "\" ,\"args\":[" + args_json + "]}"
}

_emit_method(recv_json, m, args_json) {
    // Method-call node on receiver recv_json with method name m.
    local head = "{\"type\":\"Method\",\"recv\":" + recv_json
    local mid = ",\"method\":\"" + m + "\",\"args\":["
    return head + mid + args_json + "]}"
}

_emit_stmt_local(name, expr_json) {
    // Local-declaration statement binding name to expr_json.
    local head = "{\"type\":\"Local\",\"name\":\"" + name
    return head + "\" ,\"expr\":" + expr_json + "}"
}

_emit_stmt_extern_print(expr_json) {
    // Extern statement calling env.console.log with a single argument.
    local head = "{\"type\":\"Extern\",\"iface\":\"env.console\",\"method\":\"log\",\"args\":["
    return head + expr_json + "]}"
}

_emit_stmt_expr(expr_json) {
    // Expression statement wrapper.
    local head = "{\"type\":\"Expr\",\"expr\":"
    return head + expr_json + "}"
}
|
||
|
||
_parse_number(tok) {
    // Parser-facing alias for _parse_signed_int: the integer value of tok,
    // or null when tok is not a signed decimal integer.
    local parsed = me._parse_signed_int(tok)
    return parsed
}
|
||
_emit_key(tok) {
    // Emit the JSON node for an index/map key token:
    //   "text"  -> Str node (quotes stripped)
    //   123     -> Int node
    //   other   -> Var node
    // Previously any non-numeric, unquoted key (e.g. the i in a[i]) was
    // passed to _emit_int as null, producing {"type":"Int","value":null}.
    // Such keys now fall back to a variable reference, mirroring the
    // number-or-variable fallback at the end of _parse_expr_simple.
    // NOTE(review): bare identifier keys in map literals ({a:1}) also become
    // Var nodes here; confirm whether they should be Str keys instead.
    tok = me._trim(tok)
    local len = tok.length()
    if len >= 2 && tok.substring(0, 1) == "\"" && tok.substring(len - 1, len) == "\"" {
        return me._emit_str(tok.substring(1, len - 1))
    }
    local n = me._parse_number(tok)
    if n != null { return me._emit_int(n) }
    return me._emit_var(tok)
}
|
||
|
||
_emit_binary(op, lhs, rhs) {
    // Binary arithmetic node; lhs and rhs are already-serialised expressions.
    local head = "{\"type\":\"Binary\",\"op\":\"" + op + "\""
    local operands = ",\"lhs\":" + lhs + ",\"rhs\":" + rhs
    return head + operands + "}"
}
|
||
|
||
_emit_compare(op, lhs, rhs) {
    // Comparison node; lhs and rhs are already-serialised expressions.
    local head = "{\"type\":\"Compare\",\"op\":\"" + op + "\""
    local operands = ",\"lhs\":" + lhs + ",\"rhs\":" + rhs
    return head + operands + "}"
}
|
||
|
||
_is_operator(tok) {
    // True when tok, after trimming surrounding whitespace, is one of the
    // supported arithmetic (+ - * /) or comparison (> < == != >= <=) operators.
    local op = me._trim(tok)
    local size = op.length()
    if size == 1 {
        if op == "+" || op == "-" || op == "*" || op == "/" || op == ">" || op == "<" { return true }
        return false
    }
    if size == 2 {
        if op == "==" || op == "!=" || op == ">=" || op == "<=" { return true }
    }
    return false
}
|
||
|
||
_get_precedence(op) {
    // Binding strength of an operator: 2 for * and /, 1 for + and -,
    // 0 for everything else (which includes the comparison operators).
    local level = 0
    if op == "+" || op == "-" {
        level = 1
    } else if op == "*" || op == "/" {
        level = 2
    }
    return level
}
|
||
|
||
_is_compare_operator(op) {
    // True when op is exactly one of the six comparison operators.
    // No trimming is applied to op.
    if op == ">" || op == "<" { return true }
    if op == ">=" || op == "<=" { return true }
    if op == "==" || op == "!=" { return true }
    return false
}
|
||
|
||
_find_main_operator(expr) {
    // Stage-A: locate the top-level operator at which expr should be split.
    // Returns "POS,OP" where POS is the operator's index in the trimmed expr
    // and OP is the exact operator text, or "" when there is no operator.
    //
    // The scan runs left to right and keeps the rightmost operator with the
    // lowest precedence, so chains of equal precedence split
    // left-associatively (a - b - c => (a - b) - c).
    //
    // Fixes over the previous right-to-left scan:
    //   - two-character windows such as "+ " or " *" are no longer accepted
    //     as operators (they used to get precedence 0 and leak whitespace
    //     into the emitted op);
    //   - characters inside string literals are never operators;
    //   - nesting is tracked for [] and {} as well as ();
    //   - a + or - with no operand to its left is a sign, not a binary operator.
    expr = me._trim(expr)
    local n = expr.length()
    local depth = 0
    local in_str = 0
    // 1 once an operand has been seen since the last accepted operator.
    local have_operand = 0
    local min_prec = 999
    local best_pos = -1
    local best_op = ""
    local i = 0
    loop(i < n) {
        local ch = expr.substring(i, i + 1)
        local step = 1
        if in_str == 1 {
            if ch == "\\" {
                // Skip the escaped character so \" does not end the literal.
                step = 2
            } else if ch == "\"" {
                in_str = 0
            }
        } else if ch == "\"" {
            in_str = 1
            have_operand = 1
        } else if ch == "(" || ch == "[" || ch == "{" {
            depth = depth + 1
            have_operand = 1
        } else if ch == ")" || ch == "]" || ch == "}" {
            depth = depth - 1
            have_operand = 1
        } else if depth == 0 && ch != " " && ch != "\t" && ch != "\n" && ch != "\r" {
            // Prefer a two-character operator starting here over a single one.
            local op = ""
            if i + 1 < n {
                local two_char = expr.substring(i, i + 2)
                if two_char == "==" || two_char == "!=" || two_char == ">=" || two_char == "<=" { op = two_char }
            }
            if op == "" && me._is_operator(ch) { op = ch }
            if op == "" {
                have_operand = 1
            } else {
                local is_sign = 0
                if have_operand == 0 {
                    if op == "+" || op == "-" { is_sign = 1 }
                }
                if is_sign == 0 {
                    local prec = me._get_precedence(op)
                    // <= keeps the rightmost candidate among equals.
                    if prec <= min_prec {
                        min_prec = prec
                        best_pos = i
                        best_op = op
                    }
                    have_operand = 0
                    step = op.length()
                }
            }
        }
        i = i + step
    }
    if best_pos >= 0 { return ("" + best_pos) + "," + best_op }
    return ""
}
|
||
|
||
_parse_array(expr) {
    // Parse an array literal such as [1,2,3] into a Call node for "array.of".
    // expr must include the surrounding brackets (callers check this).
    //
    // Each comma-separated element is parsed with _parse_expr_simple, the
    // same way _parse_map handles its values. Previously every element went
    // through _parse_number only, so a string or variable element produced
    // {"type":"Int","value":null}. Integer elements yield the same Int nodes
    // as before.
    //
    // Stage-A limitation (unchanged): elements are split on every comma, so
    // nested literals or strings that contain "," are not supported.
    local inner = me._trim(expr.substring(1, expr.length() - 1))
    if inner == "" { return me._emit_call("array.of", "") }

    local out = ""
    local n = inner.length()
    local start = 0
    local i = 0
    loop(i <= n) {
        // An element ends at a comma or at the end of the text.
        if i == n || inner.substring(i, i + 1) == "," {
            local tok = me._trim(inner.substring(start, i))
            // Empty elements (e.g. a trailing comma) are skipped.
            if tok != "" {
                if out != "" { out = out + "," }
                out = out + me._parse_expr_simple(tok)
            }
            start = i + 1
        }
        i = i + 1
    }
    return me._emit_call("array.of", out)
}
|
||
|
||
_parse_map(expr) {
    // Stage-A map literal: {"a":1,"b":2} becomes a Call node for "map.of"
    // whose arguments alternate key, value, key, value...
    // expr must include the surrounding braces. Entries are split on every
    // comma and each entry on its first ":"; an entry without a ":" after
    // its first character is skipped.
    local content = me._trim(expr.substring(1, expr.length() - 1))
    if content == "" { return me._emit_call("map.of", "") }

    local args_json = ""
    local total = content.length()
    local seg_start = 0
    local at = 0
    loop(at <= total) {
        // An entry ends at a comma or at the end of the text.
        if at == total || content.substring(at, at + 1) == "," {
            local entry = me._trim(content.substring(seg_start, at))
            local colon_at = entry.indexOf(":")
            if colon_at > 0 {
                local key_text = me._trim(entry.substring(0, colon_at))
                local value_text = me._trim(entry.substring(colon_at + 1, entry.length()))
                local key_json = me._emit_key(key_text)
                local value_json = me._parse_expr_simple(value_text)
                if args_json != "" { args_json = args_json + "," }
                args_json = args_json + key_json + "," + value_json
            }
            seg_start = at + 1
        }
        at = at + 1
    }
    return me._emit_call("map.of", args_json)
}
|
||
|
||
_parse_expr_simple(tok) {
    // Stage-A expression parser. Tries, in order:
    //   1. binary / compare expression (split at the operator reported by
    //      _find_main_operator, both sides parsed recursively)
    //   2. array literal [..], map literal {..}, string literal ".."
    //   3. index read NAME[KEY]  (string-literal receivers yield an Error node)
    //   4. integer literal, otherwise a variable reference
    // Returns the JSON text of the resulting node.
    local text = me._trim(tok)

    // 1. operator split
    local found = me._find_main_operator(text)
    if found != "" {
        // found has the form "POS,OP"
        local sep = found.indexOf(",")
        local at = me._parse_number(found.substring(0, sep))
        local oper = found.substring(sep + 1, found.length())
        if at != null && at >= 0 {
            local left = me._trim(text.substring(0, at))
            local right = me._trim(text.substring(at + oper.length(), text.length()))
            // Only a split with text on both sides is a binary expression;
            // otherwise fall through to the literal/variable handling.
            if left != "" && right != "" {
                local left_json = me._parse_expr_simple(left)
                local right_json = me._parse_expr_simple(right)
                if me._is_compare_operator(oper) {
                    return me._emit_compare(oper, left_json, right_json)
                }
                return me._emit_binary(oper, left_json, right_json)
            }
        }
    }

    // 2. bracketed / quoted literals
    local len = text.length()
    local last = ""
    if len >= 1 { last = text.substring(len - 1, len) }
    if len >= 2 {
        local first = text.substring(0, 1)
        if first == "[" && last == "]" { return me._parse_array(text) }
        if first == "{" && last == "}" { return me._parse_map(text) }
        if first == "\"" && last == "\"" { return me._emit_str(text.substring(1, len - 1)) }
    }

    // 3. index read: NAME[KEY]
    local bracket = text.indexOf("[")
    if bracket > 0 && last == "]" {
        local target = me._trim(text.substring(0, bracket))
        local key_src = me._trim(text.substring(bracket + 1, len - 1))
        local tlen = target.length()
        // Indexing a string literal is not supported in Stage-A: emit a diagnostic node.
        if tlen >= 2 && target.substring(0, 1) == "\"" && target.substring(tlen - 1, tlen) == "\"" {
            return "{\"type\":\"Error\",\"message\":\"String indexing not supported in Stage-A\"}"
        }
        return me._emit_method(me._emit_var(target), "get", me._emit_key(key_src))
    }

    // 4. number or variable
    local value = me._parse_number(text)
    if value != null { return me._emit_int(value) }
    return me._emit_var(text)
}
|
||
|
||
_parse_stmt(stmt) {
    // Stage-A statement parser. Recognises:
    //   local NAME = EXPR
    //   print(EXPR)
    //   if(COND){STMT}        (whitespace allowed between ")" and "{")
    //   NAME[KEY] = EXPR      (index write)
    // Returns the JSON text of the statement node, or "" when stmt is empty
    // or matches none of the forms.
    //
    // Fixes in the if-branch:
    //   - uses two-argument substring like the rest of the file;
    //   - accepts "if(cond) { stmt }" (previously "{" had to follow ")"
    //     immediately, otherwise the statement was silently dropped);
    //   - the condition ends at the ")" matching "if(", not at the first ")";
    //   - the trailing "}" is stripped only when it is actually present.
    local s = me._trim(stmt)
    if s == "" { return "" }

    // local NAME = EXPR
    if me._starts_with(s, "local ") {
        local rest = me._trim(s.substring(6, s.length()))
        local assign_at = rest.indexOf("=")
        if assign_at > 0 {
            local name = me._trim(rest.substring(0, assign_at))
            local expr = me._trim(rest.substring(assign_at + 1, rest.length()))
            local ej = me._parse_expr_simple(expr)
            return me._emit_stmt_local(name, ej)
        }
    }

    // print(EXPR)
    if me._starts_with(s, "print(") && s.substring(s.length() - 1, s.length()) == ")" {
        local inner = s.substring(6, s.length() - 1)
        local ej = me._parse_expr_simple(inner)
        return me._emit_stmt_extern_print(ej)
    }

    // if(COND){STMT}
    if me._starts_with(s, "if(") {
        // Find the ")" that balances the "(" at index 2.
        local depth = 0
        local rb = -1
        local k = 2
        loop(k < s.length()) {
            local c = s.substring(k, k + 1)
            if c == "(" { depth = depth + 1 }
            if c == ")" {
                depth = depth - 1
                if depth == 0 {
                    rb = k
                    break
                }
            }
            k = k + 1
        }
        if rb > 0 {
            local after = me._trim(s.substring(rb + 1, s.length()))
            if me._starts_with(after, "{") {
                local cond = me._trim(s.substring(3, rb))
                local body_end = after.length()
                if after.substring(body_end - 1, body_end) == "}" { body_end = body_end - 1 }
                // A lone "{" leaves nothing between the braces.
                local body = ""
                if body_end > 1 { body = me._trim(after.substring(1, body_end)) }

                local cond_json = me._parse_expr_simple(cond)
                local stmt_json = me._parse_stmt(body)

                return "{\"type\":\"If\",\"cond\":" + cond_json + ",\"then\":[" + stmt_json + "]}"
            }
        }
    }

    // index write: NAME[KEY] = EXPR
    local eq = s.indexOf("=")
    if eq > 0 {
        local lhs = me._trim(s.substring(0, eq))
        local rhs = me._trim(s.substring(eq + 1, s.length()))
        local lb = lhs.indexOf("[")
        if lb > 0 && lhs.substring(lhs.length() - 1, lhs.length()) == "]" {
            local name = me._trim(lhs.substring(0, lb))
            local idxs = me._trim(lhs.substring(lb + 1, lhs.length() - 1))
            local kj = me._emit_key(idxs)
            local rj = me._parse_expr_simple(rhs)
            local args = kj + "," + rj
            local mj = me._emit_method(me._emit_var(name), "set", args)
            return me._emit_stmt_expr(mj)
        }
    }
    return ""
}
|
||
|
||
_compile_source_to_json_v0(source) {
    // Compile the body of "static method main" in source to Program JSON v0.
    // The body is split on ";" and each piece is handed to _parse_stmt;
    // pieces that are blank or yield no JSON are skipped. When no main body
    // is found, or no statement produced output, the program consists of a
    // single "return 0" statement.
    local body = me._find_main_body(source)
    if body == "" {
        // Fallback: return 0
        return "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}]}"
    }

    local stmts_json = ""
    local remaining = body
    local done = 0
    loop(done == 0) {
        // Cut the next piece off at the first ";" (or take everything left).
        local cut = remaining.indexOf(";")
        local piece = ""
        if cut < 0 {
            piece = remaining
            done = 1
        } else {
            piece = remaining.substring(0, cut)
            remaining = remaining.substring(cut + 1, remaining.length())
        }
        piece = me._trim(piece)
        if piece != "" {
            local node = me._parse_stmt(piece)
            if node != "" {
                if stmts_json != "" { stmts_json = stmts_json + "," }
                stmts_json = stmts_json + node
            }
        }
    }

    if stmts_json == "" {
        stmts_json = "{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}"
    }
    return "{\"version\":0,\"kind\":\"Program\",\"body\":[" + stmts_json + "]}"
}
|
||
|
||
_emit_program_json(ret_value) {
    // Print a Program JSON v0 document whose only statement returns the
    // integer ret_value:
    // {"version":0,"kind":"Program","body":[{"type":"Return","expr":{"type":"Int","value":ret_value}}]}
    local value_text = "" + ret_value
    local json = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":" + value_text + "}}]}"
    print(json)
}
|
||
|
||
main(args) {
    // Entry point. Prints exactly one Program JSON v0 document and returns 0:
    //   --min-json            compile --source (a missing or unparsable
    //                         source yields the "return 0" program)
    //   --source TEXT only    compile TEXT
    //   neither               constant-return program using --return-int,
    //                         defaulting to 42
    // Every path now returns 0; the --min-json path previously used a bare
    // "return" while the others returned 0.
    local flags = me._collect_flags(args)

    if flags.emit == 1 {
        local json = me._compile_source_to_json_v0(flags.source)
        print(json)
        return 0
    }

    // NOTE(review): the original comment here said Stage-A emits JSON only
    // when --min-json is given, yet this branch prints JSON for any
    // non-empty --source. Behaviour is kept as-is; confirm which is intended.
    if flags.source != null && flags.source != "" {
        // _compile_source_to_json_v0 always returns a non-empty document, so
        // no empty-string fallback is needed here.
        local json = me._compile_source_to_json_v0(flags.source)
        print(json)
        return 0
    }

    // Fallback: constant return-int program.
    local ret = flags.ret
    if ret == null { ret = 42 }
    me._emit_program_json(ret)
    return 0
}
|
||
}
|