hako(compiler): Stage-A enhancements - map literals, binary/compare operators, if statements, and error diagnostics

- Implement map literal parsing with basic key/value pairs: {a:1,b:2}
- Add binary operators (+, -, *, /) with precedence handling
- Add comparison operators (>, <, ==, !=, >=, <=) for if statements
- Implement minimal if statement parsing: if(condition){statement}
- Add string indexing error diagnostic for unsupported Stage-A features
- Create new smoke tests: hako_min_binop_vm.sh and hako_min_if_vm.sh
- Enhance JSON v0 output with proper ExprV0.Binary and ExprV0.Compare structures

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
This commit is contained in:
nyash-codex
2025-10-31 22:48:46 +09:00
parent e5f697eb22
commit 5208491e6e
3 changed files with 643 additions and 14 deletions

View File

@ -2,24 +2,405 @@
// - When invoked with --min-json, emit minimal Program JSON v0 to stdout
// - Otherwise, compile --source when provided; else emit a constant Return program (--return-int, default 42)
static box CompilerEntry {
main(args) {
// Detect --min-json flag
local emit = 0
if args != null {
local n = args.length()
local i = 0
loop(i < n) {
local a = args.get(i)
// Robust compare: coerce to string before equality
local s = "" + a
if s == "--min-json" { emit = 1 break }
static box Main {
// Parse a base-10 integer with an optional leading "-".
// The argument is coerced to a string first. Returns the integer value, or
// null when the input is null, empty, a lone "-", or holds a non-digit.
_parse_signed_int(raw) {
    if raw == null { return null }
    local text = "" + raw
    local size = text.length()
    if size == 0 { return null }
    local negative = 0
    local pos = 0
    if text.substring(0, 1) == "-" {
        negative = 1
        pos = 1
    }
    // A bare "-" carries no digits.
    if pos >= size { return null }
    local digits = "0123456789"
    local total = 0
    loop(pos < size) {
        local ch = text.substring(pos, pos + 1)
        if ch < "0" || ch > "9" { return null }
        local value = digits.indexOf(ch)
        if value < 0 { return null }
        total = total * 10 + value
        pos = pos + 1
    }
    if negative == 1 { return 0 - total }
    return total
}
// Scan the CLI argument list into a flags map.
//   --min-json        -> flags.emit = 1
//   --source TEXT     -> flags.source = TEXT (coerced to string)
//   --return-int N    -> flags.ret = N when N parses as a signed integer
// Unknown tokens are ignored. A value-taking flag that is the last token is
// ignored as well, because it has no value to consume.
// Returns the map { emit, ret, source }; a null args list yields the defaults.
_collect_flags(args) {
    local flags = { emit: 0, ret: null, source: null }
    if args == null { return flags }
    local i = 0
    local n = args.length()
    loop(i < n) {
        local token = "" + args.get(i)
        if token == "--min-json" {
            flags.emit = 1
        } else if token == "--source" && i + 1 < n {
            flags.source = "" + args.get(i + 1)
            // skip the value that was just consumed
            i = i + 1
        } else if token == "--return-int" && i + 1 < n {
            local parsed = me._parse_signed_int(args.get(i + 1))
            if parsed != null { flags.ret = parsed }
            i = i + 1
        }
        i = i + 1
    }
    // The stale `if emit == 1 { print(...) }` lines that sat here referenced an
    // undefined variable and left the method unbalanced; they are dropped.
    return flags
}
// ----- Minimal parser utilities (Stage-A) -----
// Strip leading and trailing spaces, tabs, CR and LF from s.
// A null argument yields the empty string.
_trim(s) {
    if s == null { return "" }
    local blanks = " \n\r\t"
    local head = 0
    local tail = s.length()
    // advance past leading whitespace
    loop(head < tail) {
        if blanks.indexOf(s.substring(head, head + 1)) < 0 { break }
        head = head + 1
    }
    // retreat past trailing whitespace
    loop(tail > head) {
        if blanks.indexOf(s.substring(tail - 1, tail)) < 0 { break }
        tail = tail - 1
    }
    return s.substring(head, tail)
}
// True when s begins with pref. Both arguments must be non-null strings.
// Always returns a boolean (the short-input path used to return the Int 0,
// giving the method two different result types).
_starts_with(s, pref) {
    if s.length() < pref.length() { return false }
    return s.substring(0, pref.length()) == pref
}
// Return the text between the braces of the first "static method main" in
// src, or "" when src is null, the marker is missing, or braces never balance.
// Braces are counted literally; braces inside string literals are not skipped.
_find_main_body(src) {
    if src == null { return "" }
    local anchor = src.indexOf("static method main")
    if anchor < 0 { return "" }
    // search the tail so that only the 1-arg indexOf is needed
    local rest = src.substring(anchor, src.length())
    local brace_rel = rest.indexOf("{")
    if brace_rel < 0 { return "" }
    local brace_at = anchor + brace_rel
    local total = src.length()
    local nesting = 0
    local pos = brace_at
    loop(pos < total) {
        local ch = src.substring(pos, pos + 1)
        if ch == "{" {
            nesting = nesting + 1
        } else if ch == "}" {
            nesting = nesting - 1
            // back at the opening level: this is the matching brace
            if nesting == 0 { return src.substring(brace_at + 1, pos) }
        }
        pos = pos + 1
    }
    return ""
}
// ----- JSON v0 node emitters: each returns one node serialized as a string -----
// Text arguments are inserted verbatim; no JSON escaping is applied here.

// Int literal node; n is stringified with "" + n.
_emit_int(n) {
    local digits = "" + n
    return "{\"type\":\"Int\",\"value\":" + digits + "}"
}
// Str literal node holding t.
_emit_str(t) {
    local head = "{\"type\":\"Str\",\"value\":\""
    return head + t + "\"}"
}
// Var reference node for the variable named n.
_emit_var(n) {
    local head = "{\"type\":\"Var\",\"name\":\""
    return head + n + "\"}"
}
// Call node; args_json is an already comma-joined list of argument nodes.
_emit_call(name, args_json) {
    local head = "{\"type\":\"Call\",\"name\":\"" + name
    return head + "\" ,\"args\":[" + args_json + "]}"
}
// Method node invoking m on the receiver node recv_json.
_emit_method(recv_json, m, args_json) {
    local head = "{\"type\":\"Method\",\"recv\":" + recv_json
    local mid = ",\"method\":\"" + m
    return head + mid + "\",\"args\":[" + args_json + "]}"
}
// Local statement binding name to the expression node expr_json.
_emit_stmt_local(name, expr_json) {
    local head = "{\"type\":\"Local\",\"name\":\"" + name
    return head + "\" ,\"expr\":" + expr_json + "}"
}
// Extern statement calling env.console.log with one argument node.
_emit_stmt_extern_print(expr_json) {
    local head = "{\"type\":\"Extern\",\"iface\":\"env.console\",\"method\":\"log\",\"args\":["
    return head + expr_json + "]}"
}
// Expression statement wrapping expr_json.
_emit_stmt_expr(expr_json) {
    local head = "{\"type\":\"Expr\",\"expr\":"
    return head + expr_json + "}"
}
// Parse tok as a signed integer; null when it is not one.
_parse_number(tok) {
    local parsed = me._parse_signed_int(tok)
    return parsed
}
// Emit the JSON node for an index or map key token.
//   "text"        -> Str node (quotes removed)
//   signed int    -> Int node
//   anything else -> Var node named after the trimmed token
// The Var fallback replaces the former behavior of emitting an Int node whose
// value was null (e.g. for a[i]).
// NOTE(review): bare identifiers used as map-literal keys also become Var
// nodes here; confirm whether Stage-A wants them emitted as Str instead.
_emit_key(tok) {
    tok = me._trim(tok)
    if tok.length() >= 2 && tok.substring(0,1) == "\"" && tok.substring(tok.length()-1,tok.length()) == "\"" {
        return me._emit_str(tok.substring(1,tok.length()-1))
    }
    local n = me._parse_number(tok)
    if n != null { return me._emit_int(n) }
    return me._emit_var(tok)
}
// Binary node: op is the operator text, lhs/rhs are serialized operand nodes.
_emit_binary(op, lhs, rhs) {
    local head = "{\"type\":\"Binary\",\"op\":\"" + op
    local operands = "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs
    return head + operands + "}"
}
// Compare node: same layout as Binary but tagged "Compare".
_emit_compare(op, lhs, rhs) {
    local head = "{\"type\":\"Compare\",\"op\":\"" + op
    local operands = "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs
    return head + operands + "}"
}
// True when tok, after trimming whitespace, is one of the supported
// arithmetic (+ - * /) or comparison (> < == != >= <=) operators.
_is_operator(tok) {
    local t = me._trim(tok)
    if t == "+" || t == "-" || t == "*" || t == "/" { return true }
    if t == "==" || t == "!=" || t == ">=" || t == "<=" { return true }
    if t == ">" || t == "<" { return true }
    return false
}
// Binding strength of op: 2 for * and /, 1 for + and -, 0 for anything else
// (which includes the comparison operators).
_get_precedence(op) {
    local level = 0
    if op == "+" || op == "-" { level = 1 }
    if op == "*" || op == "/" { level = 2 }
    return level
}
// True when op is exactly one of > < == != >= <= (no trimming is applied).
_is_compare_operator(op) {
    if op == "==" || op == "!=" { return true }
    if op == ">=" || op == "<=" { return true }
    if op == ">" || op == "<" { return true }
    return false
}
// Locate the root operator of expr: the rightmost top-level binary operator
// with the lowest precedence, which makes operators left-associative.
// Returns "<pos>,<op>" (pos is an index into the trimmed expr) or "" when no
// binary operator is found.
//
// Differences from the previous right-to-left scan:
//   - text inside "..." literals is skipped (backslash escapes honored)
//   - (), [] and {} all count as nesting, so a[i+1] or {"k":1+2} is not split
//   - two-character operators are matched exactly, so surrounding whitespace
//     can no longer turn "* " into an operator of precedence 0
//   - an operator with no operand before it (unary minus) is not a split point
_find_main_operator(expr) {
    expr = me._trim(expr)
    local n = expr.length()
    local depth = 0
    local in_str = 0
    local have_operand = 0
    local min_prec = 999
    local best_pos = -1
    local best_op = ""
    local i = 0
    loop(i < n) {
        local ch = expr.substring(i, i + 1)
        local op = ""
        local is_ws = 0
        if ch == " " || ch == "\t" || ch == "\n" || ch == "\r" { is_ws = 1 }
        if in_str == 1 {
            if ch == "\\" {
                // skip the escaped character
                i = i + 1
            } else if ch == "\"" {
                in_str = 0
            }
        } else if ch == "\"" {
            in_str = 1
            have_operand = 1
        } else if ch == "(" || ch == "[" || ch == "{" {
            depth = depth + 1
        } else if ch == ")" || ch == "]" || ch == "}" {
            depth = depth - 1
            // a closed group is an operand for whatever follows
            have_operand = 1
        } else if depth == 0 && is_ws == 0 {
            if i + 1 < n {
                local two = expr.substring(i, i + 2)
                if two == "==" || two == "!=" || two == ">=" || two == "<=" { op = two }
            }
            if op == "" && me._is_operator(ch) { op = ch }
            if op == "" { have_operand = 1 }
        }
        if op != "" {
            if have_operand == 1 {
                local prec = me._get_precedence(op)
                // "<=" keeps the rightmost candidate among equal precedence
                if prec <= min_prec {
                    min_prec = prec
                    best_pos = i
                    best_op = op
                }
            }
            have_operand = 0
            i = i + op.length()
        } else {
            i = i + 1
        }
    }
    if best_pos >= 0 { return ("" + best_pos) + "," + best_op }
    return ""
}
// Parse an array literal such as [1,2,3] into a Call node for "array.of".
// expr must include the surrounding brackets. Integer elements become Int
// nodes; any other element is delegated to _parse_expr_simple (previously it
// was emitted as an Int node with a null value).
// Limitation: elements are split on every comma, so nested literals or
// strings containing "," are not supported.
_parse_array(expr) {
    local inner = me._trim(expr.substring(1, expr.length()-1))
    if inner == "" { return me._emit_call("array.of", "") }
    local out = ""
    local i = 0
    local n = inner.length()
    loop(i <= n) {
        // j stops on the next comma, or at n when none is left
        local j = i
        loop(j < n) {
            if inner.substring(j, j + 1) == "," { break }
            j = j + 1
        }
        local tok = me._trim(inner.substring(i, j))
        if tok != "" {
            local num = me._parse_number(tok)
            local item = ""
            if num != null {
                item = me._emit_int(num)
            } else {
                item = me._parse_expr_simple(tok)
            }
            if out != "" { out = out + "," }
            out = out + item
        }
        if j >= n { break }
        i = j + 1
    }
    return me._emit_call("array.of", out)
}
// Parse a map literal such as {"a":1,"b":2} into a Call node for "map.of"
// whose arguments alternate key node, value node.
// Pairs are split on every comma and on the first ":" of each pair; a pair
// with no ":" (or with ":" as its first character) is skipped.
_parse_map(expr) {
    local inner = me._trim(expr.substring(1, expr.length()-1))
    if inner == "" { return me._emit_call("map.of", "") }
    local args = ""
    local total = inner.length()
    local start = 0
    loop(start <= total) {
        // stop lands on the next comma, or on total when none is left
        local stop = start
        loop(stop < total) {
            if inner.substring(stop, stop + 1) == "," { break }
            stop = stop + 1
        }
        local pair = me._trim(inner.substring(start, stop))
        local colon = -1
        if pair != "" { colon = pair.indexOf(":") }
        if colon > 0 {
            local key_text = me._trim(pair.substring(0, colon))
            local value_text = me._trim(pair.substring(colon + 1, pair.length()))
            local key_json = me._emit_key(key_text)
            local val_json = me._parse_expr_simple(value_text)
            if args != "" { args = args + "," }
            args = args + key_json + "," + val_json
        }
        if stop >= total { break }
        start = stop + 1
    }
    return me._emit_call("map.of", args)
}
// Parse one Stage-A expression and return its JSON v0 node as a string.
// Forms are tried in this order:
//   1. binary / compare expression (split at the operator reported by
//      _find_main_operator; both sides are parsed recursively)
//   2. array literal [..], map literal {..}, string literal ".."
//   3. index read NAME[KEY] -> Method "get" on Var NAME
//   4. signed integer -> Int node
//   5. anything else -> Var node named after the trimmed token
_parse_expr_simple(tok) {
// StageA: number, "string", variable, array/map literal, index read a[0], binary/compare expressions
tok = me._trim(tok)
// check for binary/compare operators first
local op_info = me._find_main_operator(tok)
if op_info != "" {
// op_info is "<pos>,<op>": decode the position and the operator text
local comma = op_info.indexOf(",")
local pos_str = op_info.substring(0, comma)
local pos = me._parse_number(pos_str)
local op = op_info.substring(comma+1, op_info.length())
if pos != null && pos >= 0 {
local lhs = me._trim(tok.substring(0, pos))
local rhs = me._trim(tok.substring(pos + op.length(), tok.length()))
// an empty side (e.g. the leading "-" of a negative number) means this is
// not a binary expression; fall through to the literal checks below
if lhs != "" && rhs != "" {
local lhs_json = me._parse_expr_simple(lhs)
local rhs_json = me._parse_expr_simple(rhs)
if me._is_compare_operator(op) {
return me._emit_compare(op, lhs_json, rhs_json)
} else {
return me._emit_binary(op, lhs_json, rhs_json)
}
}
}
}
// literals are recognized by their first and last character
if tok.length() >= 2 && tok.substring(0,1) == "[" && tok.substring(tok.length()-1,tok.length()) == "]" { return me._parse_array(tok) }
if tok.length() >= 2 && tok.substring(0,1) == "{" && tok.substring(tok.length()-1,tok.length()) == "}" { return me._parse_map(tok) }
if tok.length() >= 2 && tok.substring(0,1) == "\"" && tok.substring(tok.length()-1,tok.length()) == "\"" { return me._emit_str(tok.substring(1,tok.length()-1)) }
// index read: name[KEY]
local lb = tok.indexOf("[")
if lb > 0 && tok.substring(tok.length()-1,tok.length()) == "]" {
local name = me._trim(tok.substring(0,lb))
local idxs = me._trim(tok.substring(lb+1, tok.length()-1))
// Check for string indexing (unsupported in Stage-A)
if name.length() >= 2 && name.substring(0,1) == "\"" && name.substring(name.length()-1,name.length()) == "\"" {
// String indexing not supported: return error diagnostic
return "{\"type\":\"Error\",\"message\":\"String indexing not supported in Stage-A\"}"
}
local kj = me._emit_key(idxs)
return me._emit_method(me._emit_var(name), "get", kj)
}
// number or variable
local n = me._parse_number(tok)
if n != null { return me._emit_int(n) }
return me._emit_var(tok)
}
// Return the index of the bracket that closes the one at text[open_at], or -1
// when it is never closed. (), [] and {} all nest; "..." literals are skipped.
_stmt_match_close(text, open_at) {
    local depth = 0
    local in_str = 0
    local n = text.length()
    local k = open_at
    loop(k < n) {
        local ch = text.substring(k, k + 1)
        if in_str == 1 {
            if ch == "\\" {
                // skip the escaped character
                k = k + 1
            } else if ch == "\"" {
                in_str = 0
            }
        } else if ch == "\"" {
            in_str = 1
        } else if ch == "(" || ch == "[" || ch == "{" {
            depth = depth + 1
        } else if ch == ")" || ch == "]" || ch == "}" {
            depth = depth - 1
            if depth == 0 { return k }
        }
        k = k + 1
    }
    return -1
}
// Parse a statement sequence (the inside of a block) into a comma-joined list
// of statement nodes. Statements end at a top-level ";" or right after the
// closing "}" of an if(...) statement; unparseable statements are dropped.
_parse_stmt_list(body) {
    local out = ""
    local n = body.length()
    local start = 0
    local depth = 0
    local in_str = 0
    local i = 0
    loop(i <= n) {
        // stop >= 0 marks the exclusive end of a finished statement
        local stop = -1
        if i == n {
            stop = n
        } else {
            local ch = body.substring(i, i + 1)
            if in_str == 1 {
                if ch == "\\" && i + 1 < n {
                    i = i + 1
                } else if ch == "\"" {
                    in_str = 0
                }
            } else if ch == "\"" {
                in_str = 1
            } else if ch == "{" || ch == "(" || ch == "[" {
                depth = depth + 1
            } else if ch == "}" || ch == ")" || ch == "]" {
                depth = depth - 1
                if ch == "}" && depth == 0 {
                    local head = me._trim(body.substring(start, i + 1))
                    if me._starts_with(head, "if(") { stop = i + 1 }
                }
            } else if ch == ";" && depth <= 0 {
                stop = i
            }
        }
        if stop >= 0 {
            local sj = me._parse_stmt(body.substring(start, stop))
            if sj != "" {
                if out != "" { out = out + "," }
                out = out + sj
            }
            start = i + 1
        }
        i = i + 1
    }
    return out
}
// Parse one Stage-A statement and return its JSON v0 node, or "" when the
// statement is empty or not recognized. Supported forms:
//   local NAME = EXPR
//   print(EXPR)
//   if(COND){ STMT; ... }
//   NAME[KEY] = EXPR
//
// The if form now finds the ")" that really closes the condition, tolerates
// whitespace before "{", accepts several ";"-separated body statements, and
// copes with a body whose closing "}" was cut off by a caller that split the
// source on ";". Previously the last character was dropped unconditionally,
// so `if(5>4){ print(1); }` produced an empty "then" list.
_parse_stmt(stmt) {
    local s = me._trim(stmt)
    if s == "" { return "" }
    if me._starts_with(s, "local ") {
        local rest = me._trim(s.substring(6, s.length()))
        local eq = rest.indexOf("=")
        if eq > 0 {
            local name = me._trim(rest.substring(0, eq))
            local expr = me._trim(rest.substring(eq+1, rest.length()))
            local ej = me._parse_expr_simple(expr)
            return me._emit_stmt_local(name, ej)
        }
    }
    // print(EXPR)
    if me._starts_with(s, "print(") && s.substring(s.length()-1,s.length()) == ")" {
        local inner = s.substring(6, s.length()-1)
        local ej = me._parse_expr_simple(inner)
        return me._emit_stmt_extern_print(ej)
    }
    // if(condition) { statements }
    if me._starts_with(s, "if(") {
        // index 2 is the "(" that opens the condition
        local rb = me._stmt_match_close(s, 2)
        if rb > 0 {
            local after = me._trim(s.substring(rb + 1, s.length()))
            if me._starts_with(after, "{") {
                local cond = me._trim(s.substring(3, rb))
                // use the matching "}" when present; otherwise take the rest
                local body_end = after.length()
                local close = me._stmt_match_close(after, 0)
                if close > 0 { body_end = close }
                local body = after.substring(1, body_end)
                local cond_json = me._parse_expr_simple(cond)
                local then_json = me._parse_stmt_list(body)
                return "{\"type\":\"If\",\"cond\":" + cond_json + ",\"then\":[" + then_json + "]}"
            }
        }
    }
    // index write: NAME[KEY] = EXPR
    local eq = s.indexOf("=")
    if eq > 0 {
        local lhs = me._trim(s.substring(0, eq))
        local rhs = me._trim(s.substring(eq+1, s.length()))
        local lb = lhs.indexOf("[")
        if lb > 0 && lhs.substring(lhs.length()-1,lhs.length()) == "]" {
            local name = me._trim(lhs.substring(0,lb))
            local idxs = me._trim(lhs.substring(lb+1, lhs.length()-1))
            local kj = me._emit_key(idxs)
            local rj = me._parse_expr_simple(rhs)
            local args = kj + "," + rj
            local mj = me._emit_method(me._emit_var(name), "set", args)
            return me._emit_stmt_expr(mj)
        }
    }
    return ""
}
// Compile Hako source text into a Program JSON v0 string.
// Only the body of the first "static method main" is compiled. When no body
// is found, or no statement is recognized, the program is a single
// `Return Int 0`.
//
// Statements end at a ";" that is outside string literals and outside any
// (), [] or {} group, or right after the "}" that closes an if(...) block.
// (Splitting on every ";" used to cut `print("a;b")` and if-bodies in half.)
_compile_source_to_json_v0(source) {
    local body = me._find_main_body(source)
    if body == "" {
        // Fallback: return 0
        return "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}]}"
    }
    local out = ""
    local n = body.length()
    local start = 0
    local depth = 0
    local in_str = 0
    local i = 0
    loop(i <= n) {
        // stop >= 0 marks the exclusive end of a finished statement
        local stop = -1
        if i == n {
            stop = n
        } else {
            local ch = body.substring(i, i + 1)
            if in_str == 1 {
                if ch == "\\" && i + 1 < n {
                    // skip the escaped character
                    i = i + 1
                } else if ch == "\"" {
                    in_str = 0
                }
            } else if ch == "\"" {
                in_str = 1
            } else if ch == "{" || ch == "(" || ch == "[" {
                depth = depth + 1
            } else if ch == "}" || ch == ")" || ch == "]" {
                depth = depth - 1
                if ch == "}" && depth == 0 {
                    // an if-block ends at its closing brace, no ";" needed
                    local head = me._trim(body.substring(start, i + 1))
                    if me._starts_with(head, "if(") { stop = i + 1 }
                }
            } else if ch == ";" && depth <= 0 {
                stop = i
            }
        }
        if stop >= 0 {
            local stmt = me._trim(body.substring(start, stop))
            if stmt != "" {
                local sj = me._parse_stmt(stmt)
                if sj != "" {
                    if out != "" { out = out + "," }
                    out = out + sj
                }
            }
            start = i + 1
        }
        i = i + 1
    }
    if out == "" { out = "{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}" }
    return "{\"version\":0,\"kind\":\"Program\",\"body\":[" + out + "]}"
}
// Print a Program JSON v0 whose only statement returns the Int ret_value:
// {"version":0,"kind":"Program","body":[{"type":"Return","expr":{"type":"Int","value":ret_value}}]}
_emit_program_json(ret_value) {
    local value_text = "" + ret_value
    local head = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":"
    print(head + value_text + "}}]}")
}
// Entry point. Prints exactly one Program JSON v0 and returns 0.
//   --min-json            : compile --source (a missing source yields the
//                           `Return Int 0` fallback program)
//   --source without flag : compile the source as well
//   neither               : emit a constant `Return Int N`, where N comes
//                           from --return-int and defaults to 42
main(args) {
    local flags = me._collect_flags(args)
    if flags.emit == 1 {
        local json = me._compile_source_to_json_v0(flags.source)
        print(json)
        // was a bare `return`; every path now reports 0 explicitly
        return 0
    }
    // Without --min-json, a non-empty --source is still compiled and printed.
    if flags.source != null && flags.source != "" {
        local json = me._compile_source_to_json_v0(flags.source)
        if json == "" { json = "{\"version\":0,\"kind\":\"Program\",\"body\":[{\"type\":\"Return\",\"expr\":{\"type\":\"Int\",\"value\":0}}]}" }
        print(json)
        return 0
    }
    // fallback: constant return-int
    local ret = flags.ret
    if ret == null { ret = 42 }
    me._emit_program_json(ret)
    return 0
}
}

View File

@ -0,0 +1,124 @@
#!/bin/bash
# hako_min_binop_vm.sh — Hako minimum binary operators canary (opt-in)
# Treat unset variables as errors and make a pipeline fail when any stage fails.
# Note: -e is not enabled, so a failing command does not abort the script by itself.
set -uo pipefail
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Detect the repo root via git; otherwise fall back to five directory levels
# above this script.
if ROOT_GIT=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel 2>/dev/null); then
ROOT="$ROOT_GIT"
else
ROOT="$(cd "$SCRIPT_DIR/../../../../.." && pwd)"
fi
# HAKO_BIN may be supplied by the environment; it defaults to tools/bin/hako
# under the repo root. It is only used by require_hako as the opt-in gate.
HAKO_BIN_DEFAULT="$ROOT/tools/bin/hako"
HAKO_BIN="${HAKO_BIN:-$HAKO_BIN_DEFAULT}"
# Logging helpers. Every message goes to stderr prefixed with its level tag;
# fail additionally returns 1 so callers can write `fail ... || exit 1`.
_log() {
  local tag="$1"
  shift
  echo -e "[$tag] $*" >&2
}
warn() {
  _log WARN "$@"
}
info() {
  _log INFO "$@"
}
fail() {
  _log FAIL "$@"
  return 1
}
pass() {
  _log PASS "$@"
}
# Opt-in gate: when $HAKO_BIN is not an executable file, log two warnings and
# exit the whole script with status 0 (skip). Otherwise return normally.
require_hako() {
  [ -x "$HAKO_BIN" ] && return 0
  warn "Hako binary not found: $HAKO_BIN (set HAKO_BIN to override)"
  warn "Skipping Hako binop canaries"
  exit 0
}
# Compile Hako code to JSON v0 via the selfhost compiler.
# Arguments: $1 - Hako source text
# Outputs:   the path of the generated JSON file on stdout
# Returns:   0 on success; 1 when no Program JSON line could be extracted
#
# The compiler's exit status is captured immediately after it runs (it used to
# be read after `rm -f`, which always reported 0), and success is judged by a
# non-empty JSON file (the redirect always creates the file, so -f never failed).
hako_compile_to_mir() {
  local code="$1"
  local hako_tmp="/tmp/hako_binop_$$.hako"
  local json_out="/tmp/hako_binop_$$.mir.json"
  local raw="/tmp/hako_binop_raw_$$.txt"
  local rc
  printf "%s\n" "$code" > "$hako_tmp"
  # Selfhost Compiler: Hako → JSON v0 (capture noise then extract JSON line)
  NYASH_PARSER_ALLOW_SEMICOLON=1 \
  NYASH_SYNTAX_SUGAR_LEVEL=full \
  NYASH_ENABLE_ARRAY_LITERAL=1 \
  NYASH_QUIET=1 HAKO_QUIET=1 NYASH_CLI_VERBOSE=0 \
  "$ROOT/target/release/nyash" --backend vm \
  "$ROOT/lang/src/compiler/entry/compiler.hako" -- --min-json --source "$(cat "$hako_tmp")" > "$raw" 2>&1
  rc=$?
  # Keep only the first line that looks like a Program JSON v0 document.
  awk '/"version":0/ && /"kind":"Program"/ {print; exit}' "$raw" > "$json_out"
  rm -f "$raw" "$hako_tmp"
  if [ ! -s "$json_out" ]; then
    warn "Compilation failed (rc=$rc)"
    rm -f "$json_out"
    return 1
  fi
  echo "$json_out"
  return 0
}
# Execute a JSON v0 file via Gate-C (--json-file) and print the program's last
# meaningful output line.
# Arguments: $1 - path of the JSON file (removed after the run)
# Returns:   1 when the file is missing; otherwise the status of the output
#            filter pipeline
#
# The quiet-mode variables are now placed on the nyash command itself. Before,
# they preceded the `out=` assignment, which made them plain shell variables
# that the interpreter never received. `out` is also local now.
run_mir_via_gate_c() {
  local json_path="$1"
  local out rc
  if [ ! -f "$json_path" ]; then
    warn "JSON file not found: $json_path"
    return 1
  fi
  # Gate-C execution (JSON v0 → MIR Interpreter), with noise suppressed
  out="$(NYASH_QUIET=1 \
    HAKO_QUIET=1 \
    NYASH_CLI_VERBOSE=0 \
    NYASH_NYRT_SILENT_RESULT=1 \
    "$ROOT/target/release/nyash" --json-file "$json_path" 2>&1)"
  # Filter: drop interpreter headers and Result lines; print the last meaningful line
  printf '%s\n' "$out" | awk '/^(✅|ResultType|Result:)/{next} NF{last=$0} END{ if(last) print last }'
  rc=$?
  rm -f "$json_path"
  return $rc
}
# Two-stage execution: compile the given Hako source, then run the result.
# Arguments: $1 - Hako source text
# Returns:   1 when compilation fails, otherwise the status of the run stage.
run_hako() {
  local code="$1"
  local json_path
  if ! json_path=$(hako_compile_to_mir "$code"); then
    return 1
  fi
  run_mir_via_gate_c "$json_path"
}
# Compare actual output with the expected text.
# Arguments: $1 - expected, $2 - actual, $3 - test name
# Returns:   0 and logs PASS on an exact match; otherwise prints both values
#            to stderr, logs FAIL and returns 1.
check_exact() {
  local expect="$1"
  local got="$2"
  local name="$3"
  if [ "$got" != "$expect" ]; then
    printf "Expected: %s\nActual: %s\n" "$expect" "$got" >&2
    fail "$name"
    return 1
  fi
  pass "$name"
  return 0
}
# run_case TITLE CODE EXPECT NAME
# Log TITLE, compile and run CODE, then require its output to equal EXPECT.
# The script exits with status 1 on the first failing case. The exit status of
# run_hako is checked as well, so a failed compile/run step can no longer be
# mistaken for a program that printed nothing.
run_case() {
  local title="$1"
  local code="$2"
  local expect="$3"
  local name="$4"
  local out
  info "$title"
  if ! out=$(run_hako "$code"); then
    fail "$name (compile/run step failed)"
    exit 1
  fi
  check_exact "$expect" "$out" "$name" || exit 1
}
require_hako
run_case "Hako binop canary: simple addition" \
  'box Main { static method main() { print(1+2); } }' "3" "hako_binop_add"
run_case "Hako binop canary: precedence (multiply before add)" \
  'box Main { static method main() { print(1+2*3); } }' "7" "hako_binop_precedence"
run_case "Hako binop canary: subtraction" \
  'box Main { static method main() { print(10-4); } }' "6" "hako_binop_sub"
run_case "Hako binop canary: division" \
  'box Main { static method main() { print(8/2); } }' "4" "hako_binop_div"
exit 0

View File

@ -0,0 +1,124 @@
#!/bin/bash
# hako_min_if_vm.sh — Hako minimum if-statement canary (opt-in)
# Treat unset variables as errors and make a pipeline fail when any stage fails.
# Note: -e is not enabled, so a failing command does not abort the script by itself.
set -uo pipefail
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Detect the repo root via git; otherwise fall back to five directory levels
# above this script.
if ROOT_GIT=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel 2>/dev/null); then
ROOT="$ROOT_GIT"
else
ROOT="$(cd "$SCRIPT_DIR/../../../../.." && pwd)"
fi
# HAKO_BIN may be supplied by the environment; it defaults to tools/bin/hako
# under the repo root. It is only used by require_hako as the opt-in gate.
HAKO_BIN_DEFAULT="$ROOT/tools/bin/hako"
HAKO_BIN="${HAKO_BIN:-$HAKO_BIN_DEFAULT}"
# Logging helpers. Every message goes to stderr prefixed with its level tag;
# fail additionally returns 1 so callers can write `fail ... || exit 1`.
_log() {
  local tag="$1"
  shift
  echo -e "[$tag] $*" >&2
}
warn() {
  _log WARN "$@"
}
info() {
  _log INFO "$@"
}
fail() {
  _log FAIL "$@"
  return 1
}
pass() {
  _log PASS "$@"
}
# Opt-in gate: when $HAKO_BIN is not an executable file, log two warnings and
# exit the whole script with status 0 (skip). Otherwise return normally.
require_hako() {
  [ -x "$HAKO_BIN" ] && return 0
  warn "Hako binary not found: $HAKO_BIN (set HAKO_BIN to override)"
  warn "Skipping Hako if-statement canaries"
  exit 0
}
# Compile Hako code to JSON v0 via the selfhost compiler.
# Arguments: $1 - Hako source text
# Outputs:   the path of the generated JSON file on stdout
# Returns:   0 on success; 1 when no Program JSON line could be extracted
#
# The compiler's exit status is captured immediately after it runs (it used to
# be read after `rm -f`, which always reported 0), and success is judged by a
# non-empty JSON file (the redirect always creates the file, so -f never failed).
hako_compile_to_mir() {
  local code="$1"
  local hako_tmp="/tmp/hako_if_$$.hako"
  local json_out="/tmp/hako_if_$$.mir.json"
  local raw="/tmp/hako_if_raw_$$.txt"
  local rc
  printf "%s\n" "$code" > "$hako_tmp"
  # Selfhost Compiler: Hako → JSON v0 (capture noise then extract JSON line)
  NYASH_PARSER_ALLOW_SEMICOLON=1 \
  NYASH_SYNTAX_SUGAR_LEVEL=full \
  NYASH_ENABLE_ARRAY_LITERAL=1 \
  NYASH_QUIET=1 HAKO_QUIET=1 NYASH_CLI_VERBOSE=0 \
  "$ROOT/target/release/nyash" --backend vm \
  "$ROOT/lang/src/compiler/entry/compiler.hako" -- --min-json --source "$(cat "$hako_tmp")" > "$raw" 2>&1
  rc=$?
  # Keep only the first line that looks like a Program JSON v0 document.
  awk '/"version":0/ && /"kind":"Program"/ {print; exit}' "$raw" > "$json_out"
  rm -f "$raw" "$hako_tmp"
  if [ ! -s "$json_out" ]; then
    warn "Compilation failed (rc=$rc)"
    rm -f "$json_out"
    return 1
  fi
  echo "$json_out"
  return 0
}
# Execute a JSON v0 file via Gate-C (--json-file) and print the program's last
# meaningful output line.
# Arguments: $1 - path of the JSON file (removed after the run)
# Returns:   1 when the file is missing; otherwise the status of the output
#            filter pipeline
#
# The quiet-mode variables are now placed on the nyash command itself. Before,
# they preceded the `out=` assignment, which made them plain shell variables
# that the interpreter never received. `out` is also local now.
run_mir_via_gate_c() {
  local json_path="$1"
  local out rc
  if [ ! -f "$json_path" ]; then
    warn "JSON file not found: $json_path"
    return 1
  fi
  # Gate-C execution (JSON v0 → MIR Interpreter), with noise suppressed
  out="$(NYASH_QUIET=1 \
    HAKO_QUIET=1 \
    NYASH_CLI_VERBOSE=0 \
    NYASH_NYRT_SILENT_RESULT=1 \
    "$ROOT/target/release/nyash" --json-file "$json_path" 2>&1)"
  # Filter: drop interpreter headers and Result lines; print the last meaningful line
  printf '%s\n' "$out" | awk '/^(✅|ResultType|Result:)/{next} NF{last=$0} END{ if(last) print last }'
  rc=$?
  rm -f "$json_path"
  return $rc
}
# Two-stage execution: compile the given Hako source, then run the result.
# Arguments: $1 - Hako source text
# Returns:   1 when compilation fails, otherwise the status of the run stage.
run_hako() {
  local code="$1"
  local json_path
  if ! json_path=$(hako_compile_to_mir "$code"); then
    return 1
  fi
  run_mir_via_gate_c "$json_path"
}
# Compare actual output with the expected text.
# Arguments: $1 - expected, $2 - actual, $3 - test name
# Returns:   0 and logs PASS on an exact match; otherwise prints both values
#            to stderr, logs FAIL and returns 1.
check_exact() {
  local expect="$1"
  local got="$2"
  local name="$3"
  if [ "$got" != "$expect" ]; then
    printf "Expected: %s\nActual: %s\n" "$expect" "$got" >&2
    fail "$name"
    return 1
  fi
  pass "$name"
  return 0
}
# run_case TITLE CODE EXPECT NAME
# Log TITLE, compile and run CODE, then require its output to equal EXPECT.
# The script exits with status 1 on the first failing case. The exit status of
# run_hako is checked as well: without that, a failed compile/run step yields
# empty output and would satisfy the "false condition" case, which expects "".
run_case() {
  local title="$1"
  local code="$2"
  local expect="$3"
  local name="$4"
  local out
  info "$title"
  if ! out=$(run_hako "$code"); then
    fail "$name (compile/run step failed)"
    exit 1
  fi
  check_exact "$expect" "$out" "$name" || exit 1
}
require_hako
run_case "Hako if canary: simple if with true condition" \
  'box Main { static method main() { if(5>4){ print(1); } } }' "1" "hako_if_true"
run_case "Hako if canary: if with false condition (should produce no output)" \
  'box Main { static method main() { if(4>5){ print(1); } } }' "" "hako_if_false"
run_case "Hako if canary: if with comparison operator" \
  'box Main { static method main() { if(10==10){ print(42); } } }' "42" "hako_if_equals"
run_case "Hako if canary: if with greater-than" \
  'box Main { static method main() { if(7>3){ print(100); } } }' "100" "hako_if_greater"
exit 0