restore(lang/compiler): bring back lang/src/compiler from e917d400; add Hako index canaries and docs; implement Rust-side index operator (Array/Map get/set) with Fail‑Fast diagnostics

- restore: lang/src/compiler/** (parser/emit/builder/pipeline_v2) from e917d400
- docs: docs/development/selfhosting/index-operator-hako.md
- smokes(hako): tools/smokes/v2/profiles/quick/core/index_operator_hako.sh (opt-in)
- smokes(vm): adjust index_operator_vm.sh for semicolon gate + stable error text
- rust/parser: allow IndexExpr and assignment LHS=Index; postfix parse LBRACK chain
- rust/builder: lower arr/map index to BoxCall get/set; annotate array/map literals; Fail‑Fast for unsupported types
- CURRENT_TASK: mark Rust side done; add Hako tasks checklist

Note: the files most likely disappeared because the branch was fast-forwarded to a lineage that does not contain lang/src/compiler; no explicit delete commit was found. Added anchor checks and suggested a CI guard as a follow-up.
This commit is contained in:
nyash-codex
2025-10-31 20:18:39 +09:00
parent 86fd03afe8
commit 5e3d9e7ae4
86 changed files with 6214 additions and 20 deletions

View File

@ -0,0 +1,355 @@
// Moved from apps/selfhost-compiler/boxes/parser/expr/parser_expr_box.hako
// ParserExprBox — expression parser coordinator
// Responsibility: Parse expressions and delegate to specialized boxes
// API: parse(src, i, ctx) -> JSON (delegates to parse_expr2)
using lang.compiler.parser.scan.parser_number_scan_box
using lang.compiler.parser.expr.parser_peek_box
using lang.compiler.parser.expr.parser_literal_box
// ParserExprBox: recursive-descent expression parser that returns JSON AST nodes as strings.
// Conventions visible in this box:
// - src is the source text, i the offset to start at, ctx the parser context that supplies
//   the helpers used below (skip_ws, starts_with_kw, read_ident2, esc_json, to_int, i2s, ...).
// - Every method except parse_args2 returns the node JSON and reports the offset just past
//   the consumed input via ctx.gpos_set; callers read it back with ctx.gpos_get.
// - Call chain from loosest to tightest binding: parse_expr2 -> parse_compare2 -> parse_sum2
//   -> parse_term2 -> parse_unary2 -> parse_factor2.
static box ParserExprBox {
// Integer literal. The scan result is split at its last "@": the text before it is returned
// as the node JSON and the text after it is converted to the end offset.
// NOTE(review): json is sliced with `at` before the `at >= 0` check; presumably scan_int
// always returns "<json>@<pos>" — confirm, otherwise substring(0, at) sees a negative index.
parse_number2(src, i, ctx) {
local pair = ParserNumberScanBox.scan_int(src, i)
local at = pair.lastIndexOf("@")
local json = pair.substring(0, at)
// Falls back to the start offset when no "@" separator was found.
local pos = i
if at >= 0 { pos = ctx.to_int(pair.substring(at+1, pair.size())) }
ctx.gpos_set(pos)
return json
}
// String literal. i is taken as the offset of the opening quote (scanning starts at i + 1).
// Returns a Str node whose value is the decoded text passed through ctx.esc_json.
parse_string2(src, i, ctx) {
local n = src.size()
local j = i + 1
local out = ""
// guard/max: the scan stops once more than `max` iterations have run.
local guard = 0
local max = 200000
loop(j < n) {
if guard > max { break }
guard = guard + 1
local ch = src.substring(j, j+1)
// Closing quote: consume it and return the finished node.
if ch == "\"" {
j = j + 1
ctx.gpos_set(j)
return "{\"type\":\"Str\",\"value\":\"" + ctx.esc_json(out) + "\"}"
}
// Escape sequence. A backslash that is the last character of src fails the
// `j + 1 < n` test and is appended literally by the else branch below.
if ch == "\\" && j + 1 < n {
local nx = src.substring(j+1, j+2)
// \" \\ \n \r \t decode to the character they name.
// \u with four more characters available is copied through unchanged (six source characters).
// Any other escaped character is kept without its backslash.
if nx == "\"" { out = out + "\"" j = j + 2 }
else { if nx == "\\" { out = out + "\\" j = j + 2 }
else { if nx == "n" { out = out + "\n" j = j + 2 }
else { if nx == "r" { out = out + "\r" j = j + 2 }
else { if nx == "t" { out = out + "\t" j = j + 2 }
else { if nx == "u" && j + 5 < n { out = out + src.substring(j, j+6) j = j + 6 }
else { out = out + nx j = j + 2 } } } } } }
} else {
out = out + ch
j = j + 1
}
}
// Reached without seeing a closing quote (end of input or guard hit):
// return whatever was collected so far.
ctx.gpos_set(j)
return "{\"type\":\"Str\",\"value\":\"" + ctx.esc_json(out) + "\"}"
}
// Primary expression: keyword literals, peek, parenthesized expression, string/map/array
// literals, `new Class(args)`, identifier with call/method chain, and number as the fallback.
parse_factor2(src, i, ctx) {
local j = ctx.skip_ws(src, i)
// Nothing left to parse: yields an Int 0 node.
if j >= src.size() {
ctx.gpos_set(j)
return "{\"type\":\"Int\",\"value\":0}"
}
// Keyword literals; the offset is advanced by the keyword length.
if ctx.starts_with_kw(src, j, "true") == 1 {
ctx.gpos_set(j + 4)
return "{\"type\":\"Bool\",\"value\":true}"
}
if ctx.starts_with_kw(src, j, "false") == 1 {
ctx.gpos_set(j + 5)
return "{\"type\":\"Bool\",\"value\":false}"
}
if ctx.starts_with_kw(src, j, "null") == 1 {
ctx.gpos_set(j + 4)
return "{\"type\":\"Null\"}"
}
// Peek expression: delegate to ParserPeekBox
if ctx.starts_with_kw(src, j, "peek") == 1 {
// Skip the 4-character keyword; ParserPeekBox starts at the scrutinee.
j = j + 4
return ParserPeekBox.parse(src, j, ctx)
}
local ch = src.substring(j, j+1)
// Parenthesized
if ch == "(" {
local inner = me.parse_expr2(src, j + 1, ctx)
local k = ctx.gpos_get()
k = ctx.skip_ws(src, k)
// The closing ")" is consumed when present; a missing one is tolerated.
if src.substring(k, k+1) == ")" { k = k + 1 }
ctx.gpos_set(k)
// No wrapper node is produced: the inner expression's JSON is returned as is.
return inner
}
// String literal
if ch == "\"" {
return me.parse_string2(src, j, ctx)
}
// Map literal: delegate to ParserLiteralBox
if ch == "{" {
return ParserLiteralBox.parse_map(src, j, ctx)
}
// Array literal: delegate to ParserLiteralBox
if ch == "[" {
return ParserLiteralBox.parse_array(src, j, ctx)
}
// new Class(args)
if ctx.starts_with_kw(src, j, "new") == 1 {
local p = ctx.skip_ws(src, j + 3)
// The read_ident2 result is split at its last "@" into the name and the end offset.
local idp = ctx.read_ident2(src, p)
local at = idp.lastIndexOf("@")
local cls = idp.substring(0, at)
local k = ctx.to_int(idp.substring(at+1, idp.size()))
k = ctx.skip_ws(src, k)
// NOTE(review): when no "(" follows the class name, parse_args2 still runs from k — confirm intended.
if src.substring(k, k+1) == "(" { k = k + 1 }
// parse_args2 returns "<json array>@<offset>"; split it the same way.
local args_and_pos = me.parse_args2(src, k, ctx)
local at2 = args_and_pos.lastIndexOf("@")
local args_json = args_and_pos.substring(0, at2)
k = ctx.to_int(args_and_pos.substring(at2+1, args_and_pos.size()))
k = ctx.skip_ws(src, k)
if src.substring(k, k+1) == ")" { k = k + 1 }
ctx.gpos_set(k)
return "{\"type\":\"New\",\"class\":\"" + cls + "\",\"args\":" + args_json + "}"
}
// Identifier / Call / Method chain
if ctx.is_alpha(ch) {
local idp = ctx.read_ident2(src, j)
local at = idp.lastIndexOf("@")
local name = idp.substring(0, at)
local k = ctx.to_int(idp.substring(at+1, idp.size()))
// Starts as a plain Var node; each "(" or "." found below rewrites `node`.
local node = "{\"type\":\"Var\",\"name\":\"" + name + "\"}"
local cont2 = 1
loop(cont2 == 1) {
k = ctx.skip_ws(src, k)
local tch = src.substring(k, k+1)
if tch == "(" {
// Call: name(args).
// NOTE(review): the Call node is built from `name` only, so a "(" that follows a
// method chain replaces the chain accumulated in `node` — confirm intended.
k = k + 1
local args_and_pos = me.parse_args2(src, k, ctx)
local at2 = args_and_pos.lastIndexOf("@")
local args_json = args_and_pos.substring(0, at2)
k = ctx.to_int(args_and_pos.substring(at2+1, args_and_pos.size()))
k = ctx.skip_ws(src, k)
if src.substring(k, k+1) == ")" { k = k + 1 }
node = "{\"type\":\"Call\",\"name\":\"" + name + "\",\"args\":" + args_json + "}"
} else {
if tch == "." {
// Method: recv.method(args); the node built so far becomes the receiver.
k = k + 1
k = ctx.skip_ws(src, k)
local midp = ctx.read_ident2(src, k)
local at3 = midp.lastIndexOf("@")
local mname = midp.substring(0, at3)
k = ctx.to_int(midp.substring(at3+1, midp.size()))
k = ctx.skip_ws(src, k)
// NOTE(review): without a "(" after the method name, parse_args2 still parses
// whatever follows as arguments — confirm paren-less members are not expected here.
if src.substring(k, k+1) == "(" { k = k + 1 }
local args2 = me.parse_args2(src, k, ctx)
local at4 = args2.lastIndexOf("@")
local args_json2 = args2.substring(0, at4)
k = ctx.to_int(args2.substring(at4+1, args2.size()))
k = ctx.skip_ws(src, k)
if src.substring(k, k+1) == ")" { k = k + 1 }
node = "{\"type\":\"Method\",\"recv\":" + node + ",\"method\":\"" + mname + "\",\"args\":" + args_json2 + "}"
} else {
// Neither "(" nor ".": the chain ends here.
cont2 = 0
}
}
}
ctx.gpos_set(k)
return node
}
// Fallback: number
return me.parse_number2(src, j, ctx)
}
// Unary minus: "-x" is encoded as Binary("-", Int 0, x), with x parsed by parse_factor2.
// Anything else is forwarded to parse_factor2 unchanged.
parse_unary2(src, i, ctx) {
local j = ctx.skip_ws(src, i)
if src.substring(j, j+1) == "-" {
local rhs = me.parse_factor2(src, j + 1, ctx)
j = ctx.gpos_get()
local zero = "{\"type\":\"Int\",\"value\":0}"
ctx.gpos_set(j)
return "{\"type\":\"Binary\",\"op\":\"-\",\"lhs\":" + zero + ",\"rhs\":" + rhs + "}"
}
return me.parse_factor2(src, j, ctx)
}
// Multiplicative level: unary ( ("*" | "/") unary )*, folded left to right into Binary nodes.
parse_term2(src, i, ctx) {
local lhs = me.parse_unary2(src, i, ctx)
local j = ctx.gpos_get()
local cont = 1
loop(cont == 1) {
j = ctx.skip_ws(src, j)
if j >= src.size() {
cont = 0
} else {
local op = src.substring(j, j+1)
if op != "*" && op != "/" {
cont = 0
} else {
// The previous result becomes the left operand of the new node.
local rhs = me.parse_unary2(src, j+1, ctx)
j = ctx.gpos_get()
lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}"
}
}
}
ctx.gpos_set(j)
return lhs
}
// Additive level: term ( ("+" | "-") term )*, folded left to right into Binary nodes.
parse_sum2(src, i, ctx) {
local lhs = me.parse_term2(src, i, ctx)
local j = ctx.gpos_get()
local cont = 1
loop(cont == 1) {
j = ctx.skip_ws(src, j)
if j >= src.size() {
cont = 0
} else {
local op = src.substring(j, j+1)
if op != "+" && op != "-" {
cont = 0
} else {
local rhs = me.parse_term2(src, j+1, ctx)
j = ctx.gpos_get()
lhs = "{\"type\":\"Binary\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}"
}
}
}
ctx.gpos_set(j)
return lhs
}
// Comparison level: sum [ op sum ] with op one of == != <= >= < >.
// There is no loop here, so at most one comparison operator is consumed.
parse_compare2(src, i, ctx) {
local lhs = me.parse_sum2(src, i, ctx)
local j = ctx.gpos_get()
j = ctx.skip_ws(src, j)
// Two-character operators are tested first so "<=" is not read as "<".
local two = src.substring(j, j+2)
local one = src.substring(j, j+1)
local op = ""
if two == "==" || two == "!=" || two == "<=" || two == ">=" {
op = two
j = j + 2
} else {
if one == "<" || one == ">" {
op = one
j = j + 1
}
}
// No comparison operator: the left operand is the whole result.
if op == "" {
ctx.gpos_set(j)
return lhs
}
local rhs = me.parse_sum2(src, j, ctx)
j = ctx.gpos_get()
ctx.gpos_set(j)
return "{\"type\":\"Compare\",\"op\":\"" + op + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}"
}
// Top-level expression: compare ( ("&&" | "||") compare )* followed by an optional
// ternary "? then : else". Both logical operators are handled by the same loop and
// fold left to right into Logical nodes.
parse_expr2(src, i, ctx) {
local lhs = me.parse_compare2(src, i, ctx)
local j = ctx.gpos_get()
local cont = 1
loop(cont == 1) {
j = ctx.skip_ws(src, j)
local two = src.substring(j, j+2)
if two != "&&" && two != "||" {
cont = 0
} else {
local rhs = me.parse_compare2(src, j+2, ctx)
j = ctx.gpos_get()
lhs = "{\"type\":\"Logical\",\"op\":\"" + two + "\",\"lhs\":" + lhs + ",\"rhs\":" + rhs + "}"
}
}
j = ctx.skip_ws(src, j)
// Ternary: everything parsed so far is the condition; both branches recurse into parse_expr2.
if src.substring(j, j+1) == "?" {
j = j + 1
j = ctx.skip_ws(src, j)
local then_expr = me.parse_expr2(src, j, ctx)
j = ctx.gpos_get()
j = ctx.skip_ws(src, j)
// The ":" separator is consumed when present; a missing one is tolerated.
if src.substring(j, j+1) == ":" { j = j + 1 }
j = ctx.skip_ws(src, j)
local else_expr = me.parse_expr2(src, j, ctx)
j = ctx.gpos_get()
// An empty else result is replaced by an Int 0 node.
if else_expr.size() == 0 { else_expr = "{\"type\":\"Int\",\"value\":0}" }
ctx.gpos_set(j)
return "{\"type\":\"Ternary\",\"cond\":" + lhs + ",\"then\":" + then_expr + ",\"else\":" + else_expr + "}"
}
ctx.gpos_set(j)
return lhs
}
// Argument list. i is the offset just after the opening "(".
// Returns "<json array>@<offset>" (note: not plain JSON); the offset is where parsing
// stopped. The closing ")" is not consumed here — callers check for it themselves.
parse_args2(src, i, ctx) {
local j = ctx.skip_ws(src, i)
local n = src.size()
local out = "["
j = ctx.skip_ws(src, j)
// Empty argument list: report the offset of the ")" itself.
if j < n && src.substring(j, j+1) == ")" {
return "[]@" + ctx.i2s(j)
}
// first argument
local e = me.parse_expr2(src, j, ctx)
j = ctx.gpos_get()
out = out + e
// subsequent arguments with guard
local cont_args = 1
local guard = 0
local max = 100000
loop(cont_args == 1) {
// Once the guard is exceeded the current iteration still runs, then the loop ends.
if guard > max { cont_args = 0 } else { guard = guard + 1 }
local before = j
j = ctx.skip_ws(src, j)
if j < n && src.substring(j, j+1) == "," {
j = j + 1
j = ctx.skip_ws(src, j)
e = me.parse_expr2(src, j, ctx)
j = ctx.gpos_get()
out = out + "," + e
} else {
// No comma: the list is finished.
cont_args = 0
}
// Stop as well when this iteration did not move the offset.
if j == before { cont_args = 0 }
}
out = out + "]"
return out + "@" + ctx.i2s(j)
}
}

View File

@ -0,0 +1,119 @@
// Moved from apps/selfhost-compiler/boxes/parser/expr/parser_literal_box.hako
// ParserLiteralBox — Map/Array literal parser
// Responsibility: Parse Map {"k": v, ...} and Array [e1, e2, ...] literals
// API: parse_map(src, i, ctx) -> JSON, parse_array(src, i, ctx) -> JSON
// ParserLiteralBox — parses Map and Array literals into JSON Call nodes.
// Both methods take the source text `src`, the offset `i` of the opening bracket and the
// parser context `ctx`. They return the node JSON as a string and report the offset where
// parsing stopped through ctx.gpos_set.
static box ParserLiteralBox {
  // Map literal: {"k": v, ...} (string keys only) → Call{name:"map.of", args:[Str(k1), v1, Str(k2), v2, ...]}
  // Tolerant of malformed input: a character that cannot start a key is skipped, and a
  // missing ":" or "," is ignored. The loop ends at the closing "}", at end of input, or
  // when the iteration guard is exceeded.
  parse_map(src, i, ctx) {
    local n = src.size()
    local j = i + 1 // skip opening '{'
    local out = "["
    local first = 1
    local cont = 1
    local guard = 0
    local max = 400000
    loop(cont == 1) {
      if guard > max { cont = 0 } else { guard = guard + 1 }
      j = ctx.skip_ws(src, j)
      if j >= n {
        cont = 0
      } else {
        if src.substring(j, j+1) == "}" {
          j = j + 1
          cont = 0
        } else {
          // key (string only for Stage-2)
          if src.substring(j, j+1) != "\"" {
            // degrade by skipping one char to avoid infinite loop
            j = j + 1
            continue
          }
          // Start of this entry; the progress guard below compares against it.
          local before = j
          local key_raw = ctx.read_string_lit(src, j)
          j = ctx.gpos_get()
          j = ctx.skip_ws(src, j)
          if src.substring(j, j+1) == ":" { j = j + 1 }
          j = ctx.skip_ws(src, j)
          local val_json = ctx.parse_expr2(src, j)
          j = ctx.gpos_get()
          local key_json = "{\"type\":\"Str\",\"value\":\"" + ctx.esc_json(key_raw) + "\"}"
          if first == 1 {
            out = out + key_json + "," + val_json
            first = 0
          } else {
            out = out + "," + key_json + "," + val_json
          }
          // optional comma
          j = ctx.skip_ws(src, j)
          if j < n && src.substring(j, j+1) == "," { j = j + 1 }
          // progress guard (in case of malformed input): force a step only when the whole
          // entry consumed nothing. The check is made against the entry start, as in
          // parse_array. Checking against the offset after the value would treat a "}"
          // that directly follows the value as "no progress" and skip it, so the loop
          // would keep scanning past the end of the literal.
          if j <= before {
            if j < n { j = j + 1 } else { j = n }
          }
        }
      }
    }
    out = out + "]"
    ctx.gpos_set(j)
    return "{\"type\":\"Call\",\"name\":\"map.of\",\"args\":" + out + "}"
  }
  // Array literal: [e1, e2, ...] → Call{name:"array.of", args:[...]}
  // Elements are parsed with ctx.parse_expr2; the comma between elements is optional.
  // The loop ends at the closing "]", at end of input, or when the iteration guard is exceeded.
  parse_array(src, i, ctx) {
    local n = src.size()
    local j = i + 1 // skip opening '['
    local out = "["
    local first = 1
    local cont = 1
    local guard = 0
    local max = 400000
    loop(cont == 1) {
      if guard > max { cont = 0 } else { guard = guard + 1 }
      j = ctx.skip_ws(src, j)
      if j >= n {
        cont = 0
      } else {
        if src.substring(j, j+1) == "]" {
          j = j + 1
          cont = 0
        } else {
          // Start of this element; the progress guard below compares against it.
          local before = j
          local ej = ctx.parse_expr2(src, j)
          j = ctx.gpos_get()
          if first == 1 {
            out = out + ej
            first = 0
          } else {
            out = out + "," + ej
          }
          // optional comma+whitespace
          j = ctx.skip_ws(src, j)
          if j < n && src.substring(j, j+1) == "," { j = j + 1 }
          // progress guard: force a step when the element and separator consumed nothing
          if j <= before {
            if j < n { j = j + 1 } else { j = n }
          }
        }
      }
    }
    out = out + "]"
    ctx.gpos_set(j)
    return "{\"type\":\"Call\",\"name\":\"array.of\",\"args\":" + out + "}"
  }
}

View File

@ -0,0 +1,104 @@
// Moved from apps/selfhost-compiler/boxes/parser/expr/parser_peek_box.hako
// ParserPeekBox — peek expression parser (peek <expr> { "label" => <expr>, ..., else => <expr> })
// Responsibility: Parse peek expressions (pattern-matching syntax)
// API: parse(src, i, ctx) -> JSON string
// ParserPeekBox — parser for peek expressions:
//   peek <expr> { "label" => <expr>, ..., else => <expr> }
static box ParserPeekBox {
  // parse(src, i, ctx) -> JSON string
  //   src: source text
  //   i:   offset where the scrutinee expression starts
  //   ctx: parser context used for expression parsing and scanning helpers
  // Returns a Peek node with "scrutinee", "arms" (array of {label, expr}) and "else"
  // (a Null node when no else arm was seen). The offset where parsing stopped is
  // reported through ctx.gpos_set.
  parse(src, i, ctx) {
    local src_len = src.size()

    // Scrutinee, then the optional opening brace of the arms block.
    local scrutinee = ctx.parse_expr2(src, i)
    local pos = ctx.skip_ws(src, ctx.gpos_get())
    if src.substring(pos, pos+1) == "{" { pos = pos + 1 }
    pos = ctx.skip_ws(src, pos)

    local arms_acc = "["
    local need_sep = 0
    local else_node = null
    local running = 1
    local spins = 0
    local spin_cap = 400000
    loop(running == 1) {
      // Once the cap is exceeded this iteration still runs, then the loop ends.
      if spins > spin_cap { running = 0 } else { spins = spins + 1 }

      pos = ctx.skip_ws(src, pos)
      if pos >= src_len { break }

      local head_ch = src.substring(pos, pos+1)
      if head_ch == "}" {
        // End of the arms block.
        pos = pos + 1
        break
      }

      if ctx.starts_with_kw(src, pos, "else") == 1 {
        // else arm: the body is either a braced expression or a bare expression.
        pos = ctx.skip_ws(src, pos + 4)
        if src.substring(pos, pos+2) == "=>" { pos = pos + 2 }
        pos = ctx.skip_ws(src, pos)
        if src.substring(pos, pos+1) == "{" {
          pos = ctx.skip_ws(src, pos + 1)
          else_node = ctx.parse_expr2(src, pos)
          pos = ctx.skip_ws(src, ctx.gpos_get())
          if src.substring(pos, pos+1) == "}" { pos = pos + 1 }
        } else {
          else_node = ctx.parse_expr2(src, pos)
          pos = ctx.gpos_get()
        }
        continue
      }

      if head_ch != "\"" {
        // Not a string label: step over one character so the loop always advances.
        pos = pos + 1
        continue
      }

      // Labeled arm: "label" => body
      local arm_label = ctx.read_string_lit(src, pos)
      pos = ctx.skip_ws(src, ctx.gpos_get())
      if src.substring(pos, pos+2) == "=>" { pos = pos + 2 }
      pos = ctx.skip_ws(src, pos)

      local arm_body = "{\"type\":\"Int\",\"value\":0}"
      if src.substring(pos, pos+1) == "{" {
        pos = ctx.skip_ws(src, pos + 1)
        arm_body = ctx.parse_expr2(src, pos)
        pos = ctx.skip_ws(src, ctx.gpos_get())
        if src.substring(pos, pos+1) == "}" { pos = pos + 1 }
      } else {
        arm_body = ctx.parse_expr2(src, pos)
        pos = ctx.gpos_get()
      }

      local arm_node = "{\"label\":\"" + ctx.esc_json(arm_label) + "\",\"expr\":" + arm_body + "}"
      if need_sep == 1 { arms_acc = arms_acc + "," }
      arms_acc = arms_acc + arm_node
      need_sep = 1
    }

    arms_acc = arms_acc + "]"
    if else_node == null { else_node = "{\"type\":\"Null\"}" }
    ctx.gpos_set(pos)
    return "{\"type\":\"Peek\",\"scrutinee\":" + scrutinee + ",\"arms\":" + arms_acc + ",\"else\":" + else_node + "}"
  }
}