restore(lang): full lang tree from ff3ef452 (306 files) — compiler, vm, shared, runner, c-abi, etc.\n\n- Restores lang/ directory (files≈306, dirs≈64) as per historical branch with selfhost sources\n- Keeps our recent parser index changes in compiler/* (merged clean by checkout)\n- Unblocks selfhost development and documentation references
This commit is contained in:
256
lang/src/externs/normalize/core_extern_normalize.hako
Normal file
256
lang/src/externs/normalize/core_extern_normalize.hako
Normal file
@ -0,0 +1,256 @@
|
||||
// CoreExternNormalize — Phase 20.26 scaffold
|
||||
// Responsibility: provide a stable entry to normalize MIR(JSON v0)
|
||||
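// from Method/ModuleFunction forms to Extern names. It was originally
// scaffolded as a no-op placeholder so routing could be tested safely; the
// current implementation inserts a missing "entry" id per function and
// rewrites selected String method calls into nyrt.string.* Extern calls.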
|
||||
|
||||
using "lang/src/vm/core/json_v0_reader.hako" as NyVmJsonV0Reader
|
||||
using "lang/src/shared/json/json_cursor.hako" as JsonCursorBox
|
||||
|
||||
static box CoreExternNormalize {
|
||||
// Normalize an entire MIR(JSON v0) document.
//
// Finds the "functions" array in `j`, passes every function object in it
// through _rewrite_function_json, and splices the rewritten objects back
// between the original '[' and ']'. Rewritten objects are re-joined with
// ",\n ", so whitespace between functions is not preserved.
//
// Params:
//   j - MIR(JSON v0) text.
// Returns:
//   The rewritten text, or `j` unchanged when the functions array cannot be
//   located or when any element of it cannot be scanned as a JSON object.
//   (Previously a scan failure silently dropped every remaining function.)
normalize_json(j) {
    // Locate the key, then the '[' that opens the array and the position
    // seek_array_end reports for its end (used below as the index of ']').
    local p_funcs = JsonCursorBox.find_key_dual(j, "\"functions\":[", r#"\"functions\":\["#, 0)
    if p_funcs < 0 { return j }
    local lb_funcs = j.indexOf("[", p_funcs)
    if lb_funcs < 0 { return j }
    local rb_funcs = JsonCursorBox.seek_array_end(j, lb_funcs)
    if rb_funcs < 0 { return j }

    // Walk the function objects inside the array.
    local arr = j.substring(lb_funcs+1, rb_funcs)
    local out = new ArrayBox()
    local pos = 0
    local complete = 1
    loop(true) {
        // _skip_ws skips spaces, newlines, tabs and commas.
        pos = me._skip_ws(arr, pos)
        if pos >= arr.size() { break }
        if arr.substring(pos,pos+1) != "{" {
            // Unexpected element: do not emit a truncated array.
            complete = 0
            break
        }
        local end = JsonCursorBox.seek_obj_end(arr, pos)
        if end < 0 {
            complete = 0
            break
        }
        out.push(me._rewrite_function_json(arr.substring(pos, end+1)))
        pos = end + 1
    }
    if complete == 0 { return j }

    // Join the rewritten functions.
    local n = out.size()
    local i = 0
    local joined = ""
    loop(i < n) {
        joined = joined + out.get(i)
        if i < n-1 { joined = joined + ",\n " }
        i = i + 1
    }

    // Splice back: keep everything up to and including '[', and from ']' on.
    return j.substring(0, lb_funcs+1) + joined + j.substring(rb_funcs, j.size())
}
|
||||
|
||||
// Rewrite one function object of MIR(JSON v0).
//
// First makes sure the function carries an "entry" field (see _ensure_entry),
// then passes every object of its "blocks" array through _rewrite_block_json
// and splices the result back. Rewritten blocks are re-joined with ",\n ".
//
// Params:
//   f - JSON text of a single function object.
// Returns:
//   The rewritten function text. When the blocks array cannot be located, or
//   an element of it cannot be scanned as an object, the function is returned
//   with only the entry fix applied (previously a scan failure silently
//   dropped the remaining blocks).
_rewrite_function_json(f) {
    local f2 = me._ensure_entry(f)

    // Locate the blocks array.
    local p_b = JsonCursorBox.find_key_dual(f2, "\"blocks\":[", r#"\"blocks\":\["#, 0)
    if p_b < 0 { return f2 }
    local lb_b = f2.indexOf("[", p_b)
    if lb_b < 0 { return f2 }
    local rb_b = JsonCursorBox.seek_array_end(f2, lb_b)
    if rb_b < 0 { return f2 }

    // Walk the block objects inside the array.
    local blocks = f2.substring(lb_b+1, rb_b)
    local bout = new ArrayBox()
    local bp = 0
    local complete = 1
    loop(true) {
        // _skip_ws skips spaces, newlines, tabs and commas.
        bp = me._skip_ws(blocks, bp)
        if bp >= blocks.size() { break }
        if blocks.substring(bp,bp+1) != "{" {
            complete = 0
            break
        }
        local be = JsonCursorBox.seek_obj_end(blocks, bp)
        if be < 0 {
            complete = 0
            break
        }
        bout.push(me._rewrite_block_json(blocks.substring(bp, be+1)))
        bp = be + 1
    }
    // Never emit a truncated blocks array.
    if complete == 0 { return f2 }

    local nb = bout.size()
    local bi = 0
    local bjoined = ""
    loop(bi < nb) {
        bjoined = bjoined + bout.get(bi)
        if bi < nb-1 { bjoined = bjoined + ",\n " }
        bi = bi + 1
    }
    return f2.substring(0, lb_b+1) + bjoined + f2.substring(rb_b, f2.size())
}

// Return `f` with `, "entry": <id>` inserted right after the string value of
// its first "name": key, where <id> is the id read from the first block.
// `f` is returned unchanged when read_entry_id already reports an id (>= 0),
// when no first block / block id is available, or when the name value is not
// found as a quoted string.
// NOTE(review): the first textual occurrence of "name": is used; this assumes
// the function's own name key precedes any nested "name" keys - confirm.
_ensure_entry(f) {
    if NyVmJsonV0Reader.read_entry_id(f) >= 0 { return f }
    local fb = NyVmJsonV0Reader.first_block(f)
    if fb == "" { return f }
    local eid = NyVmJsonV0Reader.read_block_id(fb)
    if eid < 0 { return f }

    local name_pos = f.indexOf("\"name\":")
    if name_pos < 0 { return f }
    local colon = f.indexOf(":", name_pos)
    if colon < 0 { return f }

    // Skip whitespace between ':' and the opening quote (bounded by the text).
    local q = colon + 1
    loop(q < f.size()) {
        local ch = f.substring(q,q+1)
        if ch == " " || ch == "\n" || ch == "\r" || ch == "\t" {
            q = q + 1
            continue
        }
        break
    }
    if f.substring(q,q+1) != "\"" { return f }

    // qend is used as the index of the closing quote of the name string.
    local qend = JsonCursorBox.scan_string_end(f, q)
    if qend < 0 { return f }
    return f.substring(0, qend+1) + ", \"entry\": " + ("" + eid) + f.substring(qend+1, f.size())
}
|
||||
|
||||
// Rewrite the "instructions" array of one block object.
//
// Each instruction yielded by NyVmJsonV0Reader.next_instruction is passed
// through _map_string_calls; the results are re-joined with ",\n" and spliced
// back between the original '[' and ']'.
//
// Params:
//   blk - JSON text of a single block object.
// Returns:
//   The rewritten block text, or `blk` unchanged when the instructions array
//   cannot be located.
// NOTE(review): iteration stops at the first step whose "obj" is null; if the
// reader reports a parse failure the same way, later instructions would be
// dropped - confirm against next_instruction's contract.
_rewrite_block_json(blk) {
    local key_at = JsonCursorBox.find_key_dual(blk, "\"instructions\":[", r#"\"instructions\":\["#, 0)
    if key_at < 0 { return blk }
    local open_at = blk.indexOf("[", key_at)
    if open_at < 0 { return blk }
    local close_at = JsonCursorBox.seek_array_end(blk, open_at)
    if close_at < 0 { return blk }

    // Collect the rewritten instructions in order.
    local body = blk.substring(open_at+1, close_at)
    local rewritten = new ArrayBox()
    local cursor = 0
    loop(true) {
        local step = NyVmJsonV0Reader.next_instruction(body, cursor)
        local inst = step.get("obj")
        if inst == null { break }
        cursor = step.get("next")
        rewritten.push(me._map_string_calls(inst))
    }

    // Join back, placing the separator before every item except the first.
    local total = rewritten.size()
    local k = 0
    local text = ""
    loop(k < total) {
        if k > 0 { text = text + ",\n" }
        text = text + rewritten.get(k)
        k = k + 1
    }
    return blk.substring(0, open_at+1) + text + blk.substring(close_at, blk.size())
}
|
||||
|
||||
// Map a Method-style mir_call on a String method to an Extern mir_call.
//
// Recognised methods (first match wins, checked in this order):
//   length | len | size -> nyrt.string.length       args: [recv]
//   substring           -> nyrt.string.substring    args: [recv, a0, a1]
//   indexOf | find      -> nyrt.string.indexOf      args: [recv, a0(, a1)]
//   lastIndexOf         -> nyrt.string.lastIndexOf  args: [recv, a0(, a1)]
//   replace             -> nyrt.string.replace      args: [recv, a0, a1]
//   charAt              -> nyrt.string.charAt       args: [recv, a0]
//
// Detection is a plain text search for "op":"mir_call" and "method":"<name>"
// (compact form, no spaces); the receiver's type is not inspected here.
//
// Params:
//   obj - JSON text of one instruction.
// Returns:
//   The replacement instruction text, or `obj` unchanged when it is not a
//   mir_call, names none of the methods above, or lacks the receiver, dst or
//   the minimum number of args.
_map_string_calls(obj) {
    if obj.indexOf("\"op\":\"mir_call\"") < 0 { return obj }
    local flags = me._flags_fragment(obj)

    // Select the extern name plus the required (need) and maximum forwarded
    // (take) number of call args.
    local target = ""
    local need = 0
    local take = 0
    if me._calls_method(obj, "length") || me._calls_method(obj, "len") || me._calls_method(obj, "size") {
        target = "nyrt.string.length"
    }
    if target == "" && me._calls_method(obj, "substring") {
        target = "nyrt.string.substring"
        need = 2
        take = 2
    }
    if target == "" && (me._calls_method(obj, "indexOf") || me._calls_method(obj, "find")) {
        target = "nyrt.string.indexOf"
        need = 1
        take = 2
    }
    if target == "" && me._calls_method(obj, "lastIndexOf") {
        target = "nyrt.string.lastIndexOf"
        need = 1
        take = 2
    }
    if target == "" && me._calls_method(obj, "replace") {
        target = "nyrt.string.replace"
        need = 2
        take = 2
    }
    if target == "" && me._calls_method(obj, "charAt") {
        target = "nyrt.string.charAt"
        need = 1
        take = 1
    }
    if target == "" { return obj }

    local recv = me._read_digits(obj, "receiver")
    local dst = me._read_digits(obj, "dst")
    // The args array is only read for methods that forward call args.
    local args = new ArrayBox()
    if take > 0 { args = me._read_args_digits(obj) }
    if recv == "" || dst == "" || args.size() < need { return obj }

    // The receiver becomes the first extern argument, followed by up to
    // `take` of the original call args.
    local arg_json = "[" + recv
    local k = 0
    loop(k < take) {
        if k >= args.size() { break }
        arg_json = arg_json + "," + args.get(k)
        k = k + 1
    }
    arg_json = arg_json + "]"
    return me._build_mir_call(dst, target, arg_json, flags)
}

// True when `obj` contains the compact text "method":"<name>".
_calls_method(obj, name) {
    return obj.indexOf("\"method\":\"" + name + "\"") >= 0
}
|
||||
|
||||
// Build the flags fragment to append to a rewritten mir_call.
//
// Params:
//   json - JSON text of the original instruction.
// Returns:
//   A fragment starting with ',' that carries the "optionality" flag when the
//   instruction has a "flags": key and an optionality of "bang" or "optional"
//   ("bang" is checked first); otherwise "" (flags absent, null or default).
// NOTE(review): the input is searched for unescaped quotes, while the returned
// literal is written with backslash-escaped quotes - confirm this asymmetry
// is intended.
_flags_fragment(json) {
    local fragment = ""
    if json.indexOf("\"flags\":") >= 0 {
        if json.indexOf("\"optionality\":\"bang\"") >= 0 {
            fragment = ",\\\"flags\\\":{\\\"optionality\\\":\\\"bang\\\"}"
        } else {
            if json.indexOf("\"optionality\":\"optional\"") >= 0 {
                fragment = ",\\\"flags\\\":{\\\"optionality\\\":\\\"optional\\\"}"
            }
        }
    }
    return fragment
}
|
||||
|
||||
// Assemble the text of an Extern mir_call instruction.
//
// Params:
//   dst            - destination value id, as text.
//   extern_name    - extern callee name (e.g. "nyrt.string.length").
//   args_json      - already-formatted args array text, e.g. "[1,2]".
//   flags_fragment - "" or a fragment from _flags_fragment, appended after
//                    the args array.
// Returns:
//   The instruction text: op, dst, then a mir_call object holding the Extern
//   callee, the args and the optional flags fragment.
// NOTE(review): the literals emit backslash-escaped quotes - confirm the
// consumer expects the escaped form.
_build_mir_call(dst, extern_name, args_json, flags_fragment) {
    local head = "{\\\"op\\\":\\\"mir_call\\\",\\\"dst\\\":" + dst
    local callee = ",\\\"mir_call\\\":{\\\"callee\\\":{\\\"type\\\":\\\"Extern\\\",\\\"name\\\":\\\"" + extern_name + "\\\"}"
    local call_args = ",\\\"args\\\":" + args_json
    // Appending an empty fragment is a no-op, so no emptiness check is needed.
    return head + callee + call_args + flags_fragment + "}}"
}
|
||||
|
||||
// Read the digit run that follows `"<key>":` in `json`.
//
// Params:
//   json - JSON text of one instruction.
//   key  - key name without quotes (e.g. "dst", "receiver").
// Returns:
//   Whatever JsonCursorBox.digits_from yields just after the key's ':';
//   "" when the key (plain or backslash-escaped form) or its ':' is not found.
_read_digits(json, key) {
    // Both search patterns must embed `key`. The previous raw literal
    // r#"\""+key+"\":"# only terminates at "#, so it contained the text
    // +key+ verbatim and the escaped pattern could never match a real key.
    local plain = "\"" + key + "\":"
    local escaped = "\\\"" + key + "\\\":"
    local p = JsonCursorBox.find_key_dual(json, plain, escaped, 0)
    if p < 0 { return "" }
    local colon = json.indexOf(":", p)
    if colon < 0 { return "" }
    return JsonCursorBox.digits_from(json, colon+1)
}
|
||||
|
||||
// Read the "args" array of an instruction as digit strings (value ids).
//
// Params:
//   json - JSON text of one instruction.
// Returns:
//   An ArrayBox of strings, one per leading numeric element. Reading stops at
//   ']', at the end of the text, or at the first element for which
//   digits_from yields "". The box is empty when no args array is found.
_read_args_digits(json) {
    local vids = new ArrayBox()
    local key_at = JsonCursorBox.find_key_dual(json, "\"args\":[", r#"\"args\":\["#, 0)
    if key_at < 0 { return vids }
    local open_at = json.indexOf("[", key_at)
    if open_at < 0 { return vids }

    // _skip_ws also steps over commas, so it doubles as the element separator.
    local cur = me._skip_ws(json, open_at + 1)
    loop(cur < json.size()) {
        if json.substring(cur,cur+1) == "]" { break }
        local digits = JsonCursorBox.digits_from(json, cur)
        if digits == "" { break }
        vids.push(digits)
        cur = me._skip_ws(json, cur + digits.size())
    }
    return vids
}
|
||||
|
||||
// Advance past separator characters.
//
// Despite the name, commas are skipped together with whitespace (space,
// newline, carriage return, tab).
//
// Params:
//   json - text to scan.
//   pos  - index to start from.
// Returns:
//   The index of the first character at or after `pos` that is none of the
//   above, or `pos` itself when it is already at or past the end of `json`.
_skip_ws(json, pos) {
    local total = json.size()
    local cur = pos
    loop(cur < total) {
        local c = json.substring(cur,cur+1)
        // Stop on the first character that is not a separator.
        if c != " " && c != "\n" && c != "\r" && c != "\t" && c != "," { break }
        cur = cur + 1
    }
    return cur
}
|
||||
}
|
||||
Reference in New Issue
Block a user