phase-20.45: PRIMARY no-fallback reps + MIR v0 shape fixes
- Fix MIR v0 shape in lowers: functions[] + name="main" + blocks.id
  * lower_return_int_box.hako
  * lower_return_binop_box.hako
- runner_min: adopt LowerReturnBinOpBox before ReturnInt
- Add PRIMARY no-fallback canaries (all PASS):
  * return-binop / array-size / load-store / return-logical (OR)
- Fix phase2043 runner_min canary alias (Runner -> BuilderRunnerMinBox)
- Update docs: phase-20.45 README (PRIMARY reps), CURRENT_TASK progress

Ancillary: keep builder/provider/canary files in sync; no unrelated behavior changes.
This commit is contained in:
@ -6,6 +6,108 @@ using selfhost.shared.json.core.json_cursor as JsonCursorBox
|
||||
using selfhost.shared.common.string_helpers as StringHelpers
|
||||
|
||||
static box JsonFragBox {
|
||||
// Toggle: enable Unicode \uXXXX decode in string readers
|
||||
_decode_unicode_on() {
    // Reports whether escape decoding is switched on for the string readers.
    // Reads the HAKO_PARSER_DECODE_UNICODE environment variable and returns 1
    // when its value is exactly "1", "true" or "on"; returns 0 when the
    // variable is unset (null) or holds any other value.
    local flag = env.get("HAKO_PARSER_DECODE_UNICODE")
    if flag != null {
        if flag == "1" { return 1 }
        if flag == "true" { return 1 }
        if flag == "on" { return 1 }
    }
    return 0
}
|
||||
|
||||
// Decode simple escapes (\\ \" \/ \b \f \n \r \t) and \uXXXX (printable ASCII only)
|
||||
_decode_escapes(s) {
    // Decodes backslash escapes in `s` and returns the decoded string.
    //   s       : raw string content (text between the quotes); null yields null.
    //   returns : `s` with \\ \" \/ \b \f \n \r \t decoded and \uXXXX mapped to
    //             the matching printable ASCII character (0x20..0x7E). Any other
    //             code point becomes a single "?" placeholder; a high surrogate
    //             followed by a low surrogate collapses into one "?".
    // Unknown or malformed escapes are left in the output unchanged.
    if s == null { return null }

    // Pass 1: normalize double-escaped input: "\\u" -> "\u", any other "\\" -> "\".
    // NOTE(review): because this runs before escape decoding, an escaped
    // backslash followed by a letter (e.g. `\\n`) is later decoded as that
    // escape. Kept as-is since callers may depend on it - confirm intent.
    local src0 = "" + s
    local n0 = src0.length()
    local tmp = ""
    local p = 0
    loop(p < n0) {
        local ch0 = src0.substring(p, p+1)
        if ch0 == "\\" && p + 2 <= n0 && src0.substring(p+1, p+2) == "\\" {
            if p + 3 <= n0 && src0.substring(p+2, p+3) == "u" {
                tmp = tmp + "\\u"
                p = p + 3
                continue
            }
            // generic \\ -> \
            tmp = tmp + "\\"
            p = p + 2
            continue
        }
        tmp = tmp + ch0
        p = p + 1
    }

    // Pass 2: decode the escapes of the normalized text.
    local s1 = tmp
    local n = s1.length()
    if n == 0 { return s1 }
    local out = ""
    local i = 0
    // Printable ASCII table for 0x20..0x7E (index = code point - 32)
    local ascii = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
    loop(i < n) {
        local ch = s1.substring(i, i+1)
        if ch != "\\" {
            out = out + ch
            i = i + 1
            continue
        }
        // A trailing lone backslash has nothing to escape: keep it literally.
        if i + 1 >= n {
            out = out + "\\"
            i = i + 1
            continue
        }
        local e = s1.substring(i+1, i+2)
        if e == "\\" { out = out + "\\" i = i + 2 continue }
        if e == "\"" { out = out + "\"" i = i + 2 continue }
        if e == "/" { out = out + "/" i = i + 2 continue }
        if e == "b" { out = out + "\b" i = i + 2 continue }
        if e == "f" { out = out + "\f" i = i + 2 continue }
        if e == "n" { out = out + "\n" i = i + 2 continue }
        if e == "r" { out = out + "\r" i = i + 2 continue }
        if e == "t" { out = out + "\t" i = i + 2 continue }
        if e == "u" {
            // \uXXXX needs four hex digits after the "\u" prefix.
            if i + 6 <= n {
                local val = me._decode_hex4(s1.substring(i+2, i+6))
                if val >= 0 {
                    // Printable ASCII only (0x20..0x7E)
                    if val >= 32 && val <= 126 {
                        local pos = val - 32
                        out = out + ascii.substring(pos, pos+1)
                        i = i + 6
                        continue
                    }
                    // High surrogate (0xD800..0xDBFF = 55296..56319): emit one
                    // placeholder. The next escape is consumed only when it is a
                    // genuine low surrogate (0xDC00..0xDFFF = 56320..57343), so an
                    // unrelated escape such as \u0041 is no longer swallowed.
                    if val >= 55296 && val <= 56319 {
                        local adv = 6
                        if i + 12 <= n && s1.substring(i+6, i+8) == "\\u" {
                            local low = me._decode_hex4(s1.substring(i+8, i+12))
                            if low >= 56320 && low <= 57343 { adv = 12 }
                        }
                        out = out + "?"
                        i = i + adv
                        continue
                    }
                    // Any other code point (controls, non-ASCII BMP, lone low
                    // surrogate) -> placeholder
                    out = out + "?"
                    i = i + 6
                    continue
                }
            }
        }
        // Fallback: keep the backslash as-is for an unknown or malformed escape;
        // the following characters are then copied as ordinary text.
        out = out + ch
        i = i + 1
    }
    return out
}

// Parses exactly four hexadecimal digits (either case).
// Returns the value 0..65535, or -1 when `h` is null, is not four characters
// long, or contains a non-hex character.
_decode_hex4(h) {
    if h == null { return -1 }
    if h.length() != 4 { return -1 }
    local val = 0
    local k = 0
    local ok = 1
    loop(k < 4) {
        local c = h.substring(k, k+1)
        // Table lookups give the digit value directly; a negative index means no match.
        local d = "0123456789abcdef".indexOf(c)
        if d < 0 { d = "0123456789ABCDEF".indexOf(c) }
        if d < 0 {
            ok = 0
            break
        }
        val = val * 16 + d
        k = k + 1
    }
    if ok == 1 { return val }
    return -1
}
|
||||
// Basic helpers - VM fallback implementations for cross-box static calls
|
||||
index_of_from(hay, needle, pos) {
|
||||
// VM fallback: implement using substring + indexOf
|
||||
@ -62,7 +164,9 @@ static box JsonFragBox {
|
||||
local j = i
|
||||
loop(j < n) { if s.substring(j,j+1) == "\"" { break } j = j + 1 }
|
||||
if j <= i { return null }
|
||||
return s.substring(i, j)
|
||||
local raw = s.substring(i, j)
|
||||
if me._decode_unicode_on() == 1 { return me._decode_escapes(raw) }
|
||||
return raw
|
||||
}
|
||||
read_float_from(text, pos) {
|
||||
if text == null { return null }
|
||||
@ -154,7 +258,11 @@ static box JsonFragBox {
|
||||
if p >= 0 {
|
||||
local vstart = p + pat.length() // start of value (right after opening quote)
|
||||
local vend = me._scan_string_end(seg, vstart - 1)
|
||||
if vend > vstart { return seg.substring(vstart, vend) }
|
||||
if vend > vstart {
|
||||
local raw = seg.substring(vstart, vend)
|
||||
if me._decode_unicode_on() == 1 { return me._decode_escapes(raw) }
|
||||
return raw
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user