mirbuilder: integrate Normalizer (toggle), add tag-quiet mode, share f64 canonicalization; expand canaries; doc updates for quick timeout + dev toggles; Phase 21.5 optimization readiness

This commit is contained in:
nyash-codex
2025-11-10 23:17:46 +09:00
parent 24d88a10c0
commit ece91306b7
56 changed files with 2227 additions and 142 deletions

View File

@ -22,6 +22,7 @@ json.core.string_scan = "json/core/string_scan.hako"
json.core.json_canonical = "json/json_canonical_box.hako"
json.utils.json_utils = "json/json_utils.hako"
json.utils.json_frag = "json/utils/json_frag.hako"
json.utils.json_number_canonical = "json/utils/json_number_canonical_box.hako"
# Host bridge & adapters
host_bridge.host_bridge = "host_bridge/host_bridge_box.hako"

View File

@ -0,0 +1,112 @@
// json_number_canonical_box.hako — Shared JSON numeric canonicalization helpers
// Responsibility: canonicalize f64 textual forms (including exponent); read numeric tokens.
// Non-responsibility: MIR execution or JSON scanning beyond local token extraction.
using selfhost.shared.common.string_helpers as StringHelpers
static box JsonNumberCanonicalBox {
  // canonicalize_f64(fs)
  // Purpose: turn the textual form of a float into a minimal plain-decimal string.
  //   - always has an integer part and at least one fractional digit ("3" -> "3.0", ".5" -> "0.5")
  //   - no redundant leading zeros in the integer part, no trailing zeros in the fraction
  //   - exponent notation (d(.d*)?[eE][+-]?d+) is expanded to plain decimal
  //   - any zero value, including negative zero, is returned as "0.0"
  //   - a leading "+" is dropped (read_num_token accepts one, so it can reach this function)
  // Params: fs — value to canonicalize; coerced to a string via "" + fs. null yields "0.0".
  // Returns: the canonical string. Input is not validated: characters that are not part of
  //   a number are carried through unchanged.
  canonicalize_f64(fs) {
    if fs == null { return "0.0" }
    local s = "" + fs
    // Split sign; "+" carries no information and is discarded.
    local sign = ""
    if s.substring(0,1) == "-" {
      sign = "-"
      s = s.substring(1)
    } else {
      if s.substring(0,1) == "+" { s = s.substring(1) }
    }
    // Locate the exponent marker (lowercase is looked up first, then uppercase).
    local exp_pos = s.indexOf("e")
    if exp_pos < 0 { exp_pos = s.indexOf("E") }
    if exp_pos >= 0 {
      local mant = s.substring(0, exp_pos)
      local exp_str = s.substring(exp_pos + 1)
      local esign = 1
      if exp_str.substring(0,1) == "+" {
        exp_str = exp_str.substring(1)
      } else {
        if exp_str.substring(0,1) == "-" {
          esign = -1
          exp_str = exp_str.substring(1)
        }
      }
      // Collect the exponent digits; stop at the first non-digit.
      local ed = ""
      local k = 0
      loop(k < exp_str.length()) {
        local ch = exp_str.substring(k, k + 1)
        if ch >= "0" && ch <= "9" {
          ed = ed + ch
          k = k + 1
        } else {
          break
        }
      }
      // NOTE(review): when no exponent digits are present `ed` is "" — the result then
      // depends on what StringHelpers.to_i64("") returns; confirm against that helper.
      local e = StringHelpers.to_i64(ed)
      // Split the mantissa around its decimal point.
      local dotm = mant.indexOf(".")
      local intp = mant
      local frac = ""
      if dotm >= 0 {
        intp = mant.substring(0, dotm)
        frac = mant.substring(dotm + 1)
      }
      // ms = all mantissa digits with the point removed.
      local ms = intp + frac
      // Strip leading zeros, remembering how many were removed.
      local lead = 0
      loop(lead < ms.length() && ms.substring(lead, lead + 1) == "0") { lead = lead + 1 }
      ms = ms.substring(lead)
      if ms.length() == 0 { return "0.0" }
      // Strip trailing zeros. ms starts with a non-zero digit here, so at least one
      // character always remains; missing positions are re-padded below when needed.
      local tail = ms.length()
      loop(tail > 1 && ms.substring(tail - 1, tail) == "0") { tail = tail - 1 }
      ms = ms.substring(0, tail)
      // pos = index inside ms where the decimal point belongs. Every stripped leading
      // zero moved the remaining digits one place to the left, so `lead` must be
      // subtracted (without this "0.5e1" came out as "50.0" instead of "5.0").
      local pos = intp.length() + (e * esign) - lead
      if pos <= 0 {
        // Point lies at or before the first digit: 0.<zeros><digits>
        local zeros = ""
        local zc = 0 - pos
        loop(zc > 0) {
          zeros = zeros + "0"
          zc = zc - 1
        }
        return sign + "0." + zeros + ms
      }
      if pos >= ms.length() {
        // Point lies at or after the last digit: <digits><zeros>.0
        local zeros2 = ""
        local zc2 = pos - ms.length()
        loop(zc2 > 0) {
          zeros2 = zeros2 + "0"
          zc2 = zc2 - 1
        }
        return sign + ms + zeros2 + ".0"
      }
      // Point lies strictly inside the digit string.
      return sign + ms.substring(0, pos) + "." + ms.substring(pos)
    }
    // Non-exponent path: plain integer or decimal text.
    local dot = s.indexOf(".")
    local intp2 = s
    local frac2 = ""
    if dot >= 0 {
      intp2 = s.substring(0, dot)
      frac2 = s.substring(dot + 1)
    }
    // Fraction: drop trailing zeros but keep at least one digit.
    local j = frac2.length()
    loop(j > 0 && frac2.substring(j - 1, j) == "0") { j = j - 1 }
    frac2 = frac2.substring(0, j)
    if frac2.length() == 0 { frac2 = "0" }
    // Integer part: drop redundant leading zeros but keep at least one digit.
    local z = 0
    loop(z + 1 < intp2.length() && intp2.substring(z, z + 1) == "0") { z = z + 1 }
    intp2 = intp2.substring(z)
    if intp2.length() == 0 { intp2 = "0" }
    local out4 = intp2 + "." + frac2
    // Zero never carries a sign.
    if out4 == "0.0" { return "0.0" }
    return sign + out4
  }

  // read_num_token(text, pos)
  // Purpose: extract one numeric token from `text` starting exactly at index `pos`.
  //   Accepted shape: optional "+" or "-", digits, optional "." followed by digits,
  //   optional exponent ("e" or "E", optional sign, at least one digit).
  //   At least one digit must appear before the exponent part.
  // Params: text — source text (coerced to a string via "" + text); pos — start index.
  // Returns: the token substring s[pos, end), or null when text is null, pos is out of
  //   range, no digit was found, or an exponent marker is not followed by a digit.
  read_num_token(text, pos) {
    if text == null { return null }
    local s = "" + text
    local n = s.length()
    local i = pos
    // Reject start positions outside the text (negative or past the end).
    if i < 0 { return null }
    if i >= n { return null }
    // Optional sign.
    if s.substring(i, i + 1) == "+" || s.substring(i, i + 1) == "-" { i = i + 1 }
    // `had` records whether any mantissa digit was seen (integer or fractional part).
    local had = 0
    loop(i < n) {
      local ch = s.substring(i, i + 1)
      if ch >= "0" && ch <= "9" {
        had = 1
        i = i + 1
      } else {
        break
      }
    }
    // Optional fractional part.
    if i < n && s.substring(i, i + 1) == "." {
      i = i + 1
      loop(i < n) {
        local ch2 = s.substring(i, i + 1)
        if ch2 >= "0" && ch2 <= "9" {
          had = 1
          i = i + 1
        } else {
          break
        }
      }
    }
    // Optional exponent part; an exponent marker without digits invalidates the token.
    if i < n {
      local ch3 = s.substring(i, i + 1)
      if ch3 == "e" || ch3 == "E" {
        i = i + 1
        if i < n && (s.substring(i, i + 1) == "+" || s.substring(i, i + 1) == "-") { i = i + 1 }
        local dig = 0
        loop(i < n) {
          local ce = s.substring(i, i + 1)
          if ce >= "0" && ce <= "9" {
            dig = 1
            i = i + 1
          } else {
            break
          }
        }
        if dig == 0 { return null }
      }
    }
    if had == 0 { return null }
    return s.substring(pos, i)
  }
}