macro(loopform mvp-2): gate normalization to <=2 updated vars and no break/continue; add loop_two_vars golden and selfhost preexpand smoke
This commit is contained in:
@ -3,9 +3,9 @@
|
||||
// Next steps: normalize `loop(cond){ body }` into carrier-based LoopForm.
|
||||
|
||||
static box MacroBoxSpec {
|
||||
static function name() { return "LoopNormalize" }
|
||||
name() { return "LoopNormalize" }
|
||||
|
||||
static function expand(json, ctx) {
|
||||
expand(json, ctx) {
|
||||
// MVP normalizer: detect Loop nodes with canonical key order
|
||||
// "kind":"Loop","condition":<json>,"body":[ ... ] and rewrite them
|
||||
// into a normalized form using JsonBuilder (keys ordered as condition/body).
|
||||
@ -17,21 +17,27 @@ static box MacroBoxSpec {
|
||||
local out = ""
|
||||
local i = 0
|
||||
|
||||
// parse a JSON string starting at i (supports objects, arrays, strings, numbers, true/false/null)
|
||||
// parse a JSON value starting at i and return "<end_index>#<json>"
|
||||
function parse_value(s, i) {
|
||||
local n = s.length()
|
||||
if i >= n { return ["", i] }
|
||||
if i >= n { return ("" + i) + "#" + "" }
|
||||
local ch = s.substring(i, i+1)
|
||||
// string
|
||||
if ch == "\"" {
|
||||
local j = i + 1
|
||||
loop(j < n) {
|
||||
local c = s.substring(j, j+1)
|
||||
if c == "\\" { j = j + 2; continue }
|
||||
if c == "\"" { j = j + 1; break }
|
||||
if c == "\\" {
|
||||
j = j + 2
|
||||
continue
|
||||
}
|
||||
if c == "\"" {
|
||||
j = j + 1
|
||||
break
|
||||
}
|
||||
j = j + 1
|
||||
}
|
||||
return [s.substring(i, j), j]
|
||||
return ("" + j) + "#" + s.substring(i, j)
|
||||
}
|
||||
// object
|
||||
if ch == "{" {
|
||||
@ -45,17 +51,17 @@ static box MacroBoxSpec {
|
||||
local k = j - 1
|
||||
local esc = false
|
||||
if k >= 0 && s.substring(k, k+1) == "\\" { esc = true }
|
||||
if !esc { in_str = !in_str }
|
||||
if not esc { in_str = not in_str }
|
||||
j = j + 1
|
||||
continue
|
||||
}
|
||||
if !in_str {
|
||||
if not in_str {
|
||||
if c == "{" { depth = depth + 1 }
|
||||
else if c == "}" { depth = depth - 1 }
|
||||
if c == "}" { depth = depth - 1 }
|
||||
}
|
||||
j = j + 1
|
||||
}
|
||||
return [s.substring(i, j), j]
|
||||
return ("" + j) + "#" + s.substring(i, j)
|
||||
}
|
||||
// array
|
||||
if ch == "[" {
|
||||
@ -68,17 +74,17 @@ static box MacroBoxSpec {
|
||||
local k = j - 1
|
||||
local esc = false
|
||||
if k >= 0 && s.substring(k, k+1) == "\\" { esc = true }
|
||||
if !esc { in_str = !in_str }
|
||||
if not esc { in_str = not in_str }
|
||||
j = j + 1
|
||||
continue
|
||||
}
|
||||
if !in_str {
|
||||
if not in_str {
|
||||
if c == "[" { depth = depth + 1 }
|
||||
else if c == "]" { depth = depth - 1 }
|
||||
if c == "]" { depth = depth - 1 }
|
||||
}
|
||||
j = j + 1
|
||||
}
|
||||
return [s.substring(i, j), j]
|
||||
return ("" + j) + "#" + s.substring(i, j)
|
||||
}
|
||||
// number/true/false/null: read until delimiter
|
||||
local j = i
|
||||
@ -87,7 +93,71 @@ static box MacroBoxSpec {
|
||||
if c == "," || c == "]" || c == "}" || c == "\n" || c == "\r" || c == "\t" || c == " " { break }
|
||||
j = j + 1
|
||||
}
|
||||
return [s.substring(i, j), j]
|
||||
return ("" + j) + "#" + s.substring(i, j)
|
||||
}
|
||||
|
||||
function pair_idx(pair) {
    // Decode the decimal end index that prefixes a "<end_index>#<json>" pair.
    // Reading stops at the first '#' (or at the end of the string when there is none).
    local digits = "0123456789"
    local total = 0
    local pos = 0
    local len = pair.length()
    loop(pos < len) {
        local cur = pair.substring(pos, pos+1)
        if cur == "#" { break }
        // Look the character up in the digit table; its position is its value.
        // A character that is not a decimal digit contributes 0.
        local digit = 0
        local d = 0
        loop(d < 10) {
            if digits.substring(d, d+1) == cur { digit = d }
            d = d + 1
        }
        total = total * 10 + digit
        pos = pos + 1
    }
    return total
}
|
||||
|
||||
function pair_json(pair) {
    // Return the JSON payload of a "<end_index>#<json>" pair, i.e. everything
    // after the first '#'. Yields "" when the pair contains no '#'.
    local len = pair.length()
    local sep = -1
    local at = 0
    loop(at < len && sep < 0) {
        if pair.substring(at, at+1) == "#" { sep = at }
        at = at + 1
    }
    if sep < 0 { return "" }
    return pair.substring(sep + 1, len)
}
|
||||
|
||||
// Extract the assignment target variable name from an Assignment JSON object string.
// Searches e2 for the first occurrence of the literal text
//   "target":{"kind":"Variable","name":"
// and returns the characters that follow it up to the next unescaped quote.
// Escape sequences inside the name are returned verbatim (not decoded).
// Returns "" when the pattern is absent or when the name string is never closed,
// so that malformed input is reported as "not found" instead of a truncated name.
function extract_assign_target_var(e2) {
    local pat = "\"target\":{\"kind\":\"Variable\",\"name\":\""
    // lengths are loop-invariant; compute them once
    local plen = pat.length()
    local n = e2.length()

    // locate the pattern; pos becomes the index just past it (start of the name)
    local pos = -1
    local j = 0
    loop(j + plen <= n) {
        if e2.substring(j, j + plen) == pat { pos = j + plen; break }
        j = j + 1
    }
    if pos < 0 { return "" }

    // advance k to the closing quote, stepping over backslash escapes as a unit
    local k = pos
    local closed = 0
    loop(k < n) {
        local c = e2.substring(k, k+1)
        if c == "\"" { closed = 1; break }
        if c == "\\" { k = k + 2; continue }
        k = k + 1
    }
    // unterminated string: treat as unknown structure
    if closed == 0 { return "" }
    return e2.substring(pos, k)
}
|
||||
|
||||
// pattern tokens
|
||||
@ -100,17 +170,18 @@ static box MacroBoxSpec {
|
||||
if i + 6 < s.length() && s.substring(i, i+1) == "{" {
|
||||
// look ahead inside this object to see if it begins with kind:Loop
|
||||
local val = parse_value(s, i)
|
||||
local obj = val.get(0)
|
||||
local endi = val.get(1)
|
||||
local endi = pair_idx(val)
|
||||
local obj = pair_json(val)
|
||||
|
||||
// quick check: contains kind:"Loop"
|
||||
local pos_kind = obj.indexOf(t_kind_loop) // assume Nyash has indexOf? If not, manual scan fallback below
|
||||
if pos_kind == null {
|
||||
// Fallback manual contains
|
||||
// quick check: contains kind:"Loop" (manual scan)
|
||||
{
|
||||
local found = 0
|
||||
local k = 0
|
||||
loop(k + t_kind_loop.length() <= obj.length()) {
|
||||
if obj.substring(k, k + t_kind_loop.length()) == t_kind_loop { found = 1; break }
|
||||
if obj.substring(k, k + t_kind_loop.length()) == t_kind_loop {
|
||||
found = 1
|
||||
break
|
||||
}
|
||||
k = k + 1
|
||||
}
|
||||
if found == 0 {
|
||||
@ -125,73 +196,184 @@ static box MacroBoxSpec {
|
||||
local oj = 0
|
||||
local pos_c = -1
|
||||
loop(oj + t_cond.length() <= obj.length()) {
|
||||
if obj.substring(oj, oj + t_cond.length()) == t_cond { pos_c = oj + t_cond.length(); break }
|
||||
if obj.substring(oj, oj + t_cond.length()) == t_cond {
|
||||
pos_c = oj + t_cond.length()
|
||||
break
|
||||
}
|
||||
oj = oj + 1
|
||||
}
|
||||
local pos_b = -1
|
||||
local kk = 0
|
||||
loop(kk + t_body.length() <= obj.length()) {
|
||||
if obj.substring(kk, kk + t_body.length()) == t_body { pos_b = kk + t_body.length(); break }
|
||||
if obj.substring(kk, kk + t_body.length()) == t_body {
|
||||
pos_b = kk + t_body.length()
|
||||
break
|
||||
}
|
||||
kk = kk + 1
|
||||
}
|
||||
if pos_c >= 0 && pos_b >= 0 {
|
||||
// extract values
|
||||
local cond_pair = parse_value(obj, pos_c)
|
||||
local cond_json = cond_pair.get(0)
|
||||
local cond_json = pair_json(cond_pair)
|
||||
// move after condition to find body array
|
||||
// ensure we re-scan from pos_b to robustly pick body
|
||||
local body_pair = parse_value(obj, pos_b)
|
||||
local body_json = body_pair.get(0)
|
||||
local body_json = pair_json(body_pair)
|
||||
// if body_json is not array, keep identity
|
||||
if body_json.substring(0,1) == "[" {
|
||||
// decompose body array into elements
|
||||
local elems = []
|
||||
// strip [ ... ]
|
||||
// Reorder body: move Assignment nodes to the tail (carrier-like normalization)
|
||||
local inner = body_json.substring(1, body_json.length()-1)
|
||||
// split top-level JSON elements (respect nesting)
|
||||
local p = 0
|
||||
local n = inner.length()
|
||||
local in_str = false
|
||||
local depth_obj = 0
|
||||
local depth_arr = 0
|
||||
local start = 0
|
||||
loop(p < n) {
|
||||
local c = inner.substring(p, p+1)
|
||||
if c == "\"" {
|
||||
// toggle string unless escaped
|
||||
local k2 = p - 1
|
||||
local elems = []
|
||||
local p2 = 0
|
||||
local n2 = inner.length()
|
||||
local in_str2 = false
|
||||
local depth_obj2 = 0
|
||||
local depth_arr2 = 0
|
||||
local start2 = 0
|
||||
loop(p2 < n2) {
|
||||
local c2 = inner.substring(p2, p2+1)
|
||||
if c2 == "\"" {
|
||||
local k2 = p2 - 1
|
||||
local esc2 = false
|
||||
if k2 >= 0 && inner.substring(k2, k2+1) == "\\" { esc2 = true }
|
||||
if !esc2 { in_str = !in_str }
|
||||
} else if !in_str {
|
||||
if c == "{" { depth_obj = depth_obj + 1 }
|
||||
else if c == "}" { depth_obj = depth_obj - 1 }
|
||||
else if c == "[" { depth_arr = depth_arr + 1 }
|
||||
else if c == "]" { depth_arr = depth_arr - 1 }
|
||||
else if c == "," && depth_obj == 0 && depth_arr == 0 {
|
||||
elems.push(inner.substring(start, p))
|
||||
start = p + 1
|
||||
if not esc2 { in_str2 = not in_str2 }
|
||||
} else if not in_str2 {
|
||||
if c2 == "{" { depth_obj2 = depth_obj2 + 1 }
|
||||
if c2 == "}" { depth_obj2 = depth_obj2 - 1 }
|
||||
if c2 == "[" { depth_arr2 = depth_arr2 + 1 }
|
||||
if c2 == "]" { depth_arr2 = depth_arr2 - 1 }
|
||||
if c2 == "," && depth_obj2 == 0 && depth_arr2 == 0 {
|
||||
elems.push(inner.substring(start2, p2))
|
||||
start2 = p2 + 1
|
||||
}
|
||||
}
|
||||
p = p + 1
|
||||
p2 = p2 + 1
|
||||
}
|
||||
if start < n { elems.push(inner.substring(start, n)) }
|
||||
if start2 < n2 { elems.push(inner.substring(start2, n2)) }
|
||||
|
||||
// trim spaces of elements (simple)
|
||||
// Classify with original indices
|
||||
local assigns = [] // list of [idx,json]
|
||||
local others = [] // list of [idx,json]
|
||||
local tagA = "\"kind\":\"Assignment\""
|
||||
local t = 0
|
||||
loop(t < elems.length()) {
|
||||
local e = elems.get(t)
|
||||
// naive trim
|
||||
local e2 = elems.get(t)
|
||||
// trim
|
||||
local a = 0
|
||||
local b = e.length()
|
||||
loop(a < b && (e.substring(a,a+1)==" " || e.substring(a,a+1)=="\n" || e.substring(a,a+1)=="\t" || e.substring(a,a+1)=="\r")) { a = a + 1 }
|
||||
loop(b > a && (e.substring(b-1,b)==" " || e.substring(b-1,b)=="\n" || e.substring(b-1,b)=="\t" || e.substring(b-1,b)=="\r")) { b = b - 1 }
|
||||
elems.set(t, e.substring(a,b))
|
||||
local b = e2.length()
|
||||
loop(a < b && (e2.substring(a,a+1)==" " || e2.substring(a,a+1)=="\n" || e2.substring(a,a+1)=="\t" || e2.substring(a,a+1)=="\r")) { a = a + 1 }
|
||||
loop(b > a && (e2.substring(b-1,b)==" " || e2.substring(b-1,b)=="\n" || e2.substring(b-1,b)=="\t" || e2.substring(b-1,b)=="\r")) { b = b - 1 }
|
||||
e2 = e2.substring(a,b)
|
||||
// contains tagA?
|
||||
local found = 0
|
||||
local q = 0
|
||||
loop(q + tagA.length() <= e2.length()) {
|
||||
if e2.substring(q, q + tagA.length()) == tagA {
|
||||
found = 1
|
||||
break
|
||||
}
|
||||
q = q + 1
|
||||
}
|
||||
if found == 1 { assigns.push([t, e2]) } else { others.push([t, e2]) }
|
||||
t = t + 1
|
||||
}
|
||||
|
||||
// rebuild Loop via JsonBuilder (canonical key order)
|
||||
local loop_norm = JB.loop_(cond_json, elems)
|
||||
// Only reorder when all others appear before all assigns in the original order
|
||||
local ok = 1
|
||||
if assigns.length() > 0 && others.length() > 0 {
|
||||
// max index of others, min index of assigns
|
||||
local max_o = others.get(0).get(0)
|
||||
local i2 = 1
|
||||
loop(i2 < others.length()) { if others.get(i2).get(0) > max_o { max_o = others.get(i2).get(0) } i2 = i2 + 1 }
|
||||
local min_a = assigns.get(0).get(0)
|
||||
i2 = 1
|
||||
loop(i2 < assigns.length()) { if assigns.get(i2).get(0) < min_a { min_a = assigns.get(i2).get(0) } i2 = i2 + 1 }
|
||||
if not (max_o <= min_a) { ok = 0 }
|
||||
}
|
||||
|
||||
// MVP-2 gate: skip when Break/Continue exists (conservative)
|
||||
if ok == 1 {
|
||||
local has_ctrl = 0
|
||||
local tagBr = "\"kind\":\"Break\""
|
||||
local tagCt = "\"kind\":\"Continue\""
|
||||
t = 0
|
||||
loop(t < elems.length()) {
|
||||
local e3 = elems.get(t)
|
||||
// cheap contains
|
||||
local p = 0
|
||||
loop(p + tagBr.length() <= e3.length()) {
|
||||
if e3.substring(p, p + tagBr.length()) == tagBr { has_ctrl = 1; break }
|
||||
p = p + 1
|
||||
}
|
||||
if has_ctrl == 0 {
|
||||
p = 0
|
||||
loop(p + tagCt.length() <= e3.length()) {
|
||||
if e3.substring(p, p + tagCt.length()) == tagCt { has_ctrl = 1; break }
|
||||
p = p + 1
|
||||
}
|
||||
}
|
||||
if has_ctrl == 1 { break }
|
||||
t = t + 1
|
||||
}
|
||||
if has_ctrl == 1 { ok = 0 }
|
||||
}
|
||||
|
||||
// MVP-2 gate: allow up to 2 unique assignment targets; else keep original
|
||||
if ok == 1 {
|
||||
local uniq = []
|
||||
t = 0
|
||||
local too_many = 0
|
||||
loop(t < assigns.length()) {
|
||||
local aj = assigns.get(t).get(1)
|
||||
local nm = extract_assign_target_var(aj)
|
||||
if nm == "" {
|
||||
// unknown structure → conservative: abort reorder
|
||||
too_many = 1
|
||||
break
|
||||
}
|
||||
// check if nm already recorded
|
||||
local seen = 0
|
||||
local u = 0
|
||||
loop(u < uniq.length()) {
|
||||
if uniq.get(u) == nm { seen = 1; break }
|
||||
u = u + 1
|
||||
}
|
||||
if seen == 0 { uniq.push(nm) }
|
||||
if uniq.length() > 2 { too_many = 1; break }
|
||||
t = t + 1
|
||||
}
|
||||
if too_many == 1 { ok = 0 }
|
||||
}
|
||||
|
||||
// Rebuild body (others then assigns) only when ok; otherwise keep original
|
||||
local body_new = "["
|
||||
local first = 1
|
||||
t = 0
|
||||
if ok == 1 {
|
||||
loop(t < others.length()) {
|
||||
if first == 1 { first = 0 } else { body_new = body_new + "," }
|
||||
body_new = body_new + others.get(t).get(1)
|
||||
t = t + 1
|
||||
}
|
||||
t = 0
|
||||
loop(t < assigns.length()) {
|
||||
if first == 1 { first = 0 } else { body_new = body_new + "," }
|
||||
body_new = body_new + assigns.get(t).get(1)
|
||||
t = t + 1
|
||||
}
|
||||
} else {
|
||||
// keep original order
|
||||
local back = 0
|
||||
loop(back < elems.length()) {
|
||||
if first == 1 { first = 0 } else { body_new = body_new + "," }
|
||||
body_new = body_new + elems.get(back)
|
||||
back = back + 1
|
||||
}
|
||||
}
|
||||
body_new = body_new + "]"
|
||||
|
||||
// rebuild Loop string directly (canonical key order: condition, body)
|
||||
local loop_norm = "{\"kind\":\"Loop\",\"condition\":" + cond_json + ",\"body\":" + body_new + "}"
|
||||
out = out + loop_norm
|
||||
i = endi
|
||||
continue
|
||||
|
||||
8
apps/tests/macro_golden_loop_two_vars.nyash
Normal file
8
apps/tests/macro_golden_loop_two_vars.nyash
Normal file
@ -0,0 +1,8 @@
|
||||
// Loop fixture with two assigned variables (sum and i).
local i = 0
|
||||
local sum = 0
|
||||
// Body: one non-assignment statement (print) followed by two assignments.
loop(i < 3) {
|
||||
print(i)
|
||||
sum = sum + i
|
||||
i = i + 1
|
||||
}
|
||||
|
||||
10
tools/test/golden/macro/loop_two_vars.expanded.json
Normal file
10
tools/test/golden/macro/loop_two_vars.expanded.json
Normal file
@ -0,0 +1,10 @@
|
||||
{"kind":"Program","statements":[
|
||||
{"kind":"Local","variables":["i"],"inits":[{"kind":"Literal","value":{"type":"int","value":0}}]},
|
||||
{"kind":"Local","variables":["sum"],"inits":[{"kind":"Literal","value":{"type":"int","value":0}}]},
|
||||
{"kind":"Loop","condition":{"kind":"BinaryOp","op":"<","left":{"kind":"Variable","name":"i"},"right":{"kind":"Literal","value":{"type":"int","value":3}}},"body":[
|
||||
{"kind":"Print","expression":{"kind":"Variable","name":"i"}},
|
||||
{"kind":"Assignment","target":{"kind":"Variable","name":"sum"},"value":{"kind":"BinaryOp","op":"+","left":{"kind":"Variable","name":"sum"},"right":{"kind":"Variable","name":"i"}}},
|
||||
{"kind":"Assignment","target":{"kind":"Variable","name":"i"},"value":{"kind":"BinaryOp","op":"+","left":{"kind":"Variable","name":"i"},"right":{"kind":"Literal","value":{"type":"int","value":1}}}}
|
||||
]}
|
||||
]}
|
||||
|
||||
31
tools/test/golden/macro/loop_two_vars_user_macro_golden.sh
Normal file
31
tools/test/golden/macro/loop_two_vars_user_macro_golden.sh
Normal file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env bash
# Golden test: expand apps/tests/macro_golden_loop_two_vars.nyash with the
# loop-normalize user macro enabled and compare the dumped expanded AST JSON
# against tools/test/golden/macro/loop_two_vars.expanded.json.
# Exit codes: 0 = match, 1 = nyash binary missing, 2 = golden mismatch.
set -euo pipefail

root=$(cd "$(dirname "$0")"/../../../.. && pwd)
bin="$root/target/release/nyash"
src="apps/tests/macro_golden_loop_two_vars.nyash"
golden="$root/tools/test/golden/macro/loop_two_vars.expanded.json"

if [ ! -x "$bin" ]; then
  echo "nyash binary not found at $bin; build first (cargo build --release)" >&2
  exit 1
fi

# $src and NYASH_MACRO_PATHS are repository-relative; run from the repository
# root so the test works no matter which directory it is invoked from.
cd "$root"

export NYASH_MACRO_ENABLE=1
export NYASH_MACRO_PATHS="apps/macros/examples/loop_normalize_macro.nyash"

# Canonicalize JSON (sorted keys, compact separators) so the comparison ignores
# whitespace and key-order differences.
normalize_json() {
  python3 -c 'import sys,json; print(json.dumps(json.loads(sys.stdin.read()), sort_keys=True, separators=(",",":")))'
}

out_raw=$("$bin" --dump-expanded-ast-json "$src")
out_norm=$(printf '%s' "$out_raw" | normalize_json)
gold_norm=$(normalize_json < "$golden")

if [ "$out_norm" != "$gold_norm" ]; then
  echo "Golden mismatch (loop two vars normalization)" >&2
  # diff exits non-zero on differences; do not let set -e abort before exit 2
  diff -u <(echo "$out_norm") <(echo "$gold_norm") || true
  exit 2
fi

echo "[OK] golden loop two vars normalization matched"
|
||||
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env bash
# Smoke test: run the loop_two_vars fixture on the VM backend with the
# loop-normalize user macro enabled and check, via verbose logs, that the
# selfhost macro pre-expand path engaged.
# Exit codes: 0 = engaged, 1 = nyash binary missing, 2 = did not engage.
set -euo pipefail

# Note: the quotes around $0 must NOT be backslash-escaped inside $(...);
# escaped quotes would be passed to dirname as literal characters.
root=$(cd "$(dirname "$0")"/../../../.. && pwd)
bin="$root/target/release/nyash"
src="apps/tests/macro_golden_loop_two_vars.nyash"

if [ ! -x "$bin" ]; then
  echo "nyash binary not found at $bin; build first (cargo build --release)" >&2
  exit 1
fi

# $src and NYASH_MACRO_PATHS are repository-relative; run from the repository
# root so the test works no matter which directory it is invoked from.
cd "$root"

# Enable user macro (loop normalize) and macro engine
export NYASH_MACRO_ENABLE=1
export NYASH_MACRO_PATHS="apps/macros/examples/loop_normalize_macro.nyash"

# Selfhost pre-expand: default auto (no explicit env); requires PyVM
export NYASH_USE_NY_COMPILER=1
export NYASH_VM_USE_PY=1

# Verbose to assert pre-expand path engagement
export NYASH_CLI_VERBOSE=1

# The run's own exit status is irrelevant here; only the log output is checked.
out=$("$bin" --backend vm "$src" 2>&1 || true)

# Substring match in the shell itself: no dependency on ripgrep and no pipeline
# that could fail under pipefail when the reader exits early.
if [[ "$out" == *"selfhost macro pre-expand: engaging"* ]]; then
  echo "[OK] selfhost pre-expand (loop two vars, auto) engaged"
  exit 0
fi

echo "[WARN] selfhost pre-expand auto did not engage; printing logs:" >&2
echo "$out" >&2
exit 2
|
||||
|
||||
Reference in New Issue
Block a user