diff --git a/apps/macros/examples/loop_normalize_macro.nyash b/apps/macros/examples/loop_normalize_macro.nyash index eaf4175a..9625fd88 100644 --- a/apps/macros/examples/loop_normalize_macro.nyash +++ b/apps/macros/examples/loop_normalize_macro.nyash @@ -3,9 +3,9 @@ // Next steps: normalize `loop(cond){ body }` into carrier-based LoopForm. static box MacroBoxSpec { - static function name() { return "LoopNormalize" } + name() { return "LoopNormalize" } - static function expand(json, ctx) { + expand(json, ctx) { // MVP normalizer: detect Loop nodes with canonical key order // "kind":"Loop","condition":,"body":[ ... ] and rewrite them // into a normalized form using JsonBuilder (keys ordered as condition/body). @@ -17,21 +17,27 @@ static box MacroBoxSpec { local out = "" local i = 0 - // parse a JSON string starting at i (supports objects, arrays, strings, numbers, true/false/null) + // parse a JSON value starting at i and return "#" function parse_value(s, i) { local n = s.length() - if i >= n { return ["", i] } + if i >= n { return ("" + i) + "#" + "" } local ch = s.substring(i, i+1) // string if ch == "\"" { local j = i + 1 loop(j < n) { local c = s.substring(j, j+1) - if c == "\\" { j = j + 2; continue } - if c == "\"" { j = j + 1; break } + if c == "\\" { + j = j + 2 + continue + } + if c == "\"" { + j = j + 1 + break + } j = j + 1 } - return [s.substring(i, j), j] + return ("" + j) + "#" + s.substring(i, j) } // object if ch == "{" { @@ -45,17 +51,17 @@ static box MacroBoxSpec { local k = j - 1 local esc = false if k >= 0 && s.substring(k, k+1) == "\\" { esc = true } - if !esc { in_str = !in_str } + if not esc { in_str = not in_str } j = j + 1 continue } - if !in_str { + if not in_str { if c == "{" { depth = depth + 1 } - else if c == "}" { depth = depth - 1 } + if c == "}" { depth = depth - 1 } } j = j + 1 } - return [s.substring(i, j), j] + return ("" + j) + "#" + s.substring(i, j) } // array if ch == "[" { @@ -68,17 +74,17 @@ static box MacroBoxSpec { local k 
= j - 1 local esc = false if k >= 0 && s.substring(k, k+1) == "\\" { esc = true } - if !esc { in_str = !in_str } + if not esc { in_str = not in_str } j = j + 1 continue } - if !in_str { + if not in_str { if c == "[" { depth = depth + 1 } - else if c == "]" { depth = depth - 1 } + if c == "]" { depth = depth - 1 } } j = j + 1 } - return [s.substring(i, j), j] + return ("" + j) + "#" + s.substring(i, j) } // number/true/false/null: read until delimiter local j = i @@ -87,7 +93,71 @@ static box MacroBoxSpec { if c == "," || c == "]" || c == "}" || c == "\n" || c == "\r" || c == "\t" || c == " " { break } j = j + 1 } - return [s.substring(i, j), j] + return ("" + j) + "#" + s.substring(i, j) + } + + function pair_idx(pair) { + // parse decimal at start until '#' + local i = 0 + local n = pair.length() + local val = 0 + loop(i < n) { + local ch = pair.substring(i, i+1) + if ch == "#" { break } + local d = 0 + if ch == "0" { d = 0 } + if ch == "1" { d = 1 } + if ch == "2" { d = 2 } + if ch == "3" { d = 3 } + if ch == "4" { d = 4 } + if ch == "5" { d = 5 } + if ch == "6" { d = 6 } + if ch == "7" { d = 7 } + if ch == "8" { d = 8 } + if ch == "9" { d = 9 } + val = val * 10 + d + i = i + 1 + } + return val + } + + function pair_json(pair) { + // substring after first '#' + local i = 0 + local n = pair.length() + loop(i < n) { + if pair.substring(i, i+1) == "#" { return pair.substring(i+1, n) } + i = i + 1 + } + return "" + } + + // Extract assignment target variable name from an Assignment JSON object string. + // Returns "" if not found. 
+ function extract_assign_target_var(e2) { + // pattern: "target":{"kind":"Variable","name":""} + local pat = "\"target\":{\"kind\":\"Variable\",\"name\":\"" + local pos = -1 + local j = 0 + loop(j + pat.length() <= e2.length()) { + if e2.substring(j, j + pat.length()) == pat { pos = j + pat.length(); break } + j = j + 1 + } + if pos < 0 { return "" } + // read until next unescaped quote + local name = "" + local k = pos + loop(k < e2.length()) { + local c = e2.substring(k, k+1) + if c == "\\" { + // skip escape and next char + if k + 1 < e2.length() { name = name + e2.substring(k, k+2); k = k + 2; continue } + } + if c == "\"" { break } + name = name + c + k = k + 1 + } + return name } // pattern tokens @@ -100,17 +170,18 @@ static box MacroBoxSpec { if i + 6 < s.length() && s.substring(i, i+1) == "{" { // look ahead inside this object to see if it begins with kind:Loop local val = parse_value(s, i) - local obj = val.get(0) - local endi = val.get(1) + local endi = pair_idx(val) + local obj = pair_json(val) - // quick check: contains kind:"Loop" - local pos_kind = obj.indexOf(t_kind_loop) // assume Nyash has indexOf? 
If not, manual scan fallback below - if pos_kind == null { - // Fallback manual contains + // quick check: contains kind:"Loop" (manual scan) + { local found = 0 local k = 0 loop(k + t_kind_loop.length() <= obj.length()) { - if obj.substring(k, k + t_kind_loop.length()) == t_kind_loop { found = 1; break } + if obj.substring(k, k + t_kind_loop.length()) == t_kind_loop { + found = 1 + break + } k = k + 1 } if found == 0 { @@ -125,73 +196,184 @@ static box MacroBoxSpec { local oj = 0 local pos_c = -1 loop(oj + t_cond.length() <= obj.length()) { - if obj.substring(oj, oj + t_cond.length()) == t_cond { pos_c = oj + t_cond.length(); break } + if obj.substring(oj, oj + t_cond.length()) == t_cond { + pos_c = oj + t_cond.length() + break + } oj = oj + 1 } local pos_b = -1 local kk = 0 loop(kk + t_body.length() <= obj.length()) { - if obj.substring(kk, kk + t_body.length()) == t_body { pos_b = kk + t_body.length(); break } + if obj.substring(kk, kk + t_body.length()) == t_body { + pos_b = kk + t_body.length() + break + } kk = kk + 1 } if pos_c >= 0 && pos_b >= 0 { // extract values local cond_pair = parse_value(obj, pos_c) - local cond_json = cond_pair.get(0) + local cond_json = pair_json(cond_pair) // move after condition to find body array // ensure we re-scan from pos_b to robustly pick body local body_pair = parse_value(obj, pos_b) - local body_json = body_pair.get(0) + local body_json = pair_json(body_pair) // if body_json is not array, keep identity if body_json.substring(0,1) == "[" { - // decompose body array into elements - local elems = [] - // strip [ ... 
] + // Reorder body: move Assignment nodes to the tail (carrier-like normalization) local inner = body_json.substring(1, body_json.length()-1) - // split top-level JSON elements (respect nesting) - local p = 0 - local n = inner.length() - local in_str = false - local depth_obj = 0 - local depth_arr = 0 - local start = 0 - loop(p < n) { - local c = inner.substring(p, p+1) - if c == "\"" { - // toggle string unless escaped - local k2 = p - 1 + local elems = [] + local p2 = 0 + local n2 = inner.length() + local in_str2 = false + local depth_obj2 = 0 + local depth_arr2 = 0 + local start2 = 0 + loop(p2 < n2) { + local c2 = inner.substring(p2, p2+1) + if c2 == "\"" { + local k2 = p2 - 1 local esc2 = false if k2 >= 0 && inner.substring(k2, k2+1) == "\\" { esc2 = true } - if !esc2 { in_str = !in_str } - } else if !in_str { - if c == "{" { depth_obj = depth_obj + 1 } - else if c == "}" { depth_obj = depth_obj - 1 } - else if c == "[" { depth_arr = depth_arr + 1 } - else if c == "]" { depth_arr = depth_arr - 1 } - else if c == "," && depth_obj == 0 && depth_arr == 0 { - elems.push(inner.substring(start, p)) - start = p + 1 + if not esc2 { in_str2 = not in_str2 } + } else if not in_str2 { + if c2 == "{" { depth_obj2 = depth_obj2 + 1 } + if c2 == "}" { depth_obj2 = depth_obj2 - 1 } + if c2 == "[" { depth_arr2 = depth_arr2 + 1 } + if c2 == "]" { depth_arr2 = depth_arr2 - 1 } + if c2 == "," && depth_obj2 == 0 && depth_arr2 == 0 { + elems.push(inner.substring(start2, p2)) + start2 = p2 + 1 } } - p = p + 1 + p2 = p2 + 1 } - if start < n { elems.push(inner.substring(start, n)) } + if start2 < n2 { elems.push(inner.substring(start2, n2)) } - // trim spaces of elements (simple) + // Classify with original indices + local assigns = [] // list of [idx,json] + local others = [] // list of [idx,json] + local tagA = "\"kind\":\"Assignment\"" local t = 0 loop(t < elems.length()) { - local e = elems.get(t) - // naive trim + local e2 = elems.get(t) + // trim local a = 0 - local b = 
e.length() - loop(a < b && (e.substring(a,a+1)==" " || e.substring(a,a+1)=="\n" || e.substring(a,a+1)=="\t" || e.substring(a,a+1)=="\r")) { a = a + 1 } - loop(b > a && (e.substring(b-1,b)==" " || e.substring(b-1,b)=="\n" || e.substring(b-1,b)=="\t" || e.substring(b-1,b)=="\r")) { b = b - 1 } - elems.set(t, e.substring(a,b)) + local b = e2.length() + loop(a < b && (e2.substring(a,a+1)==" " || e2.substring(a,a+1)=="\n" || e2.substring(a,a+1)=="\t" || e2.substring(a,a+1)=="\r")) { a = a + 1 } + loop(b > a && (e2.substring(b-1,b)==" " || e2.substring(b-1,b)=="\n" || e2.substring(b-1,b)=="\t" || e2.substring(b-1,b)=="\r")) { b = b - 1 } + e2 = e2.substring(a,b) + // contains tagA? + local found = 0 + local q = 0 + loop(q + tagA.length() <= e2.length()) { + if e2.substring(q, q + tagA.length()) == tagA { + found = 1 + break + } + q = q + 1 + } + if found == 1 { assigns.push([t, e2]) } else { others.push([t, e2]) } t = t + 1 } - // rebuild Loop via JsonBuilder (canonical key order) - local loop_norm = JB.loop_(cond_json, elems) + // Only reorder when all others appear before all assigns in the original order + local ok = 1 + if assigns.length() > 0 && others.length() > 0 { + // max index of others, min index of assigns + local max_o = others.get(0).get(0) + local i2 = 1 + loop(i2 < others.length()) { if others.get(i2).get(0) > max_o { max_o = others.get(i2).get(0) } i2 = i2 + 1 } + local min_a = assigns.get(0).get(0) + i2 = 1 + loop(i2 < assigns.length()) { if assigns.get(i2).get(0) < min_a { min_a = assigns.get(i2).get(0) } i2 = i2 + 1 } + if not (max_o <= min_a) { ok = 0 } + } + + // MVP-2 gate: skip when Break/Continue exists (conservative) + if ok == 1 { + local has_ctrl = 0 + local tagBr = "\"kind\":\"Break\"" + local tagCt = "\"kind\":\"Continue\"" + t = 0 + loop(t < elems.length()) { + local e3 = elems.get(t) + // cheap contains + local p = 0 + loop(p + tagBr.length() <= e3.length()) { + if e3.substring(p, p + tagBr.length()) == tagBr { has_ctrl = 1; break } + p = 
p + 1 + } + if has_ctrl == 0 { + p = 0 + loop(p + tagCt.length() <= e3.length()) { + if e3.substring(p, p + tagCt.length()) == tagCt { has_ctrl = 1; break } + p = p + 1 + } + } + if has_ctrl == 1 { break } + t = t + 1 + } + if has_ctrl == 1 { ok = 0 } + } + + // MVP-2 gate: allow up to 2 unique assignment targets; else keep original + if ok == 1 { + local uniq = [] + t = 0 + local too_many = 0 + loop(t < assigns.length()) { + local aj = assigns.get(t).get(1) + local nm = extract_assign_target_var(aj) + if nm == "" { + // unknown structure → conservative: abort reorder + too_many = 1 + break + } + // check if nm already recorded + local seen = 0 + local u = 0 + loop(u < uniq.length()) { + if uniq.get(u) == nm { seen = 1; break } + u = u + 1 + } + if seen == 0 { uniq.push(nm) } + if uniq.length() > 2 { too_many = 1; break } + t = t + 1 + } + if too_many == 1 { ok = 0 } + } + + // Rebuild body (others then assigns) only when ok; otherwise keep original + local body_new = "[" + local first = 1 + t = 0 + if ok == 1 { + loop(t < others.length()) { + if first == 1 { first = 0 } else { body_new = body_new + "," } + body_new = body_new + others.get(t).get(1) + t = t + 1 + } + t = 0 + loop(t < assigns.length()) { + if first == 1 { first = 0 } else { body_new = body_new + "," } + body_new = body_new + assigns.get(t).get(1) + t = t + 1 + } + } else { + // keep original order + local back = 0 + loop(back < elems.length()) { + if first == 1 { first = 0 } else { body_new = body_new + "," } + body_new = body_new + elems.get(back) + back = back + 1 + } + } + body_new = body_new + "]" + + // rebuild Loop string directly (canonical key order: condition, body) + local loop_norm = "{\"kind\":\"Loop\",\"condition\":" + cond_json + ",\"body\":" + body_new + "}" out = out + loop_norm i = endi continue diff --git a/apps/tests/macro_golden_loop_two_vars.nyash b/apps/tests/macro_golden_loop_two_vars.nyash new file mode 100644 index 00000000..2c533317 --- /dev/null +++ 
b/apps/tests/macro_golden_loop_two_vars.nyash @@ -0,0 +1,8 @@ +local i = 0 +local sum = 0 +loop(i < 3) { + print(i) + sum = sum + i + i = i + 1 +} + diff --git a/tools/test/golden/macro/loop_two_vars.expanded.json b/tools/test/golden/macro/loop_two_vars.expanded.json new file mode 100644 index 00000000..89c08a50 --- /dev/null +++ b/tools/test/golden/macro/loop_two_vars.expanded.json @@ -0,0 +1,10 @@ +{"kind":"Program","statements":[ + {"kind":"Local","variables":["i"],"inits":[{"kind":"Literal","value":{"type":"int","value":0}}]}, + {"kind":"Local","variables":["sum"],"inits":[{"kind":"Literal","value":{"type":"int","value":0}}]}, + {"kind":"Loop","condition":{"kind":"BinaryOp","op":"<","left":{"kind":"Variable","name":"i"},"right":{"kind":"Literal","value":{"type":"int","value":3}}},"body":[ + {"kind":"Print","expression":{"kind":"Variable","name":"i"}}, + {"kind":"Assignment","target":{"kind":"Variable","name":"sum"},"value":{"kind":"BinaryOp","op":"+","left":{"kind":"Variable","name":"sum"},"right":{"kind":"Variable","name":"i"}}}, + {"kind":"Assignment","target":{"kind":"Variable","name":"i"},"value":{"kind":"BinaryOp","op":"+","left":{"kind":"Variable","name":"i"},"right":{"kind":"Literal","value":{"type":"int","value":1}}}} + ]} +]} + diff --git a/tools/test/golden/macro/loop_two_vars_user_macro_golden.sh b/tools/test/golden/macro/loop_two_vars_user_macro_golden.sh new file mode 100644 index 00000000..8abe0105 --- /dev/null +++ b/tools/test/golden/macro/loop_two_vars_user_macro_golden.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail + +root=$(cd "$(dirname "$0")"/../../../.. && pwd) +bin="$root/target/release/nyash" +src="apps/tests/macro_golden_loop_two_vars.nyash" +golden="$root/tools/test/golden/macro/loop_two_vars.expanded.json" + +if [ ! 
-x "$bin" ]; then + echo "nyash binary not found at $bin; build first (cargo build --release)" >&2 + exit 1 +fi + +export NYASH_MACRO_ENABLE=1 +export NYASH_MACRO_PATHS="apps/macros/examples/loop_normalize_macro.nyash" + +normalize_json() { + python3 -c 'import sys,json; print(json.dumps(json.loads(sys.stdin.read()), sort_keys=True, separators=(",",":")))' +} + +out_raw=$("$bin" --dump-expanded-ast-json "$src") +out_norm=$(printf '%s' "$out_raw" | normalize_json) +gold_norm=$(normalize_json < "$golden") + +if [ "$out_norm" != "$gold_norm" ]; then + echo "Golden mismatch (loop two vars normalization)" >&2 + diff -u <(echo "$out_norm") <(echo "$gold_norm") || true + exit 2 +fi + +echo "[OK] golden loop two vars normalization matched" diff --git a/tools/test/smoke/macro/selfhost_preexpand_loop_two_vars_auto.sh b/tools/test/smoke/macro/selfhost_preexpand_loop_two_vars_auto.sh new file mode 100644 index 00000000..b4dcc99d --- /dev/null +++ b/tools/test/smoke/macro/selfhost_preexpand_loop_two_vars_auto.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail + +root=$(cd "$(dirname "$0")"/../../../.. && pwd) +bin="$root/target/release/nyash" +src="apps/tests/macro_golden_loop_two_vars.nyash" + +if [ ! -x "$bin" ]; then + echo "nyash binary not found at $bin; build first (cargo build --release)" >&2 + exit 1 +fi + +# Enable user macro (loop normalize) and macro engine +export NYASH_MACRO_ENABLE=1 +export NYASH_MACRO_PATHS="apps/macros/examples/loop_normalize_macro.nyash" + +# Selfhost pre-expand: default auto (no explicit env); requires PyVM +export NYASH_USE_NY_COMPILER=1 +export NYASH_VM_USE_PY=1 + +# Verbose to assert pre-expand path engagement +export NYASH_CLI_VERBOSE=1 + +out=$("$bin" --backend vm "$src" 2>&1 || true) + +echo "$out" | rg -q "selfhost macro pre-expand: engaging" && echo "[OK] selfhost pre-expand (loop two vars, auto) engaged" && exit 0 + +echo "[WARN] selfhost pre-expand auto did not engage; printing logs:" >&2 +echo "$out" >&2 +exit 2 +