using+pyvm: fix inlining seam and brace guard; stabilize MiniVm.collect_prints unknown-skip; harden MiniVmPrints int/binop scan; add seam-combiner diagnostics (default OFF); green self-contained + progress on using-mixed

This commit is contained in:
Selfhosting Dev
2025-09-22 03:45:36 +09:00
parent c8063c9e41
commit c9a0333c38
6 changed files with 1229 additions and 123 deletions

390
tools/using_combine.py Normal file
View File

@ -0,0 +1,390 @@
#!/usr/bin/env python3
import sys, os, argparse
def read_text(path: str) -> str:
with open(path, 'r', encoding='utf-8') as f:
return f.read()
def load_toml_modules(toml_path: str):
modules = {}
using_paths = []
try:
txt = read_text(toml_path)
except Exception:
return modules, using_paths
section = None
for line in txt.splitlines():
s = line.strip()
if not s:
continue
if s.startswith('[') and s.endswith(']'):
section = s[1:-1].strip()
continue
if section == 'modules':
# dotted key: a.b.c = "path"
if '=' in s:
k, v = s.split('=', 1)
key = k.strip()
val = v.strip().strip('"')
if key and val:
modules[key] = val
if section == 'using':
# paths = ["apps", "lib", "."]
if s.startswith('paths') and '=' in s:
_, arr = s.split('=', 1)
arr = arr.strip()
if arr.startswith('[') and arr.endswith(']'):
body = arr[1:-1]
parts = [p.strip().strip('"') for p in body.split(',') if p.strip()]
using_paths = [p for p in parts if p]
return modules, using_paths
def brace_delta_ignoring_strings(text: str) -> int:
i = 0
n = len(text)
delta = 0
in_str = False
in_sl = False
in_ml = False
while i < n:
ch = text[i]
# single-line comment
if in_sl:
if ch == '\n':
in_sl = False
i += 1
continue
# multi-line comment
if in_ml:
if ch == '*' and i + 1 < n and text[i + 1] == '/':
in_ml = False
i += 2
continue
i += 1
continue
# string
if in_str:
if ch == '\\':
i += 2
continue
if ch == '"':
in_str = False
i += 1
continue
i += 1
continue
# enter states
if ch == '"':
in_str = True
i += 1
continue
if ch == '/' and i + 1 < n and text[i + 1] == '/':
in_sl = True
i += 2
continue
if ch == '/' and i + 1 < n and text[i + 1] == '*':
in_ml = True
i += 2
continue
# count braces
if ch == '{':
delta += 1
elif ch == '}':
delta -= 1
i += 1
return delta
def find_balanced(text: str, start: int, open_ch: str, close_ch: str) -> int:
i = start
n = len(text)
if i >= n or text[i] != open_ch:
return -1
depth = 0
while i < n:
ch = text[i]
if ch == '"':
i += 1
while i < n:
c = text[i]
if c == '\\':
i += 2
continue
if c == '"':
i += 1
break
i += 1
continue
if ch == open_ch:
depth += 1
if ch == close_ch:
depth -= 1
if depth == 0:
return i
i += 1
return -1
def dedup_boxes(text: str) -> str:
seen = set()
out = []
i = 0
n = len(text)
tok = 'static box '
while i < n:
if text.startswith(tok, i):
j = i + len(tok)
# read identifier
name = []
while j < n and (text[j] == '_' or text[j].isalnum()):
name.append(text[j]); j += 1
# skip ws to '{'
while j < n and text[j].isspace():
j += 1
if j < n and text[j] == '{':
end = find_balanced(text, j, '{', '}')
if end < 0:
end = j
block = text[i:end+1]
nm = ''.join(name)
if nm in seen:
i = end + 1
continue
seen.add(nm)
out.append(block)
i = end + 1
continue
# default: copy one char
out.append(text[i])
i += 1
return ''.join(out)
def dedup_fn_prints_in_slice(text: str) -> str:
# limited: within static box MiniVmPrints, keep only first definition of print_prints_in_slice
out = []
i = 0
n = len(text)
tok = 'static box '
while i < n:
if text.startswith(tok, i):
j = i + len(tok)
name = []
while j < n and (text[j] == '_' or text[j].isalnum()):
name.append(text[j]); j += 1
while j < n and text[j].isspace():
j += 1
if j < n and text[j] == '{':
end = find_balanced(text, j, '{', '}')
if end < 0:
end = j
header = text[i:j+1]
body = text[j+1:end]
nm = ''.join(name)
if nm == 'MiniVmPrints':
kept = False
body_out = []
p = 0
m = len(body)
while p < m:
# find line start
ls = p
if ls > 0:
while ls < m and body[ls-1] != '\n':
ls += 1
if ls >= m:
break
# skip ws
q = ls
while q < m and body[q].isspace() and body[q] != '\n':
q += 1
rem = body[q:q+64]
if rem.startswith('print_prints_in_slice('):
# find def body
r = q
depth = 0
in_s = False
while r < m:
c = body[r]
if in_s:
if c == '\\':
r += 2; continue
if c == '"':
in_s = False
r += 1; continue
if c == '"':
in_s = True; r += 1; continue
if c == '(':
depth += 1; r += 1; continue
if c == ')':
depth -= 1; r += 1
if depth <= 0:
break
continue
r += 1
while r < m and body[r].isspace():
r += 1
if r < m and body[r] == '{':
t = r
d2 = 0
in_s2 = False
while t < m:
c2 = body[t]
if in_s2:
if c2 == '\\':
t += 2; continue
if c2 == '"':
in_s2 = False
t += 1; continue
if c2 == '"':
in_s2 = True; t += 1; continue
if c2 == '{':
d2 += 1
if c2 == '}':
d2 -= 1
if d2 == 0:
t += 1; break
t += 1
# start-of-line for pretty include
sol = q
while sol > 0 and body[sol-1] != '\n':
sol -= 1
if not kept:
body_out.append(body[sol:t])
kept = True
p = t
continue
# default copy this line
eol = ls
while eol < m and body[eol] != '\n':
eol += 1
body_out.append(body[ls:eol+1])
p = eol + 1
new_body = ''.join(body_out)
out.append(header)
out.append(new_body)
out.append('}')
i = end + 1
continue
# non-target box
out.append(text[i:end+1])
i = end + 1
continue
out.append(text[i]); i += 1
return ''.join(out)
def combine(entry: str, fix_braces: bool, dedup_box: bool, dedup_fn: bool, seam_debug: bool) -> str:
repo_root = os.getcwd()
modules, using_paths = load_toml_modules(os.path.join(repo_root, 'nyash.toml'))
visited = set()
def resolve_ns(ns: str) -> str | None:
if ns in modules:
return modules[ns]
# try using paths as base dirs
for base in using_paths or []:
cand = os.path.join(base, *(ns.split('.'))) + '.nyash'
if os.path.exists(cand):
return cand
return None
def strip_and_inline(path: str) -> str:
abspath = path
if not os.path.isabs(abspath):
abspath = os.path.abspath(path)
key = os.path.normpath(abspath)
if key in visited:
return ''
visited.add(key)
code = read_text(abspath)
out_lines = []
used = [] # list[(target, alias_or_path)] ; alias_or_path: None|alias|path
for line in code.splitlines():
t = line.lstrip()
if t.startswith('using '):
rest = t[len('using '):].strip()
if rest.endswith(';'):
rest = rest[:-1].strip()
if ' as ' in rest:
tgt, alias = rest.split(' as ', 1)
tgt = tgt.strip(); alias = alias.strip()
else:
tgt, alias = rest, None
# path vs ns
is_path = tgt.startswith('"') or tgt.startswith('./') or tgt.startswith('/') or tgt.endswith('.nyash')
if is_path:
path_tgt = tgt.strip('"')
name = alias or os.path.splitext(os.path.basename(path_tgt))[0]
used.append((name, path_tgt))
else:
used.append((tgt, alias))
continue
out_lines.append(line)
out = '\n'.join(out_lines) + '\n'
prelude = []
for ns, alias in used:
path_tgt = None
if alias is not None and (ns.startswith('/') or ns.startswith('./') or ns.endswith('.nyash')):
path_tgt = ns
else:
if alias is None:
# direct namespace
path_tgt = resolve_ns(ns)
else:
# alias ns
path_tgt = resolve_ns(ns)
if not path_tgt:
continue
# resolve relative to current file
if not os.path.isabs(path_tgt):
cand = os.path.join(os.path.dirname(abspath), path_tgt)
if os.path.exists(cand):
path_tgt = cand
if not os.path.exists(path_tgt):
continue
inlined = strip_and_inline(path_tgt)
if inlined:
prelude.append(inlined)
prelude_text = ''.join(prelude)
# seam debug
if seam_debug:
tail = prelude_text[-160:].replace('\n', '\\n')
head = out[:160].replace('\n', '\\n')
sys.stderr.write(f"[using][seam] prelude_tail=<<<{tail}>>>\n")
sys.stderr.write(f"[using][seam] body_head =<<<{head}>>>\n")
# seam join
prelude_clean = prelude_text.rstrip('\n\r')
combined = prelude_clean + '\n' + out
if fix_braces:
delta = brace_delta_ignoring_strings(prelude_clean)
if delta > 0:
sys.stderr.write(f"[using][seam] fix: appending {delta} '}}' before body\n")
combined = prelude_clean + '\n' + ('}\n' * delta) + out
# dedups
if dedup_box:
combined = dedup_boxes(combined)
if dedup_fn:
combined = dedup_fn_prints_in_slice(combined)
return combined
return strip_and_inline(entry)
def main():
ap = argparse.ArgumentParser()
ap.add_argument('--entry', required=True)
ap.add_argument('--fix-braces', action='store_true')
ap.add_argument('--dedup-box', action='store_true')
ap.add_argument('--dedup-fn', action='store_true')
ap.add_argument('--seam-debug', action='store_true')
args = ap.parse_args()
text = combine(args.entry, args.fix_braces, args.dedup_box, args.dedup_fn, args.seam_debug)
sys.stdout.write(text)
if __name__ == '__main__':
main()