selfhost/runtime: Stage 0-1 runner + MIR JSON loader (summary) with trace; compiler: scopebox/loopform prepass wiring (flags, child args); libs: add P1 standard boxes (console/string/array/map) as thin wrappers; runner: pass --box-pref via env; ops_calls dispatcher skeleton; docs: selfhost executor roadmap + scopebox/loopform notes; smokes: selfhost runner + identity prepasses; CURRENT_TASK: update plan and box lib schedule

This commit is contained in:
Selfhosting Dev
2025-09-22 21:52:39 +09:00
parent b00dc4ec37
commit da78fc174b
72 changed files with 3163 additions and 2557 deletions

View File

@ -0,0 +1,12 @@
/*!
* Using resolver utilities (split)
* - strip: remove `using` lines, inline modules, register aliases/modules
* - seam: seam logging and optional brace-fix at join points
*/
pub mod strip;
pub mod seam;
// Public re-exports to preserve existing call sites
pub use strip::{strip_using_and_register, preexpand_at_local};

View File

@ -0,0 +1,84 @@
/// Emit the trailing portion of an inlined prelude chunk to stderr.
///
/// Does nothing unless `seam_dbg` is true. At most the last 120 characters of
/// `inlined_text` are shown, with every newline rendered as a literal `\n` so the
/// message stays on a single line. `path_key` is printed verbatim as the label.
pub fn log_inlined_tail(path_key: &str, inlined_text: &str, seam_dbg: bool) {
    const TAIL_CHARS: usize = 120;
    if seam_dbg {
        // Byte offset of the TAIL_CHARS-th character counted from the end;
        // falls back to 0 when the text is shorter than the window.
        let start = inlined_text
            .char_indices()
            .rev()
            .nth(TAIL_CHARS - 1)
            .map_or(0, |(offset, _)| offset);
        let shown = inlined_text[start..].replace('\n', "\\n");
        eprintln!("[using][seam][inlined] {} tail=<<<{}>>>", path_key, shown);
    }
}
/// Print both sides of the prelude/body join to stderr for visual comparison.
///
/// Does nothing unless `seam_dbg` is true. Shows up to the last 160 characters of
/// `prelude_clean` and up to the first 160 characters of `body`, each on its own
/// line with newlines rendered as a literal `\n`.
pub fn log_prelude_body_seam(prelude_clean: &str, body: &str, seam_dbg: bool) {
    const WINDOW: usize = 160;
    if !seam_dbg {
        return;
    }
    // Start of the last WINDOW characters of the prelude (0 when it is shorter).
    let tail_start = prelude_clean
        .char_indices()
        .rev()
        .nth(WINDOW - 1)
        .map_or(0, |(at, _)| at);
    // End of the first WINDOW characters of the body (whole body when it is shorter).
    let head_end = body
        .char_indices()
        .nth(WINDOW)
        .map_or(body.len(), |(at, _)| at);
    let prelude_tail = &prelude_clean[tail_start..];
    let body_head = &body[..head_end];
    eprintln!("[using][seam] prelude_tail=<<<{}>>>", prelude_tail.replace('\n', "\\n"));
    eprintln!("[using][seam] body_head =<<<{}>>>", body_head.replace('\n', "\\n"));
}
/// Optional seam repair: close any `{` that the prelude leaves open.
///
/// Active only when the environment variable `NYASH_RESOLVE_FIX_BRACES` is exactly
/// `"1"`; otherwise `combined` is left untouched. The scan walks `prelude_clean` and
/// computes the number of `{` minus the number of `}`, skipping double-quoted strings
/// (honouring backslash escapes), `//` line comments and `/* ... */` block comments.
/// For a positive balance, that many `"}\n"` pairs are appended to `combined`; a zero
/// or negative balance appends nothing.
/// When `trace` is true and braces are appended, a note with the count goes to stderr.
pub fn fix_prelude_braces_if_enabled(prelude_clean: &str, combined: &mut String, trace: bool) {
    /// Lexical context the scanner is currently in.
    #[derive(Clone, Copy)]
    enum Ctx {
        Code,
        Str,
        LineComment,
        BlockComment,
    }

    let enabled = matches!(
        std::env::var("NYASH_RESOLVE_FIX_BRACES").as_deref(),
        Ok("1")
    );
    if !enabled {
        return;
    }

    let mut open: i32 = 0;
    let mut ctx = Ctx::Code;
    let mut chars = prelude_clean.chars().peekable();
    while let Some(ch) = chars.next() {
        ctx = match ctx {
            // A line comment ends at the next newline.
            Ctx::LineComment => {
                if ch == '\n' {
                    Ctx::Code
                } else {
                    Ctx::LineComment
                }
            }
            // A block comment ends at the first "*/"; the '/' is consumed here.
            Ctx::BlockComment => {
                if ch == '*' && chars.peek().copied() == Some('/') {
                    chars.next();
                    Ctx::Code
                } else {
                    Ctx::BlockComment
                }
            }
            Ctx::Str => match ch {
                // Skip whatever follows a backslash so an escaped quote does not
                // terminate the string.
                '\\' => {
                    chars.next();
                    Ctx::Str
                }
                '"' => Ctx::Code,
                _ => Ctx::Str,
            },
            Ctx::Code => match ch {
                '"' => Ctx::Str,
                '/' => match chars.peek().copied() {
                    Some('/') => {
                        chars.next();
                        Ctx::LineComment
                    }
                    Some('*') => {
                        chars.next();
                        Ctx::BlockComment
                    }
                    _ => Ctx::Code,
                },
                '{' => {
                    open += 1;
                    Ctx::Code
                }
                '}' => {
                    open -= 1;
                    Ctx::Code
                }
                _ => Ctx::Code,
            },
        };
    }

    if open > 0 {
        if trace {
            eprintln!("[using][seam] fix: appending {} '}}' before body", open);
        }
        (0..open).for_each(|_| combined.push_str("}\n"));
    }
}

View File

@ -19,8 +19,7 @@ pub fn strip_using_and_register(
let dedup_fn = std::env::var("NYASH_RESOLVE_DEDUP_FN").ok().as_deref() == Some("1");
let seam_dbg = std::env::var("NYASH_RESOLVE_SEAM_DEBUG").ok().as_deref() == Some("1");
let mut cmd = std::process::Command::new("python3");
cmd.arg("tools/using_combine.py")
.arg("--entry").arg(filename);
cmd.arg("tools/using_combine.py").arg("--entry").arg(filename);
if fix_braces { cmd.arg("--fix-braces"); }
if dedup_box { cmd.arg("--dedup-box"); }
if dedup_fn { cmd.arg("--dedup-fn"); }
@ -35,11 +34,10 @@ pub fn strip_using_and_register(
return Err(format!("using combiner failed: {}", err));
}
}
Err(e) => {
return Err(format!("using combiner spawn error: {}", e));
}
Err(e) => return Err(format!("using combiner spawn error: {}", e)),
}
}
fn strip_and_inline(
runner: &NyashRunner,
code: &str,
@ -57,21 +55,12 @@ pub fn strip_using_and_register(
let rest0 = rest0.strip_suffix(';').unwrap_or(rest0).trim();
let (target, alias) = if let Some(pos) = rest0.find(" as ") {
(rest0[..pos].trim().to_string(), Some(rest0[pos + 4..].trim().to_string()))
} else {
(rest0.to_string(), None)
};
let is_path = target.starts_with('"')
|| target.starts_with("./")
|| target.starts_with('/')
|| target.ends_with(".nyash");
} else { (rest0.to_string(), None) };
let is_path = target.starts_with('"') || target.starts_with("./") || target.starts_with('/') || target.ends_with(".nyash");
if is_path {
let path = target.trim_matches('"').to_string();
let name = alias.clone().unwrap_or_else(|| {
std::path::Path::new(&path)
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("module")
.to_string()
std::path::Path::new(&path).file_stem().and_then(|s| s.to_str()).unwrap_or("module").to_string()
});
used.push((name, Some(path)));
} else {
@ -98,8 +87,8 @@ pub fn strip_using_and_register(
}
for (ns, alias_opt) in used {
// Two forms:
// - using path "..." [as Alias] → handled earlier (stored as (name, Some(path)))
// - using namespace.with.dots [as Alias] → resolve ns → register alias → inline
// - using path "..." [as Alias]
// - using namespace.with.dots [as Alias]
let resolved_path = if let Some(alias) = alias_opt {
// alias case: resolve namespace to a concrete path
let mut found: Option<String> = using_ctx
@ -109,39 +98,7 @@ pub fn strip_using_and_register(
.map(|(_, p)| p.clone());
if trace {
if let Some(f) = &found {
eprintln!("[using] hit modules: {} -> {}", ns, f);
} else {
eprintln!("[using] miss modules: {}", ns);
}
}
if found.is_none() {
if let Ok(text) = std::fs::read_to_string("nyash.toml") {
if let Ok(doc) = toml::from_str::<toml::Value>(&text) {
if let Some(mut cur) = doc.get("modules").and_then(|v| v.as_table()) {
let mut segs = ns.split('.').peekable();
let mut hit: Option<String> = None;
while let Some(seg) = segs.next() {
if let Some(next) = cur.get(seg) {
if let Some(t) = next.as_table() {
cur = t;
continue;
}
if segs.peek().is_none() {
if let Some(s) = next.as_str() {
hit = Some(s.to_string());
}
}
}
break;
}
if hit.is_some() {
if trace {
eprintln!("[using] hit nyash.toml: {} -> {}", ns, hit.as_ref().unwrap());
}
found = hit;
}
}
}
eprintln!("[using/resolve] alias '{}' -> '{}'", ns, f);
}
}
if found.is_none() {
@ -157,11 +114,7 @@ pub fn strip_using_and_register(
) {
Ok(v) => {
// Treat unchanged token (namespace) as unresolved
if v == ns {
found = None;
} else {
found = Some(v)
}
if v == ns { found = None; } else { found = Some(v) }
}
Err(e) => return Err(format!("using: {}", e)),
}
@ -199,50 +152,34 @@ pub fn strip_using_and_register(
// Resolve relative to current file dir
// Guard: skip obvious namespace tokens (ns.ns without extension)
if (!path.contains('/') && !path.contains('\\')) && !path.ends_with(".nyash") && path.contains('.') {
if verbose {
eprintln!("[using] unresolved '{}' (namespace token, skip inline)", path);
}
if verbose { eprintln!("[using] unresolved '{}' (namespace token, skip inline)", path); }
continue;
}
let mut p = std::path::PathBuf::from(&path);
if p.is_relative() {
// If the raw relative path exists from CWD, use it.
// Otherwise, try relative to the current file's directory.
if !p.exists() {
if let Some(dir) = std::path::Path::new(filename).parent() {
let cand = dir.join(&p);
if cand.exists() {
p = cand;
}
if cand.exists() { p = cand; }
}
}
}
// normalize to absolute to stabilize de-dup
if let Ok(abs) = std::fs::canonicalize(&p) { p = abs; }
let key = p.to_string_lossy().to_string();
if visited.contains(&key) {
continue;
}
if visited.contains(&key) { continue; }
visited.insert(key.clone());
if let Ok(text) = std::fs::read_to_string(&p) {
let inlined = strip_and_inline(runner, &text, &key, visited)?;
prelude.push_str(&inlined);
prelude.push_str("\n");
if seam_dbg {
let tail = inlined.chars().rev().take(120).collect::<String>().chars().rev().collect::<String>();
eprintln!("[using][seam][inlined] {} tail=<<<{}>>>", key, tail.replace('\n', "\\n"));
}
crate::runner::modes::common_util::resolve::seam::log_inlined_tail(&key, &inlined, seam_dbg);
} else if verbose {
eprintln!("[using] warn: could not read {}", p.display());
}
}
}
// Prepend inlined modules so their boxes are defined before use
// Seam guard: collapse consecutive blank lines at the join (prelude || body) to a single blank line
if prelude.is_empty() {
return Ok(out);
}
// Optionally deduplicate repeated static boxes in prelude by name (default OFF)
if prelude.is_empty() { return Ok(out); }
// Optional de-dup of static boxes by name
let mut prelude_text = prelude;
if std::env::var("NYASH_RESOLVE_DEDUP_BOX").ok().as_deref() == Some("1") {
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
@ -250,19 +187,15 @@ pub fn strip_using_and_register(
let bytes: Vec<char> = prelude_text.chars().collect();
let mut i = 0usize;
while i < bytes.len() {
// naive scan for "static box "
if i + 12 < bytes.len() && bytes[i..].iter().take(11).collect::<String>() == "static box " {
// read name token
let mut j = i + 11;
let mut name = String::new();
while j < bytes.len() {
let c = bytes[j];
if c.is_alphanumeric() || c == '_' { name.push(c); j += 1; } else { break; }
}
// find opening brace '{'
while j < bytes.len() && bytes[j].is_whitespace() { j += 1; }
if j < bytes.len() && bytes[j] == '{' {
// scan to matching closing brace for this box
let mut k = j;
let mut depth = 0i32;
while k < bytes.len() {
@ -271,210 +204,110 @@ pub fn strip_using_and_register(
if c == '}' { depth -= 1; if depth == 0 { k += 1; break; } }
k += 1;
}
// decide
if seen.contains(&name) {
// skip duplicate box
i = k; // drop this block
continue;
} else {
if seen.contains(&name) { i = k; continue; } else {
seen.insert(name);
// keep this block as-is
out_txt.push_str(&bytes[i..k].iter().collect::<String>());
i = k;
continue;
i = k; continue;
}
}
}
// default: copy one char
out_txt.push(bytes[i]);
i += 1;
}
prelude_text = out_txt;
}
// Optional: de-duplicate repeated function definitions inside specific boxes (default OFF)
// Optional: function dedup (MiniVmPrints.print_prints_in_slice)
if std::env::var("NYASH_RESOLVE_DEDUP_FN").ok().as_deref() == Some("1") {
// Currently target MiniVmPrints.print_prints_in_slice only (low risk)
let mut out_txt = String::with_capacity(prelude_text.len());
let bytes: Vec<char> = prelude_text.chars().collect();
let mut i = 0usize;
while i < bytes.len() {
// scan for "static box "
let ahead: String = bytes[i..bytes.len().min(i + 12)].iter().collect();
if ahead.starts_with("static box ") {
// parse box name
let mut j = i + 11; // len("static box ") == 11
let mut j = i + 11;
let mut name = String::new();
while j < bytes.len() {
let c = bytes[j];
if c.is_ascii_alphanumeric() || c == '_' { name.push(c); j += 1; } else { break; }
}
// skip ws to '{'
while j < bytes.len() { let c = bytes[j]; if c.is_ascii_alphanumeric() || c == '_' { name.push(c); j += 1; } else { break; } }
while j < bytes.len() && bytes[j].is_whitespace() { j += 1; }
if j < bytes.len() && bytes[j] == '{' {
// find matching closing '}' for the box body
let mut k = j;
let mut depth = 0i32;
let mut in_str = false;
while k < bytes.len() {
let c = bytes[k];
if in_str {
if c == '\\' { k += 2; continue; }
if c == '"' { in_str = false; }
k += 1;
continue;
} else {
if c == '"' { in_str = true; k += 1; continue; }
if c == '{' { depth += 1; }
if c == '}' { depth -= 1; if depth == 0 { k += 1; break; } }
k += 1;
}
if in_str { if c == '\\' { k += 2; continue; } if c == '"' { in_str = false; } k += 1; continue; } else { if c == '"' { in_str = true; k += 1; continue; } if c == '{' { depth += 1; } if c == '}' { depth -= 1; if depth == 0 { k += 1; break; } } k += 1; }
}
// write header up to body start '{'
out_txt.push_str(&bytes[i..(j + 1)].iter().collect::<String>());
// process body (limited dedup for MiniVmPrints.print_prints_in_slice)
let body_end = k.saturating_sub(1);
if name == "MiniVmPrints" {
let mut kept = false;
let mut p = j + 1;
while p <= body_end {
// find next line start
let mut ls = p;
if ls > j + 1 {
while ls <= body_end && bytes[ls - 1] != '\n' { ls += 1; }
}
let mut ls = p; if ls > j + 1 { while ls <= body_end && bytes[ls - 1] != '\n' { ls += 1; } }
if ls > body_end { break; }
// skip spaces
let mut q = ls;
while q <= body_end && bytes[q].is_whitespace() && bytes[q] != '\n' { q += 1; }
// check for function definition of print_prints_in_slice
let mut q = ls; while q <= body_end && bytes[q].is_whitespace() && bytes[q] != '\n' { q += 1; }
let rem: String = bytes[q..(body_end + 1).min(q + 64)].iter().collect();
if rem.starts_with("print_prints_in_slice(") {
// find ')'
let mut r = q;
let mut dp = 0i32;
let mut in_s = false;
let mut r = q; let mut dp = 0i32; let mut instr = false;
while r <= body_end {
let c = bytes[r];
if in_s { if c == '\\' { r += 2; continue; } if c == '"' { in_s = false; } r += 1; continue; }
if c == '"' { in_s = true; r += 1; continue; }
if c == '(' { dp += 1; r += 1; continue; }
if c == ')' { dp -= 1; r += 1; if dp <= 0 { break; } continue; }
if instr { if c == '\\' { r += 2; continue; } if c == '"' { instr = false; } r += 1; continue; }
if c == '"' { instr = true; r += 1; continue; }
if c == '(' { dp += 1; }
if c == ')' { dp -= 1; if dp == 0 { r += 1; break; } }
if dp == 0 && c == '{' { break; }
r += 1;
}
while r <= body_end && bytes[r].is_whitespace() { r += 1; }
if r <= body_end && bytes[r] == '{' {
// find body end
let mut t = r;
let mut d2 = 0i32;
let mut in_s2 = false;
while t <= body_end {
let c2 = bytes[t];
if in_s2 { if c2 == '\\' { t += 2; continue; } if c2 == '"' { in_s2 = false; } t += 1; continue; }
if c2 == '"' { in_s2 = true; t += 1; continue; }
if c2 == '{' { d2 += 1; }
if c2 == '}' { d2 -= 1; if d2 == 0 { t += 1; break; } }
t += 1;
let mut s = r; let mut bd = 0i32; let mut is2 = false;
while s <= body_end {
let c = bytes[s];
if is2 { if c == '\\' { s += 2; continue; } if c == '"' { is2 = false; } s += 1; continue; }
if c == '"' { is2 = true; s += 1; continue; }
if c == '{' { bd += 1; }
if c == '}' { bd -= 1; if bd == 0 { s += 1; break; } }
s += 1;
}
// start-of-line
let mut sol = q;
while sol > j + 1 && bytes[sol - 1] != '\n' { sol -= 1; }
if !kept {
out_txt.push_str(&bytes[sol..t].iter().collect::<String>());
out_txt.push_str(&bytes[q..s].iter().collect::<String>());
kept = true;
}
p = t;
// advance outer scanner to the end of this function body
i = s;
let _ = i; // mark as read to satisfy unused_assignments lint
continue;
}
}
// copy this line
let mut eol = ls;
while eol <= body_end && bytes[eol] != '\n' { eol += 1; }
out_txt.push_str(&bytes[ls..(eol.min(body_end + 1))].iter().collect::<String>());
if eol <= body_end && bytes[eol] == '\n' { out_txt.push('\n'); }
p = eol + 1;
out_txt.push(bytes[p]); p += 1;
}
} else {
// copy body as-is
out_txt.push_str(&bytes[(j + 1)..=body_end].iter().collect::<String>());
}
// write closing '}'
out_txt.push('}');
i = k;
continue;
if !kept { out_txt.push_str(&bytes[j + 1..=body_end].iter().collect::<String>()); }
out_txt.push('}'); out_txt.push('\n'); i = k; continue;
} else { out_txt.push_str(&bytes[j + 1..k].iter().collect::<String>()); i = k; continue; }
}
}
// default: copy one char
out_txt.push(bytes[i]);
i += 1;
out_txt.push(bytes[i]); i += 1;
}
prelude_text = out_txt;
}
let prelude_clean = prelude_text.trim_end_matches(['\n', '\r']);
if seam_dbg {
let tail = prelude_clean.chars().rev().take(160).collect::<String>().chars().rev().collect::<String>();
let head = out.chars().take(160).collect::<String>();
eprintln!("[using][seam] prelude_tail=<<<{}>>>", tail.replace('\n', "\\n"));
eprintln!("[using][seam] body_head =<<<{}>>>", head.replace('\n', "\\n"));
}
// Seam join + optional fix
let prelude_clean = prelude_text.trim_end_matches('\n');
crate::runner::modes::common_util::resolve::seam::log_prelude_body_seam(prelude_clean, &out, seam_dbg);
let mut combined = String::with_capacity(prelude_clean.len() + out.len() + 1);
combined.push_str(prelude_clean);
combined.push('\n');
// Optional seam safety: append missing '}' for unmatched '{' in prelude
if std::env::var("NYASH_RESOLVE_FIX_BRACES").ok().as_deref() == Some("1") {
// compute { } delta ignoring strings and comments
let mut delta: i32 = 0;
let mut it = prelude_clean.chars().peekable();
let mut in_str = false;
let mut in_sl = false;
let mut in_ml = false;
while let Some(c) = it.next() {
if in_sl {
if c == '\n' { in_sl = false; }
continue;
}
if in_ml {
if c == '*' {
if let Some('/') = it.peek().copied() {
// consume '/'
it.next();
in_ml = false;
}
}
continue;
}
if in_str {
if c == '\\' { it.next(); continue; }
if c == '"' { in_str = false; }
continue;
}
if c == '"' { in_str = true; continue; }
if c == '/' {
match it.peek().copied() {
Some('/') => { in_sl = true; it.next(); continue; }
Some('*') => { in_ml = true; it.next(); continue; }
_ => {}
}
}
if c == '{' { delta += 1; }
if c == '}' { delta -= 1; }
}
if delta > 0 {
if trace { eprintln!("[using][seam] fix: appending {} '}}' before body", delta); }
for _ in 0..delta { combined.push('}'); combined.push('\n'); }
}
}
crate::runner::modes::common_util::resolve::seam::fix_prelude_braces_if_enabled(prelude_clean, &mut combined, trace);
combined.push_str(&out);
Ok(combined)
}
let mut visited = HashSet::new();
let combined = strip_and_inline(runner, code, filename, &mut visited)?;
// Dev sugar: always pre-expand @name[:T] = expr at line-head to keep sources readable
Ok(preexpand_at_local(&combined))
}
/// Pre-expand line-head `@name[: Type] = expr` into `local name[: Type] = expr`.
/// Minimal, safe, no semantics change. Applies only at line head (after spaces/tabs).
pub(crate) fn preexpand_at_local(src: &str) -> String {
pub fn preexpand_at_local(src: &str) -> String {
let mut out = String::with_capacity(src.len());
for line in src.lines() {
let bytes = line.as_bytes();
@ -483,33 +316,18 @@ pub(crate) fn preexpand_at_local(src: &str) -> String {
if i < bytes.len() && bytes[i] == b'@' {
// parse identifier
let mut j = i + 1;
// first char [A-Za-z_]
if j < bytes.len() && ((bytes[j] as char).is_ascii_alphabetic() || bytes[j] == b'_') {
j += 1;
while j < bytes.len() {
let c = bytes[j] as char;
if c.is_ascii_alphanumeric() || c == '_' { j += 1; } else { break; }
}
// optional type: spaces ':' spaces ident
let mut k = j;
while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') { k += 1; }
while j < bytes.len() { let c = bytes[j] as char; if c.is_ascii_alphanumeric() || c == '_' { j += 1; } else { break; } }
let mut k = j; while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') { k += 1; }
if k < bytes.len() && bytes[k] == b':' {
k += 1;
while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') { k += 1; }
// simple type ident
k += 1; while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') { k += 1; }
if k < bytes.len() && ((bytes[k] as char).is_ascii_alphabetic() || bytes[k] == b'_') {
k += 1;
while k < bytes.len() {
let c = bytes[k] as char;
if c.is_ascii_alphanumeric() || c == '_' { k += 1; } else { break; }
}
k += 1; while k < bytes.len() { let c = bytes[k] as char; if c.is_ascii_alphanumeric() || c == '_' { k += 1; } else { break; } }
}
}
// consume spaces to '='
let mut eqp = k;
while eqp < bytes.len() && (bytes[eqp] == b' ' || bytes[eqp] == b'\t') { eqp += 1; }
let mut eqp = k; while eqp < bytes.len() && (bytes[eqp] == b' ' || bytes[eqp] == b'\t') { eqp += 1; }
if eqp < bytes.len() && bytes[eqp] == b'=' {
// build transformed line: prefix + 'local ' + rest from after '@' up to '=' + ' =' + remainder
out.push_str(&line[..i]);
out.push_str("local ");
out.push_str(&line[i + 1..eqp]);