vm(resolve): add text-based prelude merge for selfhost compilation

- Add merge_prelude_text() function for fast `using`-system support
- Implement normalize_text_for_inline() for parser robustness
- Update selfhost.rs to use text-based merge when NYASH_USING_AST=1
- Add merge_prelude_text export to mod.rs
- Improves Phase 15 selfhost compilation speed and reliability

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
This commit is contained in:
nyash-codex
2025-11-04 09:25:36 +09:00
parent 70a98ae09b
commit 5a1bb549a7
3 changed files with 132 additions and 8 deletions

View File

@ -26,4 +26,5 @@ pub use strip::{
resolve_prelude_paths_profiled,
parse_preludes_to_asts,
merge_prelude_asts_with_main,
merge_prelude_text,
};

View File

@ -635,3 +635,112 @@ pub fn preexpand_at_local(src: &str) -> String {
}
out
}
/// Text-based prelude merge: simpler and faster than AST merge.
///
/// Resolves the prelude files required by `source` (delegated to
/// `resolve_prelude_paths_profiled`), strips the `using` lines from each
/// prelude file, and concatenates the normalized prelude texts followed by the
/// normalized main source text.
///
/// # Arguments
/// * `runner`   - runner passed through to the resolve/strip helpers.
/// * `source`   - text of the main source file.
/// * `filename` - name of the main source file (used for resolution and tracing).
///
/// # Returns
/// The merged source text ready for compilation. When no prelude paths are
/// resolved, `source` is returned unchanged.
///
/// # Errors
/// Returns `Err` with a message when prelude resolution fails, when a prelude
/// file cannot be read, or when stripping `using` lines from a prelude fails.
///
/// # Environment
/// * `NYASH_RESOLVE_TRACE=1`      - log per-file and total byte counts.
/// * `NYASH_RESOLVE_SEAM_DEBUG=1` - insert a comment marker between the
///   preludes and the main source.
pub fn merge_prelude_text(
    runner: &NyashRunner,
    source: &str,
    filename: &str,
) -> Result<String, String> {
    let trace = std::env::var("NYASH_RESOLVE_TRACE").ok().as_deref() == Some("1");
    // First pass: collect and resolve prelude paths
    let (cleaned_main, prelude_paths) = resolve_prelude_paths_profiled(runner, source, filename)?;
    if prelude_paths.is_empty() {
        // Nothing to merge: hand back the original text untouched.
        // NOTE(review): this returns `source`, not `cleaned_main`; if `using`
        // lines can exist without producing prelude paths they stay in the
        // returned text — confirm this is intended.
        return Ok(source.to_string());
    }
    if trace {
        crate::runner::trace::log(format!(
            "[using/text-merge] {} prelude files for '{}'",
            prelude_paths.len(),
            filename
        ));
    }
    // Build merged text: preludes first, then main source
    let mut merged = String::new();
    // Add preludes in the order returned by the resolver
    for (idx, path) in prelude_paths.iter().enumerate() {
        let content = std::fs::read_to_string(path)
            .map_err(|e| format!("using: failed to read '{}': {}", path, e))?;
        // Strip using lines from prelude and normalize; the nested using list
        // is ignored here because `prelude_paths` is already the full list.
        let (cleaned_raw, _nested) = collect_using_and_strip(runner, &content, path)?;
        let cleaned = normalize_text_for_inline(&cleaned_raw);
        if trace {
            crate::runner::trace::log(format!(
                "[using/text-merge] [{}] '{}' ({} bytes)",
                idx + 1,
                path,
                cleaned.len()
            ));
        }
        merged.push_str(&cleaned);
        merged.push('\n');
    }
    // Add boundary marker if debug mode
    if std::env::var("NYASH_RESOLVE_SEAM_DEBUG").ok().as_deref() == Some("1") {
        merged.push_str("\n/* --- using prelude/main boundary --- */\n\n");
    }
    // Remember how many bytes precede the main source. Deriving this later as
    // `merged.len() - cleaned_main.len()` is wrong: the appended text is the
    // *normalized* main, which may be shorter than `cleaned_main`, so the
    // subtraction could underflow.
    let prelude_bytes = merged.len();
    // Add main source (already cleaned of using lines) and normalize
    let cleaned_main_norm = normalize_text_for_inline(&cleaned_main);
    merged.push_str(&cleaned_main_norm);
    if trace {
        crate::runner::trace::log(format!(
            "[using/text-merge] final merged: {} bytes ({} prelude + {} main)",
            merged.len(),
            prelude_bytes,
            cleaned_main_norm.len()
        ));
    }
    // Final normalization over the joined text also covers the seams between files.
    Ok(normalize_text_for_inline(&merged))
}
/// Minimal normalization to improve inline parser robustness.
///
/// - Normalizes CRLF and lone CR line endings to LF.
/// - Removes a `;` that is followed only by spaces, tabs or newlines and then
///   a closing brace (`; }` → ` }`). Two passes are made, so at most two
///   stacked semicolons before a brace are removed per call.
/// - Ensures the result ends with a newline (an empty input yields `"\n"`).
///
/// The scan is purely textual: it does not recognize string literals or
/// comments, so a matching `;` inside them is removed as well.
///
/// Non-ASCII text is preserved verbatim; the scan works on `char`s rather than
/// on raw bytes, so multi-byte UTF-8 sequences are never split.
fn normalize_text_for_inline(s: &str) -> String {
    let mut out = s.replace("\r\n", "\n").replace('\r', "\n");
    // Do a couple of passes to cover stacked `;;}` cases without a regex.
    for _ in 0..2 {
        let mut kept = String::with_capacity(out.len());
        for (pos, ch) in out.char_indices() {
            if ch == ';' {
                // Peek ahead past spaces/tabs/newlines; `;` is one byte wide,
                // so `pos + 1` is always a valid char boundary.
                let rest = out[pos + 1..]
                    .trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\n'));
                if rest.starts_with('}') {
                    // Drop only the `;`; the whitespace before `}` is kept.
                    continue;
                }
            }
            kept.push(ch);
        }
        out = kept;
    }
    if !out.ends_with('\n') {
        out.push('\n');
    }
    out
}

View File

@ -22,21 +22,35 @@ impl NyashRunner {
return false;
}
};
// Optional Phase-15: strip `using` lines and register modules (same policy as execute_nyash_file)
// Optional Phase-15: using prelude merge (text-based for speed)
let mut code_ref: std::borrow::Cow<'_, str> = std::borrow::Cow::Borrowed(&code);
if crate::config::env::enable_using() {
let using_ast = crate::config::env::using_ast_enabled();
if using_ast {
// Text-based merge: faster for inline/selfhost execution
match crate::runner::modes::common_util::resolve::merge_prelude_text(self, &code, filename) {
Ok(merged) => {
code_ref = std::borrow::Cow::Owned(merged);
}
Err(e) => {
eprintln!("[ny-compiler] using text merge error: {}", e);
return false;
}
}
} else {
// Legacy: strip only (no prelude merge)
match crate::runner::modes::common_util::resolve::resolve_prelude_paths_profiled(self, &code, filename) {
Ok((clean, paths)) => {
if !paths.is_empty() && !crate::config::env::using_ast_enabled() {
if !paths.is_empty() {
eprintln!("[ny-compiler] using: AST prelude merge is disabled in this profile. Enable NYASH_USING_AST=1 or remove 'using' lines.");
return false;
}
code_ref = std::borrow::Cow::Owned(clean);
// Selfhost compile path does not need to parse prelude ASTs here.
}
Err(e) => { eprintln!("[ny-compiler] {}", e); return false; }
}
}
}
// Promote dev sugar to standard: pre-expand line-head '@name[:T] = expr' to 'local name[:T] = expr'
{