Phase 20.34: expand MirBuilder internal library with comprehensive lowering boxes; add pattern registry and program scanning infrastructure; implement internal lowerers for if/loop/return patterns; add dev tools and comprehensive canary tests; update VM boxes and host providers for internal delegation; wire phase2034 test suite with 30+ canary scripts covering internal lowering scenarios

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
This commit is contained in:
nyash-codex
2025-11-03 16:09:19 +09:00
parent 8827b8d416
commit a4f30ae827
89 changed files with 4125 additions and 115 deletions

View File

@ -61,13 +61,49 @@ pub(super) fn try_handle_string_box(
return Ok(true);
}
"indexOf" => {
    // indexOf(search [, fromIndex]) -> Integer
    //
    // Returns the byte offset of the first occurrence of `search` in the receiver,
    // or -1 when there is none. `fromIndex` is a byte offset as well; a negative
    // or non-integer value is treated as 0.
    let (needle, from_index) = match args.len() {
        // indexOf(search): scan from the beginning.
        1 => (this.reg_load(args[0])?.to_string(), 0usize),
        // indexOf(search, fromIndex): scan from the requested offset.
        2 => {
            let n = this.reg_load(args[0])?.to_string();
            let from = this.reg_load(args[1])?.as_integer().unwrap_or(0);
            (n, from.max(0) as usize)
        }
        _ => {
            return Err(VMError::InvalidInstruction(
                "indexOf expects 1 or 2 args (search [, fromIndex])".into(),
            ));
        }
    };
    let haystack = sb_norm.value.as_str();
    // Slicing a `str` at an offset that is not a UTF-8 character boundary panics,
    // and `fromIndex` comes straight from the running program. Clamp it to the
    // string length and advance to the next boundary before slicing.
    // `is_char_boundary(len)` is always true, so the loop terminates.
    let mut start = from_index.min(haystack.len());
    while !haystack.is_char_boundary(start) {
        start += 1;
    }
    // Because of the clamp, an out-of-range `fromIndex` can only match the empty
    // needle, and then reports the string length rather than an offset past the end.
    let idx = haystack[start..]
        .find(&needle)
        .map(|i| (start + i) as i64)
        .unwrap_or(-1);
    if let Some(d) = dst {
        this.regs.insert(d, VMValue::Integer(idx));
    }
    return Ok(true);
}
"contains" => {
    // contains(search) -> Bool
    //
    // True when `search` occurs anywhere in the receiver. Takes exactly one
    // argument; the arity error names `contains` so the message matches the method.
    if args.len() != 1 {
        return Err(VMError::InvalidInstruction("contains expects 1 arg".into()));
    }
    let needle = this.reg_load(args[0])?.to_string();
    let found = sb_norm.value.contains(&needle);
    // Only the boolean result is stored in the destination register.
    if let Some(d) = dst {
        this.regs.insert(d, VMValue::Bool(found));
    }
    return Ok(true);
}
"lastIndexOf" => {
@ -102,13 +138,26 @@ pub(super) fn try_handle_string_box(
return Ok(true);
}
"substring" => {
if args.len() != 2 {
return Err(VMError::InvalidInstruction(
"substring expects 2 args (start, end)".into(),
));
}
let s_idx = this.reg_load(args[0])?.as_integer().unwrap_or(0);
let e_idx = this.reg_load(args[1])?.as_integer().unwrap_or(0);
// Support both 1-arg (start to end) and 2-arg (start, end) forms
let (s_idx, e_idx) = match args.len() {
1 => {
// substring(start) - from start to end of string
let s = this.reg_load(args[0])?.as_integer().unwrap_or(0);
let len = sb_norm.value.chars().count() as i64;
(s, len)
}
2 => {
// substring(start, end) - half-open interval [start, end)
let s = this.reg_load(args[0])?.as_integer().unwrap_or(0);
let e = this.reg_load(args[1])?.as_integer().unwrap_or(0);
(s, e)
}
_ => {
return Err(VMError::InvalidInstruction(
"substring expects 1 or 2 args (start [, end])".into(),
));
}
};
let len = sb_norm.value.chars().count() as i64;
let start = s_idx.max(0).min(len) as usize;
let end = e_idx.max(start as i64).min(len) as usize;

View File

@ -133,9 +133,13 @@ impl MirInterpreter {
VMValue::String(ref s) => s.clone(),
other => other.to_string(),
};
if std::env::var("HAKO_DEBUG_LEGACY_CALL").ok().as_deref() == Some("1") {
eprintln!("[vm-debug] legacy-call raw='{}' argc={}", raw, args.len());
}
// Minimal builtin bridge: support print-like globals in legacy form
// Accept: "print", "nyash.console.log", "env.console.log", "nyash.builtin.print"
// Also bridge hostbridge.extern_invoke to the extern handler (legacy form)
match raw.as_str() {
"print" | "nyash.console.log" | "env.console.log" | "nyash.builtin.print" => {
if let Some(a0) = args.get(0) {
@ -146,6 +150,12 @@ impl MirInterpreter {
}
return Ok(VMValue::Void);
}
// Legacy call naming `hostbridge.extern_invoke` (bare, or followed by a `/` suffix):
// forward the untouched argument list to the extern handler under the canonical name.
name if name == "hostbridge.extern_invoke" || name.starts_with("hostbridge.extern_invoke/") => {
return self.execute_extern_function("hostbridge.extern_invoke", args);
}
// Legacy call naming `env.get`: forward to the extern handler as "env.get".
// NOTE(review): `name.contains("env.get")` already implies the exact and `env.get/`
// prefix tests, and it also matches any callee that merely embeds "env.get"
// somewhere in its name — confirm that this broad match is intended.
name if name == "env.get" || name.starts_with("env.get/") || name.contains("env.get") => {
return self.execute_extern_function("env.get", args);
}
_ => {}
}
@ -337,6 +347,77 @@ impl MirInterpreter {
args: &[ValueId],
) -> Result<VMValue, VMError> {
match func_name {
callee
    if callee
        .strip_prefix("env.get")
        .map_or(false, |rest| rest.is_empty() || rest.starts_with('/')) =>
{
    // Global-call spelling of `env.get` (bare, or followed by a `/` suffix):
    // hand the call to the extern handler under its canonical name.
    return self.execute_extern_function("env.get", args);
}
name if name == "hostbridge.extern_invoke" || name.starts_with("hostbridge.extern_invoke/") => {
    // hostbridge.extern_invoke(target, method, payload) in legacy/global-resolved form.
    //
    // The first two registers are stringified into the provider name and method;
    // the third carries the payload. Fewer than three arguments is an error.
    if args.len() < 3 {
        return Err(VMError::InvalidInstruction(
            "hostbridge.extern_invoke expects 3 args".into(),
        ));
    }
    let target = self.reg_load(args[0])?.to_string();
    let method = self.reg_load(args[1])?.to_string();
    // Reduce the payload to a single string: element 0 of an ArrayBox, the string
    // form of any other box, or the string form of a primitive VM value. The
    // arity check above guarantees the payload register exists, so no
    // "missing payload" case remains.
    let payload: String = match self.reg_load(args[2])? {
        VMValue::BoxRef(b) => {
            if let Some(ab) = b.as_any().downcast_ref::<crate::boxes::array::ArrayBox>() {
                let idx: Box<dyn crate::box_trait::NyashBox> =
                    Box::new(crate::box_trait::IntegerBox::new(0));
                let elem = ab.get(idx);
                elem.to_string_box().value
            } else {
                b.to_string_box().value
            }
        }
        other => other.to_string(),
    };
    // Dispatch on (provider, method); anything else is reported as unsupported.
    match (target.as_str(), method.as_str()) {
        ("env.mirbuilder", "emit") => {
            // Program JSON -> MIR JSON through the host provider.
            crate::host_providers::mir_builder::program_json_to_mir_json(&payload)
                .map(VMValue::String)
                .map_err(|e| {
                    VMError::InvalidInstruction(format!("env.mirbuilder.emit: {}", e))
                })
        }
        ("env.codegen", "emit_object") => {
            // MIR JSON -> object file; the resulting path is returned as a string.
            // Runtime path and optimisation level come from the environment.
            let opts = crate::host_providers::llvm_codegen::Opts {
                out: None,
                nyrt: std::env::var("NYASH_EMIT_EXE_NYRT").ok().map(std::path::PathBuf::from),
                opt_level: std::env::var("HAKO_LLVM_OPT_LEVEL").ok(),
                timeout_ms: None,
            };
            crate::host_providers::llvm_codegen::mir_json_to_object(&payload, opts)
                .map(|p| VMValue::String(p.to_string_lossy().into_owned()))
                .map_err(|e| {
                    VMError::InvalidInstruction(format!("env.codegen.emit_object: {}", e))
                })
        }
        _ => Err(VMError::InvalidInstruction(format!(
            "hostbridge.extern_invoke unsupported for {}.{}",
            target, method
        ))),
    }
}
"nyash.builtin.print" | "print" | "nyash.console.log" => {
if let Some(arg_id) = args.get(0) {
let val = self.reg_load(*arg_id)?;
@ -591,6 +672,80 @@ impl MirInterpreter {
};
panic!("{}", msg);
}
"hostbridge.extern_invoke" => {
    // Legacy global-call form: hostbridge.extern_invoke(name, method, args?).
    // The provider name and method are mandatory; the payload is optional here and
    // its absence is reported by the individual provider branches below.
    if args.len() < 2 {
        return Err(VMError::InvalidInstruction(
            "extern_invoke expects at least 2 args".into(),
        ));
    }
    let target = self.reg_load(args[0])?.to_string();
    let method = self.reg_load(args[1])?.to_string();
    // Third operand, when present, collapsed to one string: element 0 of an
    // ArrayBox, the string form of any other box, or of a primitive value.
    let payload: Option<String> = match args.get(2) {
        None => None,
        Some(slot) => Some(match self.reg_load(*slot)? {
            VMValue::BoxRef(b) => {
                if let Some(array) =
                    b.as_any().downcast_ref::<crate::boxes::array::ArrayBox>()
                {
                    let zero: Box<dyn crate::box_trait::NyashBox> =
                        Box::new(crate::box_trait::IntegerBox::new(0));
                    let first = array.get(zero);
                    first.to_string_box().value
                } else {
                    b.to_string_box().value
                }
            }
            other => other.to_string(),
        }),
    };
    match (target.as_str(), method.as_str()) {
        // Program JSON -> MIR JSON.
        ("env.mirbuilder", "emit") => match payload {
            Some(program_json) => {
                crate::host_providers::mir_builder::program_json_to_mir_json(&program_json)
                    .map(VMValue::String)
                    .map_err(|e| {
                        VMError::InvalidInstruction(format!("env.mirbuilder.emit: {}", e))
                    })
            }
            None => Err(VMError::InvalidInstruction(
                "extern_invoke env.mirbuilder.emit expects 1 arg".into(),
            )),
        },
        // MIR JSON -> object file path (returned as a string).
        ("env.codegen", "emit_object") => match payload {
            Some(mir_json) => {
                let opts = crate::host_providers::llvm_codegen::Opts {
                    out: None,
                    nyrt: std::env::var("NYASH_EMIT_EXE_NYRT")
                        .ok()
                        .map(std::path::PathBuf::from),
                    opt_level: std::env::var("HAKO_LLVM_OPT_LEVEL").ok(),
                    timeout_ms: None,
                };
                crate::host_providers::llvm_codegen::mir_json_to_object(&mir_json, opts)
                    .map(|obj| VMValue::String(obj.to_string_lossy().into_owned()))
                    .map_err(|e| {
                        VMError::InvalidInstruction(format!("env.codegen.emit_object: {}", e))
                    })
            }
            None => Err(VMError::InvalidInstruction(
                "extern_invoke env.codegen.emit_object expects 1 arg".into(),
            )),
        },
        _ => Err(VMError::InvalidInstruction(format!(
            "hostbridge.extern_invoke unsupported for {}.{}",
            target, method
        ))),
    }
}
_ => Err(VMError::InvalidInstruction(format!(
"Unknown extern function: {}",
extern_name

View File

@ -9,6 +9,21 @@ impl MirInterpreter {
args: &[ValueId],
) -> Result<(), VMError> {
match (iface, method) {
("env", "get") => {
    // env.get(key): look up the process environment variable named by the first
    // argument. With no argument the call is a no-op that still succeeds.
    if let Some(key_slot) = args.first() {
        let key = self.reg_load(*key_slot)?.to_string();
        // A variable that cannot be read is represented as Void (null-equivalent).
        let looked_up = match std::env::var(&key) {
            Ok(text) => VMValue::String(text),
            Err(_) => VMValue::Void,
        };
        if let Some(d) = dst {
            self.regs.insert(d, looked_up);
        }
    }
    Ok(())
}
("env.console", "log") => {
if let Some(a0) = args.get(0) {
let v = self.reg_load(*a0)?;
@ -115,6 +130,144 @@ impl MirInterpreter {
}
Ok(())
}
// NOTE(review): an arm with the identical ("env", "get") pattern appears earlier in
// this listing, directly after `match (iface, method) {`. If both arms belong to the
// same match, this one can never be selected — confirm and remove one of them.
("env", "get") => {
// env.get(key) - get environment variable
// With no argument nothing is read or written and the call still returns Ok(()).
if let Some(a0) = args.get(0) {
let k = self.reg_load(*a0)?.to_string();
// `.ok()` folds every lookup failure into None.
let val = std::env::var(&k).ok();
if let Some(d) = dst {
if let Some(v) = val {
self.regs.insert(d, VMValue::String(v));
} else {
// Missing variable: store a VoidBox converted through `from_nyash_box`.
self.regs.insert(d, VMValue::from_nyash_box(Box::new(crate::box_trait::VoidBox::new())));
}
}
}
Ok(())
}
("env.mirbuilder", "emit") => {
    // env.mirbuilder.emit(program_json): delegate to the host provider, which turns
    // Program JSON into MIR JSON, and store the result in `dst` when one is given.
    match args.first() {
        None => Err(VMError::InvalidInstruction(
            "env.mirbuilder.emit expects 1 arg".into(),
        )),
        Some(src) => {
            let program_json = self.reg_load(*src)?.to_string();
            crate::host_providers::mir_builder::program_json_to_mir_json(&program_json)
                .map(|mir_json| {
                    if let Some(d) = dst {
                        self.regs.insert(d, VMValue::String(mir_json));
                    }
                })
                .map_err(|e| {
                    VMError::InvalidInstruction(format!("env.mirbuilder.emit: {}", e))
                })
        }
    }
}
("env.codegen", "emit_object") => {
    // env.codegen.emit_object(mir_json): compile MIR JSON to an object file via the
    // host codegen provider and store the object path (as a string) in `dst`.
    match args.first() {
        None => Err(VMError::InvalidInstruction(
            "env.codegen.emit_object expects 1 arg".into(),
        )),
        Some(src) => {
            let mir_json = self.reg_load(*src)?.to_string();
            // Runtime path and optimisation level are taken from the environment.
            let nyrt = std::env::var("NYASH_EMIT_EXE_NYRT")
                .ok()
                .map(std::path::PathBuf::from);
            let opt_level = std::env::var("HAKO_LLVM_OPT_LEVEL").ok();
            let opts = crate::host_providers::llvm_codegen::Opts {
                out: None,
                nyrt,
                opt_level,
                timeout_ms: None,
            };
            crate::host_providers::llvm_codegen::mir_json_to_object(&mir_json, opts)
                .map(|obj| {
                    if let Some(d) = dst {
                        self.regs
                            .insert(d, VMValue::String(obj.to_string_lossy().into_owned()));
                    }
                })
                .map_err(|e| {
                    VMError::InvalidInstruction(format!("env.codegen.emit_object: {}", e))
                })
        }
    }
}
("hostbridge", "extern_invoke") => {
    // hostbridge.extern_invoke(name, method, args?): re-dispatch to a host provider
    // chosen by the strings held in the first two argument registers.
    // MirBuilder uses: extern_invoke("env.mirbuilder","emit", [program_json])
    if args.len() < 2 {
        return Err(VMError::InvalidInstruction(
            "extern_invoke expects at least 2 args".into(),
        ));
    }
    let target = self.reg_load(args[0])?.to_string();
    let method = self.reg_load(args[1])?.to_string();
    // Optional third operand collapsed to one string: element 0 when it is an
    // ArrayBox, otherwise the string form of the box or primitive value.
    let payload: Option<String> = match args.get(2) {
        None => None,
        Some(slot) => Some(match self.reg_load(*slot)? {
            VMValue::BoxRef(b) => {
                if let Some(array) =
                    b.as_any().downcast_ref::<crate::boxes::array::ArrayBox>()
                {
                    let zero: Box<dyn crate::box_trait::NyashBox> =
                        Box::new(crate::box_trait::IntegerBox::new(0));
                    let first = array.get(zero);
                    first.to_string_box().value
                } else {
                    b.to_string_box().value
                }
            }
            other => other.to_string(),
        }),
    };
    // Known providers; each one requires the payload to be present.
    match (target.as_str(), method.as_str()) {
        ("env.mirbuilder", "emit") => match payload {
            None => Err(VMError::InvalidInstruction(
                "extern_invoke env.mirbuilder.emit expects 1 arg".into(),
            )),
            Some(program_json) => {
                match crate::host_providers::mir_builder::program_json_to_mir_json(
                    &program_json,
                ) {
                    Ok(mir_json) => {
                        if let Some(d) = dst {
                            self.regs.insert(d, VMValue::String(mir_json));
                        }
                        Ok(())
                    }
                    Err(e) => Err(VMError::InvalidInstruction(format!(
                        "env.mirbuilder.emit: {}",
                        e
                    ))),
                }
            }
        },
        ("env.codegen", "emit_object") => match payload {
            None => Err(VMError::InvalidInstruction(
                "extern_invoke env.codegen.emit_object expects 1 arg".into(),
            )),
            Some(mir_json) => {
                let opts = crate::host_providers::llvm_codegen::Opts {
                    out: None,
                    nyrt: std::env::var("NYASH_EMIT_EXE_NYRT")
                        .ok()
                        .map(std::path::PathBuf::from),
                    opt_level: std::env::var("HAKO_LLVM_OPT_LEVEL").ok(),
                    timeout_ms: None,
                };
                match crate::host_providers::llvm_codegen::mir_json_to_object(&mir_json, opts) {
                    Ok(obj) => {
                        if let Some(d) = dst {
                            let path_text = obj.to_string_lossy().into_owned();
                            self.regs.insert(d, VMValue::String(path_text));
                        }
                        Ok(())
                    }
                    Err(e) => Err(VMError::InvalidInstruction(format!(
                        "env.codegen.emit_object: {}",
                        e
                    ))),
                }
            }
        },
        _ => Err(VMError::InvalidInstruction(format!(
            "hostbridge.extern_invoke unsupported for {}.{}",
            target, method
        ))),
    }
}
_ => Err(VMError::InvalidInstruction(format!(
"ExternCall {}.{} not supported",
iface, method