Phase 21.7 normalization: optimization pre-work + bench harness expansion

- Add opt-in optimizations (defaults OFF)
  - Ret purity verifier: NYASH_VERIFY_RET_PURITY=1
  - strlen FAST enhancement for const handles
  - FAST_INT gate for same-BB SSA optimization
  - length cache for string literals in llvmlite
- Expand bench harness (tools/perf/microbench.sh)
  - Add branch/call/stringchain/arraymap/chip8/kilo cases
  - Auto-calculate ratio vs C reference
  - Document in benchmarks/README.md
- Compiler health improvements
  - Unify PHI insertion to insert_phi_at_head()
  - Add NYASH_LLVM_SKIP_BUILD=1 for build reuse
- Runtime & safety enhancements
  - Clarify Rust/Hako ownership boundaries
  - Strengthen receiver localization (LocalSSA/pin/after-PHIs)
  - Stop excessive PluginInvoke→BoxCall rewrites
- Update CURRENT_TASK.md, docs, and canaries

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-11-13 16:40:58 +09:00
parent 9e2fa1e36e
commit dda65b94b7
160 changed files with 6773 additions and 1692 deletions

View File

@ -0,0 +1,234 @@
use super::*;
impl MirInterpreter {
pub(super) fn execute_method_callee(
&mut self,
box_name: &str,
method: &str,
receiver: &Option<ValueId>,
args: &[ValueId],
) -> Result<VMValue, VMError> {
if let Some(recv_id) = receiver {
// Primary: load receiver by id. If undefined due to builder localization gap,
// try to auto-locate the most recent `NewBox <box_name>` in the current block
// (same fn/last_block) and use its dst as the receiver. This is a structural
// recovery, not a by-name fallback, and keeps semantics stable for plugin boxes.
let recv_val = match self.reg_load(*recv_id) {
Ok(v) => v,
Err(e) => {
// Attempt structured autoscan for receiver in current block
if let (Some(cur_fn), Some(bb)) = (self.cur_fn.clone(), self.last_block) {
if let Some(func) = self.functions.get(&cur_fn) {
if let Some(block) = func.blocks.get(&bb) {
let mut last_recv: Option<ValueId> = None;
for inst in &block.instructions {
if let crate::mir::MirInstruction::NewBox { dst, box_type, .. } = inst {
if box_type == box_name { last_recv = Some(*dst); }
}
}
if let Some(rid) = last_recv {
if let Ok(v) = self.reg_load(rid) { v } else { return Err(e); }
} else {
// Dev fallback (guarded): use args[0] as surrogate receiver if explicitly allowed
let tolerate = std::env::var("NYASH_VM_RECV_ARG_FALLBACK").ok().as_deref() == Some("1")
|| std::env::var("NYASH_VM_TOLERATE_VOID").ok().as_deref() == Some("1");
if tolerate { if let Some(a0) = args.get(0) { self.reg_load(*a0)? } else { return Err(e); } }
else { return Err(e); }
}
} else {
return Err(e);
}
} else {
return Err(e);
}
} else {
// Dev fallback (guarded): use args[0] as surrogate receiver if explicitly allowed
let tolerate = std::env::var("NYASH_VM_RECV_ARG_FALLBACK").ok().as_deref() == Some("1")
|| std::env::var("NYASH_VM_TOLERATE_VOID").ok().as_deref() == Some("1");
if tolerate { if let Some(a0) = args.get(0) { self.reg_load(*a0)? } else { return Err(e); } }
else { return Err(e); }
}
}
};
let dev_trace = std::env::var("NYASH_VM_TRACE").ok().as_deref() == Some("1");
// Fast bridge for builtin boxes (Array) and common methods.
// Preserve legacy semantics when plugins are absent.
if let VMValue::BoxRef(bx) = &recv_val {
// ArrayBox bridge
if let Some(arr) = bx.as_any().downcast_ref::<crate::boxes::array::ArrayBox>() {
match method {
"birth" => { return Ok(VMValue::Void); }
"push" => {
if let Some(a0) = args.get(0) {
let v = self.load_as_box(*a0)?;
let _ = arr.push(v);
return Ok(VMValue::Void);
}
}
"len" | "length" | "size" => {
let ret = arr.length();
return Ok(VMValue::from_nyash_box(ret));
}
"get" => {
if let Some(a0) = args.get(0) {
let idx = self.load_as_box(*a0)?;
let ret = arr.get(idx);
return Ok(VMValue::from_nyash_box(ret));
}
}
"set" => {
if args.len() >= 2 {
let idx = self.load_as_box(args[0])?;
let val = self.load_as_box(args[1])?;
let _ = arr.set(idx, val);
return Ok(VMValue::Void);
}
}
_ => {}
}
}
}
// Minimal bridge for birth(): delegate to BoxCall handler and return Void
if method == "birth" {
let _ = self.handle_box_call(None, *recv_id, &method.to_string(), args)?;
return Ok(VMValue::Void);
}
let is_kw = method == "keyword_to_token_type";
if dev_trace && is_kw {
let a0 = args.get(0).and_then(|id| self.reg_load(*id).ok());
eprintln!("[vm-trace] mcall {} argv0={:?}", method, a0);
}
let out = self.execute_method_call(&recv_val, method, args)?;
if dev_trace && is_kw {
eprintln!("[vm-trace] mret {} -> {:?}", method, out);
}
Ok(out)
} else {
// Receiver not provided: try static singleton instance for the box (methodize PoC fallback)
if self.static_box_decls.contains_key(box_name) {
let instance = self.ensure_static_box_instance(box_name)?;
let recv_val = VMValue::from_nyash_box(Box::new(instance.clone()));
return self.execute_method_call(&recv_val, method, args);
}
Err(self.err_with_context("Method call", &format!("missing receiver for {}", method)))
}
}
fn execute_method_call(
&mut self,
receiver: &VMValue,
method: &str,
args: &[ValueId],
) -> Result<VMValue, VMError> {
match receiver {
VMValue::String(s) => match method {
"length" => Ok(VMValue::Integer(s.len() as i64)),
"concat" => {
if let Some(arg_id) = args.get(0) {
let arg_val = self.reg_load(*arg_id)?;
let new_str = format!("{}{}", s, arg_val.to_string());
Ok(VMValue::String(new_str))
} else {
Err(self.err_invalid("concat requires 1 argument"))
}
}
"replace" => {
if args.len() == 2 {
let old = self.reg_load(args[0])?.to_string();
let new = self.reg_load(args[1])?.to_string();
Ok(VMValue::String(s.replace(&old, &new)))
} else {
Err(self.err_invalid("replace requires 2 arguments"))
}
}
"indexOf" => {
if let Some(arg_id) = args.get(0) {
let needle = self.reg_load(*arg_id)?.to_string();
let idx = s.find(&needle).map(|i| i as i64).unwrap_or(-1);
Ok(VMValue::Integer(idx))
} else {
Err(self.err_invalid("indexOf requires 1 argument"))
}
}
"lastIndexOf" => {
if let Some(arg_id) = args.get(0) {
let needle = self.reg_load(*arg_id)?.to_string();
let idx = s.rfind(&needle).map(|i| i as i64).unwrap_or(-1);
Ok(VMValue::Integer(idx))
} else {
Err(self.err_invalid("lastIndexOf requires 1 argument"))
}
}
"substring" => {
let start = if let Some(a0) = args.get(0) {
self.reg_load(*a0)?.as_integer().unwrap_or(0)
} else { 0 };
let end = if let Some(a1) = args.get(1) {
self.reg_load(*a1)?.as_integer().unwrap_or(s.len() as i64)
} else { s.len() as i64 };
let len = s.len() as i64;
let i0 = start.max(0).min(len) as usize;
let i1 = end.max(0).min(len) as usize;
if i0 > i1 { return Ok(VMValue::String(String::new())); }
// Note: operating on bytes; Nyash strings are UTF8, but tests are ASCII only here
let bytes = s.as_bytes();
let sub = String::from_utf8(bytes[i0..i1].to_vec()).unwrap_or_default();
Ok(VMValue::String(sub))
}
_ => Err(self.err_method_not_found("String", method)),
},
VMValue::BoxRef(box_ref) => {
// Try builtin StringBox first
if let Some(string_box) = box_ref
.as_any()
.downcast_ref::<crate::boxes::string_box::StringBox>()
{
match method {
"lastIndexOf" => {
if let Some(arg_id) = args.get(0) {
let needle = self.reg_load(*arg_id)?.to_string();
let result_box = string_box.lastIndexOf(&needle);
Ok(VMValue::from_nyash_box(result_box))
} else {
Err(self.err_invalid("lastIndexOf requires 1 argument"))
}
}
"indexOf" | "find" => {
if let Some(arg_id) = args.get(0) {
let needle = self.reg_load(*arg_id)?.to_string();
let result_box = string_box.find(&needle);
Ok(VMValue::from_nyash_box(result_box))
} else {
Err(self.err_invalid("indexOf/find requires 1 argument"))
}
}
_ => Err(self.err_method_not_found("StringBox", method)),
}
} else if let Some(p) = box_ref
.as_any()
.downcast_ref::<crate::runtime::plugin_loader_v2::PluginBoxV2>()
{
let host = crate::runtime::plugin_loader_unified::get_global_plugin_host();
let host = host.read().unwrap();
let argv = self.load_args_as_boxes(args)?;
match host.invoke_instance_method(
&p.box_type,
method,
p.inner.instance_id,
&argv,
) {
Ok(Some(ret)) => Ok(VMValue::from_nyash_box(ret)),
Ok(None) => Ok(VMValue::Void),
Err(e) => Err(self.err_with_context(
&format!("Plugin method {}.{}", p.box_type, method),
&format!("{:?}", e)
)),
}
} else {
Err(self.err_method_not_found(&box_ref.type_name(), method))
}
}
_ => Err(self.err_with_context("method call", &format!("{} not supported on {:?}", method, receiver))),
}
}
}