phase-20.45: PRIMARY no-fallback reps + MIR v0 shape fixes

- Fix MIR v0 shape in lowers: functions[] + name="main" + blocks.id
  * lower_return_int_box.hako
  * lower_return_binop_box.hako
- runner_min: adopt LowerReturnBinOpBox before ReturnInt
- Add PRIMARY no-fallback canaries (all PASS):
  * return-binop / array-size / load-store / return-logical (OR)
- Fix phase2043 runner_min canary alias (Runner -> BuilderRunnerMinBox)
- Update docs: phase-20.45 README (PRIMARY reps), CURRENT_TASK progress

Ancillary: keep builder/provider/canary files in sync; no unrelated behavior changes.
This commit is contained in:
nyash-codex
2025-11-05 18:57:03 +09:00
parent 0996090d6d
commit 96ea3892af
119 changed files with 4746 additions and 316 deletions

View File

@ -106,7 +106,7 @@ pub(super) fn try_handle_map_box(
if let Some(d) = dst { this.regs.insert(d, VMValue::from_nyash_box(ret)); }
return Ok(true);
}
"size" => {
"len" | "length" | "size" => {
let ret = mb.size();
if let Some(d) = dst { this.regs.insert(d, VMValue::from_nyash_box(ret)); }
return Ok(true);

View File

@ -524,6 +524,15 @@ pub fn ny_compiler_use_tmp_only() -> bool {
.as_deref()
== Some("1")
}
/// Toggle for decoding `\uXXXX` escapes (optionally surrogate pairs) in string literals.
///
/// `HAKO_PARSER_DECODE_UNICODE` is consulted first: whenever `env_flag` yields a
/// value for it, that value is returned as-is. `NYASH_PARSER_DECODE_UNICODE` is
/// only consulted when `env_flag` yields `None` for the HAKO variable.
/// If neither variable yields a value the toggle is OFF, which keeps strict
/// backward compatibility.
pub fn parser_decode_unicode() -> bool {
    if let Some(enabled) = env_flag("HAKO_PARSER_DECODE_UNICODE") {
        return enabled;
    }
    env_flag("NYASH_PARSER_DECODE_UNICODE").unwrap_or(false)
}
fn env_flag(var: &str) -> Option<bool> {
std::env::var(var).ok().map(|v| {
let lv = v.to_ascii_lowercase();

View File

@ -25,6 +25,7 @@ pub fn program_json_to_mir_json(program_json: &str) -> Result<String, String> {
// Emit MIR(JSON) to a temporary file (reuse existing emitter), then read back
let tmp_dir = std::env::temp_dir();
let tmp_path = tmp_dir.join("hako_mirbuilder_out.json");
// Emit MIR JSON (v0/v1 per env) via harness-bin emitter to a temp file
if let Err(e) = runner::mir_json_emit::emit_mir_json_for_harness_bin(&module, &tmp_path) {
let tag = format!("[mirbuilder/emit/error] {}", e);
eprintln!("{}", tag);
@ -43,4 +44,3 @@ pub fn program_json_to_mir_json(program_json: &str) -> Result<String, String> {
}
}
}

View File

@ -46,11 +46,12 @@ impl super::MirBuilder {
phi_inputs.push((else_block, else_val));
crate::mir::builder::emission::branch::emit_jump(self, merge_block)?;
self.start_new_block(merge_block)?;
// フェーズM: 常にPHI命令を使用no_phi_mode撤廃
self.emit_instruction(super::MirInstruction::Phi {
dst: result_val,
inputs: phi_inputs,
})?;
// フェーズM: PHI はブロック先頭に配置cf_common 統一
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, result_val, phi_inputs);
} else {
self.emit_instruction(super::MirInstruction::Phi { dst: result_val, inputs: phi_inputs })?;
}
return Ok(result_val);
}
@ -101,11 +102,12 @@ impl super::MirBuilder {
// Merge and yield result
self.start_new_block(merge_block)?;
// フェーズM: 常にPHI命令を使用no_phi_mode撤廃
self.emit_instruction(super::MirInstruction::Phi {
dst: result_val,
inputs: phi_inputs,
})?;
// フェーズM: PHI はブロック先頭に配置cf_common 統一
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, result_val, phi_inputs);
} else {
self.emit_instruction(super::MirInstruction::Phi { dst: result_val, inputs: phi_inputs })?;
}
Ok(result_val)
}
}

View File

@ -68,7 +68,11 @@ impl MirBuilder {
for (name, &pre_v) in pre_if_var_map.iter() {
let phi_val = self.value_gen.next();
let inputs = vec![(pre_branch_bb, pre_v)];
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, phi_val, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
}
self.variable_map.insert(name.clone(), phi_val);
if trace_if {
eprintln!(
@ -99,7 +103,11 @@ impl MirBuilder {
for (name, &pre_v) in pre_if_var_map.iter() {
let phi_val = self.value_gen.next();
let inputs = vec![(pre_branch_bb, pre_v)];
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, phi_val, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
}
self.variable_map.insert(name.clone(), phi_val);
if trace_if {
eprintln!(
@ -121,7 +129,7 @@ impl MirBuilder {
if else_reaches_merge {
// Scope leave for else-branch
self.hint_scope_leave(0);
self.emit_instruction(MirInstruction::Jump { target: merge_block })?;
crate::mir::builder::emission::branch::emit_jump(self, merge_block)?;
}
// Pop else-branch debug region
self.debug_pop_region();

View File

@ -261,7 +261,11 @@ impl super::MirBuilder {
for (name, &pre_v) in pre_if_var_map.iter() {
let phi_val = self.value_gen.next();
let inputs = vec![(pre_branch_bb, pre_v)];
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, phi_val, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
}
self.variable_map.insert(name.clone(), phi_val);
}
@ -290,13 +294,13 @@ impl super::MirBuilder {
self.start_new_block(rhs_join)?;
let rhs_bool = self.value_gen.next();
let inputs = vec![(rhs_true_exit, t_id), (rhs_false_exit, f_id)];
if let Some(func) = self.current_function.as_mut() {
func.update_cfg();
}
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
if let Some(func) = self.current_function.as_mut() { func.update_cfg(); }
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, rhs_bool, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: rhs_bool, inputs })?;
}
self.emit_instruction(MirInstruction::Phi { dst: rhs_bool, inputs })?;
self.value_types.insert(rhs_bool, MirType::Bool);
rhs_bool
} else {
@ -319,7 +323,11 @@ impl super::MirBuilder {
for (name, &pre_v) in pre_if_var_map.iter() {
let phi_val = self.value_gen.next();
let inputs = vec![(pre_branch_bb, pre_v)];
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, phi_val, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
}
self.variable_map.insert(name.clone(), phi_val);
}
// AND: else → false
@ -349,13 +357,13 @@ impl super::MirBuilder {
self.start_new_block(rhs_join)?;
let rhs_bool = self.value_gen.next();
let inputs = vec![(rhs_true_exit, t_id), (rhs_false_exit, f_id)];
if let Some(func) = self.current_function.as_mut() {
func.update_cfg();
}
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
if let Some(func) = self.current_function.as_mut() { func.update_cfg(); }
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, rhs_bool, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: rhs_bool, inputs })?;
}
self.emit_instruction(MirInstruction::Phi { dst: rhs_bool, inputs })?;
self.value_types.insert(rhs_bool, MirType::Bool);
rhs_bool
};
@ -364,7 +372,7 @@ impl super::MirBuilder {
let else_var_map_end = self.variable_map.clone();
if else_reaches_merge {
self.hint_scope_leave(0);
self.emit_instruction(MirInstruction::Jump { target: merge_block })?;
crate::mir::builder::emission::branch::emit_jump(self, merge_block)?;
}
// ---- MERGE ----
@ -379,11 +387,13 @@ impl super::MirBuilder {
if else_reaches_merge { inputs.push((else_exit_block, else_value_raw)); }
let result_val = if inputs.len() >= 2 {
if let Some(func) = self.current_function.as_mut() { func.update_cfg(); }
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
}
let dst = self.value_gen.next();
self.emit_instruction(MirInstruction::Phi { dst, inputs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, dst, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst, inputs })?;
}
self.value_types.insert(dst, MirType::Bool);
dst
} else if inputs.len() == 1 {

View File

@ -57,7 +57,11 @@ impl MirBuilder {
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
}
let merged = self.value_gen.next();
self.emit_instruction(MirInstruction::Phi { dst: merged, inputs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, merged, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: merged, inputs })?;
}
self.variable_map.insert(name, merged);
}
}
@ -90,7 +94,11 @@ impl MirBuilder {
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
}
let merged = self.value_gen.next();
self.emit_instruction(MirInstruction::Phi { dst: merged, inputs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, merged, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: merged, inputs })?;
}
self.variable_map.insert(pin_name.clone(), merged);
}
}
@ -160,7 +168,11 @@ impl MirBuilder {
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
}
self.emit_instruction(MirInstruction::Phi { dst: result_val, inputs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, result_val, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: result_val, inputs })?;
}
}
}
self.variable_map = pre_if_var_map.clone();
@ -183,7 +195,11 @@ impl MirBuilder {
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
}
self.emit_instruction(MirInstruction::Phi { dst: result_val, inputs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, result_val, inputs);
} else {
self.emit_instruction(MirInstruction::Phi { dst: result_val, inputs })?;
}
}
}
// Merge variable map conservatively to pre-if snapshot (no new bindings)

View File

@ -1,6 +1,6 @@
use crate::mir::{
function::{FunctionSignature, MirFunction, MirModule},
BasicBlock, BasicBlockId, ConstValue, MirInstruction, MirType, ValueId,
BasicBlock, BasicBlockId, ConstValue, EffectMask, MirInstruction, MirType, ValueId,
};
use serde_json::Value;
use super::mir_json::common as mirjson_common;
@ -132,6 +132,39 @@ pub fn parse_mir_v0_to_module(json: &str) -> Result<MirModule, String> {
block_ref.add_instruction(MirInstruction::Return { value });
if let Some(val) = value { signature.return_type = MirType::Integer; max_value_id = max_value_id.max(val.as_u32() + 1); } else { signature.return_type = MirType::Void; }
}
"newbox" => {
let dst = require_u64(inst, "dst", "newbox dst")? as u32;
let ty = inst.get("type").and_then(Value::as_str).ok_or_else(|| "newbox missing type".to_string())?.to_string();
let args_v = inst.get("args").and_then(Value::as_array).cloned().unwrap_or_default();
let mut args: Vec<ValueId> = Vec::with_capacity(args_v.len());
for a in args_v {
let id = a.as_u64().ok_or_else(|| "newbox arg must be integer".to_string())? as u32;
args.push(ValueId::new(id));
}
block_ref.add_instruction(MirInstruction::NewBox { dst: ValueId::new(dst), box_type: ty, args });
max_value_id = max_value_id.max(dst + 1);
}
"boxcall" => {
// { op:"boxcall", box:<vid>, method:"name", args:[vid...], dst?:<vid> }
let box_id = require_u64(inst, "box", "boxcall box")? as u32;
let method = inst.get("method").and_then(Value::as_str).ok_or_else(|| "boxcall missing method".to_string())?.to_string();
let dst_opt = inst.get("dst").and_then(Value::as_u64).map(|v| ValueId::new(v as u32));
let args_v = inst.get("args").and_then(Value::as_array).cloned().unwrap_or_default();
let mut args: Vec<ValueId> = Vec::with_capacity(args_v.len());
for a in args_v {
let id = a.as_u64().ok_or_else(|| "boxcall arg must be integer".to_string())? as u32;
args.push(ValueId::new(id));
}
block_ref.add_instruction(MirInstruction::BoxCall {
dst: dst_opt,
box_val: ValueId::new(box_id),
method,
method_id: None,
args,
effects: EffectMask::READ,
});
if let Some(dv) = dst_opt { max_value_id = max_value_id.max(dv.as_u32() + 1); }
}
other => {
return Err(format!("unsupported op '{}' in mir_json_v0 loader", other));
}

View File

@ -0,0 +1,48 @@
/*!
* Hako-like source detection and minimal normalization helpers.
*
* - looks_like_hako_code: heuristics to detect Hako surface in Nyash path
* - strip_local_decl: drop leading `local ` at line head for Nyash parser compatibility
* - fail_fast_on_hako: env-gated policy (default ON) to fail fast on Hako-like source in Nyash VM path
*/
/// Heuristically decides whether `s` looks like Hako surface syntax.
///
/// Returns `true` when the text contains `using selfhost.` or `using hakorune.`
/// anywhere, or when any line, after leading whitespace is removed, starts with
/// `local ` (the keyword followed by a single space).
pub fn looks_like_hako_code(s: &str) -> bool {
    const USING_MARKERS: [&str; 2] = ["using selfhost.", "using hakorune."];
    if USING_MARKERS.iter().any(|marker| s.contains(*marker)) {
        return true;
    }
    s.lines()
        .map(str::trim_start)
        .any(|line| line.starts_with("local "))
}
/// Removes a leading `local` keyword from every line that starts with one.
///
/// A line qualifies when, after its leading whitespace, it begins with `local`
/// followed by a space or a tab. The keyword and exactly one separator character
/// are dropped; the original indentation and the remainder of the line are kept.
/// All other lines are copied unchanged.
///
/// The input is split with `str::lines`, and every emitted line is terminated
/// with `\n`, so non-empty output always ends with a newline.
pub fn strip_local_decl(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for line in s.lines() {
        let rest = line.trim_start();
        let indent = &line[..line.len() - rest.len()];
        // Only one separator after the keyword is consumed; any further
        // spaces/tabs remain part of the declaration text.
        let declaration = rest
            .strip_prefix("local ")
            .or_else(|| rest.strip_prefix("local\t"));
        match declaration {
            Some(tail) => {
                out.push_str(indent);
                out.push_str(tail);
            }
            None => out.push_str(line),
        }
        out.push('\n');
    }
    out
}
/// Policy switch: should Hako-like source entering the Nyash VM path fail fast?
///
/// Reads `HAKO_FAIL_FAST_ON_HAKO_IN_NYASH_VM`. Only the exact values `0`,
/// `false` and `off` disable the policy; any other value, or an unset or
/// unreadable variable, leaves it enabled (the default is ON).
pub fn fail_fast_on_hako() -> bool {
    let setting = std::env::var("HAKO_FAIL_FAST_ON_HAKO_IN_NYASH_VM").ok();
    !matches!(
        setting.as_deref(),
        Some("0") | Some("false") | Some("off")
    )
}

View File

@ -11,3 +11,4 @@ pub mod selfhost;
pub mod resolve;
pub mod exec;
pub mod core_bridge;
pub mod hako;

View File

@ -359,11 +359,12 @@ pub fn resolve_prelude_paths_profiled(
) -> Result<(String, Vec<String>), String> {
// First pass: strip using from the main source and collect direct prelude paths
let (cleaned, direct) = collect_using_and_strip(runner, code, filename)?;
// When AST using is enabled、recursively collect nested preludes in DFS order
// Recursively collect nested preludes (DFS) for both AST/text merges.
// Rationale: even when we merge via text, nested `using` inside preludes
// must be discovered so that their definitions are present at runtime
// (e.g., runner_min -> lower_* boxes). Previously this only ran when
// NYASH_USING_AST=1, which caused unresolved calls in inline flows.
let ast_on = std::env::var("NYASH_USING_AST").ok().as_deref() == Some("1");
if !ast_on {
return Ok((cleaned, direct));
}
let mut out: Vec<String> = Vec::new();
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
fn normalize_path(path: &str) -> (String, String) {
@ -451,6 +452,9 @@ pub fn resolve_prelude_paths_profiled(
}
}
}
// If AST merge is disabled, still return the discovered nested prelude list
// so that the text merger can inline all dependencies. This keeps behavior
// consistent across strategies and fixes nested `using` resolution.
Ok((cleaned, out))
}
@ -659,6 +663,40 @@ pub fn merge_prelude_text(
// First pass: collect and resolve prelude paths
let (cleaned_main, prelude_paths) = resolve_prelude_paths_profiled(runner, source, filename)?;
// Expand nested preludes for text-merge too (DFS) so that any `using`
// inside prelude files (e.g., runner_min -> lower_* boxes) are also
// included even when NYASH_USING_AST is OFF.
let mut expanded: Vec<String> = Vec::new();
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
/// Returns the canonical form of `p` as a (lossily converted) string, or `p`
/// unchanged when `std::fs::canonicalize` fails.
fn canonize(p: &str) -> String {
    match std::fs::canonicalize(p) {
        Ok(resolved) => resolved.to_string_lossy().into_owned(),
        Err(_) => p.to_string(),
    }
}
/// Depth-first walk over the `using` dependencies reachable from `path`.
///
/// Each file is processed at most once, keyed by `canonize(path)`. The paths
/// reported by `collect_using_and_strip` for a file are visited before the file's
/// own key is appended, so `out` receives canonicalized keys in post-order.
///
/// Returns `Err` when the file cannot be read or when `collect_using_and_strip`
/// (or a nested visit) fails.
fn dfs_text(
    runner: &NyashRunner,
    path: &str,
    out: &mut Vec<String>,
    seen: &mut std::collections::HashSet<String>,
) -> Result<(), String> {
    let canonical = canonize(path);
    if seen.contains(&canonical) {
        // Already visited (or currently being visited): nothing to add.
        return Ok(());
    }
    seen.insert(canonical.clone());
    let text = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(e) => return Err(format!("using: failed to read '{}': {}", path, e)),
    };
    let (_, deps) = collect_using_and_strip(runner, &text, path)?;
    for dep in deps.iter() {
        dfs_text(runner, dep, out, seen)?;
    }
    out.push(canonical);
    Ok(())
}
for p in prelude_paths.iter() {
dfs_text(runner, p, &mut expanded, &mut seen)?;
}
let prelude_paths = &expanded;
if prelude_paths.is_empty() {
// No using statements, return original

View File

@ -138,38 +138,16 @@ impl NyashRunner {
// Hako-friendly normalize: strip leading `local ` at line head for parser compatibility.
// This keeps semantics close enough for our inline/selfhost drivers while we unify frontends.
fn looks_like_hako_code(s: &str) -> bool {
s.contains("using selfhost.") || s.lines().any(|l| l.trim_start().starts_with("local "))
}
fn strip_local_decl(s: &str) -> String {
let mut out = String::with_capacity(s.len());
for line in s.lines() {
let leading = line.len() - line.trim_start().len();
let (indent, rest) = line.split_at(leading);
if rest.starts_with("local ") || rest.starts_with("local\t") {
// drop the first token `local` and a single following space/tab
let mut bytes = rest.as_bytes();
let mut i = 5; // after 'local'
while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') { i += 1; break; }
out.push_str(indent);
out.push_str(&rest[i..]);
out.push('\n');
} else {
out.push_str(line);
out.push('\n');
}
}
out
}
if looks_like_hako_code(&preexpanded_owned) {
preexpanded_owned = strip_local_decl(&preexpanded_owned);
if crate::runner::modes::common_util::hako::looks_like_hako_code(&preexpanded_owned) {
preexpanded_owned = crate::runner::modes::common_util::hako::strip_local_decl(&preexpanded_owned);
}
// Routing (Hako-like): 既定は FailFasthv1 直行は関数冒頭で処理済み)。
{
let s = preexpanded_owned.as_str();
let hako_like = s.contains("static box ") || s.contains("using selfhost.") || s.contains("using hakorune.");
let ff_env = std::env::var("HAKO_FAIL_FAST_ON_HAKO_IN_NYASH_VM").ok();
let fail_fast = match ff_env.as_deref() { Some("0")|Some("false")|Some("off") => false, _ => true };
let hako_like = s.contains("static box ")
|| s.contains("using selfhost.")
|| s.contains("using hakorune.");
let fail_fast = crate::runner::modes::common_util::hako::fail_fast_on_hako();
if hako_like && fail_fast {
eprintln!(
"❌ Hako-like source detected in Nyash VM path. Use Hakorune VM (v1 dispatcher) or Core/LLVM for MIR.\n hint: verify with HAKO_VERIFY_PRIMARY=hakovm"

View File

@ -57,39 +57,14 @@ impl NyashRunner {
// Dev sugar pre-expand: @name = expr → local name = expr
code2 = crate::runner::modes::common_util::resolve::preexpand_at_local(&code2);
// Hako-friendly normalize: strip leading `local ` at line head for Nyash parser compatibility.
fn looks_like_hako_code(s: &str) -> bool {
s.contains("using selfhost.") || s.lines().any(|l| l.trim_start().starts_with("local "))
}
fn strip_local_decl(s: &str) -> String {
let mut out = String::with_capacity(s.len());
for line in s.lines() {
let leading = line.len() - line.trim_start().len();
let (indent, rest) = line.split_at(leading);
if rest.starts_with("local ") || rest.starts_with("local\t") {
let bytes = rest.as_bytes();
let mut i = 5; // skip 'local'
while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') { i += 1; break; }
out.push_str(indent);
out.push_str(&rest[i..]);
out.push('\n');
} else {
out.push_str(line);
out.push('\n');
}
}
out
}
if looks_like_hako_code(&code2) {
code2 = strip_local_decl(&code2);
if crate::runner::modes::common_util::hako::looks_like_hako_code(&code2) {
code2 = crate::runner::modes::common_util::hako::strip_local_decl(&code2);
}
// FailFast (optin): Hako 構文を Nyash VM 経路で実行しない
// 目的: .hako は Hakorune VM、MIR は Core/LLVM に役割分離するためのガード
{
let on = match std::env::var("HAKO_FAIL_FAST_ON_HAKO_IN_NYASH_VM").ok().as_deref() {
Some("0")|Some("false")|Some("off") => false,
_ => true,
};
let on = crate::runner::modes::common_util::hako::fail_fast_on_hako();
if on {
let s = code2.as_str();
let hako_like = s.contains("static box ") || s.contains("using selfhost.") || s.contains("using hakorune.");

View File

@ -126,32 +126,75 @@ impl PluginHost {
/// Resolve a method handle for a given plugin box type and method name.
pub fn resolve_method(&self, box_type: &str, method_name: &str) -> BidResult<MethodHandle> {
let cfg = self.config.as_ref().ok_or(BidError::PluginError)?;
let (lib_name, _lib_def) = cfg
.find_library_for_box(box_type)
.ok_or(BidError::InvalidType)?;
let cfg_path = self.config_path.as_deref().unwrap_or("nyash.toml");
let toml_content = std::fs::read_to_string(cfg_path).map_err(|_| BidError::PluginError)?;
let toml_value: toml::Value =
toml::from_str(&toml_content).map_err(|_| BidError::PluginError)?;
let box_conf = cfg
.get_box_config(lib_name, box_type, &toml_value)
.ok_or(BidError::InvalidType)?;
// Prefer config mapping; fallback to loader's TypeBox resolve(name)
let (method_id, returns_result) = if let Some(m) = box_conf.methods.get(method_name) {
(m.method_id, m.returns_result)
} else {
let l = self.loader.read().unwrap();
let mid = l
.resolve_method_id(box_type, method_name)
.map_err(|_| BidError::InvalidMethod)?;
(mid, false)
};
// Path A: library-backed box (dynamic plugin)
if let Some((lib_name, _lib_def)) = cfg.find_library_for_box(box_type) {
if let Some(box_conf) = cfg.get_box_config(lib_name, box_type, &toml_value) {
// Prefer config mapping; fallback to loader's TypeBox resolve(name)
let (method_id, returns_result) = if let Some(m) = box_conf.methods.get(method_name) {
(m.method_id, m.returns_result)
} else {
let l = self.loader.read().unwrap();
let mid = l
.resolve_method_id(box_type, method_name)
.map_err(|_| BidError::InvalidMethod)?;
(mid, false)
};
return Ok(MethodHandle {
lib: lib_name.to_string(),
box_type: box_type.to_string(),
type_id: box_conf.type_id,
method_id,
returns_result,
});
}
}
// Path B: builtin/core boxes via central config (no library/path required)
// Require: [box_types] BoxName = <id> and [box_methods.BoxName.methods] entries
if let Some(type_id) = cfg.box_types.get(box_type).copied() {
if let Some(bm) = toml_value
.get("box_methods")
.and_then(|v| v.get(box_type))
.and_then(|v| v.get("methods"))
.and_then(|v| v.as_table())
{
if let Some(entry) = bm.get(method_name) {
// Support both { method_id = N } and bare integer in the future
let (method_id, returns_result) = if let Some(mid) = entry.get("method_id") {
(mid.as_integer().unwrap_or(0) as u32, entry.get("returns_result").and_then(|b| b.as_bool()).unwrap_or(false))
} else if let Some(mid) = entry.as_integer() {
(mid as u32, false)
} else {
return Err(BidError::InvalidMethod);
};
return Ok(MethodHandle {
lib: "builtin".to_string(),
box_type: box_type.to_string(),
type_id,
method_id,
returns_result,
});
}
}
}
// Fallback: delegate to loader (TypeBox, file-based, etc.)
let l = self.loader.read().unwrap();
let mid = l
.resolve_method_id(box_type, method_name)
.map_err(|_| BidError::InvalidMethod)?;
let type_id = *cfg.box_types.get(box_type).unwrap_or(&0);
Ok(MethodHandle {
lib: lib_name.to_string(),
lib: "builtin".to_string(),
box_type: box_type.to_string(),
type_id: box_conf.type_id,
method_id,
returns_result,
type_id,
method_id: mid,
returns_result: false,
})
}
@ -192,6 +235,36 @@ impl PluginHost {
/// Check if a method returns Result (Ok/Err) per plugin spec or central config.
///
/// Lookup order (first hit wins):
/// 1. `box_methods.<box_type>.methods.<method_name>` in the TOML config file;
///    a missing or non-boolean `returns_result` key counts as `false`.
/// 2. The library-backed box config obtained via `find_library_for_box` and
///    `get_box_config`.
/// 3. The loader's own `method_returns_result`.
///
/// Steps 1 and 2 only run when both `self.config` and `self.config_path` are
/// set and the file can be read and parsed as TOML; any failure there falls
/// through silently to step 3.
pub fn method_returns_result(&self, box_type: &str, method_name: &str) -> bool {
// Prefer central config when available (works for builtin boxes)
if let Some(cfg) = self.config.as_ref() {
if let Some(path) = self.config_path.as_deref() {
// NOTE(review): the config file is read and parsed again on every call.
if let Ok(toml_content) = std::fs::read_to_string(path) {
if let Ok(toml_value) = toml::from_str::<toml::Value>(&toml_content) {
// Step 1: central `box_methods` table entry for this box/method.
if let Some(bm) = toml_value
.get("box_methods")
.and_then(|v| v.get(box_type))
.and_then(|v| v.get("methods"))
.and_then(|v| v.as_table())
{
if let Some(entry) = bm.get(method_name) {
// An entry without a boolean `returns_result` is treated as `false`.
return entry
.get("returns_result")
.and_then(|b| b.as_bool())
.unwrap_or(false);
}
}
// Library-backed path
if let Some((lib_name, _)) = cfg.find_library_for_box(box_type) {
if let Some(box_conf) = cfg.get_box_config(lib_name, box_type, &toml_value) {
if let Some(m) = box_conf.methods.get(method_name) {
return m.returns_result;
}
}
}
}
}
}
}
// Step 3: nothing matched in the config; ask the loader.
// NOTE(review): `unwrap()` panics if `read()` returns an error — presumably a
// poisoned lock; confirm the lock type.
let l = self.loader.read().unwrap();
l.method_returns_result(box_type, method_name)
}

View File

@ -0,0 +1,33 @@
use crate::tokenizer::{NyashTokenizer, TokenType};
/// Tokenizes `src` and returns the payload of the first `STRING` token found.
///
/// Panics when tokenization fails or when no `STRING` token is produced.
fn collect_string_token(src: &str) -> String {
    let mut t = NyashTokenizer::new(src);
    let tokens = t.tokenize().expect("tokenize");
    // Scan the whole token stream; the first STRING token wins.
    for tok in tokens {
        if let TokenType::STRING(s) = tok.token_type {
            return s;
        }
    }
    panic!("no STRING token found");
}

/// Environment variables consulted by the Unicode decode toggle.
const DECODE_VARS: [&str; 2] = [
    "NYASH_PARSER_DECODE_UNICODE",
    "HAKO_PARSER_DECODE_UNICODE",
];

/// Serializes the tests below: they mutate process-wide environment variables,
/// and the test harness runs `#[test]` functions on parallel threads.
static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Scoped override of the decode toggle variables.
///
/// Holds `ENV_LOCK` for its whole lifetime and restores the previous values of
/// both variables on drop (also when the test panics).
struct DecodeEnv {
    saved: Vec<(&'static str, Option<std::ffi::OsString>)>,
    // Declared last so the lock is released only after the values are restored.
    _serial: std::sync::MutexGuard<'static, ()>,
}

impl DecodeEnv {
    /// Clears both toggle variables, then sets `NYASH_PARSER_DECODE_UNICODE`
    /// to `value` when one is given.
    ///
    /// `HAKO_PARSER_DECODE_UNICODE` is always cleared because the toggle
    /// consults it before the NYASH variable.
    fn with_nyash(value: Option<&str>) -> Self {
        // A panic in the other test poisons the lock; the guarded data is `()`,
        // so it is safe to keep going.
        let serial = ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let saved = DECODE_VARS
            .iter()
            .map(|&name| (name, std::env::var_os(name)))
            .collect();
        for name in DECODE_VARS.iter() {
            std::env::remove_var(name);
        }
        if let Some(v) = value {
            std::env::set_var(DECODE_VARS[0], v);
        }
        DecodeEnv {
            saved,
            _serial: serial,
        }
    }
}

impl Drop for DecodeEnv {
    fn drop(&mut self) {
        for (name, previous) in self.saved.drain(..) {
            match previous {
                Some(v) => std::env::set_var(name, v),
                None => std::env::remove_var(name),
            }
        }
    }
}

#[test]
fn unicode_decode_toggle_off_keeps_literal() {
    // OFF by default: neither variable is set.
    let _env = DecodeEnv::with_nyash(None);
    let s = collect_string_token("\"\\u0041\"");
    assert_eq!(s, "\\u0041");
}

#[test]
fn unicode_decode_toggle_on_decodes_basic_and_surrogate() {
    // ON: enable decode via the NYASH variable only.
    let _env = DecodeEnv::with_nyash(Some("1"));
    let s = collect_string_token("\"\\u0041\"");
    assert_eq!(s, "A");
    let s2 = collect_string_token("\"\\uD83D\\uDE00\"");
    // The surrogate pair D83D/DE00 must decode into the single scalar U+1F600 (😀).
    assert_eq!(s2.chars().count(), 1);
    assert_eq!(s2, "\u{1F600}");
}

View File

@ -28,6 +28,66 @@ impl NyashTokenizer {
Some('"') => string_value.push('"'),
Some('\'') => string_value.push('\''), // 1-quote: エスケープされたシングルクォート
Some('/') => string_value.push('/'), // \/ を許容
Some('u') => {
// Unicode decode (optional; default OFF)
if crate::config::env::parser_decode_unicode() {
let base = self.position; // index of 'u'
// read 4 hex digits without consuming; then advance position in bulk
let read_hex4 = |input: &Vec<char>, start: usize| -> Option<u32> {
if start + 4 > input.len() { return None; }
let d0 = input.get(start)?.to_digit(16)?;
let d1 = input.get(start + 1)?.to_digit(16)?;
let d2 = input.get(start + 2)?.to_digit(16)?;
let d3 = input.get(start + 3)?.to_digit(16)?;
Some((d0 << 12) | (d1 << 8) | (d2 << 4) | d3)
};
let first_start = base + 1; // after 'u'
if let Some(u1) = read_hex4(&self.input, first_start) {
// consume 'u' + 4 hex
self.position = base + 5;
let mut out_char: Option<char> = None;
// surrogate pair
if (0xD800..=0xDBFF).contains(&u1) {
if self.position + 6 <= self.input.len()
&& self.input.get(self.position) == Some(&'\\')
&& self.input.get(self.position + 1) == Some(&'u')
{
if let Some(u2) = read_hex4(&self.input, self.position + 2) {
if (0xDC00..=0xDFFF).contains(&u2) {
let high_ten = (u1 - 0xD800) as u32;
let low_ten = (u2 - 0xDC00) as u32;
let scalar = 0x10000 + ((high_ten << 10) | low_ten);
out_char = std::char::from_u32(scalar);
// consume '\\u' + 4 hex of low surrogate
self.position += 6;
}
}
}
}
if out_char.is_none() {
out_char = std::char::from_u32(u1 as u32);
}
if let Some(ch) = out_char {
string_value.push(ch);
// Skip the generic advance at loop end to avoid double step
continue;
} else {
// Fallback to literal when invalid
string_value.push('\\');
string_value.push('u');
continue;
}
} else {
// Not enough hex digits; keep literal
string_value.push('\\');
string_value.push('u');
}
} else {
// Decoding disabled → keep literal
string_value.push('\\');
string_value.push('u');
}
}
// NOTE: `\uXXXX` decoding is handled by the `Some('u')` arm above (opt-in; default OFF).
Some(c2) => {
// 未知のエスケープはそのまま残す(互換性維持)