phase-20.45: PRIMARY no-fallback reps + MIR v0 shape fixes
- Fix MIR v0 shape in lowers: functions[] + name="main" + blocks.id
  * lower_return_int_box.hako
  * lower_return_binop_box.hako
- runner_min: adopt LowerReturnBinOpBox before ReturnInt
- Add PRIMARY no-fallback canaries (all PASS):
  * return-binop / array-size / load-store / return-logical (OR)
- Fix phase2043 runner_min canary alias (Runner -> BuilderRunnerMinBox)
- Update docs: phase-20.45 README (PRIMARY reps), CURRENT_TASK progress

Ancillary: keep builder/provider/canary files in sync; no unrelated behavior changes.
This commit is contained in:
@ -106,7 +106,7 @@ pub(super) fn try_handle_map_box(
|
||||
if let Some(d) = dst { this.regs.insert(d, VMValue::from_nyash_box(ret)); }
|
||||
return Ok(true);
|
||||
}
|
||||
"size" => {
|
||||
"len" | "length" | "size" => {
|
||||
let ret = mb.size();
|
||||
if let Some(d) = dst { this.regs.insert(d, VMValue::from_nyash_box(ret)); }
|
||||
return Ok(true);
|
||||
|
||||
@ -524,6 +524,15 @@ pub fn ny_compiler_use_tmp_only() -> bool {
|
||||
.as_deref()
|
||||
== Some("1")
|
||||
}
|
||||
|
||||
/// Unicode decode toggle for string literals (\uXXXX, optional surrogate pairs).
|
||||
/// Enabled when either HAKO_PARSER_DECODE_UNICODE=1 or NYASH_PARSER_DECODE_UNICODE=1.
|
||||
/// Default: OFF (for strict backward compatibility).
|
||||
pub fn parser_decode_unicode() -> bool {
|
||||
env_flag("HAKO_PARSER_DECODE_UNICODE")
|
||||
.or_else(|| env_flag("NYASH_PARSER_DECODE_UNICODE"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
fn env_flag(var: &str) -> Option<bool> {
|
||||
std::env::var(var).ok().map(|v| {
|
||||
let lv = v.to_ascii_lowercase();
|
||||
|
||||
@ -25,6 +25,7 @@ pub fn program_json_to_mir_json(program_json: &str) -> Result<String, String> {
|
||||
// Emit MIR(JSON) to a temporary file (reuse existing emitter), then read back
|
||||
let tmp_dir = std::env::temp_dir();
|
||||
let tmp_path = tmp_dir.join("hako_mirbuilder_out.json");
|
||||
// Emit MIR JSON (v0/v1 per env) via harness-bin emitter to a temp file
|
||||
if let Err(e) = runner::mir_json_emit::emit_mir_json_for_harness_bin(&module, &tmp_path) {
|
||||
let tag = format!("[mirbuilder/emit/error] {}", e);
|
||||
eprintln!("{}", tag);
|
||||
@ -43,4 +44,3 @@ pub fn program_json_to_mir_json(program_json: &str) -> Result<String, String> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -46,11 +46,12 @@ impl super::MirBuilder {
|
||||
phi_inputs.push((else_block, else_val));
|
||||
crate::mir::builder::emission::branch::emit_jump(self, merge_block)?;
|
||||
self.start_new_block(merge_block)?;
|
||||
// フェーズM: 常にPHI命令を使用(no_phi_mode撤廃)
|
||||
self.emit_instruction(super::MirInstruction::Phi {
|
||||
dst: result_val,
|
||||
inputs: phi_inputs,
|
||||
})?;
|
||||
// フェーズM: PHI はブロック先頭に配置(cf_common 統一)
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, result_val, phi_inputs);
|
||||
} else {
|
||||
self.emit_instruction(super::MirInstruction::Phi { dst: result_val, inputs: phi_inputs })?;
|
||||
}
|
||||
return Ok(result_val);
|
||||
}
|
||||
|
||||
@ -101,11 +102,12 @@ impl super::MirBuilder {
|
||||
|
||||
// Merge and yield result
|
||||
self.start_new_block(merge_block)?;
|
||||
// フェーズM: 常にPHI命令を使用(no_phi_mode撤廃)
|
||||
self.emit_instruction(super::MirInstruction::Phi {
|
||||
dst: result_val,
|
||||
inputs: phi_inputs,
|
||||
})?;
|
||||
// フェーズM: PHI はブロック先頭に配置(cf_common 統一)
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, result_val, phi_inputs);
|
||||
} else {
|
||||
self.emit_instruction(super::MirInstruction::Phi { dst: result_val, inputs: phi_inputs })?;
|
||||
}
|
||||
Ok(result_val)
|
||||
}
|
||||
}
|
||||
|
||||
@ -68,7 +68,11 @@ impl MirBuilder {
|
||||
for (name, &pre_v) in pre_if_var_map.iter() {
|
||||
let phi_val = self.value_gen.next();
|
||||
let inputs = vec![(pre_branch_bb, pre_v)];
|
||||
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, phi_val, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
|
||||
}
|
||||
self.variable_map.insert(name.clone(), phi_val);
|
||||
if trace_if {
|
||||
eprintln!(
|
||||
@ -99,7 +103,11 @@ impl MirBuilder {
|
||||
for (name, &pre_v) in pre_if_var_map.iter() {
|
||||
let phi_val = self.value_gen.next();
|
||||
let inputs = vec![(pre_branch_bb, pre_v)];
|
||||
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, phi_val, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
|
||||
}
|
||||
self.variable_map.insert(name.clone(), phi_val);
|
||||
if trace_if {
|
||||
eprintln!(
|
||||
@ -121,7 +129,7 @@ impl MirBuilder {
|
||||
if else_reaches_merge {
|
||||
// Scope leave for else-branch
|
||||
self.hint_scope_leave(0);
|
||||
self.emit_instruction(MirInstruction::Jump { target: merge_block })?;
|
||||
crate::mir::builder::emission::branch::emit_jump(self, merge_block)?;
|
||||
}
|
||||
// Pop else-branch debug region
|
||||
self.debug_pop_region();
|
||||
|
||||
@ -261,7 +261,11 @@ impl super::MirBuilder {
|
||||
for (name, &pre_v) in pre_if_var_map.iter() {
|
||||
let phi_val = self.value_gen.next();
|
||||
let inputs = vec![(pre_branch_bb, pre_v)];
|
||||
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, phi_val, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
|
||||
}
|
||||
self.variable_map.insert(name.clone(), phi_val);
|
||||
}
|
||||
|
||||
@ -290,13 +294,13 @@ impl super::MirBuilder {
|
||||
self.start_new_block(rhs_join)?;
|
||||
let rhs_bool = self.value_gen.next();
|
||||
let inputs = vec![(rhs_true_exit, t_id), (rhs_false_exit, f_id)];
|
||||
if let Some(func) = self.current_function.as_mut() {
|
||||
func.update_cfg();
|
||||
}
|
||||
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
|
||||
if let Some(func) = self.current_function.as_mut() { func.update_cfg(); }
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, rhs_bool, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: rhs_bool, inputs })?;
|
||||
}
|
||||
self.emit_instruction(MirInstruction::Phi { dst: rhs_bool, inputs })?;
|
||||
self.value_types.insert(rhs_bool, MirType::Bool);
|
||||
rhs_bool
|
||||
} else {
|
||||
@ -319,7 +323,11 @@ impl super::MirBuilder {
|
||||
for (name, &pre_v) in pre_if_var_map.iter() {
|
||||
let phi_val = self.value_gen.next();
|
||||
let inputs = vec![(pre_branch_bb, pre_v)];
|
||||
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, phi_val, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: phi_val, inputs })?;
|
||||
}
|
||||
self.variable_map.insert(name.clone(), phi_val);
|
||||
}
|
||||
// AND: else → false
|
||||
@ -349,13 +357,13 @@ impl super::MirBuilder {
|
||||
self.start_new_block(rhs_join)?;
|
||||
let rhs_bool = self.value_gen.next();
|
||||
let inputs = vec![(rhs_true_exit, t_id), (rhs_false_exit, f_id)];
|
||||
if let Some(func) = self.current_function.as_mut() {
|
||||
func.update_cfg();
|
||||
}
|
||||
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
|
||||
if let Some(func) = self.current_function.as_mut() { func.update_cfg(); }
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, rhs_bool, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: rhs_bool, inputs })?;
|
||||
}
|
||||
self.emit_instruction(MirInstruction::Phi { dst: rhs_bool, inputs })?;
|
||||
self.value_types.insert(rhs_bool, MirType::Bool);
|
||||
rhs_bool
|
||||
};
|
||||
@ -364,7 +372,7 @@ impl super::MirBuilder {
|
||||
let else_var_map_end = self.variable_map.clone();
|
||||
if else_reaches_merge {
|
||||
self.hint_scope_leave(0);
|
||||
self.emit_instruction(MirInstruction::Jump { target: merge_block })?;
|
||||
crate::mir::builder::emission::branch::emit_jump(self, merge_block)?;
|
||||
}
|
||||
|
||||
// ---- MERGE ----
|
||||
@ -379,11 +387,13 @@ impl super::MirBuilder {
|
||||
if else_reaches_merge { inputs.push((else_exit_block, else_value_raw)); }
|
||||
let result_val = if inputs.len() >= 2 {
|
||||
if let Some(func) = self.current_function.as_mut() { func.update_cfg(); }
|
||||
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
|
||||
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
|
||||
}
|
||||
let dst = self.value_gen.next();
|
||||
self.emit_instruction(MirInstruction::Phi { dst, inputs })?;
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, dst, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst, inputs })?;
|
||||
}
|
||||
self.value_types.insert(dst, MirType::Bool);
|
||||
dst
|
||||
} else if inputs.len() == 1 {
|
||||
|
||||
@ -57,7 +57,11 @@ impl MirBuilder {
|
||||
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
|
||||
}
|
||||
let merged = self.value_gen.next();
|
||||
self.emit_instruction(MirInstruction::Phi { dst: merged, inputs })?;
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, merged, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: merged, inputs })?;
|
||||
}
|
||||
self.variable_map.insert(name, merged);
|
||||
}
|
||||
}
|
||||
@ -90,7 +94,11 @@ impl MirBuilder {
|
||||
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
|
||||
}
|
||||
let merged = self.value_gen.next();
|
||||
self.emit_instruction(MirInstruction::Phi { dst: merged, inputs })?;
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, merged, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: merged, inputs })?;
|
||||
}
|
||||
self.variable_map.insert(pin_name.clone(), merged);
|
||||
}
|
||||
}
|
||||
@ -160,7 +168,11 @@ impl MirBuilder {
|
||||
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
|
||||
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
|
||||
}
|
||||
self.emit_instruction(MirInstruction::Phi { dst: result_val, inputs })?;
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, result_val, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: result_val, inputs })?;
|
||||
}
|
||||
}
|
||||
}
|
||||
self.variable_map = pre_if_var_map.clone();
|
||||
@ -183,7 +195,11 @@ impl MirBuilder {
|
||||
if let (Some(func), Some(cur_bb)) = (&self.current_function, self.current_block) {
|
||||
crate::mir::phi_core::common::debug_verify_phi_inputs(func, cur_bb, &inputs);
|
||||
}
|
||||
self.emit_instruction(MirInstruction::Phi { dst: result_val, inputs })?;
|
||||
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
|
||||
crate::mir::ssot::cf_common::insert_phi_at_head(func, cur_bb, result_val, inputs);
|
||||
} else {
|
||||
self.emit_instruction(MirInstruction::Phi { dst: result_val, inputs })?;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Merge variable map conservatively to pre-if snapshot (no new bindings)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
use crate::mir::{
|
||||
function::{FunctionSignature, MirFunction, MirModule},
|
||||
BasicBlock, BasicBlockId, ConstValue, MirInstruction, MirType, ValueId,
|
||||
BasicBlock, BasicBlockId, ConstValue, EffectMask, MirInstruction, MirType, ValueId,
|
||||
};
|
||||
use serde_json::Value;
|
||||
use super::mir_json::common as mirjson_common;
|
||||
@ -132,6 +132,39 @@ pub fn parse_mir_v0_to_module(json: &str) -> Result<MirModule, String> {
|
||||
block_ref.add_instruction(MirInstruction::Return { value });
|
||||
if let Some(val) = value { signature.return_type = MirType::Integer; max_value_id = max_value_id.max(val.as_u32() + 1); } else { signature.return_type = MirType::Void; }
|
||||
}
|
||||
"newbox" => {
|
||||
let dst = require_u64(inst, "dst", "newbox dst")? as u32;
|
||||
let ty = inst.get("type").and_then(Value::as_str).ok_or_else(|| "newbox missing type".to_string())?.to_string();
|
||||
let args_v = inst.get("args").and_then(Value::as_array).cloned().unwrap_or_default();
|
||||
let mut args: Vec<ValueId> = Vec::with_capacity(args_v.len());
|
||||
for a in args_v {
|
||||
let id = a.as_u64().ok_or_else(|| "newbox arg must be integer".to_string())? as u32;
|
||||
args.push(ValueId::new(id));
|
||||
}
|
||||
block_ref.add_instruction(MirInstruction::NewBox { dst: ValueId::new(dst), box_type: ty, args });
|
||||
max_value_id = max_value_id.max(dst + 1);
|
||||
}
|
||||
"boxcall" => {
|
||||
// { op:"boxcall", box:<vid>, method:"name", args:[vid...], dst?:<vid> }
|
||||
let box_id = require_u64(inst, "box", "boxcall box")? as u32;
|
||||
let method = inst.get("method").and_then(Value::as_str).ok_or_else(|| "boxcall missing method".to_string())?.to_string();
|
||||
let dst_opt = inst.get("dst").and_then(Value::as_u64).map(|v| ValueId::new(v as u32));
|
||||
let args_v = inst.get("args").and_then(Value::as_array).cloned().unwrap_or_default();
|
||||
let mut args: Vec<ValueId> = Vec::with_capacity(args_v.len());
|
||||
for a in args_v {
|
||||
let id = a.as_u64().ok_or_else(|| "boxcall arg must be integer".to_string())? as u32;
|
||||
args.push(ValueId::new(id));
|
||||
}
|
||||
block_ref.add_instruction(MirInstruction::BoxCall {
|
||||
dst: dst_opt,
|
||||
box_val: ValueId::new(box_id),
|
||||
method,
|
||||
method_id: None,
|
||||
args,
|
||||
effects: EffectMask::READ,
|
||||
});
|
||||
if let Some(dv) = dst_opt { max_value_id = max_value_id.max(dv.as_u32() + 1); }
|
||||
}
|
||||
other => {
|
||||
return Err(format!("unsupported op '{}' in mir_json_v0 loader", other));
|
||||
}
|
||||
|
||||
48
src/runner/modes/common_util/hako.rs
Normal file
48
src/runner/modes/common_util/hako.rs
Normal file
@ -0,0 +1,48 @@
|
||||
/*!
|
||||
* Hako-like source detection and minimal normalization helpers.
|
||||
*
|
||||
* - looks_like_hako_code: heuristics to detect Hako surface in Nyash path
|
||||
* - strip_local_decl: drop leading `local ` at line head for Nyash parser compatibility
|
||||
* - fail_fast_on_hako: env-gated policy (default ON) to fail fast on Hako-like source in Nyash VM path
|
||||
*/
|
||||
|
||||
/// Heuristic detection of Hako-like source (development-only convenience).
///
/// Returns `true` when `s` contains a `using selfhost.` or `using hakorune.` directive
/// anywhere, or when any line starts (after leading whitespace) with the `local` keyword
/// followed by a space or a tab.
///
/// The tab form is accepted because `strip_local_decl` strips both `local ` and
/// `local\t`; detecting only the space form would leave tab-separated declarations
/// unnormalized.
pub fn looks_like_hako_code(s: &str) -> bool {
    s.contains("using selfhost.")
        || s.contains("using hakorune.")
        || s.lines().any(|line| {
            let head = line.trim_start();
            head.starts_with("local ") || head.starts_with("local\t")
        })
}
|
||||
|
||||
/// Remove a leading `local` keyword from each line to keep the Nyash parser stable.
///
/// For every line whose first non-whitespace token is `local` followed by a space or
/// a tab, the keyword and exactly ONE following space/tab are dropped; the line's
/// original indentation and the remainder of the line are kept. All other lines are
/// copied unchanged.
///
/// Line endings are normalized: input is split with `str::lines` and every emitted
/// line is terminated with `\n`, so non-empty output always ends with a newline even
/// if the input did not.
pub fn strip_local_decl(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for line in s.lines() {
        let rest = line.trim_start();
        let indent = &line[..line.len() - rest.len()];
        // `Some(tail)` only when `local` is followed by a separator, so identifiers
        // such as `localhost` are left alone.
        let after_keyword = rest
            .strip_prefix("local")
            .filter(|tail| tail.starts_with(' ') || tail.starts_with('\t'));
        match after_keyword {
            Some(tail) => {
                out.push_str(indent);
                // Drop a single separator (1 byte: ' ' or '\t'); any further
                // whitespace belongs to the remainder and is preserved.
                out.push_str(&tail[1..]);
            }
            None => out.push_str(line),
        }
        out.push('\n');
    }
    out
}
|
||||
|
||||
/// Policy toggle: fail fast when Hako-like code enters the Nyash VM path.
///
/// Reads `HAKO_FAIL_FAST_ON_HAKO_IN_NYASH_VM`. The guard is disabled only when the
/// variable is set to `0`, `false` or `off` (compared ASCII case-insensitively, so
/// `OFF` / `False` work too). Any other value, an unset variable, or a value that
/// is not valid Unicode leaves the guard enabled.
///
/// Default: ON (true)
pub fn fail_fast_on_hako() -> bool {
    match std::env::var("HAKO_FAIL_FAST_ON_HAKO_IN_NYASH_VM") {
        Ok(value) => {
            let disabled = ["0", "false", "off"]
                .iter()
                .any(|off| value.eq_ignore_ascii_case(off));
            !disabled
        }
        // Unset (or unreadable) → keep the safe default.
        Err(_) => true,
    }
}
|
||||
|
||||
@ -11,3 +11,4 @@ pub mod selfhost;
|
||||
pub mod resolve;
|
||||
pub mod exec;
|
||||
pub mod core_bridge;
|
||||
pub mod hako;
|
||||
|
||||
@ -359,11 +359,12 @@ pub fn resolve_prelude_paths_profiled(
|
||||
) -> Result<(String, Vec<String>), String> {
|
||||
// First pass: strip using from the main source and collect direct prelude paths
|
||||
let (cleaned, direct) = collect_using_and_strip(runner, code, filename)?;
|
||||
// When AST using is enabled、recursively collect nested preludes in DFS order
|
||||
// Recursively collect nested preludes (DFS) for both AST/text merges.
|
||||
// Rationale: even when we merge via text, nested `using` inside preludes
|
||||
// must be discovered so that their definitions are present at runtime
|
||||
// (e.g., runner_min -> lower_* boxes). Previously this only ran when
|
||||
// NYASH_USING_AST=1, which caused unresolved calls in inline flows.
|
||||
let ast_on = std::env::var("NYASH_USING_AST").ok().as_deref() == Some("1");
|
||||
if !ast_on {
|
||||
return Ok((cleaned, direct));
|
||||
}
|
||||
let mut out: Vec<String> = Vec::new();
|
||||
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
fn normalize_path(path: &str) -> (String, String) {
|
||||
@ -451,6 +452,9 @@ pub fn resolve_prelude_paths_profiled(
|
||||
}
|
||||
}
|
||||
}
|
||||
// If AST merge is disabled, still return the discovered nested prelude list
|
||||
// so that the text merger can inline all dependencies. This keeps behavior
|
||||
// consistent across strategies and fixes nested `using` resolution.
|
||||
Ok((cleaned, out))
|
||||
}
|
||||
|
||||
@ -659,6 +663,40 @@ pub fn merge_prelude_text(
|
||||
|
||||
// First pass: collect and resolve prelude paths
|
||||
let (cleaned_main, prelude_paths) = resolve_prelude_paths_profiled(runner, source, filename)?;
|
||||
// Expand nested preludes for text-merge too (DFS) so that any `using`
|
||||
// inside prelude files (e.g., runner_min -> lower_* boxes) are also
|
||||
// included even when NYASH_USING_AST is OFF.
|
||||
let mut expanded: Vec<String> = Vec::new();
|
||||
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
fn canonize(p: &str) -> String {
|
||||
std::fs::canonicalize(p)
|
||||
.ok()
|
||||
.map(|pb| pb.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| p.to_string())
|
||||
}
|
||||
fn dfs_text(
|
||||
runner: &NyashRunner,
|
||||
path: &str,
|
||||
out: &mut Vec<String>,
|
||||
seen: &mut std::collections::HashSet<String>,
|
||||
) -> Result<(), String> {
|
||||
let key = canonize(path);
|
||||
if !seen.insert(key.clone()) {
|
||||
return Ok(());
|
||||
}
|
||||
let src = std::fs::read_to_string(path)
|
||||
.map_err(|e| format!("using: failed to read '{}': {}", path, e))?;
|
||||
let (_cleaned, nested) = collect_using_and_strip(runner, &src, path)?;
|
||||
for n in nested.iter() {
|
||||
dfs_text(runner, n, out, seen)?;
|
||||
}
|
||||
out.push(key);
|
||||
Ok(())
|
||||
}
|
||||
for p in prelude_paths.iter() {
|
||||
dfs_text(runner, p, &mut expanded, &mut seen)?;
|
||||
}
|
||||
let prelude_paths = &expanded;
|
||||
|
||||
if prelude_paths.is_empty() {
|
||||
// No using statements, return original
|
||||
|
||||
@ -138,38 +138,16 @@ impl NyashRunner {
|
||||
|
||||
// Hako-friendly normalize: strip leading `local ` at line head for parser compatibility.
|
||||
// This keeps semantics close enough for our inline/selfhost drivers while we unify frontends.
|
||||
fn looks_like_hako_code(s: &str) -> bool {
|
||||
s.contains("using selfhost.") || s.lines().any(|l| l.trim_start().starts_with("local "))
|
||||
}
|
||||
fn strip_local_decl(s: &str) -> String {
|
||||
let mut out = String::with_capacity(s.len());
|
||||
for line in s.lines() {
|
||||
let leading = line.len() - line.trim_start().len();
|
||||
let (indent, rest) = line.split_at(leading);
|
||||
if rest.starts_with("local ") || rest.starts_with("local\t") {
|
||||
// drop the first token `local` and a single following space/tab
|
||||
let mut bytes = rest.as_bytes();
|
||||
let mut i = 5; // after 'local'
|
||||
while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') { i += 1; break; }
|
||||
out.push_str(indent);
|
||||
out.push_str(&rest[i..]);
|
||||
out.push('\n');
|
||||
} else {
|
||||
out.push_str(line);
|
||||
out.push('\n');
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
if looks_like_hako_code(&preexpanded_owned) {
|
||||
preexpanded_owned = strip_local_decl(&preexpanded_owned);
|
||||
if crate::runner::modes::common_util::hako::looks_like_hako_code(&preexpanded_owned) {
|
||||
preexpanded_owned = crate::runner::modes::common_util::hako::strip_local_decl(&preexpanded_owned);
|
||||
}
|
||||
// Routing (Hako-like): 既定は Fail‑Fast(hv1 直行は関数冒頭で処理済み)。
|
||||
{
|
||||
let s = preexpanded_owned.as_str();
|
||||
let hako_like = s.contains("static box ") || s.contains("using selfhost.") || s.contains("using hakorune.");
|
||||
let ff_env = std::env::var("HAKO_FAIL_FAST_ON_HAKO_IN_NYASH_VM").ok();
|
||||
let fail_fast = match ff_env.as_deref() { Some("0")|Some("false")|Some("off") => false, _ => true };
|
||||
let hako_like = s.contains("static box ")
|
||||
|| s.contains("using selfhost.")
|
||||
|| s.contains("using hakorune.");
|
||||
let fail_fast = crate::runner::modes::common_util::hako::fail_fast_on_hako();
|
||||
if hako_like && fail_fast {
|
||||
eprintln!(
|
||||
"❌ Hako-like source detected in Nyash VM path. Use Hakorune VM (v1 dispatcher) or Core/LLVM for MIR.\n hint: verify with HAKO_VERIFY_PRIMARY=hakovm"
|
||||
|
||||
@ -57,39 +57,14 @@ impl NyashRunner {
|
||||
// Dev sugar pre-expand: @name = expr → local name = expr
|
||||
code2 = crate::runner::modes::common_util::resolve::preexpand_at_local(&code2);
|
||||
// Hako-friendly normalize: strip leading `local ` at line head for Nyash parser compatibility.
|
||||
fn looks_like_hako_code(s: &str) -> bool {
|
||||
s.contains("using selfhost.") || s.lines().any(|l| l.trim_start().starts_with("local "))
|
||||
}
|
||||
fn strip_local_decl(s: &str) -> String {
|
||||
let mut out = String::with_capacity(s.len());
|
||||
for line in s.lines() {
|
||||
let leading = line.len() - line.trim_start().len();
|
||||
let (indent, rest) = line.split_at(leading);
|
||||
if rest.starts_with("local ") || rest.starts_with("local\t") {
|
||||
let bytes = rest.as_bytes();
|
||||
let mut i = 5; // skip 'local'
|
||||
while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') { i += 1; break; }
|
||||
out.push_str(indent);
|
||||
out.push_str(&rest[i..]);
|
||||
out.push('\n');
|
||||
} else {
|
||||
out.push_str(line);
|
||||
out.push('\n');
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
if looks_like_hako_code(&code2) {
|
||||
code2 = strip_local_decl(&code2);
|
||||
if crate::runner::modes::common_util::hako::looks_like_hako_code(&code2) {
|
||||
code2 = crate::runner::modes::common_util::hako::strip_local_decl(&code2);
|
||||
}
|
||||
|
||||
// Fail‑Fast (opt‑in): Hako 構文を Nyash VM 経路で実行しない
|
||||
// 目的: .hako は Hakorune VM、MIR は Core/LLVM に役割分離するためのガード
|
||||
{
|
||||
let on = match std::env::var("HAKO_FAIL_FAST_ON_HAKO_IN_NYASH_VM").ok().as_deref() {
|
||||
Some("0")|Some("false")|Some("off") => false,
|
||||
_ => true,
|
||||
};
|
||||
let on = crate::runner::modes::common_util::hako::fail_fast_on_hako();
|
||||
if on {
|
||||
let s = code2.as_str();
|
||||
let hako_like = s.contains("static box ") || s.contains("using selfhost.") || s.contains("using hakorune.");
|
||||
|
||||
@ -126,32 +126,75 @@ impl PluginHost {
|
||||
/// Resolve a method handle for a given plugin box type and method name.
|
||||
pub fn resolve_method(&self, box_type: &str, method_name: &str) -> BidResult<MethodHandle> {
|
||||
let cfg = self.config.as_ref().ok_or(BidError::PluginError)?;
|
||||
let (lib_name, _lib_def) = cfg
|
||||
.find_library_for_box(box_type)
|
||||
.ok_or(BidError::InvalidType)?;
|
||||
let cfg_path = self.config_path.as_deref().unwrap_or("nyash.toml");
|
||||
let toml_content = std::fs::read_to_string(cfg_path).map_err(|_| BidError::PluginError)?;
|
||||
let toml_value: toml::Value =
|
||||
toml::from_str(&toml_content).map_err(|_| BidError::PluginError)?;
|
||||
let box_conf = cfg
|
||||
.get_box_config(lib_name, box_type, &toml_value)
|
||||
.ok_or(BidError::InvalidType)?;
|
||||
// Prefer config mapping; fallback to loader's TypeBox resolve(name)
|
||||
let (method_id, returns_result) = if let Some(m) = box_conf.methods.get(method_name) {
|
||||
(m.method_id, m.returns_result)
|
||||
} else {
|
||||
let l = self.loader.read().unwrap();
|
||||
let mid = l
|
||||
.resolve_method_id(box_type, method_name)
|
||||
.map_err(|_| BidError::InvalidMethod)?;
|
||||
(mid, false)
|
||||
};
|
||||
|
||||
// Path A: library-backed box (dynamic plugin)
|
||||
if let Some((lib_name, _lib_def)) = cfg.find_library_for_box(box_type) {
|
||||
if let Some(box_conf) = cfg.get_box_config(lib_name, box_type, &toml_value) {
|
||||
// Prefer config mapping; fallback to loader's TypeBox resolve(name)
|
||||
let (method_id, returns_result) = if let Some(m) = box_conf.methods.get(method_name) {
|
||||
(m.method_id, m.returns_result)
|
||||
} else {
|
||||
let l = self.loader.read().unwrap();
|
||||
let mid = l
|
||||
.resolve_method_id(box_type, method_name)
|
||||
.map_err(|_| BidError::InvalidMethod)?;
|
||||
(mid, false)
|
||||
};
|
||||
return Ok(MethodHandle {
|
||||
lib: lib_name.to_string(),
|
||||
box_type: box_type.to_string(),
|
||||
type_id: box_conf.type_id,
|
||||
method_id,
|
||||
returns_result,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Path B: builtin/core boxes via central config (no library/path required)
|
||||
// Require: [box_types] BoxName = <id> and [box_methods.BoxName.methods] entries
|
||||
if let Some(type_id) = cfg.box_types.get(box_type).copied() {
|
||||
if let Some(bm) = toml_value
|
||||
.get("box_methods")
|
||||
.and_then(|v| v.get(box_type))
|
||||
.and_then(|v| v.get("methods"))
|
||||
.and_then(|v| v.as_table())
|
||||
{
|
||||
if let Some(entry) = bm.get(method_name) {
|
||||
// Support both { method_id = N } and bare integer in the future
|
||||
let (method_id, returns_result) = if let Some(mid) = entry.get("method_id") {
|
||||
(mid.as_integer().unwrap_or(0) as u32, entry.get("returns_result").and_then(|b| b.as_bool()).unwrap_or(false))
|
||||
} else if let Some(mid) = entry.as_integer() {
|
||||
(mid as u32, false)
|
||||
} else {
|
||||
return Err(BidError::InvalidMethod);
|
||||
};
|
||||
return Ok(MethodHandle {
|
||||
lib: "builtin".to_string(),
|
||||
box_type: box_type.to_string(),
|
||||
type_id,
|
||||
method_id,
|
||||
returns_result,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: delegate to loader (TypeBox, file-based, etc.)
|
||||
let l = self.loader.read().unwrap();
|
||||
let mid = l
|
||||
.resolve_method_id(box_type, method_name)
|
||||
.map_err(|_| BidError::InvalidMethod)?;
|
||||
let type_id = *cfg.box_types.get(box_type).unwrap_or(&0);
|
||||
Ok(MethodHandle {
|
||||
lib: lib_name.to_string(),
|
||||
lib: "builtin".to_string(),
|
||||
box_type: box_type.to_string(),
|
||||
type_id: box_conf.type_id,
|
||||
method_id,
|
||||
returns_result,
|
||||
type_id,
|
||||
method_id: mid,
|
||||
returns_result: false,
|
||||
})
|
||||
}
|
||||
|
||||
@ -192,6 +235,36 @@ impl PluginHost {
|
||||
|
||||
/// Check if a method returns Result (Ok/Err) per plugin spec or central config.
|
||||
pub fn method_returns_result(&self, box_type: &str, method_name: &str) -> bool {
|
||||
// Prefer central config when available (works for builtin boxes)
|
||||
if let Some(cfg) = self.config.as_ref() {
|
||||
if let Some(path) = self.config_path.as_deref() {
|
||||
if let Ok(toml_content) = std::fs::read_to_string(path) {
|
||||
if let Ok(toml_value) = toml::from_str::<toml::Value>(&toml_content) {
|
||||
if let Some(bm) = toml_value
|
||||
.get("box_methods")
|
||||
.and_then(|v| v.get(box_type))
|
||||
.and_then(|v| v.get("methods"))
|
||||
.and_then(|v| v.as_table())
|
||||
{
|
||||
if let Some(entry) = bm.get(method_name) {
|
||||
return entry
|
||||
.get("returns_result")
|
||||
.and_then(|b| b.as_bool())
|
||||
.unwrap_or(false);
|
||||
}
|
||||
}
|
||||
// Library-backed path
|
||||
if let Some((lib_name, _)) = cfg.find_library_for_box(box_type) {
|
||||
if let Some(box_conf) = cfg.get_box_config(lib_name, box_type, &toml_value) {
|
||||
if let Some(m) = box_conf.methods.get(method_name) {
|
||||
return m.returns_result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let l = self.loader.read().unwrap();
|
||||
l.method_returns_result(box_type, method_name)
|
||||
}
|
||||
|
||||
33
src/tests/tokenizer_unicode_toggle.rs
Normal file
33
src/tests/tokenizer_unicode_toggle.rs
Normal file
@ -0,0 +1,33 @@
|
||||
use crate::tokenizer::{NyashTokenizer, TokenType};
|
||||
|
||||
fn collect_string_token(src: &str) -> String {
|
||||
let mut t = NyashTokenizer::new(src);
|
||||
let tokens = t.tokenize().expect("tokenize");
|
||||
// Expect first non-EOF token to be STRING
|
||||
for tok in tokens {
|
||||
if let TokenType::STRING(s) = tok.token_type { return s; }
|
||||
}
|
||||
panic!("no STRING token found");
|
||||
}
|
||||
|
||||
/// Serializes the tests below: they mutate process-wide environment variables, and the
/// default test runner executes `#[test]` functions of one binary on parallel threads.
static UNICODE_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Acquire the env lock, ignoring poisoning so one failed test does not cascade
/// into an unrelated `PoisonError` panic in the other.
fn lock_unicode_env() -> std::sync::MutexGuard<'static, ()> {
    UNICODE_ENV_LOCK
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
}

#[test]
fn unicode_decode_toggle_off_keeps_literal() {
    let _env = lock_unicode_env();
    // OFF by default
    std::env::remove_var("NYASH_PARSER_DECODE_UNICODE");
    std::env::remove_var("HAKO_PARSER_DECODE_UNICODE");
    let s = collect_string_token("\"\\u0041\"");
    assert_eq!(s, "\\u0041");
}

#[test]
fn unicode_decode_toggle_on_decodes_basic_and_surrogate() {
    let _env = lock_unicode_env();
    // ON: enable decode. Clear the HAKO_* variable first: it is consulted before the
    // NYASH_* one, so a value inherited from the environment could mask the setting.
    std::env::remove_var("HAKO_PARSER_DECODE_UNICODE");
    std::env::set_var("NYASH_PARSER_DECODE_UNICODE", "1");
    let basic = collect_string_token("\"\\u0041\"");
    let pair = collect_string_token("\"\\uD83D\\uDE00\"");
    // Restore the default before asserting so a failure here does not leave the
    // toggle enabled for other tests in the same process.
    std::env::remove_var("NYASH_PARSER_DECODE_UNICODE");

    assert_eq!(basic, "A");
    // Surrogate pair D83D/DE00 must decode into the single scalar U+1F600 (😀).
    assert_eq!(pair, "\u{1F600}");
}
|
||||
|
||||
@ -28,6 +28,66 @@ impl NyashTokenizer {
|
||||
Some('"') => string_value.push('"'),
|
||||
Some('\'') => string_value.push('\''), // 1-quote: エスケープされたシングルクォート
|
||||
Some('/') => string_value.push('/'), // \/ を許容
|
||||
Some('u') => {
    // `\uXXXX` escape. Decoding is optional (default OFF); when it is disabled or the
    // escape is malformed, the text is kept literally.
    if crate::config::env::parser_decode_unicode() {
        let base = self.position; // index of 'u'
        // Read 4 hex digits starting at `start` without consuming anything.
        let read_hex4 = |input: &[char], start: usize| -> Option<u32> {
            input
                .get(start..start + 4)?
                .iter()
                .try_fold(0u32, |acc, c| Some((acc << 4) | c.to_digit(16)?))
        };
        let first_start = base + 1; // first hex digit after 'u'
        if let Some(u1) = read_hex4(&self.input, first_start) {
            // consume 'u' + 4 hex
            self.position = base + 5;
            let mut out_char: Option<char> = None;
            // High surrogate: try to combine with an immediately following `\uXXXX`
            // low surrogate into one scalar value.
            if (0xD800..=0xDBFF).contains(&u1)
                && self.input.get(self.position) == Some(&'\\')
                && self.input.get(self.position + 1) == Some(&'u')
            {
                if let Some(u2) = read_hex4(&self.input, self.position + 2) {
                    if (0xDC00..=0xDFFF).contains(&u2) {
                        let high_ten = u1 - 0xD800;
                        let low_ten = u2 - 0xDC00;
                        let scalar = 0x10000 + ((high_ten << 10) | low_ten);
                        out_char = std::char::from_u32(scalar);
                        // consume '\\u' + 4 hex of low surrogate
                        self.position += 6;
                    }
                }
            }
            // No pair was formed: interpret the code unit on its own.
            if out_char.is_none() {
                out_char = std::char::from_u32(u1);
            }
            match out_char {
                Some(ch) => string_value.push(ch),
                None => {
                    // Unpaired surrogate: not a valid scalar value. The four hex
                    // digits were already consumed above, so write the complete
                    // escape back literally instead of only `\u`, which would
                    // silently drop the digits from the string.
                    string_value.push('\\');
                    string_value.push('u');
                    for &digit in &self.input[first_start..first_start + 4] {
                        string_value.push(digit);
                    }
                }
            }
            // Skip the generic advance at loop end to avoid double step
            continue;
        } else {
            // Not enough hex digits; keep literal
            string_value.push('\\');
            string_value.push('u');
        }
    } else {
        // Decoding disabled → keep literal
        string_value.push('\\');
        string_value.push('u');
    }
}
|
||||
// TODO: 将来 `\uXXXX` デコード(既定OFF)
|
||||
Some(c2) => {
|
||||
// 未知のエスケープはそのまま残す(互換性維持)
|
||||
|
||||
Reference in New Issue
Block a user