hv1: early-exit at main (no plugin init); tokenizer: Stage-3 single-quote + full escapes (\/ \b \f \' \r fix); builder: route BinOp via SSOT emit_binop_to_dst; hv1 verify canary route (builder→Core); docs: phase-20.39 updates

This commit is contained in:
nyash-codex
2025-11-04 20:46:43 +09:00
parent 31ce798341
commit 44a5158a14
53 changed files with 2237 additions and 179 deletions

View File

@ -9,6 +9,25 @@ use nyash_rust::runner::NyashRunner;
/// Thin entry point - delegates to CLI parsing and runner execution
fn main() {
// hv1 direct (primary): earliest possible check before any bootstrap/log init
// If NYASH_VERIFY_JSON is present and route is requested, execute and exit.
// This avoids plugin host/registry initialization and keeps output minimal.
let has_json = std::env::var("NYASH_VERIFY_JSON").is_ok();
let route = std::env::var("HAKO_ROUTE_HAKOVM").ok().as_deref() == Some("1")
|| std::env::var("HAKO_VERIFY_PRIMARY").ok().as_deref() == Some("hakovm");
if has_json && route {
let json = std::env::var("NYASH_VERIFY_JSON").unwrap_or_default();
// Minimal runner (no plugin init here); config parse is cheap and has no side effects.
let cfg = CliConfig::parse();
let runner = NyashRunner::new(cfg);
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!("[hv1-direct] early-exit (main)");
}
let rc = nyash_rust::runner::core_executor::run_json_v0(&runner, &json);
println!("{}", rc);
std::process::exit(rc);
}
// Bootstrap env overrides from nyash.toml [env] early (管理棟)
env_config::bootstrap_from_toml_env();
// Parse command-line arguments

View File

@ -94,12 +94,20 @@ impl super::MirBuilder {
self.value_types.insert(dst, MirType::Integer);
} else {
// guard中は従来のBinOp
self.emit_instruction(MirInstruction::BinOp { dst, op, lhs, rhs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::binop_lower::emit_binop_to_dst(func, cur_bb, dst, op, lhs, rhs);
} else {
self.emit_instruction(MirInstruction::BinOp { dst, op, lhs, rhs })?;
}
self.value_types.insert(dst, MirType::Integer);
}
} else {
// 既存の算術経路
self.emit_instruction(MirInstruction::BinOp { dst, op, lhs, rhs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::binop_lower::emit_binop_to_dst(func, cur_bb, dst, op, lhs, rhs);
} else {
self.emit_instruction(MirInstruction::BinOp { dst, op, lhs, rhs })?;
}
if matches!(op, crate::mir::BinaryOp::Add) {
let lhs_is_str = match self.value_types.get(&lhs) {
Some(MirType::String) => true,
@ -122,7 +130,11 @@ impl super::MirBuilder {
}
} else {
// 既存の算術経路
self.emit_instruction(MirInstruction::BinOp { dst, op, lhs, rhs })?;
if let (Some(func), Some(cur_bb)) = (self.current_function.as_mut(), self.current_block) {
crate::mir::ssot::binop_lower::emit_binop_to_dst(func, cur_bb, dst, op, lhs, rhs);
} else {
self.emit_instruction(MirInstruction::BinOp { dst, op, lhs, rhs })?;
}
if matches!(op, crate::mir::BinaryOp::Add) {
let lhs_is_str = match self.value_types.get(&lhs) {
Some(MirType::String) => true,

View File

@ -18,6 +18,7 @@ pub mod instruction_introspection; // Introspection helpers for tests (instructi
pub mod types; // core MIR enums (ConstValue, Ops, MirType)
pub mod loop_api; // Minimal LoopBuilder facade (adapter-ready)
pub mod loop_builder; // SSA loop construction with phi nodes
pub mod ssot; // Shared helpers (SSOT) for instruction lowering
pub mod optimizer;
pub mod utils; // Phase 15 control flow utilities for root treatment
pub mod phi_core; // Phase 1 scaffold: unified PHI entry (re-exports only)

View File

@ -0,0 +1,50 @@
use crate::mir::{BasicBlockId, BinaryOp, MirFunction, MirInstruction, ValueId};
/// Map an operator token to its MIR `BinaryOp`.
///
/// Recognized tokens are `+`, `-`, `*`, `/`, `%`, `&`, `|`, `^`, `<<` and `>>`.
/// Any other string yields `None`; reporting the failure is left to the caller.
pub fn parse_binop_str(op: &str) -> Option<BinaryOp> {
    let parsed = match op {
        // Arithmetic
        "+" => BinaryOp::Add,
        "-" => BinaryOp::Sub,
        "*" => BinaryOp::Mul,
        "/" => BinaryOp::Div,
        "%" => BinaryOp::Mod,
        // Bitwise
        "&" => BinaryOp::BitAnd,
        "|" => BinaryOp::BitOr,
        "^" => BinaryOp::BitXor,
        // Shifts
        "<<" => BinaryOp::Shl,
        ">>" => BinaryOp::Shr,
        _ => return None,
    };
    Some(parsed)
}
/// Allocate a fresh destination id and append `dst = lhs <op> rhs` to `cur_bb`.
///
/// The destination comes from `f.next_value_id()` and is taken before the
/// block lookup, so an id is consumed even when `f.get_block_mut(cur_bb)`
/// returns `None`. In that case no instruction is appended by this call.
///
/// Returns the destination `ValueId`.
pub fn emit_binop_func(
    f: &mut MirFunction,
    cur_bb: BasicBlockId,
    op: BinaryOp,
    lhs: ValueId,
    rhs: ValueId,
) -> ValueId {
    let dst = f.next_value_id();
    match f.get_block_mut(cur_bb) {
        Some(block) => {
            block.add_instruction(MirInstruction::BinOp { dst, op, lhs, rhs });
        }
        // Block lookup failed: nothing is emitted, the id is still returned.
        None => {}
    }
    dst
}
/// Append `dst = lhs <op> rhs` to `cur_bb`, writing into a caller-supplied `dst`.
///
/// Intended for front-ends that allocate destination ids themselves (for
/// example builders with their own value id generator): they can share this
/// emission path without giving up their id allocation policy.
///
/// When `f.get_block_mut(cur_bb)` returns `None` the call does nothing.
pub fn emit_binop_to_dst(
    f: &mut MirFunction,
    cur_bb: BasicBlockId,
    dst: ValueId,
    op: BinaryOp,
    lhs: ValueId,
    rhs: ValueId,
) {
    let block = match f.get_block_mut(cur_bb) {
        Some(block) => block,
        None => return,
    };
    block.add_instruction(MirInstruction::BinOp { dst, op, lhs, rhs });
}

View File

@ -0,0 +1,26 @@
use crate::mir::{BasicBlockId, MirFunction, MirInstruction, ValueId, BinaryOp, ConstValue};
use std::collections::HashMap;
/// Emit `var_name = var_name + step` into `cur_bb` and rebind the variable,
/// so that a `continue` jumping back to the loop header carries the updated value.
///
/// * `vars` - name-to-value map; the entry for `var_name` is overwritten with
///   the id of the sum.
/// * `step` - signed amount added to the variable.
///
/// Returns `None` without emitting anything when `var_name` is not bound in
/// `vars`; otherwise returns the `ValueId` of the sum.
pub fn apply_increment_before_continue(
    f: &mut MirFunction,
    cur_bb: BasicBlockId,
    vars: &mut HashMap<String, ValueId>,
    var_name: &str,
    step: i64,
) -> Option<ValueId> {
    let current = vars.get(var_name).copied()?;

    // Materialize the step as an integer constant.
    let step_id = f.next_value_id();
    let step_const = MirInstruction::Const { dst: step_id, value: ConstValue::Integer(step) };
    if let Some(block) = f.get_block_mut(cur_bb) {
        block.add_instruction(step_const);
    }

    // current + step
    let updated = f.next_value_id();
    let add = MirInstruction::BinOp { dst: updated, op: BinaryOp::Add, lhs: current, rhs: step_id };
    if let Some(block) = f.get_block_mut(cur_bb) {
        block.add_instruction(add);
    }

    vars.insert(var_name.to_owned(), updated);
    Some(updated)
}

2
src/mir/ssot/mod.rs Normal file
View File

@ -0,0 +1,2 @@
/// Binary-operator helpers: `parse_binop_str`, `emit_binop_func`, `emit_binop_to_dst`.
pub mod binop_lower;
/// Loop lowering helpers: `apply_increment_before_continue`.
pub mod loop_common;

View File

@ -15,7 +15,7 @@
use super::NyashRunner;
use std::io::Write;
pub(crate) fn run_json_v0(runner: &NyashRunner, json: &str) -> i32 {
pub fn run_json_v0(runner: &NyashRunner, json: &str) -> i32 {
// Optional: direct Core Dispatcher via child nyash (boxed)
// Toggle: HAKO_CORE_DIRECT=1 (alias: NYASH_CORE_DIRECT)
let core_direct = std::env::var("HAKO_CORE_DIRECT").ok().as_deref() == Some("1")
@ -39,6 +39,26 @@ pub(crate) fn run_json_v0(runner: &NyashRunner, json: &str) -> i32 {
}
let mut payload = json.to_string();
// Fast-path: accept MIR(JSON v0) directly when it looks like a module (functions/blocks)
if payload.contains("\"functions\"") && payload.contains("\"blocks\"") {
match super::mir_json_v0::parse_mir_v0_to_module(&payload) {
Ok(module) => {
super::json_v0_bridge::maybe_dump_mir(&module);
crate::runner::child_env::pre_run_reset_oob_if_strict();
let rc = runner.execute_mir_module_quiet_exit(&module);
if crate::config::env::oob_strict_fail() && crate::runtime::observe::oob_seen() {
eprintln!("[gate-c][oob-strict] Out-of-bounds observed → exit(1)");
return 1;
}
return rc;
}
Err(e) => {
eprintln!("❌ MIR JSON v0 parse error: {}", e);
return 1;
}
}
}
// Always try the v1 bridge first (StageB Program JSON → MIR module).
// This is noop when input is already MIR(JSON v0) with functions/blocks.
if let Ok(j) = crate::runner::modes::common_util::core_bridge::canonicalize_module_json(&payload) {

View File

@ -9,6 +9,9 @@ use std::{fs, process};
/// Thin file dispatcher: select backend and delegate to mode executors
pub(crate) fn execute_file_with_backend(runner: &NyashRunner, filename: &str) {
// Note: hv1 direct route is now handled at main.rs entry point (before NyashRunner creation).
// This function is only called after plugins and runner initialization have already occurred.
// Selfhost pipeline (Ny -> JSON v0)
// Default: ON. Backward-compat envs:
// - NYASH_USE_NY_COMPILER={1|true|on} to force ON

View File

@ -1,7 +1,7 @@
use super::ast::{ProgramV0, StmtV0};
use super::ast::{ProgramV0, StmtV0, ExprV0};
use crate::mir::{
BasicBlockId, ConstValue, EffectMask, FunctionSignature, MirFunction, MirInstruction, MirModule,
MirPrinter, MirType, ValueId,
MirPrinter, MirType, ValueId, BinaryOp,
};
use std::collections::HashMap;
use std::cell::RefCell;
@ -28,6 +28,8 @@ pub(super) struct LoopContext {
thread_local! {
static EXIT_SNAPSHOT_STACK: RefCell<Vec<Vec<(BasicBlockId, HashMap<String, ValueId>)>>> = RefCell::new(Vec::new());
static CONT_SNAPSHOT_STACK: RefCell<Vec<Vec<(BasicBlockId, HashMap<String, ValueId>)>>> = RefCell::new(Vec::new());
// Optional increment hint for current loop frame: (var_name, step)
static INCR_HINT_STACK: RefCell<Vec<Option<(String, i64)>>> = RefCell::new(Vec::new());
}
pub(super) fn push_loop_snapshot_frames() {
@ -59,6 +61,35 @@ fn record_continue_snapshot(cur_bb: BasicBlockId, vars: &HashMap<String, ValueId
});
}
/// Scan a loop body for a simple self-increment and push the result as the
/// increment hint for the loop frame being entered.
///
/// The body is searched from the last statement backwards for a
/// `Local { name, expr }` whose `expr` is `name + <int>` or `name - <int>`.
/// The first match with a non-zero step wins and is pushed as
/// `Some((name, step))`, where subtraction is recorded as a negative step.
/// If nothing matches, `None` is pushed, so every call pushes exactly one
/// frame onto `INCR_HINT_STACK`.
pub(super) fn detect_and_push_increment_hint(body: &[StmtV0]) {
    // Match by reference: only the variable name of the winning statement is cloned.
    let hint = body.iter().rev().find_map(|stmt| {
        let (name, expr) = match stmt {
            StmtV0::Local { name, expr } => (name, expr),
            _ => return None,
        };
        let (op, lhs, rhs) = match expr {
            ExprV0::Binary { op, lhs, rhs } => (op, lhs, rhs),
            _ => return None,
        };
        match (&**lhs, &**rhs) {
            (ExprV0::Var { name: vname }, ExprV0::Int { value }) if vname == name => {
                let v = value.as_i64()?;
                let step = match op.as_str() {
                    "+" => v,
                    // wrapping_neg: negating i64::MIN must not panic in debug builds.
                    "-" => v.wrapping_neg(),
                    _ => 0,
                };
                // A zero step (unsupported operator or literal 0) is not a usable hint.
                if step != 0 {
                    Some((name.clone(), step))
                } else {
                    None
                }
            }
            _ => None,
        }
    });
    INCR_HINT_STACK.with(|stack| stack.borrow_mut().push(hint));
}
/// Pop the most recently pushed increment-hint frame.
///
/// Returns `None` both when the stack is empty and when the popped frame
/// carried no hint.
pub(super) fn pop_increment_hint() -> Option<(String, i64)> {
    INCR_HINT_STACK.with(|stack| stack.borrow_mut().pop().flatten())
}
/// Return a copy of the top increment-hint frame without removing it.
///
/// Returns `None` both when the stack is empty and when the top frame
/// carries no hint.
fn peek_increment_hint() -> Option<(String, i64)> {
    INCR_HINT_STACK.with(|stack| stack.borrow().last().and_then(|frame| frame.clone()))
}
#[derive(Clone)]
pub(super) struct BridgeEnv {
pub(super) throw_enabled: bool,
@ -172,7 +203,13 @@ pub(super) fn lower_stmt_with_vars(
}
StmtV0::Continue => {
if let Some(ctx) = loop_stack.last().copied() {
// snapshot variables at continue
// Optional: apply increment hint before continue (so header sees updated var)
if let Some((ref var_name, step)) = peek_increment_hint() {
let _ = crate::mir::ssot::loop_common::apply_increment_before_continue(
f, cur_bb, vars, var_name, step,
);
}
// snapshot variables at continue (after increment)
record_continue_snapshot(cur_bb, vars);
lower_continue_stmt(f, cur_bb, ctx.cond_bb);
}

View File

@ -152,22 +152,11 @@ pub(super) fn lower_expr_with_scope<S: VarScope>(
ExprV0::Binary { op, lhs, rhs } => {
let (l, cur_after_l) = lower_expr_with_scope(env, f, cur_bb, lhs, vars)?;
let (r, cur_after_r) = lower_expr_with_scope(env, f, cur_after_l, rhs, vars)?;
let bop = match op.as_str() {
"+" => BinaryOp::Add,
"-" => BinaryOp::Sub,
"*" => BinaryOp::Mul,
"/" => BinaryOp::Div,
_ => return Err("unsupported op".into()),
let bop = match crate::mir::ssot::binop_lower::parse_binop_str(op) {
Some(b) => b,
None => return Err("unsupported op".into()),
};
let dst = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur_after_r) {
bb.add_instruction(MirInstruction::BinOp {
dst,
op: bop,
lhs: l,
rhs: r,
});
}
let dst = crate::mir::ssot::binop_lower::emit_binop_func(f, cur_after_r, bop, l, r);
Ok((dst, cur_after_r))
}
ExprV0::Extern {

View File

@ -126,8 +126,11 @@ pub(super) fn lower_loop_stmt(
// open snapshot frames for nested break/continue
super::push_loop_snapshot_frames();
loop_stack.push(LoopContext { cond_bb, exit_bb });
// Detect simple increment hint for this loop body
super::detect_and_push_increment_hint(body);
let bend_res = lower_stmt_list_with_vars(ops.f, body_bb, body, &mut body_vars, loop_stack, env);
loop_stack.pop();
let _ = super::pop_increment_hint();
let bend = bend_res?;
// collect snapshots for this loop level
let continue_snaps = super::pop_continue_snapshots();

View File

@ -85,6 +85,15 @@ pub fn parse_mir_v0_to_module(json: &str) -> Result<MirModule, String> {
block_ref.add_instruction(MirInstruction::Copy { dst: ValueId::new(dst), src: ValueId::new(src) });
max_value_id = max_value_id.max(dst + 1);
}
"binop" => {
let dst = require_u64(inst, "dst", "binop dst")? as u32;
let lhs = require_u64(inst, "lhs", "binop lhs")? as u32;
let rhs = require_u64(inst, "rhs", "binop rhs")? as u32;
let operation = inst.get("operation").and_then(Value::as_str).ok_or_else(|| "binop missing operation".to_string())?;
let bop = parse_binop(operation)?;
block_ref.add_instruction(MirInstruction::BinOp { dst: ValueId::new(dst), op: bop, lhs: ValueId::new(lhs), rhs: ValueId::new(rhs) });
max_value_id = max_value_id.max(dst + 1);
}
"compare" => {
let dst = require_u64(inst, "dst", "compare dst")? as u32;
let lhs = require_u64(inst, "lhs", "compare lhs")? as u32;
@ -160,3 +169,15 @@ fn parse_compare(op: &str) -> Result<crate::mir::types::CompareOp, String> {
s => return Err(format!("unsupported compare op '{}'", s)),
})
}
/// Translate the operator string of a JSON `binop` instruction into a `BinaryOp`.
///
/// Accepts `+`, `-`, `*`, `/` and `%`.
///
/// # Errors
/// Returns `unsupported binary op '<op>'` for any other string.
fn parse_binop(op: &str) -> Result<crate::mir::types::BinaryOp, String> {
    use crate::mir::types::BinaryOp;
    match op {
        "+" => Ok(BinaryOp::Add),
        "-" => Ok(BinaryOp::Sub),
        "*" => Ok(BinaryOp::Mul),
        "/" => Ok(BinaryOp::Div),
        "%" => Ok(BinaryOp::Mod),
        other => Err(format!("unsupported binary op '{}'", other)),
    }
}

View File

@ -28,7 +28,7 @@ mod mir_json_v0;
pub mod mir_json_emit;
pub mod modes;
mod pipe_io;
mod core_executor;
pub mod core_executor;
mod pipeline;
mod jit_direct;
mod selfhost;

View File

@ -15,32 +15,8 @@ use std::{fs, process};
impl NyashRunner {
/// Execute VM mode (split)
pub(crate) fn execute_vm_mode(&self, filename: &str) {
// Fast-path: hv1 verify direct (bypass NyashParser)
// If NYASH_VERIFY_JSON is present and hv1 route is requested, parse JSON v1 → MIR and run Core interpreter.
// This avoids generating/compiling Hako inline drivers and stabilizes -c/inline verify flows.
let want_hv1_direct = {
let has_json = std::env::var("NYASH_VERIFY_JSON").is_ok();
let route = std::env::var("HAKO_ROUTE_HAKOVM").ok().as_deref() == Some("1")
|| std::env::var("HAKO_VERIFY_PRIMARY").ok().as_deref() == Some("hakovm");
has_json && route
};
if want_hv1_direct {
if let Ok(j) = std::env::var("NYASH_VERIFY_JSON") {
// Try v1 schema first, then v0 for compatibility
if let Ok(Some(module)) = crate::runner::json_v1_bridge::try_parse_v1_to_module(&j) {
let rc = self.execute_mir_module_quiet_exit(&module);
println!("{}", rc);
std::process::exit(rc);
}
if let Ok(module) = crate::runner::mir_json_v0::parse_mir_v0_to_module(&j) {
let rc = self.execute_mir_module_quiet_exit(&module);
println!("{}", rc);
std::process::exit(rc);
}
eprintln!("❌ hv1-direct: invalid JSON for MIR (v1/v0)");
std::process::exit(1);
}
}
// Note: hv1 direct route is now handled at main.rs entry point (before plugin initialization).
// This function is only called after plugin initialization has already occurred.
// Quiet mode for child pipelines (e.g., selfhost compiler JSON emit)
let quiet_pipe = std::env::var("NYASH_JSON_ONLY").ok().as_deref() == Some("1");

View File

@ -15,6 +15,9 @@ impl NyashRunner {
/// - Respects using preprocessing done earlier in the pipeline
/// - Relies on global plugin host initialized by runner
pub(crate) fn execute_vm_fallback_interpreter(&self, filename: &str) {
// Note: hv1 direct route is now handled at main.rs entry point (before plugin initialization).
// This function is only called after plugin initialization has already occurred.
// Read source
let code = match fs::read_to_string(filename) {
Ok(s) => s,

View File

@ -149,11 +149,12 @@ impl NyashTokenizer {
}
Some('"') => {
let string_value = self.read_string()?;
Ok(Token::new(
TokenType::STRING(string_value),
start_line,
start_column,
))
Ok(Token::new(TokenType::STRING(string_value), start_line, start_column))
}
// Stage3: シングルクォート文字列(オプトイン)
Some('\'') if crate::config::env::parser_stage3() => {
let string_value = self.read_single_quoted_string()?;
Ok(Token::new(TokenType::STRING(string_value), start_line, start_column))
}
Some(c) if c.is_ascii_digit() => {
let token_type = self.read_numeric_literal()?;

View File

@ -1,16 +1,17 @@
use super::{NyashTokenizer, TokenizeError};
impl NyashTokenizer {
/// 文字列リテラルを読み取り
pub(crate) fn read_string(&mut self) -> Result<String, TokenizeError> {
/// Reads a string literal; the delimiter `quote` may be '"' or '\''.
fn read_string_with_quote(&mut self, quote: char) -> Result<String, TokenizeError> {
let start_line = self.line;
self.advance(); // 開始の '"' をスキップ
// 開始の quote をスキップ
self.advance();
let mut string_value = String::new();
while let Some(c) = self.current_char() {
if c == '"' {
self.advance(); // 終了の '"' をスキップ
if c == quote {
self.advance(); // 終了の quote をスキップ
return Ok(string_value);
}
@ -21,11 +22,17 @@ impl NyashTokenizer {
Some('n') => string_value.push('\n'),
Some('t') => string_value.push('\t'),
Some('r') => string_value.push('\r'),
Some('b') => string_value.push('\u{0008}'), // backspace
Some('f') => string_value.push('\u{000C}'), // form feed
Some('\\') => string_value.push('\\'),
Some('"') => string_value.push('"'),
Some(c) => {
Some('\'') => string_value.push('\''), // 1-quote: エスケープされたシングルクォート
Some('/') => string_value.push('/'), // \/ を許容
// TODO: 将来 `\uXXXX` デコード既定OFF
Some(c2) => {
// 未知のエスケープはそのまま残す(互換性維持)
string_value.push('\\');
string_value.push(c);
string_value.push(c2);
}
None => break,
}
@ -38,5 +45,14 @@ impl NyashTokenizer {
Err(TokenizeError::UnterminatedString { line: start_line })
}
}
/// Backward-compatible reader for double-quoted strings.
/// Delegates to `read_string_with_quote` with `'"'` as the delimiter.
pub(crate) fn read_string(&mut self) -> Result<String, TokenizeError> {
self.read_string_with_quote('"')
}
/// Reads a single-quoted string (Stage-3 grammar extension).
/// Delegates to `read_string_with_quote` with `'\''` as the delimiter.
pub(crate) fn read_single_quoted_string(&mut self) -> Result<String, TokenizeError> {
self.read_string_with_quote('\'')
}
}