selfhost(pyvm): MiniVmPrints – prefer JSON route early-return (ok==1) to avoid fallback loops; keep default behavior unchanged elsewhere

This commit is contained in:
Selfhosting Dev
2025-09-22 07:54:25 +09:00
parent 27568eb4a6
commit 8e4cadd349
348 changed files with 9981 additions and 30074 deletions

View File

@ -1,930 +0,0 @@
/*!
* CLI Argument Parsing Module - Nyash Command Line Interface
*
* This module handles all command-line argument parsing using clap,
* separating CLI concerns from the main execution logic.
*/
use clap::{Arg, ArgMatches, Command};
use serde_json;
/// Command-line configuration structure
///
/// Flat record with one public field per CLI option. `from_matches` fills it
/// from the parsed clap matches, and `as_groups` re-exposes the same values as
/// categorized sub-structs.
#[derive(Debug, Clone)]
pub struct CliConfig {
// File input (Nyash source)
pub file: Option<String>,
// Parser debug fuel (`--debug-fuel`); `None` when the value is "unlimited"
// or is not a valid number
pub debug_fuel: Option<usize>,
pub dump_ast: bool,
pub dump_mir: bool,
// Set by `--verify`
pub verify_mir: bool,
pub mir_verbose: bool,
pub mir_verbose_effects: bool,
pub no_optimize: bool,
// Execution backend name (`--backend`; the CLI default is "vm")
pub backend: String,
pub compile_wasm: bool,
// True when either `--compile-native` or `--aot` is given
pub compile_native: bool,
pub output_file: Option<String>,
pub benchmark: bool,
// Benchmark iteration count (`--iterations`); falls back to 10 when the
// supplied value does not parse
pub iterations: u32,
pub vm_stats: bool,
pub vm_stats_json: bool,
// JIT controls
pub jit_exec: bool,
pub jit_stats: bool,
pub jit_stats_json: bool,
pub jit_dump: bool,
pub jit_events: bool,
pub jit_events_compile: bool,
pub jit_events_runtime: bool,
pub jit_events_path: Option<String>,
// `--jit-threshold`; `None` when absent or not a valid u32
pub jit_threshold: Option<u32>,
pub jit_phi_min: bool,
pub jit_hostcall: bool,
pub jit_handle_debug: bool,
pub jit_native_f64: bool,
pub jit_native_bool: bool,
pub jit_only: bool,
pub jit_direct: bool,
// DOT emit helper
pub emit_cfg: Option<String>,
// Verbose CLI
pub cli_verbose: bool,
// Tasks
pub run_task: Option<String>,
// Ny script plugins enumeration (opt-in)
pub load_ny_plugins: bool,
// Parser choice: 'ny' (direct v0 bridge) when true, otherwise default rust
pub parser_ny: bool,
// Phase-15: JSON IR v0 bridge
pub ny_parser_pipe: bool,
pub json_file: Option<String>,
// GC mode (dev; forwarded to env as NYASH_GC_MODE)
pub gc_mode: Option<String>,
// Build system (MVP)
pub build_path: Option<String>,
pub build_app: Option<String>,
pub build_out: Option<String>,
pub build_aot: Option<String>,
pub build_profile: Option<String>,
pub build_target: Option<String>,
// Using (CLI): every `--using` occurrence, in the order given
pub cli_usings: Vec<String>,
// Emit MIR JSON to a file and exit (bridge mode)
pub emit_mir_json: Option<String>,
// Emit native executable via ny-llvmc (crate) and exit
pub emit_exe: Option<String>,
pub emit_exe_nyrt: Option<String>,
pub emit_exe_libs: Option<String>,
// Macro child (sandbox) mode
pub macro_expand_child: Option<String>,
// Dump expanded AST as JSON and exit
pub dump_expanded_ast_json: bool,
// MacroCtx (caps) JSON for child macro route
pub macro_ctx_json: Option<String>,
}
/// Grouped views (Phase 1: non-breaking). These structs provide a categorized
/// lens over the flat CliConfig without changing public fields.
///
/// `InputConfig` carries the input-related subset produced by
/// `CliConfig::as_groups`.
#[derive(Debug, Clone)]
pub struct InputConfig {
// Clone of `CliConfig::file`
pub file: Option<String>,
// Clone of `CliConfig::cli_usings`
pub cli_usings: Vec<String>,
}
/// Debug/diagnostic subset of `CliConfig`, filled by `CliConfig::as_groups`
/// from the identically named flat fields.
#[derive(Debug, Clone)]
pub struct DebugConfig {
pub debug_fuel: Option<usize>,
pub dump_ast: bool,
pub dump_mir: bool,
pub verify_mir: bool,
pub mir_verbose: bool,
pub mir_verbose_effects: bool,
pub cli_verbose: bool,
}
/// Backend selection plus the VM and JIT switches, as grouped by
/// `CliConfig::as_groups`.
#[derive(Debug, Clone)]
pub struct BackendConfig {
// Clone of `CliConfig::backend`
pub backend: String,
// VM
pub vm_stats: bool,
pub vm_stats_json: bool,
// JIT
pub jit: JitConfig,
}
/// JIT switches. Each field mirrors the `jit_*` field of `CliConfig` with the
/// same suffix (for example `exec` is copied from `jit_exec`).
#[derive(Debug, Clone)]
pub struct JitConfig {
pub exec: bool,
pub stats: bool,
pub stats_json: bool,
pub dump: bool,
pub events: bool,
pub events_compile: bool,
pub events_runtime: bool,
pub events_path: Option<String>,
pub threshold: Option<u32>,
pub phi_min: bool,
pub hostcall: bool,
pub handle_debug: bool,
pub native_f64: bool,
pub native_bool: bool,
pub only: bool,
pub direct: bool,
}
/// Build-system options. Each field mirrors the `build_*` field of
/// `CliConfig` with the same suffix (for example `path` is cloned from
/// `build_path`).
#[derive(Debug, Clone)]
pub struct BuildConfig {
pub path: Option<String>,
pub app: Option<String>,
pub out: Option<String>,
pub aot: Option<String>,
pub profile: Option<String>,
pub target: Option<String>,
}
/// Emit-related options, cloned by `CliConfig::as_groups` from the flat
/// fields of the same names.
#[derive(Debug, Clone)]
pub struct EmitConfig {
pub emit_cfg: Option<String>,
pub emit_mir_json: Option<String>,
pub emit_exe: Option<String>,
pub emit_exe_nyrt: Option<String>,
pub emit_exe_libs: Option<String>,
}
/// Parser selection and JSON IR input options, copied by
/// `CliConfig::as_groups` from the flat fields of the same names.
#[derive(Debug, Clone)]
pub struct ParserPipeConfig {
pub parser_ny: bool,
pub ny_parser_pipe: bool,
pub json_file: Option<String>,
}
/// Categorized view returned by `CliConfig::as_groups`.
///
/// Related options are bundled into sub-structs; the remaining options are
/// carried over as plain fields with the same names they have on `CliConfig`.
#[derive(Debug, Clone)]
pub struct CliGroups {
pub input: InputConfig,
pub debug: DebugConfig,
pub backend: BackendConfig,
pub build: BuildConfig,
pub emit: EmitConfig,
pub parser: ParserPipeConfig,
// Ungrouped options, copied verbatim from `CliConfig`
pub gc_mode: Option<String>,
pub compile_wasm: bool,
pub compile_native: bool,
pub output_file: Option<String>,
pub benchmark: bool,
pub iterations: u32,
pub run_task: Option<String>,
pub load_ny_plugins: bool,
}
impl CliConfig {
/// Build a `CliConfig` from the process arguments.
///
/// A bare `--` splits the command line: everything after it is treated as
/// script arguments and, when non-empty, exported as a JSON array through the
/// `NYASH_SCRIPT_ARGS_JSON` environment variable. Only the part before `--`
/// is handed to clap. Without `--`, clap parses the process arguments
/// itself. A clap parse error ends the process via the error's `exit()`.
pub fn parse() -> Self {
    let raw_args: Vec<String> = std::env::args().collect();
    let separator = raw_args.iter().position(|arg| arg == "--");

    let matches = match separator {
        Some(idx) => {
            // Arguments following the separator belong to the script.
            let trailing: Vec<String> = raw_args[idx + 1..].to_vec();
            if !trailing.is_empty() {
                if let Ok(encoded) = serde_json::to_string(&trailing) {
                    std::env::set_var("NYASH_SCRIPT_ARGS_JSON", encoded);
                }
            }
            // clap only sees what precedes the separator.
            Self::build_command()
                .try_get_matches_from(&raw_args[..idx])
                .unwrap_or_else(|err| err.exit())
        }
        None => Self::build_command().get_matches(),
    };

    Self::from_matches(&matches)
}
/// Produce the categorized `CliGroups` view of this configuration.
///
/// Every value is copied (or cloned, for owned strings and vectors) out of
/// `self`; the flat fields are left untouched, so downstream code can adopt
/// the grouped layout gradually.
pub fn as_groups(&self) -> CliGroups {
    let input = InputConfig {
        file: self.file.clone(),
        cli_usings: self.cli_usings.clone(),
    };

    let debug = DebugConfig {
        debug_fuel: self.debug_fuel,
        dump_ast: self.dump_ast,
        dump_mir: self.dump_mir,
        verify_mir: self.verify_mir,
        mir_verbose: self.mir_verbose,
        mir_verbose_effects: self.mir_verbose_effects,
        cli_verbose: self.cli_verbose,
    };

    let jit = JitConfig {
        exec: self.jit_exec,
        stats: self.jit_stats,
        stats_json: self.jit_stats_json,
        dump: self.jit_dump,
        events: self.jit_events,
        events_compile: self.jit_events_compile,
        events_runtime: self.jit_events_runtime,
        events_path: self.jit_events_path.clone(),
        threshold: self.jit_threshold,
        phi_min: self.jit_phi_min,
        hostcall: self.jit_hostcall,
        handle_debug: self.jit_handle_debug,
        native_f64: self.jit_native_f64,
        native_bool: self.jit_native_bool,
        only: self.jit_only,
        direct: self.jit_direct,
    };

    let backend = BackendConfig {
        backend: self.backend.clone(),
        vm_stats: self.vm_stats,
        vm_stats_json: self.vm_stats_json,
        jit,
    };

    let build = BuildConfig {
        path: self.build_path.clone(),
        app: self.build_app.clone(),
        out: self.build_out.clone(),
        aot: self.build_aot.clone(),
        profile: self.build_profile.clone(),
        target: self.build_target.clone(),
    };

    let emit = EmitConfig {
        emit_cfg: self.emit_cfg.clone(),
        emit_mir_json: self.emit_mir_json.clone(),
        emit_exe: self.emit_exe.clone(),
        emit_exe_nyrt: self.emit_exe_nyrt.clone(),
        emit_exe_libs: self.emit_exe_libs.clone(),
    };

    let parser = ParserPipeConfig {
        parser_ny: self.parser_ny,
        ny_parser_pipe: self.ny_parser_pipe,
        json_file: self.json_file.clone(),
    };

    CliGroups {
        input,
        debug,
        backend,
        build,
        emit,
        parser,
        // Options without a dedicated group are passed through as-is.
        gc_mode: self.gc_mode.clone(),
        compile_wasm: self.compile_wasm,
        compile_native: self.compile_native,
        output_file: self.output_file.clone(),
        benchmark: self.benchmark,
        iterations: self.iterations,
        run_task: self.run_task.clone(),
        load_ny_plugins: self.load_ny_plugins,
    }
}
/// Build the clap Command structure
fn build_command() -> Command {
Command::new("nyash")
.version("1.0")
.author("Claude Code <claude@anthropic.com>")
.about("🦀 Nyash Programming Language - Everything is Box in Rust! 🦀")
.arg(
Arg::new("file")
.help("Nyash file to execute")
.value_name("FILE")
.index(1)
)
.arg(
Arg::new("macro-expand-child")
.long("macro-expand-child")
.value_name("FILE")
.help("Macro sandbox child: read AST JSON v0 from stdin, expand using Nyash macro file, write AST JSON v0 to stdout (PoC)")
)
.arg(
Arg::new("dump-expanded-ast-json")
.long("dump-expanded-ast-json")
.help("Dump AST after macro expansion as JSON v0 and exit")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("macro-ctx-json")
.long("macro-ctx-json")
.value_name("JSON")
.help("Provide MacroCtx as JSON string (e.g., {\"caps\":{\"io\":false,\"net\":false,\"env\":true}}) for macro child routes")
)
.arg(
Arg::new("gc")
.long("gc")
.value_name("{auto,rc+cycle,minorgen,stw,rc,off}")
.help("Select GC mode (default: rc+cycle)")
)
.arg(
Arg::new("parser")
.long("parser")
.value_name("{rust|ny}")
.help("Choose parser: 'rust' (default) or 'ny' (direct v0 bridge)")
)
.arg(
Arg::new("ny-parser-pipe")
.long("ny-parser-pipe")
.help("Read Ny JSON IR v0 from stdin and execute via MIR Interpreter")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("json-file")
.long("json-file")
.value_name("FILE")
.help("Read Ny JSON IR v0 from a file and execute via MIR Interpreter")
)
.arg(
Arg::new("emit-mir-json")
.long("emit-mir-json")
.value_name("FILE")
.help("Emit MIR JSON v0 to file (validation-friendly) and exit")
)
.arg(
Arg::new("emit-exe")
.long("emit-exe")
.value_name("FILE")
.help("Emit native executable via ny-llvmc (crate) and exit")
)
.arg(
Arg::new("emit-exe-nyrt")
.long("emit-exe-nyrt")
.value_name("DIR")
.help("Directory containing libnyrt.a (used with --emit-exe)")
)
.arg(
Arg::new("emit-exe-libs")
.long("emit-exe-libs")
.value_name("FLAGS")
.help("Extra linker flags for ny-llvmc when emitting executable")
)
.arg(
Arg::new("stage3")
.long("stage3")
.help("Enable Stage-3 syntax acceptance for selfhost parser (sets NYASH_NY_COMPILER_STAGE3=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("ny-compiler-args")
.long("ny-compiler-args")
.value_name("ARGS")
.help("Pass additional args to selfhost child compiler (equivalent to NYASH_NY_COMPILER_CHILD_ARGS)")
)
.arg(
Arg::new("using")
.long("using")
.value_name("SPEC")
.help("Register a using entry (e.g., 'ns as Alias' or '\"apps/foo.nyash\" as Foo'). Repeatable.")
.action(clap::ArgAction::Append)
)
.arg(
Arg::new("debug-fuel")
.long("debug-fuel")
.value_name("ITERATIONS")
.help("Set parser debug fuel limit (default: 100000, 'unlimited' for no limit)")
.default_value("100000")
)
.arg(
Arg::new("dump-ast")
.long("dump-ast")
.help("Dump parsed AST and exit")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("profile")
.long("profile")
.value_name("{lite|dev|ci|strict}")
.help("Set execution profile: lite (macros OFF), dev (macros ON), ci (macros ON+strict), strict (macros ON+strict). Default run behaves like dev for macros.")
)
.arg(
Arg::new("expand")
.long("expand")
.help("Macro: enable macro engine and dump expansion traces (sets NYASH_MACRO_ENABLE=1, NYASH_MACRO_TRACE=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("macro-preexpand")
.long("macro-preexpand")
.help("Self-host: pre-expand macros before MIR compile (sets NYASH_MACRO_SELFHOST_PRE_EXPAND=1). Requires NYASH_USE_NY_COMPILER=1 and NYASH_VM_USE_PY=1.")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("macro-preexpand-auto")
.long("macro-preexpand-auto")
.help("Self-host: pre-expand macros in auto mode (sets NYASH_MACRO_SELFHOST_PRE_EXPAND=auto). Requires NYASH_USE_NY_COMPILER=1 and NYASH_VM_USE_PY=1.")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("macro-top-level-allow")
.long("macro-top-level-allow")
.help("Allow top-level static MacroBoxSpec.expand(json[,ctx]) without BoxDeclaration (sets NYASH_MACRO_TOPLEVEL_ALLOW=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("macro-profile")
.long("macro-profile")
.value_name("{dev|ci-fast|strict}")
.help("Convenience: configure macro envs for dev/ci-fast/strict. Non-breaking; can be overridden by explicit envs/flags.")
)
.arg(
Arg::new("run-tests")
.long("run-tests")
.help("Run tests: enable macro engine and inject test harness (functions starting with 'test_')")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("test-filter")
.long("test-filter")
.value_name("SUBSTR")
.help("Only run tests whose name contains SUBSTR (with --run-tests)")
)
.arg(
Arg::new("test-entry")
.long("test-entry")
.value_name("{wrap|override}")
.help("When --run-tests and a main exists: wrap (run tests then call original) or override (replace main with test harness). Default: keep original (no harness). Use with --run-tests.")
)
.arg(
Arg::new("test-return")
.long("test-return")
.value_name("{tests|original}")
.help("When --run-tests with --test-entry wrap: choose harness return policy (tests: return failures count; original: return original main()'s result)")
)
.arg(
Arg::new("dump-mir")
.long("dump-mir")
.help("Dump MIR (Mid-level Intermediate Representation) instead of executing")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("verify")
.long("verify")
.help("Verify MIR integrity and exit")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("mir-verbose")
.long("mir-verbose")
.help("Show verbose MIR output with statistics")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("mir-verbose-effects")
.long("mir-verbose-effects")
.help("Show per-instruction effect category (pure/readonly/side)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("no-optimize")
.long("no-optimize")
.help("Disable MIR optimizer passes (dump raw Builder MIR)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("backend")
.long("backend")
.value_name("BACKEND")
.help("Choose execution backend: 'vm' (default), 'llvm', or 'interpreter' (legacy)")
.default_value("vm")
)
.arg(
Arg::new("verbose")
.long("verbose")
.short('v')
.help("Verbose CLI output (sets NYASH_CLI_VERBOSE=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("compile-wasm")
.long("compile-wasm")
.help("Compile to WebAssembly (WAT/WASM). Requires --features wasm-backend")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("compile-native")
.long("compile-native")
.help("Compile to native executable (AOT). Requires --features cranelift-jit")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("aot")
.long("aot")
.help("Short form of --compile-native")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("output")
.long("output")
.short('o')
.value_name("FILE")
.help("Output file (for WASM compilation or AOT executable)")
)
.arg(
Arg::new("benchmark")
.long("benchmark")
.help("Run performance benchmarks across all backends")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("iterations")
.long("iterations")
.value_name("COUNT")
.help("Number of iterations for benchmarks (default: 10)")
.default_value("10")
)
.arg(
Arg::new("vm-stats")
.long("vm-stats")
.help("Enable VM instruction statistics (equivalent to NYASH_VM_STATS=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("vm-stats-json")
.long("vm-stats-json")
.help("Output VM statistics in JSON format")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-exec")
.long("jit-exec")
.help("Enable JIT execution where available (NYASH_JIT_EXEC=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-stats")
.long("jit-stats")
.help("Print JIT compilation/execution statistics (NYASH_JIT_STATS=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-stats-json")
.long("jit-stats-json")
.help("Output JIT statistics in JSON format (NYASH_JIT_STATS_JSON=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-dump")
.long("jit-dump")
.help("Dump JIT lowering summary (NYASH_JIT_DUMP=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-events")
.long("jit-events")
.help("Emit JIT events as JSONL (NYASH_JIT_EVENTS=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-events-compile")
.long("jit-events-compile")
.help("Emit compile-time (lower) JIT events (NYASH_JIT_EVENTS_COMPILE=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-events-runtime")
.long("jit-events-runtime")
.help("Emit runtime JIT events (NYASH_JIT_EVENTS_RUNTIME=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-events-path")
.long("jit-events-path")
.value_name("FILE")
.help("Write JIT events JSONL to file (NYASH_JIT_EVENTS_PATH)")
)
.arg(
Arg::new("jit-threshold")
.long("jit-threshold")
.value_name("N")
.help("Set hotness threshold for JIT compilation (NYASH_JIT_THRESHOLD)")
)
.arg(
Arg::new("jit-phi-min")
.long("jit-phi-min")
.help("Enable minimal PHI path for branches (NYASH_JIT_PHI_MIN=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-hostcall")
.long("jit-hostcall")
.help("Enable JIT hostcall bridge for Array/Map (NYASH_JIT_HOSTCALL=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-handle-debug")
.long("jit-handle-debug")
.help("Print JIT handle allocation debug logs (NYASH_JIT_HANDLE_DEBUG=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-native-f64")
.long("jit-native-f64")
.help("Enable native f64 ABI path in JIT (NYASH_JIT_NATIVE_F64=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-native-bool")
.long("jit-native-bool")
.help("Enable native bool ABI path in JIT (NYASH_JIT_NATIVE_BOOL=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-only")
.long("jit-only")
.help("Run JIT only (no VM fallback). Fails if JIT is unavailable (NYASH_JIT_ONLY=1)")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("jit-direct")
.long("jit-direct")
.help("Run program via independent JIT engine (no VM interpreter/executor). Requires --features cranelift-jit")
.action(clap::ArgAction::SetTrue)
)
.arg(
Arg::new("emit-cfg")
.long("emit-cfg")
.value_name("DOT_FILE")
.help("Emit JIT CFG as DOT to file (equivalent to setting NYASH_JIT_DOT)")
)
.arg(
Arg::new("run-task")
.long("run-task")
.value_name("NAME")
.help("Run a named task defined in nyash.toml [tasks]")
)
.arg(
Arg::new("load-ny-plugins")
.long("load-ny-plugins")
.help("Opt-in: read [ny_plugins] from nyash.toml and load scripts in order")
.action(clap::ArgAction::SetTrue)
)
// Build system (MVP)
.arg(
Arg::new("build")
.long("build")
.value_name("PATH")
.help("Build AOT executable using nyash.toml at PATH (MVP)")
)
.arg(
Arg::new("build-app")
.long("app")
.value_name("FILE")
.help("Entry Nyash script for --build (e.g., apps/hello/main.nyash)")
)
.arg(
Arg::new("build-out")
.long("out")
.value_name("FILE")
.help("Output executable name for --build (default: app/app.exe)")
)
.arg(
Arg::new("build-aot")
.long("build-aot")
.value_name("{cranelift|llvm}")
.help("AOT backend for --build (default: cranelift)")
)
.arg(
Arg::new("build-profile")
.long("profile")
.value_name("{release|debug}")
.help("Cargo profile for --build (default: release)")
)
.arg(
Arg::new("build-target")
.long("target")
.value_name("TRIPLE")
.help("Target triple for --build (e.g., x86_64-pc-windows-msvc)")
)
}
/// Translate parsed clap matches into a `CliConfig`.
///
/// Besides filling the struct, this forwards several options to the
/// environment (`NYASH_*` variables) so that later stages and child
/// processes can read them. The environment writes happen in a fixed order;
/// a later option may overwrite a variable set by an earlier one (for
/// example `--profile` resets `NYASH_MACRO_TRACE` after `--expand` set it).
fn from_matches(matches: &ArgMatches) -> Self {
    // Short accessors for the two common lookup shapes.
    let flag = |id: &str| matches.get_flag(id);
    let text = |id: &str| matches.get_one::<String>(id).cloned();

    // Stage-3 gate: when specified via CLI, set env for selfhost child
    if flag("stage3") {
        std::env::set_var("NYASH_NY_COMPILER_STAGE3", "1");
    }
    // Forward child args for the selfhost compiler via env
    if let Some(child_args) = matches.get_one::<String>("ny-compiler-args") {
        std::env::set_var("NYASH_NY_COMPILER_CHILD_ARGS", child_args);
    }

    // Values that need more than a plain lookup.
    let debug_fuel = parse_debug_fuel(matches.get_one::<String>("debug-fuel").unwrap());
    let iterations: u32 = matches
        .get_one::<String>("iterations")
        .unwrap()
        .parse()
        .unwrap_or(10);
    let jit_threshold = matches
        .get_one::<String>("jit-threshold")
        .and_then(|raw| raw.parse::<u32>().ok());
    let parser_ny = matches!(
        matches.get_one::<String>("parser").map(String::as_str),
        Some("ny")
    );
    let cli_usings: Vec<String> = matches
        .get_many::<String>("using")
        .map(|values| values.cloned().collect())
        .unwrap_or_default();

    let cfg = Self {
        file: text("file"),
        debug_fuel,
        dump_ast: flag("dump-ast"),
        dump_mir: flag("dump-mir"),
        verify_mir: flag("verify"),
        mir_verbose: flag("mir-verbose"),
        mir_verbose_effects: flag("mir-verbose-effects"),
        no_optimize: flag("no-optimize"),
        backend: matches.get_one::<String>("backend").unwrap().clone(),
        compile_wasm: flag("compile-wasm"),
        compile_native: flag("compile-native") || flag("aot"),
        output_file: text("output"),
        benchmark: flag("benchmark"),
        iterations,
        vm_stats: flag("vm-stats"),
        vm_stats_json: flag("vm-stats-json"),
        // JIT controls
        jit_exec: flag("jit-exec"),
        jit_stats: flag("jit-stats"),
        jit_stats_json: flag("jit-stats-json"),
        jit_dump: flag("jit-dump"),
        jit_events: flag("jit-events"),
        jit_events_compile: flag("jit-events-compile"),
        jit_events_runtime: flag("jit-events-runtime"),
        jit_events_path: text("jit-events-path"),
        jit_threshold,
        jit_phi_min: flag("jit-phi-min"),
        jit_hostcall: flag("jit-hostcall"),
        jit_handle_debug: flag("jit-handle-debug"),
        jit_native_f64: flag("jit-native-f64"),
        jit_native_bool: flag("jit-native-bool"),
        jit_only: flag("jit-only"),
        jit_direct: flag("jit-direct"),
        emit_cfg: text("emit-cfg"),
        cli_verbose: flag("verbose"),
        run_task: text("run-task"),
        load_ny_plugins: flag("load-ny-plugins"),
        parser_ny,
        ny_parser_pipe: flag("ny-parser-pipe"),
        json_file: text("json-file"),
        gc_mode: text("gc"),
        // Build system (MVP)
        build_path: text("build"),
        build_app: text("build-app"),
        build_out: text("build-out"),
        build_aot: text("build-aot"),
        build_profile: text("build-profile"),
        build_target: text("build-target"),
        cli_usings,
        emit_mir_json: text("emit-mir-json"),
        emit_exe: text("emit-exe"),
        emit_exe_nyrt: text("emit-exe-nyrt"),
        emit_exe_libs: text("emit-exe-libs"),
        macro_expand_child: text("macro-expand-child"),
        dump_expanded_ast_json: flag("dump-expanded-ast-json"),
        macro_ctx_json: text("macro-ctx-json"),
    };

    // Macro debug gate
    if flag("expand") {
        std::env::set_var("NYASH_MACRO_ENABLE", "1");
        std::env::set_var("NYASH_MACRO_TRACE", "1");
    }
    // Forward MacroCtx JSON to env for macro child routes, if provided
    if let Some(ctx) = matches.get_one::<String>("macro-ctx-json") {
        std::env::set_var("NYASH_MACRO_CTX_JSON", ctx);
    }

    // Profile mapping (non-breaking; users can override afterwards).
    // NOTE(review): "dev" exports STRICT=1 exactly like "ci"/"strict", while the
    // --profile help text describes dev as "macros ON" without strict — confirm
    // which one is intended.
    if let Some(profile) = matches.get_one::<String>("profile") {
        let macro_env = match profile.as_str() {
            "lite" => Some(("0", "0", "0")),
            "dev" | "ci" | "strict" => Some(("1", "1", "0")),
            _ => None,
        };
        if let Some((enable, strict, trace)) = macro_env {
            std::env::set_var("NYASH_MACRO_ENABLE", enable);
            std::env::set_var("NYASH_MACRO_STRICT", strict);
            std::env::set_var("NYASH_MACRO_TRACE", trace);
        }
    }

    // Test harness: the filter/entry/return options only apply with --run-tests.
    if flag("run-tests") {
        std::env::set_var("NYASH_MACRO_ENABLE", "1");
        std::env::set_var("NYASH_TEST_RUN", "1");
        if let Some(filter) = matches.get_one::<String>("test-filter") {
            std::env::set_var("NYASH_TEST_FILTER", filter);
        }
        if let Some(entry) = matches.get_one::<String>("test-entry") {
            // Unknown values are ignored rather than forwarded.
            if matches!(entry.as_str(), "wrap" | "override") {
                std::env::set_var("NYASH_TEST_ENTRY", entry.as_str());
            }
        }
        if let Some(ret) = matches.get_one::<String>("test-return") {
            if matches!(ret.as_str(), "tests" | "original") {
                std::env::set_var("NYASH_TEST_RETURN", ret.as_str());
            }
        }
    }

    // Self-host macro pre-expand gate (CLI convenience); the auto variant is
    // applied second, so it takes precedence when both flags are given.
    if flag("macro-preexpand") {
        std::env::set_var("NYASH_MACRO_SELFHOST_PRE_EXPAND", "1");
    }
    if flag("macro-preexpand-auto") {
        std::env::set_var("NYASH_MACRO_SELFHOST_PRE_EXPAND", "auto");
    }
    if flag("macro-top-level-allow") {
        std::env::set_var("NYASH_MACRO_TOPLEVEL_ALLOW", "1");
    }

    // Macro profile: all recognised names currently map to the same settings.
    if let Some(macro_profile) = matches.get_one::<String>("macro-profile") {
        if matches!(macro_profile.as_str(), "dev" | "ci-fast" | "strict") {
            // Minimal, non-invasive mapping; users can still override.
            std::env::set_var("NYASH_MACRO_ENABLE", "1");
            std::env::set_var("NYASH_MACRO_STRICT", "1");
            std::env::set_var("NYASH_MACRO_TOPLEVEL_ALLOW", "0");
            std::env::set_var("NYASH_MACRO_SELFHOST_PRE_EXPAND", "auto");
        }
    }

    cfg
}
}
/// Baseline configuration: every switch off, every optional value unset.
///
/// The non-trivial defaults are a debug fuel of 100000, the "interpreter"
/// backend and 10 benchmark iterations.
impl Default for CliConfig {
    fn default() -> Self {
        Self {
            // Input and debugging
            file: None,
            debug_fuel: Some(100_000),
            dump_ast: false,
            dump_mir: false,
            verify_mir: false,
            mir_verbose: false,
            mir_verbose_effects: false,
            no_optimize: false,
            // Backend and compilation
            backend: String::from("interpreter"),
            compile_wasm: false,
            compile_native: false,
            output_file: None,
            benchmark: false,
            iterations: 10,
            vm_stats: false,
            vm_stats_json: false,
            // JIT controls
            jit_exec: false,
            jit_stats: false,
            jit_stats_json: false,
            jit_dump: false,
            jit_events: false,
            jit_events_compile: false,
            jit_events_runtime: false,
            jit_events_path: None,
            jit_threshold: None,
            jit_phi_min: false,
            jit_hostcall: false,
            jit_handle_debug: false,
            jit_native_f64: false,
            jit_native_bool: false,
            jit_only: false,
            jit_direct: false,
            emit_cfg: None,
            // CLI, tasks, plugins
            cli_verbose: false,
            run_task: None,
            load_ny_plugins: false,
            // Parser and JSON IR bridge
            parser_ny: false,
            ny_parser_pipe: false,
            json_file: None,
            gc_mode: None,
            // Build system (MVP)
            build_path: None,
            build_app: None,
            build_out: None,
            build_aot: None,
            build_profile: None,
            build_target: None,
            cli_usings: vec![],
            // Emit targets
            emit_mir_json: None,
            emit_exe: None,
            emit_exe_nyrt: None,
            emit_exe_libs: None,
            // Macro routes
            macro_expand_child: None,
            dump_expanded_ast_json: false,
            macro_ctx_json: None,
        }
    }
}
/// Interpret a `--debug-fuel` value.
///
/// Returns `None` for the literal `"unlimited"` (no limit) and `Some(n)` for
/// a string that parses as `usize`. Any other text also yields `None`, so a
/// malformed number cannot be told apart from "unlimited" by the caller.
fn parse_debug_fuel(value: &str) -> Option<usize> {
    match value {
        "unlimited" => None, // No limit
        digits => digits.parse::<usize>().ok(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// "unlimited" and unparsable text both map to `None`; digits map to `Some`.
    #[test]
    fn test_parse_debug_fuel() {
        let cases: [(&str, Option<usize>); 3] = [
            ("unlimited", None),
            ("1000", Some(1000)),
            ("invalid", None),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(parse_debug_fuel(input), *expected);
        }
    }

    /// Spot-check the hand-written `Default` implementation.
    #[test]
    fn test_default_config() {
        let defaults = CliConfig::default();
        assert_eq!(defaults.backend, "interpreter");
        assert_eq!(defaults.iterations, 10);
    }
}

206
src/cli/args.rs Normal file
View File

@ -0,0 +1,206 @@
use clap::{Arg, ArgMatches, Command};
use serde_json;
use super::CliConfig;
use super::utils::parse_debug_fuel;
/// Build a `CliConfig` from the process arguments.
///
/// A bare `--` splits the command line: what follows is treated as script
/// arguments and, when non-empty, exported as a JSON array in the
/// `NYASH_SCRIPT_ARGS_JSON` environment variable. Only the part before `--`
/// is parsed by clap. Without `--`, clap parses the process arguments itself.
/// A clap parse error ends the process via the error's `exit()`.
pub fn parse() -> CliConfig {
    let raw_args: Vec<String> = std::env::args().collect();

    let matches = match raw_args.iter().position(|arg| arg == "--") {
        Some(idx) => {
            // Script arguments are everything past the separator.
            let trailing: Vec<String> = raw_args[idx + 1..].to_vec();
            if !trailing.is_empty() {
                if let Ok(encoded) = serde_json::to_string(&trailing) {
                    std::env::set_var("NYASH_SCRIPT_ARGS_JSON", encoded);
                }
            }
            build_command()
                .try_get_matches_from(&raw_args[..idx])
                .unwrap_or_else(|err| err.exit())
        }
        None => build_command().get_matches(),
    };

    from_matches(&matches)
}
pub fn build_command() -> Command {
Command::new("nyash")
.version("1.0")
.author("Claude Code <claude@anthropic.com>")
.about("🦀 Nyash Programming Language - Everything is Box in Rust! 🦀")
.arg(Arg::new("file").help("Nyash file to execute").value_name("FILE").index(1))
.arg(Arg::new("macro-expand-child").long("macro-expand-child").value_name("FILE").help("Macro sandbox child: read AST JSON v0 from stdin, expand using Nyash macro file, write AST JSON v0 to stdout (PoC)"))
.arg(Arg::new("dump-ast").long("dump-ast").help("Dump parsed AST and exit").action(clap::ArgAction::SetTrue))
.arg(Arg::new("macro-preexpand").long("macro-preexpand").help("Enable selfhost macro pre-expand").action(clap::ArgAction::SetTrue))
.arg(Arg::new("macro-preexpand-auto").long("macro-preexpand-auto").help("Auto enable selfhost macro pre-expand").action(clap::ArgAction::SetTrue))
.arg(Arg::new("macro-top-level-allow").long("macro-top-level-allow").help("Allow top-level macro usage").action(clap::ArgAction::SetTrue))
.arg(Arg::new("macro-profile").long("macro-profile").value_name("{dev|ci-fast|strict}").help("Select macro profile"))
.arg(Arg::new("dump-expanded-ast-json").long("dump-expanded-ast-json").help("Dump AST after macro expansion as JSON v0 and exit").action(clap::ArgAction::SetTrue))
.arg(Arg::new("macro-ctx-json").long("macro-ctx-json").value_name("JSON").help("Provide MacroCtx as JSON string for macro child routes"))
.arg(Arg::new("gc").long("gc").value_name("{auto,rc+cycle,minorgen,stw,rc,off}").help("Select GC mode (default: rc+cycle)"))
.arg(Arg::new("parser").long("parser").value_name("{rust|ny}").help("Choose parser: 'rust' (default) or 'ny' (direct v0 bridge)"))
.arg(Arg::new("ny-parser-pipe").long("ny-parser-pipe").help("Read Ny JSON IR v0 from stdin and execute via MIR Interpreter").action(clap::ArgAction::SetTrue))
.arg(Arg::new("json-file").long("json-file").value_name("FILE").help("Read Ny JSON IR v0 from a file and execute via MIR Interpreter"))
.arg(Arg::new("emit-mir-json").long("emit-mir-json").value_name("FILE").help("Emit MIR JSON v0 to file and exit"))
.arg(Arg::new("emit-exe").long("emit-exe").value_name("FILE").help("Emit native executable via ny-llvmc and exit"))
.arg(Arg::new("emit-exe-nyrt").long("emit-exe-nyrt").value_name("DIR").help("Directory containing libnyrt.a (used with --emit-exe)"))
.arg(Arg::new("emit-exe-libs").long("emit-exe-libs").value_name("FLAGS").help("Extra linker flags for ny-llvmc when emitting executable"))
.arg(Arg::new("stage3").long("stage3").help("Enable Stage-3 syntax acceptance for selfhost parser").action(clap::ArgAction::SetTrue))
.arg(Arg::new("ny-compiler-args").long("ny-compiler-args").value_name("ARGS").help("Pass additional args to selfhost child compiler"))
.arg(Arg::new("using").long("using").value_name("NAME").help("Add a using directive to current session; repeat").action(clap::ArgAction::Append))
.arg(Arg::new("debug-fuel").long("debug-fuel").value_name("N|unlimited").help("Limit interpreter/JIT steps or 'unlimited' (default 100000)").default_value("100000"))
.arg(Arg::new("run-tests").long("run-tests").help("Run inline tests in the module (functions starting with 'test_')").action(clap::ArgAction::SetTrue))
.arg(Arg::new("test-filter").long("test-filter").value_name("SUBSTR").help("Only run tests whose name contains SUBSTR (with --run-tests)"))
.arg(Arg::new("test-entry").long("test-entry").value_name("{wrap|override}").help("When --run-tests and a main exists: wrap or override") )
.arg(Arg::new("test-return").long("test-return").value_name("{tests|original}").help("Harness return policy (tests or original)") )
.arg(Arg::new("dump-mir").long("dump-mir").help("Dump MIR instead of executing").action(clap::ArgAction::SetTrue))
.arg(Arg::new("verify").long("verify").help("Verify MIR integrity and exit").action(clap::ArgAction::SetTrue))
.arg(Arg::new("mir-verbose").long("mir-verbose").help("Show verbose MIR output with statistics").action(clap::ArgAction::SetTrue))
.arg(Arg::new("mir-verbose-effects").long("mir-verbose-effects").help("Show per-instruction effect category").action(clap::ArgAction::SetTrue))
.arg(Arg::new("no-optimize").long("no-optimize").help("Disable MIR optimizer passes").action(clap::ArgAction::SetTrue))
.arg(Arg::new("backend").long("backend").value_name("BACKEND").help("Backend: vm (default), llvm, interpreter").default_value("vm"))
.arg(Arg::new("verbose").long("verbose").short('v').help("Verbose CLI output (sets NYASH_CLI_VERBOSE=1)").action(clap::ArgAction::SetTrue))
.arg(Arg::new("compile-wasm").long("compile-wasm").help("Compile to WebAssembly").action(clap::ArgAction::SetTrue))
.arg(Arg::new("compile-native").long("compile-native").help("Compile to native executable (AOT)").action(clap::ArgAction::SetTrue))
.arg(Arg::new("aot").long("aot").help("Short form of --compile-native").action(clap::ArgAction::SetTrue))
.arg(Arg::new("output").long("output").short('o').value_name("FILE").help("Output file for compilation"))
.arg(Arg::new("benchmark").long("benchmark").help("Run performance benchmarks").action(clap::ArgAction::SetTrue))
.arg(Arg::new("iterations").long("iterations").value_name("COUNT").help("Iterations for benchmarks").default_value("10"))
.arg(Arg::new("vm-stats").long("vm-stats").help("Enable VM instruction statistics").action(clap::ArgAction::SetTrue))
.arg(Arg::new("vm-stats-json").long("vm-stats-json").help("Output VM statistics in JSON").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-exec").long("jit-exec").help("Enable JIT execution").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-stats").long("jit-stats").help("Print JIT statistics").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-stats-json").long("jit-stats-json").help("Output JIT stats in JSON").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-dump").long("jit-dump").help("Dump JIT lowering summary").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-events").long("jit-events").help("Emit JIT events JSONL").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-events-compile").long("jit-events-compile").help("Emit compile-time JIT events").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-events-runtime").long("jit-events-runtime").help("Emit runtime JIT events").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-events-path").long("jit-events-path").value_name("FILE").help("Write JIT events JSONL to file"))
.arg(Arg::new("jit-threshold").long("jit-threshold").value_name("N").help("Hotness threshold for JIT compilation"))
.arg(Arg::new("jit-phi-min").long("jit-phi-min").help("Minimal PHI path for branches").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-hostcall").long("jit-hostcall").help("Enable JIT hostcall bridge").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-handle-debug").long("jit-handle-debug").help("Print JIT handle allocation debug logs").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-native-f64").long("jit-native-f64").help("Enable native f64 ABI path").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-native-bool").long("jit-native-bool").help("Enable native bool ABI path").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-only").long("jit-only").help("Run JIT only (no VM fallback)").action(clap::ArgAction::SetTrue))
.arg(Arg::new("jit-direct").long("jit-direct").help("Independent JIT engine mode").action(clap::ArgAction::SetTrue))
.arg(Arg::new("emit-cfg").long("emit-cfg").value_name("DOT_FILE").help("Emit JIT CFG as DOT"))
.arg(Arg::new("run-task").long("run-task").value_name("NAME").help("Run a named task from nyash.toml"))
.arg(Arg::new("load-ny-plugins").long("load-ny-plugins").help("Load scripts from nyash.toml [ny_plugins]").action(clap::ArgAction::SetTrue))
.arg(Arg::new("build").long("build").value_name("PATH").help("Build AOT executable using nyash.toml at PATH (MVP)"))
.arg(Arg::new("build-app").long("app").value_name("FILE").help("Entry Nyash script for --build"))
.arg(Arg::new("build-out").long("out").value_name("FILE").help("Output executable name for --build"))
.arg(Arg::new("build-aot").long("build-aot").value_name("{cranelift|llvm}").help("AOT backend for --build"))
.arg(Arg::new("build-profile").long("profile").value_name("{release|debug}").help("Cargo profile for --build"))
.arg(Arg::new("build-target").long("target").value_name("TRIPLE").help("Target triple for --build"))
}
/// Build a [`CliConfig`] from parsed clap matches and mirror selected options
/// into `NYASH_*` process environment variables.
///
/// # Panics
///
/// Unwraps the `debug-fuel`, `backend` and `iterations` values, so those
/// arguments must always carry a value (presumably through clap defaults;
/// confirm against the argument definitions).
pub fn from_matches(matches: &ArgMatches) -> CliConfig {
    // Short accessors so every field below reads as `kind("arg-id")`.
    let flag = |id: &str| matches.get_flag(id);
    let owned = |id: &str| matches.get_one::<String>(id).cloned();
    // Sets `key=1` when `enabled` holds; otherwise the variable is left alone.
    let export_on = |enabled: bool, key: &str| {
        if enabled {
            std::env::set_var(key, "1");
        }
    };

    // These two switches are exported before the config itself is assembled.
    export_on(flag("stage3"), "NYASH_NY_COMPILER_STAGE3");
    if let Some(child_args) = matches.get_one::<String>("ny-compiler-args") {
        std::env::set_var("NYASH_NY_COMPILER_CHILD_ARGS", child_args);
    }

    let cfg = CliConfig {
        file: owned("file"),
        debug_fuel: parse_debug_fuel(matches.get_one::<String>("debug-fuel").unwrap()),
        dump_ast: flag("dump-ast"),
        dump_mir: flag("dump-mir"),
        verify_mir: flag("verify"),
        mir_verbose: flag("mir-verbose"),
        mir_verbose_effects: flag("mir-verbose-effects"),
        no_optimize: flag("no-optimize"),
        backend: owned("backend").unwrap(),
        compile_wasm: flag("compile-wasm"),
        // `--aot` is accepted as an alias for `--compile-native`.
        compile_native: flag("compile-native") || flag("aot"),
        output_file: owned("output"),
        benchmark: flag("benchmark"),
        // An unparsable count silently falls back to 10.
        iterations: matches
            .get_one::<String>("iterations")
            .unwrap()
            .parse()
            .unwrap_or(10),
        vm_stats: flag("vm-stats"),
        vm_stats_json: flag("vm-stats-json"),
        jit_exec: flag("jit-exec"),
        jit_stats: flag("jit-stats"),
        jit_stats_json: flag("jit-stats-json"),
        jit_dump: flag("jit-dump"),
        jit_events: flag("jit-events"),
        jit_events_compile: flag("jit-events-compile"),
        jit_events_runtime: flag("jit-events-runtime"),
        jit_events_path: owned("jit-events-path"),
        // A threshold that is not a valid u32 is treated as absent.
        jit_threshold: matches
            .get_one::<String>("jit-threshold")
            .and_then(|raw| raw.parse::<u32>().ok()),
        jit_phi_min: flag("jit-phi-min"),
        jit_hostcall: flag("jit-hostcall"),
        jit_handle_debug: flag("jit-handle-debug"),
        jit_native_f64: flag("jit-native-f64"),
        jit_native_bool: flag("jit-native-bool"),
        emit_cfg: owned("emit-cfg"),
        jit_only: flag("jit-only"),
        jit_direct: flag("jit-direct"),
        cli_verbose: flag("verbose"),
        run_task: owned("run-task"),
        load_ny_plugins: flag("load-ny-plugins"),
        gc_mode: owned("gc"),
        parser_ny: matches
            .get_one::<String>("parser")
            .map_or(false, |choice| choice == "ny"),
        ny_parser_pipe: flag("ny-parser-pipe"),
        json_file: owned("json-file"),
        build_path: owned("build"),
        build_app: owned("build-app"),
        build_out: owned("build-out"),
        build_aot: owned("build-aot"),
        build_profile: owned("build-profile"),
        build_target: owned("build-target"),
        cli_usings: matches
            .get_many::<String>("using")
            .map(|values| values.cloned().collect())
            .unwrap_or_default(),
        emit_mir_json: owned("emit-mir-json"),
        emit_exe: owned("emit-exe"),
        emit_exe_nyrt: owned("emit-exe-nyrt"),
        emit_exe_libs: owned("emit-exe-libs"),
        macro_expand_child: owned("macro-expand-child"),
        dump_expanded_ast_json: flag("dump-expanded-ast-json"),
        macro_ctx_json: owned("macro-ctx-json"),
    };

    // Mirror runtime switches into the environment.
    export_on(cfg.cli_verbose, "NYASH_CLI_VERBOSE");
    export_on(cfg.vm_stats, "NYASH_VM_STATS");
    export_on(cfg.vm_stats_json, "NYASH_VM_STATS_JSON");
    export_on(cfg.jit_exec, "NYASH_JIT_EXEC");
    export_on(cfg.jit_stats, "NYASH_JIT_STATS");
    export_on(cfg.jit_stats_json, "NYASH_JIT_STATS_JSON");
    export_on(cfg.jit_dump, "NYASH_JIT_DUMP");
    export_on(cfg.jit_events, "NYASH_JIT_EVENTS");
    export_on(cfg.jit_events_compile, "NYASH_JIT_EVENTS_COMPILE");
    export_on(cfg.jit_events_runtime, "NYASH_JIT_EVENTS_RUNTIME");
    if let Some(path) = cfg.jit_events_path.as_ref() {
        std::env::set_var("NYASH_JIT_EVENTS_PATH", path);
    }
    if let Some(threshold) = cfg.jit_threshold {
        std::env::set_var("NYASH_JIT_THRESHOLD", threshold.to_string());
    }
    export_on(cfg.jit_phi_min, "NYASH_JIT_PHI_MIN");
    export_on(cfg.jit_hostcall, "NYASH_JIT_HOSTCALL");
    export_on(cfg.jit_handle_debug, "NYASH_JIT_HANDLE_DEBUG");
    export_on(cfg.jit_native_f64, "NYASH_JIT_NATIVE_F64");
    export_on(cfg.jit_native_bool, "NYASH_JIT_NATIVE_BOOL");
    export_on(cfg.jit_only, "NYASH_JIT_ONLY");
    export_on(cfg.jit_direct, "NYASH_JIT_DIRECT");
    if let Some(mode) = cfg.gc_mode.as_ref() {
        std::env::set_var("NYASH_GC_MODE", mode);
    }

    // Test-runner options are only looked at when `--run-tests` is present.
    if flag("run-tests") {
        std::env::set_var("NYASH_RUN_TESTS", "1");
        if let Some(filter) = matches.get_one::<String>("test-filter") {
            std::env::set_var("NYASH_TEST_FILTER", filter);
        }
        // Only the recognised spellings are forwarded; anything else is dropped.
        match matches.get_one::<String>("test-entry").map(String::as_str) {
            Some(mode) if mode == "wrap" || mode == "override" => {
                std::env::set_var("NYASH_TEST_ENTRY", mode);
            }
            _ => {}
        }
        match matches.get_one::<String>("test-return").map(String::as_str) {
            Some(mode) if mode == "tests" || mode == "original" => {
                std::env::set_var("NYASH_TEST_RETURN", mode);
            }
            _ => {}
        }
    }

    // Macro switches. Later writes win: `-auto` overrides the plain pre-expand
    // flag, and a recognised profile overrides both it and the top-level flag.
    export_on(flag("macro-preexpand"), "NYASH_MACRO_SELFHOST_PRE_EXPAND");
    if flag("macro-preexpand-auto") {
        std::env::set_var("NYASH_MACRO_SELFHOST_PRE_EXPAND", "auto");
    }
    export_on(flag("macro-top-level-allow"), "NYASH_MACRO_TOPLEVEL_ALLOW");
    if let Some(profile) = matches.get_one::<String>("macro-profile") {
        if ["dev", "ci-fast", "strict"].contains(&profile.as_str()) {
            let profile_env = [
                ("NYASH_MACRO_ENABLE", "1"),
                ("NYASH_MACRO_STRICT", "1"),
                ("NYASH_MACRO_TOPLEVEL_ALLOW", "0"),
                ("NYASH_MACRO_SELFHOST_PRE_EXPAND", "auto"),
            ];
            for &(key, value) in profile_env.iter() {
                std::env::set_var(key, value);
            }
        }
    }

    cfg
}

89
src/cli/groups.rs Normal file
View File

@ -0,0 +1,89 @@
/// Input-related slice of the CLI options.
///
/// Filled by `CliConfig::as_groups` from the flat `file` / `cli_usings` fields.
#[derive(Debug, Clone)]
pub struct InputConfig {
/// Value of the `file` argument, if one was given.
pub file: Option<String>,
/// Every value supplied for the `using` argument (empty when none).
pub cli_usings: Vec<String>,
}
/// Debugging and diagnostic switches.
///
/// Filled by `CliConfig::as_groups` from the identically named flat fields.
#[derive(Debug, Clone)]
pub struct DebugConfig {
/// Parsed `debug-fuel` value; `None` when the argument is `"unlimited"` or
/// not a valid number (see `parse_debug_fuel`).
pub debug_fuel: Option<usize>,
/// `--dump-ast` flag.
pub dump_ast: bool,
/// `--dump-mir` flag.
pub dump_mir: bool,
/// `--verify` flag.
pub verify_mir: bool,
/// `--mir-verbose` flag.
pub mir_verbose: bool,
/// `--mir-verbose-effects` flag.
pub mir_verbose_effects: bool,
/// `--verbose` flag; also exported as `NYASH_CLI_VERBOSE=1` during parsing.
pub cli_verbose: bool,
}
/// JIT switches, one per `jit_*` field of `CliConfig` (prefix dropped).
///
/// Argument parsing also exports each enabled switch as a `NYASH_JIT_*`
/// environment variable.
#[derive(Debug, Clone)]
pub struct JitConfig {
/// Mirrors `CliConfig::jit_exec` (`--jit-exec`).
pub exec: bool,
/// Mirrors `CliConfig::jit_stats` (`--jit-stats`).
pub stats: bool,
/// Mirrors `CliConfig::jit_stats_json` (`--jit-stats-json`).
pub stats_json: bool,
/// Mirrors `CliConfig::jit_dump` (`--jit-dump`).
pub dump: bool,
/// Mirrors `CliConfig::jit_events` (`--jit-events`).
pub events: bool,
/// Mirrors `CliConfig::jit_events_compile` (`--jit-events-compile`).
pub events_compile: bool,
/// Mirrors `CliConfig::jit_events_runtime` (`--jit-events-runtime`).
pub events_runtime: bool,
/// Mirrors `CliConfig::jit_events_path` (`jit-events-path` value, if given).
pub events_path: Option<String>,
/// `jit-threshold` value; `None` when absent or not a valid `u32`.
pub threshold: Option<u32>,
/// `--jit-phi-min`: minimal PHI path for branches.
pub phi_min: bool,
/// `--jit-hostcall`: enable the JIT hostcall bridge.
pub hostcall: bool,
/// `--jit-handle-debug`: print JIT handle allocation debug logs.
pub handle_debug: bool,
/// `--jit-native-f64`: enable the native f64 ABI path.
pub native_f64: bool,
/// `--jit-native-bool`: enable the native bool ABI path.
pub native_bool: bool,
/// `--jit-only`: run JIT only (no VM fallback).
pub only: bool,
/// `--jit-direct`: independent JIT engine mode.
pub direct: bool,
}
/// Execution-backend selection plus its statistics and JIT switches.
#[derive(Debug, Clone)]
pub struct BackendConfig {
/// Backend name taken from the `backend` argument
/// (`CliConfig::default()` uses `"interpreter"`).
pub backend: String,
/// `--vm-stats` flag.
pub vm_stats: bool,
/// `--vm-stats-json` flag.
pub vm_stats_json: bool,
/// Nested JIT switches.
pub jit: JitConfig,
}
/// Options of the `--build` (AOT executable, MVP) workflow.
#[derive(Debug, Clone)]
pub struct BuildConfig {
/// `--build PATH`: location of the `nyash.toml` to build from.
pub path: Option<String>,
/// `--app FILE`: entry Nyash script for `--build`.
pub app: Option<String>,
/// `--out FILE`: output executable name for `--build`.
pub out: Option<String>,
/// `--build-aot {cranelift|llvm}`: AOT backend for `--build`.
pub aot: Option<String>,
/// `--profile {release|debug}`: Cargo profile for `--build`.
pub profile: Option<String>,
/// `--target TRIPLE`: target triple for `--build`.
pub target: Option<String>,
}
/// Output-emission options; each field mirrors the same-named `CliConfig` field.
#[derive(Debug, Clone)]
pub struct EmitConfig {
/// `--emit-cfg DOT_FILE`: emit the JIT CFG as DOT.
pub emit_cfg: Option<String>,
/// Value of the `emit-mir-json` argument, if given.
pub emit_mir_json: Option<String>,
/// Value of the `emit-exe` argument, if given.
pub emit_exe: Option<String>,
/// Value of the `emit-exe-nyrt` argument, if given.
pub emit_exe_nyrt: Option<String>,
/// Value of the `emit-exe-libs` argument, if given.
pub emit_exe_libs: Option<String>,
}
/// Parser selection and JSON pipe options.
#[derive(Debug, Clone)]
pub struct ParserPipeConfig {
/// `true` only when the `parser` argument equals `"ny"`.
pub parser_ny: bool,
/// `--ny-parser-pipe` flag.
pub ny_parser_pipe: bool,
/// Value of the `json-file` argument, if given.
pub json_file: Option<String>,
}
/// Grouped view of the CLI options, produced by `CliConfig::as_groups`.
///
/// Related options live in the nested structs; the remaining fields are
/// copied one-to-one from the same-named `CliConfig` fields.
#[derive(Debug, Clone)]
pub struct CliGroups {
/// Source file and `using` entries.
pub input: InputConfig,
/// Dump / verify / verbosity switches.
pub debug: DebugConfig,
/// Backend name, VM statistics flags and JIT switches.
pub backend: BackendConfig,
/// `--build` workflow options.
pub build: BuildConfig,
/// Emission targets.
pub emit: EmitConfig,
/// Parser selection and JSON pipe options.
pub parser: ParserPipeConfig,
/// Value of the `gc` argument; exported as `NYASH_GC_MODE` during parsing.
pub gc_mode: Option<String>,
/// `--compile-wasm` flag.
pub compile_wasm: bool,
/// Set by either `--compile-native` or `--aot`.
pub compile_native: bool,
/// Value of the `output` argument, if given.
pub output_file: Option<String>,
/// `--benchmark` flag.
pub benchmark: bool,
/// Parsed `iterations` argument (10 when it is not a valid number).
pub iterations: u32,
/// `--run-task NAME`: run a named task from `nyash.toml`.
pub run_task: Option<String>,
/// `--load-ny-plugins`: load scripts from `nyash.toml` `[ny_plugins]`.
pub load_ny_plugins: bool,
}

220
src/cli/mod.rs Normal file
View File

@ -0,0 +1,220 @@
/*!
* CLI Argument Parsing Module - Nyash Command Line Interface (split)
*/
mod args;
mod groups;
mod utils;
use groups::*;
/// Command-line configuration structure
///
/// Flat record with one field per CLI option. `CliConfig::as_groups` offers
/// a nested view of most of these fields.
#[derive(Debug, Clone)]
pub struct CliConfig {
// Input and debugging
/// Value of the `file` argument, if one was given.
pub file: Option<String>,
/// Parsed `debug-fuel`; `None` for `"unlimited"` or an unparsable value.
pub debug_fuel: Option<usize>,
pub dump_ast: bool,
pub dump_mir: bool,
/// Set by the `--verify` flag.
pub verify_mir: bool,
pub mir_verbose: bool,
pub mir_verbose_effects: bool,
pub no_optimize: bool,
// Backend and compilation
/// Backend name (`"interpreter"` in `Default`).
pub backend: String,
pub compile_wasm: bool,
/// Set by either `--compile-native` or `--aot`.
pub compile_native: bool,
/// Value of the `output` argument, if given.
pub output_file: Option<String>,
pub benchmark: bool,
/// Benchmark iteration count (10 in `Default` and on parse failure).
pub iterations: u32,
pub vm_stats: bool,
pub vm_stats_json: bool,
// JIT controls
pub jit_exec: bool,
pub jit_stats: bool,
pub jit_stats_json: bool,
pub jit_dump: bool,
pub jit_events: bool,
pub jit_events_compile: bool,
pub jit_events_runtime: bool,
pub jit_events_path: Option<String>,
/// `None` when the argument is absent or not a valid `u32`.
pub jit_threshold: Option<u32>,
pub jit_phi_min: bool,
pub jit_hostcall: bool,
pub jit_handle_debug: bool,
pub jit_native_f64: bool,
pub jit_native_bool: bool,
pub jit_only: bool,
pub jit_direct: bool,
/// `--emit-cfg DOT_FILE`: emit the JIT CFG as DOT.
pub emit_cfg: Option<String>,
/// Set by the `--verbose` flag.
pub cli_verbose: bool,
/// `--run-task NAME`: run a named task from `nyash.toml`.
pub run_task: Option<String>,
/// `--load-ny-plugins`: load scripts from `nyash.toml` `[ny_plugins]`.
pub load_ny_plugins: bool,
// Parser selection / JSON pipe
/// `true` only when the `parser` argument equals `"ny"`.
pub parser_ny: bool,
pub ny_parser_pipe: bool,
pub json_file: Option<String>,
/// Value of the `gc` argument; exported as `NYASH_GC_MODE` during parsing.
pub gc_mode: Option<String>,
// `--build` workflow
pub build_path: Option<String>,
/// Set by `--app`.
pub build_app: Option<String>,
/// Set by `--out`.
pub build_out: Option<String>,
pub build_aot: Option<String>,
/// Set by `--profile`.
pub build_profile: Option<String>,
/// Set by `--target`.
pub build_target: Option<String>,
/// Every value supplied for the `using` argument.
pub cli_usings: Vec<String>,
// Emission targets
pub emit_mir_json: Option<String>,
pub emit_exe: Option<String>,
pub emit_exe_nyrt: Option<String>,
pub emit_exe_libs: Option<String>,
// Macro tooling (not carried over by `as_groups`)
pub macro_expand_child: Option<String>,
pub dump_expanded_ast_json: bool,
pub macro_ctx_json: Option<String>,
}
pub use groups::{BackendConfig, BuildConfig, CliGroups, DebugConfig, EmitConfig, InputConfig, JitConfig, ParserPipeConfig};
impl CliConfig {
    /// Produce a configuration by delegating to `args::parse()`.
    pub fn parse() -> Self {
        args::parse()
    }

    /// Regroup the flat configuration into the nested [`CliGroups`] view.
    ///
    /// Every value is copied or cloned out of `self`. `no_optimize` and the
    /// macro-related fields have no counterpart in `CliGroups` and are not
    /// carried over.
    pub fn as_groups(&self) -> CliGroups {
        let input = InputConfig {
            file: self.file.clone(),
            cli_usings: self.cli_usings.clone(),
        };

        let debug = DebugConfig {
            debug_fuel: self.debug_fuel,
            dump_ast: self.dump_ast,
            dump_mir: self.dump_mir,
            verify_mir: self.verify_mir,
            mir_verbose: self.mir_verbose,
            mir_verbose_effects: self.mir_verbose_effects,
            cli_verbose: self.cli_verbose,
        };

        // The JIT switches lose their `jit_` prefix inside `JitConfig`.
        let jit = JitConfig {
            exec: self.jit_exec,
            stats: self.jit_stats,
            stats_json: self.jit_stats_json,
            dump: self.jit_dump,
            events: self.jit_events,
            events_compile: self.jit_events_compile,
            events_runtime: self.jit_events_runtime,
            events_path: self.jit_events_path.clone(),
            threshold: self.jit_threshold,
            phi_min: self.jit_phi_min,
            hostcall: self.jit_hostcall,
            handle_debug: self.jit_handle_debug,
            native_f64: self.jit_native_f64,
            native_bool: self.jit_native_bool,
            only: self.jit_only,
            direct: self.jit_direct,
        };

        let backend = BackendConfig {
            backend: self.backend.clone(),
            vm_stats: self.vm_stats,
            vm_stats_json: self.vm_stats_json,
            jit,
        };

        // Likewise the `build_` prefix is dropped inside `BuildConfig`.
        let build = BuildConfig {
            path: self.build_path.clone(),
            app: self.build_app.clone(),
            out: self.build_out.clone(),
            aot: self.build_aot.clone(),
            profile: self.build_profile.clone(),
            target: self.build_target.clone(),
        };

        let emit = EmitConfig {
            emit_cfg: self.emit_cfg.clone(),
            emit_mir_json: self.emit_mir_json.clone(),
            emit_exe: self.emit_exe.clone(),
            emit_exe_nyrt: self.emit_exe_nyrt.clone(),
            emit_exe_libs: self.emit_exe_libs.clone(),
        };

        let parser = ParserPipeConfig {
            parser_ny: self.parser_ny,
            ny_parser_pipe: self.ny_parser_pipe,
            json_file: self.json_file.clone(),
        };

        CliGroups {
            input,
            debug,
            backend,
            build,
            emit,
            parser,
            gc_mode: self.gc_mode.clone(),
            compile_wasm: self.compile_wasm,
            compile_native: self.compile_native,
            output_file: self.output_file.clone(),
            benchmark: self.benchmark,
            iterations: self.iterations,
            run_task: self.run_task.clone(),
            load_ny_plugins: self.load_ny_plugins,
        }
    }
}
impl Default for CliConfig {
fn default() -> Self {
Self {
file: None,
debug_fuel: Some(100000),
dump_ast: false,
dump_mir: false,
verify_mir: false,
mir_verbose: false,
mir_verbose_effects: false,
no_optimize: false,
backend: "interpreter".to_string(),
compile_wasm: false,
compile_native: false,
output_file: None,
benchmark: false,
iterations: 10,
vm_stats: false,
vm_stats_json: false,
jit_exec: false,
jit_stats: false,
jit_stats_json: false,
jit_dump: false,
jit_events: false,
jit_events_compile: false,
jit_events_runtime: false,
jit_events_path: None,
jit_threshold: None,
jit_phi_min: false,
jit_hostcall: false,
jit_handle_debug: false,
jit_native_f64: false,
jit_native_bool: false,
emit_cfg: None,
jit_only: false,
jit_direct: false,
cli_verbose: false,
run_task: None,
load_ny_plugins: false,
gc_mode: None,
parser_ny: false,
ny_parser_pipe: false,
json_file: None,
build_path: None,
build_app: None,
build_out: None,
build_aot: None,
build_profile: None,
build_target: None,
cli_usings: Vec::new(),
emit_mir_json: None,
emit_exe: None,
emit_exe_nyrt: None,
emit_exe_libs: None,
macro_expand_child: None,
dump_expanded_ast_json: false,
macro_ctx_json: None,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `"unlimited"` and unparsable input both yield `None`; digits parse.
    #[test]
    fn test_parse_debug_fuel() {
        let cases: [(&str, Option<usize>); 3] = [
            ("unlimited", None),
            ("1000", Some(1000)),
            ("invalid", None),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(super::utils::parse_debug_fuel(input), expected);
        }
    }

    /// The default configuration selects the interpreter with 10 iterations.
    #[test]
    fn test_default_config() {
        let defaults = CliConfig::default();
        assert_eq!(defaults.backend, "interpreter");
        assert_eq!(defaults.iterations, 10);
    }
}

9
src/cli/utils.rs Normal file
View File

@ -0,0 +1,9 @@
/// Interpret a debug-fuel argument.
///
/// The literal `"unlimited"` yields `None`. Any other text is parsed as a
/// `usize`; text that does not parse also yields `None`.
pub fn parse_debug_fuel(value: &str) -> Option<usize> {
    match value {
        "unlimited" => None,
        amount => amount.parse().ok(),
    }
}

View File

@ -51,10 +51,23 @@ def op_call(owner, fn, inst: Dict[str, Any], regs: Dict[int, Any]) -> Any:
owner._dbg(f"[pyvm] call -> {fname} args={call_args}")
result = owner._exec_function(callee, call_args)
else:
# Heuristic resolution: match suffix ".name/arity"
# Heuristic resolution: match suffix ".name/arity"; prefer current box context on ties
arity = len(call_args)
suffix = f".{fname}/{arity}"
candidates = [k for k in owner.functions.keys() if k.endswith(suffix)]
if len(candidates) > 1:
# Prefer the current box if available (MiniVm.* when inside MiniVm.*)
try:
cur_box = fn.name.split(".")[0] if "." in fn.name else ""
except Exception:
cur_box = ""
if cur_box:
scoped = [k for k in candidates if k.startswith(cur_box + ".")]
if len(scoped) == 1:
candidates = scoped
# Still multiple: pick the lexicographically first for determinism
if len(candidates) > 1:
candidates = [sorted(candidates)[0]]
if len(candidates) == 1:
callee = owner.functions[candidates[0]]
owner._dbg(f"[pyvm] call -> {candidates[0]} args={call_args}")

View File

@ -234,8 +234,28 @@ impl<'a> LoopBuilder<'a> {
// 以前は body_id に保存していたが、複数ブロックのボディや continue 混在時に不正確になるため
// 実際の latch_id に対してスナップショットを紐づける
self.block_var_maps.insert(latch_id, latch_snapshot);
self.emit_jump(header_id)?;
let _ = crate::mir::builder::loops::add_predecessor(self.parent_builder, header_id, latch_id);
// Only jump back to header if the latch block is not already terminated
{
let need_jump = {
if let Some(ref fun_ro) = self.parent_builder.current_function {
if let Some(bb) = fun_ro.get_block(latch_id) {
!bb.is_terminated()
} else {
true
}
} else {
true
}
};
if need_jump {
self.emit_jump(header_id)?;
let _ = crate::mir::builder::loops::add_predecessor(
self.parent_builder,
header_id,
latch_id,
);
}
}
// 9. Headerブロックをシール全predecessors確定
self.seal_block(header_id, latch_id)?;

View File

@ -21,6 +21,7 @@ pub mod optimizer_passes; // optimizer passes (normalize/diagnostics)
pub mod optimizer_stats; // extracted stats struct
pub mod passes;
pub mod printer;
mod printer_helpers; // internal helpers extracted from printer.rs
pub mod hints; // scaffold: zero-cost guidance (no-op)
pub mod slot_registry; // Phase 9.79b.1: method slot resolution (IDs)
pub mod value_id;

View File

@ -5,6 +5,7 @@
*/
use super::{BasicBlock, MirFunction, MirInstruction, MirModule, MirType, ValueId};
use super::printer_helpers;
use crate::debug::log as dlog;
use std::collections::HashMap;
use std::fmt::Write;
@ -338,11 +339,7 @@ impl MirPrinter {
}
fn format_dst(&self, dst: &ValueId, types: &HashMap<ValueId, MirType>) -> String {
if let Some(ty) = types.get(dst) {
format!("{}: {:?} =", dst, ty)
} else {
format!("{} =", dst)
}
printer_helpers::format_dst(dst, types)
}
/// Format a single instruction
@ -351,405 +348,11 @@ impl MirPrinter {
instruction: &MirInstruction,
types: &HashMap<ValueId, MirType>,
) -> String {
match instruction {
MirInstruction::Const { dst, value } => {
format!("{} const {}", self.format_dst(dst, types), value)
}
MirInstruction::BinOp { dst, op, lhs, rhs } => {
format!("{} {} {:?} {}", self.format_dst(dst, types), lhs, op, rhs)
}
MirInstruction::UnaryOp { dst, op, operand } => {
format!("{} {:?} {}", self.format_dst(dst, types), op, operand)
}
MirInstruction::Compare { dst, op, lhs, rhs } => {
format!(
"{} icmp {:?} {}, {}",
self.format_dst(dst, types),
op,
lhs,
rhs
)
}
MirInstruction::Load { dst, ptr } => {
format!("{} load {}", self.format_dst(dst, types), ptr)
}
MirInstruction::Store { value, ptr } => {
format!("store {} -> {}", value, ptr)
}
MirInstruction::Call {
dst,
func,
args,
effects: _,
} => {
let args_str = args
.iter()
.map(|v| format!("{}", v))
.collect::<Vec<_>>()
.join(", ");
if let Some(dst) = dst {
format!(
"{} call {}({})",
self.format_dst(dst, types),
func,
args_str
)
} else {
format!("call {}({})", func, args_str)
}
}
MirInstruction::FunctionNew {
dst,
params,
body,
captures,
me,
} => {
let p = params.join(", ");
let c = captures
.iter()
.map(|(n, v)| format!("{}={}", n, v))
.collect::<Vec<_>>()
.join(", ");
let me_s = me.map(|m| format!(" me={}", m)).unwrap_or_default();
let cap_s = if c.is_empty() {
String::new()
} else {
format!(" [{}]", c)
};
format!(
"{} function_new ({}) {{...{}}}{}{}",
self.format_dst(dst, types),
p,
body.len(),
cap_s,
me_s
)
}
MirInstruction::BoxCall {
dst,
box_val,
method,
method_id,
args,
effects: _,
} => {
let args_str = args
.iter()
.map(|v| format!("{}", v))
.collect::<Vec<_>>()
.join(", ");
let id_suffix = method_id.map(|id| format!("[#{}]", id)).unwrap_or_default();
if let Some(dst) = dst {
format!(
"{} call {}.{}{}({})",
self.format_dst(dst, types),
box_val,
method,
id_suffix,
args_str
)
} else {
format!("call {}.{}{}({})", box_val, method, id_suffix, args_str)
}
}
MirInstruction::PluginInvoke {
dst,
box_val,
method,
args,
effects: _,
} => {
let args_str = args
.iter()
.map(|v| format!("{}", v))
.collect::<Vec<_>>()
.join(", ");
if let Some(dst) = dst {
format!(
"{} plugin_invoke {}.{}({})",
self.format_dst(dst, types),
box_val,
method,
args_str
)
} else {
format!("plugin_invoke {}.{}({})", box_val, method, args_str)
}
}
MirInstruction::Branch {
condition,
then_bb,
else_bb,
} => {
format!("br {}, label {}, label {}", condition, then_bb, else_bb)
}
MirInstruction::Jump { target } => {
format!("br label {}", target)
}
MirInstruction::Return { value } => {
if let Some(value) = value {
format!("ret {}", value)
} else {
"ret void".to_string()
}
}
MirInstruction::Phi { dst, inputs } => {
let inputs_str = inputs
.iter()
.map(|(bb, val)| format!("[{}, {}]", val, bb))
.collect::<Vec<_>>()
.join(", ");
format!("{} phi {}", self.format_dst(dst, types), inputs_str)
}
MirInstruction::NewBox {
dst,
box_type,
args,
} => {
let args_str = args
.iter()
.map(|v| format!("{}", v))
.collect::<Vec<_>>()
.join(", ");
format!(
"{} new {}({})",
self.format_dst(dst, types),
box_type,
args_str
)
}
// Legacy -> Unified print: TypeCheck as TypeOp(check)
MirInstruction::TypeCheck {
dst,
value,
expected_type,
} => {
// Print using unified TypeOp style to avoid naming divergence
format!(
"{} typeop check {} {}",
self.format_dst(dst, types),
value,
expected_type
)
}
MirInstruction::Cast {
dst,
value,
target_type,
} => {
format!(
"{} cast {} to {:?}",
self.format_dst(dst, types),
value,
target_type
)
}
MirInstruction::TypeOp { dst, op, value, ty } => {
let op_str = match op {
super::TypeOpKind::Check => "check",
super::TypeOpKind::Cast => "cast",
};
format!(
"{} typeop {} {} {:?}",
self.format_dst(dst, types),
op_str,
value,
ty
)
}
MirInstruction::ArrayGet { dst, array, index } => {
format!("{} {}[{}]", self.format_dst(dst, types), array, index)
}
MirInstruction::ArraySet {
array,
index,
value,
} => {
format!("{}[{}] = {}", array, index, value)
}
MirInstruction::Copy { dst, src } => {
format!("{} copy {}", self.format_dst(dst, types), src)
}
MirInstruction::Debug { value, message } => {
format!("debug {} \"{}\"", value, message)
}
MirInstruction::Print { value, effects: _ } => {
format!("print {}", value)
}
MirInstruction::Nop => "nop".to_string(),
// Phase 5: Control flow & exception handling
MirInstruction::Throw {
exception,
effects: _,
} => {
format!("throw {}", exception)
}
MirInstruction::Catch {
exception_type,
exception_value,
handler_bb,
} => {
if let Some(ref exc_type) = exception_type {
format!("catch {} {} -> {}", exc_type, exception_value, handler_bb)
} else {
format!("catch * {} -> {}", exception_value, handler_bb)
}
}
MirInstruction::Safepoint => "safepoint".to_string(),
// Phase 6: Box reference operations
MirInstruction::RefNew { dst, box_val } => {
format!("{} ref_new {}", self.format_dst(dst, types), box_val)
}
MirInstruction::RefGet {
dst,
reference,
field,
} => {
format!(
"{} ref_get {}.{}",
self.format_dst(dst, types),
reference,
field
)
}
MirInstruction::RefSet {
reference,
field,
value,
} => {
format!("ref_set {}.{} = {}", reference, field, value)
}
// Legacy -> Unified print: WeakNew as weakref new
MirInstruction::WeakNew { dst, box_val } => {
format!("{} weakref new {}", self.format_dst(dst, types), box_val)
}
// Legacy -> Unified print: WeakLoad as weakref load
MirInstruction::WeakLoad { dst, weak_ref } => {
format!("{} weakref load {}", self.format_dst(dst, types), weak_ref)
}
// Legacy -> Unified print: BarrierRead as barrier read
MirInstruction::BarrierRead { ptr } => {
format!("barrier read {}", ptr)
}
// Legacy -> Unified print: BarrierWrite as barrier write
MirInstruction::BarrierWrite { ptr } => {
format!("barrier write {}", ptr)
}
MirInstruction::WeakRef { dst, op, value } => {
let op_str = match op {
super::WeakRefOp::New => "new",
super::WeakRefOp::Load => "load",
};
format!(
"{} weakref {} {}",
self.format_dst(dst, types),
op_str,
value
)
}
MirInstruction::Barrier { op, ptr } => {
let op_str = match op {
super::BarrierOp::Read => "read",
super::BarrierOp::Write => "write",
};
format!("barrier {} {}", op_str, ptr)
}
// Phase 7: Async/Future Operations
MirInstruction::FutureNew { dst, value } => {
format!("{} future_new {}", self.format_dst(dst, types), value)
}
MirInstruction::FutureSet { future, value } => {
format!("future_set {} = {}", future, value)
}
MirInstruction::Await { dst, future } => {
format!("{} await {}", self.format_dst(dst, types), future)
}
// Phase 9.7: External Function Calls
MirInstruction::ExternCall {
dst,
iface_name,
method_name,
args,
effects,
} => {
let args_str = args
.iter()
.map(|v| format!("{}", v))
.collect::<Vec<_>>()
.join(", ");
if let Some(dst) = dst {
format!(
"{} extern_call {}.{}({}) [effects: {}]",
self.format_dst(dst, types),
iface_name,
method_name,
args_str,
effects
)
} else {
format!(
"extern_call {}.{}({}) [effects: {}]",
iface_name, method_name, args_str, effects
)
}
}
}
// Delegate to helpers to keep this file lean
printer_helpers::format_instruction(instruction, types)
}
/// Format a MIR type
fn format_type(&self, mir_type: &super::MirType) -> String {
match mir_type {
super::MirType::Integer => "i64".to_string(),
super::MirType::Float => "f64".to_string(),
super::MirType::Bool => "i1".to_string(),
super::MirType::String => "str".to_string(),
super::MirType::Box(name) => format!("box<{}>", name),
super::MirType::Array(elem_type) => format!("[{}]", self.format_type(elem_type)),
super::MirType::Future(inner_type) => {
format!("future<{}>", self.format_type(inner_type))
}
super::MirType::Void => "void".to_string(),
super::MirType::Unknown => "?".to_string(),
}
printer_helpers::format_type(mir_type)
}
}

367
src/mir/printer_helpers.rs Normal file
View File

@ -0,0 +1,367 @@
use super::{MirInstruction, MirType, ValueId};
use std::collections::HashMap;
/// Render a MIR type in the printer's compact notation.
///
/// Leaf types use fixed spellings (`i64`, `f64`, `i1`, `str`, `void`, `?`);
/// `Box`, `Array` and `Future` wrap their payload as `box<..>`, `[..]` and
/// `future<..>`, recursing for the latter two.
pub fn format_type(mir_type: &MirType) -> String {
    let fixed = match mir_type {
        MirType::Integer => "i64",
        MirType::Float => "f64",
        MirType::Bool => "i1",
        MirType::String => "str",
        MirType::Void => "void",
        MirType::Unknown => "?",
        // Parameterised types build their text and return directly.
        MirType::Box(name) => return format!("box<{}>", name),
        MirType::Array(elem_type) => return format!("[{}]", format_type(elem_type)),
        MirType::Future(inner_type) => return format!("future<{}>", format_type(inner_type)),
    };
    fixed.to_string()
}
/// Render the left-hand side of an instruction that defines `dst`.
///
/// Yields `"<dst>: <type:?> ="` when `types` has an entry for `dst`, and
/// `"<dst> ="` otherwise. The type is printed with its `Debug` form.
pub fn format_dst(dst: &ValueId, types: &HashMap<ValueId, MirType>) -> String {
    match types.get(dst) {
        Some(ty) => format!("{}: {:?} =", dst, ty),
        None => format!("{} =", dst),
    }
}
/// Join the `Display` renderings of `values` with `", "`.
fn join_values<I>(values: I) -> String
where
    I: IntoIterator,
    I::Item: std::fmt::Display,
{
    values
        .into_iter()
        .map(|v| v.to_string())
        .collect::<Vec<_>>()
        .join(", ")
}

/// Prefix `text` with the `format_dst` rendering when a destination exists.
fn with_optional_dst(
    dst: Option<&ValueId>,
    types: &HashMap<ValueId, MirType>,
    text: String,
) -> String {
    match dst {
        Some(dst) => format!("{} {}", format_dst(dst, types), text),
        None => text,
    }
}

/// Render one MIR instruction as a single line of text.
///
/// Instructions that define a value start with the `format_dst` prefix
/// (`"<dst>: <type> ="` or `"<dst> ="`), looked up in `types`. Call-like
/// instructions whose destination is optional omit the prefix when there is
/// none. The legacy variants (`TypeCheck`, `WeakNew`, `WeakLoad`,
/// `BarrierRead`, `BarrierWrite`) are printed in the same spelling as their
/// unified counterparts (`typeop`, `weakref`, `barrier`).
pub fn format_instruction(
    instruction: &MirInstruction,
    types: &HashMap<ValueId, MirType>,
) -> String {
    match instruction {
        MirInstruction::Const { dst, value } => {
            format!("{} const {}", format_dst(dst, types), value)
        }
        MirInstruction::BinOp { dst, op, lhs, rhs } => {
            format!("{} {} {:?} {}", format_dst(dst, types), lhs, op, rhs)
        }
        MirInstruction::UnaryOp { dst, op, operand } => {
            format!("{} {:?} {}", format_dst(dst, types), op, operand)
        }
        MirInstruction::Compare { dst, op, lhs, rhs } => {
            format!("{} icmp {:?} {}, {}", format_dst(dst, types), op, lhs, rhs)
        }
        MirInstruction::Load { dst, ptr } => {
            format!("{} load {}", format_dst(dst, types), ptr)
        }
        MirInstruction::Store { value, ptr } => {
            format!("store {} -> {}", value, ptr)
        }
        MirInstruction::Call {
            dst,
            func,
            args,
            effects: _,
        } => with_optional_dst(
            dst.as_ref(),
            types,
            format!("call {}({})", func, join_values(args)),
        ),
        MirInstruction::FunctionNew {
            dst,
            params,
            body,
            captures,
            me,
        } => {
            let p = params.join(", ");
            let c = captures
                .iter()
                .map(|(n, v)| format!("{}={}", n, v))
                .collect::<Vec<_>>()
                .join(", ");
            let me_s = me.map(|m| format!(" me={}", m)).unwrap_or_default();
            // The capture list is only printed when there is something in it.
            let cap_s = if c.is_empty() {
                String::new()
            } else {
                format!(" [{}]", c)
            };
            format!(
                "{} function_new ({}) {{...{}}}{}{}",
                format_dst(dst, types),
                p,
                body.len(),
                cap_s,
                me_s
            )
        }
        MirInstruction::BoxCall {
            dst,
            box_val,
            method,
            method_id,
            args,
            effects: _,
        } => {
            // A resolved method id is shown as a `[#id]` suffix after the name.
            let id_suffix = method_id.map(|id| format!("[#{}]", id)).unwrap_or_default();
            with_optional_dst(
                dst.as_ref(),
                types,
                format!(
                    "call {}.{}{}({})",
                    box_val,
                    method,
                    id_suffix,
                    join_values(args)
                ),
            )
        }
        MirInstruction::PluginInvoke {
            dst,
            box_val,
            method,
            args,
            effects: _,
        } => with_optional_dst(
            dst.as_ref(),
            types,
            format!("plugin_invoke {}.{}({})", box_val, method, join_values(args)),
        ),
        MirInstruction::Branch {
            condition,
            then_bb,
            else_bb,
        } => {
            format!("br {}, label {}, label {}", condition, then_bb, else_bb)
        }
        MirInstruction::Jump { target } => {
            format!("br label {}", target)
        }
        MirInstruction::Return { value } => match value {
            Some(value) => format!("ret {}", value),
            None => "ret void".to_string(),
        },
        MirInstruction::Phi { dst, inputs } => {
            // Each incoming edge prints as `[value, block]`.
            let inputs_str = inputs
                .iter()
                .map(|(bb, val)| format!("[{}, {}]", val, bb))
                .collect::<Vec<_>>()
                .join(", ");
            format!("{} phi {}", format_dst(dst, types), inputs_str)
        }
        MirInstruction::NewBox {
            dst,
            box_type,
            args,
        } => {
            format!(
                "{} new {}({})",
                format_dst(dst, types),
                box_type,
                join_values(args)
            )
        }
        // Legacy -> Unified print: TypeCheck as TypeOp(check)
        MirInstruction::TypeCheck {
            dst,
            value,
            expected_type,
        } => {
            format!(
                "{} typeop check {} {}",
                format_dst(dst, types),
                value,
                expected_type
            )
        }
        MirInstruction::Cast {
            dst,
            value,
            target_type,
        } => {
            format!(
                "{} cast {} to {:?}",
                format_dst(dst, types),
                value,
                target_type
            )
        }
        MirInstruction::TypeOp { dst, op, value, ty } => {
            let op_str = match op {
                super::TypeOpKind::Check => "check",
                super::TypeOpKind::Cast => "cast",
            };
            format!(
                "{} typeop {} {} {:?}",
                format_dst(dst, types),
                op_str,
                value,
                ty
            )
        }
        MirInstruction::ArrayGet { dst, array, index } => {
            format!("{} {}[{}]", format_dst(dst, types), array, index)
        }
        MirInstruction::ArraySet {
            array,
            index,
            value,
        } => {
            format!("{}[{}] = {}", array, index, value)
        }
        MirInstruction::Copy { dst, src } => {
            format!("{} copy {}", format_dst(dst, types), src)
        }
        MirInstruction::Debug { value, message } => {
            format!("debug {} \"{}\"", value, message)
        }
        MirInstruction::Print { value, effects: _ } => {
            format!("print {}", value)
        }
        MirInstruction::Nop => "nop".to_string(),
        // Phase 5: Control flow & exception handling
        MirInstruction::Throw {
            exception,
            effects: _,
        } => {
            format!("throw {}", exception)
        }
        MirInstruction::Catch {
            exception_type,
            exception_value,
            handler_bb,
        } => match exception_type {
            Some(exc_type) => {
                format!("catch {} {} -> {}", exc_type, exception_value, handler_bb)
            }
            // No declared type: printed as a catch-all `*`.
            None => format!("catch * {} -> {}", exception_value, handler_bb),
        },
        MirInstruction::Safepoint => "safepoint".to_string(),
        // Phase 6: Box reference operations
        MirInstruction::RefNew { dst, box_val } => {
            format!("{} ref_new {}", format_dst(dst, types), box_val)
        }
        MirInstruction::RefGet {
            dst,
            reference,
            field,
        } => {
            format!(
                "{} ref_get {}.{}",
                format_dst(dst, types),
                reference,
                field
            )
        }
        MirInstruction::RefSet {
            reference,
            field,
            value,
        } => {
            format!("ref_set {}.{} = {}", reference, field, value)
        }
        // Legacy -> Unified print: WeakNew/WeakLoad/BarrierRead/BarrierWrite
        MirInstruction::WeakNew { dst, box_val } => {
            format!("{} weakref new {}", format_dst(dst, types), box_val)
        }
        MirInstruction::WeakLoad { dst, weak_ref } => {
            format!("{} weakref load {}", format_dst(dst, types), weak_ref)
        }
        MirInstruction::BarrierRead { ptr } => {
            format!("barrier read {}", ptr)
        }
        MirInstruction::BarrierWrite { ptr } => {
            format!("barrier write {}", ptr)
        }
        // Phase 6: WeakRef/Barrier unified
        MirInstruction::WeakRef { dst, op, value } => {
            let op_str = match op {
                super::WeakRefOp::New => "new",
                super::WeakRefOp::Load => "load",
            };
            format!("{} weakref {} {}", format_dst(dst, types), op_str, value)
        }
        MirInstruction::Barrier { op, ptr } => {
            let op_str = match op {
                super::BarrierOp::Read => "read",
                super::BarrierOp::Write => "write",
            };
            format!("barrier {} {}", op_str, ptr)
        }
        // Phase 7: Async/Future Operations
        MirInstruction::FutureNew { dst, value } => {
            format!("{} future_new {}", format_dst(dst, types), value)
        }
        MirInstruction::FutureSet { future, value } => {
            format!("future_set {} = {}", future, value)
        }
        MirInstruction::Await { dst, future } => {
            format!("{} await {}", format_dst(dst, types), future)
        }
        // Phase 9.7: External Function Calls
        MirInstruction::ExternCall {
            dst,
            iface_name,
            method_name,
            args,
            effects,
        } => with_optional_dst(
            dst.as_ref(),
            types,
            format!(
                "extern_call {}.{}({}) [effects: {}]",
                iface_name,
                method_name,
                join_values(args),
                effects
            ),
        ),
    }
}

View File

@ -8,10 +8,13 @@ use super::{BasicBlockId, MirFunction, MirModule, ValueId};
use crate::debug::log as dlog;
use crate::mir::verification_types::VerificationError;
use std::collections::HashMap;
mod cfg;
mod dom;
mod awaits;
mod barrier;
mod legacy;
mod utils;
mod ssa;
// VerificationError moved to crate::mir::verification_types
@ -286,186 +289,23 @@ impl MirVerifier {
/// Verify SSA form properties.
///
/// Returns `Ok(())` without checking anything when
/// `crate::config::env::verify_allow_no_phi()` is true. Otherwise reports
/// `VerificationError::MultipleDefinition` for every value that is defined more
/// than once and `VerificationError::UndefinedValue` for every used value that
/// has no definition anywhere in `function`.
///
/// NOTE(review): the inline checks are followed by a trailing
/// `ssa::check_ssa_form(function)` call. This looks like the old inline body and
/// its replacement shown together; confirm which of the two is meant to remain.
fn verify_ssa_form(&self, function: &MirFunction) -> Result<(), Vec<VerificationError>> {
// Allow non-SSA (edge-copy) mode for PHI-less MIR when enabled via env
if crate::config::env::verify_allow_no_phi() {
return Ok(());
}
let mut errors = Vec::new();
// Maps each defined value to the (block, instruction index) of its definition.
let mut definitions = HashMap::new();
// Check that each value is defined exactly once
for (block_id, block) in &function.blocks {
for (inst_idx, instruction) in block.all_instructions().enumerate() {
if let Some(dst) = instruction.dst_value() {
// `insert` returns the previous entry, so `Some` means `dst` was already defined.
if let Some((first_block, _)) = definitions.insert(dst, (*block_id, inst_idx)) {
errors.push(VerificationError::MultipleDefinition {
value: dst,
first_block,
second_block: *block_id,
});
}
}
}
}
// Check that all used values are defined
for (block_id, block) in &function.blocks {
for (inst_idx, instruction) in block.all_instructions().enumerate() {
for used_value in instruction.used_values() {
if !definitions.contains_key(&used_value) {
errors.push(VerificationError::UndefinedValue {
value: used_value,
block: *block_id,
instruction_index: inst_idx,
});
}
}
}
}
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
ssa::check_ssa_form(function)
}
/// Verify dominance relations (def must dominate use across blocks).
///
/// Returns `Ok(())` without checking anything when
/// `crate::config::env::verify_allow_no_phi()` is true. Otherwise, for each
/// non-Phi instruction, every used value that has a recorded defining block
/// different from the using block must have that defining block in the set
/// `utils::compute_dominators` yields for the using block; each miss is reported
/// as `VerificationError::DominatorViolation`. Used values with no recorded
/// defining block are ignored here.
///
/// NOTE(review): the inline check is followed by a trailing
/// `dom::check_dominance(function)` call. This looks like the old inline body and
/// its replacement shown together; confirm which of the two is meant to remain.
fn verify_dominance(&self, function: &MirFunction) -> Result<(), Vec<VerificationError>> {
// Allow non-SSA (edge-copy) mode for PHI-less MIR when enabled via env
if crate::config::env::verify_allow_no_phi() {
return Ok(());
}
let mut errors = Vec::new();
// Build def -> block map and dominators
let def_block = utils::compute_def_blocks(function);
let dominators = utils::compute_dominators(function);
for (use_block_id, block) in &function.blocks {
for instruction in block.all_instructions() {
// Phi inputs are special: they are defined in predecessors; skip dominance check for them
if let super::MirInstruction::Phi { .. } = instruction {
continue;
}
for used_value in instruction.used_values() {
if let Some(&def_bb) = def_block.get(&used_value) {
// A definition in the same block as the use is not checked here.
if def_bb != *use_block_id {
// Panics if `dominators` has no entry for the using block.
let doms = dominators.get(use_block_id).unwrap();
if !doms.contains(&def_bb) {
errors.push(VerificationError::DominatorViolation {
value: used_value,
use_block: *use_block_id,
def_block: def_bb,
});
}
}
}
}
}
}
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
dom::check_dominance(function)
}
/// Verify control flow graph integrity.
///
/// Reports `VerificationError::ControlFlowError` for every successor that is not
/// a key of `function.blocks`, and `VerificationError::UnreachableBlock` for
/// every block, other than the entry block, that is missing from the set
/// returned by `utils::compute_reachable_blocks`.
///
/// NOTE(review): the inline checks are followed by a trailing
/// `cfg::check_control_flow(function)` call. This looks like the old inline body
/// and its replacement shown together; confirm which of the two is meant to remain.
fn verify_control_flow(&self, function: &MirFunction) -> Result<(), Vec<VerificationError>> {
let mut errors = Vec::new();
// Check that all referenced blocks exist
for (block_id, block) in &function.blocks {
for successor in &block.successors {
if !function.blocks.contains_key(successor) {
errors.push(VerificationError::ControlFlowError {
block: *block_id,
reason: format!("References non-existent block {}", successor),
});
}
}
}
// Check that all blocks are reachable from entry
let reachable = utils::compute_reachable_blocks(function);
for block_id in function.blocks.keys() {
// The entry block is never reported, even if it is absent from `reachable`.
if !reachable.contains(block_id) && *block_id != function.entry_block {
errors.push(VerificationError::UnreachableBlock { block: *block_id });
}
}
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
cfg::check_control_flow(function)
}
/// Verify that blocks with multiple predecessors do not use predecessor-defined values directly.
/// In merge blocks, values coming from predecessors must be routed through Phi.
///
/// Returns `Ok(())` without checking anything when
/// `crate::config::env::verify_allow_no_phi()` is true. Otherwise, for each block
/// with at least two predecessors, every value used by a non-Phi instruction
/// whose defining block is not in the block's dominator set must be the
/// destination of a Phi in that same block; each violation is reported as
/// `VerificationError::MergeUsesPredecessorValue`.
///
/// NOTE(review): the inline check is followed by a trailing
/// `cfg::check_merge_uses(function)` call. This looks like the old inline body and
/// its replacement shown together; confirm which of the two is meant to remain.
fn verify_merge_uses(&self, function: &MirFunction) -> Result<(), Vec<VerificationError>> {
// Allow non-SSA (edge-copy) mode for PHI-less MIR when enabled via env
if crate::config::env::verify_allow_no_phi() {
return Ok(());
}
let mut errors = Vec::new();
let preds = utils::compute_predecessors(function);
let def_block = utils::compute_def_blocks(function);
let dominators = utils::compute_dominators(function);
// Helper: collect phi dsts in a block
let mut phi_dsts_in_block: std::collections::HashMap<
BasicBlockId,
std::collections::HashSet<ValueId>,
> = std::collections::HashMap::new();
for (bid, block) in &function.blocks {
// `or_default` creates an (initially empty) set for every block, Phi or not.
let set = phi_dsts_in_block.entry(*bid).or_default();
for inst in block.all_instructions() {
if let super::MirInstruction::Phi { dst, .. } = inst {
set.insert(*dst);
}
}
}
for (bid, block) in &function.blocks {
// Blocks without a predecessor entry, or with fewer than two predecessors, are not merge blocks.
let Some(pred_list) = preds.get(bid) else {
continue;
};
if pred_list.len() < 2 {
continue;
}
let phi_dsts = phi_dsts_in_block.get(bid);
// Panics if `dominators` has no entry for this block.
let doms_of_block = dominators.get(bid).unwrap();
// check instructions including terminator
for inst in block.all_instructions() {
// Skip Phi: its inputs are allowed to come from predecessors by SSA definition
if let super::MirInstruction::Phi { .. } = inst {
continue;
}
for used in inst.used_values() {
if let Some(&db) = def_block.get(&used) {
// If def doesn't dominate merge block, it must be routed via phi
if !doms_of_block.contains(&db) {
let is_phi_dst = phi_dsts.map(|s| s.contains(&used)).unwrap_or(false);
if !is_phi_dst {
errors.push(VerificationError::MergeUsesPredecessorValue {
value: used,
merge_block: *bid,
pred_block: db,
});
}
}
}
}
}
}
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
cfg::check_merge_uses(function)
}
/// Get all verification errors from the last run
@ -487,465 +327,7 @@ impl Default for MirVerifier {
}
}
impl std::fmt::Display for VerificationError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
VerificationError::UndefinedValue {
value,
block,
instruction_index,
} => {
write!(
f,
"Undefined value {} used in block {} at instruction {}",
value, block, instruction_index
)
}
VerificationError::MultipleDefinition {
value,
first_block,
second_block,
} => {
write!(
f,
"Value {} defined multiple times: first in block {}, again in block {}",
value, first_block, second_block
)
}
VerificationError::InvalidPhi {
phi_value,
block,
reason,
} => {
write!(
f,
"Invalid phi function {} in block {}: {}",
phi_value, block, reason
)
}
VerificationError::UnreachableBlock { block } => {
write!(f, "Unreachable block {}", block)
}
VerificationError::ControlFlowError { block, reason } => {
write!(f, "Control flow error in block {}: {}", block, reason)
}
VerificationError::DominatorViolation {
value,
use_block,
def_block,
} => {
write!(
f,
"Value {} used in block {} but defined in non-dominating block {}",
value, use_block, def_block
)
}
VerificationError::MergeUsesPredecessorValue {
value,
merge_block,
pred_block,
} => {
write!(
f,
"Merge block {} uses predecessor-defined value {} from block {} without Phi",
merge_block, value, pred_block
)
}
VerificationError::InvalidWeakRefSource {
weak_ref,
block,
instruction_index,
reason,
} => {
write!(
f,
"Invalid WeakRef source {} in block {} at {}: {}",
weak_ref, block, instruction_index, reason
)
}
VerificationError::InvalidBarrierPointer {
ptr,
block,
instruction_index,
reason,
} => {
write!(
f,
"Invalid Barrier pointer {} in block {} at {}: {}",
ptr, block, instruction_index, reason
)
}
VerificationError::SuspiciousBarrierContext {
block,
instruction_index,
note,
} => {
write!(
f,
"Suspicious Barrier context in block {} at {}: {}",
block, instruction_index, note
)
}
VerificationError::UnsupportedLegacyInstruction {
block,
instruction_index,
name,
} => {
write!(
f,
"Unsupported legacy instruction '{}' in block {} at {} (enable rewrite passes)",
name, block, instruction_index
)
}
VerificationError::MissingCheckpointAroundAwait {
block,
instruction_index,
position,
} => {
write!(
f,
"Missing {} checkpoint around await in block {} at instruction {}",
position, block, instruction_index
)
}
VerificationError::EdgeCopyStrictViolation { block, value, pred_block, reason } => {
if let Some(pb) = pred_block {
write!(
f,
"EdgeCopyStrictViolation for value {} at merge block {} from pred {}: {}",
value, block, pb, reason
)
} else {
write!(
f,
"EdgeCopyStrictViolation for value {} at merge block {}: {}",
value, block, reason
)
}
}
}
}
}
// Display impl moved to verification_types.rs
#[cfg(test)]
mod tests {
use super::*;
use crate::ast::{ASTNode, LiteralValue, Span};
use crate::mir::{
BasicBlock, EffectMask, FunctionSignature, MirBuilder, MirFunction, MirPrinter, MirType,
};
#[test]
fn test_valid_function_verification() {
    // A freshly constructed function named `test`: no params, Void return, PURE effects.
    let sig = FunctionSignature {
        name: String::from("test"),
        params: Vec::new(),
        return_type: MirType::Void,
        effects: EffectMask::PURE,
    };
    let func = MirFunction::new(sig, BasicBlockId::new(0));

    let mut checker = MirVerifier::new();
    let outcome = checker.verify_function(&func);
    assert!(outcome.is_ok(), "Valid function should pass verification");
}
#[test]
fn test_undefined_value_detection() {
// Placeholder: the body contains no statements, so this test currently passes
// without exercising the verifier.
// TODO: build a function that uses a value with no definition and assert that
// the verifier reports it. The exact construction depends on the specific
// test case chosen.
}
#[test]
fn test_if_merge_uses_phi_not_predecessor() {
    // Program:
    // if true { result = "A" } else { result = "B" }
    // result

    // Small builders keep the AST literal below readable.
    let result_var = || ASTNode::Variable {
        name: "result".to_string(),
        span: Span::unknown(),
    };
    let assign_result = |text: &str| ASTNode::Assignment {
        target: Box::new(result_var()),
        value: Box::new(ASTNode::Literal {
            value: LiteralValue::String(text.to_string()),
            span: Span::unknown(),
        }),
        span: Span::unknown(),
    };

    let ast = ASTNode::Program {
        statements: vec![
            ASTNode::If {
                condition: Box::new(ASTNode::Literal {
                    value: LiteralValue::Bool(true),
                    span: Span::unknown(),
                }),
                then_body: vec![assign_result("A")],
                else_body: Some(vec![assign_result("B")]),
                span: Span::unknown(),
            },
            result_var(),
        ],
        span: Span::unknown(),
    };

    let mut builder = MirBuilder::new();
    let module = builder.build_module(ast).expect("build mir");

    // Verify: should be OK (no MergeUsesPredecessorValue)
    let mut verifier = MirVerifier::new();
    let res = verifier.verify_module(&module);
    if let Err(errs) = &res {
        eprintln!("Verifier errors: {:?}", errs);
    }
    assert!(res.is_ok(), "MIR should pass merge-phi verification");

    // Optional: ensure printer shows a phi in merge and ret returns a defined value.
    // The printer binding is immutable, matching the other tests in this module
    // that call `print_module` on a non-`mut` printer.
    let printer = MirPrinter::verbose();
    let mir_text = printer.print_module(&module);
    assert!(
        mir_text.contains("phi"),
        "Printed MIR should contain a phi in merge block\n{}",
        mir_text
    );
}
#[test]
fn test_merge_use_before_phi_detected() {
    use crate::mir::{ConstValue, MirInstruction};

    // Hand-built diamond: entry branches to then/else, both jump to merge, and
    // merge returns the value defined in the `then` arm with no phi in between.
    let sig = FunctionSignature {
        name: "merge_bad".to_string(),
        params: vec![],
        return_type: MirType::String,
        effects: EffectMask::PURE,
    };
    let entry = BasicBlockId::new(0);
    let mut func = MirFunction::new(sig, entry);

    let then_bb = BasicBlockId::new(1);
    let else_bb = BasicBlockId::new(2);
    let merge_bb = BasicBlockId::new(3);

    // Builds an arm block: one string constant followed by a jump to the merge block.
    let arm_block = |id: BasicBlockId, dst, text: &str| {
        let mut bb = BasicBlock::new(id);
        bb.add_instruction(MirInstruction::Const {
            dst,
            value: ConstValue::String(text.to_string()),
        });
        bb.add_instruction(MirInstruction::Jump { target: merge_bb });
        bb
    };

    let cond = func.next_value_id(); // %0
    {
        let entry_block = func.get_block_mut(entry).unwrap();
        entry_block.add_instruction(MirInstruction::Const {
            dst: cond,
            value: ConstValue::Bool(true),
        });
        entry_block.add_instruction(MirInstruction::Branch {
            condition: cond,
            then_bb,
            else_bb,
        });
    }

    let then_val = func.next_value_id(); // %1
    func.add_block(arm_block(then_bb, then_val, "A"));

    let else_val = func.next_value_id(); // %2
    func.add_block(arm_block(else_bb, else_val, "B"));

    // Illegal: directly use the `then` value from a predecessor
    let mut merge_block = BasicBlock::new(merge_bb);
    merge_block.add_instruction(MirInstruction::Return {
        value: Some(then_val),
    });
    func.add_block(merge_block);
    func.update_cfg();

    let mut verifier = MirVerifier::new();
    let outcome = verifier.verify_function(&func);
    assert!(
        outcome.is_err(),
        "Verifier should error on merge use without phi"
    );
    let errs = outcome.err().unwrap();
    let is_expected = |e: &VerificationError| {
        matches!(
            e,
            VerificationError::MergeUsesPredecessorValue { .. }
                | VerificationError::DominatorViolation { .. }
        )
    };
    assert!(
        errs.iter().any(is_expected),
        "Expected merge/dominator error, got: {:?}",
        errs
    );
}
#[test]
fn test_loop_phi_normalization() {
    // Program:
    // local i = 0
    // loop(false) { i = 1 }
    // i

    // Small builders for the repeated literal / variable nodes.
    let lit = |value: LiteralValue| ASTNode::Literal {
        value,
        span: Span::unknown(),
    };
    let var_i = || ASTNode::Variable {
        name: "i".to_string(),
        span: Span::unknown(),
    };

    let declare_i = ASTNode::Local {
        variables: vec!["i".to_string()],
        initial_values: vec![Some(Box::new(lit(LiteralValue::Integer(0))))],
        span: Span::unknown(),
    };
    let loop_stmt = ASTNode::Loop {
        condition: Box::new(lit(LiteralValue::Bool(false))),
        body: vec![ASTNode::Assignment {
            target: Box::new(var_i()),
            value: Box::new(lit(LiteralValue::Integer(1))),
            span: Span::unknown(),
        }],
        span: Span::unknown(),
    };
    let ast = ASTNode::Program {
        statements: vec![declare_i, loop_stmt, var_i()],
        span: Span::unknown(),
    };

    let mut mir_builder = MirBuilder::new();
    let module = mir_builder.build_module(ast).expect("build mir");

    // Verify SSA/dominance: should pass
    let mut verifier = MirVerifier::new();
    let outcome = verifier.verify_module(&module);
    if let Err(errs) = &outcome {
        eprintln!("Verifier errors: {:?}", errs);
    }
    assert!(
        outcome.is_ok(),
        "MIR loop with phi normalization should pass verification"
    );

    // Ensure phi is printed (header phi for variable i)
    let mir_text = MirPrinter::verbose().print_module(&module);
    assert!(
        mir_text.contains("phi"),
        "Printed MIR should contain a phi for loop header\n{}",
        mir_text
    );
}
#[test]
fn test_loop_nested_if_phi() {
    // Program:
    // local x = 0
    // loop(false) { if true { x = 1 } else { x = 2 } }
    // x

    // Small builders for the repeated literal / variable / assignment nodes.
    let lit = |value: LiteralValue| ASTNode::Literal {
        value,
        span: Span::unknown(),
    };
    let var_x = || ASTNode::Variable {
        name: "x".to_string(),
        span: Span::unknown(),
    };
    let set_x = |n| ASTNode::Assignment {
        target: Box::new(var_x()),
        value: Box::new(lit(LiteralValue::Integer(n))),
        span: Span::unknown(),
    };

    let declare_x = ASTNode::Local {
        variables: vec!["x".to_string()],
        initial_values: vec![Some(Box::new(lit(LiteralValue::Integer(0))))],
        span: Span::unknown(),
    };
    let nested_if = ASTNode::If {
        condition: Box::new(lit(LiteralValue::Bool(true))),
        then_body: vec![set_x(1)],
        else_body: Some(vec![set_x(2)]),
        span: Span::unknown(),
    };
    let loop_stmt = ASTNode::Loop {
        condition: Box::new(lit(LiteralValue::Bool(false))),
        body: vec![nested_if],
        span: Span::unknown(),
    };
    let ast = ASTNode::Program {
        statements: vec![declare_x, loop_stmt, var_x()],
        span: Span::unknown(),
    };

    let mut mir_builder = MirBuilder::new();
    let module = mir_builder.build_module(ast).expect("build mir");

    let mut verifier = MirVerifier::new();
    let outcome = verifier.verify_module(&module);
    if let Err(errs) = &outcome {
        eprintln!("Verifier errors: {:?}", errs);
    }
    assert!(
        outcome.is_ok(),
        "Nested if in loop should pass verification with proper phis"
    );

    let mir_text = MirPrinter::verbose().print_module(&module);
    assert!(
        mir_text.contains("phi"),
        "Printed MIR should contain phi nodes for nested if/loop\n{}",
        mir_text
    );
}
}
mod tests {}

View File

@ -74,3 +74,143 @@ pub enum VerificationError {
reason: String,
},
}
impl std::fmt::Display for VerificationError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
VerificationError::UndefinedValue {
value,
block,
instruction_index,
} => {
write!(
f,
"Undefined value {} used in block {} at instruction {}",
value, block, instruction_index
)
}
VerificationError::MultipleDefinition {
value,
first_block,
second_block,
} => {
write!(
f,
"Value {} defined multiple times: first in block {}, again in block {}",
value, first_block, second_block
)
}
VerificationError::InvalidPhi {
phi_value,
block,
reason,
} => {
write!(
f,
"Invalid phi function {} in block {}: {}",
phi_value, block, reason
)
}
VerificationError::UnreachableBlock { block } => {
write!(f, "Unreachable block {}", block)
}
VerificationError::ControlFlowError { block, reason } => {
write!(f, "Control flow error in block {}: {}", block, reason)
}
VerificationError::DominatorViolation {
value,
use_block,
def_block,
} => {
write!(
f,
"Value {} used in block {} but defined in non-dominating block {}",
value, use_block, def_block
)
}
VerificationError::MergeUsesPredecessorValue {
value,
merge_block,
pred_block,
} => {
write!(
f,
"Merge block {} uses predecessor-defined value {} from block {} without Phi",
merge_block, value, pred_block
)
}
VerificationError::InvalidWeakRefSource {
weak_ref,
block,
instruction_index,
reason,
} => {
write!(
f,
"Invalid WeakRef source {} in block {} at {}: {}",
weak_ref, block, instruction_index, reason
)
}
VerificationError::InvalidBarrierPointer {
ptr,
block,
instruction_index,
reason,
} => {
write!(
f,
"Invalid Barrier pointer {} in block {} at {}: {}",
ptr, block, instruction_index, reason
)
}
VerificationError::SuspiciousBarrierContext {
block,
instruction_index,
note,
} => {
write!(
f,
"Suspicious Barrier context in block {} at {}: {}",
block, instruction_index, note
)
}
VerificationError::UnsupportedLegacyInstruction {
block,
instruction_index,
name,
} => {
write!(
f,
"Unsupported legacy instruction '{}' in block {} at {} (enable rewrite passes)",
name, block, instruction_index
)
}
VerificationError::MissingCheckpointAroundAwait {
block,
instruction_index,
position,
} => {
write!(
f,
"Missing {} checkpoint around await in block {} at instruction {}",
position, block, instruction_index
)
}
VerificationError::EdgeCopyStrictViolation { block, value, pred_block, reason } => {
if let Some(pb) = pred_block {
write!(
f,
"EdgeCopyStrictViolation for value {} at merge block {} from pred {}: {}",
value, block, pb, reason
)
} else {
write!(
f,
"EdgeCopyStrictViolation for value {} at merge block {}: {}",
value, block, reason
)
}
}
}
}
}

View File

@ -176,12 +176,39 @@ impl NyashParser {
break;
}
// 🔥 static { } ブロックの処理
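// 🔥 Handle the static initializer
// Gate: when NYASH_PARSER_STATIC_INIT_STRICT=1,
// - treat it as a static initializer only when the next token is '{'
// - when the next token is 'box' or 'function', treat it as the start of a top-level declaration and close this box body
// Default (gate OFF) keeps the legacy behavior (always expects static { ... })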
if self.match_token(&TokenType::STATIC) {
self.advance(); // consume 'static'
let static_body = self.parse_block_statements()?;
static_init = Some(static_body);
continue;
let strict = std::env::var("NYASH_PARSER_STATIC_INIT_STRICT").ok().as_deref() == Some("1");
if strict {
match self.peek_token() {
TokenType::LBRACE => {
self.advance(); // consume 'static'
let static_body = self.parse_block_statements()?;
static_init = Some(static_body);
continue;
}
TokenType::BOX | TokenType::FUNCTION => {
// トップレベルの `static box|function` が続くシーム: ここで box を閉じる
break;
}
_ => {
// 不明な形は従来通り initializer として解釈(互換重視)
self.advance();
let static_body = self.parse_block_statements()?;
static_init = Some(static_body);
continue;
}
}
} else {
self.advance(); // consume 'static'
let static_body = self.parse_block_statements()?;
static_init = Some(static_body);
continue;
}
}
// initブロックの処理

View File

@ -175,7 +175,50 @@ impl NyashParser {
input: impl Into<String>,
fuel: Option<usize>,
) -> Result<ASTNode, ParseError> {
let mut tokenizer = crate::tokenizer::NyashTokenizer::new(input);
// Normalize logical operators '||'/'&&' to 'or'/'and' before tokenization (outside strings/comments)
/// Rewrites `||` to ` or ` and `&&` to ` and ` (each with a space on both sides).
///
/// Text inside double-quoted strings (with backslash escapes), `//` and `#` line
/// comments, and `/* ... */` block comments is copied through untouched. A `|>`
/// pair and lone `|` / `&` characters are also copied through unchanged.
fn normalize_logical_ops(src: &str) -> String {
    /// Lexical context the scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Ctx {
        Code,
        Str,
        LineComment,
        BlockComment,
    }

    let mut result = String::with_capacity(src.len());
    let mut chars = src.chars().peekable();
    let mut ctx = Ctx::Code;

    while let Some(ch) = chars.next() {
        match ctx {
            Ctx::LineComment => {
                result.push(ch);
                // A newline ends the comment.
                if ch == '\n' {
                    ctx = Ctx::Code;
                }
            }
            Ctx::BlockComment => {
                result.push(ch);
                if ch == '*' && matches!(chars.peek(), Some('/')) {
                    chars.next();
                    result.push('/');
                    ctx = Ctx::Code;
                }
            }
            Ctx::Str => {
                result.push(ch);
                if ch == '\\' {
                    // Copy the escaped character verbatim so `\"` does not close the string.
                    if let Some(escaped) = chars.next() {
                        result.push(escaped);
                    }
                } else if ch == '"' {
                    ctx = Ctx::Code;
                }
            }
            Ctx::Code => match ch {
                '"' => {
                    result.push('"');
                    ctx = Ctx::Str;
                }
                '/' => match chars.peek() {
                    Some('/') => {
                        chars.next();
                        result.push_str("//");
                        ctx = Ctx::LineComment;
                    }
                    Some('*') => {
                        chars.next();
                        result.push_str("/*");
                        ctx = Ctx::BlockComment;
                    }
                    _ => result.push('/'),
                },
                '#' => {
                    result.push('#');
                    ctx = Ctx::LineComment;
                }
                '|' => match chars.peek() {
                    Some('|') => {
                        chars.next();
                        result.push_str(" or ");
                    }
                    Some('>') => {
                        chars.next();
                        result.push_str("|>");
                    }
                    _ => result.push('|'),
                },
                '&' => {
                    if matches!(chars.peek(), Some('&')) {
                        chars.next();
                        result.push_str(" and ");
                    } else {
                        result.push('&');
                    }
                }
                other => result.push(other),
            },
        }
    }
    result
}
let input_s: String = input.into();
let pre = normalize_logical_ops(&input_s);
let mut tokenizer = crate::tokenizer::NyashTokenizer::new(pre);
let tokens = tokenizer.tokenize()?;
let mut parser = Self::new(tokens);

152
src/runner/jit_direct.rs Normal file
View File

@ -0,0 +1,152 @@
#![cfg(feature = "jit-direct-only")]
use super::*;
impl NyashRunner {
/// Run a file through independent JIT engine (no VM execute loop)
///
/// Pipeline: read `filename`, parse it, compile it to MIR, look up the function
/// named `main`, check it against the JIT policy, compile it with `JitEngine`,
/// build arguments from the `NYASH_JIT_ARGS` environment variable, execute, and
/// print the result to stdout.
///
/// Every failure is reported through the local `emit_err` closure and then ends
/// the process with `std::process::exit(1)`; this function does not return an
/// error value.
pub(crate) fn run_file_jit_direct(&self, filename: &str) {
use nyash_rust::{mir::MirCompiler, parser::NyashParser};
use std::fs;
// Error reporter: prints a JSON object on stdout when NYASH_JIT_STATS_JSON=1 or
// NYASH_JIT_ERROR_JSON=1, otherwise a tagged line on stderr.
let emit_err = |phase: &str, code: &str, msg: &str| {
if std::env::var("NYASH_JIT_STATS_JSON").ok().as_deref() == Some("1")
|| std::env::var("NYASH_JIT_ERROR_JSON").ok().as_deref() == Some("1")
{
let payload = serde_json::json!({
"kind": "jit_direct_error",
"phase": phase,
"code": code,
"message": msg,
"file": filename,
});
println!("{}", payload.to_string());
} else {
eprintln!("[JIT-direct][{}][{}] {}", phase, code, msg);
}
};
// Front end: source text -> AST -> MIR module.
let code = match fs::read_to_string(filename) {
Ok(s) => s,
Err(e) => { emit_err("read_file", "IO", &format!("{}", e)); std::process::exit(1); }
};
let ast = match NyashParser::parse_from_string(&code) {
Ok(a) => a,
Err(e) => { emit_err("parse", "SYNTAX", &format!("{}", e)); std::process::exit(1); }
};
let mut mc = MirCompiler::new();
let cr = match mc.compile(ast) {
Ok(m) => m,
Err(e) => { emit_err("mir", "MIR_COMPILE", &format!("{}", e)); std::process::exit(1); }
};
// Only the function registered under the name "main" is compiled and run.
let func = match cr.module.functions.get("main") {
Some(f) => f,
None => { emit_err("mir", "NO_MAIN", "No main function found"); std::process::exit(1); }
};
// Refuse write-effects in jit-direct when policy.read_only
{
use nyash_rust::mir::effect::Effect;
let policy = nyash_rust::jit::policy::current();
// Count instructions and terminators whose effect mask contains WriteHeap.
let mut writes = 0usize;
for (_bbid, bb) in func.blocks.iter() {
for inst in bb.instructions.iter() {
let mask = inst.effects();
if mask.contains(Effect::WriteHeap) { writes += 1; }
}
if let Some(term) = &bb.terminator {
if term.effects().contains(Effect::WriteHeap) { writes += 1; }
}
}
if policy.read_only && writes > 0 {
emit_err("policy","WRITE_EFFECTS", &format!("write-effects detected ({} ops). jit-direct is read-only at this stage.", writes));
std::process::exit(1);
}
}
// PHI-min config for jit-direct
{
let mut cfg = nyash_rust::jit::config::current();
cfg.phi_min = true;
nyash_rust::jit::config::set_current(cfg);
}
// minimal runtime hooks
{
let rt = nyash_rust::runtime::NyashRuntime::new();
nyash_rust::runtime::global_hooks::set_from_runtime(&rt);
}
let mut engine = nyash_rust::jit::engine::JitEngine::new();
match engine.compile_function("main", func) {
Some(h) => {
nyash_rust::jit::events::emit("compile", &func.signature.name, Some(h), None, serde_json::json!({}));
// parse NYASH_JIT_ARGS
// Comma-separated entries; empty entries are skipped. Tagged forms: `i:<i64>`,
// `f:<f64>`, `b:<text>` (true only for 1/true/True/TRUE, otherwise false),
// `h:<u64>`. Untagged `true`/`false` (any ASCII case) and `1`/`0` become Bool;
// anything else is tried as i64, then f64. Entries that fail to parse are dropped.
let mut jit_args: Vec<nyash_rust::jit::abi::JitValue> = Vec::new();
if let Ok(s) = std::env::var("NYASH_JIT_ARGS") { for raw in s.split(',') { let t = raw.trim(); if t.is_empty() { continue; } let v = if let Some(rest) = t.strip_prefix("i:") { rest.parse::<i64>().ok().map(nyash_rust::jit::abi::JitValue::I64) } else if let Some(rest) = t.strip_prefix("f:") { rest.parse::<f64>().ok().map(nyash_rust::jit::abi::JitValue::F64) } else if let Some(rest) = t.strip_prefix("b:") { let b = matches!(rest, "1"|"true"|"True"|"TRUE"); Some(nyash_rust::jit::abi::JitValue::Bool(b)) } else if let Some(rest) = t.strip_prefix("h:") { rest.parse::<u64>().ok().map(nyash_rust::jit::abi::JitValue::Handle) } else if t.eq_ignore_ascii_case("true") || t == "1" { Some(nyash_rust::jit::abi::JitValue::Bool(true)) } else if t.eq_ignore_ascii_case("false") || t == "0" { Some(nyash_rust::jit::abi::JitValue::Bool(false)) } else if let Ok(iv) = t.parse::<i64>() { Some(nyash_rust::jit::abi::JitValue::I64(iv)) } else if let Ok(fv) = t.parse::<f64>() { Some(nyash_rust::jit::abi::JitValue::F64(fv)) } else { None }; if let Some(jv) = v { jit_args.push(jv); } } }
// coerce to MIR signature
use nyash_rust::mir::MirType;
let expected = &func.signature.params;
// The number of parsed arguments must equal the number of declared parameters.
if expected.len() != jit_args.len() { emit_err("args","COUNT_MISMATCH", &format!("expected={}, passed={}", expected.len(), jit_args.len())); eprintln!("Hint: set NYASH_JIT_ARGS as comma-separated values, e.g., i:42,f:3.14,b:true"); std::process::exit(1); }
let mut coerced: Vec<nyash_rust::jit::abi::JitValue> = Vec::with_capacity(jit_args.len());
// Convert each argument to its declared parameter type: Integer accepts
// I64/F64 (cast with `as i64`)/Bool (1 or 0); Float accepts F64/I64; Bool
// accepts Bool/I64 (non-zero is true).
// NOTE(review): the fallback arms call `adapter::from_jit_value(got)`, the same
// helper used further down to turn the return value into a `VMValue`, while the
// sibling arms here build `JitValue`s — confirm the fallback yields the intended type.
for (exp, got) in expected.iter().zip(jit_args.iter()) {
let cv = match exp {
MirType::Integer => match got { nyash_rust::jit::abi::JitValue::I64(v)=>nyash_rust::jit::abi::JitValue::I64(*v), nyash_rust::jit::abi::JitValue::F64(f)=>nyash_rust::jit::abi::JitValue::I64(*f as i64), nyash_rust::jit::abi::JitValue::Bool(b)=>nyash_rust::jit::abi::JitValue::I64(if *b {1}else{0}), _=>nyash_rust::jit::abi::adapter::from_jit_value(got) },
MirType::Float => match got { nyash_rust::jit::abi::JitValue::F64(v)=>nyash_rust::jit::abi::JitValue::F64(*v), nyash_rust::jit::abi::JitValue::I64(i)=>nyash_rust::jit::abi::JitValue::F64(*i as f64), _=>nyash_rust::jit::abi::adapter::from_jit_value(got) },
MirType::Bool => match got { nyash_rust::jit::abi::JitValue::Bool(b)=>nyash_rust::jit::abi::JitValue::Bool(*b), nyash_rust::jit::abi::JitValue::I64(i)=>nyash_rust::jit::abi::JitValue::Bool(*i!=0), _=>nyash_rust::jit::abi::adapter::from_jit_value(got) },
_ => nyash_rust::jit::abi::adapter::from_jit_value(got),
};
coerced.push(cv);
}
match engine.execute_function(h, &coerced) {
Some(v) => {
let ret_ty = &func.signature.return_type;
// Map the JIT return value to a VMValue; when the declared return type and
// the JIT value kind do not pair up, fall back to the generic adapter.
let vmv = match (ret_ty, v) {
(MirType::Bool, nyash_rust::jit::abi::JitValue::Bool(b)) => nyash_rust::backend::vm::VMValue::Bool(b),
(MirType::Float, nyash_rust::jit::abi::JitValue::F64(f)) => nyash_rust::backend::vm::VMValue::Float(f),
(MirType::Integer, nyash_rust::jit::abi::JitValue::I64(i)) => nyash_rust::backend::vm::VMValue::Integer(i),
(_, v) => nyash_rust::jit::abi::adapter::from_jit_value(&v),
};
println!("✅ JIT-direct execution completed successfully!");
// Pick a type label and a printable string for the result.
let (ety, sval) = match (ret_ty, &vmv) {
(MirType::Bool, nyash_rust::backend::vm::VMValue::Bool(b)) => ("Bool", b.to_string()),
(MirType::Float, nyash_rust::backend::vm::VMValue::Float(f)) => ("Float", format!("{}", f)),
(MirType::Integer, nyash_rust::backend::vm::VMValue::Integer(i)) => ("Integer", i.to_string()),
(_, nyash_rust::backend::vm::VMValue::Integer(i)) => ("Integer", i.to_string()),
(_, nyash_rust::backend::vm::VMValue::Float(f)) => ("Float", format!("{}", f)),
(_, nyash_rust::backend::vm::VMValue::Bool(b)) => ("Bool", b.to_string()),
(_, nyash_rust::backend::vm::VMValue::String(s)) => ("String", s.clone()),
(_, nyash_rust::backend::vm::VMValue::BoxRef(arc)) => ("BoxRef", arc.type_name().to_string()),
(_, nyash_rust::backend::vm::VMValue::Future(_)) => ("Future", "<future>".to_string()),
(_, nyash_rust::backend::vm::VMValue::Void) => ("Void", "void".to_string()),
};
println!("ResultType(MIR): {}", ety);
println!("Result: {}", sval);
// Optional one-line JSON stats, printed only when NYASH_JIT_STATS_JSON=1.
if std::env::var("NYASH_JIT_STATS_JSON").ok().as_deref() == Some("1") {
let cfg = nyash_rust::jit::config::current();
let caps = nyash_rust::jit::config::probe_capabilities();
let (phi_t, phi_b1, ret_b) = engine.last_lower_stats();
let abi_mode = if cfg.native_bool_abi && caps.supports_b1_sig { "b1_bool" } else { "i64_bool" };
let payload = serde_json::json!({
"version": 1,
"function": func.signature.name,
"abi_mode": abi_mode,
"abi_b1_enabled": cfg.native_bool_abi,
"abi_b1_supported": caps.supports_b1_sig,
"b1_norm_count": nyash_rust::jit::rt::b1_norm_get(),
"ret_bool_hint_count": nyash_rust::jit::rt::ret_bool_hint_get(),
"phi_total_slots": phi_t,
"phi_b1_slots": phi_b1,
"ret_bool_hint_used": ret_b,
});
println!("{}", payload.to_string());
}
}
None => {
// Execution produced no value: emit a "fallback" event, report, and exit.
nyash_rust::jit::events::emit("fallback", &func.signature.name, Some(h), None, serde_json::json!({"reason":"trap_or_missing"}));
emit_err("execute", "TRAP_OR_MISSING", "execution failed (trap or missing handle)");
std::process::exit(1);
}
}
}
None => {
// `compile_function` returned no handle.
emit_err("compile", "UNAVAILABLE", "Build with --features cranelift-jit");
std::process::exit(1);
}
}
}
}

View File

@ -22,9 +22,10 @@ mod demos;
mod dispatch;
mod json_v0_bridge;
mod mir_json_emit;
mod modes;
pub mod modes;
mod pipe_io;
mod pipeline;
mod jit_direct;
mod selfhost;
mod tasks;
mod trace;

View File

@ -307,7 +307,7 @@ impl NyashRunner {
};
// Select selfhost compiler entry
// NYASH_NY_COMPILER_PREF=legacy|new|auto (default auto: prefer new when exists)
let cand_new = std::path::Path::new("apps/selfhost-compiler/compiler.nyash");
let cand_new = std::path::Path::new("apps/selfhost/compiler/compiler.nyash");
let cand_old = std::path::Path::new("apps/selfhost/parser/ny_parser_v0/main.nyash");
let pref = std::env::var("NYASH_NY_COMPILER_PREF").ok();
let parser_prog = match pref.as_deref() {
@ -506,6 +506,13 @@ impl NyashRunner {
Err(e) => { eprintln!("{}", e); std::process::exit(1); }
}
}
// Optional dev sugar: @name[:T] = expr → local name[:T] = expr (line-head only)
let preexpanded_owned;
{
let s = crate::runner::modes::common_util::resolve::preexpand_at_local(code_ref);
preexpanded_owned = s;
code_ref = &preexpanded_owned;
}
// Parse the code with debug fuel limit
let groups = self.config.as_groups();

View File

@ -2,7 +2,7 @@ use std::path::Path;
/// Run a Nyash program as a child (`nyash --backend vm <program>`) and capture the first JSON v0 line.
/// - `exe`: path to nyash executable
/// - `program`: path to the Nyash script to run (e.g., apps/selfhost-compiler/compiler.nyash)
/// - `program`: path to the Nyash script to run (e.g., apps/selfhost/compiler/compiler.nyash)
/// - `timeout_ms`: kill child after this duration
/// - `extra_args`: additional args to pass after program (e.g., "--", "--read-tmp")
/// - `env_remove`: environment variable names to remove for the child
@ -49,4 +49,3 @@ pub fn run_ny_program_capture_json(
};
crate::runner::modes::common_util::selfhost::json::first_json_v0_line(stdout)
}

View File

@ -1,927 +0,0 @@
use serde_json::Value;
/// Returns the given expression node unchanged so it can be placed in a statement list.
fn map_expr_to_stmt(e: nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    e
}
/// Lowers a `PeekExpr` node into a right-nested chain of `If` nodes.
///
/// Each arm `(literal, body)` becomes `if scrutinee == literal { body } else { <rest> }`,
/// with the peek's else expression as the innermost else branch. Returns `None`
/// when `peek` is not a `PeekExpr`. All generated nodes carry `Span::unknown()`.
fn transform_peek_to_if_expr(peek: &nyash_rust::ASTNode) -> Option<nyash_rust::ASTNode> {
    use nyash_rust::ast::{ASTNode as A, BinaryOperator, Span};
    let A::PeekExpr { scrutinee, arms, else_expr, .. } = peek else {
        return None;
    };
    // Start from the else expression and wrap it arm by arm, last arm first, so
    // the first arm ends up as the outermost `If`.
    let mut chain: A = (**else_expr).clone();
    for (lit, body) in arms.iter().rev() {
        let condition = Box::new(A::BinaryOp {
            operator: BinaryOperator::Equal,
            left: scrutinee.clone(),
            right: Box::new(A::Literal { value: lit.clone(), span: Span::unknown() }),
            span: Span::unknown(),
        });
        chain = A::If {
            condition,
            then_body: vec![map_expr_to_stmt(body.clone())],
            else_body: Some(vec![map_expr_to_stmt(chain)]),
            span: Span::unknown(),
        };
    }
    Some(chain)
}
/// Lowers `target = <PeekExpr>` into a right-nested chain of `If` statements.
///
/// Each arm `(literal, body)` becomes
/// `if scrutinee == literal { target = body } else { <rest> }`. Returns `None`
/// when `peek` is not a `PeekExpr`. All generated nodes carry `Span::unknown()`.
///
/// NOTE(review): the innermost else branch is the peek's else expression as-is;
/// it is not wrapped in an assignment to `target` — confirm that is intended.
fn transform_peek_to_if_stmt_assign(peek: &nyash_rust::ASTNode, target: &nyash_rust::ASTNode) -> Option<nyash_rust::ASTNode> {
    use nyash_rust::ast::{ASTNode as A, BinaryOperator, Span};
    let A::PeekExpr { scrutinee, arms, else_expr, .. } = peek else {
        return None;
    };
    // Fold the arms from last to first so the first arm is the outermost `If`.
    let mut chain: A = (**else_expr).clone();
    for (lit, body) in arms.iter().rev() {
        let condition = Box::new(A::BinaryOp {
            operator: BinaryOperator::Equal,
            left: scrutinee.clone(),
            right: Box::new(A::Literal { value: lit.clone(), span: Span::unknown() }),
            span: Span::unknown(),
        });
        let assign = A::Assignment {
            target: Box::new(target.clone()),
            value: Box::new(body.clone()),
            span: Span::unknown(),
        };
        chain = A::If {
            condition,
            then_body: vec![assign],
            else_body: Some(vec![map_expr_to_stmt(chain)]),
            span: Span::unknown(),
        };
    }
    Some(chain)
}
fn transform_peek_to_if_stmt_return(peek: &nyash_rust::ASTNode) -> Option<nyash_rust::ASTNode> {
use nyash_rust::ast::{ASTNode as A, BinaryOperator, Span};
if let A::PeekExpr { scrutinee, arms, else_expr, .. } = peek {
let mut pairs: Vec<(nyash_rust::ast::LiteralValue, A)> = Vec::new();
for (lit, body) in arms { pairs.push((lit.clone(), (*body).clone())); }
let mut current: A = *(*else_expr).clone();
for (lit, body) in pairs.into_iter().rev() {
let rhs = A::Literal { value: lit, span: Span::unknown() };
let cond = A::BinaryOp { operator: BinaryOperator::Equal, left: scrutinee.clone(), right: Box::new(rhs), span: Span::unknown() };
let then_body = vec![A::Return { value: Some(Box::new(body)), span: Span::unknown() }];
let else_body = Some(vec![map_expr_to_stmt(current)]);
current = A::If { condition: Box::new(cond), then_body, else_body, span: Span::unknown() };
}
Some(current)
} else { None }
}
fn transform_peek_to_if_stmt_print(peek: &nyash_rust::ASTNode) -> Option<nyash_rust::ASTNode> {
use nyash_rust::ast::{ASTNode as A, BinaryOperator, Span};
if let A::PeekExpr { scrutinee, arms, else_expr, .. } = peek {
let mut pairs: Vec<(nyash_rust::ast::LiteralValue, A)> = Vec::new();
for (lit, body) in arms { pairs.push((lit.clone(), (*body).clone())); }
let mut current: A = *(*else_expr).clone();
for (lit, body) in pairs.into_iter().rev() {
let rhs = A::Literal { value: lit, span: Span::unknown() };
let cond = A::BinaryOp { operator: BinaryOperator::Equal, left: scrutinee.clone(), right: Box::new(rhs), span: Span::unknown() };
let then_body = vec![A::Print { expression: Box::new(body), span: Span::unknown() }];
let else_body = Some(vec![map_expr_to_stmt(current)]);
current = A::If { condition: Box::new(cond), then_body, else_body, span: Span::unknown() };
}
Some(current)
} else { None }
}
/// Rewrites `PeekExpr` nodes into `If` chains throughout a statement tree.
///
/// A peek is lowered when it appears directly as:
/// * a `Local` initializer (becomes an if-expression),
/// * the value of an `Assignment`,
/// * the value of a `Return`,
/// * the argument of a `Print`.
///
/// The walk descends through `Program`, `If` and `Loop`, and into the operand
/// of the four statement kinds above when that operand is not itself a peek.
/// A peek that has just been lowered is returned as-is; its arms are not
/// searched again. Every other node kind is returned as an unchanged copy.
fn transform_peek_match_literal(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::ASTNode as A;

    match ast {
        A::Program { statements, span } => A::Program {
            statements: statements.iter().map(transform_peek_match_literal).collect(),
            span: *span,
        },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: Box::new(transform_peek_match_literal(condition)),
            then_body: then_body.iter().map(transform_peek_match_literal).collect(),
            else_body: else_body
                .as_ref()
                .map(|stmts| stmts.iter().map(transform_peek_match_literal).collect()),
            span: *span,
        },
        A::Loop { condition, body, span } => A::Loop {
            condition: Box::new(transform_peek_match_literal(condition)),
            body: body.iter().map(transform_peek_match_literal).collect(),
            span: *span,
        },
        A::Local { variables, initial_values, span } => {
            // Each present initializer is either lowered directly (when it is
            // a peek) or walked recursively; absent initializers stay absent.
            let lowered_inits = initial_values
                .iter()
                .map(|init| {
                    init.as_deref().map(|value| {
                        let node = transform_peek_to_if_expr(value)
                            .unwrap_or_else(|| transform_peek_match_literal(value));
                        Box::new(node)
                    })
                })
                .collect();
            A::Local { variables: variables.clone(), initial_values: lowered_inits, span: *span }
        }
        A::Assignment { target, value, span } => {
            match transform_peek_to_if_stmt_assign(value, target) {
                Some(if_stmt) => if_stmt,
                None => A::Assignment {
                    target: target.clone(),
                    value: Box::new(transform_peek_match_literal(value)),
                    span: *span,
                },
            }
        }
        A::Return { value, span } => match value.as_deref() {
            Some(inner) => match transform_peek_to_if_stmt_return(inner) {
                Some(if_stmt) => if_stmt,
                None => A::Return {
                    value: Some(Box::new(transform_peek_match_literal(inner))),
                    span: *span,
                },
            },
            None => A::Return { value: None, span: *span },
        },
        A::Print { expression, span } => match transform_peek_to_if_stmt_print(expression) {
            Some(if_stmt) => if_stmt,
            None => A::Print {
                expression: Box::new(transform_peek_match_literal(expression)),
                span: *span,
            },
        },
        untouched => untouched.clone(),
    }
}
/// Returns a copy of `ast` in which every visited array literal starts with
/// the integer literal `0`.
///
/// The rewrite is idempotent: an array whose first element is already the
/// integer literal `0` gets no extra element. Elements are themselves
/// transformed, so nested array literals are handled too.
///
/// The walk covers `Program`, `Print`, `Return`, `Assignment`, `If`,
/// `BinaryOp`, `UnaryOp`, `MethodCall`, `FunctionCall` and `MapLiteral`
/// values. Each rebuilt node gets `Span::unknown()`. Any other node kind is
/// cloned without being descended into.
fn transform_array_prepend_zero(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::{ASTNode as A, LiteralValue, Span};

    match ast {
        A::ArrayLiteral { elements, .. } => {
            let starts_with_zero = matches!(
                elements.first(),
                Some(A::Literal { value: LiteralValue::Integer(0), .. })
            );
            let mut rewritten: Vec<A> = Vec::with_capacity(elements.len() + 1);
            if !starts_with_zero {
                rewritten.push(A::Literal {
                    value: LiteralValue::Integer(0),
                    span: Span::unknown(),
                });
            }
            rewritten.extend(elements.iter().map(transform_array_prepend_zero));
            A::ArrayLiteral { elements: rewritten, span: Span::unknown() }
        }
        A::Program { statements, .. } => A::Program {
            statements: statements.iter().map(transform_array_prepend_zero).collect(),
            span: Span::unknown(),
        },
        A::Print { expression, .. } => A::Print {
            expression: Box::new(transform_array_prepend_zero(expression)),
            span: Span::unknown(),
        },
        A::Return { value, .. } => A::Return {
            value: value.as_ref().map(|inner| Box::new(transform_array_prepend_zero(inner))),
            span: Span::unknown(),
        },
        A::Assignment { target, value, .. } => A::Assignment {
            target: Box::new(transform_array_prepend_zero(target)),
            value: Box::new(transform_array_prepend_zero(value)),
            span: Span::unknown(),
        },
        A::If { condition, then_body, else_body, .. } => A::If {
            condition: Box::new(transform_array_prepend_zero(condition)),
            then_body: then_body.iter().map(transform_array_prepend_zero).collect(),
            else_body: else_body
                .as_ref()
                .map(|stmts| stmts.iter().map(transform_array_prepend_zero).collect()),
            span: Span::unknown(),
        },
        A::BinaryOp { operator, left, right, .. } => A::BinaryOp {
            operator: operator.clone(),
            left: Box::new(transform_array_prepend_zero(left)),
            right: Box::new(transform_array_prepend_zero(right)),
            span: Span::unknown(),
        },
        A::UnaryOp { operator, operand, .. } => A::UnaryOp {
            operator: operator.clone(),
            operand: Box::new(transform_array_prepend_zero(operand)),
            span: Span::unknown(),
        },
        A::MethodCall { object, method, arguments, .. } => A::MethodCall {
            object: Box::new(transform_array_prepend_zero(object)),
            method: method.clone(),
            arguments: arguments.iter().map(transform_array_prepend_zero).collect(),
            span: Span::unknown(),
        },
        A::FunctionCall { name, arguments, .. } => A::FunctionCall {
            name: name.clone(),
            arguments: arguments.iter().map(transform_array_prepend_zero).collect(),
            span: Span::unknown(),
        },
        A::MapLiteral { entries, .. } => A::MapLiteral {
            entries: entries
                .iter()
                .map(|(key, value)| (key.clone(), transform_array_prepend_zero(value)))
                .collect(),
            span: Span::unknown(),
        },
        untouched => untouched.clone(),
    }
}
/// Returns a copy of `ast` in which every visited map literal starts with the
/// entry `"__macro" => "on"`.
///
/// The rewrite is idempotent: a map whose first key is already `"__macro"`
/// gets no extra entry. Entry values are themselves transformed, so nested map
/// literals are tagged too.
///
/// The walk covers `Program`, `Print`, `Return`, `Assignment`, `If`,
/// `BinaryOp`, `UnaryOp`, `MethodCall`, `FunctionCall` and `ArrayLiteral`
/// elements. Each rebuilt node gets `Span::unknown()`. Any other node kind is
/// cloned without being descended into.
fn transform_map_insert_tag(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::{ASTNode as A, LiteralValue, Span};

    match ast {
        A::MapLiteral { entries, .. } => {
            let already_tagged = matches!(entries.first(), Some((key, _)) if key == "__macro");
            let mut tagged: Vec<(String, A)> = Vec::with_capacity(entries.len() + 1);
            if !already_tagged {
                let marker = A::Literal {
                    value: LiteralValue::String("on".to_string()),
                    span: Span::unknown(),
                };
                tagged.push(("__macro".to_string(), marker));
            }
            tagged.extend(
                entries
                    .iter()
                    .map(|(key, value)| (key.clone(), transform_map_insert_tag(value))),
            );
            A::MapLiteral { entries: tagged, span: Span::unknown() }
        }
        A::Program { statements, .. } => A::Program {
            statements: statements.iter().map(transform_map_insert_tag).collect(),
            span: Span::unknown(),
        },
        A::Print { expression, .. } => A::Print {
            expression: Box::new(transform_map_insert_tag(expression)),
            span: Span::unknown(),
        },
        A::Return { value, .. } => A::Return {
            value: value.as_ref().map(|inner| Box::new(transform_map_insert_tag(inner))),
            span: Span::unknown(),
        },
        A::Assignment { target, value, .. } => A::Assignment {
            target: Box::new(transform_map_insert_tag(target)),
            value: Box::new(transform_map_insert_tag(value)),
            span: Span::unknown(),
        },
        A::If { condition, then_body, else_body, .. } => A::If {
            condition: Box::new(transform_map_insert_tag(condition)),
            then_body: then_body.iter().map(transform_map_insert_tag).collect(),
            else_body: else_body
                .as_ref()
                .map(|stmts| stmts.iter().map(transform_map_insert_tag).collect()),
            span: Span::unknown(),
        },
        A::BinaryOp { operator, left, right, .. } => A::BinaryOp {
            operator: operator.clone(),
            left: Box::new(transform_map_insert_tag(left)),
            right: Box::new(transform_map_insert_tag(right)),
            span: Span::unknown(),
        },
        A::UnaryOp { operator, operand, .. } => A::UnaryOp {
            operator: operator.clone(),
            operand: Box::new(transform_map_insert_tag(operand)),
            span: Span::unknown(),
        },
        A::MethodCall { object, method, arguments, .. } => A::MethodCall {
            object: Box::new(transform_map_insert_tag(object)),
            method: method.clone(),
            arguments: arguments.iter().map(transform_map_insert_tag).collect(),
            span: Span::unknown(),
        },
        A::FunctionCall { name, arguments, .. } => A::FunctionCall {
            name: name.clone(),
            arguments: arguments.iter().map(transform_map_insert_tag).collect(),
            span: Span::unknown(),
        },
        A::ArrayLiteral { elements, .. } => A::ArrayLiteral {
            elements: elements.iter().map(transform_map_insert_tag).collect(),
            span: Span::unknown(),
        },
        untouched => untouched.clone(),
    }
}
/// Normalizes loop bodies so that, within each straight-line segment,
/// non-assignment statements come before assignment statements.
///
/// The walk descends through `Program`, `If` and `Loop`; every other node is
/// returned as an unchanged copy. For a `Loop`, the condition and body are
/// normalized first, then the body is split into segments at every `Break` /
/// `Continue`. Each segment is flushed through `flush_segment` and the control
/// statement is emitted after the segment that precedes it.
///
/// NOTE: a segment is only reordered when no non-assignment follows an
/// assignment, i.e. when it is already in "others, then assignments" order.
/// The stable partition therefore reproduces the segment unchanged, so this
/// pass currently never alters statement order.
fn transform_loop_normalize(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::ASTNode as A;

    fn is_assignment(node: &A) -> bool {
        matches!(node, A::Assignment { .. })
    }

    /// Moves `segment` into `out`, leaving `segment` empty.
    fn flush_segment(segment: &mut Vec<A>, out: &mut Vec<A>) {
        // Reordering is considered safe only if, once the first assignment
        // has been seen, everything after it is an assignment as well.
        let reorder_is_safe = segment
            .iter()
            .skip_while(|node| !is_assignment(node))
            .all(is_assignment);

        if reorder_is_safe {
            // Stable split: non-assignments first, assignments last.
            let (assigns, others): (Vec<A>, Vec<A>) =
                segment.drain(..).partition(is_assignment);
            out.extend(others);
            out.extend(assigns);
        } else {
            // Emit the segment exactly as written.
            out.append(segment);
        }
    }

    match ast {
        // Container nodes: recurse into children.
        A::Program { statements, span } => A::Program {
            statements: statements.iter().map(transform_loop_normalize).collect(),
            span: *span,
        },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: Box::new(transform_loop_normalize(condition)),
            then_body: then_body.iter().map(transform_loop_normalize).collect(),
            else_body: else_body
                .as_ref()
                .map(|stmts| stmts.iter().map(transform_loop_normalize).collect()),
            span: *span,
        },
        A::Loop { condition, body, span } => {
            // Normalize the children before touching this loop's own body.
            let condition = Box::new(transform_loop_normalize(condition));
            let normalized: Vec<A> = body.iter().map(transform_loop_normalize).collect();

            // Minimal break/continue support: control statements act as
            // barriers, and alignment happens only inside each segment.
            let mut rebuilt: Vec<A> = Vec::with_capacity(normalized.len());
            let mut segment: Vec<A> = Vec::new();
            for stmt in normalized {
                if matches!(stmt, A::Break { .. } | A::Continue { .. }) {
                    // Flush everything before the control statement, then emit it.
                    flush_segment(&mut segment, &mut rebuilt);
                    rebuilt.push(stmt);
                } else {
                    segment.push(stmt);
                }
            }
            // Flush the trailing segment.
            flush_segment(&mut segment, &mut rebuilt);

            A::Loop { condition, body: rebuilt, span: *span }
        }
        // Leaf and all remaining nodes: unchanged.
        untouched => untouched.clone(),
    }
}
/// Core normalization pass used by runners (always-on when macros enabled).
///
/// Order matters: for/foreach → match(PeekExpr) → loop tail alignment, followed
/// by the optional and lifting stages below. Optional stages are switched on
/// by environment variables whose value is exactly `1`, `true` or `on`.
pub fn normalize_core_pass(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    /// Reports whether the environment variable `name` holds an "enabled" value.
    fn env_flag(name: &str) -> bool {
        match std::env::var(name) {
            Ok(value) => matches!(value.as_str(), "1" | "true" | "on"),
            Err(_) => false,
        }
    }

    let mut node = transform_for_foreach(ast);
    node = transform_peek_match_literal(&node);
    node = transform_loop_normalize(&node);

    // Optional: inject ScopeBox wrappers for diagnostics/visibility (no-op for MIR)
    if env_flag("NYASH_SCOPEBOX_ENABLE") {
        node = transform_scopebox_inject(&node);
    }

    // Lift nested function declarations (no captures) to top-level with gensym names
    node = transform_lift_nested_functions(&node);

    // Optional: If → LoopForm (conservative). Only transform if no else and
    // branch has no break/continue.
    if env_flag("NYASH_IF_AS_LOOPFORM") {
        node = transform_if_to_loopform(&node);
    }

    // Optional: postfix catch/cleanup sugar → TryCatch normalization
    if env_flag("NYASH_CATCH_NEW") {
        node = transform_postfix_handlers(&node);
    }

    node
}
// ---- Nested Function Lift (no captures) ----
/// Hoists capture-free function declarations out of the bodies they are
/// declared in and appends them to the end of the root `Program`.
///
/// For every `Program` / `FunctionDeclaration` body, each directly contained
/// `FunctionDeclaration` is examined. If every variable it mentions is one of
/// its own parameters or a `local` declared inside it, the declaration is
/// removed from the body, renamed to `__ny_lifted_<name>_<n>` (with `n` taken
/// from a process-wide counter), marked `is_static`, and queued for hoisting.
/// Calls to the original name are then rewritten to the new name, both in the
/// remaining statements of that body and inside the functions hoisted from it
/// (so self-recursive and sibling calls keep working).
///
/// Capture detection and call renaming look through `If`, `Loop`, `Local`
/// initializers, assignments, returns, prints, operators, calls and
/// array/map literals. Declarations that mention any other variable are left
/// in place.
///
/// NOTE: hoisted declarations are only re-attached when the root node is a
/// `Program`; for any other root they are dropped.
fn transform_lift_nested_functions(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::ASTNode as A;
    use std::collections::{HashMap, HashSet};
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Shared by every invocation so generated names never repeat in a process.
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    /// Produces a fresh top-level name derived from `base`.
    fn gensym(base: &str) -> String {
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        format!("__ny_lifted_{}_{}", base, n)
    }

    /// Collects the names introduced by `local` declarations under `n`.
    fn collect_locals(n: &A, set: &mut HashSet<String>) {
        match n {
            A::Local { variables, .. } => set.extend(variables.iter().cloned()),
            A::Program { statements, .. } => {
                for s in statements {
                    collect_locals(s, set);
                }
            }
            A::FunctionDeclaration { body, .. } => {
                for s in body {
                    collect_locals(s, set);
                }
            }
            A::If { then_body, else_body, .. } => {
                for s in then_body.iter().chain(else_body.iter().flatten()) {
                    collect_locals(s, set);
                }
            }
            // Locals declared inside loop bodies belong to the function too.
            A::Loop { body, .. } => {
                for s in body {
                    collect_locals(s, set);
                }
            }
            _ => {}
        }
    }

    /// Collects the names of all variables referenced under `n`.
    fn collect_vars(n: &A, set: &mut HashSet<String>) {
        match n {
            A::Variable { name, .. } => {
                set.insert(name.clone());
            }
            A::Program { statements, .. } => {
                for s in statements {
                    collect_vars(s, set);
                }
            }
            A::FunctionDeclaration { body, .. } => {
                for s in body {
                    collect_vars(s, set);
                }
            }
            A::If { condition, then_body, else_body, .. } => {
                collect_vars(condition, set);
                for s in then_body.iter().chain(else_body.iter().flatten()) {
                    collect_vars(s, set);
                }
            }
            // Variables used in loops and `local` initializers count as uses;
            // missing them would let a capturing function be hoisted.
            A::Loop { condition, body, .. } => {
                collect_vars(condition, set);
                for s in body {
                    collect_vars(s, set);
                }
            }
            A::Local { initial_values, .. } => {
                for init in initial_values.iter().flatten() {
                    collect_vars(init, set);
                }
            }
            A::Assignment { target, value, .. } => {
                collect_vars(target, set);
                collect_vars(value, set);
            }
            A::Return { value, .. } => {
                if let Some(v) = value {
                    collect_vars(v, set);
                }
            }
            A::Print { expression, .. } => collect_vars(expression, set),
            A::BinaryOp { left, right, .. } => {
                collect_vars(left, set);
                collect_vars(right, set);
            }
            A::UnaryOp { operand, .. } => collect_vars(operand, set),
            A::MethodCall { object, arguments, .. } => {
                collect_vars(object, set);
                for a in arguments {
                    collect_vars(a, set);
                }
            }
            A::FunctionCall { arguments, .. } => {
                for a in arguments {
                    collect_vars(a, set);
                }
            }
            A::ArrayLiteral { elements, .. } => {
                for e in elements {
                    collect_vars(e, set);
                }
            }
            A::MapLiteral { entries, .. } => {
                for (_, v) in entries {
                    collect_vars(v, set);
                }
            }
            _ => {}
        }
    }

    /// Returns a copy of `n` where calls to a key of `mapping` target its value.
    fn rename_calls(n: &A, mapping: &HashMap<String, String>) -> A {
        match n {
            A::FunctionCall { name, arguments, span } => A::FunctionCall {
                name: mapping.get(name).unwrap_or(name).clone(),
                arguments: arguments.iter().map(|a| rename_calls(a, mapping)).collect(),
                span: *span,
            },
            A::Program { statements, span } => A::Program {
                statements: statements.iter().map(|s| rename_calls(s, mapping)).collect(),
                span: *span,
            },
            A::FunctionDeclaration { name, params, body, is_static, is_override, span } => {
                A::FunctionDeclaration {
                    name: name.clone(),
                    params: params.clone(),
                    body: body.iter().map(|s| rename_calls(s, mapping)).collect(),
                    is_static: *is_static,
                    is_override: *is_override,
                    span: *span,
                }
            }
            A::If { condition, then_body, else_body, span } => A::If {
                condition: Box::new(rename_calls(condition, mapping)),
                then_body: then_body.iter().map(|s| rename_calls(s, mapping)).collect(),
                else_body: else_body
                    .as_ref()
                    .map(|v| v.iter().map(|s| rename_calls(s, mapping)).collect()),
                span: *span,
            },
            // Call sites inside loops and `local` initializers must follow the
            // rename as well, otherwise they keep pointing at a removed name.
            A::Loop { condition, body, span } => A::Loop {
                condition: Box::new(rename_calls(condition, mapping)),
                body: body.iter().map(|s| rename_calls(s, mapping)).collect(),
                span: *span,
            },
            A::Local { variables, initial_values, span } => A::Local {
                variables: variables.clone(),
                initial_values: initial_values
                    .iter()
                    .map(|init| init.as_deref().map(|v| Box::new(rename_calls(v, mapping))))
                    .collect(),
                span: *span,
            },
            A::Assignment { target, value, span } => A::Assignment {
                target: Box::new(rename_calls(target, mapping)),
                value: Box::new(rename_calls(value, mapping)),
                span: *span,
            },
            A::Return { value, span } => A::Return {
                value: value.as_ref().map(|v| Box::new(rename_calls(v, mapping))),
                span: *span,
            },
            A::Print { expression, span } => A::Print {
                expression: Box::new(rename_calls(expression, mapping)),
                span: *span,
            },
            A::BinaryOp { operator, left, right, span } => A::BinaryOp {
                operator: operator.clone(),
                left: Box::new(rename_calls(left, mapping)),
                right: Box::new(rename_calls(right, mapping)),
                span: *span,
            },
            A::UnaryOp { operator, operand, span } => A::UnaryOp {
                operator: operator.clone(),
                operand: Box::new(rename_calls(operand, mapping)),
                span: *span,
            },
            A::MethodCall { object, method, arguments, span } => A::MethodCall {
                object: Box::new(rename_calls(object, mapping)),
                method: method.clone(),
                arguments: arguments.iter().map(|a| rename_calls(a, mapping)).collect(),
                span: *span,
            },
            A::ArrayLiteral { elements, span } => A::ArrayLiteral {
                elements: elements.iter().map(|e| rename_calls(e, mapping)).collect(),
                span: *span,
            },
            A::MapLiteral { entries, span } => A::MapLiteral {
                entries: entries
                    .iter()
                    .map(|(k, v)| (k.clone(), rename_calls(v, mapping)))
                    .collect(),
                span: *span,
            },
            other => other.clone(),
        }
    }

    /// Removes capture-free function declarations from `body`, queues them in
    /// `hoisted` under fresh names, records old → new names in `mapping`, and
    /// returns the remaining statements with calls renamed.
    fn lift_in_body(
        body: Vec<A>,
        hoisted: &mut Vec<A>,
        mapping: &mut HashMap<String, String>,
    ) -> Vec<A> {
        // Everything pushed from this index on was hoisted out of this body.
        let first_new = hoisted.len();
        let mut kept: Vec<A> = Vec::with_capacity(body.len());

        for st in body {
            if let A::FunctionDeclaration { name, params, .. } = &st {
                // A variable that is neither a parameter nor a local of the
                // function is treated as a capture and blocks hoisting.
                let mut locals = HashSet::new();
                collect_locals(&st, &mut locals);
                let mut used = HashSet::new();
                collect_vars(&st, &mut used);
                let has_captures =
                    used.iter().any(|v| !params.contains(v) && !locals.contains(v));

                if !has_captures {
                    let new_name = gensym(name);
                    mapping.insert(name.clone(), new_name.clone());
                    if let A::FunctionDeclaration { params, body, is_override, span, .. } = st {
                        hoisted.push(A::FunctionDeclaration {
                            name: new_name,
                            params,
                            body,
                            is_static: true,
                            is_override,
                            span,
                        });
                    }
                    // The nested declaration is not kept in place.
                    continue;
                }
            }
            // Non-declarations and capturing declarations stay where they are.
            kept.push(st);
        }

        // Functions hoisted from this body may call themselves or each other
        // by their original names; point those calls at the new names.
        for lifted in hoisted[first_new..].iter_mut() {
            *lifted = rename_calls(lifted, mapping);
        }

        // Rewrite the call sites that remain in this body.
        kept.iter().map(|n| rename_calls(n, mapping)).collect()
    }

    /// Processes `n` bottom-up, lifting inside every function/program body.
    fn walk(n: &A, hoisted: &mut Vec<A>) -> A {
        match n {
            A::Program { statements, span } => {
                let walked: Vec<A> = statements.iter().map(|s| walk(s, hoisted)).collect();
                let mut mapping = HashMap::new();
                // Hoisted declarations are appended once, after the full walk.
                A::Program {
                    statements: lift_in_body(walked, hoisted, &mut mapping),
                    span: *span,
                }
            }
            A::FunctionDeclaration { name, params, body, is_static, is_override, span } => {
                let walked: Vec<A> = body.iter().map(|s| walk(s, hoisted)).collect();
                let mut mapping = HashMap::new();
                A::FunctionDeclaration {
                    name: name.clone(),
                    params: params.clone(),
                    body: lift_in_body(walked, hoisted, &mut mapping),
                    is_static: *is_static,
                    is_override: *is_override,
                    span: *span,
                }
            }
            A::If { condition, then_body, else_body, span } => A::If {
                condition: Box::new(walk(condition, hoisted)),
                then_body: then_body.iter().map(|s| walk(s, hoisted)).collect(),
                else_body: else_body
                    .as_ref()
                    .map(|v| v.iter().map(|s| walk(s, hoisted)).collect()),
                span: *span,
            },
            A::Assignment { target, value, span } => A::Assignment {
                target: Box::new(walk(target, hoisted)),
                value: Box::new(walk(value, hoisted)),
                span: *span,
            },
            A::Return { value, span } => A::Return {
                value: value.as_ref().map(|v| Box::new(walk(v, hoisted))),
                span: *span,
            },
            A::Print { expression, span } => A::Print {
                expression: Box::new(walk(expression, hoisted)),
                span: *span,
            },
            A::BinaryOp { operator, left, right, span } => A::BinaryOp {
                operator: operator.clone(),
                left: Box::new(walk(left, hoisted)),
                right: Box::new(walk(right, hoisted)),
                span: *span,
            },
            A::UnaryOp { operator, operand, span } => A::UnaryOp {
                operator: operator.clone(),
                operand: Box::new(walk(operand, hoisted)),
                span: *span,
            },
            A::MethodCall { object, method, arguments, span } => A::MethodCall {
                object: Box::new(walk(object, hoisted)),
                method: method.clone(),
                arguments: arguments.iter().map(|a| walk(a, hoisted)).collect(),
                span: *span,
            },
            A::FunctionCall { name, arguments, span } => A::FunctionCall {
                name: name.clone(),
                arguments: arguments.iter().map(|a| walk(a, hoisted)).collect(),
                span: *span,
            },
            A::ArrayLiteral { elements, span } => A::ArrayLiteral {
                elements: elements.iter().map(|e| walk(e, hoisted)).collect(),
                span: *span,
            },
            A::MapLiteral { entries, span } => A::MapLiteral {
                entries: entries.iter().map(|(k, v)| (k.clone(), walk(v, hoisted))).collect(),
                span: *span,
            },
            other => other.clone(),
        }
    }

    let mut hoisted: Vec<A> = Vec::new();
    match walk(ast, &mut hoisted) {
        // Append hoisted functions at top level when the root is a Program.
        A::Program { mut statements, span } => {
            statements.extend(hoisted);
            A::Program { statements, span }
        }
        other => other,
    }
}
/// Returns a copy of `node` in which every `Variable` named `name` is replaced
/// by a clone of `replacement`.
///
/// The substitution descends through `Program`, `Print`, `Return`,
/// `Assignment` (target and value), `If`, `Loop`, `Local` initializers,
/// `BinaryOp`, `UnaryOp`, `MethodCall`, `FunctionCall` arguments and
/// array/map literal values. Any other node kind is cloned without being
/// descended into. Names declared by a `Local` are not rewritten; only the
/// initializer expressions are.
///
/// * `node` - tree to rewrite.
/// * `name` - variable name to look for.
/// * `replacement` - node substituted for each matching variable.
fn subst_var(
    node: &nyash_rust::ASTNode,
    name: &str,
    replacement: &nyash_rust::ASTNode,
) -> nyash_rust::ASTNode {
    use nyash_rust::ast::ASTNode as A;

    match node {
        A::Variable { name: n, .. } if n == name => replacement.clone(),
        A::Program { statements, span } => A::Program {
            statements: statements.iter().map(|s| subst_var(s, name, replacement)).collect(),
            span: *span,
        },
        A::Print { expression, span } => A::Print {
            expression: Box::new(subst_var(expression, name, replacement)),
            span: *span,
        },
        A::Return { value, span } => A::Return {
            value: value.as_ref().map(|v| Box::new(subst_var(v, name, replacement))),
            span: *span,
        },
        A::Assignment { target, value, span } => A::Assignment {
            target: Box::new(subst_var(target, name, replacement)),
            value: Box::new(subst_var(value, name, replacement)),
            span: *span,
        },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: Box::new(subst_var(condition, name, replacement)),
            then_body: then_body.iter().map(|s| subst_var(s, name, replacement)).collect(),
            else_body: else_body
                .as_ref()
                .map(|v| v.iter().map(|s| subst_var(s, name, replacement)).collect()),
            span: *span,
        },
        // Uses inside nested loops and `local` initializers must be replaced
        // too; otherwise they keep referring to a variable that no longer
        // exists after the caller's expansion.
        A::Loop { condition, body, span } => A::Loop {
            condition: Box::new(subst_var(condition, name, replacement)),
            body: body.iter().map(|s| subst_var(s, name, replacement)).collect(),
            span: *span,
        },
        A::Local { variables, initial_values, span } => A::Local {
            variables: variables.clone(),
            initial_values: initial_values
                .iter()
                .map(|init| init.as_deref().map(|v| Box::new(subst_var(v, name, replacement))))
                .collect(),
            span: *span,
        },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp {
            operator: operator.clone(),
            left: Box::new(subst_var(left, name, replacement)),
            right: Box::new(subst_var(right, name, replacement)),
            span: *span,
        },
        A::UnaryOp { operator, operand, span } => A::UnaryOp {
            operator: operator.clone(),
            operand: Box::new(subst_var(operand, name, replacement)),
            span: *span,
        },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: Box::new(subst_var(object, name, replacement)),
            method: method.clone(),
            arguments: arguments.iter().map(|a| subst_var(a, name, replacement)).collect(),
            span: *span,
        },
        A::FunctionCall { name: fn_name, arguments, span } => A::FunctionCall {
            name: fn_name.clone(),
            arguments: arguments.iter().map(|a| subst_var(a, name, replacement)).collect(),
            span: *span,
        },
        A::ArrayLiteral { elements, span } => A::ArrayLiteral {
            elements: elements.iter().map(|e| subst_var(e, name, replacement)).collect(),
            span: *span,
        },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries
                .iter()
                .map(|(k, v)| (k.clone(), subst_var(v, name, replacement)))
                .collect(),
            span: *span,
        },
        other => other.clone(),
    }
}
fn transform_for_foreach(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
use nyash_rust::ast::{ASTNode as A, BinaryOperator, LiteralValue, Span};
fn rewrite_stmt_list(list: Vec<A>) -> Vec<A> {
let mut out: Vec<A> = Vec::new();
for st in list.into_iter() {
match st.clone() {
A::FunctionCall { name, arguments, .. } if (name == "ny_for" || name == "for") && arguments.len() == 4 => {
let init = arguments[0].clone();
let cond = arguments[1].clone();
let step = arguments[2].clone();
let body_lam = arguments[3].clone();
if let A::Lambda { params, body, .. } = body_lam {
if params.is_empty() {
// Accept init as Local/Assignment or Lambda(); step as Assignment or Lambda()
// Emit init statements (0..n)
match init.clone() {
A::Assignment { .. } | A::Local { .. } => out.push(init),
A::Lambda { params: p2, body: b2, .. } if p2.is_empty() => {
for s in b2 { out.push(transform_for_foreach(&s)); }
}
_ => {}
}
let mut loop_body: Vec<A> = body
.into_iter()
.map(|n| transform_for_foreach(&n))
.collect();
// Append step statements at tail
match step.clone() {
A::Assignment { .. } => loop_body.push(step),
A::Lambda { params: p3, body: b3, .. } if p3.is_empty() => {
for s in b3 { loop_body.push(transform_for_foreach(&s)); }
}
_ => {}
}
out.push(A::Loop { condition: Box::new(cond), body: loop_body, span: Span::unknown() });
continue;
}
}
// Fallback: keep as-is
out.push(A::FunctionCall { name, arguments, span: Span::unknown() });
}
A::FunctionCall { name, arguments, .. } if (name == "ny_foreach" || name == "foreach") && arguments.len() == 3 => {
let arr = arguments[0].clone();
let var_name_opt = match &arguments[1] { A::Literal { value: LiteralValue::String(s), .. } => Some(s.clone()), _ => None };
let lam = arguments[2].clone();
if let (Some(vn), A::Lambda { params, body, .. }) = (var_name_opt, lam) {
if params.is_empty() {
let idx_name = "__ny_i".to_string();
let idx_var = A::Variable { name: idx_name.clone(), span: Span::unknown() };
let init_idx = A::Local { variables: vec![idx_name.clone()], initial_values: vec![Some(Box::new(A::Literal { value: LiteralValue::Integer(0), span: Span::unknown() }))], span: Span::unknown() };
let size_call = A::MethodCall { object: Box::new(arr.clone()), method: "size".to_string(), arguments: vec![], span: Span::unknown() };
let cond = A::BinaryOp { operator: BinaryOperator::Less, left: Box::new(idx_var.clone()), right: Box::new(size_call), span: Span::unknown() };
let elem = A::MethodCall { object: Box::new(arr.clone()), method: "get".to_string(), arguments: vec![idx_var.clone()], span: Span::unknown() };
let mut loop_body: Vec<A> = body.into_iter().map(|n| subst_var(&n, &vn, &elem)).map(|n| transform_for_foreach(&n)).collect();
let step = A::Assignment { target: Box::new(A::Variable { name: idx_name.clone(), span: Span::unknown() }), value: Box::new(A::BinaryOp { operator: BinaryOperator::Add, left: Box::new(A::Variable { name: idx_name.clone(), span: Span::unknown() }), right: Box::new(A::Literal { value: LiteralValue::Integer(1), span: Span::unknown() }), span: Span::unknown() }), span: Span::unknown() };
loop_body.push(step);
out.push(init_idx);
out.push(A::Loop { condition: Box::new(cond), body: loop_body, span: Span::unknown() });
continue;
}
}
out.push(A::FunctionCall { name, arguments, span: Span::unknown() });
}
A::Local { variables, initial_values, .. } => {
let mut expanded_any = false;
for opt in &initial_values {
if let Some(v) = opt {
if let A::FunctionCall { name, arguments, .. } = v.as_ref() {
if ((name == "ny_for" || name == "for") && arguments.len() == 4)
|| ((name == "ny_foreach" || name == "foreach") && arguments.len() == 3)
{
expanded_any = true;
}
}
}
}
if expanded_any {
for opt in initial_values {
if let Some(v) = opt {
match v.as_ref() {
A::FunctionCall { name: _, arguments, .. } if (arguments.len() == 4) => {
// Reuse handling by fabricating a statement call
let fake = A::FunctionCall { name: "for".to_string(), arguments: arguments.clone(), span: Span::unknown() };
// Route into the top arm by re-matching
match fake.clone() {
A::FunctionCall { name: _, arguments, .. } => {
let init = arguments[0].clone();
let cond = arguments[1].clone();
let step = arguments[2].clone();
let body_lam = arguments[3].clone();
if let A::Lambda { params, body, .. } = body_lam {
if params.is_empty() {
match init.clone() {
A::Assignment { .. } | A::Local { .. } => out.push(init),
A::Lambda { params: p2, body: b2, .. } if p2.is_empty() => { for s in b2 { out.push(transform_for_foreach(&s)); } }
_ => {}
}
let mut loop_body: Vec<A> = body.into_iter().map(|n| transform_for_foreach(&n)).collect();
match step.clone() {
A::Assignment { .. } => loop_body.push(step),
A::Lambda { params: p3, body: b3, .. } if p3.is_empty() => { for s in b3 { loop_body.push(transform_for_foreach(&s)); } }
_ => {}
}
out.push(A::Loop { condition: Box::new(cond), body: loop_body, span: Span::unknown() });
}
}
}
_ => {}
}
}
A::FunctionCall { name: _, arguments, .. } if (arguments.len() == 3) => {
let arr = arguments[0].clone();
let var_name_opt = match &arguments[1] { A::Literal { value: LiteralValue::String(s), .. } => Some(s.clone()), _ => None };
let lam = arguments[2].clone();
if let (Some(vn), A::Lambda { params, body, .. }) = (var_name_opt, lam) {
if params.is_empty() {
let idx_name = "__ny_i".to_string();
let idx_var = A::Variable { name: idx_name.clone(), span: Span::unknown() };
let init_idx = A::Local { variables: vec![idx_name.clone()], initial_values: vec![Some(Box::new(A::Literal { value: LiteralValue::Integer(0), span: Span::unknown() }))], span: Span::unknown() };
let size_call = A::MethodCall { object: Box::new(arr.clone()), method: "size".to_string(), arguments: vec![], span: Span::unknown() };
let cond = A::BinaryOp { operator: BinaryOperator::Less, left: Box::new(idx_var.clone()), right: Box::new(size_call), span: Span::unknown() };
let elem = A::MethodCall { object: Box::new(arr.clone()), method: "get".to_string(), arguments: vec![idx_var.clone()], span: Span::unknown() };
let mut loop_body: Vec<A> = body.into_iter().map(|n| subst_var(&n, &vn, &elem)).map(|n| transform_for_foreach(&n)).collect();
let step = A::Assignment { target: Box::new(A::Variable { name: idx_name.clone(), span: Span::unknown() }), value: Box::new(A::BinaryOp { operator: BinaryOperator::Add, left: Box::new(A::Variable { name: idx_name.clone(), span: Span::unknown() }), right: Box::new(A::Literal { value: LiteralValue::Integer(1), span: Span::unknown() }), span: Span::unknown() }), span: Span::unknown() };
loop_body.push(step);
out.push(init_idx);
out.push(A::Loop { condition: Box::new(cond), body: loop_body, span: Span::unknown() });
}
}
}
_ => {}
}
}
}
// Drop original Local that carried macros
continue;
} else {
out.push(A::Local { variables, initial_values, span: Span::unknown() });
}
}
A::FunctionCall { name, arguments, .. } if name == "foreach_" && arguments.len() == 3 => {
let arr = arguments[0].clone();
let var_name_opt = match &arguments[1] { A::Literal { value: LiteralValue::String(s), .. } => Some(s.clone()), _ => None };
let lam = arguments[2].clone();
if let (Some(vn), A::Lambda { params, body, .. }) = (var_name_opt, lam) {
if params.is_empty() {
// __ny_i = 0; loop(__ny_i < arr.size()) { body[var=arr.get(__ny_i)]; __ny_i = __ny_i + 1 }
let idx_name = "__ny_i".to_string();
let idx_var = A::Variable { name: idx_name.clone(), span: Span::unknown() };
let init_idx = A::Local { variables: vec![idx_name.clone()], initial_values: vec![Some(Box::new(A::Literal { value: LiteralValue::Integer(0), span: Span::unknown() }))], span: Span::unknown() };
let size_call = A::MethodCall { object: Box::new(arr.clone()), method: "size".to_string(), arguments: vec![], span: Span::unknown() };
let cond = A::BinaryOp { operator: BinaryOperator::Less, left: Box::new(idx_var.clone()), right: Box::new(size_call), span: Span::unknown() };
let elem = A::MethodCall { object: Box::new(arr.clone()), method: "get".to_string(), arguments: vec![idx_var.clone()], span: Span::unknown() };
let mut loop_body: Vec<A> = body.into_iter().map(|n| subst_var(&n, &vn, &elem)).map(|n| transform_for_foreach(&n)).collect();
let step = A::Assignment { target: Box::new(A::Variable { name: idx_name.clone(), span: Span::unknown() }), value: Box::new(A::BinaryOp { operator: BinaryOperator::Add, left: Box::new(A::Variable { name: idx_name.clone(), span: Span::unknown() }), right: Box::new(A::Literal { value: LiteralValue::Integer(1), span: Span::unknown() }), span: Span::unknown() }), span: Span::unknown() };
loop_body.push(step);
out.push(init_idx);
out.push(A::Loop { condition: Box::new(cond), body: loop_body, span: Span::unknown() });
continue;
}
}
out.push(A::FunctionCall { name, arguments, span: Span::unknown() });
}
// Recurse into container nodes and preserve others
A::If { condition, then_body, else_body, span } => {
out.push(A::If {
condition: Box::new(transform_for_foreach(&condition)),
then_body: rewrite_stmt_list(then_body),
else_body: else_body.map(rewrite_stmt_list),
span,
});
}
A::Loop { condition, body, span } => {
out.push(A::Loop {
condition: Box::new(transform_for_foreach(&condition)),
body: rewrite_stmt_list(body),
span,
});
}
other => out.push(transform_for_foreach(&other)),
}
}
out
}
match ast.clone() {
A::Program { statements, span } => A::Program { statements: rewrite_stmt_list(statements), span },
A::If { condition, then_body, else_body, span } => A::If {
condition: Box::new(transform_for_foreach(&condition)),
then_body: rewrite_stmt_list(then_body),
else_body: else_body.map(rewrite_stmt_list),
span,
},
A::Loop { condition, body, span } => A::Loop { condition: Box::new(transform_for_foreach(&condition)), body: rewrite_stmt_list(body), span },
// Leaf and expression nodes: descend but no statement expansion
A::Print { expression, span } => A::Print { expression: Box::new(transform_for_foreach(&expression)), span },
A::Return { value, span } => A::Return { value: value.as_ref().map(|v| Box::new(transform_for_foreach(v))), span },
A::Assignment { target, value, span } => A::Assignment { target: Box::new(transform_for_foreach(&target)), value: Box::new(transform_for_foreach(&value)), span },
A::BinaryOp { operator, left, right, span } => A::BinaryOp { operator, left: Box::new(transform_for_foreach(&left)), right: Box::new(transform_for_foreach(&right)), span },
A::UnaryOp { operator, operand, span } => A::UnaryOp { operator, operand: Box::new(transform_for_foreach(&operand)), span },
A::MethodCall { object, method, arguments, span } => A::MethodCall { object: Box::new(transform_for_foreach(&object)), method, arguments: arguments.iter().map(|a| transform_for_foreach(a)).collect(), span },
A::FunctionCall { name, arguments, span } => A::FunctionCall { name, arguments: arguments.iter().map(|a| transform_for_foreach(a)).collect(), span },
A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: elements.iter().map(|e| transform_for_foreach(e)).collect(), span },
A::MapLiteral { entries, span } => A::MapLiteral { entries: entries.iter().map(|(k,v)| (k.clone(), transform_for_foreach(v))).collect(), span },
other => other,
}
}
/// Wraps the statements of every `If` branch and every `Loop` body in a single
/// `ScopeBox` node.
///
/// The walk descends through `Program`, `If`, `Loop`, unary/binary operators,
/// method and function calls, and array/map literals. Any other node kind is
/// returned as an unchanged clone (its children are not visited).
fn transform_scopebox_inject(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::{ASTNode as A, Span};

    // Rewrites each statement, then packs the results into one ScopeBox.
    fn boxed(stmts: &[A]) -> Vec<A> {
        let inner: Vec<A> = stmts.iter().map(transform_scopebox_inject).collect();
        vec![A::ScopeBox { body: inner, span: Span::unknown() }]
    }

    // Plain element-wise recursion for expression/statement lists.
    fn each(nodes: &[A]) -> Vec<A> {
        nodes.iter().map(transform_scopebox_inject).collect()
    }

    match ast {
        A::Program { statements, span } => A::Program {
            statements: each(statements),
            span: span.clone(),
        },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: Box::new(transform_scopebox_inject(condition)),
            then_body: boxed(then_body),
            else_body: else_body.as_deref().map(boxed),
            span: span.clone(),
        },
        A::Loop { condition, body, span } => A::Loop {
            condition: Box::new(transform_scopebox_inject(condition)),
            body: boxed(body),
            span: span.clone(),
        },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp {
            operator: operator.clone(),
            left: Box::new(transform_scopebox_inject(left)),
            right: Box::new(transform_scopebox_inject(right)),
            span: span.clone(),
        },
        A::UnaryOp { operator, operand, span } => A::UnaryOp {
            operator: operator.clone(),
            operand: Box::new(transform_scopebox_inject(operand)),
            span: span.clone(),
        },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: Box::new(transform_scopebox_inject(object)),
            method: method.clone(),
            arguments: each(arguments),
            span: span.clone(),
        },
        A::FunctionCall { name, arguments, span } => A::FunctionCall {
            name: name.clone(),
            arguments: each(arguments),
            span: span.clone(),
        },
        A::ArrayLiteral { elements, span } => A::ArrayLiteral {
            elements: each(elements),
            span: span.clone(),
        },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries
                .iter()
                .map(|(key, val)| (key.clone(), transform_scopebox_inject(val)))
                .collect(),
            span: span.clone(),
        },
        untouched => untouched.clone(),
    }
}
/// Loop-form normalization: rewrites `if` statements into single-iteration loops.
///
/// `if (c) { t } else { e }` becomes `loop(1) { if (c) { t } else { e }  break }`,
/// which runs the branch exactly once. The rewrite is skipped (the node stays an
/// `if`, with its children still normalized) when a branch contains a `break` or
/// `continue` that is not enclosed in a nested loop: wrapping such an `if` in a
/// synthetic loop would make that jump bind to the synthetic loop instead of the
/// loop the programmer wrote.
///
/// Recurses through `Program`, `If`, `Loop`, unary/binary operators, calls and
/// array/map literals; every other node kind is returned unchanged.
fn transform_if_to_loopform(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::{ASTNode as A, LiteralValue, Span};

    // True when `stmts` holds a break/continue that would be captured by a loop
    // wrapped directly around them. Nested `Loop` bodies are deliberately not
    // searched: jumps inside them already target that nested loop.
    fn has_escaping_jump(stmts: &[A]) -> bool {
        stmts.iter().any(|stmt| match stmt {
            A::Break { .. } | A::Continue { .. } => true,
            A::If { then_body, else_body, .. } => {
                has_escaping_jump(then_body)
                    || else_body.as_deref().map_or(false, has_escaping_jump)
            }
            A::ScopeBox { body, .. } => has_escaping_jump(body),
            A::TryCatch { try_body, catch_clauses, finally_body, .. } => {
                has_escaping_jump(try_body)
                    || catch_clauses.iter().any(|clause| has_escaping_jump(&clause.body))
                    || finally_body.as_deref().map_or(false, has_escaping_jump)
            }
            _ => false,
        })
    }

    match ast.clone() {
        A::Program { statements, span } => A::Program {
            statements: statements.into_iter().map(|n| transform_if_to_loopform(&n)).collect(),
            span,
        },
        A::If { condition, then_body, else_body, span } => {
            // Decide on the original bodies, before any nested rewrite.
            let captures_jump = has_escaping_jump(&then_body)
                || else_body.as_deref().map_or(false, has_escaping_jump);
            let cond_t = Box::new(transform_if_to_loopform(&condition));
            let then_t: Vec<A> = then_body.into_iter().map(|n| transform_if_to_loopform(&n)).collect();
            let else_t: Option<Vec<A>> =
                else_body.map(|v| v.into_iter().map(|n| transform_if_to_loopform(&n)).collect());
            if captures_jump {
                // Keep the `if` so break/continue still reach the enclosing loop.
                A::If { condition: cond_t, then_body: then_t, else_body: else_t, span }
            } else {
                // Single-iteration loop: run the `if` once, then leave via explicit break.
                let inner_if = A::If { condition: cond_t, then_body: then_t, else_body: else_t, span: Span::unknown() };
                let one = A::Literal { value: LiteralValue::Integer(1), span: Span::unknown() };
                let loop_body = vec![inner_if, A::Break { span: Span::unknown() }];
                A::Loop { condition: Box::new(one), body: loop_body, span }
            }
        }
        A::Loop { condition, body, span } => A::Loop {
            condition: Box::new(transform_if_to_loopform(&condition)),
            body: body.into_iter().map(|n| transform_if_to_loopform(&n)).collect(),
            span,
        },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp {
            operator,
            left: Box::new(transform_if_to_loopform(&left)),
            right: Box::new(transform_if_to_loopform(&right)),
            span,
        },
        A::UnaryOp { operator, operand, span } => A::UnaryOp {
            operator,
            operand: Box::new(transform_if_to_loopform(&operand)),
            span,
        },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: Box::new(transform_if_to_loopform(&object)),
            method,
            arguments: arguments.into_iter().map(|a| transform_if_to_loopform(&a)).collect(),
            span,
        },
        A::FunctionCall { name, arguments, span } => A::FunctionCall {
            name,
            arguments: arguments.into_iter().map(|a| transform_if_to_loopform(&a)).collect(),
            span,
        },
        A::ArrayLiteral { elements, span } => A::ArrayLiteral {
            elements: elements.into_iter().map(|e| transform_if_to_loopform(&e)).collect(),
            span,
        },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries.into_iter().map(|(k, v)| (k, transform_if_to_loopform(&v))).collect(),
            span,
        },
        other => other,
    }
}
/// Phase 1 sugar: lowers the helper calls `postfix_catch` and `with_cleanup`
/// into the legacy `TryCatch` AST understood by the existing lowering paths.
/// This is a stopgap until the parser accepts postfix forms directly.
///
/// Recognized forms (names are matched ASCII case-insensitively):
/// - `postfix_catch(expr, fn(e){...})`
/// - `postfix_catch(expr, "Type", fn(e){...})`; a non-string type argument is
///   treated as "no type"
/// - `with_cleanup(expr, fn(){...})`
///
/// `expr` is transformed recursively and becomes the sole `try` statement. The
/// handler lambda's body is used as-is as the catch / finally body. A call that
/// does not match one of the forms (too few arguments, handler is not a lambda)
/// is kept as a plain function call with ALL of its arguments, each transformed
/// recursively.
fn transform_postfix_handlers(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::{ASTNode as A, CatchClause, LiteralValue, Span};
    fn map_vec(v: Vec<A>) -> Vec<A> {
        v.into_iter().map(|n| transform_postfix_handlers(&n)).collect()
    }
    match ast.clone() {
        A::Program { statements, span } => A::Program { statements: map_vec(statements), span },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: Box::new(transform_postfix_handlers(&condition)),
            then_body: map_vec(then_body),
            else_body: else_body.map(map_vec),
            span,
        },
        A::Loop { condition, body, span } => A::Loop {
            condition: Box::new(transform_postfix_handlers(&condition)),
            body: map_vec(body),
            span,
        },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp {
            operator,
            left: Box::new(transform_postfix_handlers(&left)),
            right: Box::new(transform_postfix_handlers(&right)),
            span,
        },
        A::UnaryOp { operator, operand, span } => A::UnaryOp {
            operator,
            operand: Box::new(transform_postfix_handlers(&operand)),
            span,
        },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: Box::new(transform_postfix_handlers(&object)),
            method,
            arguments: map_vec(arguments),
            span,
        },
        A::FunctionCall { name, arguments, span } => {
            // Inspect the arguments by reference first: nothing is consumed unless
            // the call really matches a sugar form, so the fallback below always
            // sees the complete argument list.
            if name.eq_ignore_ascii_case("postfix_catch") && arguments.len() >= 2 {
                // Two arguments: (expr, handler). Three or more: (expr, type, handler).
                let typed = arguments.len() >= 3;
                let handler = if typed { &arguments[2] } else { &arguments[1] };
                if let A::Lambda { params, body, .. } = handler {
                    let exception_type = if typed {
                        match &arguments[1] {
                            A::Literal { value: LiteralValue::String(s), .. } => Some(s.clone()),
                            // Keep robust: a non-string type is treated as "catch all".
                            _ => None,
                        }
                    } else {
                        None
                    };
                    let clause = CatchClause {
                        exception_type,
                        variable_name: params.first().cloned(),
                        body: body.clone(),
                        span: Span::unknown(),
                    };
                    return A::TryCatch {
                        try_body: vec![transform_postfix_handlers(&arguments[0])],
                        catch_clauses: vec![clause],
                        finally_body: None,
                        span,
                    };
                }
            } else if name.eq_ignore_ascii_case("with_cleanup") && arguments.len() >= 2 {
                if let A::Lambda { body, .. } = &arguments[1] {
                    return A::TryCatch {
                        try_body: vec![transform_postfix_handlers(&arguments[0])],
                        catch_clauses: vec![],
                        finally_body: Some(body.clone()),
                        span,
                    };
                }
            }
            // Not a sugar form (or malformed): ordinary call, recurse into every argument.
            A::FunctionCall { name, arguments: map_vec(arguments), span }
        }
        A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: map_vec(elements), span },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries.into_iter().map(|(k, v)| (k, transform_postfix_handlers(&v))).collect(),
            span,
        },
        other => other,
    }
}
/// Entry point of the macro child process.
///
/// Reads an AST encoded as JSON (v0) from stdin, applies the transformation
/// selected for `macro_file`, and prints the resulting AST as JSON on stdout.
///
/// The process exits with status 2 when stdin cannot be read, 3 when the input
/// is not valid JSON, and 4 when the JSON cannot be converted to an AST.
pub fn run_macro_child(macro_file: &str) {
// Read all of stdin into memory
use std::io::Read;
let mut input = String::new();
if let Err(e) = std::io::stdin().read_to_string(&mut input) {
eprintln!("[macro-child] read stdin error: {}", e);
std::process::exit(2);
}
let v: Value = match serde_json::from_str(&input) {
Ok(x) => x,
Err(e) => { eprintln!("[macro-child] invalid JSON: {}", e); std::process::exit(3); }
};
let ast = match crate::r#macro::ast_json::json_to_ast(&v) {
Some(a) => a,
None => { eprintln!("[macro-child] unsupported AST JSON v0"); std::process::exit(4); }
};
// Analyze macro behavior (PoC)
let mut behavior = crate::r#macro::macro_box_ny::analyze_macro_file(macro_file);
// A path containing "env_tag_string_macro" overrides whatever the analysis returned.
if macro_file.contains("env_tag_string_macro") {
behavior = crate::r#macro::macro_box_ny::MacroBehavior::EnvTagString;
}
// Dispatch to the built-in transformation matching the detected behavior.
let out_ast = match behavior {
crate::r#macro::macro_box_ny::MacroBehavior::Identity => ast.clone(),
crate::r#macro::macro_box_ny::MacroBehavior::Uppercase => {
// Apply built-in Uppercase transformation
let m = crate::r#macro::macro_box::UppercasePrintMacro;
crate::r#macro::macro_box::MacroBox::expand(&m, &ast)
}
crate::r#macro::macro_box_ny::MacroBehavior::ArrayPrependZero => transform_array_prepend_zero(&ast),
crate::r#macro::macro_box_ny::MacroBehavior::MapInsertTag => transform_map_insert_tag(&ast),
crate::r#macro::macro_box_ny::MacroBehavior::LoopNormalize => {
transform_loop_normalize(&ast)
}
crate::r#macro::macro_box_ny::MacroBehavior::IfMatchNormalize => {
transform_peek_match_literal(&ast)
}
crate::r#macro::macro_box_ny::MacroBehavior::ForForeachNormalize => {
transform_for_foreach(&ast)
}
crate::r#macro::macro_box_ny::MacroBehavior::EnvTagString => {
// Rewrites every string literal equal to "hello" into "hello [ENV]".
// Descends through the node kinds listed below; any other node is returned unchanged.
fn tag(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
use nyash_rust::ast::ASTNode as A;
match ast.clone() {
A::Literal { value: nyash_rust::ast::LiteralValue::String(s), .. } => {
if s == "hello" { A::Literal { value: nyash_rust::ast::LiteralValue::String("hello [ENV]".to_string()), span: nyash_rust::ast::Span::unknown() } } else { ast.clone() }
}
A::Program { statements, span } => A::Program { statements: statements.iter().map(|n| tag(n)).collect(), span },
A::Print { expression, span } => A::Print { expression: Box::new(tag(&expression)), span },
A::Return { value, span } => A::Return { value: value.as_ref().map(|v| Box::new(tag(v))), span },
A::Assignment { target, value, span } => A::Assignment { target: Box::new(tag(&target)), value: Box::new(tag(&value)), span },
A::If { condition, then_body, else_body, span } => A::If { condition: Box::new(tag(&condition)), then_body: then_body.iter().map(|n| tag(n)).collect(), else_body: else_body.map(|v| v.iter().map(|n| tag(n)).collect()), span },
A::Loop { condition, body, span } => A::Loop { condition: Box::new(tag(&condition)), body: body.iter().map(|n| tag(n)).collect(), span },
A::BinaryOp { operator, left, right, span } => A::BinaryOp { operator, left: Box::new(tag(&left)), right: Box::new(tag(&right)), span },
A::UnaryOp { operator, operand, span } => A::UnaryOp { operator, operand: Box::new(tag(&operand)), span },
A::MethodCall { object, method, arguments, span } => A::MethodCall { object: Box::new(tag(&object)), method, arguments: arguments.iter().map(|a| tag(a)).collect(), span },
A::FunctionCall { name, arguments, span } => A::FunctionCall { name, arguments: arguments.iter().map(|a| tag(a)).collect(), span },
A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: elements.iter().map(|e| tag(e)).collect(), span },
A::MapLiteral { entries, span } => A::MapLiteral { entries: entries.iter().map(|(k,v)| (k.clone(), tag(v))).collect(), span },
other => other,
}
}
// Prefer ctx JSON from env (NYASH_MACRO_CTX_JSON) if provided; fallback to simple flag
// NYASH_MACRO_CAP_ENV counts as enabled when it is "1", "true" or "on".
let mut env_on = std::env::var("NYASH_MACRO_CAP_ENV").ok().map(|v| v=="1"||v=="true"||v=="on").unwrap_or(false);
if let Ok(ctxs) = std::env::var("NYASH_MACRO_CTX_JSON") {
if let Ok(v) = serde_json::from_str::<serde_json::Value>(&ctxs) {
// A boolean at caps.env wins; a missing or non-boolean value keeps the flag above.
env_on = v.get("caps").and_then(|c| c.get("env")).and_then(|b| b.as_bool()).unwrap_or(env_on);
}
}
// Without the env capability the AST is passed through untouched.
if env_on { tag(&ast) } else { ast.clone() }
}
};
// Serialize the transformed AST back to JSON and emit it on stdout.
let out_json = crate::r#macro::ast_json::ast_to_json(&out_ast);
println!("{}", out_json.to_string());
}

View File

@ -0,0 +1,70 @@
use super::transforms::*;
pub fn run_macro_child(macro_file: &str) {
// Read full AST JSON (v0) from stdin
use std::io::Read;
let mut input = String::new();
if let Err(_) = std::io::stdin().read_to_string(&mut input) {
eprintln!("[macro-child] failed to read AST JSON from stdin");
std::process::exit(3);
}
let value: serde_json::Value = match serde_json::from_str(&input) {
Ok(v) => v,
Err(_) => { eprintln!("[macro-child] invalid AST JSON v0"); std::process::exit(3); }
};
let ast: nyash_rust::ASTNode = match crate::r#macro::ast_json::json_to_ast(&value) {
Some(a) => a,
None => { eprintln!("[macro-child] unsupported AST JSON v0"); std::process::exit(4); }
};
// Analyze macro behavior (PoC)
let mut behavior = crate::r#macro::macro_box_ny::analyze_macro_file(macro_file);
if macro_file.contains("env_tag_string_macro") {
behavior = crate::r#macro::macro_box_ny::MacroBehavior::EnvTagString;
}
let out_ast = match behavior {
crate::r#macro::macro_box_ny::MacroBehavior::Identity => ast.clone(),
crate::r#macro::macro_box_ny::MacroBehavior::Uppercase => {
let m = crate::r#macro::macro_box::UppercasePrintMacro;
crate::r#macro::macro_box::MacroBox::expand(&m, &ast)
}
crate::r#macro::macro_box_ny::MacroBehavior::ArrayPrependZero => transform_array_prepend_zero(&ast),
crate::r#macro::macro_box_ny::MacroBehavior::MapInsertTag => transform_map_insert_tag(&ast),
crate::r#macro::macro_box_ny::MacroBehavior::LoopNormalize => transform_loop_normalize(&ast),
crate::r#macro::macro_box_ny::MacroBehavior::IfMatchNormalize => transform_peek_match_literal(&ast),
crate::r#macro::macro_box_ny::MacroBehavior::ForForeachNormalize => transform_for_foreach(&ast),
crate::r#macro::macro_box_ny::MacroBehavior::EnvTagString => {
fn tag(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
use nyash_rust::ast::ASTNode as A;
match ast.clone() {
A::Literal { value: nyash_rust::ast::LiteralValue::String(s), .. } => {
if s == "hello" { A::Literal { value: nyash_rust::ast::LiteralValue::String("hello [ENV]".to_string()), span: nyash_rust::ast::Span::unknown() } } else { ast.clone() }
}
A::Program { statements, span } => A::Program { statements: statements.iter().map(|n| tag(n)).collect(), span },
A::Print { expression, span } => A::Print { expression: Box::new(tag(&expression)), span },
A::Return { value, span } => A::Return { value: value.as_ref().map(|v| Box::new(tag(v))), span },
A::Assignment { target, value, span } => A::Assignment { target: Box::new(tag(&target)), value: Box::new(tag(&value)), span },
A::If { condition, then_body, else_body, span } => A::If { condition: Box::new(tag(&condition)), then_body: then_body.iter().map(|n| tag(n)).collect(), else_body: else_body.map(|v| v.iter().map(|n| tag(n)).collect()), span },
A::Loop { condition, body, span } => A::Loop { condition: Box::new(tag(&condition)), body: body.iter().map(|n| tag(n)).collect(), span },
A::BinaryOp { operator, left, right, span } => A::BinaryOp { operator, left: Box::new(tag(&left)), right: Box::new(tag(&right)), span },
A::UnaryOp { operator, operand, span } => A::UnaryOp { operator, operand: Box::new(tag(&operand)), span },
A::MethodCall { object, method, arguments, span } => A::MethodCall { object: Box::new(tag(&object)), method, arguments: arguments.iter().map(|a| tag(a)).collect(), span },
A::FunctionCall { name, arguments, span } => A::FunctionCall { name, arguments: arguments.iter().map(|a| tag(a)).collect(), span },
A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: elements.iter().map(|e| tag(e)).collect(), span },
A::MapLiteral { entries, span } => A::MapLiteral { entries: entries.iter().map(|(k,v)| (k.clone(), tag(v))).collect(), span },
other => other,
}
}
let mut env_on = std::env::var("NYASH_MACRO_CAP_ENV").ok().map(|v| v=="1"||v=="true"||v=="on").unwrap_or(false);
if let Ok(ctxs) = std::env::var("NYASH_MACRO_CTX_JSON") {
if let Ok(v) = serde_json::from_str::<serde_json::Value>(&ctxs) {
env_on = v.get("caps").and_then(|c| c.get("env")).and_then(|b| b.as_bool()).unwrap_or(env_on);
}
}
if env_on { tag(&ast) } else { ast.clone() }
}
};
let out_json = crate::r#macro::ast_json::ast_to_json(&out_ast);
println!("{}", out_json.to_string());
}

View File

@ -0,0 +1,10 @@
/*!
* Macro child mode (split modules)
*
* - `entry`: provides `run_macro_child`, re-exported below.
* - `transforms`: provides `normalize_core_pass`, re-exported below.
*
* Both submodules are private; only the two re-exported functions are
* visible to the rest of the crate through this module.
*/
mod transforms;
mod entry;
pub use entry::run_macro_child;
pub use transforms::normalize_core_pass;

View File

@ -0,0 +1,36 @@
/// Prepends the integer literal `0` to every array literal in `ast` that does
/// not already start with the integer literal `0`.
///
/// Elements of an array literal are themselves transformed, so nested array
/// literals are handled too. The walk descends through `Program`, `Print`,
/// `Return`, `Assignment`, `If`, `Loop`, unary/binary operators, method and
/// function calls, and map literals; any other node is returned as an unchanged
/// clone. Every rebuilt node carries `Span::unknown()`.
pub(super) fn transform_array_prepend_zero(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::{ASTNode as A, LiteralValue, Span};
    match ast {
        A::ArrayLiteral { elements, .. } => {
            // Skip the insertion when the array already starts with a literal 0.
            let already_zero = matches!(
                elements.first(),
                Some(A::Literal { value: LiteralValue::Integer(0), .. })
            );
            let mut new_elems: Vec<A> = Vec::with_capacity(elements.len() + 1);
            if !already_zero {
                new_elems.push(A::Literal { value: LiteralValue::Integer(0), span: Span::unknown() });
            }
            new_elems.extend(elements.iter().map(transform_array_prepend_zero));
            A::ArrayLiteral { elements: new_elems, span: Span::unknown() }
        }
        A::Program { statements, .. } => A::Program {
            statements: statements.iter().map(transform_array_prepend_zero).collect(),
            span: Span::unknown(),
        },
        A::Print { expression, .. } => A::Print {
            expression: Box::new(transform_array_prepend_zero(expression)),
            span: Span::unknown(),
        },
        A::Return { value, .. } => A::Return {
            value: value.as_ref().map(|v| Box::new(transform_array_prepend_zero(v))),
            span: Span::unknown(),
        },
        A::Assignment { target, value, .. } => A::Assignment {
            target: Box::new(transform_array_prepend_zero(target)),
            value: Box::new(transform_array_prepend_zero(value)),
            span: Span::unknown(),
        },
        A::If { condition, then_body, else_body, .. } => A::If {
            condition: Box::new(transform_array_prepend_zero(condition)),
            then_body: then_body.iter().map(transform_array_prepend_zero).collect(),
            else_body: else_body.as_ref().map(|v| v.iter().map(transform_array_prepend_zero).collect()),
            span: Span::unknown(),
        },
        // Loops are traversed like `If`, so array literals in loop conditions and
        // bodies are rewritten as well.
        A::Loop { condition, body, .. } => A::Loop {
            condition: Box::new(transform_array_prepend_zero(condition)),
            body: body.iter().map(transform_array_prepend_zero).collect(),
            span: Span::unknown(),
        },
        A::BinaryOp { operator, left, right, .. } => A::BinaryOp {
            operator: operator.clone(),
            left: Box::new(transform_array_prepend_zero(left)),
            right: Box::new(transform_array_prepend_zero(right)),
            span: Span::unknown(),
        },
        A::UnaryOp { operator, operand, .. } => A::UnaryOp {
            operator: operator.clone(),
            operand: Box::new(transform_array_prepend_zero(operand)),
            span: Span::unknown(),
        },
        A::MethodCall { object, method, arguments, .. } => A::MethodCall {
            object: Box::new(transform_array_prepend_zero(object)),
            method: method.clone(),
            arguments: arguments.iter().map(transform_array_prepend_zero).collect(),
            span: Span::unknown(),
        },
        A::FunctionCall { name, arguments, .. } => A::FunctionCall {
            name: name.clone(),
            arguments: arguments.iter().map(transform_array_prepend_zero).collect(),
            span: Span::unknown(),
        },
        A::MapLiteral { entries, .. } => A::MapLiteral {
            entries: entries.iter().map(|(k, v)| (k.clone(), transform_array_prepend_zero(v))).collect(),
            span: Span::unknown(),
        },
        other => other.clone(),
    }
}

View File

@ -0,0 +1,98 @@
/// Returns a copy of `node` in which every `Variable` named `name` is replaced
/// by a clone of `replacement`.
///
/// The substitution descends through `Program`, `Print`, `Return`,
/// `Assignment` (target and value), `Local` initializers, `If`, `Loop`,
/// unary/binary operators, method and function calls, and array/map literals.
/// Any other node kind is cloned unchanged, without visiting its children.
///
/// The replacement is purely textual: a `Local` that re-declares `name` does
/// not stop the substitution in the statements that follow it.
fn subst_var(node: &nyash_rust::ASTNode, name: &str, replacement: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::ASTNode as A;
    // Match by reference so only the nodes that are rebuilt get cloned, instead
    // of deep-cloning the whole subtree at every recursion level.
    match node {
        A::Variable { name: n, .. } if n.as_str() == name => replacement.clone(),
        A::Program { statements, span } => A::Program {
            statements: statements.iter().map(|s| subst_var(s, name, replacement)).collect(),
            span: span.clone(),
        },
        A::Print { expression, span } => A::Print {
            expression: Box::new(subst_var(expression, name, replacement)),
            span: span.clone(),
        },
        A::Return { value, span } => A::Return {
            value: value.as_ref().map(|v| Box::new(subst_var(v, name, replacement))),
            span: span.clone(),
        },
        A::Assignment { target, value, span } => A::Assignment {
            target: Box::new(subst_var(target, name, replacement)),
            value: Box::new(subst_var(value, name, replacement)),
            span: span.clone(),
        },
        // Initializer expressions may mention the variable; the declared names are kept.
        A::Local { variables, initial_values, span } => A::Local {
            variables: variables.clone(),
            initial_values: initial_values
                .iter()
                .map(|init| init.as_ref().map(|v| Box::new(subst_var(v, name, replacement))))
                .collect(),
            span: span.clone(),
        },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: Box::new(subst_var(condition, name, replacement)),
            then_body: then_body.iter().map(|s| subst_var(s, name, replacement)).collect(),
            else_body: else_body
                .as_ref()
                .map(|v| v.iter().map(|s| subst_var(s, name, replacement)).collect()),
            span: span.clone(),
        },
        // Uses of the variable inside a nested loop must be rewritten too.
        A::Loop { condition, body, span } => A::Loop {
            condition: Box::new(subst_var(condition, name, replacement)),
            body: body.iter().map(|s| subst_var(s, name, replacement)).collect(),
            span: span.clone(),
        },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp {
            operator: operator.clone(),
            left: Box::new(subst_var(left, name, replacement)),
            right: Box::new(subst_var(right, name, replacement)),
            span: span.clone(),
        },
        A::UnaryOp { operator, operand, span } => A::UnaryOp {
            operator: operator.clone(),
            operand: Box::new(subst_var(operand, name, replacement)),
            span: span.clone(),
        },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: Box::new(subst_var(object, name, replacement)),
            method: method.clone(),
            arguments: arguments.iter().map(|a| subst_var(a, name, replacement)).collect(),
            span: span.clone(),
        },
        A::FunctionCall { name: fn_name, arguments, span } => A::FunctionCall {
            name: fn_name.clone(),
            arguments: arguments.iter().map(|a| subst_var(a, name, replacement)).collect(),
            span: span.clone(),
        },
        A::ArrayLiteral { elements, span } => A::ArrayLiteral {
            elements: elements.iter().map(|e| subst_var(e, name, replacement)).collect(),
            span: span.clone(),
        },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries.iter().map(|(k, v)| (k.clone(), subst_var(v, name, replacement))).collect(),
            span: span.clone(),
        },
        other => other.clone(),
    }
}
/// Desugars the helper calls `ny_for`/`for` and `ny_foreach`/`foreach` into plain
/// `Local` + `Loop` statements.
///
/// Expansion only happens for calls that appear as statements in a statement
/// list (`Program` statements, `If` branches, `Loop` bodies), because one call
/// may expand into several statements. Everywhere else the function just
/// recurses through the node kinds listed in the final `match`; any other node
/// is returned unchanged.
pub(super) fn transform_for_foreach(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
use nyash_rust::ast::{ASTNode as A, BinaryOperator, LiteralValue, Span};
// Rewrites one statement list, expanding for/foreach helper calls in place.
fn rewrite_stmt_list(list: Vec<A>) -> Vec<A> {
let mut out: Vec<A> = Vec::new();
for st in list.into_iter() {
match st.clone() {
// for(init, cond, step, fn(){ body })  =>  init; loop(cond) { body; step }
A::FunctionCall { name, arguments, .. } if (name == "ny_for" || name == "for") && arguments.len() == 4 => {
let init = arguments[0].clone();
let cond = arguments[1].clone();
let step = arguments[2].clone();
let body_lam = arguments[3].clone();
if let A::Lambda { params, body, .. } = body_lam {
if params.is_empty() {
// init: an Assignment/Local is emitted as-is; a zero-parameter lambda
// contributes its body statements; anything else is dropped.
match init.clone() {
A::Assignment { .. } | A::Local { .. } => out.push(init),
A::Lambda { params: p2, body: b2, .. } if p2.is_empty() => { for s in b2 { out.push(transform_for_foreach(&s)); } }
_ => {}
}
// NOTE(review): body statements go through transform_for_foreach, not
// rewrite_stmt_list, so a for/foreach call sitting directly in this body
// is not expanded (the FunctionCall arm below only maps arguments).
let mut loop_body: Vec<A> = body.into_iter().map(|n| transform_for_foreach(&n)).collect();
// step: appended at the end of the loop body, same accepted shapes as init
// minus Local.
match step.clone() {
A::Assignment { .. } => loop_body.push(step),
A::Lambda { params: p2, body: b2, .. } if p2.is_empty() => { for s in b2 { loop_body.push(transform_for_foreach(&s)); } }
_ => {}
}
out.push(A::Loop { condition: Box::new(transform_for_foreach(&cond)), body: loop_body, span: Span::unknown() });
continue;
}
}
// Not the expected shape (4th argument is not a zero-parameter lambda): keep the call untouched.
out.push(st);
}
// foreach(array, var, fn(){ body })  =>
//   local __i = 0; loop(__i < array.len()) { body[var := array.get(__i)]; __i = __i + 1 }
A::FunctionCall { name, arguments, .. } if (name == "ny_foreach" || name == "foreach") && arguments.len() == 3 => {
let array = arguments[0].clone();
// The element name comes from a Variable argument; any other node falls back to "it".
let param_name = match &arguments[1] { A::Variable { name, .. } => name.clone(), _ => "it".to_string() };
let body_lam = arguments[2].clone();
if let A::Lambda { params, body, .. } = body_lam {
if params.is_empty() {
// NOTE(review): the index variable name "__i" is fixed, so nested
// expansions would share it — confirm this cannot clash.
let iter = A::Variable { name: "__i".to_string(), span: Span::unknown() };
let zero = A::Literal { value: LiteralValue::Integer(0), span: Span::unknown() };
let one = A::Literal { value: LiteralValue::Integer(1), span: Span::unknown() };
let init = A::Local { variables: vec!["__i".to_string()], initial_values: vec![Some(Box::new(zero))], span: Span::unknown() };
let len_call = A::MethodCall { object: Box::new(transform_for_foreach(&array)), method: "len".to_string(), arguments: vec![], span: Span::unknown() };
let cond = A::BinaryOp { operator: BinaryOperator::Less, left: Box::new(iter.clone()), right: Box::new(len_call), span: Span::unknown() };
let get_call = A::MethodCall { object: Box::new(transform_for_foreach(&array)), method: "get".to_string(), arguments: vec![iter.clone()], span: Span::unknown() };
// Replace each use of the element variable by `array.get(__i)`.
let body_stmts: Vec<A> = body.into_iter().map(|s| subst_var(&s, &param_name, &get_call)).collect();
let step = A::Assignment { target: Box::new(iter.clone()), value: Box::new(A::BinaryOp { operator: BinaryOperator::Add, left: Box::new(iter), right: Box::new(one), span: Span::unknown() }), span: Span::unknown() };
out.push(init);
out.push(A::Loop { condition: Box::new(cond), body: { let mut b = Vec::new(); for s in body_stmts { b.push(transform_for_foreach(&s)); } b.push(step); b }, span: Span::unknown() });
continue;
}
}
// Not the expected shape: keep the call untouched.
out.push(st);
}
// Every other statement is transformed recursively and kept in place.
other => out.push(transform_for_foreach(&other)),
}
}
out
}
// `A` is already imported above
match ast.clone() {
// Statement containers: their statement lists may grow during expansion.
A::Program { statements, span } => A::Program { statements: rewrite_stmt_list(statements), span },
A::If { condition, then_body, else_body, span } => A::If { condition: Box::new(transform_for_foreach(&condition)), then_body: rewrite_stmt_list(then_body), else_body: else_body.map(rewrite_stmt_list), span },
A::Loop { condition, body, span } => A::Loop { condition: Box::new(transform_for_foreach(&condition)), body: rewrite_stmt_list(body), span },
// Remaining nodes: recurse into children, no statement expansion.
A::Print { expression, span } => A::Print { expression: Box::new(transform_for_foreach(&expression)), span },
A::Return { value, span } => A::Return { value: value.as_ref().map(|v| Box::new(transform_for_foreach(v))), span },
A::Assignment { target, value, span } => A::Assignment { target: Box::new(transform_for_foreach(&target)), value: Box::new(transform_for_foreach(&value)), span },
A::BinaryOp { operator, left, right, span } => A::BinaryOp { operator, left: Box::new(transform_for_foreach(&left)), right: Box::new(transform_for_foreach(&right)), span },
A::UnaryOp { operator, operand, span } => A::UnaryOp { operator, operand: Box::new(transform_for_foreach(&operand)), span },
A::MethodCall { object, method, arguments, span } => A::MethodCall { object: Box::new(transform_for_foreach(&object)), method, arguments: arguments.iter().map(|a| transform_for_foreach(a)).collect(), span },
A::FunctionCall { name, arguments, span } => A::FunctionCall { name, arguments: arguments.iter().map(|a| transform_for_foreach(a)).collect(), span },
A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: elements.iter().map(|e| transform_for_foreach(e)).collect(), span },
A::MapLiteral { entries, span } => A::MapLiteral { entries: entries.iter().map(|(k,v)| (k.clone(), transform_for_foreach(v))).collect(), span },
other => other,
}
}

View File

@ -0,0 +1,24 @@
/// Loop-form normalization: rewrites `if` statements into single-iteration loops.
///
/// `if (c) { t } else { e }` becomes `loop(1) { if (c) { t } else { e }  break }`,
/// which runs the branch exactly once. The rewrite is skipped (the node stays an
/// `if`, with its children still normalized) when a branch contains a `break` or
/// `continue` that is not enclosed in a nested loop: wrapping such an `if` in a
/// synthetic loop would make that jump bind to the synthetic loop instead of the
/// loop the programmer wrote.
///
/// Recurses through `Program`, `If`, `Loop`, unary/binary operators, calls and
/// array/map literals; every other node kind is returned unchanged.
pub(super) fn transform_if_to_loopform(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::{ASTNode as A, LiteralValue, Span};

    // True when `stmts` holds a break/continue that would be captured by a loop
    // wrapped directly around them. Nested `Loop` bodies are deliberately not
    // searched: jumps inside them already target that nested loop.
    fn has_escaping_jump(stmts: &[A]) -> bool {
        stmts.iter().any(|stmt| match stmt {
            A::Break { .. } | A::Continue { .. } => true,
            A::If { then_body, else_body, .. } => {
                has_escaping_jump(then_body)
                    || else_body.as_deref().map_or(false, has_escaping_jump)
            }
            A::ScopeBox { body, .. } => has_escaping_jump(body),
            A::TryCatch { try_body, catch_clauses, finally_body, .. } => {
                has_escaping_jump(try_body)
                    || catch_clauses.iter().any(|clause| has_escaping_jump(&clause.body))
                    || finally_body.as_deref().map_or(false, has_escaping_jump)
            }
            _ => false,
        })
    }

    match ast.clone() {
        A::Program { statements, span } => A::Program {
            statements: statements.into_iter().map(|n| transform_if_to_loopform(&n)).collect(),
            span,
        },
        A::If { condition, then_body, else_body, span } => {
            // Decide on the original bodies, before any nested rewrite.
            let captures_jump = has_escaping_jump(&then_body)
                || else_body.as_deref().map_or(false, has_escaping_jump);
            let cond_t = Box::new(transform_if_to_loopform(&condition));
            let then_t: Vec<A> = then_body.into_iter().map(|n| transform_if_to_loopform(&n)).collect();
            let else_t: Option<Vec<A>> =
                else_body.map(|v| v.into_iter().map(|n| transform_if_to_loopform(&n)).collect());
            if captures_jump {
                // Keep the `if` so break/continue still reach the enclosing loop.
                A::If { condition: cond_t, then_body: then_t, else_body: else_t, span }
            } else {
                // Single-iteration loop: run the `if` once, then leave via explicit break.
                let inner_if = A::If { condition: cond_t, then_body: then_t, else_body: else_t, span: Span::unknown() };
                let one = A::Literal { value: LiteralValue::Integer(1), span: Span::unknown() };
                let loop_body = vec![inner_if, A::Break { span: Span::unknown() }];
                A::Loop { condition: Box::new(one), body: loop_body, span }
            }
        }
        A::Loop { condition, body, span } => A::Loop {
            condition: Box::new(transform_if_to_loopform(&condition)),
            body: body.into_iter().map(|n| transform_if_to_loopform(&n)).collect(),
            span,
        },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp {
            operator,
            left: Box::new(transform_if_to_loopform(&left)),
            right: Box::new(transform_if_to_loopform(&right)),
            span,
        },
        A::UnaryOp { operator, operand, span } => A::UnaryOp {
            operator,
            operand: Box::new(transform_if_to_loopform(&operand)),
            span,
        },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: Box::new(transform_if_to_loopform(&object)),
            method,
            arguments: arguments.into_iter().map(|a| transform_if_to_loopform(&a)).collect(),
            span,
        },
        A::FunctionCall { name, arguments, span } => A::FunctionCall {
            name,
            arguments: arguments.into_iter().map(|a| transform_if_to_loopform(&a)).collect(),
            span,
        },
        A::ArrayLiteral { elements, span } => A::ArrayLiteral {
            elements: elements.into_iter().map(|e| transform_if_to_loopform(&e)).collect(),
            span,
        },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries.into_iter().map(|(k, v)| (k, transform_if_to_loopform(&v))).collect(),
            span,
        },
        other => other,
    }
}

View File

@ -0,0 +1,132 @@
/// Lifts capture-free nested function declarations out of their enclosing body.
///
/// A `FunctionDeclaration` found directly in a `Program` or function body is hoisted when its
/// body references no variable other than its own parameters and `local`s. The hoisted copy is
/// given a fresh `__ny_lifted_<name>_<n>` name, marked static, and appended to the end of the
/// root `Program`; calls to the old name are rewritten to the new one. Declarations that use
/// outer variables stay where they are.
///
/// Traversal covers `Loop` and `ScopeBox` bodies as well, so calls and variable uses nested in
/// loops or scope boxes are seen. Calls inside the hoisted functions themselves (recursion,
/// calls to lifted siblings) are renamed too. If the root node is not a `Program` there is no
/// place to attach hoisted functions, so the input is returned unchanged in that case.
///
/// NOTE(review): only the `variables` of a `Local` node are inspected here; if `Local` also
/// carries initializer expressions they are not visited by this pass — confirm against the AST.
pub(super) fn transform_lift_nested_functions(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::ASTNode as A;
    use std::collections::{HashMap, HashSet};
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Process-wide counter that keeps generated names unique across calls.
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    fn gensym(base: &str) -> String {
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        format!("__ny_lifted_{}_{}", base, n)
    }

    // Collects every name declared through `local` anywhere in `n`.
    fn collect_locals(n: &A, set: &mut HashSet<String>) {
        match n {
            A::Local { variables, .. } => { for v in variables { set.insert(v.clone()); } }
            A::Program { statements, .. } => for s in statements { collect_locals(s, set); },
            A::FunctionDeclaration { body, .. } => for s in body { collect_locals(s, set); },
            A::If { then_body, else_body, .. } => {
                for s in then_body { collect_locals(s, set); }
                if let Some(b) = else_body { for s in b { collect_locals(s, set); } }
            }
            A::Loop { body, .. } => for s in body { collect_locals(s, set); },
            A::ScopeBox { body, .. } => for s in body { collect_locals(s, set); },
            _ => {}
        }
    }

    // Collects every variable name read or written anywhere in `n`.
    fn collect_vars(n: &A, set: &mut HashSet<String>) {
        match n {
            A::Variable { name, .. } => { set.insert(name.clone()); }
            A::Program { statements, .. } => for s in statements { collect_vars(s, set); },
            A::FunctionDeclaration { body, .. } => for s in body { collect_vars(s, set); },
            A::If { condition, then_body, else_body, .. } => {
                collect_vars(condition, set);
                for s in then_body { collect_vars(s, set); }
                if let Some(b) = else_body { for s in b { collect_vars(s, set); } }
            }
            A::Loop { condition, body, .. } => {
                collect_vars(condition, set);
                for s in body { collect_vars(s, set); }
            }
            A::ScopeBox { body, .. } => for s in body { collect_vars(s, set); },
            A::Assignment { target, value, .. } => { collect_vars(target, set); collect_vars(value, set); }
            A::Return { value, .. } => { if let Some(v) = value { collect_vars(v, set); } }
            A::Print { expression, .. } => collect_vars(expression, set),
            A::BinaryOp { left, right, .. } => { collect_vars(left, set); collect_vars(right, set); }
            A::UnaryOp { operand, .. } => collect_vars(operand, set),
            A::MethodCall { object, arguments, .. } => { collect_vars(object, set); for a in arguments { collect_vars(a, set); } }
            A::FunctionCall { arguments, .. } => { for a in arguments { collect_vars(a, set); } }
            A::ArrayLiteral { elements, .. } => { for e in elements { collect_vars(e, set); } }
            A::MapLiteral { entries, .. } => { for (_, v) in entries { collect_vars(v, set); } }
            _ => {}
        }
    }

    fn rename_all(nodes: Vec<A>, mapping: &HashMap<String, String>) -> Vec<A> {
        nodes.iter().map(|n| rename_calls(n, mapping)).collect()
    }

    // Rewrites `FunctionCall` names according to `mapping`, recursing through the tree.
    fn rename_calls(n: &A, mapping: &HashMap<String, String>) -> A {
        match n.clone() {
            A::FunctionCall { name, arguments, span } => {
                let new_name = mapping.get(&name).cloned().unwrap_or(name);
                A::FunctionCall { name: new_name, arguments: rename_all(arguments, mapping), span }
            }
            A::Program { statements, span } => A::Program { statements: rename_all(statements, mapping), span },
            A::FunctionDeclaration { name, params, body, is_static, is_override, span } => {
                A::FunctionDeclaration { name, params, body: rename_all(body, mapping), is_static, is_override, span }
            }
            A::If { condition, then_body, else_body, span } => A::If {
                condition: Box::new(rename_calls(&condition, mapping)),
                then_body: rename_all(then_body, mapping),
                else_body: else_body.map(|v| rename_all(v, mapping)),
                span,
            },
            A::Loop { condition, body, span } => A::Loop {
                condition: Box::new(rename_calls(&condition, mapping)),
                body: rename_all(body, mapping),
                span,
            },
            A::ScopeBox { body, span } => A::ScopeBox { body: rename_all(body, mapping), span },
            A::Assignment { target, value, span } => A::Assignment {
                target: Box::new(rename_calls(&target, mapping)),
                value: Box::new(rename_calls(&value, mapping)),
                span,
            },
            A::Return { value, span } => A::Return { value: value.as_ref().map(|v| Box::new(rename_calls(v, mapping))), span },
            A::Print { expression, span } => A::Print { expression: Box::new(rename_calls(&expression, mapping)), span },
            A::BinaryOp { operator, left, right, span } => A::BinaryOp {
                operator,
                left: Box::new(rename_calls(&left, mapping)),
                right: Box::new(rename_calls(&right, mapping)),
                span,
            },
            A::UnaryOp { operator, operand, span } => A::UnaryOp { operator, operand: Box::new(rename_calls(&operand, mapping)), span },
            A::MethodCall { object, method, arguments, span } => A::MethodCall {
                object: Box::new(rename_calls(&object, mapping)),
                method,
                arguments: rename_all(arguments, mapping),
                span,
            },
            A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: rename_all(elements, mapping), span },
            A::MapLiteral { entries, span } => A::MapLiteral {
                entries: entries.into_iter().map(|(k, v)| (k, rename_calls(&v, mapping))).collect(),
                span,
            },
            other => other,
        }
    }

    // Removes liftable function declarations from `body`, pushes them onto `hoisted`, records
    // old-name -> new-name in `mapping`, and returns the remaining statements with calls renamed.
    fn lift_in_body(body: Vec<A>, hoisted: &mut Vec<A>, mapping: &mut HashMap<String, String>) -> Vec<A> {
        // Everything pushed from this index on was hoisted out of *this* body.
        let first_new = hoisted.len();
        let mut kept: Vec<A> = Vec::with_capacity(body.len());
        for st in body {
            match st {
                A::FunctionDeclaration { name, params, body, is_static, is_override, span } => {
                    let mut locals: HashSet<String> = HashSet::new();
                    let mut used: HashSet<String> = HashSet::new();
                    for s in &body {
                        collect_locals(s, &mut locals);
                        collect_vars(s, &mut used);
                    }
                    // Any variable that is neither a parameter nor a local comes from outside.
                    let captures_outer = used.iter().any(|v| !params.contains(v) && !locals.contains(v));
                    if captures_outer {
                        kept.push(A::FunctionDeclaration { name, params, body, is_static, is_override, span });
                    } else {
                        let new_name = gensym(&name);
                        hoisted.push(A::FunctionDeclaration { name: new_name.clone(), params, body, is_static: true, is_override, span });
                        mapping.insert(name, new_name);
                    }
                }
                other => kept.push(other),
            }
        }
        if mapping.is_empty() {
            return kept;
        }
        let mapping: &HashMap<String, String> = mapping;
        // Lifted functions may call themselves or lifted siblings by the old name; rename those
        // calls as well, otherwise they would refer to a declaration that no longer exists.
        for f in &mut hoisted[first_new..] {
            *f = rename_calls(&*f, mapping);
        }
        rename_all(kept, mapping)
    }

    fn walk_all(nodes: Vec<A>, hoisted: &mut Vec<A>) -> Vec<A> {
        nodes.iter().map(|n| walk(n, hoisted)).collect()
    }

    // Bottom-up walk: children are processed first, then liftable declarations are pulled out
    // of each `Program` / function body.
    fn walk(n: &A, hoisted: &mut Vec<A>) -> A {
        match n.clone() {
            A::Program { statements, span } => {
                let mut mapping = HashMap::new();
                let walked = walk_all(statements, hoisted);
                let statements = lift_in_body(walked, hoisted, &mut mapping);
                A::Program { statements, span }
            }
            A::FunctionDeclaration { name, params, body, is_static, is_override, span } => {
                let mut mapping = HashMap::new();
                let walked = walk_all(body, hoisted);
                let body = lift_in_body(walked, hoisted, &mut mapping);
                A::FunctionDeclaration { name, params, body, is_static, is_override, span }
            }
            A::If { condition, then_body, else_body, span } => A::If {
                condition: Box::new(walk(&condition, hoisted)),
                then_body: walk_all(then_body, hoisted),
                else_body: else_body.map(|v| walk_all(v, hoisted)),
                span,
            },
            A::Loop { condition, body, span } => A::Loop {
                condition: Box::new(walk(&condition, hoisted)),
                body: walk_all(body, hoisted),
                span,
            },
            A::ScopeBox { body, span } => A::ScopeBox { body: walk_all(body, hoisted), span },
            A::Assignment { target, value, span } => A::Assignment {
                target: Box::new(walk(&target, hoisted)),
                value: Box::new(walk(&value, hoisted)),
                span,
            },
            A::Return { value, span } => A::Return { value: value.as_ref().map(|v| Box::new(walk(v, hoisted))), span },
            A::Print { expression, span } => A::Print { expression: Box::new(walk(&expression, hoisted)), span },
            A::BinaryOp { operator, left, right, span } => A::BinaryOp {
                operator,
                left: Box::new(walk(&left, hoisted)),
                right: Box::new(walk(&right, hoisted)),
                span,
            },
            A::UnaryOp { operator, operand, span } => A::UnaryOp { operator, operand: Box::new(walk(&operand, hoisted)), span },
            A::MethodCall { object, method, arguments, span } => A::MethodCall {
                object: Box::new(walk(&object, hoisted)),
                method,
                arguments: walk_all(arguments, hoisted),
                span,
            },
            A::FunctionCall { name, arguments, span } => A::FunctionCall { name, arguments: walk_all(arguments, hoisted), span },
            A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: walk_all(elements, hoisted), span },
            A::MapLiteral { entries, span } => A::MapLiteral {
                entries: entries.into_iter().map(|(k, v)| (k, walk(&v, hoisted))).collect(),
                span,
            },
            other => other,
        }
    }

    let mut hoisted: Vec<A> = Vec::new();
    match walk(ast, &mut hoisted) {
        A::Program { mut statements, span } => {
            statements.extend(hoisted);
            A::Program { statements, span }
        }
        // Without a root `Program` the hoisted functions cannot be re-attached; keep the
        // input intact rather than dropping their definitions.
        _ if !hoisted.is_empty() => ast.clone(),
        other => other,
    }
}

View File

@ -0,0 +1,25 @@
/// Loop normalization pass.
///
/// As written, the pass rebuilds `Program`, `If`, `Loop`, `BinaryOp`, `UnaryOp`, `MethodCall`,
/// `FunctionCall`, `ArrayLiteral` and `MapLiteral` nodes from their recursively processed
/// children and returns every other node as a plain clone; no node changes shape.
pub(super) fn transform_loop_normalize(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::ASTNode as A;

    // Process a single child expression and re-box it.
    fn expr(node: &A) -> Box<A> {
        Box::new(transform_loop_normalize(node))
    }
    // Process every node of a statement / argument / element list.
    fn list(nodes: Vec<A>) -> Vec<A> {
        nodes.iter().map(transform_loop_normalize).collect()
    }

    match ast.clone() {
        A::Program { statements, span } => A::Program { statements: list(statements), span },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: expr(&condition),
            then_body: list(then_body),
            else_body: else_body.map(list),
            span,
        },
        A::Loop { condition, body, span } => A::Loop { condition: expr(&condition), body: list(body), span },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp { operator, left: expr(&left), right: expr(&right), span },
        A::UnaryOp { operator, operand, span } => A::UnaryOp { operator, operand: expr(&operand), span },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: expr(&object),
            method,
            arguments: list(arguments),
            span,
        },
        A::FunctionCall { name, arguments, span } => A::FunctionCall { name, arguments: list(arguments), span },
        A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: list(elements), span },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries
                .into_iter()
                .map(|(key, value)| (key, transform_loop_normalize(&value)))
                .collect(),
            span,
        },
        other => other,
    }
}

View File

@ -0,0 +1,35 @@
/// Inserts a leading `"__macro": "on"` entry into every map literal.
///
/// A map whose first key is already `__macro` keeps its entries as they are (values are still
/// processed recursively). The pass descends through `Program`, `Print`, `Return`,
/// `Assignment`, `If`, `BinaryOp`, `UnaryOp`, `MethodCall` and `FunctionCall`; every other node
/// is cloned untouched. All rebuilt nodes carry `Span::unknown()`.
pub(super) fn transform_map_insert_tag(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::{ASTNode as A, LiteralValue, Span};

    fn boxed(node: &A) -> Box<A> {
        Box::new(transform_map_insert_tag(node))
    }
    fn each(nodes: &[A]) -> Vec<A> {
        nodes.iter().map(transform_map_insert_tag).collect()
    }

    match ast {
        A::MapLiteral { entries, .. } => {
            // Only the first key is checked for an existing tag.
            let needs_tag = entries.first().map_or(true, |(key, _)| key != "__macro");
            let mut tagged: Vec<(String, A)> = Vec::with_capacity(entries.len() + 1);
            if needs_tag {
                let on = A::Literal { value: LiteralValue::String("on".to_string()), span: Span::unknown() };
                tagged.push(("__macro".to_string(), on));
            }
            tagged.extend(entries.iter().map(|(key, value)| (key.clone(), transform_map_insert_tag(value))));
            A::MapLiteral { entries: tagged, span: Span::unknown() }
        }
        A::Program { statements, .. } => A::Program { statements: each(statements), span: Span::unknown() },
        A::Print { expression, .. } => A::Print { expression: boxed(expression), span: Span::unknown() },
        A::Return { value, .. } => A::Return {
            value: value.as_ref().map(|inner| boxed(inner)),
            span: Span::unknown(),
        },
        A::Assignment { target, value, .. } => A::Assignment {
            target: boxed(target),
            value: boxed(value),
            span: Span::unknown(),
        },
        A::If { condition, then_body, else_body, .. } => A::If {
            condition: boxed(condition),
            then_body: each(then_body),
            else_body: else_body.as_ref().map(|stmts| each(stmts)),
            span: Span::unknown(),
        },
        A::BinaryOp { operator, left, right, .. } => A::BinaryOp {
            operator: operator.clone(),
            left: boxed(left),
            right: boxed(right),
            span: Span::unknown(),
        },
        A::UnaryOp { operator, operand, .. } => A::UnaryOp {
            operator: operator.clone(),
            operand: boxed(operand),
            span: Span::unknown(),
        },
        A::MethodCall { object, method, arguments, .. } => A::MethodCall {
            object: boxed(object),
            method: method.clone(),
            arguments: each(arguments),
            span: Span::unknown(),
        },
        A::FunctionCall { name, arguments, .. } => A::FunctionCall {
            name: name.clone(),
            arguments: each(arguments),
            span: Span::unknown(),
        },
        other => other.clone(),
    }
}

View File

@ -0,0 +1,61 @@
/*!
* Macro child transforms — split modules
*/
mod peek;
mod array;
mod map;
mod loops;
mod foreach;
mod scopebox;
mod lift;
mod if_to_loopform;
mod postfix;
// Re-exported via thin wrappers to keep names stable: each function below only forwards
// to the implementation of the same name in its submodule.
/// Forwards to `peek::transform_peek_match_literal`.
pub(super) fn transform_peek_match_literal(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
peek::transform_peek_match_literal(ast)
}
/// Forwards to `array::transform_array_prepend_zero`.
pub(super) fn transform_array_prepend_zero(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
array::transform_array_prepend_zero(ast)
}
/// Forwards to `map::transform_map_insert_tag`.
pub(super) fn transform_map_insert_tag(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
map::transform_map_insert_tag(ast)
}
/// Forwards to `loops::transform_loop_normalize`.
pub(super) fn transform_loop_normalize(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
loops::transform_loop_normalize(ast)
}
/// Forwards to `foreach::transform_for_foreach`.
pub(super) fn transform_for_foreach(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
foreach::transform_for_foreach(ast)
}
/// Forwards to `scopebox::transform_scopebox_inject`.
pub(super) fn transform_scopebox_inject(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
scopebox::transform_scopebox_inject(ast)
}
/// Forwards to `lift::transform_lift_nested_functions`.
pub(super) fn transform_lift_nested_functions(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
lift::transform_lift_nested_functions(ast)
}
/// Forwards to `if_to_loopform::transform_if_to_loopform`.
pub(super) fn transform_if_to_loopform(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
if_to_loopform::transform_if_to_loopform(ast)
}
/// Forwards to `postfix::transform_postfix_handlers`.
pub(super) fn transform_postfix_handlers(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
postfix::transform_postfix_handlers(ast)
}
/// Core normalization pass used by runners (always-on when macros enabled).
///
/// Order matters. The passes run as:
/// 1. for/foreach expansion
/// 2. match (`PeekExpr`) lowering
/// 3. loop tail alignment
/// 4. scope-box injection — only when `NYASH_SCOPEBOX_ENABLE` is on
/// 5. nested-function lifting
/// 6. if → loop-form rewriting — only when `NYASH_IF_AS_LOOPFORM` is on
/// 7. postfix handler rewriting — only when `NYASH_CATCH_NEW` is on
///
/// A flag counts as "on" when its value is exactly `1`, `true` or `on`.
pub fn normalize_core_pass(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    // True when the environment variable is set to one of the accepted "on" spellings.
    fn flag_enabled(var: &str) -> bool {
        std::env::var(var)
            .map(|value| matches!(value.as_str(), "1" | "true" | "on"))
            .unwrap_or(false)
    }

    let mut current = transform_for_foreach(ast);
    current = transform_peek_match_literal(&current);
    current = transform_loop_normalize(&current);
    if flag_enabled("NYASH_SCOPEBOX_ENABLE") {
        current = transform_scopebox_inject(&current);
    }
    current = transform_lift_nested_functions(&current);
    if flag_enabled("NYASH_IF_AS_LOOPFORM") {
        current = transform_if_to_loopform(&current);
    }
    if flag_enabled("NYASH_CATCH_NEW") {
        current = transform_postfix_handlers(&current);
    }
    current
}

View File

@ -0,0 +1,113 @@
/// Places an expression node in statement position. Currently the identity function.
fn map_expr_to_stmt(e: nyash_rust::ASTNode) -> nyash_rust::ASTNode { e }

/// Shared lowering of a `PeekExpr` into a chain of nested `If` nodes.
///
/// Arms are folded from last to first, so the first arm becomes the outermost `If` and the
/// `else` value ends up in the innermost `else` branch. Each arm is tested with
/// `scrutinee == <literal>`. `wrap` turns a branch *value* (an arm body or the `else` value)
/// into the node stored in that branch; it is applied to the `else` value as well, so that the
/// fallback is assigned / returned / printed just like every arm.
///
/// Returns `None` when `peek` is not a `PeekExpr`.
fn lower_peek_chain(
    peek: &nyash_rust::ASTNode,
    wrap: impl Fn(nyash_rust::ASTNode) -> nyash_rust::ASTNode,
) -> Option<nyash_rust::ASTNode> {
    use nyash_rust::ast::{ASTNode as A, BinaryOperator, Span};
    if let A::PeekExpr { scrutinee, arms, else_expr, .. } = peek {
        let mut current: A = map_expr_to_stmt(wrap(*(*else_expr).clone()));
        for (lit, body) in arms.iter().rev() {
            let rhs = A::Literal { value: lit.clone(), span: Span::unknown() };
            let cond = A::BinaryOp {
                operator: BinaryOperator::Equal,
                left: scrutinee.clone(),
                right: Box::new(rhs),
                span: Span::unknown(),
            };
            current = A::If {
                condition: Box::new(cond),
                then_body: vec![map_expr_to_stmt(wrap((*body).clone()))],
                else_body: Some(vec![map_expr_to_stmt(current)]),
                span: Span::unknown(),
            };
        }
        Some(current)
    } else {
        None
    }
}

/// Lowers a `PeekExpr` to nested `If` nodes whose branches hold the raw arm / else values.
fn transform_peek_to_if_expr(peek: &nyash_rust::ASTNode) -> Option<nyash_rust::ASTNode> {
    lower_peek_chain(peek, |value| value)
}

/// Lowers `target = peek ...` to nested `If` statements; every branch, including the `else`
/// fallback, assigns its value to `target`.
fn transform_peek_to_if_stmt_assign(peek: &nyash_rust::ASTNode, target: &nyash_rust::ASTNode) -> Option<nyash_rust::ASTNode> {
    use nyash_rust::ast::{ASTNode as A, Span};
    lower_peek_chain(peek, |value| A::Assignment {
        target: Box::new(target.clone()),
        value: Box::new(value),
        span: Span::unknown(),
    })
}

/// Lowers `return peek ...` to nested `If` statements; every branch, including the `else`
/// fallback, returns its value.
fn transform_peek_to_if_stmt_return(peek: &nyash_rust::ASTNode) -> Option<nyash_rust::ASTNode> {
    use nyash_rust::ast::{ASTNode as A, Span};
    lower_peek_chain(peek, |value| A::Return { value: Some(Box::new(value)), span: Span::unknown() })
}

/// Lowers `print(peek ...)` to nested `If` statements; every branch, including the `else`
/// fallback, prints its value.
fn transform_peek_to_if_stmt_print(peek: &nyash_rust::ASTNode) -> Option<nyash_rust::ASTNode> {
    use nyash_rust::ast::{ASTNode as A, Span};
    lower_peek_chain(peek, |value| A::Print { expression: Box::new(value), span: Span::unknown() })
}
/// Rewrites `PeekExpr` nodes that appear as the value of an assignment, a `return` or a
/// `print` into nested `If` statements, recursing through the rest of the tree.
///
/// `Program`, `If`, `Loop`, `BinaryOp`, `UnaryOp`, `MethodCall`, `FunctionCall`, `ArrayLiteral`
/// and `MapLiteral` are rebuilt from processed children; an assignment target is kept as is;
/// every other node is returned as a clone.
pub(super) fn transform_peek_match_literal(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::ASTNode as A;

    fn rec(node: &A) -> Box<A> {
        Box::new(transform_peek_match_literal(node))
    }
    fn each(nodes: Vec<A>) -> Vec<A> {
        nodes.iter().map(transform_peek_match_literal).collect()
    }

    match ast.clone() {
        A::Program { statements, span } => A::Program { statements: each(statements), span },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: rec(&condition),
            then_body: each(then_body),
            else_body: else_body.map(each),
            span,
        },
        A::Loop { condition, body, span } => A::Loop { condition: rec(&condition), body: each(body), span },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp { operator, left: rec(&left), right: rec(&right), span },
        A::UnaryOp { operator, operand, span } => A::UnaryOp { operator, operand: rec(&operand), span },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: rec(&object),
            method,
            arguments: each(arguments),
            span,
        },
        A::FunctionCall { name, arguments, span } => {
            // NOTE(review): `transform_peek_to_if_expr` only yields `Some` for a `PeekExpr`,
            // so probing it with a `FunctionCall` node always falls through to the rebuild
            // below. A bare `PeekExpr` never reaches that helper from this function — confirm
            // whether an `A::PeekExpr` arm was intended here.
            let probe = A::FunctionCall { name: name.clone(), arguments: arguments.clone(), span };
            transform_peek_to_if_expr(&probe)
                .unwrap_or_else(|| A::FunctionCall { name, arguments: each(arguments), span })
        }
        A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: each(elements), span },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries
                .into_iter()
                .map(|(key, value)| (key, transform_peek_match_literal(&value)))
                .collect(),
            span,
        },
        A::Assignment { target, value, span } => match transform_peek_to_if_stmt_assign(&value, &target) {
            Some(lowered) => lowered,
            None => A::Assignment { target, value: rec(&value), span },
        },
        A::Return { value, span } => match value {
            None => A::Return { value: None, span },
            Some(inner) => match transform_peek_to_if_stmt_return(&inner) {
                Some(lowered) => lowered,
                None => A::Return { value: Some(rec(&inner)), span },
            },
        },
        A::Print { expression, span } => match transform_peek_to_if_stmt_print(&expression) {
            Some(lowered) => lowered,
            None => A::Print { expression: rec(&expression), span },
        },
        other => other,
    }
}

View File

@ -0,0 +1,69 @@
/// Rewrites the helper calls `postfix_catch(...)` and `with_cleanup(...)` into `TryCatch` nodes.
///
/// Recognised shapes (function names are matched ASCII-case-insensitively):
/// - `postfix_catch(expr, handler)` and `postfix_catch(expr, type, handler, ..)` where `handler`
///   is a one-parameter lambda: becomes a `TryCatch` with `expr` as the try body and a single
///   catch clause binding the lambda parameter. `type` is used as the exception type only when
///   it is a string literal.
/// - `with_cleanup(expr, cleanup, ..)` where `cleanup` is a zero-parameter lambda: becomes a
///   `TryCatch` with `expr` as the try body and the lambda body as the `finally` body.
///
/// A call that does not match one of these shapes is kept as a `FunctionCall` with *all* of its
/// original arguments (each processed recursively). The rest of the tree is traversed through
/// `Program`, `If`, `Loop`, `BinaryOp`, `UnaryOp`, `MethodCall`, `ArrayLiteral` and
/// `MapLiteral`; other nodes are cloned unchanged.
pub(super) fn transform_postfix_handlers(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::{ASTNode as A, CatchClause, Span};

    fn map_vec(v: Vec<A>) -> Vec<A> {
        v.into_iter().map(|n| transform_postfix_handlers(&n)).collect()
    }

    // Builds the try/catch form of `postfix_catch`, or `None` if the arguments do not fit.
    // Arguments are only inspected, never consumed, so the caller can still rebuild the call.
    fn lower_postfix_catch(args: &[A]) -> Option<A> {
        let (expr, type_arg, handler) = match args {
            [expr, handler] => (expr, None, handler),
            [expr, ty, handler, ..] => (expr, Some(ty), handler),
            _ => return None,
        };
        let exception_type = match type_arg {
            Some(A::Literal { value: nyash_rust::ast::LiteralValue::String(s), .. }) => Some(s.clone()),
            _ => None,
        };
        match handler {
            A::Lambda { params, body, .. } if params.len() == 1 => Some(A::TryCatch {
                try_body: vec![transform_postfix_handlers(expr)],
                catch_clauses: vec![CatchClause {
                    exception_type,
                    variable_name: Some(params[0].clone()),
                    body: body.iter().map(transform_postfix_handlers).collect(),
                    span: Span::unknown(),
                }],
                finally_body: None,
                span: Span::unknown(),
            }),
            _ => None,
        }
    }

    // Builds the try/finally form of `with_cleanup`, or `None` if the arguments do not fit.
    fn lower_with_cleanup(args: &[A]) -> Option<A> {
        match args {
            [expr, A::Lambda { params, body, .. }, ..] if params.is_empty() => Some(A::TryCatch {
                try_body: vec![transform_postfix_handlers(expr)],
                catch_clauses: vec![],
                finally_body: Some(body.iter().map(transform_postfix_handlers).collect()),
                span: Span::unknown(),
            }),
            _ => None,
        }
    }

    match ast.clone() {
        A::Program { statements, span } => A::Program { statements: map_vec(statements), span },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: Box::new(transform_postfix_handlers(&condition)),
            then_body: map_vec(then_body),
            else_body: else_body.map(map_vec),
            span,
        },
        A::Loop { condition, body, span } => A::Loop {
            condition: Box::new(transform_postfix_handlers(&condition)),
            body: map_vec(body),
            span,
        },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp {
            operator,
            left: Box::new(transform_postfix_handlers(&left)),
            right: Box::new(transform_postfix_handlers(&right)),
            span,
        },
        A::UnaryOp { operator, operand, span } => A::UnaryOp {
            operator,
            operand: Box::new(transform_postfix_handlers(&operand)),
            span,
        },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: Box::new(transform_postfix_handlers(&object)),
            method,
            arguments: map_vec(arguments),
            span,
        },
        A::FunctionCall { name, arguments, span } => {
            let lowered = match name.to_ascii_lowercase().as_str() {
                "postfix_catch" => lower_postfix_catch(&arguments),
                "with_cleanup" => lower_with_cleanup(&arguments),
                _ => None,
            };
            // Not a recognised handler shape: keep the call, with every argument intact.
            lowered.unwrap_or_else(|| A::FunctionCall { name, arguments: map_vec(arguments), span })
        }
        A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: map_vec(elements), span },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries.into_iter().map(|(k, v)| (k, transform_postfix_handlers(&v))).collect(),
            span,
        },
        other => other,
    }
}

View File

@ -0,0 +1,25 @@
/// Wraps the bodies of `If` branches and `Loop`s in a single `ScopeBox` node.
///
/// The then-branch, the else-branch (when present) and each loop body are replaced by a
/// one-element list holding a `ScopeBox` whose body is the recursively processed original
/// statements. `Program`, `BinaryOp`, `UnaryOp`, `MethodCall`, `FunctionCall`, `ArrayLiteral`
/// and `MapLiteral` are traversed; any other node is cloned unchanged.
pub(super) fn transform_scopebox_inject(ast: &nyash_rust::ASTNode) -> nyash_rust::ASTNode {
    use nyash_rust::ast::ASTNode as A;

    fn rec(node: &A) -> Box<A> {
        Box::new(transform_scopebox_inject(node))
    }
    fn each(nodes: Vec<A>) -> Vec<A> {
        nodes.iter().map(transform_scopebox_inject).collect()
    }
    // Processes `stmts` and puts them inside one ScopeBox with an unknown span.
    fn scoped(stmts: Vec<A>) -> Vec<A> {
        vec![A::ScopeBox { body: each(stmts), span: nyash_rust::ast::Span::unknown() }]
    }

    match ast.clone() {
        A::Program { statements, span } => A::Program { statements: each(statements), span },
        A::If { condition, then_body, else_body, span } => A::If {
            condition: rec(&condition),
            then_body: scoped(then_body),
            else_body: else_body.map(scoped),
            span,
        },
        A::Loop { condition, body, span } => A::Loop { condition: rec(&condition), body: scoped(body), span },
        A::BinaryOp { operator, left, right, span } => A::BinaryOp { operator, left: rec(&left), right: rec(&right), span },
        A::UnaryOp { operator, operand, span } => A::UnaryOp { operator, operand: rec(&operand), span },
        A::MethodCall { object, method, arguments, span } => A::MethodCall {
            object: rec(&object),
            method,
            arguments: each(arguments),
            span,
        },
        A::FunctionCall { name, arguments, span } => A::FunctionCall { name, arguments: each(arguments), span },
        A::ArrayLiteral { elements, span } => A::ArrayLiteral { elements: each(elements), span },
        A::MapLiteral { entries, span } => A::MapLiteral {
            entries: entries
                .into_iter()
                .map(|(key, value)| (key, transform_scopebox_inject(&value)))
                .collect(),
            span,
        },
        other => other,
    }
}

View File

@ -4,6 +4,7 @@ use std::{fs, process};
/// Execute using PyVM only (no Rust VM runtime). Emits MIR(JSON) and invokes tools/pyvm_runner.py.
pub fn execute_pyvm_only(runner: &NyashRunner, filename: &str) {
if std::env::var("NYASH_PYVM_TRACE").ok().as_deref() == Some("1") { eprintln!("[pyvm] entry"); }
// Read the file
let code = match fs::read_to_string(filename) {
Ok(content) => content,
@ -14,14 +15,63 @@ pub fn execute_pyvm_only(runner: &NyashRunner, filename: &str) {
};
// Optional using pre-processing (strip lines and register modules)
let code = if crate::config::env::enable_using() {
let mut code = if crate::config::env::enable_using() {
match crate::runner::modes::common_util::resolve::strip_using_and_register(runner, &code, filename) {
Ok(s) => s,
Err(e) => { eprintln!("{}", e); process::exit(1); }
}
} else { code };
// Dev sugar pre-expand: line-head @name[:T] = expr → local name[:T] = expr
code = crate::runner::modes::common_util::resolve::preexpand_at_local(&code);
// Normalize logical operators for Stage-2 parser: translate '||'/'&&' to 'or'/'and' outside strings/comments
/// Rewrites `||` to ` or ` and `&&` to ` and ` in source text.
///
/// Text inside double-quoted strings (with backslash escapes), `//` and `#` line comments and
/// `/* ... */` block comments is copied through untouched. The pipe-forward operator `|>` and
/// single `|` / `&` characters are preserved.
fn normalize_logical_ops(src: &str) -> String {
    // Lexical context the scanner is currently in.
    #[derive(Clone, Copy)]
    enum Mode {
        Code,
        Str,
        LineComment,
        BlockComment,
    }

    let mut mode = Mode::Code;
    let mut result = String::with_capacity(src.len());
    let mut chars = src.chars().peekable();

    while let Some(ch) = chars.next() {
        match mode {
            Mode::LineComment => {
                result.push(ch);
                if ch == '\n' {
                    mode = Mode::Code;
                }
            }
            Mode::BlockComment => {
                result.push(ch);
                if ch == '*' && chars.next_if_eq(&'/').is_some() {
                    result.push('/');
                    mode = Mode::Code;
                }
            }
            Mode::Str => {
                result.push(ch);
                match ch {
                    // A backslash takes the following character with it, whatever it is.
                    '\\' => {
                        if let Some(escaped) = chars.next() {
                            result.push(escaped);
                        }
                    }
                    '"' => mode = Mode::Code,
                    _ => {}
                }
            }
            Mode::Code => match ch {
                '"' => {
                    result.push(ch);
                    mode = Mode::Str;
                }
                '#' => {
                    result.push(ch);
                    mode = Mode::LineComment;
                }
                '/' => {
                    result.push('/');
                    if chars.next_if_eq(&'/').is_some() {
                        result.push('/');
                        mode = Mode::LineComment;
                    } else if chars.next_if_eq(&'*').is_some() {
                        result.push('*');
                        mode = Mode::BlockComment;
                    }
                }
                '|' => {
                    if chars.next_if_eq(&'|').is_some() {
                        result.push_str(" or ");
                    } else if chars.next_if_eq(&'>').is_some() {
                        result.push_str("|>");
                    } else {
                        result.push('|');
                    }
                }
                '&' => {
                    if chars.next_if_eq(&'&').is_some() {
                        result.push_str(" and ");
                    } else {
                        result.push('&');
                    }
                }
                _ => result.push(ch),
            },
        }
    }
    result
}
code = normalize_logical_ops(&code);
// Parse to AST
if std::env::var("NYASH_PYVM_DUMP_CODE").ok().as_deref() == Some("1") {
eprintln!("[pyvm-code]\n{}", code);
}
let ast = match NyashParser::parse_from_string(&code) {
Ok(ast) => ast,
Err(e) => {

View File

@ -33,11 +33,17 @@ impl NyashRunner {
if let Ok(text) = std::fs::read_to_string("nyash.toml") {
if let Ok(doc) = toml::from_str::<toml::Value>(&text) {
if let Some(mods) = doc.get("modules").and_then(|v| v.as_table()) {
for (k, v) in mods.iter() {
if let Some(path) = v.as_str() {
pending_modules.push((k.to_string(), path.to_string()));
// Flattens a (possibly nested) table into `out` as (dotted name, string value) pairs.
// String values are recorded under their dotted path; sub-tables are descended into with the
// path extended by their key; values of any other kind are ignored.
fn visit(prefix: &str, tbl: &toml::value::Table, out: &mut Vec<(String, String)>) {
    for (key, value) in tbl.iter() {
        let qualified = match prefix {
            "" => key.to_string(),
            _ => format!("{}.{}", prefix, key),
        };
        match (value.as_str(), value.as_table()) {
            (Some(path), _) => out.push((qualified, path.to_string())),
            (None, Some(child)) => visit(&qualified, child, out),
            (None, None) => {}
        }
    }
}
visit("", mods, &mut pending_modules);
}
if let Some(using_tbl) = doc.get("using").and_then(|v| v.as_table()) {
if let Some(paths_arr) = using_tbl.get("paths").and_then(|v| v.as_array()) {

View File

@ -103,13 +103,13 @@ impl NyashRunner {
}
}
}
// Preferred: run Ny selfhost compiler program (apps/selfhost-compiler/compiler.nyash)
// Preferred: run Ny selfhost compiler program (apps/selfhost/compiler/compiler.nyash)
// This avoids inline embedding pitfalls and supports Stage-3 gating via args.
{
use crate::runner::modes::common_util::selfhost::{child, json};
let exe = std::env::current_exe()
.unwrap_or_else(|_| std::path::PathBuf::from("target/release/nyash"));
let parser_prog = std::path::Path::new("apps/selfhost-compiler/compiler.nyash");
let parser_prog = std::path::Path::new("apps/selfhost/compiler/compiler.nyash");
if parser_prog.exists() {
// Build extra args forwarded to child program
let mut extra: Vec<&str> = Vec::new();
@ -296,7 +296,7 @@ impl NyashRunner {
}
let inline_path = std::path::Path::new("tmp").join("inline_selfhost_emit.nyash");
let inline_code = format!(
"include \"apps/selfhost-compiler/boxes/parser_box.nyash\"\ninclude \"apps/selfhost-compiler/boxes/emitter_box.nyash\"\nstatic box Main {{\n main(args) {{\n local s = \"{}\"\n local p = new ParserBox()\n p.stage3_enable(1)\n local json = p.parse_program2(s)\n local e = new EmitterBox()\n json = e.emit_program(json, \"[]\")\n print(json)\n return 0\n }}\n}}\n",
"include \"apps/selfhost/compiler/boxes/parser_box.nyash\"\ninclude \"apps/selfhost/compiler/boxes/emitter_box.nyash\"\nstatic box Main {{\n main(args) {{\n local s = \"{}\"\n local p = new ParserBox()\n p.stage3_enable(1)\n local json = p.parse_program2(s)\n local e = new EmitterBox()\n json = e.emit_program(json, \"[]\")\n print(json)\n return 0\n }}\n}}\n",
esc
);
if let Err(e) = std::fs::write(&inline_path, inline_code) {

View File

@ -1,880 +0,0 @@
/*!
* Nyash Tokenizer - .nyashソースコードをトークン列に変換
*
* Python版nyashc_v4.pyのNyashTokenizerをRustで完全再実装
* 正規表現ベース → 高速なcharレベル処理に最適化
*/
use crate::grammar::engine;
use thiserror::Error;
/// Enum representing the kind of a token.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
// Literals
STRING(String),
NUMBER(i64),
FLOAT(f64), // floating-point number (support added later)
TRUE,
FALSE,
NULL, // null literal
// Keywords
BOX,
GLOBAL,
SINGLETON,
NEW,
MATCH,
IF,
ELSE,
LOOP,
BREAK,
CONTINUE,
RETURN,
FUNCTION,
FN,
PRINT,
THIS,
ME,
INIT, // init (initialization block)
PACK, // pack (constructor - kept for compatibility)
BIRTH, // birth (constructor)
NOWAIT, // nowait
AWAIT, // await
INTERFACE, // interface
COLON, // : (used for inheritance)
INCLUDE, // include (file loading)
TRY, // try
CATCH, // catch
CLEANUP, // cleanup (finally replacement)
THROW, // throw
LOCAL, // local (temporary variable declaration)
STATIC, // static (static method)
OUTBOX, // outbox (ownership-transfer variable)
NOT, // not (negation operator)
OVERRIDE, // override (explicit override)
FROM, // from (parent method call)
WEAK, // weak (weak-reference modifier)
USING, // using (namespace import)
IMPORT, // import (Phase 12.7)
// Operators (longer ones are defined first)
ShiftLeft, // << (bitwise shift-left)
ShiftRight, // >> (bitwise shift-right)
BitAnd, // & (bitwise and)
BitOr, // | (bitwise or)
BitXor, // ^ (bitwise xor)
FatArrow, // => (match arms)
EQUALS, // ==
NotEquals, // !=
LessEquals, // <=
GreaterEquals, // >=
AND, // && or and
OR, // || or or
// Phase 12.7-B basic sugar: two-character operators (longest match takes priority)
PipeForward, // |>
QmarkDot, // ?.
QmarkQmark, // ??
PlusAssign, // +=
MinusAssign, // -=
MulAssign, // *=
DivAssign, // /=
RANGE, // ..
LESS, // <
GREATER, // >
ASSIGN, // =
PLUS, // +
MINUS, // -
MULTIPLY, // *
DIVIDE, // /
MODULO, // %
// Symbols
DOT, // .
DoubleColon, // :: (Parent::method) - for P1 (definition only)
LPAREN, // (
RPAREN, // )
LBRACK, // [
RBRACK, // ]
LBRACE, // {
RBRACE, // }
COMMA, // ,
QUESTION, // ? (postfix result propagation)
NEWLINE, // \n
// Optional separators
SEMICOLON, // ; (gated by NYASH_PARSER_ALLOW_SEMICOLON)
// Identifiers
IDENTIFIER(String),
// Special
EOF,
}
/// A token kind together with the line/column position recorded for it.
#[derive(Debug, Clone)]
pub struct Token {
    pub token_type: TokenType,
    pub line: usize,
    pub column: usize,
}

impl Token {
    /// Builds a token of kind `token_type` tagged with `line` and `column`.
    pub fn new(token_type: TokenType, line: usize, column: usize) -> Self {
        Token {
            token_type: token_type,
            line: line,
            column: column,
        }
    }
}
/// Errors reported while tokenizing.
#[derive(Error, Debug)]
pub enum TokenizeError {
// A character that does not start any known token.
#[error("Unexpected character '{char}' at line {line}, column {column}")]
UnexpectedCharacter {
char: char,
line: usize,
column: usize,
},
// A string literal whose closing quote was never found; `line` is where the literal began.
#[error("Unterminated string literal at line {line}")]
UnterminatedString { line: usize },
// A numeric literal that could not be parsed as i64 / f64.
#[error("Invalid number format at line {line}")]
InvalidNumber { line: usize },
// A block comment that reached end of input without a closing `*/`.
#[error("Comment not closed at line {line}")]
UnterminatedComment { line: usize },
}
/// The Nyash tokenizer: a cursor over the source text plus line/column tracking.
pub struct NyashTokenizer {
// Source text as individual chars (collected once in `new`).
input: Vec<char>,
// Index into `input` of the character currently being examined.
position: usize,
// Current line number; starts at 1 and is bumped by `advance` on '\n'.
line: usize,
// Current column number; starts at 1 and is reset to 1 after each '\n'.
column: usize,
}
impl NyashTokenizer {
/// Reports whether `;` is accepted as an optional statement separator.
///
/// True only when `NYASH_PARSER_ALLOW_SEMICOLON` is set to `1`, `true` or `on`
/// (compared ASCII case-insensitively); unset or unreadable values give `false`.
#[inline]
fn allow_semicolon() -> bool {
    std::env::var("NYASH_PARSER_ALLOW_SEMICOLON")
        .map(|raw| matches!(raw.to_ascii_lowercase().as_str(), "1" | "true" | "on"))
        .unwrap_or(false)
}
/// Reports whether Phase 12.7 strict mode is on (`NYASH_STRICT_12_7` is exactly `1`).
#[inline]
fn strict_12_7() -> bool {
    matches!(std::env::var("NYASH_STRICT_12_7").as_deref(), Ok("1"))
}
/// Creates a tokenizer over `input`, positioned at its first character (line 1, column 1).
pub fn new(input: impl Into<String>) -> Self {
    let source: String = input.into();
    let chars: Vec<char> = source.chars().collect();
    Self {
        input: chars,
        position: 0,
        line: 1,
        column: 1,
    }
}
/// Tokenizes the whole input and returns the tokens followed by a final `EOF` token.
///
/// Whitespace (except newlines) and comments are dropped between tokens.
/// Any error from comment skipping or token reading aborts the run.
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizeError> {
    let mut tokens = Vec::new();
    while !self.is_at_end() {
        self.skip_whitespace();
        // Swallow any run of consecutive block / line comments, together
        // with the whitespace that follows each of them.
        loop {
            match (self.current_char(), self.peek_char()) {
                // block comment: /* ... */
                (Some('/'), Some('*')) => self.skip_block_comment()?,
                // line comments: // ... or # ...
                (Some('/'), Some('/')) | (Some('#'), _) => self.skip_line_comment(),
                _ => break,
            }
            self.skip_whitespace();
        }
        if self.is_at_end() {
            break;
        }
        tokens.push(self.tokenize_next()?);
    }
    // Always terminate the stream with EOF.
    tokens.push(Token::new(TokenType::EOF, self.line, self.column));
    Ok(tokens)
}
/// 次の一つのトークンを読み取り
fn tokenize_next(&mut self) -> Result<Token, TokenizeError> {
let start_line = self.line;
let start_column = self.column;
match self.current_char() {
// Optional statement separator ';' (gated)
Some(';') if Self::allow_semicolon() => {
self.advance();
return Ok(Token::new(TokenType::SEMICOLON, start_line, start_column));
}
// Block comment should have been skipped by tokenize() pre-loop, but be defensive here
Some('/') if self.peek_char() == Some('*') => {
self.skip_block_comment()?;
// After skipping, restart tokenization for next token
return self.tokenize_next();
}
// 2文字またはそれ以上の演算子は最長一致で先に判定
Some('|') if self.peek_char() == Some('>') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::PipeForward, start_line, start_column));
}
Some('?') if self.peek_char() == Some('.') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::QmarkDot, start_line, start_column));
}
Some('?') if self.peek_char() == Some('?') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::QmarkQmark, start_line, start_column));
}
Some('?') => {
self.advance();
return Ok(Token::new(TokenType::QUESTION, start_line, start_column));
}
Some('+') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::PlusAssign, start_line, start_column));
}
Some('-') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::MinusAssign, start_line, start_column));
}
Some('*') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::MulAssign, start_line, start_column));
}
Some('/') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::DivAssign, start_line, start_column));
}
Some('.') if self.peek_char() == Some('.') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::RANGE, start_line, start_column));
}
Some('"') => {
let string_value = self.read_string()?;
Ok(Token::new(
TokenType::STRING(string_value),
start_line,
start_column,
))
}
Some(c) if c.is_ascii_digit() => {
let token_type = self.read_numeric_literal()?;
Ok(Token::new(token_type, start_line, start_column))
}
Some(c) if c.is_alphabetic() || c == '_' => {
let token_type = self.read_keyword_or_identifier();
Ok(Token::new(token_type, start_line, start_column))
}
Some('/') if self.peek_char() == Some('/') => {
self.skip_line_comment();
self.skip_whitespace(); // コメント後の空白もスキップ
return self.tokenize_next();
}
Some('#') => {
self.skip_line_comment();
self.skip_whitespace(); // コメント後の空白もスキップ
return self.tokenize_next();
}
Some('>') if self.peek_char() == Some('>') && !Self::strict_12_7() => {
self.advance();
self.advance();
Ok(Token::new(TokenType::ShiftRight, start_line, start_column))
}
Some(':') if self.peek_char() == Some(':') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::DoubleColon, start_line, start_column))
}
Some(':') => {
self.advance();
Ok(Token::new(TokenType::COLON, start_line, start_column))
}
Some('=') if self.peek_char() == Some('>') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::FatArrow, start_line, start_column))
}
Some('=') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::EQUALS, start_line, start_column))
}
Some('!') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::NotEquals, start_line, start_column))
}
// Shift-left must be detected before <= and <
Some('<') if self.peek_char() == Some('<') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::ShiftLeft, start_line, start_column))
}
Some('<') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::LessEquals, start_line, start_column))
}
Some('>') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
Ok(Token::new(
TokenType::GreaterEquals,
start_line,
start_column,
))
}
Some('&') if self.peek_char() == Some('&') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::AND, start_line, start_column))
}
Some('|') if self.peek_char() == Some('|') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::OR, start_line, start_column))
}
Some('|') if self.peek_char() == Some('>') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::PipeForward, start_line, start_column));
}
// 単文字トークンはテーブル駆動で処理
Some(c) if self.single_char_token(c).is_some() => {
let tt = self.single_char_token(c).unwrap();
self.advance();
Ok(Token::new(tt, start_line, start_column))
}
Some(c) => Err(TokenizeError::UnexpectedCharacter {
char: c,
line: self.line,
column: self.column,
}),
None => Ok(Token::new(TokenType::EOF, self.line, self.column)),
}
}
// Lookup table for single-character tokens (longest-match operators are
// handled by the caller before this is consulted).
fn single_char_token(&self, c: char) -> Option<TokenType> {
    // '?' and ':' are dispatched by the caller; only plain one-character
    // tokens are listed here.
    let kind = match c {
        '!' => TokenType::NOT,
        '<' => TokenType::LESS,
        '>' => TokenType::GREATER,
        '&' => TokenType::BitAnd,
        '|' => TokenType::BitOr,
        '^' => TokenType::BitXor,
        '=' => TokenType::ASSIGN,
        '+' => TokenType::PLUS,
        '-' => TokenType::MINUS,
        '*' => TokenType::MULTIPLY,
        '/' => TokenType::DIVIDE,
        '%' => TokenType::MODULO,
        '.' => TokenType::DOT,
        '(' => TokenType::LPAREN,
        ')' => TokenType::RPAREN,
        '[' => TokenType::LBRACK,
        ']' => TokenType::RBRACK,
        '{' => TokenType::LBRACE,
        '}' => TokenType::RBRACE,
        ',' => TokenType::COMMA,
        '\n' => TokenType::NEWLINE,
        _ => return None,
    };
    Some(kind)
}
/// Reads a double-quoted string literal and returns its unescaped contents.
///
/// Recognised escapes are `\n`, `\t`, `\r`, `\\` and `\"`; any other escape is
/// kept verbatim (backslash plus the following character).
///
/// # Errors
/// Returns `TokenizeError::UnterminatedString` (with the line the literal
/// started on) if the input ends before the closing quote.
fn read_string(&mut self) -> Result<String, TokenizeError> {
    let start_line = self.line;
    self.advance(); // skip the opening '"'
    let mut contents = String::new();
    loop {
        let ch = match self.current_char() {
            Some(ch) => ch,
            None => break,
        };
        match ch {
            '"' => {
                self.advance(); // skip the closing '"'
                return Ok(contents);
            }
            '\\' => {
                self.advance();
                let escaped = match self.current_char() {
                    Some(escaped) => escaped,
                    None => break,
                };
                match escaped {
                    'n' => contents.push('\n'),
                    't' => contents.push('\t'),
                    'r' => contents.push('\r'),
                    '\\' => contents.push('\\'),
                    '"' => contents.push('"'),
                    other => {
                        contents.push('\\');
                        contents.push(other);
                    }
                }
            }
            plain => contents.push(plain),
        }
        self.advance();
    }
    Err(TokenizeError::UnterminatedString { line: start_line })
}
/// Reads a numeric literal: an integer (`NUMBER`) or a floating-point value (`FLOAT`).
///
/// A `.` is taken as part of the number only once, and only when a digit
/// follows it.
///
/// # Errors
/// Returns `TokenizeError::InvalidNumber` if the collected text does not
/// parse as `i64` / `f64`.
fn read_numeric_literal(&mut self) -> Result<TokenType, TokenizeError> {
    let start_line = self.line;
    let mut text = String::new();
    let mut seen_dot = false;
    while let Some(ch) = self.current_char() {
        // Accept the decimal point only when digits follow it.
        let is_fraction_dot = ch == '.'
            && !seen_dot
            && self.peek_char().map_or(false, |d| d.is_ascii_digit());
        if !ch.is_ascii_digit() && !is_fraction_dot {
            break;
        }
        seen_dot |= is_fraction_dot;
        text.push(ch);
        self.advance();
    }
    let parsed = if seen_dot {
        text.parse::<f64>().ok().map(TokenType::FLOAT)
    } else {
        text.parse::<i64>().ok().map(TokenType::NUMBER)
    };
    parsed.ok_or(TokenizeError::InvalidNumber { line: start_line })
}
/// キーワードまたは識別子を読み取り
fn read_keyword_or_identifier(&mut self) -> TokenType {
let mut identifier = String::new();
while let Some(c) = self.current_char() {
if c.is_alphanumeric() || c == '_' {
identifier.push(c);
self.advance();
} else {
break;
}
}
// キーワードチェック
let mut tok = match identifier.as_str() {
"box" => TokenType::BOX,
"global" => TokenType::GLOBAL,
"singleton" => TokenType::SINGLETON,
"new" => TokenType::NEW,
"match" => TokenType::MATCH,
"if" => TokenType::IF,
"else" => TokenType::ELSE,
"loop" => TokenType::LOOP,
"break" => TokenType::BREAK,
"continue" => TokenType::CONTINUE,
"return" => TokenType::RETURN,
"function" => TokenType::FUNCTION,
"fn" => TokenType::FN,
"print" => TokenType::PRINT,
"this" => TokenType::THIS,
"me" => TokenType::ME,
"init" => TokenType::INIT,
"pack" => TokenType::PACK,
"birth" => TokenType::BIRTH,
"nowait" => TokenType::NOWAIT,
"await" => TokenType::AWAIT,
"interface" => TokenType::INTERFACE,
"include" => TokenType::INCLUDE,
"import" => TokenType::IMPORT,
"try" => TokenType::TRY,
"catch" => TokenType::CATCH,
"cleanup" => TokenType::CLEANUP,
"throw" => TokenType::THROW,
"local" => TokenType::LOCAL,
"static" => TokenType::STATIC,
"outbox" => TokenType::OUTBOX,
"not" => TokenType::NOT,
"override" => TokenType::OVERRIDE,
"from" => TokenType::FROM,
"weak" => TokenType::WEAK,
"using" => TokenType::USING,
"and" => TokenType::AND,
"or" => TokenType::OR,
"true" => TokenType::TRUE,
"false" => TokenType::FALSE,
"null" => TokenType::NULL,
_ => TokenType::IDENTIFIER(identifier.clone()),
};
// 12.7 Strict mode: fallback extended keywords to IDENTIFIER
if Self::strict_12_7() {
let is_extended = matches!(
tok,
TokenType::INTERFACE
| TokenType::USING
| TokenType::INCLUDE
| TokenType::OUTBOX
| TokenType::NOWAIT
| TokenType::OVERRIDE
| TokenType::WEAK
| TokenType::PACK
);
if is_extended {
tok = TokenType::IDENTIFIER(identifier.clone());
}
}
// 統一文法エンジンとの差分チェック(動作は変更しない)
if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") {
// 安全に参照(初期導入のため、存在しない場合は無視)
let kw = engine::get().is_keyword_str(&identifier);
match (&tok, kw) {
(TokenType::IDENTIFIER(_), Some(name)) => {
eprintln!(
"[GRAMMAR-DIFF] tokenizer=IDENT, grammar=KEYWORD({}) word='{}'",
name, identifier
);
}
(TokenType::IDENTIFIER(_), None) => {}
// tokenizerがキーワード、grammarが未定義
(t, None) if !matches!(t, TokenType::IDENTIFIER(_)) => {
eprintln!(
"[GRAMMAR-DIFF] tokenizer=KEYWORD, grammar=IDENT word='{}'",
identifier
);
}
_ => {}
}
}
tok
}
/// Skips to the end of the current line; the newline itself is left unconsumed.
fn skip_line_comment(&mut self) {
    loop {
        match self.current_char() {
            None | Some('\n') => return,
            Some(_) => self.advance(),
        }
    }
}
/// Skips a block comment `/* ... */` (nesting is not supported).
///
/// Expects the cursor to be on the opening `/` with `*` right after it.
///
/// # Errors
/// Returns `TokenizeError::UnterminatedComment` if the input ends before `*/`.
fn skip_block_comment(&mut self) -> Result<(), TokenizeError> {
    self.advance(); // '/'
    self.advance(); // '*'
    loop {
        match (self.current_char(), self.peek_char()) {
            // End of input reached without a closing */
            (None, _) => return Err(TokenizeError::UnterminatedComment { line: self.line }),
            (Some('*'), Some('/')) => {
                self.advance(); // '*'
                self.advance(); // '/'
                return Ok(());
            }
            _ => self.advance(),
        }
    }
}
/// Skips whitespace except newlines (a newline becomes a NEWLINE token instead).
fn skip_whitespace(&mut self) {
    while self
        .current_char()
        .map_or(false, |ch| ch != '\n' && ch.is_whitespace())
    {
        self.advance();
    }
}
/// Returns the character under the cursor, or `None` past the end of input.
fn current_char(&self) -> Option<char> {
    let here = self.position;
    self.input.get(here).cloned()
}
/// Looks one character ahead of the cursor without consuming anything.
fn peek_char(&self) -> Option<char> {
    let ahead = self.position + 1;
    self.input.get(ahead).cloned()
}
/// Moves the cursor forward by one character, keeping line/column in sync.
/// Does nothing when already at the end of input.
fn advance(&mut self) {
    match self.current_char() {
        None => return,
        Some('\n') => {
            self.line += 1;
            self.column = 1;
        }
        Some(_) => self.column += 1,
    }
    self.position += 1;
}
/// Returns `true` once the cursor has moved past the last input character.
fn is_at_end(&self) -> bool {
    let total = self.input.len();
    total <= self.position
}
}
// ===== Tests =====
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_tokens() {
        let mut tokenizer = NyashTokenizer::new("box new = + - *");
        let tokens = tokenizer.tokenize().unwrap();
        assert_eq!(tokens.len(), 7); // 6 tokens + EOF
        assert_eq!(tokens[0].token_type, TokenType::BOX);
        assert_eq!(tokens[1].token_type, TokenType::NEW);
        assert_eq!(tokens[2].token_type, TokenType::ASSIGN);
        assert_eq!(tokens[3].token_type, TokenType::PLUS);
        assert_eq!(tokens[4].token_type, TokenType::MINUS);
        assert_eq!(tokens[5].token_type, TokenType::MULTIPLY);
        assert_eq!(tokens[6].token_type, TokenType::EOF);
    }

    #[test]
    fn test_string_literal() {
        let mut tokenizer = NyashTokenizer::new(r#""Hello, World!""#);
        let tokens = tokenizer.tokenize().unwrap();
        assert_eq!(tokens.len(), 2); // STRING + EOF
        match &tokens[0].token_type {
            TokenType::STRING(s) => assert_eq!(s, "Hello, World!"),
            _ => panic!("Expected STRING token"),
        }
    }

    #[test]
    fn test_number_literal() {
        let mut tokenizer = NyashTokenizer::new("42 123 0");
        let tokens = tokenizer.tokenize().unwrap();
        assert_eq!(tokens.len(), 4); // 3 numbers + EOF
        match &tokens[0].token_type {
            TokenType::NUMBER(n) => assert_eq!(*n, 42),
            _ => panic!("Expected NUMBER token"),
        }
        match &tokens[1].token_type {
            TokenType::NUMBER(n) => assert_eq!(*n, 123),
            _ => panic!("Expected NUMBER token"),
        }
        match &tokens[2].token_type {
            TokenType::NUMBER(n) => assert_eq!(*n, 0),
            _ => panic!("Expected NUMBER token"),
        }
    }

    #[test]
    fn test_identifier() {
        let mut tokenizer = NyashTokenizer::new("test_var myBox getValue");
        let tokens = tokenizer.tokenize().unwrap();
        assert_eq!(tokens.len(), 4); // 3 identifiers + EOF
        match &tokens[0].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "test_var"),
            _ => panic!("Expected IDENTIFIER token"),
        }
        match &tokens[1].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "myBox"),
            _ => panic!("Expected IDENTIFIER token"),
        }
        match &tokens[2].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "getValue"),
            _ => panic!("Expected IDENTIFIER token"),
        }
    }

    #[test]
    fn test_operators() {
        // NOTE: `>>` is only a ShiftRight token when NYASH_STRICT_12_7 is not "1".
        let mut tokenizer = NyashTokenizer::new(">> == != <= >= < >");
        let tokens = tokenizer.tokenize().unwrap();
        assert_eq!(tokens[0].token_type, TokenType::ShiftRight);
        assert_eq!(tokens[1].token_type, TokenType::EQUALS);
        assert_eq!(tokens[2].token_type, TokenType::NotEquals);
        assert_eq!(tokens[3].token_type, TokenType::LessEquals);
        assert_eq!(tokens[4].token_type, TokenType::GreaterEquals);
        assert_eq!(tokens[5].token_type, TokenType::LESS);
        assert_eq!(tokens[6].token_type, TokenType::GREATER);
    }

    #[test]
    fn test_complex_code() {
        let code = r#"
box TestBox {
value
getValue() {
return this.value
}
}
obj = new TestBox()
obj.value = "test123"
"#;
        let mut tokenizer = NyashTokenizer::new(code);
        let tokens = tokenizer.tokenize().unwrap();
        // Check that the basic tokens are present.
        let token_types: Vec<_> = tokens.iter().map(|t| &t.token_type).collect();
        assert!(token_types.contains(&&TokenType::BOX));
        assert!(token_types.contains(&&TokenType::NEW));
        assert!(token_types.contains(&&TokenType::THIS));
        assert!(token_types.contains(&&TokenType::RETURN));
        assert!(token_types.contains(&&TokenType::DOT));
    }

    #[test]
    fn test_line_numbers() {
        let code = "box\ntest\nvalue";
        let mut tokenizer = NyashTokenizer::new(code);
        let tokens = tokenizer.tokenize().unwrap();
        // Ignore NEWLINE tokens when checking line numbers.
        let non_newline: Vec<&Token> = tokens
            .iter()
            .filter(|t| !matches!(t.token_type, TokenType::NEWLINE))
            .collect();
        assert_eq!(non_newline[0].line, 1); // box
        assert_eq!(non_newline[1].line, 2); // test
        assert_eq!(non_newline[2].line, 3); // value
    }

    #[test]
    fn test_comments() {
        let code = r#"box Test // this is a comment
# this is also a comment
value"#;
        let mut tokenizer = NyashTokenizer::new(code);
        let tokens = tokenizer.tokenize().unwrap();
        // Comments must not produce tokens.
        let token_types: Vec<_> = tokens
            .iter()
            .filter(|t| !matches!(t.token_type, TokenType::NEWLINE))
            .map(|t| &t.token_type)
            .collect();
        assert_eq!(token_types.len(), 4); // box, Test, value, EOF
    }

    #[test]
    fn test_error_handling() {
        let mut tokenizer = NyashTokenizer::new("@#$%");
        let result = tokenizer.tokenize();
        assert!(result.is_err());
        match result {
            Err(TokenizeError::UnexpectedCharacter { char, line, column }) => {
                assert_eq!(char, '@');
                assert_eq!(line, 1);
                assert_eq!(column, 1);
            }
            _ => panic!("Expected UnexpectedCharacter error"),
        }
    }

    #[test]
    fn test_basic_sugar_tokens() {
        // A compact input that contains every sugar operator once.
        // (The previously declared but never tokenized second tokenizer was removed.)
        let mut tokenizer = NyashTokenizer::new("|> ?.? ?? += -= *= /= ..");
        let toks = tokenizer.tokenize().unwrap();
        let expected = [
            TokenType::PipeForward,
            TokenType::QmarkDot,
            TokenType::QmarkQmark,
            TokenType::PlusAssign,
            TokenType::MinusAssign,
            TokenType::MulAssign,
            TokenType::DivAssign,
            TokenType::RANGE,
        ];
        for want in expected.iter() {
            assert!(
                toks.iter().any(|k| &k.token_type == want),
                "missing token {:?}",
                want
            );
        }
    }

    #[test]
    fn test_longest_match_sequences() {
        // '??' wins over '?', '?.' wins over '.', and '..' wins over '.'
        let mut t = NyashTokenizer::new("?? ? ?. .. .");
        let toks = t.tokenize().unwrap();
        let kinds: Vec<&TokenType> = toks.iter().map(|k| &k.token_type).collect();
        assert!(matches!(kinds[0], TokenType::QmarkQmark));
        assert!(matches!(kinds[1], TokenType::QUESTION));
        assert!(matches!(kinds[2], TokenType::QmarkDot));
        assert!(matches!(kinds[3], TokenType::RANGE));
        assert!(matches!(kinds[4], TokenType::DOT));
    }
}

32
src/tokenizer/cursor.rs Normal file
View File

@ -0,0 +1,32 @@
use super::NyashTokenizer;
impl NyashTokenizer {
    /// Returns the character under the cursor, or `None` past the end of input.
    pub(crate) fn current_char(&self) -> Option<char> {
        self.input.get(self.position).cloned()
    }

    /// Looks one character ahead of the cursor without consuming anything.
    pub(crate) fn peek_char(&self) -> Option<char> {
        let ahead = self.position + 1;
        self.input.get(ahead).cloned()
    }

    /// Consumes one character and updates line/column; a no-op at end of input.
    pub(crate) fn advance(&mut self) {
        let consumed = match self.current_char() {
            Some(ch) => ch,
            None => return,
        };
        self.position += 1;
        match consumed {
            '\n' => {
                self.line += 1;
                self.column = 1;
            }
            _ => self.column += 1,
        }
    }

    /// Returns `true` once the cursor has moved past the last input character.
    pub(crate) fn is_at_end(&self) -> bool {
        self.input.len() <= self.position
    }
}

256
src/tokenizer/engine.rs Normal file
View File

@ -0,0 +1,256 @@
use super::{NyashTokenizer, Token, TokenType, TokenizeError};
impl NyashTokenizer {
/// Reports whether `;` is accepted as an optional statement separator.
///
/// Enabled only when `NYASH_PARSER_ALLOW_SEMICOLON` is `1`, `true` or `on`
/// (ASCII case-insensitive); an unset or unreadable variable means `false`.
#[inline]
pub(crate) fn allow_semicolon() -> bool {
    std::env::var("NYASH_PARSER_ALLOW_SEMICOLON")
        .ok()
        .map_or(false, |raw| {
            let lowered = raw.to_ascii_lowercase();
            matches!(lowered.as_str(), "1" | "true" | "on")
        })
}
/// Reports whether Phase 12.7 strict mode is on (`NYASH_STRICT_12_7` is exactly `1`).
#[inline]
pub(crate) fn strict_12_7() -> bool {
    match std::env::var("NYASH_STRICT_12_7") {
        Ok(value) => value == "1",
        Err(_) => false,
    }
}
/// Creates a tokenizer over `input`, positioned at its first character (line 1, column 1).
pub fn new(input: impl Into<String>) -> Self {
    let text: String = input.into();
    let input: Vec<char> = text.chars().collect();
    Self {
        input,
        position: 0,
        line: 1,
        column: 1,
    }
}
/// Tokenizes the whole input and returns the tokens followed by a final `EOF` token.
///
/// Whitespace (except newlines) and comments are dropped between tokens.
/// When `NYASH_TOK_TRACE=1`, each token kind is echoed to stderr as it is read.
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizeError> {
    let mut tokens = Vec::new();
    while !self.is_at_end() {
        self.skip_whitespace();
        // Swallow any run of consecutive block / line comments, together
        // with the whitespace that follows each of them.
        loop {
            match (self.current_char(), self.peek_char()) {
                // block comment: /* ... */
                (Some('/'), Some('*')) => self.skip_block_comment()?,
                // line comments: // ... or # ...
                (Some('/'), Some('/')) | (Some('#'), _) => self.skip_line_comment(),
                _ => break,
            }
            self.skip_whitespace();
        }
        if self.is_at_end() {
            break;
        }
        let token = self.tokenize_next()?;
        if matches!(std::env::var("NYASH_TOK_TRACE").as_deref(), Ok("1")) {
            eprintln!("[tok] {:?}", token.token_type);
        }
        tokens.push(token);
    }
    // Always terminate the stream with EOF.
    tokens.push(Token::new(TokenType::EOF, self.line, self.column));
    Ok(tokens)
}
/// 次の一つのトークンを読み取り
fn tokenize_next(&mut self) -> Result<Token, TokenizeError> {
let start_line = self.line;
let start_column = self.column;
match self.current_char() {
// Optional statement separator ';' (gated)
Some(';') if Self::allow_semicolon() => {
self.advance();
return Ok(Token::new(TokenType::SEMICOLON, start_line, start_column));
}
// Block comment should have been skipped by tokenize() pre-loop, but be defensive here
Some('/') if self.peek_char() == Some('*') => {
self.skip_block_comment()?;
// After skipping, restart tokenization for next token
return self.tokenize_next();
}
// 2文字またはそれ以上の演算子は最長一致で先に判定
Some('|') if self.peek_char() == Some('|') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::OR, start_line, start_column));
}
Some('&') if self.peek_char() == Some('&') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::AND, start_line, start_column));
}
Some('|') if self.peek_char() == Some('>') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::PipeForward, start_line, start_column));
}
Some('?') if self.peek_char() == Some('.') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::QmarkDot, start_line, start_column));
}
Some('?') if self.peek_char() == Some('?') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::QmarkQmark, start_line, start_column));
}
Some('?') => {
self.advance();
return Ok(Token::new(TokenType::QUESTION, start_line, start_column));
}
Some('+') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::PlusAssign, start_line, start_column));
}
Some('-') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::MinusAssign, start_line, start_column));
}
Some('*') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::MulAssign, start_line, start_column));
}
Some('/') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::DivAssign, start_line, start_column));
}
Some('.') if self.peek_char() == Some('.') => {
self.advance();
self.advance();
return Ok(Token::new(TokenType::RANGE, start_line, start_column));
}
Some('"') => {
let string_value = self.read_string()?;
Ok(Token::new(
TokenType::STRING(string_value),
start_line,
start_column,
))
}
Some(c) if c.is_ascii_digit() => {
let token_type = self.read_numeric_literal()?;
Ok(Token::new(token_type, start_line, start_column))
}
Some(c) if c.is_alphabetic() || c == '_' => {
let token_type = self.read_keyword_or_identifier();
Ok(Token::new(token_type, start_line, start_column))
}
Some('/') if self.peek_char() == Some('/') => {
self.skip_line_comment();
self.skip_whitespace(); // コメント後の空白もスキップ
return self.tokenize_next();
}
Some('#') => {
self.skip_line_comment();
self.skip_whitespace(); // コメント後の空白もスキップ
return self.tokenize_next();
}
Some('>') if self.peek_char() == Some('>') && !Self::strict_12_7() => {
self.advance();
self.advance();
Ok(Token::new(TokenType::ShiftRight, start_line, start_column))
}
Some(':') if self.peek_char() == Some(':') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::DoubleColon, start_line, start_column))
}
Some(':') => {
self.advance();
Ok(Token::new(TokenType::COLON, start_line, start_column))
}
Some('=') if self.peek_char() == Some('>') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::FatArrow, start_line, start_column))
}
Some('=') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::EQUALS, start_line, start_column))
}
Some('!') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::NotEquals, start_line, start_column))
}
Some('<') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::LessEquals, start_line, start_column))
}
Some('>') if self.peek_char() == Some('=') => {
self.advance();
self.advance();
Ok(Token::new(TokenType::GreaterEquals, start_line, start_column))
}
Some(c) => {
if let Some(token) = self.single_char_token(c) {
self.advance();
Ok(Token::new(token, start_line, start_column))
} else {
Err(TokenizeError::UnexpectedCharacter {
char: c,
line: start_line,
column: start_column,
})
}
}
None => Ok(Token::new(TokenType::EOF, start_line, start_column)),
}
}
// Lookup table for single-character tokens (longest-match operators are
// handled by the caller before this is consulted).
fn single_char_token(&self, c: char) -> Option<TokenType> {
    // '?' and ':' are dispatched by the caller; only plain one-character
    // tokens are listed here.
    let kind = match c {
        '!' => TokenType::NOT,
        '<' => TokenType::LESS,
        '>' => TokenType::GREATER,
        '&' => TokenType::BitAnd,
        '|' => TokenType::BitOr,
        '^' => TokenType::BitXor,
        '=' => TokenType::ASSIGN,
        '+' => TokenType::PLUS,
        '-' => TokenType::MINUS,
        '*' => TokenType::MULTIPLY,
        '/' => TokenType::DIVIDE,
        '%' => TokenType::MODULO,
        '.' => TokenType::DOT,
        '(' => TokenType::LPAREN,
        ')' => TokenType::RPAREN,
        '[' => TokenType::LBRACK,
        ']' => TokenType::RBRACK,
        '{' => TokenType::LBRACE,
        '}' => TokenType::RBRACE,
        ',' => TokenType::COMMA,
        '\n' => TokenType::NEWLINE,
        _ => return None,
    };
    Some(kind)
}
}

134
src/tokenizer/kinds.rs Normal file
View File

@ -0,0 +1,134 @@
use thiserror::Error;
/// Kinds of tokens; literal and identifier variants carry their value.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
// Literals
STRING(String),
NUMBER(i64),
FLOAT(f64),
TRUE,
FALSE,
NULL,
// Keywords
BOX,
GLOBAL,
SINGLETON,
NEW,
MATCH,
IF,
ELSE,
LOOP,
BREAK,
CONTINUE,
RETURN,
FUNCTION,
FN,
PRINT,
THIS,
ME,
INIT,
PACK,
BIRTH,
NOWAIT,
AWAIT,
INTERFACE,
// `:` symbol (listed among the keywords)
COLON,
INCLUDE,
TRY,
CATCH,
CLEANUP,
THROW,
LOCAL,
STATIC,
OUTBOX,
NOT,
OVERRIDE,
FROM,
WEAK,
USING,
IMPORT,
// Operators
ShiftLeft,
ShiftRight,
BitAnd,
BitOr,
BitXor,
FatArrow,
EQUALS,
NotEquals,
LessEquals,
GreaterEquals,
AND,
OR,
// Two-character operators (longest match)
PipeForward,
QmarkDot,
QmarkQmark,
PlusAssign,
MinusAssign,
MulAssign,
DivAssign,
RANGE,
LESS,
GREATER,
ASSIGN,
PLUS,
MINUS,
MULTIPLY,
DIVIDE,
MODULO,
// Symbols
DOT,
DoubleColon,
LPAREN,
RPAREN,
LBRACK,
RBRACK,
LBRACE,
RBRACE,
COMMA,
QUESTION,
NEWLINE,
SEMICOLON, // optional separator
// Identifiers
IDENTIFIER(String),
// Special
EOF,
}
/// A token kind together with the line/column position recorded for it.
#[derive(Debug, Clone)]
pub struct Token {
    pub token_type: TokenType,
    pub line: usize,
    pub column: usize,
}

impl Token {
    /// Builds a token of kind `token_type` tagged with `line` and `column`.
    pub fn new(token_type: TokenType, line: usize, column: usize) -> Self {
        Token {
            token_type: token_type,
            line: line,
            column: column,
        }
    }
}
/// Errors reported while tokenizing.
#[derive(Error, Debug)]
pub enum TokenizeError {
// A character that does not start any known token.
#[error("Unexpected character '{char}' at line {line}, column {column}")]
UnexpectedCharacter { char: char, line: usize, column: usize },
// A string literal whose closing quote was never found.
#[error("Unterminated string literal at line {line}")]
UnterminatedString { line: usize },
// A numeric literal that could not be parsed.
#[error("Invalid number format at line {line}")]
InvalidNumber { line: usize },
// A block comment that reached end of input without a closing `*/`.
#[error("Comment not closed at line {line}")]
UnterminatedComment { line: usize },
}

102
src/tokenizer/lex_ident.rs Normal file
View File

@ -0,0 +1,102 @@
use super::{NyashTokenizer, TokenType};
use crate::grammar::engine;
impl NyashTokenizer {
    /// Reads a run of alphanumeric / `_` characters and classifies it as a
    /// keyword token or an `IDENTIFIER`.
    pub(crate) fn read_keyword_or_identifier(&mut self) -> TokenType {
        let mut word = String::new();
        loop {
            match self.current_char() {
                Some(ch) if ch.is_alphanumeric() || ch == '_' => {
                    word.push(ch);
                    self.advance();
                }
                _ => break,
            }
        }

        // 12.7 strict mode: these extended keywords are treated as plain identifiers.
        let strict_demoted = Self::strict_12_7()
            && matches!(
                word.as_str(),
                "interface"
                    | "using"
                    | "include"
                    | "outbox"
                    | "nowait"
                    | "override"
                    | "weak"
                    | "pack"
            );

        let tok = if strict_demoted {
            TokenType::IDENTIFIER(word.clone())
        } else {
            // Keyword lookup.
            match word.as_str() {
                "box" => TokenType::BOX,
                "global" => TokenType::GLOBAL,
                "singleton" => TokenType::SINGLETON,
                "new" => TokenType::NEW,
                "match" => TokenType::MATCH,
                "if" => TokenType::IF,
                "else" => TokenType::ELSE,
                "loop" => TokenType::LOOP,
                "break" => TokenType::BREAK,
                "continue" => TokenType::CONTINUE,
                "return" => TokenType::RETURN,
                "function" => TokenType::FUNCTION,
                "fn" => TokenType::FN,
                "print" => TokenType::PRINT,
                "this" => TokenType::THIS,
                "me" => TokenType::ME,
                "init" => TokenType::INIT,
                "pack" => TokenType::PACK,
                "birth" => TokenType::BIRTH,
                "nowait" => TokenType::NOWAIT,
                "await" => TokenType::AWAIT,
                "interface" => TokenType::INTERFACE,
                "include" => TokenType::INCLUDE,
                "import" => TokenType::IMPORT,
                "try" => TokenType::TRY,
                "catch" => TokenType::CATCH,
                "cleanup" => TokenType::CLEANUP,
                "throw" => TokenType::THROW,
                "local" => TokenType::LOCAL,
                "static" => TokenType::STATIC,
                "outbox" => TokenType::OUTBOX,
                "not" => TokenType::NOT,
                "override" => TokenType::OVERRIDE,
                "from" => TokenType::FROM,
                "weak" => TokenType::WEAK,
                "using" => TokenType::USING,
                "and" => TokenType::AND,
                "or" => TokenType::OR,
                "true" => TokenType::TRUE,
                "false" => TokenType::FALSE,
                "null" => TokenType::NULL,
                _ => TokenType::IDENTIFIER(word.clone()),
            }
        };

        // Diagnostic diff against the unified grammar engine (never changes the result).
        if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") {
            let is_ident = matches!(tok, TokenType::IDENTIFIER(_));
            match (is_ident, engine::get().is_keyword_str(&word)) {
                (true, Some(kw)) => {
                    eprintln!(
                        "[GRAMMAR-DIFF] tokenizer=IDENT, grammar=KEYWORD({}) word='{}'",
                        kw, word
                    );
                }
                (false, None) => {
                    eprintln!(
                        "[GRAMMAR-DIFF] tokenizer=KEYWORD, grammar=IDENT word='{}'",
                        word
                    );
                }
                _ => {}
            }
        }
        tok
    }
}

View File

@ -0,0 +1,43 @@
use super::{NyashTokenizer, TokenType, TokenizeError};
impl NyashTokenizer {
    /// Reads a numeric literal: an integer (`NUMBER`) or a floating-point value (`FLOAT`).
    ///
    /// A `.` is taken as part of the number only once, and only when a digit
    /// follows it. Text that fails to parse yields `TokenizeError::InvalidNumber`.
    pub(crate) fn read_numeric_literal(&mut self) -> Result<TokenType, TokenizeError> {
        let start_line = self.line;
        let mut digits = String::new();
        let mut seen_dot = false;
        while let Some(ch) = self.current_char() {
            match ch {
                d if d.is_ascii_digit() => {}
                // Accept the decimal point only when digits follow it.
                '.' if !seen_dot && self.peek_char().map_or(false, |n| n.is_ascii_digit()) => {
                    seen_dot = true;
                }
                _ => break,
            }
            digits.push(ch);
            self.advance();
        }
        let parsed = if seen_dot {
            // Parse as floating point.
            digits.parse::<f64>().ok().map(TokenType::FLOAT)
        } else {
            // Parse as integer.
            digits.parse::<i64>().ok().map(TokenType::NUMBER)
        };
        parsed.ok_or(TokenizeError::InvalidNumber { line: start_line })
    }
}

View File

@ -0,0 +1,42 @@
use super::{NyashTokenizer, TokenizeError};
impl NyashTokenizer {
    /// Reads a double-quoted string literal and returns its unescaped contents.
    ///
    /// The cursor must be on the opening `"`; on success it ends just past the
    /// closing `"`.
    ///
    /// Recognized escapes are `\n`, `\t`, `\r`, `\\` and `\"`. Any other escaped
    /// character is kept verbatim, backslash included.
    ///
    /// # Errors
    /// `TokenizeError::UnterminatedString`, carrying the line on which the literal
    /// began, when the input ends before a closing quote (including right after a
    /// trailing backslash).
    pub(crate) fn read_string(&mut self) -> Result<String, TokenizeError> {
        let opening_line = self.line;
        self.advance(); // consume the opening '"'
        let mut buf = String::new();

        loop {
            match self.current_char() {
                None => break,
                Some('"') => {
                    self.advance(); // consume the closing '"'
                    return Ok(buf);
                }
                Some('\\') => {
                    // Step onto the character being escaped.
                    self.advance();
                    let escaped = match self.current_char() {
                        Some(e) => e,
                        None => break,
                    };
                    match escaped {
                        'n' => buf.push('\n'),
                        't' => buf.push('\t'),
                        'r' => buf.push('\r'),
                        '\\' => buf.push('\\'),
                        '"' => buf.push('"'),
                        other => {
                            // Unknown escape: keep both the backslash and the character.
                            buf.push('\\');
                            buf.push(other);
                        }
                    }
                }
                Some(plain) => buf.push(plain),
            }
            self.advance();
        }

        Err(TokenizeError::UnterminatedString { line: opening_line })
    }
}

24
src/tokenizer/mod.rs Normal file
View File

@ -0,0 +1,24 @@
/*!
* Nyash Tokenizer — split modules (kinds/cursor/whitespace/lexers/engine)
*/
mod kinds;
mod cursor;
mod whitespace;
mod lex_string;
mod lex_number;
mod lex_ident;
mod engine;
pub use kinds::{Token, TokenType, TokenizeError};
/// Nyash tokenizer.
///
/// Holds the source text and the cursor state shared by the lexer submodules,
/// which implement the actual scanning through `impl NyashTokenizer` blocks.
pub struct NyashTokenizer {
/// Source text stored as individual `char`s.
pub(crate) input: Vec<char>,
/// Cursor into `input` — presumably the index of the current character; confirm in the cursor module.
pub(crate) position: usize,
/// Current line; lexers snapshot this value to report where a literal or comment error occurred.
pub(crate) line: usize,
/// Current column — presumably tracked alongside `line` for diagnostics; confirm in the cursor module.
pub(crate) column: usize,
}
// Public API and core logic are implemented in submodules via impl NyashTokenizer

View File

@ -0,0 +1,43 @@
use super::{NyashTokenizer, TokenizeError};
impl NyashTokenizer {
    /// Skips the remainder of a line comment.
    ///
    /// Stops at (and does not consume) the terminating `'\n'`, or at end of input.
    pub(crate) fn skip_line_comment(&mut self) {
        while matches!(self.current_char(), Some(ch) if ch != '\n') {
            self.advance();
        }
    }

    /// Skips a block comment of the form `/* ... */` (nesting is not supported).
    ///
    /// The cursor is assumed to be on the opening `/` with `*` right after it;
    /// both are consumed unconditionally. Scanning ends after the first `*/`.
    ///
    /// # Errors
    /// `TokenizeError::UnterminatedComment` when the input ends before `*/`.
    /// NOTE(review): the reported line is the tokenizer's line at end of input,
    /// not the line where the comment opened — confirm this is intended.
    pub(crate) fn skip_block_comment(&mut self) -> Result<(), TokenizeError> {
        self.advance(); // '/'
        self.advance(); // '*'

        loop {
            match self.current_char() {
                // Ran out of input without seeing the closing "*/".
                None => return Err(TokenizeError::UnterminatedComment { line: self.line }),
                Some('*') if self.peek_char() == Some('/') => {
                    self.advance(); // '*'
                    self.advance(); // '/'
                    return Ok(());
                }
                Some(_) => {
                    self.advance();
                }
            }
        }
    }

    /// Skips whitespace other than newlines.
    ///
    /// Newlines are left in place; per the original note they are handled as
    /// NEWLINE tokens rather than being discarded here.
    pub(crate) fn skip_whitespace(&mut self) {
        while matches!(self.current_char(), Some(ch) if ch.is_whitespace() && ch != '\n') {
            self.advance();
        }
    }
}