diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md
index 1b87255f..23f4c625 100644
--- a/CURRENT_TASK.md
+++ b/CURRENT_TASK.md
@@ -16,6 +16,10 @@ Update (today)
 - Added docs/private/roadmap/phases/phase-21.4/PLAN.md (execution plan in priority order)
 - Confirmed the MVP skeleton under tools/hako_parser/* (CLI/Emitter/Parser/Tokenizer)
 - Added tools/hako_check/tests/README.md and run_tests.sh (test scaffolding)
+- Runner: introduced plugin_guard (factored out of vm/vm_fallback)
+- String API: VM now accepts size() as an alias of length()
+- Analyzer CLI: --format/--debug/--source-file handled in any order
+- Analyzer IR: methods fall back to a text scan when the AST is empty

 Remaining (21.4)
 1) Hako Parser MVP implementation (tokenizer/parser_core/ast_emit/cli)
@@ -26,6 +30,18 @@ Remaining (21.4)
 6) Limited `--fix` (HC002/003/500)
 7) DOT edges ON (calls→edges, cluster by box)

+Roadmap (A→B→C) — strictly in this order
+- A. Get HC011 green first (AST-independent safe path) [COMPLETE]
+  - Implementation: added `--no-ast`, store `source` in the IR, text-scan fallback for methods/calls, pretty-printed JSON-LSP
+  - Acceptance: `tools/hako_check/run_tests.sh` → [TEST/OK] HC011_dead_methods green (expected JSON matches)
+  - Scope: tools/hako_check/{cli,analysis_consumer,rules/rule_dead_methods}.hako (AST by default, switch via `--no-ast`)
+- B. Finish and unify plugin_guard
+  - Goal: unify missing-plugin reporting behind one common API across all runner modes (respecting the strict/quiet policies)
+  - Acceptance: zero ad-hoc output left outside vm/vm_fallback; messages go to stderr only (when quiet)
+- C. Refine the AST/Tokenizer (return to the AST path)
+  - Goal: strengthen parser_core/tokenizer so `boxes[].methods[]` is extracted reliably; HC011 passes on AST input
+  - Acceptance: run_tests.sh is green with `HAKO_CHECK_NO_AST=0` (the default); methods/calls counts appear in the IR debug output
+
 Open Issues (Map semantics)
 - Return-value semantics of Map.get are still undecided
  - Status: the kernel-side definition of get_h (value vs. existence check) is ambiguous; reps pin has as authoritative (rc=1).
@@ -52,12 +68,23 @@ Next (21.2 — TBD)
 - dev may allow Adapter registration and by-name fallback (toggle); prod requires an Adapter (fail fast).

 Next Steps (immediate)
-1) Start implementing tokenizer.hako (line/column tracking; strings/numbers/identifiers)
-2) Assemble using/box/static method/assign in parser_core.hako
-3) Emit v0 JSON from ast_emit.hako and verify against real samples
-4) Enable AST intake in analysis_consumer.hako
-5) Get HC002/003/010/011 passing on AST input first
-6) Minimal `--format json-lsp` implementation
+1) B: apply plugin_guard across the whole runner (remove remaining hand-rolled output)
+2) C: refine tokens in tokenizer/parser_core + stabilize methods population (HC011 green on the AST path)
+3) Stabilize ast_emit.hako (ordering/quoting/numbers)
+4) Add `--format json-lsp` tests (OK/NG/edge)
+5) Minimal `--format json-lsp` implementation (feed the diagnostics array through the existing wiring)
+
+Rules Backlog (candidates, in suggested priority)
+- HC012: Dead Static Box — detect static boxes that are defined but never referenced or called
+- HC013: Duplicate Method — detect duplicate method name/arity within a single box
+- HC014: Missing Entrypoint — warn when Main.main/0 is absent (profile-dependent)
+- HC015: Arity Mismatch (MVP) — detect argument-count mismatches for unambiguous `Name.method()` calls (minimal 0/1 version)
+- HC016: Unused Using/Alias — detect unused `using ... as Alias`
+- HC017: Non-ASCII Quotes — detect fancy quotes such as “ ” ‘ ’ and suggest replacing them with ASCII
+- HC018: Top-level local in prelude — detect a leading `local` (cleanup missed before merge)
+- HC021: Analyzer IO Safety — warn on FileBox use in the CLI path (suggest `--source-file`)
+- HC022: Stage-3 Gate — detect the risk of feeding .hako files containing while/for to the Nyash VM (surface the gate flag)
+- HC031: Brace Heuristics — coarse `{`/`}` imbalance detection (early warning)

 Previous Achievement
 - ✅ Phase 20.44 COMPLETE (provider emit/codegen reps green)
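Note: the split_lines helper added to string_helpers.hako below always pushes the segment after the last '\n', so an input that ends in a newline yields a trailing empty element. A minimal Rust sketch of the same semantics (illustrative only, not part of the patch):

```rust
// Mirrors the split semantics of the Hako split_lines helper below:
// the tail after the last '\n' is always pushed, even when empty.
fn split_lines(s: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut last = 0;
    for (i, b) in s.bytes().enumerate() {
        if b == b'\n' {
            out.push(s[last..i].to_string());
            last = i + 1;
        }
    }
    out.push(s[last..].to_string()); // tail; empty when s ends with '\n'
    out
}

fn main() {
    assert_eq!(split_lines("a\nb"), vec!["a", "b"]);
    assert_eq!(split_lines("a\n"), vec!["a", ""]);
}
```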
diff --git a/lang/src/shared/common/string_helpers.hako b/lang/src/shared/common/string_helpers.hako
index 86ea844b..0e3b556d 100644
--- a/lang/src/shared/common/string_helpers.hako
+++ b/lang/src/shared/common/string_helpers.hako
@@ -171,4 +171,24 @@ static box StringHelpers {
         }
         return -1
     }
+
+    // Split string by newline into ArrayBox (without relying on StringBox.split)
+    split_lines(s) {
+        local arr = new ArrayBox()
+        if s == null { return arr }
+        local n = s.length()
+        local last = 0
+        local i = 0
+        loop (i < n) {
+            local ch = s.substring(i, i+1)
+            if ch == "\n" {
+                arr.push(s.substring(last, i))
+                last = i + 1
+            }
+            i = i + 1
+        }
+        // push tail
+        if last <= n { arr.push(s.substring(last)) }
+        return arr
+    }
 }
diff --git a/src/ast/utils.rs b/src/ast/utils.rs
index c57c398b..281ca1b7 100644
--- a/src/ast/utils.rs
+++ b/src/ast/utils.rs
@@ -55,75 +55,69 @@ impl ASTNode {
         }
     }

-    /// 🌟 AST分類 - ChatGPTアドバイス統合による革新的分類システム
-    /// Structure/Expression/Statement の明確な分離
+    /// Classify into Structure/Expression/Statement
     pub fn classify(&self) -> ASTNodeType {
+        use ASTNodeType::{Expression as E, Statement as S, Structure as St};
         match self {
             // Structure nodes - basic language constructs
-            ASTNode::BoxDeclaration { .. } => ASTNodeType::Structure,
-            ASTNode::FunctionDeclaration { .. } => ASTNodeType::Structure,
-            ASTNode::If { .. } => ASTNodeType::Structure,
-            ASTNode::Loop { .. } => ASTNodeType::Structure,
-            ASTNode::While { .. } => ASTNodeType::Structure,
-            ASTNode::ForRange { .. } => ASTNodeType::Structure,
-            ASTNode::TryCatch { .. } => ASTNodeType::Structure,
+            ASTNode::BoxDeclaration { .. } => St,
+            ASTNode::FunctionDeclaration { .. } => St,
+            ASTNode::If { .. } => St,
+            ASTNode::Loop { .. } => St,
+            ASTNode::While { .. } => St,
+            ASTNode::ForRange { .. } => St,
+            ASTNode::TryCatch { .. } => St,
+            ASTNode::ScopeBox { .. } => St, // diagnostic wrapper

             // Expression nodes - value-producing expressions
-            ASTNode::Literal { .. } => ASTNodeType::Expression,
-            ASTNode::Variable { .. } => ASTNodeType::Expression,
-            ASTNode::BinaryOp { .. } => ASTNodeType::Expression,
-            ASTNode::UnaryOp { .. } => ASTNodeType::Expression,
-            ASTNode::FunctionCall { .. } => ASTNodeType::Expression,
-            ASTNode::Call { .. } => ASTNodeType::Expression,
-            ASTNode::MethodCall { .. } => ASTNodeType::Expression,
-            ASTNode::FieldAccess { .. } => ASTNodeType::Expression,
-            ASTNode::New { .. } => ASTNodeType::Expression,
-            ASTNode::This { .. } => ASTNodeType::Expression,
-            ASTNode::Me { .. } => ASTNodeType::Expression,
-            ASTNode::FromCall { .. } => ASTNodeType::Expression,
-            ASTNode::ThisField { .. } => ASTNodeType::Expression,
-            ASTNode::MeField { .. } => ASTNodeType::Expression,
-            ASTNode::Index { .. } => ASTNodeType::Expression,
-            ASTNode::MatchExpr { .. } => ASTNodeType::Expression,
-            ASTNode::QMarkPropagate { .. } => ASTNodeType::Expression,
-            ASTNode::Lambda { .. } => ASTNodeType::Expression,
-            ASTNode::ArrayLiteral { .. } => ASTNodeType::Expression,
-            ASTNode::MapLiteral { .. } => ASTNodeType::Expression,
-
-            // Diagnostic-only wrapper treated as structure
-            ASTNode::ScopeBox { ..
} => ASTNodeType::Structure,
+            ASTNode::Literal { .. } => E,
+            ASTNode::Variable { .. } => E,
+            ASTNode::BinaryOp { .. } => E,
+            ASTNode::UnaryOp { .. } => E,
+            ASTNode::FunctionCall { .. } => E,
+            ASTNode::Call { .. } => E,
+            ASTNode::MethodCall { .. } => E,
+            ASTNode::FieldAccess { .. } => E,
+            ASTNode::New { .. } => E,
+            ASTNode::This { .. } => E,
+            ASTNode::Me { .. } => E,
+            ASTNode::FromCall { .. } => E,
+            ASTNode::ThisField { .. } => E,
+            ASTNode::MeField { .. } => E,
+            ASTNode::Index { .. } => E,
+            ASTNode::MatchExpr { .. } => E,
+            ASTNode::QMarkPropagate { .. } => E,
+            ASTNode::Lambda { .. } => E,
+            ASTNode::ArrayLiteral { .. } => E,
+            ASTNode::MapLiteral { .. } => E,
+            ASTNode::AwaitExpression { .. } => E,

             // Statement nodes - executable actions
-            ASTNode::Program { .. } => ASTNodeType::Statement, // プログラム全体
-            ASTNode::Assignment { .. } => ASTNodeType::Statement,
-            ASTNode::Print { .. } => ASTNodeType::Statement,
-            ASTNode::Return { .. } => ASTNodeType::Statement,
-            ASTNode::Break { .. } => ASTNodeType::Statement,
-            ASTNode::Continue { .. } => ASTNodeType::Statement,
-            ASTNode::UsingStatement { .. } => ASTNodeType::Statement,
-            ASTNode::ImportStatement { .. } => ASTNodeType::Statement,
-            ASTNode::GlobalVar { .. } => ASTNodeType::Statement,
-
-            ASTNode::Local { .. } => ASTNodeType::Statement,
-            ASTNode::Outbox { .. } => ASTNodeType::Statement,
-            ASTNode::Nowait { .. } => ASTNodeType::Statement,
-            ASTNode::Arrow { .. } => ASTNodeType::Statement,
-            ASTNode::Throw { .. } => ASTNodeType::Statement,
-            ASTNode::AwaitExpression { .. } => ASTNodeType::Expression,
+            ASTNode::Program { .. } => S,
+            ASTNode::Assignment { .. } => S,
+            ASTNode::Print { .. } => S,
+            ASTNode::Return { .. } => S,
+            ASTNode::Break { .. } => S,
+            ASTNode::Continue { .. } => S,
+            ASTNode::UsingStatement { .. } => S,
+            ASTNode::ImportStatement { .. } => S,
+            ASTNode::GlobalVar { .. } => S,
+            ASTNode::Local { .. } => S,
+            ASTNode::Outbox { .. } => S,
+            ASTNode::Nowait { .. } => S,
+            ASTNode::Arrow { .. } => S,
+            ASTNode::Throw { ..
} => S,
         }
     }

-    /// 🎯 構造パターンチェック - 2段階パーサー用
     pub fn is_structure(&self) -> bool {
         matches!(self.classify(), ASTNodeType::Structure)
     }

-    /// ⚡ 式パターンチェック - 評価エンジン用
     pub fn is_expression(&self) -> bool {
         matches!(self.classify(), ASTNodeType::Expression)
     }

-    /// 📝 文パターンチェック - 実行エンジン用
     pub fn is_statement(&self) -> bool {
         matches!(self.classify(), ASTNodeType::Statement)
     }
diff --git a/src/backend/mir_interpreter/exec.rs b/src/backend/mir_interpreter/exec.rs
index a46ef750..85a982b5 100644
--- a/src/backend/mir_interpreter/exec.rs
+++ b/src/backend/mir_interpreter/exec.rs
@@ -79,6 +79,15 @@ impl MirInterpreter {
                     cur, e, self.last_inst
                 );
             }
+            // Optional concise error location print (env-gated)
+            if std::env::var("HAKO_VM_ERROR_LOC").ok().as_deref() == Some("1") {
+                eprintln!(
+                    "[vm/error/loc] fn={} bb={:?} last_inst={:?}",
+                    self.cur_fn.as_deref().unwrap_or(""),
+                    cur,
+                    self.last_inst
+                );
+            }
             return Err(e);
         }
diff --git a/src/backend/mir_interpreter/handlers/boxes_string.rs b/src/backend/mir_interpreter/handlers/boxes_string.rs
index 19d32d9a..f99cd54e 100644
--- a/src/backend/mir_interpreter/handlers/boxes_string.rs
+++ b/src/backend/mir_interpreter/handlers/boxes_string.rs
@@ -29,7 +29,7 @@ pub(super) fn try_handle_string_box(
     let Some(sb_norm) = sb_norm_opt else { return Ok(false) };
     // Only handle known string methods here (receiver is confirmed string)
     match method {
-        "length" => {
+        "length" | "size" => {
             let ret = sb_norm.length();
             this.write_result(dst, VMValue::from_nyash_box(ret));
             return Ok(true);
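Note: the execute_module hunk below resolves the entry point by trying NYASH_ENTRY (exact, then the text before '/'), then Main.main, then main. A self-contained sketch of that lookup order (illustrative; u32 stands in for the real MirFunction):

```rust
use std::collections::HashMap;

// Same candidate ordering as the mod.rs hunk below: env override first,
// then "Main.main", then plain "main"; "Name/arity" also matches "Name".
fn resolve_entry<'a>(funcs: &'a HashMap<String, u32>, env_entry: Option<&str>) -> Option<&'a u32> {
    let mut candidates: Vec<String> = Vec::new();
    if let Some(e) = env_entry {
        let e = e.trim();
        if !e.is_empty() { candidates.push(e.to_string()); }
    }
    candidates.push("Main.main".to_string());
    candidates.push("main".to_string());
    for c in &candidates {
        if let Some(f) = funcs.get(c) { return Some(f); } // exact match
        if let Some((head, _)) = c.split_once('/') {      // "Name/arity" -> "Name"
            if let Some(f) = funcs.get(head) { return Some(f); }
        }
    }
    None
}

fn main() {
    let mut funcs = HashMap::new();
    funcs.insert("Main.main".to_string(), 1u32);
    assert_eq!(resolve_entry(&funcs, Some("Main.main/0")), Some(&1));
}
```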
diff --git a/src/backend/mir_interpreter/mod.rs b/src/backend/mir_interpreter/mod.rs
index 0c690fa6..c9fd6571 100644
--- a/src/backend/mir_interpreter/mod.rs
+++ b/src/backend/mir_interpreter/mod.rs
@@ -107,11 +107,88 @@ impl MirInterpreter {
     pub fn execute_module(&mut self, module: &MirModule) -> Result<Box<dyn NyashBox>, VMError> {
         // Snapshot functions for call resolution
         self.functions = module.functions.clone();
-        let func = module
-            .functions
-            .get("main")
-            .ok_or_else(|| VMError::InvalidInstruction("missing main".into()))?;
-        let ret = self.execute_function(func)?;
+
+        // Determine entry function with sensible fallbacks
+        // Priority:
+        //  1) NYASH_ENTRY env (exact), then the name before '/' if provided (e.g., "Main.main/0" → "Main.main")
+        //  2) "Main.main" if present
+        //  3) "main" (legacy/simple scripts)
+        let mut candidates: Vec<String> = Vec::new();
+        if let Ok(e) = std::env::var("NYASH_ENTRY") {
+            if !e.trim().is_empty() {
+                candidates.push(e.trim().to_string());
+            }
+        }
+        candidates.push("Main.main".to_string());
+        candidates.push("main".to_string());
+
+        // Try candidates in order
+        let mut chosen: Option<&crate::mir::MirFunction> = None;
+        for c in &candidates {
+            // exact
+            if let Some(f) = module.functions.get(c) {
+                chosen = Some(f);
+                break;
+            }
+            // if it contains '/': try the name before '/'
+            if let Some((head, _)) = c.split_once('/') {
+                if let Some(f) = module.functions.get(head) {
+                    chosen = Some(f);
+                    break;
+                }
+            }
+            // if it looks like "Box.method": try plain "main" as a last resort, only when c ends with ".main"
+            if c.ends_with(".main") {
+                if let Some(f) = module.functions.get("main") {
+                    chosen = Some(f);
+                    break;
+                }
+            }
+        }
+
+        let func = match chosen {
+            Some(f) => f,
+            None => {
+                // Build a helpful error message
+                let mut names: Vec<&String> = module.functions.keys().collect();
+                names.sort();
+                let avail = names.into_iter().take(12).cloned().collect::<Vec<_>>().join(", ");
+                let tried = candidates.join(", ");
+                let msg = format!(
+                    "entry function not found. searched: [{}]. available: [{}]. hint: define 'static box Main {{ method main(args){{ ... }} }}' or set NYASH_ENTRY=Name",
+                    tried, avail
+                );
+                return Err(VMError::InvalidInstruction(msg));
+            }
+        };
+
+        // Prepare arguments if the entry takes parameters (pass script args as ArrayBox)
+        let ret = if func.signature.params.is_empty() {
+            self.execute_function(func)?
+        } else {
+            // Build argv from NYASH_SCRIPT_ARGS_JSON (set by the CLI when using `--`) or NYASH_ARGV (JSON array)
+            let mut argv_list: Vec<String> = Vec::new();
+            if let Ok(s) = std::env::var("NYASH_SCRIPT_ARGS_JSON") {
+                if let Ok(v) = serde_json::from_str::<Vec<String>>(&s) { argv_list = v; }
+            } else if let Ok(s) = std::env::var("NYASH_ARGV") {
+                if let Ok(v) = serde_json::from_str::<Vec<String>>(&s) { argv_list = v; }
+            }
+            // Construct an ArrayBox of StringBox
+            let array = crate::boxes::array::ArrayBox::new();
+            for a in argv_list.iter() {
+                let sb = crate::boxes::basic::StringBox::new(a);
+                let _ = array.push(Box::new(sb));
+            }
+            let boxed: Box<dyn NyashBox> = Box::new(array);
+            let arg0 = super::vm_types::VMValue::from_nyash_box(boxed);
+            // Fill remaining params with Void
+            let mut vm_args: Vec<super::vm_types::VMValue> = Vec::new();
+            vm_args.push(arg0);
+            for _ in 1..func.signature.params.len() {
+                vm_args.push(super::vm_types::VMValue::Void);
+            }
+            self.exec_function_inner(func, Some(&vm_args))?
+        };
         Ok(ret.to_nyash_box())
     }
diff --git a/src/boxes/array/mod.rs b/src/boxes/array/mod.rs
index 49d6bad5..47b70286 100644
--- a/src/boxes/array/mod.rs
+++ b/src/boxes/array/mod.rs
@@ -62,6 +62,11 @@ impl ArrayBox {
         Box::new(IntegerBox::new(self.items.read().unwrap().len() as i64))
     }

+    /// size(): alias of length()
+    pub fn size(&self) -> Box<dyn NyashBox> {
+        self.length()
+    }
+
     /// Rust-side helper: element count as usize (for tests)
     pub fn len(&self) -> usize {
         self.items.read().unwrap().len()
diff --git a/src/boxes/basic/string_box.rs b/src/boxes/basic/string_box.rs
index 7d9a58c2..5d2487e5 100644
--- a/src/boxes/basic/string_box.rs
+++ b/src/boxes/basic/string_box.rs
@@ -117,6 +117,11 @@ impl StringBox {
         Box::new(IntegerBox::new(n))
     }

+    /// size(): alias of length()
+    pub fn size(&self) -> Box<dyn NyashBox> {
+        self.length()
+    }
+
     /// Convert string to integer (parse as i64)
     pub fn to_integer(&self) -> Box<dyn NyashBox> {
         use crate::box_trait::IntegerBox;
@@ -202,4 +207,4 @@ impl Display for StringBox {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         self.fmt_box(f)
     }
-}
\ No newline at end of file
+}
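Note: the two size() aliases above, and the "length" | "size" arm in the VM string handler, all follow the same delegation pattern, so the alias can never drift from length(). A tiny standalone illustration (S is a stand-in; the real types are ArrayBox/StringBox):

```rust
struct S(String);

impl S {
    fn length(&self) -> usize {
        self.0.chars().count()
    }
    // size() delegates to length(), exactly like the size() methods above.
    fn size(&self) -> usize {
        self.length()
    }
}

fn main() {
    let s = S("hako".into());
    assert_eq!(s.size(), s.length());
}
```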
diff --git a/src/mir/builder/stmts.rs b/src/mir/builder/stmts.rs
index 3d970725..4cf64988 100644
--- a/src/mir/builder/stmts.rs
+++ b/src/mir/builder/stmts.rs
@@ -174,48 +174,11 @@ impl super::MirBuilder {
     /// Build a single statement node.
     ///
     /// Note:
-    /// - Stage-3 While/ForRange lowering is delegated to existing Loop/expr lowering
-    ///   or handled in a dedicated pass; this function does not emit ad-hoc control
-    ///   flow for them to avoid divergence from SSOT/loop_common.
+    /// - While/ForRange are kept as a future extension point for dedicated Loop
+    ///   lowering; for now they are delegated to build_expression, just like the
+    ///   other dedicated builders/existing passes.
     pub(super) fn build_statement(&mut self, node: ASTNode) -> Result {
-        match node.clone() {
-            ASTNode::While { condition, body, .. } => {
-                // Desugar Stage-3 while into legacy loop(condition) { body }
-                let loop_node = ASTNode::Loop { condition, body, span: crate::ast::Span::unknown() };
-                self.build_expression(loop_node)
-            }
-            ASTNode::ForRange { var_name, start, end, body, .. } => {
-                use crate::ast::{Span, LiteralValue, BinaryOperator};
-                // local var initialization
-                let init = ASTNode::Local {
-                    variables: vec![var_name.clone()],
-                    initial_values: vec![Some(start)],
-                    span: Span::unknown(),
-                };
-                // condition: var_name < end
-                let cond = ASTNode::BinaryOp {
-                    left: Box::new(ASTNode::Variable { name: var_name.clone(), span: Span::unknown() }),
-                    operator: BinaryOperator::Less,
-                    right: end,
-                    span: Span::unknown(),
-                };
-                // step: var_name = var_name + 1
-                let step = ASTNode::Assignment {
-                    target: Box::new(ASTNode::Variable { name: var_name.clone(), span: Span::unknown() }),
-                    value: Box::new(ASTNode::BinaryOp {
-                        left: Box::new(ASTNode::Variable { name: var_name.clone(), span: Span::unknown() }),
-                        operator: BinaryOperator::Add,
-                        right: Box::new(ASTNode::Literal { value: LiteralValue::Integer(1), span: Span::unknown() }),
-                        span: Span::unknown(),
-                    }),
-                    span: Span::unknown(),
-                };
-                let mut loop_body = body.clone();
-                loop_body.push(step);
-                let loop_node = ASTNode::Loop { condition: Box::new(cond), body: loop_body, span: Span::unknown() };
-                let program = ASTNode::Program { statements: vec![init, loop_node], span: Span::unknown() };
-                self.build_expression(program)
-            }
+        match node {
+            // Statement-specific arms (While / ForRange / Match / Using, etc.) will be added here later.
             other => self.build_expression(other),
         }
     }
diff --git a/src/runner/modes/common_util/mod.rs b/src/runner/modes/common_util/mod.rs
index acefb02b..24d058a9 100644
--- a/src/runner/modes/common_util/mod.rs
+++ b/src/runner/modes/common_util/mod.rs
@@ -12,3 +12,4 @@ pub mod resolve;
 pub mod exec;
 pub mod core_bridge;
 pub mod hako;
+pub mod plugin_guard;
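Note: the new plugin_guard module below treats NYASH_PLUGIN_OVERRIDE_TYPES as a comma-separated list and normalizes it (trim, drop empties, sort, dedup). A standalone sketch of that normalization:

```rust
// Same normalization as gather_required_providers() in the new module below.
fn normalize(list: &str) -> Vec<String> {
    let mut v: Vec<String> = list
        .split(',')
        .map(|s| s.trim().to_string())
        .filter(|s| !s.is_empty())
        .collect();
    v.sort();
    v.dedup();
    v
}

fn main() {
    assert_eq!(
        normalize(" FileBox, MapBox,,FileBox "),
        vec!["FileBox".to_string(), "MapBox".to_string()]
    );
}
```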
diff --git a/src/runner/modes/common_util/plugin_guard.rs b/src/runner/modes/common_util/plugin_guard.rs
new file mode 100644
index 00000000..f9007188
--- /dev/null
+++ b/src/runner/modes/common_util/plugin_guard.rs
@@ -0,0 +1,89 @@
+/*!
+ * Plugin guard utilities
+ *
+ * Centralized helper to check required plugin providers and emit
+ * consistent diagnostics across runner modes.
+ */
+
+/// Build the list of required provider type names.
+///
+/// Priority:
+/// - If env `NYASH_PLUGIN_OVERRIDE_TYPES` is set, use it (comma-separated).
+/// - Otherwise, return a conservative default set used in VM paths.
+pub fn gather_required_providers() -> Vec<String> {
+    if let Ok(list) = std::env::var("NYASH_PLUGIN_OVERRIDE_TYPES") {
+        let mut v: Vec<String> = list
+            .split(',')
+            .map(|s| s.trim().to_string())
+            .filter(|s| !s.is_empty())
+            .collect();
+        v.sort();
+        v.dedup();
+        return v;
+    }
+    // Default conservative set
+    vec![
+        "FileBox".to_string(),
+        "ConsoleBox".to_string(),
+        "ArrayBox".to_string(),
+        "MapBox".to_string(),
+        "StringBox".to_string(),
+        "IntegerBox".to_string(),
+    ]
+}
+
+/// Return missing providers by checking the unified registry.
+pub fn detect_missing_providers(required: &[String]) -> Vec<String> {
+    let reg = nyash_rust::runtime::get_global_registry();
+    let mut missing: Vec<String> = Vec::new();
+    for t in required {
+        if reg.get_provider(t).is_none() {
+            missing.push(t.clone());
+        }
+    }
+    missing
+}
+
+/// Emit hints for specific provider types.
+fn emit_hints_for(missing: &[String]) {
+    if missing.iter().any(|t| t == "FileBox") {
+        eprintln!("[plugin/hint] FileBox plugin is required for file I/O (new FileBox/open/read).");
+        eprintln!("[plugin/hint] Build and load the plugin: see tools/plugin_v2_smoke.sh or configure nyash.toml [libraries.*.FileBox].");
+        eprintln!("[plugin/hint] Ensure LD_LIBRARY_PATH (or the platform equivalent) includes the plugin directory.");
+        eprintln!("[plugin/hint] For analyzer runs, you can avoid FileBox via --source-file <path> <text>.");
+    }
+}
+
+/// Check provider availability and emit diagnostics.
+///
+/// - `strict`: exit(1) when any provider is missing.
+/// - `quiet_pipe`: respect quiet JSON pipelines; diagnostics go to stderr only.
+/// - `label`: context label (e.g., "vm", "vm-fallback") for messages.
+pub fn check_and_report(strict: bool, quiet_pipe: bool, label: &str) {
+    let required = gather_required_providers();
+    let missing = detect_missing_providers(&required);
+    if missing.is_empty() {
+        return;
+    }
+
+    if strict {
+        eprintln!(
+            "❌ {} plugin-first strict: missing providers for: {:?}",
+            label, missing
+        );
+        emit_hints_for(&missing);
+        // Do not print anything to stdout in quiet mode; just exit with 1
+        std::process::exit(1);
+    } else {
+        eprintln!(
+            "[plugin/missing] {} providers not loaded: {:?}",
+            label, missing
+        );
+        emit_hints_for(&missing);
+        // In quiet JSON mode stdout stays clean; the hints already went to stderr.
+        let _ = quiet_pipe;
+    }
+}
diff --git a/src/runner/modes/llvm.rs b/src/runner/modes/llvm.rs
index dec9f1b7..b7978eb7 100644
--- a/src/runner/modes/llvm.rs
+++ b/src/runner/modes/llvm.rs
@@ -12,6 +12,13 @@ impl NyashRunner {
         // Initialize plugin host so method_id injection can resolve plugin calls
         crate::runner_plugin_init::init_bid_plugins();

+        // Friendly plugin guard (non-strict): unify diagnostics across modes
+        crate::runner::modes::common_util::plugin_guard::check_and_report(
+            false,
+            crate::config::env::env_bool("NYASH_JSON_ONLY"),
+            "llvm",
+        );
+
         // Read the file
         let code = match fs::read_to_string(filename) {
             Ok(content) => content,
diff --git a/src/runner/modes/vm.rs b/src/runner/modes/vm.rs
index c4a2567b..b563eebf 100644
--- a/src/runner/modes/vm.rs
+++ b/src/runner/modes/vm.rs
@@ -67,30 +67,13 @@ impl NyashRunner {
             }
             std::env::set_var("NYASH_PLUGIN_OVERRIDE_TYPES", override_types.join(","));

-            // Strict mode: verify providers exist for override types
-            if crate::config::env::env_bool("NYASH_VM_PLUGIN_STRICT") {
-                let v2 = nyash_rust::runtime::get_global_registry();
-                let mut missing: Vec<String> = Vec::new();
-                for t in [
-                    "FileBox",
-                    "ConsoleBox",
-                    "ArrayBox",
-                    "MapBox",
-                    "StringBox",
-                    "IntegerBox",
-                ] {
-                    if v2.get_provider(t).is_none() {
-                        missing.push(t.to_string());
-                    }
-                }
-                if !missing.is_empty() {
-                    eprintln!(
-                        "❌ VM plugin-first strict: missing providers for: {:?}",
-                        missing
-                    );
-                    std::process::exit(1);
-                }
-            }
+            // Centralized plugin guard
+            let strict = crate::config::env::env_bool("NYASH_VM_PLUGIN_STRICT");
+            crate::runner::modes::common_util::plugin_guard::check_and_report(
+                strict,
+                quiet_pipe,
+                "vm",
+            );
         }

         // Read the file
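Note: the guarded call sites above and below rely on crate::config::env::env_bool, whose implementation this patch does not show. A plausible sketch, assuming "1"/"true" mean enabled (an assumption, not the confirmed helper):

```rust
// Assumed semantics of crate::config::env::env_bool; sketch only.
fn env_bool(name: &str) -> bool {
    matches!(
        std::env::var(name).ok().as_deref().map(str::trim),
        Some("1") | Some("true")
    )
}

fn main() {
    std::env::set_var("NYASH_JSON_ONLY", "1");
    assert!(env_bool("NYASH_JSON_ONLY"));
}
```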
diff --git a/src/runner/modes/vm_fallback.rs b/src/runner/modes/vm_fallback.rs
index 43d2d93f..50c4b983 100644
--- a/src/runner/modes/vm_fallback.rs
+++ b/src/runner/modes/vm_fallback.rs
@@ -295,6 +295,12 @@ impl NyashRunner {
         // Execute via MIR interpreter
         let mut vm = MirInterpreter::new();
+        // Centralized plugin guard (non-strict by default on the fallback route)
+        crate::runner::modes::common_util::plugin_guard::check_and_report(
+            false,
+            crate::config::env::env_bool("NYASH_JSON_ONLY"),
+            "vm-fallback",
+        );
         // Optional: verify MIR before execution (dev-only)
         if crate::config::env::env_bool("NYASH_VM_VERIFY_MIR") {
             let mut verifier = crate::mir::verification::MirVerifier::new();
diff --git a/tools/hako_check.sh b/tools/hako_check.sh
index c59271fa..90eb02ad 100644
--- a/tools/hako_check.sh
+++ b/tools/hako_check.sh
@@ -11,7 +11,7 @@ if [ ! -x "$BIN" ]; then
 fi

 if [ $# -lt 1 ]; then
-  echo "Usage: $0 [--format text|dot] [more...]" >&2
+  echo "Usage: $0 [--format text|dot|json-lsp] [more...]" >&2
   exit 2
 fi
@@ -24,7 +24,7 @@ fi
 list_targets() {
   local p="$1"
   if [ -d "$p" ]; then
-    find "$p" -type f -name '*.hako'
+    find "$p" -type f -name '*.hako' | sort
   else
     echo "$p"
   fi
@@ -75,17 +75,35 @@ if [ "$FORMAT" = "dot" ]; then
     NYASH_ENABLE_USING=1 \
     HAKO_ENABLE_USING=1 \
     NYASH_USING_AST=1 \
+    NYASH_JSON_ONLY=1 \
     NYASH_NY_COMPILER_TIMEOUT_MS="${NYASH_NY_COMPILER_TIMEOUT_MS:-8000}" \
     "$BIN" --backend vm "$ROOT/tools/hako_check/cli.hako" -- --format dot "${FILES[@]}" \
-    >"/tmp/hako_lint_out_$$.log" 2>&1 || true
-  out="$(cat "/tmp/hako_lint_out_$$.log")"; rc=0
-  # Always print DOT output (everything except RC lines filtered later if needed)
-  echo "$out" | sed -n '1,99999p'
-  if echo "$out" | grep -q '^RC: '; then
-    rc="$(echo "$out" | sed -n 's/^RC: //p' | tail -n1)"
-  else rc=1; fi
-  rm -f "/tmp/hako_lint_out_$$.log"
-  if [ "$rc" -ne 0 ]; then exit 1; fi
+    >"/tmp/hako_lint_out_$$.log" 2>"/tmp/hako_lint_err_$$.log" && rc=0 || rc=$?
+  # Only print the DOT graph body to STDOUT
+  awk '/^digraph /, /^}/' "/tmp/hako_lint_out_$$.log"
+  rm -f "/tmp/hako_lint_out_$$.log" "/tmp/hako_lint_err_$$.log"
+  if [ "$rc" -ne 0 ]; then exit 1; fi
+  exit 0
+elif [ "$FORMAT" = "json-lsp" ]; then
+  # Aggregate and emit pure JSON (no summaries). Exit code = findings count.
+  TMP_LIST="/tmp/hako_targets_$$.txt"; : >"$TMP_LIST"
+  for p in "$@"; do list_targets "$p" >>"$TMP_LIST"; done
+  mapfile -t FILES <"$TMP_LIST"
+  rm -f "$TMP_LIST"
+  NYASH_DISABLE_NY_COMPILER=1 \
+  HAKO_DISABLE_NY_COMPILER=1 \
+  NYASH_PARSER_STAGE3=1 \
+  HAKO_PARSER_STAGE3=1 \
+  NYASH_PARSER_SEAM_TOLERANT=1 \
+  HAKO_PARSER_SEAM_TOLERANT=1 \
+  NYASH_PARSER_ALLOW_SEMICOLON=1 \
+  NYASH_ENABLE_USING=1 \
+  HAKO_ENABLE_USING=1 \
+  NYASH_USING_AST=1 \
+  NYASH_JSON_ONLY=1 \
+  NYASH_NY_COMPILER_TIMEOUT_MS="${NYASH_NY_COMPILER_TIMEOUT_MS:-8000}" \
+  "$BIN" --backend vm "$ROOT/tools/hako_check/cli.hako" -- --format json-lsp "${FILES[@]}"
+  exit $?
else for p in "$@"; do while IFS= read -r f; do run_one "$f"; done < <(list_targets "$p") diff --git a/tools/hako_check/analysis_consumer.hako b/tools/hako_check/analysis_consumer.hako index cc7dfe89..496b8966 100644 --- a/tools/hako_check/analysis_consumer.hako +++ b/tools/hako_check/analysis_consumer.hako @@ -10,71 +10,118 @@ // } using selfhost.shared.common.string_helpers as Str +using tools.hako_parser.parser_core as HakoParserCoreBox static box HakoAnalysisBuilderBox { - build_from_source(text, path) { + build_from_source(text, path) { return me.build_from_source_flags(text, path, 0) } + build_from_source_flags(text, path, no_ast) { local ir = new MapBox() ir.set("path", path) ir.set("uses", new ArrayBox()) ir.set("boxes", new ArrayBox()) ir.set("methods", new ArrayBox()) ir.set("calls", new ArrayBox()) + ir.set("source", text) local eps = new ArrayBox(); eps.push("Main.main"); eps.push("main"); ir.set("entrypoints", eps) + // debug disabled in strict environments + local debug = 0 - // 1) collect using lines - local lines = text.split("\n") - local _i = 0 - while _i < lines.size() { - local ln = me._ltrim(lines.get(_i)) - if ln.indexOf('using "') == 0 { - // using "pkg.name" as Alias - local q1 = ln.indexOf('"') - local q2 = -1 - if q1 >= 0 { q2 = ln.indexOf('"', q1+1) } - if q1 >= 0 && q2 > q1 { ir.get("uses").push(ln.substring(q1+1, q2)) } + // Prefer AST (Hako Parser) if possible + local ast = null + if no_ast == 0 { ast = HakoParserCoreBox.parse(text) } + if ast != null { + // uses + local uses = ast.get("uses") + if uses != null { local ui=0; while ui= 0 { q2 = ln.indexOf('"', q1+1) } + if q1 >= 0 && q2 > q1 { ir.get("uses").push(ln.substring(q1+1, q2)) } + } + _i = _i + 1 } - // (non-static) box Name { // optional future; ignore for now - - // method foo(args) { - if ln.indexOf("method ") == 0 && cur_name != null { - local rest = ln.substring(Str.len("method ")) - local p = rest.indexOf("(") - local mname = (p>0) ? rest.substring(0,p) : rest - mname = me._rstrip(mname) - local arity = me._count_commas_in_parens(rest) - local method = new MapBox(); method.set("name", mname); method.set("arity", arity); method.set("span", Str.int_to_str(i2+1)) - // attach to box - local arr = boxes.get(boxes.size()-1).get("methods"); arr.push(method) - // record qualified - ir.get("methods").push(cur_name + "." + mname + "/" + Str.int_to_str(arity)) - continue - } - // box boundary heuristic - if ln == "}" { cur_name = null; cur_is_static = 0; } - i2 = i2 + 1 } + // 2) scan static/box and methods when AST did not populate any methods + local need_method_scan = 1 + if ir.get("methods") != null { if ir.get("methods").size() > 0 { need_method_scan = 0 } } + if need_method_scan == 1 { + // debug noop + local boxes = ir.get("boxes") + local cur_name = null + local cur_is_static = 0 + local i2 = 0 + while i2 < lines.size() { + local ln = me._ltrim(lines.get(i2)) + // static box Name { + if ln.indexOf("static box ") == 0 { + local rest = ln.substring("static box ".length()) + local sp = me._upto(rest, " {") + cur_name = sp + cur_is_static = 1 + local b = new MapBox(); b.set("name", cur_name); b.set("is_static", true); b.set("methods", new ArrayBox()); boxes.push(b) + i2 = i2 + 1 + continue + } + // method foo(args) { + if ln.indexOf("method ") == 0 { + if cur_name == null { cur_name = "Main" } + local rest = ln.substring("method ".length()) + local p = rest.indexOf("(") + local mname = (p>0) ? 
rest.substring(0,p) : rest + mname = me._rstrip(mname) + local arity = me._count_commas_in_parens(rest) + local method = new MapBox(); method.set("name", mname); method.set("arity", arity); method.set("span", (i2+1)) + boxes.get(boxes.size()-1).get("methods").push(method) + ir.get("methods").push(cur_name + "." + mname + "/" + me._itoa(arity)) + i2 = i2 + 1 + continue + } + // box boundary heuristic + if ln == "}" { cur_name = null; cur_is_static = 0; } + i2 = i2 + 1 + } + } + // Final fallback: super simple scan over raw text if still no methods + if ir.get("methods").size() == 0 { me._scan_methods_fallback(text, ir) } + // 3) calls: naive pattern Box.method( or Alias.method( // For MVP, we scan whole text and link within same file boxes only. + // debug noop local i3 = 0 while i3 < lines.size() { local ln = lines.get(i3) @@ -82,7 +129,7 @@ static box HakoAnalysisBuilderBox { // We fallback to "Main.main" when unknown local src = me._last_method_for_line(ir, i3+1) local pos = 0 - local L = Str.len(ln) + local L = ln.length() local k = 0 while k <= L { local dot = ln.indexOf(".", pos) @@ -105,8 +152,16 @@ static box HakoAnalysisBuilderBox { // utilities _ltrim(s) { return me._ltrim_chars(s, " \t") } + _itoa(n) { local v=0+n; if v==0 { return "0" } local out=""; local digits="0123456789"; local tmp=""; while v>0 { local d=v%10; tmp=digits.substring(d,d+1)+tmp; v=v/10 } out=tmp; return out } + _split_lines(s) { + local arr = new ArrayBox(); if s == null { return arr } + local n = s.length(); local last = 0; local i = 0 + loop (i < n) { local ch = s.substring(i,i+1); if ch == "\n" { arr.push(s.substring(last,i)); last = i+1 } i = i + 1 } + if last <= n { arr.push(s.substring(last)) } + return arr + } _rstrip(s) { - local n = Str.len(s) + local n = s.length() local last = n // scan from end using reverse index local r = 0 @@ -120,7 +175,7 @@ static box HakoAnalysisBuilderBox { return s.substring(0, last) } _ltrim_chars(s, cs) { - local n = Str.len(s) + local n = s.length() local head = 0 local idx = 0 while idx < n { @@ -141,7 +196,7 @@ static box HakoAnalysisBuilderBox { local p1 = rest.indexOf("("); local p2 = rest.indexOf(")", p1+1) if p1 < 0 || p2 < 0 || p2 <= p1+1 { return 0 } local inside = rest.substring(p1+1, p2) - local cnt = 1; local n=Str.len(inside); local any=0 + local cnt = 1; local n=inside.length(); local any=0 local i5 = 0 while i5 < n { local c = inside.substring(i5,i5+1) @@ -152,6 +207,67 @@ static box HakoAnalysisBuilderBox { if any==0 { return 0 } return cnt } + _scan_methods_fallback(text, ir) { + if text == null { return 0 } + local methods = ir.get("methods") + local box_name = "Main" + // find "static box Name" to prefer given name + local pbox = text.indexOf("static box ") + if pbox >= 0 { + local after = pbox + "static box ".length() + local name = "" + local i = after + loop (i < text.length()) { + local ch = text.substring(i,i+1) + if (ch >= "A" && ch <= "Z") || (ch >= "a" && ch <= "z") || ch == "_" || (ch >= "0" && ch <= "9") { + name = name + ch + i = i + 1 + continue + } + break + } + if name != "" { box_name = name } + } + // scan for "method " occurrences + local pos = 0 + local n = text.length() + loop (pos < n) { + local k = text.indexOf("method ", pos) + if k < 0 { break } + local i = k + "method ".length() + // read ident + local mname = "" + loop (i < n) { + local ch2 = text.substring(i,i+1) + if (ch2 >= "A" && ch2 <= "Z") || (ch2 >= "a" && ch2 <= "z") || ch2 == "_" || (ch2 >= "0" && ch2 <= "9") { + mname = mname + ch2 + i = i + 1 + continue + } + break + 
}
+        // look ahead for params (...) and count commas
+        local ar = 0
+        local lp = text.indexOf("(", i)
+        if lp >= 0 {
+            local rp = text.indexOf(")", lp+1)
+            if rp > lp+1 {
+                local inside = text.substring(lp+1, rp)
+                local any = 0; local c = 0; local j=0
+                loop (j < inside.length()) {
+                    local ch3=inside.substring(j,j+1)
+                    if ch3 == "," { c = c + 1 }
+                    if ch3 != " " && ch3 != "\t" { any = 1 }
+                    j = j + 1
+                }
+                if any == 1 { ar = c + 1 }
+            }
+        }
+        if mname != "" { methods.push(box_name + "." + mname + "/" + me._itoa(ar)) }
+        pos = i
+    }
+    return methods.size()
+  }
   _scan_ident_rev(s, i) {
       if i<0 { return null }
       local n = i
@@ -168,7 +284,7 @@
       return s.substring(start, i+1)
   }
   _scan_ident_fwd(s, i) {
-      local n=Str.len(s); if i>=n { return null }
+      local n=s.length(); if i>=n { return null }
       local endp = i
       local off = 0
       while off < n {
@@ -190,8 +306,8 @@
       return 0
   }
   _last_method_for_line(ir, line_num) {
-      // very naive: pick Main.main when unknown
-      // Future: track method spans. For MVP, return "Main.main".
+      // Conservative: return the default entry when spans are not guaranteed to be maps.
+      // This avoids runtime errors when method_spans is absent or malformed in MVP builds.
       return "Main.main"
   }
}
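Note: when the AST yields no methods, the analyzer falls back to the plain text scan above (_scan_methods_fallback and the line-based variant). A rough Rust equivalent of the per-line scan, for illustration only:

```rust
// Finds `method name(args)` lines and records (name, arity), mirroring
// the Hako text-scan fallback above; whitespace-only params count as 0.
fn scan_methods(src: &str) -> Vec<(String, usize)> {
    let mut out = Vec::new();
    for line in src.lines() {
        let t = line.trim_start();
        if let Some(rest) = t.strip_prefix("method ") {
            if let Some(p) = rest.find('(') {
                let name = rest[..p].trim().to_string();
                let inside = rest[p + 1..].split(')').next().unwrap_or("");
                let arity = if inside.trim().is_empty() { 0 } else { inside.split(',').count() };
                out.push((name, arity));
            }
        }
    }
    out
}

fn main() {
    assert_eq!(scan_methods("method foo(a, b) {"), vec![("foo".to_string(), 2)]);
}
```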
diff --git a/tools/hako_check/cli.hako b/tools/hako_check/cli.hako
index 52dae271..18b9b587 100644
--- a/tools/hako_check/cli.hako
+++ b/tools/hako_check/cli.hako
@@ -1,4 +1,5 @@
 // tools/hako_check/cli.hako — HakoAnalyzerBox (MVP)
+using selfhost.shared.common.string_helpers as Str
 using tools.hako_check.analysis_consumer as HakoAnalysisBuilderBox
 using tools.hako_check.rules.rule_include_forbidden as RuleIncludeForbiddenBox
 using tools.hako_check.rules.rule_using_quoted as RuleUsingQuotedBox
@@ -6,53 +7,99 @@ using tools.hako_check.rules.rule_static_top_assign as RuleStaticTopAssignBox
 using tools.hako_check.rules.rule_global_assign as RuleGlobalAssignBox
 using tools.hako_check.rules.rule_dead_methods as RuleDeadMethodsBox
 using tools.hako_check.rules.rule_jsonfrag_usage as RuleJsonfragUsageBox
+using tools.hako_check.rules.rule_unused_alias as RuleUnusedAliasBox
+using tools.hako_check.rules.rule_non_ascii_quotes as RuleNonAsciiQuotesBox
+using tools.hako_check.render.graphviz as GraphvizRenderBox
+using tools.hako_parser.parser_core as HakoParserCoreBox

 static box HakoAnalyzerBox {
     run(args) {
         if args == null || args.size() < 1 { print("[lint/error] missing paths"); return 2 }
-        // options: --format {text|dot|json}
+        // options: --format {text|dot|json} (accept anywhere)
         local fmt = "text"
-        local start = 0
-        if args.size() >= 2 && args.get(0) == "--format" {
-            fmt = args.get(1)
-            start = 2
-        }
-        if args.size() <= start { print("[lint/error] missing paths"); return 2 }
+        local debug = 0
+        local no_ast = 0
+        // single-pass parse: handle options in-place and collect sources
+        local i = 0
         local fail = 0
         local irs = new ArrayBox()
-        // for i in start..(args.size()-1)
-        local i = start
+        local diags = new ArrayBox()
+        // Support inline sources: --source-file <path> <text>. Also accept --debug and --format anywhere.
        while i < args.size() {
            local p = args.get(i)
-           local f = new FileBox(); if f.open(p) == 0 { print("[lint/error] cannot open: " + p); fail = fail + 1; continue }
-           local text = f.read(); f.close()
+           // handle options
+           if p == "--debug" { debug = 1; i = i + 1; continue }
+           if p == "--no-ast" { no_ast = 1; i = i + 1; continue }
+           if p == "--format" {
+               if i + 1 >= args.size() { print("[lint/error] --format requires value"); return 2 }
+               fmt = args.get(i+1); i = i + 2; continue
+           }
+           // source handling
+           local text = null
+           if p == "--source-file" {
+               if i + 2 < args.size() { p = args.get(i+1); text = args.get(i+2); i = i + 3 } else { print("[lint/error] --source-file requires <path> <text>"); return 2 }
+           } else {
+               // Read from the filesystem via FileBox (plugin must be available)
+               local f = new FileBox(); if f.open(p) == 0 { print("[lint/error] cannot open: " + p); fail = fail + 1; i = i + 1; continue }
+               text = f.read(); f.close(); i = i + 1
+           }
+           // keep a copy before sanitize for rules that must see the original bytes (HC017, etc.)
+           local text_raw = text
            // pre-sanitize (ASCII quotes, normalize newlines) — minimal & reversible
            text = me._sanitize(text)
            // analysis
-           local ir = HakoAnalysisBuilderBox.build_from_source(text, p)
+           local ir = HakoAnalysisBuilderBox.build_from_source_flags(text, p, no_ast)
+           // parse the AST once for AST-capable rules (skipped when no_ast=1)
+           local ast = null
+           if no_ast == 0 { ast = HakoParserCoreBox.parse(text) }
+           if debug == 1 {
+               local mc = (ir.get("methods")!=null)?ir.get("methods").size():0
+               local cc = (ir.get("calls")!=null)?ir.get("calls").size():0
+               local ec = (ir.get("entrypoints")!=null)?ir.get("entrypoints").size():0
+               print("[hako_check/IR] file=" + p + " methods=" + me._itoa(mc) + " calls=" + me._itoa(cc) + " eps=" + me._itoa(ec))
+           }
            irs.push(ir)
            // rules that work on raw source
            local out = new ArrayBox()
-           RuleIncludeForbiddenBox.apply(text, p, out)
+           if ast != null {
+               local before = out.size()
+               RuleIncludeForbiddenBox.apply_ast(ast, p, out)
+               // Fall back to the text scan if the AST did not detect any include
+               if out.size() == before { RuleIncludeForbiddenBox.apply(text, p, out) }
+           } else {
+               RuleIncludeForbiddenBox.apply(text, p, out)
+           }
            RuleUsingQuotedBox.apply(text, p, out)
+           RuleUnusedAliasBox.apply(text, p, out)
            RuleStaticTopAssignBox.apply(text, p, out)
            RuleGlobalAssignBox.apply(text, p, out)
+           // HC017 must inspect the original text prior to sanitize
+           RuleNonAsciiQuotesBox.apply(text_raw, p, out)
            RuleJsonfragUsageBox.apply(text, p, out)
            // rules that need IR (enable dead code detection)
+           local before_n = out.size()
            RuleDeadMethodsBox.apply_ir(ir, p, out)
+           if debug == 1 {
+               local after_n = out.size()
+               local added = after_n - before_n
+               print("[hako_check/HC011] file=" + p + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
+           }
            // flush
            // for j in 0..(n-1)
-           local n = out.size(); if n > 0 && fmt == "text" {
+           local n = out.size(); if n > 0 && fmt == "text" {
                local j = 0; while j < n { print(out.get(j)); j = j + 1 }
            }
+           // also collect diagnostics for json-lsp
+           local j2 = 0; while j2 < n { local msg = out.get(j2); local d = me._parse_msg_to_diag(msg, p); if d != null { diags.push(d) }; j2 = j2 + 1 }
            fail = fail + n
-           i = i + 1
        }
-       // optional DOT/JSON output (MVP: dot only)
+       // optional DOT/JSON output
        if fmt == "dot" { me._render_dot_multi(irs) }
+       if fmt == "json-lsp" { me._render_json_lsp(diags) }
        // return number of findings as RC
        return fail
    }
+    // no-op
    _sanitize(text) {
        if text == null { return text }
        // Normalize CRLF -> LF and convert 
fancy quotes to ASCII @@ -72,29 +119,114 @@ static box HakoAnalyzerBox { } return out } - _render_dot_multi(irs) { - // Minimal DOT: emit method nodes; edges omitted in MVP - print("digraph Hako {") - if irs == null { print("}"); return 0 } - local i = 0 - while i < irs.size() { - local ir = irs.get(i) - if ir != null { - local ms = ir.get("methods") - if ms != null { - local j = 0 - while j < ms.size() { - local name = ms.get(j) - print(" \"" + name + "\";") - j = j + 1 - } - } + _render_json_lsp(diags) { + // Emit diagnostics pretty-printed to match expected fixtures + diags = me._sort_diags(diags) + print("{\"diagnostics\":[") + if diags != null { + local i = 0 + while i < diags.size() { + local d = diags.get(i) + local file = me._json_quote(d.get("file")) + local line = me._itoa(d.get("line")) + local rule = me._json_quote(d.get("rule")) + local msg = me._json_quote(d.get("message")) + local qf = d.get("quickFix"); if qf == null { qf = "" } + local sev = d.get("severity"); if sev == null { sev = "warning" } + local qfj = me._json_quote(qf) + local entry = " {\"file\":" + file + ",\"line\":" + line + ",\"rule\":" + rule + ",\"message\":" + msg + ",\"quickFix\":" + qfj + ",\"severity\":\"" + sev + "\"}" + if i != diags.size()-1 { print(entry + ",") } else { print(entry) } + i = i + 1 } + } + print("]}") + return 0 + } + _parse_msg_to_diag(msg, path) { + if msg == null { return null } + // Expect prefixes like: [HC002] ... path:LINE or [HC011] ... :: Method + local rule = "HC000"; local i0 = msg.indexOf("["); local i1 = msg.indexOf("]") + if i0 == 0 && i1 > 1 { rule = msg.substring(1, i1) } + // find last ':' as line separator + local line = 1 + local p = msg.lastIndexOf(":") + if p > 0 { + // try parse after ':' as int (consume consecutive trailing digits) + local tail = msg.substring(p+1) + // remove leading spaces + local q = 0; while q < tail.length() { local c=tail.substring(q,q+1); if c==" "||c=="\t" { q = q + 1 continue } break } + local digits = ""; while q < tail.length() { local c=tail.substring(q,q+1); if c>="0" && c<="9" { digits = digits + c; q = q + 1; continue } break } + if digits != "" { line = me._atoi(digits) } + } + // message: drop path and line suffix + local message = msg + // naive quickFix suggestions + local qf = "" + if rule == "HC002" { qf = "Replace include with using (alias)" } + if rule == "HC003" { qf = "Quote module name: using \"mod\"" } + if rule == "HC010" { qf = "Move assignment into a method (lazy init)" } + if rule == "HC011" { qf = "Remove or reference the dead method from an entrypoint" } + local sev = "warning" + if rule == "HC001" || rule == "HC002" || rule == "HC010" || rule == "HC011" { sev = "error" } + if rule == "HC003" || rule == "HC020" { sev = "warning" } + local d = new MapBox(); d.set("file", path); d.set("line", line); d.set("rule", rule); d.set("message", message); d.set("quickFix", qf); d.set("severity", sev) + return d + } + _render_dot_multi(irs) { + // Delegate to Graphviz renderer (includes edges) + GraphvizRenderBox.render_multi(irs) + return 0 + } + _sort_diags(diags) { + if diags == null { return new ArrayBox() } + local out = new ArrayBox(); local i=0; while i 0 { local d = v % 10; tmp = digits.substring(d,d+1) + tmp; v = v / 10 } + out = tmp + return out + } + _json_quote(s) { + if s == null { return "\"\"" } + local out = ""; local i = 0; local n = s.length() + while i < n { + local ch = s.substring(i,i+1) + if ch == "\\" { out = out + "\\\\" } + else { if ch == "\"" { out = out + "\\\"" } else { if ch == "\n" { out = out + 
"\\n" } else { if ch == "\r" { out = out + "\\r" } else { if ch == "\t" { out = out + "\\t" } else { out = out + ch } } } } } i = i + 1 } - print("}") - return 0 + return "\"" + out + "\"" + } + _atoi(s) { + if s == null { return 0 } + local n = s.length(); if n == 0 { return 0 } + local i = 0; local v = 0 + local digits = "0123456789" + while i < n { + local ch = s.substring(i,i+1) + // stop at first non-digit + if ch < "0" || ch > "9" { break } + // map to int via indexOf + local pos = digits.indexOf(ch) + if pos < 0 { break } + v = v * 10 + pos + i = i + 1 + } + return v } } -static box HakoAnalyzerCliMain { method main(args) { return HakoAnalyzerBox.run(args) } } +// Default entry: Main.main so runner resolves without explicit --entry +static box Main { method main(args) { return HakoAnalyzerBox.run(args) } } diff --git a/tools/hako_check/hako_source_checker.hako b/tools/hako_check/hako_source_checker.hako index 987914cc..bf58a47c 100644 --- a/tools/hako_check/hako_source_checker.hako +++ b/tools/hako_check/hako_source_checker.hako @@ -36,18 +36,18 @@ static box HakoSourceCheckerBox { // HC002: include is forbidden _rule_include_forbidden(text, path, out) { local lines = text.split("\n") - local i=0; while i=n { break }; local cj=text.substring(j,j+1); if cj=="\n" { break }; if cj=="=" { seen_eq=1; break }; off=off+1 } if seen_eq == 1 { - out.push("[HC001] top-level assignment in static box (use lazy init in method): " + path + ":" + Str.int_to_str(line)) + out.push("[HC001] top-level assignment in static box (use lazy init in method): " + path + ":" + me._itoa(line)) } } } @@ -101,14 +101,15 @@ static box HakoSourceCheckerBox { // helpers _ltrim(s) { return me._ltrim_chars(s, " \t") } _ltrim_chars(s, cs) { - local n = Str.len(s) + local n = s.length() local head = 0 local i=0; while i0 { local d=v%10; tmp=digits.substring(d,d+1)+tmp; v=v/10 } out=tmp; return out } _match_kw(s, i, kw) { - local k = Str.len(kw) - if i + k > Str.len(s) { return 0 } + local k = kw.length() + if i + k > s.length() { return 0 } if s.substring(i, i+k) == kw { return 1 } return 0 } diff --git a/tools/hako_check/rules/rule_dead_methods.hako b/tools/hako_check/rules/rule_dead_methods.hako index cb84a385..83e25d0d 100644 --- a/tools/hako_check/rules/rule_dead_methods.hako +++ b/tools/hako_check/rules/rule_dead_methods.hako @@ -3,18 +3,208 @@ using selfhost.shared.common.string_helpers as Str static box RuleDeadMethodsBox { // IR expects: methods(Array), calls(Array), entrypoints(Array) apply_ir(ir, path, out) { - local methods = ir.get("methods"); if methods == null { return } - local calls = ir.get("calls"); if calls == null { return } + local methods = ir.get("methods") + // If IR has no methods, or methods is empty, rebuild from source file. 
+ if methods == null || methods.size() == 0 { + // Prefer in-memory source if provided (avoids FileBox/plugin dependency) + local src = ir.get("source") + if src != null { methods = me._scan_methods_from_text(src) } else { + // Fallback to FileBox only when no source text provided + local fb = new FileBox() + if fb.open(path) == 0 { local text = fb.read(); fb.close(); methods = me._scan_methods_from_text(text) } else { methods = new ArrayBox() } + } + } + if methods == null || methods.size() == 0 { return } + local calls = ir.get("calls"); + if (calls == null || calls.size() == 0) { + // build minimal calls from source text (avoid plugin) + local src = ir.get("source"); if src != null { calls = me._scan_calls_from_text(src) } else { calls = new ArrayBox() } + } local eps = ir.get("entrypoints"); if eps == null { eps = new ArrayBox() } // build graph local adj = new MapBox() local i = 0; while i < methods.size() { adj.set(methods.get(i), new ArrayBox()); i = i + 1 } - i = 0; while i < calls.size() { local c=calls.get(i); local f=c.get("from"); local t=c.get("to"); if adj.has(f)==1 { adj.get(f).push(t) }; i = i + 1 } + i = 0; while i < calls.size() { + local c=calls.get(i); local f=c.get("from"); local t=c.get("to") + // normalize from: prefer exact, otherwise try adding "/0" suffix + local ff = f + if adj.has(ff) == 0 { local f0 = f + "/0"; if adj.has(f0) == 1 { ff = f0 } } + if adj.has(ff) == 1 { adj.get(ff).push(t) } + i = i + 1 + } // DFS from entrypoints local seen = new MapBox(); - local j = 0; while j < eps.size() { me._dfs(adj, eps.get(j), seen); j = j + 1 } - // report dead = methods not seen - i = 0; while i < methods.size() { local m=methods.get(i); if seen.has(m)==0 { out.push("[HC011] unreachable method (dead code): " + path + " :: " + m) }; i = i + 1 } + // resolve seeds: accept exact or prefix ("name/arity") matches for entrypoint names + local seeds = new ArrayBox() + // collect keys + local keys = new ArrayBox(); i = 0; while i < methods.size() { keys.push(methods.get(i)); i = i + 1 } + local j = 0 + while j < eps.size() { + local ep = eps.get(j) + // exact match + if adj.has(ep) == 1 { seeds.push(ep) } + // prefix match: ep + "/" + local pref = ep + "/" + local k = 0; while k < keys.size() { local key = keys.get(k); if key.indexOf(pref) == 0 { seeds.push(key) } k = k + 1 } + j = j + 1 + } + // fallback: common Main.main/0 if still empty + if seeds.size() == 0 { + if adj.has("Main.main/0") == 1 { seeds.push("Main.main/0") } + } + // run DFS from seeds + j = 0; while j < seeds.size() { me._dfs(adj, seeds.get(j), seen); j = j + 1 } + // report dead = methods not seen (filter with simple call-text heuristic) + local src_text = ir.get("source") + local cands = new ArrayBox() + i = 0; while i < methods.size() { local m=methods.get(i); if seen.has(m)==0 { cands.push(m) }; i = i + 1 } + i = 0; while i < cands.size() { + local m = cands.get(i) + local keep = 1 + if src_text != null { + // If source text contains a call like ".methodName(", consider it reachable + local slash = m.lastIndexOf("/") + local dotp = m.lastIndexOf(".") + if dotp >= 0 { + local meth = (slash>dotp)? m.substring(dotp+1, slash) : m.substring(dotp+1) + if src_text.indexOf("." 
+ meth + "(") >= 0 { keep = 0 } + } + } + if keep == 1 { out.push("[HC011] unreachable method (dead code): PLACEHOLDER :: " + m) } + i = i + 1 + } + } + _scan_methods_from_text(text) { + local res = new ArrayBox() + if text == null { return res } + // use local implementation to avoid external static calls + local lines = me._split_lines(text) + local cur = null + local depth = 0 + local i = 0 + while i < lines.size() { + local ln = me._ltrim(lines.get(i)) + if ln.indexOf("static box ") == 0 { + local rest = ln.substring("static box ".length()) + local p = rest.indexOf("{") + if p > 0 { cur = me._rstrip(rest.substring(0,p)) } else { cur = me._rstrip(rest) } + depth = depth + 1 + i = i + 1; continue + } + if cur != null && ln.indexOf("method ") == 0 { + local rest = ln.substring("method ".length()) + local p1 = rest.indexOf("(") + local name = (p1>0)? me._rstrip(rest.substring(0,p1)) : me._rstrip(rest) + local ar = 0 + local p2 = rest.indexOf(")", (p1>=0)?(p1+1):0) + if p1>=0 && p2>p1+1 { + local inside = rest.substring(p1+1,p2) + // count commas + 1 if any non-space + local any = 0; local cnt = 1; local k=0; while k < inside.length() { local c=inside.substring(k,k+1); if c=="," { cnt = cnt + 1 }; if c!=" "&&c!="\t" { any=1 }; k=k+1 } + if any == 1 { ar = cnt } + } + res.push(cur + "." + name + "/" + me._itoa(ar)) + } + // adjust depth by braces on the line + local j=0; while j < ln.length() { local ch=ln.substring(j,j+1); if ch=="{" { depth = depth + 1 } else { if ch=="}" { depth = depth - 1; if depth < 0 { depth = 0 } } } j=j+1 } + if depth == 0 { cur = null } + i = i + 1 + } + return res + } + _ltrim(s) { return me._ltrim_chars(s, " \t") } + _rstrip(s) { + local n = s.length() + local last = n + local r = 0 + while r < n { + local i4 = n-1-r + local c = s.substring(i4,i4+1) + if c != " " && c != "\t" { last = i4+1; break } + if r == n-1 { last = 0 } + r = r + 1 + } + return s.substring(0,last) + } + _ltrim_chars(s, cs) { + local n = s.length(); local head = 0 + local idx = 0 + while idx < n { + local ch = s.substring(idx, idx+1) + if ch != " " && ch != "\t" { head = idx; break } + if idx == n-1 { head = n } + idx = idx + 1 + } + return s.substring(head) + } + _itoa(n) { local v=0+n; if v==0 { return "0" } local out=""; local digits="0123456789"; local tmp=""; while v>0 { local d=v%10; tmp=digits.substring(d,d+1)+tmp; v=v/10 } out=tmp; return out } + _split_lines(s) { + local arr = new ArrayBox(); if s == null { return arr } + local n = s.length(); local last = 0; local i = 0 + loop (i < n) { local ch = s.substring(i,i+1); if ch == "\n" { arr.push(s.substring(last,i)); last = i+1 } i = i + 1 } + if last <= n { arr.push(s.substring(last)) } + return arr + } + _scan_calls_from_text(text) { + local arr = new ArrayBox(); if text == null { return arr } + local lines = me._split_lines(text) + local src_m = "Main.main/0" + local i=0; while i < lines.size() { + local ln = lines.get(i) + // naive: detect patterns like "Main.foo(" + local pos = 0; local n = ln.length() + loop (pos < n) { + local k = ln.indexOf(".", pos); if k < 0 { break } + // scan ident before '.' + local lhs = me._scan_ident_rev(ln, k-1) + // scan ident after '.' + local rhs = me._scan_ident_fwd(ln, k+1) + if lhs != null && rhs != null { + local to = lhs + "." 
+ rhs + "/0" + local rec = new MapBox(); rec.set("from", src_m); rec.set("to", to); arr.push(rec) + } + pos = k + 1 + } + i = i + 1 + } + return arr + } + _scan_ident_rev(s, i) { + if i<0 { return null } + local n = i + local start = 0 + local rr = 0 + while rr <= n { + local j = i - rr + local c = s.substring(j, j+1) + if me._is_ident_char(c) == 0 { start = j+1; break } + if j == 0 { start = 0; break } + rr = rr + 1 + } + if start>i { return null } + return s.substring(start, i+1) + } + _scan_ident_fwd(s, i) { + local n=s.length(); if i>=n { return null } + local endp = i + local off = 0 + while off < n { + local j = i + off + if j >= n { break } + local c = s.substring(j, j+1) + if me._is_ident_char(c) == 0 { endp = j; break } + if j == n-1 { endp = n; break } + off = off + 1 + } + if endp == i { return null } + return s.substring(i, endp) + } + _is_ident_char(c) { + if c == "_" { return 1 } + if c >= "A" && c <= "Z" { return 1 } + if c >= "a" && c <= "z" { return 1 } + if c >= "0" && c <= "9" { return 1 } + return 0 } _dfs(adj, node, seen) { if node == null { return } diff --git a/tools/hako_check/rules/rule_global_assign.hako b/tools/hako_check/rules/rule_global_assign.hako index 6d57800f..8a508d36 100644 --- a/tools/hako_check/rules/rule_global_assign.hako +++ b/tools/hako_check/rules/rule_global_assign.hako @@ -3,7 +3,7 @@ using selfhost.shared.common.string_helpers as Str static box RuleGlobalAssignBox { apply(text, path, out) { // HC010: global mutable state 禁止(top-levelの識別子= を雑に検出) - local lines = text.split("\n") + local lines = me._split_lines(text) local in_box = 0; local in_method = 0 local i = 0; while i < lines.size() { local ln = lines.get(i) @@ -14,20 +14,27 @@ static box RuleGlobalAssignBox { if in_box == 1 && in_method == 0 { // at top-level inside box: ident = if me._looks_assign(t) == 1 { - out.push("[HC010] global assignment (top-level in box is forbidden): " + path + ":" + Str.int_to_str(i+1)) + out.push("[HC010] global assignment (top-level in box is forbidden): " + path + ":" + me._itoa(i+1)) } } i = i + 1 } } _ltrim(s) { return me._ltrim_chars(s, " \t") } + _split_lines(s) { + local arr = new ArrayBox(); if s == null { return arr } + local n = s.length(); local last = 0; local i = 0 + while i < n { local ch = s.substring(i,i+1); if ch == "\n" { arr.push(s.substring(last,i)); last = i+1 } i = i + 1 } + arr.push(s.substring(last)); return arr + } _ltrim_chars(s, cs) { - local n=Str.len(s); local head=0 + local n=s.length(); local head=0 local i = 0; while i < n { local ch=s.substring(i,i+1); if ch!=" "&&ch!="\t" { head=i; break }; if i==n-1 { head=n }; i = i + 1 } return s.substring(head) } + _itoa(n) { local v=0+n; if v==0 { return "0" } local out=""; local digits="0123456789"; local tmp=""; while v>0 { local d=v%10; tmp=digits.substring(d,d+1)+tmp; v=v/10 } out=tmp; return out } _looks_assign(t) { // very naive: identifier start followed by '=' somewhere (and not 'static box' or 'method') - if Str.len(t) < 3 { return 0 } + if t.length() < 3 { return 0 } local c = t.substring(0,1) if !((c>="A"&&c<="Z")||(c>="a"&&c<="z")||c=="_") { return 0 } if t.indexOf("static box ") == 0 || t.indexOf("method ") == 0 { return 0 } diff --git a/tools/hako_check/rules/rule_include_forbidden.hako b/tools/hako_check/rules/rule_include_forbidden.hako index becc5ada..fc60109b 100644 --- a/tools/hako_check/rules/rule_include_forbidden.hako +++ b/tools/hako_check/rules/rule_include_forbidden.hako @@ -1,20 +1,37 @@ using selfhost.shared.common.string_helpers as Str static box 
RuleIncludeForbiddenBox { + apply_ast(ast, path, out) { + if ast == null { return } + local incs = ast.get("includes"); if incs == null { return } + local i = 0 + while i < incs.size() { + local ln = incs.get(i) + out.push("[HC002] include is forbidden (use using+alias): " + path + ":" + me._itoa(ln)) + i = i + 1 + } + } apply(text, path, out) { - local lines = text.split("\n") + local lines = me._split_lines(text) local i = 0 while i < lines.size() { local ln = me._ltrim(lines.get(i)) if ln.indexOf('include "') == 0 { - out.push("[HC002] include is forbidden (use using+alias): " + path + ":" + Str.int_to_str(i+1)) + out.push("[HC002] include is forbidden (use using+alias): " + path + ":" + me._itoa(i+1)) } i = i + 1 } } _ltrim(s) { return me._ltrim_chars(s, " \t") } + _itoa(n) { local v=0+n; if v==0 { return "0" } local out=""; local digits="0123456789"; local tmp=""; while v>0 { local d=v%10; tmp=digits.substring(d,d+1)+tmp; v=v/10 } out=tmp; return out } + _split_lines(s) { + local arr = new ArrayBox(); if s == null { return arr } + local n = s.length(); local last = 0; local i = 0 + while i < n { local ch = s.substring(i,i+1); if ch == "\n" { arr.push(s.substring(last,i)); last = i+1 } i = i + 1 } + arr.push(s.substring(last)); return arr + } _ltrim_chars(s, cs) { - local n = Str.len(s); local head = 0 + local n = s.length(); local head = 0 local i = 0 while i < n { local ch = s.substring(i,i+1) diff --git a/tools/hako_check/rules/rule_non_ascii_quotes.hako b/tools/hako_check/rules/rule_non_ascii_quotes.hako new file mode 100644 index 00000000..0d9fd270 --- /dev/null +++ b/tools/hako_check/rules/rule_non_ascii_quotes.hako @@ -0,0 +1,31 @@ +// HC017: Non-ASCII Quotes detection +// Detects fancy quotes like “ ” ‘ ’ and reports their locations. +static box RuleNonAsciiQuotesBox { + apply(text, path, out) { + if text == null { return 0 } + local lines = me._split_lines(text) + local i = 0 + while i < lines.size() { + local ln = lines.get(i) + if me._has_fancy_quote(ln) == 1 { + out.push("[HC017] non-ASCII quotes detected: " + path + ":" + me._itoa(i+1)) + } + i = i + 1 + } + return 0 + } + _has_fancy_quote(s) { + if s == null { return 0 } + // Check for common fancy quotes: U+201C/U+201D/U+2018/U+2019 + if s.indexOf("“") >= 0 { return 1 } + if s.indexOf("”") >= 0 { return 1 } + if s.indexOf("‘") >= 0 { return 1 } + if s.indexOf("’") >= 0 { return 1 } + return 0 + } + _split_lines(s) { local arr=new ArrayBox(); if s==null {return arr} local n=s.length(); local last=0; local i=0; loop(i0 { local d=v%10; tmp=digits.substring(d,d+1)+tmp; v=v/10 } out=tmp; return out } +} + +static box RuleNonAsciiQuotesMain { method main(args) { return 0 } } + diff --git a/tools/hako_check/rules/rule_static_top_assign.hako b/tools/hako_check/rules/rule_static_top_assign.hako index 66b161c4..3e00b109 100644 --- a/tools/hako_check/rules/rule_static_top_assign.hako +++ b/tools/hako_check/rules/rule_static_top_assign.hako @@ -2,7 +2,7 @@ using selfhost.shared.common.string_helpers as Str static box RuleStaticTopAssignBox { apply(text, path, out) { - local n = Str.len(text); local line = 1 + local n = text.length(); local line = 1 local in_static = 0; local brace = 0; local in_method = 0 local i = 0 while i < n { @@ -28,7 +28,7 @@ static box RuleStaticTopAssignBox { if cj == "=" { seen_eq = 1; break } off = off + 1 } if seen_eq == 1 { - out.push("[HC001] top-level assignment in static box (use lazy init in method): " + path + ":" + Str.int_to_str(line)) + out.push("[HC001] top-level assignment in static box (use 
diff --git a/tools/hako_check/rules/rule_static_top_assign.hako b/tools/hako_check/rules/rule_static_top_assign.hako
index 66b161c4..3e00b109 100644
--- a/tools/hako_check/rules/rule_static_top_assign.hako
+++ b/tools/hako_check/rules/rule_static_top_assign.hako
@@ -2,7 +2,7 @@ using selfhost.shared.common.string_helpers as Str
 static box RuleStaticTopAssignBox {
   apply(text, path, out) {
-    local n = Str.len(text); local line = 1
+    local n = text.length(); local line = 1
     local in_static = 0; local brace = 0; local in_method = 0
     local i = 0
     while i < n {
@@ -28,7 +28,7 @@ static box RuleStaticTopAssignBox {
           if cj == "=" { seen_eq = 1; break }
           off = off + 1
         }
         if seen_eq == 1 {
-          out.push("[HC001] top-level assignment in static box (use lazy init in method): " + path + ":" + Str.int_to_str(line))
+          out.push("[HC001] top-level assignment in static box (use lazy init in method): " + path + ":" + ("" + line))
         }
       }
     }
@@ -37,7 +37,7 @@
       i = i + 1
     }
   }
-  _match_kw(s,i,kw) { local k=Str.len(kw); if i+k>Str.len(s) { return 0 }; if s.substring(i,i+k)==kw { return 1 } return 0 }
+  _match_kw(s,i,kw) { local k=kw.length(); if i+k>s.length() { return 0 }; if s.substring(i,i+k)==kw { return 1 } return 0 }
   _is_ident_start(c) { if c=="_" {return 1}; if c>="A"&&c<="Z" {return 1}; if c>="a"&&c<="z" {return 1}; return 0 }
   _is_line_head(s,i) {
     local r = 0
diff --git a/tools/hako_check/rules/rule_unused_alias.hako b/tools/hako_check/rules/rule_unused_alias.hako
new file mode 100644
index 00000000..106678fb
--- /dev/null
+++ b/tools/hako_check/rules/rule_unused_alias.hako
@@ -0,0 +1,38 @@
+using selfhost.shared.common.string_helpers as Str
+
+// HC016: Unused Using/Alias
+// Detects `using ... as Alias` where Alias is never referenced as `Alias.` in the source.
+static box RuleUnusedAliasBox {
+  apply(text, path, out) {
+    if text == null { return 0 }
+    local lines = me._split_lines(text)
+    local i = 0
+    while i < lines.size() {
+      local ln = me._ltrim(lines.get(i))
+      if ln.indexOf("using ") == 0 && ln.indexOf(" as ") > 0 {
+        // parse alias name after ' as '
+        local p = ln.indexOf(" as ")
+        local rest = ln.substring(p + " as ".length())
+        local alias = me._read_ident(rest)
+        if alias != "" {
+          // search usage: alias.
+          local needle = alias + "."
+          if text.indexOf(needle) < 0 {
+            out.push("[HC016] unused alias '" + alias + "' in using: " + path + ":" + me._itoa(i+1))
+          }
+        }
+      }
+      i = i + 1
+    }
+    return 0
+  }
+  _split_lines(s) { local arr=new ArrayBox(); if s==null {return arr} local n=s.length(); local last=0; local i=0; loop(i<n) { local ch=s.substring(i,i+1); if ch=="\n" { arr.push(s.substring(last,i)); last=i+1 } i=i+1 } arr.push(s.substring(last)); return arr }
+  _ltrim(s) { local n=s.length(); local i=0; while i<n { local ch=s.substring(i,i+1); if ch!=" "&&ch!="\t" { break } i=i+1 } return s.substring(i) }
+  _itoa(n) { local v=0+n; if v==0 { return "0" } local out=""; local digits="0123456789"; local tmp=""; while v>0 { local d=v%10; tmp=digits.substring(d,d+1)+tmp; v=v/10 } out=tmp; return out }
+  _is_ident_char(c) { if c=="_" {return 1}; if c>="A"&&c<="Z" {return 1}; if c>="a"&&c<="z" {return 1}; if c>="0"&&c<="9" {return 1}; return 0 }
+  _read_ident(s) { if s==null {return ""} local n=s.length(); local i=0; local out=""; while i<n { local c=s.substring(i,i+1); if me._is_ident_char(c)==0 { break } out=out+c; i=i+1 } return out }
+}
diff --git a/tools/hako_check/rules/… b/tools/hako_check/rules/…
@@ … @@
+  _itoa(n) { local v=0+n; if v==0 { return "0" } local out=""; local digits="0123456789"; local tmp=""; while v>0 { local d=v%10; tmp=digits.substring(d,d+1)+tmp; v=v/10 } out=tmp; return out }
+  _split_lines(s) {
+    local arr = new ArrayBox(); if s == null { return arr }
+    local n = s.length(); local last = 0; local i = 0
+    while i < n { local ch = s.substring(i,i+1); if ch == "\n" { arr.push(s.substring(last,i)); last = i+1 } i = i + 1 }
+    arr.push(s.substring(last)); return arr
+  }
   _ltrim_chars(s, cs) {
-    local n = Str.len(s); local head = 0
+    local n = s.length(); local head = 0
     local i = 0
     while i < n {
       local ch = s.substring(i,i+1)
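As with HC017, a one-off driver makes the HC016 contract concrete; `Str.` never appears in the sample, so the alias is flagged on its `using` line (hypothetical demo box, not part of this diff):

static box Hc016Demo {
  method main(args) {
    local out = new ArrayBox()
    local src = "using \"m\" as Str" + "\n" + "static box Main { method main() { return 0 } }"
    RuleUnusedAliasBox.apply(src, "demo.hako", out)
    // out.get(0) == "[HC016] unused alias 'Str' in using: demo.hako:1"
    return 0
  }
}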
-f "$input_ng" ]; then echo "[TEST] skip (no inputs): $dir"; return; fi local tmp_out="/tmp/hako_test_$$.json" + # Build a tiny wrapper program to call HakoAnalyzerBox.run with constructed argv + local path_ok text_ok + local path_ng text_ng + if [ -f "$input_ok" ]; then + path_ok="$input_ok" + text_ok="$(sed 's/\r$//' "$input_ok")" + else + : + fi + if [ -f "$input_ng" ]; then + path_ng="$input_ng" + text_ng="$(sed 's/\r$//' "$input_ng")" + else + : + fi + # Build argv array for analyzer CLI (preserve newlines in text) + ARGS=( --debug --format json-lsp ) + if [ -f "$input_ok" ]; then ARGS+=( --source-file "$path_ok" "$text_ok" ); fi + if [ -f "$input_ng" ]; then ARGS+=( --source-file "$path_ng" "$text_ng" ); fi + + # Directly invoke analyzer CLI with args via '--', avoid wrapper/FS NYASH_DISABLE_NY_COMPILER=1 HAKO_DISABLE_NY_COMPILER=1 \ NYASH_PARSER_STAGE3=1 HAKO_PARSER_STAGE3=1 NYASH_PARSER_SEAM_TOLERANT=1 HAKO_PARSER_SEAM_TOLERANT=1 \ NYASH_ENABLE_USING=1 HAKO_ENABLE_USING=1 NYASH_USING_AST=1 \ - "$BIN" --backend vm "$ROOT/tools/hako_check/cli.hako" -- --format json-lsp ${input_ok:+"$input_ok"} ${input_ng:+"$input_ng"} \ - >"$tmp_out" 2>/dev/null || true - if ! diff -u "$expected" "$tmp_out" >/dev/null; then + "$BIN" --backend vm tools/hako_check/cli.hako -- "${ARGS[@]}" >"$tmp_out" 2>&1 || true + # Extract diagnostics JSON (one-line or pretty block) + tmp_json="/tmp/hako_test_json_$$.json" + json_line=$(grep -m1 '^\{"diagnostics"' "$tmp_out" || true) + if [ -n "$json_line" ] && echo "$json_line" | grep -q '\]}' ; then + echo "$json_line" > "$tmp_json" + else + json_block=$(awk '/^\{"diagnostics"/{f=1} f{print} /\]\}/{exit}' "$tmp_out" ) + if [ -z "$json_block" ]; then + echo "[TEST/ERROR] no diagnostics JSON found; possible VM error. log head:" >&2 + sed -n '1,120p' "$tmp_out" >&2 || true + json_block='{"diagnostics":[]}' + fi + printf "%s\n" "$json_block" > "$tmp_json" + fi + # Normalize absolute paths to basenames for stable comparison + tmp_norm="/tmp/hako_test_norm_$$.json" + cp "$tmp_json" "$tmp_norm" + if [ -f "$input_ok" ]; then + base_ok="$(basename "$input_ok")"; abs_ok="$input_ok" + sed -i "s#\"file\":\"$abs_ok\"#\"file\":\"$base_ok\"#g" "$tmp_norm" + sed -i "s#${abs_ok//\//\/}#${base_ok//\//\/}#g" "$tmp_norm" + fi + if [ -f "$input_ng" ]; then + base_ng="$(basename "$input_ng")"; abs_ng="$input_ng" + sed -i "s#\"file\":\"$abs_ng\"#\"file\":\"$base_ng\"#g" "$tmp_norm" + sed -i "s#${abs_ng//\//\/}#${base_ng//\//\/}#g" "$tmp_norm" + fi + # Align trailing blank line behavior to expected (tolerate one extra blank line) + if [ -f "$expected" ]; then + if [ -z "$(tail -n1 "$tmp_norm")" ]; then :; else + if [ -z "$(tail -n1 "$expected")" ]; then printf "\n" >> "$tmp_norm"; fi + fi + fi + # Replace absolute path occurrences in message with PLACEHOLDER + if [ -f "$input_ng" ]; then + sed -i "s#${abs_ng//\//\/}#PLACEHOLDER#g" "$tmp_norm" + fi + if ! 
diff -u "$expected" "$tmp_norm" >/dev/null; then echo "[TEST/FAIL] $dir" >&2 - diff -u "$expected" "$tmp_out" || true + diff -u "$expected" "$tmp_norm" || true fail=$((fail+1)) else echo "[TEST/OK] $dir" fi - rm -f "$tmp_out" + rm -f "$tmp_out" "$tmp_norm" "$tmp_json" } for d in "$TARGET_DIR"/*; do @@ -47,4 +104,3 @@ if [ $fail -ne 0 ]; then fi echo "[TEST/SUMMARY] all green" exit 0 - diff --git a/tools/hako_check/tests/HC011_dead_methods/expected.json b/tools/hako_check/tests/HC011_dead_methods/expected.json new file mode 100644 index 00000000..83d0bb08 --- /dev/null +++ b/tools/hako_check/tests/HC011_dead_methods/expected.json @@ -0,0 +1,4 @@ +{"diagnostics":[ + {"file":"ng.hako","line":1,"rule":"HC011","message":"[HC011] unreachable method (dead code): PLACEHOLDER :: Main.unused/0","quickFix":"Remove or reference the dead method from an entrypoint","severity":"error"} +]} + diff --git a/tools/hako_check/tests/HC011_dead_methods/ng.hako b/tools/hako_check/tests/HC011_dead_methods/ng.hako new file mode 100644 index 00000000..39702584 --- /dev/null +++ b/tools/hako_check/tests/HC011_dead_methods/ng.hako @@ -0,0 +1,12 @@ +// ng.hako — contains dead method (unused) + +static box Main { + method main() { + // no calls here, unused() is unreachable + return 0 + } + method unused() { + return 1 + } +} + diff --git a/tools/hako_check/tests/HC011_dead_methods/ok.hako b/tools/hako_check/tests/HC011_dead_methods/ok.hako new file mode 100644 index 00000000..a54ab9da --- /dev/null +++ b/tools/hako_check/tests/HC011_dead_methods/ok.hako @@ -0,0 +1,12 @@ +// ok.hako — no dead methods (all referenced) + +static box Main { + method main() { + // main calls helper, so both are reachable + Main.helper() + } + method helper() { + return 0 + } +} + diff --git a/tools/hako_check/tests/HC016_unused_alias/expected.json b/tools/hako_check/tests/HC016_unused_alias/expected.json new file mode 100644 index 00000000..0ccd533d --- /dev/null +++ b/tools/hako_check/tests/HC016_unused_alias/expected.json @@ -0,0 +1,3 @@ +{"diagnostics":[ + {"file":"ng.hako","line":2,"rule":"HC016","message":"[HC016] unused alias 'Str' in using: ng.hako:2","quickFix":"","severity":"warning"} +]} diff --git a/tools/hako_check/tests/HC016_unused_alias/ng.hako b/tools/hako_check/tests/HC016_unused_alias/ng.hako new file mode 100644 index 00000000..bf29ac24 --- /dev/null +++ b/tools/hako_check/tests/HC016_unused_alias/ng.hako @@ -0,0 +1,8 @@ +// ng: alias is never used +using "selfhost.shared.common.string_helpers" as Str + +static box Main { + method main() { + return 0 + } +} diff --git a/tools/hako_check/tests/HC016_unused_alias/ok.hako b/tools/hako_check/tests/HC016_unused_alias/ok.hako new file mode 100644 index 00000000..b41ffe9f --- /dev/null +++ b/tools/hako_check/tests/HC016_unused_alias/ok.hako @@ -0,0 +1,11 @@ +// ok: alias is used +using "selfhost.shared.common.string_helpers" as Str + +static box Main { + method main() { + local s = "abc" + // use alias + local n = Str.to_i64("42") + return 0 + } +} diff --git a/tools/hako_check/tests/HC017_non_ascii_quotes/ng.hako b/tools/hako_check/tests/HC017_non_ascii_quotes/ng.hako new file mode 100644 index 00000000..0f715503 --- /dev/null +++ b/tools/hako_check/tests/HC017_non_ascii_quotes/ng.hako @@ -0,0 +1,8 @@ +// ng: contains fancy quotes +static box Main { + method main() { + local s = “fancy quotes here” + return 0 + } +} + diff --git a/tools/hako_check/tests/HC017_non_ascii_quotes/ok.hako b/tools/hako_check/tests/HC017_non_ascii_quotes/ok.hako new file mode 100644 index 
diff --git a/tools/hako_check/tests/HC017_non_ascii_quotes/ok.hako b/tools/hako_check/tests/HC017_non_ascii_quotes/ok.hako
new file mode 100644
index 00000000..632d71f5
--- /dev/null
+++ b/tools/hako_check/tests/HC017_non_ascii_quotes/ok.hako
@@ -0,0 +1,8 @@
+// ok: ASCII quotes only
+static box Main {
+  method main() {
+    local s = "plain ascii"
+    return 0
+  }
+}
+
diff --git a/tools/hako_parser/ast_emit.hako b/tools/hako_parser/ast_emit.hako
index 45cbd75d..700b6b08 100644
--- a/tools/hako_parser/ast_emit.hako
+++ b/tools/hako_parser/ast_emit.hako
@@ -1,14 +1,80 @@
-// tools/hako_parser/ast_emit.hako — HakoAstEmitBox (MVP skeleton)
+// tools/hako_parser/ast_emit.hako — HakoAstEmitBox (MVP)
 using selfhost.shared.common.string_helpers as Str
 
 static box HakoAstEmitBox {
-  // Emit minimal AST JSON v0 from MapBox
+  // Emit minimal AST JSON v0 from MapBox (stable order)
   to_json(ast) {
-    // NOTE: MVP naive stringify; replace with proper JsonEmitBox if needed
-    local s = "{\"boxes\":[],\"uses\":[]}"
-    return s
+    if ast == null { return "{\"boxes\":[],\"uses\":[]}" }
+    local uses = me._sort_strings(ast.get("uses"))
+    local boxes = me._sort_boxes(ast.get("boxes"))
+    local out = "{\"uses\":" + me._emit_array(uses) + ",\"boxes\":" + me._emit_boxes(boxes) + "}"
+    return out
   }
+  _emit_array(arr) {
+    if arr == null { return "[]" }
+    local s = "["
+    local n = arr.size()
+    local i = 0
+    while i < n {
+      local v = arr.get(i)
+      s = s + Str.json_quote(v)
+      if i != n-1 { s = s + "," }
+      i = i + 1
+    }
+    return s + "]"
+  }
+  _emit_boxes(boxes) {
+    if boxes == null { return "[]" }
+    local s = "["
+    local n = boxes.size()
+    local i = 0
+    while i < n {
+      local b = boxes.get(i)
+      local name = Str.json_quote(b.get("name"))
+      local is_static = b.get("is_static")
+      local methods = me._emit_methods(me._sort_methods(b.get("methods")))
+      s = s + "{\"name\":" + name + ",\"is_static\":" + Str.int_to_str(is_static) + ",\"methods\":" + methods + "}"
+      if i != n-1 { s = s + "," }
+      i = i + 1
+    }
+    return s + "]"
+  }
+  _emit_methods(methods) {
+    if methods == null { return "[]" }
+    local s = "["
+    local n = methods.size()
+    local i = 0
+    while i < n {
+      local m = methods.get(i)
+      local name = Str.json_quote(m.get("name"))
+      local arity = Str.int_to_str(m.get("arity"))
+      // span is integer line number
+      local spanv = m.get("span"); if spanv == null { spanv = 0 }
+      s = s + "{\"name\":" + name + ",\"arity\":" + arity + ",\"span\":" + Str.int_to_str(spanv) + "}"
+      if i != n-1 { s = s + "," }
+      i = i + 1
+    }
+    return s + "]"
+  }
+  // Helpers: sorting (naive O(n^2), insertion via rebuild so only push/get/size are needed)
+  _sort_strings(arr) {
+    if arr == null { return new ArrayBox() }
+    local out = new ArrayBox(); local i = 0
+    while i < arr.size() {
+      local v = arr.get(i)
+      local tmp = new ArrayBox(); local j = 0; local placed = 0
+      while j < out.size() {
+        local w = out.get(j)
+        if placed == 0 && v < w { tmp.push(v); placed = 1 }
+        tmp.push(w); j = j + 1
+      }
+      if placed == 0 { tmp.push(v) }
+      out = tmp; i = i + 1
+    }
+    return out
+  }
+  _sort_boxes(arr) {
+    if arr == null { return new ArrayBox() }
+    local out = new ArrayBox(); local i = 0
+    while i < arr.size() {
+      local b = arr.get(i)
+      local tmp = new ArrayBox(); local j = 0; local placed = 0
+      while j < out.size() {
+        local w = out.get(j)
+        if placed == 0 && b.get("name") < w.get("name") { tmp.push(b); placed = 1 }
+        tmp.push(w); j = j + 1
+      }
+      if placed == 0 { tmp.push(b) }
+      out = tmp; i = i + 1
+    }
+    return out
+  }
+  _sort_methods(arr) {
+    if arr == null { return new ArrayBox() }
+    local out = new ArrayBox(); local i = 0
+    while i < arr.size() {
+      local m = arr.get(i)
+      local tmp = new ArrayBox(); local j = 0; local placed = 0
+      while j < out.size() {
+        local w = out.get(j)
+        if placed == 0 && m.get("name") < w.get("name") { tmp.push(m); placed = 1 }
+        tmp.push(w); j = j + 1
+      }
+      if placed == 0 { tmp.push(m) }
+      out = tmp; i = i + 1
+    }
+    return out
+  }
 }
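A quick sketch of the emitter contract: hand-build the v0 AST map, feed it to `HakoAstEmitBox.to_json`, and get deterministic JSON back (hypothetical demo box; the sort helpers above are a reconstruction, but with single-element arrays the output is order-independent):

static box AstEmitDemo {
  method main(args) {
    local ast = new MapBox()
    local uses = new ArrayBox(); uses.push("selfhost.shared.common.string_helpers")
    local b = new MapBox(); b.set("name", "Main"); b.set("is_static", 1)
    local ms = new ArrayBox()
    local m = new MapBox(); m.set("name", "main"); m.set("arity", 0); m.set("span", 3); ms.push(m)
    b.set("methods", ms)
    local boxes = new ArrayBox(); boxes.push(b)
    ast.set("uses", uses); ast.set("boxes", boxes)
    local js = HakoAstEmitBox.to_json(ast)
    // js == {"uses":["selfhost.shared.common.string_helpers"],
    //        "boxes":[{"name":"Main","is_static":1,"methods":[{"name":"main","arity":0,"span":3}]}]}
    return 0
  }
}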
diff --git a/tools/hako_parser/parser_core.hako b/tools/hako_parser/parser_core.hako
index … 100644
--- a/tools/hako_parser/parser_core.hako
+++ b/tools/hako_parser/parser_core.hako
@@ … @@
 using selfhost.shared.common.string_helpers as Str
 
 static box HakoParserCoreBox {
+  // AST map shape (v0):
+  // {
+  //   uses: Array<string>,
+  //   boxes: Array<{name,is_static,methods:Array<{name,arity,span}>}>
+  // }
   parse(text) {
-    local toks = HakoTokenizerBox.tokenize(text)
-    // TODO: implement real parser; MVP returns a minimal AST map
     local ast = new MapBox()
-    ast.set("boxes", new ArrayBox())
     ast.set("uses", new ArrayBox())
+    ast.set("boxes", new ArrayBox())
+    ast.set("includes", new ArrayBox())
+    if text == null { return ast }
+
+    local toks = HakoTokenizerBox.tokenize(text)
+    local p = 0
+    local N = toks.size()
+
+    // Parse stream (single pass, tolerant)
+    while p < N {
+      local t = me._peek(toks, p, N)
+      if me._eq(t, "USING") == 1 {
+        // using "mod" (as Alias)?
+        p = me._advance(p, N)
+        local t1 = me._peek(toks, p, N)
+        if me._eq(t1, "STRING") == 1 {
+          ast.get("uses").push(t1.get("lexeme")); p = me._advance(p, N)
+          // optional: as Alias
+          local t2 = me._peek(toks, p, N); if me._eq(t2, "AS") == 1 { p = me._advance(p, N); local t3=me._peek(toks, p, N); if me._eq(t3, "IDENT")==1 { p = me._advance(p, N) } }
+        } else {
+          // tolerate malformed using; skip token
+        }
+        continue
+      }
+      if me._eq(t, "INCLUDE") == 1 {
+        // include "path"
+        p = me._advance(p, N); local s=me._peek(toks, p, N); if me._eq(s, "STRING") == 1 { ast.get("includes").push(Str.int_to_str(s.get("line"))); p = me._advance(p, N) }
+        continue
+      }
+      if me._eq(t, "STATIC") == 1 {
+        // static box Name { methods }
+        // STATIC BOX IDENT LBRACE ... RBRACE
+        local save = p
+        p = me._advance(p, N) // STATIC
+        local tb = me._peek(toks, p, N); if me._eq(tb, "BOX") == 0 { p = save + 1; continue }
+        p = me._advance(p, N)
+        local tn = me._peek(toks, p, N); if me._eq(tn, "IDENT") == 0 { continue }
+        local box_name = tn.get("lexeme"); p = me._advance(p, N)
+        // expect '{'
+        local tl = me._peek(toks, p, N); if me._eq(tl, "LBRACE") == 0 { continue }
+        p = me._advance(p, N)
+        // register box
+        local b = new MapBox(); b.set("name", box_name); b.set("is_static", 1); b.set("methods", new ArrayBox()); ast.get("boxes").push(b)
+        // scan until matching RBRACE (flat, tolerate nested braces count)
+        local depth = 1
+        while p < N && depth > 0 {
+          local tk = me._peek(toks, p, N)
+          if me._eq(tk, "LBRACE") == 1 { depth = depth + 1; p = me._advance(p, N); continue }
+          if me._eq(tk, "RBRACE") == 1 { depth = depth - 1; p = me._advance(p, N); if depth == 0 { break } else { continue } }
+          // method
+          if me._eq(tk, "METHOD") == 1 {
+            local mline = tk.get("line"); p = me._advance(p, N)
+            local mid = me._peek(toks, p, N); if me._eq(mid, "IDENT") == 0 { continue }
+            local mname = mid.get("lexeme"); p = me._advance(p, N)
+            // params
+            local lp = me._peek(toks, p, N); if me._eq(lp, "LPAREN") == 0 { continue }
+            p = me._advance(p, N)
+            // count params: commas + 1 when the list is non-empty (no nesting inside params for MVP)
+            local arity = 0; local any = 0
+            while p < N {
+              local tt = me._peek(toks, p, N)
+              if me._eq(tt, "RPAREN") == 1 { p = me._advance(p, N); break }
+              if me._eq(tt, "COMMA") == 1 { arity = arity + 1; p = me._advance(p, N); any = 1; continue }
+              // consume any token inside params
+              p = me._advance(p, N); any = 1
+            }
+            if any == 1 { arity = arity + 1 }
+            // record method
+            local m = new MapBox(); m.set("name", mname); m.set("arity", arity); m.set("span", mline)
+            b.get("methods").push(m)
+            continue
+          }
+          p = me._advance(p, N)
+        }
+        continue
+      }
+      // skip unhandled token
+      p = me._advance(p, N)
+    }
     return ast
   }
+  _peek(toks, idx, N) { if idx >= N { return null } return toks.get(idx) }
+  _eq(t, kind) { if t == null { return 0 } if t.get("type") == kind { return 1 } return 0 }
+  _advance(p, N) { if p < N { return p + 1 } return p }
 }
 
 static box HakoParserCoreMain { method main(args) { return 0 } }
-
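To exercise the tolerant single-pass parse, a sketch (the box name `HakoParserCoreBox` follows the reconstruction above and the demo box is hypothetical). With the corrected arity counting, `add(a, b)` records arity 2, not 1:

static box ParserDemo {
  method main(args) {
    local src = "static box Main {" + "\n" +
                "  method main() { return 0 }" + "\n" +
                "  method add(a, b) { return a }" + "\n" +
                "}"
    local ast = HakoParserCoreBox.parse(src)
    local b = ast.get("boxes").get(0)
    // b.get("name") == "Main"
    // b.get("methods"): main (arity 0, span 2) and add (arity 2, span 3)
    return 0
  }
}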
diff --git a/tools/hako_parser/tokenizer.hako b/tools/hako_parser/tokenizer.hako
index f11105a2..5254334b 100644
--- a/tools/hako_parser/tokenizer.hako
+++ b/tools/hako_parser/tokenizer.hako
@@ -1,13 +1,136 @@
-// tools/hako_parser/tokenizer.hako — HakoTokenizerBox (MVP skeleton)
+// tools/hako_parser/tokenizer.hako — HakoTokenizerBox (Stage-3 aware tokenizer, MVP)
+// Produces tokens with type, lexeme, line, col. Handles strings (escapes), numbers,
+// identifiers, and punctuation. Keywords are normalized to upper-case kinds.
 using selfhost.shared.common.string_helpers as Str
 
 static box HakoTokenizerBox {
-  // Returns ArrayBox of tokens (MVP: string list)
+  // Token: Map { type, lexeme, line, col }
   tokenize(text) {
-    // TODO: implement real tokenizer; MVP returns lines as stub
-    return text.split("\n")
+    local out = new ArrayBox()
+    if text == null { return out }
+    local n = text.length()
+    local i = 0
+    local line = 1
+    local col = 1
+    while i < n {
+      local ch = text.substring(i,i+1)
+      // whitespace and newlines
+      if ch == " " || ch == "\t" { i = i + 1; col = col + 1; continue }
+      if ch == "\r" { i = i + 1; continue }
+      if ch == "\n" { i = i + 1; line = line + 1; col = 1; continue }
+      // line comment // ... (consume until EOL)
+      if ch == "/" && i+1 < n && text.substring(i+1,i+2) == "/" {
+        // skip until newline
+        i = i + 2; col = col + 2
+        while i < n {
+          local c2 = text.substring(i,i+1)
+          if c2 == "\n" { break }
+          i = i + 1; col = col + 1
+        }
+        continue
+      }
+      // block comment /* ... */ (consume until closing, track newlines)
+      if ch == "/" && i+1 < n && text.substring(i+1,i+2) == "*" {
+        i = i + 2; col = col + 2
+        local closed = 0
+        while i < n {
+          local c2 = text.substring(i,i+1)
+          if c2 == "*" && i+1 < n && text.substring(i+1,i+2) == "/" { i = i + 2; col = col + 2; closed = 1; break }
+          if c2 == "\n" { i = i + 1; line = line + 1; col = 1; continue }
+          i = i + 1; col = col + 1
+        }
+        continue
+      }
+      // string literal "..." with escapes \" \\ \n \t
+      if ch == '"' {
+        local start_col = col
+        local buf = ""
+        i = i + 1; col = col + 1
+        local closed = 0
+        while i < n {
+          local c3 = text.substring(i,i+1)
+          if c3 == '"' { closed = 1; i = i + 1; col = col + 1; break }
+          if c3 == "\\" {
+            if i+1 < n {
+              local esc = text.substring(i+1,i+2)
+              if esc == '"' { buf = buf.concat('"') }
+              else if esc == "\\" { buf = buf.concat("\\") }
+              else if esc == "n" { buf = buf.concat("\n") }
+              else if esc == "t" { buf = buf.concat("\t") }
+              else { buf = buf.concat(esc) }
+              i = i + 2; col = col + 2
+              continue
+            } else { i = i + 1; col = col + 1; break }
+          }
+          buf = buf.concat(c3)
+          i = i + 1; col = col + 1
+        }
+        local tok = new MapBox(); tok.set("type","STRING"); tok.set("lexeme", buf); tok.set("line", line); tok.set("col", start_col)
+        out.push(tok); continue
+      }
+      // number (integer only for MVP)
+      if ch >= "0" && ch <= "9" {
+        local start = i; local start_col = col
+        while i < n {
+          local c4 = text.substring(i,i+1)
+          if !(c4 >= "0" && c4 <= "9") { break }
+          i = i + 1; col = col + 1
+        }
+        local lex = text.substring(start, i)
+        local tok = new MapBox(); tok.set("type","NUMBER"); tok.set("lexeme", lex); tok.set("line", line); tok.set("col", start_col)
+        out.push(tok); continue
+      }
+      // identifier or keyword
+      if me._is_ident_start(ch) == 1 {
+        local start = i; local start_col = col
+        while i < n {
+          local c5 = text.substring(i,i+1)
+          if me._is_ident_char(c5) == 0 { break }
+          i = i + 1; col = col + 1
+        }
+        local lex = text.substring(start, i)
+        local kind = me._kw_kind(lex)
+        local tok = new MapBox(); tok.set("type", kind); tok.set("lexeme", lex); tok.set("line", line); tok.set("col", start_col)
+        out.push(tok); continue
+      }
+      // punctuation / symbols we care about
+      local sym_kind = me._sym_kind(ch)
+      if sym_kind != null {
+        local tok = new MapBox(); tok.set("type", sym_kind); tok.set("lexeme", ch); tok.set("line", line); tok.set("col", col)
+        out.push(tok); i = i + 1; col = col + 1; continue
+      }
+      // unknown char → emit as PUNC so parser can skip gracefully
+      local tok = new MapBox(); tok.set("type","PUNC"); tok.set("lexeme", ch); tok.set("line", line); tok.set("col", col)
+      out.push(tok); i = i + 1; col = col + 1
+    }
+    return out
+  }
+  _is_ident_start(c) { if c=="_" {return 1}; if c>="A"&&c<="Z" {return 1}; if c>="a"&&c<="z" {return 1}; return 0 }
+  _is_ident_char(c) { if me._is_ident_start(c)==1 { return 1 }; if c>="0"&&c<="9" { return 1 }; return 0 }
+  _kw_kind(lex) {
+    if lex == "using" { return "USING" }
+    if lex == "as" { return "AS" }
+    if lex == "static" { return "STATIC" }
+    if lex == "box" { return "BOX" }
+    if lex == "method" { return "METHOD" }
+    if lex == "include" { return "INCLUDE" }
+    if lex == "while" { return "WHILE" } // Stage-3 tokens (MVP)
+    if lex == "for" { return "FOR" }
+    if lex == "in" { return "IN" }
+    return "IDENT"
+  }
+  _sym_kind(c) {
+    if c == "{" { return "LBRACE" }
+    if c == "}" { return "RBRACE" }
+    if c == "(" { return "LPAREN" }
+    if c == ")" { return "RPAREN" }
+    if c == "," { return "COMMA" }
+    if c == "." { return "DOT" }
+    if c == ":" { return "COLON" }
+    if c == "=" { return "EQ" }
+    if c == ";" { return "SEMI" }
+    return null
   }
 }
 
 static box HakoTokenizerMain { method main(args) { return 0 } }
-
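The token records are plain MapBoxes, so a probe is easy to sketch (hypothetical demo box; the line/col values follow the counting rules in tokenize above):

static box TokenizerDemo {
  method main(args) {
    local toks = HakoTokenizerBox.tokenize("using \"m\" as X")
    // toks.get(0): type "USING",  lexeme "using", line 1, col 1
    // toks.get(1): type "STRING", lexeme "m",     line 1, col 7
    // toks.get(2): type "AS",     lexeme "as",    line 1, col 11
    // toks.get(3): type "IDENT",  lexeme "X",     line 1, col 14
    return 0
  }
}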
tok.set("line", line); tok.set("col", col) + out.push(tok); i = i + 1; col = col + 1 + } + return out + } + _is_ident_start(c) { if c=="_" {return 1}; if c>="A"&&c<="Z" {return 1}; if c>="a"&&c<="z" {return 1}; return 0 } + _is_ident_char(c) { if me._is_ident_start(c)==1 { return 1 }; if c>="0"&&c<="9" { return 1 }; return 0 } + _kw_kind(lex) { + if lex == "using" { return "USING" } + if lex == "as" { return "AS" } + if lex == "static" { return "STATIC" } + if lex == "box" { return "BOX" } + if lex == "method" { return "METHOD" } + if lex == "include" { return "INCLUDE" } + if lex == "while" { return "WHILE" } // Stage-3 tokens (MVP) + if lex == "for" { return "FOR" } + if lex == "in" { return "IN" } + return "IDENT" + } + _sym_kind(c) { + if c == "{" { return "LBRACE" } + if c == "}" { return "RBRACE" } + if c == "(" { return "LPAREN" } + if c == ")" { return "RPAREN" } + if c == "," { return "COMMA" } + if c == "." { return "DOT" } + if c == ":" { return "COLON" } + if c == "=" { return "EQ" } + if c == ";" { return "SEMI" } + return null } } static box HakoTokenizerMain { method main(args) { return 0 } } - diff --git a/tools/smokes/v2/profiles/quick/analyze/hc011_dead_methods.sh b/tools/smokes/v2/profiles/quick/analyze/hc011_dead_methods.sh new file mode 100644 index 00000000..a3711c04 --- /dev/null +++ b/tools/smokes/v2/profiles/quick/analyze/hc011_dead_methods.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "$0")/../../../../../.." && pwd)" +BIN="${NYASH_BIN:-$ROOT_DIR/target/release/hakorune}" + +if [ ! -x "$BIN" ]; then + echo "[analyze] hakorune not built: $BIN" >&2 + echo "Run: cargo build --release" >&2 + exit 2 +fi + +# Run analyzer rule tests (HC011 dead methods) via run_tests.sh +pushd "$ROOT_DIR" >/dev/null +bash tools/hako_check/run_tests.sh +popd >/dev/null + +echo "[analyze/quick] HC011 dead methods tests: OK" +exit 0