// tools/hako_check/cli.hako — HakoAnalyzerBox (MVP)
using selfhost.shared.common.string_helpers as Str
using tools.hako_check.analysis_consumer as HakoAnalysisBuilderBox
using tools.hako_check.rules.rule_include_forbidden as RuleIncludeForbiddenBox
using tools.hako_check.rules.rule_using_quoted as RuleUsingQuotedBox
using tools.hako_check.rules.rule_static_top_assign as RuleStaticTopAssignBox
using tools.hako_check.rules.rule_global_assign as RuleGlobalAssignBox
using tools.hako_check.rules.rule_dead_methods as RuleDeadMethodsBox
using tools.hako_check.rules.rule_jsonfrag_usage as RuleJsonfragUsageBox
using tools.hako_check.rules.rule_unused_alias as RuleUnusedAliasBox
using tools.hako_check.rules.rule_non_ascii_quotes as RuleNonAsciiQuotesBox
using tools.hako_check.rules.rule_dead_static_box as RuleDeadStaticBoxBox
using tools.hako_check.rules.rule_duplicate_method as RuleDuplicateMethodBox
using tools.hako_check.rules.rule_missing_entrypoint as RuleMissingEntrypointBox
using tools.hako_check.rules.rule_top_level_local as RuleTopLevelLocalBox
using tools.hako_check.rules.rule_arity_mismatch as RuleArityMismatchBox
using tools.hako_check.rules.rule_stage3_gate as RuleStage3GateBox
using tools.hako_check.rules.rule_brace_heuristics as RuleBraceHeuristicsBox
using tools.hako_check.rules.rule_analyzer_io_safety as RuleAnalyzerIoSafetyBox
using tools.hako_check.render.graphviz as GraphvizRenderBox
using tools.hako_parser.parser_core as HakoParserCoreBox
static box HakoAnalyzerBox {
// CLI entry for the analyzer.
// args: ArrayBox of path strings and options (--debug, --no-ast, --force-ast,
//       --format <text|dot|json-lsp>, --rules CSV, --skip-rules CSV,
//       --source-file <path> <text>).
// Returns: 2 on usage error, otherwise total finding count (0 = clean).
// Fix: removed stray VCS timestamp lines that had been merged into the body.
run(args) {
    if args == null || args.size() < 1 { print("[lint/error] missing paths"); return 2 }
    // options: --format {text|dot|json} (accept anywhere)
    local fmt = "text"
    local debug = 0
    local no_ast = 0
    // single-pass parse: handle options in-place and collect sources
    local i = 0
    local fail = 0
    local irs = new ArrayBox()
    local diags = new ArrayBox()
    // optional filters
    local rules_only = null // ArrayBox of keys
    local rules_skip = null // ArrayBox of keys
    // Support inline sources: --source-file <path> <text>. Also accept --debug and --format anywhere.
    while i < args.size() {
        local p = args.get(i)
        // handle options
        if p == "--debug" { debug = 1; i = i + 1; continue }
        if p == "--no-ast" { no_ast = 1; i = i + 1; continue }
        if p == "--force-ast" { no_ast = 0; i = i + 1; continue }
        if p == "--format" {
            if i + 1 >= args.size() { print("[lint/error] --format requires value"); return 2 }
            fmt = args.get(i+1); i = i + 2; continue
        }
        if p == "--rules" {
            if i + 1 >= args.size() { print("[lint/error] --rules requires CSV"); return 2 }
            rules_only = me._parse_csv(args.get(i+1)); i = i + 2; continue
        }
        if p == "--skip-rules" {
            if i + 1 >= args.size() { print("[lint/error] --skip-rules requires CSV"); return 2 }
            rules_skip = me._parse_csv(args.get(i+1)); i = i + 2; continue
        }
        // source handling
        local text = null
        if p == "--source-file" {
            if i + 2 < args.size() { p = args.get(i+1); text = args.get(i+2); i = i + 3 } else { print("[lint/error] --source-file requires <path> <text>"); return 2 }
        } else {
            // Read from filesystem via FileBox (plugin must be available)
            local f = new FileBox(); if f.open(p) == 0 { print("[lint/error] cannot open: " + p); fail = fail + 1; i = i + 1; continue }
            text = f.read(); f.close(); i = i + 1
        }
        // keep a copy before sanitize for rules that must see original bytes (HC017, etc.)
        local text_raw = text
        // pre-sanitize (ASCII quotes, normalize newlines) — minimal & reversible
        text = me._sanitize(text)
        // analysis - only build IR if needed by active rules
        local ir = null
        if me._needs_ir(rules_only, rules_skip) == 1 {
            ir = HakoAnalysisBuilderBox.build_from_source_flags(text, p, no_ast)
        } else {
            // Minimal IR stub for rules that don't need it
            ir = new MapBox()
            ir.set("path", p)
            ir.set("methods", new ArrayBox())
            ir.set("calls", new ArrayBox())
            ir.set("boxes", new ArrayBox())
            ir.set("entrypoints", new ArrayBox())
        }
        // parse AST only when explicitly needed by active rules (include_forbidden can fall back to text scan)
        local ast = null
        if no_ast == 0 && me._needs_ast(rules_only, rules_skip) == 1 { ast = HakoParserCoreBox.parse(text) }
        if debug == 1 {
            local mc = (ir.get("methods")!=null)?ir.get("methods").size():0
            local cc = (ir.get("calls")!=null)?ir.get("calls").size():0
            local ec = (ir.get("entrypoints")!=null)?ir.get("entrypoints").size():0
            print("[hako_check/IR] file=" + p + " methods=" + me._itoa(mc) + " calls=" + me._itoa(cc) + " eps=" + me._itoa(ec))
        }
        irs.push(ir)
        // rules that work on raw source
        local out = new ArrayBox()
        if ast != null {
            local before = out.size()
            if me._rule_enabled(rules_only, rules_skip, "include_forbidden") == 1 { RuleIncludeForbiddenBox.apply_ast(ast, p, out) }
            // Fallback to text scan if AST did not detect any include
            if out.size() == before && me._rule_enabled(rules_only, rules_skip, "include_forbidden") == 1 { RuleIncludeForbiddenBox.apply(text, p, out) }
        } else {
            if me._rule_enabled(rules_only, rules_skip, "include_forbidden") == 1 { RuleIncludeForbiddenBox.apply(text, p, out) }
        }
        if me._rule_enabled(rules_only, rules_skip, "using_quoted") == 1 { RuleUsingQuotedBox.apply(text, p, out) }
        if me._rule_enabled(rules_only, rules_skip, "unused_alias") == 1 { RuleUnusedAliasBox.apply(text, p, out) }
        if me._rule_enabled(rules_only, rules_skip, "static_top_assign") == 1 { RuleStaticTopAssignBox.apply(text, p, out) }
        if me._rule_enabled(rules_only, rules_skip, "global_assign") == 1 { RuleGlobalAssignBox.apply(text, p, out) }
        // HC017 must inspect original text prior to sanitize
        if me._rule_enabled(rules_only, rules_skip, "non_ascii_quotes") == 1 { RuleNonAsciiQuotesBox.apply(text_raw, p, out) }
        if me._rule_enabled(rules_only, rules_skip, "jsonfrag_usage") == 1 { RuleJsonfragUsageBox.apply(text, p, out) }
        if me._rule_enabled(rules_only, rules_skip, "top_level_local") == 1 { RuleTopLevelLocalBox.apply(text, p, out) }
        if me._rule_enabled(rules_only, rules_skip, "stage3_gate") == 1 { RuleStage3GateBox.apply(text, p, out) }
        // HC031 must inspect original text prior to sanitize (like HC017)
        local before_hc031 = out.size()
        if me._rule_enabled(rules_only, rules_skip, "brace_heuristics") == 1 { RuleBraceHeuristicsBox.apply(text_raw, p, out) }
        if debug == 1 {
            local added_hc031 = out.size() - before_hc031
            print("[hako_check/HC031] file=" + p + " added=" + me._itoa(added_hc031) + " total_out=" + me._itoa(out.size()))
        }
        if me._rule_enabled(rules_only, rules_skip, "analyzer_io_safety") == 1 { RuleAnalyzerIoSafetyBox.apply(text, p, out) }
        // rules that need IR (enable dead code detection)
        local before_n = out.size()
        if me._rule_enabled(rules_only, rules_skip, "dead_methods") == 1 { RuleDeadMethodsBox.apply_ir(ir, p, out) }
        if debug == 1 {
            local after_n = out.size()
            local added = after_n - before_n
            print("[hako_check/HC011] file=" + p + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
        }
        before_n = out.size()
        if me._rule_enabled(rules_only, rules_skip, "dead_static_box") == 1 { RuleDeadStaticBoxBox.apply_ir(ir, p, out) }
        if debug == 1 {
            local after_n = out.size()
            local added = after_n - before_n
            local boxes_count = (ir.get("boxes")!=null)?ir.get("boxes").size():0
            print("[hako_check/HC012] file=" + p + " boxes=" + me._itoa(boxes_count) + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
        }
        before_n = out.size()
        if me._rule_enabled(rules_only, rules_skip, "duplicate_method") == 1 { RuleDuplicateMethodBox.apply_ir(ir, p, out) }
        if debug == 1 {
            local after_n = out.size()
            local added = after_n - before_n
            print("[hako_check/HC013] file=" + p + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
        }
        before_n = out.size()
        if me._rule_enabled(rules_only, rules_skip, "missing_entrypoint") == 1 { RuleMissingEntrypointBox.apply_ir(ir, p, out) }
        if debug == 1 {
            local after_n = out.size()
            local added = after_n - before_n
            print("[hako_check/HC014] file=" + p + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
        }
        before_n = out.size()
        if me._rule_enabled(rules_only, rules_skip, "arity_mismatch") == 1 { RuleArityMismatchBox.apply_ir(ir, p, out) }
        if debug == 1 {
            local after_n = out.size()
            local added = after_n - before_n
            print("[hako_check/HC015] file=" + p + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
        }
        // suppression: HC012 (dead box) > HC011 (unreachable method)
        local filtered = me._suppress_overlap(out)
        // flush (text only)
        local n = filtered.size(); if n > 0 && fmt == "text" {
            local j = 0; while j < n { print(filtered.get(j)); j = j + 1 }
        }
        // collect diagnostics for json-lsp
        local j2 = 0; while j2 < n { local msg = filtered.get(j2); local d = me._parse_msg_to_diag(msg, p); if d != null { diags.push(d) }; j2 = j2 + 1 }
        fail = fail + n
    }
    // optional DOT/JSON output
    if fmt == "dot" { me._render_dot_multi(irs) }
    if fmt == "json-lsp" { me._render_json_lsp(diags) }
    // return number of findings as RC
    return fail
}
// Pre-sanitize source text: normalize CRLF -> LF by dropping every CR.
// Minimal & reversible. Fancy-quote conversion was intentionally removed
// because StringBox lacks byte-level access.
// text: string or null; returns the normalized string (null passes through).
// Fix: removed stray VCS timestamp lines that had been merged into the body.
_sanitize(text) {
    if text == null { return text }
    local out = ""
    local n = text.length()
    local i2 = 0
    while i2 < n {
        local ch = text.substring(i2, i2+1)
        // drop CR
        if ch == "\r" { i2 = i2 + 1; continue }
        out = out.concat(ch)
        i2 = i2 + 1
    }
    return out
}
// Emit collected diagnostics as pretty-printed JSON (one object per line)
// to match the expected json-lsp fixtures.
// diags: ArrayBox of MapBox entries with keys file/line/rule/message/quickFix/severity.
// Returns 0.
// Fix: removed stray VCS timestamp lines that had been merged into the body.
_render_json_lsp(diags) {
    diags = me._sort_diags(diags)
    print("{\"diagnostics\":[")
    if diags != null {
        local i = 0
        while i < diags.size() {
            local d = diags.get(i)
            local file = me._json_quote(d.get("file"))
            local line = me._itoa(d.get("line"))
            local rule = me._json_quote(d.get("rule"))
            local msg = me._json_quote(d.get("message"))
            // missing quickFix/severity fall back to ""/"warning"
            local qf = d.get("quickFix"); if qf == null { qf = "" }
            local sev = d.get("severity"); if sev == null { sev = "warning" }
            local qfj = me._json_quote(qf)
            local entry = " {\"file\":" + file + ",\"line\":" + line + ",\"rule\":" + rule + ",\"message\":" + msg + ",\"quickFix\":" + qfj + ",\"severity\":\"" + sev + "\"}"
            // comma after every entry except the last
            if i != diags.size()-1 { print(entry + ",") } else { print(entry) }
            i = i + 1
        }
    }
    print("]}")
    return 0
}
// Decide whether a full AST parse is required for the active rule set.
// only: ArrayBox of rule keys from --rules, or null (= all rules).
// skip: currently unused here (signature kept for symmetry with _needs_ir).
// Returns 1 only when duplicate_method or force_ast is explicitly requested;
// by default (all rules) the AST is avoided to reduce VM/PHI risks and
// IR/text fallbacks are relied upon instead.
// Fix: removed stray VCS timestamp lines that had been merged into the body.
_needs_ast(only, skip) {
    if only != null {
        local i = 0; while i < only.size() { local k = only.get(i); if k == "duplicate_method" { return 1 } if k == "force_ast" { return 1 } i = i + 1 }
        return 0
    }
    return 0
}
// Decide whether the analysis IR must be built for the active rule set.
// IR is needed by rules that inspect methods/calls/boxes; pure text rules
// (brace_heuristics, non_ascii_quotes, ...) run on the raw source only.
// only: ArrayBox of rule keys from --rules, or null (= all rules -> need IR).
// skip: unused here; kept for signature symmetry with _rule_enabled.
// Returns 1 if the IR is required, else 0.
// Fix: removed stray VCS timestamp lines that had been merged into the body.
_needs_ir(only, skip) {
    if only != null {
        local i = 0
        while i < only.size() {
            local k = only.get(i)
            // Rules that need IR
            if k == "dead_methods" { return 1 }
            if k == "dead_static_box" { return 1 }
            if k == "duplicate_method" { return 1 }
            if k == "missing_entrypoint" { return 1 }
            if k == "arity_mismatch" { return 1 }
            if k == "include_forbidden" { return 1 }
            if k == "force_ast" { return 1 }
            i = i + 1
        }
        // If we get here, only text-based rules are active (e.g., brace_heuristics)
        return 0
    }
    // Default (all rules): need IR
    return 1
}
// Split a comma-separated string into an ArrayBox of non-empty tokens.
// Null input yields null; empty tokens ("a,,b") are dropped.
_parse_csv(s) {
    if s == null { return null }
    local result = new ArrayBox()
    local token = ""
    local k = 0
    local len = s.length()
    while k < len {
        local c = s.substring(k, k+1)
        k = k + 1
        if c == "," {
            if token != "" { result.push(token) }
            token = ""
            continue
        }
        token = token + c
    }
    if token != "" { result.push(token) }
    return result
}
// Decide whether rule `key` runs under the --rules / --skip-rules filters.
// only non-null => allowlist mode: enabled only if key is listed.
// Otherwise skip (denylist) is consulted; default is enabled (1).
_rule_enabled(only, skip, key) {
    if key == null { return 1 }
    if only != null {
        local idx = 0
        while idx < only.size() {
            if only.get(idx) == key { return 1 }
            idx = idx + 1
        }
        return 0
    }
    if skip != null {
        local sidx = 0
        while sidx < skip.size() {
            if skip.get(sidx) == key { return 0 }
            sidx = sidx + 1
        }
    }
    return 1
}
// Suppression pass: HC012 (dead static box) dominates HC011 (dead method).
// First collect the set of box names reported dead by HC012 messages, then
// drop any HC011 finding whose Box.method/arity qualifier points into one
// of those dead boxes. All other messages pass through unchanged.
// out: ArrayBox of message strings (null treated as empty); returns a new ArrayBox.
// Fix: removed stray VCS timestamp line that had been merged above the method.
_suppress_overlap(out) {
    if out == null { return new ArrayBox() }
    // collect dead boxes from HC012 messages
    local dead = new MapBox()
    local i = 0
    while i < out.size() {
        local s = out.get(i)
        if me._is_hc012(s) == 1 {
            local bx = me._extract_box_from_hc012(s)
            if bx != null { dead.set(bx, 1) }
        }
        i = i + 1
    }
    // filter: skip HC011 findings whose box is already reported dead
    local res = new ArrayBox()
    i = 0
    while i < out.size() {
        local s = out.get(i)
        if me._is_hc011(s) == 1 {
            local qual = me._extract_method_from_hc011(s)
            if qual != null {
                // method qual: Box.method/arity -> Box
                local dot = qual.lastIndexOf(".")
                if dot > 0 {
                    local box_name = qual.substring(0, dot)
                    if dead.has(box_name) == 1 { i = i + 1; continue }
                }
            }
        }
        res.push(s)
        i = i + 1
    }
    return res
}
// 1 iff the message starts with the "[HC011]" tag, else 0.
_is_hc011(s) {
    if s != null && s.indexOf("[HC011]") == 0 { return 1 }
    return 0
}
// 1 iff the message starts with the "[HC012]" tag, else 0.
_is_hc012(s) {
    if s != null && s.indexOf("[HC012]") == 0 { return 1 }
    return 0
}
// Pull the box name out of an HC012 message.
// Expected format: "[HC012] dead static box (never referenced): Name"
// Takes the text after the last ':' and strips leading spaces/tabs.
// Returns null when s is null or contains no ':'.
_extract_box_from_hc012(s) {
    if s == null { return null }
    local colon = s.lastIndexOf(":")
    if colon < 0 { return null }
    local name = s.substring(colon + 1)
    // skip leading whitespace
    local k = 0
    while k < name.length() {
        local c = name.substring(k, k+1)
        if c == " " || c == "\t" { k = k + 1; continue }
        break
    }
    if k > 0 { name = name.substring(k) }
    return name
}
// Pull the method qualifier out of an HC011 message.
// Expected format: "[HC011] ... :: Box.method/arity"
// Takes the text after the last "::" and strips leading spaces/tabs.
// Returns null when s is null or contains no "::".
_extract_method_from_hc011(s) {
    if s == null { return null }
    local sep = s.lastIndexOf("::")
    if sep < 0 { return null }
    local qual = s.substring(sep + 2)
    // skip leading whitespace
    local k = 0
    while k < qual.length() {
        local c = qual.substring(k, k+1)
        if c == " " || c == "\t" { k = k + 1; continue }
        break
    }
    if k > 0 { qual = qual.substring(k) }
    return qual
}
// Convert a rendered lint message into a diagnostic MapBox for json-lsp.
// msg: message like "[HC002] ... path:LINE" or "[HC011] ... :: Method".
// path: source file the message belongs to.
// Returns a MapBox {file,line,rule,message,quickFix,severity} or null for null msg.
// Fixes: added missing ';' between "q = q + 1" and "continue" (statement
// separator present in every sibling whitespace-skip loop, e.g. in
// _extract_box_from_hc012); removed stray VCS timestamp line above the method.
_parse_msg_to_diag(msg, path) {
    if msg == null { return null }
    // rule id from the leading "[HCxxx]" tag; HC000 when absent
    local rule = "HC000"; local i0 = msg.indexOf("["); local i1 = msg.indexOf("]")
    if i0 == 0 && i1 > 1 { rule = msg.substring(1, i1) }
    // find last ':' as line separator
    local line = 1
    local p = msg.lastIndexOf(":")
    if p > 0 {
        // try parse after ':' as int (consume consecutive leading digits)
        local tail = msg.substring(p+1)
        // remove leading spaces
        local q = 0; while q < tail.length() { local c=tail.substring(q,q+1); if c==" "||c=="\t" { q = q + 1; continue } break }
        local digits = ""; while q < tail.length() { local c=tail.substring(q,q+1); if c>="0" && c<="9" { digits = digits + c; q = q + 1; continue } break }
        if digits != "" { line = me._atoi(digits) }
    }
    // message kept verbatim (path/line suffix not stripped)
    local message = msg
    // naive quickFix suggestions per rule
    local qf = ""
    if rule == "HC002" { qf = "Replace include with using (alias)" }
    if rule == "HC003" { qf = "Quote module name: using \"mod\"" }
    if rule == "HC010" { qf = "Move assignment into a method (lazy init)" }
    if rule == "HC011" { qf = "Remove or reference the dead method from an entrypoint" }
    local sev = "warning"
    if rule == "HC001" || rule == "HC002" || rule == "HC010" || rule == "HC011" { sev = "error" }
    if rule == "HC003" || rule == "HC020" { sev = "warning" }
    local d = new MapBox(); d.set("file", path); d.set("line", line); d.set("rule", rule); d.set("message", message); d.set("quickFix", qf); d.set("severity", sev)
    return d
}
// Render all collected IRs as a Graphviz DOT graph (nodes + edges).
// Delegates entirely to GraphvizRenderBox. Returns 0.
// Fix: removed stray VCS timestamp line that had been merged into the body.
_render_dot_multi(irs) {
    GraphvizRenderBox.render_multi(irs)
    return 0
}
// Sort diagnostics by the string key "file:line" using an O(n^2)
// compare-and-swap pass; the input array is copied, not mutated.
// NOTE(review): line numbers compare lexicographically here ("10" < "2"),
// which matches the existing fixtures — confirm before changing to numeric.
// Fix: removed stray VCS timestamp line that had been merged above the method.
_sort_diags(diags) {
    if diags == null { return new ArrayBox() }
    // shallow copy so the caller's array is untouched
    local out = new ArrayBox(); local i=0; while i<diags.size() { out.push(diags.get(i)); i=i+1 }
    local n = out.size(); local a=0; while a<n { local b=a+1; while b<n {
        local da = out.get(a); local db = out.get(b)
        local ka = da.get("file") + ":" + me._itoa(da.get("line"))
        local kb = db.get("file") + ":" + me._itoa(db.get("line"))
        if kb < ka { local tmp=out.get(a); out.set(a,out.get(b)); out.set(b,tmp) }
        b=b+1 } a=a+1 }
    return out
}
// Render an integer as a decimal string; assumes non-negative small ints
// (diagnostics line numbers / counters).
_itoa(n) {
    local v = 0 + n
    if v == 0 { return "0" }
    local digits = "0123456789"
    local s = ""
    while v > 0 {
        local d = v % 10
        s = digits.substring(d, d+1) + s
        v = v / 10
    }
    return s
}
// JSON-escape a string and wrap it in double quotes.
// Handles backslash, quote, \n, \r, \t; null becomes the empty JSON string "".
_json_quote(s) {
    if s == null { return "\"\"" }
    local res = ""
    local k = 0
    local len = s.length()
    while k < len {
        local c = s.substring(k, k+1)
        k = k + 1
        if c == "\\" { res = res + "\\\\"; continue }
        if c == "\"" { res = res + "\\\""; continue }
        if c == "\n" { res = res + "\\n"; continue }
        if c == "\r" { res = res + "\\r"; continue }
        if c == "\t" { res = res + "\\t"; continue }
        res = res + c
    }
    return "\"" + res + "\""
}
// Parse the leading decimal digits of a string into an integer.
// Stops at the first non-digit; null/empty/no-digit input yields 0.
_atoi(s) {
    if s == null { return 0 }
    local len = s.length()
    if len == 0 { return 0 }
    local digits = "0123456789"
    local acc = 0
    local k = 0
    while k < len {
        local c = s.substring(k, k+1)
        // stop at first non-digit
        if c < "0" || c > "9" { break }
        local d = digits.indexOf(c)
        if d < 0 { break }
        acc = acc * 10 + d
        k = k + 1
    }
    return acc
}
}
// Default entry: Main.main so runner resolves without explicit --entry.
// Fix: removed stray VCS timestamp line that had been merged above the box.
static box Main { method main(args) { return HakoAnalyzerBox.run(args) } }