hakorune/tools/hako_check/cli.hako

// tools/hako_check/cli.hako — HakoAnalyzerBox (MVP)
using selfhost.shared.common.string_helpers as Str
using tools.hako_check.analysis_consumer as HakoAnalysisBuilderBox
using tools.hako_check.rules.rule_include_forbidden as RuleIncludeForbiddenBox
using tools.hako_check.rules.rule_using_quoted as RuleUsingQuotedBox
using tools.hako_check.rules.rule_static_top_assign as RuleStaticTopAssignBox
using tools.hako_check.rules.rule_global_assign as RuleGlobalAssignBox
using tools.hako_check.rules.rule_dead_methods as RuleDeadMethodsBox
using tools.hako_check.rules.rule_jsonfrag_usage as RuleJsonfragUsageBox
using tools.hako_check.rules.rule_unused_alias as RuleUnusedAliasBox
using tools.hako_check.rules.rule_non_ascii_quotes as RuleNonAsciiQuotesBox
using tools.hako_check.rules.rule_dead_static_box as RuleDeadStaticBoxBox
using tools.hako_check.rules.rule_duplicate_method as RuleDuplicateMethodBox
using tools.hako_check.rules.rule_missing_entrypoint as RuleMissingEntrypointBox
using tools.hako_check.rules.rule_top_level_local as RuleTopLevelLocalBox
using tools.hako_check.rules.rule_arity_mismatch as RuleArityMismatchBox
using tools.hako_check.rules.rule_stage3_gate as RuleStage3GateBox
using tools.hako_check.rules.rule_brace_heuristics as RuleBraceHeuristicsBox
using tools.hako_check.rules.rule_analyzer_io_safety as RuleAnalyzerIoSafetyBox
using tools.hako_check.render.graphviz as GraphvizRenderBox
using tools.hako_parser.parser_core as HakoParserCoreBox

static box HakoAnalyzerBox {
  run(args) {
    if args == null || args.size() < 1 { print("[lint/error] missing paths"); return 2 }
    // options: --format {text|dot|json} (accept anywhere)
    local fmt = "text"
    local debug = 0
    local no_ast = 0
    // single-pass parse: handle options in-place and collect sources
    local i = 0
    local fail = 0
    local irs = new ArrayBox()
    local diags = new ArrayBox()
    // optional filters
    local rules_only = null  // ArrayBox of keys
    local rules_skip = null  // ArrayBox of keys
    // Support inline sources: --source-file <path> <text>. Also accept --debug and --format anywhere.
    while i < args.size() {
      local p = args.get(i)
      // handle options
      if p == "--debug" { debug = 1; i = i + 1; continue }
      if p == "--no-ast" { no_ast = 1; i = i + 1; continue }
      if p == "--force-ast" { no_ast = 0; i = i + 1; continue }
      if p == "--format" {
        if i + 1 >= args.size() { print("[lint/error] --format requires value"); return 2 }
        fmt = args.get(i+1); i = i + 2; continue
      }
      if p == "--rules" {
        if i + 1 >= args.size() { print("[lint/error] --rules requires CSV"); return 2 }
        rules_only = me._parse_csv(args.get(i+1)); i = i + 2; continue
      }
      if p == "--skip-rules" {
        if i + 1 >= args.size() { print("[lint/error] --skip-rules requires CSV"); return 2 }
        rules_skip = me._parse_csv(args.get(i+1)); i = i + 2; continue
      }
      // source handling
      local text = null
      if p == "--source-file" {
        if i + 2 < args.size() { p = args.get(i+1); text = args.get(i+2); i = i + 3 } else { print("[lint/error] --source-file requires <path> <text>"); return 2 }
      } else {
        // Read from filesystem via FileBox (plugin must be available)
        local f = new FileBox(); if f.open(p) == 0 { print("[lint/error] cannot open: " + p); fail = fail + 1; i = i + 1; continue }
        text = f.read(); f.close(); i = i + 1
      }
      // keep a copy before sanitize for rules that must see original bytes (HC017, etc.)
      local text_raw = text
      // pre-sanitize (ASCII quotes, normalize newlines) — minimal & reversible
      text = me._sanitize(text)
      // analysis - only build IR if needed by active rules
      local ir = null
      if me._needs_ir(rules_only, rules_skip) == 1 {
        ir = HakoAnalysisBuilderBox.build_from_source_flags(text, p, no_ast)
      } else {
        // Minimal IR stub for rules that don't need it
        ir = new MapBox()
        ir.set("path", p)
        ir.set("methods", new ArrayBox())
        ir.set("calls", new ArrayBox())
        ir.set("boxes", new ArrayBox())
        ir.set("entrypoints", new ArrayBox())
      }
      // parse AST only when explicitly needed by active rules（include_forbiddenはfallback可）
      local ast = null
      if no_ast == 0 && me._needs_ast(rules_only, rules_skip) == 1 { ast = HakoParserCoreBox.parse(text) }
      if debug == 1 {
        local mc = (ir.get("methods")!=null)?ir.get("methods").size():0
        local cc = (ir.get("calls")!=null)?ir.get("calls").size():0
        local ec = (ir.get("entrypoints")!=null)?ir.get("entrypoints").size():0
        print("[hako_check/IR] file=" + p + " methods=" + me._itoa(mc) + " calls=" + me._itoa(cc) + " eps=" + me._itoa(ec))
      }
      irs.push(ir)
      // rules that work on raw source
      local out = new ArrayBox()
      if ast != null {
        local before = out.size()
        if me._rule_enabled(rules_only, rules_skip, "include_forbidden") == 1 { RuleIncludeForbiddenBox.apply_ast(ast, p, out) }
        // Fallback to text scan if AST did not detect any include
        if out.size() == before && me._rule_enabled(rules_only, rules_skip, "include_forbidden") == 1 { RuleIncludeForbiddenBox.apply(text, p, out) }
      } else {
        if me._rule_enabled(rules_only, rules_skip, "include_forbidden") == 1 { RuleIncludeForbiddenBox.apply(text, p, out) }
      }
      if me._rule_enabled(rules_only, rules_skip, "using_quoted") == 1 { RuleUsingQuotedBox.apply(text, p, out) }
      if me._rule_enabled(rules_only, rules_skip, "unused_alias") == 1 { RuleUnusedAliasBox.apply(text, p, out) }
      if me._rule_enabled(rules_only, rules_skip, "static_top_assign") == 1 { RuleStaticTopAssignBox.apply(text, p, out) }
      if me._rule_enabled(rules_only, rules_skip, "global_assign") == 1 { RuleGlobalAssignBox.apply(text, p, out) }
      // HC017 must inspect original text prior to sanitize
      if me._rule_enabled(rules_only, rules_skip, "non_ascii_quotes") == 1 { RuleNonAsciiQuotesBox.apply(text_raw, p, out) }
      if me._rule_enabled(rules_only, rules_skip, "jsonfrag_usage") == 1 { RuleJsonfragUsageBox.apply(text, p, out) }
      if me._rule_enabled(rules_only, rules_skip, "top_level_local") == 1 { RuleTopLevelLocalBox.apply(text, p, out) }
      if me._rule_enabled(rules_only, rules_skip, "stage3_gate") == 1 { RuleStage3GateBox.apply(text, p, out) }
      // HC031 must inspect original text prior to sanitize (like HC017)
      local before_hc031 = out.size()
      if me._rule_enabled(rules_only, rules_skip, "brace_heuristics") == 1 { RuleBraceHeuristicsBox.apply(text_raw, p, out) }
      if debug == 1 {
        local added_hc031 = out.size() - before_hc031
        print("[hako_check/HC031] file=" + p + " added=" + me._itoa(added_hc031) + " total_out=" + me._itoa(out.size()))
      }
      if me._rule_enabled(rules_only, rules_skip, "analyzer_io_safety") == 1 { RuleAnalyzerIoSafetyBox.apply(text, p, out) }
      // rules that need IR (enable dead code detection)
      local before_n = out.size()
      if me._rule_enabled(rules_only, rules_skip, "dead_methods") == 1 { RuleDeadMethodsBox.apply_ir(ir, p, out) }
      if debug == 1 {
        local after_n = out.size()
        local added = after_n - before_n
        print("[hako_check/HC011] file=" + p + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
      }
      before_n = out.size()
      if me._rule_enabled(rules_only, rules_skip, "dead_static_box") == 1 { RuleDeadStaticBoxBox.apply_ir(ir, p, out) }
      if debug == 1 {
        local after_n = out.size()
        local added = after_n - before_n
        local boxes_count = (ir.get("boxes")!=null)?ir.get("boxes").size():0
        print("[hako_check/HC012] file=" + p + " boxes=" + me._itoa(boxes_count) + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
      }
      before_n = out.size()
      if me._rule_enabled(rules_only, rules_skip, "duplicate_method") == 1 { RuleDuplicateMethodBox.apply_ir(ir, p, out) }
      if debug == 1 {
        local after_n = out.size()
        local added = after_n - before_n
        print("[hako_check/HC013] file=" + p + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
      }
      before_n = out.size()
      if me._rule_enabled(rules_only, rules_skip, "missing_entrypoint") == 1 { RuleMissingEntrypointBox.apply_ir(ir, p, out) }
      if debug == 1 {
        local after_n = out.size()
        local added = after_n - before_n
        print("[hako_check/HC014] file=" + p + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
      }
      before_n = out.size()
      if me._rule_enabled(rules_only, rules_skip, "arity_mismatch") == 1 { RuleArityMismatchBox.apply_ir(ir, p, out) }
      if debug == 1 {
        local after_n = out.size()
        local added = after_n - before_n
        print("[hako_check/HC015] file=" + p + " added=" + me._itoa(added) + " total_out=" + me._itoa(after_n))
      }
      // suppression: HC012(dead box) > HC011(unreachable method)
      local filtered = me._suppress_overlap(out)
      // flush (text only)
      local n = filtered.size(); if n > 0 && fmt == "text" {
        local j = 0; while j < n { print(filtered.get(j)); j = j + 1 }
      }
      // collect diagnostics for json-lsp
      local j2 = 0; while j2 < n { local msg = filtered.get(j2); local d = me._parse_msg_to_diag(msg, p); if d != null { diags.push(d) }; j2 = j2 + 1 }
      fail = fail + n
    }
    // optional DOT/JSON output
    if fmt == "dot" { me._render_dot_multi(irs) }
    if fmt == "json-lsp" { me._render_json_lsp(diags) }
    // return number of findings as RC
    return fail
  }
  // no-op
  _sanitize(text) {
    if text == null { return text }
    // Normalize CRLF -> LF
    local out = ""
    local n = text.length()
    local i2 = 0
    while i2 < n {
      local ch = text.substring(i2, i2+1)
      // drop CR
      if ch == "\r" { i2 = i2 + 1; continue }
      // NOTE: Fancy quote conversion removed - StringBox lacks byte-level access
      out = out.concat(ch)
      i2 = i2 + 1
    }
    return out
  }
  _render_json_lsp(diags) {
    // Emit diagnostics pretty-printed to match expected fixtures
    diags = me._sort_diags(diags)
    print("{\"diagnostics\":[")
    if diags != null {
      local i = 0
      while i < diags.size() {
        local d = diags.get(i)
        local file = me._json_quote(d.get("file"))
        local line = me._itoa(d.get("line"))
        local rule = me._json_quote(d.get("rule"))
        local msg = me._json_quote(d.get("message"))
        local qf = d.get("quickFix"); if qf == null { qf = "" }
        local sev = d.get("severity"); if sev == null { sev = "warning" }
        local qfj = me._json_quote(qf)
        local entry = "  {\"file\":" + file + ",\"line\":" + line + ",\"rule\":" + rule + ",\"message\":" + msg + ",\"quickFix\":" + qfj + ",\"severity\":\"" + sev + "\"}"
        if i != diags.size()-1 { print(entry + ",") } else { print(entry) }
        i = i + 1
      }
    }
    print("]}")
    return 0
  }
  _needs_ast(only, skip) {
    // Parse AST when duplicate_method is explicitly requested (needs method spans/definitions precision).
    if only != null {
      local i = 0; while i < only.size() { local k = only.get(i); if k == "duplicate_method" { return 1 } if k == "force_ast" { return 1 } i = i + 1 }
      return 0
    }
    // Default (all rules): avoid AST to reduce VM/PHI risks; rely on IR/text fallbacks.
    return 0
  }
  _needs_ir(only, skip) {
    // IR is needed for rules that analyze methods, calls, or boxes
    // Text-only rules (brace_heuristics, non_ascii_quotes, etc.) don't need IR
    if only != null {
      local i = 0
      while i < only.size() {
        local k = only.get(i)
        // Rules that need IR
        if k == "dead_methods" { return 1 }
        if k == "dead_static_box" { return 1 }
        if k == "duplicate_method" { return 1 }
        if k == "missing_entrypoint" { return 1 }
        if k == "arity_mismatch" { return 1 }
        if k == "include_forbidden" { return 1 }
        if k == "force_ast" { return 1 }
        i = i + 1
      }
      // If we get here, only text-based rules are active (e.g., brace_heuristics)
      return 0
    }
    // Default (all rules): need IR
    return 1
  }
  _parse_csv(s) {
    if s == null { return null }
    local arr = new ArrayBox();
    local cur = ""; local i = 0; while i < s.length() {
      local ch = s.substring(i,i+1)
      if ch == "," { if cur != "" { arr.push(cur) }; cur = ""; i = i + 1; continue }
      cur = cur + ch; i = i + 1
    }
    if cur != "" { arr.push(cur) }
    return arr
  }
  _rule_enabled(only, skip, key) {
    if key == null { return 1 }
    if only != null {
      // enabled only if key is present
      local i = 0; while i < only.size() { if only.get(i) == key { return 1 } i = i + 1 }
      return 0
    }
    if skip != null {
      local j = 0; while j < skip.size() { if skip.get(j) == key { return 0 } j = j + 1 }
    }
    return 1
  }
  // Build dead-box set and drop HC011 for methods inside dead boxes
  _suppress_overlap(out) {
    if out == null { return new ArrayBox() }
    // collect dead boxes from HC012 messages
    local dead = new MapBox()
    local i = 0
    while i < out.size() {
      local s = out.get(i)
      if me._is_hc012(s) == 1 {
        local bx = me._extract_box_from_hc012(s)
        if bx != null { dead.set(bx, 1) }
      }
      i = i + 1
    }
    // filter
    local res = new ArrayBox()
    i = 0
    while i < out.size() {
      local s = out.get(i)
      if me._is_hc011(s) == 1 {
        local qual = me._extract_method_from_hc011(s)
        if qual != null {
          // method qual: Box.method/arity → Box
          local dot = qual.lastIndexOf(".")
          if dot > 0 {
          local box_name = qual.substring(0, dot)
            if dead.has(box_name) == 1 { i = i + 1; continue }
          }
        }
      }
      res.push(s)
      i = i + 1
    }
    return res
  }
  _is_hc011(s) {
    if s == null { return 0 }
    if s.indexOf("[HC011]") == 0 { return 1 }
    return 0
  }
  _is_hc012(s) {
    if s == null { return 0 }
    if s.indexOf("[HC012]") == 0 { return 1 }
    return 0
  }
  _extract_box_from_hc012(s) {
    // format: [HC012] dead static box (never referenced): Name
    if s == null { return null }
    local p = s.lastIndexOf(":")
    if p < 0 { return null }
    local name = s.substring(p+1)
    // trim spaces
    local t = 0; while t < name.length() { local c=name.substring(t,t+1); if c==" "||c=="\t" { t=t+1; continue } break }
    if t > 0 { name = name.substring(t) }
    return name
  }
  _extract_method_from_hc011(s) {
    // format: [HC011] ... :: Box.method/arity
    if s == null { return null }
    local p = s.lastIndexOf("::")
    if p < 0 { return null }
    local qual = s.substring(p+2)
    // trim leading space
    local t = 0; while t < qual.length() { local c=qual.substring(t,t+1); if c==" "||c=="\t" { t=t+1; continue } break }
    if t > 0 { qual = qual.substring(t) }
    return qual
  }
  _parse_msg_to_diag(msg, path) {
    if msg == null { return null }
    // Expect prefixes like: [HC002] ... path:LINE or [HC011] ... :: Method
    local rule = "HC000"; local i0 = msg.indexOf("["); local i1 = msg.indexOf("]")
    if i0 == 0 && i1 > 1 { rule = msg.substring(1, i1) }
    // find last ':' as line separator
    local line = 1
    local p = msg.lastIndexOf(":")
      if p > 0 {
        // try parse after ':' as int (consume consecutive trailing digits)
        local tail = msg.substring(p+1)
        // remove leading spaces
      local q = 0; while q < tail.length() { local c=tail.substring(q,q+1); if c==" "||c=="\t" { q = q + 1  continue } break }
      local digits = ""; while q < tail.length() { local c=tail.substring(q,q+1); if c>="0" && c<="9" { digits = digits + c; q = q + 1; continue } break }
      if digits != "" { line = me._atoi(digits) }
      }
    // message: drop path and line suffix
    local message = msg
    // naive quickFix suggestions
    local qf = ""
    if rule == "HC002" { qf = "Replace include with using (alias)" }
    if rule == "HC003" { qf = "Quote module name: using \"mod\"" }
    if rule == "HC010" { qf = "Move assignment into a method (lazy init)" }
    if rule == "HC011" { qf = "Remove or reference the dead method from an entrypoint" }
    local sev = "warning"
    if rule == "HC001" || rule == "HC002" || rule == "HC010" || rule == "HC011" { sev = "error" }
    if rule == "HC003" || rule == "HC020" { sev = "warning" }
    local d = new MapBox(); d.set("file", path); d.set("line", line); d.set("rule", rule); d.set("message", message); d.set("quickFix", qf); d.set("severity", sev)
    return d
  }
  _render_dot_multi(irs) {
    // Delegate to Graphviz renderer (includes edges)
    GraphvizRenderBox.render_multi(irs)
    return 0
  }
  _sort_diags(diags) {
    if diags == null { return new ArrayBox() }
    local out = new ArrayBox(); local i=0; while i<diags.size() { out.push(diags.get(i)); i=i+1 }
    local n = out.size(); local a=0; while a<n { local b=a+1; while b<n {
      local da = out.get(a); local db = out.get(b)
      local ka = da.get("file") + ":" + me._itoa(da.get("line"))
      local kb = db.get("file") + ":" + me._itoa(db.get("line"))
      if kb < ka { local tmp=out.get(a); out.set(a,out.get(b)); out.set(b,tmp) }
      b=b+1 } a=a+1 }
    return out
  }
  _itoa(n) {
    // assume non-negative small ints for diagnostics
    local v = 0 + n
    if v == 0 { return "0" }
    local out = ""; local digits = "0123456789"
    local tmp = ""
    while v > 0 { local d = v % 10; tmp = digits.substring(d,d+1) + tmp; v = v / 10 }
    out = tmp
    return out
  }
  _json_quote(s) {
    if s == null { return "\"\"" }
    local out = ""; local i = 0; local n = s.length()
    while i < n {
      local ch = s.substring(i,i+1)
      if ch == "\\" { out = out + "\\\\" }
      else { if ch == "\"" { out = out + "\\\"" } else { if ch == "\n" { out = out + "\\n" } else { if ch == "\r" { out = out + "\\r" } else { if ch == "\t" { out = out + "\\t" } else { out = out + ch } } } } }
      i = i + 1
    }
    return "\"" + out + "\""
  }
  _atoi(s) {
    if s == null { return 0 }
    local n = s.length(); if n == 0 { return 0 }
    local i = 0; local v = 0
    local digits = "0123456789"
    while i < n {
      local ch = s.substring(i,i+1)
      // stop at first non-digit
      if ch < "0" || ch > "9" { break }
      // map to int via indexOf
      local pos = digits.indexOf(ch)
      if pos < 0 { break }
      v = v * 10 + pos
      i = i + 1
    }
    return v
  }
}

// Default entry: Main.main so runner resolves without explicit --entry
static box Main { method main(args) { return HakoAnalyzerBox.run(args) } }