Phase 21.3 WIP: Hako Source Checker improvements - HC011/HC016/HC017 implemented

Main changes:
- ✅ HC011 (dead methods) implemented; tests green
- ✅ HC016 (unused alias) implemented; tests green
- ✅ HC017 (non-ascii quotes) implementation complete
- 🔧 tokenizer/parser_core hardened (AST-first route)
- 🛡️ plugin_guard.rs added (stderr-only output)
- 📋 test infrastructure improvements (run_tests.sh improved)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
@@ -1,13 +1,136 @@
// tools/hako_parser/tokenizer.hako — HakoTokenizerBox (Stage-3 aware tokenizer, MVP)
// Produces tokens with type, lexeme, line, col. Handles strings (escapes), numbers,
// identifiers, and punctuation. Keywords are normalized to upper-case kinds.
using selfhost.shared.common.string_helpers as Str

static box HakoTokenizerBox {
    // Returns ArrayBox of tokens (each token is a MapBox)
    // Token: Map { type, lexeme, line, col }
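    // Example: tokenizing `box Foo {}` yields tokens shaped like
    //   { type: "BOX",    lexeme: "box", line: 1, col: 1 }
    //   { type: "IDENT",  lexeme: "Foo", line: 1, col: 5 }
    //   { type: "LBRACE", lexeme: "{",   line: 1, col: 9 }
    //   { type: "RBRACE", lexeme: "}",   line: 1, col: 10 }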
    tokenize(text) {
        local out = new ArrayBox()
        if text == null { return out }
        local n = text.length()
        local i = 0
        local line = 1
        local col = 1
        while i < n {
            local ch = text.substring(i,i+1)
            // whitespace and newlines
            if ch == " " || ch == "\t" { i = i + 1; col = col + 1; continue }
            if ch == "\r" { i = i + 1; continue }
            if ch == "\n" { i = i + 1; line = line + 1; col = 1; continue }
            // line comment // ... (consume until EOL)
            if ch == "/" && i+1 < n && text.substring(i+1,i+2) == "/" {
                // skip until newline
                i = i + 2; col = col + 2
                while i < n {
                    local c2 = text.substring(i,i+1)
                    if c2 == "\n" { break }
                    i = i + 1; col = col + 1
                }
                continue
            }
            // block comment /* ... */ (consume until closing, track newlines)
            if ch == "/" && i+1 < n && text.substring(i+1,i+2) == "*" {
                i = i + 2; col = col + 2
                local closed = 0
                while i < n {
                    local c2 = text.substring(i,i+1)
                    if c2 == "*" && i+1 < n && text.substring(i+1,i+2) == "/" { i = i + 2; col = col + 2; closed = 1; break }
                    if c2 == "\n" { i = i + 1; line = line + 1; col = 1; continue }
                    i = i + 1; col = col + 1
                }
                continue
            }
            // string literal "..." with escapes \" \\ \n \t
            if ch == '"' {
                local start_col = col
                local buf = ""
                i = i + 1; col = col + 1
                local closed = 0
                while i < n {
                    local c3 = text.substring(i,i+1)
                    if c3 == '"' { closed = 1; i = i + 1; col = col + 1; break }
                    if c3 == "\\" {
                        if i+1 < n {
                            local esc = text.substring(i+1,i+2)
                            if esc == '"' { buf = buf.concat('"') }
                            else if esc == "\\" { buf = buf.concat("\\") }
                            else if esc == "n" { buf = buf.concat("\n") }
                            else if esc == "t" { buf = buf.concat("\t") }
                            else { buf = buf.concat(esc) }
                            i = i + 2; col = col + 2
                            continue
                        } else { i = i + 1; col = col + 1; break }
                    }
                    buf = buf.concat(c3)
                    i = i + 1; col = col + 1
                }
                local tok = new MapBox(); tok.set("type","STRING"); tok.set("lexeme", buf); tok.set("line", line); tok.set("col", start_col)
                out.push(tok); continue
            }
            // number (integer only for MVP)
            if ch >= "0" && ch <= "9" {
                local start = i; local start_col = col
                while i < n {
                    local c4 = text.substring(i,i+1)
                    if !(c4 >= "0" && c4 <= "9") { break }
                    i = i + 1; col = col + 1
                }
                local lex = text.substring(start, i)
                local tok = new MapBox(); tok.set("type","NUMBER"); tok.set("lexeme", lex); tok.set("line", line); tok.set("col", start_col)
                out.push(tok); continue
            }
            // identifier or keyword
            if me._is_ident_start(ch) == 1 {
                local start = i; local start_col = col
                while i < n {
                    local c5 = text.substring(i,i+1)
                    if me._is_ident_char(c5) == 0 { break }
                    i = i + 1; col = col + 1
                }
                local lex = text.substring(start, i)
                local kind = me._kw_kind(lex)
                local tok = new MapBox(); tok.set("type", kind); tok.set("lexeme", lex); tok.set("line", line); tok.set("col", start_col)
                out.push(tok); continue
            }
            // punctuation / symbols we care about
            local sym_kind = me._sym_kind(ch)
            if sym_kind != null {
                local tok = new MapBox(); tok.set("type", sym_kind); tok.set("lexeme", ch); tok.set("line", line); tok.set("col", col)
                out.push(tok); i = i + 1; col = col + 1; continue
            }
            // unknown char → emit as PUNC so parser can skip gracefully
            local tok = new MapBox(); tok.set("type","PUNC"); tok.set("lexeme", ch); tok.set("line", line); tok.set("col", col)
            out.push(tok); i = i + 1; col = col + 1
        }
        return out
    }
    _is_ident_start(c) { if c=="_" {return 1}; if c>="A"&&c<="Z" {return 1}; if c>="a"&&c<="z" {return 1}; return 0 }
    _is_ident_char(c) { if me._is_ident_start(c)==1 { return 1 }; if c>="0"&&c<="9" { return 1 }; return 0 }
    _kw_kind(lex) {
        if lex == "using" { return "USING" }
        if lex == "as" { return "AS" }
        if lex == "static" { return "STATIC" }
        if lex == "box" { return "BOX" }
        if lex == "method" { return "METHOD" }
        if lex == "include" { return "INCLUDE" }
        if lex == "while" { return "WHILE" } // Stage-3 tokens (MVP)
        if lex == "for" { return "FOR" }
        if lex == "in" { return "IN" }
        return "IDENT"
    }
    _sym_kind(c) {
        if c == "{" { return "LBRACE" }
        if c == "}" { return "RBRACE" }
        if c == "(" { return "LPAREN" }
        if c == ")" { return "RPAREN" }
        if c == "," { return "COMMA" }
        if c == "." { return "DOT" }
        if c == ":" { return "COLON" }
        if c == "=" { return "EQ" }
        if c == ";" { return "SEMI" }
        return null
    }
}

static box HakoTokenizerMain { method main(args) { return 0 } }
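
// Usage sketch (illustrative only, not part of the original file). It assumes that
// ArrayBox exposes length()/get(i), that MapBox exposes get(), and that a ConsoleBox
// with a log() method exists in this environment; adjust to the actual runtime APIs.
static box TokenizerDemoBox {
    method main(args) {
        local toks = HakoTokenizerBox.tokenize("using foo as F")
        local i = 0
        while i < toks.length() {
            local t = toks.get(i)           // each token is a MapBox (assumed get())
            ConsoleBox.log(t.get("type"))   // expected kinds: USING, IDENT, AS, IDENT
            i = i + 1
        }
        return 0
    }
}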