- Add HAKO_TRACE_EXECUTION to trace executor route
  - Rust hv1_inline: stderr [trace] executor: hv1_inline (rust)
  - Hakovm dispatcher: stdout [trace] executor: hakovm (hako)
  - test_runner: trace lines for hv1_inline/core/hakovm routes
- Add HAKO_VERIFY_SHOW_LOGS and HAKO_DEBUG=1 (enables both)
  - verify_v1_inline_file() log passthrough with numeric rc extraction
  - test_runner exports via HAKO_DEBUG
- Canary expansion under phase2170 (state spec)
  - Array: push×5/10 → size, len/length alias, per-recv/global, flow across blocks
  - Map: set dup-key non-increment, value_state get/has
  - run_all.sh: unify, remove SKIPs; all PASS
- Docs
  - ENV_VARS.md: add Debug/Tracing toggles and examples
  - PLAN.md/CURRENT_TASK.md: mark 21.7 green, add Quickstart lines

All changes gated by env vars; default behavior unchanged.
// tools/hako_parser/tokenizer.hako - HakoTokenizerBox (Stage-3 aware tokenizer, MVP)
// Produces tokens with type, lexeme, line, col. Handles strings (escapes), numbers,
// identifiers, and punctuation. Keywords are normalized to upper-case kinds.
// no external deps (self-contained tokenizer)

static box HakoTokenizerBox {
    // Token: Map { type, lexeme, line, col }
    tokenize(text) {
        local out = new ArrayBox()
        if text == null { return out }
        local n = text.length()
        local i = 0
        local line = 1
        local col = 1
        while i < n {
            local ch = text.substring(i,i+1)
            // whitespace and newlines
            if ch == " " || ch == "\t" { i = i + 1; col = col + 1; continue }
            if ch == "\r" { i = i + 1; continue }
            if ch == "\n" { i = i + 1; line = line + 1; col = 1; continue }
            // line comment // ... (consume until EOL)
            if ch == "/" && i+1 < n && text.substring(i+1,i+2) == "/" {
                // skip until newline
                i = i + 2; col = col + 2
                while i < n {
                    local c2 = text.substring(i,i+1)
                    if c2 == "\n" { break }
                    i = i + 1; col = col + 1
                }
                continue
            }
            // block comment /* ... */ (consume until closing, track newlines)
            if ch == "/" && i+1 < n && text.substring(i+1,i+2) == "*" {
                i = i + 2; col = col + 2
                local closed = 0
                while i < n {
                    local c2 = text.substring(i,i+1)
                    if c2 == "*" && i+1 < n && text.substring(i+1,i+2) == "/" { i = i + 2; col = col + 2; closed = 1; break }
                    if c2 == "\n" { i = i + 1; line = line + 1; col = 1; continue }
                    i = i + 1; col = col + 1
                }
                continue
            }
            // string literal "..." with escapes \" \\ \n \t
            if ch == '"' {
                local start_col = col
                local buf = ""
                i = i + 1; col = col + 1
                local closed = 0
                while i < n {
                    local c3 = text.substring(i,i+1)
                    if c3 == '"' { closed = 1; i = i + 1; col = col + 1; break }
                    if c3 == "\\" {
                        if i+1 < n {
                            local esc = text.substring(i+1,i+2)
                            if esc == '"' { buf = buf.concat('"') }
                            else if esc == "\\" { buf = buf.concat("\\") }
                            else if esc == "n" { buf = buf.concat("\n") }
                            else if esc == "t" { buf = buf.concat("\t") }
                            else { buf = buf.concat(esc) }
                            i = i + 2; col = col + 2
                            continue
                        } else { i = i + 1; col = col + 1; break }
                    }
                    buf = buf.concat(c3)
                    i = i + 1; col = col + 1
                }
                local tok = new MapBox(); tok.set("type","STRING"); tok.set("lexeme", buf); tok.set("line", line); tok.set("col", start_col)
                out.push(tok); continue
            }
            // number (integer only for MVP)
            if ch >= "0" && ch <= "9" {
                local start = i; local start_col = col
                while i < n {
                    local c4 = text.substring(i,i+1)
                    if !(c4 >= "0" && c4 <= "9") { break }
                    i = i + 1; col = col + 1
                }
                local lex = text.substring(start, i)
                local tok = new MapBox(); tok.set("type","NUMBER"); tok.set("lexeme", lex); tok.set("line", line); tok.set("col", start_col)
                out.push(tok); continue
            }
            // identifier or keyword
            if me._is_ident_start(ch) == 1 {
                local start = i; local start_col = col
                while i < n {
                    local c5 = text.substring(i,i+1)
                    if me._is_ident_char(c5) == 0 { break }
                    i = i + 1; col = col + 1
                }
                local lex = text.substring(start, i)
                local kind = me._kw_kind(lex)
                local tok = new MapBox(); tok.set("type", kind); tok.set("lexeme", lex); tok.set("line", line); tok.set("col", start_col)
                out.push(tok); continue
            }
            // punctuation / symbols we care about
            local sym_kind = me._sym_kind(ch)
            if sym_kind != null {
                local tok = new MapBox(); tok.set("type", sym_kind); tok.set("lexeme", ch); tok.set("line", line); tok.set("col", col)
                out.push(tok); i = i + 1; col = col + 1; continue
            }
            // unknown char -> emit as PUNC so parser can skip gracefully
            local tok = new MapBox(); tok.set("type","PUNC"); tok.set("lexeme", ch); tok.set("line", line); tok.set("col", col)
            out.push(tok); i = i + 1; col = col + 1
        }
        return out
    }

    _is_ident_start(c) { if c=="_" {return 1}; if c>="A"&&c<="Z" {return 1}; if c>="a"&&c<="z" {return 1}; return 0 }

    _is_ident_char(c) { if me._is_ident_start(c)==1 { return 1 }; if c>="0"&&c<="9" { return 1 }; return 0 }

    _kw_kind(lex) {
        if lex == "using" { return "USING" }
        if lex == "as" { return "AS" }
        if lex == "static" { return "STATIC" }
        if lex == "box" { return "BOX" }
        if lex == "method" { return "METHOD" }
        if lex == "include" { return "INCLUDE" }
        if lex == "while" { return "WHILE" } // Stage-3 tokens (MVP)
        if lex == "for" { return "FOR" }
        if lex == "in" { return "IN" }
        return "IDENT"
    }

    _sym_kind(c) {
        if c == "{" { return "LBRACE" }
        if c == "}" { return "RBRACE" }
        if c == "(" { return "LPAREN" }
        if c == ")" { return "RPAREN" }
        if c == "," { return "COMMA" }
        if c == "." { return "DOT" }
        if c == ":" { return "COLON" }
        if c == "=" { return "EQ" }
        if c == ";" { return "SEMI" }
        return null
    }
}

static box HakoTokenizerMain { method main(args) { return 0 } }
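For a quick sanity check, a minimal usage sketch for the tokenizer follows. It is illustrative only: it assumes static box methods can be invoked as HakoTokenizerBox.tokenize(...) and that ArrayBox exposes size() (as referenced in the canary notes above); the HakoTokenizerSmokeMain box is hypothetical and not part of tokenizer.hako.

// --- Usage sketch (illustrative, not part of tokenizer.hako) ---
// Assumptions: static box methods callable as HakoTokenizerBox.tokenize(...),
// and ArrayBox.size() as referenced in the canary notes above.
static box HakoTokenizerSmokeMain {
    method main(args) {
        // "box Foo { }" should tokenize as BOX, IDENT(Foo), LBRACE, RBRACE -> 4 tokens
        local toks = HakoTokenizerBox.tokenize("box Foo { }")
        return toks.size()
    }
}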