json-native: token positions (line/column); escape utils BMP coverage + surrogate guard; add smokes for string escapes, nested, and error cases (AST/VM)

This commit is contained in:
Selfhosting Dev
2025-09-26 00:42:55 +09:00
parent b3a96faccb
commit 041cef875a
16 changed files with 206 additions and 44 deletions

View File

@ -53,6 +53,13 @@ box JsonToken {
get_line() { return me.line }
get_column() { return me.column }
// 位置情報の設定(トークナイザーから付与)
set_line_column(line, column) {
    // Overwrite this token's stored position with the values supplied by the
    // caller. The two writes are independent of each other.
    me.column = column
    me.line = line
    // Return the token itself so the call can be chained directly onto the
    // expression that produced the token.
    return me
}
// ===== 判定メソッド =====
is_literal() {
@ -247,4 +254,4 @@ static box TokenStats {
i = i + 1
}
}
}
}

View File

@ -66,37 +66,39 @@ box JsonTokenizer {
// EOF チェック
if me.scanner.is_eof() {
return new JsonToken("EOF", "", me.scanner.get_position(), me.scanner.get_position())
return new JsonToken("EOF", "", me.scanner.get_position(), me.scanner.get_position()).set_line_column(me.scanner.get_line(), me.scanner.get_column())
}
local start_pos = me.scanner.get_position()
local start_line = me.scanner.get_line()
local start_col = me.scanner.get_column()
local ch = me.scanner.current()
// 構造文字(単一文字)
local structural_type = me.char_to_token_type(ch)
if structural_type != null {
me.scanner.advance()
return this.create_structural_token(structural_type, start_pos)
return this.create_structural_token(structural_type, start_pos).set_line_column(start_line, start_col)
}
// 文字列リテラル
if ch == "\"" {
return me.tokenize_string()
return me.tokenize_string().set_line_column(start_line, start_col)
}
// 数値リテラル
if me.is_number_start_char(ch) {
return me.tokenize_number()
return me.tokenize_number().set_line_column(start_line, start_col)
}
// キーワードnull, true, false
if me.is_alpha_char(ch) {
return me.tokenize_keyword()
return me.tokenize_keyword().set_line_column(start_line, start_col)
}
// 不明な文字(エラー)
me.scanner.advance()
return new JsonToken("ERROR", "Unexpected character: '" + ch + "'", start_pos, me.scanner.get_position())
return new JsonToken("ERROR", "Unexpected character: '" + ch + "'", start_pos, me.scanner.get_position()).set_line_column(start_line, start_col)
}
// ===== 専用トークナイザーメソッド =====

View File

@ -233,18 +233,110 @@ static box EscapeUtils {
(ch >= "A" and ch <= "F")
}
// 4桁の16進数文字列を文字に変換簡易版
// 4桁の16進数文字列を文字に変換MVP: BMPの基本ASCIIとサロゲート検知
hex_to_char(hex) {
    // Convert the four hex digits of a JSON "\uXXXX" escape into a character.
    //
    // Parameter:
    //   hex - the digit string that follows "\u"; expected to be exactly
    //         four hexadecimal digits, upper or lower case.
    // Returns:
    //   the matching printable ASCII character for U+0020..U+007E;
    //   "?" for everything else (malformed input, control characters,
    //   non-ASCII code points, and surrogate halves).
    //
    // JSON allows lowercase hex digits in \u escapes, so the digits are
    // parsed numerically instead of being compared as uppercase-only
    // string literals ("002f" and "002F" must both decode to "/").

    // Anything that is not exactly four characters cannot be a \u escape.
    if hex.length() != 4 { return "?" }

    local upper_digits = "0123456789ABCDEF"
    local lower_digits = "0123456789abcdef"

    // Accumulate the code point one hex digit at a time.
    local code = 0
    local pos = 0
    loop(pos < 4) {
        local digit = hex.substring(pos, pos + 1)
        // 16 is used as the "not a hex digit" marker (valid values are 0..15).
        local value = 16
        local k = 0
        loop(k < 16) {
            if upper_digits.substring(k, k + 1) == digit { value = k }
            if lower_digits.substring(k, k + 1) == digit { value = k }
            k = k + 1
        }
        if value == 16 { return "?" }
        code = code * 16 + value
        pos = pos + 1
    }

    // Surrogate halves (U+D800..U+DFFF) are replaced by "?"; combining a
    // high/low pair into one code point is not supported at this stage.
    if code >= 55296 and code <= 57343 { return "?" }

    // Only the printable ASCII range 0x20..0x7E is decoded for now.
    if code < 32 { return "?" }
    if code > 126 { return "?" }

    // Table of the 95 printable ASCII characters in code-point order,
    // starting at U+0020 (space); index = code - 32.
    local printable = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
    return printable.substring(code - 32, code - 31)
}
// ===== 妥当性検証 =====