// Source file: hakorune/lang/src/shared/common/string_helpers.hako
// (217 lines, 6.4 KiB, plaintext)

// string_helpers.hako - StringHelpers (common, pure helpers)
// Responsibility: numeric/string conversions and JSON quoting for selfhost tools.
// Non-responsibility: JSON scanning beyond local character processing; use CfgNavigatorBox/StringScanBox for navigation.
static box StringHelpers {
// Convert a numeric or numeric-like string to its decimal representation.
int_to_str(n) {
    // Render `n` as a base-10 string.
    // `n` is first normalized through me.to_i64, so numeric-like strings
    // are accepted as well as numbers.
    // Returns "0" for zero, and a "-"-prefixed string for negative values.
    local value = me.to_i64(n)
    // Negative input: emit the sign, then recurse on the magnitude.
    if value < 0 { return "-" + me.int_to_str(0 - value) }
    if value == 0 { return "0" }
    local alphabet = "0123456789"
    local text = ""
    // Peel off the least-significant digit each round and prepend it.
    // NOTE(review): termination assumes `/` truncates to an integer here — confirm.
    loop (value > 0) {
        local digit = value % 10
        text = alphabet.substring(digit, digit + 1) + text
        value = value / 10
    }
    return text
}
// Parse integer from number or numeric-like string (leading '-' allowed; stops at first non-digit).
to_i64(x) {
    // Parse an integer out of `x`, which may be a number or a numeric-like string.
    // - null yields 0.
    // - `x` is stringified via `"" + x`; an optional leading "-" is honored.
    // - Parsing stops at the first non-digit character; digits read so far are kept.
    // - An empty string or a lone "-" yields 0.
    //
    // Optional debug hook: observe incoming values/boxes.
    // Enable with NYASH_TO_I64_DEBUG=1 when diagnosing numeric coercions.
    if env.get("NYASH_TO_I64_DEBUG") == "1" {
        __mir__.log("[string_helpers/to_i64] x", x)
    }
    if x == null { return 0 }
    local s = "" + x
    local i = 0
    local neg = 0
    if s.substring(0,1) == "-" {
        neg = 1
        i = 1
    }
    local n = s.length()
    if i >= n { return 0 }
    local acc = 0
    // Digit lookup table; the index of a character in it is its numeric value.
    local ds = "0123456789"
    loop (i < n) {
        local ch = s.substring(i, i+1)
        if ch < "0" || ch > "9" { break }
        local dpos = ds.indexOf(ch)
        // Defensive: stop if the character is not in the table after all.
        if dpos < 0 { break }
        acc = acc * 10 + dpos
        i = i + 1
    }
    if neg == 1 { return 0 - acc }
    return acc
}
// Quote a string for JSON (escape backslash, quote, and control chars) and wrap with quotes
json_quote(s) {
    // Produce a double-quoted JSON string literal for `s`.
    // Escapes backslash, double quote, newline, carriage return and tab;
    // every other character is copied through unchanged.
    // A null `s` yields the empty JSON string `""`.
    if s == null { return "\"\"" }
    local quoted = "\""
    local total = s.length()
    local pos = 0
    loop (pos < total) {
        local ch = s.substring(pos, pos + 1)
        // `ch` is compared against distinct one-character literals, so at most
        // one of the checks below can match; unmatched characters pass through.
        local piece = ch
        if ch == "\\" { piece = "\\\\" }
        if ch == "\"" { piece = "\\\"" }
        if ch == "\n" { piece = "\\n" }
        if ch == "\r" { piece = "\\r" }
        if ch == "\t" { piece = "\\t" }
        quoted = quoted + piece
        pos = pos + 1
    }
    return quoted + "\""
}
// Check if string is numeric-like (optional leading '-', then digits).
is_numeric_str(s) {
    // Return 1 when `s` is an optional leading "-" followed by one or more
    // decimal digits and nothing else; return 0 otherwise (including for
    // null, the empty string, and a lone "-").
    if s == null { return 0 }
    local total = s.length()
    if total == 0 { return 0 }
    local pos = 0
    if s.substring(0, 1) == "-" {
        // A sign with no digits after it is not numeric.
        if total == 1 { return 0 }
        pos = 1
    }
    // Every remaining character must lie in "0".."9".
    loop (pos < total) {
        local ch = s.substring(pos, pos + 1)
        if ch < "0" { return 0 }
        if ch > "9" { return 0 }
        pos = pos + 1
    }
    return 1
}
// Read consecutive digits starting at pos (no sign handling here; keep semantics simple)
read_digits(text, pos) {
    // Collect the run of consecutive decimal digits in `text` beginning at
    // index `pos` and return it as a string ("" when the first character is
    // not a digit). No sign handling is performed.
    local digits = ""
    local cursor = pos
    loop (true) {
        local ch = text.substring(cursor, cursor + 1)
        // An empty slice is treated as end of input.
        if ch == "" { break }
        if ch < "0" || ch > "9" { break }
        digits = digits + ch
        cursor = cursor + 1
    }
    return digits
}
// ------------------------------------
// NEW: Extended String Utilities (added for parser/compiler unification)
// ------------------------------------
// Character predicates
is_digit(ch) {
    // True when `ch` compares within the range "0".."9".
    local not_below_zero = (ch >= "0")
    local not_above_nine = (ch <= "9")
    return not_below_zero && not_above_nine
}
is_alpha(ch) {
    // True when `ch` is within "A".."Z" or "a".."z", or is an underscore.
    // Note that "_" counts as alphabetic here (identifier-style letters).
    local upper = (ch >= "A" && ch <= "Z")
    local lower = (ch >= "a" && ch <= "z")
    return upper || lower || ch == "_"
}
is_space(ch) {
    // True when `ch` is a space, tab, newline or carriage return.
    local blank = (ch == " " || ch == "\t")
    local line_end = (ch == "\n" || ch == "\r")
    return blank || line_end
}
// Pattern matching
starts_with(src, i, pat) {
    // Test whether `pat` occurs in `src` starting exactly at index `i`.
    // Returns 1 on a match and 0 otherwise (including when `pat` would run
    // past the end of `src`). An empty `pat` matches whenever i <= src.length().
    local n = src.length()
    local m = pat.length()
    {
        // Dev-only trace for Stage-B / parser investigation.
        // Enabled when HAKO_STAGEB_DEBUG is set to "1".
        local dbg = env.get("HAKO_STAGEB_DEBUG")
        if dbg != null && ("" + dbg) == "1" {
            print("[string_helpers/starts_with] src=\"" + src + "\"")
            print("[string_helpers/starts_with] i=" + ("" + i) + " m=" + ("" + m) + " n=" + ("" + n))
        }
    }
    // Not enough characters left for the pattern to fit.
    if i + m > n { return 0 }
    // Compare one character at a time; bail out on the first mismatch.
    local k = 0
    loop(k < m) {
        if src.substring(i + k, i + k + 1) != pat.substring(k, k + 1) { return 0 }
        k = k + 1
    }
    return 1
}
// Keyword match with word boundary (next char not [A-Za-z0-9_])
starts_with_kw(src, i, kw) {
    // Keyword match with a word boundary: returns 1 when `kw` occurs in
    // `src` at index `i` AND the character right after it is not a letter,
    // underscore, or digit (or the keyword ends the string). Returns 0 otherwise.
    {
        // Dev-only trace, enabled when HAKO_STAGEB_DEBUG is set to "1".
        local dbg = env.get("HAKO_STAGEB_DEBUG")
        if dbg != null && ("" + dbg) == "1" {
            print("[string_helpers/starts_with_kw] src=\"" + src + "\" i=" + ("" + i) + " kw=\"" + kw + "\"")
        }
    }
    // Although this is a helper in the same box, call it as an explicit
    // box-qualified static call rather than through the receiver, to keep
    // the argument order from shifting.
    if StringHelpers.starts_with(src, i, kw) == 0 { return 0 }
    local n = src.length()
    local j = i + kw.length()
    // Keyword runs to the end of the input: nothing follows, so it is a whole word.
    if j >= n { return 1 }
    local ch = src.substring(j, j+1)
    // An identifier character right after the keyword means it is only a prefix.
    if me.is_alpha(ch) || me.is_digit(ch) { return 0 }
    return 1
}
// String search
index_of(src, i, pat) {
    // Forward search: return the smallest index j >= i at which `pat`
    // occurs in `src`, or -1 when there is no such occurrence.
    // An empty `pat` matches immediately and yields `i`.
    // A null `src` or `pat` yields -1 (same convention as last_index_of).
    if src == null { return -1 }
    if pat == null { return -1 }
    local n = src.length()
    local m = pat.length()
    if m == 0 { return i }
    local j = i
    // Only positions where the whole pattern still fits are candidates.
    loop(j + m <= n) {
        // Use the explicit box-qualified static call and an explicit 0/1
        // comparison, matching how starts_with_kw invokes the same helper.
        if StringHelpers.starts_with(src, j, pat) == 1 { return j }
        j = j + 1
    }
    return -1
}
// Trim spaces and tabs (with optional semicolon at end)
trim(s) {
    // Strip leading spaces/tabs and trailing spaces/tabs/semicolons from `s`.
    // Any mix of trailing " ", "\t" and ";" characters is removed, not just
    // a single semicolon. Newlines are not treated as trimmable.
    local total = s.length()
    // Advance past leading blanks.
    local start = 0
    loop (start < total) {
        local head = s.substring(start, start + 1)
        if head != " " && head != "\t" { break }
        start = start + 1
    }
    // Back up over trailing blanks and semicolons, never crossing `start`.
    local stop = total
    loop (stop > start) {
        local tail = s.substring(stop - 1, stop)
        if tail != " " && tail != "\t" && tail != ";" { break }
        stop = stop - 1
    }
    return s.substring(start, stop)
}
// Skip whitespace from position i
skip_ws(src, i) {
    // Return the index of the first non-whitespace character in `src` at or
    // after `i` (whitespace as defined by me.is_space), or the end-of-string
    // index when only whitespace remains. A null `src` returns `i` unchanged.
    if src == null { return i }
    local total = src.length()
    local pos = i
    // Iteration cap: once more than `limit` rounds have run, the current
    // position is returned as-is instead of continuing.
    local limit = 100000
    local rounds = 0
    loop (true) {
        if rounds > limit { return pos }
        rounds = rounds + 1
        if pos >= total { break }
        if me.is_space(src.substring(pos, pos + 1)) {
            pos = pos + 1
        } else {
            break
        }
    }
    return pos
}
// Find last occurrence of pattern in string (backward search)
last_index_of(src, pat) {
    // Backward search: return the largest index at which `pat` occurs in
    // `src`, or -1 when it does not occur.
    // - null `src` or `pat` yields -1.
    // - An empty `pat` yields src.length().
    // - A `pat` longer than `src` yields -1.
    if src == null { return -1 }
    if pat == null { return -1 }
    local n = src.length()
    local m = pat.length()
    if m == 0 { return n }
    if m > n { return -1 }
    // Start at the last position where the pattern still fits and walk left.
    local i = n - m
    loop(i >= 0) {
        // Use the explicit box-qualified static call and an explicit 0/1
        // comparison, matching how starts_with_kw invokes the same helper.
        if StringHelpers.starts_with(src, i, pat) == 1 { return i }
        i = i - 1
    }
    return -1
}
// Split string by newline into ArrayBox (without relying on StringBox.split)
split_lines(s) {
    // Split `s` on "\n" into a new ArrayBox of substrings (separators are
    // not included). Implemented by hand rather than via StringBox.split.
    // - null yields an empty array.
    // - The text after the final "\n" is always pushed, so "" yields one
    //   empty element and a trailing newline yields a trailing "" element.
    // - "\r" is not treated specially.
    local lines = new ArrayBox()
    if s == null { return lines }
    local total = s.length()
    local start = 0
    local pos = 0
    loop (pos < total) {
        if s.substring(pos, pos + 1) == "\n" {
            // Close the current line at the separator and begin the next one after it.
            lines.push(s.substring(start, pos))
            start = pos + 1
        }
        pos = pos + 1
    }
    // Push the tail (everything after the last separator).
    if start <= total { lines.push(s.substring(start)) }
    return lines
}
}