Files
hakorune/lang/src/shared/common/string_helpers.hako
nyash-codex 380a724b9c debug(stage1): Phase 25.1 - MIR Builder 型混乱バグ完全特定
🚨 重大発見: .hakoレベルでは修正不可能なMIR Builderバグ

🔍 根本原因特定:
- MIR Builder の型レジストリシステムが型情報を正しく追跡できていない
- new ArrayBox() で生成したValueIdが、誤った型として認識される
- PHIマージポイントで型情報が失われる/上書きされる

📊 系統的な型混乱パターン:
1. args.size() → ParserBox.size() (本来: ArrayBox.size())
2. cli_args.length() → ParserBox.length() (本来: ArrayBox.length())
3. new ArrayBox().size() → LoopOptsBox.size() (本来: ArrayBox.size())

 すべての.hako回避策が失敗:
- パラメータ名変更: args → cli_args → cli_args_raw
- 新しいArrayBox作成: local x = new ArrayBox()
- Fail-Fast Guard追加
→ すべて同じ型混乱エラー

 決定的証拠:
- __mir__.log が一度も実行されなかった
→ エラーは MIR生成時に発生(実行時ではない)
→ .hakoコードの問題ではない

📋 成果物:
- __mir__.log マーカー追加 (lang/src/runner/stage1_cli.hako)
  - stage1_main 入口ログ
  - env toggles ログ
  - args.size() 前後ログ
- StringHelpers.to_i64 改善 (lang/src/shared/common/string_helpers.hako)
  - null/Void ガード追加
  - デバッグログ追加
- 完全調査レポート:
  - stage1_mir_builder_type_confusion_bug.md (最終レポート)
  - stage1_mir_log_investigation.md (詳細調査ログ)

🔧 必要な修正 (推定6-10時間):
Phase 1: デバッグトレース追加 (30分)
  - src/mir/builder/types/mod.rs に NYASH_MIR_TYPE_TRACE
Phase 2: トレース実行 (1時間)
  - 型情報がどこで失われるか特定
Phase 3: 根本修正 (4-8時間)
  - NewBox生成時の型登録修正
  - PHI型伝播ロジック修正
  - 型レジストリ整合性チェック追加
Phase 4: 検証 (1時間)
  - stage1_cli 正常動作確認

🎯 結論:
MIR Builder の根本的インフラバグ。SSA変換とPHIノード経由での
型情報追跡に失敗している。.hakoレベルでは回避不可能。

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Assistant <task@anthropic.com>
2025-11-21 08:03:03 +09:00

217 lines
6.4 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// string_helpers.hako - StringHelpers (common, pure helpers)
// Responsibility: numeric/string conversions and JSON quoting for selfhost tools.
// Non-responsibility: JSON scanning beyond local character processing; use CfgNavigatorBox/StringScanBox for navigation.
static box StringHelpers {
// Convert a numeric or numeric-like string to its decimal representation.
int_to_str(n) {
local v = me.to_i64(n)
if v == 0 { return "0" }
if v < 0 { return "-" + me.int_to_str(0 - v) }
local out = ""
local digits = "0123456789"
loop (v > 0) {
local d = v % 10
local ch = digits.substring(d, d+1)
out = ch + out
v = v / 10
}
return out
}
// Parse integer from number or numeric-like string (leading '-' allowed; stops at first non-digit).
to_i64(x) {
// Optional debug hook: observe incoming values/boxes.
// Enable with NYASH_TO_I64_DEBUG=1 when diagnosing numeric coercions.
if env.get("NYASH_TO_I64_DEBUG") == "1" {
__mir__.log("[string_helpers/to_i64] x", x)
}
if x == null { return 0 }
local s = "" + x
local i = 0
local neg = 0
if s.substring(0,1) == "-" { neg = 1 i = 1 }
local n = s.length()
if i >= n { return 0 }
local acc = 0
loop (i < n) {
local ch = s.substring(i, i+1)
if ch < "0" || ch > "9" { break }
local ds = "0123456789"
local dpos = ds.indexOf(ch)
if dpos < 0 { break }
acc = acc * 10 + dpos
i = i + 1
}
if neg == 1 { return 0 - acc }
return acc
}
// Quote a string for JSON (escape backslash, quote, and control chars) and wrap with quotes
json_quote(s) {
if s == null { return "\"\"" }
local out = ""
local i = 0
local n = s.length()
loop (i < n) {
local ch = s.substring(i, i+1)
if ch == "\\" { out = out + "\\\\" }
else { if ch == "\"" { out = out + "\\\"" } else {
if ch == "\n" { out = out + "\\n" } else {
if ch == "\r" { out = out + "\\r" } else {
if ch == "\t" { out = out + "\\t" } else { out = out + ch }
}
}
}}
i = i + 1
}
return "\"" + out + "\""
}
// Check if string is numeric-like (optional leading '-', then digits).
is_numeric_str(s) {
if s == null { return 0 }
local n = s.length()
if n == 0 { return 0 }
local i = 0
if s.substring(0,1) == "-" { if n == 1 { return 0 } i = 1 }
loop(i < n) { local ch = s.substring(i, i+1) if ch < "0" || ch > "9" { return 0 } i = i + 1 }
return 1
}
// Read consecutive digits starting at pos (no sign handling here; keep semantics simple)
read_digits(text, pos) {
local out = ""
loop (true) {
local ch = text.substring(pos, pos+1)
if ch == "" { break }
if ch >= "0" && ch <= "9" { out = out + ch pos = pos + 1 } else { break }
}
return out
}
// ------------------------------------
// NEW: Extended String Utilities (added for parser/compiler unification)
// ------------------------------------
// Character predicates
is_digit(ch) { return ch >= "0" && ch <= "9" }
is_alpha(ch) { return (ch >= "A" && ch <= "Z") || (ch >= "a" && ch <= "z") || ch == "_" }
is_space(ch) { return ch == " " || ch == "\t" || ch == "\n" || ch == "\r" }
// Pattern matching
starts_with(src, i, pat) {
local n = src.length()
local m = pat.length()
{
// Dev-only trace for StageB / parser調査用
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
print("[string_helpers/starts_with] src=\"" + src + "\"")
print("[string_helpers/starts_with] i=" + ("" + i) + " m=" + ("" + m) + " n=" + ("" + n))
}
}
if i + m > n { return 0 }
local k = 0
loop(k < m) {
if src.substring(i + k, i + k + 1) != pat.substring(k, k + 1) { return 0 }
k = k + 1
}
return 1
}
// Keyword match with word boundary (next char not [A-Za-z0-9_])
starts_with_kw(src, i, kw) {
{
local dbg = env.get("HAKO_STAGEB_DEBUG")
if dbg != null && ("" + dbg) == "1" {
print("[string_helpers/starts_with_kw] src=\"" + src + "\" i=" + ("" + i) + " kw=\"" + kw + "\"")
}
}
// 同一箱内ヘルパーとして、receiver 経由ではなく
// 明示的な箱名付きの静的呼び出しにする(引数順のずれ防止)。
if StringHelpers.starts_with(src, i, kw) == 0 { return 0 }
local n = src.length()
local j = i + kw.length()
if j >= n { return 1 }
local ch = src.substring(j, j+1)
if me.is_alpha(ch) || me.is_digit(ch) { return 0 }
return 1
}
// String search
index_of(src, i, pat) {
local n = src.length()
local m = pat.length()
if m == 0 { return i }
local j = i
loop(j + m <= n) {
if me.starts_with(src, j, pat) { return j }
j = j + 1
}
return -1
}
// Trim spaces and tabs (with optional semicolon at end)
trim(s) {
local i = 0
local n = s.length()
loop(i < n && (s.substring(i,i+1) == " " || s.substring(i,i+1) == "\t")) { i = i + 1 }
local j = n
loop(j > i && (s.substring(j-1,j) == " " || s.substring(j-1,j) == "\t" || s.substring(j-1,j) == ";")) { j = j - 1 }
return s.substring(i, j)
}
// Skip whitespace from position i
skip_ws(src, i) {
if src == null { return i }
local n = src.length()
local cont = 1
local guard = 0
local max = 100000
loop(cont == 1) {
if guard > max { return i } else { guard = guard + 1 }
if i < n {
if me.is_space(src.substring(i, i+1)) { i = i + 1 } else { cont = 0 }
} else { cont = 0 }
}
return i
}
// Find last occurrence of pattern in string (backward search)
last_index_of(src, pat) {
if src == null { return -1 }
if pat == null { return -1 }
local n = src.length()
local m = pat.length()
if m == 0 { return n }
if m > n { return -1 }
local i = n - m
loop(i >= 0) {
if me.starts_with(src, i, pat) { return i }
i = i - 1
}
return -1
}
// Split string by newline into ArrayBox (without relying on StringBox.split)
split_lines(s) {
local arr = new ArrayBox()
if s == null { return arr }
local n = s.length()
local last = 0
local i = 0
loop (i < n) {
local ch = s.substring(i, i+1)
if ch == "\n" {
arr.push(s.substring(last, i))
last = i + 1
}
i = i + 1
}
// push tail
if last <= n { arr.push(s.substring(last)) }
return arr
}
}