From d859e461635e6f15873c39bf7556de9371381357 Mon Sep 17 00:00:00 2001 From: nyash-codex Date: Wed, 17 Dec 2025 16:57:06 +0900 Subject: [PATCH] fix(llvm_py): tag stringish across copy/phi for concat --- src/llvm_py/instructions/binop.py | 40 ++++++++++-------- src/llvm_py/phi_wiring/analysis.py | 65 +++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 17 deletions(-) diff --git a/src/llvm_py/instructions/binop.py b/src/llvm_py/instructions/binop.py index 7d03acbf..f400615f 100644 --- a/src/llvm_py/instructions/binop.py +++ b/src/llvm_py/instructions/binop.py @@ -243,25 +243,33 @@ def lower_binop( return val return ir.Constant(i64, 0) - # Phase 196: TypeFacts SSOT - Use handle+handle only when BOTH are strings - lhs_tag = False; rhs_tag = False + # Phase 196: TypeFacts/Resolver SSOT - Use handle+handle only when BOTH are strings. + # Root cause (Phase 102): loop-carried string accumulator may be i64-handle but not present in value_types; + # tag lookup MUST consult resolver.is_stringish()/string_ids. + lhs_tag = False + rhs_tag = False try: if resolver is not None: - # Check string_literals (actual string constants) + # SSOT: resolver's stringish tag (propagated via Copy/PHI) + if hasattr(resolver, 'is_stringish'): + lhs_tag = bool(resolver.is_stringish(lhs)) + rhs_tag = bool(resolver.is_stringish(rhs)) + # Legacy: actual string constants by ValueId if hasattr(resolver, 'string_literals'): - lhs_tag = lhs in resolver.string_literals - rhs_tag = rhs in resolver.string_literals - # Check value_types for String/StringBox types - if not lhs_tag and hasattr(resolver, 'value_types'): - lhs_ty = resolver.value_types.get(lhs) - if lhs_ty and (lhs_ty.get('kind') == 'string' or - (lhs_ty.get('kind') == 'handle' and lhs_ty.get('box_type') == 'StringBox')): - lhs_tag = True - if not rhs_tag and hasattr(resolver, 'value_types'): - rhs_ty = resolver.value_types.get(rhs) - if rhs_ty and (rhs_ty.get('kind') == 'string' or - (rhs_ty.get('kind') == 'handle' and rhs_ty.get('box_type') == 'StringBox')): - rhs_tag = True + lhs_tag = lhs_tag or (lhs in resolver.string_literals) + rhs_tag = rhs_tag or (rhs in resolver.string_literals) + # Legacy: value_types hints (best-effort) + if hasattr(resolver, 'value_types'): + if not lhs_tag: + lhs_ty = resolver.value_types.get(lhs) + if lhs_ty and (lhs_ty.get('kind') == 'string' or + (lhs_ty.get('kind') == 'handle' and lhs_ty.get('box_type') == 'StringBox')): + lhs_tag = True + if not rhs_tag: + rhs_ty = resolver.value_types.get(rhs) + if rhs_ty and (rhs_ty.get('kind') == 'string' or + (rhs_ty.get('kind') == 'handle' and rhs_ty.get('box_type') == 'StringBox')): + rhs_tag = True except Exception: pass # Phase 131-15-P1 DEBUG diff --git a/src/llvm_py/phi_wiring/analysis.py b/src/llvm_py/phi_wiring/analysis.py index 437dfca5..7789581a 100644 --- a/src/llvm_py/phi_wiring/analysis.py +++ b/src/llvm_py/phi_wiring/analysis.py @@ -5,7 +5,20 @@ from .common import trace def collect_produced_stringish(blocks: List[Dict[str, Any]]) -> Dict[int, bool]: + """Collect value-ids that are known to be string handles (best-effort). + + This is used for early tagging (PHI placeholder setup) before instructions + are lowered/executed. Keep it monotonic and conservative. + + Phase 102 root-cause: + - A string accumulator often goes through `copy` then `phi` before it's used + in `binop '+'`. If we don't propagate stringish across copy/phi here, the + PHI dst won't be tagged early, and the concat lowerer may incorrectly box + an i64-handle as an IntegerBox (breaking runtime string length/parity). + """ produced_str: Dict[int, bool] = {} + + # Seed: explicit producers with reliable type signals. for block_data in blocks: for inst in block_data.get("instructions", []) or []: try: @@ -35,6 +48,57 @@ def collect_produced_stringish(blocks: List[Dict[str, Any]]) -> Dict[int, bool]: produced_str[int(dstx)] = True except Exception: pass + + # Propagate: copy/phi/binop('+') can carry/produce stringish values even when + # dst_type metadata is missing. Use a small fixpoint iteration to cover chains. + changed = True + while changed: + changed = False + for block_data in blocks: + for inst in block_data.get("instructions", []) or []: + try: + opx = inst.get("op") + dstx = inst.get("dst") + if dstx is None: + continue + dst_i = int(dstx) + if produced_str.get(dst_i): + continue + + if opx == "copy": + src = inst.get("src") + if src is not None and produced_str.get(int(src)): + produced_str[dst_i] = True + changed = True + continue + + if opx == "phi": + incoming0 = inst.get("incoming", []) or [] + # JSON v0 incoming pairs are (value_id, block_id) + for (v_src, _b) in incoming0: + try: + if produced_str.get(int(v_src)): + produced_str[dst_i] = True + changed = True + break + except Exception: + continue + continue + + if opx == "binop" and inst.get("operation") == "+": + lhs = inst.get("lhs") + rhs = inst.get("rhs") + if lhs is not None and produced_str.get(int(lhs)): + produced_str[dst_i] = True + changed = True + continue + if rhs is not None and produced_str.get(int(rhs)): + produced_str[dst_i] = True + changed = True + continue + except Exception: + continue + return produced_str @@ -65,4 +129,3 @@ def analyze_incomings(blocks: List[Dict[str, Any]]) -> Dict[int, Dict[int, List[ except Exception: pass return result -