🔧 refactor(llvm-py): Fix resolver PHI handling and add trace improvements

Changes to resolver.py:
- Improved PHI value tracking in _value_at_end_i64() (lines 268-285)
- Added trace logging for snap hits with PHI detection
- Fixed PHI placeholder reuse logic to preserve dominance
- PHI values now returned directly from snapshots when valid

Changes to llvm_builder.py:
- Fixed externcall instruction parsing (line 522: 'func' instead of 'name')
- Improved block snapshot tracing (line 439)
- Added PHI incoming metadata tracking (lines 316-376)
- Enhanced definition tracking for lifetime hints

This should help debug the string carry=0 issue in esc_dirname_smoke where
PHI values were being incorrectly coerced instead of preserved.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Selfhosting Dev
2025-09-14 16:25:21 +09:00
parent 3e07763af8
commit 4c0e6726e3
34 changed files with 1487 additions and 215 deletions

View File

@ -4,7 +4,7 @@ Handles +, -, *, /, %, &, |, ^, <<, >>
"""
import llvmlite.ir as ir
from typing import Dict
from typing import Dict, Optional, Any
from .compare import lower_compare
import llvmlite.ir as ir
@ -19,7 +19,9 @@ def lower_binop(
current_block: ir.Block,
preds=None,
block_end_values=None,
bb_map=None
bb_map=None,
*,
dst_type: Optional[Any] = None,
) -> None:
"""
Lower MIR BinOp instruction
@ -73,6 +75,13 @@ def lower_binop(
# pointer present?
is_ptr_side = (hasattr(lhs_raw, 'type') and isinstance(lhs_raw.type, ir.PointerType)) or \
(hasattr(rhs_raw, 'type') and isinstance(rhs_raw.type, ir.PointerType))
# Explicit dst_type hint from MIR JSON?
force_string = False
try:
if isinstance(dst_type, dict) and dst_type.get('kind') == 'handle' and dst_type.get('box_type') == 'StringBox':
force_string = True
except Exception:
pass
# tagged string handles? (i.e. when either side is string-ish)
any_tagged = False
try:
@ -84,10 +93,36 @@ def lower_binop(
any_tagged = (lhs in resolver.string_literals) or (rhs in resolver.string_literals)
except Exception:
pass
is_str = is_ptr_side or any_tagged
is_str = force_string or is_ptr_side or any_tagged
if is_str:
# Helper: convert raw or resolved value to string handle
def to_handle(raw, val, tag: str, vid: int):
# If we already have an i64 in vmap (raw), prefer it
if raw is not None and hasattr(raw, 'type') and isinstance(raw.type, ir.IntType) and raw.type.width == 64:
is_tag = False
try:
if resolver is not None and hasattr(resolver, 'is_stringish'):
is_tag = resolver.is_stringish(vid)
except Exception:
is_tag = False
if force_string or is_tag:
return raw
# Heuristic: PHI values in string concat are typically handles; prefer pass-through
try:
raw_is_phi = hasattr(raw, 'add_incoming')
except Exception:
raw_is_phi = False
if raw_is_phi:
return raw
# Otherwise, box numeric i64 to IntegerBox handle
cal = None
for f in builder.module.functions:
if f.name == 'nyash.box.from_i64':
cal = f; break
if cal is None:
cal = ir.Function(builder.module, ir.FunctionType(i64, [i64]), name='nyash.box.from_i64')
v64 = raw
return builder.call(cal, [v64], name=f"int_i2h_{tag}_{dst}")
if raw is not None and hasattr(raw, 'type') and isinstance(raw.type, ir.PointerType):
# pointer-to-array -> GEP
try:
@ -112,9 +147,16 @@ def lower_binop(
is_tag = resolver.is_stringish(vid)
except Exception:
is_tag = False
if is_tag:
if force_string or is_tag:
return val
# Box numeric i64 to IntegerBox handle
# Heuristic: if vmap has a PHI placeholder for this vid, treat as handle
try:
maybe_phi = vmap.get(vid)
if maybe_phi is not None and hasattr(maybe_phi, 'add_incoming'):
return val
except Exception:
pass
# Otherwise, box numeric i64 to IntegerBox handle
cal = None
for f in builder.module.functions:
if f.name == 'nyash.box.from_i64':

View File

@ -213,11 +213,10 @@ def lower_boxcall(
callee = _declare(module, "nyash.console.log", i64, [i8p])
_ = builder.call(callee, [arg0_ptr], name="console_log_ptr")
else:
# Fallback: resolve i64 and prefer pointer API via to_i8p_h bridge
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
arg0 = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map) if args else None
else:
arg0 = vmap.get(args[0]) if args else None
# Fallback: prefer raw vmap value; resolve only if missing (avoid synthesizing PHIs here)
arg0 = vmap.get(args[0]) if args else None
if arg0 is None and resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
arg0 = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map)
if arg0 is None:
arg0 = ir.Constant(i64, 0)
# If we have a handle (i64), convert to i8* via bridge and log via pointer API

View File

@ -4,7 +4,7 @@ Handles comparison operations (<, >, <=, >=, ==, !=)
"""
import llvmlite.ir as ir
from typing import Dict
from typing import Dict, Optional, Any
from .externcall import lower_externcall
def lower_compare(
@ -18,7 +18,8 @@ def lower_compare(
current_block=None,
preds=None,
block_end_values=None,
bb_map=None
bb_map=None,
meta: Optional[Dict[str, Any]] = None,
) -> None:
"""
Lower MIR Compare instruction
@ -32,20 +33,27 @@ def lower_compare(
vmap: Value map
"""
# Get operands
if resolver is not None and preds is not None and block_end_values is not None and current_block is not None:
lhs_val = resolver.resolve_i64(lhs, current_block, preds, block_end_values, vmap, bb_map)
rhs_val = resolver.resolve_i64(rhs, current_block, preds, block_end_values, vmap, bb_map)
else:
lhs_val = vmap.get(lhs)
rhs_val = vmap.get(rhs)
# Prefer same-block SSA from vmap; fallback to resolver for cross-block dominance
lhs_val = vmap.get(lhs)
rhs_val = vmap.get(rhs)
if (lhs_val is None or rhs_val is None) and resolver is not None and preds is not None and block_end_values is not None and current_block is not None:
if lhs_val is None:
lhs_val = resolver.resolve_i64(lhs, current_block, preds, block_end_values, vmap, bb_map)
if rhs_val is None:
rhs_val = resolver.resolve_i64(rhs, current_block, preds, block_end_values, vmap, bb_map)
i64 = ir.IntType(64)
i8p = ir.IntType(8).as_pointer()
# String-aware equality: if either side is a pointer or tagged as string-ish, compare via eq_hh
# String-aware equality: if meta marks string or either side is tagged string-ish,
# compare handles directly via nyash.string.eq_hh
if op in ('==','!='):
lhs_ptr = hasattr(lhs_val, 'type') and isinstance(lhs_val.type, ir.PointerType)
rhs_ptr = hasattr(rhs_val, 'type') and isinstance(rhs_val.type, ir.PointerType)
force_string = False
try:
if isinstance(meta, dict) and meta.get('cmp_kind') == 'string':
force_string = True
except Exception:
pass
lhs_tag = False
rhs_tag = False
try:
@ -54,28 +62,30 @@ def lower_compare(
rhs_tag = resolver.is_stringish(rhs)
except Exception:
pass
if lhs_ptr or rhs_ptr or lhs_tag or rhs_tag:
# Convert both to handles (i64) then nyash.string.eq_hh
# nyash.box.from_i8_string(i8*) -> i64
box_from = None
for f in builder.module.functions:
if f.name == 'nyash.box.from_i8_string':
box_from = f
break
if not box_from:
box_from = ir.Function(builder.module, ir.FunctionType(i64, [i8p]), name='nyash.box.from_i8_string')
def to_h(v):
if hasattr(v, 'type') and isinstance(v.type, ir.PointerType):
return builder.call(box_from, [v])
else:
# assume i64 handle or number; zext/trunc to i64 if needed
if hasattr(v, 'type') and isinstance(v.type, ir.IntType) and v.type.width != 64:
return builder.zext(v, i64) if v.type.width < 64 else builder.trunc(v, i64)
if hasattr(v, 'type') and isinstance(v.type, ir.PointerType):
return builder.ptrtoint(v, i64)
return v if hasattr(v, 'type') else ir.Constant(i64, 0)
lh = to_h(lhs_val)
rh = to_h(rhs_val)
if force_string or lhs_tag or rhs_tag:
try:
import os
if os.environ.get('NYASH_LLVM_TRACE_VALUES') == '1':
print(f"[compare] string-eq path: lhs={lhs} rhs={rhs} force={force_string} tagL={lhs_tag} tagR={rhs_tag}", flush=True)
except Exception:
pass
# Prefer same-block SSA (vmap) since string handles are produced in-place; fallback to resolver
lh = lhs_val if lhs_val is not None else (
resolver.resolve_i64(lhs, current_block, preds, block_end_values, vmap, bb_map)
if (resolver is not None and preds is not None and block_end_values is not None and current_block is not None) else ir.Constant(i64, 0)
)
rh = rhs_val if rhs_val is not None else (
resolver.resolve_i64(rhs, current_block, preds, block_end_values, vmap, bb_map)
if (resolver is not None and preds is not None and block_end_values is not None and current_block is not None) else ir.Constant(i64, 0)
)
try:
import os
if os.environ.get('NYASH_LLVM_TRACE_VALUES') == '1':
lz = isinstance(lh, ir.Constant) and getattr(getattr(lh,'constant',None),'constant',None) == 0
rz = isinstance(rh, ir.Constant) and getattr(getattr(rh,'constant',None),'constant',None) == 0
print(f"[compare] string-eq args: lh_is_const={isinstance(lh, ir.Constant)} rh_is_const={isinstance(rh, ir.Constant)}", flush=True)
except Exception:
pass
eqf = None
for f in builder.module.functions:
if f.name == 'nyash.string.eq_hh':

View File

@ -18,7 +18,7 @@ from instructions.compare import lower_compare
from instructions.jump import lower_jump
from instructions.branch import lower_branch
from instructions.ret import lower_return
from instructions.phi import lower_phi, defer_phi_wiring
# PHI are deferred; finalize_phis wires incoming edges after snapshots
from instructions.call import lower_call
from instructions.boxcall import lower_boxcall
from instructions.externcall import lower_externcall
@ -101,12 +101,9 @@ class NyashLLVMBuilder:
if not exists:
ir.Function(self.module, fty, name=name)
# Process each function
# Process each function (finalize PHIs per function to avoid cross-function map collisions)
for func_data in functions:
self.lower_function(func_data)
# Wire deferred PHIs
self._wire_deferred_phis()
# Create ny_main wrapper if necessary
has_ny_main = any(f.name == 'ny_main' for f in self.module.functions)
@ -189,6 +186,11 @@ class NyashLLVMBuilder:
self.vmap.clear()
except Exception:
self.vmap = {}
# Reset basic-block map per function (block ids are local to function)
try:
self.bb_map.clear()
except Exception:
self.bb_map = {}
# Reset resolver caches (they key by block name; avoid collisions across functions)
try:
self.resolver.i64_cache.clear()
@ -284,6 +286,98 @@ class NyashLLVMBuilder:
visit(bid)
# Process blocks in the computed order
# Prepass: collect producer stringish hints and PHI metadata for all blocks
# and create placeholders at each block head so that resolver can safely
# return existing PHIs without creating new ones.
try:
# Pass A: collect producer stringish hints per value-id
produced_str: Dict[int, bool] = {}
for block_data in blocks:
for inst in block_data.get("instructions", []) or []:
try:
opx = inst.get("op")
dstx = inst.get("dst")
if dstx is None:
continue
is_str = False
if opx == "const":
v = inst.get("value", {}) or {}
t = v.get("type")
if t == "string" or (isinstance(t, dict) and t.get("kind") in ("handle","ptr") and t.get("box_type") == "StringBox"):
is_str = True
elif opx in ("binop","boxcall","externcall"):
t = inst.get("dst_type")
if isinstance(t, dict) and t.get("kind") == "handle" and t.get("box_type") == "StringBox":
is_str = True
if is_str:
produced_str[int(dstx)] = True
except Exception:
pass
self.block_phi_incomings = {}
for block_data in blocks:
bid0 = block_data.get("id", 0)
bb0 = self.bb_map.get(bid0)
for inst in block_data.get("instructions", []) or []:
if inst.get("op") == "phi":
try:
dst0 = int(inst.get("dst"))
incoming0 = inst.get("incoming", []) or []
except Exception:
dst0 = None; incoming0 = []
if dst0 is None:
continue
# Record incoming metadata for finalize_phis
try:
self.block_phi_incomings.setdefault(bid0, {})[dst0] = [
(int(b), int(v)) for (v, b) in incoming0
]
except Exception:
pass
# Ensure placeholder exists at block head
if bb0 is not None:
b0 = ir.IRBuilder(bb0)
try:
b0.position_at_start(bb0)
except Exception:
pass
existing = self.vmap.get(dst0)
is_phi = False
try:
is_phi = hasattr(existing, 'add_incoming')
except Exception:
is_phi = False
if not is_phi:
ph0 = b0.phi(self.i64, name=f"phi_{dst0}")
self.vmap[dst0] = ph0
# Tag propagation: if explicit dst_type marks string or any incoming was produced as string-ish, tag dst
try:
dst_type0 = inst.get("dst_type")
mark_str = isinstance(dst_type0, dict) and dst_type0.get("kind") == "handle" and dst_type0.get("box_type") == "StringBox"
if not mark_str:
for (v_id, _b_id) in incoming0:
try:
if produced_str.get(int(v_id)):
mark_str = True; break
except Exception:
pass
if mark_str and hasattr(self.resolver, 'mark_string'):
self.resolver.mark_string(int(dst0))
except Exception:
pass
# Definition hint: PHI defines dst in this block
try:
self.def_blocks.setdefault(int(dst0), set()).add(int(bid0))
except Exception:
pass
# Sync to resolver
try:
self.resolver.block_phi_incomings = self.block_phi_incomings
except Exception:
pass
except Exception:
pass
# Now lower blocks
for bid in order:
block_data = block_by_id.get(bid)
if block_data is None:
@ -294,8 +388,12 @@ class NyashLLVMBuilder:
# Provide lifetime hints to resolver (which blocks define which values)
try:
self.resolver.def_blocks = self.def_blocks
# Provide phi metadata for this function to resolver
self.resolver.block_phi_incomings = getattr(self, 'block_phi_incomings', {})
except Exception:
pass
# Finalize PHIs for this function now that all snapshots for it exist
self.finalize_phis()
def lower_block(self, bb: ir.Block, block_data: Dict[str, Any], func: ir.Function):
"""Lower a single basic block"""
@ -307,25 +405,11 @@ class NyashLLVMBuilder:
except Exception:
pass
instructions = block_data.get("instructions", [])
created_ids: List[int] = []
# Two-pass: lower all PHIs first to keep them grouped at top
phi_insts = [inst for inst in instructions if inst.get("op") == "phi"]
non_phi_insts = [inst for inst in instructions if inst.get("op") != "phi"]
# Lower PHIs
if phi_insts:
# Ensure insertion at block start
builder.position_at_start(bb)
for inst in phi_insts:
self.lower_instruction(builder, inst, func)
try:
dst = inst.get("dst")
if isinstance(dst, int) and dst not in created_ids and dst in self.vmap:
created_ids.append(dst)
except Exception:
pass
# Lower non-PHI instructions strictly in original program order.
# Reordering here can easily introduce use-before-def within the same
# basic block (e.g., string ops that depend on prior me.* calls).
created_ids: List[int] = []
non_phi_insts = [inst for inst in instructions if inst.get("op") != "phi"]
for inst in non_phi_insts:
# Stop if a terminator has already been emitted for this block
try:
@ -343,20 +427,21 @@ class NyashLLVMBuilder:
pass
# Snapshot end-of-block values for sealed PHI wiring
bid = block_data.get("id", 0)
snap: Dict[int, ir.Value] = {}
# include function args (avoid 0 constant confusion later via special-case)
# Robust snapshot: clone the entire vmap at block end so that
# values that were not redefined in this block (but remain live)
# are available to PHI finalize wiring. This avoids omissions of
# phi-dst/cyclic and carry-over values.
snap: Dict[int, ir.Value] = dict(self.vmap)
try:
arity = len(func.args)
import os
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
keys = sorted(list(snap.keys()))
print(f"[builder] snapshot bb{bid} keys={keys[:20]}...", flush=True)
except Exception:
arity = 0
for i in range(arity):
if i in self.vmap:
snap[i] = self.vmap[i]
pass
# Record block-local definitions for lifetime hinting
for vid in created_ids:
val = self.vmap.get(vid)
if val is not None:
snap[vid] = val
# Record block-local definition for lifetime hinting
if vid in self.vmap:
self.def_blocks.setdefault(vid, set()).add(block_data.get("id", 0))
self.block_end_values[bid] = snap
@ -374,8 +459,10 @@ class NyashLLVMBuilder:
lhs = inst.get("lhs")
rhs = inst.get("rhs")
dst = inst.get("dst")
dst_type = inst.get("dst_type")
lower_binop(builder, self.resolver, operation, lhs, rhs, dst,
self.vmap, builder.block, self.preds, self.block_end_values, self.bb_map)
self.vmap, builder.block, self.preds, self.block_end_values, self.bb_map,
dst_type=dst_type)
elif op == "jump":
target = inst.get("target")
@ -393,19 +480,19 @@ class NyashLLVMBuilder:
self.resolver, self.preds, self.block_end_values, self.bb_map)
elif op == "phi":
dst = inst.get("dst")
incoming = inst.get("incoming", [])
# Wire PHI immediately at the start of the current block using snapshots
lower_phi(builder, dst, incoming, self.vmap, self.bb_map, builder.block, self.resolver, self.block_end_values, self.preds)
# No-op here: PHI is metadata only; the resolver materializes it on demand
return
elif op == "compare":
# Dedicated compare op
operation = inst.get("operation") or inst.get("op")
lhs = inst.get("lhs")
rhs = inst.get("rhs")
dst = inst.get("dst")
cmp_kind = inst.get("cmp_kind")
lower_compare(builder, operation, lhs, rhs, dst, self.vmap,
self.resolver, builder.block, self.preds, self.block_end_values, self.bb_map)
self.resolver, builder.block, self.preds, self.block_end_values, self.bb_map,
meta={"cmp_kind": cmp_kind} if cmp_kind else None)
elif op == "call":
func_name = inst.get("func")
@ -550,80 +637,127 @@ class NyashLLVMBuilder:
builder.position_at_end(cont)
self.lower_instruction(builder, sub, func)
def _wire_deferred_phis(self):
"""Wire all deferred PHI nodes"""
for cur_bid, dst_vid, incoming in self.phi_deferrals:
bb = self.bb_map.get(cur_bid)
def finalize_phis(self):
"""Finalize PHIs declared in JSON by wiring incoming edges at block heads.
Uses resolver._value_at_end_i64 to materialize values at predecessor ends,
ensuring casts/boxing are inserted in predecessor blocks (dominance-safe)."""
# Iterate JSON-declared PHIs per block
# Build succ map for nearest-predecessor mapping
succs: Dict[int, List[int]] = {}
for to_bid, from_list in (self.preds or {}).items():
for fr in from_list:
succs.setdefault(fr, []).append(to_bid)
for block_id, dst_map in (getattr(self, 'block_phi_incomings', {}) or {}).items():
bb = self.bb_map.get(block_id)
if bb is None:
continue
b = ir.IRBuilder(bb)
b.position_at_start(bb)
# Determine phi type: prefer pointer if any incoming is pointer; else f64; else i64
phi_type = self.i64
for (val_id, pred_bid) in incoming:
snap = self.block_end_values.get(pred_bid, {})
val = snap.get(val_id)
if val is not None and hasattr(val, 'type'):
if hasattr(val.type, 'is_pointer') and val.type.is_pointer:
phi_type = val.type
try:
b.position_at_start(bb)
except Exception:
pass
for dst_vid, incoming in (dst_map or {}).items():
# Ensure placeholder exists at block head
phi = self.vmap.get(dst_vid)
try:
is_phi = hasattr(phi, 'add_incoming')
except Exception:
is_phi = False
if not is_phi:
phi = b.phi(self.i64, name=f"phi_{dst_vid}")
self.vmap[dst_vid] = phi
# Wire incoming per CFG predecessor; map src_vid when provided
preds_raw = [p for p in self.preds.get(block_id, []) if p != block_id]
# Deduplicate while preserving order
seen = set()
preds_list: List[int] = []
for p in preds_raw:
if p not in seen:
preds_list.append(p)
seen.add(p)
# Helper: find the nearest immediate predecessor on a path decl_b -> ... -> block_id
def nearest_pred_on_path(decl_b: int) -> Optional[int]:
# BFS from decl_b to block_id; return the parent of block_id on that path.
from collections import deque
q = deque([decl_b])
visited = set([decl_b])
parent: Dict[int, Optional[int]] = {decl_b: None}
while q:
cur = q.popleft()
if cur == block_id:
par = parent.get(block_id)
return par if par in preds_list else None
for nx in succs.get(cur, []):
if nx not in visited:
visited.add(nx)
parent[nx] = cur
q.append(nx)
return None
# Precompute a non-self initial source (if present) to use for self-carry cases
init_src_vid: Optional[int] = None
for (b_decl0, v_src0) in incoming:
try:
vs0 = int(v_src0)
except Exception:
continue
if vs0 != int(dst_vid):
init_src_vid = vs0
break
elif str(val.type) == str(self.f64):
phi_type = self.f64
phi = b.phi(phi_type, name=f"phi_{dst_vid}")
for (val_id, pred_bid) in incoming:
pred_bb = self.bb_map.get(pred_bid)
if pred_bb is None:
continue
# Self-reference takes precedence regardless of snapshot
if val_id == dst_vid:
val = phi
else:
# Prefer resolver-driven localization at the end of the predecessor block
if hasattr(self, 'resolver') and self.resolver is not None:
# Pre-resolve declared incomings to nearest immediate predecessors
chosen: Dict[int, ir.Value] = {}
for (b_decl, v_src) in incoming:
try:
bd = int(b_decl); vs = int(v_src)
except Exception:
continue
pred_match = nearest_pred_on_path(bd)
if pred_match is None:
continue
# If self-carry is specified (vs == dst_vid), map to init_src_vid when available
if vs == int(dst_vid) and init_src_vid is not None:
vs = int(init_src_vid)
try:
val = self.resolver._value_at_end_i64(vs, pred_match, self.preds, self.block_end_values, self.vmap, self.bb_map)
except Exception:
val = None
if val is None:
val = ir.Constant(self.i64, 0)
chosen[pred_match] = val
# Fill remaining predecessors with dst carry or zero
for pred_bid in preds_list:
if pred_bid not in chosen:
try:
pred_block_obj = pred_bb
val = self.resolver.resolve_i64(val_id, pred_block_obj, self.preds, self.block_end_values, self.vmap, self.bb_map)
val = self.resolver._value_at_end_i64(dst_vid, pred_bid, self.preds, self.block_end_values, self.vmap, self.bb_map)
except Exception:
val = None
else:
# Snapshot fallback
snap = self.block_end_values.get(pred_bid, {})
# Special-case: incoming 0 means typed zero/null, not value-id 0
if isinstance(val_id, int) and val_id == 0:
val = None
else:
val = snap.get(val_id)
if val is None:
# Default based on phi type
if isinstance(phi_type, ir.IntType):
val = ir.Constant(phi_type, 0)
elif isinstance(phi_type, ir.DoubleType):
val = ir.Constant(phi_type, 0.0)
else:
val = ir.Constant(phi_type, None)
# Type adjust if needed
if hasattr(val, 'type') and val.type != phi_type:
# Insert cast in predecessor block before its terminator
pb = ir.IRBuilder(pred_bb)
try:
term = pred_bb.terminator
if term is not None:
pb.position_before(term)
else:
pb.position_at_end(pred_bb)
except Exception:
pb.position_at_end(pred_bb)
if isinstance(phi_type, ir.IntType) and hasattr(val, 'type') and isinstance(val.type, ir.PointerType):
val = pb.ptrtoint(val, phi_type, name=f"phi_p2i_{dst_vid}_{pred_bid}")
elif isinstance(phi_type, ir.PointerType) and hasattr(val, 'type') and isinstance(val.type, ir.IntType):
val = pb.inttoptr(val, phi_type, name=f"phi_i2p_{dst_vid}_{pred_bid}")
elif isinstance(phi_type, ir.IntType) and hasattr(val, 'type') and isinstance(val.type, ir.IntType):
if phi_type.width > val.type.width:
val = pb.zext(val, phi_type, name=f"phi_zext_{dst_vid}_{pred_bid}")
elif phi_type.width < val.type.width:
val = pb.trunc(val, phi_type, name=f"phi_trunc_{dst_vid}_{pred_bid}")
phi.add_incoming(val, pred_bb)
self.vmap[dst_vid] = phi
if val is None:
val = ir.Constant(self.i64, 0)
chosen[pred_bid] = val
# Finally add incomings (each predecessor at most once)
for pred_bid, val in chosen.items():
pred_bb = self.bb_map.get(pred_bid)
if pred_bb is None:
continue
phi.add_incoming(val, pred_bb)
# Tag dst as string-ish if any declared source was string-ish (post-lowering info)
try:
if hasattr(self.resolver, 'is_stringish') and hasattr(self.resolver, 'mark_string'):
any_str = False
for (_b_decl_i, v_src_i) in incoming:
try:
if self.resolver.is_stringish(int(v_src_i)):
any_str = True; break
except Exception:
pass
if any_str:
self.resolver.mark_string(int(dst_vid))
except Exception:
pass
# Clear legacy deferrals if any
try:
self.phi_deferrals.clear()
except Exception:
pass
def compile_to_object(self, output_path: str):
"""Compile module to object file"""

View File

@ -138,6 +138,9 @@ class PyVM:
out = float(vv)
elif ty == "string":
out = str(vv)
elif isinstance(ty, dict) and ty.get('kind') in ('handle','ptr') and ty.get('box_type') == 'StringBox':
# Treat handle/pointer-typed string constants as Python str for VM semantics
out = str(vv)
else:
out = None
self._set(regs, inst.get("dst"), out)

View File

@ -48,6 +48,8 @@ class Resolver:
# Lifetime hint: value_id -> set(block_id) where it's known to be defined
# Populated by the builder when available.
self.def_blocks = {}
# Optional: block -> { dst_vid -> [(pred_bid, val_vid), ...] } for PHIs from MIR JSON
self.block_phi_incomings = {}
def mark_string(self, value_id: int) -> None:
try:
@ -81,6 +83,23 @@ class Resolver:
return self.i64_cache[cache_key]
# Do not trust global vmap across blocks unless we know it's defined in this block.
# If this block has a declared MIR PHI for the value, prefer that placeholder
# and avoid creating any PHI here. Incoming is wired by finalize_phis().
try:
try:
block_id = int(str(current_block.name).replace('bb',''))
except Exception:
block_id = -1
if isinstance(self.block_phi_incomings, dict):
bmap = self.block_phi_incomings.get(block_id)
if isinstance(bmap, dict) and value_id in bmap:
existing_cur = vmap.get(value_id)
if existing_cur is not None and hasattr(existing_cur, 'add_incoming'):
self.i64_cache[cache_key] = existing_cur
return existing_cur
except Exception:
pass
# Get predecessor blocks
try:
@ -98,15 +117,36 @@ class Resolver:
existing = vmap.get(value_id)
if existing is not None and hasattr(existing, 'type') and isinstance(existing.type, ir.IntType) and existing.type.width == 64:
if os.environ.get('NYASH_LLVM_TRACE_VALUES') == '1':
print(f"[VAL] reuse local v{value_id} in bb{bid}", flush=True)
print(f"[resolve] local reuse: bb{bid} v{value_id}", flush=True)
self.i64_cache[cache_key] = existing
return existing
else:
# Prefer a directly available SSA value from vmap (reuse of a definition made earlier in the same block).
# Even if def_blocks has not been updated yet, treat the value as a local definition when it exists in vmap.
try:
existing = vmap.get(value_id)
except Exception:
existing = None
if existing is not None and hasattr(existing, 'type') and isinstance(existing.type, ir.IntType):
if existing.type.width == 64:
if os.environ.get('NYASH_LLVM_TRACE_VALUES') == '1':
print(f"[resolve] vmap-fast reuse: bb{bid} v{value_id}", flush=True)
self.i64_cache[cache_key] = existing
return existing
else:
zextd = self.builder.zext(existing, self.i64) if self.builder is not None else ir.Constant(self.i64, 0)
if os.environ.get('NYASH_LLVM_TRACE_VALUES') == '1':
print(f"[resolve] vmap-fast zext: bb{bid} v{value_id}", flush=True)
self.i64_cache[cache_key] = zextd
return zextd
if not pred_ids:
# Entry block or no predecessors: prefer local vmap value (already dominating)
base_val = vmap.get(value_id)
if base_val is None:
result = ir.Constant(self.i64, 0)
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
print(f"[resolve] bb{bid} v{value_id} entry/no-preds → 0", flush=True)
else:
# If pointer string, box to handle in current block (use local builder)
if hasattr(base_val, 'type') and isinstance(base_val.type, ir.PointerType) and self.module is not None:
@ -135,41 +175,37 @@ class Resolver:
result = base_val if base_val.type.width == 64 else ir.Constant(self.i64, 0)
else:
result = ir.Constant(self.i64, 0)
elif len(pred_ids) == 1:
# Single-predecessor block: take predecessor end-of-block value directly
coerced = self._value_at_end_i64(value_id, pred_ids[0], preds, block_end_values, vmap, bb_map)
self.i64_cache[cache_key] = coerced
return coerced
else:
# Sealed SSA localization: create a PHI at the start of current block
# that merges i64-coerced snapshots from each predecessor. This guarantees
# dominance for downstream uses within the current block.
# Use shared builder so insertion order is respected relative to other instructions.
# Save current insertion point
sb = self.builder
if sb is None:
# As a conservative fallback, synthesize zero (should not happen in normal lowering)
result = ir.Constant(self.i64, 0)
self.i64_cache[cache_key] = result
return result
orig_block = sb.block
# Insert PHI at the very start of current_block
sb.position_at_start(current_block)
phi = sb.phi(self.i64, name=f"loc_i64_{value_id}")
for pred_id in pred_ids:
# Value at the end of predecessor, coerced to i64 within pred block
coerced = self._value_at_end_i64(value_id, pred_id, preds, block_end_values, vmap, bb_map)
pred_bb = bb_map.get(pred_id) if bb_map is not None else None
if pred_bb is None:
continue
phi.add_incoming(coerced, pred_bb)
# Restore insertion point to original location
# Multi-pred: if JSON declares a PHI for (current block, value_id),
# materialize it on-demand via end-of-block resolver. Otherwise, avoid
# synthesizing a localization PHI (return zero to preserve dominance).
try:
if orig_block is not None:
term = orig_block.terminator
if term is not None:
sb.position_before(term)
else:
sb.position_at_end(orig_block)
cur_bid = int(str(current_block.name).replace('bb',''))
except Exception:
pass
# Use the PHI value as the localized definition for this block
result = phi
cur_bid = -1
declared = False
try:
if isinstance(self.block_phi_incomings, dict):
m = self.block_phi_incomings.get(cur_bid)
if isinstance(m, dict) and value_id in m:
declared = True
except Exception:
declared = False
if declared:
# Return existing placeholder if present; do not create a new PHI here.
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
print(f"[resolve] use placeholder PHI: bb{cur_bid} v{value_id}", flush=True)
placeholder = vmap.get(value_id)
result = placeholder if (placeholder is not None and hasattr(placeholder, 'add_incoming')) else ir.Constant(self.i64, 0)
else:
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
print(f"[resolve] multi-pred no-declare: bb{cur_bid} v{value_id} -> 0", flush=True)
result = ir.Constant(self.i64, 0)
# Cache and return
self.i64_cache[cache_key] = result
@ -207,19 +243,43 @@ class Resolver:
bb_map: Optional[Dict[int, ir.Block]] = None,
_vis: Optional[set] = None) -> ir.Value:
"""Resolve value as i64 at the end of a given block by traversing predecessors if needed."""
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
try:
print(f"[resolve] end_i64 enter: bb{block_id} v{value_id}", flush=True)
except Exception:
pass
key = (block_id, value_id)
if key in self._end_i64_cache:
return self._end_i64_cache[key]
if _vis is None:
_vis = set()
if key in _vis:
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
print(f"[resolve] cycle detected at end_i64(bb{block_id}, v{value_id}) → 0", flush=True)
return ir.Constant(self.i64, 0)
_vis.add(key)
# Do not synthesize PHIs here. Placeholders are created in the function prepass.
# If present in snapshot, coerce there
snap = block_end_values.get(block_id, {})
if value_id in snap and snap[value_id] is not None:
val = snap[value_id]
is_phi_val = False
try:
is_phi_val = hasattr(val, 'add_incoming')
except Exception:
is_phi_val = False
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
try:
ty = 'phi' if is_phi_val else ('ptr' if hasattr(val, 'type') and isinstance(val.type, ir.PointerType) else ('i'+str(getattr(val.type,'width','?')) if hasattr(val,'type') and isinstance(val.type, ir.IntType) else 'other'))
print(f"[resolve] snap hit: bb{block_id} v{value_id} type={ty}", flush=True)
except Exception:
pass
if is_phi_val:
# Using a dominating PHI placeholder as incoming is valid for finalize_phis
self._end_i64_cache[key] = val
return val
coerced = self._coerce_in_block_to_i64(val, block_id, bb_map)
self._end_i64_cache[key] = coerced
return coerced
@ -235,6 +295,9 @@ class Resolver:
# Do not use global vmap here; if not materialized by end of this block
# (or its preds), bail out with zero to preserve dominance.
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
preds_s = ','.join(str(x) for x in pred_ids)
print(f"[resolve] end_i64 miss: bb{block_id} v{value_id} preds=[{preds_s}] → 0", flush=True)
z = ir.Constant(self.i64, 0)
self._end_i64_cache[key] = z
return z
@ -242,7 +305,11 @@ class Resolver:
def _coerce_in_block_to_i64(self, val: Any, block_id: int, bb_map: Optional[Dict[int, ir.Block]]) -> ir.Value:
"""Ensure a value is available as i64 at the end of the given block by inserting casts/boxing there."""
if hasattr(val, 'type') and isinstance(val.type, ir.IntType):
# Re-materialize an i64 definition in the predecessor block to satisfy dominance
# If already i64, avoid re-materializing in predecessor block.
# Using a value defined in another block inside pred may violate dominance (e.g., self-referential PHIs).
if val.type.width == 64:
return val
# Otherwise, extend/truncate in predecessor block just before the terminator.
pred_bb = bb_map.get(block_id) if bb_map is not None else None
if pred_bb is None:
return ir.Constant(self.i64, 0)
@ -255,15 +322,10 @@ class Resolver:
pb.position_at_end(pred_bb)
except Exception:
pb.position_at_end(pred_bb)
if val.type.width == 64:
z = ir.Constant(self.i64, 0)
return pb.add(val, z, name=f"res_copy_{block_id}")
if val.type.width < 64:
return pb.zext(val, self.i64, name=f"res_zext_{block_id}")
else:
# Extend/truncate to i64 in pred block
if val.type.width < 64:
return pb.zext(val, self.i64, name=f"res_zext_{block_id}")
else:
return pb.trunc(val, self.i64, name=f"res_trunc_{block_id}")
return pb.trunc(val, self.i64, name=f"res_trunc_{block_id}")
if hasattr(val, 'type') and isinstance(val.type, ir.PointerType):
pred_bb = bb_map.get(block_id) if bb_map is not None else None
if pred_bb is None: