hakorune/src/llvm_py/resolver.py
Commit 4c0e6726e3 (Selfhosting Dev): 🔧 refactor(llvm-py): Fix resolver PHI handling and add trace improvements
Changes to resolver.py:
- Improved PHI value tracking in _value_at_end_i64() (lines 268-285)
- Added trace logging for snap hits with PHI detection
- Fixed PHI placeholder reuse logic to preserve dominance
- PHI values now returned directly from snapshots when valid

Changes to llvm_builder.py:
- Fixed externcall instruction parsing (line 522: 'func' instead of 'name')
- Improved block snapshot tracing (line 439)
- Added PHI incoming metadata tracking (lines 316-376)
- Enhanced definition tracking for lifetime hints

This should help debug the string carry=0 issue in esc_dirname_smoke where
PHI values were being incorrectly coerced instead of preserved.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-14 16:25:21 +09:00
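
To make the intended behavior concrete, here is a minimal sketch (not from the repository) of the placeholder-reuse path described above: a PHI placeholder created in a prepass is registered in vmap and declared in block_phi_incomings, and resolve_i64() hands it back unchanged instead of coercing it or creating a second PHI. The import path, block layout, and value ids are invented for illustration, and llvmlite is assumed to be installed; finalize_phis() in the builder is then responsible for wiring the incoming edges.

# Hypothetical harness; block/value ids and the import path are made up.
import llvmlite.ir as ir
from resolver import Resolver

i64 = ir.IntType(64)
mod = ir.Module(name="demo")
fn = ir.Function(mod, ir.FunctionType(i64, []), name="demo_fn")
bb0 = fn.append_basic_block(name="bb0")   # predecessors, present only to shape the CFG
bb1 = fn.append_basic_block(name="bb1")
bb2 = fn.append_basic_block(name="bb2")

builder = ir.IRBuilder(bb2)
phi = builder.phi(i64, name="v7_phi")     # placeholder PHI created in a prepass

res = Resolver(builder, mod)
res.block_phi_incomings = {2: {7: [(0, 10), (1, 11)]}}  # MIR JSON declares a PHI for v7 in bb2
vmap = {7: phi}
preds = {2: [0, 1]}

got = res.resolve_i64(7, bb2, preds, block_end_values={}, vmap=vmap)
assert got is phi  # the declared placeholder is returned directly; no new PHI, no coercion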


"""
Resolver API (Python version)
Based on src/backend/llvm/compiler/codegen/instructions/resolver.rs
"""
from typing import Dict, Optional, Any, Tuple
import os
import llvmlite.ir as ir
class Resolver:
"""
Centralized value resolution with per-block caching.
Following the Core Invariants from LLVM_LAYER_OVERVIEW.md:
- Resolver-only reads
- Localize at block start (PHI creation)
- Cache per (block, value) to avoid redundant PHIs
"""
def __init__(self, a, b=None):
"""Flexible init: either (builder, module) or (vmap, bb_map) for legacy wiring."""
if hasattr(a, 'position_at_end'):
# a is IRBuilder
self.builder = a
self.module = b
else:
# Legacy constructor (vmap, bb_map) — builder/module will be set later when available
self.builder = None
self.module = None
# Caches: (block_name, value_id) -> llvm value
self.i64_cache: Dict[Tuple[str, int], ir.Value] = {}
self.ptr_cache: Dict[Tuple[str, int], ir.Value] = {}
self.f64_cache: Dict[Tuple[str, int], ir.Value] = {}
# String literal map: value_id -> Python string (for by-name calls)
self.string_literals: Dict[int, str] = {}
# Optional: value_id -> i8* pointer for string constants (lower_const can populate)
self.string_ptrs: Dict[int, ir.Value] = {}
# Track value-ids that are known to represent string handles (i64)
# This is a best-effort tag used to decide '+' as string concat when both sides are i64.
self.string_ids: set[int] = set()
# Type shortcuts
self.i64 = ir.IntType(64)
self.i8p = ir.IntType(8).as_pointer()
self.f64_type = ir.DoubleType()
# Cache for recursive end-of-block i64 resolution
self._end_i64_cache: Dict[Tuple[int, int], ir.Value] = {}
# Lifetime hint: value_id -> set(block_id) where it's known to be defined
# Populated by the builder when available.
self.def_blocks = {}
# Optional: block -> { dst_vid -> [(pred_bid, val_vid), ...] } for PHIs from MIR JSON
self.block_phi_incomings = {}
def mark_string(self, value_id: int) -> None:
try:
self.string_ids.add(int(value_id))
except Exception:
pass
def is_stringish(self, value_id: int) -> bool:
try:
return int(value_id) in self.string_ids
except Exception:
return False
def resolve_i64(
self,
value_id: int,
current_block: ir.Block,
preds: Dict[int, list],
block_end_values: Dict[int, Dict[int, Any]],
vmap: Dict[int, Any],
bb_map: Optional[Dict[int, ir.Block]] = None
) -> ir.Value:
"""
Resolve a MIR value as i64 dominating the current block.
Creates PHI at block start if needed, caches the result.
"""
cache_key = (current_block.name, value_id)
# Check cache
if cache_key in self.i64_cache:
return self.i64_cache[cache_key]
# Do not trust global vmap across blocks unless we know it's defined in this block.
# If this block has a declared MIR PHI for the value, prefer that placeholder
# and avoid creating any PHI here. Incoming is wired by finalize_phis().
try:
try:
block_id = int(str(current_block.name).replace('bb',''))
except Exception:
block_id = -1
if isinstance(self.block_phi_incomings, dict):
bmap = self.block_phi_incomings.get(block_id)
if isinstance(bmap, dict) and value_id in bmap:
existing_cur = vmap.get(value_id)
if existing_cur is not None and hasattr(existing_cur, 'add_incoming'):
self.i64_cache[cache_key] = existing_cur
return existing_cur
except Exception:
pass
# Get predecessor blocks
try:
bid = int(str(current_block.name).replace('bb',''))
except Exception:
bid = -1
pred_ids = [p for p in preds.get(bid, []) if p != bid]
# Lifetime hint: if value is defined in this block, and present in vmap as i64, reuse it.
try:
defined_here = value_id in self.def_blocks and bid in self.def_blocks.get(value_id, set())
except Exception:
defined_here = False
if defined_here:
existing = vmap.get(value_id)
if existing is not None and hasattr(existing, 'type') and isinstance(existing.type, ir.IntType) and existing.type.width == 64:
if os.environ.get('NYASH_LLVM_TRACE_VALUES') == '1':
print(f"[resolve] local reuse: bb{bid} v{value_id}", flush=True)
self.i64_cache[cache_key] = existing
return existing
else:
            # Prefer a directly available SSA value from vmap (reuse a definition made
            # earlier in this same block). Even if def_blocks has not been updated yet,
            # a value present in vmap is treated as locally defined.
try:
existing = vmap.get(value_id)
except Exception:
existing = None
if existing is not None and hasattr(existing, 'type') and isinstance(existing.type, ir.IntType):
if existing.type.width == 64:
if os.environ.get('NYASH_LLVM_TRACE_VALUES') == '1':
print(f"[resolve] vmap-fast reuse: bb{bid} v{value_id}", flush=True)
self.i64_cache[cache_key] = existing
return existing
else:
zextd = self.builder.zext(existing, self.i64) if self.builder is not None else ir.Constant(self.i64, 0)
if os.environ.get('NYASH_LLVM_TRACE_VALUES') == '1':
print(f"[resolve] vmap-fast zext: bb{bid} v{value_id}", flush=True)
self.i64_cache[cache_key] = zextd
return zextd
if not pred_ids:
# Entry block or no predecessors: prefer local vmap value (already dominating)
base_val = vmap.get(value_id)
if base_val is None:
result = ir.Constant(self.i64, 0)
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
print(f"[resolve] bb{bid} v{value_id} entry/no-preds → 0", flush=True)
else:
# If pointer string, box to handle in current block (use local builder)
if hasattr(base_val, 'type') and isinstance(base_val.type, ir.PointerType) and self.module is not None:
pb = ir.IRBuilder(current_block)
try:
pb.position_at_start(current_block)
except Exception:
pass
i8p = ir.IntType(8).as_pointer()
v = base_val
try:
if hasattr(v.type, 'pointee') and isinstance(v.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
v = pb.gep(v, [c0, c0], name=f"res_gep_{value_id}")
except Exception:
pass
# declare and call boxer
for f in self.module.functions:
if f.name == 'nyash.box.from_i8_string':
box_from = f
break
else:
box_from = ir.Function(self.module, ir.FunctionType(self.i64, [i8p]), name='nyash.box.from_i8_string')
result = pb.call(box_from, [v], name=f"res_ptr2h_{value_id}")
elif hasattr(base_val, 'type') and isinstance(base_val.type, ir.IntType):
result = base_val if base_val.type.width == 64 else ir.Constant(self.i64, 0)
else:
result = ir.Constant(self.i64, 0)
elif len(pred_ids) == 1:
# Single-predecessor block: take predecessor end-of-block value directly
coerced = self._value_at_end_i64(value_id, pred_ids[0], preds, block_end_values, vmap, bb_map)
self.i64_cache[cache_key] = coerced
return coerced
else:
# Multi-pred: if JSON declares a PHI for (current block, value_id),
# materialize it on-demand via end-of-block resolver. Otherwise, avoid
# synthesizing a localization PHI (return zero to preserve dominance).
try:
cur_bid = int(str(current_block.name).replace('bb',''))
except Exception:
cur_bid = -1
declared = False
try:
if isinstance(self.block_phi_incomings, dict):
m = self.block_phi_incomings.get(cur_bid)
if isinstance(m, dict) and value_id in m:
declared = True
except Exception:
declared = False
if declared:
# Return existing placeholder if present; do not create a new PHI here.
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
print(f"[resolve] use placeholder PHI: bb{cur_bid} v{value_id}", flush=True)
placeholder = vmap.get(value_id)
result = placeholder if (placeholder is not None and hasattr(placeholder, 'add_incoming')) else ir.Constant(self.i64, 0)
else:
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
print(f"[resolve] multi-pred no-declare: bb{cur_bid} v{value_id} -> 0", flush=True)
result = ir.Constant(self.i64, 0)
# Cache and return
self.i64_cache[cache_key] = result
return result
def resolve_ptr(self, value_id: int, current_block: ir.Block,
preds: Dict, block_end_values: Dict, vmap: Dict) -> ir.Value:
"""Resolve as i8* pointer"""
cache_key = (current_block.name, value_id)
if cache_key in self.ptr_cache:
return self.ptr_cache[cache_key]
# Coerce current vmap value or GlobalVariable to i8*
val = vmap.get(value_id)
if val is None:
result = ir.Constant(self.i8p, None)
else:
            if hasattr(val, 'type') and isinstance(val.type, ir.PointerType):
# If pointer to array (GlobalVariable), GEP to first element
ty = val.type.pointee if hasattr(val.type, 'pointee') else None
if ty is not None and hasattr(ty, 'element'):
c0 = ir.Constant(ir.IntType(32), 0)
result = self.builder.gep(val, [c0, c0], name=f"res_str_gep_{value_id}")
else:
result = val
elif hasattr(val, 'type') and isinstance(val.type, ir.IntType):
result = self.builder.inttoptr(val, self.i8p, name=f"res_i2p_{value_id}")
else:
# f64 or others -> zero
result = ir.Constant(self.i8p, None)
self.ptr_cache[cache_key] = result
return result
def _value_at_end_i64(self, value_id: int, block_id: int, preds: Dict[int, list],
block_end_values: Dict[int, Dict[int, Any]], vmap: Dict[int, Any],
bb_map: Optional[Dict[int, ir.Block]] = None,
_vis: Optional[set] = None) -> ir.Value:
"""Resolve value as i64 at the end of a given block by traversing predecessors if needed."""
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
try:
print(f"[resolve] end_i64 enter: bb{block_id} v{value_id}", flush=True)
except Exception:
pass
key = (block_id, value_id)
if key in self._end_i64_cache:
return self._end_i64_cache[key]
if _vis is None:
_vis = set()
if key in _vis:
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
print(f"[resolve] cycle detected at end_i64(bb{block_id}, v{value_id}) → 0", flush=True)
return ir.Constant(self.i64, 0)
_vis.add(key)
# Do not synthesize PHIs here. Placeholders are created in the function prepass.
# If present in snapshot, coerce there
snap = block_end_values.get(block_id, {})
if value_id in snap and snap[value_id] is not None:
val = snap[value_id]
is_phi_val = False
try:
is_phi_val = hasattr(val, 'add_incoming')
except Exception:
is_phi_val = False
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
try:
                if is_phi_val:
                    ty = 'phi'
                elif hasattr(val, 'type') and isinstance(val.type, ir.PointerType):
                    ty = 'ptr'
                elif hasattr(val, 'type') and isinstance(val.type, ir.IntType):
                    ty = 'i' + str(getattr(val.type, 'width', '?'))
                else:
                    ty = 'other'
print(f"[resolve] snap hit: bb{block_id} v{value_id} type={ty}", flush=True)
except Exception:
pass
if is_phi_val:
# Using a dominating PHI placeholder as incoming is valid for finalize_phis
self._end_i64_cache[key] = val
return val
coerced = self._coerce_in_block_to_i64(val, block_id, bb_map)
self._end_i64_cache[key] = coerced
return coerced
# Try recursively from predecessors
pred_ids = [p for p in preds.get(block_id, []) if p != block_id]
for p in pred_ids:
v = self._value_at_end_i64(value_id, p, preds, block_end_values, vmap, bb_map, _vis)
if v is not None:
self._end_i64_cache[key] = v
return v
# Do not use global vmap here; if not materialized by end of this block
# (or its preds), bail out with zero to preserve dominance.
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
preds_s = ','.join(str(x) for x in pred_ids)
print(f"[resolve] end_i64 miss: bb{block_id} v{value_id} preds=[{preds_s}] → 0", flush=True)
z = ir.Constant(self.i64, 0)
self._end_i64_cache[key] = z
return z
def _coerce_in_block_to_i64(self, val: Any, block_id: int, bb_map: Optional[Dict[int, ir.Block]]) -> ir.Value:
"""Ensure a value is available as i64 at the end of the given block by inserting casts/boxing there."""
if hasattr(val, 'type') and isinstance(val.type, ir.IntType):
# If already i64, avoid re-materializing in predecessor block.
# Using a value defined in another block inside pred may violate dominance (e.g., self-referential PHIs).
if val.type.width == 64:
return val
# Otherwise, extend/truncate in predecessor block just before the terminator.
pred_bb = bb_map.get(block_id) if bb_map is not None else None
if pred_bb is None:
return ir.Constant(self.i64, 0)
pb = ir.IRBuilder(pred_bb)
try:
term = pred_bb.terminator
if term is not None:
pb.position_before(term)
else:
pb.position_at_end(pred_bb)
except Exception:
pb.position_at_end(pred_bb)
if val.type.width < 64:
return pb.zext(val, self.i64, name=f"res_zext_{block_id}")
else:
return pb.trunc(val, self.i64, name=f"res_trunc_{block_id}")
if hasattr(val, 'type') and isinstance(val.type, ir.PointerType):
pred_bb = bb_map.get(block_id) if bb_map is not None else None
if pred_bb is None:
return ir.Constant(self.i64, 0)
pb = ir.IRBuilder(pred_bb)
try:
term = pred_bb.terminator
if term is not None:
pb.position_before(term)
else:
pb.position_at_end(pred_bb)
except Exception:
pb.position_at_end(pred_bb)
i8p = ir.IntType(8).as_pointer()
v = val
try:
if hasattr(v.type, 'pointee') and isinstance(v.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
v = pb.gep(v, [c0, c0], name=f"res_gep_{block_id}_{id(val)}")
except Exception:
pass
# declare boxer
box_from = None
for f in self.module.functions:
if f.name == 'nyash.box.from_i8_string':
box_from = f
break
if box_from is None:
box_from = ir.Function(self.module, ir.FunctionType(self.i64, [i8p]), name='nyash.box.from_i8_string')
return pb.call(box_from, [v], name=f"res_ptr2h_{block_id}")
return ir.Constant(self.i64, 0)
def resolve_f64(self, value_id: int, current_block: ir.Block,
preds: Dict, block_end_values: Dict, vmap: Dict) -> ir.Value:
"""Resolve as f64"""
cache_key = (current_block.name, value_id)
if cache_key in self.f64_cache:
return self.f64_cache[cache_key]
val = vmap.get(value_id)
if val is None:
result = ir.Constant(self.f64_type, 0.0)
else:
if hasattr(val, 'type') and val.type == self.f64_type:
result = val
elif hasattr(val, 'type') and isinstance(val.type, ir.IntType):
result = self.builder.sitofp(val, self.f64_type)
elif hasattr(val, 'type') and isinstance(val.type, ir.PointerType):
tmp = self.builder.ptrtoint(val, self.i64, name=f"res_p2i_{value_id}")
result = self.builder.sitofp(tmp, self.f64_type, name=f"res_i2f_{value_id}")
else:
result = ir.Constant(self.f64_type, 0.0)
self.f64_cache[cache_key] = result
return result
def _coerce_to_i64(self, val: Any) -> ir.Value:
"""Coerce various types to i64"""
if isinstance(val, ir.Constant) and val.type == self.i64:
return val
elif hasattr(val, 'type') and val.type.is_pointer:
# ptr to int
return self.builder.ptrtoint(val, self.i64, name=f"res_p2i_{getattr(val,'name','x')}") if self.builder is not None else ir.Constant(self.i64, 0)
elif hasattr(val, 'type') and isinstance(val.type, ir.IntType):
# int to int (extend/trunc)
if val.type.width < 64:
return self.builder.zext(val, self.i64) if self.builder is not None else ir.Constant(self.i64, 0)
elif val.type.width > 64:
return self.builder.trunc(val, self.i64) if self.builder is not None else ir.Constant(self.i64, 0)
return val
else:
# Default zero
return ir.Constant(self.i64, 0)