harness(llvm/py): fix PHI/dominance via Resolver-only; per-pred localization and constant GEPs; stabilize Main.esc_json/1, dirname/1, node_json/3; docs: add NYASH_LLVM_TRACE_FINAL and Resolver-only invariants

- Resolver-only reads across BBs; remove vmap fallbacks
- Create PHIs at block start; insert casts in preds before terminators
- Re-materialize int in preds to satisfy dominance (add/zext/trunc)
- Use constant GEP for method strings to avoid order dependency
- Order non-PHI lowering to preserve producer→consumer dominance
- Update docs: RESOLVER_API.md, LLVM_HARNESS.md
- compare_harness_on_off: ON/OFF exits match; linking green
This commit is contained in:
Selfhosting Dev
2025-09-13 19:49:03 +09:00
parent 1d6fab4eda
commit 2a9aa5368d
15 changed files with 646 additions and 214 deletions

View File

@ -45,17 +45,40 @@ def lower_binop(
# Relational/equality operators delegate to compare
if op in ('==','!=','<','>','<=','>='):
lower_compare(builder, op, lhs, rhs, dst, vmap)
# Delegate to compare with resolver/preds context to maintain dominance via localization
lower_compare(
builder,
op,
lhs,
rhs,
dst,
vmap,
resolver=resolver,
current_block=current_block,
preds=preds,
block_end_values=block_end_values,
bb_map=bb_map,
)
return
# String-aware concatenation unified to handles (i64) when any side is pointer string
# String-aware concatenation unified to handles (i64).
# Use concat_hh when either side is a pointer string OR tagged as string handle.
if op == '+':
i64 = ir.IntType(64)
i8p = ir.IntType(8).as_pointer()
lhs_raw = vmap.get(lhs)
rhs_raw = vmap.get(rhs)
is_str = (hasattr(lhs_raw, 'type') and isinstance(lhs_raw.type, ir.PointerType)) or \
(hasattr(rhs_raw, 'type') and isinstance(rhs_raw.type, ir.PointerType))
# pointer present?
is_ptr_side = (hasattr(lhs_raw, 'type') and isinstance(lhs_raw.type, ir.PointerType)) or \
(hasattr(rhs_raw, 'type') and isinstance(rhs_raw.type, ir.PointerType))
# tagged string handles?(両辺ともに string-ish のときのみ)
both_tagged = False
try:
if resolver is not None and hasattr(resolver, 'is_stringish'):
both_tagged = resolver.is_stringish(lhs) and resolver.is_stringish(rhs)
except Exception:
pass
is_str = is_ptr_side or both_tagged
if is_str:
# Helper: convert raw or resolved value to string handle
def to_handle(raw, val, tag: str):
@ -91,6 +114,12 @@ def lower_binop(
callee = ir.Function(builder.module, hh_fnty, name='nyash.string.concat_hh')
res = builder.call(callee, [hl, hr], name=f"concat_hh_{dst}")
vmap[dst] = res
# Tag result as string handle so subsequent '+' stays in string domain
try:
if resolver is not None and hasattr(resolver, 'mark_string'):
resolver.mark_string(dst)
except Exception:
pass
return
# Ensure both are i64

View File

@ -76,81 +76,123 @@ def lower_boxcall(
# Minimal method bridging for strings and console
if method_name in ("length", "len"):
# Prefer handle-based len_h
# Any.length_h: Array/String/Map に対応
recv_h = _ensure_handle(builder, module, recv_val)
callee = _declare(module, "nyash.string.len_h", i64, [i64])
result = builder.call(callee, [recv_h], name="strlen_h")
callee = _declare(module, "nyash.any.length_h", i64, [i64])
result = builder.call(callee, [recv_h], name="any_length_h")
if dst_vid is not None:
vmap[dst_vid] = result
return
if method_name == "substring":
# substring(start, end) with pointer-based API
# substring(start, end)
# If receiver is a handle (i64), use handle-based helper; else pointer-based API
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
s = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map) if args else ir.Constant(i64, 0)
e = resolver.resolve_i64(args[1], builder.block, preds, block_end_values, vmap, bb_map) if len(args) > 1 else ir.Constant(i64, 0)
else:
s = vmap.get(args[0], ir.Constant(i64, 0)) if args else ir.Constant(i64, 0)
e = vmap.get(args[1], ir.Constant(i64, 0)) if len(args) > 1 else ir.Constant(i64, 0)
# Coerce recv to i8*
recv_p = recv_val
if hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.IntType):
recv_p = builder.inttoptr(recv_p, i8p, name="bc_i2p_recv")
elif hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.PointerType):
try:
if isinstance(recv_p.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
recv_p = builder.gep(recv_p, [c0, c0], name="bc_gep_recv")
except Exception:
pass
if hasattr(recv_val, 'type') and isinstance(recv_val.type, ir.IntType):
# handle-based
callee = _declare(module, "nyash.string.substring_hii", i64, [i64, i64, i64])
h = builder.call(callee, [recv_val, s, e], name="substring_h")
if dst_vid is not None:
vmap[dst_vid] = h
try:
if resolver is not None and hasattr(resolver, 'mark_string'):
resolver.mark_string(dst_vid)
except Exception:
pass
return
else:
recv_p = ir.Constant(i8p, None)
# Coerce indices
if hasattr(s, 'type') and isinstance(s.type, ir.PointerType):
s = builder.ptrtoint(s, i64)
if hasattr(e, 'type') and isinstance(e.type, ir.PointerType):
e = builder.ptrtoint(e, i64)
callee = _declare(module, "nyash.string.substring_sii", i8p, [i8p, i64, i64])
p = builder.call(callee, [recv_p, s, e], name="substring")
# Return as handle across blocks (i8* -> i64 via nyash.box.from_i8_string)
conv_fnty = ir.FunctionType(i64, [i8p])
conv = _declare(module, "nyash.box.from_i8_string", i64, [i8p])
h = builder.call(conv, [p], name="str_ptr2h_sub")
if dst_vid is not None:
vmap[dst_vid] = h
return
# pointer-based
recv_p = recv_val
if hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.PointerType):
try:
if isinstance(recv_p.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
recv_p = builder.gep(recv_p, [c0, c0], name="bc_gep_recv")
except Exception:
pass
else:
recv_p = ir.Constant(i8p, None)
# Coerce indices
if hasattr(s, 'type') and isinstance(s.type, ir.PointerType):
s = builder.ptrtoint(s, i64)
if hasattr(e, 'type') and isinstance(e.type, ir.PointerType):
e = builder.ptrtoint(e, i64)
callee = _declare(module, "nyash.string.substring_sii", i8p, [i8p, i64, i64])
p = builder.call(callee, [recv_p, s, e], name="substring")
conv = _declare(module, "nyash.box.from_i8_string", i64, [i8p])
h = builder.call(conv, [p], name="str_ptr2h_sub")
if dst_vid is not None:
vmap[dst_vid] = h
try:
if resolver is not None and hasattr(resolver, 'mark_string'):
resolver.mark_string(dst_vid)
except Exception:
pass
return
if method_name == "lastIndexOf":
# lastIndexOf(needle)
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
needle = resolver.resolve_ptr(args[0], builder.block, preds, block_end_values, vmap) if args else ir.Constant(i8p, None)
n_i64 = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map) if args else ir.Constant(i64, 0)
else:
needle = vmap.get(args[0], ir.Constant(i8p, None)) if args else ir.Constant(i8p, None)
recv_p = recv_val
if hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.IntType):
recv_p = builder.inttoptr(recv_p, i8p, name="bc_i2p_recv2")
elif hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.PointerType):
try:
if isinstance(recv_p.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
recv_p = builder.gep(recv_p, [c0, c0], name="bc_gep_recv2")
except Exception:
pass
if hasattr(needle, 'type') and isinstance(needle.type, ir.IntType):
needle = builder.inttoptr(needle, i8p, name="bc_i2p_needle")
elif hasattr(needle, 'type') and isinstance(needle.type, ir.PointerType):
try:
if isinstance(needle.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
needle = builder.gep(needle, [c0, c0], name="bc_gep_needle")
except Exception:
pass
elif not hasattr(needle, 'type'):
needle = ir.Constant(i8p, None)
callee = _declare(module, "nyash.string.lastIndexOf_ss", i64, [i8p, i8p])
res = builder.call(callee, [recv_p, needle], name="lastIndexOf")
n_i64 = vmap.get(args[0], ir.Constant(i64, 0)) if args else ir.Constant(i64, 0)
if hasattr(recv_val, 'type') and isinstance(recv_val.type, ir.IntType):
# handle-based
callee = _declare(module, "nyash.string.lastIndexOf_hh", i64, [i64, i64])
res = builder.call(callee, [recv_val, n_i64], name="lastIndexOf_hh")
if dst_vid is not None:
vmap[dst_vid] = res
return
else:
# pointer-based
recv_p = recv_val
if hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.PointerType):
try:
if isinstance(recv_p.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
recv_p = builder.gep(recv_p, [c0, c0], name="bc_gep_recv2")
except Exception:
pass
else:
recv_p = ir.Constant(i8p, None)
needle = n_i64
if hasattr(needle, 'type') and isinstance(needle.type, ir.IntType):
needle = builder.inttoptr(needle, i8p, name="bc_i2p_needle")
elif hasattr(needle, 'type') and isinstance(needle.type, ir.PointerType):
try:
if isinstance(needle.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
needle = builder.gep(needle, [c0, c0], name="bc_gep_needle")
except Exception:
pass
callee = _declare(module, "nyash.string.lastIndexOf_ss", i64, [i8p, i8p])
res = builder.call(callee, [recv_p, needle], name="lastIndexOf")
if dst_vid is not None:
vmap[dst_vid] = res
return
if method_name == "get":
# ArrayBox.get(index) → nyash.array.get_h(handle, idx)
recv_h = _ensure_handle(builder, module, recv_val)
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
idx = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map) if args else ir.Constant(i64, 0)
else:
idx = vmap.get(args[0], ir.Constant(i64, 0)) if args else ir.Constant(i64, 0)
callee = _declare(module, "nyash.array.get_h", i64, [i64, i64])
res = builder.call(callee, [recv_h, idx], name="arr_get_h")
if dst_vid is not None:
vmap[dst_vid] = res
try:
if resolver is not None and hasattr(resolver, 'mark_string'):
# Heuristic: args array often stores strings for CLI; tag as string-ish
resolver.mark_string(dst_vid)
except Exception:
pass
return
if method_name in ("print", "println", "log"):
@ -174,6 +216,48 @@ def lower_boxcall(
vmap[dst_vid] = ir.Constant(i64, 0)
return
# Special: method on `me` (self) or static dispatch to Main.* → direct call to `Main.method/arity`
try:
cur_fn_name = str(builder.block.parent.name)
except Exception:
cur_fn_name = ''
# Heuristic: value-id 0 is often the implicit receiver for `me` in MIR
if box_vid == 0 and cur_fn_name.startswith('Main.'):
# Build target function name with arity
arity = len(args)
target = f"Main.{method_name}/{arity}"
# If module already has such function, prefer direct call
callee = None
for f in module.functions:
if f.name == target:
callee = f
break
if callee is not None:
a = []
for i, aid in enumerate(args):
raw = vmap.get(aid)
if raw is not None and hasattr(raw, 'type') and isinstance(raw.type, ir.PointerType):
aval = _ensure_handle(builder, module, raw)
else:
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
aval = resolver.resolve_i64(aid, builder.block, preds, block_end_values, vmap, bb_map)
else:
aval = vmap.get(aid, ir.Constant(ir.IntType(64), 0))
if hasattr(aval, 'type') and isinstance(aval.type, ir.PointerType):
aval = _ensure_handle(builder, module, aval)
elif hasattr(aval, 'type') and isinstance(aval.type, ir.IntType) and aval.type.width != 64:
aval = builder.zext(aval, ir.IntType(64)) if aval.type.width < 64 else builder.trunc(aval, ir.IntType(64))
a.append(aval)
res = builder.call(callee, a, name=f"call_self_{method_name}")
if dst_vid is not None:
vmap[dst_vid] = res
try:
if method_name in ("esc_json", "node_json", "dirname", "join", "read_all") and resolver is not None and hasattr(resolver, 'mark_string'):
resolver.mark_string(dst_vid)
except Exception:
pass
return
# Default: invoke via NyRT by-name shim (runtime resolves method id)
recv_h = _ensure_handle(builder, module, recv_val)
# Build C string for method name
@ -195,7 +279,9 @@ def lower_boxcall(
g.global_constant = True
g.initializer = ir.Constant(arr_ty, bytearray(mbytes))
c0 = ir.Constant(ir.IntType(32), 0)
mptr = builder.gep(g, [c0, c0], inbounds=True)
# Compute GEP in the current block so it is naturally ordered before the call
# Use constant GEP so we don't depend on instruction ordering
mptr = ir.Constant.gep(g, (c0, c0))
# Up to 2 args for minimal path
argc = ir.Constant(i64, min(len(args), 2))

View File

@ -87,7 +87,25 @@ def lower_call(
# Make the call
result = builder.call(func, call_args, name=f"call_{func_name}")
# Optional trace for final debugging
try:
import os
if os.environ.get('NYASH_LLVM_TRACE_FINAL') == '1' and isinstance(actual_name, str):
if actual_name in ("Main.node_json/3", "Main.esc_json/1", "main"):
print(f"[TRACE] call {actual_name} args={len(call_args)}", flush=True)
except Exception:
pass
# Store result if needed
if dst_vid is not None:
vmap[dst_vid] = result
# Heuristic: mark known string-producing functions as string handles
try:
name_for_tag = actual_name if isinstance(actual_name, str) else str(actual_name)
if resolver is not None and hasattr(resolver, 'mark_string'):
if any(key in name_for_tag for key in [
'esc_json', 'node_json', 'dirname', 'join', 'read_all', 'toJson'
]):
resolver.mark_string(dst_vid)
except Exception:
pass

View File

@ -42,10 +42,20 @@ def lower_compare(
i64 = ir.IntType(64)
i8p = ir.IntType(8).as_pointer()
# String-aware equality: if both are pointers, assume i8* strings
if op in ('==','!=') and hasattr(lhs_val, 'type') and hasattr(rhs_val, 'type'):
if isinstance(lhs_val.type, ir.PointerType) and isinstance(rhs_val.type, ir.PointerType):
# Box both to handles and call nyash.string.eq_hh
# String-aware equality: if either side is a pointer or tagged as string-ish, compare via eq_hh
if op in ('==','!='):
lhs_ptr = hasattr(lhs_val, 'type') and isinstance(lhs_val.type, ir.PointerType)
rhs_ptr = hasattr(rhs_val, 'type') and isinstance(rhs_val.type, ir.PointerType)
lhs_tag = False
rhs_tag = False
try:
if resolver is not None and hasattr(resolver, 'is_stringish'):
lhs_tag = resolver.is_stringish(lhs)
rhs_tag = resolver.is_stringish(rhs)
except Exception:
pass
if lhs_ptr or rhs_ptr or lhs_tag or rhs_tag:
# Convert both to handles (i64) then nyash.string.eq_hh
# nyash.box.from_i8_string(i8*) -> i64
box_from = None
for f in builder.module.functions:
@ -54,9 +64,18 @@ def lower_compare(
break
if not box_from:
box_from = ir.Function(builder.module, ir.FunctionType(i64, [i8p]), name='nyash.box.from_i8_string')
lh = builder.call(box_from, [lhs_val], name='lhs_ptr2h')
rh = builder.call(box_from, [rhs_val], name='rhs_ptr2h')
def to_h(v):
if hasattr(v, 'type') and isinstance(v.type, ir.PointerType):
return builder.call(box_from, [v])
else:
# assume i64 handle or number; zext/trunc to i64 if needed
if hasattr(v, 'type') and isinstance(v.type, ir.IntType) and v.type.width != 64:
return builder.zext(v, i64) if v.type.width < 64 else builder.trunc(v, i64)
if hasattr(v, 'type') and isinstance(v.type, ir.PointerType):
return builder.ptrtoint(v, i64)
return v if hasattr(v, 'type') else ir.Constant(i64, 0)
lh = to_h(lhs_val)
rh = to_h(rhs_val)
eqf = None
for f in builder.module.functions:
if f.name == 'nyash.string.eq_hh':
@ -68,7 +87,6 @@ def lower_compare(
if op == '==':
vmap[dst] = eq
else:
# ne = 1 - eq
one = ir.Constant(i64, 1)
ne = builder.sub(one, eq, name='str_ne')
vmap[dst] = ne

View File

@ -63,8 +63,12 @@ def lower_const(
g.global_constant = True
# Store the GlobalVariable; resolver.resolve_ptr will emit GEP in the current block
vmap[dst] = g
if resolver is not None and hasattr(resolver, 'string_literals'):
resolver.string_literals[dst] = str_val
if resolver is not None:
if hasattr(resolver, 'string_literals'):
resolver.string_literals[dst] = str_val
# Mark this value-id as string-ish to guide '+' and '==' lowering
if hasattr(resolver, 'mark_string'):
resolver.mark_string(dst)
elif const_type == 'void':
# Void/null constant - use i64 zero

View File

@ -45,6 +45,8 @@ def lower_externcall(
"nyash.string.charCodeAt_h": (i64, [i64, i64]),
"nyash.string.concat_hh": (i64, [i64, i64]),
"nyash.string.eq_hh": (i64, [i64, i64]),
"nyash.string.substring_hii": (i64, [i64, i64, i64]),
"nyash.string.lastIndexOf_hh": (i64, [i64, i64]),
# Strings (pointer-based plugin functions)
"nyash.string.concat_ss": (i8p, [i8p, i8p]),
"nyash.string.concat_si": (i8p, [i8p, i64]),

View File

@ -34,20 +34,9 @@ def lower_phi(
vmap[dst_vid] = ir.Constant(ir.IntType(64), 0)
return
# Determine PHI type from snapshots or fallback i64
# Use i64 for PHI to carry handles across blocks (strings/boxes),
# avoiding pointer PHIs that complicate dominance and boxing.
phi_type = ir.IntType(64)
if block_end_values is not None:
for val_id, pred_bid in incoming:
snap = block_end_values.get(pred_bid, {})
val = snap.get(val_id)
if val is not None and hasattr(val, 'type'):
phi_type = val.type
# Prefer pointer type
if hasattr(phi_type, 'is_pointer') and phi_type.is_pointer:
break
# Create PHI instruction
phi = builder.phi(phi_type, name=f"phi_{dst_vid}")
# Build map from provided incoming
incoming_map: Dict[int, int] = {}
@ -67,60 +56,71 @@ def lower_phi(
# Fallback: use blocks in incoming list
actual_preds = [b for _, b in incoming]
# Add incoming for each actual predecessor
# Collect incoming values
incoming_pairs: List[Tuple[ir.Block, ir.Value]] = []
for block_id in actual_preds:
block = bb_map.get(block_id)
# Prefer pred snapshot
if block_end_values is not None:
snap = block_end_values.get(block_id, {})
vid = incoming_map.get(block_id)
val = snap.get(vid) if vid is not None else None
else:
vid = incoming_map.get(block_id)
val = vmap.get(vid) if vid is not None else None
if not val:
# Create default value based on type
if isinstance(phi_type, ir.IntType):
val = ir.Constant(phi_type, 0)
elif isinstance(phi_type, ir.DoubleType):
val = ir.Constant(phi_type, 0.0)
else:
# Pointer type - null
val = ir.Constant(phi_type, None)
if not block:
# Skip if block not found
vid = incoming_map.get(block_id)
if block is None:
continue
# Type conversion if needed
if hasattr(val, 'type') and val.type != phi_type:
# Position at end (before terminator) of predecessor block
pb = ir.IRBuilder(block)
# Prefer resolver-driven localization per predecessor block to satisfy dominance
if vid is not None and resolver is not None and bb_map is not None:
try:
term = block.terminator
if term is not None:
pb.position_before(term)
pred_block_obj = bb_map.get(block_id)
if pred_block_obj is not None and hasattr(resolver, 'resolve_i64'):
val = resolver.resolve_i64(vid, pred_block_obj, preds_map or {}, block_end_values or {}, vmap, bb_map)
else:
pb.position_at_end(block)
val = None
except Exception:
pb.position_at_end(block)
# Convert types
if isinstance(phi_type, ir.IntType) and val.type.is_pointer:
val = pb.ptrtoint(val, phi_type, name=f"cast_p2i_{val_id}")
elif phi_type.is_pointer and isinstance(val.type, ir.IntType):
val = pb.inttoptr(val, phi_type, name=f"cast_i2p_{val_id}")
elif isinstance(phi_type, ir.IntType) and isinstance(val.type, ir.IntType):
# Int to int
if phi_type.width > val.type.width:
val = pb.zext(val, phi_type, name=f"zext_{val_id}")
else:
val = pb.trunc(val, phi_type, name=f"trunc_{val_id}")
# Add to PHI (skip if no block)
if block is not None:
phi.add_incoming(val, block)
val = None
else:
# Snapshot fallback
if block_end_values is not None:
snap = block_end_values.get(block_id, {})
val = snap.get(vid) if vid is not None else None
else:
val = vmap.get(vid) if vid is not None else None
if not val:
# Missing incoming for this predecessor → default 0
val = ir.Constant(phi_type, 0)
# Coerce pointer to i64 at predecessor end
if hasattr(val, 'type') and val.type != phi_type:
pb = ir.IRBuilder(block)
try:
term = block.terminator
if term is not None:
pb.position_before(term)
else:
pb.position_at_end(block)
except Exception:
pb.position_at_end(block)
if isinstance(phi_type, ir.IntType) and val.type.is_pointer:
i8p = ir.IntType(8).as_pointer()
try:
if hasattr(val.type, 'pointee') and isinstance(val.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
val = pb.gep(val, [c0, c0], name=f"phi_gep_{vid}")
except Exception:
pass
boxer = None
for f in builder.module.functions:
if f.name == 'nyash.box.from_i8_string':
boxer = f
break
if boxer is None:
boxer = ir.Function(builder.module, ir.FunctionType(ir.IntType(64), [i8p]), name='nyash.box.from_i8_string')
val = pb.call(boxer, [val], name=f"phi_ptr2h_{vid}")
incoming_pairs.append((block, val))
# If nothing collected, use zero constant and bail out
if not incoming_pairs:
vmap[dst_vid] = ir.Constant(phi_type, 0)
return
# Create PHI instruction now and add incoming
phi = builder.phi(phi_type, name=f"phi_{dst_vid}")
for block, val in incoming_pairs:
phi.add_incoming(val, block)
# Store PHI result
vmap[dst_vid] = phi