harness(llvm/py): fix PHI/dominance via Resolver-only; per-pred localization and constant GEPs; stabilize Main.esc_json/1, dirname/1, node_json/3; docs: add NYASH_LLVM_TRACE_FINAL and Resolver-only invariants

- Resolver-only reads across BBs; remove vmap fallbacks
- Create PHIs at block start; insert casts in preds before terminators
- Re-materialize int in preds to satisfy dominance (add/zext/trunc)
- Use constant GEP for method strings to avoid order dependency
- Order non-PHI lowering to preserve producer→consumer dominance
- Update docs: RESOLVER_API.md, LLVM_HARNESS.md
- compare_harness_on_off: exits match with the harness ON and OFF; linking passes
This commit is contained in:
Selfhosting Dev
2025-09-13 19:49:03 +09:00
parent 1d6fab4eda
commit 2a9aa5368d
15 changed files with 646 additions and 214 deletions

View File

@ -76,81 +76,123 @@ def lower_boxcall(
# Minimal method bridging for strings and console
if method_name in ("length", "len"):
# Prefer handle-based len_h
# Any.length_h: supports Array/String/Map
recv_h = _ensure_handle(builder, module, recv_val)
callee = _declare(module, "nyash.string.len_h", i64, [i64])
result = builder.call(callee, [recv_h], name="strlen_h")
callee = _declare(module, "nyash.any.length_h", i64, [i64])
result = builder.call(callee, [recv_h], name="any_length_h")
if dst_vid is not None:
vmap[dst_vid] = result
return
if method_name == "substring":
# substring(start, end) with pointer-based API
# substring(start, end)
# If receiver is a handle (i64), use handle-based helper; else pointer-based API
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
s = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map) if args else ir.Constant(i64, 0)
e = resolver.resolve_i64(args[1], builder.block, preds, block_end_values, vmap, bb_map) if len(args) > 1 else ir.Constant(i64, 0)
else:
s = vmap.get(args[0], ir.Constant(i64, 0)) if args else ir.Constant(i64, 0)
e = vmap.get(args[1], ir.Constant(i64, 0)) if len(args) > 1 else ir.Constant(i64, 0)
# Coerce recv to i8*
recv_p = recv_val
if hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.IntType):
recv_p = builder.inttoptr(recv_p, i8p, name="bc_i2p_recv")
elif hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.PointerType):
try:
if isinstance(recv_p.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
recv_p = builder.gep(recv_p, [c0, c0], name="bc_gep_recv")
except Exception:
pass
if hasattr(recv_val, 'type') and isinstance(recv_val.type, ir.IntType):
# handle-based
callee = _declare(module, "nyash.string.substring_hii", i64, [i64, i64, i64])
h = builder.call(callee, [recv_val, s, e], name="substring_h")
if dst_vid is not None:
vmap[dst_vid] = h
try:
if resolver is not None and hasattr(resolver, 'mark_string'):
resolver.mark_string(dst_vid)
except Exception:
pass
return
else:
recv_p = ir.Constant(i8p, None)
# Coerce indices
if hasattr(s, 'type') and isinstance(s.type, ir.PointerType):
s = builder.ptrtoint(s, i64)
if hasattr(e, 'type') and isinstance(e.type, ir.PointerType):
e = builder.ptrtoint(e, i64)
callee = _declare(module, "nyash.string.substring_sii", i8p, [i8p, i64, i64])
p = builder.call(callee, [recv_p, s, e], name="substring")
# Return as handle across blocks (i8* -> i64 via nyash.box.from_i8_string)
conv_fnty = ir.FunctionType(i64, [i8p])
conv = _declare(module, "nyash.box.from_i8_string", i64, [i8p])
h = builder.call(conv, [p], name="str_ptr2h_sub")
if dst_vid is not None:
vmap[dst_vid] = h
return
# pointer-based
recv_p = recv_val
if hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.PointerType):
try:
if isinstance(recv_p.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
recv_p = builder.gep(recv_p, [c0, c0], name="bc_gep_recv")
except Exception:
pass
else:
recv_p = ir.Constant(i8p, None)
# Coerce indices
if hasattr(s, 'type') and isinstance(s.type, ir.PointerType):
s = builder.ptrtoint(s, i64)
if hasattr(e, 'type') and isinstance(e.type, ir.PointerType):
e = builder.ptrtoint(e, i64)
callee = _declare(module, "nyash.string.substring_sii", i8p, [i8p, i64, i64])
p = builder.call(callee, [recv_p, s, e], name="substring")
conv = _declare(module, "nyash.box.from_i8_string", i64, [i8p])
h = builder.call(conv, [p], name="str_ptr2h_sub")
if dst_vid is not None:
vmap[dst_vid] = h
try:
if resolver is not None and hasattr(resolver, 'mark_string'):
resolver.mark_string(dst_vid)
except Exception:
pass
return
if method_name == "lastIndexOf":
# lastIndexOf(needle)
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
needle = resolver.resolve_ptr(args[0], builder.block, preds, block_end_values, vmap) if args else ir.Constant(i8p, None)
n_i64 = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map) if args else ir.Constant(i64, 0)
else:
needle = vmap.get(args[0], ir.Constant(i8p, None)) if args else ir.Constant(i8p, None)
recv_p = recv_val
if hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.IntType):
recv_p = builder.inttoptr(recv_p, i8p, name="bc_i2p_recv2")
elif hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.PointerType):
try:
if isinstance(recv_p.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
recv_p = builder.gep(recv_p, [c0, c0], name="bc_gep_recv2")
except Exception:
pass
if hasattr(needle, 'type') and isinstance(needle.type, ir.IntType):
needle = builder.inttoptr(needle, i8p, name="bc_i2p_needle")
elif hasattr(needle, 'type') and isinstance(needle.type, ir.PointerType):
try:
if isinstance(needle.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
needle = builder.gep(needle, [c0, c0], name="bc_gep_needle")
except Exception:
pass
elif not hasattr(needle, 'type'):
needle = ir.Constant(i8p, None)
callee = _declare(module, "nyash.string.lastIndexOf_ss", i64, [i8p, i8p])
res = builder.call(callee, [recv_p, needle], name="lastIndexOf")
n_i64 = vmap.get(args[0], ir.Constant(i64, 0)) if args else ir.Constant(i64, 0)
if hasattr(recv_val, 'type') and isinstance(recv_val.type, ir.IntType):
# handle-based
callee = _declare(module, "nyash.string.lastIndexOf_hh", i64, [i64, i64])
res = builder.call(callee, [recv_val, n_i64], name="lastIndexOf_hh")
if dst_vid is not None:
vmap[dst_vid] = res
return
else:
# pointer-based
recv_p = recv_val
if hasattr(recv_p, 'type') and isinstance(recv_p.type, ir.PointerType):
try:
if isinstance(recv_p.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
recv_p = builder.gep(recv_p, [c0, c0], name="bc_gep_recv2")
except Exception:
pass
else:
recv_p = ir.Constant(i8p, None)
needle = n_i64
if hasattr(needle, 'type') and isinstance(needle.type, ir.IntType):
needle = builder.inttoptr(needle, i8p, name="bc_i2p_needle")
elif hasattr(needle, 'type') and isinstance(needle.type, ir.PointerType):
try:
if isinstance(needle.type.pointee, ir.ArrayType):
c0 = ir.Constant(ir.IntType(32), 0)
needle = builder.gep(needle, [c0, c0], name="bc_gep_needle")
except Exception:
pass
callee = _declare(module, "nyash.string.lastIndexOf_ss", i64, [i8p, i8p])
res = builder.call(callee, [recv_p, needle], name="lastIndexOf")
if dst_vid is not None:
vmap[dst_vid] = res
return
if method_name == "get":
# ArrayBox.get(index) → nyash.array.get_h(handle, idx)
recv_h = _ensure_handle(builder, module, recv_val)
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
idx = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map) if args else ir.Constant(i64, 0)
else:
idx = vmap.get(args[0], ir.Constant(i64, 0)) if args else ir.Constant(i64, 0)
callee = _declare(module, "nyash.array.get_h", i64, [i64, i64])
res = builder.call(callee, [recv_h, idx], name="arr_get_h")
if dst_vid is not None:
vmap[dst_vid] = res
try:
if resolver is not None and hasattr(resolver, 'mark_string'):
# Heuristic: args array often stores strings for CLI; tag as string-ish
resolver.mark_string(dst_vid)
except Exception:
pass
return
if method_name in ("print", "println", "log"):
@ -174,6 +216,48 @@ def lower_boxcall(
vmap[dst_vid] = ir.Constant(i64, 0)
return
# Special: method on `me` (self) or static dispatch to Main.* → direct call to `Main.method/arity`
try:
cur_fn_name = str(builder.block.parent.name)
except Exception:
cur_fn_name = ''
# Heuristic: value-id 0 is often the implicit receiver for `me` in MIR
if box_vid == 0 and cur_fn_name.startswith('Main.'):
# Build target function name with arity
arity = len(args)
target = f"Main.{method_name}/{arity}"
# If module already has such function, prefer direct call
callee = None
for f in module.functions:
if f.name == target:
callee = f
break
if callee is not None:
a = []
for i, aid in enumerate(args):
raw = vmap.get(aid)
if raw is not None and hasattr(raw, 'type') and isinstance(raw.type, ir.PointerType):
aval = _ensure_handle(builder, module, raw)
else:
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
aval = resolver.resolve_i64(aid, builder.block, preds, block_end_values, vmap, bb_map)
else:
aval = vmap.get(aid, ir.Constant(ir.IntType(64), 0))
if hasattr(aval, 'type') and isinstance(aval.type, ir.PointerType):
aval = _ensure_handle(builder, module, aval)
elif hasattr(aval, 'type') and isinstance(aval.type, ir.IntType) and aval.type.width != 64:
aval = builder.zext(aval, ir.IntType(64)) if aval.type.width < 64 else builder.trunc(aval, ir.IntType(64))
a.append(aval)
res = builder.call(callee, a, name=f"call_self_{method_name}")
if dst_vid is not None:
vmap[dst_vid] = res
try:
if method_name in ("esc_json", "node_json", "dirname", "join", "read_all") and resolver is not None and hasattr(resolver, 'mark_string'):
resolver.mark_string(dst_vid)
except Exception:
pass
return
# Default: invoke via NyRT by-name shim (runtime resolves method id)
recv_h = _ensure_handle(builder, module, recv_val)
# Build C string for method name
@ -195,7 +279,9 @@ def lower_boxcall(
g.global_constant = True
g.initializer = ir.Constant(arr_ty, bytearray(mbytes))
c0 = ir.Constant(ir.IntType(32), 0)
mptr = builder.gep(g, [c0, c0], inbounds=True)
# Build the method-name pointer as a constant GEP expression instead of a builder instruction,
# so it does not depend on instruction ordering relative to the call
mptr = ir.Constant.gep(g, (c0, c0))
# Up to 2 args for minimal path
argc = ir.Constant(i64, min(len(args), 2))