feat(llvm-py): Major breakthrough in Python LLVM backend! 🎉

- Print and FileBox paths now work correctly
- Resolver simplified by removing an overly aggressive fast-path optimization
- Both the OFF and ON runs in compare_harness_on_off.sh now use the Python version
- String handle propagation issues resolved

Key changes:
- Removed instruction reordering in llvm_builder.py (respecting MIR order)
- Resolver now more conservative but reliable
- compare_harness_on_off.sh updated to use Python backend for both paths

This marks a major milestone towards Phase 15 self-hosting with Python/llvmlite!

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Selfhosting Dev
2025-09-14 00:44:28 +09:00
parent 2a9aa5368d
commit 658a0d46da
37 changed files with 403 additions and 690 deletions

View File

@ -62,7 +62,8 @@ def lower_binop(
return
# String-aware concatenation unified to handles (i64).
# Use concat_hh when either side is a pointer string OR tagged as string handle.
# Use concat_hh when either side is a pointer string OR either side is tagged as string handle
# (including literal strings and PHI-propagated tags).
if op == '+':
i64 = ir.IntType(64)
i8p = ir.IntType(8).as_pointer()
@ -71,14 +72,18 @@ def lower_binop(
# pointer present?
is_ptr_side = (hasattr(lhs_raw, 'type') and isinstance(lhs_raw.type, ir.PointerType)) or \
(hasattr(rhs_raw, 'type') and isinstance(rhs_raw.type, ir.PointerType))
# tagged string handles?両辺ともに string-ish のときのみ
both_tagged = False
# tagged string handles?どちらかが string-ish のとき)
any_tagged = False
try:
if resolver is not None and hasattr(resolver, 'is_stringish'):
both_tagged = resolver.is_stringish(lhs) and resolver.is_stringish(rhs)
if resolver is not None:
if hasattr(resolver, 'is_stringish'):
any_tagged = resolver.is_stringish(lhs) or resolver.is_stringish(rhs)
# literal strings are tracked separately
if not any_tagged and hasattr(resolver, 'string_literals'):
any_tagged = (lhs in resolver.string_literals) or (rhs in resolver.string_literals)
except Exception:
pass
is_str = is_ptr_side or both_tagged
is_str = is_ptr_side or any_tagged
if is_str:
# Helper: convert raw or resolved value to string handle
def to_handle(raw, val, tag: str):

View File

@ -205,6 +205,11 @@ def lower_boxcall(
arg0 = ir.Constant(i8p, None)
# Prefer handle API if arg is i64, else pointer API
if hasattr(arg0, 'type') and isinstance(arg0.type, ir.IntType) and arg0.type.width == 64:
# Optional runtime trace of the handle
import os as _os
if _os.environ.get('NYASH_LLVM_TRACE_FINAL') == '1':
trace = _declare(module, "nyash.debug.trace_handle", i64, [i64])
_ = builder.call(trace, [arg0], name="trace_handle")
callee = _declare(module, "nyash.console.log_handle", i64, [i64])
_ = builder.call(callee, [arg0], name="console_log_h")
else:
@ -221,8 +226,19 @@ def lower_boxcall(
cur_fn_name = str(builder.block.parent.name)
except Exception:
cur_fn_name = ''
# Heuristic: value-id 0 is often the implicit receiver for `me` in MIR
if box_vid == 0 and cur_fn_name.startswith('Main.'):
# Heuristic: MIR encodes `me` as a string literal "__me__" or sometimes value-id 0.
is_me = False
try:
if box_vid == 0:
is_me = True
# Prefer literal marker captured by resolver (from const lowering)
elif resolver is not None and hasattr(resolver, 'string_literals'):
lit = resolver.string_literals.get(box_vid)
if lit == "__me__":
is_me = True
except Exception:
pass
if is_me and cur_fn_name.startswith('Main.'):
# Build target function name with arity
arity = len(args)
target = f"Main.{method_name}/{arity}"
@ -300,3 +316,9 @@ def lower_boxcall(
result = builder.call(callee, [recv_h, mptr, argc, a1, a2], name="pinvoke_by_name")
if dst_vid is not None:
vmap[dst_vid] = result
# Heuristic tagging: common plugin methods returning strings
try:
if resolver is not None and hasattr(resolver, 'mark_string') and method_name in ("read", "dirname", "join"):
resolver.mark_string(dst_vid)
except Exception:
pass

View File

@ -40,7 +40,7 @@ def lower_const(
vmap[dst] = llvm_val
elif const_type == 'string':
# String constant - create global, store GlobalVariable (not GEP) to avoid dominance issues
# String constant - create global and immediately box to i64 handle
i8 = ir.IntType(8)
str_val = str(const_val)
str_bytes = str_val.encode('utf-8') + b'\0'
@ -61,8 +61,21 @@ def lower_const(
g.initializer = str_const
g.linkage = 'private'
g.global_constant = True
# Store the GlobalVariable; resolver.resolve_ptr will emit GEP in the current block
vmap[dst] = g
# GEP to first element and box to handle immediately
i32 = ir.IntType(32)
c0 = ir.Constant(i32, 0)
gep = builder.gep(g, [c0, c0], inbounds=True)
i8p = i8.as_pointer()
boxer_ty = ir.FunctionType(ir.IntType(64), [i8p])
boxer = None
for f in module.functions:
if f.name == 'nyash.box.from_i8_string':
boxer = f
break
if boxer is None:
boxer = ir.Function(module, boxer_ty, name='nyash.box.from_i8_string')
handle = builder.call(boxer, [gep], name=f"const_str_h_{dst}")
vmap[dst] = handle
if resolver is not None:
if hasattr(resolver, 'string_literals'):
resolver.string_literals[dst] = str_val

View File

@ -73,6 +73,9 @@ def lower_phi(
val = None
except Exception:
val = None
if val is None:
# Missing incoming for this predecessor → default 0
val = ir.Constant(phi_type, 0)
else:
# Snapshot fallback
if block_end_values is not None:
@ -124,6 +127,18 @@ def lower_phi(
# Store PHI result
vmap[dst_vid] = phi
# Propagate string-ness: if any incoming value-id is tagged string-ish, mark dst as string-ish.
try:
if resolver is not None and hasattr(resolver, 'is_stringish') and hasattr(resolver, 'mark_string'):
for val_id, _b in incoming:
try:
if resolver.is_stringish(val_id):
resolver.mark_string(dst_vid)
break
except Exception:
pass
except Exception:
pass
def defer_phi_wiring(
dst_vid: int,