docs: update CURRENT_TASK with Box Theory PHI plan (defer/finalize) and MIR v0.5 type meta; add parity tooling and PyVM scaffolding

impl(pyvm/llvmlite):
- add tools/parity.sh; tools/pyvm_runner.py; src/llvm_py/pyvm/*
- emit string const as handle type in MIR JSON; add dst_type hints
- unify '+' to concat_hh with from_i64/from_i8_string bridges; console print via to_i8p_h
- add runtime bridges: nyash.box.from_i64, nyash.string.to_i8p_h

tests:
- add apps/tests/min_str_cat_loop (minimal repro for string cat loop)
This commit is contained in:
Selfhosting Dev
2025-09-14 04:51:33 +09:00
parent 658a0d46da
commit 3e07763af8
49 changed files with 1231 additions and 201 deletions

View File

@ -69,6 +69,7 @@ def lower_binop(
i8p = ir.IntType(8).as_pointer()
lhs_raw = vmap.get(lhs)
rhs_raw = vmap.get(rhs)
# Prefer handle pipeline to keep handles consistent across blocks/ret
# Is either raw operand an LLVM pointer-typed value?
is_ptr_side = (hasattr(lhs_raw, 'type') and isinstance(lhs_raw.type, ir.PointerType)) or \
(hasattr(rhs_raw, 'type') and isinstance(rhs_raw.type, ir.PointerType))
@ -86,7 +87,7 @@ def lower_binop(
is_str = is_ptr_side or any_tagged
if is_str:
# Helper: convert raw or resolved value to string handle
def to_handle(raw, val, tag: str):
def to_handle(raw, val, tag: str, vid: int):
if raw is not None and hasattr(raw, 'type') and isinstance(raw.type, ir.PointerType):
# pointer-to-array -> GEP
try:
@ -104,11 +105,29 @@ def lower_binop(
return builder.call(cal, [raw], name=f"str_ptr2h_{tag}_{dst}")
# if already i64
if val is not None and hasattr(val, 'type') and isinstance(val.type, ir.IntType) and val.type.width == 64:
return val
# Distinguish handle vs numeric: if vid is tagged string-ish, treat as handle; otherwise box numeric to handle
is_tag = False
try:
if resolver is not None and hasattr(resolver, 'is_stringish'):
is_tag = resolver.is_stringish(vid)
except Exception:
is_tag = False
if is_tag:
return val
# Box numeric i64 to IntegerBox handle
cal = None
for f in builder.module.functions:
if f.name == 'nyash.box.from_i64':
cal = f; break
if cal is None:
cal = ir.Function(builder.module, ir.FunctionType(i64, [i64]), name='nyash.box.from_i64')
# Ensure value is i64
v64 = val if val.type.width == 64 else builder.zext(val, i64)
return builder.call(cal, [v64], name=f"int_i2h_{tag}_{dst}")
return ir.Constant(i64, 0)
hl = to_handle(lhs_raw, lhs_val, 'l')
hr = to_handle(rhs_raw, rhs_val, 'r')
hl = to_handle(lhs_raw, lhs_val, 'l', lhs)
hr = to_handle(rhs_raw, rhs_val, 'r', rhs)
# concat_hh(handle, handle) -> handle
hh_fnty = ir.FunctionType(i64, [i64, i64])
callee = None

View File

@ -131,6 +131,8 @@ def lower_boxcall(
try:
if resolver is not None and hasattr(resolver, 'mark_string'):
resolver.mark_string(dst_vid)
if resolver is not None and hasattr(resolver, 'string_ptrs'):
resolver.string_ptrs[int(dst_vid)] = p
except Exception:
pass
return
@ -196,27 +198,42 @@ def lower_boxcall(
return
if method_name in ("print", "println", "log"):
# Console mapping
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
arg0 = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map) if args else None
else:
arg0 = vmap.get(args[0]) if args else None
if arg0 is None:
arg0 = ir.Constant(i8p, None)
# Prefer handle API if arg is i64, else pointer API
if hasattr(arg0, 'type') and isinstance(arg0.type, ir.IntType) and arg0.type.width == 64:
# Optional runtime trace of the handle
import os as _os
if _os.environ.get('NYASH_LLVM_TRACE_FINAL') == '1':
trace = _declare(module, "nyash.debug.trace_handle", i64, [i64])
_ = builder.call(trace, [arg0], name="trace_handle")
callee = _declare(module, "nyash.console.log_handle", i64, [i64])
_ = builder.call(callee, [arg0], name="console_log_h")
else:
if hasattr(arg0, 'type') and isinstance(arg0.type, ir.IntType):
arg0 = builder.inttoptr(arg0, i8p)
# Console mapping (prefer pointer-API when possible to avoid handle registry mismatch)
use_ptr = False
arg0_vid = args[0] if args else None
arg0_ptr = None
if resolver is not None and hasattr(resolver, 'string_ptrs') and arg0_vid is not None:
try:
arg0_ptr = resolver.string_ptrs.get(int(arg0_vid))
if arg0_ptr is not None:
use_ptr = True
except Exception:
pass
if use_ptr and arg0_ptr is not None:
callee = _declare(module, "nyash.console.log", i64, [i8p])
_ = builder.call(callee, [arg0], name="console_log")
_ = builder.call(callee, [arg0_ptr], name="console_log_ptr")
else:
# Fallback: resolve i64 and prefer pointer API via to_i8p_h bridge
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
arg0 = resolver.resolve_i64(args[0], builder.block, preds, block_end_values, vmap, bb_map) if args else None
else:
arg0 = vmap.get(args[0]) if args else None
if arg0 is None:
arg0 = ir.Constant(i64, 0)
# If we have a handle (i64), convert to i8* via bridge and log via pointer API
if hasattr(arg0, 'type') and isinstance(arg0.type, ir.IntType):
if arg0.type.width != 64:
arg0 = builder.zext(arg0, i64)
bridge = _declare(module, "nyash.string.to_i8p_h", i8p, [i64])
p = builder.call(bridge, [arg0], name="str_h2p_for_log")
callee = _declare(module, "nyash.console.log", i64, [i8p])
_ = builder.call(callee, [p], name="console_log_p")
else:
# Non-integer value: logged via the pointer API unchanged (the IntType guard below cannot match in this branch)
if hasattr(arg0, 'type') and isinstance(arg0.type, ir.IntType):
arg0 = builder.inttoptr(arg0, i8p)
callee = _declare(module, "nyash.console.log", i64, [i8p])
_ = builder.call(callee, [arg0], name="console_log")
if dst_vid is not None:
vmap[dst_vid] = ir.Constant(i64, 0)
return

View File

@ -107,5 +107,18 @@ def lower_call(
'esc_json', 'node_json', 'dirname', 'join', 'read_all', 'toJson'
]):
resolver.mark_string(dst_vid)
# Additionally, create a pointer view via bridge for println pointer-API
if resolver is not None and hasattr(resolver, 'string_ptrs'):
i64 = ir.IntType(64)
i8p = ir.IntType(8).as_pointer()
if hasattr(result, 'type') and isinstance(result.type, ir.IntType) and result.type.width == 64:
bridge = None
for f in module.functions:
if f.name == 'nyash.string.to_i8p_h':
bridge = f; break
if bridge is None:
bridge = ir.Function(module, ir.FunctionType(i8p, [i64]), name='nyash.string.to_i8p_h')
pv = builder.call(bridge, [result], name=f"ret_h2p_{dst_vid}")
resolver.string_ptrs[int(dst_vid)] = pv
except Exception:
pass

View File

@ -39,7 +39,7 @@ def lower_const(
llvm_val = ir.Constant(f64, float(const_val))
vmap[dst] = llvm_val
elif const_type == 'string':
elif const_type == 'string' or (isinstance(const_type, dict) and const_type.get('kind') in ('handle','ptr') and const_type.get('box_type') == 'StringBox'):
# String constant - create global and immediately box to i64 handle
i8 = ir.IntType(8)
str_val = str(const_val)
@ -82,6 +82,11 @@ def lower_const(
# Mark this value-id as string-ish to guide '+' and '==' lowering
if hasattr(resolver, 'mark_string'):
resolver.mark_string(dst)
# Keep raw pointer for potential pointer-API sites (e.g., console.log)
try:
resolver.string_ptrs[dst] = gep
except Exception:
pass
elif const_type == 'void':
# Void/null constant - use i64 zero

View File

@ -58,6 +58,7 @@ def lower_phi(
# Collect incoming values
incoming_pairs: List[Tuple[ir.Block, ir.Value]] = []
used_default_zero = False
for block_id in actual_preds:
block = bb_map.get(block_id)
vid = incoming_map.get(block_id)
@ -76,6 +77,7 @@ def lower_phi(
if val is None:
# Missing incoming for this predecessor → default 0
val = ir.Constant(phi_type, 0)
used_default_zero = True
else:
# Snapshot fallback
if block_end_values is not None:
@ -86,6 +88,7 @@ def lower_phi(
if not val:
# Missing incoming for this predecessor → default 0
val = ir.Constant(phi_type, 0)
used_default_zero = True
# Coerce pointer to i64 at predecessor end
if hasattr(val, 'type') and val.type != phi_type:
pb = ir.IRBuilder(block)
@ -127,6 +130,16 @@ def lower_phi(
# Store PHI result
vmap[dst_vid] = phi
# Strict mode: fail fast on synthesized zeros (indicates incomplete incoming or dominance issue)
import os
if used_default_zero and os.environ.get('NYASH_LLVM_PHI_STRICT') == '1':
raise RuntimeError(f"[LLVM_PY] PHI dst={dst_vid} used synthesized zero; check preds/incoming")
if os.environ.get('NYASH_LLVM_TRACE_PHI') == '1':
try:
blkname = str(current_block.name)
except Exception:
blkname = '<blk>'
print(f"[PHI] {blkname} v{dst_vid} incoming={len(incoming_pairs)} zero={1 if used_default_zero else 0}")
# Propagate string-ness: if any incoming value-id is tagged string-ish, mark dst as string-ish.
try:
if resolver is not None and hasattr(resolver, 'is_stringish') and hasattr(resolver, 'mark_string'):

View File

@ -30,12 +30,36 @@ def lower_return(
builder.ret_void()
else:
# Get return value (prefer resolver)
ret_val = None
if resolver is not None and preds is not None and block_end_values is not None and bb_map is not None:
if isinstance(return_type, ir.PointerType):
ret_val = resolver.resolve_ptr(value_id, builder.block, preds, block_end_values, vmap)
else:
ret_val = resolver.resolve_i64(value_id, builder.block, preds, block_end_values, vmap, bb_map)
else:
try:
if isinstance(return_type, ir.PointerType):
ret_val = resolver.resolve_ptr(value_id, builder.block, preds, block_end_values, vmap)
else:
# Prefer pointer→handle reboxing for string-ish returns even if function return type is i64
is_stringish = False
if hasattr(resolver, 'is_stringish'):
try:
is_stringish = resolver.is_stringish(int(value_id))
except Exception:
is_stringish = False
if is_stringish and hasattr(resolver, 'string_ptrs') and int(value_id) in getattr(resolver, 'string_ptrs'):
# Re-box known string pointer to handle
p = resolver.string_ptrs[int(value_id)]
i8p = ir.IntType(8).as_pointer()
i64 = ir.IntType(64)
boxer = None
for f in builder.module.functions:
if f.name == 'nyash.box.from_i8_string':
boxer = f; break
if boxer is None:
boxer = ir.Function(builder.module, ir.FunctionType(i64, [i8p]), name='nyash.box.from_i8_string')
ret_val = builder.call(boxer, [p], name='ret_ptr2h')
else:
ret_val = resolver.resolve_i64(value_id, builder.block, preds, block_end_values, vmap, bb_map)
except Exception:
ret_val = None
if ret_val is None:
ret_val = vmap.get(value_id)
if not ret_val:
# Default based on return type