Phase 21.7 normalization: optimization pre-work + bench harness expansion

- Add opt-in optimizations (defaults OFF)
  - Ret purity verifier: NYASH_VERIFY_RET_PURITY=1
  - strlen FAST enhancement for const handles
  - FAST_INT gate for same-BB SSA optimization
  - length cache for string literals in llvmlite
- Expand bench harness (tools/perf/microbench.sh)
  - Add branch/call/stringchain/arraymap/chip8/kilo cases
  - Auto-calculate ratio vs C reference
  - Document in benchmarks/README.md
- Compiler health improvements
  - Unify PHI insertion to insert_phi_at_head()
  - Add NYASH_LLVM_SKIP_BUILD=1 for build reuse
- Runtime & safety enhancements
  - Clarify Rust/Hako ownership boundaries
  - Strengthen receiver localization (LocalSSA/pin/after-PHIs)
  - Stop excessive PluginInvoke→BoxCall rewrites
- Update CURRENT_TASK.md, docs, and canaries

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-11-13 16:40:58 +09:00
parent 9e2fa1e36e
commit dda65b94b7
160 changed files with 6773 additions and 1692 deletions

View File

@ -62,6 +62,14 @@ def lower_function(builder, func_data: Dict[str, Any]):
builder.resolver.string_literals.clear()
if hasattr(builder.resolver, 'string_ptrs'):
builder.resolver.string_ptrs.clear()
if hasattr(builder.resolver, 'length_cache'):
builder.resolver.length_cache.clear()
# Also clear newbox→string-arg hints per function to avoid leakage
try:
if hasattr(builder.resolver, 'newbox_string_args') and isinstance(builder.resolver.newbox_string_args, dict):
builder.resolver.newbox_string_args.clear()
except Exception:
pass
except Exception:
pass

View File

@ -6,8 +6,30 @@ Handles +, -, *, /, %, &, |, ^, <<, >>
import llvmlite.ir as ir
from typing import Dict, Optional, Any
from utils.values import resolve_i64_strict
import os
from .compare import lower_compare
import llvmlite.ir as ir
def _canonicalize_i64(builder: ir.IRBuilder, value, vid, vmap: Dict[int, ir.Value], hint: str):
    """Coerce an operand to a 64-bit integer and remember it under its value id.

    Pointer-typed values go through ``ptrtoint``; integers narrower than 64
    bits are zero-extended and wider ones are truncated.  Values of any other
    type (or whose ``type`` attribute cannot be read) are returned unchanged.

    Args:
        builder: IR builder used to emit the cast instruction, when one is needed.
        value: Operand to normalize; ``None`` is returned as-is and is not cached.
        vid: Value id of the operand.  It is embedded in the cast's name and,
            when it is an ``int``, used as the key for storing the result in ``vmap``.
        vmap: Value map, updated in place with the normalized operand.
        hint: Prefix for the emitted cast's name (e.g. ``"bin_lhs"``).

    Returns:
        The normalized value, or ``None`` when ``value`` is ``None``.
    """
    if value is None:
        return None

    i64 = ir.IntType(64)
    try:
        src_type = value.type
    except Exception:
        # No readable type: leave the value untouched below.
        src_type = None

    normalized = value
    if isinstance(src_type, ir.PointerType):
        normalized = builder.ptrtoint(value, i64, name=f"{hint}_p2i_{vid}")
    elif isinstance(src_type, ir.IntType):
        bits = src_type.width
        if bits < 64:
            normalized = builder.zext(value, i64, name=f"{hint}_zext_{vid}")
        elif bits > 64:
            normalized = builder.trunc(value, i64, name=f"{hint}_trunc_{vid}")

    # NOTE(review): this overwrites any existing vmap entry for ``vid``, so a
    # pointer-typed entry is replaced by its i64 cast - confirm that no later
    # consumer of vmap relies on seeing the pointer type.
    if isinstance(vid, int):
        vmap[vid] = normalized
    return normalized
def lower_binop(
builder: ir.IRBuilder,
@ -37,10 +59,20 @@ def lower_binop(
vmap: Value map
current_block: Current basic block
"""
# Resolve operands as i64 (using resolver when available)
# For now, simple vmap lookup
lhs_val = resolve_i64_strict(resolver, lhs, current_block, preds, block_end_values, vmap, bb_map)
rhs_val = resolve_i64_strict(resolver, rhs, current_block, preds, block_end_values, vmap, bb_map)
# Resolve operands as i64
fast_int = os.environ.get('NYASH_LLVM_FAST_INT') == '1'
lhs_val = None
rhs_val = None
if fast_int:
# Prefer same-block SSA directly to avoid resolver/PHI materialization cost in hot loops
lhs_val = vmap.get(lhs)
rhs_val = vmap.get(rhs)
if lhs_val is None:
lhs_val = resolve_i64_strict(resolver, lhs, current_block, preds, block_end_values, vmap, bb_map)
if rhs_val is None:
rhs_val = resolve_i64_strict(resolver, rhs, current_block, preds, block_end_values, vmap, bb_map)
lhs_val = _canonicalize_i64(builder, lhs_val, lhs, vmap, "bin_lhs")
rhs_val = _canonicalize_i64(builder, rhs_val, rhs, vmap, "bin_rhs")
if lhs_val is None:
lhs_val = ir.Constant(ir.IntType(64), 0)
if rhs_val is None:

View File

@ -51,6 +51,14 @@ def lower_boxcall(
bb_map=None,
ctx: Optional[Any] = None,
) -> None:
# Guard against emitting after a terminator: create continuation block if needed.
try:
if builder.block is not None and getattr(builder.block, 'terminator', None) is not None:
func = builder.block.parent
cont = func.append_basic_block(name=f"cont_bb_{builder.block.name}")
builder.position_at_end(cont)
except Exception:
pass
"""
Lower MIR BoxCall instruction
@ -124,6 +132,56 @@ def lower_boxcall(
fast_on = os.environ.get('NYASH_LLVM_FAST') == '1'
except Exception:
fast_on = False
def _cache_len(val):
if not fast_on or resolver is None or dst_vid is None or box_vid is None:
return
cache = getattr(resolver, 'length_cache', None)
if cache is None:
return
try:
cache[int(box_vid)] = val
except Exception:
pass
if fast_on and resolver is not None and dst_vid is not None and box_vid is not None:
cache = getattr(resolver, 'length_cache', None)
if cache is not None:
try:
cached = cache.get(int(box_vid))
except Exception:
cached = None
if cached is not None:
vmap[dst_vid] = cached
return
# Ultra-fast: literal length folding when receiver originates from a string literal.
# Check resolver.newbox_string_args[recv] -> arg_vid -> resolver.string_literals[arg_vid]
if fast_on and dst_vid is not None and resolver is not None:
try:
arg_vid = None
if hasattr(resolver, 'newbox_string_args'):
arg_vid = resolver.newbox_string_args.get(int(box_vid))
# Case A: newbox(StringBox, const)
if arg_vid is not None and hasattr(resolver, 'string_literals'):
lit = resolver.string_literals.get(int(arg_vid))
if isinstance(lit, str):
# Mode: bytes or code points
use_cp = os.environ.get('NYASH_STR_CP') == '1'
n = len(lit) if use_cp else len(lit.encode('utf-8'))
const_len = ir.Constant(ir.IntType(64), n)
vmap[dst_vid] = const_len
_cache_len(const_len)
return
# Case B: receiver itself is a literal-backed handle (const string)
if hasattr(resolver, 'string_literals'):
lit2 = resolver.string_literals.get(int(box_vid))
if isinstance(lit2, str):
use_cp = os.environ.get('NYASH_STR_CP') == '1'
n2 = len(lit2) if use_cp else len(lit2.encode('utf-8'))
const_len2 = ir.Constant(ir.IntType(64), n2)
vmap[dst_vid] = const_len2
_cache_len(const_len2)
return
except Exception:
pass
if fast_on and resolver is not None and hasattr(resolver, 'string_ptrs'):
try:
ptr = resolver.string_ptrs.get(int(box_vid))

View File

@ -6,9 +6,31 @@ Handles comparison operations (<, >, <=, >=, ==, !=)
import llvmlite.ir as ir
from typing import Dict, Optional, Any
from utils.values import resolve_i64_strict
import os
from .externcall import lower_externcall
from trace import values as trace_values
def _canonicalize_i64(builder: ir.IRBuilder, value, vid, vmap: Dict[int, ir.Value], hint: str):
    """Normalize a comparison operand to i64 and store it in ``vmap``.

    ``None`` is returned immediately without touching ``vmap``.  Otherwise a
    pointer is converted with ``ptrtoint``, an integer narrower than 64 bits is
    zero-extended, and an integer wider than 64 bits is truncated.  A value of
    any other type, or one whose ``type`` attribute cannot be read, passes
    through unchanged.

    Args:
        builder: IR builder that emits the cast, if any.
        value: Operand to normalize (may be ``None``).
        vid: Value id; part of the cast's name and, if an ``int``, the ``vmap`` key.
        vmap: Value map updated in place with the result.
        hint: Name prefix for the emitted cast (e.g. ``"cmp_lhs"``).

    Returns:
        The (possibly cast) value, or ``None`` if ``value`` was ``None``.
    """
    if value is None:
        return None

    wide = ir.IntType(64)
    try:
        kind = value.type
    except Exception:
        kind = None

    if isinstance(kind, ir.PointerType):
        value = builder.ptrtoint(value, wide, name=f"{hint}_p2i_{vid}")
    elif isinstance(kind, ir.IntType):
        nbits = kind.width
        if nbits > 64:
            value = builder.trunc(value, wide, name=f"{hint}_trunc_{vid}")
        elif nbits < 64:
            value = builder.zext(value, wide, name=f"{hint}_zext_{vid}")

    # NOTE(review): the stored entry replaces whatever vmap held for ``vid``;
    # a pointer operand is therefore visible as i64 afterwards - confirm that
    # later pointer-type checks on this value id are not expected to match.
    if isinstance(vid, int):
        vmap[vid] = value
    return value
def lower_compare(
builder: ir.IRBuilder,
op: str,
@ -50,8 +72,18 @@ def lower_compare(
pass
# Get operands
# Prefer same-block SSA from vmap; fallback to resolver for cross-block dominance
lhs_val = resolve_i64_strict(resolver, lhs, current_block, preds, block_end_values, vmap, bb_map)
rhs_val = resolve_i64_strict(resolver, rhs, current_block, preds, block_end_values, vmap, bb_map)
fast_int = os.environ.get('NYASH_LLVM_FAST_INT') == '1'
lhs_val = None
rhs_val = None
if fast_int:
lhs_val = vmap.get(lhs)
rhs_val = vmap.get(rhs)
if lhs_val is None:
lhs_val = resolve_i64_strict(resolver, lhs, current_block, preds, block_end_values, vmap, bb_map)
if rhs_val is None:
rhs_val = resolve_i64_strict(resolver, rhs, current_block, preds, block_end_values, vmap, bb_map)
lhs_val = _canonicalize_i64(builder, lhs_val, lhs, vmap, "cmp_lhs")
rhs_val = _canonicalize_i64(builder, rhs_val, rhs, vmap, "cmp_rhs")
i64 = ir.IntType(64)
i8p = ir.IntType(8).as_pointer()

View File

@ -21,6 +21,15 @@ def lower_mir_call(owner, builder: ir.IRBuilder, mir_call: Dict[str, Any], dst_v
- resolver: Value resolver instance
"""
# Guard: avoid emitting after a terminator; if current block is closed, create continuation.
try:
if builder.block is not None and getattr(builder.block, 'terminator', None) is not None:
func = builder.block.parent
cont = func.append_basic_block(name=f"cont_bb_{builder.block.name}")
builder.position_at_end(cont)
except Exception:
pass
# Check if unified call is enabled
use_unified = os.getenv("NYASH_MIR_UNIFIED_CALL", "1").lower() not in ("0", "false", "off")
if not use_unified:

View File

@ -87,6 +87,12 @@ def lower_newbox(
resolver.newbox_string_args = {}
# Map the resulting box handle to the string argument
resolver.newbox_string_args[dst_vid] = args[0]
# Hint downstream passes that this dst is string-ish
if hasattr(resolver, 'mark_string'):
try:
resolver.mark_string(int(dst_vid))
except Exception:
resolver.mark_string(dst_vid)
except Exception:
pass # Silently ignore failures

View File

@ -5,6 +5,11 @@ Handles void and value returns
import llvmlite.ir as ir
from typing import Dict, Optional, Any
try:
# Create PHIs at block head to satisfy LLVM invariant
from ..phi_wiring.wiring import phi_at_block_head as _phi_at_block_head
except Exception:
_phi_at_block_head = None
def lower_return(
builder: ir.IRBuilder,
@ -115,6 +120,7 @@ def lower_return(
zero_like = (str(ret_val) == str(ir.Constant(return_type, 0.0)))
elif isinstance(return_type, ir.PointerType):
zero_like = (str(ret_val) == str(ir.Constant(return_type, None)))
# Synthesize a PHI for return at the BLOCK HEAD (grouped), not inline.
if zero_like and preds is not None and block_end_values is not None and bb_map is not None and isinstance(value_id, int):
# Derive current block id from name like 'bb3'
cur_bid = None
@ -125,8 +131,8 @@ def lower_return(
if cur_bid is not None:
incoming = []
for p in preds.get(cur_bid, []):
# Skip self-loop
if p == cur_bid: continue
if p == cur_bid:
continue
v = None
try:
v = block_end_values.get(p, {}).get(value_id)
@ -138,7 +144,17 @@ def lower_return(
if bblk is not None:
incoming.append((v, bblk))
if incoming:
phi = builder.phi(return_type, name=f"ret_phi_{value_id}")
if _phi_at_block_head is not None:
phi = _phi_at_block_head(builder.block, return_type, name=f"ret_phi_{value_id}")
else:
# Fallback: create PHI at block head using a temporary builder
try:
_b = ir.IRBuilder(builder.block)
_b.position_at_start(builder.block)
phi = _b.phi(return_type, name=f"ret_phi_{value_id}")
except Exception:
# As a last resort, create via current builder (may still succeed)
phi = builder.phi(return_type, name=f"ret_phi_{value_id}")
for (v, bblk) in incoming:
phi.add_incoming(v, bblk)
ret_val = phi
@ -162,4 +178,5 @@ def lower_return(
# Zero extend
ret_val = builder.zext(ret_val, return_type)
# Emit return; no further instructions should be emitted in this block
builder.ret(ret_val)

View File

@ -121,5 +121,13 @@ def insert_automatic_safepoint(
func_type = ir.FunctionType(void, [])
check_func = ir.Function(module, func_type, name="ny_check_safepoint")
# Guard: do not insert into a terminated block; create continuation if needed
try:
if builder.block is not None and getattr(builder.block, 'terminator', None) is not None:
func = builder.block.parent
cont = func.append_basic_block(name=f"cont_bb_{builder.block.name}")
builder.position_at_end(cont)
except Exception:
pass
# Insert safepoint check
builder.call(check_func, [], name=f"safepoint_{location}")

View File

@ -575,12 +575,27 @@ class NyashLLVMBuilder:
# to avoid divergence between two implementations.
def _lower_instruction_list(self, builder: ir.IRBuilder, insts: List[Dict[str, Any]], func: ir.Function):
    """Lower a flat list of instructions using the current builder and function.

    Structural guard: lowering stops at the first terminator so the emitted
    IR stays valid.  Two conditions end the walk:

    * the builder's current block already has a terminator (checked before
      each instruction is lowered), or
    * the instruction just lowered is a MIR terminator (``ret``, ``branch``
      or ``jump``).

    No continuation block is created here; any instructions that follow a
    terminator are dropped and should be split into their own block upstream.

    Args:
        builder: IR builder positioned in the block being filled.
        insts: MIR instructions, each a dict with an ``'op'`` key.
        func: Function being lowered; forwarded to ``lower_instruction``.
    """
    terminator_ops = ('ret', 'branch', 'jump')
    for inst in insts:
        # Never emit into a block that is already closed.
        block = getattr(builder, 'block', None)
        if block is not None and getattr(block, 'terminator', None) is not None:
            break
        self.lower_instruction(builder, inst, func)
        # Entries that are not dicts carry no op and never end the walk here.
        op = inst.get('op') if isinstance(inst, dict) else None
        if op in terminator_ops:
            break
def finalize_phis(self):

View File

@ -47,6 +47,8 @@ class Resolver:
# Track value-ids that are known to represent string handles (i64)
# This is a best-effort tag used to decide '+' as string concat when both sides are i64.
self.string_ids: set[int] = set()
# Cache for repeated string length queries when immutably known
self.length_cache: Dict[int, ir.Value] = {}
# Type shortcuts
self.i64 = ir.IntType(64)

View File

@ -0,0 +1,49 @@
#!/usr/bin/env python3
import os
import unittest
import llvmlite.ir as ir
from src.llvm_py.llvm_builder import NyashLLVMBuilder
class TestStrlenFast(unittest.TestCase):
    """Checks the opt-in FAST strlen lowering, enabled via ``NYASH_LLVM_FAST=1``."""

    _FAST_ENV = 'NYASH_LLVM_FAST'

    def setUp(self):
        # Remember the caller's setting so tearDown can restore it instead of
        # unconditionally removing the variable from the process environment.
        self._saved_fast = os.environ.get(self._FAST_ENV)
        # Ensure FAST toggle is ON for this test
        os.environ[self._FAST_ENV] = '1'

    def tearDown(self):
        if self._saved_fast is None:
            os.environ.pop(self._FAST_ENV, None)
        else:
            os.environ[self._FAST_ENV] = self._saved_fast

    def test_newbox_string_length_fast_lowering(self):
        # Minimal MIR JSON v0-like: const "hello" → newbox StringBox(arg) → boxcall length
        mir = {
            "functions": [
                {
                    "name": "main",
                    "params": [],
                    "blocks": [
                        {
                            "id": 1,
                            "instructions": [
                                {"op": "const", "dst": 10, "value": {"type": "string", "value": "hello"}},
                                {"op": "newbox", "dst": 20, "box_type": "StringBox", "args": [10]},
                                {"op": "boxcall", "dst": 30, "box_val": 20, "method": "length", "args": []},
                                {"op": "ret", "value": 30},
                            ]
                        }
                    ]
                }
            ]
        }
        b = NyashLLVMBuilder()
        # Treat a None result as empty text so both checks fail as assertions.
        ir_txt = b.build_from_mir(mir) or ''
        # NOTE(review): the boxcall lowering in this change also folds the
        # length of a literal-backed StringBox to a constant when FAST is on;
        # confirm that a nyrt_string_length call is still emitted for this MIR.
        # Must reference the neutral kernel symbol for FAST strlen
        self.assertIn('declare i64 @nyrt_string_length(i8*, i64)', ir_txt)
        # And a call site should be present
        self.assertIn('call i64 @nyrt_string_length', ir_txt)


if __name__ == '__main__':
    unittest.main()