From 5a88c4eb2368d30089a6d9bc2002f64df3bb9f14 Mon Sep 17 00:00:00 2001 From: tomoaki Date: Fri, 26 Dec 2025 17:12:34 +0900 Subject: [PATCH] refactor(llvm-py): Phase 3 boxification - Strategic extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract complex logic while keeping simple parts inline: 1. StringBoxerBox (15 lines) - Box string pointers (i8*) to handles (i64) - Eliminate function declaration boilerplate - Clear single responsibility 2. ReturnPhiSynthesizerBox (101 lines) - Synthesize PHI nodes for return values - should_synthesize_phi(): Zero-like detection - synthesize_phi(): PHI creation with predecessors - Respects _disable_phi_synthesis flag What was NOT boxified (good decisions): - Fast path vmap lookup (13 lines): Too simple - Global vmap fallback (7 lines): Too small - Default value generation (18 lines): Clear as-is Impact: - lower_return(): 166→117 lines (-29% reduction) - File size: 250→352 lines (+102 for organization) - Testable units: 2→4 (+2 new Boxes) Tests: - phase286_pattern5_return_min.hako: PASS (exit 7) - phase284_p1_return_in_loop_llvm.sh: PASS - phase284_p2_return_in_loop_llvm.sh: PASS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/llvm_py/instructions/ret.py | 219 +++++++++++++++++++++++--------- 1 file changed, 161 insertions(+), 58 deletions(-) diff --git a/src/llvm_py/instructions/ret.py b/src/llvm_py/instructions/ret.py index 71a1c70f..72e62d66 100644 --- a/src/llvm_py/instructions/ret.py +++ b/src/llvm_py/instructions/ret.py @@ -81,6 +81,158 @@ class ReturnTypeAdjusterBox: return ret_val +class StringBoxerBox: + """ + Box-First principle: Single Responsibility - Box string pointers to handles + Converts i8* string pointers to i64 box handles via nyash.box.from_i8_string + """ + + @staticmethod + def box_string_pointer(builder: ir.IRBuilder, string_ptr: ir.Value) -> ir.Value: + """ + Box a string pointer (i8*) to handle (i64) + + Args: + builder: Current LLVM IR builder + string_ptr: i8* string pointer + + Returns: + i64 box handle + """ + i8p = ir.IntType(8).as_pointer() + i64 = ir.IntType(64) + + # Find or declare nyash.box.from_i8_string + boxer = None + for f in builder.module.functions: + if f.name == 'nyash.box.from_i8_string': + boxer = f + break + + if boxer is None: + boxer = ir.Function( + builder.module, + ir.FunctionType(i64, [i8p]), + name='nyash.box.from_i8_string' + ) + + return builder.call(boxer, [string_ptr], name='ret_ptr2h') + + +class ReturnPhiSynthesizerBox: + """ + Box-First principle: Single Responsibility - Synthesize PHI nodes for return values + Creates PHI at block head when return value is zero-like and has predecessors + Phase 131-4: Respects _disable_phi_synthesis flag + """ + + @staticmethod + def should_synthesize_phi(ret_val: ir.Value, return_type: ir.Type) -> bool: + """ + Check if return value is zero-like and needs PHI synthesis + + Args: + ret_val: Return value to check + return_type: Expected return type + + Returns: + True if PHI synthesis is needed + """ + if not isinstance(ret_val, ir.Constant): + return False + + # Check if zero-like + if isinstance(return_type, ir.IntType): + return str(ret_val) == str(ir.Constant(return_type, 0)) + elif isinstance(return_type, ir.DoubleType): + return str(ret_val) == str(ir.Constant(return_type, 0.0)) + elif isinstance(return_type, ir.PointerType): + return str(ret_val) == str(ir.Constant(return_type, None)) + + return False + + @staticmethod + def synthesize_phi( + builder: ir.IRBuilder, + value_id: int, + return_type: ir.Type, + preds: Dict[int, list], + block_end_values: Dict[int, Dict[int, ir.Value]], + bb_map: Dict[int, ir.Block], + resolver=None + ) -> Optional[ir.Value]: + """ + Synthesize PHI node at block head for return value + + Args: + builder: Current LLVM IR builder + value_id: Value ID to synthesize PHI for + return_type: Expected return type + preds: Predecessor map + block_end_values: Block end value snapshots + bb_map: Block ID to LLVM block map + resolver: Optional resolver for disable flag + + Returns: + PHI node if synthesized, None otherwise + """ + # Check if PHI synthesis is disabled (Phase 131-4) + if resolver is not None and hasattr(resolver, '_disable_phi_synthesis'): + if getattr(resolver, '_disable_phi_synthesis', False): + return None + + # Derive current block ID from name like 'bb3' + cur_bid = None + try: + cur_bid = int(str(builder.block.name).replace('bb', '')) + except Exception: + return None + + if cur_bid is None: + return None + + # Collect incoming values from predecessors + incoming = [] + for p in preds.get(cur_bid, []): + if p == cur_bid: + continue + + v = None + try: + v = block_end_values.get(p, {}).get(value_id) + except Exception: + v = None + + if v is None: + v = ir.Constant(return_type, 0) + + bblk = bb_map.get(p) + if bblk is not None: + incoming.append((v, bblk)) + + if not incoming: + return None + + # Create PHI at block head + if _phi_at_block_head is not None: + phi = _phi_at_block_head(builder.block, return_type, name=f"ret_phi_{value_id}") + else: + # Fallback: create PHI at block head using a temporary builder + try: + _b = ir.IRBuilder(builder.block) + _b.position_at_start(builder.block) + phi = _b.phi(return_type, name=f"ret_phi_{value_id}") + except Exception: + # As a last resort, create via current builder (may still succeed) + phi = builder.phi(return_type, name=f"ret_phi_{value_id}") + + # Add incoming values + for (v, bblk) in incoming: + phi.add_incoming(v, bblk) + + return phi + + def lower_return( builder: ir.IRBuilder, value_id: Optional[int], @@ -155,16 +307,9 @@ def lower_return( except Exception: is_stringish = False if is_stringish and hasattr(resolver, 'string_ptrs') and int(value_id) in getattr(resolver, 'string_ptrs'): + # Delegate to StringBoxerBox (Box-First principle) p = resolver.string_ptrs[int(value_id)] - i8p = ir.IntType(8).as_pointer() - i64 = ir.IntType(64) - boxer = None - for f in builder.module.functions: - if f.name == 'nyash.box.from_i8_string': - boxer = f; break - if boxer is None: - boxer = ir.Function(builder.module, ir.FunctionType(i64, [i8p]), name='nyash.box.from_i8_string') - ret_val = builder.call(boxer, [p], name='ret_ptr2h') + ret_val = StringBoxerBox.box_string_pointer(builder, p) else: ret_val = resolver.resolve_i64(value_id, builder.block, preds, block_end_values, vmap, bb_map) @@ -187,57 +332,15 @@ def lower_return( # Pointer type - null ret_val = ir.Constant(return_type, None) - # If still zero-like (typed zero) and we have predecessor snapshots, synthesize a minimal PHI at block head. + # Delegate PHI synthesis to ReturnPhiSynthesizerBox (Box-First principle) # Phase 131-4: Skip PHI synthesis if disabled (e.g., during Pass C terminator lowering) try: - disable_phi = False - if resolver is not None and hasattr(resolver, '_disable_phi_synthesis'): - disable_phi = getattr(resolver, '_disable_phi_synthesis', False) - zero_like = False - if isinstance(ret_val, ir.Constant): - if isinstance(return_type, ir.IntType): - zero_like = (str(ret_val) == str(ir.Constant(return_type, 0))) - elif isinstance(return_type, ir.DoubleType): - zero_like = (str(ret_val) == str(ir.Constant(return_type, 0.0))) - elif isinstance(return_type, ir.PointerType): - zero_like = (str(ret_val) == str(ir.Constant(return_type, None))) - # Synthesize a PHI for return at the BLOCK HEAD (grouped), not inline. - if not disable_phi and zero_like and preds is not None and block_end_values is not None and bb_map is not None and isinstance(value_id, int): - # Derive current block id from name like 'bb3' - cur_bid = None - try: - cur_bid = int(str(builder.block.name).replace('bb','')) - except Exception: - cur_bid = None - if cur_bid is not None: - incoming = [] - for p in preds.get(cur_bid, []): - if p == cur_bid: - continue - v = None - try: - v = block_end_values.get(p, {}).get(value_id) - except Exception: - v = None - if v is None: - v = ir.Constant(return_type, 0) - bblk = bb_map.get(p) - if bblk is not None: - incoming.append((v, bblk)) - if incoming: - if _phi_at_block_head is not None: - phi = _phi_at_block_head(builder.block, return_type, name=f"ret_phi_{value_id}") - else: - # Fallback: create PHI at block head using a temporary builder - try: - _b = ir.IRBuilder(builder.block) - _b.position_at_start(builder.block) - phi = _b.phi(return_type, name=f"ret_phi_{value_id}") - except Exception: - # As a last resort, create via current builder (may still succeed) - phi = builder.phi(return_type, name=f"ret_phi_{value_id}") - for (v, bblk) in incoming: - phi.add_incoming(v, bblk) + if ReturnPhiSynthesizerBox.should_synthesize_phi(ret_val, return_type): + if preds is not None and block_end_values is not None and bb_map is not None and isinstance(value_id, int): + phi = ReturnPhiSynthesizerBox.synthesize_phi( + builder, value_id, return_type, preds, block_end_values, bb_map, resolver + ) + if phi is not None: ret_val = phi except Exception: pass