feat(llvm): Implement Context Boxing pattern for cleaner APIs

Major improvement to reduce parameter explosion (15+ args → 3-4 contexts):
- Add LowerFnCtx/BlockCtx for grouping related parameters
- Add lightweight StrHandle/StrPtr newtypes for string safety
- Implement boxed API wrappers for boxcall/fields/invoke
- Add dev checks infrastructure (NYASH_DEV_CHECKS, NYASH_DEV_CHECK_DISPATCH_ONLY_PHI)

Key achievements:
- lower_boxcall: 16 args → 8 args via boxed API (lower_boxcall_boxed)
- fields/invoke: Similar parameter reduction
- BuilderCursor discipline enforced throughout
- String handle invariant: i64 across blocks, i8* only at call sites

Status:
- Internal migration in progress (fields → invoke → marshal)
- Full cutover pending due to borrow checker constraints
- dep_tree_min_string.o generation successful (sealed=ON)

Next: Complete internal migration before flipping to boxed APIs

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Selfhosting Dev
2025-09-13 00:07:38 +09:00
parent 8b48480844
commit 3bef7e8608
11 changed files with 436 additions and 35 deletions

View File

@ -11,6 +11,7 @@ use self::marshal as marshal_mod;
use self::invoke as invoke_mod;
use crate::mir::{function::MirFunction, BasicBlockId, ValueId};
use super::builder_cursor::BuilderCursor;
use super::ctx::{LowerFnCtx, BlockCtx};
// BoxCall lowering (large): mirrors existing logic; kept in one function for now
pub(in super::super) fn lower_boxcall<'ctx, 'b>(
@ -260,6 +261,39 @@ pub(in super::super) fn lower_boxcall<'ctx, 'b>(
}
}
/// Context-based entry point for BoxCall lowering.
///
/// Unpacks `ctx` and `blk` into the positional argument list expected by
/// `lower_boxcall` and forwards the call; no lowering logic lives here.
///
/// When `ctx.dev_checks` is set, `ctx.cursor.assert_open` is called for
/// `blk.cur_bid` before delegating.
///
/// # Errors
///
/// Returns `"LowerFnCtx.box_type_ids missing"` if `ctx.box_type_ids` is `None`;
/// otherwise returns whatever `lower_boxcall` returns.
pub(in super::super) fn lower_boxcall_boxed<'ctx, 'b>(
    ctx: &mut LowerFnCtx<'ctx, 'b>,
    blk: &BlockCtx<'ctx>,
    dst: &Option<ValueId>,
    box_val: &ValueId,
    method: &str,
    method_id: &Option<u16>,
    args: &[ValueId],
    entry_builder: &inkwell::builder::Builder<'ctx>,
) -> Result<(), String> {
    let bid = blk.cur_bid;

    // Optional dev check: the current block must still accept instructions.
    if ctx.dev_checks {
        ctx.cursor.assert_open(bid);
    }

    // The non-boxed implementation requires the type-id table, which is an
    // optional extra on the context.
    let box_type_ids = match ctx.box_type_ids {
        Some(ids) => ids,
        None => return Err("LowerFnCtx.box_type_ids missing".to_string()),
    };

    lower_boxcall(
        ctx.codegen,
        ctx.cursor,
        ctx.resolver,
        bid,
        ctx.func,
        ctx.vmap,
        dst,
        box_val,
        method,
        method_id,
        args,
        box_type_ids,
        entry_builder,
        ctx.bb_map,
        ctx.preds,
        ctx.block_end_values,
    )
}
fn coerce_to_type<'ctx>(
codegen: &CodegenContext<'ctx>,
val: inkwell::values::BasicValueEnum<'ctx>,

View File

@ -1,16 +1,16 @@
use std::collections::HashMap;
use inkwell::{values::BasicValueEnum as BVE, AddressSpace};
use crate::backend::llvm::context::CodegenContext;
use crate::mir::ValueId;
use super::super::ctx::{LowerFnCtx, BlockCtx};
/// Handle getField/setField; returns true if handled.
use super::super::builder_cursor::BuilderCursor;
pub(super) fn try_handle_field_method<'ctx, 'b>(
codegen: &CodegenContext<'ctx>,
cursor: &mut BuilderCursor<'ctx, 'b>,
cursor: &mut super::super::builder_cursor::BuilderCursor<'ctx, 'b>,
cur_bid: crate::mir::BasicBlockId,
vmap: &mut HashMap<ValueId, inkwell::values::BasicValueEnum<'ctx>>,
dst: &Option<ValueId>,
@ -49,12 +49,12 @@ pub(super) fn try_handle_field_method<'ctx, 'b>(
} else {
return Err("get_field ret expected i64".to_string());
};
let pty = codegen.context.ptr_type(AddressSpace::from(0));
let ptr = codegen
let pty = codegen.context.ptr_type(AddressSpace::from(0));
let ptr = codegen
.builder
.build_int_to_ptr(h, pty, "gf_handle_to_ptr")
.map_err(|e| e.to_string())?;
vmap.insert(*d, ptr.into());
vmap.insert(*d, ptr.into());
}
Ok(true)
}
@ -80,3 +80,28 @@ pub(super) fn try_handle_field_method<'ctx, 'b>(
_ => Ok(false),
}
}
/// Context-based wrapper around `try_handle_field_method`.
///
/// Spreads `ctx` and `blk` into the positional parameters of the non-boxed
/// implementation and returns its result unchanged (`Ok(true)` meaning the
/// method was handled, per the non-boxed function's contract).
pub(super) fn try_handle_field_method_boxed<'ctx, 'b>(
    ctx: &mut LowerFnCtx<'ctx, 'b>,
    blk: &BlockCtx<'ctx>,
    dst: &Option<ValueId>,
    method: &str,
    args: &[ValueId],
    recv_h: inkwell::values::IntValue<'ctx>,
) -> Result<bool, String> {
    let bid = blk.cur_bid;
    try_handle_field_method(
        ctx.codegen,
        ctx.cursor,
        bid,
        ctx.vmap,
        dst,
        method,
        args,
        recv_h,
        ctx.resolver,
        ctx.bb_map,
        ctx.preds,
        ctx.block_end_values,
    )
}

View File

@ -4,6 +4,7 @@ use inkwell::{values::BasicValueEnum as BVE, AddressSpace};
use crate::backend::llvm::context::CodegenContext;
use crate::mir::{function::MirFunction, ValueId};
use super::super::ctx::{LowerFnCtx, BlockCtx};
// use super::marshal::{get_i64, get_tag_const};
@ -240,3 +241,33 @@ fn store_invoke_return<'ctx>(
}
Ok(())
}
/// Context-based wrapper around `try_handle_tagged_invoke`.
///
/// Spreads `ctx` and `blk` into the positional parameters of the existing
/// implementation and returns its result unchanged.
pub(super) fn try_handle_tagged_invoke_boxed<'ctx, 'b>(
    ctx: &mut LowerFnCtx<'ctx, 'b>,
    blk: &BlockCtx<'ctx>,
    dst: &Option<ValueId>,
    mid: u16,
    type_id: i64,
    recv_h: inkwell::values::IntValue<'ctx>,
    args: &[ValueId],
    entry_builder: &inkwell::builder::Builder<'ctx>,
) -> Result<(), String> {
    let bid = blk.cur_bid;
    try_handle_tagged_invoke(
        ctx.codegen,
        ctx.func,
        ctx.cursor,
        ctx.resolver,
        ctx.vmap,
        dst,
        mid,
        type_id,
        recv_h,
        args,
        entry_builder,
        bid,
        ctx.bb_map,
        ctx.preds,
        ctx.block_end_values,
    )
}

View File

@ -0,0 +1,129 @@
use std::collections::HashMap;
use inkwell::{
basic_block::BasicBlock,
values::{BasicValueEnum as BVE, IntValue, PointerValue, FloatValue},
};
use crate::backend::llvm::context::CodegenContext;
use crate::mir::{function::MirFunction, BasicBlockId, ValueId};
use super::{builder_cursor::BuilderCursor, Resolver};
pub type LlResult<T> = Result<T, String>;
/// Per-function lowering context that bundles the references lowering helpers
/// otherwise take as separate positional parameters.
///
/// The `ensure_*` methods route value lookups through the `Resolver`. All
/// fields are `pub`, so resolver-only access is a convention of this type
/// rather than something it enforces.
///
/// NOTE(review): `cursor` is typed `&'b mut BuilderCursor<'ctx, 'b>`, i.e. the
/// same `'b` names both the borrow and a parameter of the borrowed type. Since
/// `&mut T` is invariant in `T`, this ties the mutable borrow to the cursor's
/// own `'b`; confirm whether a separate lifetime is needed for callers that
/// want to reborrow the cursor after the context is dropped.
pub struct LowerFnCtx<'ctx, 'b> {
/// Codegen context forwarded to the resolver and to the lowering functions.
pub codegen: &'ctx CodegenContext<'ctx>,
/// MIR function handed to the lowering functions (presumably the function
/// currently being lowered).
pub func: &'b MirFunction,
/// Builder cursor; `assert_open` is called on it before resolving values.
pub cursor: &'b mut BuilderCursor<'ctx, 'b>,
/// Resolver used by `ensure_i64` / `ensure_ptr` / `ensure_f64`.
pub resolver: &'b mut Resolver<'ctx>,
/// Map from MIR value id to its LLVM value.
pub vmap: &'b mut HashMap<ValueId, BVE<'ctx>>,
/// Map from MIR block id to its LLVM basic block.
pub bb_map: &'b HashMap<BasicBlockId, BasicBlock<'ctx>>,
/// Block id -> list of block ids (presumably CFG predecessors; confirm
/// against the code that builds it).
pub preds: &'b HashMap<BasicBlockId, Vec<BasicBlockId>>,
/// Per-block value maps (presumably the end-of-block snapshots consumed by
/// the resolver; confirm against the producer).
pub block_end_values: &'b HashMap<BasicBlockId, HashMap<ValueId, BVE<'ctx>>>,
// Optional extras commonly needed by some paths
/// Name -> type id table; required by `lower_boxcall_boxed`, `None` until
/// set via `with_box_type_ids`.
pub box_type_ids: Option<&'b HashMap<String, i64>>,
/// Value id -> string table; `None` until set via `with_const_strs`.
pub const_strs: Option<&'b HashMap<ValueId, String>>,
// Dev flag: extra runtime assertions
/// Set by `new` when the `NYASH_DEV_CHECKS` environment variable is `"1"`.
pub dev_checks: bool,
}
impl<'ctx, 'b> LowerFnCtx<'ctx, 'b> {
    /// Builds a context from the mandatory per-function references.
    ///
    /// `box_type_ids` and `const_strs` start out as `None`; attach them with
    /// `with_box_type_ids` / `with_const_strs`. `dev_checks` is `true` only
    /// when the `NYASH_DEV_CHECKS` environment variable is exactly `"1"`.
    pub fn new(
        codegen: &'ctx CodegenContext<'ctx>,
        func: &'b MirFunction,
        cursor: &'b mut BuilderCursor<'ctx, 'b>,
        resolver: &'b mut Resolver<'ctx>,
        vmap: &'b mut HashMap<ValueId, BVE<'ctx>>,
        bb_map: &'b HashMap<BasicBlockId, BasicBlock<'ctx>>,
        preds: &'b HashMap<BasicBlockId, Vec<BasicBlockId>>,
        block_end_values: &'b HashMap<BasicBlockId, HashMap<ValueId, BVE<'ctx>>>,
    ) -> Self {
        Self {
            codegen,
            func,
            cursor,
            resolver,
            vmap,
            bb_map,
            preds,
            block_end_values,
            box_type_ids: None,
            const_strs: None,
            dev_checks: matches!(std::env::var("NYASH_DEV_CHECKS").as_deref(), Ok("1")),
        }
    }

    /// Builder-style setter: attaches the box type-id table and returns the context.
    pub fn with_box_type_ids(self, ids: &'b HashMap<String, i64>) -> Self {
        Self {
            box_type_ids: Some(ids),
            ..self
        }
    }

    /// Builder-style setter: attaches the constant-string table and returns the context.
    pub fn with_const_strs(self, m: &'b HashMap<ValueId, String>) -> Self {
        Self {
            const_strs: Some(m),
            ..self
        }
    }

    /// Resolves `v` as an integer value in the block described by `blk`.
    ///
    /// Calls `cursor.assert_open` for `blk.cur_bid` first (unconditionally,
    /// independent of `dev_checks`), then delegates to `Resolver::resolve_i64`
    /// and returns its result.
    #[inline]
    pub fn ensure_i64(&mut self, blk: &BlockCtx<'ctx>, v: ValueId) -> LlResult<IntValue<'ctx>> {
        let bid = blk.cur_bid;
        self.cursor.assert_open(bid);
        self.resolver.resolve_i64(
            self.codegen,
            self.cursor,
            bid,
            v,
            self.bb_map,
            self.preds,
            self.block_end_values,
            self.vmap,
        )
    }

    /// Resolves `v` as a pointer value in the block described by `blk`.
    ///
    /// Calls `cursor.assert_open` for `blk.cur_bid` first (unconditionally,
    /// independent of `dev_checks`), then delegates to `Resolver::resolve_ptr`
    /// and returns its result.
    #[inline]
    pub fn ensure_ptr(&mut self, blk: &BlockCtx<'ctx>, v: ValueId) -> LlResult<PointerValue<'ctx>> {
        let bid = blk.cur_bid;
        self.cursor.assert_open(bid);
        self.resolver.resolve_ptr(
            self.codegen,
            self.cursor,
            bid,
            v,
            self.bb_map,
            self.preds,
            self.block_end_values,
            self.vmap,
        )
    }

    /// Resolves `v` as a float value in the block described by `blk`.
    ///
    /// Calls `cursor.assert_open` for `blk.cur_bid` first (unconditionally,
    /// independent of `dev_checks`), then delegates to `Resolver::resolve_f64`
    /// and returns its result.
    #[inline]
    pub fn ensure_f64(&mut self, blk: &BlockCtx<'ctx>, v: ValueId) -> LlResult<FloatValue<'ctx>> {
        let bid = blk.cur_bid;
        self.cursor.assert_open(bid);
        self.resolver.resolve_f64(
            self.codegen,
            self.cursor,
            bid,
            v,
            self.bb_map,
            self.preds,
            self.block_end_values,
            self.vmap,
        )
    }
}
/// Per-basic-block context to keep insertion site and block identity together.
///
/// Lowering helpers read `cur_bid` from here instead of taking the block id as
/// a separate positional parameter.
pub struct BlockCtx<'ctx> {
/// MIR id of the block currently being lowered.
pub cur_bid: BasicBlockId,
/// LLVM basic block paired with `cur_bid` (presumably the block created for
/// it; nothing in this type checks that the two correspond).
pub cur_llbb: BasicBlock<'ctx>,
}
impl<'ctx> BlockCtx<'ctx> {
pub fn new(cur_bid: BasicBlockId, cur_llbb: BasicBlock<'ctx>) -> Self {
Self { cur_bid, cur_llbb }
}
}

View File

@ -73,7 +73,6 @@ pub(in super::super) fn emit_jump<'ctx, 'b>(
BasicBlockId,
Vec<(ValueId, PhiValue<'ctx>, Vec<(BasicBlockId, ValueId)>)>,
>,
vmap: &HashMap<ValueId, BasicValueEnum<'ctx>>,
) -> Result<(), String> {
// Non-sealed incoming wiring removed: rely on sealed snapshots and resolver-driven PHIs.
let tbb = *bb_map.get(target).ok_or("target bb missing")?;
@ -193,8 +192,6 @@ pub(in super::super) fn seal_block<'ctx, 'b>(
>,
// Snapshot of value map at end of each predecessor block
block_end_values: &HashMap<BasicBlockId, HashMap<ValueId, BasicValueEnum<'ctx>>>,
// Fallback: current vmap (used only if snapshot missing)
vmap: &HashMap<ValueId, BasicValueEnum<'ctx>>,
) -> Result<(), String> {
if let Some(slist) = succs.get(&bid) {
for sb in slist {

View File

@ -214,3 +214,26 @@ pub fn normalize_header_phis_for_latch<'ctx>(
}
Ok(())
}
/// Dev diagnostic: report which blocks carry PHI nodes while LoopForm is in use.
///
/// Does nothing unless the `NYASH_DEV_CHECK_DISPATCH_ONLY_PHI` environment
/// variable is exactly `"1"`. When enabled and `loopform_registry` is
/// non-empty, prints one line to stderr for every block in `phis_by_block`
/// that has at least one PHI.
///
/// This is best-effort: the registry is only used as a "LoopForm is active"
/// signal. The function does not yet check whether a reported block is one of
/// the registered dispatch blocks, so PHIs inside dispatch blocks are reported
/// as well.
///
/// Blocks are reported in ascending block-id order; `HashMap` iteration order
/// is unspecified, and stable output makes the diagnostics comparable across
/// runs.
pub(in super::super) fn dev_check_dispatch_only_phi<'ctx>(
    phis_by_block: &std::collections::HashMap<
        crate::mir::BasicBlockId,
        Vec<(crate::mir::ValueId, inkwell::values::PhiValue<'ctx>, Vec<(crate::mir::BasicBlockId, crate::mir::ValueId)>)>,
    >,
    loopform_registry: &std::collections::HashMap<
        crate::mir::BasicBlockId,
        (inkwell::basic_block::BasicBlock<'ctx>, inkwell::values::PhiValue<'ctx>, inkwell::values::PhiValue<'ctx>, inkwell::basic_block::BasicBlock<'ctx>)
    >,
) {
    if std::env::var("NYASH_DEV_CHECK_DISPATCH_ONLY_PHI").ok().as_deref() != Some("1") {
        return;
    }
    // Without any LoopForm registration there is nothing to compare against.
    if loopform_registry.is_empty() {
        return;
    }

    // Collect (block id, PHI count) pairs first so they can be sorted.
    let mut report: Vec<_> = phis_by_block
        .iter()
        .filter(|(_, phis)| !phis.is_empty())
        .map(|(bid, phis)| (bid.as_u32(), phis.len()))
        .collect();
    report.sort_unstable();

    for (bid, count) in report {
        eprintln!("[DEV][PHI] bb={} has {} PHI(s)", bid, count);
    }
}

View File

@ -4,6 +4,8 @@ pub mod flow;
mod externcall;
mod newbox;
mod boxcall;
pub mod ctx;
pub mod string_ops;
mod arith;
mod mem;
mod consts;
@ -19,7 +21,7 @@ pub(super) use blocks::{create_basic_blocks, precreate_phis};
pub(super) use flow::{emit_branch, emit_jump, emit_return};
pub(super) use externcall::lower_externcall;
pub(super) use newbox::lower_newbox;
pub(super) use boxcall::lower_boxcall;
pub(super) use boxcall::{lower_boxcall, lower_boxcall_boxed};
pub(super) use arith::lower_compare;
pub(super) use mem::{lower_load, lower_store};
pub(super) use consts::lower_const;
@ -27,4 +29,5 @@ pub(super) use arith_ops::{lower_binop, lower_unary};
pub(super) use call::lower_call;
pub(super) use loopform::{LoopFormContext, lower_while_loopform};
pub(super) use loopform::normalize_header_phis_for_latch;
pub(super) use loopform::dev_check_dispatch_only_phi;
pub(super) use resolver::Resolver;

View File

@ -0,0 +1,16 @@
use inkwell::values::{IntValue, PointerValue};
/// String handle as an integer value.
///
/// One of two lightweight newtypes for string representations used in
/// lowering: `StrHandle` is the form that crosses basic blocks, while
/// [`StrPtr`] is created at call sites within the same block.
#[derive(Debug, Clone, Copy)]
pub struct StrHandle<'ctx>(pub IntValue<'ctx>);

/// String pointer value, created at call sites within the same block.
///
/// See [`StrHandle`] for the representation that crosses basic blocks.
#[derive(Debug, Clone, Copy)]
pub struct StrPtr<'ctx>(pub PointerValue<'ctx>);

impl<'ctx> StrHandle<'ctx> {
    /// Returns the wrapped integer value.
    #[inline]
    pub fn as_i64(&self) -> IntValue<'ctx> {
        self.0
    }
}

impl<'ctx> StrPtr<'ctx> {
    /// Returns the wrapped pointer value.
    #[inline]
    pub fn as_ptr(&self) -> PointerValue<'ctx> {
        self.0
    }
}

impl<'ctx> From<IntValue<'ctx>> for StrHandle<'ctx> {
    fn from(h: IntValue<'ctx>) -> Self {
        Self(h)
    }
}

impl<'ctx> From<PointerValue<'ctx>> for StrPtr<'ctx> {
    fn from(p: PointerValue<'ctx>) -> Self {
        Self(p)
    }
}