docs: Architecture decision - Box/ExternCall boundary design

Documented the architectural decision for the Nyash runtime design:

1. Core boxes (String/Integer/Array/Map/Bool) built into nyrt
   - Essential for self-hosting
   - Available at boot without plugin loader
   - High performance (no FFI overhead)

2. All other boxes as plugins (File/Net/User-defined)
   - Extensible ecosystem
   - Clear separation of concerns

3. Minimal ExternCall (only 5 functions)
   - print/error (output)
   - panic/exit (process control)
   - now (time)

Key principle: Everything goes through the BoxCall interface
- No special fast paths
- Unified architecture
- "Everything is Box" philosophy maintained

This design balances self-hosting requirements with architectural purity.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Selfhosting Dev
2025-09-11 20:58:18 +09:00
parent e7ad2191de
commit 0ac22427e5
2 changed files with 77 additions and 632 deletions

View File

@@ -0,0 +1,77 @@
# Box/ExternCall Architecture Design Decision
## Date: 2025-09-11
### Background
During LLVM backend development, confusion arose about the proper boundary between:
- ExternCall (core runtime functions)
- BoxCall (unified Box method dispatch)
- nyrt (Nyash runtime library)
### Design Decision
#### 1. **nyrt Built-in Core Boxes**
The following boxes are built into nyrt for self-hosting stability:
```rust
// crates/nyrt/src/core_boxes/
├── integer.rs // IntegerBox: arithmetic, comparison
├── string.rs // StringBox: string operations
├── array.rs // ArrayBox: array operations
├── map.rs // MapBox: key-value storage
└── bool.rs // BoolBox: logical operations
```
**Rationale**:
- Essential for self-hosting (compiler needs these)
- Available at boot time (no plugin loader dependency)
- High performance (no FFI overhead)
- Environment independent
#### 2. **Plugin Boxes**
All other boxes are implemented as plugins:
```
plugins/
├── file/ // FileBox
├── net/ // NetBox
└── custom/ // User-defined boxes
```
#### 3. **Minimal ExternCall**
ExternCall is limited to truly external operations:
```rust
// Only these 5 functions!
extern nyash.io.print(handle: i64)
extern nyash.io.error(handle: i64)
extern nyash.runtime.panic(handle: i64)
extern nyash.runtime.exit(code: i64)
extern nyash.time.now() -> i64
```
### Key Principle: Everything Goes Through BoxCall
```nyash
local s = new StringBox("Hello") // BoxCall → nyrt built-in
local f = new FileBox() // BoxCall → plugin
s.concat(" World") // BoxCall (unified interface)
```
Even core boxes use the same BoxCall mechanism - no special fast paths!
### Trade-offs Considered
**Option 1: Everything as plugins**
- ✅ Beautiful uniformity
- ❌ Complex bootstrap
- ❌ Performance overhead
- ❌ Environment dependencies
**Option 2: Core boxes in nyrt (chosen)**
- ✅ Simple, stable bootstrap
- ✅ Self-hosting friendly
- ✅ High performance for basics
- ❌ Slightly larger core
### Conclusion
This design prioritizes self-hosting stability while maintaining the "Everything is Box" philosophy through a unified BoxCall interface.

View File

@@ -348,638 +348,6 @@ impl LLVMCompiler {
&entry_builder,
)?;
continue;
let i64t = codegen.context.i64_type();
// Receiver handle (i64)
let recv_v = *vmap.get(box_val).ok_or("box receiver missing")?;
// Accept either an opaque pointer (i8*) or an i64 handle for the receiver
let recv_p = match recv_v {
BasicValueEnum::PointerValue(pv) => pv,
BasicValueEnum::IntValue(iv) => {
// Treat as Nyash handle and convert to opaque pointer
let pty = codegen.context.ptr_type(AddressSpace::from(0));
codegen
.builder
.build_int_to_ptr(iv, pty, "recv_i2p")
.map_err(|e| e.to_string())?
}
_ => {
return Err("box receiver must be pointer or i64 handle".to_string())
}
};
let recv_h = codegen
.builder
.build_ptr_to_int(recv_p, i64t, "recv_p2i")
.map_err(|e| e.to_string())?;
// Resolve type_id from metadata (Box("Type")) using box_type_ids
let type_id: i64 = if let Some(crate::mir::MirType::Box(bname)) =
func.metadata.value_types.get(box_val)
{
*box_type_ids.get(bname).unwrap_or(&0)
} else if let Some(crate::mir::MirType::String) =
func.metadata.value_types.get(box_val)
{
*box_type_ids.get("StringBox").unwrap_or(&0)
} else {
0
};
// Special-case ArrayBox get/set/push/length until general by-id is widely annotated
if let Some(crate::mir::MirType::Box(bname)) =
func.metadata.value_types.get(box_val)
{
if bname == "ArrayBox"
&& (method == "get"
|| method == "set"
|| method == "push"
|| method == "length")
{
match method.as_str() {
"get" => {
if args.len() != 1 {
return Err("ArrayBox.get expects 1 arg".to_string());
}
let idx_v =
*vmap.get(&args[0]).ok_or("array.get index missing")?;
let idx_i = if let BasicValueEnum::IntValue(iv) = idx_v {
iv
} else {
return Err("array.get index must be int".to_string());
};
let fnty = i64t.fn_type(&[i64t.into(), i64t.into()], false);
let callee = codegen
.module
.get_function("nyash_array_get_h")
.unwrap_or_else(|| {
codegen.module.add_function(
"nyash_array_get_h",
fnty,
None,
)
});
let call = codegen
.builder
.build_call(
callee,
&[recv_h.into(), idx_i.into()],
"aget",
)
.map_err(|e| e.to_string())?;
if let Some(d) = dst {
let rv = call
.try_as_basic_value()
.left()
.ok_or("array_get_h returned void".to_string())?;
vmap.insert(*d, rv);
}
}
"set" => {
if args.len() != 2 {
return Err("ArrayBox.set expects 2 arg".to_string());
}
let idx_v =
*vmap.get(&args[0]).ok_or("array.set index missing")?;
let val_v =
*vmap.get(&args[1]).ok_or("array.set value missing")?;
let idx_i = if let BasicValueEnum::IntValue(iv) = idx_v {
iv
} else {
return Err("array.set index must be int".to_string());
};
let val_i = if let BasicValueEnum::IntValue(iv) = val_v {
iv
} else {
return Err("array.set value must be int".to_string());
};
let fnty = i64t.fn_type(
&[i64t.into(), i64t.into(), i64t.into()],
false,
);
let callee = codegen
.module
.get_function("nyash_array_set_h")
.unwrap_or_else(|| {
codegen.module.add_function(
"nyash_array_set_h",
fnty,
None,
)
});
let _ = codegen
.builder
.build_call(
callee,
&[recv_h.into(), idx_i.into(), val_i.into()],
"aset",
)
.map_err(|e| e.to_string())?;
}
"push" => {
if args.len() != 1 {
return Err("ArrayBox.push expects 1 arg".to_string());
}
let val_v = *vmap
.get(&args[0])
.ok_or("array.push value missing")?;
let val_i =
match val_v {
BasicValueEnum::IntValue(iv) => iv,
BasicValueEnum::PointerValue(pv) => codegen
.builder
.build_ptr_to_int(pv, i64t, "val_p2i")
.map_err(|e| e.to_string())?,
_ => return Err(
"array.push value must be int or handle ptr"
.to_string(),
),
};
let fnty = i64t.fn_type(&[i64t.into(), i64t.into()], false);
let callee = codegen
.module
.get_function("nyash_array_push_h")
.unwrap_or_else(|| {
codegen.module.add_function(
"nyash_array_push_h",
fnty,
None,
)
});
let _ = codegen
.builder
.build_call(
callee,
&[recv_h.into(), val_i.into()],
"apush",
)
.map_err(|e| e.to_string())?;
}
"length" => {
if !args.is_empty() {
return Err("ArrayBox.length expects 0 arg".to_string());
}
let fnty = i64t.fn_type(&[i64t.into()], false);
let callee = codegen
.module
.get_function("nyash_array_length_h")
.unwrap_or_else(|| {
codegen.module.add_function(
"nyash_array_length_h",
fnty,
None,
)
});
let call = codegen
.builder
.build_call(callee, &[recv_h.into()], "alen")
.map_err(|e| e.to_string())?;
if let Some(d) = dst {
let rv = call.try_as_basic_value().left().ok_or(
"array_length_h returned void".to_string(),
)?;
vmap.insert(*d, rv);
}
}
_ => {}
}
}
}
// Instance field helpers: getField/setField (safe path)
if method == "getField" {
if args.len() != 1 {
return Err("getField expects 1 arg (name)".to_string());
}
let name_v = *vmap.get(&args[0]).ok_or("getField name missing")?;
let name_p = if let BasicValueEnum::PointerValue(pv) = name_v {
pv
} else {
return Err("getField name must be pointer".to_string());
};
let i8p = codegen.context.ptr_type(AddressSpace::from(0));
let fnty = i64t.fn_type(&[i64t.into(), i8p.into()], false);
let callee = codegen
.module
.get_function("nyash.instance.get_field_h")
.unwrap_or_else(|| {
codegen.module.add_function(
"nyash.instance.get_field_h",
fnty,
None,
)
});
let call = codegen
.builder
.build_call(callee, &[recv_h.into(), name_p.into()], "getField")
.map_err(|e| e.to_string())?;
if let Some(d) = dst {
let rv = call
.try_as_basic_value()
.left()
.ok_or("get_field returned void".to_string())?;
// rv is i64 handle; convert to i8*
let h = if let BasicValueEnum::IntValue(iv) = rv {
iv
} else {
return Err("get_field ret expected i64".to_string());
};
let pty = codegen.context.ptr_type(AddressSpace::from(0));
let ptr = codegen
.builder
.build_int_to_ptr(h, pty, "gf_handle_to_ptr")
.map_err(|e| e.to_string())?;
vmap.insert(*d, ptr.into());
}
// no early return; continue lowering
}
if method == "setField" {
if args.len() != 2 {
return Err("setField expects 2 args (name, value)".to_string());
}
let name_v = *vmap.get(&args[0]).ok_or("setField name missing")?;
let val_v = *vmap.get(&args[1]).ok_or("setField value missing")?;
let name_p = if let BasicValueEnum::PointerValue(pv) = name_v {
pv
} else {
return Err("setField name must be pointer".to_string());
};
let val_h = match val_v {
BasicValueEnum::PointerValue(pv) => codegen
.builder
.build_ptr_to_int(pv, i64t, "val_p2i")
.map_err(|e| e.to_string())?,
BasicValueEnum::IntValue(iv) => iv,
_ => {
return Err(
"setField value must be handle/ptr or i64".to_string()
)
}
};
let i8p = codegen.context.ptr_type(AddressSpace::from(0));
let fnty = i64t.fn_type(&[i64t.into(), i8p.into(), i64t.into()], false);
let callee = codegen
.module
.get_function("nyash.instance.set_field_h")
.unwrap_or_else(|| {
codegen.module.add_function(
"nyash.instance.set_field_h",
fnty,
None,
)
});
let _ = codegen
.builder
.build_call(
callee,
&[recv_h.into(), name_p.into(), val_h.into()],
"setField",
)
.map_err(|e| e.to_string())?;
// no early return; continue lowering
}
// General by-id invoke when method_id is available
if let Some(mid) = method_id {
// Prepare up to 4 args (i64 or f64 bits or handle)
let argc_val = i64t.const_int(args.len() as u64, false);
let mut a1 = i64t.const_zero();
let mut a2 = i64t.const_zero();
let mut a3 = i64t.const_zero();
let mut a4 = i64t.const_zero();
let get_i64 =
|vid: ValueId| -> Result<inkwell::values::IntValue, String> {
let v = *vmap.get(&vid).ok_or("arg missing")?;
to_i64_any(codegen.context, &codegen.builder, v)
};
if args.len() >= 1 {
a1 = get_i64(args[0])?;
}
if args.len() >= 2 {
a2 = get_i64(args[1])?;
}
if args.len() >= 3 {
a3 = get_i64(args[2])?;
}
if args.len() >= 4 {
a4 = get_i64(args[3])?;
}
// Choose return ABI by dst annotated type
let dst_ty =
dst.as_ref().and_then(|d| func.metadata.value_types.get(d));
let use_f64_ret = matches!(dst_ty, Some(crate::mir::MirType::Float));
if use_f64_ret {
// declare double @nyash_plugin_invoke3_f64(i64,i64,i64,i64,i64,i64)
let fnty = codegen.context.f64_type().fn_type(
&[
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
],
false,
);
let callee = codegen
.module
.get_function("nyash_plugin_invoke3_f64")
.unwrap_or_else(|| {
codegen.module.add_function(
"nyash_plugin_invoke3_f64",
fnty,
None,
)
});
let tid = i64t.const_int(type_id as u64, true);
let midv = i64t.const_int((*mid) as u64, false);
let call = codegen
.builder
.build_call(
callee,
&[
tid.into(),
midv.into(),
argc_val.into(),
recv_h.into(),
a1.into(),
a2.into(),
],
"pinvoke_f64",
)
.map_err(|e| e.to_string())?;
if let Some(d) = dst {
let rv = call
.try_as_basic_value()
.left()
.ok_or("invoke3_f64 returned void".to_string())?;
vmap.insert(*d, rv);
}
return Ok(());
}
// For argument typing, use tagged variant to allow f64/handle
// Prepare tags for a1..a4: 5=float, 8=handle(ptr), 3=int
let mut tag1 = i64t.const_int(3, false);
let mut tag2 = i64t.const_int(3, false);
let mut tag3 = i64t.const_int(3, false);
let mut tag4 = i64t.const_int(3, false);
let classify = |vid: ValueId| -> Option<i64> {
vmap.get(&vid).map(|v| classify_tag(*v))
};
if args.len() >= 1 {
if let Some(t) = classify(args[0]) {
tag1 = i64t.const_int(t as u64, false);
}
}
if args.len() >= 2 {
if let Some(t) = classify(args[1]) {
tag2 = i64t.const_int(t as u64, false);
}
}
if args.len() >= 3 {
if let Some(t) = classify(args[2]) {
tag3 = i64t.const_int(t as u64, false);
}
}
if args.len() >= 4 {
if let Some(t) = classify(args[3]) {
tag4 = i64t.const_int(t as u64, false);
}
}
if args.len() <= 4 {
// Call fixed-arity tagged shim (up to 4 args)
let fnty = i64t.fn_type(
&[
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
],
false,
);
let callee = codegen
.module
.get_function("nyash_plugin_invoke3_tagged_i64")
.unwrap_or_else(|| {
codegen.module.add_function(
"nyash_plugin_invoke3_tagged_i64",
fnty,
None,
)
});
let tid = i64t.const_int(type_id as u64, true);
let midv = i64t.const_int((*mid) as u64, false);
let call = codegen
.builder
.build_call(
callee,
&[
tid.into(),
midv.into(),
argc_val.into(),
recv_h.into(),
a1.into(),
tag1.into(),
a2.into(),
tag2.into(),
a3.into(),
tag3.into(),
a4.into(),
tag4.into(),
],
"pinvoke_tagged",
)
.map_err(|e| e.to_string())?;
if let Some(d) = dst {
let rv = call
.try_as_basic_value()
.left()
.ok_or("invoke3_i64 returned void".to_string())?;
// Decide return lowering by dst annotated type
if let Some(mt) = func.metadata.value_types.get(d) {
match mt {
crate::mir::MirType::Integer
| crate::mir::MirType::Bool => {
vmap.insert(*d, rv);
}
crate::mir::MirType::Box(_)
| crate::mir::MirType::String
| crate::mir::MirType::Array(_)
| crate::mir::MirType::Future(_)
| crate::mir::MirType::Unknown => {
let h = if let BasicValueEnum::IntValue(iv) = rv {
iv
} else {
return Err(
"invoke ret expected i64".to_string()
);
};
let pty = codegen
.context
.ptr_type(AddressSpace::from(0));
let ptr = codegen
.builder
.build_int_to_ptr(h, pty, "ret_handle_to_ptr")
.map_err(|e| e.to_string())?;
vmap.insert(*d, ptr.into());
}
_ => {
vmap.insert(*d, rv);
}
}
} else {
vmap.insert(*d, rv);
}
}
} else {
// Variable-length path: build arrays of values/tags and call vector shim
let n = args.len() as u32;
// alloca [N x i64] for vals and tags
let arr_ty = i64t.array_type(n);
let vals_arr = entry_builder
.build_alloca(arr_ty, "vals_arr")
.map_err(|e| e.to_string())?;
let tags_arr = entry_builder
.build_alloca(arr_ty, "tags_arr")
.map_err(|e| e.to_string())?;
for (i, vid) in args.iter().enumerate() {
let idx = [
codegen.context.i32_type().const_zero(),
codegen.context.i32_type().const_int(i as u64, false),
];
let gep_v = unsafe {
codegen
.builder
.build_in_bounds_gep(
arr_ty,
vals_arr,
&idx,
&format!("v_gep_{}", i),
)
.map_err(|e| e.to_string())?
};
let gep_t = unsafe {
codegen
.builder
.build_in_bounds_gep(
arr_ty,
tags_arr,
&idx,
&format!("t_gep_{}", i),
)
.map_err(|e| e.to_string())?
};
let vi = get_i64(*vid)?;
let tag = classify(*vid).unwrap_or(3);
let tagv = i64t.const_int(tag as u64, false);
codegen
.builder
.build_store(gep_v, vi)
.map_err(|e| e.to_string())?;
codegen
.builder
.build_store(gep_t, tagv)
.map_err(|e| e.to_string())?;
}
// cast to i64* pointers
let i64p = codegen.context.ptr_type(AddressSpace::from(0));
let vals_ptr = codegen
.builder
.build_pointer_cast(vals_arr, i64p, "vals_ptr")
.map_err(|e| e.to_string())?;
let tags_ptr = codegen
.builder
.build_pointer_cast(tags_arr, i64p, "tags_ptr")
.map_err(|e| e.to_string())?;
// declare i64 @nyash.plugin.invoke_tagged_v_i64(i64,i64,i64,i64,i64*,i64*)
let fnty = i64t.fn_type(
&[
i64t.into(),
i64t.into(),
i64t.into(),
i64t.into(),
i64p.into(),
i64p.into(),
],
false,
);
let callee = codegen
.module
.get_function("nyash.plugin.invoke_tagged_v_i64")
.unwrap_or_else(|| {
codegen.module.add_function(
"nyash.plugin.invoke_tagged_v_i64",
fnty,
None,
)
});
let tid = i64t.const_int(type_id as u64, true);
let midv = i64t.const_int((*mid) as u64, false);
let call = codegen
.builder
.build_call(
callee,
&[
tid.into(),
midv.into(),
argc_val.into(),
recv_h.into(),
vals_ptr.into(),
tags_ptr.into(),
],
"pinvoke_tagged_v",
)
.map_err(|e| e.to_string())?;
if let Some(d) = dst {
let rv = call
.try_as_basic_value()
.left()
.ok_or("invoke_v returned void".to_string())?;
if let Some(mt) = func.metadata.value_types.get(d) {
match mt {
crate::mir::MirType::Integer
| crate::mir::MirType::Bool => {
vmap.insert(*d, rv);
}
crate::mir::MirType::Box(_)
| crate::mir::MirType::String
| crate::mir::MirType::Array(_)
| crate::mir::MirType::Future(_)
| crate::mir::MirType::Unknown => {
let h = if let BasicValueEnum::IntValue(iv) = rv {
iv
} else {
return Err(
"invoke ret expected i64".to_string()
);
};
let pty = codegen
.context
.ptr_type(AddressSpace::from(0));
let ptr = codegen
.builder
.build_int_to_ptr(h, pty, "ret_handle_to_ptr")
.map_err(|e| e.to_string())?;
vmap.insert(*d, ptr.into());
}
_ => {
vmap.insert(*d, rv);
}
}
} else {
vmap.insert(*d, rv);
}
}
}
// handled above per-branch
} else {
return Err(format!("BoxCall requires method_id for method '{}'. The method_id should be automatically injected during MIR compilation.", method));
}
}
MirInstruction::ExternCall { dst, iface_name, method_name, args, effects: _ } => {
instructions::lower_externcall(&codegen, func, &mut vmap, dst, iface_name, method_name, args)?;