diff --git a/docs/development/architecture/box-externcall-design.md b/docs/development/architecture/box-externcall-design.md new file mode 100644 index 00000000..e7dd2e85 --- /dev/null +++ b/docs/development/architecture/box-externcall-design.md @@ -0,0 +1,77 @@ +# Box/ExternCall Architecture Design Decision + +## Date: 2025-09-11 + +### Background +During LLVM backend development, confusion arose about the proper boundary between: +- ExternCall (core runtime functions) +- BoxCall (unified Box method dispatch) +- nyrt (Nyash runtime library) + +### Design Decision + +#### 1. **nyrt Built-in Core Boxes** +The following boxes are built into nyrt for self-hosting stability: + +```text +// crates/nyrt/src/core_boxes/ +├── integer.rs // IntegerBox (arithmetic, comparison) +├── string.rs // StringBox (string operations) +├── array.rs // ArrayBox (array operations) +├── map.rs // MapBox (key-value storage) +└── bool.rs // BoolBox (logical operations) +``` + +**Rationale**: +- Essential for self-hosting (compiler needs these) +- Available at boot time (no plugin loader dependency) +- High performance (no FFI overhead) +- Environment independent + +#### 2. **Plugin Boxes** +All other boxes are implemented as plugins: +``` +plugins/ +├── file/ // FileBox +├── net/ // NetBox +└── custom/ // User-defined boxes +``` + +#### 3. **Minimal ExternCall** +ExternCall is limited to truly external operations: + +```rust +// Only these 5 functions! +extern nyash.io.print(handle: i64) +extern nyash.io.error(handle: i64) +extern nyash.runtime.panic(handle: i64) +extern nyash.runtime.exit(code: i64) +extern nyash.time.now() -> i64 +``` + +### Key Principle: Everything Goes Through BoxCall + +```nyash +local s = new StringBox("Hello") // BoxCall → nyrt built-in +local f = new FileBox() // BoxCall → plugin +s.concat(" World") // BoxCall (unified interface) +``` + +Even core boxes use the same BoxCall mechanism - no special fast paths! 
+ +### Trade-offs Considered + +**Option 1: Everything as plugins** +- ✅ Beautiful uniformity +- ❌ Complex bootstrap +- ❌ Performance overhead +- ❌ Environment dependencies + +**Option 2: Core boxes in nyrt (chosen)** +- ✅ Simple, stable bootstrap +- ✅ Self-hosting friendly +- ✅ High performance for basics +- ❌ Slightly larger core + +### Conclusion +This design prioritizes self-hosting stability while maintaining the "Everything is Box" philosophy through a unified BoxCall interface. \ No newline at end of file diff --git a/src/backend/llvm/compiler/codegen/mod.rs b/src/backend/llvm/compiler/codegen/mod.rs index 73df005b..153a1954 100644 --- a/src/backend/llvm/compiler/codegen/mod.rs +++ b/src/backend/llvm/compiler/codegen/mod.rs @@ -348,638 +348,6 @@ impl LLVMCompiler { &entry_builder, )?; continue; - let i64t = codegen.context.i64_type(); - // Receiver handle (i64) - let recv_v = *vmap.get(box_val).ok_or("box receiver missing")?; - // Accept either an opaque pointer (i8*) or an i64 handle for the receiver - let recv_p = match recv_v { - BasicValueEnum::PointerValue(pv) => pv, - BasicValueEnum::IntValue(iv) => { - // Treat as Nyash handle and convert to opaque pointer - let pty = codegen.context.ptr_type(AddressSpace::from(0)); - codegen - .builder - .build_int_to_ptr(iv, pty, "recv_i2p") - .map_err(|e| e.to_string())? 
- } - _ => { - return Err("box receiver must be pointer or i64 handle".to_string()) - } - }; - let recv_h = codegen - .builder - .build_ptr_to_int(recv_p, i64t, "recv_p2i") - .map_err(|e| e.to_string())?; - // Resolve type_id from metadata (Box("Type")) using box_type_ids - let type_id: i64 = if let Some(crate::mir::MirType::Box(bname)) = - func.metadata.value_types.get(box_val) - { - *box_type_ids.get(bname).unwrap_or(&0) - } else if let Some(crate::mir::MirType::String) = - func.metadata.value_types.get(box_val) - { - *box_type_ids.get("StringBox").unwrap_or(&0) - } else { - 0 - }; - - // Special-case ArrayBox get/set/push/length until general by-id is widely annotated - if let Some(crate::mir::MirType::Box(bname)) = - func.metadata.value_types.get(box_val) - { - if bname == "ArrayBox" - && (method == "get" - || method == "set" - || method == "push" - || method == "length") - { - match method.as_str() { - "get" => { - if args.len() != 1 { - return Err("ArrayBox.get expects 1 arg".to_string()); - } - let idx_v = - *vmap.get(&args[0]).ok_or("array.get index missing")?; - let idx_i = if let BasicValueEnum::IntValue(iv) = idx_v { - iv - } else { - return Err("array.get index must be int".to_string()); - }; - let fnty = i64t.fn_type(&[i64t.into(), i64t.into()], false); - let callee = codegen - .module - .get_function("nyash_array_get_h") - .unwrap_or_else(|| { - codegen.module.add_function( - "nyash_array_get_h", - fnty, - None, - ) - }); - let call = codegen - .builder - .build_call( - callee, - &[recv_h.into(), idx_i.into()], - "aget", - ) - .map_err(|e| e.to_string())?; - if let Some(d) = dst { - let rv = call - .try_as_basic_value() - .left() - .ok_or("array_get_h returned void".to_string())?; - vmap.insert(*d, rv); - } - } - "set" => { - if args.len() != 2 { - return Err("ArrayBox.set expects 2 arg".to_string()); - } - let idx_v = - *vmap.get(&args[0]).ok_or("array.set index missing")?; - let val_v = - *vmap.get(&args[1]).ok_or("array.set value missing")?; - let 
idx_i = if let BasicValueEnum::IntValue(iv) = idx_v { - iv - } else { - return Err("array.set index must be int".to_string()); - }; - let val_i = if let BasicValueEnum::IntValue(iv) = val_v { - iv - } else { - return Err("array.set value must be int".to_string()); - }; - let fnty = i64t.fn_type( - &[i64t.into(), i64t.into(), i64t.into()], - false, - ); - let callee = codegen - .module - .get_function("nyash_array_set_h") - .unwrap_or_else(|| { - codegen.module.add_function( - "nyash_array_set_h", - fnty, - None, - ) - }); - let _ = codegen - .builder - .build_call( - callee, - &[recv_h.into(), idx_i.into(), val_i.into()], - "aset", - ) - .map_err(|e| e.to_string())?; - } - "push" => { - if args.len() != 1 { - return Err("ArrayBox.push expects 1 arg".to_string()); - } - let val_v = *vmap - .get(&args[0]) - .ok_or("array.push value missing")?; - let val_i = - match val_v { - BasicValueEnum::IntValue(iv) => iv, - BasicValueEnum::PointerValue(pv) => codegen - .builder - .build_ptr_to_int(pv, i64t, "val_p2i") - .map_err(|e| e.to_string())?, - _ => return Err( - "array.push value must be int or handle ptr" - .to_string(), - ), - }; - let fnty = i64t.fn_type(&[i64t.into(), i64t.into()], false); - let callee = codegen - .module - .get_function("nyash_array_push_h") - .unwrap_or_else(|| { - codegen.module.add_function( - "nyash_array_push_h", - fnty, - None, - ) - }); - let _ = codegen - .builder - .build_call( - callee, - &[recv_h.into(), val_i.into()], - "apush", - ) - .map_err(|e| e.to_string())?; - } - "length" => { - if !args.is_empty() { - return Err("ArrayBox.length expects 0 arg".to_string()); - } - let fnty = i64t.fn_type(&[i64t.into()], false); - let callee = codegen - .module - .get_function("nyash_array_length_h") - .unwrap_or_else(|| { - codegen.module.add_function( - "nyash_array_length_h", - fnty, - None, - ) - }); - let call = codegen - .builder - .build_call(callee, &[recv_h.into()], "alen") - .map_err(|e| e.to_string())?; - if let Some(d) = dst { - let rv 
= call.try_as_basic_value().left().ok_or( - "array_length_h returned void".to_string(), - )?; - vmap.insert(*d, rv); - } - } - _ => {} - } - } - } - - // Instance field helpers: getField/setField (safe path) - if method == "getField" { - if args.len() != 1 { - return Err("getField expects 1 arg (name)".to_string()); - } - let name_v = *vmap.get(&args[0]).ok_or("getField name missing")?; - let name_p = if let BasicValueEnum::PointerValue(pv) = name_v { - pv - } else { - return Err("getField name must be pointer".to_string()); - }; - let i8p = codegen.context.ptr_type(AddressSpace::from(0)); - let fnty = i64t.fn_type(&[i64t.into(), i8p.into()], false); - let callee = codegen - .module - .get_function("nyash.instance.get_field_h") - .unwrap_or_else(|| { - codegen.module.add_function( - "nyash.instance.get_field_h", - fnty, - None, - ) - }); - let call = codegen - .builder - .build_call(callee, &[recv_h.into(), name_p.into()], "getField") - .map_err(|e| e.to_string())?; - if let Some(d) = dst { - let rv = call - .try_as_basic_value() - .left() - .ok_or("get_field returned void".to_string())?; - // rv is i64 handle; convert to i8* - let h = if let BasicValueEnum::IntValue(iv) = rv { - iv - } else { - return Err("get_field ret expected i64".to_string()); - }; - let pty = codegen.context.ptr_type(AddressSpace::from(0)); - let ptr = codegen - .builder - .build_int_to_ptr(h, pty, "gf_handle_to_ptr") - .map_err(|e| e.to_string())?; - vmap.insert(*d, ptr.into()); - } - // no early return; continue lowering - } - if method == "setField" { - if args.len() != 2 { - return Err("setField expects 2 args (name, value)".to_string()); - } - let name_v = *vmap.get(&args[0]).ok_or("setField name missing")?; - let val_v = *vmap.get(&args[1]).ok_or("setField value missing")?; - let name_p = if let BasicValueEnum::PointerValue(pv) = name_v { - pv - } else { - return Err("setField name must be pointer".to_string()); - }; - let val_h = match val_v { - BasicValueEnum::PointerValue(pv) => 
codegen - .builder - .build_ptr_to_int(pv, i64t, "val_p2i") - .map_err(|e| e.to_string())?, - BasicValueEnum::IntValue(iv) => iv, - _ => { - return Err( - "setField value must be handle/ptr or i64".to_string() - ) - } - }; - let i8p = codegen.context.ptr_type(AddressSpace::from(0)); - let fnty = i64t.fn_type(&[i64t.into(), i8p.into(), i64t.into()], false); - let callee = codegen - .module - .get_function("nyash.instance.set_field_h") - .unwrap_or_else(|| { - codegen.module.add_function( - "nyash.instance.set_field_h", - fnty, - None, - ) - }); - let _ = codegen - .builder - .build_call( - callee, - &[recv_h.into(), name_p.into(), val_h.into()], - "setField", - ) - .map_err(|e| e.to_string())?; - // no early return; continue lowering - } - - // General by-id invoke when method_id is available - if let Some(mid) = method_id { - // Prepare up to 4 args (i64 or f64 bits or handle) - let argc_val = i64t.const_int(args.len() as u64, false); - let mut a1 = i64t.const_zero(); - let mut a2 = i64t.const_zero(); - let mut a3 = i64t.const_zero(); - let mut a4 = i64t.const_zero(); - let get_i64 = - |vid: ValueId| -> Result { - let v = *vmap.get(&vid).ok_or("arg missing")?; - to_i64_any(codegen.context, &codegen.builder, v) - }; - if args.len() >= 1 { - a1 = get_i64(args[0])?; - } - if args.len() >= 2 { - a2 = get_i64(args[1])?; - } - if args.len() >= 3 { - a3 = get_i64(args[2])?; - } - if args.len() >= 4 { - a4 = get_i64(args[3])?; - } - // Choose return ABI by dst annotated type - let dst_ty = - dst.as_ref().and_then(|d| func.metadata.value_types.get(d)); - let use_f64_ret = matches!(dst_ty, Some(crate::mir::MirType::Float)); - if use_f64_ret { - // declare double @nyash_plugin_invoke3_f64(i64,i64,i64,i64,i64,i64) - let fnty = codegen.context.f64_type().fn_type( - &[ - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - ], - false, - ); - let callee = codegen - .module - .get_function("nyash_plugin_invoke3_f64") - .unwrap_or_else(|| { - 
codegen.module.add_function( - "nyash_plugin_invoke3_f64", - fnty, - None, - ) - }); - let tid = i64t.const_int(type_id as u64, true); - let midv = i64t.const_int((*mid) as u64, false); - let call = codegen - .builder - .build_call( - callee, - &[ - tid.into(), - midv.into(), - argc_val.into(), - recv_h.into(), - a1.into(), - a2.into(), - ], - "pinvoke_f64", - ) - .map_err(|e| e.to_string())?; - if let Some(d) = dst { - let rv = call - .try_as_basic_value() - .left() - .ok_or("invoke3_f64 returned void".to_string())?; - vmap.insert(*d, rv); - } - return Ok(()); - } - // For argument typing, use tagged variant to allow f64/handle - // Prepare tags for a1..a4: 5=float, 8=handle(ptr), 3=int - let mut tag1 = i64t.const_int(3, false); - let mut tag2 = i64t.const_int(3, false); - let mut tag3 = i64t.const_int(3, false); - let mut tag4 = i64t.const_int(3, false); - let classify = |vid: ValueId| -> Option { - vmap.get(&vid).map(|v| classify_tag(*v)) - }; - if args.len() >= 1 { - if let Some(t) = classify(args[0]) { - tag1 = i64t.const_int(t as u64, false); - } - } - if args.len() >= 2 { - if let Some(t) = classify(args[1]) { - tag2 = i64t.const_int(t as u64, false); - } - } - if args.len() >= 3 { - if let Some(t) = classify(args[2]) { - tag3 = i64t.const_int(t as u64, false); - } - } - if args.len() >= 4 { - if let Some(t) = classify(args[3]) { - tag4 = i64t.const_int(t as u64, false); - } - } - if args.len() <= 4 { - // Call fixed-arity tagged shim (up to 4 args) - let fnty = i64t.fn_type( - &[ - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - ], - false, - ); - let callee = codegen - .module - .get_function("nyash_plugin_invoke3_tagged_i64") - .unwrap_or_else(|| { - codegen.module.add_function( - "nyash_plugin_invoke3_tagged_i64", - fnty, - None, - ) - }); - let tid = i64t.const_int(type_id as u64, true); - let midv = i64t.const_int((*mid) 
as u64, false); - let call = codegen - .builder - .build_call( - callee, - &[ - tid.into(), - midv.into(), - argc_val.into(), - recv_h.into(), - a1.into(), - tag1.into(), - a2.into(), - tag2.into(), - a3.into(), - tag3.into(), - a4.into(), - tag4.into(), - ], - "pinvoke_tagged", - ) - .map_err(|e| e.to_string())?; - if let Some(d) = dst { - let rv = call - .try_as_basic_value() - .left() - .ok_or("invoke3_i64 returned void".to_string())?; - // Decide return lowering by dst annotated type - if let Some(mt) = func.metadata.value_types.get(d) { - match mt { - crate::mir::MirType::Integer - | crate::mir::MirType::Bool => { - vmap.insert(*d, rv); - } - crate::mir::MirType::Box(_) - | crate::mir::MirType::String - | crate::mir::MirType::Array(_) - | crate::mir::MirType::Future(_) - | crate::mir::MirType::Unknown => { - let h = if let BasicValueEnum::IntValue(iv) = rv { - iv - } else { - return Err( - "invoke ret expected i64".to_string() - ); - }; - let pty = codegen - .context - .ptr_type(AddressSpace::from(0)); - let ptr = codegen - .builder - .build_int_to_ptr(h, pty, "ret_handle_to_ptr") - .map_err(|e| e.to_string())?; - vmap.insert(*d, ptr.into()); - } - _ => { - vmap.insert(*d, rv); - } - } - } else { - vmap.insert(*d, rv); - } - } - } else { - // Variable-length path: build arrays of values/tags and call vector shim - let n = args.len() as u32; - // alloca [N x i64] for vals and tags - let arr_ty = i64t.array_type(n); - let vals_arr = entry_builder - .build_alloca(arr_ty, "vals_arr") - .map_err(|e| e.to_string())?; - let tags_arr = entry_builder - .build_alloca(arr_ty, "tags_arr") - .map_err(|e| e.to_string())?; - for (i, vid) in args.iter().enumerate() { - let idx = [ - codegen.context.i32_type().const_zero(), - codegen.context.i32_type().const_int(i as u64, false), - ]; - let gep_v = unsafe { - codegen - .builder - .build_in_bounds_gep( - arr_ty, - vals_arr, - &idx, - &format!("v_gep_{}", i), - ) - .map_err(|e| e.to_string())? 
- }; - let gep_t = unsafe { - codegen - .builder - .build_in_bounds_gep( - arr_ty, - tags_arr, - &idx, - &format!("t_gep_{}", i), - ) - .map_err(|e| e.to_string())? - }; - let vi = get_i64(*vid)?; - let tag = classify(*vid).unwrap_or(3); - let tagv = i64t.const_int(tag as u64, false); - codegen - .builder - .build_store(gep_v, vi) - .map_err(|e| e.to_string())?; - codegen - .builder - .build_store(gep_t, tagv) - .map_err(|e| e.to_string())?; - } - // cast to i64* pointers - let i64p = codegen.context.ptr_type(AddressSpace::from(0)); - let vals_ptr = codegen - .builder - .build_pointer_cast(vals_arr, i64p, "vals_ptr") - .map_err(|e| e.to_string())?; - let tags_ptr = codegen - .builder - .build_pointer_cast(tags_arr, i64p, "tags_ptr") - .map_err(|e| e.to_string())?; - // declare i64 @nyash.plugin.invoke_tagged_v_i64(i64,i64,i64,i64,i64*,i64*) - let fnty = i64t.fn_type( - &[ - i64t.into(), - i64t.into(), - i64t.into(), - i64t.into(), - i64p.into(), - i64p.into(), - ], - false, - ); - let callee = codegen - .module - .get_function("nyash.plugin.invoke_tagged_v_i64") - .unwrap_or_else(|| { - codegen.module.add_function( - "nyash.plugin.invoke_tagged_v_i64", - fnty, - None, - ) - }); - let tid = i64t.const_int(type_id as u64, true); - let midv = i64t.const_int((*mid) as u64, false); - let call = codegen - .builder - .build_call( - callee, - &[ - tid.into(), - midv.into(), - argc_val.into(), - recv_h.into(), - vals_ptr.into(), - tags_ptr.into(), - ], - "pinvoke_tagged_v", - ) - .map_err(|e| e.to_string())?; - if let Some(d) = dst { - let rv = call - .try_as_basic_value() - .left() - .ok_or("invoke_v returned void".to_string())?; - if let Some(mt) = func.metadata.value_types.get(d) { - match mt { - crate::mir::MirType::Integer - | crate::mir::MirType::Bool => { - vmap.insert(*d, rv); - } - crate::mir::MirType::Box(_) - | crate::mir::MirType::String - | crate::mir::MirType::Array(_) - | crate::mir::MirType::Future(_) - | crate::mir::MirType::Unknown => { - let h = if let 
BasicValueEnum::IntValue(iv) = rv { - iv - } else { - return Err( - "invoke ret expected i64".to_string() - ); - }; - let pty = codegen - .context - .ptr_type(AddressSpace::from(0)); - let ptr = codegen - .builder - .build_int_to_ptr(h, pty, "ret_handle_to_ptr") - .map_err(|e| e.to_string())?; - vmap.insert(*d, ptr.into()); - } - _ => { - vmap.insert(*d, rv); - } - } - } else { - vmap.insert(*d, rv); - } - } - } - // handled above per-branch - } else { - return Err(format!("BoxCall requires method_id for method '{}'. The method_id should be automatically injected during MIR compilation.", method)); - } } MirInstruction::ExternCall { dst, iface_name, method_name, args, effects: _ } => { instructions::lower_externcall(&codegen, func, &mut vmap, dst, iface_name, method_name, args)?;