fix(llvm): String/Plugin return value handling improvements by ChatGPT

Major fixes for LLVM backend string and plugin return value handling:

1. MIR Type Annotations:
   - Added StringBox method return types (substring/concat/replace/trim/toUpper/toLower)
   - Enhanced type inference for BoxCall operations

2. LLVM BinOp String Concatenation:
   - Added safe handle-to-pointer conversion paths
   - Support for ptr+i64 and i64+ptr concatenation patterns
   - Uses nyash.string.concat_hh for handle-based concatenation

3. ExternCall Selection:
   - Selects the C string (i8*) variant when the argument is a pointer; otherwise converts the argument to i64 and calls the handle variant
   - Improved print/log function selection based on argument types

4. StringBox Fast-path Optimization:
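   - Direct AOT concatenation for StringBox.concat via nyash.string.concat_ss/concat_si/concat_is (ptr+ptr, ptr+i64, i64+ptr); other operand shapes fall through to the generic path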
   - Bypasses plugin path for better performance

5. Consistent String Representation:
   - AOT uses i8* (C string) as primary representation
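   - Handles (i64) are used for the print/concat auxiliary paths and for String-typed invoke return values, which are kept as handles rather than cast to i8*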

6. Build Fix:
   - Removed duplicate plugin.rs to resolve nyrt build conflicts

Results: LLVM plugin return smoke tests now pass (NYASH_LLVM_PLUGIN_RET_SMOKE=1)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Selfhosting Dev
2025-09-11 20:18:53 +09:00
parent b9f9e81c72
commit e7ad2191de
3 changed files with 132 additions and 2774 deletions

File diff suppressed because it is too large Load Diff

View File

@ -508,9 +508,38 @@ pub(super) fn lower_externcall<'ctx>(
|| (iface_name == "env.debug" && method_name == "trace") || (iface_name == "env.debug" && method_name == "trace")
{ {
if args.len() != 1 { if args.len() != 1 {
return Err(format!("{}.{} expects 1 arg (handle)", iface_name, method_name)); return Err(format!("{}.{} expects 1 arg", iface_name, method_name));
} }
let av = *vmap.get(&args[0]).ok_or("extern arg missing")?; let av = *vmap.get(&args[0]).ok_or("extern arg missing")?;
match av {
// If argument is i8* (string), call string variant
BVE::PointerValue(pv) => {
let i8p = codegen.context.ptr_type(AddressSpace::from(0));
let fnty = codegen.context.i64_type().fn_type(&[i8p.into()], false);
let fname = if iface_name == "env.console" {
match method_name {
"log" => "nyash.console.log",
"warn" => "nyash.console.warn",
_ => "nyash.console.error",
}
} else {
"nyash.debug.trace"
};
let callee = codegen
.module
.get_function(fname)
.unwrap_or_else(|| codegen.module.add_function(fname, fnty, None));
let _ = codegen
.builder
.build_call(callee, &[pv.into()], "console_log_p")
.map_err(|e| e.to_string())?;
if let Some(d) = dst {
vmap.insert(*d, codegen.context.i64_type().const_zero().into());
}
return Ok(());
}
// Otherwise, convert to i64 and call handle variant
_ => {
let arg_val = match av { let arg_val = match av {
BVE::IntValue(iv) => { BVE::IntValue(iv) => {
if iv.get_type() == codegen.context.bool_type() { if iv.get_type() == codegen.context.bool_type() {
@ -527,10 +556,7 @@ pub(super) fn lower_externcall<'ctx>(
.map_err(|e| e.to_string())? .map_err(|e| e.to_string())?
} }
} }
BVE::PointerValue(pv) => codegen BVE::PointerValue(_) => unreachable!(),
.builder
.build_ptr_to_int(pv, codegen.context.i64_type(), "p2i")
.map_err(|e| e.to_string())?,
_ => return Err("console.log arg conversion failed".to_string()), _ => return Err("console.log arg conversion failed".to_string()),
}; };
let fnty = codegen let fnty = codegen
@ -559,6 +585,8 @@ pub(super) fn lower_externcall<'ctx>(
} }
return Ok(()); return Ok(());
} }
}
}
if iface_name == "env.console" && method_name == "readLine" { if iface_name == "env.console" && method_name == "readLine" {
if !args.is_empty() { if !args.is_empty() {
@ -898,15 +926,11 @@ pub(super) fn lower_newbox<'ctx>(
use inkwell::values::BasicValueEnum as BVE; use inkwell::values::BasicValueEnum as BVE;
match (box_type, args.len()) { match (box_type, args.len()) {
("StringBox", 1) => { ("StringBox", 1) => {
// Keep as i8* string pointer (AOT string fast-path)
let av = *vmap.get(&args[0]).ok_or("StringBox arg missing")?; let av = *vmap.get(&args[0]).ok_or("StringBox arg missing")?;
vmap.insert(dst, av); vmap.insert(dst, av);
Ok(()) Ok(())
} }
("IntegerBox", 1) => {
let av = *vmap.get(&args[0]).ok_or("IntegerBox arg missing")?;
vmap.insert(dst, av);
Ok(())
}
(_, n) if n == 1 || n == 2 => { (_, n) if n == 1 || n == 2 => {
let type_id = *box_type_ids.get(box_type).unwrap_or(&0); let type_id = *box_type_ids.get(box_type).unwrap_or(&0);
let i64t = codegen.context.i64_type(); let i64t = codegen.context.i64_type();
@ -1043,6 +1067,51 @@ pub(super) fn lower_boxcall<'ctx>(
0 0
}; };
// String concat fast-path (avoid plugin path for builtin StringBox)
if method == "concat" {
// Recognize receiver typed as String or StringBox
let is_string_recv = match func.metadata.value_types.get(box_val) {
Some(crate::mir::MirType::String) => true,
Some(crate::mir::MirType::Box(b)) if b == "StringBox" => true,
_ => false,
};
if is_string_recv {
if args.len() != 1 { return Err("String.concat expects 1 arg".to_string()); }
// Prefer pointer-based concat to keep AOT string fast-path
let i8p = codegen.context.ptr_type(AddressSpace::from(0));
let rhs_v = *vmap.get(&args[0]).ok_or("concat arg missing")?;
match (recv_v, rhs_v) {
(BVE::PointerValue(lp), BVE::PointerValue(rp)) => {
let fnty = i8p.fn_type(&[i8p.into(), i8p.into()], false);
let callee = codegen.module.get_function("nyash.string.concat_ss").
unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_ss", fnty, None));
let call = codegen.builder.build_call(callee, &[lp.into(), rp.into()], "concat_ss_call").map_err(|e| e.to_string())?;
if let Some(d) = dst { let rv = call.try_as_basic_value().left().ok_or("concat_ss returned void".to_string())?; vmap.insert(*d, rv); }
return Ok(());
}
(BVE::PointerValue(lp), BVE::IntValue(ri)) => {
let i64t = codegen.context.i64_type();
let fnty = i8p.fn_type(&[i8p.into(), i64t.into()], false);
let callee = codegen.module.get_function("nyash.string.concat_si").
unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_si", fnty, None));
let call = codegen.builder.build_call(callee, &[lp.into(), ri.into()], "concat_si_call").map_err(|e| e.to_string())?;
if let Some(d) = dst { let rv = call.try_as_basic_value().left().ok_or("concat_si returned void".to_string())?; vmap.insert(*d, rv); }
return Ok(());
}
(BVE::IntValue(li), BVE::PointerValue(rp)) => {
let i64t = codegen.context.i64_type();
let fnty = i8p.fn_type(&[i64t.into(), i8p.into()], false);
let callee = codegen.module.get_function("nyash.string.concat_is").
unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_is", fnty, None));
let call = codegen.builder.build_call(callee, &[li.into(), rp.into()], "concat_is_call").map_err(|e| e.to_string())?;
if let Some(d) = dst { let rv = call.try_as_basic_value().left().ok_or("concat_is returned void".to_string())?; vmap.insert(*d, rv); }
return Ok(());
}
_ => { /* fall through to generic path below */ }
}
}
}
// Array fast-paths // Array fast-paths
if let Some(crate::mir::MirType::Box(bname)) = func.metadata.value_types.get(box_val) { if let Some(crate::mir::MirType::Box(bname)) = func.metadata.value_types.get(box_val) {
if bname == "ArrayBox" && (method == "get" || method == "set" || method == "push" || method == "length") { if bname == "ArrayBox" && (method == "get" || method == "set" || method == "push" || method == "length") {
@ -1194,7 +1263,10 @@ pub(super) fn lower_boxcall<'ctx>(
if let Some(mt) = func.metadata.value_types.get(d) { if let Some(mt) = func.metadata.value_types.get(d) {
match mt { match mt {
crate::mir::MirType::Integer | crate::mir::MirType::Bool => { vmap.insert(*d, rv); } crate::mir::MirType::Integer | crate::mir::MirType::Bool => { vmap.insert(*d, rv); }
crate::mir::MirType::Box(_) | crate::mir::MirType::String | crate::mir::MirType::Array(_) | crate::mir::MirType::Future(_) | crate::mir::MirType::Unknown => { // String: keep as i64 handle (do not cast to i8*)
crate::mir::MirType::String => { vmap.insert(*d, rv); }
// Box/Array/Future/Unknown: cast handle to opaque pointer
crate::mir::MirType::Box(_) | crate::mir::MirType::Array(_) | crate::mir::MirType::Future(_) | crate::mir::MirType::Unknown => {
let h = if let BVE::IntValue(iv) = rv { iv } else { return Err("invoke ret expected i64".to_string()); }; let h = if let BVE::IntValue(iv) = rv { iv } else { return Err("invoke ret expected i64".to_string()); };
let pty = codegen.context.ptr_type(AddressSpace::from(0)); let pty = codegen.context.ptr_type(AddressSpace::from(0));
let ptr = codegen.builder.build_int_to_ptr(h, pty, "ret_handle_to_ptr").map_err(|e| e.to_string())?; let ptr = codegen.builder.build_int_to_ptr(h, pty, "ret_handle_to_ptr").map_err(|e| e.to_string())?;
@ -1234,7 +1306,8 @@ pub(super) fn lower_boxcall<'ctx>(
if let Some(mt) = func.metadata.value_types.get(d) { if let Some(mt) = func.metadata.value_types.get(d) {
match mt { match mt {
crate::mir::MirType::Integer | crate::mir::MirType::Bool => { vmap.insert(*d, rv); } crate::mir::MirType::Integer | crate::mir::MirType::Bool => { vmap.insert(*d, rv); }
crate::mir::MirType::Box(_) | crate::mir::MirType::String | crate::mir::MirType::Array(_) | crate::mir::MirType::Future(_) | crate::mir::MirType::Unknown => { crate::mir::MirType::String => { vmap.insert(*d, rv); }
crate::mir::MirType::Box(_) | crate::mir::MirType::Array(_) | crate::mir::MirType::Future(_) | crate::mir::MirType::Unknown => {
let h = if let BVE::IntValue(iv) = rv { iv } else { return Err("invoke ret expected i64".to_string()); }; let h = if let BVE::IntValue(iv) = rv { iv } else { return Err("invoke ret expected i64".to_string()); };
let pty = codegen.context.ptr_type(AddressSpace::from(0)); let pty = codegen.context.ptr_type(AddressSpace::from(0));
let ptr = codegen.builder.build_int_to_ptr(h, pty, "ret_handle_to_ptr").map_err(|e| e.to_string())?; let ptr = codegen.builder.build_int_to_ptr(h, pty, "ret_handle_to_ptr").map_err(|e| e.to_string())?;

View File

@ -117,6 +117,13 @@ impl MirBuilder {
("StringBox", "length") | ("StringBox", "len") => Some(super::MirType::Integer), ("StringBox", "length") | ("StringBox", "len") => Some(super::MirType::Integer),
("StringBox", "is_empty") => Some(super::MirType::Bool), ("StringBox", "is_empty") => Some(super::MirType::Bool),
("StringBox", "charCodeAt") => Some(super::MirType::Integer), ("StringBox", "charCodeAt") => Some(super::MirType::Integer),
// String-producing methods (important for LLVM ret handling)
("StringBox", "substring")
| ("StringBox", "concat")
| ("StringBox", "replace")
| ("StringBox", "trim")
| ("StringBox", "toUpper")
| ("StringBox", "toLower") => Some(super::MirType::String),
("ArrayBox", "length") => Some(super::MirType::Integer), ("ArrayBox", "length") => Some(super::MirType::Integer),
_ => None, _ => None,
}; };
@ -854,7 +861,8 @@ impl MirBuilder {
self.value_origin_newbox.insert(dst, class.clone()); self.value_origin_newbox.insert(dst, class.clone());
// For plugin/builtin boxes, call birth(...). For user-defined boxes, skip (InstanceBox already constructed) // For plugin/builtin boxes, call birth(...). For user-defined boxes, skip (InstanceBox already constructed)
if !self.user_defined_boxes.contains(&class) { // Special-case: StringBox is already fully constructed via from_i8_string in LLVM lowering; skip birth
if !self.user_defined_boxes.contains(&class) && class != "StringBox" {
let birt_mid = resolve_slot_by_type_name(&class, "birth"); let birt_mid = resolve_slot_by_type_name(&class, "birth");
self.emit_box_or_plugin_call( self.emit_box_or_plugin_call(
None, None,