feat(llvm): Comprehensive LLVM backend improvements by ChatGPT
Major enhancements to LLVM code generation and type handling:

1. String Operations:
   - Added StringBox length fast-path (length/len methods)
   - Converts i8* to handle when needed for len_h call
   - Consistent handle-based string operations

2. Array/Map Fast-paths:
   - ArrayBox: get/set/push/length operations
   - MapBox: get/set/has/size with handle-based keys
   - Optimized paths for common collection operations

3. Field Access:
   - getField/setField implementation with handle conversion
   - Proper i64 handle to pointer conversions

4. NewBox Improvements:
   - StringBox/IntegerBox pass-through optimizations
   - Fallback to env.box.new when type_id unavailable
   - Support for dynamic box creation

5. Documentation:
   - Added ARCHITECTURE.md for overall design
   - Added EXTERNCALL.md for external call specs
   - Added LOWERING_LLVM.md for LLVM lowering rules
   - Added PLUGIN_ABI.md for plugin interface

6. Type System:
   - Added UserBox type registration in nyash_box.toml
   - Consistent handle (i64) representation across system

Results: More robust LLVM code generation with proper type handling

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -1112,6 +1112,58 @@ pub(super) fn lower_boxcall<'ctx>(
|
||||
}
|
||||
}
|
||||
|
||||
// String length fast-path: length/len
|
||||
if method == "length" || method == "len" {
|
||||
// Only when receiver is String/StringBox by annotation
|
||||
let is_string_recv = match func.metadata.value_types.get(box_val) {
|
||||
Some(crate::mir::MirType::String) => true,
|
||||
Some(crate::mir::MirType::Box(b)) if b == "StringBox" => true,
|
||||
_ => false,
|
||||
};
|
||||
if is_string_recv {
|
||||
let i64t = codegen.context.i64_type();
|
||||
// Ensure we have a handle: convert i8* receiver to handle when needed
|
||||
let recv_h = match recv_v {
|
||||
BVE::IntValue(h) => h,
|
||||
BVE::PointerValue(p) => {
|
||||
let fnty = i64t.fn_type(&[codegen.context.ptr_type(AddressSpace::from(0)).into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.box.from_i8_string")
|
||||
.unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty, None));
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[p.into()], "str_ptr_to_handle")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let rv = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("from_i8_string returned void".to_string())?;
|
||||
if let BVE::IntValue(iv) = rv { iv } else { return Err("from_i8_string ret expected i64".to_string()); }
|
||||
}
|
||||
_ => return Err("String.length receiver type unsupported".to_string()),
|
||||
};
|
||||
// call i64 @nyash.string.len_h(i64)
|
||||
let fnty = i64t.fn_type(&[i64t.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.string.len_h")
|
||||
.unwrap_or_else(|| codegen.module.add_function("nyash.string.len_h", fnty, None));
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[recv_h.into()], "strlen_h")
|
||||
.map_err(|e| e.to_string())?;
|
||||
if let Some(d) = dst {
|
||||
let rv = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("len_h returned void".to_string())?;
|
||||
vmap.insert(*d, rv);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
// Array fast-paths
|
||||
if let Some(crate::mir::MirType::Box(bname)) = func.metadata.value_types.get(box_val) {
|
||||
if bname == "ArrayBox" && (method == "get" || method == "set" || method == "push" || method == "length") {
|
||||
@ -1169,6 +1221,91 @@ pub(super) fn lower_boxcall<'ctx>(
|
||||
}
|
||||
}
|
||||
|
||||
// Map fast-paths (minimal): get/set/has/size with i64 keys
|
||||
if let Some(crate::mir::MirType::Box(bname)) = func.metadata.value_types.get(box_val) {
|
||||
if bname == "MapBox" && (method == "get" || method == "set" || method == "has" || method == "size") {
|
||||
let i64t = codegen.context.i64_type();
|
||||
match method {
|
||||
"size" => {
|
||||
if !args.is_empty() { return Err("MapBox.size expects 0 arg".to_string()); }
|
||||
let fnty = i64t.fn_type(&[i64t.into()], false);
|
||||
let callee = codegen.module.get_function("nyash.map.size_h").unwrap_or_else(|| codegen.module.add_function("nyash.map.size_h", fnty, None));
|
||||
let call = codegen.builder.build_call(callee, &[recv_h.into()], "msize").map_err(|e| e.to_string())?;
|
||||
if let Some(d) = dst {
|
||||
let rv = call.try_as_basic_value().left().ok_or("map.size_h returned void".to_string())?;
|
||||
vmap.insert(*d, rv);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
"has" => {
|
||||
if args.len() != 1 { return Err("MapBox.has expects 1 arg".to_string()); }
|
||||
let key_v = *vmap.get(&args[0]).ok_or("map.has key missing")?;
|
||||
let key_i = match key_v {
|
||||
BVE::IntValue(iv) => iv,
|
||||
BVE::PointerValue(pv) => codegen.builder.build_ptr_to_int(pv, i64t, "key_p2i").map_err(|e| e.to_string())?,
|
||||
_ => return Err("map.has key must be int or handle ptr".to_string()),
|
||||
};
|
||||
let fnty = i64t.fn_type(&[i64t.into(), i64t.into()], false);
|
||||
let callee = codegen.module.get_function("nyash.map.has_h").unwrap_or_else(|| codegen.module.add_function("nyash.map.has_h", fnty, None));
|
||||
let call = codegen.builder.build_call(callee, &[recv_h.into(), key_i.into()], "mhas").map_err(|e| e.to_string())?;
|
||||
if let Some(d) = dst {
|
||||
let rv = call.try_as_basic_value().left().ok_or("map.has_h returned void".to_string())?;
|
||||
vmap.insert(*d, rv);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
"get" => {
|
||||
if args.len() != 1 { return Err("MapBox.get expects 1 arg".to_string()); }
|
||||
let key_v = *vmap.get(&args[0]).ok_or("map.get key missing")?;
|
||||
// prefer integer key path; if pointer, convert to handle and call get_hh
|
||||
let call = match key_v {
|
||||
BVE::IntValue(iv) => {
|
||||
let fnty = i64t.fn_type(&[i64t.into(), i64t.into()], false);
|
||||
let callee = codegen.module.get_function("nyash.map.get_h").unwrap_or_else(|| codegen.module.add_function("nyash.map.get_h", fnty, None));
|
||||
codegen.builder.build_call(callee, &[recv_h.into(), iv.into()], "mget").map_err(|e| e.to_string())?
|
||||
}
|
||||
BVE::PointerValue(pv) => {
|
||||
// key: i8* -> i64 handle via from_i8_string (string key)
|
||||
let fnty_conv = i64t.fn_type(&[codegen.context.ptr_type(AddressSpace::from(0)).into()], false);
|
||||
let conv = codegen.module.get_function("nyash.box.from_i8_string").unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty_conv, None));
|
||||
let kcall = codegen.builder.build_call(conv, &[pv.into()], "key_i8_to_handle").map_err(|e| e.to_string())?;
|
||||
let kh = kcall.try_as_basic_value().left().ok_or("from_i8_string returned void".to_string())?.into_int_value();
|
||||
let fnty = i64t.fn_type(&[i64t.into(), i64t.into()], false);
|
||||
let callee = codegen.module.get_function("nyash.map.get_hh").unwrap_or_else(|| codegen.module.add_function("nyash.map.get_hh", fnty, None));
|
||||
codegen.builder.build_call(callee, &[recv_h.into(), kh.into()], "mget_hh").map_err(|e| e.to_string())?
|
||||
}
|
||||
_ => return Err("map.get key must be int or pointer".to_string()),
|
||||
};
|
||||
if let Some(d) = dst {
|
||||
let rv = call.try_as_basic_value().left().ok_or("map.get returned void".to_string())?;
|
||||
vmap.insert(*d, rv);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
"set" => {
|
||||
if args.len() != 2 { return Err("MapBox.set expects 2 args (key, value)".to_string()); }
|
||||
let key_v = *vmap.get(&args[0]).ok_or("map.set key missing")?;
|
||||
let val_v = *vmap.get(&args[1]).ok_or("map.set value missing")?;
|
||||
let key_i = match key_v {
|
||||
BVE::IntValue(iv) => iv,
|
||||
BVE::PointerValue(pv) => codegen.builder.build_ptr_to_int(pv, i64t, "key_p2i").map_err(|e| e.to_string())?,
|
||||
_ => return Err("map.set key must be int or handle ptr".to_string()),
|
||||
};
|
||||
let val_i = match val_v {
|
||||
BVE::IntValue(iv) => iv,
|
||||
BVE::PointerValue(pv) => codegen.builder.build_ptr_to_int(pv, i64t, "val_p2i").map_err(|e| e.to_string())?,
|
||||
_ => return Err("map.set value must be int or handle ptr".to_string()),
|
||||
};
|
||||
let fnty = i64t.fn_type(&[i64t.into(), i64t.into(), i64t.into()], false);
|
||||
let callee = codegen.module.get_function("nyash.map.set_h").unwrap_or_else(|| codegen.module.add_function("nyash.map.set_h", fnty, None));
|
||||
let _ = codegen.builder.build_call(callee, &[recv_h.into(), key_i.into(), val_i.into()], "mset").map_err(|e| e.to_string())?;
|
||||
return Ok(());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// getField
|
||||
if method == "getField" {
|
||||
if args.len() != 1 { return Err("getField expects 1 arg (name)".to_string()); }
|
||||
|
||||
@ -224,7 +224,8 @@ impl LLVMCompiler {
|
||||
vmap.insert(*dst, ptr.into());
|
||||
}
|
||||
_ => {
|
||||
// No-arg birth via central type registry
|
||||
// No-arg birth via central type registry (preferred),
|
||||
// fallback to env.box.new(name) when type_id is unavailable.
|
||||
if !args.is_empty() {
|
||||
return Err(
|
||||
"NewBox with >2 args not yet supported in LLVM lowering"
|
||||
@ -233,35 +234,63 @@ impl LLVMCompiler {
|
||||
}
|
||||
let type_id = *box_type_ids.get(box_type).unwrap_or(&0);
|
||||
let i64t = codegen.context.i64_type();
|
||||
// declare i64 @nyash.box.birth_h(i64)
|
||||
let fn_ty = i64t.fn_type(&[i64t.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.box.birth_h")
|
||||
.unwrap_or_else(|| {
|
||||
codegen.module.add_function(
|
||||
"nyash.box.birth_h",
|
||||
fn_ty,
|
||||
None,
|
||||
)
|
||||
});
|
||||
let tid = i64t.const_int(type_id as u64, true);
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[tid.into()], "birth")
|
||||
.map_err(|e| e.to_string())?;
|
||||
// Handle is i64; represent Box as opaque i8* via inttoptr
|
||||
let h_i64 = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("birth_h returned void".to_string())?
|
||||
.into_int_value();
|
||||
let pty = codegen.context.ptr_type(AddressSpace::from(0));
|
||||
let ptr = codegen
|
||||
.builder
|
||||
.build_int_to_ptr(h_i64, pty, "handle_to_ptr")
|
||||
.map_err(|e| e.to_string())?;
|
||||
vmap.insert(*dst, ptr.into());
|
||||
if type_id != 0 {
|
||||
// declare i64 @nyash.box.birth_h(i64)
|
||||
let fn_ty = i64t.fn_type(&[i64t.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.box.birth_h")
|
||||
.unwrap_or_else(|| {
|
||||
codegen.module.add_function(
|
||||
"nyash.box.birth_h",
|
||||
fn_ty,
|
||||
None,
|
||||
)
|
||||
});
|
||||
let tid = i64t.const_int(type_id as u64, true);
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[tid.into()], "birth")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let h_i64 = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("birth_h returned void".to_string())?
|
||||
.into_int_value();
|
||||
let pty = codegen.context.ptr_type(AddressSpace::from(0));
|
||||
let ptr = codegen
|
||||
.builder
|
||||
.build_int_to_ptr(h_i64, pty, "handle_to_ptr")
|
||||
.map_err(|e| e.to_string())?;
|
||||
vmap.insert(*dst, ptr.into());
|
||||
} else {
|
||||
// Fallback: call i64 @nyash.env.box.new(i8*) with type name
|
||||
let i8p = codegen.context.ptr_type(AddressSpace::from(0));
|
||||
let fn_ty = i64t.fn_type(&[i8p.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.env.box.new")
|
||||
.unwrap_or_else(|| codegen.module.add_function("nyash.env.box.new", fn_ty, None));
|
||||
let tn = codegen
|
||||
.builder
|
||||
.build_global_string_ptr(box_type.as_str(), "box_type_name")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[tn.as_pointer_value().into()], "env_box_new")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let h_i64 = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("env.box.new returned void".to_string())?
|
||||
.into_int_value();
|
||||
let pty = codegen.context.ptr_type(AddressSpace::from(0));
|
||||
let ptr = codegen
|
||||
.builder
|
||||
.build_int_to_ptr(h_i64, pty, "handle_to_ptr")
|
||||
.map_err(|e| e.to_string())?;
|
||||
vmap.insert(*dst, ptr.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -394,6 +423,13 @@ impl LLVMCompiler {
|
||||
// and op is Add, route to NyRT concat helpers
|
||||
if let crate::mir::BinaryOp::Add = op {
|
||||
let i8p = codegen.context.ptr_type(AddressSpace::from(0));
|
||||
let is_stringish = |vid: &ValueId| -> bool {
|
||||
match func.metadata.value_types.get(vid) {
|
||||
Some(crate::mir::MirType::String) => true,
|
||||
Some(crate::mir::MirType::Box(_)) => true,
|
||||
_ => false,
|
||||
}
|
||||
};
|
||||
match (lv, rv) {
|
||||
(
|
||||
BasicValueEnum::PointerValue(lp),
|
||||
@ -425,55 +461,125 @@ impl LLVMCompiler {
|
||||
BasicValueEnum::PointerValue(lp),
|
||||
BasicValueEnum::IntValue(ri),
|
||||
) => {
|
||||
let i64t = codegen.context.i64_type();
|
||||
let fnty = i8p.fn_type(&[i8p.into(), i64t.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.string.concat_si")
|
||||
.unwrap_or_else(|| {
|
||||
codegen.module.add_function(
|
||||
"nyash.string.concat_si",
|
||||
fnty,
|
||||
None,
|
||||
)
|
||||
});
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[lp.into(), ri.into()], "concat_si")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let rv = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("concat_si returned void".to_string())?;
|
||||
vmap.insert(*dst, rv);
|
||||
handled_concat = true;
|
||||
// Minimal fallback: if both sides are annotated String/Box, convert ptr->handle and use concat_hh
|
||||
if is_stringish(lhs) && is_stringish(rhs) {
|
||||
let i64t = codegen.context.i64_type();
|
||||
// from_i8_string: i64(i8*)
|
||||
let fnty_conv = i64t.fn_type(&[i8p.into()], false);
|
||||
let conv = codegen
|
||||
.module
|
||||
.get_function("nyash.box.from_i8_string")
|
||||
.unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty_conv, None));
|
||||
let call_c = codegen
|
||||
.builder
|
||||
.build_call(conv, &[lp.into()], "lhs_i8_to_handle")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let lh = call_c
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("from_i8_string returned void".to_string())?
|
||||
.into_int_value();
|
||||
// concat_hh: i64(i64,i64)
|
||||
let fnty_hh = i64t.fn_type(&[i64t.into(), i64t.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.string.concat_hh")
|
||||
.unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_hh", fnty_hh, None));
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[lh.into(), ri.into()], "concat_hh")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let rv = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("concat_hh returned void".to_string())?;
|
||||
vmap.insert(*dst, rv);
|
||||
handled_concat = true;
|
||||
} else {
|
||||
let i64t = codegen.context.i64_type();
|
||||
let fnty = i8p.fn_type(&[i8p.into(), i64t.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.string.concat_si")
|
||||
.unwrap_or_else(|| {
|
||||
codegen.module.add_function(
|
||||
"nyash.string.concat_si",
|
||||
fnty,
|
||||
None,
|
||||
)
|
||||
});
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[lp.into(), ri.into()], "concat_si")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let rv = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("concat_si returned void".to_string())?;
|
||||
vmap.insert(*dst, rv);
|
||||
handled_concat = true;
|
||||
}
|
||||
}
|
||||
(
|
||||
BasicValueEnum::IntValue(li),
|
||||
BasicValueEnum::PointerValue(rp),
|
||||
) => {
|
||||
let i64t = codegen.context.i64_type();
|
||||
let fnty = i8p.fn_type(&[i64t.into(), i8p.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.string.concat_is")
|
||||
.unwrap_or_else(|| {
|
||||
codegen.module.add_function(
|
||||
"nyash.string.concat_is",
|
||||
fnty,
|
||||
None,
|
||||
)
|
||||
});
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[li.into(), rp.into()], "concat_is")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let rv = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("concat_is returned void".to_string())?;
|
||||
vmap.insert(*dst, rv);
|
||||
handled_concat = true;
|
||||
// Minimal fallback: if both sides are annotated String/Box, convert ptr->handle and use concat_hh
|
||||
if is_stringish(lhs) && is_stringish(rhs) {
|
||||
let i64t = codegen.context.i64_type();
|
||||
let fnty_conv = i64t.fn_type(&[i8p.into()], false);
|
||||
let conv = codegen
|
||||
.module
|
||||
.get_function("nyash.box.from_i8_string")
|
||||
.unwrap_or_else(|| codegen.module.add_function("nyash.box.from_i8_string", fnty_conv, None));
|
||||
let call_c = codegen
|
||||
.builder
|
||||
.build_call(conv, &[rp.into()], "rhs_i8_to_handle")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let rh = call_c
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("from_i8_string returned void".to_string())?
|
||||
.into_int_value();
|
||||
let fnty_hh = i64t.fn_type(&[i64t.into(), i64t.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.string.concat_hh")
|
||||
.unwrap_or_else(|| codegen.module.add_function("nyash.string.concat_hh", fnty_hh, None));
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[li.into(), rh.into()], "concat_hh")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let rv = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("concat_hh returned void".to_string())?;
|
||||
vmap.insert(*dst, rv);
|
||||
handled_concat = true;
|
||||
} else {
|
||||
let i64t = codegen.context.i64_type();
|
||||
let fnty = i8p.fn_type(&[i64t.into(), i8p.into()], false);
|
||||
let callee = codegen
|
||||
.module
|
||||
.get_function("nyash.string.concat_is")
|
||||
.unwrap_or_else(|| {
|
||||
codegen.module.add_function(
|
||||
"nyash.string.concat_is",
|
||||
fnty,
|
||||
None,
|
||||
)
|
||||
});
|
||||
let call = codegen
|
||||
.builder
|
||||
.build_call(callee, &[li.into(), rp.into()], "concat_is")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let rv = call
|
||||
.try_as_basic_value()
|
||||
.left()
|
||||
.ok_or("concat_is returned void".to_string())?;
|
||||
vmap.insert(*dst, rv);
|
||||
handled_concat = true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user