Implement basic WASM string constant support
Co-authored-by: moe-charm <217100418+moe-charm@users.noreply.github.com>
This commit is contained in:
@ -13,6 +13,7 @@ use std::collections::HashMap;
|
|||||||
pub struct WasmModule {
|
pub struct WasmModule {
|
||||||
pub imports: Vec<String>,
|
pub imports: Vec<String>,
|
||||||
pub memory: String,
|
pub memory: String,
|
||||||
|
pub data_segments: Vec<String>,
|
||||||
pub globals: Vec<String>,
|
pub globals: Vec<String>,
|
||||||
pub functions: Vec<String>,
|
pub functions: Vec<String>,
|
||||||
pub exports: Vec<String>,
|
pub exports: Vec<String>,
|
||||||
@ -23,6 +24,7 @@ impl WasmModule {
|
|||||||
Self {
|
Self {
|
||||||
imports: Vec::new(),
|
imports: Vec::new(),
|
||||||
memory: String::new(),
|
memory: String::new(),
|
||||||
|
data_segments: Vec::new(),
|
||||||
globals: Vec::new(),
|
globals: Vec::new(),
|
||||||
functions: Vec::new(),
|
functions: Vec::new(),
|
||||||
exports: Vec::new(),
|
exports: Vec::new(),
|
||||||
@ -44,6 +46,11 @@ impl WasmModule {
|
|||||||
wat.push_str(&format!(" {}\n", self.memory));
|
wat.push_str(&format!(" {}\n", self.memory));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add data segments (must come after memory)
|
||||||
|
for data_segment in &self.data_segments {
|
||||||
|
wat.push_str(&format!(" {}\n", data_segment));
|
||||||
|
}
|
||||||
|
|
||||||
// Add globals
|
// Add globals
|
||||||
for global in &self.globals {
|
for global in &self.globals {
|
||||||
wat.push_str(&format!(" {}\n", global));
|
wat.push_str(&format!(" {}\n", global));
|
||||||
@ -69,6 +76,9 @@ pub struct WasmCodegen {
|
|||||||
/// Current function context for local variable management
|
/// Current function context for local variable management
|
||||||
current_locals: HashMap<ValueId, u32>,
|
current_locals: HashMap<ValueId, u32>,
|
||||||
next_local_index: u32,
|
next_local_index: u32,
|
||||||
|
/// String literals and their data segment offsets
|
||||||
|
string_literals: HashMap<String, u32>,
|
||||||
|
next_data_offset: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WasmCodegen {
|
impl WasmCodegen {
|
||||||
@ -76,6 +86,8 @@ impl WasmCodegen {
|
|||||||
Self {
|
Self {
|
||||||
current_locals: HashMap::new(),
|
current_locals: HashMap::new(),
|
||||||
next_local_index: 0,
|
next_local_index: 0,
|
||||||
|
string_literals: HashMap::new(),
|
||||||
|
next_data_offset: 0x1000, // Start data after initial heap space
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,6 +126,9 @@ impl WasmCodegen {
|
|||||||
wasm_module.functions.push(wasm_function);
|
wasm_module.functions.push(wasm_function);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add string literal data segments
|
||||||
|
wasm_module.data_segments.extend(self.generate_data_segments());
|
||||||
|
|
||||||
// Add main function export if it exists
|
// Add main function export if it exists
|
||||||
if mir_module.functions.contains_key("main") {
|
if mir_module.functions.contains_key("main") {
|
||||||
wasm_module.exports.push("(export \"main\" (func $main))".to_string());
|
wasm_module.exports.push("(export \"main\" (func $main))".to_string());
|
||||||
@ -188,7 +203,7 @@ impl WasmCodegen {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Generate WASM instructions for a basic block
|
/// Generate WASM instructions for a basic block
|
||||||
fn generate_basic_block(&self, mir_function: &MirFunction, block_id: BasicBlockId) -> Result<Vec<String>, WasmError> {
|
fn generate_basic_block(&mut self, mir_function: &MirFunction, block_id: BasicBlockId) -> Result<Vec<String>, WasmError> {
|
||||||
let block = mir_function.blocks.get(&block_id)
|
let block = mir_function.blocks.get(&block_id)
|
||||||
.ok_or_else(|| WasmError::CodegenError(format!("Basic block {:?} not found", block_id)))?;
|
.ok_or_else(|| WasmError::CodegenError(format!("Basic block {:?} not found", block_id)))?;
|
||||||
|
|
||||||
@ -210,7 +225,7 @@ impl WasmCodegen {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Generate WASM instructions for a single MIR instruction
|
/// Generate WASM instructions for a single MIR instruction
|
||||||
fn generate_instruction(&self, instruction: &MirInstruction) -> Result<Vec<String>, WasmError> {
|
fn generate_instruction(&mut self, instruction: &MirInstruction) -> Result<Vec<String>, WasmError> {
|
||||||
match instruction {
|
match instruction {
|
||||||
// Phase 8.2 PoC1: Basic operations
|
// Phase 8.2 PoC1: Basic operations
|
||||||
MirInstruction::Const { dst, value } => {
|
MirInstruction::Const { dst, value } => {
|
||||||
@ -348,11 +363,20 @@ impl WasmCodegen {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Generate constant loading
|
/// Generate constant loading
|
||||||
fn generate_const(&self, dst: ValueId, value: &ConstValue) -> Result<Vec<String>, WasmError> {
|
fn generate_const(&mut self, dst: ValueId, value: &ConstValue) -> Result<Vec<String>, WasmError> {
|
||||||
let const_instruction = match value {
|
let const_instruction = match value {
|
||||||
ConstValue::Integer(n) => format!("i32.const {}", n),
|
ConstValue::Integer(n) => format!("i32.const {}", n),
|
||||||
ConstValue::Bool(b) => format!("i32.const {}", if *b { 1 } else { 0 }),
|
ConstValue::Bool(b) => format!("i32.const {}", if *b { 1 } else { 0 }),
|
||||||
ConstValue::Void => "i32.const 0".to_string(),
|
ConstValue::Void => "i32.const 0".to_string(),
|
||||||
|
ConstValue::String(s) => {
|
||||||
|
// Register the string literal and get its offset
|
||||||
|
let data_offset = self.register_string_literal(s);
|
||||||
|
let string_len = s.len() as u32;
|
||||||
|
|
||||||
|
// Generate code to allocate a StringBox and return its pointer
|
||||||
|
// This is more complex and will need StringBox allocation
|
||||||
|
return self.generate_string_box_const(dst, data_offset, string_len);
|
||||||
|
},
|
||||||
_ => return Err(WasmError::UnsupportedInstruction(
|
_ => return Err(WasmError::UnsupportedInstruction(
|
||||||
format!("Unsupported constant type: {:?}", value)
|
format!("Unsupported constant type: {:?}", value)
|
||||||
)),
|
)),
|
||||||
@ -417,6 +441,35 @@ impl WasmCodegen {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generate StringBox allocation for a string constant
|
||||||
|
fn generate_string_box_const(&self, dst: ValueId, data_offset: u32, string_len: u32) -> Result<Vec<String>, WasmError> {
|
||||||
|
// Allocate a StringBox using the StringBox allocator
|
||||||
|
// StringBox layout: [type_id:0x1001][ref_count:1][field_count:2][data_ptr:offset][length:len]
|
||||||
|
Ok(vec![
|
||||||
|
// Call StringBox allocator function
|
||||||
|
"call $alloc_stringbox".to_string(),
|
||||||
|
// Store the result (StringBox pointer) in local variable
|
||||||
|
format!("local.set ${}", self.get_local_index(dst)?),
|
||||||
|
|
||||||
|
// Initialize StringBox fields
|
||||||
|
// Get StringBox pointer back
|
||||||
|
format!("local.get ${}", self.get_local_index(dst)?),
|
||||||
|
// Set data_ptr field (offset 12 from StringBox pointer)
|
||||||
|
"i32.const 12".to_string(),
|
||||||
|
"i32.add".to_string(),
|
||||||
|
format!("i32.const {}", data_offset),
|
||||||
|
"i32.store".to_string(),
|
||||||
|
|
||||||
|
// Get StringBox pointer again
|
||||||
|
format!("local.get ${}", self.get_local_index(dst)?),
|
||||||
|
// Set length field (offset 16 from StringBox pointer)
|
||||||
|
"i32.const 16".to_string(),
|
||||||
|
"i32.add".to_string(),
|
||||||
|
format!("i32.const {}", string_len),
|
||||||
|
"i32.store".to_string(),
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
/// Generate print instruction (calls env.print import)
|
/// Generate print instruction (calls env.print import)
|
||||||
fn generate_print(&self, value: ValueId) -> Result<Vec<String>, WasmError> {
|
fn generate_print(&self, value: ValueId) -> Result<Vec<String>, WasmError> {
|
||||||
Ok(vec![
|
Ok(vec![
|
||||||
@ -425,6 +478,44 @@ impl WasmCodegen {
|
|||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Register a string literal and return its data offset
|
||||||
|
fn register_string_literal(&mut self, string: &str) -> u32 {
|
||||||
|
if let Some(&offset) = self.string_literals.get(string) {
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
let offset = self.next_data_offset;
|
||||||
|
let string_bytes = string.as_bytes();
|
||||||
|
self.string_literals.insert(string.to_string(), offset);
|
||||||
|
self.next_data_offset += string_bytes.len() as u32;
|
||||||
|
|
||||||
|
offset
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate data segments for all registered string literals
|
||||||
|
fn generate_data_segments(&self) -> Vec<String> {
|
||||||
|
let mut segments = Vec::new();
|
||||||
|
|
||||||
|
for (string, &offset) in &self.string_literals {
|
||||||
|
let string_bytes = string.as_bytes();
|
||||||
|
|
||||||
|
// Convert to hex-escaped string for WAT
|
||||||
|
let byte_string = string_bytes.iter()
|
||||||
|
.map(|b| format!("\\{:02x}", b))
|
||||||
|
.collect::<String>();
|
||||||
|
|
||||||
|
let data_segment = format!(
|
||||||
|
"(data (i32.const {}) \"{}\")",
|
||||||
|
offset,
|
||||||
|
byte_string
|
||||||
|
);
|
||||||
|
|
||||||
|
segments.push(data_segment);
|
||||||
|
}
|
||||||
|
|
||||||
|
segments
|
||||||
|
}
|
||||||
|
|
||||||
/// Get WASM local variable index for ValueId
|
/// Get WASM local variable index for ValueId
|
||||||
fn get_local_index(&self, value_id: ValueId) -> Result<u32, WasmError> {
|
fn get_local_index(&self, value_id: ValueId) -> Result<u32, WasmError> {
|
||||||
self.current_locals.get(&value_id)
|
self.current_locals.get(&value_id)
|
||||||
@ -452,7 +543,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_constant_generation() {
|
fn test_constant_generation() {
|
||||||
let codegen = WasmCodegen::new();
|
let mut codegen = WasmCodegen::new();
|
||||||
let dst = ValueId::new(0);
|
let dst = ValueId::new(0);
|
||||||
|
|
||||||
let result = codegen.generate_const(dst, &ConstValue::Integer(42));
|
let result = codegen.generate_const(dst, &ConstValue::Integer(42));
|
||||||
|
|||||||
@ -43,7 +43,14 @@ impl RuntimeImports {
|
|||||||
result: None,
|
result: None,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Future: env.print_string for string output
|
// env.print_str for string debugging (ptr, len)
|
||||||
|
self.imports.push(ImportFunction {
|
||||||
|
module: "env".to_string(),
|
||||||
|
name: "print_str".to_string(),
|
||||||
|
params: vec!["i32".to_string(), "i32".to_string()],
|
||||||
|
result: None,
|
||||||
|
});
|
||||||
|
|
||||||
// Future: env.file_read, env.file_write for file I/O
|
// Future: env.file_read, env.file_write for file I/O
|
||||||
// Future: env.http_request for network access
|
// Future: env.http_request for network access
|
||||||
}
|
}
|
||||||
|
|||||||
164
tests/wasm_string_constants.rs
Normal file
164
tests/wasm_string_constants.rs
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
/*!
|
||||||
|
* WASM String Constants Test - Validates Issue #65 implementation
|
||||||
|
*
|
||||||
|
* Tests string constant support in WASM backend:
|
||||||
|
* - ConstValue::String handling in generate_const
|
||||||
|
* - Data segment generation for string literals
|
||||||
|
* - StringBox creation with proper layout
|
||||||
|
* - WAT generation includes data segments and string allocation
|
||||||
|
*/
|
||||||
|
|
||||||
|
use nyash_rust::mir::{
|
||||||
|
MirModule, MirFunction, FunctionSignature, MirType, EffectMask,
|
||||||
|
BasicBlock, BasicBlockId, ValueId, MirInstruction, ConstValue
|
||||||
|
};
|
||||||
|
use nyash_rust::backend::wasm::WasmBackend;
|
||||||
|
|
||||||
|
/// Compiles a module containing a single string constant and checks both the
/// generated WAT text and the binary compilation result.
#[test]
fn test_wasm_string_constant_basic() {
    // MIR under test:
    //   function main() {
    //     %str = const "Hello, WASM!"
    //     return %str   // expected to be a StringBox pointer
    //   }
    let mut backend = WasmBackend::new();
    let mir_module = build_string_const_mir_module();

    // The WAT text is generated first so it can be printed if binary compilation fails.
    let wat_result = backend.compile_to_wat(mir_module.clone());
    assert!(wat_result.is_ok(), "WAT generation should succeed for string constants");
    let wat_text = wat_result.unwrap();

    // Fragments the WAT must contain, each with its failure message.
    let expected_fragments = [
        ("(module", "Should contain module declaration"),
        ("memory", "Should contain memory declaration"),
        ("data", "Should contain data segment for string literal"),
        ("\\48\\65\\6c\\6c\\6f", "Should contain UTF-8 bytes for 'Hello'"),
        ("$alloc_stringbox", "Should contain StringBox allocator"),
        ("print_str", "Should contain print_str import"),
    ];
    for (fragment, message) in expected_fragments {
        assert!(wat_text.contains(fragment), "{}", message);
    }

    // Compile to a WASM binary; dump diagnostics before failing.
    let wasm_result = backend.compile_module(mir_module);
    if let Some(e) = wasm_result.as_ref().err() {
        println!("WASM compilation error: {}", e);
        println!("Generated WAT:\n{}", wat_text);
    }
    assert!(wasm_result.is_ok(), "WASM compilation should succeed for string constants");
}
|
||||||
|
|
||||||
|
/// Compiles a module with three string constants, two of them identical, and
/// checks that only the unique literals produce data segments.
#[test]
fn test_wasm_string_constant_multiple() {
    // MIR under test:
    //   function main() {
    //     %str1 = const "First"
    //     %str2 = const "Second"
    //     %str3 = const "First"   // duplicate, expected to reuse the data segment
    //     return %str1
    //   }
    let mut backend = WasmBackend::new();
    let mir_module = build_multiple_string_const_mir_module();

    let wat_result = backend.compile_to_wat(mir_module.clone());
    assert!(wat_result.is_ok(), "WAT generation should succeed for multiple strings");
    let wat_text = wat_result.unwrap();

    // Both unique literals must appear, hex-escaped.
    let has_first = wat_text.contains("\\46\\69\\72\\73\\74");
    assert!(has_first, "Should contain 'First' string in hex");
    let has_second = wat_text.contains("\\53\\65\\63\\6f\\6e\\64");
    assert!(has_second, "Should contain 'Second' string in hex");

    // Two unique strings -> two segments; the repeated "First" adds none.
    assert_eq!(
        wat_text.matches("(data").count(),
        2,
        "Should have exactly 2 data segments for 2 unique strings"
    );

    let wasm_result = backend.compile_module(mir_module);
    assert!(wasm_result.is_ok(), "WASM compilation should succeed for multiple strings");
}
|
||||||
|
|
||||||
|
/// Build a MIR module with a single string constant
|
||||||
|
fn build_string_const_mir_module() -> MirModule {
|
||||||
|
let mut module = MirModule::new("test_string_const".to_string());
|
||||||
|
|
||||||
|
// Create main function signature
|
||||||
|
let main_signature = FunctionSignature {
|
||||||
|
name: "main".to_string(),
|
||||||
|
params: vec![],
|
||||||
|
return_type: MirType::Integer, // StringBox pointer as i32
|
||||||
|
effects: EffectMask::PURE,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create basic block
|
||||||
|
let entry_block = BasicBlockId::new(0);
|
||||||
|
let mut main_function = MirFunction::new(main_signature, entry_block);
|
||||||
|
let mut block = BasicBlock::new(entry_block);
|
||||||
|
|
||||||
|
// %str = const "Hello, WASM!"
|
||||||
|
let str_value = ValueId::new(0);
|
||||||
|
block.instructions.push(MirInstruction::Const {
|
||||||
|
dst: str_value,
|
||||||
|
value: ConstValue::String("Hello, WASM!".to_string()),
|
||||||
|
});
|
||||||
|
|
||||||
|
// return %str
|
||||||
|
block.terminator = Some(MirInstruction::Return {
|
||||||
|
value: Some(str_value),
|
||||||
|
});
|
||||||
|
|
||||||
|
main_function.blocks.insert(entry_block, block);
|
||||||
|
|
||||||
|
module.functions.insert("main".to_string(), main_function);
|
||||||
|
module
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a MIR module with multiple string constants
|
||||||
|
fn build_multiple_string_const_mir_module() -> MirModule {
|
||||||
|
let mut module = MirModule::new("test_multiple_strings".to_string());
|
||||||
|
|
||||||
|
let main_signature = FunctionSignature {
|
||||||
|
name: "main".to_string(),
|
||||||
|
params: vec![],
|
||||||
|
return_type: MirType::Integer,
|
||||||
|
effects: EffectMask::PURE,
|
||||||
|
};
|
||||||
|
|
||||||
|
let entry_block = BasicBlockId::new(0);
|
||||||
|
let mut main_function = MirFunction::new(main_signature, entry_block);
|
||||||
|
let mut block = BasicBlock::new(entry_block);
|
||||||
|
|
||||||
|
// %str1 = const "First"
|
||||||
|
let str1_value = ValueId::new(0);
|
||||||
|
block.instructions.push(MirInstruction::Const {
|
||||||
|
dst: str1_value,
|
||||||
|
value: ConstValue::String("First".to_string()),
|
||||||
|
});
|
||||||
|
|
||||||
|
// %str2 = const "Second"
|
||||||
|
let str2_value = ValueId::new(1);
|
||||||
|
block.instructions.push(MirInstruction::Const {
|
||||||
|
dst: str2_value,
|
||||||
|
value: ConstValue::String("Second".to_string()),
|
||||||
|
});
|
||||||
|
|
||||||
|
// %str3 = const "First" (duplicate)
|
||||||
|
let str3_value = ValueId::new(2);
|
||||||
|
block.instructions.push(MirInstruction::Const {
|
||||||
|
dst: str3_value,
|
||||||
|
value: ConstValue::String("First".to_string()),
|
||||||
|
});
|
||||||
|
|
||||||
|
// return %str1
|
||||||
|
block.terminator = Some(MirInstruction::Return {
|
||||||
|
value: Some(str1_value),
|
||||||
|
});
|
||||||
|
|
||||||
|
main_function.blocks.insert(entry_block, block);
|
||||||
|
|
||||||
|
module.functions.insert("main".to_string(), main_function);
|
||||||
|
module
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user