Implement basic WASM string constant support
Co-authored-by: moe-charm <217100418+moe-charm@users.noreply.github.com>
This commit is contained in:
@ -13,6 +13,7 @@ use std::collections::HashMap;
|
||||
pub struct WasmModule {
|
||||
pub imports: Vec<String>,
|
||||
pub memory: String,
|
||||
pub data_segments: Vec<String>,
|
||||
pub globals: Vec<String>,
|
||||
pub functions: Vec<String>,
|
||||
pub exports: Vec<String>,
|
||||
@ -23,6 +24,7 @@ impl WasmModule {
|
||||
Self {
|
||||
imports: Vec::new(),
|
||||
memory: String::new(),
|
||||
data_segments: Vec::new(),
|
||||
globals: Vec::new(),
|
||||
functions: Vec::new(),
|
||||
exports: Vec::new(),
|
||||
@ -44,6 +46,11 @@ impl WasmModule {
|
||||
wat.push_str(&format!(" {}\n", self.memory));
|
||||
}
|
||||
|
||||
// Add data segments (must come after memory)
|
||||
for data_segment in &self.data_segments {
|
||||
wat.push_str(&format!(" {}\n", data_segment));
|
||||
}
|
||||
|
||||
// Add globals
|
||||
for global in &self.globals {
|
||||
wat.push_str(&format!(" {}\n", global));
|
||||
@ -69,6 +76,9 @@ pub struct WasmCodegen {
|
||||
/// Current function context for local variable management
|
||||
current_locals: HashMap<ValueId, u32>,
|
||||
next_local_index: u32,
|
||||
/// String literals and their data segment offsets
|
||||
string_literals: HashMap<String, u32>,
|
||||
next_data_offset: u32,
|
||||
}
|
||||
|
||||
impl WasmCodegen {
|
||||
@ -76,6 +86,8 @@ impl WasmCodegen {
|
||||
Self {
|
||||
current_locals: HashMap::new(),
|
||||
next_local_index: 0,
|
||||
string_literals: HashMap::new(),
|
||||
next_data_offset: 0x1000, // Start data after initial heap space
|
||||
}
|
||||
}
|
||||
|
||||
@ -114,6 +126,9 @@ impl WasmCodegen {
|
||||
wasm_module.functions.push(wasm_function);
|
||||
}
|
||||
|
||||
// Add string literal data segments
|
||||
wasm_module.data_segments.extend(self.generate_data_segments());
|
||||
|
||||
// Add main function export if it exists
|
||||
if mir_module.functions.contains_key("main") {
|
||||
wasm_module.exports.push("(export \"main\" (func $main))".to_string());
|
||||
@ -188,7 +203,7 @@ impl WasmCodegen {
|
||||
}
|
||||
|
||||
/// Generate WASM instructions for a basic block
|
||||
fn generate_basic_block(&self, mir_function: &MirFunction, block_id: BasicBlockId) -> Result<Vec<String>, WasmError> {
|
||||
fn generate_basic_block(&mut self, mir_function: &MirFunction, block_id: BasicBlockId) -> Result<Vec<String>, WasmError> {
|
||||
let block = mir_function.blocks.get(&block_id)
|
||||
.ok_or_else(|| WasmError::CodegenError(format!("Basic block {:?} not found", block_id)))?;
|
||||
|
||||
@ -210,7 +225,7 @@ impl WasmCodegen {
|
||||
}
|
||||
|
||||
/// Generate WASM instructions for a single MIR instruction
|
||||
fn generate_instruction(&self, instruction: &MirInstruction) -> Result<Vec<String>, WasmError> {
|
||||
fn generate_instruction(&mut self, instruction: &MirInstruction) -> Result<Vec<String>, WasmError> {
|
||||
match instruction {
|
||||
// Phase 8.2 PoC1: Basic operations
|
||||
MirInstruction::Const { dst, value } => {
|
||||
@ -348,11 +363,20 @@ impl WasmCodegen {
|
||||
}
|
||||
|
||||
/// Generate constant loading
|
||||
fn generate_const(&self, dst: ValueId, value: &ConstValue) -> Result<Vec<String>, WasmError> {
|
||||
fn generate_const(&mut self, dst: ValueId, value: &ConstValue) -> Result<Vec<String>, WasmError> {
|
||||
let const_instruction = match value {
|
||||
ConstValue::Integer(n) => format!("i32.const {}", n),
|
||||
ConstValue::Bool(b) => format!("i32.const {}", if *b { 1 } else { 0 }),
|
||||
ConstValue::Void => "i32.const 0".to_string(),
|
||||
ConstValue::String(s) => {
|
||||
// Register the string literal and get its offset
|
||||
let data_offset = self.register_string_literal(s);
|
||||
let string_len = s.len() as u32;
|
||||
|
||||
// Generate code to allocate a StringBox and return its pointer
|
||||
// This is more complex and will need StringBox allocation
|
||||
return self.generate_string_box_const(dst, data_offset, string_len);
|
||||
},
|
||||
_ => return Err(WasmError::UnsupportedInstruction(
|
||||
format!("Unsupported constant type: {:?}", value)
|
||||
)),
|
||||
@ -417,6 +441,35 @@ impl WasmCodegen {
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate StringBox allocation for a string constant
|
||||
fn generate_string_box_const(&self, dst: ValueId, data_offset: u32, string_len: u32) -> Result<Vec<String>, WasmError> {
|
||||
// Allocate a StringBox using the StringBox allocator
|
||||
// StringBox layout: [type_id:0x1001][ref_count:1][field_count:2][data_ptr:offset][length:len]
|
||||
Ok(vec![
|
||||
// Call StringBox allocator function
|
||||
"call $alloc_stringbox".to_string(),
|
||||
// Store the result (StringBox pointer) in local variable
|
||||
format!("local.set ${}", self.get_local_index(dst)?),
|
||||
|
||||
// Initialize StringBox fields
|
||||
// Get StringBox pointer back
|
||||
format!("local.get ${}", self.get_local_index(dst)?),
|
||||
// Set data_ptr field (offset 12 from StringBox pointer)
|
||||
"i32.const 12".to_string(),
|
||||
"i32.add".to_string(),
|
||||
format!("i32.const {}", data_offset),
|
||||
"i32.store".to_string(),
|
||||
|
||||
// Get StringBox pointer again
|
||||
format!("local.get ${}", self.get_local_index(dst)?),
|
||||
// Set length field (offset 16 from StringBox pointer)
|
||||
"i32.const 16".to_string(),
|
||||
"i32.add".to_string(),
|
||||
format!("i32.const {}", string_len),
|
||||
"i32.store".to_string(),
|
||||
])
|
||||
}
|
||||
|
||||
/// Generate print instruction (calls env.print import)
|
||||
fn generate_print(&self, value: ValueId) -> Result<Vec<String>, WasmError> {
|
||||
Ok(vec![
|
||||
@ -425,6 +478,44 @@ impl WasmCodegen {
|
||||
])
|
||||
}
|
||||
|
||||
/// Register a string literal and return its data offset
|
||||
fn register_string_literal(&mut self, string: &str) -> u32 {
|
||||
if let Some(&offset) = self.string_literals.get(string) {
|
||||
return offset;
|
||||
}
|
||||
|
||||
let offset = self.next_data_offset;
|
||||
let string_bytes = string.as_bytes();
|
||||
self.string_literals.insert(string.to_string(), offset);
|
||||
self.next_data_offset += string_bytes.len() as u32;
|
||||
|
||||
offset
|
||||
}
|
||||
|
||||
/// Generate data segments for all registered string literals
|
||||
fn generate_data_segments(&self) -> Vec<String> {
|
||||
let mut segments = Vec::new();
|
||||
|
||||
for (string, &offset) in &self.string_literals {
|
||||
let string_bytes = string.as_bytes();
|
||||
|
||||
// Convert to hex-escaped string for WAT
|
||||
let byte_string = string_bytes.iter()
|
||||
.map(|b| format!("\\{:02x}", b))
|
||||
.collect::<String>();
|
||||
|
||||
let data_segment = format!(
|
||||
"(data (i32.const {}) \"{}\")",
|
||||
offset,
|
||||
byte_string
|
||||
);
|
||||
|
||||
segments.push(data_segment);
|
||||
}
|
||||
|
||||
segments
|
||||
}
|
||||
|
||||
/// Get WASM local variable index for ValueId
|
||||
fn get_local_index(&self, value_id: ValueId) -> Result<u32, WasmError> {
|
||||
self.current_locals.get(&value_id)
|
||||
@ -452,7 +543,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_constant_generation() {
|
||||
let codegen = WasmCodegen::new();
|
||||
let mut codegen = WasmCodegen::new();
|
||||
let dst = ValueId::new(0);
|
||||
|
||||
let result = codegen.generate_const(dst, &ConstValue::Integer(42));
|
||||
|
||||
@ -43,7 +43,14 @@ impl RuntimeImports {
|
||||
result: None,
|
||||
});
|
||||
|
||||
// Future: env.print_string for string output
|
||||
// env.print_str for string debugging (ptr, len)
|
||||
self.imports.push(ImportFunction {
|
||||
module: "env".to_string(),
|
||||
name: "print_str".to_string(),
|
||||
params: vec!["i32".to_string(), "i32".to_string()],
|
||||
result: None,
|
||||
});
|
||||
|
||||
// Future: env.file_read, env.file_write for file I/O
|
||||
// Future: env.http_request for network access
|
||||
}
|
||||
|
||||
164
tests/wasm_string_constants.rs
Normal file
164
tests/wasm_string_constants.rs
Normal file
@ -0,0 +1,164 @@
|
||||
/*!
|
||||
* WASM String Constants Test - Validates Issue #65 implementation
|
||||
*
|
||||
* Tests string constant support in WASM backend:
|
||||
* - ConstValue::String handling in generate_const
|
||||
* - Data segment generation for string literals
|
||||
* - StringBox creation with proper layout
|
||||
* - WAT generation includes data segments and string allocation
|
||||
*/
|
||||
|
||||
use nyash_rust::mir::{
|
||||
MirModule, MirFunction, FunctionSignature, MirType, EffectMask,
|
||||
BasicBlock, BasicBlockId, ValueId, MirInstruction, ConstValue
|
||||
};
|
||||
use nyash_rust::backend::wasm::WasmBackend;
|
||||
|
||||
#[test]
fn test_wasm_string_constant_basic() {
    // MIR under test:
    //   function main() {
    //     %str = const "Hello, WASM!"
    //     return %str   // Should return StringBox pointer
    //   }
    let mir_module = build_string_const_mir_module();
    let mut backend = WasmBackend::new();

    // First stage: textual WAT, which is also kept for diagnostics below.
    let wat_result = backend.compile_to_wat(mir_module.clone());
    assert!(wat_result.is_ok(), "WAT generation should succeed for string constants");
    let wat_text = wat_result.unwrap();

    // Each entry is (substring that must appear in the WAT, failure message).
    let expected_fragments = [
        ("(module", "Should contain module declaration"),
        ("memory", "Should contain memory declaration"),
        ("data", "Should contain data segment for string literal"),
        ("\\48\\65\\6c\\6c\\6f", "Should contain UTF-8 bytes for 'Hello'"),
        ("$alloc_stringbox", "Should contain StringBox allocator"),
        ("print_str", "Should contain print_str import"),
    ];
    for (fragment, message) in expected_fragments {
        assert!(wat_text.contains(fragment), "{}", message);
    }

    // Second stage: binary compilation. On failure, dump the error and the WAT
    // before asserting so the cause is visible in the test output.
    let wasm_result = backend.compile_module(mir_module);
    if let Err(e) = &wasm_result {
        println!("WASM compilation error: {}", e);
        println!("Generated WAT:\n{}", wat_text);
    }
    assert!(wasm_result.is_ok(), "WASM compilation should succeed for string constants");
}
|
||||
|
||||
#[test]
fn test_wasm_string_constant_multiple() {
    // MIR under test (checks data segment management across several constants):
    //   function main() {
    //     %str1 = const "First"
    //     %str2 = const "Second"
    //     %str3 = const "First"   // Duplicate should reuse data segment
    //     return %str1
    //   }
    let mir_module = build_multiple_string_const_mir_module();
    let mut backend = WasmBackend::new();

    let wat_result = backend.compile_to_wat(mir_module.clone());
    assert!(wat_result.is_ok(), "WAT generation should succeed for multiple strings");
    let wat_text = wat_result.unwrap();

    // Both unique literals must be present as hex-escaped bytes.
    let expected_literals = [
        ("\\46\\69\\72\\73\\74", "Should contain 'First' string in hex"),
        ("\\53\\65\\63\\6f\\6e\\64", "Should contain 'Second' string in hex"),
    ];
    for (escaped, message) in expected_literals {
        assert!(wat_text.contains(escaped), "{}", message);
    }

    // The repeated "First" must not produce a segment of its own.
    let data_count = wat_text.matches("(data").count();
    assert_eq!(data_count, 2, "Should have exactly 2 data segments for 2 unique strings");

    let wasm_result = backend.compile_module(mir_module);
    assert!(wasm_result.is_ok(), "WASM compilation should succeed for multiple strings");
}
|
||||
|
||||
/// Build a MIR module with a single string constant
|
||||
fn build_string_const_mir_module() -> MirModule {
|
||||
let mut module = MirModule::new("test_string_const".to_string());
|
||||
|
||||
// Create main function signature
|
||||
let main_signature = FunctionSignature {
|
||||
name: "main".to_string(),
|
||||
params: vec![],
|
||||
return_type: MirType::Integer, // StringBox pointer as i32
|
||||
effects: EffectMask::PURE,
|
||||
};
|
||||
|
||||
// Create basic block
|
||||
let entry_block = BasicBlockId::new(0);
|
||||
let mut main_function = MirFunction::new(main_signature, entry_block);
|
||||
let mut block = BasicBlock::new(entry_block);
|
||||
|
||||
// %str = const "Hello, WASM!"
|
||||
let str_value = ValueId::new(0);
|
||||
block.instructions.push(MirInstruction::Const {
|
||||
dst: str_value,
|
||||
value: ConstValue::String("Hello, WASM!".to_string()),
|
||||
});
|
||||
|
||||
// return %str
|
||||
block.terminator = Some(MirInstruction::Return {
|
||||
value: Some(str_value),
|
||||
});
|
||||
|
||||
main_function.blocks.insert(entry_block, block);
|
||||
|
||||
module.functions.insert("main".to_string(), main_function);
|
||||
module
|
||||
}
|
||||
|
||||
/// Build a MIR module with multiple string constants
|
||||
fn build_multiple_string_const_mir_module() -> MirModule {
|
||||
let mut module = MirModule::new("test_multiple_strings".to_string());
|
||||
|
||||
let main_signature = FunctionSignature {
|
||||
name: "main".to_string(),
|
||||
params: vec![],
|
||||
return_type: MirType::Integer,
|
||||
effects: EffectMask::PURE,
|
||||
};
|
||||
|
||||
let entry_block = BasicBlockId::new(0);
|
||||
let mut main_function = MirFunction::new(main_signature, entry_block);
|
||||
let mut block = BasicBlock::new(entry_block);
|
||||
|
||||
// %str1 = const "First"
|
||||
let str1_value = ValueId::new(0);
|
||||
block.instructions.push(MirInstruction::Const {
|
||||
dst: str1_value,
|
||||
value: ConstValue::String("First".to_string()),
|
||||
});
|
||||
|
||||
// %str2 = const "Second"
|
||||
let str2_value = ValueId::new(1);
|
||||
block.instructions.push(MirInstruction::Const {
|
||||
dst: str2_value,
|
||||
value: ConstValue::String("Second".to_string()),
|
||||
});
|
||||
|
||||
// %str3 = const "First" (duplicate)
|
||||
let str3_value = ValueId::new(2);
|
||||
block.instructions.push(MirInstruction::Const {
|
||||
dst: str3_value,
|
||||
value: ConstValue::String("First".to_string()),
|
||||
});
|
||||
|
||||
// return %str1
|
||||
block.terminator = Some(MirInstruction::Return {
|
||||
value: Some(str1_value),
|
||||
});
|
||||
|
||||
main_function.blocks.insert(entry_block, block);
|
||||
|
||||
module.functions.insert("main".to_string(), main_function);
|
||||
module
|
||||
}
|
||||
Reference in New Issue
Block a user