diff --git a/src/backend/wasm/codegen.rs b/src/backend/wasm/codegen.rs index d94b50dc..1faa9c9f 100644 --- a/src/backend/wasm/codegen.rs +++ b/src/backend/wasm/codegen.rs @@ -13,6 +13,7 @@ use std::collections::HashMap; pub struct WasmModule { pub imports: Vec, pub memory: String, + pub data_segments: Vec, pub globals: Vec, pub functions: Vec, pub exports: Vec, @@ -23,6 +24,7 @@ impl WasmModule { Self { imports: Vec::new(), memory: String::new(), + data_segments: Vec::new(), globals: Vec::new(), functions: Vec::new(), exports: Vec::new(), @@ -44,6 +46,11 @@ impl WasmModule { wat.push_str(&format!(" {}\n", self.memory)); } + // Add data segments (must come after memory) + for data_segment in &self.data_segments { + wat.push_str(&format!(" {}\n", data_segment)); + } + // Add globals for global in &self.globals { wat.push_str(&format!(" {}\n", global)); @@ -69,6 +76,9 @@ pub struct WasmCodegen { /// Current function context for local variable management current_locals: HashMap, next_local_index: u32, + /// String literals and their data segment offsets + string_literals: HashMap, + next_data_offset: u32, } impl WasmCodegen { @@ -76,6 +86,8 @@ impl WasmCodegen { Self { current_locals: HashMap::new(), next_local_index: 0, + string_literals: HashMap::new(), + next_data_offset: 0x1000, // Start data after initial heap space } } @@ -114,6 +126,9 @@ impl WasmCodegen { wasm_module.functions.push(wasm_function); } + // Add string literal data segments + wasm_module.data_segments.extend(self.generate_data_segments()); + // Add main function export if it exists if mir_module.functions.contains_key("main") { wasm_module.exports.push("(export \"main\" (func $main))".to_string()); @@ -188,7 +203,7 @@ impl WasmCodegen { } /// Generate WASM instructions for a basic block - fn generate_basic_block(&self, mir_function: &MirFunction, block_id: BasicBlockId) -> Result, WasmError> { + fn generate_basic_block(&mut self, mir_function: &MirFunction, block_id: BasicBlockId) -> Result, WasmError> { let block = mir_function.blocks.get(&block_id) .ok_or_else(|| WasmError::CodegenError(format!("Basic block {:?} not found", block_id)))?; @@ -210,7 +225,7 @@ impl WasmCodegen { } /// Generate WASM instructions for a single MIR instruction - fn generate_instruction(&self, instruction: &MirInstruction) -> Result, WasmError> { + fn generate_instruction(&mut self, instruction: &MirInstruction) -> Result, WasmError> { match instruction { // Phase 8.2 PoC1: Basic operations MirInstruction::Const { dst, value } => { @@ -348,11 +363,20 @@ impl WasmCodegen { } /// Generate constant loading - fn generate_const(&self, dst: ValueId, value: &ConstValue) -> Result, WasmError> { + fn generate_const(&mut self, dst: ValueId, value: &ConstValue) -> Result, WasmError> { let const_instruction = match value { ConstValue::Integer(n) => format!("i32.const {}", n), ConstValue::Bool(b) => format!("i32.const {}", if *b { 1 } else { 0 }), ConstValue::Void => "i32.const 0".to_string(), + ConstValue::String(s) => { + // Register the string literal and get its offset + let data_offset = self.register_string_literal(s); + let string_len = s.len() as u32; + + // Generate code to allocate a StringBox and return its pointer + // This is more complex and will need StringBox allocation + return self.generate_string_box_const(dst, data_offset, string_len); + }, _ => return Err(WasmError::UnsupportedInstruction( format!("Unsupported constant type: {:?}", value) )), @@ -417,6 +441,35 @@ impl WasmCodegen { } } + /// Generate StringBox allocation for a string constant + fn generate_string_box_const(&self, dst: ValueId, data_offset: u32, string_len: u32) -> Result, WasmError> { + // Allocate a StringBox using the StringBox allocator + // StringBox layout: [type_id:0x1001][ref_count:1][field_count:2][data_ptr:offset][length:len] + Ok(vec![ + // Call StringBox allocator function + "call $alloc_stringbox".to_string(), + // Store the result (StringBox pointer) in local variable + format!("local.set ${}", self.get_local_index(dst)?), + + // Initialize StringBox fields + // Get StringBox pointer back + format!("local.get ${}", self.get_local_index(dst)?), + // Set data_ptr field (offset 12 from StringBox pointer) + "i32.const 12".to_string(), + "i32.add".to_string(), + format!("i32.const {}", data_offset), + "i32.store".to_string(), + + // Get StringBox pointer again + format!("local.get ${}", self.get_local_index(dst)?), + // Set length field (offset 16 from StringBox pointer) + "i32.const 16".to_string(), + "i32.add".to_string(), + format!("i32.const {}", string_len), + "i32.store".to_string(), + ]) + } + /// Generate print instruction (calls env.print import) fn generate_print(&self, value: ValueId) -> Result, WasmError> { Ok(vec![ @@ -425,6 +478,44 @@ impl WasmCodegen { ]) } + /// Register a string literal and return its data offset + fn register_string_literal(&mut self, string: &str) -> u32 { + if let Some(&offset) = self.string_literals.get(string) { + return offset; + } + + let offset = self.next_data_offset; + let string_bytes = string.as_bytes(); + self.string_literals.insert(string.to_string(), offset); + self.next_data_offset += string_bytes.len() as u32; + + offset + } + + /// Generate data segments for all registered string literals + fn generate_data_segments(&self) -> Vec { + let mut segments = Vec::new(); + + for (string, &offset) in &self.string_literals { + let string_bytes = string.as_bytes(); + + // Convert to hex-escaped string for WAT + let byte_string = string_bytes.iter() + .map(|b| format!("\\{:02x}", b)) + .collect::(); + + let data_segment = format!( + "(data (i32.const {}) \"{}\")", + offset, + byte_string + ); + + segments.push(data_segment); + } + + segments + } + /// Get WASM local variable index for ValueId fn get_local_index(&self, value_id: ValueId) -> Result { self.current_locals.get(&value_id) @@ -452,7 +543,7 @@ mod tests { #[test] fn test_constant_generation() { - let codegen = WasmCodegen::new(); + let mut codegen = WasmCodegen::new(); let dst = ValueId::new(0); let result = codegen.generate_const(dst, &ConstValue::Integer(42)); diff --git a/src/backend/wasm/runtime.rs b/src/backend/wasm/runtime.rs index 5ebeb2e6..889efda6 100644 --- a/src/backend/wasm/runtime.rs +++ b/src/backend/wasm/runtime.rs @@ -43,7 +43,14 @@ impl RuntimeImports { result: None, }); - // Future: env.print_string for string output + // env.print_str for string debugging (ptr, len) + self.imports.push(ImportFunction { + module: "env".to_string(), + name: "print_str".to_string(), + params: vec!["i32".to_string(), "i32".to_string()], + result: None, + }); + // Future: env.file_read, env.file_write for file I/O // Future: env.http_request for network access } diff --git a/tests/wasm_string_constants.rs b/tests/wasm_string_constants.rs new file mode 100644 index 00000000..9a3b0df1 --- /dev/null +++ b/tests/wasm_string_constants.rs @@ -0,0 +1,164 @@ +/*! + * WASM String Constants Test - Validates Issue #65 implementation + * + * Tests string constant support in WASM backend: + * - ConstValue::String handling in generate_const + * - Data segment generation for string literals + * - StringBox creation with proper layout + * - WAT generation includes data segments and string allocation + */ + +use nyash_rust::mir::{ + MirModule, MirFunction, FunctionSignature, MirType, EffectMask, + BasicBlock, BasicBlockId, ValueId, MirInstruction, ConstValue +}; +use nyash_rust::backend::wasm::WasmBackend; + +#[test] +fn test_wasm_string_constant_basic() { + // Build MIR equivalent to: + // function main() { + // %str = const "Hello, WASM!" + // return %str // Should return StringBox pointer + // } + + let mut backend = WasmBackend::new(); + let mir_module = build_string_const_mir_module(); + + // Generate WAT text for debugging + let wat_result = backend.compile_to_wat(mir_module.clone()); + assert!(wat_result.is_ok(), "WAT generation should succeed for string constants"); + + let wat_text = wat_result.unwrap(); + + // Verify WAT contains expected elements for string support + assert!(wat_text.contains("(module"), "Should contain module declaration"); + assert!(wat_text.contains("memory"), "Should contain memory declaration"); + assert!(wat_text.contains("data"), "Should contain data segment for string literal"); + assert!(wat_text.contains("\\48\\65\\6c\\6c\\6f"), "Should contain UTF-8 bytes for 'Hello'"); + assert!(wat_text.contains("$alloc_stringbox"), "Should contain StringBox allocator"); + assert!(wat_text.contains("print_str"), "Should contain print_str import"); + + // Verify string literal is properly embedded + // (The assertion for UTF-8 bytes is above) + + // Compile to WASM binary + let wasm_result = backend.compile_module(mir_module); + if let Err(e) = &wasm_result { + println!("WASM compilation error: {}", e); + println!("Generated WAT:\n{}", wat_text); + } + assert!(wasm_result.is_ok(), "WASM compilation should succeed for string constants"); +} + +#[test] +fn test_wasm_string_constant_multiple() { + // Test multiple string constants to verify data segment management + // function main() { + // %str1 = const "First" + // %str2 = const "Second" + // %str3 = const "First" // Duplicate should reuse data segment + // return %str1 + // } + + let mut backend = WasmBackend::new(); + let mir_module = build_multiple_string_const_mir_module(); + + let wat_result = backend.compile_to_wat(mir_module.clone()); + assert!(wat_result.is_ok(), "WAT generation should succeed for multiple strings"); + + let wat_text = wat_result.unwrap(); + + // Should contain both unique strings (in hex format) + assert!(wat_text.contains("\\46\\69\\72\\73\\74"), "Should contain 'First' string in hex"); + assert!(wat_text.contains("\\53\\65\\63\\6f\\6e\\64"), "Should contain 'Second' string in hex"); + + // Should have 2 data segments (First and Second, duplicate First reused) + let data_count = wat_text.matches("(data").count(); + assert_eq!(data_count, 2, "Should have exactly 2 data segments for 2 unique strings"); + + let wasm_result = backend.compile_module(mir_module); + assert!(wasm_result.is_ok(), "WASM compilation should succeed for multiple strings"); +} + +/// Build a MIR module with a single string constant +fn build_string_const_mir_module() -> MirModule { + let mut module = MirModule::new("test_string_const".to_string()); + + // Create main function signature + let main_signature = FunctionSignature { + name: "main".to_string(), + params: vec![], + return_type: MirType::Integer, // StringBox pointer as i32 + effects: EffectMask::PURE, + }; + + // Create basic block + let entry_block = BasicBlockId::new(0); + let mut main_function = MirFunction::new(main_signature, entry_block); + let mut block = BasicBlock::new(entry_block); + + // %str = const "Hello, WASM!" + let str_value = ValueId::new(0); + block.instructions.push(MirInstruction::Const { + dst: str_value, + value: ConstValue::String("Hello, WASM!".to_string()), + }); + + // return %str + block.terminator = Some(MirInstruction::Return { + value: Some(str_value), + }); + + main_function.blocks.insert(entry_block, block); + + module.functions.insert("main".to_string(), main_function); + module +} + +/// Build a MIR module with multiple string constants +fn build_multiple_string_const_mir_module() -> MirModule { + let mut module = MirModule::new("test_multiple_strings".to_string()); + + let main_signature = FunctionSignature { + name: "main".to_string(), + params: vec![], + return_type: MirType::Integer, + effects: EffectMask::PURE, + }; + + let entry_block = BasicBlockId::new(0); + let mut main_function = MirFunction::new(main_signature, entry_block); + let mut block = BasicBlock::new(entry_block); + + // %str1 = const "First" + let str1_value = ValueId::new(0); + block.instructions.push(MirInstruction::Const { + dst: str1_value, + value: ConstValue::String("First".to_string()), + }); + + // %str2 = const "Second" + let str2_value = ValueId::new(1); + block.instructions.push(MirInstruction::Const { + dst: str2_value, + value: ConstValue::String("Second".to_string()), + }); + + // %str3 = const "First" (duplicate) + let str3_value = ValueId::new(2); + block.instructions.push(MirInstruction::Const { + dst: str3_value, + value: ConstValue::String("First".to_string()), + }); + + // return %str1 + block.terminator = Some(MirInstruction::Return { + value: Some(str1_value), + }); + + main_function.blocks.insert(entry_block, block); + + module.functions.insert("main".to_string(), main_function); + module +} \ No newline at end of file