Implement basic WASM string constant support

Co-authored-by: moe-charm <217100418+moe-charm@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2025-08-14 05:31:36 +00:00
parent 0fd094dafb
commit c1ae84ce61
3 changed files with 267 additions and 5 deletions

View File

@ -13,6 +13,7 @@ use std::collections::HashMap;
pub struct WasmModule { pub struct WasmModule {
pub imports: Vec<String>, pub imports: Vec<String>,
pub memory: String, pub memory: String,
pub data_segments: Vec<String>,
pub globals: Vec<String>, pub globals: Vec<String>,
pub functions: Vec<String>, pub functions: Vec<String>,
pub exports: Vec<String>, pub exports: Vec<String>,
@ -23,6 +24,7 @@ impl WasmModule {
Self { Self {
imports: Vec::new(), imports: Vec::new(),
memory: String::new(), memory: String::new(),
data_segments: Vec::new(),
globals: Vec::new(), globals: Vec::new(),
functions: Vec::new(), functions: Vec::new(),
exports: Vec::new(), exports: Vec::new(),
@ -44,6 +46,11 @@ impl WasmModule {
wat.push_str(&format!(" {}\n", self.memory)); wat.push_str(&format!(" {}\n", self.memory));
} }
// Add data segments (must come after memory)
for data_segment in &self.data_segments {
wat.push_str(&format!(" {}\n", data_segment));
}
// Add globals // Add globals
for global in &self.globals { for global in &self.globals {
wat.push_str(&format!(" {}\n", global)); wat.push_str(&format!(" {}\n", global));
@ -69,6 +76,9 @@ pub struct WasmCodegen {
/// Current function context for local variable management /// Current function context for local variable management
current_locals: HashMap<ValueId, u32>, current_locals: HashMap<ValueId, u32>,
next_local_index: u32, next_local_index: u32,
/// String literals and their data segment offsets
string_literals: HashMap<String, u32>,
next_data_offset: u32,
} }
impl WasmCodegen { impl WasmCodegen {
@ -76,6 +86,8 @@ impl WasmCodegen {
Self { Self {
current_locals: HashMap::new(), current_locals: HashMap::new(),
next_local_index: 0, next_local_index: 0,
string_literals: HashMap::new(),
next_data_offset: 0x1000, // Start data after initial heap space
} }
} }
@ -114,6 +126,9 @@ impl WasmCodegen {
wasm_module.functions.push(wasm_function); wasm_module.functions.push(wasm_function);
} }
// Add string literal data segments
wasm_module.data_segments.extend(self.generate_data_segments());
// Add main function export if it exists // Add main function export if it exists
if mir_module.functions.contains_key("main") { if mir_module.functions.contains_key("main") {
wasm_module.exports.push("(export \"main\" (func $main))".to_string()); wasm_module.exports.push("(export \"main\" (func $main))".to_string());
@ -188,7 +203,7 @@ impl WasmCodegen {
} }
/// Generate WASM instructions for a basic block /// Generate WASM instructions for a basic block
fn generate_basic_block(&self, mir_function: &MirFunction, block_id: BasicBlockId) -> Result<Vec<String>, WasmError> { fn generate_basic_block(&mut self, mir_function: &MirFunction, block_id: BasicBlockId) -> Result<Vec<String>, WasmError> {
let block = mir_function.blocks.get(&block_id) let block = mir_function.blocks.get(&block_id)
.ok_or_else(|| WasmError::CodegenError(format!("Basic block {:?} not found", block_id)))?; .ok_or_else(|| WasmError::CodegenError(format!("Basic block {:?} not found", block_id)))?;
@ -210,7 +225,7 @@ impl WasmCodegen {
} }
/// Generate WASM instructions for a single MIR instruction /// Generate WASM instructions for a single MIR instruction
fn generate_instruction(&self, instruction: &MirInstruction) -> Result<Vec<String>, WasmError> { fn generate_instruction(&mut self, instruction: &MirInstruction) -> Result<Vec<String>, WasmError> {
match instruction { match instruction {
// Phase 8.2 PoC1: Basic operations // Phase 8.2 PoC1: Basic operations
MirInstruction::Const { dst, value } => { MirInstruction::Const { dst, value } => {
@ -348,11 +363,20 @@ impl WasmCodegen {
} }
/// Generate constant loading /// Generate constant loading
fn generate_const(&self, dst: ValueId, value: &ConstValue) -> Result<Vec<String>, WasmError> { fn generate_const(&mut self, dst: ValueId, value: &ConstValue) -> Result<Vec<String>, WasmError> {
let const_instruction = match value { let const_instruction = match value {
ConstValue::Integer(n) => format!("i32.const {}", n), ConstValue::Integer(n) => format!("i32.const {}", n),
ConstValue::Bool(b) => format!("i32.const {}", if *b { 1 } else { 0 }), ConstValue::Bool(b) => format!("i32.const {}", if *b { 1 } else { 0 }),
ConstValue::Void => "i32.const 0".to_string(), ConstValue::Void => "i32.const 0".to_string(),
ConstValue::String(s) => {
// Register the string literal and get its offset
let data_offset = self.register_string_literal(s);
let string_len = s.len() as u32;
// Generate code to allocate a StringBox and return its pointer
// This is more complex and will need StringBox allocation
return self.generate_string_box_const(dst, data_offset, string_len);
},
_ => return Err(WasmError::UnsupportedInstruction( _ => return Err(WasmError::UnsupportedInstruction(
format!("Unsupported constant type: {:?}", value) format!("Unsupported constant type: {:?}", value)
)), )),
@ -417,6 +441,35 @@ impl WasmCodegen {
} }
} }
/// Emit the WAT instructions that build a StringBox for a string constant.
///
/// The emitted sequence calls `$alloc_stringbox`, stores the value it leaves on
/// the stack into the local assigned to `dst`, and then writes two fields
/// through that pointer: `data_offset` at byte offset 12 and `string_len` at
/// byte offset 16.
///
/// Returns an error if `dst` has no local index (propagated from
/// `get_local_index`).
fn generate_string_box_const(&self, dst: ValueId, data_offset: u32, string_len: u32) -> Result<Vec<String>, WasmError> {
    // StringBox layout: [type_id:0x1001][ref_count:1][field_count:2][data_ptr:offset][length:len]
    // NOTE(review): `$alloc_stringbox` is defined elsewhere; presumably it
    // fills in the first three header words -- confirm against the allocator.
    let local = self.get_local_index(dst)?;

    // Allocate the box and keep its pointer in the destination local.
    let mut code = vec![
        "call $alloc_stringbox".to_string(),
        format!("local.set ${}", local),
    ];

    // Each field write is: (box pointer + field offset), value, i32.store.
    // Offset 12 holds data_ptr, offset 16 holds length.
    for (field_offset, field_value) in [(12u32, data_offset), (16u32, string_len)] {
        code.push(format!("local.get ${}", local));
        code.push(format!("i32.const {}", field_offset));
        code.push("i32.add".to_string());
        code.push(format!("i32.const {}", field_value));
        code.push("i32.store".to_string());
    }

    Ok(code)
}
/// Generate print instruction (calls env.print import) /// Generate print instruction (calls env.print import)
fn generate_print(&self, value: ValueId) -> Result<Vec<String>, WasmError> { fn generate_print(&self, value: ValueId) -> Result<Vec<String>, WasmError> {
Ok(vec![ Ok(vec![
@ -425,6 +478,44 @@ impl WasmCodegen {
]) ])
} }
/// Look up (or assign) the data-segment offset for a string literal.
///
/// A literal that was registered before returns its previously assigned
/// offset. A new literal is placed at `next_data_offset`, which is then
/// advanced by the literal's length in bytes.
fn register_string_literal(&mut self, string: &str) -> u32 {
    match self.string_literals.get(string).copied() {
        // Already known: reuse the existing placement.
        Some(existing) => existing,
        None => {
            let assigned = self.next_data_offset;
            self.string_literals.insert(string.to_string(), assigned);
            // `str::len` is the UTF-8 byte length, which is what the data
            // segment will occupy.
            self.next_data_offset += string.len() as u32;
            assigned
        }
    }
}
/// Generate one WAT `(data ...)` segment per registered string literal.
///
/// Every byte of a literal is written as a `\hh` hex escape, so the segment
/// text never depends on which characters the literal contains.
///
/// Segments are returned in ascending offset order. The literals live in a
/// `HashMap`, whose iteration order is unspecified, so emitting them in map
/// order would make the generated WAT differ from run to run for the same
/// input. The literal text is used as a tie-breaker because an empty literal
/// shares its offset with the literal registered after it.
fn generate_data_segments(&self) -> Vec<String> {
    let mut literals: Vec<(&String, u32)> = self
        .string_literals
        .iter()
        .map(|(text, &offset)| (text, offset))
        .collect();
    literals.sort_unstable_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(b.0)));

    literals
        .into_iter()
        .map(|(text, offset)| {
            // Convert to hex-escaped string for WAT
            let escaped: String = text
                .bytes()
                .map(|byte| format!("\\{:02x}", byte))
                .collect();
            format!("(data (i32.const {}) \"{}\")", offset, escaped)
        })
        .collect()
}
/// Get WASM local variable index for ValueId /// Get WASM local variable index for ValueId
fn get_local_index(&self, value_id: ValueId) -> Result<u32, WasmError> { fn get_local_index(&self, value_id: ValueId) -> Result<u32, WasmError> {
self.current_locals.get(&value_id) self.current_locals.get(&value_id)
@ -452,7 +543,7 @@ mod tests {
#[test] #[test]
fn test_constant_generation() { fn test_constant_generation() {
let codegen = WasmCodegen::new(); let mut codegen = WasmCodegen::new();
let dst = ValueId::new(0); let dst = ValueId::new(0);
let result = codegen.generate_const(dst, &ConstValue::Integer(42)); let result = codegen.generate_const(dst, &ConstValue::Integer(42));

View File

@ -43,7 +43,14 @@ impl RuntimeImports {
result: None, result: None,
}); });
// Future: env.print_string for string output // env.print_str for string debugging (ptr, len)
self.imports.push(ImportFunction {
module: "env".to_string(),
name: "print_str".to_string(),
params: vec!["i32".to_string(), "i32".to_string()],
result: None,
});
// Future: env.file_read, env.file_write for file I/O // Future: env.file_read, env.file_write for file I/O
// Future: env.http_request for network access // Future: env.http_request for network access
} }

View File

@ -0,0 +1,164 @@
/*!
* WASM String Constants Test - Validates Issue #65 implementation
*
* Tests string constant support in WASM backend:
* - ConstValue::String handling in generate_const
* - Data segment generation for string literals
* - StringBox creation with proper layout
* - WAT generation includes data segments and string allocation
*/
use nyash_rust::mir::{
MirModule, MirFunction, FunctionSignature, MirType, EffectMask,
BasicBlock, BasicBlockId, ValueId, MirInstruction, ConstValue
};
use nyash_rust::backend::wasm::WasmBackend;
/// Compiles a module holding a single string constant and checks that the
/// generated WAT contains the pieces string support relies on, then that the
/// module also compiles to a WASM binary.
#[test]
fn test_wasm_string_constant_basic() {
    // Build MIR equivalent to:
    // function main() {
    //     %str = const "Hello, WASM!"
    //     return %str   // Should return StringBox pointer
    // }
    let mut backend = WasmBackend::new();
    let mir_module = build_string_const_mir_module();

    // Generate WAT text for debugging
    let wat_result = backend.compile_to_wat(mir_module.clone());
    assert!(wat_result.is_ok(), "WAT generation should succeed for string constants");
    let wat_text = wat_result.unwrap();

    // Verify WAT contains expected elements for string support
    assert!(wat_text.contains("(module"), "Should contain module declaration");
    assert!(wat_text.contains("memory"), "Should contain memory declaration");
    // Match the opening of a data segment form rather than the bare word
    // "data", which any identifier or comment containing it would satisfy.
    assert!(wat_text.contains("(data"), "Should contain data segment for string literal");
    assert!(wat_text.contains("\\48\\65\\6c\\6c\\6f"), "Should contain UTF-8 bytes for 'Hello'");
    assert!(wat_text.contains("$alloc_stringbox"), "Should contain StringBox allocator");
    assert!(wat_text.contains("print_str"), "Should contain print_str import");

    // Compile to WASM binary
    let wasm_result = backend.compile_module(mir_module);
    if let Err(e) = &wasm_result {
        // Dump the error and the WAT so a failing run can be diagnosed.
        println!("WASM compilation error: {}", e);
        println!("Generated WAT:\n{}", wat_text);
    }
    assert!(wasm_result.is_ok(), "WASM compilation should succeed for string constants");
}
/// Compiles a module with three string constants, two of them identical, and
/// checks that only the two distinct literals get data segments.
#[test]
fn test_wasm_string_constant_multiple() {
    // MIR under test:
    //   function main() {
    //       %str1 = const "First"
    //       %str2 = const "Second"
    //       %str3 = const "First"   // same text as %str1, expected to share its segment
    //       return %str1
    //   }
    let mut backend = WasmBackend::new();
    let module = build_multiple_string_const_mir_module();

    let generated = backend.compile_to_wat(module.clone());
    assert!(generated.is_ok(), "WAT generation should succeed for multiple strings");
    let wat_text = generated.unwrap();

    // Both distinct literals must be present, hex-escaped.
    let expected_literals = [
        ("\\46\\69\\72\\73\\74", "Should contain 'First' string in hex"),
        ("\\53\\65\\63\\6f\\6e\\64", "Should contain 'Second' string in hex"),
    ];
    for (hex, message) in expected_literals {
        assert!(wat_text.contains(hex), "{}", message);
    }

    // Two unique strings -> two segments; the repeated "First" adds none.
    assert_eq!(
        wat_text.matches("(data").count(),
        2,
        "Should have exactly 2 data segments for 2 unique strings"
    );

    assert!(
        backend.compile_module(module).is_ok(),
        "WASM compilation should succeed for multiple strings"
    );
}
/// Construct a MIR module named `test_string_const` whose `main` function
/// loads the constant "Hello, WASM!" into value 0 and returns it.
fn build_string_const_mir_module() -> MirModule {
    let entry = BasicBlockId::new(0);

    // `main` takes no parameters; its return type is declared as an integer
    // because the StringBox pointer is carried as an i32.
    let mut function = MirFunction::new(
        FunctionSignature {
            name: "main".to_string(),
            params: vec![],
            return_type: MirType::Integer,
            effects: EffectMask::PURE,
        },
        entry,
    );

    let literal = ValueId::new(0);
    let mut body = BasicBlock::new(entry);

    // %str = const "Hello, WASM!"
    body.instructions.push(MirInstruction::Const {
        dst: literal,
        value: ConstValue::String("Hello, WASM!".to_string()),
    });

    // return %str
    body.terminator = Some(MirInstruction::Return {
        value: Some(literal),
    });

    function.blocks.insert(entry, body);

    let mut module = MirModule::new("test_string_const".to_string());
    module.functions.insert("main".to_string(), function);
    module
}
/// Construct a MIR module named `test_multiple_strings` whose `main` function
/// loads three string constants ("First", "Second", and "First" again) into
/// values 0, 1 and 2, then returns value 0.
fn build_multiple_string_const_mir_module() -> MirModule {
    let entry = BasicBlockId::new(0);
    let mut function = MirFunction::new(
        FunctionSignature {
            name: "main".to_string(),
            params: vec![],
            return_type: MirType::Integer,
            effects: EffectMask::PURE,
        },
        entry,
    );
    let mut body = BasicBlock::new(entry);

    // (value id, literal text) in emission order; the third entry repeats
    // the first literal's text on purpose.
    let constants = [(0, "First"), (1, "Second"), (2, "First")];
    for (id, text) in constants {
        body.instructions.push(MirInstruction::Const {
            dst: ValueId::new(id),
            value: ConstValue::String(text.to_string()),
        });
    }

    // return %str1 (value 0)
    body.terminator = Some(MirInstruction::Return {
        value: Some(ValueId::new(0)),
    });

    function.blocks.insert(entry, body);

    let mut module = MirModule::new("test_multiple_strings".to_string());
    module.functions.insert("main".to_string(), function);
    module
}