runner: preserve UTF-8 in text-merge normalization

This commit is contained in:
2025-12-28 13:51:44 +09:00
parent fe3eb9f1f7
commit 5757eb740e
2 changed files with 16 additions and 4 deletions

View File

@ -217,8 +217,8 @@ fn normalize_text_for_inline(s: &str) -> String {
// pattern: `;` followed by optional spaces/newlines then `}`
// Do a few passes to cover nested cases without regex
for _ in 0..2 {
let mut tmp = String::with_capacity(out.len());
let bytes = out.as_bytes();
let mut tmp: Vec<u8> = Vec::with_capacity(bytes.len());
let mut i = 0usize;
while i < bytes.len() {
if bytes[i] == b';' {
@ -238,13 +238,25 @@ fn normalize_text_for_inline(s: &str) -> String {
continue;
}
}
tmp.push(bytes[i] as char);
tmp.push(bytes[i]);
i += 1;
}
out = tmp;
out = String::from_utf8(tmp).expect("normalize_text_for_inline: invalid UTF-8");
}
if !out.ends_with('\n') {
out.push('\n');
}
out
}
#[cfg(test)]
mod tests {
    use super::normalize_text_for_inline;

    /// Regression test: multi-byte UTF-8 characters (2-byte `é`, 4-byte `𝄞`)
    /// must come out of normalization unchanged, while the `;` that is
    /// followed only by a newline and a closing `}` is dropped.
    #[test]
    fn normalize_text_for_inline_preserves_utf8() {
        let input = "aé𝄞;\n}\n";
        let expected = "aé𝄞\n}\n";

        let normalized = normalize_text_for_inline(input);

        assert_eq!(normalized, expected);
    }
}

View File

@ -13,7 +13,7 @@ run_case() {
local expect_len="$2"
set +e
OUTPUT=$(timeout "$RUN_TIMEOUT_SECS" env -u NYASH_ROOT NYASH_STR_CP="$mode" "$NYASH_BIN" "$INPUT" 2>&1)
OUTPUT=$(timeout "$RUN_TIMEOUT_SECS" env NYASH_STR_CP="$mode" "$NYASH_BIN" "$INPUT" 2>&1)
EXIT_CODE=$?
set -e