runner: preserve UTF-8 in text-merge normalization

2025-12-28 13:51:44 +09:00
parent fe3eb9f1f7
commit 5757eb740e
2 changed files with 16 additions and 4 deletions
--- a/src/runner/modes/common_util/resolve/strip/merge.rs
+++ b/src/runner/modes/common_util/resolve/strip/merge.rs
@ -217,8 +217,8 @@ fn normalize_text_for_inline(s: &str) -> String {
    // pattern: `;` followed by optional spaces/newlines then `}`
    // Do a few passes to cover nested cases without regex
    for _ in 0..2 {
-        let mut tmp = String::with_capacity(out.len());
        let bytes = out.as_bytes();
+        let mut tmp: Vec<u8> = Vec::with_capacity(bytes.len());
        let mut i = 0usize;
        while i < bytes.len() {
            if bytes[i] == b';' {
@ -238,13 +238,25 @@ fn normalize_text_for_inline(s: &str) -> String {
                    continue;
                }
            }
-            tmp.push(bytes[i] as char);
+            tmp.push(bytes[i]);
            i += 1;
        }
-        out = tmp;
+        out = String::from_utf8(tmp).expect("normalize_text_for_inline: invalid UTF-8");
    }
    if !out.ends_with('\n') {
        out.push('\n');
    }
    out
 }
+
+#[cfg(test)]
+mod tests {
+    use super::normalize_text_for_inline;
+
+    #[test]
+    fn normalize_text_for_inline_preserves_utf8() {
+        let src = "aé𝄞;\n}\n"; // 1-, 2- and 4-byte UTF-8 chars, then `;` right before `}`
+        let out = normalize_text_for_inline(src);
+        assert_eq!(out, "aé𝄞\n}\n"); // `;` dropped; non-ASCII chars must come back intact
+    }
+}
--- a/tools/smokes/v2/profiles/integration/apps/string_cp_mode_min_vm.sh
+++ b/tools/smokes/v2/profiles/integration/apps/string_cp_mode_min_vm.sh
@ -13,7 +13,7 @@ run_case() {
    local expect_len="$2"

    set +e
-    OUTPUT=$(timeout "$RUN_TIMEOUT_SECS" env -u NYASH_ROOT NYASH_STR_CP="$mode" "$NYASH_BIN" "$INPUT" 2>&1)
+    OUTPUT=$(timeout "$RUN_TIMEOUT_SECS" env NYASH_STR_CP="$mode" "$NYASH_BIN" "$INPUT" 2>&1)
    EXIT_CODE=$?
    set -e