Files
hakorune/src/tests/tokenizer_unicode_toggle.rs
nyash-codex 96ea3892af phase-20.45: PRIMARY no-fallback reps + MIR v0 shape fixes
- Fix MIR v0 shape in lowers: functions[] + name="main" + blocks.id
  * lower_return_int_box.hako
  * lower_return_binop_box.hako
- runner_min: adopt LowerReturnBinOpBox before ReturnInt
- Add PRIMARY no-fallback canaries (all PASS):
  * return-binop / array-size / load-store / return-logical (OR)
- Fix phase2043 runner_min canary alias (Runner -> BuilderRunnerMinBox)
- Update docs: phase-20.45 README (PRIMARY reps), CURRENT_TASK progress

Ancillary: keep builder/provider/canary files in sync; no unrelated behavior changes.
2025-11-05 18:57:03 +09:00

34 lines
1.0 KiB
Rust

use crate::tokenizer::{NyashTokenizer, TokenType};
/// Tokenizes `src` and returns the payload of the first `STRING` token produced.
///
/// Tokens that are not `STRING` are skipped, so the string literal does not
/// have to be the very first token in the stream.
///
/// # Panics
///
/// Panics with `"tokenize"` if tokenization fails, and with
/// `"no STRING token found"` if the token stream contains no `STRING` token.
fn collect_string_token(src: &str) -> String {
    let mut tokenizer = NyashTokenizer::new(src);
    tokenizer
        .tokenize()
        .expect("tokenize")
        .into_iter()
        .find_map(|token| match token.token_type {
            TokenType::STRING(text) => Some(text),
            _ => None,
        })
        .unwrap_or_else(|| panic!("no STRING token found"))
}
/// Serializes the tests below: they mutate process-wide environment variables,
/// and the default test harness runs `#[test]` functions on parallel threads.
static DECODE_TOGGLE_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Environment variables touched by the tests in this file.
const DECODE_TOGGLE_VARS: [&str; 2] = [
    "NYASH_PARSER_DECODE_UNICODE",
    "HAKO_PARSER_DECODE_UNICODE",
];

/// RAII guard that holds [`DECODE_TOGGLE_LOCK`] and, when dropped, puts every
/// variable in [`DECODE_TOGGLE_VARS`] back to the value it had on acquisition.
///
/// The restore runs in `Drop`, so it also happens when an assertion panics.
struct DecodeToggleEnv {
    saved: Vec<(&'static str, Option<std::ffi::OsString>)>,
    // Dropped after `Drop::drop` has run, i.e. after the environment is restored.
    _serial: std::sync::MutexGuard<'static, ()>,
}

impl DecodeToggleEnv {
    /// Takes the lock and snapshots the current values of the toggle variables.
    fn acquire() -> Self {
        // A poisoned lock only means another test failed while holding it; the
        // guarded data is `()`, so continuing is safe and avoids cascading failures.
        let serial = DECODE_TOGGLE_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let saved = DECODE_TOGGLE_VARS
            .iter()
            .map(|&name| (name, std::env::var_os(name)))
            .collect();
        Self {
            saved,
            _serial: serial,
        }
    }
}

impl Drop for DecodeToggleEnv {
    fn drop(&mut self) {
        for (name, value) in self.saved.drain(..) {
            match value {
                Some(previous) => std::env::set_var(name, previous),
                None => std::env::remove_var(name),
            }
        }
    }
}

/// With both toggle variables unset, a `\u0041` escape is expected to stay literal.
#[test]
fn unicode_decode_toggle_off_keeps_literal() {
    let _env = DecodeToggleEnv::acquire();
    // OFF by default: make sure neither toggle is set.
    std::env::remove_var("NYASH_PARSER_DECODE_UNICODE");
    std::env::remove_var("HAKO_PARSER_DECODE_UNICODE");

    let literal = collect_string_token("\"\\u0041\"");
    assert_eq!(literal, "\\u0041");
}

/// With `NYASH_PARSER_DECODE_UNICODE=1`, a basic escape and a surrogate pair
/// are both expected to be decoded.
#[test]
fn unicode_decode_toggle_on_decodes_basic_and_surrogate() {
    let _env = DecodeToggleEnv::acquire();
    // ON: enable decode. The guard undoes this when the test ends.
    std::env::set_var("NYASH_PARSER_DECODE_UNICODE", "1");

    let basic = collect_string_token("\"\\u0041\"");
    assert_eq!(basic, "A");

    let pair = collect_string_token("\"\\uD83D\\uDE00\"");
    // Expect the surrogate pair to decode into one char (😀, U+1F600).
    assert_eq!(pair.chars().count(), 1);
    assert_eq!(pair, "\u{1F600}");
}