phase-20.45: PRIMARY no-fallback reps + MIR v0 shape fixes
- Fix MIR v0 shape in lowers: functions[] + name="main" + blocks.id
  * lower_return_int_box.hako
  * lower_return_binop_box.hako
- runner_min: adopt LowerReturnBinOpBox before ReturnInt
- Add PRIMARY no-fallback canaries (all PASS):
  * return-binop / array-size / load-store / return-logical (OR)
- Fix phase2043 runner_min canary alias (Runner -> BuilderRunnerMinBox)
- Update docs: phase-20.45 README (PRIMARY reps), CURRENT_TASK progress

Ancillary: keep builder/provider/canary files in sync; no unrelated behavior changes.
This commit is contained in:
@ -28,6 +28,66 @@ impl NyashTokenizer {
|
||||
Some('"') => string_value.push('"'),
|
||||
Some('\'') => string_value.push('\''), // 1-quote: エスケープされたシングルクォート
|
||||
Some('/') => string_value.push('/'), // \/ を許容
|
||||
Some('u') => {
|
||||
// Unicode decode (optional; default OFF)
|
||||
if crate::config::env::parser_decode_unicode() {
|
||||
let base = self.position; // index of 'u'
|
||||
// read 4 hex digits without consuming; then advance position in bulk
|
||||
let read_hex4 = |input: &Vec<char>, start: usize| -> Option<u32> {
|
||||
if start + 4 > input.len() { return None; }
|
||||
let d0 = input.get(start)?.to_digit(16)?;
|
||||
let d1 = input.get(start + 1)?.to_digit(16)?;
|
||||
let d2 = input.get(start + 2)?.to_digit(16)?;
|
||||
let d3 = input.get(start + 3)?.to_digit(16)?;
|
||||
Some((d0 << 12) | (d1 << 8) | (d2 << 4) | d3)
|
||||
};
|
||||
let first_start = base + 1; // after 'u'
|
||||
if let Some(u1) = read_hex4(&self.input, first_start) {
|
||||
// consume 'u' + 4 hex
|
||||
self.position = base + 5;
|
||||
let mut out_char: Option<char> = None;
|
||||
// surrogate pair
|
||||
if (0xD800..=0xDBFF).contains(&u1) {
|
||||
if self.position + 6 <= self.input.len()
|
||||
&& self.input.get(self.position) == Some(&'\\')
|
||||
&& self.input.get(self.position + 1) == Some(&'u')
|
||||
{
|
||||
if let Some(u2) = read_hex4(&self.input, self.position + 2) {
|
||||
if (0xDC00..=0xDFFF).contains(&u2) {
|
||||
let high_ten = (u1 - 0xD800) as u32;
|
||||
let low_ten = (u2 - 0xDC00) as u32;
|
||||
let scalar = 0x10000 + ((high_ten << 10) | low_ten);
|
||||
out_char = std::char::from_u32(scalar);
|
||||
// consume '\\u' + 4 hex of low surrogate
|
||||
self.position += 6;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if out_char.is_none() {
|
||||
out_char = std::char::from_u32(u1 as u32);
|
||||
}
|
||||
if let Some(ch) = out_char {
|
||||
string_value.push(ch);
|
||||
// Skip the generic advance at loop end to avoid double step
|
||||
continue;
|
||||
} else {
|
||||
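// Fallback to literal `\u` when the code unit is not a valid scalar (e.g. a lone surrogate).
// NOTE(review): the 4 hex digits were already consumed above (position = base + 5) and are
// not re-emitted here, so they are dropped from the output — confirm this is intended.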
|
||||
string_value.push('\\');
|
||||
string_value.push('u');
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// Not enough hex digits; keep literal
|
||||
string_value.push('\\');
|
||||
string_value.push('u');
|
||||
}
|
||||
} else {
|
||||
// Decoding disabled → keep literal
|
||||
string_value.push('\\');
|
||||
string_value.push('u');
|
||||
}
|
||||
}
|
||||
// NOTE: `\uXXXX` decoding (default OFF) is handled by the `Some('u')` arm above.
|
||||
Some(c2) => {
|
||||
// 未知のエスケープはそのまま残す(互換性維持)
|
||||
|
||||
Reference in New Issue
Block a user