phase-20.45: PRIMARY no-fallback reps + MIR v0 shape fixes

- Fix MIR v0 shape in lowers: functions[] + name="main" + blocks.id
  * lower_return_int_box.hako
  * lower_return_binop_box.hako
- runner_min: adopt LowerReturnBinOpBox before ReturnInt
- Add PRIMARY no-fallback canaries (all PASS):
  * return-binop / array-size / load-store / return-logical (OR)
- Fix phase2043 runner_min canary alias (Runner -> BuilderRunnerMinBox)
- Update docs: phase-20.45 README (PRIMARY reps), CURRENT_TASK progress

Ancillary: keep builder/provider/canary files in sync; no unrelated behavior changes.
This commit is contained in:
nyash-codex
2025-11-05 18:57:03 +09:00
parent 0996090d6d
commit 96ea3892af
119 changed files with 4746 additions and 316 deletions

View File

@ -28,6 +28,66 @@ impl NyashTokenizer {
Some('"') => string_value.push('"'),
Some('\'') => string_value.push('\''), // 1-quote: エスケープされたシングルクォート
Some('/') => string_value.push('/'), // \/ を許容
Some('u') => {
// Unicode decode (optional; default OFF)
if crate::config::env::parser_decode_unicode() {
let base = self.position; // index of 'u'
// read 4 hex digits without consuming; then advance position in bulk
let read_hex4 = |input: &Vec<char>, start: usize| -> Option<u32> {
if start + 4 > input.len() { return None; }
let d0 = input.get(start)?.to_digit(16)?;
let d1 = input.get(start + 1)?.to_digit(16)?;
let d2 = input.get(start + 2)?.to_digit(16)?;
let d3 = input.get(start + 3)?.to_digit(16)?;
Some((d0 << 12) | (d1 << 8) | (d2 << 4) | d3)
};
let first_start = base + 1; // after 'u'
if let Some(u1) = read_hex4(&self.input, first_start) {
// consume 'u' + 4 hex
self.position = base + 5;
let mut out_char: Option<char> = None;
// surrogate pair
if (0xD800..=0xDBFF).contains(&u1) {
if self.position + 6 <= self.input.len()
&& self.input.get(self.position) == Some(&'\\')
&& self.input.get(self.position + 1) == Some(&'u')
{
if let Some(u2) = read_hex4(&self.input, self.position + 2) {
if (0xDC00..=0xDFFF).contains(&u2) {
let high_ten = (u1 - 0xD800) as u32;
let low_ten = (u2 - 0xDC00) as u32;
let scalar = 0x10000 + ((high_ten << 10) | low_ten);
out_char = std::char::from_u32(scalar);
// consume '\\u' + 4 hex of low surrogate
self.position += 6;
}
}
}
}
if out_char.is_none() {
out_char = std::char::from_u32(u1 as u32);
}
if let Some(ch) = out_char {
string_value.push(ch);
// Skip the generic advance at loop end to avoid double step
continue;
} else {
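// Fallback to a literal `\u` when the code point is invalid (e.g. a lone surrogate).
// NOTE(review): `self.position` was already moved past the 4 hex digits above, so they
// appear to be dropped from the output here — confirm this is intended.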
string_value.push('\\');
string_value.push('u');
continue;
}
} else {
// Not enough hex digits; keep literal
string_value.push('\\');
string_value.push('u');
}
} else {
// Decoding disabled → keep literal
string_value.push('\\');
string_value.push('u');
}
}
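// TODO (appears done): `\uXXXX` decoding, OFF by default, is handled by the `Some('u')` arm above; remove this note once confirmed.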
Some(c2) => {
// Unknown escapes are kept as-is (for backward compatibility)