diff --git a/apps/tests/phase263_pattern2_seg_realworld_min.hako b/apps/tests/phase263_pattern2_seg_realworld_min.hako new file mode 100644 index 00000000..98864f71 --- /dev/null +++ b/apps/tests/phase263_pattern2_seg_realworld_min.hako @@ -0,0 +1,34 @@ +// Phase 29ab P4 / Phase 263: Pattern2 seg real-world minimal repro +// +// Goal: +// - seg is LoopBodyLocal and reassigned in the loop body +// - break condition depends on seg +// - current behavior: JoinIR freeze (read-only contract violation) +// - after fix: prints "4" and returns 4 + +static box Main { + main() { + local table = "a|||" + local i = 0 + + loop(true) { + local j = table.indexOf("|||", i) + local seg = "" + + if j >= 0 { + seg = table.substring(i, j) + } else { + seg = table.substring(i, table.length()) + } + + if seg == "" { + break + } + + i = j + 3 + } + + print(i) + return i + } +} diff --git a/docs/development/current/main/phases/phase-263/README.md b/docs/development/current/main/phases/phase-263/README.md index b7a89d0c..bb7cf5bb 100644 --- a/docs/development/current/main/phases/phase-263/README.md +++ b/docs/development/current/main/phases/phase-263/README.md @@ -215,6 +215,39 @@ git revert 93022e7e1 - **方針**: Pattern2 scope 内で完結、SSOT 維持、既定挙動不変 - **Fail-Fast 原則**: 対象外は Ok(None) で後続経路へ、対象だが未対応は Err で即座に失敗(silent skip 禁止) +--- + +# Phase 29ab P4: Stage‑B 実ログ seg(Derived vs Promote 決定) + +## Decision (SSOT) + +**A: Derived slot** を採用する。 + +理由: +- `seg` は loop body で再代入されるため、read-only promotion は原理的に不成立。 +- Stage‑B 実ログの形は「body 内で seg を再計算 → break で参照」であり、毎イテレーション再計算の Derived が素直。 +- 既存の Pattern2 の構造(BodyInit → Break)と `LoopBodyLocalEnv` に収まる。 + +## Derived slot contract (minimal) + +- 対象は **Pattern2 break 条件で参照される LoopBodyLocal 1 変数**。 +- ループ body に以下の最小形があること: + 1. `local seg = ` が top-level に存在 + 2. `if { seg = } else { seg = }` が top-level に存在 + 3. 
break guard より前に 1) と 2) がある +- `seg` への代入は上記 if/else のみ(他の代入がある場合は out-of-scope) +- 代入式は **純粋**(MethodCall/Literal/Variable)のみ + +## Fixtures / Smokes + +- `apps/tests/phase263_pattern2_seg_realworld_min.hako` (Stage‑B 実ログ最小化) +- `tools/smokes/v2/profiles/integration/apps/phase263_pattern2_seg_realworld_min_vm.sh` + +### Smoke switch rule + +- **Before Derived slot**: freeze を PASS(`[joinir/freeze]` を期待) +- **After Derived slot**: `print/return = 4` を PASS に切り替える + ## Related Documentation - **Plan file**: `/home/tomoaki/.claude/plans/eventual-mapping-lemon.md` diff --git a/src/backend/mir_interpreter/handlers/boxes_plugin.rs b/src/backend/mir_interpreter/handlers/boxes_plugin.rs index 11279786..91bf332e 100644 --- a/src/backend/mir_interpreter/handlers/boxes_plugin.rs +++ b/src/backend/mir_interpreter/handlers/boxes_plugin.rs @@ -1,4 +1,7 @@ use super::*; +use super::string_method_helpers::{ + parse_index_of_args, parse_last_index_of_args, ArgParsePolicy, +}; use crate::box_trait::NyashBox; pub(super) fn invoke_plugin_box( @@ -62,26 +65,31 @@ pub(super) fn invoke_plugin_box( let s = s_box.value; match method { "lastIndexOf" => { - if let Some(arg_id) = args.get(0) { - let needle = this.reg_load(*arg_id)?.to_string(); - let helper = crate::boxes::string_box::StringBox::new(s); - let result_box = helper.lastIndexOf(&needle); - this.write_from_box(dst, result_box); - Ok(()) - } else { - Err(this.err_invalid("lastIndexOf requires 1 argument")) - } + let needle = parse_last_index_of_args( + this, + args, + ArgParsePolicy::STRICT, + "lastIndexOf requires 1 argument", + )?; + let helper = crate::boxes::string_box::StringBox::new(s); + let result_box = helper.lastIndexOf(&needle); + this.write_from_box(dst, result_box); + Ok(()) } "indexOf" | "find" => { - if let Some(arg_id) = args.get(0) { - let needle = this.reg_load(*arg_id)?.to_string(); - let helper = crate::boxes::string_box::StringBox::new(s); - let result_box = helper.find(&needle); - this.write_from_box(dst, 
result_box); - Ok(()) - } else { - Err(this.err_invalid("indexOf/find requires 1 argument")) - } + let helper = crate::boxes::string_box::StringBox::new(s); + let (needle, start) = parse_index_of_args( + this, + args, + ArgParsePolicy::STRICT, + "indexOf/find requires 1 or 2 arguments", + )?; + let result_box = match start { + Some(start) => helper.find_from(&needle, start), + None => helper.find(&needle), + }; + this.write_from_box(dst, result_box); + Ok(()) } // Phase 25.1m: minimal builtin support for StringBox.is_space(ch) "is_space" => { diff --git a/src/backend/mir_interpreter/handlers/boxes_string.rs b/src/backend/mir_interpreter/handlers/boxes_string.rs index 1775008a..cd4767b7 100644 --- a/src/backend/mir_interpreter/handlers/boxes_string.rs +++ b/src/backend/mir_interpreter/handlers/boxes_string.rs @@ -1,4 +1,8 @@ use super::*; +use super::string_method_helpers::{ + parse_index_of_args, parse_last_index_of_args, parse_substring_args, ArgParsePolicy, +}; +use crate::boxes::string_ops; pub(super) fn try_handle_string_box( this: &mut MirInterpreter, @@ -90,38 +94,14 @@ pub(super) fn try_handle_string_box( return Ok(true); } "indexOf" => { - // Support both 1-arg indexOf(search) and 2-arg indexOf(search, fromIndex) - let (needle, from_index) = match args.len() { - 1 => { - // indexOf(search) - search from beginning - let n = this.reg_load(args[0])?.to_string(); - (n, 0) - } - 2 => { - // indexOf(search, fromIndex) - search from specified position - let n = this.reg_load(args[0])?.to_string(); - let from = this.reg_load(args[1])?.as_integer().unwrap_or(0); - (n, from.max(0) as usize) - } - _ => { - return Err( - this.err_invalid("indexOf expects 1 or 2 args (search [, fromIndex])") - ); - } - }; - - // Search for needle starting from from_index - let search_str = if from_index >= sb_norm.value.len() { - "" - } else { - &sb_norm.value[from_index..] 
- }; - - let idx = search_str - .find(&needle) - .map(|i| (from_index + i) as i64) - .unwrap_or(-1); - + let (needle, start) = parse_index_of_args( + this, + args, + ArgParsePolicy::STRICT, + "indexOf expects 1 or 2 args (search [, fromIndex])", + )?; + let mode = string_ops::index_mode_from_env(); + let idx = string_ops::index_of(&sb_norm.value, &needle, start, mode); this.write_result(dst, VMValue::Integer(idx)); return Ok(true); } @@ -135,10 +115,14 @@ pub(super) fn try_handle_string_box( return Ok(true); } "lastIndexOf" => { - // lastIndexOf(substr) -> last index or -1 - this.validate_args_exact("lastIndexOf", args, 1)?; - let needle = this.reg_load(args[0])?.to_string(); - let idx = sb_norm.value.rfind(&needle).map(|i| i as i64).unwrap_or(-1); + let needle = parse_last_index_of_args( + this, + args, + ArgParsePolicy::STRICT, + "lastIndexOf requires 1 argument", + )?; + let mode = string_ops::index_mode_from_env(); + let idx = string_ops::last_index_of(&sb_norm.value, &needle, mode); this.write_result(dst, VMValue::Integer(idx)); return Ok(true); } @@ -165,29 +149,14 @@ pub(super) fn try_handle_string_box( return Ok(true); } "substring" => { - // Support both 1-arg (start to end) and 2-arg (start, end) forms - let (s_idx, e_idx) = match args.len() { - 1 => { - // substring(start) - from start to end of string - let s = this.reg_load(args[0])?.as_integer().unwrap_or(0); - let len = sb_norm.value.chars().count() as i64; - (s, len) - } - 2 => { - // substring(start, end) - half-open interval [start, end) - let s = this.reg_load(args[0])?.as_integer().unwrap_or(0); - let e = this.reg_load(args[1])?.as_integer().unwrap_or(0); - (s, e) - } - _ => { - return Err(this.err_invalid("substring expects 1 or 2 args (start [, end])")); - } - }; - let len = sb_norm.value.chars().count() as i64; - let start = s_idx.max(0).min(len) as usize; - let end = e_idx.max(start as i64).min(len) as usize; - let chars: Vec = sb_norm.value.chars().collect(); - let sub: String = 
chars[start..end].iter().collect(); + let (start, end) = parse_substring_args( + this, + args, + ArgParsePolicy::STRICT, + "substring expects 1 or 2 args (start [, end])", + )?; + let mode = string_ops::index_mode_from_env(); + let sub = string_ops::substring(&sb_norm.value, start, end, mode); this.write_result( dst, VMValue::from_nyash_box(Box::new(crate::box_trait::StringBox::new(sub))), diff --git a/src/backend/mir_interpreter/handlers/calls/method.rs b/src/backend/mir_interpreter/handlers/calls/method.rs index 22156024..94afffdb 100644 --- a/src/backend/mir_interpreter/handlers/calls/method.rs +++ b/src/backend/mir_interpreter/handlers/calls/method.rs @@ -1,4 +1,8 @@ use super::*; +use crate::backend::mir_interpreter::handlers::string_method_helpers::{ + parse_index_of_args, parse_last_index_of_args, parse_substring_args, ArgParsePolicy, +}; +use crate::boxes::string_ops; impl MirInterpreter { pub(super) fn execute_method_callee( @@ -205,13 +209,15 @@ impl MirInterpreter { ("String", 303) => { // indexOf if let VMValue::String(s) = receiver { - if let Some(arg_id) = args.get(0) { - let needle = self.reg_load(*arg_id)?.to_string(); - let idx = s.find(&needle).map(|i| i as i64).unwrap_or(-1); - Ok(VMValue::Integer(idx)) - } else { - Err(self.err_invalid("String.indexOf: requires 1 argument")) - } + let (needle, start) = parse_index_of_args( + self, + args, + ArgParsePolicy::LENIENT, + "String.indexOf: requires 1 argument", + )?; + let mode = string_ops::index_mode_from_env(); + let idx = string_ops::index_of(s, &needle, start, mode); + Ok(VMValue::Integer(idx)) } else { Err(self.err_invalid("String.indexOf: invalid receiver")) } @@ -219,13 +225,15 @@ impl MirInterpreter { ("String", 308) => { // lastIndexOf if let VMValue::String(s) = receiver { - if let Some(arg_id) = args.get(0) { - let needle = self.reg_load(*arg_id)?.to_string(); - let idx = s.rfind(&needle).map(|i| i as i64).unwrap_or(-1); - Ok(VMValue::Integer(idx)) - } else { - 
Err(self.err_invalid("String.lastIndexOf: requires 1 argument")) - } + let needle = parse_last_index_of_args( + self, + args, + ArgParsePolicy::LENIENT, + "String.lastIndexOf: requires 1 argument", + )?; + let mode = string_ops::index_mode_from_env(); + let idx = string_ops::last_index_of(s, &needle, mode); + Ok(VMValue::Integer(idx)) } else { Err(self.err_invalid("String.lastIndexOf: invalid receiver")) } @@ -233,24 +241,14 @@ impl MirInterpreter { ("String", 301) => { // substring if let VMValue::String(s) = receiver { - let start = if let Some(a0) = args.get(0) { - self.reg_load(*a0)?.as_integer().unwrap_or(0) - } else { - 0 - }; - let end = if let Some(a1) = args.get(1) { - self.reg_load(*a1)?.as_integer().unwrap_or(s.len() as i64) - } else { - s.len() as i64 - }; - let len = s.len() as i64; - let i0 = start.max(0).min(len) as usize; - let i1 = end.max(0).min(len) as usize; - if i0 > i1 { - return Ok(VMValue::String(String::new())); - } - let bytes = s.as_bytes(); - let sub = String::from_utf8(bytes[i0..i1].to_vec()).unwrap_or_default(); + let (start, end) = parse_substring_args( + self, + args, + ArgParsePolicy::LENIENT, + "String.substring: requires 1 or 2 arguments", + )?; + let mode = string_ops::index_mode_from_env(); + let sub = string_ops::substring(s, start, end, mode); Ok(VMValue::String(sub)) } else { Err(self.err_invalid("String.substring: invalid receiver")) @@ -390,12 +388,15 @@ impl MirInterpreter { if let VMValue::BoxRef(bx) = receiver { let s_box = bx.to_string_box(); let s = s_box.value; - if let Some(arg_id) = args.get(0) { - let needle = self.reg_load(*arg_id)?.to_string(); - let helper = crate::boxes::string_box::StringBox::new(s); - let result_box = helper.lastIndexOf(&needle); - return Ok(VMValue::from_nyash_box(result_box)); - } + let needle = parse_last_index_of_args( + self, + args, + ArgParsePolicy::STRICT, + "StringBox.lastIndexOf: requires 1 argument", + )?; + let helper = crate::boxes::string_box::StringBox::new(s); + let result_box 
= helper.lastIndexOf(&needle); + return Ok(VMValue::from_nyash_box(result_box)); } Err(self.err_invalid("StringBox.lastIndexOf: requires 1 argument")) } @@ -404,14 +405,20 @@ impl MirInterpreter { if let VMValue::BoxRef(bx) = receiver { let s_box = bx.to_string_box(); let s = s_box.value; - if let Some(arg_id) = args.get(0) { - let needle = self.reg_load(*arg_id)?.to_string(); - let helper = crate::boxes::string_box::StringBox::new(s); - let result_box = helper.find(&needle); - return Ok(VMValue::from_nyash_box(result_box)); - } + let helper = crate::boxes::string_box::StringBox::new(s); + let (needle, start) = parse_index_of_args( + self, + args, + ArgParsePolicy::STRICT, + "StringBox.indexOf: requires 1 or 2 arguments", + )?; + let result_box = match start { + Some(start) => helper.find_from(&needle, start), + None => helper.find(&needle), + }; + return Ok(VMValue::from_nyash_box(result_box)); } - Err(self.err_invalid("StringBox.indexOf: requires 1 argument")) + Err(self.err_invalid("StringBox.indexOf: requires 1 or 2 arguments")) } // Plugin Box methods (slot >= 1000) diff --git a/src/backend/mir_interpreter/handlers/mod.rs b/src/backend/mir_interpreter/handlers/mod.rs index 1717d01d..681d741e 100644 --- a/src/backend/mir_interpreter/handlers/mod.rs +++ b/src/backend/mir_interpreter/handlers/mod.rs @@ -24,6 +24,7 @@ mod externals; mod lifecycle; mod memory; mod misc; +mod string_method_helpers; mod type_ops; mod weak; // Phase 285A0: WeakRef handlers diff --git a/src/backend/mir_interpreter/handlers/string_method_helpers.rs b/src/backend/mir_interpreter/handlers/string_method_helpers.rs new file mode 100644 index 00000000..85c2d1d7 --- /dev/null +++ b/src/backend/mir_interpreter/handlers/string_method_helpers.rs @@ -0,0 +1,86 @@ +use super::*; + +#[derive(Debug, Clone, Copy)] +pub(super) struct ArgParsePolicy { + pub allow_empty: bool, + pub allow_extra: bool, +} + +impl ArgParsePolicy { + pub const STRICT: Self = Self { + allow_empty: false, + allow_extra: 
false, + }; + pub const LENIENT: Self = Self { + allow_empty: true, + allow_extra: true, + }; +} + +pub(super) fn parse_index_of_args( + this: &mut MirInterpreter, + args: &[ValueId], + policy: ArgParsePolicy, + err_label: &str, +) -> Result<(String, Option), VMError> { + if args.is_empty() { + return Err(this.err_invalid(err_label)); + } + let needle = this.reg_load(args[0])?.to_string(); + let start = if args.len() >= 2 { + Some(this.reg_load(args[1])?.as_integer().unwrap_or(0)) + } else { + None + }; + if !policy.allow_extra && args.len() > 2 { + return Err(this.err_invalid(err_label)); + } + Ok((needle, start)) +} + +pub(super) fn parse_last_index_of_args( + this: &mut MirInterpreter, + args: &[ValueId], + policy: ArgParsePolicy, + err_label: &str, +) -> Result { + if args.is_empty() { + return Err(this.err_invalid(err_label)); + } + if !policy.allow_extra && args.len() > 1 { + return Err(this.err_invalid(err_label)); + } + Ok(this.reg_load(args[0])?.to_string()) +} + +pub(super) fn parse_substring_args( + this: &mut MirInterpreter, + args: &[ValueId], + policy: ArgParsePolicy, + err_label: &str, +) -> Result<(i64, Option), VMError> { + match args.len() { + 0 => { + if policy.allow_empty { + Ok((0, None)) + } else { + Err(this.err_invalid(err_label)) + } + } + 1 => Ok((this.reg_load(args[0])?.as_integer().unwrap_or(0), None)), + 2 => Ok(( + this.reg_load(args[0])?.as_integer().unwrap_or(0), + Some(this.reg_load(args[1])?.as_integer().unwrap_or(0)), + )), + _ => { + if policy.allow_extra { + Ok(( + this.reg_load(args[0])?.as_integer().unwrap_or(0), + Some(this.reg_load(args[1])?.as_integer().unwrap_or(0)), + )) + } else { + Err(this.err_invalid(err_label)) + } + } + } +} diff --git a/src/boxes/basic/string_box.rs b/src/boxes/basic/string_box.rs index 105e800a..3e5670fb 100644 --- a/src/boxes/basic/string_box.rs +++ b/src/boxes/basic/string_box.rs @@ -41,10 +41,9 @@ impl StringBox { /// Find substring and return position (or -1 if not found) pub fn find(&self, 
search: &str) -> Box { use crate::box_trait::IntegerBox; - match self.value.find(search) { - Some(pos) => Box::new(IntegerBox::new(pos as i64)), - None => Box::new(IntegerBox::new(-1)), - } + let mode = crate::boxes::string_ops::index_mode_from_env(); + let idx = crate::boxes::string_ops::index_of(&self.value, search, None, mode); + Box::new(IntegerBox::new(idx)) } /// Replace all occurrences of old with new diff --git a/src/boxes/mod.rs b/src/boxes/mod.rs index e6c6133b..27315ee0 100644 --- a/src/boxes/mod.rs +++ b/src/boxes/mod.rs @@ -65,6 +65,7 @@ pub mod integer_box; pub mod math_box; pub mod random_box; pub mod string_box; +pub mod string_ops; pub mod time_box; // These boxes use web APIs that require special handling in WASM pub mod aot_compiler_box; diff --git a/src/boxes/string_box.rs b/src/boxes/string_box.rs index 5653e97c..d278e8a0 100644 --- a/src/boxes/string_box.rs +++ b/src/boxes/string_box.rs @@ -13,6 +13,7 @@ * - `toLowerCase()` - 小文字変換 * - `trim()` - 前後の空白除去 * - `indexOf(search)` - 文字列検索 + * - `indexOf(search, fromIndex)` - 指定位置から検索 * - `replace(from, to)` - 文字列置換 * - `charAt(index)` - 指定位置の文字取得 * @@ -71,18 +72,18 @@ impl StringBox { /// Env gate: NYASH_STR_CP=1 → return codepoint index; default is byte index pub fn find(&self, search: &str) -> Box { use crate::boxes::integer_box::IntegerBox; - match self.value.find(search) { - Some(byte_pos) => { - let use_cp = std::env::var("NYASH_STR_CP").ok().as_deref() == Some("1"); - let idx = if use_cp { - self.value[..byte_pos].chars().count() as i64 - } else { - byte_pos as i64 - }; - Box::new(IntegerBox::new(idx)) - } - None => Box::new(IntegerBox::new(-1)), - } + let mode = crate::boxes::string_ops::index_mode_from_env(); + let idx = crate::boxes::string_ops::index_of(&self.value, search, None, mode); + Box::new(IntegerBox::new(idx)) + } + + /// Find substring starting from a given index (or -1 if not found) + /// Env gate: NYASH_STR_CP=1 → indices are codepoint-based; default is byte index + pub fn 
find_from(&self, search: &str, start: i64) -> Box { + use crate::boxes::integer_box::IntegerBox; + let mode = crate::boxes::string_ops::index_mode_from_env(); + let idx = crate::boxes::string_ops::index_of(&self.value, search, Some(start), mode); + Box::new(IntegerBox::new(idx)) } /// Replace all occurrences of old with new @@ -94,18 +95,9 @@ impl StringBox { /// Env gate: NYASH_STR_CP=1 → return codepoint index; default is byte index. pub fn lastIndexOf(&self, search: &str) -> Box { use crate::boxes::integer_box::IntegerBox; - match self.value.rfind(search) { - Some(byte_pos) => { - let use_cp = std::env::var("NYASH_STR_CP").ok().as_deref() == Some("1"); - let idx = if use_cp { - self.value[..byte_pos].chars().count() as i64 - } else { - byte_pos as i64 - }; - Box::new(IntegerBox::new(idx)) - } - None => Box::new(IntegerBox::new(-1)), - } + let mode = crate::boxes::string_ops::index_mode_from_env(); + let idx = crate::boxes::string_ops::last_index_of(&self.value, search, mode); + Box::new(IntegerBox::new(idx)) } /// Trim whitespace from both ends diff --git a/src/boxes/string_ops.rs b/src/boxes/string_ops.rs new file mode 100644 index 00000000..f0eac34c --- /dev/null +++ b/src/boxes/string_ops.rs @@ -0,0 +1,101 @@ +//! Shared string indexing helpers (byte vs codepoint). 
+ +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StringIndexMode { + Byte, + CodePoint, +} + +pub fn index_mode_from_env() -> StringIndexMode { + if std::env::var("NYASH_STR_CP").ok().as_deref() == Some("1") { + StringIndexMode::CodePoint + } else { + StringIndexMode::Byte + } +} + +pub fn index_of(haystack: &str, needle: &str, start: Option, mode: StringIndexMode) -> i64 { + match mode { + StringIndexMode::Byte => index_of_bytes(haystack, needle, start), + StringIndexMode::CodePoint => index_of_codepoints(haystack, needle, start), + } +} + +pub fn last_index_of(haystack: &str, needle: &str, mode: StringIndexMode) -> i64 { + match mode { + StringIndexMode::Byte => haystack.rfind(needle).map(|i| i as i64).unwrap_or(-1), + StringIndexMode::CodePoint => haystack + .rfind(needle) + .map(|byte_pos| haystack[..byte_pos].chars().count() as i64) + .unwrap_or(-1), + } +} + +pub fn substring(haystack: &str, start: i64, end: Option, mode: StringIndexMode) -> String { + match mode { + StringIndexMode::Byte => substring_bytes(haystack, start, end), + StringIndexMode::CodePoint => substring_codepoints(haystack, start, end), + } +} + +fn index_of_bytes(haystack: &str, needle: &str, start: Option) -> i64 { + let start_idx = start.unwrap_or(0).max(0) as usize; + if start_idx > haystack.len() { + return -1; + } + haystack[start_idx..] + .find(needle) + .map(|i| (start_idx + i) as i64) + .unwrap_or(-1) +} + +fn index_of_codepoints(haystack: &str, needle: &str, start: Option) -> i64 { + let start_idx = start.unwrap_or(0).max(0) as usize; + let Some(byte_start) = byte_offset_for_cp(haystack, start_idx) else { + return -1; + }; + if byte_start > haystack.len() { + return -1; + } + haystack[byte_start..] 
+ .find(needle) + .map(|rel| { + let abs = byte_start + rel; + haystack[..abs].chars().count() as i64 + }) + .unwrap_or(-1) +} + +fn substring_bytes(haystack: &str, start: i64, end: Option) -> String { + let len = haystack.len() as i64; + let start = start.max(0).min(len); + let end = end.unwrap_or(len).max(0).min(len); + if start > end { + return String::new(); + } + let bytes = haystack.as_bytes(); + String::from_utf8(bytes[start as usize..end as usize].to_vec()).unwrap_or_default() +} + +fn substring_codepoints(haystack: &str, start: i64, end: Option) -> String { + let len = haystack.chars().count() as i64; + let start = start.max(0).min(len) as usize; + let end = end.unwrap_or(len).max(start as i64).min(len) as usize; + let chars: Vec = haystack.chars().collect(); + chars[start..end].iter().collect() +} + +fn byte_offset_for_cp(haystack: &str, cp_index: usize) -> Option { + let mut count = 0usize; + for (byte_pos, _) in haystack.char_indices() { + if count == cp_index { + return Some(byte_pos); + } + count += 1; + } + if count == cp_index { + Some(haystack.len()) + } else { + None + } +} diff --git a/src/mir/builder/calls/guard.rs b/src/mir/builder/calls/guard.rs index 64255dd1..3f88ddfb 100644 --- a/src/mir/builder/calls/guard.rs +++ b/src/mir/builder/calls/guard.rs @@ -14,6 +14,7 @@ use crate::mir::definitions::call_unified::CalleeBoxKind; use crate::mir::{Callee, MirType, ValueId}; +use crate::runtime::core_method_aliases::canonical_method_name; use std::collections::BTreeMap; /// 構造ガード専用箱 @@ -103,15 +104,16 @@ impl<'a> CalleeGuardBox<'a> { // // Common string methods that should be routed to StringBox: // length, substring, charAt, indexOf, etc. 
+ let canonical = canonical_method_name(method); let is_string_method = matches!( - method.as_str(), + canonical, "length" | "substring" | "charAt" | "indexOf" | "lastIndexOf" - | "toUpperCase" - | "toLowerCase" + | "toUpper" + | "toLower" | "trim" | "split" ); diff --git a/src/mir/builder/control_flow/joinir/patterns/body_local_policy.rs b/src/mir/builder/control_flow/joinir/patterns/body_local_policy.rs index 002ce3b1..6fb67823 100644 --- a/src/mir/builder/control_flow/joinir/patterns/body_local_policy.rs +++ b/src/mir/builder/control_flow/joinir/patterns/body_local_policy.rs @@ -6,7 +6,9 @@ use crate::ast::ASTNode; use crate::mir::builder::MirBuilder; use crate::mir::builder::control_flow::joinir::patterns::policies::PolicyDecision; +use crate::mir::builder::control_flow::joinir::patterns::pattern2::contracts::derived_slot::extract_derived_slot_for_conditions; use crate::mir::join_ir::lowering::carrier_info::CarrierInfo; +use crate::mir::join_ir::lowering::common::body_local_derived_slot_emitter::BodyLocalDerivedSlotRecipe; use crate::mir::join_ir::lowering::common::body_local_slot::{ ReadOnlyBodyLocalSlot, ReadOnlyBodyLocalSlotBox, }; @@ -27,6 +29,7 @@ pub enum BodyLocalRoute { carrier_name: String, }, ReadOnlySlot(ReadOnlyBodyLocalSlot), + DerivedSlot(BodyLocalDerivedSlotRecipe), } pub fn classify_for_pattern2( @@ -60,17 +63,29 @@ pub fn classify_for_pattern2( carrier_info: promoted_carrier, promoted_var, carrier_name, - } => PolicyDecision::Use(BodyLocalRoute::Promotion { - promoted_carrier, - promoted_var, - carrier_name, - }), + } => match extract_derived_slot_for_conditions(&vars, body) { + Ok(Some(recipe)) => PolicyDecision::Use(BodyLocalRoute::DerivedSlot(recipe)), + Ok(None) => PolicyDecision::Use(BodyLocalRoute::Promotion { + promoted_carrier, + promoted_var, + carrier_name, + }), + Err(slot_err) => PolicyDecision::Reject(format!( + "[pattern2/body_local_policy] derived-slot check failed: {slot_err}" + )), + }, ConditionPromotionResult::CannotPromote { 
reason, .. } => { - match extract_body_local_inits_for_conditions(&vars, body) { - Ok(Some(slot)) => PolicyDecision::Use(BodyLocalRoute::ReadOnlySlot(slot)), - Ok(None) => PolicyDecision::Reject(reason), + match extract_derived_slot_for_conditions(&vars, body) { + Ok(Some(recipe)) => PolicyDecision::Use(BodyLocalRoute::DerivedSlot(recipe)), + Ok(None) => match extract_body_local_inits_for_conditions(&vars, body) { + Ok(Some(slot)) => PolicyDecision::Use(BodyLocalRoute::ReadOnlySlot(slot)), + Ok(None) => PolicyDecision::Reject(reason), + Err(slot_err) => PolicyDecision::Reject(format!( + "{reason}; read-only-slot rejected: {slot_err}" + )), + }, Err(slot_err) => PolicyDecision::Reject(format!( - "{reason}; read-only-slot rejected: {slot_err}" + "{reason}; derived-slot rejected: {slot_err}" )), } } diff --git a/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs index d00c77c3..b41c90c0 100644 --- a/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs +++ b/src/mir/builder/control_flow/joinir/patterns/loop_true_counter_extractor.rs @@ -8,10 +8,11 @@ //! - 曖昧な loop(true) を **通さない**(Fail-Fast で理由を返す) //! //! ## Contract(Fail-Fast) -//! 許可(read_digits(loop(true)) 系で必要な最小): +//! 許可(loop(true) 系で必要な最小): //! - カウンタ候補が **ちょうど1つ** -//! - 更新が `i = i + 1` 形(定数 1 のみ) -//! - `s.substring(i, i + 1)` 形が body のどこかに存在(誤マッチ防止) +//! - 更新が `i = i + 1` 形(定数 1 のみ) **または** +//! `i = j + K` 形(`j = s.indexOf(..., i)` 由来、K は整数定数) +//! - `substring(i, ...)` が body のどこかに存在(誤マッチ防止) //! - `i` が loop-outer var(`variable_map` に存在)である //! //! 禁止: @@ -86,13 +87,6 @@ impl LoopTrueCounterExtractorBox { } } - fn extract_var_name(n: &ASTNode) -> Option { - match n { - ASTNode::Variable { name, .. 
} => Some(name.clone()), - _ => None, - } - } - fn is_self_plus_const_one(value: &ASTNode, target: &ASTNode) -> bool { let target_name = match extract_var_name(target) { Some(n) => n, @@ -126,22 +120,122 @@ impl LoopTrueCounterExtractorBox { candidates.sort(); candidates.dedup(); - let loop_var_name = match candidates.len() { - 0 => { - return Err( - "[pattern2/loop_true_counter/contract/no_candidate] Cannot find unique counter update `i = i + 1` in loop(true) body" - .to_string(), - ); - } - 1 => candidates[0].clone(), - _ => { + if candidates.len() > 1 { + return Err(format!( + "[pattern2/loop_true_counter/contract/multiple_candidates] Multiple counter candidates found in loop(true) body: {:?}", + candidates + )); + } + + if candidates.len() == 1 { + let loop_var_name = candidates[0].clone(); + let host_id = variable_map.get(&loop_var_name).copied().ok_or_else(|| { + format!( + "[pattern2/loop_true_counter/contract/not_loop_outer] Counter '{}' not found in variable_map (loop-outer var required)", + loop_var_name + ) + })?; + + if !has_substring_read(body, &loop_var_name) { return Err(format!( - "[pattern2/loop_true_counter/contract/multiple_candidates] Multiple counter candidates found in loop(true) body: {:?}", - candidates + "[pattern2/loop_true_counter/contract/missing_substring_guard] Counter '{}' found, but missing substring pattern `s.substring({}, {} + 1)`", + loop_var_name, loop_var_name, loop_var_name )); } - }; + return Ok((loop_var_name, host_id)); + } + + if let Some((loop_var_name, host_id)) = + extract_loop_counter_from_indexof_pattern(body, variable_map)? + { + return Ok((loop_var_name, host_id)); + } + + Err( + "[pattern2/loop_true_counter/contract/no_candidate] Cannot find unique counter update `i = i + 1` in loop(true) body" + .to_string(), + ) + } +} + +fn extract_var_name(n: &ASTNode) -> Option { + match n { + ASTNode::Variable { name, .. 
} => Some(name.clone()), + _ => None, + } +} + +fn extract_loop_counter_from_indexof_pattern( + body: &[ASTNode], + variable_map: &BTreeMap, +) -> Result, String> { + let indexof_bindings = collect_indexof_bindings(body); + if indexof_bindings.is_empty() { + return Ok(None); + } + + let mut candidates: Vec = Vec::new(); + + fn walk_assign( + node: &ASTNode, + indexof_bindings: &[(String, String)], + candidates: &mut Vec, + ) { + match node { + ASTNode::Assignment { target, value, .. } => { + if let (Some(target_name), Some((index_var, const_val))) = + (extract_var_name(target.as_ref()), extract_add_var_const(value.as_ref())) + { + if const_val <= 0 { + return; + } + if indexof_bindings.iter().any(|(idx_var, start_var)| { + idx_var == &index_var && start_var == &target_name + }) { + candidates.push(target_name); + } + } + } + ASTNode::If { + then_body, + else_body, + .. + } => { + for s in then_body { + walk_assign(s, indexof_bindings, candidates); + } + if let Some(eb) = else_body { + for s in eb { + walk_assign(s, indexof_bindings, candidates); + } + } + } + ASTNode::Loop { body, .. 
} => { + for s in body { + walk_assign(s, indexof_bindings, candidates); + } + } + _ => {} + } + } + + for stmt in body { + walk_assign(stmt, &indexof_bindings, &mut candidates); + } + + candidates.sort(); + candidates.dedup(); + + if candidates.len() > 1 { + return Err(format!( + "[pattern2/loop_true_counter/contract/multiple_candidates] Multiple counter candidates found in loop(true) body: {:?}", + candidates + )); + } + + if candidates.len() == 1 { + let loop_var_name = candidates[0].clone(); let host_id = variable_map.get(&loop_var_name).copied().ok_or_else(|| { format!( "[pattern2/loop_true_counter/contract/not_loop_outer] Counter '{}' not found in variable_map (loop-outer var required)", @@ -149,14 +243,112 @@ impl LoopTrueCounterExtractorBox { ) })?; - if !has_substring_read(body, &loop_var_name) { + if !has_substring_read_with_start(body, &loop_var_name) { return Err(format!( - "[pattern2/loop_true_counter/contract/missing_substring_guard] Counter '{}' found, but missing substring pattern `s.substring({}, {} + 1)`", - loop_var_name, loop_var_name, loop_var_name + "[pattern2/loop_true_counter/contract/missing_substring_guard] Counter '{}' found, but missing substring pattern `substring({}, ...)`", + loop_var_name, loop_var_name )); } - Ok((loop_var_name, host_id)) + return Ok(Some((loop_var_name, host_id))); + } + + Ok(None) +} + +fn collect_indexof_bindings(body: &[ASTNode]) -> Vec<(String, String)> { + fn extract_indexof_binding(node: &ASTNode) -> Option<(String, String)> { + let (target_name, value_node) = match node { + ASTNode::Local { + variables, + initial_values, + .. + } => { + if variables.len() != 1 { + return None; + } + let value = initial_values.get(0).and_then(|v| v.as_ref())?; + (variables[0].clone(), value.as_ref()) + } + ASTNode::Assignment { target, value, .. 
} => { + let target_name = extract_var_name(target.as_ref())?; + (target_name, value.as_ref()) + } + _ => return None, + }; + + if let ASTNode::MethodCall { + method, + arguments, + .. + } = value_node + { + if method == "indexOf" && arguments.len() == 2 { + if let ASTNode::Variable { name, .. } = &arguments[1] { + return Some((target_name, name.clone())); + } + } + } + + None + } + + fn walk(node: &ASTNode, out: &mut Vec<(String, String)>) { + if let Some(binding) = extract_indexof_binding(node) { + out.push(binding); + } + + match node { + ASTNode::If { + then_body, + else_body, + .. + } => { + for s in then_body { + walk(s, out); + } + if let Some(eb) = else_body { + for s in eb { + walk(s, out); + } + } + } + ASTNode::Loop { body, .. } => { + for s in body { + walk(s, out); + } + } + _ => {} + } + } + + let mut bindings = Vec::new(); + for stmt in body { + walk(stmt, &mut bindings); + } + bindings +} + +fn extract_add_var_const(value: &ASTNode) -> Option<(String, i64)> { + match value { + ASTNode::BinaryOp { + operator: BinaryOperator::Add, + left, + right, + .. + } => { + if let ASTNode::Variable { name, .. } = left.as_ref() { + if let ASTNode::Literal { + value: LiteralValue::Integer(i), + .. + } = right.as_ref() + { + return Some((name.clone(), *i)); + } + } + None + } + _ => None, } } @@ -221,6 +413,55 @@ fn has_substring_read(body: &[ASTNode], counter: &str) -> bool { body.iter().any(|s| walk(s, counter)) } +fn has_substring_read_with_start(body: &[ASTNode], counter: &str) -> bool { + fn walk(node: &ASTNode, counter: &str) -> bool { + match node { + ASTNode::Assignment { value, .. } => walk(value.as_ref(), counter), + ASTNode::Local { initial_values, .. } => initial_values + .iter() + .filter_map(|v| v.as_ref()) + .any(|v| walk(v.as_ref(), counter)), + ASTNode::MethodCall { + method, + arguments, + .. + } => { + if method == "substring" && arguments.len() == 2 { + if matches!( + &arguments[0], + ASTNode::Variable { name, .. 
} if name == counter + ) { + return true; + } + } + arguments.iter().any(|a| walk(a, counter)) + } + ASTNode::BinaryOp { left, right, .. } => { + walk(left.as_ref(), counter) || walk(right.as_ref(), counter) + } + ASTNode::If { + condition, + then_body, + else_body, + .. + } => { + walk(condition.as_ref(), counter) + || then_body.iter().any(|s| walk(s, counter)) + || else_body + .as_ref() + .map(|eb| eb.iter().any(|s| walk(s, counter))) + .unwrap_or(false) + } + ASTNode::Loop { body, condition, .. } => { + walk(condition.as_ref(), counter) || body.iter().any(|s| walk(s, counter)) + } + _ => false, + } + } + + body.iter().any(|s| walk(s, counter)) +} + #[cfg(test)] mod tests { use super::*; @@ -244,6 +485,13 @@ mod tests { } } + fn lit_s(s: &str) -> ASTNode { + ASTNode::Literal { + value: LiteralValue::String(s.to_string()), + span: span(), + } + } + fn add(left: ASTNode, right: ASTNode) -> ASTNode { ASTNode::BinaryOp { operator: BinaryOperator::Add, @@ -270,6 +518,24 @@ mod tests { } } + fn substring_ij(s_var: &str, i_var: &str, j_var: &str) -> ASTNode { + ASTNode::MethodCall { + object: Box::new(var(s_var)), + method: "substring".to_string(), + arguments: vec![var(i_var), var(j_var)], + span: span(), + } + } + + fn indexof(s_var: &str, needle: &str, start_var: &str) -> ASTNode { + ASTNode::MethodCall { + object: Box::new(var(s_var)), + method: "indexOf".to_string(), + arguments: vec![lit_s(needle), var(start_var)], + span: span(), + } + } + fn local_one(name: &str, init: ASTNode) -> ASTNode { ASTNode::Local { variables: vec![name.to_string()], @@ -332,4 +598,21 @@ mod tests { .unwrap_err(); assert!(err.contains("missing_substring_guard")); } + + #[test] + fn extract_indexof_candidate_ok() { + let body = vec![ + local_one("j", indexof("table", "|||", "i")), + local_one("seg", substring_ij("table", "i", "j")), + assign(var("i"), add(var("j"), lit_i(3))), + ]; + let mut variable_map = BTreeMap::new(); + variable_map.insert("i".to_string(), ValueId(7)); + + let (name, 
host_id) = + LoopTrueCounterExtractorBox::extract_loop_counter_from_body(&body, &variable_map) + .unwrap(); + assert_eq!(name, "i"); + assert_eq!(host_id, ValueId(7)); + } } diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2/README.md b/src/mir/builder/control_flow/joinir/patterns/pattern2/README.md index 2c7bfc5f..0d119448 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern2/README.md +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2/README.md @@ -4,6 +4,7 @@ - `loop(...) { ... break ... }` (break present, no continue/return) - break condition is normalized to "break when is true" - loop variable comes from header condition or loop(true) counter extraction + - loop(true): `i = i + 1` + `substring(i, i + 1)` or `i = j + K` with `j = indexOf(..., i)` + `substring(i, ...)` ## LoopBodyLocal promotion - SSOT entry: `pattern2::api::try_promote` @@ -15,6 +16,12 @@ - Break guard: `if seg == " " || seg == "\\t" { break }` - seg is read-only (no reassignment in the loop body) +## Derived slot minimal shape (seg) +- Example shape (Derived): `local seg = ""` then `if cond { seg = expr1 } else { seg = expr2 }` +- Break guard: `if seg == "" { break }` (seg used in break condition) +- seg is recomputed per-iteration (Select), no promotion +- Contract SSOT: `pattern2/contracts/derived_slot.rs` + ## Carrier binding rules (Pattern2) - `CarrierInit::FromHost` -> host binding required - `CarrierInit::BoolConst(_)` / `CarrierInit::LoopLocalZero` -> host binding is skipped @@ -22,9 +29,9 @@ ## Out of scope - multiple breaks / continue / return in the loop body -- reassigned LoopBodyLocal or ReadOnlySlot contract violations +- reassigned LoopBodyLocal outside the derived-slot shape - break conditions with unsupported AST shapes -- seg reassignment or non-substring init (e.g., `seg = other_call()`) +- non-substring init for Trim promotion (e.g., `seg = other_call()`) ## Fail-Fast policy - `PromoteDecision::Freeze` -> Err (missing implementation or 
contract violation) @@ -32,4 +39,4 @@ ## `Ok(None)` meaning - not Pattern2 (extractor returns None) -- promotion NotApplicable (router fallback) +- promotion NotApplicable (continue Pattern2 without promotion) diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2/api/promote_runner.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2/api/promote_runner.rs index 74e0587e..ca9e861f 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern2/api/promote_runner.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2/api/promote_runner.rs @@ -106,6 +106,10 @@ pub(in crate::mir::builder) fn try_promote( inputs.allowed_body_locals_for_conditions = vec![slot.name.clone()]; inputs.read_only_body_local_slot = Some(slot); } + PolicyDecision::Use(BodyLocalRoute::DerivedSlot(recipe)) => { + inputs.allowed_body_locals_for_conditions = vec![recipe.name.clone()]; + inputs.body_local_derived_slot_recipe = Some(recipe); + } PolicyDecision::Reject(reason) => { // Phase 263 P0.1: Reject を PromoteDecision で二分化(型安全) // 対象だが未対応(freeze級): 実装バグ or 将来実装予定 → Freeze で Fail-Fast diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2/contracts/derived_slot.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2/contracts/derived_slot.rs new file mode 100644 index 00000000..bef9130b --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2/contracts/derived_slot.rs @@ -0,0 +1,230 @@ +//! Phase 29ab P4: Derived slot contract for Pattern2 +//! +//! Responsibility: +//! - Extract a minimal derived-slot recipe for a single LoopBodyLocal variable +//! used in Pattern2 break conditions. +//! - No JoinIR emission; detection only. 
+ +use crate::ast::ASTNode; +use crate::mir::join_ir::lowering::common::body_local_derived_slot_emitter::BodyLocalDerivedSlotRecipe; + +pub(crate) fn extract_body_local_derived_slot( + name: &str, + body: &[ASTNode], +) -> Result, String> { + let break_guard_idx = match find_first_top_level_break_guard_if(body) { + Some(idx) => idx, + None => return Ok(None), + }; + + let (decl_idx, base_init_expr) = match find_top_level_local_init(body, name) { + Some(result) => result, + None => return Ok(None), + }; + + if decl_idx >= break_guard_idx { + return Ok(None); + } + + let (assign_idx, assign_cond, then_expr, else_expr) = + match find_top_level_conditional_assignment(body, name, break_guard_idx) { + Some(result) => result, + None => return Ok(None), + }; + + if assign_idx <= decl_idx { + return Ok(None); + } + + if has_other_assignments(body, name, assign_idx) { + return Ok(None); + } + + Ok(Some(BodyLocalDerivedSlotRecipe { + name: name.to_string(), + base_init_expr, + assign_cond, + then_expr, + else_expr: Some(else_expr), + })) +} + +pub(crate) fn extract_derived_slot_for_conditions( + body_local_names_in_conditions: &[String], + body: &[ASTNode], +) -> Result, String> { + if body_local_names_in_conditions.len() != 1 { + return Ok(None); + } + extract_body_local_derived_slot(&body_local_names_in_conditions[0], body) +} + +fn find_first_top_level_break_guard_if(body: &[ASTNode]) -> Option { + for (idx, stmt) in body.iter().enumerate() { + if let ASTNode::If { + then_body, + else_body, + .. + } = stmt + { + if then_body.iter().any(|n| matches!(n, ASTNode::Break { .. })) { + return Some(idx); + } + if let Some(else_body) = else_body { + if else_body.iter().any(|n| matches!(n, ASTNode::Break { .. })) { + return Some(idx); + } + } + } + } + None +} + +fn find_top_level_local_init(body: &[ASTNode], name: &str) -> Option<(usize, ASTNode)> { + for (idx, stmt) in body.iter().enumerate() { + if let ASTNode::Local { + variables, + initial_values, + .. 
+ } = stmt + { + if variables.len() != 1 { + continue; + } + if variables[0] != name { + continue; + } + let init = initial_values + .get(0) + .and_then(|v| v.as_ref()) + .map(|b| (*b.clone()).clone())?; + return Some((idx, init)); + } + } + None +} + +fn find_top_level_conditional_assignment( + body: &[ASTNode], + name: &str, + break_guard_idx: usize, +) -> Option<(usize, ASTNode, ASTNode, ASTNode)> { + for (idx, stmt) in body.iter().enumerate() { + if idx >= break_guard_idx { + break; + } + let ASTNode::If { + condition, + then_body, + else_body, + .. + } = stmt else { + continue; + }; + + let Some(else_body) = else_body.as_ref() else { + continue; + }; + + let Some(then_expr) = extract_single_assignment_expr(then_body, name) else { + continue; + }; + let Some(else_expr) = extract_single_assignment_expr(else_body, name) else { + continue; + }; + + return Some((idx, (*condition.clone()), then_expr, else_expr)); + } + None +} + +fn extract_single_assignment_expr(stmts: &[ASTNode], name: &str) -> Option { + if stmts.len() != 1 { + return None; + } + match &stmts[0] { + ASTNode::Assignment { target, value, .. } => { + if matches!(&**target, ASTNode::Variable { name: n, .. } if n == name) { + Some((**value).clone()) + } else { + None + } + } + _ => None, + } +} + +fn has_other_assignments(body: &[ASTNode], name: &str, assign_if_idx: usize) -> bool { + body.iter().enumerate().any(|(idx, stmt)| { + if idx == assign_if_idx { + return false; + } + contains_assignment_to_name_in_node(stmt, name) + }) +} + +fn contains_assignment_to_name_in_node(node: &ASTNode, name: &str) -> bool { + match node { + ASTNode::Assignment { target, value, .. } => { + if matches!(&**target, ASTNode::Variable { name: n, .. } if n == name) { + return true; + } + contains_assignment_to_name_in_node(target, name) + || contains_assignment_to_name_in_node(value, name) + } + ASTNode::Nowait { variable, .. } => variable == name, + ASTNode::If { + condition, + then_body, + else_body, + .. 
+ } => { + contains_assignment_to_name_in_node(condition, name) + || then_body + .iter() + .any(|n| contains_assignment_to_name_in_node(n, name)) + || else_body.as_ref().is_some_and(|e| { + e.iter() + .any(|n| contains_assignment_to_name_in_node(n, name)) + }) + } + ASTNode::Loop { condition, body, .. } => { + contains_assignment_to_name_in_node(condition, name) + || body + .iter() + .any(|n| contains_assignment_to_name_in_node(n, name)) + } + ASTNode::While { condition, body, .. } => { + contains_assignment_to_name_in_node(condition, name) + || body + .iter() + .any(|n| contains_assignment_to_name_in_node(n, name)) + } + ASTNode::ForRange { body, .. } => body + .iter() + .any(|n| contains_assignment_to_name_in_node(n, name)), + ASTNode::TryCatch { + try_body, + catch_clauses, + finally_body, + .. + } => { + try_body + .iter() + .any(|n| contains_assignment_to_name_in_node(n, name)) + || catch_clauses.iter().any(|c| { + c.body + .iter() + .any(|n| contains_assignment_to_name_in_node(n, name)) + }) + || finally_body.as_ref().is_some_and(|b| { + b.iter() + .any(|n| contains_assignment_to_name_in_node(n, name)) + }) + } + ASTNode::ScopeBox { body, .. } => body + .iter() + .any(|n| contains_assignment_to_name_in_node(n, name)), + _ => false, + } +} diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2/contracts/mod.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2/contracts/mod.rs new file mode 100644 index 00000000..f916962d --- /dev/null +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2/contracts/mod.rs @@ -0,0 +1,3 @@ +//! Phase 29ab P4: Pattern2 contract modules (SSOT) + +pub(crate) mod derived_slot; diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2/mod.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2/mod.rs index 9a95cc4b..4bc876d4 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern2/mod.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2/mod.rs @@ -4,3 +4,4 @@ //! 
- `api/` - Public entry point for promotion logic (SSOT) pub(in crate::mir::builder) mod api; +pub(in crate::mir::builder) mod contracts; diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2_inputs_facts_box.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2_inputs_facts_box.rs index 69b12c86..e31398c6 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern2_inputs_facts_box.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2_inputs_facts_box.rs @@ -87,6 +87,10 @@ pub(in crate::mir::builder) struct Pattern2Inputs { /// Phase 94: BodyLocalDerived recipe for P5b "ch" reassignment + escape counter. pub body_local_derived_recipe: Option, + /// Phase 29ab P4: Derived slot recipe for seg-like conditional assignments. + pub body_local_derived_slot_recipe: Option< + crate::mir::join_ir::lowering::common::body_local_derived_slot_emitter::BodyLocalDerivedSlotRecipe, + >, /// Phase 107: Balanced depth-scan (find_balanced_*) derived recipe. pub balanced_depth_scan_recipe: Option, diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2_steps/apply_policy_step_box.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2_steps/apply_policy_step_box.rs index a8200a02..18bbed17 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern2_steps/apply_policy_step_box.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2_steps/apply_policy_step_box.rs @@ -30,6 +30,7 @@ impl ApplyPolicyStepBox { is_loop_true_read_digits: policy.is_loop_true_read_digits, condition_only_recipe: None, body_local_derived_recipe: None, + body_local_derived_slot_recipe: None, balanced_depth_scan_recipe: policy.balanced_depth_scan_recipe, carrier_updates_override: policy.carrier_updates_override, post_loop_early_return: policy.post_loop_early_return, diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern2_steps/emit_joinir_step_box.rs b/src/mir/builder/control_flow/joinir/patterns/pattern2_steps/emit_joinir_step_box.rs index 
ba0c0ca2..c7b8007f 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern2_steps/emit_joinir_step_box.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern2_steps/emit_joinir_step_box.rs @@ -51,6 +51,7 @@ impl EmitJoinIRStepBox { skeleton, condition_only_recipe: inputs.condition_only_recipe.as_ref(), body_local_derived_recipe: inputs.body_local_derived_recipe.as_ref(), + body_local_derived_slot_recipe: inputs.body_local_derived_slot_recipe.as_ref(), balanced_depth_scan_recipe: inputs.balanced_depth_scan_recipe.as_ref(), current_static_box_name: inputs.current_static_box_name.clone(), // Phase 252 }; diff --git a/src/mir/builder/control_flow/joinir/patterns/policies/trim_policy.rs b/src/mir/builder/control_flow/joinir/patterns/policies/trim_policy.rs index 849dff51..00d3ad09 100644 --- a/src/mir/builder/control_flow/joinir/patterns/policies/trim_policy.rs +++ b/src/mir/builder/control_flow/joinir/patterns/policies/trim_policy.rs @@ -4,6 +4,7 @@ //! 生成(lowering)は従来通り TrimLoopLowerer 側が担当する。 use crate::ast::ASTNode; +use crate::mir::builder::control_flow::joinir::patterns::pattern2::contracts::derived_slot::extract_body_local_derived_slot; use crate::mir::builder::control_flow::joinir::patterns::trim_loop_lowering::TrimLoopLowerer; use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape; use crate::mir::loop_pattern_detection::loop_condition_scope::{ @@ -23,6 +24,7 @@ pub fn classify_trim_like_loop( scope: &LoopScopeShape, loop_cond: &ASTNode, break_cond: &ASTNode, + body: &[ASTNode], loop_var_name: &str, ) -> PolicyDecision { let cond_scope = @@ -44,6 +46,18 @@ pub fn classify_trim_like_loop( return PolicyDecision::None; } + if condition_body_locals.len() == 1 { + match extract_body_local_derived_slot(&condition_body_locals[0].name, body) { + Ok(Some(_)) => return PolicyDecision::None, + Ok(None) => {} + Err(reason) => { + return PolicyDecision::Reject(format!( + "[trim_policy] derived-slot check failed: {reason}" + )) + } + } + } + 
PolicyDecision::Use(TrimPolicyResult { cond_scope, condition_body_locals, diff --git a/src/mir/builder/control_flow/joinir/patterns/trim_loop_lowering.rs b/src/mir/builder/control_flow/joinir/patterns/trim_loop_lowering.rs index 559016d8..6c0af770 100644 --- a/src/mir/builder/control_flow/joinir/patterns/trim_loop_lowering.rs +++ b/src/mir/builder/control_flow/joinir/patterns/trim_loop_lowering.rs @@ -187,7 +187,7 @@ impl TrimLoopLowerer { let TrimPolicyResult { cond_scope, condition_body_locals, - } = match classify_trim_like_loop(scope, loop_cond, break_cond, loop_var_name) { + } = match classify_trim_like_loop(scope, loop_cond, break_cond, body, loop_var_name) { PolicyDecision::Use(res) => res, PolicyDecision::None => return Ok(None), PolicyDecision::Reject(reason) => return Err(reason), diff --git a/src/mir/builder/control_flow/normalization/README.md b/src/mir/builder/control_flow/normalization/README.md index 09cb988b..f04ff270 100644 --- a/src/mir/builder/control_flow/normalization/README.md +++ b/src/mir/builder/control_flow/normalization/README.md @@ -122,7 +122,9 @@ pub enum PlanKind { ### Phase 142 P0: Statement-Level Normalization **CURRENT** - **Change**: Normalization unit changed from "block suffix" to "statement (loop only)" -- **Pattern**: `loop(true) { ... 
break }` - always returns `LoopOnly`, regardless of subsequent statements +- **Pattern**: `loop(true)` with **Normalized-supported body shapes** only + - Body ends with `break` and prior statements are `assignment`/`local` only + - Body is a single `if` with `break`/`continue` branches (optional else) - **Consumed**: Always 1 statement (the loop itself) - **Kind**: `PlanKind::LoopOnly` - **Subsequent statements**: Handled by normal MIR lowering (not normalized) diff --git a/src/mir/builder/control_flow/normalization/plan_box.rs b/src/mir/builder/control_flow/normalization/plan_box.rs index a96d3a77..92b9b98f 100644 --- a/src/mir/builder/control_flow/normalization/plan_box.rs +++ b/src/mir/builder/control_flow/normalization/plan_box.rs @@ -99,9 +99,21 @@ impl NormalizationPlanBox { } } - // Phase 142 P0: Always return loop_only for loop(true), regardless of what follows + // Phase 142 P0: Only return loop_only when loop body is in Normalized scope // Normalization unit is now "statement (loop 1個)" not "block suffix" // Subsequent statements (return, assignments, etc.) handled by normal MIR lowering + if let Some(body) = loop_body { + if !loop_true_body_supported_for_normalized(body) { + if debug { + trace.routing( + "normalization/plan", + func_name, + "Loop(true) body is out of scope for Normalized (returning None)", + ); + } + return Ok(None); + } + } if debug { trace.routing( "normalization/plan", @@ -113,6 +125,44 @@ impl NormalizationPlanBox { } } +fn loop_true_body_supported_for_normalized(body: &[ASTNode]) -> bool { + if body.is_empty() { + return false; + } + + if body.len() == 1 { + if let ASTNode::If { + then_body, + else_body, + .. + } = &body[0] + { + if is_break_or_continue_only(then_body) + && else_body + .as_ref() + .map_or(true, |branch| is_break_or_continue_only(branch)) + { + return true; + } + } + } + + if !matches!(body.last(), Some(ASTNode::Break { .. 
})) { + return false; + } + + body[..body.len() - 1].iter().all(|stmt| { + matches!(stmt, ASTNode::Assignment { .. } | ASTNode::Local { .. }) + }) +} + +fn is_break_or_continue_only(stmts: &[ASTNode]) -> bool { + if stmts.len() != 1 { + return false; + } + matches!(stmts[0], ASTNode::Break { .. } | ASTNode::Continue { .. }) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/mir/builder/types/annotation.rs b/src/mir/builder/types/annotation.rs index 73b68858..aae1991b 100644 --- a/src/mir/builder/types/annotation.rs +++ b/src/mir/builder/types/annotation.rs @@ -43,6 +43,9 @@ fn infer_return_type(func_name: &str) -> Option { if func_name.ends_with(".indexOf/1") { return Some(MirType::Integer); } + if func_name.ends_with(".indexOf/2") { + return Some(MirType::Integer); + } if func_name.ends_with(".lastIndexOf/1") { return Some(MirType::Integer); } diff --git a/src/mir/join_ir/lowering/common.rs b/src/mir/join_ir/lowering/common.rs index a9a0ad1b..f6dc2e12 100644 --- a/src/mir/join_ir/lowering/common.rs +++ b/src/mir/join_ir/lowering/common.rs @@ -8,6 +8,7 @@ pub mod body_local_slot; // Phase 92 P3: Read-only body-local slot for condition pub mod dual_value_rewriter; // Phase 246-EX/247-EX: name-based dual-value rewrites pub mod condition_only_emitter; // Phase 93 P0: ConditionOnly (Derived Slot) recalculation pub mod body_local_derived_emitter; // Phase 94: Derived body-local (P5b escape "ch" reassignment) +pub mod body_local_derived_slot_emitter; // Phase 29ab P4: Derived slot for seg pub mod balanced_depth_scan_emitter; // Phase 107: Balanced depth-scan (find_balanced_* recipe) pub mod string_accumulator_emitter; // Phase 100 P3-2: String accumulator (out = out + ch) diff --git a/src/mir/join_ir/lowering/common/README.md b/src/mir/join_ir/lowering/common/README.md index 4e8fd4fa..c9301f77 100644 --- a/src/mir/join_ir/lowering/common/README.md +++ b/src/mir/join_ir/lowering/common/README.md @@ -6,6 +6,7 @@ - `condition_only_emitter.rs` — ConditionOnly 
derived slot の再計算(Phase 93) - `body_local_slot.rs` — 読み取り専用 body-local を条件式で使うためのガード付き抽出(Phase 92) - `body_local_derived_emitter.rs` — 再代入される body-local(P5b `ch`)を Select で統合し、loop-var の +1/+2 も同時に出す(Phase 94) +- `body_local_derived_slot_emitter.rs` — 条件付き代入で再計算される body-local(seg)を Select で統合する(Phase 29ab P4) - `dual_value_rewriter.rs` — name ベースの dual-value 書き換え(BodyLocal vs Carrier)を一箇所に閉じ込める Fail-Fast 原則: diff --git a/src/mir/join_ir/lowering/common/body_local_derived_slot_emitter.rs b/src/mir/join_ir/lowering/common/body_local_derived_slot_emitter.rs new file mode 100644 index 00000000..61d2d7fb --- /dev/null +++ b/src/mir/join_ir/lowering/common/body_local_derived_slot_emitter.rs @@ -0,0 +1,175 @@ +//! Phase 29ab P4: BodyLocalDerivedSlotEmitter (seg derived slot) +//! +//! Goal: treat a single body-local variable (e.g., seg) as a derived value +//! computed before the break check, without promotion or PHI. +//! +//! This is intentionally minimal and fail-fast: +//! - Supports a single derived variable recipe +//! - Requires top-level if/else assignment shape (validated on builder side) +//! - Expression lowering is limited to pure expressions (Literal/Variable/MethodCall) + +use crate::ast::ASTNode; +use crate::mir::join_ir::lowering::condition_env::ConditionEnv; +use crate::mir::join_ir::lowering::condition_lowerer::lower_condition_to_joinir; +use crate::mir::join_ir::lowering::error_tags; +use crate::mir::join_ir::lowering::loop_body_local_env::LoopBodyLocalEnv; +use crate::mir::join_ir::lowering::method_call_lowerer::MethodCallLowerer; +use crate::mir::join_ir::{ConstValue, JoinInst, MirLikeInst}; +use crate::mir::{MirType, ValueId}; + +#[derive(Debug, Clone)] +pub struct BodyLocalDerivedSlotRecipe { + pub name: String, + /// Base init expression from `local name = ` (diagnostics only; lowering is done elsewhere). 
+ #[allow(dead_code)] + pub base_init_expr: ASTNode, + pub assign_cond: ASTNode, + pub then_expr: ASTNode, + pub else_expr: Option, +} + +pub struct BodyLocalDerivedSlotEmitter; + +impl BodyLocalDerivedSlotEmitter { + pub fn emit( + recipe: &BodyLocalDerivedSlotRecipe, + alloc_value: &mut dyn FnMut() -> ValueId, + env: &ConditionEnv, + body_local_env: &mut LoopBodyLocalEnv, + instructions: &mut Vec, + current_static_box_name: Option<&str>, + ) -> Result { + let base_value = body_local_env.get(&recipe.name).ok_or_else(|| { + error_tags::freeze(&format!( + "[phase29ab/body_local_derived_slot/contract/missing_base] Missing base ValueId for '{}'", + recipe.name + )) + })?; + + let (cond_id, cond_insts) = lower_condition_to_joinir( + &recipe.assign_cond, + alloc_value, + env, + Some(body_local_env), + current_static_box_name, + )?; + instructions.extend(cond_insts); + + let then_val = lower_value_expr( + &recipe.then_expr, + alloc_value, + env, + body_local_env, + current_static_box_name, + instructions, + )?; + + let else_val = if let Some(expr) = &recipe.else_expr { + lower_value_expr( + expr, + alloc_value, + env, + body_local_env, + current_static_box_name, + instructions, + )? + } else { + base_value + }; + + let derived = alloc_value(); + instructions.push(JoinInst::Select { + dst: derived, + cond: cond_id, + then_val, + else_val, + type_hint: Some(MirType::String), + }); + + body_local_env.insert(recipe.name.clone(), derived); + Ok(derived) + } +} + +fn lower_value_expr( + expr: &ASTNode, + alloc_value: &mut dyn FnMut() -> ValueId, + env: &ConditionEnv, + body_local_env: &LoopBodyLocalEnv, + _current_static_box_name: Option<&str>, + instructions: &mut Vec, +) -> Result { + match expr { + ASTNode::Literal { value, .. 
} => match value { + crate::ast::LiteralValue::Integer(i) => { + let vid = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: vid, + value: ConstValue::Integer(*i), + })); + Ok(vid) + } + crate::ast::LiteralValue::String(s) => { + let vid = alloc_value(); + instructions.push(JoinInst::Compute(MirLikeInst::Const { + dst: vid, + value: ConstValue::String(s.clone()), + })); + Ok(vid) + } + _ => Err(error_tags::freeze(&format!( + "[phase29ab/body_local_derived_slot/contract/unsupported_literal] {:?}", + value + ))), + }, + ASTNode::Variable { name, .. } => resolve_var_value(name, env, body_local_env).ok_or_else(|| { + error_tags::freeze(&format!( + "[phase29ab/body_local_derived_slot/contract/missing_var] '{}' not found in envs", + name + )) + }), + ASTNode::MethodCall { + object, + method, + arguments, + .. + } => { + let recv_val = match object.as_ref() { + ASTNode::Variable { name, .. } => resolve_var_value(name, env, body_local_env) + .ok_or_else(|| { + error_tags::freeze(&format!( + "[phase29ab/body_local_derived_slot/contract/missing_receiver] '{}' not found in envs", + name + )) + })?, + _ => { + return Err(error_tags::freeze(&format!( + "[phase29ab/body_local_derived_slot/contract/receiver_kind] Unsupported receiver: {:?}", + object + ))) + } + }; + MethodCallLowerer::lower_for_init( + recv_val, + method, + arguments, + alloc_value, + env, + body_local_env, + instructions, + ) + } + _ => Err(error_tags::freeze(&format!( + "[phase29ab/body_local_derived_slot/contract/unsupported_expr] {:?}", + expr + ))), + } +} + +fn resolve_var_value( + name: &str, + env: &ConditionEnv, + body_local_env: &LoopBodyLocalEnv, +) -> Option { + body_local_env.get(name).or_else(|| env.get(name)) +} diff --git a/src/mir/join_ir/lowering/loop_with_break_minimal.rs b/src/mir/join_ir/lowering/loop_with_break_minimal.rs index 30404e36..5679db32 100644 --- a/src/mir/join_ir/lowering/loop_with_break_minimal.rs +++ 
b/src/mir/join_ir/lowering/loop_with_break_minimal.rs @@ -108,6 +108,10 @@ pub(crate) struct LoopWithBreakLoweringInputs<'a> { pub condition_only_recipe: Option<&'a crate::mir::join_ir::lowering::common::condition_only_emitter::ConditionOnlyRecipe>, /// Phase 94: BodyLocalDerived recipe (P5b escape `ch` reassignment + conditional counter). pub body_local_derived_recipe: Option<&'a BodyLocalDerivedRecipe>, + /// Phase 29ab P4: Derived slot recipe for seg-like conditional assignments. + pub body_local_derived_slot_recipe: Option< + &'a crate::mir::join_ir::lowering::common::body_local_derived_slot_emitter::BodyLocalDerivedSlotRecipe, + >, /// Phase 107: Balanced depth-scan recipe (find_balanced_* family). pub balanced_depth_scan_recipe: Option<&'a BalancedDepthScanRecipe>, /// Phase 252: Name of the static box being lowered (for this.method(...) in break conditions) @@ -190,6 +194,7 @@ pub(crate) fn lower_loop_with_break_minimal( skeleton, condition_only_recipe, body_local_derived_recipe, + body_local_derived_slot_recipe, balanced_depth_scan_recipe, current_static_box_name, // Phase 252 } = inputs; @@ -458,6 +463,7 @@ pub(crate) fn lower_loop_with_break_minimal( body_ast, body_local_env.as_deref_mut(), condition_only_recipe, + body_local_derived_slot_recipe, balanced_depth_scan_recipe, current_static_box_name.clone(), &mut alloc_local_fn, @@ -524,6 +530,24 @@ pub(crate) fn lower_loop_with_break_minimal( &dev_log, )?; + let loop_var_next_override = if loop_var_next_override.is_none() { + if let Some(update_expr) = extract_loop_var_update_expr(body_ast, loop_var_name) { + let next_id = crate::mir::join_ir::lowering::condition_lowerer::lower_value_expression( + &update_expr, + &mut alloc_local_fn, + env, + body_local_env.as_ref().map(|e| &**e), + current_static_box_name.as_deref(), + &mut tail_block, + )?; + Some(next_id) + } else { + None + } + } else { + loop_var_next_override + }; + emit_tail_call( loop_step_id, i_param, @@ -615,3 +639,24 @@ pub(crate) fn 
lower_loop_with_break_minimal( Ok((join_module, fragment_meta)) } + +fn extract_loop_var_update_expr(body_ast: &[ASTNode], loop_var_name: &str) -> Option { + let mut matches = Vec::new(); + + for node in body_ast { + if let ASTNode::Assignment { target, value, .. } = node { + if matches!( + target.as_ref(), + ASTNode::Variable { name, .. } if name == loop_var_name + ) { + matches.push((**value).clone()); + } + } + } + + if matches.len() == 1 { + Some(matches.remove(0)) + } else { + None + } +} diff --git a/src/mir/join_ir/lowering/loop_with_break_minimal/body_local_init.rs b/src/mir/join_ir/lowering/loop_with_break_minimal/body_local_init.rs index 1ff1c8ce..c95121c4 100644 --- a/src/mir/join_ir/lowering/loop_with_break_minimal/body_local_init.rs +++ b/src/mir/join_ir/lowering/loop_with_break_minimal/body_local_init.rs @@ -2,6 +2,9 @@ use crate::ast::ASTNode; use crate::mir::join_ir::lowering::common::balanced_depth_scan_emitter::{ BalancedDepthScanEmitter, BalancedDepthScanRecipe, }; +use crate::mir::join_ir::lowering::common::body_local_derived_slot_emitter::{ + BodyLocalDerivedSlotEmitter, BodyLocalDerivedSlotRecipe, +}; use crate::mir::join_ir::lowering::common::condition_only_emitter::ConditionOnlyRecipe; use crate::mir::join_ir::lowering::condition_env::ConditionEnv; use crate::mir::join_ir::lowering::debug_output_box::DebugOutputBox; @@ -14,6 +17,7 @@ pub(crate) fn emit_body_local_inits( body_ast: &[ASTNode], body_local_env: Option<&mut LoopBodyLocalEnv>, condition_only_recipe: Option<&ConditionOnlyRecipe>, + body_local_derived_slot_recipe: Option<&BodyLocalDerivedSlotRecipe>, balanced_depth_scan_recipe: Option<&BalancedDepthScanRecipe>, current_static_box_name: Option, alloc_local_fn: &mut F, @@ -33,7 +37,7 @@ where env, body_init_block, Box::new(&mut *alloc_local_fn), - current_static_box_name, + current_static_box_name.clone(), ); init_lowerer.lower_inits_for_loop(body_ast, body_env)?; @@ -69,6 +73,17 @@ where }); } + if let Some(recipe) = 
body_local_derived_slot_recipe { + BodyLocalDerivedSlotEmitter::emit( + recipe, + alloc_local_fn, + env, + body_env, + body_init_block, + current_static_box_name.as_deref(), + )?; + } + if let Some(recipe) = balanced_depth_scan_recipe { BalancedDepthScanEmitter::emit_derived( recipe, diff --git a/src/mir/join_ir/lowering/loop_with_break_minimal/tests.rs b/src/mir/join_ir/lowering/loop_with_break_minimal/tests.rs index a55de90f..419f1652 100644 --- a/src/mir/join_ir/lowering/loop_with_break_minimal/tests.rs +++ b/src/mir/join_ir/lowering/loop_with_break_minimal/tests.rs @@ -148,6 +148,7 @@ fn test_pattern2_header_condition_via_exprlowerer() { skeleton: None, // Phase 92 P0-3: skeleton=None for backward compatibility condition_only_recipe: None, // Phase 93 P0: None for normal loops body_local_derived_recipe: None, // Phase 94: None for normal loops + body_local_derived_slot_recipe: None, // Phase 29ab P4: None for normal loops balanced_depth_scan_recipe: None, // Phase 107: None for normal loops current_static_box_name: None, // Phase 252: No static box context in test }); diff --git a/src/mir/join_ir/lowering/method_call_lowerer.rs b/src/mir/join_ir/lowering/method_call_lowerer.rs index bb8a5879..c927d51b 100644 --- a/src/mir/join_ir/lowering/method_call_lowerer.rs +++ b/src/mir/join_ir/lowering/method_call_lowerer.rs @@ -37,6 +37,22 @@ use crate::ast::ASTNode; use crate::mir::join_ir::{JoinInst, MirLikeInst}; use crate::mir::ValueId; use crate::runtime::core_box_ids::CoreMethodId; +use crate::runtime::core_method_aliases::canonical_method_name; + +fn resolve_core_method_id(method_name: &str, arg_len: usize) -> Result> { + CoreMethodId::resolve_by_name_and_arity(method_name, arg_len) +} + +fn format_expected_arities(expected: &[usize]) -> String { + let mut list = expected.to_vec(); + list.sort_unstable(); + list.dedup(); + if list.len() == 1 { + list[0].to_string() + } else { + format!("{:?}", list) + } +} use super::condition_env::ConditionEnv; use 
super::loop_body_local_env::LoopBodyLocalEnv; @@ -144,16 +160,27 @@ impl MethodCallLowerer { env: &ConditionEnv, instructions: &mut Vec, ) -> Result { - // Resolve method name to CoreMethodId + // Resolve method name + arity to CoreMethodId // Note: We don't know receiver type at this point, so we try all methods - let method_id = CoreMethodId::iter() - .find(|m| m.name() == method_name) - .ok_or_else(|| { - format!( + let canonical_name = canonical_method_name(method_name); + let method_id = match resolve_core_method_id(canonical_name, args.len()) { + Ok(id) => id, + Err(expected) if expected.is_empty() => { + return Err(format!( "MethodCall not recognized as CoreMethodId: {}.{}()", recv_val.0, method_name - ) - })?; + )); + } + Err(expected) => { + return Err(format!( + "Arity mismatch: {}.{}() expects {} args, got {}", + recv_val.0, + method_name, + format_expected_arities(&expected), + args.len() + )); + } + }; // Check if allowed in condition context if !method_id.allowed_in_condition() { @@ -200,7 +227,7 @@ impl MethodCallLowerer { instructions.push(JoinInst::Compute(MirLikeInst::BoxCall { dst: Some(dst), box_name, - method: method_name.to_string(), + method: canonical_name.to_string(), args: full_args, })); @@ -232,15 +259,26 @@ impl MethodCallLowerer { body_local_env: &LoopBodyLocalEnv, instructions: &mut Vec, ) -> Result { - // Resolve method name to CoreMethodId - let method_id = CoreMethodId::iter() - .find(|m| m.name() == method_name) - .ok_or_else(|| { - format!( + // Resolve method name + arity to CoreMethodId + let canonical_name = canonical_method_name(method_name); + let method_id = match resolve_core_method_id(canonical_name, args.len()) { + Ok(id) => id, + Err(expected) if expected.is_empty() => { + return Err(format!( "MethodCall not recognized as CoreMethodId: {}.{}()", recv_val.0, method_name - ) - })?; + )); + } + Err(expected) => { + return Err(format!( + "Arity mismatch: {}.{}() expects {} args, got {}", + recv_val.0, + method_name, + 
format_expected_arities(&expected), + args.len() + )); + } + }; // Check if allowed in init context if !method_id.allowed_in_init() { @@ -282,7 +320,7 @@ impl MethodCallLowerer { instructions.push(JoinInst::Compute(MirLikeInst::BoxCall { dst: Some(dst), box_name, - method: method_name.to_string(), + method: canonical_name.to_string(), args: full_args, })); @@ -354,7 +392,7 @@ mod tests { #[test] fn test_not_allowed_in_condition() { - // Test: s.upper() not whitelisted for conditions + // Test: s.toUpper() not whitelisted for conditions let recv_val = ValueId(10); let mut value_counter = 100u32; let mut alloc_value = || { @@ -367,7 +405,7 @@ mod tests { let result = MethodCallLowerer::lower_for_condition( recv_val, - "upper", + "toUpper", &[], &mut alloc_value, &env, diff --git a/src/mir/join_ir/lowering/type_inference.rs b/src/mir/join_ir/lowering/type_inference.rs index ad296a80..596f2b89 100644 --- a/src/mir/join_ir/lowering/type_inference.rs +++ b/src/mir/join_ir/lowering/type_inference.rs @@ -5,6 +5,8 @@ // P3-C(ジェネリック型推論)は Phase 66+ に延期。 use crate::mir::MirType; +use crate::runtime::core_box_ids::{CoreBoxId, CoreMethodId}; +use crate::runtime::core_method_aliases::canonical_method_name; /// Phase 65-2-A: MethodCall 戻り値型推論 /// @@ -20,34 +22,35 @@ use crate::mir::MirType; /// - `Some(MirType)`: 推論成功時の戻り値型 /// - `None`: 推論失敗(未知のメソッド、P3-C 対象など) pub fn infer_method_return_type(receiver_type: &MirType, method_name: &str) -> Option { - // Phase 65-2-A: StringBox メソッド(MirType::String) + let canonical = canonical_method_name(method_name); + if let Some(core_box_id) = core_box_id_for_receiver(receiver_type) { + if let Some(method_id) = CoreMethodId::iter() + .find(|method_id| method_id.box_id() == core_box_id && method_id.name() == canonical) + { + return mir_type_from_return_name(method_id.return_type_name()); + } + } + + // Phase 65-2-A: non-core (legacy) method hints match receiver_type { - MirType::String => match method_name { - "substring" => 
Some(MirType::String), + MirType::String => match canonical { "charAt" => Some(MirType::String), - "indexOf" => Some(MirType::Integer), - "length" => Some(MirType::Integer), - "toUpper" => Some(MirType::String), - "toLower" => Some(MirType::String), - "concat" => Some(MirType::String), - _ => None, // Unknown メソッド + _ => None, }, MirType::Box(box_name) => match box_name.as_str() { - "ArrayBox" => match method_name { + "ArrayBox" => match canonical { "size" => Some(MirType::Integer), "push" => Some(MirType::Void), - // P3-C: get, pop は要素型依存 → Phase 66+ _ => None, }, - "MapBox" => match method_name { + "MapBox" => match canonical { "size" => Some(MirType::Integer), "has" => Some(MirType::Bool), - // P3-C: get は値型依存 → Phase 66+ _ => None, }, - _ => None, // その他の Box + _ => None, }, - _ => None, // その他の型 + _ => None, } } @@ -82,6 +85,28 @@ pub fn infer_box_type(box_name: &str) -> Option { } } +fn core_box_id_for_receiver(receiver_type: &MirType) -> Option { + match receiver_type { + MirType::String => Some(CoreBoxId::String), + MirType::Integer => Some(CoreBoxId::Integer), + MirType::Bool => Some(CoreBoxId::Bool), + MirType::Box(box_name) => CoreBoxId::from_name(box_name), + _ => None, + } +} + +fn mir_type_from_return_name(return_name: &str) -> Option { + match return_name { + "IntegerBox" => Some(MirType::Integer), + "StringBox" => Some(MirType::String), + "BoolBox" => Some(MirType::Bool), + "Void" => Some(MirType::Void), + "Unknown" => None, + other if other.ends_with("Box") => Some(MirType::Box(other.to_string())), + _ => None, + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/mir/join_ir/normalized/fixtures.rs b/src/mir/join_ir/normalized/fixtures.rs index 5eb3f99c..41f6d4cb 100644 --- a/src/mir/join_ir/normalized/fixtures.rs +++ b/src/mir/join_ir/normalized/fixtures.rs @@ -137,6 +137,7 @@ pub fn build_pattern2_minimal_structured() -> JoinModule { skeleton: None, // Phase 92 P0-3: skeleton=None for backward compatibility condition_only_recipe: None, 
// Phase 93 P0: None for normal loops body_local_derived_recipe: None, // Phase 94: None for fixture + body_local_derived_slot_recipe: None, // Phase 29ab P4: None for fixture balanced_depth_scan_recipe: None, // Phase 107: None for fixture }, ) diff --git a/src/runtime/core_box_ids.rs b/src/runtime/core_box_ids.rs index 14783e3f..635c4bdc 100644 --- a/src/runtime/core_box_ids.rs +++ b/src/runtime/core_box_ids.rs @@ -169,6 +169,7 @@ pub enum CoreMethodId { StringConcat, StringSubstring, StringIndexOf, + StringIndexOfFrom, StringReplace, StringTrim, StringSplit, @@ -210,156 +211,446 @@ pub enum CoreMethodId { ResultGetValue, } +/// SSOT for CoreMethodId metadata (name/arity/return types and policy flags). +#[derive(Debug, Clone, Copy)] +struct CoreMethodSpec { + id: CoreMethodId, + box_id: CoreBoxId, + name: &'static str, + arity: usize, + return_type_name: &'static str, + is_pure: bool, + allowed_in_condition: bool, + allowed_in_init: bool, + vtable_slot: Option, +} + +const CORE_METHOD_SPECS: &[CoreMethodSpec] = &[ + // StringBox methods + CoreMethodSpec { + id: CoreMethodId::StringLength, + box_id: CoreBoxId::String, + name: "length", + arity: 0, + return_type_name: "IntegerBox", + is_pure: true, + allowed_in_condition: true, + allowed_in_init: true, + vtable_slot: Some(300), + }, + CoreMethodSpec { + id: CoreMethodId::StringUpper, + box_id: CoreBoxId::String, + name: "toUpper", + arity: 0, + return_type_name: "StringBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: None, + }, + CoreMethodSpec { + id: CoreMethodId::StringLower, + box_id: CoreBoxId::String, + name: "toLower", + arity: 0, + return_type_name: "StringBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: None, + }, + CoreMethodSpec { + id: CoreMethodId::StringConcat, + box_id: CoreBoxId::String, + name: "concat", + arity: 1, + return_type_name: "StringBox", + is_pure: true, + allowed_in_condition: false, + 
allowed_in_init: true, + vtable_slot: Some(302), + }, + CoreMethodSpec { + id: CoreMethodId::StringSubstring, + box_id: CoreBoxId::String, + name: "substring", + arity: 2, + return_type_name: "StringBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: Some(301), + }, + CoreMethodSpec { + id: CoreMethodId::StringIndexOf, + box_id: CoreBoxId::String, + name: "indexOf", + arity: 1, + return_type_name: "IntegerBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: Some(303), + }, + CoreMethodSpec { + id: CoreMethodId::StringIndexOfFrom, + box_id: CoreBoxId::String, + name: "indexOf", + arity: 2, + return_type_name: "IntegerBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: Some(303), + }, + CoreMethodSpec { + id: CoreMethodId::StringReplace, + box_id: CoreBoxId::String, + name: "replace", + arity: 2, + return_type_name: "StringBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: Some(304), + }, + CoreMethodSpec { + id: CoreMethodId::StringTrim, + box_id: CoreBoxId::String, + name: "trim", + arity: 0, + return_type_name: "StringBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: Some(305), + }, + CoreMethodSpec { + id: CoreMethodId::StringSplit, + box_id: CoreBoxId::String, + name: "split", + arity: 1, + return_type_name: "Unknown", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: None, + }, + // IntegerBox methods + CoreMethodSpec { + id: CoreMethodId::IntegerAbs, + box_id: CoreBoxId::Integer, + name: "abs", + arity: 0, + return_type_name: "IntegerBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: None, + }, + CoreMethodSpec { + id: CoreMethodId::IntegerMin, + box_id: CoreBoxId::Integer, + name: "min", + arity: 1, + return_type_name: "IntegerBox", + is_pure: true, + 
allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: None, + }, + CoreMethodSpec { + id: CoreMethodId::IntegerMax, + box_id: CoreBoxId::Integer, + name: "max", + arity: 1, + return_type_name: "IntegerBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: None, + }, + // BoolBox methods + CoreMethodSpec { + id: CoreMethodId::BoolNot, + box_id: CoreBoxId::Bool, + name: "not", + arity: 0, + return_type_name: "BoolBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: None, + }, + CoreMethodSpec { + id: CoreMethodId::BoolAnd, + box_id: CoreBoxId::Bool, + name: "and", + arity: 1, + return_type_name: "BoolBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: None, + }, + CoreMethodSpec { + id: CoreMethodId::BoolOr, + box_id: CoreBoxId::Bool, + name: "or", + arity: 1, + return_type_name: "BoolBox", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: None, + }, + // ArrayBox methods + CoreMethodSpec { + id: CoreMethodId::ArrayLength, + box_id: CoreBoxId::Array, + name: "length", + arity: 0, + return_type_name: "IntegerBox", + is_pure: true, + allowed_in_condition: true, + allowed_in_init: true, + vtable_slot: Some(102), + }, + CoreMethodSpec { + id: CoreMethodId::ArrayPush, + box_id: CoreBoxId::Array, + name: "push", + arity: 1, + return_type_name: "Void", + is_pure: false, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: Some(103), + }, + CoreMethodSpec { + id: CoreMethodId::ArrayPop, + box_id: CoreBoxId::Array, + name: "pop", + arity: 0, + return_type_name: "Void", + is_pure: false, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: Some(104), + }, + CoreMethodSpec { + id: CoreMethodId::ArrayGet, + box_id: CoreBoxId::Array, + name: "get", + arity: 1, + return_type_name: "Unknown", + is_pure: true, + allowed_in_condition: true, + allowed_in_init: true, + 
vtable_slot: Some(100), + }, + // MapBox methods + CoreMethodSpec { + id: CoreMethodId::MapGet, + box_id: CoreBoxId::Map, + name: "get", + arity: 1, + return_type_name: "Unknown", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: Some(203), + }, + CoreMethodSpec { + id: CoreMethodId::MapSet, + box_id: CoreBoxId::Map, + name: "set", + arity: 2, + return_type_name: "Void", + is_pure: false, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: Some(204), + }, + CoreMethodSpec { + id: CoreMethodId::MapHas, + box_id: CoreBoxId::Map, + name: "has", + arity: 1, + return_type_name: "BoolBox", + is_pure: true, + allowed_in_condition: true, + allowed_in_init: true, + vtable_slot: Some(202), + }, + CoreMethodSpec { + id: CoreMethodId::MapKeys, + box_id: CoreBoxId::Map, + name: "keys", + arity: 0, + return_type_name: "Unknown", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: Some(206), + }, + // ConsoleBox methods + CoreMethodSpec { + id: CoreMethodId::ConsolePrintln, + box_id: CoreBoxId::Console, + name: "println", + arity: 1, + return_type_name: "Void", + is_pure: false, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: Some(400), + }, + CoreMethodSpec { + id: CoreMethodId::ConsoleLog, + box_id: CoreBoxId::Console, + name: "log", + arity: 1, + return_type_name: "Void", + is_pure: false, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: Some(400), + }, + CoreMethodSpec { + id: CoreMethodId::ConsoleError, + box_id: CoreBoxId::Console, + name: "error", + arity: 1, + return_type_name: "Void", + is_pure: false, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: Some(402), + }, + // FileBox methods + CoreMethodSpec { + id: CoreMethodId::FileRead, + box_id: CoreBoxId::File, + name: "read", + arity: 1, + return_type_name: "StringBox", + is_pure: false, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: 
None, + }, + CoreMethodSpec { + id: CoreMethodId::FileWrite, + box_id: CoreBoxId::File, + name: "write", + arity: 1, + return_type_name: "Void", + is_pure: false, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: None, + }, + CoreMethodSpec { + id: CoreMethodId::FileOpen, + box_id: CoreBoxId::File, + name: "open", + arity: 1, + return_type_name: "FileBox", + is_pure: false, + allowed_in_condition: false, + allowed_in_init: false, + vtable_slot: None, + }, + // ResultBox methods + CoreMethodSpec { + id: CoreMethodId::ResultIsOk, + box_id: CoreBoxId::Result, + name: "isOk", + arity: 0, + return_type_name: "BoolBox", + is_pure: true, + allowed_in_condition: true, + allowed_in_init: true, + vtable_slot: None, + }, + CoreMethodSpec { + id: CoreMethodId::ResultGetValue, + box_id: CoreBoxId::Result, + name: "getValue", + arity: 0, + return_type_name: "Unknown", + is_pure: true, + allowed_in_condition: false, + allowed_in_init: true, + vtable_slot: None, + }, +]; + impl CoreMethodId { + fn spec(&self) -> &'static CoreMethodSpec { + CORE_METHOD_SPECS + .iter() + .find(|spec| spec.id == *self) + .expect("CoreMethodSpec missing for CoreMethodId") + } + /// メソッドが属する Box ID pub fn box_id(&self) -> CoreBoxId { - use CoreMethodId::*; - match self { - StringLength | StringUpper | StringLower | StringConcat | StringSubstring - | StringIndexOf | StringReplace | StringTrim | StringSplit => CoreBoxId::String, - - IntegerAbs | IntegerMin | IntegerMax => CoreBoxId::Integer, - - BoolNot | BoolAnd | BoolOr => CoreBoxId::Bool, - - ArrayLength | ArrayPush | ArrayPop | ArrayGet => CoreBoxId::Array, - - MapGet | MapSet | MapHas | MapKeys => CoreBoxId::Map, - - ConsolePrintln | ConsoleLog | ConsoleError => CoreBoxId::Console, - - FileRead | FileWrite | FileOpen => CoreBoxId::File, - - ResultIsOk | ResultGetValue => CoreBoxId::Result, - } + self.spec().box_id } /// メソッド名(例: "length") pub fn name(&self) -> &'static str { - use CoreMethodId::*; - match self { - StringLength 
=> "length", - StringUpper => "upper", - StringLower => "lower", - StringConcat => "concat", - StringSubstring => "substring", - StringIndexOf => "indexOf", - StringReplace => "replace", - StringTrim => "trim", - StringSplit => "split", - - IntegerAbs => "abs", - IntegerMin => "min", - IntegerMax => "max", - - BoolNot => "not", - BoolAnd => "and", - BoolOr => "or", - - ArrayLength => "length", - ArrayPush => "push", - ArrayPop => "pop", - ArrayGet => "get", - - MapGet => "get", - MapSet => "set", - MapHas => "has", - MapKeys => "keys", - - ConsolePrintln => "println", - ConsoleLog => "log", - ConsoleError => "error", - - FileRead => "read", - FileWrite => "write", - FileOpen => "open", - - ResultIsOk => "isOk", - ResultGetValue => "getValue", - } + self.spec().name } /// 引数の数 pub fn arity(&self) -> usize { - use CoreMethodId::*; - match self { - StringLength | StringUpper | StringLower | StringTrim | IntegerAbs | BoolNot - | ArrayLength | ArrayPop | MapKeys | ResultIsOk | ResultGetValue => 0, - - StringConcat | StringIndexOf | StringReplace | StringSplit | IntegerMin - | IntegerMax | BoolAnd | BoolOr | ArrayGet | ArrayPush | MapGet | MapHas - | ConsolePrintln | ConsoleLog | ConsoleError | FileRead | FileWrite | FileOpen => 1, - - StringSubstring | MapSet => 2, - } + self.spec().arity } /// Phase 84-4-B: 戻り値型(型推論用) pub fn return_type_name(&self) -> &'static str { - use CoreMethodId::*; - match self { - StringLength | StringIndexOf | ArrayLength => "IntegerBox", + self.spec().return_type_name + } - StringUpper | StringLower | StringConcat | StringSubstring | StringReplace - | StringTrim => "StringBox", - - IntegerAbs | IntegerMin | IntegerMax => "IntegerBox", - - BoolNot | BoolAnd | BoolOr | MapHas | ResultIsOk => "BoolBox", - - ArrayPush | ArrayPop | MapSet | ConsolePrintln | ConsoleLog | ConsoleError - | FileWrite => "Void", - - ArrayGet | MapGet | MapKeys | StringSplit => "Unknown", - - FileRead => "StringBox", - FileOpen => "FileBox", - ResultGetValue => 
"Unknown", - } + /// VTable slot for TypeRegistry (None when not exposed via vtable). + pub fn vtable_slot(&self) -> Option { + self.spec().vtable_slot } /// 全CoreMethodIdを反復 pub fn iter() -> impl Iterator { - use CoreMethodId::*; - [ - StringLength, - StringUpper, - StringLower, - StringConcat, - StringSubstring, - StringIndexOf, - StringReplace, - StringTrim, - StringSplit, - IntegerAbs, - IntegerMin, - IntegerMax, - BoolNot, - BoolAnd, - BoolOr, - ArrayLength, - ArrayPush, - ArrayPop, - ArrayGet, - MapGet, - MapSet, - MapHas, - MapKeys, - ConsolePrintln, - ConsoleLog, - ConsoleError, - FileRead, - FileWrite, - FileOpen, - ResultIsOk, - ResultGetValue, - ] - .into_iter() + CORE_METHOD_SPECS.iter().map(|spec| spec.id) } /// Box名とメソッド名から CoreMethodId を取得 pub fn from_box_and_method(box_id: CoreBoxId, method: &str) -> Option { - Self::iter().find(|m| m.box_id() == box_id && m.name() == method) + let canonical = crate::runtime::core_method_aliases::canonical_method_name(method); + CORE_METHOD_SPECS + .iter() + .find(|spec| spec.box_id == box_id && spec.name == canonical) + .map(|spec| spec.id) + } + + /// メソッド名とアリティから CoreMethodId を解決 + pub fn resolve_by_name_and_arity( + method_name: &str, + arg_len: usize, + ) -> Result> { + let canonical = crate::runtime::core_method_aliases::canonical_method_name(method_name); + let mut expected = Vec::new(); + for spec in CORE_METHOD_SPECS.iter().filter(|spec| spec.name == canonical) { + expected.push(spec.arity); + if spec.arity == arg_len { + return Ok(spec.id); + } + } + expected.sort_unstable(); + expected.dedup(); + Err(expected) } /// Phase 224-B: Pure function (no side effects, deterministic) @@ -373,28 +664,7 @@ impl CoreMethodId { /// - `StringLength`: Pure - always returns same length for same string /// - `ArrayPush`: Not pure - mutates the array (side effect) pub fn is_pure(&self) -> bool { - use CoreMethodId::*; - match self { - // String methods (pure - return new values, don't mutate) - StringLength | StringUpper | 
StringLower | StringConcat | StringSubstring - | StringIndexOf | StringReplace | StringTrim | StringSplit => true, - - // Integer/Bool methods (pure - mathematical operations) - IntegerAbs | IntegerMin | IntegerMax | BoolNot | BoolAnd | BoolOr => true, - - // Array/Map read operations (pure - don't mutate) - ArrayLength | ArrayGet => true, - MapGet | MapHas => true, - MapKeys => true, - - // ResultBox read operations (pure) - ResultIsOk | ResultGetValue => true, - - // Impure - mutate state or have side effects - ArrayPush | ArrayPop | MapSet => false, - ConsolePrintln | ConsoleLog | ConsoleError => false, - FileRead | FileWrite | FileOpen => false, - } + self.spec().is_pure } /// Phase 224-B: Allowed in loop condition expressions @@ -406,36 +676,7 @@ impl CoreMethodId { /// /// This is a whitelist approach - default to false for safety. pub fn allowed_in_condition(&self) -> bool { - use CoreMethodId::*; - match self { - // String read operations - allowed - StringLength => true, - - // Array read operations - allowed - ArrayLength | ArrayGet => true, - - // Map read operations - allowed - MapHas => true, - - // ResultBox operations - allowed - ResultIsOk => true, - - // Not yet whitelisted - be conservative - StringUpper | StringLower | StringConcat | StringSubstring | StringIndexOf - | StringReplace | StringTrim | StringSplit => false, - - IntegerAbs | IntegerMin | IntegerMax => false, - BoolNot | BoolAnd | BoolOr => false, - - MapGet => false, - MapKeys => false, - ResultGetValue => false, - - // Obviously disallowed - side effects - ArrayPush | ArrayPop | MapSet => false, - ConsolePrintln | ConsoleLog | ConsoleError => false, - FileRead | FileWrite | FileOpen => false, - } + self.spec().allowed_in_condition } /// Phase 224-B: Allowed in loop body init expressions @@ -443,43 +684,14 @@ impl CoreMethodId { /// Methods allowed for LoopBodyLocal initialization. /// Similar to condition requirements but slightly more permissive. 
pub fn allowed_in_init(&self) -> bool { - use CoreMethodId::*; - match self { - // String operations - allowed - StringLength | StringSubstring | StringIndexOf => true, - - // String transformations - allowed for init - StringUpper | StringLower | StringTrim => true, - - // Array operations - allowed - ArrayLength | ArrayGet => true, - - // Map operations - allowed - MapGet | MapHas | MapKeys => true, - - // ResultBox operations - allowed - ResultIsOk | ResultGetValue => true, - - // String operations that create new strings - allowed - StringConcat | StringReplace | StringSplit => true, - - // Math operations - allowed - IntegerAbs | IntegerMin | IntegerMax => true, - - // Not allowed - side effects - ArrayPush | ArrayPop | MapSet => false, - ConsolePrintln | ConsoleLog | ConsoleError => false, - FileRead | FileWrite | FileOpen => false, - - // Bool operations - technically pure but unusual in init - BoolNot | BoolAnd | BoolOr => false, - } + self.spec().allowed_in_init } } #[cfg(test)] mod tests { use super::*; + use std::collections::HashSet; // ===== CoreBoxId tests ===== @@ -573,6 +785,7 @@ mod tests { fn test_core_method_id_arity() { assert_eq!(CoreMethodId::StringLength.arity(), 0); assert_eq!(CoreMethodId::StringConcat.arity(), 1); + assert_eq!(CoreMethodId::StringIndexOfFrom.arity(), 2); assert_eq!(CoreMethodId::MapSet.arity(), 2); } @@ -606,6 +819,22 @@ mod tests { assert!(count >= 27); // Phase 87: 27個以上のメソッド } + #[test] + fn test_core_method_spec_uniqueness() { + let mut ids = HashSet::new(); + let mut signatures = HashSet::new(); + for spec in CORE_METHOD_SPECS { + assert!(ids.insert(spec.id), "duplicate CoreMethodId in specs"); + let key = (spec.box_id, spec.name, spec.arity); + assert!( + signatures.insert(key), + "duplicate CoreMethodSpec signature: {:?}", + key + ); + } + assert_eq!(ids.len(), CoreMethodId::iter().count()); + } + // ===== Phase 224-B tests ===== #[test] diff --git a/src/runtime/core_method_aliases.rs 
b/src/runtime/core_method_aliases.rs new file mode 100644 index 00000000..ada16346 --- /dev/null +++ b/src/runtime/core_method_aliases.rs @@ -0,0 +1,43 @@ +//! Core method alias table (SSOT). + +#[derive(Debug, Clone, Copy)] +pub struct CoreMethodAlias { + pub alias: &'static str, + pub canonical: &'static str, +} + +const CORE_METHOD_ALIASES: &[CoreMethodAlias] = &[ + CoreMethodAlias { + alias: "toUpperCase", + canonical: "toUpper", + }, + CoreMethodAlias { + alias: "toLowerCase", + canonical: "toLower", + }, + CoreMethodAlias { + alias: "find", + canonical: "indexOf", + }, +]; + +pub fn canonical_method_name(method_name: &str) -> &str { + CORE_METHOD_ALIASES + .iter() + .find(|alias| alias.alias == method_name) + .map(|alias| alias.canonical) + .unwrap_or(method_name) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_canonical_method_name_aliases() { + assert_eq!(canonical_method_name("toUpperCase"), "toUpper"); + assert_eq!(canonical_method_name("toLowerCase"), "toLower"); + assert_eq!(canonical_method_name("find"), "indexOf"); + assert_eq!(canonical_method_name("indexOf"), "indexOf"); + } +} diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index b8b3070b..43ff4984 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -4,6 +4,7 @@ pub mod box_registry; pub mod core_box_ids; // Phase 87: CoreBoxId/CoreMethodId 型安全enum +pub mod core_method_aliases; // Phase 29ab: Core method alias SSOT pub mod core_services; // Phase 91: CoreServices trait 定義 pub mod deprecations; pub mod gc; diff --git a/src/runtime/type_registry.rs b/src/runtime/type_registry.rs index e8239dba..70c93615 100644 --- a/src/runtime/type_registry.rs +++ b/src/runtime/type_registry.rs @@ -23,16 +23,46 @@ * - This enables unified dispatch for both VMValue::String and VMValue::BoxRef(StringBox) */ +use super::core_box_ids::{CoreBoxId, CoreMethodId}; use super::type_box_abi::{MethodEntry, TypeBox}; +use std::collections::HashSet; +use std::sync::OnceLock; -// 最小サンプル: MapBox 
の TypeBox を事前登録(Tier-1 PoC 用) -// --- ArrayBox --- -const ARRAY_METHODS: &[MethodEntry] = &[ - MethodEntry { - name: "get", - arity: 1, - slot: 100, - }, +fn core_method_entries_for_box(box_id: CoreBoxId) -> Vec { + CoreMethodId::iter() + .filter(|method_id| method_id.box_id() == box_id) + .filter_map(|method_id| { + method_id.vtable_slot().map(|slot| MethodEntry { + name: method_id.name(), + arity: method_id.arity() as u8, + slot, + }) + }) + .collect() +} + +fn core_method_entries_for_box_signatures( + box_id: CoreBoxId, + allowed: &[(&'static str, usize)], +) -> Vec { + let core = core_method_entries_for_box(box_id); + core.into_iter() + .filter(|entry| { + allowed + .iter() + .any(|(name, arity)| entry.name == *name && entry.arity as usize == *arity) + }) + .collect() +} + +fn merge_method_entries(mut entries: Vec, extras: &[MethodEntry]) -> &'static [MethodEntry] { + entries.extend_from_slice(extras); + let mut seen = HashSet::new(); + entries.retain(|method| seen.insert((method.name, method.arity))); + Box::leak(entries.into_boxed_slice()) +} + +const ARRAY_METHOD_EXTRAS: &[MethodEntry] = &[ MethodEntry { name: "set", arity: 2, @@ -43,22 +73,6 @@ const ARRAY_METHODS: &[MethodEntry] = &[ arity: 0, slot: 102, }, - MethodEntry { - name: "length", - arity: 0, - slot: 102, - }, - // P0: vtable coverage extension - MethodEntry { - name: "push", - arity: 1, - slot: 103, - }, - MethodEntry { - name: "pop", - arity: 0, - slot: 104, - }, MethodEntry { name: "clear", arity: 0, @@ -97,10 +111,8 @@ const ARRAY_METHODS: &[MethodEntry] = &[ slot: 111, }, ]; -static ARRAYBOX_TB: TypeBox = TypeBox::new_with("ArrayBox", ARRAY_METHODS); -// --- MapBox --- -const MAP_METHODS: &[MethodEntry] = &[ +const MAP_METHOD_EXTRAS: &[MethodEntry] = &[ MethodEntry { name: "size", arity: 0, @@ -111,21 +123,6 @@ const MAP_METHODS: &[MethodEntry] = &[ arity: 0, slot: 201, }, - MethodEntry { - name: "has", - arity: 1, - slot: 202, - }, - MethodEntry { - name: "get", - arity: 1, - slot: 203, - 
}, - MethodEntry { - name: "set", - arity: 2, - slot: 204, - }, // Extended MethodEntry { name: "delete", @@ -137,11 +134,6 @@ const MAP_METHODS: &[MethodEntry] = &[ arity: 1, slot: 205, }, - MethodEntry { - name: "keys", - arity: 0, - slot: 206, - }, MethodEntry { name: "values", arity: 0, @@ -153,41 +145,13 @@ const MAP_METHODS: &[MethodEntry] = &[ slot: 208, }, ]; -static MAPBOX_TB: TypeBox = TypeBox::new_with("MapBox", MAP_METHODS); -// --- StringBox --- -const STRING_METHODS: &[MethodEntry] = &[ +const STRING_METHOD_EXTRAS: &[MethodEntry] = &[ MethodEntry { name: "len", arity: 0, slot: 300, }, - // P1: extend String vtable - MethodEntry { - name: "substring", - arity: 2, - slot: 301, - }, - MethodEntry { - name: "concat", - arity: 1, - slot: 302, - }, - MethodEntry { - name: "indexOf", - arity: 1, - slot: 303, - }, - MethodEntry { - name: "replace", - arity: 2, - slot: 304, - }, - MethodEntry { - name: "trim", - arity: 0, - slot: 305, - }, MethodEntry { name: "toUpper", arity: 0, @@ -199,40 +163,58 @@ const STRING_METHODS: &[MethodEntry] = &[ slot: 307, }, ]; -static STRINGBOX_TB: TypeBox = TypeBox::new_with("StringBox", STRING_METHODS); // --- ConsoleBox --- (WASM v2 unified dispatch 用の雛形) // 400: log(..), 401: warn(..), 402: error(..), 403: clear() -const CONSOLE_METHODS: &[MethodEntry] = &[ - MethodEntry { - name: "log", - arity: 1, - slot: 400, - }, +const CONSOLE_METHOD_EXTRAS: &[MethodEntry] = &[ MethodEntry { name: "warn", arity: 1, slot: 401, }, - MethodEntry { - name: "error", - arity: 1, - slot: 402, - }, MethodEntry { name: "clear", arity: 0, slot: 403, }, - // Phase 122: println は log のエイリアス - // JSON v0/selfhost が println を吐いても log と同じスロットを使うための alias - MethodEntry { - name: "println", - arity: 1, - slot: 400, - }, ]; -static CONSOLEBOX_TB: TypeBox = TypeBox::new_with("ConsoleBox", CONSOLE_METHODS); + +static ARRAYBOX_TB: OnceLock = OnceLock::new(); +static MAPBOX_TB: OnceLock = OnceLock::new(); +static STRINGBOX_TB: OnceLock = OnceLock::new(); 
+static CONSOLEBOX_TB: OnceLock = OnceLock::new(); + +fn arraybox_typebox() -> &'static TypeBox { + ARRAYBOX_TB.get_or_init(|| { + let core = core_method_entries_for_box(CoreBoxId::Array); + let methods = merge_method_entries(core, ARRAY_METHOD_EXTRAS); + TypeBox::new_with("ArrayBox", methods) + }) +} + +fn mapbox_typebox() -> &'static TypeBox { + MAPBOX_TB.get_or_init(|| { + let core = core_method_entries_for_box(CoreBoxId::Map); + let methods = merge_method_entries(core, MAP_METHOD_EXTRAS); + TypeBox::new_with("MapBox", methods) + }) +} + +fn stringbox_typebox() -> &'static TypeBox { + STRINGBOX_TB.get_or_init(|| { + let core = core_method_entries_for_box(CoreBoxId::String); + let methods = merge_method_entries(core, STRING_METHOD_EXTRAS); + TypeBox::new_with("StringBox", methods) + }) +} + +fn consolebox_typebox() -> &'static TypeBox { + CONSOLEBOX_TB.get_or_init(|| { + let core = core_method_entries_for_box(CoreBoxId::Console); + let methods = merge_method_entries(core, CONSOLE_METHOD_EXTRAS); + TypeBox::new_with("ConsoleBox", methods) + }) +} // --- InstanceBox --- // Representative methods exposed via unified slots for field access and diagnostics. 
@@ -268,48 +250,27 @@ static INSTANCEBOX_TB: TypeBox = TypeBox::new_with("InstanceBox", INSTANCE_METHO
 // Primitive types (String, Integer, Array) share the same slot numbers as their Box variants
 // This enables unified dispatch for both primitives and boxes
 
-// Primitive String uses same slots as StringBox (300+)
-const PRIMITIVE_STRING_METHODS: &[MethodEntry] = &[
-    MethodEntry {
-        name: "length",
-        arity: 0,
-        slot: 300,
-    },
-    MethodEntry {
-        name: "substring",
-        arity: 2,
-        slot: 301,
-    },
-    MethodEntry {
-        name: "concat",
-        arity: 1,
-        slot: 302,
-    },
-    MethodEntry {
-        name: "indexOf",
-        arity: 1,
-        slot: 303,
-    },
-    MethodEntry {
-        name: "replace",
-        arity: 2,
-        slot: 304,
-    },
+const PRIMITIVE_STRING_ALLOWED_SIGNATURES: &[(&str, usize)] = &[  // (name, arity) pairs mirroring the removed PRIMITIVE_STRING_METHODS entries
+    ("length", 0),
+    ("substring", 2),
+    ("concat", 1),
+    ("indexOf", 1),
+    ("replace", 2),
+];
+const PRIMITIVE_STRING_EXTRAS: &[MethodEntry] = &[  // entries merged on top of the filtered core entries in primitive_string_typebox()
     MethodEntry {
         name: "lastIndexOf",
         arity: 1,
         slot: 308,
     },
 ];
-static PRIMITIVE_STRING_TB: TypeBox = TypeBox::new_with("String", PRIMITIVE_STRING_METHODS);
 
-// Primitive Array uses same slots as ArrayBox (100+)
-const PRIMITIVE_ARRAY_METHODS: &[MethodEntry] = &[
-    MethodEntry {
-        name: "get",
-        arity: 1,
-        slot: 100,
-    },
+const PRIMITIVE_ARRAY_ALLOWED_SIGNATURES: &[(&str, usize)] = &[  // (name, arity) pairs mirroring removed PRIMITIVE_ARRAY_METHODS entries
+    ("get", 1),
+    ("length", 0),
+    ("push", 1),
+];
+const PRIMITIVE_ARRAY_EXTRAS: &[MethodEntry] = &[  // entries merged on top of the filtered core entries in primitive_array_typebox()
     MethodEntry {
         name: "set",
         arity: 2,
@@ -320,30 +281,44 @@ const PRIMITIVE_ARRAY_METHODS: &[MethodEntry] = &[
         arity: 0,
         slot: 102,
     },
-    MethodEntry {
-        name: "length",
-        arity: 0,
-        slot: 102,
-    },
-    MethodEntry {
-        name: "push",
-        arity: 1,
-        slot: 103,
-    },
 ];
-static PRIMITIVE_ARRAY_TB: TypeBox = TypeBox::new_with("Array", PRIMITIVE_ARRAY_METHODS);
+
+static PRIMITIVE_STRING_TB: OnceLock<TypeBox> = OnceLock::new();  // filled on first call to primitive_string_typebox()
+static PRIMITIVE_ARRAY_TB: OnceLock<TypeBox> = OnceLock::new();  // filled on first call to primitive_array_typebox()
+
+fn primitive_string_typebox() -> &'static TypeBox {  // "String" method table, cached in PRIMITIVE_STRING_TB
+    PRIMITIVE_STRING_TB.get_or_init(|| {
+        let core = core_method_entries_for_box_signatures(
+            CoreBoxId::String,
+            PRIMITIVE_STRING_ALLOWED_SIGNATURES,
+        );
+        let methods = merge_method_entries(core, PRIMITIVE_STRING_EXTRAS);
+        TypeBox::new_with("String", methods)
+    })
+}
+
+fn primitive_array_typebox() -> &'static TypeBox {  // "Array" method table, cached in PRIMITIVE_ARRAY_TB
+    PRIMITIVE_ARRAY_TB.get_or_init(|| {
+        let core = core_method_entries_for_box_signatures(
+            CoreBoxId::Array,
+            PRIMITIVE_ARRAY_ALLOWED_SIGNATURES,
+        );
+        let methods = merge_method_entries(core, PRIMITIVE_ARRAY_EXTRAS);
+        TypeBox::new_with("Array", methods)
+    })
+}
 
-/// 型名から TypeBox を解決(雛形)。現在は常に None。
+/// Resolves a type name to its static `TypeBox`; names not listed below yield `None`.
 pub fn resolve_typebox_by_name(type_name: &str) -> Option<&'static TypeBox> {
     match type_name {
-        "MapBox" => Some(&MAPBOX_TB),
-        "ArrayBox" => Some(&ARRAYBOX_TB),
-        "StringBox" => Some(&STRINGBOX_TB),
-        "ConsoleBox" => Some(&CONSOLEBOX_TB),
+        "MapBox" => Some(mapbox_typebox()),
+        "ArrayBox" => Some(arraybox_typebox()),
+        "StringBox" => Some(stringbox_typebox()),
+        "ConsoleBox" => Some(consolebox_typebox()),
         "InstanceBox" => Some(&INSTANCEBOX_TB),
         // Phase 124: Primitive types
-        "String" => Some(&PRIMITIVE_STRING_TB),
-        "Array" => Some(&PRIMITIVE_ARRAY_TB),
+        "String" => Some(primitive_string_typebox()),
+        "Array" => Some(primitive_array_typebox()),
         _ => None,
     }
 }
@@ -368,3 +343,27 @@ pub fn known_methods_for(type_name: &str) -> Option> {
     v.dedup();
     Some(v)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_core_method_vtable_slots_match_registry() {  // each core method's declared slot must equal what name-based resolution returns
+        for method_id in CoreMethodId::iter() {
+            let Some(expected_slot) = method_id.vtable_slot() else {  // ids that declare no vtable slot are not checked
+                continue;
+            };
+            let type_name = method_id.box_id().name();
+            let resolved = resolve_slot_by_name(type_name, method_id.name(), method_id.arity());
+            assert_eq!(
+                resolved,
+                Some(expected_slot),
+                "vtable slot mismatch: {}.{}({})",
+                type_name,
+                method_id.name(),
+                method_id.arity()
+            );
+        }
+    }
+}
diff --git a/tools/smokes/v2/profiles/integration/apps/phase263_pattern2_seg_realworld_min_vm.sh
b/tools/smokes/v2/profiles/integration/apps/phase263_pattern2_seg_realworld_min_vm.sh
new file mode 100644
index 00000000..6967915d
--- /dev/null
+++ b/tools/smokes/v2/profiles/integration/apps/phase263_pattern2_seg_realworld_min_vm.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Phase 29ab P4: Pattern2 seg real-world minimal repro (VM backend)
+# Expectation: Derived slot path succeeds (output: 4)
+
+source "$(dirname "$0")/../../../lib/test_runner.sh"  # presumably defines require_env, filter_noise, test_pass, test_fail (not defined in this script)
+export SMOKES_USE_PYVM=0
+require_env || exit 2  # exit status 2 when require_env reports failure
+
+INPUT="$NYASH_ROOT/apps/tests/phase263_pattern2_seg_realworld_min.hako"
+RUN_TIMEOUT_SECS=${RUN_TIMEOUT_SECS:-10}  # caller-supplied value wins; otherwise 10
+
+set +e  # do not abort on a non-zero exit; the status is inspected below
+OUTPUT=$(timeout "$RUN_TIMEOUT_SECS" env NYASH_DISABLE_PLUGINS=1 HAKO_JOINIR_STRICT=1 "$NYASH_BIN" "$INPUT" 2>&1)  # stderr is folded into OUTPUT
+EXIT_CODE=$?
+set -e
+
+if [ "$EXIT_CODE" -eq 124 ]; then  # 124 is the status timeout(1) uses when the time limit was hit
+    test_fail "phase263_pattern2_seg_realworld_min_vm: hakorune timed out (>${RUN_TIMEOUT_SECS}s)"
+    exit 1
+fi
+
+OUTPUT_CLEAN=$(echo "$OUTPUT" | filter_noise)
+
+if echo "$OUTPUT_CLEAN" | grep -q "^4$" || echo "$OUTPUT" | grep -q "^RC: 4$"; then  # pass on a line that is exactly "4" (filtered) or "RC: 4" (raw)
+    test_pass "phase263_pattern2_seg_realworld_min_vm: Derived slot promotion succeeded (output: 4)"
+    exit 0
+else
+    echo "[FAIL] Unexpected output (expected: 4)"
+    echo "[INFO] Exit code: $EXIT_CODE"
+    echo "[INFO] Output (clean):"
+    echo "$OUTPUT_CLEAN" | tail -n 20 || true  # only the last 20 lines are shown
+    test_fail "phase263_pattern2_seg_realworld_min_vm: Unexpected output"
+    exit 1
+fi