refactor: unify string helpers and pattern2 derived slot
This commit is contained in:
@ -41,10 +41,9 @@ impl StringBox {
|
||||
/// Find substring and return position (or -1 if not found)
|
||||
pub fn find(&self, search: &str) -> Box<dyn NyashBox> {
|
||||
use crate::box_trait::IntegerBox;
|
||||
match self.value.find(search) {
|
||||
Some(pos) => Box::new(IntegerBox::new(pos as i64)),
|
||||
None => Box::new(IntegerBox::new(-1)),
|
||||
}
|
||||
let mode = crate::boxes::string_ops::index_mode_from_env();
|
||||
let idx = crate::boxes::string_ops::index_of(&self.value, search, None, mode);
|
||||
Box::new(IntegerBox::new(idx))
|
||||
}
|
||||
|
||||
/// Replace all occurrences of old with new
|
||||
|
||||
@ -65,6 +65,7 @@ pub mod integer_box;
|
||||
pub mod math_box;
|
||||
pub mod random_box;
|
||||
pub mod string_box;
|
||||
pub mod string_ops;
|
||||
pub mod time_box;
|
||||
// These boxes use web APIs that require special handling in WASM
|
||||
pub mod aot_compiler_box;
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
* - `toLowerCase()` - 小文字変換
|
||||
* - `trim()` - 前後の空白除去
|
||||
* - `indexOf(search)` - 文字列検索
|
||||
* - `indexOf(search, fromIndex)` - 指定位置から検索
|
||||
* - `replace(from, to)` - 文字列置換
|
||||
* - `charAt(index)` - 指定位置の文字取得
|
||||
*
|
||||
@ -71,18 +72,18 @@ impl StringBox {
|
||||
/// Env gate: NYASH_STR_CP=1 → return codepoint index; default is byte index
|
||||
pub fn find(&self, search: &str) -> Box<dyn NyashBox> {
|
||||
use crate::boxes::integer_box::IntegerBox;
|
||||
match self.value.find(search) {
|
||||
Some(byte_pos) => {
|
||||
let use_cp = std::env::var("NYASH_STR_CP").ok().as_deref() == Some("1");
|
||||
let idx = if use_cp {
|
||||
self.value[..byte_pos].chars().count() as i64
|
||||
} else {
|
||||
byte_pos as i64
|
||||
};
|
||||
Box::new(IntegerBox::new(idx))
|
||||
}
|
||||
None => Box::new(IntegerBox::new(-1)),
|
||||
}
|
||||
let mode = crate::boxes::string_ops::index_mode_from_env();
|
||||
let idx = crate::boxes::string_ops::index_of(&self.value, search, None, mode);
|
||||
Box::new(IntegerBox::new(idx))
|
||||
}
|
||||
|
||||
/// Find substring starting from a given index (or -1 if not found)
|
||||
/// Env gate: NYASH_STR_CP=1 → indices are codepoint-based; default is byte index
|
||||
pub fn find_from(&self, search: &str, start: i64) -> Box<dyn NyashBox> {
|
||||
use crate::boxes::integer_box::IntegerBox;
|
||||
let mode = crate::boxes::string_ops::index_mode_from_env();
|
||||
let idx = crate::boxes::string_ops::index_of(&self.value, search, Some(start), mode);
|
||||
Box::new(IntegerBox::new(idx))
|
||||
}
|
||||
|
||||
/// Replace all occurrences of old with new
|
||||
@ -94,18 +95,9 @@ impl StringBox {
|
||||
/// Env gate: NYASH_STR_CP=1 → return codepoint index; default is byte index.
|
||||
pub fn lastIndexOf(&self, search: &str) -> Box<dyn NyashBox> {
|
||||
use crate::boxes::integer_box::IntegerBox;
|
||||
match self.value.rfind(search) {
|
||||
Some(byte_pos) => {
|
||||
let use_cp = std::env::var("NYASH_STR_CP").ok().as_deref() == Some("1");
|
||||
let idx = if use_cp {
|
||||
self.value[..byte_pos].chars().count() as i64
|
||||
} else {
|
||||
byte_pos as i64
|
||||
};
|
||||
Box::new(IntegerBox::new(idx))
|
||||
}
|
||||
None => Box::new(IntegerBox::new(-1)),
|
||||
}
|
||||
let mode = crate::boxes::string_ops::index_mode_from_env();
|
||||
let idx = crate::boxes::string_ops::last_index_of(&self.value, search, mode);
|
||||
Box::new(IntegerBox::new(idx))
|
||||
}
|
||||
|
||||
/// Trim whitespace from both ends
|
||||
|
||||
101
src/boxes/string_ops.rs
Normal file
101
src/boxes/string_ops.rs
Normal file
@ -0,0 +1,101 @@
|
||||
//! Shared string indexing helpers (byte vs codepoint).
|
||||
|
||||
/// Unit in which string positions are expressed by the helpers in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringIndexMode {
    /// Positions are offsets into the UTF-8 byte sequence.
    Byte,
    /// Positions count Unicode scalar values (`char`s).
    CodePoint,
}
|
||||
|
||||
pub fn index_mode_from_env() -> StringIndexMode {
|
||||
if std::env::var("NYASH_STR_CP").ok().as_deref() == Some("1") {
|
||||
StringIndexMode::CodePoint
|
||||
} else {
|
||||
StringIndexMode::Byte
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_of(haystack: &str, needle: &str, start: Option<i64>, mode: StringIndexMode) -> i64 {
|
||||
match mode {
|
||||
StringIndexMode::Byte => index_of_bytes(haystack, needle, start),
|
||||
StringIndexMode::CodePoint => index_of_codepoints(haystack, needle, start),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn last_index_of(haystack: &str, needle: &str, mode: StringIndexMode) -> i64 {
|
||||
match mode {
|
||||
StringIndexMode::Byte => haystack.rfind(needle).map(|i| i as i64).unwrap_or(-1),
|
||||
StringIndexMode::CodePoint => haystack
|
||||
.rfind(needle)
|
||||
.map(|byte_pos| haystack[..byte_pos].chars().count() as i64)
|
||||
.unwrap_or(-1),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn substring(haystack: &str, start: i64, end: Option<i64>, mode: StringIndexMode) -> String {
|
||||
match mode {
|
||||
StringIndexMode::Byte => substring_bytes(haystack, start, end),
|
||||
StringIndexMode::CodePoint => substring_codepoints(haystack, start, end),
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds the first occurrence of `needle` at or after byte offset `start`.
///
/// * `start` — byte offset to begin searching from; `None` and negative values
///   mean the beginning of `haystack`.
///
/// Returns the byte offset of the match, or `-1` when there is no match or
/// `start` lies past the end of `haystack`.
///
/// A `start` that falls inside a multi-byte character is advanced to the next
/// character boundary instead of panicking on the slice. A non-empty match can
/// only begin on a boundary, so no candidate at or after `start` is skipped.
/// For an empty `needle` the returned offset is that next boundary.
fn index_of_bytes(haystack: &str, needle: &str, start: Option<i64>) -> i64 {
    // An offset that does not fit in `usize` is necessarily past the end.
    let requested = usize::try_from(start.unwrap_or(0).max(0)).unwrap_or(usize::MAX);
    if requested > haystack.len() {
        return -1;
    }
    // `haystack.len()` is always a boundary, so this search cannot fail; the
    // fallback only keeps the expression total.
    let from = (requested..=haystack.len())
        .find(|&offset| haystack.is_char_boundary(offset))
        .unwrap_or(haystack.len());
    haystack[from..]
        .find(needle)
        .map_or(-1, |rel| (from + rel) as i64)
}
|
||||
|
||||
fn index_of_codepoints(haystack: &str, needle: &str, start: Option<i64>) -> i64 {
|
||||
let start_idx = start.unwrap_or(0).max(0) as usize;
|
||||
let Some(byte_start) = byte_offset_for_cp(haystack, start_idx) else {
|
||||
return -1;
|
||||
};
|
||||
if byte_start > haystack.len() {
|
||||
return -1;
|
||||
}
|
||||
haystack[byte_start..]
|
||||
.find(needle)
|
||||
.map(|rel| {
|
||||
let abs = byte_start + rel;
|
||||
haystack[..abs].chars().count() as i64
|
||||
})
|
||||
.unwrap_or(-1)
|
||||
}
|
||||
|
||||
/// Returns the bytes of `haystack` in `[start, end)` as a `String`.
///
/// Both bounds are clamped to `0..=haystack.len()`; `end` defaults to the
/// length of `haystack`. The result is empty when `start > end` after
/// clamping, or when either bound falls inside a multi-byte character, since
/// the range would not be valid UTF-8 on its own.
fn substring_bytes(haystack: &str, start: i64, end: Option<i64>) -> String {
    let total = haystack.len() as i64;
    let clamp = |bound: i64| bound.clamp(0, total) as usize;
    let (from, to) = (clamp(start), clamp(end.unwrap_or(total)));
    // `str::get` is `None` for reversed ranges and for ranges that do not lie
    // on character boundaries; both cases map to the empty string.
    haystack.get(from..to).map(str::to_owned).unwrap_or_default()
}
|
||||
|
||||
/// Returns the characters of `haystack` in `[start, end)`, counted in codepoints.
///
/// `start` is clamped to `0..=char_count`; `end` defaults to `char_count` and
/// is clamped to `start..=char_count`, so a reversed range yields an empty
/// string.
fn substring_codepoints(haystack: &str, start: i64, end: Option<i64>) -> String {
    let total = haystack.chars().count() as i64;
    let first = start.clamp(0, total);
    let last = end.unwrap_or(total).clamp(first, total);
    haystack
        .chars()
        .skip(first as usize)
        .take((last - first) as usize)
        .collect()
}
|
||||
|
||||
/// Translates a codepoint index into a byte offset within `haystack`.
///
/// Returns the byte offset at which the `cp_index`-th character starts. An
/// index equal to the number of characters maps to `haystack.len()` (the
/// one-past-the-end position); any larger index yields `None`.
fn byte_offset_for_cp(haystack: &str, cp_index: usize) -> Option<usize> {
    haystack
        .char_indices()
        .map(|(byte_pos, _)| byte_pos)
        // Append the end-of-string offset so the index one past the last
        // character resolves too.
        .chain(std::iter::once(haystack.len()))
        .nth(cp_index)
}
|
||||
Reference in New Issue
Block a user