phase29aq(p2): add index_of_string/to_upper stdlib subsets

This commit is contained in:
2025-12-31 11:37:27 +09:00
parent 4c3070cedd
commit ead9a1edeb
16 changed files with 312 additions and 33 deletions

View File

@ -0,0 +1,3 @@
using "apps/lib/json_native/utils/string.hako" as StringUtils
print(StringUtils.index_of_string("hello", "ll"))

View File

@ -0,0 +1,3 @@
using "apps/lib/json_native/utils/string.hako" as StringUtils
print(StringUtils.to_upper("abc"))

View File

@ -3,7 +3,7 @@
## Current Focus
- Phase: `docs/development/current/main/phases/phase-29aq/README.md`
- Next: Phase 29aq P2 (stdlib scan subset extensions)
- Next: Phase 29aq P3 (stdlib split/scan derivatives)
## Gate (SSOT)

View File

@ -5,7 +5,7 @@ Scope: 「次にやる候補」を短く列挙するメモ。入口は `docs/dev
## Active
- Phase 29aq: `docs/development/current/main/phases/phase-29aq/README.md` (Next: P2 scan subsets)
- Phase 29aq: `docs/development/current/main/phases/phase-29aq/README.md` (Next: P3 split/scan derivatives)
- JoinIR regression gate SSOT: `docs/development/current/main/phases/phase-29ae/README.md`
- CorePlan hardening (docs-first): `docs/development/current/main/phases/phase-29al/README.md`

View File

@ -34,7 +34,7 @@ Related:
## 1.1 Current (active)
- Active phase: `docs/development/current/main/phases/phase-29aq/README.md`
- Next step: Phase 29aq P2 (stdlib scan subset extensions)
- Next step: Phase 29aq P3 (stdlib split/scan derivatives)
## 2. すでに固めた SSOT（再発防止の土台）

View File

@ -17,8 +17,10 @@ Goal: JoinIR の最小回帰セットを SSOT として固定する。
- Pattern1 (stdlib join, VM): `phase29ap_stringutils_join_vm`
- ScanWithInit (stdlib index_of, VM): `phase29aq_string_index_of_min_vm`
- ScanWithInit (stdlib last_index_of, VM): `phase29aq_string_last_index_of_min_vm`
- ScanWithInit (stdlib index_of_string, VM): `phase29aq_string_index_of_string_min_vm`
- Pattern2 (stdlib parse_integer, VM): `phase29aq_string_parse_integer_min_vm`
- SplitScan (stdlib split, VM): `phase29aq_string_split_min_vm`
- Pattern1 (stdlib to_upper, VM): `phase29aq_string_to_upper_min_vm`
- Pattern5 (Break, VM): `phase286_pattern5_break_vm`
- Pattern5 (strict shadow, VM): `phase29ao_pattern5_strict_shadow_vm`
- Pattern5 (release adopt, VM): `phase29ao_pattern5_release_adopt_vm`

View File

@ -0,0 +1,58 @@
---
Status: Done
Scope: stdlib subsets (index_of_string, to_upper)
Related:
- docs/development/current/main/phases/phase-29aq/README.md
- docs/development/current/main/phases/phase-29ae/README.md
---
# Phase 29aq P2: stdlib subsets (index_of_string, to_upper)
Goal: add two stdlib subsets with fixtures and integration smokes, wired to the
JoinIR regression gate (phase29ae pack).
## P2-1: to_upper (Pattern1CharMap)
Target: `apps/lib/json_native/utils/string.hako`
- loop(i < s.length())
- local ch = s.substring(i, i + 1)
- result = result + this.char_to_upper(ch)
- i = i + 1
Notes:
- Use existing Pattern1CharMap facts/planner/normalizer path.
- No new CorePlan vocabulary or logs.
Fixtures/smokes:
- `apps/tests/phase29aq_string_to_upper_min.hako`
- `tools/smokes/v2/profiles/integration/joinir/phase29aq_string_to_upper_min_vm.sh`
## P2-2: index_of_string (ScanWithInit dynamic needle)
Target: `apps/lib/json_native/utils/string.hako`
- loop(i <= s.length() - substr.length())
- if s.substring(i, i + substr.length()) == substr { return i }
- i = i + 1
- return -1
Notes:
- Treat as ScanWithInit with dynamic needle length.
- Facts must detect both the dynamic needle length (`substr.length()` in the loop bound and in the `substring` end index) and the forward scan shape (`i = i + 1`).
Fixtures/smokes:
- `apps/tests/phase29aq_string_index_of_string_min.hako`
- `tools/smokes/v2/profiles/integration/joinir/phase29aq_string_index_of_string_min_vm.sh`
## Gate wiring (SSOT)
- Add both smokes to `tools/smokes/v2/profiles/integration/joinir/phase29aq_stdlib_pack_vm.sh`.
- Ensure `phase29ae_regression_pack_vm.sh` runs the stdlib pack.
- Update `docs/development/current/main/phases/phase-29ae/README.md`.
## Verification
- `cargo build --release`
- `./tools/smokes/v2/run.sh --profile quick`
- `./tools/smokes/v2/profiles/integration/joinir/phase29ae_regression_pack_vm.sh`

View File

@ -57,7 +57,8 @@ Plan/Composer subsets (or mark unsupported) before adding new subsets.
## Progress
- P1: Add stdlib subsets in priority order (index_of/last_index_of → parse_integer → split).
- P2: Add stdlib subsets (index_of_string → to_upper).
## Next (planned)
- P2: Extend stdlib scan subsets (candidate: index_of_string, to_upper).
- P3: Expand split/scan derivatives (candidate: starts_with/ends_with).

View File

@ -51,9 +51,15 @@ pub(super) fn try_compose_core_loop_v0_scan_with_init(
Some(haystack) => haystack == &scan.haystack,
None => true,
};
let needle_matches = match shape.needle_var.as_ref() {
Some(needle) => needle == &scan.needle,
None => true,
};
shape.idx_var == scan.loop_var
&& shape.step_lit == scan.step_lit
&& shape.dynamic_needle == scan.dynamic_needle
&& haystack_matches
&& needle_matches
});
if !shapes_match {
return Ok(None);
@ -70,7 +76,7 @@ pub(super) fn try_compose_core_loop_v0_scan_with_init(
},
not_found_return_lit: -1,
scan_direction,
dynamic_needle: false,
dynamic_needle: scan.dynamic_needle,
};
let core = PlanNormalizer::normalize_scan_with_init(builder, plan, ctx)?;
Ok(Some(core))
@ -409,6 +415,7 @@ mod tests {
haystack: "s".to_string(),
needle: "ch".to_string(),
step_lit: 1,
dynamic_needle: false,
}),
split_scan: None,
pattern1_simplewhile: None,
@ -477,6 +484,7 @@ mod tests {
haystack: "s".to_string(),
needle: "ch".to_string(),
step_lit: 1,
dynamic_needle: false,
}),
split_scan: None,
pattern1_simplewhile: None,
@ -544,6 +552,7 @@ mod tests {
haystack: "s".to_string(),
needle: "ch".to_string(),
step_lit: 1,
dynamic_needle: false,
}),
split_scan: None,
pattern1_simplewhile: None,

View File

@ -384,6 +384,7 @@ mod tests {
haystack: "s".to_string(),
needle: "ch".to_string(),
step_lit: 1,
dynamic_needle: false,
}),
split_scan: None,
pattern1_simplewhile: None,

View File

@ -77,6 +77,7 @@ pub(in crate::mir::builder) struct ScanWithInitFacts {
pub haystack: String,
pub needle: String,
pub step_lit: i64,
pub dynamic_needle: bool,
}
#[derive(Debug, Clone)]
@ -224,37 +225,46 @@ fn try_extract_condition_shape(condition: &ASTNode) -> Result<Option<ConditionSh
return Ok(None);
};
let ASTNode::MethodCall {
object,
method,
arguments,
..
} = right.as_ref()
else {
return Ok(None);
};
if !arguments.is_empty() {
return Ok(None);
}
let method = match method.as_str() {
"length" => LengthMethod::Length,
"size" => LengthMethod::Size,
_ => return Ok(None),
};
let ASTNode::Variable {
name: haystack_var,
..
} = object.as_ref()
else {
let Some((haystack_var, method)) = match_length_call(right.as_ref()) else {
return Ok(None);
};
Ok(Some(ConditionShape::VarLessLength {
idx_var: idx_var.clone(),
haystack_var: haystack_var.clone(),
haystack_var,
method,
}))
}
BinaryOperator::LessEqual => {
let ASTNode::Variable { name: idx_var, .. } = left.as_ref() else {
return Ok(None);
};
let ASTNode::BinaryOp {
operator: BinaryOperator::Subtract,
left: minus_left,
right: minus_right,
..
} = right.as_ref()
else {
return Ok(None);
};
let Some((haystack_var, haystack_method)) = match_length_call(minus_left.as_ref())
else {
return Ok(None);
};
let Some((needle_var, needle_method)) = match_length_call(minus_right.as_ref()) else {
return Ok(None);
};
Ok(Some(ConditionShape::VarLessEqualLengthMinusNeedle {
idx_var: idx_var.clone(),
haystack_var,
needle_var,
haystack_method,
needle_method,
}))
}
BinaryOperator::GreaterEqual => {
let ASTNode::Variable { name: idx_var, .. } = left.as_ref() else {
return Ok(None);
@ -274,6 +284,30 @@ fn try_extract_condition_shape(condition: &ASTNode) -> Result<Option<ConditionSh
}
}
/// Recognizes a zero-argument `<var>.length()` / `<var>.size()` call.
///
/// Returns the receiver variable's name together with the length method that
/// was used. Yields `None` for every other shape: a non-call expression, a
/// call with arguments, a different method name, or a receiver that is not a
/// plain variable.
fn match_length_call(expr: &ASTNode) -> Option<(String, LengthMethod)> {
    match expr {
        ASTNode::MethodCall {
            object,
            method,
            arguments,
            ..
        } if arguments.is_empty() => {
            // Only the two length-like spellings are accepted.
            let kind = match method.as_str() {
                "length" => LengthMethod::Length,
                "size" => LengthMethod::Size,
                _ => return None,
            };
            // The receiver must be a bare variable so callers can compare names.
            match object.as_ref() {
                ASTNode::Variable { name, .. } => Some((name.clone(), kind)),
                _ => None,
            }
        }
        _ => None,
    }
}
fn try_extract_step_shape(body: &[ASTNode]) -> Result<Option<StepShape>, Freeze> {
let Some(last) = body.last() else {
return Ok(None);
@ -345,6 +379,15 @@ mod tests_invariants {
}
}
/// Test helper: builds the AST for a zero-argument `length()` call whose
/// receiver is `v(var)`.
fn len_call(var: &str) -> ASTNode {
    let receiver = Box::new(v(var));
    ASTNode::MethodCall {
        object: receiver,
        method: String::from("length"),
        arguments: Vec::new(),
        span: Span::unknown(),
    }
}
#[test]
fn loop_facts_require_skeleton_and_features_when_present() {
let condition = ASTNode::BinaryOp {
@ -438,7 +481,7 @@ fn try_extract_scan_with_init_facts(
None => obj.clone(),
};
// substring(i, i + 1)
// substring(i, i + 1) or substring(i, i + needle.length())
let (start, end) = (&arguments[0], &arguments[1]);
match start {
ASTNode::Variable { name, .. } if name == idx_var => {}
@ -457,17 +500,42 @@ fn try_extract_scan_with_init_facts(
ASTNode::Variable { name, .. } if name == idx_var => {}
_ => continue,
}
match end_right.as_ref() {
let (dynamic_needle, needle_len_var) = match end_right.as_ref() {
ASTNode::Literal {
value: LiteralValue::Integer(1),
..
} => {}
} => (false, None),
ASTNode::MethodCall {
object,
method,
arguments,
..
} if arguments.is_empty()
&& (method == "length" || method == "size")
&& matches!(object.as_ref(), ASTNode::Variable { .. }) =>
{
let ASTNode::Variable { name, .. } = object.as_ref() else {
continue;
};
(true, Some(name.as_str()))
}
_ => continue,
}
};
let ASTNode::Variable { name: needle, .. } = right.as_ref() else {
continue;
};
if dynamic_needle && needle_len_var != Some(needle.as_str()) {
continue;
}
if dynamic_needle != shape.dynamic_needle {
continue;
}
if let Some(shape_needle) = shape.needle_var.as_deref() {
if shape_needle != needle {
continue;
}
}
// then-body must contain `return i` (minimal)
if !then_body.iter().any(|n| {
@ -487,6 +555,7 @@ fn try_extract_scan_with_init_facts(
haystack: haystack_var,
needle: needle.clone(),
step_lit,
dynamic_needle,
}));
}
@ -935,6 +1004,71 @@ mod tests {
assert_eq!(scan.step_lit, -1);
}
/// Facts extraction must accept the dynamic-needle scan loop
///
/// ```text
/// loop(i <= s.length() - needle.length()) {
///     if s.substring(i, i + needle.length()) == needle { return i }
///     i = i + 1
/// }
/// ```
///
/// and report it as scan-with-init facts with `dynamic_needle` set.
#[test]
fn loopfacts_ok_some_for_dynamic_needle_scan_with_init() {
    // Loop condition: i <= s.length() - needle.length()
    let last_start = ASTNode::BinaryOp {
        operator: BinaryOperator::Subtract,
        left: Box::new(len_call("s")),
        right: Box::new(len_call("needle")),
        span: Span::unknown(),
    };
    let loop_cond = ASTNode::BinaryOp {
        operator: BinaryOperator::LessEqual,
        left: Box::new(v("i")),
        right: Box::new(last_start),
        span: Span::unknown(),
    };

    // Window under test: s.substring(i, i + needle.length())
    let window_end = ASTNode::BinaryOp {
        operator: BinaryOperator::Add,
        left: Box::new(v("i")),
        right: Box::new(len_call("needle")),
        span: Span::unknown(),
    };
    let window = ASTNode::MethodCall {
        object: Box::new(v("s")),
        method: "substring".to_string(),
        arguments: vec![v("i"), window_end],
        span: Span::unknown(),
    };

    // if <window> == needle { return i }
    let window_equals_needle = ASTNode::BinaryOp {
        operator: BinaryOperator::Equal,
        left: Box::new(window),
        right: Box::new(v("needle")),
        span: Span::unknown(),
    };
    let return_index = ASTNode::Return {
        value: Some(Box::new(v("i"))),
        span: Span::unknown(),
    };
    let match_check = ASTNode::If {
        condition: Box::new(window_equals_needle),
        then_body: vec![return_index],
        else_body: None,
        span: Span::unknown(),
    };

    // Step: i = i + 1
    let one = ASTNode::Literal {
        value: LiteralValue::Integer(1),
        span: Span::unknown(),
    };
    let next_index = ASTNode::BinaryOp {
        operator: BinaryOperator::Add,
        left: Box::new(v("i")),
        right: Box::new(one),
        span: Span::unknown(),
    };
    let advance = ASTNode::Assignment {
        target: Box::new(v("i")),
        value: Box::new(next_index),
        span: Span::unknown(),
    };

    let body = [match_check, advance];
    let facts = try_build_loop_facts(&loop_cond, &body)
        .expect("Ok")
        .expect("Some");
    let scan = facts.scan_with_init.expect("scan facts");

    assert_eq!(scan.loop_var, "i");
    assert_eq!(scan.haystack, "s");
    assert_eq!(scan.needle, "needle");
    assert_eq!(scan.step_lit, 1);
    assert!(scan.dynamic_needle);
}
#[test]
fn loopfacts_ctx_skips_pattern1_when_kind_mismatch() {
let condition = ASTNode::BinaryOp {

View File

@ -21,6 +21,13 @@ pub(in crate::mir::builder) enum ConditionShape {
haystack_var: String,
method: LengthMethod,
},
VarLessEqualLengthMinusNeedle {
idx_var: String,
haystack_var: String,
needle_var: String,
haystack_method: LengthMethod,
needle_method: LengthMethod,
},
VarGreaterEqualZero {
idx_var: String,
},
@ -37,6 +44,8 @@ pub(in crate::mir::builder) struct ScanWithInitShape {
pub idx_var: String,
pub haystack_var: Option<String>,
pub step_lit: i64,
pub dynamic_needle: bool,
pub needle_var: Option<String>,
}
pub(in crate::mir::builder) fn match_scan_with_init_shape(
@ -55,6 +64,23 @@ pub(in crate::mir::builder) fn match_scan_with_init_shape(
idx_var: idx_var.clone(),
haystack_var: Some(haystack_var.clone()),
step_lit: *k,
dynamic_needle: false,
needle_var: None,
}),
(
ConditionShape::VarLessEqualLengthMinusNeedle {
idx_var,
haystack_var,
needle_var,
..
},
StepShape::AssignAddConst { var, k },
) if *k == 1 && var == idx_var => Some(ScanWithInitShape {
idx_var: idx_var.clone(),
haystack_var: Some(haystack_var.clone()),
step_lit: *k,
dynamic_needle: true,
needle_var: Some(needle_var.clone()),
}),
(
ConditionShape::VarGreaterEqualZero { idx_var },
@ -63,6 +89,8 @@ pub(in crate::mir::builder) fn match_scan_with_init_shape(
idx_var: idx_var.clone(),
haystack_var: None,
step_lit: *k,
dynamic_needle: false,
needle_var: None,
}),
_ => None,
}

View File

@ -205,7 +205,7 @@ fn push_scan_with_init(candidates: &mut CandidateSet, facts: &CanonicalLoopFacts
},
not_found_return_lit: -1,
scan_direction,
dynamic_needle: false,
dynamic_needle: scan.dynamic_needle,
}),
rule: "loop/scan_with_init",
});
@ -601,6 +601,7 @@ mod tests {
haystack: "s".to_string(),
needle: "ch".to_string(),
step_lit: 1,
dynamic_needle: false,
}),
split_scan: None,
pattern1_simplewhile: None,
@ -639,6 +640,7 @@ mod tests {
haystack: "s".to_string(),
needle: "ch".to_string(),
step_lit: -1,
dynamic_needle: false,
}),
split_scan: None,
pattern1_simplewhile: None,
@ -682,6 +684,7 @@ mod tests {
haystack: "s".to_string(),
needle: "ch".to_string(),
step_lit: 1,
dynamic_needle: false,
}),
split_scan: None,
pattern1_simplewhile: None,
@ -718,6 +721,7 @@ mod tests {
haystack: "s".to_string(),
needle: "ch".to_string(),
step_lit: 1,
dynamic_needle: false,
}),
split_scan: None,
pattern1_simplewhile: None,

View File

@ -19,8 +19,10 @@ run_filter() {
run_filter "string_index_of_vm" "phase29aq_string_index_of_min_vm" || exit 1
run_filter "string_last_index_of_vm" "phase29aq_string_last_index_of_min_vm" || exit 1
run_filter "string_index_of_string_vm" "phase29aq_string_index_of_string_min_vm" || exit 1
run_filter "string_parse_integer_vm" "phase29aq_string_parse_integer_min_vm" || exit 1
run_filter "string_split_vm" "phase29aq_string_split_min_vm" || exit 1
run_filter "string_to_upper_vm" "phase29aq_string_to_upper_min_vm" || exit 1
log_success "phase29aq_stdlib_pack_vm: all stdlib subset filters passed"
exit 0

View File

@ -0,0 +1,17 @@
#!/bin/bash
# phase29aq_string_index_of_string_min_vm.sh - StringUtils.index_of_string via plan/composer (VM)
#
# Runs the index_of_string fixture on the VM and compares what it prints
# against the expected output. Exits 2 when the environment check fails and
# 1 when the output differs.

# Shared smoke-test helpers live three directories above this script.
. "$(dirname "$0")/../../../lib/test_runner.sh"
if ! require_env; then
    exit 2
fi

FIXTURE="$NYASH_ROOT/apps/tests/phase29aq_string_index_of_string_min.hako"
# NOTE(review): presumably required because the fixture loads StringUtils via a
# file-path `using` - confirm against the runner's documentation.
export NYASH_ALLOW_USING_FILE=1

output=$(run_nyash_vm "$FIXTURE")
# The fixture prints index_of_string("hello", "ll").
expected="2"

if ! compare_outputs "$expected" "$output" "phase29aq_string_index_of_string_min_vm"; then
    exit 1
fi

View File

@ -0,0 +1,17 @@
#!/bin/bash
# phase29aq_string_to_upper_min_vm.sh - StringUtils.to_upper via plan/composer (VM)
#
# Runs the to_upper fixture on the VM and compares what it prints against the
# expected output. Exits 2 when the environment check fails and 1 when the
# output differs.

# Shared smoke-test helpers live three directories above this script.
. "$(dirname "$0")/../../../lib/test_runner.sh"
if ! require_env; then
    exit 2
fi

FIXTURE="$NYASH_ROOT/apps/tests/phase29aq_string_to_upper_min.hako"
# NOTE(review): presumably required because the fixture loads StringUtils via a
# file-path `using` - confirm against the runner's documentation.
export NYASH_ALLOW_USING_FILE=1

output=$(run_nyash_vm "$FIXTURE")
# The fixture prints to_upper("abc").
expected="ABC"

if ! compare_outputs "$expected" "$output" "phase29aq_string_to_upper_min_vm"; then
    exit 1
fi