phase29aq(p1): add stdlib scan/parse/split subsets

This commit is contained in:
2025-12-31 11:02:01 +09:00
parent e360ea7de7
commit cedecb8301
19 changed files with 310 additions and 58 deletions

View File

@ -0,0 +1,3 @@
// Minimal fixture: prints the result of StringUtils.index_of("hello", "l").
// The matching smoke script (phase29aq_string_index_of_min_vm) expects the output "2".
using "apps/lib/json_native/utils/string.hako" as StringUtils
print(StringUtils.index_of("hello", "l"))

View File

@ -0,0 +1,3 @@
// Minimal fixture: prints the result of StringUtils.last_index_of("hello", "l").
// The matching smoke script (phase29aq_string_last_index_of_min_vm) expects the output "3".
using "apps/lib/json_native/utils/string.hako" as StringUtils
print(StringUtils.last_index_of("hello", "l"))

View File

@ -0,0 +1,3 @@
// Minimal fixture: prints the result of StringUtils.parse_integer("12345").
// The matching smoke script (phase29aq_string_parse_integer_min_vm) expects the output "12345".
using "apps/lib/json_native/utils/string.hako" as StringUtils
print(StringUtils.parse_integer("12345"))

View File

@ -0,0 +1,9 @@
// Minimal fixture: splits "a,b,c" on "," and prints the length of the result.
// The matching smoke script (phase29aq_string_split_min_vm) expects the output "3".
using "apps/lib/json_native/utils/string.hako" as StringUtils
static box Main {
main(args) {
// Only the element count is printed, not the individual pieces.
local result = StringUtils.split("a,b,c", ",")
print(result.length())
return 0
}
}

View File

@ -3,7 +3,7 @@
## Current Focus
- Phase: `docs/development/current/main/phases/phase-29aq/README.md`
- Next: Phase 29aq P1 (stdlib subset additions)
- Next: Phase 29aq P2 (stdlib scan subset extensions)
## Gate (SSOT)

View File

@ -5,7 +5,7 @@ Scope: 「次にやる候補」を短く列挙するメモ。入口は `docs/dev
## Active
- Phase 29aq: `docs/development/current/main/phases/phase-29aq/README.md` (Next: P1 subsets)
- Phase 29aq: `docs/development/current/main/phases/phase-29aq/README.md` (Next: P2 scan subsets)
- JoinIR regression gate SSOT: `docs/development/current/main/phases/phase-29ae/README.md`
- CorePlan hardening (docs-first): `docs/development/current/main/phases/phase-29al/README.md`

View File

@ -34,7 +34,7 @@ Related:
## 1.1 Current (active)
- Active phase: `docs/development/current/main/phases/phase-29aq/README.md`
- Next step: Phase 29aq P1 (stdlib subset additions)
- Next step: Phase 29aq P2 (stdlib scan subset extensions)
## 2. すでに固めた SSOT再発防止の土台

View File

@ -15,6 +15,10 @@ Goal: JoinIR の最小回帰セットを SSOT として固定する。
- Pattern1 (subset reject, VM): `phase29ao_pattern1_subset_reject_extra_stmt_vm`
- Pattern1 (stdlib to_lower, VM): `phase29ap_stringutils_tolower_vm`
- Pattern1 (stdlib join, VM): `phase29ap_stringutils_join_vm`
- ScanWithInit (stdlib index_of, VM): `phase29aq_string_index_of_min_vm`
- ScanWithInit (stdlib last_index_of, VM): `phase29aq_string_last_index_of_min_vm`
- Pattern2 (stdlib parse_integer, VM): `phase29aq_string_parse_integer_min_vm`
- SplitScan (stdlib split, VM): `phase29aq_string_split_min_vm`
- Pattern5 (Break, VM): `phase286_pattern5_break_vm`
- Pattern5 (strict shadow, VM): `phase29ao_pattern5_strict_shadow_vm`
- Pattern5 (release adopt, VM): `phase29ao_pattern5_release_adopt_vm`

View File

@ -1,5 +1,5 @@
---
Status: Planned
Status: Done
Scope: stdlib subsets (index_of/last_index_of, parse_integer, split)
Related:
- docs/development/current/main/phases/phase-29aq/README.md

View File

@ -54,4 +54,10 @@ Plan/Composer subsets (or mark unsupported) before adding new subsets.
## Next (planned)
## Progress
- P1: Add stdlib subsets in priority order (index_of/last_index_of → parse_integer → split).
## Next (planned)
- P2: Extend stdlib scan subsets (candidate: index_of_string, to_upper).

View File

@ -59,6 +59,10 @@ pub(super) fn try_compose_core_loop_v0_scan_with_init(
&& haystack_var == &scan.haystack
&& var == &scan.loop_var
&& *k == scan.step_lit,
(
ConditionShape::VarGreaterEqualZero { idx_var },
StepShape::AssignAddConst { var, k },
) => idx_var == &scan.loop_var && var == &scan.loop_var && *k == scan.step_lit,
_ => false,
};
if !shapes_match {

View File

@ -207,7 +207,7 @@ fn try_build_loop_facts_inner(
fn try_extract_condition_shape(condition: &ASTNode) -> Result<Option<ConditionShape>, Freeze> {
let ASTNode::BinaryOp {
operator: BinaryOperator::Less,
operator,
left,
right,
..
@ -216,40 +216,60 @@ fn try_extract_condition_shape(condition: &ASTNode) -> Result<Option<ConditionSh
return Ok(None);
};
let ASTNode::Variable { name: idx_var, .. } = left.as_ref() else {
return Ok(None);
};
match operator {
BinaryOperator::Less => {
let ASTNode::Variable { name: idx_var, .. } = left.as_ref() else {
return Ok(None);
};
let ASTNode::MethodCall {
object,
method,
arguments,
..
} = right.as_ref()
else {
return Ok(None);
};
if !arguments.is_empty() {
return Ok(None);
let ASTNode::MethodCall {
object,
method,
arguments,
..
} = right.as_ref()
else {
return Ok(None);
};
if !arguments.is_empty() {
return Ok(None);
}
let method = match method.as_str() {
"length" => LengthMethod::Length,
"size" => LengthMethod::Size,
_ => return Ok(None),
};
let ASTNode::Variable {
name: haystack_var,
..
} = object.as_ref()
else {
return Ok(None);
};
Ok(Some(ConditionShape::VarLessLength {
idx_var: idx_var.clone(),
haystack_var: haystack_var.clone(),
method,
}))
}
BinaryOperator::GreaterEqual => {
let ASTNode::Variable { name: idx_var, .. } = left.as_ref() else {
return Ok(None);
};
let ASTNode::Literal { value, .. } = right.as_ref() else {
return Ok(None);
};
if !matches!(value, LiteralValue::Integer(0)) {
return Ok(None);
}
Ok(Some(ConditionShape::VarGreaterEqualZero {
idx_var: idx_var.clone(),
}))
}
_ => Ok(None),
}
let method = match method.as_str() {
"length" => LengthMethod::Length,
"size" => LengthMethod::Size,
_ => return Ok(None),
};
let ASTNode::Variable {
name: haystack_var,
..
} = object.as_ref()
else {
return Ok(None);
};
Ok(Some(ConditionShape::VarLessLength {
idx_var: idx_var.clone(),
haystack_var: haystack_var.clone(),
method,
}))
}
fn try_extract_step_shape(body: &[ASTNode]) -> Result<Option<StepShape>, Freeze> {
@ -265,7 +285,7 @@ fn try_extract_step_shape(body: &[ASTNode]) -> Result<Option<StepShape>, Freeze>
};
let ASTNode::BinaryOp {
operator: BinaryOperator::Add,
operator,
left,
right,
..
@ -287,13 +307,19 @@ fn try_extract_step_shape(body: &[ASTNode]) -> Result<Option<StepShape>, Freeze>
return Ok(None);
};
if *k != 1 {
let k = match operator {
BinaryOperator::Add => *k,
BinaryOperator::Subtract => -*k,
_ => return Ok(None),
};
if k != 1 && k != -1 {
return Ok(None);
}
Ok(Some(StepShape::AssignAddConst {
var: var.clone(),
k: *k,
k,
}))
}
@ -352,19 +378,22 @@ fn try_extract_scan_with_init_facts(
condition_shape: &ConditionShape,
step_shape: &StepShape,
) -> Result<Option<ScanWithInitFacts>, Freeze> {
let (ConditionShape::VarLessLength {
idx_var,
haystack_var,
..
}, StepShape::AssignAddConst { var: step_var, k: 1 }) = (condition_shape, step_shape)
else {
return Ok(None);
let (idx_var, expected_haystack, step_lit) = match (condition_shape, step_shape) {
(
ConditionShape::VarLessLength {
idx_var,
haystack_var,
..
},
StepShape::AssignAddConst { var, k },
) if *k == 1 && var == idx_var => (idx_var, Some(haystack_var), *k),
(
ConditionShape::VarGreaterEqualZero { idx_var },
StepShape::AssignAddConst { var, k },
) if *k == -1 && var == idx_var => (idx_var, None, *k),
_ => return Ok(None),
};
if step_var != idx_var {
return Ok(None);
}
// Find `if s.substring(i, i + 1) == ch { return i }` anywhere except the last step.
for stmt in body.iter().take(body.len().saturating_sub(1)) {
let ASTNode::If {
@ -406,9 +435,15 @@ fn try_extract_scan_with_init_facts(
let ASTNode::Variable { name: obj, .. } = object.as_ref() else {
continue;
};
if obj != haystack_var {
continue;
}
let haystack_var = match expected_haystack {
Some(expected) => {
if obj != expected {
continue;
}
expected.clone()
}
None => obj.clone(),
};
// substring(i, i + 1)
let (start, end) = (&arguments[0], &arguments[1]);
@ -456,9 +491,9 @@ fn try_extract_scan_with_init_facts(
return Ok(Some(ScanWithInitFacts {
loop_var: idx_var.clone(),
haystack: haystack_var.clone(),
haystack: haystack_var,
needle: needle.clone(),
step_lit: 1,
step_lit,
}));
}
@ -843,6 +878,70 @@ mod tests {
assert!(facts.is_some());
}
/// Loop facts must recognise a reverse scan: condition `i >= 0`, a body that
/// returns `i` when `s.substring(i, i + 1) == ch`, and the step `i = i - 1`.
/// The extracted scan facts are expected to carry `step_lit == -1`.
#[test]
fn loopfacts_ok_some_for_reverse_scan_with_init_minimal() {
    // Small AST builders so the fixture reads like the loop it models.
    let int = |n| ASTNode::Literal {
        value: LiteralValue::Integer(n),
        span: Span::unknown(),
    };
    let binop = |operator, lhs: ASTNode, rhs: ASTNode| ASTNode::BinaryOp {
        operator,
        left: Box::new(lhs),
        right: Box::new(rhs),
        span: Span::unknown(),
    };

    // Loop condition: `i >= 0`.
    let condition = binop(BinaryOperator::GreaterEqual, v("i"), int(0));

    // `s.substring(i, i + 1)`
    let one_char = ASTNode::MethodCall {
        object: Box::new(v("s")),
        method: "substring".to_string(),
        arguments: vec![v("i"), binop(BinaryOperator::Add, v("i"), int(1))],
        span: Span::unknown(),
    };

    // `if s.substring(i, i + 1) == ch { return i }`
    let if_stmt = ASTNode::If {
        condition: Box::new(binop(BinaryOperator::Equal, one_char, v("ch"))),
        then_body: vec![ASTNode::Return {
            value: Some(Box::new(v("i"))),
            span: Span::unknown(),
        }],
        else_body: None,
        span: Span::unknown(),
    };

    // Step: `i = i - 1`.
    let step = ASTNode::Assignment {
        target: Box::new(v("i")),
        value: Box::new(binop(BinaryOperator::Subtract, v("i"), int(1))),
        span: Span::unknown(),
    };

    let body = [if_stmt, step];
    let scan = try_build_loop_facts(&condition, &body)
        .expect("Ok")
        .expect("Some")
        .scan_with_init
        .expect("scan facts");

    assert_eq!(scan.loop_var, "i");
    assert_eq!(scan.haystack, "s");
    assert_eq!(scan.step_lit, -1);
}
#[test]
fn loopfacts_ctx_skips_pattern1_when_kind_mismatch() {
let condition = ASTNode::BinaryOp {

View File

@ -21,6 +21,9 @@ pub(in crate::mir::builder) enum ConditionShape {
haystack_var: String,
method: LengthMethod,
},
VarGreaterEqualZero {
idx_var: String,
},
Unknown,
}

View File

@ -190,6 +190,11 @@ fn push_scan_with_init(candidates: &mut CandidateSet, facts: &CanonicalLoopFacts
let Some(scan) = &facts.facts.scan_with_init else {
return;
};
let scan_direction = match scan.step_lit {
1 => ScanDirection::Forward,
-1 => ScanDirection::Reverse,
_ => return,
};
candidates.push(PlanCandidate {
plan: DomainPlan::ScanWithInit(ScanWithInitPlan {
loop_var: scan.loop_var.clone(),
@ -201,7 +206,7 @@ fn push_scan_with_init(candidates: &mut CandidateSet, facts: &CanonicalLoopFacts
span: crate::ast::Span::unknown(),
},
not_found_return_lit: -1,
scan_direction: ScanDirection::Forward,
scan_direction,
dynamic_needle: false,
}),
rule: "loop/scan_with_init",
@ -466,7 +471,9 @@ mod tests {
SkeletonFacts, SkeletonKind,
};
use crate::mir::builder::control_flow::plan::normalize::canonicalize_loop_facts;
use crate::mir::builder::control_flow::plan::{Pattern2PromotionHint, Pattern5ExitKind};
use crate::mir::builder::control_flow::plan::{
Pattern2PromotionHint, Pattern5ExitKind, ScanDirection,
};
use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span};
use std::collections::{BTreeMap, BTreeSet};
@ -620,6 +627,45 @@ mod tests {
}
}
/// A scan-with-init fact whose `step_lit` is `-1` must produce a
/// `ScanWithInit` plan with `ScanDirection::Reverse` and the same step.
#[test]
fn planner_sets_reverse_scan_direction_for_negative_step() {
    // Only the scan fact is populated; every other fact slot stays empty.
    let reverse_scan = ScanWithInitFacts {
        loop_var: "i".to_string(),
        haystack: "s".to_string(),
        needle: "ch".to_string(),
        step_lit: -1,
    };
    let loop_facts = LoopFacts {
        condition_shape: ConditionShape::Unknown,
        step_shape: StepShape::Unknown,
        skeleton: SkeletonFacts {
            kind: SkeletonKind::Loop,
        },
        features: LoopFeatureFacts::default(),
        scan_with_init: Some(reverse_scan),
        split_scan: None,
        pattern1_simplewhile: None,
        pattern1_char_map: None,
        pattern1_array_join: None,
        pattern2_break: None,
        pattern2_loopbodylocal: None,
        pattern3_ifphi: None,
        pattern4_continue: None,
        pattern5_infinite_early_exit: None,
        pattern6_nested_minimal: None,
        pattern8_bool_predicate_scan: None,
        pattern9_accum_const_loop: None,
    };

    let planned = build_plan_from_facts(canonicalize_loop_facts(loop_facts)).expect("Ok");
    match planned {
        Some(DomainPlan::ScanWithInit(scan_plan)) => {
            assert_eq!(scan_plan.step_lit, -1);
            assert_eq!(scan_plan.scan_direction, ScanDirection::Reverse);
        }
        other => panic!("expected scan_with_init plan, got {:?}", other),
    }
}
#[test]
fn planner_ignores_skeleton_and_feature_staging() {
let facts = LoopFacts {

View File

@ -28,6 +28,10 @@ run_filter "pattern1_strict_shadow_vm" "phase29ao_pattern1_strict_shadow_vm" ||
run_filter "pattern1_subset_reject_extra_stmt_vm" "phase29ao_pattern1_subset_reject_extra_stmt_vm" || exit 1
run_filter "pattern1_stringutils_tolower_vm" "phase29ap_stringutils_tolower_vm" || exit 1
run_filter "pattern1_stringutils_join_vm" "phase29ap_stringutils_join_vm" || exit 1
run_filter "string_index_of_vm" "phase29aq_string_index_of_min_vm" || exit 1
run_filter "string_last_index_of_vm" "phase29aq_string_last_index_of_min_vm" || exit 1
run_filter "string_parse_integer_vm" "phase29aq_string_parse_integer_min_vm" || exit 1
run_filter "string_split_vm" "phase29aq_string_split_min_vm" || exit 1
run_filter "pattern5_break_vm" "phase286_pattern5_break_vm" || exit 1
run_filter "pattern5_strict_shadow_vm" "phase29ao_pattern5_strict_shadow_vm" || exit 1
run_filter "pattern5_release_adopt_vm" "phase29ao_pattern5_release_adopt_vm" || exit 1

View File

@ -0,0 +1,17 @@
#!/bin/bash
# phase29aq_string_index_of_min_vm.sh - StringUtils.index_of via plan/composer (VM)
#
# Runs the index_of fixture on the VM and passes only if it prints exactly "2".
# Exit codes: 2 when require_env fails, 1 when the output differs.

# Shared smoke-test helpers; expected to provide require_env, run_nyash_vm
# and compare_outputs.
. "$(dirname "$0")/../../../lib/test_runner.sh"
if ! require_env; then
    exit 2
fi

# NOTE(review): presumably enables the fixture's file-path `using` -- confirm.
export NYASH_ALLOW_USING_FILE=1

FIXTURE="$NYASH_ROOT/apps/tests/phase29aq_string_index_of_min.hako"
expected="2"
output=$(run_nyash_vm "$FIXTURE")

compare_outputs "$expected" "$output" "phase29aq_string_index_of_min_vm" || exit 1

View File

@ -0,0 +1,17 @@
#!/bin/bash
# phase29aq_string_last_index_of_min_vm.sh - StringUtils.last_index_of via plan/composer (VM)
#
# Runs the last_index_of fixture on the VM and passes only if it prints exactly "3".
# Exit codes: 2 when require_env fails, 1 when the output differs.

# Shared smoke-test helpers; expected to provide require_env, run_nyash_vm
# and compare_outputs.
. "$(dirname "$0")/../../../lib/test_runner.sh"
if ! require_env; then
    exit 2
fi

# NOTE(review): presumably enables the fixture's file-path `using` -- confirm.
export NYASH_ALLOW_USING_FILE=1

FIXTURE="$NYASH_ROOT/apps/tests/phase29aq_string_last_index_of_min.hako"
expected="3"
output=$(run_nyash_vm "$FIXTURE")

compare_outputs "$expected" "$output" "phase29aq_string_last_index_of_min_vm" || exit 1

View File

@ -0,0 +1,17 @@
#!/bin/bash
# phase29aq_string_parse_integer_min_vm.sh - StringUtils.parse_integer via plan/composer (VM)
#
# Runs the parse_integer fixture on the VM and passes only if it prints exactly "12345".
# Exit codes: 2 when require_env fails, 1 when the output differs.

# Shared smoke-test helpers; expected to provide require_env, run_nyash_vm
# and compare_outputs.
. "$(dirname "$0")/../../../lib/test_runner.sh"
if ! require_env; then
    exit 2
fi

# NOTE(review): presumably enables the fixture's file-path `using` -- confirm.
export NYASH_ALLOW_USING_FILE=1

FIXTURE="$NYASH_ROOT/apps/tests/phase29aq_string_parse_integer_min.hako"
expected="12345"
output=$(run_nyash_vm "$FIXTURE")

compare_outputs "$expected" "$output" "phase29aq_string_parse_integer_min_vm" || exit 1

View File

@ -0,0 +1,17 @@
#!/bin/bash
# phase29aq_string_split_min_vm.sh - StringUtils.split via plan/composer (VM)
#
# Runs the split fixture on the VM and passes only if it prints exactly "3".
# Exit codes: 2 when require_env fails, 1 when the output differs.

# Shared smoke-test helpers; expected to provide require_env, run_nyash_vm
# and compare_outputs.
. "$(dirname "$0")/../../../lib/test_runner.sh"
if ! require_env; then
    exit 2
fi

# NOTE(review): presumably enables the fixture's file-path `using` -- confirm.
export NYASH_ALLOW_USING_FILE=1

FIXTURE="$NYASH_ROOT/apps/tests/phase29aq_string_split_min.hako"
expected="3"
output=$(run_nyash_vm "$FIXTURE")

compare_outputs "$expected" "$output" "phase29aq_string_split_min_vm" || exit 1