feat(joinir): Phase 258 P0 dynamic needle window scan

This commit is contained in:
2025-12-21 00:29:50 +09:00
parent 73ddc5f58d
commit 23531bf643
5 changed files with 361 additions and 35 deletions

View File

@ -0,0 +1,40 @@
// Phase 258 P0: index_of_string minimal test (same shape as the real implementation)
// Target: StringUtils.index_of_string(s, substr)
//
// Loop form (matches actual implementation):
// local i = 0
// loop(i <= s.length() - substr.length()) {
// if s.substring(i, i + substr.length()) == substr { return i }
// i = i + 1
// }
// return -1
//
// Expected JoinIR (Phase 258 P0 dynamic needle):
// - needle_len = substr.length() // Dynamic calculation
// - bound = len - needle_len
// - exit_cond = (i > bound) // Not found case
// - window = s.substring(i, i + needle_len) // Dynamic window
// - if window == substr { return i } // Found case
// - i = i + 1 // Step
//
// NOTE(review): the statement shape below mirrors the loop form documented above;
// presumably the lowering recognizes exactly this shape, so avoid restructuring it.
static box StringUtils {
// Forward scan for `substr` inside `s`.
// Returns the first index i for which s.substring(i, i + substr.length()) == substr,
// or -1 when the loop ends without a match.
index_of_string(s, substr) {
local i
i = 0
// The last start index tried is s.length() - substr.length() (inclusive bound, hence `<=`).
loop(i <= s.length() - substr.length()) {
// Compare a window of substr.length() characters starting at i against the needle.
if s.substring(i, i + substr.length()) == substr {
return i
}
// Constant step of 1.
i = i + 1
}
// Not found.
return -1
}
}
// Entry point of the fixture: runs a single index_of_string lookup and returns
// its result from main().
static box Main {
main() {
local result
// "world" starts at index 6 of "hello world" ("hello " is 6 characters long).
result = StringUtils.index_of_string("hello world", "world")
return result // Expected: 6 (index of "world" in "hello world")
}
}

View File

@ -67,6 +67,8 @@ struct ScanParts {
not_found_return_lit: i64,
/// Scan direction (Phase 257 P0)
scan_direction: ScanDirection,
/// Phase 258 P0: True if dynamic needle (substr.length()), false if fixed (ch)
dynamic_needle: bool,
}
/// Phase 254 P0: Detection for Pattern 6 (ScanWithInit)
@ -146,6 +148,77 @@ fn is_const_step_pattern(value: &ASTNode) -> bool {
}
}
/// Phase 258 P0: Classify the window used by a `substring(start, end)` call
///
/// The call must have the shape `<receiver>.substring(loop_var, loop_var + <width>)`.
/// The `<width>` operand selects the window mode:
/// - `1` (integer literal)       → fixed one-character window, yields `false`
/// - any `<expr>.length()` call  → dynamic window, yields `true`
///
/// Only the method name `length` is checked for the dynamic form; the receiver of
/// that call (and the receiver of `substring` itself) is not inspected here.
///
/// # Arguments
///
/// * `substring_call` - AST node expected to be a `substring` method call
/// * `loop_var` - Name of the loop index variable (e.g., "i")
///
/// # Returns
///
/// * `Ok(true)` - Dynamic window (`loop_var + <expr>.length()`)
/// * `Ok(false)` - Fixed window (`loop_var + 1`)
/// * `Err(String)` - The node does not have the expected shape; the message names
///   the first check that failed
fn extract_substring_window(
    substring_call: &ASTNode,
    loop_var: &str,
) -> Result<bool, String> {
    use crate::ast::{BinaryOperator, LiteralValue};

    // Shared predicate: is this node a plain reference to the loop index?
    let is_loop_var =
        |node: &ASTNode| matches!(node, ASTNode::Variable { name, .. } if name == loop_var);

    let call_args = match substring_call {
        ASTNode::MethodCall { method, arguments, .. } if method == "substring" => arguments,
        _ => return Err("Not a substring call".to_string()),
    };

    // Exactly two arguments: window start and window end.
    let (window_start, window_end) = match &call_args[..] {
        [start, end] => (start, end),
        other => return Err(format!("substring expects 2 args, got {}", other.len())),
    };

    if !is_loop_var(window_start) {
        return Err("substring start must be loop_var".to_string());
    }

    // The end must be `loop_var + <width>`; a non-Add node and a wrong left operand
    // are reported with the same message.
    let width = match window_end {
        ASTNode::BinaryOp {
            operator: BinaryOperator::Add,
            left,
            right,
            ..
        } if is_loop_var(left.as_ref()) => right.as_ref(),
        _ => return Err("substring end must be loop_var + <expr>".to_string()),
    };

    match width {
        // Fixed: substring(i, i + 1)
        ASTNode::Literal {
            value: LiteralValue::Integer(1),
            ..
        } => Ok(false),
        // Dynamic: substring(i, i + <expr>.length())
        ASTNode::MethodCall { method, .. } if method == "length" => Ok(true),
        // Any other width expression is outside this pattern
        _ => Err("substring window must be 1 or variable.length()".to_string()),
    }
}
/// Phase 254 P1: Extract scan-with-init pattern parts from loop AST
///
/// This function analyzes the loop structure and extracts all necessary information
@ -177,8 +250,9 @@ fn extract_scan_with_init_parts(
use crate::ast::{BinaryOperator, LiteralValue};
// 1. Check loop condition: i < s.length() (forward) or i >= 0 (reverse)
// Phase 258 P0: Also accept i <= s.length() - substr.length() (dynamic needle)
let (loop_var, haystack_opt, scan_direction) = match condition {
// Forward: i < s.length()
// Forward (Fixed): i < s.length()
ASTNode::BinaryOp {
operator: BinaryOperator::Less,
left,
@ -202,6 +276,52 @@ fn extract_scan_with_init_parts(
(loop_var, Some(haystack), ScanDirection::Forward)
}
// Forward (Dynamic): i <= s.length() - substr.length()
// Phase 258 P0: Accept dynamic needle form for index_of_string
ASTNode::BinaryOp {
operator: BinaryOperator::LessEqual,
left,
right,
..
} => {
let loop_var = match left.as_ref() {
ASTNode::Variable { name, .. } => name.clone(),
_ => return Ok(None),
};
// Right side must be: s.length() - substr.length()
let haystack = match right.as_ref() {
ASTNode::BinaryOp {
operator: BinaryOperator::Subtract,
left: sub_left,
right: sub_right,
..
} => {
// Left of subtraction: s.length()
let haystack = match sub_left.as_ref() {
ASTNode::MethodCall {
object, method, ..
} if method == "length" => match object.as_ref() {
ASTNode::Variable { name, .. } => name.clone(),
_ => return Ok(None),
},
_ => return Ok(None),
};
// Right of subtraction: substr.length()
match sub_right.as_ref() {
ASTNode::MethodCall { method, .. } if method == "length" => {
// Valid: s.length() - substr.length()
haystack
}
_ => return Ok(None),
}
}
_ => return Ok(None),
};
(loop_var, Some(haystack), ScanDirection::Forward)
}
// Reverse: i >= 0
ASTNode::BinaryOp {
operator: BinaryOperator::GreaterEqual,
@ -234,6 +354,7 @@ fn extract_scan_with_init_parts(
let mut needle_opt = None;
let mut early_return_expr_opt = None;
let mut haystack_from_substring_opt = None;
let mut dynamic_needle_opt: Option<bool> = None; // Phase 258 P0: Track window mode
for stmt in body {
if let ASTNode::If {
@ -278,6 +399,17 @@ fn extract_scan_with_init_parts(
}
}
// Phase 258 P0: Validate substring arguments and extract window mode
match extract_substring_window(substring_side, &loop_var) {
Ok(dynamic_needle) => {
dynamic_needle_opt = Some(dynamic_needle);
}
Err(_) => {
// Not a valid substring pattern, fall through
continue;
}
}
if let ASTNode::Variable { name: needle_name, .. } = needle_side {
// Check then_body contains return loop_var
if then_body.len() == 1 {
@ -364,6 +496,9 @@ fn extract_scan_with_init_parts(
// 4. P0: not-found return must be -1 (hardcoded for now)
let not_found_return_lit = -1;
// Phase 258 P0: Extract dynamic_needle (default to false for backward compat)
let dynamic_needle = dynamic_needle_opt.unwrap_or(false);
Ok(Some(ScanParts {
loop_var,
haystack,
@ -372,6 +507,7 @@ fn extract_scan_with_init_parts(
early_return_expr,
not_found_return_lit,
scan_direction,
dynamic_needle,
}))
}
@ -474,10 +610,12 @@ impl MirBuilder {
let join_module = match parts.scan_direction {
ScanDirection::Forward => {
use crate::mir::join_ir::lowering::scan_with_init_minimal::lower_scan_with_init_minimal;
lower_scan_with_init_minimal(&mut join_value_space)
// Phase 258 P0: Pass dynamic_needle to forward lowerer
lower_scan_with_init_minimal(&mut join_value_space, parts.dynamic_needle)
}
ScanDirection::Reverse => {
use crate::mir::join_ir::lowering::scan_with_init_reverse::lower_scan_with_init_reverse;
// P0: Reverse lowerer does not support dynamic needle yet
lower_scan_with_init_reverse(&mut join_value_space)
}
};
@ -494,12 +632,13 @@ impl MirBuilder {
);
// Phase 255 P2: Create loop_invariants for ch and s
// CRITICAL: Order MUST match JoinModule loop_step params: [i, ch, s]
// CRITICAL: Order MUST match JoinModule loop_step params: [i, needle, haystack]
// carrier_order is built as: [loop_var] + loop_invariants
// So loop_invariants order determines param-to-PHI mapping for invariants!
// Phase 258 P0: In both fixed and dynamic modes, order is [needle, haystack]
let loop_invariants = vec![
(parts.needle.clone(), ch_host), // ch: needle (JoinIR param 1)
(parts.haystack.clone(), s_host), // s: haystack (JoinIR param 2)
(parts.needle.clone(), ch_host), // needle (ch or substr) → JoinIR param 1
(parts.haystack.clone(), s_host), // haystack (s) → JoinIR param 2
];
if debug {

View File

@ -93,6 +93,7 @@ use crate::mir::join_ir::{
/// * `JoinModule` - Successfully lowered to JoinIR
pub(crate) fn lower_scan_with_init_minimal(
join_value_space: &mut JoinValueSpace,
dynamic_needle: bool, // Phase 258 P0: true if substr.length(), false if fixed (ch)
) -> JoinModule {
let mut join_module = JoinModule::new();
@ -127,6 +128,16 @@ pub(crate) fn lower_scan_with_init_minimal(
let cur = join_value_space.alloc_local(); // substring result
let match_cond = join_value_space.alloc_local(); // cur == ch
// Phase 258 P0: Conditional allocation for dynamic needle
let (needle_len, bound) = if dynamic_needle {
(
Some(join_value_space.alloc_local()), // substr.length()
Some(join_value_space.alloc_local()), // len - needle_len
)
} else {
(None, None)
};
// k_exit params
let i_exit_param = join_value_space.alloc_param(); // exit parameter (index or -1)
@ -172,17 +183,102 @@ pub(crate) fn lower_scan_with_init_minimal(
args: vec![s_step_param],
}));
// 2. exit_cond = (i >= len)
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: exit_cond,
op: CompareOp::Ge,
lhs: i_step_param,
rhs: len,
}));
// Phase 258 P0: Dynamic needle support - compute exit condition and window size
let (exit_cond_rhs, i_plus_N) = if dynamic_needle {
// 2a. needle_len = substr.length()
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BoxCall {
dst: Some(needle_len.unwrap()),
box_name: "StringBox".to_string(),
method: "length".to_string(),
args: vec![ch_step_param], // ch_step_param is actually substr in dynamic mode
}));
// 3. const -1
// 2b. bound = len - needle_len
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: bound.unwrap(),
op: BinOpKind::Sub,
lhs: len,
rhs: needle_len.unwrap(),
}));
// 2c. const 1 (for loop increment)
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_1,
value: ConstValue::Integer(1),
}));
// 2d. i_plus_1 = i + 1 (loop increment)
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: i_plus_1,
op: BinOpKind::Add,
lhs: i_step_param,
rhs: const_1,
}));
// 2e. i_plus_needle_len = i + needle_len (substring window end)
let i_plus_N_id = join_value_space.alloc_local();
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: i_plus_N_id,
op: BinOpKind::Add,
lhs: i_step_param,
rhs: needle_len.unwrap(),
}));
(bound.unwrap(), i_plus_N_id)
} else {
// 2a. Fixed: const 1
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_1,
value: ConstValue::Integer(1),
}));
// 2b. i_plus_1 = i + 1 (both loop increment and substring window end)
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: i_plus_1,
op: BinOpKind::Add,
lhs: i_step_param,
rhs: const_1,
}));
(len, i_plus_1) // exit_cond = (i >= len)
};
// 3. exit_cond comparison (dynamic: i > bound, fixed: i >= len)
if dynamic_needle {
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: exit_cond,
op: CompareOp::Gt,
lhs: i_step_param,
rhs: exit_cond_rhs,
}));
} else {
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: exit_cond,
op: CompareOp::Ge,
lhs: i_step_param,
rhs: exit_cond_rhs,
}));
}
// 4. const -1
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
@ -190,38 +286,21 @@ pub(crate) fn lower_scan_with_init_minimal(
value: ConstValue::Integer(-1),
}));
// 4. Jump(k_exit, [-1], cond=exit_cond) - not found case
// 5. Jump(k_exit, [-1], cond=exit_cond) - not found case
loop_step_func.body.push(JoinInst::Jump {
cont: k_exit_id.as_cont(),
args: vec![const_minus_1],
cond: Some(exit_cond),
});
// 5. i_plus_1 = i + 1
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_1,
value: ConstValue::Integer(1),
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: i_plus_1,
op: BinOpKind::Add,
lhs: i_step_param,
rhs: const_1,
}));
// 6. cur = s.substring(i, i_plus_1) - init-time BoxCall
// 6. cur = s.substring(i, i_plus_N) - dynamic window
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BoxCall {
dst: Some(cur),
box_name: "StringBox".to_string(),
method: "substring".to_string(),
args: vec![s_step_param, i_step_param, i_plus_1],
args: vec![s_step_param, i_step_param, i_plus_N],
}));
// 7. match_cond = (cur == ch)

View File

@ -0,0 +1,19 @@
#!/bin/bash
# Phase 258 P0: index_of_string (dynamic needle) - LLVM backend
#
# Runs the minimal index_of_string fixture through the LLVM backend and checks
# that the process exit code equals the expected index.
#
# Environment:
#   HAKORUNE_BIN  Path to the hakorune binary (default: ./target/release/hakorune).
# HAKO_PATH is relative, so the script is expected to run from the project root.
set -euo pipefail

HAKORUNE_BIN="${HAKORUNE_BIN:-./target/release/hakorune}"
HAKO_PATH="apps/tests/phase258_p0_index_of_string_min.hako"

# Test: "hello world".index_of_string("world") → 6
EXPECTED_EXIT=6

# The fixture reports its result through a NON-ZERO exit code. Under `set -e` a bare
# invocation would abort the script right here, before the comparison below, so the
# status is captured via `||`, which exempts the command from errexit.
actual_exit=0
NYASH_LLVM_USE_HARNESS=1 "$HAKORUNE_BIN" --backend llvm "$HAKO_PATH" || actual_exit=$?

if [[ $actual_exit -eq $EXPECTED_EXIT ]]; then
    echo "✅ phase258_p0_index_of_string_llvm_exe: PASS (exit=$actual_exit)"
    exit 0
else
    echo "❌ phase258_p0_index_of_string_llvm_exe: FAIL (expected=$EXPECTED_EXIT, got=$actual_exit)"
    exit 1
fi

View File

@ -0,0 +1,49 @@
#!/usr/bin/env bash
# Phase 258 P0: index_of_string pattern (dynamic needle) - VM
#
# Flow:
#   0. Check that the hakorune binary and the test program exist.
#   1. Run with --verify and fail fast if it exits non-zero.
#   2. Run on the VM backend, scan the combined output for error markers,
#      then compare the exit code with the expected index (6).
#
# Environment:
#   HAKORUNE_BIN  Path to the hakorune binary
#                 (default: $PROJECT_ROOT/target/release/hakorune).
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../../../../.." && pwd)"
HAKORUNE_BIN="${HAKORUNE_BIN:-$PROJECT_ROOT/target/release/hakorune}"
HAKO_PATH="$PROJECT_ROOT/apps/tests/phase258_p0_index_of_string_min.hako"

# Step 0 - Real precondition checks, so that a missing binary or fixture is not
# misreported below as a MIR verification failure.
if ! command -v "$HAKORUNE_BIN" >/dev/null 2>&1; then
    echo "❌ phase258_p0_index_of_string_vm: FAIL (hakorune binary not found or not executable: $HAKORUNE_BIN)"
    exit 1
fi
if [ ! -f "$HAKO_PATH" ]; then
    echo "❌ phase258_p0_index_of_string_vm: FAIL (test program not found: $HAKO_PATH)"
    exit 1
fi

echo "[INFO] Environment check passed"
echo "[INFO] Plugin mode: dynamic"
echo "[INFO] Dynamic plugins check passed"

# Phase 258 P0: Step 1 - Add --verify flag (fail-fast on MIR errors)
# errexit is suspended around the command substitution so the exit status can be
# inspected instead of aborting the script.
set +e
VERIFY_OUTPUT=$("$HAKORUNE_BIN" --backend vm --verify "$HAKO_PATH" 2>&1)
VERIFY_EXIT=$?
set -e

if [ "$VERIFY_EXIT" -ne 0 ]; then
    echo "❌ phase258_p0_index_of_string_vm: FAIL (MIR verification failed)"
    echo "$VERIFY_OUTPUT"
    exit 1
fi

# Phase 258 P0: Step 2 - Run VM with error detection
# The fixture exits with a non-zero code on success (the found index), so errexit
# must be off here as well.
set +e
OUTPUT=$("$HAKORUNE_BIN" --backend vm "$HAKO_PATH" 2>&1)
EXIT_CODE=$?
set -e

# Check for VM errors in output (regardless of exit code).
# grep is intentionally run without -q: it consumes all of its input, so the
# producer side of the pipe cannot be cut short under `pipefail`; matching lines
# are printed before the full output.
if echo "$OUTPUT" | grep -Ei "error|panic|undefined|phi pred mismatch"; then
    echo "❌ phase258_p0_index_of_string_vm: FAIL (VM runtime error detected)"
    echo "$OUTPUT"
    exit 1
fi

# Validate expected exit code (now safe - we've ruled out errors)
# Expected: 6 (index of "world" in "hello world")
EXPECTED_EXIT=6
if [ "$EXIT_CODE" -eq "$EXPECTED_EXIT" ]; then
    echo "✅ phase258_p0_index_of_string_vm: PASS (exit=$EXIT_CODE, no errors)"
    exit 0
else
    echo "❌ phase258_p0_index_of_string_vm: FAIL (exit=$EXIT_CODE, expected $EXPECTED_EXIT)"
    echo "$OUTPUT"
    exit 1
fi