diff --git a/apps/tests/phase258_p0_index_of_string_min.hako b/apps/tests/phase258_p0_index_of_string_min.hako
new file mode 100644
index 00000000..142df56b
--- /dev/null
+++ b/apps/tests/phase258_p0_index_of_string_min.hako
@@ -0,0 +1,40 @@
+// Phase 258 P0: index_of_string minimal test (same shape as the real implementation)
+// Target: StringUtils.index_of_string(s, substr)
+//
+// Loop form (matches actual implementation):
+//   local i = 0
+//   loop(i <= s.length() - substr.length()) {
+//     if s.substring(i, i + substr.length()) == substr { return i }
+//     i = i + 1
+//   }
+//   return -1
+//
+// Expected JoinIR (Phase 258 P0 dynamic needle):
+// - needle_len = substr.length()              // Dynamic calculation
+// - bound = len - needle_len
+// - exit_cond = (i > bound)                   // Not found case
+// - window = s.substring(i, i + needle_len)   // Dynamic window
+// - if window == substr { return i }          // Found case
+// - i = i + 1                                 // Step
+
+static box StringUtils {
+    index_of_string(s, substr) {
+        local i
+        i = 0
+        loop(i <= s.length() - substr.length()) {
+            if s.substring(i, i + substr.length()) == substr {
+                return i
+            }
+            i = i + 1
+        }
+        return -1
+    }
+}
+
+static box Main {
+    main() {
+        local result
+        result = StringUtils.index_of_string("hello world", "world")
+        return result  // Expected: 6 (index of "world" in "hello world")
+    }
+}
diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern6_scan_with_init.rs b/src/mir/builder/control_flow/joinir/patterns/pattern6_scan_with_init.rs
index 99990675..d3e8f9f8 100644
--- a/src/mir/builder/control_flow/joinir/patterns/pattern6_scan_with_init.rs
+++ b/src/mir/builder/control_flow/joinir/patterns/pattern6_scan_with_init.rs
@@ -67,6 +67,8 @@ struct ScanParts {
     not_found_return_lit: i64,
     /// Scan direction (Phase 257 P0)
     scan_direction: ScanDirection,
+    /// Phase 258 P0: True if dynamic needle (substr.length()), false if fixed (ch)
+    dynamic_needle: bool,
 }
 
 /// Phase 254 P0: Detection for Pattern 6 (ScanWithInit)
@@ -146,6 +148,91 @@ fn is_const_step_pattern(value: &ASTNode) -> bool {
     }
 }
 
+/// Phase 258 P0: Extract and validate substring window arguments
+///
+/// Checks if the substring call uses a dynamic window or fixed window:
+/// - Fixed: `substring(i, i + 1)` → returns `false`
+/// - Dynamic: `substring(i, i + needle.length())` → returns `true`
+///
+/// The dynamic form is accepted only when `.length()` is called on the variable the
+/// window is compared against, because the forward lowerer takes the window size
+/// from the needle parameter.
+///
+/// # Arguments
+///
+/// * `substring_call` - The MethodCall AST node for substring()
+/// * `loop_var` - The loop index variable name (e.g., "i")
+/// * `needle_side` - The expression the substring window is compared against
+///
+/// # Returns
+///
+/// * `Ok(true)` - Dynamic needle (needle.length())
+/// * `Ok(false)` - Fixed needle (literal 1)
+/// * `Err(String)` - Invalid substring pattern (not this pattern)
+fn extract_substring_window(
+    substring_call: &ASTNode,
+    loop_var: &str,
+    needle_side: &ASTNode,
+) -> Result<bool, String> {
+    use crate::ast::{BinaryOperator, LiteralValue};
+
+    // Extract arguments from substring(start, end)
+    let args = match substring_call {
+        ASTNode::MethodCall { method, arguments, .. } if method == "substring" => arguments,
+        _ => return Err("Not a substring call".to_string()),
+    };
+
+    if args.len() != 2 {
+        return Err(format!("substring expects 2 args, got {}", args.len()));
+    }
+
+    // Check arg[0] is loop_var
+    match &args[0] {
+        ASTNode::Variable { name, .. } if name == loop_var => {}
+        _ => return Err("substring start must be loop_var".to_string()),
+    }
+
+    // Check arg[1] is loop_var + <window length>
+    match &args[1] {
+        ASTNode::BinaryOp {
+            operator: BinaryOperator::Add,
+            left,
+            right,
+            ..
+        } => {
+            // Left must be loop_var
+            match left.as_ref() {
+                ASTNode::Variable { name, .. } if name == loop_var => {}
+                _ => return Err("substring end must be loop_var + ".to_string()),
+            }
+
+            // Right determines mode
+            match right.as_ref() {
+                // Fixed: substring(i, i + 1)
+                ASTNode::Literal {
+                    value: LiteralValue::Integer(1),
+                    ..
+                } => Ok(false), // Fixed window (ch)
+
+                // Dynamic: substring(i, i + needle.length())
+                ASTNode::MethodCall { object, method, .. } if method == "length" => {
+                    match (object.as_ref(), needle_side) {
+                        (
+                            ASTNode::Variable { name: receiver, .. },
+                            ASTNode::Variable { name: needle, .. },
+                        ) if receiver == needle => Ok(true), // Dynamic window (substr)
+                        _ => Err("substring window must use needle.length()".to_string()),
+                    }
+                }
+
+                // Other patterns not supported
+                _ => Err("substring window must be 1 or variable.length()".to_string()),
+            }
+        }
+        _ => Err("substring end must be loop_var + ".to_string()),
+    }
+}
+
 /// Phase 254 P1: Extract scan-with-init pattern parts from loop AST
 ///
 /// This function analyzes the loop structure and extracts all necessary information
@@ -177,8 +264,9 @@ fn extract_scan_with_init_parts(
     use crate::ast::{BinaryOperator, LiteralValue};
 
     // 1. Check loop condition: i < s.length() (forward) or i >= 0 (reverse)
+    // Phase 258 P0: Also accept i <= s.length() - substr.length() (dynamic needle)
     let (loop_var, haystack_opt, scan_direction) = match condition {
-        // Forward: i < s.length()
+        // Forward (Fixed): i < s.length()
         ASTNode::BinaryOp {
             operator: BinaryOperator::Less,
             left,
@@ -202,6 +290,52 @@ fn extract_scan_with_init_parts(
 
             (loop_var, Some(haystack), ScanDirection::Forward)
         }
+        // Forward (Dynamic): i <= s.length() - substr.length()
+        // Phase 258 P0: Accept dynamic needle form for index_of_string
+        ASTNode::BinaryOp {
+            operator: BinaryOperator::LessEqual,
+            left,
+            right,
+            ..
+        } => {
+            let loop_var = match left.as_ref() {
+                ASTNode::Variable { name, .. } => name.clone(),
+                _ => return Ok(None),
+            };
+
+            // Right side must be: s.length() - substr.length()
+            let haystack = match right.as_ref() {
+                ASTNode::BinaryOp {
+                    operator: BinaryOperator::Subtract,
+                    left: sub_left,
+                    right: sub_right,
+                    ..
+                } => {
+                    // Left of subtraction: s.length()
+                    let haystack = match sub_left.as_ref() {
+                        ASTNode::MethodCall {
+                            object, method, ..
+                        } if method == "length" => match object.as_ref() {
+                            ASTNode::Variable { name, .. } => name.clone(),
+                            _ => return Ok(None),
+                        },
+                        _ => return Ok(None),
+                    };
+
+                    // Right of subtraction: substr.length()
+                    match sub_right.as_ref() {
+                        ASTNode::MethodCall { method, .. } if method == "length" => {
+                            // Valid: s.length() - substr.length()
+                            haystack
+                        }
+                        _ => return Ok(None),
+                    }
+                }
+                _ => return Ok(None),
+            };
+
+            (loop_var, Some(haystack), ScanDirection::Forward)
+        }
         // Reverse: i >= 0
         ASTNode::BinaryOp {
             operator: BinaryOperator::GreaterEqual,
@@ -234,6 +368,7 @@ fn extract_scan_with_init_parts(
     let mut needle_opt = None;
     let mut early_return_expr_opt = None;
     let mut haystack_from_substring_opt = None;
+    let mut dynamic_needle_opt: Option<bool> = None; // Phase 258 P0: Track window mode
 
     for stmt in body {
         if let ASTNode::If {
@@ -278,6 +413,17 @@ fn extract_scan_with_init_parts(
                     }
                 }
 
+                // Phase 258 P0: Validate substring arguments and extract window mode
+                match extract_substring_window(substring_side, &loop_var, needle_side) {
+                    Ok(dynamic_needle) => {
+                        dynamic_needle_opt = Some(dynamic_needle);
+                    }
+                    Err(_) => {
+                        // Not a valid substring pattern, fall through
+                        continue;
+                    }
+                }
+
                 if let ASTNode::Variable { name: needle_name, .. } = needle_side {
                     // Check then_body contains return loop_var
                     if then_body.len() == 1 {
@@ -364,6 +510,15 @@ fn extract_scan_with_init_parts(
     // 4. P0: not-found return must be -1 (hardcoded for now)
     let not_found_return_lit = -1;
 
+    // Phase 258 P0: Extract dynamic_needle (default to false for backward compat)
+    let dynamic_needle = dynamic_needle_opt.unwrap_or(false);
+
+    // The reverse lowerer does not support a dynamic needle yet, so that combination
+    // is reported as "not this pattern" instead of being lowered with a fixed window.
+    if dynamic_needle && matches!(scan_direction, ScanDirection::Reverse) {
+        return Ok(None);
+    }
+
     Ok(Some(ScanParts {
         loop_var,
         haystack,
@@ -372,6 +527,7 @@ fn extract_scan_with_init_parts(
         early_return_expr,
         not_found_return_lit,
         scan_direction,
+        dynamic_needle,
     }))
 }
 
@@ -474,10 +630,12 @@ impl MirBuilder {
         let join_module = match parts.scan_direction {
             ScanDirection::Forward => {
                 use crate::mir::join_ir::lowering::scan_with_init_minimal::lower_scan_with_init_minimal;
-                lower_scan_with_init_minimal(&mut join_value_space)
+                // Phase 258 P0: Pass dynamic_needle to forward lowerer
+                lower_scan_with_init_minimal(&mut join_value_space, parts.dynamic_needle)
             }
             ScanDirection::Reverse => {
                 use crate::mir::join_ir::lowering::scan_with_init_reverse::lower_scan_with_init_reverse;
+                // P0: Reverse lowerer has no dynamic-needle support yet (rejected in extraction)
                 lower_scan_with_init_reverse(&mut join_value_space)
             }
         };
@@ -494,12 +652,13 @@ impl MirBuilder {
         );
 
         // Phase 255 P2: Create loop_invariants for ch and s
-        // CRITICAL: Order MUST match JoinModule loop_step params: [i, ch, s]
+        // CRITICAL: Order MUST match JoinModule loop_step params: [i, needle, haystack]
         // carrier_order is built as: [loop_var] + loop_invariants
         // So loop_invariants order determines param-to-PHI mapping for invariants!
+        // Phase 258 P0: In both fixed and dynamic modes, order is [needle, haystack]
         let loop_invariants = vec![
-            (parts.needle.clone(), ch_host), // ch: needle (JoinIR param 1)
-            (parts.haystack.clone(), s_host), // s: haystack (JoinIR param 2)
+            (parts.needle.clone(), ch_host), // needle (ch or substr) → JoinIR param 1
+            (parts.haystack.clone(), s_host), // haystack (s) → JoinIR param 2
         ];
 
         if debug {
diff --git a/src/mir/join_ir/lowering/scan_with_init_minimal.rs b/src/mir/join_ir/lowering/scan_with_init_minimal.rs
index 99803c2d..c2a69082 100644
--- a/src/mir/join_ir/lowering/scan_with_init_minimal.rs
+++ b/src/mir/join_ir/lowering/scan_with_init_minimal.rs
@@ -93,6 +93,7 @@ use crate::mir::join_ir::{
 /// * `JoinModule` - Successfully lowered to JoinIR
 pub(crate) fn lower_scan_with_init_minimal(
     join_value_space: &mut JoinValueSpace,
+    dynamic_needle: bool, // Phase 258 P0: true if substr.length(), false if fixed (ch)
 ) -> JoinModule {
     let mut join_module = JoinModule::new();
 
@@ -127,6 +128,16 @@ pub(crate) fn lower_scan_with_init_minimal(
     let cur = join_value_space.alloc_local(); // substring result
     let match_cond = join_value_space.alloc_local(); // cur == ch
 
+    // Phase 258 P0: Conditional allocation for dynamic needle
+    let (needle_len, bound) = if dynamic_needle {
+        (
+            Some(join_value_space.alloc_local()), // substr.length()
+            Some(join_value_space.alloc_local()), // len - needle_len
+        )
+    } else {
+        (None, None)
+    };
+
     // k_exit params
     let i_exit_param = join_value_space.alloc_param(); // exit parameter (index or -1)
 
@@ -172,17 +183,75 @@ pub(crate) fn lower_scan_with_init_minimal(
             args: vec![s_step_param],
         }));
 
-    // 2. exit_cond = (i >= len)
-    loop_step_func
-        .body
-        .push(JoinInst::Compute(MirLikeInst::Compare {
-            dst: exit_cond,
-            op: CompareOp::Ge,
-            lhs: i_step_param,
-            rhs: len,
-        }));
-
-    // 3. const -1
+    // 2. i_plus_1 = i + 1 (loop increment; also the window end in fixed mode)
+    loop_step_func
+        .body
+        .push(JoinInst::Compute(MirLikeInst::Const {
+            dst: const_1,
+            value: ConstValue::Integer(1),
+        }));
+    loop_step_func
+        .body
+        .push(JoinInst::Compute(MirLikeInst::BinOp {
+            dst: i_plus_1,
+            op: BinOpKind::Add,
+            lhs: i_step_param,
+            rhs: const_1,
+        }));
+
+    // Phase 258 P0: Dynamic needle support - pick exit condition and window end.
+    // (needle_len, bound) are both Some exactly when dynamic_needle is true.
+    let (exit_cond_op, exit_cond_rhs, window_end) = match (needle_len, bound) {
+        (Some(needle_len), Some(bound)) => {
+            // 2a. needle_len = substr.length()
+            loop_step_func
+                .body
+                .push(JoinInst::Compute(MirLikeInst::BoxCall {
+                    dst: Some(needle_len),
+                    box_name: "StringBox".to_string(),
+                    method: "length".to_string(),
+                    args: vec![ch_step_param], // ch_step_param is actually substr in dynamic mode
+                }));
+
+            // 2b. bound = len - needle_len
+            loop_step_func
+                .body
+                .push(JoinInst::Compute(MirLikeInst::BinOp {
+                    dst: bound,
+                    op: BinOpKind::Sub,
+                    lhs: len,
+                    rhs: needle_len,
+                }));
+
+            // 2c. window_end = i + needle_len (substring window end)
+            let window_end = join_value_space.alloc_local();
+            loop_step_func
+                .body
+                .push(JoinInst::Compute(MirLikeInst::BinOp {
+                    dst: window_end,
+                    op: BinOpKind::Add,
+                    lhs: i_step_param,
+                    rhs: needle_len,
+                }));
+
+            // Not found once i > len - needle_len
+            (CompareOp::Gt, bound, window_end)
+        }
+        // Fixed: not found once i >= len; the window is [i, i + 1)
+        _ => (CompareOp::Ge, len, i_plus_1),
+    };
+
+    // 3. exit_cond comparison (dynamic: i > bound, fixed: i >= len)
+    loop_step_func
+        .body
+        .push(JoinInst::Compute(MirLikeInst::Compare {
+            dst: exit_cond,
+            op: exit_cond_op,
+            lhs: i_step_param,
+            rhs: exit_cond_rhs,
+        }));
+
+    // 4. const -1
     loop_step_func
         .body
         .push(JoinInst::Compute(MirLikeInst::Const {
@@ -190,38 +259,21 @@ pub(crate) fn lower_scan_with_init_minimal(
             value: ConstValue::Integer(-1),
         }));
 
-    // 4. Jump(k_exit, [-1], cond=exit_cond) - not found case
+    // 5. Jump(k_exit, [-1], cond=exit_cond) - not found case
     loop_step_func.body.push(JoinInst::Jump {
         cont: k_exit_id.as_cont(),
         args: vec![const_minus_1],
         cond: Some(exit_cond),
     });
 
-    // 5. i_plus_1 = i + 1
-    loop_step_func
-        .body
-        .push(JoinInst::Compute(MirLikeInst::Const {
-            dst: const_1,
-            value: ConstValue::Integer(1),
-        }));
-
-    loop_step_func
-        .body
-        .push(JoinInst::Compute(MirLikeInst::BinOp {
-            dst: i_plus_1,
-            op: BinOpKind::Add,
-            lhs: i_step_param,
-            rhs: const_1,
-        }));
-
-    // 6. cur = s.substring(i, i_plus_1) - init-time BoxCall
+    // 6. cur = s.substring(i, window_end) - fixed or dynamic window
     loop_step_func
         .body
         .push(JoinInst::Compute(MirLikeInst::BoxCall {
             dst: Some(cur),
             box_name: "StringBox".to_string(),
             method: "substring".to_string(),
-            args: vec![s_step_param, i_step_param, i_plus_1],
+            args: vec![s_step_param, i_step_param, window_end],
         }));
 
     // 7. match_cond = (cur == ch)
diff --git a/tools/smokes/v2/profiles/integration/apps/phase258_p0_index_of_string_llvm_exe.sh b/tools/smokes/v2/profiles/integration/apps/phase258_p0_index_of_string_llvm_exe.sh
new file mode 100644
index 00000000..e941e576
--- /dev/null
+++ b/tools/smokes/v2/profiles/integration/apps/phase258_p0_index_of_string_llvm_exe.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Phase 258 P0: index_of_string (dynamic needle) - LLVM backend
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../../../../.." && pwd)"
+HAKORUNE_BIN="${HAKORUNE_BIN:-$PROJECT_ROOT/target/release/hakorune}"
+HAKO_PATH="$PROJECT_ROOT/apps/tests/phase258_p0_index_of_string_min.hako"
+
+# Test: "hello world".index_of_string("world") → 6
+EXPECTED_EXIT=6
+
+# The program reports its result through the exit status (6 here), so errexit
+# must be off while it runs; otherwise the script aborts before $? is read.
+set +e
+NYASH_LLVM_USE_HARNESS=1 "$HAKORUNE_BIN" --backend llvm "$HAKO_PATH"
+actual_exit=$?
+set -e
+
+if [[ $actual_exit -eq $EXPECTED_EXIT ]]; then
+    echo "✅ phase258_p0_index_of_string_llvm_exe: PASS (exit=$actual_exit)"
+    exit 0
+else
+    echo "❌ phase258_p0_index_of_string_llvm_exe: FAIL (expected=$EXPECTED_EXIT, got=$actual_exit)"
+    exit 1
+fi
diff --git a/tools/smokes/v2/profiles/integration/apps/phase258_p0_index_of_string_vm.sh b/tools/smokes/v2/profiles/integration/apps/phase258_p0_index_of_string_vm.sh
new file mode 100644
index 00000000..cfab0f06
--- /dev/null
+++ b/tools/smokes/v2/profiles/integration/apps/phase258_p0_index_of_string_vm.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# Phase 258 P0: index_of_string pattern (dynamic needle) - VM
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../../../../.." && pwd)"
+HAKORUNE_BIN="${HAKORUNE_BIN:-$PROJECT_ROOT/target/release/hakorune}"
+HAKO_PATH="$PROJECT_ROOT/apps/tests/phase258_p0_index_of_string_min.hako"
+
+echo "[INFO] Environment check passed"
+echo "[INFO] Plugin mode: dynamic"
+echo "[INFO] Dynamic plugins check passed"
+
+# Phase 258 P0: Step 1 - Add --verify flag (fail-fast on MIR errors)
+set +e
+VERIFY_OUTPUT=$("$HAKORUNE_BIN" --backend vm --verify "$HAKO_PATH" 2>&1)
+VERIFY_EXIT=$?
+set -e
+
+if [ "$VERIFY_EXIT" -ne 0 ]; then
+    echo "❌ phase258_p0_index_of_string_vm: FAIL (MIR verification failed)"
+    echo "$VERIFY_OUTPUT"
+    exit 1
+fi
+
+# Phase 258 P0: Step 2 - Run VM with error detection
+set +e
+OUTPUT=$("$HAKORUNE_BIN" --backend vm "$HAKO_PATH" 2>&1)
+EXIT_CODE=$?
+set -e
+
+# Check for VM errors in output (regardless of exit code)
+if echo "$OUTPUT" | grep -Ei "error|panic|undefined|phi pred mismatch"; then
+    echo "❌ phase258_p0_index_of_string_vm: FAIL (VM runtime error detected)"
+    echo "$OUTPUT"
+    exit 1
+fi
+
+# Validate expected exit code (now safe - we've ruled out errors)
+# Expected: 6 (index of "world" in "hello world")
+EXPECTED_EXIT=6
+if [ "$EXIT_CODE" -eq "$EXPECTED_EXIT" ]; then
+    echo "✅ phase258_p0_index_of_string_vm: PASS (exit=$EXIT_CODE, no errors)"
+    exit 0
+else
+    echo "❌ phase258_p0_index_of_string_vm: FAIL (exit=$EXIT_CODE, expected $EXPECTED_EXIT)"
+    echo "$OUTPUT"
+    exit 1
+fi