diff --git a/src/mir/loop_pattern_detection/function_scope_capture.rs b/src/mir/loop_pattern_detection/function_scope_capture/analyzers.rs similarity index 64% rename from src/mir/loop_pattern_detection/function_scope_capture.rs rename to src/mir/loop_pattern_detection/function_scope_capture/analyzers.rs index c5177563..c9a7dfab 100644 --- a/src/mir/loop_pattern_detection/function_scope_capture.rs +++ b/src/mir/loop_pattern_detection/function_scope_capture/analyzers.rs @@ -1,98 +1,12 @@ -//! Phase 200-A: Function scope capture infrastructure -//! -//! This module provides types for capturing function-scoped variables -//! that are effectively immutable within a loop context. -//! -//! # Example -//! -//! For a function like JsonParser._atoi(): -//! -//! ```nyash -//! method _atoi(s, pos, len) { -//! local digits = "0123456789" // <-- Captured variable -//! local value = 0 -//! loop(pos < len) { -//! local ch = s.charAt(pos) -//! local digit = digits.indexOf(ch) // Uses captured 'digits' -//! if (digit < 0) { break } -//! value = value * 10 + digit -//! pos = pos + 1 -//! } -//! return value -//! } -//! ``` -//! -//! Here, `digits` is: -//! - Declared in function scope (before the loop) -//! - Never reassigned (effectively immutable) -//! - Referenced in loop body (digits.indexOf(ch)) -//! -//! Phase 200-A creates the infrastructure to capture such variables. -//! Phase 200-B will implement the actual detection logic. +//! Core analysis functions for function scope capture use crate::ast::ASTNode; use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape; use crate::mir::ValueId; use std::collections::BTreeSet; -/// A variable captured from function scope for use in loop conditions/body. 
-/// -/// Example: `local digits = "0123456789"` in JsonParser._atoi() -/// -/// # Invariants -/// -/// - `name`: Variable name as it appears in the source code -/// - `host_id`: MIR ValueId of the original definition in the host function -/// - `is_immutable`: True if the variable is never reassigned in the function -#[derive(Debug, Clone)] -pub struct CapturedVar { - /// Variable name (e.g., "digits", "table") - pub name: String, - - /// MIR ValueId of the original definition in the host function - pub host_id: ValueId, - - /// Whether this variable is never reassigned in the function - /// - /// Phase 200-B will implement assignment analysis to determine this. - /// For now, this is always set to true as a conservative default. - pub is_immutable: bool, -} - -/// Environment containing function-scoped captured variables. -/// -/// Phase 200-A: Type definition only, not yet integrated with ConditionEnv. -/// Phase 200-B: Will be populated by FunctionScopeCaptureAnalyzer and -/// integrated into ConditionEnv via ConditionEnvBuilder v2. -#[derive(Debug, Clone, Default)] -pub struct CapturedEnv { - /// List of captured variables - pub vars: Vec<CapturedVar>, -} - -impl CapturedEnv { - /// Create a new empty environment - pub fn new() -> Self { - Self { vars: Vec::new() } - } - - /// Check if the environment is empty - pub fn is_empty(&self) -> bool { - self.vars.is_empty() - } - - /// Add a captured variable to the environment - pub fn add_var(&mut self, var: CapturedVar) { - self.vars.push(var); - } - - /// Look up a captured variable by name - /// - /// Returns `Some(&CapturedVar)` if found, `None` otherwise. - pub fn get(&self, name: &str) -> Option<&CapturedVar> { - self.vars.iter().find(|v| v.name == name) - } -} +use super::helpers::*; +use super::types::{CapturedEnv, CapturedVar}; /// Analyzes function-scoped variables that can be safely captured for loop conditions/body.
/// @@ -443,461 +357,17 @@ pub(crate) fn analyze_captured_vars_v2( env } -/// Find the index of a loop statement in the function body -/// -/// Returns Some(index) if found, None otherwise. -#[allow(dead_code)] -fn find_stmt_index(fn_body: &[ASTNode], loop_ast: &ASTNode) -> Option<usize> { - // Compare by pointer address (same AST node instance) - fn_body - .iter() - .position(|stmt| std::ptr::eq(stmt as *const ASTNode, loop_ast as *const ASTNode)) -} - -/// Phase 200-C: Find loop index by structure matching (condition + body comparison) -/// -/// Instead of pointer comparison, compare the loop structure. -/// This is useful when the loop AST is constructed dynamically. -fn find_loop_index_by_structure( - fn_body: &[ASTNode], - target_condition: &ASTNode, - target_body: &[ASTNode], -) -> Option<usize> { - for (idx, stmt) in fn_body.iter().enumerate() { - if let ASTNode::Loop { - condition, body, .. - } = stmt - { - // Compare condition and body by structure - if ast_matches(condition, target_condition) && body_matches(body, target_body) { - return Some(idx); - } - } - } - None -} - -/// Simple structural AST comparison -/// -/// Uses Debug string comparison as a heuristic. This is not perfect but -/// works well enough for finding loops by structure. -fn ast_matches(a: &ASTNode, b: &ASTNode) -> bool { - format!("{:?}", a) == format!("{:?}", b) -} - -/// Compare two body slices by structure -fn body_matches(a: &[ASTNode], b: &[ASTNode]) -> bool { - if a.len() != b.len() { - return false; - } - a.iter().zip(b.iter()).all(|(x, y)| ast_matches(x, y)) -} - -/// Collect local variable declarations from statements -/// -/// Returns Vec<(name, init_expr)> for each variable declared with `local`. -fn collect_local_declarations(stmts: &[ASTNode]) -> Vec<(String, Option<Box<ASTNode>>)> { - let mut locals = Vec::new(); - - for stmt in stmts { - if let ASTNode::Local { - variables, - initial_values, - ..
- } = stmt - { - // Local declaration can have multiple variables (e.g., local a, b, c) - for (i, name) in variables.iter().enumerate() { - let init_expr = initial_values.get(i).and_then(|opt| opt.clone()); - locals.push((name.clone(), init_expr)); - } - } - } - - locals -} - -/// Check if expression is a safe constant (string/integer literal) -/// -/// Phase 200-B: Only string and integer literals are allowed. -/// Future: May expand to include other safe constant patterns. -fn is_safe_const_init(expr: &Option<Box<ASTNode>>) -> bool { - match expr { - Some(boxed) => match boxed.as_ref() { - ASTNode::Literal { value, .. } => matches!( - value, - crate::ast::LiteralValue::String(_) | crate::ast::LiteralValue::Integer(_) - ), - _ => false, - }, - None => false, - } -} - -/// Check if variable is reassigned anywhere in function body -/// -/// Walks the entire function body AST to detect any assignments to the variable. -/// Returns true if the variable is reassigned (excluding the initial local declaration). -fn is_reassigned_in_fn(fn_body: &[ASTNode], name: &str) -> bool { - fn check_node(node: &ASTNode, name: &str) -> bool { - match node { - // Assignment to this variable - ASTNode::Assignment { target, value, .. } => { - // Check if target is the variable we're looking for - let is_target_match = match target.as_ref() { - ASTNode::Variable { name: var_name, .. } => var_name == name, - ASTNode::FieldAccess { .. } | ASTNode::Index { .. } => { - // Field access or index assignment doesn't count as reassignment - false - } - _ => false, - }; - - is_target_match || check_node(value, name) - } - - // Grouped assignment expression: (x = expr) - ASTNode::GroupedAssignmentExpr { lhs, rhs, .. } => lhs == name || check_node(rhs, name), - - // Recursive cases - ASTNode::If { - condition, - then_body, - else_body, - ..
- } => { - check_node(condition, name) - || then_body.iter().any(|n| check_node(n, name)) - || else_body - .as_ref() - .map_or(false, |body| body.iter().any(|n| check_node(n, name))) - } - - ASTNode::Loop { - condition, body, .. - } => check_node(condition, name) || body.iter().any(|n| check_node(n, name)), - - ASTNode::While { - condition, body, .. - } => check_node(condition, name) || body.iter().any(|n| check_node(n, name)), - - ASTNode::TryCatch { - try_body, - catch_clauses, - finally_body, - .. - } => { - try_body.iter().any(|n| check_node(n, name)) - || catch_clauses - .iter() - .any(|clause| clause.body.iter().any(|n| check_node(n, name))) - || finally_body - .as_ref() - .map_or(false, |body| body.iter().any(|n| check_node(n, name))) - } - - ASTNode::UnaryOp { operand, .. } => check_node(operand, name), - - ASTNode::BinaryOp { left, right, .. } => { - check_node(left, name) || check_node(right, name) - } - - ASTNode::MethodCall { - object, arguments, .. - } => check_node(object, name) || arguments.iter().any(|arg| check_node(arg, name)), - - ASTNode::FunctionCall { arguments, .. } => { - arguments.iter().any(|arg| check_node(arg, name)) - } - - ASTNode::FieldAccess { object, .. } => check_node(object, name), - - ASTNode::Index { target, index, .. } => { - check_node(target, name) || check_node(index, name) - } - - ASTNode::Return { value, .. } => value.as_ref().map_or(false, |v| check_node(v, name)), - - ASTNode::Local { .. } => { - // Local declarations are not reassignments - false - } - - _ => false, - } - } - - fn_body.iter().any(|stmt| check_node(stmt, name)) -} - -/// Check if variable is referenced in loop condition or body -/// -/// Returns true if the variable name appears anywhere in the loop AST. -#[allow(dead_code)] -fn is_used_in_loop(loop_ast: &ASTNode, name: &str) -> bool { - fn check_usage(node: &ASTNode, name: &str) -> bool { - match node { - ASTNode::Variable { name: var_name, .. 
} => var_name == name, - - ASTNode::Loop { - condition, body, .. - } => check_usage(condition, name) || body.iter().any(|n| check_usage(n, name)), - - ASTNode::If { - condition, - then_body, - else_body, - .. - } => { - check_usage(condition, name) - || then_body.iter().any(|n| check_usage(n, name)) - || else_body - .as_ref() - .map_or(false, |body| body.iter().any(|n| check_usage(n, name))) - } - - ASTNode::Assignment { target, value, .. } => { - check_usage(target, name) || check_usage(value, name) - } - - ASTNode::UnaryOp { operand, .. } => check_usage(operand, name), - - ASTNode::BinaryOp { left, right, .. } => { - check_usage(left, name) || check_usage(right, name) - } - - ASTNode::MethodCall { - object, arguments, .. - } => check_usage(object, name) || arguments.iter().any(|arg| check_usage(arg, name)), - - ASTNode::FunctionCall { arguments, .. } => { - arguments.iter().any(|arg| check_usage(arg, name)) - } - - ASTNode::FieldAccess { object, .. } => check_usage(object, name), - - ASTNode::Index { target, index, .. } => { - check_usage(target, name) || check_usage(index, name) - } - - ASTNode::Return { value, .. } => value.as_ref().map_or(false, |v| check_usage(v, name)), - - ASTNode::Local { initial_values, .. } => initial_values - .iter() - .any(|opt| opt.as_ref().map_or(false, |init| check_usage(init, name))), - - _ => false, - } - } - - check_usage(loop_ast, name) -} - -/// Phase 200-C: Check if variable is used in loop condition or body (separate parts) -/// -/// This is used by analyze_captured_vars_v2 when condition and body are passed separately. -fn is_used_in_loop_parts(condition: &ASTNode, body: &[ASTNode], name: &str) -> bool { - fn check_usage(node: &ASTNode, name: &str) -> bool { - match node { - ASTNode::Variable { name: var_name, .. } => var_name == name, - - ASTNode::Loop { - condition, body, .. - } => check_usage(condition, name) || body.iter().any(|n| check_usage(n, name)), - - ASTNode::If { - condition, - then_body, - else_body, - .. 
- } => { - check_usage(condition, name) - || then_body.iter().any(|n| check_usage(n, name)) - || else_body - .as_ref() - .map_or(false, |body| body.iter().any(|n| check_usage(n, name))) - } - - ASTNode::Assignment { target, value, .. } => { - check_usage(target, name) || check_usage(value, name) - } - - ASTNode::UnaryOp { operand, .. } => check_usage(operand, name), - - ASTNode::BinaryOp { left, right, .. } => { - check_usage(left, name) || check_usage(right, name) - } - - ASTNode::MethodCall { - object, arguments, .. - } => check_usage(object, name) || arguments.iter().any(|arg| check_usage(arg, name)), - - ASTNode::FunctionCall { arguments, .. } => { - arguments.iter().any(|arg| check_usage(arg, name)) - } - - ASTNode::FieldAccess { object, .. } => check_usage(object, name), - - ASTNode::Index { target, index, .. } => { - check_usage(target, name) || check_usage(index, name) - } - - ASTNode::Return { value, .. } => value.as_ref().map_or(false, |v| check_usage(v, name)), - - ASTNode::Local { initial_values, .. } => initial_values - .iter() - .any(|opt| opt.as_ref().map_or(false, |init| check_usage(init, name))), - - _ => false, - } - } - - check_usage(condition, name) || body.iter().any(|n| check_usage(n, name)) -} - -/// Phase 245C: Collect all variable names used in loop condition and body -/// -/// Helper for function parameter capture. Returns a set of all variable names -/// that appear in the loop's condition or body. -fn collect_names_in_loop_parts(condition: &ASTNode, body: &[ASTNode]) -> BTreeSet<String> { - fn collect(node: &ASTNode, acc: &mut BTreeSet<String>) { - match node { - ASTNode::Variable { name, .. } => { - acc.insert(name.clone()); - } - ASTNode::If { - condition, - then_body, - else_body, - .. - } => { - collect(condition, acc); - for stmt in then_body { - collect(stmt, acc); - } - if let Some(else_stmts) = else_body { - for stmt in else_stmts { - collect(stmt, acc); - } - } - } - ASTNode::Assignment { target, value, ..
} => { - collect(target, acc); - collect(value, acc); - } - ASTNode::UnaryOp { operand, .. } => { - collect(operand, acc); - } - ASTNode::Return { - value: Some(operand), - .. - } => { - collect(operand, acc); - } - ASTNode::BinaryOp { left, right, .. } => { - collect(left, acc); - collect(right, acc); - } - ASTNode::MethodCall { - object, arguments, .. - } => { - collect(object, acc); - for arg in arguments { - collect(arg, acc); - } - } - ASTNode::FunctionCall { arguments, .. } => { - for arg in arguments { - collect(arg, acc); - } - } - ASTNode::Local { initial_values, .. } => { - for init_opt in initial_values { - if let Some(val) = init_opt { - collect(val, acc); - } - } - } - ASTNode::FieldAccess { object, .. } => { - collect(object, acc); - } - ASTNode::Index { target, index, .. } => { - collect(target, acc); - collect(index, acc); - } - ASTNode::Loop { - condition, body, .. - } => { - collect(condition, acc); - for stmt in body { - collect(stmt, acc); - } - } - _ => {} - } - } - - let mut acc = BTreeSet::new(); - collect(condition, &mut acc); - for stmt in body { - collect(stmt, &mut acc); - } - acc -} - #[cfg(test)] mod tests { use super::*; - - #[test] - fn test_captured_env_empty() { - let env = CapturedEnv::new(); - assert!(env.is_empty()); - assert!(env.get("digits").is_none()); - } - - #[test] - fn test_captured_env_add_and_get() { - let mut env = CapturedEnv::new(); - env.add_var(CapturedVar { - name: "digits".to_string(), - host_id: ValueId(42), - is_immutable: true, - }); - - assert!(!env.is_empty()); - let var = env.get("digits").unwrap(); - assert_eq!(var.name, "digits"); - assert_eq!(var.host_id, ValueId(42)); - assert!(var.is_immutable); - } - - #[test] - fn test_captured_env_multiple_vars() { - let mut env = CapturedEnv::new(); - env.add_var(CapturedVar { - name: "digits".to_string(), - host_id: ValueId(42), - is_immutable: true, - }); - env.add_var(CapturedVar { - name: "table".to_string(), - host_id: ValueId(100), - is_immutable: true, - }); 
- - assert_eq!(env.vars.len(), 2); - assert!(env.get("digits").is_some()); - assert!(env.get("table").is_some()); - assert!(env.get("nonexistent").is_none()); - } + use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span}; + use crate::mir::BasicBlockId; + use std::collections::{BTreeMap, BTreeSet}; // Phase 200-B: Capture analysis tests #[test] fn test_capture_simple_digits() { - use crate::ast::{ASTNode, LiteralValue, Span}; - // Build AST for: // local digits = "0123456789" // loop(i < 10) { @@ -949,9 +419,6 @@ mod tests { let fn_body = vec![digits_decl, loop_node.clone()]; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { header: BasicBlockId(0), body: BasicBlockId(1), @@ -978,8 +445,6 @@ mod tests { #[test] fn test_capture_reassigned_rejected() { - use crate::ast::{ASTNode, LiteralValue, Span}; - // Build AST for: // local digits = "0123456789" // digits = "abc" // reassignment @@ -1041,9 +506,6 @@ mod tests { let fn_body = vec![digits_decl, reassignment, loop_node.clone()]; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { header: BasicBlockId(0), body: BasicBlockId(1), @@ -1065,8 +527,6 @@ mod tests { #[test] fn test_capture_after_loop_rejected() { - use crate::ast::{ASTNode, LiteralValue, Span}; - // Build AST for: // loop(i < 10) { } // local digits = "0123456789" // defined AFTER loop @@ -1099,9 +559,6 @@ mod tests { let fn_body = vec![loop_node.clone(), digits_decl]; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { header: BasicBlockId(0), body: BasicBlockId(1), @@ -1123,8 +580,6 @@ mod tests { #[test] fn test_capture_method_call_init_rejected() { - use crate::ast::{ASTNode, LiteralValue, Span}; - // Build AST for: // local result 
= someBox.getValue() // MethodCall init // loop(i < 10) { @@ -1178,9 +633,6 @@ mod tests { let fn_body = vec![result_decl, loop_node.clone()]; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { header: BasicBlockId(0), body: BasicBlockId(1), @@ -1202,8 +654,6 @@ mod tests { #[test] fn test_capture_unused_in_loop_rejected() { - use crate::ast::{ASTNode, LiteralValue, Span}; - // Build AST for: // local digits = "0123456789" // not used in loop // loop(i < 10) { @@ -1238,9 +688,6 @@ mod tests { let fn_body = vec![digits_decl, loop_node.clone()]; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { header: BasicBlockId(0), body: BasicBlockId(1), @@ -1264,8 +711,6 @@ mod tests { #[test] fn test_capture_function_param_used_in_condition() { - use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span}; - // Simulate: fn parse_number(s, p, len) { loop(p < len) { ... 
} } // Expected: 'len' should be captured (used in condition, not reassigned) @@ -1302,9 +747,6 @@ mod tests { span: Span::unknown(), }]; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { header: BasicBlockId(0), body: BasicBlockId(1), @@ -1331,8 +773,6 @@ mod tests { #[test] fn test_capture_function_param_used_in_method_call() { - use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span}; - // Simulate: fn parse_number(s, p) { loop(p < s.length()) { ch = s.charAt(p) } } // Expected: 's' should be captured (used in condition and body, not reassigned) @@ -1392,9 +832,6 @@ mod tests { }, ]; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { header: BasicBlockId(0), body: BasicBlockId(1), @@ -1421,8 +858,6 @@ mod tests { #[test] fn test_capture_function_param_reassigned_rejected() { - use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span}; - // Simulate: fn bad_func(x) { x = 5; loop(x < 10) { x = x + 1 } } // Expected: 'x' should NOT be captured (reassigned in function) @@ -1472,9 +907,6 @@ mod tests { span: Span::unknown(), }]; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { header: BasicBlockId(0), body: BasicBlockId(1), @@ -1497,8 +929,6 @@ mod tests { #[test] fn test_capture_mixed_locals_and_params() { - use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span}; - // Simulate: fn parse(s, len) { local digits = "0123"; loop(p < len) { ch = digits.indexOf(...); s.charAt(...) 
} } // Expected: 'len', 's', and 'digits' should all be captured @@ -1560,9 +990,6 @@ mod tests { span: Span::unknown(), }]; - use crate::mir::BasicBlockId; - use std::collections::{BTreeMap, BTreeSet}; - let scope = crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape { header: BasicBlockId(0), body: BasicBlockId(1), diff --git a/src/mir/loop_pattern_detection/function_scope_capture/helpers.rs b/src/mir/loop_pattern_detection/function_scope_capture/helpers.rs new file mode 100644 index 00000000..20853f5d --- /dev/null +++ b/src/mir/loop_pattern_detection/function_scope_capture/helpers.rs @@ -0,0 +1,411 @@ +//! Helper functions for AST analysis, reassignment detection, and structural matching + +use crate::ast::ASTNode; +use std::collections::BTreeSet; + +/// Find the index of a loop statement in the function body +/// +/// Returns Some(index) if found, None otherwise. +#[allow(dead_code)] +pub(super) fn find_stmt_index(fn_body: &[ASTNode], loop_ast: &ASTNode) -> Option<usize> { + // Compare by pointer address (same AST node instance) + fn_body + .iter() + .position(|stmt| std::ptr::eq(stmt as *const ASTNode, loop_ast as *const ASTNode)) +} + +/// Phase 200-C: Find loop index by structure matching (condition + body comparison) +/// +/// Instead of pointer comparison, compare the loop structure. +/// This is useful when the loop AST is constructed dynamically. +pub(super) fn find_loop_index_by_structure( + fn_body: &[ASTNode], + target_condition: &ASTNode, + target_body: &[ASTNode], +) -> Option<usize> { + for (idx, stmt) in fn_body.iter().enumerate() { + if let ASTNode::Loop { + condition, body, .. + } = stmt + { + // Compare condition and body by structure + if ast_matches(condition, target_condition) && body_matches(body, target_body) { + return Some(idx); + } + } + } + None +} + +/// Simple structural AST comparison +/// +/// Uses Debug string comparison as a heuristic. This is not perfect but +/// works well enough for finding loops by structure.
+pub(super) fn ast_matches(a: &ASTNode, b: &ASTNode) -> bool { + format!("{:?}", a) == format!("{:?}", b) +} + +/// Compare two body slices by structure +pub(super) fn body_matches(a: &[ASTNode], b: &[ASTNode]) -> bool { + if a.len() != b.len() { + return false; + } + a.iter().zip(b.iter()).all(|(x, y)| ast_matches(x, y)) +} + +/// Collect local variable declarations from statements +/// +/// Returns Vec<(name, init_expr)> for each variable declared with `local`. +pub(super) fn collect_local_declarations( + stmts: &[ASTNode], +) -> Vec<(String, Option<Box<ASTNode>>)> { + let mut locals = Vec::new(); + + for stmt in stmts { + if let ASTNode::Local { + variables, + initial_values, + .. + } = stmt + { + // Local declaration can have multiple variables (e.g., local a, b, c) + for (i, name) in variables.iter().enumerate() { + let init_expr = initial_values.get(i).and_then(|opt| opt.clone()); + locals.push((name.clone(), init_expr)); + } + } + } + + locals +} + +/// Check if expression is a safe constant (string/integer literal) +/// +/// Phase 200-B: Only string and integer literals are allowed. +/// Future: May expand to include other safe constant patterns. +pub(super) fn is_safe_const_init(expr: &Option<Box<ASTNode>>) -> bool { + match expr { + Some(boxed) => match boxed.as_ref() { + ASTNode::Literal { value, .. } => matches!( + value, + crate::ast::LiteralValue::String(_) | crate::ast::LiteralValue::Integer(_) + ), + _ => false, + }, + None => false, + } +} + +/// Check if variable is reassigned anywhere in function body +/// +/// Walks the entire function body AST to detect any assignments to the variable. +/// Returns true if the variable is reassigned (excluding the initial local declaration). +pub(super) fn is_reassigned_in_fn(fn_body: &[ASTNode], name: &str) -> bool { + fn check_node(node: &ASTNode, name: &str) -> bool { + match node { + // Assignment to this variable + ASTNode::Assignment { target, value, ..
} => { + // Check if target is the variable we're looking for + let is_target_match = match target.as_ref() { + ASTNode::Variable { name: var_name, .. } => var_name == name, + ASTNode::FieldAccess { .. } | ASTNode::Index { .. } => { + // Field access or index assignment doesn't count as reassignment + false + } + _ => false, + }; + + is_target_match || check_node(value, name) + } + + // Grouped assignment expression: (x = expr) + ASTNode::GroupedAssignmentExpr { lhs, rhs, .. } => lhs == name || check_node(rhs, name), + + // Recursive cases + ASTNode::If { + condition, + then_body, + else_body, + .. + } => { + check_node(condition, name) + || then_body.iter().any(|n| check_node(n, name)) + || else_body + .as_ref() + .map_or(false, |body| body.iter().any(|n| check_node(n, name))) + } + + ASTNode::Loop { + condition, body, .. + } => check_node(condition, name) || body.iter().any(|n| check_node(n, name)), + + ASTNode::While { + condition, body, .. + } => check_node(condition, name) || body.iter().any(|n| check_node(n, name)), + + ASTNode::TryCatch { + try_body, + catch_clauses, + finally_body, + .. + } => { + try_body.iter().any(|n| check_node(n, name)) + || catch_clauses + .iter() + .any(|clause| clause.body.iter().any(|n| check_node(n, name))) + || finally_body + .as_ref() + .map_or(false, |body| body.iter().any(|n| check_node(n, name))) + } + + ASTNode::UnaryOp { operand, .. } => check_node(operand, name), + + ASTNode::BinaryOp { left, right, .. } => { + check_node(left, name) || check_node(right, name) + } + + ASTNode::MethodCall { + object, arguments, .. + } => check_node(object, name) || arguments.iter().any(|arg| check_node(arg, name)), + + ASTNode::FunctionCall { arguments, .. } => { + arguments.iter().any(|arg| check_node(arg, name)) + } + + ASTNode::FieldAccess { object, .. } => check_node(object, name), + + ASTNode::Index { target, index, .. } => { + check_node(target, name) || check_node(index, name) + } + + ASTNode::Return { value, .. 
} => value.as_ref().map_or(false, |v| check_node(v, name)), + + ASTNode::Local { .. } => { + // Local declarations are not reassignments + false + } + + _ => false, + } + } + + fn_body.iter().any(|stmt| check_node(stmt, name)) +} + +/// Check if variable is referenced in loop condition or body +/// +/// Returns true if the variable name appears anywhere in the loop AST. +#[allow(dead_code)] +pub(super) fn is_used_in_loop(loop_ast: &ASTNode, name: &str) -> bool { + fn check_usage(node: &ASTNode, name: &str) -> bool { + match node { + ASTNode::Variable { name: var_name, .. } => var_name == name, + + ASTNode::Loop { + condition, body, .. + } => check_usage(condition, name) || body.iter().any(|n| check_usage(n, name)), + + ASTNode::If { + condition, + then_body, + else_body, + .. + } => { + check_usage(condition, name) + || then_body.iter().any(|n| check_usage(n, name)) + || else_body + .as_ref() + .map_or(false, |body| body.iter().any(|n| check_usage(n, name))) + } + + ASTNode::Assignment { target, value, .. } => { + check_usage(target, name) || check_usage(value, name) + } + + ASTNode::UnaryOp { operand, .. } => check_usage(operand, name), + + ASTNode::BinaryOp { left, right, .. } => { + check_usage(left, name) || check_usage(right, name) + } + + ASTNode::MethodCall { + object, arguments, .. + } => check_usage(object, name) || arguments.iter().any(|arg| check_usage(arg, name)), + + ASTNode::FunctionCall { arguments, .. } => { + arguments.iter().any(|arg| check_usage(arg, name)) + } + + ASTNode::FieldAccess { object, .. } => check_usage(object, name), + + ASTNode::Index { target, index, .. } => { + check_usage(target, name) || check_usage(index, name) + } + + ASTNode::Return { value, .. } => value.as_ref().map_or(false, |v| check_usage(v, name)), + + ASTNode::Local { initial_values, .. 
} => initial_values + .iter() + .any(|opt| opt.as_ref().map_or(false, |init| check_usage(init, name))), + + _ => false, + } + } + + check_usage(loop_ast, name) +} + +/// Phase 200-C: Check if variable is used in loop condition or body (separate parts) +/// +/// This is used by analyze_captured_vars_v2 when condition and body are passed separately. +pub(super) fn is_used_in_loop_parts(condition: &ASTNode, body: &[ASTNode], name: &str) -> bool { + fn check_usage(node: &ASTNode, name: &str) -> bool { + match node { + ASTNode::Variable { name: var_name, .. } => var_name == name, + + ASTNode::Loop { + condition, body, .. + } => check_usage(condition, name) || body.iter().any(|n| check_usage(n, name)), + + ASTNode::If { + condition, + then_body, + else_body, + .. + } => { + check_usage(condition, name) + || then_body.iter().any(|n| check_usage(n, name)) + || else_body + .as_ref() + .map_or(false, |body| body.iter().any(|n| check_usage(n, name))) + } + + ASTNode::Assignment { target, value, .. } => { + check_usage(target, name) || check_usage(value, name) + } + + ASTNode::UnaryOp { operand, .. } => check_usage(operand, name), + + ASTNode::BinaryOp { left, right, .. } => { + check_usage(left, name) || check_usage(right, name) + } + + ASTNode::MethodCall { + object, arguments, .. + } => check_usage(object, name) || arguments.iter().any(|arg| check_usage(arg, name)), + + ASTNode::FunctionCall { arguments, .. } => { + arguments.iter().any(|arg| check_usage(arg, name)) + } + + ASTNode::FieldAccess { object, .. } => check_usage(object, name), + + ASTNode::Index { target, index, .. } => { + check_usage(target, name) || check_usage(index, name) + } + + ASTNode::Return { value, .. } => value.as_ref().map_or(false, |v| check_usage(v, name)), + + ASTNode::Local { initial_values, .. 
} => initial_values + .iter() + .any(|opt| opt.as_ref().map_or(false, |init| check_usage(init, name))), + + _ => false, + } + } + + check_usage(condition, name) || body.iter().any(|n| check_usage(n, name)) +} + +/// Phase 245C: Collect all variable names used in loop condition and body +/// +/// Helper for function parameter capture. Returns a set of all variable names +/// that appear in the loop's condition or body. +pub(super) fn collect_names_in_loop_parts( + condition: &ASTNode, + body: &[ASTNode], +) -> BTreeSet<String> { + fn collect(node: &ASTNode, acc: &mut BTreeSet<String>) { + match node { + ASTNode::Variable { name, .. } => { + acc.insert(name.clone()); + } + ASTNode::If { + condition, + then_body, + else_body, + .. + } => { + collect(condition, acc); + for stmt in then_body { + collect(stmt, acc); + } + if let Some(else_stmts) = else_body { + for stmt in else_stmts { + collect(stmt, acc); + } + } + } + ASTNode::Assignment { target, value, .. } => { + collect(target, acc); + collect(value, acc); + } + ASTNode::UnaryOp { operand, .. } => { + collect(operand, acc); + } + ASTNode::Return { + value: Some(operand), + .. + } => { + collect(operand, acc); + } + ASTNode::BinaryOp { left, right, .. } => { + collect(left, acc); + collect(right, acc); + } + ASTNode::MethodCall { + object, arguments, .. + } => { + collect(object, acc); + for arg in arguments { + collect(arg, acc); + } + } + ASTNode::FunctionCall { arguments, .. } => { + for arg in arguments { + collect(arg, acc); + } + } + ASTNode::Local { initial_values, .. } => { + for init_opt in initial_values { + if let Some(val) = init_opt { + collect(val, acc); + } + } + } + ASTNode::FieldAccess { object, .. } => { + collect(object, acc); + } + ASTNode::Index { target, index, .. } => { + collect(target, acc); + collect(index, acc); + } + ASTNode::Loop { + condition, body, ..
+ } => { + collect(condition, acc); + for stmt in body { + collect(stmt, acc); + } + } + _ => {} + } + } + + let mut acc = BTreeSet::new(); + collect(condition, &mut acc); + for stmt in body { + collect(stmt, &mut acc); + } + acc +} diff --git a/src/mir/loop_pattern_detection/function_scope_capture/mod.rs b/src/mir/loop_pattern_detection/function_scope_capture/mod.rs new file mode 100644 index 00000000..d78a9a13 --- /dev/null +++ b/src/mir/loop_pattern_detection/function_scope_capture/mod.rs @@ -0,0 +1,57 @@ +//! Phase 200-A: Function scope capture infrastructure +//! +//! This module provides types and analysis functions for capturing function-scoped variables +//! that are effectively immutable within a loop context. +//! +//! # Example +//! +//! For a function like JsonParser._atoi(): +//! +//! ```nyash +//! method _atoi(s, pos, len) { +//! local digits = "0123456789" // <-- Captured variable +//! local value = 0 +//! loop(pos < len) { +//! local ch = s.charAt(pos) +//! local digit = digits.indexOf(ch) // Uses captured 'digits' +//! if (digit < 0) { break } +//! value = value * 10 + digit +//! pos = pos + 1 +//! } +//! return value +//! } +//! ``` +//! +//! Here, `digits` is: +//! - Declared in function scope (before the loop) +//! - Never reassigned (effectively immutable) +//! - Referenced in loop body (digits.indexOf(ch)) +//! +//! Phase 200-A creates the infrastructure to capture such variables. +//! Phase 200-B implements the actual detection logic. +//! +//! # Module Structure +//! +//! This module is organized following the Box-First principle: +//! +//! - `types` - Core type definitions (CapturedVar, CapturedEnv) +//! - `analyzers` - Analysis functions (analyze_captured_vars, analyze_captured_vars_v2) +//! - `helpers` - Helper functions for AST analysis and structural matching +//! +//! # Public API +//! +//! The primary entry points are: +//! +//! - `analyze_captured_vars()` - Main analysis function (uses pointer comparison) +//! 
- `analyze_captured_vars_v2()` - Alternative using structural matching (Phase 200-C) +//! - `CapturedVar` - Represents a captured variable +//! - `CapturedEnv` - Environment containing all captured variables + +// Module declarations +mod analyzers; +mod helpers; +mod types; + +// Public re-exports +pub(crate) use analyzers::{analyze_captured_vars, analyze_captured_vars_v2}; +pub use types::{CapturedEnv, CapturedVar}; diff --git a/src/mir/loop_pattern_detection/function_scope_capture/types.rs b/src/mir/loop_pattern_detection/function_scope_capture/types.rs new file mode 100644 index 00000000..0070286f --- /dev/null +++ b/src/mir/loop_pattern_detection/function_scope_capture/types.rs @@ -0,0 +1,110 @@ +//! Type definitions for function scope capture + +use crate::mir::ValueId; + +/// A variable captured from function scope for use in loop conditions/body. +/// +/// Example: `local digits = "0123456789"` in JsonParser._atoi() +/// +/// # Invariants +/// +/// - `name`: Variable name as it appears in the source code +/// - `host_id`: MIR ValueId of the original definition in the host function +/// - `is_immutable`: True if the variable is never reassigned in the function +#[derive(Debug, Clone)] +pub struct CapturedVar { + /// Variable name (e.g., "digits", "table") + pub name: String, + + /// MIR ValueId of the original definition in the host function + pub host_id: ValueId, + + /// Whether this variable is never reassigned in the function + /// + /// Phase 200-B will implement assignment analysis to determine this. + /// For now, this is always set to true as a conservative default. + pub is_immutable: bool, +} + +/// Environment containing function-scoped captured variables. +/// +/// Phase 200-A: Type definition only, not yet integrated with ConditionEnv. +/// Phase 200-B: Will be populated by FunctionScopeCaptureAnalyzer and +/// integrated into ConditionEnv via ConditionEnvBuilder v2. 
+#[derive(Debug, Clone, Default)] +pub struct CapturedEnv { + /// List of captured variables + pub vars: Vec, +} + +impl CapturedEnv { + /// Create a new empty environment + pub fn new() -> Self { + Self { vars: Vec::new() } + } + + /// Check if the environment is empty + pub fn is_empty(&self) -> bool { + self.vars.is_empty() + } + + /// Add a captured variable to the environment + pub fn add_var(&mut self, var: CapturedVar) { + self.vars.push(var); + } + + /// Look up a captured variable by name + /// + /// Returns `Some(&CapturedVar)` if found, `None` otherwise. + pub fn get(&self, name: &str) -> Option<&CapturedVar> { + self.vars.iter().find(|v| v.name == name) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_captured_env_empty() { + let env = CapturedEnv::new(); + assert!(env.is_empty()); + assert!(env.get("digits").is_none()); + } + + #[test] + fn test_captured_env_add_and_get() { + let mut env = CapturedEnv::new(); + env.add_var(CapturedVar { + name: "digits".to_string(), + host_id: ValueId(42), + is_immutable: true, + }); + + assert!(!env.is_empty()); + let var = env.get("digits").unwrap(); + assert_eq!(var.name, "digits"); + assert_eq!(var.host_id, ValueId(42)); + assert!(var.is_immutable); + } + + #[test] + fn test_captured_env_multiple_vars() { + let mut env = CapturedEnv::new(); + env.add_var(CapturedVar { + name: "digits".to_string(), + host_id: ValueId(42), + is_immutable: true, + }); + env.add_var(CapturedVar { + name: "table".to_string(), + host_id: ValueId(100), + is_immutable: true, + }); + + assert_eq!(env.vars.len(), 2); + assert!(env.get("digits").is_some()); + assert!(env.get("table").is_some()); + assert!(env.get("nonexistent").is_none()); + } +}