feat(joinir): Phase 171-C-2 Trim pattern detection in LoopBodyCarrierPromoter

Implements the Trim pattern detection logic for carrier promotion:

- find_definition_in_body(): Iterative AST traversal to locate variable definitions
- is_substring_method_call(): Detects substring() method calls
- extract_equality_literals(): Extracts string literals from OR chains (ch == " " || ch == "\t")
- TrimPatternInfo: Captures detected pattern details for carrier promotion

This enables Pattern 5 to detect trim-style loops:
```hako
loop(start < end) {
    local ch = s.substring(start, start+1)
    if ch == " " || ch == "\t" || ch == "\n" || ch == "\r" {
        start = start + 1
    } else {
        break
    }
}
```

Unit tests cover:
- Simple and nested definition detection
- substring method call detection
- Single and chained equality literal extraction
- Full Trim pattern detection with 2-4 whitespace characters

Next: Phase 171-C-3 integration with Pattern 2/4 routing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-12-07 23:09:25 +09:00
parent 907a54b55c
commit 88400e7e22
11 changed files with 2334 additions and 4668 deletions

View File

@ -130,8 +130,17 @@ pub fn is_outer_scope_variable(
scope: Option<&LoopScopeShape>,
) -> bool {
match scope {
None => false, // No scope info → assume body-local
// No scope information: be conservative but *not* overstrict.
// We treat unknown as body-local only when we have a LoopScopeShape
// that explicitly marks it so (via body_locals / definitions).
// Here we simply say "unknown" and let the caller decide.
None => false,
Some(scope) => {
// If the variable is explicitly marked as body-local, it is NOT outer.
if scope.body_locals.contains(var_name) {
return false;
}
// Check 1: Is it a pinned variable (loop parameter or passed-in)?
if scope.pinned.contains(var_name) {
return true;
@ -151,15 +160,27 @@ pub fn is_outer_scope_variable(
// This supports loop patterns like:
// local i = 0 (header)
// loop(i < 10) {
// ...
// i = i + 1 (latch)
// ...
// i = i + 1 (latch)
// }
if def_blocks.iter().all(|b| *b == scope.header || *b == scope.latch) {
return true;
}
// Any other definition pattern (e.g. body-only or body+header)
// is treated as body-local / internal.
return false;
}
false
// At this point:
// - The variable is NOT in body_locals
// - There is no explicit definition info for it
//
// This typically means "function parameter" or "outer local"
// (e.g. JsonParserBox.s, .pos, etc.). Those should be treated
// as OuterLocal for condition analysis, otherwise we wrongly
// block valid loops as using loop-body-local variables.
true
}
}
}
@ -338,6 +359,34 @@ mod tests {
assert!(!is_outer_scope_variable("ch", Some(&scope)));
}
#[test]
fn test_is_outer_scope_variable_function_param_like() {
    // A variable that is neither listed in `body_locals` nor present in
    // `variable_definitions` stands for something like a function parameter
    // or an outer local. It must be classified as OuterLocal so that valid
    // conditions such as `p < s.length()` (with `s` a parameter) are
    // accepted by Pattern 2/4.
    use std::collections::{BTreeMap, BTreeSet};

    // Every variable set is empty, so "s" is unknown to the scope.
    let scope = LoopScopeShape {
        header: BasicBlockId(0),
        body: BasicBlockId(1),
        latch: BasicBlockId(2),
        exit: BasicBlockId(3),
        pinned: BTreeSet::new(),
        carriers: BTreeSet::new(),
        body_locals: BTreeSet::new(),
        exit_live: BTreeSet::new(),
        progress_carrier: None,
        variable_definitions: BTreeMap::new(),
    };

    let is_outer = is_outer_scope_variable("s", Some(&scope));
    assert!(
        is_outer,
        "Function parameter-like variable should be classified as OuterLocal"
    );
}
// ========================================================================
// Phase 170-ultrathink: Additional Edge Case Tests (Issue #3)
// ========================================================================

View File

@ -0,0 +1,600 @@
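//! Phase 171-C: LoopBodyCarrierPromoter Box
//!
//! A box that promotes LoopBodyLocal variables to carriers so that the loop
//! can be handled within the range of Pattern 2/4.
//!
//! ## Design Philosophy
//!
//! - Input: LoopScopeShape + LoopConditionScope + break-condition AST
//! - Output: CarrierInfo when promotion succeeds, otherwise the reason it failed
//! - Role: turn a LoopBodyLocal into an "already evaluated bool carrier"
//!
//! ## Implementation Scope
//!
//! ### Phase 171-C-1: Skeleton implementation ✅
//! - Detection of LoopBodyLocal variables
//! - Search for their definitions
//!
//! ### Phase 171-C-2: Trim pattern promotion ✅
//! - Detection of the `local ch = s.substring(...)` pattern
//! - Detection of equality comparisons such as `ch == " " || ch == "\t" ...`
//! - Generation of the information needed to convert to a bool carrier
//!   (named `is_<var>_match`, e.g. `is_ch_match`)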
use crate::ast::{ASTNode, BinaryOperator, LiteralValue};
use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape;
use crate::mir::loop_pattern_detection::loop_condition_scope::LoopConditionScope;
/// Promotion request: the borrowed inputs handed to
/// `LoopBodyCarrierPromoter::try_promote`.
pub struct PromotionRequest<'a> {
    /// Scope information of the loop.
    pub scope: &'a LoopScopeShape,
    /// Scope classification of the variables used in the condition.
    pub cond_scope: &'a LoopConditionScope,
    /// AST of the break condition (for Pattern 2).
    pub break_cond: Option<&'a ASTNode>,
    /// AST of the loop body.
    pub loop_body: &'a [ASTNode],
}
/// Phase 171-C-2: Details of a detected Trim pattern.
///
/// Plain data: it can be cloned, debug-printed and compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TrimPatternInfo {
    /// Name of the LoopBodyLocal variable (e.g. `"ch"`).
    pub var_name: String,
    /// String literals the variable is compared against
    /// (e.g. `[" ", "\t", "\n", "\r"]`).
    pub comparison_literals: Vec<String>,
    /// Name of the carrier to generate (e.g. `"is_ch_match"`).
    pub carrier_name: String,
}
/// Outcome of a promotion attempt.
#[derive(Debug, Clone)]
pub enum PromotionResult {
    /// Promotion succeeded: carries the detected Trim pattern information.
    ///
    /// Phase 171-C-2: the actual update of CarrierInfo is implemented in
    /// Phase 171-C-3.
    Promoted {
        /// Phase 171-C-2: details of the detected Trim pattern.
        trim_info: TrimPatternInfo,
    },
    /// Promotion is not possible.
    CannotPromote {
        /// Human-readable explanation of why promotion failed.
        reason: String,
        /// Names of the LoopBodyLocal variables that caused the problem.
        vars: Vec<String>,
    },
}
/// Phase 171-C: LoopBodyCarrierPromoter Box
///
/// Unit struct used as a namespace; all functionality lives in associated
/// functions.
pub struct LoopBodyCarrierPromoter;

impl LoopBodyCarrierPromoter {
    /// Tries to promote a LoopBodyLocal variable to a carrier.
    ///
    /// # Phase 171-C-2: Trim pattern
    ///
    /// The current implementation:
    /// 1. collects the condition variables classified as LoopBodyLocal,
    /// 2. looks up the definition of each one in the loop body,
    /// 3. checks for the Trim pattern (`substring()` definition + string
    ///    equality comparisons in the break condition),
    /// 4. returns a [`TrimPatternInfo`] for the first variable that matches.
    ///
    /// # Returns
    ///
    /// * `PromotionResult::Promoted` for the first LoopBodyLocal variable
    ///   matching the Trim pattern.
    /// * `PromotionResult::CannotPromote` with an empty `vars` list when the
    ///   condition uses no LoopBodyLocal variable at all.
    /// * `PromotionResult::CannotPromote` listing every LoopBodyLocal name
    ///   when none of them matches.
    ///
    /// Progress diagnostics are written to stderr unconditionally.
    pub fn try_promote(request: &PromotionRequest) -> PromotionResult {
        use crate::mir::loop_pattern_detection::loop_condition_scope::CondVarScope;

        // 1. Collect the LoopBodyLocal condition variables.
        let body_locals: Vec<&String> = request
            .cond_scope
            .vars
            .iter()
            .filter(|v| v.scope == CondVarScope::LoopBodyLocal)
            .map(|v| &v.name)
            .collect();

        if body_locals.is_empty() {
            // Nothing to promote when there is no LoopBodyLocal.
            return PromotionResult::CannotPromote {
                reason: "No LoopBodyLocal variables to promote".to_string(),
                vars: vec![],
            };
        }

        eprintln!(
            "[promoter/pattern5] Phase 171-C: Found {} LoopBodyLocal variables: {:?}",
            body_locals.len(),
            body_locals
        );

        for &var_name in &body_locals {
            // 2. Locate the definition of this LoopBodyLocal.
            let def_node = match Self::find_definition_in_body(request.loop_body, var_name) {
                Some(node) => node,
                None => {
                    eprintln!(
                        "[promoter/pattern5] Definition for '{}' not found in loop body",
                        var_name
                    );
                    continue;
                }
            };
            eprintln!(
                "[promoter/pattern5] Found definition for '{}' in loop body",
                var_name
            );

            // 3. Phase 171-C-2: the definition must be a substring() call.
            if !Self::is_substring_method_call(def_node) {
                continue;
            }
            eprintln!(
                "[promoter/pattern5] '{}' is defined by substring() call - Trim pattern candidate",
                var_name
            );

            // 4. Pull the compared string literals out of the break condition.
            let break_cond = match request.break_cond {
                Some(cond) => cond,
                None => continue,
            };
            let literals = Self::extract_equality_literals(break_cond, var_name);
            if literals.is_empty() {
                continue;
            }
            eprintln!(
                "[promoter/pattern5] Trim pattern detected! var='{}', literals={:?}",
                var_name, literals
            );

            // Promotion succeeded. Phase 171-C-2 only reports the pattern;
            // the actual CarrierInfo update is implemented in Phase 171-C-3.
            let trim_info = TrimPatternInfo {
                var_name: var_name.to_string(),
                comparison_literals: literals,
                carrier_name: format!("is_{}_match", var_name),
            };
            return PromotionResult::Promoted { trim_info };
        }

        // No variable matched a promotable pattern.
        PromotionResult::CannotPromote {
            reason: "No promotable Trim pattern detected".to_string(),
            vars: body_locals.iter().map(|name| name.to_string()).collect(),
        }
    }

    /// Searches the loop body for a definition (assignment) of a variable.
    ///
    /// # Phase 171-C-2
    ///
    /// Walks the AST with an iterative worklist and looks for an
    /// `ASTNode::Assignment` whose target is a `Variable` named `var_name`.
    /// The search descends into `If` branches and nested `Loop` bodies; all
    /// other node kinds are ignored.
    ///
    /// The worklist is a LIFO stack seeded in source order, so statements
    /// are visited from the end of the body backwards. When the variable is
    /// assigned more than once, the textually last assignment is returned.
    ///
    /// NOTE(review): only `Assignment` nodes are recognized. If the parser
    /// represents `local var = ...` declarations with a different node kind,
    /// those will not be found here - confirm against the AST definition.
    ///
    /// # Arguments
    ///
    /// * `body` - AST nodes of the loop body
    /// * `var_name` - name of the variable to look for
    ///
    /// # Returns
    ///
    /// `Some(rhs)` with the right-hand side of the assignment when one is
    /// found, otherwise `None`.
    fn find_definition_in_body<'a>(body: &'a [ASTNode], var_name: &str) -> Option<&'a ASTNode> {
        let mut worklist: Vec<&'a ASTNode> = body.iter().collect();

        while let Some(node) = worklist.pop() {
            match node {
                // Assignment: `target = value`.
                ASTNode::Assignment { target, value, .. } => {
                    // A definition is found when the target is a Variable
                    // with the requested name.
                    if let ASTNode::Variable { name, .. } = target.as_ref() {
                        if name == var_name {
                            return Some(value.as_ref());
                        }
                    }
                }
                // If: search both the then-branch and the else-branch.
                ASTNode::If { then_body, else_body, .. } => {
                    for stmt in then_body {
                        worklist.push(stmt);
                    }
                    if let Some(else_stmts) = else_body {
                        for stmt in else_stmts {
                            worklist.push(stmt);
                        }
                    }
                }
                // Loop: search the body of a nested loop.
                ASTNode::Loop { body: loop_body, .. } => {
                    for stmt in loop_body {
                        worklist.push(stmt);
                    }
                }
                // Every other node kind is ignored.
                _ => {}
            }
        }

        None
    }

    /// Returns `true` when the node is a method call named `substring`.
    ///
    /// # Phase 171-C-2
    ///
    /// The Trim pattern extracts one character with a call such as
    /// `local ch = s.substring(start, start+1)`. Only the method name is
    /// checked; the receiver and the arguments are not inspected.
    fn is_substring_method_call(node: &ASTNode) -> bool {
        matches!(
            node,
            ASTNode::MethodCall { method, .. } if method == "substring"
        )
    }

    /// Extracts the string literals that `var_name` is compared against for
    /// equality in a break condition.
    ///
    /// # Phase 171-C-2
    ///
    /// From a condition such as `ch == " " || ch == "\t" || ch == "\n"` this
    /// returns `[" ", "\t", "\n"]`. Both `var == "lit"` and `"lit" == var`
    /// are recognized. Literals are returned in source (left-to-right)
    /// order; duplicates are not removed.
    ///
    /// Only `Or` chains and unary operators are traversed. Comparisons under
    /// any other binary operator are not visited.
    ///
    /// NOTE(review): the operand of every unary operator is searched, not
    /// only logical negation, and a negated comparison contributes its
    /// literal exactly like a plain one - confirm this is intended.
    ///
    /// # Arguments
    ///
    /// * `cond` - AST of the break condition
    /// * `var_name` - name of the variable being compared
    ///
    /// # Returns
    ///
    /// The string literals used in equality comparisons with `var_name`.
    fn extract_equality_literals(cond: &ASTNode, var_name: &str) -> Vec<String> {
        let mut result = Vec::new();
        let mut worklist = vec![cond];

        while let Some(node) = worklist.pop() {
            match node {
                ASTNode::BinaryOp { operator, left, right, .. } => match operator {
                    // Or: visit both sides. The worklist is a LIFO stack, so
                    // the right side is pushed first to make the left side
                    // pop first and keep the literals in source order.
                    BinaryOperator::Or => {
                        worklist.push(right.as_ref());
                        worklist.push(left.as_ref());
                    }
                    // Equal: accept `var == "lit"` and the reversed
                    // `"lit" == var`.
                    BinaryOperator::Equal => match (left.as_ref(), right.as_ref()) {
                        (
                            ASTNode::Variable { name, .. },
                            ASTNode::Literal { value: LiteralValue::String(s), .. },
                        )
                        | (
                            ASTNode::Literal { value: LiteralValue::String(s), .. },
                            ASTNode::Variable { name, .. },
                        ) if name == var_name => result.push(s.clone()),
                        _ => {}
                    },
                    _ => {}
                },
                // UnaryOp: search inside the operand.
                ASTNode::UnaryOp { operand, .. } => worklist.push(operand.as_ref()),
                _ => {}
            }
        }

        result
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::Span;
    use crate::mir::BasicBlockId;
    use crate::mir::loop_pattern_detection::loop_condition_scope::{
        CondVarScope, LoopConditionScope,
    };
    use std::collections::{BTreeMap, BTreeSet};

    // ------------------------------------------------------------------
    // Fixtures
    // ------------------------------------------------------------------

    /// Loop shape with four distinct blocks and every variable set empty.
    fn minimal_scope() -> LoopScopeShape {
        LoopScopeShape {
            header: BasicBlockId(0),
            body: BasicBlockId(1),
            latch: BasicBlockId(2),
            exit: BasicBlockId(3),
            pinned: BTreeSet::new(),
            carriers: BTreeSet::new(),
            body_locals: BTreeSet::new(),
            exit_live: BTreeSet::new(),
            progress_carrier: None,
            variable_definitions: BTreeMap::new(),
        }
    }

    /// Condition scope holding exactly one LoopBodyLocal variable.
    fn cond_scope_with_body_local(var_name: &str) -> LoopConditionScope {
        let mut cond_scope = LoopConditionScope::new();
        cond_scope.add_var(var_name.to_string(), CondVarScope::LoopBodyLocal);
        cond_scope
    }

    /// Variable reference node.
    fn var_node(name: &str) -> ASTNode {
        ASTNode::Variable {
            name: name.to_string(),
            span: Span::unknown(),
        }
    }

    /// String literal node.
    fn str_literal(s: &str) -> ASTNode {
        ASTNode::Literal {
            value: LiteralValue::String(s.to_string()),
            span: Span::unknown(),
        }
    }

    /// Binary operation node shared by `eq_cmp` and `or_expr`.
    fn binary(operator: BinaryOperator, left: ASTNode, right: ASTNode) -> ASTNode {
        ASTNode::BinaryOp {
            operator,
            left: Box::new(left),
            right: Box::new(right),
            span: Span::unknown(),
        }
    }

    /// Equality comparison `var == "literal"`.
    fn eq_cmp(var_name: &str, literal: &str) -> ASTNode {
        binary(BinaryOperator::Equal, var_node(var_name), str_literal(literal))
    }

    /// Logical `left || right`.
    fn or_expr(left: ASTNode, right: ASTNode) -> ASTNode {
        binary(BinaryOperator::Or, left, right)
    }

    /// Argument-less method call `object.method()`.
    fn method_call(object: &str, method: &str) -> ASTNode {
        ASTNode::MethodCall {
            object: Box::new(var_node(object)),
            method: method.to_string(),
            arguments: vec![],
            span: Span::unknown(),
        }
    }

    /// Assignment `target = value`.
    fn assignment(target: &str, value: ASTNode) -> ASTNode {
        ASTNode::Assignment {
            target: Box::new(var_node(target)),
            value: Box::new(value),
            span: Span::unknown(),
        }
    }

    /// Runs `try_promote` against a minimal loop scope.
    fn run_promoter(
        cond_scope: &LoopConditionScope,
        break_cond: Option<&ASTNode>,
        loop_body: &[ASTNode],
    ) -> PromotionResult {
        let scope = minimal_scope();
        let request = PromotionRequest {
            scope: &scope,
            cond_scope,
            break_cond,
            loop_body,
        };
        LoopBodyCarrierPromoter::try_promote(&request)
    }

    /// Unwraps a `Promoted` result, failing the test on `CannotPromote`.
    fn expect_promoted(result: PromotionResult) -> TrimPatternInfo {
        match result {
            PromotionResult::Promoted { trim_info } => trim_info,
            PromotionResult::CannotPromote { reason, .. } => {
                panic!("Expected Promoted, got CannotPromote: {}", reason);
            }
        }
    }

    /// Unwraps a `CannotPromote` result into `(reason, vars)`, failing the
    /// test with `failure_message` on any other variant.
    fn expect_cannot_promote(
        result: PromotionResult,
        failure_message: &str,
    ) -> (String, Vec<String>) {
        match result {
            PromotionResult::CannotPromote { reason, vars } => (reason, vars),
            _ => panic!("{}", failure_message),
        }
    }

    // ------------------------------------------------------------------
    // Phase 171-C-1: basic promoter behaviour
    // ------------------------------------------------------------------

    #[test]
    fn test_promoter_no_body_locals() {
        // An empty condition scope contains no LoopBodyLocal variable.
        let cond_scope = LoopConditionScope::new();

        let (reason, vars) = expect_cannot_promote(
            run_promoter(&cond_scope, None, &[]),
            "Expected CannotPromote when no LoopBodyLocal variables",
        );

        assert!(vars.is_empty());
        assert!(reason.contains("No LoopBodyLocal"));
    }

    #[test]
    fn test_promoter_body_local_no_definition() {
        // A LoopBodyLocal exists, but the empty body holds no definition.
        let cond_scope = cond_scope_with_body_local("ch");

        let (reason, vars) = expect_cannot_promote(
            run_promoter(&cond_scope, None, &[]),
            "Expected CannotPromote when definition not found",
        );

        assert!(vars.contains(&"ch".to_string()));
        assert!(reason.contains("No promotable Trim pattern"));
    }

    // ========================================================================
    // Phase 171-C-2: Trim Pattern Detection Tests
    // ========================================================================

    #[test]
    fn test_find_definition_in_body_simple() {
        // ch = s.substring(...)
        let body = vec![assignment("ch", method_call("s", "substring"))];

        let found = LoopBodyCarrierPromoter::find_definition_in_body(&body, "ch");

        assert!(found.is_some(), "Definition should be found");
        match found.unwrap() {
            ASTNode::MethodCall { method, .. } => {
                assert_eq!(method, "substring");
            }
            _ => panic!("Expected MethodCall"),
        }
    }

    #[test]
    fn test_find_definition_in_body_nested_if() {
        // The definition lives inside the then-branch of an `if`.
        let guarded = ASTNode::If {
            condition: Box::new(var_node("flag")),
            then_body: vec![assignment("ch", method_call("s", "substring"))],
            else_body: None,
            span: Span::unknown(),
        };
        let body = vec![guarded];

        let found = LoopBodyCarrierPromoter::find_definition_in_body(&body, "ch");

        assert!(found.is_some(), "Definition should be found inside if block");
    }

    #[test]
    fn test_is_substring_method_call() {
        let is_substring = |method: &str| {
            LoopBodyCarrierPromoter::is_substring_method_call(&method_call("s", method))
        };

        assert!(is_substring("substring"));
        assert!(!is_substring("length"));
    }

    #[test]
    fn test_extract_equality_literals_single() {
        // ch == " "
        let cond = eq_cmp("ch", " ");

        let literals = LoopBodyCarrierPromoter::extract_equality_literals(&cond, "ch");

        assert_eq!(literals.len(), 1);
        assert!(literals.contains(&" ".to_string()));
    }

    #[test]
    fn test_extract_equality_literals_or_chain() {
        // ch == " " || ch == "\t" || ch == "\n"
        let first_two = or_expr(eq_cmp("ch", " "), eq_cmp("ch", "\t"));
        let cond = or_expr(first_two, eq_cmp("ch", "\n"));

        let literals = LoopBodyCarrierPromoter::extract_equality_literals(&cond, "ch");

        assert_eq!(literals.len(), 3);
        assert!(literals.contains(&" ".to_string()));
        assert!(literals.contains(&"\t".to_string()));
        assert!(literals.contains(&"\n".to_string()));
    }

    #[test]
    fn test_extract_equality_literals_wrong_var() {
        // other_var == " " must not yield anything when asking about "ch".
        let cond = eq_cmp("other_var", " ");

        let literals = LoopBodyCarrierPromoter::extract_equality_literals(&cond, "ch");

        assert!(literals.is_empty(), "Should not extract literals for wrong variable");
    }

    #[test]
    fn test_trim_pattern_full_detection() {
        // Full Trim pattern:
        // - LoopBodyLocal: ch
        // - Definition: ch = s.substring(...)
        // - Break condition: ch == " " || ch == "\t"
        let cond_scope = cond_scope_with_body_local("ch");
        let loop_body = vec![assignment("ch", method_call("s", "substring"))];
        let break_cond = or_expr(eq_cmp("ch", " "), eq_cmp("ch", "\t"));

        let trim_info = expect_promoted(run_promoter(&cond_scope, Some(&break_cond), &loop_body));

        assert_eq!(trim_info.var_name, "ch");
        assert_eq!(trim_info.comparison_literals.len(), 2);
        assert!(trim_info.comparison_literals.contains(&" ".to_string()));
        assert!(trim_info.comparison_literals.contains(&"\t".to_string()));
        assert_eq!(trim_info.carrier_name, "is_ch_match");
    }

    #[test]
    fn test_trim_pattern_with_4_whitespace_chars() {
        // Complete whitespace set: " " || "\t" || "\n" || "\r"
        let cond_scope = cond_scope_with_body_local("ch");
        let loop_body = vec![assignment("ch", method_call("s", "substring"))];
        let blanks = or_expr(eq_cmp("ch", " "), eq_cmp("ch", "\t"));
        let line_ends = or_expr(eq_cmp("ch", "\n"), eq_cmp("ch", "\r"));
        let break_cond = or_expr(blanks, line_ends);

        let trim_info = expect_promoted(run_promoter(&cond_scope, Some(&break_cond), &loop_body));

        assert_eq!(trim_info.comparison_literals.len(), 4);
    }
}

View File

@ -758,3 +758,6 @@ pub mod condition_var_analyzer;
// Phase 170-ultrathink: Error Message Utilities
pub mod error_messages;
// Phase 171-C: LoopBodyLocal Carrier Promotion
pub mod loop_body_carrier_promoter;