From e4b5a8e832b28cc87b69d4bbb589a9253bcf68af Mon Sep 17 00:00:00 2001 From: tomoaki Date: Mon, 22 Dec 2025 23:35:43 +0900 Subject: [PATCH] feat(plan): Phase 273 P2 Step 3-6 - Pattern7 (SplitScan) to Plan line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate Pattern7 from legacy lowering to Plan architecture: DomainPlan (mod.rs): - Added SplitScan(SplitScanPlan) variant - SplitScanPlan: s_var, sep_var, result_var, i_var, start_var Extractor (pattern7_split_scan.rs): - extract_split_scan_plan() returning DomainPlan - Reuses existing extract_split_scan_parts() Router (router.rs): - Pattern7 now uses Plan line (Normalize→Verify→Lower) - Removed from LOOP_PATTERNS table Normalizer (normalizer.rs): - normalize_split_scan() - 400+ lines migrated from impl - 6 new blocks (header/body/then/else/step/after) plus the existing preheader - 4 PHIs: header(2) + step(2) for i/start carriers - Side effect: push with EffectMask::MUT Bug fixes: - Pattern6 extractor returns Ok(None) for non-match (allows fallback) - Reverse scan filtered early in extractor (P1 scope) Tests: - phase256_p0_split_vm: PASS (exit=3) - phase258_p0_index_of_string_vm: PASS (exit=6) Lowerer no longer contains "split" - pattern-agnostic achieved! 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../patterns/pattern6_scan_with_init.rs | 19 +- .../joinir/patterns/pattern7_split_scan.rs | 26 ++ .../control_flow/joinir/patterns/router.rs | 38 +- src/mir/builder/control_flow/plan/mod.rs | 21 +- .../builder/control_flow/plan/normalizer.rs | 420 +++++++++++++++++- 5 files changed, 515 insertions(+), 9 deletions(-) diff --git a/src/mir/builder/control_flow/joinir/patterns/pattern6_scan_with_init.rs b/src/mir/builder/control_flow/joinir/patterns/pattern6_scan_with_init.rs index 4fced8fa..7ad435e2 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern6_scan_with_init.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern6_scan_with_init.rs @@ -85,6 +85,14 @@ pub(crate) fn extract_scan_with_init_plan( // Call internal extraction helper let parts = extract_scan_with_init_parts(condition, body, fn_body)?; + // Phase 273 P1: Filter out patterns not supported by Plan-based normalizer + if let Some(ref p) = parts { + // P1 scope: Only forward scan (step=1) supported + if p.step_lit != 1 { + return Ok(None); // Let legacy path handle reverse scans + } + } + // Wrap in DomainPlan if extracted successfully Ok(parts.map(|p| { DomainPlan::ScanWithInit(ScanWithInitPlan { @@ -416,8 +424,15 @@ fn extract_scan_with_init_parts( } } - let needle = needle_opt.ok_or_else(|| "No matching needle pattern found")?; - let early_return_expr = early_return_expr_opt.ok_or_else(|| "No early return found")?; + // Phase 273 P2: Return Ok(None) if pattern doesn't match (allow Pattern7 to try) + let needle = match needle_opt { + Some(n) => n, + None => return Ok(None), // Not Pattern6, try next pattern + }; + let early_return_expr = match early_return_expr_opt { + Some(e) => e, + None => return Ok(None), // Not Pattern6, try next pattern + }; // Phase 257 P0: Determine haystack based on scan direction let haystack = match scan_direction { diff --git 
a/src/mir/builder/control_flow/joinir/patterns/pattern7_split_scan.rs b/src/mir/builder/control_flow/joinir/patterns/pattern7_split_scan.rs index 9040ed26..451a0b43 100644 --- a/src/mir/builder/control_flow/joinir/patterns/pattern7_split_scan.rs +++ b/src/mir/builder/control_flow/joinir/patterns/pattern7_split_scan.rs @@ -54,6 +54,32 @@ struct SplitScanParts { post_push_ast: Option, // result.push(s.substring(start, s.length())) } +/// Phase 273 P2: Extract SplitScanPlan from AST (pure) +/// +/// Returns DomainPlan if pattern matches, Ok(None) if not. +pub(crate) fn extract_split_scan_plan( + condition: &ASTNode, + body: &[ASTNode], + _post_loop_code: &[ASTNode], +) -> Result, String> { + use crate::mir::builder::control_flow::plan::{DomainPlan, SplitScanPlan}; + + // Try to extract using existing implementation + match extract_split_scan_parts(condition, body, &[]) { + Ok(parts) => { + let plan = SplitScanPlan { + s_var: parts.s_var, + sep_var: parts.sep_var, + result_var: parts.result_var, + i_var: parts.i_var, + start_var: parts.start_var, + }; + Ok(Some(DomainPlan::SplitScan(plan))) + } + Err(_) => Ok(None), // Pattern doesn't match + } +} + /// Phase 256 P0: Extract SplitScanParts from AST /// /// **P0 Strategy**: Fixed-form parser (Fail-Fast on mismatch) diff --git a/src/mir/builder/control_flow/joinir/patterns/router.rs b/src/mir/builder/control_flow/joinir/patterns/router.rs index 60880539..8322e35a 100644 --- a/src/mir/builder/control_flow/joinir/patterns/router.rs +++ b/src/mir/builder/control_flow/joinir/patterns/router.rs @@ -236,11 +236,8 @@ pub(crate) static LOOP_PATTERNS: &[LoopPatternEntry] = &[ }, // Phase 273 P0.1: Pattern6 entry removed (migrated to Plan-based routing) // Pattern6_ScanWithInit now handled via extract_scan_with_init_plan() + PlanLowerer - LoopPatternEntry { - name: "Pattern7_SplitScan", // Phase 256 P0: split/tokenization with variable step (before P3) - detect: super::pattern7_split_scan::can_lower, - lower: 
super::pattern7_split_scan::lower, - }, + // Phase 273 P2: Pattern7 entry removed (migrated to Plan-based routing) + // Pattern7_SplitScan now handled via extract_split_scan_plan() + PlanLowerer LoopPatternEntry { name: "Pattern8_BoolPredicateScan", // Phase 259 P0: boolean predicate scan (is_integer/is_valid) detect: super::pattern8_scan_bool_predicate::can_lower, @@ -325,6 +322,37 @@ pub(crate) fn route_loop_pattern( } } + // Phase 273 P2: Try Plan-based Pattern7 (SplitScan) + // Flow: Extract → Normalize → Verify → Lower + match super::pattern7_split_scan::extract_split_scan_plan( + ctx.condition, + ctx.body, + &[], + )? { + Some(domain_plan) => { + // DomainPlan extracted successfully + trace::trace().pattern("route", "Pattern7_SplitScan (DomainPlan)", true); + + // Step 1: Normalize DomainPlan → CorePlan + let core_plan = PlanNormalizer::normalize(builder, domain_plan, ctx)?; + + // Step 2: Verify CorePlan invariants (fail-fast) + PlanVerifier::verify(&core_plan)?; + + // Step 3: Lower CorePlan → MIR + return PlanLowerer::lower(builder, core_plan, ctx); + } + None => { + // Not Pattern7 - continue to other patterns + if ctx.debug { + trace::trace().debug( + "route", + "Pattern7 Plan extraction returned None, trying other patterns", + ); + } + } + } + // Phase 183: Route based on pre-classified pattern kind // Pattern kind was already determined by ctx.pattern_kind in LoopPatternContext::new() // This eliminates duplicate detection logic across routers. diff --git a/src/mir/builder/control_flow/plan/mod.rs b/src/mir/builder/control_flow/plan/mod.rs index e5da60a0..11c2f781 100644 --- a/src/mir/builder/control_flow/plan/mod.rs +++ b/src/mir/builder/control_flow/plan/mod.rs @@ -43,7 +43,9 @@ pub(in crate::mir::builder) mod verifier; pub(in crate::mir::builder) enum DomainPlan { /// Pattern6: index_of / find scan ScanWithInit(ScanWithInitPlan), - // P2+: Split(SplitPlan), BoolPredicate(BoolPredicatePlan), etc. 
+ /// Pattern7: split / tokenization scan + SplitScan(SplitScanPlan), + // P2+: BoolPredicate(BoolPredicatePlan), etc. } /// Phase 273 P0: Scan direction for forward/reverse scan @@ -79,6 +81,23 @@ pub(in crate::mir::builder) struct ScanWithInitPlan { pub dynamic_needle: bool, } +/// Phase 273 P2: Extracted structure for split-scan pattern +/// +/// This structure contains all the information needed to lower a split-style loop. +#[derive(Debug, Clone)] +pub(in crate::mir::builder) struct SplitScanPlan { + /// Haystack variable name (e.g., "s") + pub s_var: String, + /// Separator variable name (e.g., "separator") + pub sep_var: String, + /// Accumulator variable name (e.g., "result", ArrayBox) + pub result_var: String, + /// Loop index variable name (e.g., "i") + pub i_var: String, + /// Segment start position variable name (e.g., "start") + pub start_var: String, +} + // ============================================================================ // CorePlan (固定語彙 - 構造ノードのみ) // ============================================================================ diff --git a/src/mir/builder/control_flow/plan/normalizer.rs b/src/mir/builder/control_flow/plan/normalizer.rs index 3d1847e0..0cad79bc 100644 --- a/src/mir/builder/control_flow/plan/normalizer.rs +++ b/src/mir/builder/control_flow/plan/normalizer.rs @@ -12,7 +12,8 @@ //! Lowerer processes CorePlan without any pattern knowledge. 
use super::{ - CoreCarrierInfo, CoreEffectPlan, CoreLoopPlan, CorePhiInfo, CorePlan, DomainPlan, ScanWithInitPlan, + CoreCarrierInfo, CoreEffectPlan, CoreLoopPlan, CorePhiInfo, CorePlan, DomainPlan, + ScanWithInitPlan, SplitScanPlan, }; use crate::mir::builder::control_flow::joinir::patterns::router::LoopPatternContext; use crate::mir::builder::MirBuilder; @@ -36,6 +37,7 @@ impl PlanNormalizer { ) -> Result { match domain { DomainPlan::ScanWithInit(parts) => Self::normalize_scan_with_init(builder, parts, ctx), + DomainPlan::SplitScan(parts) => Self::normalize_split_scan(builder, parts, ctx), } } @@ -370,4 +372,420 @@ impl PlanNormalizer { Ok(CorePlan::Loop(loop_plan)) } + + /// SplitScan → CorePlan 変換 + /// + /// Expands split-specific semantics into generic CorePlan: + /// - 2 carriers: i (loop index), start (segment start) + /// - 6 blocks: preheader, header, body, then, else, step, after + /// - 4 PHI nodes: header (i_current, start_current) + step (i_next, start_next) + /// - Side effect: result.push(segment) in then_bb + fn normalize_split_scan( + builder: &mut MirBuilder, + parts: SplitScanPlan, + ctx: &LoopPatternContext, + ) -> Result { + use crate::mir::builder::control_flow::joinir::trace; + + let trace_logger = trace::trace(); + let debug = ctx.debug; + + if debug { + trace_logger.debug( + "normalizer/split_scan", + &format!( + "Phase 273 P2: Normalizing SplitScan for {}", + ctx.func_name + ), + ); + } + + // Step 1: Get host ValueIds for variables + let s_host = builder + .variable_ctx + .variable_map + .get(&parts.s_var) + .copied() + .ok_or_else(|| format!("[normalizer] Variable {} not found", parts.s_var))?; + + let sep_host = builder + .variable_ctx + .variable_map + .get(&parts.sep_var) + .copied() + .ok_or_else(|| format!("[normalizer] Variable {} not found", parts.sep_var))?; + + let result_host = builder + .variable_ctx + .variable_map + .get(&parts.result_var) + .copied() + .ok_or_else(|| format!("[normalizer] Variable {} not found", 
parts.result_var))?; + + let i_init_val = builder + .variable_ctx + .variable_map + .get(&parts.i_var) + .copied() + .ok_or_else(|| format!("[normalizer] Variable {} not found", parts.i_var))?; + + let start_init_val = builder + .variable_ctx + .variable_map + .get(&parts.start_var) + .copied() + .ok_or_else(|| format!("[normalizer] Variable {} not found", parts.start_var))?; + + // Step 2: Capture preheader block + let preheader_bb = builder + .current_block + .ok_or_else(|| "[normalizer] No current block for loop entry".to_string())?; + + // Step 3: Allocate BasicBlockIds for 6 blocks + let header_bb = builder.next_block_id(); + let body_bb = builder.next_block_id(); + let then_bb = builder.next_block_id(); + let else_bb = builder.next_block_id(); + let step_bb = builder.next_block_id(); + let after_bb = builder.next_block_id(); + + // Step 4: Allocate ValueIds for PHI destinations (before blocks) + let i_current = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(i_current, MirType::Integer); + + let start_current = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(start_current, MirType::Integer); + + let i_next = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(i_next, MirType::Integer); + + let start_next = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(start_next, MirType::Integer); + + // Step 5: Allocate ValueIds for expressions + let sep_len = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(sep_len, MirType::Integer); + + let s_len = builder.next_value_id(); + builder.type_ctx.value_types.insert(s_len, MirType::Integer); + + let limit = builder.next_value_id(); + builder.type_ctx.value_types.insert(limit, MirType::Integer); + + let cond_loop = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(cond_loop, MirType::Bool); + + let i_plus_sep = builder.next_value_id(); + builder + .type_ctx + .value_types + 
.insert(i_plus_sep, MirType::Integer); + + let chunk = builder.next_value_id(); + builder.type_ctx.value_types.insert(chunk, MirType::String); + + let cond_match = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(cond_match, MirType::Bool); + + let segment = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(segment, MirType::String); + + let start_next_then = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(start_next_then, MirType::Integer); + + let one = builder.next_value_id(); + builder.type_ctx.value_types.insert(one, MirType::Integer); + + let i_next_else = builder.next_value_id(); + builder + .type_ctx + .value_types + .insert(i_next_else, MirType::Integer); + + if debug { + trace_logger.debug( + "normalizer/split_scan", + &format!( + "Allocated: preheader={:?}, header={:?}, body={:?}, then={:?}, else={:?}, step={:?}, after={:?}", + preheader_bb, header_bb, body_bb, then_bb, else_bb, step_bb, after_bb + ), + ); + } + + // Step 6: Build header_effects + let header_effects = vec![ + // sep_len = sep.length() + CoreEffectPlan::MethodCall { + dst: Some(sep_len), + object: sep_host, + method: "length".to_string(), + args: vec![], + effects: EffectMask::PURE.add(Effect::Io), + }, + // s_len = s.length() + CoreEffectPlan::MethodCall { + dst: Some(s_len), + object: s_host, + method: "length".to_string(), + args: vec![], + effects: EffectMask::PURE.add(Effect::Io), + }, + // limit = s_len - sep_len + CoreEffectPlan::BinOp { + dst: limit, + lhs: s_len, + op: BinaryOp::Sub, + rhs: sep_len, + }, + // cond_loop = i <= limit + CoreEffectPlan::Compare { + dst: cond_loop, + lhs: i_current, + op: CompareOp::Le, + rhs: limit, + }, + ]; + + // Step 7: Build body effects and plans + let body = vec![ + // i_plus_sep = i + sep_len + CorePlan::Effect(CoreEffectPlan::BinOp { + dst: i_plus_sep, + lhs: i_current, + op: BinaryOp::Add, + rhs: sep_len, + }), + // chunk = s.substring(i, i_plus_sep) + 
CorePlan::Effect(CoreEffectPlan::MethodCall { + dst: Some(chunk), + object: s_host, + method: "substring".to_string(), + args: vec![i_current, i_plus_sep], + effects: EffectMask::PURE.add(Effect::Io), + }), + // cond_match = chunk == sep + CorePlan::Effect(CoreEffectPlan::Compare { + dst: cond_match, + lhs: chunk, + op: CompareOp::Eq, + rhs: sep_host, + }), + ]; + + // Step 8: Build then_effects (push + updates) + let then_effects = vec![ + // segment = s.substring(start, i) + CoreEffectPlan::MethodCall { + dst: Some(segment), + object: s_host, + method: "substring".to_string(), + args: vec![start_current, i_current], + effects: EffectMask::PURE.add(Effect::Io), + }, + // result.push(segment) - Side effect! + CoreEffectPlan::MethodCall { + dst: None, // push returns Void + object: result_host, + method: "push".to_string(), + args: vec![segment], + effects: EffectMask::MUT, + }, + // start_next_then = i + sep_len + CoreEffectPlan::BinOp { + dst: start_next_then, + lhs: i_current, + op: BinaryOp::Add, + rhs: sep_len, + }, + ]; + + // Step 9: Build else_effects (increment i) + let else_effects = vec![ + // one = const 1 + CoreEffectPlan::Const { + dst: one, + value: ConstValue::Integer(1), + }, + // i_next_else = i + 1 + CoreEffectPlan::BinOp { + dst: i_next_else, + lhs: i_current, + op: BinaryOp::Add, + rhs: one, + }, + ]; + + // Step 10: Build block_effects (SSOT ordering: preheader, header, body, then, else, step) + let block_effects = vec![ + (preheader_bb, vec![]), // No effects in preheader + (header_bb, header_effects.clone()), + (body_bb, vec![]), // Body effects are in body CorePlan + (then_bb, then_effects), + (else_bb, else_effects), + (step_bb, vec![]), // No effects in step + ]; + + // Step 11: Build phis (4 PHIs: 2 in header + 2 in step) + let phis = vec![ + // Header PHI 1: i_current + CorePhiInfo { + block: header_bb, + dst: i_current, + inputs: vec![ + (preheader_bb, i_init_val), + (step_bb, i_next), + ], + tag: format!("loop_carrier_i_{}", 
parts.i_var), + }, + // Header PHI 2: start_current + CorePhiInfo { + block: header_bb, + dst: start_current, + inputs: vec![ + (preheader_bb, start_init_val), + (step_bb, start_next), + ], + tag: format!("loop_carrier_start_{}", parts.start_var), + }, + // Step PHI 1: i_next + CorePhiInfo { + block: step_bb, + dst: i_next, + inputs: vec![ + (then_bb, start_next_then), // i = start (from then) + (else_bb, i_next_else), // i = i + 1 (from else) + ], + tag: format!("step_phi_i_{}", parts.i_var), + }, + // Step PHI 2: start_next + CorePhiInfo { + block: step_bb, + dst: start_next, + inputs: vec![ + (then_bb, start_next_then), // start updated + (else_bb, start_current), // start unchanged + ], + tag: format!("step_phi_start_{}", parts.start_var), + }, + ]; + + // Step 12: Build Frag (2 branches + 3 wires) + let empty_args = EdgeArgs { + layout: JumpArgsLayout::CarriersOnly, + values: vec![], + }; + + let branches = vec![ + // header -> body/after + BranchStub { + from: header_bb, + cond: cond_loop, + then_target: body_bb, + then_args: empty_args.clone(), + else_target: after_bb, + else_args: empty_args.clone(), + }, + // body -> then/else + BranchStub { + from: body_bb, + cond: cond_match, + then_target: then_bb, + then_args: empty_args.clone(), + else_target: else_bb, + else_args: empty_args.clone(), + }, + ]; + + let wires = vec![ + // then -> step + EdgeStub { + from: then_bb, + kind: ExitKind::Normal, + target: Some(step_bb), + args: empty_args.clone(), + }, + // else -> step + EdgeStub { + from: else_bb, + kind: ExitKind::Normal, + target: Some(step_bb), + args: empty_args.clone(), + }, + // step -> header (back-edge) + EdgeStub { + from: step_bb, + kind: ExitKind::Normal, + target: Some(header_bb), + args: empty_args, + }, + ]; + + let mut frag = Frag::new(header_bb); + frag.branches = branches; + frag.wires = wires; + + // Step 13: Build final_values (i, start for post-loop) + let final_values = vec![ + (parts.i_var.clone(), i_current), + 
(parts.start_var.clone(), start_current), + ]; + + // Step 14: Build CoreLoopPlan (generalized fields only) + let loop_plan = CoreLoopPlan { + preheader_bb, + header_bb, + body_bb, + step_bb, + after_bb, + found_bb: after_bb, // No early exit for split pattern + header_effects, + body, + step_effects: vec![], // No step_effects (done in then/else) + carriers: vec![], // Legacy field (not used with generalized) + cond_loop, + cond_match, + loop_var_name: parts.i_var, + // Phase 273 P2: Generalized fields populated + block_effects: Some(block_effects), + phis: Some(phis), + frag: Some(frag), + final_values: Some(final_values), + }; + + if debug { + trace_logger.debug( + "normalizer/split_scan", + "CorePlan construction complete (6 blocks, 4 PHIs)", + ); + } + + Ok(CorePlan::Loop(loop_plan)) + } }