#![cfg(feature = "normalized_dev")] use crate::config::env::joinir_dev_enabled; use crate::mir::join_ir::normalized::dev_env; use crate::mir::join_ir::{JoinFuncId, JoinFunction, JoinInst, JoinModule}; /// Phase 44: Shape capability kinds (capability-based routing) #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ShapeCapabilityKind { /// P2 Core: Simple mini patterns (i/acc/n etc) P2CoreSimple, /// P2 Core: skip_whitespace mini/real P2CoreSkipWs, /// P2 Core: _atoi mini/real P2CoreAtoi, /// P2 Mid: _parse_number real (p + num_str + result) P2MidParseNumber, /// P3 If-Sum family (minimal/multi/json) P3IfSum, /// P4 Continue (skip whitespace) family P4ContinueSkipWs, /// Selfhost P2 core (token scan) SelfhostP2Core, /// Selfhost P3 if-sum family SelfhostP3IfSum, // Future: Other P2 patterns // P2MidAtOfLoop, // P2HeavyString, } /// Phase 44: Shape capability descriptor #[derive(Debug, Clone)] pub struct ShapeCapability { pub kind: ShapeCapabilityKind, // Future extensibility fields (not all used yet): // pub pattern_kind: LoopPatternKind, // pub loop_param_count: usize, // pub carrier_roles: Vec, // pub method_calls: Vec, } impl ShapeCapability { pub fn new(kind: ShapeCapabilityKind) -> Self { Self { kind } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum NormalizedDevShape { Pattern1Mini, Pattern2Mini, JsonparserSkipWsMini, JsonparserSkipWsReal, JsonparserAtoiMini, JsonparserAtoiReal, JsonparserParseNumberReal, // Phase 47-A: Pattern3 (if-sum) minimal Pattern3IfSumMinimal, // Phase 47-B: Pattern3 extended (multi/json) Pattern3IfSumMulti, Pattern3IfSumJson, // Phase 48-A: Pattern4 (continue) minimal Pattern4ContinueMinimal, // Phase 48-B: Pattern4 (continue) JsonParser skip_ws (array/object) JsonparserParseArrayContinueSkipWs, JsonparserParseObjectContinueSkipWs, // Phase 50: selfhost P2/P3 dev shapes SelfhostTokenScanP2, SelfhostIfSumP3, // Phase 51: selfhost P2/P3 dev extensions SelfhostTokenScanP2Accum, SelfhostIfSumP3Ext, // Phase 53: selfhost P2/P3 
practical variations SelfhostArgsParseP2, SelfhostStmtCountP3, // Phase 54: selfhost P2/P3 shape growth (structural axis expansion) SelfhostVerifySchemaP2, SelfhostDetectFormatP3, // Phase 89: Continue + Early Return pattern (dev-only) PatternContinueReturnMinimal, } type Detector = fn(&JoinModule) -> bool; const SHAPE_DETECTORS: &[(NormalizedDevShape, Detector)] = &[ (NormalizedDevShape::Pattern1Mini, detectors::is_pattern1_mini), (NormalizedDevShape::Pattern2Mini, detectors::is_pattern2_mini), ( NormalizedDevShape::JsonparserSkipWsMini, detectors::is_jsonparser_skip_ws_mini, ), ( NormalizedDevShape::JsonparserSkipWsReal, detectors::is_jsonparser_skip_ws_real, ), ( NormalizedDevShape::JsonparserAtoiMini, detectors::is_jsonparser_atoi_mini, ), ( NormalizedDevShape::JsonparserAtoiReal, detectors::is_jsonparser_atoi_real, ), ( NormalizedDevShape::JsonparserParseNumberReal, detectors::is_jsonparser_parse_number_real, ), ( NormalizedDevShape::SelfhostTokenScanP2, detectors::is_selfhost_token_scan_p2, ), ( NormalizedDevShape::SelfhostTokenScanP2Accum, detectors::is_selfhost_token_scan_p2_accum, ), // Phase 47-A: Pattern3 if-sum minimal ( NormalizedDevShape::Pattern3IfSumMinimal, detectors::is_pattern3_if_sum_minimal, ), ( NormalizedDevShape::Pattern3IfSumMulti, detectors::is_pattern3_if_sum_multi, ), ( NormalizedDevShape::Pattern3IfSumJson, detectors::is_pattern3_if_sum_json, ), // Phase 48-A: Pattern4 continue minimal ( NormalizedDevShape::Pattern4ContinueMinimal, detectors::is_pattern4_continue_minimal, ), ( NormalizedDevShape::JsonparserParseArrayContinueSkipWs, detectors::is_jsonparser_parse_array_continue_skip_ws, ), ( NormalizedDevShape::JsonparserParseObjectContinueSkipWs, detectors::is_jsonparser_parse_object_continue_skip_ws, ), ( NormalizedDevShape::SelfhostIfSumP3, detectors::is_selfhost_if_sum_p3, ), ( NormalizedDevShape::SelfhostIfSumP3Ext, detectors::is_selfhost_if_sum_p3_ext, ), // Phase 53: selfhost P2/P3 practical variations ( 
NormalizedDevShape::SelfhostArgsParseP2, detectors::is_selfhost_args_parse_p2, ), ( NormalizedDevShape::SelfhostStmtCountP3, detectors::is_selfhost_stmt_count_p3, ), // Phase 54: selfhost P2/P3 shape growth ( NormalizedDevShape::SelfhostVerifySchemaP2, detectors::is_selfhost_verify_schema_p2, ), ( NormalizedDevShape::SelfhostDetectFormatP3, detectors::is_selfhost_detect_format_p3, ), // Phase 89: Continue + Early Return pattern ( NormalizedDevShape::PatternContinueReturnMinimal, detectors::is_pattern_continue_return_minimal, ), ]; /// direct ブリッジで扱う shape(dev 限定)。 pub(crate) fn direct_shapes(module: &JoinModule) -> Vec { let shapes = detect_shapes(module); log_shapes("direct", &shapes); shapes } /// Structured→Normalized の対象 shape(dev 限定)。 pub(crate) fn supported_shapes(module: &JoinModule) -> Vec { let shapes = detect_shapes(module); log_shapes("roundtrip", &shapes); shapes } /// Phase 44: Map NormalizedDevShape to ShapeCapability pub fn capability_for_shape(shape: &NormalizedDevShape) -> ShapeCapability { use NormalizedDevShape::*; use ShapeCapabilityKind::*; let kind = match shape { Pattern2Mini => P2CoreSimple, JsonparserSkipWsMini | JsonparserSkipWsReal => P2CoreSkipWs, JsonparserAtoiMini | JsonparserAtoiReal => P2CoreAtoi, JsonparserParseNumberReal => P2MidParseNumber, Pattern1Mini => P2CoreSimple, // Also core simple pattern // Phase 47-B: P3 if-sum family Pattern3IfSumMinimal | Pattern3IfSumMulti | Pattern3IfSumJson => P3IfSum, // Phase 48-A/B: P4 continue family Pattern4ContinueMinimal | JsonparserParseArrayContinueSkipWs | JsonparserParseObjectContinueSkipWs => P4ContinueSkipWs, // Phase 50: selfhost P2/P3 dev shapes SelfhostTokenScanP2 | SelfhostTokenScanP2Accum => SelfhostP2Core, SelfhostIfSumP3 | SelfhostIfSumP3Ext => SelfhostP3IfSum, // Phase 53: selfhost P2/P3 practical variations SelfhostArgsParseP2 => SelfhostP2Core, SelfhostStmtCountP3 => SelfhostP3IfSum, // Phase 54: selfhost P2/P3 shape growth SelfhostVerifySchemaP2 => SelfhostP2Core, 
SelfhostDetectFormatP3 => SelfhostP3IfSum, // Phase 89: Continue + Early Return pattern (dev-only, maps to P4 family) PatternContinueReturnMinimal => P4ContinueSkipWs, }; ShapeCapability::new(kind) } /// Phase 46+: Canonical shapes that ALWAYS use Normalized→MIR(direct) /// regardless of feature flags or mode. /// /// Canonical set (Phase 48-C): /// - P2-Core: Pattern2Mini, JsonparserSkipWsMini, JsonparserSkipWsReal, JsonparserAtoiMini /// - P2-Mid: JsonparserAtoiReal, JsonparserParseNumberReal /// - P3: Pattern3 If-sum minimal/multi/json /// - P4: Pattern4 continue minimal + JsonParser skip_ws (array/object) pub fn is_canonical_shape(shape: &NormalizedDevShape) -> bool { use NormalizedDevShape::*; matches!( shape, Pattern2Mini | JsonparserSkipWsMini | JsonparserSkipWsReal | JsonparserAtoiMini // Phase 46: Add P2-Mid patterns | JsonparserAtoiReal | JsonparserParseNumberReal // Phase 47-C: P3 if-sum canonical set | Pattern3IfSumMinimal | Pattern3IfSumMulti | Pattern3IfSumJson // Phase 48-C: P4 continue canonical set | Pattern4ContinueMinimal | JsonparserParseArrayContinueSkipWs | JsonparserParseObjectContinueSkipWs ) } /// Phase 44: Check if capability kind is in P2-Core family /// /// This checks capability-level membership, not granular canonical status. /// Use `is_canonical_shape()` for exact canonical filtering. pub fn is_p2_core_capability(cap: &ShapeCapability) -> bool { use ShapeCapabilityKind::*; matches!( cap.kind, P2CoreSimple | P2CoreSkipWs | P2CoreAtoi | P2MidParseNumber | P3IfSum | P4ContinueSkipWs | SelfhostP2Core | SelfhostP3IfSum ) } /// Phase 44: Check if capability is supported by Normalized dev pub fn is_supported_by_normalized(cap: &ShapeCapability) -> bool { // Currently same as P2-Core family is_p2_core_capability(cap) } /// canonical(常時 Normalized 経路を通す)対象。 /// Phase 46: Extract canonical shapes from JoinModule. 
/// /// Canonical set (P2-Core + P2-Mid): /// - Pattern2Mini, skip_ws mini/real, atoi mini/real, parse_number real /// /// These shapes ALWAYS use Normalized→MIR(direct) regardless of mode. /// P3/P4 patterns are NOT canonical (future NORM-P3/NORM-P4 phases). pub(crate) fn canonical_shapes(module: &JoinModule) -> Vec { let shapes: Vec<_> = detect_shapes(module) .into_iter() .filter(|s| is_canonical_shape(s)) .collect(); log_shapes("canonical", &shapes); shapes } #[allow(dead_code)] pub(crate) fn is_direct_supported(module: &JoinModule) -> bool { !detect_shapes(module).is_empty() } pub fn detect_shapes(module: &JoinModule) -> Vec { let mut shapes: Vec<_> = SHAPE_DETECTORS .iter() .filter_map(|(shape, detector)| if detector(module) { Some(*shape) } else { None }) .collect(); // Pattern1 は「最小の後方互換」なので、より具体的な shape が見つかった場合は外しておく。 if shapes.len() > 1 { shapes.retain(|s| *s != NormalizedDevShape::Pattern1Mini); } // selfhost shapesは canonical P2/P3 の generic 判定から分離する if shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2) || shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2Accum) || shapes.contains(&NormalizedDevShape::SelfhostArgsParseP2) || shapes.contains(&NormalizedDevShape::SelfhostVerifySchemaP2) { shapes.retain(|s| { *s != NormalizedDevShape::Pattern2Mini && *s != NormalizedDevShape::Pattern4ContinueMinimal }); } if shapes.contains(&NormalizedDevShape::SelfhostIfSumP3) || shapes.contains(&NormalizedDevShape::SelfhostIfSumP3Ext) || shapes.contains(&NormalizedDevShape::SelfhostStmtCountP3) || shapes.contains(&NormalizedDevShape::SelfhostDetectFormatP3) { shapes.retain(|s| { !matches!( s, NormalizedDevShape::Pattern3IfSumMinimal | NormalizedDevShape::Pattern3IfSumMulti | NormalizedDevShape::Pattern3IfSumJson | NormalizedDevShape::Pattern4ContinueMinimal ) }); } shapes } // --- 判定ロジック(共通) --- mod detectors { use super::*; pub(super) fn is_pattern1_mini(module: &JoinModule) -> bool { module.is_structured() && find_loop_step(module).is_some() } pub(super) 
fn is_pattern2_mini(module: &JoinModule) -> bool { if !module.is_structured() || module.functions.len() != 3 { return false; } let loop_func = match find_loop_step(module) { Some(f) => f, None => return false, }; if !(1..=3).contains(&loop_func.params.len()) { return false; } let has_cond_jump = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); let has_tail_call = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. })); has_cond_jump && has_tail_call } pub(super) fn is_jsonparser_skip_ws_mini(module: &JoinModule) -> bool { is_pattern2_mini(module) && module .functions .values() .any(|f| f.name == "jsonparser_skip_ws_mini") } pub(crate) fn is_jsonparser_skip_ws_real(module: &JoinModule) -> bool { if !module.is_structured() || module.functions.len() != 3 { return false; } let loop_func = match find_loop_step(module) { Some(f) => f, None => return false, }; if !(2..=6).contains(&loop_func.params.len()) { return false; } let has_cond_jump = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); let has_tail_call = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. })); has_cond_jump && has_tail_call && module .functions .values() .any(|f| f.name == "jsonparser_skip_ws_real") } pub(crate) fn is_jsonparser_atoi_mini(module: &JoinModule) -> bool { if !module.is_structured() || module.functions.len() != 3 { return false; } let loop_func = match find_loop_step(module) { Some(f) => f, None => return false, }; if !(3..=8).contains(&loop_func.params.len()) { return false; } let has_cond_jump = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); let has_tail_call = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. 
})); has_cond_jump && has_tail_call && module .functions .values() .any(|f| f.name == "jsonparser_atoi_mini") } pub(crate) fn is_jsonparser_atoi_real(module: &JoinModule) -> bool { if !module.is_structured() || module.functions.len() != 3 { return false; } let loop_func = match find_loop_step(module) { Some(f) => f, None => return false, }; if !(3..=10).contains(&loop_func.params.len()) { return false; } let has_cond_jump = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); let has_tail_call = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. })); has_cond_jump && has_tail_call && module .functions .values() .any(|f| f.name == "jsonparser_atoi_real") } pub(crate) fn is_jsonparser_parse_number_real(module: &JoinModule) -> bool { if !module.is_structured() || module.functions.len() != 3 { return false; } let loop_func = match find_loop_step(module) { Some(f) => f, None => return false, }; if !(3..=12).contains(&loop_func.params.len()) { return false; } let has_cond_jump = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); let has_tail_call = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. })); has_cond_jump && has_tail_call && module .functions .values() .any(|f| f.name == "jsonparser_parse_number_real") } fn name_guard_exact(module: &JoinModule, expected_name: &str) -> bool { module.functions.values().any(|f| f.name == expected_name) } /// Phase 52: Selfhost P2 core family structure signature (dev-only). 
/// /// This is intentionally narrow to avoid swallowing generic P2 shapes: /// - loop_step params: 3..=4 (i + host + 1..2 carriers) /// - P2 break-loop skeleton (cond jump + tail call) /// - no Select / BoxCall in body pub(super) fn is_selfhost_p2_core_family_candidate(module: &JoinModule) -> bool { if !module.is_structured() || module.functions.len() != 3 { return false; } let loop_func = match find_loop_step(module) { Some(f) => f, None => return false, }; if !(3..=4).contains(&loop_func.params.len()) { return false; } let has_cond_jump = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); let has_tail_call = loop_func .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. })); let has_select = loop_func.body.iter().any(|inst| match inst { JoinInst::Select { .. } => true, JoinInst::Compute(mir_inst) => matches!( mir_inst, crate::mir::join_ir::MirLikeInst::Select { .. } ), _ => false, }); let has_boxcall = loop_func.body.iter().any(|inst| match inst { JoinInst::Compute(mir_inst) => matches!( mir_inst, crate::mir::join_ir::MirLikeInst::BoxCall { .. } ), _ => false, }); has_cond_jump && has_tail_call && !has_select && !has_boxcall } /// Phase 52: Selfhost P3 if-sum family structure signature (dev-only). /// /// Note: current selfhost baseline is still P2-like (normalize_pattern2_minimal), /// so the signature avoids requiring Select and focuses on the explicit break-if. 
/// /// Distinguish selfhost P3 from canonical P3 by requiring: /// - loop_step params == 4 (i + host + sum + count) /// - an explicit Ge compare between params (break-if) /// - P2/P3 loop skeleton (cond jump + tail call) /// - no BoxCall in body pub(super) fn is_selfhost_p3_if_sum_family_candidate(module: &JoinModule) -> bool { if !module.is_structured() || module.functions.len() != 3 { return false; } let loop_step = match find_loop_step(module) { Some(f) => f, None => return false, }; if loop_step.params.len() != 4 { return false; } let has_cond_jump = loop_step .body .iter() .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); let has_tail_call = loop_step .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. })); let param_set: std::collections::BTreeSet<_> = loop_step.params.iter().copied().collect(); let has_ge_compare_between_params = loop_step.body.iter().any(|inst| match inst { JoinInst::Compute(mir_inst) => match mir_inst { crate::mir::join_ir::MirLikeInst::Compare { op, lhs, rhs, .. } => { *op == crate::mir::join_ir::CompareOp::Ge && param_set.contains(lhs) && param_set.contains(rhs) } _ => false, }, _ => false, }); let has_boxcall = loop_step.body.iter().any(|inst| match inst { JoinInst::Compute(mir_inst) => matches!( mir_inst, crate::mir::join_ir::MirLikeInst::BoxCall { .. 
} ), _ => false, }); has_cond_jump && has_tail_call && has_ge_compare_between_params && !has_boxcall } pub(crate) fn is_selfhost_token_scan_p2(module: &JoinModule) -> bool { is_selfhost_p2_core_family_candidate(module) && name_guard_exact(module, "selfhost_token_scan_p2") } pub(crate) fn is_selfhost_token_scan_p2_accum(module: &JoinModule) -> bool { is_selfhost_p2_core_family_candidate(module) && name_guard_exact(module, "selfhost_token_scan_p2_accum") } /// Phase 47-A: Check if module matches Pattern3 if-sum minimal shape pub(crate) fn is_pattern3_if_sum_minimal(module: &JoinModule) -> bool { // Structure-based detection (avoid name-based heuristics) // Must have exactly 3 functions: main, loop_step, k_exit if !module.is_structured() || module.functions.len() != 3 { return false; } // Find loop_step function let loop_step = match find_loop_step(module) { Some(f) => f, None => return false, }; // P3 characteristics: // - Has Compare instruction (loop condition) // - Has Select instruction (conditional carrier update: if-then-else) // - Has tail call (Call with k_next: None) let has_compare = loop_step.body.iter().any(|inst| { matches!( inst, JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare { .. }) ) }); // Phase 220: Select can be either JoinInst::Select or Compute(MirLikeInst::Select) let has_select = loop_step.body.iter().any(|inst| match inst { JoinInst::Select { .. } => true, JoinInst::Compute(mir_inst) => matches!( mir_inst, crate::mir::join_ir::MirLikeInst::Select { .. } ), _ => false, }); let has_tail_call = loop_step .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. 
})); // P3 minimal/multi/json: typically 2-6 params (i + carriers + len/host) let reasonable_param_count = (2..=6).contains(&loop_step.params.len()); has_compare && has_select && has_tail_call && reasonable_param_count } pub(crate) fn is_selfhost_if_sum_p3(module: &JoinModule) -> bool { is_selfhost_p3_if_sum_family_candidate(module) && name_guard_exact(module, "selfhost_if_sum_p3") } pub(crate) fn is_selfhost_if_sum_p3_ext(module: &JoinModule) -> bool { is_selfhost_p3_if_sum_family_candidate(module) && name_guard_exact(module, "selfhost_if_sum_p3_ext") } /// Phase 53: selfhost args-parse P2 detector (practical variation with string carrier) /// /// Two-stage detection: /// 1. Structural primary check (P2 break pattern, 1-3 carriers) /// 2. dev-only name guard for final confirmation (ambiguity resolver) pub(crate) fn is_selfhost_args_parse_p2(module: &JoinModule) -> bool { // 1. Structural primary check (P2 core family) if !is_selfhost_p2_core_family_candidate(module) { return false; } // 2. dev-only name guard for final confirmation name_guard_exact(module, "selfhost_args_parse_p2") } /// Phase 53: selfhost stmt-count P3 detector (practical variation with multi-branch if-else) /// /// Two-stage detection: /// 1. Structural primary check (P3 if-sum pattern, 2-10 carriers, multi-branch) /// 2. dev-only name guard for final confirmation (ambiguity resolver) pub(crate) fn is_selfhost_stmt_count_p3(module: &JoinModule) -> bool { // 1. Structural primary check if !module.is_structured() || module.functions.len() != 3 { return false; } let loop_step = match find_loop_step(module) { Some(f) => f, None => return false, }; // Allow 2-10 carriers (5 statement counters: r/e/l/iff/lp + i) let carrier_count = loop_step.params.len(); if !(2..=10).contains(&carrier_count) { return false; } // Must have conditional jump (break pattern) let has_cond_jump = loop_step .body .iter() .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. 
})); // Must have tail call (loop continuation) let has_tail_call = loop_step .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. })); if !has_cond_jump || !has_tail_call { return false; } // 2. dev-only name guard for final confirmation name_guard_exact(module, "selfhost_stmt_count_p3") } /// Phase 54: Count Compare operations with specific op fn count_compare_ops(module: &JoinModule, target_op: crate::mir::join_ir::CompareOp) -> usize { module .functions .values() .flat_map(|f| &f.body) .filter(|inst| match inst { JoinInst::Compute(mir_inst) => match mir_inst { crate::mir::join_ir::MirLikeInst::Compare { op, .. } => *op == target_op, _ => false, }, _ => false, }) .count() } /// Phase 54: selfhost verify-schema P2 detector (Ne-heavy pattern, early return diversity) /// /// Two-stage detection: /// 1. Structural primary check (P2 break pattern, 2-3 carriers, Ne conditions) /// 2. dev-only name guard for final confirmation (ambiguity resolver) pub(crate) fn is_selfhost_verify_schema_p2(module: &JoinModule) -> bool { // 1. Structural primary check (P2 core family) if !is_selfhost_p2_core_family_candidate(module) { return false; } let loop_step = match find_loop_step(module) { Some(f) => f, None => return false, }; // verify_schema pattern: 2-3 carriers (ver + kind + host param) let carrier_count = loop_step.params.len(); if !(2..=3).contains(&carrier_count) { return false; } // Ne condition pattern (verify != expected) let ne_count = count_compare_ops(module, crate::mir::join_ir::CompareOp::Ne); if ne_count < 1 { return false; // Ne条件必須 } // 2. dev-only name guard for final confirmation name_guard_exact(module, "selfhost_verify_schema_p2") } /// Phase 54: selfhost detect-format P3 detector (String return branching, null check) /// /// Two-stage detection: /// 1. Structural primary check (P3 if-sum pattern, 2-4 carriers, conditional jump) /// 2. 
dev-only name guard for final confirmation (ambiguity resolver) pub(crate) fn is_selfhost_detect_format_p3(module: &JoinModule) -> bool { // 1. Structural primary check if !module.is_structured() || module.functions.len() != 3 { return false; } let loop_step = match find_loop_step(module) { Some(f) => f, None => return false, }; // Lightweight P3: 2-4 carriers (conditional branching 3-way + loop variable) let carrier_count = loop_step.params.len(); if !(2..=4).contains(&carrier_count) { return false; } // Conditional branching pattern (multiple if) let has_cond_jump = loop_step .body .iter() .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); if !has_cond_jump { return false; } // 2. dev-only name guard for final confirmation name_guard_exact(module, "selfhost_detect_format_p3") } /// Phase 47-B: P3 if-sum (multi-carrier) shape detector pub(crate) fn is_pattern3_if_sum_multi(module: &JoinModule) -> bool { if !is_pattern3_if_sum_minimal(module) { return false; } module .functions .values() .any(|f| f.name == "pattern3_if_sum_multi_min") } /// Phase 47-B: P3 if-sum (JsonParser mini) shape detector pub(crate) fn is_pattern3_if_sum_json(module: &JoinModule) -> bool { if !is_pattern3_if_sum_minimal(module) { return false; } module .functions .values() .any(|f| f.name == "jsonparser_if_sum_min") } /// Phase 48-A: Check if module matches Pattern4 continue minimal shape /// /// Phase 89: Tightened to prevent continue + early return misdetection: /// - Requires at least one Select instruction (continue's core) /// - Requires exactly one conditional Jump to k_exit (loop break, not early return) pub(crate) fn is_pattern4_continue_minimal(module: &JoinModule) -> bool { // Structure-based detection (avoid name-based heuristics) // Must have exactly 3 functions: main, loop_step, k_exit if !module.is_structured() || module.functions.len() != 3 { return false; } // Find loop_step function let loop_step = match find_loop_step(module) { Some(f) => f, None => return 
false, }; // P4 characteristics: // - Has Compare instruction (loop condition or continue check) // - Has Select instruction (continue's core - carrier switching) // - Has tail call (loop back) // - Has exactly one conditional Jump to k_exit (loop break only) // // Phase 89: Tightened to exclude loop-internal return patterns let has_compare = loop_step.body.iter().any(|inst| { matches!( inst, JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare { .. }) ) }); // Phase 89: Require Select (continue's core) let has_select = loop_step.body.iter().any(|inst| match inst { JoinInst::Select { .. } => true, JoinInst::Compute(mir_inst) => matches!( mir_inst, crate::mir::join_ir::MirLikeInst::Select { .. } ), _ => false, }); // Phase 89: Count conditional Jumps to k_exit // Continue pattern should have exactly 1 (loop break), not multiple (early returns) let k_exit_jumps_count = loop_step.body.iter().filter(|inst| { matches!(inst, JoinInst::Jump { cond: Some(_), .. }) }).count(); let has_tail_call = loop_step .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. 
})); // P4 minimal has 2-4 params (i, acc, possibly n) let reasonable_param_count = (2..=4).contains(&loop_step.params.len()); // Phase 89: Tightened conditions has_compare && has_select && has_tail_call && reasonable_param_count && k_exit_jumps_count == 1 // Exactly one loop break (not early return) } pub(crate) fn is_jsonparser_parse_array_continue_skip_ws(module: &JoinModule) -> bool { is_pattern4_continue_minimal(module) && module .functions .values() .any(|f| f.name == "jsonparser_parse_array_continue_skip_ws") } pub(crate) fn is_jsonparser_parse_object_continue_skip_ws(module: &JoinModule) -> bool { is_pattern4_continue_minimal(module) && module .functions .values() .any(|f| f.name == "jsonparser_parse_object_continue_skip_ws") } /// Phase 89: Check if module matches Continue + Early Return pattern /// /// Structural characteristics: /// - 3 functions (main, loop_step, k_exit) /// - Has Select instruction (continue's core) /// - Has TWO or more conditional Jumps to k_exit (loop break + early return) /// - Has Compare instruction /// - Has tail call (loop back) pub(crate) fn is_pattern_continue_return_minimal(module: &JoinModule) -> bool { // Must have exactly 3 functions if !module.is_structured() || module.functions.len() != 3 { return false; } // Find loop_step function let loop_step = match find_loop_step(module) { Some(f) => f, None => return false, }; // Continue + Return characteristics: // - Has Select instruction (continue's core) // - Has TWO or more conditional Jumps (loop break + early return) // - Has Compare instruction // - Has tail call (loop back) let has_compare = loop_step.body.iter().any(|inst| { matches!( inst, JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare { .. }) ) }); let has_select = loop_step.body.iter().any(|inst| match inst { JoinInst::Select { .. } => true, JoinInst::Compute(mir_inst) => matches!( mir_inst, crate::mir::join_ir::MirLikeInst::Select { .. 
} ), _ => false, }); // Continue + Return pattern requires TWO or more conditional Jumps // (at least one for loop break, one for early return) let k_exit_jumps_count = loop_step.body.iter().filter(|inst| { matches!(inst, JoinInst::Jump { cond: Some(_), .. }) }).count(); let has_tail_call = loop_step .body .iter() .any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. })); // Reasonable param count (i, acc, possibly n) let reasonable_param_count = (2..=4).contains(&loop_step.params.len()); // Phase 89: Continue + Return pattern requires >= 2 conditional Jumps has_compare && has_select && has_tail_call && reasonable_param_count && k_exit_jumps_count >= 2 // At least 2: loop break + early return } pub(super) fn find_loop_step(module: &JoinModule) -> Option<&JoinFunction> { module .functions .values() .find(|f| f.name == "loop_step") .or_else(|| module.functions.get(&JoinFuncId::new(1))) } } fn log_shapes(tag: &str, shapes: &[NormalizedDevShape]) { if shapes.is_empty() { return; } if dev_env::normalized_dev_logs_enabled() && joinir_dev_enabled() { eprintln!("[joinir/normalized-dev/shape] {}: {:?}", tag, shapes); } } #[cfg(test)] mod tests { use super::*; #[cfg(feature = "normalized_dev")] #[test] fn test_detect_pattern3_if_sum_minimal_shape() { use crate::mir::join_ir::normalized::fixtures::build_pattern3_if_sum_min_structured_for_normalized_dev; let module = build_pattern3_if_sum_min_structured_for_normalized_dev(); // Should detect Pattern3IfSumMinimal shape assert!( detectors::is_pattern3_if_sum_minimal(&module), "pattern3_if_sum_minimal fixture should be detected" ); let shapes = detect_shapes(&module); assert!( shapes.contains(&NormalizedDevShape::Pattern3IfSumMinimal), "detect_shapes() should include Pattern3IfSumMinimal, got: {:?}", shapes ); } #[cfg(feature = "normalized_dev")] #[test] fn test_selfhost_p2_core_structural_candidate_signature() { use crate::mir::join_ir::normalized::fixtures::{ build_jsonparser_skip_ws_structured_for_normalized_dev, 
build_pattern2_minimal_structured,
            build_selfhost_token_scan_p2_accum_structured_for_normalized_dev,
            build_selfhost_token_scan_p2_structured_for_normalized_dev,
        };

        // Build the four structured fixtures whose candidate status is checked below.
        let selfhost_p2 = build_selfhost_token_scan_p2_structured_for_normalized_dev();
        let selfhost_p2_accum = build_selfhost_token_scan_p2_accum_structured_for_normalized_dev();
        let json_p2 = build_jsonparser_skip_ws_structured_for_normalized_dev();
        let canonical_p2_min = build_pattern2_minimal_structured();

        // Both selfhost token-scan fixtures must be accepted by the structural candidate check.
        assert!(
            detectors::is_selfhost_p2_core_family_candidate(&selfhost_p2),
            "selfhost_token_scan_p2 should match structural candidate"
        );
        assert!(
            detectors::is_selfhost_p2_core_family_candidate(&selfhost_p2_accum),
            "selfhost_token_scan_p2_accum should match structural candidate"
        );
        // Structural signature is intentionally ambiguous with JsonParser P2-mini family.
        assert!(
            detectors::is_selfhost_p2_core_family_candidate(&json_p2),
            "jsonparser_skip_ws_mini should also match P2 core candidate"
        );
        // The canonical Pattern2Mini fixture is the negative case for this candidate check.
        assert!(
            !detectors::is_selfhost_p2_core_family_candidate(&canonical_p2_min),
            "canonical Pattern2Mini fixture should not match selfhost P2 candidate"
        );
    }

    // Checks `detectors::is_selfhost_p3_if_sum_family_candidate` against four fixtures:
    // the two selfhost P3 if-sum fixtures must be accepted, and the two canonical
    // Pattern3 if-sum fixtures (minimal, multi) must be rejected.
    #[cfg(feature = "normalized_dev")]
    #[test]
    fn test_selfhost_p3_if_sum_structural_candidate_signature() {
        use crate::mir::join_ir::normalized::fixtures::{
            build_pattern3_if_sum_min_structured_for_normalized_dev,
            build_pattern3_if_sum_multi_min_structured_for_normalized_dev,
            build_selfhost_if_sum_p3_ext_structured_for_normalized_dev,
            build_selfhost_if_sum_p3_structured_for_normalized_dev,
        };

        let selfhost_p3 = build_selfhost_if_sum_p3_structured_for_normalized_dev();
        let selfhost_p3_ext = build_selfhost_if_sum_p3_ext_structured_for_normalized_dev();
        let canonical_p3_min = build_pattern3_if_sum_min_structured_for_normalized_dev();
        let canonical_p3_multi = build_pattern3_if_sum_multi_min_structured_for_normalized_dev();

        // Positive cases: selfhost fixtures.
        assert!(
            detectors::is_selfhost_p3_if_sum_family_candidate(&selfhost_p3),
            "selfhost_if_sum_p3 should match structural candidate"
        );
        assert!(
            detectors::is_selfhost_p3_if_sum_family_candidate(&selfhost_p3_ext),
            "selfhost_if_sum_p3_ext should match structural candidate"
        );
        // Negative cases: canonical Pattern3 fixtures.
        assert!(
            !detectors::is_selfhost_p3_if_sum_family_candidate(&canonical_p3_min),
            "canonical P3 minimal should not match selfhost P3 candidate"
        );
        assert!(
            !detectors::is_selfhost_p3_if_sum_family_candidate(&canonical_p3_multi),
            "canonical P3 multi should not match selfhost P3 candidate"
        );
    }

    // `detect_shapes` on the selfhost token-scan P2 fixture must report
    // `SelfhostTokenScanP2` and must not report `Pattern2Mini`.
    #[cfg(feature = "normalized_dev")]
    #[test]
    fn test_detect_selfhost_token_scan_p2_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_selfhost_token_scan_p2_structured_for_normalized_dev;

        let module = build_selfhost_token_scan_p2_structured_for_normalized_dev();
        let shapes = detect_shapes(&module);

        assert!(
            shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2),
            "selfhost_token_scan_p2 shape missing: {:?}",
            shapes
        );
        assert!(
            !shapes.contains(&NormalizedDevShape::Pattern2Mini),
            "selfhost_token_scan_p2 should not be treated as canonical Pattern2Mini: {:?}",
            shapes
        );
    }

    // `detect_shapes` on the selfhost token-scan P2 "accum" fixture must report
    // `SelfhostTokenScanP2Accum` and must not report `Pattern2Mini`.
    #[cfg(feature = "normalized_dev")]
    #[test]
    fn test_detect_selfhost_token_scan_p2_accum_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_selfhost_token_scan_p2_accum_structured_for_normalized_dev;

        let module = build_selfhost_token_scan_p2_accum_structured_for_normalized_dev();
        let shapes = detect_shapes(&module);

        assert!(
            shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2Accum),
            "selfhost_token_scan_p2_accum shape missing: {:?}",
            shapes
        );
        assert!(
            !shapes.contains(&NormalizedDevShape::Pattern2Mini),
            "selfhost_token_scan_p2_accum should not be treated as canonical Pattern2Mini: {:?}",
            shapes
        );
    }

    // `detect_shapes` on the selfhost if-sum P3 fixture must report `SelfhostIfSumP3`
    // and must not report the canonical `Pattern3IfSumMinimal` shape.
    #[cfg(feature = "normalized_dev")]
    #[test]
    fn test_detect_selfhost_if_sum_p3_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_selfhost_if_sum_p3_structured_for_normalized_dev;

        let module = build_selfhost_if_sum_p3_structured_for_normalized_dev();
        let shapes = detect_shapes(&module);

        assert!(
            shapes.contains(&NormalizedDevShape::SelfhostIfSumP3),
            "selfhost_if_sum_p3 shape missing: {:?}",
            shapes
        );
        assert!(
            !shapes.iter().any(|s| matches!(s, NormalizedDevShape::Pattern3IfSumMinimal)),
            "selfhost_if_sum_p3 should not rely on canonical P3 minimal detection: {:?}",
            shapes
        );
    }

    // `detect_shapes` on the selfhost if-sum P3 "ext" fixture must report
    // `SelfhostIfSumP3Ext` and none of the three canonical Pattern3 if-sum shapes
    // (minimal, multi, json).
    #[cfg(feature = "normalized_dev")]
    #[test]
    fn test_detect_selfhost_if_sum_p3_ext_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_selfhost_if_sum_p3_ext_structured_for_normalized_dev;

        let module = build_selfhost_if_sum_p3_ext_structured_for_normalized_dev();
        let shapes = detect_shapes(&module);

        assert!(
            shapes.contains(&NormalizedDevShape::SelfhostIfSumP3Ext),
            "selfhost_if_sum_p3_ext shape missing: {:?}",
            shapes
        );
        assert!(
            !shapes.iter().any(|s| matches!(
                s,
                NormalizedDevShape::Pattern3IfSumMinimal
                    | NormalizedDevShape::Pattern3IfSumMulti
                    | NormalizedDevShape::Pattern3IfSumJson
            )),
            "selfhost_if_sum_p3_ext should not rely on canonical P3 detection: {:?}",
            shapes
        );
    }

    // The Pattern4 continue-minimal fixture must be accepted both by the dedicated
    // detector predicate and by the aggregate `detect_shapes` result.
    #[cfg(feature = "normalized_dev")]
    #[test]
    fn test_detect_pattern4_continue_minimal_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_pattern4_continue_min_structured_for_normalized_dev;

        let module = build_pattern4_continue_min_structured_for_normalized_dev();

        // Should detect Pattern4ContinueMinimal shape
        assert!(
            detectors::is_pattern4_continue_minimal(&module),
            "pattern4_continue_minimal fixture should be detected"
        );

        let shapes = detect_shapes(&module);
        assert!(
            shapes.contains(&NormalizedDevShape::Pattern4ContinueMinimal),
            "detect_shapes() should include Pattern4ContinueMinimal, got: {:?}",
            shapes
        );
    }

    // The JsonParser parse-array and parse-object continue/skip-ws fixtures must each
    // be accepted by their dedicated detector predicate and show up in `detect_shapes`.
    #[cfg(feature = "normalized_dev")]
    #[test]
    fn test_detect_pattern4_jsonparser_continue_shapes() {
        use crate::mir::join_ir::normalized::fixtures::{
            build_jsonparser_parse_array_continue_skip_ws_structured_for_normalized_dev,
            build_jsonparser_parse_object_continue_skip_ws_structured_for_normalized_dev,
        };

        // Array variant: predicate first, then the aggregate shape list.
        let array = build_jsonparser_parse_array_continue_skip_ws_structured_for_normalized_dev();
        assert!(
            detectors::is_jsonparser_parse_array_continue_skip_ws(&array),
            "array continue fixture should be detected"
        );
        let array_shapes = detect_shapes(&array);
        assert!(
            array_shapes.contains(&NormalizedDevShape::JsonparserParseArrayContinueSkipWs),
            "array continue shape missing, got {:?}",
            array_shapes
        );

        // Object variant: same two checks.
        let object = build_jsonparser_parse_object_continue_skip_ws_structured_for_normalized_dev();
        assert!(
            detectors::is_jsonparser_parse_object_continue_skip_ws(&object),
            "object continue fixture should be detected"
        );
        let object_shapes = detect_shapes(&object);
        assert!(
            object_shapes.contains(&NormalizedDevShape::JsonparserParseObjectContinueSkipWs),
            "object continue shape missing, got {:?}",
            object_shapes
        );
    }

    // Builds a three-function JoinIR module by hand (entry, `loop_step`, `k_exit`) in
    // which `loop_step` contains two conditional jumps, and asserts that neither
    // `detectors::is_pattern4_continue_minimal` nor `detect_shapes` classifies it as
    // `Pattern4ContinueMinimal`.
    #[cfg(feature = "normalized_dev")]
    #[test]
    fn test_pattern4_detector_rejects_loop_with_return() {
        // Phase 89: Verify that the Pattern4 detector does NOT match
        // modules with a loop-internal return (continue + early return pattern).
        use crate::mir::join_ir::{JoinFuncId, JoinModule};
        use crate::mir::ValueId;
        use std::collections::BTreeMap;

        // Minimal module with loop + continue + return
        // (this would be the ContinueReturn pattern, NOT Pattern4)
        let mut functions = BTreeMap::new();

        // Entry function: defines the integer constant 0 as ValueId(1), calls function 1
        // with [ValueId(1), ValueId(1), ValueId(0)], and returns the call result.
        let entry_func = JoinFunction {
            id: JoinFuncId::new(0),
            name: "loop_with_return_test".to_string(),
            params: vec![ValueId(0)],
            body: vec![
                JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Const {
                    dst: ValueId(1),
                    value: crate::mir::join_ir::ConstValue::Integer(0),
                }),
                JoinInst::Call {
                    func: JoinFuncId::new(1),
                    args: vec![ValueId(1), ValueId(1), ValueId(0)],
                    k_next: None,
                    dst: Some(ValueId(2)),
                },
                JoinInst::Ret { value: Some(ValueId(2)) },
            ],
            exit_cont: None,
        };

        // loop_step function with TWO conditional Jumps (break + early return)
        let loop_step_func = JoinFunction {
            id: JoinFuncId::new(1),
            name: "loop_step".to_string(),
            params: vec![ValueId(0), ValueId(1), ValueId(2)],
            body: vec![
                // Compare for loop condition: ValueId(10) = ValueId(0) < ValueId(2)
                JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare {
                    dst: ValueId(10),
                    op: crate::mir::join_ir::CompareOp::Lt,
                    lhs: ValueId(0),
                    rhs: ValueId(2),
                }),
                // ValueId(11) = false, then ValueId(12) = (ValueId(10) == ValueId(11));
                // ValueId(12) is the condition of the first Jump below.
                JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Const {
                    dst: ValueId(11),
                    value: crate::mir::join_ir::ConstValue::Bool(false),
                }),
                JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare {
                    dst: ValueId(12),
                    op: crate::mir::join_ir::CompareOp::Eq,
                    lhs: ValueId(10),
                    rhs: ValueId(11),
                }),
                // First Jump: loop break (to the continuation of function 2, i.e. k_exit)
                JoinInst::Jump {
                    cont: JoinFuncId::new(2).as_cont(),
                    args: vec![ValueId(1)],
                    cond: Some(ValueId(12)),
                },
                // Compare for early return condition: ValueId(20) = (ValueId(0) == ValueId(2))
                JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare {
                    dst: ValueId(20),
                    op: crate::mir::join_ir::CompareOp::Eq,
                    lhs: ValueId(0),
                    rhs: ValueId(2),
                }),
                // Second Jump: early return (THIS MAKES IT NOT PATTERN4)
                JoinInst::Jump {
                    cont: JoinFuncId::new(2).as_cont(),
                    args: vec![ValueId(1)],
                    cond: Some(ValueId(20)),
                },
                // Select (continue's core); both arms are ValueId(1) in this fixture.
                JoinInst::Select {
                    dst: ValueId(30),
                    cond: ValueId(20),
                    then_val: ValueId(1),
                    else_val: ValueId(1),
                    type_hint: None,
                },
                // Tail call (loop back) into function 1 itself, then return its result.
                JoinInst::Call {
                    func: JoinFuncId::new(1),
                    args: vec![ValueId(0), ValueId(30), ValueId(2)],
                    k_next: None,
                    dst: Some(ValueId(40)),
                },
                JoinInst::Ret { value: Some(ValueId(40)) },
            ],
            exit_cont: None,
        };

        // k_exit function: returns its single parameter unchanged.
        let k_exit_func = JoinFunction {
            id: JoinFuncId::new(2),
            name: "k_exit".to_string(),
            params: vec![ValueId(0)],
            body: vec![JoinInst::Ret { value: Some(ValueId(0)) }],
            exit_cont: None,
        };

        // Register the functions under the same ids they carry in their `id` fields.
        functions.insert(JoinFuncId::new(0), entry_func);
        functions.insert(JoinFuncId::new(1), loop_step_func);
        functions.insert(JoinFuncId::new(2), k_exit_func);

        let module = JoinModule {
            functions,
            entry: Some(JoinFuncId::new(0)),
            phase: crate::mir::join_ir::JoinIrPhase::Structured,
        };

        // Phase 89: This should NOT be detected as Pattern4ContinueMinimal
        // because it has TWO conditional Jumps (loop break + early return)
        assert!(
            !detectors::is_pattern4_continue_minimal(&module),
            "Module with loop-internal return should NOT match Pattern4ContinueMinimal"
        );

        // The aggregate detector must agree with the dedicated predicate.
        let shapes = detect_shapes(&module);
        assert!(
            !shapes.contains(&NormalizedDevShape::Pattern4ContinueMinimal),
            "Pattern4ContinueMinimal should not be detected for loop with return, got: {:?}",
            shapes
        );
    }
}