Files
hakorune/src/mir/join_ir/normalized/shape_guard.rs
nyash-codex 45add0f5d3 feat(joinir): Phase 90 P0 - ParseStringComposite pattern
## Composite Pattern
- Continue(escape i+=2) + EarlyReturn(close quote)
- parse_string_composite_pattern.rs (50行、continue_return 再利用)
- 89% コード削減(450行→50行)

## Shape Detection
- BinOp Add const 2 検出(escape 特徴)
- LoopStepInspector 活用(Phase 89 リファクタ成果)

## SSOT Integration
- dev_fixtures.rs に登録
- StepCalculator 再利用(Phase 89-2 成果)

## Tests
- +2 tests (vm_bridge + 期待値 n=10→acc=5)
- normalized_dev: 61→63 passed
- lib: 993 passed (回帰なし)

Impact:
- Reuse over Duplication 実践
- Phase 89 リファクタ成果の完全活用
- 箱化原則 5/5 遵守

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-14 03:36:45 +09:00

1390 lines
50 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#![cfg(feature = "normalized_dev")]
use crate::config::env::joinir_dev_enabled;
use crate::mir::join_ir::normalized::dev_env;
use crate::mir::join_ir::normalized::loop_step_inspector::LoopStepInspector;
use crate::mir::join_ir::{JoinFuncId, JoinFunction, JoinInst, JoinModule};
/// Phase 44: Shape capability kinds used for capability-based routing.
///
/// Every `NormalizedDevShape` is mapped onto exactly one of these kinds by
/// `capability_for_shape`; several shapes may share a kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ShapeCapabilityKind {
/// P2 Core: simple mini patterns (i/acc/n etc.). Assigned to `Pattern1Mini` and `Pattern2Mini`.
P2CoreSimple,
/// P2 Core: skip_whitespace mini/real.
P2CoreSkipWs,
/// P2 Core: _atoi mini/real.
P2CoreAtoi,
/// P2 Mid: _parse_number real (p + num_str + result).
P2MidParseNumber,
/// P3 If-Sum family (minimal/multi/json).
P3IfSum,
/// P4 Continue (skip whitespace) family.
P4ContinueSkipWs,
/// P4 Continue + Early Return family (Phase 89).
P4ContinueEarlyReturn,
/// Composite Parse String (continue + early return + variable step) (Phase 90).
CompositeParseString,
/// Selfhost P2 core (token scan, args parse, verify schema).
SelfhostP2Core,
/// Selfhost P3 if-sum family (if-sum, stmt count, detect format).
SelfhostP3IfSum,
// Future: other P2 patterns (not yet defined as variants)
// P2MidAtOfLoop,
// P2HeavyString,
}
/// Phase 44: Shape capability descriptor.
///
/// Currently a thin wrapper around a single [`ShapeCapabilityKind`]; the
/// commented-out fields below sketch possible future extensions and are not
/// part of the type today.
#[derive(Debug, Clone)]
pub struct ShapeCapability {
/// The capability family this descriptor represents.
pub kind: ShapeCapabilityKind,
// Future extensibility fields (not all used yet):
// pub pattern_kind: LoopPatternKind,
// pub loop_param_count: usize,
// pub carrier_roles: Vec<CarrierRole>,
// pub method_calls: Vec<MethodCallSignature>,
}
impl ShapeCapability {
pub fn new(kind: ShapeCapabilityKind) -> Self {
Self { kind }
}
}
/// Concrete dev shapes that `detect_shapes` can recognise in a `JoinModule`.
///
/// Each variant has one entry in `SHAPE_DETECTORS` and is mapped to a
/// `ShapeCapabilityKind` by `capability_for_shape`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormalizedDevShape {
// Minimal loop shapes (Pattern1 is dropped by `detect_shapes` whenever a
// more specific shape is also detected).
Pattern1Mini,
Pattern2Mini,
// JsonParser P2 shapes (skip_ws / atoi / parse_number)
JsonparserSkipWsMini,
JsonparserSkipWsReal,
JsonparserAtoiMini,
JsonparserAtoiReal,
JsonparserParseNumberReal,
// Phase 47-A: Pattern3 (if-sum) minimal
Pattern3IfSumMinimal,
// Phase 47-B: Pattern3 extended (multi/json)
Pattern3IfSumMulti,
Pattern3IfSumJson,
// Phase 48-A: Pattern4 (continue) minimal
Pattern4ContinueMinimal,
// Phase 48-B: Pattern4 (continue) JsonParser skip_ws (array/object)
JsonparserParseArrayContinueSkipWs,
JsonparserParseObjectContinueSkipWs,
// Phase 50: selfhost P2/P3 dev shapes
SelfhostTokenScanP2,
SelfhostIfSumP3,
// Phase 51: selfhost P2/P3 dev extensions
SelfhostTokenScanP2Accum,
SelfhostIfSumP3Ext,
// Phase 53: selfhost P2/P3 practical variations
SelfhostArgsParseP2,
SelfhostStmtCountP3,
// Phase 54: selfhost P2/P3 shape growth (structural axis expansion)
SelfhostVerifySchemaP2,
SelfhostDetectFormatP3,
// Phase 89: Continue + Early Return pattern (dev-only)
PatternContinueReturnMinimal,
// Phase 90: Parse String Composite pattern (dev-only: continue + early return + variable step)
ParseStringCompositeMinimal,
}
/// Signature shared by every shape detector: inspects a module and reports
/// whether it matches the detector's shape.
type Detector = fn(&JoinModule) -> bool;
/// Registry pairing each dev shape with its detector function.
///
/// `detect_shapes` walks this table front to back, so the shapes it returns
/// appear in table order. A module may satisfy several detectors at once;
/// `detect_shapes` prunes the overlapping generic shapes afterwards.
const SHAPE_DETECTORS: &[(NormalizedDevShape, Detector)] = &[
(NormalizedDevShape::Pattern1Mini, detectors::is_pattern1_mini),
(NormalizedDevShape::Pattern2Mini, detectors::is_pattern2_mini),
(
NormalizedDevShape::JsonparserSkipWsMini,
detectors::is_jsonparser_skip_ws_mini,
),
(
NormalizedDevShape::JsonparserSkipWsReal,
detectors::is_jsonparser_skip_ws_real,
),
(
NormalizedDevShape::JsonparserAtoiMini,
detectors::is_jsonparser_atoi_mini,
),
(
NormalizedDevShape::JsonparserAtoiReal,
detectors::is_jsonparser_atoi_real,
),
(
NormalizedDevShape::JsonparserParseNumberReal,
detectors::is_jsonparser_parse_number_real,
),
// Phase 50/51: selfhost P2 token-scan shapes
(
NormalizedDevShape::SelfhostTokenScanP2,
detectors::is_selfhost_token_scan_p2,
),
(
NormalizedDevShape::SelfhostTokenScanP2Accum,
detectors::is_selfhost_token_scan_p2_accum,
),
// Phase 47-A: Pattern3 if-sum minimal
(
NormalizedDevShape::Pattern3IfSumMinimal,
detectors::is_pattern3_if_sum_minimal,
),
// Phase 47-B: Pattern3 if-sum extended (multi/json)
(
NormalizedDevShape::Pattern3IfSumMulti,
detectors::is_pattern3_if_sum_multi,
),
(
NormalizedDevShape::Pattern3IfSumJson,
detectors::is_pattern3_if_sum_json,
),
// Phase 48-A: Pattern4 continue minimal
(
NormalizedDevShape::Pattern4ContinueMinimal,
detectors::is_pattern4_continue_minimal,
),
// Phase 48-B: Pattern4 continue JsonParser skip_ws (array/object)
(
NormalizedDevShape::JsonparserParseArrayContinueSkipWs,
detectors::is_jsonparser_parse_array_continue_skip_ws,
),
(
NormalizedDevShape::JsonparserParseObjectContinueSkipWs,
detectors::is_jsonparser_parse_object_continue_skip_ws,
),
// Phase 50/51: selfhost P3 if-sum shapes
(
NormalizedDevShape::SelfhostIfSumP3,
detectors::is_selfhost_if_sum_p3,
),
(
NormalizedDevShape::SelfhostIfSumP3Ext,
detectors::is_selfhost_if_sum_p3_ext,
),
// Phase 53: selfhost P2/P3 practical variations
(
NormalizedDevShape::SelfhostArgsParseP2,
detectors::is_selfhost_args_parse_p2,
),
(
NormalizedDevShape::SelfhostStmtCountP3,
detectors::is_selfhost_stmt_count_p3,
),
// Phase 54: selfhost P2/P3 shape growth
(
NormalizedDevShape::SelfhostVerifySchemaP2,
detectors::is_selfhost_verify_schema_p2,
),
(
NormalizedDevShape::SelfhostDetectFormatP3,
detectors::is_selfhost_detect_format_p3,
),
// Phase 89: Continue + Early Return pattern
(
NormalizedDevShape::PatternContinueReturnMinimal,
detectors::is_pattern_continue_return_minimal,
),
// Phase 90: Parse String Composite pattern
(
NormalizedDevShape::ParseStringCompositeMinimal,
detectors::is_parse_string_composite_minimal,
),
];
/// Shapes handled by the direct bridge (dev-only).
///
/// Runs `detect_shapes` on `module`, reports the result through `log_shapes`
/// under the `"direct"` tag, and returns the detected shapes unchanged.
pub(crate) fn direct_shapes(module: &JoinModule) -> Vec<NormalizedDevShape> {
    let detected = detect_shapes(module);
    log_shapes("direct", detected.as_slice());
    detected
}
/// Shapes eligible for the Structured→Normalized conversion (dev-only).
///
/// Runs `detect_shapes` on `module`, reports the result through `log_shapes`
/// under the `"roundtrip"` tag, and returns the detected shapes unchanged.
pub(crate) fn supported_shapes(module: &JoinModule) -> Vec<NormalizedDevShape> {
    let detected = detect_shapes(module);
    log_shapes("roundtrip", detected.as_slice());
    detected
}
/// Phase 44: Map NormalizedDevShape to ShapeCapability
pub fn capability_for_shape(shape: &NormalizedDevShape) -> ShapeCapability {
use NormalizedDevShape::*;
use ShapeCapabilityKind::*;
let kind = match shape {
Pattern2Mini => P2CoreSimple,
JsonparserSkipWsMini | JsonparserSkipWsReal => P2CoreSkipWs,
JsonparserAtoiMini | JsonparserAtoiReal => P2CoreAtoi,
JsonparserParseNumberReal => P2MidParseNumber,
Pattern1Mini => P2CoreSimple, // Also core simple pattern
// Phase 47-B: P3 if-sum family
Pattern3IfSumMinimal | Pattern3IfSumMulti | Pattern3IfSumJson => P3IfSum,
// Phase 48-A/B: P4 continue family
Pattern4ContinueMinimal
| JsonparserParseArrayContinueSkipWs
| JsonparserParseObjectContinueSkipWs => P4ContinueSkipWs,
// Phase 50: selfhost P2/P3 dev shapes
SelfhostTokenScanP2 | SelfhostTokenScanP2Accum => SelfhostP2Core,
SelfhostIfSumP3 | SelfhostIfSumP3Ext => SelfhostP3IfSum,
// Phase 53: selfhost P2/P3 practical variations
SelfhostArgsParseP2 => SelfhostP2Core,
SelfhostStmtCountP3 => SelfhostP3IfSum,
// Phase 54: selfhost P2/P3 shape growth
SelfhostVerifySchemaP2 => SelfhostP2Core,
SelfhostDetectFormatP3 => SelfhostP3IfSum,
// Phase 89: Continue + Early Return pattern (dev-only, dedicated capability)
PatternContinueReturnMinimal => P4ContinueEarlyReturn,
// Phase 90: Parse String Composite pattern (dev-only, dedicated capability)
ParseStringCompositeMinimal => CompositeParseString,
};
ShapeCapability::new(kind)
}
/// Phase 46+: Canonical shapes that ALWAYS use Normalized→MIR(direct)
/// regardless of feature flags or mode.
///
/// Canonical set (Phase 48-C):
/// - P2-Core: Pattern2Mini, JsonparserSkipWsMini, JsonparserSkipWsReal, JsonparserAtoiMini
/// - P2-Mid: JsonparserAtoiReal, JsonparserParseNumberReal
/// - P3: Pattern3 If-sum minimal/multi/json
/// - P4: Pattern4 continue minimal + JsonParser skip_ws (array/object)
pub fn is_canonical_shape(shape: &NormalizedDevShape) -> bool {
use NormalizedDevShape::*;
matches!(
shape,
Pattern2Mini
| JsonparserSkipWsMini
| JsonparserSkipWsReal
| JsonparserAtoiMini
// Phase 46: Add P2-Mid patterns
| JsonparserAtoiReal
| JsonparserParseNumberReal
// Phase 47-C: P3 if-sum canonical set
| Pattern3IfSumMinimal
| Pattern3IfSumMulti
| Pattern3IfSumJson
// Phase 48-C: P4 continue canonical set
| Pattern4ContinueMinimal
| JsonparserParseArrayContinueSkipWs
| JsonparserParseObjectContinueSkipWs
)
}
/// Phase 44: Check if capability kind is in P2-Core family
///
/// This checks capability-level membership, not granular canonical status.
/// Use `is_canonical_shape()` for exact canonical filtering.
pub fn is_p2_core_capability(cap: &ShapeCapability) -> bool {
use ShapeCapabilityKind::*;
matches!(
cap.kind,
P2CoreSimple
| P2CoreSkipWs
| P2CoreAtoi
| P2MidParseNumber
| P3IfSum
| P4ContinueSkipWs
| P4ContinueEarlyReturn
| CompositeParseString
| SelfhostP2Core
| SelfhostP3IfSum
)
}
/// Phase 44: Reports whether the Normalized dev path supports `cap`.
///
/// This is a plain delegate to `is_p2_core_capability`, so both predicates
/// currently accept exactly the same capability kinds.
pub fn is_supported_by_normalized(cap: &ShapeCapability) -> bool {
    is_p2_core_capability(cap)
}
/// canonical常時 Normalized 経路を通す)対象。
/// Phase 46: Extract canonical shapes from JoinModule.
///
/// Canonical set (P2-Core + P2-Mid):
/// - Pattern2Mini, skip_ws mini/real, atoi mini/real, parse_number real
///
/// These shapes ALWAYS use Normalized→MIR(direct) regardless of mode.
/// P3/P4 patterns are NOT canonical (future NORM-P3/NORM-P4 phases).
pub(crate) fn canonical_shapes(module: &JoinModule) -> Vec<NormalizedDevShape> {
let shapes: Vec<_> = detect_shapes(module)
.into_iter()
.filter(|s| is_canonical_shape(s))
.collect();
log_shapes("canonical", &shapes);
shapes
}
/// Reports whether `detect_shapes` recognises at least one dev shape in `module`.
#[allow(dead_code)]
pub(crate) fn is_direct_supported(module: &JoinModule) -> bool {
    let detected = detect_shapes(module);
    !detected.is_empty()
}
pub fn detect_shapes(module: &JoinModule) -> Vec<NormalizedDevShape> {
let mut shapes: Vec<_> = SHAPE_DETECTORS
.iter()
.filter_map(|(shape, detector)| if detector(module) { Some(*shape) } else { None })
.collect();
// Pattern1 は「最小の後方互換」なので、より具体的な shape が見つかった場合は外しておく。
if shapes.len() > 1 {
shapes.retain(|s| *s != NormalizedDevShape::Pattern1Mini);
}
// selfhost shapesは canonical P2/P3 の generic 判定から分離する
if shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2)
|| shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2Accum)
|| shapes.contains(&NormalizedDevShape::SelfhostArgsParseP2)
|| shapes.contains(&NormalizedDevShape::SelfhostVerifySchemaP2)
{
shapes.retain(|s| {
*s != NormalizedDevShape::Pattern2Mini
&& *s != NormalizedDevShape::Pattern4ContinueMinimal
});
}
if shapes.contains(&NormalizedDevShape::SelfhostIfSumP3)
|| shapes.contains(&NormalizedDevShape::SelfhostIfSumP3Ext)
|| shapes.contains(&NormalizedDevShape::SelfhostStmtCountP3)
|| shapes.contains(&NormalizedDevShape::SelfhostDetectFormatP3)
{
shapes.retain(|s| {
!matches!(
s,
NormalizedDevShape::Pattern3IfSumMinimal
| NormalizedDevShape::Pattern3IfSumMulti
| NormalizedDevShape::Pattern3IfSumJson
| NormalizedDevShape::Pattern4ContinueMinimal
)
});
}
shapes
}
// --- Detection logic (shared) ---
mod detectors {
use super::*;
/// Detects the minimal Pattern1 shape: a structured module in which
/// `find_loop_step` can locate a loop-step function.
pub(super) fn is_pattern1_mini(module: &JoinModule) -> bool {
    if !module.is_structured() {
        return false;
    }
    find_loop_step(module).is_some()
}
/// Detects the minimal Pattern2 shape.
///
/// Requirements:
/// - structured module with exactly 3 functions
/// - loop-step function with 1..=3 parameters
/// - loop-step body contains a conditional `Jump` and a `Call` with
///   `k_next: None` (tail call)
pub(super) fn is_pattern2_mini(module: &JoinModule) -> bool {
    if !(module.is_structured() && module.functions.len() == 3) {
        return false;
    }
    find_loop_step(module).map_or(false, |step| {
        if !matches!(step.params.len(), 1..=3) {
            return false;
        }
        let mut cond_jump_seen = false;
        let mut tail_call_seen = false;
        for inst in step.body.iter() {
            match inst {
                JoinInst::Jump { cond: Some(_), .. } => cond_jump_seen = true,
                JoinInst::Call { k_next: None, .. } => tail_call_seen = true,
                _ => {}
            }
        }
        cond_jump_seen && tail_call_seen
    })
}
pub(super) fn is_jsonparser_skip_ws_mini(module: &JoinModule) -> bool {
is_pattern2_mini(module)
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_skip_ws_mini")
}
pub(crate) fn is_jsonparser_skip_ws_real(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(2..=6).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
has_cond_jump
&& has_tail_call
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_skip_ws_real")
}
pub(crate) fn is_jsonparser_atoi_mini(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(3..=8).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
has_cond_jump
&& has_tail_call
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_atoi_mini")
}
pub(crate) fn is_jsonparser_atoi_real(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(3..=10).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
has_cond_jump
&& has_tail_call
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_atoi_real")
}
pub(crate) fn is_jsonparser_parse_number_real(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(3..=12).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
has_cond_jump
&& has_tail_call
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_parse_number_real")
}
/// Returns `true` when any function in `module` is named exactly `expected_name`.
fn name_guard_exact(module: &JoinModule, expected_name: &str) -> bool {
    for func in module.functions.values() {
        if func.name == expected_name {
            return true;
        }
    }
    false
}
/// Phase 52: Structural signature of the selfhost P2 core family (dev-only).
///
/// Kept intentionally narrow so it does not swallow generic P2 shapes:
/// - structured module with exactly 3 functions
/// - loop_step params: 3..=4 (i + host + 1..2 carriers)
/// - P2 break-loop skeleton (conditional jump + tail call)
/// - no Select / BoxCall anywhere in the loop-step body
pub(super) fn is_selfhost_p2_core_family_candidate(module: &JoinModule) -> bool {
    use crate::mir::join_ir::MirLikeInst;

    if !(module.is_structured() && module.functions.len() == 3) {
        return false;
    }
    let step = match find_loop_step(module) {
        Some(step) if matches!(step.params.len(), 3..=4) => step,
        _ => return false,
    };

    // Single pass over the body, recording which instruction kinds occur.
    let mut cond_jump_seen = false;
    let mut tail_call_seen = false;
    let mut select_seen = false;
    let mut boxcall_seen = false;
    for inst in step.body.iter() {
        match inst {
            JoinInst::Jump { cond: Some(_), .. } => cond_jump_seen = true,
            JoinInst::Call { k_next: None, .. } => tail_call_seen = true,
            // Select may appear either as a JoinInst or wrapped in Compute.
            JoinInst::Select { .. } | JoinInst::Compute(MirLikeInst::Select { .. }) => {
                select_seen = true
            }
            JoinInst::Compute(MirLikeInst::BoxCall { .. }) => boxcall_seen = true,
            _ => {}
        }
    }

    cond_jump_seen && tail_call_seen && !(select_seen || boxcall_seen)
}
/// Phase 52: Structural signature of the selfhost P3 if-sum family (dev-only).
///
/// Note: the current selfhost baseline is still P2-like
/// (normalize_pattern2_minimal), so the signature does not require a Select
/// and instead keys on the explicit break-if.
///
/// Selfhost P3 is told apart from canonical P3 by requiring:
/// - structured module with exactly 3 functions
/// - loop_step params == 4 (i + host + sum + count)
/// - a `Ge` compare whose both operands are loop-step params (break-if)
/// - P2/P3 loop skeleton (conditional jump + tail call)
/// - no BoxCall in the loop-step body
pub(super) fn is_selfhost_p3_if_sum_family_candidate(module: &JoinModule) -> bool {
    use crate::mir::join_ir::{CompareOp, MirLikeInst};
    use std::collections::BTreeSet;

    if !(module.is_structured() && module.functions.len() == 3) {
        return false;
    }
    let step = match find_loop_step(module) {
        Some(step) if step.params.len() == 4 => step,
        _ => return false,
    };
    let params: BTreeSet<_> = step.params.iter().copied().collect();

    let mut cond_jump_seen = false;
    let mut tail_call_seen = false;
    let mut ge_between_params = false;
    let mut boxcall_seen = false;
    for inst in step.body.iter() {
        match inst {
            JoinInst::Jump { cond: Some(_), .. } => cond_jump_seen = true,
            JoinInst::Call { k_next: None, .. } => tail_call_seen = true,
            JoinInst::Compute(MirLikeInst::Compare { op, lhs, rhs, .. }) => {
                if *op == CompareOp::Ge && params.contains(lhs) && params.contains(rhs) {
                    ge_between_params = true;
                }
            }
            JoinInst::Compute(MirLikeInst::BoxCall { .. }) => boxcall_seen = true,
            _ => {}
        }
    }

    cond_jump_seen && tail_call_seen && ge_between_params && !boxcall_seen
}
/// Detects the selfhost token-scan P2 shape: the selfhost P2 core structural
/// signature plus a function named exactly `selfhost_token_scan_p2`.
pub(crate) fn is_selfhost_token_scan_p2(module: &JoinModule) -> bool {
    if !is_selfhost_p2_core_family_candidate(module) {
        return false;
    }
    name_guard_exact(module, "selfhost_token_scan_p2")
}
/// Detects the selfhost token-scan P2 accumulator shape: the selfhost P2 core
/// structural signature plus a function named exactly
/// `selfhost_token_scan_p2_accum`.
pub(crate) fn is_selfhost_token_scan_p2_accum(module: &JoinModule) -> bool {
    if !is_selfhost_p2_core_family_candidate(module) {
        return false;
    }
    name_guard_exact(module, "selfhost_token_scan_p2_accum")
}
/// Phase 47-A: Checks whether `module` matches the Pattern3 if-sum minimal shape.
///
/// Detection is structural (no name-based heuristics):
/// - structured module with exactly 3 functions (main, loop_step, k_exit)
/// - loop-step has 2..=6 params (i + carriers + len/host)
/// - loop-step body contains a Compare (loop condition)
/// - loop-step body contains a Select (conditional carrier update); since
///   Phase 220 this may be `JoinInst::Select` or `Compute(MirLikeInst::Select)`
/// - loop-step body contains a tail call (`Call` with `k_next: None`)
pub(crate) fn is_pattern3_if_sum_minimal(module: &JoinModule) -> bool {
    use crate::mir::join_ir::MirLikeInst;

    if !(module.is_structured() && module.functions.len() == 3) {
        return false;
    }
    let step = match find_loop_step(module) {
        Some(step) if matches!(step.params.len(), 2..=6) => step,
        _ => return false,
    };

    let mut compare_seen = false;
    let mut select_seen = false;
    let mut tail_call_seen = false;
    for inst in step.body.iter() {
        match inst {
            JoinInst::Compute(MirLikeInst::Compare { .. }) => compare_seen = true,
            JoinInst::Select { .. } | JoinInst::Compute(MirLikeInst::Select { .. }) => {
                select_seen = true
            }
            JoinInst::Call { k_next: None, .. } => tail_call_seen = true,
            _ => {}
        }
    }

    compare_seen && select_seen && tail_call_seen
}
/// Detects the selfhost if-sum P3 shape: the selfhost P3 if-sum structural
/// signature plus a function named exactly `selfhost_if_sum_p3`.
pub(crate) fn is_selfhost_if_sum_p3(module: &JoinModule) -> bool {
    if !is_selfhost_p3_if_sum_family_candidate(module) {
        return false;
    }
    name_guard_exact(module, "selfhost_if_sum_p3")
}
/// Detects the selfhost if-sum P3 extended shape: the selfhost P3 if-sum
/// structural signature plus a function named exactly `selfhost_if_sum_p3_ext`.
pub(crate) fn is_selfhost_if_sum_p3_ext(module: &JoinModule) -> bool {
    if !is_selfhost_p3_if_sum_family_candidate(module) {
        return false;
    }
    name_guard_exact(module, "selfhost_if_sum_p3_ext")
}
/// Phase 53: selfhost args-parse P2 detector (practical variation with a
/// string carrier).
///
/// Two-stage detection:
/// 1. Structural check: the selfhost P2 core family signature.
/// 2. Dev-only name guard (`selfhost_args_parse_p2`) to resolve ambiguity
///    between shapes that share the same structure.
pub(crate) fn is_selfhost_args_parse_p2(module: &JoinModule) -> bool {
    let structurally_p2_core = is_selfhost_p2_core_family_candidate(module);
    structurally_p2_core && name_guard_exact(module, "selfhost_args_parse_p2")
}
/// Phase 53: selfhost stmt-count P3 detector (practical variation with a
/// multi-branch if-else).
///
/// Two-stage detection:
/// 1. Structural check: structured module with exactly 3 functions, a
///    loop-step with 2..=10 params (five statement counters r/e/l/iff/lp
///    plus i fit in this range), a conditional jump (break) and a tail call
///    (loop continuation).
/// 2. Dev-only name guard (`selfhost_stmt_count_p3`) for final confirmation.
pub(crate) fn is_selfhost_stmt_count_p3(module: &JoinModule) -> bool {
    if !(module.is_structured() && module.functions.len() == 3) {
        return false;
    }
    let step = match find_loop_step(module) {
        Some(step) if matches!(step.params.len(), 2..=10) => step,
        _ => return false,
    };

    let mut cond_jump_seen = false;
    let mut tail_call_seen = false;
    for inst in step.body.iter() {
        match inst {
            JoinInst::Jump { cond: Some(_), .. } => cond_jump_seen = true,
            JoinInst::Call { k_next: None, .. } => tail_call_seen = true,
            _ => {}
        }
    }

    cond_jump_seen && tail_call_seen && name_guard_exact(module, "selfhost_stmt_count_p3")
}
/// Phase 54: Counts the `Compare` instructions using `target_op` across the
/// bodies of all functions in `module`.
fn count_compare_ops(module: &JoinModule, target_op: crate::mir::join_ir::CompareOp) -> usize {
    use crate::mir::join_ir::MirLikeInst;

    let mut total = 0;
    for func in module.functions.values() {
        for inst in &func.body {
            if let JoinInst::Compute(MirLikeInst::Compare { op, .. }) = inst {
                if *op == target_op {
                    total += 1;
                }
            }
        }
    }
    total
}
/// Phase 54: selfhost verify-schema P2 detector (Ne-heavy pattern, early
/// return diversity).
///
/// Two-stage detection:
/// 1. Structural check: the selfhost P2 core family signature, a loop-step
///    with 2..=3 params (ver + kind + host param), and at least one `Ne`
///    compare anywhere in the module.
/// 2. Dev-only name guard (`selfhost_verify_schema_p2`) for final confirmation.
///
/// NOTE(review): the P2 core family signature already requires 3..=4
/// loop-step params, so together with the 2..=3 range here only modules with
/// exactly 3 params can pass — confirm this is intended.
pub(crate) fn is_selfhost_verify_schema_p2(module: &JoinModule) -> bool {
    use crate::mir::join_ir::CompareOp;

    if !is_selfhost_p2_core_family_candidate(module) {
        return false;
    }
    let params_in_range =
        find_loop_step(module).map_or(false, |step| matches!(step.params.len(), 2..=3));
    if !params_in_range {
        return false;
    }
    // A Ne condition (verify != expected) is mandatory.
    if count_compare_ops(module, CompareOp::Ne) == 0 {
        return false;
    }
    name_guard_exact(module, "selfhost_verify_schema_p2")
}
/// Phase 54: selfhost detect-format P3 detector (String return branching,
/// null check).
///
/// Two-stage detection:
/// 1. Structural check: structured module with exactly 3 functions, a
///    loop-step with 2..=4 params (lightweight P3: 3-way branching + loop
///    variable), and at least one conditional jump. No tail call is required.
/// 2. Dev-only name guard (`selfhost_detect_format_p3`) for final confirmation.
pub(crate) fn is_selfhost_detect_format_p3(module: &JoinModule) -> bool {
    if !(module.is_structured() && module.functions.len() == 3) {
        return false;
    }
    let structure_ok = find_loop_step(module).map_or(false, |step| {
        matches!(step.params.len(), 2..=4)
            && step
                .body
                .iter()
                .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }))
    });
    structure_ok && name_guard_exact(module, "selfhost_detect_format_p3")
}
/// Phase 47-B: P3 if-sum (multi-carrier) shape detector
pub(crate) fn is_pattern3_if_sum_multi(module: &JoinModule) -> bool {
if !is_pattern3_if_sum_minimal(module) {
return false;
}
module
.functions
.values()
.any(|f| f.name == "pattern3_if_sum_multi_min")
}
/// Phase 47-B: P3 if-sum (JsonParser mini) shape detector
pub(crate) fn is_pattern3_if_sum_json(module: &JoinModule) -> bool {
if !is_pattern3_if_sum_minimal(module) {
return false;
}
module
.functions
.values()
.any(|f| f.name == "jsonparser_if_sum_min")
}
/// Phase 48-A: Checks whether `module` matches the Pattern4 continue minimal shape.
///
/// Detection is structural (no name-based heuristics). The module must be
/// structured with exactly 3 functions (main, loop_step, k_exit), and the
/// loop-step must satisfy, as reported by `LoopStepInspector`:
/// - a Compare instruction (loop condition or continue check)
/// - a Select instruction (continue's core: carrier switching)
/// - a tail call (loop back)
/// - a reasonable parameter count
/// - exactly one conditional jump (the loop break)
///
/// Phase 89: the "exactly one conditional jump" rule was added so that
/// continue + early-return loops are not misdetected as plain continue loops.
pub(crate) fn is_pattern4_continue_minimal(module: &JoinModule) -> bool {
    if !(module.is_structured() && module.functions.len() == 3) {
        return false;
    }
    let step = match find_loop_step(module) {
        None => return false,
        Some(step) => step,
    };

    let structural_flags = [
        LoopStepInspector::has_compare_instruction(step),
        LoopStepInspector::has_select_instruction(step),
    ];
    let cond_jump_count = LoopStepInspector::count_conditional_jumps(step);
    let loop_flags = [
        LoopStepInspector::has_tail_call(step),
        LoopStepInspector::has_reasonable_param_count(step),
    ];

    // Exactly one conditional jump: the loop break, with no early return.
    structural_flags.iter().all(|flag| *flag)
        && loop_flags.iter().all(|flag| *flag)
        && cond_jump_count == 1
}
pub(crate) fn is_jsonparser_parse_array_continue_skip_ws(module: &JoinModule) -> bool {
is_pattern4_continue_minimal(module)
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_parse_array_continue_skip_ws")
}
pub(crate) fn is_jsonparser_parse_object_continue_skip_ws(module: &JoinModule) -> bool {
is_pattern4_continue_minimal(module)
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_parse_object_continue_skip_ws")
}
/// Phase 89: Checks whether `module` matches the Continue + Early Return pattern.
///
/// The module must be structured with exactly 3 functions (main, loop_step,
/// k_exit), and the loop-step must satisfy, as reported by
/// `LoopStepInspector`:
/// - a Compare instruction
/// - a Select instruction (continue's core)
/// - a tail call (loop back)
/// - a reasonable parameter count
/// - TWO or more conditional jumps (loop break + early return)
pub(crate) fn is_pattern_continue_return_minimal(module: &JoinModule) -> bool {
    if !(module.is_structured() && module.functions.len() == 3) {
        return false;
    }
    let step = match find_loop_step(module) {
        None => return false,
        Some(step) => step,
    };

    let structural_flags = [
        LoopStepInspector::has_compare_instruction(step),
        LoopStepInspector::has_select_instruction(step),
    ];
    let cond_jump_count = LoopStepInspector::count_conditional_jumps(step);
    let loop_flags = [
        LoopStepInspector::has_tail_call(step),
        LoopStepInspector::has_reasonable_param_count(step),
    ];

    // At least two conditional jumps: the loop break plus an early return.
    structural_flags.iter().all(|flag| *flag)
        && loop_flags.iter().all(|flag| *flag)
        && cond_jump_count >= 2
}
/// Phase 90: Checks whether `module` matches the Parse String Composite pattern.
///
/// The module must first satisfy `is_pattern_continue_return_minimal`
/// (3 functions, Select, Compare, tail call, two or more conditional jumps).
/// On top of that, the loop-step body must contain a variable-step increment:
/// a `BinOp` `Add` whose right-hand operand is the destination of a `Const`
/// holding the integer 2.
///
/// Distinguishing from ContinueReturn:
/// - ParseStringComposite has i+=2 in the continue branch (escape character handling)
/// - ContinueReturn has i+=1 in the continue branch
pub(crate) fn is_parse_string_composite_minimal(module: &JoinModule) -> bool {
    use crate::mir::join_ir::{BinOpKind, ConstValue, MirLikeInst};

    // The basic Continue + Return structure is a prerequisite.
    if !is_pattern_continue_return_minimal(module) {
        return false;
    }
    let step = match find_loop_step(module) {
        None => return false,
        Some(step) => step,
    };

    // Destinations of every `Const` in the loop body that holds integer 2.
    let const_two_dsts: Vec<_> = step
        .body
        .iter()
        .filter_map(|inst| match inst {
            JoinInst::Compute(MirLikeInst::Const {
                dst,
                value: ConstValue::Integer(2),
            }) => Some(dst),
            _ => None,
        })
        .collect();

    // Variable step: an Add whose rhs is one of those constants (i += 2 for escapes).
    step.body.iter().any(|inst| match inst {
        JoinInst::Compute(MirLikeInst::BinOp { op, rhs, .. }) => {
            *op == BinOpKind::Add && const_two_dsts.iter().any(|dst| *dst == rhs)
        }
        _ => false,
    })
}
/// Locates the loop-step function of `module`.
///
/// Prefers a function named exactly `loop_step`; if none exists, falls back
/// to the function registered under `JoinFuncId::new(1)`. Returns `None` when
/// neither lookup succeeds.
pub(super) fn find_loop_step(module: &JoinModule) -> Option<&JoinFunction> {
    let by_name = module
        .functions
        .values()
        .find(|func| func.name == "loop_step");
    match by_name {
        Some(func) => Some(func),
        None => module.functions.get(&JoinFuncId::new(1)),
    }
}
}
/// Prints the detected shapes to stderr under `tag`.
///
/// Nothing is printed when `shapes` is empty, or unless both
/// `dev_env::normalized_dev_logs_enabled()` and `joinir_dev_enabled()` return
/// `true`.
fn log_shapes(tag: &str, shapes: &[NormalizedDevShape]) {
    let should_log = !shapes.is_empty()
        && dev_env::normalized_dev_logs_enabled()
        && joinir_dev_enabled();
    if should_log {
        eprintln!("[joinir/normalized-dev/shape] {}: {:?}", tag, shapes);
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// The Pattern3 if-sum minimal fixture must be accepted both by its dedicated
/// detector and by the top-level `detect_shapes` entry point.
#[cfg(feature = "normalized_dev")]
#[test]
fn test_detect_pattern3_if_sum_minimal_shape() {
    use crate::mir::join_ir::normalized::fixtures as fx;

    let module = fx::build_pattern3_if_sum_min_structured_for_normalized_dev();

    // The dedicated detector recognises the fixture.
    let detected_directly = detectors::is_pattern3_if_sum_minimal(&module);
    assert!(
        detected_directly,
        "pattern3_if_sum_minimal fixture should be detected"
    );

    // The aggregated shape list reports it as well.
    let shapes = detect_shapes(&module);
    let listed = shapes
        .iter()
        .any(|shape| *shape == NormalizedDevShape::Pattern3IfSumMinimal);
    assert!(
        listed,
        "detect_shapes() should include Pattern3IfSumMinimal, got: {:?}",
        shapes
    );
}
/// The selfhost P2 core structural signature accepts both selfhost token-scan
/// fixtures and the JsonParser skip_ws fixture (the ambiguity is intentional),
/// but rejects the canonical Pattern2 minimal fixture.
#[cfg(feature = "normalized_dev")]
#[test]
fn test_selfhost_p2_core_structural_candidate_signature() {
    use crate::mir::join_ir::normalized::fixtures as fx;

    // (fixture, expected verdict, failure message)
    let cases = [
        (
            fx::build_selfhost_token_scan_p2_structured_for_normalized_dev(),
            true,
            "selfhost_token_scan_p2 should match structural candidate",
        ),
        (
            fx::build_selfhost_token_scan_p2_accum_structured_for_normalized_dev(),
            true,
            "selfhost_token_scan_p2_accum should match structural candidate",
        ),
        // Structural signature is intentionally ambiguous with JsonParser P2-mini family.
        (
            fx::build_jsonparser_skip_ws_structured_for_normalized_dev(),
            true,
            "jsonparser_skip_ws_mini should also match P2 core candidate",
        ),
        (
            fx::build_pattern2_minimal_structured(),
            false,
            "canonical Pattern2Mini fixture should not match selfhost P2 candidate",
        ),
    ];

    for (module, expected, message) in cases.iter() {
        let verdict = detectors::is_selfhost_p2_core_family_candidate(module);
        assert!(verdict == *expected, "{}", message);
    }
}
#[cfg(feature = "normalized_dev")]
#[test]
fn test_selfhost_p3_if_sum_structural_candidate_signature() {
use crate::mir::join_ir::normalized::fixtures::{
build_pattern3_if_sum_min_structured_for_normalized_dev,
build_pattern3_if_sum_multi_min_structured_for_normalized_dev,
build_selfhost_if_sum_p3_ext_structured_for_normalized_dev,
build_selfhost_if_sum_p3_structured_for_normalized_dev,
};
let selfhost_p3 = build_selfhost_if_sum_p3_structured_for_normalized_dev();
let selfhost_p3_ext = build_selfhost_if_sum_p3_ext_structured_for_normalized_dev();
let canonical_p3_min = build_pattern3_if_sum_min_structured_for_normalized_dev();
let canonical_p3_multi = build_pattern3_if_sum_multi_min_structured_for_normalized_dev();
assert!(
detectors::is_selfhost_p3_if_sum_family_candidate(&selfhost_p3),
"selfhost_if_sum_p3 should match structural candidate"
);
assert!(
detectors::is_selfhost_p3_if_sum_family_candidate(&selfhost_p3_ext),
"selfhost_if_sum_p3_ext should match structural candidate"
);
assert!(
!detectors::is_selfhost_p3_if_sum_family_candidate(&canonical_p3_min),
"canonical P3 minimal should not match selfhost P3 candidate"
);
assert!(
!detectors::is_selfhost_p3_if_sum_family_candidate(&canonical_p3_multi),
"canonical P3 multi should not match selfhost P3 candidate"
);
}
#[cfg(feature = "normalized_dev")]
#[test]
fn test_detect_selfhost_token_scan_p2_shape() {
use crate::mir::join_ir::normalized::fixtures::build_selfhost_token_scan_p2_structured_for_normalized_dev;
let module = build_selfhost_token_scan_p2_structured_for_normalized_dev();
let shapes = detect_shapes(&module);
assert!(
shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2),
"selfhost_token_scan_p2 shape missing: {:?}",
shapes
);
assert!(
!shapes.contains(&NormalizedDevShape::Pattern2Mini),
"selfhost_token_scan_p2 should not be treated as canonical Pattern2Mini: {:?}",
shapes
);
}
#[cfg(feature = "normalized_dev")]
#[test]
fn test_detect_selfhost_token_scan_p2_accum_shape() {
use crate::mir::join_ir::normalized::fixtures::build_selfhost_token_scan_p2_accum_structured_for_normalized_dev;
let module = build_selfhost_token_scan_p2_accum_structured_for_normalized_dev();
let shapes = detect_shapes(&module);
assert!(
shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2Accum),
"selfhost_token_scan_p2_accum shape missing: {:?}",
shapes
);
assert!(
!shapes.contains(&NormalizedDevShape::Pattern2Mini),
"selfhost_token_scan_p2_accum should not be treated as canonical Pattern2Mini: {:?}",
shapes
);
}
#[cfg(feature = "normalized_dev")]
#[test]
fn test_detect_selfhost_if_sum_p3_shape() {
use crate::mir::join_ir::normalized::fixtures::build_selfhost_if_sum_p3_structured_for_normalized_dev;
let module = build_selfhost_if_sum_p3_structured_for_normalized_dev();
let shapes = detect_shapes(&module);
assert!(
shapes.contains(&NormalizedDevShape::SelfhostIfSumP3),
"selfhost_if_sum_p3 shape missing: {:?}",
shapes
);
assert!(
!shapes.iter().any(|s| matches!(s, NormalizedDevShape::Pattern3IfSumMinimal)),
"selfhost_if_sum_p3 should not rely on canonical P3 minimal detection: {:?}",
shapes
);
}
#[cfg(feature = "normalized_dev")]
#[test]
fn test_detect_selfhost_if_sum_p3_ext_shape() {
use crate::mir::join_ir::normalized::fixtures::build_selfhost_if_sum_p3_ext_structured_for_normalized_dev;
let module = build_selfhost_if_sum_p3_ext_structured_for_normalized_dev();
let shapes = detect_shapes(&module);
assert!(
shapes.contains(&NormalizedDevShape::SelfhostIfSumP3Ext),
"selfhost_if_sum_p3_ext shape missing: {:?}",
shapes
);
assert!(
!shapes.iter().any(|s| matches!(
s,
NormalizedDevShape::Pattern3IfSumMinimal
| NormalizedDevShape::Pattern3IfSumMulti
| NormalizedDevShape::Pattern3IfSumJson
)),
"selfhost_if_sum_p3_ext should not rely on canonical P3 detection: {:?}",
shapes
);
}
#[cfg(feature = "normalized_dev")]
#[test]
fn test_detect_pattern4_continue_minimal_shape() {
use crate::mir::join_ir::normalized::fixtures::build_pattern4_continue_min_structured_for_normalized_dev;
let module = build_pattern4_continue_min_structured_for_normalized_dev();
// Should detect Pattern4ContinueMinimal shape
assert!(
detectors::is_pattern4_continue_minimal(&module),
"pattern4_continue_minimal fixture should be detected"
);
let shapes = detect_shapes(&module);
assert!(
shapes.contains(&NormalizedDevShape::Pattern4ContinueMinimal),
"detect_shapes() should include Pattern4ContinueMinimal, got: {:?}",
shapes
);
}
#[cfg(feature = "normalized_dev")]
#[test]
fn test_detect_pattern4_jsonparser_continue_shapes() {
use crate::mir::join_ir::normalized::fixtures::{
build_jsonparser_parse_array_continue_skip_ws_structured_for_normalized_dev,
build_jsonparser_parse_object_continue_skip_ws_structured_for_normalized_dev,
};
let array = build_jsonparser_parse_array_continue_skip_ws_structured_for_normalized_dev();
assert!(
detectors::is_jsonparser_parse_array_continue_skip_ws(&array),
"array continue fixture should be detected"
);
let array_shapes = detect_shapes(&array);
assert!(
array_shapes.contains(&NormalizedDevShape::JsonparserParseArrayContinueSkipWs),
"array continue shape missing, got {:?}",
array_shapes
);
let object = build_jsonparser_parse_object_continue_skip_ws_structured_for_normalized_dev();
assert!(
detectors::is_jsonparser_parse_object_continue_skip_ws(&object),
"object continue fixture should be detected"
);
let object_shapes = detect_shapes(&object);
assert!(
object_shapes.contains(&NormalizedDevShape::JsonparserParseObjectContinueSkipWs),
"object continue shape missing, got {:?}",
object_shapes
);
}
#[cfg(feature = "normalized_dev")]
#[test]
fn test_pattern4_detector_rejects_loop_with_return() {
// Phase 89: Verify that Pattern4 detector does NOT match
// modules with loop-internal return (continue + early return pattern)
use crate::mir::join_ir::{JoinFuncId, JoinModule};
use crate::mir::ValueId;
use std::collections::BTreeMap;
// Minimal module with loop + continue + return
// (this would be the ContinueReturn pattern, NOT Pattern4)
let mut functions = BTreeMap::new();
// Entry function
let entry_func = JoinFunction {
id: JoinFuncId::new(0),
name: "loop_with_return_test".to_string(),
params: vec![ValueId(0)],
body: vec![
JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Const {
dst: ValueId(1),
value: crate::mir::join_ir::ConstValue::Integer(0),
}),
JoinInst::Call {
func: JoinFuncId::new(1),
args: vec![ValueId(1), ValueId(1), ValueId(0)],
k_next: None,
dst: Some(ValueId(2)),
},
JoinInst::Ret { value: Some(ValueId(2)) },
],
exit_cont: None,
};
// loop_step function with TWO conditional Jumps (break + early return)
let loop_step_func = JoinFunction {
id: JoinFuncId::new(1),
name: "loop_step".to_string(),
params: vec![ValueId(0), ValueId(1), ValueId(2)],
body: vec![
// Compare for loop condition
JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare {
dst: ValueId(10),
op: crate::mir::join_ir::CompareOp::Lt,
lhs: ValueId(0),
rhs: ValueId(2),
}),
JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Const {
dst: ValueId(11),
value: crate::mir::join_ir::ConstValue::Bool(false),
}),
JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare {
dst: ValueId(12),
op: crate::mir::join_ir::CompareOp::Eq,
lhs: ValueId(10),
rhs: ValueId(11),
}),
// First Jump: loop break
JoinInst::Jump {
cont: JoinFuncId::new(2).as_cont(),
args: vec![ValueId(1)],
cond: Some(ValueId(12)),
},
// Compare for early return condition
JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare {
dst: ValueId(20),
op: crate::mir::join_ir::CompareOp::Eq,
lhs: ValueId(0),
rhs: ValueId(2),
}),
// Second Jump: early return (THIS MAKES IT NOT PATTERN4)
JoinInst::Jump {
cont: JoinFuncId::new(2).as_cont(),
args: vec![ValueId(1)],
cond: Some(ValueId(20)),
},
// Select (continue's core)
JoinInst::Select {
dst: ValueId(30),
cond: ValueId(20),
then_val: ValueId(1),
else_val: ValueId(1),
type_hint: None,
},
// Tail call (loop back)
JoinInst::Call {
func: JoinFuncId::new(1),
args: vec![ValueId(0), ValueId(30), ValueId(2)],
k_next: None,
dst: Some(ValueId(40)),
},
JoinInst::Ret { value: Some(ValueId(40)) },
],
exit_cont: None,
};
// k_exit function
let k_exit_func = JoinFunction {
id: JoinFuncId::new(2),
name: "k_exit".to_string(),
params: vec![ValueId(0)],
body: vec![JoinInst::Ret { value: Some(ValueId(0)) }],
exit_cont: None,
};
functions.insert(JoinFuncId::new(0), entry_func);
functions.insert(JoinFuncId::new(1), loop_step_func);
functions.insert(JoinFuncId::new(2), k_exit_func);
let module = JoinModule {
functions,
entry: Some(JoinFuncId::new(0)),
phase: crate::mir::join_ir::JoinIrPhase::Structured,
};
// Phase 89: This should NOT be detected as Pattern4ContinueMinimal
// because it has TWO conditional Jumps (loop break + early return)
assert!(
!detectors::is_pattern4_continue_minimal(&module),
"Module with loop-internal return should NOT match Pattern4ContinueMinimal"
);
let shapes = detect_shapes(&module);
assert!(
!shapes.contains(&NormalizedDevShape::Pattern4ContinueMinimal),
"Pattern4ContinueMinimal should not be detected for loop with return, got: {:?}",
shapes
);
}
}