Files
hakorune/src/mir/join_ir/normalized/shape_guard.rs
nyash-codex 80e952b83a feat(joinir): Phase 54 SELFHOST-SHAPE-GROWTH - 構造軸育成 + 偽陽性観測
Phase 53 成果を踏まえ、構造シグネチャ軸を 5+ に育て、
偽陽性観測テストで name ガード縮小準備を整えた。

方針変更: 新ループ追加 → 構造軸育成 + 偽陽性率測定に焦点変更
- 理由: Phase 53 で selfhost P2/P3 実戦パターン追加済み
- 焦点: 既存ループに対する構造軸拡張 + 精度測定

主な成果:

1. 構造軸 5+ 達成:
   - carrier 数
   - carrier 型
   - Compare パターン
   - branch 構造
   - NEW: Compare op 分布 (count_compare_ops ヘルパー)

2. 偽陽性観測テスト追加:
   - test_phase54_structural_axis_discrimination_p2()
   - test_phase54_structural_axis_discrimination_p3()

3. 重要な発見 - 偽陽性率 ~50%:
   - P2: selfhost P2 が正しく検出されず (name ガード依存)
   - P3: selfhost P3 が Pattern4ContinueMinimal と誤検出 (構造的類似性)
   - 結論: 構造判定のみでは分離不十分、name ガード必須と判明

変更内容:

- shape_guard.rs (+80 lines):
  - count_compare_ops() 構造軸ヘルパー追加
  - detect_shapes() pub 化 (テストから呼び出し可能に)
  - SelfhostVerifySchemaP2/SelfhostDetectFormatP3 enum 追加 (将来用)

- normalized_joinir_min.rs (+110 lines):
  - 偽陽性観測テスト 2 個追加 (P2/P3 各1)
  - canonical shapes vs selfhost shapes 構造判定精度測定

- phase49 doc (+200 lines):
  - Phase 54 節完成版
  - 偽陽性分析結果記録
  - name ガード縮小方針明記

- enum 拡張対応:
  - bridge.rs (+8 lines)
  - normalized.rs (+8 lines)
  - ast_lowerer/mod.rs (+2 lines)

偽陽性観測結果 (2025-12-12):
- P2 構造判定: selfhost P2 検出失敗 → name ガード必須
- P3 構造判定: selfhost P3 が Pattern4 と誤判定 → 構造的類似性問題
- 総合: 偽陽性率 ~50% → 構造軸 5 本では不十分

次フェーズ方針 (Phase 55+):
- Phase 55-A: 条件複雑度軸追加 (BinOp/UnaryOp ネスト深度)
- Phase 55-B: 算術パターン軸追加 (Mul/Sub/Div 出現パターン)
- Phase 56: selfhost 実戦ループ追加 (6 本以上蓄積)
- Phase 57: 誤判定率 < 5% 達成後に name ガード縮小開始

name ガード撤去条件 (Phase 57):
- 構造軸 8+ 本確立
- selfhost P2/P3 各 6 本以上蓄積
- 誤判定率 < 5% 達成
- 複合的特徴量ベース判定実装

回帰テスト: 939 PASS, 0 FAIL (既存挙動不変)

Files Modified: 8 files
Lines Added: ~408 lines (net)
Implementation: Pure additive (feature-gated)

Phase 54 完了!構造軸育成・偽陽性観測基盤確立!
2025-12-12 17:12:58 +09:00

1137 lines
39 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#![cfg(feature = "normalized_dev")]
use crate::config::env::joinir_dev_enabled;
use crate::mir::join_ir::normalized::dev_env;
use crate::mir::join_ir::{JoinFuncId, JoinFunction, JoinInst, JoinModule};
/// Phase 44: Shape capability kinds (capability-based routing).
///
/// Each kind names a family of loop shapes. Concrete `NormalizedDevShape`
/// values are mapped onto these kinds by `capability_for_shape`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ShapeCapabilityKind {
/// P2 Core: Simple mini patterns (i/acc/n etc).
/// `capability_for_shape` also maps `Pattern1Mini` here.
P2CoreSimple,
/// P2 Core: skip_whitespace mini/real
P2CoreSkipWs,
/// P2 Core: _atoi mini/real
P2CoreAtoi,
/// P2 Mid: _parse_number real (p + num_str + result)
P2MidParseNumber,
/// P3 If-Sum family (minimal/multi/json)
P3IfSum,
/// P4 Continue (skip whitespace) family
P4ContinueSkipWs,
/// Selfhost P2 core (token scan)
SelfhostP2Core,
/// Selfhost P3 if-sum family
SelfhostP3IfSum,
// Future: Other P2 patterns (placeholders, not yet part of the enum)
// P2MidAtOfLoop,
// P2HeavyString,
}
/// Phase 44: Shape capability descriptor.
///
/// Currently a thin wrapper around a single `ShapeCapabilityKind`; the
/// commented-out fields below are placeholders for future extension.
#[derive(Debug, Clone)]
pub struct ShapeCapability {
/// The capability family this descriptor represents.
pub kind: ShapeCapabilityKind,
// Future extensibility fields (not all used yet):
// pub pattern_kind: LoopPatternKind,
// pub loop_param_count: usize,
// pub carrier_roles: Vec<CarrierRole>,
// pub method_calls: Vec<MethodCallSignature>,
}
impl ShapeCapability {
/// Builds a descriptor carrying only the given capability kind.
pub fn new(kind: ShapeCapabilityKind) -> Self {
Self { kind }
}
}
/// Concrete loop shapes that the dev-only Normalized path can recognise.
///
/// Every variant has an entry in `SHAPE_DETECTORS` and is mapped to a
/// capability family by `capability_for_shape`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormalizedDevShape {
// Pattern1 / Pattern2 minimal shapes and the JsonParser P2 family
Pattern1Mini,
Pattern2Mini,
JsonparserSkipWsMini,
JsonparserSkipWsReal,
JsonparserAtoiMini,
JsonparserAtoiReal,
JsonparserParseNumberReal,
// Phase 47-A: Pattern3 (if-sum) minimal
Pattern3IfSumMinimal,
// Phase 47-B: Pattern3 extended (multi/json)
Pattern3IfSumMulti,
Pattern3IfSumJson,
// Phase 48-A: Pattern4 (continue) minimal
Pattern4ContinueMinimal,
// Phase 48-B: Pattern4 (continue) JsonParser skip_ws (array/object)
JsonparserParseArrayContinueSkipWs,
JsonparserParseObjectContinueSkipWs,
// Phase 50: selfhost P2/P3 dev shapes
SelfhostTokenScanP2,
SelfhostIfSumP3,
// Phase 51: selfhost P2/P3 dev extensions
SelfhostTokenScanP2Accum,
SelfhostIfSumP3Ext,
// Phase 53: selfhost P2/P3 practical variations
SelfhostArgsParseP2,
SelfhostStmtCountP3,
// Phase 54: selfhost P2/P3 shape growth (structural axis expansion)
SelfhostVerifySchemaP2,
SelfhostDetectFormatP3,
}
/// Signature shared by every shape detector: inspect a module, answer yes/no.
type Detector = fn(&JoinModule) -> bool;
/// Table pairing each shape with the detector that recognises it.
///
/// `detect_shapes` walks this table front to back, so the order of entries
/// here is the order in which detected shapes are reported.
const SHAPE_DETECTORS: &[(NormalizedDevShape, Detector)] = &[
(NormalizedDevShape::Pattern1Mini, detectors::is_pattern1_mini),
(NormalizedDevShape::Pattern2Mini, detectors::is_pattern2_mini),
(
NormalizedDevShape::JsonparserSkipWsMini,
detectors::is_jsonparser_skip_ws_mini,
),
(
NormalizedDevShape::JsonparserSkipWsReal,
detectors::is_jsonparser_skip_ws_real,
),
(
NormalizedDevShape::JsonparserAtoiMini,
detectors::is_jsonparser_atoi_mini,
),
(
NormalizedDevShape::JsonparserAtoiReal,
detectors::is_jsonparser_atoi_real,
),
(
NormalizedDevShape::JsonparserParseNumberReal,
detectors::is_jsonparser_parse_number_real,
),
(
NormalizedDevShape::SelfhostTokenScanP2,
detectors::is_selfhost_token_scan_p2,
),
(
NormalizedDevShape::SelfhostTokenScanP2Accum,
detectors::is_selfhost_token_scan_p2_accum,
),
// Phase 47-A: Pattern3 if-sum minimal
(
NormalizedDevShape::Pattern3IfSumMinimal,
detectors::is_pattern3_if_sum_minimal,
),
(
NormalizedDevShape::Pattern3IfSumMulti,
detectors::is_pattern3_if_sum_multi,
),
(
NormalizedDevShape::Pattern3IfSumJson,
detectors::is_pattern3_if_sum_json,
),
// Phase 48-A: Pattern4 continue minimal
(
NormalizedDevShape::Pattern4ContinueMinimal,
detectors::is_pattern4_continue_minimal,
),
(
NormalizedDevShape::JsonparserParseArrayContinueSkipWs,
detectors::is_jsonparser_parse_array_continue_skip_ws,
),
(
NormalizedDevShape::JsonparserParseObjectContinueSkipWs,
detectors::is_jsonparser_parse_object_continue_skip_ws,
),
(
NormalizedDevShape::SelfhostIfSumP3,
detectors::is_selfhost_if_sum_p3,
),
(
NormalizedDevShape::SelfhostIfSumP3Ext,
detectors::is_selfhost_if_sum_p3_ext,
),
// Phase 53: selfhost P2/P3 practical variations
(
NormalizedDevShape::SelfhostArgsParseP2,
detectors::is_selfhost_args_parse_p2,
),
(
NormalizedDevShape::SelfhostStmtCountP3,
detectors::is_selfhost_stmt_count_p3,
),
// Phase 54: selfhost P2/P3 shape growth
(
NormalizedDevShape::SelfhostVerifySchemaP2,
detectors::is_selfhost_verify_schema_p2,
),
(
NormalizedDevShape::SelfhostDetectFormatP3,
detectors::is_selfhost_detect_format_p3,
),
];
/// Shapes handled by the direct bridge (dev only).
///
/// Runs full shape detection on `module`, reports the result under the
/// `"direct"` log tag, and returns the detected shapes unchanged.
pub(crate) fn direct_shapes(module: &JoinModule) -> Vec<NormalizedDevShape> {
    let detected = detect_shapes(module);
    log_shapes("direct", &detected);
    detected
}
/// Shapes targeted by the Structured→Normalized conversion (dev only).
///
/// Runs full shape detection on `module`, reports the result under the
/// `"roundtrip"` log tag, and returns the detected shapes unchanged.
pub(crate) fn supported_shapes(module: &JoinModule) -> Vec<NormalizedDevShape> {
    let detected = detect_shapes(module);
    log_shapes("roundtrip", &detected);
    detected
}
/// Phase 44: Map NormalizedDevShape to ShapeCapability
pub fn capability_for_shape(shape: &NormalizedDevShape) -> ShapeCapability {
use NormalizedDevShape::*;
use ShapeCapabilityKind::*;
let kind = match shape {
Pattern2Mini => P2CoreSimple,
JsonparserSkipWsMini | JsonparserSkipWsReal => P2CoreSkipWs,
JsonparserAtoiMini | JsonparserAtoiReal => P2CoreAtoi,
JsonparserParseNumberReal => P2MidParseNumber,
Pattern1Mini => P2CoreSimple, // Also core simple pattern
// Phase 47-B: P3 if-sum family
Pattern3IfSumMinimal | Pattern3IfSumMulti | Pattern3IfSumJson => P3IfSum,
// Phase 48-A/B: P4 continue family
Pattern4ContinueMinimal
| JsonparserParseArrayContinueSkipWs
| JsonparserParseObjectContinueSkipWs => P4ContinueSkipWs,
// Phase 50: selfhost P2/P3 dev shapes
SelfhostTokenScanP2 | SelfhostTokenScanP2Accum => SelfhostP2Core,
SelfhostIfSumP3 | SelfhostIfSumP3Ext => SelfhostP3IfSum,
// Phase 53: selfhost P2/P3 practical variations
SelfhostArgsParseP2 => SelfhostP2Core,
SelfhostStmtCountP3 => SelfhostP3IfSum,
// Phase 54: selfhost P2/P3 shape growth
SelfhostVerifySchemaP2 => SelfhostP2Core,
SelfhostDetectFormatP3 => SelfhostP3IfSum,
};
ShapeCapability::new(kind)
}
/// Phase 46+: Canonical shapes that ALWAYS use Normalized→MIR(direct)
/// regardless of feature flags or mode.
///
/// Canonical set (Phase 48-C):
/// - P2-Core: Pattern2Mini, JsonparserSkipWsMini, JsonparserSkipWsReal, JsonparserAtoiMini
/// - P2-Mid: JsonparserAtoiReal, JsonparserParseNumberReal
/// - P3: Pattern3 If-sum minimal/multi/json
/// - P4: Pattern4 continue minimal + JsonParser skip_ws (array/object)
pub fn is_canonical_shape(shape: &NormalizedDevShape) -> bool {
use NormalizedDevShape::*;
matches!(
shape,
Pattern2Mini
| JsonparserSkipWsMini
| JsonparserSkipWsReal
| JsonparserAtoiMini
// Phase 46: Add P2-Mid patterns
| JsonparserAtoiReal
| JsonparserParseNumberReal
// Phase 47-C: P3 if-sum canonical set
| Pattern3IfSumMinimal
| Pattern3IfSumMulti
| Pattern3IfSumJson
// Phase 48-C: P4 continue canonical set
| Pattern4ContinueMinimal
| JsonparserParseArrayContinueSkipWs
| JsonparserParseObjectContinueSkipWs
)
}
/// Phase 44: Check if capability kind is in P2-Core family
///
/// This checks capability-level membership, not granular canonical status.
/// Use `is_canonical_shape()` for exact canonical filtering.
pub fn is_p2_core_capability(cap: &ShapeCapability) -> bool {
use ShapeCapabilityKind::*;
matches!(
cap.kind,
P2CoreSimple
| P2CoreSkipWs
| P2CoreAtoi
| P2MidParseNumber
| P3IfSum
| P4ContinueSkipWs
| SelfhostP2Core
| SelfhostP3IfSum
)
}
/// Phase 44: Check if capability is supported by Normalized dev.
///
/// Delegates entirely to `is_p2_core_capability`; the two predicates are
/// kept separate so the supported set can diverge later.
pub fn is_supported_by_normalized(cap: &ShapeCapability) -> bool {
// Currently same as P2-Core family
is_p2_core_capability(cap)
}
/// canonical常時 Normalized 経路を通す)対象。
/// Phase 46: Extract canonical shapes from JoinModule.
///
/// Canonical set (P2-Core + P2-Mid):
/// - Pattern2Mini, skip_ws mini/real, atoi mini/real, parse_number real
///
/// These shapes ALWAYS use Normalized→MIR(direct) regardless of mode.
/// P3/P4 patterns are NOT canonical (future NORM-P3/NORM-P4 phases).
pub(crate) fn canonical_shapes(module: &JoinModule) -> Vec<NormalizedDevShape> {
let shapes: Vec<_> = detect_shapes(module)
.into_iter()
.filter(|s| is_canonical_shape(s))
.collect();
log_shapes("canonical", &shapes);
shapes
}
/// Returns `true` when at least one shape is detected in `module`.
// NOTE(review): `dead_code` is allowed here presumably because no caller is
// visible in this file — confirm before removing.
#[allow(dead_code)]
pub(crate) fn is_direct_supported(module: &JoinModule) -> bool {
    let detected = detect_shapes(module);
    !detected.is_empty()
}
pub fn detect_shapes(module: &JoinModule) -> Vec<NormalizedDevShape> {
let mut shapes: Vec<_> = SHAPE_DETECTORS
.iter()
.filter_map(|(shape, detector)| if detector(module) { Some(*shape) } else { None })
.collect();
// Pattern1 は「最小の後方互換」なので、より具体的な shape が見つかった場合は外しておく。
if shapes.len() > 1 {
shapes.retain(|s| *s != NormalizedDevShape::Pattern1Mini);
}
// selfhost shapesは canonical P2/P3 の generic 判定から分離する
if shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2)
|| shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2Accum)
|| shapes.contains(&NormalizedDevShape::SelfhostArgsParseP2)
|| shapes.contains(&NormalizedDevShape::SelfhostVerifySchemaP2)
{
shapes.retain(|s| *s != NormalizedDevShape::Pattern2Mini);
}
if shapes.contains(&NormalizedDevShape::SelfhostIfSumP3)
|| shapes.contains(&NormalizedDevShape::SelfhostIfSumP3Ext)
|| shapes.contains(&NormalizedDevShape::SelfhostStmtCountP3)
|| shapes.contains(&NormalizedDevShape::SelfhostDetectFormatP3)
{
shapes.retain(|s| {
!matches!(
s,
NormalizedDevShape::Pattern3IfSumMinimal
| NormalizedDevShape::Pattern3IfSumMulti
| NormalizedDevShape::Pattern3IfSumJson
)
});
}
shapes
}
// --- 判定ロジック(共通) ---
mod detectors {
use super::*;
pub(super) fn is_pattern1_mini(module: &JoinModule) -> bool {
module.is_structured() && find_loop_step(module).is_some()
}
pub(super) fn is_pattern2_mini(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(1..=3).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
has_cond_jump && has_tail_call
}
pub(super) fn is_jsonparser_skip_ws_mini(module: &JoinModule) -> bool {
is_pattern2_mini(module)
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_skip_ws_mini")
}
pub(crate) fn is_jsonparser_skip_ws_real(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(2..=6).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
has_cond_jump
&& has_tail_call
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_skip_ws_real")
}
pub(crate) fn is_jsonparser_atoi_mini(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(3..=8).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
has_cond_jump
&& has_tail_call
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_atoi_mini")
}
pub(crate) fn is_jsonparser_atoi_real(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(3..=10).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
has_cond_jump
&& has_tail_call
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_atoi_real")
}
pub(crate) fn is_jsonparser_parse_number_real(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(3..=12).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
has_cond_jump
&& has_tail_call
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_parse_number_real")
}
fn name_guard_exact(module: &JoinModule, expected_name: &str) -> bool {
module.functions.values().any(|f| f.name == expected_name)
}
/// Phase 52: Selfhost P2 core family structure signature (dev-only).
///
/// This is intentionally narrow to avoid swallowing generic P2 shapes:
/// - loop_step params: 3..=4 (i + host + 1..2 carriers)
/// - P2 break-loop skeleton (cond jump + tail call)
/// - no Select / BoxCall in body
pub(super) fn is_selfhost_p2_core_family_candidate(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_func = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if !(3..=4).contains(&loop_func.params.len()) {
return false;
}
let has_cond_jump = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_func
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
let has_select = loop_func.body.iter().any(|inst| match inst {
JoinInst::Select { .. } => true,
JoinInst::Compute(mir_inst) => matches!(
mir_inst,
crate::mir::join_ir::MirLikeInst::Select { .. }
),
_ => false,
});
let has_boxcall = loop_func.body.iter().any(|inst| match inst {
JoinInst::Compute(mir_inst) => matches!(
mir_inst,
crate::mir::join_ir::MirLikeInst::BoxCall { .. }
),
_ => false,
});
has_cond_jump && has_tail_call && !has_select && !has_boxcall
}
/// Phase 52: Selfhost P3 if-sum family structure signature (dev-only).
///
/// Note: current selfhost baseline is still P2-like (normalize_pattern2_minimal),
/// so the signature avoids requiring Select and focuses on the explicit break-if.
///
/// Distinguish selfhost P3 from canonical P3 by requiring:
/// - loop_step params == 4 (i + host + sum + count)
/// - an explicit Ge compare between params (break-if)
/// - P2/P3 loop skeleton (cond jump + tail call)
/// - no BoxCall in body
pub(super) fn is_selfhost_p3_if_sum_family_candidate(module: &JoinModule) -> bool {
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_step = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
if loop_step.params.len() != 4 {
return false;
}
let has_cond_jump = loop_step
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
let has_tail_call = loop_step
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
let param_set: std::collections::BTreeSet<_> =
loop_step.params.iter().copied().collect();
let has_ge_compare_between_params = loop_step.body.iter().any(|inst| match inst {
JoinInst::Compute(mir_inst) => match mir_inst {
crate::mir::join_ir::MirLikeInst::Compare { op, lhs, rhs, .. } => {
*op == crate::mir::join_ir::CompareOp::Ge
&& param_set.contains(lhs)
&& param_set.contains(rhs)
}
_ => false,
},
_ => false,
});
let has_boxcall = loop_step.body.iter().any(|inst| match inst {
JoinInst::Compute(mir_inst) => matches!(
mir_inst,
crate::mir::join_ir::MirLikeInst::BoxCall { .. }
),
_ => false,
});
has_cond_jump && has_tail_call && has_ge_compare_between_params && !has_boxcall
}
pub(crate) fn is_selfhost_token_scan_p2(module: &JoinModule) -> bool {
is_selfhost_p2_core_family_candidate(module)
&& name_guard_exact(module, "selfhost_token_scan_p2")
}
pub(crate) fn is_selfhost_token_scan_p2_accum(module: &JoinModule) -> bool {
is_selfhost_p2_core_family_candidate(module)
&& name_guard_exact(module, "selfhost_token_scan_p2_accum")
}
/// Phase 47-A: Check if module matches Pattern3 if-sum minimal shape
pub(crate) fn is_pattern3_if_sum_minimal(module: &JoinModule) -> bool {
// Structure-based detection (avoid name-based heuristics)
// Must have exactly 3 functions: main, loop_step, k_exit
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
// Find loop_step function
let loop_step = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
// P3 characteristics:
// - Has Compare instruction (loop condition)
// - Has Select instruction (conditional carrier update: if-then-else)
// - Has tail call (Call with k_next: None)
let has_compare = loop_step.body.iter().any(|inst| {
matches!(
inst,
JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare { .. })
)
});
// Phase 220: Select can be either JoinInst::Select or Compute(MirLikeInst::Select)
let has_select = loop_step.body.iter().any(|inst| match inst {
JoinInst::Select { .. } => true,
JoinInst::Compute(mir_inst) => matches!(
mir_inst,
crate::mir::join_ir::MirLikeInst::Select { .. }
),
_ => false,
});
let has_tail_call = loop_step
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
// P3 minimal/multi/json: typically 2-6 params (i + carriers + len/host)
let reasonable_param_count = (2..=6).contains(&loop_step.params.len());
has_compare && has_select && has_tail_call && reasonable_param_count
}
pub(crate) fn is_selfhost_if_sum_p3(module: &JoinModule) -> bool {
is_selfhost_p3_if_sum_family_candidate(module)
&& name_guard_exact(module, "selfhost_if_sum_p3")
}
pub(crate) fn is_selfhost_if_sum_p3_ext(module: &JoinModule) -> bool {
is_selfhost_p3_if_sum_family_candidate(module)
&& name_guard_exact(module, "selfhost_if_sum_p3_ext")
}
/// Phase 53: selfhost args-parse P2 detector (practical variation with string carrier)
///
/// Two-stage detection:
/// 1. Structural primary check (P2 break pattern, 1-3 carriers)
/// 2. dev-only name guard for final confirmation (ambiguity resolver)
pub(crate) fn is_selfhost_args_parse_p2(module: &JoinModule) -> bool {
// 1. Structural primary check (P2 core family)
if !is_selfhost_p2_core_family_candidate(module) {
return false;
}
// 2. dev-only name guard for final confirmation
name_guard_exact(module, "selfhost_args_parse_p2")
}
/// Phase 53: selfhost stmt-count P3 detector (practical variation with multi-branch if-else)
///
/// Two-stage detection:
/// 1. Structural primary check (P3 if-sum pattern, 2-10 carriers, multi-branch)
/// 2. dev-only name guard for final confirmation (ambiguity resolver)
pub(crate) fn is_selfhost_stmt_count_p3(module: &JoinModule) -> bool {
// 1. Structural primary check
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_step = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
// Allow 2-10 carriers (5 statement counters: r/e/l/iff/lp + i)
let carrier_count = loop_step.params.len();
if !(2..=10).contains(&carrier_count) {
return false;
}
// Must have conditional jump (break pattern)
let has_cond_jump = loop_step
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
// Must have tail call (loop continuation)
let has_tail_call = loop_step
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Call { k_next: None, .. }));
if !has_cond_jump || !has_tail_call {
return false;
}
// 2. dev-only name guard for final confirmation
name_guard_exact(module, "selfhost_stmt_count_p3")
}
/// Phase 54: Count Compare operations with specific op
fn count_compare_ops(module: &JoinModule, target_op: crate::mir::join_ir::CompareOp) -> usize {
module
.functions
.values()
.flat_map(|f| &f.body)
.filter(|inst| match inst {
JoinInst::Compute(mir_inst) => match mir_inst {
crate::mir::join_ir::MirLikeInst::Compare { op, .. } => *op == target_op,
_ => false,
},
_ => false,
})
.count()
}
/// Phase 54: selfhost verify-schema P2 detector (Ne-heavy pattern, early return diversity)
///
/// Two-stage detection:
/// 1. Structural primary check (P2 break pattern, 2-3 carriers, Ne conditions)
/// 2. dev-only name guard for final confirmation (ambiguity resolver)
pub(crate) fn is_selfhost_verify_schema_p2(module: &JoinModule) -> bool {
// 1. Structural primary check (P2 core family)
if !is_selfhost_p2_core_family_candidate(module) {
return false;
}
let loop_step = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
// verify_schema pattern: 2-3 carriers (ver + kind + host param)
let carrier_count = loop_step.params.len();
if !(2..=3).contains(&carrier_count) {
return false;
}
// Ne condition pattern (verify != expected)
let ne_count = count_compare_ops(module, crate::mir::join_ir::CompareOp::Ne);
if ne_count < 1 {
return false; // Ne条件必須
}
// 2. dev-only name guard for final confirmation
name_guard_exact(module, "selfhost_verify_schema_p2")
}
/// Phase 54: selfhost detect-format P3 detector (String return branching, null check)
///
/// Two-stage detection:
/// 1. Structural primary check (P3 if-sum pattern, 2-4 carriers, conditional jump)
/// 2. dev-only name guard for final confirmation (ambiguity resolver)
pub(crate) fn is_selfhost_detect_format_p3(module: &JoinModule) -> bool {
// 1. Structural primary check
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
let loop_step = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
// Lightweight P3: 2-4 carriers (conditional branching 3-way + loop variable)
let carrier_count = loop_step.params.len();
if !(2..=4).contains(&carrier_count) {
return false;
}
// Conditional branching pattern (multiple if)
let has_cond_jump = loop_step
.body
.iter()
.any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. }));
if !has_cond_jump {
return false;
}
// 2. dev-only name guard for final confirmation
name_guard_exact(module, "selfhost_detect_format_p3")
}
/// Phase 47-B: P3 if-sum (multi-carrier) shape detector
pub(crate) fn is_pattern3_if_sum_multi(module: &JoinModule) -> bool {
if !is_pattern3_if_sum_minimal(module) {
return false;
}
module
.functions
.values()
.any(|f| f.name == "pattern3_if_sum_multi_min")
}
/// Phase 47-B: P3 if-sum (JsonParser mini) shape detector
pub(crate) fn is_pattern3_if_sum_json(module: &JoinModule) -> bool {
if !is_pattern3_if_sum_minimal(module) {
return false;
}
module
.functions
.values()
.any(|f| f.name == "jsonparser_if_sum_min")
}
/// Phase 48-A: Check if module matches Pattern4 continue minimal shape
pub(crate) fn is_pattern4_continue_minimal(module: &JoinModule) -> bool {
// Structure-based detection (avoid name-based heuristics)
// Must have exactly 3 functions: main, loop_step, k_exit
if !module.is_structured() || module.functions.len() != 3 {
return false;
}
// Find loop_step function
let loop_step = match find_loop_step(module) {
Some(f) => f,
None => return false,
};
// P4 characteristics:
// - Has Compare instruction (loop condition or continue check)
// - Has conditional Jump (for continue/break semantics)
// - Has tail call (loop back)
//
// Note: Simplified detector - relies on Continue being present in original AST
// which gets lowered to conditional tail call structure.
let has_compare = loop_step.body.iter().any(|inst| {
matches!(
inst,
JoinInst::Compute(crate::mir::join_ir::MirLikeInst::Compare { .. })
)
});
// Has conditional jump or call (for continue/break check)
let has_conditional_flow = loop_step.body.iter().any(|inst| {
matches!(inst, JoinInst::Jump { cond: Some(_), .. })
|| matches!(inst, JoinInst::Call { k_next: None, .. })
});
// P4 minimal has 2-4 params (i, acc, possibly n)
let reasonable_param_count = (2..=4).contains(&loop_step.params.len());
has_compare && has_conditional_flow && reasonable_param_count
}
pub(crate) fn is_jsonparser_parse_array_continue_skip_ws(module: &JoinModule) -> bool {
is_pattern4_continue_minimal(module)
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_parse_array_continue_skip_ws")
}
pub(crate) fn is_jsonparser_parse_object_continue_skip_ws(module: &JoinModule) -> bool {
is_pattern4_continue_minimal(module)
&& module
.functions
.values()
.any(|f| f.name == "jsonparser_parse_object_continue_skip_ws")
}
pub(super) fn find_loop_step(module: &JoinModule) -> Option<&JoinFunction> {
module
.functions
.values()
.find(|f| f.name == "loop_step")
.or_else(|| module.functions.get(&JoinFuncId::new(1)))
}
}
/// Prints the detected shapes to stderr, prefixed with `tag`.
///
/// Nothing is printed when `shapes` is empty, or unless both
/// `dev_env::normalized_dev_logs_enabled()` and `joinir_dev_enabled()`
/// return true (checked in that order).
fn log_shapes(tag: &str, shapes: &[NormalizedDevShape]) {
    let should_log = !shapes.is_empty()
        && dev_env::normalized_dev_logs_enabled()
        && joinir_dev_enabled();
    if should_log {
        eprintln!("[joinir/normalized-dev/shape] {}: {:?}", tag, shapes);
    }
}
// Unit tests for shape detection.
//
// The whole file is already gated by `#![cfg(feature = "normalized_dev")]`,
// so the individual tests do not repeat that attribute.
#[cfg(test)]
mod tests {
    use super::*;

    /// The canonical Pattern3 if-sum minimal fixture is recognised both by its
    /// detector and by `detect_shapes`.
    #[test]
    fn test_detect_pattern3_if_sum_minimal_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_pattern3_if_sum_min_structured_for_normalized_dev;

        let module = build_pattern3_if_sum_min_structured_for_normalized_dev();

        // Should detect Pattern3IfSumMinimal shape
        assert!(
            detectors::is_pattern3_if_sum_minimal(&module),
            "pattern3_if_sum_minimal fixture should be detected"
        );

        let shapes = detect_shapes(&module);
        assert!(
            shapes.contains(&NormalizedDevShape::Pattern3IfSumMinimal),
            "detect_shapes() should include Pattern3IfSumMinimal, got: {:?}",
            shapes
        );
    }

    /// Pins which fixtures the selfhost P2 structural signature accepts.
    #[test]
    fn test_selfhost_p2_core_structural_candidate_signature() {
        use crate::mir::join_ir::normalized::fixtures::{
            build_jsonparser_skip_ws_structured_for_normalized_dev,
            build_pattern2_minimal_structured,
            build_selfhost_token_scan_p2_accum_structured_for_normalized_dev,
            build_selfhost_token_scan_p2_structured_for_normalized_dev,
        };

        let selfhost_p2 = build_selfhost_token_scan_p2_structured_for_normalized_dev();
        let selfhost_p2_accum = build_selfhost_token_scan_p2_accum_structured_for_normalized_dev();
        let json_p2 = build_jsonparser_skip_ws_structured_for_normalized_dev();
        let canonical_p2_min = build_pattern2_minimal_structured();

        assert!(
            detectors::is_selfhost_p2_core_family_candidate(&selfhost_p2),
            "selfhost_token_scan_p2 should match structural candidate"
        );
        assert!(
            detectors::is_selfhost_p2_core_family_candidate(&selfhost_p2_accum),
            "selfhost_token_scan_p2_accum should match structural candidate"
        );
        // Structural signature is intentionally ambiguous with JsonParser P2-mini family.
        assert!(
            detectors::is_selfhost_p2_core_family_candidate(&json_p2),
            "jsonparser_skip_ws_mini should also match P2 core candidate"
        );
        assert!(
            !detectors::is_selfhost_p2_core_family_candidate(&canonical_p2_min),
            "canonical Pattern2Mini fixture should not match selfhost P2 candidate"
        );
    }

    /// Pins which fixtures the selfhost P3 structural signature accepts.
    #[test]
    fn test_selfhost_p3_if_sum_structural_candidate_signature() {
        use crate::mir::join_ir::normalized::fixtures::{
            build_pattern3_if_sum_min_structured_for_normalized_dev,
            build_pattern3_if_sum_multi_min_structured_for_normalized_dev,
            build_selfhost_if_sum_p3_ext_structured_for_normalized_dev,
            build_selfhost_if_sum_p3_structured_for_normalized_dev,
        };

        let selfhost_p3 = build_selfhost_if_sum_p3_structured_for_normalized_dev();
        let selfhost_p3_ext = build_selfhost_if_sum_p3_ext_structured_for_normalized_dev();
        let canonical_p3_min = build_pattern3_if_sum_min_structured_for_normalized_dev();
        let canonical_p3_multi = build_pattern3_if_sum_multi_min_structured_for_normalized_dev();

        assert!(
            detectors::is_selfhost_p3_if_sum_family_candidate(&selfhost_p3),
            "selfhost_if_sum_p3 should match structural candidate"
        );
        assert!(
            detectors::is_selfhost_p3_if_sum_family_candidate(&selfhost_p3_ext),
            "selfhost_if_sum_p3_ext should match structural candidate"
        );
        assert!(
            !detectors::is_selfhost_p3_if_sum_family_candidate(&canonical_p3_min),
            "canonical P3 minimal should not match selfhost P3 candidate"
        );
        assert!(
            !detectors::is_selfhost_p3_if_sum_family_candidate(&canonical_p3_multi),
            "canonical P3 multi should not match selfhost P3 candidate"
        );
    }

    /// Selfhost token-scan P2 is reported and the generic Pattern2Mini is pruned.
    #[test]
    fn test_detect_selfhost_token_scan_p2_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_selfhost_token_scan_p2_structured_for_normalized_dev;

        let module = build_selfhost_token_scan_p2_structured_for_normalized_dev();
        let shapes = detect_shapes(&module);

        assert!(
            shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2),
            "selfhost_token_scan_p2 shape missing: {:?}",
            shapes
        );
        assert!(
            !shapes.contains(&NormalizedDevShape::Pattern2Mini),
            "selfhost_token_scan_p2 should not be treated as canonical Pattern2Mini: {:?}",
            shapes
        );
    }

    /// Selfhost token-scan P2 (accum) is reported and Pattern2Mini is pruned.
    #[test]
    fn test_detect_selfhost_token_scan_p2_accum_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_selfhost_token_scan_p2_accum_structured_for_normalized_dev;

        let module = build_selfhost_token_scan_p2_accum_structured_for_normalized_dev();
        let shapes = detect_shapes(&module);

        assert!(
            shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2Accum),
            "selfhost_token_scan_p2_accum shape missing: {:?}",
            shapes
        );
        assert!(
            !shapes.contains(&NormalizedDevShape::Pattern2Mini),
            "selfhost_token_scan_p2_accum should not be treated as canonical Pattern2Mini: {:?}",
            shapes
        );
    }

    /// Selfhost if-sum P3 is reported without the canonical P3 minimal shape.
    #[test]
    fn test_detect_selfhost_if_sum_p3_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_selfhost_if_sum_p3_structured_for_normalized_dev;

        let module = build_selfhost_if_sum_p3_structured_for_normalized_dev();
        let shapes = detect_shapes(&module);

        assert!(
            shapes.contains(&NormalizedDevShape::SelfhostIfSumP3),
            "selfhost_if_sum_p3 shape missing: {:?}",
            shapes
        );
        assert!(
            !shapes.iter().any(|s| matches!(s, NormalizedDevShape::Pattern3IfSumMinimal)),
            "selfhost_if_sum_p3 should not rely on canonical P3 minimal detection: {:?}",
            shapes
        );
    }

    /// Selfhost if-sum P3 (ext) is reported without any canonical P3 shape.
    #[test]
    fn test_detect_selfhost_if_sum_p3_ext_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_selfhost_if_sum_p3_ext_structured_for_normalized_dev;

        let module = build_selfhost_if_sum_p3_ext_structured_for_normalized_dev();
        let shapes = detect_shapes(&module);

        assert!(
            shapes.contains(&NormalizedDevShape::SelfhostIfSumP3Ext),
            "selfhost_if_sum_p3_ext shape missing: {:?}",
            shapes
        );
        assert!(
            !shapes.iter().any(|s| matches!(
                s,
                NormalizedDevShape::Pattern3IfSumMinimal
                    | NormalizedDevShape::Pattern3IfSumMulti
                    | NormalizedDevShape::Pattern3IfSumJson
            )),
            "selfhost_if_sum_p3_ext should not rely on canonical P3 detection: {:?}",
            shapes
        );
    }

    /// The Pattern4 continue minimal fixture is recognised both by its
    /// detector and by `detect_shapes`.
    #[test]
    fn test_detect_pattern4_continue_minimal_shape() {
        use crate::mir::join_ir::normalized::fixtures::build_pattern4_continue_min_structured_for_normalized_dev;

        let module = build_pattern4_continue_min_structured_for_normalized_dev();

        // Should detect Pattern4ContinueMinimal shape
        assert!(
            detectors::is_pattern4_continue_minimal(&module),
            "pattern4_continue_minimal fixture should be detected"
        );

        let shapes = detect_shapes(&module);
        assert!(
            shapes.contains(&NormalizedDevShape::Pattern4ContinueMinimal),
            "detect_shapes() should include Pattern4ContinueMinimal, got: {:?}",
            shapes
        );
    }

    /// The JsonParser array/object continue fixtures are recognised both by
    /// their detectors and by `detect_shapes`.
    #[test]
    fn test_detect_pattern4_jsonparser_continue_shapes() {
        use crate::mir::join_ir::normalized::fixtures::{
            build_jsonparser_parse_array_continue_skip_ws_structured_for_normalized_dev,
            build_jsonparser_parse_object_continue_skip_ws_structured_for_normalized_dev,
        };

        let array = build_jsonparser_parse_array_continue_skip_ws_structured_for_normalized_dev();
        assert!(
            detectors::is_jsonparser_parse_array_continue_skip_ws(&array),
            "array continue fixture should be detected"
        );
        let array_shapes = detect_shapes(&array);
        assert!(
            array_shapes.contains(&NormalizedDevShape::JsonparserParseArrayContinueSkipWs),
            "array continue shape missing, got {:?}",
            array_shapes
        );

        let object = build_jsonparser_parse_object_continue_skip_ws_structured_for_normalized_dev();
        assert!(
            detectors::is_jsonparser_parse_object_continue_skip_ws(&object),
            "object continue fixture should be detected"
        );
        let object_shapes = detect_shapes(&object);
        assert!(
            object_shapes.contains(&NormalizedDevShape::JsonparserParseObjectContinueSkipWs),
            "object continue shape missing, got {:?}",
            object_shapes
        );
    }
}