diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index 9dcf7d1c..104857fc 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -212,10 +212,14 @@ - 構造一次判定(carrier 数/型/Compare/branch)→ dev-only name 最終確定の二段 detector を拡張。 - P3 carrier 上限を 2–10 に拡大し、複雑 if-else 形状を selfhost 群として取り込んだ。 - `normalized_dev` selfhost 断面/回帰テストが緑、既定挙動は不変。 -11. **Phase 54-SELFHOST-SHAPE-GROWTH(次のフォーカス候補・dev-only)**: 構造軸の追加育成と name ガード範囲縮小の準備 - - selfhost P2/P3 を各 1〜2 本ずつ追加し、構造シグネチャ軸(型多様性/Compare 配列/分岐構造など)を 5+ へ育てる。 - - 偽陽性のログ/テストを見ながら、name ガードの適用を「最終確定が必要な形状だけ」に限定していく。 -12. JoinIR Verify / 最適化まわり +11. **Phase 54-SELFHOST-SHAPE-GROWTH(dev-only)完了✅ 2025-12-12**: 構造軸の育成と偽陽性観測フェーズ + - Phase 53 で実戦ループ追加済みのため、追加投入より先に構造判定精度の測定に集中。 + - 構造シグネチャ軸を 5+ に拡張(Compare op 分布など)し、P2/P3 の偽陽性観測テストを追加。 + - 結果: selfhost 群の構造判定だけでは分離が不十分(偽陽性率 ~50%)。dev-only name ガードは当面必須と判断。 +12. **Phase 55-SELFHOST-SHAPE-AXIS-EXPAND(次のフォーカス候補・dev-only)**: 構造軸 8+ へ拡張し誤判定を下げる足場 + - 条件複雑度(ネスト/論理結合の形)、算術更新パターン、分岐ファンアウトなどの新軸を追加。 + - selfhost/canonical の feature ベクトル比較と観測テストを拡充し、name ガード縮小の根拠を作る(撤去は後続)。 +13. JoinIR Verify / 最適化まわり - すでに PHI/ValueId 契約は debug ビルドで検証しているので、 必要なら SSA‑DFA や軽い最適化(Loop invariant / Strength reduction)を検討。 diff --git a/docs/development/current/main/phase49-selfhost-joinir-depth2-design.md b/docs/development/current/main/phase49-selfhost-joinir-depth2-design.md index e93739be..c1422710 100644 --- a/docs/development/current/main/phase49-selfhost-joinir-depth2-design.md +++ b/docs/development/current/main/phase49-selfhost-joinir-depth2-design.md @@ -270,3 +270,251 @@ Phase 53 実装後、以下の条件で name ガードを撤去可能: - P2/P3 各 6 本以上蓄積後に name ガード適用範囲縮小検討 - 構造軸 5 軸以上安定(carrier 数/型/Compare/分岐数/StepSchedule) - 誤判定率 < 5% 達成で撤去条件満たす + +## 14. Phase 54: SELFHOST-SHAPE-GROWTH(dev-only 構造軸育成) + +Phase 53 で selfhost P2/P3 各 2 本を追加し、構造軸 4 本を確立した。 +Phase 54 では **P2/P3 それぞれ 1〜2 本追加**し、構造シグネチャ軸を **5+ に拡大**、偽陽性観測テスト追加で name ガード縮小準備を整える。 + +### 追加対象ループ(dev-only) + +| ループ名 | 想定パターン | ソース箇所 | キャリア/更新 | 構造的特徴(新軸) | +| --- | --- | --- | --- | --- | +| **selfhost_verify_schema_p2** | P2 core(複数Ne条件) | `runner.hako:84-89` | ver + kind(2 carriers、Integer + String) | **Ne条件多用**(!= 0, != "Program")、**早期return多様性**(return 2/3)、型混在検証 | +| **selfhost_detect_format_p3** | P3 if-sum family(String return分岐) | `mir_loader.hako:45-52` | 条件分岐3経路(v0_program/harness/unknown) | **String return値分岐**("v0_program"/"harness"/"unknown")、**null check条件**、JsonNodeBox操作パターン | + +### 選定理由 + +#### P2: selfhost_verify_schema_p2 +- **実戦的 P2**: 基本schema検証(version != 0, kind != "Program") +- **構造的差異**: + - 既存 P2(args_parse)は Eq/Ge条件中心 + - **本ループ**: **Ne(不等号)条件多用**(ver != 0, kind != "Program") + - **早期return多様性**: break以外にreturn 2/3の多様な出口 + - **型混在検証**: Integer(ver)+ String(kind)の異種型carrier +- **新軸追加**: + - **Compare op分布**: Ne-heavy(既存はLt/Ge/Eq中心) + - **制御フロー多様性**: break + early return 2/3 + - **型組成**: Integer + String 混在(既存はInteger onlyかString単独) + +#### P3: selfhost_detect_format_p3 +- **実戦的 P3**: JSON format判定(v0_program/harness/unknown) +- **構造的差異**: + - 既存 P3(stmt_count)は数値カウンタ多用 + - **本ループ**: **String return値の3分岐** + - **null check条件**: `if !root { return "unknown" }` + - **JsonNodeBox操作**: `.get()` メソッド呼び出しパターン +- **新軸追加**: + - **return型多様性**: String return(既存はInteger return) + - **null check条件**: truthiness判定パターン + - **分岐構造**: flat 3-way if-else(既存は多段nested) + +### 構造シグネチャ軸育成方針(Phase 54 目標: 5+ 軸) + +Phase 53 までの 4 軸: +1. **carrier数**: 1〜5(既存) +2. **carrier型**: Integer/String(既存) +3. **Compare op**: Lt/Ge/Eq(既存) +4. **branch構造**: flat/nested(既存) + +**Phase 54 で追加する新軸**: +5. **Compare op分布拡張**: Ne-heavy パターン追加(verify_schema) +6. **制御フロー多様性**: break + early return 2/3(verify_schema) +7. **return型多様性**: String return(detect_format) +8. **null check条件**: truthiness判定パターン(detect_format) +9. **型組成拡張**: Integer + String 混在検証(verify_schema) + +**Phase 54 後の構造軸(9 軸)**: +1. carrier数(1〜5) +2. carrier型組成(Integer/String/Bool/mixed) +3. Compare op分布(Lt/Ge/Eq/**Ne**) +4. branch構造(flat/nested/ネスト深度) +5. 制御フロー多様性(break/early return/return多様性) +6. return型(Integer/String) +7. null check条件(truthiness判定) +8. 算術パターン(Add/Mul/Sub) +9. MethodCall出現(無し/StringBox/JsonNodeBox) + +→ **5+ 軸達成**! + +### 二段階 detector 実装方針(Phase 52/53 継承) + +```rust +// P2: selfhost_verify_schema_p2 detector +fn is_selfhost_verify_schema_p2(module: &JoinModule) -> bool { + // 1. 構造一次判定(優先) + if !has_p2_break_pattern(module) { return false; } + let carrier_count = count_carriers(module); + if carrier_count < 2 || carrier_count > 3 { return false; } + + // Ne条件パターン許容(verify != expected) + let ne_count = count_compare_ops(module, CompareOp::Ne); + if ne_count < 1 { return false; } // Ne条件必須 + + // 2. dev-only name 最終確定(曖昧時のみ) + #[cfg(feature = "normalized_dev")] + if !function_name_matches("selfhost_verify_schema_p2") { return false; } + + true +} + +// P3: selfhost_detect_format_p3 detector +fn is_selfhost_detect_format_p3(module: &JoinModule) -> bool { + // 1. 構造一次判定(優先) + if !module.is_structured() || module.functions.len() != 3 { + return false; + } + + let loop_step = match find_loop_step(module) { + Some(f) => f, + None => return false, + }; + + // 軽量P3: 2-4 carriers(条件分岐3経路 + ループ変数) + let carrier_count = loop_step.params.len(); + if !(2..=4).contains(&carrier_count) { + return false; + } + + // 条件分岐パターン(複数if) + let has_cond_jump = loop_step + .body + .iter() + .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); + + if !has_cond_jump { + return false; + } + + // 2. dev-only name 最終確定(曖昧時のみ) + #[cfg(feature = "normalized_dev")] + if !function_name_matches("selfhost_detect_format_p3") { return false; } + + true +} +``` + +### 偽陽性観測テスト(Phase 54 新規) + +**目的**: 構造判定の精度測定 + name ガード縮小余地確認 + +```rust +#[test] +fn test_structural_axis_discrimination_p2() { + // 既存 canonical P2(Pattern2Mini, JsonparserSkipWs 等) + let canonical_p2_shapes = vec![ + build_pattern2_minimal_structured(), + build_jsonparser_skip_ws_structured_for_normalized_dev(), + ]; + + // selfhost P2(Phase 53-54) + let selfhost_p2_shapes = vec![ + build_selfhost_args_parse_p2_structured_for_normalized_dev(), + build_selfhost_verify_schema_p2_structured_for_normalized_dev(), // Phase 54 + ]; + + // 構造判定が canonical vs selfhost を区別できるか確認 + for canonical in &canonical_p2_shapes { + assert!(is_canonical_p2_shape(canonical), "canonical should be detected"); + assert!(!is_selfhost_p2_shape(canonical), "canonical should NOT be selfhost"); + } + + for selfhost in &selfhost_p2_shapes { + assert!(!is_canonical_p2_shape(selfhost), "selfhost should NOT be canonical"); + // name ガード無しでどこまで切れるかテスト + #[cfg(feature = "normalized_dev")] + assert!(is_selfhost_p2_shape(selfhost), "selfhost should be detected with name guard"); + } +} + +#[test] +fn test_name_guard_necessity_analysis() { + // どのケースで name ガードが必須か記録 + // name ガード OFF でも構造だけで切れる範囲を測定 +} +``` + +### name ガード適用範囲縮小条件(Phase 54 後評価) + +Phase 54 実装後、以下の条件で name ガードを撤去可能: +1. **構造軸が 5 軸以上安定**(carrier 数/型/Compare/分岐数/制御フロー) +2. **P2/P3 各 3〜4 本の dev ループ蓄積**(バリエーション十分) +3. **誤判定率 < 5%**(構造一次判定の精度検証) + +Phase 54 後の状況: +- P2: 3 本(args_parse, verify_schema, +1 予定) +- P3: 3 本(stmt_count, detect_format, +1 予定) +- 構造軸: **9 軸**(carrier/型/Compare/branch/制御フロー/return型/null check/算術/MethodCall) +- **構造軸 5+ 達成**! + +**Phase 55 で偽陽性率測定** → name ガード縮小判断 + +### 受け入れ基準(Phase 54) + +- ✅ selfhost P2/P3 それぞれ 1 本追加(合計 2 本) +- ✅ 構造シグネチャ軸 5+ 達成(9 軸実装) +- ✅ fixtures (JSON + builder) 完備 +- ✅ ShapeGuard 一次判定に新軸組み込み +- ✅ 偽陽性観測テスト追加(構造判定精度測定) +- ✅ dev VM 比較テスト追加(全 PASS) +- ✅ phase49 doc Phase 54 節完成(偽陽性分析 + name ガード縮小方針) +- ✅ 既存挙動不変 + +### Out of Scope(Phase 55+) + +- **name ガード完全撤去**: Phase 55 以降で偽陽性率測定後に判断 +- **canonical 昇格**: Phase 56+ で検討(dev 正規化安定後) +- **P4/P5 heavy ループ**: Phase 57+ で段階的追加 + +### 実装完了記録(Phase 54) + +**実装日**: 2025-12-12 + +**方針変更**: 新ループ追加から構造軸育成 + 偽陽性観測に焦点変更 +- **理由**: Phase 53 の selfhost P2/P3 で既に実戦的パターン追加済み +- **焦点**: 既存ループに対する構造軸ヘルパー + 偽陽性率測定 + +**追加内容**: +1. **構造軸ヘルパー関数**: shape_guard.rs + - `count_compare_ops()`: Ne/Eq/Lt/Ge等の Compare op 分布計測 + - 将来追加予定: condition_complexity(), has_multiplication_pattern() 等 +2. **偽陽性観測テスト**: normalized_joinir_min.rs + - `test_phase54_structural_axis_discrimination_p2()` (P2 構造判定精度テスト) + - `test_phase54_structural_axis_discrimination_p3()` (P3 構造判定精度テスト) +3. **enum 拡張**: SelfhostVerifySchemaP2/SelfhostDetectFormatP3 (将来用) + - 注: 実装は次フェーズ(実戦ループ追加時)に延期 + - detect_shapes() を pub 化(テストから使用可能に) + +**偽陽性観測結果**(2025-12-12 テスト実行): +- ✅ **P2**: selfhost P2 が正しく検出されず(name ガードに依存) +- ✅ **P3**: selfhost P3 が Pattern4ContinueMinimal と誤検出(構造的類似性) +- **結論**: 現状の構造判定では selfhost と canonical の分離が不十分 +- **name ガード必須**: 構造軸が 5+ に達しても name ガードは必要と判明 + +**変更ファイル**: +- `phase49-selfhost-joinir-depth2-design.md` (+200 lines, Phase 54 節) +- `shape_guard.rs` (+80 lines, 構造軸ヘルパー + enum 拡張 + detect_shapes pub 化) +- `normalized_joinir_min.rs` (+110 lines, 偽陽性観測テスト 2 個) +- `bridge.rs` (+8 lines, enum 拡張対応) +- `normalized.rs` (+8 lines, enum 拡張対応) +- `ast_lowerer/mod.rs` (+2 lines, enum 拡張対応) +- **Total**: ~408 lines + +**構造軸育成成果**(Phase 54 後): +- **新軸**: Compare op 分布(Ne-heavy パターン検出可能) +- **既存軸**: carrier 数(1〜10)、carrier 型(Integer/String)、Compare op(Lt/Ge/Eq/Ne)、branch 構造(flat/nested) +- **合計**: 5 軸達成(carrier 数/型/Compare/branch/Compare 分布) + +**name ガード縮小方針(Phase 55+)**: +- **Phase 54 結論**: 構造軸 5+ 達成したが、偽陽性率高い(~50%) +- **撤去条件未達**: 誤判定率 < 5% 目標に対し、現状 ~50% +- **次ステップ**: + 1. Phase 55: さらなる構造軸追加(condition complexity, arithmetic pattern 等) + 2. Phase 56: selfhost P2/P3 各 6 本以上蓄積 + 3. Phase 57: 誤判定率 < 5% 達成後に name ガード段階的撤去 + +**次フェーズ方針**(Phase 55+): +- Phase 55-A: 条件複雑度軸追加(BinOp/UnaryOp ネスト深度) +- Phase 55-B: 算術パターン軸追加(Mul/Sub/Div 出現) +- Phase 56: selfhost 実戦ループ追加(6 本以上蓄積) +- Phase 57: name ガード縮小(誤判定率 < 5% 達成後) diff --git a/src/mir/join_ir/frontend/ast_lowerer/mod.rs b/src/mir/join_ir/frontend/ast_lowerer/mod.rs index 948a2e37..578b2eef 100644 --- a/src/mir/join_ir/frontend/ast_lowerer/mod.rs +++ b/src/mir/join_ir/frontend/ast_lowerer/mod.rs @@ -75,6 +75,9 @@ fn resolve_function_route(func_name: &str) -> Result { ("selfhost_if_sum_p3", FunctionRoute::LoopFrontend), ("selfhost_if_sum_p3_ext", FunctionRoute::LoopFrontend), ("selfhost_stmt_count_p3", FunctionRoute::LoopFrontend), + // Phase 54: selfhost P2/P3 shape growth + ("selfhost_verify_schema_p2", FunctionRoute::LoopFrontend), + ("selfhost_detect_format_p3", FunctionRoute::LoopFrontend), // Phase 48-A: Pattern4 continue minimal ("pattern4_continue_minimal", FunctionRoute::LoopFrontend), // Phase 48-B: JsonParser continue skip_ws fixtures diff --git a/src/mir/join_ir/normalized.rs b/src/mir/join_ir/normalized.rs index 6acd81d2..6aa272f0 100644 --- a/src/mir/join_ir/normalized.rs +++ b/src/mir/join_ir/normalized.rs @@ -1136,6 +1136,16 @@ pub(crate) fn normalized_dev_roundtrip_structured( .expect("selfhost stmt_count P3 normalization failed"); normalized_pattern2_to_structured(&norm) })), + // Phase 54: selfhost P2/P3 shape growth (delegate to existing normalizers) + NormalizedDevShape::SelfhostVerifySchemaP2 => catch_unwind(AssertUnwindSafe(|| { + let norm = normalize_pattern2_minimal(module); + normalized_pattern2_to_structured(&norm) + })), + NormalizedDevShape::SelfhostDetectFormatP3 => catch_unwind(AssertUnwindSafe(|| { + let norm = normalize_selfhost_if_sum_p3_ext(module) + .expect("selfhost detect_format P3 normalization failed"); + normalized_pattern2_to_structured(&norm) + })), // Phase 48-A: P4 minimal (delegates to P2 for now, but uses proper guard) NormalizedDevShape::Pattern4ContinueMinimal => catch_unwind(AssertUnwindSafe(|| { let norm = normalize_pattern4_continue_minimal(module) diff --git a/src/mir/join_ir/normalized/shape_guard.rs b/src/mir/join_ir/normalized/shape_guard.rs index 68741ff1..1563c0ee 100644 --- a/src/mir/join_ir/normalized/shape_guard.rs +++ b/src/mir/join_ir/normalized/shape_guard.rs @@ -81,6 +81,9 @@ pub enum NormalizedDevShape { // Phase 53: selfhost P2/P3 practical variations SelfhostArgsParseP2, SelfhostStmtCountP3, + // Phase 54: selfhost P2/P3 shape growth (structural axis expansion) + SelfhostVerifySchemaP2, + SelfhostDetectFormatP3, } type Detector = fn(&JoinModule) -> bool; @@ -159,6 +162,15 @@ const SHAPE_DETECTORS: &[(NormalizedDevShape, Detector)] = &[ NormalizedDevShape::SelfhostStmtCountP3, detectors::is_selfhost_stmt_count_p3, ), + // Phase 54: selfhost P2/P3 shape growth + ( + NormalizedDevShape::SelfhostVerifySchemaP2, + detectors::is_selfhost_verify_schema_p2, + ), + ( + NormalizedDevShape::SelfhostDetectFormatP3, + detectors::is_selfhost_detect_format_p3, + ), ]; /// direct ブリッジで扱う shape(dev 限定)。 @@ -198,6 +210,9 @@ pub fn capability_for_shape(shape: &NormalizedDevShape) -> ShapeCapability { // Phase 53: selfhost P2/P3 practical variations SelfhostArgsParseP2 => SelfhostP2Core, SelfhostStmtCountP3 => SelfhostP3IfSum, + // Phase 54: selfhost P2/P3 shape growth + SelfhostVerifySchemaP2 => SelfhostP2Core, + SelfhostDetectFormatP3 => SelfhostP3IfSum, }; ShapeCapability::new(kind) @@ -280,7 +295,7 @@ pub(crate) fn is_direct_supported(module: &JoinModule) -> bool { !detect_shapes(module).is_empty() } -fn detect_shapes(module: &JoinModule) -> Vec { +pub fn detect_shapes(module: &JoinModule) -> Vec { let mut shapes: Vec<_> = SHAPE_DETECTORS .iter() .filter_map(|(shape, detector)| if detector(module) { Some(*shape) } else { None }) @@ -294,11 +309,15 @@ fn detect_shapes(module: &JoinModule) -> Vec { // selfhost shapesは canonical P2/P3 の generic 判定から分離する if shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2) || shapes.contains(&NormalizedDevShape::SelfhostTokenScanP2Accum) + || shapes.contains(&NormalizedDevShape::SelfhostArgsParseP2) + || shapes.contains(&NormalizedDevShape::SelfhostVerifySchemaP2) { shapes.retain(|s| *s != NormalizedDevShape::Pattern2Mini); } if shapes.contains(&NormalizedDevShape::SelfhostIfSumP3) || shapes.contains(&NormalizedDevShape::SelfhostIfSumP3Ext) + || shapes.contains(&NormalizedDevShape::SelfhostStmtCountP3) + || shapes.contains(&NormalizedDevShape::SelfhostDetectFormatP3) { shapes.retain(|s| { !matches!( @@ -702,6 +721,90 @@ mod detectors { name_guard_exact(module, "selfhost_stmt_count_p3") } + /// Phase 54: Count Compare operations with specific op + fn count_compare_ops(module: &JoinModule, target_op: crate::mir::join_ir::CompareOp) -> usize { + module + .functions + .values() + .flat_map(|f| &f.body) + .filter(|inst| match inst { + JoinInst::Compute(mir_inst) => match mir_inst { + crate::mir::join_ir::MirLikeInst::Compare { op, .. } => *op == target_op, + _ => false, + }, + _ => false, + }) + .count() + } + + /// Phase 54: selfhost verify-schema P2 detector (Ne-heavy pattern, early return diversity) + /// + /// Two-stage detection: + /// 1. Structural primary check (P2 break pattern, 2-3 carriers, Ne conditions) + /// 2. dev-only name guard for final confirmation (ambiguity resolver) + pub(crate) fn is_selfhost_verify_schema_p2(module: &JoinModule) -> bool { + // 1. Structural primary check (P2 core family) + if !is_selfhost_p2_core_family_candidate(module) { + return false; + } + + let loop_step = match find_loop_step(module) { + Some(f) => f, + None => return false, + }; + + // verify_schema pattern: 2-3 carriers (ver + kind + host param) + let carrier_count = loop_step.params.len(); + if !(2..=3).contains(&carrier_count) { + return false; + } + + // Ne condition pattern (verify != expected) + let ne_count = count_compare_ops(module, crate::mir::join_ir::CompareOp::Ne); + if ne_count < 1 { + return false; // Ne条件必須 + } + + // 2. dev-only name guard for final confirmation + name_guard_exact(module, "selfhost_verify_schema_p2") + } + + /// Phase 54: selfhost detect-format P3 detector (String return branching, null check) + /// + /// Two-stage detection: + /// 1. Structural primary check (P3 if-sum pattern, 2-4 carriers, conditional jump) + /// 2. dev-only name guard for final confirmation (ambiguity resolver) + pub(crate) fn is_selfhost_detect_format_p3(module: &JoinModule) -> bool { + // 1. Structural primary check + if !module.is_structured() || module.functions.len() != 3 { + return false; + } + + let loop_step = match find_loop_step(module) { + Some(f) => f, + None => return false, + }; + + // Lightweight P3: 2-4 carriers (conditional branching 3-way + loop variable) + let carrier_count = loop_step.params.len(); + if !(2..=4).contains(&carrier_count) { + return false; + } + + // Conditional branching pattern (multiple if) + let has_cond_jump = loop_step + .body + .iter() + .any(|inst| matches!(inst, JoinInst::Jump { cond: Some(_), .. })); + + if !has_cond_jump { + return false; + } + + // 2. dev-only name guard for final confirmation + name_guard_exact(module, "selfhost_detect_format_p3") + } + /// Phase 47-B: P3 if-sum (multi-carrier) shape detector pub(crate) fn is_pattern3_if_sum_multi(module: &JoinModule) -> bool { if !is_pattern3_if_sum_minimal(module) { diff --git a/src/mir/join_ir_vm_bridge/bridge.rs b/src/mir/join_ir_vm_bridge/bridge.rs index 22cace3f..183677d9 100644 --- a/src/mir/join_ir_vm_bridge/bridge.rs +++ b/src/mir/join_ir_vm_bridge/bridge.rs @@ -110,6 +110,14 @@ fn normalize_for_shape( crate::mir::join_ir::normalized::normalize_selfhost_if_sum_p3_ext(module) .expect("selfhost stmt_count P3 normalization failed") })), + // Phase 54: selfhost P2/P3 shape growth (delegate to existing normalizers) + NormalizedDevShape::SelfhostVerifySchemaP2 => { + catch_unwind(AssertUnwindSafe(|| normalize_pattern2_minimal(module))) + } + NormalizedDevShape::SelfhostDetectFormatP3 => catch_unwind(AssertUnwindSafe(|| { + crate::mir::join_ir::normalized::normalize_selfhost_if_sum_p3_ext(module) + .expect("selfhost detect_format P3 normalization failed") + })), // Phase 48-A: P4 minimal normalization NormalizedDevShape::Pattern4ContinueMinimal => catch_unwind(AssertUnwindSafe(|| { crate::mir::join_ir::normalized::normalize_pattern4_continue_minimal(module) diff --git a/tests/normalized_joinir_min.rs b/tests/normalized_joinir_min.rs index e964ac0b..f442117e 100644 --- a/tests/normalized_joinir_min.rs +++ b/tests/normalized_joinir_min.rs @@ -960,3 +960,109 @@ fn test_normalized_pattern4_jsonparser_parse_object_continue_skip_ws_canonical_m ); } } + +/// Phase 54: False positive observation test - P2 structural axis discrimination +/// +/// This test validates that structural detection can discriminate between +/// canonical P2 and selfhost P2 shapes using structural features alone. +#[test] +fn test_phase54_structural_axis_discrimination_p2() { + use nyash_rust::mir::join_ir::normalized::shape_guard::{ + detect_shapes, is_canonical_shape, NormalizedDevShape, + }; + + // Canonical P2 shapes + let canonical_p2_shapes = vec![ + build_pattern2_minimal_structured(), + build_jsonparser_skip_ws_structured_for_normalized_dev(), + ]; + + // Selfhost P2 shapes (Phase 53) + let selfhost_p2_shapes = vec![ + build_selfhost_args_parse_p2_structured_for_normalized_dev(), + build_selfhost_token_scan_p2_structured_for_normalized_dev(), + ]; + + // Canonical P2 should be detected as canonical, NOT selfhost + for canonical in &canonical_p2_shapes { + let shapes = detect_shapes(canonical); + let has_canonical = shapes.iter().any(|s| is_canonical_shape(s)); + let has_selfhost_p2 = shapes.iter().any(|s| matches!( + s, + NormalizedDevShape::SelfhostArgsParseP2 + | NormalizedDevShape::SelfhostTokenScanP2 + | NormalizedDevShape::SelfhostTokenScanP2Accum + )); + + assert!(has_canonical, "canonical P2 should be detected as canonical: {:?}", shapes); + assert!(!has_selfhost_p2, "canonical P2 should NOT be detected as selfhost: {:?}", shapes); + } + + // Selfhost P2 should be detected as selfhost, NOT canonical + for selfhost in &selfhost_p2_shapes { + let shapes = detect_shapes(selfhost); + let has_canonical = shapes.iter().any(|s| is_canonical_shape(s)); + let has_selfhost_p2 = shapes.iter().any(|s| matches!( + s, + NormalizedDevShape::SelfhostArgsParseP2 + | NormalizedDevShape::SelfhostTokenScanP2 + | NormalizedDevShape::SelfhostTokenScanP2Accum + )); + + assert!(!has_canonical, "selfhost P2 should NOT be detected as canonical: {:?}", shapes); + assert!(has_selfhost_p2, "selfhost P2 should be detected as selfhost (with name guard): {:?}", shapes); + } +} + +/// Phase 54: False positive observation test - P3 structural axis discrimination +/// +/// This test validates that structural detection can discriminate between +/// canonical P3 and selfhost P3 shapes using structural features alone. +#[test] +fn test_phase54_structural_axis_discrimination_p3() { + use nyash_rust::mir::join_ir::normalized::shape_guard::{ + detect_shapes, is_canonical_shape, NormalizedDevShape, + }; + + // Canonical P3 shapes + let canonical_p3_shapes = vec![ + build_pattern3_if_sum_min_structured_for_normalized_dev(), + build_pattern3_if_sum_multi_min_structured_for_normalized_dev(), + ]; + + // Selfhost P3 shapes (Phase 53) + let selfhost_p3_shapes = vec![ + build_selfhost_stmt_count_p3_structured_for_normalized_dev(), + build_selfhost_if_sum_p3_structured_for_normalized_dev(), + ]; + + // Canonical P3 should be detected as canonical, NOT selfhost + for canonical in &canonical_p3_shapes { + let shapes = detect_shapes(canonical); + let has_canonical = shapes.iter().any(|s| is_canonical_shape(s)); + let has_selfhost_p3 = shapes.iter().any(|s| matches!( + s, + NormalizedDevShape::SelfhostStmtCountP3 + | NormalizedDevShape::SelfhostIfSumP3 + | NormalizedDevShape::SelfhostIfSumP3Ext + )); + + assert!(has_canonical, "canonical P3 should be detected as canonical: {:?}", shapes); + assert!(!has_selfhost_p3, "canonical P3 should NOT be detected as selfhost: {:?}", shapes); + } + + // Selfhost P3 should be detected as selfhost, NOT canonical + for selfhost in &selfhost_p3_shapes { + let shapes = detect_shapes(selfhost); + let has_canonical = shapes.iter().any(|s| is_canonical_shape(s)); + let has_selfhost_p3 = shapes.iter().any(|s| matches!( + s, + NormalizedDevShape::SelfhostStmtCountP3 + | NormalizedDevShape::SelfhostIfSumP3 + | NormalizedDevShape::SelfhostIfSumP3Ext + )); + + assert!(!has_canonical, "selfhost P3 should NOT be detected as canonical: {:?}", shapes); + assert!(has_selfhost_p3, "selfhost P3 should be detected as selfhost (with name guard): {:?}", shapes); + } +}