From 14730c227f6c7a9bc799faddcefadac7d2c2570a Mon Sep 17 00:00:00 2001 From: nyash-codex Date: Thu, 18 Dec 2025 00:57:58 +0900 Subject: [PATCH] feat(control_tree): add StepTreeContract and signature (dev-only) --- .../current/main/design/control-tree.md | 28 ++ .../joinir/patterns/ast_feature_extractor.rs | 32 ++ .../builder/control_flow/joinir/routing.rs | 15 +- src/mir/control_tree/step_tree.rs | 385 +++++++++++++++++- 4 files changed, 445 insertions(+), 15 deletions(-) diff --git a/docs/development/current/main/design/control-tree.md b/docs/development/current/main/design/control-tree.md index d58fdde0..ae771642 100644 --- a/docs/development/current/main/design/control-tree.md +++ b/docs/development/current/main/design/control-tree.md @@ -33,6 +33,34 @@ AST を「構造ノード」に落とした木(または木+Block列)で - `Loop`: `cond` と body の `Block` - `Stmt`: 構造化していない文(Local/Assign/Return/Break/Continue/Expr などを tag 化) +## StepTreeContract(構造契約SSOT) + +StepTreeContract は「この構造が何を含み、何を要求するか」を最小の契約として宣言する。 +lowering/PHI/CFG の判断にはまだ使わない(dev-only)だが、再解析の増殖を防ぐための SSOT になる。 + +最小フィールド案(P1): +- `exits`: `return` / `break` / `continue` の存在(構造だけ) +- `writes`: 変数への書き込み(最小は `Assignment target=Variable(name)` と `Local` 宣言の集合) +- `required_caps`: capability 宣言(例: `NestedLoop`, `TryCatch`, `Throw`, `Lambda` など) +- `cond_sig`: if/loop 条件式の要約(下記) + +### cond_sig の方針(SSOT) + +- cond_sig は **ASTNode 参照を保持しない**(巨大化・ライフタイム・clone コストの問題を避ける)。 +- cond_sig は **summary(要約)SSOT** とし、`AstSummary` 相当の安定表現に固定する。 + - 目的は「構造分類/契約の固定」であり、式同値(完全な等価判定)は非対象。 + +## StepTreeSignature(構造署名) + +StepTreeSignature は StepTreeContract + node kinds の “安定な基底文字列” を hash した識別子。 + +用途: +- dev-only ログの検索キー +- “同型ループ/同型if” の増殖検知(再解析の増殖防止) + +注意: +- `Span` 等の位置情報は signature に含めない(入力差でブレるため)。 + ## Capability(段階投入のSSOT) StepTree は capability を“宣言”し、未対応は **Fail-Fast(dev-only / strict)** で止める。 diff --git a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs 
index 818aebac..e0a77ba1 100644 --- a/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs +++ b/src/mir/builder/control_flow/joinir/patterns/ast_feature_extractor.rs @@ -70,6 +70,18 @@ pub(crate) fn detect_break_in_body(body: &[ASTNode]) -> bool { false } +/// Detect if a loop body contains return statements +/// +/// This is used for dev-only parity checks with structure SSOT (StepTree). +pub(crate) fn detect_return_in_body(body: &[ASTNode]) -> bool { + for stmt in body { + if has_return_node(stmt) { + return true; + } + } + false +} + /// Extract full feature set from loop body AST /// /// This is the main entry point for feature extraction. It analyzes the loop body @@ -273,6 +285,26 @@ fn has_break_node(node: &ASTNode) -> bool { } } +/// Recursive helper to check if AST node contains return +fn has_return_node(node: &ASTNode) -> bool { + match node { + ASTNode::Return { .. } => true, + ASTNode::If { + then_body, + else_body, + .. + } => { + then_body.iter().any(has_return_node) + || else_body + .as_ref() + .map_or(false, |e| e.iter().any(has_return_node)) + } + ASTNode::Loop { body, .. } => body.iter().any(has_return_node), + ASTNode::ScopeBox { body, .. } => body.iter().any(has_return_node), + _ => false, + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/mir/builder/control_flow/joinir/routing.rs b/src/mir/builder/control_flow/joinir/routing.rs index b588b355..4f659678 100644 --- a/src/mir/builder/control_flow/joinir/routing.rs +++ b/src/mir/builder/control_flow/joinir/routing.rs @@ -41,6 +41,7 @@ pub(in crate::mir::builder) fn choose_pattern_kind( // Phase 193: Use AST Feature Extractor Box for break/continue detection let has_continue = ast_features::detect_continue_in_body(body); let has_break = ast_features::detect_break_in_body(body); + let has_return = ast_features::detect_return_in_body(body); // Phase 110: StepTree parity check (structure-only SSOT). 
// @@ -56,10 +57,18 @@ pub(in crate::mir::builder) fn choose_pattern_kind( }; let tree = StepTreeBuilderBox::build_from_ast(&loop_ast); - if tree.features.has_break != has_break || tree.features.has_continue != has_continue { + if tree.features.has_break != has_break + || tree.features.has_continue != has_continue + || tree.features.has_return != has_return + { let msg = format!( - "[choose_pattern_kind/STEPTREE_PARITY] step_tree(break={}, cont={}) != extractor(break={}, cont={})", - tree.features.has_break, tree.features.has_continue, has_break, has_continue + "[choose_pattern_kind/STEPTREE_PARITY] step_tree(break={}, cont={}, ret={}) != extractor(break={}, cont={}, ret={})", + tree.features.has_break, + tree.features.has_continue, + tree.features.has_return, + has_break, + has_continue, + has_return ); if crate::config::env::joinir_dev::strict_enabled() { diff --git a/src/mir/control_tree/step_tree.rs b/src/mir/control_tree/step_tree.rs index 69581b02..3296da21 100644 --- a/src/mir/control_tree/step_tree.rs +++ b/src/mir/control_tree/step_tree.rs @@ -1,9 +1,13 @@ use crate::ast::{ASTNode, BinaryOperator, LiteralValue, Span, UnaryOperator}; +use std::collections::BTreeSet; + #[derive(Debug, Clone, PartialEq)] pub struct StepTree { pub root: StepNode, pub features: StepTreeFeatures, + pub contract: StepTreeContract, + pub signature: StepTreeSignature, } #[derive(Debug, Clone, PartialEq, Default)] @@ -37,7 +41,7 @@ pub enum StepNode { #[derive(Debug, Clone, PartialEq)] pub enum StepStmtKind { LocalDecl { vars: Vec<String> }, - Assign, + Assign { target: Option<String> }, Print, Return, Break, @@ -67,6 +71,108 @@ impl StepTree { self.root.write_compact(&mut out, 0); out } + + pub fn signature_basis_string(&self) -> String { + let mut kinds = Vec::new(); + collect_node_kinds(&self.root, &mut kinds); + let kinds = kinds.join(","); + self.contract.signature_basis_string(&kinds) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum ExitKind { + Return, 
+ Break, + Continue, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum StepCapability { + If, + Loop, + NestedIf, + NestedLoop, + Return, + Break, + Continue, + TryCatch, + Throw, + Lambda, + While, + ForRange, + Match, + Arrow, +} + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct StepTreeContract { + pub exits: BTreeSet<ExitKind>, + pub writes: BTreeSet<String>, + pub required_caps: BTreeSet<StepCapability>, + pub cond_sig: Vec<String>, +} + +impl StepTreeContract { + pub fn signature_basis_string(&self, node_kinds: &str) -> String { + let exits = self + .exits + .iter() + .map(|e| match e { + ExitKind::Return => "return", + ExitKind::Break => "break", + ExitKind::Continue => "continue", + }) + .collect::<Vec<_>>() + .join(","); + let writes = self.writes.iter().cloned().collect::<Vec<_>>().join(","); + let caps = self + .required_caps + .iter() + .map(|c| match c { + StepCapability::If => "If", + StepCapability::Loop => "Loop", + StepCapability::NestedIf => "NestedIf", + StepCapability::NestedLoop => "NestedLoop", + StepCapability::Return => "Return", + StepCapability::Break => "Break", + StepCapability::Continue => "Continue", + StepCapability::TryCatch => "TryCatch", + StepCapability::Throw => "Throw", + StepCapability::Lambda => "Lambda", + StepCapability::While => "While", + StepCapability::ForRange => "ForRange", + StepCapability::Match => "Match", + StepCapability::Arrow => "Arrow", + }) + .collect::<Vec<_>>() + .join(","); + let cond_sig = self.cond_sig.join("|"); + + format!( + "kinds={};exits={};writes={};caps={};conds={}", + node_kinds, exits, writes, caps, cond_sig + ) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct StepTreeSignature(pub u64); + +impl StepTreeSignature { + pub fn from_basis_string(basis: &str) -> Self { + // FNV-1a 64-bit (stable, no external deps). 
+ let mut hash: u64 = 0xcbf29ce484222325; + for b in basis.as_bytes() { + hash ^= *b as u64; + hash = hash.wrapping_mul(0x100000001b3); + } + StepTreeSignature(hash) + } + + pub fn to_hex(self) -> String { + format!("{:016x}", self.0) + } } impl StepNode { @@ -114,7 +220,10 @@ impl StepStmtKind { fn to_compact_string(&self) -> String { match self { StepStmtKind::LocalDecl { vars } => format!("local({})", vars.join(",")), - StepStmtKind::Assign => "assign".to_string(), + StepStmtKind::Assign { target } => match target { + Some(name) => format!("assign({name})"), + None => "assign(?)".to_string(), + }, StepStmtKind::Print => "print".to_string(), StepStmtKind::Return => "return".to_string(), StepStmtKind::Break => "break".to_string(), @@ -128,7 +237,7 @@ impl AstSummary { fn to_compact_string(&self) -> String { match self { AstSummary::Variable(name) => format!("var:{name}"), - AstSummary::Literal(lit) => format!("lit:{lit:?}"), + AstSummary::Literal(lit) => format!("lit:{}", lit_to_sig_string(lit)), AstSummary::Unary { op, expr } => format!("({op:?} {})", expr.to_compact_string()), AstSummary::Binary { op, lhs, rhs } => format!( "({} {} {})", @@ -141,6 +250,22 @@ impl AstSummary { } } +fn lit_to_sig_string(lit: &LiteralValue) -> String { + match lit { + LiteralValue::String(s) => format!("str:{}", escape_sig_atom(s)), + LiteralValue::Integer(i) => format!("int:{i}"), + LiteralValue::Float(f) => format!("float:{:016x}", f.to_bits()), + LiteralValue::Bool(b) => format!("bool:{}", if *b { 1 } else { 0 }), + LiteralValue::Null => "null".to_string(), + LiteralValue::Void => "void".to_string(), + } +} + +fn escape_sig_atom(s: &str) -> String { + // Minimal stable escaping for signature strings. + s.replace('\\', "\\\\").replace('|', "\\|").replace(',', "\\,") +} + pub struct StepTreeBuilderBox; impl StepTreeBuilderBox { @@ -150,10 +275,7 @@ impl StepTreeBuilderBox { ASTNode::ScopeBox { body, .. 
} => Self::build_from_block(body), _ => { let (node, features) = Self::build_node(ast, 0, 0); - StepTree { - root: node, - features, - } + build_step_tree(node, features) } } } @@ -166,10 +288,7 @@ impl StepTreeBuilderBox { nodes.push(node); features = merge_features(features, node_features); } - StepTree { - root: StepNode::Block(nodes), - features, - } + build_step_tree(StepNode::Block(nodes), features) } fn build_node(ast: &ASTNode, if_depth: u32, loop_depth: u32) -> (StepNode, StepTreeFeatures) { @@ -277,7 +396,15 @@ impl StepTreeBuilderBox { ), ASTNode::Assignment { span, .. } => ( StepNode::Stmt { - kind: StepStmtKind::Assign, + kind: StepStmtKind::Assign { + target: match ast { + ASTNode::Assignment { target, .. } => match target.as_ref() { + ASTNode::Variable { name, .. } => Some(name.clone()), + _ => None, + }, + _ => None, + }, + }, span: span.clone(), }, StepTreeFeatures::default(), @@ -315,6 +442,22 @@ impl StepTreeBuilderBox { } } +fn build_step_tree(root: StepNode, features: StepTreeFeatures) -> StepTree { + let contract = StepTreeContractBox::compute(&root, &features); + let mut kinds = Vec::new(); + collect_node_kinds(&root, &mut kinds); + let kinds = kinds.join(","); + let basis = contract.signature_basis_string(&kinds); + let signature = StepTreeSignature::from_basis_string(&basis); + + StepTree { + root, + features, + contract, + signature, + } +} + fn merge_features(mut a: StepTreeFeatures, b: StepTreeFeatures) -> StepTreeFeatures { a.has_if |= b.has_if; a.has_loop |= b.has_loop; @@ -326,6 +469,143 @@ fn merge_features(mut a: StepTreeFeatures, b: StepTreeFeatures) -> StepTreeFeatu a } +struct StepTreeContractBox; + +impl StepTreeContractBox { + fn compute(root: &StepNode, features: &StepTreeFeatures) -> StepTreeContract { + let mut contract = StepTreeContract::default(); + + // Required caps from features (structural only). 
+ if features.has_if { + contract.required_caps.insert(StepCapability::If); + } + if features.max_if_depth > 1 { + contract.required_caps.insert(StepCapability::NestedIf); + } + if features.has_loop { + contract.required_caps.insert(StepCapability::Loop); + } + if features.max_loop_depth > 1 { + contract.required_caps.insert(StepCapability::NestedLoop); + } + if features.has_return { + contract.required_caps.insert(StepCapability::Return); + } + if features.has_break { + contract.required_caps.insert(StepCapability::Break); + } + if features.has_continue { + contract.required_caps.insert(StepCapability::Continue); + } + + Self::walk(root, &mut contract); + contract + } + + fn walk(node: &StepNode, contract: &mut StepTreeContract) { + match node { + StepNode::Block(nodes) => { + for n in nodes { + Self::walk(n, contract); + } + } + StepNode::If { + cond, + then_branch, + else_branch, + .. + } => { + contract.cond_sig.push(cond.to_compact_string()); + Self::walk(then_branch, contract); + if let Some(else_branch) = else_branch { + Self::walk(else_branch, contract); + } + } + StepNode::Loop { cond, body, .. } => { + contract.cond_sig.push(cond.to_compact_string()); + Self::walk(body, contract); + } + StepNode::Stmt { kind, .. 
} => { + match kind { + StepStmtKind::LocalDecl { vars } => { + for v in vars { + contract.writes.insert(v.clone()); + } + } + StepStmtKind::Assign { target } => { + if let Some(name) = target.as_ref() { + contract.writes.insert(name.clone()); + } + } + StepStmtKind::Print => {} + StepStmtKind::Return => { + contract.exits.insert(ExitKind::Return); + } + StepStmtKind::Break => { + contract.exits.insert(ExitKind::Break); + } + StepStmtKind::Continue => { + contract.exits.insert(ExitKind::Continue); + } + StepStmtKind::Other(name) => match *name { + "TryCatch" => { + contract.required_caps.insert(StepCapability::TryCatch); + } + "Throw" => { + contract.required_caps.insert(StepCapability::Throw); + } + "Lambda" => { + contract.required_caps.insert(StepCapability::Lambda); + } + "While" => { + contract.required_caps.insert(StepCapability::While); + } + "ForRange" => { + contract.required_caps.insert(StepCapability::ForRange); + } + "MatchExpr" => { + contract.required_caps.insert(StepCapability::Match); + } + "Arrow" => { + contract.required_caps.insert(StepCapability::Arrow); + } + _ => {} + }, + } + } + } + } +} + +fn collect_node_kinds(node: &StepNode, out: &mut Vec<String>) { + match node { + StepNode::Block(nodes) => { + out.push("Block".to_string()); + for n in nodes { + collect_node_kinds(n, out); + } + } + StepNode::If { + then_branch, + else_branch, + .. + } => { + out.push("If".to_string()); + collect_node_kinds(then_branch, out); + if let Some(else_branch) = else_branch { + collect_node_kinds(else_branch, out); + } + } + StepNode::Loop { body, .. } => { + out.push("Loop".to_string()); + collect_node_kinds(body, out); + } + StepNode::Stmt { kind, .. } => { + out.push(format!("Stmt({})", kind.to_compact_string())); + } + } +} + fn summarize_ast(ast: &ASTNode) -> AstSummary { match ast { ASTNode::Variable { name, .. 
} => AstSummary::Variable(name.clone()), @@ -495,6 +775,19 @@ mod tests { assert!(tree.features.has_if); assert!(!tree.features.has_loop); assert_eq!(tree.features.max_if_depth, 2); + assert_eq!(tree.contract.exits.len(), 0); + assert!(tree.contract.writes.contains("x")); + assert!(tree.contract.required_caps.contains(&StepCapability::If)); + assert!(tree.contract.required_caps.contains(&StepCapability::NestedIf)); + + let basis = tree.signature_basis_string(); + assert_eq!( + basis, + "kinds=Block,Stmt(local(x)),If,Block,If,Block,Stmt(assign(x)),Block,Stmt(assign(x)),Block,Stmt(assign(x)),Stmt(print);exits=;writes=x;caps=If,NestedIf;conds=(lit:str:x == lit:str:x)|(lit:str:y == lit:str:z)" + ); + + let tree2 = StepTreeBuilderBox::build_from_block(&ast); + assert_eq!(tree.signature, tree2.signature); match tree.root { StepNode::Block(nodes) => { @@ -513,4 +806,72 @@ mod tests { other => panic!("expected root Block, got {other:?}"), } } + + #[test] + fn contract_extracts_loop_exits_and_writes_minimal() { + fn var(name: &str) -> ASTNode { + ASTNode::Variable { + name: name.to_string(), + span: Span::unknown(), + } + } + fn int_lit(v: i64) -> ASTNode { + ASTNode::Literal { + value: LiteralValue::Integer(v), + span: Span::unknown(), + } + } + fn bin(op: BinaryOperator, lhs: ASTNode, rhs: ASTNode) -> ASTNode { + ASTNode::BinaryOp { + operator: op, + left: Box::new(lhs), + right: Box::new(rhs), + span: Span::unknown(), + } + } + fn assign(name: &str, value: ASTNode) -> ASTNode { + ASTNode::Assignment { + target: Box::new(var(name)), + value: Box::new(value), + span: Span::unknown(), + } + } + + // local i=0; local x=0; + // loop(i < 3) { x = x + 1; if x == 2 { break } i = i + 1 } + let ast = vec![ + ASTNode::Local { + variables: vec!["i".to_string()], + initial_values: vec![Some(Box::new(int_lit(0)))], + span: Span::unknown(), + }, + ASTNode::Local { + variables: vec!["x".to_string()], + initial_values: vec![Some(Box::new(int_lit(0)))], + span: Span::unknown(), + }, + 
ASTNode::Loop { + condition: Box::new(bin(BinaryOperator::Less, var("i"), int_lit(3))), + body: vec![ + assign("x", bin(BinaryOperator::Add, var("x"), int_lit(1))), + ASTNode::If { + condition: Box::new(bin(BinaryOperator::Equal, var("x"), int_lit(2))), + then_body: vec![ASTNode::Break { span: Span::unknown() }], + else_body: None, + span: Span::unknown(), + }, + assign("i", bin(BinaryOperator::Add, var("i"), int_lit(1))), + ], + span: Span::unknown(), + }, + ]; + + let tree = StepTreeBuilderBox::build_from_block(&ast); + assert!(tree.features.has_loop); + assert!(tree.contract.exits.contains(&ExitKind::Break)); + assert!(tree.contract.writes.contains("i")); + assert!(tree.contract.writes.contains("x")); + assert!(tree.contract.required_caps.contains(&StepCapability::Loop)); + assert!(tree.contract.required_caps.contains(&StepCapability::If)); + } }