feat(joinir): Phase 213-2 Step 2-2 & 2-3 Data structure extensions

Extended PatternPipelineContext and CarrierUpdateInfo for Pattern 3 AST-based generalization.

Changes:
1. PatternPipelineContext:
   - Added loop_condition: Option<ASTNode>
   - Added loop_body: Option<Vec<ASTNode>>
   - Added loop_update_summary: Option<LoopUpdateSummary>
   - Updated build_pattern_context() for Pattern 3

2. CarrierUpdateInfo:
   - Added then_expr: Option<ASTNode>
   - Added else_expr: Option<ASTNode>
   - Updated analyze_loop_updates() with None defaults

Status: Phase 213-2 Steps 2-2 & 2-3 complete
Next: Create Pattern3IfAnalyzer to extract if statement and populate update summary
This commit is contained in:
nyash-codex
2025-12-10 00:01:53 +09:00
parent 577b5b01d5
commit d7805e5974
138 changed files with 3529 additions and 378 deletions

View File

@ -97,6 +97,7 @@ impl LoopHeaderPhiInfo {
}
/// Report whether every carrier PHI entry has its latch incoming recorded.
///
/// Returns `true` when no entry in `carrier_phis` has `latch_incoming`
/// unset; this is vacuously `true` when there are no carrier PHI entries.
#[allow(dead_code)]
pub fn all_latch_set(&self) -> bool {
    // Equivalent to "all are set": look for a single entry that is still missing.
    let any_missing = self
        .carrier_phis
        .values()
        .any(|entry| entry.latch_incoming.is_none());
    !any_missing
}

View File

@ -21,6 +21,7 @@ pub struct MergeResult {
impl MergeResult {
/// Create a new MergeResult with empty inputs
#[allow(dead_code)]
pub fn new(exit_block_id: BasicBlockId) -> Self {
Self {
exit_block_id,
@ -30,11 +31,13 @@ impl MergeResult {
}
/// Add an exit PHI input
///
/// Pushes the `(from_block, value)` pair onto `exit_phi_inputs`.
/// No deduplication or validation is performed here.
///
/// # Arguments
///
/// - `from_block`: Block the incoming value arrives from
/// - `value`: ValueId supplied along that edge
// NOTE(review): `allow(dead_code)` presumably means no caller exists yet — confirm.
#[allow(dead_code)]
pub fn add_exit_phi_input(&mut self, from_block: BasicBlockId, value: ValueId) {
self.exit_phi_inputs.push((from_block, value));
}
/// Add a carrier input
#[allow(dead_code)]
pub fn add_carrier_input(&mut self, carrier_name: String, from_block: BasicBlockId, value: ValueId) {
self.carrier_inputs
.entry(carrier_name)

View File

@ -783,6 +783,76 @@ fn verify_exit_line(
}
}
/// Phase 205-4: Verify ValueId regions follow JoinValueSpace contracts
///
/// # Checks
///
/// 1. All `boundary.join_inputs` are in the Param region (`PARAM_MIN..=PARAM_MAX`)
/// 2. All `boundary.condition_bindings[].join_value` are in the Param region
/// 3. All `loop_info.carrier_phis[].phi_dst` are within the valid range (`<= LOCAL_MAX`)
///
/// The checks run in this order; the first violation aborts verification.
///
/// # Rationale
///
/// JoinValueSpace enforces disjoint regions (Param: 100-999, Local: 1000+)
/// to prevent ValueId collisions. This verifier ensures that the boundary
/// contracts are respected after JoinIR generation.
///
/// # Panics
///
/// Panics if any ValueId is in an unexpected region. The function is only
/// compiled when `debug_assertions` is enabled.
#[cfg(debug_assertions)]
fn verify_valueid_regions(
    loop_info: &LoopHeaderPhiInfo,
    boundary: &JoinInlineBoundary,
) {
    use crate::mir::join_ir::lowering::join_value_space::{PARAM_MIN, PARAM_MAX, LOCAL_MAX};

    // Helper to classify a ValueId into a human-readable region name for diagnostics.
    fn region_name(id: ValueId) -> &'static str {
        if id.0 < PARAM_MIN {
            "PHI Reserved"
        } else if id.0 <= PARAM_MAX {
            "Param"
        } else if id.0 <= LOCAL_MAX {
            "Local"
        } else {
            "Invalid (> LOCAL_MAX)"
        }
    }

    // Single definition of "is in the Param region", shared by checks 1 and 2.
    fn in_param_region(id: ValueId) -> bool {
        (PARAM_MIN..=PARAM_MAX).contains(&id.0)
    }

    // Check 1: Boundary join_inputs must be in Param region
    for &join_id in &boundary.join_inputs {
        if !in_param_region(join_id) {
            // The expected range is printed from the constants so the message
            // cannot drift from the actual region bounds.
            panic!(
                "[RegionVerifier] Boundary input {:?} is in {} region, expected Param ({}-{})",
                join_id,
                region_name(join_id),
                PARAM_MIN,
                PARAM_MAX
            );
        }
    }

    // Check 2: Condition bindings must be in Param region
    for binding in &boundary.condition_bindings {
        let join_value = binding.join_value;
        if !in_param_region(join_value) {
            panic!(
                "[RegionVerifier] Condition binding '{}' join_value {:?} is in {} region, expected Param ({}-{})",
                binding.name,
                join_value,
                region_name(join_value),
                PARAM_MIN,
                PARAM_MAX
            );
        }
    }

    // Check 3: PHI dst must be within valid range (only the upper bound is enforced)
    for (carrier_name, entry) in &loop_info.carrier_phis {
        let phi_dst = entry.phi_dst;
        if phi_dst.0 > LOCAL_MAX {
            panic!(
                "[RegionVerifier] Carrier '{}' PHI dst {:?} exceeds LOCAL_MAX ({})",
                carrier_name, phi_dst, LOCAL_MAX
            );
        }
    }
}
/// Verify that PHI dst values are not overwritten by later instructions (Phase 204-2)
///
/// # Checks
@ -957,4 +1027,5 @@ fn verify_joinir_contracts(
verify_no_phi_dst_overwrite(func, header_block, loop_info); // Phase 204-2
verify_phi_inputs_defined(func, header_block); // Phase 204-3
verify_exit_line(func, exit_block, boundary);
verify_valueid_regions(loop_info, boundary); // Phase 205-4
}

View File

@ -151,8 +151,8 @@ impl CommonPatternInitializer {
/// ```
pub fn check_carrier_updates_allowed(
body: &[ASTNode],
loop_var_name: &str,
variable_map: &BTreeMap<String, ValueId>,
_loop_var_name: &str,
_variable_map: &BTreeMap<String, ValueId>,
) -> bool {
use crate::mir::join_ir::lowering::loop_update_analyzer::{LoopUpdateAnalyzer, UpdateExpr, UpdateRhs};
use crate::mir::join_ir::lowering::carrier_info::CarrierVar;

View File

@ -50,6 +50,7 @@ impl ConditionEnvBuilder {
/// # Returns
///
/// ConditionEnv with only the loop parameter mapped to ValueId(0)
#[allow(dead_code)]
pub fn build_loop_param_only(loop_var_name: &str) -> ConditionEnv {
let mut env = ConditionEnv::new();
env.insert(loop_var_name.to_string(), ValueId(0));
@ -125,6 +126,7 @@ impl ConditionEnvBuilder {
/// Phase 201: Build ConditionEnv with loop parameter only using JoinValueSpace
///
/// Uses JoinValueSpace to allocate the loop parameter ValueId.
#[allow(dead_code)]
pub fn build_loop_param_only_v2(loop_var_name: &str, space: &mut JoinValueSpace) -> (ConditionEnv, ValueId) {
let mut env = ConditionEnv::new();
let loop_var_join_id = space.alloc_param();
@ -172,6 +174,7 @@ impl ConditionEnvBuilder {
/// // env.captured: "digits" → ValueId(1)
/// // boundary.condition_bindings: [ConditionBinding { name: "digits", host_value: ValueId(42), join_value: ValueId(1) }]
/// ```
#[allow(dead_code)]
pub fn build_with_captures(
loop_var_name: &str,
captured: &CapturedEnv,

View File

@ -16,6 +16,7 @@ use std::collections::HashMap;
///
/// Phase 193-4: Fully boxifies exit binding generation.
/// Eliminates hardcoded variable names and ValueId plumbing scattered across lowerers.
#[allow(dead_code)]
pub struct ExitBindingBuilder<'a> {
carrier_info: &'a CarrierInfo,
exit_meta: &'a ExitMeta,
@ -44,6 +45,7 @@ impl<'a> ExitBindingBuilder<'a> {
/// # Returns
///
/// ExitBindingBuilder instance, or error if metadata is inconsistent
#[allow(dead_code)]
pub fn new(
carrier_info: &'a CarrierInfo,
exit_meta: &'a ExitMeta,
@ -91,6 +93,7 @@ impl<'a> ExitBindingBuilder<'a> {
/// # Returns
///
/// Vec of LoopExitBinding, one per carrier, sorted by carrier name
#[allow(dead_code)]
pub fn build_loop_exit_bindings(&mut self) -> Result<Vec<LoopExitBinding>, String> {
let mut bindings = Vec::new();
@ -126,6 +129,7 @@ impl<'a> ExitBindingBuilder<'a> {
/// # Returns
///
/// Success or error if boundary cannot be updated
#[allow(dead_code)]
pub fn apply_to_boundary(&self, boundary: &mut JoinInlineBoundary) -> Result<(), String> {
// Build explicit exit bindings (loop var + carriers)
let mut bindings = Vec::new();
@ -166,6 +170,7 @@ impl<'a> ExitBindingBuilder<'a> {
/// Get the loop variable exit binding
///
/// The loop variable is always the first exit (index 0).
#[allow(dead_code)]
pub fn loop_var_exit_binding(&self) -> LoopExitBinding {
LoopExitBinding {
carrier_name: self.carrier_info.loop_var_name.clone(),
@ -178,6 +183,7 @@ impl<'a> ExitBindingBuilder<'a> {
///
/// Phase 193-4: Temporary sequential allocation strategy.
/// Future improvement: Delegate to MirBuilder's next_value_id() for proper allocation.
#[allow(dead_code)]
fn allocate_new_value_id(&self) -> ValueId {
// Find the maximum ValueId in current variable_map
let max_id = self.variable_map.values()

View File

@ -19,6 +19,7 @@ use super::super::trace;
/// # Returns
///
/// Vector of (variable_name, join_value_id) pairs for all body-local variables
#[allow(dead_code)]
fn collect_body_local_variables(
body: &[ASTNode],
alloc_join_value: &mut dyn FnMut() -> ValueId,
@ -95,6 +96,7 @@ impl MirBuilder {
///
/// Note: Pattern 2 has complex Trim pattern logic that remains inline
/// for now. Future Phase 180+ will move Trim logic to dedicated module.
#[allow(dead_code)]
pub(in crate::mir::builder) fn cf_loop_pattern2_with_break(
&mut self,
condition: &ASTNode,

View File

@ -122,6 +122,7 @@ impl Pattern4CarrierAnalyzer {
/// # Returns
///
/// Ok(()) if continue structure is valid, Err(message) otherwise
#[allow(dead_code)]
pub fn validate_continue_structure(body: &[ASTNode]) -> Result<(), String> {
// Check for at least one continue statement
for stmt in body {
@ -143,6 +144,7 @@ impl Pattern4CarrierAnalyzer {
/// # Returns
///
/// true if the node or any of its children is a Continue statement
#[allow(dead_code)]
fn has_continue(node: &ASTNode) -> bool {
match node {
ASTNode::Continue { .. } => true,

View File

@ -38,6 +38,7 @@ use crate::mir::join_ir::lowering::condition_env::ConditionEnv;
use crate::mir::join_ir::lowering::condition_env::ConditionBinding;
use crate::mir::join_ir::lowering::loop_scope_shape::LoopScopeShape;
use crate::mir::join_ir::lowering::loop_update_analyzer::UpdateExpr;
use crate::mir::join_ir::lowering::loop_update_summary::LoopUpdateSummary; // Phase 213
use crate::mir::loop_pattern_detection::trim_loop_helper::TrimLoopHelper;
use crate::mir::ValueId;
use crate::mir::BasicBlockId;
@ -90,27 +91,49 @@ pub struct PatternPipelineContext {
/// Condition environment (variable → JoinIR ValueId mapping)
/// Used by Pattern 2 (break condition) and Pattern 4 (continue condition)
#[allow(dead_code)]
pub condition_env: Option<ConditionEnv>,
/// Condition bindings (HOST↔JoinIR value mappings)
/// Used by Pattern 2 and Pattern 4
#[allow(dead_code)]
pub condition_bindings: Option<Vec<ConditionBinding>>,
/// Carrier update expressions (variable → UpdateExpr)
/// Used by Pattern 2 (multi-carrier) and Pattern 4 (Select-based updates)
#[allow(dead_code)]
pub carrier_updates: Option<HashMap<String, UpdateExpr>>,
// === Pattern 2/4: Trim Pattern Support ===
/// Trim loop helper (if Trim pattern detected during promotion)
/// Used by Pattern 2 (string trim) - Pattern 4 support TBD
#[allow(dead_code)]
pub trim_helper: Option<TrimLoopHelper>,
// === Pattern 2: Break Condition ===
/// Effective break condition (may be modified for Trim pattern)
/// Used only by Pattern 2
#[allow(dead_code)]
pub break_condition: Option<ASTNode>,
// === Pattern 3: If-Sum Generalization (Phase 213) ===
/// Loop condition AST node
/// Used by Pattern 3 for dynamic loop condition lowering
#[allow(dead_code)]
pub loop_condition: Option<ASTNode>,
/// Loop body AST nodes
/// Used by Pattern 3 to extract if statement for if-sum lowering
#[allow(dead_code)]
pub loop_body: Option<Vec<ASTNode>>,
/// Loop update summary with then/else expressions
/// Used by Pattern 3 for dynamic carrier update lowering
#[allow(dead_code)]
pub loop_update_summary: Option<LoopUpdateSummary>,
}
/// Pattern variant selector
@ -128,21 +151,25 @@ pub enum PatternVariant {
impl PatternPipelineContext {
/// Get the number of carriers (excluding loop variable)
///
/// Thin delegation to `self.carrier_info.carrier_count()`; the exact
/// counting rule is defined by that method, which is not visible here.
// NOTE(review): `allow(dead_code)` presumably means no non-test caller exists yet — confirm.
#[allow(dead_code)]
pub fn carrier_count(&self) -> usize {
self.carrier_info.carrier_count()
}
/// Check if this is a Trim pattern
///
/// Returns `true` exactly when `trim_helper` is `Some`.
// NOTE(review): `allow(dead_code)` presumably means no non-test caller exists yet — confirm.
#[allow(dead_code)]
pub fn is_trim_pattern(&self) -> bool {
self.trim_helper.is_some()
}
/// Check if this has condition environment (Pattern 2/4)
///
/// Returns `true` exactly when `condition_env` is `Some`. The pattern
/// variant itself is not inspected; only the presence of the field is.
// NOTE(review): `allow(dead_code)` presumably means no non-test caller exists yet — confirm.
#[allow(dead_code)]
pub fn has_condition_env(&self) -> bool {
self.condition_env.is_some()
}
/// Check if this has carrier updates (Pattern 2/4)
///
/// Returns `true` exactly when `carrier_updates` is `Some`; an empty map
/// wrapped in `Some` still counts as present.
// NOTE(review): `allow(dead_code)` presumably means no non-test caller exists yet — confirm.
#[allow(dead_code)]
pub fn has_carrier_updates(&self) -> bool {
self.carrier_updates.is_some()
}
@ -214,16 +241,25 @@ pub fn build_pattern_context(
};
// Step 3: Pattern-specific preprocessing
let (condition_env, condition_bindings, carrier_updates, trim_helper, break_condition) =
let (condition_env, condition_bindings, carrier_updates, trim_helper, break_condition,
loop_condition, loop_body, loop_update_summary) =
match variant {
PatternVariant::Pattern1 => {
// Pattern 1: No additional preprocessing needed
(None, None, None, None, None)
(None, None, None, None, None, None, None, None)
}
PatternVariant::Pattern3 => {
// Pattern 3: No condition env, but may have carrier updates for if-else PHI
// TODO: Pattern 3 analyzer integration (future work)
(None, None, None, None, None)
// Pattern 3: Phase 213 - Store loop condition and body for AST-based lowering
(
None, // No condition_env
None, // No condition_bindings
None, // No carrier_updates (old style)
None, // No trim_helper
None, // No break_condition
Some(condition.clone()), // loop_condition (Phase 213)
Some(body.to_vec()), // loop_body (Phase 213)
None, // loop_update_summary (TODO: Phase 213-2-3)
)
}
PatternVariant::Pattern2 | PatternVariant::Pattern4 => {
// Pattern 2/4: Full preprocessing will be handled by existing code
@ -235,7 +271,7 @@ pub fn build_pattern_context(
// - Trim pattern promotion
// These will remain in pattern2/pattern4.rs for now and will be
// gradually migrated into this pipeline in future phases.
(None, None, None, None, None)
(None, None, None, None, None, None, None, None)
}
};
@ -249,6 +285,9 @@ pub fn build_pattern_context(
carrier_updates,
trim_helper,
break_condition,
loop_condition, // Phase 213
loop_body, // Phase 213
loop_update_summary, // Phase 213
})
}
@ -315,6 +354,9 @@ mod tests {
carrier_updates: None,
trim_helper: None,
break_condition: None,
loop_condition: None, // Phase 213
loop_body: None, // Phase 213
loop_update_summary: None, // Phase 213
};
assert_eq!(ctx.carrier_count(), 2);
@ -354,6 +396,9 @@ mod tests {
whitespace_chars: vec![" ".to_string(), "\t".to_string()],
}),
break_condition: None,
loop_condition: None, // Phase 213
loop_body: None, // Phase 213
loop_update_summary: None, // Phase 213
};
assert!(ctx.is_trim_pattern());

View File

@ -44,12 +44,15 @@ pub struct LoopPatternContext<'a> {
pub debug: bool,
/// Has continue statement(s) in body? (Phase 194+)
#[allow(dead_code)]
pub has_continue: bool,
/// Has break statement(s) in body? (Phase 194+)
#[allow(dead_code)]
pub has_break: bool,
/// Phase 192: Loop features extracted from AST
#[allow(dead_code)]
pub features: LoopFeatures,
/// Phase 192: Pattern classification based on features
@ -119,6 +122,7 @@ pub struct LoopPatternEntry {
pub name: &'static str,
/// Priority (lower = tried first). Pattern1=10, Pattern2=20, Pattern3=30
#[allow(dead_code)]
pub priority: u8,
/// Detection function: returns true if this pattern matches

View File

@ -54,35 +54,16 @@ impl MirBuilder {
trace::trace().routing("router", &func_name, "Structure-only mode enabled, skipping whitelist");
} else {
// Phase 49-4 + Phase 80: Multi-target routing (legacy whitelist)
// - Core ON なら代表2本print_tokens / ArrayExt.filterは JoinIR を優先し、失敗したら LoopBuilder へフォールバック
// - Core OFF では従来通り dev フラグで opt-in
// - JoinIR は常時 ON。legacy LoopBuilder は削除済み。
// - 代表2本print_tokens / ArrayExt.filterも常に JoinIR で試行する。
// Note: Arity does NOT include implicit `me` receiver
// Phase 188: Add "main" routing for loop pattern expansion
// Phase 170: Add JsonParserBox methods for selfhost validation
let core_on = crate::config::env::joinir_core_enabled();
let is_target = match func_name.as_str() {
"main" => true, // Phase 188-Impl-1: Enable JoinIR for main function (Pattern 1)
"JoinIrMin.main/0" => true, // Phase 188-Impl-2: Enable JoinIR for JoinIrMin.main/0 (Pattern 2)
"JsonTokenizer.print_tokens/0" => {
if core_on {
true
} else {
std::env::var("HAKO_JOINIR_PRINT_TOKENS_MAIN")
.ok()
.as_deref()
== Some("1")
}
}
"ArrayExtBox.filter/2" => {
if core_on {
true
} else {
std::env::var("HAKO_JOINIR_ARRAY_FILTER_MAIN")
.ok()
.as_deref()
== Some("1")
}
}
"JsonTokenizer.print_tokens/0" => true,
"ArrayExtBox.filter/2" => true,
// Phase 170-A-1: Enable JsonParserBox methods for JoinIR routing
"JsonParserBox._trim/1" => true,
"JsonParserBox._skip_whitespace/2" => true,

View File

@ -83,6 +83,7 @@ impl JoinLoopTrace {
}
/// Check if varmap tracing is enabled
#[allow(dead_code)]
pub fn is_varmap_enabled(&self) -> bool {
self.varmap_enabled
}
@ -150,6 +151,7 @@ impl JoinLoopTrace {
/// # Arguments
/// - `tag`: Context identifier (e.g., "pattern3", "exit_block")
/// - `msg`: Human-readable message about the PHI operation
#[allow(dead_code)]
pub fn phi(&self, tag: &str, msg: &str) {
if self.phi_enabled {
eprintln!("[trace:phi] {}: {}", tag, msg);
@ -161,6 +163,7 @@ impl JoinLoopTrace {
/// # Arguments
/// - `tag`: Context identifier (e.g., "pattern3", "block_allocation")
/// - `msg`: Human-readable message about the merge operation
#[allow(dead_code)]
pub fn merge(&self, tag: &str, msg: &str) {
if self.joinir_enabled || self.varmap_enabled {
eprintln!("[trace:merge] {}: {}", tag, msg);
@ -174,6 +177,7 @@ impl JoinLoopTrace {
/// - `var_name`: Name of the variable being reconnected
/// - `old_id`: Old ValueId (before exit PHI)
/// - `new_id`: New ValueId (after exit PHI)
#[allow(dead_code)]
pub fn exit_phi(&self, tag: &str, var_name: &str, old_id: ValueId, new_id: ValueId) {
if self.varmap_enabled {
eprintln!(

View File

@ -86,9 +86,8 @@ impl super::MirBuilder {
///
/// This is the unified entry point for all loop lowering. All loops are processed
/// via JoinIR Frontend (Phase 187-2: LoopBuilder removed).
/// Specific functions are enabled via dev flags (Phase 49) or Core policy (Phase 80):
/// Specific functions are enabled via dev flags (Phase 49):
///
/// - Core ON (`joinir_core_enabled()`): print_tokens / ArrayExt.filter はまず JoinIR Frontend を試す
/// - Dev フラグ(既存):
/// - `HAKO_JOINIR_PRINT_TOKENS_MAIN=1`: JsonTokenizer.print_tokens/0
/// - `HAKO_JOINIR_ARRAY_FILTER_MAIN=1`: ArrayExtBox.filter/2