Files
hakorune/src/mir/join_ir/lowering/funcscanner_trim.rs
nyash-codex 447bbec998 refactor(joinir): Split ast_lowerer and join_ir_vm_bridge into modules
ast_lowerer.rs → ast_lowerer/ (10 files):
- mod.rs: public surface + entry dispatch
- context.rs: ExtractCtx helpers
- expr.rs: expression-to-JoinIR extraction
- if_return.rs: simple if→Select lowering
- loop_patterns.rs: loop variants (simple/break/continue)
- read_quoted.rs: read_quoted_from lowering (Phase 45-46)
- nested_if.rs: NestedIfMerge lowering
- analysis.rs: loop if-var analysis + metadata helpers
- tests.rs: frontend lowering tests
- README.md: module documentation

join_ir_vm_bridge.rs → join_ir_vm_bridge/ (5 files):
- mod.rs: public surface + shared helpers
- convert.rs: JoinIR→MIR lowering
- runner.rs: VM execution entry (run_joinir_via_vm)
- meta.rs: experimental metadata-aware hooks
- tests.rs: bridge-specific unit tests
- README.md: module documentation

Benefits:
- Clear separation of concerns per pattern
- Easier navigation and maintenance
- Each file has single responsibility
- README documents module boundaries

Co-authored-by: ChatGPT <noreply@openai.com>

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 17:42:19 +09:00

651 lines
20 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Phase 27.1: FuncScannerBox.trim/1 の MIR → JoinIR 変換
//!
//! 目的: lang/src/compiler/entry/func_scanner.hako の trim メソッドを JoinIR に変換
//!
//! 期待される変換:
//! ```text
//! // MIR (元):
//! method trim(s) {
//! local e = n
//! loop(e > b) {
//! local ch = str.substring(e - 1, e)
//! if ch == " " || ch == "\t" || ch == "\n" || ch == "\r" {
//! e = e - 1
//! } else {
//! break
//! }
//! }
//! return substring(b, e)
//! }
//!
//! // JoinIR (変換後):
//! fn trim_main(s_param, k_exit) {
//! str = "" + s_param
//! n = str.length()
//! b = skip_leading(str, 0, n)
//! e_init = n
//! loop_step(str, b, e_init, k_exit)
//! }
//!
//! fn loop_step(str, b, e, k_exit) {
//! cond = (e > b)
//! if cond {
//! ch = str.substring(e - 1, e)
//! is_space = (ch == " " || ch == "\t" || ch == "\n" || ch == "\r")
//! if is_space {
//! e_next = e - 1
//! loop_step(str, b, e_next, k_exit)
//! } else {
//! k_exit(e)
//! }
//! } else {
//! k_exit(e)
//! }
//! }
//! ```
use crate::mir::join_ir::{
BinOpKind, CompareOp, ConstValue, JoinContId, JoinFuncId, JoinFunction, JoinInst, JoinModule,
LoopExitShape, LoopHeaderShape, MirLikeInst,
};
use crate::mir::ValueId;
/// Public entry point for lowering `FuncScannerBox.trim/1` to JoinIR (Phase 27.9).
///
/// Hands both available strategies to the shared `dispatch_lowering` helper under the
/// label `"trim"` and returns whatever it produces:
/// - `lower_trim_from_mir`: the MIR-based route
/// - `lower_trim_handwritten`: the handwritten route
///
/// Per the original note on this function, the handwritten route is the default and
/// `NYASH_JOINIR_LOWER_FROM_MIR=1` selects the MIR-based one; that selection happens
/// inside `dispatch_lowering`, which is not visible from this file.
pub fn lower_funcscanner_trim_to_joinir(module: &crate::mir::MirModule) -> Option<JoinModule> {
    use super::common::dispatch_lowering;

    dispatch_lowering(
        "trim",
        module,
        lower_trim_from_mir,
        lower_trim_handwritten,
    )
}
/// Phase 27.11: Common JoinIR builder for FuncScannerBox.trim/1
///
/// This function generates the JoinIR for trim/1, shared by both:
/// - lower_trim_handwritten (always uses this)
/// - lower_trim_from_mir (uses this after CFG sanity checks pass)
fn build_funcscanner_trim_joinir(module: &crate::mir::MirModule) -> Option<JoinModule> {
// Step 1: "FuncScannerBox.trim/1" を探す
let target_func = module.functions.get("FuncScannerBox.trim/1")?;
eprintln!("[joinir/trim/build] Found FuncScannerBox.trim/1");
eprintln!(
"[joinir/trim/build] MIR blocks: {}",
target_func.blocks.len()
);
let mut join_module = JoinModule::new();
// trim_main 関数: 前処理 + 先頭/末尾の空白を除去
let trim_main_id = JoinFuncId::new(0);
let s_param = ValueId(5000);
let mut trim_main_func =
JoinFunction::new(trim_main_id, "trim_main".to_string(), vec![s_param]);
let str_val = ValueId(5001);
let n_val = ValueId(5002);
let b_val = ValueId(5003);
let e_init = ValueId(5004);
let const_empty = ValueId(5005);
let const_zero = ValueId(5006);
// str = "" + s_param (文字列化)
trim_main_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_empty,
value: ConstValue::String("".to_string()),
}));
trim_main_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: str_val,
lhs: const_empty,
rhs: s_param,
op: BinOpKind::Add,
}));
// n = str.length()
trim_main_func
.body
.push(JoinInst::Compute(MirLikeInst::BoxCall {
dst: Some(n_val),
box_name: "StringBox".to_string(),
method: "length".to_string(),
args: vec![str_val],
}));
// const 0
trim_main_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_zero,
value: ConstValue::Integer(0),
}));
// b = skip_leading_whitespace(str, 0, n)
let skip_leading_id = JoinFuncId::new(2);
trim_main_func.body.push(JoinInst::Call {
func: skip_leading_id,
args: vec![str_val, const_zero, n_val],
k_next: None,
dst: Some(b_val),
});
// e_init = n (コピー)
trim_main_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: e_init,
op: BinOpKind::Add,
lhs: n_val,
rhs: const_zero,
}));
// loop_step(str, b, e_init) -> 戻り値をそのまま返す
let loop_step_id = JoinFuncId::new(1);
trim_main_func.body.push(JoinInst::Call {
func: loop_step_id,
args: vec![str_val, b_val, e_init],
k_next: None,
dst: None,
});
join_module.entry = Some(trim_main_id);
join_module.add_function(trim_main_func);
// Phase 27.4-A: trim loop_step の Pinned/Carrier 構造を明示
// trim ループの場合:
// - Pinned: str (文字列), b (開始位置) - ループ中で不変
// - Carrier: e (終了位置) - ループで後ろから前へ更新される
let str_loop = ValueId(6000); // Pinned
let b_loop = ValueId(6001); // Pinned
let e_loop = ValueId(6002); // Carrier
let _header_shape = LoopHeaderShape::new_manual(
vec![str_loop, b_loop], // Pinned: str, b
vec![e_loop], // Carrier: e
);
// 将来: to_loop_step_params() で [str, b, e] (pinned..., carriers...) を生成する設計。
// 現在は既存 JoinIR テストとの互換性のため、手動で [str, b, e] の順を維持している。
// loop_step 関数: 末尾の空白を削り、最終的に substring(b, e) を返す
let mut loop_step_func = JoinFunction::new(
loop_step_id,
"loop_step".to_string(),
vec![str_loop, b_loop, e_loop],
);
// cond = (e > b)
let cond = ValueId(6003);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cond,
lhs: e_loop,
rhs: b_loop,
op: CompareOp::Gt,
}));
// bool false (共通)
let bool_false = ValueId(6019);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: bool_false,
value: ConstValue::Bool(false),
}));
// trimmed_base = str.substring(b, e)
let trimmed_base = ValueId(6004);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BoxCall {
dst: Some(trimmed_base),
box_name: "StringBox".to_string(),
method: "substring".to_string(),
args: vec![str_loop, b_loop, e_loop],
}));
// cond_is_false = (cond == false)
let cond_is_false = ValueId(6020);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cond_is_false,
lhs: cond,
rhs: bool_false,
op: CompareOp::Eq,
}));
// Phase 27.5: Exit φ の意味を LoopExitShape で明示Option A
// trim のループ脱出時は e の値で substring(b, e) を計算済み
let _exit_shape_trim = LoopExitShape::new_manual(vec![e_loop]); // exit_args = [e] (Option A)
// 実装上は既に trimmed_base = substring(b, e) を計算済みで、その結果を返している
// if !(e > b) { return substring(b, e) }
loop_step_func.body.push(JoinInst::Jump {
cont: JoinContId::new(0),
args: vec![trimmed_base], // ← substring(b, e) の結果
cond: Some(cond_is_false),
});
// const 1
let const_1 = ValueId(6005);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_1,
value: ConstValue::Integer(1),
}));
// e_minus_1 = e - 1
let e_minus_1 = ValueId(6006);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: e_minus_1,
lhs: e_loop,
rhs: const_1,
op: BinOpKind::Sub,
}));
let ch = ValueId(6007);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BoxCall {
dst: Some(ch),
box_name: "StringBox".to_string(),
method: "substring".to_string(),
args: vec![str_loop, e_minus_1, e_loop],
}));
// is_space = (ch == " " || ch == "\\t" || ch == "\\n" || ch == "\\r")
let cmp_space = ValueId(6008);
let cmp_tab = ValueId(6009);
let cmp_newline = ValueId(6010);
let cmp_cr = ValueId(6011);
let const_space = ValueId(6012);
let const_tab = ValueId(6013);
let const_newline = ValueId(6014);
let const_cr = ValueId(6015);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_space,
value: ConstValue::String(" ".to_string()),
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cmp_space,
lhs: ch,
rhs: const_space,
op: CompareOp::Eq,
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_tab,
value: ConstValue::String("\\t".to_string()),
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cmp_tab,
lhs: ch,
rhs: const_tab,
op: CompareOp::Eq,
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_newline,
value: ConstValue::String("\\n".to_string()),
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cmp_newline,
lhs: ch,
rhs: const_newline,
op: CompareOp::Eq,
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_cr,
value: ConstValue::String("\\r".to_string()),
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cmp_cr,
lhs: ch,
rhs: const_cr,
op: CompareOp::Eq,
}));
// OR chain: (cmp_space || cmp_tab) || cmp_newline || cmp_cr
let or1 = ValueId(6016);
let or2 = ValueId(6017);
let is_space = ValueId(6018);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: or1,
lhs: cmp_space,
rhs: cmp_tab,
op: BinOpKind::Or,
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: or2,
lhs: or1,
rhs: cmp_newline,
op: BinOpKind::Or,
}));
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: is_space,
lhs: or2,
rhs: cmp_cr,
op: BinOpKind::Or,
}));
// is_space_false = (is_space == false)
let is_space_false = ValueId(6021);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::Compare {
dst: is_space_false,
lhs: is_space,
rhs: bool_false,
op: CompareOp::Eq,
}));
// Phase 27.5: 2箇所目の exit パス(同じく exit_args = [e], Option A
// if !is_space { return substring(b, e) }
loop_step_func.body.push(JoinInst::Jump {
cont: JoinContId::new(1),
args: vec![trimmed_base], // ← substring(b, e) の結果1箇所目と同じ
cond: Some(is_space_false),
});
// continue path: e_next = e - 1; loop_step(str, b, e_next)
let e_next = ValueId(6022);
loop_step_func
.body
.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: e_next,
lhs: e_loop,
rhs: const_1,
op: BinOpKind::Sub,
}));
loop_step_func.body.push(JoinInst::Call {
func: loop_step_id, // 再帰呼び出し
args: vec![str_loop, b_loop, e_next],
k_next: None,
dst: None,
});
join_module.add_function(loop_step_func);
// skip_leading 関数: 先頭の空白をスキップして位置を返す
let mut skip_func = JoinFunction::new(
skip_leading_id,
"skip_leading".to_string(),
vec![ValueId(7000), ValueId(7001), ValueId(7002)], // (s, i, n)
);
let s_skip = ValueId(7000);
let i_skip = ValueId(7001);
let n_skip = ValueId(7002);
let cmp_len = ValueId(7003);
let const_1_skip = ValueId(7004);
let i_plus_1_skip = ValueId(7005);
let ch_skip = ValueId(7006);
let cmp_space_skip = ValueId(7007);
let cmp_tab_skip = ValueId(7008);
let cmp_newline_skip = ValueId(7009);
let cmp_cr_skip = ValueId(7010);
let const_space_skip = ValueId(7011);
let const_tab_skip = ValueId(7012);
let const_newline_skip = ValueId(7013);
let const_cr_skip = ValueId(7014);
let or1_skip = ValueId(7015);
let or2_skip = ValueId(7016);
let is_space_skip = ValueId(7017);
let bool_false_skip = ValueId(7018);
let is_space_false_skip = ValueId(7019);
// cmp_len = (i >= n)
skip_func.body.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cmp_len,
lhs: i_skip,
rhs: n_skip,
op: CompareOp::Ge,
}));
// if i >= n { return i }
skip_func.body.push(JoinInst::Jump {
cont: JoinContId::new(2),
args: vec![i_skip],
cond: Some(cmp_len),
});
// const 1
skip_func.body.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_1_skip,
value: ConstValue::Integer(1),
}));
// i_plus_1 = i + 1
skip_func.body.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: i_plus_1_skip,
lhs: i_skip,
rhs: const_1_skip,
op: BinOpKind::Add,
}));
// ch = s.substring(i, i + 1)
skip_func.body.push(JoinInst::Compute(MirLikeInst::BoxCall {
dst: Some(ch_skip),
box_name: "StringBox".to_string(),
method: "substring".to_string(),
args: vec![s_skip, i_skip, i_plus_1_skip],
}));
// whitespace constants + comparisons
skip_func.body.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_space_skip,
value: ConstValue::String(" ".to_string()),
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cmp_space_skip,
lhs: ch_skip,
rhs: const_space_skip,
op: CompareOp::Eq,
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_tab_skip,
value: ConstValue::String("\\t".to_string()),
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cmp_tab_skip,
lhs: ch_skip,
rhs: const_tab_skip,
op: CompareOp::Eq,
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_newline_skip,
value: ConstValue::String("\\n".to_string()),
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cmp_newline_skip,
lhs: ch_skip,
rhs: const_newline_skip,
op: CompareOp::Eq,
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::Const {
dst: const_cr_skip,
value: ConstValue::String("\\r".to_string()),
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::Compare {
dst: cmp_cr_skip,
lhs: ch_skip,
rhs: const_cr_skip,
op: CompareOp::Eq,
}));
// is_space_skip = OR chain
skip_func.body.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: or1_skip,
lhs: cmp_space_skip,
rhs: cmp_tab_skip,
op: BinOpKind::Or,
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: or2_skip,
lhs: or1_skip,
rhs: cmp_newline_skip,
op: BinOpKind::Or,
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::BinOp {
dst: is_space_skip,
lhs: or2_skip,
rhs: cmp_cr_skip,
op: BinOpKind::Or,
}));
// bool false + negation
skip_func.body.push(JoinInst::Compute(MirLikeInst::Const {
dst: bool_false_skip,
value: ConstValue::Bool(false),
}));
skip_func.body.push(JoinInst::Compute(MirLikeInst::Compare {
dst: is_space_false_skip,
lhs: is_space_skip,
rhs: bool_false_skip,
op: CompareOp::Eq,
}));
// if not space -> return i
skip_func.body.push(JoinInst::Jump {
cont: JoinContId::new(3),
args: vec![i_skip],
cond: Some(is_space_false_skip),
});
// continue path: skip_leading(s, i + 1, n)
skip_func.body.push(JoinInst::Call {
func: skip_leading_id,
args: vec![s_skip, i_plus_1_skip, n_skip],
k_next: None,
dst: None,
});
join_module.add_function(skip_func);
eprintln!(
"[joinir/trim] Generated {} JoinIR functions",
join_module.functions.len()
);
Some(join_module)
}
/// Handwritten lowering route for FuncScannerBox.trim/1 (Phase 27.11).
///
/// Announces on stderr that the handwritten route was taken, then delegates all of the
/// work to `build_funcscanner_trim_joinir`. Kept as a separate function so that the
/// handwritten route stays available as the baseline reference.
///
/// Returns whatever the shared builder returns.
fn lower_trim_handwritten(module: &crate::mir::MirModule) -> Option<JoinModule> {
    const ROUTE_BANNER: &str = "[joinir/trim/handwritten] Using handwritten lowering path";

    eprintln!("{ROUTE_BANNER}");
    build_funcscanner_trim_joinir(module)
}
/// MIR-based lowering route for FuncScannerBox.trim/1 (Phase 27.9).
///
/// Steps:
/// 1. Look up `"FuncScannerBox.trim/1"` in `module`; return `None` when it is missing.
/// 2. Run lightweight sanity checks on the entry block (it has successors; it contains
///    `Const("")`, a String `length` call and a `BinOp(Add)`). If a check fails, the
///    reason is reported through `log_fallback` and the handwritten route is used.
/// 3. When `NYASH_JOINIR_LOWER_GENERIC` is enabled, try the generic `LoopToJoinLowerer`
///    and return its result if it produces one.
/// 4. Otherwise build the JoinIR with the shared `build_funcscanner_trim_joinir`.
fn lower_trim_from_mir(module: &crate::mir::MirModule) -> Option<JoinModule> {
    use super::common::{
        ensure_entry_has_succs, has_binop, has_const_string, has_string_method, log_fallback,
    };
    use crate::mir::query::MirQueryBox;
    use crate::mir::BinaryOp;

    // Step 1: locate "FuncScannerBox.trim/1".
    let target_func = module.functions.get("FuncScannerBox.trim/1")?;
    eprintln!("[joinir/trim/mir] Found FuncScannerBox.trim/1 (MIR-based lowering)");
    eprintln!("[joinir/trim/mir] MIR blocks: {}", target_func.blocks.len());

    // Phase 27.10: lightweight CFG sanity checks using the common utilities.
    let query = MirQueryBox::new(target_func);
    let entry_id = target_func.entry_block;

    // The checks run in the same order as before and stop at the first failure:
    //   Check 1: the entry block has at least one successor.
    //   Check 2: the entry block contains the expected patterns
    //     - Const("") for string coercion
    //     - BoxCall(String.length)
    //     - BinOp(Add) for "" + s
    let rejection = if !ensure_entry_has_succs(&query, entry_id) {
        Some("entry has no successors")
    } else if !(has_const_string(&query, entry_id, "")
        && has_string_method(&query, entry_id, "length")
        && has_binop(&query, entry_id, BinaryOp::Add))
    {
        Some("entry block missing expected patterns (Const(\"\"), String.length, or BinOp(Add))")
    } else {
        None
    };
    if let Some(reason) = rejection {
        log_fallback("trim", reason);
        return lower_trim_handwritten(module);
    }
    eprintln!("[joinir/trim/mir] CFG sanity checks passed ✅");

    // Phase 31: routed through the unified LoopToJoinLowerer.
    // Phase 32: uses the shared construct_simple_while_loopform helper.
    if crate::mir::join_ir::env_flag_is_1("NYASH_JOINIR_LOWER_GENERIC") {
        use crate::mir::join_ir::lowering::common::construct_simple_while_loopform;
        use crate::mir::join_ir::lowering::loop_to_join::LoopToJoinLowerer;

        // trim: entry_is_preheader=true, has_break=true
        match construct_simple_while_loopform(entry_id, &query, true, true) {
            None => {
                eprintln!("[joinir/trim/generic-hook] failed to construct LoopForm from CFG");
                return build_funcscanner_trim_joinir(module);
            }
            Some(loop_form) => {
                if crate::mir::join_ir::lowering::common::case_a::is_simple_case_a_loop(&loop_form)
                {
                    eprintln!(
                        "[joinir/trim/generic-hook] simple Case A loop detected (LoopToJoinLowerer)"
                    );
                    let generic_lowerer = LoopToJoinLowerer::new();
                    match generic_lowerer.lower_case_a_for_trim(target_func, &loop_form) {
                        Some(generic_module) => {
                            eprintln!(
                                "[joinir/trim/generic-hook] LoopToJoinLowerer produced JoinIR, returning early"
                            );
                            return Some(generic_module);
                        }
                        None => {
                            eprintln!(
                                "[joinir/trim/generic-hook] LoopToJoinLowerer returned None, falling back to handwritten"
                            );
                        }
                    }
                }
            }
        }
    }

    // Phase 27.11: the CFG checks passed, so the shared builder can be used directly.
    eprintln!("[joinir/trim/mir] Calling build_funcscanner_trim_joinir() after CFG validation");
    build_funcscanner_trim_joinir(module)
}