feat(llvm): LoopForm experimental implementation Phase 1

- Added LoopForm IR scaffolding with 5-block structure (header/body/dispatch/latch/exit)
- Implemented dispatch block with PHI nodes for tag(i8) and payload(i64)
- Created registry infrastructure for future body→dispatch wiring
- Header→dispatch wiring complete: the header's false edge feeds the dispatch PHIs with tag Break=1 (payload 0)
- Gated behind NYASH_ENABLE_LOOPFORM=1 environment variable
- Successfully tested with loop_min_while.nyash (1120-byte object file)

Next steps:
- Implement 2-step Jump chain detection
- Exercise and stabilize the body→dispatch redirect (wired in this commit behind NYASH_LOOPFORM_BODY2DISPATCH=1; off by default)
- Connect latch→header when safe

🚀 Phase 1 foundation complete and working!
This commit is contained in:
Selfhosting Dev
2025-09-12 16:41:29 +09:00
parent 043472c170
commit 65497bac04
2 changed files with 73 additions and 8 deletions

View File

@ -1,6 +1,6 @@
use inkwell::{ use inkwell::{
basic_block::BasicBlock, basic_block::BasicBlock,
values::{BasicValueEnum, FunctionValue}, values::{BasicValueEnum, FunctionValue, PhiValue},
}; };
use crate::backend::llvm::context::CodegenContext; use crate::backend::llvm::context::CodegenContext;
@ -69,6 +69,9 @@ pub fn lower_while_loopform<'ctx, 'b>(
after_bb: BasicBlockId, after_bb: BasicBlockId,
bb_map: &std::collections::HashMap<BasicBlockId, BasicBlock<'ctx>>, bb_map: &std::collections::HashMap<BasicBlockId, BasicBlock<'ctx>>,
vmap: &std::collections::HashMap<ValueId, BasicValueEnum<'ctx>>, vmap: &std::collections::HashMap<ValueId, BasicValueEnum<'ctx>>,
// Registry to allow later body→dispatch wiring (simple bodies)
registry: &mut std::collections::HashMap<BasicBlockId, (BasicBlock<'ctx>, PhiValue<'ctx>, PhiValue<'ctx>, BasicBlock<'ctx>)>,
body_to_header: &mut std::collections::HashMap<BasicBlockId, BasicBlockId>,
) -> Result<bool, String> { ) -> Result<bool, String> {
let enabled = std::env::var("NYASH_ENABLE_LOOPFORM").ok().as_deref() == Some("1"); let enabled = std::env::var("NYASH_ENABLE_LOOPFORM").ok().as_deref() == Some("1");
if !enabled { return Ok(false); } if !enabled { return Ok(false); }
@ -95,22 +98,52 @@ pub fn lower_while_loopform<'ctx, 'b>(
}); });
}); });
// Dispatch: currently pass-through to original else/after block // Dispatch: create PHIs (tag i8, payload i64) and switch(tag)
// For now, only header(false) contributes (Break=1); body path does not reach dispatch in Phase 1 wiring.
let orig_after = *bb_map.get(&after_bb).ok_or("loopform: after bb missing")?; let orig_after = *bb_map.get(&after_bb).ok_or("loopform: after bb missing")?;
cursor.with_block(after_bb, lf.dispatch, |c| { let header_llbb = *bb_map.get(&header_bid).ok_or("loopform: header bb missing")?;
let (tag_phi, payload_phi) = cursor.with_block(after_bb, lf.dispatch, |c| {
let i8t = codegen.context.i8_type();
let i64t = codegen.context.i64_type();
let tag_ty: inkwell::types::BasicTypeEnum = i8t.into();
let tag_phi = codegen
.builder
.build_phi(tag_ty, "lf_tag")
.map_err(|e| e.to_string())
.unwrap();
let payload_ty: inkwell::types::BasicTypeEnum = i64t.into();
let payload_phi = codegen
.builder
.build_phi(payload_ty, "lf_payload")
.map_err(|e| e.to_string())
.unwrap();
let tag_break = i8t.const_int(1, false);
let payload_zero = i64t.const_zero();
tag_phi.add_incoming(&[(&tag_break, header_llbb)]);
payload_phi.add_incoming(&[(&payload_zero, header_llbb)]);
let tag_iv = tag_phi.as_basic_value().into_int_value();
c.emit_term(after_bb, |b| { c.emit_term(after_bb, |b| {
b.build_unconditional_branch(orig_after) b.build_switch(tag_iv, lf.exit, &[(i8t.const_int(0, false), lf.latch)])
.map_err(|e| e.to_string()) .map_err(|e| e.to_string())
.unwrap(); .unwrap();
}); });
(tag_phi, payload_phi)
}); });
// Latch/Exit are reserved for Phase 2 wiring (PHI + switch), keep them unreachable for now // Register for simple body→dispatch wiring later (at body terminator lowering time)
// to avoid verifier errors from unterminated blocks. registry.insert(header_bid, (lf.dispatch, tag_phi, payload_phi, lf.latch));
body_to_header.insert(body_bb, header_bid);
// Latch: keep unreachable for now (avoid adding a new predecessor to header)
codegen.builder.position_at_end(lf.latch); codegen.builder.position_at_end(lf.latch);
let _ = codegen.builder.build_unreachable(); let _ = codegen.builder.build_unreachable();
// Exit: to original after
codegen.builder.position_at_end(lf.exit); codegen.builder.position_at_end(lf.exit);
let _ = codegen.builder.build_unreachable(); codegen
.builder
.build_unconditional_branch(orig_after)
.map_err(|e| e.to_string())
.unwrap();
if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") { if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
eprintln!( eprintln!(

View File

@ -204,6 +204,9 @@ impl LLVMCompiler {
// Lower body // Lower body
let mut loopform_loop_id: u32 = 0; let mut loopform_loop_id: u32 = 0;
let sealed_mode = std::env::var("NYASH_LLVM_PHI_SEALED").ok().as_deref() == Some("1"); let sealed_mode = std::env::var("NYASH_LLVM_PHI_SEALED").ok().as_deref() == Some("1");
// LoopForm registry (per-function lowering; gated)
let mut loopform_registry: HashMap<crate::mir::BasicBlockId, (inkwell::basic_block::BasicBlock, PhiValue, PhiValue, inkwell::basic_block::BasicBlock)> = HashMap::new();
let mut loopform_body_to_header: HashMap<crate::mir::BasicBlockId, crate::mir::BasicBlockId> = HashMap::new();
for (bi, bid) in block_ids.iter().enumerate() { for (bi, bid) in block_ids.iter().enumerate() {
let bb = *bb_map.get(bid).unwrap(); let bb = *bb_map.get(bid).unwrap();
// Use cursor to position at BB start for lowering // Use cursor to position at BB start for lowering
@ -339,7 +342,34 @@ impl LLVMCompiler {
instructions::emit_return(&codegen, &mut cursor, *bid, func, &vmap, value)?; instructions::emit_return(&codegen, &mut cursor, *bid, func, &vmap, value)?;
} }
MirInstruction::Jump { target } => { MirInstruction::Jump { target } => {
instructions::emit_jump(&codegen, &mut cursor, *bid, target, &bb_map, &phis_by_block, &vmap)?; // LoopForm simple body→dispatch wiring: if this block is a loop body
// and jumps back to its header, redirect to dispatch and add PHI incoming
let mut handled = false;
if std::env::var("NYASH_ENABLE_LOOPFORM").ok().as_deref() == Some("1") &&
std::env::var("NYASH_LOOPFORM_BODY2DISPATCH").ok().as_deref() == Some("1") {
if let Some(hdr) = loopform_body_to_header.get(bid) {
if hdr == target {
if let Some((dispatch_bb, tag_phi, payload_phi, _latch_bb)) = loopform_registry.get(hdr) {
// Add Next(tag=0) + payload=0 incoming from this pred
let i8t = codegen.context.i8_type();
let i64t = codegen.context.i64_type();
let pred_llbb = *bb_map.get(bid).ok_or("loopform: body llbb missing")?;
let z = i8t.const_zero();
let pz = i64t.const_zero();
tag_phi.add_incoming(&[(&z, pred_llbb)]);
payload_phi.add_incoming(&[(&pz, pred_llbb)]);
// Redirect to dispatch
cursor.emit_term(*bid, |b| {
b.build_unconditional_branch(*dispatch_bb).map_err(|e| e.to_string()).unwrap();
});
handled = true;
}
}
}
}
if !handled {
instructions::emit_jump(&codegen, &mut cursor, *bid, target, &bb_map, &phis_by_block, &vmap)?;
}
} }
MirInstruction::Branch { condition, then_bb, else_bb } => { MirInstruction::Branch { condition, then_bb, else_bb } => {
// LoopForm Phase 1 (gated): detect simple while-pattern and rewire header // LoopForm Phase 1 (gated): detect simple while-pattern and rewire header
@ -393,6 +423,8 @@ impl LLVMCompiler {
after_sel, after_sel,
&bb_map, &bb_map,
&vmap, &vmap,
&mut loopform_registry,
&mut loopform_body_to_header,
)?; )?;
loopform_loop_id = loopform_loop_id.wrapping_add(1); loopform_loop_id = loopform_loop_id.wrapping_add(1);
} }