pyvm: implement TypeOp(check) + strict match-guard smokes; parser: guard support in match; llvm: PHI wiring at block head + incoming normalization; docs: AGENTS LLVM/PHI + guard policy; add tests; plan: refactor parse_box_declaration + TODO triage + clone reduction + CLI split + LLVM builder split; update CURRENT_TASK.md

This commit is contained in:
Selfhosting Dev
2025-09-19 10:52:57 +09:00
parent e55ce363c3
commit 7dfd55bfdb
22 changed files with 2622 additions and 86 deletions

View File

@ -352,30 +352,7 @@ class NyashLLVMBuilder:
if not hasattr(self, 'block_phi_incomings') or self.block_phi_incomings is None:
self.block_phi_incomings = {}
for bbid, ret_vid in plan.items():
# Create a placeholder PHI at block head if missing
bb0 = self.bb_map.get(bbid)
if bb0 is not None:
b0 = ir.IRBuilder(bb0)
try:
b0.position_at_start(bb0)
except Exception:
pass
cur = self.vmap.get(ret_vid)
need_new = True
try:
need_new = not (cur is not None and hasattr(cur, 'add_incoming'))
except Exception:
need_new = True
if need_new:
ph = b0.phi(self.i64, name=f"phi_ret_{ret_vid}")
self.vmap[ret_vid] = ph
else:
ph = cur
# Record for later unify
try:
self.predeclared_ret_phis[(int(bbid), int(ret_vid))] = ph
except Exception:
pass
# Do not pre-materialize PHI here; record only metadata.
# Record declared incoming metadata using the same value-id
# for each predecessor; finalize_phis will resolve per-pred end values.
try:
@ -398,7 +375,7 @@ class NyashLLVMBuilder:
except Exception:
pass
try:
trace_debug(f"[prepass] if-merge: predeclare PHI at bb{bbid} for v{ret_vid} preds={preds_list}")
trace_debug(f"[prepass] if-merge: plan metadata at bb{bbid} for v{ret_vid} preds={preds_list}")
except Exception:
pass
except Exception:
@ -460,19 +437,8 @@ class NyashLLVMBuilder:
except Exception:
pass
for vid in need:
# Skip if we already have a PHI mapped for (bid, vid)
cur = self.vmap.get(int(vid))
has_phi_here = False
try:
has_phi_here = (
cur is not None and hasattr(cur, 'add_incoming') and
getattr(getattr(cur, 'basic_block', None), 'name', None) == bb0.name
)
except Exception:
has_phi_here = False
if not has_phi_here:
ph = b0.phi(self.i64, name=f"phi_{vid}")
self.vmap[int(vid)] = ph
# Do not create placeholder here; let finalize_phis materialize
# to keep PHIs strictly grouped at block heads and avoid dups.
# Record incoming metadata for finalize_phis (pred -> same vid)
try:
self.block_phi_incomings.setdefault(int(bid), {}).setdefault(int(vid), [])
@ -498,11 +464,7 @@ class NyashLLVMBuilder:
except Exception:
loop_plan = None
# Provide predeclared ret-phi map to resolver for ret lowering to reuse
try:
self.resolver.ret_phi_map = self.predeclared_ret_phis
except Exception:
pass
# No predeclared PHIs are materialized; resolver may ignore ret_phi_map
# Now lower blocks
skipped: set[int] = set()
@ -1244,7 +1206,19 @@ class NyashLLVMBuilder:
target_machine = target.create_target_machine()
# Compile
mod = llvm.parse_assembly(str(self.module))
ir_text = str(self.module)
# Sanitize: drop any empty PHI rows (no incoming list) to satisfy IR parser
try:
fixed_lines = []
for line in ir_text.splitlines():
if (" = phi i64" in line or " = phi i64" in line) and ("[" not in line):
# Skip malformed PHI without incoming pairs
continue
fixed_lines.append(line)
ir_text = "\n".join(fixed_lines)
except Exception:
pass
mod = llvm.parse_assembly(ir_text)
# Allow skipping verifier for iterative bring-up
if os.environ.get('NYASH_LLVM_SKIP_VERIFY') != '1':
mod.verify()

View File

@ -31,7 +31,8 @@ def setup_phi_placeholders(builder, blocks: List[Dict[str, Any]]):
incoming0 = []
if dst0 is None or bb0 is None:
continue
_ = ensure_phi(builder, bid0, dst0, bb0)
# Do not materialize PHI here; finalize_phis will ensure and wire at block head.
# _ = ensure_phi(builder, bid0, dst0, bb0)
# Tag propagation
try:
dst_type0 = inst.get("dst_type")

View File

@ -5,9 +5,17 @@ import llvmlite.ir as ir
from .common import trace
def _const_i64(builder, n: int) -> ir.Constant:
    """Build a 64-bit integer LLVM constant for ``n``.

    The preferred path uses the builder's own ``i64`` type together with
    ``int(n)``. If anything on that path raises (``builder`` has no usable
    ``i64`` attribute, ``n`` is not convertible to int, or the constant
    cannot be constructed), a fresh ``ir.IntType(64)`` is used instead; in
    that case ``n`` is kept only when it already is an ``int`` and is
    replaced by 0 otherwise.
    """
    try:
        i64_type = builder.i64
        literal = int(n)
        return ir.Constant(i64_type, literal)
    except Exception:
        # Failsafe path: do not rely on the builder at all.
        fallback_value = int(n) if isinstance(n, int) else 0
        return ir.Constant(ir.IntType(64), fallback_value)
def ensure_phi(builder, block_id: int, dst_vid: int, bb: ir.Block) -> ir.Instruction:
"""Ensure a PHI placeholder exists at the block head for dst_vid and return it."""
# Always place PHI at block start to keep LLVM invariant "PHI nodes at top"
b = ir.IRBuilder(bb)
try:
b.position_at_start(bb)
@ -114,8 +122,16 @@ def wire_incomings(builder, block_id: int, dst_vid: int, incoming: List[Tuple[in
)
except Exception:
val = None
# Normalize to a well-typed LLVM value (i64)
if val is None:
val = ir.Constant(builder.i64, 0)
val = _const_i64(builder, 0)
else:
try:
# Some paths can accidentally pass plain integers; coerce to i64 const
if not hasattr(val, 'type'):
val = _const_i64(builder, int(val))
except Exception:
val = _const_i64(builder, 0)
chosen[pred_match] = val
trace({"phi": "wire_choose", "pred": int(pred_match), "dst": int(dst_vid), "src": int(vs)})
wired = 0
@ -123,6 +139,7 @@ def wire_incomings(builder, block_id: int, dst_vid: int, incoming: List[Tuple[in
pred_bb = builder.bb_map.get(pred_bid)
if pred_bb is None:
continue
# llvmlite requires (value, block) of correct types
phi.add_incoming(val, pred_bb)
trace({"phi": "add_incoming", "dst": int(dst_vid), "pred": int(pred_bid)})
wired += 1

View File

@ -264,6 +264,60 @@ class PyVM:
i += 1
continue
if op == "typeop":
# operation: "check" | "cast" ("as" is treated as cast for MVP)
operation = inst.get("operation") or inst.get("op")
src_vid = inst.get("src")
dst_vid = inst.get("dst")
target = (inst.get("target_type") or "")
src_val = self._read(regs, src_vid)
def is_type(val: Any, ty: str) -> bool:
    """Return True when the runtime value ``val`` matches the type name ``ty``.

    ``ty`` is compared case-insensitively after trimming whitespace for the
    primitive families, with these aliases:

    * ``string``  <- ``StringBox``
    * ``integer`` <- ``IntegerBox``, ``int``, ``i64``
    * ``float``   <- ``FloatBox``, ``f64``
    * ``bool``    <- ``BoolBox``, ``boolean``

    Any other name ending in ``box`` (case-insensitive) is treated as a boxed
    type: ``val`` matches when it is a dict whose ``"__box__"`` entry equals
    the trimmed name (case-sensitive), or when the name is ``ConsoleBox`` and
    ``self._is_console(val)`` is truthy. Everything else yields False.

    Note: ``self`` is captured from the enclosing method's scope.
    """
    name = (ty or "").strip()
    aliases = {
        "stringbox": "string",
        "integerbox": "integer",
        "int": "integer",
        "i64": "integer",
        "floatbox": "float",
        "f64": "float",
        "boolbox": "bool",
        "boolean": "bool",
    }
    lowered = name.lower()
    kind = aliases.get(lowered, lowered)

    if kind == "string":
        return isinstance(val, str)
    if kind == "integer":
        # Python bool is a subclass of int; exclude it explicitly.
        return isinstance(val, int) and not isinstance(val, bool)
    if kind == "float":
        return isinstance(val, float)
    if kind == "bool":
        # The VM represents booleans as the ints 0 and 1.
        return isinstance(val, int) and val in (0, 1)

    if kind.endswith("box"):
        # Compare against the trimmed name so that padded type names match
        # the same way the primitive families above do. This generic check
        # also covers ArrayBox / MapBox instances.
        if isinstance(val, dict) and val.get("__box__") == name:
            return True
        if name == "ConsoleBox" and self._is_console(val):
            return True
    return False
if (operation or "").lower() in ("check", "is"):
out = 1 if is_type(src_val, str(target)) else 0
self._set(regs, dst_vid, out)
else:
# cast/as: MVP pass-through
self._set(regs, dst_vid, src_val)
i += 1
continue
if op == "unop":
kind = inst.get("kind")
src = self._read(regs, inst.get("src"))

View File

@ -249,11 +249,13 @@ impl super::MirBuilder {
/// Map a user-facing type name to its MIR type.
///
/// Primitive names and their `*Box` aliases resolve to the matching primitive
/// `MirType`; `Void`/`Unit` resolve to `MirType::Void`. Any other name is
/// treated as a user-defined box type and wrapped in `MirType::Box`.
/// Matching is case-sensitive.
pub(super) fn parse_type_name_to_mir(name: &str) -> super::MirType {
    match name {
        // Primitive families (one arm per MIR type, so no pattern is repeated)
        "Integer" | "Int" | "I64" | "IntegerBox" | "IntBox" => super::MirType::Integer,
        "Float" | "F64" | "FloatBox" => super::MirType::Float,
        "Bool" | "Boolean" | "BoolBox" => super::MirType::Bool,
        "String" | "StringBox" => super::MirType::String,
        "Void" | "Unit" => super::MirType::Void,
        // Fallback: treat as user box type
        other => super::MirType::Box(other.to_string()),
    }
}

View File

@ -8,18 +8,19 @@ impl NyashParser {
/// MVP: リテラルパターンORデフォルト(_) のみ。アーム本体は式またはブロック。
pub(crate) fn expr_parse_match(&mut self) -> Result<ASTNode, ParseError> {
self.advance(); // consume 'match'
// Scrutinee: MVPでは primary/call に限定(表現力は十分
let scrutinee = self.expr_parse_primary()?;
// Scrutinee: 通常の式を受理(演算子優先順位を含む
let scrutinee = self.parse_expression()?;
self.consume(TokenType::LBRACE)?;
enum MatchArm {
Lit(Vec<LiteralValue>, ASTNode),
Type { ty: String, bind: String, body: ASTNode },
Lit { lits: Vec<LiteralValue>, guard: Option<ASTNode>, body: ASTNode },
Type { ty: String, bind: String, guard: Option<ASTNode>, body: ASTNode },
Default(ASTNode),
}
let mut arms_any: Vec<MatchArm> = Vec::new();
let mut saw_type_arm = false;
let mut saw_guard = false;
let mut default_expr: Option<ASTNode> = None;
while !self.match_token(&TokenType::RBRACE) && !self.is_at_end() {
@ -36,6 +37,15 @@ impl NyashParser {
let is_default = matches!(self.current_token().token_type, TokenType::IDENTIFIER(ref s) if s == "_");
if is_default {
self.advance(); // consume '_'
// MVP: default '_' does not accept guard
if self.match_token(&TokenType::IF) {
let line = self.current_token().line;
return Err(ParseError::UnexpectedToken {
found: self.current_token().token_type.clone(),
expected: "'=>' (guard is not allowed for default arm)".to_string(),
line,
});
}
self.consume(TokenType::FatArrow)?;
let expr = if self.match_token(&TokenType::LBRACE) {
// ブロックを式として扱う(最後の文の値が返る)
@ -53,8 +63,8 @@ impl NyashParser {
span: Span::unknown(),
}
} else {
// MVP: アームは primary/call を優先
self.expr_parse_primary()?
// 値アームは通常の式全体を受理
self.parse_expression()?
};
default_expr = Some(expr.clone());
arms_any.push(MatchArm::Default(expr));
@ -85,6 +95,13 @@ impl NyashParser {
}
};
self.consume(TokenType::RPAREN)?;
// Optional guard
let guard = if self.match_token(&TokenType::IF) {
self.advance();
let g = self.parse_expression()?;
saw_guard = true;
Some(g)
} else { None };
self.consume(TokenType::FatArrow)?;
let body = if self.match_token(&TokenType::LBRACE) {
self.advance(); // consume '{'
@ -92,26 +109,24 @@ impl NyashParser {
while !self.match_token(&TokenType::RBRACE) && !self.is_at_end() {
self.skip_newlines();
if !self.match_token(&TokenType::RBRACE) {
eprintln!("[parser.match] in-block before stmt token={:?} line={}", self.current_token().token_type, self.current_token().line);
let st = self.parse_statement()?;
eprintln!("[parser.match] parsed stmt kind={}", st.info());
stmts.push(st);
}
}
self.consume(TokenType::RBRACE)?;
ASTNode::Program { statements: stmts, span: Span::unknown() }
} else {
self.expr_parse_primary()?
// 値アームは通常の式全体を受理
self.parse_expression()?
};
// type arm parsed
arms_any.push(MatchArm::Type { ty, bind, body });
arms_any.push(MatchArm::Type { ty, bind, guard, body });
saw_type_arm = true;
handled = true;
}
}
if !handled {
// リテラルOR結合可
eprintln!("[parser.match] parse literal pattern, token={:?}", self.current_token().token_type);
let mut lits: Vec<crate::ast::LiteralValue> = Vec::new();
let first = self.lit_only_for_match()?;
lits.push(first);
@ -120,10 +135,15 @@ impl NyashParser {
let nxt = self.lit_only_for_match()?;
lits.push(nxt);
}
// Optional guard before '=>'
let guard = if self.match_token(&TokenType::IF) {
self.advance();
let g = self.parse_expression()?;
saw_guard = true;
Some(g)
} else { None };
self.consume(TokenType::FatArrow)?;
eprintln!("[parser.match] after FatArrow token={:?}", self.current_token().token_type);
let expr = if self.match_token(&TokenType::LBRACE) {
eprintln!("[parser.match] entering block arm");
self.advance(); // consume '{'
let mut stmts: Vec<ASTNode> = Vec::new();
while !self.match_token(&TokenType::RBRACE) && !self.is_at_end() {
@ -136,9 +156,10 @@ impl NyashParser {
self.consume(TokenType::RBRACE)?;
ASTNode::Program { statements: stmts, span: Span::unknown() }
} else {
self.expr_parse_primary()?
// 値アームは通常の式全体を受理
self.parse_expression()?
};
arms_any.push(MatchArm::Lit(lits, expr));
arms_any.push(MatchArm::Lit { lits, guard, body: expr });
}
}
@ -156,14 +177,14 @@ impl NyashParser {
line: self.current_token().line,
})?;
if !saw_type_arm {
if !saw_type_arm && !saw_guard {
// 既存の Lower を活用するため PeekExpr に落とす(型パターンが無い場合のみ)
let mut lit_arms: Vec<(LiteralValue, ASTNode)> = Vec::new();
for arm in arms_any.into_iter() {
match arm {
MatchArm::Lit(lits, expr) => {
MatchArm::Lit { lits, guard: _, body } => {
for lit in lits.into_iter() {
lit_arms.push((lit, expr.clone()));
lit_arms.push((lit, body.clone()));
}
}
MatchArm::Default(_) => { /* handled via else_expr above */ }
@ -198,7 +219,7 @@ impl NyashParser {
MatchArm::Default(_) => {
// already handled as else_node
}
MatchArm::Lit(lits, body) => {
MatchArm::Lit { lits, guard, body } => {
// condition: (scr == lit1) || (scr == lit2) || ...
let mut cond: Option<ASTNode> = None;
for lit in lits.into_iter() {
@ -218,15 +239,27 @@ impl NyashParser {
},
});
}
let then_prog = ASTNode::Program { statements: vec![body], span: Span::unknown() };
let else_statements = match else_node.clone() { ASTNode::Program { statements, .. } => statements, other => vec![other] };
let then_body_statements = if let Some(g) = guard {
// Nested guard: if g then body else else_node
let guard_if = ASTNode::If {
condition: Box::new(g),
then_body: vec![body],
else_body: Some(else_statements.clone()),
span: Span::unknown(),
};
vec![guard_if]
} else {
vec![body]
};
else_node = ASTNode::If {
condition: Box::new(cond.expect("literal arm must have at least one literal")),
then_body: match then_prog { ASTNode::Program { statements, .. } => statements, _ => unreachable!() },
else_body: Some(match else_node.clone() { ASTNode::Program { statements, .. } => statements, other => vec![other] }),
then_body: then_body_statements,
else_body: Some(else_statements),
span: Span::unknown(),
};
}
MatchArm::Type { ty, bind, body } => {
MatchArm::Type { ty, bind, guard, body } => {
// condition: scr.is("Type")
let is_call = ASTNode::MethodCall {
object: Box::new(ASTNode::Variable { name: scr_var.clone(), span: Span::unknown() }),
@ -246,11 +279,23 @@ impl NyashParser {
initial_values: vec![Some(Box::new(cast))],
span: Span::unknown(),
};
let then_prog = ASTNode::Program { statements: vec![bind_local, body], span: Span::unknown() };
let else_statements = match else_node.clone() { ASTNode::Program { statements, .. } => statements, other => vec![other] };
let then_body_statements = if let Some(g) = guard {
// After binding, check guard then branch to body else fallthrough to else_node
let guard_if = ASTNode::If {
condition: Box::new(g),
then_body: vec![body],
else_body: Some(else_statements.clone()),
span: Span::unknown(),
};
vec![bind_local, guard_if]
} else {
vec![bind_local, body]
};
else_node = ASTNode::If {
condition: Box::new(is_call),
then_body: match then_prog { ASTNode::Program { statements, .. } => statements, _ => unreachable!() },
else_body: Some(match else_node.clone() { ASTNode::Program { statements, .. } => statements, other => vec![other] }),
then_body: then_body_statements,
else_body: Some(else_statements),
span: Span::unknown(),
};
}

View File

@ -20,7 +20,6 @@ pub fn emit_mir_json_for_harness(
let mut block_defines: std::collections::HashSet<u32> = std::collections::HashSet::new();
for inst in &bb.instructions {
match inst {
I::Copy { dst, .. }
| I::UnaryOp { dst, .. }
| I::Const { dst, .. }
| I::BinOp { dst, .. }
@ -127,6 +126,29 @@ pub fn emit_mir_json_for_harness(
}
}
}
I::TypeOp { dst, op, value, ty } => {
let op_s = match op {
nyash_rust::mir::TypeOpKind::Check => "check",
nyash_rust::mir::TypeOpKind::Cast => "cast",
};
let ty_s = match ty {
MirType::Integer => "Integer".to_string(),
MirType::Float => "Float".to_string(),
MirType::Bool => "Bool".to_string(),
MirType::String => "String".to_string(),
MirType::Void => "Void".to_string(),
MirType::Box(name) => name.clone(),
_ => "Unknown".to_string(),
};
insts.push(json!({
"op":"typeop",
"operation": op_s,
"src": value.as_u32(),
"dst": dst.as_u32(),
"target_type": ty_s,
}));
emitted_defs.insert(dst.as_u32());
}
I::BinOp { dst, op, lhs, rhs } => {
let op_s = match op {
B::Add => "+",