docs: add papers on seam-aware JSON unification and Nyash Box FFI; fix seam inspector string parsing; dev: updates in mini_vm_prints, PyVM vm, and loop_builder

This commit is contained in:
Selfhosting Dev
2025-09-22 09:32:54 +09:00
parent 8e4cadd349
commit 6d80338814
3 changed files with 100 additions and 47 deletions

View File

@ -115,39 +115,60 @@ static box MiniVmPrints {
// Attempt plugin route in a guarded block
@printed = 0
@ok = 0
@dbg = _trace_enabled()
// new JsonDocBox()/JsonNodeBox are provided by the JSON plugin
@doc = new JsonDocBox()
doc.parse(json)
if dbg == 1 {
@perr = doc.error()
if perr == "" { print("[json] parse ok") } else { print("[json] parse err=" + perr) }
}
@root = doc.root()
if dbg == 1 {
@rkind = root.kind()
print("[json] root.kind=" + rkind)
}
if root {
@stmts = root.get("statements")
if dbg == 1 {
@skind = stmts.kind()
@ssize = stmts.size()
print("[json] stmts.kind=" + skind + " size=" + new MiniVmScan()._int_to_str(ssize))
}
if stmts {
@n = stmts.size()
@i = 0
loop (i < n) {
if dbg == 1 {
print("[json] loop i=" + new MiniVmScan()._int_to_str(i) + "/" + new MiniVmScan()._int_to_str(n))
if i > 1000 { print("[json] debug guard: break loop at i>1000") break }
}
@node = stmts.at(i)
if !node { i = i + 1 continue }
@expr = node.get("expression")
if !expr { i = i + 1 continue }
@k = expr.get("kind").str()
if dbg == 1 { print("[json] expr.kind=" + k) }
if k == "Literal" {
@val = expr.get("value")
if val {
@ty = val.get("type").str()
if ty == "string" { print(val.get("value").str()) } else { print(val.get("value").int()) }
printed = printed + 1
}
printed = printed + 1
i = i + 1
continue
}
if k == "FunctionCall" {
@name = expr.get("name").str()
if dbg == 1 { print("[json] func name=" + name) }
@args = expr.get("arguments")
if !args { i = i + 1 continue }
@asz = args.size()
if asz <= 0 {
if name == "echo" { print("") printed = printed + 1 }
if name == "itoa" { print("0") printed = printed + 1 }
if name == "echo" { print("") }
if name == "itoa" { print("0") }
printed = printed + 1
i = i + 1
continue
}
@ -156,31 +177,37 @@ static box MiniVmPrints {
if arg0v {
@t = arg0v.get("type").str()
if t == "string" { print(arg0v.get("value").str()) } else { print(arg0v.get("value").int()) }
printed = printed + 1
}
printed = printed + 1
if dbg == 1 { print("[json] before inc i=" + new MiniVmScan()._int_to_str(i)) }
i = i + 1
if dbg == 1 { print("[json] after inc i=" + new MiniVmScan()._int_to_str(i)) }
continue
}
if name == "itoa" {
if arg0v { print(arg0v.get("value").int()) printed = printed + 1 }
if arg0v { print(arg0v.get("value").int()) }
printed = printed + 1
if dbg == 1 { print("[json] before inc i=" + new MiniVmScan()._int_to_str(i)) }
i = i + 1
if dbg == 1 { print("[json] after inc i=" + new MiniVmScan()._int_to_str(i)) }
continue
}
printed = printed + 1
if dbg == 1 { print("[json] before inc i=" + new MiniVmScan()._int_to_str(i)) }
i = i + 1
if dbg == 1 { print("[json] after inc i=" + new MiniVmScan()._int_to_str(i)) }
continue
}
if k == "Compare" {
@op = expr.get("operation").str()
@lhs = expr.get("lhs").get("value").get("value").int()
@rhs = expr.get("rhs").get("value").get("value").int()
@res = 0
if op == "<" { if lhs < rhs { res = 1 } }
if op == "==" { if lhs == rhs { res = 1 } }
if op == "<=" { if lhs <= rhs { res = 1 } }
if op == ">" { if lhs > rhs { res = 1 } }
if op == ">=" { if lhs >= rhs { res = 1 } }
if op == "!=" { if lhs != rhs { res = 1 } }
print(res)
if op == "<" { if lhs < rhs { print(1) } else { print(0) } }
if op == "==" { if lhs == rhs { print(1) } else { print(0) } }
if op == "<=" { if lhs <= rhs { print(1) } else { print(0) } }
if op == ">" { if lhs > rhs { print(1) } else { print(0) } }
if op == ">=" { if lhs >= rhs { print(1) } else { print(0) } }
if op == "!=" { if lhs != rhs { print(1) } else { print(0) } }
printed = printed + 1
i = i + 1
continue
@ -196,6 +223,7 @@ static box MiniVmPrints {
continue
}
}
// Unknown expression kind: treat as a no-op; do not count
i = i + 1
}
ok = 1
@ -204,10 +232,12 @@ static box MiniVmPrints {
// Prefer plugin result whenever JSON route ran (ok==1). Even if printed==0,
// return early to avoid falling back to the heuristic scanner which can loop
// on malformed inputs or seam-edge cases.
if dbg == 1 { print("[json] plugin_ok=" + new MiniVmScan()._int_to_str(ok) + " printed=" + new MiniVmScan()._int_to_str(printed)) }
if ok == 1 { return printed }
}
// Fallback: text scanner (for development use)
if _trace_enabled() == 1 { print("[json] fallback engaged") }
local scan = new MiniVmScan()
local bin = new MiniVmBinOp()
local cmp = new MiniVmCompare()

View File

@ -53,6 +53,10 @@ class PyVM:
def __init__(self, program: Dict[str, Any]):
self.functions: Dict[str, Function] = {}
self._debug = os.environ.get('NYASH_PYVM_DEBUG') in ('1','true','on')
# Targeted trace controls (default OFF)
self._trace_fn = os.environ.get('NYASH_PYVM_TRACE_FN')
self._trace_reg = os.environ.get('NYASH_PYVM_TRACE_REG') # string compare
self._cur_fn: Optional[str] = None
for f in program.get("functions", []):
name = f.get("name")
params = [int(p) for p in f.get("params", [])]
@ -121,7 +125,14 @@ class PyVM:
def _set(self, regs: Dict[int, Any], dst: Optional[int], val: Any) -> None:
if dst is None:
return
regs[int(dst)] = val
rid = int(dst)
regs[rid] = val
try:
if self._trace_fn and self._cur_fn == self._trace_fn:
if self._trace_reg is None or self._trace_reg == str(rid):
self._dbg(f"[pyvm][set] fn={self._cur_fn} r{rid}={val}")
except Exception:
pass
def _truthy(self, v: Any) -> bool:
if isinstance(v, bool):
@ -189,6 +200,7 @@ class PyVM:
return self._exec_function(fn, call_args)
def _exec_function(self, fn: Function, args: List[Any]) -> Any:
self._cur_fn = fn.name
self._dbg(f"[pyvm] call {fn.name} args={args}")
# Intrinsic fast path for small helpers used in smokes
ok, ret = self._try_intrinsic(fn.name, args)

View File

@ -613,46 +613,57 @@ impl<'a> LoopBuilder<'a> {
// Continue at merge
self.set_current_block(merge_bb)?;
// If both branches assign the same variable, emit phi and bind it
let then_prog = ASTNode::Program {
statements: then_body.clone(),
span: crate::ast::Span::unknown(),
};
let assigned_then = extract_assigned_var_local(&then_prog);
let assigned_else = else_body.as_ref().and_then(|es| {
let ep = ASTNode::Program {
statements: es.clone(),
span: crate::ast::Span::unknown(),
};
extract_assigned_var_local(&ep)
});
if let Some(var_name) = assigned_then {
let else_assigns_same = assigned_else
.as_ref()
.map(|s| s == &var_name)
.unwrap_or(false);
let then_value_for_var = then_var_map_end.get(&var_name).copied();
let else_value_for_var = if else_assigns_same {
else_var_map_end_opt
// If branches assign variables, emit PHIs per variable and bind them.
// Previous logic handled only a single variable; here we generalize to all assigned vars.
/// Collects into `out` the name of every variable assigned anywhere in `ast`.
///
/// Only `Assignment` nodes whose target is a plain `Variable` contribute a
/// name. `Program` statement lists and both branches of nested `If` nodes are
/// walked recursively; every other node kind is ignored.
fn collect_assigned_vars(ast: &ASTNode, out: &mut std::collections::HashSet<String>) {
    match ast {
        ASTNode::Assignment { target, .. } => {
            if let ASTNode::Variable { name, .. } = target.as_ref() {
                out.insert(name.clone());
            }
        }
        ASTNode::Program { statements, .. } => {
            for s in statements.iter() {
                collect_assigned_vars(s, out);
            }
        }
        ASTNode::If { then_body, else_body, .. } => {
            // Walk the branch statements by reference. Wrapping each branch
            // in a temporary `Program` node required deep-cloning the whole
            // subtree at every nesting level just to iterate it.
            for s in then_body.iter() {
                collect_assigned_vars(s, out);
            }
            if let Some(eb) = else_body {
                for s in eb.iter() {
                    collect_assigned_vars(s, out);
                }
            }
        }
        _ => {}
    }
}
// Gather every variable assigned in either branch of the if.
let mut assigned: std::collections::HashSet<String> = std::collections::HashSet::new();
let then_prog = ASTNode::Program { statements: then_body.clone(), span: crate::ast::Span::unknown() };
collect_assigned_vars(&then_prog, &mut assigned);
if let Some(es) = &else_body {
    let else_prog = ASTNode::Program { statements: es.clone(), span: crate::ast::Span::unknown() };
    collect_assigned_vars(&else_prog, &mut assigned);
}
// `HashSet` iteration order is randomised per process. The loop that consumes
// `vars` allocates a fresh value id per variable, so iterate in sorted name
// order to keep phi numbering and emission order reproducible across runs.
let mut vars: Vec<String> = assigned.into_iter().collect();
vars.sort();
// Reset to pre-if map before rebinding to ensure a clean environment
self.parent_builder.variable_map = pre_if_var_map.clone();
for var_name in vars.into_iter() {
// then-side value: from then end map if assigned there; otherwise pre-if value
let then_val = then_var_map_end.get(&var_name).copied().or_else(|| pre_then_var_value.get(&var_name).copied());
// else-side value: prefer else end map when else assigns; otherwise pre-if value
let else_val = else_var_map_end_opt
.as_ref()
.and_then(|m| m.get(&var_name).copied())
} else {
pre_then_var_value.get(&var_name).copied()
};
if let (Some(tv), Some(ev)) = (then_value_for_var, else_value_for_var) {
.or_else(|| pre_then_var_value.get(&var_name).copied());
if let (Some(tv), Some(ev)) = (then_val, else_val) {
let phi_id = self.new_value();
if self.no_phi_mode {
self.parent_builder.insert_edge_copy(then_bb, phi_id, tv)?;
self.parent_builder.insert_edge_copy(else_bb, phi_id, ev)?;
} else {
self.emit_phi_at_block_start(
merge_bb,
phi_id,
vec![(then_bb, tv), (else_bb, ev)],
)?;
self.emit_phi_at_block_start(merge_bb, phi_id, vec![(then_bb, tv), (else_bb, ev)])?;
}
// Reset to pre-if map and bind the phi result
self.parent_builder.variable_map = pre_if_var_map.clone();
self.parent_builder.variable_map.insert(var_name, phi_id);
}
}