Phase 21.6 solidification: chain green (return/binop/loop/call); add Phase 21.7 normalization plan (methodize static boxes). Update CURRENT_TASK.md and docs.

This commit is contained in:
nyash-codex
2025-11-11 22:35:45 +09:00
parent 52b62c5772
commit 9e2fa1e36e
19 changed files with 1309 additions and 35 deletions

View File

@ -17,6 +17,7 @@
using selfhost.shared.json.utils.json_frag as JsonFragBox
using "hako.mir.builder.internal.jsonfrag_normalizer" as NormBox
using "hako.mir.builder.internal.pattern_util" as PatternUtilBox
using lang.mir.builder.func_lowering as FuncLoweringBox
static box MirBuilderBox {
// Availability probe (for canaries)
@ -27,6 +28,7 @@ static box MirBuilderBox {
if ("" + t) == "1" { return 1 } else { return 0 }
}
// Main entry
method emit_from_program_json_v0(program_json, opts) {
// Debug tag (dev toggle only)
@ -45,12 +47,26 @@ static box MirBuilderBox {
print("[mirbuilder/input/invalid] missing version/kind keys")
return null
}
// Helper: optional normalization (dev toggle, default OFF)
// Dev-toggle: extract and lower function definitions (defs)
// Toggle: HAKO_MIR_BUILDER_FUNCS=1
// Policy: delegate to FuncLoweringBox for lowering
// Output: inject additional MIR functions to output JSON
local func_defs_mir = ""
{
local funcs_toggle = env.get("HAKO_MIR_BUILDER_FUNCS")
if funcs_toggle != null && ("" + funcs_toggle) == "1" {
func_defs_mir = FuncLoweringBox.lower_func_defs(s, s)
}
}
// Helper: optional normalization (dev toggle, default OFF) + func injection
local norm_if = function(m) {
if m == null { return null }
// Inject function definitions if available
local result = FuncLoweringBox.inject_funcs(m, func_defs_mir)
local nv = env.get("HAKO_MIR_BUILDER_JSONFRAG_NORMALIZE")
if nv != null && ("" + nv) == "1" { return NormBox.normalize_all(m) }
return m
if nv != null && ("" + nv) == "1" { return NormBox.normalize_all(result) }
return result
}
// Internal path既定ON — const(int)+ret, binop+ret ほか、registry 優先の lowering
// Disable with: HAKO_MIR_BUILDER_INTERNAL=0

View File

@ -0,0 +1,567 @@
// FuncLoweringBox — Function definition lowering and Call resolution for MirBuilder
// Policy: Lower function defs to MIR + resolve Call targets to qualified names
// Toggle: HAKO_MIR_BUILDER_FUNCS=1, HAKO_MIR_BUILDER_CALL_RESOLVE=1
// Scope: Minimal support for Return(Int), Return(Binary(+|-|*|/, Int|Var, Int|Var)), Return(Call)
// Output: Additional MIR functions + resolved Call targets
using selfhost.shared.json.utils.json_frag as JsonFragBox
static box FuncLoweringBox {
// Lower function definitions to MIR.
// Finds the "defs" array in program_json, splits it into its top-level def
// objects, and lowers each def's body via _lower_func_body.
// Params:
//   program_json - Program JSON text (stringified before scanning)
//   defs_json    - only checked for null/""; the defs themselves are always
//                  read from program_json
// Returns: "" when there is nothing to emit; otherwise the lowered functions,
//          each one prefixed with "," (comma-separated JSON strings).
// Each def is parsed strictly inside its own {...} span, so "name"/"box"/
// "params"/"body" keys nested in a body (e.g. Var nodes) or belonging to a
// later def are never mistaken for this def's keys.
// All defs are registered in the call-resolution map before any body is
// lowered, so a Call may target a function defined later in the array.
// As before, "box"/"params"/"body" are only looked up after the def's "name".
method lower_func_defs(program_json, defs_json) {
  if defs_json == null || defs_json == "" { return "" }
  local s = "" + program_json
  // Check for "defs" key in Program JSON
  local defs_idx = JsonFragBox.index_of_from(s, "\"defs\":", 0)
  if defs_idx < 0 { return "" }
  // The value must be an array: skip whitespace after "defs": (7 chars), expect "["
  local arr_open = me._skip_ws(s, defs_idx + 7)
  if arr_open >= s.length() { return "" }
  if s.substring(arr_open, arr_open + 1) != "[" { return "" }
  local arr_close = me._find_balanced_close(s, arr_open + 1, "[", "]")
  if arr_close < 0 { return "" }
  local defs_str = s.substring(arr_open + 1, arr_close)
  // Pass 1: split the array into top-level objects; record name, box and the
  // text following the name for each def, and register every function.
  local func_map = new MapBox() // For Call resolution: name -> "Box.method"
  local def_names = new ArrayBox()
  local def_boxes = new ArrayBox()
  local def_tails = new ArrayBox()
  {
    local pos = 0
    loop(pos < defs_str.length()) {
      local obj_open = JsonFragBox.index_of_from(defs_str, "{", pos)
      if obj_open < 0 { break }
      local obj_close = me._find_balanced_close(defs_str, obj_open + 1, "{", "}")
      if obj_close < 0 { break }
      local def_str = defs_str.substring(obj_open, obj_close + 1)
      // Advance first so that `continue` below cannot loop forever
      pos = obj_close + 1
      // Extract name
      local name_idx = JsonFragBox.index_of_from(def_str, "\"name\":\"", 0)
      if name_idx < 0 { continue }
      local name_start = name_idx + 8
      local name_end = me._scan_quote_end(def_str, name_start)
      if name_end < 0 { continue }
      local def_name = def_str.substring(name_start, name_end)
      // Everything after the name, still limited to this def object
      local def_tail = def_str.substring(name_end, def_str.length())
      // Extract box name
      local def_box = "Main" // default
      local box_idx = JsonFragBox.index_of_from(def_tail, "\"box\":\"", 0)
      if box_idx >= 0 {
        local box_start = box_idx + 7
        local box_end = me._scan_quote_end(def_tail, box_start)
        if box_end >= 0 { def_box = def_tail.substring(box_start, box_end) }
      }
      // Register function in map for Call resolution
      func_map.set(def_name, def_box + "." + def_name)
      def_names.push(def_name)
      def_boxes.push(def_box)
      def_tails.push(def_tail)
    }
  }
  // Pass 2: lower each def body and collect the resulting MIR functions
  local func_defs_mir = ""
  local di = 0
  local dn = def_names.length()
  loop(di < dn) {
    local func_name = "" + def_names.get(di)
    local box_name = "" + def_boxes.get(di)
    local tail = "" + def_tails.get(di)
    di = di + 1
    // Extract params array (every quoted string inside "params":[...])
    local params_arr = new ArrayBox()
    local params_idx = JsonFragBox.index_of_from(tail, "\"params\":[", 0)
    if params_idx >= 0 {
      local params_start = params_idx + 10
      local params_end = me._find_balanced_close(tail, params_start, "[", "]")
      if params_end >= 0 {
        local params_str = tail.substring(params_start, params_end)
        local p_pos = 0
        loop(p_pos < params_str.length()) {
          local q_open = JsonFragBox.index_of_from(params_str, "\"", p_pos)
          if q_open < 0 { break }
          local q_close = me._scan_quote_end(params_str, q_open + 1)
          if q_close < 0 { break }
          params_arr.push(params_str.substring(q_open + 1, q_close))
          p_pos = q_close + 1
        }
      }
    }
    // Extract body JSON (Program statements): a balanced {...} after "body":
    local body_idx = JsonFragBox.index_of_from(tail, "\"body\":", 0)
    if body_idx < 0 { continue }
    local body_start = body_idx + 7
    local body_open = me._skip_ws(tail, body_start)
    if body_open >= tail.length() { continue }
    if tail.substring(body_open, body_open + 1) != "{" { continue }
    local body_close = me._find_balanced_close(tail, body_open + 1, "{", "}")
    if body_close < 0 { continue }
    local body_json = tail.substring(body_start, body_close + 1)
    // Try to lower body to MIR; unsupported bodies yield null and are skipped
    local mir_func = me._lower_func_body(func_name, box_name, params_arr, body_json, func_map)
    if mir_func != null && mir_func != "" {
      func_defs_mir = func_defs_mir + "," + ("" + mir_func)
    }
  }
  return func_defs_mir
}
// Helper: index of the first non-whitespace character at or after `start`
// (returns text.length() when only whitespace remains)
method _skip_ws(text, start) {
  local i = start
  loop(i < text.length()) {
    local ch = text.substring(i, i + 1)
    if ch == " " || ch == "\t" || ch == "\n" || ch == "\r" { i = i + 1 } else { break }
  }
  return i
}
// Helper: index of the close_ch that balances an open_ch already consumed
// before `start`, ignoring brackets inside JSON strings; -1 when unbalanced
method _find_balanced_close(text, start, open_ch, close_ch) {
  local depth = 1
  local in_str = 0
  local esc = 0
  local i = start
  loop(i < text.length()) {
    local ch = text.substring(i, i + 1)
    if in_str == 1 {
      // Inside a string only an unescaped quote ends it
      if esc == 1 {
        esc = 0
      } else if ch == "\\" {
        esc = 1
      } else if ch == "\"" {
        in_str = 0
      }
      i = i + 1
      continue
    }
    if ch == "\"" {
      in_str = 1
    } else if ch == open_ch {
      depth = depth + 1
    } else if ch == close_ch {
      depth = depth - 1
      if depth == 0 { return i }
    }
    i = i + 1
  }
  return -1
}
// Helper: index of the next double quote at or after `start`, or -1
// (no escape handling, same as the original name scans)
method _scan_quote_end(text, start) {
  local i = start
  loop(i < text.length()) {
    if text.substring(i, i + 1) == "\"" { return i }
    i = i + 1
  }
  return -1
}
// Lower one function body to a MIR function JSON string (minimal support).
// Dispatch order once a Return node is found: the first Call tag after it,
// else the first Binary tag, else the first Int tag (constant return).
// Params:
//   func_name, box_name - combined as "<box>.<func>" for the emitted name
//   params_arr          - parameter names
//   body_json           - body JSON text (stringified before scanning)
//   func_map            - name -> "Box.method" map, forwarded to the Call path
// Returns: the MIR function string, or null when the body is not supported.
// NOTE(review): the emitted fragments are written with `\\\"` while the search
// patterns in this file use `\"` — confirm the resulting quoting is intended.
method _lower_func_body(func_name, box_name, params_arr, body_json, func_map) {
  local src = "" + body_json
  // No Return statement: nothing this lowering can handle
  local ret_at = JsonFragBox.index_of_from(src, "\"type\":\"Return\"", 0)
  if ret_at < 0 { return null }
  // Return(Call(name, args))
  local call_at = JsonFragBox.index_of_from(src, "\"type\":\"Call\"", ret_at)
  if call_at >= 0 {
    return me._lower_return_call(func_name, box_name, params_arr, src, call_at, func_map)
  }
  // Return(Binary(op, lhs, rhs))
  local bin_at = JsonFragBox.index_of_from(src, "\"type\":\"Binary\"", ret_at)
  if bin_at >= 0 {
    return me._lower_return_binary(func_name, box_name, params_arr, src, bin_at)
  }
  // Return(Int): needs an Int tag followed by a readable "value":
  local int_at = JsonFragBox.index_of_from(src, "\"type\":\"Int\"", ret_at)
  if int_at < 0 { return null }
  local value_at = JsonFragBox.index_of_from(src, "\"value\":", int_at)
  if value_at < 0 { return null }
  local literal = JsonFragBox.read_int_after(src, value_at + 8)
  if literal == null { return null }
  // Single block: const into register 1, then ret of that register
  local params_text = me._build_params_json(params_arr)
  local out = "{\\\"name\\\":\\\"" + box_name + "." + func_name + "\\\",\\\"params\\\":" + params_text
  out = out + ",\\\"locals\\\":[],\\\"blocks\\\":[{\\\"id\\\":0,\\\"instructions\\\":[{\\\"op\\\":\\\"const\\\",\\\"dst\\\":1,\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + literal
  out = out + "}},{\\\"op\\\":\\\"ret\\\",\\\"value\\\":1}]}]}"
  return out
}
// Lower Return(Binary(op, lhs, rhs)) to a single-block MIR function.
// Params:
//   func_name, box_name - combined as "<box>.<func>" for the emitted name
//   params_arr          - parameter names; they take registers 1..n in order
//   body_str            - body JSON text
//   bin_idx             - index of the Binary tag inside body_str
// Returns: the MIR function string, or null when the operator is not one of
//          + - * /, an operand is neither Var nor Int, an operand value cannot
//          be read, or a Var operand is not a parameter.
// NOTE(review): "type" is read with offset +7 and "name" with +8 although both
// search patterns are 8 characters long — confirm which position
// JsonFragBox.read_string_after expects.
method _lower_return_binary(func_name, box_name, params_arr, body_str, bin_idx) {
  // Operator
  local op_at = JsonFragBox.index_of_from(body_str, "\"op\":\"", bin_idx)
  if op_at < 0 { return null }
  local op = JsonFragBox.read_string_after(body_str, op_at + 5)
  if op != "+" && op != "-" && op != "*" && op != "/" { return null }
  // Left operand: kind ("Var" | "Int") and its name / literal value
  local left_kind = null
  local left_val = null
  local left_at = JsonFragBox.index_of_from(body_str, "\"lhs\":{", bin_idx)
  if left_at >= 0 {
    local lkind_at = JsonFragBox.index_of_from(body_str, "\"type\":\"", left_at)
    if lkind_at >= 0 {
      left_kind = JsonFragBox.read_string_after(body_str, lkind_at + 7)
      if left_kind == "Var" {
        local lname_at = JsonFragBox.index_of_from(body_str, "\"name\":\"", lkind_at)
        if lname_at >= 0 { left_val = JsonFragBox.read_string_after(body_str, lname_at + 8) }
      } else if left_kind == "Int" {
        local lnum_at = JsonFragBox.index_of_from(body_str, "\"value\":", lkind_at)
        if lnum_at >= 0 { left_val = JsonFragBox.read_int_after(body_str, lnum_at + 8) }
      }
    }
  }
  // Right operand: same extraction as the left one
  local right_kind = null
  local right_val = null
  local right_at = JsonFragBox.index_of_from(body_str, "\"rhs\":{", bin_idx)
  if right_at >= 0 {
    local rkind_at = JsonFragBox.index_of_from(body_str, "\"type\":\"", right_at)
    if rkind_at >= 0 {
      right_kind = JsonFragBox.read_string_after(body_str, rkind_at + 7)
      if right_kind == "Var" {
        local rname_at = JsonFragBox.index_of_from(body_str, "\"name\":\"", rkind_at)
        if rname_at >= 0 { right_val = JsonFragBox.read_string_after(body_str, rname_at + 8) }
      } else if right_kind == "Int" {
        local rnum_at = JsonFragBox.index_of_from(body_str, "\"value\":", rkind_at)
        if rnum_at >= 0 { right_val = JsonFragBox.read_int_after(body_str, rnum_at + 8) }
      }
    }
  }
  // Any missing piece means the expression is outside the supported subset
  if left_kind == null || right_kind == null || left_val == null || right_val == null { return null }
  // Parameters occupy registers 1..n; temporaries follow
  local reg = 1
  local regs_by_param = new MapBox()
  {
    local idx = 0
    local count = params_arr.length()
    loop(idx < count) {
      regs_by_param.set("" + params_arr.get(idx), "" + reg)
      reg = reg + 1
      idx = idx + 1
    }
  }
  local code = ""
  // Left operand: reuse the parameter register, or materialize a const
  local left_reg = reg
  if left_kind == "Var" {
    local lfound = regs_by_param.get("" + left_val)
    if lfound == null { return null }
    left_reg = JsonFragBox._str_to_int("" + lfound)
  } else if left_kind == "Int" {
    code = "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":" + reg + ",\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + left_val + "}}"
    reg = reg + 1
  }
  // Right operand
  local right_reg = reg
  if right_kind == "Var" {
    local rfound = regs_by_param.get("" + right_val)
    if rfound == null { return null }
    right_reg = JsonFragBox._str_to_int("" + rfound)
  } else if right_kind == "Int" {
    if code != "" { code = code + "," }
    code = code + "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":" + reg + ",\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + right_val + "}}"
    reg = reg + 1
  }
  // binop writes into the next free register, which is then returned
  if code != "" { code = code + "," }
  local dst_reg = reg
  code = code + "{\\\"op\\\":\\\"binop\\\",\\\"operation\\\":\\\"" + op + "\\\",\\\"lhs\\\":" + left_reg + ",\\\"rhs\\\":" + right_reg + ",\\\"dst\\\":" + dst_reg + "}"
  code = code + ",{\\\"op\\\":\\\"ret\\\",\\\"value\\\":" + dst_reg + "}"
  // Assemble the function object
  local params_text = me._build_params_json(params_arr)
  return "{\\\"name\\\":\\\"" + box_name + "." + func_name + "\\\",\\\"params\\\":" + params_text + ",\\\"locals\\\":[],\\\"blocks\\\":[{\\\"id\\\":0,\\\"instructions\\\":[" + code + "]}]}"
}
// Lower Return(Call(name, args)) to a single-block MIR function.
// Params:
//   func_name, box_name - combined as "<box>.<func>" for the emitted name
//   params_arr          - parameter names; they take registers 1..n in order
//   body_str            - body JSON text
//   call_idx            - index of the Call tag inside body_str
//   func_map            - name -> "Box.method" map used by resolve_call_target
// Returns: the MIR function string, or null when the call cannot be lowered
//          faithfully.
// Register layout: params 1..n, then a string const holding the callee name,
// then one const per Int argument, then the call result.
// Only Int and Var arguments are supported. Any other argument kind, an
// unreadable value, or a Var that is not a parameter makes the whole lowering
// decline (null) instead of emitting a call with missing arguments; this
// matches how _lower_return_binary treats unsupported operands.
// The argument scan is limited to the text between "args":[ and its matching ].
method _lower_return_call(func_name, box_name, params_arr, body_str, call_idx, func_map) {
  // Extract call function name
  local func_idx = JsonFragBox.index_of_from(body_str, "\"func\":\"", call_idx)
  if func_idx < 0 { return null }
  local call_name = JsonFragBox.read_string_after(body_str, func_idx + 8)
  if call_name == null { return null }
  // Resolve call target
  local resolved_name = me.resolve_call_target(call_name, func_map)
  // Map params to registers
  local next_reg = 1
  local param_map = new MapBox()
  {
    local pi = 0
    local pn = params_arr.length()
    loop(pi < pn) {
      param_map.set("" + params_arr.get(pi), "" + next_reg)
      next_reg = next_reg + 1
      pi = pi + 1
    }
  }
  // Load const for function name
  local insts = "{\\\"op\\\":\\\"const\\\",\\\"dst\\\":" + next_reg + ",\\\"value\\\":{\\\"type\\\":\\\"string\\\",\\\"value\\\":\\\"" + resolved_name + "\\\"}}"
  local func_reg = next_reg
  next_reg = next_reg + 1
  // Load args (minimal: Int or Var) and build the register list
  local args_list = ""
  local args_idx = JsonFragBox.index_of_from(body_str, "\"args\":[", call_idx)
  if args_idx >= 0 {
    // Find the ] that closes the args array so the scan cannot leave it
    local args_start = args_idx + 8
    local args_end = -1
    {
      local j = args_start
      local depth = 1
      local in_str = 0
      local esc = 0
      loop(j < body_str.length()) {
        local ch = body_str.substring(j, j + 1)
        if in_str == 1 {
          if esc == 1 {
            esc = 0
          } else if ch == "\\" {
            esc = 1
          } else if ch == "\"" {
            in_str = 0
          }
          j = j + 1
          continue
        }
        if ch == "\"" {
          in_str = 1
        } else if ch == "[" {
          depth = depth + 1
        } else if ch == "]" {
          depth = depth - 1
          if depth == 0 { args_end = j break }
        }
        j = j + 1
      }
    }
    // Unterminated args array: malformed input, decline
    if args_end < 0 { return null }
    local args_str = body_str.substring(args_start, args_end)
    local args_pos = 0
    loop(args_pos < args_str.length()) {
      local arg_type_idx = JsonFragBox.index_of_from(args_str, "\"type\":\"", args_pos)
      if arg_type_idx < 0 { break }
      local arg_type = JsonFragBox.read_string_after(args_str, arg_type_idx + 7)
      local arg_reg = ""
      if arg_type == "Int" {
        local val_idx = JsonFragBox.index_of_from(args_str, "\"value\":", arg_type_idx)
        if val_idx < 0 { return null }
        local val = JsonFragBox.read_int_after(args_str, val_idx + 8)
        if val == null { return null }
        // Materialize the literal in a fresh register
        insts = insts + ",{\\\"op\\\":\\\"const\\\",\\\"dst\\\":" + next_reg + ",\\\"value\\\":{\\\"type\\\":\\\"i64\\\",\\\"value\\\":" + val + "}}"
        arg_reg = "" + next_reg
        next_reg = next_reg + 1
      } else if arg_type == "Var" {
        local var_idx = JsonFragBox.index_of_from(args_str, "\"name\":\"", arg_type_idx)
        if var_idx < 0 { return null }
        local var_name = JsonFragBox.read_string_after(args_str, var_idx + 8)
        if var_name == null { return null }
        // Only parameters are addressable here; unknown names cannot be lowered
        local preg = param_map.get("" + var_name)
        if preg == null { return null }
        arg_reg = "" + preg
      } else {
        // Nested Call/Binary/other literals are outside the minimal subset
        return null
      }
      if args_list != "" { args_list = args_list + "," }
      args_list = args_list + arg_reg
      // Continue after this "type":" tag (8 characters)
      args_pos = arg_type_idx + 8
    }
  }
  // call instruction (using func_reg for function name)
  insts = insts + ",{\\\"op\\\":\\\"call\\\",\\\"func\\\":" + func_reg + ",\\\"args\\\":[" + args_list + "],\\\"dst\\\":" + next_reg + "}"
  local result_reg = next_reg
  // ret
  insts = insts + ",{\\\"op\\\":\\\"ret\\\",\\\"value\\\":" + result_reg + "}"
  // Build params JSON array
  local params_json = me._build_params_json(params_arr)
  local mir = "{\\\"name\\\":\\\"" + box_name + "." + func_name + "\\\",\\\"params\\\":" + params_json + ",\\\"locals\\\":[],\\\"blocks\\\":[{\\\"id\\\":0,\\\"instructions\\\":[" + insts + "]}]}"
  // Debug log
  if env.get("HAKO_MIR_BUILDER_DEBUG") == "1" {
    print("[mirbuilder/call:lowered] " + func_name + " -> call(" + resolved_name + ")")
  }
  return mir
}
// Map a bare call name to its qualified "Box.method" name.
// Toggle: HAKO_MIR_BUILDER_CALL_RESOLVE=1 (any other value returns call_name as-is)
// Params:
//   call_name - name as written at the call site
//   func_map  - name -> "Box.method" map
// Returns: the mapped name as a string, or call_name when resolution is
//          disabled or the map has no entry for it.
method resolve_call_target(call_name, func_map) {
  local toggle = env.get("HAKO_MIR_BUILDER_CALL_RESOLVE")
  if toggle != "1" { return call_name }
  local qualified = func_map.get(call_name)
  // Unknown callee: leave the name untouched
  if qualified == null { return call_name }
  if env.get("HAKO_MIR_BUILDER_DEBUG") == "1" {
    print("[mirbuilder/call:resolve] " + call_name + " => " + qualified)
  }
  return "" + qualified
}
// Helper: render the parameter names as a bracketed, comma-separated list.
// Params:
//   params_arr - parameter names (each is stringified)
// Returns: "[" + the quoted names joined by "," + "]"; "[]" for no params.
// NOTE(review): the quotes are written as `\\\"` here, unlike the `\"` used in
// this file's search patterns — confirm the emitted quoting is intended.
method _build_params_json(params_arr) {
  local out = "["
  local sep = ""
  local idx = 0
  local count = params_arr.length()
  loop(idx < count) {
    // sep is empty for the first element and "," afterwards
    out = out + sep + "\\\"" + ("" + params_arr.get(idx)) + "\\\""
    sep = ","
    idx = idx + 1
  }
  return out + "]"
}
// Inject function definitions into MIR JSON.
// Splices func_defs_mir into the "functions" array of mir_json, directly after
// the array's first function object.
// Params:
//   mir_json      - MIR JSON text
//   func_defs_mir - one or more function objects separated by ","; a leading
//                   "," (as produced by lower_func_defs) is accepted and
//                   normalized
// Returns: the new JSON string, or mir_json unchanged when there is nothing to
//          inject, no "functions":[ key exists, the array does not start with
//          an object, or the first object's closing brace cannot be found.
// An empty "functions":[] array receives the definitions without a separator,
// and the scan never runs past the array into unrelated objects.
method inject_funcs(mir_json, func_defs_mir) {
  if func_defs_mir == null || func_defs_mir == "" { return mir_json }
  local mir_str = "" + mir_json
  local funcs_idx = JsonFragBox.index_of_from(mir_str, "\"functions\":[", 0)
  if funcs_idx < 0 { return mir_json }
  // Normalize the fragment: drop one leading comma; separators are added below
  local frag = "" + func_defs_mir
  if frag.substring(0, 1) == "," { frag = frag.substring(1, frag.length()) }
  if frag == "" { return mir_json }
  // Skip whitespace after "functions":[ (13 characters)
  local first = funcs_idx + 13
  loop(first < mir_str.length()) {
    local ws = mir_str.substring(first, first + 1)
    if ws == " " || ws == "\t" || ws == "\n" || ws == "\r" { first = first + 1 } else { break }
  }
  if first >= mir_str.length() { return mir_json }
  local head = mir_str.substring(first, first + 1)
  // Empty array: the fragment becomes its only content
  if head == "]" {
    return mir_str.substring(0, first) + frag + mir_str.substring(first, mir_str.length())
  }
  // Anything other than an object here is not a shape this method knows
  if head != "{" { return mir_json }
  // Find first function's closing }
  local first_func_end = -1
  {
    local j = first
    local brace_depth = 0
    local in_str = 0
    local esc = 0
    loop(j < mir_str.length()) {
      local ch = mir_str.substring(j, j + 1)
      if in_str == 1 {
        // Braces inside strings do not count; only an unescaped quote ends the string
        if esc == 1 {
          esc = 0
        } else if ch == "\\" {
          esc = 1
        } else if ch == "\"" {
          in_str = 0
        }
        j = j + 1
        continue
      }
      if ch == "\"" {
        in_str = 1
      } else if ch == "{" {
        brace_depth = brace_depth + 1
      } else if ch == "}" {
        brace_depth = brace_depth - 1
        if brace_depth == 0 { first_func_end = j + 1 break }
      }
      j = j + 1
    }
  }
  if first_func_end < 0 { return mir_json }
  // Inject after first function, separated by a comma
  return mir_str.substring(0, first_func_end) + "," + frag + mir_str.substring(first_func_end, mir_str.length())
}
}

View File

@ -0,0 +1,26 @@
# Manifest for the "lang.mir" module: identity, exported names, dependencies.
[module]
name = "lang.mir"
version = "1.0.0"
# Each entry maps an export name to the .hako file that provides it.
# NOTE(review): paths look relative to this manifest's directory — confirm.
[exports]
# MIR builder modules
builder.func_lowering = "builder/func_lowering.hako"
builder.MirBuilderBox = "builder/MirBuilderBox.hako"
builder.MirBuilderMinBox = "builder/MirBuilderMinBox.hako"
builder.pattern_registry = "builder/pattern_registry.hako"
# MIR builder internal modules
builder.internal.prog_scan_box = "builder/internal/prog_scan_box.hako"
builder.internal.lower_load_store_local_box = "builder/internal/lower_load_store_local_box.hako"
builder.internal.lower_typeop_cast_box = "builder/internal/lower_typeop_cast_box.hako"
builder.internal.lower_typeop_check_box = "builder/internal/lower_typeop_check_box.hako"
builder.internal.lower_loop_simple_box = "builder/internal/lower_loop_simple_box.hako"
builder.internal.loop_opts_adapter_box = "builder/internal/loop_opts_adapter_box.hako"
builder.internal.builder_config_box = "builder/internal/builder_config_box.hako"
builder.internal.jsonfrag_normalizer_box = "builder/internal/jsonfrag_normalizer_box.hako"
# MIR emitter
min_emitter = "min_emitter.hako"
# Modules this one depends on, each with a version constraint.
[dependencies]
"selfhost.shared" = "^1.0.0"