hakorune/build.rs

use std::{env, fs, path::PathBuf};

fn main() {
    // Path to grammar spec
    let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
    let grammar_dir = manifest_dir.join("grammar");
    let grammar_file = grammar_dir.join("unified-grammar.toml");

    // Ensure output dir exists
    let out_dir = manifest_dir.join("src").join("grammar");
    fs::create_dir_all(&out_dir).ok();
    let out_file = out_dir.join("generated.rs");

    // If grammar file is missing, create a minimal one
    if !grammar_file.exists() {
        fs::create_dir_all(&grammar_dir).ok();
        let minimal = r#"
[keywords.me]
token = "ME"

[keywords.from]
token = "FROM"

[keywords.loop]
token = "LOOP"

[operators.add]
symbol = "+"
coercion_strategy = "string_priority"
type_rules = [
  { left = "String", right = "String", result = "String", action = "concat" },
  { left = "String", right = "Integer", result = "String", action = "concat" },
  { left = "Integer", right = "String", result = "String", action = "concat" },
  { left = "String", right = "Bool", result = "String", action = "concat" },
  { left = "Bool", right = "String", result = "String", action = "concat" },
  { left = "String", right = "Other", result = "String", action = "concat" },
  { left = "Other", right = "String", result = "String", action = "concat" },
  { left = "Integer", right = "Integer", result = "Integer", action = "add_i64" },
  { left = "Float", right = "Float", result = "Float", action = "add_f64" }
]
"#;
        fs::write(&grammar_file, minimal).expect("write minimal unified-grammar.toml");
        println!(
            "cargo:warning=Created minimal grammar at {}",
            grammar_file.display()
        );
    }

    // Read and very light parse: collect
    // - keywords.<name>.token
    // - operators.{add,sub,mul,div}.{coercion_strategy,type_rules}
    // - syntax.statements.allow = [..]
    // - syntax.expressions.allow_binops = [..]
    let content = fs::read_to_string(&grammar_file).expect("read unified-grammar.toml");

    // Naive line scan to avoid build-deps; supports lines like: [keywords.xxx] then token = "YYY"
    let mut current_key: Option<String> = None;
    let mut in_operators_add = false;
    let mut in_operators_sub = false;
    let mut in_operators_mul = false;
    let mut in_operators_div = false;
    let mut add_coercion: Option<String> = None;
    let mut sub_coercion: Option<String> = None;
    let mut mul_coercion: Option<String> = None;
    let mut div_coercion: Option<String> = None;
    let mut entries: Vec<(String, String)> = Vec::new();
    let mut in_type_rules = false;
    let mut add_rules: Vec<(String, String, String, String)> = Vec::new();
    let mut sub_rules: Vec<(String, String, String, String)> = Vec::new();
    let mut mul_rules: Vec<(String, String, String, String)> = Vec::new();
    let mut div_rules: Vec<(String, String, String, String)> = Vec::new();
    for line in content.lines() {
        let s = line.trim();
        if s.starts_with("[keywords.") && s.ends_with("]") {
            let name = s
                .trim_start_matches("[keywords.")
                .trim_end_matches("]")
                .to_string();
            current_key = Some(name);
            in_operators_add = false;
            in_operators_sub = false;
            in_operators_mul = false;
            in_operators_div = false;
            continue;
        }
        if s == "[operators.add]" {
            current_key = None;
            in_operators_add = true;
            in_operators_sub = false;
            in_operators_mul = false;
            in_operators_div = false;
            in_type_rules = false;
            continue;
        }
        if s == "[operators.sub]" {
            current_key = None;
            in_operators_add = false;
            in_operators_sub = true;
            in_operators_mul = false;
            in_operators_div = false;
            in_type_rules = false;
            continue;
        }
        if s == "[operators.mul]" {
            current_key = None;
            in_operators_add = false;
            in_operators_sub = false;
            in_operators_mul = true;
            in_operators_div = false;
            in_type_rules = false;
            continue;
        }
        if s == "[operators.div]" {
            current_key = None;
            in_operators_add = false;
            in_operators_sub = false;
            in_operators_mul = false;
            in_operators_div = true;
            in_type_rules = false;
            continue;
        }
        if let Some(ref key) = current_key {
            if let Some(rest) = s.strip_prefix("token") {
                if let Some(eq) = rest.find('=') {
                    let val = rest[eq + 1..].trim().trim_matches('"').to_string();
                    entries.push((key.clone(), val));
                }
            }
        }
        if in_operators_add || in_operators_sub || in_operators_mul || in_operators_div {
            if s.starts_with("type_rules") && s.contains('[') {
                in_type_rules = true;
                continue;
            }
            if in_type_rules {
                if s.starts_with(']') {
                    in_type_rules = false;
                    continue;
                }
                // Expect lines like: { left = "String", right = "String", result = "String", action = "concat" },
                if s.starts_with('{') && s.ends_with("},") || s.ends_with('}') {
                    let inner = s
                        .trim_start_matches('{')
                        .trim_end_matches('}')
                        .trim_end_matches(',');
                    let mut left = String::new();
                    let mut right = String::new();
                    let mut result = String::new();
                    let mut action = String::new();
                    for part in inner.split(',') {
                        let kv = part.trim();
                        if let Some(eq) = kv.find('=') {
                            let key = kv[..eq].trim();
                            let val = kv[eq + 1..].trim().trim_matches('"').to_string();
                            match key {
                                "left" => left = val,
                                "right" => right = val,
                                "result" => result = val,
                                "action" => action = val,
                                _ => {}
                            }
                        }
                    }
                    if !left.is_empty()
                        && !right.is_empty()
                        && !result.is_empty()
                        && !action.is_empty()
                    {
                        if in_operators_add {
                            add_rules.push((left, right, result, action));
                        } else if in_operators_sub {
                            sub_rules.push((left, right, result, action));
                        } else if in_operators_mul {
                            mul_rules.push((left, right, result, action));
                        } else if in_operators_div {
                            div_rules.push((left, right, result, action));
                        }
                    }
                }
            }
            if let Some(rest) = s.strip_prefix("coercion_strategy") {
                if let Some(eq) = rest.find('=') {
                    let val = rest[eq + 1..].trim().trim_matches('"').to_string();
                    if in_operators_add {
                        add_coercion = Some(val.clone());
                    } else if in_operators_sub {
                        sub_coercion = Some(val.clone());
                    } else if in_operators_mul {
                        mul_coercion = Some(val.clone());
                    } else if in_operators_div {
                        div_coercion = Some(val.clone());
                    }
                }
            }
        }
    }

    // Default rules if none present in TOML (keep codegen deterministic)
    if add_rules.is_empty() {
        add_rules.push((
            "String".into(),
            "String".into(),
            "String".into(),
            "concat".into(),
        ));
        add_rules.push((
            "String".into(),
            "Integer".into(),
            "String".into(),
            "concat".into(),
        ));
        add_rules.push((
            "Integer".into(),
            "String".into(),
            "String".into(),
            "concat".into(),
        ));
        add_rules.push((
            "String".into(),
            "Bool".into(),
            "String".into(),
            "concat".into(),
        ));
        add_rules.push((
            "Bool".into(),
            "String".into(),
            "String".into(),
            "concat".into(),
        ));
        add_rules.push((
            "String".into(),
            "Other".into(),
            "String".into(),
            "concat".into(),
        ));
        add_rules.push((
            "Other".into(),
            "String".into(),
            "String".into(),
            "concat".into(),
        ));
        add_rules.push((
            "Integer".into(),
            "Integer".into(),
            "Integer".into(),
            "add_i64".into(),
        ));
        add_rules.push((
            "Float".into(),
            "Float".into(),
            "Float".into(),
            "add_f64".into(),
        ));
    }
    if sub_rules.is_empty() {
        sub_rules.push((
            "Integer".into(),
            "Integer".into(),
            "Integer".into(),
            "sub_i64".into(),
        ));
        sub_rules.push((
            "Float".into(),
            "Float".into(),
            "Float".into(),
            "sub_f64".into(),
        ));
    }
    if mul_rules.is_empty() {
        mul_rules.push((
            "Integer".into(),
            "Integer".into(),
            "Integer".into(),
            "mul_i64".into(),
        ));
        mul_rules.push((
            "Float".into(),
            "Float".into(),
            "Float".into(),
            "mul_f64".into(),
        ));
    }
    if div_rules.is_empty() {
        div_rules.push((
            "Integer".into(),
            "Integer".into(),
            "Integer".into(),
            "div_i64".into(),
        ));
        div_rules.push((
            "Float".into(),
            "Float".into(),
            "Float".into(),
            "div_f64".into(),
        ));
    }

    // Generate Rust code
    let mut code = String::new();
    code.push_str("// Auto-generated from grammar/unified-grammar.toml\n");
    code.push_str("pub static KEYWORDS: &[(&str, &str)] = &[\n");
    for (k, t) in &entries {
        code.push_str(&format!("    (\"{}\", \"{}\"),\n", k, t));
    }
    code.push_str("];");
    let add_coercion_val = add_coercion.unwrap_or_else(|| "string_priority".to_string());
    let sub_coercion_val = sub_coercion.unwrap_or_else(|| "numeric_only".to_string());
    let mul_coercion_val = mul_coercion.unwrap_or_else(|| "numeric_only".to_string());
    let div_coercion_val = div_coercion.unwrap_or_else(|| "numeric_only".to_string());
    code.push_str(&format!(
        "\npub static OPERATORS_ADD_COERCION: &str = \"{}\";\n",
        add_coercion_val
    ));
    code.push_str(&format!(
        "pub static OPERATORS_SUB_COERCION: &str = \"{}\";\n",
        sub_coercion_val
    ));
    code.push_str(&format!(
        "pub static OPERATORS_MUL_COERCION: &str = \"{}\";\n",
        mul_coercion_val
    ));
    code.push_str(&format!(
        "pub static OPERATORS_DIV_COERCION: &str = \"{}\";\n",
        div_coercion_val
    ));
    // Emit add rules
    code.push_str("pub static OPERATORS_ADD_RULES: &[(&str, &str, &str, &str)] = &[\n");
    for (l, r, res, act) in &add_rules {
        code.push_str(&format!(
            "    (\"{}\", \"{}\", \"{}\", \"{}\"),\n",
            l, r, res, act
        ));
    }
    code.push_str("];");
    // Emit sub rules
    code.push_str("\npub static OPERATORS_SUB_RULES: &[(&str, &str, &str, &str)] = &[\n");
    for (l, r, res, act) in &sub_rules {
        code.push_str(&format!(
            "    (\"{}\", \"{}\", \"{}\", \"{}\"),\n",
            l, r, res, act
        ));
    }
    code.push_str("];");
    // Emit mul rules
    code.push_str("\npub static OPERATORS_MUL_RULES: &[(&str, &str, &str, &str)] = &[\n");
    for (l, r, res, act) in &mul_rules {
        code.push_str(&format!(
            "    (\"{}\", \"{}\", \"{}\", \"{}\"),\n",
            l, r, res, act
        ));
    }
    code.push_str("];");
    // Emit div rules
    code.push_str("\npub static OPERATORS_DIV_RULES: &[(&str, &str, &str, &str)] = &[\n");
    for (l, r, res, act) in &div_rules {
        code.push_str(&format!(
            "    (\"{}\", \"{}\", \"{}\", \"{}\"),\n",
            l, r, res, act
        ));
    }
    code.push_str("];");
    code.push_str(
        r#"
pub fn lookup_keyword(word: &str) -> Option<&'static str> {
    for (k, t) in KEYWORDS {
        if *k == word { return Some(*t); }
    }
    None
}
"#,
    );

    // --- Naive parse for syntax rules (statements/expressions) ---
    let mut syntax_statements: Vec<String> = Vec::new();
    let mut syntax_binops: Vec<String> = Vec::new();
    let mut in_syntax_statements = false;
    let mut in_syntax_expressions = false;
    for line in content.lines() {
        let s = line.trim();
        if s == "[syntax.statements]" {
            in_syntax_statements = true;
            in_syntax_expressions = false;
            continue;
        }
        if s == "[syntax.expressions]" {
            in_syntax_statements = false;
            in_syntax_expressions = true;
            continue;
        }
        if s.starts_with('[') {
            in_syntax_statements = false;
            in_syntax_expressions = false;
        }
        if in_syntax_statements {
            if let Some(rest) = s.strip_prefix("allow") {
                if let Some(eq) = rest.find('=') {
                    let arr = rest[eq + 1..].trim();
                    // Expect [ "if", "loop", ... ] possibly spanning multiple lines; simple split for this snapshot
                    for part in arr.trim_matches(&['[', ']'][..]).split(',') {
                        let v = part.trim().trim_matches('"');
                        if !v.is_empty() {
                            syntax_statements.push(v.to_string());
                        }
                    }
                }
            }
        }
        if in_syntax_expressions {
            if let Some(rest) = s.strip_prefix("allow_binops") {
                if let Some(eq) = rest.find('=') {
                    let arr = rest[eq + 1..].trim();
                    for part in arr.trim_matches(&['[', ']'][..]).split(',') {
                        let v = part.trim().trim_matches('"');
                        if !v.is_empty() {
                            syntax_binops.push(v.to_string());
                        }
                    }
                }
            }
        }
    }
    if syntax_statements.is_empty() {
        syntax_statements = vec![
            "box".into(),
            "global".into(),
            "function".into(),
            "static".into(),
            "if".into(),
            "loop".into(),
            "break".into(),
            "return".into(),
            "print".into(),
            "nowait".into(),
            "include".into(),
            "local".into(),
            "outbox".into(),
            "try".into(),
            "throw".into(),
            "using".into(),
            "from".into(),
        ];
    }
    if syntax_binops.is_empty() {
        syntax_binops = vec!["add".into(), "sub".into(), "mul".into(), "div".into()];
    }
    // Emit syntax arrays
    code.push_str("\npub static SYNTAX_ALLOWED_STATEMENTS: &[&str] = &[\n");
    for k in &syntax_statements {
        code.push_str(&format!("    \"{}\",\n", k));
    }
    code.push_str("];");
    code.push_str("\npub static SYNTAX_ALLOWED_BINOPS: &[&str] = &[\n");
    for k in &syntax_binops {
        code.push_str(&format!("    \"{}\",\n", k));
    }
    code.push_str("];");

    fs::write(&out_file, code).expect("write generated.rs");
    println!("cargo:rerun-if-changed={}", grammar_file.display());
}