461 lines
16 KiB
Rust
461 lines
16 KiB
Rust
use std::{env, fs, path::PathBuf};
|
|
|
|
/// One operator type rule as `(left, right, result, action)`.
type TypeRule = (String, String, String, String);

/// Operator tables understood by the scanner, in the order they are emitted.
const OPERATOR_NAMES: &[&str] = &["add", "sub", "mul", "div"];

/// Statement kinds emitted when the grammar has no `[syntax.statements] allow` list.
const DEFAULT_STATEMENTS: &[&str] = &[
    "box", "global", "function", "static", "if", "loop", "break", "return", "print", "nowait",
    "include", "local", "outbox", "try", "throw", "using", "from",
];

/// Grammar written to `grammar/unified-grammar.toml` when that file does not exist yet.
const MINIMAL_GRAMMAR: &str = r#"
[keywords.me]
token = "ME"

[keywords.from]
token = "FROM"

[keywords.loop]
token = "LOOP"

[operators.add]
symbol = "+"
coercion_strategy = "string_priority"
type_rules = [
  { left = "String", right = "String", result = "String", action = "concat" },
  { left = "String", right = "Integer", result = "String", action = "concat" },
  { left = "Integer", right = "String", result = "String", action = "concat" },
  { left = "String", right = "Bool", result = "String", action = "concat" },
  { left = "Bool", right = "String", result = "String", action = "concat" },
  { left = "String", right = "Other", result = "String", action = "concat" },
  { left = "Other", right = "String", result = "String", action = "concat" },
  { left = "Integer", right = "Integer", result = "Integer", action = "add_i64" },
  { left = "Float", right = "Float", result = "Float", action = "add_f64" }
]
"#;

/// Helper function appended verbatim to the generated module.
const LOOKUP_KEYWORD_FN: &str = r#"pub fn lookup_keyword(word: &str) -> Option<&'static str> {
    for (k, t) in KEYWORDS {
        if *k == word { return Some(*t); }
    }
    None
}
"#;

/// Table of the grammar file the line scanner is currently inside.
enum Section {
    /// Any table the generator does not consume.
    Other,
    /// `[keywords.<name>]`, carrying `<name>`.
    Keyword(String),
    /// `[operators.<op>]`, carrying the index of `<op>` in `OPERATOR_NAMES`.
    Operator(usize),
    /// `[syntax.statements]`
    Statements,
    /// `[syntax.expressions]`
    Expressions,
}

/// Settings collected for one operator table.
#[derive(Default)]
struct OperatorSpec {
    coercion: Option<String>,
    rules: Vec<TypeRule>,
}

/// Everything the generator extracts from the grammar file.
struct Grammar {
    /// `(keyword name, token)` pairs in file order.
    keywords: Vec<(String, String)>,
    /// One entry per `OPERATOR_NAMES` element, same order.
    operators: Vec<OperatorSpec>,
    /// `syntax.statements.allow`
    statements: Vec<String>,
    /// `syntax.expressions.allow_binops`
    binops: Vec<String>,
}

/// Strips surrounding whitespace and double quotes from a raw TOML value.
fn unquote(raw: &str) -> &str {
    raw.trim().trim_matches('"')
}

/// Splits `key = value` at the first `=`, trimming both halves.
fn split_key_value(line: &str) -> Option<(&str, &str)> {
    let eq = line.find('=')?;
    Some((line[..eq].trim(), line[eq + 1..].trim()))
}

/// Maps a trimmed `[table]` header line to the section it opens.
fn classify_header(header: &str) -> Section {
    let name = match header.strip_prefix('[').and_then(|h| h.strip_suffix(']')) {
        Some(name) => name,
        None => return Section::Other,
    };
    if let Some(keyword) = name.strip_prefix("keywords.") {
        return Section::Keyword(keyword.to_string());
    }
    if let Some(op) = name.strip_prefix("operators.") {
        if let Some(idx) = OPERATOR_NAMES.iter().position(|known| *known == op) {
            return Section::Operator(idx);
        }
    }
    match name {
        "syntax.statements" => Section::Statements,
        "syntax.expressions" => Section::Expressions,
        _ => Section::Other,
    }
}

/// Parses the inside of one inline table (`left = "..", right = "..", ...`).
///
/// Returns `None` unless all of `left`, `right`, `result` and `action` are present and
/// non-empty.
fn parse_rule(inner: &str) -> Option<TypeRule> {
    let (mut left, mut right, mut result, mut action) = ("", "", "", "");
    for field in inner.split(',') {
        if let Some((key, value)) = split_key_value(field) {
            let value = unquote(value);
            match key {
                "left" => left = value,
                "right" => right = value,
                "result" => result = value,
                "action" => action = value,
                _ => {}
            }
        }
    }
    if left.is_empty() || right.is_empty() || result.is_empty() || action.is_empty() {
        return None;
    }
    Some((
        left.to_string(),
        right.to_string(),
        result.to_string(),
        action.to_string(),
    ))
}

/// Appends every `{ ... }` inline table found in `text` to `rules`.
///
/// Returns the text that follows the last complete table so the caller can look for the
/// closing `]` of the array. Working on brace pairs (rather than trimming the line ends)
/// is what makes a trailing `},` parse correctly.
fn collect_rules<'a>(text: &'a str, rules: &mut Vec<TypeRule>) -> &'a str {
    let mut rest = text;
    while let Some(open) = rest.find('{') {
        let after_open = &rest[open + 1..];
        match after_open.find('}') {
            Some(close) => {
                if let Some(rule) = parse_rule(&after_open[..close]) {
                    rules.push(rule);
                }
                rest = &after_open[close + 1..];
            }
            // Unterminated table: leave it for the caller, nothing more to parse here.
            None => break,
        }
    }
    rest
}

/// Appends the non-empty, unquoted, comma-separated items of `text` to `out`.
///
/// Anything after the first `]` is ignored. Returns `true` when that closing `]` was seen.
fn collect_strings(text: &str, out: &mut Vec<String>) -> bool {
    let (body, closed) = match text.find(']') {
        Some(end) => (&text[..end], true),
        None => (text, false),
    };
    let body = body.trim_start().trim_start_matches('[');
    out.extend(
        body.split(',')
            .map(unquote)
            .filter(|item| !item.is_empty())
            .map(|item| item.to_string()),
    );
    closed
}

/// Handles one line of a syntax table whose string list is stored under `key`.
///
/// `in_list` tells whether a multi-line array opened on an earlier line is still open;
/// the return value is the same flag after this line.
fn scan_list_line(line: &str, key: &str, in_list: bool, out: &mut Vec<String>) -> bool {
    if in_list {
        return !collect_strings(line, out);
    }
    match split_key_value(line) {
        Some((found, value)) if found == key => {
            let closed = collect_strings(value, out);
            value.starts_with('[') && !closed
        }
        _ => false,
    }
}

/// Naive line scan of the grammar file (no TOML dependency in the build script).
///
/// Collects:
/// - `keywords.<name>.token`
/// - `operators.{add,sub,mul,div}.{coercion_strategy,type_rules}`
/// - `syntax.statements.allow`
/// - `syntax.expressions.allow_binops`
///
/// Any `[table]` header ends the previous section, blank lines and `#` comment lines are
/// skipped, and keys are compared exactly (so `token_kind` is not mistaken for `token`).
fn scan_grammar(content: &str) -> Grammar {
    let mut grammar = Grammar {
        keywords: Vec::new(),
        operators: OPERATOR_NAMES.iter().map(|_| OperatorSpec::default()).collect(),
        statements: Vec::new(),
        binops: Vec::new(),
    };
    let mut section = Section::Other;
    let mut in_type_rules = false;
    let mut in_list = false;

    for line in content.lines() {
        let s = line.trim();
        if s.is_empty() || s.starts_with('#') {
            continue;
        }
        if s.starts_with('[') {
            section = classify_header(s);
            in_type_rules = false;
            in_list = false;
            continue;
        }
        match &section {
            Section::Other => {}
            Section::Keyword(name) => {
                if let Some(("token", value)) = split_key_value(s) {
                    grammar
                        .keywords
                        .push((name.clone(), unquote(value).to_string()));
                }
            }
            Section::Operator(idx) => {
                let spec = &mut grammar.operators[*idx];
                if in_type_rules {
                    in_type_rules = !collect_rules(s, &mut spec.rules).contains(']');
                } else if let Some((key, value)) = split_key_value(s) {
                    match key {
                        "type_rules" if value.starts_with('[') => {
                            // The array may open, hold rules and close all on this line.
                            let tail = collect_rules(&value[1..], &mut spec.rules);
                            in_type_rules = !tail.contains(']');
                        }
                        "coercion_strategy" => {
                            spec.coercion = Some(unquote(value).to_string());
                        }
                        _ => {}
                    }
                }
            }
            Section::Statements => {
                in_list = scan_list_line(s, "allow", in_list, &mut grammar.statements);
            }
            Section::Expressions => {
                in_list = scan_list_line(s, "allow_binops", in_list, &mut grammar.binops);
            }
        }
    }
    grammar
}

/// Coercion strategy used when the grammar does not set one for `op`.
fn default_coercion(op: &str) -> &'static str {
    if op == "add" {
        "string_priority"
    } else {
        "numeric_only"
    }
}

/// Type rules used when the grammar lists none for `op` (keeps codegen deterministic).
fn default_rules(op: &str) -> Vec<TypeRule> {
    let rule = |l: &str, r: &str, res: &str, act: &str| {
        (l.to_string(), r.to_string(), res.to_string(), act.to_string())
    };
    let mut rules = Vec::new();
    if op == "add" {
        // `+` concatenates as soon as either side is a String.
        let concat_pairs = [
            ("String", "String"),
            ("String", "Integer"),
            ("Integer", "String"),
            ("String", "Bool"),
            ("Bool", "String"),
            ("String", "Other"),
            ("Other", "String"),
        ];
        for &(left, right) in concat_pairs.iter() {
            rules.push(rule(left, right, "String", "concat"));
        }
    }
    rules.push(rule("Integer", "Integer", "Integer", &format!("{}_i64", op)));
    rules.push(rule("Float", "Float", "Float", &format!("{}_f64", op)));
    rules
}

/// Borrows `parsed` as string slices, or `fallback` when nothing was parsed.
fn names_or<'a>(parsed: &'a [String], fallback: &[&'a str]) -> Vec<&'a str> {
    if parsed.is_empty() {
        fallback.to_vec()
    } else {
        parsed.iter().map(String::as_str).collect()
    }
}

/// Appends `pub static <name>: &[&str] = &[ ... ];` to `code`.
fn push_string_table(code: &mut String, name: &str, items: &[&str]) {
    code.push_str(&format!("\npub static {}: &[&str] = &[\n", name));
    for item in items {
        code.push_str(&format!(" {:?},\n", item));
    }
    code.push_str("];");
}

/// Renders the generated Rust module for `grammar`.
///
/// Values are written with `{:?}` so quotes and backslashes in the grammar are escaped
/// instead of producing an invalid string literal; plain values render exactly as before.
fn render(grammar: &Grammar) -> String {
    let mut code = String::new();
    code.push_str("// Auto-generated from grammar/unified-grammar.toml\n");

    code.push_str("pub static KEYWORDS: &[(&str, &str)] = &[\n");
    for (name, token) in &grammar.keywords {
        code.push_str(&format!(" ({:?}, {:?}),\n", name, token));
    }
    code.push_str("];\n");

    for (spec, op) in grammar.operators.iter().zip(OPERATOR_NAMES.iter().copied()) {
        let coercion = spec
            .coercion
            .as_deref()
            .unwrap_or_else(|| default_coercion(op));
        code.push_str(&format!(
            "pub static OPERATORS_{}_COERCION: &str = {:?};\n",
            op.to_uppercase(),
            coercion
        ));
    }

    for (spec, op) in grammar.operators.iter().zip(OPERATOR_NAMES.iter().copied()) {
        let fallback;
        let rules: &[TypeRule] = if spec.rules.is_empty() {
            fallback = default_rules(op);
            &fallback
        } else {
            &spec.rules
        };
        code.push_str(&format!(
            "pub static OPERATORS_{}_RULES: &[(&str, &str, &str, &str)] = &[\n",
            op.to_uppercase()
        ));
        for (left, right, result, action) in rules {
            code.push_str(&format!(
                " ({:?}, {:?}, {:?}, {:?}),\n",
                left, right, result, action
            ));
        }
        code.push_str("];\n");
    }

    code.push_str(LOOKUP_KEYWORD_FN);

    push_string_table(
        &mut code,
        "SYNTAX_ALLOWED_STATEMENTS",
        &names_or(&grammar.statements, DEFAULT_STATEMENTS),
    );
    push_string_table(
        &mut code,
        "SYNTAX_ALLOWED_BINOPS",
        &names_or(&grammar.binops, OPERATOR_NAMES),
    );
    code
}

/// Build-script entry point.
///
/// Reads `grammar/unified-grammar.toml` under `CARGO_MANIFEST_DIR` (creating a minimal one
/// if it is missing) and writes the derived tables to `src/grammar/generated.rs`.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is unset or if the grammar file cannot be read or the
/// output cannot be written.
fn main() {
    // Path to grammar spec
    let manifest_dir = PathBuf::from(
        env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR is set by cargo"),
    );
    let grammar_dir = manifest_dir.join("grammar");
    let grammar_file = grammar_dir.join("unified-grammar.toml");

    // Ensure output dir exists. Best effort: the write below reports the fatal case, but a
    // failure here is surfaced as a warning instead of being dropped.
    let out_dir = manifest_dir.join("src").join("grammar");
    if let Err(err) = fs::create_dir_all(&out_dir) {
        println!(
            "cargo:warning=Could not create {}: {}",
            out_dir.display(),
            err
        );
    }
    let out_file = out_dir.join("generated.rs");

    // If grammar file is missing, create a minimal one
    if !grammar_file.exists() {
        if let Err(err) = fs::create_dir_all(&grammar_dir) {
            println!(
                "cargo:warning=Could not create {}: {}",
                grammar_dir.display(),
                err
            );
        }
        fs::write(&grammar_file, MINIMAL_GRAMMAR).expect("write minimal unified-grammar.toml");
        println!(
            "cargo:warning=Created minimal grammar at {}",
            grammar_file.display()
        );
    }

    let content = fs::read_to_string(&grammar_file).expect("read unified-grammar.toml");
    let code = render(&scan_grammar(&content));

    // Leave the file (and its mtime) alone when nothing changed.
    let unchanged = fs::read_to_string(&out_file)
        .map(|existing| existing == code)
        .unwrap_or(false);
    if !unchanged {
        fs::write(&out_file, code).expect("write generated.rs");
    }
    println!("cargo:rerun-if-changed={}", grammar_file.display());
}
|