hakorune/src/tokenizer/lex_ident.rs

use super::{NyashTokenizer, TokenType};
use crate::grammar::engine;
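
// Debug toggles read in this file (both checked via std::env::var):
//   NYASH_TOK_TRACE=1    -- trace Stage-3 keyword gating decisions to stderr
//   NYASH_GRAMMAR_DIFF=1 -- log disagreements with the unified grammar engine
//                           to stderr (observation only; never changes tokens)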
impl NyashTokenizer {
    /// Read a keyword or an identifier starting at the current position.
    pub(crate) fn read_keyword_or_identifier(&mut self) -> TokenType {
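        // Scan the longest run of alphanumeric/underscore characters.
        // Note: char::is_alphanumeric() is Unicode-aware, so non-ASCII
        // letters and digits are accepted in identifiers too.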
        let mut identifier = String::new();
        while let Some(c) = self.current_char() {
            if c.is_alphanumeric() || c == '_' {
                identifier.push(c);
                self.advance();
            } else {
                break;
            }
        }
        // Keyword check
        let mut tok = match identifier.as_str() {
            "box" => TokenType::BOX,
            "global" => TokenType::GLOBAL,
            "singleton" => TokenType::SINGLETON,
            "new" => TokenType::NEW,
            "match" => TokenType::MATCH,
            "if" => TokenType::IF,
            "else" => TokenType::ELSE,
            "loop" => TokenType::LOOP,
            "break" => TokenType::BREAK,
            "continue" => TokenType::CONTINUE,
            "return" => TokenType::RETURN,
            "function" => TokenType::FUNCTION,
            "fn" => TokenType::FN,
            "print" => TokenType::PRINT,
            "this" => TokenType::THIS,
            "me" => TokenType::ME,
            "init" => TokenType::INIT,
            "pack" => TokenType::PACK,
            "birth" => TokenType::BIRTH,
            "nowait" => TokenType::NOWAIT,
            "await" => TokenType::AWAIT,
            "interface" => TokenType::INTERFACE,
            // "include" keyword removed (use `using` instead)
            "import" => TokenType::IMPORT,
            "try" => TokenType::TRY,
            "catch" => TokenType::CATCH,
            "cleanup" => TokenType::CLEANUP,
            "throw" => TokenType::THROW,
            "local" => TokenType::LOCAL,
            "flow" => TokenType::FLOW,
            "static" => TokenType::STATIC,
            "outbox" => TokenType::OUTBOX,
            "not" => TokenType::NOT,
            "override" => TokenType::OVERRIDE,
            "from" => TokenType::FROM,
            "weak" => TokenType::WEAK,
            "using" => TokenType::USING,
            "and" => TokenType::AND,
            "or" => TokenType::OR,
            // Stage-3 loop keywords (gated below)
            "while" => TokenType::WHILE,
            "for" => TokenType::FOR,
            "in" => TokenType::IN,
            "true" => TokenType::TRUE,
            "false" => TokenType::FALSE,
            "null" => TokenType::NULL,
            _ => TokenType::IDENTIFIER(identifier.clone()),
        };
        // Stage-3 gate: LOCAL/FLOW/TRY/CATCH/THROW and the Stage-3 loop
        // keywords WHILE/FOR/IN require the Stage-3 parser (default ON).
        // When the gate is off, these degrade to plain identifiers.
        let stage3_enabled = crate::config::env::parser_stage3_enabled();
        let is_stage3 = matches!(
            tok,
            TokenType::LOCAL
                | TokenType::FLOW
                | TokenType::TRY
                | TokenType::CATCH
                | TokenType::THROW
                | TokenType::WHILE
                | TokenType::FOR
                | TokenType::IN
        );
        if is_stage3 {
            let trace = std::env::var("NYASH_TOK_TRACE").ok().as_deref() == Some("1");
            if stage3_enabled {
                if trace {
                    eprintln!(
                        "[tok-stage3] Keeping {:?} as keyword (stage3_enabled={})",
                        tok, stage3_enabled
                    );
                }
            } else {
                if trace {
                    eprintln!(
                        "[tok-stage3] Degrading {:?} to IDENTIFIER (stage3_enabled={})",
                        tok, stage3_enabled
                    );
                }
                tok = TokenType::IDENTIFIER(identifier.clone());
            }
        }
        // 12.7 strict mode: demote extended keywords back to IDENTIFIER
        if Self::strict_12_7() {
            let is_extended = matches!(
                tok,
                TokenType::INTERFACE
                    | TokenType::USING
                    | TokenType::OUTBOX
                    | TokenType::NOWAIT
                    | TokenType::OVERRIDE
                    | TokenType::WEAK
                    | TokenType::PACK
            );
            if is_extended {
                tok = TokenType::IDENTIFIER(identifier.clone());
            }
        }
        // Diff check against the unified grammar engine (does not change behavior)
        if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") {
            if let Some(kw) = engine::get().is_keyword_str(&identifier) {
                if let TokenType::IDENTIFIER(_) = tok {
                    eprintln!(
                        "[GRAMMAR-DIFF] tokenizer=IDENT, grammar=KEYWORD({}) word='{}'",
                        kw, identifier
                    );
                }
            } else if !matches!(tok, TokenType::IDENTIFIER(_)) {
                eprintln!(
                    "[GRAMMAR-DIFF] tokenizer=KEYWORD, grammar=IDENT word='{}'",
                    identifier
                );
            }
        }
        tok
    }
}
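
// ---------------------------------------------------------------------------
// Usage sketch (not from the original file): a minimal test illustrating the
// keyword-vs-identifier split. It ASSUMES a `NyashTokenizer::new(&str)`
// constructor exists; the real construction/setup API may differ, so adapt
// before compiling.
// ---------------------------------------------------------------------------
#[cfg(test)]
mod keyword_sketch {
    use super::*;

    #[test]
    fn classifies_keywords_and_identifiers() {
        // "loop" is a reserved word and is not in either gated set, so it
        // should always lex as the LOOP keyword.
        let mut t = NyashTokenizer::new("loop"); // hypothetical constructor
        assert!(matches!(t.read_keyword_or_identifier(), TokenType::LOOP));

        // "loopy" shares a prefix with a keyword but must fall through to
        // the IDENTIFIER arm.
        let mut t = NyashTokenizer::new("loopy"); // hypothetical constructor
        assert!(matches!(
            t.read_keyword_or_identifier(),
            TokenType::IDENTIFIER(ref s) if s == "loopy"
        ));
    }
}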