// hakorune/src/parser/mod.rs

/*!
 * Nyash Parser - Rust Implementation
 *
 * A complete Rust reimplementation of NyashParser from the Python version (nyashc_v4.py).
 * Converts the token stream into an AST (Abstract Syntax Tree).
 *
 * Module structure:
 * - common.rs: shared utilities and traits (ParserUtils)
 * - expressions.rs: expression parsers (parse_expression, parse_or, parse_and, etc.)
 * - statements.rs: statement parsers (parse_statement, parse_if, parse_loop, etc.)
 * - declarations/: Box declaration parsers (box_definition, static_box, dependency_helpers)
 * - items/: top-level item parsers (global_vars, functions, static_items)
 *
 * 2025-08-16: Major refactoring completed
 * - 1530 lines → 227 lines (85% reduction)
 * - Split into per-feature modules for maintainability
 */
// Submodule declarations
mod common;
mod cursor; // TokenCursor: centralizes newline handling
mod declarations;
// depth_tracking.rs was a legacy depth counter for Smart advance.
// Phase 15.5: removed in favor of TokenCursor-centric newline handling.
pub mod entry_sugar; // helper to parse with sugar level
mod expr;
mod expr_cursor; // experimental expression parser built on TokenCursor
mod expressions;
mod items;
mod statements; // Now uses modular structure in statements/
pub mod sugar; // Phase 12.7-B: desugar pass (basic)
pub mod sugar_gate; // thread-local gate for sugar parsing (tests/docs)
// mod errors;
use common::ParserUtils;
use crate::ast::{ASTNode, Span};
use crate::tokenizer::{Token, TokenType, TokenizeError};
use thiserror::Error;
#[inline]
fn is_sugar_enabled() -> bool {
    crate::parser::sugar_gate::is_enabled()
}
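// Example (sketch): this gate is what makes `x += 1` legal in
// parse_assignment_or_function_call below; with the gate disabled,
// compound-assignment tokens are reported as UnexpectedToken with a hint
// to set NYASH_SYNTAX_SUGAR_LEVEL=basic|full.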
// ===== 🔥 Debug Macros =====
/// Infinite loop detection macro - must be called in every loop that advances tokens
/// Prevents parser from hanging due to token consumption bugs
/// Uses parser's debug_fuel field for centralized fuel management
#[macro_export]
macro_rules! must_advance {
    ($parser:expr, $fuel:expr, $location:literal) => {
        // Only enforce the limit when debug fuel is Some
        if let Some(ref mut limit) = $parser.debug_fuel {
            if *limit == 0 {
                eprintln!("🚨 PARSER INFINITE LOOP DETECTED at {}", $location);
                eprintln!(
                    "🔍 Current token: {:?} at line {}",
                    $parser.current_token().token_type,
                    $parser.current_token().line
                );
                eprintln!(
                    "🔍 Parser position: {}/{}",
                    $parser.current,
                    $parser.tokens.len()
                );
                return Err($crate::parser::ParseError::InfiniteLoop {
                    location: $location.to_string(),
                    token: $parser.current_token().token_type.clone(),
                    line: $parser.current_token().line,
                });
            }
            *limit -= 1;
        }
        // None means unlimited, so no check is performed
    };
}
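// Usage sketch (hypothetical loop; assumes a `&mut NyashParser` in scope as
// `self` and that each iteration consumes at least one token):
//
//     while !self.is_at_end() {
//         must_advance!(self, debug_fuel!(), "parse_block");
//         self.advance();
//     }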
/// Initialize debug fuel for loop monitoring
#[macro_export]
macro_rules! debug_fuel {
    () => {
        100_000 // Default: 100k iterations should be enough for any reasonable program
    };
}
// Two-phase parser structures are no longer needed - simplified to direct parsing
/// Parse errors
#[derive(Error, Debug)]
pub enum ParseError {
    #[error("Unexpected token {found:?}, expected {expected} at line {line}")]
    UnexpectedToken {
        found: TokenType,
        expected: String,
        line: usize,
    },
    #[error("Unexpected end of file")]
    UnexpectedEOF,
    #[error("Invalid expression at line {line}")]
    InvalidExpression { line: usize },
    #[error("Invalid statement at line {line}")]
    InvalidStatement { line: usize },
    #[error("Unsupported identifier '{name}' at line {line}")]
    UnsupportedIdentifier { name: String, line: usize },
    #[error("Circular dependency detected between static boxes: {cycle}")]
    CircularDependency { cycle: String },
    #[error("🚨 Infinite loop detected in parser at {location} - token: {token:?} at line {line}")]
    InfiniteLoop {
        location: String,
        token: TokenType,
        line: usize,
    },
    #[error("🔥 Transparency system removed: {suggestion} at line {line}")]
    TransparencySystemRemoved { suggestion: String, line: usize },
    #[error(
        "Unsupported namespace '{name}' at line {line}. Only 'nyashstd' is supported in Phase 0."
    )]
    UnsupportedNamespace { name: String, line: usize },
    #[error("Expected identifier at line {line}")]
    ExpectedIdentifier { line: usize },
    #[error("Tokenize error: {0}")]
    TokenizeError(#[from] TokenizeError),
}
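// Handling sketch (uses only this module's public API): every variant
// implements Display via thiserror, so callers can surface errors directly:
//
//     match NyashParser::parse_from_string(src) {
//         Ok(ast) => run(ast),                    // `run` is hypothetical
//         Err(e) => eprintln!("parse error: {e}"),
//     }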
/// Nyash parser - converts a token stream into an AST
pub struct NyashParser {
    pub(super) tokens: Vec<Token>,
    pub(super) current: usize,
    /// 🔥 Static box dependency tracking (for circular dependency detection)
    pub(super) static_box_dependencies:
        std::collections::HashMap<String, std::collections::HashSet<String>>,
    /// 🔥 Debug fuel: iteration limit for infinite loop detection (None = unlimited)
    pub(super) debug_fuel: Option<usize>,
}
// ParserUtils trait implementation now lives here (legacy depth tracking removed)
impl NyashParser {
    /// Create a new parser
    pub fn new(tokens: Vec<Token>) -> Self {
        Self {
            tokens,
            current: 0,
            static_box_dependencies: std::collections::HashMap::new(),
            debug_fuel: Some(100_000), // default
        }
    }
    /// Parse from a string (tokenize + parse)
    /// Note: reads parser_stage3_enabled() (NYASH_FEATURES=stage3 or the legacy env var) for using-chain parsing
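    ///
    /// Example (sketch; the program text is illustrative, not canonical Nyash):
    ///
    /// ```ignore
    /// let ast = NyashParser::parse_from_string("a = 1 || 0")?;
    /// // `||`/`&&` in the source are normalized to `or`/`and` before tokenization.
    /// ```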
    pub fn parse_from_string(input: impl Into<String>) -> Result<ASTNode, ParseError> {
        // Ensure Stage-3 features are enabled when parsing using-chain files,
        // i.e. when the parent requested Stage-3 parsing via NYASH_FEATURES/legacy env
        Self::parse_from_string_with_fuel(input, Some(100_000))
    }
    /// Parse from a string (with explicit debug fuel)
    /// fuel: Some(n) = at most n iterations, None = unlimited
    pub fn parse_from_string_with_fuel(
        input: impl Into<String>,
        fuel: Option<usize>,
    ) -> Result<ASTNode, ParseError> {
        // Normalize logical operators '||'/'&&' to 'or'/'and' before tokenization
        // (skipping string literals and comments)
        fn normalize_logical_ops(src: &str) -> String {
            let mut out = String::with_capacity(src.len());
            let mut it = src.chars().peekable();
            let mut in_str = false;
            let mut in_line = false;
            let mut in_block = false;
            while let Some(c) = it.next() {
                if in_line {
                    out.push(c);
                    if c == '\n' {
                        in_line = false;
                    }
                    continue;
                }
                if in_block {
                    out.push(c);
                    if c == '*' && matches!(it.peek(), Some('/')) {
                        out.push('/');
                        it.next();
                        in_block = false;
                    }
                    continue;
                }
                if in_str {
                    out.push(c);
                    if c == '\\' {
                        if let Some(nc) = it.next() {
                            out.push(nc);
                        }
                        continue;
                    }
                    if c == '"' {
                        in_str = false;
                    }
                    continue;
                }
                match c {
                    '"' => {
                        in_str = true;
                        out.push(c);
                    }
                    '/' => match it.peek() {
                        Some('/') => {
                            out.push_str("//");
                            it.next();
                            in_line = true;
                        }
                        Some('*') => {
                            out.push_str("/*");
                            it.next();
                            in_block = true;
                        }
                        _ => out.push('/'),
                    },
                    '#' => {
                        in_line = true;
                        out.push('#');
                    }
                    '|' => {
                        if matches!(it.peek(), Some('|')) {
                            out.push_str(" or ");
                            it.next();
                        } else if matches!(it.peek(), Some('>')) {
                            // keep the pipeline operator '|>' intact
                            out.push_str("|>");
                            it.next();
                        } else {
                            out.push('|');
                        }
                    }
                    '&' => {
                        if matches!(it.peek(), Some('&')) {
                            out.push_str(" and ");
                            it.next();
                        } else {
                            out.push('&');
                        }
                    }
                    _ => out.push(c),
                }
            }
            out
        }
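        // Behavior sketch: `a || b && c` becomes `a or b and c`, while `|>`
        // and any `||`/`&&` inside string literals or comments are left
        // untouched (e.g. `"x || y"` stays as written).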
        let input_s: String = input.into();
        let pre = normalize_logical_ops(&input_s);
        let mut tokenizer = crate::tokenizer::NyashTokenizer::new(pre);
        let tokens = tokenizer.tokenize()?;
        // Reject the reserved identifier `self` before parsing begins
        for tok in &tokens {
            if let TokenType::IDENTIFIER(name) = &tok.token_type {
                if name == "self" {
                    return Err(ParseError::UnsupportedIdentifier {
                        name: name.clone(),
                        line: tok.line,
                    });
                }
            }
        }
        let mut parser = Self::new(tokens);
        parser.debug_fuel = fuel;
        parser.parse()
    }
    /// Run the parse - returns a Program AST
    pub fn parse(&mut self) -> Result<ASTNode, ParseError> {
        self.parse_program()
    }

    // ===== Parse functions =====

    /// Parse the whole program
    fn parse_program(&mut self) -> Result<ASTNode, ParseError> {
        let mut statements = Vec::new();
        let mut _statement_count = 0;
        let allow_sc = std::env::var("NYASH_PARSER_ALLOW_SEMICOLON")
            .ok()
            .map(|v| {
                let lv = v.to_ascii_lowercase();
                !(lv == "0" || lv == "false" || lv == "off")
            })
            .unwrap_or(true);
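        // Example (sketch): with the default (unset, or any value other than
        // "0"/"false"/"off"), `a; b` splits at the ';' just like a newline;
        // NYASH_PARSER_ALLOW_SEMICOLON=0 leaves the ';' in the stream, so
        // statement parsing is expected to reject it.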
        while !self.is_at_end() {
            // Skip EOF tokens
            if matches!(self.current_token().token_type, TokenType::EOF) {
                break;
            }
            // Skip NEWLINE tokens (used as statement separators)
            if matches!(self.current_token().token_type, TokenType::NEWLINE)
                || (allow_sc && matches!(self.current_token().token_type, TokenType::SEMICOLON))
            {
                self.advance();
                continue;
            }
            let statement = self.parse_statement()?;
            statements.push(statement);
            _statement_count += 1;
        }
        // 🔥 Detect circular dependencies after all static boxes are parsed
        self.check_circular_dependencies()?;
        Ok(ASTNode::Program {
            statements,
            span: Span::unknown(),
        })
    }
    // Statement parsing methods are now in the statements.rs module

    /// Parse an assignment statement or a function call
    fn parse_assignment_or_function_call(&mut self) -> Result<ASTNode, ParseError> {
        // First, parse the left-hand side as an expression
        let expr = self.parse_expression()?;
        // If the next token is '=' or a compound-assignment operator, this is an assignment
        if self.match_token(&TokenType::ASSIGN) {
            self.advance(); // consume '='
            let value = Box::new(self.parse_expression()?);
            // Check that the left-hand side is an assignable form
            match &expr {
                ASTNode::Variable { .. } | ASTNode::FieldAccess { .. } | ASTNode::Index { .. } => {
                    Ok(ASTNode::Assignment {
                        target: Box::new(expr),
                        value,
                        span: Span::unknown(),
                    })
                }
                _ => {
                    let line = self.current_token().line;
                    Err(ParseError::InvalidStatement { line })
                }
            }
        } else if self.match_token(&TokenType::PlusAssign)
            || self.match_token(&TokenType::MinusAssign)
            || self.match_token(&TokenType::MulAssign)
            || self.match_token(&TokenType::DivAssign)
        {
            if !is_sugar_enabled() {
                let line = self.current_token().line;
                return Err(ParseError::UnexpectedToken {
                    found: self.current_token().token_type.clone(),
                    expected: "enable NYASH_SYNTAX_SUGAR_LEVEL=basic|full for '+=' and friends"
                        .to_string(),
                    line,
                });
            }
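            // Desugar sketch: `x += e` is rewritten below to `x = x + e`,
            // i.e. an Assignment whose value is a BinaryOp over a clone of
            // the left-hand side.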
            // Determine the operator
            let op = match &self.current_token().token_type {
                TokenType::PlusAssign => crate::ast::BinaryOperator::Add,
                TokenType::MinusAssign => crate::ast::BinaryOperator::Subtract,
                TokenType::MulAssign => crate::ast::BinaryOperator::Multiply,
                TokenType::DivAssign => crate::ast::BinaryOperator::Divide,
                _ => unreachable!(),
            };
            self.advance(); // consume 'op='
            let rhs = self.parse_expression()?;
            // Check that the left-hand side is an assignable form
            match &expr {
                ASTNode::Variable { .. } | ASTNode::FieldAccess { .. } => {
                    let left_clone = expr.clone();
                    let value = ASTNode::BinaryOp {
                        operator: op,
                        left: Box::new(left_clone),
                        right: Box::new(rhs),
                        span: Span::unknown(),
                    };
                    Ok(ASTNode::Assignment {
                        target: Box::new(expr),
                        value: Box::new(value),
                        span: Span::unknown(),
                    })
                }
                _ => {
                    let line = self.current_token().line;
                    Err(ParseError::InvalidStatement { line })
                }
            }
        } else {
            // Not an assignment; return it as an expression statement
            Ok(expr)
        }
    }
    // Expression parsing methods are now in the expressions.rs module
    // Utility methods are now in the common.rs module via the ParserUtils trait
    // Item parsing methods are now in the items.rs module
    // ===== 🔥 Static box circular dependency detection =====
}
// ---- Minimal ParserUtils impl (depth-less; TokenCursor handles newline policy) ----
impl common::ParserUtils for NyashParser {
    fn tokens(&self) -> &Vec<Token> {
        &self.tokens
    }
    fn current(&self) -> usize {
        self.current
    }
    fn current_mut(&mut self) -> &mut usize {
        &mut self.current
    }
    fn update_depth_before_advance(&mut self) { /* no-op (legacy removed) */
    }
    fn update_depth_after_advance(&mut self) { /* no-op (legacy removed) */
    }
}
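
// ---- Smoke tests (sketch; written against only this module's public API;
// the program snippets are illustrative rather than canonical Nyash) ----
#[cfg(test)]
mod mod_tests {
    use super::*;

    #[test]
    fn rejects_self_identifier() {
        // `self` is rejected by the pre-parse token scan in parse_from_string.
        let err = NyashParser::parse_from_string("self").unwrap_err();
        assert!(matches!(err, ParseError::UnsupportedIdentifier { .. }));
    }

    #[test]
    fn parses_a_trivial_program() {
        // Hypothetical snippet; `1 || 0` also exercises the '||' -> 'or'
        // normalization path. If the snippet is not valid Nyash, the Err arm
        // simply skips the shape check.
        if let Ok(ast) = NyashParser::parse_from_string("a = 1 || 0") {
            assert!(matches!(ast, ASTNode::Program { .. }));
        }
    }
}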