/*!
 * Nyash Parser - Rust Implementation
 *
 * A complete Rust reimplementation of the NyashParser from the Python version (nyashc_v4.py).
 * Converts a token stream into an AST (Abstract Syntax Tree).
 *
 * Module structure:
 * - common.rs: shared utilities and traits (ParserUtils)
 * - expressions.rs: expression parsers (parse_expression, parse_or, parse_and, etc.)
 * - statements.rs: statement parsers (parse_statement, parse_if, parse_loop, etc.)
 * - declarations/: Box declaration parsers (box_definition, static_box, dependency_helpers)
 * - items/: top-level declarations (global_vars, functions, static_items)
 *
 * 2025-08-16: Large-scale refactoring completed
 * - 1530 lines → 227 lines (85% reduction)
 * - Split into modules by feature for better maintainability
 */

// Submodule declarations
mod common;
mod cursor; // TokenCursor: centralizes newline handling
mod declarations;
// depth_tracking.rs was a legacy depth counter for Smart advance.
// Phase 15.5: removed in favor of TokenCursor-centric newline handling.
pub mod entry_sugar; // helper to parse with sugar level
mod expr;
mod expr_cursor; // expression parser built on TokenCursor (experimental)
mod expressions;
mod items;
mod statements; // Now uses modular structure in statements/
pub mod sugar; // Phase 12.7-B: desugar pass (basic)
pub mod sugar_gate; // thread-local gate for sugar parsing (tests/docs)
// mod errors;

use common::ParserUtils;

use crate::ast::{ASTNode, Span};
use crate::tokenizer::{Token, TokenType, TokenizeError};
use thiserror::Error;

#[inline]
fn is_sugar_enabled() -> bool {
    crate::parser::sugar_gate::is_enabled()
}

// ===== 🔥 Debug Macros =====

/// Infinite loop detection macro - must be called in every loop that advances tokens
/// Prevents parser from hanging due to token consumption bugs
/// Uses parser's debug_fuel field for centralized fuel management
#[macro_export]
macro_rules! must_advance {
    ($parser:expr, $fuel:expr, $location:literal) => {
        // Only check the limit when debug fuel is Some
        if let Some(ref mut limit) = $parser.debug_fuel {
            if *limit == 0 {
                eprintln!("🚨 PARSER INFINITE LOOP DETECTED at {}", $location);
                eprintln!(
                    "🔍 Current token: {:?} at line {}",
                    $parser.current_token().token_type,
                    $parser.current_token().line
                );
                eprintln!(
                    "🔍 Parser position: {}/{}",
                    $parser.current,
                    $parser.tokens.len()
                );
                return Err($crate::parser::ParseError::InfiniteLoop {
                    location: $location.to_string(),
                    token: $parser.current_token().token_type.clone(),
                    line: $parser.current_token().line,
                });
            }
            *limit -= 1;
        }
        // None means unlimited, so no check is performed
    };
}
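
// Illustrative usage sketch (not taken from the original source): the macro is meant
// to be invoked once per iteration of a token-consuming loop so a stuck parser fails
// fast with ParseError::InfiniteLoop instead of hanging. For example:
//
//     while !self.is_at_end() {
//         must_advance!(self, self.debug_fuel, "parse_program");
//         // ... parse and consume at least one token per iteration ...
//     }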

/// Initialize debug fuel for loop monitoring
#[macro_export]
macro_rules! debug_fuel {
    () => {
        100_000 // Default: 100k iterations should be enough for any reasonable program
    };
}
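
// Hypothetical usage sketch (not from the original source): the macro simply expands
// to the default budget, so a caller could seed a parser's fuel with it, e.g.
// `parser.debug_fuel = Some(debug_fuel!());`.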

// Two-phase parser structures are no longer needed - simplified to direct parsing

/// Parse errors
#[derive(Error, Debug)]
pub enum ParseError {
    #[error("Unexpected token {found:?}, expected {expected} at line {line}")]
    UnexpectedToken {
        found: TokenType,
        expected: String,
        line: usize,
    },

    #[error("Unexpected end of file")]
    UnexpectedEOF,

    #[error("Invalid expression at line {line}")]
    InvalidExpression { line: usize },

    #[error("Invalid statement at line {line}")]
    InvalidStatement { line: usize },

    #[error("Unsupported identifier '{name}' at line {line}")]
    UnsupportedIdentifier { name: String, line: usize },

    #[error("Circular dependency detected between static boxes: {cycle}")]
    CircularDependency { cycle: String },

    #[error("🚨 Infinite loop detected in parser at {location} - token: {token:?} at line {line}")]
    InfiniteLoop {
        location: String,
        token: TokenType,
        line: usize,
    },

    #[error("🔥 Transparency system removed: {suggestion} at line {line}")]
    TransparencySystemRemoved { suggestion: String, line: usize },

    #[error(
        "Unsupported namespace '{name}' at line {line}. Only 'nyashstd' is supported in Phase 0."
    )]
    UnsupportedNamespace { name: String, line: usize },

    #[error("Expected identifier at line {line}")]
    ExpectedIdentifier { line: usize },

    #[error("Tokenize error: {0}")]
    TokenizeError(#[from] TokenizeError),
}

/// Nyash parser - converts a token stream into an AST
pub struct NyashParser {
    pub(super) tokens: Vec<Token>,
    pub(super) current: usize,
    /// 🔥 Static box dependency tracking (for circular dependency detection)
    pub(super) static_box_dependencies:
        std::collections::HashMap<String, std::collections::HashSet<String>>,
    /// 🔥 Debug fuel: iteration limit for infinite-loop detection (None = unlimited)
    pub(super) debug_fuel: Option<usize>,
}

// ParserUtils trait implementation now lives here (legacy depth tracking removed)

impl NyashParser {
    /// Create a new parser
    pub fn new(tokens: Vec<Token>) -> Self {
        Self {
            tokens,
            current: 0,
            static_box_dependencies: std::collections::HashMap::new(),
            debug_fuel: Some(100_000), // default value
        }
    }

    /// Parse from a string (tokenize + parse)
    /// Note: Reads parser_stage3_enabled() (NYASH_FEATURES=stage3 or legacy env) for using-chain parsing
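    ///
    /// Illustrative usage (a sketch; `source` stands for any Nyash program text):
    /// ```ignore
    /// let ast = NyashParser::parse_from_string(source)?;
    /// ```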
    pub fn parse_from_string(input: impl Into<String>) -> Result<ASTNode, ParseError> {
        // Stage-3 (using-chain) parsing is enabled when the caller requests it
        // via NYASH_FEATURES or the legacy environment variable.
        Self::parse_from_string_with_fuel(input, Some(100_000))
    }

    /// Parse from a string (variant with an explicit debug-fuel limit)
    /// fuel: Some(n) = at most n iterations, None = unlimited
    pub fn parse_from_string_with_fuel(
        input: impl Into<String>,
        fuel: Option<usize>,
    ) -> Result<ASTNode, ParseError> {
        // Normalize logical operators '||'/'&&' to 'or'/'and' before tokenization (outside strings/comments)
        fn normalize_logical_ops(src: &str) -> String {
            let mut out = String::with_capacity(src.len());
            let mut it = src.chars().peekable();
            let mut in_str = false;
            let mut in_line = false;
            let mut in_block = false;
            while let Some(c) = it.next() {
                if in_line {
                    out.push(c);
                    if c == '\n' {
                        in_line = false;
                    }
                    continue;
                }
                if in_block {
                    out.push(c);
                    if c == '*' && matches!(it.peek(), Some('/')) {
                        out.push('/');
                        it.next();
                        in_block = false;
                    }
                    continue;
                }
                if in_str {
                    out.push(c);
                    if c == '\\' {
                        if let Some(nc) = it.next() {
                            out.push(nc);
                        }
                        continue;
                    }
                    if c == '"' {
                        in_str = false;
                    }
                    continue;
                }
                match c {
                    '"' => {
                        in_str = true;
                        out.push(c);
                    }
                    '/' => match it.peek() {
                        Some('/') => {
                            out.push('/');
                            out.push('/');
                            it.next();
                            in_line = true;
                        }
                        Some('*') => {
                            out.push('/');
                            out.push('*');
                            it.next();
                            in_block = true;
                        }
                        _ => out.push('/'),
                    },
                    '#' => {
                        in_line = true;
                        out.push('#');
                    }
                    '|' => {
                        if matches!(it.peek(), Some('|')) {
                            out.push_str(" or ");
                            it.next();
                        } else if matches!(it.peek(), Some('>')) {
                            out.push('|');
                            out.push('>');
                            it.next();
                        } else {
                            out.push('|');
                        }
                    }
                    '&' => {
                        if matches!(it.peek(), Some('&')) {
                            out.push_str(" and ");
                            it.next();
                        } else {
                            out.push('&');
                        }
                    }
                    _ => out.push(c),
                }
            }
            out
        }
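        // Example of the rules above: "a||b" becomes "a or b" and "a&&b" becomes
        // "a and b", while '||'/'&&' inside string literals, '#'/'//' line comments,
        // and '/* */' block comments are left untouched; the '|>' sequence is also
        // preserved as-is.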
        let input_s: String = input.into();
        let pre = normalize_logical_ops(&input_s);
        let mut tokenizer = crate::tokenizer::NyashTokenizer::new(pre);
        let tokens = tokenizer.tokenize()?;

        // Reject the unsupported identifier 'self' up front
        for tok in &tokens {
            if let TokenType::IDENTIFIER(name) = &tok.token_type {
                if name == "self" {
                    return Err(ParseError::UnsupportedIdentifier {
                        name: name.clone(),
                        line: tok.line,
                    });
                }
            }
        }

        let mut parser = Self::new(tokens);
        parser.debug_fuel = fuel;
        parser.parse()
    }

    /// Run the parse - returns a Program AST
    pub fn parse(&mut self) -> Result<ASTNode, ParseError> {
        self.parse_program()
    }

    // ===== Parsing functions =====

    /// Parse the entire program
    fn parse_program(&mut self) -> Result<ASTNode, ParseError> {
        let mut statements = Vec::new();
        let mut _statement_count = 0;

        let allow_sc = std::env::var("NYASH_PARSER_ALLOW_SEMICOLON")
            .ok()
            .map(|v| {
                let lv = v.to_ascii_lowercase();
                !(lv == "0" || lv == "false" || lv == "off")
            })
            .unwrap_or(true);
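        // Semicolon statement separators are allowed by default; setting
        // NYASH_PARSER_ALLOW_SEMICOLON to "0", "false", or "off" disables them.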

        while !self.is_at_end() {
            // Skip the EOF token
            if matches!(self.current_token().token_type, TokenType::EOF) {
                break;
            }

            // Skip NEWLINE tokens (they act as statement separators)
            if matches!(self.current_token().token_type, TokenType::NEWLINE)
                || (allow_sc && matches!(self.current_token().token_type, TokenType::SEMICOLON))
            {
                self.advance();
                continue;
            }

            let statement = self.parse_statement()?;
            statements.push(statement);
            _statement_count += 1;
        }

        // 🔥 Detect circular dependencies after all static boxes have been parsed
        self.check_circular_dependencies()?;

        Ok(ASTNode::Program {
            statements,
            span: Span::unknown(),
        })
    }
    // Statement parsing methods are now in statements.rs module

    /// Parse an assignment statement or a function call
    fn parse_assignment_or_function_call(&mut self) -> Result<ASTNode, ParseError> {
        // First, parse the left-hand side as an expression
        let expr = self.parse_expression()?;

        // If the next token is '=' or a compound assignment operator, this is an assignment
        if self.match_token(&TokenType::ASSIGN) {
            self.advance(); // consume '='
            let value = Box::new(self.parse_expression()?);

            // Check that the left-hand side is an assignable form
            match &expr {
                ASTNode::Variable { .. } | ASTNode::FieldAccess { .. } | ASTNode::Index { .. } => {
                    Ok(ASTNode::Assignment {
                        target: Box::new(expr),
                        value,
                        span: Span::unknown(),
                    })
                }
                _ => {
                    let line = self.current_token().line;
                    Err(ParseError::InvalidStatement { line })
                }
            }
        } else if self.match_token(&TokenType::PlusAssign)
            || self.match_token(&TokenType::MinusAssign)
            || self.match_token(&TokenType::MulAssign)
            || self.match_token(&TokenType::DivAssign)
        {
            if !is_sugar_enabled() {
                let line = self.current_token().line;
                return Err(ParseError::UnexpectedToken {
                    found: self.current_token().token_type.clone(),
                    expected: "enable NYASH_SYNTAX_SUGAR_LEVEL=basic|full for '+=' and friends"
                        .to_string(),
                    line,
                });
            }
            // Determine the operator
            let op = match &self.current_token().token_type {
                TokenType::PlusAssign => crate::ast::BinaryOperator::Add,
                TokenType::MinusAssign => crate::ast::BinaryOperator::Subtract,
                TokenType::MulAssign => crate::ast::BinaryOperator::Multiply,
                TokenType::DivAssign => crate::ast::BinaryOperator::Divide,
                _ => unreachable!(),
            };
            self.advance(); // consume 'op='
            let rhs = self.parse_expression()?;
            // Check that the left-hand side is an assignable form, then desugar
            // `a op= b` into `a = a op b` (an Assignment whose value is a BinaryOp)
            match &expr {
                ASTNode::Variable { .. } | ASTNode::FieldAccess { .. } => {
                    let left_clone = expr.clone();
                    let value = ASTNode::BinaryOp {
                        operator: op,
                        left: Box::new(left_clone),
                        right: Box::new(rhs),
                        span: Span::unknown(),
                    };
                    Ok(ASTNode::Assignment {
                        target: Box::new(expr),
                        value: Box::new(value),
                        span: Span::unknown(),
                    })
                }
                _ => {
                    let line = self.current_token().line;
                    Err(ParseError::InvalidStatement { line })
                }
            }
        } else {
            // Not an assignment, so return it as an expression statement
            Ok(expr)
        }
    }

    // Expression parsing methods are now in expressions.rs module
    // Utility methods are now in common.rs module via ParserUtils trait
    // Item parsing methods are now in items.rs module

    // ===== 🔥 Static box circular dependency detection =====
}

// ---- Minimal ParserUtils impl (depth-less; TokenCursor handles newline policy) ----
impl common::ParserUtils for NyashParser {
    fn tokens(&self) -> &Vec<Token> {
        &self.tokens
    }
    fn current(&self) -> usize {
        self.current
    }
    fn current_mut(&mut self) -> &mut usize {
        &mut self.current
    }
    fn update_depth_before_advance(&mut self) { /* no-op (legacy removed) */ }
    fn update_depth_after_advance(&mut self) { /* no-op (legacy removed) */ }
}
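
// ---- Illustrative usage sketch (added for documentation; not part of the original module) ----
#[cfg(test)]
mod parser_usage_sketch {
    use super::*;

    // Minimal sketch of the public entry points. The Nyash snippet is hypothetical,
    // so only the API shape is exercised; the parse results are not asserted on.
    #[test]
    fn entry_points_compile_and_run() {
        let _ = NyashParser::parse_from_string("x = 1 + 2");
        let _ = NyashParser::parse_from_string_with_fuel("x = 1 + 2", Some(10_000));
    }
}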