2025-08-09 15:14:44 +09:00
/*!
 * Nyash Parser - Rust Implementation
 *
 * A complete Rust re-implementation of `NyashParser` from the Python version (`nyashc_v4.py`).
 * Converts a token stream into an AST (Abstract Syntax Tree).
 *
 * Module layout:
 * - common.rs: shared utilities and traits (ParserUtils)
 * - expressions.rs: expression parser (parse_expression, parse_or, parse_and, etc.)
 * - statements.rs: statement parser (parse_statement, parse_if, parse_loop, etc.)
 * - declarations/: Box declaration parsers (box_definition, static_box, dependency_helpers)
 * - items/: top-level declarations (global_vars, functions, static_items)
 *
 * 2025-08-16: large-scale refactoring completed
 * - 1530 lines → 227 lines (85% reduction)
 * - Split into modules by feature to improve maintainability
 */
// サブモジュール宣言
2025-08-16 11:35:57 +09:00
mod common ;
2025-09-23 10:24:40 +09:00
mod cursor ; // TokenCursor: 改行処理を一元管理
2025-08-16 12:19:58 +09:00
mod declarations ;
2025-09-25 09:01:55 +09:00
// depth_tracking.rs was a legacy depth counter for Smart advance.
// Phase 15.5: removed in favor of TokenCursor-centric newline handling.
2025-09-17 07:43:07 +09:00
pub mod entry_sugar ; // helper to parse with sugar level
mod expr ;
2025-09-23 10:24:40 +09:00
mod expr_cursor ; // TokenCursorを使用した式パーサー( 実験的)
2025-09-17 07:43:07 +09:00
mod expressions ;
2025-08-16 12:24:23 +09:00
mod items ;
2025-09-25 09:01:55 +09:00
mod statements ; // Now uses modular structure in statements/
2025-09-05 05:16:21 +09:00
pub mod sugar ; // Phase 12.7-B: desugar pass (basic)
pub mod sugar_gate ; // thread-local gate for sugar parsing (tests/docs)
2025-09-17 07:43:07 +09:00
// mod errors;
2025-08-09 15:14:44 +09:00
2025-08-16 11:35:57 +09:00
use common ::ParserUtils ;
2025-08-09 15:14:44 +09:00
use crate ::ast ::{ ASTNode , Span } ;
2025-09-17 07:43:07 +09:00
use crate ::tokenizer ::{ Token , TokenType , TokenizeError } ;
2025-08-09 15:14:44 +09:00
use thiserror ::Error ;
2025-09-05 05:16:21 +09:00
#[ inline ]
2025-09-17 07:43:07 +09:00
fn is_sugar_enabled ( ) -> bool {
crate ::parser ::sugar_gate ::is_enabled ( )
}
2025-09-05 05:16:21 +09:00
2025-08-10 11:32:32 +09:00
// ===== 🔥 Debug Macros =====
/// Infinite-loop guard for parser loops.
///
/// Intended to be invoked in every loop that advances tokens, so that a token
/// consumption bug cannot hang the parser. Fuel is tracked centrally in the
/// parser's `debug_fuel` field:
///
/// * `Some(n)` with `n > 0` — one unit of fuel is consumed.
/// * `Some(0)` — diagnostics are written to stderr and the *enclosing function*
///   returns `Err(ParseError::InfiniteLoop { .. })`.
/// * `None` — unlimited; nothing is checked.
///
/// Arguments: `$parser` is the parser expression, `$location` a string literal
/// naming the call site. `$fuel` is accepted for call-site compatibility but is
/// not used by the expansion.
#[macro_export]
macro_rules! must_advance {
    ($parser:expr, $fuel:expr, $location:literal) => {
        match $parser.debug_fuel {
            // Budget exhausted: report where we are stuck and bail out.
            Some(0) => {
                eprintln!(" 🚨 PARSER INFINITE LOOP DETECTED at {} ", $location);
                eprintln!(
                    " 🔍 Current token: {:?} at line {} ",
                    $parser.current_token().token_type,
                    $parser.current_token().line
                );
                eprintln!(
                    " 🔍 Parser position: {}/{} ",
                    $parser.current,
                    $parser.tokens.len()
                );
                return Err($crate::parser::ParseError::InfiniteLoop {
                    location: $location.to_string(),
                    token: $parser.current_token().token_type.clone(),
                    line: $parser.current_token().line,
                });
            }
            // Still have fuel: burn one unit.
            Some(ref mut remaining) => {
                *remaining -= 1;
            }
            // Unlimited mode: no accounting at all.
            None => {}
        }
    };
}
/// Default debug-fuel budget used for loop monitoring.
///
/// Expands to the literal `100_000`; per the original note, 100k iterations
/// should be enough for any reasonable program.
#[macro_export]
macro_rules! debug_fuel {
    () => {
        100_000
    };
}
2025-08-09 15:14:44 +09:00
// Two-phase parser structures are no longer needed - simplified to direct parsing
/// パースエラー
#[ derive(Error, Debug) ]
pub enum ParseError {
#[ error( " Unexpected token {found:?}, expected {expected} at line {line} " ) ]
2025-09-17 07:43:07 +09:00
UnexpectedToken {
found : TokenType ,
expected : String ,
line : usize ,
} ,
2025-08-09 15:14:44 +09:00
#[ error( " Unexpected end of file " ) ]
UnexpectedEOF ,
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
#[ error( " Invalid expression at line {line} " ) ]
InvalidExpression { line : usize } ,
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
#[ error( " Invalid statement at line {line} " ) ]
InvalidStatement { line : usize } ,
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
#[ error( " Circular dependency detected between static boxes: {cycle} " ) ]
CircularDependency { cycle : String } ,
2025-09-17 07:43:07 +09:00
2025-08-10 11:32:32 +09:00
#[ error( " 🚨 Infinite loop detected in parser at {location} - token: {token:?} at line {line} " ) ]
2025-09-17 07:43:07 +09:00
InfiniteLoop {
location : String ,
token : TokenType ,
line : usize ,
} ,
2025-08-15 11:47:46 +00:00
#[ error( " 🔥 Transparency system removed: {suggestion} at line {line} " ) ]
TransparencySystemRemoved { suggestion : String , line : usize } ,
2025-09-17 07:43:07 +09:00
#[ error(
" Unsupported namespace '{name}' at line {line}. Only 'nyashstd' is supported in Phase 0. "
) ]
2025-08-16 01:12:10 +09:00
UnsupportedNamespace { name : String , line : usize } ,
2025-09-17 07:43:07 +09:00
2025-08-16 01:12:10 +09:00
#[ error( " Expected identifier at line {line} " ) ]
ExpectedIdentifier { line : usize } ,
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
#[ error( " Tokenize error: {0} " ) ]
TokenizeError ( #[ from ] TokenizeError ) ,
}
/// Nyashパーサー - トークン列をASTに変換
pub struct NyashParser {
2025-08-16 11:35:57 +09:00
pub ( super ) tokens : Vec < Token > ,
pub ( super ) current : usize ,
2025-08-09 15:14:44 +09:00
/// 🔥 Static box依存関係追跡( 循環依存検出用)
2025-09-17 07:43:07 +09:00
pub ( super ) static_box_dependencies :
std ::collections ::HashMap < String , std ::collections ::HashSet < String > > ,
2025-08-10 11:32:32 +09:00
/// 🔥 デバッグ燃料:無限ループ検出用制限値 (None = 無制限)
2025-08-16 11:35:57 +09:00
pub ( super ) debug_fuel : Option < usize > ,
}
2025-09-25 09:01:55 +09:00
// ParserUtils trait implementation now lives here (legacy depth tracking removed)
2025-08-09 15:14:44 +09:00
impl NyashParser {
/// 新しいパーサーを作成
pub fn new ( tokens : Vec < Token > ) -> Self {
Self {
tokens ,
current : 0 ,
static_box_dependencies : std ::collections ::HashMap ::new ( ) ,
2025-08-10 11:32:32 +09:00
debug_fuel : Some ( 100_000 ) , // デフォルト値
2025-08-09 15:14:44 +09:00
}
}
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
/// 文字列からパース (トークナイズ + パース)
pub fn parse_from_string ( input : impl Into < String > ) -> Result < ASTNode , ParseError > {
2025-08-10 11:32:32 +09:00
Self ::parse_from_string_with_fuel ( input , Some ( 100_000 ) )
}
2025-09-17 07:43:07 +09:00
2025-08-10 11:32:32 +09:00
/// 文字列からパース (デバッグ燃料指定版)
/// fuel: Some(n) = n回まで、None = 無制限
2025-09-17 07:43:07 +09:00
pub fn parse_from_string_with_fuel (
input : impl Into < String > ,
fuel : Option < usize > ,
) -> Result < ASTNode , ParseError > {
2025-09-22 07:54:25 +09:00
// Normalize logical operators '||'/'&&' to 'or'/'and' before tokenization (outside strings/comments)
fn normalize_logical_ops ( src : & str ) -> String {
let mut out = String ::with_capacity ( src . len ( ) ) ;
let mut it = src . chars ( ) . peekable ( ) ;
let mut in_str = false ;
let mut in_line = false ;
let mut in_block = false ;
while let Some ( c ) = it . next ( ) {
if in_line {
out . push ( c ) ;
if c = = '\n' { in_line = false ; }
continue ;
}
if in_block {
out . push ( c ) ;
if c = = '*' & & matches! ( it . peek ( ) , Some ( '/' ) ) { out . push ( '/' ) ; it . next ( ) ; in_block = false ; }
continue ;
}
if in_str {
out . push ( c ) ;
if c = = '\\' { if let Some ( nc ) = it . next ( ) { out . push ( nc ) ; } continue ; }
if c = = '"' { in_str = false ; }
continue ;
}
match c {
'"' = > { in_str = true ; out . push ( c ) ; }
'/' = > {
match it . peek ( ) { Some ( '/' ) = > { out . push ( '/' ) ; out . push ( '/' ) ; it . next ( ) ; in_line = true ; } , Some ( '*' ) = > { out . push ( '/' ) ; out . push ( '*' ) ; it . next ( ) ; in_block = true ; } , _ = > out . push ( '/' ) }
}
'#' = > { in_line = true ; out . push ( '#' ) ; }
'|' = > {
if matches! ( it . peek ( ) , Some ( '|' ) ) { out . push_str ( " or " ) ; it . next ( ) ; } else if matches! ( it . peek ( ) , Some ( '>' ) ) { out . push ( '|' ) ; out . push ( '>' ) ; it . next ( ) ; } else { out . push ( '|' ) ; }
}
'&' = > {
if matches! ( it . peek ( ) , Some ( '&' ) ) { out . push_str ( " and " ) ; it . next ( ) ; } else { out . push ( '&' ) ; }
}
_ = > out . push ( c ) ,
}
}
out
}
let input_s : String = input . into ( ) ;
let pre = normalize_logical_ops ( & input_s ) ;
let mut tokenizer = crate ::tokenizer ::NyashTokenizer ::new ( pre ) ;
2025-08-09 15:14:44 +09:00
let tokens = tokenizer . tokenize ( ) ? ;
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
let mut parser = Self ::new ( tokens ) ;
2025-08-10 11:32:32 +09:00
parser . debug_fuel = fuel ;
2025-08-10 07:54:03 +09:00
let result = parser . parse ( ) ;
result
2025-08-09 15:14:44 +09:00
}
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
/// パース実行 - Program ASTを返す
pub fn parse ( & mut self ) -> Result < ASTNode , ParseError > {
self . parse_program ( )
}
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
// ===== パース関数群 =====
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
/// プログラム全体をパース
fn parse_program ( & mut self ) -> Result < ASTNode , ParseError > {
let mut statements = Vec ::new ( ) ;
2025-08-16 17:39:04 +09:00
let mut _statement_count = 0 ;
2025-09-17 07:43:07 +09:00
2025-09-21 08:53:00 +09:00
let allow_sc = std ::env ::var ( " NYASH_PARSER_ALLOW_SEMICOLON " ) . ok ( ) . map ( | v | {
let lv = v . to_ascii_lowercase ( ) ;
2025-11-01 13:28:56 +09:00
! ( lv = = " 0 " | | lv = = " false " | | lv = = " off " )
} ) . unwrap_or ( true ) ;
2025-09-21 08:53:00 +09:00
2025-08-09 15:14:44 +09:00
while ! self . is_at_end ( ) {
// EOF tokenはスキップ
if matches! ( self . current_token ( ) . token_type , TokenType ::EOF ) {
break ;
}
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
// NEWLINE tokenはスキップ( 文の区切りとして使用)
2025-09-21 08:53:00 +09:00
if matches! ( self . current_token ( ) . token_type , TokenType ::NEWLINE )
| | ( allow_sc & & matches! ( self . current_token ( ) . token_type , TokenType ::SEMICOLON ) )
{
2025-08-09 15:14:44 +09:00
self . advance ( ) ;
continue ;
}
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
let statement = self . parse_statement ( ) ? ;
statements . push ( statement ) ;
2025-08-16 17:39:04 +09:00
_statement_count + = 1 ;
2025-08-09 15:14:44 +09:00
}
2025-09-17 07:43:07 +09:00
2025-08-09 15:14:44 +09:00
// 🔥 すべてのstatic box解析後に循環依存検出
self . check_circular_dependencies ( ) ? ;
2025-09-17 07:43:07 +09:00
Ok ( ASTNode ::Program {
statements ,
span : Span ::unknown ( ) ,
} )
2025-08-09 15:14:44 +09:00
}
// Statement parsing methods are now in statements.rs module
2025-09-17 07:43:07 +09:00
2025-08-16 12:19:58 +09:00
/// 代入文または関数呼び出しをパース
fn parse_assignment_or_function_call ( & mut self ) -> Result < ASTNode , ParseError > {
// まず左辺を式としてパース
let expr = self . parse_expression ( ) ? ;
2025-09-17 07:43:07 +09:00
2025-09-05 05:16:21 +09:00
// 次のトークンが = または 複合代入演算子 なら代入文
2025-08-16 12:19:58 +09:00
if self . match_token ( & TokenType ::ASSIGN ) {
self . advance ( ) ; // consume '='
let value = Box ::new ( self . parse_expression ( ) ? ) ;
2025-09-17 07:43:07 +09:00
2025-08-16 12:19:58 +09:00
// 左辺が代入可能な形式かチェック
match & expr {
2025-10-31 20:18:39 +09:00
ASTNode ::Variable { .. }
| ASTNode ::FieldAccess { .. }
| ASTNode ::Index { .. } = > Ok ( ASTNode ::Assignment {
2025-09-17 07:43:07 +09:00
target : Box ::new ( expr ) ,
value ,
span : Span ::unknown ( ) ,
} ) ,
2025-08-16 12:19:58 +09:00
_ = > {
2025-08-09 15:14:44 +09:00
let line = self . current_token ( ) . line ;
2025-08-16 12:19:58 +09:00
Err ( ParseError ::InvalidStatement { line } )
2025-08-09 15:14:44 +09:00
}
}
2025-09-17 07:43:07 +09:00
} else if self . match_token ( & TokenType ::PlusAssign )
| | self . match_token ( & TokenType ::MinusAssign )
| | self . match_token ( & TokenType ::MulAssign )
| | self . match_token ( & TokenType ::DivAssign )
{
2025-09-05 05:16:21 +09:00
if ! is_sugar_enabled ( ) {
let line = self . current_token ( ) . line ;
return Err ( ParseError ::UnexpectedToken {
found : self . current_token ( ) . token_type . clone ( ) ,
2025-09-17 07:43:07 +09:00
expected : " enable NYASH_SYNTAX_SUGAR_LEVEL=basic|full for '+=' and friends "
. to_string ( ) ,
2025-09-05 05:16:21 +09:00
line ,
} ) ;
}
// determine operator
let op = match & self . current_token ( ) . token_type {
2025-09-11 16:24:18 +09:00
TokenType ::PlusAssign = > crate ::ast ::BinaryOperator ::Add ,
TokenType ::MinusAssign = > crate ::ast ::BinaryOperator ::Subtract ,
TokenType ::MulAssign = > crate ::ast ::BinaryOperator ::Multiply ,
TokenType ::DivAssign = > crate ::ast ::BinaryOperator ::Divide ,
2025-09-05 05:16:21 +09:00
_ = > unreachable! ( ) ,
} ;
self . advance ( ) ; // consume 'op='
let rhs = self . parse_expression ( ) ? ;
// 左辺が代入可能な形式かチェック
match & expr {
ASTNode ::Variable { .. } | ASTNode ::FieldAccess { .. } = > {
let left_clone = expr . clone ( ) ;
2025-09-17 07:43:07 +09:00
let value = ASTNode ::BinaryOp {
operator : op ,
left : Box ::new ( left_clone ) ,
right : Box ::new ( rhs ) ,
span : Span ::unknown ( ) ,
} ;
Ok ( ASTNode ::Assignment {
target : Box ::new ( expr ) ,
value : Box ::new ( value ) ,
span : Span ::unknown ( ) ,
} )
2025-09-05 05:16:21 +09:00
}
_ = > {
let line = self . current_token ( ) . line ;
Err ( ParseError ::InvalidStatement { line } )
}
}
2025-08-09 15:14:44 +09:00
} else {
2025-08-16 12:19:58 +09:00
// 代入文でなければ式文として返す
Ok ( expr )
2025-08-11 10:14:47 +09:00
}
2025-08-09 15:14:44 +09:00
}
2025-09-17 07:43:07 +09:00
2025-08-16 12:19:58 +09:00
// Expression parsing methods are now in expressions.rs module
// Utility methods are now in common.rs module via ParserUtils trait
2025-08-16 12:24:23 +09:00
// Item parsing methods are now in items.rs module
2025-09-17 07:43:07 +09:00
2025-08-16 12:19:58 +09:00
// ===== 🔥 Static Box循環依存検出 =====
2025-08-11 10:14:47 +09:00
}
2025-09-25 09:01:55 +09:00
// ---- Minimal ParserUtils impl (depth-less; TokenCursor handles newline policy) ----
impl common ::ParserUtils for NyashParser {
fn tokens ( & self ) -> & Vec < Token > { & self . tokens }
fn current ( & self ) -> usize { self . current }
fn current_mut ( & mut self ) -> & mut usize { & mut self . current }
fn update_depth_before_advance ( & mut self ) { /* no-op (legacy removed) */ }
fn update_depth_after_advance ( & mut self ) { /* no-op (legacy removed) */ }
}