// hakorune/src/parser/mod.rs

/*!
 * Nyash Parser - Rust Implementation
 *
 * A complete Rust reimplementation of NyashParser from the Python version (nyashc_v4.py).
 * Converts the token stream into an AST (Abstract Syntax Tree).
 *
 * Module structure:
 * - common.rs: shared utilities and traits (ParserUtils)
 * - expressions.rs: expression parsers (parse_expression, parse_or, parse_and, etc.)
 * - statements.rs: statement parsers (parse_statement, parse_if, parse_loop, etc.)
 * - declarations/: Box declaration parsers (box_definition, static_box, dependency_helpers)
 * - items/: top-level item parsers (global_vars, functions, static_items)
 *
 * 2025-08-16: Major refactoring completed
 * - 1530 lines → 227 lines (85% reduction)
 * - Split into per-feature modules for maintainability
 */
// Submodule declarations
mod common;
mod cursor; // TokenCursor: centralizes newline handling
mod declarations;
// depth_tracking.rs was a legacy depth counter for Smart advance.
// Phase 15.5: removed in favor of TokenCursor-centric newline handling.
pub mod entry_sugar; // helper to parse with sugar level
mod expr;
mod expr_cursor; // experimental expression parser built on TokenCursor
mod expressions;
mod items;
mod statements; // Now uses modular structure in statements/
pub mod sugar; // Phase 12.7-B: desugar pass (basic)
pub mod sugar_gate; // thread-local gate for sugar parsing (tests/docs)
// mod errors;
use common::ParserUtils;
use crate::ast::{ASTNode, Span};
use crate::tokenizer::{Token, TokenType, TokenizeError};
use thiserror::Error;
#[inline]
fn is_sugar_enabled() -> bool {
    crate::parser::sugar_gate::is_enabled()
}
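// Example (sketch): this gate is what makes `x += 1` legal in
// parse_assignment_or_function_call below; with the gate disabled,
// compound-assignment tokens are reported as UnexpectedToken with a hint
// to set NYASH_SYNTAX_SUGAR_LEVEL=basic|full.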
// ===== 🔥 Debug Macros =====
/// Infinite loop detection macro - must be called in every loop that advances tokens
/// Prevents parser from hanging due to token consumption bugs
/// Uses parser's debug_fuel field for centralized fuel management
#[macro_export]
macro_rules! must_advance {
    ($parser:expr, $fuel:expr, $location:literal) => {
        // Only enforce the limit when debug fuel is Some
        if let Some(ref mut limit) = $parser.debug_fuel {
            if *limit == 0 {
                eprintln!("🚨 PARSER INFINITE LOOP DETECTED at {}", $location);
                eprintln!(
                    "🔍 Current token: {:?} at line {}",
                    $parser.current_token().token_type,
                    $parser.current_token().line
                );
                eprintln!(
                    "🔍 Parser position: {}/{}",
                    $parser.current,
                    $parser.tokens.len()
                );
                return Err($crate::parser::ParseError::InfiniteLoop {
                    location: $location.to_string(),
                    token: $parser.current_token().token_type.clone(),
                    line: $parser.current_token().line,
                });
            }
            *limit -= 1;
        }
        // None means unlimited, so no check is performed
    };
}
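// Usage sketch (hypothetical loop; assumes a `&mut NyashParser` in scope as
// `self` and that each iteration consumes at least one token):
//
//     while !self.is_at_end() {
//         must_advance!(self, debug_fuel!(), "parse_block");
//         self.advance();
//     }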
/// Initialize debug fuel for loop monitoring
#[macro_export]
macro_rules! debug_fuel {
    () => {
        100_000 // Default: 100k iterations should be enough for any reasonable program
    };
}
// Two-phase parser structures are no longer needed - simplified to direct parsing
/// Parse errors
#[derive(Error, Debug)]
pub enum ParseError {
    #[error("Unexpected token {found:?}, expected {expected} at line {line}")]
    UnexpectedToken {
        found: TokenType,
        expected: String,
        line: usize,
    },
    #[error("Unexpected end of file")]
    UnexpectedEOF,
    #[error("Invalid expression at line {line}")]
    InvalidExpression { line: usize },
    #[error("Invalid statement at line {line}")]
    InvalidStatement { line: usize },
    #[error("Unsupported identifier '{name}' at line {line}")]
    UnsupportedIdentifier { name: String, line: usize },
    #[error("Circular dependency detected between static boxes: {cycle}")]
    CircularDependency { cycle: String },
    #[error("🚨 Infinite loop detected in parser at {location} - token: {token:?} at line {line}")]
    InfiniteLoop {
        location: String,
        token: TokenType,
        line: usize,
    },
    #[error("🔥 Transparency system removed: {suggestion} at line {line}")]
    TransparencySystemRemoved { suggestion: String, line: usize },
    #[error(
        "Unsupported namespace '{name}' at line {line}. Only 'nyashstd' is supported in Phase 0."
    )]
    UnsupportedNamespace { name: String, line: usize },
    #[error("Expected identifier at line {line}")]
    ExpectedIdentifier { line: usize },
    #[error("Tokenize error: {0}")]
    TokenizeError(#[from] TokenizeError),
}
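// Handling sketch (uses only this module's public API): every variant
// implements Display via thiserror, so callers can surface errors directly:
//
//     match NyashParser::parse_from_string(src) {
//         Ok(ast) => run(ast),                    // `run` is hypothetical
//         Err(e) => eprintln!("parse error: {e}"),
//     }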
/// Nyash parser - converts a token stream into an AST
pub struct NyashParser {
    pub(super) tokens: Vec<Token>,
    pub(super) current: usize,
    /// 🔥 Static box dependency tracking (for circular dependency detection)
    pub(super) static_box_dependencies:
        std::collections::HashMap<String, std::collections::HashSet<String>>,
    /// 🔥 Debug fuel: iteration limit for infinite loop detection (None = unlimited)
    pub(super) debug_fuel: Option<usize>,
}
// ParserUtils trait implementation now lives here (legacy depth tracking removed)
impl NyashParser {
    /// Create a new parser
    pub fn new(tokens: Vec<Token>) -> Self {
        Self {
            tokens,
            current: 0,
            static_box_dependencies: std::collections::HashMap::new(),
            debug_fuel: Some(100_000), // default
        }
    }
    /// Parse from a string (tokenize + parse)
    /// Note: reads parser_stage3_enabled() (NYASH_FEATURES=stage3 or the legacy env var) for using-chain parsing
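    ///
    /// Example (sketch; the program text is illustrative, not canonical Nyash):
    ///
    /// ```ignore
    /// let ast = NyashParser::parse_from_string("a = 1 || 0")?;
    /// // `||`/`&&` in the source are normalized to `or`/`and` before tokenization.
    /// ```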
    pub fn parse_from_string(input: impl Into<String>) -> Result<ASTNode, ParseError> {
        // Ensure Stage-3 features are enabled when parsing using-chain files,
        // i.e. when the parent requested Stage-3 parsing via NYASH_FEATURES/legacy env
        Self::parse_from_string_with_fuel(input, Some(100_000))
    }
    /// Parse from a string (with explicit debug fuel)
    /// fuel: Some(n) = at most n iterations, None = unlimited
    pub fn parse_from_string_with_fuel(
        input: impl Into<String>,
        fuel: Option<usize>,
    ) -> Result<ASTNode, ParseError> {
        // Normalize logical operators '||'/'&&' to 'or'/'and' before tokenization
        // (skipping string literals and comments)
        fn normalize_logical_ops(src: &str) -> String {
            let mut out = String::with_capacity(src.len());
            let mut it = src.chars().peekable();
            let mut in_str = false;
            let mut in_line = false;
            let mut in_block = false;
            while let Some(c) = it.next() {
                if in_line {
                    out.push(c);
                    if c == '\n' {
                        in_line = false;
                    }
                    continue;
                }
                if in_block {
                    out.push(c);
                    if c == '*' && matches!(it.peek(), Some('/')) {
                        out.push('/');
                        it.next();
                        in_block = false;
                    }
                    continue;
                }
                if in_str {
                    out.push(c);
                    if c == '\\' {
                        if let Some(nc) = it.next() {
                            out.push(nc);
                        }
                        continue;
                    }
                    if c == '"' {
                        in_str = false;
                    }
                    continue;
                }
                match c {
                    '"' => {
                        in_str = true;
                        out.push(c);
                    }
                    '/' => match it.peek() {
                        Some('/') => {
                            out.push_str("//");
                            it.next();
                            in_line = true;
                        }
                        Some('*') => {
                            out.push_str("/*");
                            it.next();
                            in_block = true;
                        }
                        _ => out.push('/'),
                    },
                    '#' => {
                        in_line = true;
                        out.push('#');
                    }
                    '|' => {
                        if matches!(it.peek(), Some('|')) {
                            out.push_str(" or ");
                            it.next();
                        } else if matches!(it.peek(), Some('>')) {
                            // keep the pipeline operator '|>' intact
                            out.push_str("|>");
                            it.next();
                        } else {
                            out.push('|');
                        }
                    }
                    '&' => {
                        if matches!(it.peek(), Some('&')) {
                            out.push_str(" and ");
                            it.next();
                        } else {
                            out.push('&');
                        }
                    }
                    _ => out.push(c),
                }
            }
            out
        }
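        // Behavior sketch: `a || b && c` becomes `a or b and c`, while `|>`
        // and any `||`/`&&` inside string literals or comments are left
        // untouched (e.g. `"x || y"` stays as written).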
        let input_s: String = input.into();
        let pre = normalize_logical_ops(&input_s);
        let mut tokenizer = crate::tokenizer::NyashTokenizer::new(pre);
        let tokens = tokenizer.tokenize()?;
        // Reject the reserved identifier `self` before parsing begins
        for tok in &tokens {
            if let TokenType::IDENTIFIER(name) = &tok.token_type {
                if name == "self" {
                    return Err(ParseError::UnsupportedIdentifier {
                        name: name.clone(),
                        line: tok.line,
                    });
                }
            }
        }
        let mut parser = Self::new(tokens);
        parser.debug_fuel = fuel;
        parser.parse()
    }
    /// Run the parse - returns a Program AST
    pub fn parse(&mut self) -> Result<ASTNode, ParseError> {
        self.parse_program()
    }

    // ===== Parse functions =====

    /// Parse the whole program
    fn parse_program(&mut self) -> Result<ASTNode, ParseError> {
        let mut statements = Vec::new();
        let mut _statement_count = 0;
        let allow_sc = std::env::var("NYASH_PARSER_ALLOW_SEMICOLON")
            .ok()
            .map(|v| {
                let lv = v.to_ascii_lowercase();
                !(lv == "0" || lv == "false" || lv == "off")
            })
            .unwrap_or(true);
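        // Example (sketch): with the default (unset, or any value other than
        // "0"/"false"/"off"), `a; b` splits at the ';' just like a newline;
        // NYASH_PARSER_ALLOW_SEMICOLON=0 leaves the ';' in the stream, so
        // statement parsing is expected to reject it.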
        while !self.is_at_end() {
            // Skip EOF tokens
            if matches!(self.current_token().token_type, TokenType::EOF) {
                break;
            }
            // Skip NEWLINE tokens (used as statement separators)
            if matches!(self.current_token().token_type, TokenType::NEWLINE)
                || (allow_sc && matches!(self.current_token().token_type, TokenType::SEMICOLON))
            {
                self.advance();
                continue;
            }
            let statement = self.parse_statement()?;
            statements.push(statement);
            _statement_count += 1;
        }
        // 🔥 Detect circular dependencies after all static boxes are parsed
        self.check_circular_dependencies()?;
        Ok(ASTNode::Program {
            statements,
            span: Span::unknown(),
        })
    }
    // Statement parsing methods are now in the statements.rs module

    /// Parse an assignment statement or a function call
    fn parse_assignment_or_function_call(&mut self) -> Result<ASTNode, ParseError> {
        // First, parse the left-hand side as an expression
        let expr = self.parse_expression()?;
        // If the next token is '=' or a compound-assignment operator, this is an assignment
        if self.match_token(&TokenType::ASSIGN) {
            self.advance(); // consume '='
            let value = Box::new(self.parse_expression()?);
            // Check that the left-hand side is an assignable form
            match &expr {
                ASTNode::Variable { .. } | ASTNode::FieldAccess { .. } | ASTNode::Index { .. } => {
                    Ok(ASTNode::Assignment {
                        target: Box::new(expr),
                        value,
                        span: Span::unknown(),
                    })
                }
                _ => {
                    let line = self.current_token().line;
                    Err(ParseError::InvalidStatement { line })
                }
            }
        } else if self.match_token(&TokenType::PlusAssign)
            || self.match_token(&TokenType::MinusAssign)
            || self.match_token(&TokenType::MulAssign)
            || self.match_token(&TokenType::DivAssign)
        {
            if !is_sugar_enabled() {
                let line = self.current_token().line;
                return Err(ParseError::UnexpectedToken {
                    found: self.current_token().token_type.clone(),
                    expected: "enable NYASH_SYNTAX_SUGAR_LEVEL=basic|full for '+=' and friends"
                        .to_string(),
                    line,
                });
            }
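            // Desugar sketch: `x += e` is rewritten below to `x = x + e`,
            // i.e. an Assignment whose value is a BinaryOp over a clone of
            // the left-hand side.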
            // Determine the operator
            let op = match &self.current_token().token_type {
                TokenType::PlusAssign => crate::ast::BinaryOperator::Add,
                TokenType::MinusAssign => crate::ast::BinaryOperator::Subtract,
                TokenType::MulAssign => crate::ast::BinaryOperator::Multiply,
                TokenType::DivAssign => crate::ast::BinaryOperator::Divide,
                _ => unreachable!(),
            };
            self.advance(); // consume 'op='
            let rhs = self.parse_expression()?;
            // Check that the left-hand side is an assignable form
            match &expr {
                ASTNode::Variable { .. } | ASTNode::FieldAccess { .. } => {
                    let left_clone = expr.clone();
                    let value = ASTNode::BinaryOp {
                        operator: op,
                        left: Box::new(left_clone),
                        right: Box::new(rhs),
                        span: Span::unknown(),
                    };
                    Ok(ASTNode::Assignment {
                        target: Box::new(expr),
                        value: Box::new(value),
                        span: Span::unknown(),
                    })
                }
                _ => {
                    let line = self.current_token().line;
                    Err(ParseError::InvalidStatement { line })
                }
            }
        } else {
            // Not an assignment; return it as an expression statement
            Ok(expr)
        }
    }
    // Expression parsing methods are now in the expressions.rs module
    // Utility methods are now in the common.rs module via the ParserUtils trait
    // Item parsing methods are now in the items.rs module
    // ===== 🔥 Static box circular dependency detection =====
}
// ---- Minimal ParserUtils impl (depth-less; TokenCursor handles newline policy) ----
impl common::ParserUtils for NyashParser {
    fn tokens(&self) -> &Vec<Token> {
        &self.tokens
    }
    fn current(&self) -> usize {
        self.current
    }
    fn current_mut(&mut self) -> &mut usize {
        &mut self.current
    }
    fn update_depth_before_advance(&mut self) { /* no-op (legacy removed) */
    }
    fn update_depth_after_advance(&mut self) { /* no-op (legacy removed) */
    }
}
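
// ---- Smoke tests (sketch; written against only this module's public API;
// the program snippets are illustrative rather than canonical Nyash) ----
#[cfg(test)]
mod mod_tests {
    use super::*;

    #[test]
    fn rejects_self_identifier() {
        // `self` is rejected by the pre-parse token scan in parse_from_string.
        let err = NyashParser::parse_from_string("self").unwrap_err();
        assert!(matches!(err, ParseError::UnsupportedIdentifier { .. }));
    }

    #[test]
    fn parses_a_trivial_program() {
        // Hypothetical snippet; `1 || 0` also exercises the '||' -> 'or'
        // normalization path. If the snippet is not valid Nyash, the Err arm
        // simply skips the shape check.
        if let Ok(ast) = NyashParser::parse_from_string("a = 1 || 0") {
            assert!(matches!(ast, ASTNode::Program { .. }));
        }
    }
}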