Files
hakorune/src/parser/common.rs
Selfhosting Dev c8063c9e41 pyvm: split op handlers into ops_core/ops_box/ops_ctrl; add ops_flow + intrinsic; delegate vm.py without behavior change
net-plugin: modularize constants (consts.rs) and sockets (sockets.rs); remove legacy commented socket code; fix unused imports
mir: move instruction unit tests to tests/mir_instruction_unit.rs (file lean-up); no semantic changes
runner/pyvm: ensure using pre-strip; misc docs updates

Build: cargo build ok; legacy cfg warnings remain as before
2025-09-21 08:53:00 +09:00

143 lines
4.4 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*!
* Parser Common Utilities
*
* Helper functions and type definitions shared across the parser modules.
* Extracted from parser/mod.rs as part of modularization
*/
use super::ParseError;
use crate::ast::Span;
use crate::tokenizer::{Token, TokenType};
/// Cursor-style helpers shared by parsers that walk a token stream.
///
/// Implementors provide the three accessors ([`ParserUtils::tokens`],
/// [`ParserUtils::current`], [`ParserUtils::current_mut`]); every other method
/// is supplied as a default built on top of them.
pub trait ParserUtils {
    /// Returns the token stream being parsed.
    fn tokens(&self) -> &Vec<Token>;

    /// Returns the index of the current token within [`ParserUtils::tokens`].
    fn current(&self) -> usize;

    /// Returns a mutable handle to the cursor index so it can be moved.
    fn current_mut(&mut self) -> &mut usize;

    /// Returns the token under the cursor.
    ///
    /// If the cursor points past the end of the stream, a synthetic `EOF`
    /// token (line 0, column 0) is returned instead of panicking.
    fn current_token(&self) -> &Token {
        self.tokens().get(self.current()).unwrap_or(&Token {
            token_type: TokenType::EOF,
            line: 0,
            column: 0,
        })
    }

    /// Looks at the type of the token right after the current one without
    /// moving the cursor.
    ///
    /// Returns `EOF` when there is no such token.
    fn peek_token(&self) -> &TokenType {
        // `checked_add` + `get` keeps the lookahead panic-free even when the
        // cursor is at `usize::MAX`.
        self.current()
            .checked_add(1)
            .and_then(|index| self.tokens().get(index))
            .map_or(&TokenType::EOF, |token| &token.token_type)
    }

    /// Looks at the type of the token `n` positions after the current one
    /// without moving the cursor (`n == 0` is the current token).
    ///
    /// Returns `EOF` when the requested position lies outside the stream,
    /// including when `current() + n` would overflow `usize`.
    #[allow(dead_code)]
    fn peek_nth_token(&self, n: usize) -> &TokenType {
        self.current()
            .checked_add(n)
            .and_then(|index| self.tokens().get(index))
            .map_or(&TokenType::EOF, |token| &token.token_type)
    }

    /// Moves the cursor forward by one token, unless it is already at the end.
    fn advance(&mut self) {
        if !self.is_at_end() {
            *self.current_mut() += 1;
        }
    }

    /// Skips over consecutive `NEWLINE` tokens.
    ///
    /// `SEMICOLON` tokens are skipped as well when the environment variable
    /// `NYASH_PARSER_ALLOW_SEMICOLON` is set to `1`, `true` or `on`
    /// (ASCII case-insensitive). The variable is read on every call.
    fn skip_newlines(&mut self) {
        // Case-insensitive comparison without allocating a lowercase copy.
        let allow_semicolon = std::env::var("NYASH_PARSER_ALLOW_SEMICOLON")
            .map(|value| {
                ["1", "true", "on"]
                    .iter()
                    .any(|enabled| value.eq_ignore_ascii_case(enabled))
            })
            .unwrap_or(false);

        loop {
            let skippable = match self.current_token().token_type {
                TokenType::NEWLINE => true,
                TokenType::SEMICOLON => allow_semicolon,
                _ => false,
            };
            if !skippable || self.is_at_end() {
                break;
            }
            self.advance();
        }
    }

    /// Consumes the current token if it is the `expected` variant.
    ///
    /// Only the enum variant is compared; any payload carried by `expected`
    /// is ignored. On success the consumed token is returned (cloned) and the
    /// cursor advances.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::UnexpectedToken`] carrying the token type that
    /// was found, the `Debug` rendering of `expected`, and the line of the
    /// current token. The cursor is left where it was.
    fn consume(&mut self, expected: TokenType) -> Result<Token, ParseError> {
        let found = self.current_token();
        if std::mem::discriminant(&found.token_type) != std::mem::discriminant(&expected) {
            return Err(ParseError::UnexpectedToken {
                found: found.token_type.clone(),
                expected: format!("{:?}", expected),
                line: found.line,
            });
        }

        let token = found.clone();
        self.advance();
        Ok(token)
    }

    /// Reports whether the current token is the same variant as `token_type`
    /// (payloads are not compared).
    fn match_token(&self, token_type: &TokenType) -> bool {
        std::mem::discriminant(&self.current_token().token_type)
            == std::mem::discriminant(token_type)
    }

    /// Reports whether the current token is the same variant as any entry of
    /// `token_types` (payloads are not compared).
    #[allow(dead_code)]
    fn match_any_token(&self, token_types: &[TokenType]) -> bool {
        let current = std::mem::discriminant(&self.current_token().token_type);
        token_types
            .iter()
            .any(|candidate| std::mem::discriminant(candidate) == current)
    }

    /// Reports whether the cursor is past the last token or sitting on `EOF`.
    fn is_at_end(&self) -> bool {
        self.current() >= self.tokens().len()
            || matches!(self.current_token().token_type, TokenType::EOF)
    }

    /// Reports whether the current token ends a line (`NEWLINE` or `EOF`).
    #[allow(dead_code)]
    fn is_line_end(&self) -> bool {
        matches!(
            self.current_token().token_type,
            TokenType::NEWLINE | TokenType::EOF
        )
    }

    /// Returns the `(line, column)` of the current token, for error reporting.
    #[allow(dead_code)]
    fn current_position(&self) -> (usize, usize) {
        let token = self.current_token();
        (token.line, token.column)
    }

    /// Builds a [`Span`] from the current token's line and column.
    #[allow(dead_code)]
    fn current_span(&self) -> Span {
        let token = self.current_token();
        Span {
            // Tokens carry no byte offsets, so both offsets are reported as 0.
            start: 0,
            end: 0,
            line: token.line,
            column: token.column,
        }
    }
}
/// Returns the placeholder span produced by [`Span::unknown`].
///
/// Free-function shorthand that simply forwards to the associated
/// constructor on [`Span`].
#[allow(dead_code)]
pub fn unknown_span() -> Span {
    Span::unknown()
}