/*!
 * Nyash Tokenizer - converts .nyash source code into a token stream.
 *
 * A complete Rust reimplementation of NyashTokenizer from the Python
 * version (nyashc_v4.py), moving from regex-based scanning to faster
 * char-level processing.
 */
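//!
//! A minimal usage sketch (illustrative only; `NyashTokenizer` and
//! `TokenType` are defined in this file):
//!
//! ```ignore
//! let mut tokenizer = NyashTokenizer::new("local x = 42");
//! let tokens = tokenizer.tokenize().expect("tokenize failed");
//! assert!(matches!(tokens.last().unwrap().token_type, TokenType::EOF));
//! ```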

use thiserror::Error;
use crate::grammar::engine;

/// The kinds of tokens produced by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
    // Literals
    STRING(String),
    NUMBER(i64),
    FLOAT(f64), // floating-point number support
    TRUE,
    FALSE,
    NULL, // null literal

    // Keywords
    BOX,
    GLOBAL,
    SINGLETON,
    NEW,
    PEEK,
    IF,
    ELSE,
    LOOP,
    BREAK,
    CONTINUE,
    RETURN,
    FUNCTION,
    PRINT,
    THIS,
    ME,
    INIT,      // init (initialization block)
    PACK,      // pack (constructor, kept for compatibility)
    BIRTH,     // birth (constructor)
    NOWAIT,    // nowait
    AWAIT,     // await
    INTERFACE, // interface
    COLON,     // : (inheritance)
    INCLUDE,   // include (file inclusion)
    TRY,       // try
    CATCH,     // catch
    FINALLY,   // finally
    THROW,     // throw
    LOCAL,     // local (temporary variable declaration)
    STATIC,    // static (static methods)
    OUTBOX,    // outbox (ownership-transfer variable)
    NOT,       // not (negation operator)
    OVERRIDE,  // override (explicit override)
    FROM,      // from (parent method call)
    WEAK,      // weak (weak-reference modifier)
    USING,     // using (namespace import)

    // Operators (longest first)
    ARROW,         // >> (legacy arrow)
    FAT_ARROW,     // => (peek arms)
    EQUALS,        // ==
    NotEquals,     // !=
    LessEquals,    // <=
    GreaterEquals, // >=
    AND,           // && or the word `and`
    OR,            // || or the word `or`
    LESS,          // <
    GREATER,       // >
    ASSIGN,        // =
    PLUS,          // +
    MINUS,         // -
    MULTIPLY,      // *
    DIVIDE,        // /
    MODULO,        // %

    // Symbols
    DOT,          // .
    DOUBLE_COLON, // :: (Parent::method) - for P1 (defined only)
    LPAREN,       // (
    RPAREN,       // )
    LBRACE,       // {
    RBRACE,       // }
    COMMA,        // ,
    NEWLINE,      // \n

    // Identifiers
    IDENTIFIER(String),

    // Special
    EOF,
}

/// A token together with its source position.
#[derive(Debug, Clone)]
pub struct Token {
    pub token_type: TokenType,
    pub line: usize,
    pub column: usize,
}

impl Token {
    pub fn new(token_type: TokenType, line: usize, column: usize) -> Self {
        Self { token_type, line, column }
    }
}

/// Errors raised during tokenization.
#[derive(Error, Debug)]
pub enum TokenizeError {
    #[error("Unexpected character '{char}' at line {line}, column {column}")]
    UnexpectedCharacter { char: char, line: usize, column: usize },

    #[error("Unterminated string literal at line {line}")]
    UnterminatedString { line: usize },

    #[error("Invalid number format at line {line}")]
    InvalidNumber { line: usize },

    // Currently never constructed by this tokenizer.
    #[error("Comment not closed at line {line}")]
    UnterminatedComment { line: usize },
}

/// The Nyash tokenizer.
pub struct NyashTokenizer {
    input: Vec<char>,
    position: usize,
    line: usize,
    column: usize,
}

impl NyashTokenizer {
    /// Create a new tokenizer over the given source text.
    pub fn new(input: impl Into<String>) -> Self {
        let input_string = input.into();
        Self {
            input: input_string.chars().collect(),
            position: 0,
            line: 1,
            column: 1,
        }
    }

    /// Tokenize the entire input.
    pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizeError> {
        let mut tokens = Vec::new();

        while !self.is_at_end() {
            // Skip whitespace (newlines are kept as tokens).
            self.skip_whitespace();

            if self.is_at_end() {
                break;
            }

            // Read the next token. A trailing line comment makes
            // tokenize_next return EOF early; break here so EOF is
            // not pushed twice.
            let token = self.tokenize_next()?;
            if matches!(token.token_type, TokenType::EOF) {
                break;
            }
            tokens.push(token);
        }

        // Append the EOF token.
        tokens.push(Token::new(TokenType::EOF, self.line, self.column));

        Ok(tokens)
    }

    /// Read the next single token.
    fn tokenize_next(&mut self) -> Result<Token, TokenizeError> {
        let start_line = self.line;
        let start_column = self.column;

        match self.current_char() {
            Some('"') => {
                let string_value = self.read_string()?;
                Ok(Token::new(TokenType::STRING(string_value), start_line, start_column))
            }
            Some(c) if c.is_ascii_digit() => {
                let token_type = self.read_numeric_literal()?;
                Ok(Token::new(token_type, start_line, start_column))
            }
            Some(c) if c.is_alphabetic() || c == '_' => {
                let token_type = self.read_keyword_or_identifier();
                Ok(Token::new(token_type, start_line, start_column))
            }
            Some('/') if self.peek_char() == Some('/') => {
                self.skip_line_comment();
                self.skip_whitespace(); // also skip whitespace after the comment
                return self.tokenize_next();
            }
            Some('#') => {
                self.skip_line_comment();
                self.skip_whitespace(); // also skip whitespace after the comment
                return self.tokenize_next();
            }
            Some('>') if self.peek_char() == Some('>') => {
                self.advance();
                self.advance();
                Ok(Token::new(TokenType::ARROW, start_line, start_column))
            }
            Some(':') if self.peek_char() == Some(':') => {
                self.advance();
                self.advance();
                Ok(Token::new(TokenType::DOUBLE_COLON, start_line, start_column))
            }
            Some('=') if self.peek_char() == Some('>') => {
                self.advance();
                self.advance();
                Ok(Token::new(TokenType::FAT_ARROW, start_line, start_column))
            }
            Some('=') if self.peek_char() == Some('=') => {
                self.advance();
                self.advance();
                Ok(Token::new(TokenType::EQUALS, start_line, start_column))
            }
            Some('!') if self.peek_char() == Some('=') => {
                self.advance();
                self.advance();
                Ok(Token::new(TokenType::NotEquals, start_line, start_column))
            }
            Some('<') if self.peek_char() == Some('=') => {
                self.advance();
                self.advance();
                Ok(Token::new(TokenType::LessEquals, start_line, start_column))
            }
            Some('>') if self.peek_char() == Some('=') => {
                self.advance();
                self.advance();
                Ok(Token::new(TokenType::GreaterEquals, start_line, start_column))
            }
            Some('&') if self.peek_char() == Some('&') => {
                self.advance();
                self.advance();
                Ok(Token::new(TokenType::AND, start_line, start_column))
            }
            Some('|') if self.peek_char() == Some('|') => {
                self.advance();
                self.advance();
                Ok(Token::new(TokenType::OR, start_line, start_column))
            }
            Some('<') => {
                self.advance();
                Ok(Token::new(TokenType::LESS, start_line, start_column))
            }
            Some('>') => {
                self.advance();
                Ok(Token::new(TokenType::GREATER, start_line, start_column))
            }
            Some('=') => {
                self.advance();
                Ok(Token::new(TokenType::ASSIGN, start_line, start_column))
            }
            Some('+') => {
                self.advance();
                Ok(Token::new(TokenType::PLUS, start_line, start_column))
            }
            Some('-') => {
                self.advance();
                Ok(Token::new(TokenType::MINUS, start_line, start_column))
            }
            Some('*') => {
                self.advance();
                Ok(Token::new(TokenType::MULTIPLY, start_line, start_column))
            }
            Some('/') => {
                self.advance();
                Ok(Token::new(TokenType::DIVIDE, start_line, start_column))
            }
            Some('%') => {
                self.advance();
                Ok(Token::new(TokenType::MODULO, start_line, start_column))
            }
            Some('.') => {
                self.advance();
                Ok(Token::new(TokenType::DOT, start_line, start_column))
            }
            Some('(') => {
                self.advance();
                Ok(Token::new(TokenType::LPAREN, start_line, start_column))
            }
            Some(')') => {
                self.advance();
                Ok(Token::new(TokenType::RPAREN, start_line, start_column))
            }
            Some('{') => {
                self.advance();
                Ok(Token::new(TokenType::LBRACE, start_line, start_column))
            }
            Some('}') => {
                self.advance();
                Ok(Token::new(TokenType::RBRACE, start_line, start_column))
            }
            Some(',') => {
                self.advance();
                Ok(Token::new(TokenType::COMMA, start_line, start_column))
            }
            Some(':') => {
                self.advance();
                Ok(Token::new(TokenType::COLON, start_line, start_column))
            }
            Some('\n') => {
                self.advance();
                Ok(Token::new(TokenType::NEWLINE, start_line, start_column))
            }
            Some(c) => {
                Err(TokenizeError::UnexpectedCharacter {
                    char: c,
                    line: self.line,
                    column: self.column,
                })
            }
            None => {
                Ok(Token::new(TokenType::EOF, self.line, self.column))
            }
        }
    }

    /// Read a string literal.
    fn read_string(&mut self) -> Result<String, TokenizeError> {
        let start_line = self.line;
        self.advance(); // skip the opening '"'

        let mut string_value = String::new();

        while let Some(c) = self.current_char() {
            if c == '"' {
                self.advance(); // skip the closing '"'
                return Ok(string_value);
            }

            // Handle escape sequences.
            if c == '\\' {
                self.advance();
                match self.current_char() {
                    Some('n') => string_value.push('\n'),
                    Some('t') => string_value.push('\t'),
                    Some('r') => string_value.push('\r'),
                    Some('\\') => string_value.push('\\'),
                    Some('"') => string_value.push('"'),
                    Some(c) => {
                        // Unknown escape: keep the backslash and character verbatim.
                        string_value.push('\\');
                        string_value.push(c);
                    }
                    None => break,
                }
            } else {
                string_value.push(c);
            }

            self.advance();
        }

        Err(TokenizeError::UnterminatedString { line: start_line })
    }

    /// Read a numeric literal (integer or floating-point).
    fn read_numeric_literal(&mut self) -> Result<TokenType, TokenizeError> {
        let start_line = self.line;
        let mut number_str = String::new();
        let mut has_dot = false;

        // Read the digits (and at most one decimal point).
        while let Some(c) = self.current_char() {
            if c.is_ascii_digit() {
                number_str.push(c);
                self.advance();
            } else if c == '.' && !has_dot && self.peek_char().map_or(false, |ch| ch.is_ascii_digit()) {
                // Accept the dot only when a digit follows, so that e.g.
                // `42.toString` still tokenizes as NUMBER DOT IDENTIFIER.
                has_dot = true;
                number_str.push(c);
                self.advance();
            } else {
                break;
            }
        }

        if has_dot {
            // Parse as a floating-point number.
            number_str.parse::<f64>()
                .map(TokenType::FLOAT)
                .map_err(|_| TokenizeError::InvalidNumber { line: start_line })
        } else {
            // Parse as an integer.
            number_str.parse::<i64>()
                .map(TokenType::NUMBER)
                .map_err(|_| TokenizeError::InvalidNumber { line: start_line })
        }
    }

    /// Read a keyword or an identifier.
    fn read_keyword_or_identifier(&mut self) -> TokenType {
        let mut identifier = String::new();

        while let Some(c) = self.current_char() {
            if c.is_alphanumeric() || c == '_' {
                identifier.push(c);
                self.advance();
            } else {
                break;
            }
        }

        // Keyword lookup.
        let tok = match identifier.as_str() {
            "box" => TokenType::BOX,
            "global" => TokenType::GLOBAL,
            "singleton" => TokenType::SINGLETON,
            "new" => TokenType::NEW,
            "peek" => TokenType::PEEK,
            "if" => TokenType::IF,
            "else" => TokenType::ELSE,
            "loop" => TokenType::LOOP,
            "break" => TokenType::BREAK,
            "continue" => TokenType::CONTINUE,
            "return" => TokenType::RETURN,
            "function" => TokenType::FUNCTION,
            // Alias support: `fn` as shorthand for function
            "fn" => TokenType::FUNCTION,
            "print" => TokenType::PRINT,
            "this" => TokenType::THIS,
            "me" => TokenType::ME,
            "init" => TokenType::INIT,
            "pack" => TokenType::PACK,
            "birth" => TokenType::BIRTH,
            "nowait" => TokenType::NOWAIT,
            "await" => TokenType::AWAIT,
            "interface" => TokenType::INTERFACE,
            "include" => TokenType::INCLUDE,
            "try" => TokenType::TRY,
            "catch" => TokenType::CATCH,
            "finally" => TokenType::FINALLY,
            "throw" => TokenType::THROW,
            "local" => TokenType::LOCAL,
            "static" => TokenType::STATIC,
            "outbox" => TokenType::OUTBOX,
            "not" => TokenType::NOT,
            "override" => TokenType::OVERRIDE,
            "from" => TokenType::FROM,
            "weak" => TokenType::WEAK,
            "using" => TokenType::USING,
            "and" => TokenType::AND,
            "or" => TokenType::OR,
            "true" => TokenType::TRUE,
            "false" => TokenType::FALSE,
            "null" => TokenType::NULL,
            _ => TokenType::IDENTIFIER(identifier.clone()),
        };

        // Diff check against the unified grammar engine (does not change behavior).
        if std::env::var("NYASH_GRAMMAR_DIFF").ok().as_deref() == Some("1") {
            // Safe lookup (early-stage integration; missing entries are ignored).
            let kw = engine::get().is_keyword_str(&identifier);
            match (&tok, kw) {
                (TokenType::IDENTIFIER(_), Some(name)) => {
                    eprintln!("[GRAMMAR-DIFF] tokenizer=IDENT, grammar=KEYWORD({}) word='{}'", name, identifier);
                }
                (TokenType::IDENTIFIER(_), None) => {}
                // The tokenizer has a keyword but the grammar does not define it.
                (t, None) if !matches!(t, TokenType::IDENTIFIER(_)) => {
                    eprintln!("[GRAMMAR-DIFF] tokenizer=KEYWORD, grammar=IDENT word='{}'", identifier);
                }
                _ => {}
            }
        }

        tok
    }

    /// Skip a line comment.
    fn skip_line_comment(&mut self) {
        while let Some(c) = self.current_char() {
            if c == '\n' {
                break; // leave the newline unconsumed
            }
            self.advance();
        }
    }

    /// Skip whitespace, excluding newlines (newlines become NEWLINE tokens).
    fn skip_whitespace(&mut self) {
        while let Some(c) = self.current_char() {
            if c.is_whitespace() && c != '\n' {
                self.advance();
            } else {
                break;
            }
        }
    }

    /// Get the current character.
    fn current_char(&self) -> Option<char> {
        self.input.get(self.position).copied()
    }

    /// Peek at the next character.
    fn peek_char(&self) -> Option<char> {
        self.input.get(self.position + 1).copied()
    }

    /// Advance by one character, tracking line and column.
    fn advance(&mut self) {
        if let Some(c) = self.current_char() {
            if c == '\n' {
                self.line += 1;
                self.column = 1;
            } else {
                self.column += 1;
            }
            self.position += 1;
        }
    }

    /// Check whether the end of input has been reached.
    fn is_at_end(&self) -> bool {
        self.position >= self.input.len()
    }
}

// ===== Tests =====

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_tokens() {
        let mut tokenizer = NyashTokenizer::new("box new = + - *");
        let tokens = tokenizer.tokenize().unwrap();

        assert_eq!(tokens.len(), 7); // 6 tokens + EOF
        assert_eq!(tokens[0].token_type, TokenType::BOX);
        assert_eq!(tokens[1].token_type, TokenType::NEW);
        assert_eq!(tokens[2].token_type, TokenType::ASSIGN);
        assert_eq!(tokens[3].token_type, TokenType::PLUS);
        assert_eq!(tokens[4].token_type, TokenType::MINUS);
        assert_eq!(tokens[5].token_type, TokenType::MULTIPLY);
        assert_eq!(tokens[6].token_type, TokenType::EOF);
    }

    #[test]
    fn test_string_literal() {
        let mut tokenizer = NyashTokenizer::new(r#""Hello, World!""#);
        let tokens = tokenizer.tokenize().unwrap();

        assert_eq!(tokens.len(), 2); // STRING + EOF
        match &tokens[0].token_type {
            TokenType::STRING(s) => assert_eq!(s, "Hello, World!"),
            _ => panic!("Expected STRING token"),
        }
    }
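
    // A sketch exercising the escape handling in read_string above
    // (\n, \t, \r, \\ and \" are the supported escapes; unknown escapes
    // pass through verbatim).
    #[test]
    fn test_string_escapes() {
        let mut tokenizer = NyashTokenizer::new(r#""a\nb\\c\"d""#);
        let tokens = tokenizer.tokenize().unwrap();

        match &tokens[0].token_type {
            TokenType::STRING(s) => assert_eq!(s, "a\nb\\c\"d"),
            _ => panic!("Expected STRING token"),
        }
    }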

    #[test]
    fn test_number_literal() {
        let mut tokenizer = NyashTokenizer::new("42 123 0");
        let tokens = tokenizer.tokenize().unwrap();

        assert_eq!(tokens.len(), 4); // 3 numbers + EOF
        match &tokens[0].token_type {
            TokenType::NUMBER(n) => assert_eq!(*n, 42),
            _ => panic!("Expected NUMBER token"),
        }
        match &tokens[1].token_type {
            TokenType::NUMBER(n) => assert_eq!(*n, 123),
            _ => panic!("Expected NUMBER token"),
        }
        match &tokens[2].token_type {
            TokenType::NUMBER(n) => assert_eq!(*n, 0),
            _ => panic!("Expected NUMBER token"),
        }
    }
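
    // A sketch of float tokenization: read_numeric_literal only consumes
    // a '.' when a digit follows, so `42.toString` stays NUMBER DOT IDENT.
    #[test]
    fn test_float_literal() {
        let mut tokenizer = NyashTokenizer::new("3.14 42.toString");
        let tokens = tokenizer.tokenize().unwrap();

        match &tokens[0].token_type {
            TokenType::FLOAT(f) => assert!((*f - 3.14).abs() < 1e-12),
            _ => panic!("Expected FLOAT token"),
        }
        assert_eq!(tokens[1].token_type, TokenType::NUMBER(42));
        assert_eq!(tokens[2].token_type, TokenType::DOT);
        match &tokens[3].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "toString"),
            _ => panic!("Expected IDENTIFIER token"),
        }
    }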

    #[test]
    fn test_identifier() {
        let mut tokenizer = NyashTokenizer::new("test_var myBox getValue");
        let tokens = tokenizer.tokenize().unwrap();

        assert_eq!(tokens.len(), 4); // 3 identifiers + EOF
        match &tokens[0].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "test_var"),
            _ => panic!("Expected IDENTIFIER token"),
        }
        match &tokens[1].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "myBox"),
            _ => panic!("Expected IDENTIFIER token"),
        }
        match &tokens[2].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "getValue"),
            _ => panic!("Expected IDENTIFIER token"),
        }
    }
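
    // A sketch checking the `fn` alias and the literal keywords mapped
    // in read_keyword_or_identifier.
    #[test]
    fn test_keyword_aliases_and_literals() {
        let mut tokenizer = NyashTokenizer::new("fn true false null");
        let tokens = tokenizer.tokenize().unwrap();

        assert_eq!(tokens[0].token_type, TokenType::FUNCTION); // `fn` == `function`
        assert_eq!(tokens[1].token_type, TokenType::TRUE);
        assert_eq!(tokens[2].token_type, TokenType::FALSE);
        assert_eq!(tokens[3].token_type, TokenType::NULL);
    }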

    #[test]
    fn test_operators() {
        let mut tokenizer = NyashTokenizer::new(">> == != <= >= < >");
        let tokens = tokenizer.tokenize().unwrap();

        assert_eq!(tokens[0].token_type, TokenType::ARROW);
        assert_eq!(tokens[1].token_type, TokenType::EQUALS);
        assert_eq!(tokens[2].token_type, TokenType::NotEquals);
        assert_eq!(tokens[3].token_type, TokenType::LessEquals);
        assert_eq!(tokens[4].token_type, TokenType::GreaterEquals);
        assert_eq!(tokens[5].token_type, TokenType::LESS);
        assert_eq!(tokens[6].token_type, TokenType::GREATER);
    }
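
    // A sketch covering the remaining multi-char operators and the word
    // forms of the logical operators.
    #[test]
    fn test_arrow_colon_and_logical_operators() {
        let mut tokenizer = NyashTokenizer::new("=> :: && || and or");
        let tokens = tokenizer.tokenize().unwrap();

        assert_eq!(tokens[0].token_type, TokenType::FAT_ARROW);
        assert_eq!(tokens[1].token_type, TokenType::DOUBLE_COLON);
        assert_eq!(tokens[2].token_type, TokenType::AND);
        assert_eq!(tokens[3].token_type, TokenType::OR);
        assert_eq!(tokens[4].token_type, TokenType::AND);
        assert_eq!(tokens[5].token_type, TokenType::OR);
    }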

    #[test]
    fn test_complex_code() {
        let code = r#"
box TestBox {
    value

    getValue() {
        return this.value
    }
}

obj = new TestBox()
obj.value = "test123"
"#;

        let mut tokenizer = NyashTokenizer::new(code);
        let tokens = tokenizer.tokenize().unwrap();

        // Check that the expected tokens are present.
        let token_types: Vec<_> = tokens.iter().map(|t| &t.token_type).collect();
        assert!(token_types.contains(&&TokenType::BOX));
        assert!(token_types.contains(&&TokenType::NEW));
        assert!(token_types.contains(&&TokenType::THIS));
        assert!(token_types.contains(&&TokenType::RETURN));
        assert!(token_types.contains(&&TokenType::DOT));
    }

    #[test]
    fn test_line_numbers() {
        let code = "box\ntest\nvalue";
        let mut tokenizer = NyashTokenizer::new(code);
        let tokens = tokenizer.tokenize().unwrap();

        // Filter out NEWLINE tokens before checking.
        let non_newline: Vec<&Token> = tokens.iter().filter(|t| !matches!(t.token_type, TokenType::NEWLINE)).collect();
        assert_eq!(non_newline[0].line, 1); // box
        assert_eq!(non_newline[1].line, 2); // test
        assert_eq!(non_newline[2].line, 3); // value
    }
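
    // A sketch of column tracking in advance(): columns are 1-based and
    // reset after each newline.
    #[test]
    fn test_column_numbers() {
        let mut tokenizer = NyashTokenizer::new("ab cd\nef");
        let tokens = tokenizer.tokenize().unwrap();

        assert_eq!((tokens[0].line, tokens[0].column), (1, 1)); // ab
        assert_eq!((tokens[1].line, tokens[1].column), (1, 4)); // cd
        // tokens[2] is the NEWLINE token
        assert_eq!((tokens[3].line, tokens[3].column), (2, 1)); // ef
    }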

    #[test]
    fn test_comments() {
        let code = r#"box Test // this is a comment
# this is also a comment
value"#;

        let mut tokenizer = NyashTokenizer::new(code);
        let tokens = tokenizer.tokenize().unwrap();

        // Comments are stripped from the output.
        let token_types: Vec<_> = tokens.iter()
            .filter(|t| !matches!(t.token_type, TokenType::NEWLINE))
            .map(|t| &t.token_type)
            .collect();
        assert_eq!(token_types.len(), 4); // box, Test, value, EOF
    }

    #[test]
    fn test_error_handling() {
        let mut tokenizer = NyashTokenizer::new("@#$%");
        let result = tokenizer.tokenize();

        assert!(result.is_err());
        match result {
            Err(TokenizeError::UnexpectedCharacter { char, line, column }) => {
                assert_eq!(char, '@');
                assert_eq!(line, 1);
                assert_eq!(column, 1);
            }
            _ => panic!("Expected UnexpectedCharacter error"),
        }
    }
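
    // A sketch of the unterminated-string error path in read_string.
    #[test]
    fn test_unterminated_string() {
        let mut tokenizer = NyashTokenizer::new("\"no closing quote");
        let result = tokenizer.tokenize();

        match result {
            Err(TokenizeError::UnterminatedString { line }) => assert_eq!(line, 1),
            _ => panic!("Expected UnterminatedString error"),
        }
    }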
}