2025-08-09 15:14:44 +09:00
|
|
|
/*!
 * Nyash Tokenizer - converts .nyash source code into a token stream
 *
 * A complete Rust re-implementation of NyashTokenizer from the Python version (nyashc_v4.py)
 * Regex-based matching → optimized into fast char-level processing
 */
|
|
|
|
|
|
|
|
|
|
use thiserror::Error;
|
|
|
|
|
|
|
|
|
|
/// The kind of a lexical token, including any literal payload it carries.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
    // ----- Literals -----
    /// String literal; the payload is the text between the quotes.
    STRING(String),
    /// Integer literal.
    NUMBER(i64),
    /// Floating-point literal.
    FLOAT(f64),
    /// `true`
    TRUE,
    /// `false`
    FALSE,

    // ----- Keywords -----
    /// `box`
    BOX,
    /// `global`
    GLOBAL,
    /// `singleton`
    SINGLETON,
    /// `new`
    NEW,
    /// `if`
    IF,
    /// `else`
    ELSE,
    /// `loop`
    LOOP,
    /// `break`
    BREAK,
    /// `return`
    RETURN,
    /// `function`
    FUNCTION,
    /// `print`
    PRINT,
    /// `this`
    THIS,
    /// `me`
    ME,
    /// `init` (initialization block)
    INIT,
    /// `pack` (constructor)
    PACK,
    /// `nowait`
    NOWAIT,
    /// `await`
    AWAIT,
    /// `interface`
    INTERFACE,
    /// `:` (used for inheritance)
    COLON,
    /// `include` (file inclusion)
    INCLUDE,
    /// `try`
    TRY,
    /// `catch`
    CATCH,
    /// `finally`
    FINALLY,
    /// `throw`
    THROW,
    /// `local` (temporary variable declaration)
    LOCAL,
    /// `static` (static method)
    STATIC,
    /// `outbox` (ownership-transfer variable)
    OUTBOX,
    /// `not` (negation operator)
    NOT,
    /// `override` (explicit override)
    OVERRIDE,
    /// `from` (parent method call)
    FROM,
    /// `weak` (weak-reference modifier)
    WEAK,

    // ----- Operators (longer ones are listed first) -----
    /// `>>`
    ARROW,
    /// `==`
    EQUALS,
    /// `!=`
    NotEquals,
    /// `<=`
    LessEquals,
    /// `>=`
    GreaterEquals,
    /// `&&` or `and`
    AND,
    /// `||` or `or`
    OR,
    /// `<`
    LESS,
    /// `>`
    GREATER,
    /// `=`
    ASSIGN,
    /// `+`
    PLUS,
    /// `-`
    MINUS,
    /// `*`
    MULTIPLY,
    /// `/`
    DIVIDE,

    // ----- Punctuation -----
    /// `.`
    DOT,
    /// `(`
    LPAREN,
    /// `)`
    RPAREN,
    /// `{`
    LBRACE,
    /// `}`
    RBRACE,
    /// `,`
    COMMA,
    /// Line feed (`\n`)
    NEWLINE,

    // ----- Identifiers -----
    /// Any word that is not a keyword; the payload is the identifier text.
    IDENTIFIER(String),

    // ----- Special -----
    /// End of input.
    EOF,
}
|
|
|
|
|
|
|
|
|
|
/// トークンの位置情報を含む構造体
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
|
pub struct Token {
|
|
|
|
|
pub token_type: TokenType,
|
|
|
|
|
pub line: usize,
|
|
|
|
|
pub column: usize,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Token {
|
|
|
|
|
pub fn new(token_type: TokenType, line: usize, column: usize) -> Self {
|
|
|
|
|
Self { token_type, line, column }
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// トークナイズエラー
|
|
|
|
|
#[derive(Error, Debug)]
|
|
|
|
|
pub enum TokenizeError {
|
|
|
|
|
#[error("Unexpected character '{char}' at line {line}, column {column}")]
|
|
|
|
|
UnexpectedCharacter { char: char, line: usize, column: usize },
|
|
|
|
|
|
|
|
|
|
#[error("Unterminated string literal at line {line}")]
|
|
|
|
|
UnterminatedString { line: usize },
|
|
|
|
|
|
|
|
|
|
#[error("Invalid number format at line {line}")]
|
|
|
|
|
InvalidNumber { line: usize },
|
|
|
|
|
|
|
|
|
|
#[error("Comment not closed at line {line}")]
|
|
|
|
|
UnterminatedComment { line: usize },
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Tokenizer for Nyash source text.
pub struct NyashTokenizer {
    /// The whole input, split into `char`s so it can be indexed per character.
    input: Vec<char>,
    /// Index into `input` of the character currently being examined.
    position: usize,
    /// Line of the current character.
    line: usize,
    /// Column of the current character.
    column: usize,
}
|
|
|
|
|
|
|
|
|
|
impl NyashTokenizer {
|
|
|
|
|
/// 新しいトークナイザーを作成
|
|
|
|
|
pub fn new(input: impl Into<String>) -> Self {
|
|
|
|
|
let input_string = input.into();
|
|
|
|
|
Self {
|
|
|
|
|
input: input_string.chars().collect(),
|
|
|
|
|
position: 0,
|
|
|
|
|
line: 1,
|
|
|
|
|
column: 1,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 完全なトークナイズを実行
|
|
|
|
|
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizeError> {
|
|
|
|
|
let mut tokens = Vec::new();
|
|
|
|
|
|
|
|
|
|
while !self.is_at_end() {
|
|
|
|
|
// 空白をスキップ
|
|
|
|
|
self.skip_whitespace();
|
|
|
|
|
|
|
|
|
|
if self.is_at_end() {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 次のトークンを読み取り
|
|
|
|
|
let token = self.tokenize_next()?;
|
|
|
|
|
tokens.push(token);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// EOF トークンを追加
|
|
|
|
|
tokens.push(Token::new(TokenType::EOF, self.line, self.column));
|
|
|
|
|
|
|
|
|
|
Ok(tokens)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 次の一つのトークンを読み取り
|
|
|
|
|
fn tokenize_next(&mut self) -> Result<Token, TokenizeError> {
|
|
|
|
|
let start_line = self.line;
|
|
|
|
|
let start_column = self.column;
|
|
|
|
|
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
Some('"') => {
|
|
|
|
|
let string_value = self.read_string()?;
|
|
|
|
|
Ok(Token::new(TokenType::STRING(string_value), start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some(c) if c.is_ascii_digit() => {
|
|
|
|
|
let token_type = self.read_numeric_literal()?;
|
|
|
|
|
Ok(Token::new(token_type, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some(c) if c.is_alphabetic() || c == '_' => {
|
|
|
|
|
let token_type = self.read_keyword_or_identifier();
|
|
|
|
|
Ok(Token::new(token_type, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('/') if self.peek_char() == Some('/') => {
|
|
|
|
|
self.skip_line_comment();
|
|
|
|
|
self.skip_whitespace(); // コメント後の空白もスキップ
|
|
|
|
|
return self.tokenize_next();
|
|
|
|
|
}
|
|
|
|
|
Some('#') => {
|
|
|
|
|
self.skip_line_comment();
|
|
|
|
|
self.skip_whitespace(); // コメント後の空白もスキップ
|
|
|
|
|
return self.tokenize_next();
|
|
|
|
|
}
|
|
|
|
|
Some('>') if self.peek_char() == Some('>') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::ARROW, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('=') if self.peek_char() == Some('=') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::EQUALS, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('!') if self.peek_char() == Some('=') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::NotEquals, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('<') if self.peek_char() == Some('=') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::LessEquals, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('>') if self.peek_char() == Some('=') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::GreaterEquals, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('&') if self.peek_char() == Some('&') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::AND, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('|') if self.peek_char() == Some('|') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::OR, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('<') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::LESS, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('>') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::GREATER, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('=') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::ASSIGN, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('+') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::PLUS, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('-') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::MINUS, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('*') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::MULTIPLY, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('/') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::DIVIDE, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('.') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::DOT, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('(') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::LPAREN, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some(')') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::RPAREN, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('{') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::LBRACE, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some('}') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::RBRACE, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some(',') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::COMMA, start_line, start_column))
|
|
|
|
|
}
|
2025-08-10 11:32:32 +09:00
|
|
|
Some(':') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::COLON, start_line, start_column))
|
|
|
|
|
}
|
2025-08-09 15:14:44 +09:00
|
|
|
Some('\n') => {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(Token::new(TokenType::NEWLINE, start_line, start_column))
|
|
|
|
|
}
|
|
|
|
|
Some(c) => {
|
|
|
|
|
Err(TokenizeError::UnexpectedCharacter {
|
|
|
|
|
char: c,
|
|
|
|
|
line: self.line,
|
|
|
|
|
column: self.column,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
None => {
|
|
|
|
|
Ok(Token::new(TokenType::EOF, self.line, self.column))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 文字列リテラルを読み取り
|
|
|
|
|
fn read_string(&mut self) -> Result<String, TokenizeError> {
|
|
|
|
|
let start_line = self.line;
|
|
|
|
|
self.advance(); // 開始の '"' をスキップ
|
|
|
|
|
|
|
|
|
|
let mut string_value = String::new();
|
|
|
|
|
|
|
|
|
|
while let Some(c) = self.current_char() {
|
|
|
|
|
if c == '"' {
|
|
|
|
|
self.advance(); // 終了の '"' をスキップ
|
|
|
|
|
return Ok(string_value);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// エスケープ文字の処理
|
|
|
|
|
if c == '\\' {
|
|
|
|
|
self.advance();
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
Some('n') => string_value.push('\n'),
|
|
|
|
|
Some('t') => string_value.push('\t'),
|
|
|
|
|
Some('r') => string_value.push('\r'),
|
|
|
|
|
Some('\\') => string_value.push('\\'),
|
|
|
|
|
Some('"') => string_value.push('"'),
|
|
|
|
|
Some(c) => {
|
|
|
|
|
string_value.push('\\');
|
|
|
|
|
string_value.push(c);
|
|
|
|
|
}
|
|
|
|
|
None => break,
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
string_value.push(c);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Err(TokenizeError::UnterminatedString { line: start_line })
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 数値リテラル(整数または浮動小数点数)を読み取り
|
|
|
|
|
fn read_numeric_literal(&mut self) -> Result<TokenType, TokenizeError> {
|
|
|
|
|
let start_line = self.line;
|
|
|
|
|
let mut number_str = String::new();
|
|
|
|
|
let mut has_dot = false;
|
|
|
|
|
|
|
|
|
|
// 整数部分を読み取り
|
|
|
|
|
while let Some(c) = self.current_char() {
|
|
|
|
|
if c.is_ascii_digit() {
|
|
|
|
|
number_str.push(c);
|
|
|
|
|
self.advance();
|
|
|
|
|
} else if c == '.' && !has_dot && self.peek_char().map_or(false, |ch| ch.is_ascii_digit()) {
|
|
|
|
|
// 小数点の後に数字が続く場合のみ受け入れる
|
|
|
|
|
has_dot = true;
|
|
|
|
|
number_str.push(c);
|
|
|
|
|
self.advance();
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if has_dot {
|
|
|
|
|
// 浮動小数点数として解析
|
|
|
|
|
number_str.parse::<f64>()
|
|
|
|
|
.map(TokenType::FLOAT)
|
|
|
|
|
.map_err(|_| TokenizeError::InvalidNumber { line: start_line })
|
|
|
|
|
} else {
|
|
|
|
|
// 整数として解析
|
|
|
|
|
number_str.parse::<i64>()
|
|
|
|
|
.map(TokenType::NUMBER)
|
|
|
|
|
.map_err(|_| TokenizeError::InvalidNumber { line: start_line })
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// キーワードまたは識別子を読み取り
|
|
|
|
|
fn read_keyword_or_identifier(&mut self) -> TokenType {
|
|
|
|
|
let mut identifier = String::new();
|
|
|
|
|
|
|
|
|
|
while let Some(c) = self.current_char() {
|
|
|
|
|
if c.is_alphanumeric() || c == '_' {
|
|
|
|
|
identifier.push(c);
|
|
|
|
|
self.advance();
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// キーワードチェック
|
|
|
|
|
match identifier.as_str() {
|
|
|
|
|
"box" => TokenType::BOX,
|
|
|
|
|
"global" => TokenType::GLOBAL,
|
|
|
|
|
"singleton" => TokenType::SINGLETON,
|
|
|
|
|
"new" => TokenType::NEW,
|
|
|
|
|
"if" => TokenType::IF,
|
|
|
|
|
"else" => TokenType::ELSE,
|
|
|
|
|
"loop" => TokenType::LOOP,
|
|
|
|
|
"break" => TokenType::BREAK,
|
|
|
|
|
"return" => TokenType::RETURN,
|
|
|
|
|
"function" => TokenType::FUNCTION,
|
|
|
|
|
"print" => TokenType::PRINT,
|
|
|
|
|
"this" => TokenType::THIS,
|
|
|
|
|
"me" => TokenType::ME,
|
|
|
|
|
"init" => TokenType::INIT,
|
2025-08-11 09:40:24 +09:00
|
|
|
"pack" => TokenType::PACK,
|
2025-08-09 15:14:44 +09:00
|
|
|
"nowait" => TokenType::NOWAIT,
|
|
|
|
|
"await" => TokenType::AWAIT,
|
|
|
|
|
"interface" => TokenType::INTERFACE,
|
|
|
|
|
"include" => TokenType::INCLUDE,
|
|
|
|
|
"try" => TokenType::TRY,
|
|
|
|
|
"catch" => TokenType::CATCH,
|
|
|
|
|
"finally" => TokenType::FINALLY,
|
|
|
|
|
"throw" => TokenType::THROW,
|
|
|
|
|
"local" => TokenType::LOCAL,
|
|
|
|
|
"static" => TokenType::STATIC,
|
|
|
|
|
"outbox" => TokenType::OUTBOX,
|
|
|
|
|
"not" => TokenType::NOT,
|
2025-08-11 07:55:41 +09:00
|
|
|
"override" => TokenType::OVERRIDE,
|
|
|
|
|
"from" => TokenType::FROM,
|
2025-08-12 20:27:20 +00:00
|
|
|
"weak" => TokenType::WEAK,
|
2025-08-09 15:14:44 +09:00
|
|
|
"and" => TokenType::AND,
|
|
|
|
|
"or" => TokenType::OR,
|
|
|
|
|
"true" => TokenType::TRUE,
|
|
|
|
|
"false" => TokenType::FALSE,
|
|
|
|
|
_ => TokenType::IDENTIFIER(identifier),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 行コメントをスキップ
|
|
|
|
|
fn skip_line_comment(&mut self) {
|
|
|
|
|
while let Some(c) = self.current_char() {
|
|
|
|
|
if c == '\n' {
|
|
|
|
|
break; // 改行文字は消費せずに残す
|
|
|
|
|
}
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 空白文字をスキップ(改行は除く)
|
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
|
|
|
while let Some(c) = self.current_char() {
|
|
|
|
|
if c.is_whitespace() && c != '\n' {
|
|
|
|
|
self.advance();
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 現在の文字を取得
|
|
|
|
|
fn current_char(&self) -> Option<char> {
|
|
|
|
|
self.input.get(self.position).copied()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 次の文字を先読み
|
|
|
|
|
fn peek_char(&self) -> Option<char> {
|
|
|
|
|
self.input.get(self.position + 1).copied()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 位置を1つ進める
|
|
|
|
|
fn advance(&mut self) {
|
|
|
|
|
if let Some(c) = self.current_char() {
|
|
|
|
|
if c == '\n' {
|
|
|
|
|
self.line += 1;
|
|
|
|
|
self.column = 1;
|
|
|
|
|
} else {
|
|
|
|
|
self.column += 1;
|
|
|
|
|
}
|
|
|
|
|
self.position += 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 入力の終端に達したかチェック
|
|
|
|
|
fn is_at_end(&self) -> bool {
|
|
|
|
|
self.position >= self.input.len()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ===== Tests =====
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `source`, panicking if the tokenizer reports an error.
    fn lex(source: &str) -> Vec<Token> {
        NyashTokenizer::new(source).tokenize().unwrap()
    }

    /// Returns the tokens of `tokens` that are not `NEWLINE`.
    ///
    /// The tokenizer emits a `NEWLINE` token for every line feed, so tests
    /// that only care about the "real" tokens filter them out first.
    fn significant(tokens: &[Token]) -> Vec<&Token> {
        tokens
            .iter()
            .filter(|t| t.token_type != TokenType::NEWLINE)
            .collect()
    }

    #[test]
    fn test_simple_tokens() {
        let tokens = lex("box new = + - *");

        assert_eq!(tokens.len(), 7); // 6 tokens + EOF
        assert_eq!(tokens[0].token_type, TokenType::BOX);
        assert_eq!(tokens[1].token_type, TokenType::NEW);
        assert_eq!(tokens[2].token_type, TokenType::ASSIGN);
        assert_eq!(tokens[3].token_type, TokenType::PLUS);
        assert_eq!(tokens[4].token_type, TokenType::MINUS);
        assert_eq!(tokens[5].token_type, TokenType::MULTIPLY);
        assert_eq!(tokens[6].token_type, TokenType::EOF);
    }

    #[test]
    fn test_string_literal() {
        let tokens = lex(r#""Hello, World!""#);

        assert_eq!(tokens.len(), 2); // STRING + EOF
        match &tokens[0].token_type {
            TokenType::STRING(s) => assert_eq!(s, "Hello, World!"),
            _ => panic!("Expected STRING token"),
        }
    }

    #[test]
    fn test_number_literal() {
        let tokens = lex("42 123 0");

        assert_eq!(tokens.len(), 4); // 3 numbers + EOF
        match &tokens[0].token_type {
            TokenType::NUMBER(n) => assert_eq!(*n, 42),
            _ => panic!("Expected NUMBER token"),
        }
        match &tokens[1].token_type {
            TokenType::NUMBER(n) => assert_eq!(*n, 123),
            _ => panic!("Expected NUMBER token"),
        }
        match &tokens[2].token_type {
            TokenType::NUMBER(n) => assert_eq!(*n, 0),
            _ => panic!("Expected NUMBER token"),
        }
    }

    #[test]
    fn test_identifier() {
        let tokens = lex("test_var myBox getValue");

        assert_eq!(tokens.len(), 4); // 3 identifiers + EOF
        match &tokens[0].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "test_var"),
            _ => panic!("Expected IDENTIFIER token"),
        }
        match &tokens[1].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "myBox"),
            _ => panic!("Expected IDENTIFIER token"),
        }
        match &tokens[2].token_type {
            TokenType::IDENTIFIER(s) => assert_eq!(s, "getValue"),
            _ => panic!("Expected IDENTIFIER token"),
        }
    }

    #[test]
    fn test_operators() {
        let tokens = lex(">> == != <= >= < >");

        assert_eq!(tokens[0].token_type, TokenType::ARROW);
        assert_eq!(tokens[1].token_type, TokenType::EQUALS);
        assert_eq!(tokens[2].token_type, TokenType::NotEquals);
        assert_eq!(tokens[3].token_type, TokenType::LessEquals);
        assert_eq!(tokens[4].token_type, TokenType::GreaterEquals);
        assert_eq!(tokens[5].token_type, TokenType::LESS);
        assert_eq!(tokens[6].token_type, TokenType::GREATER);
    }

    #[test]
    fn test_complex_code() {
        let code = r#"
        box TestBox {
            value

            getValue() {
                return this.value
            }
        }

        obj = new TestBox()
        obj.value = "test123"
        "#;

        let tokens = lex(code);

        // Check that the basic tokens are present.
        let token_types: Vec<_> = tokens.iter().map(|t| &t.token_type).collect();
        assert!(token_types.contains(&&TokenType::BOX));
        assert!(token_types.contains(&&TokenType::NEW));
        assert!(token_types.contains(&&TokenType::THIS));
        assert!(token_types.contains(&&TokenType::RETURN));
        assert!(token_types.contains(&&TokenType::DOT));
    }

    #[test]
    fn test_line_numbers() {
        let tokens = lex("box\ntest\nvalue");

        // NEWLINE tokens sit between the words, so look only at the
        // significant tokens when checking which line each word is on.
        let words = significant(&tokens);
        assert_eq!(words[0].line, 1); // box
        assert_eq!(words[1].line, 2); // test
        assert_eq!(words[2].line, 3); // value
    }

    #[test]
    fn test_comments() {
        let code = r#"box Test // this is a comment
# this is also a comment
value"#;

        let tokens = lex(code);

        // Comments are dropped, but the line feed ending each commented
        // line is still reported as a NEWLINE token.
        let token_types: Vec<_> = tokens.iter().map(|t| t.token_type.clone()).collect();
        assert_eq!(
            token_types,
            vec![
                TokenType::BOX,
                TokenType::IDENTIFIER("Test".to_string()),
                TokenType::NEWLINE,
                TokenType::NEWLINE,
                TokenType::IDENTIFIER("value".to_string()),
                TokenType::EOF,
            ]
        );
        assert_eq!(significant(&tokens).len(), 4); // box, Test, value, EOF
    }

    #[test]
    fn test_error_handling() {
        let mut tokenizer = NyashTokenizer::new("@#$%");
        let result = tokenizer.tokenize();

        assert!(result.is_err());
        match result {
            Err(TokenizeError::UnexpectedCharacter { char, line, column }) => {
                assert_eq!(char, '@');
                assert_eq!(line, 1);
                assert_eq!(column, 1);
            }
            _ => panic!("Expected UnexpectedCharacter error"),
        }
    }
}
|