232 lines
7.2 KiB
Rust
232 lines
7.2 KiB
Rust
use std::iter::Peekable;
|
|
use std::str::Chars;
|
|
|
|
/// A lexical token produced by [`tokenize`].
///
/// Keyword variants are emitted when an identifier-shaped word matches one of
/// the reserved spellings; every other identifier-shaped word becomes
/// [`Token::Identifier`]. Punctuation variants map one-to-one onto the
/// character sequences noted on each variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// The `ns` keyword.
    Namespace,
    /// Any identifier-shaped word that is not a keyword; carries its text.
    Identifier(String),
    /// The `pub` keyword.
    Public,
    /// The `mod` keyword.
    Module,
    /// `{`
    CurlyOpen,
    /// `}`
    CurlyClose,
    /// The `int` keyword.
    Interface,
    /// `:`
    Colon,
    /// The `fn` keyword.
    Function,
    /// `(`
    ParenOpen,
    /// `)`
    ParenClose,
    /// The `enum` keyword.
    Enum,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// A single `&`.
    Intersection,
    /// A single `|`.
    Union,
    /// `&&`
    And,
    /// `||`
    Or,
    /// A single `=`.
    Equals,
    /// `=>`
    BigArrow,
    /// `->`
    LittleArrow,
    /// `+`
    Plus,
    /// A single `-`.
    Minus,
    /// A single `.`.
    Dot,
    /// `...`
    Ellipsis,
    /// The `abs` keyword.
    Abstract,
}
|
|
|
|
pub fn tokenize(input: &String) -> Result<Vec<Token>, &'static str> {
|
|
let mut tokens: Vec<Token> = Vec::new();
|
|
let mut peekable = input.chars().peekable();
|
|
while let Some(c) = peekable.next() {
|
|
match c {
|
|
' ' | '\n' | '\r' | '\t' => { /* ignore */ }
|
|
'{' => tokens.push(Token::CurlyOpen),
|
|
'}' => tokens.push(Token::CurlyClose),
|
|
':' => tokens.push(Token::Colon),
|
|
'(' => tokens.push(Token::ParenOpen),
|
|
')' => tokens.push(Token::ParenClose),
|
|
'<' => tokens.push(Token::LessThan),
|
|
'>' => tokens.push(Token::GreaterThan),
|
|
'&' => match peekable.peek() {
|
|
Some('&') => {
|
|
let _ = peekable.next();
|
|
tokens.push(Token::And);
|
|
}
|
|
Some(_) | None => tokens.push(Token::Intersection),
|
|
},
|
|
'|' => match peekable.next_if_eq(&'|') {
|
|
Some(_) => tokens.push(Token::Or),
|
|
None => tokens.push(Token::Union),
|
|
},
|
|
'=' => match peekable.next_if_eq(&'>') {
|
|
Some(_) => tokens.push(Token::BigArrow),
|
|
None => tokens.push(Token::Equals),
|
|
},
|
|
'+' => tokens.push(Token::Plus),
|
|
'-' => match peekable.next_if_eq(&'>') {
|
|
Some(_) => tokens.push(Token::LittleArrow),
|
|
None => tokens.push(Token::Minus),
|
|
},
|
|
'.' => {
|
|
let mut count = 1;
|
|
while let Some(_) = peekable.next_if_eq(&'.') {
|
|
count += 1;
|
|
}
|
|
match count {
|
|
1 => tokens.push(Token::Dot),
|
|
3 => tokens.push(Token::Ellipsis),
|
|
_ => return Err("Unexpected number of tokens after '.'"),
|
|
}
|
|
}
|
|
_ => {
|
|
if let Some(token) = match_identifier_or_keyword(c, &mut peekable) {
|
|
tokens.push(token);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Ok(tokens)
|
|
}
|
|
|
|
fn match_identifier_or_keyword(start_char: char, peekable: &mut Peekable<Chars>) -> Option<Token> {
|
|
if !is_valid_identifier_start_char(start_char) {
|
|
return None;
|
|
}
|
|
|
|
// append start char
|
|
let mut buffer = String::new();
|
|
buffer.push(start_char);
|
|
|
|
// munch while we have valid identifier chars
|
|
while let Some(c) = peekable.next_if(|next_char| is_valid_identifier_char(*next_char)) {
|
|
buffer.push(c);
|
|
}
|
|
|
|
// match to a keyword if possible, else identifier
|
|
match buffer.as_str() {
|
|
"abs" => Some(Token::Abstract),
|
|
"enum" => Some(Token::Enum),
|
|
"fn" => Some(Token::Function),
|
|
"int" => Some(Token::Interface),
|
|
"mod" => Some(Token::Module),
|
|
"ns" => Some(Token::Namespace),
|
|
"pub" => Some(Token::Public),
|
|
_ => Some(Token::Identifier(buffer)),
|
|
}
|
|
}
|
|
|
|
/// Reports whether `c` may begin an identifier: an ASCII letter or `_`.
fn is_valid_identifier_start_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphabetic()
}
|
|
|
|
/// Reports whether `c` may appear after the first character of an identifier:
/// an ASCII letter, an ASCII digit, or `_`.
fn is_valid_identifier_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphanumeric()
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Read;
    use std::path::Path;

    /// Tokenizes `src`, panicking if the lexer reports an error.
    fn lex(src: &str) -> Vec<Token> {
        tokenize(&String::from(src)).unwrap()
    }

    /// Shorthand for building an identifier token.
    fn ident(name: &str) -> Token {
        Token::Identifier(String::from(name))
    }

    // The in-memory tests compare the complete token vector rather than
    // individual indices, so unexpected extra tokens fail the test as well.

    #[test]
    fn simple_ns() {
        assert_eq!(vec![Token::Namespace, ident("simple")], lex("ns simple"));
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn simple_ns_file() {
        let mut src_file = File::open(Path::new("test-data/lexer/simple_ns.dm")).unwrap();
        let mut src = String::new();
        // Surface read failures directly instead of tokenizing a partial or
        // empty buffer and failing later with a misleading index panic.
        src_file
            .read_to_string(&mut src)
            .expect("failed to read test-data/lexer/simple_ns.dm");
        let result = tokenize(&src).unwrap();
        // Only the leading tokens are checked; the rest of the fixture's
        // contents are not pinned down by this test.
        assert_eq!(Token::Namespace, result[0]);
        assert_eq!(ident("simple"), result[1]);
    }

    #[test]
    fn pub_mod_simple() {
        assert_eq!(
            vec![Token::Public, Token::Module, ident("simple")],
            lex("pub mod simple")
        );
    }

    #[test]
    fn curly_open_and_close() {
        assert_eq!(vec![Token::CurlyOpen, Token::CurlyClose], lex("{ }"));
    }

    #[test]
    fn simple_int() {
        assert_eq!(vec![Token::Interface, ident("simple")], lex("int simple"));
    }

    #[test]
    fn ns_pub_mod_simple() {
        assert_eq!(
            vec![
                Token::Namespace,
                ident("simple_ns"),
                Token::Public,
                Token::Module,
                ident("simple"),
                Token::CurlyOpen,
                Token::CurlyClose,
            ],
            lex("ns simple_ns\npub mod simple { }")
        );
    }

    #[test]
    fn curly_open_and_close_no_space() {
        assert_eq!(vec![Token::CurlyOpen, Token::CurlyClose], lex("{}"));
    }

    #[test]
    fn interface_function() {
        assert_eq!(
            vec![
                Token::Function,
                ident("test"),
                Token::ParenOpen,
                Token::ParenClose,
                Token::Colon,
                ident("Test"),
            ],
            lex("fn test(): Test")
        );
    }

    #[test]
    fn interface_prop() {
        assert_eq!(
            vec![ident("test"), Token::Colon, ident("Test")],
            lex("test: Test")
        );
    }

    #[test]
    fn enum_decl() {
        assert_eq!(
            vec![
                Token::Enum,
                ident("Test"),
                Token::CurlyOpen,
                Token::CurlyClose,
            ],
            lex("enum Test {}")
        );
    }

    #[test]
    fn spread_operator() {
        assert_eq!(
            vec![
                Token::CurlyOpen,
                Token::Ellipsis,
                ident("props"),
                Token::CurlyClose,
            ],
            lex("{ ...props }")
        );
    }
}
|