Adding more keywords and symbols to lexer.
parent b3177a612f
commit 88119e3001

src/lexer/mod.rs | 168
@@ -1,3 +1,6 @@

The hunk adds the two iterator imports used by the new lookahead code, ahead of the existing Token enum:

use std::iter::Peekable;
use std::str::Chars;

#[derive(Debug, PartialEq, Eq)]
pub enum Token {
    Namespace,
@@ -7,39 +10,127 @@ pub enum Token {

The hunk's resulting code (the extended Token variant list and the rewritten, character-driven tokenizer):

    CurlyOpen,
    CurlyClose,
    Interface,
    Colon,
    Function,
    ParenOpen,
    ParenClose,
    Enum,
    LessThan,
    GreaterThan,
    Intersection,
    Union,
    And,
    Or,
    Equals,
    BigArrow,
    LittleArrow,
    Plus,
    Minus,
    Dot,
    Ellipsis,
    Abstract
}

pub fn tokenize(input: &String) -> Vec<Token> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut peekable = input.chars().peekable();
    while let Some(c) = peekable.next() {
        match c {
            ' ' | '\n' | '\r' | '\t' => { /* ignore */ }
            '{' => tokens.push(Token::CurlyOpen),
            '}' => tokens.push(Token::CurlyClose),
            ':' => tokens.push(Token::Colon),
            '(' => tokens.push(Token::ParenOpen),
            ')' => tokens.push(Token::ParenClose),
            '<' => tokens.push(Token::LessThan),
            '>' => tokens.push(Token::GreaterThan),
            '&' => {
                match peekable.peek() {
                    Some('&') => {
                        let _ = peekable.next();
                        tokens.push(Token::And);
                    },
                    Some(_) | None => tokens.push(Token::Intersection),
                }
            }
            '|' => {
                match peekable.next_if_eq(&'|') {
                    Some(_) => tokens.push(Token::Or),
                    None => tokens.push(Token::Union),
                }
            },
            '=' => {
                match peekable.next_if_eq(&'>') {
                    Some(_) => tokens.push(Token::BigArrow),
                    None => tokens.push(Token::Equals),
                }
            },
            '+' => tokens.push(Token::Plus),
            '-' => {
                match peekable.next_if_eq(&'>') {
                    Some(_) => tokens.push(Token::LittleArrow),
                    None => tokens.push(Token::Minus),
                }
            },
            '.' => {
                let mut count = 1;
                while let Some(_) = peekable.next_if_eq(&'.') {
                    count += 1;
                }
                match count {
                    1 => tokens.push(Token::Dot),
                    3 => tokens.push(Token::Ellipsis),
                    _ => panic!("Too many dots.")
                }
            }
            _ => {
                if let Some(token) = match_identifier_or_keyword(c, &mut peekable) {
                    tokens.push(token);
                }
            }
        }
    }
    tokens
}

fn match_identifier_or_keyword(start_char: char, peekable: &mut Peekable<Chars>) -> Option<Token> {
    if !is_valid_identifier_start_char(start_char) {
        return None
    }

    // append start char
    let mut buffer = String::new();
    buffer.push(start_char);

    // munch while we have valid identifier chars
    while let Some(c) = peekable.next_if(|next_char| is_valid_identifier_char(*next_char)) {
        buffer.push(c);
    }

    // match to a keyword if possible, else identifier
    match buffer.as_str() {
        "abs" => Some(Token::Abstract),
        "enum" => Some(Token::Enum),
        "fn" => Some(Token::Function),
        "int" => Some(Token::Interface),
        "mod" => Some(Token::Module),
        "ns" => Some(Token::Namespace),
        "pub" => Some(Token::Public),
        _ => Some(Token::Identifier(buffer)),
    }
}

fn is_valid_identifier_start_char(c: char) -> bool {
    match c {
        'a'..='z' | 'A'..='Z' | '_' => true,
        _ => false,
    }
}

fn is_valid_identifier_char(c: char) -> bool {
    match c {
        'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => true,
        _ => false,
    }
}

Code removed by this hunk (the old whitespace-splitting loop body inside tokenize, and match_buffer, which the new match_identifier_or_keyword replaces):

    let mut buffer = String::new();
        // if not whitespace, append to buffer
        // else, we should match a token
        if !c.is_whitespace() {
            buffer.push(c);
        } else {
            tokens.push(match_buffer(&mut buffer));
            buffer.clear();
        }

        // check if eof
        if peekable.peek().is_none() {
            tokens.push(match_buffer(&mut buffer));
        }

fn match_buffer(buffer: &mut String) -> Token {
    match buffer.as_str() {
        "int" => Token::Interface,
        "mod" => Token::Module,
        "ns" => Token::Namespace,
        "pub" => Token::Public,
        "{" => Token::CurlyOpen,
        "}" => Token::CurlyClose,
        identifier => Token::Identifier(identifier.to_string()),
    }
}
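A side note on the lookahead pattern used above: the '&' arm peeks and then advances by hand, while the '|', '=', and '-' arms use Peekable::next_if_eq, which does the same compare-and-consume step in one call. If consistency is wanted, the '&' arm could be folded into the same shape. The sketch below is standalone and not part of the commit; the helper name one_or_two is made up for illustration.

use std::iter::Peekable;
use std::str::Chars;

// Hypothetical helper mirroring the '|' / '=' / '-' arms above: if `second`
// immediately follows, consume it and return `pair`, otherwise return `single`.
fn one_or_two<T>(peekable: &mut Peekable<Chars>, second: char, pair: T, single: T) -> T {
    match peekable.next_if_eq(&second) {
        Some(_) => pair,
        None => single,
    }
}

fn main() {
    // "&&": the first '&' has already been taken by the caller's next(),
    // exactly as in tokenize(); the helper consumes the second one.
    let mut it = "&&".chars().peekable();
    it.next();
    assert_eq!("And", one_or_two(&mut it, '&', "And", "Intersection"));

    // "&+": nothing to pair with, so the single-character token wins.
    let mut it = "&+".chars().peekable();
    it.next();
    assert_eq!("Intersection", one_or_two(&mut it, '&', "And", "Intersection"));

    println!("lookahead behaves the same as the hand-rolled '&' arm");
}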
@@ -100,4 +191,39 @@ mod tests {

The tests module gains four new cases; the first three lines below are the tail of the existing curly-brace test:

        assert_eq!(Token::CurlyOpen, result[5]);
        assert_eq!(Token::CurlyClose, result[6]);
    }

    #[test]
    fn curly_open_and_close_no_space() {
        let result = tokenize(&String::from("{}"));
        assert_eq!(Token::CurlyOpen, result[0]);
        assert_eq!(Token::CurlyClose, result[1]);
    }

    #[test]
    fn interface_function() {
        let result = tokenize(&String::from("fn test(): Test"));
        assert_eq!(Token::Function, result[0]);
        assert_eq!(Token::Identifier(String::from("test")), result[1]);
        assert_eq!(Token::ParenOpen, result[2]);
        assert_eq!(Token::ParenClose, result[3]);
        assert_eq!(Token::Colon, result[4]);
        assert_eq!(Token::Identifier(String::from("Test")), result[5]);
    }

    #[test]
    fn interface_prop() {
        let result = tokenize(&String::from("test: Test"));
        assert_eq!(Token::Identifier(String::from("test")), result[0]);
        assert_eq!(Token::Colon, result[1]);
        assert_eq!(Token::Identifier(String::from("Test")), result[2]);
    }

    #[test]
    fn enum_decl() {
        let result = tokenize(&String::from("enum Test {}"));
        assert_eq!(Token::Enum, result[0]);
        assert_eq!(Token::Identifier(String::from("Test")), result[1]);
        assert_eq!(Token::CurlyOpen, result[2]);
        assert_eq!(Token::CurlyClose, result[3]);
    }
}
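The new tests cover keywords, identifiers, and punctuation, but none of the two-character operators introduced above. If that coverage is wanted, a case along the following lines could be appended inside the same mod tests; this is a sketch, the test name is made up, and it assumes the module brings tokenize and Token into scope the same way the existing tests do.

    #[test]
    fn arrows_and_logic_operators() {
        // Mixes identifiers with the lookahead-based operator tokens.
        let result = tokenize(&String::from("A => B -> C && D || E"));
        assert_eq!(Token::Identifier(String::from("A")), result[0]);
        assert_eq!(Token::BigArrow, result[1]);
        assert_eq!(Token::Identifier(String::from("B")), result[2]);
        assert_eq!(Token::LittleArrow, result[3]);
        assert_eq!(Token::Identifier(String::from("C")), result[4]);
        assert_eq!(Token::And, result[5]);
        assert_eq!(Token::Identifier(String::from("D")), result[6]);
        assert_eq!(Token::Or, result[7]);
        assert_eq!(Token::Identifier(String::from("E")), result[8]);
    }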