Refactor lexer for readability and fewer method calls.

This commit is contained in:
Jesse Brault 2026-03-23 10:47:23 -05:00
parent 916b6377ac
commit 41e798d8a6

View File

@ -34,134 +34,127 @@ impl<'a> Lexer<'a> {
} }
} }
let token = if chunk.starts_with("->") { let mut chars = chunk.chars();
Token::new(self.position, self.position + 2, TokenKind::RightArrow) let current = chars.next().unwrap(); // safe because we return None if chunk is empty above
} else if chunk.starts_with("-") { let peek = chars.next();
Token::new(self.position, self.position + 1, TokenKind::Minus)
} else if chunk.starts_with("(") {
Token::new(self.position, self.position + 1, TokenKind::LeftParentheses)
} else if chunk.starts_with(")") {
Token::new(
self.position,
self.position + 1,
TokenKind::RightParentheses,
)
} else if chunk.starts_with("*") {
Token::new(self.position, self.position + 1, TokenKind::Star)
} else if chunk.starts_with("/") {
Token::new(self.position, self.position + 1, TokenKind::Slash)
} else if chunk.starts_with("%") {
Token::new(self.position, self.position + 1, TokenKind::Modulo)
} else if chunk.starts_with("+") {
Token::new(self.position, self.position + 1, TokenKind::Plus)
} else if chunk.starts_with("&") {
Token::new(self.position, self.position + 1, TokenKind::Ampersand)
} else if chunk.starts_with("^") {
Token::new(self.position, self.position + 1, TokenKind::Caret)
} else if chunk.starts_with("|") {
Token::new(self.position, self.position + 1, TokenKind::Bar)
} else if chunk.starts_with("=") {
Token::new(self.position, self.position + 1, TokenKind::Equals)
} else if chunk.starts_with(",") {
Token::new(self.position, self.position + 1, TokenKind::Comma)
} else if chunk.starts_with(":") {
Token::new(self.position, self.position + 1, TokenKind::Colon)
} else if chunk.starts_with(".") {
Token::new(self.position, self.position + 1, TokenKind::Dot)
} else if chunk.starts_with("[") {
Token::new(self.position, self.position + 1, TokenKind::LeftSquare)
} else if chunk.starts_with("]") {
Token::new(self.position, self.position + 1, TokenKind::RightSquare)
} else if chunk.starts_with("<") {
Token::new(self.position, self.position + 1, TokenKind::Lt)
} else if chunk.starts_with(">") {
Token::new(self.position, self.position + 1, TokenKind::Gt)
} else {
// more than one char token
if chunk.starts_with(|c: char| c.is_ascii_digit()) {
// number literal
let mut end = self.position;
let mut whole_chars = chunk.chars();
while let Some(c) = whole_chars.next() {
if c.is_ascii_digit() {
end += 1;
} else {
break;
}
}
let mut fraction_chars = chunk.chars().skip(end - self.position); let (end, kind) = match current {
if fraction_chars.next().map(|c| c == '.').unwrap_or(false) { '-' => {
let mut found_fraction = false; if let Some(peek) = peek {
while let Some(c) = fraction_chars.next() { if peek == '>' {
(self.position + 2, TokenKind::RightArrow)
} else {
(self.position + 1, TokenKind::Minus)
}
} else {
(self.position + 1, TokenKind::Minus)
}
}
'(' => (self.position + 1, TokenKind::LeftParentheses),
')' => (self.position + 1, TokenKind::RightParentheses),
'[' => (self.position + 1, TokenKind::LeftSquare),
']' => (self.position + 1, TokenKind::RightSquare),
'<' => (self.position + 1, TokenKind::Lt),
'>' => (self.position + 1, TokenKind::Gt),
'*' => (self.position + 1, TokenKind::Star),
'/' => (self.position + 1, TokenKind::Slash),
'%' => (self.position + 1, TokenKind::Modulo),
'+' => (self.position + 1, TokenKind::Plus),
'&' => (self.position + 1, TokenKind::Ampersand),
'^' => (self.position + 1, TokenKind::Caret),
'|' => (self.position + 1, TokenKind::Bar),
'=' => (self.position + 1, TokenKind::Equals),
',' => (self.position + 1, TokenKind::Comma),
'.' => (self.position + 1, TokenKind::Dot),
':' => (self.position + 1, TokenKind::Colon),
_ => {
// more than one char token
if chunk.starts_with(|c: char| c.is_ascii_digit()) {
// number literal
let mut end = self.position;
let mut whole_chars = chunk.chars();
while let Some(c) = whole_chars.next() {
if c.is_ascii_digit() { if c.is_ascii_digit() {
end += 1; end += 1;
if !found_fraction {
end += 1; // to account for decimal point
found_fraction = true;
}
} else { } else {
break; break;
} }
} }
if found_fraction {
Token::new(self.position, end, TokenKind::DoubleLiteral) let mut fraction_chars = chunk.chars().skip(end - self.position);
if fraction_chars.next().map(|c| c == '.').unwrap_or(false) {
let mut found_fraction = false;
while let Some(c) = fraction_chars.next() {
if c.is_ascii_digit() {
end += 1;
if !found_fraction {
end += 1; // to account for decimal point
found_fraction = true;
}
} else {
break;
}
}
if found_fraction {
(end, TokenKind::DoubleLiteral)
} else {
(end, TokenKind::IntegerLiteral)
}
} else { } else {
Token::new(self.position, end, TokenKind::IntegerLiteral) (end, TokenKind::IntegerLiteral)
} }
} else { } else if chunk.starts_with("\"") {
Token::new(self.position, end, TokenKind::IntegerLiteral) // string literal
} let mut end = self.position;
} else if chunk.starts_with("\"") { let mut terminated = false;
// string literal let mut chars = chunk.chars();
let mut end = self.position; chars.next(); // skip opening quote
let mut terminated = false;
let mut chars = chunk.chars();
chars.next(); // skip opening quote
end += 1;
for char in chars {
end += 1; end += 1;
if char == '"' { for char in chars {
terminated = true; end += 1;
break; if char == '"' {
terminated = true;
break;
}
} }
} if !terminated {
if !terminated { return Some(Err(LexerError::new(LexerErrorKind::UnterminatedString)));
return Some(Err(LexerError::new(LexerErrorKind::UnterminatedString))); }
} (end, TokenKind::String)
Token::new(self.position, end, TokenKind::String) } else {
} else { // keyword or identifier
// keyword or identifier let mut prefix = String::new();
let mut prefix = String::new(); for char in chunk.chars() {
for char in chunk.chars() { if char.is_alphanumeric() || char == '_' {
if char.is_alphanumeric() || char == '_' { prefix.push(char);
prefix.push(char); } else {
} else { break;
break; }
} }
}
if prefix.len() == 0 { if prefix.len() == 0 {
return Some(Err(LexerError::new(LexerErrorKind::UnrecognizedCharacter( return Some(Err(LexerError::new(LexerErrorKind::UnrecognizedCharacter(
chunk.chars().next().unwrap(), chunk.chars().next().unwrap(),
)))); ))));
} }
let token_kind = match prefix.as_str() { let token_kind = match prefix.as_str() {
"fn" => TokenKind::Fn, "fn" => TokenKind::Fn,
"end" => TokenKind::End, "end" => TokenKind::End,
"let" => TokenKind::Let, "let" => TokenKind::Let,
"extern" => TokenKind::Extern, "extern" => TokenKind::Extern,
"class" => TokenKind::Class, "class" => TokenKind::Class,
"self" => TokenKind::SelfKw, "self" => TokenKind::SelfKw,
"pub" => TokenKind::Public, "pub" => TokenKind::Public,
"mut" => TokenKind::Mut, "mut" => TokenKind::Mut,
"ctor" => TokenKind::Ctor, "ctor" => TokenKind::Ctor,
_ => TokenKind::Identifier, _ => TokenKind::Identifier,
}; };
Token::new(self.position, self.position + prefix.len(), token_kind) (self.position + prefix.len(), token_kind)
}
} }
}; };
let token = Token::new(self.position, end, kind);
self.position += token.end() - token.start(); self.position += token.end() - token.start();
Some(Ok(token)) Some(Ok(token))
} }