From 41e798d8a674ce22f06b46cc8c9feac763329ee8 Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Mon, 23 Mar 2026 10:47:23 -0500 Subject: [PATCH] Refactor lexer for readability and less method calls. --- dmc-lib/src/lexer.rs | 217 +++++++++++++++++++++---------------------- 1 file changed, 105 insertions(+), 112 deletions(-) diff --git a/dmc-lib/src/lexer.rs b/dmc-lib/src/lexer.rs index cd1c8c6..4ad720e 100644 --- a/dmc-lib/src/lexer.rs +++ b/dmc-lib/src/lexer.rs @@ -34,134 +34,127 @@ impl<'a> Lexer<'a> { } } - let token = if chunk.starts_with("->") { - Token::new(self.position, self.position + 2, TokenKind::RightArrow) - } else if chunk.starts_with("-") { - Token::new(self.position, self.position + 1, TokenKind::Minus) - } else if chunk.starts_with("(") { - Token::new(self.position, self.position + 1, TokenKind::LeftParentheses) - } else if chunk.starts_with(")") { - Token::new( - self.position, - self.position + 1, - TokenKind::RightParentheses, - ) - } else if chunk.starts_with("*") { - Token::new(self.position, self.position + 1, TokenKind::Star) - } else if chunk.starts_with("/") { - Token::new(self.position, self.position + 1, TokenKind::Slash) - } else if chunk.starts_with("%") { - Token::new(self.position, self.position + 1, TokenKind::Modulo) - } else if chunk.starts_with("+") { - Token::new(self.position, self.position + 1, TokenKind::Plus) - } else if chunk.starts_with("&") { - Token::new(self.position, self.position + 1, TokenKind::Ampersand) - } else if chunk.starts_with("^") { - Token::new(self.position, self.position + 1, TokenKind::Caret) - } else if chunk.starts_with("|") { - Token::new(self.position, self.position + 1, TokenKind::Bar) - } else if chunk.starts_with("=") { - Token::new(self.position, self.position + 1, TokenKind::Equals) - } else if chunk.starts_with(",") { - Token::new(self.position, self.position + 1, TokenKind::Comma) - } else if chunk.starts_with(":") { - Token::new(self.position, self.position + 1, TokenKind::Colon) - } else if chunk.starts_with(".") { - Token::new(self.position, self.position + 1, TokenKind::Dot) - } else if chunk.starts_with("[") { - Token::new(self.position, self.position + 1, TokenKind::LeftSquare) - } else if chunk.starts_with("]") { - Token::new(self.position, self.position + 1, TokenKind::RightSquare) - } else if chunk.starts_with("<") { - Token::new(self.position, self.position + 1, TokenKind::Lt) - } else if chunk.starts_with(">") { - Token::new(self.position, self.position + 1, TokenKind::Gt) - } else { - // more than one char token - if chunk.starts_with(|c: char| c.is_ascii_digit()) { - // number literal - let mut end = self.position; - let mut whole_chars = chunk.chars(); - while let Some(c) = whole_chars.next() { - if c.is_ascii_digit() { - end += 1; - } else { - break; - } - } + let mut chars = chunk.chars(); + let current = chars.next().unwrap(); // safe because we return None if chunk is empty above + let peek = chars.next(); - let mut fraction_chars = chunk.chars().skip(end - self.position); - if fraction_chars.next().map(|c| c == '.').unwrap_or(false) { - let mut found_fraction = false; - while let Some(c) = fraction_chars.next() { + let (end, kind) = match current { + '-' => { + if let Some(peek) = peek { + if peek == '>' { + (self.position + 2, TokenKind::RightArrow) + } else { + (self.position + 1, TokenKind::Minus) + } + } else { + (self.position + 1, TokenKind::Minus) + } + } + '(' => (self.position + 1, TokenKind::LeftParentheses), + ')' => (self.position + 1, TokenKind::RightParentheses), + '[' => (self.position + 1, TokenKind::LeftSquare), + ']' => (self.position + 1, TokenKind::RightSquare), + '<' => (self.position + 1, TokenKind::Lt), + '>' => (self.position + 1, TokenKind::Gt), + '*' => (self.position + 1, TokenKind::Star), + '/' => (self.position + 1, TokenKind::Slash), + '%' => (self.position + 1, TokenKind::Modulo), + '+' => (self.position + 1, TokenKind::Plus), + '&' => (self.position + 1, TokenKind::Ampersand), + '^' => (self.position + 1, TokenKind::Caret), + '|' => (self.position + 1, TokenKind::Bar), + '=' => (self.position + 1, TokenKind::Equals), + ',' => (self.position + 1, TokenKind::Comma), + '.' => (self.position + 1, TokenKind::Dot), + ':' => (self.position + 1, TokenKind::Colon), + _ => { + // more than one char token + if chunk.starts_with(|c: char| c.is_ascii_digit()) { + // number literal + let mut end = self.position; + let mut whole_chars = chunk.chars(); + while let Some(c) = whole_chars.next() { if c.is_ascii_digit() { end += 1; - if !found_fraction { - end += 1; // to account for decimal point - found_fraction = true; - } } else { break; } } - if found_fraction { - Token::new(self.position, end, TokenKind::DoubleLiteral) + + let mut fraction_chars = chunk.chars().skip(end - self.position); + if fraction_chars.next().map(|c| c == '.').unwrap_or(false) { + let mut found_fraction = false; + while let Some(c) = fraction_chars.next() { + if c.is_ascii_digit() { + end += 1; + if !found_fraction { + end += 1; // to account for decimal point + found_fraction = true; + } + } else { + break; + } + } + if found_fraction { + (end, TokenKind::DoubleLiteral) + } else { + (end, TokenKind::IntegerLiteral) + } } else { - Token::new(self.position, end, TokenKind::IntegerLiteral) + (end, TokenKind::IntegerLiteral) } - } else { - Token::new(self.position, end, TokenKind::IntegerLiteral) - } - } else if chunk.starts_with("\"") { - // string literal - let mut end = self.position; - let mut terminated = false; - let mut chars = chunk.chars(); - chars.next(); // skip opening quote - end += 1; - for char in chars { + } else if chunk.starts_with("\"") { + // string literal + let mut end = self.position; + let mut terminated = false; + let mut chars = chunk.chars(); + chars.next(); // skip opening quote end += 1; - if char == '"' { - terminated = true; - break; + for char in chars { + end += 1; + if char == '"' { + terminated = true; + break; + } } - } - if !terminated { - return Some(Err(LexerError::new(LexerErrorKind::UnterminatedString))); - } - Token::new(self.position, end, TokenKind::String) - } else { - // keyword or identifier - let mut prefix = String::new(); - for char in chunk.chars() { - if char.is_alphanumeric() || char == '_' { - prefix.push(char); - } else { - break; + if !terminated { + return Some(Err(LexerError::new(LexerErrorKind::UnterminatedString))); + } + (end, TokenKind::String) + } else { + // keyword or identifier + let mut prefix = String::new(); + for char in chunk.chars() { + if char.is_alphanumeric() || char == '_' { + prefix.push(char); + } else { + break; + } } - } - if prefix.len() == 0 { - return Some(Err(LexerError::new(LexerErrorKind::UnrecognizedCharacter( - chunk.chars().next().unwrap(), - )))); - } + if prefix.len() == 0 { + return Some(Err(LexerError::new(LexerErrorKind::UnrecognizedCharacter( + chunk.chars().next().unwrap(), + )))); + } - let token_kind = match prefix.as_str() { - "fn" => TokenKind::Fn, - "end" => TokenKind::End, - "let" => TokenKind::Let, - "extern" => TokenKind::Extern, - "class" => TokenKind::Class, - "self" => TokenKind::SelfKw, - "pub" => TokenKind::Public, - "mut" => TokenKind::Mut, - "ctor" => TokenKind::Ctor, - _ => TokenKind::Identifier, - }; - Token::new(self.position, self.position + prefix.len(), token_kind) + let token_kind = match prefix.as_str() { + "fn" => TokenKind::Fn, + "end" => TokenKind::End, + "let" => TokenKind::Let, + "extern" => TokenKind::Extern, + "class" => TokenKind::Class, + "self" => TokenKind::SelfKw, + "pub" => TokenKind::Public, + "mut" => TokenKind::Mut, + "ctor" => TokenKind::Ctor, + _ => TokenKind::Identifier, + }; + (self.position + prefix.len(), token_kind) + } } }; + let token = Token::new(self.position, end, kind); self.position += token.end() - token.start(); Some(Ok(token)) }