Some parser refactoring.

This commit is contained in:
Jesse Brault 2026-03-23 15:47:37 -05:00
parent 53ec6a8f4f
commit 655a0288d3
4 changed files with 198 additions and 185 deletions

View File

@ -1,3 +1,5 @@
/// A list of [`Diagnostic`] values; the parser uses it as the error side of its results.
pub type Diagnostics = Vec<Diagnostic>;
#[derive(Debug)]
pub struct Diagnostic {
message: String,

View File

@ -1,5 +1,7 @@
/// Numeric identifier attached to a diagnostic to classify the error.
pub type ErrorCode = usize;
/// Code attached to diagnostics that the parser builds from lexer errors.
pub const LEXER_ERROR: ErrorCode = 1;
/// Code attached to diagnostics for unexpected tokens or unexpected end-of-input.
pub const PARSE_ERROR: ErrorCode = 2;
// NOTE(review): the three codes below are not used in the visible code; their
// meaning is inferred from the names only — confirm against their call sites.
// Also note the gap between 2 and 13; presumably other codes exist or are reserved.
pub const SYMBOL_NOT_FOUND: ErrorCode = 13;
pub const SYMBOL_ALREADY_DECLARED: ErrorCode = 14;
pub const BINARY_INCOMPATIBLE_TYPES: ErrorCode = 15;

View File

@ -118,7 +118,11 @@ impl<'a> Lexer<'a> {
}
}
if !terminated {
return Some(Err(LexerError::new(LexerErrorKind::UnterminatedString)));
return Some(Err(LexerError::new(
self.position,
end,
LexerErrorKind::UnterminatedString,
)));
}
(end, TokenKind::String)
} else {
@ -133,9 +137,11 @@ impl<'a> Lexer<'a> {
}
if prefix.len() == 0 {
return Some(Err(LexerError::new(LexerErrorKind::UnrecognizedCharacter(
chunk.chars().next().unwrap(),
))));
return Some(Err(LexerError::new(
self.position,
self.position + 1,
LexerErrorKind::UnrecognizedCharacter(chunk.chars().next().unwrap()),
)));
}
let token_kind = match prefix.as_str() {
@ -162,12 +168,22 @@ impl<'a> Lexer<'a> {
/// An error reported by the lexer, together with the span of input it refers to.
#[derive(Debug, Eq, PartialEq)]
pub struct LexerError {
// Offset where the offending input starts; the lexer passes its current position here.
start: usize,
// Offset where the offending input ends.
// NOTE(review): looks exclusive (the lexer passes `position + 1` for a single
// unrecognized character) — confirm, especially for multi-byte characters.
end: usize,
// What went wrong (e.g. unterminated string, unrecognized character).
kind: LexerErrorKind,
}
impl LexerError {
pub fn new(kind: LexerErrorKind) -> Self {
Self { kind }
pub fn new(start: usize, end: usize, kind: LexerErrorKind) -> Self {
Self { start, end, kind }
}
pub fn start(&self) -> usize {
self.start
}
pub fn end(&self) -> usize {
self.end
}
pub fn kind(&self) -> LexerErrorKind {

View File

@ -19,20 +19,25 @@ use crate::ast::parameter::Parameter;
use crate::ast::statement::Statement;
use crate::ast::string_literal::StringLiteral;
use crate::ast::type_use::TypeUse;
use crate::diagnostic::Diagnostic;
use crate::lexer::Lexer;
use crate::diagnostic::{Diagnostic, Diagnostics};
use crate::error_codes::{LEXER_ERROR, PARSE_ERROR};
use crate::lexer::{Lexer, LexerErrorKind};
use crate::source_range::SourceRange;
use crate::token::{Token, TokenKind};
use crate::{handle_diagnostics, ok_or_err_diagnostics};
use std::str::FromStr;
pub fn parse_compilation_unit(input: &str) -> Result<CompilationUnit, Vec<Diagnostic>> {
pub type ParseResult<T> = Result<T, Diagnostics>;
pub fn parse_compilation_unit(input: &str) -> ParseResult<CompilationUnit> {
let mut parser = Parser::new(input);
parser.advance()?; // get started
parser.compilation_unit()
}
pub fn parse_expression(input: &str) -> Result<Expression, Vec<Diagnostic>> {
pub fn parse_expression(input: &str) -> ParseResult<Expression> {
let mut parser = Parser::new(input);
parser.advance(); // get started
parser.advance()?; // get started
parser.expression()
}
@ -96,96 +101,116 @@ impl<'a> Parser<'a> {
}
}
fn advance(&mut self) {
fn advance(&mut self) -> Result<(), Diagnostics> {
fn fetch(lexer: &mut Lexer) -> Result<Option<Token>, Diagnostics> {
let mut diagnostics = vec![];
let mut maybe_token: Option<Token> = None;
while let Some(lexer_result) = lexer.next() {
match lexer_result {
Ok(token) => {
maybe_token = Some(token);
break;
}
Err(lexer_error) => {
let diagnostic = match lexer_error.kind() {
LexerErrorKind::UnterminatedString => Diagnostic::new(
"Unterminated string literal.",
lexer_error.start(),
lexer_error.end(),
)
.with_error_code(LEXER_ERROR),
LexerErrorKind::UnrecognizedCharacter(c) => Diagnostic::new(
&format!("Unrecognized character: {}", c),
lexer_error.start(),
lexer_error.end(),
)
.with_error_code(LEXER_ERROR),
};
diagnostics.push(diagnostic);
}
}
}
ok_or_err_diagnostics!(maybe_token, diagnostics)
}
if self.lookahead.is_some() {
// we've advanced at least once
self.current = self.lookahead.take();
self.lookahead = match self.lexer.next() {
None => None,
Some(result) => match result {
Ok(token) => Some(token),
Err(lexer_error) => {
panic!("{:?}", lexer_error);
}
},
}
self.lookahead = fetch(&mut self.lexer)?;
Ok(())
} else if self.lookahead.is_none() && self.current.is_some() {
// we're on the last token
self.current = None;
Ok(())
} else {
// we've not yet advanced, so fetch both
// current
match self.lexer.next() {
None => {}
Some(result) => match result {
Ok(token) => {
self.current = Some(token);
}
Err(lexer_error) => {
panic!("{:?}", lexer_error);
}
},
}
// lookahead
match self.lexer.next() {
None => {}
Some(result) => match result {
Ok(token) => {
self.lookahead = Some(token);
}
Err(lexer_error) => {
panic!("{:?}", lexer_error);
}
},
}
self.current = fetch(&mut self.lexer)?;
self.lookahead = fetch(&mut self.lexer)?;
Ok(())
}
}
fn expect_advance(&mut self, token_kind: TokenKind) -> Result<Token, Vec<Diagnostic>> {
/// Renders `kinds` as their `Debug` names separated by `", "`.
///
/// An empty slice yields an empty string.
fn join_kinds(kinds: &[TokenKind]) -> String {
    let mut joined = String::new();
    for (index, kind) in kinds.iter().enumerate() {
        // The separator goes between entries only, never before the first one.
        if index > 0 {
            joined.push_str(", ");
        }
        joined.push_str(&format!("{:?}", kind));
    }
    joined
}
/// Builds the parse-error diagnostic for a token that was not expected.
///
/// The diagnostic covers the span of `found`, carries `PARSE_ERROR`, and its
/// primary label lists the token kinds in `kinds` that would have been accepted.
fn get_expected_but_found(kinds: &[TokenKind], found: &Token) -> Diagnostic {
    let headline = format!("Unexpected token: {:?}.", found.kind());
    let diagnostic =
        Diagnostic::new(&headline, found.start(), found.end()).with_error_code(PARSE_ERROR);
    let label = format!("Expected {}.", Self::join_kinds(kinds));
    diagnostic.with_primary_label_message(&label)
}
/// Builds the parse-error diagnostic for input that ended too early.
///
/// The diagnostic is a zero-width span at `position`, carries `PARSE_ERROR`,
/// and its primary label lists the token kinds in `kinds` that would have
/// been accepted.
fn get_expected_but_found_eoi(kinds: &[TokenKind], position: usize) -> Diagnostic {
    let diagnostic = Diagnostic::new("Unexpected end-of-input.", position, position)
        .with_error_code(PARSE_ERROR);
    let label = format!("Expected {}.", Self::join_kinds(kinds));
    diagnostic.with_primary_label_message(&label)
}
fn expect_advance(&mut self, token_kind: TokenKind) -> Result<Token, Diagnostics> {
match self.current.take() {
None => Err(vec![
Diagnostic::new(
&format!("Expected {:?} but found end-of-input.", token_kind),
self.input.len(),
self.input.len(),
)
.with_reporter(file!(), line!()),
]),
None => Err(vec![Self::get_expected_but_found_eoi(
&[token_kind],
self.input.len(),
)]),
Some(token) => {
if token.kind() == token_kind {
self.advance();
self.advance()?;
Ok(token)
} else {
self.advance_until(&[token_kind]);
Err(vec![
Diagnostic::new(
&format!("Expected {:?} but found {:?}", token_kind, token.kind()),
token.start(),
token.end(),
)
.with_reporter(file!(), line!()),
])
Err(vec![Self::get_expected_but_found(&[token_kind], &token)])
}
}
}
}
fn expect_position_advance(
fn expect_immediately_after_advance(
&mut self,
token_kind: TokenKind,
start_position: usize,
) -> Result<Token, Vec<Diagnostic>> {
previous_token: &Token,
) -> Result<Token, Diagnostics> {
let matched = self.expect_advance(token_kind)?;
if matched.start() == start_position {
if matched.start() == previous_token.end() {
Ok(matched)
} else {
Err(vec![
Diagnostic::new(
&format!("Expected {:?} but found {:?}", token_kind, matched.kind()),
&format!(
"Expected {:?} immediately after previous token.",
token_kind
),
matched.start(),
matched.end(),
)
.with_reporter(file!(), line!()),
.with_error_code(PARSE_ERROR),
])
}
}
@ -206,13 +231,6 @@ impl<'a> Parser<'a> {
}
}
fn peek_lookahead(&self, token_kind: TokenKind) -> bool {
match &self.lookahead {
None => panic!("Unexpected end of input."),
Some(token) => token.kind() == token_kind,
}
}
/// Returns the part of the parser's input between offsets `start` and `end`.
///
/// NOTE(review): this indexes `self.input` directly, so an out-of-range or
/// misaligned range would presumably panic — callers appear to pass token
/// offsets; confirm they are always valid.
fn sample_input(&self, start: usize, end: usize) -> &'a str {
&self.input[start..end]
}
@ -221,49 +239,40 @@ impl<'a> Parser<'a> {
self.sample_input(token.start(), token.end())
}
pub fn compilation_unit(&mut self) -> Result<CompilationUnit, Vec<Diagnostic>> {
fn compilation_unit(&mut self) -> Result<CompilationUnit, Vec<Diagnostic>> {
let mut functions: Vec<Function> = vec![];
let mut extern_functions: Vec<ExternFunction> = vec![];
let mut classes: Vec<Class> = vec![];
let mut diagnostics = vec![];
self.advance(); // get started
while self.current.is_some() {
let current = self.get_current();
match current.kind() {
TokenKind::Fn | TokenKind::Extern | TokenKind::Class => {
match self.module_level_declaration(
&mut functions,
&mut extern_functions,
&mut classes,
) {
Ok(_) => {}
Err(mut declaration_diagnostics) => {
diagnostics.append(&mut declaration_diagnostics)
}
}
handle_diagnostics!(
self.module_level_declaration(
&mut functions,
&mut extern_functions,
&mut classes
),
diagnostics
);
}
_ => {
diagnostics.push(Diagnostic::new(
&format!(
"Expected any of {:?}; found {:?}",
[TokenKind::Fn, TokenKind::Extern, TokenKind::Class],
current.kind()
),
current.start(),
current.end(),
diagnostics.push(Self::get_expected_but_found(
&[TokenKind::Fn, TokenKind::Extern, TokenKind::Class],
current,
));
self.advance_until(&[TokenKind::Fn, TokenKind::Extern]);
self.advance_until(&[TokenKind::Fn, TokenKind::Extern, TokenKind::Class]);
}
}
}
if diagnostics.is_empty() {
Ok(CompilationUnit::new(functions, extern_functions, classes))
} else {
Err(diagnostics)
}
ok_or_err_diagnostics!(
CompilationUnit::new(functions, extern_functions, classes),
diagnostics
)
}
fn module_level_declaration(
@ -307,7 +316,7 @@ impl<'a> Parser<'a> {
fn function(&mut self) -> Result<Function, Vec<Diagnostic>> {
let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) {
self.advance(); // pub
self.advance()?; // pub
true
} else {
false
@ -356,18 +365,17 @@ impl<'a> Parser<'a> {
_ => {}
}
if diagnostics.is_empty() {
Ok(Function::new(
ok_or_err_diagnostics!(
Function::new(
self.token_text(&identifier_token),
SourceRange::new(identifier_token.start(), identifier_token.end()),
is_public,
parameters,
return_type,
statements,
))
} else {
Err(diagnostics)
}
),
diagnostics
)
}
fn extern_function(&mut self) -> Result<ExternFunction, Vec<Diagnostic>> {
@ -399,16 +407,15 @@ impl<'a> Parser<'a> {
let return_type = self.return_type()?;
if diagnostics.is_empty() {
Ok(ExternFunction::new(
ok_or_err_diagnostics!(
ExternFunction::new(
self.token_text(&identifier_token),
SourceRange::new(identifier_token.start(), identifier_token.end()),
maybe_parameters.unwrap(),
return_type,
))
} else {
Err(diagnostics)
}
),
diagnostics
)
}
fn class(&mut self) -> Result<Class, Vec<Diagnostic>> {
@ -459,18 +466,17 @@ impl<'a> Parser<'a> {
self.expect_advance(TokenKind::End)?;
if diagnostics.is_empty() {
Ok(Class::new(
ok_or_err_diagnostics!(
Class::new(
self.token_text(&identifier_token),
SourceRange::new(identifier_token.start(), identifier_token.end()),
generic_parameters,
maybe_constructor,
fields,
functions,
))
} else {
Err(diagnostics)
}
),
diagnostics
)
}
fn parameter_list(&mut self) -> Result<Vec<Parameter>, Vec<Diagnostic>> {
@ -487,14 +493,15 @@ impl<'a> Parser<'a> {
}
}
if self.current.is_some() && self.peek_current(TokenKind::Comma) {
self.advance();
match self.advance() {
Ok(_) => {}
Err(mut ds) => {
diagnostics.append(&mut ds);
}
};
}
}
if diagnostics.is_empty() {
Ok(parameters)
} else {
Err(diagnostics)
}
ok_or_err_diagnostics!(parameters, diagnostics)
}
fn parameter(&mut self) -> Result<Parameter, Vec<Diagnostic>> {
@ -518,7 +525,7 @@ impl<'a> Parser<'a> {
let current = self.get_current();
return match current.kind() {
TokenKind::LeftSquare => {
self.advance(); // [
self.advance()?; // [
let inner_type_use = self.type_use()?;
self.expect_advance(TokenKind::RightSquare)?;
todo!()
@ -527,7 +534,7 @@ impl<'a> Parser<'a> {
let identifier_token = self.expect_advance(TokenKind::Identifier)?;
let generic_arguments =
if self.current.is_some() && self.peek_current(TokenKind::Lt) {
self.advance(); // <
self.advance()?; // <
let generic_arguments = self.generic_arguments_list()?;
self.expect_advance(TokenKind::Gt)?; // >
generic_arguments
@ -540,19 +547,14 @@ impl<'a> Parser<'a> {
generic_arguments,
))
}
_ => Err(vec![Diagnostic::new(
&format!(
"Expected LeftSquare or Identifier; found: {:?}",
current.kind()
),
current.start(),
current.end(),
_ => Err(vec![Self::get_expected_but_found(
&[TokenKind::LeftSquare, TokenKind::Identifier],
current,
)]),
};
}
Err(vec![Diagnostic::new(
"Expected LeftSquare or Identifier; found end of input.",
self.input.len(),
Err(vec![Self::get_expected_but_found_eoi(
&[TokenKind::LeftSquare, TokenKind::Identifier],
self.input.len(),
)])
}
@ -562,7 +564,7 @@ impl<'a> Parser<'a> {
while self.current.is_some() && matches_type_use_first!(self.get_current().kind()) {
generic_arguments.push(self.type_use()?);
if self.current.is_some() && self.peek_current(TokenKind::Comma) {
self.advance(); // comma
self.advance()?; // comma
} else {
break;
}
@ -576,7 +578,7 @@ impl<'a> Parser<'a> {
while self.current.is_some() && self.peek_current(TokenKind::Identifier) {
parameters.push(self.generic_parameter()?);
if self.current.is_some() && self.peek_current(TokenKind::Plus) {
self.advance(); // +
self.advance()?; // +
} else {
break;
}
@ -589,11 +591,11 @@ impl<'a> Parser<'a> {
let identifier = self.expect_advance(TokenKind::Identifier)?;
let mut extends_list: Vec<TypeUse> = vec![];
if self.current.is_some() && self.peek_current(TokenKind::Colon) {
self.advance(); // :
self.advance()?; // :
while self.current.is_some() && matches_type_use_first!(self.get_current().kind()) {
extends_list.push(self.type_use()?);
if self.current.is_some() && self.peek_current(TokenKind::Comma) {
self.advance(); // ,
self.advance()?; // ,
} else {
break;
}
@ -623,34 +625,25 @@ impl<'a> Parser<'a> {
}
_ => {
let lookahead = self.lookahead.as_ref().unwrap();
return Err(vec![Diagnostic::new(
&format!(
"Expected any of {:?}; found {:?}",
[TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn],
lookahead.kind()
),
lookahead.start(),
lookahead.end(),
return Err(vec![Self::get_expected_but_found(
&[TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn],
lookahead,
)]);
}
}
Ok(())
} else {
let current = self.current.as_ref().unwrap();
Err(vec![Diagnostic::new(
&format!(
"Expected any of {:?}; found end-of-input.",
[TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn]
),
current.end(),
current.end(),
Err(vec![Self::get_expected_but_found(
&[TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn],
current,
)])
}
}
fn constructor(&mut self) -> Result<Constructor, Vec<Diagnostic>> {
let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) {
self.advance();
self.advance()?;
true
} else {
false
@ -700,14 +693,14 @@ impl<'a> Parser<'a> {
fn field(&mut self) -> Result<Field, Vec<Diagnostic>> {
let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) {
self.advance();
self.advance()?;
true
} else {
false
};
let is_mut = if self.current.is_some() && self.peek_current(TokenKind::Mut) {
self.advance();
self.advance()?;
true
} else {
false
@ -716,14 +709,14 @@ impl<'a> Parser<'a> {
let identifier = self.expect_advance(TokenKind::Identifier)?;
let declared_type = if self.current.is_some() && self.peek_current(TokenKind::Colon) {
self.advance(); // colon
self.advance()?; // colon
Some(self.type_use()?)
} else {
None
};
let initializer = if self.current.is_some() && self.peek_current(TokenKind::Equals) {
self.advance(); // equals
self.advance()?; // equals
Some(self.expression()?)
} else {
None
@ -751,7 +744,7 @@ impl<'a> Parser<'a> {
self.expect_advance(TokenKind::Let)?;
let is_mut = if self.current.is_some() && self.peek_current(TokenKind::Mut) {
self.advance();
self.advance()?;
true
} else {
false
@ -790,7 +783,7 @@ impl<'a> Parser<'a> {
fn bitwise_or_expression(&mut self) -> Result<Expression, Vec<Diagnostic>> {
let mut result = self.bitwise_xor_expression()?;
while self.current.is_some() && self.peek_current(TokenKind::Bar) {
self.advance(); // |
self.advance()?; // |
let rhs = self.bitwise_xor_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -807,7 +800,7 @@ impl<'a> Parser<'a> {
fn bitwise_xor_expression(&mut self) -> Result<Expression, Vec<Diagnostic>> {
let mut result = self.bitwise_and_expression()?;
while self.current.is_some() && self.peek_current(TokenKind::Caret) {
self.advance(); // ^
self.advance()?; // ^
let rhs = self.bitwise_and_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -824,7 +817,7 @@ impl<'a> Parser<'a> {
fn bitwise_and_expression(&mut self) -> Result<Expression, Vec<Diagnostic>> {
let mut result = self.shift_expression()?;
while self.current.is_some() && self.peek_current(TokenKind::Ampersand) {
self.advance(); // &
self.advance()?; // &
let rhs = self.shift_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -844,9 +837,9 @@ impl<'a> Parser<'a> {
let current = self.get_current();
match current.kind() {
TokenKind::Lt => {
let second_lt_start = current.start() + 1;
self.advance(); // first <
self.expect_position_advance(TokenKind::Lt, second_lt_start)?; // second <
let previous_cloned = current.clone();
self.advance()?; // first <
self.expect_immediately_after_advance(TokenKind::Lt, &previous_cloned)?; // second <
let rhs = self.additive_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -858,9 +851,9 @@ impl<'a> Parser<'a> {
));
}
TokenKind::Gt => {
let second_gt_start = current.start() + 1;
self.advance(); // first >
self.expect_position_advance(TokenKind::Gt, second_gt_start)?; // second gt
let previous_cloned = current.clone();
self.advance()?; // first >
self.expect_immediately_after_advance(TokenKind::Gt, &previous_cloned)?; // second gt
let rhs = self.additive_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -883,7 +876,7 @@ impl<'a> Parser<'a> {
let current = self.get_current();
match current.kind() {
TokenKind::Plus => {
self.advance(); // plus
self.advance()?; // plus
let rhs = self.multiplicative_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -895,7 +888,7 @@ impl<'a> Parser<'a> {
));
}
TokenKind::Minus => {
self.advance(); // minus
self.advance()?; // minus
let rhs = self.multiplicative_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -918,7 +911,7 @@ impl<'a> Parser<'a> {
let current = self.get_current();
match current.kind() {
TokenKind::Star => {
self.advance(); // multiply
self.advance()?; // multiply
let rhs = self.prefix_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -930,7 +923,7 @@ impl<'a> Parser<'a> {
));
}
TokenKind::Slash => {
self.advance(); // slash
self.advance()?; // slash
let rhs = self.prefix_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -942,7 +935,7 @@ impl<'a> Parser<'a> {
))
}
TokenKind::Modulo => {
self.advance(); // modulo
self.advance()?; // modulo
let rhs = self.prefix_expression()?;
let source_range =
SourceRange::new(result.source_range().start(), rhs.source_range().end());
@ -967,7 +960,7 @@ impl<'a> Parser<'a> {
match current.kind() {
TokenKind::Minus => {
operator_tokens.push(current.clone()); // unfortunately necessary
self.advance();
self.advance()?;
}
_ => break,
}
@ -1010,7 +1003,7 @@ impl<'a> Parser<'a> {
TokenKind::IntegerLiteral => {
let raw = self.token_text(&current);
let source_range = SourceRange::new(current.start(), current.end());
self.advance();
self.advance()?;
Ok(Expression::Integer(IntegerLiteral::new(
i32::from_str(raw).unwrap(),
source_range,
@ -1019,7 +1012,7 @@ impl<'a> Parser<'a> {
TokenKind::DoubleLiteral => {
let raw = self.token_text(&current);
let source_range = SourceRange::new(current.start(), current.end());
self.advance();
self.advance()?;
Ok(Expression::Double(DoubleLiteral::new(
f64::from_str(raw).unwrap(),
source_range,
@ -1028,7 +1021,7 @@ impl<'a> Parser<'a> {
TokenKind::String => {
let with_quotes = self.token_text(&current);
let source_range = SourceRange::new(current.start(), current.end());
self.advance();
self.advance()?;
Ok(Expression::String(StringLiteral::new(
&with_quotes[1..with_quotes.len() - 1],
source_range,
@ -1037,7 +1030,7 @@ impl<'a> Parser<'a> {
TokenKind::Identifier => {
let declared_name = self.token_text(&current);
let source_range = SourceRange::new(current.start(), current.end());
self.advance();
self.advance()?;
Ok(Expression::Identifier(Identifier::new(
declared_name,
source_range,
@ -1065,7 +1058,7 @@ impl<'a> Parser<'a> {
let mut expressions = vec![];
expressions.push(self.expression()?);
while self.current.is_some() && self.peek_current(TokenKind::Comma) {
self.advance(); // comma
self.advance()?; // comma
expressions.push(self.expression()?);
}
Ok(expressions)