diff --git a/dmc-lib/src/diagnostic.rs b/dmc-lib/src/diagnostic.rs index 4044c2f..5fc8c08 100644 --- a/dmc-lib/src/diagnostic.rs +++ b/dmc-lib/src/diagnostic.rs @@ -1,3 +1,5 @@ +pub type Diagnostics = Vec; + #[derive(Debug)] pub struct Diagnostic { message: String, diff --git a/dmc-lib/src/error_codes.rs b/dmc-lib/src/error_codes.rs index 32b3115..8f1a701 100644 --- a/dmc-lib/src/error_codes.rs +++ b/dmc-lib/src/error_codes.rs @@ -1,5 +1,7 @@ pub type ErrorCode = usize; +pub const LEXER_ERROR: ErrorCode = 1; +pub const PARSE_ERROR: ErrorCode = 2; pub const SYMBOL_NOT_FOUND: ErrorCode = 13; pub const SYMBOL_ALREADY_DECLARED: ErrorCode = 14; pub const BINARY_INCOMPATIBLE_TYPES: ErrorCode = 15; diff --git a/dmc-lib/src/lexer.rs b/dmc-lib/src/lexer.rs index 4ad720e..5319373 100644 --- a/dmc-lib/src/lexer.rs +++ b/dmc-lib/src/lexer.rs @@ -118,7 +118,11 @@ impl<'a> Lexer<'a> { } } if !terminated { - return Some(Err(LexerError::new(LexerErrorKind::UnterminatedString))); + return Some(Err(LexerError::new( + self.position, + end, + LexerErrorKind::UnterminatedString, + ))); } (end, TokenKind::String) } else { @@ -133,9 +137,11 @@ impl<'a> Lexer<'a> { } if prefix.len() == 0 { - return Some(Err(LexerError::new(LexerErrorKind::UnrecognizedCharacter( - chunk.chars().next().unwrap(), - )))); + return Some(Err(LexerError::new( + self.position, + self.position + 1, + LexerErrorKind::UnrecognizedCharacter(chunk.chars().next().unwrap()), + ))); } let token_kind = match prefix.as_str() { @@ -162,12 +168,22 @@ impl<'a> Lexer<'a> { #[derive(Debug, Eq, PartialEq)] pub struct LexerError { + start: usize, + end: usize, kind: LexerErrorKind, } impl LexerError { - pub fn new(kind: LexerErrorKind) -> Self { - Self { kind } + pub fn new(start: usize, end: usize, kind: LexerErrorKind) -> Self { + Self { start, end, kind } + } + + pub fn start(&self) -> usize { + self.start + } + + pub fn end(&self) -> usize { + self.end } pub fn kind(&self) -> LexerErrorKind { diff --git a/dmc-lib/src/parser.rs b/dmc-lib/src/parser.rs index a7d0ebd..790cfba 100644 --- a/dmc-lib/src/parser.rs +++ b/dmc-lib/src/parser.rs @@ -19,20 +19,25 @@ use crate::ast::parameter::Parameter; use crate::ast::statement::Statement; use crate::ast::string_literal::StringLiteral; use crate::ast::type_use::TypeUse; -use crate::diagnostic::Diagnostic; -use crate::lexer::Lexer; +use crate::diagnostic::{Diagnostic, Diagnostics}; +use crate::error_codes::{LEXER_ERROR, PARSE_ERROR}; +use crate::lexer::{Lexer, LexerErrorKind}; use crate::source_range::SourceRange; use crate::token::{Token, TokenKind}; +use crate::{handle_diagnostics, ok_or_err_diagnostics}; use std::str::FromStr; -pub fn parse_compilation_unit(input: &str) -> Result> { +pub type ParseResult = Result; + +pub fn parse_compilation_unit(input: &str) -> ParseResult { let mut parser = Parser::new(input); + parser.advance()?; // get started parser.compilation_unit() } -pub fn parse_expression(input: &str) -> Result> { +pub fn parse_expression(input: &str) -> ParseResult { let mut parser = Parser::new(input); - parser.advance(); // get started + parser.advance()?; // get started parser.expression() } @@ -96,96 +101,116 @@ impl<'a> Parser<'a> { } } - fn advance(&mut self) { + fn advance(&mut self) -> Result<(), Diagnostics> { + fn fetch(lexer: &mut Lexer) -> Result, Diagnostics> { + let mut diagnostics = vec![]; + let mut maybe_token: Option = None; + while let Some(lexer_result) = lexer.next() { + match lexer_result { + Ok(token) => { + maybe_token = Some(token); + break; + } + Err(lexer_error) => { + let diagnostic = match lexer_error.kind() { + LexerErrorKind::UnterminatedString => Diagnostic::new( + "Unterminated string literal.", + lexer_error.start(), + lexer_error.end(), + ) + .with_error_code(LEXER_ERROR), + LexerErrorKind::UnrecognizedCharacter(c) => Diagnostic::new( + &format!("Unrecognized character: {}", c), + lexer_error.start(), + lexer_error.end(), + ) + .with_error_code(LEXER_ERROR), + }; + diagnostics.push(diagnostic); + } + } + } + ok_or_err_diagnostics!(maybe_token, diagnostics) + } + if self.lookahead.is_some() { // we've advanced at least once self.current = self.lookahead.take(); - self.lookahead = match self.lexer.next() { - None => None, - Some(result) => match result { - Ok(token) => Some(token), - Err(lexer_error) => { - panic!("{:?}", lexer_error); - } - }, - } + self.lookahead = fetch(&mut self.lexer)?; + Ok(()) } else if self.lookahead.is_none() && self.current.is_some() { // we're on the last token self.current = None; + Ok(()) } else { // we've not yet advanced, so fetch both - // current - match self.lexer.next() { - None => {} - Some(result) => match result { - Ok(token) => { - self.current = Some(token); - } - Err(lexer_error) => { - panic!("{:?}", lexer_error); - } - }, - } - // lookahead - match self.lexer.next() { - None => {} - Some(result) => match result { - Ok(token) => { - self.lookahead = Some(token); - } - Err(lexer_error) => { - panic!("{:?}", lexer_error); - } - }, - } + self.current = fetch(&mut self.lexer)?; + self.lookahead = fetch(&mut self.lexer)?; + Ok(()) } } - fn expect_advance(&mut self, token_kind: TokenKind) -> Result> { + fn join_kinds(kinds: &[TokenKind]) -> String { + kinds + .iter() + .map(|kind| format!("{:?}", kind)) + .collect::>() + .join(", ") + } + + fn get_expected_but_found(kinds: &[TokenKind], found: &Token) -> Diagnostic { + Diagnostic::new( + &format!("Unexpected token: {:?}.", found.kind()), + found.start(), + found.end(), + ) + .with_error_code(PARSE_ERROR) + .with_primary_label_message(&format!("Expected {}.", Self::join_kinds(kinds))) + } + + fn get_expected_but_found_eoi(kinds: &[TokenKind], position: usize) -> Diagnostic { + Diagnostic::new("Unexpected end-of-input.", position, position) + .with_error_code(PARSE_ERROR) + .with_primary_label_message(&format!("Expected {}.", Self::join_kinds(kinds))) + } + + fn expect_advance(&mut self, token_kind: TokenKind) -> Result { match self.current.take() { - None => Err(vec![ - Diagnostic::new( - &format!("Expected {:?} but found end-of-input.", token_kind), - self.input.len(), - self.input.len(), - ) - .with_reporter(file!(), line!()), - ]), + None => Err(vec![Self::get_expected_but_found_eoi( + &[token_kind], + self.input.len(), + )]), Some(token) => { if token.kind() == token_kind { - self.advance(); + self.advance()?; Ok(token) } else { self.advance_until(&[token_kind]); - Err(vec![ - Diagnostic::new( - &format!("Expected {:?} but found {:?}", token_kind, token.kind()), - token.start(), - token.end(), - ) - .with_reporter(file!(), line!()), - ]) + Err(vec![Self::get_expected_but_found(&[token_kind], &token)]) } } } } - fn expect_position_advance( + fn expect_immediately_after_advance( &mut self, token_kind: TokenKind, - start_position: usize, - ) -> Result> { + previous_token: &Token, + ) -> Result { let matched = self.expect_advance(token_kind)?; - if matched.start() == start_position { + if matched.start() == previous_token.end() { Ok(matched) } else { Err(vec![ Diagnostic::new( - &format!("Expected {:?} but found {:?}", token_kind, matched.kind()), + &format!( + "Expected {:?} immediately after previous token.", + token_kind + ), matched.start(), matched.end(), ) - .with_reporter(file!(), line!()), + .with_error_code(PARSE_ERROR), ]) } } @@ -206,13 +231,6 @@ impl<'a> Parser<'a> { } } - fn peek_lookahead(&self, token_kind: TokenKind) -> bool { - match &self.lookahead { - None => panic!("Unexpected end of input."), - Some(token) => token.kind() == token_kind, - } - } - fn sample_input(&self, start: usize, end: usize) -> &'a str { &self.input[start..end] } @@ -221,49 +239,40 @@ impl<'a> Parser<'a> { self.sample_input(token.start(), token.end()) } - pub fn compilation_unit(&mut self) -> Result> { + fn compilation_unit(&mut self) -> Result> { let mut functions: Vec = vec![]; let mut extern_functions: Vec = vec![]; let mut classes: Vec = vec![]; let mut diagnostics = vec![]; - self.advance(); // get started - while self.current.is_some() { let current = self.get_current(); match current.kind() { TokenKind::Fn | TokenKind::Extern | TokenKind::Class => { - match self.module_level_declaration( - &mut functions, - &mut extern_functions, - &mut classes, - ) { - Ok(_) => {} - Err(mut declaration_diagnostics) => { - diagnostics.append(&mut declaration_diagnostics) - } - } + handle_diagnostics!( + self.module_level_declaration( + &mut functions, + &mut extern_functions, + &mut classes + ), + diagnostics + ); } _ => { - diagnostics.push(Diagnostic::new( - &format!( - "Expected any of {:?}; found {:?}", - [TokenKind::Fn, TokenKind::Extern, TokenKind::Class], - current.kind() - ), - current.start(), - current.end(), + diagnostics.push(Self::get_expected_but_found( + &[TokenKind::Fn, TokenKind::Extern, TokenKind::Class], + current, )); - self.advance_until(&[TokenKind::Fn, TokenKind::Extern]); + self.advance_until(&[TokenKind::Fn, TokenKind::Extern, TokenKind::Class]); } } } - if diagnostics.is_empty() { - Ok(CompilationUnit::new(functions, extern_functions, classes)) - } else { - Err(diagnostics) - } + + ok_or_err_diagnostics!( + CompilationUnit::new(functions, extern_functions, classes), + diagnostics + ) } fn module_level_declaration( @@ -307,7 +316,7 @@ impl<'a> Parser<'a> { fn function(&mut self) -> Result> { let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) { - self.advance(); // pub + self.advance()?; // pub true } else { false @@ -356,18 +365,17 @@ impl<'a> Parser<'a> { _ => {} } - if diagnostics.is_empty() { - Ok(Function::new( + ok_or_err_diagnostics!( + Function::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), is_public, parameters, return_type, statements, - )) - } else { - Err(diagnostics) - } + ), + diagnostics + ) } fn extern_function(&mut self) -> Result> { @@ -399,16 +407,15 @@ impl<'a> Parser<'a> { let return_type = self.return_type()?; - if diagnostics.is_empty() { - Ok(ExternFunction::new( + ok_or_err_diagnostics!( + ExternFunction::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), maybe_parameters.unwrap(), return_type, - )) - } else { - Err(diagnostics) - } + ), + diagnostics + ) } fn class(&mut self) -> Result> { @@ -459,18 +466,17 @@ impl<'a> Parser<'a> { self.expect_advance(TokenKind::End)?; - if diagnostics.is_empty() { - Ok(Class::new( + ok_or_err_diagnostics!( + Class::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), generic_parameters, maybe_constructor, fields, functions, - )) - } else { - Err(diagnostics) - } + ), + diagnostics + ) } fn parameter_list(&mut self) -> Result, Vec> { @@ -487,14 +493,15 @@ impl<'a> Parser<'a> { } } if self.current.is_some() && self.peek_current(TokenKind::Comma) { - self.advance(); + match self.advance() { + Ok(_) => {} + Err(mut ds) => { + diagnostics.append(&mut ds); + } + }; } } - if diagnostics.is_empty() { - Ok(parameters) - } else { - Err(diagnostics) - } + ok_or_err_diagnostics!(parameters, diagnostics) } fn parameter(&mut self) -> Result> { @@ -518,7 +525,7 @@ impl<'a> Parser<'a> { let current = self.get_current(); return match current.kind() { TokenKind::LeftSquare => { - self.advance(); // [ + self.advance()?; // [ let inner_type_use = self.type_use()?; self.expect_advance(TokenKind::RightSquare)?; todo!() @@ -527,7 +534,7 @@ impl<'a> Parser<'a> { let identifier_token = self.expect_advance(TokenKind::Identifier)?; let generic_arguments = if self.current.is_some() && self.peek_current(TokenKind::Lt) { - self.advance(); // < + self.advance()?; // < let generic_arguments = self.generic_arguments_list()?; self.expect_advance(TokenKind::Gt)?; // > generic_arguments @@ -540,19 +547,14 @@ impl<'a> Parser<'a> { generic_arguments, )) } - _ => Err(vec![Diagnostic::new( - &format!( - "Expected LeftSquare or Identifier; found: {:?}", - current.kind() - ), - current.start(), - current.end(), + _ => Err(vec![Self::get_expected_but_found( + &[TokenKind::LeftSquare, TokenKind::Identifier], + current, )]), }; } - Err(vec![Diagnostic::new( - "Expected LeftSquare or Identifier; found end of input.", - self.input.len(), + Err(vec![Self::get_expected_but_found_eoi( + &[TokenKind::LeftSquare, TokenKind::Identifier], self.input.len(), )]) } @@ -562,7 +564,7 @@ impl<'a> Parser<'a> { while self.current.is_some() && matches_type_use_first!(self.get_current().kind()) { generic_arguments.push(self.type_use()?); if self.current.is_some() && self.peek_current(TokenKind::Comma) { - self.advance(); // comma + self.advance()?; // comma } else { break; } @@ -576,7 +578,7 @@ impl<'a> Parser<'a> { while self.current.is_some() && self.peek_current(TokenKind::Identifier) { parameters.push(self.generic_parameter()?); if self.current.is_some() && self.peek_current(TokenKind::Plus) { - self.advance(); // + + self.advance()?; // + } else { break; } @@ -589,11 +591,11 @@ impl<'a> Parser<'a> { let identifier = self.expect_advance(TokenKind::Identifier)?; let mut extends_list: Vec = vec![]; if self.current.is_some() && self.peek_current(TokenKind::Colon) { - self.advance(); // : + self.advance()?; // : while self.current.is_some() && matches_type_use_first!(self.get_current().kind()) { extends_list.push(self.type_use()?); if self.current.is_some() && self.peek_current(TokenKind::Comma) { - self.advance(); // , + self.advance()?; // , } else { break; } @@ -623,34 +625,25 @@ impl<'a> Parser<'a> { } _ => { let lookahead = self.lookahead.as_ref().unwrap(); - return Err(vec![Diagnostic::new( - &format!( - "Expected any of {:?}; found {:?}", - [TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn], - lookahead.kind() - ), - lookahead.start(), - lookahead.end(), + return Err(vec![Self::get_expected_but_found( + &[TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn], + lookahead, )]); } } Ok(()) } else { let current = self.current.as_ref().unwrap(); - Err(vec![Diagnostic::new( - &format!( - "Expected any of {:?}; found end-of-input.", - [TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn] - ), - current.end(), - current.end(), + Err(vec![Self::get_expected_but_found( + &[TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn], + current, )]) } } fn constructor(&mut self) -> Result> { let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) { - self.advance(); + self.advance()?; true } else { false @@ -700,14 +693,14 @@ impl<'a> Parser<'a> { fn field(&mut self) -> Result> { let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) { - self.advance(); + self.advance()?; true } else { false }; let is_mut = if self.current.is_some() && self.peek_current(TokenKind::Mut) { - self.advance(); + self.advance()?; true } else { false @@ -716,14 +709,14 @@ impl<'a> Parser<'a> { let identifier = self.expect_advance(TokenKind::Identifier)?; let declared_type = if self.current.is_some() && self.peek_current(TokenKind::Colon) { - self.advance(); // colon + self.advance()?; // colon Some(self.type_use()?) } else { None }; let initializer = if self.current.is_some() && self.peek_current(TokenKind::Equals) { - self.advance(); // equals + self.advance()?; // equals Some(self.expression()?) } else { None @@ -751,7 +744,7 @@ impl<'a> Parser<'a> { self.expect_advance(TokenKind::Let)?; let is_mut = if self.current.is_some() && self.peek_current(TokenKind::Mut) { - self.advance(); + self.advance()?; true } else { false @@ -790,7 +783,7 @@ impl<'a> Parser<'a> { fn bitwise_or_expression(&mut self) -> Result> { let mut result = self.bitwise_xor_expression()?; while self.current.is_some() && self.peek_current(TokenKind::Bar) { - self.advance(); // | + self.advance()?; // | let rhs = self.bitwise_xor_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -807,7 +800,7 @@ impl<'a> Parser<'a> { fn bitwise_xor_expression(&mut self) -> Result> { let mut result = self.bitwise_and_expression()?; while self.current.is_some() && self.peek_current(TokenKind::Caret) { - self.advance(); // ^ + self.advance()?; // ^ let rhs = self.bitwise_and_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -824,7 +817,7 @@ impl<'a> Parser<'a> { fn bitwise_and_expression(&mut self) -> Result> { let mut result = self.shift_expression()?; while self.current.is_some() && self.peek_current(TokenKind::Ampersand) { - self.advance(); // & + self.advance()?; // & let rhs = self.shift_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -844,9 +837,9 @@ impl<'a> Parser<'a> { let current = self.get_current(); match current.kind() { TokenKind::Lt => { - let second_lt_start = current.start() + 1; - self.advance(); // first < - self.expect_position_advance(TokenKind::Lt, second_lt_start)?; // second < + let previous_cloned = current.clone(); + self.advance()?; // first < + self.expect_immediately_after_advance(TokenKind::Lt, &previous_cloned)?; // second < let rhs = self.additive_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -858,9 +851,9 @@ impl<'a> Parser<'a> { )); } TokenKind::Gt => { - let second_gt_start = current.start() + 1; - self.advance(); // first > - self.expect_position_advance(TokenKind::Gt, second_gt_start)?; // second gt + let previous_cloned = current.clone(); + self.advance()?; // first > + self.expect_immediately_after_advance(TokenKind::Gt, &previous_cloned)?; // second gt let rhs = self.additive_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -883,7 +876,7 @@ impl<'a> Parser<'a> { let current = self.get_current(); match current.kind() { TokenKind::Plus => { - self.advance(); // plus + self.advance()?; // plus let rhs = self.multiplicative_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -895,7 +888,7 @@ impl<'a> Parser<'a> { )); } TokenKind::Minus => { - self.advance(); // minus + self.advance()?; // minus let rhs = self.multiplicative_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -918,7 +911,7 @@ impl<'a> Parser<'a> { let current = self.get_current(); match current.kind() { TokenKind::Star => { - self.advance(); // multiply + self.advance()?; // multiply let rhs = self.prefix_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -930,7 +923,7 @@ impl<'a> Parser<'a> { )); } TokenKind::Slash => { - self.advance(); // slash + self.advance()?; // slash let rhs = self.prefix_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -942,7 +935,7 @@ impl<'a> Parser<'a> { )) } TokenKind::Modulo => { - self.advance(); // modulo + self.advance()?; // modulo let rhs = self.prefix_expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); @@ -967,7 +960,7 @@ impl<'a> Parser<'a> { match current.kind() { TokenKind::Minus => { operator_tokens.push(current.clone()); // unfortunately necessary - self.advance(); + self.advance()?; } _ => break, } @@ -1010,7 +1003,7 @@ impl<'a> Parser<'a> { TokenKind::IntegerLiteral => { let raw = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); - self.advance(); + self.advance()?; Ok(Expression::Integer(IntegerLiteral::new( i32::from_str(raw).unwrap(), source_range, @@ -1019,7 +1012,7 @@ impl<'a> Parser<'a> { TokenKind::DoubleLiteral => { let raw = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); - self.advance(); + self.advance()?; Ok(Expression::Double(DoubleLiteral::new( f64::from_str(raw).unwrap(), source_range, @@ -1028,7 +1021,7 @@ impl<'a> Parser<'a> { TokenKind::String => { let with_quotes = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); - self.advance(); + self.advance()?; Ok(Expression::String(StringLiteral::new( &with_quotes[1..with_quotes.len() - 1], source_range, @@ -1037,7 +1030,7 @@ impl<'a> Parser<'a> { TokenKind::Identifier => { let declared_name = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); - self.advance(); + self.advance()?; Ok(Expression::Identifier(Identifier::new( declared_name, source_range, @@ -1065,7 +1058,7 @@ impl<'a> Parser<'a> { let mut expressions = vec![]; expressions.push(self.expression()?); while self.current.is_some() && self.peek_current(TokenKind::Comma) { - self.advance(); // comma + self.advance()?; // comma expressions.push(self.expression()?); } Ok(expressions)