From 7c041e40adeefd231d41852298fe6be7d27228cb Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Mon, 2 Mar 2026 14:06:14 -0600 Subject: [PATCH] Add sort-of-working error recovery to parser. --- dm/src/main.rs | 11 +- dmc-lib/src/asm/mod.rs | 11 +- dmc-lib/src/ast/mod.rs | 45 +--- dmc-lib/src/parser.rs | 315 +++++++++++++++++++++------- examples/hello.dm | 2 +- examples/parse_errors/forgot_end.dm | 2 + 6 files changed, 267 insertions(+), 119 deletions(-) create mode 100644 examples/parse_errors/forgot_end.dm diff --git a/dm/src/main.rs b/dm/src/main.rs index fec9c2a..6bb3716 100644 --- a/dm/src/main.rs +++ b/dm/src/main.rs @@ -25,11 +25,18 @@ fn main() { let args = Cli::parse(); let input = std::fs::read_to_string(&args.script).unwrap(); - let mut compilation_unit = parse_compilation_unit(&input); - let mut files: SimpleFiles<&str, &str> = SimpleFiles::new(); let script_file_id = files.add(args.script.to_str().unwrap(), &input); + let parse_result = parse_compilation_unit(&input); + let mut compilation_unit = match parse_result { + Ok(compilation_unit) => compilation_unit, + Err(diagnostics) => { + check_and_report_diagnostics(&files, script_file_id, &diagnostics); + unreachable!(); + } + }; + let mut symbol_table = SymbolTable::new(); let gather_names_diagnostics = compilation_unit.gather_declared_names(&mut symbol_table); diff --git a/dmc-lib/src/asm/mod.rs b/dmc-lib/src/asm/mod.rs index 7e611dd..996d136 100644 --- a/dmc-lib/src/asm/mod.rs +++ b/dmc-lib/src/asm/mod.rs @@ -10,7 +10,16 @@ mod smoke_tests { use crate::symbol_table::SymbolTable; fn assemble(src: &str) -> Vec { - let mut compilation_unit = parse_compilation_unit(src); + let parse_result = parse_compilation_unit(src); + let mut compilation_unit = match parse_result { + Ok(compilation_unit) => compilation_unit, + Err(diagnostics) => { + for diagnostic in diagnostics { + eprintln!("{:?}", diagnostic); + } + panic!(); + } + }; let mut symbol_table = SymbolTable::new(); 
compilation_unit.gather_declared_names(&mut symbol_table); compilation_unit.check_name_usages(&symbol_table); diff --git a/dmc-lib/src/ast/mod.rs b/dmc-lib/src/ast/mod.rs index 8b912b1..4bde6d3 100644 --- a/dmc-lib/src/ast/mod.rs +++ b/dmc-lib/src/ast/mod.rs @@ -1,4 +1,4 @@ -mod assemble_context; +pub mod assemble_context; pub mod call; pub mod compilation_unit; pub mod expression; @@ -12,46 +12,3 @@ pub mod let_statement; pub mod module_level_declaration; pub mod statement; pub mod string_literal; - -#[cfg(test)] -mod name_tests { - use crate::parser::parse_compilation_unit; - use crate::symbol_table::SymbolTable; - - #[test] - fn smoke_screen() { - let mut symbol_table = SymbolTable::new(); - let mut compilation_unit = - parse_compilation_unit("fn println() end fn main() let x = 42 println(x) end"); - assert_eq!( - compilation_unit - .gather_declared_names(&mut symbol_table) - .len(), - 0 - ); - assert_eq!(compilation_unit.check_name_usages(&symbol_table).len(), 0); - } - - #[test] - fn hello_world() { - let mut symbol_table = SymbolTable::new(); - let mut compilation_unit = - parse_compilation_unit("fn println() end fn main() println(\"Hello, World!\") end"); - compilation_unit.gather_declared_names(&mut symbol_table); - compilation_unit.check_name_usages(&symbol_table); - } - - #[test] - fn get_some_diagnostics() { - let mut symbol_table = SymbolTable::new(); - let mut compilation_unit = parse_compilation_unit("fn main() notDefined(uhOh) end"); - assert_eq!( - compilation_unit - .gather_declared_names(&mut symbol_table) - .len(), - 0 - ); - let name_usage_diagnostics = compilation_unit.check_name_usages(&symbol_table); - assert_eq!(name_usage_diagnostics.len(), 2); - } -} diff --git a/dmc-lib/src/parser.rs b/dmc-lib/src/parser.rs index 0aabd1b..5099c85 100644 --- a/dmc-lib/src/parser.rs +++ b/dmc-lib/src/parser.rs @@ -10,12 +10,13 @@ use crate::ast::let_statement::LetStatement; use crate::ast::module_level_declaration::ModuleLevelDeclaration; use 
crate::ast::statement::Statement; use crate::ast::string_literal::StringLiteral; +use crate::diagnostic::Diagnostic; use crate::lexer::Lexer; use crate::source_range::SourceRange; use crate::token::{Token, TokenKind}; use std::str::FromStr; -pub fn parse_compilation_unit(input: &str) -> CompilationUnit { +pub fn parse_compilation_unit(input: &str) -> Result<CompilationUnit, Vec<Diagnostic>> { let mut parser = Parser::new(input); parser.compilation_unit() } @@ -37,6 +38,22 @@ impl<'a> Parser<'a> { } } + fn advance_until(&mut self, token_kinds: &[TokenKind]) { + while self.current.is_some() { + self.advance(); + match &self.current { + None => { + // reached eoi + } + Some(current) => { + if token_kinds.contains(&current.kind()) { + break; + } + } + } + } + } + fn advance(&mut self) { if self.lookahead.is_some() { // we've advanced at least once @@ -50,6 +67,9 @@ impl<'a> Parser<'a> { } }, } + } else if self.lookahead.is_none() && self.current.is_some() { + // we're on the last token + self.current = None; } else { // we've not yet advanced, so fetch both // current @@ -79,22 +99,24 @@ impl<'a> Parser<'a> { } } - fn expect_advance(&mut self, token_kind: TokenKind) -> Token { + fn expect_advance(&mut self, token_kind: TokenKind) -> Result<Token, Vec<Diagnostic>> { match self.current.take() { - None => { - panic!("Expected {:?} but found end of input", token_kind); - } + None => Err(vec![Diagnostic::new( + &format!("Expected {:?} but found end-of-input.", token_kind), + self.input.len(), + self.input.len(), + )]), Some(token) => { if token.kind() == token_kind { self.advance(); - token + Ok(token) } else { - panic!( - "Expected {:?} but found {:?} at {}", - token_kind, - token.kind(), - token.start() - ); + self.advance_until(&[token_kind]); + Err(vec![Diagnostic::new( + &format!("Expected {:?} but found {:?}", token_kind, token.kind()), + token.start(), + token.end(), + )]) } } } @@ -102,7 +124,7 @@ impl<'a> Parser<'a> { fn peek_current(&self, token_kind: TokenKind) -> bool { match &self.current { - None => false, + None => 
panic!("Unexpected end of input."), Some(token) => token.kind() == token_kind, } } @@ -124,85 +146,150 @@ impl<'a> Parser<'a> { self.sample_input(token.start(), token.end()) } - pub fn compilation_unit(&mut self) -> CompilationUnit { + pub fn compilation_unit(&mut self) -> Result<CompilationUnit, Vec<Diagnostic>> { let mut declarations = vec![]; - self.advance(); + let mut diagnostics = vec![]; + self.advance(); // get started while self.current.is_some() { - declarations.push(self.module_level_declaration()); + let current = self.get_current(); + match current.kind() { + TokenKind::Fn | TokenKind::Extern => { + let declaration_result = self.module_level_declaration(); + match declaration_result { + Ok(declaration) => declarations.push(declaration), + Err(mut declaration_diagnostics) => { + diagnostics.append(&mut declaration_diagnostics) + } + } + } + _ => { + diagnostics.push(Diagnostic::new( + &format!( + "Expected any of: {:?}; found {:?}", + [TokenKind::Fn, TokenKind::Extern], + current.kind() + ), + current.start(), + current.end(), + )); + self.advance_until(&[TokenKind::Fn, TokenKind::Extern]); + } + } + } + if diagnostics.is_empty() { + Ok(CompilationUnit::new(declarations)) + } else { + Err(diagnostics) } - CompilationUnit::new(declarations) } - fn module_level_declaration(&mut self) -> ModuleLevelDeclaration { + fn module_level_declaration(&mut self) -> Result<ModuleLevelDeclaration, Vec<Diagnostic>> { let current = self.get_current(); match current.kind() { - TokenKind::Fn => ModuleLevelDeclaration::Function(self.function()), - TokenKind::Extern => ModuleLevelDeclaration::ExternFunction(self.extern_function()), - _ => panic!(), + TokenKind::Fn => { + let function_result = self.function(); + match function_result { + Ok(function) => Ok(ModuleLevelDeclaration::Function(function)), + Err(function_diagnostics) => Err(function_diagnostics), + } + } + TokenKind::Extern => { + let extern_function_result = self.extern_function(); + match extern_function_result { + Ok(extern_function) => { + 
Ok(ModuleLevelDeclaration::ExternFunction(extern_function)) + } + Err(extern_function_diagnostics) => Err(extern_function_diagnostics), + } + } + _ => unreachable!(), } } - fn function(&mut self) -> Function { - self.expect_advance(TokenKind::Fn); - let identifier_token = self.expect_advance(TokenKind::Identifier); - self.expect_advance(TokenKind::LeftParentheses); + fn function(&mut self) -> Result<Function, Vec<Diagnostic>> { + self.expect_advance(TokenKind::Fn)?; + let identifier_token = self.expect_advance(TokenKind::Identifier)?; + self.expect_advance(TokenKind::LeftParentheses)?; // add params - self.expect_advance(TokenKind::RightParentheses); + self.expect_advance(TokenKind::RightParentheses)?; let mut statements = vec![]; - while !self.peek_current(TokenKind::End) { - statements.push(self.statement()); + let mut diagnostics = vec![]; + while self.current.is_some() && !self.peek_current(TokenKind::End) { + let statement_result = self.statement(); + match statement_result { + Ok(statement) => { + statements.push(statement); + } + Err(mut statement_diagnostics) => { + diagnostics.append(&mut statement_diagnostics); + } + } + } + + // if we're missing "end", append it to the other statement diagnostics + let end_result = self.expect_advance(TokenKind::End); + match end_result { + Err(mut end_diagnostics) => { + diagnostics.append(&mut end_diagnostics); + } + _ => {} + } + + if diagnostics.is_empty() { + Ok(Function::new( + self.token_text(&identifier_token), + SourceRange::new(identifier_token.start(), identifier_token.end()), + statements, + )) + } else { + Err(diagnostics) } - self.expect_advance(TokenKind::End); - Function::new( - self.token_text(&identifier_token), - SourceRange::new(identifier_token.start(), identifier_token.end()), - statements, - ) } - fn extern_function(&mut self) -> ExternFunction { - self.expect_advance(TokenKind::Extern); - self.expect_advance(TokenKind::Fn); - let identifier_token = self.expect_advance(TokenKind::Identifier); - 
self.expect_advance(TokenKind::LeftParentheses); + fn extern_function(&mut self) -> Result<ExternFunction, Vec<Diagnostic>> { + self.expect_advance(TokenKind::Extern)?; + self.expect_advance(TokenKind::Fn)?; + let identifier_token = self.expect_advance(TokenKind::Identifier)?; + self.expect_advance(TokenKind::LeftParentheses)?; // params - self.expect_advance(TokenKind::RightParentheses); + self.expect_advance(TokenKind::RightParentheses)?; // return type - ExternFunction::new( + Ok(ExternFunction::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), - ) + )) } - fn statement(&mut self) -> Statement { + fn statement(&mut self) -> Result<Statement, Vec<Diagnostic>> { let current = self.get_current(); match current.kind() { - TokenKind::Let => self.let_statement(), - _ => self.expression_statement(), + TokenKind::Let => Ok(Statement::Let(self.let_statement()?)), + _ => Ok(Statement::Expression(self.expression_statement()?)), } } - fn let_statement(&mut self) -> Statement { - self.expect_advance(TokenKind::Let); - let identifier = self.expect_advance(TokenKind::Identifier); - self.expect_advance(TokenKind::Equals); - let expression = self.expression(); - Statement::Let(LetStatement::new( + fn let_statement(&mut self) -> Result<LetStatement, Vec<Diagnostic>> { + self.expect_advance(TokenKind::Let)?; + let identifier = self.expect_advance(TokenKind::Identifier)?; + self.expect_advance(TokenKind::Equals)?; + let expression = self.expression()?; + Ok(LetStatement::new( self.token_text(&identifier), SourceRange::new(identifier.start(), identifier.end()), expression, )) } - fn expression_statement(&mut self) -> Statement { - Statement::Expression(ExpressionStatement::new(self.expression())) + fn expression_statement(&mut self) -> Result<ExpressionStatement, Vec<Diagnostic>> { + Ok(ExpressionStatement::new(self.expression()?)) } - fn expression(&mut self) -> Expression { - let current = self.get_current(); - let mut result = match current.kind() { + fn expression(&mut self) -> Result<Expression, Vec<Diagnostic>> { + let current = self.get_current().clone(); // I don't 
love this clone + let mut diagnostics = vec![]; + let mut expression = match current.kind() { TokenKind::IntegerLiteral => { - let raw = self.token_text(current); + let raw = self.token_text(&current); let source_range = SourceRange::new(current.start(), current.end()); self.advance(); Expression::IntegerLiteral(IntegerLiteral::new( @@ -211,7 +298,7 @@ impl<'a> Parser<'a> { )) } TokenKind::String => { - let with_quotes = self.token_text(current); + let with_quotes = self.token_text(&current); let source_range = SourceRange::new(current.start(), current.end()); self.advance(); Expression::String(StringLiteral::new( @@ -220,37 +307,73 @@ impl<'a> Parser<'a> { )) } TokenKind::Identifier => { - let declared_name = self.token_text(current); + let declared_name = self.token_text(&current); let source_range = SourceRange::new(current.start(), current.end()); self.advance(); Expression::Identifier(Identifier::new(declared_name, source_range)) } - _ => panic!("Unexpected token {:?}", current.kind()), + _ => { + diagnostics.push(Diagnostic::new( + &format!( + "Expected any of {:?} but found {:?}", + [ + TokenKind::IntegerLiteral, + TokenKind::String, + TokenKind::Identifier + ], + current.kind() + ), + current.start(), + current.end(), + )); + + self.advance_until(&[ + TokenKind::IntegerLiteral, + TokenKind::String, + TokenKind::Identifier, + ]); + if self.current.is_some() { + let try_again_result = self.expression(); + match try_again_result { + Ok(expression) => expression, + Err(mut try_again_diagnostics) => { + diagnostics.append(&mut try_again_diagnostics); + return Err(diagnostics); + } + } + } else { + return Err(diagnostics); + } + } }; // postfixes while let Some(current) = &self.current { match current.kind() { TokenKind::LeftParentheses => { - result = Expression::Call(self.call(result)); + expression = Expression::Call(self.call(expression)?); } _ => break, } } - result + if diagnostics.is_empty() { + Ok(expression) + } else { + Err(diagnostics) + } } - fn call(&mut self, callee: 
Expression) -> Call { - self.expect_advance(TokenKind::LeftParentheses); + fn call(&mut self, callee: Expression) -> Result<Call, Vec<Diagnostic>> { + self.expect_advance(TokenKind::LeftParentheses)?; let mut arguments = vec![]; - while !self.peek_current(TokenKind::RightParentheses) { - arguments.push(self.expression()); + while self.current.is_some() && !self.peek_current(TokenKind::RightParentheses) { + arguments.push(self.expression()?); } - let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses); + let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses)?; let source_range = SourceRange::new(callee.source_range().start(), right_parentheses_token.end()); - Call::new(callee, arguments, source_range) + Ok(Call::new(callee, arguments, source_range)) } } @@ -265,7 +388,16 @@ mod smoke_tests { #[test] fn hello_world() { - let compilation_unit = parse_compilation_unit("fn main() println(\"Hello, World!\") end"); + let parse_result = parse_compilation_unit("fn main() println(\"Hello, World!\") end"); + let compilation_unit = match parse_result { + Ok(compilation_unit) => compilation_unit, + Err(diagnostics) => { + for diagnostic in &diagnostics { + eprintln!("{:?}", diagnostic) + } + panic!() + } + }; let declarations = compilation_unit.declarations(); assert_eq!(declarations.len(), 1); let function = match &declarations[0] { @@ -313,8 +445,17 @@ mod concrete_tests { #[test] fn parses_extern_fn() { - let cu = parse_compilation_unit("extern fn println()"); - let declarations = cu.declarations(); + let parse_result = parse_compilation_unit("extern fn println()"); + let compilation_unit = match parse_result { + Ok(compilation_unit) => compilation_unit, + Err(diagnostics) => { + for diagnostic in diagnostics { + eprintln!("{:?}", diagnostic); + } + panic!(); + } + }; + let declarations = compilation_unit.declarations(); assert_eq!(declarations.len(), 1); let extern_function = match &declarations[0] { 
ModuleLevelDeclaration::ExternFunction(extern_function) => extern_function, @@ -323,3 +464,35 @@ mod concrete_tests { assert_eq!(extern_function.declared_name(), "println"); } } + +#[cfg(test)] +mod parse_failure_tests { + use super::*; + + #[test] + fn lone_end() { + let parse_result = parse_compilation_unit("end"); + match parse_result { + Err(diagnostics) => { + assert_eq!(diagnostics.len(), 1); + for diagnostic in &diagnostics { + println!("{:?}", diagnostic) + } + } + Ok(_) => panic!(), + } + } + + #[test] + fn two_ends() { + let parse_result = parse_compilation_unit("end end"); + match parse_result { + Err(diagnostics) => { + // Should only have an error on the first end, since we advance until we find a + // token we can recover from (fn or extern) + assert_eq!(diagnostics.len(), 1); + } + Ok(_) => panic!(), + } + } +} diff --git a/examples/hello.dm b/examples/hello.dm index 65091d4..84cb693 100644 --- a/examples/hello.dm +++ b/examples/hello.dm @@ -1,4 +1,4 @@ -fn println() end +extern fn println() fn main() let x = "Hello, World!" diff --git a/examples/parse_errors/forgot_end.dm b/examples/parse_errors/forgot_end.dm new file mode 100644 index 0000000..0141797 --- /dev/null +++ b/examples/parse_errors/forgot_end.dm @@ -0,0 +1,2 @@ +fn main() + println(42) \ No newline at end of file