Add sort-of-working error recovery to parser.

This commit is contained in:
Jesse Brault 2026-03-02 14:06:14 -06:00
parent 506e260c75
commit 7c041e40ad
6 changed files with 267 additions and 119 deletions

View File

@ -25,11 +25,18 @@ fn main() {
let args = Cli::parse(); let args = Cli::parse();
let input = std::fs::read_to_string(&args.script).unwrap(); let input = std::fs::read_to_string(&args.script).unwrap();
let mut compilation_unit = parse_compilation_unit(&input);
let mut files: SimpleFiles<&str, &str> = SimpleFiles::new(); let mut files: SimpleFiles<&str, &str> = SimpleFiles::new();
let script_file_id = files.add(args.script.to_str().unwrap(), &input); let script_file_id = files.add(args.script.to_str().unwrap(), &input);
let parse_result = parse_compilation_unit(&input);
let mut compilation_unit = match parse_result {
Ok(compilation_unit) => compilation_unit,
Err(diagnostics) => {
check_and_report_diagnostics(&files, script_file_id, &diagnostics);
unreachable!();
}
};
let mut symbol_table = SymbolTable::new(); let mut symbol_table = SymbolTable::new();
let gather_names_diagnostics = compilation_unit.gather_declared_names(&mut symbol_table); let gather_names_diagnostics = compilation_unit.gather_declared_names(&mut symbol_table);

View File

@ -10,7 +10,16 @@ mod smoke_tests {
use crate::symbol_table::SymbolTable; use crate::symbol_table::SymbolTable;
fn assemble(src: &str) -> Vec<AsmFunction> { fn assemble(src: &str) -> Vec<AsmFunction> {
let mut compilation_unit = parse_compilation_unit(src); let parse_result = parse_compilation_unit(src);
let mut compilation_unit = match parse_result {
Ok(compilation_unit) => compilation_unit,
Err(diagnostics) => {
for diagnostic in diagnostics {
eprintln!("{:?}", diagnostic);
}
panic!();
}
};
let mut symbol_table = SymbolTable::new(); let mut symbol_table = SymbolTable::new();
compilation_unit.gather_declared_names(&mut symbol_table); compilation_unit.gather_declared_names(&mut symbol_table);
compilation_unit.check_name_usages(&symbol_table); compilation_unit.check_name_usages(&symbol_table);

View File

@ -1,4 +1,4 @@
mod assemble_context; pub mod assemble_context;
pub mod call; pub mod call;
pub mod compilation_unit; pub mod compilation_unit;
pub mod expression; pub mod expression;
@ -12,46 +12,3 @@ pub mod let_statement;
pub mod module_level_declaration; pub mod module_level_declaration;
pub mod statement; pub mod statement;
pub mod string_literal; pub mod string_literal;
#[cfg(test)]
mod name_tests {
    use crate::parser::parse_compilation_unit;
    use crate::symbol_table::SymbolTable;

    /// A program whose names are all declared yields no diagnostics from
    /// either name-resolution pass.
    #[test]
    fn smoke_screen() {
        let source = "fn println() end fn main() let x = 42 println(x) end";
        let mut unit = parse_compilation_unit(source);
        let mut symbols = SymbolTable::new();

        let declaration_diagnostics = unit.gather_declared_names(&mut symbols);
        assert_eq!(declaration_diagnostics.len(), 0);

        let usage_diagnostics = unit.check_name_usages(&symbols);
        assert_eq!(usage_diagnostics.len(), 0);
    }

    /// Runs both name passes over a string-literal call; only checks that
    /// neither pass panics (the returned diagnostics are not inspected).
    #[test]
    fn hello_world() {
        let source = "fn println() end fn main() println(\"Hello, World!\") end";
        let mut unit = parse_compilation_unit(source);
        let mut symbols = SymbolTable::new();

        unit.gather_declared_names(&mut symbols);
        unit.check_name_usages(&symbols);
    }

    /// Gathering declarations reports nothing here, while the usage check
    /// reports two diagnostics for `notDefined(uhOh)`.
    #[test]
    fn get_some_diagnostics() {
        let source = "fn main() notDefined(uhOh) end";
        let mut unit = parse_compilation_unit(source);
        let mut symbols = SymbolTable::new();

        let declaration_diagnostics = unit.gather_declared_names(&mut symbols);
        assert_eq!(declaration_diagnostics.len(), 0);

        let usage_diagnostics = unit.check_name_usages(&symbols);
        assert_eq!(usage_diagnostics.len(), 2);
    }
}

View File

@ -10,12 +10,13 @@ use crate::ast::let_statement::LetStatement;
use crate::ast::module_level_declaration::ModuleLevelDeclaration; use crate::ast::module_level_declaration::ModuleLevelDeclaration;
use crate::ast::statement::Statement; use crate::ast::statement::Statement;
use crate::ast::string_literal::StringLiteral; use crate::ast::string_literal::StringLiteral;
use crate::diagnostic::Diagnostic;
use crate::lexer::Lexer; use crate::lexer::Lexer;
use crate::source_range::SourceRange; use crate::source_range::SourceRange;
use crate::token::{Token, TokenKind}; use crate::token::{Token, TokenKind};
use std::str::FromStr; use std::str::FromStr;
pub fn parse_compilation_unit(input: &str) -> CompilationUnit { pub fn parse_compilation_unit(input: &str) -> Result<CompilationUnit, Vec<Diagnostic>> {
let mut parser = Parser::new(input); let mut parser = Parser::new(input);
parser.compilation_unit() parser.compilation_unit()
} }
@ -37,6 +38,22 @@ impl<'a> Parser<'a> {
} }
} }
/// Error-recovery helper: discards tokens until `current` holds a token whose
/// kind is listed in `token_kinds`, or until the input is exhausted.
///
/// At least one `advance()` happens before any token is inspected, so the
/// token the parser is positioned on at the time of the call is always
/// skipped, even if its own kind appears in `token_kinds`.
///
/// The scan starts whenever *either* `current` or `lookahead` is populated.
/// `expect_advance` moves the mismatched token out of `current` (via `take()`)
/// before calling this method; guarding on `current` alone turned that call
/// into a no-op and left the parser looking as if it had hit end of input
/// while `lookahead` still held a token.
///
/// On return, `current` is either a token whose kind is in `token_kinds`
/// (left in place, not consumed) or `None`.
fn advance_until(&mut self, token_kinds: &[TokenKind]) {
    if self.current.is_none() && self.lookahead.is_none() {
        // nothing left to skip
        return;
    }
    loop {
        self.advance();
        match &self.current {
            // reached eoi
            None => break,
            // found a token the caller can resume from
            Some(token) if token_kinds.contains(&token.kind()) => break,
            // keep skipping
            Some(_) => {}
        }
    }
}
fn advance(&mut self) { fn advance(&mut self) {
if self.lookahead.is_some() { if self.lookahead.is_some() {
// we've advanced at least once // we've advanced at least once
@ -50,6 +67,9 @@ impl<'a> Parser<'a> {
} }
}, },
} }
} else if self.lookahead.is_none() && self.current.is_some() {
// we're on the last token
self.current = None;
} else { } else {
// we've not yet advanced, so fetch both // we've not yet advanced, so fetch both
// current // current
@ -79,22 +99,24 @@ impl<'a> Parser<'a> {
} }
} }
fn expect_advance(&mut self, token_kind: TokenKind) -> Token { fn expect_advance(&mut self, token_kind: TokenKind) -> Result<Token, Vec<Diagnostic>> {
match self.current.take() { match self.current.take() {
None => { None => Err(vec![Diagnostic::new(
panic!("Expected {:?} but found end of input", token_kind); &format!("Expected {:?} but found end-of-input.", token_kind),
} self.input.len(),
self.input.len(),
)]),
Some(token) => { Some(token) => {
if token.kind() == token_kind { if token.kind() == token_kind {
self.advance(); self.advance();
token Ok(token)
} else { } else {
panic!( self.advance_until(&[token_kind]);
"Expected {:?} but found {:?} at {}", Err(vec![Diagnostic::new(
token_kind, &format!("Expected {:?} but found {:?}", token_kind, token.kind()),
token.kind(), token.start(),
token.start() token.end(),
); )])
} }
} }
} }
@ -102,7 +124,7 @@ impl<'a> Parser<'a> {
fn peek_current(&self, token_kind: TokenKind) -> bool { fn peek_current(&self, token_kind: TokenKind) -> bool {
match &self.current { match &self.current {
None => false, None => panic!("Unexpected end of input."),
Some(token) => token.kind() == token_kind, Some(token) => token.kind() == token_kind,
} }
} }
@ -124,85 +146,150 @@ impl<'a> Parser<'a> {
self.sample_input(token.start(), token.end()) self.sample_input(token.start(), token.end())
} }
pub fn compilation_unit(&mut self) -> CompilationUnit { pub fn compilation_unit(&mut self) -> Result<CompilationUnit, Vec<Diagnostic>> {
let mut declarations = vec![]; let mut declarations = vec![];
self.advance(); let mut diagnostics = vec![];
self.advance(); // get started
while self.current.is_some() { while self.current.is_some() {
declarations.push(self.module_level_declaration()); let current = self.get_current();
match current.kind() {
TokenKind::Fn | TokenKind::Extern => {
let declaration_result = self.module_level_declaration();
match declaration_result {
Ok(declaration) => declarations.push(declaration),
Err(mut declaration_diagnostics) => {
diagnostics.append(&mut declaration_diagnostics)
}
}
}
_ => {
diagnostics.push(Diagnostic::new(
&format!(
"Expected any of: {:?}; found {:?}",
[TokenKind::Fn, TokenKind::Extern],
current.kind()
),
current.start(),
current.end(),
));
self.advance_until(&[TokenKind::Fn, TokenKind::Extern]);
}
}
}
if diagnostics.is_empty() {
Ok(CompilationUnit::new(declarations))
} else {
Err(diagnostics)
} }
CompilationUnit::new(declarations)
} }
fn module_level_declaration(&mut self) -> ModuleLevelDeclaration { fn module_level_declaration(&mut self) -> Result<ModuleLevelDeclaration, Vec<Diagnostic>> {
let current = self.get_current(); let current = self.get_current();
match current.kind() { match current.kind() {
TokenKind::Fn => ModuleLevelDeclaration::Function(self.function()), TokenKind::Fn => {
TokenKind::Extern => ModuleLevelDeclaration::ExternFunction(self.extern_function()), let function_result = self.function();
_ => panic!(), match function_result {
Ok(function) => Ok(ModuleLevelDeclaration::Function(function)),
Err(function_diagnostics) => Err(function_diagnostics),
}
}
TokenKind::Extern => {
let extern_function_result = self.extern_function();
match extern_function_result {
Ok(extern_function) => {
Ok(ModuleLevelDeclaration::ExternFunction(extern_function))
}
Err(extern_function_diagnostics) => Err(extern_function_diagnostics),
}
}
_ => unreachable!(),
} }
} }
fn function(&mut self) -> Function { fn function(&mut self) -> Result<Function, Vec<Diagnostic>> {
self.expect_advance(TokenKind::Fn); self.expect_advance(TokenKind::Fn)?;
let identifier_token = self.expect_advance(TokenKind::Identifier); let identifier_token = self.expect_advance(TokenKind::Identifier)?;
self.expect_advance(TokenKind::LeftParentheses); self.expect_advance(TokenKind::LeftParentheses)?;
// add params // add params
self.expect_advance(TokenKind::RightParentheses); self.expect_advance(TokenKind::RightParentheses)?;
let mut statements = vec![]; let mut statements = vec![];
while !self.peek_current(TokenKind::End) { let mut diagnostics = vec![];
statements.push(self.statement()); while self.current.is_some() && !self.peek_current(TokenKind::End) {
let statement_result = self.statement();
match statement_result {
Ok(statement) => {
statements.push(statement);
}
Err(mut statement_diagnostics) => {
diagnostics.append(&mut statement_diagnostics);
}
}
}
// if we're missing "end", append it to the other statement diagnostics
let end_result = self.expect_advance(TokenKind::End);
match end_result {
Err(mut end_diagnostics) => {
diagnostics.append(&mut end_diagnostics);
}
_ => {}
}
if diagnostics.is_empty() {
Ok(Function::new(
self.token_text(&identifier_token),
SourceRange::new(identifier_token.start(), identifier_token.end()),
statements,
))
} else {
Err(diagnostics)
} }
self.expect_advance(TokenKind::End);
Function::new(
self.token_text(&identifier_token),
SourceRange::new(identifier_token.start(), identifier_token.end()),
statements,
)
} }
fn extern_function(&mut self) -> ExternFunction { fn extern_function(&mut self) -> Result<ExternFunction, Vec<Diagnostic>> {
self.expect_advance(TokenKind::Extern); self.expect_advance(TokenKind::Extern)?;
self.expect_advance(TokenKind::Fn); self.expect_advance(TokenKind::Fn)?;
let identifier_token = self.expect_advance(TokenKind::Identifier); let identifier_token = self.expect_advance(TokenKind::Identifier)?;
self.expect_advance(TokenKind::LeftParentheses); self.expect_advance(TokenKind::LeftParentheses)?;
// params // params
self.expect_advance(TokenKind::RightParentheses); self.expect_advance(TokenKind::RightParentheses)?;
// return type // return type
ExternFunction::new( Ok(ExternFunction::new(
self.token_text(&identifier_token), self.token_text(&identifier_token),
SourceRange::new(identifier_token.start(), identifier_token.end()), SourceRange::new(identifier_token.start(), identifier_token.end()),
) ))
} }
fn statement(&mut self) -> Statement { fn statement(&mut self) -> Result<Statement, Vec<Diagnostic>> {
let current = self.get_current(); let current = self.get_current();
match current.kind() { match current.kind() {
TokenKind::Let => self.let_statement(), TokenKind::Let => Ok(Statement::Let(self.let_statement()?)),
_ => self.expression_statement(), _ => Ok(Statement::Expression(self.expression_statement()?)),
} }
} }
fn let_statement(&mut self) -> Statement { fn let_statement(&mut self) -> Result<LetStatement, Vec<Diagnostic>> {
self.expect_advance(TokenKind::Let); self.expect_advance(TokenKind::Let)?;
let identifier = self.expect_advance(TokenKind::Identifier); let identifier = self.expect_advance(TokenKind::Identifier)?;
self.expect_advance(TokenKind::Equals); self.expect_advance(TokenKind::Equals)?;
let expression = self.expression(); let expression = self.expression()?;
Statement::Let(LetStatement::new( Ok(LetStatement::new(
self.token_text(&identifier), self.token_text(&identifier),
SourceRange::new(identifier.start(), identifier.end()), SourceRange::new(identifier.start(), identifier.end()),
expression, expression,
)) ))
} }
fn expression_statement(&mut self) -> Statement { fn expression_statement(&mut self) -> Result<ExpressionStatement, Vec<Diagnostic>> {
Statement::Expression(ExpressionStatement::new(self.expression())) Ok(ExpressionStatement::new(self.expression()?))
} }
fn expression(&mut self) -> Expression { fn expression(&mut self) -> Result<Expression, Vec<Diagnostic>> {
let current = self.get_current(); let current = self.get_current().clone(); // I don't love this clone
let mut result = match current.kind() { let mut diagnostics = vec![];
let mut expression = match current.kind() {
TokenKind::IntegerLiteral => { TokenKind::IntegerLiteral => {
let raw = self.token_text(current); let raw = self.token_text(&current);
let source_range = SourceRange::new(current.start(), current.end()); let source_range = SourceRange::new(current.start(), current.end());
self.advance(); self.advance();
Expression::IntegerLiteral(IntegerLiteral::new( Expression::IntegerLiteral(IntegerLiteral::new(
@ -211,7 +298,7 @@ impl<'a> Parser<'a> {
)) ))
} }
TokenKind::String => { TokenKind::String => {
let with_quotes = self.token_text(current); let with_quotes = self.token_text(&current);
let source_range = SourceRange::new(current.start(), current.end()); let source_range = SourceRange::new(current.start(), current.end());
self.advance(); self.advance();
Expression::String(StringLiteral::new( Expression::String(StringLiteral::new(
@ -220,37 +307,73 @@ impl<'a> Parser<'a> {
)) ))
} }
TokenKind::Identifier => { TokenKind::Identifier => {
let declared_name = self.token_text(current); let declared_name = self.token_text(&current);
let source_range = SourceRange::new(current.start(), current.end()); let source_range = SourceRange::new(current.start(), current.end());
self.advance(); self.advance();
Expression::Identifier(Identifier::new(declared_name, source_range)) Expression::Identifier(Identifier::new(declared_name, source_range))
} }
_ => panic!("Unexpected token {:?}", current.kind()), _ => {
diagnostics.push(Diagnostic::new(
&format!(
"Expected any of {:?} but found {:?}",
[
TokenKind::IntegerLiteral,
TokenKind::String,
TokenKind::Identifier
],
current.kind()
),
current.start(),
current.end(),
));
self.advance_until(&[
TokenKind::IntegerLiteral,
TokenKind::String,
TokenKind::Identifier,
]);
if self.current.is_some() {
let try_again_result = self.expression();
match try_again_result {
Ok(expression) => expression,
Err(mut try_again_diagnostics) => {
diagnostics.append(&mut try_again_diagnostics);
return Err(diagnostics);
}
}
} else {
return Err(diagnostics);
}
}
}; };
// postfixes // postfixes
while let Some(current) = &self.current { while let Some(current) = &self.current {
match current.kind() { match current.kind() {
TokenKind::LeftParentheses => { TokenKind::LeftParentheses => {
result = Expression::Call(self.call(result)); expression = Expression::Call(self.call(expression)?);
} }
_ => break, _ => break,
} }
} }
result if diagnostics.is_empty() {
Ok(expression)
} else {
Err(diagnostics)
}
} }
fn call(&mut self, callee: Expression) -> Call { fn call(&mut self, callee: Expression) -> Result<Call, Vec<Diagnostic>> {
self.expect_advance(TokenKind::LeftParentheses); self.expect_advance(TokenKind::LeftParentheses)?;
let mut arguments = vec![]; let mut arguments = vec![];
while !self.peek_current(TokenKind::RightParentheses) { while self.current.is_some() && !self.peek_current(TokenKind::RightParentheses) {
arguments.push(self.expression()); arguments.push(self.expression()?);
} }
let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses); let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses)?;
let source_range = let source_range =
SourceRange::new(callee.source_range().start(), right_parentheses_token.end()); SourceRange::new(callee.source_range().start(), right_parentheses_token.end());
Call::new(callee, arguments, source_range) Ok(Call::new(callee, arguments, source_range))
} }
} }
@ -265,7 +388,16 @@ mod smoke_tests {
#[test] #[test]
fn hello_world() { fn hello_world() {
let compilation_unit = parse_compilation_unit("fn main() println(\"Hello, World!\") end"); let parse_result = parse_compilation_unit("fn main() println(\"Hello, World!\") end");
let compilation_unit = match parse_result {
Ok(compilation_unit) => compilation_unit,
Err(diagnostics) => {
for diagnostic in &diagnostics {
eprintln!("{:?}", diagnostic)
}
panic!()
}
};
let declarations = compilation_unit.declarations(); let declarations = compilation_unit.declarations();
assert_eq!(declarations.len(), 1); assert_eq!(declarations.len(), 1);
let function = match &declarations[0] { let function = match &declarations[0] {
@ -313,8 +445,17 @@ mod concrete_tests {
#[test] #[test]
fn parses_extern_fn() { fn parses_extern_fn() {
let cu = parse_compilation_unit("extern fn println()"); let parse_result = parse_compilation_unit("extern fn println()");
let declarations = cu.declarations(); let compilation_unit = match parse_result {
Ok(compilation_unit) => compilation_unit,
Err(diagnostics) => {
for diagnostic in diagnostics {
eprintln!("{:?}", diagnostic);
}
panic!();
}
};
let declarations = compilation_unit.declarations();
assert_eq!(declarations.len(), 1); assert_eq!(declarations.len(), 1);
let extern_function = match &declarations[0] { let extern_function = match &declarations[0] {
ModuleLevelDeclaration::ExternFunction(extern_function) => extern_function, ModuleLevelDeclaration::ExternFunction(extern_function) => extern_function,
@ -323,3 +464,35 @@ mod concrete_tests {
assert_eq!(extern_function.declared_name(), "println"); assert_eq!(extern_function.declared_name(), "println");
} }
} }
#[cfg(test)]
mod parse_failure_tests {
    use super::*;

    /// A single stray `end` at module level must fail with exactly one
    /// diagnostic, which is then printed for inspection.
    #[test]
    fn lone_end() {
        let diagnostics = match parse_compilation_unit("end") {
            Ok(_) => panic!(),
            Err(diagnostics) => diagnostics,
        };
        assert_eq!(diagnostics.len(), 1);
        diagnostics
            .iter()
            .for_each(|diagnostic| println!("{:?}", diagnostic));
    }

    /// Two stray `end` tokens still produce a single diagnostic.
    #[test]
    fn two_ends() {
        let diagnostics = match parse_compilation_unit("end end") {
            Ok(_) => panic!(),
            Err(diagnostics) => diagnostics,
        };
        // Only the first `end` is reported: after it, the parser skips ahead
        // looking for a token it can recover from (fn or extern).
        assert_eq!(diagnostics.len(), 1);
    }
}

View File

@ -1,4 +1,4 @@
fn println() end extern fn println()
fn main() fn main()
let x = "Hello, World!" let x = "Hello, World!"

View File

@ -0,0 +1,2 @@
fn main()
println(42)