Add sort-of-working error recovery to parser.
This commit is contained in:
parent
506e260c75
commit
7c041e40ad
@ -25,11 +25,18 @@ fn main() {
|
||||
let args = Cli::parse();
|
||||
let input = std::fs::read_to_string(&args.script).unwrap();
|
||||
|
||||
let mut compilation_unit = parse_compilation_unit(&input);
|
||||
|
||||
let mut files: SimpleFiles<&str, &str> = SimpleFiles::new();
|
||||
let script_file_id = files.add(args.script.to_str().unwrap(), &input);
|
||||
|
||||
let parse_result = parse_compilation_unit(&input);
|
||||
let mut compilation_unit = match parse_result {
|
||||
Ok(compilation_unit) => compilation_unit,
|
||||
Err(diagnostics) => {
|
||||
check_and_report_diagnostics(&files, script_file_id, &diagnostics);
|
||||
unreachable!();
|
||||
}
|
||||
};
|
||||
|
||||
let mut symbol_table = SymbolTable::new();
|
||||
|
||||
let gather_names_diagnostics = compilation_unit.gather_declared_names(&mut symbol_table);
|
||||
|
||||
@ -10,7 +10,16 @@ mod smoke_tests {
|
||||
use crate::symbol_table::SymbolTable;
|
||||
|
||||
fn assemble(src: &str) -> Vec<AsmFunction> {
|
||||
let mut compilation_unit = parse_compilation_unit(src);
|
||||
let parse_result = parse_compilation_unit(src);
|
||||
let mut compilation_unit = match parse_result {
|
||||
Ok(compilation_unit) => compilation_unit,
|
||||
Err(diagnostics) => {
|
||||
for diagnostic in diagnostics {
|
||||
eprintln!("{:?}", diagnostic);
|
||||
}
|
||||
panic!();
|
||||
}
|
||||
};
|
||||
let mut symbol_table = SymbolTable::new();
|
||||
compilation_unit.gather_declared_names(&mut symbol_table);
|
||||
compilation_unit.check_name_usages(&symbol_table);
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
mod assemble_context;
|
||||
pub mod assemble_context;
|
||||
pub mod call;
|
||||
pub mod compilation_unit;
|
||||
pub mod expression;
|
||||
@ -12,46 +12,3 @@ pub mod let_statement;
|
||||
pub mod module_level_declaration;
|
||||
pub mod statement;
|
||||
pub mod string_literal;
|
||||
|
||||
#[cfg(test)]
|
||||
mod name_tests {
|
||||
use crate::parser::parse_compilation_unit;
|
||||
use crate::symbol_table::SymbolTable;
|
||||
|
||||
#[test]
|
||||
fn smoke_screen() {
|
||||
let mut symbol_table = SymbolTable::new();
|
||||
let mut compilation_unit =
|
||||
parse_compilation_unit("fn println() end fn main() let x = 42 println(x) end");
|
||||
assert_eq!(
|
||||
compilation_unit
|
||||
.gather_declared_names(&mut symbol_table)
|
||||
.len(),
|
||||
0
|
||||
);
|
||||
assert_eq!(compilation_unit.check_name_usages(&symbol_table).len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hello_world() {
|
||||
let mut symbol_table = SymbolTable::new();
|
||||
let mut compilation_unit =
|
||||
parse_compilation_unit("fn println() end fn main() println(\"Hello, World!\") end");
|
||||
compilation_unit.gather_declared_names(&mut symbol_table);
|
||||
compilation_unit.check_name_usages(&symbol_table);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_some_diagnostics() {
|
||||
let mut symbol_table = SymbolTable::new();
|
||||
let mut compilation_unit = parse_compilation_unit("fn main() notDefined(uhOh) end");
|
||||
assert_eq!(
|
||||
compilation_unit
|
||||
.gather_declared_names(&mut symbol_table)
|
||||
.len(),
|
||||
0
|
||||
);
|
||||
let name_usage_diagnostics = compilation_unit.check_name_usages(&symbol_table);
|
||||
assert_eq!(name_usage_diagnostics.len(), 2);
|
||||
}
|
||||
}
|
||||
|
||||
@ -10,12 +10,13 @@ use crate::ast::let_statement::LetStatement;
|
||||
use crate::ast::module_level_declaration::ModuleLevelDeclaration;
|
||||
use crate::ast::statement::Statement;
|
||||
use crate::ast::string_literal::StringLiteral;
|
||||
use crate::diagnostic::Diagnostic;
|
||||
use crate::lexer::Lexer;
|
||||
use crate::source_range::SourceRange;
|
||||
use crate::token::{Token, TokenKind};
|
||||
use std::str::FromStr;
|
||||
|
||||
pub fn parse_compilation_unit(input: &str) -> CompilationUnit {
|
||||
pub fn parse_compilation_unit(input: &str) -> Result<CompilationUnit, Vec<Diagnostic>> {
|
||||
let mut parser = Parser::new(input);
|
||||
parser.compilation_unit()
|
||||
}
|
||||
@ -37,6 +38,22 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Error-recovery helper: skips tokens until the current token's kind is one of
/// `token_kinds`, or until the input is exhausted (`self.current` becomes `None`).
///
/// `advance` is called *before* the current token is inspected, so when a current
/// token exists it is always consumed, even if its kind is already in `token_kinds`.
/// If there is no current token on entry, this does nothing.
fn advance_until(&mut self, token_kinds: &[TokenKind]) {
    while self.current.is_some() {
        self.advance();
        // `None` here means we reached end-of-input; the loop condition then ends the scan.
        if let Some(current) = &self.current {
            if token_kinds.contains(&current.kind()) {
                break;
            }
        }
    }
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
if self.lookahead.is_some() {
|
||||
// we've advanced at least once
|
||||
@ -50,6 +67,9 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
},
|
||||
}
|
||||
} else if self.lookahead.is_none() && self.current.is_some() {
|
||||
// we're on the last token
|
||||
self.current = None;
|
||||
} else {
|
||||
// we've not yet advanced, so fetch both
|
||||
// current
|
||||
@ -79,22 +99,24 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn expect_advance(&mut self, token_kind: TokenKind) -> Token {
|
||||
fn expect_advance(&mut self, token_kind: TokenKind) -> Result<Token, Vec<Diagnostic>> {
|
||||
match self.current.take() {
|
||||
None => {
|
||||
panic!("Expected {:?} but found end of input", token_kind);
|
||||
}
|
||||
None => Err(vec![Diagnostic::new(
|
||||
&format!("Expected {:?} but found end-of-input.", token_kind),
|
||||
self.input.len(),
|
||||
self.input.len(),
|
||||
)]),
|
||||
Some(token) => {
|
||||
if token.kind() == token_kind {
|
||||
self.advance();
|
||||
token
|
||||
Ok(token)
|
||||
} else {
|
||||
panic!(
|
||||
"Expected {:?} but found {:?} at {}",
|
||||
token_kind,
|
||||
token.kind(),
|
||||
token.start()
|
||||
);
|
||||
self.advance_until(&[token_kind]);
|
||||
Err(vec![Diagnostic::new(
|
||||
&format!("Expected {:?} but found {:?}", token_kind, token.kind()),
|
||||
token.start(),
|
||||
token.end(),
|
||||
)])
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -102,7 +124,7 @@ impl<'a> Parser<'a> {
|
||||
|
||||
fn peek_current(&self, token_kind: TokenKind) -> bool {
|
||||
match &self.current {
|
||||
None => false,
|
||||
None => panic!("Unexpected end of input."),
|
||||
Some(token) => token.kind() == token_kind,
|
||||
}
|
||||
}
|
||||
@ -124,85 +146,150 @@ impl<'a> Parser<'a> {
|
||||
self.sample_input(token.start(), token.end())
|
||||
}
|
||||
|
||||
pub fn compilation_unit(&mut self) -> CompilationUnit {
|
||||
pub fn compilation_unit(&mut self) -> Result<CompilationUnit, Vec<Diagnostic>> {
|
||||
let mut declarations = vec![];
|
||||
self.advance();
|
||||
let mut diagnostics = vec![];
|
||||
self.advance(); // get started
|
||||
while self.current.is_some() {
|
||||
declarations.push(self.module_level_declaration());
|
||||
let current = self.get_current();
|
||||
match current.kind() {
|
||||
TokenKind::Fn | TokenKind::Extern => {
|
||||
let declaration_result = self.module_level_declaration();
|
||||
match declaration_result {
|
||||
Ok(declaration) => declarations.push(declaration),
|
||||
Err(mut declaration_diagnostics) => {
|
||||
diagnostics.append(&mut declaration_diagnostics)
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
diagnostics.push(Diagnostic::new(
|
||||
&format!(
|
||||
"Expected any of: {:?}; found {:?}",
|
||||
[TokenKind::Fn, TokenKind::Extern],
|
||||
current.kind()
|
||||
),
|
||||
current.start(),
|
||||
current.end(),
|
||||
));
|
||||
self.advance_until(&[TokenKind::Fn, TokenKind::Extern]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if diagnostics.is_empty() {
|
||||
Ok(CompilationUnit::new(declarations))
|
||||
} else {
|
||||
Err(diagnostics)
|
||||
}
|
||||
CompilationUnit::new(declarations)
|
||||
}
|
||||
|
||||
fn module_level_declaration(&mut self) -> ModuleLevelDeclaration {
|
||||
fn module_level_declaration(&mut self) -> Result<ModuleLevelDeclaration, Vec<Diagnostic>> {
|
||||
let current = self.get_current();
|
||||
match current.kind() {
|
||||
TokenKind::Fn => ModuleLevelDeclaration::Function(self.function()),
|
||||
TokenKind::Extern => ModuleLevelDeclaration::ExternFunction(self.extern_function()),
|
||||
_ => panic!(),
|
||||
TokenKind::Fn => {
|
||||
let function_result = self.function();
|
||||
match function_result {
|
||||
Ok(function) => Ok(ModuleLevelDeclaration::Function(function)),
|
||||
Err(function_diagnostics) => Err(function_diagnostics),
|
||||
}
|
||||
}
|
||||
TokenKind::Extern => {
|
||||
let extern_function_result = self.extern_function();
|
||||
match extern_function_result {
|
||||
Ok(extern_function) => {
|
||||
Ok(ModuleLevelDeclaration::ExternFunction(extern_function))
|
||||
}
|
||||
Err(extern_function_diagnostics) => Err(extern_function_diagnostics),
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn function(&mut self) -> Function {
|
||||
self.expect_advance(TokenKind::Fn);
|
||||
let identifier_token = self.expect_advance(TokenKind::Identifier);
|
||||
self.expect_advance(TokenKind::LeftParentheses);
|
||||
fn function(&mut self) -> Result<Function, Vec<Diagnostic>> {
|
||||
self.expect_advance(TokenKind::Fn)?;
|
||||
let identifier_token = self.expect_advance(TokenKind::Identifier)?;
|
||||
self.expect_advance(TokenKind::LeftParentheses)?;
|
||||
// add params
|
||||
self.expect_advance(TokenKind::RightParentheses);
|
||||
self.expect_advance(TokenKind::RightParentheses)?;
|
||||
let mut statements = vec![];
|
||||
while !self.peek_current(TokenKind::End) {
|
||||
statements.push(self.statement());
|
||||
let mut diagnostics = vec![];
|
||||
while self.current.is_some() && !self.peek_current(TokenKind::End) {
|
||||
let statement_result = self.statement();
|
||||
match statement_result {
|
||||
Ok(statement) => {
|
||||
statements.push(statement);
|
||||
}
|
||||
Err(mut statement_diagnostics) => {
|
||||
diagnostics.append(&mut statement_diagnostics);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if we're missing "end", append it to the other statement diagnostics
|
||||
let end_result = self.expect_advance(TokenKind::End);
|
||||
match end_result {
|
||||
Err(mut end_diagnostics) => {
|
||||
diagnostics.append(&mut end_diagnostics);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if diagnostics.is_empty() {
|
||||
Ok(Function::new(
|
||||
self.token_text(&identifier_token),
|
||||
SourceRange::new(identifier_token.start(), identifier_token.end()),
|
||||
statements,
|
||||
))
|
||||
} else {
|
||||
Err(diagnostics)
|
||||
}
|
||||
self.expect_advance(TokenKind::End);
|
||||
Function::new(
|
||||
self.token_text(&identifier_token),
|
||||
SourceRange::new(identifier_token.start(), identifier_token.end()),
|
||||
statements,
|
||||
)
|
||||
}
|
||||
|
||||
fn extern_function(&mut self) -> ExternFunction {
|
||||
self.expect_advance(TokenKind::Extern);
|
||||
self.expect_advance(TokenKind::Fn);
|
||||
let identifier_token = self.expect_advance(TokenKind::Identifier);
|
||||
self.expect_advance(TokenKind::LeftParentheses);
|
||||
fn extern_function(&mut self) -> Result<ExternFunction, Vec<Diagnostic>> {
|
||||
self.expect_advance(TokenKind::Extern)?;
|
||||
self.expect_advance(TokenKind::Fn)?;
|
||||
let identifier_token = self.expect_advance(TokenKind::Identifier)?;
|
||||
self.expect_advance(TokenKind::LeftParentheses)?;
|
||||
// params
|
||||
self.expect_advance(TokenKind::RightParentheses);
|
||||
self.expect_advance(TokenKind::RightParentheses)?;
|
||||
// return type
|
||||
ExternFunction::new(
|
||||
Ok(ExternFunction::new(
|
||||
self.token_text(&identifier_token),
|
||||
SourceRange::new(identifier_token.start(), identifier_token.end()),
|
||||
)
|
||||
))
|
||||
}
|
||||
|
||||
fn statement(&mut self) -> Statement {
|
||||
fn statement(&mut self) -> Result<Statement, Vec<Diagnostic>> {
|
||||
let current = self.get_current();
|
||||
match current.kind() {
|
||||
TokenKind::Let => self.let_statement(),
|
||||
_ => self.expression_statement(),
|
||||
TokenKind::Let => Ok(Statement::Let(self.let_statement()?)),
|
||||
_ => Ok(Statement::Expression(self.expression_statement()?)),
|
||||
}
|
||||
}
|
||||
|
||||
fn let_statement(&mut self) -> Statement {
|
||||
self.expect_advance(TokenKind::Let);
|
||||
let identifier = self.expect_advance(TokenKind::Identifier);
|
||||
self.expect_advance(TokenKind::Equals);
|
||||
let expression = self.expression();
|
||||
Statement::Let(LetStatement::new(
|
||||
fn let_statement(&mut self) -> Result<LetStatement, Vec<Diagnostic>> {
|
||||
self.expect_advance(TokenKind::Let)?;
|
||||
let identifier = self.expect_advance(TokenKind::Identifier)?;
|
||||
self.expect_advance(TokenKind::Equals)?;
|
||||
let expression = self.expression()?;
|
||||
Ok(LetStatement::new(
|
||||
self.token_text(&identifier),
|
||||
SourceRange::new(identifier.start(), identifier.end()),
|
||||
expression,
|
||||
))
|
||||
}
|
||||
|
||||
fn expression_statement(&mut self) -> Statement {
|
||||
Statement::Expression(ExpressionStatement::new(self.expression()))
|
||||
fn expression_statement(&mut self) -> Result<ExpressionStatement, Vec<Diagnostic>> {
|
||||
Ok(ExpressionStatement::new(self.expression()?))
|
||||
}
|
||||
|
||||
fn expression(&mut self) -> Expression {
|
||||
let current = self.get_current();
|
||||
let mut result = match current.kind() {
|
||||
fn expression(&mut self) -> Result<Expression, Vec<Diagnostic>> {
|
||||
let current = self.get_current().clone(); // I don't love this clone
|
||||
let mut diagnostics = vec![];
|
||||
let mut expression = match current.kind() {
|
||||
TokenKind::IntegerLiteral => {
|
||||
let raw = self.token_text(current);
|
||||
let raw = self.token_text(&current);
|
||||
let source_range = SourceRange::new(current.start(), current.end());
|
||||
self.advance();
|
||||
Expression::IntegerLiteral(IntegerLiteral::new(
|
||||
@ -211,7 +298,7 @@ impl<'a> Parser<'a> {
|
||||
))
|
||||
}
|
||||
TokenKind::String => {
|
||||
let with_quotes = self.token_text(current);
|
||||
let with_quotes = self.token_text(&current);
|
||||
let source_range = SourceRange::new(current.start(), current.end());
|
||||
self.advance();
|
||||
Expression::String(StringLiteral::new(
|
||||
@ -220,37 +307,73 @@ impl<'a> Parser<'a> {
|
||||
))
|
||||
}
|
||||
TokenKind::Identifier => {
|
||||
let declared_name = self.token_text(current);
|
||||
let declared_name = self.token_text(&current);
|
||||
let source_range = SourceRange::new(current.start(), current.end());
|
||||
self.advance();
|
||||
Expression::Identifier(Identifier::new(declared_name, source_range))
|
||||
}
|
||||
_ => panic!("Unexpected token {:?}", current.kind()),
|
||||
_ => {
|
||||
diagnostics.push(Diagnostic::new(
|
||||
&format!(
|
||||
"Expected any of {:?} but found {:?}",
|
||||
[
|
||||
TokenKind::IntegerLiteral,
|
||||
TokenKind::String,
|
||||
TokenKind::Identifier
|
||||
],
|
||||
current.kind()
|
||||
),
|
||||
current.start(),
|
||||
current.end(),
|
||||
));
|
||||
|
||||
self.advance_until(&[
|
||||
TokenKind::IntegerLiteral,
|
||||
TokenKind::String,
|
||||
TokenKind::Identifier,
|
||||
]);
|
||||
if self.current.is_some() {
|
||||
let try_again_result = self.expression();
|
||||
match try_again_result {
|
||||
Ok(expression) => expression,
|
||||
Err(mut try_again_diagnostics) => {
|
||||
diagnostics.append(&mut try_again_diagnostics);
|
||||
return Err(diagnostics);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Err(diagnostics);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// postfixes
|
||||
while let Some(current) = &self.current {
|
||||
match current.kind() {
|
||||
TokenKind::LeftParentheses => {
|
||||
result = Expression::Call(self.call(result));
|
||||
expression = Expression::Call(self.call(expression)?);
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
if diagnostics.is_empty() {
|
||||
Ok(expression)
|
||||
} else {
|
||||
Err(diagnostics)
|
||||
}
|
||||
}
|
||||
|
||||
fn call(&mut self, callee: Expression) -> Call {
|
||||
self.expect_advance(TokenKind::LeftParentheses);
|
||||
fn call(&mut self, callee: Expression) -> Result<Call, Vec<Diagnostic>> {
|
||||
self.expect_advance(TokenKind::LeftParentheses)?;
|
||||
let mut arguments = vec![];
|
||||
while !self.peek_current(TokenKind::RightParentheses) {
|
||||
arguments.push(self.expression());
|
||||
while self.current.is_some() && !self.peek_current(TokenKind::RightParentheses) {
|
||||
arguments.push(self.expression()?);
|
||||
}
|
||||
let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses);
|
||||
let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses)?;
|
||||
let source_range =
|
||||
SourceRange::new(callee.source_range().start(), right_parentheses_token.end());
|
||||
Call::new(callee, arguments, source_range)
|
||||
Ok(Call::new(callee, arguments, source_range))
|
||||
}
|
||||
}
|
||||
|
||||
@ -265,7 +388,16 @@ mod smoke_tests {
|
||||
|
||||
#[test]
|
||||
fn hello_world() {
|
||||
let compilation_unit = parse_compilation_unit("fn main() println(\"Hello, World!\") end");
|
||||
let parse_result = parse_compilation_unit("fn main() println(\"Hello, World!\") end");
|
||||
let compilation_unit = match parse_result {
|
||||
Ok(compilation_unit) => compilation_unit,
|
||||
Err(diagnostics) => {
|
||||
for diagnostic in &diagnostics {
|
||||
eprintln!("{:?}", diagnostic)
|
||||
}
|
||||
panic!()
|
||||
}
|
||||
};
|
||||
let declarations = compilation_unit.declarations();
|
||||
assert_eq!(declarations.len(), 1);
|
||||
let function = match &declarations[0] {
|
||||
@ -313,8 +445,17 @@ mod concrete_tests {
|
||||
|
||||
#[test]
|
||||
fn parses_extern_fn() {
|
||||
let cu = parse_compilation_unit("extern fn println()");
|
||||
let declarations = cu.declarations();
|
||||
let parse_result = parse_compilation_unit("extern fn println()");
|
||||
let compilation_unit = match parse_result {
|
||||
Ok(compilation_unit) => compilation_unit,
|
||||
Err(diagnostics) => {
|
||||
for diagnostic in diagnostics {
|
||||
eprintln!("{:?}", diagnostic);
|
||||
}
|
||||
panic!();
|
||||
}
|
||||
};
|
||||
let declarations = compilation_unit.declarations();
|
||||
assert_eq!(declarations.len(), 1);
|
||||
let extern_function = match &declarations[0] {
|
||||
ModuleLevelDeclaration::ExternFunction(extern_function) => extern_function,
|
||||
@ -323,3 +464,35 @@ mod concrete_tests {
|
||||
assert_eq!(extern_function.declared_name(), "println");
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod parse_failure_tests {
    use super::*;

    /// A stray `end` at module level must fail to parse with exactly one diagnostic.
    #[test]
    fn lone_end() {
        if let Err(diagnostics) = parse_compilation_unit("end") {
            assert_eq!(diagnostics.len(), 1);
            // Print the diagnostic so it is visible when running with `--nocapture`.
            diagnostics
                .iter()
                .for_each(|diagnostic| println!("{:?}", diagnostic));
        } else {
            panic!();
        }
    }

    /// Two stray `end` tokens must still produce only a single diagnostic.
    #[test]
    fn two_ends() {
        let diagnostics = match parse_compilation_unit("end end") {
            Ok(_) => panic!(),
            Err(diagnostics) => diagnostics,
        };
        // Should only have an error on the first end, since we advance until we find a
        // token we can recover from (fn or extern)
        assert_eq!(diagnostics.len(), 1);
    }
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
fn println() end
|
||||
extern fn println()
|
||||
|
||||
fn main()
|
||||
let x = "Hello, World!"
|
||||
|
||||
2
examples/parse_errors/forgot_end.dm
Normal file
2
examples/parse_errors/forgot_end.dm
Normal file
@ -0,0 +1,2 @@
|
||||
fn main()
|
||||
println(42)
|
||||
Loading…
Reference in New Issue
Block a user