499 lines
17 KiB
Rust
499 lines
17 KiB
Rust
use crate::ast::call::Call;
|
|
use crate::ast::compilation_unit::CompilationUnit;
|
|
use crate::ast::expression::Expression;
|
|
use crate::ast::expression_statement::ExpressionStatement;
|
|
use crate::ast::extern_function::ExternFunction;
|
|
use crate::ast::function::Function;
|
|
use crate::ast::identifier::Identifier;
|
|
use crate::ast::integer_literal::IntegerLiteral;
|
|
use crate::ast::let_statement::LetStatement;
|
|
use crate::ast::module_level_declaration::ModuleLevelDeclaration;
|
|
use crate::ast::statement::Statement;
|
|
use crate::ast::string_literal::StringLiteral;
|
|
use crate::diagnostic::Diagnostic;
|
|
use crate::lexer::Lexer;
|
|
use crate::source_range::SourceRange;
|
|
use crate::token::{Token, TokenKind};
|
|
use std::str::FromStr;
|
|
|
|
pub fn parse_compilation_unit(input: &str) -> Result<CompilationUnit, Vec<Diagnostic>> {
|
|
let mut parser = Parser::new(input);
|
|
parser.compilation_unit()
|
|
}
|
|
|
|
struct Parser<'a> {
|
|
input: &'a str,
|
|
lexer: Lexer<'a>,
|
|
current: Option<Token>,
|
|
lookahead: Option<Token>,
|
|
}
|
|
|
|
impl<'a> Parser<'a> {
|
|
fn new(input: &'a str) -> Self {
|
|
Self {
|
|
input,
|
|
lexer: Lexer::new(input),
|
|
current: None,
|
|
lookahead: None,
|
|
}
|
|
}
|
|
|
|
/// Skips tokens until `current` is one of `token_kinds` or input runs out.
///
/// Used for error recovery. When `current` is `Some`, the token it holds is
/// always skipped (the parser advances at least once) before any kind is
/// checked. When `current` is already `None`, this does nothing.
fn advance_until(&mut self, token_kinds: &[TokenKind]) {
    while self.current.is_some() {
        self.advance();
        if let Some(token) = &self.current {
            if token_kinds.contains(&token.kind()) {
                return;
            }
        }
        // `current` is `None` here only at end of input; the loop condition then ends the scan.
    }
}
|
|
|
|
fn advance(&mut self) {
|
|
if self.lookahead.is_some() {
|
|
// we've advanced at least once
|
|
self.current = self.lookahead.take();
|
|
self.lookahead = match self.lexer.next() {
|
|
None => None,
|
|
Some(result) => match result {
|
|
Ok(token) => Some(token),
|
|
Err(lexer_error) => {
|
|
panic!("{:?}", lexer_error);
|
|
}
|
|
},
|
|
}
|
|
} else if self.lookahead.is_none() && self.current.is_some() {
|
|
// we're on the last token
|
|
self.current = None;
|
|
} else {
|
|
// we've not yet advanced, so fetch both
|
|
// current
|
|
match self.lexer.next() {
|
|
None => {}
|
|
Some(result) => match result {
|
|
Ok(token) => {
|
|
self.current = Some(token);
|
|
}
|
|
Err(lexer_error) => {
|
|
panic!("{:?}", lexer_error);
|
|
}
|
|
},
|
|
}
|
|
// lookahead
|
|
match self.lexer.next() {
|
|
None => {}
|
|
Some(result) => match result {
|
|
Ok(token) => {
|
|
self.lookahead = Some(token);
|
|
}
|
|
Err(lexer_error) => {
|
|
panic!("{:?}", lexer_error);
|
|
}
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
fn expect_advance(&mut self, token_kind: TokenKind) -> Result<Token, Vec<Diagnostic>> {
|
|
match self.current.take() {
|
|
None => Err(vec![Diagnostic::new(
|
|
&format!("Expected {:?} but found end-of-input.", token_kind),
|
|
self.input.len(),
|
|
self.input.len(),
|
|
)]),
|
|
Some(token) => {
|
|
if token.kind() == token_kind {
|
|
self.advance();
|
|
Ok(token)
|
|
} else {
|
|
self.advance_until(&[token_kind]);
|
|
Err(vec![Diagnostic::new(
|
|
&format!("Expected {:?} but found {:?}", token_kind, token.kind()),
|
|
token.start(),
|
|
token.end(),
|
|
)])
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Reports whether `current` has kind `token_kind`, without consuming it.
///
/// # Panics
///
/// Panics if there is no current token, so callers check `self.current.is_some()` first.
fn peek_current(&self, token_kind: TokenKind) -> bool {
    let token = self
        .current
        .as_ref()
        .unwrap_or_else(|| panic!("Unexpected end of input."));
    token.kind() == token_kind
}
|
|
|
|
/// Borrows the current token.
///
/// # Panics
///
/// Panics if there is no current token.
fn get_current(&self) -> &Token {
    self.current
        .as_ref()
        .unwrap_or_else(|| panic!("Unexpected end of input"))
}
|
|
|
|
/// Returns the slice `input[start..end]`.
///
/// The result borrows from the source text (lifetime `'a`), not from the parser.
/// Slicing panics if the range is out of bounds or not on a character boundary.
fn sample_input(&self, start: usize, end: usize) -> &'a str {
    let source: &'a str = self.input;
    &source[start..end]
}
|
|
|
|
/// Returns the source text spanned by `token`, i.e. `input[token.start()..token.end()]`.
fn token_text(&self, token: &Token) -> &'a str {
    let (start, end) = (token.start(), token.end());
    self.sample_input(start, end)
}
|
|
|
|
pub fn compilation_unit(&mut self) -> Result<CompilationUnit, Vec<Diagnostic>> {
|
|
let mut declarations = vec![];
|
|
let mut diagnostics = vec![];
|
|
self.advance(); // get started
|
|
while self.current.is_some() {
|
|
let current = self.get_current();
|
|
match current.kind() {
|
|
TokenKind::Fn | TokenKind::Extern => {
|
|
let declaration_result = self.module_level_declaration();
|
|
match declaration_result {
|
|
Ok(declaration) => declarations.push(declaration),
|
|
Err(mut declaration_diagnostics) => {
|
|
diagnostics.append(&mut declaration_diagnostics)
|
|
}
|
|
}
|
|
}
|
|
_ => {
|
|
diagnostics.push(Diagnostic::new(
|
|
&format!(
|
|
"Expected any of: {:?}; found {:?}",
|
|
[TokenKind::Fn, TokenKind::Extern],
|
|
current.kind()
|
|
),
|
|
current.start(),
|
|
current.end(),
|
|
));
|
|
self.advance_until(&[TokenKind::Fn, TokenKind::Extern]);
|
|
}
|
|
}
|
|
}
|
|
if diagnostics.is_empty() {
|
|
Ok(CompilationUnit::new(declarations))
|
|
} else {
|
|
Err(diagnostics)
|
|
}
|
|
}
|
|
|
|
fn module_level_declaration(&mut self) -> Result<ModuleLevelDeclaration, Vec<Diagnostic>> {
|
|
let current = self.get_current();
|
|
match current.kind() {
|
|
TokenKind::Fn => {
|
|
let function_result = self.function();
|
|
match function_result {
|
|
Ok(function) => Ok(ModuleLevelDeclaration::Function(function)),
|
|
Err(function_diagnostics) => Err(function_diagnostics),
|
|
}
|
|
}
|
|
TokenKind::Extern => {
|
|
let extern_function_result = self.extern_function();
|
|
match extern_function_result {
|
|
Ok(extern_function) => {
|
|
Ok(ModuleLevelDeclaration::ExternFunction(extern_function))
|
|
}
|
|
Err(extern_function_diagnostics) => Err(extern_function_diagnostics),
|
|
}
|
|
}
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
|
|
fn function(&mut self) -> Result<Function, Vec<Diagnostic>> {
|
|
self.expect_advance(TokenKind::Fn)?;
|
|
let identifier_token = self.expect_advance(TokenKind::Identifier)?;
|
|
self.expect_advance(TokenKind::LeftParentheses)?;
|
|
// add params
|
|
self.expect_advance(TokenKind::RightParentheses)?;
|
|
let mut statements = vec![];
|
|
let mut diagnostics = vec![];
|
|
while self.current.is_some() && !self.peek_current(TokenKind::End) {
|
|
let statement_result = self.statement();
|
|
match statement_result {
|
|
Ok(statement) => {
|
|
statements.push(statement);
|
|
}
|
|
Err(mut statement_diagnostics) => {
|
|
diagnostics.append(&mut statement_diagnostics);
|
|
}
|
|
}
|
|
}
|
|
|
|
// if we're missing "end", append it to the other statement diagnostics
|
|
let end_result = self.expect_advance(TokenKind::End);
|
|
match end_result {
|
|
Err(mut end_diagnostics) => {
|
|
diagnostics.append(&mut end_diagnostics);
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
if diagnostics.is_empty() {
|
|
Ok(Function::new(
|
|
self.token_text(&identifier_token),
|
|
SourceRange::new(identifier_token.start(), identifier_token.end()),
|
|
statements,
|
|
))
|
|
} else {
|
|
Err(diagnostics)
|
|
}
|
|
}
|
|
|
|
fn extern_function(&mut self) -> Result<ExternFunction, Vec<Diagnostic>> {
|
|
self.expect_advance(TokenKind::Extern)?;
|
|
self.expect_advance(TokenKind::Fn)?;
|
|
let identifier_token = self.expect_advance(TokenKind::Identifier)?;
|
|
self.expect_advance(TokenKind::LeftParentheses)?;
|
|
// params
|
|
self.expect_advance(TokenKind::RightParentheses)?;
|
|
// return type
|
|
Ok(ExternFunction::new(
|
|
self.token_text(&identifier_token),
|
|
SourceRange::new(identifier_token.start(), identifier_token.end()),
|
|
))
|
|
}
|
|
|
|
fn statement(&mut self) -> Result<Statement, Vec<Diagnostic>> {
|
|
let current = self.get_current();
|
|
match current.kind() {
|
|
TokenKind::Let => Ok(Statement::Let(self.let_statement()?)),
|
|
_ => Ok(Statement::Expression(self.expression_statement()?)),
|
|
}
|
|
}
|
|
|
|
fn let_statement(&mut self) -> Result<LetStatement, Vec<Diagnostic>> {
|
|
self.expect_advance(TokenKind::Let)?;
|
|
let identifier = self.expect_advance(TokenKind::Identifier)?;
|
|
self.expect_advance(TokenKind::Equals)?;
|
|
let expression = self.expression()?;
|
|
Ok(LetStatement::new(
|
|
self.token_text(&identifier),
|
|
SourceRange::new(identifier.start(), identifier.end()),
|
|
expression,
|
|
))
|
|
}
|
|
|
|
fn expression_statement(&mut self) -> Result<ExpressionStatement, Vec<Diagnostic>> {
|
|
Ok(ExpressionStatement::new(self.expression()?))
|
|
}
|
|
|
|
fn expression(&mut self) -> Result<Expression, Vec<Diagnostic>> {
|
|
let current = self.get_current().clone(); // I don't love this clone
|
|
let mut diagnostics = vec![];
|
|
let mut expression = match current.kind() {
|
|
TokenKind::IntegerLiteral => {
|
|
let raw = self.token_text(¤t);
|
|
let source_range = SourceRange::new(current.start(), current.end());
|
|
self.advance();
|
|
Expression::IntegerLiteral(IntegerLiteral::new(
|
|
i32::from_str(raw).unwrap(),
|
|
source_range,
|
|
))
|
|
}
|
|
TokenKind::String => {
|
|
let with_quotes = self.token_text(¤t);
|
|
let source_range = SourceRange::new(current.start(), current.end());
|
|
self.advance();
|
|
Expression::String(StringLiteral::new(
|
|
&with_quotes[1..with_quotes.len() - 1],
|
|
source_range,
|
|
))
|
|
}
|
|
TokenKind::Identifier => {
|
|
let declared_name = self.token_text(¤t);
|
|
let source_range = SourceRange::new(current.start(), current.end());
|
|
self.advance();
|
|
Expression::Identifier(Identifier::new(declared_name, source_range))
|
|
}
|
|
_ => {
|
|
diagnostics.push(Diagnostic::new(
|
|
&format!(
|
|
"Expected any of {:?} but found {:?}",
|
|
[
|
|
TokenKind::IntegerLiteral,
|
|
TokenKind::String,
|
|
TokenKind::Identifier
|
|
],
|
|
current.kind()
|
|
),
|
|
current.start(),
|
|
current.end(),
|
|
));
|
|
|
|
self.advance_until(&[
|
|
TokenKind::IntegerLiteral,
|
|
TokenKind::String,
|
|
TokenKind::Identifier,
|
|
]);
|
|
if self.current.is_some() {
|
|
let try_again_result = self.expression();
|
|
match try_again_result {
|
|
Ok(expression) => expression,
|
|
Err(mut try_again_diagnostics) => {
|
|
diagnostics.append(&mut try_again_diagnostics);
|
|
return Err(diagnostics);
|
|
}
|
|
}
|
|
} else {
|
|
return Err(diagnostics);
|
|
}
|
|
}
|
|
};
|
|
|
|
// postfixes
|
|
while let Some(current) = &self.current {
|
|
match current.kind() {
|
|
TokenKind::LeftParentheses => {
|
|
expression = Expression::Call(self.call(expression)?);
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
|
|
if diagnostics.is_empty() {
|
|
Ok(expression)
|
|
} else {
|
|
Err(diagnostics)
|
|
}
|
|
}
|
|
|
|
fn call(&mut self, callee: Expression) -> Result<Call, Vec<Diagnostic>> {
|
|
self.expect_advance(TokenKind::LeftParentheses)?;
|
|
let mut arguments = vec![];
|
|
while self.current.is_some() && !self.peek_current(TokenKind::RightParentheses) {
|
|
arguments.push(self.expression()?);
|
|
}
|
|
let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses)?;
|
|
let source_range =
|
|
SourceRange::new(callee.source_range().start(), right_parentheses_token.end());
|
|
Ok(Call::new(callee, arguments, source_range))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod smoke_tests {
    use super::*;

    /// Parsing a function whose body is a single integer literal must not panic.
    #[test]
    fn forty_two() {
        let _ = parse_compilation_unit("fn main() 42 end");
    }

    /// `println("Hello, World!")` parses to a call of identifier `println`
    /// with one string argument.
    #[test]
    fn hello_world() {
        let compilation_unit =
            parse_compilation_unit("fn main() println(\"Hello, World!\") end").unwrap_or_else(
                |diagnostics| {
                    for diagnostic in &diagnostics {
                        eprintln!("{:?}", diagnostic)
                    }
                    panic!()
                },
            );

        let declarations = compilation_unit.declarations();
        assert_eq!(declarations.len(), 1);
        let function = match &declarations[0] {
            ModuleLevelDeclaration::Function(function) => function,
            _ => panic!(),
        };
        assert_eq!(function.declared_name(), "main");

        let statements = function.statements();
        assert_eq!(statements.len(), 1);
        let expression_statement = match &statements[0] {
            Statement::Expression(expression_statement) => expression_statement,
            _ => panic!("Expected expression"),
        };
        let call = match expression_statement.expression() {
            Expression::Call(call) => call,
            _ => panic!("Expected call"),
        };

        match call.callee() {
            Expression::Identifier(identifier) => {
                assert_eq!(identifier.name(), "println");
            }
            _ => panic!("Expected identifier"),
        }

        let arguments = call.arguments();
        assert_eq!(arguments.len(), 1);
        match &arguments[0] {
            Expression::String(s) => {
                assert_eq!(s.content(), "Hello, World!");
            }
            _ => panic!("Expected string"),
        }
    }

    /// Calling the result of a call (`getCl()()`) must not panic.
    #[test]
    fn chained_calls() {
        let _ = parse_compilation_unit("fn main() getCl()() end");
    }
}
|
|
|
|
#[cfg(test)]
mod concrete_tests {
    use super::*;

    /// `extern fn println()` yields exactly one extern-function declaration named `println`.
    #[test]
    fn parses_extern_fn() {
        let compilation_unit =
            parse_compilation_unit("extern fn println()").unwrap_or_else(|diagnostics| {
                for diagnostic in diagnostics {
                    eprintln!("{:?}", diagnostic);
                }
                panic!();
            });

        let declarations = compilation_unit.declarations();
        assert_eq!(declarations.len(), 1);

        let extern_function = match &declarations[0] {
            ModuleLevelDeclaration::ExternFunction(extern_function) => extern_function,
            _ => panic!(),
        };
        assert_eq!(extern_function.declared_name(), "println");
    }
}
|
|
|
|
#[cfg(test)]
mod parse_failure_tests {
    use super::*;

    /// A stray `end` at module level is rejected with exactly one diagnostic.
    #[test]
    fn lone_end() {
        let diagnostics = match parse_compilation_unit("end") {
            Ok(_) => panic!(),
            Err(diagnostics) => diagnostics,
        };
        assert_eq!(diagnostics.len(), 1);
        for diagnostic in &diagnostics {
            println!("{:?}", diagnostic)
        }
    }

    /// Two stray `end` tokens still give a single diagnostic.
    #[test]
    fn two_ends() {
        let diagnostics = match parse_compilation_unit("end end") {
            Ok(_) => panic!(),
            Err(diagnostics) => diagnostics,
        };
        // Only the first `end` is reported: after it, the parser skips ahead until it
        // finds a token it can recover from (fn or extern), which swallows the second.
        assert_eq!(diagnostics.len(), 1);
    }
}
|