deimos-lang/dmc-lib/src/parser.rs
2026-03-30 14:04:29 -05:00

1718 lines
57 KiB
Rust

use crate::ast::assign_statement::AssignStatement;
use crate::ast::binary_expression::{BinaryExpression, BinaryOperation};
use crate::ast::call::Call;
use crate::ast::class::Class;
use crate::ast::compilation_unit::CompilationUnit;
use crate::ast::constructor::Constructor;
use crate::ast::double_literal::DoubleLiteral;
use crate::ast::expression::Expression;
use crate::ast::expression_statement::ExpressionStatement;
use crate::ast::extern_function::ExternFunction;
use crate::ast::field::Field;
use crate::ast::function::Function;
use crate::ast::generic_parameter::GenericParameter;
use crate::ast::identifier::Identifier;
use crate::ast::integer_literal::IntegerLiteral;
use crate::ast::let_statement::LetStatement;
use crate::ast::negative_expression::NegativeExpression;
use crate::ast::parameter::Parameter;
use crate::ast::statement::Statement;
use crate::ast::string_literal::StringLiteral;
use crate::ast::type_use::TypeUse;
use crate::diagnostic::{Diagnostic, Diagnostics};
use crate::error_codes::{LEXER_ERROR, PARSE_ERROR};
use crate::lexer::{Lexer, LexerErrorKind};
use crate::source_range::SourceRange;
use crate::token::{Token, TokenKind};
use std::str::FromStr;
/// Result of a parse routine: the parsed value paired with the diagnostics collected while
/// producing it.
pub type ParseResult<T> = (T, Diagnostics);
/// Parses `input` as a compilation unit, succeeding only if no diagnostics were produced.
///
/// Returns `Ok` with the parsed unit when the diagnostics are empty, otherwise `Err` with all
/// diagnostics (the partially parsed unit is discarded).
pub fn get_compilation_unit(input: &str) -> Result<CompilationUnit, Diagnostics> {
    match parse_compilation_unit(input) {
        (unit, problems) if problems.is_empty() => Ok(unit),
        (_, problems) => Err(problems),
    }
}
/// Parses `input` as a compilation unit and returns it together with every diagnostic
/// produced along the way (lexer diagnostics from priming the parser come first).
pub fn parse_compilation_unit(input: &str) -> ParseResult<CompilationUnit> {
    let mut parser = Parser::new(input);
    // The first `advance` loads the current and lookahead tokens.
    let mut diagnostics = parser.advance();
    let (unit, unit_diagnostics) = parser.compilation_unit();
    diagnostics.extend(unit_diagnostics);
    (unit, diagnostics)
}
/// Parses `input` as a single expression and returns it together with every diagnostic
/// produced along the way (lexer diagnostics from priming the parser come first).
pub fn parse_expression(input: &str) -> ParseResult<Expression> {
    let mut parser = Parser::new(input);
    // The first `advance` loads the current and lookahead tokens.
    let mut diagnostics = parser.advance();
    let (parsed, expression_diagnostics) = parser.expression();
    diagnostics.extend(expression_diagnostics);
    (parsed, diagnostics)
}
/// Parses `input` as a single `let` statement.
///
/// The statement is `None` when its identifier could not be parsed; the diagnostics explain
/// why (lexer diagnostics from priming the parser come first).
pub fn parse_let_statement(input: &str) -> ParseResult<Option<LetStatement>> {
    let mut parser = Parser::new(input);
    // The first `advance` loads the current and lookahead tokens.
    let mut diagnostics = parser.advance();
    let (parsed, statement_diagnostics) = parser.let_statement();
    diagnostics.extend(statement_diagnostics);
    (parsed, diagnostics)
}
// Evaluates to `true` when the given token kind is one of the kinds this parser treats as a
// possible start of an expression.
// NOTE(review): `LongLiteral` and `SelfKw` are listed here, but `expression_base` has no arm
// for them and falls into its `unreachable!` branch — confirm whether support is pending.
macro_rules! matches_expression_first {
( $token_kind : expr ) => {
matches!(
$token_kind,
TokenKind::IntegerLiteral
| TokenKind::DoubleLiteral
| TokenKind::LongLiteral
| TokenKind::String
| TokenKind::Minus
| TokenKind::SelfKw
| TokenKind::Identifier
)
};
}
// Evaluates to `true` when the given token kind can start a statement: either `Let` or any
// kind accepted by `matches_expression_first!`.
// Note that the argument expression is expanded (and therefore evaluated) twice.
macro_rules! matches_statement_first {
( $token_kind : expr ) => {
matches!($token_kind, TokenKind::Let) || matches_expression_first!($token_kind)
};
}
// Evaluates to `true` when the given token kind can start a type use: `LeftSquare` or
// `Identifier` (the two kinds `type_use` dispatches on).
macro_rules! matches_type_use_first {
( $token_kind: expr ) => {
matches!($token_kind, TokenKind::LeftSquare | TokenKind::Identifier)
};
}
/// Parser state over a single input string, with one token of lookahead.
struct Parser<'a> {
// The complete source text; token text is sliced out of it by offset.
input: &'a str,
// Token source that `advance` pulls from.
lexer: Lexer<'a>,
// The token currently being examined; `None` before the first `advance` and once the input
// is exhausted.
current: Option<Token>,
// The token following `current`, if any.
lookahead: Option<Token>,
// The next value handed out by `next_node_id`.
node_id: usize,
}
impl<'a> Parser<'a> {
/// Creates a parser over `input` with no tokens loaded yet.
///
/// `advance` must be called once before parsing so that `current` and `lookahead` are
/// populated.
fn new(input: &'a str) -> Self {
    let lexer = Lexer::new(input);
    Self {
        input,
        lexer,
        current: None,
        lookahead: None,
        node_id: 0,
    }
}
/// Skips tokens until the current token's kind is one of `token_kinds` or the input ends.
///
/// Always advances at least once when a current token exists, so the token that was current
/// on entry is never the one it stops at. Returns the diagnostics produced while advancing.
#[must_use]
fn advance_until(&mut self, token_kinds: &[TokenKind]) -> Diagnostics {
    let mut diagnostics = Vec::new();
    while self.current.is_some() {
        diagnostics.extend(self.advance());
        let reached_target = self
            .current
            .as_ref()
            .is_some_and(|token| token_kinds.contains(&token.kind()));
        if reached_target {
            break;
        }
        // Otherwise either keep skipping, or the loop condition ends at end-of-input.
    }
    diagnostics
}
/// Moves the parser forward by one token and returns any lexer diagnostics encountered.
///
/// Three situations are distinguished:
/// - a lookahead token exists: it becomes `current` and a new lookahead is fetched;
/// - no lookahead but a current token: the last token is consumed and `current` becomes `None`;
/// - neither exists: both `current` and `lookahead` are fetched from the lexer.
#[must_use]
fn advance(&mut self) -> Diagnostics {
    /// Pulls the next successfully lexed token, converting every lexer error met on the way
    /// into a diagnostic. Yields `None` for the token once the lexer is exhausted.
    #[must_use]
    fn fetch(lexer: &mut Lexer) -> (Option<Token>, Diagnostics) {
        let mut diagnostics = vec![];
        while let Some(lexer_result) = lexer.next() {
            let lexer_error = match lexer_result {
                Ok(token) => return (Some(token), diagnostics),
                Err(lexer_error) => lexer_error,
            };
            let message = match lexer_error.kind() {
                LexerErrorKind::UnterminatedString => String::from("Unterminated string literal."),
                LexerErrorKind::UnrecognizedCharacter(c) => {
                    format!("Unrecognized character: {}", c)
                }
            };
            diagnostics.push(
                Diagnostic::new(&message, lexer_error.start(), lexer_error.end())
                    .with_error_code(LEXER_ERROR),
            );
        }
        (None, diagnostics)
    }

    match self.lookahead.take() {
        Some(next) => {
            // We've advanced at least once: shift the lookahead in and refill it.
            self.current = Some(next);
            let (lookahead, diagnostics) = fetch(&mut self.lexer);
            self.lookahead = lookahead;
            diagnostics
        }
        None if self.current.is_some() => {
            // We're on the last token.
            self.current = None;
            Vec::new()
        }
        None => {
            // Nothing loaded: fetch both tokens.
            let (current, mut diagnostics) = fetch(&mut self.lexer);
            self.current = current;
            let (lookahead, lookahead_diagnostics) = fetch(&mut self.lexer);
            self.lookahead = lookahead;
            diagnostics.extend(lookahead_diagnostics);
            diagnostics
        }
    }
}
/// Renders `kinds` with their `Debug` representation, separated by `", "`.
fn join_kinds(kinds: &[TokenKind]) -> String {
    let mut joined = String::new();
    for (index, kind) in kinds.iter().enumerate() {
        if index > 0 {
            joined.push_str(", ");
        }
        joined.push_str(&format!("{:?}", kind));
    }
    joined
}
/// Builds a parse-error diagnostic spanning `found`, whose primary label lists the token
/// kinds that would have been accepted.
fn get_expected_but_found(kinds: &[TokenKind], found: &Token) -> Diagnostic {
    let headline = format!("Unexpected token: {:?}.", found.kind());
    let label = format!("Expected {}.", Self::join_kinds(kinds));
    Diagnostic::new(&headline, found.start(), found.end())
        .with_error_code(PARSE_ERROR)
        .with_primary_label_message(&label)
}
/// Builds a parse-error diagnostic for running out of input at `position` (used as both the
/// start and the end of the diagnostic), whose primary label lists the accepted token kinds.
fn get_expected_but_found_eoi(kinds: &[TokenKind], position: usize) -> Diagnostic {
    let label = format!("Expected {}.", Self::join_kinds(kinds));
    Diagnostic::new("Unexpected end-of-input.", position, position)
        .with_error_code(PARSE_ERROR)
        .with_primary_label_message(&label)
}
/// Consumes the current token if it has kind `token_kind`.
///
/// - No current token: returns `None` with an end-of-input diagnostic positioned at the end
///   of the input.
/// - Kind matches: advances and returns the token together with any diagnostics from
///   advancing.
/// - Kind differs: returns `None` with an "unexpected token" diagnostic.
///
/// NOTE(review): in the mismatch case the token taken out of `self.current` is not put back,
/// so `current` is left as `None` (while `lookahead` is untouched). Subsequent expectations
/// therefore report end-of-input. Confirm whether this is the intended recovery strategy.
#[must_use]
fn expect_advance(&mut self, token_kind: TokenKind) -> (Option<Token>, Diagnostics) {
    let Some(token) = self.current.take() else {
        let end_of_input =
            Self::get_expected_but_found_eoi(&[token_kind], self.input.len());
        return (None, vec![end_of_input]);
    };
    if token.kind() != token_kind {
        let unexpected = Self::get_expected_but_found(&[token_kind], &token);
        return (None, vec![unexpected]);
    }
    let diagnostics = self.advance();
    (Some(token), diagnostics)
}
/// Like `expect_advance`, but additionally requires the matched token to start exactly where
/// `previous_token` ends (no characters in between).
///
/// When the kind matches but the token is not adjacent, the token has still been consumed;
/// `None` is returned along with a diagnostic spanning the matched token.
#[must_use]
fn expect_immediately_after_advance(
    &mut self,
    token_kind: TokenKind,
    previous_token: &Token,
) -> (Option<Token>, Diagnostics) {
    let (maybe_matched, mut diagnostics) = self.expect_advance(token_kind);
    let Some(matched) = maybe_matched else {
        return (None, diagnostics);
    };
    if matched.start() == previous_token.end() {
        return (Some(matched), diagnostics);
    }
    let message = format!(
        "Expected {:?} immediately after previous token.",
        token_kind
    );
    diagnostics.push(
        Diagnostic::new(&message, matched.start(), matched.end()).with_error_code(PARSE_ERROR),
    );
    (None, diagnostics)
}
/// Reports whether the current token has kind `token_kind`.
///
/// # Panics
/// Panics if there is no current token; callers guard with `self.current.is_some()`.
fn peek_current(&self, token_kind: TokenKind) -> bool {
    let Some(token) = &self.current else {
        panic!("Unexpected end of input.");
    };
    token.kind() == token_kind
}
/// Borrows the current token.
///
/// # Panics
/// Panics if there is no current token.
fn get_current(&self) -> &Token {
    self.current
        .as_ref()
        .unwrap_or_else(|| panic!("Unexpected end of input"))
}
/// Returns the slice of the input from offset `start` up to (not including) `end`.
///
/// Uses plain `str` range indexing, so it panics if the range is out of bounds or does not
/// fall on character boundaries.
fn sample_input(&self, start: usize, end: usize) -> &'a str {
&self.input[start..end]
}
/// Returns the source text covered by `token`, i.e. the input between the token's start and
/// end offsets. The returned slice borrows from the input, not from the parser.
fn token_text(&self, token: &Token) -> &'a str {
self.sample_input(token.start(), token.end())
}
/// Hands out the next node id and bumps the counter; ids start at 0 and increase by one.
fn next_node_id(&mut self) -> usize {
    let issued = self.node_id;
    self.node_id = issued + 1;
    issued
}
/// Parses module-level declarations until the input is exhausted.
///
/// Any token that cannot start a declaration is reported, and the parser then skips forward
/// to the next `fn`, `extern` or `class` token (or end of input).
fn compilation_unit(&mut self) -> ParseResult<CompilationUnit> {
    let declaration_starts = [TokenKind::Fn, TokenKind::Extern, TokenKind::Class];
    let mut functions: Vec<Function> = Vec::new();
    let mut extern_functions: Vec<ExternFunction> = Vec::new();
    let mut classes: Vec<Class> = Vec::new();
    let mut diagnostics = Vec::new();

    while let Some(token) = &self.current {
        if declaration_starts.contains(&token.kind()) {
            let (_, declaration_diagnostics) = self.module_level_declaration(
                &mut functions,
                &mut extern_functions,
                &mut classes,
            );
            diagnostics.extend(declaration_diagnostics);
        } else {
            diagnostics.push(Self::get_expected_but_found(&declaration_starts, token));
            diagnostics.extend(self.advance_until(&declaration_starts));
        }
    }

    let unit = CompilationUnit::new(functions, extern_functions, classes);
    (unit, diagnostics)
}
/// Parses one module-level declaration and stores it in the matching output vector.
///
/// The current token must be `fn`, `extern` or `class` (the caller checks this). A
/// declaration that fails to produce a node is simply not stored; its diagnostics are still
/// returned.
fn module_level_declaration(
    &mut self,
    functions: &mut Vec<Function>,
    extern_functions: &mut Vec<ExternFunction>,
    classes: &mut Vec<Class>,
) -> ParseResult<()> {
    let diagnostics = match self.get_current().kind() {
        TokenKind::Fn => {
            let (parsed, diagnostics) = self.function();
            functions.extend(parsed);
            diagnostics
        }
        TokenKind::Extern => {
            let (parsed, diagnostics) = self.extern_function();
            extern_functions.extend(parsed);
            diagnostics
        }
        TokenKind::Class => {
            let (parsed, diagnostics) = self.class();
            classes.extend(parsed);
            diagnostics
        }
        _ => unreachable!(),
    };
    ((), diagnostics)
}
/// Parses a function: optional `pub`, `fn`, name, parenthesised parameter list, optional
/// `-> Type`, statements, and a closing `end`.
///
/// Returns `None` for the function when the name identifier could not be parsed; the
/// diagnostics are returned either way.
fn function(&mut self) -> ParseResult<Option<Function>> {
    let mut diagnostics = Vec::new();

    let is_public = matches!(&self.current, Some(token) if token.kind() == TokenKind::Public);
    if is_public {
        diagnostics.extend(self.advance()); // pub
    }

    let (_, ds) = self.expect_advance(TokenKind::Fn);
    diagnostics.extend(ds);
    let (name_token, ds) = self.expect_advance(TokenKind::Identifier);
    diagnostics.extend(ds);
    let (_, ds) = self.expect_advance(TokenKind::LeftParentheses);
    diagnostics.extend(ds);
    let (parameters, ds) = self.parameter_list();
    diagnostics.extend(ds);
    let (_, ds) = self.expect_advance(TokenKind::RightParentheses);
    diagnostics.extend(ds);

    let mut return_type = None;
    if matches!(&self.current, Some(token) if token.kind() == TokenKind::RightArrow) {
        let (declared, ds) = self.return_type();
        diagnostics.extend(ds);
        return_type = declared;
    }

    // The body runs until `end` or end of input.
    let mut statements = vec![];
    loop {
        match &self.current {
            None => break,
            Some(token) if token.kind() == TokenKind::End => break,
            Some(_) => {}
        }
        let (maybe_statement, ds) = self.statement();
        diagnostics.extend(ds);
        statements.extend(maybe_statement);
    }

    let (_, ds) = self.expect_advance(TokenKind::End);
    diagnostics.extend(ds);

    let function = name_token.map(|identifier| {
        Function::new(
            self.token_text(&identifier),
            SourceRange::new(identifier.start(), identifier.end()),
            is_public,
            parameters,
            return_type,
            statements,
        )
    });
    (function, diagnostics)
}
/// Parses an extern function declaration: `extern fn name(params) -> Type`.
///
/// Unlike `function`, the return type is always parsed. The declaration is `None` unless
/// both the name and the return type were parsed successfully.
fn extern_function(&mut self) -> ParseResult<Option<ExternFunction>> {
    let (_, mut diagnostics) = self.expect_advance(TokenKind::Extern);
    let (_, ds) = self.expect_advance(TokenKind::Fn);
    diagnostics.extend(ds);
    let (name_token, ds) = self.expect_advance(TokenKind::Identifier);
    diagnostics.extend(ds);
    let (_, ds) = self.expect_advance(TokenKind::LeftParentheses);
    diagnostics.extend(ds);
    let (parameters, ds) = self.parameter_list();
    diagnostics.extend(ds);
    let (_, ds) = self.expect_advance(TokenKind::RightParentheses);
    diagnostics.extend(ds);
    let (declared_return, ds) = self.return_type();
    diagnostics.extend(ds);

    match (name_token, declared_return) {
        (Some(identifier), Some(type_use)) => {
            let extern_function = ExternFunction::new(
                self.token_text(&identifier),
                SourceRange::new(identifier.start(), identifier.end()),
                parameters,
                type_use,
            );
            (Some(extern_function), diagnostics)
        }
        _ => (None, diagnostics),
    }
}
fn class(&mut self) -> ParseResult<Option<Class>> {
let mut diagnostics = Vec::new();
let (_, mut ds) = self.expect_advance(TokenKind::Class);
diagnostics.append(&mut ds);
let (maybe_identifier, mut ds) = self.expect_advance(TokenKind::Identifier);
diagnostics.append(&mut ds);
let generic_parameters: Vec<GenericParameter> =
if self.current.is_some() && self.peek_current(TokenKind::Lt) {
let (gps, mut ds) = self.generic_parameters();
diagnostics.append(&mut ds);
gps
} else {
Vec::new()
};
let mut fields = vec![];
let mut functions = vec![];
let mut maybe_constructor: Option<Constructor> = None;
while self.current.is_some() && !self.peek_current(TokenKind::End) {
match self.get_current().kind() {
TokenKind::Public => {
let (_, mut ds) = self.public_class_member(
&mut fields,
&mut functions,
&mut maybe_constructor,
);
diagnostics.append(&mut ds);
}
TokenKind::Mut | TokenKind::Identifier => {
let (maybe_field, mut ds) = self.field();
diagnostics.append(&mut ds);
if let Some(field) = maybe_field {
fields.push(field);
}
}
TokenKind::Fn => {
let (maybe_function, mut ds) = self.function();
diagnostics.append(&mut ds);
if let Some(function) = maybe_function {
functions.push(function);
}
}
TokenKind::Ctor => {
let (constructor, mut ds) = self.constructor();
diagnostics.append(&mut ds);
if let Some(constructor) = constructor {
maybe_constructor = Some(constructor);
}
}
_ => unreachable!(),
}
}
let (_, mut ds) = self.expect_advance(TokenKind::End);
diagnostics.append(&mut ds);
if let Some(identifier) = maybe_identifier {
let class = Class::new(
self.token_text(&identifier),
SourceRange::new(identifier.start(), identifier.end()),
generic_parameters,
maybe_constructor,
fields,
functions,
);
(Some(class), diagnostics)
} else {
(None, diagnostics)
}
}
/// Parses zero or more parameters for as long as the current token is an identifier.
///
/// A comma after a parameter is consumed when present but is not required, and a trailing
/// comma is accepted. Parameters that fail to parse are omitted from the result.
fn parameter_list(&mut self) -> ParseResult<Vec<Parameter>> {
    let mut parameters = Vec::new();
    let mut diagnostics = Vec::new();
    while matches!(&self.current, Some(token) if token.kind() == TokenKind::Identifier) {
        let (maybe_parameter, ds) = self.parameter();
        diagnostics.extend(ds);
        parameters.extend(maybe_parameter);

        let at_comma = matches!(&self.current, Some(token) if token.kind() == TokenKind::Comma);
        if at_comma {
            diagnostics.extend(self.advance());
        }
    }
    (parameters, diagnostics)
}
/// Parses a single parameter of the form `name: Type`.
///
/// The parameter is `None` unless both the name and the type were parsed successfully.
fn parameter(&mut self) -> ParseResult<Option<Parameter>> {
    let (name_token, mut diagnostics) = self.expect_advance(TokenKind::Identifier);
    let (_, ds) = self.expect_advance(TokenKind::Colon);
    diagnostics.extend(ds);
    let (declared_type, ds) = self.type_use();
    diagnostics.extend(ds);

    match (name_token, declared_type) {
        (Some(identifier), Some(type_use)) => {
            let parameter = Parameter::new(
                self.token_text(&identifier),
                SourceRange::new(identifier.start(), identifier.end()),
                type_use,
            );
            (Some(parameter), diagnostics)
        }
        _ => (None, diagnostics),
    }
}
/// Parses a return type annotation: a `RightArrow` token followed by a type use.
///
/// The type use is attempted even when the arrow is missing; the result is whatever
/// `type_use` yields.
fn return_type(&mut self) -> ParseResult<Option<TypeUse>> {
    let (_, mut diagnostics) = self.expect_advance(TokenKind::RightArrow);
    let (declared, type_diagnostics) = self.type_use();
    diagnostics.extend(type_diagnostics);
    (declared, diagnostics)
}
fn type_use(&mut self) -> ParseResult<Option<TypeUse>> {
let mut diagnostics = Vec::new();
if self.current.is_some() {
let current = self.get_current();
return match current.kind() {
TokenKind::LeftSquare => {
diagnostics.append(&mut self.advance()); // [
let (inner_type_use, mut ds) = self.type_use();
diagnostics.append(&mut ds);
let (_, mut ds) = self.expect_advance(TokenKind::RightSquare);
diagnostics.append(&mut ds);
todo!()
}
TokenKind::Identifier => {
let (maybe_identifier, mut ds) = self.expect_advance(TokenKind::Identifier);
diagnostics.append(&mut ds);
let generic_arguments =
if self.current.is_some() && self.peek_current(TokenKind::Lt) {
diagnostics.append(&mut self.advance()); // <
let (generic_arguments, mut ds) = self.generic_arguments_list();
diagnostics.append(&mut ds);
let (_, mut ds) = self.expect_advance(TokenKind::Gt); // >
diagnostics.append(&mut ds);
generic_arguments
} else {
vec![]
};
if let Some(identifier) = maybe_identifier {
let type_use = TypeUse::new(
self.token_text(&identifier),
SourceRange::new(identifier.start(), identifier.end()),
generic_arguments,
);
(Some(type_use), diagnostics)
} else {
(None, diagnostics)
}
}
_ => {
diagnostics.push(Self::get_expected_but_found(
&[TokenKind::LeftSquare, TokenKind::Identifier],
current,
));
(None, diagnostics)
}
};
}
diagnostics.push(Self::get_expected_but_found_eoi(
&[TokenKind::LeftSquare, TokenKind::Identifier],
self.input.len(),
));
(None, diagnostics)
}
/// Parses a comma-separated list of type uses (the contents between `<` and `>`).
///
/// Stops at the first position that cannot start a type use, or after a type use that is not
/// followed by a comma. Type uses that fail to parse are omitted from the result.
fn generic_arguments_list(&mut self) -> ParseResult<Vec<TypeUse>> {
    let mut diagnostics = Vec::new();
    let mut generic_arguments: Vec<TypeUse> = Vec::new();
    loop {
        let at_type_use =
            matches!(&self.current, Some(token) if matches_type_use_first!(token.kind()));
        if !at_type_use {
            break;
        }
        let (argument, ds) = self.type_use();
        diagnostics.extend(ds);
        generic_arguments.extend(argument);

        let at_comma = matches!(&self.current, Some(token) if token.kind() == TokenKind::Comma);
        if !at_comma {
            break;
        }
        diagnostics.extend(self.advance()); // comma
    }
    (generic_arguments, diagnostics)
}
/// Parses a generic parameter section: `<`, generic parameters separated by `Plus` tokens,
/// then `>`.
///
/// Parameters that fail to parse are omitted from the result.
fn generic_parameters(&mut self) -> ParseResult<Vec<GenericParameter>> {
    let (_, mut diagnostics) = self.expect_advance(TokenKind::Lt);
    let mut parameters: Vec<GenericParameter> = Vec::new();
    while matches!(&self.current, Some(token) if token.kind() == TokenKind::Identifier) {
        let (maybe_parameter, ds) = self.generic_parameter();
        diagnostics.extend(ds);
        parameters.extend(maybe_parameter);

        let at_plus = matches!(&self.current, Some(token) if token.kind() == TokenKind::Plus);
        if !at_plus {
            break;
        }
        diagnostics.extend(self.advance()); // +
    }
    let (_, ds) = self.expect_advance(TokenKind::Gt);
    diagnostics.extend(ds);
    (parameters, diagnostics)
}
/// Parses one generic parameter: a name, optionally followed by `:` and a comma-separated
/// list of type uses that is stored as the parameter's extends list.
///
/// Returns `None` when the name identifier could not be parsed.
fn generic_parameter(&mut self) -> ParseResult<Option<GenericParameter>> {
    let (name_token, mut diagnostics) = self.expect_advance(TokenKind::Identifier);
    let mut extends_list: Vec<TypeUse> = Vec::new();

    if matches!(&self.current, Some(token) if token.kind() == TokenKind::Colon) {
        diagnostics.extend(self.advance()); // :
        loop {
            let at_type_use =
                matches!(&self.current, Some(token) if matches_type_use_first!(token.kind()));
            if !at_type_use {
                break;
            }
            let (bound, ds) = self.type_use();
            diagnostics.extend(ds);
            extends_list.extend(bound);

            let at_comma =
                matches!(&self.current, Some(token) if token.kind() == TokenKind::Comma);
            if !at_comma {
                break;
            }
            diagnostics.extend(self.advance()); // ,
        }
    }

    let generic_parameter = name_token.map(|identifier| {
        GenericParameter::new(
            self.token_text(&identifier),
            SourceRange::new(identifier.start(), identifier.end()),
            extends_list,
        )
    });
    (generic_parameter, diagnostics)
}
/// Parses a class member that starts with `pub`, dispatching on the token after it.
///
/// The current token is expected to be `pub` (the caller checks this); `field`, `function`
/// and `constructor` each consume the `pub` themselves. A successfully parsed member is
/// stored in `fields`, `functions` or `maybe_ctor` (replacing any earlier constructor).
///
/// When the token after `pub` cannot start a member — or there is no such token — a
/// diagnostic is produced and the parser skips past `pub` to the next token that can start a
/// member or close the class. Skipping is essential: without consuming `pub` the caller's
/// member loop would see the same token again and never terminate.
fn public_class_member(
    &mut self,
    fields: &mut Vec<Field>,
    functions: &mut Vec<Function>,
    maybe_ctor: &mut Option<Constructor>,
) -> ParseResult<()> {
    let mut diagnostics = Vec::new();
    // What may legally follow `pub`.
    let expected_after_pub = [
        TokenKind::Mut,
        TokenKind::Identifier,
        TokenKind::Fn,
        TokenKind::Ctor,
    ];
    // Where parsing of class members can safely resume after an error.
    let resume_at = [
        TokenKind::Public,
        TokenKind::Mut,
        TokenKind::Identifier,
        TokenKind::Fn,
        TokenKind::Ctor,
        TokenKind::End,
    ];
    match self.lookahead.as_ref().map(|token| token.kind()) {
        Some(TokenKind::Mut | TokenKind::Identifier) => {
            let (maybe_field, mut ds) = self.field();
            diagnostics.append(&mut ds);
            if let Some(field) = maybe_field {
                fields.push(field);
            }
        }
        Some(TokenKind::Fn) => {
            let (maybe_function, mut ds) = self.function();
            diagnostics.append(&mut ds);
            if let Some(function) = maybe_function {
                functions.push(function);
            }
        }
        Some(TokenKind::Ctor) => {
            let (ctor, mut ds) = self.constructor();
            diagnostics.append(&mut ds);
            if let Some(ctor) = ctor {
                maybe_ctor.replace(ctor);
            }
        }
        Some(_) => {
            if let Some(lookahead) = self.lookahead.as_ref() {
                diagnostics.push(Self::get_expected_but_found(
                    &expected_after_pub,
                    lookahead,
                ));
            }
            // Always consumes `pub`, so the caller's loop makes progress.
            diagnostics.append(&mut self.advance_until(&resume_at));
        }
        None => {
            // `pub` is the last token of the input.
            diagnostics.push(Self::get_expected_but_found_eoi(
                &expected_after_pub,
                self.input.len(),
            ));
            diagnostics.append(&mut self.advance_until(&resume_at));
        }
    }
    ((), diagnostics)
}
/// Parses a constructor: optional `pub`, `ctor`, parenthesised parameter list, statements,
/// and a closing `end`.
///
/// Statements are parsed for as long as the current token can start a statement. The
/// constructor's source range is that of the `ctor` keyword; `None` is returned when the
/// keyword could not be parsed.
///
/// All diagnostics — from the header as well as from the body — are returned in order.
fn constructor(&mut self) -> ParseResult<Option<Constructor>> {
    let mut diagnostics = Vec::new();
    let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) {
        diagnostics.append(&mut self.advance());
        true
    } else {
        false
    };
    let (ctor_keyword, mut ds) = self.expect_advance(TokenKind::Ctor);
    diagnostics.append(&mut ds);
    let (_, mut ds) = self.expect_advance(TokenKind::LeftParentheses);
    diagnostics.append(&mut ds);
    let parameters = if self.current.is_some() && self.peek_current(TokenKind::Identifier) {
        let (parameters, mut ds) = self.parameter_list();
        diagnostics.append(&mut ds);
        parameters
    } else {
        vec![]
    };
    let (_, mut ds) = self.expect_advance(TokenKind::RightParentheses);
    diagnostics.append(&mut ds);
    // statements
    // Keep appending to the same diagnostics vector: re-declaring it here would silently drop
    // every header diagnostic collected above.
    let mut statements: Vec<Statement> = vec![];
    while self.current.is_some()
        && matches_statement_first!(self.current.as_ref().unwrap().kind())
    {
        let (maybe_statement, mut ds) = self.statement();
        diagnostics.append(&mut ds);
        if let Some(statement) = maybe_statement {
            statements.push(statement);
        }
    }
    let (_, mut ds) = self.expect_advance(TokenKind::End);
    diagnostics.append(&mut ds);
    if let Some(ctor_keyword) = ctor_keyword {
        let constructor = Constructor::new(
            is_public,
            SourceRange::new(ctor_keyword.start(), ctor_keyword.end()),
            parameters,
            statements,
        );
        (Some(constructor), diagnostics)
    } else {
        (None, diagnostics)
    }
}
/// Parses a class field: optional `pub`, optional `mut`, name, optional `: Type`, optional
/// `= initializer`.
///
/// Returns `None` when the name identifier could not be parsed.
fn field(&mut self) -> ParseResult<Option<Field>> {
    let mut diagnostics = Vec::new();

    let is_public = matches!(&self.current, Some(token) if token.kind() == TokenKind::Public);
    if is_public {
        diagnostics.extend(self.advance());
    }
    let is_mut = matches!(&self.current, Some(token) if token.kind() == TokenKind::Mut);
    if is_mut {
        diagnostics.extend(self.advance());
    }

    let (name_token, ds) = self.expect_advance(TokenKind::Identifier);
    diagnostics.extend(ds);

    let mut declared_type = None;
    if matches!(&self.current, Some(token) if token.kind() == TokenKind::Colon) {
        diagnostics.extend(self.advance()); // colon
        let (type_use, ds) = self.type_use();
        diagnostics.extend(ds);
        declared_type = type_use;
    }

    let mut initializer = None;
    if matches!(&self.current, Some(token) if token.kind() == TokenKind::Equals) {
        diagnostics.extend(self.advance()); // equals
        let (expression, ds) = self.expression();
        diagnostics.extend(ds);
        initializer = Some(expression);
    }

    let field = name_token.map(|identifier| {
        Field::new(
            self.token_text(&identifier),
            SourceRange::new(identifier.start(), identifier.end()),
            is_public,
            is_mut,
            declared_type,
            initializer,
        )
    });
    (field, diagnostics)
}
/// Parses one statement: a `let` statement when the current token is `let`, otherwise an
/// expression statement or assignment.
///
/// # Panics
/// Panics (via `get_current`) if there is no current token.
fn statement(&mut self) -> ParseResult<Option<Statement>> {
    if self.get_current().kind() == TokenKind::Let {
        let (maybe_let, diagnostics) = self.let_statement();
        (maybe_let.map(Statement::Let), diagnostics)
    } else {
        let (statement, diagnostics) = self.expression_statement_or_assign_statement();
        (Some(statement), diagnostics)
    }
}
/// Parses `let [mut] name = expression`.
///
/// Returns `None` when the name identifier could not be parsed; the initializer expression
/// is parsed regardless.
fn let_statement(&mut self) -> ParseResult<Option<LetStatement>> {
    let (_, mut diagnostics) = self.expect_advance(TokenKind::Let);

    let is_mut = matches!(&self.current, Some(token) if token.kind() == TokenKind::Mut);
    if is_mut {
        diagnostics.extend(self.advance());
    }

    let (name_token, ds) = self.expect_advance(TokenKind::Identifier);
    diagnostics.extend(ds);
    let (_, ds) = self.expect_advance(TokenKind::Equals);
    diagnostics.extend(ds);
    let (initializer, ds) = self.expression();
    diagnostics.extend(ds);

    let let_statement = name_token.map(|identifier| {
        LetStatement::new(
            self.token_text(&identifier),
            SourceRange::new(identifier.start(), identifier.end()),
            is_mut,
            initializer,
        )
    });
    (let_statement, diagnostics)
}
/// Parses an expression and, if it is followed by `=`, treats it as the destination of an
/// assignment; otherwise wraps it as an expression statement.
fn expression_statement_or_assign_statement(&mut self) -> ParseResult<Statement> {
    let (base, mut diagnostics) = self.expression();

    let is_assignment =
        matches!(&self.current, Some(token) if token.kind() == TokenKind::Equals);
    if !is_assignment {
        let statement = Statement::Expression(ExpressionStatement::new(base));
        return (statement, diagnostics);
    }

    let (assign_statement, assign_diagnostics) = self.assign_rhs(base);
    diagnostics.extend(assign_diagnostics);
    (Statement::Assign(assign_statement), diagnostics)
}
/// Parses the `= value` part of an assignment whose left-hand side is `destination`.
fn assign_rhs(&mut self, destination: Expression) -> ParseResult<AssignStatement> {
    let (_, mut diagnostics) = self.expect_advance(TokenKind::Equals);
    let (value, value_diagnostics) = self.expression();
    diagnostics.extend(value_diagnostics);
    (AssignStatement::new(destination, value), diagnostics)
}
/// Parses an expression. Entry point of the expression grammar; it delegates to
/// `bitwise_or_expression`, the first level in the chain of binary-operator parsers.
fn expression(&mut self) -> ParseResult<Expression> {
self.bitwise_or_expression()
}
/// Parses a left-associative chain of `|` operators over bitwise-xor expressions.
///
/// Each combined node spans from the start of the left operand to the end of the right one.
fn bitwise_or_expression(&mut self) -> ParseResult<Expression> {
    let (mut lhs, mut diagnostics) = self.bitwise_xor_expression();
    while matches!(&self.current, Some(token) if token.kind() == TokenKind::Bar) {
        diagnostics.extend(self.advance()); // |
        let (rhs, rhs_diagnostics) = self.bitwise_xor_expression();
        diagnostics.extend(rhs_diagnostics);
        let span = SourceRange::new(lhs.source_range().start(), rhs.source_range().end());
        lhs = Expression::Binary(BinaryExpression::new(
            lhs,
            rhs,
            BinaryOperation::BitwiseOr,
            span,
        ));
    }
    (lhs, diagnostics)
}
/// Parses a left-associative chain of `^` operators over bitwise-and expressions.
///
/// Each combined node spans from the start of the left operand to the end of the right one.
fn bitwise_xor_expression(&mut self) -> ParseResult<Expression> {
    let (mut lhs, mut diagnostics) = self.bitwise_and_expression();
    while matches!(&self.current, Some(token) if token.kind() == TokenKind::Caret) {
        diagnostics.extend(self.advance()); // ^
        let (rhs, rhs_diagnostics) = self.bitwise_and_expression();
        diagnostics.extend(rhs_diagnostics);
        let span = SourceRange::new(lhs.source_range().start(), rhs.source_range().end());
        lhs = Expression::Binary(BinaryExpression::new(
            lhs,
            rhs,
            BinaryOperation::BitwiseXor,
            span,
        ));
    }
    (lhs, diagnostics)
}
/// Parses a left-associative chain of `&` operators over shift expressions.
///
/// Each combined node spans from the start of the left operand to the end of the right one.
fn bitwise_and_expression(&mut self) -> ParseResult<Expression> {
    let (mut lhs, mut diagnostics) = self.shift_expression();
    while matches!(&self.current, Some(token) if token.kind() == TokenKind::Ampersand) {
        diagnostics.extend(self.advance()); // &
        let (rhs, rhs_diagnostics) = self.shift_expression();
        diagnostics.extend(rhs_diagnostics);
        let span = SourceRange::new(lhs.source_range().start(), rhs.source_range().end());
        lhs = Expression::Binary(BinaryExpression::new(
            lhs,
            rhs,
            BinaryOperation::BitwiseAnd,
            span,
        ));
    }
    (lhs, diagnostics)
}
/// Parses a left-associative chain of shift operators over additive expressions.
///
/// A shift operator is written as two adjacent `Lt` tokens (left shift) or two adjacent `Gt`
/// tokens (right shift). After the first token, a second token of the same kind is required
/// immediately after it; if that check fails a diagnostic is recorded, but the right operand
/// is still parsed and the binary node is still built.
fn shift_expression(&mut self) -> ParseResult<Expression> {
    let (mut lhs, mut diagnostics) = self.additive_expression();
    loop {
        // Keep a copy of the first operator token: its end offset is needed after advancing.
        let (first, operation) = match &self.current {
            Some(token) if token.kind() == TokenKind::Lt => {
                (token.clone(), BinaryOperation::LeftShift)
            }
            Some(token) if token.kind() == TokenKind::Gt => {
                (token.clone(), BinaryOperation::RightShift)
            }
            _ => break,
        };
        diagnostics.extend(self.advance()); // first < or >
        let (_, ds) = self.expect_immediately_after_advance(first.kind(), &first); // second
        diagnostics.extend(ds);
        let (rhs, ds) = self.additive_expression();
        diagnostics.extend(ds);
        let span = SourceRange::new(lhs.source_range().start(), rhs.source_range().end());
        lhs = Expression::Binary(BinaryExpression::new(lhs, rhs, operation, span));
    }
    (lhs, diagnostics)
}
/// Parses a left-associative chain of `+` and `-` operators over multiplicative expressions.
///
/// Each combined node spans from the start of the left operand to the end of the right one.
fn additive_expression(&mut self) -> ParseResult<Expression> {
    let (mut lhs, mut diagnostics) = self.multiplicative_expression();
    loop {
        let operation = match self.current.as_ref().map(|token| token.kind()) {
            Some(TokenKind::Plus) => BinaryOperation::Add,
            Some(TokenKind::Minus) => BinaryOperation::Subtract,
            _ => break,
        };
        diagnostics.extend(self.advance()); // operator
        let (rhs, ds) = self.multiplicative_expression();
        diagnostics.extend(ds);
        let span = SourceRange::new(lhs.source_range().start(), rhs.source_range().end());
        lhs = Expression::Binary(BinaryExpression::new(lhs, rhs, operation, span));
    }
    (lhs, diagnostics)
}
/// Parses a left-associative chain of `Star`, `Slash` and `Modulo` operators over prefix
/// expressions.
///
/// Each combined node spans from the start of the left operand to the end of the right one.
fn multiplicative_expression(&mut self) -> ParseResult<Expression> {
    let (mut lhs, mut diagnostics) = self.prefix_expression();
    loop {
        let operation = match self.current.as_ref().map(|token| token.kind()) {
            Some(TokenKind::Star) => BinaryOperation::Multiply,
            Some(TokenKind::Slash) => BinaryOperation::Divide,
            Some(TokenKind::Modulo) => BinaryOperation::Modulo,
            _ => break,
        };
        diagnostics.extend(self.advance()); // operator
        let (rhs, ds) = self.prefix_expression();
        diagnostics.extend(ds);
        let span = SourceRange::new(lhs.source_range().start(), rhs.source_range().end());
        lhs = Expression::Binary(BinaryExpression::new(lhs, rhs, operation, span));
    }
    (lhs, diagnostics)
}
/// Parses any number of leading `-` operators followed by a suffix expression.
///
/// The operators are applied innermost-first: the `-` closest to the operand wraps it first,
/// and each negation node spans from its own `-` to the end of the operand.
fn prefix_expression(&mut self) -> ParseResult<Expression> {
    let mut diagnostics = Vec::new();

    // Only the start offset of each operator is needed to build the source ranges later.
    let mut minus_starts = Vec::new();
    while let Some(token) = &self.current {
        if token.kind() != TokenKind::Minus {
            break;
        }
        minus_starts.push(token.start());
        diagnostics.extend(self.advance());
    }

    // The parser now sits just after the prefix operators, so a suffix expression follows.
    let (mut operand, ds) = self.suffix_expression();
    diagnostics.extend(ds);

    // Wrap from the innermost operator outwards.
    while let Some(minus_start) = minus_starts.pop() {
        let span = SourceRange::new(minus_start, operand.source_range().end());
        operand = Expression::Negative(NegativeExpression::new(operand, span));
    }
    (operand, diagnostics)
}
/// Parses a base expression followed by any number of call suffixes, e.g. `f(a)(b)`.
///
/// Each `(` wraps everything parsed so far as the callee of a new call.
fn suffix_expression(&mut self) -> ParseResult<Expression> {
    let (mut callee, mut diagnostics) = self.expression_base();
    while matches!(&self.current, Some(token) if token.kind() == TokenKind::LeftParentheses) {
        let (call, call_diagnostics) = self.call(callee);
        diagnostics.extend(call_diagnostics);
        callee = Expression::Call(call);
    }
    (callee, diagnostics)
}
/// Parses the innermost unit of an expression: an integer, double, or string literal, or an
/// identifier.
///
/// The current token is consumed and wrapped in the matching [`Expression`] variant, paired with
/// whatever diagnostics `advance` reported while moving past it.
///
/// # Panics
///
/// Panics if the current token is not one of the four kinds above, or if the text of an
/// integer/double literal token is rejected by `i32::from_str` / `f64::from_str` (for example
/// an integer literal that does not fit in an `i32`).
// NOTE(review): both panics look reachable from user input; consider reporting a diagnostic
// instead — confirm against the lexer and the callers.
fn expression_base(&mut self) -> ParseResult<Expression> {
    let token = self.get_current();
    match token.kind() {
        TokenKind::IntegerLiteral => {
            let text = self.token_text(&token);
            let range = SourceRange::new(token.start(), token.end());
            let diagnostics = self.advance();
            let value = i32::from_str(text).unwrap();
            (
                Expression::Integer(IntegerLiteral::new(value, range)),
                diagnostics,
            )
        }
        TokenKind::DoubleLiteral => {
            let text = self.token_text(&token);
            let range = SourceRange::new(token.start(), token.end());
            let diagnostics = self.advance();
            let value = f64::from_str(text).unwrap();
            (
                Expression::Double(DoubleLiteral::new(value, range)),
                diagnostics,
            )
        }
        TokenKind::String => {
            let quoted = self.token_text(&token);
            let range = SourceRange::new(token.start(), token.end());
            let diagnostics = self.advance();
            // Drop the first and last byte of the token text (presumably the quotes).
            let content = &quoted[1..quoted.len() - 1];
            (
                Expression::String(StringLiteral::new(content, range)),
                diagnostics,
            )
        }
        TokenKind::Identifier => {
            let name = self.token_text(&token);
            let range = SourceRange::new(token.start(), token.end());
            let diagnostics = self.advance();
            (
                Expression::Identifier(Identifier::new(name, range)),
                diagnostics,
            )
        }
        _ => unreachable!("Unreachable token type found: {:?}", token.kind()),
    }
}
/// Parses the parenthesized argument list of a call whose callee has already been parsed.
///
/// Expects `(`, then an optional comma-separated expression list, then `)`. The resulting
/// [`Call`] spans from the start of `callee` to the end of the closing parenthesis; when the
/// closing parenthesis is missing, the span falls back to that of `callee` alone.
fn call(&mut self, callee: Expression) -> ParseResult<Call> {
    let (_, mut diagnostics) = self.expect_advance(TokenKind::LeftParentheses);

    // Only descend into the argument list if the next token can begin an expression.
    let has_arguments = match &self.current {
        Some(token) => matches_expression_first!(token.kind()),
        None => false,
    };
    let arguments = if has_arguments {
        let (expressions, mut ds) = self.expression_list();
        diagnostics.append(&mut ds);
        expressions
    } else {
        vec![]
    };

    let (closing, mut ds) = self.expect_advance(TokenKind::RightParentheses);
    diagnostics.append(&mut ds);

    let source_range = match closing {
        Some(token) => SourceRange::new(callee.source_range().start(), token.end()),
        // This should be good enough for error reporting if we're missing the right parenthesis
        None => SourceRange::new(callee.source_range().start(), callee.source_range().end()),
    };
    (Call::new(callee, arguments, source_range), diagnostics)
}
/// Parses one or more expressions separated by commas.
///
/// The first expression is always parsed; afterwards, each comma found at the current position
/// is consumed and must be followed by another expression. Returns the expressions in source
/// order together with all diagnostics collected along the way.
fn expression_list(&mut self) -> ParseResult<Vec<Expression>> {
    let (first, mut diagnostics) = self.expression();
    let mut expressions = vec![first];
    loop {
        // Stop at end of input or at anything that is not a separating comma.
        if self.current.is_none() || !self.peek_current(TokenKind::Comma) {
            break;
        }
        diagnostics.extend(self.advance()); // comma
        let (next, next_diagnostics) = self.expression();
        expressions.push(next);
        diagnostics.extend(next_diagnostics);
    }
    (expressions, diagnostics)
}
}
#[cfg(test)]
mod smoke_tests {
    //! Smoke tests: each input must parse as a compilation unit without producing diagnostics.

    use super::*;

    /// Parses `input` as a compilation unit; dumps the diagnostics and panics if there are any.
    fn smoke_test(input: &str) {
        let (_, diagnostics) = parse_compilation_unit(input);
        if diagnostics.is_empty() {
            return;
        }
        eprintln!("{:#?}", diagnostics);
        panic!("There were diagnostics during parsing");
    }

    /// Declares one `#[test]` function per `name => input` pair; each simply runs `smoke_test`
    /// on its input.
    macro_rules! smoke_cases {
        ($($name:ident => $input:expr),* $(,)?) => {
            $(
                #[test]
                fn $name() {
                    smoke_test($input);
                }
            )*
        };
    }

    smoke_cases! {
        forty_two => "fn main() 42 end",
        chained_calls => "fn main() getCl()() end",
        extern_fn_with_param => "extern fn println(message: Any) -> Void",
        fn_with_param => "fn foo(bar: Int) end",
        fn_with_params => "fn foo(bar: Int, baz: Int) end",
        return_type => "fn foo() -> Int end",
        extern_return_type => "extern fn foo() -> Int",
        add_two_numbers => "fn main() 1 + 2 end",
        negative_return => "fn main() -> Int -1 end",
        negative_left_add => "fn main() -> Int -1 + 1 end",
        negative_right_add => "fn main() -> Int 1 + -1 end",
        two_negatives => "fn main() -> Int -1 + -1 end",
        minus_positive_number => "fn main() -> Int 1 - 1 end",
        minus_negative_number => "fn main() -> Int 1 - -1 end",
        empty_class => "class Foo end",
        class_with_pub_member => "class Foo pub bar end",
        class_with_mut_member => "class Foo mut bar end",
        class_with_nothing_member => "class Foo bar end",
        class_with_member_type_use => "class Foo bar: Int end",
        class_with_member_init => "class Foo bar = 42 end",
        class_with_member_type_use_and_init => "class Foo bar: Int = 42 end",
        class_with_member_all => "class Foo pub mut bar: Bar = Baz() end",
        class_with_pub_fn => "\nclass Greeter\npub fn greet() end\nend\n",
        simple_assign => "\nfn main()\nlet mut x = 4\nx = 42\nend\n",
        simple_multiply => "fn main() 1 * 2 end",
        simple_divide => "fn main() 1 / 2 end",
        simple_modulo => "fn main() 1 % 2 end",
        simple_left_shift => "fn main() 2 << 1 end",
        simple_right_shift => "fn main() 4 >> 1 end",
        simple_bitwise_and => "fn main() 2 & 1 end",
        simple_bitwise_xor => "fn main() 1 ^ 2 end",
        simple_bitwise_or => "fn main() 1 | 2 end",
        ops_left_to_right => "\nfn main()\n1 | 2 ^ 3 & 4 << 5 >> 7 + 8 - 9 * 10 / 11 % 12\nend\n",
        array_generic_arg => "fn main(args: Array<String>) end",
        nested_generic_args => "fn main(foo: Array<Bar<Foo>>) end",
        class_with_generic_param => "class Foo<T> end",
    }
}
#[cfg(test)]
mod concrete_tests {
    //! Tests that inspect the shape of the AST produced by the parser.

    use super::*;

    /// Prints every diagnostic to stderr, then panics.
    fn report_diagnostics(diagnostics: &[Diagnostic]) -> ! {
        diagnostics
            .iter()
            .for_each(|diagnostic| eprintln!("{:?}", diagnostic));
        panic!();
    }

    /// Parses `input` as a compilation unit, failing the test if any diagnostics were produced.
    fn assert_compilation_unit(input: &str) -> CompilationUnit {
        let (unit, diagnostics) = parse_compilation_unit(input);
        if diagnostics.is_empty() {
            unit
        } else {
            report_diagnostics(&diagnostics)
        }
    }

    /// Parses `input` as a single expression, failing the test if any diagnostics were produced.
    fn assert_expression(input: &str) -> Expression {
        let (parsed, diagnostics) = parse_expression(input);
        if diagnostics.is_empty() {
            parsed
        } else {
            report_diagnostics(&diagnostics)
        }
    }

    /// Returns the function declared as `function_name`, panicking if the unit has none.
    fn assert_function_in<'a>(
        compilation_unit: &'a CompilationUnit,
        function_name: &str,
    ) -> &'a Function {
        compilation_unit
            .functions()
            .iter()
            .find(|candidate| candidate.declared_name() == function_name)
            .unwrap()
    }

    #[test]
    fn parses_extern_fn() {
        let unit = assert_compilation_unit("extern fn println() -> Void");
        let externs = unit.extern_functions();
        assert_eq!(externs.len(), 1);
        assert_eq!(externs[0].declared_name(), "println");
    }

    #[test]
    fn hello_world() {
        let unit = assert_compilation_unit("fn main() println(\"Hello, World!\") end");
        let main = assert_function_in(&unit, "main");
        let body = main.statements();
        assert_eq!(body.len(), 1);
        // Expect exactly: an expression statement holding `println("Hello, World!")`.
        match body[0] {
            Statement::Expression(expression_statement) => {
                match expression_statement.expression() {
                    Expression::Call(call) => {
                        match call.callee() {
                            Expression::Identifier(identifier) => {
                                assert_eq!(identifier.name(), "println");
                            }
                            _ => panic!("Expected identifier"),
                        }
                        let arguments = call.arguments();
                        assert_eq!(arguments.len(), 1);
                        let only_argument = arguments[0];
                        match only_argument {
                            Expression::String(s) => {
                                assert_eq!(s.content(), "Hello, World!");
                            }
                            _ => panic!("Expected string"),
                        }
                    }
                    _ => panic!("Expected call"),
                }
            }
            _ => panic!("Expected expression"),
        }
    }

    #[test]
    fn negative_expression() {
        let parsed = assert_expression("-1");
        if let Expression::Negative(negative) = parsed {
            if let Expression::Integer(literal) = negative.operand() {
                assert_eq!(literal.value(), 1);
            } else {
                panic!("Expected integer literal");
            }
        } else {
            panic!("Expected negative expression");
        }
    }

    #[test]
    fn add_negative() {
        let parsed = assert_expression("1 + -1");
        if let Expression::Binary(binary_expression) = parsed {
            assert!(matches!(binary_expression.op(), BinaryOperation::Add));
            // Left operand: the plain literal `1`.
            if let Expression::Integer(literal) = binary_expression.lhs() {
                assert_eq!(literal.value(), 1);
            } else {
                panic!("Expected integer literal");
            }
            // Right operand: a negation wrapping the literal `1`.
            if let Expression::Negative(negative) = binary_expression.rhs() {
                if let Expression::Integer(literal) = negative.operand() {
                    assert_eq!(literal.value(), 1);
                } else {
                    panic!("Expected integer literal");
                }
            } else {
                panic!("Expected negative expression");
            }
        } else {
            panic!("Expected additive expression");
        }
    }

    #[test]
    fn simple_subtract() {
        let parsed = assert_expression("1 - 1");
        if let Expression::Binary(binary_expression) = parsed {
            assert!(matches!(binary_expression.op(), BinaryOperation::Subtract));
            if let Expression::Integer(literal) = binary_expression.lhs() {
                assert_eq!(literal.value(), 1);
            } else {
                panic!("Expected integer literal");
            }
            if let Expression::Integer(literal) = binary_expression.rhs() {
                assert_eq!(literal.value(), 1);
            } else {
                panic!("Expected integer literal");
            }
        } else {
            panic!("Expected subtract expression");
        }
    }
}