// deimos-lang/dmc-lib/src/parser.rs (Rust, 281 lines, 9.0 KiB)

use crate::ast::call::Call;
use crate::ast::compilation_unit::CompilationUnit;
use crate::ast::expression::Expression;
use crate::ast::expression_statement::ExpressionStatement;
use crate::ast::function::Function;
use crate::ast::identifier::Identifier;
use crate::ast::integer_literal::IntegerLiteral;
use crate::ast::let_statement::LetStatement;
use crate::ast::statement::Statement;
use crate::ast::string_literal::StringLiteral;
use crate::lexer::Lexer;
use crate::source_range::SourceRange;
use crate::token::{Token, TokenKind};
use std::str::FromStr;
/// Parses `input` as a whole compilation unit.
///
/// A fresh parser is built over `input` and run to completion; the parser
/// reports every failure by panicking, so this function panics on malformed
/// input rather than returning an error.
pub fn parse_compilation_unit(input: &str) -> CompilationUnit {
    Parser::new(input).compilation_unit()
}
/// Parser state: the source text, the lexer reading it, and a two-token
/// window (`current` plus one token of `lookahead`).
struct Parser<'a> {
// The complete source text; token text is sliced out of it by byte offsets.
input: &'a str,
// Token source, constructed over the same `input`.
lexer: Lexer<'a>,
// The token the parser is looking at; `None` before the first `advance`.
current: Option<Token>,
// The token following `current`, if the lexer produced one.
lookahead: Option<Token>,
}
impl<'a> Parser<'a> {
    /// Creates a parser over `input`.
    ///
    /// No token is read here: `current` and `lookahead` stay empty until the
    /// first call to `advance`.
    fn new(input: &'a str) -> Self {
        Self {
            input,
            lexer: Lexer::new(input),
            current: None,
            lookahead: None,
        }
    }

    /// Pulls the next token from the lexer, or `None` when it yields nothing.
    ///
    /// # Panics
    ///
    /// Panics with the debug rendering of the error if the lexer reports one.
    fn next_token(&mut self) -> Option<Token> {
        match self.lexer.next()? {
            Ok(token) => Some(token),
            Err(lexer_error) => panic!("{:?}", lexer_error),
        }
    }

    /// Moves the token window forward by one token.
    ///
    /// `lookahead` becomes `current` and a new `lookahead` is read from the
    /// lexer. On the very first call both slots are filled from the lexer.
    /// Once the input is used up, `current` becomes `None`.
    ///
    /// # Panics
    ///
    /// Panics if the lexer reports an error.
    fn advance(&mut self) {
        self.current = match self.lookahead.take() {
            Some(token) => Some(token),
            // Empty lookahead means either nothing has been read yet or the
            // lexer has run dry. Asking the lexer handles both: it hands out
            // the first token in the former case and nothing in the latter, so
            // `current` is cleared at end of input instead of keeping the last
            // token forever (which made callers loop without making progress).
            // NOTE(review): relies on the lexer continuing to yield `None`
            // after exhaustion, as the previous implementation already did.
            None => self.next_token(),
        };
        self.lookahead = self.next_token();
    }

    /// Consumes the current token, which must be of kind `token_kind`, and
    /// returns it after advancing to the next token.
    ///
    /// # Panics
    ///
    /// Panics if the input has ended or the current token is of another kind.
    fn expect_advance(&mut self, token_kind: TokenKind) -> Token {
        let token = match self.current.take() {
            Some(token) => token,
            None => panic!("Expected {:?} but found end of input", token_kind),
        };
        if token.kind() != token_kind {
            panic!(
                "Expected {:?} but found {:?} at {}",
                token_kind,
                token.kind(),
                token.start()
            );
        }
        self.advance();
        token
    }

    /// Returns `true` when there is a current token and it is of kind
    /// `token_kind`; returns `false` at end of input.
    fn peek_current(&self, token_kind: TokenKind) -> bool {
        matches!(&self.current, Some(token) if token.kind() == token_kind)
    }

    /// Borrows the current token.
    ///
    /// # Panics
    ///
    /// Panics if there is no current token.
    fn get_current(&self) -> &Token {
        self.current.as_ref().expect("Unexpected end of input")
    }

    /// Returns the slice `start..end` of the source text.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds or does not fall on character
    /// boundaries (standard `str` slicing rules).
    fn sample_input(&self, start: usize, end: usize) -> &'a str {
        &self.input[start..end]
    }

    /// Returns the source text covered by `token`.
    fn token_text(&self, token: &Token) -> &'a str {
        self.sample_input(token.start(), token.end())
    }

    /// Parses the whole input as a sequence of functions.
    ///
    /// Loads the first tokens, then parses functions until no token is left.
    pub fn compilation_unit(&mut self) -> CompilationUnit {
        self.advance();
        let mut functions = vec![];
        while self.current.is_some() {
            functions.push(self.function());
        }
        CompilationUnit::new(functions)
    }

    /// Parses `fn <identifier> ( ) <statement>* end`.
    ///
    /// Parameters are not parsed yet: the parentheses must be empty.
    ///
    /// # Panics
    ///
    /// Panics on any unexpected token, or if the input ends before `end`.
    fn function(&mut self) -> Function {
        self.expect_advance(TokenKind::Fn);
        let identifier_token = self.expect_advance(TokenKind::Identifier);
        self.expect_advance(TokenKind::LeftParentheses);
        // add params
        self.expect_advance(TokenKind::RightParentheses);

        let mut statements = vec![];
        while !self.peek_current(TokenKind::End) {
            statements.push(self.statement());
        }
        self.expect_advance(TokenKind::End);

        Function::new(
            self.token_text(&identifier_token),
            SourceRange::new(identifier_token.start(), identifier_token.end()),
            statements,
        )
    }

    /// Parses one statement: a `let` statement when the current token is
    /// `Let`, otherwise an expression statement.
    fn statement(&mut self) -> Statement {
        let kind = self.get_current().kind();
        match kind {
            TokenKind::Let => self.let_statement(),
            _ => self.expression_statement(),
        }
    }

    /// Parses `let <identifier> = <expression>`.
    fn let_statement(&mut self) -> Statement {
        self.expect_advance(TokenKind::Let);
        let identifier = self.expect_advance(TokenKind::Identifier);
        self.expect_advance(TokenKind::Equals);
        let expression = self.expression();
        Statement::Let(LetStatement::new(
            self.token_text(&identifier),
            SourceRange::new(identifier.start(), identifier.end()),
            expression,
        ))
    }

    /// Parses an expression and wraps it as a statement.
    fn expression_statement(&mut self) -> Statement {
        Statement::Expression(ExpressionStatement::new(self.expression()))
    }

    /// Parses a primary expression (integer literal, string literal or
    /// identifier) followed by any number of call postfixes.
    ///
    /// # Panics
    ///
    /// Panics at end of input, on a token that cannot start an expression,
    /// or on an integer literal that does not fit in an `i32`.
    fn expression(&mut self) -> Expression {
        // Copy what is needed out of the current token up front so the borrow
        // of `self` has ended by the time the parser advances.
        let (kind, text, start, end) = {
            let token = self.get_current();
            (
                token.kind(),
                self.token_text(token),
                token.start(),
                token.end(),
            )
        };

        let mut result = match kind {
            TokenKind::IntegerLiteral => {
                let value = i32::from_str(text).unwrap_or_else(|error| {
                    panic!("Invalid integer literal {:?} at {}: {}", text, start, error)
                });
                Expression::IntegerLiteral(IntegerLiteral::new(
                    value,
                    SourceRange::new(start, end),
                ))
            }
            TokenKind::String => Expression::String(StringLiteral::new(
                // Drop the first and last byte of the token text; presumably
                // these are the delimiting quotes emitted by the lexer.
                &text[1..text.len() - 1],
                SourceRange::new(start, end),
            )),
            TokenKind::Identifier => {
                Expression::Identifier(Identifier::new(text, SourceRange::new(start, end)))
            }
            unexpected => panic!("Unexpected token {:?}", unexpected),
        };
        self.advance();

        // postfixes: a call is the only one so far, and calls may be chained.
        while self.peek_current(TokenKind::LeftParentheses) {
            result = Expression::Call(self.call(result));
        }
        result
    }

    /// Parses the `( <expression>* )` part of a call whose callee has already
    /// been parsed.
    ///
    /// Arguments are parsed back to back; no separator token is consumed
    /// between them. The resulting source range runs from the start of the
    /// callee to the end of the closing parenthesis.
    fn call(&mut self, callee: Expression) -> Call {
        self.expect_advance(TokenKind::LeftParentheses);
        let mut arguments = vec![];
        while !self.peek_current(TokenKind::RightParentheses) {
            arguments.push(self.expression());
        }
        let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses);
        let source_range =
            SourceRange::new(callee.source_range().start(), right_parentheses_token.end());
        Call::new(callee, arguments, source_range)
    }
}
#[cfg(test)]
mod smoke_tests {
    use super::*;

    /// A function whose body is a single integer literal parses without panicking.
    #[test]
    fn forty_two() {
        parse_compilation_unit("fn main() 42 end");
    }

    /// `println("Hello, World!")` parses into a call of the identifier
    /// `println` with one string argument.
    #[test]
    fn hello_world() {
        let compilation_unit = parse_compilation_unit("fn main() println(\"Hello, World!\") end");

        let functions = compilation_unit.functions();
        assert_eq!(functions.len(), 1);
        let main_function = functions[0];
        assert_eq!(main_function.declared_name(), "main");

        let statements = main_function.statements();
        assert_eq!(statements.len(), 1);
        let expression_statement = match statements[0] {
            Statement::Expression(expression_statement) => expression_statement,
            _ => panic!("Expected expression"),
        };
        let call = match expression_statement.expression() {
            Expression::Call(call) => call,
            _ => panic!("Expected call"),
        };

        // The callee must be the plain identifier `println`.
        if let Expression::Identifier(identifier) = call.callee() {
            assert_eq!(identifier.name(), "println");
        } else {
            panic!("Expected identifier");
        }

        // Exactly one argument: the string without its quotes.
        let arguments = call.arguments();
        assert_eq!(arguments.len(), 1);
        match arguments[0] {
            Expression::String(s) => assert_eq!(s.content(), "Hello, World!"),
            _ => panic!("Expected string"),
        }
    }

    /// The result of a call can itself be called.
    #[test]
    fn chained_calls() {
        parse_compilation_unit("fn main() getCl()() end");
    }
}