use crate::ast::additive_expression::AdditiveExpression; use crate::ast::call::Call; use crate::ast::compilation_unit::CompilationUnit; use crate::ast::expression::Expression; use crate::ast::expression_statement::ExpressionStatement; use crate::ast::extern_function::ExternFunction; use crate::ast::function::Function; use crate::ast::identifier::Identifier; use crate::ast::integer_literal::IntegerLiteral; use crate::ast::let_statement::LetStatement; use crate::ast::module_level_declaration::ModuleLevelDeclaration; use crate::ast::parameter::Parameter; use crate::ast::statement::Statement; use crate::ast::string_literal::StringLiteral; use crate::ast::type_use::TypeUse; use crate::diagnostic::Diagnostic; use crate::lexer::Lexer; use crate::source_range::SourceRange; use crate::token::{Token, TokenKind}; use std::str::FromStr; pub fn parse_compilation_unit(input: &str) -> Result> { let mut parser = Parser::new(input); parser.compilation_unit() } struct Parser<'a> { input: &'a str, lexer: Lexer<'a>, current: Option, lookahead: Option, } impl<'a> Parser<'a> { fn new(input: &'a str) -> Self { Self { input, lexer: Lexer::new(input), current: None, lookahead: None, } } fn advance_until(&mut self, token_kinds: &[TokenKind]) { while self.current.is_some() { self.advance(); match &self.current { None => { // reached eoi } Some(current) => { if token_kinds.contains(¤t.kind()) { break; } } } } } fn advance(&mut self) { if self.lookahead.is_some() { // we've advanced at least once self.current = self.lookahead.take(); self.lookahead = match self.lexer.next() { None => None, Some(result) => match result { Ok(token) => Some(token), Err(lexer_error) => { panic!("{:?}", lexer_error); } }, } } else if self.lookahead.is_none() && self.current.is_some() { // we're on the last token self.current = None; } else { // we've not yet advanced, so fetch both // current match self.lexer.next() { None => {} Some(result) => match result { Ok(token) => { self.current = Some(token); } Err(lexer_error) => { panic!("{:?}", lexer_error); } }, } // lookahead match self.lexer.next() { None => {} Some(result) => match result { Ok(token) => { self.lookahead = Some(token); } Err(lexer_error) => { panic!("{:?}", lexer_error); } }, } } } fn expect_advance(&mut self, token_kind: TokenKind) -> Result> { match self.current.take() { None => Err(vec![Diagnostic::new( &format!("Expected {:?} but found end-of-input.", token_kind), self.input.len(), self.input.len(), )]), Some(token) => { if token.kind() == token_kind { self.advance(); Ok(token) } else { self.advance_until(&[token_kind]); Err(vec![Diagnostic::new( &format!("Expected {:?} but found {:?}", token_kind, token.kind()), token.start(), token.end(), )]) } } } } fn peek_current(&self, token_kind: TokenKind) -> bool { match &self.current { None => panic!("Unexpected end of input."), Some(token) => token.kind() == token_kind, } } fn get_current(&self) -> &Token { match &self.current { None => { panic!("Unexpected end of input"); } Some(token) => token, } } fn peek_lookahead(&self, token_kind: TokenKind) -> bool { match &self.lookahead { None => panic!("Unexpected end of input."), Some(token) => token.kind() == token_kind, } } fn sample_input(&self, start: usize, end: usize) -> &'a str { &self.input[start..end] } fn token_text(&self, token: &Token) -> &'a str { self.sample_input(token.start(), token.end()) } pub fn compilation_unit(&mut self) -> Result> { let mut declarations = vec![]; let mut diagnostics = vec![]; self.advance(); // get started while self.current.is_some() { let current = self.get_current(); match current.kind() { TokenKind::Fn | TokenKind::Extern => { let declaration_result = self.module_level_declaration(); match declaration_result { Ok(declaration) => declarations.push(declaration), Err(mut declaration_diagnostics) => { diagnostics.append(&mut declaration_diagnostics) } } } _ => { diagnostics.push(Diagnostic::new( &format!( "Expected any of: {:?}; found {:?}", [TokenKind::Fn, TokenKind::Extern], current.kind() ), current.start(), current.end(), )); self.advance_until(&[TokenKind::Fn, TokenKind::Extern]); } } } if diagnostics.is_empty() { Ok(CompilationUnit::new(declarations)) } else { Err(diagnostics) } } fn module_level_declaration(&mut self) -> Result> { let current = self.get_current(); match current.kind() { TokenKind::Fn => { let function_result = self.function(); match function_result { Ok(function) => Ok(ModuleLevelDeclaration::Function(function)), Err(function_diagnostics) => Err(function_diagnostics), } } TokenKind::Extern => { let extern_function_result = self.extern_function(); match extern_function_result { Ok(extern_function) => { Ok(ModuleLevelDeclaration::ExternFunction(extern_function)) } Err(extern_function_diagnostics) => Err(extern_function_diagnostics), } } _ => unreachable!(), } } fn function(&mut self) -> Result> { self.expect_advance(TokenKind::Fn)?; let identifier_token = self.expect_advance(TokenKind::Identifier)?; self.expect_advance(TokenKind::LeftParentheses)?; let parameters = self.parameter_list()?; self.expect_advance(TokenKind::RightParentheses)?; let mut diagnostics = vec![]; let return_type = if self.current.is_some() && self.peek_current(TokenKind::RightArrow) { match self.return_type() { Ok(type_use) => Some(type_use), Err(mut return_type_diagnostics) => { diagnostics.append(&mut return_type_diagnostics); None } } } else { None }; let mut statements = vec![]; while self.current.is_some() && !self.peek_current(TokenKind::End) { let statement_result = self.statement(); match statement_result { Ok(statement) => { statements.push(statement); } Err(mut statement_diagnostics) => { diagnostics.append(&mut statement_diagnostics); } } } // if we're missing "end", append it to the other statement diagnostics let end_result = self.expect_advance(TokenKind::End); match end_result { Err(mut end_diagnostics) => { diagnostics.append(&mut end_diagnostics); } _ => {} } if diagnostics.is_empty() { Ok(Function::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), parameters, return_type, statements, )) } else { Err(diagnostics) } } fn extern_function(&mut self) -> Result> { self.expect_advance(TokenKind::Extern)?; self.expect_advance(TokenKind::Fn)?; let identifier_token = self.expect_advance(TokenKind::Identifier)?; self.expect_advance(TokenKind::LeftParentheses)?; let mut diagnostics = vec![]; let mut maybe_parameters: Option> = None; let params_result = self.parameter_list(); match params_result { Ok(parameters) => { maybe_parameters = Some(parameters); } Err(mut parameter_list_diagnostics) => { diagnostics.append(&mut parameter_list_diagnostics); } } let right_parentheses_result = self.expect_advance(TokenKind::RightParentheses); match right_parentheses_result { Err(mut right_parentheses_diagnostics) => { diagnostics.append(&mut right_parentheses_diagnostics); } Ok(_) => {} } let return_type = self.return_type()?; if diagnostics.is_empty() { Ok(ExternFunction::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), maybe_parameters.unwrap(), return_type, )) } else { Err(diagnostics) } } fn parameter_list(&mut self) -> Result, Vec> { let mut parameters = vec![]; let mut diagnostics = vec![]; while self.current.is_some() && self.peek_current(TokenKind::Identifier) { let parameter_result = self.parameter(); match parameter_result { Ok(parameter) => { parameters.push(parameter); } Err(mut parameter_diagnostics) => { diagnostics.append(&mut parameter_diagnostics); } } if self.current.is_some() && self.peek_current(TokenKind::Comma) { self.advance(); } } if diagnostics.is_empty() { Ok(parameters) } else { Err(diagnostics) } } fn parameter(&mut self) -> Result> { let identifier_token = self.expect_advance(TokenKind::Identifier)?; self.expect_advance(TokenKind::Colon)?; let type_use = self.type_use()?; Ok(Parameter::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), type_use, )) } fn return_type(&mut self) -> Result> { self.expect_advance(TokenKind::RightArrow)?; self.type_use() } fn type_use(&mut self) -> Result> { let identifier_token = self.expect_advance(TokenKind::Identifier)?; Ok(TypeUse::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), )) } fn statement(&mut self) -> Result> { let current = self.get_current(); match current.kind() { TokenKind::Let => Ok(Statement::Let(self.let_statement()?)), _ => Ok(Statement::Expression(self.expression_statement()?)), } } fn let_statement(&mut self) -> Result> { self.expect_advance(TokenKind::Let)?; let identifier = self.expect_advance(TokenKind::Identifier)?; self.expect_advance(TokenKind::Equals)?; let expression = self.expression()?; Ok(LetStatement::new( self.token_text(&identifier), SourceRange::new(identifier.start(), identifier.end()), expression, )) } fn expression_statement(&mut self) -> Result> { Ok(ExpressionStatement::new(self.expression()?)) } fn expression(&mut self) -> Result> { self.additive_expression() } fn additive_expression(&mut self) -> Result> { let mut result = self.suffix_expression()?; while self.current.is_some() { let current = self.get_current(); match current.kind() { TokenKind::Plus => { self.advance(); // plus let rhs = self.expression()?; let source_range = SourceRange::new(result.source_range().start(), rhs.source_range().end()); result = Expression::Additive(AdditiveExpression::new(result, rhs, source_range)); } _ => break, } } Ok(result) } fn suffix_expression(&mut self) -> Result> { let mut result = self.expression_base()?; while self.current.is_some() { let current = self.get_current(); match current.kind() { TokenKind::LeftParentheses => { result = Expression::Call(self.call(result)?); } _ => break, } } Ok(result) } fn expression_base(&mut self) -> Result> { let current = self.get_current(); match current.kind() { TokenKind::IntegerLiteral => { let raw = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); self.advance(); Ok(Expression::IntegerLiteral(IntegerLiteral::new( i32::from_str(raw).unwrap(), source_range, ))) } TokenKind::String => { let with_quotes = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); self.advance(); Ok(Expression::String(StringLiteral::new( &with_quotes[1..with_quotes.len() - 1], source_range, ))) } TokenKind::Identifier => { let declared_name = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); self.advance(); Ok(Expression::Identifier(Identifier::new( declared_name, source_range, ))) } _ => unreachable!(), } } fn call(&mut self, callee: Expression) -> Result> { self.expect_advance(TokenKind::LeftParentheses)?; let mut arguments = vec![]; if let Some(current) = &self.current { if matches!( current.kind(), TokenKind::IntegerLiteral | TokenKind::String | TokenKind::Identifier ) { arguments.append(&mut self.expression_list()?); } } let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses)?; let source_range = SourceRange::new(callee.source_range().start(), right_parentheses_token.end()); Ok(Call::new(callee, arguments, source_range)) } fn expression_list(&mut self) -> Result, Vec> { let mut expressions = vec![]; expressions.push(self.expression()?); while self.current.is_some() && self.peek_current(TokenKind::Comma) { self.advance(); // comma expressions.push(self.expression()?); } Ok(expressions) } } #[cfg(test)] mod smoke_tests { use super::*; fn smoke_test(input: &str) { let parse_result = parse_compilation_unit(input); match parse_result { Ok(_) => {} Err(diagnostics) => { eprintln!("{:#?}", diagnostics); panic!("There were diagnostics during parsing"); } } } #[test] fn forty_two() { smoke_test("fn main() 42 end"); } #[test] fn chained_calls() { smoke_test("fn main() getCl()() end"); } #[test] fn extern_fn_with_param() { smoke_test("extern fn println(message: Any) -> Void"); } #[test] fn fn_with_param() { smoke_test("fn foo(bar: Int) end"); } #[test] fn fn_with_params() { smoke_test("fn foo(bar: Int, baz: Int) end"); } #[test] fn return_type() { smoke_test("fn foo() -> Int end") } #[test] fn extern_return_type() { smoke_test("extern fn foo() -> Int"); } #[test] fn add_two_numbers() { smoke_test("fn main() 1 + 2 end"); } } #[cfg(test)] mod concrete_tests { use super::*; #[test] fn parses_extern_fn() { let parse_result = parse_compilation_unit("extern fn println() -> Void"); let compilation_unit = match parse_result { Ok(compilation_unit) => compilation_unit, Err(diagnostics) => { for diagnostic in diagnostics { eprintln!("{:?}", diagnostic); } panic!(); } }; let declarations = compilation_unit.declarations(); assert_eq!(declarations.len(), 1); let extern_function = match &declarations[0] { ModuleLevelDeclaration::ExternFunction(extern_function) => extern_function, _ => panic!(), }; assert_eq!(extern_function.declared_name(), "println"); } #[test] fn hello_world() { let parse_result = parse_compilation_unit("fn main() println(\"Hello, World!\") end"); let compilation_unit = match parse_result { Ok(compilation_unit) => compilation_unit, Err(diagnostics) => { for diagnostic in &diagnostics { eprintln!("{:?}", diagnostic) } panic!() } }; let declarations = compilation_unit.declarations(); assert_eq!(declarations.len(), 1); let function = match &declarations[0] { ModuleLevelDeclaration::Function(function) => function, _ => panic!(), }; assert_eq!(function.declared_name(), "main"); let statements = function.statements(); assert_eq!(statements.len(), 1); if let Statement::Expression(expression_statement) = statements[0] { if let Expression::Call(call) = expression_statement.expression() { let callee = call.callee(); match callee { Expression::Identifier(identifier) => { assert_eq!(identifier.name(), "println"); } _ => panic!("Expected identifier"), } let arguments = call.arguments(); assert_eq!(arguments.len(), 1); let first_argument = arguments[0]; match first_argument { Expression::String(s) => { assert_eq!(s.content(), "Hello, World!"); } _ => panic!("Expected string"), } } else { panic!("Expected call"); } } else { panic!("Expected expression"); } } } #[cfg(test)] mod parse_failure_tests { use super::*; #[test] fn lone_end() { let parse_result = parse_compilation_unit("end"); match parse_result { Err(diagnostics) => { assert_eq!(diagnostics.len(), 1); for diagnostic in &diagnostics { println!("{:?}", diagnostic) } } Ok(_) => panic!(), } } #[test] fn two_ends() { let parse_result = parse_compilation_unit("end end"); match parse_result { Err(diagnostics) => { // Should only have an error on the first end, since we advance until we find a // token we can recover from (fn or extern) assert_eq!(diagnostics.len(), 1); } Ok(_) => panic!(), } } }