use crate::ast::call::Call; use crate::ast::compilation_unit::CompilationUnit; use crate::ast::expression::Expression; use crate::ast::expression_statement::ExpressionStatement; use crate::ast::extern_function::ExternFunction; use crate::ast::function::Function; use crate::ast::identifier::Identifier; use crate::ast::integer_literal::IntegerLiteral; use crate::ast::let_statement::LetStatement; use crate::ast::module_level_declaration::ModuleLevelDeclaration; use crate::ast::statement::Statement; use crate::ast::string_literal::StringLiteral; use crate::diagnostic::Diagnostic; use crate::lexer::Lexer; use crate::source_range::SourceRange; use crate::token::{Token, TokenKind}; use std::str::FromStr; pub fn parse_compilation_unit(input: &str) -> Result> { let mut parser = Parser::new(input); parser.compilation_unit() } struct Parser<'a> { input: &'a str, lexer: Lexer<'a>, current: Option, lookahead: Option, } impl<'a> Parser<'a> { fn new(input: &'a str) -> Self { Self { input, lexer: Lexer::new(input), current: None, lookahead: None, } } fn advance_until(&mut self, token_kinds: &[TokenKind]) { while self.current.is_some() { self.advance(); match &self.current { None => { // reached eoi } Some(current) => { if token_kinds.contains(¤t.kind()) { break; } } } } } fn advance(&mut self) { if self.lookahead.is_some() { // we've advanced at least once self.current = self.lookahead.take(); self.lookahead = match self.lexer.next() { None => None, Some(result) => match result { Ok(token) => Some(token), Err(lexer_error) => { panic!("{:?}", lexer_error); } }, } } else if self.lookahead.is_none() && self.current.is_some() { // we're on the last token self.current = None; } else { // we've not yet advanced, so fetch both // current match self.lexer.next() { None => {} Some(result) => match result { Ok(token) => { self.current = Some(token); } Err(lexer_error) => { panic!("{:?}", lexer_error); } }, } // lookahead match self.lexer.next() { None => {} Some(result) => match result { Ok(token) => { self.lookahead = Some(token); } Err(lexer_error) => { panic!("{:?}", lexer_error); } }, } } } fn expect_advance(&mut self, token_kind: TokenKind) -> Result> { match self.current.take() { None => Err(vec![Diagnostic::new( &format!("Expected {:?} but found end-of-input.", token_kind), self.input.len(), self.input.len(), )]), Some(token) => { if token.kind() == token_kind { self.advance(); Ok(token) } else { self.advance_until(&[token_kind]); Err(vec![Diagnostic::new( &format!("Expected {:?} but found {:?}", token_kind, token.kind()), token.start(), token.end(), )]) } } } } fn peek_current(&self, token_kind: TokenKind) -> bool { match &self.current { None => panic!("Unexpected end of input."), Some(token) => token.kind() == token_kind, } } fn get_current(&self) -> &Token { match &self.current { None => { panic!("Unexpected end of input"); } Some(token) => token, } } fn sample_input(&self, start: usize, end: usize) -> &'a str { &self.input[start..end] } fn token_text(&self, token: &Token) -> &'a str { self.sample_input(token.start(), token.end()) } pub fn compilation_unit(&mut self) -> Result> { let mut declarations = vec![]; let mut diagnostics = vec![]; self.advance(); // get started while self.current.is_some() { let current = self.get_current(); match current.kind() { TokenKind::Fn | TokenKind::Extern => { let declaration_result = self.module_level_declaration(); match declaration_result { Ok(declaration) => declarations.push(declaration), Err(mut declaration_diagnostics) => { diagnostics.append(&mut declaration_diagnostics) } } } _ => { diagnostics.push(Diagnostic::new( &format!( "Expected any of: {:?}; found {:?}", [TokenKind::Fn, TokenKind::Extern], current.kind() ), current.start(), current.end(), )); self.advance_until(&[TokenKind::Fn, TokenKind::Extern]); } } } if diagnostics.is_empty() { Ok(CompilationUnit::new(declarations)) } else { Err(diagnostics) } } fn module_level_declaration(&mut self) -> Result> { let current = self.get_current(); match current.kind() { TokenKind::Fn => { let function_result = self.function(); match function_result { Ok(function) => Ok(ModuleLevelDeclaration::Function(function)), Err(function_diagnostics) => Err(function_diagnostics), } } TokenKind::Extern => { let extern_function_result = self.extern_function(); match extern_function_result { Ok(extern_function) => { Ok(ModuleLevelDeclaration::ExternFunction(extern_function)) } Err(extern_function_diagnostics) => Err(extern_function_diagnostics), } } _ => unreachable!(), } } fn function(&mut self) -> Result> { self.expect_advance(TokenKind::Fn)?; let identifier_token = self.expect_advance(TokenKind::Identifier)?; self.expect_advance(TokenKind::LeftParentheses)?; // add params self.expect_advance(TokenKind::RightParentheses)?; let mut statements = vec![]; let mut diagnostics = vec![]; while self.current.is_some() && !self.peek_current(TokenKind::End) { let statement_result = self.statement(); match statement_result { Ok(statement) => { statements.push(statement); } Err(mut statement_diagnostics) => { diagnostics.append(&mut statement_diagnostics); } } } // if we're missing "end", append it to the other statement diagnostics let end_result = self.expect_advance(TokenKind::End); match end_result { Err(mut end_diagnostics) => { diagnostics.append(&mut end_diagnostics); } _ => {} } if diagnostics.is_empty() { Ok(Function::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), statements, )) } else { Err(diagnostics) } } fn extern_function(&mut self) -> Result> { self.expect_advance(TokenKind::Extern)?; self.expect_advance(TokenKind::Fn)?; let identifier_token = self.expect_advance(TokenKind::Identifier)?; self.expect_advance(TokenKind::LeftParentheses)?; // params self.expect_advance(TokenKind::RightParentheses)?; // return type Ok(ExternFunction::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), )) } fn statement(&mut self) -> Result> { let current = self.get_current(); match current.kind() { TokenKind::Let => Ok(Statement::Let(self.let_statement()?)), _ => Ok(Statement::Expression(self.expression_statement()?)), } } fn let_statement(&mut self) -> Result> { self.expect_advance(TokenKind::Let)?; let identifier = self.expect_advance(TokenKind::Identifier)?; self.expect_advance(TokenKind::Equals)?; let expression = self.expression()?; Ok(LetStatement::new( self.token_text(&identifier), SourceRange::new(identifier.start(), identifier.end()), expression, )) } fn expression_statement(&mut self) -> Result> { Ok(ExpressionStatement::new(self.expression()?)) } fn expression(&mut self) -> Result> { let current = self.get_current().clone(); // I don't love this clone let mut diagnostics = vec![]; let mut expression = match current.kind() { TokenKind::IntegerLiteral => { let raw = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); self.advance(); Expression::IntegerLiteral(IntegerLiteral::new( i32::from_str(raw).unwrap(), source_range, )) } TokenKind::String => { let with_quotes = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); self.advance(); Expression::String(StringLiteral::new( &with_quotes[1..with_quotes.len() - 1], source_range, )) } TokenKind::Identifier => { let declared_name = self.token_text(¤t); let source_range = SourceRange::new(current.start(), current.end()); self.advance(); Expression::Identifier(Identifier::new(declared_name, source_range)) } _ => { diagnostics.push(Diagnostic::new( &format!( "Expected any of {:?} but found {:?}", [ TokenKind::IntegerLiteral, TokenKind::String, TokenKind::Identifier ], current.kind() ), current.start(), current.end(), )); self.advance_until(&[ TokenKind::IntegerLiteral, TokenKind::String, TokenKind::Identifier, ]); if self.current.is_some() { let try_again_result = self.expression(); match try_again_result { Ok(expression) => expression, Err(mut try_again_diagnostics) => { diagnostics.append(&mut try_again_diagnostics); return Err(diagnostics); } } } else { return Err(diagnostics); } } }; // postfixes while let Some(current) = &self.current { match current.kind() { TokenKind::LeftParentheses => { expression = Expression::Call(self.call(expression)?); } _ => break, } } if diagnostics.is_empty() { Ok(expression) } else { Err(diagnostics) } } fn call(&mut self, callee: Expression) -> Result> { self.expect_advance(TokenKind::LeftParentheses)?; let mut arguments = vec![]; while self.current.is_some() && !self.peek_current(TokenKind::RightParentheses) { arguments.push(self.expression()?); } let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses)?; let source_range = SourceRange::new(callee.source_range().start(), right_parentheses_token.end()); Ok(Call::new(callee, arguments, source_range)) } } #[cfg(test)] mod smoke_tests { use super::*; #[test] fn forty_two() { parse_compilation_unit("fn main() 42 end"); } #[test] fn hello_world() { let parse_result = parse_compilation_unit("fn main() println(\"Hello, World!\") end"); let compilation_unit = match parse_result { Ok(compilation_unit) => compilation_unit, Err(diagnostics) => { for diagnostic in &diagnostics { eprintln!("{:?}", diagnostic) } panic!() } }; let declarations = compilation_unit.declarations(); assert_eq!(declarations.len(), 1); let function = match &declarations[0] { ModuleLevelDeclaration::Function(function) => function, _ => panic!(), }; assert_eq!(function.declared_name(), "main"); let statements = function.statements(); assert_eq!(statements.len(), 1); if let Statement::Expression(expression_statement) = statements[0] { if let Expression::Call(call) = expression_statement.expression() { let callee = call.callee(); match callee { Expression::Identifier(identifier) => { assert_eq!(identifier.name(), "println"); } _ => panic!("Expected identifier"), } let arguments = call.arguments(); assert_eq!(arguments.len(), 1); let first_argument = arguments[0]; match first_argument { Expression::String(s) => { assert_eq!(s.content(), "Hello, World!"); } _ => panic!("Expected string"), } } else { panic!("Expected call"); } } else { panic!("Expected expression"); } } #[test] fn chained_calls() { parse_compilation_unit("fn main() getCl()() end"); } } #[cfg(test)] mod concrete_tests { use super::*; #[test] fn parses_extern_fn() { let parse_result = parse_compilation_unit("extern fn println()"); let compilation_unit = match parse_result { Ok(compilation_unit) => compilation_unit, Err(diagnostics) => { for diagnostic in diagnostics { eprintln!("{:?}", diagnostic); } panic!(); } }; let declarations = compilation_unit.declarations(); assert_eq!(declarations.len(), 1); let extern_function = match &declarations[0] { ModuleLevelDeclaration::ExternFunction(extern_function) => extern_function, _ => panic!(), }; assert_eq!(extern_function.declared_name(), "println"); } } #[cfg(test)] mod parse_failure_tests { use super::*; #[test] fn lone_end() { let parse_result = parse_compilation_unit("end"); match parse_result { Err(diagnostics) => { assert_eq!(diagnostics.len(), 1); for diagnostic in &diagnostics { println!("{:?}", diagnostic) } } Ok(_) => panic!(), } } #[test] fn two_ends() { let parse_result = parse_compilation_unit("end end"); match parse_result { Err(diagnostics) => { // Should only have an error on the first end, since we advance until we find a // token we can recover from (fn or extern) assert_eq!(diagnostics.len(), 1); } Ok(_) => panic!(), } } }