From 8df46eec8d7736416c317e2360788af58406158e Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Fri, 27 Feb 2026 12:13:28 -0600 Subject: [PATCH] Moving to dmc-lib, keeping it smaller! --- Cargo.lock | 4 + Cargo.toml | 2 +- dmc-lib/Cargo.toml | 6 + dmc-lib/src/ast/call.rs | 80 ++++++++ dmc-lib/src/ast/compilation_unit.rs | 43 +++++ dmc-lib/src/ast/expression.rs | 58 ++++++ dmc-lib/src/ast/function.rs | 78 ++++++++ dmc-lib/src/ast/identifier.rs | 64 +++++++ dmc-lib/src/ast/integer_literal.rs | 23 +++ dmc-lib/src/ast/let_statement.rs | 71 +++++++ dmc-lib/src/ast/mod.rs | 43 +++++ dmc-lib/src/ast/statement.rs | 32 ++++ dmc-lib/src/ast/string_literal.rs | 23 +++ dmc-lib/src/diagnostic.rs | 28 +++ dmc-lib/src/lexer.rs | 157 ++++++++++++++++ dmc-lib/src/lib.rs | 10 + dmc-lib/src/parser.rs | 279 ++++++++++++++++++++++++++++ dmc-lib/src/scope.rs | 51 +++++ dmc-lib/src/source_range.rs | 18 ++ dmc-lib/src/symbol.rs | 90 +++++++++ dmc-lib/src/symbol_table.rs | 146 +++++++++++++++ dmc-lib/src/token.rs | 38 ++++ dmc-lib/src/type_info.rs | 9 + 23 files changed, 1352 insertions(+), 1 deletion(-) create mode 100644 dmc-lib/Cargo.toml create mode 100644 dmc-lib/src/ast/call.rs create mode 100644 dmc-lib/src/ast/compilation_unit.rs create mode 100644 dmc-lib/src/ast/expression.rs create mode 100644 dmc-lib/src/ast/function.rs create mode 100644 dmc-lib/src/ast/identifier.rs create mode 100644 dmc-lib/src/ast/integer_literal.rs create mode 100644 dmc-lib/src/ast/let_statement.rs create mode 100644 dmc-lib/src/ast/mod.rs create mode 100644 dmc-lib/src/ast/statement.rs create mode 100644 dmc-lib/src/ast/string_literal.rs create mode 100644 dmc-lib/src/diagnostic.rs create mode 100644 dmc-lib/src/lexer.rs create mode 100644 dmc-lib/src/lib.rs create mode 100644 dmc-lib/src/parser.rs create mode 100644 dmc-lib/src/scope.rs create mode 100644 dmc-lib/src/source_range.rs create mode 100644 dmc-lib/src/symbol.rs create mode 100644 dmc-lib/src/symbol_table.rs create mode 100644 dmc-lib/src/token.rs create mode 100644 dmc-lib/src/type_info.rs diff --git a/Cargo.lock b/Cargo.lock index 25ad009..1780281 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -204,6 +204,10 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dmc-lib" +version = "0.1.0" + [[package]] name = "encoding_rs" version = "0.8.35" diff --git a/Cargo.toml b/Cargo.toml index 337c69a..0236f76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,4 +25,4 @@ cst-test-generator = { path = "cst-test-generator" } [workspace] resolver = "3" -members = ["ast-generator", "cst-test-generator"] +members = ["ast-generator", "cst-test-generator", "dmc-lib"] diff --git a/dmc-lib/Cargo.toml b/dmc-lib/Cargo.toml new file mode 100644 index 0000000..a31e708 --- /dev/null +++ b/dmc-lib/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "dmc-lib" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/dmc-lib/src/ast/call.rs b/dmc-lib/src/ast/call.rs new file mode 100644 index 0000000..5d80155 --- /dev/null +++ b/dmc-lib/src/ast/call.rs @@ -0,0 +1,80 @@ +use crate::ast::expression::Expression; +use crate::diagnostic::Diagnostic; +use crate::source_range::SourceRange; +use crate::symbol_table::SymbolTable; +use crate::type_info::TypeInfo; + +pub struct Call { + callee: Box, + arguments: Vec, + source_range: SourceRange, +} + +impl Call { + pub fn new(callee: Expression, arguments: Vec, source_range: SourceRange) -> Self { + Self { + callee: callee.into(), + arguments, + source_range, + } + } + + pub fn callee(&self) -> &Expression { + &self.callee + } + + pub fn arguments(&self) -> Vec<&Expression> { + self.arguments.iter().collect() + } + + pub fn gather_declared_names(&mut self, symbol_table: &mut SymbolTable) -> Vec { + let mut diagnostics = vec![]; + diagnostics.append(&mut self.callee.gather_declared_names(symbol_table)); + for argument in &mut self.arguments { + diagnostics.append(&mut argument.gather_declared_names(symbol_table)); + } + diagnostics + } + + pub fn check_name_usages(&mut self, symbol_table: &SymbolTable) -> Vec { + let mut diagnostics = vec![]; + diagnostics.append(&mut self.callee.check_name_usages(symbol_table)); + for argument in &mut self.arguments { + diagnostics.append(&mut argument.check_name_usages(symbol_table)); + } + diagnostics + } + + pub fn type_check(&mut self, symbol_table: &SymbolTable) -> Vec { + let mut diagnostics = vec![]; + diagnostics.append(&mut self.callee.type_check(symbol_table)); + for argument in &mut self.arguments { + diagnostics.append(&mut argument.type_check(symbol_table)); + } + + // check that callee is callable + match self.callee.type_info() { + TypeInfo::Function(_) => {} + _ => { + diagnostics.push(Diagnostic::new( + "Receiver is not callable", + self.callee.source_range().start(), + self.callee.source_range().end(), + )); + } + } + + diagnostics + } + + pub fn type_info(&self) -> TypeInfo { + match self.callee.type_info() { + TypeInfo::Function(function_symbol) => function_symbol.return_type(), + _ => panic!(), + } + } + + pub fn source_range(&self) -> &SourceRange { + &self.source_range + } +} diff --git a/dmc-lib/src/ast/compilation_unit.rs b/dmc-lib/src/ast/compilation_unit.rs new file mode 100644 index 0000000..4e19aa9 --- /dev/null +++ b/dmc-lib/src/ast/compilation_unit.rs @@ -0,0 +1,43 @@ +use crate::ast::function::Function; +use crate::diagnostic::Diagnostic; +use crate::symbol_table::SymbolTable; + +pub struct CompilationUnit { + functions: Vec, +} + +impl CompilationUnit { + pub fn new(functions: Vec) -> Self { + Self { functions } + } + + pub fn functions(&self) -> Vec<&Function> { + self.functions.iter().collect() + } + + pub fn gather_declared_names(&mut self, symbol_table: &mut SymbolTable) -> Vec { + let mut diagnostics = vec![]; + symbol_table.push_scope("compilation_unit_scope"); + for function in &mut self.functions { + diagnostics.append(&mut function.gather_declared_names(symbol_table)); + } + symbol_table.pop_scope(); + diagnostics + } + + pub fn check_name_usages(&mut self, symbol_table: &SymbolTable) -> Vec { + let mut diagnostics = vec![]; + for function in &mut self.functions { + diagnostics.append(&mut function.check_name_usages(symbol_table)); + } + diagnostics + } + + pub fn type_check(&mut self, symbol_table: &SymbolTable) -> Vec { + let mut diagnostics = vec![]; + for function in &mut self.functions { + diagnostics.append(&mut function.type_check(symbol_table)); + } + diagnostics + } +} diff --git a/dmc-lib/src/ast/expression.rs b/dmc-lib/src/ast/expression.rs new file mode 100644 index 0000000..8d9dbba --- /dev/null +++ b/dmc-lib/src/ast/expression.rs @@ -0,0 +1,58 @@ +use crate::ast::call::Call; +use crate::ast::identifier::Identifier; +use crate::ast::integer_literal::IntegerLiteral; +use crate::ast::string_literal::StringLiteral; +use crate::diagnostic::Diagnostic; +use crate::source_range::SourceRange; +use crate::symbol_table::SymbolTable; +use crate::type_info::TypeInfo; + +pub enum Expression { + Call(Call), + IntegerLiteral(IntegerLiteral), + String(StringLiteral), + Identifier(Identifier), +} + +impl Expression { + pub fn gather_declared_names(&mut self, symbol_table: &mut SymbolTable) -> Vec { + match self { + Expression::Call(call) => call.gather_declared_names(symbol_table), + Expression::Identifier(identifier) => identifier.gather_declared_names(symbol_table), + _ => vec![], + } + } + + pub fn check_name_usages(&mut self, symbol_table: &SymbolTable) -> Vec { + match self { + Expression::Call(call) => call.check_name_usages(symbol_table), + Expression::Identifier(identifier) => identifier.check_name_usages(symbol_table), + _ => vec![], + } + } + + pub fn type_check(&mut self, symbol_table: &SymbolTable) -> Vec { + match self { + Expression::Call(call) => call.type_check(symbol_table), + _ => vec![], + } + } + + pub fn type_info(&self) -> TypeInfo { + match self { + Expression::Call(call) => call.type_info(), + Expression::IntegerLiteral(_) => TypeInfo::Integer, + Expression::String(_) => TypeInfo::String, + Expression::Identifier(identifier) => identifier.type_info(), + } + } + + pub fn source_range(&self) -> &SourceRange { + match self { + Expression::Call(call) => call.source_range(), + Expression::IntegerLiteral(integer_literal) => integer_literal.source_range(), + Expression::String(string_literal) => string_literal.source_range(), + Expression::Identifier(identifier) => identifier.source_range(), + } + } +} diff --git a/dmc-lib/src/ast/function.rs b/dmc-lib/src/ast/function.rs new file mode 100644 index 0000000..888f6f7 --- /dev/null +++ b/dmc-lib/src/ast/function.rs @@ -0,0 +1,78 @@ +use crate::ast::statement::Statement; +use crate::diagnostic::Diagnostic; +use crate::source_range::SourceRange; +use crate::symbol::FunctionSymbol; +use crate::symbol_table::{SymbolInsertError, SymbolTable}; + +pub struct Function { + declared_name: String, + declared_name_source_range: SourceRange, + statements: Vec, +} + +impl Function { + pub fn new( + declared_name: &str, + declared_name_source_range: SourceRange, + statements: Vec, + ) -> Self { + Self { + declared_name: declared_name.to_string(), + declared_name_source_range, + statements, + } + } + + pub fn declared_name(&self) -> &str { + &self.declared_name + } + + pub fn statements(&self) -> Vec<&Statement> { + self.statements.iter().collect() + } + + pub fn gather_declared_names(&mut self, symbol_table: &mut SymbolTable) -> Vec { + let mut diagnostics = vec![]; + // insert function symbol + let insert_result = symbol_table.insert_function_symbol(FunctionSymbol::new( + self.declared_name(), + &vec![], // todo + )); + if let Err(symbol_insert_error) = insert_result { + match symbol_insert_error { + SymbolInsertError::AlreadyDeclared(already_declared) => { + diagnostics.push(Diagnostic::new( + &format!( + "Function {} already declared in current scope", + already_declared.name() + ), + self.declared_name_source_range.start(), + self.declared_name_source_range.end(), + )); + } + } + } + symbol_table.push_scope(&format!("function_scope({})", self.declared_name())); + for statement in &mut self.statements { + diagnostics.append(&mut statement.gather_declared_names(symbol_table)); + } + symbol_table.pop_scope(); + diagnostics + } + + pub fn check_name_usages(&mut self, symbol_table: &SymbolTable) -> Vec { + let mut diagnostics = vec![]; + for statement in &mut self.statements { + diagnostics.append(&mut statement.check_name_usages(symbol_table)); + } + diagnostics + } + + pub fn type_check(&mut self, symbol_table: &SymbolTable) -> Vec { + let mut diagnostics = vec![]; + for statement in &mut self.statements { + diagnostics.append(&mut statement.type_check(symbol_table)); + } + diagnostics + } +} diff --git a/dmc-lib/src/ast/identifier.rs b/dmc-lib/src/ast/identifier.rs new file mode 100644 index 0000000..0782dcd --- /dev/null +++ b/dmc-lib/src/ast/identifier.rs @@ -0,0 +1,64 @@ +use crate::diagnostic::Diagnostic; +use crate::source_range::SourceRange; +use crate::symbol::ExpressibleSymbol; +use crate::symbol_table::SymbolTable; +use crate::type_info::TypeInfo; + +pub struct Identifier { + name: String, + scope_id: Option, + expressible_symbol: Option, + source_range: SourceRange, +} + +impl Identifier { + pub fn new(name: &str, source_range: SourceRange) -> Self { + Self { + name: name.into(), + scope_id: None, + expressible_symbol: None, + source_range, + } + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn gather_declared_names(&mut self, symbol_table: &SymbolTable) -> Vec { + self.scope_id = Some(symbol_table.current_scope_id()); + vec![] + } + + pub fn check_name_usages(&mut self, symbol_table: &SymbolTable) -> Vec { + let maybe_expressible_symbol = + symbol_table.find_expressible_symbol(self.scope_id.unwrap(), &self.name); + match maybe_expressible_symbol { + None => { + vec![Diagnostic::new( + &format!("Unable to resolve symbol {}", self.name), + self.source_range.start(), + self.source_range.end(), + )] + } + Some(expressible_symbol) => { + self.expressible_symbol = Some(expressible_symbol); + vec![] + } + } + } + + pub fn type_info(&self) -> TypeInfo { + match self.expressible_symbol.as_ref().unwrap() { + ExpressibleSymbol::Function(function_symbol) => { + TypeInfo::Function(function_symbol.clone()) + } + ExpressibleSymbol::Parameter(parameter_symbol) => parameter_symbol.type_info().clone(), + ExpressibleSymbol::Variable(variable_symbol) => variable_symbol.type_info().clone(), + } + } + + pub fn source_range(&self) -> &SourceRange { + &self.source_range + } +} diff --git a/dmc-lib/src/ast/integer_literal.rs b/dmc-lib/src/ast/integer_literal.rs new file mode 100644 index 0000000..3c907e5 --- /dev/null +++ b/dmc-lib/src/ast/integer_literal.rs @@ -0,0 +1,23 @@ +use crate::source_range::SourceRange; + +pub struct IntegerLiteral { + value: i64, + source_range: SourceRange, +} + +impl IntegerLiteral { + pub fn new(value: i64, source_range: SourceRange) -> Self { + Self { + value, + source_range, + } + } + + pub fn value(&self) -> i64 { + self.value + } + + pub fn source_range(&self) -> &SourceRange { + &self.source_range + } +} diff --git a/dmc-lib/src/ast/let_statement.rs b/dmc-lib/src/ast/let_statement.rs new file mode 100644 index 0000000..e778dfb --- /dev/null +++ b/dmc-lib/src/ast/let_statement.rs @@ -0,0 +1,71 @@ +use crate::ast::expression::Expression; +use crate::diagnostic::Diagnostic; +use crate::source_range::SourceRange; +use crate::symbol::VariableSymbol; +use crate::symbol_table::{SymbolInsertError, SymbolTable}; + +pub struct LetStatement { + declared_name: String, + declared_name_source_range: SourceRange, + initializer: Box, +} + +impl LetStatement { + pub fn new( + declared_name: &str, + declared_name_source_range: SourceRange, + initializer: Expression, + ) -> Self { + Self { + declared_name: declared_name.to_string(), + declared_name_source_range, + initializer: initializer.into(), + } + } + + pub fn declared_name(&self) -> &str { + &self.declared_name + } + + pub fn initializer(&self) -> &Expression { + &self.initializer + } + + pub fn initializer_mut(&mut self) -> &mut Expression { + &mut self.initializer + } + + pub fn gather_declared_names(&mut self, symbol_table: &mut SymbolTable) -> Vec { + let mut diagnostics = vec![]; + self.initializer_mut().gather_declared_names(symbol_table); + let insert_result = symbol_table.insert_variable_symbol(VariableSymbol::new( + self.declared_name(), + self.initializer().type_info().clone(), + )); + if let Err(symbol_insert_error) = insert_result { + match symbol_insert_error { + SymbolInsertError::AlreadyDeclared(already_declared) => { + diagnostics.push(Diagnostic::new( + &format!( + "Symbol {} already declared in current scope", + already_declared.name() + ), + self.declared_name_source_range.start(), + self.declared_name_source_range.end(), + )) + } + } + } + diagnostics + } + + pub fn check_name_usages(&mut self, symbol_table: &SymbolTable) -> Vec { + self.initializer.check_name_usages(symbol_table) + } + + pub fn type_check(&mut self, symbol_table: &SymbolTable) -> Vec { + let mut diagnostics = vec![]; + diagnostics.append(&mut self.initializer.type_check(symbol_table)); + diagnostics + } +} diff --git a/dmc-lib/src/ast/mod.rs b/dmc-lib/src/ast/mod.rs new file mode 100644 index 0000000..b75f1ef --- /dev/null +++ b/dmc-lib/src/ast/mod.rs @@ -0,0 +1,43 @@ +pub mod call; +pub mod compilation_unit; +pub mod expression; +pub mod function; +pub mod identifier; +pub mod integer_literal; +pub mod let_statement; +pub mod statement; +pub mod string_literal; + +#[cfg(test)] +mod name_tests { + use crate::parser::parse_compilation_unit; + use crate::symbol_table::SymbolTable; + + #[test] + fn smoke_screen() { + let mut symbol_table = SymbolTable::new(); + let mut compilation_unit = + parse_compilation_unit("fn println() end fn main() let x = 42 println(x) end"); + assert_eq!( + compilation_unit + .gather_declared_names(&mut symbol_table) + .len(), + 0 + ); + assert_eq!(compilation_unit.check_name_usages(&symbol_table).len(), 0); + } + + #[test] + fn get_some_diagnostics() { + let mut symbol_table = SymbolTable::new(); + let mut compilation_unit = parse_compilation_unit("fn main() notDefined(uhOh) end"); + assert_eq!( + compilation_unit + .gather_declared_names(&mut symbol_table) + .len(), + 0 + ); + let name_usage_diagnostics = compilation_unit.check_name_usages(&symbol_table); + assert_eq!(name_usage_diagnostics.len(), 2); + } +} diff --git a/dmc-lib/src/ast/statement.rs b/dmc-lib/src/ast/statement.rs new file mode 100644 index 0000000..16b46c1 --- /dev/null +++ b/dmc-lib/src/ast/statement.rs @@ -0,0 +1,32 @@ +use crate::ast::expression::Expression; +use crate::ast::let_statement::LetStatement; +use crate::diagnostic::Diagnostic; +use crate::symbol_table::SymbolTable; + +pub enum Statement { + Let(LetStatement), + Expression(Expression), +} + +impl Statement { + pub fn gather_declared_names(&mut self, symbol_table: &mut SymbolTable) -> Vec { + match self { + Statement::Let(let_statement) => let_statement.gather_declared_names(symbol_table), + Statement::Expression(expression) => expression.gather_declared_names(symbol_table), + } + } + + pub fn check_name_usages(&mut self, symbol_table: &SymbolTable) -> Vec { + match self { + Statement::Let(let_statement) => let_statement.check_name_usages(symbol_table), + Statement::Expression(expression) => expression.check_name_usages(symbol_table), + } + } + + pub fn type_check(&mut self, symbol_table: &SymbolTable) -> Vec { + match self { + Statement::Let(let_statement) => let_statement.type_check(symbol_table), + Statement::Expression(expression) => expression.type_check(symbol_table), + } + } +} diff --git a/dmc-lib/src/ast/string_literal.rs b/dmc-lib/src/ast/string_literal.rs new file mode 100644 index 0000000..c9c920f --- /dev/null +++ b/dmc-lib/src/ast/string_literal.rs @@ -0,0 +1,23 @@ +use crate::source_range::SourceRange; + +pub struct StringLiteral { + content: String, + source_range: SourceRange, +} + +impl StringLiteral { + pub fn new(content: &str, source_range: SourceRange) -> Self { + Self { + content: content.into(), + source_range, + } + } + + pub fn content(&self) -> &str { + &self.content + } + + pub fn source_range(&self) -> &SourceRange { + &self.source_range + } +} diff --git a/dmc-lib/src/diagnostic.rs b/dmc-lib/src/diagnostic.rs new file mode 100644 index 0000000..1a6cbb1 --- /dev/null +++ b/dmc-lib/src/diagnostic.rs @@ -0,0 +1,28 @@ +#[derive(Debug)] +pub struct Diagnostic { + message: String, + start: usize, + end: usize, +} + +impl Diagnostic { + pub fn new(message: &str, start: usize, end: usize) -> Self { + Self { + message: message.into(), + start, + end, + } + } + + pub fn message(&self) -> &str { + &self.message + } + + pub fn start(&self) -> usize { + self.start + } + + pub fn end(&self) -> usize { + self.end + } +} diff --git a/dmc-lib/src/lexer.rs b/dmc-lib/src/lexer.rs new file mode 100644 index 0000000..ea51ec7 --- /dev/null +++ b/dmc-lib/src/lexer.rs @@ -0,0 +1,157 @@ +use crate::token::{Token, TokenKind}; + +pub struct Lexer<'a> { + input: &'a str, + position: usize, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Self { + Self { input, position: 0 } + } + + pub fn next(&mut self) -> Option> { + let maybe_chunk = self.input.get(self.position..); + if maybe_chunk.is_none() { + return None; + } + + let mut chunk = maybe_chunk.unwrap(); + if chunk.is_empty() { + // in case we're done + return None; + } + while chunk.starts_with(&[' ', '\t', '\r', '\n']) { + // ignore whitespace + self.position += 1; + let maybe_chunk = self.input.get(self.position..); + if maybe_chunk.is_none() { + return None; + } else { + chunk = maybe_chunk.unwrap(); + } + } + + let token = if chunk.starts_with("(") { + Token::new(self.position, self.position + 1, TokenKind::LeftParentheses) + } else if chunk.starts_with(")") { + Token::new( + self.position, + self.position + 1, + TokenKind::RightParentheses, + ) + } else if chunk.starts_with("=") { + Token::new(self.position, self.position + 1, TokenKind::Equals) + } else { + // more than one char token + if chunk.starts_with(|c: char| c.is_ascii_digit()) { + // number literal + let mut end = self.position; + for char in chunk.chars() { + if char.is_ascii_digit() { + end += 1; + } else { + break; + } + } + Token::new(self.position, end, TokenKind::IntegerLiteral) + } else if chunk.starts_with("\"") { + // string literal + let mut end = self.position; + let mut terminated = false; + let mut chars = chunk.chars(); + chars.next(); // skip opening quote + end += 1; + for char in chars { + end += 1; + if char == '"' { + terminated = true; + break; + } + } + if !terminated { + return Some(Err(LexerError::new(LexerErrorKind::UnterminatedString))); + } + Token::new(self.position, end, TokenKind::String) + } else { + // keyword or identifier + let mut prefix = String::new(); + for char in chunk.chars() { + if char.is_alphanumeric() || char == '_' { + prefix.push(char); + } else { + break; + } + } + let token_kind = match prefix.as_str() { + "fn" => TokenKind::Fn, + "end" => TokenKind::End, + "let" => TokenKind::Let, + _ => TokenKind::Identifier, + }; + Token::new(self.position, self.position + prefix.len(), token_kind) + } + }; + self.position += token.end() - token.start(); + Some(Ok(token)) + } +} + +#[derive(Debug, Eq, PartialEq)] +pub struct LexerError { + kind: LexerErrorKind, +} + +impl LexerError { + pub fn new(kind: LexerErrorKind) -> Self { + Self { kind } + } + + pub fn kind(&self) -> LexerErrorKind { + self.kind + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum LexerErrorKind { + UnterminatedString, +} + +#[cfg(test)] +mod tests { + use super::*; + + fn assert_next(lexer: &mut Lexer, kind: TokenKind, length: usize) { + let token = lexer.next().unwrap().unwrap(); + println!("{:?}", token); + assert_eq!(token.kind(), kind); + assert_eq!(token.end() - token.start(), length); + } + + #[test] + fn forty_two() { + let mut lexer = Lexer::new("fn main() 42 end"); + assert_next(&mut lexer, TokenKind::Fn, 2); + assert_next(&mut lexer, TokenKind::Identifier, 4); + assert_next(&mut lexer, TokenKind::LeftParentheses, 1); + assert_next(&mut lexer, TokenKind::RightParentheses, 1); + assert_next(&mut lexer, TokenKind::IntegerLiteral, 2); + assert_next(&mut lexer, TokenKind::End, 3); + assert_eq!(lexer.next(), None); + } + + #[test] + fn hello_world() { + let mut lexer = Lexer::new("fn main() println(\"Hello, World!\") end"); + assert_next(&mut lexer, TokenKind::Fn, 2); + assert_next(&mut lexer, TokenKind::Identifier, 4); + assert_next(&mut lexer, TokenKind::LeftParentheses, 1); + assert_next(&mut lexer, TokenKind::RightParentheses, 1); + assert_next(&mut lexer, TokenKind::Identifier, 7); + assert_next(&mut lexer, TokenKind::LeftParentheses, 1); + assert_next(&mut lexer, TokenKind::String, 15); + assert_next(&mut lexer, TokenKind::RightParentheses, 1); + assert_next(&mut lexer, TokenKind::End, 3); + assert_eq!(lexer.next(), None); + } +} diff --git a/dmc-lib/src/lib.rs b/dmc-lib/src/lib.rs new file mode 100644 index 0000000..1fecf9c --- /dev/null +++ b/dmc-lib/src/lib.rs @@ -0,0 +1,10 @@ +mod ast; +mod diagnostic; +mod lexer; +mod parser; +mod scope; +mod source_range; +mod symbol; +mod symbol_table; +mod token; +mod type_info; diff --git a/dmc-lib/src/parser.rs b/dmc-lib/src/parser.rs new file mode 100644 index 0000000..3ef0041 --- /dev/null +++ b/dmc-lib/src/parser.rs @@ -0,0 +1,279 @@ +use crate::ast::call::Call; +use crate::ast::compilation_unit::CompilationUnit; +use crate::ast::expression::Expression; +use crate::ast::function::Function; +use crate::ast::identifier::Identifier; +use crate::ast::integer_literal::IntegerLiteral; +use crate::ast::let_statement::LetStatement; +use crate::ast::statement::Statement; +use crate::ast::string_literal::StringLiteral; +use crate::lexer::Lexer; +use crate::source_range::SourceRange; +use crate::token::{Token, TokenKind}; +use std::str::FromStr; + +pub fn parse_compilation_unit(input: &str) -> CompilationUnit { + let mut parser = Parser::new(input); + parser.compilation_unit() +} + +struct Parser<'a> { + input: &'a str, + lexer: Lexer<'a>, + current: Option, + lookahead: Option, +} + +impl<'a> Parser<'a> { + fn new(input: &'a str) -> Self { + Self { + input, + lexer: Lexer::new(input), + current: None, + lookahead: None, + } + } + + fn advance(&mut self) { + if self.lookahead.is_some() { + // we've advanced at least once + self.current = self.lookahead.take(); + self.lookahead = match self.lexer.next() { + None => None, + Some(result) => match result { + Ok(token) => Some(token), + Err(lexer_error) => { + panic!("{:?}", lexer_error); + } + }, + } + } else { + // we've not yet advanced, so fetch both + // current + match self.lexer.next() { + None => {} + Some(result) => match result { + Ok(token) => { + self.current = Some(token); + } + Err(lexer_error) => { + panic!("{:?}", lexer_error); + } + }, + } + // lookahead + match self.lexer.next() { + None => {} + Some(result) => match result { + Ok(token) => { + self.lookahead = Some(token); + } + Err(lexer_error) => { + panic!("{:?}", lexer_error); + } + }, + } + } + } + + fn expect_advance(&mut self, token_kind: TokenKind) -> Token { + match self.current.take() { + None => { + panic!("Expected {:?} but found end of input", token_kind); + } + Some(token) => { + if token.kind() == token_kind { + self.advance(); + token + } else { + panic!( + "Expected {:?} but found {:?} at {}", + token_kind, + token.kind(), + token.start() + ); + } + } + } + } + + fn peek_current(&self, token_kind: TokenKind) -> bool { + match &self.current { + None => false, + Some(token) => token.kind() == token_kind, + } + } + + fn get_current(&self) -> &Token { + match &self.current { + None => { + panic!("Unexpected end of input"); + } + Some(token) => token, + } + } + + fn sample_input(&self, start: usize, end: usize) -> &'a str { + &self.input[start..end] + } + + fn token_text(&self, token: &Token) -> &'a str { + self.sample_input(token.start(), token.end()) + } + + pub fn compilation_unit(&mut self) -> CompilationUnit { + let mut functions = vec![]; + self.advance(); + while self.current.is_some() { + functions.push(self.function()); + } + CompilationUnit::new(functions) + } + + fn function(&mut self) -> Function { + self.expect_advance(TokenKind::Fn); + let identifier_token = self.expect_advance(TokenKind::Identifier); + self.expect_advance(TokenKind::LeftParentheses); + // add params + self.expect_advance(TokenKind::RightParentheses); + let mut statements = vec![]; + while !self.peek_current(TokenKind::End) { + statements.push(self.statement()); + } + self.expect_advance(TokenKind::End); + Function::new( + self.token_text(&identifier_token), + SourceRange::new(identifier_token.start(), identifier_token.end()), + statements, + ) + } + + fn statement(&mut self) -> Statement { + let current = self.get_current(); + match current.kind() { + TokenKind::Let => self.let_statement(), + _ => self.expression_statement(), + } + } + + fn let_statement(&mut self) -> Statement { + self.expect_advance(TokenKind::Let); + let identifier = self.expect_advance(TokenKind::Identifier); + self.expect_advance(TokenKind::Equals); + let expression = self.expression(); + Statement::Let(LetStatement::new( + self.token_text(&identifier), + SourceRange::new(identifier.start(), identifier.end()), + expression, + )) + } + + fn expression_statement(&mut self) -> Statement { + Statement::Expression(self.expression()) + } + + fn expression(&mut self) -> Expression { + let current = self.get_current(); + let mut result = match current.kind() { + TokenKind::IntegerLiteral => { + let raw = self.token_text(current); + let source_range = SourceRange::new(current.start(), current.end()); + self.advance(); + Expression::IntegerLiteral(IntegerLiteral::new( + i64::from_str(raw).unwrap(), + source_range, + )) + } + TokenKind::String => { + let with_quotes = self.token_text(current); + let source_range = SourceRange::new(current.start(), current.end()); + self.advance(); + Expression::String(StringLiteral::new( + &with_quotes[1..with_quotes.len() - 1], + source_range, + )) + } + TokenKind::Identifier => { + let declared_name = self.token_text(current); + let source_range = SourceRange::new(current.start(), current.end()); + self.advance(); + Expression::Identifier(Identifier::new(declared_name, source_range)) + } + _ => panic!("Unexpected token {:?}", current.kind()), + }; + + // postfixes + while let Some(current) = &self.current { + match current.kind() { + TokenKind::LeftParentheses => { + result = Expression::Call(self.call(result)); + } + _ => break, + } + } + + result + } + + fn call(&mut self, callee: Expression) -> Call { + self.expect_advance(TokenKind::LeftParentheses); + let mut arguments = vec![]; + while !self.peek_current(TokenKind::RightParentheses) { + arguments.push(self.expression()); + } + let right_parentheses_token = self.expect_advance(TokenKind::RightParentheses); + let source_range = + SourceRange::new(callee.source_range().start(), right_parentheses_token.end()); + Call::new(callee, arguments, source_range) + } +} + +#[cfg(test)] +mod smoke_tests { + use super::*; + + #[test] + fn forty_two() { + parse_compilation_unit("fn main() 42 end"); + } + + #[test] + fn hello_world() { + let compilation_unit = parse_compilation_unit("fn main() println(\"Hello, World!\") end"); + let functions = compilation_unit.functions(); + assert_eq!(functions.len(), 1); + let function = functions[0]; + assert_eq!(function.declared_name(), "main"); + let statements = function.statements(); + assert_eq!(statements.len(), 1); + if let Statement::Expression(expression) = statements[0] { + if let Expression::Call(call) = expression { + let callee = call.callee(); + match callee { + Expression::Identifier(identifier) => { + assert_eq!(identifier.name(), "println"); + } + _ => panic!("Expected identifier"), + } + let arguments = call.arguments(); + assert_eq!(arguments.len(), 1); + let first_argument = arguments[0]; + match first_argument { + Expression::String(s) => { + assert_eq!(s.content(), "Hello, World!"); + } + _ => panic!("Expected string"), + } + } else { + panic!("Expected call"); + } + } else { + panic!("Expected expression"); + } + } + + #[test] + fn chained_calls() { + parse_compilation_unit("fn main() getCl()() end"); + } +} diff --git a/dmc-lib/src/scope.rs b/dmc-lib/src/scope.rs new file mode 100644 index 0000000..f0f219d --- /dev/null +++ b/dmc-lib/src/scope.rs @@ -0,0 +1,51 @@ +use crate::symbol::{FunctionSymbol, ParameterSymbol, VariableSymbol}; +use std::collections::HashMap; +use std::rc::Rc; + +pub struct Scope { + debug_name: String, + parent_id: Option, + function_symbols: HashMap, Rc>, + parameter_symbols: HashMap, Rc>, + variable_symbols: HashMap, Rc>, +} + +impl Scope { + pub fn new(debug_name: &str, parent_id: Option) -> Self { + Self { + debug_name: debug_name.into(), + parent_id, + function_symbols: HashMap::new(), + parameter_symbols: HashMap::new(), + variable_symbols: HashMap::new(), + } + } + + pub fn function_symbols(&self) -> &HashMap, Rc> { + &self.function_symbols + } + + pub fn function_symbols_mut(&mut self) -> &mut HashMap, Rc> { + &mut self.function_symbols + } + + pub fn parameter_symbols(&self) -> &HashMap, Rc> { + &self.parameter_symbols + } + + pub fn parameter_symbols_mut(&mut self) -> &mut HashMap, Rc> { + &mut self.parameter_symbols + } + + pub fn variable_symbols(&self) -> &HashMap, Rc> { + &self.variable_symbols + } + + pub fn variable_symbols_mut(&mut self) -> &mut HashMap, Rc> { + &mut self.variable_symbols + } + + pub fn parent_id(&self) -> Option { + self.parent_id + } +} diff --git a/dmc-lib/src/source_range.rs b/dmc-lib/src/source_range.rs new file mode 100644 index 0000000..3996719 --- /dev/null +++ b/dmc-lib/src/source_range.rs @@ -0,0 +1,18 @@ +pub struct SourceRange { + start: usize, + end: usize, +} + +impl SourceRange { + pub fn new(start: usize, end: usize) -> Self { + Self { start, end } + } + + pub fn start(&self) -> usize { + self.start + } + + pub fn end(&self) -> usize { + self.end + } +} diff --git a/dmc-lib/src/symbol.rs b/dmc-lib/src/symbol.rs new file mode 100644 index 0000000..1943dcb --- /dev/null +++ b/dmc-lib/src/symbol.rs @@ -0,0 +1,90 @@ +use crate::type_info::TypeInfo; +use std::rc::Rc; + +pub struct FunctionSymbol { + name: Rc, + parameters: Vec>, +} + +impl FunctionSymbol { + pub fn new(name: &str, parameters: &[Rc]) -> Self { + Self { + name: name.into(), + parameters: parameters.into(), + } + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn name_owned(&self) -> Rc { + self.name.clone() + } + + pub fn parameters(&self) -> &[Rc] { + &self.parameters + } + + pub fn return_type(&self) -> TypeInfo { + todo!() + } +} + +pub struct ParameterSymbol { + name: Rc, + type_info: TypeInfo, +} + +impl ParameterSymbol { + pub fn new(name: &str, type_info: TypeInfo) -> Self { + Self { + name: name.into(), + type_info, + } + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn name_owned(&self) -> Rc { + self.name.clone() + } + + pub fn type_info(&self) -> &TypeInfo { + &self.type_info + } +} + +pub struct VariableSymbol { + name: Rc, + type_info: TypeInfo, +} + +impl VariableSymbol { + pub fn new(name: &str, type_info: TypeInfo) -> Self { + Self { + name: name.into(), + type_info, + } + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn name_owned(&self) -> Rc { + self.name.clone() + } + + pub fn type_info(&self) -> &TypeInfo { + &self.type_info + } +} + +pub enum ExpressibleSymbol { + Function(Rc), + Parameter(Rc), + Variable(Rc), +} diff --git a/dmc-lib/src/symbol_table.rs b/dmc-lib/src/symbol_table.rs new file mode 100644 index 0000000..a50af89 --- /dev/null +++ b/dmc-lib/src/symbol_table.rs @@ -0,0 +1,146 @@ +use crate::scope::Scope; +use crate::symbol::{ExpressibleSymbol, FunctionSymbol, ParameterSymbol, VariableSymbol}; +use std::rc::Rc; + +pub struct SymbolTable { + scopes: Vec, + current_scope_id: Option, +} + +impl SymbolTable { + pub fn new() -> Self { + Self { + scopes: vec![], + current_scope_id: None, + } + } + + pub fn push_scope(&mut self, debug_name: &str) -> usize { + let scope_id = self.scopes.len(); + let parent_id = self.current_scope_id; + let scope = Scope::new(debug_name, parent_id); + self.scopes.push(scope); + self.current_scope_id = Some(scope_id); + scope_id + } + + pub fn pop_scope(&mut self) { + self.current_scope_id = self.current_scope().parent_id(); + } + + pub fn current_scope_id(&self) -> usize { + self.current_scope_id.unwrap() + } + + fn current_scope(&self) -> &Scope { + &self.scopes[self.current_scope_id.unwrap()] + } + + fn current_scope_mut(&mut self) -> &mut Scope { + &mut self.scopes[self.current_scope_id.unwrap()] + } + + fn current_scope_has_name(&self, name: &str) -> bool { + let current_scope = self.current_scope(); + current_scope.function_symbols().contains_key(name) + || current_scope.parameter_symbols().contains_key(name) + || current_scope.variable_symbols().contains_key(name) + } + + pub fn insert_function_symbol( + &mut self, + function_symbol: FunctionSymbol, + ) -> Result<(), SymbolInsertError> { + if self.current_scope_has_name(function_symbol.name()) { + return Err(SymbolInsertError::AlreadyDeclared(AlreadyDeclared::new( + function_symbol.name(), + ))); + } + self.current_scope_mut() + .function_symbols_mut() + .insert(function_symbol.name_owned(), Rc::new(function_symbol)); + Ok(()) + } + + pub fn insert_parameter_symbol( + &mut self, + parameter_symbol: ParameterSymbol, + ) -> Result<(), SymbolInsertError> { + if self.current_scope_has_name(parameter_symbol.name()) { + return Err(SymbolInsertError::AlreadyDeclared(AlreadyDeclared::new( + parameter_symbol.name(), + ))); + } + self.current_scope_mut() + .parameter_symbols_mut() + .insert(parameter_symbol.name_owned(), Rc::new(parameter_symbol)); + Ok(()) + } + + pub fn insert_variable_symbol( + &mut self, + variable_symbol: VariableSymbol, + ) -> Result<(), SymbolInsertError> { + if self.current_scope_has_name(variable_symbol.name()) { + return Err(SymbolInsertError::AlreadyDeclared(AlreadyDeclared::new( + variable_symbol.name(), + ))); + } + self.current_scope_mut() + .variable_symbols_mut() + .insert(variable_symbol.name_owned(), Rc::new(variable_symbol)); + Ok(()) + } + + pub fn find_expressible_symbol( + &self, + scope_id: usize, + name: &str, + ) -> Option { + let mut maybe_scope = self.scopes.get(scope_id); + if maybe_scope.is_none() { + panic!("Invalid scope_id: {}", scope_id); + } + while let Some(scope) = maybe_scope { + let maybe_expressible_symbol = scope + .variable_symbols() + .get(name) + .map(|variable_symbol| ExpressibleSymbol::Variable(variable_symbol.clone())) + .or_else(|| { + scope + .function_symbols() + .get(name) + .map(|function_symbol| ExpressibleSymbol::Function(function_symbol.clone())) + }) + .or_else(|| { + scope.parameter_symbols().get(name).map(|parameter_symbol| { + ExpressibleSymbol::Parameter(parameter_symbol.clone()) + }) + }); + if maybe_expressible_symbol.is_some() { + return maybe_expressible_symbol; + } else { + maybe_scope = scope.parent_id().map(|id| &self.scopes[id]); + } + } + None + } +} + +pub enum SymbolInsertError { + AlreadyDeclared(AlreadyDeclared), +} + +pub struct AlreadyDeclared { + name: String, +} + +impl AlreadyDeclared { + pub fn new(name: &str) -> Self { + Self { name: name.into() } + } + + pub fn name(&self) -> &str { + &self.name + } +} diff --git a/dmc-lib/src/token.rs b/dmc-lib/src/token.rs new file mode 100644 index 0000000..b8bd672 --- /dev/null +++ b/dmc-lib/src/token.rs @@ -0,0 +1,38 @@ +#[derive(Debug, Eq, PartialEq, Clone)] +pub struct Token { + start: usize, + end: usize, + kind: TokenKind, +} + +impl Token { + pub fn new(start: usize, end: usize, kind: TokenKind) -> Self { + Self { start, end, kind } + } + + pub fn start(&self) -> usize { + self.start + } + + pub fn end(&self) -> usize { + self.end + } + + pub fn kind(&self) -> TokenKind { + self.kind + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum TokenKind { + Fn, + Identifier, + LeftParentheses, + RightParentheses, + End, + Let, + Equals, + IntegerLiteral, + LongLiteral, + String, +} diff --git a/dmc-lib/src/type_info.rs b/dmc-lib/src/type_info.rs new file mode 100644 index 0000000..3d161e0 --- /dev/null +++ b/dmc-lib/src/type_info.rs @@ -0,0 +1,9 @@ +use crate::symbol::FunctionSymbol; +use std::rc::Rc; + +#[derive(Clone)] +pub enum TypeInfo { + Integer, + String, + Function(Rc), +}