From 80b6b96aeb165b504755508266061e708c158be8 Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Tue, 10 Mar 2026 21:20:21 -0500 Subject: [PATCH] Implement lexing and parsing for classes. --- dmc-lib/src/ast/class.rs | 27 +++ dmc-lib/src/ast/field.rs | 33 +++ dmc-lib/src/ast/function.rs | 3 + dmc-lib/src/ast/mod.rs | 2 + dmc-lib/src/ast/module_level_declaration.rs | 11 + dmc-lib/src/diagnostic.rs | 14 ++ dmc-lib/src/lexer.rs | 41 ++++ dmc-lib/src/parser.rs | 240 ++++++++++++++++++-- dmc-lib/src/token.rs | 5 + 9 files changed, 356 insertions(+), 20 deletions(-) create mode 100644 dmc-lib/src/ast/class.rs create mode 100644 dmc-lib/src/ast/field.rs diff --git a/dmc-lib/src/ast/class.rs b/dmc-lib/src/ast/class.rs new file mode 100644 index 0000000..bd8e7fa --- /dev/null +++ b/dmc-lib/src/ast/class.rs @@ -0,0 +1,27 @@ +use crate::ast::field::Field; +use crate::ast::function::Function; +use crate::source_range::SourceRange; +use std::rc::Rc; + +pub struct Class { + declared_name: Rc, + declared_name_source_range: SourceRange, + fields: Vec, + functions: Vec, +} + +impl Class { + pub fn new( + declared_name: &str, + declared_name_source_range: SourceRange, + fields: Vec, + functions: Vec, + ) -> Self { + Class { + declared_name: declared_name.into(), + declared_name_source_range, + fields, + functions, + } + } +} diff --git a/dmc-lib/src/ast/field.rs b/dmc-lib/src/ast/field.rs new file mode 100644 index 0000000..4aa8c76 --- /dev/null +++ b/dmc-lib/src/ast/field.rs @@ -0,0 +1,33 @@ +use crate::ast::expression::Expression; +use crate::ast::type_use::TypeUse; +use crate::source_range::SourceRange; +use std::rc::Rc; + +pub struct Field { + declared_name: Rc, + declared_name_source_range: SourceRange, + is_public: bool, + is_mut: bool, + declared_type: Option>, + initializer: Option>, +} + +impl Field { + pub fn new( + declared_name: &str, + declared_name_source_range: SourceRange, + is_public: bool, + is_mut: bool, + declared_type: Option, + initializer: Option, + ) -> Self { + Self { + declared_name: declared_name.into(), + declared_name_source_range, + is_public, + is_mut, + declared_type: declared_type.map(Box::new), + initializer: initializer.map(Box::new), + } + } +} diff --git a/dmc-lib/src/ast/function.rs b/dmc-lib/src/ast/function.rs index efeead8..804de82 100644 --- a/dmc-lib/src/ast/function.rs +++ b/dmc-lib/src/ast/function.rs @@ -16,6 +16,7 @@ use std::rc::Rc; pub struct Function { declared_name: String, declared_name_source_range: SourceRange, + is_public: bool, parameters: Vec, return_type: Option, statements: Vec, @@ -26,6 +27,7 @@ impl Function { pub fn new( declared_name: &str, declared_name_source_range: SourceRange, + is_public: bool, parameters: Vec, return_type: Option, statements: Vec, @@ -33,6 +35,7 @@ impl Function { Self { declared_name: declared_name.to_string(), declared_name_source_range, + is_public, parameters, return_type, statements, diff --git a/dmc-lib/src/ast/mod.rs b/dmc-lib/src/ast/mod.rs index a918efb..b71053e 100644 --- a/dmc-lib/src/ast/mod.rs +++ b/dmc-lib/src/ast/mod.rs @@ -1,10 +1,12 @@ pub mod add_expression; pub mod call; +pub mod class; pub mod compilation_unit; pub mod double_literal; pub mod expression; pub mod expression_statement; pub mod extern_function; +pub mod field; pub mod fqn; pub mod function; pub mod identifier; diff --git a/dmc-lib/src/ast/module_level_declaration.rs b/dmc-lib/src/ast/module_level_declaration.rs index 9b778c0..c9a24a4 100644 --- a/dmc-lib/src/ast/module_level_declaration.rs +++ b/dmc-lib/src/ast/module_level_declaration.rs @@ -1,3 +1,4 @@ +use crate::ast::class::Class; use crate::ast::extern_function::ExternFunction; use crate::ast::function::Function; use crate::diagnostic::Diagnostic; @@ -6,6 +7,7 @@ use crate::symbol_table::SymbolTable; pub enum ModuleLevelDeclaration { Function(Function), ExternFunction(ExternFunction), + Class(Class), } impl ModuleLevelDeclaration { @@ -20,6 +22,9 @@ impl ModuleLevelDeclaration { ModuleLevelDeclaration::ExternFunction(extern_function) => { extern_function.gather_declared_names(symbol_table) } + ModuleLevelDeclaration::Class(class) => { + todo!() + } } } @@ -29,6 +34,9 @@ impl ModuleLevelDeclaration { ModuleLevelDeclaration::ExternFunction(extern_function) => { extern_function.check_name_usages(symbol_table) } + ModuleLevelDeclaration::Class(class) => { + todo!() + } } } @@ -38,6 +46,9 @@ impl ModuleLevelDeclaration { ModuleLevelDeclaration::ExternFunction(extern_function) => { extern_function.type_check(symbol_table) } + ModuleLevelDeclaration::Class(class) => { + todo!() + } } } } diff --git a/dmc-lib/src/diagnostic.rs b/dmc-lib/src/diagnostic.rs index 1a6cbb1..06ee3f8 100644 --- a/dmc-lib/src/diagnostic.rs +++ b/dmc-lib/src/diagnostic.rs @@ -3,6 +3,8 @@ pub struct Diagnostic { message: String, start: usize, end: usize, + reporter_file: Option<&'static str>, + reporter_line: Option, } impl Diagnostic { @@ -11,6 +13,8 @@ impl Diagnostic { message: message.into(), start, end, + reporter_line: None, + reporter_file: None, } } @@ -25,4 +29,14 @@ impl Diagnostic { pub fn end(&self) -> usize { self.end } + + pub fn with_reporter(&self, file: &'static str, line: u32) -> Self { + Self { + message: self.message.clone(), + start: self.start, + end: self.end, + reporter_file: Some(file), + reporter_line: Some(line), + } + } } diff --git a/dmc-lib/src/lexer.rs b/dmc-lib/src/lexer.rs index 6a475d3..462610a 100644 --- a/dmc-lib/src/lexer.rs +++ b/dmc-lib/src/lexer.rs @@ -54,6 +54,8 @@ impl<'a> Lexer<'a> { Token::new(self.position, self.position + 1, TokenKind::Comma) } else if chunk.starts_with(":") { Token::new(self.position, self.position + 1, TokenKind::Colon) + } else if chunk.starts_with(".") { + Token::new(self.position, self.position + 1, TokenKind::Dot) } else { // more than one char token if chunk.starts_with(|c: char| c.is_ascii_digit()) { @@ -118,11 +120,22 @@ impl<'a> Lexer<'a> { break; } } + + if prefix.len() == 0 { + return Some(Err(LexerError::new(LexerErrorKind::UnrecognizedCharacter( + chunk.chars().next().unwrap(), + )))); + } + let token_kind = match prefix.as_str() { "fn" => TokenKind::Fn, "end" => TokenKind::End, "let" => TokenKind::Let, "extern" => TokenKind::Extern, + "class" => TokenKind::Class, + "self" => TokenKind::SelfKw, + "pub" => TokenKind::Public, + "mut" => TokenKind::Mut, _ => TokenKind::Identifier, }; Token::new(self.position, self.position + prefix.len(), token_kind) @@ -151,6 +164,7 @@ impl LexerError { #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum LexerErrorKind { UnterminatedString, + UnrecognizedCharacter(char), } #[cfg(test)] @@ -239,4 +253,31 @@ mod tests { assert_next(&mut lexer, TokenKind::RightParentheses, 1); assert!(lexer.next().is_none()); } + + #[test] + fn class_with_fields_smoke_test() { + let mut lexer = Lexer::new( + " + class Foo + pub bar: Int + mut baz: String + car = 42 + + fn new(bar: Int, baz: String) + self.bar = bar + self.baz = baz + end + end + ", + ); + while let Some(result) = lexer.next() { + match result { + Ok(_) => {} + Err(lexer_error) => { + panic!("{:?}", lexer_error); + } + } + } + assert!(lexer.next().is_none()); + } } diff --git a/dmc-lib/src/parser.rs b/dmc-lib/src/parser.rs index 96efc41..654ef6e 100644 --- a/dmc-lib/src/parser.rs +++ b/dmc-lib/src/parser.rs @@ -1,10 +1,12 @@ use crate::ast::add_expression::AddExpression; use crate::ast::call::Call; +use crate::ast::class::Class; use crate::ast::compilation_unit::CompilationUnit; use crate::ast::double_literal::DoubleLiteral; use crate::ast::expression::Expression; use crate::ast::expression_statement::ExpressionStatement; use crate::ast::extern_function::ExternFunction; +use crate::ast::field::Field; use crate::ast::function::Function; use crate::ast::identifier::Identifier; use crate::ast::integer_literal::IntegerLiteral; @@ -33,6 +35,21 @@ pub fn parse_expression(input: &str) -> Result> { parser.expression() } +macro_rules! matches_expression_first { + ( $token_kind : expr ) => { + matches!( + $token_kind, + TokenKind::IntegerLiteral + | TokenKind::DoubleLiteral + | TokenKind::LongLiteral + | TokenKind::String + | TokenKind::Minus + | TokenKind::SelfKw + | TokenKind::Identifier + ) + }; +} + struct Parser<'a> { input: &'a str, lexer: Lexer<'a>, @@ -113,22 +130,28 @@ impl<'a> Parser<'a> { fn expect_advance(&mut self, token_kind: TokenKind) -> Result> { match self.current.take() { - None => Err(vec![Diagnostic::new( - &format!("Expected {:?} but found end-of-input.", token_kind), - self.input.len(), - self.input.len(), - )]), + None => Err(vec![ + Diagnostic::new( + &format!("Expected {:?} but found end-of-input.", token_kind), + self.input.len(), + self.input.len(), + ) + .with_reporter(file!(), line!()), + ]), Some(token) => { if token.kind() == token_kind { self.advance(); Ok(token) } else { self.advance_until(&[token_kind]); - Err(vec![Diagnostic::new( - &format!("Expected {:?} but found {:?}", token_kind, token.kind()), - token.start(), - token.end(), - )]) + Err(vec![ + Diagnostic::new( + &format!("Expected {:?} but found {:?}", token_kind, token.kind()), + token.start(), + token.end(), + ) + .with_reporter(file!(), line!()), + ]) } } } @@ -172,7 +195,7 @@ impl<'a> Parser<'a> { while self.current.is_some() { let current = self.get_current(); match current.kind() { - TokenKind::Fn | TokenKind::Extern => { + TokenKind::Fn | TokenKind::Extern | TokenKind::Class => { let declaration_result = self.module_level_declaration(); match declaration_result { Ok(declaration) => declarations.push(declaration), @@ -184,8 +207,8 @@ impl<'a> Parser<'a> { _ => { diagnostics.push(Diagnostic::new( &format!( - "Expected any of: {:?}; found {:?}", - [TokenKind::Fn, TokenKind::Extern], + "Expected any of {:?}; found {:?}", + [TokenKind::Fn, TokenKind::Extern, TokenKind::Class], current.kind() ), current.start(), @@ -221,11 +244,22 @@ impl<'a> Parser<'a> { Err(extern_function_diagnostics) => Err(extern_function_diagnostics), } } + TokenKind::Class => match self.class() { + Ok(class) => Ok(ModuleLevelDeclaration::Class(class)), + Err(class_diagnostics) => Err(class_diagnostics), + }, _ => unreachable!(), } } fn function(&mut self) -> Result> { + let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) { + self.advance(); // pub + true + } else { + false + }; + self.expect_advance(TokenKind::Fn)?; let identifier_token = self.expect_advance(TokenKind::Identifier)?; @@ -273,6 +307,7 @@ impl<'a> Parser<'a> { Ok(Function::new( self.token_text(&identifier_token), SourceRange::new(identifier_token.start(), identifier_token.end()), + is_public, parameters, return_type, statements, @@ -323,6 +358,46 @@ impl<'a> Parser<'a> { } } + fn class(&mut self) -> Result> { + self.expect_advance(TokenKind::Class)?; + let identifier_token = self.expect_advance(TokenKind::Identifier)?; + let mut fields = vec![]; + let mut functions = vec![]; + + let mut diagnostics = vec![]; + + while self.current.is_some() && !self.peek_current(TokenKind::End) { + match self.get_current().kind() { + TokenKind::Public => match self.public_class_member(&mut fields, &mut functions) { + Ok(_) => {} + Err(mut member_diagnostics) => diagnostics.append(&mut member_diagnostics), + }, + TokenKind::Mut | TokenKind::Identifier => match self.field() { + Ok(field) => fields.push(field), + Err(mut field_diagnostics) => diagnostics.append(&mut field_diagnostics), + }, + TokenKind::Fn => match self.function() { + Ok(function) => functions.push(function), + Err(mut function_diagnostics) => diagnostics.append(&mut function_diagnostics), + }, + _ => unreachable!(), + } + } + + self.expect_advance(TokenKind::End)?; + + if diagnostics.is_empty() { + Ok(Class::new( + self.token_text(&identifier_token), + SourceRange::new(identifier_token.start(), identifier_token.end()), + fields, + functions, + )) + } else { + Err(diagnostics) + } + } + fn parameter_list(&mut self) -> Result, Vec> { let mut parameters = vec![]; let mut diagnostics = vec![]; @@ -371,6 +446,86 @@ impl<'a> Parser<'a> { )) } + fn public_class_member( + &mut self, + fields: &mut Vec, + functions: &mut Vec, + ) -> Result<(), Vec> { + if self.lookahead.is_some() { + if matches!( + self.lookahead.as_ref().unwrap().kind(), + TokenKind::Mut | TokenKind::Identifier + ) { + fields.push(self.field()?); + } else if matches!(self.lookahead.as_ref().unwrap().kind(), TokenKind::Fn) { + functions.push(self.function()?); + } else { + let lookahead = self.lookahead.as_ref().unwrap(); + return Err(vec![Diagnostic::new( + &format!( + "Expected any of {:?}; found {:?}", + [TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn], + lookahead.kind() + ), + lookahead.start(), + lookahead.end(), + )]); + } + Ok(()) + } else { + let current = self.current.as_ref().unwrap(); + Err(vec![Diagnostic::new( + &format!( + "Expected any of {:?}; found end-of-input.", + [TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn] + ), + current.end(), + current.end(), + )]) + } + } + + fn field(&mut self) -> Result> { + let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) { + self.advance(); + true + } else { + false + }; + + let is_mut = if self.current.is_some() && self.peek_current(TokenKind::Mut) { + self.advance(); + true + } else { + false + }; + + let identifier = self.expect_advance(TokenKind::Identifier)?; + + let declared_type = if self.current.is_some() && self.peek_current(TokenKind::Colon) { + self.advance(); // colon + Some(self.type_use()?) + } else { + None + }; + + let initializer = if self.current.is_some() && self.peek_current(TokenKind::Equals) { + self.advance(); // equals + Some(self.expression()?) + } else { + None + }; + + Ok(Field::new( + self.token_text(&identifier), + SourceRange::new(identifier.start(), identifier.end()), + is_public, + is_mut, + declared_type, + initializer, + )) + } + fn statement(&mut self) -> Result> { let current = self.get_current(); match current.kind() { @@ -517,13 +672,7 @@ impl<'a> Parser<'a> { self.expect_advance(TokenKind::LeftParentheses)?; let mut arguments = vec![]; if let Some(current) = &self.current { - if matches!( - current.kind(), - TokenKind::IntegerLiteral - | TokenKind::DoubleLiteral - | TokenKind::String - | TokenKind::Identifier - ) { + if matches_expression_first!(current.kind()) { arguments.append(&mut self.expression_list()?); } } @@ -628,6 +777,57 @@ mod smoke_tests { fn minus_negative_number() { smoke_test("fn main() -> Int 1 - -1 end"); } + + #[test] + fn empty_class() { + smoke_test("class Foo end"); + } + + #[test] + fn class_with_pub_member() { + smoke_test("class Foo pub bar end"); + } + + #[test] + fn class_with_mut_member() { + smoke_test("class Foo mut bar end"); + } + + #[test] + fn class_with_nothing_member() { + smoke_test("class Foo bar end"); + } + + #[test] + fn class_with_member_type_use() { + smoke_test("class Foo bar: Int end"); + } + + #[test] + fn class_with_member_init() { + smoke_test("class Foo bar = 42 end"); + } + + #[test] + fn class_with_member_type_use_and_init() { + smoke_test("class Foo bar: Int = 42 end"); + } + + #[test] + fn class_with_member_all() { + smoke_test("class Foo pub mut bar: Bar = Baz() end"); + } + + #[test] + fn class_with_pub_fn() { + smoke_test( + " + class Greeter + pub fn greet() end + end + ", + ); + } } #[cfg(test)] diff --git a/dmc-lib/src/token.rs b/dmc-lib/src/token.rs index 7e79457..515d70a 100644 --- a/dmc-lib/src/token.rs +++ b/dmc-lib/src/token.rs @@ -42,4 +42,9 @@ pub enum TokenKind { RightArrow, Plus, Minus, + Class, + Dot, + SelfKw, + Public, + Mut, }