Implement lexing and parsing for classes.

This commit is contained in:
Jesse Brault 2026-03-10 21:20:21 -05:00
parent 9df9edc508
commit 80b6b96aeb
9 changed files with 356 additions and 20 deletions

27
dmc-lib/src/ast/class.rs Normal file
View File

@ -0,0 +1,27 @@
use crate::ast::field::Field;
use crate::ast::function::Function;
use crate::source_range::SourceRange;
use std::rc::Rc;
/// AST node for a class declaration: the declared name, the source range of
/// that name, and the fields and functions passed in at construction.
pub struct Class {
    /// Class name, stored as a reference-counted string.
    declared_name: Rc<str>,
    /// Source range of the declared name.
    declared_name_source_range: SourceRange,
    /// Fields belonging to this class.
    fields: Vec<Field>,
    /// Functions belonging to this class.
    functions: Vec<Function>,
}

impl Class {
    /// Creates a class node.
    ///
    /// `declared_name` is copied into an `Rc<str>`; every other argument is
    /// moved into the node unchanged.
    pub fn new(
        declared_name: &str,
        declared_name_source_range: SourceRange,
        fields: Vec<Field>,
        functions: Vec<Function>,
    ) -> Self {
        let declared_name: Rc<str> = Rc::from(declared_name);
        Self {
            declared_name,
            declared_name_source_range,
            fields,
            functions,
        }
    }
}

33
dmc-lib/src/ast/field.rs Normal file
View File

@ -0,0 +1,33 @@
use crate::ast::expression::Expression;
use crate::ast::type_use::TypeUse;
use crate::source_range::SourceRange;
use std::rc::Rc;
/// AST node for a field: its declared name and that name's source range, the
/// `is_public` / `is_mut` flags, and an optional declared type and initializer.
pub struct Field {
    /// Field name, stored as a reference-counted string.
    declared_name: Rc<str>,
    /// Source range of the declared name.
    declared_name_source_range: SourceRange,
    /// Visibility flag supplied by the caller.
    is_public: bool,
    /// Mutability flag supplied by the caller.
    is_mut: bool,
    /// Declared type, if one was given; boxed.
    declared_type: Option<Box<TypeUse>>,
    /// Initializer expression, if one was given; boxed.
    initializer: Option<Box<Expression>>,
}

impl Field {
    /// Creates a field node.
    ///
    /// `declared_name` is copied into an `Rc<str>`; the optional type and
    /// initializer are moved onto the heap when present.
    pub fn new(
        declared_name: &str,
        declared_name_source_range: SourceRange,
        is_public: bool,
        is_mut: bool,
        declared_type: Option<TypeUse>,
        initializer: Option<Expression>,
    ) -> Self {
        let declared_name: Rc<str> = Rc::from(declared_name);
        let declared_type = declared_type.map(Box::new);
        let initializer = initializer.map(Box::new);
        Self {
            declared_name,
            declared_name_source_range,
            is_public,
            is_mut,
            declared_type,
            initializer,
        }
    }
}

View File

@ -16,6 +16,7 @@ use std::rc::Rc;
pub struct Function { pub struct Function {
declared_name: String, declared_name: String,
declared_name_source_range: SourceRange, declared_name_source_range: SourceRange,
is_public: bool,
parameters: Vec<Parameter>, parameters: Vec<Parameter>,
return_type: Option<TypeUse>, return_type: Option<TypeUse>,
statements: Vec<Statement>, statements: Vec<Statement>,
@ -26,6 +27,7 @@ impl Function {
pub fn new( pub fn new(
declared_name: &str, declared_name: &str,
declared_name_source_range: SourceRange, declared_name_source_range: SourceRange,
is_public: bool,
parameters: Vec<Parameter>, parameters: Vec<Parameter>,
return_type: Option<TypeUse>, return_type: Option<TypeUse>,
statements: Vec<Statement>, statements: Vec<Statement>,
@ -33,6 +35,7 @@ impl Function {
Self { Self {
declared_name: declared_name.to_string(), declared_name: declared_name.to_string(),
declared_name_source_range, declared_name_source_range,
is_public,
parameters, parameters,
return_type, return_type,
statements, statements,

View File

@ -1,10 +1,12 @@
pub mod add_expression; pub mod add_expression;
pub mod call; pub mod call;
pub mod class;
pub mod compilation_unit; pub mod compilation_unit;
pub mod double_literal; pub mod double_literal;
pub mod expression; pub mod expression;
pub mod expression_statement; pub mod expression_statement;
pub mod extern_function; pub mod extern_function;
pub mod field;
pub mod fqn; pub mod fqn;
pub mod function; pub mod function;
pub mod identifier; pub mod identifier;

View File

@ -1,3 +1,4 @@
use crate::ast::class::Class;
use crate::ast::extern_function::ExternFunction; use crate::ast::extern_function::ExternFunction;
use crate::ast::function::Function; use crate::ast::function::Function;
use crate::diagnostic::Diagnostic; use crate::diagnostic::Diagnostic;
@ -6,6 +7,7 @@ use crate::symbol_table::SymbolTable;
pub enum ModuleLevelDeclaration { pub enum ModuleLevelDeclaration {
Function(Function), Function(Function),
ExternFunction(ExternFunction), ExternFunction(ExternFunction),
Class(Class),
} }
impl ModuleLevelDeclaration { impl ModuleLevelDeclaration {
@ -20,6 +22,9 @@ impl ModuleLevelDeclaration {
ModuleLevelDeclaration::ExternFunction(extern_function) => { ModuleLevelDeclaration::ExternFunction(extern_function) => {
extern_function.gather_declared_names(symbol_table) extern_function.gather_declared_names(symbol_table)
} }
ModuleLevelDeclaration::Class(class) => {
todo!()
}
} }
} }
@ -29,6 +34,9 @@ impl ModuleLevelDeclaration {
ModuleLevelDeclaration::ExternFunction(extern_function) => { ModuleLevelDeclaration::ExternFunction(extern_function) => {
extern_function.check_name_usages(symbol_table) extern_function.check_name_usages(symbol_table)
} }
ModuleLevelDeclaration::Class(class) => {
todo!()
}
} }
} }
@ -38,6 +46,9 @@ impl ModuleLevelDeclaration {
ModuleLevelDeclaration::ExternFunction(extern_function) => { ModuleLevelDeclaration::ExternFunction(extern_function) => {
extern_function.type_check(symbol_table) extern_function.type_check(symbol_table)
} }
ModuleLevelDeclaration::Class(class) => {
todo!()
}
} }
} }
} }

View File

@ -3,6 +3,8 @@ pub struct Diagnostic {
message: String, message: String,
start: usize, start: usize,
end: usize, end: usize,
reporter_file: Option<&'static str>,
reporter_line: Option<u32>,
} }
impl Diagnostic { impl Diagnostic {
@ -11,6 +13,8 @@ impl Diagnostic {
message: message.into(), message: message.into(),
start, start,
end, end,
reporter_line: None,
reporter_file: None,
} }
} }
@ -25,4 +29,14 @@ impl Diagnostic {
pub fn end(&self) -> usize { pub fn end(&self) -> usize {
self.end self.end
} }
/// Returns a copy of this diagnostic with the reporter location filled in.
///
/// The message is cloned and `start` / `end` are copied; `file` and `line`
/// become `reporter_file` and `reporter_line`. The receiver is not modified.
pub fn with_reporter(&self, file: &'static str, line: u32) -> Self {
    let reporter_file = Some(file);
    let reporter_line = Some(line);
    let message = self.message.clone();
    Self {
        message,
        start: self.start,
        end: self.end,
        reporter_file,
        reporter_line,
    }
}
} }

View File

@ -54,6 +54,8 @@ impl<'a> Lexer<'a> {
Token::new(self.position, self.position + 1, TokenKind::Comma) Token::new(self.position, self.position + 1, TokenKind::Comma)
} else if chunk.starts_with(":") { } else if chunk.starts_with(":") {
Token::new(self.position, self.position + 1, TokenKind::Colon) Token::new(self.position, self.position + 1, TokenKind::Colon)
} else if chunk.starts_with(".") {
Token::new(self.position, self.position + 1, TokenKind::Dot)
} else { } else {
// more than one char token // more than one char token
if chunk.starts_with(|c: char| c.is_ascii_digit()) { if chunk.starts_with(|c: char| c.is_ascii_digit()) {
@ -118,11 +120,22 @@ impl<'a> Lexer<'a> {
break; break;
} }
} }
if prefix.len() == 0 {
return Some(Err(LexerError::new(LexerErrorKind::UnrecognizedCharacter(
chunk.chars().next().unwrap(),
))));
}
let token_kind = match prefix.as_str() { let token_kind = match prefix.as_str() {
"fn" => TokenKind::Fn, "fn" => TokenKind::Fn,
"end" => TokenKind::End, "end" => TokenKind::End,
"let" => TokenKind::Let, "let" => TokenKind::Let,
"extern" => TokenKind::Extern, "extern" => TokenKind::Extern,
"class" => TokenKind::Class,
"self" => TokenKind::SelfKw,
"pub" => TokenKind::Public,
"mut" => TokenKind::Mut,
_ => TokenKind::Identifier, _ => TokenKind::Identifier,
}; };
Token::new(self.position, self.position + prefix.len(), token_kind) Token::new(self.position, self.position + prefix.len(), token_kind)
@ -151,6 +164,7 @@ impl LexerError {
#[derive(Debug, Clone, Copy, Eq, PartialEq)] #[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum LexerErrorKind { pub enum LexerErrorKind {
UnterminatedString, UnterminatedString,
UnrecognizedCharacter(char),
} }
#[cfg(test)] #[cfg(test)]
@ -239,4 +253,31 @@ mod tests {
assert_next(&mut lexer, TokenKind::RightParentheses, 1); assert_next(&mut lexer, TokenKind::RightParentheses, 1);
assert!(lexer.next().is_none()); assert!(lexer.next().is_none());
} }
/// Lexes a small class declaration and fails if the lexer reports any error
/// before running out of tokens.
#[test]
fn class_with_fields_smoke_test() {
    let source = "
class Foo
pub bar: Int
mut baz: String
car = 42
fn new(bar: Int, baz: String)
self.bar = bar
self.baz = baz
end
end
";
    let mut lexer = Lexer::new(source);
    loop {
        match lexer.next() {
            None => break,
            Some(Ok(_)) => {}
            Some(Err(lexer_error)) => panic!("{:?}", lexer_error),
        }
    }
    // Once drained, the lexer must keep reporting end-of-input.
    assert!(lexer.next().is_none());
}
} }

View File

@ -1,10 +1,12 @@
use crate::ast::add_expression::AddExpression; use crate::ast::add_expression::AddExpression;
use crate::ast::call::Call; use crate::ast::call::Call;
use crate::ast::class::Class;
use crate::ast::compilation_unit::CompilationUnit; use crate::ast::compilation_unit::CompilationUnit;
use crate::ast::double_literal::DoubleLiteral; use crate::ast::double_literal::DoubleLiteral;
use crate::ast::expression::Expression; use crate::ast::expression::Expression;
use crate::ast::expression_statement::ExpressionStatement; use crate::ast::expression_statement::ExpressionStatement;
use crate::ast::extern_function::ExternFunction; use crate::ast::extern_function::ExternFunction;
use crate::ast::field::Field;
use crate::ast::function::Function; use crate::ast::function::Function;
use crate::ast::identifier::Identifier; use crate::ast::identifier::Identifier;
use crate::ast::integer_literal::IntegerLiteral; use crate::ast::integer_literal::IntegerLiteral;
@ -33,6 +35,21 @@ pub fn parse_expression(input: &str) -> Result<Expression, Vec<Diagnostic>> {
parser.expression() parser.expression()
} }
/// Evaluates to `true` when the given `TokenKind` expression is one of the
/// kinds listed below: the literal kinds, `String`, `Minus`, `SelfKw`, or
/// `Identifier`.
macro_rules! matches_expression_first {
    ($kind:expr) => {
        matches!(
            $kind,
            TokenKind::Identifier
                | TokenKind::SelfKw
                | TokenKind::Minus
                | TokenKind::String
                | TokenKind::IntegerLiteral
                | TokenKind::LongLiteral
                | TokenKind::DoubleLiteral
        )
    };
}
struct Parser<'a> { struct Parser<'a> {
input: &'a str, input: &'a str,
lexer: Lexer<'a>, lexer: Lexer<'a>,
@ -113,22 +130,28 @@ impl<'a> Parser<'a> {
fn expect_advance(&mut self, token_kind: TokenKind) -> Result<Token, Vec<Diagnostic>> { fn expect_advance(&mut self, token_kind: TokenKind) -> Result<Token, Vec<Diagnostic>> {
match self.current.take() { match self.current.take() {
None => Err(vec![Diagnostic::new( None => Err(vec![
Diagnostic::new(
&format!("Expected {:?} but found end-of-input.", token_kind), &format!("Expected {:?} but found end-of-input.", token_kind),
self.input.len(), self.input.len(),
self.input.len(), self.input.len(),
)]), )
.with_reporter(file!(), line!()),
]),
Some(token) => { Some(token) => {
if token.kind() == token_kind { if token.kind() == token_kind {
self.advance(); self.advance();
Ok(token) Ok(token)
} else { } else {
self.advance_until(&[token_kind]); self.advance_until(&[token_kind]);
Err(vec![Diagnostic::new( Err(vec![
Diagnostic::new(
&format!("Expected {:?} but found {:?}", token_kind, token.kind()), &format!("Expected {:?} but found {:?}", token_kind, token.kind()),
token.start(), token.start(),
token.end(), token.end(),
)]) )
.with_reporter(file!(), line!()),
])
} }
} }
} }
@ -172,7 +195,7 @@ impl<'a> Parser<'a> {
while self.current.is_some() { while self.current.is_some() {
let current = self.get_current(); let current = self.get_current();
match current.kind() { match current.kind() {
TokenKind::Fn | TokenKind::Extern => { TokenKind::Fn | TokenKind::Extern | TokenKind::Class => {
let declaration_result = self.module_level_declaration(); let declaration_result = self.module_level_declaration();
match declaration_result { match declaration_result {
Ok(declaration) => declarations.push(declaration), Ok(declaration) => declarations.push(declaration),
@ -184,8 +207,8 @@ impl<'a> Parser<'a> {
_ => { _ => {
diagnostics.push(Diagnostic::new( diagnostics.push(Diagnostic::new(
&format!( &format!(
"Expected any of: {:?}; found {:?}", "Expected any of {:?}; found {:?}",
[TokenKind::Fn, TokenKind::Extern], [TokenKind::Fn, TokenKind::Extern, TokenKind::Class],
current.kind() current.kind()
), ),
current.start(), current.start(),
@ -221,11 +244,22 @@ impl<'a> Parser<'a> {
Err(extern_function_diagnostics) => Err(extern_function_diagnostics), Err(extern_function_diagnostics) => Err(extern_function_diagnostics),
} }
} }
TokenKind::Class => match self.class() {
Ok(class) => Ok(ModuleLevelDeclaration::Class(class)),
Err(class_diagnostics) => Err(class_diagnostics),
},
_ => unreachable!(), _ => unreachable!(),
} }
} }
fn function(&mut self) -> Result<Function, Vec<Diagnostic>> { fn function(&mut self) -> Result<Function, Vec<Diagnostic>> {
let is_public = if self.current.is_some() && self.peek_current(TokenKind::Public) {
self.advance(); // pub
true
} else {
false
};
self.expect_advance(TokenKind::Fn)?; self.expect_advance(TokenKind::Fn)?;
let identifier_token = self.expect_advance(TokenKind::Identifier)?; let identifier_token = self.expect_advance(TokenKind::Identifier)?;
@ -273,6 +307,7 @@ impl<'a> Parser<'a> {
Ok(Function::new( Ok(Function::new(
self.token_text(&identifier_token), self.token_text(&identifier_token),
SourceRange::new(identifier_token.start(), identifier_token.end()), SourceRange::new(identifier_token.start(), identifier_token.end()),
is_public,
parameters, parameters,
return_type, return_type,
statements, statements,
@ -323,6 +358,46 @@ impl<'a> Parser<'a> {
} }
} }
fn class(&mut self) -> Result<Class, Vec<Diagnostic>> {
self.expect_advance(TokenKind::Class)?;
let identifier_token = self.expect_advance(TokenKind::Identifier)?;
let mut fields = vec![];
let mut functions = vec![];
let mut diagnostics = vec![];
while self.current.is_some() && !self.peek_current(TokenKind::End) {
match self.get_current().kind() {
TokenKind::Public => match self.public_class_member(&mut fields, &mut functions) {
Ok(_) => {}
Err(mut member_diagnostics) => diagnostics.append(&mut member_diagnostics),
},
TokenKind::Mut | TokenKind::Identifier => match self.field() {
Ok(field) => fields.push(field),
Err(mut field_diagnostics) => diagnostics.append(&mut field_diagnostics),
},
TokenKind::Fn => match self.function() {
Ok(function) => functions.push(function),
Err(mut function_diagnostics) => diagnostics.append(&mut function_diagnostics),
},
_ => unreachable!(),
}
}
self.expect_advance(TokenKind::End)?;
if diagnostics.is_empty() {
Ok(Class::new(
self.token_text(&identifier_token),
SourceRange::new(identifier_token.start(), identifier_token.end()),
fields,
functions,
))
} else {
Err(diagnostics)
}
}
fn parameter_list(&mut self) -> Result<Vec<Parameter>, Vec<Diagnostic>> { fn parameter_list(&mut self) -> Result<Vec<Parameter>, Vec<Diagnostic>> {
let mut parameters = vec![]; let mut parameters = vec![];
let mut diagnostics = vec![]; let mut diagnostics = vec![];
@ -371,6 +446,86 @@ impl<'a> Parser<'a> {
)) ))
} }
/// Parses a class member that starts with `pub`, pushing the result onto
/// `fields` or `functions`.
///
/// The token after `pub` (the lookahead) decides what is parsed: `mut` or an
/// identifier means a field, `fn` means a function. `pub` itself is consumed
/// by `field` / `function`.
///
/// # Errors
///
/// Returns a diagnostic when `pub` is followed by anything else, or by
/// end-of-input. On those paths the `pub` token is consumed before returning,
/// and so is the offending token unless it is `end`. This guarantees that a
/// caller looping over members makes progress instead of seeing the same
/// `pub` forever, while still leaving a closing `end` for the caller.
fn public_class_member(
    &mut self,
    fields: &mut Vec<Field>,
    functions: &mut Vec<Function>,
) -> Result<(), Vec<Diagnostic>> {
    // Snapshot the lookahead so no borrow is held across the parser calls.
    let lookahead = self
        .lookahead
        .as_ref()
        .map(|token| (token.kind(), token.start(), token.end()));

    match lookahead {
        Some((kind, start, end)) => match kind {
            TokenKind::Mut | TokenKind::Identifier => {
                fields.push(self.field()?);
                Ok(())
            }
            TokenKind::Fn => {
                functions.push(self.function()?);
                Ok(())
            }
            unexpected => {
                let diagnostic = Diagnostic::new(
                    &format!(
                        "Expected any of {:?}; found {:?}",
                        [TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn],
                        unexpected
                    ),
                    start,
                    end,
                );
                self.advance(); // pub
                if !matches!(unexpected, TokenKind::End) {
                    self.advance(); // the token that was reported
                }
                Err(vec![diagnostic])
            }
        },
        None => {
            // Point just past `pub`; fall back to the end of the input if
            // there is no current token.
            let position = self
                .current
                .as_ref()
                .map_or(self.input.len(), |token| token.end());
            self.advance(); // pub
            Err(vec![Diagnostic::new(
                &format!(
                    "Expected any of {:?}; found end-of-input.",
                    [TokenKind::Mut, TokenKind::Identifier, TokenKind::Fn]
                ),
                position,
                position,
            )])
        }
    }
}
/// Parses a field: optional `pub`, optional `mut`, an identifier, an
/// optional `: <type>` and an optional `= <expression>`.
///
/// # Errors
///
/// Propagates the diagnostics from the identifier, type, or initializer
/// parse, whichever fails first.
fn field(&mut self) -> Result<Field, Vec<Diagnostic>> {
    let is_public = self.current.is_some() && self.peek_current(TokenKind::Public);
    if is_public {
        self.advance(); // pub
    }

    let is_mut = self.current.is_some() && self.peek_current(TokenKind::Mut);
    if is_mut {
        self.advance(); // mut
    }

    let name_token = self.expect_advance(TokenKind::Identifier)?;

    let mut declared_type = None;
    if self.current.is_some() && self.peek_current(TokenKind::Colon) {
        self.advance(); // colon
        declared_type = Some(self.type_use()?);
    }

    let mut initializer = None;
    if self.current.is_some() && self.peek_current(TokenKind::Equals) {
        self.advance(); // equals
        initializer = Some(self.expression()?);
    }

    let name_range = SourceRange::new(name_token.start(), name_token.end());
    Ok(Field::new(
        self.token_text(&name_token),
        name_range,
        is_public,
        is_mut,
        declared_type,
        initializer,
    ))
}
fn statement(&mut self) -> Result<Statement, Vec<Diagnostic>> { fn statement(&mut self) -> Result<Statement, Vec<Diagnostic>> {
let current = self.get_current(); let current = self.get_current();
match current.kind() { match current.kind() {
@ -517,13 +672,7 @@ impl<'a> Parser<'a> {
self.expect_advance(TokenKind::LeftParentheses)?; self.expect_advance(TokenKind::LeftParentheses)?;
let mut arguments = vec![]; let mut arguments = vec![];
if let Some(current) = &self.current { if let Some(current) = &self.current {
if matches!( if matches_expression_first!(current.kind()) {
current.kind(),
TokenKind::IntegerLiteral
| TokenKind::DoubleLiteral
| TokenKind::String
| TokenKind::Identifier
) {
arguments.append(&mut self.expression_list()?); arguments.append(&mut self.expression_list()?);
} }
} }
@ -628,6 +777,57 @@ mod smoke_tests {
fn minus_negative_number() { fn minus_negative_number() {
smoke_test("fn main() -> Int 1 - -1 end"); smoke_test("fn main() -> Int 1 - -1 end");
} }
// Smoke tests for class declarations. Each one hands a source snippet to
// `smoke_test`, which is defined outside this view; presumably it fails the
// test when parsing produces diagnostics (TODO confirm).

// Class with an empty body.
#[test]
fn empty_class() {
smoke_test("class Foo end");
}
// Field with only the `pub` modifier.
#[test]
fn class_with_pub_member() {
smoke_test("class Foo pub bar end");
}
// Field with only the `mut` modifier.
#[test]
fn class_with_mut_member() {
smoke_test("class Foo mut bar end");
}
// Field with no modifiers, type, or initializer.
#[test]
fn class_with_nothing_member() {
smoke_test("class Foo bar end");
}
// Field with a declared type.
#[test]
fn class_with_member_type_use() {
smoke_test("class Foo bar: Int end");
}
// Field with an initializer and no declared type.
#[test]
fn class_with_member_init() {
smoke_test("class Foo bar = 42 end");
}
// Field with both a declared type and an initializer.
#[test]
fn class_with_member_type_use_and_init() {
smoke_test("class Foo bar: Int = 42 end");
}
// Field using every optional part: `pub`, `mut`, a type, and a call initializer.
#[test]
fn class_with_member_all() {
smoke_test("class Foo pub mut bar: Bar = Baz() end");
}
// Class containing a `pub fn` with an empty body.
#[test]
fn class_with_pub_fn() {
smoke_test(
"
class Greeter
pub fn greet() end
end
",
);
}
} }
#[cfg(test)] #[cfg(test)]

View File

@ -42,4 +42,9 @@ pub enum TokenKind {
RightArrow, RightArrow,
Plus, Plus,
Minus, Minus,
Class,
Dot,
SelfKw,
Public,
Mut,
} }