From aefac57b9d76127bf7869d7fceb5e0925b18b238 Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Fri, 27 Feb 2026 16:26:37 -0600 Subject: [PATCH] Starting work on IR. --- dmc-lib/src/ast/call.rs | 16 +++++++ dmc-lib/src/ast/compilation_unit.rs | 9 ++++ dmc-lib/src/ast/expression.rs | 11 +++++ dmc-lib/src/ast/expression_statement.rs | 38 +++++++++++++++ dmc-lib/src/ast/fqn.rs | 15 ++++++ dmc-lib/src/ast/function.rs | 64 +++++++++++++++++++++++++ dmc-lib/src/ast/identifier.rs | 7 +++ dmc-lib/src/ast/integer_literal.rs | 6 +++ dmc-lib/src/ast/let_statement.rs | 11 +++++ dmc-lib/src/ast/mod.rs | 20 ++++++++ dmc-lib/src/ast/statement.rs | 28 +++++++++-- dmc-lib/src/ast/string_literal.rs | 13 +++++ dmc-lib/src/ir/ir_assign.rs | 17 +++++++ dmc-lib/src/ir/ir_call.rs | 16 +++++++ dmc-lib/src/ir/ir_constant.rs | 21 ++++++++ dmc-lib/src/ir/ir_expression.rs | 11 +++++ dmc-lib/src/ir/ir_function.rs | 12 +++++ dmc-lib/src/ir/ir_l_value.rs | 6 +++ dmc-lib/src/ir/ir_r_value.rs | 8 ++++ dmc-lib/src/ir/ir_statement.rs | 8 ++++ dmc-lib/src/ir/ir_variable.rs | 10 ++++ dmc-lib/src/ir/mod.rs | 18 +++++++ dmc-lib/src/lib.rs | 1 + dmc-lib/src/parser.rs | 7 +-- 24 files changed, 365 insertions(+), 8 deletions(-) create mode 100644 dmc-lib/src/ast/expression_statement.rs create mode 100644 dmc-lib/src/ast/fqn.rs create mode 100644 dmc-lib/src/ir/ir_assign.rs create mode 100644 dmc-lib/src/ir/ir_call.rs create mode 100644 dmc-lib/src/ir/ir_constant.rs create mode 100644 dmc-lib/src/ir/ir_expression.rs create mode 100644 dmc-lib/src/ir/ir_function.rs create mode 100644 dmc-lib/src/ir/ir_l_value.rs create mode 100644 dmc-lib/src/ir/ir_r_value.rs create mode 100644 dmc-lib/src/ir/ir_statement.rs create mode 100644 dmc-lib/src/ir/ir_variable.rs create mode 100644 dmc-lib/src/ir/mod.rs diff --git a/dmc-lib/src/ast/call.rs b/dmc-lib/src/ast/call.rs index 5d80155..7f0b125 100644 --- a/dmc-lib/src/ast/call.rs +++ b/dmc-lib/src/ast/call.rs @@ -1,5 +1,8 @@ use crate::ast::expression::Expression; +use crate::ast::function::FunctionLoweringContext; use crate::diagnostic::Diagnostic; +use crate::ir::ir_call::IrCall; +use crate::ir::ir_expression::IrExpression; use crate::source_range::SourceRange; use crate::symbol_table::SymbolTable; use crate::type_info::TypeInfo; @@ -74,6 +77,19 @@ impl Call { } } + pub fn lower_to_ir(&self, context: &mut FunctionLoweringContext) -> IrExpression { + let function_name = match self.callee() { + Expression::Identifier(identifier) => identifier.name(), + _ => panic!("Calling things other than identifiers not yet supported."), + }; + let arguments = self + .arguments() + .iter() + .map(|arg| arg.lower_to_ir(context)) + .collect(); + IrExpression::Call(IrCall::new(function_name, arguments)) + } + pub fn source_range(&self) -> &SourceRange { &self.source_range } diff --git a/dmc-lib/src/ast/compilation_unit.rs b/dmc-lib/src/ast/compilation_unit.rs index 4e19aa9..15e54fc 100644 --- a/dmc-lib/src/ast/compilation_unit.rs +++ b/dmc-lib/src/ast/compilation_unit.rs @@ -1,5 +1,6 @@ use crate::ast::function::Function; use crate::diagnostic::Diagnostic; +use crate::ir::Ir; use crate::symbol_table::SymbolTable; pub struct CompilationUnit { @@ -40,4 +41,12 @@ impl CompilationUnit { } diagnostics } + + pub fn lower_to_ir(&self) -> Vec { + let mut irs = vec![]; + for function in &self.functions { + irs.append(&mut function.lower_to_ir()); + } + irs + } } diff --git a/dmc-lib/src/ast/expression.rs b/dmc-lib/src/ast/expression.rs index 8d9dbba..8695c95 100644 --- a/dmc-lib/src/ast/expression.rs +++ b/dmc-lib/src/ast/expression.rs @@ -1,8 +1,10 @@ use crate::ast::call::Call; +use crate::ast::function::FunctionLoweringContext; use crate::ast::identifier::Identifier; use crate::ast::integer_literal::IntegerLiteral; use crate::ast::string_literal::StringLiteral; use crate::diagnostic::Diagnostic; +use crate::ir::ir_expression::IrExpression; use crate::source_range::SourceRange; use crate::symbol_table::SymbolTable; use crate::type_info::TypeInfo; @@ -55,4 +57,13 @@ impl Expression { Expression::Identifier(identifier) => identifier.source_range(), } } + + pub fn lower_to_ir(&self, context: &mut FunctionLoweringContext) -> IrExpression { + match self { + Expression::Call(call) => call.lower_to_ir(context), + Expression::IntegerLiteral(integer_literal) => integer_literal.lower_to_ir(context), + Expression::String(string_literal) => string_literal.lower_to_ir(context), + Expression::Identifier(identifier) => identifier.lower_to_ir(context), + } + } } diff --git a/dmc-lib/src/ast/expression_statement.rs b/dmc-lib/src/ast/expression_statement.rs new file mode 100644 index 0000000..8b6ce9e --- /dev/null +++ b/dmc-lib/src/ast/expression_statement.rs @@ -0,0 +1,38 @@ +use crate::ast::expression::Expression; +use crate::ast::function::FunctionLoweringContext; +use crate::diagnostic::Diagnostic; +use crate::ir::ir_statement::IrStatement; +use crate::symbol_table::SymbolTable; + +pub struct ExpressionStatement { + expression: Box, +} + +impl ExpressionStatement { + pub fn new(expression: Expression) -> Self { + Self { + expression: expression.into(), + } + } + + pub fn expression(&self) -> &Expression { + &self.expression + } + + pub fn gather_declared_names(&mut self, symbol_table: &mut SymbolTable) -> Vec { + self.expression.gather_declared_names(symbol_table) + } + + pub fn check_name_usages(&mut self, symbol_table: &SymbolTable) -> Vec { + self.expression.check_name_usages(symbol_table) + } + + pub fn type_check(&mut self, symbol_table: &SymbolTable) -> Vec { + self.expression.type_check(symbol_table) + } + + pub fn lower_to_ir(&self, context: &mut FunctionLoweringContext) { + let ir_expression = self.expression.lower_to_ir(context); + context.add_statement(IrStatement::Expression(ir_expression)); + } +} diff --git a/dmc-lib/src/ast/fqn.rs b/dmc-lib/src/ast/fqn.rs new file mode 100644 index 0000000..6a645db --- /dev/null +++ b/dmc-lib/src/ast/fqn.rs @@ -0,0 +1,15 @@ +pub struct Fqn { + parts: Vec, +} + +impl Fqn { + pub fn new(parts: &[&str]) -> Self { + Self { + parts: parts.iter().map(|s| s.to_string()).collect(), + } + } + + pub fn parts(&self) -> &[String] { + self.parts.as_slice() + } +} diff --git a/dmc-lib/src/ast/function.rs b/dmc-lib/src/ast/function.rs index 888f6f7..07152da 100644 --- a/dmc-lib/src/ast/function.rs +++ b/dmc-lib/src/ast/function.rs @@ -1,5 +1,9 @@ use crate::ast::statement::Statement; use crate::diagnostic::Diagnostic; +use crate::ir::Ir; +use crate::ir::ir_constant::IrConstant; +use crate::ir::ir_function::IrFunction; +use crate::ir::ir_statement::IrStatement; use crate::source_range::SourceRange; use crate::symbol::FunctionSymbol; use crate::symbol_table::{SymbolInsertError, SymbolTable}; @@ -75,4 +79,64 @@ impl Function { } diagnostics } + + pub fn lower_to_ir(&self) -> Vec { + let mut context = FunctionLoweringContext::new(); + for statement in &self.statements { + statement.lower_to_ir(&mut context); + } + let mut irs = vec![]; + for constant in context.take_constants() { + irs.push(Ir::Constant(constant)); + } + let ir_function = IrFunction::new(context.take_statements()); + irs.push(Ir::Function(ir_function)); + irs + } +} + +pub struct FunctionLoweringContext { + temp_variable_counter: usize, + constant_counter: usize, + constants: Vec, + statements: Vec, +} + +impl FunctionLoweringContext { + pub fn new() -> Self { + Self { + temp_variable_counter: 0, + constant_counter: 0, + constants: vec![], + statements: vec![], + } + } + + pub fn next_temp_variable(&mut self) -> String { + let temp_variable = format!("t_{}", self.temp_variable_counter); + self.temp_variable_counter += 1; + temp_variable + } + + pub fn next_constant_name(&mut self) -> String { + let constant_name = format!("%const_{}", self.constant_counter); + self.constant_counter += 1; + constant_name + } + + pub fn add_constant(&mut self, constant: IrConstant) { + self.constants.push(constant); + } + + pub fn take_constants(&mut self) -> Vec { + std::mem::take(&mut self.constants) + } + + pub fn add_statement(&mut self, statement: IrStatement) { + self.statements.push(statement); + } + + pub fn take_statements(&mut self) -> Vec { + std::mem::take(&mut self.statements) + } } diff --git a/dmc-lib/src/ast/identifier.rs b/dmc-lib/src/ast/identifier.rs index 0782dcd..32b4834 100644 --- a/dmc-lib/src/ast/identifier.rs +++ b/dmc-lib/src/ast/identifier.rs @@ -1,4 +1,7 @@ +use crate::ast::function::FunctionLoweringContext; use crate::diagnostic::Diagnostic; +use crate::ir::ir_expression::IrExpression; +use crate::ir::ir_variable::IrVariable; use crate::source_range::SourceRange; use crate::symbol::ExpressibleSymbol; use crate::symbol_table::SymbolTable; @@ -58,6 +61,10 @@ impl Identifier { } } + pub fn lower_to_ir(&self, context: &mut FunctionLoweringContext) -> IrExpression { + IrExpression::Variable(IrVariable::new(self.name())) + } + pub fn source_range(&self) -> &SourceRange { &self.source_range } diff --git a/dmc-lib/src/ast/integer_literal.rs b/dmc-lib/src/ast/integer_literal.rs index 3c907e5..6cec042 100644 --- a/dmc-lib/src/ast/integer_literal.rs +++ b/dmc-lib/src/ast/integer_literal.rs @@ -1,3 +1,5 @@ +use crate::ast::function::FunctionLoweringContext; +use crate::ir::ir_expression::IrExpression; use crate::source_range::SourceRange; pub struct IntegerLiteral { @@ -17,6 +19,10 @@ impl IntegerLiteral { self.value } + pub fn lower_to_ir(&self, context: &mut FunctionLoweringContext) -> IrExpression { + IrExpression::IntegerLiteral(self.value) + } + pub fn source_range(&self) -> &SourceRange { &self.source_range } diff --git a/dmc-lib/src/ast/let_statement.rs b/dmc-lib/src/ast/let_statement.rs index e778dfb..c04f862 100644 --- a/dmc-lib/src/ast/let_statement.rs +++ b/dmc-lib/src/ast/let_statement.rs @@ -1,5 +1,9 @@ use crate::ast::expression::Expression; +use crate::ast::function::FunctionLoweringContext; use crate::diagnostic::Diagnostic; +use crate::ir::ir_assign::IrAssign; +use crate::ir::ir_statement::IrStatement; +use crate::ir::ir_variable::IrVariable; use crate::source_range::SourceRange; use crate::symbol::VariableSymbol; use crate::symbol_table::{SymbolInsertError, SymbolTable}; @@ -68,4 +72,11 @@ impl LetStatement { diagnostics.append(&mut self.initializer.type_check(symbol_table)); diagnostics } + + pub fn lower_to_ir(&self, context: &mut FunctionLoweringContext) { + let data = self.initializer.lower_to_ir(context); + let destination = IrVariable::new(self.declared_name()); + let assign_statement = IrAssign::new(destination, data); + context.add_statement(IrStatement::Assign(assign_statement)); + } } diff --git a/dmc-lib/src/ast/mod.rs b/dmc-lib/src/ast/mod.rs index b75f1ef..46ac3e2 100644 --- a/dmc-lib/src/ast/mod.rs +++ b/dmc-lib/src/ast/mod.rs @@ -1,6 +1,8 @@ pub mod call; pub mod compilation_unit; pub mod expression; +pub mod expression_statement; +pub mod fqn; pub mod function; pub mod identifier; pub mod integer_literal; @@ -25,6 +27,24 @@ mod name_tests { 0 ); assert_eq!(compilation_unit.check_name_usages(&symbol_table).len(), 0); + let irs = compilation_unit.lower_to_ir(); + for ir in &irs { + println!("{:#?}", ir); + } + } + + #[test] + fn hello_world() { + let mut symbol_table = SymbolTable::new(); + let mut compilation_unit = + parse_compilation_unit("fn println() end fn main() println(\"Hello, World!\") end"); + compilation_unit.gather_declared_names(&mut symbol_table); + compilation_unit.check_name_usages(&symbol_table); + compilation_unit.type_check(&symbol_table); + let irs = compilation_unit.lower_to_ir(); + for ir in &irs { + println!("{:#?}", ir); + } } #[test] diff --git a/dmc-lib/src/ast/statement.rs b/dmc-lib/src/ast/statement.rs index 16b46c1..2cef340 100644 --- a/dmc-lib/src/ast/statement.rs +++ b/dmc-lib/src/ast/statement.rs @@ -1,32 +1,50 @@ -use crate::ast::expression::Expression; +use crate::ast::expression_statement::ExpressionStatement; +use crate::ast::function::FunctionLoweringContext; use crate::ast::let_statement::LetStatement; use crate::diagnostic::Diagnostic; use crate::symbol_table::SymbolTable; pub enum Statement { Let(LetStatement), - Expression(Expression), + Expression(ExpressionStatement), } impl Statement { pub fn gather_declared_names(&mut self, symbol_table: &mut SymbolTable) -> Vec { match self { Statement::Let(let_statement) => let_statement.gather_declared_names(symbol_table), - Statement::Expression(expression) => expression.gather_declared_names(symbol_table), + Statement::Expression(expression_statement) => { + expression_statement.gather_declared_names(symbol_table) + } } } pub fn check_name_usages(&mut self, symbol_table: &SymbolTable) -> Vec { match self { Statement::Let(let_statement) => let_statement.check_name_usages(symbol_table), - Statement::Expression(expression) => expression.check_name_usages(symbol_table), + Statement::Expression(expression_statement) => { + expression_statement.check_name_usages(symbol_table) + } } } pub fn type_check(&mut self, symbol_table: &SymbolTable) -> Vec { match self { Statement::Let(let_statement) => let_statement.type_check(symbol_table), - Statement::Expression(expression) => expression.type_check(symbol_table), + Statement::Expression(expression_statement) => { + expression_statement.type_check(symbol_table) + } + } + } + + pub fn lower_to_ir(&self, context: &mut FunctionLoweringContext) { + match self { + Statement::Let(let_statement) => { + let_statement.lower_to_ir(context); + } + Statement::Expression(expression) => { + expression.lower_to_ir(context); + } } } } diff --git a/dmc-lib/src/ast/string_literal.rs b/dmc-lib/src/ast/string_literal.rs index c9c920f..14f12c9 100644 --- a/dmc-lib/src/ast/string_literal.rs +++ b/dmc-lib/src/ast/string_literal.rs @@ -1,4 +1,8 @@ +use crate::ast::function::FunctionLoweringContext; +use crate::ir::ir_constant::{IrConstant, IrStringConstant}; +use crate::ir::ir_expression::IrExpression; use crate::source_range::SourceRange; +use std::rc::Rc; pub struct StringLiteral { content: String, @@ -17,6 +21,15 @@ impl StringLiteral { &self.content } + pub fn lower_to_ir(&self, context: &mut FunctionLoweringContext) -> IrExpression { + let ir_string_constant = Rc::new(IrStringConstant::new( + self.content(), + &context.next_constant_name(), + )); + context.add_constant(IrConstant::String(ir_string_constant.clone())); + IrExpression::Constant(IrConstant::String(ir_string_constant)) + } + pub fn source_range(&self) -> &SourceRange { &self.source_range } diff --git a/dmc-lib/src/ir/ir_assign.rs b/dmc-lib/src/ir/ir_assign.rs new file mode 100644 index 0000000..2f028df --- /dev/null +++ b/dmc-lib/src/ir/ir_assign.rs @@ -0,0 +1,17 @@ +use crate::ir::ir_expression::IrExpression; +use crate::ir::ir_variable::IrVariable; + +#[derive(Debug)] +pub struct IrAssign { + destination: Box, + value: Box, +} + +impl IrAssign { + pub fn new(destination: IrVariable, value: IrExpression) -> Self { + Self { + destination: destination.into(), + value: value.into(), + } + } +} diff --git a/dmc-lib/src/ir/ir_call.rs b/dmc-lib/src/ir/ir_call.rs new file mode 100644 index 0000000..b2c07c2 --- /dev/null +++ b/dmc-lib/src/ir/ir_call.rs @@ -0,0 +1,16 @@ +use crate::ir::ir_expression::IrExpression; + +#[derive(Debug)] +pub struct IrCall { + name: String, + arguments: Vec, +} + +impl IrCall { + pub fn new(name: &str, arguments: Vec) -> Self { + Self { + name: name.into(), + arguments, + } + } +} diff --git a/dmc-lib/src/ir/ir_constant.rs b/dmc-lib/src/ir/ir_constant.rs new file mode 100644 index 0000000..ed05e09 --- /dev/null +++ b/dmc-lib/src/ir/ir_constant.rs @@ -0,0 +1,21 @@ +use std::rc::Rc; + +#[derive(Debug)] +pub enum IrConstant { + String(Rc), +} + +#[derive(Debug)] +pub struct IrStringConstant { + value: String, + name: String, +} + +impl IrStringConstant { + pub fn new(value: &str, name: &str) -> Self { + Self { + value: value.into(), + name: name.into(), + } + } +} diff --git a/dmc-lib/src/ir/ir_expression.rs b/dmc-lib/src/ir/ir_expression.rs new file mode 100644 index 0000000..6a604a6 --- /dev/null +++ b/dmc-lib/src/ir/ir_expression.rs @@ -0,0 +1,11 @@ +use crate::ir::ir_call::IrCall; +use crate::ir::ir_constant::IrConstant; +use crate::ir::ir_variable::IrVariable; + +#[derive(Debug)] +pub enum IrExpression { + Call(IrCall), + Constant(IrConstant), + IntegerLiteral(i64), + Variable(IrVariable), +} diff --git a/dmc-lib/src/ir/ir_function.rs b/dmc-lib/src/ir/ir_function.rs new file mode 100644 index 0000000..095b69b --- /dev/null +++ b/dmc-lib/src/ir/ir_function.rs @@ -0,0 +1,12 @@ +use crate::ir::ir_statement::IrStatement; + +#[derive(Debug)] +pub struct IrFunction { + statements: Vec, +} + +impl IrFunction { + pub fn new(statements: Vec) -> Self { + Self { statements } + } +} diff --git a/dmc-lib/src/ir/ir_l_value.rs b/dmc-lib/src/ir/ir_l_value.rs new file mode 100644 index 0000000..e0bed77 --- /dev/null +++ b/dmc-lib/src/ir/ir_l_value.rs @@ -0,0 +1,6 @@ +use crate::ir::ir_variable::IrVariable; + +pub enum IrLhs { + Variable(IrVariable), + FunctionName(String), +} diff --git a/dmc-lib/src/ir/ir_r_value.rs b/dmc-lib/src/ir/ir_r_value.rs new file mode 100644 index 0000000..7e5c0ec --- /dev/null +++ b/dmc-lib/src/ir/ir_r_value.rs @@ -0,0 +1,8 @@ +use crate::ir::ir_constant::IrConstant; +use crate::ir::ir_variable::IrVariable; + +pub enum IrRhs { + Constant(IrConstant), + IntegerLiteral(i64), + Variable(IrVariable), +} diff --git a/dmc-lib/src/ir/ir_statement.rs b/dmc-lib/src/ir/ir_statement.rs new file mode 100644 index 0000000..2c85a8f --- /dev/null +++ b/dmc-lib/src/ir/ir_statement.rs @@ -0,0 +1,8 @@ +use crate::ir::ir_assign::IrAssign; +use crate::ir::ir_expression::IrExpression; + +#[derive(Debug)] +pub enum IrStatement { + Assign(IrAssign), + Expression(IrExpression), +} diff --git a/dmc-lib/src/ir/ir_variable.rs b/dmc-lib/src/ir/ir_variable.rs new file mode 100644 index 0000000..c40c7ca --- /dev/null +++ b/dmc-lib/src/ir/ir_variable.rs @@ -0,0 +1,10 @@ +#[derive(Debug)] +pub struct IrVariable { + name: String, +} + +impl IrVariable { + pub fn new(name: &str) -> Self { + Self { name: name.into() } + } +} diff --git a/dmc-lib/src/ir/mod.rs b/dmc-lib/src/ir/mod.rs new file mode 100644 index 0000000..4eb46f9 --- /dev/null +++ b/dmc-lib/src/ir/mod.rs @@ -0,0 +1,18 @@ +use crate::ir::ir_constant::IrConstant; +use crate::ir::ir_function::IrFunction; + +pub mod ir_assign; +pub mod ir_call; +pub mod ir_constant; +pub mod ir_expression; +pub mod ir_function; +pub mod ir_l_value; +pub mod ir_r_value; +pub mod ir_statement; +pub mod ir_variable; + +#[derive(Debug)] +pub enum Ir { + Function(IrFunction), + Constant(IrConstant), +} diff --git a/dmc-lib/src/lib.rs b/dmc-lib/src/lib.rs index 1fecf9c..d012a96 100644 --- a/dmc-lib/src/lib.rs +++ b/dmc-lib/src/lib.rs @@ -1,5 +1,6 @@ mod ast; mod diagnostic; +mod ir; mod lexer; mod parser; mod scope; diff --git a/dmc-lib/src/parser.rs b/dmc-lib/src/parser.rs index 3ef0041..af0d8d7 100644 --- a/dmc-lib/src/parser.rs +++ b/dmc-lib/src/parser.rs @@ -1,6 +1,7 @@ use crate::ast::call::Call; use crate::ast::compilation_unit::CompilationUnit; use crate::ast::expression::Expression; +use crate::ast::expression_statement::ExpressionStatement; use crate::ast::function::Function; use crate::ast::identifier::Identifier; use crate::ast::integer_literal::IntegerLiteral; @@ -169,7 +170,7 @@ impl<'a> Parser<'a> { } fn expression_statement(&mut self) -> Statement { - Statement::Expression(self.expression()) + Statement::Expression(ExpressionStatement::new(self.expression())) } fn expression(&mut self) -> Expression { @@ -246,8 +247,8 @@ mod smoke_tests { assert_eq!(function.declared_name(), "main"); let statements = function.statements(); assert_eq!(statements.len(), 1); - if let Statement::Expression(expression) = statements[0] { - if let Expression::Call(call) = expression { + if let Statement::Expression(expression_statement) = statements[0] { + if let Expression::Call(call) = expression_statement.expression() { let callee = call.callee(); match callee { Expression::Identifier(identifier) => {