From ce20cece219fa1528d3b427de6c864e159b0638e Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Thu, 15 May 2025 21:09:55 -0500 Subject: [PATCH] Work on basic name analysis. --- sketching/may_2025/name_one.dm | 10 + src/ast/build.rs | 24 +- src/ast/mod.rs | 70 ++++- src/ast/named.rs | 29 +++ src/ast/pretty_print.rs | 2 +- src/ast/unparse.rs | 2 +- src/bin/dmc/main.rs | 10 + src/bin/dmc/name_analysis.rs | 36 +++ src/compile/mod.rs | 1 + src/compile/name_analysis.rs | 450 +++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 11 files changed, 610 insertions(+), 25 deletions(-) create mode 100644 sketching/may_2025/name_one.dm create mode 100644 src/ast/named.rs create mode 100644 src/bin/dmc/name_analysis.rs create mode 100644 src/compile/mod.rs create mode 100644 src/compile/name_analysis.rs diff --git a/sketching/may_2025/name_one.dm b/sketching/may_2025/name_one.dm new file mode 100644 index 0000000..562c5b5 --- /dev/null +++ b/sketching/may_2025/name_one.dm @@ -0,0 +1,10 @@ +ns greeter + +fn main() { + let x = 'Hello'; + let y = 'World'; + { + let test = 'Test'; + }; + x = y; +} \ No newline at end of file diff --git a/src/ast/build.rs b/src/ast/build.rs index d5874b8..e2edddc 100644 --- a/src/ast/build.rs +++ b/src/ast/build.rs @@ -14,13 +14,11 @@ pub fn build_ast(compilation_unit_pair: Pair) -> CompilationUnit { } fn build_identifier(identifier_pair: Pair) -> Identifier { - Identifier { - name: identifier_pair.as_str().to_string(), - } + Identifier::new(identifier_pair.as_span().as_str()) } fn build_fqn(fqn_pair: Pair) -> FullyQualifiedName { - FullyQualifiedName( + FullyQualifiedName::new( fqn_pair .into_inner() .map(|identifier_pair| { @@ -745,14 +743,10 @@ fn build_call_statement(call_statement_pair: Pair) -> CallStatement { while let Some(inner_pair) = inner.next() { match inner_pair.as_rule() { Rule::ObjectAccess => { - result = Expression::ObjectAccess(build_object_access( - result, inner_pair, - )); + result = Expression::ObjectAccess(build_object_access(result, inner_pair)); } Rule::ParenthesesCall => { - result = Expression::Call(build_call_expression( - result, inner_pair, - )); + result = Expression::Call(build_call_expression(result, inner_pair)); } Rule::PlusPlus => { result = Expression::UnarySuffix(SuffixExpression { @@ -971,16 +965,10 @@ fn build_suffix_expression(suffix_pair: Pair) -> Expression { while let Some(suffix_pair) = inner.next() { match suffix_pair.as_rule() { Rule::ObjectAccess => { - result = Expression::ObjectAccess(build_object_access( - result, - suffix_pair, - )) + result = Expression::ObjectAccess(build_object_access(result, suffix_pair)) } Rule::ParenthesesCall => { - result = Expression::Call(build_call_expression( - result, - suffix_pair, - )) + result = Expression::Call(build_call_expression(result, suffix_pair)) } Rule::PlusPlus => { result = Expression::UnarySuffix(SuffixExpression { diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 87df058..8300d16 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,6 +1,8 @@ +use crate::compile::name_analysis::Symbol; use pest::Parser; pub mod build; +pub mod named; pub mod pretty_print; pub mod unparse; // Operators @@ -52,10 +54,68 @@ pub enum SuffixUnaryOperator { #[derive(Debug, Clone)] pub struct Identifier { pub name: String, + scope_id: Option, + symbol: Option, +} + +impl Identifier { + pub fn new(name: &str) -> Self { + Identifier { + name: name.to_string(), + scope_id: None, + symbol: None, + } + } + + pub fn set_scope_id(&mut self, id: usize) { + self.scope_id = Some(id); + } + + pub fn scope_id(&self) -> Option { + self.scope_id + } + + pub fn set_symbol(&mut self, symbol: Symbol) { + self.symbol = Some(symbol); + } + + pub fn symbol(&self) -> &Option { + &self.symbol + } } #[derive(Debug)] -pub struct FullyQualifiedName(pub Vec); +pub struct FullyQualifiedName { + pub identifiers: Vec, + scope_id: Option, + symbol: Option, +} + +impl FullyQualifiedName { + pub fn new(identifiers: Vec) -> Self { + FullyQualifiedName { + identifiers, + scope_id: None, + symbol: None, + } + } + + pub fn set_scope_id(&mut self, scope_id: usize) { + self.scope_id = Some(scope_id); + } + + pub fn scope_id(&self) -> Option { + self.scope_id + } + + pub fn set_symbol(&mut self, symbol: Symbol) { + self.symbol = Some(symbol); + } + + pub fn symbol(&self) -> &Option { + &self.symbol + } +} /* Type Use */ @@ -400,10 +460,10 @@ pub enum Statement { #[derive(Debug)] pub struct VariableDeclarationStatement { - is_mutable: bool, - identifier: Identifier, - declared_type: Option, - initializer: Option, + pub is_mutable: bool, + pub identifier: Identifier, + pub declared_type: Option, + pub initializer: Option, } #[derive(Debug)] diff --git a/src/ast/named.rs b/src/ast/named.rs new file mode 100644 index 0000000..d319126 --- /dev/null +++ b/src/ast/named.rs @@ -0,0 +1,29 @@ +use crate::ast::{FullyQualifiedName, Identifier}; +use std::borrow::Cow; + +pub trait Named { + fn name(&self) -> Cow<'_, str>; +} + +impl Named for Identifier { + fn name(&self) -> Cow<'_, str> { + Cow::Borrowed(&self.name) + } +} + +impl Named for FullyQualifiedName { + fn name(&self) -> Cow<'_, str> { + if self.identifiers.len() == 1 { + self.identifiers[0].name() + } else { + let mut acc = String::new(); + for (i, identifier) in self.identifiers.iter().enumerate() { + acc += &identifier.name(); + if i < self.identifiers.len() - 1 { + acc += "::"; + } + } + Cow::Owned(acc) + } + } +} diff --git a/src/ast/pretty_print.rs b/src/ast/pretty_print.rs index fe63219..559103e 100644 --- a/src/ast/pretty_print.rs +++ b/src/ast/pretty_print.rs @@ -74,7 +74,7 @@ impl PrettyPrint for FullyQualifiedName { fn pretty_print(&self, writer: &mut IndentWriter) -> std::io::Result<()> { writer.writeln_indented("FullyQualifiedName")?; writer.increase_indent(); - for identifier in &self.0 { + for identifier in &self.identifiers { identifier.pretty_print(writer)?; } writer.decrease_indent(); diff --git a/src/ast/unparse.rs b/src/ast/unparse.rs index 5802f8b..38de925 100644 --- a/src/ast/unparse.rs +++ b/src/ast/unparse.rs @@ -147,7 +147,7 @@ impl ListUnparse for FullyQualifiedName { } fn inner(&self) -> Vec<&dyn Unparse> { - to_unparse_vec!(self.0) + to_unparse_vec!(self.identifiers) } } diff --git a/src/bin/dmc/main.rs b/src/bin/dmc/main.rs index c221d18..bc3948a 100644 --- a/src/bin/dmc/main.rs +++ b/src/bin/dmc/main.rs @@ -1,8 +1,10 @@ +mod name_analysis; mod p3; mod unparse; use std::path::PathBuf; +use crate::name_analysis::name_analysis; use crate::p3::pretty_print_parse; use crate::unparse::unparse; use clap::{Parser, Subcommand}; @@ -24,6 +26,9 @@ enum Commands { P3 { paths: Vec, }, + NameAnalysis { + paths: Vec, + }, } fn main() { @@ -39,5 +44,10 @@ fn main() { pretty_print_parse(&path) } } + Commands::NameAnalysis { paths } => { + for path in paths { + name_analysis(&path) + } + } } } diff --git a/src/bin/dmc/name_analysis.rs b/src/bin/dmc/name_analysis.rs new file mode 100644 index 0000000..3a506b5 --- /dev/null +++ b/src/bin/dmc/name_analysis.rs @@ -0,0 +1,36 @@ +use deimos::ast::build::build_ast; +use deimos::compile::name_analysis::{analyze_names, SymbolTable}; +use deimos::parser::{DeimosParser, Rule}; +use pest::Parser; +use std::path::Path; + +pub fn name_analysis(path: &Path) { + let src = std::fs::read_to_string(path).unwrap(); + let parse_result = DeimosParser::parse( + Rule::CompilationUnit, + &src + ); + match parse_result { + Ok(mut pairs) => { + let compilation_unit_pair = pairs.next().unwrap(); + let mut compilation_unit = build_ast(compilation_unit_pair); + let mut symbol_table = SymbolTable::new(); + let name_analysis_result = analyze_names( + &mut compilation_unit, + &mut symbol_table, + ); + match name_analysis_result { + Err(e) => { + eprintln!("{}", e); + } + Ok(_) => { + println!("name_analysis complete"); + println!("{}", symbol_table); + } + } + } + Err(e) => { + eprintln!("{}", e); + } + } +} \ No newline at end of file diff --git a/src/compile/mod.rs b/src/compile/mod.rs new file mode 100644 index 0000000..66315a3 --- /dev/null +++ b/src/compile/mod.rs @@ -0,0 +1 @@ +pub mod name_analysis; diff --git a/src/compile/name_analysis.rs b/src/compile/name_analysis.rs new file mode 100644 index 0000000..39e6bb1 --- /dev/null +++ b/src/compile/name_analysis.rs @@ -0,0 +1,450 @@ +use crate::ast::named::Named; +use crate::ast::*; +use std::collections::HashMap; +use std::fmt::Display; + +#[derive(Debug, Clone)] +pub enum Symbol { + Function(FunctionSymbol), + Variable(VariableSymbol), +} + +impl Display for Symbol { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use Symbol::*; + match self { + Function(function_symbol) => write!(f, "{}", function_symbol), + Variable(variable_symbol) => write!(f, "{}", variable_symbol), + } + } +} + +#[derive(Debug, Clone)] +pub struct FunctionSymbol { + pub fqn: String, + pub declared_name: String, + pub is_public: bool, +} + +impl Display for FunctionSymbol { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "FunctionSymbol(fqn = {}, declared_name = {}, is_public = {})", + self.fqn, self.declared_name, self.is_public + ) + } +} + +#[derive(Debug, Clone)] +pub struct VariableSymbol { + pub name: String, + pub is_mutable: bool, +} + +impl Display for VariableSymbol { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "VariableSymbol(name = {}, is_mutable = {})", + self.name, self.is_mutable + ) + } +} + +#[derive(Default)] +pub struct Scope { + parent: Option, + symbols: HashMap, + debug_name: String, +} + +#[derive(Default)] +pub struct SymbolTable { + scopes: Vec, + current_scope_id: usize, +} + +/// Contains a vec of scopes, like a flattened tree +impl SymbolTable { + pub fn new() -> Self { + let mut t = SymbolTable::default(); + t.scopes.push(Scope::default()); + t.current_scope_id = 0; + t + } + + pub fn current_scope_id(&self) -> usize { + self.current_scope_id + } + + pub fn push_scope(&mut self, debug_name: &str) { + let id = self.scopes.len(); + self.scopes.push(Scope { + symbols: HashMap::new(), + parent: Some(self.current_scope_id), + debug_name: debug_name.to_string(), + }); + self.current_scope_id = id; + } + + pub fn pop_scope(&mut self) { + if let Some(parent_id) = self.scopes[self.current_scope_id].parent { + self.current_scope_id = parent_id; + } + } + + pub fn insert(&mut self, name: String, symbol: Symbol) -> Result<(), String> { + if let Some(current_symbol) = self.scopes[self.current_scope_id].symbols.get(&name) { + Err(format!("Symbol '{}' already defined", current_symbol)) + } else { + self.scopes[self.current_scope_id] + .symbols + .insert(name, symbol); + Ok(()) + } + } + + pub fn lookup(&self, name: &str, scope_id: usize) -> Result<&Symbol, String> { + let mut scope_opt = Some(&self.scopes[scope_id]); + while let Some(scope) = scope_opt { + if let Some(symbol) = scope.symbols.get(name) { + return Ok(symbol); + } + scope_opt = if let Some(parent_id) = scope.parent { + Some(&self.scopes[parent_id]) + } else { + None + }; + } + Err(format!("Symbol '{}' not found", name)) + } +} + +impl Display for SymbolTable { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + for (i, scope) in self.scopes.iter().enumerate() { + writeln!(f, "Scope {} {}", i, scope.debug_name)?; + for (name, symbol) in &scope.symbols { + writeln!(f, " {}({})", name, symbol)?; + } + } + Ok(()) + } +} + +struct FqnContext { + stack: Vec, +} + +impl FqnContext { + fn new() -> Self { + FqnContext { stack: Vec::new() } + } + + fn push(&mut self, name: String) { + self.stack.push(name); + } + + fn pop(&mut self) { + self.stack.pop(); + } + + fn current(&self) -> String { + let mut acc = String::new(); + for (i, name) in self.stack.iter().enumerate() { + acc.push_str(name); + if i != self.stack.len() - 1 { + acc.push_str("::") + } + } + acc + } + + fn resolve(&self, name: &str) -> String { + let mut acc = String::new(); + if !self.stack.is_empty() { + acc.push_str(&self.current()); + acc.push_str("::"); + } + acc.push_str(name); + acc + } +} + +pub fn analyze_names( + compilation_unit: &mut CompilationUnit, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + let mut fqn_context = FqnContext::new(); + if let Some(namespace) = &compilation_unit.namespace { + fqn_context.push(namespace.name().to_string()); + } + + for declaration in &mut compilation_unit.declarations { + gather_module_level_declaration(declaration, symbol_table, &mut fqn_context)?; + } + + assert_eq!(symbol_table.current_scope_id, 0); + + for declaration in &mut compilation_unit.declarations { + resolve_module_level_declaration(declaration, symbol_table)?; + } + + Ok(()) +} + +fn gather_module_level_declaration( + declaration: &mut ModuleLevelDeclaration, + symbol_table: &mut SymbolTable, + fqn_context: &mut FqnContext, +) -> Result<(), String> { + use ModuleLevelDeclaration::*; + match declaration { + Function(function_definition) => { + gather_function_definition(function_definition, symbol_table, fqn_context) + } + _ => todo!(), + } +} + +fn gather_function_definition( + function: &mut FunctionDefinition, + symbol_table: &mut SymbolTable, + fqn_context: &mut FqnContext, +) -> Result<(), String> { + let declared_name = function.identifier.name().to_string(); + let resolved_name = fqn_context.resolve(&declared_name); + symbol_table.insert( + declared_name.clone(), + Symbol::Function(FunctionSymbol { + fqn: resolved_name.clone(), + declared_name, + is_public: function.is_public, + }), + )?; + function + .identifier + .set_scope_id(symbol_table.current_scope_id()); + symbol_table.push_scope(&format!("FunctionScope({})", resolved_name)); + // TODO: params + gather_function_body(&mut function.body, symbol_table, fqn_context)?; + symbol_table.pop_scope(); + Ok(()) +} + +fn gather_function_body( + function_body: &mut FunctionBody, + symbol_table: &mut SymbolTable, + fqn_context: &mut FqnContext, +) -> Result<(), String> { + use FunctionBody::*; + match function_body { + Block(block) => gather_block_statement(block, symbol_table, fqn_context), + _ => todo!(), + } +} + +fn gather_block_statement( + block: &mut BlockStatement, + symbol_table: &mut SymbolTable, + fqn_context: &mut FqnContext, +) -> Result<(), String> { + symbol_table.push_scope("BlockStatementScope"); + for statement in &mut block.statements { + gather_statement(statement, symbol_table, fqn_context)?; + } + symbol_table.pop_scope(); + Ok(()) +} + +fn gather_statement( + statement: &mut Statement, + symbol_table: &mut SymbolTable, + fqn_context: &mut FqnContext, +) -> Result<(), String> { + use Statement::*; + match statement { + BlockStatement(block) => gather_block_statement(block, symbol_table, fqn_context), + VariableDeclarationStatement(variable_declaration) => { + gather_variable_declaration(variable_declaration, symbol_table, fqn_context) + } + AssignStatement(assign_statement) => { + gather_assign_statement(assign_statement, symbol_table, fqn_context) + } + _ => todo!(), + } +} + +fn gather_variable_declaration( + variable_declaration: &mut VariableDeclarationStatement, + symbol_table: &mut SymbolTable, + fqn_context: &mut FqnContext, +) -> Result<(), String> { + let variable_name = variable_declaration.identifier.name().to_string(); + symbol_table.insert( + variable_name.clone(), + Symbol::Variable(VariableSymbol { + name: variable_name, + is_mutable: variable_declaration.is_mutable, + }), + )?; + variable_declaration + .identifier + .set_scope_id(symbol_table.current_scope_id()); + Ok(()) +} + +fn gather_assign_statement( + assign_statement: &mut AssignStatement, + symbol_table: &mut SymbolTable, + fqn_context: &mut FqnContext, +) -> Result<(), String> { + gather_expression(&mut assign_statement.lhs, symbol_table, fqn_context)?; + gather_expression(&mut assign_statement.rhs, symbol_table, fqn_context)?; + Ok(()) +} + +fn gather_expression( + expression: &mut Expression, + symbol_table: &mut SymbolTable, + fqn_context: &mut FqnContext, +) -> Result<(), String> { + use Expression::*; + match expression { + FullyQualifiedName(fully_qualified_name) => { + gather_fully_qualified_name(fully_qualified_name, symbol_table, fqn_context)?; + } + _ => {} + } + Ok(()) +} + +fn gather_fully_qualified_name( + fully_qualified_name: &mut FullyQualifiedName, + symbol_table: &mut SymbolTable, + fqn_context: &mut FqnContext, +) -> Result<(), String> { + fully_qualified_name.set_scope_id(symbol_table.current_scope_id()); + Ok(()) +} + +/* Resolve */ + +fn resolve_module_level_declaration( + declaration: &mut ModuleLevelDeclaration, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + use ModuleLevelDeclaration::*; + match declaration { + Function(function_definition) => { + resolve_function_definition(function_definition, symbol_table) + } + _ => todo!(), + } +} + +fn resolve_function_definition( + function_definition: &mut FunctionDefinition, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + resolve_function_body(&mut function_definition.body, symbol_table) +} + +fn resolve_function_body( + function_body: &mut FunctionBody, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + use FunctionBody::*; + match function_body { + Block(block) => resolve_block(block, symbol_table), + _ => todo!(), + } +} + +fn resolve_block( + block_statement: &mut BlockStatement, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + for statement in &mut block_statement.statements { + resolve_statement(statement, symbol_table)?; + } + Ok(()) +} + +fn resolve_statement( + statement: &mut Statement, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + use Statement::*; + match statement { + BlockStatement(block) => resolve_block(block, symbol_table), + VariableDeclarationStatement(variable_declaration) => { + resolve_variable_declaration(variable_declaration, symbol_table) + } + AssignStatement(assign_statement) => { + resolve_assign_statement(assign_statement, symbol_table) + } + CallStatement(call_statement) => resolve_call_statement(call_statement, symbol_table), + _ => todo!(), + } +} + +fn resolve_variable_declaration( + variable_declaration: &mut VariableDeclarationStatement, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + if let Some(initializer) = &mut variable_declaration.initializer { + resolve_expression(initializer, symbol_table) + } else { + Ok(()) + } +} + +fn resolve_assign_statement( + assign_statement: &mut AssignStatement, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + resolve_expression(&mut assign_statement.lhs, symbol_table)?; + resolve_expression(&mut assign_statement.rhs, symbol_table)?; + Ok(()) +} + +fn resolve_call_statement( + call_statement: &mut CallStatement, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + resolve_expression(&mut call_statement.0, symbol_table) +} + +fn resolve_expression( + expression: &mut Expression, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + use Expression::*; + match expression { + FullyQualifiedName(fqn) => resolve_fully_qualified_name(fqn, symbol_table), + Literal(_) => Ok(()), + _ => todo!(), + } +} + +fn resolve_fully_qualified_name( + fully_qualified_name: &mut FullyQualifiedName, + symbol_table: &mut SymbolTable, +) -> Result<(), String> { + let lookup_result = symbol_table.lookup( + fully_qualified_name.name().as_ref(), + fully_qualified_name.scope_id().expect(&format!( + "FullyQualifiedName has no scope_id set: {:?}", + fully_qualified_name + )), + ); + match lookup_result { + Ok(symbol) => { + fully_qualified_name.set_symbol(symbol.clone()); + Ok(()) + } + Err(e) => Err(e), + } +} diff --git a/src/lib.rs b/src/lib.rs index 55ec8d5..c0a8b56 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #![allow(warnings)] pub mod ast; +pub mod compile; pub mod module; pub mod object_file; pub mod parser;