From 13330300c1a744b58d88a8564a2a353fa6868063 Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Fri, 31 Oct 2025 13:04:22 -0500 Subject: [PATCH] WIP fleshing out of block and expression name analysis. --- ast-generator/src/lib.rs | 3 +- src/name_analysis/first_pass.rs | 40 +-- src/name_analysis/mod.rs | 6 +- src/name_analysis/second_pass.rs | 298 +++++++++++++++++++- src/name_analysis/symbol/mod.rs | 7 + src/name_analysis/symbol/variable_symbol.rs | 9 +- src/name_analysis/symbol_table/mod.rs | 52 +++- src/name_analysis/symbol_table/scope.rs | 12 + src/parser/ast.yaml | 29 +- 9 files changed, 413 insertions(+), 43 deletions(-) diff --git a/ast-generator/src/lib.rs b/ast-generator/src/lib.rs index 22c0feb..0566049 100644 --- a/ast-generator/src/lib.rs +++ b/ast-generator/src/lib.rs @@ -116,8 +116,7 @@ fn generate_node_file(build_specs: &[BuildSpec]) -> AstGeneratedFile { use std::range::Range; use std::rc::Rc; use std::cell::RefCell; - use crate::name_analysis::symbol::type_symbol::*; - use crate::name_analysis::symbol::use_symbol::*; + use crate::name_analysis::symbol::*; #(#types)* }; diff --git a/src/name_analysis/first_pass.rs b/src/name_analysis/first_pass.rs index 6ffc7ee..4fabc83 100644 --- a/src/name_analysis/first_pass.rs +++ b/src/name_analysis/first_pass.rs @@ -14,11 +14,12 @@ use crate::name_analysis::symbol::primitive_type_symbol::PrimitiveTypeSymbol; use crate::name_analysis::symbol::source_definition::SourceDefinition; use crate::name_analysis::symbol::type_symbol::TypeSymbol; use crate::name_analysis::symbol::use_symbol::{ConcreteUseSymbol, StarUseSymbol}; -use crate::name_analysis::symbol_table::{SymbolInsertError, SymbolTable}; +use crate::name_analysis::symbol_table::SymbolTable; use crate::name_analysis::util::{ format_fqn, handle_insert_error, handle_lookup_error, join_fqn_parts, }; use std::cell::RefCell; +use std::fmt::format; use std::rc::Rc; pub fn na_p1_compilation_unit( @@ -250,9 +251,7 @@ fn na_p1_function( Some(SourceDefinition::from_identifier(function.identifier())), ); let function_symbol = match symbol_table.insert_function_symbol(to_insert) { - Ok(function_symbol) => { - Some(function_symbol) - } + Ok(function_symbol) => Some(function_symbol), Err(symbol_insert_error) => { handle_insert_error( symbol_insert_error, @@ -264,13 +263,13 @@ fn na_p1_function( None } }; - + if function_symbol.is_some() { let mut as_ref_mut = function_symbol.as_ref().unwrap().borrow_mut(); // push a scope for this function symbol_table.push_scope(&format!("FunctionScope {}", function.identifier().name())); - + // generics na_p1_generic_parameters(function.generics_mut(), symbol_table, diagnostics); @@ -282,15 +281,21 @@ fn na_p1_function( )); // return type - let return_type = - na_p1_return_type(function.return_type_mut(), symbol_table, diagnostics); + let return_type = na_p1_return_type(function.return_type_mut(), symbol_table, diagnostics); if let Some(type_symbol) = return_type { as_ref_mut.set_return_type(type_symbol); } - symbol_table.push_scope(&format!("FunctionBodyScope {}", function.identifier().name())); + symbol_table.push_scope(&format!( + "FunctionBodyScope {}", + function.identifier().name() + )); - na_p1_function_body(function.function_body_mut(), symbol_table, diagnostics); + na_p1_function_body( + function.function_body_mut(), + symbol_table, + diagnostics + ); symbol_table.pop_scope(); symbol_table.pop_scope(); @@ -324,9 +329,7 @@ fn na_p1_parameter( parameter_type_symbol, ); match symbol_table.insert_parameter_symbol(to_insert) { - Ok(parameter_symbol) => { - Some(parameter_symbol) - } + Ok(parameter_symbol) => Some(parameter_symbol), Err(symbol_insert_error) => { handle_insert_error( symbol_insert_error, @@ -473,17 +476,20 @@ fn na_p1_function_body( ) { match function_body { FunctionBody::FunctionAliasBody(alias_body) => { - // no-op, resolve in pass 2 + // set scope id for pass 2; see below + alias_body.set_scope_id(symbol_table.current_scope_id()); } FunctionBody::FunctionEqualsBody(equals_body) => { // see below + equals_body.set_scope_id(symbol_table.current_scope_id()); } FunctionBody::FunctionBlockBody(block_body) => { - // we need to do all insertion/resolution in pass 2, because we + // we need to do all insertion/resolution in pass 2, because we // might call functions/use classes/etc from the same compilation - // unit which haven't been defined yet. So the strategy is to set - // the scope id for the body and then in pass 2, set the symbol + // unit which haven't been defined yet. So the strategy is to set + // the scope id for the body and then in pass 2, set the symbol // table's current scope to that id. + block_body.set_scope_id(symbol_table.current_scope_id()); } } } diff --git a/src/name_analysis/mod.rs b/src/name_analysis/mod.rs index 7359e43..a562acf 100644 --- a/src/name_analysis/mod.rs +++ b/src/name_analysis/mod.rs @@ -51,13 +51,17 @@ pub fn analyze_names< let file_name = files.name(compilation_unit.file_id()).unwrap(); na_p1_compilation_unit(file_name, compilation_unit, symbol_table, &mut diagnostics); } + + if !diagnostics.is_empty() { + return diagnostics; + } // resolve symbols for compilation_unit in compilation_units { na_p2_compilation_unit(compilation_unit, symbol_table, &mut diagnostics); } - diagnostics.into() + diagnostics } #[cfg(test)] diff --git a/src/name_analysis/second_pass.rs b/src/name_analysis/second_pass.rs index 3cd90ab..7a5f0cd 100644 --- a/src/name_analysis/second_pass.rs +++ b/src/name_analysis/second_pass.rs @@ -1,28 +1,27 @@ -use crate::ast::node::{ - CompilationUnit, ConcreteUseStatement, ConcreteUseStatementSuffix, Identifier, - StarUseStatement, UseStatement, UseStatementIdentifier, UseStatementPrefix, -}; +use crate::ast::node::{AssignmentStatement, CompilationUnit, ConcreteUseStatement, ConcreteUseStatementSuffix, Expression, ExpressionStatement, Function, FunctionAliasBody, FunctionBlockBody, FunctionBody, Identifier, LValue, ModuleLevelDeclaration, StarUseStatement, Statement, TypeUse, UseStatement, UseStatementIdentifier, UseStatementPrefix, VariableDeclaration, VariableUse}; use crate::diagnostic::DmDiagnostic; +use crate::name_analysis::symbol::source_definition::SourceDefinition; +use crate::name_analysis::symbol::variable_symbol::VariableSymbol; use crate::name_analysis::symbol_table::{SymbolLookupError, SymbolTable}; -use crate::name_analysis::util::{handle_lookup_error, join_fqn_parts}; +use crate::name_analysis::util::{handle_insert_error, handle_lookup_error, join_fqn_parts}; use std::rc::Rc; pub fn na_p2_compilation_unit( compilation_unit: &mut CompilationUnit, - symbol_table: &SymbolTable, + symbol_table: &mut SymbolTable, diagnostics: &mut Vec, ) { - // TODO: check namespace for proper file name for use_statement in compilation_unit.use_statements_mut() { na_p2_use_statement(use_statement, symbol_table, diagnostics); } - - // TODO: declarations + for declaration in compilation_unit.module_level_declarations_mut() { + na_p2_module_level_declaration(declaration, symbol_table, diagnostics); + } } fn na_p2_use_statement( use_statement: &mut UseStatement, - symbol_table: &SymbolTable, + symbol_table: &mut SymbolTable, diagnostics: &mut Vec, ) { match use_statement { @@ -37,7 +36,7 @@ fn na_p2_use_statement( fn na_p2_concrete_use_statement( concrete_use_statement: &mut ConcreteUseStatement, - symbol_table: &SymbolTable, + symbol_table: &mut SymbolTable, diagnostics: &mut Vec, ) { let base_fqn_parts = concrete_use_statement @@ -72,7 +71,7 @@ fn na_p2_concrete_use_statement( fn handle_concrete_use_statement_identifier( base_fqn_parts: &[Rc], use_statement_identifier: &mut UseStatementIdentifier, - symbol_table: &SymbolTable, + symbol_table: &mut SymbolTable, diagnostics: &mut Vec, ) { let fqn_parts = { @@ -101,7 +100,7 @@ fn handle_concrete_use_statement_identifier( fn na_p2_star_use_statement( star_use_statement: &mut StarUseStatement, - symbol_table: &SymbolTable, + symbol_table: &mut SymbolTable, diagnostics: &mut Vec, ) { let mut symbol_ref_mut = star_use_statement.symbol().unwrap().borrow_mut(); @@ -120,3 +119,276 @@ fn na_p2_star_use_statement( } } } + +fn na_p2_module_level_declaration( + module_level_declaration: &mut ModuleLevelDeclaration, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + match module_level_declaration { + ModuleLevelDeclaration::Module(module_declaration) => { + todo!() + } + ModuleLevelDeclaration::Interface(interface) => { + todo!() + } + ModuleLevelDeclaration::Class(class) => { + todo!() + } + ModuleLevelDeclaration::Function(function) => { + na_p2_function(function, symbol_table, diagnostics); + } + ModuleLevelDeclaration::PlatformFunction(platform_function) => { + todo!() + } + } +} + +fn na_p2_function( + function: &mut Function, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + na_p2_function_body(function.function_body_mut(), symbol_table, diagnostics); +} + +fn na_p2_function_body( + function_body: &mut FunctionBody, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + match function_body { + FunctionBody::FunctionAliasBody(alias_body) => { + na_p2_function_alias_body(alias_body, symbol_table, diagnostics); + } + FunctionBody::FunctionEqualsBody(equals_body) => {} + FunctionBody::FunctionBlockBody(block_body) => { + na_p2_function_block_body(block_body, symbol_table, diagnostics); + } + } +} + +fn na_p2_function_alias_body( + function_alias_body: &mut FunctionAliasBody, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + let maybe_function_symbol = symbol_table.lookup_function_symbol( + function_alias_body.identifier().name(), + *function_alias_body.scope_id().unwrap(), + ); + match maybe_function_symbol { + Ok(function_symbol) => { + function_alias_body.set_resolved_function_symbol(function_symbol); + } + Err(symbol_lookup_error) => { + handle_lookup_error( + symbol_lookup_error, + function_alias_body.identifier().name(), + function_alias_body.identifier().file_id(), + function_alias_body.identifier().range(), + diagnostics, + ); + } + } +} + +fn na_p2_function_block_body( + function_block_body: &mut FunctionBlockBody, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + symbol_table.set_current_scope(*function_block_body.scope_id().unwrap()); + for statement in function_block_body.statements_mut() { + na_p2_statement(statement, symbol_table, diagnostics); + } +} + +fn na_p2_statement( + statement: &mut Statement, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + match statement { + Statement::VariableDeclaration(variable_declaration) => { + na_p2_variable_declaration(variable_declaration, symbol_table, diagnostics); + } + Statement::AssignmentStatement(assignment_statement) => { + na_p2_assignment_statement(assignment_statement, symbol_table, diagnostics); + } + Statement::ExpressionStatement(expression_statement) => { + na_p2_expression_statement(expression_statement, symbol_table, diagnostics); + } + Statement::UseStatement(use_statement) => { + todo!() + } + Statement::IfStatement(if_statement) => { + todo!() + } + Statement::WhileStatement(while_statement) => { + todo!() + } + Statement::ForStatement(for_statement) => { + todo!() + } + } +} + +fn na_p2_variable_declaration( + variable_declaration: &mut VariableDeclaration, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + // handle variable itself + let to_insert = VariableSymbol::new( + variable_declaration.identifier().name(), + variable_declaration.is_mut(), + Some(SourceDefinition::from_identifier( + variable_declaration.identifier(), + )), + ); + match symbol_table.insert_variable_symbol(to_insert) { + Ok(variable_symbol) => { + variable_declaration.set_variable_symbol(variable_symbol); + } + Err(symbol_insert_error) => { + handle_insert_error( + symbol_insert_error, + variable_declaration.identifier().name(), + variable_declaration.identifier().file_id(), + variable_declaration.identifier().range(), + diagnostics, + ); + } + } + + // type-use + if let Some(type_use) = variable_declaration.type_use_mut() { + na_p2_type_use(type_use, symbol_table, diagnostics); + } + + // initializer + if let Some(expression) = variable_declaration.expression_mut() { + na_p2_expression(expression, symbol_table, diagnostics); + } +} + +fn na_p2_assignment_statement( + assignment_statement: &mut AssignmentStatement, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + na_p2_l_value(assignment_statement.l_value_mut(), symbol_table, diagnostics); + na_p2_expression(assignment_statement.expression_mut(), symbol_table, diagnostics); +} + +fn na_p2_l_value( + l_value: &mut LValue, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + na_p2_variable_use(l_value.variable_use_mut(), symbol_table, diagnostics); + // TODO: suffixes +} + +fn na_p2_expression_statement( + expression_statement: &mut ExpressionStatement, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + na_p2_expression(expression_statement.expression_mut(), symbol_table, diagnostics); +} + +fn na_p2_variable_use( + variable_use: &mut VariableUse, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + match symbol_table.lookup_lv_symbol(variable_use.identifier().name()) { + Ok(lv_symbol) => { + variable_use.set_lv_symbol(lv_symbol); + } + Err(symbol_lookup_error) => { + handle_lookup_error( + symbol_lookup_error, + variable_use.identifier().name(), + variable_use.identifier().file_id(), + variable_use.identifier().range(), + diagnostics, + ); + } + } +} + +fn na_p2_expression( + expression: &mut Expression, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + match expression { + Expression::Ternary(ternary) => { + todo!() + } + Expression::Or(or) => { + todo!() + } + Expression::And(and) => { + todo!() + } + Expression::Comparison(comparison) => { + todo!() + } + Expression::Shift(shift) => { + todo!() + } + Expression::Additive(additive) => { + todo!() + } + Expression::Multiplicative(multiplicative) => { + todo!() + } + Expression::Prefix(prefix) => { + todo!() + } + Expression::Suffix(suffix) => { + todo!() + } + Expression::Literal(literal) => { + todo!() + } + Expression::VariableUse(variable_use) => { + na_p2_variable_use(variable_use, symbol_table, diagnostics); + } + Expression::Fqn(fqn) => { + todo!() + } + Expression::Closure(closure) => { + todo!() + } + Expression::List(list) => { + todo!() + } + } +} + +fn na_p2_type_use( + type_use: &mut TypeUse, + symbol_table: &mut SymbolTable, + diagnostics: &mut Vec, +) { + match type_use { + TypeUse::PrimitiveType(primitive_type_use) => { + todo!() + } + TypeUse::InterfaceOrClassTypeUse(interface_or_class_type_use) => { + todo!() + } + TypeUse::TupleTypeUse(tuple_type_use) => { + todo!() + } + TypeUse::FunctionTypeUse(function_type_use) => { + todo!() + } + } +} + diff --git a/src/name_analysis/symbol/mod.rs b/src/name_analysis/symbol/mod.rs index 2fc8bf4..c9f88e9 100644 --- a/src/name_analysis/symbol/mod.rs +++ b/src/name_analysis/symbol/mod.rs @@ -17,6 +17,13 @@ pub mod variable_symbol; use crate::name_analysis::symbol::source_definition::SourceDefinition; use std::fmt::Debug; +pub use self::{ + class_member_symbol::*, class_symbol::*, function_symbol::*, function_symbol::*, + generic_type_symbol::*, interface_symbol::*, lv_symbol::*, module_level_symbol::*, + module_symbol::*, parameter_symbol::*, primitive_type_symbol::*, type_symbol::*, + usable_symbol::*, use_symbol::*, variable_symbol::*, +}; + pub trait Symbol: Debug { fn source_definition(&self) -> Option<&SourceDefinition>; } diff --git a/src/name_analysis/symbol/variable_symbol.rs b/src/name_analysis/symbol/variable_symbol.rs index 2fd2e9e..ed7b9dd 100644 --- a/src/name_analysis/symbol/variable_symbol.rs +++ b/src/name_analysis/symbol/variable_symbol.rs @@ -1,10 +1,11 @@ use crate::name_analysis::symbol::source_definition::SourceDefinition; use crate::name_analysis::symbol::Symbol; use std::fmt::{Debug, Formatter}; +use std::rc::Rc; #[derive(Clone)] pub struct VariableSymbol { - declared_name: String, + declared_name: Rc, is_mutable: bool, source_definition: Option, } @@ -16,7 +17,7 @@ impl VariableSymbol { source_definition: Option, ) -> Self { VariableSymbol { - declared_name: declared_name.to_string(), + declared_name: Rc::from(declared_name), is_mutable, source_definition, } @@ -25,6 +26,10 @@ impl VariableSymbol { pub fn declared_name(&self) -> &str { &self.declared_name } + + pub fn declared_name_owned(&self) -> Rc { + self.declared_name.clone() + } pub fn is_mutable(&self) -> bool { self.is_mutable diff --git a/src/name_analysis/symbol_table/mod.rs b/src/name_analysis/symbol_table/mod.rs index cc38cea..42af0fe 100644 --- a/src/name_analysis/symbol_table/mod.rs +++ b/src/name_analysis/symbol_table/mod.rs @@ -5,7 +5,7 @@ use crate::name_analysis::symbol::parameter_symbol::ParameterSymbol; use crate::name_analysis::symbol::type_symbol::TypeSymbol; use crate::name_analysis::symbol::usable_symbol::UsableSymbol; use crate::name_analysis::symbol::use_symbol::{ConcreteUseSymbol, StarUseSymbol}; -use crate::name_analysis::symbol::Symbol; +use crate::name_analysis::symbol::{LVSymbol, Symbol}; use crate::name_analysis::symbol_table::fqn_context::FqnContext; use crate::name_analysis::symbol_table::symbol_tree::SymbolTree; use crate::name_analysis::symbol_table::SymbolInsertError::SymbolAlreadyDefined; @@ -14,6 +14,7 @@ use std::cell::RefCell; use std::fmt::Display; use std::ops::Deref; use std::rc::Rc; +use crate::name_analysis::symbol::variable_symbol::VariableSymbol; pub(self) mod fqn_context; mod scope; @@ -67,6 +68,10 @@ impl SymbolTable { self.current_scope_id = parent_id; } } + + pub fn set_current_scope(&mut self, id: usize) { + self.current_scope_id = id; + } pub fn set_current_fqn(&mut self, names: &[&str]) { self.fqn_context = Box::new(FqnContext::new()); @@ -226,6 +231,23 @@ impl SymbolTable { Ok(inserted) } } + + pub fn insert_variable_symbol( + &mut self, + variable_symbol: VariableSymbol, + ) -> Result>, SymbolInsertError> { + if let Some(defined_symbol) = self + .current_scope() + .find_lv_symbol(variable_symbol.declared_name()) + { + Err(SymbolAlreadyDefined(defined_symbol.to_symbol())) + } else { + let inserted = self + .current_scope_mut() + .insert_variable_symbol(variable_symbol); + Ok(inserted) + } + } pub fn lookup_type(&self, declared_name: &str) -> Result { let mut current_scope: Option<&Scope> = Some(self.current_scope()); @@ -244,6 +266,34 @@ impl SymbolTable { pub fn lookup_type_by_fqn(&self, fqn_parts: &[&str]) -> Result { todo!() } + + pub fn lookup_function_symbol(&self, declared_name: &str, scope_id: usize) -> Result>, SymbolLookupError> { + let mut current_scope: Option<&Scope> = self.scopes.get(scope_id); + while let Some(scope) = current_scope.take() { + if let Some(function_symbol) = scope.find_function_symbol(declared_name) { + return Ok(function_symbol.clone()); + } else { + current_scope = scope + .parent() + .and_then(|parent_id| self.scopes.get(parent_id)); + } + } + Err(SymbolLookupError::NoDefinition) + } + + pub fn lookup_lv_symbol(&self, declared_name: &str) -> Result { + let mut current_scope: Option<&Scope> = Some(self.current_scope()); + while let Some(scope) = current_scope.take() { + if let Some(lv_symbol) = scope.find_lv_symbol(declared_name) { + return Ok(lv_symbol) + } else { + current_scope = scope + .parent() + .and_then(|parent_id| self.scopes.get(parent_id)); + } + } + Err(SymbolLookupError::NoDefinition) + } } impl Display for SymbolTable { diff --git a/src/name_analysis/symbol_table/scope.rs b/src/name_analysis/symbol_table/scope.rs index cc75a7a..4ed39e7 100644 --- a/src/name_analysis/symbol_table/scope.rs +++ b/src/name_analysis/symbol_table/scope.rs @@ -142,6 +142,14 @@ impl Scope { let key = symbol.declared_name_owned(); insert_symbol!(self.parameter_symbols, symbol, key) } + + pub fn insert_variable_symbol( + &mut self, + symbol: VariableSymbol, + ) -> Rc> { + let key = symbol.declared_name_owned(); + insert_symbol!(self.variable_symbols, symbol, key) + } pub fn find_module_level_symbol(&self, declared_name: &str) -> Option { self.module_symbols @@ -195,6 +203,10 @@ impl Scope { .map(|variable_symbol| LVSymbol::Variable(variable_symbol.clone())) }) } + + pub fn find_function_symbol(&self, declared_name: &str) -> Option<&Rc>> { + self.function_symbols.get(declared_name) + } pub fn debug_name(&self) -> &str { &self.debug_name diff --git a/src/parser/ast.yaml b/src/parser/ast.yaml index dfe20be..ec8feef 100644 --- a/src/parser/ast.yaml +++ b/src/parser/ast.yaml @@ -60,17 +60,17 @@ FullyQualifiedName: vec: rule: Identifier - file_id: - special: + special: kind: file_id - range: - special: + special: kind: range derive: - PartialEq - Eq - Hash IdentifierOrFqn: - tree_enum: + tree_enum: rules: - Identifier - FullyQualifiedName @@ -609,6 +609,9 @@ FunctionEqualsBody: struct: children: - expression + fields: + - scope_id: + kind: usize FunctionAliasBody: struct: children: @@ -616,6 +619,12 @@ FunctionAliasBody: skip: rule: Alias - identifier + fields: + - scope_id: + kind: usize + - resolved_function_symbol: + kind: FunctionSymbol + wrap: rc_ref_cell FunctionBlockBody: struct: children: @@ -625,6 +634,9 @@ FunctionBlockBody: - end_kw: skip: rule: End + fields: + - scope_id: + kind: usize # Class constructs ClassConstructor: @@ -681,6 +693,10 @@ VariableDeclaration: - expression: member: optional: true + fields: + - variable_symbol: + kind: VariableSymbol + wrap: rc_ref_cell AssignmentStatement: struct: children: @@ -786,10 +802,9 @@ VariableUse: struct: children: - identifier - derive: - - PartialEq - - Eq - - Hash + fields: + - lv_symbol: + kind: LVSymbol # Expressions Expression: