From fcca1b7abaccf9ff1f2c9fe9b690a013399e0eb7 Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Wed, 27 Aug 2025 12:35:43 -0500 Subject: [PATCH] More work on ast-generation. --- Cargo.lock | 72 ++++++++- ast-generator/Cargo.toml | 3 + ast-generator/src/lib.rs | 330 +++++++++++++++++++++++++++++++++++--- ast-generator/src/main.rs | 6 + build.rs | 15 +- src/parser/ast.yaml | 65 ++++++++ 6 files changed, 458 insertions(+), 33 deletions(-) create mode 100644 ast-generator/src/main.rs create mode 100644 src/parser/ast.yaml diff --git a/Cargo.lock b/Cargo.lock index 6f544a0..819984b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -51,13 +51,22 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "arraydeque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" + [[package]] name = "ast-generator" version = "0.1.0" dependencies = [ + "convert_case", "prettyplease", + "proc-macro2", "quote", "syn", + "yaml-rust2", ] [[package]] @@ -132,6 +141,15 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "convert_case" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cpufeatures" version = "0.2.16" @@ -174,6 +192,21 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "generic-array" version = "0.14.7" @@ -184,6 +217,24 @@ dependencies = [ "version_check", ] +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown", +] + [[package]] name = "heck" version = "0.5.0" @@ -283,9 +334,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] @@ -394,6 +445,12 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.2.0" @@ -493,3 +550,14 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "yaml-rust2" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ce2a4ff45552406d02501cea6c18d8a7e50228e7736a872951fe2fe75c91be7" +dependencies = [ + "arraydeque", + "encoding_rs", + "hashlink", +] diff --git a/ast-generator/Cargo.toml b/ast-generator/Cargo.toml index 9897028..d4a88c2 100644 --- a/ast-generator/Cargo.toml +++ b/ast-generator/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" edition = "2024" [dependencies] +convert_case = "0.8.0" prettyplease = "0.2.37" +proc-macro2 = "1.0.101" quote = "1.0.40" syn = "2.0.106" +yaml-rust2 = "0.10.3" diff --git a/ast-generator/src/lib.rs b/ast-generator/src/lib.rs index 1626d60..e8bde7b 100644 --- a/ast-generator/src/lib.rs +++ b/ast-generator/src/lib.rs @@ -1,30 +1,318 @@ -use quote::quote; +use convert_case::{Case, Casing}; +use proc_macro2::{Ident, TokenStream}; +use quote::{format_ident, quote}; +use yaml_rust2::YamlLoader; -pub fn add(left: u64, right: u64) -> u64 { - left + right +enum BuildSpec { + Enum(EnumBuildSpec), + Struct(StructBuildSpec), } -pub fn speak(s: &str) { - println!("{}", s); +struct EnumBuildSpec { + name: String, + build: String, + rules: Vec, } -pub fn do_some_quoting(msg: &str) -> String { - let output = quote! { - pub fn some_quoting() { - println!(#msg); +struct EnumRule { + rule: String, + build: String, +} + +impl EnumRule { + fn from_rule_name(rule_name: &str) -> Self { + Self { + rule: rule_name.to_string(), + build: rule_name.to_string(), } - }; - let syntax_tree = syn::parse2(output).unwrap(); - prettyplease::unparse(&syntax_tree) -} + } -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); + fn new(rule: &str, build: &str) -> Self { + Self { + rule: rule.to_string(), + build: build.to_string(), + } } } + +struct StructBuildSpec { + name: String, + build: String, + children: Vec, +} + +#[derive(Debug)] +struct ChildSpec { + name: String, + rule: String, + vec: bool, + skip: bool, + build: ChildToBuild, +} + +impl ChildSpec { + fn from_child_name(child_name: &str) -> Self { + Self { + name: child_name.to_string(), + rule: child_name.to_case(Case::Pascal), + skip: false, + vec: false, + build: ChildToBuild::TypeRef(child_name.to_case(Case::Pascal)), + } + } + + fn new(name: &str, rule: &str, vec: bool, skip: bool, build: ChildToBuild) -> Self { + Self { + name: name.to_string(), + rule: rule.to_string(), + vec, + skip, + build, + } + } +} + +#[derive(Debug)] +enum ChildToBuild { + TypeRef(String), + Boolean { on: BuildBooleanOn }, +} + +#[derive(Debug)] +enum BuildBooleanOn { + RulePresent, +} + +fn make_enum_type(build_spec: &EnumBuildSpec) -> TokenStream { + let children: Vec = build_spec + .rules + .iter() + .map(|rule| { + let member_name_ident = format_ident!("{}", rule.rule); + let child_name_ident = format_ident!("{}", rule.build); + quote! { + #member_name_ident(#child_name_ident) + } + }) + .collect(); + let type_name_ident = format_ident!("{}", build_spec.build); + quote! { + pub enum #type_name_ident { + #(#children),* + } + } +} + +fn make_child_type_ident(child_spec: &ChildSpec) -> Ident { + match &child_spec.build { + ChildToBuild::TypeRef(type_name) => format_ident!("{}", type_name), + ChildToBuild::Boolean { on: _ } => format_ident!("bool"), + } +} + +fn make_child_ident(child_spec: &ChildSpec) -> Ident { + format_ident!("{}", child_spec.name) +} + +fn make_struct_type(build_spec: &StructBuildSpec) -> TokenStream { + let mut member_names: Vec = vec![]; + let mut annotated_members: Vec = vec![]; + let mut accessors: Vec = vec![]; + + for child_spec in build_spec.children.iter() { + println!("{:?}", child_spec); + if !child_spec.skip { + let child_ident = make_child_ident(child_spec); + member_names.push(child_ident.clone()); + + let child_type_ident = make_child_type_ident(child_spec); + let type_annotation = if child_spec.vec { + quote! { Vec<#child_type_ident> } + } else { + quote! { #child_type_ident } + }; + annotated_members.push(quote! { + #child_ident: #type_annotation + }); + + accessors.push(quote! { + pub fn #child_ident(&self) -> &#type_annotation { + &self.#child_ident + } + }) + } + } + + let type_ident = format_ident!("{}", build_spec.build); + + quote! { + pub struct #type_ident { + #(#annotated_members),* + } + + impl #type_ident { + pub fn new(#(#annotated_members),*) -> Self { + Self { + #(#member_names),* + } + } + + #(#accessors)* + } + } +} + +fn make_child_holder(child_spec: &ChildSpec) -> TokenStream { + if child_spec.skip { + return quote! {}; + } + let child_ident = make_child_ident(child_spec); + let child_type_ident = make_child_type_ident(child_spec); + + if child_spec.vec { + quote! { let mut #child_ident: Vec<#child_type_ident> = vec![] } + } else { + match &child_spec.build { + ChildToBuild::TypeRef(_) => quote! { + let mut #child_ident: Option<#child_type_ident> = None + }, + ChildToBuild::Boolean { on } => match on { + BuildBooleanOn::RulePresent => { + quote! { let mut #child_ident: bool = false } + } + }, + } + } +} + +fn make_struct_build_fn(build_spec: &StructBuildSpec) -> TokenStream { + let child_holders = build_spec + .children + .iter() + .map(|child_spec| make_child_holder(child_spec)) + .collect::>(); + + let build_fn_identifier = format_ident!("build_{}", build_spec.name); + let return_type_ident = format_ident!("{}", build_spec.build); + quote! { + fn #build_fn_identifier() -> #return_type_ident { + #(#child_holders);* + } + } +} + +fn deserialize_yaml_file() -> Vec { + let docs = YamlLoader::load_from_str(include_str!("../../src/parser/ast.yaml")).unwrap(); + let doc = &docs[0]; + let mut build_specs: Vec = vec![]; + + for (build_spec_name, build_spec) in doc.as_hash().unwrap() { + let build_spec_name_pascal = build_spec_name.as_str().unwrap(); + let children = &build_spec["children"]; + + if children.is_array() { + let mut child_specs: Vec = vec![]; + for child_spec in children.as_vec().unwrap() { + if child_spec.is_hash() { + let as_hash = child_spec.as_hash().unwrap(); + let only_pair = as_hash.iter().next().unwrap(); + + let name = only_pair.0.as_str().unwrap(); + let props = only_pair.1; + + let rule = props["rule"].as_str().unwrap(); + + let skip = if !props["skip"].is_badvalue() { + props["skip"].as_bool().unwrap() + } else { + false + }; + + let vec = if !props["vec"].is_badvalue() { + props["vec"].as_bool().unwrap() + } else { + false + }; + + let build = &props["build"]; + let child_to_build = if build.is_hash() { + let r#type = build["type"].as_str().unwrap(); + let on = build["on"].as_str().unwrap(); + if r#type.eq("boolean") && on.eq("rule_present") { + ChildToBuild::Boolean { + on: BuildBooleanOn::RulePresent, + } + } else { + todo!("currently on boolean types are supported") + } + } else { + match build.as_str() { + Some(s) => ChildToBuild::TypeRef(s.to_string()), + None => ChildToBuild::TypeRef(rule.to_string()), + } + }; + + child_specs.push(ChildSpec::new(name, rule, vec, skip, child_to_build)) + } else { + child_specs.push(ChildSpec::from_child_name(child_spec.as_str().unwrap())); + } + } + + build_specs.push(BuildSpec::Struct(StructBuildSpec { + name: build_spec_name_pascal.to_string(), + build: build_spec_name_pascal.to_string(), + children: child_specs, + })); + } else { + let rule_specs = &build_spec["rules"]; + if rule_specs.is_array() { + let mut enum_rules: Vec = vec![]; + for rule_spec in rule_specs.as_vec().unwrap() { + if rule_spec.is_hash() { + let rule = rule_spec["rule"].as_str().unwrap(); + let build = rule_spec["build"].as_str().unwrap(); + enum_rules.push(EnumRule::new(rule, build)); + } else { + enum_rules.push(EnumRule::from_rule_name(rule_spec.as_str().unwrap())); + } + } + } else { + panic!("either children or rules must be present on the build spec"); + } + } + } + + build_specs +} + +pub fn test_dump() -> String { + let build_specs = deserialize_yaml_file(); + let mut streams: Vec = vec![]; + + for build_spec in &build_specs { + match build_spec { + BuildSpec::Enum(enum_spec) => { + streams.push(make_enum_type(enum_spec)); + } + BuildSpec::Struct(struct_spec) => { + streams.push(make_struct_type(struct_spec)); + } + } + } + + for build_spec in &build_specs { + match build_spec { + BuildSpec::Enum(_) => {} + BuildSpec::Struct(struct_spec) => { + streams.push(make_struct_type(struct_spec)); + } + } + } + + let combined = quote! { + #(#streams)* + }; + + let syntax_tree = syn::parse2(combined).unwrap(); + prettyplease::unparse(&syntax_tree) +} diff --git a/ast-generator/src/main.rs b/ast-generator/src/main.rs new file mode 100644 index 0000000..1ba4f81 --- /dev/null +++ b/ast-generator/src/main.rs @@ -0,0 +1,6 @@ +use ast_generator::test_dump; + +fn main() { + let s = test_dump(); + println!("{}", s); +} diff --git a/build.rs b/build.rs index f388c96..d321772 100644 --- a/build.rs +++ b/build.rs @@ -1,14 +1,9 @@ -use std::env; -use std::fs::write; -use std::path::Path; -use ast_generator::do_some_quoting; - fn main() -> std::io::Result<()> { println!("cargo:rerun-if-changed=src/parser/deimos.pest"); - let out_dir = env::var("OUT_DIR").unwrap(); - let out_dir_path = Path::new(&out_dir); - let testing_txt_path = out_dir_path.join("testing.rs"); - let output = do_some_quoting("Hello, World!"); - write(&testing_txt_path, output)?; + // let out_dir = env::var("OUT_DIR").unwrap(); + // let out_dir_path = Path::new(&out_dir); + // let testing_txt_path = out_dir_path.join("testing.rs"); + // let output = test_dump(); + // write(&testing_txt_path, output)?; Ok(()) } diff --git a/src/parser/ast.yaml b/src/parser/ast.yaml new file mode 100644 index 0000000..1443600 --- /dev/null +++ b/src/parser/ast.yaml @@ -0,0 +1,65 @@ +CompilationUnit: + children: + - namespace + - use_statements: + rule: UseStatement + vec: true + - module_level_declarations: + rule: ModuleLevelDeclaration +Namespace: + children: + - ns_kw: + rule: Ns + skip: true + - fqn: + rule: Fqn +UseStatement: + children: + - use_kw: + rule: Use + skip: true + - prefixes: + rule: UseStatementPrefix + vec: true + - suffix: + rule: UseStatementSuffix +ModuleLevelDeclaration: + rules: + - rule: Module + build: ModuleDeclaration + - rule: Interface + build: InterfaceDeclaration + - FunctionDefinition + - PlatformFunction +ModuleDeclaration: + children: + - is_public: + rule: Pub + build: + type: boolean + on: rule_present + - mod_kw: + rule: Mod + skip: true + - identifier: + rule: Identifier + - declarations: + rule: ModuleLevelDeclaration + vec: true +Class: + children: + - is_public: + rule: Pub + build: + type: boolean + on: rule_present + - class_kw: + rule: ClassKw + skip: true + - identifier + - generic_parameters + - class_constructor + - implements_list + - class_level_declarations: + rule: ClassLevelDeclaration + vec: true