Compare commits

..

2 Commits

Author SHA1 Message Date
Jesse Brault
fcca1b7aba More work on ast-generation. 2025-08-27 12:35:43 -05:00
Jesse Brault
206948efa1 Experimenting with build.rs, quote, and syn. 2025-08-26 15:39:36 -05:00
7 changed files with 512 additions and 7 deletions

102
Cargo.lock generated
View File

@ -51,6 +51,24 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "arraydeque"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236"
[[package]]
name = "ast-generator"
version = "0.1.0"
dependencies = [
"convert_case",
"prettyplease",
"proc-macro2",
"quote",
"syn",
"yaml-rust2",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
@ -123,6 +141,15 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "convert_case"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "cpufeatures"
version = "0.2.16"
@ -146,6 +173,7 @@ dependencies = [
name = "deimos"
version = "0.1.0"
dependencies = [
"ast-generator",
"clap",
"codespan-reporting",
"indoc",
@ -164,6 +192,21 @@ dependencies = [
"crypto-common",
]
[[package]]
name = "encoding_rs"
version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if",
]
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "generic-array"
version = "0.14.7"
@ -174,6 +217,24 @@ dependencies = [
"version_check",
]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"foldhash",
]
[[package]]
name = "hashlink"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
dependencies = [
"hashbrown",
]
[[package]]
name = "heck"
version = "0.5.0"
@ -262,19 +323,29 @@ dependencies = [
]
[[package]]
name = "proc-macro2"
version = "1.0.92"
name = "prettyplease"
version = "0.2.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
"syn",
]
[[package]]
name = "proc-macro2"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.37"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
@ -318,9 +389,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.90"
version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31"
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [
"proc-macro2",
"quote",
@ -374,6 +445,12 @@ version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
version = "0.2.0"
@ -473,3 +550,14 @@ name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "yaml-rust2"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ce2a4ff45552406d02501cea6c18d8a7e50228e7736a872951fe2fe75c91be7"
dependencies = [
"arraydeque",
"encoding_rs",
"hashlink",
]

View File

@ -18,3 +18,10 @@ pest_derive = { version = "2.8.0", features = ["grammar-extras"] }
codespan-reporting = "0.12.0"
log = "0.4.27"
indoc = "2.0.6"
[build-dependencies]
ast-generator = { path = "ast-generator" }
[workspace]
resolver = "3"
members = ["ast-generator"]

12
ast-generator/Cargo.toml Normal file
View File

@ -0,0 +1,12 @@
[package]
name = "ast-generator"
version = "0.1.0"
edition = "2024"
[dependencies]
convert_case = "0.8.0"
prettyplease = "0.2.37"
proc-macro2 = "1.0.101"
quote = "1.0.40"
syn = "2.0.106"
yaml-rust2 = "0.10.3"

318
ast-generator/src/lib.rs Normal file
View File

@ -0,0 +1,318 @@
use convert_case::{Case, Casing};
use proc_macro2::{Ident, TokenStream};
use quote::{format_ident, quote};
use yaml_rust2::YamlLoader;
/// One top-level entry of the AST specification: either an enum or a struct
/// that the generator should emit.
enum BuildSpec {
/// An enum to generate; see [`EnumBuildSpec`].
Enum(EnumBuildSpec),
/// A struct to generate; see [`StructBuildSpec`].
Struct(StructBuildSpec),
}
/// Specification of an enum type to generate.
struct EnumBuildSpec {
/// Name of the spec entry. NOTE(review): not read by any generator visible
/// in this file — confirm intended use.
name: String,
/// Identifier of the generated `pub enum`.
build: String,
/// One entry per generated enum variant.
rules: Vec<EnumRule>,
}
/// One variant of a generated enum.
struct EnumRule {
/// Identifier used as the variant name.
rule: String,
/// Identifier of the single type wrapped by the variant.
build: String,
}
impl EnumRule {
    /// Creates a rule whose variant name and wrapped type are both `rule_name`.
    fn from_rule_name(rule_name: &str) -> Self {
        Self::new(rule_name, rule_name)
    }

    /// Creates a rule with variant name `rule` wrapping the type named `build`.
    fn new(rule: &str, build: &str) -> Self {
        let rule = rule.to_owned();
        let build = build.to_owned();
        Self { rule, build }
    }
}
/// Specification of a struct type to generate.
struct StructBuildSpec {
/// Name of the spec entry; used as the suffix of the generated `build_<name>` function.
name: String,
/// Identifier of the generated `pub struct` (and the build function's return type).
build: String,
/// The struct's children, in declaration order.
children: Vec<ChildSpec>,
}
/// Describes one child of a struct build spec.
#[derive(Debug)]
struct ChildSpec {
/// Identifier used for the generated field, accessor, and holder variable.
name: String,
/// Value of the child's `rule` property (presumably a parser rule name — confirm).
/// Stored, but not read by the generators visible in this file.
rule: String,
/// When `true`, the child is typed as `Vec<T>` instead of `T`.
vec: bool,
/// When `true`, no field, accessor, or holder is generated for this child.
skip: bool,
/// Which type is generated for this child.
build: ChildToBuild,
}
impl ChildSpec {
    /// Creates a non-skipped, non-vec child from a bare child name.
    ///
    /// The rule and the referenced type are both the PascalCase form of
    /// `child_name`; the field name is `child_name` unchanged.
    fn from_child_name(child_name: &str) -> Self {
        let pascal_name = child_name.to_case(Case::Pascal);
        let type_ref = ChildToBuild::TypeRef(pascal_name.clone());
        Self::new(child_name, &pascal_name, false, false, type_ref)
    }

    /// Creates a child from explicitly supplied properties.
    fn new(name: &str, rule: &str, vec: bool, skip: bool, build: ChildToBuild) -> Self {
        let name = name.to_owned();
        let rule = rule.to_owned();
        Self {
            name,
            rule,
            vec,
            skip,
            build,
        }
    }
}
/// The kind of value generated for a struct child.
#[derive(Debug)]
enum ChildToBuild {
/// The child's type is the named type.
TypeRef(String),
/// The child is a `bool`; `on` selects the condition that drives it.
Boolean { on: BuildBooleanOn },
}
/// Condition that drives a boolean child.
#[derive(Debug)]
enum BuildBooleanOn {
/// Selected by `on: rule_present` in the specification; presumably the
/// boolean is true when the child's rule is present — confirm once the
/// build functions are implemented.
RulePresent,
}
/// Generates the `pub enum` declaration for `build_spec`.
///
/// Each rule becomes a tuple variant named after `rule.rule` that wraps the
/// type named by `rule.build`.
fn make_enum_type(build_spec: &EnumBuildSpec) -> TokenStream {
    let mut variants: Vec<TokenStream> = Vec::with_capacity(build_spec.rules.len());
    for enum_rule in &build_spec.rules {
        let variant_ident = format_ident!("{}", enum_rule.rule);
        let payload_ident = format_ident!("{}", enum_rule.build);
        variants.push(quote! {
            #variant_ident(#payload_ident)
        });
    }

    let enum_ident = format_ident!("{}", build_spec.build);
    quote! {
        pub enum #enum_ident {
            #(#variants),*
        }
    }
}
/// Returns the identifier of the element type generated for `child_spec`:
/// the referenced type name for a type reference, or `bool` for a boolean child.
fn make_child_type_ident(child_spec: &ChildSpec) -> Ident {
    let type_name: &str = match &child_spec.build {
        ChildToBuild::TypeRef(referenced) => referenced,
        ChildToBuild::Boolean { .. } => "bool",
    };
    format_ident!("{}", type_name)
}
/// Returns the identifier built from the child's `name`, used for its
/// field, accessor, and holder variable.
fn make_child_ident(child_spec: &ChildSpec) -> Ident {
    let field_name = &child_spec.name;
    format_ident!("{}", field_name)
}
/// Generates the `pub struct` declaration for `build_spec` together with an
/// `impl` block containing a `new` constructor and one accessor per field.
///
/// Children marked `skip` contribute nothing. A child marked `vec` is typed
/// as `Vec<T>`; every other child is typed as `T`. Accessors are named after
/// the field and return a shared reference to it.
fn make_struct_type(build_spec: &StructBuildSpec) -> TokenStream {
    let mut member_names: Vec<Ident> = vec![];
    let mut annotated_members: Vec<TokenStream> = vec![];
    let mut accessors: Vec<TokenStream> = vec![];

    // No diagnostics are printed here: callers print the generated source to
    // stdout, so anything written there would end up inside the output.
    for child_spec in build_spec.children.iter().filter(|child| !child.skip) {
        let child_ident = make_child_ident(child_spec);
        let child_type_ident = make_child_type_ident(child_spec);
        let type_annotation = if child_spec.vec {
            quote! { Vec<#child_type_ident> }
        } else {
            quote! { #child_type_ident }
        };

        annotated_members.push(quote! {
            #child_ident: #type_annotation
        });
        accessors.push(quote! {
            pub fn #child_ident(&self) -> &#type_annotation {
                &self.#child_ident
            }
        });
        member_names.push(child_ident);
    }

    let type_ident = format_ident!("{}", build_spec.build);
    quote! {
        pub struct #type_ident {
            #(#annotated_members),*
        }

        impl #type_ident {
            // The constructor takes the fields in declaration order.
            pub fn new(#(#annotated_members),*) -> Self {
                Self {
                    #(#member_names),*
                }
            }

            #(#accessors)*
        }
    }
}
/// Generates the `let mut` declaration (without a trailing semicolon) of the
/// local variable that holds a child while its parent struct is being built.
///
/// * skipped child: empty token stream
/// * `vec` child: `Vec<T>` initialised with `vec![]`
/// * type reference: `Option<T>` initialised with `None`
/// * boolean on rule-present: `bool` initialised with `false`
fn make_child_holder(child_spec: &ChildSpec) -> TokenStream {
    if child_spec.skip {
        return quote! {};
    }

    let holder_ident = make_child_ident(child_spec);
    let holder_type_ident = make_child_type_ident(child_spec);

    match (child_spec.vec, &child_spec.build) {
        (true, _) => quote! { let mut #holder_ident: Vec<#holder_type_ident> = vec![] },
        (false, ChildToBuild::TypeRef(_)) => quote! {
            let mut #holder_ident: Option<#holder_type_ident> = None
        },
        (
            false,
            ChildToBuild::Boolean {
                on: BuildBooleanOn::RulePresent,
            },
        ) => quote! { let mut #holder_ident: bool = false },
    }
}
fn make_struct_build_fn(build_spec: &StructBuildSpec) -> TokenStream {
let child_holders = build_spec
.children
.iter()
.map(|child_spec| make_child_holder(child_spec))
.collect::<Vec<_>>();
let build_fn_identifier = format_ident!("build_{}", build_spec.name);
let return_type_ident = format_ident!("{}", build_spec.build);
quote! {
fn #build_fn_identifier() -> #return_type_ident {
#(#child_holders);*
}
}
}
fn deserialize_yaml_file() -> Vec<BuildSpec> {
let docs = YamlLoader::load_from_str(include_str!("../../src/parser/ast.yaml")).unwrap();
let doc = &docs[0];
let mut build_specs: Vec<BuildSpec> = vec![];
for (build_spec_name, build_spec) in doc.as_hash().unwrap() {
let build_spec_name_pascal = build_spec_name.as_str().unwrap();
let children = &build_spec["children"];
if children.is_array() {
let mut child_specs: Vec<ChildSpec> = vec![];
for child_spec in children.as_vec().unwrap() {
if child_spec.is_hash() {
let as_hash = child_spec.as_hash().unwrap();
let only_pair = as_hash.iter().next().unwrap();
let name = only_pair.0.as_str().unwrap();
let props = only_pair.1;
let rule = props["rule"].as_str().unwrap();
let skip = if !props["skip"].is_badvalue() {
props["skip"].as_bool().unwrap()
} else {
false
};
let vec = if !props["vec"].is_badvalue() {
props["vec"].as_bool().unwrap()
} else {
false
};
let build = &props["build"];
let child_to_build = if build.is_hash() {
let r#type = build["type"].as_str().unwrap();
let on = build["on"].as_str().unwrap();
if r#type.eq("boolean") && on.eq("rule_present") {
ChildToBuild::Boolean {
on: BuildBooleanOn::RulePresent,
}
} else {
todo!("currently on boolean types are supported")
}
} else {
match build.as_str() {
Some(s) => ChildToBuild::TypeRef(s.to_string()),
None => ChildToBuild::TypeRef(rule.to_string()),
}
};
child_specs.push(ChildSpec::new(name, rule, vec, skip, child_to_build))
} else {
child_specs.push(ChildSpec::from_child_name(child_spec.as_str().unwrap()));
}
}
build_specs.push(BuildSpec::Struct(StructBuildSpec {
name: build_spec_name_pascal.to_string(),
build: build_spec_name_pascal.to_string(),
children: child_specs,
}));
} else {
let rule_specs = &build_spec["rules"];
if rule_specs.is_array() {
let mut enum_rules: Vec<EnumRule> = vec![];
for rule_spec in rule_specs.as_vec().unwrap() {
if rule_spec.is_hash() {
let rule = rule_spec["rule"].as_str().unwrap();
let build = rule_spec["build"].as_str().unwrap();
enum_rules.push(EnumRule::new(rule, build));
} else {
enum_rules.push(EnumRule::from_rule_name(rule_spec.as_str().unwrap()));
}
}
} else {
panic!("either children or rules must be present on the build spec");
}
}
}
build_specs
}
/// Generates code for every build spec in the YAML specification and returns
/// it as pretty-printed Rust source.
///
/// The first pass emits the type declaration for each enum and struct spec.
/// The second pass emits the struct type for each struct spec again.
///
/// # Panics
/// Panics if the specification cannot be loaded or if the combined token
/// stream cannot be parsed by `syn`.
pub fn test_dump() -> String {
    let build_specs = deserialize_yaml_file();

    let type_definitions = build_specs.iter().map(|spec| match spec {
        BuildSpec::Enum(enum_spec) => make_enum_type(enum_spec),
        BuildSpec::Struct(struct_spec) => make_struct_type(struct_spec),
    });

    // NOTE(review): this pass repeats `make_struct_type`, so every struct is
    // emitted twice; it looks like `make_struct_build_fn` may have been
    // intended here — confirm.
    let second_pass = build_specs.iter().filter_map(|spec| match spec {
        BuildSpec::Enum(_) => None,
        BuildSpec::Struct(struct_spec) => Some(make_struct_type(struct_spec)),
    });

    let streams: Vec<TokenStream> = type_definitions.chain(second_pass).collect();
    let combined = quote! {
        #(#streams)*
    };

    let syntax_tree = syn::parse2(combined).unwrap();
    prettyplease::unparse(&syntax_tree)
}

View File

@ -0,0 +1,6 @@
use ast_generator::test_dump;
/// Prints the source produced by `test_dump` to standard output.
fn main() {
    println!("{}", test_dump());
}

9
build.rs Normal file
View File

@ -0,0 +1,9 @@
/// Build script entry point.
///
/// Currently it only emits a `rerun-if-changed` directive for the pest
/// grammar file and returns `Ok(())`; writing generated code is disabled.
fn main() -> std::io::Result<()> {
// Ask Cargo to re-run this script when the grammar file changes.
println!("cargo:rerun-if-changed=src/parser/deimos.pest");
// Disabled experiment: write the output of `test_dump()` to
// `$OUT_DIR/testing.rs`.
// let out_dir = env::var("OUT_DIR").unwrap();
// let out_dir_path = Path::new(&out_dir);
// let testing_txt_path = out_dir_path.join("testing.rs");
// let output = test_dump();
// write(&testing_txt_path, output)?;
Ok(())
}

65
src/parser/ast.yaml Normal file
View File

@ -0,0 +1,65 @@
CompilationUnit:
children:
- namespace
- use_statements:
rule: UseStatement
vec: true
- module_level_declarations:
rule: ModuleLevelDeclaration
Namespace:
children:
- ns_kw:
rule: Ns
skip: true
- fqn:
rule: Fqn
UseStatement:
children:
- use_kw:
rule: Use
skip: true
- prefixes:
rule: UseStatementPrefix
vec: true
- suffix:
rule: UseStatementSuffix
ModuleLevelDeclaration:
rules:
- rule: Module
build: ModuleDeclaration
- rule: Interface
build: InterfaceDeclaration
- FunctionDefinition
- PlatformFunction
ModuleDeclaration:
children:
- is_public:
rule: Pub
build:
type: boolean
on: rule_present
- mod_kw:
rule: Mod
skip: true
- identifier:
rule: Identifier
- declarations:
rule: ModuleLevelDeclaration
vec: true
Class:
children:
- is_public:
rule: Pub
build:
type: boolean
on: rule_present
- class_kw:
rule: ClassKw
skip: true
- identifier
- generic_parameters
- class_constructor
- implements_list
- class_level_declarations:
rule: ClassLevelDeclaration
vec: true