More work on ast-generation.

This commit is contained in:
Jesse Brault 2025-08-27 12:35:43 -05:00
parent 206948efa1
commit fcca1b7aba
6 changed files with 458 additions and 33 deletions

72
Cargo.lock generated
View File

@ -51,13 +51,22 @@ dependencies = [
"windows-sys", "windows-sys",
] ]
[[package]]
name = "arraydeque"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236"
[[package]] [[package]]
name = "ast-generator" name = "ast-generator"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"convert_case",
"prettyplease", "prettyplease",
"proc-macro2",
"quote", "quote",
"syn", "syn",
"yaml-rust2",
] ]
[[package]] [[package]]
@ -132,6 +141,15 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "convert_case"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f"
dependencies = [
"unicode-segmentation",
]
[[package]] [[package]]
name = "cpufeatures" name = "cpufeatures"
version = "0.2.16" version = "0.2.16"
@ -174,6 +192,21 @@ dependencies = [
"crypto-common", "crypto-common",
] ]
[[package]]
name = "encoding_rs"
version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if",
]
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]] [[package]]
name = "generic-array" name = "generic-array"
version = "0.14.7" version = "0.14.7"
@ -184,6 +217,24 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"foldhash",
]
[[package]]
name = "hashlink"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
dependencies = [
"hashbrown",
]
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.5.0" version = "0.5.0"
@ -283,9 +334,9 @@ dependencies = [
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.92" version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
@ -394,6 +445,12 @@ version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
version = "0.2.0" version = "0.2.0"
@ -493,3 +550,14 @@ name = "windows_x86_64_msvc"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "yaml-rust2"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ce2a4ff45552406d02501cea6c18d8a7e50228e7736a872951fe2fe75c91be7"
dependencies = [
"arraydeque",
"encoding_rs",
"hashlink",
]

View File

@ -4,6 +4,9 @@ version = "0.1.0"
edition = "2024" edition = "2024"
[dependencies] [dependencies]
convert_case = "0.8.0"
prettyplease = "0.2.37" prettyplease = "0.2.37"
proc-macro2 = "1.0.101"
quote = "1.0.40" quote = "1.0.40"
syn = "2.0.106" syn = "2.0.106"
yaml-rust2 = "0.10.3"

View File

@ -1,30 +1,318 @@
use quote::quote; use convert_case::{Case, Casing};
use proc_macro2::{Ident, TokenStream};
use quote::{format_ident, quote};
use yaml_rust2::YamlLoader;
pub fn add(left: u64, right: u64) -> u64 { enum BuildSpec {
left + right Enum(EnumBuildSpec),
Struct(StructBuildSpec),
} }
pub fn speak(s: &str) { struct EnumBuildSpec {
println!("{}", s); name: String,
build: String,
rules: Vec<EnumRule>,
} }
pub fn do_some_quoting(msg: &str) -> String { struct EnumRule {
let output = quote! { rule: String,
pub fn some_quoting() { build: String,
println!(#msg); }
impl EnumRule {
fn from_rule_name(rule_name: &str) -> Self {
Self {
rule: rule_name.to_string(),
build: rule_name.to_string(),
} }
}; }
let syntax_tree = syn::parse2(output).unwrap();
prettyplease::unparse(&syntax_tree)
}
#[cfg(test)] fn new(rule: &str, build: &str) -> Self {
mod tests { Self {
use super::*; rule: rule.to_string(),
build: build.to_string(),
#[test] }
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
} }
} }
/// Build specification for a node that is generated as a Rust struct.
struct StructBuildSpec {
    /// Name of the spec; used to form the `build_<name>` function identifier.
    name: String,
    /// Name of the generated struct type.
    build: String,
    /// Specs for the node's children, in declaration order.
    children: Vec<ChildSpec>,
}
/// Describes a single child of a struct-shaped node.
#[derive(Debug)]
struct ChildSpec {
    /// Member name; used as the field and accessor identifier.
    name: String,
    /// Name of the rule associated with this child.
    rule: String,
    /// When `true`, the member is typed `Vec<T>` rather than `T`.
    vec: bool,
    /// When `true`, no member is generated for this child.
    skip: bool,
    /// What kind of value is generated for this child.
    build: ChildToBuild,
}

impl ChildSpec {
    /// Builds the default spec for a child that is given only by name.
    ///
    /// The rule and the referenced type are both the Pascal-case form of
    /// `child_name`; the child is neither a `Vec` nor skipped.
    fn from_child_name(child_name: &str) -> Self {
        let pascal_name = child_name.to_case(Case::Pascal);
        Self {
            name: child_name.to_owned(),
            build: ChildToBuild::TypeRef(pascal_name.clone()),
            rule: pascal_name,
            vec: false,
            skip: false,
        }
    }

    /// Builds a spec from explicitly provided values.
    fn new(name: &str, rule: &str, vec: bool, skip: bool, build: ChildToBuild) -> Self {
        let name = String::from(name);
        let rule = String::from(rule);
        Self {
            name,
            rule,
            vec,
            skip,
            build,
        }
    }
}
/// The kind of value generated for a child member.
#[derive(Debug)]
enum ChildToBuild {
    /// The member refers to another type, identified by name.
    TypeRef(String),
    /// The member is a `bool`; `on` records what the boolean is built on.
    Boolean { on: BuildBooleanOn },
}
/// Conditions a boolean child member can be built on.
#[derive(Debug)]
enum BuildBooleanOn {
    /// Corresponds to `on: rule_present` in the YAML spec.
    RulePresent,
}
/// Generates the `pub enum` definition for an enum build spec.
///
/// Each rule becomes a tuple variant named after `rule.rule` that wraps a
/// single value of the type named by `rule.build`.
fn make_enum_type(build_spec: &EnumBuildSpec) -> TokenStream {
    let mut variants: Vec<TokenStream> = Vec::with_capacity(build_spec.rules.len());
    for enum_rule in &build_spec.rules {
        let variant_ident = format_ident!("{}", enum_rule.rule);
        let payload_ident = format_ident!("{}", enum_rule.build);
        variants.push(quote! {
            #variant_ident(#payload_ident)
        });
    }

    let enum_ident = format_ident!("{}", build_spec.build);
    quote! {
        pub enum #enum_ident {
            #(#variants),*
        }
    }
}
/// Returns the identifier of the element type generated for a child:
/// the referenced type name for `TypeRef`, or `bool` for `Boolean`.
fn make_child_type_ident(child_spec: &ChildSpec) -> Ident {
    let type_name = match &child_spec.build {
        ChildToBuild::TypeRef(referenced) => referenced.as_str(),
        ChildToBuild::Boolean { .. } => "bool",
    };
    format_ident!("{}", type_name)
}
/// Returns the identifier used for a child's member (its spec name).
fn make_child_ident(child_spec: &ChildSpec) -> Ident {
    let member_name = child_spec.name.as_str();
    format_ident!("{}", member_name)
}
/// Generates the struct definition and its inherent `impl` for a struct
/// build spec.
///
/// The output contains:
/// - `pub struct <build>` with one field per non-skipped child,
/// - `pub fn new(..) -> Self` taking every field in declaration order,
/// - one `pub fn <child>(&self) -> &<type>` accessor per field.
///
/// Children flagged `vec` are typed `Vec<T>`; skipped children produce no
/// field, constructor parameter, or accessor.
fn make_struct_type(build_spec: &StructBuildSpec) -> TokenStream {
    let mut member_names: Vec<Ident> = vec![];
    let mut annotated_members: Vec<TokenStream> = vec![];
    let mut accessors: Vec<TokenStream> = vec![];

    // No debug printing here: the binary writes the generated source to
    // stdout, so anything printed from this function would be mixed into it.
    for child_spec in build_spec.children.iter().filter(|child_spec| !child_spec.skip) {
        let child_ident = make_child_ident(child_spec);
        member_names.push(child_ident.clone());

        let child_type_ident = make_child_type_ident(child_spec);
        let type_annotation = if child_spec.vec {
            quote! { Vec<#child_type_ident> }
        } else {
            quote! { #child_type_ident }
        };

        annotated_members.push(quote! {
            #child_ident: #type_annotation
        });
        accessors.push(quote! {
            pub fn #child_ident(&self) -> &#type_annotation {
                &self.#child_ident
            }
        });
    }

    let type_ident = format_ident!("{}", build_spec.build);
    quote! {
        pub struct #type_ident {
            #(#annotated_members),*
        }

        impl #type_ident {
            pub fn new(#(#annotated_members),*) -> Self {
                Self {
                    #(#member_names),*
                }
            }

            #(#accessors)*
        }
    }
}
fn make_child_holder(child_spec: &ChildSpec) -> TokenStream {
if child_spec.skip {
return quote! {};
}
let child_ident = make_child_ident(child_spec);
let child_type_ident = make_child_type_ident(child_spec);
if child_spec.vec {
quote! { let mut #child_ident: Vec<#child_type_ident> = vec![] }
} else {
match &child_spec.build {
ChildToBuild::TypeRef(_) => quote! {
let mut #child_ident: Option<#child_type_ident> = None
},
ChildToBuild::Boolean { on } => match on {
BuildBooleanOn::RulePresent => {
quote! { let mut #child_ident: bool = false }
}
},
}
}
}
fn make_struct_build_fn(build_spec: &StructBuildSpec) -> TokenStream {
let child_holders = build_spec
.children
.iter()
.map(|child_spec| make_child_holder(child_spec))
.collect::<Vec<_>>();
let build_fn_identifier = format_ident!("build_{}", build_spec.name);
let return_type_ident = format_ident!("{}", build_spec.build);
quote! {
fn #build_fn_identifier() -> #return_type_ident {
#(#child_holders);*
}
}
}
fn deserialize_yaml_file() -> Vec<BuildSpec> {
let docs = YamlLoader::load_from_str(include_str!("../../src/parser/ast.yaml")).unwrap();
let doc = &docs[0];
let mut build_specs: Vec<BuildSpec> = vec![];
for (build_spec_name, build_spec) in doc.as_hash().unwrap() {
let build_spec_name_pascal = build_spec_name.as_str().unwrap();
let children = &build_spec["children"];
if children.is_array() {
let mut child_specs: Vec<ChildSpec> = vec![];
for child_spec in children.as_vec().unwrap() {
if child_spec.is_hash() {
let as_hash = child_spec.as_hash().unwrap();
let only_pair = as_hash.iter().next().unwrap();
let name = only_pair.0.as_str().unwrap();
let props = only_pair.1;
let rule = props["rule"].as_str().unwrap();
let skip = if !props["skip"].is_badvalue() {
props["skip"].as_bool().unwrap()
} else {
false
};
let vec = if !props["vec"].is_badvalue() {
props["vec"].as_bool().unwrap()
} else {
false
};
let build = &props["build"];
let child_to_build = if build.is_hash() {
let r#type = build["type"].as_str().unwrap();
let on = build["on"].as_str().unwrap();
if r#type.eq("boolean") && on.eq("rule_present") {
ChildToBuild::Boolean {
on: BuildBooleanOn::RulePresent,
}
} else {
todo!("currently on boolean types are supported")
}
} else {
match build.as_str() {
Some(s) => ChildToBuild::TypeRef(s.to_string()),
None => ChildToBuild::TypeRef(rule.to_string()),
}
};
child_specs.push(ChildSpec::new(name, rule, vec, skip, child_to_build))
} else {
child_specs.push(ChildSpec::from_child_name(child_spec.as_str().unwrap()));
}
}
build_specs.push(BuildSpec::Struct(StructBuildSpec {
name: build_spec_name_pascal.to_string(),
build: build_spec_name_pascal.to_string(),
children: child_specs,
}));
} else {
let rule_specs = &build_spec["rules"];
if rule_specs.is_array() {
let mut enum_rules: Vec<EnumRule> = vec![];
for rule_spec in rule_specs.as_vec().unwrap() {
if rule_spec.is_hash() {
let rule = rule_spec["rule"].as_str().unwrap();
let build = rule_spec["build"].as_str().unwrap();
enum_rules.push(EnumRule::new(rule, build));
} else {
enum_rules.push(EnumRule::from_rule_name(rule_spec.as_str().unwrap()));
}
}
} else {
panic!("either children or rules must be present on the build spec");
}
}
}
build_specs
}
/// Generates the AST type definitions described by `ast.yaml` and returns
/// them as formatted Rust source.
///
/// Each build spec contributes exactly one type definition: an enum for
/// [`BuildSpec::Enum`] and a struct with its `impl` for [`BuildSpec::Struct`].
///
/// # Panics
///
/// Panics if the spec file cannot be deserialized or if the combined token
/// stream cannot be parsed as a Rust file.
pub fn test_dump() -> String {
    let build_specs = deserialize_yaml_file();

    // One definition per spec. Emitting the struct definitions a second time
    // would produce duplicate items in the generated source.
    let streams: Vec<TokenStream> = build_specs
        .iter()
        .map(|build_spec| match build_spec {
            BuildSpec::Enum(enum_spec) => make_enum_type(enum_spec),
            BuildSpec::Struct(struct_spec) => make_struct_type(struct_spec),
        })
        .collect();

    // TODO(review): a second pass over the struct specs presumably belongs
    // here to emit the `build_*` functions via `make_struct_build_fn` —
    // confirm and add once that generator produces complete function bodies.

    let combined = quote! {
        #(#streams)*
    };
    let syntax_tree = syn::parse2(combined).unwrap();
    prettyplease::unparse(&syntax_tree)
}

View File

@ -0,0 +1,6 @@
use ast_generator::test_dump;
/// Prints the string returned by `test_dump` to standard output.
fn main() {
    println!("{}", test_dump());
}

View File

@ -1,14 +1,9 @@
use std::env;
use std::fs::write;
use std::path::Path;
use ast_generator::do_some_quoting;
fn main() -> std::io::Result<()> { fn main() -> std::io::Result<()> {
println!("cargo:rerun-if-changed=src/parser/deimos.pest"); println!("cargo:rerun-if-changed=src/parser/deimos.pest");
let out_dir = env::var("OUT_DIR").unwrap(); // let out_dir = env::var("OUT_DIR").unwrap();
let out_dir_path = Path::new(&out_dir); // let out_dir_path = Path::new(&out_dir);
let testing_txt_path = out_dir_path.join("testing.rs"); // let testing_txt_path = out_dir_path.join("testing.rs");
let output = do_some_quoting("Hello, World!"); // let output = test_dump();
write(&testing_txt_path, output)?; // write(&testing_txt_path, output)?;
Ok(()) Ok(())
} }

65
src/parser/ast.yaml Normal file
View File

@ -0,0 +1,65 @@
# Struct-shaped node: each item under `children` is either a bare name or a
# single-key map of `member_name: { rule, vec?, skip?, build? }`.
CompilationUnit:
  children:
    - namespace
    - use_statements:
        rule: UseStatement
        vec: true
    # NOTE(review): plural member name but no `vec: true`, unlike
    # `use_statements` above — confirm a single value is intended.
    - module_level_declarations:
        rule: ModuleLevelDeclaration
Namespace:
  children:
    # `skip: true` children produce no member in the generated struct.
    - ns_kw:
        rule: Ns
        skip: true
    - fqn:
        rule: Fqn
UseStatement:
  children:
    - use_kw:
        rule: Use
        skip: true
    - prefixes:
        rule: UseStatementPrefix
        vec: true
    - suffix:
        rule: UseStatementSuffix
# Enum-shaped node: each item under `rules` is either a `{ rule, build }` map
# or a bare name that is used for both the rule and the built type.
ModuleLevelDeclaration:
  rules:
    - rule: Module
      build: ModuleDeclaration
    - rule: Interface
      build: InterfaceDeclaration
    - FunctionDefinition
    - PlatformFunction
ModuleDeclaration:
  children:
    # A map-valued `build` with `type: boolean` / `on: rule_present` yields a
    # `bool` member instead of a type reference.
    - is_public:
        rule: Pub
        build:
          type: boolean
          on: rule_present
    - mod_kw:
        rule: Mod
        skip: true
    - identifier:
        rule: Identifier
    - declarations:
        rule: ModuleLevelDeclaration
        vec: true
Class:
  children:
    - is_public:
        rule: Pub
        build:
          type: boolean
          on: rule_present
    - class_kw:
        rule: ClassKw
        skip: true
    # Bare names: the rule and type default to the Pascal-case member name.
    - identifier
    - generic_parameters
    - class_constructor
    - implements_list
    - class_level_declarations:
        rule: ClassLevelDeclaration
        vec: true