From 2aee2cdd4e37ffb9cab4cb7ded6850da69639faf Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Mon, 15 Sep 2025 11:44:16 -0500 Subject: [PATCH] Refactor of ast yaml and schema. --- src/parser/ast.schema.yaml | 288 +++++++++++++++++++------------------ src/parser/ast.yaml | 258 ++++++++++++--------------------- src/parser/deimos.pest | 20 +-- 3 files changed, 242 insertions(+), 324 deletions(-) diff --git a/src/parser/ast.schema.yaml b/src/parser/ast.schema.yaml index 2211b78..11311fc 100644 --- a/src/parser/ast.schema.yaml +++ b/src/parser/ast.schema.yaml @@ -5,23 +5,43 @@ description: Top level is a map of node names in Pascal case (e.g., CompilationU additionalProperties: $ref: "#/$defs/NodeDefinition" $defs: + # Top level NodeDefinition: type: object additionalProperties: false - description: A definition of a node type. + description: | + A definition of a node type. The main key in this hash determines the type of node: `children` for Struct node, + `members` for Leaf-Struct node, `rules` for Enum node, `leaf_rules` for Leaf-Enum node, and `produce` for a + translate rule. + + A Struct node has child nodes and other properties, and is built by looping through all the inner pairs of the + given Parser Pair. + + A Leaf-Struct node does not have child nodes, but does have members. The members are built by some kind of parsing + of the string value of the the given Parser Pair (i.e., no looping through inner pairs). + + An Enum node maps Parser Rules to an enum of node types. Each enum member may have a child, or not. By default, a + Rule name maps to the node type-name of a single child. + + A Leaf-Enum node is like a regular enum node, but no children are allowed. Rather, the Parser Rule maps to a bare + enum member. + + A translate rule simply translates the Parser rule into some kind of arbitrary type, such as a string, int, etc. oneOf: - $ref: "#/$defs/StructNodeDefinition" - $ref: "#/$defs/LeafStructNodeDefinition" - $ref: "#/$defs/EnumNodeDefinition" - $ref: "#/$defs/LeafEnumNodeDefinition" + - $ref: "#/$defs/ProductionDefinition" + + # Four main types of nodes StructNodeDefinition: type: object additionalProperties: false description: A description of a Struct node to be built. properties: - type: - const: struct children: + type: array description: Ordered child fields for this node. items: $ref: "#/$defs/StructChildDefinition" @@ -32,22 +52,18 @@ $defs: additionalProperties: false description: A description of a Leaf-Struct node to be built. properties: - type: - const: leaf_struct - children: - description: Ordered child fields for this node. + members: + type: array + description: Ordered members for this node. items: - $ref: "#/$defs/LeafStructChildDefinition" + $ref: "#/$defs/LeafStructMemberDefinition" required: - - type - - children + - members EnumNodeDefinition: type: object additionalProperties: false description: A description of an Enum node to be built. properties: - type: - const: enum rules: type: array description: Alternative parse rules that build this node. @@ -60,39 +76,40 @@ $defs: additionalProperties: false description: A description of a leaf-enum node to be built. properties: - type: - const: leaf_enum - rules: + leaf_rules: type: array description: Alternative parse rules that build this node. items: - $ref: "#/$defs/LeafEnumChildDefinition" + type: string required: - - type - - rules + - leaf_rules + + # Struct node children StructChildDefinition: - description: A definition of a node's child. Either a bare child name (string) in snake case, or an object. + description: | + A definition of a Struct node's child. Either a bare child name (string) in snake case, or an object. The former + is a shorthand where the child name and built type are the same; casing is automatically done. The latter allows + further customization of the built child. oneOf: - type: string - description: Shorthand where child name, var, build, and with are inferred from the given snake-case child name. - - $ref: "#/$defs/ChildDefinitionWrapper" - ChildDefinitionWrapper: + - $ref: "#/$defs/StructChildDefinitionWrapper" + StructChildDefinitionWrapper: type: object - description: Single-key object mapping the child-name to its spec. + description: Single-key object mapping the child-name to its advanced definition. minProperties: 1 maxProperties: 1 additionalProperties: false patternProperties: "^[a-z][a-z0-9_]*$": - $ref: "#/$defs/ChildDefinition" - ChildDefinition: + $ref: "#/$defs/StructChildAdvancedDefinition" + StructChildAdvancedDefinition: type: object description: One of skip/vec/single child specs. oneOf: - - $ref: "#/$defs/SkipChildDefinition" - - $ref: "#/$defs/VecChildDefinition" - - $ref: "#/$defs/SingleChildDefinition" - SkipChildDefinition: + - $ref: "#/$defs/StructChildSkipChildDefinition" + - $ref: "#/$defs/StructChildVecChildDefinition" + - $ref: "#/$defs/StructChildMemberDefinition" + StructChildSkipChildDefinition: type: object additionalProperties: false description: A definition for a child rule that does nothing, i.e., is skipped. @@ -100,49 +117,130 @@ $defs: rule: type: string skip: - type: boolean # note: must be true + type: boolean + const: true required: - rule - skip - VecChildDefinition: + StructChildVecChildDefinition: type: object additionalProperties: false description: A definition for a child rule that can be matched multiple times. properties: rule: type: string + kind: + type: string + enum: + - string vec: type: boolean + const: true required: - rule - vec - SingleChildDefinition: + StructChildMemberDefinition: type: object additionalProperties: false - description: A definition for a child rule that builds one item. + description: | + A definition for a child rule that builds one member. If a bare string, it is assumed to be the name/build-type + for a node. An object allows different types (i.e., things additional to nodes) to be built. properties: rule: type: string - description: The type to build, in Pascal case. + description: The rule to match. optional: type: boolean description: If true, this child will be stored as an Option. build: oneOf: - type: string - - $ref: "#/$defs/SingleChildBuildDefinition" - SingleChildBuildDefinition: + - $ref: "#/$defs/StructChildMemberBuildDefinition" + StructChildMemberBuildDefinition: type: object additionalProperties: false description: A definition of what exactly to build for a given child rule. oneOf: - - $ref: "#/$defs/BuildSingleTypeChild" - - $ref: "#/$defs/BuildBooleanChild" - - $ref: "#/$defs/BuildStringChild" - - $ref: "#/$defs/BuildDoubleChild" - - $ref: "#/$defs/BuildIntChild" - - $ref: "#/$defs/BuildLongChild" - BuildSingleTypeChild: + - $ref: "#/$defs/BuildNode" + - $ref: "#/$defs/BuildBoolean" + + # Leaf Struct children + LeafStructMemberDefinition: + type: object + description: Single-key object mapping the member-name to what is to be built by parsing the Parser Pair. + minProperties: 1 + maxProperties: 1 + additionalProperties: false + patternProperties: + "^[a-z][a-z0-9_]*$": + oneOf: + - $ref: "#/$defs/BuildMember" + BuildMember: + type: object + description: A specification for a member to build. + additionalProperties: false + properties: + kind: + enum: + - string + from: + enum: + - whole_pair + required: + - kind + - from + + # Enum children + EnumChildDefinition: + oneOf: + - type: string + description: Shorthand where child name, var, build, and with are inferred from the given Pascal-case rule name. + - $ref: "#/$defs/LongEnumChildDefinition" + LongEnumChildDefinition: + type: object + additionalProperties: false + description: A format for an advanced enum child. + properties: + child: + type: boolean + kind: + enum: + - int + - long + - double + - usize + - string + - boolean + from: + enum: + - translate + required: + - kind + - from + + # Production definition + ProductionDefinition: + type: object + properties: + produce: + type: object + properties: + kind: + enum: + - int + - long + - double + - string + - boolean + from: + enum: + - translate_and_parse + - string_inner + - whole_pair + - parse_whole_pair + + # Common things to build + BuildNode: type: object additionalProperties: false description: A definition of a single-type child to build. @@ -153,109 +251,15 @@ $defs: or_else_default: type: boolean description: Whether to call the default method on the built-type if the rule is not found. - BuildBooleanChild: + BuildBoolean: type: object additionalProperties: false - description: A definition for building a boolean child. + description: A boolean member to be built. properties: - type: + kind: type: string - enum: - - boolean + const: boolean on: type: string enum: - - rule_present - from: - type: string - enum: - - parse_whole_pair - BuildStringChild: - type: object - additionalProperties: false - description: A definition for building a string child. - properties: - type: - const: string - from: - type: string - enum: - - whole_pair - BuildDoubleChild: - type: object - additionalProperties: false - description: A definition for building a Double child. - properties: - type: - const: f64 - from: - type: string - enum: - - parse_whole_pair - BuildIntChild: - type: object - additionalProperties: false - description: A definition for building an Int child. - properties: - type: - const: i32 - from: - type: string - enum: - - parse_number_base - BuildLongChild: - type: object - additionalProperties: false - description: A definition for building a Long child. - properties: - type: - const: i64 - from: - type: string - enum: - - parse_number_base - LeafStructChildDefinition: - # TODO - EnumChildDefinition: - description: A definition of an enum node's child. Either a bare rule (string) in Pascal case, or an object. - oneOf: - - type: string - description: Shorthand where child name, var, build, and with are inferred from the given Pascal-case rule name. - - $ref: "#/$defs/LongEnumChildDefinition" - LongEnumChildDefinition: - type: object - additionalProperties: false - description: A format for specifying more specific information for an enum child. - properties: - rule: - type: string - build: - type: string - required: - - rule - - build - LeafEnumChildDefinition: - description: A definition of a leaf-enum node's child. Either a bare rule-string in Pascal case, or an object. - oneOf: - - type: string - description: Shorthand where the rule name maps onto an empty enum rule. - - $ref: "#/$defs/LongLeafEnumChildDefinitionWrapper" - LongLeafEnumChildDefinitionWrapper: - type: object - description: Single-key object mapping the child-name to its spec. - minProperties: 1 - maxProperties: 1 - additionalProperties: false - patternProperties: - "^([A-Z][a-z0-9]*)*$": - $ref: "#/$defs/LongLeafEnumChildDefinition" - LongLeafEnumChildDefinition: - type: object - additionalProperties: false - description: A format for specifying more specific information about a leaf-enum child. - properties: - child: - type: boolean - description: If true, a node of the same name is built as the lone member of the enum child. - required: - - child \ No newline at end of file + - rule_present \ No newline at end of file diff --git a/src/parser/ast.yaml b/src/parser/ast.yaml index cb2801d..7707261 100644 --- a/src/parser/ast.yaml +++ b/src/parser/ast.yaml @@ -1,8 +1,7 @@ # $schema: ./ast.schema.yaml # Operators Operator: - type: leaf_enum - rules: + leaf_rules: - Or - And - EqualTo @@ -29,11 +28,10 @@ Operator: # Names Identifier: - type: leaf_struct - children: + members: - name: - build: - type: string + kind: string + from: whole_pair FullyQualifiedName: children: - identifiers: @@ -64,8 +62,7 @@ TypeUse: - TupleTypeUse - FunctionTypeUse PrimitiveType: - type: leaf_enum - rules: + leaf_rules: - Byte - Short - Char @@ -174,13 +171,11 @@ UseStatementPrefix: children: - identifier UseStatementSuffix: - type: leaf_enum rules: - - Identifier: - child: true - - Star - - UseList: - child: true + - Identifier + - Star: + child: false + - UseList UseList: children: - identifiers: @@ -219,7 +214,7 @@ Module: - is_public: rule: Pub build: - type: boolean + kind: boolean on: rule_present - mod_kw: rule: Mod @@ -251,7 +246,7 @@ Interface: - is_public: rule: Pub build: - type: boolean + kind: boolean on: rule_present - int_kw: rule: IntKw @@ -274,7 +269,7 @@ Class: - is_public: rule: Pub build: - type: boolean + kind: boolean on: rule_present - class_kw: rule: ClassKw @@ -299,7 +294,7 @@ Function: - is_public: rule: Pub build: - type: boolean + kind: boolean on: rule_present - fn_kw: rule: Fn @@ -317,7 +312,7 @@ OperatorFunction: - is_public: rule: Pub build: - type: boolean + kind: boolean on: rule_present - op_kw: rule: Op @@ -337,7 +332,7 @@ PlatformFunction: - is_public: rule: Pub build: - type: boolean + kind: boolean on: rule_present - platform_kw: rule: Platform @@ -415,14 +410,10 @@ InterfaceDefaultOperatorFunction: # Function Bodies FunctionBody: - type: leaf_enum rules: - - FunctionAliasBody: - child: true - - FunctionEqualsBody: - child: true - - FunctionBlockBody: - child: true + - FunctionAliasBody + - FunctionEqualsBody + - FunctionBlockBody FunctionEqualsBody: children: - expression @@ -452,34 +443,26 @@ Member: - is_public: rule: Pub build: - type: boolean + kind: boolean on: rule_present - is_mut: rule: Mut build: - type: boolean + kind: boolean on: rule_present - identifier - type_use # Statements Statement: - type: leaf_enum rules: - - VariableDeclaration: - child: true - - AssignmentStatement: - child: true - - ExpressionStatement: - child: true - - UseStatement: - child: true - - IfStatement: - child: true - - WhileStatement: - child: true - - ForStatement: - child: true + - VariableDeclaration + - AssignmentStatement + - ExpressionStatement + - UseStatement + - IfStatement + - WhileStatement + - ForStatement VariableDeclaration: children: - let_kw: @@ -488,7 +471,7 @@ VariableDeclaration: - is_mut: rule: Mut build: - type: boolean + kind: boolean on: rule_present - identifier - type_use: @@ -625,8 +608,7 @@ ComparisonExpression: rule: Expression optional: true ComparisonOperator: - type: leaf_enum - rules: + leaf_rules: - Greater - Less - GreaterEqual @@ -643,8 +625,7 @@ ShiftExpression: - right: rule: Expression ShiftOperator: - type: leaf_enum - rules: + leaf_rules: - LeftShift - RightShift AdditiveExpression: @@ -658,8 +639,7 @@ AdditiveExpression: rule: Expression optional: true AdditiveOperator: - type: leaf_enum - rules: + leaf_rules: - Add - Subtract MultiplicativeExpression: @@ -672,8 +652,7 @@ MultiplicativeExpression: - right: rule: Expression MultiplicativeOperator: - type: leaf_enum - rules: + leaf_rules: - Multiply - Divide - Modulo @@ -683,8 +662,7 @@ PrefixExpression: rule: PrefixOperator vec: true PrefixOperator: - type: leaf_enum - rules: + leaf_rules: - Spread - Not - Negative @@ -696,16 +674,14 @@ SuffixExpression: rule: SuffixOperator vec: true SuffixOperator: - type: leaf_enum rules: - - PlusPlus - - MinusMinus - - ObjectProperty: - child: true - - ObjectIndex: - child: true - - Call: - child: true + - PlusPlus: + child: false + - MinusMinus: + child: false + - ObjectProperty + - ObjectIndex + - Call ObjectProperty: children: - identifier @@ -713,23 +689,17 @@ ObjectIndex: children: - expression PrimaryExpression: - type: leaf_enum rules: - - Literal: - child: true - - FullyQualifiedName: - child: true - - Closure: - child: true - - ParenthesizedExpression: - child: true + - Literal + - FullyQualifiedName + - Closure + - ParenthesizedExpression ParenthesizedExpression: children: - expression # Calls Call: - type: leaf_enum rules: - ParenthesesCall - NonParenthesesCall @@ -780,101 +750,57 @@ ClosureParameter: # Literals Literal: - type: leaf_enum rules: - - NumberLiteral: - child: true - - StringLiteral: - child: true - - BooleanLiteral: - child: true -NumberLiteral: - type: leaf_enum - rules: - - DoubleLiteral: - child: true - - LongLiteral: - child: true - IntLiteral: - child: true -IntLiteral: - children: - - number_base -LongLiteral: - children: - - number_base -DoubleLiteral: - type: leaf_struct - children: - - literal: - build: - type: f64 -NumberBase: - type: leaf_enum - rules: - - BinaryBase: - child: true - - HexadecimalBase: - child: true - - DecimalBase: - child: true -DecimalBase: - type: leaf_struct - children: - - literal: - build: - type: string -BinaryBase: - children: - - binary_digits -BinaryDigits: - type: leaf_struct - children: - - literal: - build: - type: string -HexadecimalBase: - children: - - hexadecimal_digits -HexadecimalDigits: - type: leaf_struct - children: - - literal: - build: - type: string -StringLiteral: - type: leaf_enum - rules: + kind: int + from: translate + - LongLiteral: + kind: long + from: translate + - DoubleLiteral: + kind: double + from: translate - SingleQuoteString: - child: true - - DoubleQuoteString: - child: true - - BacktickString: - child: true + kind: string + from: translate + - DString + - BacktickString + - BooleanLiteral: + kind: boolean + from: translate + +# Numbers +IntLiteral: + produce: + kind: int + from: translate_and_parse +LongLiteral: + produce: + kind: long + from: translate_and_parse +DoubleLiteral: + produce: + kind: double + from: translate_and_parse + +# Strings SingleQuoteString: - children: - - string_inner: - optional: true -DoubleQuoteString: + produce: + kind: string + from: string_inner +DString: children: - inners: rule: DStringInner + kind: string vec: true - expressions: rule: DStringExpression vec: true -StringInner: - type: leaf_struct - children: - - literal: - build: - type: string DStringInner: - type: leaf_struct - children: - - literal: - build: - type: string + produce: + kind: string + from: whole_pair DStringExpression: children: - expression @@ -882,20 +808,16 @@ BacktickString: children: - inners: rule: BacktickInner + kind: string vec: true - expressions: rule: DStringExpression vec: true BacktickInner: - type: leaf_struct - children: - - literal: - build: - type: string + produce: + kind: string + from: string_inner BooleanLiteral: - type: leaf_struct - children: - - literal: - build: - type: boolean - from: parse_whole_pair \ No newline at end of file + produce: + kind: boolean + from: parse_whole_pair \ No newline at end of file diff --git a/src/parser/deimos.pest b/src/parser/deimos.pest index 974c41c..5256085 100644 --- a/src/parser/deimos.pest +++ b/src/parser/deimos.pest @@ -785,22 +785,20 @@ ClosureParameter = { // Literals Literal = { - NumberLiteral - | StringLiteral - | BooleanLiteral -} - -NumberLiteral = { DoubleLiteral | LongLiteral | IntLiteral + | SingleQuoteString + | DString + | BacktickString + | BooleanLiteral } IntLiteral = { NumberBase } LongLiteral = ${ NumberBase ~ "L" } -DoubleLiteral = @{ DecimalBase ~ "." ~ DecimalBase} +DoubleLiteral = @{ DecimalBase ~ "." ~ DecimalBase } NumberBase = { BinaryBase @@ -822,15 +820,9 @@ HexadecimalDigits = @{ HexadecimalDigit+ } HexadecimalDigit = { '0'..'9' | 'a'..'f' } -StringLiteral = { - SingleQuoteString - | DoubleQuoteString - | BacktickString -} - SingleQuoteString = { "'" ~ StringInner? ~ "'" } -DoubleQuoteString = { +DString = { "\"" ~ ( DStringInner? ~ DStringExpression )* ~ DStringInner?