deimos-lang/src/parser/deimos.pest
2025-09-08 11:06:10 -05:00

907 lines
12 KiB
Plaintext

// Keywords
// Every keyword is atomic and ends with a negative lookahead on
// IdentifierChar, so it only matches a whole word. Without the lookahead a
// keyword would match as a prefix of a longer name (for example "mut" inside
// "mutable", or "pub" inside "public") wherever the keyword is optional.
// The rule must be atomic (@) so that no implicit WHITESPACE is consumed
// between the literal and the lookahead.
Ns        = @{ "ns" ~ !IdentifierChar }
TypeKw    = @{ "type" ~ !IdentifierChar }
Mod       = @{ "mod" ~ !IdentifierChar }
IntKw     = @{ "int" ~ !IdentifierChar }
ClassKw   = @{ "class" ~ !IdentifierChar }
Platform  = @{ "platform" ~ !IdentifierChar }
Pub       = @{ "pub" ~ !IdentifierChar }
Fld       = @{ "fld" ~ !IdentifierChar }
Impl      = @{ "impl" ~ !IdentifierChar }
Mut       = @{ "mut" ~ !IdentifierChar }
Cons      = @{ "cons" ~ !IdentifierChar }
Static    = @{ "static" ~ !IdentifierChar }
Ref       = @{ "ref" ~ !IdentifierChar }
Def       = @{ "def" ~ !IdentifierChar }
Where     = @{ "where" ~ !IdentifierChar }
Infer     = @{ "infer" ~ !IdentifierChar }
Delegate  = @{ "delegate" ~ !IdentifierChar }
Let       = @{ "let" ~ !IdentifierChar }
Fn        = @{ "fn" ~ !IdentifierChar }
Op        = @{ "op" ~ !IdentifierChar }
Return    = @{ "return" ~ !IdentifierChar }
If        = @{ "if" ~ !IdentifierChar }
Else      = @{ "else" ~ !IdentifierChar }
While     = @{ "while" ~ !IdentifierChar }
For       = @{ "for" ~ !IdentifierChar }
In        = @{ "in" ~ !IdentifierChar }
Move      = @{ "move" ~ !IdentifierChar }
Alias     = @{ "alias" ~ !IdentifierChar }
True      = @{ "true" ~ !IdentifierChar }
False     = @{ "false" ~ !IdentifierChar }
Use       = @{ "use" ~ !IdentifierChar }
Then      = @{ "then" ~ !IdentifierChar }
Do        = @{ "do" ~ !IdentifierChar }
End       = @{ "end" ~ !IdentifierChar }
Companion = @{ "comp" ~ !IdentifierChar }
// Keywords: primitive types
// Primitive type names match whole words only: each rule is atomic and ends
// with a negative lookahead on IdentifierChar. Otherwise a user type such as
// "Integer" or "Strings" would be matched as the primitive "Int" / "String"
// followed by leftover characters, and the enclosing rule would fail.
Byte   = @{ "Byte" ~ !IdentifierChar }
Short  = @{ "Short" ~ !IdentifierChar }
Char   = @{ "Char" ~ !IdentifierChar }
Int    = @{ "Int" ~ !IdentifierChar }
Long   = @{ "Long" ~ !IdentifierChar }
Double = @{ "Double" ~ !IdentifierChar }
Bool   = @{ "Bool" ~ !IdentifierChar }
String = @{ "String" ~ !IdentifierChar }
Array  = @{ "Array" ~ !IdentifierChar }
Any    = @{ "Any" ~ !IdentifierChar }
Void   = @{ "Void" ~ !IdentifierChar }
// Keywords as a rule (for preventing identifiers with keywords, etc.)
// Union of every reserved word. The alternatives are tried in order, so a
// longer keyword must come before any keyword that is a prefix of it
// (IntKw and Infer are listed before In).
Keyword = {
    // language keywords
    Ns | TypeKw | Mod | IntKw | ClassKw | Platform | Pub | Fld | Impl | Mut
    | Cons | Static | Ref | Def | Where | Infer | Delegate | Let | Fn | Op
    | Return | If | Else | While | For | In | Move | Alias | True | False
    | Use | Then | Do | End | Companion
    // primitive type names
    | Byte | Short | Char | Int | Long | Double | Bool | String | Array
    | Any | Void
}
// Symbols
// Punctuation tokens that get their own rule.
Ellipsis   = { "..." }
Underscore = { "_" }
Semicolon  = { ";" }
// Operators
// Logical
Or           = { "||" }
And          = { "&&" }
// Comparison
EqualTo      = { "==" }
NotEqualTo   = { "!=" }
Greater      = { ">" }
Less         = { "<" }
GreaterEqual = { ">=" }
LessEqual    = { "<=" }
// Arithmetic
Add          = { "+" }
Subtract     = { "-" }
Multiply     = { "*" }
Divide       = { "/" }
Modulo       = { "%" }
// Unary prefix. Negative matches the same text as Subtract.
Not          = { "!" }
Negative     = { "-" }
// Unary suffix
PlusPlus     = { "++" }
MinusMinus   = { "--" }
CallOp       = { "()" }
// Spread wraps Ellipsis, so it matches "..."
Spread       = { Ellipsis }
// Borrow / dereference. Star matches the same text as Multiply.
Borrow       = { "&" }
Star         = { "*" }
// Shifts
LeftShift    = { "<<" }
RightShift   = { ">>" }
// Indexing operator name, written as the literal "[]"
Index        = { "[]" }
// "&" followed by the mut keyword
BorrowMut    = { Borrow ~ Mut }
// Any operator that can be named in an operator function.
//
// pest uses ordered choice: the first alternative that matches wins and the
// others are never tried. A token must therefore be listed before any token
// that is a prefix of it (">=" and ">>" before ">", "++" before "+", and so
// on), otherwise the longer operator can never be selected.
Operator = {
    // multi-character binary operators
    Or
    | And
    | EqualTo
    | NotEqualTo
    | GreaterEqual
    | LessEqual
    | LeftShift
    | RightShift
    // multi-character unary suffix operators
    | PlusPlus
    | MinusMinus
    | CallOp
    | Index
    // multi-character unary prefix operators
    | Spread
    | BorrowMut
    // single-character binary operators
    | Greater
    | Less
    | Add
    | Subtract
    | Multiply
    | Divide
    | Modulo
    // single-character unary prefix operators.
    // Star and Negative match the same text as Multiply and Subtract, so they
    // are shadowed by those alternatives here.
    | Borrow
    | Star
    | Not
    | Negative
}
// Names
// A name: a start character followed by any number of identifier characters,
// rejected only when the text is exactly a keyword (a keyword that is not
// followed by a further identifier character). Names that merely begin with
// a keyword are accepted. Atomic, so no whitespace is allowed inside and no
// inner tokens are produced.
Identifier = @{
    !( Keyword ~ !IdentifierChar )
    ~ IdentifierStartChar
    ~ IdentifierChar*
}
// First character of an identifier: an ASCII letter or underscore.
IdentifierStartChar = { ASCII_ALPHA | "_" }
// Any later character of an identifier: an ASCII letter, digit or underscore.
IdentifierChar = { ASCII_ALPHANUMERIC | "_" }
// One or more identifiers separated by "::".
FullyQualifiedName = { Identifier ~ ( "::" ~ Identifier )* }
// Common lists
// Non-empty, comma-separated list of type uses.
TypeUseList = { TypeUse ~ ( "," ~ TypeUse )* }

// Non-empty, comma-separated list of identifiers.
IdentifierList = { Identifier ~ ( "," ~ Identifier )* }

// Parenthesized list with at least one type use.
ParenthesesTypeUseList = { "(" ~ TypeUseList ~ ")" }

// Parenthesized list that may be empty: "()" is accepted.
ParenthesesOptionalTypeUseList = { "(" ~ TypeUseList? ~ ")" }
// In general:
// Arguments = usage
// Parameters = declaration
// A reference to a type. Alternatives are tried in the order listed.
TypeUse = {
    PrimitiveType
    | InterfaceOrClassTypeUse
    | TupleTypeUse
    | FunctionTypeUse
}

// Built-in type names. Only Array may carry generic arguments.
PrimitiveType = {
    Byte | Short | Char | Int | Long | Double | Bool | String
    | Array ~ GenericArguments?
    | Any
    | Void
}

// Named type: any number of "&", optional mut, a qualified name and
// optional generic arguments.
InterfaceOrClassTypeUse = {
    Borrow* ~ Mut? ~ FullyQualifiedName ~ GenericArguments?
}

// Tuple type: any number of "&", optional mut, then a parenthesized list.
TupleTypeUse = {
    Borrow* ~ Mut? ~ TupleArguments
}

// Function type: any number of "&", an optional modifier, the fn keyword,
// optional generic parameters, the parameter list and a mandatory return type.
FunctionTypeUse = {
    Borrow* ~ FunctionTypeModifier? ~ Fn
    ~ GenericParameters? ~ Parameters ~ ReturnType
}
// Generic Arguments
// Types supplied at a use site, e.g. <A, B>. At least one is required.
GenericArguments = { "<" ~ TypeUseList ~ ">" }
// Generic Parameters
// Names introduced at a declaration site, e.g. <T, U>. At least one is required.
GenericParameters = { "<" ~ IdentifierList ~ ">" }
// Tuple Arguments
// The element types of a tuple; the list may be empty.
TupleArguments = { ParenthesesOptionalTypeUseList }
// Implements list
// ":" followed by one or more type uses joined with "+".
ImplementsList = { ":" ~ TypeUse ~ ( "+" ~ TypeUse )* }
// Function type modifier
// "mut ref" is listed before "mut" so the two-word form is tried first.
FunctionTypeModifier = {
    Cons
    | Mut ~ Ref
    | Mut
    | Ref
}
// Parameters
// Parenthesized, comma-separated parameter list; may be empty.
Parameters = {
    "(" ~ ( Parameter ~ ( "," ~ Parameter )* )? ~ ")"
}
// A single "name: Type" pair.
Parameter = { Identifier ~ ":" ~ TypeUse }
// Return type
// "->" followed by the returned type and an optional ref list.
ReturnType = { "->" ~ TypeUse ~ RefList? }
// The ref keyword followed by one or more comma-separated identifiers.
// The trailing group is repeated with "*" so that a single identifier, or
// more than two, is accepted, matching the other comma-separated lists in
// this grammar.
RefList = {
    Ref
    ~ Identifier
    ~ ( "," ~ Identifier )*
}
// Top-level constructs
// Entry rule: the whole input, from SOI to EOI. An optional parent-module
// line comes first, then any mix of use statements and module-level
// declarations.
CompilationUnit = {
    SOI
    ~ ParentMod?
    ~ ( UseStatement | ModuleLevelDeclaration )*
    ~ EOI
}

// The mod keyword followed by a qualified name.
// NOTE(review): ParentMod is tried before any Module, and both begin with the
// mod keyword, so a file that opens with "mod name" is read as a ParentMod -
// confirm this is intended.
ParentMod = { Mod ~ FullyQualifiedName }
// The use keyword, zero or more "name::" prefixes, then a suffix.
UseStatement = { Use ~ UseStatementPrefix* ~ UseStatementSuffix }

// One path segment followed by "::".
UseStatementPrefix = { Identifier ~ "::" }

// What is imported: a single name, "*", or a braced list of names.
UseStatementSuffix = {
    Identifier
    | Star
    | UseList
}

// "{ a, b, c }" - at least one identifier is required.
UseList = { "{" ~ Identifier ~ ( "," ~ Identifier )* ~ "}" }
// Organizational declarations
// Declarations allowed directly inside a module or at file level.
ModuleLevelDeclaration = {
    Module | Interface | Class | Function | PlatformFunction
}

// Declarations allowed inside an interface body.
InterfaceLevelDeclaration = {
    CompanionModule
    | Interface
    | Class
    | InterfaceFunction
    | InterfaceDefaultFunction
    | InterfaceOperatorFunction
    | InterfaceDefaultOperatorFunction
}

// Declarations allowed inside a class body.
ClassLevelDeclaration = {
    CompanionModule
    | Interface
    | Class
    | Function
    | OperatorFunction
    | PlatformFunction
}
// Main organizational constructs
// [pub] mod Name <declarations> end
Module = {
    Pub? ~ Mod ~ Identifier
    ~ ModuleLevelDeclaration*
    ~ End
}

// comp mod <declarations> end - a companion module has no name.
CompanionModule = {
    Companion ~ Mod
    ~ ModuleLevelDeclaration*
    ~ End
}

// [pub] int Name [<T, ...>] [: A + B] <declarations> end
Interface = {
    Pub? ~ IntKw ~ Identifier
    ~ GenericParameters?
    ~ ImplementsList?
    ~ InterfaceLevelDeclaration*
    ~ End
}

// [pub] class Name [<T, ...>] [(members)] [: A + B] <declarations> end
Class = {
    Pub? ~ ClassKw ~ Identifier
    ~ GenericParameters?
    ~ ClassConstructor?
    ~ ImplementsList?
    ~ ClassLevelDeclaration*
    ~ End
}
// Function constructs
// Ordinary function: optional return type, mandatory body.
Function = {
    Pub? ~ Fn
    ~ GenericParameters? ~ Identifier ~ Parameters
    ~ ReturnType?
    ~ FunctionBody
}

// Operator function: like Function, but named by an Operator and introduced
// by the op keyword.
OperatorFunction = {
    Pub? ~ Op
    ~ GenericParameters? ~ Operator ~ Parameters
    ~ ReturnType?
    ~ FunctionBody
}

// Platform function: signature only, and the return type is mandatory.
PlatformFunction = {
    Pub? ~ Platform ~ Fn
    ~ GenericParameters? ~ Identifier ~ Parameters
    ~ ReturnType
}

// Interface function without a body; the return type is mandatory.
InterfaceFunction = {
    Fn
    ~ GenericParameters? ~ Identifier ~ Parameters
    ~ ReturnType
}

// Interface function with a default body, introduced by the def keyword.
InterfaceDefaultFunction = {
    Def ~ Fn
    ~ GenericParameters? ~ Identifier ~ Parameters
    ~ ReturnType?
    ~ FunctionBody
}

// Interface operator without a body; the return type is mandatory.
InterfaceOperatorFunction = {
    Op
    ~ GenericParameters? ~ Operator ~ Parameters
    ~ ReturnType
}

// Interface operator with a default body, introduced by the def keyword.
InterfaceDefaultOperatorFunction = {
    Def ~ Op
    ~ GenericParameters? ~ Operator ~ Parameters
    ~ ReturnType?
    ~ FunctionBody
}
// Function Components
// The three body forms, tried in this order: alias, "= expr", block.
FunctionBody = {
    FunctionAliasBody
    | FunctionEqualsBody
    | FunctionBlockBody
}

// "=" followed by a single expression.
FunctionEqualsBody = { "=" ~ Expression }

// The alias keyword followed by an identifier.
FunctionAliasBody = { Alias ~ Identifier }

// Zero or more statements closed by the end keyword.
FunctionBlockBody = { Statement* ~ End }
// Class constructs
// Parenthesized, comma-separated member list. At least one member is
// required, so "()" is not accepted here.
ClassConstructor = {
    "(" ~ Member ~ ( "," ~ Member )* ~ ")"
}

// [pub] [mut] name: Type
Member = {
    Pub? ~ Mut? ~ Identifier ~ ":" ~ TypeUse
}
// Statements
// Alternatives are tried in order; AssignmentStatement comes before
// ExpressionStatement because both begin with an Expression.
Statement = {
    VariableDeclaration
    | AssignmentStatement
    | ExpressionStatement
    | UseStatement
    | IfStatement
    | WhileStatement
    | ForStatement
}

// let [mut] name [: Type] [= expr]
VariableDeclaration = {
    Let ~ Mut? ~ Identifier
    ~ ( ":" ~ TypeUse )?
    ~ ( "=" ~ Expression )?
}

// target = value; both sides are parsed as Expression.
AssignmentStatement = { Expression ~ "=" ~ Expression }

// An expression used as a statement.
ExpressionStatement = { Expression }

// if ... then ... { else if ... then ... } [ else ... ] end
IfStatement = {
    IfClause
    ~ IfElseIf*
    ~ IfElse?
    ~ End
}

// if <condition> then <statements>
IfClause = { If ~ Expression ~ Then ~ Statement* }

// else followed by another if-clause.
IfElseIf = { Else ~ IfClause }

// else followed by statements.
IfElse = { Else ~ Statement* }

// while <condition> do <statements> end
WhileStatement = {
    While ~ Expression ~ Do
    ~ Statement*
    ~ End
}

// for <name> in <expression> do <statements> end
ForStatement = {
    For ~ Identifier ~ In ~ Expression ~ Do
    ~ Statement*
    ~ End
}
// Expressions
// Root of the expression grammar.
// NOTE(review): at every binary level below, the right operand is a full
// Expression rather than the next-tighter level, so "a && b || c" groups as
// "a && (b || c)" - confirm the consumer of the parse tree accounts for this.
Expression = { TernaryExpression }

// condition [? a : b]
TernaryExpression = { OrExpression ~ ( TernaryAlternatives )? }

// Both branches of a ternary; they always appear together.
TernaryAlternatives = { TernaryTrueAlternative ~ TernaryFalseAlternative }

TernaryTrueAlternative = { "?" ~ Expression }

TernaryFalseAlternative = { ":" ~ Expression }

// left [|| right]
OrExpression = { AndExpression ~ ( Or ~ Expression )? }

// left [&& right]
AndExpression = { ComparisonExpression ~ ( And ~ Expression )? }

// left [<comparison operator> right]
ComparisonExpression = {
    ShiftExpression ~ ( ComparisonOperator ~ Expression )?
}
// Comparison operators. pest takes the first alternative that matches, so
// ">=" and "<=" must be listed before ">" and "<"; otherwise the one-character
// token always wins and the two-character operators can never be parsed.
ComparisonOperator = {
    GreaterEqual
    | LessEqual
    | Greater
    | Less
    | EqualTo
    | NotEqualTo
}
// left [<< or >> right]
ShiftExpression = {
    AdditiveExpression ~ ( ShiftOperator ~ Expression )?
}

ShiftOperator = { LeftShift | RightShift }

// left [+ or - right]
AdditiveExpression = {
    MultiplicativeExpression ~ ( AdditiveOperator ~ Expression )?
}

AdditiveOperator = { Add | Subtract }

// left [* or / or % right]
// NOTE(review): the right operand is a full Expression, so "a * b + c"
// groups as "a * (b + c)" - confirm this is handled downstream.
MultiplicativeExpression = {
    PrefixExpression ~ ( MultiplicativeOperator ~ Expression )?
}

MultiplicativeOperator = { Multiply | Divide | Modulo }

// Any number of prefix operators applied to a suffix expression.
PrefixExpression = { PrefixOperator* ~ SuffixExpression }

PrefixOperator = { Spread | Not | Negative }

// A primary expression followed by any number of suffix operators.
SuffixExpression = { PrimaryExpression ~ SuffixOperator* }

// Postfix forms: ++, --, .name, [index], or a call.
SuffixOperator = {
    PlusPlus
    | MinusMinus
    | ObjectProperty
    | ObjectIndex
    | Call
}

// ".name"
ObjectProperty = { "." ~ Identifier }

// "[expr]"
ObjectIndex = { "[" ~ Expression ~ "]" }

// The atoms of the expression grammar, tried in this order.
PrimaryExpression = {
    Literal
    | FullyQualifiedName
    | Closure
    | ParenthesizedExpression
}

// "(expr)"
ParenthesizedExpression = { "(" ~ Expression ~ ")" }
// Calls
// A call suffix; the parenthesized form is tried first.
Call = { ParenthesesCall | NonParenthesesCall }

// [::<T>] ( [args] ) [closure]
ParenthesesCall = {
    TurboFish?
    ~ "(" ~ ExpressionList? ~ ")"
    ~ Closure?
}

// [::<T>] followed by a closure or an argument list, without parentheses.
// NOTE(review): with ordered choice the third alternative can never be
// selected, because the plain ExpressionList before it already matches
// whenever "ExpressionList ~ Closure" would - confirm the intended order.
NonParenthesesCall = {
    TurboFish?
    ~ (
        Closure
        | ExpressionList
        | ExpressionList ~ Closure
    )
}

// "::" followed by generic arguments.
TurboFish = { "::" ~ GenericArguments }

// Non-empty, comma-separated list of expressions.
ExpressionList = { Expression ~ ( "," ~ Expression )* }
// Closure
// "{" [ [params] "->" ] statements "}"
// The arrow may appear with no parameters in front of it.
Closure = {
    "{"
    ~ ( ClosureParameters? ~ "->" )?
    ~ Statement*
    ~ "}"
}

// Non-empty, comma-separated closure parameters.
ClosureParameters = { ClosureParameter ~ ( "," ~ ClosureParameter )* }

// A parameter name with an optional ": Type" annotation.
ClosureParameter = { Identifier ~ ( ":" ~ TypeUse )? }
// Literals
// Any literal value.
Literal = { NumberLiteral | StringLiteral | BooleanLiteral }

// Double and Long are tried before Int because they begin with the same
// digits as an Int and extend them.
NumberLiteral = { DoubleLiteral | LongLiteral | IntLiteral }

// A bare number in any supported base.
IntLiteral = { NumberBase }

// A number immediately followed by "L" (compound-atomic: no whitespace).
LongLiteral = ${ NumberBase ~ "L" }

// Decimal digits, ".", decimal digits (compound-atomic: no whitespace).
DoubleLiteral = ${ DoubleWhole ~ "." ~ DoubleFractional }

DoubleWhole = { DecimalBase }

DoubleFractional = { DecimalBase }

// Prefixed bases are tried before plain decimal so that the leading "0" of
// "0b" / "0x" is not taken as a decimal number.
NumberBase = { BinaryBase | HexadecimalBase | DecimalBase }
// One or more decimal digits.
DecimalBase = @{ ASCII_DIGIT+ }

// "0b" immediately followed by binary digits. Compound-atomic ($) so that no
// whitespace is accepted between the prefix and the digits, while the inner
// BinaryDigits token is still produced.
BinaryBase = ${ "0b" ~ BinaryDigits }
BinaryDigits = @{ BinaryDigit+ }
BinaryDigit = { "0" | "1" }

// "0x" immediately followed by hexadecimal digits. Compound-atomic ($) for
// the same reason as BinaryBase.
HexadecimalBase = ${ "0x" ~ HexadecimalDigits }
HexadecimalDigits = @{ HexadecimalDigit+ }
// Upper-case A-F is accepted as well as lower-case, consistent with the
// ASCII_HEX_DIGIT used for \u escapes in string literals.
HexadecimalDigit = { '0'..'9' | 'a'..'f' | 'A'..'F' }
// The three string forms: 'single', "double" and `backtick`.
StringLiteral = {
    SingleQuoteString
    | DoubleQuoteString
    | BacktickString
}

// The quoted-string rules are compound-atomic ($): implicit WHITESPACE must
// not be skipped between the quotes, otherwise whitespace right after the
// opening quote (or right after an interpolation) would be silently dropped
// from the string contents. Inner tokens are still produced.
SingleQuoteString = ${ "'" ~ StringInner? ~ "'" }

// Double-quoted string: literal text interleaved with ${...} interpolations.
DoubleQuoteString = ${
    "\""
    ~ ( DStringInner? ~ DStringExpression )*
    ~ DStringInner?
    ~ "\""
}

// Raw contents of a single-quoted string.
StringInner = @{ StringChar+ }
// Any character except an unescaped quote or backslash, or a recognised
// escape sequence, or \u followed by four hex digits.
StringChar = {
    !( "\'" | "\\" ) ~ ANY
    | "\\" ~ ( "'" | "\\" | "/" | "b" | "f" | "n" | "r" | "t" )
    | "\\" ~ ( "u" ~ ASCII_HEX_DIGIT{4} )
}

// A run of literal text inside a double-quoted string.
DStringInner = @{ DStringChar+ }
// As StringChar, but the terminator is a double quote, "${" starts an
// interpolation, and "\$" is an additional escape.
DStringChar = {
    !( "\"" | "\\" | "${" ) ~ ANY
    | "\\" ~ ( "\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t" | "$" )
    | "\\" ~ "u" ~ ASCII_HEX_DIGIT{4}
}

// "${" expr "}". Explicitly non-atomic (!) so that whitespace is allowed
// inside the interpolated expression even though the enclosing string rule
// is atomic.
DStringExpression = !{
    "${"
    ~ Expression
    ~ "}"
}

// Backtick string: same structure as a double-quoted string, delimited by "`".
BacktickString = ${
    "`"
    ~ ( BacktickInner? ~ DStringExpression )*
    ~ BacktickInner?
    ~ "`"
}

// A run of literal text inside a backtick string.
BacktickInner = @{ BacktickStringChar+ }
BacktickStringChar = {
    !( "`" | "\\" | "${" ) ~ ANY
    | "\\" ~ ( "`" | "\\" | "/" | "b" | "f" | "n" | "r" | "t" | "$" )
    | "\\" ~ "u" ~ ASCII_HEX_DIGIT{4}
}
// The true or false keyword.
BooleanLiteral = {
    True
    | False
}

// Implicit whitespace skipped between tokens of non-atomic rules. Silent (_),
// so it produces no tokens. Newlines count as ordinary whitespace.
WHITESPACE = _{
    " "
    | "\t"
    | "\n"
    | "\r"
}