Remove old lexer/parser code, in preparation for Pest usage.

Jesse Brault 2024-12-31 09:48:29 -06:00
parent fc9cfcdf7c
commit 2967ceb2fc
6 changed files with 1 addition and 306 deletions

Cargo.toml

@@ -3,10 +3,6 @@ name = "deimos"
 version = "0.1.0"
 edition = "2021"
 
-[[bin]]
-name = "dmc"
-path = "src/bin/compiler/main.rs"
-
 [[bin]]
 name = "dm"
 path = "src/bin/dvm/main.rs"

src/bin/compiler/main.rs (deleted)

@ -1,22 +0,0 @@
use deimos::lexer::tokenize;
use deimos::parser::parse;
use std::process::exit;
fn main() {
let src = String::from("print 42");
let tokenize_result = tokenize(&src);
if let Err(e) = tokenize_result {
eprintln!("{}", e);
exit(1);
}
let tokens = tokenize_result.unwrap();
println!("{:?}", tokens);
let parse_result = parse(&tokens);
if let Err(e) = parse_result {
eprintln!("{}", e);
exit(1);
}
let compilation_unit = parse_result.unwrap();
println!("{:?}", compilation_unit);
// TODO: compilation_unit to DmModule
}
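Aside: once Pest is wired in, a compiler driver along these lines could replace the removed tokenize/parse pipeline. This is a minimal sketch, not part of the commit; it assumes DeimosParser and the generated Rule enum are re-exported from src/parser/mod.rs, and Rule::compilation_unit is taken from the removed parse() body below.

// Hypothetical Pest-based driver; not in this commit.
use deimos::parser::{DeimosParser, Rule};
use pest::Parser;
use std::process::exit;

fn main() {
    let src = String::from("print 42");
    // DeimosParser::parse returns Result<Pairs<Rule>, pest::error::Error<Rule>>.
    match DeimosParser::parse(Rule::compilation_unit, &src) {
        Ok(pairs) => println!("{:#?}", pairs),
        Err(e) => {
            eprintln!("{}", e);
            exit(1);
        }
    }
}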

src/lexer.rs (deleted)

@ -1,254 +0,0 @@
use std::iter::Peekable;
use std::str::Chars;
#[derive(Debug, PartialEq, Eq)]
pub enum Token {
Namespace,
Identifier(String),
Public,
Module,
CurlyOpen,
CurlyClose,
Interface,
Colon,
Function,
ParenOpen,
ParenClose,
Enum,
LessThan,
GreaterThan,
Intersection,
Union,
And,
Or,
Equals,
BigArrow,
LittleArrow,
Plus,
Minus,
Dot,
Ellipsis,
Abstract,
NumberLiteral(String),
}
pub fn tokenize(input: &String) -> Result<Vec<Token>, String> {
let mut tokens: Vec<Token> = Vec::new();
let mut peekable = input.chars().peekable();
while let Some(c) = peekable.next() {
match c {
' ' | '\n' | '\r' | '\t' => { /* ignore */ }
'{' => tokens.push(Token::CurlyOpen),
'}' => tokens.push(Token::CurlyClose),
':' => tokens.push(Token::Colon),
'(' => tokens.push(Token::ParenOpen),
')' => tokens.push(Token::ParenClose),
'<' => tokens.push(Token::LessThan),
'>' => tokens.push(Token::GreaterThan),
'&' => match peekable.peek() {
Some('&') => {
let _ = peekable.next();
tokens.push(Token::And);
}
Some(_) | None => tokens.push(Token::Intersection),
},
'|' => match peekable.next_if_eq(&'|') {
Some(_) => tokens.push(Token::Or),
None => tokens.push(Token::Union),
},
'=' => match peekable.next_if_eq(&'>') {
Some(_) => tokens.push(Token::BigArrow),
None => tokens.push(Token::Equals),
},
'+' => tokens.push(Token::Plus),
'-' => match peekable.next_if_eq(&'>') {
Some(_) => tokens.push(Token::LittleArrow),
None => tokens.push(Token::Minus),
},
'.' => {
let mut count = 1;
while let Some(_) = peekable.next_if_eq(&'.') {
count += 1;
}
match count {
1 => tokens.push(Token::Dot),
3 => tokens.push(Token::Ellipsis),
_ => return Err(String::from("Unexpected number of tokens after '.'")),
}
}
'0'..='9' => {
let mut buffer = String::new();
buffer.push(c);
while let Some(num_char) = peekable.next_if(|c| {
c.is_digit(10)
|| match c {
'_' | 'x' | 'L' | 'd' => true,
_ => false,
}
}) {
buffer.push(num_char);
}
tokens.push(Token::NumberLiteral(buffer));
}
_ => {
if let Some(token) = match_identifier_or_keyword(c, &mut peekable) {
tokens.push(token);
} else {
return Err(String::from(format!("Unexpected token: {}", c)));
}
}
}
}
Ok(tokens)
}
fn match_identifier_or_keyword(start_char: char, peekable: &mut Peekable<Chars>) -> Option<Token> {
if !is_valid_identifier_start_char(start_char) {
return None;
}
// append start char
let mut buffer = String::new();
buffer.push(start_char);
// munch while we have valid identifier chars
while let Some(c) = peekable.next_if(|next_char| is_valid_identifier_char(*next_char)) {
buffer.push(c);
}
// match to a keyword if possible, else identifier
match buffer.as_str() {
"abs" => Some(Token::Abstract),
"enum" => Some(Token::Enum),
"fn" => Some(Token::Function),
"int" => Some(Token::Interface),
"mod" => Some(Token::Module),
"ns" => Some(Token::Namespace),
"pub" => Some(Token::Public),
_ => Some(Token::Identifier(buffer)),
}
}
fn is_valid_identifier_start_char(c: char) -> bool {
match c {
'a'..='z' | 'A'..='Z' | '_' => true,
_ => false,
}
}
fn is_valid_identifier_char(c: char) -> bool {
match c {
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => true,
_ => false,
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs::File;
use std::io::Read;
use std::path::Path;
#[test]
fn simple_ns() {
let result = tokenize(&String::from("ns simple")).unwrap();
assert_eq!(Token::Namespace, result[0]);
assert_eq!(Token::Identifier(String::from("simple")), result[1]);
}
#[test]
#[cfg_attr(miri, ignore)]
fn simple_ns_file() {
let mut src_file = File::open(Path::new("test-data/lexer/simple_ns.dm")).unwrap();
let mut src = String::new();
let _ = src_file.read_to_string(&mut src);
let result = tokenize(&src).unwrap();
assert_eq!(Token::Namespace, result[0]);
assert_eq!(Token::Identifier(String::from("simple")), result[1]);
}
#[test]
fn pub_mod_simple() {
let result = tokenize(&String::from("pub mod simple")).unwrap();
assert_eq!(Token::Public, result[0]);
assert_eq!(Token::Module, result[1]);
assert_eq!(Token::Identifier(String::from("simple")), result[2]);
}
#[test]
fn curly_open_and_close() {
let result = tokenize(&String::from("{ }")).unwrap();
assert_eq!(Token::CurlyOpen, result[0]);
assert_eq!(Token::CurlyClose, result[1]);
}
#[test]
fn simple_int() {
let result = tokenize(&String::from("int simple")).unwrap();
assert_eq!(Token::Interface, result[0]);
assert_eq!(Token::Identifier(String::from("simple")), result[1]);
}
#[test]
fn ns_pub_mod_simple() {
let result = tokenize(&String::from("ns simple_ns\npub mod simple { }")).unwrap();
assert_eq!(Token::Namespace, result[0]);
assert_eq!(Token::Identifier(String::from("simple_ns")), result[1]);
assert_eq!(Token::Public, result[2]);
assert_eq!(Token::Module, result[3]);
assert_eq!(Token::Identifier(String::from("simple")), result[4]);
assert_eq!(Token::CurlyOpen, result[5]);
assert_eq!(Token::CurlyClose, result[6]);
}
#[test]
fn curly_open_and_close_no_space() {
let result = tokenize(&String::from("{}")).unwrap();
assert_eq!(Token::CurlyOpen, result[0]);
assert_eq!(Token::CurlyClose, result[1]);
}
#[test]
fn interface_function() {
let result = tokenize(&String::from("fn test(): Test")).unwrap();
assert_eq!(Token::Function, result[0]);
assert_eq!(Token::Identifier(String::from("test")), result[1]);
assert_eq!(Token::ParenOpen, result[2]);
assert_eq!(Token::ParenClose, result[3]);
assert_eq!(Token::Colon, result[4]);
assert_eq!(Token::Identifier(String::from("Test")), result[5]);
}
#[test]
fn interface_prop() {
let result = tokenize(&String::from("test: Test")).unwrap();
assert_eq!(Token::Identifier(String::from("test")), result[0]);
assert_eq!(Token::Colon, result[1]);
assert_eq!(Token::Identifier(String::from("Test")), result[2]);
}
#[test]
fn enum_decl() {
let result = tokenize(&String::from("enum Test {}")).unwrap();
assert_eq!(Token::Enum, result[0]);
assert_eq!(Token::Identifier(String::from("Test")), result[1]);
assert_eq!(Token::CurlyOpen, result[2]);
assert_eq!(Token::CurlyClose, result[3]);
}
#[test]
fn spread_operator() {
let result = tokenize(&String::from("{ ...props }")).unwrap();
assert_eq!(Token::CurlyOpen, result[0]);
assert_eq!(Token::Ellipsis, result[1]);
assert_eq!(Token::Identifier(String::from("props")), result[2]);
assert_eq!(Token::CurlyClose, result[3]);
}
#[test]
fn simple_number() {
let result = tokenize(&String::from("123456")).unwrap();
assert_eq!(Token::NumberLiteral(String::from("123456")), result[0]);
}
}
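Aside: every two-character operator in the removed lexer (&&, ||, =>, ->) rests on one-character lookahead via Peekable::next_if_eq, which consumes the peeked character only when it matches. A standalone illustration of the pattern, separate from the project code:

fn main() {
    let mut chars = "->".chars().peekable();
    let first = chars.next().unwrap(); // '-'
    // next_if_eq(&'>') consumes the '>' only if it is next, so "->" becomes
    // LittleArrow while a bare "-" falls through to Minus.
    let token = match (first, chars.next_if_eq(&'>')) {
        ('-', Some(_)) => "LittleArrow",
        ('-', None) => "Minus",
        _ => unreachable!(),
    };
    assert_eq!(token, "LittleArrow");
}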

src/lib.rs

@@ -1,4 +1,3 @@
-pub mod lexer;
 pub mod parser;
 mod util;
 pub mod vm;

src/parser/mod.rs

@ -1,18 +1,5 @@
mod types;
use crate::lexer::Token;
use crate::parser::types::AstNode;
use pest::Parser;
use pest_derive::Parser; use pest_derive::Parser;
#[derive(Parser)] #[derive(Parser)]
#[grammar = "parser/deimos.pest"] #[grammar = "parser/deimos.pest"]
struct DeimosParser; pub struct DeimosParser;
pub fn parse(tokens: &Vec<Token>) -> Result<AstNode, String> {
let p = DeimosParser::parse(Rule::compilation_unit, "ns std::core")
.expect("unable to parse")
.next()
.unwrap();
todo!()
}
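Aside: with the hand-written parse() gone, callers drive the now-public DeimosParser through pest directly. A minimal sketch of walking the resulting pairs; Rule::compilation_unit is taken from the removed body, the rest is assumed:

use pest::Parser;

// Sketch only, not part of this commit.
fn dump(src: &str) -> Result<(), pest::error::Error<Rule>> {
    let pairs = DeimosParser::parse(Rule::compilation_unit, src)?;
    for pair in pairs {
        // Each Pair reports the rule it matched and the slice of source it covers.
        println!("{:?}: {:?}", pair.as_rule(), pair.as_str());
    }
    Ok(())
}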

src/parser/types.rs (deleted)

@ -1,11 +0,0 @@
use std::fmt::Debug;
pub type NodeChildren = Vec<Box<AstNode>>;
#[derive(Debug)]
pub enum AstNode {
CompilationUnit(NodeChildren),
BlockStatement(NodeChildren),
Statement(NodeChildren),
Expression(NodeChildren),
}
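Aside: if an AST layer is later rebuilt on top of Pest, the removed AstNode shape folds naturally out of a pair's inner pairs. A hypothetical sketch reusing the deleted type names; only compilation_unit is visible in this commit, so the other rule mappings are placeholders:

// Hypothetical follow-up code, not part of this commit.
fn build(pair: pest::iterators::Pair<Rule>) -> AstNode {
    let rule = pair.as_rule();
    // Recursively box each inner pair, matching NodeChildren = Vec<Box<AstNode>>.
    let children: NodeChildren = pair
        .into_inner()
        .map(|inner| Box::new(build(inner)))
        .collect();
    match rule {
        Rule::compilation_unit => AstNode::CompilationUnit(children),
        // Real code would map the remaining rules; Statement is a placeholder here.
        _ => AstNode::Statement(children),
    }
}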