From 2967ceb2fc1d9c8a7e247192135a3a2643ff41e0 Mon Sep 17 00:00:00 2001
From: Jesse Brault
Date: Tue, 31 Dec 2024 09:48:29 -0600
Subject: [PATCH] Remove old lexer/parser code, in preparation for Pest usage.

---
 Cargo.toml               |   4 -
 src/bin/compiler/main.rs |  22 ----
 src/lexer/mod.rs         | 254 ---------------------------------------
 src/lib.rs               |   1 -
 src/parser/mod.rs        |  15 +--
 src/parser/types.rs      |  11 --
 6 files changed, 1 insertion(+), 306 deletions(-)
 delete mode 100644 src/bin/compiler/main.rs
 delete mode 100644 src/lexer/mod.rs
 delete mode 100644 src/parser/types.rs

diff --git a/Cargo.toml b/Cargo.toml
index 9d8e516..42d0fab 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,10 +3,6 @@ name = "deimos"
 version = "0.1.0"
 edition = "2021"
 
-[[bin]]
-name = "dmc"
-path = "src/bin/compiler/main.rs"
-
 [[bin]]
 name = "dm"
 path = "src/bin/dvm/main.rs"
diff --git a/src/bin/compiler/main.rs b/src/bin/compiler/main.rs
deleted file mode 100644
index 476d7dd..0000000
--- a/src/bin/compiler/main.rs
+++ /dev/null
@@ -1,22 +0,0 @@
-use deimos::lexer::tokenize;
-use deimos::parser::parse;
-use std::process::exit;
-
-fn main() {
-    let src = String::from("print 42");
-    let tokenize_result = tokenize(&src);
-    if let Err(e) = tokenize_result {
-        eprintln!("{}", e);
-        exit(1);
-    }
-    let tokens = tokenize_result.unwrap();
-    println!("{:?}", tokens);
-    let parse_result = parse(&tokens);
-    if let Err(e) = parse_result {
-        eprintln!("{}", e);
-        exit(1);
-    }
-    let compilation_unit = parse_result.unwrap();
-    println!("{:?}", compilation_unit);
-    // TODO: compilation_unit to DmModule
-}
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
deleted file mode 100644
index fac180d..0000000
--- a/src/lexer/mod.rs
+++ /dev/null
@@ -1,254 +0,0 @@
-use std::iter::Peekable;
-use std::str::Chars;
-
-#[derive(Debug, PartialEq, Eq)]
-pub enum Token {
-    Namespace,
-    Identifier(String),
-    Public,
-    Module,
-    CurlyOpen,
-    CurlyClose,
-    Interface,
-    Colon,
-    Function,
-    ParenOpen,
-    ParenClose,
-    Enum,
-    LessThan,
-    GreaterThan,
-    Intersection,
-    Union,
-    And,
-    Or,
-    Equals,
-    BigArrow,
-    LittleArrow,
-    Plus,
-    Minus,
-    Dot,
-    Ellipsis,
-    Abstract,
-    NumberLiteral(String),
-}
-
-pub fn tokenize(input: &String) -> Result<Vec<Token>, String> {
-    let mut tokens: Vec<Token> = Vec::new();
-    let mut peekable = input.chars().peekable();
-    while let Some(c) = peekable.next() {
-        match c {
-            ' ' | '\n' | '\r' | '\t' => { /* ignore */ }
-            '{' => tokens.push(Token::CurlyOpen),
-            '}' => tokens.push(Token::CurlyClose),
-            ':' => tokens.push(Token::Colon),
-            '(' => tokens.push(Token::ParenOpen),
-            ')' => tokens.push(Token::ParenClose),
-            '<' => tokens.push(Token::LessThan),
-            '>' => tokens.push(Token::GreaterThan),
-            '&' => match peekable.peek() {
-                Some('&') => {
-                    let _ = peekable.next();
-                    tokens.push(Token::And);
-                }
-                Some(_) | None => tokens.push(Token::Intersection),
-            },
-            '|' => match peekable.next_if_eq(&'|') {
-                Some(_) => tokens.push(Token::Or),
-                None => tokens.push(Token::Union),
-            },
-            '=' => match peekable.next_if_eq(&'>') {
-                Some(_) => tokens.push(Token::BigArrow),
-                None => tokens.push(Token::Equals),
-            },
-            '+' => tokens.push(Token::Plus),
-            '-' => match peekable.next_if_eq(&'>') {
-                Some(_) => tokens.push(Token::LittleArrow),
-                None => tokens.push(Token::Minus),
-            },
-            '.' => {
-                let mut count = 1;
-                while let Some(_) = peekable.next_if_eq(&'.') {
-                    count += 1;
-                }
-                match count {
-                    1 => tokens.push(Token::Dot),
-                    3 => tokens.push(Token::Ellipsis),
-                    _ => return Err(String::from("Unexpected number of tokens after '.'")),
-                }
-            }
-            '0'..='9' => {
-                let mut buffer = String::new();
-                buffer.push(c);
-                while let Some(num_char) = peekable.next_if(|c| {
-                    c.is_digit(10)
-                        || match c {
-                            '_' | 'x' | 'L' | 'd' => true,
-                            _ => false,
-                        }
-                }) {
-                    buffer.push(num_char);
-                }
-                tokens.push(Token::NumberLiteral(buffer));
-            }
-            _ => {
-                if let Some(token) = match_identifier_or_keyword(c, &mut peekable) {
-                    tokens.push(token);
-                } else {
-                    return Err(String::from(format!("Unexpected token: {}", c)));
-                }
-            }
-        }
-    }
-    Ok(tokens)
-}
-
-fn match_identifier_or_keyword(start_char: char, peekable: &mut Peekable<Chars>) -> Option<Token> {
-    if !is_valid_identifier_start_char(start_char) {
-        return None;
-    }
-
-    // append start char
-    let mut buffer = String::new();
-    buffer.push(start_char);
-
-    // munch while we have valid identifier chars
-    while let Some(c) = peekable.next_if(|next_char| is_valid_identifier_char(*next_char)) {
-        buffer.push(c);
-    }
-
-    // match to a keyword if possible, else identifier
-    match buffer.as_str() {
-        "abs" => Some(Token::Abstract),
-        "enum" => Some(Token::Enum),
-        "fn" => Some(Token::Function),
-        "int" => Some(Token::Interface),
-        "mod" => Some(Token::Module),
-        "ns" => Some(Token::Namespace),
-        "pub" => Some(Token::Public),
-        _ => Some(Token::Identifier(buffer)),
-    }
-}
-
-fn is_valid_identifier_start_char(c: char) -> bool {
-    match c {
-        'a'..='z' | 'A'..='Z' | '_' => true,
-        _ => false,
-    }
-}
-
-fn is_valid_identifier_char(c: char) -> bool {
-    match c {
-        'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => true,
-        _ => false,
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::fs::File;
-    use std::io::Read;
-    use std::path::Path;
-
-    #[test]
-    fn simple_ns() {
-        let result = tokenize(&String::from("ns simple")).unwrap();
-        assert_eq!(Token::Namespace, result[0]);
-        assert_eq!(Token::Identifier(String::from("simple")), result[1]);
-    }
-
-    #[test]
-    #[cfg_attr(miri, ignore)]
-    fn simple_ns_file() {
-        let mut src_file = File::open(Path::new("test-data/lexer/simple_ns.dm")).unwrap();
-        let mut src = String::new();
-        let _ = src_file.read_to_string(&mut src);
-        let result = tokenize(&src).unwrap();
-        assert_eq!(Token::Namespace, result[0]);
-        assert_eq!(Token::Identifier(String::from("simple")), result[1]);
-    }
-
-    #[test]
-    fn pub_mod_simple() {
-        let result = tokenize(&String::from("pub mod simple")).unwrap();
-        assert_eq!(Token::Public, result[0]);
-        assert_eq!(Token::Module, result[1]);
-        assert_eq!(Token::Identifier(String::from("simple")), result[2]);
-    }
-
-    #[test]
-    fn curly_open_and_close() {
-        let result = tokenize(&String::from("{ }")).unwrap();
-        assert_eq!(Token::CurlyOpen, result[0]);
-        assert_eq!(Token::CurlyClose, result[1]);
-    }
-
-    #[test]
-    fn simple_int() {
-        let result = tokenize(&String::from("int simple")).unwrap();
-        assert_eq!(Token::Interface, result[0]);
-        assert_eq!(Token::Identifier(String::from("simple")), result[1]);
-    }
-
-    #[test]
-    fn ns_pub_mod_simple() {
-        let result = tokenize(&String::from("ns simple_ns\npub mod simple { }")).unwrap();
-        assert_eq!(Token::Namespace, result[0]);
-        assert_eq!(Token::Identifier(String::from("simple_ns")), result[1]);
-        assert_eq!(Token::Public, result[2]);
-        assert_eq!(Token::Module, result[3]);
-        assert_eq!(Token::Identifier(String::from("simple")), result[4]);
-        assert_eq!(Token::CurlyOpen, result[5]);
-        assert_eq!(Token::CurlyClose, result[6]);
-    }
-
-    #[test]
-    fn curly_open_and_close_no_space() {
-        let result = tokenize(&String::from("{}")).unwrap();
-        assert_eq!(Token::CurlyOpen, result[0]);
-        assert_eq!(Token::CurlyClose, result[1]);
-    }
-
-    #[test]
-    fn interface_function() {
-        let result = tokenize(&String::from("fn test(): Test")).unwrap();
-        assert_eq!(Token::Function, result[0]);
-        assert_eq!(Token::Identifier(String::from("test")), result[1]);
-        assert_eq!(Token::ParenOpen, result[2]);
-        assert_eq!(Token::ParenClose, result[3]);
-        assert_eq!(Token::Colon, result[4]);
-        assert_eq!(Token::Identifier(String::from("Test")), result[5]);
-    }
-
-    #[test]
-    fn interface_prop() {
-        let result = tokenize(&String::from("test: Test")).unwrap();
-        assert_eq!(Token::Identifier(String::from("test")), result[0]);
-        assert_eq!(Token::Colon, result[1]);
-        assert_eq!(Token::Identifier(String::from("Test")), result[2]);
-    }
-
-    #[test]
-    fn enum_decl() {
-        let result = tokenize(&String::from("enum Test {}")).unwrap();
-        assert_eq!(Token::Enum, result[0]);
-        assert_eq!(Token::Identifier(String::from("Test")), result[1]);
-        assert_eq!(Token::CurlyOpen, result[2]);
-        assert_eq!(Token::CurlyClose, result[3]);
-    }
-
-    #[test]
-    fn spread_operator() {
-        let result = tokenize(&String::from("{ ...props }")).unwrap();
-        assert_eq!(Token::CurlyOpen, result[0]);
-        assert_eq!(Token::Ellipsis, result[1]);
-        assert_eq!(Token::Identifier(String::from("props")), result[2]);
-        assert_eq!(Token::CurlyClose, result[3]);
-    }
-
-    #[test]
-    fn simple_number() {
-        let result = tokenize(&String::from("123456")).unwrap();
-        assert_eq!(Token::NumberLiteral(String::from("123456")), result[0]);
-    }
-}
diff --git a/src/lib.rs b/src/lib.rs
index b3fa46b..8d82b86 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,3 @@
-pub mod lexer;
 pub mod parser;
 mod util;
 pub mod vm;
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 53609be..2669896 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1,18 +1,5 @@
-mod types;
-
-use crate::lexer::Token;
-use crate::parser::types::AstNode;
-use pest::Parser;
 use pest_derive::Parser;
 
 #[derive(Parser)]
 #[grammar = "parser/deimos.pest"]
-struct DeimosParser;
-
-pub fn parse(tokens: &Vec<Token>) -> Result<AstNode, String> {
-    let p = DeimosParser::parse(Rule::compilation_unit, "ns std::core")
-        .expect("unable to parse")
-        .next()
-        .unwrap();
-    todo!()
-}
+pub struct DeimosParser;
diff --git a/src/parser/types.rs b/src/parser/types.rs
deleted file mode 100644
index a158e8d..0000000
--- a/src/parser/types.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-use std::fmt::Debug;
-
-pub type NodeChildren = Vec<Box<dyn Debug>>;
-
-#[derive(Debug)]
-pub enum AstNode {
-    CompilationUnit(NodeChildren),
-    BlockStatement(NodeChildren),
-    Statement(NodeChildren),
-    Expression(NodeChildren),
-}
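
Note on usage after this patch: with tokenize/parse removed, the crate exposes
only the Pest-generated DeimosParser. A minimal sketch of driving it directly,
assuming parser/deimos.pest defines the compilation_unit rule referenced by the
removed parse function (the sample input is the one that function used):

    use deimos::parser::{DeimosParser, Rule};
    use pest::Parser; // trait that supplies the generated DeimosParser::parse

    fn main() {
        // Rule is generated alongside DeimosParser by #[derive(Parser)].
        match DeimosParser::parse(Rule::compilation_unit, "ns std::core") {
            Ok(pairs) => {
                // Walk the top-level pairs of the resulting parse tree.
                for pair in pairs {
                    println!("{:?}: {:?}", pair.as_rule(), pair.as_str());
                }
            }
            // Pest errors carry line/column context and pretty-print via Display.
            Err(e) => eprintln!("{}", e),
        }
    }

Since the struct is now pub, downstream code (e.g. a rebuilt dmc binary) can
depend on the grammar directly instead of going through the old token stream.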