From e8019366ee869021af43828a17994a18be69fbaf Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Tue, 26 Nov 2024 23:34:30 -0600 Subject: [PATCH] Start implementation of vm. --- sketching/goal_one.dm | 67 +++++++++++++++++++++ sketching/hkt.dm | 2 +- src/lexer/mod.rs | 45 ++++++-------- src/lib.rs | 1 + src/vm/mod.rs | 136 ++++++++++++++++++++++++++++++++++++++++++ src/vm/op_codes.rs | 72 ++++++++++++++++++++++ 6 files changed, 295 insertions(+), 28 deletions(-) create mode 100644 sketching/goal_one.dm create mode 100644 src/vm/mod.rs create mode 100644 src/vm/op_codes.rs diff --git a/sketching/goal_one.dm b/sketching/goal_one.dm new file mode 100644 index 0000000..cf1079e --- /dev/null +++ b/sketching/goal_one.dm @@ -0,0 +1,67 @@ +enum Option { + Some(item: T), + None +} + +fn make_one() = Some(1) + +fn main() { + if (make_one() is Some(num)) { + println(num) + } +} + +// Bytecode? + +// Start with some magic number +// Then a header. Each string starts with a length + +header { + module_name: default + symbol_table_addr: 25 // example + symbol_table_length: 104 // example + version: 0.1.0 +} + +symbols { + # type addr len name + 0 fn 0 4 Some + 1 fn 30 8 make_one + 2 fn 39 4 main +} + +layout Option { + [0]: byte // tag + - 0: Some + - 1: None + [1-8]: *Any // ptr to Any +} + +fn Some(r0: *Any) -> *Option + alloc r1, 5 // allocate 5 bytes and put address into r1 + mov [r1], 0 // move 0 into the (heap) location pointed to by r1 + mov [r1 + 1], r0 // move the value of r0 into the location pointed to by r1 + 1 + mov r0, r1 // move the value of r1 (the address of the struct) into r0 + ret // return r0 +len: 30 + +fn make_one: + mov r0, 1 + call Some +len: 9 + +fn main: + call make_one // op(1) + len(4) + name(8) = 13 + cmp_int 0, [r0] // op(1) + r(1) + offset(4) + operand(4) = 10 + jne .end // op(1) + offset(4) + mov r1, [r0 + 1] // op(1) + dest_r(1) + src_r(1) + src_offset(4) = 7 + mov r2, r1 // op(1) + dest_r(1) + src_r(1) + mov r0, r1 // op(1) + dest_r(1) + src_r(1) + call_ext println // op(1) + len(4) + name(7) + free r2, 5 // op(1) + r(1) + size(4) + + .end + mov r0, 0 // op(1) + r(1) + value(4) + ret // op(1) +len: + diff --git a/sketching/hkt.dm b/sketching/hkt.dm index 5fea200..491b470 100644 --- a/sketching/hkt.dm +++ b/sketching/hkt.dm @@ -9,7 +9,7 @@ pub hkt Monad[T] : Monad[T] { } pub enum Option : Monad { - Some(item) { + Some(item: T) { impl fn map(m) = Some(m(item)) impl fn flat_map(m) = m(item) }, diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 52645ff..d4897c3 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -28,7 +28,7 @@ pub enum Token { Minus, Dot, Ellipsis, - Abstract + Abstract, } pub fn tokenize(input: &String) -> Result, &'static str> { @@ -44,33 +44,25 @@ pub fn tokenize(input: &String) -> Result, &'static str> { ')' => tokens.push(Token::ParenClose), '<' => tokens.push(Token::LessThan), '>' => tokens.push(Token::GreaterThan), - '&' => { - match peekable.peek() { - Some('&') => { - let _ = peekable.next(); - tokens.push(Token::And); - }, - Some(_) | None => tokens.push(Token::Intersection), - } - } - '|' => { - match peekable.next_if_eq(&'|') { - Some(_) => tokens.push(Token::Or), - None => tokens.push(Token::Union), + '&' => match peekable.peek() { + Some('&') => { + let _ = peekable.next(); + tokens.push(Token::And); } + Some(_) | None => tokens.push(Token::Intersection), }, - '=' => { - match peekable.next_if_eq(&'>') { - Some(_) => tokens.push(Token::BigArrow), - None => tokens.push(Token::Equals), - } + '|' => match peekable.next_if_eq(&'|') { + Some(_) => tokens.push(Token::Or), + None => tokens.push(Token::Union), + }, + '=' => match peekable.next_if_eq(&'>') { + Some(_) => tokens.push(Token::BigArrow), + None => tokens.push(Token::Equals), }, '+' => tokens.push(Token::Plus), - '-' => { - match peekable.next_if_eq(&'>') { - Some(_) => tokens.push(Token::LittleArrow), - None => tokens.push(Token::Minus), - } + '-' => match peekable.next_if_eq(&'>') { + Some(_) => tokens.push(Token::LittleArrow), + None => tokens.push(Token::Minus), }, '.' => { let mut count = 1; @@ -95,7 +87,7 @@ pub fn tokenize(input: &String) -> Result, &'static str> { fn match_identifier_or_keyword(start_char: char, peekable: &mut Peekable) -> Option { if !is_valid_identifier_start_char(start_char) { - return None + return None; } // append start char @@ -182,8 +174,7 @@ mod tests { #[test] fn ns_pub_mod_simple() { - let result = tokenize(&String::from("ns simple_ns\npub mod simple { }")) - .unwrap(); + let result = tokenize(&String::from("ns simple_ns\npub mod simple { }")).unwrap(); assert_eq!(Token::Namespace, result[0]); assert_eq!(Token::Identifier(String::from("simple_ns")), result[1]); assert_eq!(Token::Public, result[2]); diff --git a/src/lib.rs b/src/lib.rs index a5464ca..e38af6e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,2 @@ mod lexer; +mod vm; diff --git a/src/vm/mod.rs b/src/vm/mod.rs new file mode 100644 index 0000000..62df4de --- /dev/null +++ b/src/vm/mod.rs @@ -0,0 +1,136 @@ +mod op_codes; + +use op_codes::*; +use std::alloc::{alloc_zeroed, Layout}; + +pub fn run(code: &Vec, registers: &mut Vec, register_types: &mut Vec) { + let mut i = 0; + while i < code.len() { + match code[i] { + MOV_INT => { + let target_register = code[i + 1] as usize; + let operand: u32 = code[i + 2] as u32 + + ((code[i + 3] as u32) << 8) + + ((code[i + 4] as u32) << 16) + + ((code[i + 5] as u32) << 24); + registers[target_register] = operand as u64; + register_types[target_register] = RegisterType::Int; + i += 6 + } + MOV_LONG => { + let target_register = code[i + 1] as usize; + let operand: u64 = code[i + 2] as u64 + + ((code[i + 3] as u64) << 8) + + ((code[i + 4] as u64) << 16) + + ((code[i + 5] as u64) << 24) + + ((code[i + 6] as u64) << 32) + + ((code[i + 7] as u64) << 40) + + ((code[i + 8] as u64) << 48) + + ((code[i + 9] as u64) << 56); + registers[target_register] = operand; + register_types[target_register] = RegisterType::Long; + i += 10 + } + MOV_DOUBLE => { /* todo */ } + MOV_REGISTER => { + let target_register = code[i + 1] as usize; + let source_register = code[i + 2] as usize; + registers[target_register] = registers[source_register]; + register_types[target_register] = register_types[source_register].clone(); + i += 3; + } + ALLOC => { + let target_register = code[i + 1] as usize; + let n_bytes_to_allocate = code[i + 2] as u32 + + ((code[i + 3] as u32) << 8) + + ((code[i + 4] as u32) << 16) + + ((code[i + 5] as u32) << 24); + let layout = Layout::from_size_align(n_bytes_to_allocate as usize, 4).unwrap(); + let allocated = unsafe { alloc_zeroed(layout) }; + registers[target_register] = allocated as u64; + register_types[target_register] = RegisterType::Pointer; + i += 6; + } + MOV_INT_TO => { + let target_register = code[i + 1] as usize; + if register_types[target_register] != RegisterType::Pointer { + panic!("target_register {} is not a Pointer", target_register); + } + let offset = convert_to_u32(&code[(i + 2)..(i + 6)]) as isize; + let target = registers[target_register] as *mut u8; + unsafe { + target.offset(offset).write(code[i + 6]); + target.offset(offset + 1).write(code[i + 7]); + target.offset(offset + 2).write(code[i + 8]); + target.offset(offset + 3).write(code[i + 9]); + } + i += 10; + } + _ => panic!("Invalid code instruction"), + } + } +} + +fn convert_to_u32(bytes: &[u8]) -> u32 { + bytes[0] as u32 | (bytes[1] as u32) << 8 | (bytes[2] as u32) << 16 | (bytes[3] as u32) << 24 +} + +#[cfg(test)] +mod tests { + use super::*; + + fn init_registers(n_registers: usize) -> (Vec, Vec) { + (vec![0; n_registers], vec![RegisterType::Int; n_registers]) + } + + #[test] + fn mov_1_as_int() { + let mut code = Vec::new(); + add_mov_int(&mut code, 0, 1); + let (mut registers, mut register_types) = init_registers(1); + run(&code, &mut registers, &mut register_types); + assert_eq!(1, registers[0]); + } + + #[test] + fn move_65535_as_int() { + let mut code = Vec::new(); + add_mov_int(&mut code, 0, 0xffff); + let (mut registers, mut register_types) = init_registers(1); + run(&code, &mut registers, &mut register_types); + assert_eq!(0xffff, registers[0]); + } + + #[test] + fn move_int_max_as_int() { + let mut code = Vec::new(); + add_mov_int(&mut code, 0, 0xffff_ffff); + let (mut registers, mut register_types) = init_registers(1); + run(&code, &mut registers, &mut register_types); + assert_eq!(0xffff_ffff, registers[0]); + } + + #[test] + fn move_register() { + let mut code = Vec::new(); + add_mov_register(&mut code, 0, 1); + let (mut registers, mut register_types) = init_registers(2); + registers[1] = 1; + run(&code, &mut registers, &mut register_types); + assert_eq!(registers[0], 1); + } + + #[test] + fn mov_int_to_register_as_address() { + let mut code = Vec::new(); + add_alloc(&mut code, 0, 4); + add_mov_int_to(&mut code, 0, 0, 0xff); + let mut registers = vec![0; 16]; + let mut register_types = vec![RegisterType::Int; 16]; + run(&code, &mut registers, &mut register_types); + let target = registers[0] as *mut u8; + unsafe { + assert_eq!(0xff, *target); + } + } +} diff --git a/src/vm/op_codes.rs b/src/vm/op_codes.rs new file mode 100644 index 0000000..3a04f29 --- /dev/null +++ b/src/vm/op_codes.rs @@ -0,0 +1,72 @@ +/// ## mov(register: u8, operand: u32) +/// - 0: opcode +/// - 1: register +/// - 2..5: operand +pub const MOV_INT: u8 = 0x00; + +pub const MOV_LONG: u8 = 0x01; +pub const MOV_DOUBLE: u8 = 0x02; + +/// ## mov(target_register: u8, source_register: u8) +/// 0: opcode +/// 1: target_register +/// 2: source_register +pub const MOV_REGISTER: u8 = 0x03; + +/// ## alloc(register: u8, size: u32) +/// 0: opcode +/// 1: register +/// 2..5: size +pub const ALLOC: u8 = 0x04; + +/// ## mov_int_to(register: u8, offset: u32, operand: u32) +/// 0: opcode +/// 1: register +/// 2..5: offset +/// 6..9: operand +pub const MOV_INT_TO: u8 = 0x05; + +pub const MOV_LONG_TO: u8 = 0x06; +pub const MOV_DOUBLE_TO: u8 = 0x07; +pub const MOV_REGISTER_TO: u8 = 0x08; + +#[derive(PartialEq, Eq, Clone)] +pub enum RegisterType { + Int, + Long, + Double, + Pointer, +} + +pub fn add_mov_int(code: &mut Vec, register: u8, operand: u32) { + code.push(MOV_INT); + code.push(register); + for b in operand.to_le_bytes() { + code.push(b); + } +} + +pub fn add_mov_register(code: &mut Vec, target_register: u8, source_register: u8) { + code.push(MOV_REGISTER); + code.push(target_register); + code.push(source_register); +} + +pub fn add_alloc(code: &mut Vec, register: u8, size: u32) { + code.push(ALLOC); + code.push(register); + for b in size.to_le_bytes() { + code.push(b); + } +} + +pub fn add_mov_int_to(code: &mut Vec, register: u8, offset: u32, operand: u32) { + code.push(MOV_INT_TO); + code.push(register); + for b in offset.to_le_bytes() { + code.push(b); + } + for b in operand.to_le_bytes() { + code.push(b); + } +}