From c1618ba9a2f30e235da01b6978f1cf9fcfa3bd04 Mon Sep 17 00:00:00 2001
From: Jesse Brault
Date: Fri, 29 Nov 2024 17:01:33 -0600
Subject: [PATCH] Move macros to util; basic work on load_module.

---
 src/vm/mod.rs    |  30 +++--------
 src/vm/module.rs | 128 +++++++++++++++++++++++++++++++++++++++++++++++
 src/vm/util.rs   |  26 ++++++++++
 3 files changed, 162 insertions(+), 22 deletions(-)
 create mode 100644 src/vm/module.rs
 create mode 100644 src/vm/util.rs

diff --git a/src/vm/mod.rs b/src/vm/mod.rs
index 1880bc8..16317f2 100644
--- a/src/vm/mod.rs
+++ b/src/vm/mod.rs
@@ -1,7 +1,15 @@
+mod module;
 mod op_codes;
+mod util;
 
 use op_codes::*;
 use std::alloc::{alloc_zeroed, dealloc, Layout};
+use util::{get_32_le, get_64_le};
+
+pub struct DmVirtualMachine {
+    registers: Vec,
+    register_types: Vec,
+}
 
 struct DmObject {
     pointer: *mut u8,
@@ -9,28 +17,6 @@ struct DmObject {
     layout: Layout,
 }
 
-macro_rules! get_32_le {
-    ( $code: expr, $i: expr, $offset: literal, $T: ident ) => {
-        $code[$i + $offset] as $T
-            + (($code[$i + $offset + 1] as $T) << 8)
-            + (($code[$i + $offset + 2] as $T) << 16)
-            + (($code[$i + $offset + 3] as $T) << 24)
-    };
-}
-
-macro_rules! get_64_le {
-    ( $code: expr, $i: expr, $offset: literal, $T: ident ) => {
-        $code[$i + $offset] as $T
-            + (($code[$i + $offset + 1] as $T) << 8)
-            + (($code[$i + $offset + 2] as $T) << 16)
-            + (($code[$i + $offset + 3] as $T) << 24)
-            + (($code[$i + $offset + 4] as $T) << 32)
-            + (($code[$i + $offset + 5] as $T) << 40)
-            + (($code[$i + $offset + 6] as $T) << 48)
-            + (($code[$i + $offset + 7] as $T) << 56)
-    };
-}
-
 pub fn run(code: &Vec, registers: &mut Vec, register_types: &mut Vec) {
     let mut i = 0;
     while i < code.len() {
diff --git a/src/vm/module.rs b/src/vm/module.rs
new file mode 100644
index 0000000..1b4de73
--- /dev/null
+++ b/src/vm/module.rs
@@ -0,0 +1,128 @@
+use crate::get_32_le;
+use std::collections::HashMap;
+use std::io::Read;
+
+pub const DEIMOS_MAGIC_NUMBER: u64 = 0x00_00_64_65_69_6d_6f_73; // ascii 'deimos'
+
+pub struct DmModule {
+    constants: HashMap,
+    functions: HashMap,
+}
+
+pub enum DmConstant {
+    Int(i32),
+    Long(i64),
+    Double(f64),
+    String(String),
+}
+
+pub struct DmFunction {
+    name: String,
+    byte_code: Vec,
+}
+
+const CONST_SYMBOL: u8 = 0x01;
+const FUNCTION_SYMBOL: u8 = 0x02;
+
+enum SymbolType {
+    Constant,
+    Function,
+}
+
+struct DmSymbol {
+    name: String,
+    symbol_type: SymbolType,
+    address: u32,
+}
+
+pub fn load_module(bytes: &[u8]) -> Result {
+    let mut ip: usize = 0;
+    // Check for magic number at bytes 0..5
+    if !check_deimos(&bytes) {
+        return Err(String::from("Not a valid Deimos module."));
+    }
+    ip = 6;
+
+    // Get version string length from bytes 6..9
+    let version_string_length = get_32_le!(bytes, 6, 0, usize);
+    ip = 10;
+
+    // Bytes 10..(10 + version_string_length) are the version string, in utf8
+    let mut version_string_raw: Vec = Vec::with_capacity(version_string_length);
+    let version_string_end = ip + version_string_length;
+    while ip < version_string_end {
+        version_string_raw.push(bytes[ip]);
+        ip += 1;
+    }
+    let version_string = String::from_utf8(version_string_raw).unwrap();
+
+    // Check version string. We'll use this in the future to not load modules compiled later than
+    // current version.
+    if version_string != "0.1.0" {
+        return Err(String::from("Invalid Deimos module version."));
+    }
+
+    // Holder for Symbols we will extract from the symbol table bytes
+    let mut symbols: Vec = Vec::new();
+
+    // Get the symbol table length and calculate how far we need to read
+    let symbol_table_length = get_32_le!(bytes, 10 + version_string_length, 0, usize);
+    let symbol_table_end = ip + symbol_table_length;
+
+    // For each "row" in the symbol table,
+    // 1. Get the type
+    // 2. Obtain the name's length and then get the name in utf8
+    // 3. Grab the address to the actual symbol in the module bytes
+    while ip < symbol_table_end {
+        let type_byte = bytes[ip];
+        ip += 1;
+        let name_string_length = get_32_le!(bytes, ip, 0, usize);
+        ip += 4;
+        let name_string_raw = bytes[ip..ip + name_string_length].to_vec();
+        let name = String::from_utf8(name_string_raw).unwrap();
+        ip += name_string_length;
+        let address = get_32_le!(bytes, ip, 0, u32);
+        ip += 4;
+        let symbol_type = match type_byte {
+            CONST_SYMBOL => SymbolType::Constant,
+            FUNCTION_SYMBOL => SymbolType::Function,
+            _ => return Err(String::from("Invalid Deimos symbol type.")),
+        };
+        symbols.push(DmSymbol {
+            name,
+            address,
+            symbol_type,
+        });
+    }
+
+    todo!()
+}
+
+fn check_deimos(bytes: &[u8]) -> bool {
+    if bytes.len() < 6 {
+        return false;
+    }
+    let first_six = read_as_u64(&bytes[0..6]);
+    first_six == DEIMOS_MAGIC_NUMBER
+}
+
+fn read_as_u64(bytes: &[u8]) -> u64 {
+    let mut result = 0u64;
+    let max_shift = (bytes.len() - 1) * 8;
+    for i in 0..bytes.len() {
+        result |= (bytes[i] as u64) << (max_shift - i * 8);
+    };
+    result
+}
+
+#[cfg(test)]
+mod read_as_u64_tests {
+    use crate::vm::module::{read_as_u64, DEIMOS_MAGIC_NUMBER};
+
+    #[test]
+    fn read_6_bytes() {
+        let bytes = vec![0x64u8, 0x65u8, 0x69u8, 0x6du8, 0x6fu8, 0x73u8]; // ascii 'deimos'
+        let result = read_as_u64(&bytes);
+        assert_eq!(DEIMOS_MAGIC_NUMBER, result);
+    }
+}
diff --git a/src/vm/util.rs b/src/vm/util.rs
new file mode 100644
index 0000000..b9dbf7d
--- /dev/null
+++ b/src/vm/util.rs
@@ -0,0 +1,26 @@
+#[macro_export]
+macro_rules! get_32_le {
+    ( $bytes: expr, $base: expr, $offset: literal, $T: ident ) => {
+        $bytes[$base + $offset] as $T
+            + (($bytes[$base + $offset + 1] as $T) << 8)
+            + (($bytes[$base + $offset + 2] as $T) << 16)
+            + (($bytes[$base + $offset + 3] as $T) << 24)
+    };
+}
+
+#[macro_export]
+macro_rules! get_64_le {
+    ( $bytes: expr, $base: expr, $offset: literal, $T: ident ) => {
+        $bytes[$base + $offset] as $T
+            + (($bytes[$base + $offset + 1] as $T) << 8)
+            + (($bytes[$base + $offset + 2] as $T) << 16)
+            + (($bytes[$base + $offset + 3] as $T) << 24)
+            + (($bytes[$base + $offset + 4] as $T) << 32)
+            + (($bytes[$base + $offset + 5] as $T) << 40)
+            + (($bytes[$base + $offset + 6] as $T) << 48)
+            + (($bytes[$base + $offset + 7] as $T) << 56)
+    };
+}
+
+pub use get_32_le;
+pub use get_64_le;