Start implementation of vm.

This commit is contained in:
Jesse Brault 2024-11-26 23:34:30 -06:00
parent 074af48c85
commit e8019366ee
6 changed files with 295 additions and 28 deletions

67
sketching/goal_one.dm Normal file
View File

@ -0,0 +1,67 @@
enum Option<T> {
Some(item: T),
None
}
fn make_one() = Some(1)
fn main() {
if (make_one() is Some(num)) {
println(num)
}
}
// Bytecode?
// Start with some magic number
// Then a header. Each string starts with a length
header {
module_name: default
symbol_table_addr: 25 // example
symbol_table_length: 104 // example
version: 0.1.0
}
symbols {
# type addr len name
0 fn 0 4 Some
1 fn 30 8 make_one
2 fn 39 4 main
}
layout Option {
[0]: byte // tag
- 0: Some
- 1: None
[1-8]: *Any // ptr to Any
}
fn Some(r0: *Any) -> *Option
alloc r1, 5 // allocate 5 bytes and put address into r1
mov [r1], 0 // move 0 into the (heap) location pointed to by r1
mov [r1 + 1], r0 // move the value of r0 into the location pointed to by r1 + 1
mov r0, r1 // move the value of r1 (the address of the struct) into r0
ret // return r0
len: 30
fn make_one:
mov r0, 1
call Some
len: 9
fn main:
call make_one // op(1) + len(4) + name(8) = 13
cmp_int 0, [r0] // op(1) + r(1) + offset(4) + operand(4) = 10
jne .end // op(1) + offset(4)
mov r1, [r0 + 1] // op(1) + dest_r(1) + src_r(1) + src_offset(4) = 7
mov r2, r1 // op(1) + dest_r(1) + src_r(1)
mov r0, r1 // op(1) + dest_r(1) + src_r(1)
call_ext println // op(1) + len(4) + name(7)
free r2, 5 // op(1) + r(1) + size(4)
.end
mov r0, 0 // op(1) + r(1) + value(4)
ret // op(1)
len:

View File

@ -9,7 +9,7 @@ pub hkt Monad[T<A, B>] : Monad[T<B>] {
} }
pub enum Option<T> : Monad { pub enum Option<T> : Monad {
Some(item) { Some(item: T) {
impl fn map(m) = Some(m(item)) impl fn map(m) = Some(m(item))
impl fn flat_map(m) = m(item) impl fn flat_map(m) = m(item)
}, },

View File

@ -28,7 +28,7 @@ pub enum Token {
Minus, Minus,
Dot, Dot,
Ellipsis, Ellipsis,
Abstract Abstract,
} }
pub fn tokenize(input: &String) -> Result<Vec<Token>, &'static str> { pub fn tokenize(input: &String) -> Result<Vec<Token>, &'static str> {
@ -44,33 +44,25 @@ pub fn tokenize(input: &String) -> Result<Vec<Token>, &'static str> {
')' => tokens.push(Token::ParenClose), ')' => tokens.push(Token::ParenClose),
'<' => tokens.push(Token::LessThan), '<' => tokens.push(Token::LessThan),
'>' => tokens.push(Token::GreaterThan), '>' => tokens.push(Token::GreaterThan),
'&' => { '&' => match peekable.peek() {
match peekable.peek() {
Some('&') => { Some('&') => {
let _ = peekable.next(); let _ = peekable.next();
tokens.push(Token::And); tokens.push(Token::And);
}, }
Some(_) | None => tokens.push(Token::Intersection), Some(_) | None => tokens.push(Token::Intersection),
} },
} '|' => match peekable.next_if_eq(&'|') {
'|' => {
match peekable.next_if_eq(&'|') {
Some(_) => tokens.push(Token::Or), Some(_) => tokens.push(Token::Or),
None => tokens.push(Token::Union), None => tokens.push(Token::Union),
}
}, },
'=' => { '=' => match peekable.next_if_eq(&'>') {
match peekable.next_if_eq(&'>') {
Some(_) => tokens.push(Token::BigArrow), Some(_) => tokens.push(Token::BigArrow),
None => tokens.push(Token::Equals), None => tokens.push(Token::Equals),
}
}, },
'+' => tokens.push(Token::Plus), '+' => tokens.push(Token::Plus),
'-' => { '-' => match peekable.next_if_eq(&'>') {
match peekable.next_if_eq(&'>') {
Some(_) => tokens.push(Token::LittleArrow), Some(_) => tokens.push(Token::LittleArrow),
None => tokens.push(Token::Minus), None => tokens.push(Token::Minus),
}
}, },
'.' => { '.' => {
let mut count = 1; let mut count = 1;
@ -95,7 +87,7 @@ pub fn tokenize(input: &String) -> Result<Vec<Token>, &'static str> {
fn match_identifier_or_keyword(start_char: char, peekable: &mut Peekable<Chars>) -> Option<Token> { fn match_identifier_or_keyword(start_char: char, peekable: &mut Peekable<Chars>) -> Option<Token> {
if !is_valid_identifier_start_char(start_char) { if !is_valid_identifier_start_char(start_char) {
return None return None;
} }
// append start char // append start char
@ -182,8 +174,7 @@ mod tests {
#[test] #[test]
fn ns_pub_mod_simple() { fn ns_pub_mod_simple() {
let result = tokenize(&String::from("ns simple_ns\npub mod simple { }")) let result = tokenize(&String::from("ns simple_ns\npub mod simple { }")).unwrap();
.unwrap();
assert_eq!(Token::Namespace, result[0]); assert_eq!(Token::Namespace, result[0]);
assert_eq!(Token::Identifier(String::from("simple_ns")), result[1]); assert_eq!(Token::Identifier(String::from("simple_ns")), result[1]);
assert_eq!(Token::Public, result[2]); assert_eq!(Token::Public, result[2]);

View File

@ -1 +1,2 @@
mod lexer; mod lexer;
mod vm;

136
src/vm/mod.rs Normal file
View File

@ -0,0 +1,136 @@
mod op_codes;
use op_codes::*;
use std::alloc::{alloc_zeroed, Layout};
/// Interprets `code` as a flat sequence of VM instructions, starting at byte 0 and
/// continuing until the instruction cursor reaches the end of the buffer.
///
/// Each instruction is a one-byte opcode followed by its operands; multi-byte
/// operands are decoded as little-endian. Results are written into `registers`,
/// and the matching slot of `register_types` records what kind of value the
/// register now holds.
///
/// Handled opcodes:
/// - `MOV_INT` (6 bytes): load a `u32` immediate, zero-extended, into a register.
/// - `MOV_LONG` (10 bytes): load a `u64` immediate into a register.
/// - `MOV_REGISTER` (3 bytes): copy value and type from one register to another.
/// - `ALLOC` (6 bytes): allocate zeroed heap memory and store its address.
/// - `MOV_INT_TO` (10 bytes): store a 4-byte immediate at `pointer + offset`.
///
/// Memory obtained through `ALLOC` is never released by this function.
///
/// # Panics
/// - on an unknown opcode (`"Invalid code instruction"`);
/// - on `MOV_DOUBLE`, which is not implemented yet;
/// - when an instruction is truncated or names a register index that is out of range;
/// - when `ALLOC` requests zero bytes;
/// - when `MOV_INT_TO` targets a register that is not typed as a pointer.
pub fn run(code: &Vec<u8>, registers: &mut Vec<u64>, register_types: &mut Vec<RegisterType>) {
    let mut i = 0;
    while i < code.len() {
        match code[i] {
            MOV_INT => {
                let target_register = code[i + 1] as usize;
                let operand = convert_to_u32(&code[(i + 2)..(i + 6)]);
                registers[target_register] = operand as u64;
                register_types[target_register] = RegisterType::Int;
                i += 6;
            }
            MOV_LONG => {
                let target_register = code[i + 1] as usize;
                // Little-endian: fold from the most significant (last) byte down.
                let operand = code[(i + 2)..(i + 10)]
                    .iter()
                    .rev()
                    .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));
                registers[target_register] = operand;
                register_types[target_register] = RegisterType::Long;
                i += 10;
            }
            // An empty arm here would leave `i` untouched and spin forever, so fail loudly
            // until the instruction is actually implemented.
            MOV_DOUBLE => unimplemented!("MOV_DOUBLE is not implemented yet"),
            MOV_REGISTER => {
                let target_register = code[i + 1] as usize;
                let source_register = code[i + 2] as usize;
                registers[target_register] = registers[source_register];
                register_types[target_register] = register_types[source_register].clone();
                i += 3;
            }
            ALLOC => {
                let target_register = code[i + 1] as usize;
                let n_bytes_to_allocate = convert_to_u32(&code[(i + 2)..(i + 6)]) as usize;
                // The global allocator's contract forbids zero-sized layouts.
                if n_bytes_to_allocate == 0 {
                    panic!("ALLOC of zero bytes is not supported");
                }
                let layout = Layout::from_size_align(n_bytes_to_allocate, 4).unwrap();
                // SAFETY: `layout` has a non-zero size, checked just above.
                let allocated = unsafe { alloc_zeroed(layout) };
                if allocated.is_null() {
                    // Never hand a null address to the program as a usable pointer.
                    std::alloc::handle_alloc_error(layout);
                }
                registers[target_register] = allocated as u64;
                register_types[target_register] = RegisterType::Pointer;
                i += 6;
            }
            MOV_INT_TO => {
                let target_register = code[i + 1] as usize;
                if register_types[target_register] != RegisterType::Pointer {
                    panic!("target_register {} is not a Pointer", target_register);
                }
                let offset = convert_to_u32(&code[(i + 2)..(i + 6)]) as isize;
                // Slice the whole operand first so truncated code panics before any write.
                let operand = &code[(i + 6)..(i + 10)];
                let target = registers[target_register] as *mut u8;
                // SAFETY: NOT verified here. This relies on the bytecode being well formed:
                // the register must hold an address produced by ALLOC and `offset + 4` must
                // lie within that allocation. The VM does not track allocation sizes yet.
                unsafe {
                    std::ptr::copy_nonoverlapping(operand.as_ptr(), target.offset(offset), 4);
                }
                i += 10;
            }
            _ => panic!("Invalid code instruction"),
        }
    }
}
/// Decodes the first four bytes of `bytes` as a little-endian `u32`.
///
/// Any bytes past the fourth are ignored. Panics if fewer than four bytes are supplied.
fn convert_to_u32(bytes: &[u8]) -> u32 {
    let quad = [bytes[0], bytes[1], bytes[2], bytes[3]];
    u32::from_le_bytes(quad)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `n_registers` zeroed registers, each tagged as `RegisterType::Int`.
    fn init_registers(n_registers: usize) -> (Vec<u64>, Vec<RegisterType>) {
        let values = vec![0; n_registers];
        let types = vec![RegisterType::Int; n_registers];
        (values, types)
    }

    /// Assembles a single `MOV_INT` into register 0, runs it on a one-register
    /// machine, and returns the resulting content of register 0.
    fn run_mov_int(operand: u32) -> u64 {
        let mut program = Vec::new();
        add_mov_int(&mut program, 0, operand);
        let (mut values, mut types) = init_registers(1);
        run(&program, &mut values, &mut types);
        values[0]
    }

    #[test]
    fn mov_1_as_int() {
        assert_eq!(1, run_mov_int(1));
    }

    #[test]
    fn move_65535_as_int() {
        assert_eq!(0xffff, run_mov_int(0xffff));
    }

    #[test]
    fn move_int_max_as_int() {
        assert_eq!(0xffff_ffff, run_mov_int(0xffff_ffff));
    }

    #[test]
    fn move_register() {
        let (mut values, mut types) = init_registers(2);
        values[1] = 1;
        let mut program = Vec::new();
        add_mov_register(&mut program, 0, 1);
        run(&program, &mut values, &mut types);
        assert_eq!(values[0], 1);
    }

    #[test]
    fn mov_int_to_register_as_address() {
        let (mut values, mut types) = init_registers(16);
        let mut program = Vec::new();
        add_alloc(&mut program, 0, 4);
        add_mov_int_to(&mut program, 0, 0, 0xff);
        run(&program, &mut values, &mut types);
        // Register 0 now holds the address written by ALLOC; read back its first byte.
        let address = values[0] as *mut u8;
        let first_byte = unsafe { *address };
        assert_eq!(0xff, first_byte);
    }
}

72
src/vm/op_codes.rs Normal file
View File

@ -0,0 +1,72 @@
/// ## mov(register: u8, operand: u32)
/// Loads a 32-bit immediate into `register`. Byte layout of the instruction
/// (the operand is little-endian, as written by `add_mov_int`):
/// - 0: opcode
/// - 1: register
/// - 2..=5: operand
pub const MOV_INT: u8 = 0x00;
/// ## mov(register: u8, operand: u64)
/// Loads a 64-bit immediate into `register`. No encoder for it exists in this
/// file; the layout below is the one the interpreter decodes (little-endian):
/// - 0: opcode
/// - 1: register
/// - 2..=9: operand
pub const MOV_LONG: u8 = 0x01;
/// Opcode reserved for loading a double. Its operand layout is not defined in
/// this file and the interpreter does not implement it yet.
pub const MOV_DOUBLE: u8 = 0x02;
/// ## mov(target_register: u8, source_register: u8)
/// Copies the source register into the target register.
/// - 0: opcode
/// - 1: target_register
/// - 2: source_register
pub const MOV_REGISTER: u8 = 0x03;
/// ## alloc(register: u8, size: u32)
/// Allocates `size` bytes and stores the resulting address in `register`.
/// `size` is little-endian, as written by `add_alloc`.
/// - 0: opcode
/// - 1: register
/// - 2..=5: size
pub const ALLOC: u8 = 0x04;
/// ## mov_int_to(register: u8, offset: u32, operand: u32)
/// Stores a 32-bit immediate at the address held in `register` plus `offset`.
/// Both `offset` and `operand` are little-endian, as written by `add_mov_int_to`.
/// - 0: opcode
/// - 1: register
/// - 2..=5: offset
/// - 6..=9: operand
pub const MOV_INT_TO: u8 = 0x05;
/// Opcode declared for a 64-bit variant of `MOV_INT_TO`; there is no encoder for
/// it in this file. TODO: document the layout once it is implemented.
pub const MOV_LONG_TO: u8 = 0x06;
/// Opcode declared for a double variant of `MOV_INT_TO`; there is no encoder for
/// it in this file. TODO: document the layout once it is implemented.
pub const MOV_DOUBLE_TO: u8 = 0x07;
/// Opcode declared for storing a register's value through a pointer register;
/// there is no encoder for it in this file. TODO: document the layout once it is
/// implemented.
pub const MOV_REGISTER_TO: u8 = 0x08;
/// Tag describing what kind of value a VM register currently holds.
///
/// The interpreter keeps one tag per register and updates it whenever the
/// register is written.
///
/// `Debug` is derived so values can be printed and used in `assert_eq!`; `Copy`
/// is derived because this is a field-less tag (existing `.clone()` calls keep
/// working).
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum RegisterType {
    /// A 32-bit integer, stored zero-extended in the 64-bit register.
    Int,
    /// A 64-bit integer.
    Long,
    /// A double-precision float (no instruction produces this tag yet).
    Double,
    /// An address, such as the one produced by the `ALLOC` instruction.
    Pointer,
}
/// Appends a `MOV_INT` instruction to `code`: the opcode, the target `register`,
/// then `operand` as four little-endian bytes.
pub fn add_mov_int(code: &mut Vec<u8>, register: u8, operand: u32) {
    code.extend_from_slice(&[MOV_INT, register]);
    code.extend_from_slice(&operand.to_le_bytes());
}
/// Appends a `MOV_REGISTER` instruction to `code`: the opcode followed by the
/// target register and then the source register.
pub fn add_mov_register(code: &mut Vec<u8>, target_register: u8, source_register: u8) {
    let instruction = [MOV_REGISTER, target_register, source_register];
    code.extend_from_slice(&instruction);
}
/// Appends an `ALLOC` instruction to `code`: the opcode, the `register` that
/// receives the address, then `size` as four little-endian bytes.
pub fn add_alloc(code: &mut Vec<u8>, register: u8, size: u32) {
    code.extend_from_slice(&[ALLOC, register]);
    code.extend_from_slice(&size.to_le_bytes());
}
/// Appends a `MOV_INT_TO` instruction to `code`: the opcode, the pointer
/// `register`, then `offset` and `operand`, each as four little-endian bytes.
pub fn add_mov_int_to(code: &mut Vec<u8>, register: u8, offset: u32, operand: u32) {
    code.extend_from_slice(&[MOV_INT_TO, register]);
    for word in [offset, operand].iter() {
        code.extend_from_slice(&word.to_le_bytes());
    }
}