diff --git a/spec.md b/spec.md new file mode 100644 index 0000000..3c72352 --- /dev/null +++ b/spec.md @@ -0,0 +1,117 @@ +### Disclaimer + +This is a fantasy architecture on which I intend to write fantasy compilers. It was born out of the +"fuck around and find out" philosophy, and is a toy project. I will change a lot of stuff as I learn +how it's done in the real world. For now, I'm just gonna guess and have fun. + +Since I'm studying RISC-V, this will be heavily RISC-V inspired. + +# The GRAVEJIT virtual machine + +The gravejit virtual machine sports 16 16-bit registers (plus the program counter!) and 16 operations. + +Here is the list of registers together with their mnemonics. + +0 : zero // register 0 is always 0. +1 : ra // return address +2 : sp // stack pointer +3 : t0 // temporary +4 : t1 +5 : t2 +6 : t3 +7 : a0 // function arguments +8 : a1 +9 : a2 +10: a3 +11: s0 // saved registers +12: s1 +13: s2 +14: s3 +15: t4 // don't know what to do with this + +pc: program counter. + +## ISA + +opcode | mnemonic | format | description + +0000 | NOP | just 0s'| Does nothing. +0001 | ADD s0 s1 s2 | R | s0 = s1 + s2 +0010 | SUB s0 s1 s2 | R | s0 = s1 - s2 +0011 | AND s0 s1 s2 | R | s0 = s1 & s2 +0100 | XOR s0 s1 s2 | R | s0 = s1 xor s2 +0101 | SLL s0 s1 s2 | R | s0 = s1 << s2 +0110 | SLI s0 c | I | s0 = s0 << c +0111 | ADDI s0 c | I | s0 = s0 + c +1000 | BEQ s0 s1 s2 | R | if (s1 == s2) -> pc = s0 +1001 | BGT s0 s1 s2 | R | if (s1 > s2) -> pc = s0 +1010 | JAL s0 s1 c | J | s0 = pc+1; pc += s1 + c; +1011 | +1100 | LOAD s0 s1 s2 | R | loads the word at address s1 + s2 into s0 +1101 | STORE s0 s1 s2| R | stores s0 at address s1 + s2 +1110 | CALL s0 c | I | performs system call +1111 | HALT | just 1s'| halt, and possibly catch fire. + + +### Operation formats: + +Each instruction is 16 bits long. +The first 4 most-significant bits are the opcode. +Constants (c in the above table) are always considered signed, and written in +two's complement.
Sign extension also takes place whenever needed. +I.e., to perform an immediate subtraction, one just needs to add a negative number. + +#### R-type: +opcode: 4 bits +dest register: 4 bits +source 1 register: 4 bits +source 2 register: 4 bits + +example: ADD s0 s1 s2 = 0001 1011 1100 1101 + +#### I-type +opcode: 4 bits +dest register: 4 bits +constant: 8 bits + +example: +ADDI s0 28 = 0111 1011 00011100 +ADDI s0 -2 = 0111 1011 11111110 + + +#### J-Type +opcode: 4 bits +dest register: 4 bits +jump address register: 4 bits +constant: 4 bits + + +The constant is added to the value of the second register argument. + +### JIT's system calls: + +The `CALL` instruction is a bit of a hack because I want to load more functionality into the thing. +The JIT can decide what to do with the register s0 and the number c. +It should be possible to open files, write files, read stdin, write to stdout, etc... + +#### io\_vec: first system call environment + +Working on this, quick and dirty. + +### Binary executable format: + +Binary files start with two 16 bit numbers, a constant and a length N, followed by a list of +length N of pairs of 16 bit numbers. This is the header of the file. + +The initial constant is currently unused and unimportant. In this draft-toy-spec, the initial constant +is always 39979. + +In each pair, the first number is an offset, and the second number is a size M, counted in 16 bit words. + +The offset points at a null-terminated UTF-8 (yes.) string, located offset\*16 bits to the right after the end of the header in the binary file, followed by arbitrary binary content of size M\*16 bits. + +The utf-8 string cannot contain the null character anywhere, as that will be used as the terminator. + +This represents a "symbol table" of the binary file, where functions and data can be stored. + +There must exist a symbol named "main", and it must point to a function: this will be the entrypoint to our program.
diff --git a/src/assembler/AST.rs b/src/assembler/AST.rs new file mode 100644 index 0000000..57e4efc --- /dev/null +++ b/src/assembler/AST.rs @@ -0,0 +1,35 @@ +use crate::cpu::Word; + +type RegisterMem = String; + +pub type ConstId = String; + +#[derive(Debug)] +pub enum Const { + CS(ConstId), + C(u8), +} + +#[derive(Debug)] +pub enum Operation { + NOP, + HALT, + // R type: three register mnemonics + ADD(RegisterMem, RegisterMem, RegisterMem), + SUB(RegisterMem, RegisterMem, RegisterMem), + AND(RegisterMem, RegisterMem, RegisterMem), + XOR(RegisterMem, RegisterMem, RegisterMem), + SLL(RegisterMem, RegisterMem, RegisterMem), + BEQ(RegisterMem, RegisterMem, RegisterMem), + BGT(RegisterMem, RegisterMem, RegisterMem), + LOAD(RegisterMem, RegisterMem, RegisterMem), + STORE(RegisterMem, RegisterMem, RegisterMem), + + // I type: a register mnemonic and a constant + SLI(RegisterMem, Const), + ADDI(RegisterMem, Const), + CALL(RegisterMem, Const), + + // J type: two register mnemonics and a word-sized constant + JAL(RegisterMem, RegisterMem, Word), +} diff --git a/src/assembler/mod.rs b/src/assembler/mod.rs new file mode 100644 index 0000000..ae8c913 --- /dev/null +++ b/src/assembler/mod.rs @@ -0,0 +1,5 @@ +mod AST; +mod tests; +mod parser; + +struct Assembler {} diff --git a/src/assembler/parser.rs b/src/assembler/parser.rs new file mode 100644 index 0000000..9261d9f --- /dev/null +++ b/src/assembler/parser.rs @@ -0,0 +1,246 @@ +use crate::cpu::Registers; + +use super::AST::{Operation, Const}; +use Operation::*; + +type Loc = u16; + +#[derive(Debug)] +pub enum ParseError { + BadSectionHeader, + UnknownSectionKind, + UnexpectedEOF, + BadSectionContent, + BadInstruction +} + +/// Represents the state of our parser. +pub struct Parser { + loc: u16, // current number of operations parsed. + symtable: Vec<(String, u16)>, // symbols encountered, each with its position. + pub input: Vec, // input file: sanitized, non-empty lines +} + +impl Parser { + pub fn new(i: String) -> Self { + Parser { + loc: 0, + symtable: vec![], + input: sanitize(i), + } + } +} + +// Removes comments and surrounding whitespace, drops empty lines, and splits the input into lines.
+fn sanitize(i: String) -> Vec { + i.lines() + .map(|x| remove_comments(x)) + .map(|x| x.trim()) + .filter(|x| *x != "") + .map(|x| x.to_string()) + .collect() +} + +fn remove_comments(i: &str) -> &str { + if let Some(end) = i.find(';') { + return &i[0..end]; + } else { + return i; + } +} + + +/// Checks if the string i starts with pat. +/// Returns the rest of the input string on success; +/// else, returns None. NOTE(review): when i runs out before pat does, the loop stops comparing, so an input that is a proper prefix of pat still yields Some("") instead of None. +fn match_string(i: &str, pat: &str) -> Option { + + let mut in_chars = i.chars(); + + for pat_c in pat.chars() { + if let Some(c) = in_chars.next() { + if c != pat_c { + return None + } + }; + + } + + let rest = in_chars.collect(); + + return Some(rest); + +} + +/// Matches till the "stop" string is found. +/// Returns a tuple containing the preceding string and +/// the rest of the input string (the stop string itself is dropped), or None if stop does not occur. +/// +/// Ex: assert_eq!(Some(("Lorem ".to_string(), " Ipsum".to_string())), take_alpha_till("Lorem X Ipsum", "X")); +/// +fn take_alpha_till(i: &str, stop: &str) -> Option<(String, String)> { + + // + if let Some((matched, rest)) = i.split_once(stop) { + return Some((matched.to_string(), rest.to_string())) + } else { + return None + } + + +} + +/// Matches inside the `start` and `stop` delimiters. +/// Returns a tuple with the string in between the two, +/// together with the rest of the string. +fn take_between(i: &str, start: &str, stop: &str) -> Option<(String, String)> { + + let s1 = match_string(i, start)?; + + return take_alpha_till(&s1, stop); + +} + +#[test] +fn take_between_test() { + assert_eq!(take_between("\"wow\" etc", "\"", "\""), Some(("wow".to_string(), " etc".to_string()))); +} + + +//// SECTION PARSING + +#[derive(Debug)] +enum SectionContent { + Code(Vec), + CString(String), + CVec() +} + +use SectionContent::*; + +#[derive(Debug)] +pub struct Section { + name: String, + content: SectionContent, +} + +// A .section has a name and variable content.
+impl Parser { + + pub fn parse_sections(&mut self) -> Result, ParseError> { + let mut res = vec![]; + + let mut lines = self.input.iter().map(|x| x.as_str()).into_iter(); + + while let Some(l) = lines.next() { + println!("Examing line: {}", l); + if l.starts_with(".") { + let Some((kind, name)) = take_alpha_till(&l[1..], " ") else { + return Err(ParseError::BadSectionHeader); + }; + + match kind.as_str() { + "text" => { + let s : Vec<&str> = lines.clone().take_while(|&x| !(x).starts_with(".")).map(|x| x).collect(); + res.push(Section { name: name.trim().to_owned(), content: Code(parse_code(&s)?)}) + } + "asciiz" => { + let Some(s) = lines.next() else {return Err(ParseError::UnexpectedEOF)}; + let Some((s, _)) = take_between(s.trim(), "\"", "\"") else {return Err(ParseError::BadSectionContent)}; + res.push(Section { name: name.trim().to_owned(), content: CString(s)}) + + } + "i16" => { + let s = lines.next(); + + } + "u16" => { + let s = lines.next(); + + } + "vi16" => { + let s = lines.next(); + + } + "vu16" => { + let s = lines.next(); + + } + _ => { + return Err(ParseError::UnknownSectionKind); + } + } + } + + + + }; + + + return Ok(res); + } + +} + + +fn parse_code(i: &[&str]) -> Result, ParseError> { + + let mut res = vec![]; + + for line in i { + res.push(parse_code_line(line)?); + } + + return Ok(res); +} + + +fn parse_code_line(i: &str) -> Result { + + // Every operation has at most 3 arguments; tokens are separated by whitespace. + let mut bits = i.split_whitespace(); + println!("current parse code line: {}", i); + let Some(op) = bits.next() else {return Err(ParseError::BadSectionContent)}; + + // Operations without operands. + match op { + "nop" => {return Ok(NOP);}, + "halt" => {return Ok(HALT);}, + _ => {} + }; + + // I-type: a register followed by a constant. NOTE(review): a missing operand is reported as BadSectionHeader, and R-type/J-type mnemonics are not handled yet, so they end in BadInstruction. + let Some(r1) = bits.next() else {return Err(ParseError::BadSectionHeader)}; + let Some(r2) = bits.next() else {return Err(ParseError::BadSectionHeader)}; + + match op { + "addi" => { + return Ok(ADDI(r1.to_owned(), parse_const(r2)?)); + } + "sli" => { + return Ok(SLI(r1.to_owned(),
parse_const(r2)?)); + + } + "call" => { + + return Ok(CALL(r1.to_owned(), parse_const(r2)?)); + + } + _ => {} + } + + + return Err(ParseError::BadInstruction); + +} + +fn parse_const(i: &str) -> Result { + + // We try to parse the token as a number (Const::C); if that fails, we keep it as a symbolic constant name (Const::CS). NOTE(review): Const::C holds a u8, so a negative literal such as -2 fails to parse and ends up as a Const::CS name. + let Ok(num) = i.parse() else { + return Ok(Const::CS(i.to_owned())); + }; + return Ok(Const::C(num)); + +} + diff --git a/src/assembler/tests.rs b/src/assembler/tests.rs new file mode 100644 index 0000000..604f462 --- /dev/null +++ b/src/assembler/tests.rs @@ -0,0 +1,16 @@ +// use super::*; + +use crate::assembler::parser; + +#[test] +fn parser_test() { + + println!("Parser test begins"); + let code = std::fs::read_to_string("./tests/assembly/hello_world.grasm").unwrap(); + + let mut parser = parser::Parser::new(code); + + let r = parser.parse_sections().unwrap(); + + println!("Parsed sections: {:?}", r); +} diff --git a/src/cpu/decoder.rs b/src/cpu/decoder.rs new file mode 100644 index 0000000..e7269e9 --- /dev/null +++ b/src/cpu/decoder.rs @@ -0,0 +1,60 @@ +use super::registers::Register; + +type Constant = i8; // constants are at most 8 bits wide, so an i8 holds them.
+ +#[derive(Debug)] +pub enum OP { + NOP, + ADD(Register, Register, Register), + SUB(Register, Register, Register), + AND(Register, Register, Register), + XOR(Register, Register, Register), + SLL(Register, Register, Register), + + SLI(Register, Constant), + ADDI(Register, Constant), + + BEQ(Register, Register, Register), + BGT(Register, Register, Register), + JAL(Register, Register, Constant), + + LOAD(Register, Register, Register), + STORE(Register, Register, Register), + CALL(Register, Constant), + HALT, +} + +pub use OP::*; + +pub fn decode(op: u16) -> OP { + let opcode = op >> 12; + let dest = ((op & 0x0F00) >> 8) as Register; + let r1 = ((op & 0x00F0) >> 4) as Register; + let r2 = (op & 0x000F) as Register; + + let c = Constant::from_be_bytes([(op & 0x00FF) as u8]); + let c4 = Constant::from_be_bytes([(op & 0x000F) as u8]); + + println!("opcode: {}", opcode); + + return match opcode { + // todo: write a macro for every type (I-type, R-type). NOTE(review): the 4-bit JAL constant `c4` is the raw low nibble (0 to 15) and is never sign-extended, although the spec treats constants as signed; opcode 0b1011 is unassigned and hits todo!(). + 0b0000 => NOP, + 0b0001 => ADD(dest, r1, r2), + 0b0010 => SUB(dest, r1, r2), + 0b0011 => AND(dest, r1, r2), + 0b0100 => XOR(dest, r1, r2), + 0b0101 => SLL(dest, r1, r2), + 0b0110 => SLI(dest, c), + 0b0111 => ADDI(dest, c), + 0b1000 => BEQ(dest, r1, r2), + 0b1001 => BGT(dest, r1, r2), + 0b1010 => JAL(dest, r1, c4), + 0b1011 => todo!(), + 0b1100 => LOAD(dest, r1, r2), + 0b1101 => STORE(dest, r1, r2), + 0b1110 => CALL(dest, c), + 0b1111 => HALT, + _ => panic!("Not an operation."), + }; +} diff --git a/src/cpu/mod.rs b/src/cpu/mod.rs new file mode 100644 index 0000000..d1baf22 --- /dev/null +++ b/src/cpu/mod.rs @@ -0,0 +1,187 @@ +mod decoder; +mod ram; +mod registers; +mod sysenv; +mod tests; +pub use sysenv::*; + +pub use registers::*; + +use decoder::OP; +use ram::Ram; + +#[derive(Debug)] +pub enum ExecErr { + InvalidRegister, + InvalidMemoryAddr, + InvalidSyscall, + InvalidPC, + SyscallError(String), +} + +use ExecErr::*; + +use crate::{interpret_as_signed, interpret_as_unsigned}; + +use self::decoder::decode; + +///
Simple synonim for Result. +type CPUResult = Result; + +#[derive(Debug)] +/// The state of the interpreter: the register file, the RAM, and a mutable borrow of the system-call environment. +pub struct CPU<'a, T> { + pub regs: Registers, + pub ram: Ram, + pub env: &'a mut T, + // Should execution be halted? Set by HALT and polled by `run_code_raw`. (Author's note: not sure whether to keep this.) + halt: bool, +} + +impl<'a, T> CPU<'a, T> +where + T: Sys, +{ + pub fn execute_op(&mut self, op: OP) -> CPUResult<()> { + match op { + OP::NOP => { + self.regs.pc += 1; + } + OP::ADD(d, r1, r2) => { + let v1 = self.regs.get(r1).ok_or(InvalidRegister)?; + let v2 = self.regs.get(r2).ok_or(InvalidRegister)?; + self.regs.write(d, v1 + v2).ok_or(InvalidRegister)?; + + self.regs.pc += 1; + } + OP::SUB(d, r1, r2) => { + let v1 = self.regs.get(r1).ok_or(InvalidRegister)?; + let v2 = self.regs.get(r2).ok_or(InvalidRegister)?; + self.regs.write(d, v1 - v2).ok_or(InvalidRegister)?; + + self.regs.pc += 1; + } + OP::AND(d, r1, r2) => { + let v1 = self.regs.get(r1).ok_or(InvalidRegister)?; + let v2 = self.regs.get(r2).ok_or(InvalidRegister)?; + self.regs.write(d, v1 & v2).ok_or(InvalidRegister)?; + + self.regs.pc += 1; + } + OP::XOR(d, r1, r2) => { + let v1 = self.regs.get(r1).ok_or(InvalidRegister)?; + let v2 = self.regs.get(r2).ok_or(InvalidRegister)?; + self.regs.write(d, v1 ^ v2).ok_or(InvalidRegister)?; + + self.regs.pc += 1; + } + OP::SLL(d, r1, r2) => { + let v1 = self.regs.get(r1).ok_or(InvalidRegister)?; + let v2 = self.regs.get(r2).ok_or(InvalidRegister)?; + self.regs.write(d, v1 << v2).ok_or(InvalidRegister)?; + + self.regs.pc += 1; + } + OP::SLI(d, c) => { + let v1 = self.regs.get(d).ok_or(InvalidRegister)?; + self.regs.write(d, v1 << c).ok_or(InvalidRegister)?; + + self.regs.pc += 1; + } + OP::ADDI(d, c) => { + let v1 = self.regs.get(d).ok_or(InvalidRegister)?; + self.regs + .write( + d, + interpret_as_unsigned(interpret_as_signed(v1) + (c as i16)), + ) + .ok_or(InvalidRegister)?; + + self.regs.pc += 1; + } + OP::BEQ(d, x0, x1) => { + // NOTE(review): x0 and x1 are register indices as decoded; the indices themselves are compared here, not the register values, and pc is left untouched when the branch is not taken. Confirm against the spec (`if (s1 == s2) -> pc = s0`). BGT below has the same shape. + if x0 == x1 { + let v = self.regs.get(d).ok_or(InvalidRegister)?; +
self.regs.pc = v; + } + } + OP::BGT(d, x0, x1) => { + if x0 > x1 { + let v = self.regs.get(d).ok_or(InvalidRegister)?; + self.regs.pc = v; + } + } + OP::JAL(s0, s1, c) => { + self.regs + .write(s0, self.regs.pc + 1) + .ok_or(InvalidRegister)?; + let v = self.regs.get(s1).ok_or(InvalidRegister)?; + self.regs.pc = (v as i16 + (c as i16)) as Word; + } + + OP::LOAD(d, s1, s2) => { + let start = self.regs.get(s1).ok_or(InvalidRegister)?; + let offset = self.regs.get(s2).ok_or(InvalidRegister)?; + + let v = self.ram.get(start + offset).ok_or(InvalidMemoryAddr)?; + + self.regs.write(d, v).ok_or(InvalidRegister)?; + + self.regs.pc += 1; + } + OP::STORE(d, s1, s2) => { + let start = self.regs.get(s1).ok_or(InvalidRegister)?; + let offset = self.regs.get(s2).ok_or(InvalidRegister)?; + + let v = self.regs.get(d).ok_or(InvalidRegister)?; + self.ram.write(start + offset, v).ok_or(InvalidMemoryAddr)?; + + self.regs.pc += 1; + } + OP::CALL(r, c) => { + T::call(self, r.into(), c as u16)?; + self.regs.pc += 1; + } + OP::HALT => { + self.halt = true; + } + } + + return Ok(()); + } + + fn fetch(&self) -> CPUResult { + let binop = self.ram.get(self.regs.pc).ok_or(ExecErr::InvalidPC)?; + + println!("binop: {:#018b}", binop); + + Ok(decode(binop)) + } + + fn step(&mut self) -> CPUResult<()> { + let op = self.fetch()?; + println!("fetched op: {:?}, pc: {} ", op, self.regs.pc); + self.execute_op(op) + } + + pub fn run_code_raw(&mut self, bin_code: &[Word]) -> CPUResult<()> { + self.halt = false; + // Put the code in memory, starting at address 0. NOTE(review): the Option returned by write_array is discarded, so a program that does not fit in RAM is silently not loaded. + self.ram.write_array(bin_code, 0); + + while !self.halt { + self.step()?; + } + + Ok(()) + } + + pub fn new(env: &'a mut T) -> Self { + CPU { + regs: Registers::default(), + ram: Ram::default(), + env, + halt: false, + } + } +} diff --git a/src/cpu/ram.rs b/src/cpu/ram.rs new file mode 100644 index 0000000..c5e9d88 --- /dev/null +++ b/src/cpu/ram.rs @@ -0,0 +1,66 @@ +use crate::cpu::registers::Word; + +/// We'll define our RAM as a fixed-size array.
+/// The maximum addressable memory: a Word address can reach 65536 words, but MAX_MEM is currently set to just 40 words. + +// pub const MAX_MEM: usize = 65536; +pub const MAX_MEM: usize = 40; + +#[derive(Debug)] +pub struct Ram { + mem: [Word; MAX_MEM], +} + +impl Default for Ram { + fn default() -> Self { + return Ram { mem: [0; MAX_MEM] }; + } +} + +impl Ram { + /// Gets the word at memory address i. Returns None if i is + /// out of bounds. + pub fn get(&self, i: Word) -> Option { + if (i as usize) < MAX_MEM { + return Some(self.mem[i as usize]); + } else { + return None; + } + } + + /// Writes val into memory address i. Returns None if i is + /// out of bounds. + pub fn write(&mut self, i: Word, val: Word) -> Option<()> { + if (i as usize) < MAX_MEM { + self.mem[i as usize] = val; + return Some(()); + } else { + return None; + } + } + + /// Returns a slice of memory from start (inclusive) to end (exclusive). + /// None is returned if either address is out of bounds. NOTE(review): start greater than end would panic in the range index. + pub fn slice(&self, start: Word, end: Word) -> Option<&[Word]> { + if (start as usize) < MAX_MEM && (end as usize) < MAX_MEM { + return Some(&self.mem[(start as usize)..(end as usize)]); + } else { + return None; + } + } + + /// Writes an array of data directly into memory, starting from + /// the "start" address. + /// Returns None if the data exceeds memory. NOTE(review): the bound check uses a strict `<`, so data that would end exactly on the last word of memory is rejected as well. + pub fn write_array(&mut self, data: &[Word], start: Word) -> Option<()> { + if start as usize + data.len() < MAX_MEM { + for i in 0..data.len() { + self.mem[start as usize + i] = data[i]; + } + } else { + return None; + } + + return Some(()); + } +} diff --git a/src/cpu/registers.rs b/src/cpu/registers.rs new file mode 100644 index 0000000..1bd2f49 --- /dev/null +++ b/src/cpu/registers.rs @@ -0,0 +1,85 @@ +pub type Word = u16; +pub type Register = u8; + +/// We need to hold 15 registers (zero is constant) + the program counter. +/// We'll just use a fixed-size array of u16.
+#[derive(Debug)] +pub struct Registers { + regs: [Word; 15], + pub pc: Word, +} + +impl Default for Registers { + fn default() -> Self { + Registers { + regs: [0; 15], + pc: 0, + } + } +} + +impl Registers { + /// Retrieves the register's value. Returns None if trying to access a register + /// that doesn't exist. Registers 1 to 15 are stored at index i - 1. + pub fn get(&self, i: Register) -> Option { + match i { + 0 => Some(0), // zero is always 0 + 1..=15 => Some(self.regs[(i - 1) as usize]), + _ => None, + } + } + + /// Writes val to the register i. Returns None on trying to write to zero, or to a register + /// that doesn't exist. + pub fn write(&mut self, i: Register, val: Word) -> Option<()> { + match i { + 0 => None, // cannot write to 0 + 1..=15 => { + self.regs[(i - 1) as usize] = val; + return Some(()); + } + _ => None, + } + } +} + +const ASSOCS: &'static [(Register, &'static str)] = &[ + (0, "zero"), + (1, "ra"), + (2, "sp"), + (3, "t0"), + (4, "t1"), + (5, "t2"), + (6, "t3"), + (7, "a0"), + (8, "a1"), + (9, "a2"), + (10, "a3"), + (11, "s0"), + (12, "s1"), + (13, "s2"), + (14, "s3"), + (15, "t4"), +]; + +/// Gets the register mnemonic name. Useful for pretty printing. (11 -> s0) +pub fn get_memo(i: Register) -> Option<&'static str> { + for (a, b) in ASSOCS { + if i == *a { + return Some(b); + } + } + + return None; +} + +/// Gets the register index from its mnemonic name (s0 -> 11) +pub fn get_num(s: &str) -> Option { + for (a, b) in ASSOCS { + if s == *b { + return Some(*a); + } + } + + return None; +} diff --git a/src/cpu/sysenv/io_vec.rs b/src/cpu/sysenv/io_vec.rs new file mode 100644 index 0000000..3f5c1ac --- /dev/null +++ b/src/cpu/sysenv/io_vec.rs @@ -0,0 +1,82 @@ +use std::io::stdin; + +use crate::{ + cpu::{ram::MAX_MEM, registers::Register}, + loader::{loader::find_and_read_string, unloader::make_string}, +}; + +use super::*; + +// First working environment: we get input from stdin and we write output +// to a string. +// + +// using strings to signal errors kinda sucks.
+// TODO: Fix this + +#[derive(Debug, Default)] +pub struct IOBuffer { + pub output: String, +} + +impl Sys for IOBuffer { + fn call(cpu: &mut CPU, r: Register, c: Word) -> CPUResult<()> { + println!("called: {}", c); + match c { + // 0: append the value of register r, in decimal, to the output buffer + 0 => { + let i = cpu.regs.get(r).ok_or(ExecErr::InvalidRegister)?; + cpu.env.output.push_str(&format!("{}", i)); + } + // 1: read an integer from stdin into register r. NOTE(review): read_line keeps the trailing newline, so the parse below probably needs a trim() first; confirm. + 1 => { + let mut buf = String::new(); + stdin() + .read_line(&mut buf) + .map_err(|_| ExecErr::SyscallError("Cannot read stdin".to_owned()))?; + let n: Word = buf + .parse() + .map_err(|_| ExecErr::SyscallError("Cannot read number".to_owned()))?; + cpu.regs.write(r, n).ok_or(ExecErr::InvalidRegister)?; + } + // 2: reads a line from stdin and writes it, packed by make_string, to the address held in register r. + 2 => { + let mut buf = String::new(); + stdin() + .read_line(&mut buf) + .map_err(|_| ExecErr::SyscallError("Cannot read stdin".to_owned()))?; + + let s: Vec = make_string(&buf); + + let start = cpu.regs.get(r).ok_or(ExecErr::InvalidRegister)?; + + cpu.ram + .write_array(&s[..], start) + .ok_or(ExecErr::SyscallError("Cannot write slice".to_owned()))?; + } + + // 3: prints a string, reading it from memory. + // r must contain the address of the string. + // the string needs to be null-delimited. + 3 => { + let pos = cpu.regs.get(r).ok_or(ExecErr::InvalidRegister)?; + + // we slice from start to the end. + // why? good question. The find_and_read_string + // will short circuit as soon as it finds a null terminator, + // which might never be found.
+ let data = cpu + .ram + .slice(pos, MAX_MEM as Word - 1) + .ok_or(ExecErr::InvalidMemoryAddr)?; + let (s, _) = find_and_read_string(&data) + .map_err(|p| ExecErr::SyscallError("parse error!".to_owned()))?; + cpu.env.output.push_str(&s); + } + + _ => return Err(ExecErr::InvalidSyscall), + } + + return Ok(()); + } +} diff --git a/src/cpu/sysenv/mod.rs b/src/cpu/sysenv/mod.rs new file mode 100644 index 0000000..09ca2c0 --- /dev/null +++ b/src/cpu/sysenv/mod.rs @@ -0,0 +1,17 @@ +mod io_vec; + +pub use io_vec::IOBuffer; + +use super::{CPUResult, ExecErr, CPU}; + +use super::registers::{Register, Word}; + +/// This trait represents all environments where our CPU can operate. +/// What this means is roughly defining system calls. + +pub trait Sys: Sized { + // r should actually be 4 bits, while c should be + // 8 bits. TODO: more efficient packing? + /// Performs system call number c, with r naming the register argument. + fn call(cpu: &mut CPU, r: Register, c: Word) -> CPUResult<()>; +} diff --git a/src/cpu/tests.rs b/src/cpu/tests.rs new file mode 100644 index 0000000..fb92155 --- /dev/null +++ b/src/cpu/tests.rs @@ -0,0 +1,35 @@ + +use super::*; + +use crate::loader::unloader::*; + + +#[test] +fn hello_world_binary_test() { + + let hw = String::from("Hello world!"); + + let mut k = make_string(&hw); + + let mut code: Vec = vec![ + 0b0111000100000011, // addi ra 3 + 0b1110000100000011, // call ra 3 + 0b1111000000000000, // HALT. + ]; + + code.append(&mut k); + + let mut env = IOBuffer::default(); + + let mut cpu = CPU::new(&mut env); + + for c in &code[..]
{ + println!("{:#018b}", c); + } + + cpu.run_code_raw(&code); + + assert_eq!(hw, cpu.env.output); +} + + diff --git a/src/jit/mod.rs b/src/jit/mod.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/jit/mod.rs @@ -0,0 +1 @@ + diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..aa3720e --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,23 @@ +use std::mem::transmute; + +pub mod cpu; +pub mod jit; +pub mod loader; +// pub mod ; +pub mod assembler; +// + + +pub fn interpret_as_signed(x: u16) -> i16 { + // SAFETY: u16 and i16 have the same size, and every bit pattern is valid for both. + unsafe { + return transmute::(x); + } +} + +pub fn interpret_as_unsigned(x: i16) -> u16 { + // SAFETY: i16 and u16 have the same size, and every bit pattern is valid for both. + unsafe { + return transmute::(x); + } +} diff --git a/src/loader/constants.rs b/src/loader/constants.rs new file mode 100644 index 0000000..411dcc8 --- /dev/null +++ b/src/loader/constants.rs @@ -0,0 +1 @@ +pub const MAGIC: u16 = 39979; diff --git a/src/loader/display.rs b/src/loader/display.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/loader/display.rs @@ -0,0 +1 @@ + diff --git a/src/loader/loader.rs b/src/loader/loader.rs new file mode 100644 index 0000000..949f830 --- /dev/null +++ b/src/loader/loader.rs @@ -0,0 +1,100 @@ +use crate::cpu::Word; + +use super::{constants::MAGIC, Section}; + +#[derive(Debug)] +pub enum ParseError { + EmptyHeader, + MagicNumberCheckFail, + UnexpectedHeaderEnd, + UnexpectedFileEnd, + Utf8ConvError, +} + +/// Reads a sequence of u16, and returns a rust utf8 owned string +/// and the index of the next 16bit word that follows the string, +/// so the first 16 bit word after the null delimiter. +/// It splits every u16 into two chunks of 8 bits, reads until +/// it finds the first zero u8 and attempts to convert the u8 array to +/// a rust UTF8 String.
+pub fn find_and_read_string(s: &[u16]) -> Result<(String, usize), ParseError> { + let mut bytes = vec![]; + + let mut index: usize = 0; + for b in s.iter() { + let x0 = (*b & 0xFF00) >> 8; + let x1 = *b & 0x00FF; + // exit when the first zero byte is found (high byte first, then low byte). + if x0 == 0 { + index += 1; + break; + }; + bytes.push(x0 as u8); + if x1 == 0 { + index += 1; + break; + }; + bytes.push(x1 as u8); + index += 1; + } + + let s = String::from_utf8(bytes).map_err(|_| ParseError::Utf8ConvError)?; + + return Ok((s, index)); +} + +/// Takes a binary file and returns a list of sections. +pub fn read_binary(b: &[u16]) -> Result, ParseError> { + let mut res = vec![]; + + let headers = parse_header(b)?; + let hlen = headers.len() * 2 + 2; // two 16 bit words for every entry, + // and 2 extra 16 bit numbers at the start + + for (offset, length) in headers { + // section start. The name begins here + let start = hlen + offset as usize; + let str_buffer = b.get(start..).ok_or(ParseError::UnexpectedHeaderEnd)?; + let (name, i) = find_and_read_string(str_buffer)?; + + let c_start = start + i; + let c_end = start + (length as usize) + i; + + println!("{:?}, start: {}, end: {}", b, c_start, c_end); + let Some(content) = b.get((c_start)..(c_end)) else {return Err(ParseError::UnexpectedFileEnd)}; + + res.push(Section::new(name, content)) + } + + Ok(res) +} + +/// Parses the binary header into (offset, length) pairs. +fn parse_header(b: &[Word]) -> Result, ParseError> { + let Some([m, s]) = b.get(0..2) else {return Err(ParseError::EmptyHeader)}; + + // Magic number check. Can go unchecked, check spec. + // if (*m != MAGIC) { + // return Err(WrongMagicNum) + // }; + + // s is the number of pairs (offset, length) in our header. + // since we're counting pairs, we need s*2 numbers from input.
+ + let Some(headerdata) = b.get(2..((*s as usize * 2) + 2)) else {return Err(ParseError::UnexpectedHeaderEnd)}; + + assert!( + headerdata.len() % 2 == 0, + "Header does not have an even number of words" + ); + + let mut hd = headerdata.iter(); + + let mut res = vec![]; + + while let (Some(offset), Some(len)) = (hd.next(), hd.next()) { + res.push((*offset, *len)) + } + + Ok(res) +} diff --git a/src/loader/mod.rs b/src/loader/mod.rs new file mode 100644 index 0000000..8eea153 --- /dev/null +++ b/src/loader/mod.rs @@ -0,0 +1,23 @@ +mod display; +pub mod loader; +pub mod unloader; + +mod constants; + +mod tests; + +/// Represents a section, or symbol, that must end up in the +/// binary file. +#[derive(Debug)] +pub struct Section<'a> { + /// Name of the symbol/section + name: String, + /// Content, as a slice of 16 bit words + content: &'a [u16], +} + +impl Section<'_> { + pub fn new<'a>(name: String, content: &'a [u16]) -> Section<'a> { + Section { name, content } + } +} diff --git a/src/loader/tests.rs b/src/loader/tests.rs new file mode 100644 index 0000000..5b2befd --- /dev/null +++ b/src/loader/tests.rs @@ -0,0 +1,61 @@ +use super::loader::*; +use super::unloader::*; +use super::*; + +// fuzzable, TODO +fn write_read_str_identity(s: &str) { + let mut bytes = make_string(s); + + // pop null-terminator; + // bytes.pop().expect("String doesn't even have a single byte?"); + // println!("w-r s: {:?}, b: {:?}", s, bytes); + let (s0, _) = find_and_read_string(&bytes).unwrap(); + assert_eq!(s, &s0); +} + +fn read_write_str_identity(b: Vec) { + let (string, _) = find_and_read_string(&b).unwrap(); + let mut n_term = b.clone(); + // n_term.push(0); + // println!("r-w s: {:?}, b: {:?}", string, n_term); + assert_eq!(n_term, make_string(&string)); +} + +#[test] +fn label_parse_identity() { + let testwords = vec!["Hello,", "main", "รจ", "Hello,,", "v", "main", "pi"]; // should support utf-8 + // + for word in testwords { + println!("\nTEST {}\n", word); + + write_read_str_identity(word); + let
bytes = make_string(word); + // bytes.pop(); + println!( + "word: {:?}, bytes: {:?}, length: {:?}", + word, + bytes, + bytes.len() + ); + read_write_str_identity(bytes); + println!("Done with {:?}", word); + } +} + +// Symbol table round-trip test (make_binary followed by read_binary) + +#[test] +fn sy_test() { + let fake_symbol_table: Vec
= vec![ + Section::new("v".to_owned(), &[1, 2, 3]), + Section::new("pi".to_owned(), &[3]), + Section::new("main".to_owned(), &[231, 323, 433]), // Section { name: todo!(), content: todo!() }, + // Section { name: todo!(), content: todo!() } + ]; + + let bin = make_binary(&fake_symbol_table); + println!("{:?}", bin); + + let parsed_symbol_table = read_binary(&bin).expect("Wtf! We got error!"); + println!("{:?}", parsed_symbol_table); +} diff --git a/src/loader/unloader.rs b/src/loader/unloader.rs new file mode 100644 index 0000000..27ced25 --- /dev/null +++ b/src/loader/unloader.rs @@ -0,0 +1,115 @@ +use super::constants::MAGIC; + +use super::Section; + +impl Section<'_> { + /// Converts the entry's name to utf8 packed into 16 bit words (see make_string). + fn serialize_name(&self) -> Vec { + return make_string(&self.name); + } + + /// Joins the name of the section followed by its contents in a + /// vector of 16 bit words. + fn serialize(&self) -> Vec { + let mut tmp = make_string(&self.name); + tmp.append(&mut self.content.to_owned()); + return tmp; + } +} + +#[derive(Debug)] +/// Entry in the symbols table. +/// Consists of an offset and a length. +struct STEntry { + offset: u16, + length: u16, +} + +/// Takes a string and packs its utf8 bytes, two per u16 (high byte first), +/// into a vector of u16, followed by a zero word (padding possible) +pub fn make_string(s: &str) -> Vec { + let raw_bytes: &[u8] = s.as_bytes(); + + let mut rb = raw_bytes.iter(); + // raw_bytes must be converted to u16. + // + let mut bytes: Vec = { + let mut res = vec![]; + // highly cursed: depends on the order in which the arguments of a tuple are + // evaluated. Does its job! + while let (Some(word0), Some(word1)) = (rb.next(), rb.next()) { + // println!("Pair: {}, {}, word: {:?}", word0, word1, raw_bytes); + res.push(((*word0 as u16) << 8) + (*word1 as u16)); + } + // once the loop above ends, either there's a single byte left or zero.
+ // since, in case a single byte was left, the first rb.next() call in the line + // above would've consumed it, we have to gather that last element again + match raw_bytes.len() { + 0 => res, + n => { + if n % 2 != 0 { + // if there's an uneven number of chunks of 8 bits, + // we introduce padding! + // println!("Adding last one too"); + res.push((*raw_bytes.last().unwrap() as u16) << 8); + } + res + } + } + }; + + // adding the null terminator (a full zero word) + bytes.push(0); + + return bytes; +} + +/// Takes a list of Section struct to be inserted in the symbols table and returns +/// both the table and the inserted data, with offsets +fn conv(sy_table: &[Section]) -> (Vec, Vec) { + let mut current_offset: u16 = 0; + let mut entries: Vec = vec![]; + let mut content: Vec = vec![]; + + for entry in sy_table { + let mut binary_entry = entry.serialize(); + + // Add name + content to the whole file + content.extend_from_slice(&mut binary_entry); + + // take note of the current offset and content length in the entry table + // (without including the length of the string) + entries.push(STEntry { + offset: current_offset, + length: entry.content.len() as u16, + }); + + // add to the current offset the length of the data we've saved: + current_offset += binary_entry.len() as u16; + } + + return (entries, content); +} + +/// Takes a list of STentry and serializes them as (offset, length) pairs +fn make_header(sy_table: &[STEntry]) -> Vec { + let mut res = vec![]; + for entry in sy_table { + res.push(entry.offset); + res.push(entry.length); + } + + res +} + +/// Takes a list of Sections and returns a binary file.
+pub fn make_binary(sections: &[Section]) -> Vec { + let (sy_table, mut data) = conv(sections); + + let mut header = make_header(&sy_table); + + let mut res = vec![MAGIC, sections.len() as u16]; + res.append(&mut header); + res.append(&mut data); + res +} diff --git a/src/main.rs b/src/main.rs index e7a11a9..8237ab5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,32 @@ +use dekejit::cpu::IOBuffer; +use dekejit::cpu::CPU; +use dekejit::loader::unloader::*; + fn main() { - println!("Hello, world!"); + let mut k = make_string("Hello world!"); + + let mut code: Vec = vec![ + 0b0111000100000011, // addi ra 3 (the string starts at address 3, right after these three code words) + 0b1110000100000011, // call ra 3 (syscall 3: print the string whose address is in ra) + 0b1111000000000000, // HALT. + ]; + + code.append(&mut k); + + let mut env = IOBuffer::default(); + + let mut cpu = CPU::new(&mut env); + + for c in &code[..] { + println!("{:#018b}", c); + } + + match cpu.run_code_raw(&code) { + Ok(_) => { + println!("Result: {}", env.output) + } + Err(e) => println!("Err: {:?}", e), + }; + + } diff --git a/test.bin b/test.bin new file mode 100644 index 0000000..01e2b3c Binary files /dev/null and b/test.bin differ diff --git a/tests/assembly/hello_world.grasm b/tests/assembly/hello_world.grasm new file mode 100644 index 0000000..bdca998 --- /dev/null +++ b/tests/assembly/hello_world.grasm @@ -0,0 +1,11 @@ +; Hello world program. + +.asciiz World + "Hello world\n" + +.text main + addi t0 World ; load World's address into t0 + call t0 3 ; print string syscall + +.asciiz hey + "Hey dude\n" diff --git a/tests/mod.rs b/tests/mod.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/mod.rs @@ -0,0 +1 @@ +