second commit - still messy

2023-04-28 12:18:07 +02:00 · 2023-04-28 12:18:07 +02:00 · 18a5f17b80
commit 18a5f17b80
parent 427e139161
24 changed files with 1318 additions and 1 deletions
--- a/spec.md
+++ b/spec.md
@ -0,0 +1,117 @@
+### Disclaimer
+
+This is a fantasy architecture on which I intend to write fantasy compilers. It was born out of the
+"fuck around and find out" philosophy, and is a toy project. I will change a lot of stuff as I learn
+how it's done in the real world. For now, I'm just gonna guess and have fun.
+
+Since I'm studying RISC-V, this will be heavily RISC-V inspired.
+
+# The GRAVEJIT virtual machine
+
+The gravejit virtual machine sports 16 16-bit registers (plus the program counter!) and 16 operations.
+
+Here is the list of registers together with their mnemonics.
+
+0 : zero // register 0 is always 0.
+1 : ra   // return address
+2 : sp   // stack pointer
+3 : t0   // temporary 
+4 : t1
+5 : t2
+6 : t3
+7 : a0   // function arguments
+8 : a1
+9 : a2
+10: a3
+11: s0   // saved registers
+12: s1
+13: s2
+14: s3
+15: t4 // don't know what to do with this
+
+pc: program counter.
+
+## ISA
+
+opcode |  mnemonic | format  | description
+
+0000 | NOP           | just 0s'| Does nothing.
+0001 | ADD s0 s1 s2  | R       | s0 = s1 + s2 
+0010 | SUB s0 s1 s2  | R       | s0 = s1 - s2
+0011 | AND s0 s1 s2  | R       | s0 = s1 & s2 (bitwise)
+0100 | XOR s0 s1 s2  | R       | s0 = s1 xor s2
+0101 | SLL s0 s1 s2  | R       | s0 = s1 << s2
+0110 | SLI s0 c      | I       | s0 = s0 << c 
+0111 | ADDI s0 c     | I       | s0 = s0 + c
+1000 | BEQ s0 s1 s2  | R       | if (s1 == s2) -> pc = s0 
+1001 | BGT s0 s1 s2  | R       | if (s1 > s2) -> pc = s0
+1010 | JAL s0 s1 c   | J       | s0 = pc+1; pc += s1 + c;
+1011 |
+1100 | LOAD s0 s1 s2 | R       | loads s1 + shift by s2 in s0
+1101 | STORE s0 s1 s2| R       | stores s0 in address s1 + shift by s2
+1110 | CALL s0 c     | I       | performs system call
+1111 | HALT          | just 1s'| halt, and possibly catch fire.
+
+
+### Operation formats:
+
+Each instruction is 16 bits long.
+The 4 most-significant bits are the opcode.
+Constants (c in the above table) are always considered signed, and written in
+two's complement. Sign extension also takes place whenever needed,
+i.e., to make an immediate subtraction, one just needs to add a negative number.
+
+#### R-type:
+opcode: 4 bits
+dest register: 4 bits
+source 1 register: 4 bits
+source 2 register: 4 bits
+
+example: ADD s0 s1 s2 = 0001 1011 1100 1101
+
+#### I-type
+opcode: 4 bits
+dest register: 4 bits
+constant: 8 bits
+
+example: 
+ADDI s0 28 = 0111 1011 00011100
+ADDI s0 -2 = 0111 1011 11111110
+
+
+#### J-Type
+opcode: 4 bits
+dest register: 4 bits
+jump address register: 4 bits
+constant: 4 bits
+
+
+The constant is added to the value of the second register argument.
+
+### JIT's system calls:
+
+the `CALL` instruction is a bit of a hack because I want to load more functionality into the thing.
+The JIT can decide what to do with the register s0 and the number c.
+It should be possible to open files, write files, read stdin, write to stdout, etc...
+
+#### io\_vec: first systemcall environment
+
+Working on this, quick and dirty.
+
+### Binary executable format:
+
+Binary files start with two 16-bit numbers, a constant and a length N, followed by a list of
+N pairs of 16-bit numbers. This is the header of the file.
+
+The initial constant is currently unused and unimportant. In this draft-toy-spec, the initial constant
+is always 39979.
+
+In each pair, the first number is an offset, and the second number is a size M in 16-bit words.
+
+The offset points at a null-terminated UTF-8 (yes.) string, located offset\*16 bits to the right after the end of the header in the binary file, followed by arbitrary binary content of size M\*16 bits.
+
+The utf-8 string cannot contain the null character anywhere, as that will be used as terminator.
+
+This represents a "symbols table" of the binary file, where functions and data can be stored.
+
+There must exist a symbol named "main", and it must point to a function: this will be the entrypoint to our program.
--- a/src/assembler/AST.rs
+++ b/src/assembler/AST.rs
@ -0,0 +1,35 @@
+use crate::cpu::Word;
+
+type RegisterMem = String; // a register operand, stored as the text written in the source
+
+pub type ConstId = String; // text of a constant operand that is not a plain number
+
+#[derive(Debug)]
+pub enum Const { // constant operand of an I-type operation (see `Operation`)
+    CS(ConstId), // symbolic form: the operand text, kept as written
+    C(u8), // numeric form; NOTE(review): unsigned here while the spec calls constants signed - confirm
+}
+
+#[derive(Debug)]
+pub enum Operation { // one assembly instruction, with register operands still in textual form
+    NOP,
+    HALT,
+    // R type: destination register, then two source registers
+    ADD(RegisterMem, RegisterMem, RegisterMem),
+    SUB(RegisterMem, RegisterMem, RegisterMem),
+    AND(RegisterMem, RegisterMem, RegisterMem),
+    XOR(RegisterMem, RegisterMem, RegisterMem),
+    SLL(RegisterMem, RegisterMem, RegisterMem),
+    BEQ(RegisterMem, RegisterMem, RegisterMem),
+    BGT(RegisterMem, RegisterMem, RegisterMem),
+    LOAD(RegisterMem, RegisterMem, RegisterMem),
+    STORE(RegisterMem, RegisterMem, RegisterMem),
+
+    // I Type: a register and a constant
+    SLI(RegisterMem, Const),
+    ADDI(RegisterMem, Const),
+    CALL(RegisterMem, Const),
+
+    // J Type: two registers and a constant
+    JAL(RegisterMem, RegisterMem, Word), // NOTE(review): the constant is a full `Word` here, unlike the `Const` used by I-type - confirm intent
+}
--- a/src/assembler/mod.rs
+++ b/src/assembler/mod.rs
@ -0,0 +1,5 @@
+mod AST;
+mod tests;
+mod parser;
+
+struct Assembler {} // placeholder: no fields or methods yet
--- a/src/assembler/parser.rs
+++ b/src/assembler/parser.rs
@ -0,0 +1,246 @@
+use crate::cpu::Registers;
+
+use super::AST::{Operation, Const};
+use Operation::*;
+
+type Loc = u16; // position type; NOTE(review): not referenced anywhere in the visible code
+
+#[derive(Debug)]
+pub enum ParseError { // reasons why parsing assembly source can fail
+    BadSectionHeader, // a `.kind name` header line is malformed
+    UnknownSectionKind, // the kind after the leading `.` is not one the parser knows
+    UnexpectedEOF, // input ended where a section body line was required
+    BadSectionContent, // a section body line does not have the expected shape
+    BadInstruction // a code line is not a recognised instruction
+}
+
+/// Represents the state of our parser: the sanitized source lines plus bookkeeping.
+pub struct Parser {
+    loc: u16,                     // current number of operations parsed (NOTE(review): never updated in the visible code).
+    symtable: Vec<(String, u16)>, // symbols encountered, position.
+    pub input: Vec<String>,           // input file, one entry per non-empty, comment-free line
+}
+
+impl Parser {
+    /// Creates a parser over the source text `i`. The text is sanitized
+    /// (comments and blank lines dropped) before being stored.
+    pub fn new(i: String) -> Self {
+        let input = sanitize(i);
+        Self {
+            input,
+            symtable: Vec::new(),
+            loc: 0,
+        }
+    }
+}
+
+/// Splits the input into lines, strips `;` comments and surrounding
+/// whitespace from each one, and discards the lines that end up empty.
+fn sanitize(i: String) -> Vec<String> {
+    let mut cleaned = Vec::new();
+    for raw in i.lines() {
+        let line = remove_comments(raw).trim();
+        if !line.is_empty() {
+            cleaned.push(line.to_string());
+        }
+    }
+    cleaned
+}
+
+/// Returns the part of `i` that precedes the first `;`, or all of `i`
+/// when it contains no `;`.
+fn remove_comments(i: &str) -> &str {
+    match i.split_once(';') {
+        Some((code, _comment)) => code,
+        None => i,
+    }
+}
+
+
+/// Checks whether the string `i` starts with `pat`.
+///
+/// Returns the rest of the input string on success. Returns `None` when
+/// `i` does not start with `pat`, including when `i` is shorter than
+/// `pat` (the previous character-by-character loop accepted that case).
+fn match_string(i: &str, pat: &str) -> Option<String> {
+    // `strip_prefix` rejects inputs that run out before the pattern does.
+    i.strip_prefix(pat).map(str::to_owned)
+}
+
+/// Splits `i` at the first occurrence of `stop`.
+///
+/// Returns the text before `stop` and the text after it (`stop` itself
+/// is dropped), or `None` when `stop` does not occur in `i`.
+///
+/// Ex: assert_eq!(Some(("Lorem ".to_string(), " Ipsum".to_string())), take_alpha_till("Lorem X Ipsum", "X"));
+///
+fn take_alpha_till(i: &str, stop: &str) -> Option<(String, String)> {
+    let (before, after) = i.split_once(stop)?;
+    Some((before.to_owned(), after.to_owned()))
+}
+
+/// Expects `i` to begin with the `start` delimiter, then returns the text
+/// up to the next `stop` delimiter together with whatever follows it.
+fn take_between(i: &str, start: &str, stop: &str) -> Option<(String, String)> {
+    match_string(i, start).and_then(|inner| take_alpha_till(&inner, stop))
+}
+
+#[test]
+fn take_between_test() { // the quoted text is extracted and the remainder is returned untouched
+    assert_eq!(take_between("\"wow\" etc", "\"", "\""), Some(("wow".to_string(), " etc".to_string())));
+} 
+
+
+//// SECTION PARSING
+
+#[derive(Debug)]
+enum SectionContent { // payload of a parsed section
+    Code(Vec<Operation>), // instructions of a `text` section
+    CString(String), // text of an `asciiz` section, with the surrounding quotes removed
+    CVec() // placeholder without data; not constructed in the visible code
+}
+
+use SectionContent::*;
+
+#[derive(Debug)]
+pub struct Section { // one parsed section of an assembly file
+    name: String, // text that follows the kind in the section header, trimmed
+    content: SectionContent, // what the section holds
+}
+
+// A .section has a name and variable content.
+impl Parser {
+    /// Walks the sanitized input and collects every section it declares.
+    ///
+    /// A section starts with a header line of the form `.kind name`.
+    /// `text` sections take the lines up to the next header as code;
+    /// `asciiz` sections take the next line as a double-quoted string.
+    /// The `i16`, `u16`, `vi16` and `vu16` kinds currently consume one line
+    /// and produce no section. Lines that are not headers are skipped.
+    pub fn parse_sections(&mut self) -> Result<Vec<Section>, ParseError> {
+        let mut sections = Vec::new();
+        let mut lines = self.input.iter().map(String::as_str);
+
+        while let Some(line) = lines.next() {
+            println!("Examing line: {}", line);
+            if !line.starts_with(".") {
+                continue;
+            }
+
+            let (kind, name) =
+                take_alpha_till(&line[1..], " ").ok_or(ParseError::BadSectionHeader)?;
+
+            match kind.as_str() {
+                "text" => {
+                    // The body is read through a clone, so the main cursor
+                    // does not advance; the code lines are visited again by
+                    // the outer loop and skipped there.
+                    let body: Vec<&str> = lines
+                        .clone()
+                        .take_while(|candidate| !candidate.starts_with("."))
+                        .collect();
+                    let section_name = name.trim().to_owned();
+                    let code = parse_code(&body)?;
+                    sections.push(Section { name: section_name, content: Code(code) });
+                }
+                "asciiz" => {
+                    let raw = lines.next().ok_or(ParseError::UnexpectedEOF)?;
+                    let (text, _) = take_between(raw.trim(), "\"", "\"")
+                        .ok_or(ParseError::BadSectionContent)?;
+                    sections.push(Section { name: name.trim().to_owned(), content: CString(text) });
+                }
+                "i16" | "u16" | "vi16" | "vu16" => {
+                    // Not implemented yet: the value line is consumed and dropped.
+                    lines.next();
+                }
+                _ => return Err(ParseError::UnknownSectionKind),
+            }
+        }
+
+        Ok(sections)
+    }
+}
+
+
+/// Parses every line of a code section in order, stopping at the first
+/// line that fails to parse.
+fn parse_code(i: &[&str]) -> Result<Vec<Operation>, ParseError> {
+    i.iter().map(|line| parse_code_line(line)).collect()
+}
+
+
+/// Parses one line of assembly into an [`Operation`].
+///
+/// Only `nop`, `halt` and the I-type instructions (`addi`, `sli`, `call`)
+/// are recognised so far; R-type and J-type lines are rejected.
+///
+/// # Errors
+/// * `BadSectionContent` when the line holds no token at all.
+/// * `BadInstruction` for an unknown mnemonic or an instruction with
+///   missing operands (previously reported as `BadSectionHeader`, which
+///   pointed at the wrong part of the input).
+fn parse_code_line(i: &str) -> Result<Operation, ParseError> {
+    // every operation has at most 3 arguments
+    let mut bits = i.split_whitespace();
+    println!("current parse code line: {}", i);
+    let Some(op) = bits.next() else { return Err(ParseError::BadSectionContent) };
+
+    // Instructions without operands.
+    match op {
+        "nop" => return Ok(NOP),
+        "halt" => return Ok(HALT),
+        _ => {}
+    }
+
+    // I-type: a register followed by a constant.
+    let (Some(register), Some(constant)) = (bits.next(), bits.next()) else {
+        return Err(ParseError::BadInstruction);
+    };
+
+    match op {
+        "addi" => Ok(ADDI(register.to_owned(), parse_const(constant)?)),
+        "sli" => Ok(SLI(register.to_owned(), parse_const(constant)?)),
+        "call" => Ok(CALL(register.to_owned(), parse_const(constant)?)),
+        _ => Err(ParseError::BadInstruction),
+    }
+}
+
+/// Reads a constant operand: a token that parses as a number becomes
+/// `Const::C`, anything else is kept as the symbolic `Const::CS`.
+fn parse_const(i: &str) -> Result<Const, ParseError> {
+    let constant = match i.parse() {
+        Ok(value) => Const::C(value),
+        Err(_) => Const::CS(i.to_owned()),
+    };
+    Ok(constant)
+}
+
--- a/src/assembler/tests.rs
+++ b/src/assembler/tests.rs
@ -0,0 +1,16 @@
+// use super::*;
+
+use crate::assembler::parser;
+
+#[test]
+fn parser_test() { // smoke test: parses the hello-world assembly file and prints the sections
+
+    println!("Parser test begins");
+    let code = std::fs::read_to_string("./tests/assembly/hello_world.grasm").unwrap(); // relative path: depends on the working directory
+
+    let mut parser = parser::Parser::new(code);
+
+    let r = parser.parse_sections().unwrap(); // any parse error fails the test here
+
+    println!("Parsed sections: {:?}", r);
+}
--- a/src/cpu/decoder.rs
+++ b/src/cpu/decoder.rs
@ -0,0 +1,60 @@
+use super::registers::Register;
+
+type Constant = i8; // immediates are at most 8 bits wide, so a signed byte can hold any of them
+
+#[derive(Debug)]
+pub enum OP { // a decoded instruction; registers are given by index
+    NOP,
+    ADD(Register, Register, Register), // R-type: destination, source 1, source 2
+    SUB(Register, Register, Register),
+    AND(Register, Register, Register),
+    XOR(Register, Register, Register),
+    SLL(Register, Register, Register),
+
+    SLI(Register, Constant), // I-type: register, 8-bit constant
+    ADDI(Register, Constant),
+
+    BEQ(Register, Register, Register), // first register holds the branch target
+    BGT(Register, Register, Register),
+    JAL(Register, Register, Constant), // J-type: link register, base register, 4-bit constant
+
+    LOAD(Register, Register, Register),
+    STORE(Register, Register, Register),
+    CALL(Register, Constant),
+    HALT,
+}
+
+pub use OP::*;
+
+/// Decodes one 16-bit instruction word into an [`OP`].
+///
+/// Bits 15..12 hold the opcode and bits 11..8 the first register. The low
+/// byte is either two 4-bit register fields (R-type), one 8-bit constant
+/// (I-type), or a register followed by a 4-bit constant (J-type).
+/// Constants are two's-complement and are sign-extended; the 4-bit J-type
+/// constant was previously zero-extended, so negative offsets decoded as
+/// 8..15.
+///
+/// # Panics
+/// Panics on opcode `0b1011`, which has no instruction assigned yet.
+pub fn decode(op: u16) -> OP {
+    let opcode = op >> 12;
+    let dest = ((op & 0x0F00) >> 8) as Register;
+    let r1 = ((op & 0x00F0) >> 4) as Register;
+    let r2 = (op & 0x000F) as Register;
+
+    // 8-bit immediate: reinterpret the low byte as a signed value.
+    let c = (op & 0x00FF) as u8 as Constant;
+    // 4-bit immediate: move the nibble to the top of the byte, then shift
+    // back arithmetically so that bit 3 is replicated as the sign.
+    let c4 = ((((op & 0x000F) as u8) << 4) as Constant) >> 4;
+
+    println!("opcode: {}", opcode);
+
+    match opcode {
+        // todo: write a macro for every type (I-type, R-type)
+        0b0000 => NOP,
+        0b0001 => ADD(dest, r1, r2),
+        0b0010 => SUB(dest, r1, r2),
+        0b0011 => AND(dest, r1, r2),
+        0b0100 => XOR(dest, r1, r2),
+        0b0101 => SLL(dest, r1, r2),
+        0b0110 => SLI(dest, c),
+        0b0111 => ADDI(dest, c),
+        0b1000 => BEQ(dest, r1, r2),
+        0b1001 => BGT(dest, r1, r2),
+        0b1010 => JAL(dest, r1, c4),
+        0b1011 => todo!(),
+        0b1100 => LOAD(dest, r1, r2),
+        0b1101 => STORE(dest, r1, r2),
+        0b1110 => CALL(dest, c),
+        0b1111 => HALT,
+        // `opcode` is the top nibble of a u16, so all 16 values are covered above.
+        _ => unreachable!("opcode is only 4 bits wide"),
+    }
+}
--- a/src/cpu/mod.rs
+++ b/src/cpu/mod.rs
@ -0,0 +1,187 @@
+mod decoder;
+mod ram;
+mod registers;
+mod sysenv;
+mod tests;
+pub use sysenv::*;
+
+pub use registers::*;
+
+use decoder::OP;
+use ram::Ram;
+
+#[derive(Debug)]
+pub enum ExecErr { // ways in which executing a program can fail
+    InvalidRegister, // a register could not be read or written
+    InvalidMemoryAddr, // a memory access fell outside RAM
+    InvalidSyscall, // the environment does not know the requested system call number
+    InvalidPC, // the program counter points outside RAM
+    SyscallError(String), // a system call failed; the string describes why
+}
+
+use ExecErr::*;
+
+use crate::{interpret_as_signed, interpret_as_unsigned};
+
+use self::decoder::decode;
+
+/// Shorthand for `Result<T, ExecErr>`.
+type CPUResult<T> = Result<T, ExecErr>;
+
+#[derive(Debug)]
+/// The state of the interpreter.
+pub struct CPU<'a, T> {
+    pub regs: Registers, // general-purpose registers and the program counter
+    pub ram: Ram, // memory; `run_code_raw` places the program at address 0
+    pub env: &'a mut T, // environment that services system calls
+    // set by HALT; the run loop stops once this is true
+    halt: bool,
+}
+
+impl<'a, T> CPU<'a, T>
+where
+    T: Sys,
+{
+    /// Executes one decoded instruction and moves the program counter.
+    ///
+    /// * Arithmetic (`ADD`, `SUB`, `ADDI`, jump and memory address
+    ///   computation) wraps around at 16 bits instead of overflowing.
+    /// * Shift amounts are reduced modulo 16.
+    /// * `BEQ`/`BGT` compare the *values* held in their two source
+    ///   registers; when the condition fails, execution continues with the
+    ///   next instruction. NOTE(review): values are compared as unsigned
+    ///   words; the spec does not say whether `BGT` is signed.
+    /// * `JAL` stores `pc + 1` in the link register and jumps to the base
+    ///   register plus the constant. NOTE(review): the spec text reads
+    ///   `pc += s1 + c`; the absolute form used so far is kept.
+    /// * `HALT` raises the halt flag and leaves `pc` untouched.
+    ///
+    /// # Errors
+    /// `InvalidRegister` when a register cannot be read or written (register
+    /// 0 cannot be written), `InvalidMemoryAddr` when `LOAD`/`STORE` address
+    /// a word outside RAM, and any error returned by the system call.
+    pub fn execute_op(&mut self, op: OP) -> CPUResult<()> {
+        // Address of the instruction that follows the current one.
+        let next_pc = self.regs.pc.wrapping_add(1);
+
+        match op {
+            OP::NOP => {
+                self.regs.pc = next_pc;
+            }
+            OP::ADD(d, r1, r2) => {
+                let lhs = self.regs.get(r1).ok_or(InvalidRegister)?;
+                let rhs = self.regs.get(r2).ok_or(InvalidRegister)?;
+                self.regs
+                    .write(d, lhs.wrapping_add(rhs))
+                    .ok_or(InvalidRegister)?;
+                self.regs.pc = next_pc;
+            }
+            OP::SUB(d, r1, r2) => {
+                let lhs = self.regs.get(r1).ok_or(InvalidRegister)?;
+                let rhs = self.regs.get(r2).ok_or(InvalidRegister)?;
+                self.regs
+                    .write(d, lhs.wrapping_sub(rhs))
+                    .ok_or(InvalidRegister)?;
+                self.regs.pc = next_pc;
+            }
+            OP::AND(d, r1, r2) => {
+                let lhs = self.regs.get(r1).ok_or(InvalidRegister)?;
+                let rhs = self.regs.get(r2).ok_or(InvalidRegister)?;
+                self.regs.write(d, lhs & rhs).ok_or(InvalidRegister)?;
+                self.regs.pc = next_pc;
+            }
+            OP::XOR(d, r1, r2) => {
+                let lhs = self.regs.get(r1).ok_or(InvalidRegister)?;
+                let rhs = self.regs.get(r2).ok_or(InvalidRegister)?;
+                self.regs.write(d, lhs ^ rhs).ok_or(InvalidRegister)?;
+                self.regs.pc = next_pc;
+            }
+            OP::SLL(d, r1, r2) => {
+                let value = self.regs.get(r1).ok_or(InvalidRegister)?;
+                let amount = self.regs.get(r2).ok_or(InvalidRegister)?;
+                // `wrapping_shl` masks the amount to 0..=15, which is what the
+                // plain `<<` did in release builds; debug builds panicked.
+                self.regs
+                    .write(d, value.wrapping_shl(u32::from(amount)))
+                    .ok_or(InvalidRegister)?;
+                self.regs.pc = next_pc;
+            }
+            OP::SLI(d, c) => {
+                let value = self.regs.get(d).ok_or(InvalidRegister)?;
+                // Same masking as SLL; a negative constant no longer panics.
+                self.regs
+                    .write(d, value.wrapping_shl(c as u32))
+                    .ok_or(InvalidRegister)?;
+                self.regs.pc = next_pc;
+            }
+            OP::ADDI(d, c) => {
+                let value = self.regs.get(d).ok_or(InvalidRegister)?;
+                let sum = interpret_as_signed(value).wrapping_add(i16::from(c));
+                self.regs
+                    .write(d, interpret_as_unsigned(sum))
+                    .ok_or(InvalidRegister)?;
+                self.regs.pc = next_pc;
+            }
+            OP::BEQ(d, x0, x1) => {
+                // Compare register contents, not register numbers.
+                let lhs = self.regs.get(x0).ok_or(InvalidRegister)?;
+                let rhs = self.regs.get(x1).ok_or(InvalidRegister)?;
+                self.regs.pc = if lhs == rhs {
+                    self.regs.get(d).ok_or(InvalidRegister)?
+                } else {
+                    // Not taken: fall through, otherwise the CPU would spin here.
+                    next_pc
+                };
+            }
+            OP::BGT(d, x0, x1) => {
+                let lhs = self.regs.get(x0).ok_or(InvalidRegister)?;
+                let rhs = self.regs.get(x1).ok_or(InvalidRegister)?;
+                self.regs.pc = if lhs > rhs {
+                    self.regs.get(d).ok_or(InvalidRegister)?
+                } else {
+                    next_pc
+                };
+            }
+            OP::JAL(s0, s1, c) => {
+                self.regs.write(s0, next_pc).ok_or(InvalidRegister)?;
+                let base = self.regs.get(s1).ok_or(InvalidRegister)?;
+                // `c as Word` sign-extends, so the wrapping add also handles
+                // negative offsets.
+                self.regs.pc = base.wrapping_add(c as Word);
+            }
+
+            OP::LOAD(d, s1, s2) => {
+                let start = self.regs.get(s1).ok_or(InvalidRegister)?;
+                let offset = self.regs.get(s2).ok_or(InvalidRegister)?;
+
+                let value = self
+                    .ram
+                    .get(start.wrapping_add(offset))
+                    .ok_or(InvalidMemoryAddr)?;
+
+                self.regs.write(d, value).ok_or(InvalidRegister)?;
+                self.regs.pc = next_pc;
+            }
+            OP::STORE(d, s1, s2) => {
+                let start = self.regs.get(s1).ok_or(InvalidRegister)?;
+                let offset = self.regs.get(s2).ok_or(InvalidRegister)?;
+
+                let value = self.regs.get(d).ok_or(InvalidRegister)?;
+                self.ram
+                    .write(start.wrapping_add(offset), value)
+                    .ok_or(InvalidMemoryAddr)?;
+                self.regs.pc = next_pc;
+            }
+            OP::CALL(r, c) => {
+                T::call(self, r.into(), c as u16)?;
+                // Computed after the call, in case the environment moved `pc`.
+                self.regs.pc = self.regs.pc.wrapping_add(1);
+            }
+            OP::HALT => {
+                self.halt = true;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Reads the word at the current program counter and decodes it.
+    /// Fails with `InvalidPC` when the counter points outside memory.
+    fn fetch(&self) -> CPUResult<OP> {
+        match self.ram.get(self.regs.pc) {
+            Some(word) => {
+                println!("binop: {:#018b}", word);
+                Ok(decode(word))
+            }
+            None => Err(ExecErr::InvalidPC),
+        }
+    }
+
+    /// Fetches the instruction at `pc`, logs it, and executes it.
+    fn step(&mut self) -> CPUResult<()> {
+        self.fetch().and_then(|fetched| {
+            println!("fetched op: {:?}, pc: {} ", fetched, self.regs.pc);
+            self.execute_op(fetched)
+        })
+    }
+
+    /// Copies `bin_code` into memory starting at address 0 and executes from
+    /// the current program counter until a `HALT` is executed or an
+    /// instruction fails.
+    ///
+    /// # Errors
+    /// `InvalidMemoryAddr` when the program does not fit into RAM;
+    /// otherwise the first error raised while stepping.
+    pub fn run_code_raw(&mut self, bin_code: &[Word]) -> CPUResult<()> {
+        self.halt = false;
+        // put the code in memory; refuse to run if it was not loaded, rather
+        // than executing whatever the memory happens to contain.
+        self.ram
+            .write_array(bin_code, 0)
+            .ok_or(InvalidMemoryAddr)?;
+
+        while !self.halt {
+            self.step()?;
+        }
+
+        Ok(())
+    }
+
+    /// Builds a CPU with default registers and memory, not halted, bound
+    /// to the given environment.
+    pub fn new(env: &'a mut T) -> Self {
+        let regs = Registers::default();
+        let ram = Ram::default();
+        Self {
+            regs,
+            ram,
+            env,
+            halt: false,
+        }
+    }
+}
--- a/src/cpu/ram.rs
+++ b/src/cpu/ram.rs
@ -0,0 +1,66 @@
+use crate::cpu::registers::Word;
+
+/// We'll define our RAM as a fixed-size array of `MAX_MEM` 16-bit words.
+/// A 16-bit address can reach at most 65536 words; the size is currently set to 40.
+
+// pub const MAX_MEM: usize = 65536;
+pub const MAX_MEM: usize = 40;
+
+#[derive(Debug)]
+pub struct Ram { // word-addressed memory of fixed size
+    mem: [Word; MAX_MEM], // backing storage, indexed by address
+}
+
+impl Default for Ram {
+    /// Memory with every word set to zero.
+    fn default() -> Self {
+        Self { mem: [0; MAX_MEM] }
+    }
+}
+
+impl Ram {
+    /// Reads the word stored at address `i`, or `None` when `i` lies
+    /// outside memory.
+    pub fn get(&self, i: Word) -> Option<Word> {
+        self.mem.get(usize::from(i)).copied()
+    }
+
+    /// Stores `val` at address `i`. Returns `None`, leaving memory
+    /// unchanged, when `i` lies outside memory.
+    pub fn write(&mut self, i: Word, val: Word) -> Option<()> {
+        let slot = self.mem.get_mut(usize::from(i))?;
+        *slot = val;
+        Some(())
+    }
+
+    /// Returns a slice of memory from the `start` to the `end` address, both
+    /// inclusive (the word at `end` used to be left out, contrary to this
+    /// contract).
+    /// `None` is returned if either address is out of bounds or if `start`
+    /// lies beyond `end` (which used to panic).
+    pub fn slice(&self, start: Word, end: Word) -> Option<&[Word]> {
+        if start > end {
+            return None;
+        }
+        self.mem.get(usize::from(start)..=usize::from(end))
+    }
+
+    /// Writes an array of data directly into memory, starting from
+    /// the "start" address.
+    /// Returns None, leaving memory untouched, if the data exceeds memory.
+    /// Data that ends exactly on the last word fits (it used to be rejected
+    /// by an off-by-one in the bounds check).
+    pub fn write_array(&mut self, data: &[Word], start: Word) -> Option<()> {
+        let first = usize::from(start);
+        let past_end = first.checked_add(data.len())?;
+        // `get_mut` yields `None` unless the whole range lies inside memory.
+        self.mem.get_mut(first..past_end)?.copy_from_slice(data);
+        Some(())
+    }
+}
--- a/src/cpu/registers.rs
+++ b/src/cpu/registers.rs
@ -0,0 +1,85 @@
+pub type Word = u16; // the machine word: register contents and memory cells
+pub type Register = u8; // index of a register
+
+/// We need to hold 15 registers (zero is constant) + the program counter.
+/// The 15 writable registers live in a fixed-size array of words; register `n` is stored at index `n - 1`.
+#[derive(Debug)]
+pub struct Registers {
+    regs: [Word; 15], // registers 1..=15
+    pub pc: Word, // program counter
+}
+
+impl Default for Registers {
+    /// All registers and the program counter start at zero.
+    fn default() -> Self {
+        Self {
+            pc: 0,
+            regs: [0; 15],
+        }
+    }
+}
+
+impl Registers {
+    /// Reads register `i`. Register 0 always reads as 0; `None` is returned
+    /// for a register that doesn't exist.
+    pub fn get(&self, i: Register) -> Option<Word> {
+        if i == 0 {
+            // the zero register is not stored anywhere
+            return Some(0);
+        }
+        self.regs.get(usize::from(i) - 1).copied()
+    }
+
+    /// Stores `val` in register `i`. Returns `None` when `i` is the zero
+    /// register (which cannot be written) or a register that doesn't exist.
+    pub fn write(&mut self, i: Register, val: Word) -> Option<()> {
+        // `checked_sub` fails exactly for register 0.
+        let index = usize::from(i).checked_sub(1)?;
+        let slot = self.regs.get_mut(index)?;
+        *slot = val;
+        Some(())
+    }
+}
+
+const ASSOCS: &'static [(Register, &'static str)] = &[ // register index paired with its mnemonic name
+    (0, "zero"),
+    (1, "ra"),
+    (2, "sp"),
+    (3, "t0"),
+    (4, "t1"),
+    (5, "t2"),
+    (6, "t3"),
+    (7, "a0"),
+    (8, "a1"),
+    (9, "a2"),
+    (10, "a3"),
+    (11, "s0"),
+    (12, "s1"),
+    (13, "s2"),
+    (14, "s3"),
+    (15, "t4"),
+];
+
+/// Looks up the mnemonic name of register `i` (11 -> s0). Useful for
+/// pretty printing. Returns `None` for an index with no entry.
+pub fn get_memo(i: Register) -> Option<&'static str> {
+    ASSOCS
+        .iter()
+        .find(|(index, _)| *index == i)
+        .map(|(_, name)| *name)
+}
+
+/// Looks up the register index for a mnemonic name (s0 -> 11).
+/// Returns `None` when the name is not a known register.
+pub fn get_num(s: &str) -> Option<Register> {
+    ASSOCS
+        .iter()
+        .find(|(_, name)| *name == s)
+        .map(|(index, _)| *index)
+}
--- a/src/cpu/sysenv/io_vec.rs
+++ b/src/cpu/sysenv/io_vec.rs
@ -0,0 +1,82 @@
+use std::io::stdin;
+
+use crate::{
+    cpu::{ram::MAX_MEM, registers::Register},
+    loader::{loader::find_and_read_string, unloader::make_string},
+};
+
+use super::*;
+
+// first working environment, we get input from stdin and we write output
+// to a string.
+//
+
+// using strings to signal errors kinda sucks.
+// TODO: Fix this
+
+#[derive(Debug, Default)]
+pub struct IOBuffer { // environment whose output system calls append to an in-memory string
+    pub output: String, // everything the program has written so far
+}
+
+impl Sys for IOBuffer {
+    /// Dispatches system call number `c`, using register `r` as its argument.
+    ///
+    /// * `0` - appends the value of `r`, as a decimal integer, to the output.
+    /// * `1` - reads a line from stdin, parses it as an integer and stores
+    ///   it in `r`.
+    /// * `2` - reads a line from stdin and writes its encoded form to memory
+    ///   at the address held in `r`.
+    /// * `3` - appends the null-terminated string found at the address held
+    ///   in `r` to the output.
+    ///
+    /// # Errors
+    /// `InvalidSyscall` for any other number, `InvalidRegister` /
+    /// `InvalidMemoryAddr` for bad operands, and `SyscallError` when reading,
+    /// parsing or storing the data fails.
+    fn call(cpu: &mut CPU<IOBuffer>, r: Register, c: Word) -> CPUResult<()> {
+        println!("called: {}", c);
+        match c {
+            // 0: write an integer to output
+            0 => {
+                let i = cpu.regs.get(r).ok_or(ExecErr::InvalidRegister)?;
+                cpu.env.output.push_str(&i.to_string());
+            }
+            // 1: read an integer to some register
+            1 => {
+                let mut buf = String::new();
+                stdin()
+                    .read_line(&mut buf)
+                    .map_err(|_| ExecErr::SyscallError("Cannot read stdin".to_owned()))?;
+                // `read_line` keeps the line terminator, which `parse` rejects;
+                // without the trim this call could never succeed.
+                let n: Word = buf
+                    .trim()
+                    .parse()
+                    .map_err(|_| ExecErr::SyscallError("Cannot read number".to_owned()))?;
+                cpu.regs.write(r, n).ok_or(ExecErr::InvalidRegister)?;
+            }
+            // 2: reads a string from input and writes it to some location.
+            // NOTE(review): the stored string still contains the line
+            // terminator returned by `read_line` - confirm that is intended.
+            2 => {
+                let mut buf = String::new();
+                stdin()
+                    .read_line(&mut buf)
+                    .map_err(|_| ExecErr::SyscallError("Cannot read stdin".to_owned()))?;
+
+                let s: Vec<u16> = make_string(&buf);
+
+                let start = cpu.regs.get(r).ok_or(ExecErr::InvalidRegister)?;
+
+                cpu.ram
+                    .write_array(&s[..], start)
+                    .ok_or_else(|| ExecErr::SyscallError("Cannot write slice".to_owned()))?;
+            }
+
+            // 3: prints a string, reading it from memory.
+            // r must contain the address of the string.
+            // the string needs to be null-delimited.
+            3 => {
+                let pos = cpu.regs.get(r).ok_or(ExecErr::InvalidRegister)?;
+
+                // we slice from start to the end.
+                // why? good question. The find_and_read_string
+                // will short circuit as soon as it finds a null terminator,
+                // which might <potentially> never be found.
+                let data = cpu
+                    .ram
+                    .slice(pos, MAX_MEM as Word - 1)
+                    .ok_or(ExecErr::InvalidMemoryAddr)?;
+                let (s, _) = find_and_read_string(data)
+                    .map_err(|_| ExecErr::SyscallError("parse error!".to_owned()))?;
+                cpu.env.output.push_str(&s);
+            }
+
+            _ => return Err(ExecErr::InvalidSyscall),
+        }
+
+        Ok(())
+    }
+}
--- a/src/cpu/sysenv/mod.rs
+++ b/src/cpu/sysenv/mod.rs
@ -0,0 +1,17 @@
+mod io_vec;
+
+pub use io_vec::IOBuffer;
+
+use super::{CPUResult, ExecErr, CPU};
+
+use super::registers::{Register, Word};
+
+/// This trait represents all environments where our CPU can operate.
+/// Implementing it roughly amounts to defining the available system calls.
+
+pub trait Sys: Sized {
+    // r should actually be 4 bits, while c should be
+    // 8 bits. TODO: more efficient packing?
+    /// Performs the system call numbered `c`; `r` is the register named by the CALL instruction.
+    fn call(cpu: &mut CPU<Self>, r: Register, c: Word) -> CPUResult<()>;
+}
--- a/src/cpu/tests.rs
+++ b/src/cpu/tests.rs
@ -0,0 +1,35 @@
+
+use super::*;
+
+use crate::loader::unloader::*;
+
+
+/// Runs a three-instruction program that prints the string stored right
+/// after the code, and checks the captured output.
+#[test]
+fn hello_world_binary_test() {
+
+    let hw = String::from("Hello world!");
+
+    let mut k = make_string(&hw);
+
+    let mut code: Vec<u16> = vec![
+        0b0111000100000011, // addi ra 3  (ra = address of the string, right after the code)
+        0b1110000100000011, // call ra 3  (syscall 3: print the string at ra)
+        0b1111000000000000, // HALT.
+    ];
+
+    code.append(&mut k);
+
+    let mut env = IOBuffer::default();
+
+    let mut cpu = CPU::new(&mut env);
+
+    for c in &code[..] {
+        println!("{:#018b}", c);
+    }
+
+    // Surface execution errors directly instead of only seeing a mismatch
+    // in the output comparison below.
+    cpu.run_code_raw(&code).expect("program should run until HALT");
+
+    assert_eq!(hw, cpu.env.output);
+}
+
+
--- a/src/jit/mod.rs
+++ b/src/jit/mod.rs
@ -0,0 +1 @@
+
--- a/src/lib.rs
+++ b/src/lib.rs
@ -0,0 +1,23 @@
+use std::mem::transmute;
+
+pub mod cpu;
+pub mod jit;
+pub mod loader;
+// pub mod ;
+pub mod assembler;
+//
+
+
+/// Reinterprets the bits of `x` as a two's-complement signed value.
+///
+/// An `as` cast between integer types of the same width keeps the bit
+/// pattern, so no `unsafe` transmute is needed.
+pub fn interpret_as_signed(x: u16) -> i16 {
+    x as i16
+}
+
+/// Reinterprets the two's-complement bits of `x` as an unsigned value.
+///
+/// An `as` cast between integer types of the same width keeps the bit
+/// pattern, so no `unsafe` transmute is needed.
+pub fn interpret_as_unsigned(x: i16) -> u16 {
+    x as u16
+}
--- a/src/loader/constants.rs
+++ b/src/loader/constants.rs
@ -0,0 +1 @@
+pub const MAGIC: u16 = 39979; // constant that opens a binary file's header (the spec calls it currently unused)
--- a/src/loader/display.rs
+++ b/src/loader/display.rs
@ -0,0 +1 @@
+
--- a/src/loader/loader.rs
+++ b/src/loader/loader.rs
@ -0,0 +1,100 @@
+use crate::cpu::Word;
+
+use super::{constants::MAGIC, Section};
+
+#[derive(Debug)]
+pub enum ParseError { // reasons why reading a binary file can fail
+    EmptyHeader, // the input is too short to hold the two leading header words
+    MagicNumberCheckFail, // NOTE(review): not produced by the visible code; the magic check is commented out
+    UnexpectedHeaderEnd, // the input ends before the header table or a section name
+    UnexpectedFileEnd, // a section's content extends past the end of the input
+    Utf8ConvError, // a section name is not valid UTF-8
+}
+
+/// Decodes a null-terminated UTF-8 string packed two bytes per 16-bit
+/// word, high byte first.
+///
+/// Returns the string together with the number of words consumed, i.e.
+/// the index of the first word after the one holding the terminator. When
+/// no zero byte is present, all of `s` is decoded and the index equals
+/// `s.len()`. Fails with `Utf8ConvError` if the collected bytes are not
+/// valid UTF-8.
+pub fn find_and_read_string(s: &[u16]) -> Result<(String, usize), ParseError> {
+    // Every byte in front of the first zero byte, in stream order.
+    let bytes: Vec<u8> = s
+        .iter()
+        .flat_map(|word| word.to_be_bytes())
+        .take_while(|&byte| byte != 0)
+        .collect();
+
+    // One past the first word that contains a zero byte in either half.
+    let index = s
+        .iter()
+        .position(|word| word.to_be_bytes().contains(&0))
+        .map_or(s.len(), |terminator| terminator + 1);
+
+    String::from_utf8(bytes)
+        .map(|text| (text, index))
+        .map_err(|_| ParseError::Utf8ConvError)
+}
+
+/// Takes a binary file and returns the sections listed in its header.
+///
+/// Each header entry gives an offset (in words, counted from the end of
+/// the header) to a null-terminated name, and the length in words of the
+/// content that follows the name.
+pub fn read_binary(b: &[u16]) -> Result<Vec<Section>, ParseError> {
+    let headers = parse_header(b)?;
+    // two 16-bit words for every entry, plus the two leading words
+    let hlen = headers.len() * 2 + 2;
+
+    let mut sections = Vec::with_capacity(headers.len());
+
+    for (offset, length) in headers {
+        // section start. The name begins here
+        let start = hlen + offset as usize;
+        let name_area = b.get(start..).ok_or(ParseError::UnexpectedHeaderEnd)?;
+        let (name, consumed) = find_and_read_string(name_area)?;
+
+        let c_start = start + consumed;
+        let c_end = c_start + length as usize;
+
+        println!("{:?}, start: {}, end: {}", b, c_start, c_end);
+        let content = b.get(c_start..c_end).ok_or(ParseError::UnexpectedFileEnd)?;
+
+        sections.push(Section::new(name, content));
+    }
+
+    Ok(sections)
+}
+
+/// Parses the header of a binary file into its list of
+/// `(offset, length)` entries.
+///
+/// The first word is the magic constant (not checked, see the spec) and
+/// the second is the number of entries that follow.
+fn parse_header(b: &[Word]) -> Result<Vec<(u16, u16)>, ParseError> {
+    let (_magic, count) = match b.get(0..2) {
+        Some([m, s]) => (*m, *s),
+        _ => return Err(ParseError::EmptyHeader),
+    };
+
+    // `count` pairs means `count * 2` words after the two leading ones.
+    let table_end = usize::from(count) * 2 + 2;
+    let table = b
+        .get(2..table_end)
+        .ok_or(ParseError::UnexpectedHeaderEnd)?;
+
+    assert!(
+        table.len() % 2 == 0,
+        "Header does not have an even number of words"
+    );
+
+    let entries = table
+        .chunks_exact(2)
+        .map(|pair| (pair[0], pair[1]))
+        .collect();
+
+    Ok(entries)
+}
--- a/src/loader/mod.rs
+++ b/src/loader/mod.rs
@ -0,0 +1,23 @@
+mod display;
+pub mod loader;
+pub mod unloader;
+
+mod constants;
+
+mod tests;
+
+/// Represents a section, or symbol, that must end up in the
+/// binary file.
+#[derive(Debug)]
+pub struct Section<'a> {
+    /// Name of the symbol/section
+    name: String,
+    /// Content, as a borrowed slice of 16-bit words
+    content: &'a [u16],
+}
+
+impl Section<'_> {
+    pub fn new<'a>(name: String, content: &'a [u16]) -> Section<'a> { // pairs a name with borrowed content; nothing is copied
+        Section { name, content }
+    }
+}
--- a/src/loader/tests.rs
+++ b/src/loader/tests.rs
@ -0,0 +1,61 @@
+use super::loader::*;
+use super::unloader::*;
+use super::*;
+
+// fuzzable, TODO
+/// Checks that encoding `s` with `make_string` and decoding the result with
+/// `find_and_read_string` gives `s` back.
+///
+/// Panics if decoding fails or if the decoded string differs from `s`.
+fn write_read_str_identity(s: &str) {
+    let words = make_string(s);
+    let (decoded, _) = find_and_read_string(&words).unwrap();
+    assert_eq!(s, &decoded);
+}
+
+/// Checks that decoding `b` with `find_and_read_string` and re-encoding the
+/// result with `make_string` reproduces `b` exactly.
+///
+/// Panics if decoding fails or if the re-encoded words differ from `b`.
+fn read_write_str_identity(b: Vec<u16>) {
+    let (decoded, _) = find_and_read_string(&b).unwrap();
+    assert_eq!(b, make_string(&decoded));
+}
+
+/// Runs both round-trip checks (string -> words -> string and
+/// words -> string -> words) over a handful of sample labels.
+#[test]
+fn label_parse_identity() {
+    // Includes a non-ASCII label: names should support UTF-8.
+    let samples = ["Hello,", "main", "è", "Hello,,", "v", "main", "pi"];
+
+    for sample in samples {
+        println!("\nTEST {}\n", sample);
+
+        write_read_str_identity(sample);
+
+        let encoded = make_string(sample);
+        println!(
+            "word: {:?}, bytes: {:?}, length: {:?}",
+            sample,
+            encoded,
+            encoded.len()
+        );
+        read_write_str_identity(encoded);
+
+        println!("Done with {:?}", sample);
+    }
+}
+
+// Symbol table test
+
+/// Serializes a small hand-written symbol table with `make_binary` and
+/// checks that `read_binary` accepts the result without error.
+#[test]
+fn sy_test() {
+    let fake_symbol_table = [
+        Section::new(String::from("v"), &[1, 2, 3]),
+        Section::new(String::from("pi"), &[3]),
+        Section::new(String::from("main"), &[231, 323, 433]),
+    ];
+
+    let bin = make_binary(&fake_symbol_table);
+    println!("{:?}", bin);
+
+    let parsed_symbol_table = read_binary(&bin).expect("Wtf! We got error!");
+    println!("{:?}", parsed_symbol_table);
+}
--- a/src/loader/unloader.rs
+++ b/src/loader/unloader.rs
@ -0,0 +1,115 @@
+use super::constants::MAGIC;
+
+use super::Section;
+
+impl Section<'_> {
+    /// Encodes the section's name as a null-terminated UTF-8 string packed
+    /// into 16-bit words (see `make_string`).
+    fn serialize_name(&self) -> Vec<u16> {
+        make_string(&self.name)
+    }
+
+    /// Returns the encoded name immediately followed by the section's
+    /// content, as a single vector of 16-bit words.
+    fn serialize(&self) -> Vec<u16> {
+        let mut words = self.serialize_name();
+        // Copy straight from the borrowed slice; no temporary vector needed.
+        words.extend_from_slice(self.content);
+        words
+    }
+}
+
+/// Entry in the symbols table.
+///
+/// Consists of an offset and a length.
+#[derive(Debug)]
+struct STEntry {
+    /// Where the section starts, counted in 16-bit words.
+    offset: u16,
+    /// Number of 16-bit words of content in the section.
+    length: u16,
+}
+
+/// Encodes `s` as a null-terminated UTF-8 string packed into 16-bit words.
+///
+/// Bytes are packed two per word, the first byte of each pair in the high
+/// half. If `s` has an odd number of bytes, the low half of the last data
+/// word is zero padding. A single `0` word terminates the string, so the
+/// empty string encodes as `[0]`.
+pub fn make_string(s: &str) -> Vec<u16> {
+    let raw = s.as_bytes();
+
+    // One word per byte pair (rounded up), plus the terminator.
+    let mut words = Vec::with_capacity(raw.len() / 2 + raw.len() % 2 + 1);
+
+    words.extend(raw.chunks(2).map(|pair| {
+        let high = u16::from(pair[0]);
+        // A trailing lone byte gets a zero low half as padding.
+        let low = pair.get(1).copied().map_or(0, u16::from);
+        (high << 8) | low
+    }));
+
+    // Null terminator word.
+    words.push(0);
+    words
+}
+
+/// Lays out the given sections and builds the matching symbols table.
+///
+/// Returns `(entries, data)`. `data` is every section's serialized form
+/// (name, then content) concatenated in input order. `entries[i]` records the
+/// offset at which section `i` starts inside `data`, together with the length
+/// of its content (the length of the name is not included).
+///
+/// # Panics
+///
+/// Panics if a section's offset or content length does not fit in 16 bits.
+/// Such a layout cannot be represented in the header, and truncating the
+/// value would silently produce a corrupt binary.
+fn conv(sy_table: &[Section]) -> (Vec<STEntry>, Vec<u16>) {
+    let limit = usize::from(u16::MAX);
+
+    let mut entries: Vec<STEntry> = Vec::with_capacity(sy_table.len());
+    let mut data: Vec<u16> = Vec::new();
+
+    for section in sy_table {
+        // The section starts wherever the data written so far ends.
+        let offset = data.len();
+        let length = section.content.len();
+        assert!(offset <= limit, "section offset does not fit in 16 bits");
+        assert!(
+            length <= limit,
+            "section content length does not fit in 16 bits"
+        );
+
+        // Both casts are lossless thanks to the checks above.
+        entries.push(STEntry {
+            offset: offset as u16,
+            length: length as u16,
+        });
+
+        // Add name + content to the whole file.
+        data.extend(section.serialize());
+    }
+
+    (entries, data)
+}
+
+/// Flattens the symbols table into header words: for every entry, in order,
+/// its offset followed by its length.
+fn make_header(sy_table: &[STEntry]) -> Vec<u16> {
+    sy_table
+        .iter()
+        .flat_map(|entry| [entry.offset, entry.length])
+        .collect()
+}
+
+/// Takes a list of Sections and returns a binary file.
+///
+/// The result is laid out as `[MAGIC, section count, header words, data]`,
+/// where the header words come from `make_header` and the data from `conv`.
+///
+/// # Panics
+///
+/// Panics if there are more sections than fit in the 16-bit count word
+/// (truncating the count would silently produce a corrupt binary).
+pub fn make_binary(sections: &[Section]) -> Vec<u16> {
+    assert!(
+        sections.len() <= usize::from(u16::MAX),
+        "too many sections: the count does not fit in 16 bits"
+    );
+
+    let (sy_table, data) = conv(sections);
+    let header = make_header(&sy_table);
+
+    let mut binary = Vec::with_capacity(2 + header.len() + data.len());
+    binary.push(MAGIC);
+    // Lossless: checked against `u16::MAX` above.
+    binary.push(sections.len() as u16);
+    binary.extend(header);
+    binary.extend(data);
+    binary
+}
--- a/src/main.rs
+++ b/src/main.rs
@ -1,3 +1,32 @@
+use dekejit::cpu::IOBuffer;
+use dekejit::cpu::CPU;
+use dekejit::loader::unloader::*;
+
 fn main() {
-    println!("Hello, world!");
+    // "Hello world!" as a null-terminated string packed into 16-bit words.
+    let mut k = make_string("Hello world!");
+
+    // Hand-assembled program: three instruction words.
+    let mut code: Vec<u16> = vec![
+        0b0111000100000011, // addi ra 3 (opcode 0111 = ADDI)
+        0b1110000100000011, // call ra 3 (opcode 1110 = CALL, performs a system call)
+        0b1111000000000000, // HALT.
+    ];
+
+    // The string is appended right after the instructions, so it starts at word index 3.
+    code.append(&mut k);
+
+    let mut env = IOBuffer::default();
+
+    let mut cpu = CPU::new(&mut env);
+
+    // Dump the whole image, one word per line, as 16-bit binary.
+    for c in &code[..] {
+        println!("{:#018b}", c);
+    }
+
+    // Run the image and print either the collected output or the error.
+    match cpu.run_code_raw(&code) {
+        Ok(_) => {
+            println!("Result: {}", env.output)
+        }
+        Err(e) => println!("Err: {:?}", e),
+    };
+
+
 }
--- a/test.bin
+++ b/test.bin
--- a/tests/assembly/hello_world.grasm
+++ b/tests/assembly/hello_world.grasm
@ -0,0 +1,11 @@
+; Hello world program.
+
+.asciiz World
+  "Hello world\n"
+
+.text main
+  addi t0 World   ; load World's address into t0
+  call t0 3       ; print string syscall
+; NOTE(review): main has no halt after the call -- confirm that execution
+; cannot fall through into the `hey` data below.
+
+.asciiz hey
+  "Hey dude\n"
--- a/tests/mod.rs
+++ b/tests/mod.rs
@ -0,0 +1 @@
+