From 8513b6222250f81a39d86b636bf371021db9f378 Mon Sep 17 00:00:00 2001 From: raphy Date: Tue, 2 May 2023 14:13:49 +0200 Subject: [PATCH] hello world works! --- spec.md | 8 ++ src/assembler/encoder.rs | 140 +++++++++++++++++++++++++++++++ src/assembler/mod.rs | 106 ++++++++++++++++++++++- src/assembler/parser.rs | 14 +++- src/cpu/mod.rs | 2 +- src/main.rs | 35 +++++--- tests/assembly/hello_world.grasm | 2 + 7 files changed, 291 insertions(+), 16 deletions(-) create mode 100644 src/assembler/encoder.rs diff --git a/spec.md b/spec.md index 3c72352..7471821 100644 --- a/spec.md +++ b/spec.md @@ -115,3 +115,11 @@ The utf-8 string cannot contain the null character anywhere, as that will be use This represents a "symbols table" of the binary file, where functions and data can be stored. There must exist a symbol named "main", and it must point to a function: this will be the entrypoint to our program. + + +### Executable, in memory. + +When loading a binary program, all the code in the binary file is placed at the start of our memory, followed by the data sections, in the order they appeared. + +The "text" sections (or code) are put in the order they appeared in the binary file, with the only exception of the "main" section, which goes at the start of the file. + diff --git a/src/assembler/encoder.rs b/src/assembler/encoder.rs new file mode 100644 index 0000000..02108fd --- /dev/null +++ b/src/assembler/encoder.rs @@ -0,0 +1,140 @@ +use super::{SymbolTable, AST::*}; +use crate::cpu::{get_num, OP::*, get_memo}; + +pub trait CodeFormat { + fn encode_op(op: &Operation, sy: &SymbolTable) -> Option + where + Self: Sized; +} + +struct SyWrap<'a>(&'a SymbolTable); + +impl SyWrap<'_> { + pub fn lookup(&self, query: &str) -> u16 { + let SyWrap(sy) = self; + + for (name, loc) in sy.into_iter() { + if query == (*name) { + return *loc; + } + } + panic!( + "Symbol {} not found in symbol table. 
\nCurrent symbol table:{:?}", + query, sy + ); + } +} + +impl CodeFormat for u16 { + fn encode_op(op: &Operation, sy: &SymbolTable) -> Option { + let sy = SyWrap(sy); + match op { + Operation::NOP => Some(0b0000000000000000), + Operation::HALT => Some(0b1111111111111111), + Operation::ADD(r1, r2, r3) => { + let (r1b, r2b, r3b) = ( + get_num(&r1)? as u16, + get_num(&r2)? as u16, + get_num(&r3)? as u16, + ); + return Some((0b0001 << 12) + (r1b << 8) + (r2b << 4) + r3b); + } + Operation::SUB(r1, r2, r3) => { + let (r1b, r2b, r3b) = ( + get_num(&r1)? as u16, + get_num(&r2)? as u16, + get_num(&r3)? as u16, + ); + return Some((0b0010 << 12) + (r1b << 8) + (r2b << 4) + r3b); + } + Operation::AND(r1, r2, r3) => { + let (r1b, r2b, r3b) = ( + get_num(&r1)? as u16, + get_num(&r2)? as u16, + get_num(&r3)? as u16, + ); + return Some((0b0011 << 12) + (r1b << 8) + (r2b << 4) + r3b); + } + Operation::XOR(r1, r2, r3) => { + + let (r1b, r2b, r3b) = ( + get_num(&r1)? as u16, + get_num(&r2)? as u16, + get_num(&r3)? as u16, + ); + return Some((0b0100 << 12) + (r1b << 8) + (r2b << 4) + r3b); + } + Operation::SLL(r1, r2, r3) => { + let (r1b, r2b, r3b) = ( + get_num(&r1)? as u16, + get_num(&r2)? as u16, + get_num(&r3)? as u16, + ); + return Some((0b0101 << 12) + (r1b << 8) + (r2b << 4) + r3b); + } + Operation::BEQ(r1, r2, r3) => { + let (r1b, r2b, r3b) = ( + get_num(&r1)? as u16, + get_num(&r2)? as u16, + get_num(&r3)? as u16, + ); + return Some((0b1000 << 12) + (r1b << 8) + (r2b << 4) + r3b); + } + Operation::BGT(r1, r2, r3) => { + let (r1b, r2b, r3b) = ( + get_num(&r1)? as u16, + get_num(&r2)? as u16, + get_num(&r3)? as u16, + ); + return Some((0b1001 << 12) + (r1b << 8) + (r2b << 4) + r3b); + } + Operation::LOAD(r1, r2, r3) => { + let (r1b, r2b, r3b) = ( + get_num(&r1)? as u16, + get_num(&r2)? as u16, + get_num(&r3)? as u16, + ); + return Some((0b1100 << 12) + (r1b << 8) + (r2b << 4) + r3b); + } + Operation::STORE(r1, r2, r3) => { + let (r1b, r2b, r3b) = ( + get_num(&r1)? 
as u16, + get_num(&r2)? as u16, + get_num(&r3)? as u16, + ); + return Some((0b1101 << 12) + (r1b << 8) + (r2b << 4) + r3b); + } + Operation::SLI(r1, c) => { + let r1b = get_num(&r1)? as u16; + let cb = match c { + Const::CS(label) => sy.lookup(&label), + Const::C(n) => (*n) as u16, + }; + return Some((0b0110 << 12) + (r1b << 8) + cb); + }, + Operation::ADDI(r1, c) => { + let r1b = get_num(&r1)? as u16; + let cb = match c { + Const::CS(label) => sy.lookup(&label), + Const::C(n) => (*n) as u16, + }; + return Some((0b0111 << 12) + (r1b << 8) + cb); + }, + Operation::CALL(r1, c) => { + let r1b = get_num(&r1)? as u16; + let cb = match c { + Const::CS(label) => sy.lookup(&label), + Const::C(n) => (*n) as u16, + }; + return Some((0b1110 << 12) + (r1b << 8) + cb); + }, + Operation::JAL(r1, r2, c) => { + let r1b = get_num(&r1)? as u16; + let r2b = get_num(&r2)? as u16; + let cb = (*c) as u16; + return Some((0b1010 << 12) + (r1b << 8) + (r2b << 4) + cb); + }, + } + } +} + diff --git a/src/assembler/mod.rs b/src/assembler/mod.rs index d77a6b5..df8d06c 100644 --- a/src/assembler/mod.rs +++ b/src/assembler/mod.rs @@ -1,5 +1,107 @@ mod AST; -mod parser; +pub mod encoder; +pub mod parser; mod tests; -struct Assembler {} + +use encoder::CodeFormat; + +use parser::Section; + + +use crate::loader::unloader::make_string; + +use self::parser::SectionContent; + +type SymbolTable = Vec<(String, u16)>; + +impl Section { + fn get_size(&self) -> usize { + match &self.content { + SectionContent::Code(c) => c.len(), + SectionContent::CString(s) => { + let c = s.len(); + if c % 2 != 0 { + (c+1)/2 + 1 + } else {c/2 + 1} + }, + SectionContent::CVec() => todo!(), + } + } + + fn to_binary(&self, sy: &SymbolTable) -> Option> { + match &self.content { + SectionContent::Code(c) => { + let mut res = vec![]; + for op in c.iter() { + res.push(CodeFormat::encode_op(op, sy)?); + } + return Some(res); + + }, + SectionContent::CString(s) => { + + return Some(make_string(s)); + + }, + SectionContent::CVec() 
=> todo!(), + } + } +} + +fn sort_sections(sections: Vec
) -> Option> { + + // we start with a mock section that we'll just replace. + let mut res: Vec
= vec![Section { name: "".to_owned(), content: SectionContent::CString("".to_owned())}]; + + let mut nocode: Vec
= vec![]; + + for section in sections.into_iter() { + // trick: if let to pattern match discarding result + if let SectionContent::Code(_) = section.content { + if section.name == "main" { + res[0] = section; + } else { + res.push(section); + } + } + else { + nocode.push(section); + } + } + + res.append(&mut nocode); + + return Some(res); +} + +fn make_symbol_table(sections: &Vec
) -> Option { + + let mut res = vec![]; + let mut pos : u16 = 0; + + for sec in sections { + res.push((sec.name.clone(), pos)); + pos += sec.get_size() as u16; + } + + + return Some(res); + +} + + +pub fn to_binary(sections: Vec
) -> Option> { + + let sorted = sort_sections(sections)?; + println!("sorted sections: {:?}", sorted); + let sy = make_symbol_table(&sorted)?; + println!("symbol table: {:?}", sy); + let k: Vec> = sorted.iter().map(|x| x.to_binary(&sy)).collect::>>>()?; + println!("binary sections: {:?}", k); + + + return Some(k.into_iter().flatten().collect()); + +} + diff --git a/src/assembler/parser.rs b/src/assembler/parser.rs index 5c9592b..28a27bb 100644 --- a/src/assembler/parser.rs +++ b/src/assembler/parser.rs @@ -90,6 +90,11 @@ fn take_between(i: &str, start: &str, stop: &str) -> Option<(String, String)> { return take_alpha_till(&s1, stop); } +/// finds special escaped characters in a string +/// (such as \n) and replaces them with the actual special +/// character +fn escaped_codes() {} + #[test] fn take_between_test() { assert_eq!( @@ -101,7 +106,7 @@ fn take_between_test() { //// SECTION PARSING #[derive(Debug)] -enum SectionContent { +pub enum SectionContent { Code(Vec), CString(String), CVec(), @@ -111,8 +116,8 @@ use SectionContent::*; #[derive(Debug)] pub struct Section { - name: String, - content: SectionContent, + pub name: String, + pub content: SectionContent, } // A .section has a name and variable content. 
@@ -213,6 +218,9 @@ fn parse_code_line(i: &str) -> Result { "call" => { return Ok(CALL(r1.to_owned(), parse_const(r2)?)); } + "halt" => { + return Ok(HALT); + } _ => {} } diff --git a/src/cpu/mod.rs b/src/cpu/mod.rs index 00e1394..61d76ac 100644 --- a/src/cpu/mod.rs +++ b/src/cpu/mod.rs @@ -9,7 +9,7 @@ pub use sysenv::*; pub use registers::*; -use decoder::OP; +pub use decoder::OP; use ram::Ram; #[derive(Debug)] diff --git a/src/main.rs b/src/main.rs index 5b02822..4b20198 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,26 +1,41 @@ +use dekejit::assembler::parser; +use dekejit::assembler::to_binary; use dekejit::cpu::IOBuffer; use dekejit::cpu::CPU; use dekejit::loader::unloader::*; + fn main() { - let mut k = make_string("Hello world!"); - let mut code: Vec = vec![ - 0b0111000100000011, // addi ra 3 - 0b1110000100000011, // ecall ra 3 - 0b1111000000000000, // HALT. - ]; - code.append(&mut k); + let code = std::fs::read_to_string("./tests/assembly/hello_world.grasm").unwrap(); + let mut parser = parser::Parser::new(code); + + let r = parser.parse_sections().unwrap(); + + println!("Parsed sections: {:?}", r); + + let code = to_binary(r).unwrap(); + + // let mut k = make_string("Hello world!"); + // + // let mut code: Vec = vec![ + // 0b0111000100000011, // addi ra 3 + // 0b1110000100000011, // ecall ra 3 + // 0b1111000000000000, // HALT. + // ]; + // + // code.append(&mut k); + // let mut env = IOBuffer::default(); - + // let mut cpu = CPU::new(&mut env); - + // for c in &code[..] { println!("{:#018b}", c); } - + // match cpu.run_code_raw(&code) { Ok(_) => { println!("Result: {}", env.output) diff --git a/tests/assembly/hello_world.grasm b/tests/assembly/hello_world.grasm index bdca998..7df22da 100644 --- a/tests/assembly/hello_world.grasm +++ b/tests/assembly/hello_world.grasm @@ -6,6 +6,8 @@ .text main addi t0 World ; load World's address into t0 call t0 3 ; print string syscall + halt .asciiz hey "Hey dude\n" +