From f66bdb9b759b86f1998a9ebe4af8d97d3c0e881b Mon Sep 17 00:00:00 2001 From: raphy Date: Wed, 3 May 2023 08:40:47 +0200 Subject: [PATCH] refactored logging / added clap for cli --- Cargo.toml | 3 +++ src/assembler/AST.rs | 13 ++++++++++--- src/assembler/encoder.rs | 36 +++++++++++++++++++++++----------- src/assembler/mod.rs | 42 ++++++++++++++++++++++++++++++++++------ src/assembler/parser.rs | 29 +++++++++++++++++---------- src/cpu/decoder.rs | 5 ++++- src/cpu/mod.rs | 16 +++++++++++---- src/cpu/ram.rs | 7 +++++-- src/cpu/sysenv/io_vec.rs | 10 +++++----- src/lib.rs | 2 +- src/main.rs | 14 ++++++++++---- 11 files changed, 130 insertions(+), 47 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2ad23fd..be85442 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +clap = { version = "4.2.5", features = ["derive"] } +log = "0.4.17" +simple_logger = "4.1.0" diff --git a/src/assembler/AST.rs b/src/assembler/AST.rs index 46497d7..6e3d9d6 100644 --- a/src/assembler/AST.rs +++ b/src/assembler/AST.rs @@ -1,18 +1,25 @@ +/// Type alias to represent a register. type RegisterMem = String; +/// Type alias to represent a label used in place of a const. pub type ConstId = String; +/// A const can either be a number or a label of some section. #[derive(Debug)] pub enum Const { CS(ConstId), C(u8), } +/// Operations as they are parsed, before translating them to binary. +/// This type is used internally by the parser and +/// differs from cpu::Operation. #[derive(Debug)] pub enum Operation { + /// No type. 
NOP, HALT, - // R type + /// R type ADD(RegisterMem, RegisterMem, RegisterMem), SUB(RegisterMem, RegisterMem, RegisterMem), AND(RegisterMem, RegisterMem, RegisterMem), @@ -23,11 +30,11 @@ pub enum Operation { LOAD(RegisterMem, RegisterMem, RegisterMem), STORE(RegisterMem, RegisterMem, RegisterMem), - // I Type + /// I Type SLI(RegisterMem, Const), ADDI(RegisterMem, Const), CALL(RegisterMem, Const), - // J Type + /// J Type JAL(RegisterMem, RegisterMem, Const), } diff --git a/src/assembler/encoder.rs b/src/assembler/encoder.rs index d7bf849..8d69697 100644 --- a/src/assembler/encoder.rs +++ b/src/assembler/encoder.rs @@ -1,34 +1,48 @@ use super::AST::*; use crate::cpu::get_num; +use log::{trace, warn}; + + +/// Trait to represent a format we can translate our assembly to. pub trait CodeFormat { fn encode_op(op: &Operation, sy: &SymbolTable, current_pc: u16) -> Option where Self: Sized; } +/// Symbol table implemented as a vector. +/// This is a zero-cost newtype around the vector, used to implement +/// lookup as a method. + +#[repr(transparent)] #[derive(Debug)] pub struct SymbolTable(pub Vec<(String, u16)>); impl SymbolTable { - pub fn lookup(&self, query: &str) -> u16 { + /// Finds a symbol in the symbol table. + /// Returns `None` if the symbol is not in the symbol table. + pub fn lookup(&self, query: &str) -> Option { + trace!("Looking up {} in the symbol table.", query); let SymbolTable(sy) = self; for (name, loc) in sy.into_iter() { if query == (*name) { - return *loc; + return Some(*loc); } } - panic!( - "Symbol {} not found in symbol table. \nCurrent symbol table:{:?}", - query, sy - ); + // panic!( + // "Symbol {} not found in symbol table. 
\nCurrent symbol table:{:?}", + // query, sy + // ); + warn!("Symbol {} not found in symbol table.", query); + return None } } impl CodeFormat for u16 { fn encode_op(op: &Operation, sy: &SymbolTable, current_pc: u16) -> Option { - println!("encoding {:?}", op); + trace!("encoding {:?}", op); match op { Operation::NOP => Some(0b0000000000000000), Operation::HALT => Some(0b1111111111111111), @@ -107,7 +121,7 @@ impl CodeFormat for u16 { Operation::SLI(r1, c) => { let r1b = get_num(&r1)? as u16; let cb = match c { - Const::CS(label) => sy.lookup(&label), + Const::CS(label) => sy.lookup(&label)?, Const::C(n) => (*n) as u16, }; return Some((0b0110 << 12) + (r1b << 8) + cb); @@ -115,7 +129,7 @@ impl CodeFormat for u16 { Operation::ADDI(r1, c) => { let r1b = get_num(&r1)? as u16; let cb = match c { - Const::CS(label) => sy.lookup(&label), + Const::CS(label) => sy.lookup(&label)?, Const::C(n) => (*n) as u16, }; return Some((0b0111 << 12) + (r1b << 8) + cb); @@ -123,7 +137,7 @@ impl CodeFormat for u16 { Operation::CALL(r1, c) => { let r1b = get_num(&r1)? as u16; let cb = match c { - Const::CS(label) => sy.lookup(&label), + Const::CS(label) => sy.lookup(&label)?, Const::C(n) => (*n) as u16, }; return Some((0b1110 << 12) + (r1b << 8) + cb); @@ -132,7 +146,7 @@ impl CodeFormat for u16 { let r1b = get_num(&r1)? as u16; let r2b = get_num(&r2)? 
as u16; let cb = match c { - Const::CS(label) => current_pc - sy.lookup(&label), + Const::CS(label) => current_pc - sy.lookup(&label)?, Const::C(n) => (*n) as u16, }; return Some((0b1010 << 12) + (r1b << 8) + (r2b << 4) + cb); diff --git a/src/assembler/mod.rs b/src/assembler/mod.rs index 9cd7914..860bc79 100644 --- a/src/assembler/mod.rs +++ b/src/assembler/mod.rs @@ -6,16 +6,26 @@ mod tests; use encoder::CodeFormat; use encoder::SymbolTable; +use log::{trace, debug}; + use parser::Section; use crate::loader::unloader::make_string; -use self::parser::SectionContent; +use parser::SectionContent; impl Section { + /// Calculates the size, in binary, of a section. fn get_size(&self) -> usize { match &self.content { + // code is 1 word for instruction. SectionContent::Code(c) => c.len(), + + // UTF-8 strings are collections of 8-bit chunks, but they're + // packed into words of 16 bits + 16 bit NULL. + // + // If there's an uneven number of bytes in a string, + // we add a 8 bit empty padding and then the NULL byte. SectionContent::CString(s) => { let c = s.len(); if c % 2 != 0 { @@ -28,15 +38,22 @@ impl Section { } } + /// Converts a section to binary. Needs symbol table to + /// resolve labels, and to quickly get the address of + /// this own section. fn to_binary(&self, sy: &SymbolTable) -> Option> { - let own_address = sy.lookup(&self.name); + let own_address = sy.lookup(&self.name)?; match &self.content { SectionContent::Code(c) => { let mut res = vec![]; + + // we keep track of the program counter because we + // need to calculate relative jumps. let mut pc = own_address; for op in c.iter() { - println!("converting {:?}", op); + trace!("converting {:?}", op); res.push(CodeFormat::encode_op(op, sy, pc)?); + // pc simply increases by one after each operation. pc += 1; } return Some(res); @@ -49,6 +66,13 @@ impl Section { } } + +/// Sorts a list of sections. 
+/// All .text sections containing code are +/// put at the beginning of the binary file, in the order they +/// appear in the assembly file, except the "main" section, +/// which is the entrypoint of our program and must be put +/// at the very beginning. fn sort_sections(sections: Vec
) -> Option> { // we start with a mock section that we'll just replace. let mut res: Vec
= vec![Section { @@ -73,9 +97,14 @@ fn sort_sections(sections: Vec
) -> Option> { res.append(&mut nocode); + // TODO: PANIC WHEN NO MAIN; + return Some(res); } +/// Creates a symbol table from a list of sorted sections. +/// Assumes the sections are already sorted in the +/// desired order. fn make_symbol_table<'a>(sections: &'a Vec
) -> Option { let mut res = vec![]; let mut pos: u16 = 0; @@ -88,16 +117,17 @@ fn make_symbol_table<'a>(sections: &'a Vec
) -> Option { return Some(SymbolTable(res)); } +/// Converts a vector of sections into binary. pub fn to_binary(sections: Vec
) -> Option> { let sorted = sort_sections(sections)?; - println!("sorted sections: {:?}", sorted); + trace!("sorted sections: {:?}", sorted); let sy = make_symbol_table(&sorted)?; - println!("symbol table: {:?}", sy); + debug!("symbol table: {:?}", sy); let k: Vec> = sorted .iter() .map(|x| x.to_binary(&sy)) .collect::>>>()?; - println!("binary sections: {:?}", k); + trace!("binary sections: {:?}", k); return Some(k.into_iter().flatten().collect()); } diff --git a/src/assembler/parser.rs b/src/assembler/parser.rs index 2a1231b..0327e31 100644 --- a/src/assembler/parser.rs +++ b/src/assembler/parser.rs @@ -1,8 +1,9 @@ use super::AST::{Const, Operation}; use Operation::*; -type Loc = u16; +use log::*; +/// Represents a parsing failure. #[derive(Debug)] pub enum ParseError { BadSectionHeader, @@ -13,23 +14,20 @@ pub enum ParseError { } /// represents the state of our parser. +/// Sadly parsing is stateless, pub struct Parser { - loc: u16, // current number of operations parsed. - symtable: Vec<(String, u16)>, // symbols encountered, position. - pub input: Vec, // input file + input: Vec, // input file } impl Parser { pub fn new(i: String) -> Self { Parser { - loc: 0, - symtable: vec![], input: sanitize(i), } } } -// removes comments and whitespaces, and splits the input in lines. +/// removes comments and whitespaces, and splits the input in lines. fn sanitize(i: String) -> Vec { i.lines() .map(|x| remove_comments(x)) @@ -39,6 +37,7 @@ fn sanitize(i: String) -> Vec { .collect() } +/// Removes comments. fn remove_comments(i: &str) -> &str { if let Some(end) = i.find(';') { return &i[0..end]; @@ -105,6 +104,7 @@ fn take_between_test() { //// SECTION PARSING +/// Enum to represent possible section content. #[derive(Debug)] pub enum SectionContent { Code(Vec), @@ -114,13 +114,14 @@ pub enum SectionContent { use SectionContent::*; +/// Binary file section, as parsed from a .grasm file. 
#[derive(Debug)] pub struct Section { pub name: String, pub content: SectionContent, } -// A .section has a name and variable content. + impl Parser { pub fn parse_sections(&mut self) -> Result, ParseError> { let mut res = vec![]; @@ -128,12 +129,15 @@ impl Parser { let mut lines = self.input.iter().map(|x| x.as_str()).into_iter(); while let Some(l) = lines.next() { - println!("Examing line: {}", l); + debug!("Examining line {}", l); + + // are we looking at a section header? if l.starts_with(".") { let Some((kind, name)) = take_alpha_till(&l[1..], " ") else { return Err(ParseError::BadSectionHeader); }; + // what kind of section? match kind.as_str() { "text" => { let s: Vec<&str> = lines @@ -156,15 +160,19 @@ impl Parser { } "i16" => { let _s = lines.next(); + todo!(); } "u16" => { let _s = lines.next(); + todo!(); } "vi16" => { let _s = lines.next(); + todo!(); } "vu16" => { let _s = lines.next(); + todo!(); } _ => { return Err(ParseError::UnknownSectionKind); @@ -187,10 +195,11 @@ fn parse_code(i: &[&str]) -> Result, ParseError> { return Ok(res); } +/// Parses a single line of code. fn parse_code_line(i: &str) -> Result { // every operation has at most 3 arguments let mut bits = i.split_whitespace(); - println!("current parse code line: {}", i); + trace!("current parse code line: {}", i); let Some(op) = bits.next() else {return Err(ParseError::BadInstruction)}; // no type diff --git a/src/cpu/decoder.rs b/src/cpu/decoder.rs index 9ce7741..0905bfc 100644 --- a/src/cpu/decoder.rs +++ b/src/cpu/decoder.rs @@ -2,6 +2,7 @@ use super::registers::Register; type Constant = i8; // 8 bits max, so it works. +// TODO: Use macros and a single reference for the ops. #[derive(Debug)] pub enum OP { NOP, @@ -26,6 +27,7 @@ pub enum OP { pub use OP::*; +/// Decodes a single binary operation. 
pub fn decode(op: u16) -> OP { let opcode = op >> 12; let dest = ((op & 0x0F00) >> 8) as Register; @@ -55,6 +57,7 @@ pub fn decode(op: u16) -> OP { 0b1101 => STORE(dest, r1, r2), 0b1110 => CALL(dest, c), 0b1111 => HALT, - _ => panic!("Not an operation."), + // opcode, by construction, is a 4-bit binary number. + _ => unreachable!(), }; } diff --git a/src/cpu/mod.rs b/src/cpu/mod.rs index 368bc6f..1ca4870 100644 --- a/src/cpu/mod.rs +++ b/src/cpu/mod.rs @@ -5,13 +5,15 @@ mod sysenv; mod tests; -pub use sysenv::*; +pub use sysenv::{IOBuffer, Sys}; pub use registers::*; pub use decoder::OP; use ram::Ram; +use log::{debug}; + #[derive(Debug)] pub enum ExecErr { InvalidRegister, @@ -36,7 +38,6 @@ pub struct CPU<'a, T> { pub regs: Registers, pub ram: Ram, pub env: &'a mut T, - // should execution be halted? not sure if to include this or nah halt: bool, } @@ -160,20 +161,26 @@ where return Ok(()); } + /// fetch the next operation from memory. + /// called by step. fn fetch(&self) -> CPUResult { let binop = self.ram.get(self.regs.pc).ok_or(ExecErr::InvalidPC)?; - println!("binop: {:#018b}", binop); + // debug!("fetched binop: {:#018b}", binop); Ok(decode(binop)) } + /// fetches an operation and runs one clock cycle. fn step(&mut self) -> CPUResult<()> { let op = self.fetch()?; - println!("fetched op: {:?}, pc: {} ", op, self.regs.pc); + debug!("fetched op: {:?}, pc: {} ", op, self.regs.pc); self.execute_op(op) } + + /// takes binary code as input, puts it at the start of memory, and + /// executes the code. Mainly for testing purposes. pub fn run_code_raw(&mut self, bin_code: &[Word]) -> CPUResult<()> { self.halt = false; // put the code in memory: @@ -186,6 +193,7 @@ where Ok(()) } + /// Creates a new CPU from an existing environment. 
pub fn new(env: &'a mut T) -> Self { CPU { regs: Registers::default(), diff --git a/src/cpu/ram.rs b/src/cpu/ram.rs index c5e9d88..ac58b5d 100644 --- a/src/cpu/ram.rs +++ b/src/cpu/ram.rs @@ -1,10 +1,13 @@ use crate::cpu::registers::Word; +// TODO: abstract this into a trait and write different implementations +// of RAM, such as a growable container/tree. + /// We'll define our RAM as a static array. /// The maximum adressable memory is, right now, just 65kbit of memory. -// pub const MAX_MEM: usize = 65536; -pub const MAX_MEM: usize = 40; +pub const MAX_MEM: usize = 65535; +// pub const MAX_MEM: usize = 40; #[derive(Debug)] pub struct Ram { diff --git a/src/cpu/sysenv/io_vec.rs b/src/cpu/sysenv/io_vec.rs index 7ad4e54..186b8db 100644 --- a/src/cpu/sysenv/io_vec.rs +++ b/src/cpu/sysenv/io_vec.rs @@ -1,5 +1,7 @@ use std::io::stdin; +use log::{debug, info}; + use crate::{ cpu::{ram::MAX_MEM, registers::Register}, loader::{loader::find_and_read_string, unloader::make_string}, @@ -7,13 +9,11 @@ use crate::{ use super::*; -// first working environment, we get input from stdin and we write output -// to a string. -// - // using strings to singal errors kinda sucks. // TODO: Fix this +/// Rudimentary environment, good for testing. +/// Gets input from stdin and writes output to a string. 
#[derive(Debug, Default)] pub struct IOBuffer { pub output: String, @@ -21,7 +21,7 @@ pub struct IOBuffer { impl Sys for IOBuffer { fn call(cpu: &mut CPU, r: Register, c: Word) -> CPUResult<()> { - println!("called: {}", c); + debug!("called syscall: {}", c); match c { // 0: write an integer to output 0 => { diff --git a/src/lib.rs b/src/lib.rs index 8acb777..2d22733 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,7 @@ use std::mem::transmute; pub mod cpu; pub mod jit; pub mod loader; -// pub mod ; +pub mod cli; pub mod assembler; // diff --git a/src/main.rs b/src/main.rs index bc4a217..5d9c53b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,13 @@ use dekejit::assembler::to_binary; use dekejit::cpu::IOBuffer; use dekejit::cpu::CPU; +// use simple_logger::SimpleLogger; + fn main() { + + simple_logger::init_with_level(log::Level::Warn).unwrap(); + + let args: Vec = args().collect(); if args.len() < 2 { @@ -26,7 +32,7 @@ fn main() { let r = parser.parse_sections().unwrap(); - println!("Parsed sections: {:?}", r); + // println!("Parsed sections: {:?}", r); let code = to_binary(r).unwrap(); @@ -44,9 +50,9 @@ fn main() { // let mut cpu = CPU::new(&mut env); // - for c in &code[..] { - println!("{:#018b}", c); - } + // for c in &code[..] { + // // println!("{:#018b}", c); + // } // match cpu.run_code_raw(&code) { Ok(_) => {