Dekejit/src/assembler/parser.rs
raphy 8f894174d9 preparing to refactor
renamed AST to ast, following rust naming convention, removed some useless test files and moved the tests to the appropriate files.
2023-11-13 00:06:16 +01:00

303 lines
7.8 KiB
Rust

use super::ast::{Const, Operation};
use Operation::*;
use log::*;
/// Represents a parsing failure.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// printed for users and propagated through `Box<dyn Error>`, and
/// `PartialEq` so results can be compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// A line starting with `.` has no space separating the section kind
    /// from the section name.
    BadSectionHeader,
    /// The section kind in a header is not one of the recognized kinds.
    UnknownSectionKind,
    /// The input ended where a section's content line was expected.
    UnexpectedEOF,
    /// The content line of a section could not be parsed
    /// (e.g. an `asciiz` string that is not enclosed in double quotes).
    BadSectionContent,
    /// A line of code is not a recognized instruction or lacks operands.
    BadInstruction,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let msg = match self {
            ParseError::BadSectionHeader => "malformed section header",
            ParseError::UnknownSectionKind => "unknown section kind",
            ParseError::UnexpectedEOF => "unexpected end of input",
            ParseError::BadSectionContent => "malformed section content",
            ParseError::BadInstruction => "malformed or unknown instruction",
        };
        f.write_str(msg)
    }
}

impl std::error::Error for ParseError {}
/// Represents the state of our parser.
///
/// The only state kept is the input itself, already split into lines;
/// parsing does not mutate it.
#[derive(Debug, Clone)]
pub struct Parser {
    /// Lines of the input file.
    input: Vec<String>,
}
impl Parser {
    /// Builds a parser over the given source text.
    ///
    /// The text is passed through `sanitize` first, and the resulting
    /// lines are what the parser stores.
    pub fn new(i: String) -> Self {
        let input = sanitize(i);
        Self { input }
    }
}
/// Splits the input into lines, strips the comment from each line, trims
/// surrounding whitespace, and drops lines that end up empty.
fn sanitize(i: String) -> Vec<String> {
    let mut cleaned = Vec::new();
    for raw in i.lines() {
        let stripped = remove_comments(raw).trim();
        if !stripped.is_empty() {
            cleaned.push(stripped.to_string());
        }
    }
    cleaned
}
/// Removes a trailing comment from a single line.
///
/// A comment starts at the first `;` that is not inside a double-quoted
/// string and runs to the end of the line. A `;` between two `"` characters
/// is kept, so string content such as `"a;b"` is not truncated.
///
/// Returns the part of `i` preceding the comment, or all of `i` when the
/// line contains no comment.
fn remove_comments(i: &str) -> &str {
    let mut in_string = false;
    for (idx, c) in i.char_indices() {
        match c {
            // Every double quote toggles the "inside a string" state.
            '"' => in_string = !in_string,
            // `;` is ASCII, so `idx` is always a valid slice boundary.
            ';' if !in_string => return &i[..idx],
            _ => {}
        }
    }
    i
}
/// Checks whether the string `i` starts with `pat`.
///
/// Returns the rest of the input string (everything after `pat`) on success,
/// or `None` when `i` does not start with `pat`. This includes the case
/// where `i` is shorter than `pat`.
fn match_string(i: &str, pat: &str) -> Option<String> {
    i.strip_prefix(pat).map(str::to_owned)
}
/// Splits the input at the first occurrence of the `stop` string.
///
/// Returns a tuple containing the text preceding `stop` and the text
/// following it (`stop` itself is dropped), or `None` when `stop` does not
/// occur in `i`.
///
/// Ex: `take_alpha_till("Lorem X Ipsum", "X")` gives
/// `Some(("Lorem ".to_string(), " Ipsum".to_string()))`.
fn take_alpha_till(i: &str, stop: &str) -> Option<(String, String)> {
    let (before, after) = i.split_once(stop)?;
    Some((before.to_owned(), after.to_owned()))
}
/// Extracts the text enclosed by the `start` and `stop` delimiters.
///
/// The input must begin with `start`. Returns a tuple with the string
/// between the two delimiters together with the rest of the input after
/// `stop`, or `None` when either delimiter is not matched.
fn take_between(i: &str, start: &str, stop: &str) -> Option<(String, String)> {
    match_string(i, start).and_then(|after_start| take_alpha_till(&after_start, stop))
}
/// A quoted word is extracted and the text after the closing quote is
/// returned as the rest.
#[test]
fn take_between_test1() {
    let actual = take_between("\"wow\" etc", "\"", "\"");
    let expected = Some((String::from("wow"), String::from(" etc")));
    assert_eq!(actual, expected);
}
/// Placeholder for escape-sequence handling.
///
/// Intended to find special escaped characters in a string (such as `\n`)
/// and replace them with the actual special character. The body is
/// currently empty, so calling it does nothing.
/// #TODO: do we need this? I forgot.
fn _escaped_codes() {}
//// SECTION PARSING
/// Enum to represent possible section content.
#[derive(Debug)]
pub enum SectionContent {
    /// Operations parsed from the lines of a `text` section.
    Code(Vec<Operation>),
    /// String taken from between the double quotes of an `asciiz` section.
    CString(String),
    /// Not constructed anywhere in this file.
    /// NOTE(review): presumably reserved for vector sections — confirm.
    CVec(),
}
use SectionContent::*;
/// Binary file section, as parsed from a .grasm file.
#[derive(Debug)]
pub struct Section {
    /// Section name: the text following the section kind in the header,
    /// with surrounding whitespace trimmed.
    pub name: String,
    /// Parsed content of the section.
    pub content: SectionContent,
}
impl Parser {
    /// Parses the stored input lines into a list of [`Section`]s.
    ///
    /// A section starts with a header line of the form `.<kind> <name>`.
    /// What follows depends on the kind:
    ///
    /// * `text`: every line up to (but excluding) the next header is parsed
    ///   as one instruction.
    /// * `asciiz`: the next line must hold a string enclosed in double quotes.
    ///
    /// Lines that are not headers and do not belong to a section body are
    /// skipped.
    ///
    /// # Errors
    ///
    /// * [`ParseError::BadSectionHeader`]: a header has no space between
    ///   the kind and the name.
    /// * [`ParseError::UnknownSectionKind`]: the kind is not recognized.
    /// * [`ParseError::UnexpectedEOF`]: an `asciiz` header is the last line.
    /// * [`ParseError::BadSectionContent`]: the `asciiz` content is not a
    ///   double-quoted string.
    /// * Any error returned while parsing the lines of a `text` section.
    ///
    /// # Panics
    ///
    /// Panics on `i16`, `u16`, `vi16` and `vu16` sections, which are not
    /// implemented yet.
    pub fn parse_sections(&self) -> Result<Vec<Section>, ParseError> {
        let mut res = vec![];
        // Peekable so that a `text` body can be consumed up to the next
        // header without swallowing the header itself.
        let mut lines = self.input.iter().map(String::as_str).peekable();

        while let Some(l) = lines.next() {
            debug!("Examining line {}", l);

            // are we looking at a section header?
            let Some(header) = l.strip_prefix('.') else {
                continue;
            };
            let Some((kind, name)) = take_alpha_till(header, " ") else {
                return Err(ParseError::BadSectionHeader);
            };

            // what kind of section?
            match kind.as_str() {
                "text" => {
                    // Consume the body here so the outer loop does not
                    // walk over the same instruction lines a second time.
                    let mut body: Vec<&str> = Vec::new();
                    while let Some(line) = lines.next_if(|x| !x.starts_with('.')) {
                        body.push(line);
                    }

                    res.push(Section {
                        name: name.trim().to_owned(),
                        content: Code(parse_code(&body)?),
                    })
                }
                "asciiz" => {
                    let Some(s) = lines.next() else {
                        return Err(ParseError::UnexpectedEOF);
                    };
                    let Some((s, _)) = take_between(s.trim(), "\"", "\"") else {
                        return Err(ParseError::BadSectionContent);
                    };

                    res.push(Section {
                        name: name.trim().to_owned(),
                        content: CString(s),
                    })
                }
                "i16" => {
                    let _s = lines.next();
                    todo!();
                }
                "u16" => {
                    let _s = lines.next();
                    todo!();
                }
                "vi16" => {
                    let _s = lines.next();
                    todo!();
                }
                "vu16" => {
                    let _s = lines.next();
                    todo!();
                }
                _ => {
                    return Err(ParseError::UnknownSectionKind);
                }
            }
        }

        Ok(res)
    }
}
/// Parses every line of a code section into an operation, in order.
///
/// Stops at the first line that fails to parse and returns its error.
fn parse_code(i: &[&str]) -> Result<Vec<Operation>, ParseError> {
    i.iter().map(|line| parse_code_line(line)).collect()
}
/// Parses a single line of code.
///
/// The line is split on whitespace. The first token selects the operation
/// and the following tokens are its operands; tokens beyond the ones an
/// operation needs are ignored.
///
/// Returns `ParseError::BadInstruction` when the line is empty, the
/// operation is not recognized, or operands are missing.
fn parse_code_line(i: &str) -> Result<Operation, ParseError> {
    trace!("current parse code line: {}", i);

    // every operation has at most 3 arguments
    let tokens: Vec<&str> = i.split_whitespace().collect();
    let reg = |name: &str| name.to_owned();

    let operation = match tokens[..] {
        // no type
        ["nop", ..] => NOP,
        ["halt", ..] => HALT,

        // I-type
        ["addi", r1, imm, ..] => ADDI(reg(r1), parse_const(imm)?),
        ["sli", r1, imm, ..] => SLI(reg(r1), parse_const(imm)?),
        ["call", r1, imm, ..] => CALL(reg(r1), parse_const(imm)?),

        // R-type
        ["add", r1, r2, r3, ..] => ADD(reg(r1), reg(r2), reg(r3)),
        ["sub", r1, r2, r3, ..] => SUB(reg(r1), reg(r2), reg(r3)),
        ["and", r1, r2, r3, ..] => AND(reg(r1), reg(r2), reg(r3)),
        ["xor", r1, r2, r3, ..] => XOR(reg(r1), reg(r2), reg(r3)),
        ["sll", r1, r2, r3, ..] => SLL(reg(r1), reg(r2), reg(r3)),
        ["beq", r1, r2, r3, ..] => BEQ(reg(r1), reg(r2), reg(r3)),
        ["bgt", r1, r2, r3, ..] => BGT(reg(r1), reg(r2), reg(r3)),

        // J-type
        ["jal", r1, r2, imm, ..] => JAL(reg(r1), reg(r2), parse_const(imm)?),

        // empty line, unknown operation, or too few operands
        _ => return Err(ParseError::BadInstruction),
    };

    Ok(operation)
}
/// Parses a constant operand.
///
/// If the text parses as a number it becomes `Const::C`; otherwise the
/// text itself is kept as `Const::CS`. This function never returns `Err`.
fn parse_const(i: &str) -> Result<Const, ParseError> {
    // we try to parse the number, if we fail, we treat it as a string.
    match i.parse() {
        Ok(num) => Ok(Const::C(num)),
        Err(_) => Ok(Const::CS(i.to_owned())),
    }
}
/// Smoke test: reads the hello-world assembly file and runs the section
/// parser over it. The parse result is not checked yet.
#[test]
fn parser_test() {
    let source = std::fs::read_to_string("./tests/assembly/hello_world.grasm").unwrap();
    let _sections = Parser::new(source).parse_sections();
    // #TODO: WRITE PARSER TEST SUITE!
    //assert_eq!(r, Ok(vec![]));
}