Renamed AST to ast, following the Rust naming convention; removed some useless test files and moved the tests to the appropriate files.
303 lines
7.8 KiB
Rust
303 lines
7.8 KiB
Rust
use super::ast::{Const, Operation};
|
|
use Operation::*;
|
|
|
|
use log::*;
|
|
|
|
/// Represents a parsing failure.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// A line starting with `.` could not be split into a section kind and a name.
    BadSectionHeader,
    /// The kind named in a section header is not one the parser knows about.
    UnknownSectionKind,
    /// The input ended where a section's content line was expected.
    UnexpectedEOF,
    /// A section's content line does not have the expected shape.
    BadSectionContent,
    /// A code line is empty, names an unknown instruction, or lacks operands.
    BadInstruction,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let msg = match self {
            ParseError::BadSectionHeader => "malformed section header",
            ParseError::UnknownSectionKind => "unknown section kind",
            ParseError::UnexpectedEOF => "unexpected end of input",
            ParseError::BadSectionContent => "malformed section content",
            ParseError::BadInstruction => "malformed or unknown instruction",
        };
        f.write_str(msg)
    }
}

// Lets callers propagate `ParseError` through `Box<dyn Error>` and `?`.
impl std::error::Error for ParseError {}
|
|
|
|
/// Represents the state of our parser.
///
/// The only state kept is the input itself, stored as a list of lines.
#[derive(Debug, Clone)]
pub struct Parser {
    /// Lines of the input file.
    input: Vec<String>,
}
|
|
|
|
impl Parser {
|
|
pub fn new(i: String) -> Self {
|
|
Parser { input: sanitize(i) }
|
|
}
|
|
}
|
|
|
|
/// removes comments and whitespaces, and splits the input in lines.
|
|
fn sanitize(i: String) -> Vec<String> {
|
|
i.lines()
|
|
.map(|x| remove_comments(x))
|
|
.map(|x| x.trim())
|
|
.filter(|x| *x != "")
|
|
.map(|x| x.to_string())
|
|
.collect()
|
|
}
|
|
|
|
/// Removes the trailing comment from a single line.
///
/// A comment starts at the first `;` that is not inside a double-quoted
/// string and runs to the end of the line. Everything before that `;` is
/// returned unchanged (including trailing whitespace); a line without a
/// comment is returned as is.
///
/// Quotes are tracked by simple toggling, with no escape handling: a `;`
/// that follows an unbalanced `"` is treated as part of the string.
fn remove_comments(i: &str) -> &str {
    let mut in_string = false;

    for (idx, c) in i.char_indices() {
        match c {
            '"' => in_string = !in_string,
            // `;` is a single byte, so slicing at its index is always valid.
            ';' if !in_string => return &i[..idx],
            _ => {}
        }
    }

    i
}
|
|
|
|
/// Checks if the string `i` starts with `pat`.
///
/// Returns the rest of the input string (everything after `pat`) on success,
/// else returns `None`. An input shorter than `pat` never matches.
fn match_string(i: &str, pat: &str) -> Option<String> {
    i.strip_prefix(pat).map(str::to_owned)
}
|
|
|
|
/// Splits the input at the first occurrence of the `stop` string.
///
/// Returns a tuple holding the text before `stop` and the text after it
/// (`stop` itself is dropped), or `None` when `stop` does not occur.
///
/// Ex: `take_alpha_till("Lorem X Ipsum", "X")` yields
/// `Some(("Lorem ".to_string(), " Ipsum".to_string()))`.
fn take_alpha_till(i: &str, stop: &str) -> Option<(String, String)> {
    i.split_once(stop)
        .map(|(before, after)| (before.to_string(), after.to_string()))
}
|
|
|
|
/// Matches inside the `start` and `stop` delimiters.
|
|
/// Return a tuple with the string in between the two
|
|
/// togheter with the rest of the string.
|
|
fn take_between(i: &str, start: &str, stop: &str) -> Option<(String, String)> {
|
|
let s1 = match_string(i, start)?;
|
|
|
|
return take_alpha_till(&s1, stop);
|
|
}
|
|
|
|
#[test]
fn take_between_test1() {
    // A quoted word followed by trailing text.
    let expected = Some((String::from("wow"), String::from(" etc")));
    let actual = take_between("\"wow\" etc", "\"", "\"");

    assert_eq!(actual, expected);
}
|
|
|
|
/// Placeholder for escape-sequence handling: the intent is to find escaped
/// special characters in a string (such as \n) and replace them with the
/// actual special character.
///
/// Currently does nothing.
/// #TODO: decide whether this is still needed.
fn _escaped_codes() {
    // Intentionally empty.
}
|
|
|
|
//// SECTION PARSING
|
|
|
|
/// Enum to represent possible section content.
|
|
#[derive(Debug)]
|
|
pub enum SectionContent {
|
|
Code(Vec<Operation>),
|
|
CString(String),
|
|
CVec(),
|
|
}
|
|
|
|
use SectionContent::*;
|
|
|
|
/// Binary file section, as parsed from a .grasm file.
|
|
#[derive(Debug)]
|
|
pub struct Section {
|
|
pub name: String,
|
|
pub content: SectionContent,
|
|
}
|
|
|
|
impl Parser {
|
|
pub fn parse_sections(&self) -> Result<Vec<Section>, ParseError> {
|
|
let mut res = vec![];
|
|
|
|
let mut lines = self.input.iter().map(|x| x.as_str()).into_iter();
|
|
|
|
while let Some(l) = lines.next() {
|
|
debug!("Examining line {}", l);
|
|
|
|
// are we looking at a section header?
|
|
if l.starts_with(".") {
|
|
let Some((kind, name)) = take_alpha_till(&l[1..], " ") else {
|
|
return Err(ParseError::BadSectionHeader);
|
|
};
|
|
|
|
// what kind of section?
|
|
match kind.as_str() {
|
|
"text" => {
|
|
let s: Vec<&str> = lines
|
|
.clone()
|
|
.take_while(|&x| !(x).starts_with("."))
|
|
.map(|x| x)
|
|
.collect();
|
|
res.push(Section {
|
|
name: name.trim().to_owned(),
|
|
content: Code(parse_code(&s)?),
|
|
})
|
|
}
|
|
"asciiz" => {
|
|
let Some(s) = lines.next() else {
|
|
return Err(ParseError::UnexpectedEOF);
|
|
};
|
|
let Some((s, _)) = take_between(s.trim(), "\"", "\"") else {
|
|
return Err(ParseError::BadSectionContent);
|
|
};
|
|
res.push(Section {
|
|
name: name.trim().to_owned(),
|
|
content: CString(s),
|
|
})
|
|
}
|
|
"i16" => {
|
|
let _s = lines.next();
|
|
todo!();
|
|
}
|
|
"u16" => {
|
|
let _s = lines.next();
|
|
todo!();
|
|
}
|
|
"vi16" => {
|
|
let _s = lines.next();
|
|
todo!();
|
|
}
|
|
"vu16" => {
|
|
let _s = lines.next();
|
|
todo!();
|
|
}
|
|
_ => {
|
|
return Err(ParseError::UnknownSectionKind);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return Ok(res);
|
|
}
|
|
}
|
|
|
|
fn parse_code(i: &[&str]) -> Result<Vec<Operation>, ParseError> {
|
|
let mut res = vec![];
|
|
|
|
for line in i {
|
|
res.push(parse_code_line(line)?);
|
|
}
|
|
|
|
return Ok(res);
|
|
}
|
|
|
|
/// Parses a single line of code.
|
|
fn parse_code_line(i: &str) -> Result<Operation, ParseError> {
|
|
// every operation has at most 3 arguments
|
|
let mut bits = i.split_whitespace();
|
|
trace!("current parse code line: {}", i);
|
|
let Some(op) = bits.next() else {
|
|
return Err(ParseError::BadInstruction);
|
|
};
|
|
|
|
// no type
|
|
match op {
|
|
"nop" => {
|
|
return Ok(NOP);
|
|
}
|
|
"halt" => {
|
|
return Ok(HALT);
|
|
}
|
|
_ => {}
|
|
};
|
|
|
|
// I-type
|
|
let Some(r1) = bits.next() else {
|
|
return Err(ParseError::BadInstruction);
|
|
};
|
|
let Some(r2) = bits.next() else {
|
|
return Err(ParseError::BadInstruction);
|
|
};
|
|
|
|
match op {
|
|
"addi" => {
|
|
return Ok(ADDI(r1.to_owned(), parse_const(r2)?));
|
|
}
|
|
"sli" => {
|
|
return Ok(SLI(r1.to_owned(), parse_const(r2)?));
|
|
}
|
|
"call" => {
|
|
return Ok(CALL(r1.to_owned(), parse_const(r2)?));
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
let Some(r3) = bits.next() else {
|
|
return Err(ParseError::BadInstruction);
|
|
};
|
|
|
|
// R-type
|
|
match op {
|
|
"add" => {
|
|
return Ok(ADD(r1.to_owned(), r2.to_owned(), r3.to_owned()));
|
|
}
|
|
|
|
"sub" => {
|
|
return Ok(SUB(r1.to_owned(), r2.to_owned(), r3.to_owned()));
|
|
}
|
|
"and" => {
|
|
return Ok(AND(r1.to_owned(), r2.to_owned(), r3.to_owned()));
|
|
}
|
|
"xor" => {
|
|
return Ok(XOR(r1.to_owned(), r2.to_owned(), r3.to_owned()));
|
|
}
|
|
"sll" => {
|
|
return Ok(SLL(r1.to_owned(), r2.to_owned(), r3.to_owned()));
|
|
}
|
|
"beq" => {
|
|
return Ok(BEQ(r1.to_owned(), r2.to_owned(), r3.to_owned()));
|
|
}
|
|
"bgt" => {
|
|
return Ok(BGT(r1.to_owned(), r2.to_owned(), r3.to_owned()));
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
// J-type
|
|
match op {
|
|
"jal" => {
|
|
return Ok(JAL(r1.to_owned(), r2.to_owned(), parse_const(&r3)?));
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
return Err(ParseError::BadInstruction);
|
|
}
|
|
|
|
fn parse_const(i: &str) -> Result<Const, ParseError> {
|
|
// we try to parse the number, if we fail, we treat it as a string.
|
|
let Ok(num) = i.parse() else {
|
|
return Ok(Const::CS(i.to_owned()));
|
|
};
|
|
return Ok(Const::C(num));
|
|
}
|
|
|
|
/// TESTS
|
|
|
|
#[test]
fn parser_test() {
    // Only checks that the sample file can be read and that parsing it does
    // not panic; the parse result itself is not inspected.
    let source = std::fs::read_to_string("./tests/assembly/hello_world.grasm").unwrap();

    let _parsed = Parser::new(source).parse_sections();

    // #TODO: WRITE PARSER TEST SUITE!
    //assert_eq!(r, Ok(vec![]));
}
|