use super::ast::{Const, Operation}; use Operation::*; use log::*; /// Represents a parsing failure. #[derive(Debug)] pub enum ParseError { BadSectionHeader, UnknownSectionKind, UnexpectedEOF, BadSectionContent, BadInstruction, } /// represents the state of our parser. /// Sadly parsing is stateless, pub struct Parser { input: Vec, // input file } impl Parser { pub fn new(i: String) -> Self { Parser { input: sanitize(i) } } } /// removes comments and whitespaces, and splits the input in lines. fn sanitize(i: String) -> Vec { i.lines() .map(|x| remove_comments(x)) .map(|x| x.trim()) .filter(|x| *x != "") .map(|x| x.to_string()) .collect() } /// Removes comments. fn remove_comments(i: &str) -> &str { if let Some(end) = i.find(';') { return &i[0..end]; } else { return i; } } /// Checks if the string i starts with pat. /// Returns the rest of the input string on success /// else, returns None fn match_string(i: &str, pat: &str) -> Option { let mut in_chars = i.chars(); for pat_c in pat.chars() { if let Some(c) = in_chars.next() { if c != pat_c { return None; } }; } let rest = in_chars.collect(); return Some(rest); } /// Matches till the "stop" string is found. /// Returns a tuple containing the preceeding string and /// the rest of the input string. /// /// Ex: assert_eq!(Ok("Lorem ", " Ipsum"), match_alpha_till("Lorem X Ipsum", "X") ); /// fn take_alpha_till(i: &str, stop: &str) -> Option<(String, String)> { // if let Some((matched, rest)) = i.split_once(stop) { return Some((matched.to_string(), rest.to_string())); } else { return None; } } /// Matches inside the `start` and `stop` delimiters. /// Return a tuple with the string in between the two /// togheter with the rest of the string. fn take_between(i: &str, start: &str, stop: &str) -> Option<(String, String)> { let s1 = match_string(i, start)?; return take_alpha_till(&s1, stop); } #[test] fn take_between_test1() { assert_eq!( take_between("\"wow\" etc", "\"", "\""), Some(("wow".to_string(), " etc".to_string())) ); } /// finds special escaped characters in a string /// (such as \n) and replaces them with the actual special /// character /// #TODO: do we need this? I forgot. fn _escaped_codes() {} //// SECTION PARSING /// Enum to represent possible section content. #[derive(Debug)] pub enum SectionContent { Code(Vec), CString(String), CVec(), } use SectionContent::*; /// Binary file section, as parsed from a .grasm file. #[derive(Debug)] pub struct Section { pub name: String, pub content: SectionContent, } impl Parser { pub fn parse_sections(&self) -> Result, ParseError> { let mut res = vec![]; let mut lines = self.input.iter().map(|x| x.as_str()).into_iter(); while let Some(l) = lines.next() { debug!("Examining line {}", l); // are we looking at a section header? if l.starts_with(".") { let Some((kind, name)) = take_alpha_till(&l[1..], " ") else { return Err(ParseError::BadSectionHeader); }; // what kind of section? match kind.as_str() { "text" => { let s: Vec<&str> = lines .clone() .take_while(|&x| !(x).starts_with(".")) .map(|x| x) .collect(); res.push(Section { name: name.trim().to_owned(), content: Code(parse_code(&s)?), }) } "asciiz" => { let Some(s) = lines.next() else { return Err(ParseError::UnexpectedEOF); }; let Some((s, _)) = take_between(s.trim(), "\"", "\"") else { return Err(ParseError::BadSectionContent); }; res.push(Section { name: name.trim().to_owned(), content: CString(s), }) } "i16" => { let _s = lines.next(); todo!(); } "u16" => { let _s = lines.next(); todo!(); } "vi16" => { let _s = lines.next(); todo!(); } "vu16" => { let _s = lines.next(); todo!(); } _ => { return Err(ParseError::UnknownSectionKind); } } } } return Ok(res); } } fn parse_code(i: &[&str]) -> Result, ParseError> { let mut res = vec![]; for line in i { res.push(parse_code_line(line)?); } return Ok(res); } /// Parses a single line of code. fn parse_code_line(i: &str) -> Result { // every operation has at most 3 arguments let mut bits = i.split_whitespace(); trace!("current parse code line: {}", i); let Some(op) = bits.next() else { return Err(ParseError::BadInstruction); }; // no type match op { "nop" => { return Ok(NOP); } "halt" => { return Ok(HALT); } _ => {} }; // I-type let Some(r1) = bits.next() else { return Err(ParseError::BadInstruction); }; let Some(r2) = bits.next() else { return Err(ParseError::BadInstruction); }; match op { "addi" => { return Ok(ADDI(r1.to_owned(), parse_const(r2)?)); } "sli" => { return Ok(SLI(r1.to_owned(), parse_const(r2)?)); } "call" => { return Ok(CALL(r1.to_owned(), parse_const(r2)?)); } _ => {} } let Some(r3) = bits.next() else { return Err(ParseError::BadInstruction); }; // R-type match op { "add" => { return Ok(ADD(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "sub" => { return Ok(SUB(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "and" => { return Ok(AND(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "xor" => { return Ok(XOR(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "sll" => { return Ok(SLL(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "beq" => { return Ok(BEQ(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "bgt" => { return Ok(BGT(r1.to_owned(), r2.to_owned(), r3.to_owned())); } _ => {} } // J-type match op { "jal" => { return Ok(JAL(r1.to_owned(), r2.to_owned(), parse_const(&r3)?)); } _ => {} } return Err(ParseError::BadInstruction); } fn parse_const(i: &str) -> Result { // we try to parse the number, if we fail, we treat it as a string. let Ok(num) = i.parse() else { return Ok(Const::CS(i.to_owned())); }; return Ok(Const::C(num)); } /// TESTS #[test] fn parser_test() { let code = std::fs::read_to_string("./tests/assembly/hello_world.grasm").unwrap(); let parser = Parser::new(code); let _r = parser.parse_sections(); // #TODO: WRITE PARSER TEST SUITE! //assert_eq!(r, Ok(vec![])); }