use super::AST::{Const, Operation};
use Operation::*;

/// Position of an operation, counted in operations from the start of the input.
type Loc = u16;

/// Errors that can be reported while parsing an assembly source.
#[derive(Debug)]
pub enum ParseError {
    /// A section header (a line starting with `.`) has no space separating
    /// the section kind from the section name.
    BadSectionHeader,
    /// The section kind is not one of the kinds the parser recognises.
    UnknownSectionKind,
    /// The input ended where the content of a section was expected.
    UnexpectedEOF,
    /// The content of a section does not have the expected shape
    /// (e.g. a string section whose content is not wrapped in double quotes).
    BadSectionContent,
    /// A code line is empty, uses an unknown mnemonic, or has too few operands.
    BadInstruction,
}

/// Represents the state of our parser.
pub struct Parser {
    /// Current number of operations parsed.
    loc: Loc,
    /// Symbols encountered, each paired with its position.
    symtable: Vec<(String, Loc)>,
    /// Input file: one entry per non-empty line, with comments and
    /// surrounding whitespace already removed.
    pub input: Vec<String>,
}

impl Parser {
    /// Creates a parser over the source text `i`.
    ///
    /// The text is sanitized immediately (comments stripped, lines trimmed,
    /// empty lines dropped); the result is stored in `input`.
    pub fn new(i: String) -> Self {
        Parser {
            loc: 0,
            symtable: Vec::new(),
            input: sanitize(i),
        }
    }
}

/// Removes comments and surrounding whitespace, drops the lines that end up
/// empty, and returns the remaining lines in their original order.
fn sanitize(i: String) -> Vec<String> {
    i.lines()
        .map(|line| remove_comments(line).trim())
        .filter(|line| !line.is_empty())
        .map(str::to_owned)
        .collect()
}

/// Returns the part of `i` that precedes the first `;`, or all of `i` when it
/// contains no `;`.
///
/// Note: the search is purely textual, so a `;` inside a quoted string is
/// treated as the start of a comment as well.
fn remove_comments(i: &str) -> &str {
    i.split_once(';').map_or(i, |(code, _comment)| code)
}

/// Checks if the string `i` starts with `pat`.
///
/// Returns the rest of the input string on success, else returns `None`.
/// An input that is shorter than `pat` never matches.
fn match_string(i: &str, pat: &str) -> Option<String> {
    i.strip_prefix(pat).map(str::to_owned)
}

/// Matches until the first occurrence of the `stop` string.
///
/// Returns a tuple containing the preceding string and the rest of the input
/// string (the `stop` string itself is dropped), or `None` when `stop` does
/// not occur in `i`.
///
/// ```ignore
/// assert_eq!(
///     Some(("Lorem ".to_string(), " Ipsum".to_string())),
///     take_alpha_till("Lorem X Ipsum", "X")
/// );
/// ```
fn take_alpha_till(i: &str, stop: &str) -> Option<(String, String)> {
    let (matched, rest) = i.split_once(stop)?;
    Some((matched.to_owned(), rest.to_owned()))
}

/// Matches inside the `start` and `stop` delimiters.
/// Returns a tuple with the string in between the two
/// together with the rest of the string.
fn take_between(i: &str, start: &str, stop: &str) -> Option<(String, String)> { let s1 = match_string(i, start)?; return take_alpha_till(&s1, stop); } /// finds special escaped characters in a string /// (such as \n) and replaces them with the actual special /// character fn escaped_codes() {} #[test] fn take_between_test() { assert_eq!( take_between("\"wow\" etc", "\"", "\""), Some(("wow".to_string(), " etc".to_string())) ); } //// SECTION PARSING #[derive(Debug)] pub enum SectionContent { Code(Vec), CString(String), CVec(), } use SectionContent::*; #[derive(Debug)] pub struct Section { pub name: String, pub content: SectionContent, } // A .section has a name and variable content. impl Parser { pub fn parse_sections(&mut self) -> Result, ParseError> { let mut res = vec![]; let mut lines = self.input.iter().map(|x| x.as_str()).into_iter(); while let Some(l) = lines.next() { println!("Examing line: {}", l); if l.starts_with(".") { let Some((kind, name)) = take_alpha_till(&l[1..], " ") else { return Err(ParseError::BadSectionHeader); }; match kind.as_str() { "text" => { let s: Vec<&str> = lines .clone() .take_while(|&x| !(x).starts_with(".")) .map(|x| x) .collect(); res.push(Section { name: name.trim().to_owned(), content: Code(parse_code(&s)?), }) } "asciiz" => { let Some(s) = lines.next() else {return Err(ParseError::UnexpectedEOF)}; let Some((s, _)) = take_between(s.trim(), "\"", "\"") else {return Err(ParseError::BadSectionContent)}; res.push(Section { name: name.trim().to_owned(), content: CString(s), }) } "i16" => { let _s = lines.next(); } "u16" => { let _s = lines.next(); } "vi16" => { let _s = lines.next(); } "vu16" => { let _s = lines.next(); } _ => { return Err(ParseError::UnknownSectionKind); } } } } return Ok(res); } } fn parse_code(i: &[&str]) -> Result, ParseError> { let mut res = vec![]; for line in i { res.push(parse_code_line(line)?); } return Ok(res); } fn parse_code_line(i: &str) -> Result { // every operation has at most 3 arguments let mut bits 
= i.split_whitespace(); println!("current parse code line: {}", i); let Some(op) = bits.next() else {return Err(ParseError::BadInstruction)}; // no type match op { "nop" => { return Ok(NOP); } "halt" => { return Ok(HALT); } _ => {} }; // I-type let Some(r1) = bits.next() else {return Err(ParseError::BadInstruction)}; let Some(r2) = bits.next() else {return Err(ParseError::BadInstruction)}; match op { "addi" => { return Ok(ADDI(r1.to_owned(), parse_const(r2)?)); } "sli" => { return Ok(SLI(r1.to_owned(), parse_const(r2)?)); } "call" => { return Ok(CALL(r1.to_owned(), parse_const(r2)?)); } _ => {} } let Some(r3) = bits.next() else {return Err(ParseError::BadInstruction)}; // R-type match op { "add" => { return Ok(ADD(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "sub" => { return Ok(SUB(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "and" => { return Ok(AND(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "xor" => { return Ok(XOR(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "sll" => { return Ok(SLL(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "beq" => { return Ok(BEQ(r1.to_owned(), r2.to_owned(), r3.to_owned())); } "bgt" => { return Ok(BGT(r1.to_owned(), r2.to_owned(), r3.to_owned())); } _ => {} } // J-type match op { "jal" => { return Ok(JAL(r1.to_owned(), r2.to_owned(), parse_const(&r3)?)); } _ => {} } return Err(ParseError::BadInstruction); } fn parse_const(i: &str) -> Result { // we try to parse the number, if we fail, we treat it as a string. let Ok(num) = i.parse() else { return Ok(Const::CS(i.to_owned())); }; return Ok(Const::C(num)); }