Dekejit/src/assembler/parser.rs
2023-05-02 16:26:54 +02:00

270 lines
7.2 KiB
Rust

use super::AST::{Const, Operation};
use Operation::*;
/// Alias for a 16-bit unsigned value.
/// NOTE(review): presumably intended for code locations (compare `Parser::loc`, which is also
/// a `u16`), but the alias is not referenced in the visible part of this file — confirm.
type Loc = u16;
/// Errors reported by the parsing routines in this file.
#[derive(Debug)]
pub enum ParseError {
/// A line starting with `.` has no space separating the section kind from the section name.
BadSectionHeader,
/// The section kind is not one of `text`, `asciiz`, `i16`, `u16`, `vi16` or `vu16`.
UnknownSectionKind,
/// The input ended right after an `asciiz` section header, where a content line was expected.
UnexpectedEOF,
/// The line following an `asciiz` header is not a string enclosed in double quotes.
BadSectionContent,
/// A code line is empty, uses an unknown mnemonic, or has too few operands.
BadInstruction,
}
/// Represents the state of our parser.
pub struct Parser {
/// Current number of operations parsed. Starts at 0 in `Parser::new`.
/// NOTE(review): nothing in the visible part of this file updates it yet.
loc: u16,
/// Symbols encountered, each paired with its position. Starts empty in `Parser::new`.
/// NOTE(review): nothing in the visible part of this file fills it yet.
symtable: Vec<(String, u16)>,
/// Input file, already split into lines with comments, surrounding whitespace and
/// empty lines removed (see `sanitize`).
pub input: Vec<String>,
}
impl Parser {
    /// Creates a parser for the assembler source text `i`.
    ///
    /// The text is cleaned up and split into lines by `sanitize`; the operation
    /// counter starts at zero and the symbol table starts empty.
    pub fn new(i: String) -> Self {
        let input = sanitize(i);
        Self {
            loc: 0,
            symtable: Vec::new(),
            input,
        }
    }
}
/// Splits the input into lines and cleans each one up.
///
/// For every line the comment part is removed (see `remove_comments`) and the
/// surrounding whitespace is trimmed; lines that end up empty are dropped.
fn sanitize(i: String) -> Vec<String> {
    let mut cleaned = Vec::new();
    for raw in i.lines() {
        let stripped = remove_comments(raw).trim();
        if !stripped.is_empty() {
            cleaned.push(String::from(stripped));
        }
    }
    cleaned
}
/// Strips a trailing `;` comment from a single line.
///
/// Returns the part of `i` that precedes the first `;` found *outside* a
/// double-quoted string, or the whole line when there is no such `;`.
///
/// A `;` inside a string literal (e.g. the payload of an `.asciiz` section,
/// `"a;b"`) is kept: cutting there would truncate the string and drop its
/// closing quote. A string is considered closed at the next `"`, which is the
/// same rule `take_between` uses to delimit it. If a quote is never closed,
/// the rest of the line is treated as string content and nothing is stripped.
fn remove_comments(i: &str) -> &str {
    let mut in_string = false;
    for (idx, c) in i.char_indices() {
        match c {
            '"' => in_string = !in_string,
            // `;` is a single byte, so `idx` is a valid char boundary.
            ';' if !in_string => return &i[..idx],
            _ => {}
        }
    }
    i
}
/// Checks if the string `i` starts with `pat`.
///
/// Returns the rest of the input string (everything after the prefix) on
/// success, else returns `None`. An input that is shorter than `pat` does not
/// match, even when it is a prefix of `pat`; an empty `pat` matches anything
/// and yields the whole input.
fn match_string(i: &str, pat: &str) -> Option<String> {
    i.strip_prefix(pat).map(str::to_owned)
}
/// Splits the input at the first occurrence of the `stop` string.
///
/// Returns a tuple containing the text preceding `stop` and the text
/// following it (`stop` itself is dropped), or `None` when `stop` does not
/// occur in `i`.
///
/// Ex: `take_alpha_till("Lorem X Ipsum", "X")` yields
/// `Some(("Lorem ".to_string(), " Ipsum".to_string()))`.
fn take_alpha_till(i: &str, stop: &str) -> Option<(String, String)> {
    i.split_once(stop)
        .map(|(before, after)| (before.to_string(), after.to_string()))
}
/// Extracts the text enclosed by the `start` and `stop` delimiters.
///
/// The input must begin with `start` (checked by `match_string`); the text up
/// to the next `stop` is then split off by `take_alpha_till`. Returns a tuple
/// with the string in between the two delimiters together with the rest of
/// the input after `stop`, or `None` if either step fails.
fn take_between(i: &str, start: &str, stop: &str) -> Option<(String, String)> {
    match_string(i, start).and_then(|after_start| take_alpha_till(&after_start, stop))
}
/// Finds special escaped characters in a string
/// (such as \n) and replaces them with the actual special
/// character.
///
/// NOTE: this is currently an empty placeholder: it takes no arguments,
/// returns nothing and performs no replacement.
fn escaped_codes() {}
// Checks that a quoted word is extracted and the remainder is returned untouched.
#[test]
fn take_between_test() {
    let parsed = take_between("\"wow\" etc", "\"", "\"");
    let expected = Some((String::from("wow"), String::from(" etc")));
    assert_eq!(parsed, expected);
}
//// SECTION PARSING
/// Payload of a parsed section.
#[derive(Debug)]
pub enum SectionContent {
/// Operations parsed from the body of a `.text` section.
Code(Vec<Operation>),
/// String taken from between the double quotes of an `.asciiz` section.
CString(String),
/// Carries no data and is not constructed in the visible part of this file.
/// NOTE(review): presumably reserved for the numeric/vector section kinds — confirm.
CVec(),
}
use SectionContent::*;
/// A named section of the input together with its parsed content.
#[derive(Debug)]
pub struct Section {
/// Section name: the header text following the section kind, trimmed of whitespace.
pub name: String,
/// Parsed payload of the section.
pub content: SectionContent,
}
// A .section has a name and variable content.
impl Parser {
    /// Parses the sanitized input into its sections.
    ///
    /// A section starts at a line of the form `.<kind> <name>`. Depending on
    /// the kind:
    ///
    /// * `text`   - every following line up to the next line starting with `.`
    ///   is parsed as an instruction and stored as `Code`.
    /// * `asciiz` - the next line must be a double-quoted string, stored as
    ///   `CString`.
    /// * `i16`, `u16`, `vi16`, `vu16` - recognised, but not implemented yet:
    ///   the next line is consumed and no section is emitted.
    ///
    /// Lines that are not part of any section are ignored.
    ///
    /// # Errors
    ///
    /// * `BadSectionHeader`   - a header has no space between kind and name.
    /// * `UnknownSectionKind` - the kind is none of the ones listed above.
    /// * `UnexpectedEOF`      - an `asciiz` header is the last line of the input.
    /// * `BadSectionContent`  - the `asciiz` content is not a quoted string.
    /// * any error produced while parsing the instructions of a `text` section.
    pub fn parse_sections(&mut self) -> Result<Vec<Section>, ParseError> {
        let mut res = vec![];
        // Peekable so a `.text` body can stop right before the next header
        // without consuming it.
        let mut lines = self.input.iter().map(String::as_str).peekable();

        while let Some(l) = lines.next() {
            let Some(header) = l.strip_prefix('.') else {
                // Not a section header and not claimed by a previous section.
                continue;
            };
            let Some((kind, name)) = take_alpha_till(header, " ") else {
                return Err(ParseError::BadSectionHeader);
            };
            let name = name.trim().to_owned();

            match kind.as_str() {
                "text" => {
                    // Consume the body here so the outer loop does not have to
                    // walk over the same lines a second time.
                    let mut body = Vec::new();
                    while let Some(code) = lines.next_if(|x| !x.starts_with('.')) {
                        body.push(code);
                    }
                    res.push(Section {
                        name,
                        content: Code(parse_code(&body)?),
                    });
                }
                "asciiz" => {
                    let Some(s) = lines.next() else {
                        return Err(ParseError::UnexpectedEOF);
                    };
                    let Some((s, _)) = take_between(s.trim(), "\"", "\"") else {
                        return Err(ParseError::BadSectionContent);
                    };
                    res.push(Section {
                        name,
                        content: CString(s),
                    });
                }
                // Numeric sections are not implemented yet: skip their
                // content line without producing a section.
                "i16" | "u16" | "vi16" | "vu16" => {
                    let _ = lines.next();
                }
                _ => return Err(ParseError::UnknownSectionKind),
            }
        }
        Ok(res)
    }
}
/// Parses the lines of a code section, in order, into operations.
///
/// Each line is handed to `parse_code_line`; parsing stops at the first line
/// that fails and that error is returned.
fn parse_code(i: &[&str]) -> Result<Vec<Operation>, ParseError> {
    i.iter().map(|line| parse_code_line(line)).collect()
}
/// Parses a single instruction line into an `Operation`.
///
/// The line is split on whitespace: the first token is the mnemonic, the
/// following ones are its operands. Every operation has at most 3 operands;
/// tokens beyond those an instruction needs are ignored.
///
/// * no operands: `nop`, `halt`
/// * I-type (register, constant): `addi`, `sli`, `call`
/// * R-type (three registers): `add`, `sub`, `and`, `xor`, `sll`, `beq`, `bgt`
/// * J-type (two registers, constant): `jal`
///
/// # Errors
///
/// Returns `ParseError::BadInstruction` when the line is empty, the mnemonic
/// is unknown, or an operand is missing.
fn parse_code_line(i: &str) -> Result<Operation, ParseError> {
    let mut bits = i.split_whitespace();
    // Every missing token is reported the same way.
    let mut token = || bits.next().ok_or(ParseError::BadInstruction);

    let op = token()?;

    // no type
    match op {
        "nop" => return Ok(NOP),
        "halt" => return Ok(HALT),
        _ => {}
    }

    // I-type
    let r1 = token()?.to_owned();
    let r2 = token()?;
    match op {
        "addi" => return Ok(ADDI(r1, parse_const(r2)?)),
        "sli" => return Ok(SLI(r1, parse_const(r2)?)),
        "call" => return Ok(CALL(r1, parse_const(r2)?)),
        _ => {}
    }

    // R-type and J-type both need a third operand.
    let r2 = r2.to_owned();
    let r3 = token()?;
    match op {
        "add" => Ok(ADD(r1, r2, r3.to_owned())),
        "sub" => Ok(SUB(r1, r2, r3.to_owned())),
        "and" => Ok(AND(r1, r2, r3.to_owned())),
        "xor" => Ok(XOR(r1, r2, r3.to_owned())),
        "sll" => Ok(SLL(r1, r2, r3.to_owned())),
        "beq" => Ok(BEQ(r1, r2, r3.to_owned())),
        "bgt" => Ok(BGT(r1, r2, r3.to_owned())),
        "jal" => Ok(JAL(r1, r2, parse_const(r3)?)),
        _ => Err(ParseError::BadInstruction),
    }
}
/// Parses a constant operand.
///
/// We first try to read `i` as a number, which becomes `Const::C`; if that
/// fails the text is kept verbatim as `Const::CS`. This function never
/// returns an error.
fn parse_const(i: &str) -> Result<Const, ParseError> {
    Ok(match i.parse() {
        Ok(value) => Const::C(value),
        Err(_) => Const::CS(i.to_owned()),
    })
}