Dekejit/src/assembler/parser.rs

use super::ast::{Const, Operation};
use Operation::*;

use log::*;

/// Represents a parsing failure.
///
/// The variants carry no payload, so the type derives equality and cloning;
/// this lets callers and tests compare results with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// A line starting with `.` has no space separating the section kind
    /// from the section name.
    BadSectionHeader,
    /// The section kind in a header is not one the parser recognises.
    UnknownSectionKind,
    /// The input ended where a section's content line was expected.
    UnexpectedEOF,
    /// The content line of a section could not be read
    /// (e.g. an `asciiz` line that is not a double-quoted string).
    BadSectionContent,
    /// A code line has an unknown mnemonic or too few operands.
    BadInstruction,
}

/// Represents the state of our parser.
///
/// Parsing itself is stateless: the only thing kept here is the input,
/// already split into cleaned-up lines.
#[derive(Debug)]
pub struct Parser {
    /// Lines of the input file, as produced by `sanitize` in `Parser::new`.
    input: Vec<String>,
}

impl Parser {
    /// Builds a parser over the source text `i`.
    ///
    /// The text is run through `sanitize` once, up front, and the resulting
    /// lines are stored for later parsing.
    pub fn new(i: String) -> Self {
        let input = sanitize(i);
        Self { input }
    }
}

/// Splits the input into lines and cleans each one up.
///
/// For every line the comment part is removed and the surrounding
/// whitespace is trimmed; lines that end up empty are dropped.
fn sanitize(i: String) -> Vec<String> {
    let mut cleaned = Vec::new();

    for raw in i.lines() {
        let stripped = remove_comments(raw).trim();
        if !stripped.is_empty() {
            cleaned.push(stripped.to_string());
        }
    }

    cleaned
}

/// Removes a trailing comment from a single line.
///
/// A comment starts at the first `;` that is not inside a double-quoted
/// string and runs to the end of the line. Returns the part of `i` before
/// the comment, or all of `i` when there is none.
///
/// Every `"` toggles the "inside a string" state; escape sequences are not
/// interpreted, which matches how quoted strings are read elsewhere in this
/// parser (up to the next `"`).
fn remove_comments(i: &str) -> &str {
    let mut in_string = false;

    for (idx, c) in i.char_indices() {
        match c {
            '"' => in_string = !in_string,
            // A semicolon inside a string literal is data, not a comment.
            ';' if !in_string => return &i[..idx],
            _ => {}
        }
    }

    i
}

/// Checks if the string `i` starts with `pat`.
///
/// Returns the rest of the input string (everything after `pat`) on
/// success, and `None` otherwise. An input shorter than `pat` never
/// matches.
fn match_string(i: &str, pat: &str) -> Option<String> {
    // `strip_prefix` requires the whole pattern to be present, so a
    // truncated input is rejected rather than treated as a match.
    i.strip_prefix(pat).map(str::to_owned)
}

/// Splits `i` at the first occurrence of the `stop` string.
///
/// Returns a tuple containing the text before `stop` and the text after
/// it (`stop` itself is dropped), or `None` when `stop` does not occur.
///
/// Ex: `take_alpha_till("Lorem X Ipsum", "X")` gives
/// `Some(("Lorem ".to_string(), " Ipsum".to_string()))`.
fn take_alpha_till(i: &str, stop: &str) -> Option<(String, String)> {
    i.split_once(stop)
        .map(|(before, after)| (before.to_string(), after.to_string()))
}

/// Extracts the text enclosed by the `start` and `stop` delimiters.
///
/// `i` has to begin with `start`. On success returns a tuple with the
/// string between the two delimiters together with whatever follows
/// `stop`; returns `None` when either delimiter is not found.
fn take_between(i: &str, start: &str, stop: &str) -> Option<(String, String)> {
    match_string(i, start).and_then(|after_start| take_alpha_till(&after_start, stop))
}

#[test]
fn take_between_test1() {
    // A quoted word followed by trailing text: the quotes are stripped and
    // the trailing text is handed back untouched.
    let expected = Some(("wow".to_string(), " etc".to_string()));
    let actual = take_between("\"wow\" etc", "\"", "\"");

    assert_eq!(actual, expected);
}

/// Placeholder: meant to find special escaped characters in a string
/// (such as \n) and replace them with the actual special character.
/// Currently an empty stub that takes no arguments and does nothing.
/// #TODO: do we need this? I forgot.
fn _escaped_codes() {}

//// SECTION PARSING

/// Enum to represent possible section content.
#[derive(Debug)]
pub enum SectionContent {
    /// Operations parsed from the body of a `text` section.
    Code(Vec<Operation>),
    /// Text taken from between the double quotes of an `asciiz` section.
    CString(String),
    /// Not constructed by the parsing code visible here; presumably reserved
    /// for the vector section kinds (`vi16`/`vu16`) — TODO confirm.
    CVec(),
}

use SectionContent::*;

/// Binary file section, as parsed from a .grasm file.
#[derive(Debug)]
pub struct Section {
    /// Section name: the part of the header line after the first space,
    /// with surrounding whitespace trimmed.
    pub name: String,
    /// Parsed payload of the section.
    pub content: SectionContent,
}

impl Parser {
    /// Walks the stored input lines and builds one [`Section`] per section
    /// header found.
    ///
    /// A header is a line starting with `.`, followed by the section kind,
    /// a space and the section name. Lines that are not headers are skipped
    /// by the main loop; they are only read as the content of a section.
    ///
    /// * `text`: every following line up to the next header is parsed as code.
    /// * `asciiz`: the next line must hold a double-quoted string.
    ///
    /// # Errors
    ///
    /// * [`ParseError::BadSectionHeader`] if a header has no space after the kind.
    /// * [`ParseError::UnknownSectionKind`] if the kind is not recognised.
    /// * [`ParseError::UnexpectedEOF`] if an `asciiz` header is the last line.
    /// * [`ParseError::BadSectionContent`] if an `asciiz` content line is not
    ///   a quoted string.
    /// * Any error reported while parsing the code of a `text` section.
    ///
    /// # Panics
    ///
    /// The `i16`, `u16`, `vi16` and `vu16` kinds are not implemented yet and
    /// hit `todo!()`.
    pub fn parse_sections(&self) -> Result<Vec<Section>, ParseError> {
        let mut sections = Vec::new();
        let mut remaining = self.input.iter().map(String::as_str);

        while let Some(line) = remaining.next() {
            debug!("Examining line {}", line);

            // Only section headers start something new; other lines are skipped here.
            let Some(header) = line.strip_prefix('.') else {
                continue;
            };
            let (kind, name) =
                take_alpha_till(header, " ").ok_or(ParseError::BadSectionHeader)?;

            let content = match kind.as_str() {
                "text" => {
                    // Read the body through a clone, so `remaining` itself is
                    // not advanced past the section.
                    let body: Vec<&str> = remaining
                        .clone()
                        .take_while(|candidate| !candidate.starts_with('.'))
                        .collect();
                    Code(parse_code(&body)?)
                }
                "asciiz" => {
                    let raw = remaining.next().ok_or(ParseError::UnexpectedEOF)?;
                    let (text, _) = take_between(raw.trim(), "\"", "\"")
                        .ok_or(ParseError::BadSectionContent)?;
                    CString(text)
                }
                "i16" | "u16" | "vi16" | "vu16" => {
                    let _unparsed = remaining.next();
                    todo!()
                }
                _ => return Err(ParseError::UnknownSectionKind),
            };

            sections.push(Section {
                name: name.trim().to_owned(),
                content,
            });
        }

        Ok(sections)
    }
}

/// Parses every line of a code section, in order.
///
/// Stops at the first line that fails to parse and returns its error;
/// otherwise returns the operations in the same order as the input lines.
fn parse_code(i: &[&str]) -> Result<Vec<Operation>, ParseError> {
    i.iter().map(|line| parse_code_line(line)).collect()
}

/// Parses a single line of code.
///
/// The line is split on whitespace: the first token is the mnemonic and
/// the following ones are its operands. Every operation has at most 3
/// operands; tokens beyond the ones an operation needs are ignored.
///
/// Returns [`ParseError::BadInstruction`] when the line is empty, the
/// mnemonic is unknown, or operands are missing.
fn parse_code_line(i: &str) -> Result<Operation, ParseError> {
    trace!("current parse code line: {}", i);
    let tokens: Vec<&str> = i.split_whitespace().collect();

    let operation = match *tokens.as_slice() {
        // no type
        ["nop", ..] => NOP,
        ["halt", ..] => HALT,

        // I-type
        ["addi", r1, c, ..] => ADDI(r1.to_owned(), parse_const(c)?),
        ["sli", r1, c, ..] => SLI(r1.to_owned(), parse_const(c)?),
        ["call", r1, c, ..] => CALL(r1.to_owned(), parse_const(c)?),

        // R-type
        ["add", r1, r2, r3, ..] => ADD(r1.to_owned(), r2.to_owned(), r3.to_owned()),
        ["sub", r1, r2, r3, ..] => SUB(r1.to_owned(), r2.to_owned(), r3.to_owned()),
        ["and", r1, r2, r3, ..] => AND(r1.to_owned(), r2.to_owned(), r3.to_owned()),
        ["xor", r1, r2, r3, ..] => XOR(r1.to_owned(), r2.to_owned(), r3.to_owned()),
        ["sll", r1, r2, r3, ..] => SLL(r1.to_owned(), r2.to_owned(), r3.to_owned()),
        ["beq", r1, r2, r3, ..] => BEQ(r1.to_owned(), r2.to_owned(), r3.to_owned()),
        ["bgt", r1, r2, r3, ..] => BGT(r1.to_owned(), r2.to_owned(), r3.to_owned()),

        // J-type
        ["jal", r1, r2, c, ..] => JAL(r1.to_owned(), r2.to_owned(), parse_const(c)?),

        // empty line, unknown mnemonic or not enough operands
        _ => return Err(ParseError::BadInstruction),
    };

    Ok(operation)
}

/// Turns an operand into a constant.
///
/// The operand is first tried as a number; when that fails it is kept
/// verbatim as a string constant. This function never returns `Err`.
fn parse_const(i: &str) -> Result<Const, ParseError> {
    let constant = match i.parse() {
        Ok(number) => Const::C(number),
        Err(_) => Const::CS(i.to_owned()),
    };
    Ok(constant)
}

//// TESTS

#[test]
fn parser_test() {
    // Smoke test: load the sample program and run the section parser on it.
    // The result is not inspected yet.
    let source = std::fs::read_to_string("./tests/assembly/hello_world.grasm").unwrap();

    let _sections = Parser::new(source).parse_sections();

    // #TODO: WRITE PARSER TEST SUITE!
    //assert_eq!(r, Ok(vec![]));
}