fixed parser
This commit is contained in:
parent
0d417bbbe5
commit
7883b42d13
9 changed files with 667 additions and 65 deletions
|
@ -2,14 +2,20 @@ use crate::ir::{Atom, OwnedAtom};
|
|||
pub(crate) mod parser;
|
||||
|
||||
#[derive(PartialEq, Debug)]
|
||||
struct Module {
|
||||
objs: Vec<(String, Region)>,
|
||||
pub(crate) struct Module {
|
||||
pub(crate) objs: Vec<(String, Region)>,
|
||||
pub(crate) opts: Options,
|
||||
}
|
||||
|
||||
/// Module options (endianness etc.)
|
||||
/// To be defined :(
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub(crate) struct Options(pub(crate) Vec<String>);
|
||||
|
||||
/// A region of memory.
|
||||
/// Either a structured field list or an atomic type.
|
||||
#[derive(PartialEq, Debug)]
|
||||
enum Region {
|
||||
pub(crate) enum Region {
|
||||
Fields(Vec<(Range, FieldRegion)>),
|
||||
Atom { ty: Type },
|
||||
}
|
||||
|
@ -17,7 +23,7 @@ enum Region {
|
|||
/// Content of a field.
|
||||
/// Can be a region or a constant.
|
||||
#[derive(PartialEq, Debug)]
|
||||
enum FieldRegion {
|
||||
pub(crate) enum FieldRegion {
|
||||
Subfield { name: String, content: Region },
|
||||
Const { name: String, value: OwnedAtom },
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ use std::error::Error;
|
|||
use nom::IResult;
|
||||
use nom::Parser;
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::is_not;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::bytes::complete::tag_no_case;
|
||||
use nom::character::complete::alpha1;
|
||||
|
@ -12,28 +13,46 @@ use nom::character::complete::char;
|
|||
use nom::character::complete::digit1;
|
||||
use nom::character::complete::hex_digit1;
|
||||
use nom::character::complete::line_ending;
|
||||
use nom::character::complete::multispace0;
|
||||
use nom::character::complete::newline;
|
||||
use nom::character::complete::space0;
|
||||
use nom::character::streaming::multispace1;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::map_res;
|
||||
use nom::combinator::opt;
|
||||
use nom::combinator::recognize;
|
||||
use nom::error::ParseError;
|
||||
use nom::multi::many0;
|
||||
use nom::multi::many0_count;
|
||||
use nom::multi::separated_list1;
|
||||
use nom::multi::many1;
|
||||
use nom::multi::separated_list0;
|
||||
use nom::sequence::delimited;
|
||||
use nom::sequence::pair;
|
||||
use nom::sequence::preceded;
|
||||
use nom::sequence::terminated;
|
||||
|
||||
use crate::ir::Atom;
|
||||
|
||||
use super::FieldRegion;
|
||||
use super::Module;
|
||||
use super::Options;
|
||||
use super::OwnedAtom;
|
||||
use super::Range;
|
||||
use super::Region;
|
||||
use super::Type;
|
||||
|
||||
/// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and
|
||||
/// trailing whitespace, returning the output of `inner`.
|
||||
/// trailing whitespace, returning the output of `inner`. This includes newlines
|
||||
pub fn ws<'a, O, E: ParseError<&'a str>, F>(inner: F) -> impl Parser<&'a str, Output = O, Error = E>
|
||||
where
|
||||
F: Parser<&'a str, Output = O, Error = E>,
|
||||
{
|
||||
delimited(multispace0, inner, multispace0)
|
||||
}
|
||||
|
||||
pub fn ws_no_newline<'a, O, E: ParseError<&'a str>, F>(
|
||||
inner: F,
|
||||
) -> impl Parser<&'a str, Output = O, Error = E>
|
||||
where
|
||||
F: Parser<&'a str, Output = O, Error = E>,
|
||||
{
|
||||
|
@ -64,7 +83,7 @@ pub fn parse_field_name(input: &str) -> IResult<&str, &str> {
|
|||
char('"'),
|
||||
recognize(pair(
|
||||
alt((alpha1, tag("_"))),
|
||||
many0_count(alt((alphanumeric1, tag("_")))),
|
||||
many0_count(alt((alphanumeric1, tag("_"), tag(" ")))),
|
||||
)),
|
||||
char('"'),
|
||||
)
|
||||
|
@ -79,8 +98,26 @@ fn field_name_test() {
|
|||
)
|
||||
}
|
||||
|
||||
/// A combinator that parses identifiers.
|
||||
pub fn parse_identifier(input: &str) -> IResult<&str, &str> {
|
||||
recognize(pair(
|
||||
alt((alpha1, tag("_"))),
|
||||
many0_count(alt((alphanumeric1, tag("_")))),
|
||||
))
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
#[test]
fn identifier_test() {
    // An all-uppercase name must be returned unchanged.
    let (_, ident) = parse_identifier("CFGFRAME").unwrap();
    assert_eq!(String::from("CFGFRAME"), ident);
}
|
||||
|
||||
/// A combinator that parses primitive types.
|
||||
pub fn parse_type(input: &str) -> IResult<&str, Type> {
|
||||
println!("type: {:?}", input);
|
||||
alt((
|
||||
(map(tag("bool"), |_| Type::Bool)),
|
||||
(map(tag("u8"), |_| Type::U8)),
|
||||
|
@ -97,13 +134,13 @@ pub fn parse_type(input: &str) -> IResult<&str, Type> {
|
|||
(map(tag("f64"), |_| Type::F64)),
|
||||
(map(tag("utf8"), |_| Type::Utf8)),
|
||||
(preceded(
|
||||
tag("u"),
|
||||
char('u'),
|
||||
map_res(digit1, |s: &str| {
|
||||
Ok::<Type, Box<dyn Error>>(Type::UX(s.parse()?))
|
||||
}),
|
||||
)),
|
||||
(preceded(
|
||||
tag("i"),
|
||||
char('i'),
|
||||
map_res(digit1, |s: &str| {
|
||||
Ok::<Type, Box<dyn Error>>(Type::IX(s.parse()?))
|
||||
}),
|
||||
|
@ -139,20 +176,27 @@ fn parse_range_vals(input: &str) -> IResult<&str, Range> {
|
|||
fn parse_range(input: &str) -> IResult<&str, Range> {
|
||||
map(
|
||||
(parse_range_vals, char(' '), parse_unit),
|
||||
|((l, h), _, s)| (l * s, h * s),
|
||||
|((l, h), _, s)| {
|
||||
if s == 1 {
|
||||
(l, h)
|
||||
} else {
|
||||
((l * s), ((1 + h) * s).saturating_sub(1))
|
||||
}
|
||||
},
|
||||
)
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn range_test() {
|
||||
assert_eq!((0, 16), parse_range("0-2 bytes").unwrap().1);
|
||||
assert_eq!((24, 24), parse_range("3 byte").unwrap().1);
|
||||
assert_eq!((0, 15), parse_range("0-1 bytes").unwrap().1);
|
||||
assert_eq!((24, 31), parse_range("3 byte").unwrap().1);
|
||||
assert_eq!((0, 5), parse_range("0-5 bits").unwrap().1);
|
||||
assert_eq!((0, 2), parse_range("0-2 bit").unwrap().1);
|
||||
}
|
||||
|
||||
pub fn parse_field_entry(input: &str) -> IResult<&str, (Range, FieldRegion)> {
|
||||
println!("field_entry{:?}", input);
|
||||
map(
|
||||
(
|
||||
parse_range,
|
||||
|
@ -169,7 +213,7 @@ pub fn parse_field_entry(input: &str) -> IResult<&str, (Range, FieldRegion)> {
|
|||
(
|
||||
tag("const "),
|
||||
parse_field_name,
|
||||
char(' '),
|
||||
many1(char(' ')),
|
||||
parse_const_region,
|
||||
),
|
||||
|(_, name, _, r)| FieldRegion::Const {
|
||||
|
@ -184,33 +228,19 @@ pub fn parse_field_entry(input: &str) -> IResult<&str, (Range, FieldRegion)> {
|
|||
.parse(input)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_field_line_test() {
|
||||
let line = include_str!("test/line.brin");
|
||||
|
||||
assert_eq!(
|
||||
(
|
||||
(0, 2),
|
||||
FieldRegion::Subfield {
|
||||
name: "test".to_owned(),
|
||||
content: Region::Atom { ty: Type::U16 }
|
||||
}
|
||||
),
|
||||
parse_field_entry(line).unwrap().1
|
||||
);
|
||||
}
|
||||
|
||||
pub fn parse_region(input: &str) -> IResult<&str, Region> {
|
||||
println!("Region: {:?}", input);
|
||||
alt((
|
||||
map(
|
||||
(
|
||||
ws(char('{')),
|
||||
opt(ws(newline)),
|
||||
separated_list1(ws(line_ending), parse_field_entry),
|
||||
opt(ws(newline)),
|
||||
ws(char('}')),
|
||||
multispace0,
|
||||
char('{'),
|
||||
multispace1,
|
||||
separated_list0(multispace1, parse_field_entry),
|
||||
multispace1,
|
||||
char('}'),
|
||||
),
|
||||
|(_, _, field_list, _, _)| Region::Fields(field_list),
|
||||
|(_, _, _, field_list, _, _)| Region::Fields(field_list),
|
||||
),
|
||||
map(parse_type, |ty| Region::Atom { ty }),
|
||||
))
|
||||
|
@ -234,64 +264,64 @@ fn parse_concrete_type(ty: Type) -> fn(&str) -> IResult<&str, OwnedAtom> {
|
|||
},
|
||||
Type::U8 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U8(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U8(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
Type::U16 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U16(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U16(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
|
||||
Type::U32 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U32(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U32(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
|
||||
Type::U64 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U64(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U64(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
Type::U128 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U128(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U128(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
|
||||
Type::I8 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I8(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I8(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
Type::I16 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I16(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I16(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
Type::I32 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I32(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I32(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
Type::I64 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I64(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I64(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
Type::I128 => |s: &str| {
|
||||
map_res(parse_number, |x| {
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I128(x.parse()?))
|
||||
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I128(parse_int::parse(x)?))
|
||||
})
|
||||
.parse(s)
|
||||
},
|
||||
|
@ -316,26 +346,201 @@ fn parse_concrete_type(ty: Type) -> fn(&str) -> IResult<&str, OwnedAtom> {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn parse_region_test() {
|
||||
let region = include_str!("test/region.brin");
|
||||
fn test_const_region() {
|
||||
let region = "u16 0x10";
|
||||
assert_eq!(parse_const_region(region).unwrap().1, OwnedAtom::U16(0x10));
|
||||
}
|
||||
|
||||
fn parse_options(input: &str) -> IResult<&str, Options> {
|
||||
map(
|
||||
many0(map(
|
||||
terminated(preceded(tag("#= "), is_not("\r\n")), line_ending),
|
||||
|s: &str| s.trim().to_owned(),
|
||||
)),
|
||||
Options,
|
||||
)
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
#[test]
fn options_test() {
    // Two option lines followed by the start of a region.
    let (rest, opts) = parse_options("#= BigEndian\n#= Another Option\nCFGFRAME").unwrap();
    assert_eq!(rest, "CFGFRAME");
    let expected = Options(vec![
        String::from("BigEndian"),
        String::from("Another Option"),
    ]);
    assert_eq!(opts, expected);

    // No option lines at all: nothing is consumed and the list is empty.
    let (rest, opts) = parse_options("CFGFRAME").unwrap();
    assert_eq!(rest, "CFGFRAME");
    assert_eq!(opts, Options(Vec::new()));
}
|
||||
|
||||
fn parse_named_region(input: &str) -> IResult<&str, (String, Region)> {
|
||||
map((parse_identifier, parse_region), |(name, region)| {
|
||||
(name.to_owned(), region)
|
||||
})
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
pub fn parse_module(input: &str) -> IResult<&str, Module> {
|
||||
println!("{}", input);
|
||||
map(
|
||||
(
|
||||
parse_options,
|
||||
multispace1,
|
||||
separated_list0(multispace0, parse_named_region),
|
||||
),
|
||||
|(opts, _, objs)| Module { opts, objs },
|
||||
)
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
#[test]
fn module_test() {
    // One option line, blank lines, then a single one-field region.
    let source = "#= BigEndian\n\n\nCFGFRAME { 0-1 bytes: field \"SYNC\" u16 }";
    let parsed = parse_module(source).unwrap().1;
    let expected_opts = Options(vec![String::from("BigEndian")]);
    assert_eq!(parsed.opts, expected_opts);
    assert_eq!(parsed.objs[0].0, "CFGFRAME");
}
|
||||
|
||||
/// Parses the bundled C37.118 format description and checks the start of the
/// `CFGFRAME` region: the `SYNC` field with its two constant sub-fields, then the
/// `TIME BASE` and `NUM_PMU` fields.
///
/// Every structural expectation panics on mismatch, so the test cannot pass merely
/// because a pattern failed to match.
#[test]
fn c37118_test() {
    let brin = include_str!("../../formats/c37118.brin");
    let (_, module) = parse_module(brin).unwrap();
    assert_eq!(module.opts, Options(vec!["Bigendian".to_owned()]));
    assert_eq!(module.objs[0].0, "CFGFRAME");

    if let Region::Fields(fields) = &module.objs[0].1 {
        assert_eq!(fields[0].0, (0, 15));
        if let FieldRegion::Subfield { name, content } = &fields[0].1 {
            assert_eq!(name, "SYNC");
            if let Region::Fields(subfields) = content {
                assert_eq!(subfields[0].0, (0, 7));
                if let FieldRegion::Const { name, value } = &subfields[0].1 {
                    assert_eq!(name, "magic_number");
                    assert_eq!(*value, OwnedAtom::U8(0xAA));
                } else {
                    panic!("Incorrect field region for magic_number");
                }

                assert_eq!(subfields[1].0, (8, 15));
                if let FieldRegion::Const { name, value } = &subfields[1].1 {
                    assert_eq!(name, "ver_number");
                    assert_eq!(*value, OwnedAtom::U8(0x31));
                } else {
                    panic!("Incorrect field region for ver_number");
                }
            } else {
                panic!("Incorrect content for SYNC");
            }
        } else {
            panic!("Incorrect field region for SYNC");
        }

        assert_eq!(fields[1].0, (24, 39));
        if let FieldRegion::Subfield { name, content } = &fields[1].1 {
            assert_eq!(*name, "TIME BASE");
            if let Region::Atom { ty } = content {
                assert_eq!(*ty, Type::U16);
            } else {
                panic!("Incorrect content for TIME BASE");
            }
        } else {
            panic!("Incorrect field region for TIME BASE");
        }

        assert_eq!(fields[2].0, (40, 55));
        if let FieldRegion::Subfield { name, content } = &fields[2].1 {
            assert_eq!(*name, "NUM_PMU");
            if let Region::Atom { ty } = content {
                assert_eq!(*ty, Type::U16);
            } else {
                panic!("Incorrect content for NUM_PMU");
            }
        } else {
            panic!("Incorrect field region for NUM_PMU");
        }
    } else {
        panic!("Wrong region type");
    }
}
|
||||
|
||||
#[test]
fn test_field_entry() {
    // A byte range is expected to come back as an inclusive bit range; the
    // quoted field name contains a space.
    let (_, entry) = parse_field_entry("3-4 bytes: field \"TIME BASE\" u16").unwrap();
    let expected = (
        (24, 39),
        FieldRegion::Subfield {
            name: String::from("TIME BASE"),
            content: Region::Atom { ty: Type::U16 },
        },
    );
    assert_eq!(entry, expected);
}
|
||||
|
||||
#[test]
|
||||
fn test_region_good() {
|
||||
let region = "{
|
||||
3-4 byte: field \"time base\" u16
|
||||
5-6 byte: field \"magic number\" {
|
||||
0-4 bits: const \"aaa\" u8 0x10
|
||||
}
|
||||
}";
|
||||
assert_eq!(
|
||||
parse_region(region).unwrap().1,
|
||||
Region::Fields(vec![
|
||||
(
|
||||
(0, 1),
|
||||
(24, 39),
|
||||
FieldRegion::Subfield {
|
||||
name: "test".to_owned(),
|
||||
name: "time base".to_owned(),
|
||||
content: Region::Atom { ty: Type::U16 }
|
||||
}
|
||||
),
|
||||
(
|
||||
(2, 3),
|
||||
(40, 55),
|
||||
FieldRegion::Subfield {
|
||||
name: "second_field".to_owned(),
|
||||
content: Region::Atom { ty: Type::I32 }
|
||||
name: "magic number".to_owned(),
|
||||
content: Region::Fields(vec![(
|
||||
(0, 4),
|
||||
FieldRegion::Const {
|
||||
name: "aaa".to_owned(),
|
||||
value: Atom::U8(16)
|
||||
}
|
||||
)])
|
||||
}
|
||||
)
|
||||
]),
|
||||
parse_region(region).unwrap().1
|
||||
])
|
||||
);
|
||||
}
|
||||
|
||||
// NOTE(review): "bad" presumably refers to the entries being listed out of offset
// order (bytes 3-4 before bytes 1-2); the region is still expected to parse.
#[test]
fn test_region_bad() {
    let region = "{
3-4 byte: field \"magic number\" {
0 bit: const \"active\" u8 0x10
}
1-2 byte: field \"timestamp\" u8
}";

    // Nested region holding a single one-bit constant.
    let magic_number = FieldRegion::Subfield {
        name: String::from("magic number"),
        content: Region::Fields(vec![(
            (0, 0),
            FieldRegion::Const {
                name: String::from("active"),
                value: Atom::U8(16),
            },
        )]),
    };
    let timestamp = FieldRegion::Subfield {
        name: String::from("timestamp"),
        content: Region::Atom { ty: Type::U8 },
    };
    let expected = Region::Fields(vec![((24, 39), magic_number), ((8, 23), timestamp)]);

    let (_, parsed) = parse_region(region).unwrap();
    assert_eq!(parsed, expected);
}
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
0-2 bits: field "test" u16
|
|
@ -1,8 +0,0 @@
|
|||
{
|
||||
0-1 bits: field "test" u16
|
||||
2-3 bits: field "second_field" i16
|
||||
4-8 bits: field "big_struct" {
|
||||
0-1 bits: field "test" u8
|
||||
2-3 bits: const "magic" u16 0
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue