fixed parser

This commit is contained in:
Wintermute 2025-06-26 21:27:07 +02:00
parent 0d417bbbe5
commit 7883b42d13
9 changed files with 667 additions and 65 deletions

View file

@ -2,14 +2,20 @@ use crate::ir::{Atom, OwnedAtom};
pub(crate) mod parser;
#[derive(PartialEq, Debug)]
struct Module {
objs: Vec<(String, Region)>,
pub(crate) struct Module {
pub(crate) objs: Vec<(String, Region)>,
pub(crate) opts: Options,
}
/// Module-level options (endianness etc.), held as a plain list of strings.
/// The set of supported options is still to be defined.
#[derive(PartialEq, Debug)]
pub(crate) struct Options(pub(crate) Vec<String>);
/// A region of memory.
/// Either a structured field list or an atomic type.
#[derive(PartialEq, Debug)]
enum Region {
pub(crate) enum Region {
Fields(Vec<(Range, FieldRegion)>),
Atom { ty: Type },
}
@ -17,7 +23,7 @@ enum Region {
/// Content of a field.
/// Can be a region or a constant.
#[derive(PartialEq, Debug)]
enum FieldRegion {
pub(crate) enum FieldRegion {
Subfield { name: String, content: Region },
Const { name: String, value: OwnedAtom },
}

View file

@ -3,6 +3,7 @@ use std::error::Error;
use nom::IResult;
use nom::Parser;
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::alpha1;
@ -12,28 +13,46 @@ use nom::character::complete::char;
use nom::character::complete::digit1;
use nom::character::complete::hex_digit1;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::space0;
use nom::character::streaming::multispace1;
use nom::combinator::map;
use nom::combinator::map_res;
use nom::combinator::opt;
use nom::combinator::recognize;
use nom::error::ParseError;
use nom::multi::many0;
use nom::multi::many0_count;
use nom::multi::separated_list1;
use nom::multi::many1;
use nom::multi::separated_list0;
use nom::sequence::delimited;
use nom::sequence::pair;
use nom::sequence::preceded;
use nom::sequence::terminated;
use crate::ir::Atom;
use super::FieldRegion;
use super::Module;
use super::Options;
use super::OwnedAtom;
use super::Range;
use super::Region;
use super::Type;
/// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and
/// trailing whitespace, returning the output of `inner`.
/// trailing whitespace, returning the output of `inner`. This includes newlines
pub fn ws<'a, O, E: ParseError<&'a str>, F>(inner: F) -> impl Parser<&'a str, Output = O, Error = E>
where
F: Parser<&'a str, Output = O, Error = E>,
{
delimited(multispace0, inner, multispace0)
}
pub fn ws_no_newline<'a, O, E: ParseError<&'a str>, F>(
inner: F,
) -> impl Parser<&'a str, Output = O, Error = E>
where
F: Parser<&'a str, Output = O, Error = E>,
{
@ -64,7 +83,7 @@ pub fn parse_field_name(input: &str) -> IResult<&str, &str> {
char('"'),
recognize(pair(
alt((alpha1, tag("_"))),
many0_count(alt((alphanumeric1, tag("_")))),
many0_count(alt((alphanumeric1, tag("_"), tag(" ")))),
)),
char('"'),
)
@ -79,8 +98,26 @@ fn field_name_test() {
)
}
/// Parses an identifier and returns the matched slice of `input`.
///
/// An identifier starts with a run of alphabetic characters (`alpha1`) or an
/// underscore, followed by any number of alphanumeric runs or underscores.
pub fn parse_identifier(input: &str) -> IResult<&str, &str> {
    let leading = alt((alpha1, tag("_")));
    let trailing = many0_count(alt((alphanumeric1, tag("_"))));
    recognize(pair(leading, trailing)).parse(input)
}
#[test]
fn identifier_test() {
    // An upper-case name must be returned as the parser output.
    let (_, ident) = parse_identifier("CFGFRAME").unwrap();
    assert_eq!("CFGFRAME".to_owned(), ident);
}
/// A combinator that parses primitive types.
pub fn parse_type(input: &str) -> IResult<&str, Type> {
println!("type: {:?}", input);
alt((
(map(tag("bool"), |_| Type::Bool)),
(map(tag("u8"), |_| Type::U8)),
@ -97,13 +134,13 @@ pub fn parse_type(input: &str) -> IResult<&str, Type> {
(map(tag("f64"), |_| Type::F64)),
(map(tag("utf8"), |_| Type::Utf8)),
(preceded(
tag("u"),
char('u'),
map_res(digit1, |s: &str| {
Ok::<Type, Box<dyn Error>>(Type::UX(s.parse()?))
}),
)),
(preceded(
tag("i"),
char('i'),
map_res(digit1, |s: &str| {
Ok::<Type, Box<dyn Error>>(Type::IX(s.parse()?))
}),
@ -139,20 +176,27 @@ fn parse_range_vals(input: &str) -> IResult<&str, Range> {
fn parse_range(input: &str) -> IResult<&str, Range> {
map(
(parse_range_vals, char(' '), parse_unit),
|((l, h), _, s)| (l * s, h * s),
|((l, h), _, s)| {
if s == 1 {
(l, h)
} else {
((l * s), ((1 + h) * s).saturating_sub(1))
}
},
)
.parse(input)
}
#[test]
fn range_test() {
assert_eq!((0, 16), parse_range("0-2 bytes").unwrap().1);
assert_eq!((24, 24), parse_range("3 byte").unwrap().1);
assert_eq!((0, 15), parse_range("0-1 bytes").unwrap().1);
assert_eq!((24, 31), parse_range("3 byte").unwrap().1);
assert_eq!((0, 5), parse_range("0-5 bits").unwrap().1);
assert_eq!((0, 2), parse_range("0-2 bit").unwrap().1);
}
pub fn parse_field_entry(input: &str) -> IResult<&str, (Range, FieldRegion)> {
println!("field_entry{:?}", input);
map(
(
parse_range,
@ -169,7 +213,7 @@ pub fn parse_field_entry(input: &str) -> IResult<&str, (Range, FieldRegion)> {
(
tag("const "),
parse_field_name,
char(' '),
many1(char(' ')),
parse_const_region,
),
|(_, name, _, r)| FieldRegion::Const {
@ -184,33 +228,19 @@ pub fn parse_field_entry(input: &str) -> IResult<&str, (Range, FieldRegion)> {
.parse(input)
}
#[test]
fn parse_field_line_test() {
let line = include_str!("test/line.brin");
assert_eq!(
(
(0, 2),
FieldRegion::Subfield {
name: "test".to_owned(),
content: Region::Atom { ty: Type::U16 }
}
),
parse_field_entry(line).unwrap().1
);
}
pub fn parse_region(input: &str) -> IResult<&str, Region> {
println!("Region: {:?}", input);
alt((
map(
(
ws(char('{')),
opt(ws(newline)),
separated_list1(ws(line_ending), parse_field_entry),
opt(ws(newline)),
ws(char('}')),
multispace0,
char('{'),
multispace1,
separated_list0(multispace1, parse_field_entry),
multispace1,
char('}'),
),
|(_, _, field_list, _, _)| Region::Fields(field_list),
|(_, _, _, field_list, _, _)| Region::Fields(field_list),
),
map(parse_type, |ty| Region::Atom { ty }),
))
@ -234,64 +264,64 @@ fn parse_concrete_type(ty: Type) -> fn(&str) -> IResult<&str, OwnedAtom> {
},
Type::U8 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U8(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U8(parse_int::parse(x)?))
})
.parse(s)
},
Type::U16 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U16(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U16(parse_int::parse(x)?))
})
.parse(s)
},
Type::U32 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U32(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U32(parse_int::parse(x)?))
})
.parse(s)
},
Type::U64 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U64(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U64(parse_int::parse(x)?))
})
.parse(s)
},
Type::U128 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U128(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U128(parse_int::parse(x)?))
})
.parse(s)
},
Type::I8 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I8(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I8(parse_int::parse(x)?))
})
.parse(s)
},
Type::I16 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I16(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I16(parse_int::parse(x)?))
})
.parse(s)
},
Type::I32 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I32(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I32(parse_int::parse(x)?))
})
.parse(s)
},
Type::I64 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I64(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I64(parse_int::parse(x)?))
})
.parse(s)
},
Type::I128 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I128(x.parse()?))
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I128(parse_int::parse(x)?))
})
.parse(s)
},
@ -316,26 +346,201 @@ fn parse_concrete_type(ty: Type) -> fn(&str) -> IResult<&str, OwnedAtom> {
}
#[test]
fn parse_region_test() {
let region = include_str!("test/region.brin");
fn test_const_region() {
    // A `u16` constant whose value is written in hexadecimal.
    let (_, atom) = parse_const_region("u16 0x10").unwrap();
    assert_eq!(atom, OwnedAtom::U16(0x10));
}
fn parse_options(input: &str) -> IResult<&str, Options> {
map(
many0(map(
terminated(preceded(tag("#= "), is_not("\r\n")), line_ending),
|s: &str| s.trim().to_owned(),
)),
Options,
)
.parse(input)
}
#[test]
fn options_test() {
    // Two option lines followed by the first identifier of the module body.
    let (rem, parsed) = parse_options("#= BigEndian\n#= Another Option\nCFGFRAME").unwrap();
    assert_eq!(rem, "CFGFRAME");
    let expected = vec![String::from("BigEndian"), String::from("Another Option")];
    assert_eq!(parsed, Options(expected));

    // No option lines at all: nothing is consumed and the list is empty.
    let (rem, parsed) = parse_options("CFGFRAME").unwrap();
    assert_eq!(rem, "CFGFRAME");
    assert_eq!(parsed, Options(Vec::<String>::new()));
}
fn parse_named_region(input: &str) -> IResult<&str, (String, Region)> {
map((parse_identifier, parse_region), |(name, region)| {
(name.to_owned(), region)
})
.parse(input)
}
/// Parses a whole module.
///
/// The expected layout is:
/// 1. zero or more option lines (see `parse_options`),
/// 2. at least one whitespace character (`multispace1`),
/// 3. zero or more named regions separated by optional whitespace.
///
/// Returns the remaining input together with the parsed [`Module`].
///
/// NOTE(review): `multispace1` is imported from `nom::character::streaming`
/// in this file, so input that ends inside that whitespace run is presumably
/// reported as `Incomplete` rather than as an empty module — confirm this is
/// intended.
pub fn parse_module(input: &str) -> IResult<&str, Module> {
    // No debug printing here: a parser must not dump its whole input to stdout.
    map(
        (
            parse_options,
            multispace1,
            separated_list0(multispace0, parse_named_region),
        ),
        |(opts, _, objs)| Module { opts, objs },
    )
    .parse(input)
}
#[test]
fn module_test() {
    // One option line, blank lines, then a single named region.
    let source = "#= BigEndian\n\n\nCFGFRAME { 0-1 bytes: field \"SYNC\" u16 }";
    let (_, parsed) = parse_module(source).unwrap();
    let Module { opts, objs } = parsed;
    assert_eq!(opts, Options(vec!["BigEndian".to_owned()]));
    assert_eq!(objs[0].0, "CFGFRAME");
}
// Parses the bundled C37.118 format description and checks the first three
// fields of its first object. Every structural expectation is enforced with
// `let … else { panic!(…) }` so that an unexpected variant fails the test
// instead of silently skipping the assertions nested below it.
#[test]
fn c37118_test() {
    let brin = include_str!("../../formats/c37118.brin");
    let (_, module) = parse_module(brin).unwrap();
    assert_eq!(module.opts, Options(vec!["Bigendian".to_owned()]));
    assert_eq!(module.objs[0].0, "CFGFRAME");

    let Region::Fields(fields) = &module.objs[0].1 else {
        panic!("Wrong region type");
    };

    // Field 0: SYNC, itself a structured region with two constants.
    assert_eq!(fields[0].0, (0, 15));
    let FieldRegion::Subfield { name, content } = &fields[0].1 else {
        panic!("Incorrect field region for SYNC");
    };
    assert_eq!(name, "SYNC");
    let Region::Fields(subfields) = content else {
        panic!("Incorrect content for SYNC");
    };

    assert_eq!(subfields[0].0, (0, 7));
    let FieldRegion::Const { name, value } = &subfields[0].1 else {
        panic!("Incorrect field region for magic_number");
    };
    assert_eq!(name, "magic_number");
    assert_eq!(*value, OwnedAtom::U8(0xAA));

    assert_eq!(subfields[1].0, (8, 15));
    let FieldRegion::Const { name, value } = &subfields[1].1 else {
        panic!("Incorrect field region for ver_number");
    };
    assert_eq!(name, "ver_number");
    assert_eq!(*value, OwnedAtom::U8(0x31));

    // Field 1: TIME BASE, an atomic u16.
    assert_eq!(fields[1].0, (24, 39));
    let FieldRegion::Subfield { name, content } = &fields[1].1 else {
        panic!("Incorrect field region for TIME BASE");
    };
    assert_eq!(*name, "TIME BASE");
    let Region::Atom { ty } = content else {
        panic!("Incorrect content for TIME BASE");
    };
    assert_eq!(*ty, Type::U16);

    // Field 2: NUM_PMU, an atomic u16.
    assert_eq!(fields[2].0, (40, 55));
    let FieldRegion::Subfield { name, content } = &fields[2].1 else {
        panic!("Incorrect field region for NUM_PMU");
    };
    assert_eq!(*name, "NUM_PMU");
    let Region::Atom { ty } = content else {
        panic!("Incorrect content for NUM_PMU");
    };
    assert_eq!(*ty, Type::U16);
}
#[test]
fn test_field_entry() {
    // Bytes 3-4 are expected to map to the range 24..=39, and the quoted
    // field name may contain a space.
    let expected: (Range, FieldRegion) = (
        (24, 39),
        FieldRegion::Subfield {
            name: "TIME BASE".to_owned(),
            content: Region::Atom { ty: Type::U16 },
        },
    );
    let (_, entry) = parse_field_entry("3-4 bytes: field \"TIME BASE\" u16").unwrap();
    assert_eq!(entry, expected);
}
#[test]
fn test_region_good() {
let region = "{
3-4 byte: field \"time base\" u16
5-6 byte: field \"magic number\" {
0-4 bits: const \"aaa\" u8 0x10
}
}";
assert_eq!(
parse_region(region).unwrap().1,
Region::Fields(vec![
(
(0, 1),
(24, 39),
FieldRegion::Subfield {
name: "test".to_owned(),
name: "time base".to_owned(),
content: Region::Atom { ty: Type::U16 }
}
),
(
(2, 3),
(40, 55),
FieldRegion::Subfield {
name: "second_field".to_owned(),
content: Region::Atom { ty: Type::I32 }
name: "magic number".to_owned(),
content: Region::Fields(vec![(
(0, 4),
FieldRegion::Const {
name: "aaa".to_owned(),
value: Atom::U8(16)
}
)])
}
)
]),
parse_region(region).unwrap().1
])
);
}
// Despite its name, this test expects the input to parse successfully.
// The region lists its entries out of ascending order ("3-4 byte" before
// "1-2 byte") and contains a single-bit range ("0 bit"); the expected result
// keeps the entries in source order.
#[test]
fn test_region_bad() {
let region = "{
3-4 byte: field \"magic number\" {
0 bit: const \"active\" u8 0x10
}
1-2 byte: field \"timestamp\" u8
}";
assert_eq!(
parse_region(region).unwrap().1,
Region::Fields(vec![
(
// "3-4 byte" is expected as the range (24, 39).
(24, 39),
FieldRegion::Subfield {
name: "magic number".to_owned(),
content: Region::Fields(vec![(
// A single bit "0 bit" is expected as the range (0, 0).
(0, 0),
FieldRegion::Const {
name: "active".to_owned(),
value: Atom::U8(16)
}
)])
}
),
(
// "1-2 byte" is expected as the range (8, 23).
(8, 23),
FieldRegion::Subfield {
name: "timestamp".to_owned(),
content: Region::Atom { ty: Type::U8 }
}
),
])
);
}

View file

@ -1 +0,0 @@
0-2 bits: field "test" u16

View file

@ -1,8 +0,0 @@
{
0-1 bits: field "test" u16
2-3 bits: field "second_field" i16
4-8 bits: field "big_struct" {
0-1 bits: field "test" u8
2-3 bits: const "magic" u16 0
}
}