first commit

This commit is contained in:
Ugo Botto 2025-06-24 21:51:28 +02:00
commit 0d417bbbe5
12 changed files with 524 additions and 0 deletions

50
src/grammar/mod.rs Normal file
View file

@ -0,0 +1,50 @@
use crate::ir::{Atom, OwnedAtom};
pub(crate) mod parser;
/// A collection of described objects.
///
/// Each entry of `objs` pairs a `String` (presumably the object's name — TODO confirm)
/// with the [`Region`] that describes it.
#[derive(PartialEq, Debug)]
struct Module {
objs: Vec<(String, Region)>,
}
/// A region of memory.
///
/// Either a structured field list or an atomic type.
#[derive(PartialEq, Debug)]
enum Region {
/// Structured region: each entry pairs a [`Range`] with the [`FieldRegion`] located there.
Fields(Vec<(Range, FieldRegion)>),
/// Region holding a single value of the primitive type `ty`.
Atom { ty: Type },
}
/// Content of a field.
///
/// Can be a region or a constant.
#[derive(PartialEq, Debug)]
enum FieldRegion {
/// Named field whose layout is given by the nested [`Region`] in `content`.
Subfield { name: String, content: Region },
/// Named field carrying a fixed `value`.
Const { name: String, value: OwnedAtom },
}
/// Primitive type of an atomic region.
///
/// The enum is a small plain value (its only payloads are `usize` widths), so it is
/// `Copy` and can be compared and hashed freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) enum Type {
    /// Boolean value.
    Bool,
    // unsigned numbers
    U8,
    U16,
    U32,
    U64,
    U128,
    /// Unsigned integer of an arbitrary width; the payload is the number following `u`.
    UX(usize),
    // signed numbers
    I8,
    I16,
    I32,
    I64,
    I128,
    /// Signed integer of an arbitrary width; the payload is the number following `i`.
    IX(usize),
    // float
    F32,
    F64,
    // strings
    Utf8,
}
/// Position of a field relative to its parent, as a `(low, high)` pair.
///
/// The parser's `parse_range` produces both values scaled to bits
/// (a `byte` unit multiplies each index by 8).
type Range = (usize, usize);

341
src/grammar/parser.rs Normal file
View file

@ -0,0 +1,341 @@
use std::error::Error;
use nom::IResult;
use nom::Parser;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::alpha1;
use nom::character::complete::alphanumeric1;
use nom::character::complete::bin_digit1;
use nom::character::complete::char;
use nom::character::complete::digit1;
use nom::character::complete::hex_digit1;
use nom::character::complete::line_ending;
use nom::character::complete::newline;
use nom::character::complete::space0;
use nom::combinator::map;
use nom::combinator::map_res;
use nom::combinator::opt;
use nom::combinator::recognize;
use nom::error::ParseError;
use nom::multi::many0_count;
use nom::multi::separated_list1;
use nom::sequence::delimited;
use nom::sequence::pair;
use nom::sequence::preceded;
use super::FieldRegion;
use super::OwnedAtom;
use super::Range;
use super::Region;
use super::Type;
/// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and
/// trailing whitespace, returning the output of `inner`.
pub fn ws<'a, O, E: ParseError<&'a str>, F>(inner: F) -> impl Parser<&'a str, Output = O, Error = E>
where
F: Parser<&'a str, Output = O, Error = E>,
{
delimited(space0, inner, space0)
}
/// Recognizes a numeric literal at the start of `input` and returns its text unparsed.
///
/// Accepted forms, tried in this order:
/// - hexadecimal with a case-insensitive `0x` prefix (`0x1F`),
/// - binary with a case-insensitive `0b` prefix (`0b101`),
/// - decimal with an optional leading `-` and an optional fractional part
///   (`-5`, `3.25`).
///
/// Converting the text into a concrete numeric type is left to the caller.
pub fn parse_number(input: &str) -> IResult<&str, &str> {
    alt((
        recognize((tag_no_case("0x"), hex_digit1)),
        recognize((tag_no_case("0b"), bin_digit1)),
        // The fraction is only consumed when at least one digit follows the dot,
        // so `5.` still yields `5` and leaves the dot untouched.
        recognize((opt(char('-')), digit1, opt((char('.'), digit1)))),
    ))
    .parse(input)
}

#[test]
fn parse_number_test() {
    for literal in ["5", "-5", "0x0", "0b10", "1.5", "-0.25"] {
        assert_eq!(parse_number(literal), Ok(("", literal)));
    }
    // A dangling dot is not part of the number.
    assert_eq!(parse_number("5."), Ok((".", "5")));
}
/// Parses a double-quoted identifier and returns it without the quotes.
///
/// The identifier starts with a letter or `_` and continues with any number of
/// letters, digits or `_`.
pub fn parse_field_name(input: &str) -> IResult<&str, &str> {
    let head = alt((alpha1, tag("_")));
    let tail = many0_count(alt((alphanumeric1, tag("_"))));
    let identifier = recognize(pair(head, tail));

    delimited(char('"'), identifier, char('"')).parse(input)
}
#[test]
fn field_name_test() {
    // The surrounding quotes are consumed but not returned.
    let (_, name) = parse_field_name("\"NUM_PMU\"").unwrap();
    assert_eq!("NUM_PMU", name);
}
/// A combinator that parses primitive types.
pub fn parse_type(input: &str) -> IResult<&str, Type> {
alt((
(map(tag("bool"), |_| Type::Bool)),
(map(tag("u8"), |_| Type::U8)),
(map(tag("u16"), |_| Type::U16)),
(map(tag("u32"), |_| Type::U32)),
(map(tag("u64"), |_| Type::U64)),
(map(tag("u128"), |_| Type::U128)),
(map(tag("i8"), |_| Type::I8)),
(map(tag("i16"), |_| Type::I16)),
(map(tag("i32"), |_| Type::I32)),
(map(tag("i64"), |_| Type::I64)),
(map(tag("i128"), |_| Type::I128)),
(map(tag("f32"), |_| Type::F32)),
(map(tag("f64"), |_| Type::F64)),
(map(tag("utf8"), |_| Type::Utf8)),
(preceded(
tag("u"),
map_res(digit1, |s: &str| {
Ok::<Type, Box<dyn Error>>(Type::UX(s.parse()?))
}),
)),
(preceded(
tag("i"),
map_res(digit1, |s: &str| {
Ok::<Type, Box<dyn Error>>(Type::IX(s.parse()?))
}),
)),
))
.parse(input)
}
// Range and locations
/// Parses a run of decimal digits as a `usize`; fails if the value does not fit.
fn parse_index(input: &str) -> IResult<&str, usize> {
    map_res(digit1, |digits: &str| digits.parse::<usize>()).parse(input)
}
fn parse_unit(input: &str) -> IResult<&str, usize> {
alt((
map((tag("bit"), opt(char('s'))), |_| 1),
map((tag("byte"), opt(char('s'))), |_| 8),
))
.parse(input)
}
fn parse_range_vals(input: &str) -> IResult<&str, Range> {
alt((
map((parse_index, char('-'), parse_index), |(low, _, high)| {
(low, high)
}),
map(parse_index, |low| (low, low)),
))
.parse(input)
}
fn parse_range(input: &str) -> IResult<&str, Range> {
map(
(parse_range_vals, char(' '), parse_unit),
|((l, h), _, s)| (l * s, h * s),
)
.parse(input)
}
#[test]
fn range_test() {
    // (input text, expected range in bits)
    let cases: [(&str, Range); 4] = [
        ("0-2 bytes", (0, 16)),
        ("3 byte", (24, 24)),
        ("0-5 bits", (0, 5)),
        ("0-2 bit", (0, 2)),
    ];
    for (text, expected) in cases {
        assert_eq!(expected, parse_range(text).unwrap().1);
    }
}
pub fn parse_field_entry(input: &str) -> IResult<&str, (Range, FieldRegion)> {
map(
(
parse_range,
tag(": "),
alt((
map(
(tag("field "), parse_field_name, char(' '), parse_region),
|(_, name, _, r)| FieldRegion::Subfield {
name: name.to_owned(),
content: r,
},
),
map(
(
tag("const "),
parse_field_name,
char(' '),
parse_const_region,
),
|(_, name, _, r)| FieldRegion::Const {
name: name.to_owned(),
value: r,
},
),
)),
),
|(r, _, f)| (r, f),
)
.parse(input)
}
#[test]
fn parse_field_line_test() {
    let (_, parsed) = parse_field_entry(include_str!("test/line.brin")).unwrap();

    let expected: (Range, FieldRegion) = (
        (0, 2),
        FieldRegion::Subfield {
            name: "test".to_owned(),
            content: Region::Atom { ty: Type::U16 },
        },
    );
    assert_eq!(expected, parsed);
}
pub fn parse_region(input: &str) -> IResult<&str, Region> {
alt((
map(
(
ws(char('{')),
opt(ws(newline)),
separated_list1(ws(line_ending), parse_field_entry),
opt(ws(newline)),
ws(char('}')),
),
|(_, _, field_list, _, _)| Region::Fields(field_list),
),
map(parse_type, |ty| Region::Atom { ty }),
))
.parse(input)
}
fn parse_const_region(input: &str) -> IResult<&str, OwnedAtom> {
let (input, ty) = parse_type(input)?;
let (input, _) = char(' ').parse(input)?;
parse_concrete_type(ty).parse(input)
}
fn parse_concrete_type(ty: Type) -> fn(&str) -> IResult<&str, OwnedAtom> {
match ty {
Type::Bool => |s: &str| {
alt((
map(tag("true"), |_| OwnedAtom::Bool(true)),
map(tag("false"), |_| OwnedAtom::Bool(false)),
))
.parse(s)
},
Type::U8 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U8(x.parse()?))
})
.parse(s)
},
Type::U16 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U16(x.parse()?))
})
.parse(s)
},
Type::U32 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U32(x.parse()?))
})
.parse(s)
},
Type::U64 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U64(x.parse()?))
})
.parse(s)
},
Type::U128 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::U128(x.parse()?))
})
.parse(s)
},
Type::I8 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I8(x.parse()?))
})
.parse(s)
},
Type::I16 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I16(x.parse()?))
})
.parse(s)
},
Type::I32 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I32(x.parse()?))
})
.parse(s)
},
Type::I64 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I64(x.parse()?))
})
.parse(s)
},
Type::I128 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::I128(x.parse()?))
})
.parse(s)
},
Type::F32 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::F32(x.parse()?))
})
.parse(s)
},
Type::F64 => |s: &str| {
map_res(parse_number, |x| {
Ok::<OwnedAtom, Box<dyn Error>>(OwnedAtom::F64(x.parse()?))
})
.parse(s)
},
Type::Utf8 => todo!(),
Type::IX(bin_len) => todo!(),
Type::UX(bit_len) => todo!(),
}
}
#[test]
fn parse_region_test() {
    // The expectation mirrors `test/region.brin` entry by entry, including the
    // nested `big_struct` block and its constant.
    let region = include_str!("test/region.brin");

    let big_struct = Region::Fields(vec![
        (
            (0, 1),
            FieldRegion::Subfield {
                name: "test".to_owned(),
                content: Region::Atom { ty: Type::U8 },
            },
        ),
        (
            (2, 3),
            FieldRegion::Const {
                name: "magic".to_owned(),
                value: OwnedAtom::U16(0),
            },
        ),
    ]);

    assert_eq!(
        Region::Fields(vec![
            (
                (0, 1),
                FieldRegion::Subfield {
                    name: "test".to_owned(),
                    content: Region::Atom { ty: Type::U16 },
                },
            ),
            (
                (2, 3),
                FieldRegion::Subfield {
                    name: "second_field".to_owned(),
                    content: Region::Atom { ty: Type::I16 },
                },
            ),
            (
                (4, 8),
                FieldRegion::Subfield {
                    name: "big_struct".to_owned(),
                    content: big_struct,
                },
            ),
        ]),
        parse_region(region).unwrap().1
    );
}

View file

@ -0,0 +1 @@
0-2 bits: field "test" u16

View file

@ -0,0 +1,8 @@
{
0-1 bits: field "test" u16
2-3 bits: field "second_field" i16
4-8 bits: field "big_struct" {
0-1 bits: field "test" u8
2-3 bits: const "magic" u16 0
}
}