From 0d417bbbe5802838e25ccc1873fe16fdc3957695 Mon Sep 17 00:00:00 2001 From: Ugo Botto Date: Tue, 24 Jun 2025 21:51:28 +0200 Subject: [PATCH] first commit --- .gitignore | 1 + Cargo.toml | 10 + ISSUES.md | 15 ++ README.md | 0 formats/c37118.brin | 12 ++ samples/c37118_bad.txt | 1 + src/grammar/mod.rs | 50 +++++ src/grammar/parser.rs | 341 +++++++++++++++++++++++++++++++++++ src/grammar/test/line.brin | 1 + src/grammar/test/region.brin | 8 + src/ir/mod.rs | 79 ++++++++ src/main.rs | 6 + 12 files changed, 524 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 ISSUES.md create mode 100644 README.md create mode 100644 formats/c37118.brin create mode 100644 samples/c37118_bad.txt create mode 100644 src/grammar/mod.rs create mode 100644 src/grammar/parser.rs create mode 100644 src/grammar/test/line.brin create mode 100644 src/grammar/test/region.brin create mode 100644 src/ir/mod.rs create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d531286 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "bin_inspect" +version = "0.1.0" +edition = "2024" + +[dependencies] +clap = "4.5.40" +nanoid = "0.4.0" +nom = "8.0.0" +thiserror = "2.0.12" diff --git a/ISSUES.md b/ISSUES.md new file mode 100644 index 0000000..d8407b7 --- /dev/null +++ b/ISSUES.md @@ -0,0 +1,15 @@ +# TO-DO + +## Feature + +- [ ] write docs + - [ ] General + - [ ] Brin language +- [ ] show output +- [ ] json output +- [ ] cli usage + - [ ] think of api in general + +## Bug + +# Done ✓ diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/formats/c37118.brin b/formats/c37118.brin new file mode 100644 index 0000000..3086ede --- /dev/null +++ b/formats/c37118.brin @@ -0,0 +1,12 @@ +#= BigEndian + +CFGFRAME { + 0-1 bytes: field "SYNC" { + 0 
byte: const "magic_number" u8 0xAA + 1 byte: const "ver_number" u8 0x31 + } + 3-4 bytes: field "TIME BASE" u16 + 5-6 bytes: field "NUM_PMU" u16 +} + + diff --git a/samples/c37118_bad.txt b/samples/c37118_bad.txt new file mode 100644 index 0000000..0fc845d --- /dev/null +++ b/samples/c37118_bad.txt @@ -0,0 +1 @@ +170, 49, 1, 0, 0, 1, 104, 91, 0, 121, 0, 13, 34, 178, 0, 0, 0, 0, 3, 232, 0, 1, 8, 80, 97, 114, 116, 97, 110, 110, 97, 4, 207, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 6, 0, 0, 0, 0, 15, 80, 78, 65, 80, 35, 73, 80, 95, 50, 50, 52, 95, 43, 83, 73, 15, 80, 78, 65, 80, 35, 86, 80, 95, 50, 50, 52, 95, 43, 83, 86, 15, 80, 78, 65, 80, 35, 73, 52, 95, 50, 50, 52, 95, 43, 83, 73, 15, 80, 78, 65, 80, 35, 105, 56, 95, 50, 50, 52, 95, 66, 80, 73, 16, 80, 78, 65, 80, 35, 73, 49, 50, 95, 50, 50, 52, 95, 67, 80, 73, 16, 80, 78, 65, 80, 35, 86, 49, 50, 95, 50, 50, 52, 95, 43, 83, 86, 0, 0, 8, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 50, 138, 147 diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs new file mode 100644 index 0000000..540daec --- /dev/null +++ b/src/grammar/mod.rs @@ -0,0 +1,50 @@ +use crate::ir::{Atom, OwnedAtom}; +pub(crate) mod parser; + +#[derive(PartialEq, Debug)] +struct Module { + objs: Vec<(String, Region)>, +} + +/// A region of memory. +/// Either a structured field list or an atomic type. +#[derive(PartialEq, Debug)] +enum Region { + Fields(Vec<(Range, FieldRegion)>), + Atom { ty: Type }, +} + +/// Content of a field. +/// Can be a region or a constant. 
+#[derive(PartialEq, Debug)]
+enum FieldRegion {
+    /// A named sub-region; `content` is either a nested field list or an atomic type.
+    Subfield { name: String, content: Region },
+    /// A named constant together with the concrete `value` written in the grammar.
+    Const { name: String, value: OwnedAtom },
+}
+
+/// Primitive type names understood by the grammar (produced by `parser::parse_type`).
+#[derive(PartialEq, Debug)]
+pub(crate) enum Type {
+    Bool,
+    // unsigned numbers
+    U8,
+    U16,
+    U32,
+    U64,
+    U128,
+    // Built by the parser from `u<N>`, storing `N`.
+    // Presumably `N` is a width in bits -- TODO confirm.
+    UX(usize),
+    // signed numbers
+    I8,
+    I16,
+    I32,
+    I64,
+    I128,
+    // Built by the parser from `i<N>`, storing `N`.
+    // Presumably `N` is a width in bits -- TODO confirm.
+    IX(usize),
+    // float
+    F32,
+    F64,
+    // strings
+    Utf8,
+}
+
+/// Position of a field relative to its parent, as a `(low, high)` pair.
+/// `parser::parse_range` scales both ends by the unit size (`bit` = 1, `byte` = 8).
+type Range = (usize, usize);
diff --git a/src/grammar/parser.rs b/src/grammar/parser.rs
new file mode 100644
index 0000000..4c7c7ee
--- /dev/null
+++ b/src/grammar/parser.rs
@@ -0,0 +1,341 @@
+use std::error::Error;
+
+use nom::IResult;
+use nom::Parser;
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::bytes::complete::tag_no_case;
+use nom::character::complete::alpha1;
+use nom::character::complete::alphanumeric1;
+use nom::character::complete::bin_digit1;
+use nom::character::complete::char;
+use nom::character::complete::digit1;
+use nom::character::complete::hex_digit1;
+use nom::character::complete::line_ending;
+use nom::character::complete::newline;
+use nom::character::complete::space0;
+use nom::combinator::map;
+use nom::combinator::map_res;
+use nom::combinator::opt;
+use nom::combinator::recognize;
+use nom::error::ParseError;
+use nom::multi::many0_count;
+use nom::multi::separated_list1;
+use nom::sequence::delimited;
+use nom::sequence::pair;
+use nom::sequence::preceded;
+
+use super::FieldRegion;
+use super::OwnedAtom;
+use super::Range;
+use super::Region;
+use super::Type;
+
+/// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and
+/// trailing spaces and tabs (`space0`; line endings are not consumed), returning the output of `inner`.
+pub fn ws<'a, O, E: ParseError<&'a str>, F>(inner: F) -> impl Parser<&'a str, Output = O, Error = E>
+where
+    F: Parser<&'a str, Output = O, Error = E>,
+{
+    // `space0` matches only spaces and tabs, so line endings are left for the caller.
+    delimited(space0, inner, space0)
+}
+
+/// Recognizes a numeric literal and returns its text unchanged.
+///
+/// Accepted forms:
+/// - hexadecimal with a case-insensitive `0x` prefix (`0xAA`),
+/// - binary with a case-insensitive `0b` prefix (`0b10`),
+/// - decimal with an optional leading `-` and an optional fractional part (`5`, `-5`, `1.5`).
+///
+/// Converting the text to a concrete numeric type is left to the caller.
+pub fn parse_number(input: &str) -> IResult<&str, &str> {
+    alt((
+        recognize((tag_no_case("0x"), hex_digit1)),
+        recognize((tag_no_case("0b"), bin_digit1)),
+        // The fraction is optional so that float constants (`f32`/`f64`) can be written;
+        // a dot that is not followed by a digit is left in the input.
+        recognize((opt(char('-')), digit1, opt((char('.'), digit1)))),
+    ))
+    .parse(input)
+}
+
+#[test]
+fn parse_number_test() {
+    assert_eq!(parse_number("5").unwrap().1, "5".to_owned());
+    assert_eq!(parse_number("-5").unwrap().1, "-5".to_owned());
+    assert_eq!(parse_number("0x0").unwrap().1, "0x0".to_owned());
+    assert_eq!(parse_number("0b10").unwrap().1, "0b10".to_owned());
+    assert_eq!(parse_number("1.5").unwrap().1, "1.5".to_owned());
+    assert_eq!(parse_number("-0.25").unwrap().1, "-0.25".to_owned());
+    // A trailing dot without digits is not part of the number.
+    assert_eq!(parse_number("5.").unwrap(), (".", "5"));
+}
+
+/// Parses a field name: an identifier wrapped in double quotes, returned without the quotes.
+///
+/// The identifier starts with an ASCII letter or `_` and continues with ASCII letters,
+/// digits or `_`.
+///
+/// NOTE(review): names containing a space, such as `"TIME BASE"` in `formats/c37118.brin`,
+/// are rejected by this rule -- confirm which of the two is intended.
+pub fn parse_field_name(input: &str) -> IResult<&str, &str> {
+    delimited(
+        char('"'),
+        recognize(pair(
+            alt((alpha1, tag("_"))),
+            many0_count(alt((alphanumeric1, tag("_")))),
+        )),
+        char('"'),
+    )
+    .parse(input)
+}
+
+#[test]
+fn field_name_test() {
+    assert_eq!(
+        "NUM_PMU".to_owned(),
+        parse_field_name("\"NUM_PMU\"").unwrap().1
+    )
+}
+
+/// A combinator that parses primitive types.
+///
+/// Recognized names are `bool`, `utf8`, `f32`, `f64` and the integer forms `u<N>` / `i<N>`.
+/// `N` in {8, 16, 32, 64, 128} yields the fixed-width variant; any other `N` yields
+/// `Type::UX(N)` / `Type::IX(N)`.
+pub fn parse_type(input: &str) -> IResult<&str, Type> {
+    alt((
+        map(tag("bool"), |_| Type::Bool),
+        map(tag("utf8"), |_| Type::Utf8),
+        map(tag("f32"), |_| Type::F32),
+        map(tag("f64"), |_| Type::F64),
+        // Integer names are read as `u`/`i` followed by ALL the digits that come next, so
+        // `u80` becomes `UX(80)` rather than `u8` with a stray `0` left in the input.
+        map_res(preceded(tag("u"), digit1), unsigned_type),
+        map_res(preceded(tag("i"), digit1), signed_type),
+    ))
+    .parse(input)
+}
+
+/// Maps the digits that follow `u` to an unsigned type.
+///
+/// Fails only when a non-standard width does not fit in `usize`.
+fn unsigned_type(width: &str) -> Result<Type, Box<dyn Error>> {
+    Ok(match width {
+        "8" => Type::U8,
+        "16" => Type::U16,
+        "32" => Type::U32,
+        "64" => Type::U64,
+        "128" => Type::U128,
+        other => Type::UX(other.parse()?),
+    })
+}
+
+/// Maps the digits that follow `i` to a signed type.
+///
+/// Fails only when a non-standard width does not fit in `usize`.
+fn signed_type(width: &str) -> Result<Type, Box<dyn Error>> {
+    Ok(match width {
+        "8" => Type::I8,
+        "16" => Type::I16,
+        "32" => Type::I32,
+        "64" => Type::I64,
+        "128" => Type::I128,
+        other => Type::IX(other.parse()?),
+    })
+}
+
+// Range and locations
+
+/// Parses an unsigned decimal index.
+fn parse_index(input: &str) -> IResult<&str, usize> {
+    map_res(digit1, str::parse).parse(input)
+}
+
+/// Parses a unit name and returns its size in bits: `bit`/`bits` is 1, `byte`/`bytes` is 8.
+fn parse_unit(input: &str) -> IResult<&str, usize> {
+    alt((
+        map((tag("bit"), opt(char('s'))), |_| 1),
+        map((tag("byte"), opt(char('s'))), |_| 8),
+    ))
+    .parse(input)
+}
+
+/// Parses `low-high`, or a single index `n` which is returned as `(n, n)`.
+fn parse_range_vals(input: &str) -> IResult<&str, Range> {
+    alt((
+        map((parse_index, char('-'), parse_index), |(low, _, high)| {
+            (low, high)
+        }),
+        map(parse_index, |low| (low, low)),
+    ))
+    .parse(input)
+}
+
+/// Parses `<range> <unit>` and multiplies both bounds by the unit size,
+/// e.g. `0-2 bytes` gives `(0, 16)` and `3 byte` gives `(24, 24)`.
+fn parse_range(input: &str) -> IResult<&str, Range> {
+    map(
+        (parse_range_vals, char(' '), parse_unit),
+        |((l, h), _, s)| (l * s, h * s),
+    )
+    .parse(input)
+}
+
+#[test]
+fn range_test() {
+    assert_eq!((0, 16), parse_range("0-2 bytes").unwrap().1);
+    assert_eq!((24, 24), parse_range("3 byte").unwrap().1);
+    assert_eq!((0, 5), parse_range("0-5 bits").unwrap().1);
+    assert_eq!((0, 2), parse_range("0-2 bit").unwrap().1);
+}
+
+/// Parses one field entry: `<range>: field "<name>" <region>` or
+/// `<range>: const "<name>" <type> <value>`.
+pub fn parse_field_entry(input: &str) -> IResult<&str, (Range, FieldRegion)> {
+    map(
+        (
+            parse_range,
+            tag(": "),
+            alt((
+                map(
+                    (tag("field "), parse_field_name, char(' '), parse_region),
+                    |(_, name, _, r)| FieldRegion::Subfield {
+                        name:
name.to_owned(),
+                        content: r,
+                    },
+                ),
+                map(
+                    (
+                        tag("const "),
+                        parse_field_name,
+                        char(' '),
+                        parse_const_region,
+                    ),
+                    |(_, name, _, r)| FieldRegion::Const {
+                        name: name.to_owned(),
+                        value: r,
+                    },
+                ),
+            )),
+        ),
+        |(r, _, f)| (r, f),
+    )
+    .parse(input)
+}
+
+#[test]
+fn parse_field_line_test() {
+    let line = include_str!("test/line.brin");
+
+    assert_eq!(
+        (
+            (0, 2),
+            FieldRegion::Subfield {
+                name: "test".to_owned(),
+                content: Region::Atom { ty: Type::U16 }
+            }
+        ),
+        parse_field_entry(line).unwrap().1
+    );
+}
+
+/// Parses a region: either a `{ ... }` block holding one field entry per line, or a bare
+/// primitive type name.
+///
+/// NOTE(review): the braces are followed by `newline` (`\n` only) while entries are separated
+/// by `line_ending` (`\n` or `\r\n`), so CRLF input is only partly accepted -- confirm intent.
+pub fn parse_region(input: &str) -> IResult<&str, Region> {
+    alt((
+        map(
+            (
+                ws(char('{')),
+                opt(ws(newline)),
+                separated_list1(ws(line_ending), parse_field_entry),
+                opt(ws(newline)),
+                ws(char('}')),
+            ),
+            |(_, _, field_list, _, _)| Region::Fields(field_list),
+        ),
+        map(parse_type, |ty| Region::Atom { ty }),
+    ))
+    .parse(input)
+}
+
+/// Parses the `<type> <value>` tail of a `const` entry into a concrete atom.
+fn parse_const_region(input: &str) -> IResult<&str, OwnedAtom> {
+    let (input, ty) = parse_type(input)?;
+    let (input, _) = char(' ').parse(input)?;
+    parse_concrete_type(ty).parse(input)
+}
+
+/// Splits an integer literal accepted by `parse_number` into its radix and its digits.
+///
+/// `0x`/`0X` selects base 16 and `0b`/`0B` base 2; anything else is decimal and is returned
+/// whole (including a leading `-`).
+fn split_radix(literal: &str) -> (u32, &str) {
+    match literal.as_bytes() {
+        // Both prefix bytes are ASCII, so slicing at 2 stays on a char boundary.
+        [b'0', b'x' | b'X', ..] => (16, &literal[2..]),
+        [b'0', b'b' | b'B', ..] => (2, &literal[2..]),
+        _ => (10, literal),
+    }
+}
+
+/// Parses an integer constant in decimal, hexadecimal or binary notation.
+///
+/// `from_str_radix` is the target integer's own conversion (e.g. `u8::from_str_radix`), so
+/// out-of-range values and a `-` on unsigned types are reported as parse errors.
+fn parse_int_const<T>(
+    input: &str,
+    from_str_radix: fn(&str, u32) -> Result<T, std::num::ParseIntError>,
+    wrap: fn(T) -> OwnedAtom,
+) -> IResult<&str, OwnedAtom> {
+    map_res(parse_number, |literal: &str| {
+        let (radix, digits) = split_radix(literal);
+        from_str_radix(digits, radix).map(wrap)
+    })
+    .parse(input)
+}
+
+/// Parses a floating-point constant through the target type's `FromStr` implementation.
+fn parse_float_const<T: std::str::FromStr>(
+    input: &str,
+    wrap: fn(T) -> OwnedAtom,
+) -> IResult<&str, OwnedAtom> {
+    map_res(parse_number, |literal: &str| literal.parse::<T>().map(wrap)).parse(input)
+}
+
+/// Returns the parser for a constant value of type `ty`.
+///
+/// Every arm is a non-capturing closure so that it coerces to a plain function pointer.
+fn parse_concrete_type(ty: Type) -> fn(&str) -> IResult<&str, OwnedAtom> {
+    match ty {
+        Type::Bool => |s: &str| {
+            alt((
+                map(tag("true"), |_| OwnedAtom::Bool(true)),
+                map(tag("false"), |_| OwnedAtom::Bool(false)),
+            ))
+            .parse(s)
+        },
+
+        Type::U8 => |s: &str| parse_int_const(s, u8::from_str_radix, |v| OwnedAtom::U8(v)),
+        Type::U16 => |s: &str| parse_int_const(s, u16::from_str_radix, |v| OwnedAtom::U16(v)),
+        Type::U32 => |s: &str| parse_int_const(s, u32::from_str_radix, |v| OwnedAtom::U32(v)),
+        Type::U64 => |s: &str| parse_int_const(s, u64::from_str_radix, |v| OwnedAtom::U64(v)),
+        Type::U128 => |s: &str| parse_int_const(s, u128::from_str_radix, |v| OwnedAtom::U128(v)),
+
+        Type::I8 => |s: &str| parse_int_const(s, i8::from_str_radix, |v| OwnedAtom::I8(v)),
+        Type::I16 => |s: &str| parse_int_const(s, i16::from_str_radix, |v| OwnedAtom::I16(v)),
+        Type::I32 => |s: &str| parse_int_const(s, i32::from_str_radix, |v| OwnedAtom::I32(v)),
+        Type::I64 => |s: &str| parse_int_const(s, i64::from_str_radix, |v| OwnedAtom::I64(v)),
+        Type::I128 => |s: &str| parse_int_const(s, i128::from_str_radix, |v| OwnedAtom::I128(v)),
+
+        Type::F32 => |s: &str| parse_float_const::<f32>(s, |v| OwnedAtom::F32(v)),
+        Type::F64 => |s: &str| parse_float_const::<f64>(s, |v| OwnedAtom::F64(v)),
+
+        // TODO: constants of these types are not implemented yet. Report a parse failure
+        // instead of panicking on grammar input.
+        Type::Utf8 | Type::IX(_) | Type::UX(_) => |s: &str| {
+            Err(nom::Err::Failure(nom::error::Error::new(
+                s,
+                nom::error::ErrorKind::Fail,
+            )))
+        },
+    }
+}
+
+#[test]
+fn parse_const_radix_test() {
+    assert_eq!(OwnedAtom::U8(0xAA), parse_const_region("u8 0xAA").unwrap().1);
+    assert_eq!(OwnedAtom::U8(2), parse_const_region("u8 0b10").unwrap().1);
+    assert_eq!(OwnedAtom::I16(-5), parse_const_region("i16 -5").unwrap().1);
+    // 0x100 does not fit in a u8.
+    assert!(parse_const_region("u8 0x100").is_err());
+}
+
+#[test]
+fn parse_region_test() {
+    let region = include_str!("test/region.brin");
+
+    // Mirrors test/region.brin entry by entry, including the nested struct and its constant.
+    assert_eq!(
+        Region::Fields(vec![
+            (
+                (0, 1),
+                FieldRegion::Subfield {
+                    name: "test".to_owned(),
+                    content: Region::Atom { ty: Type::U16 }
+                }
+            ),
+            (
+                (2, 3),
+                FieldRegion::Subfield {
+                    name: "second_field".to_owned(),
+                    content: Region::Atom { ty: Type::I16 }
+                }
+            ),
+            (
+                (4, 8),
+                FieldRegion::Subfield {
+                    name: "big_struct".to_owned(),
+                    content: Region::Fields(vec![
+                        (
+                            (0, 1),
+                            FieldRegion::Subfield {
+                                name: "test".to_owned(),
+                                content: Region::Atom { ty: Type::U8 }
+                            }
+                        ),
+                        (
+                            (2, 3),
+                            FieldRegion::Const {
+                                name: "magic".to_owned(),
+                                value: OwnedAtom::U16(0)
+                            }
+                        ),
+                    ])
+                }
+            )
+        ]),
+        parse_region(region).unwrap().1
+    );
+}
diff --git a/src/grammar/test/line.brin b/src/grammar/test/line.brin
new file mode 100644
index 0000000..1be232d
--- /dev/null
+++ b/src/grammar/test/line.brin
@@ -0,0 +1 @@
+0-2 bits: field "test" u16
diff --git a/src/grammar/test/region.brin b/src/grammar/test/region.brin
new file mode 100644
index 0000000..2518f03
--- /dev/null
+++ b/src/grammar/test/region.brin
@@ -0,0 +1,8 @@
+{
+    0-1 bits: field "test" u16
+    2-3 bits: field "second_field" i16
+    4-8 bits: field "big_struct" {
+        0-1 bits: field "test" u8
+        2-3 bits: const "magic" u16 0
+    }
+}
diff --git a/src/ir/mod.rs b/src/ir/mod.rs
new file mode 100644
index 0000000..d5f26ca
--- /dev/null
+++ b/src/ir/mod.rs
@@ -0,0 +1,79 @@
+//use nanoid::nanoid;
+use std::borrow::Cow;
+
+/* Any format will be parsed
into an intermediate, json-like object.
+   Where possible, zero-copying should be done (???!);
+
+   Each object consists of an ordered list of fields:
+   - Immediate fields, which can be accessed immediately
+   - Dependent fields, which may depend on another field.
+
+   Each field has:
+   - A UUID to uniquely identify the field
+   - A type and a length, which includes endianness information.
+
+   We might have a partial deserialization. In this case
+   the concrete value of a field will be unknown, and the information
+   needed to retrieve it must be shown.
+
+   We might have different options in deserializing.
+   In this case we must return all possible valid results.
+*/
+
+/// Tag to uniquely represent a particular field
+type FieldTag = &'static str;
+
+/// Primitive value types.
+///
+/// The lifetime `'a` is that of the data borrowed by the `Cow` payloads (`UX`, `IX`, `Utf8`);
+/// all other variants own their value.
+#[derive(PartialEq, Debug)]
+pub(crate) enum Atom<'a> {
+    /// Bottom: the value is not known; `depends_on` names the field it depends on.
+    Unknown {
+        depends_on: FieldTag,
+    },
+    Bool(bool),
+    // unsigned numbers
+    U8(u8),
+    U16(u16),
+    U32(u32),
+    U64(u64),
+    U128(u128),
+    // presumably (width, raw bytes) for non-standard widths -- TODO confirm
+    UX(usize, Cow<'a, [u8]>),
+    // signed numbers
+    I8(i8),
+    I16(i16),
+    I32(i32),
+    I64(i64),
+    I128(i128),
+    // presumably (width, raw bytes) for non-standard widths -- TODO confirm
+    IX(usize, Cow<'a, [u8]>),
+    // float
+    F32(f32),
+    F64(f64),
+    // strings
+    Utf8(Cow<'a, str>),
+}
+
+/// An [`Atom`] that borrows nothing from its input.
+pub type OwnedAtom = Atom<'static>;
+
+impl<'a> Atom<'a> {
+    /// Consumes the atom and returns an equivalent one with a `'static` lifetime.
+    ///
+    /// Borrowed string and byte payloads are copied into owned buffers (already-owned ones
+    /// are moved); every other variant is carried over unchanged.
+    fn to_static(self) -> OwnedAtom {
+        match self {
+            // `FieldTag` is already `&'static str`, so nothing has to be converted here.
+            Atom::Unknown { depends_on } => Atom::Unknown { depends_on },
+            Atom::Utf8(cow) => Atom::Utf8(Cow::Owned(cow.into_owned())),
+            Atom::UX(v, cow) => Atom::UX(v, Cow::Owned(cow.into_owned())),
+            Atom::IX(v, cow) => Atom::IX(v, Cow::Owned(cow.into_owned())),
+            Atom::Bool(b) => Atom::Bool(b),
+            Atom::U8(v) => Atom::U8(v),
+            Atom::U16(v) => Atom::U16(v),
+            Atom::U32(v) => Atom::U32(v),
+            Atom::U64(v) => Atom::U64(v),
+            Atom::U128(v) => Atom::U128(v),
+            Atom::I8(v) => Atom::I8(v),
+            Atom::I16(v) => Atom::I16(v),
+            Atom::I32(v) => Atom::I32(v),
+            Atom::I64(v) => Atom::I64(v),
+            Atom::I128(v) => Atom::I128(v),
+            Atom::F32(v) => Atom::F32(v),
+            Atom::F64(v) => Atom::F64(v),
+        }
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..dc52415
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,6 @@
+mod grammar;
+mod ir;
+
+/// Binary entry point. For now it only prints a greeting; the `grammar` and `ir`
+/// modules declared above are not called from here.
+fn main() {
+    println!("Hello, world!");
+}