From 0d417bbbe5802838e25ccc1873fe16fdc3957695 Mon Sep 17 00:00:00 2001
From: Ugo Botto <ubotto@minsait.com>
Date: Tue, 24 Jun 2025 21:51:28 +0200
Subject: [PATCH] first commit

---
 .gitignore                   |   1 +
 Cargo.toml                   |  10 +
 ISSUES.md                    |  15 ++
 README.md                    |   0
 formats/c37118.brin          |  12 ++
 samples/c37118_bad.txt       |   1 +
 src/grammar/mod.rs           |  50 +++++
 src/grammar/parser.rs        | 341 +++++++++++++++++++++++++++++++++++
 src/grammar/test/line.brin   |   1 +
 src/grammar/test/region.brin |   8 +
 src/ir/mod.rs                |  79 ++++++++
 src/main.rs                  |   6 +
 12 files changed, 524 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.toml
 create mode 100644 ISSUES.md
 create mode 100644 README.md
 create mode 100644 formats/c37118.brin
 create mode 100644 samples/c37118_bad.txt
 create mode 100644 src/grammar/mod.rs
 create mode 100644 src/grammar/parser.rs
 create mode 100644 src/grammar/test/line.brin
 create mode 100644 src/grammar/test/region.brin
 create mode 100644 src/ir/mod.rs
 create mode 100644 src/main.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..d531286
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "bin_inspect"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+clap = "4.5.40"
+nanoid = "0.4.0"
+nom = "8.0.0"
+thiserror = "2.0.12"
diff --git a/ISSUES.md b/ISSUES.md
new file mode 100644
index 0000000..d8407b7
--- /dev/null
+++ b/ISSUES.md
@@ -0,0 +1,15 @@
+# TO-DO
+
+## Feature
+
+- [ ] write docs
+  - [ ] General
+  - [ ] Brin language
+- [ ] show output
+- [ ] json output
+- [ ] cli usage
+  - [ ] think of api in general
+
+## Bug
+
+# Done ✓
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/formats/c37118.brin b/formats/c37118.brin
new file mode 100644
index 0000000..3086ede
--- /dev/null
+++ b/formats/c37118.brin
@@ -0,0 +1,12 @@
+#= BigEndian
+
+CFGFRAME {
+  0-1 bytes: field "SYNC" {
+    0 byte: const "magic_number" u8 0xAA
+    1 byte: const "ver_number" u8 0x31
+  }
+  3-4 bytes: field "TIME_BASE" u16
+  5-6 bytes: field "NUM_PMU" u16
+}
+
+  
diff --git a/samples/c37118_bad.txt b/samples/c37118_bad.txt
new file mode 100644
index 0000000..0fc845d
--- /dev/null
+++ b/samples/c37118_bad.txt
@@ -0,0 +1 @@
+170, 49, 1, 0, 0, 1, 104, 91, 0, 121, 0, 13, 34, 178, 0, 0, 0, 0, 3, 232, 0, 1, 8, 80, 97, 114, 116, 97, 110, 110, 97, 4, 207, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 6, 0, 0, 0, 0, 15, 80, 78, 65, 80, 35, 73, 80, 95, 50, 50, 52, 95, 43, 83, 73, 15, 80, 78, 65, 80, 35, 86, 80, 95, 50, 50, 52, 95, 43, 83, 86, 15, 80, 78, 65, 80, 35, 73, 52, 95, 50, 50, 52, 95, 43, 83, 73, 15, 80, 78, 65, 80, 35, 105, 56, 95, 50, 50, 52, 95, 66, 80, 73, 16, 80, 78, 65, 80, 35, 73, 49, 50, 95, 50, 50, 52, 95, 67, 80, 73, 16, 80, 78, 65, 80, 35, 86, 49, 50, 95, 50, 50, 52, 95, 43, 83, 86, 0, 0, 8, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 50, 138, 147
diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs
new file mode 100644
index 0000000..540daec
--- /dev/null
+++ b/src/grammar/mod.rs
@@ -0,0 +1,50 @@
+use crate::ir::{Atom, OwnedAtom};
+pub(crate) mod parser;
+
+#[derive(PartialEq, Debug)]
+struct Module {
+    objs: Vec<(String, Region)>, // (name, region) pairs; presumably one per top-level definition
+}
+
+/// A region of memory: either a list of positioned fields or a single atomic value.
+/// The parser builds `Fields` from a `{ ... }` body and `Atom` from a bare type name.
+#[derive(PartialEq, Debug)]
+enum Region {
+    Fields(Vec<(Range, FieldRegion)>), // each entry pairs a field's position with its content
+    Atom { ty: Type },                 // leaf region described only by its primitive type
+}
+
+/// Content of a field: either a nested region or a constant with a fixed value.
+/// Written in the grammar as `field "name" <region>` and `const "name" <type> <literal>`.
+#[derive(PartialEq, Debug)]
+enum FieldRegion {
+    Subfield { name: String, content: Region }, // named field whose content is another region
+    Const { name: String, value: OwnedAtom },   // named field carrying a typed literal
+}
+
+#[derive(PartialEq, Debug)]
+pub(crate) enum Type { // primitive type names produced by `parse_type`
+    Bool,
+    // unsigned integers with a dedicated variant per standard width
+    U8,
+    U16,
+    U32,
+    U64,
+    U128,
+    UX(usize), // `u<N>` with an explicit width; holds the parsed `N` (presumably bits)
+    // signed integers, mirroring the unsigned set
+    I8,
+    I16,
+    I32,
+    I64,
+    I128,
+    IX(usize), // `i<N>` with an explicit width; holds the parsed `N` (presumably bits)
+    // floating-point numbers
+    F32,
+    F64,
+    // strings
+    Utf8,
+}
+
+/// Position of a field relative to its parent as `(low, high)`; `parse_range` stores both in bits.
+type Range = (usize, usize);
diff --git a/src/grammar/parser.rs b/src/grammar/parser.rs
new file mode 100644
index 0000000..4c7c7ee
--- /dev/null
+++ b/src/grammar/parser.rs
@@ -0,0 +1,341 @@
+use std::error::Error;
+
+use nom::IResult;
+use nom::Parser;
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::bytes::complete::tag_no_case;
+use nom::character::complete::alpha1;
+use nom::character::complete::alphanumeric1;
+use nom::character::complete::bin_digit1;
+use nom::character::complete::char;
+use nom::character::complete::digit1;
+use nom::character::complete::hex_digit1;
+use nom::character::complete::line_ending;
+use nom::character::complete::newline;
+use nom::character::complete::space0;
+use nom::combinator::map;
+use nom::combinator::map_res;
+use nom::combinator::opt;
+use nom::combinator::recognize;
+use nom::error::ParseError;
+use nom::multi::many0_count;
+use nom::multi::separated_list1;
+use nom::sequence::delimited;
+use nom::sequence::pair;
+use nom::sequence::preceded;
+
+use super::FieldRegion;
+use super::OwnedAtom;
+use super::Range;
+use super::Region;
+use super::Type;
+
+/// A combinator that takes a parser `inner` and produces a parser that also consumes leading and
+/// trailing spaces and tabs (`space0` does not match line breaks), returning the output of `inner`.
+pub fn ws<'a, O, E: ParseError<&'a str>, F>(inner: F) -> impl Parser<&'a str, Output = O, Error = E>
+where
+    F: Parser<&'a str, Output = O, Error = E>,
+{
+    delimited(space0, inner, space0)
+}
+
+/// Recognizes an integer literal (`0x` hex, `0b` binary, or decimal with optional `-`) as raw text.
+pub fn parse_number(input: &str) -> IResult<&str, &str> {
+    alt((
+        recognize((tag_no_case("0x"), hex_digit1)),
+        recognize((tag_no_case("0b"), bin_digit1)),
+        recognize((opt(char('-')), digit1)), // decimal only; a fractional part is not consumed
+    ))
+    .parse(input)
+}
+
+#[test]
+fn parse_number_test() {
+    // Each literal must be recognized in full, prefix and sign included.
+    for literal in ["5", "-5", "0x0", "0b10"] {
+        assert_eq!(parse_number(literal).unwrap().1, literal);
+    }
+}
+
+/// Parses a double-quoted field name and yields the text between the quotes.
+///
+/// The name must look like an identifier: it starts with a letter or `_` and continues with
+/// letters, digits or `_`, so an empty name or one containing spaces is rejected.
+pub fn parse_field_name(input: &str) -> IResult<&str, &str> {
+    // Leading character run, then any number of further identifier runs.
+    let head = alt((alpha1, tag("_")));
+    let tail = many0_count(alt((alphanumeric1, tag("_"))));
+    let name = recognize((head, tail));
+
+    delimited(char('"'), name, char('"')).parse(input)
+}
+
+#[test]
+fn field_name_test() {
+    // The surrounding quotes are consumed and only the bare name is returned.
+    let quoted = "\"NUM_PMU\"";
+    let (_, name) = parse_field_name(quoted).unwrap();
+    assert_eq!("NUM_PMU".to_owned(), name);
+}
+
+/// A combinator that parses primitive type names (`bool`, `utf8`, `f32`, `f64`, `u<N>`, `i<N>`).
+/// The full decimal width `N` is read before a variant is chosen, so `u80` is `Type::UX(80)`
+/// and not `Type::U8` followed by a stray `0`.
+pub fn parse_type(input: &str) -> IResult<&str, Type> {
+    alt((
+        map(tag("bool"), |_| Type::Bool),
+        map(tag("utf8"), |_| Type::Utf8),
+        map(tag("f32"), |_| Type::F32),
+        map(tag("f64"), |_| Type::F64),
+        map_res(preceded(tag("u"), digit1), |width: &str| {
+            width.parse::<usize>().map(|bits| match bits {
+                8 => Type::U8,
+                16 => Type::U16,
+                32 => Type::U32,
+                64 => Type::U64,
+                128 => Type::U128,
+                _ => Type::UX(bits),
+            })
+        }),
+        map_res(preceded(tag("i"), digit1), |width: &str| {
+            width.parse::<usize>().map(|bits| match bits {
+                8 => Type::I8,
+                16 => Type::I16,
+                32 => Type::I32,
+                64 => Type::I64,
+                128 => Type::I128,
+                _ => Type::IX(bits),
+            })
+        }),
+    ))
+    .parse(input)
+}
+
+// Range and locations
+
+fn parse_index(input: &str) -> IResult<&str, usize> {
+    map_res(digit1, |digits: &str| digits.parse::<usize>()).parse(input) // unsigned decimal only
+}
+
+/// Parses a unit keyword, singular or plural, and yields its size in bits.
+fn parse_unit(input: &str) -> IResult<&str, usize> {
+    // The plural spelling is tried first so that its trailing `s` is consumed as well.
+    let bit = map(alt((tag("bits"), tag("bit"))), |_| 1);
+    let byte = map(alt((tag("bytes"), tag("byte"))), |_| 8);
+    alt((bit, byte)).parse(input)
+}
+
+/// Parses `low-high`, or a lone index `n` (treated as `n-n`), into a `(low, high)` pair.
+fn parse_range_vals(input: &str) -> IResult<&str, Range> {
+    // The upper bound is optional; when it is missing or unreadable only the first
+    // index is consumed and the range collapses onto it.
+    let upper = opt(preceded(char('-'), parse_index));
+    let bounds = (parse_index, upper);
+
+    map(bounds, |(low, high): (usize, Option<usize>)| (low, high.unwrap_or(low))).parse(input)
+}
+
+/// Parses `<range> <unit>` (exactly one space) and scales both bounds to bits.
+fn parse_range(input: &str) -> IResult<&str, Range> {
+    let (rest, (low, high)) = parse_range_vals(input)?;
+    let (rest, _) = char(' ').parse(rest)?;
+    let (rest, scale) = parse_unit(rest)?;
+    Ok((rest, (low * scale, high * scale)))
+}
+
+#[test]
+fn range_test() {
+    let cases = ["0-2 bytes", "3 byte", "0-5 bits", "0-2 bit"];
+    let expected = [(0, 16), (24, 24), (0, 5), (0, 2)];
+    let parsed = cases.map(|text| parse_range(text).unwrap().1);
+    assert_eq!(expected, parsed);
+}
+
+/// Parses one field line: `<range>: field "<name>" <region>` or
+/// `<range>: const "<name>" <type> <literal>`.
+///
+/// Returns the field's range together with its content. The keyword, the name and the
+/// content are separated by exactly one space each, as the literal tags and `char(' ')`
+/// below require.
+pub fn parse_field_entry(input: &str) -> IResult<&str, (Range, FieldRegion)> {
+    // `field "<name>" <region>`: a named sub-region.
+    let subfield = map(
+        (tag("field "), parse_field_name, char(' '), parse_region),
+        |(_, name, _, content)| FieldRegion::Subfield {
+            name: name.to_owned(),
+            content,
+        },
+    );
+
+    // `const "<name>" <type> <literal>`: a named constant.
+    let constant = map(
+        (tag("const "), parse_field_name, char(' '), parse_const_region),
+        |(_, name, _, value)| FieldRegion::Const {
+            name: name.to_owned(),
+            value,
+        },
+    );
+
+    // Position first, then the `": "` separator, then whichever content form matches.
+    let (rest, range) = parse_range(input)?;
+    let (rest, _) = tag(": ").parse(rest)?;
+    let (rest, content) = alt((subfield, constant)).parse(rest)?;
+    Ok((rest, (range, content)))
+}
+
+#[test]
+fn parse_field_line_test() {
+    // The fixture holds a single `field` line covering bits 0 to 2; the trailing line
+    // break in the file is left unparsed.
+    let source = include_str!("test/line.brin");
+    let (_, (range, content)) = parse_field_entry(source).unwrap();
+
+    let expected = FieldRegion::Subfield {
+        name: String::from("test"),
+        content: Region::Atom { ty: Type::U16 },
+    };
+
+    assert_eq!((0, 2), range);
+    assert_eq!(expected, content);
+}
+
+/// Parses a region: a `{ ... }` block of field entries, one per line, or a bare type name.
+///
+/// Every line break in a block, including the one after `{` and the one before `}`, may
+/// be `\n` or `\r\n`.
+pub fn parse_region(input: &str) -> IResult<&str, Region> {
+    // Opening and closing braces, each with the optional line break on its inner side.
+    let open = (ws(char('{')), opt(ws(line_ending)));
+    let close = (opt(ws(line_ending)), ws(char('}')));
+    let fields = separated_list1(ws(line_ending), parse_field_entry);
+
+    alt((
+        map(delimited(open, fields, close), Region::Fields),
+        map(parse_type, |ty| Region::Atom { ty }),
+    ))
+    .parse(input)
+}
+
+/// Parses `<type> <literal>`, where the literal is read with the parser for that type.
+fn parse_const_region(input: &str) -> IResult<&str, OwnedAtom> {
+    let (rest, (ty, _)) = (parse_type, char(' ')).parse(input)?;
+    parse_concrete_type(ty)(rest)
+}
+
+/// Splits an integer literal recognized by `parse_number` into its digits and radix.
+///
+/// A `0x` or `0b` prefix (either case) selects radix 16 or 2 and is stripped; any other
+/// literal is decimal and is returned unchanged, sign included. For example `"0xAA"`
+/// gives `("AA", 16)`, `"0b10"` gives `("10", 2)` and `"-5"` gives `("-5", 10)`.
+fn split_radix(literal: &str) -> (&str, u32) {
+    // Both matched bytes are ASCII, so slicing at index 2 stays on a char boundary.
+    match literal.as_bytes() {
+        [b'0', b'x' | b'X', ..] => (&literal[2..], 16),
+        [b'0', b'b' | b'B', ..] => (&literal[2..], 2),
+        _ => (literal, 10),
+    }
+}
+
+/// Expands to a closure that parses an integer literal into the given `OwnedAtom` variant.
+///
+/// `str::parse` only understands decimal, so the radix prefix is split off first and the
+/// digits are handed to `from_str_radix`; a value that does not fit `$int` is a parse
+/// error. The closure captures nothing, which lets it coerce to a `fn` pointer.
+macro_rules! int_literal {
+    ($variant:ident, $int:ty) => {
+        |s: &str| {
+            map_res(parse_number, |text: &str| {
+                let (digits, radix) = split_radix(text);
+                <$int>::from_str_radix(digits, radix).map(|value| OwnedAtom::$variant(value))
+            })
+            .parse(s)
+        }
+    };
+}
+
+/// Expands to a closure that parses a float literal into the given `OwnedAtom` variant.
+///
+/// The text comes from `parse_number`, so only integer-looking decimal input converts;
+/// hex and binary text is rejected by `str::parse`. The closure captures nothing.
+macro_rules! float_literal {
+    ($variant:ident, $float:ty) => {
+        |s: &str| {
+            map_res(parse_number, |text: &str| {
+                text.parse::<$float>().map(|value| OwnedAtom::$variant(value))
+            })
+            .parse(s)
+        }
+    };
+}
+
+/// Returns the literal parser matching `ty`, used for the value of a `const` field.
+///
+/// Integer literals may be decimal, hexadecimal (`0x..`) or binary (`0b..`). Float
+/// literals are limited to the integer text `parse_number` recognizes. `utf8` and
+/// arbitrary-width `uN`/`iN` constants are not implemented yet: their parser always
+/// fails instead of panicking.
+///
+/// Every arm is a non-capturing closure so that it coerces to the returned `fn` pointer.
+fn parse_concrete_type(ty: Type) -> fn(&str) -> IResult<&str, OwnedAtom> {
+    match ty {
+        // Only the lowercase keywords are accepted.
+        Type::Bool => |s: &str| {
+            alt((
+                map(tag("true"), |_| OwnedAtom::Bool(true)),
+                map(tag("false"), |_| OwnedAtom::Bool(false)),
+            ))
+            .parse(s)
+        },
+
+        // unsigned numbers
+        Type::U8 => int_literal!(U8, u8),
+        Type::U16 => int_literal!(U16, u16),
+        Type::U32 => int_literal!(U32, u32),
+        Type::U64 => int_literal!(U64, u64),
+        Type::U128 => int_literal!(U128, u128),
+
+        // signed numbers
+        Type::I8 => int_literal!(I8, i8),
+        Type::I16 => int_literal!(I16, i16),
+        Type::I32 => int_literal!(I32, i32),
+        Type::I64 => int_literal!(I64, i64),
+        Type::I128 => int_literal!(I128, i128),
+
+        // float
+        Type::F32 => float_literal!(F32, f32),
+        Type::F64 => float_literal!(F64, f64),
+
+        // Not implemented yet. These types are reachable from user input, so the parse
+        // fails with `ErrorKind::Fail` rather than hitting a `todo!()` panic.
+        Type::Utf8 | Type::IX(_) | Type::UX(_) => |s: &str| {
+            let unsupported = nom::error::Error::new(s, nom::error::ErrorKind::Fail);
+            Err(nom::Err::Failure(unsupported))
+        },
+    }
+}
+
+#[test]
+fn parse_region_test() {
+    // The fixture declares three fields: two atoms (`u16`, `i16`) and a nested region that
+    // holds an atom and a constant, so the expected tree has to mirror all of them.
+    let region = include_str!("test/region.brin");
+    let field = |name: &str, content: Region| FieldRegion::Subfield {
+        name: name.to_owned(),
+        content,
+    };
+    let atom = |ty: Type| Region::Atom { ty };
+    let magic = FieldRegion::Const {
+        name: "magic".to_owned(),
+        value: OwnedAtom::U16(0),
+    };
+
+    let nested = Region::Fields(vec![((0, 1), field("test", atom(Type::U8))), ((2, 3), magic)]);
+    let expected = Region::Fields(vec![
+        ((0, 1), field("test", atom(Type::U16))),
+        ((2, 3), field("second_field", atom(Type::I16))),
+        ((4, 8), field("big_struct", nested)),
+    ]);
+
+    assert_eq!(expected, parse_region(region).unwrap().1);
+}
diff --git a/src/grammar/test/line.brin b/src/grammar/test/line.brin
new file mode 100644
index 0000000..1be232d
--- /dev/null
+++ b/src/grammar/test/line.brin
@@ -0,0 +1 @@
+0-2 bits: field "test" u16
diff --git a/src/grammar/test/region.brin b/src/grammar/test/region.brin
new file mode 100644
index 0000000..2518f03
--- /dev/null
+++ b/src/grammar/test/region.brin
@@ -0,0 +1,8 @@
+{
+  0-1 bits: field "test" u16 
+  2-3 bits: field "second_field" i16
+  4-8 bits: field "big_struct" {
+      0-1 bits: field "test" u8
+      2-3 bits: const "magic" u16 0
+    }
+}
diff --git a/src/ir/mod.rs b/src/ir/mod.rs
new file mode 100644
index 0000000..d5f26ca
--- /dev/null
+++ b/src/ir/mod.rs
@@ -0,0 +1,79 @@
+//use nanoid::nanoid;
+use std::borrow::Cow;
+
+/* Any format will be parsed into an intermediate, json-like object.
+ Where possible, zero-copying should be done (???!);
+
+ Each object consists of an ordered list of fields:
+ - Immediate fields, which can be accessed immediately
+ - Dependent fields, which may depend on another field.
+
+ Each field has:
+ - A UUID to uniquely identify the field
+ - A type and a length, which includes endianness information.
+
+ We might have a partial deserialization. In this case
+ the concrete value of a field will be unknown, and the info
+ needed to retrieve it must be shown.
+
+ We might have different options in deserializing.
+ In this case we must return all possible valid results.
+*/
+
+/// Tag that uniquely identifies a particular field.
+type FieldTag = &'static str;
+
+/// A primitive value; byte and string payloads are `Cow`s, borrowed for `'a` or owned.
+#[derive(PartialEq, Debug)]
+pub(crate) enum Atom<'a> {
+    /// Bottom: presumably a value not known yet, with the field it depends on (TODO confirm).
+    Unknown {
+        depends_on: FieldTag,
+    },
+    Bool(bool),
+    // unsigned integers of standard widths
+    U8(u8),
+    U16(u16),
+    U32(u32),
+    U64(u64),
+    U128(u128),
+    UX(usize, Cow<'a, [u8]>), // presumably (bit width, raw bytes) — TODO confirm
+    // signed integers of standard widths
+    I8(i8),
+    I16(i16),
+    I32(i32),
+    I64(i64),
+    I128(i128),
+    IX(usize, Cow<'a, [u8]>), // presumably (bit width, raw bytes) — TODO confirm
+    // floating-point numbers
+    F32(f32),
+    F64(f64),
+    // strings
+    Utf8(Cow<'a, str>),
+}
+
+pub type OwnedAtom = Atom<'static>; // an `Atom` whose `Cow` payloads carry the `'static` lifetime
+
+impl<'a> Atom<'a> {
+    fn to_static(self) -> OwnedAtom { // consumes `self`; every `Cow` payload is made owned
+        match self {
+            Atom::Unknown { depends_on } => Atom::Unknown { depends_on }, // tag is already `'static`
+            Atom::Utf8(cow) => Atom::Utf8(cow.into_owned().into()),
+            Atom::UX(v, cow) => Atom::UX(v, cow.into_owned().into()),
+            Atom::IX(v, cow) => Atom::IX(v, cow.into_owned().into()),
+            Atom::Bool(b) => Atom::Bool(b),
+            Atom::U8(v) => Atom::U8(v),
+            Atom::U16(v) => Atom::U16(v),
+            Atom::U32(v) => Atom::U32(v),
+            Atom::U64(v) => Atom::U64(v),
+            Atom::U128(v) => Atom::U128(v),
+            Atom::I8(v) => Atom::I8(v),
+            Atom::I16(v) => Atom::I16(v),
+            Atom::I32(v) => Atom::I32(v),
+            Atom::I64(v) => Atom::I64(v),
+            Atom::I128(v) => Atom::I128(v),
+            Atom::F32(v) => Atom::F32(v),
+            Atom::F64(v) => Atom::F64(v),
+        }
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..dc52415
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,6 @@
+mod grammar;
+mod ir;
+
+fn main() {
+    println!("Hello, world!"); // placeholder output; no CLI handling exists yet
+}