From 441669a2c3c2d5e6e92a71efdff7c9395473bac0 Mon Sep 17 00:00:00 2001 From: "Christoph J. Scherr" Date: Tue, 16 Jan 2024 16:45:48 +0100 Subject: [PATCH] more configs can't detect dups like this dedup lines --- Cargo.toml | 2 +- data/256B-zero-with-trash.img | Bin 0 -> 264 bytes data/256B-zero.img | Bin 0 -> 256 bytes members/libpt-bin/src/hedu/mod.rs | 39 ++++++++++--- members/libpt-hedu/Cargo.toml | 1 + members/libpt-hedu/src/lib.rs | 92 +++++++++++++++++++++++------- 6 files changed, 102 insertions(+), 32 deletions(-) create mode 100644 data/256B-zero-with-trash.img create mode 100644 data/256B-zero.img diff --git a/Cargo.toml b/Cargo.toml index 61b87df..f241b93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ members = [ default-members = [".", "members/libpt-bin", "members/libpt-core"] [workspace.package] publish = true -version = "0.2.0" +version = "0.2.3" edition = "2021" authors = ["Christoph J. Scherr "] license = "MIT" diff --git a/data/256B-zero-with-trash.img b/data/256B-zero-with-trash.img new file mode 100644 index 0000000000000000000000000000000000000000..fbda59dd4c02f0d6693b3f29405a4c0ee177b5f8 GIT binary patch literal 264 TcmZQz7}+4LI4z|(F^vlV4|)RV literal 0 HcmV?d00001 diff --git a/data/256B-zero.img b/data/256B-zero.img new file mode 100644 index 0000000000000000000000000000000000000000..65f57c2ee985713476ac0b6e3483e6fe472e2176 GIT binary patch literal 256 LcmZQz7})>-0RR92 literal 0 HcmV?d00001 diff --git a/members/libpt-bin/src/hedu/mod.rs b/members/libpt-bin/src/hedu/mod.rs index 9f75a24..24bedd2 100644 --- a/members/libpt-bin/src/hedu/mod.rs +++ b/members/libpt-bin/src/hedu/mod.rs @@ -20,7 +20,7 @@ use clap_verbosity_flag::{InfoLevel, Verbosity}; use std::{ fs::File, - io::{BufRead, BufReader, IsTerminal}, + io::{Seek, IsTerminal}, }; //// TYPES ///////////////////////////////////////////////////////////////////////////////////////// @@ -67,13 +67,25 @@ pub struct Cli { pub verbose: Verbosity, /// show additional logging meta data - #[arg(short, long, global = true)] - pub log_meta: bool, + #[arg(long)] + pub meta: bool, /// show character representation - #[arg(short, long, global = true)] + #[arg(short, long)] pub chars: bool, + /// skip first N bytes + #[arg(short, long, default_value_t = 0)] + pub skip: usize, + + /// only interpret N bytes (end after N) + #[arg(short, long, default_value_t = 0)] + pub len: usize, + + /// show identical lines + #[arg(short = 'i', long)] + pub show_identical: bool, + /// a data source, probably a file. /// /// If left empty or set as "-", the program will read from stdin. @@ -87,12 +99,12 @@ pub struct Cli { //// PRIVATE FUNCTIONS ///////////////////////////////////////////////////////////////////////////// fn main() { let cli = cli_parse(); - let source: Box; + let mut source: Box; if cli.data_source.is_some() && cli.data_source.clone().is_some_and(|val| val != "-") { let data_source = cli.data_source.unwrap(); debug!("Trying to open '{}'", data_source); source = match File::open(&data_source) { - Ok(file) => Box::new(BufReader::new(file)), + Ok(file) => Box::new(file), Err(err) => { error!("Could not open file '{}': {err}", data_source); std::process::exit(1); @@ -105,9 +117,18 @@ fn main() { warn!("Refusing to dump from interactive terminal"); std::process::exit(2) } - source = Box::new(BufReader::new(stdin)); + source = Box::new(stdin); } - match dump(BufReader::new(source), cli.chars) { + + match dump( + &mut *source, + DumpConfig { + chars: cli.chars, + skip: cli.skip, + show_identical: cli.show_identical, + len: cli.len, + }, + ) { Ok(_) => (), Err(err) => { error!("Could not dump data of file: {err}"); @@ -128,7 +149,7 @@ fn cli_parse() -> Cli { unreachable!(); } }; - if cli.log_meta { + if cli.meta { Logger::init(None, Some(ll)).expect("could not initialize Logger"); } else { // less verbose version diff --git a/members/libpt-hedu/Cargo.toml b/members/libpt-hedu/Cargo.toml index 1eb872e..4bfcad2 100644 --- a/members/libpt-hedu/Cargo.toml +++ b/members/libpt-hedu/Cargo.toml @@ -18,3 +18,4 @@ categories.workspace = true anyhow = { workspace = true } thiserror = { workspace = true } libpt-log = {path = "../libpt-log" } +libpt-bintols = {path = "../libpt-bintols" } diff --git a/members/libpt-hedu/src/lib.rs b/members/libpt-hedu/src/lib.rs index 6c46ae4..76cbe67 100644 --- a/members/libpt-hedu/src/lib.rs +++ b/members/libpt-hedu/src/lib.rs @@ -6,40 +6,69 @@ //! This crate is currently empty. use anyhow::{bail, Result}; -use libpt_log::{error, trace}; -use std::io::{prelude::*, BufReader}; +use libpt_log::{error, trace, warn, info}; +use libpt_bintols::display::{bytes_to_bin, humanbytes}; +use std::io::{prelude::*, BufReader, Read, SeekFrom}; const BYTES_PER_LINE: usize = 16; const LINE_SEP_HORIZ: char = '─'; const LINE_SEP_VERT: char = '│'; -pub fn dump(mut data: BufReader, chars: bool) -> Result<()> -where - T: Read, -{ - let mut buf: [u8; BYTES_PER_LINE] = [0; BYTES_PER_LINE]; +pub struct DumpConfig { + pub chars: bool, + pub skip: usize, + pub show_identical: bool, + pub len: usize, +} + +pub trait DataSource: Read { + fn skip(&mut self, length: usize) -> std::io::Result<()>; +} +impl DataSource for std::io::Stdin { + fn skip(&mut self, _length: usize) -> std::io::Result<()> { + warn!("can't skip bytes for the stdin!"); + Ok(()) + } +} +impl DataSource for std::fs::File { + fn skip(&mut self, length: usize) -> std::io::Result<()> { + self.seek(SeekFrom::Current(length as i64))?; + // returns the new position from the start, we don't need it here. + Ok(()) + } +} + +pub fn dump(data: &mut dyn DataSource, config: DumpConfig) -> Result<()> { + // skip a given number of bytes + if config.skip > 0 { + + data.skip(config.skip)?; + info!("Skipped {}", humanbytes(config.skip)); + } + let mut buf: [[u8; BYTES_PER_LINE]; 2] = [[0; BYTES_PER_LINE]; 2]; + let mut alt_buf = 0usize; let mut line_counter: usize = 0; let mut len: usize; // print the head print!("LINE IDX {LINE_SEP_VERT} DATA AS HEX"); - if chars { + if config.chars { print!("{:width$} {LINE_SEP_VERT} FOO", "", width = 37); } println!(); - if chars { - println!("{}", format!("{LINE_SEP_HORIZ}").repeat(78)); + if config.chars { + println!("{}", format!("{LINE_SEP_HORIZ}").repeat(80)); } else { println!("{}", format!("{LINE_SEP_HORIZ}").repeat(59)); } // data dump loop - len = rd_data(&mut data, &mut buf).unwrap(); + len = rd_data(data, &mut buf, &mut alt_buf).unwrap(); while len > 0 { print!("{:08X} {LINE_SEP_VERT} ", line_counter); for i in 0..len { if i as usize % BYTES_PER_LINE == BYTES_PER_LINE / 2 { print!(" "); } - print!("{:02X} ", buf[i]); + print!("{:02X} ", buf[alt_buf][i]); } if len == BYTES_PER_LINE / 2 { print!(" ") @@ -50,15 +79,31 @@ where } print!(" "); } - if chars { - print!("{LINE_SEP_VERT} "); + if config.chars { + print!("{LINE_SEP_VERT} |"); for i in 0..len { - print!("{}", mask_chars(buf[i] as char)); + print!("{}", mask_chars(buf[alt_buf][i] as char)); } + print!("|"); } line_counter += 1; println!(); - len = rd_data(&mut data, &mut buf).unwrap(); + len = rd_data(data, &mut buf, &mut alt_buf).unwrap(); + if buf[0] == buf[1] && len == BYTES_PER_LINE && !config.show_identical { + trace!(buf = format!("{:?}", buf), "found a duplicating line"); + let start_line = line_counter; + while buf[0] == buf[1] && len == BYTES_PER_LINE { + len = rd_data(data, &mut buf, &mut alt_buf).unwrap(); + line_counter += 1; + } + println!( + "^^^^^^^^ {LINE_SEP_VERT} (repeats {} lines)", + line_counter - start_line + ); + } + // switch to the second half of the buf, the original half is stored the old buffer + // We detect duplicate lines with this + alt_buf ^= 1; // toggle the alt buf } Ok(()) } @@ -77,12 +122,15 @@ fn mask_chars(c: char) -> char { } } -fn rd_data(data: &mut BufReader, mut buf: &mut [u8]) -> Result -where - T: Read, -{ - match data.read(&mut buf) { - Ok(len) => Ok(len), +fn rd_data( + data: &mut dyn DataSource, + buf: &mut [[u8; BYTES_PER_LINE]; 2], + alt_buf: &mut usize, +) -> Result { + match data.read(&mut buf[*alt_buf]) { + Ok(len) => { + return Ok(len); + } Err(err) => { error!("error while reading data: {err}"); bail!(err)