more configs
Cargo Check, Format, Fix and Test / cargo CI (push) Successful in 2m41s Details

can't detect dups like this

dedup lines
This commit is contained in:
Christoph J. Scherr 2024-01-16 16:45:48 +01:00
parent b2435f3963
commit 441669a2c3
Signed by: cscherrNT
GPG Key ID: 8E2B45BC51A27EA7
6 changed files with 102 additions and 32 deletions

View File

@ -13,7 +13,7 @@ members = [
default-members = [".", "members/libpt-bin", "members/libpt-core"]
[workspace.package]
publish = true
version = "0.2.0"
version = "0.2.3"
edition = "2021"
authors = ["Christoph J. Scherr <software@cscherr.de>"]
license = "MIT"

Binary file not shown.

BIN
data/256B-zero.img Normal file

Binary file not shown.

View File

@ -20,7 +20,7 @@ use clap_verbosity_flag::{InfoLevel, Verbosity};
use std::{
fs::File,
io::{BufRead, BufReader, IsTerminal},
io::{Seek, IsTerminal},
};
//// TYPES /////////////////////////////////////////////////////////////////////////////////////////
@ -67,13 +67,25 @@ pub struct Cli {
pub verbose: Verbosity<InfoLevel>,
/// show additional logging meta data
#[arg(short, long, global = true)]
pub log_meta: bool,
#[arg(long)]
pub meta: bool,
/// show character representation
#[arg(short, long, global = true)]
#[arg(short, long)]
pub chars: bool,
/// skip first N bytes
#[arg(short, long, default_value_t = 0)]
pub skip: usize,
/// only interpret N bytes (end after N)
#[arg(short, long, default_value_t = 0)]
pub len: usize,
/// show identical lines
#[arg(short = 'i', long)]
pub show_identical: bool,
/// a data source, probably a file.
///
/// If left empty or set as "-", the program will read from stdin.
@ -87,12 +99,12 @@ pub struct Cli {
//// PRIVATE FUNCTIONS /////////////////////////////////////////////////////////////////////////////
fn main() {
let cli = cli_parse();
let source: Box<dyn BufRead>;
let mut source: Box<dyn DataSource>;
if cli.data_source.is_some() && cli.data_source.clone().is_some_and(|val| val != "-") {
let data_source = cli.data_source.unwrap();
debug!("Trying to open '{}'", data_source);
source = match File::open(&data_source) {
Ok(file) => Box::new(BufReader::new(file)),
Ok(file) => Box::new(file),
Err(err) => {
error!("Could not open file '{}': {err}", data_source);
std::process::exit(1);
@ -105,9 +117,18 @@ fn main() {
warn!("Refusing to dump from interactive terminal");
std::process::exit(2)
}
source = Box::new(BufReader::new(stdin));
source = Box::new(stdin);
}
match dump(BufReader::new(source), cli.chars) {
match dump(
&mut *source,
DumpConfig {
chars: cli.chars,
skip: cli.skip,
show_identical: cli.show_identical,
len: cli.len,
},
) {
Ok(_) => (),
Err(err) => {
error!("Could not dump data of file: {err}");
@ -128,7 +149,7 @@ fn cli_parse() -> Cli {
unreachable!();
}
};
if cli.log_meta {
if cli.meta {
Logger::init(None, Some(ll)).expect("could not initialize Logger");
} else {
// less verbose version

View File

@ -18,3 +18,4 @@ categories.workspace = true
anyhow = { workspace = true }
thiserror = { workspace = true }
libpt-log = {path = "../libpt-log" }
libpt-bintols = {path = "../libpt-bintols" }

View File

@ -6,40 +6,69 @@
//! This crate dumps the bytes of a [`DataSource`] as a hex table, optionally with a character column.
use anyhow::{bail, Result};
use libpt_log::{error, trace};
use std::io::{prelude::*, BufReader};
use libpt_log::{error, trace, warn, info};
use libpt_bintols::display::{bytes_to_bin, humanbytes};
use std::io::{prelude::*, BufReader, Read, SeekFrom};
const BYTES_PER_LINE: usize = 16;
const LINE_SEP_HORIZ: char = '─';
const LINE_SEP_VERT: char = '│';
pub fn dump<T>(mut data: BufReader<T>, chars: bool) -> Result<()>
where
T: Read,
{
let mut buf: [u8; BYTES_PER_LINE] = [0; BYTES_PER_LINE];
/// Options that control how [`dump`] renders its input.
///
/// The [`Default`] value has every flag off and both byte counts at `0`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct DumpConfig {
    /// Print an additional column with the character representation of each line.
    pub chars: bool,
    /// Number of bytes to skip at the start of the data source; `0` skips nothing.
    pub skip: usize,
    /// Print every line, even when it repeats the previous one.
    ///
    /// When `false`, a run of identical full lines is collapsed into a single
    /// "repeats N lines" marker.
    pub show_identical: bool,
    /// Number of bytes to interpret.
    ///
    /// NOTE(review): the CLI describes this as "only interpret N bytes (end after N)";
    /// how `dump` applies it is not visible here, confirm before relying on it.
    pub len: usize,
}
/// A readable source of bytes that can additionally be asked to skip ahead.
///
/// This is the input type accepted by [`dump`]; it is implemented in this file for
/// [`std::io::Stdin`] and [`std::fs::File`].
pub trait DataSource: Read {
/// Advances the source by `length` bytes before any further reads.
///
/// Each implementation decides how (and whether) the skip is carried out.
///
/// # Errors
///
/// Returns the underlying I/O error if the source fails while skipping.
fn skip(&mut self, length: usize) -> std::io::Result<()>;
}
impl DataSource for std::io::Stdin {
fn skip(&mut self, _length: usize) -> std::io::Result<()> {
warn!("can't skip bytes for the stdin!");
Ok(())
}
}
impl DataSource for std::fs::File {
    /// Skips `length` bytes by seeking forward from the current position.
    ///
    /// # Errors
    ///
    /// Returns an [`std::io::ErrorKind::InvalidInput`] error if `length` does not fit
    /// into a signed 64-bit seek offset, or the error reported by the seek itself.
    fn skip(&mut self, length: usize) -> std::io::Result<()> {
        // A plain `as i64` cast would wrap large values into a negative offset and
        // silently seek backwards, so convert with a range check instead.
        let offset = i64::try_from(length).map_err(|_| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "skip length does not fit into a seek offset",
            )
        })?;
        // `seek` returns the new position from the start, we don't need it here.
        self.seek(SeekFrom::Current(offset))?;
        Ok(())
    }
}
pub fn dump(data: &mut dyn DataSource, config: DumpConfig) -> Result<()> {
// skip a given number of bytes
if config.skip > 0 {
data.skip(config.skip)?;
info!("Skipped {}", humanbytes(config.skip));
}
let mut buf: [[u8; BYTES_PER_LINE]; 2] = [[0; BYTES_PER_LINE]; 2];
let mut alt_buf = 0usize;
let mut line_counter: usize = 0;
let mut len: usize;
// print the head
print!("LINE IDX {LINE_SEP_VERT} DATA AS HEX");
if chars {
if config.chars {
print!("{:width$} {LINE_SEP_VERT} FOO", "", width = 37);
}
println!();
if chars {
println!("{}", format!("{LINE_SEP_HORIZ}").repeat(78));
if config.chars {
println!("{}", format!("{LINE_SEP_HORIZ}").repeat(80));
} else {
println!("{}", format!("{LINE_SEP_HORIZ}").repeat(59));
}
// data dump loop
len = rd_data(&mut data, &mut buf).unwrap();
len = rd_data(data, &mut buf, &mut alt_buf).unwrap();
while len > 0 {
print!("{:08X} {LINE_SEP_VERT} ", line_counter);
for i in 0..len {
if i as usize % BYTES_PER_LINE == BYTES_PER_LINE / 2 {
print!(" ");
}
print!("{:02X} ", buf[i]);
print!("{:02X} ", buf[alt_buf][i]);
}
if len == BYTES_PER_LINE / 2 {
print!(" ")
@ -50,15 +79,31 @@ where
}
print!(" ");
}
if chars {
print!("{LINE_SEP_VERT} ");
if config.chars {
print!("{LINE_SEP_VERT} |");
for i in 0..len {
print!("{}", mask_chars(buf[i] as char));
print!("{}", mask_chars(buf[alt_buf][i] as char));
}
print!("|");
}
line_counter += 1;
println!();
len = rd_data(&mut data, &mut buf).unwrap();
len = rd_data(data, &mut buf, &mut alt_buf).unwrap();
if buf[0] == buf[1] && len == BYTES_PER_LINE && !config.show_identical {
trace!(buf = format!("{:?}", buf), "found a duplicating line");
let start_line = line_counter;
while buf[0] == buf[1] && len == BYTES_PER_LINE {
len = rd_data(data, &mut buf, &mut alt_buf).unwrap();
line_counter += 1;
}
println!(
"^^^^^^^^ {LINE_SEP_VERT} (repeats {} lines)",
line_counter - start_line
);
}
// Switch to the other half of the buffer; the line just read stays in the old half.
// Comparing both halves is how duplicate lines are detected.
alt_buf ^= 1; // toggle the alt buf
}
Ok(())
}
@ -77,12 +122,15 @@ fn mask_chars(c: char) -> char {
}
}
fn rd_data<T>(data: &mut BufReader<T>, mut buf: &mut [u8]) -> Result<usize>
where
T: Read,
{
match data.read(&mut buf) {
Ok(len) => Ok(len),
fn rd_data(
data: &mut dyn DataSource,
buf: &mut [[u8; BYTES_PER_LINE]; 2],
alt_buf: &mut usize,
) -> Result<usize> {
match data.read(&mut buf[*alt_buf]) {
Ok(len) => {
return Ok(len);
}
Err(err) => {
error!("error while reading data: {err}");
bail!(err)