huffman progress, scripting

This commit is contained in:
Christoph J. Scherr 2022-11-25 00:14:31 +01:00
parent baea9ae9be
commit 96d374eeb7
5 changed files with 182 additions and 184 deletions

View File

@ -3,7 +3,9 @@ returnCode=0
echo -e "compiling all files in working directory $(pwd)\n" echo -e "compiling all files in working directory $(pwd)\n"
for file in $(/bin/ls *.c); for file in $(/bin/ls *.c);
do do
./compile.sh $file; echo "compiling $file ..."
noext=$(echo "$file" | cut -f 1 -d '.')
gcc $file -o bin/$noext -lm
if [ "$?" -ne 0 ] if [ "$?" -ne 0 ]
then then
echo -e "\nERROR: could not compile $file !\n"; echo -e "\nERROR: could not compile $file !\n";

View File

@ -1,4 +1,6 @@
#!/bin/bash #!/bin/bash
noext=$(echo "$1" | cut -f 1 -d '.') noext=$(echo "$1" | cut -f 1 -d '.')
./compile.sh $1 echo "compiling $1 ..."
noext=$(echo "$1" | cut -f 1 -d '.')
gcc $1 -o bin/$noext -lm
./bin/$noext $2 $3 $4 $5 $6 $7 $8 $9 ./bin/$noext $2 $3 $4 $5 $6 $7 $8 $9

View File

@ -1,8 +1,4 @@
#!/bin/bash #!/bin/bash
echo "compiling $1 ..." echo "compiling $1 ..."
noext=$(echo "$1" | cut -f 1 -d '.') noext=$(echo "$1" | cut -f 1 -d '.')
# to treat warnings as errors, use the following line
#gcc $1 -o bin/$noext -lm -Werror=format
#
gcc $1 -o bin/$noext -lm gcc $1 -o bin/$noext -lm

Binary file not shown.

View File

@ -12,19 +12,19 @@ off_t fsize(const char *filename) {
struct stat st; struct stat st;
if (stat(filename, &st) == 0) if (stat(filename, &st) == 0)
return st.st_size; return st.st_size;
fprintf(stderr, "Cannot determine size of %s: %s\n", filename, fprintf(stderr, "Cannot determine size of %s: %s\n", filename,
strerror(errno)); strerror(errno));
return -1; return -1;
} }
void helper() { void helper() {
printf("huffman compression algorithm implementation for educational " printf("huffman compression algorithm implementation for educational "
"purposes.\n\nSyntax:\nhuffman -f fileToCompress\t\tcompress the " "purposes.\n\nSyntax:\nhuffman -f fileToCompress\t\tcompress the "
"given file\nhuffman -xf fileToDecompress\t\tdecompress the given " "given file\nhuffman -xf fileToDecompress\t\tdecompress the given "
"file\nhuffman -h\t\t\t\tshow this help\nhuffman -v\t\t\t\tverbose\n"); "file\nhuffman -h\t\t\t\tshow this help\nhuffman -v\t\t\t\tverbose\n");
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
@ -39,207 +39,205 @@ int main(int argc, char *argv[]) {
FILE *fptrW = NULL; // file pointer for writing FILE *fptrW = NULL; // file pointer for writing
while ((opt = getopt(argc, argv, "dvxhf:")) != -1) { while ((opt = getopt(argc, argv, "dvxhf:")) != -1) {
if (debug) if (debug)
printf("optarg is: %s\n", optarg); printf("optarg is: %s\n", optarg);
switch (opt) { switch (opt) {
case 'v': case 'v':
verbose = true; verbose = true;
break; break;
case 'd': case 'd':
debug = true; debug = true;
break; break;
case 'f': case 'f':
filestring = optarg; filestring = optarg;
break; break;
case 'h': case 'h':
helper(); helper();
exit(0); exit(0);
break; break;
case 'x': case 'x':
extract_mode = true; extract_mode = true;
break; break;
default: default:
fprintf(stderr, "Usage: %s [-dvhx -f] [file]\n", argv[0]); fprintf(stderr, "Usage: %s [-dvhx -f] [file]\n", argv[0]);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
} }
// Now optind (declared extern int by <unistd.h>) is the index of the first // Now optind (declared extern int by <unistd.h>) is the index of the first
// non-option argument. If it is >= argc, there were no non-option arguments. // non-option argument. If it is >= argc, there were no non-option arguments.
if (verbose) if (verbose)
printf("selected file: %s\n", filestring); printf("selected file: %s\n", filestring);
if (filestring) { if (filestring) {
if(debug) if(debug)
printf("[DEBUG]processing given file argument.\n"); printf("[DEBUG]processing given file argument.\n");
// open the given file in binary mode, I want this to work with any files, // open the given file in binary mode, I want this to work with any files,
// not just textfiles. // not just textfiles.
fptrR = fopen(filestring, "rb"); fptrR = fopen(filestring, "rb");
if (fptrR == NULL) { if (fptrR == NULL) {
fprintf(stderr, "The given file does not exist or is unavailable.\n"); fprintf(stderr, "The given file does not exist or is unavailable.\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
// causes bugs. // causes bugs.
fseek(fptrR, 0L, SEEK_END); fseek(fptrR, 0L, SEEK_END);
filelen = ftell(fptrR); filelen = ftell(fptrR);
fseek(fptrR, 0L, SEEK_SET); fseek(fptrR, 0L, SEEK_SET);
} }
else { else {
// empty filestring or filestring is NULL // empty filestring or filestring is NULL
fprintf(stderr, "Usage: %s [-dvhx -f] [file]\n", argv[0]); fprintf(stderr, "Usage: %s [-dvhx -f] [file]\n", argv[0]);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
// TODO check file size and spit a "what the heck im not a 10x dev, do a // TODO check file size and spit a "what the heck im not a 10x dev, do a
if(verbose) if(verbose)
printf("filesize: %ldB\n", filelen); printf("filesize: %ldB\n", filelen);
if (extract_mode) { if (extract_mode) {
printf("extracting is not yet implemented.\n"); printf("extracting is not yet implemented.\n");
// decompress the file // decompress the file
} }
else { else {
// compress the file // compress the file
if (verbose) if (verbose)
printf("compressing file...\n"); printf("compressing file...\n");
// frequency analysis // frequency analysis
uint8_t buf[512]; uint8_t buf[512];
// dump start of file if debugging // dump start of file if debugging
// FIXME add conditions if the part to print is smaller than 512B // FIXME add conditions if the part to print is smaller than 512B
if(debug){ if(debug){
printf("[DEBUG]First 512 bytes are:\n"); printf("[DEBUG]First 512 bytes are:\n");
fread(buf, 1, 512, fptrR); fread(buf, 1, 512, fptrR);
for(int i=0;i<512;i++){ for(int i=0;i<512;i++){
if(i%16==0) if(i%16==0)
printf("%08x\t", i); printf("%08x\t", i);
printf("%02x ", buf[i]); printf("%02x ", buf[i]);
if(i%16==7) if(i%16==7)
printf(" "); printf(" ");
if(i%16==15){ if(i%16==15){
printf("\n"); printf("\n");
} }
} }
} }
uint64_t occurences[256]; uint64_t occurences[256];
for(int i=0;i<256;i++){ for(int i=0;i<256;i++){
occurences[i]=0; occurences[i]=0;
} }
// FIXME doesnt loop through full file! only 50% for larger files // FIXME doesnt loop through full file! only 50% for larger files
// Backup occurence counting algorithm // Backup occurence counting algorithm
/* /*
while(!feof(fptrR)){ while(!feof(fptrR)){
fseek(fptrR, 512, SEEK_CUR); fseek(fptrR, 512, SEEK_CUR);
if(fread(buf, 1, 512, fptrR)){ if(fread(buf, 1, 512, fptrR)){
for(int i=0;i<512;i++){ for(int i=0;i<512;i++){
occurences[buf[i]]++; occurences[buf[i]]++;
} }
} }
else{ else{
fprintf(stderr, "Error when processing file.\n"); fprintf(stderr, "Error when processing file.\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
// advance filepointer 512 bytes foreward. If not possible, set endOfFile flag. // advance filepointer 512 bytes foreward. If not possible, set endOfFile flag.
// FIXME // FIXME
offset += 512; offset += 512;
} }
*/ */
// backup // backup
/* while(1){
while(1){ fseek(fptrR, 512, SEEK_CUR); // this line seems the be making the most problems
fseek(fptrR, 512, SEEK_CUR); // this line seems the be making the most problems
// On success, fread() and fwri)te() return the number of items read or written. // On success, fread() and fwri)te() return the number of items read or written.
// This number equals the number of bytes transferred only when size is // This number equals the number of bytes transferred only when size is
// 1. If an error occurs, or the end of the file is reached, the return value is a short item count (or zero). // 1. If an error occurs, or the end of the file is reached, the return value is a short item count (or zero).
// fread() does not distinguish between end-of-file and error, and callers must use feof(3) and ferror(3) to determine which occurred. // fread() does not distinguish between end-of-file and error, and callers must use feof(3) and ferror(3) to determine which occurred.
// FIXME )This is a buggy mess // FIXME )This is a buggy mess
if(512 == fread(buf, 1, 512, fptrR)){ if(512 == fread(buf, 1, 512, fptrR)){
for(int i=0;i<512;i++){ for(int i=0;i<512;i++){
occurences[buf[i]]++;) occurences[buf[i]]++;
} }
}) }
else if(0 == fread(buf, 1, 512, fptrR)){ else if(0 == fread(buf, 1, 512, fptrR)){
if(debug) if(debug)
printf("[DEBUG]fread returned 0! ftell for current position is %lu\n", ftell(fptrR)); printf("[DEBUG]fread returned 0! ftell for current position is %lu\n", ftell(fptrR));
break; break;
} }
else{ else{
fprintf(stderr, "Error when processing file: %lu, At offset %lu\n",fread(buf, 1, 512, fptrR), ftell(fptrR)); fprintf(stderr, "Error when processing file: %lu, At offset %lu\n",fread(buf, 1, 512, fptrR), ftell(fptrR));
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if(ftell(fptrR) > filelen) { if(ftell(fptrR) > filelen) {
// ??? unknown error // ??? unknown error
fprintf(stderr, "tried reading further than the file is long somehow?\n"); fprintf(stderr, "tried reading further than the file is long somehow?\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
} }
*/ // ALMOST WORKS! ~200 bytes lost for a 10M file!!!
// ALMOST WORKS! ~200 bytes lost for a 10M file!!! uint8_t bufMini[1];
uint8_t bufMini[1]; // while(1){
while(1){ // if(fread(bufMini, 1, 1, fptrR)){
if(fread(bufMini, 1, 1, fptrR)){ // occurences[buf[0]]++;
occurences[buf[0]]++; // }
} // else{
else{ // if(ferror(fptrR)){
if(ferror(fptrR)){ // fprintf(stderr, "encountered error when reading file.\n");
fprintf(stderr, "encountered error when reading file.\n"); // exit(EXIT_FAILURE);
exit(EXIT_FAILURE); // }
} // }
} // fseek(fptrR, 1, SEEK_SET);
fseek(fptrR, 1, SEEK_SET); // if(ferror(fptrR)){
if(ferror(fptrR)){ // fprintf(stderr, "encountered error when reading file.\n");
fprintf(stderr, "encountered error when reading file.\n"); // exit(EXIT_FAILURE);
exit(EXIT_FAILURE); // }
} // else if(feof(fptrR)){
else if(feof(fptrR)){ // break;
continue; // }
} // }
} if(debug){
if(debug){ printf("Occurences (Hex):\n");
printf("Occurences (Hex):\n"); for(int i=0;i<256;i++){
for(int i=0;i<256;i++){ if(i%4==0)
if(i%4==0) printf("\n");
printf("\n"); printf("0x%02x: %016lx\t", i, occurences[i]);
printf("0x%02x: %016lx\t", i, occurences[i]); }
} printf("\n\nfile length(by pointer):\t\t%luB\n", filelen);
printf("\n\nfile length(by pointer):\t\t%luB\n", filelen); long long int addedUpOccurences = 0; // FIXME might not be enough storage for larger files!
long long int addedUpOccurences = 0; // FIXME might not be enough storage for larger files! for(int i=0;i<256;i++){
for(int i=0;i<256;i++){ addedUpOccurences += occurences[i];
addedUpOccurences += occurences[i]; }
} printf("file length(added up occurences):\t%lldB\n", addedUpOccurences);
printf("file length(added up occurences):\t%lldB\n", addedUpOccurences); }
}
if(verbose) if(verbose)
printf("\n\nDone calculating occurences of bytes.\n"); printf("\n\nDone calculating occurences of bytes.\n");
// TODO // TODO
// calculate the frequencies of the bytes. // calculate the frequencies of the bytes.
double frequencies[256]; double frequencies[256];
for(int i=0;i<256;i++){ for(int i=0;i<256;i++){
frequencies[i]=((double)occurences[i]/(double)filelen)*100; // calculate frequencies of bytes in percent (example: 05.23 (%)) frequencies[i]=((double)occurences[i]/(double)filelen)*100; // calculate frequencies of bytes in percent (example: 05.23 (%))
} }
if(debug){ if(debug){
printf("Frequencies:\n"); printf("Frequencies:\n");
for(int i=0;i<256;i++){ for(int i=0;i<256;i++){
if(i%8==0) if(i%8==0)
printf("\n"); printf("\n");
printf("0x%02x: %05.02f%%\t", i, frequencies[i]); printf("0x%02x: %05.02f%%\t", i, frequencies[i]);
} }
double addedUpFrequencies = 0; double addedUpFrequencies = 0;
for(int i=0;i<256;i++){ for(int i=0;i<256;i++){
addedUpFrequencies += frequencies[i]; addedUpFrequencies += frequencies[i];
} }
printf("\n\nadded up frequencies: %05.02f%%\n",addedUpFrequencies); printf("\n\nadded up frequencies: %05.02f%%\n",addedUpFrequencies);
} }
} }
fclose(fptrR); fclose(fptrR);
printf("\n"); printf("\n");