huffman progress, scripting
This commit is contained in:
parent
baea9ae9be
commit
96d374eeb7
|
@ -3,7 +3,9 @@ returnCode=0
|
||||||
echo -e "compiling all files in working directory $(pwd)\n"
|
echo -e "compiling all files in working directory $(pwd)\n"
|
||||||
for file in $(/bin/ls *.c);
|
for file in $(/bin/ls *.c);
|
||||||
do
|
do
|
||||||
./compile.sh $file;
|
echo "compiling $file ..."
|
||||||
|
noext=$(echo "$file" | cut -f 1 -d '.')
|
||||||
|
gcc $file -o bin/$noext -lm
|
||||||
if [ "$?" -ne 0 ]
|
if [ "$?" -ne 0 ]
|
||||||
then
|
then
|
||||||
echo -e "\nERROR: could not compile $file !\n";
|
echo -e "\nERROR: could not compile $file !\n";
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
noext=$(echo "$1" | cut -f 1 -d '.')
|
noext=$(echo "$1" | cut -f 1 -d '.')
|
||||||
./compile.sh $1
|
echo "compiling $1 ..."
|
||||||
|
noext=$(echo "$1" | cut -f 1 -d '.')
|
||||||
|
gcc $1 -o bin/$noext -lm
|
||||||
./bin/$noext $2 $3 $4 $5 $6 $7 $8 $9
|
./bin/$noext $2 $3 $4 $5 $6 $7 $8 $9
|
||||||
|
|
|
@ -1,8 +1,4 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
echo "compiling $1 ..."
|
echo "compiling $1 ..."
|
||||||
noext=$(echo "$1" | cut -f 1 -d '.')
|
noext=$(echo "$1" | cut -f 1 -d '.')
|
||||||
|
|
||||||
# to treat warnings as errors, use the following line
|
|
||||||
#gcc $1 -o bin/$noext -lm -Werror=format
|
|
||||||
#
|
|
||||||
gcc $1 -o bin/$noext -lm
|
gcc $1 -o bin/$noext -lm
|
||||||
|
|
Binary file not shown.
|
@ -12,19 +12,19 @@ off_t fsize(const char *filename) {
|
||||||
struct stat st;
|
struct stat st;
|
||||||
|
|
||||||
if (stat(filename, &st) == 0)
|
if (stat(filename, &st) == 0)
|
||||||
return st.st_size;
|
return st.st_size;
|
||||||
|
|
||||||
fprintf(stderr, "Cannot determine size of %s: %s\n", filename,
|
fprintf(stderr, "Cannot determine size of %s: %s\n", filename,
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void helper() {
|
void helper() {
|
||||||
printf("huffman compression algorithm implementation for educational "
|
printf("huffman compression algorithm implementation for educational "
|
||||||
"purposes.\n\nSyntax:\nhuffman -f fileToCompress\t\tcompress the "
|
"purposes.\n\nSyntax:\nhuffman -f fileToCompress\t\tcompress the "
|
||||||
"given file\nhuffman -xf fileToDecompress\t\tdecompress the given "
|
"given file\nhuffman -xf fileToDecompress\t\tdecompress the given "
|
||||||
"file\nhuffman -h\t\t\t\tshow this help\nhuffman -v\t\t\t\tverbose\n");
|
"file\nhuffman -h\t\t\t\tshow this help\nhuffman -v\t\t\t\tverbose\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
|
@ -39,207 +39,205 @@ int main(int argc, char *argv[]) {
|
||||||
FILE *fptrW = NULL; // file pointer for writing
|
FILE *fptrW = NULL; // file pointer for writing
|
||||||
|
|
||||||
while ((opt = getopt(argc, argv, "dvxhf:")) != -1) {
|
while ((opt = getopt(argc, argv, "dvxhf:")) != -1) {
|
||||||
if (debug)
|
if (debug)
|
||||||
printf("optarg is: %s\n", optarg);
|
printf("optarg is: %s\n", optarg);
|
||||||
switch (opt) {
|
switch (opt) {
|
||||||
case 'v':
|
case 'v':
|
||||||
verbose = true;
|
verbose = true;
|
||||||
break;
|
break;
|
||||||
case 'd':
|
case 'd':
|
||||||
debug = true;
|
debug = true;
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
filestring = optarg;
|
filestring = optarg;
|
||||||
break;
|
break;
|
||||||
case 'h':
|
case 'h':
|
||||||
helper();
|
helper();
|
||||||
exit(0);
|
exit(0);
|
||||||
break;
|
break;
|
||||||
case 'x':
|
case 'x':
|
||||||
extract_mode = true;
|
extract_mode = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "Usage: %s [-dvhx -f] [file]\n", argv[0]);
|
fprintf(stderr, "Usage: %s [-dvhx -f] [file]\n", argv[0]);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now optind (declared extern int by <unistd.h>) is the index of the first
|
// Now optind (declared extern int by <unistd.h>) is the index of the first
|
||||||
// non-option argument. If it is >= argc, there were no non-option arguments.
|
// non-option argument. If it is >= argc, there were no non-option arguments.
|
||||||
|
|
||||||
if (verbose)
|
if (verbose)
|
||||||
printf("selected file: %s\n", filestring);
|
printf("selected file: %s\n", filestring);
|
||||||
|
|
||||||
if (filestring) {
|
if (filestring) {
|
||||||
if(debug)
|
if(debug)
|
||||||
printf("[DEBUG]processing given file argument.\n");
|
printf("[DEBUG]processing given file argument.\n");
|
||||||
// open the given file in binary mode, I want this to work with any files,
|
// open the given file in binary mode, I want this to work with any files,
|
||||||
// not just textfiles.
|
// not just textfiles.
|
||||||
fptrR = fopen(filestring, "rb");
|
fptrR = fopen(filestring, "rb");
|
||||||
if (fptrR == NULL) {
|
if (fptrR == NULL) {
|
||||||
fprintf(stderr, "The given file does not exist or is unavailable.\n");
|
fprintf(stderr, "The given file does not exist or is unavailable.\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
// causes bugs.
|
// causes bugs.
|
||||||
fseek(fptrR, 0L, SEEK_END);
|
fseek(fptrR, 0L, SEEK_END);
|
||||||
filelen = ftell(fptrR);
|
filelen = ftell(fptrR);
|
||||||
fseek(fptrR, 0L, SEEK_SET);
|
fseek(fptrR, 0L, SEEK_SET);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// empty filestring or filestring is NULL
|
// empty filestring or filestring is NULL
|
||||||
fprintf(stderr, "Usage: %s [-dvhx -f] [file]\n", argv[0]);
|
fprintf(stderr, "Usage: %s [-dvhx -f] [file]\n", argv[0]);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO check file size and spit a "what the heck im not a 10x dev, do a
|
// TODO check file size and spit a "what the heck im not a 10x dev, do a
|
||||||
if(verbose)
|
if(verbose)
|
||||||
printf("filesize: %ldB\n", filelen);
|
printf("filesize: %ldB\n", filelen);
|
||||||
|
|
||||||
if (extract_mode) {
|
if (extract_mode) {
|
||||||
printf("extracting is not yet implemented.\n");
|
printf("extracting is not yet implemented.\n");
|
||||||
// decompress the file
|
// decompress the file
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
// compress the file
|
// compress the file
|
||||||
if (verbose)
|
if (verbose)
|
||||||
printf("compressing file...\n");
|
printf("compressing file...\n");
|
||||||
|
|
||||||
// frequency analysis
|
// frequency analysis
|
||||||
|
|
||||||
uint8_t buf[512];
|
uint8_t buf[512];
|
||||||
|
|
||||||
// dump start of file if debugging
|
// dump start of file if debugging
|
||||||
// FIXME add conditions if the part to print is smaller than 512B
|
// FIXME add conditions if the part to print is smaller than 512B
|
||||||
if(debug){
|
if(debug){
|
||||||
printf("[DEBUG]First 512 bytes are:\n");
|
printf("[DEBUG]First 512 bytes are:\n");
|
||||||
fread(buf, 1, 512, fptrR);
|
fread(buf, 1, 512, fptrR);
|
||||||
for(int i=0;i<512;i++){
|
for(int i=0;i<512;i++){
|
||||||
if(i%16==0)
|
if(i%16==0)
|
||||||
printf("%08x\t", i);
|
printf("%08x\t", i);
|
||||||
printf("%02x ", buf[i]);
|
printf("%02x ", buf[i]);
|
||||||
if(i%16==7)
|
if(i%16==7)
|
||||||
printf(" ");
|
printf(" ");
|
||||||
if(i%16==15){
|
if(i%16==15){
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t occurences[256];
|
uint64_t occurences[256];
|
||||||
for(int i=0;i<256;i++){
|
for(int i=0;i<256;i++){
|
||||||
occurences[i]=0;
|
occurences[i]=0;
|
||||||
}
|
}
|
||||||
// FIXME doesn't loop through the full file! Only 50% for larger files
|
// FIXME doesn't loop through the full file! Only 50% for larger files
|
||||||
// Backup occurence counting algorithm
|
// Backup occurence counting algorithm
|
||||||
/*
|
/*
|
||||||
while(!feof(fptrR)){
|
while(!feof(fptrR)){
|
||||||
fseek(fptrR, 512, SEEK_CUR);
|
fseek(fptrR, 512, SEEK_CUR);
|
||||||
if(fread(buf, 1, 512, fptrR)){
|
if(fread(buf, 1, 512, fptrR)){
|
||||||
for(int i=0;i<512;i++){
|
for(int i=0;i<512;i++){
|
||||||
occurences[buf[i]]++;
|
occurences[buf[i]]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
fprintf(stderr, "Error when processing file.\n");
|
fprintf(stderr, "Error when processing file.\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// advance the file pointer 512 bytes forward. If not possible, set the endOfFile flag.
|
// advance the file pointer 512 bytes forward. If not possible, set the endOfFile flag.
|
||||||
// FIXME
|
// FIXME
|
||||||
offset += 512;
|
offset += 512;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
// backup
|
// backup
|
||||||
/*
|
while(1){
|
||||||
while(1){
|
fseek(fptrR, 512, SEEK_CUR); // this line seems the be making the most problems
|
||||||
fseek(fptrR, 512, SEEK_CUR); // this line seems the be making the most problems
|
|
||||||
|
|
||||||
// On success, fread() and fwrite() return the number of items read or written.
|
// On success, fread() and fwrite() return the number of items read or written.
|
||||||
// This number equals the number of bytes transferred only when size is
|
// This number equals the number of bytes transferred only when size is
|
||||||
// 1. If an error occurs, or the end of the file is reached, the return value is a short item count (or zero).
|
// 1. If an error occurs, or the end of the file is reached, the return value is a short item count (or zero).
|
||||||
// fread() does not distinguish between end-of-file and error, and callers must use feof(3) and ferror(3) to determine which occurred.
|
// fread() does not distinguish between end-of-file and error, and callers must use feof(3) and ferror(3) to determine which occurred.
|
||||||
|
|
||||||
// FIXME: This is a buggy mess
|
// FIXME: This is a buggy mess
|
||||||
if(512 == fread(buf, 1, 512, fptrR)){
|
if(512 == fread(buf, 1, 512, fptrR)){
|
||||||
for(int i=0;i<512;i++){
|
for(int i=0;i<512;i++){
|
||||||
occurences[buf[i]]++;)
|
occurences[buf[i]]++;
|
||||||
}
|
}
|
||||||
})
|
}
|
||||||
else if(0 == fread(buf, 1, 512, fptrR)){
|
else if(0 == fread(buf, 1, 512, fptrR)){
|
||||||
if(debug)
|
if(debug)
|
||||||
printf("[DEBUG]fread returned 0! ftell for current position is %lu\n", ftell(fptrR));
|
printf("[DEBUG]fread returned 0! ftell for current position is %lu\n", ftell(fptrR));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
fprintf(stderr, "Error when processing file: %lu, At offset %lu\n",fread(buf, 1, 512, fptrR), ftell(fptrR));
|
fprintf(stderr, "Error when processing file: %lu, At offset %lu\n",fread(buf, 1, 512, fptrR), ftell(fptrR));
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
if(ftell(fptrR) > filelen) {
|
if(ftell(fptrR) > filelen) {
|
||||||
// ??? unknown error
|
// ??? unknown error
|
||||||
fprintf(stderr, "tried reading further than the file is long somehow?\n");
|
fprintf(stderr, "tried reading further than the file is long somehow?\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
// ALMOST WORKS! ~200 bytes lost for a 10M file!!!
|
||||||
// ALMOST WORKS! ~200 bytes lost for a 10M file!!!
|
uint8_t bufMini[1];
|
||||||
uint8_t bufMini[1];
|
// while(1){
|
||||||
while(1){
|
// if(fread(bufMini, 1, 1, fptrR)){
|
||||||
if(fread(bufMini, 1, 1, fptrR)){
|
// occurences[buf[0]]++;
|
||||||
occurences[buf[0]]++;
|
// }
|
||||||
}
|
// else{
|
||||||
else{
|
// if(ferror(fptrR)){
|
||||||
if(ferror(fptrR)){
|
// fprintf(stderr, "encountered error when reading file.\n");
|
||||||
fprintf(stderr, "encountered error when reading file.\n");
|
// exit(EXIT_FAILURE);
|
||||||
exit(EXIT_FAILURE);
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// fseek(fptrR, 1, SEEK_SET);
|
||||||
fseek(fptrR, 1, SEEK_SET);
|
// if(ferror(fptrR)){
|
||||||
if(ferror(fptrR)){
|
// fprintf(stderr, "encountered error when reading file.\n");
|
||||||
fprintf(stderr, "encountered error when reading file.\n");
|
// exit(EXIT_FAILURE);
|
||||||
exit(EXIT_FAILURE);
|
// }
|
||||||
}
|
// else if(feof(fptrR)){
|
||||||
else if(feof(fptrR)){
|
// break;
|
||||||
continue;
|
// }
|
||||||
}
|
// }
|
||||||
}
|
if(debug){
|
||||||
if(debug){
|
printf("Occurences (Hex):\n");
|
||||||
printf("Occurences (Hex):\n");
|
for(int i=0;i<256;i++){
|
||||||
for(int i=0;i<256;i++){
|
if(i%4==0)
|
||||||
if(i%4==0)
|
printf("\n");
|
||||||
printf("\n");
|
printf("0x%02x: %016lx\t", i, occurences[i]);
|
||||||
printf("0x%02x: %016lx\t", i, occurences[i]);
|
}
|
||||||
}
|
printf("\n\nfile length(by pointer):\t\t%luB\n", filelen);
|
||||||
printf("\n\nfile length(by pointer):\t\t%luB\n", filelen);
|
long long int addedUpOccurences = 0; // FIXME might not be enough storage for larger files!
|
||||||
long long int addedUpOccurences = 0; // FIXME might not be enough storage for larger files!
|
for(int i=0;i<256;i++){
|
||||||
for(int i=0;i<256;i++){
|
addedUpOccurences += occurences[i];
|
||||||
addedUpOccurences += occurences[i];
|
}
|
||||||
}
|
printf("file length(added up occurences):\t%lldB\n", addedUpOccurences);
|
||||||
printf("file length(added up occurences):\t%lldB\n", addedUpOccurences);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if(verbose)
|
if(verbose)
|
||||||
printf("\n\nDone calculating occurences of bytes.\n");
|
printf("\n\nDone calculating occurences of bytes.\n");
|
||||||
|
|
||||||
// TODO
|
// TODO
|
||||||
// calculate the frequencies of the bytes.
|
// calculate the frequencies of the bytes.
|
||||||
double frequencies[256];
|
double frequencies[256];
|
||||||
for(int i=0;i<256;i++){
|
for(int i=0;i<256;i++){
|
||||||
frequencies[i]=((double)occurences[i]/(double)filelen)*100; // calculate frequencies of bytes in percent (example: 05.23 (%))
|
frequencies[i]=((double)occurences[i]/(double)filelen)*100; // calculate frequencies of bytes in percent (example: 05.23 (%))
|
||||||
}
|
}
|
||||||
if(debug){
|
if(debug){
|
||||||
printf("Frequencies:\n");
|
printf("Frequencies:\n");
|
||||||
for(int i=0;i<256;i++){
|
for(int i=0;i<256;i++){
|
||||||
if(i%8==0)
|
if(i%8==0)
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("0x%02x: %05.02f%%\t", i, frequencies[i]);
|
printf("0x%02x: %05.02f%%\t", i, frequencies[i]);
|
||||||
}
|
}
|
||||||
double addedUpFrequencies = 0;
|
double addedUpFrequencies = 0;
|
||||||
for(int i=0;i<256;i++){
|
for(int i=0;i<256;i++){
|
||||||
addedUpFrequencies += frequencies[i];
|
addedUpFrequencies += frequencies[i];
|
||||||
}
|
}
|
||||||
printf("\n\nadded up frequencies: %05.02f%%\n",addedUpFrequencies);
|
printf("\n\nadded up frequencies: %05.02f%%\n",addedUpFrequencies);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fclose(fptrR);
|
fclose(fptrR);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
Loading…
Reference in New Issue