working on actual unhacky implementation

This commit is contained in:
Christoph J. Scherr 2022-11-26 14:31:18 +01:00
parent d24007c036
commit e427fe4c0b
1 changed files with 54 additions and 59 deletions

View File

@ -8,6 +8,8 @@
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#define BUFSIZE 512
// a single node, only having information for a single byte. // a single node, only having information for a single byte.
struct Node { struct Node {
uint8_t byte; uint8_t byte;
@ -22,6 +24,24 @@ struct Heap {
bool isRoot; bool isRoot;
}; };
// File-wide lookup tables. Being file-scope objects they start out as null
// pointers; initNodes() is what points them at allocated storage.
struct Node *nodes;
struct Heap *heaps;
// Allocate and initialise the global `nodes` and `heaps` tables, one slot
// per possible byte value (0..255).
//
// On return, nodes[i].byte == i, the occurrence counter and the frequency
// priority of every node are zero, and no heap slot is marked as a root.
// If either allocation fails, an error is printed to stderr and the process
// exits with EXIT_FAILURE, so callers never see a NULL table.
//
// Calling this again overwrites both global pointers without freeing the
// previous tables.
void initNodes(){
    enum { BYTE_VALUES = 256 }; // number of distinct values a byte can take

    // calloc zero-fills the storage, so fields that are not assigned below
    // do not hold indeterminate values.
    nodes = calloc(BYTE_VALUES, sizeof *nodes);
    heaps = calloc(BYTE_VALUES, sizeof *heaps); // not sure if i might need more memory for heaps?
    if(nodes == NULL || heaps == NULL){
        fprintf(stderr, "Error: could not allocate memory for nodes and heaps.\n");
        exit(EXIT_FAILURE);
    }

    for(int i = 0; i < BYTE_VALUES; i++){
        nodes[i].byte = (uint8_t)i; // i is always within 0..255 here
        nodes[i].occurences = 0;
        nodes[i].frequencyPriority = 0;
        heaps[i].isRoot = false;
    }
}
// stolen from stackoverflow // stolen from stackoverflow
// https://stackoverflow.com/questions/8236/how-do-you-determine-the-size-of-a-file-in-c // https://stackoverflow.com/questions/8236/how-do-you-determine-the-size-of-a-file-in-c
off_t fsize(const char *filename) { off_t fsize(const char *filename) {
@ -96,8 +116,8 @@ int main(int argc, char *argv[]) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
filelen = fsize(filestring); filelen = fsize(filestring);
if(verbose) if(verbose)
printf("filesize: %ldB\n", filelen); printf("filesize: %ldB\n", filelen);
} }
else { else {
// empty filestring or filestring is NULL // empty filestring or filestring is NULL
@ -111,19 +131,20 @@ int main(int argc, char *argv[]) {
} }
else { else {
// compress the file // compress the file
if (verbose) if (verbose)
printf("compressing file...\n"); printf("compressing file...\n");
// frequency analysis // frequency analysis
uint8_t buf [BUFSIZE];
// dump start of file if debugging // dump start of file if debugging
// FIXME add conditions if the part to print is smaller than 512B // FIXME add conditions if the part to print is smaller than 512B
if(debug){ if(debug){
printf("[DEBUG]First 512 bytes are:\n"); printf("[DEBUG]First 512 bytes are:\n");
fread(buf, 1, 512, fptrR); fread(buf, 1, BUFSIZE, fptrR);
for(int i=0;i<512;i++){ for(int i=0;i<BUFSIZE;i++){
if(i%16==0) if(i%16==0)
printf("%08x\t", i); printf("%08x\t", i);
printf("%02x ", buf[i]); printf("%02x ", buf[i]);
@ -134,66 +155,40 @@ int main(int argc, char *argv[]) {
} }
} }
} }
initNodes();
uint64_t occurences[256] = { 0 }; int ret;
while(!feof(fptrR)){
// TODO calculate occurences ret = fread(buf, 1, BUFSIZE, fptrR);
// FIXME this loads the file into RAM completely. Loading a too big file would eat all memory of the system. if(ret == BUFSIZE){
// This is a dirty hack of an algorithm. // fread success, continue as normal
// uint8_t* buf = malloc(filelen); // calculate occurences.
for(int i = 0; i < filelen; i++) { for(int i = 0; i < BUFSIZE; i++){
fread(buf+i, 1, 1, fptrR); nodes[buf[i]].occurences++;
} }
// now go through all of the stored bytes in the buffer and count the occurences.
for(int i = 0; i < filelen; i++) {
occurences[*(buf+i)]++; // FIXME this might get the value of the bytes but +1, not sure about the logic!
}
// holy shit i think the dirty hack is working
// well, at least for smaller files.
// SEGVAULT for the 10G file, 1G works.
if(debug){
printf("Occurences (Hex):\n");
for(int i=0;i<256;i++){
if(i%4==0)
printf("\n");
printf("0x%02x: %016lx\t", i, occurences[i]);
} }
printf("\n\nfile length(by pointer):\t\t%luB\n", filelen); else if((ret < BUFSIZE) && (ret >= 0)){
long long int addedUpOccurences = 0; // FIXME might not be enough storage for larger files! if(!feof(fptrR)) {
for(int i=0;i<256;i++){ // no EOF, but didn't read full buffer. Assuming an error
addedUpOccurences += occurences[i]; printf("Error while reading file.");
exit(EXIT_FAILURE);
}
else{
// reached EOF, finished
break;
}
} }
printf("file length(added up occurences):\t%lldB\n", addedUpOccurences); else{
printf("Undefined behaviour while reading file.\n");
exit(EXIT_FAILURE);
}
fseek(fptrR, BUFSIZE, SEEK_CUR);
} }
if(verbose)
printf("\n\nDone calculating occurences of bytes.\n");
// TODO
// calculate the frequencies of the bytes.
double frequencies[256];
for(int i=0;i<256;i++){
frequencies[i]=((double)occurences[i]/(double)filelen)*100; // calculate frequencies of bytes in percent (example: 05.23 (%))
}
if(debug){
printf("Frequencies:\n");
for(int i=0;i<256;i++){
if(i%8==0)
printf("\n");
printf("0x%02x: %05.02f%%\t", i, frequencies[i]);
}
double addedUpFrequencies = 0;
for(int i=0;i<256;i++){
addedUpFrequencies += frequencies[i];
}
printf("\n\nadded up frequencies: %05.02f%%\n",addedUpFrequencies);
}
} }
fclose(fptrR); fclose(fptrR);
printf("\n"); printf("\n");
if(debug) // wait for input to end.
getchar();
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }