working on actual unhacky implementation
This commit is contained in:
parent
d24007c036
commit
e427fe4c0b
|
@ -8,6 +8,8 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#define BUFSIZE 512
|
||||||
|
|
||||||
// a single node, only having information for a single byte.
|
// a single node, only having information for a single byte.
|
||||||
struct Node {
|
struct Node {
|
||||||
uint8_t byte;
|
uint8_t byte;
|
||||||
|
@ -22,6 +24,24 @@ struct Heap {
|
||||||
bool isRoot;
|
bool isRoot;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Global tables shared across the program; both are allocated by initNodes().
// nodes: 256 entries, one per byte value (initNodes sets nodes[i].byte = i).
// heaps: 256 entries, each starting with isRoot == false.
|
||||||
|
struct Node* nodes;
|
||||||
|
struct Heap* heaps;
|
||||||
|
|
||||||
|
// initialize our nodes
|
||||||
|
void initNodes(){
|
||||||
|
nodes = malloc(sizeof(struct Node)*256);
|
||||||
|
heaps = malloc(sizeof(struct Heap)*256); // not sure if i might need more memory for heaps?
|
||||||
|
|
||||||
|
for(int i = 0; i < 256; i++){
|
||||||
|
nodes[i].byte = i;
|
||||||
|
nodes[i].occurences = 0;
|
||||||
|
nodes[i].frequencyPriority = 0;
|
||||||
|
heaps[i].isRoot = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// stolen from stackoverflow
|
// stolen from stackoverflow
|
||||||
// https://stackoverflow.com/questions/8236/how-do-you-determine-the-size-of-a-file-in-c
|
// https://stackoverflow.com/questions/8236/how-do-you-determine-the-size-of-a-file-in-c
|
||||||
off_t fsize(const char *filename) {
|
off_t fsize(const char *filename) {
|
||||||
|
@ -111,19 +131,20 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
|
|
||||||
// compress the file
|
// compress the file
|
||||||
if (verbose)
|
if (verbose)
|
||||||
printf("compressing file...\n");
|
printf("compressing file...\n");
|
||||||
|
|
||||||
// frequency analysis
|
// frequency analysis
|
||||||
|
uint8_t buf [BUFSIZE];
|
||||||
|
|
||||||
// dump start of file if debugging
|
// dump start of file if debugging
|
||||||
// FIXME add conditions if the part to print is smaller than 512B
|
// FIXME add conditions if the part to print is smaller than 512B
|
||||||
if(debug){
|
if(debug){
|
||||||
printf("[DEBUG]First 512 bytes are:\n");
|
printf("[DEBUG]First 512 bytes are:\n");
|
||||||
fread(buf, 1, 512, fptrR);
|
fread(buf, 1, BUFSIZE, fptrR);
|
||||||
for(int i=0;i<512;i++){
|
for(int i=0;i<BUFSIZE;i++){
|
||||||
if(i%16==0)
|
if(i%16==0)
|
||||||
printf("%08x\t", i);
|
printf("%08x\t", i);
|
||||||
printf("%02x ", buf[i]);
|
printf("%02x ", buf[i]);
|
||||||
|
@ -134,66 +155,40 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
initNodes();
|
||||||
uint64_t occurences[256] = { 0 };
|
int ret;
|
||||||
|
while(!feof(fptrR)){
|
||||||
// TODO calculate occurences
|
ret = fread(buf, 1, BUFSIZE, fptrR);
|
||||||
// FIXME this loads the file into RAM completely. Loading a too big file would eat all memory of the system.
|
if(ret == BUFSIZE){
|
||||||
// This is a dirty hack of an algorithm.
|
// fread success, continue as normal
|
||||||
// uint8_t* buf = malloc(filelen);
|
// calculate occurences.
|
||||||
for(int i = 0; i < filelen; i++) {
|
for(int i = 0; i < BUFSIZE; i++){
|
||||||
fread(buf+i, 1, 1, fptrR);
|
nodes[buf[i]].occurences++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if((ret < BUFSIZE) && (ret >= 0)){
|
||||||
|
if(!feof(fptrR)) {
|
||||||
|
// no EOF, but didn't read full buffer. Assuming an error
|
||||||
|
printf("Error while reading file.");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
// reached EOF, finished
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
printf("Undefined behaviour while reading file.\n");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
fseek(fptrR, BUFSIZE, SEEK_CUR);
|
||||||
}
|
}
|
||||||
|
|
||||||
// now go through all of the stored bytes in the buffer and count the occurences.
|
|
||||||
for(int i = 0; i < filelen; i++) {
|
|
||||||
occurences[*(buf+i)]++; // FIXME this might get the value of the bytes but +1, not sure about the logic!
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// holy shit i think the dirty hack is working
|
|
||||||
// well, at least for smaller files.
|
|
||||||
// SEGVAULT for the 10G file, 1G works.
|
|
||||||
|
|
||||||
|
|
||||||
if(debug){
|
|
||||||
printf("Occurences (Hex):\n");
|
|
||||||
for(int i=0;i<256;i++){
|
|
||||||
if(i%4==0)
|
|
||||||
printf("\n");
|
|
||||||
printf("0x%02x: %016lx\t", i, occurences[i]);
|
|
||||||
}
|
|
||||||
printf("\n\nfile length(by pointer):\t\t%luB\n", filelen);
|
|
||||||
long long int addedUpOccurences = 0; // FIXME might not be enough storage for larger files!
|
|
||||||
for(int i=0;i<256;i++){
|
|
||||||
addedUpOccurences += occurences[i];
|
|
||||||
}
|
|
||||||
printf("file length(added up occurences):\t%lldB\n", addedUpOccurences);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(verbose)
|
|
||||||
printf("\n\nDone calculating occurences of bytes.\n");
|
|
||||||
|
|
||||||
// TODO
|
|
||||||
// calculate the frequencies of the bytes.
|
|
||||||
double frequencies[256];
|
|
||||||
for(int i=0;i<256;i++){
|
|
||||||
frequencies[i]=((double)occurences[i]/(double)filelen)*100; // calculate frequencies of bytes in percent (example: 05.23 (%))
|
|
||||||
}
|
|
||||||
if(debug){
|
|
||||||
printf("Frequencies:\n");
|
|
||||||
for(int i=0;i<256;i++){
|
|
||||||
if(i%8==0)
|
|
||||||
printf("\n");
|
|
||||||
printf("0x%02x: %05.02f%%\t", i, frequencies[i]);
|
|
||||||
}
|
|
||||||
double addedUpFrequencies = 0;
|
|
||||||
for(int i=0;i<256;i++){
|
|
||||||
addedUpFrequencies += frequencies[i];
|
|
||||||
}
|
|
||||||
printf("\n\nadded up frequencies: %05.02f%%\n",addedUpFrequencies);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fclose(fptrR);
|
fclose(fptrR);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
if(debug) // wait for input to end.
|
||||||
|
getchar();
|
||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue