/* _ENTROPY_ by Kas Thomas   [LISTING ONE] */

/* * * * * * * * * * * * * * * * * ENTROPY.C * * * * * * * * * * * * * * * */
/* Calculates zero-order entropy of a file, a la Shannon.                  */
/* Turbo C version by Kas Thomas                                           */
/* You may distribute this listing to fellow programmers. Please retain    */
/* authorship notices, however.                                            */
/* This program will give an approximate measure of how compressible a     */
/* given file is using Huffman-type compression techniques. It calculates  */
/* the best compression possible using order-0 finite context modelling.   */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* Base-2 logarithm via change of base. Dividing by log(2.0) avoids the   */
/* rounding error of a hand-typed constant such as 3.32.                   */
#define LOG(x) (log((double)(x)) / log(2.0))

/* Classic definition of one symbol's entropy contribution: -p * log2(p).  */
/* The argument is evaluated twice, so pass a side-effect-free expression, */
/* and it must be > 0 (log of 0 is undefined).                             */
#define ENTROPY(x) (-((x) * LOG(x)))

FILE *in;                   /* input file pointer */
unsigned long table[256];   /* per-byte-value occurrence counts; unsigned */
                            /* long so counts don't wrap at 65535 on      */
                            /* 16-bit compilers                           */

void read_input(void);
double analyze(void);
void usage(void);

/* ------------------- MAIN ------------------ */
/* Opens the file named by av[1], tallies its bytes, and prints the       */
/* zero-order entropy plus an estimated Huffman shrinkage percentage.     */
/* Returns EXIT_SUCCESS on success; exits with -1 if the file cannot be   */
/* opened or read, and with 1 (via usage()) if no file name was given.    */
int main(int ac, char **av)
{
    double result;   /* return value of analyze() */

    if (ac < 2)      /* also covers argc == 0, where av[1] is out of range */
        usage();     /* explain program usage & exit */

    in = fopen(av[1], "rb");   /* binary mode: every byte must be counted */
    if (!in) {
        printf("\nCouldn't open input file.");
        exit(-1);
    }

    printf("\n ** Reading file . . .");
    read_input();   /* read file & tally character frequencies */

    /* getc() returns EOF for both end-of-file and read errors; tell them  */
    /* apart so a truncated read is not reported as a valid result.        */
    if (ferror(in)) {
        printf("\nError while reading input file.");
        fclose(in);
        exit(-1);
    }

    printf("\n ** Calculating . . .\n");
    result = analyze();   /* analyze the frequency data */

    /* finally, print the results to the screen */
    printf("\n The file \"%s\" has a zero-order", av[1]);
    printf("\n entropy of %3.3f bits per byte.\n", result);
    printf("\n Approximate shrinkage potential");
    printf("\n using Huffman techniques:");
    /* 8 bits per byte is the incompressible maximum */
    printf(" %2.0f%%\n\n\n", 100 - (result * 100) / 8);

    fclose(in);
    return EXIT_SUCCESS;
} /* end function main() */

/* ----------------------- read_input() ----------------------- */
/* Reads the global stream `in` one byte at a time until getc() returns   */
/* EOF, incrementing table[] at the index of each byte value read.        */
void read_input(void)
{
    int ch;

    while ((ch = getc(in)) != EOF) {
        table[ch]++;   /* getc() yields 0..255 here, so the index is in range */
    }
} /* end function read_input() */

/* ----------------------- analyze() -------------------------- */
/* Computes the zero-order entropy, in bits per byte, of the counts held  */
/* in table[]. Returns 0.0 when the table is empty (no bytes were read).  */
double analyze(void)
{
    double accum = 0.0;   /* entropy accumulates here */
    double total = 0.0;   /* total number of bytes tallied */
    double freq;          /* relative frequency of one byte value */
    int z;

    /* Derive the total from the tallies themselves rather than ftell(),  */
    /* which can fail (returning -1) and depends on the stream position.  */
    for (z = 0; z < 256; z++) {
        total += (double) table[z];
    }

    if (total == 0.0) {
        return 0.0;   /* empty input: nothing to measure, avoid 0/0 */
    }

    for (z = 0; z < 256; z++) {
        if (table[z]) {   /* skip unused values: log(0) is undefined */
            freq = (double) table[z] / total;
            accum += ENTROPY(freq);
        }
    }

    return accum;
} /* end analyze() */

/* --------------------------------- usage() -------------------------- */
/* Explain program & exit. Does not return: terminates with status 1.  */
void usage(void)
{
    printf("\n\n");
    printf(" Entropy v1.00 by Kas Thomas. Public Domain.\n\n");
    printf(" Syntax: ENTROPY {filename} [Enter]\n\n");
    printf(" Entropy is a measure of information storage efficiency.\n");
    printf(" This program calculates a file's entropy, hence its\n");
    printf(" compressibility, using the entropy equation of Shannon.\n");
    printf(" (See \"Information Theory: Symbols, Signals, & Noise,\"\n");
    printf(" by John Pierce, Dover, 1981).\n\n");
    exit(1);
} /* end function usage() */