_BIT VECTORS IN C_ by James Blustein Listing One /* BITS_SZ -- BITS_SZ is the number of bits in a single bits' type. */ /* Definition of BITS_SZ */ #ifdef CHAR_BIT /* assumes typedef unsigned char bits */ #define BITS_SZ (CHAR_BIT) #else static elem_t bits_size(void); elem_t BITS_SZ = 0; /* until it is initialized by ba_init() */ static elem_t bits_size(void) { /* Adapted from the wordlength() function on page 54 (Exercise 2-8) of _The C Answer Book_ (2nd ed.) by Clovis L. Tondo and Scott E. Gimpel. Prentice-Hall, Inc., 1989. */ elem_t i; bits v = (bits)~0; for (i=1; (v = v >> 1) > 0; i++) ; /* EMPTY */ return (i); } #endif Listing Two #include #include #include "types.h" /* Listing 3 */ #include "bitarr.h" /* exported prototypes */ /* Listing 1 (definition of BITS_SZ) goes here */ /* Listing 4 (NELEM macro) goes here */ /* Listing 5 (CANONIZE macro) goes here */ typedef struct {elem_t size; bit *vector;} BitVector; static void first_is_biggest(BitVector bv[2], unsigned *, unsigned *); /* ------ Initialization and Creation Code ------ */ elem_t ba_init(void) { /* ba_init() PRE: Must be called before use of any other ba_ functions. Should only be called once. POST: Returns the number of values that can be stored in one variable of type bit. If does not define CHAR_BIT then the module global variable BITS_SZ has been set to the appropriate value. */ #ifndef BITS_SZ if (!BITS_SZ) { BITS_SZ = bits_size(); } #endif return (BITS_SZ); } /* ba_init() */ bit *ba_new(const elem_t nelems) { /* ba_new() PURPOSE: dynamically allocate space for an array of nelems bits and initalize the bits to all be zero. PRE: nelems is the number of Boolean values required in an array POST: either a pointer to an initialized (all zero) array of bit or space was not available and NULL was returned NOTE: calloc() guarantees that the space has been initialized to 0. Used by: ba_ul2b(), ba_intersection() and ba_union(). */ size_t howmany = NELEM(nelems,(BITS_SZ)); return ((bit *)calloc(howmany, sizeof(bit))); } /* ba_new() */ void ba_copy(bit dst[], const bit src[], const elem_t size) { /* ba_copy() PRE: dst has been initialized to hold size elements. src is the array of bit to be copied to dst. POST: dst is identical to the first size bits of src. src is unchanged. Used by: ba_union() */ elem_t nelem = NELEM(size,(BITS_SZ)); register elem_t i; for (i=0; i < nelem; i++) { dst[i] = src[i]; } } /* ba_copy() */ /* ------- Assigning and Retrieving Values ------ */ void ba_assign( bit arr[], elem_t elem, const bool value) { /* ba_assign() PURPOSE: set or clear the bit in position elem of the array arr PRE: arr[elem] is to be set (assigned to 1) if value is TRUE, otherwise it is to be cleared (assigned to 0). POST: PRE fulfilled. All other bits unchanged. SEE ALSO: ba_all_assign() Used by: ba_ul2b() */ if (value) { arr[elem / BITS_SZ] |= (1 << (elem % BITS_SZ)); } else { arr[elem / BITS_SZ] &= ~(1 << (elem % BITS_SZ)); } } /* ba_assign() */ bool ba_value(const bit arr[], const elem_t elem) { /* ba_value() PRE: arr must have at least elem elements POST: The value of the elemth element of arr has been returned (as though arr was just a 1-dimensional array of bit) Used by: ba_b2str() and ba_count() */ return(arr[elem / BITS_SZ] & (1 << (elem % BITS_SZ))); } /* ba_value() */ void ba_toggle( bit arr[], const elem_t elem) { /* ba_toggle() PRE: arr must have at least elem elements POST: The value of the elemth element of arr has been flipped, i.e. if it was 1 it is 0; if it was 0 it is 1. SEE ALSO: ba_complement() */ arr[elem / BITS_SZ] ^= (1 << (elem % BITS_SZ)); } /* ba_toggle() */ void ba_all_assign( bit arr[], const elem_t size, const bool value) { /* ba_all_assign() PRE: arr has been initialized to have *exactly* size elements. POST: All size elements of arr have been set to value. The array is in canonical form, i.e. trailing elements are all 0. NOTE: The array allocated by ba_new() has all elements 0 and is therefore in canonical form. SEE ALSO: ba_assign() Used by: ba_ul2b() */ elem_t nelem = NELEM(size,(BITS_SZ)); bit setval = (value) ?~0 :0; register elem_t i; for (i=0; i < nelem; i++) { arr[i] = setval; } /* force canonical form */ CANONIZE(arr,nelem,size); } /* ba_all_assign() */ /* ------- Conversion Routines ------- */ bit * ba_ul2b(unsigned long num, bit * arr, elem_t * size) { /* ba_ul2b() PRE: Either arr points to space allocated to hold enough bits to represent num (namely the ceiling of the base 2 logarithm of num). size points to the number of bit to use. OR arr is NULL and the caller is requesting that enough space be allocated to hold the representation before the translation is made. size points to space allocated to hold the count of the number of bit needed for the conversion (enough for MAXLONG). POST: A pointer to a right-aligned array of bits representing the unsigned value num has been returned and size points to the number of bits needed to hold the value. OR the request to allocate space for such an array could not be granted NOTES: - The first argument is unsigned. - It is bad to pass a size that is too small to hold the bit array representation of num [K&R II, p.100]. - Should the size be the maximum size (if size > 0) even if more bits are needed? The user can always use a filter composed of all 1s (see ba_all_assign()) intersected with result (see ba_intersection()). */ register elem_t i; if (NULL != arr) { ba_all_assign(arr, *size, 0); } else { *size = NELEM(sizeof(num),sizeof(bit)); *size *= BITS_SZ; if (NULL == (arr = ba_new(*size))) { return (arr); } } /* usual base conversion algorithm */ for (i=0; num; num >>= 1, i++) { ba_assign(arr, (*size - i - 1), (1 == (num & 01))); } return (arr); } /* ba_ul2b() */ char * ba_b2str(const bit arr[], const elem_t size, char * dest) { /* ba_b2str() PRE: arr is a bit array with at least size elements. Either dest points to enough allocated space to hold size + 1 characters or dest is NULL and such space is to be dynamically allocated. POST: Either dest points to a null-terminated string that contains a character representation of the first size elements of the bit array arr; OR dest is NULL and a request to dynamically allocate memory for a string to hold a character representation of arr was not be granted. Used by: ba_print() */ register elem_t i; if ((NULL != dest) || \ (NULL != (dest = (char *)malloc(size + 1)))) { for (i=0; i < size; i++) { dest[i] = (ba_value(arr,i) ?'1' :'0'); } dest[size] = '\0'; } return (dest); } /* ba_b2str() */ /* ------- Mathematical Applications -------- */ unsigned long ba_count(const bit arr[], const elem_t size) { /* ba_count() PRE: arr is an allocated bit array with at least size elements POST: The number of 1 bits in the first size elements of arr have been returned. NOTE: if arr is not in canonical form, i.e. if some unused bits are 1, then an unexpected value may be returned. */ register unsigned long count; register elem_t i; elem_t nelem = NELEM(size,(BITS_SZ)); static const unsigned bitcount[256] = {0, 1, 1, 2, 1, 2, 2, 3, 1, \ 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, \ 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, \ 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, \ 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, \ 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, \ 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, \ 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, \ 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, \ 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, \ 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, \ 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, \ 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, \ 6, 6, 7, 6, 7, 7, 8}; if (8 == BITS_SZ) { /* lookup table will speed this up a lot */ for (count = 0L, i = 0; i < nelem; i++) { count += bitcount[arr[i]]; } } else { for (count = 0L; i < size; i++) { if (ba_value(arr, i)) { count++; } } } return (count); } /* ba_count() */ bool ba_intersection( bit first[], bit second[], bit * result[], const elem_t size_first, const elem_t size_second) { /* ba_intersection() PRE: first is a bit array of at least size_first elements. second is a bit array of at least size_second elements. result points to enough space to hold the as many elements as the smallest of size_first and size_second; OR result points to NULL and such space is to be dynamically allocated. POST: TRUE has been returned and result points to a bit array containing the intersection of the two arrays up to the smallest of the two sizes; OR FALSE has been returned and result pointed to NULL (a request was made to allocate enough memory to store the intersection) but the required memory could not be obtained. NOTE: This runs faster if the first array is not smaller than second. */ register elem_t i; elem_t numints; unsigned largest=0, smallest=1; BitVector bv[2]; bv[largest].size = size_first; bv[largest].vector = first; bv[smallest].size = size_second; bv[smallest].vector = second; first_is_biggest(bv, &largest, &smallest); /* allocate space if *result is NULL */ if ((NULL == *result) && \ (NULL == (*result = ba_new(bv[largest].size)))) { return(FALSE); /* can't get memory, so can't continue */ } else { numints = NELEM(size_second,(BITS_SZ)); for (i=0; i < numints; i++) { (*result)[i] = (bv[smallest].vector[i] & \ bv[largest].vector[i]); } /* bits beyond size_second should be zero -- canonical form */ CANONIZE(*result, numints, size_second); return(TRUE); } } /* ba_intersection() */ bool ba_union( bit first[], bit second[], bit * result[], const elem_t size_first, const elem_t size_second) { /* ba_union() PRE: first is a bit array of at least size_first elements. second is a bit array of at least size_second elements. result points to enough space to hold the as many elements as the largest of size_first and size_second; OR result points to NULL and such space is to be dynamically allocated. POST: TRUE has been returned and result points to a bit array containing union of two arrays (up to the size of the largest of the two sizes); OR FALSE has been returned and result pointed to NULL (a request was made to allocate enough memory to store the union) but the required memory could not be obtained. NOTE: This runs faster if the first array is not smaller than second. */ register elem_t i; elem_t numints; unsigned largest=0, smallest=1; BitVector bv[2]; bv[largest].size = size_first; bv[largest].vector = first; bv[smallest].size = size_second; bv[smallest].vector = second; first_is_biggest(bv, &largest, &smallest); if ((NULL == *result) && \ (NULL == (*result = ba_new(bv[largest].size)))) { return(FALSE); } else { ba_copy(*result, bv[largest].vector, bv[largest].size); numints = NELEM(bv[smallest].size,(BITS_SZ)); for (i=0; i < numints; i++) { (*result)[i] |= bv[smallest].vector[i]; } CANONIZE(*result, numints, bv[largest].size); return(TRUE); } } /* ba_union() */ bool ba_diff( bit first[], bit second[], bit * diff[], const elem_t size_first, const elem_t size_second) { /* ba_diff() PRE: first is a bit array of at least size_first elements. second is a bit array of at least size_second elements. diff points to enough space to hold the as many elements as the largest of size_first and size_second; OR diff points to NULL and such space is to be dynamically allocated. POST: TRUE has been returned and diff points to a bit array containing the union of the two arrays (up to the size of the largest of the two sizes); OR FALSE has been returned and result pointed to NULL (a request was made to allocate enough memory to store the result) but the required memory could not be obtained. NOTE: This runs faster if the first array is not smaller than second. */ register elem_t i; elem_t numints; unsigned largest=0, smallest=1; BitVector bv[2]; bv[largest].size = size_first; bv[largest].vector = first; bv[smallest].size = size_second; bv[smallest].vector = second; first_is_biggest(bv, &largest, &smallest); if ((NULL == *diff) && \ (NULL == (*diff = ba_new(bv[largest].size)))) { return(FALSE); } else { ba_copy(*diff, bv[largest].vector, bv[largest].size); numints = NELEM(bv[smallest].size,(BITS_SZ)); for (i=0; i < numints; i++) { (*diff)[i] ^= bv[smallest].vector[i]; } CANONIZE(*diff, numints, bv[largest].size); return(TRUE); } } /* ba_diff() */ void ba_complement( bit arr[], const elem_t size) { /* ba_complement() PRE: arr is a bit array composed of *exactly* size elements. POST: All bits in arr have been flipped and arr is in canonical form. SEE ALSO: ba_toggle() */ elem_t nelem = NELEM(size,(BITS_SZ)); register elem_t i; for (i=0; i < nelem; i++) { arr[i] = ~arr[i]; } /* force canonical form */ CANONIZE(arr, nelem, size); } /* ba_complement() */ unsigned long ba_dotprod(const bit first[], const bit second[], const elem_t size_first, const elem_t size_second) { /* ba_dotprod() PRE: first is an array of at least size_first bits. second is an array of at least size_second bits. POST: The scalar product of the two vectors represented by the first size_first elements of first and the first size_second elements of second have been returned. */ register elem_t i, j; register unsigned long sum = 0L; for (i=0; i < size_first; i++) { for (j=0; j < size_second; j++) { sum += (first[i/BITS_SZ] & (1<<(i % BITS_SZ))) \ && \ (second[j/BITS_SZ] & (1<<(j % BITS_SZ))); } } return (sum); } /* ba_dotprod() */ /* ------ Miscellaneous ------- */ static void first_is_biggest(BitVector bv[2], unsigned * big, unsigned * small) { if (bv[*big].size < bv[*small].size) { unsigned temp; temp = *big; *big = *small; *small = temp; } } /* first_is_biggest() */ /* -------- Miscellaneous --------- */ bool ba_print(const bit arr[], const elem_t size, FILE * dest) { char * to_print = ba_b2str(arr, size, NULL); if (NULL != to_print) { bool status = (EOF != fputs(to_print, dest) ); free(to_print); return (status); } else { return (FALSE); } } /* ba_print() */ Listing Three /* "types.h" */ #include typedef enum bool {FALSE, TRUE} bool; typedef size_t elem_t; typedef unsigned char bit; Listing Four /* macro NELEM(). The number of elements, nelem, in an array of N bits can be computed using the formula: if (0 == (N % BITS_SZ)) nelem = N/BITS_SZ else nelem = N/BITS_SZ + 1 This can be represented in any of these ways: nelem = N/(BITS_SZ) + 1 - (0 == (N %(BITS_SZ))) nelem = N/(BITS_SZ) + !(0 == (N %(BITS_SZ))) nelem = N/(BITS_SZ) + (0 != (N %(BITS_SZ))) The macro NELEM uses this last form. */ #define NELEM(N,ELEMPER) (N / (ELEMPER) + (0 != (N % (ELEMPER)))) Listing Five /* macro CANONIZE(). Array is an array of NumInts type bit representing NumElem bits Forces Array into canonical form, i.e. all unused bits are set to 0 */ #define CANONIZE(Array,NumInts,NumElem) \ (Array)[NumInts - 1] &=(~0 << (BITS_SZ - (NumElem % BITS_SZ))) Listing Six void ba_Xintersection(const bits first[], const bits second[], bits * result[], elem_t size_first, elem_t size_second) { register elem_t i; register bits select; bits * largest = first; bits * smallest = second; for (i=0; i < size_smallest; i++) { select = (1 << (i % BITS_SZ)); *result[i/BITS_SZ] = \ (smallest[i/BITS_SZ] & select) && \ (largest[i/BITS_SZ] & select); } } /* ba_Xintersection() */ void ba_Xunion(const bits first[], const bits second[], bits * result[], elem_t size_first, elem_t size_second) { register elem_t i; bits * largest = first, * smallest = second; /* any bits that are set in the longest vector are set in the union */ /* all other bits are initially zero */ ba_copy(*result, largest, size_first); /* bits that are set in the shortest vector are set in the union */ for (i=0; i < size_smallest; i++) { *result[i/BITS_SZ] |= \ (smallest[i/BITS_SZ] & (1 << (i % BITS_SZ))); } } /* ba_Xunion() */ Example 1: (a) typedef struct { elem_t size; /* how many items in array */ bit * array; /* bit vector recording which elements are selected */ } bitvector; (b) typedef struct { elem_t size; /* how many items in selected */ bit * selected; /* bit vector recording which elements are selected */ elem_t max; /* maximum possible size */ char ** name; /* array of names of items */ char * title; /* what data is represented by this struct? */ } chose_t;