_ASSOCIATIVE ARRAYS IN C++_ by David Weber Listing One /*************************************************************** * file: ASSOC.HPP * purpose: template class for associative arrays * contains: * ASSOCIATION_BASE - core routines for the ASSOCIATION template * ASSOCIATION - association between strings and data objects * copyright: 1994 by David Weber. Unlimited use granted in EXE, OBJ, * or LIB formats. Do not sell as source without asking first. * environment: tested Borland C++ 4.01 and Zortech C++ 3.1r2 * history: 10-02-94 - initial code, based on an earlier C module **************************************************************/ #ifndef _ASSOC #define _ASSOC // needed goodies #include // Feature controls #define ASSOC_CASE_SENSITIVE 1 // string case sensitivity (1 or 0) #define ASSOC_EXCEPTIONS 0 // environment supports C++ exceptions #define ASSOC_NEW_CASTS 0 // environment supports new C++ casts // case sensitivity - This could be done dynamically with a function ptr in the // class, but would disallow the compiler to do optimization via intrinsic // function expansion. #if ASSOC_CASE_SENSITIVE #define ASSOC_STRCMP strcmp #define ASSOC_MAP(c) (c) #else #include #define ASSOC_STRCMP stricmp #define ASSOC_MAP(c) toupper(c) #endif // The only place exceptions occur are resource failure with the "new" calls. // If the environment supports C++ exceptions and a failure occurs, a handler // somewhere, even if it's terminate(), will take care of it. Without // exception handling we just shut down the farm via assert() and abort() #if ASSOC_EXCEPTIONS #define ASSOC_MEM_CHECK(p) #else #include #include #define ASSOC_MEM_CHECK(p) { if ((p) == 0) { assert((p) != 0); abort(); } } #endif // old versus new casting, not much gained here except you can search for casts #if ASSOC_NEW_CASTS #define ASSOC_CAST(cast,item) reinterpret_cast(item) #else #define ASSOC_CAST(cast,item) (cast)(item) #endif // defines const int DEFAULT_ASSOC_SIZE = 64; // default estimated size of array const unsigned int ASSOC_NULL = ~0; // end of chain // The base class for associative arrays. You should NOT make an instance // of this class. Instead use the ASSOCIATON template below class ASSOCIATION_BASE { protected: // protect everything ASSOCIATION_BASE(unsigned int data_size,unsigned int estimated_size); ~ASSOCIATION_BASE(); // destructor - make virtual if extending // inheritance and/or pointing along the chain ASSOCIATION_BASE(const ASSOCIATION_BASE &); // copy constructor ASSOCIATION_BASE &operator=(const ASSOCIATION_BASE &);// assignment ASSOCIATION_BASE(const char **keys,const char *data, // static initializer unsigned int data_size,unsigned int count); unsigned int size(void) // how many data elements in array { return fill_level; } int insert(const char *key,const char *data); // add an element int remove(const char *key); // remove an element char *find(const char *key); // find an element const char *first(void); // traversal functions const char *next(void); int operator!() // test array for problems { return hash_list==0 || key_list==0 || link_list==0 || data_list==0; } int operator()(const char *key) // existence operator { return find(key) != 0; } char &operator[](const char *key) // access operator { return *reference(key); } char *reference(const char *key); // get a reference to data or insert unsigned int hash(const char *key);// hash function void allocate_array(void); // get enough space for the array int expand_array(void); // resize array void clear_pointers(void) // clear all pointers { hash_list = 0; key_list = 0; link_list = 0; data_list = 0; } void delete_pointers(void) // delete all pointers { delete [] hash_list; delete [] key_list; delete [] link_list; delete [] data_list; } unsigned int array_size; // full size of array unsigned int fill_level; // data entries currently in array unsigned int *hash_list; // hash indexed array of chains const char **key_list; // storage for key pointers unsigned int *link_list; // storage for key linkages char *data_list; // storage for data expressed as char unsigned int sizeofdata; // size of data objects in bytes unsigned int iteration; // current iteration position in data }; // ASSOCIATION - associative array template // Use this class template for creating instances when you will be // storing associations between character strings and data objects. // There are three ways to use this template: // direct storage - data are stored directly in the associative array. // good for small classes or native types // ASSOCIATION direct_array(estimated_size); // value = *direct_array.find("key"); // indirect storage - pointers to data are stored in the array. // good for large classes or pointers to things that vary in size // ASSOCIATION indirect_array(estimated_size); // ptr_to_value = *indirect_array.find("key"); // value = **indirect_array.find("key"); // function pointer storage - pointers to functions are stored. // ASSOCIATION func_ptr_array(estimated_size); // int (**fptr)(int); // if ((fptr = func_ptr_array.find("key")) != 0) // always test // function_return = (**fptr)(parameter); // You are responsible for the string storage. In the case of indirect // storage you are also responsible for storing the data. // example: // ASSOCIATION assoc_array(estimated_size); // declaration // if (!assoc_array) { class is unusable; } // validity // assoc_array.insert("xray",myclass_instance1); // insert, // assoc_array.insert("yodel",myclass_instance2); // returns 0 on failure // assoc_array.insert("zorro",myclass_instance3); // assoc_array.remove("yodel"); // delete, returns 0 if not there // cout << "Size is " << assoc_array.size() << "\n"; // size is 2 // cout << "zorro is " << *assoc_array.find("zorro") << "\n"; // find // assert(assoc_array.find("garbage") == 0); // failed find returns 0 // if ((const char *p = assoc_array.first()) != 0) // iterate over set // { // do not insert or remove while iterating // do // { // cout << p << "\t\t" << *assoc_array.find(p) << "\n"; // } while ((p = assoc_array.next()) != 0); // } // other uses: // copy constructor // ASSOCIATION x; // fill x with stuff // ASSOCIATION y(x); // assignment // ASSOCIATION x; // fill x with stuff // ASSOCIATION y; // y = x; // static initialization // static const char *keys[] = { "key1","key2", "key3" }; // note: const // static int data[] = { 1,2,3 }; // ASSOCIATION x(keys,data,sizeof(keys)/sizeof(keys[0])); template class ASSOCIATION : public ASSOCIATION_BASE { public: ASSOCIATION(unsigned int estimated_size = DEFAULT_ASSOC_SIZE) : ASSOCIATION_BASE(sizeof(T),estimated_size) { } // default constructor // destructor and copy constructor found in ASSOCIATION_BASE ASSOCIATION &operator=(const ASSOCIATION &original) // assignment { ASSOCIATION_BASE::operator=(original); return *this; } // static initializer ASSOCIATION(const char **keys,T *data,unsigned int count) : ASSOCIATION_BASE(keys,ASSOC_CAST(const char *,data),sizeof(T),count) { } unsigned int size(void) // how many data elements in array { return fill_level; } int insert(const char *key,T data) // add an element { return ASSOCIATION_BASE::insert(key,ASSOC_CAST(const char *,&data)); } int remove(const char *key) // remove an element { return ASSOCIATION_BASE::remove(key); } T *find(const char *key) // find an element { return ASSOC_CAST(T *,ASSOCIATION_BASE::find(key)); } const char *first(void) // traversal functions { return ASSOCIATION_BASE::first(); } const char *next(void) { return ASSOCIATION_BASE::next(); } int operator!() // test array for problems { return ASSOCIATION_BASE::operator!(); } int operator()(const char *key) // existence operator { return ASSOCIATION_BASE::find(key) != 0; } T &operator[](const char *key) // access operator { return *(ASSOC_CAST(T *,ASSOCIATION_BASE::reference(key))); } }; // ASSOC_STORED - ASSOCIATION class with storage for keys // This class is almost identical to ASSOCIATION except that it maintains the // storage for the keys. The interface is the same but the static initializer // is left out. If it is static, the keys are already stored. So why bother? template class ASSOC_STORED : public ASSOCIATION_BASE { public: ASSOC_STORED(unsigned int estimated_size = DEFAULT_ASSOC_SIZE) : ASSOCIATION_BASE(sizeof(T),estimated_size) { } // default constructor ~ASSOC_STORED(); // destructor ASSOC_STORED(const ASSOC_STORED &original) : ASSOCIATION_BASE(original) { dup_keys(); } // copy constructor ASSOC_STORED &operator=(const ASSOC_STORED &original) { // assignment ASSOCIATION_BASE::operator=(original); dup_keys(); return *this; } unsigned int size(void) // how many data elements in array { return fill_level; } int insert(const char *key,T data) // add an element { if (key == 0) return 0; char *p = new char[strlen(key)+1]; ASSOC_MEM_CHECK(p); strcpy(p,key); return ASSOCIATION_BASE::insert(p,ASSOC_CAST(const char *,&data)); } int remove(const char *key); // remove an element T *find(const char *key) // find an element { return ASSOC_CAST(T *,ASSOCIATION_BASE::find(key)); } const char *first(void) // traversal functions { return ASSOCIATION_BASE::first(); } const char *next(void) { return ASSOCIATION_BASE::next(); } int operator!() // test array for problems { return ASSOCIATION_BASE::operator!(); } int operator()(const char *key) // existence operator { return ASSOCIATION_BASE::find(key) != 0; } T &operator[](const char *key) // access operator { char *p = ASSOCIATION_BASE::find(key); if (p == 0) { if (key == 0) return *(ASSOC_CAST(T *,0)); // garbage in, garbage out char *p = new char[strlen(key)+1]; ASSOC_MEM_CHECK(p); strcpy(p,key); return *(ASSOC_CAST(T *,ASSOCIATION_BASE::reference(p))); } return *(ASSOC_CAST(T *,p)); } private: void dup_keys(void); }; // functions out of line cuz looped, large and/or called infrequently // destructor template ASSOC_STORED::~ASSOC_STORED() { for (unsigned int i = 0 ; i < fill_level ; i++) delete [] ASSOC_CAST(char *,key_list[i]); } // remove template int ASSOC_STORED::remove(const char *key) { char *data = ASSOCIATION_BASE::find(key); if (data != 0) { data = ASSOC_CAST(char *,key_list[(data-data_list)/sizeofdata]); if (ASSOCIATION_BASE::remove(key)) { delete [] data; return 1; } } return 0; } // duplicate the full set of keys template void ASSOC_STORED::dup_keys(void) { char *newkey; const char *oldkey; for (unsigned int i = 0 ; i < fill_level ; i++) { // duplicate keys oldkey = key_list[i]; newkey = new char[strlen(oldkey)+1]; ASSOC_MEM_CHECK(newkey); strcpy(newkey,oldkey); key_list[i] = newkey; } } #endif // _ASSOC Listing Two /*************************************************************** * file: ASSOC.CPP * purpose: template class for associative arrays * contains: * ASSOCIATION_BASE - core routines for the ASSOCIATION template * ASSOCIATION - association between strings and data objects * copyright: 1994 by David Weber. Unlimited use granted in EXE, OBJ, * or LIB formats. Do not sell as source without asking first. * environment: tested Borland C++ 4.01 and Zortech C++ 3.1r2 * history: 10-02-94 - initial code, based on an earlier C module **************************************************************/ #include "assoc.hpp" /************************************************ * function:ASSOCIATION_BASE(unsigned int data_size,unsigned int estimated_size) * Constructor for an associative array * parameters: byte size of a data element and the estimated size of array * returns: nothing ************************************************/ ASSOCIATION_BASE::ASSOCIATION_BASE(unsigned int data_size, unsigned int estimated_size) { clear_pointers(); // preset as invalid sizeofdata = data_size; // set up sizes array_size = estimated_size; fill_level = 0; // empty to start allocate_array(); // allocate space } /************************************************ * function: ~ASSOCIATION_BASE() * Destructor for an associative array * parameters: none * returns: nothing ************************************************/ ASSOCIATION_BASE::~ASSOCIATION_BASE() { delete_pointers(); // delete storage clear_pointers(); // just for tidiness } /************************************************ * function: ASSOCIATION_BASE(const ASSOCIATION_BASE &original) * copy constructor for class * parameters: previous associative array to copy * returns: nothing ************************************************/ ASSOCIATION_BASE::ASSOCIATION_BASE(const ASSOCIATION_BASE &original) { clear_pointers(); // no heap to start *this = original; // piggyback on assignment operator } /************************************************ * function: ASSOCIATION_BASE & operator=(const ASSOCIATION_BASE &original) * assigment operator for class * parameters: previous associative array to copy * returns: *this ************************************************/ ASSOCIATION_BASE & ASSOCIATION_BASE::operator=(const ASSOCIATION_BASE &original) { delete_pointers(); // remove old storage clear_pointers(); // no heap to start array_size = original.array_size; // essential data fill_level = original.fill_level; sizeofdata = original.sizeofdata; allocate_array(); // allocate heap if (!*this) // valid? return *this; // copy hash, keys, links and data; this only works cuz linkage is via offsets memcpy(hash_list,original.hash_list,array_size * sizeof(unsigned int)); memcpy(key_list,original.key_list,fill_level * sizeof(const char *)); memcpy(link_list,original.link_list,fill_level * sizeof(unsigned int)); memcpy(data_list,original.data_list,array_size * sizeofdata); return *this; } /************************************************ * function: ASSOCIATION_BASE(const char **keys,const char *data, * unsigned int data_size,unsigned int count) * static initialization constructor, build an associative array with * pre-existing key and data arrays. * parameters: pointer to an array of keys, pointer to an array of data * expressed as chars, size of array in elements * returns: nothing ************************************************/ ASSOCIATION_BASE::ASSOCIATION_BASE(const char **keys,const char *data, unsigned int data_size,unsigned int count) { unsigned int i; clear_pointers(); // mark storage as invalid sizeofdata = data_size; // set up sizes array_size = count; fill_level = 0; // empty to start allocate_array(); // allocate space if (!*this) // valid? return; for (i = 0 ; i < count ; i++, keys++, data += sizeofdata) insert(*keys,data); // add info } /************************************************ * function: int insert(const char *key,const char *data) * Insert an entry into the associative array. The caller is responsible for * storage of the key; * parameters: const pointer to key string and const/non const pointer to data * returns: 1 if inserted OK or 0 if a bad key ************************************************/ int ASSOCIATION_BASE::insert(const char *key,const char *data) { unsigned int index,k; if (key == 0) // no null keys allowed return 0; if (fill_level >= array_size) if (!expand_array()) return 0; index = hash(key); // find start in array for (k = hash_list[index] ; k != ASSOC_NULL ; k = link_list[k]) { // see if already exists if (ASSOC_STRCMP(key,key_list[k]) == 0) { // replace current data memcpy(data_list+k*sizeofdata,data,sizeofdata); return 1; } } key_list[fill_level] = key; // put in new data link_list[fill_level] = hash_list[index]; hash_list[index] = fill_level; memcpy(data_list+fill_level*sizeofdata,data,sizeofdata); fill_level++; return 1; } /************************************************ * function: int remove(const char *key) * Remove an entry from the associative array * parameters: const pointer to the key string * returns: 1 if removed or 0 if not found in the array ************************************************/ int ASSOCIATION_BASE::remove(const char *key) { unsigned int k,*l; if (key == 0) // no null keys allowed return 0; for (l=hash_list+hash(key), k=*l ; k != ASSOC_NULL ; l=link_list+k, k=*l) { // find entry if (ASSOC_STRCMP(key,key_list[k]) == 0) { *l = link_list[k]; // unlink fill_level--; // level shrinks key_list[k] = key_list[fill_level]; // move top of pile into "removed" link_list[k] = link_list[fill_level]; memcpy(data_list+k*sizeofdata,data_list+fill_level*sizeofdata,sizeofdata); for (l=hash_list+hash(key_list[k]) ; *l != ASSOC_NULL ; l=link_list + *l) if (*l == fill_level) { // fix link to what was top of pile *l = k; break; } return 1; } } return 0; // doesn't exist } /************************************************ * function: char *find(const char *key) * Lookup an entry in the associative array and return a non const pointer * parameters: const pointer to the key string * returns:non const pointer to the data associated with the key ************************************************/ char *ASSOCIATION_BASE::find(const char *key) { unsigned int k; if (key == 0) // no null keys allowed return 0; for (k = hash_list[hash(key)] ; k != ASSOC_NULL ; k = link_list[k]) { // follow chain in array if (ASSOC_STRCMP(key,key_list[k]) == 0) return data_list + k * sizeofdata; } return 0; // not found } /************************************************ * function: const char *first(void) * Find the first key string in the array. Follow this by * next() calls until a 0 is returned. Inserting or Removing * from an array while you are iterating will invalidate the * iteration sequence (but doesn't mess up the array). * parameters: none * returns: first key string encountered or 0 if none ************************************************/ const char *ASSOCIATION_BASE::first(void) { iteration = 0; // start from beginning return next(); // and search } /************************************************ * function: const char *next(void) * Find the next key string in the array, call first() * to start iteration. * parameters: nothing * returns: next key string encountered or 0 if all done ************************************************/ const char *ASSOCIATION_BASE::next(void) { while (iteration < fill_level) // until end of data return key_list[iteration++]; // return key return 0; } /************************************************ * function: char *reference(const char *key) * find a key and return a reference to its data if it is there * otherwise insert a place for it and return a reference to the * zeroed out hole * parameters: const pointer to the key string * returns: pointer to associated data (which may be zeroed out) ************************************************/ char *ASSOCIATION_BASE::reference(const char *key) { unsigned int k,index; if (key == 0) // no null keys allowed return 0; index = hash(key); for (k = hash_list[index] ; k != ASSOC_NULL ; k = link_list[k]) { // follow chain in array if (ASSOC_STRCMP(key,key_list[k]) == 0) return data_list + k * sizeofdata; // found it } if (fill_level >= array_size) // expand if necessary if (!expand_array()) return 0; key_list[fill_level] = key; // put in hole for new data link_list[fill_level] = hash_list[index]; hash_list[index] = fill_level; memset(data_list+sizeofdata*fill_level,0,sizeofdata); return data_list + sizeofdata*fill_level++; // return pointer to hole } /************************************************ * function: unsigned int hash(const char *key) * local function calculates a hash. Designed to minimize clustering. * parameters: key string * returns: hash value clipped to array size ************************************************/ unsigned int ASSOCIATION_BASE::hash(const char *key) { unsigned int index; const unsigned char *k; for (index=0x5555, k=ASSOC_CAST(const unsigned char *,key) ; *k != 0 ; k++) index = (index << 1) ^ ASSOC_MAP(*k); // hash key return index % array_size; // fit in array } /************************************************ * function: void allocate_array(void) * local function allocates and initializes the array * parameters: none * returns: nothing ************************************************/ void ASSOCIATION_BASE::allocate_array(void) { unsigned int i; hash_list = new unsigned int[array_size]; // allocate hash array key_list = new const char *[array_size]; // allocate key pointers link_list = new unsigned int[array_size]; // allocate key linkage data_list = new char[array_size*sizeofdata]; // allocate storage for data ASSOC_MEM_CHECK(hash_list); // validate resources ASSOC_MEM_CHECK(key_list); ASSOC_MEM_CHECK(link_list); ASSOC_MEM_CHECK(data_list); for (i = 0 ; i < array_size ; i++) hash_list[i] = ASSOC_NULL; // preset with nothing } /************************************************ * function: int expand_array(void) * double the size of the array * parameters: none * returns: 1 if expanded OK or 0 if failed ************************************************/ int ASSOCIATION_BASE::expand_array(void) { // if array full, increase size const char **old_key; char *old_data; unsigned int i,index; old_key = key_list; // save old data old_data = data_list; delete [] hash_list; // remove pointer storage hash_list = 0; delete [] link_list; link_list = 0; array_size <<= 1; // new size allocate_array(); // new array if (!*this) // valid? return 0; memcpy(key_list,old_key,fill_level*sizeof(const char *)); memcpy(data_list,old_data,sizeofdata*fill_level); for (i = 0 ; i < fill_level ; i++) { // rehash old data into new array index = hash(old_key[i]); link_list[i] = hash_list[index]; hash_list[index] = i; } delete [] old_key; // blow away old storage delete [] old_data; return 1; } Figure 1: (a) AWK script to store an integer dollar value indexed by year and name of automobile; (b) associative array operations; (c) filling an array with integers; (d) using pointer-based arrays (a) price = bluebook["1967 VW"]; # access rvalue bluebook["1967 VW"] -= valuedecay; # set lvalue if ("1928 Durant" in bluebook) # check existence delete bluebook["Stanley Steamer"]; # remove entry for (pv in bluebook) # iterate over array (b) ASSOCIATION test(100); // creation test.insert("key1",instance1); // lvalue access test.find("key1"); // rvalue and existence test.remove("key1"); // remove entry test.first(), test.next(); // iterate (c) ASSOCIATION test2; test2.insert("one",1); // inserting constants test2.insert("two",2); (d) ASSOCIATION test3; // note: pointer bigclass *instance1 = new bigclass(construction parameters); test3.insert("bigkey1",instance1); Figure 2. Code fragment illustrating thread-safe template. PTTSafeType sequenceNumber; ... sequenceNumber.set (0); // sets number to 0 safely. ++sequenceNumber; // inc number safely. // or sequenceNumber++; ...