Sierra Toolkit Version of the Day
hash_map_rdestl.h
00001 #ifndef RDESTL_HASH_MAP_H
00002 #define RDESTL_HASH_MAP_H
00003 
00004 #include <stk_util/util/algorithm_rdestl.h>
00005 #include <stk_util/util/allocator_rdestl.h>
00006 #include <stk_util/util/functional_rdestl.h>
00007 #include <stk_util/util/hash_rdestl.h>
00008 #include <stk_util/util/pair_rdestl.h>
00009 
00010 namespace rde
00011 {
00012 
00013 // TLoadFactor4 - controls hash map load. 4 means 100% load, ie. hashmap will grow
00014 // when number of items == capacity. Default value of 6 means it grows when
00015 // number of items == capacity * 3/2 (6/4). Higher load == tighter maps, but bigger
00016 // risk of collisions.
00017 template<typename TKey, typename TValue,
00018     class THashFunc = rde::hash<TKey>,
00019     int TLoadFactor4 = 6,
00020     class TKeyEqualFunc = rde::equal_to<TKey>,
00021     class TAllocator = rde::allocator>
00022 class hash_map
00023 {
00024 public:
00025   typedef rde::pair<TKey, TValue>         value_type;
00026 
00027 private:
00028   struct node
00029     {
00030     static const hash_value_t kUnusedHash       = 0xFFFFFFFF;
00031         static const hash_value_t kDeletedHash      = 0xFFFFFFFE;
00032 
00033         node(): hash(kUnusedHash) {}
00034 
00035         RDE_FORCEINLINE bool is_unused() const    { return hash == kUnusedHash; }
00036         RDE_FORCEINLINE bool is_deleted() const     { return hash == kDeletedHash; }
00037         RDE_FORCEINLINE bool is_occupied() const  { return hash < kDeletedHash; }
00038 
00039     hash_value_t    hash;
00040         value_type      data;
00041   };
00042   template<typename TNodePtr, typename TPtr, typename TRef>
00043     class node_iterator
00044     {
00045     friend class hash_map;
00046     public:
00047     typedef forward_iterator_tag    iterator_category;
00048 
00049         explicit node_iterator(TNodePtr node, const hash_map* map)
00050         : m_node(node),
00051             m_map(map)
00052     {}
00053         template<typename UNodePtr, typename UPtr, typename URef>
00054         node_iterator(const node_iterator<UNodePtr, UPtr, URef>& rhs)
00055         : m_node(rhs.node()),
00056       m_map(rhs.get_map())
00057     {}
00058 
00059     TRef operator*() const
00060         {
00061       RDE_ASSERT(m_node != 0);
00062             return m_node->data;
00063     }
00064         TPtr operator->() const
00065         {
00066       return &m_node->data;
00067     }
00068         RDE_FORCEINLINE TNodePtr node() const
00069         {
00070       return m_node;
00071     }
00072 
00073     node_iterator& operator++()
00074         {
00075       RDE_ASSERT(m_node != 0);
00076       ++m_node;
00077             move_to_next_occupied_node();
00078       return *this;
00079         }
00080     node_iterator operator++(int)
00081         {
00082       node_iterator copy(*this);
00083             ++(*this);
00084             return copy;
00085     }
00086 
00087         RDE_FORCEINLINE bool operator==(const node_iterator& rhs) const
00088         {
00089       return rhs.m_node == m_node;
00090     }
00091         bool operator!=(const node_iterator& rhs) const
00092         {
00093       return !(rhs == *this);
00094     }
00095 
00096         const hash_map* get_map() const { return m_map; }
00097   private:
00098     void move_to_next_occupied_node()
00099         {
00100       // @todo: save nodeEnd in constructor?
00101       TNodePtr nodeEnd = m_map->m_nodes + m_map->bucket_count();
00102             for (; m_node < nodeEnd; ++m_node)
00103             {
00104         if (m_node->is_occupied())
00105           break;
00106       }
00107     }
00108         TNodePtr    m_node;
00109         const hash_map* m_map;
00110   };
00111 
00112 public:
00113   typedef TKey   key_type;
00114         typedef TValue                                mapped_type;
00115         typedef TAllocator                              allocator_type;
00116   typedef node_iterator<node*, value_type*, value_type&>            iterator;
00117   typedef node_iterator<const node*, const value_type*, const value_type&>  const_iterator;
00118         typedef int                                 size_type;
00119   static const size_type                            kNodeSize = sizeof(node);
00120   static const size_type                            kInitialCapacity = 64;
00121 
00122   hash_map()
00123   : m_nodes(&ms_emptyNode),
00124     m_size(0),
00125     m_capacity(0),
00126     m_capacityMask(0),
00127     m_numUsed(0)
00128   {
00129     RDE_ASSERT((kInitialCapacity & (kInitialCapacity - 1)) == 0); // Must be power-of-two
00130   }
00131   explicit hash_map(const allocator_type& allocator)
00132     : m_nodes(&ms_emptyNode),
00133         m_size(0),
00134         m_capacity(0),
00135     m_capacityMask(0),
00136         m_numUsed(0),
00137         m_allocator(allocator)
00138   {
00139     
00140   }
00141   explicit hash_map(size_type initial_bucket_count,
00142     const allocator_type& allocator = allocator_type())
00143     : m_nodes(&ms_emptyNode),
00144         m_size(0),
00145         m_capacity(0),
00146     m_capacityMask(0),
00147         m_numUsed(0),
00148         m_allocator(allocator)
00149   {
00150     reserve(initial_bucket_count);
00151   }
00152   hash_map(size_type initial_bucket_count,
00153     const THashFunc& hashFunc,
00154     const allocator_type& allocator = allocator_type())
00155     : m_nodes(&ms_emptyNode),
00156         m_size(0),
00157         m_capacity(0),
00158     m_capacityMask(0),
00159         m_numUsed(0),
00160     m_hashFunc(hashFunc),
00161         m_allocator(allocator)
00162   {
00163     reserve(initial_bucket_count);
00164   }
00165   hash_map(const hash_map& rhs, const allocator_type& allocator = allocator_type())
00166     : m_nodes(&ms_emptyNode),
00167         m_size(0),
00168         m_capacity(0),
00169     m_capacityMask(0),
00170         m_numUsed(0),
00171         m_allocator(allocator)
00172   {
00173     *this = rhs;
00174   }
00175   explicit hash_map(e_noinitialize)
00176   {
00177     
00178   }
00179   ~hash_map()
00180   {
00181     delete_nodes();
00182   }
00183 
00184   iterator begin()
00185     {
00186     iterator it(m_nodes, this);
00187     it.move_to_next_occupied_node();
00188     return it;
00189   }
00190   const_iterator begin() const
00191     {
00192     const_iterator it(m_nodes, this);
00193     it.move_to_next_occupied_node();
00194     return it;
00195   }
00196   iterator end()              { return iterator(m_nodes + m_capacity, this); }
00197   const_iterator end() const  { return const_iterator(m_nodes + m_capacity, this); }
00198 
00199   // @note: Added for compatiblity sake.
00200   //      Personally, I consider it "risky". Use find/insert for more
00201   //      explicit operations.
00202   mapped_type& operator[](const key_type& key)
00203   {
00204     hash_value_t hash;
00205     node* n = find_for_insert(key, &hash);
00206     if (n == 0 || !n->is_occupied())
00207     {
00208       return insert_at(value_type(key, TValue()), n, hash).first->second;
00209     }
00210     return n->data.second;
00211   }
00212   // @note: Doesn't copy allocator.
00213   hash_map& operator=(const hash_map& rhs)
00214   {
00215     RDE_ASSERT(invariant());
00216     if (&rhs != this)
00217     {
00218       clear();
00219       if (m_capacity < rhs.bucket_count())
00220       {
00221         delete_nodes();
00222         m_nodes = allocate_nodes(rhs.bucket_count());
00223         m_capacity = rhs.bucket_count();
00224         m_capacityMask = m_capacity - 1;
00225       }
00226       rehash(m_capacity, m_nodes, rhs.m_capacity, rhs.m_nodes, false);
00227       m_size = rhs.size();
00228       m_numUsed = rhs.m_numUsed;
00229     }
00230     RDE_ASSERT(invariant());
00231     return *this;
00232   }
00233   void swap(hash_map& rhs)
00234   {
00235     if (&rhs != this)
00236     {
00237       RDE_ASSERT(invariant());
00238       RDE_ASSERT(m_allocator == rhs.m_allocator);
00239       rde::swap(m_nodes, rhs.m_nodes);
00240       rde::swap(m_size, rhs.m_size);
00241       rde::swap(m_capacity, rhs.m_capacity);
00242       rde::swap(m_capacityMask, rhs.m_capacityMask);
00243       rde::swap(m_numUsed, rhs.m_numUsed);
00244       rde::swap(m_hashFunc, rhs.m_hashFunc);
00245       rde::swap(m_keyEqualFunc, rhs.m_keyEqualFunc);
00246       RDE_ASSERT(invariant());
00247     }
00248   }
00249 
00250   rde::pair<iterator, bool> insert(const value_type& v)
00251   {
00252     typedef rde::pair<iterator, bool> ret_type_t;
00253     RDE_ASSERT(invariant());
00254     if (m_numUsed * TLoadFactor4 >= m_capacity * 4)
00255       grow();
00256 
00257     hash_value_t hash = 0XFFFFFFFF;
00258 
00259     node* n = find_for_insert(v.first, &hash);
00260     if (n->is_occupied())
00261     {
00262       RDE_ASSERT(hash == n->hash && m_keyEqualFunc(v.first, n->data.first));
00263       return ret_type_t(iterator(n, this), false);
00264     }
00265     if (n->is_unused())
00266       ++m_numUsed;
00267     rde::copy_construct(&n->data, v);
00268         n->hash = hash;
00269         ++m_size;
00270     RDE_ASSERT(invariant());
00271         return ret_type_t(iterator(n, this), true);
00272   }
00273 
00274   size_type erase(const key_type& key)
00275     {
00276     node* n = lookup(key);
00277         if (n != (m_nodes + m_capacity) && n->is_occupied())
00278         {
00279       erase_node(n);
00280             return 1;
00281     }
00282     return 0;
00283   }
00284     void erase(iterator it)
00285     {
00286     RDE_ASSERT(it.get_map() == this);
00287         if (it != end())
00288         {
00289       RDE_ASSERT(!empty());
00290             erase_node(it.node());
00291     }
00292   }
00293   void erase(iterator from, iterator to)
00294   {
00295     for (; from != to; ++from)
00296         {
00297       node* n = from.node();
00298             if (n->is_occupied())
00299         erase_node(n);
00300     }
00301   }
00302 
00303   iterator find(const key_type& key)
00304     {
00305     node* n = lookup(key);
00306         return iterator(n, this);
00307   }
00308   const_iterator find(const key_type& key) const
00309   {
00310     const node* n = lookup(key);
00311     return const_iterator(n, this);
00312   }
00313 
00314   void clear()
00315     {
00316     node* endNode = m_nodes + m_capacity;
00317         for (node* iter = m_nodes; iter != endNode; ++iter)
00318         {
00319             if( iter )
00320             {
00321                 if (iter->is_occupied())
00322                 {
00323                     rde::destruct(&iter->data);
00324                 }
00325                 // We can make them unused, because we clear whole hash_map,
00326                 // so we can guarantee there'll be no holes.
00327                 iter->hash = node::kUnusedHash;
00328             }
00329     }
00330         m_size = 0;
00331         m_numUsed = 0;
00332   }
00333 
00334   // More like reserve.
00335   // resize() name chosen for compatibility sake.
00336   void reserve(size_type min_size)
00337   {
00338     size_type newCapacity = (m_capacity == 0 ? kInitialCapacity : m_capacity);
00339     while (newCapacity < min_size)
00340       newCapacity *= 2;
00341     if (newCapacity > m_capacity)
00342       grow(newCapacity);
00343   }
00344 
00345   size_type bucket_count() const      { return m_capacity; }
00346   size_type size() const          { return m_size; }
00347   size_type empty() const         { return size() == 0; }
00348   size_type nonempty_bucket_count() const { return m_numUsed; }
00349   size_type used_memory() const
00350   {
00351     return bucket_count() * kNodeSize;
00352   }
00353 
00354   const allocator_type& get_allocator() const { return m_allocator; }
00355   void set_allocator(const allocator_type& allocator)
00356   {
00357     m_allocator = allocator;
00358   }
00359 
00360 private:
00361   void grow()
00362   {
00363     const int newCapacity = (m_capacity == 0 ? kInitialCapacity : m_capacity * 2);
00364     grow(newCapacity);
00365   }
00366   void grow(int new_capacity)
00367   {
00368     RDE_ASSERT((new_capacity & (new_capacity - 1)) == 0); // Must be power-of-two
00369     node* newNodes = allocate_nodes(new_capacity);
00370     rehash(new_capacity, newNodes, m_capacity, m_nodes, true);
00371     if (m_nodes != &ms_emptyNode)
00372       m_allocator.deallocate(m_nodes, sizeof(node) * m_capacity);
00373     m_capacity = new_capacity;
00374     m_capacityMask = new_capacity - 1;
00375     m_nodes = newNodes;
00376     m_numUsed = m_size;
00377     RDE_ASSERT(m_numUsed < m_capacity);
00378    }
00379   rde::pair<iterator, bool> insert_at(const value_type& v, node* n,
00380     hash_value_t hash)
00381   {
00382     RDE_ASSERT(invariant());
00383     if (n == 0 || m_numUsed * TLoadFactor4 >= m_capacity * 4)
00384       return insert(v);
00385 
00386     RDE_ASSERT(!n->is_occupied());
00387     if (n->is_unused())
00388       ++m_numUsed;
00389     rde::copy_construct(&n->data, v);
00390     n->hash = hash;
00391     ++m_size;
00392     RDE_ASSERT(invariant());
00393     return rde::pair<iterator, bool>(iterator(n, this), true);
00394   }
00395   node* find_for_insert(const key_type& key, hash_value_t* out_hash)
00396   {
00397     if (m_capacity == 0)
00398       return 0;
00399 
00400     const hash_value_t hash = hash_func(key);
00401         *out_hash = hash;
00402         uint32 i = hash & m_capacityMask;
00403 
00404         node* n = m_nodes + i;
00405     if (n->hash == hash && m_keyEqualFunc(key, n->data.first))
00406       return n;
00407 
00408         node* freeNode(0);
00409         if (n->is_deleted())
00410       freeNode = n;
00411     uint32 numProbes(0);
00412         // Guarantees loop termination.
00413         RDE_ASSERT(m_numUsed < m_capacity);
00414         while (!n->is_unused())
00415         {
00416       ++numProbes;
00417             i = (i + numProbes) & m_capacityMask;
00418             n = m_nodes + i;
00419       if (compare_key(n, key, hash))
00420         return n;
00421             if (n->is_deleted() && freeNode == 0)
00422         freeNode = n;
00423     }
00424         return freeNode ? freeNode : n;
00425   }
00426   node* lookup(const key_type& key) const
00427   {
00428     const hash_value_t hash = hash_func(key);
00429         uint32 i = hash & m_capacityMask;
00430         node* n = m_nodes + i;
00431     // In theory, we could try to use compare_key here, it would
00432     // be a little bit faster for keys with cheap_compare. However, if keys are
00433     // not totally destroyed on removal (erase_node), this could result in returning
00434     // unused nodes. By testing hashes - we make sure it does not happen.
00435     // This could also be solved by doing what Google does -- set_empty_key/set_deleted_key,
00436     // but for the time being it doesn't look to me like it's worth it.
00437     if (n->hash == hash && m_keyEqualFunc(key, n->data.first))
00438       return n;
00439 
00440     uint32 numProbes(0);
00441         // Guarantees loop termination.
00442         RDE_ASSERT(m_capacity == 0 || m_numUsed < m_capacity);
00443         while (!n->is_unused())
00444         {
00445       ++numProbes;
00446             i = (i + numProbes) & m_capacityMask;
00447             n = m_nodes + i;
00448 
00449       if (compare_key(n, key, hash))
00450         return n;
00451     }
00452     return m_nodes + m_capacity;
00453   }
00454 
00455   static void rehash(int new_capacity, node* new_nodes,
00456     int capacity, const node* nodes, bool destruct_original)
00457   {
00458         //if (nodes == &ms_emptyNode || new_nodes == &ms_emptyNode)
00459           //  return;
00460 
00461     const node* it = nodes;
00462     const node* itEnd = nodes + capacity;
00463     const uint32 mask = new_capacity - 1;
00464     while (it != itEnd)
00465     {
00466       if (it->is_occupied())
00467       {
00468         const hash_value_t hash = it->hash;
00469         uint32 i = hash & mask;
00470 
00471         node* n = new_nodes + i;
00472         uint32 numProbes(0);
00473         while (!n->is_unused())
00474         {
00475           ++numProbes;
00476           i = (i + numProbes) & mask;
00477           n = new_nodes + i;
00478         }
00479         rde::copy_construct(&n->data, it->data);
00480         n->hash = hash;
00481         if (destruct_original)
00482           rde::destruct(&it->data);
00483       }
00484       ++it;
00485     }
00486   }
00487 
00488   node* allocate_nodes(int n)
00489   {
00490     node* buckets = static_cast<node*>(m_allocator.allocate(n * sizeof(node)));
00491         node* iterBuckets(buckets);
00492         node* end = iterBuckets + n;
00493         for (; iterBuckets != end; ++iterBuckets)
00494       iterBuckets->hash = node::kUnusedHash;
00495 
00496     return buckets;
00497   }
00498   void delete_nodes()
00499   {
00500     node* it = m_nodes;
00501         node* itEnd = it + m_capacity;
00502         while (it != itEnd)
00503         {
00504       if (it && it->is_occupied())
00505         rde::destruct(&it->data);
00506       ++it;
00507     }
00508     if (m_nodes != &ms_emptyNode)
00509       m_allocator.deallocate(m_nodes, sizeof(node) * m_capacity);
00510 
00511         m_capacity = 0;
00512     m_capacityMask = 0;
00513         m_size = 0;
00514   }
00515   void erase_node(node* n)
00516   {
00517     RDE_ASSERT(!empty());
00518         RDE_ASSERT(n->is_occupied());
00519         rde::destruct(&n->data);
00520         n->hash = node::kDeletedHash;
00521     --m_size;
00522   }
00523 
00524   RDE_FORCEINLINE hash_value_t hash_func(const key_type& key) const
00525   {
00526     const hash_value_t h = m_hashFunc(key) & 0xFFFFFFFD;
00527     //RDE_ASSERT(h < node::kDeletedHash);
00528         return h;
00529   }
00530   bool invariant() const
00531   {
00532     RDE_ASSERT((m_capacity & (m_capacity - 1)) == 0);
00533     RDE_ASSERT(m_numUsed >= m_size);
00534     return true;
00535   }
00536 
00537   RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t hash,
00538     int_to_type<false>) const
00539   {
00540     return (n->hash == hash && m_keyEqualFunc(key, n->data.first));
00541   }
00542   RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t,
00543     int_to_type<true>) const
00544   {
00545     return m_keyEqualFunc(key, n->data.first);
00546   }
00547   RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t hash) const
00548   {
00549     return compare_key(n, key, hash, int_to_type<has_cheap_compare<TKey>::value>());
00550   }
00551 
00552   node*     m_nodes;
00553   int       m_size;
00554   int       m_capacity;
00555   uint32      m_capacityMask;
00556   int       m_numUsed;
00557   THashFunc       m_hashFunc;
00558   TKeyEqualFunc m_keyEqualFunc;
00559   TAllocator      m_allocator;
00560 
00561   static node   ms_emptyNode;
00562 };
00563 
00564 
00565 // Holy ...
00566 template<typename TKey, typename TValue,
00567     class THashFunc,
00568     int TLoadFactor4,
00569     class TKeyEqualFunc,
00570     class TAllocator>
00571 typename hash_map<TKey, TValue, THashFunc, TLoadFactor4, TKeyEqualFunc, TAllocator>::node hash_map<TKey, TValue, THashFunc, TLoadFactor4, TKeyEqualFunc, TAllocator>::ms_emptyNode;
00572 
00573 }
00574 
00575 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines