Sierra Toolkit Version of the Day
densehashtable.h
00001 // Copyright (c) 2005, Google Inc.
00002 // All rights reserved.
00003 //
00004 // Redistribution and use in source and binary forms, with or without
00005 // modification, are permitted provided that the following conditions are
00006 // met:
00007 //
00008 //     * Redistributions of source code must retain the above copyright
00009 // notice, this list of conditions and the following disclaimer.
00010 //     * Redistributions in binary form must reproduce the above
00011 // copyright notice, this list of conditions and the following disclaimer
00012 // in the documentation and/or other materials provided with the
00013 // distribution.
00014 //     * Neither the name of Google Inc. nor the names of its
00015 // contributors may be used to endorse or promote products derived from
00016 // this software without specific prior written permission.
00017 //
00018 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00019 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00020 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00021 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00022 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00023 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00024 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 // ---
00031 // Author: Craig Silverstein
00032 //
00033 // A dense hashtable is a particular implementation of
00034 // a hashtable: one that is meant to minimize memory allocation.
00035 // It does this by using an array to store all the data.  We
00036 // steal a value from the key space to indicate "empty" array
00037 // elements (ie indices where no item lives) and another to indicate
00038 // "deleted" elements.
00039 //
00040 // (Note it is possible to change the value of the delete key
00041 // on the fly; you can even remove it, though after that point
00042 // the hashtable is insert_only until you set it again.  The empty
00043 // value however can't be changed.)
00044 //
00045 // To minimize allocation and pointer overhead, we use internal
00046 // probing, in which the hashtable is a single table, and collisions
00047 // are resolved by trying to insert again in another bucket.  The
00048 // most cache-efficient internal probing schemes are linear probing
00049 // (which suffers, alas, from clumping) and quadratic probing, which
00050 // is what we implement by default.
00051 //
00052 // Type requirements: value_type is required to be Copy Constructible
00053 // and Default Constructible. It is not required to be (and commonly
00054 // isn't) Assignable.
00055 //
00056 // You probably shouldn't use this code directly.  Use
00057 // <google/dense_hash_map> or <google/dense_hash_set> instead.
00058 
00059 // You can change the following below:
00060 // HT_OCCUPANCY_PCT      -- how full before we double size
00061 // HT_EMPTY_PCT          -- how empty before we halve size
00062 // HT_MIN_BUCKETS        -- default smallest bucket size
00063 //
00064 // You can also change enlarge_factor (which defaults to
00065 // HT_OCCUPANCY_PCT), and shrink_factor (which defaults to
00066 // HT_EMPTY_PCT) with set_resizing_parameters().
00067 //
00068 // How to decide what values to use?
00069 // shrink_factor's default of .4 * OCCUPANCY_PCT, is probably good.
00070 // HT_MIN_BUCKETS is probably unnecessary since you can specify
00071 // (indirectly) the starting number of buckets at construct-time.
00072 // For enlarge_factor, you can use this chart to try to trade-off
00073 // expected lookup time to the space taken up.  By default, this
00074 // code uses quadratic probing, though you can change it to linear
00075 // via _JUMP below if you really want to.
00076 //
00077 // From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
00078 // NUMBER OF PROBES / LOOKUP       Successful            Unsuccessful
00079 // Quadratic collision resolution   1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
00080 // Linear collision resolution     [1+1/(1-L)]/2         [1+1/(1-L)^2]/2
00081 //
00082 // -- enlarge_factor --           0.10  0.50  0.60  0.75  0.80  0.90  0.99
00083 // QUADRATIC COLLISION RES.
00084 //    probes/successful lookup    1.05  1.44  1.62  2.01  2.21  2.85  5.11
00085 //    probes/unsuccessful lookup  1.11  2.19  2.82  4.64  5.81  11.4  103.6
00086 // LINEAR COLLISION RES.
00087 //    probes/successful lookup    1.06  1.5   1.75  2.5   3.0   5.5   50.5
00088 //    probes/unsuccessful lookup  1.12  2.5   3.6   8.5   13.0  50.0  5000.0
00089 
00090 #ifndef _DENSEHASHTABLE_H_
00091 #define _DENSEHASHTABLE_H_
00092 
00093 // The probing method
00094 // Linear probing
00095 // #define JUMP_(key, num_probes)    ( 1 )
00096 // Quadratic probing
00097 #define JUMP_(key, num_probes)    ( num_probes )
00098 
00099 
00100 #include <stk_util/util/sparseconfig.h>
00101 #include <stdio.h>
00102 #include <assert.h>
00103 #include <stdlib.h>             // for abort()
00104 #include <algorithm>            // For swap(), eg
00105 #include <stdexcept>            // For length_error
00106 #include <iostream>             // For cerr
00107 #include <memory>               // For uninitialized_fill, uninitialized_copy
00108 #include <utility>              // for pair<>
00109 #include <iterator>             // for facts about iterator tags
00110 #include <limits>               // for numeric_limits<>
00111 #include <stk_util/util/libc_allocator_with_realloc.h>
00112 #include <stk_util/util/hashtable-common.h>
00113 #include <stk_util/util/type_traits_google.h> // for true_type, integral_constant, etc.
00114 
00115 _START_GOOGLE_NAMESPACE_
00116 
00117 using STL_NAMESPACE::pair;
00118 
00119 // Hashtable class, used to implement the hashed associative containers
00120 // hash_set and hash_map.
00121 
00122 // Value: what is stored in the table (each bucket is a Value).
00123 // Key: something in a 1-to-1 correspondence to a Value, that can be used
00124 //      to search for a Value in the table (find() takes a Key).
00125 // HashFcn: Takes a Key and returns an integer, the more unique the better.
00126 // ExtractKey: given a Value, returns the unique Key associated with it.
00127 //             Must inherit from unary_function, or at least have a
00128 //             result_type enum indicating the return type of operator().
00129 // SetKey: given a Value* and a Key, modifies the value such that
00130 //         ExtractKey(value) == key.  We guarantee this is only called
00131 //         with key == deleted_key or key == empty_key.
00132 // EqualKey: Given two Keys, says whether they are the same (that is,
00133 //           if they are both associated with the same Value).
00134 // Alloc: STL allocator to use to allocate memory.
00135 
// Forward declaration: the hashtable itself is defined further below.
template <class Value, class Key, class HashFcn,
          class ExtractKey, class SetKey, class EqualKey, class Alloc>
class dense_hashtable;

// Forward declarations of the mutable and const iterators; both are
// defined below and befriend/are used by dense_hashtable.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct dense_hashtable_iterator;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct dense_hashtable_const_iterator;
00145 
00146 // We're just an array, but we need to skip over empty and deleted elements
00147 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
00148 struct dense_hashtable_iterator {
00149  private:
00150   typedef typename A::template rebind<V>::other value_alloc_type;
00151 
00152  public:
00153   typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
00154   typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
00155 
00156   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
00157   typedef V value_type;
00158   typedef typename value_alloc_type::difference_type difference_type;
00159   typedef typename value_alloc_type::size_type size_type;
00160   typedef typename value_alloc_type::reference reference;
00161   typedef typename value_alloc_type::pointer pointer;
00162 
00163   // "Real" constructor and default constructor
00164   dense_hashtable_iterator(const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
00165                            pointer it, pointer it_end, bool advance)
00166     : ht(h), pos(it), end(it_end)   {
00167     if (advance)  advance_past_empty_and_deleted();
00168   }
00169   dense_hashtable_iterator() { }
00170   // The default destructor is fine; we don't define one
00171   // The default operator= is fine; we don't define one
00172 
00173   // Happy dereferencer
00174   reference operator*() const { return *pos; }
00175   pointer operator->() const { return &(operator*()); }
00176 
00177   // Arithmetic.  The only hard part is making sure that
00178   // we're not on an empty or marked-deleted array element
00179   void advance_past_empty_and_deleted() {
00180     while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
00181       ++pos;
00182   }
00183   iterator& operator++()   {
00184     assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
00185   }
00186   iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
00187 
00188   // Comparison.
00189   bool operator==(const iterator& it) const { return pos == it.pos; }
00190   bool operator!=(const iterator& it) const { return pos != it.pos; }
00191 
00192 
00193   // The actual data
00194   const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
00195   pointer pos, end;
00196 };
00197 
00198 
00199 // Now do it all again, but with const-ness!
00200 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
00201 struct dense_hashtable_const_iterator {
00202  private:
00203   typedef typename A::template rebind<V>::other value_alloc_type;
00204 
00205  public:
00206   typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
00207   typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
00208 
00209   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
00210   typedef V value_type;
00211   typedef typename value_alloc_type::difference_type difference_type;
00212   typedef typename value_alloc_type::size_type size_type;
00213   typedef typename value_alloc_type::const_reference reference;
00214   typedef typename value_alloc_type::const_pointer pointer;
00215 
00216   // "Real" constructor and default constructor
00217   dense_hashtable_const_iterator(
00218       const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
00219       pointer it, pointer it_end, bool advance)
00220     : ht(h), pos(it), end(it_end)   {
00221     if (advance)  advance_past_empty_and_deleted();
00222   }
00223   dense_hashtable_const_iterator()
00224     : ht(NULL), pos(pointer()), end(pointer()) { }
00225   // This lets us convert regular iterators to const iterators
00226   dense_hashtable_const_iterator(const iterator &it)
00227     : ht(it.ht), pos(it.pos), end(it.end) { }
00228   // The default destructor is fine; we don't define one
00229   // The default operator= is fine; we don't define one
00230 
00231   // Happy dereferencer
00232   reference operator*() const { return *pos; }
00233   pointer operator->() const { return &(operator*()); }
00234 
00235   // Arithmetic.  The only hard part is making sure that
00236   // we're not on an empty or marked-deleted array element
00237   void advance_past_empty_and_deleted() {
00238     while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
00239       ++pos;
00240   }
00241   const_iterator& operator++()   {
00242     assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
00243   }
00244   const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; }
00245 
00246   // Comparison.
00247   bool operator==(const const_iterator& it) const { return pos == it.pos; }
00248   bool operator!=(const const_iterator& it) const { return pos != it.pos; }
00249 
00250 
00251   // The actual data
00252   const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
00253   pointer pos, end;
00254 };
00255 
00256 template <class Value, class Key, class HashFcn,
00257           class ExtractKey, class SetKey, class EqualKey, class Alloc>
00258 class dense_hashtable {
 private:
  // Rebind the user's allocator so it allocates Value objects regardless
  // of what type it was instantiated with.
  typedef typename Alloc::template rebind<Value>::other value_alloc_type;

 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;
  typedef Alloc allocator_type;

  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;
  typedef dense_hashtable_iterator<Value, Key, HashFcn,
                                   ExtractKey, SetKey, EqualKey, Alloc>
  iterator;

  typedef dense_hashtable_const_iterator<Value, Key, HashFcn,
                                         ExtractKey, SetKey, EqualKey, Alloc>
  const_iterator;

  // These come from tr1.  For us they're the same as regular iterators.
  typedef iterator local_iterator;
  typedef const_iterator const_local_iterator;

  // How full we let the table get before we resize, by default.
  // Knuth says .8 is good -- higher causes us to probe too much,
  // though it saves memory.
  static const int HT_OCCUPANCY_PCT; // = 50 (out of 100)

  // How empty we let the table get before we resize lower, by default.
  // (0.0 means never resize lower.)
  // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
  static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT;

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the initial size is a
  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_type HT_MIN_BUCKETS = 4;

  // By default, if you don't specify a hashtable size at
  // construction-time, we use this size.  Must be a power of two, and
  // at least HT_MIN_BUCKETS.
  static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
00307 
00308   // ITERATOR FUNCTIONS
00309   iterator begin()             { return iterator(this, table,
00310                                                  table + num_buckets, true); }
00311   iterator end()               { return iterator(this, table + num_buckets,
00312                                                  table + num_buckets, true); }
00313   const_iterator begin() const { return const_iterator(this, table,
00314                                                        table+num_buckets,true);}
00315   const_iterator end() const   { return const_iterator(this, table + num_buckets,
00316                                                        table+num_buckets,true);}
00317 
00318   // These come from tr1 unordered_map.  They iterate over 'bucket' n.
00319   // We'll just consider bucket n to be the n-th element of the table.
00320   local_iterator begin(size_type i) {
00321     return local_iterator(this, table + i, table + i+1, false);
00322   }
00323   local_iterator end(size_type i) {
00324     local_iterator it = begin(i);
00325     if (!test_empty(i) && !test_deleted(i))
00326       ++it;
00327     return it;
00328   }
00329   const_local_iterator begin(size_type i) const {
00330     return const_local_iterator(this, table + i, table + i+1, false);
00331   }
00332   const_local_iterator end(size_type i) const {
00333     const_local_iterator it = begin(i);
00334     if (!test_empty(i) && !test_deleted(i))
00335       ++it;
00336     return it;
00337   }
00338 
  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  // 'settings' is convertible to the hasher (presumably stores/derives
  // from it) -- returning it by value yields a copy of the hash functor.
  hasher hash_funct() const               { return settings; }
  // Likewise 'key_info' is convertible to the key-equality functor.
  key_equal key_eq() const                { return key_info; }
  // Rebuild a user-facing Alloc from the rebound allocator held in val_info.
  allocator_type get_allocator() const {
    return allocator_type(val_info);
  }

  // Accessor function for statistics gathering: number of times the table's
  // contents have been copied (i.e. rehash/resize operations performed).
  int num_table_copies() const { return settings.num_ht_copies(); }
00348 
 private:
  // Annoyingly, we can't copy values around, because they might have
  // const components (they're probably pair<const X, Y>).  We use
  // explicit destructor invocation and placement new to get around
  // this.  Arg.
  void set_value(pointer dst, const_reference src) {
    dst->~value_type();   // delete the old value, if any
    new(dst) value_type(src);   // copy-construct src in place over it
  }

  // Run the destructor on every bucket in [first, last).  Only destroys
  // the objects; the underlying storage is not deallocated here.
  void destroy_buckets(size_type first, size_type last) {
    for ( ; first != last; ++first)
      table[first].~value_type();
  }
00363 
  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  // Purge all deleted entries by copy-constructing a fresh table (the copy
  // only transfers live elements) and swapping ourselves with it.
  void squash_deleted() {           // gets rid of any deleted entries we have
    if ( num_deleted ) {            // get rid of deleted before writing
      dense_hashtable tmp(*this);   // copying will get rid of deleted
      swap(tmp);                    // now we are tmp
    }
    assert(num_deleted == 0);
  }

  // True iff 'key' equals the configured deleted-key AND at least one
  // deleted entry actually exists.
  bool test_deleted_key(const key_type& key) const {
    // The num_deleted test is crucial for read(): after read(), the ht values
    // are garbage, and we don't want to think some of them are deleted.
    // Invariant: !use_deleted implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && equals(key_info.delkey, key);
  }
00386 
 public:
  // Declare 'key' to be the marker for deleted buckets.  It must differ
  // from the empty key (if one is set).  Any currently-deleted entries are
  // purged first, since the meaning of "deleted" is about to change.
  void set_deleted_key(const key_type &key) {
    // the empty indicator (if specified) and the deleted indicator
    // must be different
    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
           && "Passed the empty-key to set_deleted_key");
    // It's only safe to change what "deleted" means if we purge deleted guys
    squash_deleted();
    settings.set_use_deleted(true);
    key_info.delkey = key;
  }
  // Forget the deleted key; the table becomes insert-only until a new one
  // is set.  Existing deleted entries are purged first.
  void clear_deleted_key() {
    squash_deleted();
    settings.set_use_deleted(false);
  }
  // Return the current deleted key; only valid after set_deleted_key().
  key_type deleted_key() const {
    assert(settings.use_deleted()
           && "Must set deleted key before calling deleted_key");
    return key_info.delkey;
  }
00407 
  // These are public so the iterators can use them
  // True if the item at position bucknum is "deleted" marker
  bool test_deleted(size_type bucknum) const {
    return test_deleted_key(get_key(table[bucknum]));
  }
  // Same test, but for the bucket an iterator currently points at.
  bool test_deleted(const iterator &it) const {
    return test_deleted_key(get_key(*it));
  }
  // Const-iterator overload of the same test.
  bool test_deleted(const const_iterator &it) const {
    return test_deleted_key(get_key(*it));
  }
00419 
 private:
  // Set it so test_deleted is true.  true if object didn't used to be deleted.
  bool set_deleted(iterator &it) {
    assert(settings.use_deleted());
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type.
    set_key(&(*it), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  true if object used to be deleted.
  bool clear_deleted(iterator &it) {
    assert(settings.use_deleted());
    // Happens automatically when we assign something else in its place.
    return test_deleted(it);
  }

  // We also allow to set/clear the deleted bit on a const iterator.
  // We allow a const_iterator for the same reason you can delete a
  // const pointer: it's convenient, and semantically you can't use
  // 'it' after it's been deleted anyway, so its const-ness doesn't
  // really matter.
  bool set_deleted(const_iterator &it) {
    assert(settings.use_deleted());
    bool retval = !test_deleted(it);
    // const_cast is required because const_iterator::pointer is a
    // const_pointer; see the comment above for why this is acceptable.
    set_key(const_cast<pointer>(&(*it)), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  true if object used to be deleted.
  bool clear_deleted(const_iterator &it) {
    assert(settings.use_deleted());
    return test_deleted(it);
  }
00452 
  // EMPTY HELPER FUNCTIONS
  // This lets the user describe a key that will indicate empty (unused)
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 public:
  // These are public so the iterators can use them
  // True if the item at position bucknum is "empty" marker
  bool test_empty(size_type bucknum) const {
    assert(settings.use_empty());  // we always need to know what's empty!
    return equals(get_key(val_info.emptyval), get_key(table[bucknum]));
  }
  // Same test, for the bucket an iterator currently points at.
  bool test_empty(const iterator &it) const {
    assert(settings.use_empty());  // we always need to know what's empty!
    return equals(get_key(val_info.emptyval), get_key(*it));
  }
  // Const-iterator overload of the same test.
  bool test_empty(const const_iterator &it) const {
    assert(settings.use_empty());  // we always need to know what's empty!
    return equals(get_key(val_info.emptyval), get_key(*it));
  }

 private:
  // Copy-construct the empty marker into every bucket of [table_start,
  // table_end).  Uses uninitialized_fill because the storage is raw.
  void fill_range_with_empty(pointer table_start, pointer table_end) {
    STL_NAMESPACE::uninitialized_fill(table_start, table_end, val_info.emptyval);
  }
00479 
 public:
  // TODO(csilvers): change all callers of this to pass in a key instead,
  //                 and take a const key_type instead of const value_type.
  // Declare 'val' (its key, really) to be the marker for unused buckets.
  // This is also what triggers the actual table allocation: no storage
  // exists until the empty key is known, since every bucket must be
  // initialized to it.  May only be called once.
  void set_empty_key(const_reference val) {
    // Once you set the empty key, you can't change it
    assert(!settings.use_empty() && "Calling set_empty_key multiple times");
    // The deleted indicator (if specified) and the empty indicator
    // must be different.
    assert((!settings.use_deleted() || !equals(get_key(val), key_info.delkey))
           && "Setting the empty key the same as the deleted key");
    settings.set_use_empty(true);
    set_value(&val_info.emptyval, val);

    assert(!table);                  // must set before first use
    // num_buckets was set in constructor even though table was NULL
    table = val_info.allocate(num_buckets);
    assert(table);
    fill_range_with_empty(table, table + num_buckets);
  }
  // TODO(sjackman): return a key_type rather than a value_type
  // Return the value whose key marks empty buckets; only valid after
  // set_empty_key() has been called.
  value_type empty_key() const {
    assert(settings.use_empty());
    return val_info.emptyval;
  }
00504 
  // FUNCTIONS CONCERNING SIZE
 public:
  // Number of live elements (deleted buckets still count in num_elements,
  // so subtract them out here).
  size_type size() const      { return num_elements - num_deleted; }
  size_type max_size() const  { return val_info.max_size(); }
  bool empty() const          { return size() == 0; }
  size_type bucket_count() const      { return num_buckets; }
  size_type max_bucket_count() const  { return max_size(); }
  // Live elements plus deleted-but-not-yet-purged buckets.
  size_type nonempty_bucket_count() const { return num_elements; }
  // These are tr1 methods.  Their idea of 'bucket' doesn't map well to
  // what we do.  We just say every bucket has 0 or 1 items in it.
  size_type bucket_size(size_type i) const {
    return begin(i) == end(i) ? 0 : 1;
  }
00518 
 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

  // Used after a string of deletes.  Returns true if we actually shrunk.
  // TODO(csilvers): take a delta so we can take into account inserts
  // done after shrinking.  Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS.  Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = num_elements - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
      size_type sz = bucket_count() / 2;    // find how much we should shrink
      // Keep halving while we'd still be under the shrink threshold at
      // the smaller size (never going below the default starting size).
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < sz * shrink_factor) {
        sz /= 2;                            // stay a power of 2
      }
      dense_hashtable tmp(*this, sz);       // Do the actual resizing
      swap(tmp);                            // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);    // because we just considered it
    return retval;
  }
00554 
  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  // Returns true if we actually resized, false if size was already ok.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if ( settings.consider_shrink() ) {  // see if lots of deletes happened
      if ( maybe_shrink() )
        did_resize = true;
    }
    // Guard against size_type overflow before computing num_elements + delta.
    if (num_elements >= (STL_NAMESPACE::numeric_limits<size_type>::max)() - delta)
      throw std::length_error("resize overflow");
    if ( bucket_count() >= HT_MIN_BUCKETS &&
         (num_elements + delta) <= settings.enlarge_threshold() )
      return did_resize;                          // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable.  So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room).  But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    const size_type needed_size = settings.min_buckets(num_elements + delta, 0);
    if ( needed_size <= bucket_count() )      // we have enough buckets
      return did_resize;

    // Smallest acceptable size counting only live elements plus delta
    // (deleted buckets are purged by the copy below).
    size_type resize_to =
      settings.min_buckets(num_elements - num_deleted + delta, bucket_count());

    if (resize_to < needed_size &&    // may double resize_to
        resize_to < (STL_NAMESPACE::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements,
      // that once we purge them, we won't actually have needed to
      // grow.  But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert.  Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
          static_cast<size_type>(settings.shrink_size(resize_to*2));
      if (num_elements - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshhold even if we double.
        resize_to *= 2;
      }
    }
    dense_hashtable tmp(*this, resize_to);   // copy-resize purges deleted
    swap(tmp);                             // now we are tmp
    return true;
  }
00603 
  // We require table be not-NULL and empty before calling this.
  // true_type overload: the allocator supports realloc-style resizing
  // (presumably libc_allocator_with_realloc) -- resize in one call.
  void resize_table(size_type /*old_size*/, size_type new_size,
                    true_type) {
    table = val_info.realloc_or_die(table, new_size);
  }

  // false_type overload: generic allocator -- free the old block and
  // allocate a new one.  Contents are NOT preserved (table must be empty).
  void resize_table(size_type old_size, size_type new_size, false_type) {
    val_info.deallocate(table, old_size);
    table = val_info.allocate(new_size);
  }
00614 
  // Used to actually do the rehashing when we grow/shrink a hashtable:
  // reset ourselves to an all-empty table of suitable size, then re-insert
  // every live element of 'ht' by direct probing.
  void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted) {
    clear_to_size(settings.min_buckets(ht.size(), min_buckets_wanted));

    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count()-1)) == 0);      // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;              // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // Probe until we find an empty bucket.  NOTE(review): JUMP_'s first
      // argument is discarded by the default (quadratic) macro definition,
      // so the bare token 'key' below is never actually evaluated.
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           !test_empty(bucknum);                               // not empty
           bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count()
               && "Hashtable is full: an error in key_equal<> or hash<>");
      }
      set_value(&table[bucknum], *it);       // copies the value to here
      num_elements++;
    }
    settings.inc_num_ht_copies();
  }
00639 
00640   // Required by the spec for hashed associative container
00641  public:
00642   // Though the docs say this should be num_buckets, I think it's much
00643   // more useful as num_elements.  As a special feature, calling with
00644   // req_elements==0 will cause us to shrink if we can, saving space.
00645   void resize(size_type req_elements) {       // resize to this or larger
00646     if ( settings.consider_shrink() || req_elements == 0 )
00647       maybe_shrink();
00648     if ( req_elements > num_elements )
00649       resize_delta(req_elements - num_elements);
00650   }
00651 
00652   // Get and change the value of shrink_factor and enlarge_factor.  The
00653   // description at the beginning of this file explains how to choose
00654   // the values.  Setting the shrink parameter to 0.0 ensures that the
00655   // table never shrinks.
00656   void get_resizing_parameters(float* shrink, float* grow) const {
00657     *shrink = settings.shrink_factor();
00658     *grow = settings.enlarge_factor();
00659   }
  // Installs new shrink/enlarge load factors, then recomputes the resize
  // thresholds for the current bucket count so they take effect at once.
  // (A shrink value of 0.0 means the table never shrinks; see above.)
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    settings.reset_thresholds(bucket_count());
  }
00664 
00665   // CONSTRUCTORS -- as required by the specs, we take a size,
00666   // but also let you specify a hashfunction, key comparator,
00667   // and key extractor.  We also define a copy constructor and =.
00668   // DESTRUCTOR -- needs to free the table
  explicit dense_hashtable(size_type expected_max_items_in_table = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey(),
                           const SetKey& set = SetKey(),
                           const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        num_elements(0),
        // NOTE: depends on 'settings' being initialized first (it is declared
        // first), since min_buckets() consults the resize parameters.
        num_buckets(expected_max_items_in_table == 0
                    ? HT_DEFAULT_STARTING_BUCKETS
                    : settings.min_buckets(expected_max_items_in_table, 0)),
        val_info(alloc_impl<value_alloc_type>(alloc)),
        table(NULL) {
    // table is NULL until emptyval is set.  However, we set num_buckets
    // here so we know how much space to allocate once emptyval is set
    settings.reset_thresholds(bucket_count());
  }
00688 
  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht
  dense_hashtable(const dense_hashtable& ht,
                  size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        val_info(ht.val_info),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_from will crash, so we do our own copying.
      // ht cannot hold elements without an empty key, so there is nothing
      // to copy beyond the bucket-count bookkeeping.
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_from(ht, min_buckets_wanted);   // copy_from() ignores deleted entries
  }
00710 
  // Assignment: replaces our contents with a deep copy of ht's live
  // elements (deleted entries are not copied).
  dense_hashtable& operator= (const dense_hashtable& ht) {
    if (&ht == this)  return *this;        // don't copy onto ourselves
    if (!ht.settings.use_empty()) {
      // Without an empty key, copy_from() can't probe; ht must be empty,
      // so swapping with a freshly-built empty copy reproduces its state.
      assert(ht.empty());
      dense_hashtable empty_table(ht);  // empty table with ht's thresholds
      this->swap(empty_table);
      return *this;
    }
    settings = ht.settings;
    key_info = ht.key_info;
    set_value(&val_info.emptyval, ht.val_info.emptyval);
    // copy_from() calls clear and sets num_deleted to 0 too
    copy_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }
00727 
00728   ~dense_hashtable() {
00729     if (table) {
00730       destroy_buckets(0, num_buckets);
00731       val_info.deallocate(table, num_buckets);
00732     }
00733   }
00734 
00735   // Many STL algorithms use swap instead of copy constructors
00736   void swap(dense_hashtable& ht) {
00737     STL_NAMESPACE::swap(settings, ht.settings);
00738     STL_NAMESPACE::swap(key_info, ht.key_info);
00739     STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
00740     STL_NAMESPACE::swap(num_elements, ht.num_elements);
00741     STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
00742     { value_type tmp;     // for annoying reasons, swap() doesn't work
00743       set_value(&tmp, val_info.emptyval);
00744       set_value(&val_info.emptyval, ht.val_info.emptyval);
00745       set_value(&ht.val_info.emptyval, tmp);
00746     }
00747     STL_NAMESPACE::swap(table, ht.table);
00748     settings.reset_thresholds(bucket_count());  // this also resets consider_shrink
00749     ht.settings.reset_thresholds(bucket_count());
00750     // we purposefully don't swap the allocator, which may not be swap-able
00751   }
00752 
00753  private:
  // Destroys all current contents and re-establishes an all-empty table of
  // exactly new_num_buckets buckets, allocating / resizing / reusing the
  // existing array as appropriate.  Resets counts and resize thresholds.
  void clear_to_size(size_type new_num_buckets) {
    if (!table) {
      table = val_info.allocate(new_num_buckets);
    } else {
      destroy_buckets(0, num_buckets);
      if (new_num_buckets != num_buckets) {   // resize, if necessary
        // Only the default libc_allocator_with_realloc can resize in place
        // (via realloc); resize_table dispatches on that at compile time.
        typedef integral_constant<bool,
            is_same<value_alloc_type,
                    libc_allocator_with_realloc<value_type> >::value>
            realloc_ok;
        resize_table(num_buckets, new_num_buckets, realloc_ok());
      }
    }
    assert(table);
    fill_range_with_empty(table, table + new_num_buckets);
    num_elements = 0;
    num_deleted = 0;
    num_buckets = new_num_buckets;          // our new size
    settings.reset_thresholds(bucket_count());
  }
00774 
00775  public:
00776   // It's always nice to be able to clear a table without deallocating it
00777   void clear() {
00778     // If the table is already empty, and the number of buckets is
00779     // already as we desire, there's nothing to do.
00780     const size_type new_num_buckets = settings.min_buckets(0, 0);
00781     if (num_elements == 0 && new_num_buckets == num_buckets) {
00782       return;
00783     }
00784     clear_to_size(new_num_buckets);
00785   }
00786 
00787   // Clear the table without resizing it.
00788   // Mimicks the stl_hashtable's behaviour when clear()-ing in that it
00789   // does not modify the bucket count
00790   void clear_no_resize() {
00791     if (num_elements > 0) {
00792       assert(table);
00793       destroy_buckets(0, num_buckets);
00794       fill_range_with_empty(table, table + num_buckets);
00795     }
00796     // don't consider to shrink before another erase()
00797     settings.reset_thresholds(bucket_count());
00798     num_elements = 0;
00799     num_deleted = 0;
00800   }
00801 
00802   // LOOKUP ROUTINES
00803  private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  pair<size_type, size_type> find_position(const key_type &key) const {
    size_type num_probes = 0;              // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    // bucket_count() is a power of two, so '&' performs the modulo.
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
    while ( 1 ) {                          // probe until something happens
      if ( test_empty(bucknum) ) {         // bucket is empty
        // An empty bucket terminates the probe chain: the key can't be
        // stored past it, so we know it is absent.
        if ( insert_pos == ILLEGAL_BUCKET )   // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;

      } else if ( equals(key, get_key(table[bucknum])) ) {
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                        // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count()
             && "Hashtable is full: an error in key_equal<> or hash<>");
    }
  }
00834 
00835  public:
00836   iterator find(const key_type& key) {
00837     if ( size() == 0 ) return end();
00838     pair<size_type, size_type> pos = find_position(key);
00839     if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
00840       return end();
00841     else
00842       return iterator(this, table + pos.first, table + num_buckets, false);
00843   }
00844 
00845   const_iterator find(const key_type& key) const {
00846     if ( size() == 0 ) return end();
00847     pair<size_type, size_type> pos = find_position(key);
00848     if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
00849       return end();
00850     else
00851       return const_iterator(this, table + pos.first, table+num_buckets, false);
00852   }
00853 
00854   // This is a tr1 method: the bucket a given key is in, or what bucket
00855   // it would be put in, if it were to be inserted.  Shrug.
00856   size_type bucket(const key_type& key) const {
00857     pair<size_type, size_type> pos = find_position(key);
00858     return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first;
00859   }
00860 
00861   // Counts how many elements have key key.  For maps, it's either 0 or 1.
00862   size_type count(const key_type &key) const {
00863     pair<size_type, size_type> pos = find_position(key);
00864     return pos.first == ILLEGAL_BUCKET ? 0 : 1;
00865   }
00866 
00867   // Likewise, equal_range doesn't really make sense for us.  Oh well.
00868   pair<iterator,iterator> equal_range(const key_type& key) {
00869     iterator pos = find(key);      // either an iterator or end
00870     if (pos == end()) {
00871       return pair<iterator,iterator>(pos, pos);
00872     } else {
00873       const iterator startpos = pos++;
00874       return pair<iterator,iterator>(startpos, pos);
00875     }
00876   }
00877   pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
00878     const_iterator pos = find(key);      // either an iterator or end
00879     if (pos == end()) {
00880       return pair<const_iterator,const_iterator>(pos, pos);
00881     } else {
00882       const const_iterator startpos = pos++;
00883       return pair<const_iterator,const_iterator>(startpos, pos);
00884     }
00885   }
00886 
00887 
00888   // INSERTION ROUTINES
00889  private:
  // Private method used by insert_noresize and find_or_insert.
  // Unconditionally stores obj at bucket 'pos'; the caller must have gotten
  // pos from find_position(), so it is known to be empty or deleted.
  iterator insert_at(const_reference obj, size_type pos) {
    if (size() >= max_size())
      throw std::length_error("insert overflow");
    if ( test_deleted(pos) ) {      // just replace if it's been del.
      // shrug: shouldn't need to be const.
      const_iterator delpos(this, table + pos, table + num_buckets, false);
      clear_deleted(delpos);
      assert( num_deleted > 0);
      --num_deleted;                // used to be, now it isn't
    } else {
      ++num_elements;               // replacing an empty bucket
    }
    set_value(&table[pos], obj);
    return iterator(this, table + pos, table + num_buckets, false);
  }
00906 
  // If you know *this is big enough to hold obj, use this routine.
  // Returns (iterator, inserted?) in the style of std::set::insert.
  pair<iterator, bool> insert_noresize(const_reference obj) {
    // First, double-check we're not inserting delkey or emptyval
    assert((!settings.use_empty() || !equals(get_key(obj),
                                             get_key(val_info.emptyval)))
           && "Inserting the empty key");
    assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
      return pair<iterator,bool>(iterator(this, table + pos.first,
                                          table + num_buckets, false),
                                 false);          // false: we didn't insert
    } else {                                 // pos.second says where to put it
      return pair<iterator,bool>(insert_at(obj, pos.second), true);
    }
  }
00924 
00925   // Specializations of insert(it, it) depending on the power of the iterator:
00926   // (1) Iterator supports operator-, resize before inserting
00927   template <class ForwardIterator>
00928   void insert(ForwardIterator f, ForwardIterator l, STL_NAMESPACE::forward_iterator_tag) {
00929     size_t dist = STL_NAMESPACE::distance(f, l);
00930     if (dist >= (std::numeric_limits<size_type>::max)())
00931       throw std::length_error("insert-range overflow");
00932     resize_delta(static_cast<size_type>(dist));
00933     for ( ; dist > 0; --dist, ++f) {
00934       insert_noresize(*f);
00935     }
00936   }
00937 
00938   // (2) Arbitrary iterator, can't tell how much to resize
00939   template <class InputIterator>
00940   void insert(InputIterator f, InputIterator l, STL_NAMESPACE::input_iterator_tag) {
00941     for ( ; f != l; ++f)
00942       insert(*f);
00943   }
00944 
00945  public:
00946   // This is the normal insert routine, used by the outside world
00947   pair<iterator, bool> insert(const_reference obj) {
00948     resize_delta(1);                      // adding an object, grow if need be
00949     return insert_noresize(obj);
00950   }
00951 
00952   // When inserting a lot at a time, we specialize on the type of iterator
00953   template <class InputIterator>
00954   void insert(InputIterator f, InputIterator l) {
00955     // specializes on iterator type
00956     insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
00957   }
00958 
  // DefaultValue is a functor that takes a key and returns a value_type
  // representing the default value to be inserted if none is found.
  // Returns a reference to the existing or newly-inserted value.
  template <class DefaultValue>
  value_type& find_or_insert(const key_type& key) {
    // First, double-check we're not inserting emptykey or delkey
    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
           && "Inserting the empty key");
    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(key);
    DefaultValue default_value;
    if ( pos.first != ILLEGAL_BUCKET) {  // object was already there
      return table[pos.first];
    } else if (resize_delta(1)) {        // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to insert.
      return *insert_noresize(default_value(key)).first;
    } else {                             // no need to rehash, insert right here
      return *insert_at(default_value(key), pos.second);
    }
  }
00979 
00980   // DELETION ROUTINES
00981   size_type erase(const key_type& key) {
00982     // First, double-check we're not trying to erase delkey or emptyval.
00983     assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
00984            && "Erasing the empty key");
00985     assert((!settings.use_deleted() || !equals(key, key_info.delkey))
00986            && "Erasing the deleted key");
00987     const_iterator pos = find(key);   // shrug: shouldn't need to be const
00988     if ( pos != end() ) {
00989       assert(!test_deleted(pos));  // or find() shouldn't have returned it
00990       set_deleted(pos);
00991       ++num_deleted;
00992       settings.set_consider_shrink(true); // will think about shrink after next insert
00993       return 1;                    // because we deleted one thing
00994     } else {
00995       return 0;                    // because we deleted nothing
00996     }
00997   }
00998 
00999   // We return the iterator past the deleted item.
01000   void erase(iterator pos) {
01001     if ( pos == end() ) return;    // sanity check
01002     if ( set_deleted(pos) ) {      // true if object has been newly deleted
01003       ++num_deleted;
01004       settings.set_consider_shrink(true); // will think about shrink after next insert
01005     }
01006   }
01007 
01008   void erase(iterator f, iterator l) {
01009     for ( ; f != l; ++f) {
01010       if ( set_deleted(f)  )       // should always be true
01011         ++num_deleted;
01012     }
01013     settings.set_consider_shrink(true); // will think about shrink after next insert
01014   }
01015 
01016   // We allow you to erase a const_iterator just like we allow you to
01017   // erase an iterator.  This is in parallel to 'delete': you can delete
01018   // a const pointer just like a non-const pointer.  The logic is that
01019   // you can't use the object after it's erased anyway, so it doesn't matter
01020   // if it's const or not.
01021   void erase(const_iterator pos) {
01022     if ( pos == end() ) return;    // sanity check
01023     if ( set_deleted(pos) ) {      // true if object has been newly deleted
01024       ++num_deleted;
01025       settings.set_consider_shrink(true); // will think about shrink after next insert
01026     }
01027   }
01028   void erase(const_iterator f, const_iterator l) {
01029     for ( ; f != l; ++f) {
01030       if ( set_deleted(f)  )       // should always be true
01031         ++num_deleted;
01032     }
01033     settings.set_consider_shrink(true);   // will think about shrink after next insert
01034   }
01035 
01036 
01037   // COMPARISON
01038   bool operator==(const dense_hashtable& ht) const {
01039     if (size() != ht.size()) {
01040       return false;
01041     } else if (this == &ht) {
01042       return true;
01043     } else {
01044       // Iterate through the elements in "this" and see if the
01045       // corresponding element is in ht
01046       for ( const_iterator it = begin(); it != end(); ++it ) {
01047         const_iterator it2 = ht.find(get_key(*it));
01048         if ((it2 == ht.end()) || (*it != *it2)) {
01049           return false;
01050         }
01051       }
01052       return true;
01053     }
01054   }
01055   bool operator!=(const dense_hashtable& ht) const {
01056     return !(*this == ht);
01057   }
01058 
01059 
  // I/O
  // We support reading and writing hashtables to disk.  Alas, since
  // I don't know how to write a hasher or key_equal, you have to make
  // sure everything but the table is the same.  We compact before writing
  //
  // NOTE: These functions are currently TODO.  They've not been implemented.
  // write_metadata always returns false (failure) until it is implemented;
  // its only real effect today is the squash_deleted() compaction.
  bool write_metadata(FILE * /*fp*/) {
    squash_deleted();           // so we don't have to worry about delkey
    return false;               // TODO: serialize magic number / bucket count
  }
01070 
  // Unimplemented stub: reallocates the bucket array and fills it with
  // emptyval, but reads nothing from fp yet and always returns false.
  bool read_metadata(FILE* /*fp*/) {
    num_deleted = 0;            // since we got rid before writing
    assert(settings.use_empty() && "empty_key not set for read_metadata");
    if (table)  val_info.deallocate(table, num_buckets);  // we'll make our own
    // TODO: read magic number
    // TODO: read num_buckets
    settings.reset_thresholds(bucket_count());
    table = val_info.allocate(num_buckets);
    assert(table);
    fill_range_with_empty(table, table + num_buckets);
    // TODO: read num_elements
    for ( size_type i = 0; i < num_elements; ++i ) {
      // TODO: read bucket_num
      // TODO: set with non-empty, non-deleted value
    }
    return false;               // TODO
  }
01088 
  // If your keys and values are simple enough, we can write them to
  // disk for you.  "simple enough" means value_type is a POD type
  // that contains no pointers.  However, we don't try to normalize
  // endianness
  // NOTE(review): this returns false even when every fwrite succeeds --
  // presumably deliberate while the metadata half is still TODO; confirm
  // before relying on the return value.
  bool write_nopointer_data(FILE *fp) const {
    for ( const_iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return false;
  }
01100 
  // When reading, we have to override the potential const-ness of *it
  // NOTE(review): like write_nopointer_data, this returns false even on a
  // fully successful read -- presumably a placeholder until the metadata
  // routines are implemented; confirm before relying on the return value.
  bool read_nopointer_data(FILE *fp) {
    for ( iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return false;
  }
01110 
01111  private:
  // Wrapper that extends any allocator A with a realloc_or_die() member.
  template <class A>
  class alloc_impl : public A {
   public:
    typedef typename A::pointer pointer;
    typedef typename A::size_type size_type;

    // Convert a normal allocator to one that has realloc_or_die()
    alloc_impl(const A& a) : A(a) { }

    // realloc_or_die should only be used when using the default
    // allocator (libc_allocator_with_realloc).
    // This generic version cannot realloc, so it aborts the process.
    pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) {
      fprintf(stderr, "realloc_or_die is only supported for "
                      "libc_allocator_with_realloc");
      exit(1);
      return NULL;              // unreachable; placates the compiler
    }
  };
01130 
  // A template specialization of alloc_impl for
  // libc_allocator_with_realloc that can handle realloc_or_die.
  template <class A>
  class alloc_impl<libc_allocator_with_realloc<A> >
      : public libc_allocator_with_realloc<A> {
   public:
    typedef typename libc_allocator_with_realloc<A>::pointer pointer;
    typedef typename libc_allocator_with_realloc<A>::size_type size_type;

    alloc_impl(const libc_allocator_with_realloc<A>& a)
        : libc_allocator_with_realloc<A>(a) { }

    // Grows/shrinks ptr to n elements via realloc; exits the process on
    // allocation failure rather than returning NULL.
    pointer realloc_or_die(pointer ptr, size_type n) {
      pointer retval = this->reallocate(ptr, n);
      if (retval == NULL) {
        // We really should use PRIuS here, but I don't want to have to add
        // a whole new configure option, with concomitant macro namespace
        // pollution, just to print this (unlikely) error message.  So I cast.
        fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
                "%lu elements for ptr %p",
                static_cast<unsigned long>(n), ptr);
        exit(1);
      }
      return retval;
    }
  };
01157 
  // Package allocator with emptyval to eliminate memory needed for
  // the zero-size allocator.
  // If new fields are added to this class, we should add them to
  // operator= and swap.
  class ValInfo : public alloc_impl<value_alloc_type> {
   public:
    typedef typename alloc_impl<value_alloc_type>::value_type value_type;

    ValInfo(const alloc_impl<value_alloc_type>& a)
        : alloc_impl<value_alloc_type>(a), emptyval() { }
    ValInfo(const ValInfo& v)
        : alloc_impl<value_alloc_type>(v), emptyval(v.emptyval) { }

    value_type emptyval;    // which key marks unused entries
  };
01173 
01174 
  // Package functors with another class to eliminate memory needed for
  // zero-size functors.  Since ExtractKey and hasher's operator() might
  // have the same function signature, they must be packaged in
  // different classes.
  struct Settings :
      sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS> {
    explicit Settings(const hasher& hf)
        : sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS>(
            // The integer percentages are converted to [0,1] load factors.
            hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };
01185 
  // Packages ExtractKey and SetKey functors.
  // Inherits from each functor (empty-base optimization) so zero-size
  // functors cost no storage.
  class KeyInfo : public ExtractKey, public SetKey, public key_equal {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const key_equal& eq)
        : ExtractKey(ek),
          SetKey(sk),
          key_equal(eq) {
    }

    // We want to return the exact same type as ExtractKey: Key or const Key&
    typename ExtractKey::result_type get_key(const_reference v) const {
      return ExtractKey::operator()(v);
    }
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    bool equals(const key_type& a, const key_type& b) const {
      return key_equal::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
    typename remove_const<key_type>::type delkey;
  };
01210 
  // Utility functions to access the templated operators
  // (thin forwarding wrappers around the Settings/KeyInfo functors).
  size_type hash(const key_type& v) const {
    return settings.hash(v);
  }
  bool equals(const key_type& a, const key_type& b) const {
    return key_info.equals(a, b);
  }
  typename ExtractKey::result_type get_key(const_reference v) const {
    return key_info.get_key(v);
  }
  void set_key(pointer v, const key_type& k) const {
    key_info.set_key(v, k);
  }
01224 
 private:
  // Actual data
  Settings settings;      // hasher + resize thresholds/load factors
  KeyInfo key_info;       // ExtractKey/SetKey/key_equal functors + delkey

  size_type num_deleted;  // how many occupied buckets are marked deleted
  size_type num_elements; // occupied buckets; erase() leaves this unchanged,
                          // so it apparently includes tombstoned entries --
                          // see erase()/insert_at() bookkeeping
  size_type num_buckets;  // always a power of two (asserted in copy_from)
  ValInfo val_info;       // holds emptyval, and also the allocator
  pointer table;          // the bucket array; NULL until emptyval is set
};
01236 
01237 
// We need a global swap as well
// (lets 'using std::swap; swap(a, b);' find the cheap member swap via ADL)
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(dense_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
                 dense_hashtable<V,K,HF,ExK,SetK,EqK,A> &y) {
  x.swap(y);   // defer to the O(1) member swap
}
01244 
01245 #undef JUMP_
01246 
// Out-of-class definition of the static const member ILLEGAL_BUCKET
// (required pre-C++17 whenever the member is odr-used).
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
  dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;
01250 
// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory.
// However, we go with .5, getting better performance at the cost of
// more space (a trade-off densehashtable explicitly chooses to make).
// Feel free to play around with different values, though.
// (Consumed by Settings as HT_OCCUPANCY_PCT / 100.0f.)
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 50;
01258 
// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
// (with OCCUPANCY_PCT = 50 this evaluates to 20).
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
  = static_cast<int>(0.4 *
                     dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT);
01265 
01266 _END_GOOGLE_NAMESPACE_
01267 
01268 #endif /* _DENSEHASHTABLE_H_ */
// All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines