Tpetra Matrix/Vector Services Version of the Day
Tpetra_HashTable_def.hpp
00001 /*
00002 // @HEADER
00003 // ***********************************************************************
00004 //
00005 //          Tpetra: Templated Linear Algebra Services Package
00006 //                 Copyright (2008) Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00039 //
00040 // ************************************************************************
00041 // @HEADER
00042 */
00043 
00044 #ifndef TPETRA_HASHTABLE_DEF_HPP_
00045 #define TPETRA_HASHTABLE_DEF_HPP_
00046 
00047 #ifdef DOXYGEN_USE_ONLY
00048   #include "Tpetra_HashTable_decl.hpp"
00049 #endif
00050 
00051 
00052 #include "MurmurHash3.hpp"
00053 
00054 
00055 namespace Tpetra
00056 {
00057 
00058 namespace Details
00059 {
00060 
00061 template<typename KeyType, typename ValueType>
00062 int
00063 HashTable<KeyType, ValueType>::hashFunc( const KeyType key ) {
00064 #ifdef TPETRA_USE_MURMUR_HASH
00065     uint32_t k;
00066     MurmurHash3_x86_32((void *)&key, sizeof(KeyType),
00067                           1, (void *)&k);
00068     return (int) (k%Size_);
00069 #else
00070     // Epetra's version of hash, using it as default as we have observed
00071     // this is much faster than murmur hash. However, this is not a good
00072     // hash function for all data sets. For our typical use case, this is good.
00073     // Use murmur if the maps are sparse.
00074     int intkey = (int) ((key & 0x000000007fffffffLL) +
00075        ((key & 0x7fffffff80000000LL) >> 31));
00076     return (int) ((Seed_ ^ intkey)%Size_);
00077 #endif
00078 }
00079 
00080 template<typename KeyType, typename ValueType>
00081 int
00082 HashTable<KeyType, ValueType>::getRecommendedSize( const int size ) {
00083    // A large list of prime numbers.
00084    // Based on a recommendation by Andres Valloud in hash forums.
00085    //  There are only enough primes here so that between any number N and 2*N,
00086    //  there will be at least about 8 to choose from (except the first 10).
00087    //  This is a balance between a small list of primes, and getting a
00088    //  collection size that doesn't waste too much space.  In addition,
00089    //  the primes in this table were chosen so that they do not divide
00090    //  256^k +- a, for 1<=k<=8, and -32<=a<=32.  This is so that
00091    //  using them as a modulo will not have a tendency to just throw away
00092    //  the most significant bits of the object's hash.  The primes (except the
00093    //  first ten) or not close to any power of two to avoid aliasing
00094    //  between hash functions based on bit manipulation and the moduli.
00095         int primes [ ] = {
00096         3, 7, 13, 23, 53, 97, 193, 389, 769, 1543,
00097         2237, 2423, 2617, 2797, 2999, 3167, 3359, 3539,
00098         3727, 3911, 4441 , 4787 , 5119 , 5471 , 5801 , 6143 , 6521 , 6827
00099         , 7177 , 7517 , 7853 , 8887 , 9587 , 10243 , 10937 , 11617 , 12289
00100         , 12967 , 13649 , 14341 , 15013 , 15727
00101         , 17749 , 19121 , 20479 , 21859 , 23209 , 24593 , 25939 , 27329
00102         , 28669 , 30047 , 31469 , 35507 , 38231 , 40961 , 43711 , 46439
00103         , 49157 , 51893 , 54617 , 57347 , 60077 , 62801 , 70583 , 75619
00104         , 80669 , 85703 , 90749 , 95783 , 100823 , 105871 , 110909 , 115963
00105         , 120997 , 126031 , 141157 , 151237 , 161323 , 171401 , 181499 , 191579
00106         , 201653 , 211741 , 221813 , 231893 , 241979 , 252079
00107         , 282311 , 302483 , 322649 , 342803 , 362969 , 383143 , 403301 , 423457
00108         , 443629 , 463787 , 483953 , 504121 , 564617 , 604949 , 645313 , 685609
00109         , 725939 , 766273 , 806609 , 846931 , 887261 , 927587 , 967919 , 1008239
00110         , 1123477 , 1198397 , 1273289 , 1348177 , 1423067 , 1497983 , 1572869
00111         , 1647761 , 1722667 , 1797581 , 1872461 , 1947359 , 2022253
00112         , 2246953 , 2396759 , 2546543 , 2696363 , 2846161 , 2995973 , 3145739
00113         , 3295541 , 3445357 , 3595117 , 3744941 , 3894707 , 4044503
00114         , 4493921 , 4793501 , 5093089 , 5392679 , 5692279 , 5991883 , 6291469
00115         , 6591059 , 6890641 , 7190243 , 7489829 , 7789447 , 8089033
00116         , 8987807 , 9586981 , 10186177 , 10785371 , 11384539 , 11983729
00117         , 12582917 , 13182109 , 13781291 , 14380469 , 14979667 , 15578861
00118         , 16178053 , 17895707 , 19014187 , 20132683 , 21251141 , 22369661
00119         , 23488103 , 24606583 , 25725083 , 26843549 , 27962027 , 29080529
00120         , 30198989 , 31317469 , 32435981 , 35791397 , 38028379 , 40265327
00121         , 42502283 , 44739259 , 46976221 , 49213237 , 51450131 , 53687099
00122         , 55924061 , 58161041 , 60397993 , 62634959 , 64871921
00123         , 71582857 , 76056727 , 80530643 , 85004567 , 89478503 , 93952427
00124         , 98426347 , 102900263 , 107374217 , 111848111 , 116322053 , 120795971
00125         , 125269877 , 129743807 , 143165587 , 152113427 , 161061283 , 170009141
00126         , 178956983 , 187904819 , 196852693 , 205800547 , 214748383 , 223696237
00127         , 232644089 , 241591943 , 250539763 , 259487603 , 268435399 };
00128 
00129     int hsize = primes[220] ;
00130     for (int i = 0 ; i < 221 ; i++)
00131     {
00132         if (size <= primes[i])
00133         {
00134             hsize = primes[i] ;
00135             break ;
00136         }
00137     }
00138 
00139     return hsize ;
00140 }
00141 
00142 template<typename KeyType, typename ValueType>
00143 HashTable<KeyType, ValueType>::
00144 HashTable( const int size, const unsigned int seed )
00145   : Container_(NULL),
00146     Seed_(seed)
00147 {
00148   TEUCHOS_TEST_FOR_EXCEPTION(size < 0, std::runtime_error,
00149                              "HashTable : ERROR, size cannot be less than zero");
00150 
00151   Size_ = getRecommendedSize(size);
00152   Container_ = new Node * [Size_];
00153   for( KeyType i = 0; i < Size_; ++i ) Container_[i] = NULL;
00154 #ifdef HAVE_TEUCHOS_DEBUG
00155   maxc_ = 0;
00156   nc_ = 0;
00157 #endif
00158 }
00159 
00160 template<typename KeyType, typename ValueType>
00161 HashTable<KeyType, ValueType>::
00162 HashTable( const HashTable & obj )
00163   : Container_(NULL),
00164     Size_(obj.Size_),
00165     Seed_(obj.Seed_)
00166 {
00167 #ifdef HAVE_TEUCHOS_DEBUG
00168   maxc_ = 0;
00169   nc_ = 0;
00170 #endif
00171   Container_ = new Node * [Size_];
00172   for( KeyType i = 0; i < Size_; ++i ) Container_[i] = NULL;
00173   for( KeyType i = 0; i < Size_; ++i ) {
00174     Node * ptr = obj.Container_[i];
00175     while( ptr ) { add( ptr->Key, ptr->Value ); ptr = ptr->Ptr; }
00176   }
00177 }
00178 
00179 template<typename KeyType, typename ValueType>
00180 HashTable<KeyType, ValueType>::~HashTable() {
00181   Node * ptr1;
00182   Node * ptr2;
00183   for( KeyType i = 0; i < Size_; ++i ) {
00184     ptr1 = Container_[i];
00185     while( ptr1 ) { ptr2 = ptr1; ptr1 = ptr1->Ptr; delete ptr2; }
00186   }
00187 
00188   delete [] Container_;
00189 }
00190 
00191 template<typename KeyType, typename ValueType>
00192 void
00193 HashTable<KeyType, ValueType>::
00194 add( const KeyType key, const ValueType value ) {
00195   int v = hashFunc(key);
00196   Node * n1 = Container_[v];
00197   Container_[v] = new Node(key,value,n1);
00198 }
00199 
00200 template<typename KeyType, typename ValueType>
00201 ValueType
00202 HashTable<KeyType, ValueType>::
00203 get( const KeyType key ) {
00204   Node * n = Container_[ hashFunc(key) ];
00205 
00206 #ifdef HAVE_TEUCHOS_DEBUG
00207   int k = 0;
00208 #endif
00209 
00210   while( n && (n->Key != key) ){
00211     n = n->Ptr;
00212 #ifdef HAVE_TEUCHOS_DEBUG
00213     ((k+1 > maxc_) ? maxc_ = k+1 : 0) ;
00214     k++;
00215 #endif
00216   }
00217 
00218 #ifdef HAVE_TEUCHOS_DEBUG
00219   if (k != 0) nc_++;
00220 #endif
00221   if( n ) return n->Value;
00222   else    return -1;
00223 }
00224 
00225 template <typename KeyType, typename ValueType>
00226 std::string HashTable<KeyType, ValueType>::description() const {
00227   std::ostringstream oss;
00228   oss << "HashTable<"
00229       << Teuchos::TypeNameTraits<KeyType>::name() << ","
00230       << Teuchos::TypeNameTraits<ValueType>::name() << "> "
00231       << "{ Size_: " << Size_ << " }";
00232   return oss.str();
00233 }
00234 
00235 template <typename KeyType, typename ValueType>
00236 void HashTable<KeyType, ValueType>::describe(
00237  Teuchos::FancyOStream &out,
00238  const Teuchos::EVerbosityLevel verbLevel) const {
00239   using std::endl;
00240   using std::setw;
00241   using Teuchos::OSTab;
00242   using Teuchos::rcpFromRef;
00243   using Teuchos::TypeNameTraits;
00244   using Teuchos::VERB_DEFAULT;
00245   using Teuchos::VERB_NONE;
00246   using Teuchos::VERB_LOW;
00247   using Teuchos::VERB_EXTREME;
00248 
00249   Teuchos::EVerbosityLevel vl = verbLevel;
00250   if (vl == VERB_DEFAULT) vl = VERB_LOW;
00251 
00252   if (vl == VERB_NONE) {
00253     // do nothing
00254   }
00255   else if (vl == VERB_LOW) {
00256     out << this->description() << endl;
00257   }
00258   else {  // MEDIUM, HIGH or EXTREME
00259     out << "HashTable: {" << endl;
00260     {
00261       OSTab tab1 (rcpFromRef (out));
00262 
00263       const std::string label = this->getObjectLabel ();
00264       if (label != "") {
00265         out << "label: " << label << endl;
00266       }
00267       out << "Template parameters: {" << endl;
00268       {
00269         OSTab tab2 (rcpFromRef (out));
00270         out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
00271             << "ValueType" << TypeNameTraits<ValueType>::name () << endl;
00272       }
00273       out << "}" << endl << "Table parameters: {" << endl;
00274       {
00275         OSTab tab2 (rcpFromRef (out));
00276         out << "Size_: " << Size_ << endl;
00277       }
00278       out << "}" << endl;
00279 #ifdef HAVE_TEUCHOS_DEBUG
00280       out << "Debug info: {" << endl;
00281       {
00282         OSTab tab2 (rcpFromRef (out));
00283         out << "Maximum number of collisions for any key: " << maxc_ << endl
00284             << "Total number of collisions: " << nc_ << endl;
00285       }
00286       out << "}" << endl;
00287 #endif // HAVE_TEUCHOS_DEBUG
00288 
00289       if (vl >= VERB_EXTREME) {
00290         out << "Contents: ";
00291         if (Container_ == NULL || Size_ == 0) {
00292           out << "[]" << endl;
00293         } else {
00294           out << "[" << endl;
00295           {
00296             OSTab tab2 (rcpFromRef (out));
00297             for (KeyType i = 0; i < Size_; ++i) {
00298               Node* curNode = Container_[i];
00299               if (curNode == NULL) {
00300                 out << "NULL";
00301               } else { // curNode != NULL
00302                 // Print all the buckets at the current table position i.
00303                 out << "[";
00304                 // Print the first bucket.
00305                 out << "[" << curNode->Key << "," << curNode->Value << "]";
00306                 curNode = curNode->Ptr;
00307                 // Print the remaining buckets, if there are any.
00308                 while (curNode != NULL) {
00309                   out << ", [" << curNode->Key << "," << curNode->Value << "]";
00310                   curNode = curNode->Ptr;
00311                 }
00312                 out << "]" << endl;
00313               } // if curNode == or != NULL
00314               if (i + 1 < Size_) {
00315                 out << ", ";
00316               }
00317             } // for each table position i
00318           }
00319           out << "]" << endl;
00320         } // The table contains entries
00321       } // vl >= VERB_EXTREME
00322     }
00323     out << "}" << endl;
00324   } // if vl > VERB_LOW
00325 }
00326 
00327 } // namespace Details
00328 } // namespace Tpetra
00329 
00330 // Macro that explicitly instantiates HashTable for the given local
00331 // ordinal (LO) and global ordinal (GO) types.  Note that HashTable's
00332 // template parameters occur in the opposite order of most Tpetra
00333 // classes.  This is because HashTable performs global-to-local
00334 // lookup, and the convention in templated C++ lookup tables (such as
00335 // std::map) is <KeyType, ValueType>.
00336 //
00337 // This macro must be explanded within the Tpetra::Details namespace.
00338 #define TPETRA_HASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
00339   template class HashTable< GO , LO >;                         \
00340 
00341 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines