Tpetra_Distributor.cpp

00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #include "Tpetra_Distributor.hpp"
00030 
00031 namespace Tpetra {
00032 
  /// \brief Construct a Distributor over the given communicator.
  ///
  /// All plan counters start at zero and no send/receive plan exists yet;
  /// the plan is built later (see createFromSends()).
  ///
  /// \param comm [in] Communicator this Distributor will use for all
  ///   collective and point-to-point operations.
  Distributor::Distributor(const Teuchos::RCP<const Teuchos::Comm<int> > &comm) 
    : comm_(comm)
    , numExports_(0)
    , selfMessage_(false)
    , numSends_(0)
    , maxSendLength_(0)
    , numReceives_(0)
    , totalReceiveLength_(0)
  {}
00042 
  /// \brief Copy constructor.
  ///
  /// Copies the plan scalars and shares the reverse distributor (an RCP
  /// copy, not a deep copy).  NOTE(review): the array members (imagesTo_,
  /// lengthsTo_, etc.) are not listed here, so they are copied by their
  /// own default member initialization semantics — confirm against the
  /// member declarations in the header.
  Distributor::Distributor(const Distributor & distributor) 
    : comm_(distributor.comm_)
    , numExports_(distributor.numExports_)
    , selfMessage_(distributor.selfMessage_)
    , numSends_(distributor.numSends_)
    , maxSendLength_(distributor.maxSendLength_)
    , numReceives_(distributor.numReceives_)
    , totalReceiveLength_(distributor.totalReceiveLength_)
    , reverseDistributor_(distributor.reverseDistributor_)
  {}
00053 
00054   Distributor::~Distributor() 
00055   {
00056   // we shouldn't have any outstanding requests at this point; verify
00057     TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error,
00058         Teuchos::typeName(*this) << "::Distributor~(): Destructor called with outstanding posts.");
00059   }
00060 
00061   size_t Distributor::getTotalReceiveLength() const 
00062   { return totalReceiveLength_; }
00063 
00064   size_t Distributor::getNumReceives() const 
00065   { return numReceives_; }
00066 
00067   bool Distributor::hasSelfMessage() const 
00068   { return selfMessage_; }
00069 
00070   size_t Distributor::getNumSends() const 
00071   { return numSends_; }
00072 
00073   size_t Distributor::getMaxSendLength() const 
00074   { return maxSendLength_; }
00075 
00076   Teuchos::ArrayView<const int> Distributor::getImagesFrom() const 
00077   { return imagesFrom_; }
00078 
00079   Teuchos::ArrayView<const size_t> Distributor::getLengthsFrom() const 
00080   { return lengthsFrom_; }
00081 
00082   Teuchos::ArrayView<const int> Distributor::getImagesTo() const 
00083   { return imagesTo_; }
00084 
00085   Teuchos::ArrayView<const size_t> Distributor::getLengthsTo() const 
00086   { return lengthsTo_; }
00087 
00088   const Teuchos::RCP<Distributor> & 
00089   Distributor::getReverse() const {
00090     if (reverseDistributor_ == Teuchos::null) { 
00091       // need to create reverse distributor
00092       createReverseDistributor();
00093     }
00094     return reverseDistributor_;
00095   }
00096 
00097 
00098   void Distributor::createReverseDistributor() const {
00099 
00100     reverseDistributor_ = Teuchos::rcp(new Distributor(comm_));
00101 
00102     // compute new totalSendLength
00103     size_t totalSendLength = std::accumulate(lengthsTo_.begin(),lengthsTo_.end(),0);
00104 
00105     // compute new maxReceiveLength
00106     size_t maxReceiveLength = 0;
00107     const int myImageID = comm_->getRank();
00108     for (size_t i=0; i < numReceives_; ++i) {
00109       if (imagesFrom_[i] != myImageID) {
00110         if (lengthsFrom_[i] > maxReceiveLength) {
00111           maxReceiveLength = lengthsFrom_[i];
00112         }
00113       }
00114     }
00115 
00116     // initialize all of reverseDistributor's data members
00117     reverseDistributor_->lengthsTo_ = lengthsFrom_;
00118     reverseDistributor_->imagesTo_ = imagesFrom_;
00119     reverseDistributor_->indicesTo_ = indicesFrom_;
00120     reverseDistributor_->startsTo_ = startsFrom_;
00121     reverseDistributor_->lengthsFrom_ = lengthsTo_;
00122     reverseDistributor_->imagesFrom_ = imagesTo_;
00123     reverseDistributor_->indicesFrom_ = indicesTo_;
00124     reverseDistributor_->startsFrom_ = startsTo_;
00125     reverseDistributor_->numSends_ = numReceives_;
00126     reverseDistributor_->numReceives_ = numSends_;
00127     reverseDistributor_->selfMessage_ = selfMessage_;
00128     reverseDistributor_->maxSendLength_ = maxReceiveLength;
00129     reverseDistributor_->totalReceiveLength_ = totalSendLength;
00130     // Note: technically, I am my reverse distributor's reverse distributor, but 
00131     //       we will not set this up, as it gives us an opportunity to test 
00132     //       that reverseDistributor is an inverse operation w.r.t. value semantics of distributors
00133     // Note: numExports_ was not copied
00134   }
00135 
00136 
00137   void Distributor::doWaits() {
00138     if (getNumReceives() > 0) {
00139       Teuchos::waitAll(*comm_,requests_());
00140       // Requests should all be null, clear them
00141 #ifdef HAVE_TEUCHOS_DEBUG
00142       for (Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest> >::const_iterator i = requests_.begin(); 
00143            i != requests_.end(); ++i) 
00144       {
00145         TEST_FOR_EXCEPTION(*i != Teuchos::null, std::runtime_error,
00146             Teuchos::typeName(*this) << "::doWaits(): Requests should be null after call to Teuchos::waitAll().");
00147       }
00148 #endif
00149       requests_.clear();
00150     }
00151   }
00152 
00153 
00154   void Distributor::doReverseWaits() 
00155   {
00156     // call doWaits() on the reverse Distributor, if it exists
00157     if (reverseDistributor_ != Teuchos::null) {
00158       reverseDistributor_->doWaits();
00159     }
00160   }
00161 
00162   std::string Distributor::description() const
00163   {
00164     std::ostringstream oss;
00165     oss << Teuchos::Describable::description();
00166     return oss.str();
00167   }
00168 
  /// \brief Print this Distributor's state to \c out at the given verbosity.
  ///
  /// VERB_NONE prints nothing.  VERB_LOW prints description() on node 0
  /// only.  Higher levels add per-node send/receive data (lists at
  /// VERB_HIGH, plus index arrays at VERB_EXTREME).
  ///
  /// NOTE(review): the per-node loop below performs no inter-node
  /// synchronization, so output from different nodes may interleave on a
  /// shared stream — confirm whether callers rely on ordered output.
  void Distributor::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
  {
    using std::endl;
    using std::setw;
    using Teuchos::VERB_DEFAULT;
    using Teuchos::VERB_NONE;
    using Teuchos::VERB_LOW;
    using Teuchos::VERB_MEDIUM;
    using Teuchos::VERB_HIGH;
    using Teuchos::VERB_EXTREME;
    Teuchos::EVerbosityLevel vl = verbLevel;
    // An unspecified verbosity defaults to the lowest non-silent level.
    if (vl == VERB_DEFAULT) vl = VERB_LOW;
    const int myImageID = comm_->getRank();
    const int numImages = comm_->getSize();
    Teuchos::OSTab tab(out);
    if (vl != VERB_NONE) {
      // VERB_LOW and higher prints description()
      if (myImageID == 0) out << this->description() << std::endl; 
      for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
        if (myImageID == imageCtr) {
          if (vl != VERB_LOW) {
            // VERB_MEDIUM and above: per-node summary counters.
            out << "[Node " << myImageID << " of " << numImages << "]" << endl;
            out << " selfMessage: " << hasSelfMessage() << endl;
            out << " numSends: " << getNumSends() << endl;
            if (vl == VERB_HIGH || vl == VERB_EXTREME) {
              // VERB_HIGH and above: the send plan arrays themselves.
              out << " imagesTo: " << toString(imagesTo_) << endl;
              out << " lengthsTo: " << toString(lengthsTo_) << endl;
              out << " maxSendLength: " << getMaxSendLength() << endl;
            }
            if (vl == VERB_EXTREME) {
              // VERB_EXTREME only: offsets and the indirection array.
              out << " startsTo: " << toString(startsTo_) << endl;
              out << " indicesTo: " << toString(indicesTo_) << endl;
            }
            if (vl == VERB_HIGH || vl == VERB_EXTREME) {
              // Receive-side plan data.
              out << " numReceives: " << getNumReceives() << endl;
              out << " totalReceiveLength: " << getTotalReceiveLength() << endl;
              out << " lengthsFrom: " << toString(lengthsFrom_) << endl;
              out << " imagesFrom: " << toString(imagesFrom_) << endl;
            }
          }
        }
      }
    }
  }
00213 
00214 
00216   void Distributor::computeReceives()
00217   {
00218     int myImageID = comm_->getRank();
00219     int numImages = comm_->getSize();
00220 
00221     // to_nodes_from_me[i] == number of messages sent by this node to node i
00222     // the info in numSends_, imagesTo_, lengthsTo_ concerns the contiguous sends
00223     // therefore, each node will be listed in imagesTo_ at most once
00224     {
00225       Teuchos::Array<size_t> to_nodes_from_me(numImages,0);
00226 #     ifdef HAVE_TEUCHOS_DEBUG 
00227         bool counting_error = false;
00228 #     endif
00229       for (size_t i=0; i < (numSends_ + (selfMessage_ ? 1 : 0)); ++i) {
00230 #       ifdef HAVE_TEUCHOS_DEBUG
00231           if (to_nodes_from_me[imagesTo_[i]] != 0) counting_error = true;
00232 #       endif
00233         to_nodes_from_me[imagesTo_[i]] = 1;
00234       }
00235 #     ifdef HAVE_TEUCHOS_DEBUG
00236         SHARED_TEST_FOR_EXCEPTION(counting_error, std::logic_error,
00237             "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
00238 #     endif
00239       // each proc will get back only one item (hence, counts = ones) from the array of globals sums, 
00240       // namely that entry corresponding to the node, and detailing how many receives it has.
00241       // this total includes self sends
00242       Teuchos::Array<int> counts(numImages, 1);
00243       Teuchos::reduceAllAndScatter<int,size_t>(*comm_,Teuchos::REDUCE_SUM,numImages,&to_nodes_from_me[0],&counts[0],&numReceives_);
00244     }
00245 
00246     // assign these to length numReceives, with zero entries
00247     lengthsFrom_.assign(numReceives_, 0);
00248     imagesFrom_.assign(numReceives_, 0);
00249 
00250     // FINISH: why do these work? they are blocking sends, and should block until completion, which happens below
00251     // FINISH: consider switching them to non-blocking
00252     // NOTE: epetra has both, old (non-blocking) and new (mysterious)
00253 
00254     for (size_t i=0; i < numSends_ + (selfMessage_ ? 1 : 0); ++i) {
00255       if (imagesTo_[i] != myImageID ) {
00256         // send a message to imagesTo_[i], telling him that our pattern sends him lengthsTo_[i] blocks of packets
00257         Teuchos::send(*comm_,lengthsTo_[i],imagesTo_[i]);
00258       }
00259       else {
00260         // set selfMessage_ to end block of recv arrays
00261         lengthsFrom_[numReceives_-1] = lengthsTo_[i];
00262         imagesFrom_[numReceives_-1] = myImageID;
00263       }
00264     }
00265 
00266     //
00267     for (size_t i=0; i < numReceives_ - (selfMessage_ ? 1 : 0); ++i) {
00268       // receive one variable from any sender.
00269       // store the value in lengthsFrom_[i], and store the sender's ImageID in imagesFrom_[i]
00270       // imagesFrom_[i] = comm_->receive(&lengthsFrom_[i], 1, -1);
00271       imagesFrom_[i] = Teuchos::receive(*comm_,-1,&lengthsFrom_[i]);
00272     }
00273     comm_->barrier();
00274 
00275     sort2(imagesFrom_.begin(), imagesFrom_.end(), lengthsFrom_.begin());
00276 
00277     // Compute indicesFrom_
00278     totalReceiveLength_ = std::accumulate(lengthsFrom_.begin(), lengthsFrom_.end(), 0);
00279     indicesFrom_.clear();
00280     indicesFrom_.reserve(totalReceiveLength_);
00281     for (size_t i=0; i < totalReceiveLength_; ++i) {
00282       indicesFrom_.push_back(i);
00283     }
00284 
00285     startsFrom_.clear();
00286     startsFrom_.reserve(numReceives_);
00287     for (size_t i=0, j = 0; i < numReceives_; ++i) {
00288       startsFrom_.push_back(j);
00289       j += lengthsFrom_[i];
00290     }
00291 
00292     if (selfMessage_) --numReceives_;
00293 
00294     comm_->barrier();
00295   }
00296 
00297 
  /// \brief Set up this Distributor's plan from a list of destination nodes.
  ///
  /// exportNodeIDs[i] is the node to which export i will be sent; negative
  /// IDs mark exports that are skipped.  Fills in the send-side plan
  /// (numSends_, imagesTo_, startsTo_, lengthsTo_, indicesTo_,
  /// maxSendLength_, selfMessage_) and then calls computeReceives() to
  /// derive the receive side.  Collective: all nodes in comm_ must call it.
  ///
  /// \param exportNodeIDs [in] destination node ID for each export.
  /// \return totalReceiveLength_, the total number of blocks this node
  ///         will receive.
  ///
  /// Throws std::runtime_error on every node if any node listed an ID
  /// >= the communicator size (the check is globally reduced).
  size_t Distributor::createFromSends(const Teuchos::ArrayView<const int> &exportNodeIDs) {
    using Teuchos::outArg;
    numExports_ = exportNodeIDs.size();

    const int myImageID = comm_->getRank();
    const int numImages = comm_->getSize();

    // exportNodeIDs tells us the communication pattern for this distributor
    // it dictates the way that the export data will be interpretted in doPosts()
    // we want to perform at most one communication per node; this is for two
    // reasons:
    //   * minimize latency/overhead in the comm routines (nice)
    //   * match the number of receives and sends between nodes (necessary)
    // Teuchos::Comm requires that the data for a send is contiguous in a send
    // buffer.
    // Therefore, if the data in the send buffer for doPosts() is not
    // contiguous, it will need to be copied into a contiguous buffer.
    // 
    // The user has specified this pattern and we can't do anything about it.
    // 
    // However, if they do not provide an efficient pattern, we will warn them 
    // if one of
    //    HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS 
    //    HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS 
    // is on.
    //
    // If the data is contiguous, then we can post the sends in situ.
    // 
    // Determine contiguity. There are a number of ways to do this:
    // * if the export IDs are sorted, then all exports to a particular 
    //   node must be contiguous. This is how Epetra does it. 
    // * if the export ID of the current export already has been listed,
    //   then the previous listing should correspond to the same export.
    //   This tests contiguity, but not sortedness.
    // Both of these tests require O(n), where n is the number of 
    // exports. However, the latter will positively identify a greater
    // portion of contiguous patterns. We will use the latter method.
    // 
    // Check to see if values are grouped by images without gaps
    // If so, indices_to -> 0.

    // Setup data structures for quick traversal of arrays.
    // This contains the number of sends for each image id.
    Teuchos::Array<size_t> starts(numImages + 1, 0);

    // numActive is the number of sends that are not Null
    size_t numActive = 0;
    char needSendBuff = 0;

    // First pass: count exports per destination, detect invalid IDs, and
    // detect non-contiguous grouping (which forces a send buffer).
    int badID = -1;
    for (size_t i = 0; i < numExports_; ++i) {
      int exportID = exportNodeIDs[i];
      if (exportID >= numImages) {
        badID = myImageID;
        break;
      }
      else if (exportID >= 0) {
        // increment starts[exportID]
        ++starts[exportID];
        // if after incrementing it is greater than one, check that the
        // previous export went to this node
        // this is a safe comparison, because starts[exportID] > 1
        // implies that i > 1. 
        // null entries break continuity.
        // e.g.,  [ 0, 0, 0, 1, -99, 1, 2, 2, 2] is not contiguous
        if (needSendBuff==0 && starts[exportID] > 1 && exportID != exportNodeIDs[i-1]) {
          needSendBuff = 1;
        }
        ++numActive;
      }
    }
    {
      // Globally agree on whether any node saw an out-of-range ID, so all
      // nodes throw (or none do) — keeps the collective calls matched.
      int gbl_badID;
      Teuchos::reduceAll(*comm_,Teuchos::REDUCE_MAX,badID,outArg(gbl_badID));
      TEST_FOR_EXCEPTION(gbl_badID >= 0, std::runtime_error,
          Teuchos::typeName(*this) << "::createFromSends(): bad node id listed on node " << gbl_badID << ".");
    }

#   if defined(HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS) || defined(HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS)
    {
      char global_needSendBuff;
      Teuchos::reduceAll(*comm_,Teuchos::REDUCE_MAX,needSendBuff,&global_needSendBuff);
      TPETRA_EFFICIENCY_WARNING(global_needSendBuff,std::runtime_error,
          "::createFromSends(): Grouping export IDs together leads to improved performance.");
    }
#   endif

    // A nonzero count for my own rank means some exports stay on-node.
    if (starts[myImageID] != 0) {
      selfMessage_ = true;
    }
    else {
      selfMessage_ = false;
    }


#ifdef HAVE_TEUCHOS_DEBUG
    bool index_neq_numActive = false;
    bool send_neq_numSends = false;
#endif
    if (!needSendBuff) {
      // grouped by image, no send buffer or indicesTo_ needed
      numSends_ = 0;
      // count total number of sends, i.e., total number of images that we are sending to
      // this includes myself
      for (int i=0; i < numImages; ++i) {
        if (starts[i]) ++numSends_;
      }

      // not only do we not need these, but we must clear it, as empty status of indicesTo 
      // is a flag used later
      indicesTo_.resize(0);
      // size these to numSends_; note, at the moment, numSends_ includes self sends
      // set their values to zeros
      imagesTo_.assign(numSends_,0);
      startsTo_.assign(numSends_,0);
      lengthsTo_.assign(numSends_,0);

      // set startsTo to the offsent for each send (i.e., each image ID)
      // set imagesTo to the image ID for each send
      // in interpretting this code, remember that we are assuming contiguity
      // that is why index skips through the ranks
      {
        size_t index = 0, nodeIndex = 0;
        for (size_t i = 0; i < numSends_; ++i) {
          while (exportNodeIDs[nodeIndex] < 0) {
            ++nodeIndex; // skip all negative node IDs
          }
          startsTo_[i] = nodeIndex;
          int imageID = exportNodeIDs[nodeIndex];
          imagesTo_[i] = imageID;
          // jump over the whole contiguous run of exports to imageID
          index     += starts[imageID];
          nodeIndex += starts[imageID];
        }
#ifdef HAVE_TEUCHOS_DEBUG
        if (index != numActive) {
          index_neq_numActive = true;
        }
#endif
      }
      // sort the startsTo and image IDs together, in ascending order, according
      // to image IDs
      if (numSends_ > 0) {
        sort2(imagesTo_.begin(), imagesTo_.end(), startsTo_.begin());
      }
      // compute the maximum send length
      maxSendLength_ = 0;
      for (size_t i = 0; i < numSends_; ++i) {
        int imageID = imagesTo_[i];
        lengthsTo_[i] = starts[imageID];
        // only off-node sends count toward the maximum
        if ((imageID != myImageID) && (lengthsTo_[i] > maxSendLength_)) {
          maxSendLength_ = lengthsTo_[i];
        }
      }
    }
    else {
      // not grouped by image, need send buffer and indicesTo_

      // starts[i] is the number of sends to node i
      // numActive equals number of sends total, \sum_i starts[i]

      // this loop starts at starts[1], so explicitly check starts[0]
      if (starts[0] == 0 ) {
        numSends_ = 0;
      }
      else {
        numSends_ = 1;
      }
      // prefix-sum starts in place while counting nonzero destinations
      for (Teuchos::Array<size_t>::iterator i=starts.begin()+1,
                                            im1=starts.begin();
           i != starts.end(); ++i) 
      {
        if (*i != 0) ++numSends_;
        *i += *im1;
        im1 = i;
      }
      // starts[i] now contains the number of exports to nodes 0 through i

      // shift the prefix sums right by one (reverse iteration avoids
      // overwriting values before they are read)
      for (Teuchos::Array<size_t>::reverse_iterator ip1=starts.rbegin(),
                                                      i=starts.rbegin()+1;
           i != starts.rend(); ++i)
      {
        *ip1 = *i;
        ip1 = i;
      }
      starts[0] = 0;
      // starts[i] now contains the number of exports to nodes 0 through
      // i-1, i.e., all nodes before node i

      indicesTo_.resize(numActive);

      for (size_t i = 0; i < numExports_; ++i) {
        if (exportNodeIDs[i] >= 0) {
          // record the offset to the sendBuffer for this export
          indicesTo_[starts[exportNodeIDs[i]]] = i;
          // now increment the offset for this node
          ++starts[exportNodeIDs[i]];
        }
      }
      // our send buffer will contain the export data for each of the nodes
      // we communicate with, in order by node id
      // sendBuffer = {node_0_data, node_1_data, ..., node_np-1_data}
      // indicesTo now maps each export to the location in our send buffer
      // associated with the export
      // data for export i located at sendBuffer[indicesTo[i]]
      //
      // starts[i] once again contains the number of exports to 
      // nodes 0 through i
      // (the loop above advanced each entry past its node's block; shift
      // right by one to restore the block-start offsets)
      for (int node = numImages-1; node != 0; --node) {
        starts[node] = starts[node-1];
      }
      starts.front() = 0;       
      starts[numImages] = numActive;
      // 
      // starts[node] once again contains the number of exports to 
      // nodes 0 through node-1
      // i.e., the start of my data in the sendBuffer

      // this contains invalid data at nodes we don't care about, that is okay
      imagesTo_.resize(numSends_);
      startsTo_.resize(numSends_);
      lengthsTo_.resize(numSends_);

      // for each group of sends/exports, record the destination node,
      // the length, and the offset for this send into the 
      // send buffer (startsTo_)
      maxSendLength_ = 0;
      size_t snd = 0;
      for (int node = 0; node < numImages; ++node ) {
        if (starts[node+1] != starts[node]) {
          lengthsTo_[snd] = starts[node+1] - starts[node];
          startsTo_[snd] = starts[node];
          // record max length for all off-node sends
          if ((node != myImageID) && (lengthsTo_[snd] > maxSendLength_)) {
            maxSendLength_ = lengthsTo_[snd];
          }
          imagesTo_[snd] = node;
          ++snd;
        }
      }
#ifdef HAVE_TEUCHOS_DEBUG
      if (snd != numSends_) {
        send_neq_numSends = true;
      }
#endif
    }
#ifdef HAVE_TEUCHOS_DEBUG
        SHARED_TEST_FOR_EXCEPTION(index_neq_numActive, std::logic_error,
            "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
        SHARED_TEST_FOR_EXCEPTION(send_neq_numSends, std::logic_error,
            "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
#endif

    // From here on, numSends_ no longer counts the self message.
    if (selfMessage_) --numSends_;

    // Invert map to see what msgs are received and what length
    computeReceives();

    return totalReceiveLength_;
  }
00558 
00559 } // namespace Tpetra
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Generated on Wed Apr 13 10:21:41 2011 for Tpetra Matrix/Vector Services by  doxygen 1.6.3