Tpetra_Distributor.cpp
// @HEADER
// ***********************************************************************
//
//          Tpetra: Templated Linear Algebra Services Package
//                 Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER

#include "Tpetra_Distributor.hpp"

namespace Tpetra {

  Distributor::Distributor(const Teuchos::RCP<const Teuchos::Comm<int> > &comm)
    : comm_(comm)
    , numExports_(0)
    , selfMessage_(false)
    , numSends_(0)
    , maxSendLength_(0)
    , numReceives_(0)
    , totalReceiveLength_(0)
  {}

  Distributor::Distributor(const Distributor & distributor)
    : comm_(distributor.comm_)
    , numExports_(distributor.numExports_)
    , selfMessage_(distributor.selfMessage_)
    , numSends_(distributor.numSends_)
    , maxSendLength_(distributor.maxSendLength_)
    , numReceives_(distributor.numReceives_)
    , totalReceiveLength_(distributor.totalReceiveLength_)
    , reverseDistributor_(distributor.reverseDistributor_)
  {}

  Distributor::~Distributor()
  {
    // We shouldn't have any outstanding communication requests at
    // this point.
    TEUCHOS_TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error,
      "Tpetra::Distributor: Destructor called with " << requests_.size()
      << " outstanding posts (unfulfilled communication requests).  There "
      "should be none at this point.  Please report this bug to the Tpetra "
      "developers.");
  }

  size_t Distributor::getTotalReceiveLength() const
  { return totalReceiveLength_; }

  size_t Distributor::getNumReceives() const
  { return numReceives_; }

  bool Distributor::hasSelfMessage() const
  { return selfMessage_; }

  size_t Distributor::getNumSends() const
  { return numSends_; }

  size_t Distributor::getMaxSendLength() const
  { return maxSendLength_; }

  Teuchos::ArrayView<const int> Distributor::getImagesFrom() const
  { return imagesFrom_; }

  Teuchos::ArrayView<const size_t> Distributor::getLengthsFrom() const
  { return lengthsFrom_; }

  Teuchos::ArrayView<const int> Distributor::getImagesTo() const
  { return imagesTo_; }

  Teuchos::ArrayView<const size_t> Distributor::getLengthsTo() const
  { return lengthsTo_; }

  const Teuchos::RCP<Distributor> &
  Distributor::getReverse() const {
    if (reverseDistributor_ == Teuchos::null) {
      // need to create reverse distributor
      createReverseDistributor();
    }
    return reverseDistributor_;
  }

  void Distributor::createReverseDistributor() const {

    reverseDistributor_ = Teuchos::rcp(new Distributor(comm_));

    // The total length of all the sends of this Distributor.  We
    // calculate it because it's the total length of all the receives
    // of the reverse Distributor.
    size_t totalSendLength = std::accumulate(lengthsTo_.begin(),lengthsTo_.end(),0);

    // The maximum length of any of the receives of this Distributor.
    // We calculate it because it's the maximum length of any of the
    // sends of the reverse Distributor.
    size_t maxReceiveLength = 0;
    const int myImageID = comm_->getRank();
    for (size_t i=0; i < numReceives_; ++i) {
      if (imagesFrom_[i] != myImageID) {
        // Don't count receives for messages sent by myself to myself.
        if (lengthsFrom_[i] > maxReceiveLength) {
          maxReceiveLength = lengthsFrom_[i];
        }
      }
    }

    // Initialize all of reverseDistributor's data members.  This
    // mainly just involves flipping "send" and "receive," or the
    // equivalent "to" and "from."
    reverseDistributor_->lengthsTo_ = lengthsFrom_;
    reverseDistributor_->imagesTo_ = imagesFrom_;
    reverseDistributor_->indicesTo_ = indicesFrom_;
    reverseDistributor_->startsTo_ = startsFrom_;
    reverseDistributor_->lengthsFrom_ = lengthsTo_;
    reverseDistributor_->imagesFrom_ = imagesTo_;
    reverseDistributor_->indicesFrom_ = indicesTo_;
    reverseDistributor_->startsFrom_ = startsTo_;
    reverseDistributor_->numSends_ = numReceives_;
    reverseDistributor_->numReceives_ = numSends_;
    reverseDistributor_->selfMessage_ = selfMessage_;
    reverseDistributor_->maxSendLength_ = maxReceiveLength;
    reverseDistributor_->totalReceiveLength_ = totalSendLength;
    // Note: Technically, this Distributor is its reverse Distributor's
    //       reverse Distributor, but we do not set that pointer here.
    //       Leaving it unset gives us an opportunity to test that
    //       createReverseDistributor() is an inverse operation with
    //       respect to the value semantics of Distributors.
    // Note: numExports_ is not copied.
  }
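
  // Worked example (hypothetical values, for exposition only; nothing in
  // this file computes these particular numbers): suppose the forward
  // Distributor on this process has
  //   imagesTo_   = [1, 3],  lengthsTo_   = [4, 2]   (sends)
  //   imagesFrom_ = [2],     lengthsFrom_ = [6]      (receives)
  // Then after createReverseDistributor(), the reverse Distributor has
  //   imagesTo_   = [2],     lengthsTo_   = [6]
  //   imagesFrom_ = [1, 3],  lengthsFrom_ = [4, 2]
  // with totalReceiveLength_ = 4 + 2 = 6 and (assuming image 2 is not
  // this process) maxSendLength_ = 6.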

  void Distributor::doWaits() {
    if (requests_.size() > 0) {
      Teuchos::waitAll(*comm_,requests_());
      // Requests should all be null; clear them.
#ifdef HAVE_TEUCHOS_DEBUG
      using Teuchos::Array;
      using Teuchos::CommRequest;
      using Teuchos::RCP;
      for (Array<RCP<CommRequest> >::const_iterator i = requests_.begin();
           i != requests_.end(); ++i)
      {
        TEUCHOS_TEST_FOR_EXCEPTION(*i != Teuchos::null, std::runtime_error,
          Teuchos::typeName(*this) << "::doWaits(): Communication requests "
          "should all be null after calling Teuchos::waitAll() on them, but "
          "at least one request is not null.");
      }
#endif // HAVE_TEUCHOS_DEBUG
      requests_.clear();
    }
  }

  void Distributor::doReverseWaits()
  {
    // call doWaits() on the reverse Distributor, if it exists
    if (! reverseDistributor_.is_null()) {
      reverseDistributor_->doWaits();
    }
  }

  std::string Distributor::description() const
  {
    std::ostringstream oss;
    oss << Teuchos::Describable::description();
    return oss.str();
  }

  void Distributor::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
  {
    using std::endl;
    using std::setw;
    using Teuchos::VERB_DEFAULT;
    using Teuchos::VERB_NONE;
    using Teuchos::VERB_LOW;
    using Teuchos::VERB_MEDIUM;
    using Teuchos::VERB_HIGH;
    using Teuchos::VERB_EXTREME;
    Teuchos::EVerbosityLevel vl = verbLevel;
    if (vl == VERB_DEFAULT) vl = VERB_LOW;
    const int myImageID = comm_->getRank();
    const int numImages = comm_->getSize();
    Teuchos::OSTab tab(out);

    if (vl == VERB_NONE) {
      return;
    } else {
      if (myImageID == 0) {
        // VERB_LOW and higher prints description() (on Proc 0 only).
        out << this->description() << endl;
      }
      if (vl == VERB_LOW) {
        return;
      } else {
        // vl > VERB_LOW lets each image print its data.  We assume
        // that all images can print to the given output stream, and
        // execute barriers to make it more likely that the output
        // will be in the right order.
        for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
          if (myImageID == imageCtr) {
            out << "[Node " << myImageID << " of " << numImages << "]" << endl;
            out << " selfMessage: " << hasSelfMessage() << endl;
            out << " numSends: " << getNumSends() << endl;
            if (vl == VERB_HIGH || vl == VERB_EXTREME) {
              out << " imagesTo: " << toString(imagesTo_) << endl;
              out << " lengthsTo: " << toString(lengthsTo_) << endl;
              out << " maxSendLength: " << getMaxSendLength() << endl;
            }
            if (vl == VERB_EXTREME) {
              out << " startsTo: " << toString(startsTo_) << endl;
              out << " indicesTo: " << toString(indicesTo_) << endl;
            }
            if (vl == VERB_HIGH || vl == VERB_EXTREME) {
              out << " numReceives: " << getNumReceives() << endl;
              out << " totalReceiveLength: " << getTotalReceiveLength() << endl;
              out << " lengthsFrom: " << toString(lengthsFrom_) << endl;
              out << " imagesFrom: " << toString(imagesFrom_) << endl;
            }
            // Last output is a flush; it leaves a space and also
            // helps synchronize output.
            out << std::flush;
          } // if it's my image's turn to print
          // Execute barriers to give output time to synchronize.
          // One barrier generally isn't enough.
          comm_->barrier();
          comm_->barrier();
          comm_->barrier();
        } // for each image
      }
    }
  }

  void
  Distributor::computeReceives()
  {
    int myImageID = comm_->getRank();
    int numImages = comm_->getSize();

    // to_nodes_from_me[i] == the number of messages sent by this node
    // to node i.  The data in numSends_, imagesTo_, lengthsTo_
    // concern the contiguous sends.  Therefore, each node will be
    // listed in imagesTo_ at most once.
    {
      Teuchos::Array<size_t> to_nodes_from_me(numImages,0);
#     ifdef HAVE_TEUCHOS_DEBUG
      bool counting_error = false;
#     endif
      for (size_t i=0; i < (numSends_ + (selfMessage_ ? 1 : 0)); ++i) {
#       ifdef HAVE_TEUCHOS_DEBUG
        if (to_nodes_from_me[imagesTo_[i]] != 0) {
          counting_error = true;
        }
#       endif
        to_nodes_from_me[imagesTo_[i]] = 1;
      }
#     ifdef HAVE_TEUCHOS_DEBUG
      SHARED_TEST_FOR_EXCEPTION(counting_error, std::logic_error,
        "Tpetra::Distributor::computeReceives: There was an error on at least "
        "one node in counting the number of messages sent by that node to the "
        "other nodes.  Please report this bug to the Tpetra developers.",
        *comm_);
#     endif
      // Each process will get back only one item (hence, counts =
      // ones) from the array of global sums, namely that entry
      // corresponding to the process, and detailing how many receives
      // it has.  This total includes self sends.
      //
      // mfh 09 Jan 2012: The reduceAllAndScatter really isn't
      // necessary here.  Since counts is just all ones, we could
      // replace this with an all-reduce on to_nodes_from_me, and let
      // my process (with rank myRank) get numReceives_ from
      // to_nodes_from_me[myRank].  The HPCCG miniapp uses the
      // all-reduce method.  It could be possible that
      // reduceAllAndScatter is faster, but it also makes the code
      // more complicated, and it can't be _asymptotically_ faster
      // (MPI_Allreduce has twice the critical path length of
      // MPI_Reduce, so reduceAllAndScatter can't be more than twice
      // as fast as the all-reduce, even if the scatter is free).
      Teuchos::Array<int> counts (numImages, 1);
      Teuchos::reduceAllAndScatter<int,size_t> (*comm_, Teuchos::REDUCE_SUM, numImages, &to_nodes_from_me[0], &counts[0], &numReceives_);
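      // Hedged sketch (commented out, not enabled here) of the all-reduce
      // alternative described in the comment above: sum to_nodes_from_me
      // across all processes, then read off this process's entry.  The
      // name gblCounts is only illustrative; this is a sketch of what
      // such a change might look like, not what this file actually does.
      //
      //   Teuchos::Array<size_t> gblCounts (numImages, 0);
      //   Teuchos::reduceAll<int, size_t> (*comm_, Teuchos::REDUCE_SUM,
      //     numImages, to_nodes_from_me.getRawPtr (), gblCounts.getRawPtr ());
      //   numReceives_ = gblCounts[myImageID];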
    }

    // Resize these to length numReceives_, with zero entries.
    lengthsFrom_.assign(numReceives_, 0);
    imagesFrom_.assign(numReceives_, 0);

    // FINISH: Why do these work?  They are blocking sends, and should
    //         block until completion, which happens below.
    // FINISH: Consider switching them to non-blocking sends.
    // NOTE: Epetra has both, old (non-blocking) and new (mysterious).

    for (size_t i=0; i < numSends_ + (selfMessage_ ? 1 : 0); ++i) {
      if (imagesTo_[i] != myImageID ) {
        // Send a message to imagesTo_[i], telling it that our pattern
        // sends it lengthsTo_[i] blocks of packets.
        Teuchos::send(*comm_,lengthsTo_[i],imagesTo_[i]);
      }
      else {
        // The self-message goes at the end of the receive arrays.
        lengthsFrom_[numReceives_-1] = lengthsTo_[i];
        imagesFrom_[numReceives_-1] = myImageID;
      }
    }

    for (size_t i=0; i < numReceives_ - (selfMessage_ ? 1 : 0); ++i) {
      // Receive one variable from any sender.  Store the value in
      // lengthsFrom_[i], and the sender's image ID in imagesFrom_[i].
      // imagesFrom_[i] = comm_->receive(&lengthsFrom_[i], 1, -1);
      imagesFrom_[i] = Teuchos::receive(*comm_,-1,&lengthsFrom_[i]);
    }
    comm_->barrier();

    // Sort the imagesFrom_ array, and apply the same permutation to
    // lengthsFrom_.  This ensures that imagesFrom_[i] and
    // lengthsFrom_[i] refer to the same thing.
    sort2 (imagesFrom_.begin(), imagesFrom_.end(), lengthsFrom_.begin());

    // Compute indicesFrom_
    totalReceiveLength_ = std::accumulate(lengthsFrom_.begin(), lengthsFrom_.end(), 0);
    indicesFrom_.clear();
    indicesFrom_.reserve(totalReceiveLength_);
    for (size_t i=0; i < totalReceiveLength_; ++i) {
      indicesFrom_.push_back(i);
    }

    startsFrom_.clear();
    startsFrom_.reserve(numReceives_);
    for (size_t i=0, j = 0; i < numReceives_; ++i) {
      startsFrom_.push_back(j);
      j += lengthsFrom_[i];
    }

    if (selfMessage_) --numReceives_;

    comm_->barrier();
  }
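
  // Worked example (a hypothetical two-process pattern, for exposition
  // only; nothing in this file computes these particular numbers): if
  // process 0 sends 5 entries to process 1, and process 1 sends 3 entries
  // to process 0, then after computeReceives() process 0 has
  //   numReceives_ = 1, imagesFrom_ = [1], lengthsFrom_ = [3],
  //   startsFrom_ = [0], totalReceiveLength_ = 3,
  // and process 1 has the analogous data with lengthsFrom_ = [5].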

  size_t
  Distributor::createFromSends (const Teuchos::ArrayView<const int> &exportNodeIDs)
  {
    using Teuchos::outArg;
    numExports_ = exportNodeIDs.size();

    const int myImageID = comm_->getRank();
    const int numImages = comm_->getSize();

    // exportNodeIDs tells us the communication pattern for this
    // distributor.  It dictates the way that the export data will be
    // interpreted in doPosts().  We want to perform at most one
    // communication per node; this is for two reasons:
    //   * minimize latency/overhead in the comm routines (nice)
    //   * match the number of receives and sends between nodes (necessary)
    //
    // Teuchos::Comm requires that the data for a send be contiguous
    // in a send buffer.  Therefore, if the data in the send buffer
    // for doPosts() is not contiguous, it will need to be copied into
    // a contiguous buffer.  The user has specified this noncontiguous
    // pattern and we can't do anything about it.  However, if they do
    // not provide an efficient pattern, we will warn them if one of
    // the following compile-time options has been set:
    //   * HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS
    //   * HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS
    //
    // If the data is contiguous, then we can post the sends in situ
    // (i.e., without needing to copy them into a send buffer).
    //
    // Determine contiguity.  There are a number of ways to do this:
    // * If the export IDs are sorted, then all exports to a
    //   particular node must be contiguous.  This is what Epetra does.
    // * If the node ID of the current export has already been listed,
    //   then the previous export must have gone to the same node.
    //   This tests contiguity, but not sortedness.
    //
    // Both of these tests take O(n) time, where n is the number of
    // exports.  However, the latter will positively identify a greater
    // portion of contiguous patterns.  We will use the latter method.
    //
    // Check whether the export IDs are grouped by image without gaps.
    // If so, indicesTo_ is left empty.

    // Set up data structures for quick traversal of arrays.
    // This contains the number of sends for each image ID.
    Teuchos::Array<size_t> starts (numImages + 1, 0);

    // numActive is the number of sends that are not null (i.e., not ignored).
    size_t numActive = 0;
    char needSendBuff = 0;

    int badID = -1;
    for (size_t i = 0; i < numExports_; ++i) {
      int exportID = exportNodeIDs[i];
      if (exportID >= numImages) {
        badID = myImageID;
        break;
      }
      else if (exportID >= 0) {
        // increment starts[exportID]
        ++starts[exportID];
        // If starts[exportID] is now greater than one, check that the
        // previous export went to this node.  This is a safe comparison,
        // because starts[exportID] > 1 implies that i > 1.
        // Null (negative) entries break contiguity; e.g.,
        // [ 0, 0, 0, 1, -99, 1, 2, 2, 2] is not contiguous.
        if (needSendBuff==0 && starts[exportID] > 1 && exportID != exportNodeIDs[i-1]) {
          needSendBuff = 1;
        }
        ++numActive;
      }
    }
    {
      int gbl_badID;
      Teuchos::reduceAll(*comm_,Teuchos::REDUCE_MAX,badID,outArg(gbl_badID));
      TEUCHOS_TEST_FOR_EXCEPTION(gbl_badID >= 0, std::runtime_error,
          Teuchos::typeName(*this) << "::createFromSends(): bad node ID listed on node " << gbl_badID << ".");
    }

#   if defined(HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS) || defined(HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS)
    {
      char global_needSendBuff;
      Teuchos::reduceAll (*comm_, Teuchos::REDUCE_MAX, needSendBuff, Teuchos::ptr (&global_needSendBuff));
      TPETRA_EFFICIENCY_WARNING(global_needSendBuff,std::runtime_error,
          "::createFromSends(): Grouping export IDs together leads to improved performance.");
    }
#   endif

    // Determine from the caller's data whether or not the current
    // process should send (a) message(s) to itself.
    if (starts[myImageID] != 0) {
      selfMessage_ = true;
    }
    else {
      selfMessage_ = false;
    }


#ifdef HAVE_TEUCHOS_DEBUG
    bool index_neq_numActive = false;
    bool send_neq_numSends = false;
#endif
    if (!needSendBuff) {
      // Grouped by image; no send buffer or indicesTo_ needed.
      numSends_ = 0;
      // Count the total number of sends, i.e., the total number of
      // images to which we are sending.  This includes myself, if
      // applicable.
      for (int i=0; i < numImages; ++i) {
        if (starts[i]) ++numSends_;
      }

      // Not only do we not need these, but we must clear them, as
      // the empty status of indicesTo_ is used as a flag later.
      indicesTo_.resize(0);
      // Size these to numSends_; note that at this point, numSends_
      // still includes self sends.  Set their values to zero.
      imagesTo_.assign(numSends_,0);
      startsTo_.assign(numSends_,0);
      lengthsTo_.assign(numSends_,0);

      // Set startsTo_ to the offset of each send (i.e., each image ID),
      // and imagesTo_ to the image ID of each send.  In interpreting
      // this code, remember that we are assuming contiguity; that is
      // why the index skips through the ranks.
      {
        size_t index = 0, nodeIndex = 0;
        for (size_t i = 0; i < numSends_; ++i) {
          while (exportNodeIDs[nodeIndex] < 0) {
            ++nodeIndex; // skip all negative node IDs
          }
          startsTo_[i] = nodeIndex;
          int imageID = exportNodeIDs[nodeIndex];
          imagesTo_[i] = imageID;
          index     += starts[imageID];
          nodeIndex += starts[imageID];
        }
#ifdef HAVE_TEUCHOS_DEBUG
        if (index != numActive) {
          index_neq_numActive = true;
        }
#endif
      }
      // Sort the startsTo_ and image IDs together, in ascending order,
      // according to image IDs.
      if (numSends_ > 0) {
        sort2(imagesTo_.begin(), imagesTo_.end(), startsTo_.begin());
      }
      // Compute the maximum send length.
      maxSendLength_ = 0;
      for (size_t i = 0; i < numSends_; ++i) {
        int imageID = imagesTo_[i];
        lengthsTo_[i] = starts[imageID];
        if ((imageID != myImageID) && (lengthsTo_[i] > maxSendLength_)) {
          maxSendLength_ = lengthsTo_[i];
        }
      }
    }
    else {
      // Not grouped by image; we need a send buffer and indicesTo_.

      // starts[i] is the number of sends to node i.
      // numActive equals the total number of sends, \sum_i starts[i].

      // The loop below starts at starts[1], so check starts[0] explicitly.
      if (starts[0] == 0 ) {
        numSends_ = 0;
      }
      else {
        numSends_ = 1;
      }
      for (Teuchos::Array<size_t>::iterator i=starts.begin()+1,
                                            im1=starts.begin();
           i != starts.end(); ++i)
      {
        if (*i != 0) ++numSends_;
        *i += *im1;
        im1 = i;
      }
      // starts[i] now contains the number of exports to nodes 0 through i.

      for (Teuchos::Array<size_t>::reverse_iterator ip1=starts.rbegin(),
                                                      i=starts.rbegin()+1;
           i != starts.rend(); ++i)
      {
        *ip1 = *i;
        ip1 = i;
      }
      starts[0] = 0;
      // starts[i] now contains the number of exports to nodes 0 through
      // i-1, i.e., all nodes before node i.
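      //
      // Worked example (hypothetical counts, for exposition only), with
      // numImages = 3, so that starts has numImages+1 = 4 entries:
      //   per-node send counts:     starts = [2, 0, 3, 0]
      //   after the forward scan:   starts = [2, 2, 5, 5]
      //   after the backward shift: starts = [0, 2, 2, 5]
      // Here numSends_ = 2 (two nonzero counts), and starts[i] is the
      // offset of node i's block in the send buffer.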

      indicesTo_.resize(numActive);

      for (size_t i = 0; i < numExports_; ++i) {
        if (exportNodeIDs[i] >= 0) {
          // record the offset to the sendBuffer for this export
          indicesTo_[starts[exportNodeIDs[i]]] = i;
          // now increment the offset for this node
          ++starts[exportNodeIDs[i]];
        }
      }
      // Our send buffer will contain the export data for each of the
      // nodes we communicate with, in order by node ID:
      //   sendBuffer = {node_0_data, node_1_data, ..., node_np-1_data}
      // indicesTo_ now maps each export to its location in the send
      // buffer; the data for export i goes at sendBuffer[indicesTo_[i]].
      //
      // starts[i] once again contains the number of exports to
      // nodes 0 through i.
      for (int node = numImages-1; node != 0; --node) {
        starts[node] = starts[node-1];
      }
      starts.front() = 0;
      starts[numImages] = numActive;
      //
      // starts[node] once again contains the number of exports to
      // nodes 0 through node-1, i.e., the offset of node's data in
      // the send buffer.

      // These may contain invalid entries for nodes with which we do
      // not communicate; that is okay.
      imagesTo_.resize(numSends_);
      startsTo_.resize(numSends_);
      lengthsTo_.resize(numSends_);

      // For each group of sends/exports, record the destination node,
      // the length, and the offset of this send into the
      // send buffer (startsTo_).
      maxSendLength_ = 0;
      size_t snd = 0;
      for (int node = 0; node < numImages; ++node ) {
        if (starts[node+1] != starts[node]) {
          lengthsTo_[snd] = starts[node+1] - starts[node];
          startsTo_[snd] = starts[node];
          // record the max length over all off-node sends
          if ((node != myImageID) && (lengthsTo_[snd] > maxSendLength_)) {
            maxSendLength_ = lengthsTo_[snd];
          }
          imagesTo_[snd] = node;
          ++snd;
        }
      }
#ifdef HAVE_TEUCHOS_DEBUG
      if (snd != numSends_) {
        send_neq_numSends = true;
      }
#endif
    }
#ifdef HAVE_TEUCHOS_DEBUG
    SHARED_TEST_FOR_EXCEPTION(index_neq_numActive, std::logic_error,
        "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
    SHARED_TEST_FOR_EXCEPTION(send_neq_numSends, std::logic_error,
        "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
#endif

    if (selfMessage_) --numSends_;

    // Invert the map to determine which messages are received, and
    // their lengths.
    computeReceives();

    return totalReceiveLength_;
  }

} // namespace Tpetra
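
// Hedged usage sketch (not part of this translation unit; the variable
// names and the send pattern below are assumptions chosen for
// illustration).  It shows how a caller might build a plan in which each
// process sends one entry to the next-higher rank:
//
//   Teuchos::RCP<const Teuchos::Comm<int> > comm = ...; // an existing communicator
//   Tpetra::Distributor distributor (comm);
//   Teuchos::Array<int> exportNodeIDs (1, (comm->getRank () + 1) % comm->getSize ());
//   // createFromSends() sets up the pattern and returns the total number
//   // of entries this process will receive (totalReceiveLength_).
//   const size_t numImports = distributor.createFromSends (exportNodeIDs ());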