Tpetra_Distributor.hpp

00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef TPETRA_DISTRIBUTOR_HPP
00030 #define TPETRA_DISTRIBUTOR_HPP
00031 
00032 #include <Teuchos_RCP.hpp>
00033 #include <Teuchos_OrdinalTraits.hpp>
00034 #include "Tpetra_Util.hpp"
00035 #include <Teuchos_Object.hpp>
00036 #include <Teuchos_Comm.hpp>
00037 #include <Teuchos_CommHelpers.hpp>
00038 #include <Teuchos_ArrayView.hpp>
00039 #include <Teuchos_ArrayRCP.hpp>
00040 
00041 
00042 // FINISH: some of the get accessors may not be necessary anymore. clean up.
00043 // FINISH: This class may not be const correct. doPosts() et al. perhaps should be const, with affected members made mutable.
00044 
00045 namespace Tpetra {
00046 
00048 
00053   template<typename Ordinal>
00054   class Distributor : public Teuchos::Object {
00055   public:
00056 
00058 
00059 
00061     Distributor(const Teuchos::RCP<const Teuchos::Comm<Ordinal> > & comm);
00062 
00064     Distributor(const Distributor<Ordinal> & distributor);
00065 
00067     ~Distributor();
00068 
00070 
00071 
00073 
00074 
00076 
00085     void createFromSends(const Teuchos::ArrayView<const Ordinal> &exportImageIDs,
00086                          Ordinal &numImports);
00087 
00089 
00100     void createFromRecvs(const Teuchos::ArrayView<const Ordinal> & remoteGIDs, 
00101                          const Teuchos::ArrayView<const Ordinal> & remoteImageIDs, 
00102                          Teuchos::ArrayRCP<Ordinal> &exportGIDs, 
00103                          Teuchos::ArrayRCP<Ordinal> &exportImageIDs);
00104 
00106 
00108 
00109 
00111     const Ordinal & getNumReceives() const;
00112 
00114     const Ordinal & getNumSends() const;
00115 
00117 
00118     bool getSelfMessage() const;
00119 
00121     const Ordinal & getMaxSendLength() const;
00122 
00124     const Ordinal & getTotalReceiveLength() const;
00125 
00127     Teuchos::ArrayView<const Ordinal> getImagesFrom() const;
00128 
00130     Teuchos::ArrayView<const Ordinal> getImagesTo() const;
00131 
00133 
00134     Teuchos::ArrayView<const Ordinal> getLengthsFrom() const;
00135 
00137 
00138     Teuchos::ArrayView<const Ordinal> getLengthsTo() const;
00139 
00141 
00143     Teuchos::ArrayView<const Ordinal> getStartsTo() const;
00144 
00146 
00149     Teuchos::ArrayView<const Ordinal> getIndicesTo() const;
00150 
00152 
00154 
00155 
00156     // getReverse
00158 
00161     Teuchos::RCP<Distributor<Ordinal> > getReverse() const;
00162 
00164 
00166 
00167 
00169 
00178     template <typename Packet>
00179     void doPostsAndWaits(const Teuchos::ArrayView<const Packet> &exports,
00180                          Ordinal numPackets,
00181                          const Teuchos::ArrayView<Packet> &imports);
00182 
00184 
00193     template <typename Packet>
00194     void doPosts(const Teuchos::ArrayView<const Packet> &exports,
00195                  Ordinal numPackets,
00196                  const Teuchos::ArrayRCP<Packet> &imports);
00197 
00199     void doWaits();
00200 
00202 
00211     template <typename Packet>
00212     void doReversePostsAndWaits(const Teuchos::ArrayView<const Packet> &exports,
00213                                 Ordinal numPackets,
00214                                 const Teuchos::ArrayView<Packet> &imports);
00215 
00217 
00226     template <typename Packet>
00227     void doReversePosts(const Teuchos::ArrayView<const Packet> &exports,
00228                         Ordinal numPackets,
00229                         const Teuchos::ArrayRCP<Packet> &imports);
00230 
00232     void doReverseWaits();
00233 
00235 
00237 
00238 
00240     void print(std::ostream& os) const;
00241 
00243 
00244   private:
00245 
00246     // private data members
00247     Teuchos::RCP<const Teuchos::Comm<Ordinal> > comm_;
00248 
00249     Ordinal numExports_;
00250     // selfMessage_ is whether I have a send for myself
00251     bool selfMessage_;
00252     // numSends_ is number of sends to other nodes
00253     Ordinal numSends_;
00254     // imagesTo_, startsTo_ and lengthsTo_ each have size 
00255     //   numSends_ + selfMessage_
00256     Teuchos::Array<Ordinal> imagesTo_;
00257     Teuchos::Array<Ordinal> startsTo_;
00258     Teuchos::Array<Ordinal> lengthsTo_;
00259     // maxSendLength_ is the maximum send to another node: 
00260     //   max(lengthsTo_[i]) for i != me
00261     Ordinal maxSendLength_;
00262     Teuchos::Array<Ordinal> indicesTo_;
00263     // numReceives_ is the number of receives by me from other procs, not
00264     // counting self receives
00265     Ordinal numReceives_;
00266     // totalReceiveLength_ is the total number of Packet received, used to 
00267     // allocate the receive buffer
00268     Ordinal totalReceiveLength_;
00269     // imagesFrom_, startsFrom_ and lengthsFrom_ each have size 
00270     //   numReceives_ + selfMessage_
00271     Teuchos::Array<Ordinal> lengthsFrom_;
00272     Teuchos::Array<Ordinal> imagesFrom_;
00273     Teuchos::Array<Ordinal> startsFrom_;
00274     Teuchos::Array<Ordinal> indicesFrom_;
00275 
00276     // requests associated with non-blocking receives
00277     Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest> > requests_;
00278 
00279     mutable Teuchos::RCP<Distributor<Ordinal> > reverseDistributor_;
00280 
00281     // compute receive info from sends
00282     void computeReceives();
00283 
00284     // compute send info from receives
00285     void computeSends(const Teuchos::ArrayView<const Ordinal> &importIDs,
00286                       const Teuchos::ArrayView<const Ordinal> &importImageIDs,
00287                             Teuchos::ArrayRCP<Ordinal> &exportIDs,
00288                             Teuchos::ArrayRCP<Ordinal> &exportImageIDs);
00289 
00290     // create a distributor for the reverse communciation pattern (pretty much
00291     // swap all send and receive info)
00292     void createReverseDistributor() const;
00293 
00294   }; // class Distributor
00295 
00296 
00297   template <typename Ordinal>
00298   Distributor<Ordinal>::Distributor(const Teuchos::RCP<const Teuchos::Comm<Ordinal> > &comm) 
00299     : Teuchos::Object("Tpetra::Distributor")
00300     , comm_(comm)
00301     , numExports_(Teuchos::OrdinalTraits<Ordinal>::zero())
00302     , selfMessage_(false)
00303     , numSends_(Teuchos::OrdinalTraits<Ordinal>::zero())
00304     , maxSendLength_(Teuchos::OrdinalTraits<Ordinal>::zero())
00305     , numReceives_(Teuchos::OrdinalTraits<Ordinal>::zero())
00306     , totalReceiveLength_(Teuchos::OrdinalTraits<Ordinal>::zero())
00307   {}
00308 
00309   template <typename Ordinal>
00310   Distributor<Ordinal>::Distributor(Distributor<Ordinal> const& distributor) 
00311     : Teuchos::Object(distributor.label())
00312     , comm_(distributor.comm_)
00313     , numExports_(distributor.numExports_)
00314     , selfMessage_(distributor.selfMessage_)
00315     , numSends_(distributor.numSends_)
00316     , maxSendLength_(distributor.maxSendLength_)
00317     , numReceives_(distributor.numReceives_)
00318     , totalReceiveLength_(distributor.totalReceiveLength_)
00319     , reverseDistributor_(distributor.reverseDistributor_)
00320   {}
00321 
00322   template <typename Ordinal>
00323   Distributor<Ordinal>::~Distributor() 
00324   {
00325   // we shouldn't have any outstanding requests at this point; verify
00326 # ifdef TEUCHOS_DEBUG
00327     for (typename Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest> >::const_iterator i = requests_.begin(); 
00328          i != requests_.end(); ++i)
00329     {
00330       TEST_FOR_EXCEPTION(*i != Teuchos::null, std::runtime_error,
00331           "Tpetra::Distributor<"<<Teuchos::OrdinalTraits<Ordinal>::name()
00332           <<">::doWaits(): Requests should be null after call to Teuchos::waitAll().");
00333     }
00334 #endif
00335   }
00336 
00337   template <typename Ordinal>
00338   void Distributor<Ordinal>::createFromSends(
00339       const Teuchos::ArrayView<const Ordinal> &exportImageIDs,
00340       Ordinal &numImports) 
00341   {
00342     const Ordinal ZERO = Teuchos::OrdinalTraits<Ordinal>::zero();
00343     const Ordinal ONE  = Teuchos::OrdinalTraits<Ordinal>::one();
00344 
00345     numExports_ = exportImageIDs.size();
00346 
00347     const int myImageID = comm_->getRank();
00348     const int numImages = comm_->getSize();
00349 
00350     // exportImageIDs tells us the communication pattern for this distributor
00351     // it dictates the way that the export data will be interpretted in doPosts()
00352     // we want to perform at most one communication per node; this is for two
00353     // reasons:
00354     //   * minimize latency/overhead in the comm routines (nice)
00355     //   * match the number of receives and sends between nodes (necessary)
00356     // Teuchos::Comm requires that the data for a send is contiguous in a send
00357     // buffer
00358     // therefore, if the data in the send buffer for doPosts() is not
00359     // contiguous, it will need to be copied into a contiguous buffer
00360     // 
00361     // the user has specified this pattern and we can't do anything about it,
00362     // 
00363     // however, if they do not provide an efficient pattern, we will warn them 
00364     // if one of
00365     //    THROW_TPETRA_EFFICIENCY_WARNINGS 
00366     //    PRINT_TPETRA_EFFICIENCY_WARNINGS 
00367     // is on.
00368     //
00369     // if the data is contiguous, then we can post the sends in situ
00370     // 
00371     // determine contiguity. there are a number of ways to do this:
00372     // * if the export IDs are sorted, then all exports to a particular 
00373     //   node must contiguous. this is how epetra does it. 
00374     // * if the export ID of the current export already has been listed,
00375     //   then the previous listing should correspond to the same export.
00376     //   this tests contiguity, but not sortedness.
00377     // both of these tests require O(n), where n is the number of 
00378     // exports. however, the latter will positively identify a greater
00379     // portion of contiguous patterns. we will use the latter method.
00380     // 
00381     // Check to see if items are grouped by images without gaps
00382     // If so, indices_to -> 0
00383 
00384     // Setup data structures for quick traversal of arrays
00385     // this contains the number of sends for each image id
00386     Teuchos::Array<Ordinal> starts(numImages + 1, ZERO);
00387 
00388     // numActive is the number of sends that are not Null
00389     Ordinal numActive = ZERO;
00390     int needSendBuff = 0;
00391 
00392     for (int i = 0; i < numExports_; ++i) {
00393       Ordinal exportID = exportImageIDs[i];
00394       if (exportID >= ZERO) {
00395         // increment starts[exportID]
00396         ++starts[exportID];
00397         // if after incrementing it is greater than one, check that the
00398         // previous export went to this node
00399         // this is a safe comparison, because starts[exportID] > 1
00400         // implies that i > 1. 
00401         // null entries break continuity.
00402         // e.g.,  [ 0, 0, 0, 1, -99, 1, 2, 2, 2] is not considered contiguous
00403         if (needSendBuff==0 && starts[exportID]>1 && exportID != exportImageIDs[i-1]) {
00404           needSendBuff = 1;
00405         }
00406         ++numActive;
00407       }
00408     }
00409 
00410 #   if defined(THROW_TPETRA_EFFICIENCY_WARNINGS) || defined(PRINT_TPETRA_EFFICIENCY_WARNINGS)
00411     {
00412       int global_needSendBuff;
00413       Teuchos::reduceAll(*comm_,Teuchos::REDUCE_SUM,needSendBuff,&global_needSendBuff);
00414       std::string err;
00415       err += "Tpetra::Distributor<" + Teuchos::TypeNameTraits<Ordinal>::name() 
00416         + ">::createFromSends(): Grouping export IDs together leads to improved performance.";
00417 #   if defined(THROW_TPETRA_EFFICIENCY_WARNINGS)
00418       TEST_FOR_EXCEPTION(global_needSendBuff != 0, std::runtime_error, err);
00419 #   else // print warning
00420       if (global_needSendBuff) {
00421         std::cerr << err << std::endl;
00422       }
00423     }
00424 #   endif
00425 #   endif
00426 
00427     if (starts[myImageID] != ZERO) {
00428       selfMessage_ = true;
00429     }
00430     else {
00431       selfMessage_ = false;
00432     }
00433 
00434 
00435 #ifdef TEUCHOS_DEBUG
00436     bool index_neq_numActive = false;
00437     bool send_neq_numSends = false;
00438 #endif
00439     if (!needSendBuff) {
00440       // grouped by image, no send buffer or indicesTo_ needed
00441       numSends_ = ZERO;
00442       for (int i=0; i < numImages; ++i) {
00443         if (starts[i]) ++numSends_;
00444       }
00445 
00446       // not only do we not need these, but empty indicesTo is also a flag for
00447       // later
00448       indicesTo_.resize(0);
00449       // size these to numSends_; note, at the moment, numSends_ includes sends
00450       // to myself; set their values to zeros
00451       imagesTo_.assign(numSends_,ZERO);
00452       startsTo_.assign(numSends_,ZERO);
00453       lengthsTo_.assign(numSends_,ZERO);
00454 
00455       // set startsTo to the offsent for each send (i.e., each image ID)
00456       // set imagesTo to the image ID for each send
00457       // in interpretting this code, remember that we are assuming contiguity
00458       // that is why index skips through the ranks
00459       {
00460         Ordinal index = ZERO;
00461         for (Ordinal i = ZERO; i < numSends_; ++i) {
00462           startsTo_[i] = index;
00463           Ordinal imageID = exportImageIDs[index];
00464           imagesTo_[i] = imageID;
00465           index += starts[imageID];
00466         }
00467 #ifdef TEUCHOS_DEBUG
00468         if (index != numActive) {
00469           index_neq_numActive = true;
00470         }
00471 #endif
00472       }
00473 
00474       // sort the startsTo and image IDs together, in ascending order, according
00475       // to image IDs
00476       if (numSends_ > ZERO) {
00477         sortArrays(imagesTo_(), startsTo_());
00478       }
00479 
00480       // compute the maximum send length
00481       maxSendLength_ = ZERO;
00482       for (int i = 0; i < numSends_; ++i) {
00483         Ordinal imageID = imagesTo_[i];
00484         lengthsTo_[i] = starts[imageID];
00485         if ((imageID != myImageID) && (lengthsTo_[i] > maxSendLength_)) {
00486           maxSendLength_ = lengthsTo_[i];
00487         }
00488       }
00489     }
00490     else {
00491       // not grouped by image, need send buffer and indicesTo_
00492 
00493       // starts[i] is the number of sends to node i
00494       // numActive equals number of sends total, \sum_i starts[i]
00495 
00496       // this loop starts at starts[1], so explicitly check starts[0]
00497       if (starts[0] == ZERO ) {
00498         numSends_ = ZERO;
00499       }
00500       else {
00501         numSends_ = ONE;
00502       }
00503       for (typename Teuchos::Array<Ordinal>::iterator i=starts.begin()+1,
00504                                                  im1=starts.begin();
00505            i != starts.end(); ++i) 
00506       {
00507         if (*i != ZERO) ++numSends_;
00508         *i += *im1;
00509         im1 = i;
00510       }
00511       // starts[i] now contains the number of exports to nodes 0 through i
00512 
00513       for (typename Teuchos::Array<Ordinal>::reverse_iterator ip1=starts.rbegin(),
00514                                                              i=starts.rbegin()+1;
00515            i != starts.rend(); ++i)
00516       {
00517         *ip1 = *i;
00518         ip1 = i;
00519       }
00520       starts[0] = ZERO;
00521       // starts[i] now contains the number of exports to nodes 0 through
00522       // i-1, i.e., all nodes before node i
00523 
00524       indicesTo_.resize(numActive);
00525 
00526       for (Ordinal i = ZERO; i < numExports_; ++i) {
00527         if (exportImageIDs[i] >= ZERO) {
00528           // record the offset to the sendBuffer for this export
00529           indicesTo_[starts[exportImageIDs[i]]] = i;
00530           // now increment the offset for this node
00531           ++starts[exportImageIDs[i]];
00532         }
00533       }
00534       // our send buffer will contain the export data for each of the nodes
00535       // we communicate with, in order by node id
00536       // sendBuffer = {node_0_data, node_1_data, ..., node_np-1_data}
00537       // indicesTo now maps each export to the location in our send buffer
00538       // associated with the export
00539       // data for export i located at sendBuffer[indicesTo[i]]
00540       //
00541       // starts[i] once again contains the number of exports to 
00542       // nodes 0 through i
00543       for (int node = numImages-1; node != 0; --node) {
00544         starts[node] = starts[node-1];
00545       }
00546       starts.front() = ZERO;       
00547       starts[numImages] = numActive;
00548       // 
00549       // starts[node] once again contains the number of exports to 
00550       // nodes 0 through node-1
00551       // i.e., the start of my data in the sendBuffer
00552 
00553       // this contains invalid data at nodes we don't care about, that is okay
00554       imagesTo_.resize(numSends_);
00555       startsTo_.resize(numSends_);
00556       lengthsTo_.resize(numSends_);
00557 
00558       // for each group of sends/exports, record the destination node,
00559       // the length, and the offset for this send into the 
00560       // send buffer (startsTo_)
00561       maxSendLength_ = ZERO;
00562       int snd = 0;
00563       for (int node = 0; node < numImages; ++node ) {
00564         if (starts[node+1] != starts[node]) {
00565           lengthsTo_[snd] = starts[node+1] - starts[node];
00566           startsTo_[snd] = starts[node];
00567           // record max length for all off-node sends
00568           if ((node != myImageID) && (lengthsTo_[snd] > maxSendLength_)) {
00569             maxSendLength_ = lengthsTo_[snd];
00570           }
00571           imagesTo_[snd] = node;
00572           ++snd;
00573         }
00574       }
00575 #ifdef TEUCHOS_DEBUG
00576       if (snd != numSends_) {
00577         send_neq_numSends = true;
00578       }
00579 #endif
00580     }
00581 #ifdef TEUCHOS_DEBUG
00582         SHARED_TEST_FOR_EXCEPTION(index_neq_numActive, std::logic_error,
00583             "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
00584         SHARED_TEST_FOR_EXCEPTION(send_neq_numSends, std::logic_error,
00585             "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
00586 #endif
00587 
00588     if (selfMessage_) --numSends_;
00589 
00590     // Invert map to see what msgs are received and what length
00591     computeReceives();
00592 
00593     numImports = totalReceiveLength_;
00594   }
00595 
00596   template <typename Ordinal>
00597   void Distributor<Ordinal>::createFromRecvs(
00598       const Teuchos::ArrayView<const Ordinal> &remoteGIDs, 
00599       const Teuchos::ArrayView<const Ordinal> &remoteImageIDs, 
00600             Teuchos::ArrayRCP<Ordinal> &exportGIDs, 
00601             Teuchos::ArrayRCP<Ordinal> &exportImageIDs)
00602   {
00603     computeSends(remoteGIDs, remoteImageIDs, exportGIDs, exportImageIDs);
00604     Ordinal testNumRemoteIDs; // dummy
00605     createFromSends(exportImageIDs(), testNumRemoteIDs);
00606   }
00607 
00608   template <typename Ordinal>
00609   const Ordinal & Distributor<Ordinal>::getTotalReceiveLength() const 
00610   { return(totalReceiveLength_); }
00611 
00612   template <typename Ordinal>
00613   const Ordinal & Distributor<Ordinal>::getNumReceives() const 
00614   { return(numReceives_); }
00615 
00616   template <typename Ordinal>
00617   bool Distributor<Ordinal>::getSelfMessage() const 
00618   { return(selfMessage_); }
00619 
00620   template <typename Ordinal>
00621   const Ordinal & Distributor<Ordinal>::getNumSends() const 
00622   { return(numSends_); }
00623 
00624   template <typename Ordinal>
00625   const Ordinal & Distributor<Ordinal>::getMaxSendLength() const 
00626   { return(maxSendLength_); }
00627 
00628   template <typename Ordinal>
00629   Teuchos::ArrayView<const Ordinal> Distributor<Ordinal>::getImagesFrom() const 
00630   { return imagesFrom_; }
00631 
00632   template <typename Ordinal>
00633   Teuchos::ArrayView<const Ordinal> Distributor<Ordinal>::getLengthsFrom() const 
00634   { return lengthsFrom_; }
00635 
00636   template <typename Ordinal>
00637   Teuchos::ArrayView<const Ordinal> Distributor<Ordinal>::getImagesTo() const 
00638   { return imagesTo_; }
00639 
00640   template <typename Ordinal>
00641   Teuchos::ArrayView<const Ordinal> Distributor<Ordinal>::getIndicesTo() const 
00642   { return indicesTo_; }
00643 
00644   template <typename Ordinal>
00645   Teuchos::ArrayView<const Ordinal> Distributor<Ordinal>::getStartsTo() const 
00646   { return startsTo_; }
00647 
00648   template <typename Ordinal>
00649   Teuchos::ArrayView<const Ordinal> Distributor<Ordinal>::getLengthsTo() const 
00650   { return lengthsTo_; }
00651 
00652   template <typename Ordinal>
00653   Teuchos::RCP<Distributor<Ordinal> > Distributor<Ordinal>::getReverse() const
00654   {
00655     if (reverseDistributor_ == Teuchos::null) { 
00656       // need to create reverse distributor
00657       createReverseDistributor();
00658     }
00659     return reverseDistributor_;
00660   }
00661 
00662   template <typename Ordinal>
00663   void Distributor<Ordinal>::createReverseDistributor() const {
00664     const Ordinal zero = Teuchos::OrdinalTraits<Ordinal>::zero();
00665 
00666     reverseDistributor_ = Teuchos::rcp(new Distributor<Ordinal>(comm_));
00667 
00668     // compute new totalSendLength
00669     Ordinal totalSendLength = zero;
00670     for (Ordinal i = zero; i < (numSends_ + (selfMessage_ ? 1 : 0)); ++i) {
00671       totalSendLength += lengthsTo_[i];
00672     }
00673 
00674     // compute new maxReceiveLength
00675     Ordinal maxReceiveLength = zero;
00676     const int myImageID = comm_->getRank();
00677     for (Ordinal i = zero; i < numReceives_; ++i) {
00678       if (imagesFrom_[i] != myImageID) {
00679         if (lengthsFrom_[i] > maxReceiveLength) {
00680           maxReceiveLength = lengthsFrom_[i];
00681         }
00682       }
00683     }
00684 
00685     // initialize all of reverseDistributor's data members
00686     reverseDistributor_->lengthsTo_ = lengthsFrom_;
00687     reverseDistributor_->imagesTo_ = imagesFrom_;
00688     reverseDistributor_->indicesTo_ = indicesFrom_;
00689     reverseDistributor_->startsTo_ = startsFrom_;
00690     reverseDistributor_->lengthsFrom_ = lengthsTo_;
00691     reverseDistributor_->imagesFrom_ = imagesTo_;
00692     reverseDistributor_->indicesFrom_ = indicesTo_;
00693     reverseDistributor_->startsFrom_ = startsTo_;
00694     reverseDistributor_->numSends_ = numReceives_;
00695     reverseDistributor_->numReceives_ = numSends_;
00696     reverseDistributor_->selfMessage_ = selfMessage_;
00697     reverseDistributor_->maxSendLength_ = maxReceiveLength;
00698     reverseDistributor_->totalReceiveLength_ = totalSendLength;
00699     // Note: technically, I am my reverse distributor's reverse distributor, but 
00700     //       we will not set this up, as it gives us an opportunity to test 
00701     //       that reverseDistributor is an inverse operation w.r.t. value semantics of distributors
00702     // Note: numExports_ was not copied
00703   }
00704 
00705   template <typename Ordinal>
00706   template <typename Packet>
00707   void Distributor<Ordinal>::doPostsAndWaits(
00708       const Teuchos::ArrayView<const Packet>& exports,
00709       Ordinal numPackets,
00710       const Teuchos::ArrayView<Packet>& imports) 
00711   {
00712     // doPosts takes imports as an ArrayRCP, requiring that the memory location is persisting
00713     // however, it need only persist until doWaits is called, so it is safe for us to 
00714     // use a non-persisting reference in this case
00715     doPosts(exports, numPackets, Teuchos::arcp<Packet>(imports.getRawPtr(),0,imports.size(),false));
00716     doWaits();
00717   }
00718 
00719   template <typename Ordinal>
00720   template <typename Packet>
00721   void Distributor<Ordinal>::doPosts(
00722       const Teuchos::ArrayView<const Packet>& exports,
00723       Ordinal numPackets,
00724       const Teuchos::ArrayRCP<Packet>& imports) 
00725   {
00726     using Teuchos::ArrayRCP;
00727 
00728     // start of actual doPosts function
00729     const Ordinal ZERO = Teuchos::OrdinalTraits<Ordinal>::zero();
00730     const Ordinal ONE  = Teuchos::OrdinalTraits<Ordinal>::one();
00731     const Ordinal myImageID = comm_->getRank();
00732     int selfReceiveOffset = 0;
00733 
00734 #ifdef TEUCHOS_DEBUG
00735     TEST_FOR_EXCEPTION(imports.size() != totalReceiveLength_ * numPackets, std::runtime_error,
00736         "Tpetra::Distributor<"<<Teuchos::OrdinalTraits<Ordinal>::name()
00737         <<">::doPosts(): imports must be large enough to store the imported data.");
00738 #endif
00739 
00740     // allocate space in requests
00741     requests_.resize(0);
00742     requests_.reserve(numReceives_);
00743 
00744     // start up the Irecv's
00745     {
00746       int curBufferOffset = 0;
00747       for (int i = 0; i < (numReceives_ + (selfMessage_ ? 1 : 0)); ++i) {
00748         if (imagesFrom_[i] != myImageID) { 
00749           // receiving this one from another image
00750           // setup reference into imports of the appropriate size and at the appropriate place
00751           ArrayRCP<Packet> impptr = imports.persistingView(curBufferOffset,lengthsFrom_[i]*numPackets);
00752           requests_.push_back( Teuchos::ireceive<Ordinal,Packet>(*comm_,impptr,imagesFrom_[i]) );
00753         }
00754         else {
00755           // receiving this one from myself 
00756           // note that offset
00757           selfReceiveOffset = curBufferOffset;
00758         }
00759         curBufferOffset += lengthsFrom_[i]*numPackets;
00760       }
00761     }
00762 
00763     // wait for everyone else before posting ready-sends below to ensure that 
00764     // all non-blocking receives above have been posted
00765     Teuchos::barrier(*comm_);
00766 
00767     // setup scan through imagesTo_ list starting with higher numbered images
00768     // (should help balance message traffic)
00769     Ordinal numBlocks = numSends_+ selfMessage_;
00770     Ordinal imageIndex = ZERO;
00771     while ((imageIndex < numBlocks) && (imagesTo_[imageIndex] < myImageID)) {
00772       ++imageIndex;
00773     }
00774     if (imageIndex == numBlocks) {
00775       imageIndex = ZERO;
00776     }
00777 
00778     Ordinal selfNum = ZERO;
00779     Ordinal selfIndex = ZERO;
00780 
00781     if (indicesTo_.empty()) { // data is already blocked by processor
00782       for (Ordinal i = ZERO; i < numBlocks; ++i) {
00783         Ordinal p = i + imageIndex;
00784         if (p > (numBlocks - ONE)) {
00785           p -= numBlocks;
00786         }
00787 
00788         if (imagesTo_[p] != myImageID) {
00789           // sending it to another image
00790           Teuchos::ArrayView<const Packet> tmpSend(&exports[startsTo_[p]*numPackets],lengthsTo_[p]*numPackets);
00791           Teuchos::readySend<Ordinal,Packet>(*comm_,tmpSend,imagesTo_[p]);
00792         }
00793         else {
00794           // sending it to ourself
00795           selfNum = p;
00796         }
00797       }
00798 
00799       if (selfMessage_) {
00800         std::copy(exports.begin()+startsTo_[selfNum]*numPackets, exports.begin()+startsTo_[selfNum]*numPackets+lengthsTo_[selfNum]*numPackets, 
00801                   imports.begin()+selfReceiveOffset);
00802       }
00803     }
00804     else { // data is not blocked by image, use send buffer
00805       // allocate sendArray buffer
00806       Teuchos::Array<Packet> sendArray(maxSendLength_*numPackets); 
00807 
00808       for (Ordinal i = ZERO; i < numBlocks; ++i) {
00809         Ordinal p = i + imageIndex;
00810         if (p > (numBlocks - ONE)) {
00811           p -= numBlocks;
00812         }
00813 
00814         if (imagesTo_[p] != myImageID) { 
00815           // sending it to another image
00816           typename Teuchos::ArrayView<const Packet>::iterator srcBegin, srcEnd;
00817           int sendArrayOffset = 0;
00818           int j = startsTo_[p];
00819           for (Ordinal k = ZERO; k < lengthsTo_[p]; ++k, ++j) {
00820             srcBegin = exports.begin() + indicesTo_[j]*numPackets;
00821             srcEnd   = srcBegin + numPackets;
00822             std::copy( srcBegin, srcEnd, sendArray.begin()+sendArrayOffset );
00823             sendArrayOffset += numPackets;
00824           }
00825           Teuchos::ArrayView<const Packet> tmpSend = sendArray(0,lengthsTo_[p]*numPackets);
00826           Teuchos::readySend<Ordinal,Packet>(*comm_,tmpSend,imagesTo_[p]);
00827         }
00828         else { 
00829           // sending it to myself
00830           selfNum = p;
00831           selfIndex = startsTo_[p];
00832         }
00833       }
00834 
00835       if (selfMessage_) {
00836         for (Ordinal k = ZERO; k < lengthsTo_[selfNum]; ++k) {
00837           std::copy( exports.begin()+indicesTo_[selfIndex]*numPackets,
00838                      exports.begin()+indicesTo_[selfIndex]*numPackets + numPackets,
00839                      imports.begin() + selfReceiveOffset );
00840           ++selfIndex;
00841           selfReceiveOffset += numPackets;
00842         }
00843       }
00844     }
00845   }
00846 
00847   template <typename Ordinal>
00848   void Distributor<Ordinal>::doWaits() 
00849   {
00850     const Ordinal & numReceives = getNumReceives();
00851     if (numReceives > Teuchos::OrdinalTraits<Ordinal>::zero()) {
00852       Teuchos::waitAll(*comm_,requests_());
00853       // Requests should all be null, clear them
00854 #ifdef TEUCHOS_DEBUG
00855       for (typename Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest> >::const_iterator i = requests_.begin(); 
00856            i != requests_.end(); ++i) 
00857       {
00858         TEST_FOR_EXCEPTION(*i != Teuchos::null, std::runtime_error,
00859             "Tpetra::Distributor<"<<Teuchos::OrdinalTraits<Ordinal>::name()
00860             <<">::doWaits(): Requests should be null after call to Teuchos::waitAll().");
00861       }
00862 #endif
00863       requests_.empty();
00864     }
00865   }
00866 
00867   template <typename Ordinal>
00868   template <typename Packet>
00869   void Distributor<Ordinal>::doReversePostsAndWaits(
00870       const Teuchos::ArrayView<const Packet>& exports,
00871       Ordinal numPackets,
00872       const Teuchos::ArrayView<Packet>& imports) 
00873   {
00874     // doPosts takes imports as an ArrayRCP, requiring that the memory location is persisting
00875     // however, it need only persist until doWaits is called, so it is safe for us to 
00876     // use a non-persisting reference in this case
00877     doReversePosts(exports, numPackets, Teuchos::arcp<Packet>(imports.getRawPtr(),0,imports.size(),false));
00878     doReverseWaits();
00879   }
00880 
00881   template <typename Ordinal>
00882   template <typename Packet>
00883   void Distributor<Ordinal>::doReversePosts(
00884       const Teuchos::ArrayView<const Packet>& exports,
00885       Ordinal numPackets,
00886       const Teuchos::ArrayRCP<Packet>& imports) 
00887   {
00888     TEST_FOR_EXCEPTION(!indicesTo_.empty(),std::runtime_error,
00889         "Tpetra::Distributor<"<<Teuchos::OrdinalTraits<Ordinal>::name()<<">::doReversePosts(): Can only do reverse comm when original data is blocked by image.");
00890     if (reverseDistributor_ == Teuchos::null) {
00891       createReverseDistributor();
00892     }
00893     reverseDistributor_->doPosts(exports,numPackets,imports);
00894   }
00895 
00896   template <typename Ordinal>
00897   void Distributor<Ordinal>::doReverseWaits() 
00898   {
00899     // call doWaits() on the reverse Distributor, if it exists
00900     if (reverseDistributor_ != Teuchos::null) {
00901       reverseDistributor_->doWaits();
00902     }
00903   }
00904 
00906   template <typename Ordinal>
00907   void Distributor<Ordinal>::print(std::ostream& os) const 
00908   {
00909     using std::endl;
00910     int const myImageID = comm_->getRank();
00911     int const numImages = comm_->getSize();
00912     for (int i = 0; i < numImages; ++i) {
00913       comm_->barrier();
00914       if (i == myImageID) {
00915         os << "[Image " << myImageID << " of " << numImages << "]" << endl;
00916         os << " numExports: " << numExports_ << endl;
00917         os << " selfMessage: " << selfMessage_ << endl;
00918         os << " numSends_: " << numSends_ << endl;
00919         os << " imagesTo_: " << toString(imagesTo_) << endl;
00920         os << " startsTo_: " << toString(startsTo_) << endl;
00921         os << " lengthsTo_: " << toString(lengthsTo_) << endl;
00922         os << " maxSendLength_: " << maxSendLength_ << endl;
00923         os << " indicesTo_: " << toString(indicesTo_) << endl;
00924         os << " numReceives_: " << numReceives_ << endl;
00925         os << " totalReceiveLength_: " << totalReceiveLength_ << endl;
00926         os << " lengthsFrom_: " << toString(lengthsFrom_) << endl;
00927         os << " imagesFrom_: " << toString(imagesFrom_) << endl;
00928         os << " indicesFrom_: " << toString(indicesFrom_) << endl;
00929         os << " startsFrom_: " << toString(startsFrom_) << endl;
00930       }
00931     }
00932   }
00933 
00934   template <typename Ordinal>
00935   void Distributor<Ordinal>::computeReceives()
00936   {
00937     int myImageID = comm_->getRank();
00938     int numImages = comm_->getSize();
00939     const Ordinal ZERO = Teuchos::OrdinalTraits<Ordinal>::zero();
00940     const Ordinal  ONE = Teuchos::OrdinalTraits<Ordinal>::one();
00941 
00942     // to_nodes_from_me[i] == number of messages sent by this node to node i
00943     // the info in numSends_, imagesTo_, lengthsTo_ concerns the contiguous sends
00944     // therefore, each node will be listed in imagesTo_ at most once
00945     {
00946       std::vector<Ordinal> to_nodes_from_me(numImages, ZERO);
00947 #     ifdef TEUCHOS_DEBUG 
00948         bool counting_error = false;
00949 #     endif
00950       for (int i = ZERO; i < (numSends_ + (selfMessage_ ? 1 : 0)); ++i) {
00951 #       ifdef TEUCHOS_DEBUG
00952           if (to_nodes_from_me[imagesTo_[i]] != 0) counting_error = true;
00953 #       endif
00954         to_nodes_from_me[imagesTo_[i]] = ONE;
00955       }
00956 #     ifdef TEUCHOS_DEBUG
00957         SHARED_TEST_FOR_EXCEPTION(counting_error, std::logic_error,
00958             "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
00959 #     endif
00960       // each proc will get back only one item (hence, counts = ones) from the array of globals sums, 
00961       // namely that entry corresponding to the node, and detailing how many receives it has.
00962       // this total includes self sends
00963       std::vector<Ordinal> counts(numImages, 1);
00964       Teuchos::reduceAllAndScatter<Ordinal>(*comm_,Teuchos::REDUCE_SUM,numImages,&to_nodes_from_me[0],&counts[0],&numReceives_);
00965     }
00966 
00967     // assign these to length numReceives, with zero entries
00968     lengthsFrom_.assign(numReceives_, ZERO);
00969     imagesFrom_.assign(numReceives_, ZERO);
00970 
00971     // FINISH: why do these work? they are blocking sends, and should block until completion, which happens below
00972     // FINISH: consider switching them to non-blocking
00973     // NOTE: epetra has both, old (non-blocking) and new (mysterious)
00974 
00975     for (Ordinal i = ZERO; i < (numSends_ + (selfMessage_ ? 1 : 0)); ++i) {
00976       if (imagesTo_[i] != myImageID ) {
00977         // send a message to imagesTo_[i], telling him that our pattern sends him lengthsTo_[i] blocks of packets
00978         Teuchos::send(*comm_,lengthsTo_[i],imagesTo_[i]);
00979       }
00980       else {
00981         // set selfMessage_ to end block of recv arrays
00982         lengthsFrom_[numReceives_-ONE] = lengthsTo_[i];
00983         imagesFrom_[numReceives_-ONE] = myImageID;
00984       }
00985     }
00986 
00987     //
00988     for (Ordinal i = ZERO; i < (numReceives_ - (selfMessage_ ? 1 : 0)); ++i) {
00989       // receive one Ordinal variable from any sender.
00990       // store the value in lengthsFrom_[i], and store the sender's ImageID in imagesFrom_[i]
00991       // imagesFrom_[i] = comm_->receive(&lengthsFrom_[i], 1, -1);
00992       imagesFrom_[i] = Teuchos::receive(*comm_,-1,&lengthsFrom_[i]);
00993     }
00994     comm_->barrier();
00995 
00996     sortArrays(imagesFrom_(), lengthsFrom_());
00997 
00998     // Compute indicesFrom_
00999     totalReceiveLength_ = std::accumulate(lengthsFrom_.begin(), lengthsFrom_.end(), ZERO);
01000     indicesFrom_.clear();
01001     indicesFrom_.reserve(totalReceiveLength_);
01002     for (Ordinal i = 0; i < totalReceiveLength_; ++i) {
01003       indicesFrom_.push_back(i);
01004     }
01005 
01006     startsFrom_.clear();
01007     startsFrom_.reserve(numReceives_);
01008     for (Ordinal i = ZERO, j = ZERO; i < numReceives_; ++i) {
01009       startsFrom_.push_back(j);
01010       j += lengthsFrom_[i];
01011     }
01012 
01013     if (selfMessage_) --numReceives_;
01014 
01015     comm_->barrier();
01016   }
01017 
01018   template <typename Ordinal>
01019   void Distributor<Ordinal>::computeSends(
01020       const Teuchos::ArrayView<const Ordinal> & importIDs,
01021       const Teuchos::ArrayView<const Ordinal> & importImageIDs,
01022             Teuchos::ArrayRCP<Ordinal>& exportIDs,
01023             Teuchos::ArrayRCP<Ordinal>& exportImageIDs)
01024   {
01025     int myImageID = comm_->getRank();
01026     const Ordinal ZERO = Teuchos::OrdinalTraits<Ordinal>::zero();
01027 
01028     Ordinal numImports = importImageIDs.size();
01029     Teuchos::Array<Ordinal> importObjs(2*numImports);
01030     for (Ordinal i = ZERO; i < numImports; ++i ) {  
01031       importObjs[2*i]   = importIDs[i];
01032       importObjs[2*i+1] = myImageID;
01033     }
01034 
01035     Ordinal numExports;
01036     Distributor<Ordinal> tempPlan(comm_);
01037     tempPlan.createFromSends(importImageIDs, numExports);
01038     exportIDs = Teuchos::arcp<Ordinal>(numExports);
01039     exportImageIDs = Teuchos::arcp<Ordinal>(numExports);
01040 
01041     Teuchos::Array<Ordinal> exportObjs(tempPlan.getTotalReceiveLength()*2);
01042     tempPlan.doPostsAndWaits(importObjs().getConst(),2,exportObjs());
01043 
01044     for (int i = 0; i < numExports; ++i) {
01045       exportIDs[i]      = exportObjs[2*i];
01046       exportImageIDs[i] = exportObjs[2*i+1];
01047     }
01048   }
01049 
01050 } // namespace Tpetra
01051 
01052 #endif // TPETRA_DISTRIBUTOR_HPP

Generated on Wed May 12 21:59:41 2010 for Tpetra Matrix/Vector Services by  doxygen 1.4.7