Zoltan2
Zoltan2_PartitioningSolution.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 //
00003 // ***********************************************************************
00004 //
00005 //   Zoltan2: A package of combinatorial algorithms for scientific computing
00006 //                  Copyright 2012 Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Karen Devine      (kddevin@sandia.gov)
00039 //                    Erik Boman        (egboman@sandia.gov)
00040 //                    Siva Rajamanickam (srajama@sandia.gov)
00041 //
00042 // ***********************************************************************
00043 //
00044 // @HEADER
00045 
00050 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00051 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00052 
00053 #include <Zoltan2_IdentifierMap.hpp>
00054 #include <Zoltan2_Solution.hpp>
00055 #include <Zoltan2_GreedyMWM.hpp>
00056 #include <Zoltan2_CoordinatePartitioningGraph.hpp>
00057 #include <cmath>
00058 #include <algorithm>
00059 #include <vector>
00060 #include <limits>
00061 
00062 namespace Teuchos{
00063 
00068 template <typename Ordinal, typename T>
00069 class Zoltan2_BoxBoundaries  : public ValueTypeReductionOp<Ordinal,T>
00070 {
00071 private:
00072     Ordinal size;
00073     T _EPSILON;
00074 
00075 public:
00078     Zoltan2_BoxBoundaries ():size(0), _EPSILON (std::numeric_limits<T>::epsilon()){}
00079 
00086     Zoltan2_BoxBoundaries (Ordinal s_):
00087         size(s_), _EPSILON (std::numeric_limits<T>::epsilon()){}
00088 
00091     void reduce( const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
00092     {
00093         for (Ordinal i=0; i < count; i++){
00094             if (Z2_ABS(inBuffer[i]) >  _EPSILON){
00095                 inoutBuffer[i] = inBuffer[i];
00096             }
00097         }
00098     }
00099 };
00100 } // namespace Teuchos
00101 
00102 
00103 namespace Zoltan2 {
00104 
00105 
00106 
00107 
00122 template <typename Adapter>
00123   class PartitioningSolution : public Solution
00124 {
00125 public:
00126 
00127 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00128   typedef typename Adapter::gno_t gno_t;
00129   typedef typename Adapter::scalar_t scalar_t;
00130   typedef typename Adapter::lno_t lno_t;
00131   typedef typename Adapter::gid_t gid_t;
00132   typedef typename Adapter::part_t part_t;
00133   typedef typename Adapter::user_t user_t;
00134 #endif
00135 
00153   PartitioningSolution( RCP<const Environment> &env,
00154     RCP<const Comm<int> > &comm,
00155     RCP<const IdentifierMap<user_t> > &idMap,
00156     int nUserWeights);
00157 
00187   PartitioningSolution( RCP<const Environment> &env,
00188     RCP<const Comm<int> > &comm,
00189     RCP<const IdentifierMap<user_t> > &idMap,
00190     int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
00191     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00192 
00194   // Information that the algorithm may wish to query.
00195 
00198   size_t getTargetGlobalNumberOfParts() const { return nGlobalParts_; }
00199 
00202   size_t getActualGlobalNumberOfParts() const { return nGlobalPartsSolution_; }
00203 
00206   size_t getLocalNumberOfParts() const { return nLocalParts_; }
00207 
00215   scalar_t getLocalFractionOfPart() const { return localFraction_; }
00216 
00226   bool oneToOnePartDistribution() const { return onePartPerProc_; }
00227 
00247   const int *getPartDistribution() const {
00248     if (partDist_.size() > 0) return &partDist_[0];
00249     else return NULL;
00250   }
00251 
00268   const part_t *getProcDistribution() const {
00269     if (procDist_.size() > 0) return &procDist_[0];
00270     else return NULL;
00271   }
00272 
00276   int getNumberOfCriteria() const { return nWeightsPerObj_; }
00277 
00278 
00285   bool criteriaHasUniformPartSizes(int idx) const { return pSizeUniform_[idx];}
00286 
00299   scalar_t getCriteriaPartSize(int idx, part_t part) const {
00300     if (pSizeUniform_[idx])
00301       return 1.0 / nGlobalParts_;
00302     else if (pCompactIndex_[idx].size())
00303       return pSize_[idx][pCompactIndex_[idx][part]];
00304     else
00305       return pSize_[idx][part];
00306   }
00307 
00321   bool criteriaHaveSamePartSizes(int c1, int c2) const;
00322 
00324   // Method used by the algorithm to set results.
00325 
00352   void setParts(ArrayRCP<const gno_t> &gnoList,
00353     ArrayRCP<part_t> &partList, bool dataDidNotMove);
00354 
00356 
00368   void RemapParts();
00369 
00371   /* Return the weight of objects staying with a given remap.
00372    * If remap is NULL, compute weight of objects staying with given partition
00373    */
00374   long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt,
00375                      part_t nrhs, part_t nlhs)
00376   {
00377     long staying = 0;
00378     for (part_t i = 0; i < nrhs; i++) { 
00379       part_t k = (remap ? remap[i] : i);
00380       for (part_t j = idx[k]; j < idx[k+1]; j++) { 
00381         if (i == (adj[j]-nlhs)) {
00382           staying += wgt[j];
00383           break;
00384         }
00385       }
00386     }
00387     return staying;
00388   }
00389 
00391   // Results that may be queried by the user, by migration methods,
00392   // or by metric calculation methods.
00393   // We return raw pointers so users don't have to learn about our
00394   // pointer wrappers.
00395 
00398   const RCP<const Comm<int> > &getCommunicator() const { return comm_;}
00399 
00402   size_t getLocalNumberOfIds() const { return gids_.size(); }
00403 
00406   const gid_t *getIdList() const {
00407     if (gids_.size() > 0) return gids_.getRawPtr();
00408     else                  return NULL;
00409   }
00410 
00413   const part_t *getPartList() const {
00414     if (parts_.size() > 0) return parts_.getRawPtr();
00415     else                   return NULL;
00416   }
00417 
00422   const int *getProcList() const {
00423     if (procs_.size() > 0) return procs_.getRawPtr();
00424     else                   return NULL;
00425   }
00426 
00427 
00430   void setPartBoxes(
00431     RCP<std::vector<
00432              Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > outPartBoxes)
00433   {
00434     this->partBoxes = outPartBoxes;
00435   }
00436 
00439   RCP<std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 
00440   getPartBoxes()
00441   {
00442     return this->partBoxes;
00443   }
00444 
00447   void getCommunicationGraph(
00448           const Teuchos::Comm<int> *comm,
00449           ArrayRCP <part_t> &comXAdj,
00450           ArrayRCP <part_t> &comAdj) 
00451   {
00452     if(comXAdj_.getRawPtr() == NULL && comAdj_.getRawPtr() == NULL){
00453 
00454       part_t ntasks =  this->getActualGlobalNumberOfParts();
00455       if (part_t (this->getTargetGlobalNumberOfParts()) > ntasks){
00456         ntasks = this->getTargetGlobalNumberOfParts();
00457       }
00458       RCP<std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 
00459       pBoxes = this->getGlobalBoxBoundaries(comm);
00460       int dim = (*pBoxes)[0].getDim();
00461       GridHash<scalar_t, part_t> grid(pBoxes, ntasks, dim);
00462       grid.getAdjArrays(comXAdj_, comAdj_);
00463     }
00464     comAdj = comAdj_;
00465     comXAdj = comXAdj_;
00466   }
00467 
00468   RCP<std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 
00469   getGlobalBoxBoundaries(const Teuchos::Comm<int> *comm) {
00470     part_t ntasks =  this->getActualGlobalNumberOfParts();
00471     if (part_t (this->getTargetGlobalNumberOfParts()) > ntasks){
00472       ntasks = this->getTargetGlobalNumberOfParts();
00473     }
00474 
00475     RCP<std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 
00476     pBoxes = this->getPartBoxes();
00477 
00478     int dim = (*pBoxes)[0].getDim();
00479     scalar_t *localPartBoundaries = new scalar_t[ntasks * 2 *dim];
00480 
00481     memset(localPartBoundaries, 0, sizeof(scalar_t) * ntasks * 2 *dim);
00482 
00483     scalar_t *globalPartBoundaries = new scalar_t[ntasks * 2 *dim];
00484     memset(globalPartBoundaries, 0, sizeof(scalar_t) * ntasks * 2 *dim);
00485 
00486     scalar_t *localPartMins = localPartBoundaries;
00487     scalar_t *localPartMaxs = localPartBoundaries + ntasks * dim;
00488 
00489     scalar_t *globalPartMins = globalPartBoundaries;
00490     scalar_t *globalPartMaxs = globalPartBoundaries + ntasks * dim;
00491 
00492     part_t boxCount = pBoxes->size();
00493     for (part_t i = 0; i < boxCount; ++i){
00494       part_t pId = (*pBoxes)[i].getpId();
00495       //cout << "me:" << comm->getRank() << " has:" << pId << endl;
00496 
00497       scalar_t *lmins = (*pBoxes)[i].getlmins();
00498       scalar_t *lmaxs = (*pBoxes)[i].getlmaxs();
00499 
00500       for (int j = 0; j < dim; ++j){
00501         localPartMins[dim * pId + j] = lmins[j];
00502         localPartMaxs[dim * pId + j] = lmaxs[j];
00503         /*
00504         cout << "me:" << comm->getRank()  <<
00505                 " dim * pId + j:"<< dim * pId + j <<
00506                 " localMin:" << localPartMins[dim * pId + j] <<
00507                 " localMax:" << localPartMaxs[dim * pId + j] << endl;
00508         */
00509       }
00510     }
00511 
00512     Teuchos::Zoltan2_BoxBoundaries<int, scalar_t> reductionOp(ntasks * 2 *dim);
00513 
00514     reduceAll<int, scalar_t>(*comm, reductionOp,
00515               ntasks * 2 *dim, localPartBoundaries, globalPartBoundaries);
00516     RCP<std::vector<coordinateModelPartBox<scalar_t, part_t> > > 
00517     pB(new std::vector <coordinateModelPartBox <scalar_t, part_t> > (), true);
00518     for (part_t i = 0; i < ntasks; ++i){
00519       Zoltan2::coordinateModelPartBox <scalar_t, part_t> tpb(i, dim,
00520                                                  globalPartMins + dim * i,
00521                                                  globalPartMaxs + dim * i);
00522 
00523       /*
00524       for (int j = 0; j < dim; ++j){
00525           cout << "me:" << comm->getRank()  <<
00526                   " dim * pId + j:"<< dim * i + j <<
00527                   " globalMin:" << globalPartMins[dim * i + j] <<
00528                   " globalMax:" << globalPartMaxs[dim * i + j] << endl;
00529       }
00530       */
00531       pB->push_back(tpb);
00532     }
00533     delete []localPartBoundaries;
00534     delete []globalPartBoundaries;
00535     //RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, part_t> > > tmpRCPBox(pB, true);
00536     this->partBoxes = pB;
00537     return this->partBoxes;
00538   }
00539 
00569   template <typename Extra>
00570     size_t convertSolutionToImportList(
00571       int numExtra,
00572       ArrayRCP<Extra> &xtraInfo,
00573       ArrayRCP<typename Adapter::gid_t> &imports,
00574       ArrayRCP<Extra> &newXtraInfo) const;
00575 
00593   void getPartsForProc(int procId, double &numParts, part_t &partMin,
00594     part_t &partMax) const
00595   {
00596     env_->localInputAssertion(__FILE__, __LINE__, "invalid process id",
00597       procId >= 0 && procId < comm_->getSize(), BASIC_ASSERTION);
00598 
00599     procToPartsMap(procId, numParts, partMin, partMax);
00600   }
00601 
00613   void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
00614   {
00615     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
00616       partId >= 0 && partId < nGlobalParts_, BASIC_ASSERTION);
00617 
00618     partToProcsMap(partId, procMin, procMax);
00619   }
00620 
00621 private:
00622   void partToProc(bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
00623     int numLocalParts, int numGlobalParts);
00624 
00625   void procToPartsMap(int procId, double &numParts, part_t &partMin,
00626     part_t &partMax) const;
00627 
00628   void partToProcsMap(part_t partId, int &procMin, int &procMax) const;
00629 
00630   void setPartDistribution();
00631 
00632   void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
00633     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00634 
00635   void computePartSizes(int widx, ArrayView<part_t> ids,
00636     ArrayView<scalar_t> sizes);
00637 
00638   void broadcastPartSizes(int widx);
00639 
00640 
00641   RCP<const Environment> env_;             // has application communicator
00642   RCP<const Comm<int> > comm_;             // the problem communicator
00643   RCP<const IdentifierMap<user_t> > idMap_;
00644 
00645   //part box boundaries as a result of geometric partitioning algorithm.
00646   RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, part_t> > > partBoxes;
00647   ArrayRCP <part_t> comXAdj_; //communication graph xadj
00648   ArrayRCP <part_t> comAdj_; //communication graph adj.
00649 
00650   part_t nGlobalParts_;// target global number of parts
00651   part_t nLocalParts_; // number of parts to be on this process
00652 
00653   scalar_t localFraction_; // approx fraction of a part on this process
00654   int nWeightsPerObj_;      // if user has no weights, this is 1  TODO:  WHY???
00655 
00656   // If process p is to be assigned part p for all p, then onePartPerProc_
00657   // is true. Otherwise it is false, and either procDist_ or partDist_
00658   // describes the allocation of parts to processes.
00659   //
00660   // If parts are never split across processes, then procDist_ is defined
00661   // as follows:
00662   //
00663   //   partId              = procDist_[procId]
00664   //   partIdNext          = procDist_[procId+1]
00665   //   globalNumberOfParts = procDist_[numProcs]
00666   //
00667   // meaning that the parts assigned to process procId range from
00668   // [partId, partIdNext).  If partIdNext is the same as partId, then
00669   // process procId has no parts.
00670   //
00671   // If the number parts is less than the number of processes, and the
00672   // user did not specify "num_local_parts" for each of the processes, then
00673   // parts are split across processes, and partDist_ is defined rather than
00674   // procDist_.
00675   //
00676   //   procId              = partDist_[partId]
00677   //   procIdNext          = partDist_[partId+1]
00678   //   globalNumberOfProcs = partDist_[numParts]
00679   //
00680   // which implies that the part partId is shared by processes in the
00681   // the range [procId, procIdNext).
00682   //
00683   // We use std::vector so we can use upper_bound algorithm
00684 
00685   bool             onePartPerProc_;   // either this is true...
00686   std::vector<int>      partDist_;      // or this is defined ...
00687   std::vector<part_t> procDist_;      // or this is defined.
00688   bool procDistEquallySpread_;        // if procDist_ is used and
00689                                       // #parts > #procs and
00690                                       // num_local_parts is not specified,
00691                                       // parts are evenly distributed to procs
00692 
00693   // In order to minimize the storage required for part sizes, we
00694   // have three different representations.
00695   //
00696   // If the part sizes for weight index w are all the same, then:
00697   //    pSizeUniform_[w] = true
00698   //    pCompactIndex_[w].size() = 0
00699   //    pSize_[w].size() = 0
00700   //
00701   // and the size for part p is 1.0 / nparts.
00702   //
00703   // If part sizes differ for each part in weight index w, but there
00704   // are no more than 64 distinct sizes:
00705   //    pSizeUniform_[w] = false
00706   //    pCompactIndex_[w].size() = number of parts
00707   //    pSize_[w].size() = number of different sizes
00708   //
00709   // and the size for part p is pSize_[pCompactIndex_[p]].
00710   //
00711   // If part sizes differ for each part in weight index w, and there
00712   // are more than 64 distinct sizes:
00713   //    pSizeUniform_[w] = false
00714   //    pCompactIndex_[w].size() = 0
00715   //    pSize_[w].size() = nparts
00716   //
00717   // and the size for part p is pSize_[p].
00718   //
00719   // NOTE: If we expect to have similar cases, i.e. a long list of scalars
00720   //   where it is highly possible that just a few unique values appear,
00721   //   then we may want to make this a class.  The advantage is that we
00722   //   save a long list of 1-byte indices instead of a long list of scalars.
00723 
00724   ArrayRCP<bool> pSizeUniform_;
00725   ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
00726   ArrayRCP<ArrayRCP<scalar_t> > pSize_;
00727 
00729   // The algorithm sets these values upon completion.
00730 
00731   ArrayRCP<const gid_t>  gids_;   // User's global IDs
00732   ArrayRCP<part_t> parts_;      // part number assigned to gids_[i]
00733 
00734   bool haveSolution_;
00735 
00736   part_t nGlobalPartsSolution_; // global number of parts in solution
00737 
00739   // The solution calculates this from the part assignments,
00740   // unless onePartPerProc_.
00741 
00742   ArrayRCP<int> procs_;       // process rank assigned to gids_[i]
00743 };
00744 
00746 // Definitions
00748 
00749 template <typename Adapter>
00750   PartitioningSolution<Adapter>::PartitioningSolution(
00751     RCP<const Environment> &env,
00752     RCP<const Comm<int> > &comm,
00753     RCP<const IdentifierMap<user_t> > &idMap, int nUserWeights)
00754     : env_(env), comm_(comm), idMap_(idMap),
00755       partBoxes(),comXAdj_(), comAdj_(),
00756       nGlobalParts_(0), nLocalParts_(0),
00757       localFraction_(0),  nWeightsPerObj_(),
00758       onePartPerProc_(false), partDist_(), procDist_(),
00759       procDistEquallySpread_(false),
00760       pSizeUniform_(), pCompactIndex_(), pSize_(),
00761       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
00762       procs_()
00763 {
00764   nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);  // TODO:  WHY??  WHY NOT ZERO?
00765 
00766   setPartDistribution();
00767 
00768   // We must call setPartSizes() because part sizes may have
00769   // been provided by the user on other processes.
00770 
00771   ArrayRCP<part_t> *noIds = new ArrayRCP<part_t> [nWeightsPerObj_];
00772   ArrayRCP<scalar_t> *noSizes = new ArrayRCP<scalar_t> [nWeightsPerObj_];
00773   ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_, true);
00774   ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_, true);
00775 
00776   setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
00777 
00778   env_->memory("After construction of solution");
00779 }
00780 
00781 template <typename Adapter>
00782   PartitioningSolution<Adapter>::PartitioningSolution(
00783     RCP<const Environment> &env,
00784     RCP<const Comm<int> > &comm,
00785     RCP<const IdentifierMap<user_t> > &idMap, int nUserWeights,
00786     ArrayView<ArrayRCP<part_t> > reqPartIds,
00787     ArrayView<ArrayRCP<scalar_t> > reqPartSizes)
00788     : env_(env), comm_(comm), idMap_(idMap),
00789       partBoxes(),comXAdj_(), comAdj_(),
00790       nGlobalParts_(0), nLocalParts_(0),
00791       localFraction_(0),  nWeightsPerObj_(),
00792       onePartPerProc_(false), partDist_(), procDist_(),
00793       procDistEquallySpread_(false),
00794       pSizeUniform_(), pCompactIndex_(), pSize_(),
00795       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
00796       procs_()
00797 {
00798   nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);  // TODO:  WHY?? WHY NOT ZERO?
00799 
00800   setPartDistribution();
00801 
00802   setPartSizes(reqPartIds, reqPartSizes);
00803 
00804   env_->memory("After construction of solution");
00805 }
00806 
00807 template <typename Adapter>
00808   void PartitioningSolution<Adapter>::setPartDistribution()
00809 {
00810   // Did the caller define num_global_parts and/or num_local_parts?
00811 
00812   const ParameterList &pl = env_->getParameters();
00813   size_t haveGlobalNumParts=0, haveLocalNumParts=0;
00814   int numLocal=0, numGlobal=0;
00815   double val;
00816 
00817   const Teuchos::ParameterEntry *pe = pl.getEntryPtr("num_global_parts");
00818 
00819   if (pe){
00820     val = pe->getValue<double>(&val);  // TODO: KDD Skip this double get
00821     haveGlobalNumParts = 1;            // TODO: KDD Should be unnecessary once
00822     numGlobal = static_cast<int>(val); // TODO: KDD paramlist handles long long.
00823     nGlobalParts_ = part_t(numGlobal); // TODO: KDD  also do below.
00824   }
00825 
00826   pe = pl.getEntryPtr("num_local_parts");
00827 
00828   if (pe){
00829     val = pe->getValue<double>(&val);
00830     haveLocalNumParts = 1;
00831     numLocal = static_cast<int>(val);
00832     nLocalParts_ = part_t(numLocal);
00833   }
00834 
00835   try{
00836     // Sets onePartPerProc_, partDist_, and procDist_
00837 
00838     partToProc(true, haveLocalNumParts, haveGlobalNumParts,
00839       numLocal, numGlobal);
00840   }
00841   Z2_FORWARD_EXCEPTIONS
00842 
00843   int nprocs = comm_->getSize();
00844   int rank = comm_->getRank();
00845 
00846   if (onePartPerProc_){
00847     nGlobalParts_ = nprocs;
00848     nLocalParts_ = 1;
00849   }
00850   else if (partDist_.size() > 0){   // more procs than parts
00851     nGlobalParts_ = partDist_.size() - 1;
00852     int pstart = partDist_[0];
00853     for (part_t i=1; i <= nGlobalParts_; i++){
00854       int pend = partDist_[i];
00855       if (rank >= pstart && rank < pend){
00856         int numOwners = pend - pstart;
00857         nLocalParts_ = 1;
00858         localFraction_ = 1.0 / numOwners;
00859         break;
00860       }
00861       pstart = pend;
00862     }
00863   }
00864   else if (procDist_.size() > 0){  // more parts than procs
00865     nGlobalParts_ = procDist_[nprocs];
00866     nLocalParts_ = procDist_[rank+1] - procDist_[rank];
00867   }
00868   else {
00869     throw std::logic_error("partToProc error");
00870   }
00871 
00872 }
00873 
00874 template <typename Adapter>
00875   void PartitioningSolution<Adapter>::setPartSizes(
00876     ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
00877 {
00878   int widx = nWeightsPerObj_;
00879   bool fail=false;
00880 
00881   size_t *countBuf = new size_t [widx*2];
00882   ArrayRCP<size_t> counts(countBuf, 0, widx*2, true);
00883 
00884   fail = ((ids.size() != widx) || (sizes.size() != widx));
00885 
00886   for (int w=0; !fail && w < widx; w++){
00887     counts[w] = ids[w].size();
00888     if (ids[w].size() != sizes[w].size()) fail=true;
00889   }
00890 
00891   env_->globalBugAssertion(__FILE__, __LINE__, "bad argument arrays", fail==0,
00892     COMPLEX_ASSERTION, comm_);
00893 
00894   // Are all part sizes the same?  This is the common case.
00895 
00896   ArrayRCP<scalar_t> *emptySizes= new ArrayRCP<scalar_t> [widx];
00897   pSize_ = arcp(emptySizes, 0, widx);
00898 
00899   ArrayRCP<unsigned char> *emptyIndices= new ArrayRCP<unsigned char> [widx];
00900   pCompactIndex_ = arcp(emptyIndices, 0, widx);
00901 
00902   bool *info = new bool [widx];
00903   pSizeUniform_ = arcp(info, 0, widx);
00904   for (int w=0; w < widx; w++)
00905     pSizeUniform_[w] = true;
00906 
00907   if (nGlobalParts_ == 1){
00908     return;   // there's only one part in the whole problem
00909   }
00910 
00911   size_t *ptr1 = counts.getRawPtr();
00912   size_t *ptr2 = counts.getRawPtr() + widx;
00913 
00914   try{
00915     reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
00916   }
00917   Z2_THROW_OUTSIDE_ERROR(*env_);
00918 
00919   bool zero = true;
00920 
00921   for (int w=0; w < widx; w++)
00922     if (counts[widx+w] > 0){
00923       zero = false;
00924       pSizeUniform_[w] = false;
00925     }
00926 
00927   if (zero) // Part sizes for all criteria are uniform.
00928     return;
00929 
00930   // Compute the part sizes for criteria for which part sizes were
00931   // supplied.  Normalize for each criteria so part sizes sum to one.
00932 
00933   int nprocs = comm_->getSize();
00934   int rank = comm_->getRank();
00935 
00936   for (int w=0; w < widx; w++){
00937     if (pSizeUniform_[w]) continue;
00938 
00939     // Send all ids and sizes to one process.
00940     // (There is no simple gather method in Teuchos.)
00941 
00942     part_t length = ids[w].size();
00943     part_t *allLength = new part_t [nprocs];
00944     Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
00945       nprocs, allLength);
00946 
00947     if (rank == 0){
00948       int total = 0;
00949       for (int i=0; i < nprocs; i++)
00950         total += allLength[i];
00951 
00952       part_t *partNums = new part_t [total];
00953       scalar_t *partSizes = new scalar_t [total];
00954 
00955       ArrayView<part_t> idArray(partNums, total);
00956       ArrayView<scalar_t> sizeArray(partSizes, total);
00957 
00958       if (length > 0){
00959         for (int i=0; i < length; i++){
00960           *partNums++ = ids[w][i];
00961           *partSizes++ = sizes[w][i];
00962         }
00963       }
00964 
00965       for (int p=1; p < nprocs; p++){
00966         if (allLength[p] > 0){
00967           Teuchos::receive<int, part_t>(*comm_, p,
00968             allLength[p], partNums);
00969           Teuchos::receive<int, scalar_t>(*comm_, p,
00970             allLength[p], partSizes);
00971           partNums += allLength[p];
00972           partSizes += allLength[p];
00973         }
00974       }
00975 
00976       delete [] allLength;
00977 
00978       try{
00979         computePartSizes(w, idArray, sizeArray);
00980       }
00981       Z2_FORWARD_EXCEPTIONS
00982 
00983       delete [] idArray.getRawPtr();
00984       delete [] sizeArray.getRawPtr();
00985     }
00986     else{
00987       delete [] allLength;
00988       if (length > 0){
00989         Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
00990         Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
00991       }
00992     }
00993 
00994     broadcastPartSizes(w);
00995   }
00996 }
00997 
00998 template <typename Adapter>
00999   void PartitioningSolution<Adapter>::broadcastPartSizes(int widx)
01000 {
01001   env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
01002     pSize_.size()>widx &&
01003     pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
01004     COMPLEX_ASSERTION);
01005 
01006   int rank = comm_->getRank();
01007   int nprocs = comm_->getSize();
01008   part_t nparts = nGlobalParts_;
01009 
01010   if (nprocs < 2)
01011     return;
01012 
01013   char flag=0;
01014 
01015   if (rank == 0){
01016     if (pSizeUniform_[widx] == true)
01017       flag = 1;
01018     else if (pCompactIndex_[widx].size() > 0)
01019       flag = 2;
01020     else
01021       flag = 3;
01022   }
01023 
01024   try{
01025     Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
01026   }
01027   Z2_THROW_OUTSIDE_ERROR(*env_);
01028 
01029   if (flag == 1){
01030     if (rank > 0)
01031       pSizeUniform_[widx] = true;
01032 
01033     return;
01034   }
01035 
01036   if (flag == 2){
01037 
01038     // broadcast the indices into the size list
01039 
01040     unsigned char *idxbuf = NULL;
01041 
01042     if (rank > 0){
01043       idxbuf = new unsigned char [nparts];
01044       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
01045     }
01046     else{
01047       env_->localBugAssertion(__FILE__, __LINE__, "index list size",
01048         pCompactIndex_[widx].size() == nparts, COMPLEX_ASSERTION);
01049       idxbuf = pCompactIndex_[widx].getRawPtr();
01050     }
01051 
01052     try{
01053       // broadcast of unsigned char is not supported
01054       Teuchos::broadcast<int, char>(*comm_, 0, nparts,
01055         reinterpret_cast<char *>(idxbuf));
01056     }
01057     Z2_THROW_OUTSIDE_ERROR(*env_);
01058 
01059     if (rank > 0)
01060       pCompactIndex_[widx] = arcp(idxbuf, 0, nparts, true);
01061 
01062     // broadcast the list of different part sizes
01063 
01064     unsigned char maxIdx=0;
01065     for (part_t p=0; p < nparts; p++)
01066       if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
01067 
01068     int numSizes = maxIdx + 1;
01069 
01070     scalar_t *sizeList = NULL;
01071 
01072     if (rank > 0){
01073       sizeList = new scalar_t [numSizes];
01074       env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
01075     }
01076     else{
01077       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
01078         numSizes == pSize_[widx].size(), COMPLEX_ASSERTION);
01079 
01080       sizeList = pSize_[widx].getRawPtr();
01081     }
01082 
01083     try{
01084       Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
01085     }
01086     Z2_THROW_OUTSIDE_ERROR(*env_);
01087 
01088     if (rank > 0)
01089       pSize_[widx] = arcp(sizeList, 0, numSizes, true);
01090 
01091     return;
01092   }
01093 
01094   if (flag == 3){
01095 
01096     // broadcast the size of each part
01097 
01098     scalar_t *sizeList = NULL;
01099 
01100     if (rank > 0){
01101       sizeList = new scalar_t [nparts];
01102       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
01103     }
01104     else{
01105       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
01106         nparts == pSize_[widx].size(), COMPLEX_ASSERTION);
01107 
01108       sizeList = pSize_[widx].getRawPtr();
01109     }
01110 
01111     try{
01112       Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
01113     }
01114     Z2_THROW_OUTSIDE_ERROR(*env_);
01115 
01116     if (rank > 0)
01117       pSize_[widx] = arcp(sizeList, 0, nparts);
01118 
01119     return;
01120   }
01121 }
01122 
01123 template <typename Adapter>
01124   void PartitioningSolution<Adapter>::computePartSizes(int widx,
01125     ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
01126 {
01127   int len = ids.size();
01128 
01129   if (len == 0){
01130     pSizeUniform_[widx] = true;
01131     return;
01132   }
01133 
01134   env_->localBugAssertion(__FILE__, __LINE__, "bad array sizes",
01135     len>0 && sizes.size()==len, COMPLEX_ASSERTION);
01136 
01137   env_->localBugAssertion(__FILE__, __LINE__, "bad index",
01138     widx>=0 && widx<nWeightsPerObj_, COMPLEX_ASSERTION);
01139 
01140   env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
01141     pSize_.size()>widx &&
01142     pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
01143     COMPLEX_ASSERTION);
01144 
01145   // Check ids and sizes and find min, max and average sizes.
01146   // If sizes are very close to uniform, call them uniform parts.
01147 
01148   part_t nparts = nGlobalParts_;
01149   unsigned char *buf = new unsigned char [nparts];
01150   env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
01151   memset(buf, 0, nparts);
01152   ArrayRCP<unsigned char> partIdx(buf, 0, nparts, true);
01153 
01154   scalar_t epsilon = 10e-5 / nparts;
01155   scalar_t min=sizes[0], max=sizes[0], sum=0;
01156 
01157   for (int i=0; i < len; i++){
01158     part_t id = ids[i];
01159     scalar_t size = sizes[i];
01160 
01161     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
01162       id>=0 && id<nparts, BASIC_ASSERTION);
01163 
01164     env_->localInputAssertion(__FILE__, __LINE__, "invalid part size", size>=0,
01165       BASIC_ASSERTION);
01166 
01167     // TODO: we could allow users to specify multiple sizes for the same
01168     // part if we add a parameter that says what we are to do with them:
01169     // add them or take the max.
01170 
01171     env_->localInputAssertion(__FILE__, __LINE__,
01172       "multiple sizes provided for one part", partIdx[id]==0, BASIC_ASSERTION);
01173 
01174     partIdx[id] = 1;    // mark that we have a size for this part
01175 
01176     if (size < min) min = size;
01177     if (size > max) max = size;
01178     sum += size;
01179   }
01180 
01181   if (sum == 0){
01182 
01183     // User has given us a list of parts of size 0, we'll set
01184     // the rest to them to equally sized parts.
01185 
01186     scalar_t *allSizes = new scalar_t [2];
01187     env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
01188 
01189     ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, true);
01190 
01191     int numNonZero = nparts - len;
01192 
01193     allSizes[0] = 0.0;
01194     allSizes[1] = 1.0 / numNonZero;
01195 
01196     for (part_t p=0; p < nparts; p++)
01197       buf[p] = 1;                 // index to default part size
01198 
01199     for (int i=0; i < len; i++)
01200       buf[ids[i]] = 0;            // index to part size zero
01201 
01202     pSize_[widx] = sizeArray;
01203     pCompactIndex_[widx] = partIdx;
01204 
01205     return;
01206   }
01207 
01208   if (max - min <= epsilon){
01209     pSizeUniform_[widx] = true;
01210     return;
01211   }
01212 
01213   // A size for parts that were not specified:
01214   scalar_t avg = sum / nparts;
01215 
01216   // We are going to merge part sizes that are very close.  This takes
01217   // computation time now, but can save considerably in the storage of
01218   // all part sizes on each process.  For example, a common case may
01219   // be some parts are size 1 and all the rest are size 2.
01220 
01221   scalar_t *tmp = new scalar_t [len];
01222   env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01223   memcpy(tmp, sizes.getRawPtr(), sizeof(scalar_t) * len);
01224   ArrayRCP<scalar_t> partSizes(tmp, 0, len, true);
01225 
01226   std::sort(partSizes.begin(), partSizes.end());
01227 
01228   // create a list of sizes that are unique within epsilon
01229 
01230   Array<scalar_t> nextUniqueSize;
01231   nextUniqueSize.push_back(partSizes[len-1]);   // largest
01232   scalar_t curr = partSizes[len-1];
01233   int avgIndex = len;
01234   bool haveAvg = false;
01235   if (curr - avg <= epsilon)
01236      avgIndex = 0;
01237 
01238   for (int i=len-2; i >= 0; i--){
01239     scalar_t val = partSizes[i];
01240     if (curr - val > epsilon){
01241       nextUniqueSize.push_back(val);  // the highest in the group
01242       curr = val;
01243       if (avgIndex==len && val > avg && val - avg <= epsilon){
01244         // the average would be in this group
01245         avgIndex = nextUniqueSize.size() - 1;
01246         haveAvg = true;
01247       }
01248     }
01249   }
01250 
01251   partSizes.clear();
01252 
01253   size_t numSizes = nextUniqueSize.size();
01254   int sizeArrayLen = numSizes;
01255 
01256   if (numSizes < 64){
01257     // We can store the size for every part in a compact way.
01258 
01259     // Create a list of all sizes in increasing order
01260 
01261     if (!haveAvg) sizeArrayLen++;   // need to include average
01262 
01263     scalar_t *allSizes = new scalar_t [sizeArrayLen];
01264     env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
01265     ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, true);
01266 
01267     int newAvgIndex = sizeArrayLen;
01268 
01269     for (int i=numSizes-1, idx=0; i >= 0; i--){
01270 
01271       if (newAvgIndex == sizeArrayLen){
01272 
01273         if (haveAvg && i==avgIndex)
01274           newAvgIndex = idx;
01275 
01276         else if (!haveAvg && avg < nextUniqueSize[i]){
01277           newAvgIndex = idx;
01278           allSizes[idx++] = avg;
01279         }
01280       }
01281 
01282       allSizes[idx++] = nextUniqueSize[i];
01283     }
01284 
01285     env_->localBugAssertion(__FILE__, __LINE__, "finding average in list",
01286       newAvgIndex < sizeArrayLen, COMPLEX_ASSERTION);
01287 
01288     for (int i=0; i < nparts; i++){
01289       buf[i] = newAvgIndex;   // index to default part size
01290     }
01291 
01292     sum = (nparts - len) * allSizes[newAvgIndex];
01293 
01294     for (int i=0; i < len; i++){
01295       int id = ids[i];
01296       scalar_t size = sizes[i];
01297       int index;
01298 
01299       // Find the first size greater than or equal to this size.
01300 
01301       if (size < avg && avg - size <= epsilon)
01302         index = newAvgIndex;
01303       else{
01304         typename ArrayRCP<scalar_t>::iterator found =
01305           std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
01306 
01307         env_->localBugAssertion(__FILE__, __LINE__, "size array",
01308           found != sizeArray.end(), COMPLEX_ASSERTION);
01309 
01310         index = found - sizeArray.begin();
01311       }
01312 
01313       buf[id] = index;
01314       sum += allSizes[index];
01315     }
01316 
01317     for (int i=0; i < sizeArrayLen; i++){
01318       sizeArray[i] /= sum;
01319     }
01320 
01321     pCompactIndex_[widx] = partIdx;
01322     pSize_[widx] = sizeArray;
01323   }
01324   else{
01325     // To have access to part sizes, we must store nparts scalar_ts on
01326     // every process.  We expect this is a rare case.
01327 
01328     tmp = new scalar_t [nparts];
01329     env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
01330 
01331     sum += ((nparts - len) * avg);
01332 
01333     for (int i=0; i < nparts; i++){
01334       tmp[i] = avg/sum;
01335     }
01336 
01337     for (int i=0; i < len; i++){
01338       tmp[ids[i]] = sizes[i]/sum;
01339     }
01340 
01341     pSize_[widx] = arcp(tmp, 0, nparts);
01342   }
01343 }
01344 
01345 
01346 template <typename Adapter>
01347   void PartitioningSolution<Adapter>::setParts(
01348     ArrayRCP<const gno_t> &gnoList, ArrayRCP<part_t> &partList,
01349     bool dataDidNotMove)
01350 {
01351   env_->debug(DETAILED_STATUS, "Entering setParts");
01352 
01353   size_t len = partList.size();
01354 
01355   // Find the actual number of parts in the solution, which can
01356   // be more or less than the nGlobalParts_ target.
01357   // (We may want to compute the imbalance of a given solution with
01358   // respect to a desired solution.  This solution may have more or
01359   // fewer parts that the desired solution.)
01360 
01361   part_t lMax=0, lMin=0, gMax, gMin;
01362 
01363   if (len > 0)
01364     IdentifierTraits<part_t>::minMax(partList.getRawPtr(), len, lMin, lMax);
01365 
01366   IdentifierTraits<part_t>::globalMinMax(*comm_, len == 0,
01367     lMin, lMax, gMin, gMax);
01368 
01369   nGlobalPartsSolution_ = gMax - gMin + 1;
01370 
01371   if (dataDidNotMove) {
01372     // Case where the algorithm provides the part list in the same order
01373     // that it received the gnos.
01374 
01375     if (idMap_->gnosAreGids()){
01376       gids_ = Teuchos::arcp_reinterpret_cast<const gid_t>(gnoList);
01377     }
01378     else{
01379       gid_t *gidList = new gid_t [len];
01380       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01381       ArrayView<gid_t> gidView(gidList, len);
01382 
01383       const gno_t *gnos = gnoList.getRawPtr();
01384       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01385 
01386       try{
01387         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01388       }
01389       Z2_FORWARD_EXCEPTIONS
01390 
01391       gids_ = arcp<const gid_t>(gidList, 0, len);
01392     }
01393     parts_ = partList;
01394   }
01395   else {
01396     // Case where the algorithm moved the data.
01397     // KDDKDD Should we require the algorithm to return the parts in
01398     // KDDKDD the same order as the gnos provided by the model?
01399 
01400     // We send part information to the process that "owns" the global number.
01401 
01402     ArrayView<gid_t> emptyView;
01403     ArrayView<int> procList;
01404 
01405     if (len){
01406       int *tmp = new int [len];
01407       env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01408       procList = ArrayView<int>(tmp, len);
01409     }
01410 
01411     idMap_->gnoGlobalTranslate (gnoList.view(0,len), emptyView, procList);
01412 
01413     int remotelyOwned = 0;
01414     int rank = comm_->getRank();
01415     int nprocs = comm_->getSize();
01416 
01417     for (size_t i=0; !remotelyOwned && i < len; i++){
01418       if (procList[i] != rank)
01419         remotelyOwned = 1;
01420     }
01421 
01422     int anyRemotelyOwned=0;
01423 
01424     try{
01425       reduceAll<int, int>(*comm_, Teuchos::REDUCE_MAX, 1,
01426         &remotelyOwned, &anyRemotelyOwned);
01427     }
01428     Z2_THROW_OUTSIDE_ERROR(*env_);
01429 
01430     if (anyRemotelyOwned){
01431 
01432       // Send the owners of these gnos their part assignments.
01433 
01434       Array<int> countOutBuf(nprocs, 0);
01435       Array<int> countInBuf(nprocs, 0);
01436 
01437       Array<gno_t> outBuf(len*2, 0);
01438 
01439       if (len > 0){
01440         Array<lno_t> offsetBuf(nprocs+1, 0);
01441 
01442         for (size_t i=0; i < len; i++){
01443           countOutBuf[procList[i]]+=2;
01444         }
01445 
01446         offsetBuf[0] = 0;
01447         for (int i=0; i < nprocs; i++)
01448           offsetBuf[i+1] = offsetBuf[i] + countOutBuf[i];
01449 
01450         for (size_t i=0; i < len; i++){
01451           int p = procList[i];
01452           int off = offsetBuf[p];
01453           outBuf[off] = gnoList[i];
01454           outBuf[off+1] = static_cast<gno_t>(partList[i]);
01455           offsetBuf[p]+=2;
01456         }
01457       }
01458 
01459       ArrayRCP<gno_t> inBuf;
01460 
01461       try{
01462         AlltoAllv<gno_t>(*comm_, *env_,
01463           outBuf(), countOutBuf(), inBuf, countInBuf());
01464       }
01465       Z2_FORWARD_EXCEPTIONS;
01466 
01467       outBuf.clear();
01468       countOutBuf.clear();
01469 
01470       gno_t newLen = 0;
01471       for (int i=0; i < nprocs; i++)
01472         newLen += countInBuf[i];
01473 
01474       countInBuf.clear();
01475 
01476       newLen /= 2;
01477 
01478       ArrayRCP<part_t> parts;
01479       ArrayRCP<const gno_t> myGnos;
01480 
01481       if (newLen > 0){
01482 
01483         gno_t *tmpGno = new gno_t [newLen];
01484         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpGno);
01485 
01486         part_t *tmpPart = new part_t [newLen];
01487         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpPart);
01488 
01489         size_t next = 0;
01490         for (lno_t i=0; i < newLen; i++){
01491           tmpGno[i] = inBuf[next++];
01492           tmpPart[i] = inBuf[next++];
01493         }
01494 
01495         parts = arcp(tmpPart, 0, newLen);
01496         myGnos = arcp(tmpGno, 0, newLen);
01497       }
01498 
01499       gnoList = myGnos;
01500       partList = parts;
01501       len = newLen;
01502     }
01503 
01504     delete [] procList.getRawPtr();
01505 
01506     if (idMap_->gnosAreGids()){
01507       gids_ = Teuchos::arcp_reinterpret_cast<const gid_t>(gnoList);
01508     }
01509     else{
01510       gid_t *gidList = new gid_t [len];
01511       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01512       ArrayView<gid_t> gidView(gidList, len);
01513 
01514       const gno_t *gnos = gnoList.getRawPtr();
01515       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01516 
01517       try{
01518         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01519       }
01520       Z2_FORWARD_EXCEPTIONS
01521 
01522       gids_ = arcp<const gid_t>(gidList, 0, len);
01523     }
01524 
01525     parts_ = partList;
01526   }
01527 
01528   // Now determine which process gets each object, if not one-to-one.
01529 
01530   if (!onePartPerProc_){
01531 
01532     int *procs = new int [len];
01533     env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
01534     procs_ = arcp<int>(procs, 0, len);
01535 
01536     part_t *parts = partList.getRawPtr();
01537 
01538     if (procDist_.size() > 0){    // parts are not split across procs
01539 
01540       int procId;
01541       for (size_t i=0; i < len; i++){
01542         partToProcsMap(parts[i], procs[i], procId);
01543       }
01544     }
01545     else{  // harder - we need to split the parts across multiple procs
01546 
01547       lno_t *partCounter = new lno_t [nGlobalPartsSolution_];
01548       env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
01549         partCounter);
01550 
01551       int numProcs = comm_->getSize();
01552 
01553       //MD NOTE: there was no initialization for partCounter.
01554       //I added the line below, correct me if I am wrong.
01555       memset(partCounter, 0, sizeof(lno_t) * nGlobalPartsSolution_);
01556 
01557       for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
01558         partCounter[parts[i]]++;
01559 
01560       lno_t *procCounter = new lno_t [numProcs];
01561       env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
01562 
01563       int proc1;
01564       int proc2 = partDist_[0];
01565 
01566       for (part_t part=1; part < nGlobalParts_; part++){
01567         proc1 = proc2;
01568         proc2 = partDist_[part+1];
01569         int numprocs = proc2 - proc1;
01570 
01571         double dNum = partCounter[part];
01572         double dProcs = numprocs;
01573 
01574         //cout << "dNum:" << dNum << " dProcs:" << dProcs << endl;
01575         double each = floor(dNum/dProcs);
01576         double extra = fmod(dNum,dProcs);
01577 
01578         for (int proc=proc1, i=0; proc<proc2; proc++, i++){
01579           if (i < extra)
01580             procCounter[proc] = lno_t(each) + 1;
01581           else
01582             procCounter[proc] = lno_t(each);
01583         }
01584       }
01585 
01586       delete [] partCounter;
01587 
01588       for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
01589         if (partList[i] >= nGlobalParts_){
01590           // Solution has more parts that targeted.  These
01591           // objects just remain on this process.
01592           procs[i] = comm_->getRank();
01593           continue;
01594         }
01595         part_t partNum = parts[i];
01596         proc1 = partDist_[partNum];
01597         proc2 = partDist_[partNum + 1];
01598 
01599         int proc;
01600         for (proc=proc1; proc < proc2; proc++){
01601           if (procCounter[proc] > 0){
01602             procs[i] = proc;
01603             procCounter[proc]--;
01604             break;
01605           }
01606         }
01607         env_->localBugAssertion(__FILE__, __LINE__, "part to proc",
01608           proc < proc2, COMPLEX_ASSERTION);
01609       }
01610 
01611       delete [] procCounter;
01612     }
01613   }
01614 
01615   // Now that parts_ info is back on home process, remap the parts.
01616   // TODO:  The parts will be inconsistent with the proc assignments after
01617   // TODO:  remapping.  This problem will go away after we separate process
01618   // TODO:  mapping from setParts.  But for MueLu's use case, the part
01619   // TODO:  remapping is all that matters; they do not use the process mapping.
01620   int doRemap = 0;
01621   const Teuchos::ParameterEntry *pe =
01622                  env_->getParameters().getEntryPtr("remap_parts");
01623   if (pe) doRemap = pe->getValue(&doRemap);
01624   if (doRemap) RemapParts();
01625 
01626   haveSolution_ = true;
01627 
01628   env_->memory("After Solution has processed algorithm's answer");
01629   env_->debug(DETAILED_STATUS, "Exiting setParts");
01630 }
01631 
01632 template <typename Adapter>
01633   template <typename Extra>
01634     size_t PartitioningSolution<Adapter>::convertSolutionToImportList(
01635       int numExtra, ArrayRCP<Extra> &xtraInfo,
01636       ArrayRCP<typename Adapter::gid_t> &imports,       // output
01637       ArrayRCP<Extra> &newXtraInfo) const               // output
01638 {
01639   env_->localInputAssertion(__FILE__, __LINE__, "no solution yet",
01640     haveSolution_, BASIC_ASSERTION);
01641   env_->debug(DETAILED_STATUS, "Entering convertSolutionToImportList");
01642 
01643   int numProcs                = comm_->getSize();
01644   size_t localNumIds          = gids_.size();
01645 
01646   // How many to each process?
01647   Array<int> counts(numProcs, 0);
01648   if (onePartPerProc_)
01649     for (size_t i=0; i < localNumIds; i++)
01650       counts[parts_[i]]++;
01651   else
01652     for (size_t i=0; i < localNumIds; i++)
01653       counts[procs_[i]]++;
01654 
01655   Array<gno_t> offsets(numProcs+1, 0);
01656   for (int i=1; i <= numProcs; i++){
01657     offsets[i] = offsets[i-1] + counts[i-1];
01658   }
01659 
01660   Array<gid_t> gidList(localNumIds);
01661   Array<Extra> numericInfo;
01662 
01663   if (numExtra > 0)
01664     numericInfo.resize(localNumIds);
01665 
01666   if (onePartPerProc_){
01667     for (size_t i=0; i < localNumIds; i++){
01668       lno_t idx = offsets[parts_[i]];
01669       gidList[idx] = gids_[i];
01670       if (numExtra > 0)
01671         numericInfo[idx] = xtraInfo[i];
01672       offsets[parts_[i]] = idx + 1;
01673     }
01674   }
01675   else {
01676     for (size_t i=0; i < localNumIds; i++){
01677       lno_t idx = offsets[procs_[i]];
01678       gidList[idx] = gids_[i];
01679       if (numExtra > 0)
01680         numericInfo[idx] = xtraInfo[i];
01681       offsets[procs_[i]] = idx + 1;
01682     }
01683   }
01684 
01685   Array<int> recvCounts(numProcs, 0);
01686 
01687   try{
01688     AlltoAllv<gid_t>(*comm_, *env_, gidList(),
01689       counts(), imports, recvCounts());
01690   }
01691   catch (std::exception &e){
01692     throw std::runtime_error("alltoallv 1");
01693   }
01694 
01695   if (numExtra > 0){
01696     try{
01697       AlltoAllv<Extra>(*comm_, *env_, xtraInfo(),
01698         counts(), newXtraInfo, recvCounts());
01699     }
01700     catch (std::exception &e){
01701       throw std::runtime_error("alltoallv 2");
01702     }
01703   }
01704 
01705   env_->debug(DETAILED_STATUS, "Exiting convertSolutionToImportList");
01706   return imports.size();
01707 }
01708 
01709 template <typename Adapter>
01710   void PartitioningSolution<Adapter>::procToPartsMap(int procId,
01711     double &numParts, part_t &partMin, part_t &partMax) const
01712 {
01713   if (onePartPerProc_){
01714     numParts = 1.0;
01715     partMin = partMax = procId;
01716   }
01717   else if (procDist_.size() > 0){
01718     partMin = procDist_[procId];
01719     partMax = procDist_[procId+1] - 1;
01720     numParts = procDist_[procId+1] - partMin;
01721   }
01722   else{
01723     // find the first p such that partDist_[p] > procId
01724 
01725     std::vector<int>::const_iterator entry;
01726     entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
01727 
01728     size_t partIdx = entry - partDist_.begin();
01729     int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
01730     partMin = partMax = int(partIdx) - 1;
01731     numParts = 1.0 / numProcs;
01732   }
01733 }
01734 
01735 template <typename Adapter>
01736   void PartitioningSolution<Adapter>::partToProcsMap(part_t partId,
01737     int &procMin, int &procMax) const
01738 {
01739   if (partId >= nGlobalParts_){
01740     // setParts() may be given an initial solution which uses a
01741     // different number of parts than the desired solution.  It is
01742     // still a solution.  We keep it on this process.
01743     procMin = procMax = comm_->getRank();
01744   }
01745   else if (onePartPerProc_){
01746     procMin = procMax = int(partId);
01747   }
01748   else if (procDist_.size() > 0){
01749     if (procDistEquallySpread_) {
01750       // Avoid binary search.
01751       double fProcs = comm_->getSize();
01752       double fParts = nGlobalParts_;
01753       double each = fParts / fProcs;
01754       procMin = int(partId / each);
01755       while (procDist_[procMin] > partId) procMin--;
01756       while (procDist_[procMin+1] <= partId) procMin++;
01757       procMax = procMin;
01758     }
01759     else {
01760       // find the first p such that procDist_[p] > partId.
01761       // For now, do a binary search.
01762 
01763       typename std::vector<part_t>::const_iterator entry;
01764       entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
01765 
01766       size_t procIdx = entry - procDist_.begin();
01767       procMin = procMax = int(procIdx) - 1;
01768     }
01769   }
01770   else{
01771     procMin = partDist_[partId];
01772     procMax = partDist_[partId+1] - 1;
01773   }
01774 }
01775 
01776 template <typename Adapter>
01777   bool PartitioningSolution<Adapter>::criteriaHaveSamePartSizes(
01778     int c1, int c2) const
01779 {
01780   if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
01781     throw std::logic_error("criteriaHaveSamePartSizes error");
01782 
01783   bool theSame = false;
01784 
01785   if (c1 == c2)
01786     theSame = true;
01787 
01788   else if (pSizeUniform_[c1] == true && pSizeUniform_[c2] == true)
01789     theSame = true;
01790 
01791   else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
01792     theSame = true;
01793     bool useIndex = pCompactIndex_[c1].size() > 0;
01794     if (useIndex){
01795       for (part_t p=0; theSame && p < nGlobalParts_; p++)
01796         if (pSize_[c1][pCompactIndex_[c1][p]] !=
01797             pSize_[c2][pCompactIndex_[c2][p]])
01798           theSame = false;
01799     }
01800     else{
01801       for (part_t p=0; theSame && p < nGlobalParts_; p++)
01802         if (pSize_[c1][p] != pSize_[c2][p])
01803           theSame = false;
01804     }
01805   }
01806 
01807   return theSame;
01808 }
01809 
01825 template <typename Adapter>
01826   void PartitioningSolution<Adapter>::partToProc(
01827     bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
01828     int numLocalParts, int numGlobalParts)
01829 {
01830   int nprocs = comm_->getSize();
01831   ssize_t reducevals[4];
01832   ssize_t sumHaveGlobal=0, sumHaveLocal=0;
01833   ssize_t sumGlobal=0, sumLocal=0;
01834   ssize_t maxGlobal=0, maxLocal=0;
01835   ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
01836       numGlobalParts, numLocalParts};
01837 
01838   partDist_.clear();
01839   procDist_.clear();
01840 
01841   if (doCheck){
01842 
01843     try{
01844       reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
01845     }
01846     Z2_THROW_OUTSIDE_ERROR(*env_);
01847 
01848     sumHaveGlobal = reducevals[0];
01849     sumHaveLocal = reducevals[1];
01850     sumGlobal = reducevals[2];
01851     sumLocal = reducevals[3];
01852 
01853     env_->localInputAssertion(__FILE__, __LINE__,
01854       "Either all procs specify num_global/local_parts or none do",
01855       (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
01856       (sumHaveLocal == 0 || sumHaveLocal == nprocs),
01857       BASIC_ASSERTION);
01858   }
01859   else{
01860     if (haveNumLocalParts)
01861       sumLocal = numLocalParts * nprocs;
01862     if (haveNumGlobalParts)
01863       sumGlobal = numGlobalParts * nprocs;
01864 
01865     sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
01866     sumHaveLocal = haveNumLocalParts ? nprocs : 0;
01867 
01868     maxLocal = numLocalParts;
01869     maxGlobal = numGlobalParts;
01870   }
01871 
01872   if (!haveNumLocalParts && !haveNumGlobalParts){
01873     onePartPerProc_ = true;   // default if user did not specify
01874     return;
01875   }
01876 
01877   if (haveNumGlobalParts){
01878     if (doCheck){
01879       vals[0] = numGlobalParts;
01880       vals[1] = numLocalParts;
01881       try{
01882         reduceAll<int, ssize_t>(
01883           *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
01884       }
01885       Z2_THROW_OUTSIDE_ERROR(*env_);
01886 
01887       maxGlobal = reducevals[0];
01888       maxLocal = reducevals[1];
01889 
01890       env_->localInputAssertion(__FILE__, __LINE__,
01891         "Value for num_global_parts is different on different processes.",
01892         maxGlobal * nprocs == sumGlobal, BASIC_ASSERTION);
01893     }
01894 
01895     if (sumLocal){
01896       env_->localInputAssertion(__FILE__, __LINE__,
01897         "Sum of num_local_parts does not equal requested num_global_parts",
01898         sumLocal == numGlobalParts, BASIC_ASSERTION);
01899 
01900       if (sumLocal == nprocs && maxLocal == 1){
01901         onePartPerProc_ = true;   // user specified one part per proc
01902         return;
01903       }
01904     }
01905     else{
01906       if (maxGlobal == nprocs){
01907         onePartPerProc_ = true;   // user specified num parts is num procs
01908         return;
01909       }
01910     }
01911   }
01912 
01913   // If we are here, we do not have #parts == #procs.
01914 
01915   if (sumHaveLocal == nprocs){
01916     //
01917     // We will go by the number of local parts specified.
01918     //
01919 
01920     try{
01921       procDist_.resize(nprocs+1);
01922     }
01923     catch (std::exception &e){
01924       throw(std::bad_alloc());
01925     }
01926 
01927     part_t *procArray = &procDist_[0];
01928 
01929     try{
01930       part_t tmp = part_t(numLocalParts);
01931       gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
01932     }
01933     Z2_THROW_OUTSIDE_ERROR(*env_);
01934 
01935     procArray[0] = 0;
01936 
01937     for (int proc=0; proc < nprocs; proc++)
01938       procArray[proc+1] += procArray[proc];
01939   }
01940   else{
01941     //
01942     // We will allocate global number of parts to the processes.
01943     //
01944     double fParts = numGlobalParts;
01945     double fProcs = nprocs;
01946 
01947     if (fParts < fProcs){
01948 
01949       try{
01950         partDist_.resize(size_t(fParts+1));
01951       }
01952       catch (std::exception &e){
01953         throw(std::bad_alloc());
01954       }
01955 
01956       int *partArray = &partDist_[0];
01957 
01958       double each = floor(fProcs / fParts);
01959       double extra = fmod(fProcs, fParts);
01960       partDist_[0] = 0;
01961 
01962       for (part_t part=0; part < numGlobalParts; part++){
01963         int numOwners = int(each + ((part<extra) ? 1 : 0));
01964         partArray[part+1] = partArray[part] + numOwners;
01965       }
01966 
01967       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
01968         partDist_[numGlobalParts] == nprocs, COMPLEX_ASSERTION, comm_);
01969     }
01970     else if (fParts > fProcs){
01971 
01972       // User did not specify local number of parts per proc;
01973       // Distribute the parts evenly among the procs.
01974 
01975       procDistEquallySpread_ = true;
01976 
01977       try{
01978         procDist_.resize(size_t(fProcs+1));
01979       }
01980       catch (std::exception &e){
01981         throw(std::bad_alloc());
01982       }
01983 
01984       part_t *procArray = &procDist_[0];
01985 
01986       double each = floor(fParts / fProcs);
01987       double extra = fmod(fParts, fProcs);
01988       procArray[0] = 0;
01989 
01990       for (int proc=0; proc < nprocs; proc++){
01991         part_t numParts = part_t(each + ((proc<extra) ? 1 : 0));
01992         procArray[proc+1] = procArray[proc] + numParts;
01993       }
01994 
01995       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
01996         procDist_[nprocs] == numGlobalParts, COMPLEX_ASSERTION, comm_);
01997     }
01998     else{
01999       env_->globalBugAssertion(__FILE__, __LINE__,
02000         "should never get here", 1, COMPLEX_ASSERTION, comm_);
02001     }
02002   }
02003 }
02004 
02006 // Remap a new part assignment vector for maximum overlap with an input
02007 // part assignment.
02008 //
02009 // Assumptions for this version:
02010 //   input part assignment == processor rank for every local object.
02011 //   assuming nGlobalParts_ <= num ranks
02012 // TODO:  Write a version that takes the input part number as input;
02013 //        this change requires input parts in adapters to be provided in
02014 //        the Adapter.
02015 // TODO:  For repartitioning, compare to old remapping results; see Zoltan1.
02016 
02017 template <typename Adapter>
02018 void PartitioningSolution<Adapter>::RemapParts()
02019 {
02020   size_t len = parts_.size();
02021 
02022   part_t me = comm_->getRank();
02023   int np = comm_->getSize();
02024 
02025   if (np < nGlobalParts_) {
02026     if (me == 0)
02027       std::cout << "Remapping not yet supported for "
02028            << "num_global_parts " << nGlobalParts_
02029            << " > num procs " << np << std::endl;
02030     return;
02031   }
02032   // Build edges of a bipartite graph with np + nGlobalParts_ vertices,
02033   // and edges between a process vtx and any parts to which that process'
02034   // objects are assigned.
02035   // Weight edge[parts_[i]] by the number of objects that are going from
02036   // this rank to parts_[i].
02037   // We use a std::map, assuming the number of unique parts in the parts_ array
02038   // is small to keep the binary search efficient.
02039   // TODO We use the count of objects to move; should change to SIZE of objects
02040   // to move; need SIZE function in Adapter.
02041 
02042   std::map<part_t, long> edges;
02043   long lstaying = 0;  // Total num of local objects staying if we keep the
02044                       // current mapping. TODO:  change to SIZE of local objs
02045   long gstaying = 0;  // Total num of objects staying in the current partition
02046 
02047   for (size_t i = 0; i < len; i++) {
02048     edges[parts_[i]]++;                // TODO Use obj size instead of count
02049     if (parts_[i] == me) lstaying++;    // TODO Use obj size instead of count
02050   }
02051 
02052   Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
02053                                 &lstaying, &gstaying);
02054 //TODO  if (gstaying == Adapter::getGlobalNumObjs()) return;  // Nothing to do
02055 
02056   part_t *remap = NULL;
02057 
02058   int nedges = edges.size();
02059 
02060   // Gather the graph to rank 0.
02061   part_t tnVtx = np + nGlobalParts_;  // total # vertices
02062   int *idx = NULL;    // Pointer index into graph adjacencies
02063   int *sizes = NULL;  // nedges per rank
02064   if (me == 0) {
02065     idx = new int[tnVtx+1];
02066     sizes = new int[np];
02067     sizes[0] = nedges;
02068   }
02069   if (np > 1)
02070     Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
02071 
02072   // prefix sum to build the idx array
02073   if (me == 0) {
02074     idx[0] = 0;
02075     for (int i = 0; i < np; i++)
02076       idx[i+1] = idx[i] + sizes[i];
02077   }
02078 
02079   // prepare to send edges
02080   int cnt = 0;
02081   part_t *bufv = NULL;
02082   long *bufw = NULL;
02083   if (nedges) {
02084     bufv = new part_t[nedges];
02085     bufw = new long[nedges];
02086     // Create buffer with edges (me, part[i]) and weight edges[parts_[i]].
02087     for (typename std::map<part_t, long>::iterator it = edges.begin();
02088          it != edges.end(); it++) {
02089       bufv[cnt] = it->first;  // target part
02090       bufw[cnt] = it->second; // weight
02091       cnt++;
02092     }
02093   }
02094 
02095   // Prepare to receive edges on rank 0
02096   part_t *adj = NULL;
02097   long *wgt = NULL;
02098   if (me == 0) {
02099 //SYM    adj = new part_t[2*idx[np]];  // need 2x space to symmetrize later
02100 //SYM    wgt = new long[2*idx[np]];  // need 2x space to symmetrize later
02101     adj = new part_t[idx[np]];
02102     wgt = new long[idx[np]];
02103   }
02104 
02105   Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
02106   Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
02107   delete [] bufv;
02108   delete [] bufw;
02109 
02110   // Now have constructed graph on rank 0.
02111   // Call the matching algorithm
02112 
02113   int doRemap;
02114   if (me == 0) {
02115     // We have the "LHS" vertices of the bipartite graph; need to create
02116     // "RHS" vertices.
02117     for (int i = 0; i < idx[np]; i++) {
02118       adj[i] += np;  // New RHS vertex number; offset by num LHS vertices
02119     }
02120 
02121     // Build idx for RHS vertices
02122     for (part_t i = np; i < tnVtx; i++) {
02123       idx[i+1] = idx[i];  // No edges for RHS vertices
02124     }
02125 
02126 #ifdef KDDKDD_DEBUG
02127     cout << "IDX ";
02128     for (part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] << " ";
02129     std::cout << std::endl;
02130 
02131     std::cout << "ADJ ";
02132     for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] << " ";
02133     std::cout << std::endl;
02134 
02135     std::cout << "WGT ";
02136     for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] << " ";
02137     std::cout << std::endl;
02138 #endif
02139 
02140     // Perform matching on the graph
02141     part_t *match = new part_t[tnVtx];
02142     for (part_t i = 0; i < tnVtx; i++) match[i] = i;
02143     part_t nmatches =
02144              Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
02145 
02146 #ifdef KDDKDD_DEBUG
02147     std::cout << "After matching:  " << nmatches << " found" << std::endl;
02148     for (part_t i = 0; i < tnVtx; i++)
02149       std::cout << "match[" << i << "] = " << match[i]
02150            << ((match[i] != i &&
02151                (i < np && match[i] != i+np))
02152                   ? " *" : " ")
02153            << std::endl;
02154 #endif
02155 
02156     // See whether there were nontrivial changes in the matching.
02157     bool nontrivial = false;
02158     if (nmatches) {
02159       for (part_t i = 0; i < np; i++) {
02160         if ((match[i] != i) && (match[i] != (i+np))) {
02161           nontrivial = true;
02162           break;
02163         }
02164       }
02165     }
02166 
02167     // Process the matches
02168     if (nontrivial) {
02169       remap = new part_t[nGlobalParts_];
02170       for (part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
02171 
02172       bool *used = new bool[np];
02173       for (part_t i = 0; i < np; i++) used[i] = false;
02174 
02175       // First, process all matched parts
02176       for (part_t i = 0; i < nGlobalParts_; i++) {
02177         part_t tmp = i + np;
02178         if (match[tmp] != tmp) {
02179           remap[i] = match[tmp];
02180           used[match[tmp]] = true;
02181         }
02182       }
02183 
02184       // Second, process unmatched parts; keep same part number if possible
02185       for (part_t i = 0; i < nGlobalParts_; i++) {
02186         if (remap[i] > -1) continue;
02187         if (!used[i]) {
02188           remap[i] = i;
02189           used[i] = true;
02190         }
02191       }
02192 
02193       // Third, process unmatched parts; give them the next unused part
02194       for (part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
02195         if (remap[i] > -1) continue;
02196         while (used[uidx]) uidx++;
02197         remap[i] = uidx;
02198         used[uidx] = true;
02199       }
02200       delete [] used;
02201     }
02202     delete [] match;
02203 
02204 #ifdef KDDKDD_DEBUG
02205     cout << "Remap vector: ";
02206     for (part_t i = 0; i < nGlobalParts_; i++) cout << remap[i] << " ";
02207     std::cout << std::endl;
02208 #endif
02209 
02210     long newgstaying = measure_stays(remap, idx, adj, wgt,
02211                                                       nGlobalParts_, np);
02212     doRemap = (newgstaying > gstaying);
02213     std::cout << "gstaying " << gstaying << " measure(input) "
02214          << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
02215          << " newgstaying " << newgstaying
02216          << " nontrivial " << nontrivial
02217          << " doRemap " << doRemap << std::endl;
02218   }
02219   delete [] idx;
02220   delete [] sizes;
02221   delete [] adj;
02222   delete [] wgt;
02223 
02224   Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
02225 
02226   if (doRemap) {
02227     if (me != 0) remap = new part_t[nGlobalParts_];
02228     Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
02229     for (size_t i = 0; i < len; i++) {
02230       parts_[i] = remap[parts_[i]];
02231     }
02232   }
02233 
02234   delete [] remap;  // TODO May want to keep for repartitioning as in Zoltan
02235 }
02236 
02237 
02238 }  // namespace Zoltan2
02239 
02240 #endif