Zoltan2
Zoltan2_PartitioningSolution.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 //
00003 // ***********************************************************************
00004 //
00005 //   Zoltan2: A package of combinatorial algorithms for scientific computing
00006 //                  Copyright 2012 Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Karen Devine      (kddevin@sandia.gov)
00039 //                    Erik Boman        (egboman@sandia.gov)
00040 //                    Siva Rajamanickam (srajama@sandia.gov)
00041 //
00042 // ***********************************************************************
00043 //
00044 // @HEADER
00045 
00050 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00051 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00052 
00053 #include <Zoltan2_IdentifierMap.hpp>
00054 #include <Zoltan2_Solution.hpp>
00055 #include <Zoltan2_GreedyMWM.hpp>
00056 #include <Zoltan2_CoordinatePartitioningGraph.hpp>
00057 #include <cmath>
00058 #include <algorithm>
00059 #include <vector>
00060 #include <limits>
00061 
00062 namespace Teuchos{
00063 
00068 template <typename Ordinal, typename T>
00069 class Zoltan2_BoxBoundaries  : public ValueTypeReductionOp<Ordinal,T>
00070 {
00071 private:
00072     Ordinal size;
00073     T _EPSILON;
00074 
00075 public:
00078     Zoltan2_BoxBoundaries ():size(0), _EPSILON (std::numeric_limits<T>::epsilon()){}
00079 
00086     Zoltan2_BoxBoundaries (Ordinal s_):
00087         size(s_), _EPSILON (std::numeric_limits<T>::epsilon()){}
00088 
00091     void reduce( const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
00092     {
00093         for (Ordinal i=0; i < count; i++){
00094             if (Z2_ABS(inBuffer[i]) >  _EPSILON){
00095                 inoutBuffer[i] = inBuffer[i];
00096             }
00097         }
00098     }
00099 };
00100 } // namespace Teuchos
00101 
00102 
00103 namespace Zoltan2 {
00104 
00105 long measure_stays(partId_t *, int *, partId_t *, long *, partId_t, partId_t);
00106 
00107 
00122 template <typename Adapter>
00123   class PartitioningSolution : public Solution
00124 {
00125 public:
00126 
00127 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00128   typedef typename Adapter::gno_t gno_t;
00129   typedef typename Adapter::scalar_t scalar_t;
00130   typedef typename Adapter::lno_t lno_t;
00131   typedef typename Adapter::gid_t gid_t;
00132   typedef typename Adapter::user_t user_t;
00133 #endif
00134 
00152   PartitioningSolution( RCP<const Environment> &env,
00153     RCP<const Comm<int> > &comm,
00154     RCP<const IdentifierMap<user_t> > &idMap,
00155     int nUserWeights);
00156 
00186   PartitioningSolution( RCP<const Environment> &env,
00187     RCP<const Comm<int> > &comm,
00188     RCP<const IdentifierMap<user_t> > &idMap,
00189     int nUserWeights, ArrayView<ArrayRCP<partId_t> > reqPartIds,
00190     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00191 
00193   // Information that the algorithm may wish to query.
00194 
00197   size_t getTargetGlobalNumberOfParts() const { return nGlobalParts_; }
00198 
00201   size_t getActualGlobalNumberOfParts() const { return nGlobalPartsSolution_; }
00202 
00205   size_t getLocalNumberOfParts() const { return nLocalParts_; }
00206 
00214   scalar_t getLocalFractionOfPart() const { return localFraction_; }
00215 
00225   bool oneToOnePartDistribution() const { return onePartPerProc_; }
00226 
00246   const int *getPartDistribution() const {
00247     if (partDist_.size() > 0) return &partDist_[0];
00248     else return NULL;
00249   }
00250 
00267   const partId_t *getProcDistribution() const {
00268     if (procDist_.size() > 0) return &procDist_[0];
00269     else return NULL;
00270   }
00271 
00275   int getNumberOfCriteria() const { return nWeightsPerObj_; }
00276 
00277 
00284   bool criteriaHasUniformPartSizes(int idx) const { return pSizeUniform_[idx];}
00285 
00298   scalar_t getCriteriaPartSize(int idx, partId_t part) const {
00299     if (pSizeUniform_[idx])
00300       return 1.0 / nGlobalParts_;
00301     else if (pCompactIndex_[idx].size())
00302       return pSize_[idx][pCompactIndex_[idx][part]];
00303     else
00304       return pSize_[idx][part];
00305   }
00306 
00320   bool criteriaHaveSamePartSizes(int c1, int c2) const;
00321 
00323   // Method used by the algorithm to set results.
00324 
00351   void setParts(ArrayRCP<const gno_t> &gnoList,
00352     ArrayRCP<partId_t> &partList, bool dataDidNotMove);
00353 
00355 
00367   void RemapParts();
00368 
00370   // Results that may be queried by the user, by migration methods,
00371   // or by metric calculation methods.
00372   // We return raw pointers so users don't have to learn about our
00373   // pointer wrappers.
00374 
00377   const RCP<const Comm<int> > &getCommunicator() const { return comm_;}
00378 
00381   size_t getLocalNumberOfIds() const { return gids_.size(); }
00382 
00385   const gid_t *getIdList() const {
00386     if (gids_.size() > 0) return gids_.getRawPtr();
00387     else                  return NULL;
00388   }
00389 
00392   const zoltan2_partId_t *getPartList() const {
00393     if (parts_.size() > 0) return parts_.getRawPtr();
00394     else                   return NULL;
00395   }
00396 
00401   const int *getProcList() const {
00402     if (procs_.size() > 0) return procs_.getRawPtr();
00403     else                   return NULL;
00404   }
00405 
00406 
00409   void setPartBoxes(RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, partId_t> > > outPartBoxes){
00410       this->partBoxes = outPartBoxes;
00411   }
00412 
00415   RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, partId_t> > > getPartBoxes(){
00416       return this->partBoxes;
00417   }
00418 
00421   void getCommunicationGraph(
00422           const Teuchos::Comm<int> *comm,
00423           ArrayRCP <partId_t> &comXAdj,
00424           ArrayRCP <partId_t> &comAdj
00425   ) {
00426 
00427       if(comXAdj_.getRawPtr() == NULL && comAdj_.getRawPtr() == NULL){
00428 
00429           partId_t ntasks =  this->getActualGlobalNumberOfParts();
00430           if (partId_t (this->getTargetGlobalNumberOfParts()) > ntasks){
00431               ntasks = this->getTargetGlobalNumberOfParts();
00432           }
00433           RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, partId_t> > > pBoxes = this->getGlobalBoxBoundaries(comm);
00434           int dim = (*pBoxes)[0].getDim();
00435           GridHash < scalar_t, partId_t> grid(
00436                   pBoxes,
00437                   ntasks,
00438                   dim);
00439           grid.getAdjArrays(comXAdj_, comAdj_);
00440       }
00441       comAdj = comAdj_;
00442       comXAdj = comXAdj_;
00443 
00444   }
00445 
00446   RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, partId_t> > > getGlobalBoxBoundaries(
00447           const Teuchos::Comm<int> *comm){
00448 
00449       partId_t ntasks =  this->getActualGlobalNumberOfParts();
00450       if (partId_t (this->getTargetGlobalNumberOfParts()) > ntasks){
00451           ntasks = this->getTargetGlobalNumberOfParts();
00452       }
00453 
00454       RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, partId_t> > > pBoxes = this->getPartBoxes();
00455 
00456       int dim = (*pBoxes)[0].getDim();
00457 
00458 
00459       scalar_t *localPartBoundaries = new scalar_t[ntasks * 2 *dim];
00460 
00461       memset(localPartBoundaries, 0, sizeof(scalar_t) * ntasks * 2 *dim);
00462 
00463       scalar_t *globalPartBoundaries = new scalar_t[ntasks * 2 *dim];
00464       memset(globalPartBoundaries, 0, sizeof(scalar_t) * ntasks * 2 *dim);
00465 
00466       scalar_t *localPartMins = localPartBoundaries;
00467       scalar_t *localPartMaxs = localPartBoundaries + ntasks * dim;
00468 
00469       scalar_t *globalPartMins = globalPartBoundaries;
00470       scalar_t *globalPartMaxs = globalPartBoundaries + ntasks * dim;
00471 
00472       partId_t boxCount = pBoxes->size();
00473       for (partId_t i = 0; i < boxCount; ++i){
00474           partId_t pId = (*pBoxes)[i].getpId();
00475           //cout << "me:" << comm->getRank() << " has:" << pId << endl;
00476 
00477           scalar_t *lmins = (*pBoxes)[i].getlmins();
00478           scalar_t *lmaxs = (*pBoxes)[i].getlmaxs();
00479 
00480           for (int j = 0; j < dim; ++j){
00481               localPartMins[dim * pId + j] = lmins[j];
00482               localPartMaxs[dim * pId + j] = lmaxs[j];
00483               /*
00484               cout << "me:" << comm->getRank()  <<
00485                       " dim * pId + j:"<< dim * pId + j <<
00486                       " localMin:" << localPartMins[dim * pId + j] <<
00487                       " localMax:" << localPartMaxs[dim * pId + j] << endl;
00488                       */
00489           }
00490       }
00491 
00492       Teuchos::Zoltan2_BoxBoundaries<int, scalar_t> reductionOp(ntasks * 2 *dim);
00493 
00494       reduceAll<int, scalar_t>(*comm, reductionOp,
00495               ntasks * 2 *dim, localPartBoundaries, globalPartBoundaries
00496       );
00497       RCP < std::vector <coordinateModelPartBox <scalar_t, partId_t> > > pB(new std::vector <coordinateModelPartBox <scalar_t, partId_t> > (), true) ;
00498       for (partId_t i = 0; i < ntasks; ++i){
00499           Zoltan2::coordinateModelPartBox <scalar_t, partId_t> tpb(
00500                   i,
00501                   dim,
00502                   globalPartMins + dim * i,
00503                   globalPartMaxs + dim * i);
00504 
00505           /*
00506           for (int j = 0; j < dim; ++j){
00507               cout << "me:" << comm->getRank()  <<
00508                       " dim * pId + j:"<< dim * i + j <<
00509                       " globalMin:" << globalPartMins[dim * i + j] <<
00510                       " globalMax:" << globalPartMaxs[dim * i + j] << endl;
00511           }
00512           */
00513           pB->push_back(tpb);
00514       }
00515       delete []localPartBoundaries;
00516       delete []globalPartBoundaries;
00517       //RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, partId_t> > > tmpRCPBox(pB, true);
00518       this->partBoxes = pB;
00519       return this->partBoxes;
00520   }
00550   template <typename Extra>
00551     size_t convertSolutionToImportList(
00552       int numExtra,
00553       ArrayRCP<Extra> &xtraInfo,
00554       ArrayRCP<typename Adapter::gid_t> &imports,
00555       ArrayRCP<Extra> &newXtraInfo) const;
00556 
00574   void getPartsForProc(int procId, double &numParts, partId_t &partMin,
00575     partId_t &partMax) const
00576   {
00577     env_->localInputAssertion(__FILE__, __LINE__, "invalid process id",
00578       procId >= 0 && procId < comm_->getSize(), BASIC_ASSERTION);
00579 
00580     procToPartsMap(procId, numParts, partMin, partMax);
00581   }
00582 
00594   void getProcsForPart(partId_t partId, int &procMin, int &procMax) const
00595   {
00596     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
00597       partId >= 0 && partId < nGlobalParts_, BASIC_ASSERTION);
00598 
00599     partToProcsMap(partId, procMin, procMax);
00600   }
00601 
00602 private:
00603   void partToProc(bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
00604     int numLocalParts, int numGlobalParts);
00605 
00606   void procToPartsMap(int procId, double &numParts, partId_t &partMin,
00607     partId_t &partMax) const;
00608 
00609   void partToProcsMap(partId_t partId, int &procMin, int &procMax) const;
00610 
00611   void setPartDistribution();
00612 
00613   void setPartSizes(ArrayView<ArrayRCP<partId_t> > reqPartIds,
00614     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00615 
00616   void computePartSizes(int widx, ArrayView<partId_t> ids,
00617     ArrayView<scalar_t> sizes);
00618 
00619   void broadcastPartSizes(int widx);
00620 
00621 
00622   RCP<const Environment> env_;             // has application communicator
00623   RCP<const Comm<int> > comm_;             // the problem communicator
00624   RCP<const IdentifierMap<user_t> > idMap_;
00625 
00626   //part box boundaries as a result of geometric partitioning algorithm.
00627   RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, partId_t> > > partBoxes;
00628   ArrayRCP <partId_t> comXAdj_; //communication graph xadj
00629   ArrayRCP <partId_t> comAdj_; //communication graph adj.
00630 
00631   partId_t nGlobalParts_;// target global number of parts
00632   partId_t nLocalParts_; // number of parts to be on this process
00633 
00634   scalar_t localFraction_; // approx fraction of a part on this process
00635   int nWeightsPerObj_;      // if user has no weights, this is 1  TODO:  WHY???
00636 
00637   // If process p is to be assigned part p for all p, then onePartPerProc_
00638   // is true. Otherwise it is false, and either procDist_ or partDist_
00639   // describes the allocation of parts to processes.
00640   //
00641   // If parts are never split across processes, then procDist_ is defined
00642   // as follows:
00643   //
00644   //   partId              = procDist_[procId]
00645   //   partIdNext          = procDist_[procId+1]
00646   //   globalNumberOfParts = procDist_[numProcs]
00647   //
00648   // meaning that the parts assigned to process procId range from
00649   // [partId, partIdNext).  If partIdNext is the same as partId, then
00650   // process procId has no parts.
00651   //
00652   // If the number parts is less than the number of processes, and the
00653   // user did not specify "num_local_parts" for each of the processes, then
00654   // parts are split across processes, and partDist_ is defined rather than
00655   // procDist_.
00656   //
00657   //   procId              = partDist_[partId]
00658   //   procIdNext          = partDist_[partId+1]
00659   //   globalNumberOfProcs = partDist_[numParts]
00660   //
00661   // which implies that the part partId is shared by processes in the
00662   // the range [procId, procIdNext).
00663   //
00664   // We use std::vector so we can use upper_bound algorithm
00665 
00666   bool             onePartPerProc_;   // either this is true...
00667   std::vector<int>      partDist_;      // or this is defined ...
00668   std::vector<partId_t> procDist_;      // or this is defined.
00669   bool procDistEquallySpread_;        // if procDist_ is used and
00670                                       // #parts > #procs and
00671                                       // num_local_parts is not specified,
00672                                       // parts are evenly distributed to procs
00673 
00674   // In order to minimize the storage required for part sizes, we
00675   // have three different representations.
00676   //
00677   // If the part sizes for weight index w are all the same, then:
00678   //    pSizeUniform_[w] = true
00679   //    pCompactIndex_[w].size() = 0
00680   //    pSize_[w].size() = 0
00681   //
00682   // and the size for part p is 1.0 / nparts.
00683   //
00684   // If part sizes differ for each part in weight index w, but there
00685   // are no more than 64 distinct sizes:
00686   //    pSizeUniform_[w] = false
00687   //    pCompactIndex_[w].size() = number of parts
00688   //    pSize_[w].size() = number of different sizes
00689   //
00690   // and the size for part p is pSize_[pCompactIndex_[p]].
00691   //
00692   // If part sizes differ for each part in weight index w, and there
00693   // are more than 64 distinct sizes:
00694   //    pSizeUniform_[w] = false
00695   //    pCompactIndex_[w].size() = 0
00696   //    pSize_[w].size() = nparts
00697   //
00698   // and the size for part p is pSize_[p].
00699   //
00700   // NOTE: If we expect to have similar cases, i.e. a long list of scalars
00701   //   where it is highly possible that just a few unique values appear,
00702   //   then we may want to make this a class.  The advantage is that we
00703   //   save a long list of 1-byte indices instead of a long list of scalars.
00704 
00705   ArrayRCP<bool> pSizeUniform_;
00706   ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
00707   ArrayRCP<ArrayRCP<scalar_t> > pSize_;
00708 
00710   // The algorithm sets these values upon completion.
00711 
00712   ArrayRCP<const gid_t>  gids_;   // User's global IDs
00713   ArrayRCP<partId_t> parts_;      // part number assigned to gids_[i]
00714 
00715   bool haveSolution_;
00716 
00717   partId_t nGlobalPartsSolution_; // global number of parts in solution
00718 
00720   // The solution calculates this from the part assignments,
00721   // unless onePartPerProc_.
00722 
00723   ArrayRCP<int> procs_;       // process rank assigned to gids_[i]
00724 };
00725 
00727 // Definitions
00729 
00730 template <typename Adapter>
00731   PartitioningSolution<Adapter>::PartitioningSolution(
00732     RCP<const Environment> &env,
00733     RCP<const Comm<int> > &comm,
00734     RCP<const IdentifierMap<user_t> > &idMap, int nUserWeights)
00735     : env_(env), comm_(comm), idMap_(idMap),
00736       partBoxes(),comXAdj_(), comAdj_(),
00737       nGlobalParts_(0), nLocalParts_(0),
00738       localFraction_(0),  nWeightsPerObj_(),
00739       onePartPerProc_(false), partDist_(), procDist_(),
00740       procDistEquallySpread_(false),
00741       pSizeUniform_(), pCompactIndex_(), pSize_(),
00742       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
00743       procs_()
00744 {
00745   nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);  // TODO:  WHY??  WHY NOT ZERO?
00746 
00747   setPartDistribution();
00748 
00749   // We must call setPartSizes() because part sizes may have
00750   // been provided by the user on other processes.
00751 
00752   ArrayRCP<partId_t> *noIds = new ArrayRCP<partId_t> [nWeightsPerObj_];
00753   ArrayRCP<scalar_t> *noSizes = new ArrayRCP<scalar_t> [nWeightsPerObj_];
00754   ArrayRCP<ArrayRCP<partId_t> > ids(noIds, 0, nWeightsPerObj_, true);
00755   ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_, true);
00756 
00757   setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
00758 
00759   env_->memory("After construction of solution");
00760 }
00761 
00762 template <typename Adapter>
00763   PartitioningSolution<Adapter>::PartitioningSolution(
00764     RCP<const Environment> &env,
00765     RCP<const Comm<int> > &comm,
00766     RCP<const IdentifierMap<user_t> > &idMap, int nUserWeights,
00767     ArrayView<ArrayRCP<partId_t> > reqPartIds,
00768     ArrayView<ArrayRCP<scalar_t> > reqPartSizes)
00769     : env_(env), comm_(comm), idMap_(idMap),
00770       partBoxes(),comXAdj_(), comAdj_(),
00771       nGlobalParts_(0), nLocalParts_(0),
00772       localFraction_(0),  nWeightsPerObj_(),
00773       onePartPerProc_(false), partDist_(), procDist_(),
00774       procDistEquallySpread_(false),
00775       pSizeUniform_(), pCompactIndex_(), pSize_(),
00776       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
00777       procs_()
00778 {
00779   nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);  // TODO:  WHY?? WHY NOT ZERO?
00780 
00781   setPartDistribution();
00782 
00783   setPartSizes(reqPartIds, reqPartSizes);
00784 
00785   env_->memory("After construction of solution");
00786 }
00787 
00788 template <typename Adapter>
00789   void PartitioningSolution<Adapter>::setPartDistribution()
00790 {
00791   // Did the caller define num_global_parts and/or num_local_parts?
00792 
00793   const ParameterList &pl = env_->getParameters();
00794   size_t haveGlobalNumParts=0, haveLocalNumParts=0;
00795   int numLocal=0, numGlobal=0;
00796   double val;
00797 
00798   const Teuchos::ParameterEntry *pe = pl.getEntryPtr("num_global_parts");
00799 
00800   if (pe){
00801     val = pe->getValue<double>(&val);  // TODO: KDD Skip this double get
00802     haveGlobalNumParts = 1;            // TODO: KDD Should be unnecessary once
00803     numGlobal = static_cast<int>(val); // TODO: KDD paramlist handles long long.
00804     nGlobalParts_ = partId_t(numGlobal); // TODO: KDD  also do below.
00805   }
00806 
00807   pe = pl.getEntryPtr("num_local_parts");
00808 
00809   if (pe){
00810     val = pe->getValue<double>(&val);
00811     haveLocalNumParts = 1;
00812     numLocal = static_cast<int>(val);
00813     nLocalParts_ = partId_t(numLocal);
00814   }
00815 
00816   try{
00817     // Sets onePartPerProc_, partDist_, and procDist_
00818 
00819     partToProc(true, haveLocalNumParts, haveGlobalNumParts,
00820       numLocal, numGlobal);
00821   }
00822   Z2_FORWARD_EXCEPTIONS
00823 
00824   int nprocs = comm_->getSize();
00825   int rank = comm_->getRank();
00826 
00827   if (onePartPerProc_){
00828     nGlobalParts_ = nprocs;
00829     nLocalParts_ = 1;
00830   }
00831   else if (partDist_.size() > 0){   // more procs than parts
00832     nGlobalParts_ = partDist_.size() - 1;
00833     int pstart = partDist_[0];
00834     for (partId_t i=1; i <= nGlobalParts_; i++){
00835       int pend = partDist_[i];
00836       if (rank >= pstart && rank < pend){
00837         int numOwners = pend - pstart;
00838         nLocalParts_ = 1;
00839         localFraction_ = 1.0 / numOwners;
00840         break;
00841       }
00842       pstart = pend;
00843     }
00844   }
00845   else if (procDist_.size() > 0){  // more parts than procs
00846     nGlobalParts_ = procDist_[nprocs];
00847     nLocalParts_ = procDist_[rank+1] - procDist_[rank];
00848   }
00849   else {
00850     throw std::logic_error("partToProc error");
00851   }
00852 
00853 }
00854 
00855 template <typename Adapter>
00856   void PartitioningSolution<Adapter>::setPartSizes(
00857     ArrayView<ArrayRCP<partId_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
00858 {
00859   int widx = nWeightsPerObj_;
00860   bool fail=false;
00861 
00862   size_t *countBuf = new size_t [widx*2];
00863   ArrayRCP<size_t> counts(countBuf, 0, widx*2, true);
00864 
00865   fail = ((ids.size() != widx) || (sizes.size() != widx));
00866 
00867   for (int w=0; !fail && w < widx; w++){
00868     counts[w] = ids[w].size();
00869     if (ids[w].size() != sizes[w].size()) fail=true;
00870   }
00871 
00872   env_->globalBugAssertion(__FILE__, __LINE__, "bad argument arrays", fail==0,
00873     COMPLEX_ASSERTION, comm_);
00874 
00875   // Are all part sizes the same?  This is the common case.
00876 
00877   ArrayRCP<scalar_t> *emptySizes= new ArrayRCP<scalar_t> [widx];
00878   pSize_ = arcp(emptySizes, 0, widx);
00879 
00880   ArrayRCP<unsigned char> *emptyIndices= new ArrayRCP<unsigned char> [widx];
00881   pCompactIndex_ = arcp(emptyIndices, 0, widx);
00882 
00883   bool *info = new bool [widx];
00884   pSizeUniform_ = arcp(info, 0, widx);
00885   for (int w=0; w < widx; w++)
00886     pSizeUniform_[w] = true;
00887 
00888   if (nGlobalParts_ == 1){
00889     return;   // there's only one part in the whole problem
00890   }
00891 
00892   size_t *ptr1 = counts.getRawPtr();
00893   size_t *ptr2 = counts.getRawPtr() + widx;
00894 
00895   try{
00896     reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
00897   }
00898   Z2_THROW_OUTSIDE_ERROR(*env_);
00899 
00900   bool zero = true;
00901 
00902   for (int w=0; w < widx; w++)
00903     if (counts[widx+w] > 0){
00904       zero = false;
00905       pSizeUniform_[w] = false;
00906     }
00907 
00908   if (zero) // Part sizes for all criteria are uniform.
00909     return;
00910 
00911   // Compute the part sizes for criteria for which part sizes were
00912   // supplied.  Normalize for each criteria so part sizes sum to one.
00913 
00914   int nprocs = comm_->getSize();
00915   int rank = comm_->getRank();
00916 
00917   for (int w=0; w < widx; w++){
00918     if (pSizeUniform_[w]) continue;
00919 
00920     // Send all ids and sizes to one process.
00921     // (There is no simple gather method in Teuchos.)
00922 
00923     partId_t length = ids[w].size();
00924     partId_t *allLength = new partId_t [nprocs];
00925     Teuchos::gatherAll<int, partId_t>(*comm_, 1, &length,
00926       nprocs, allLength);
00927 
00928     if (rank == 0){
00929       int total = 0;
00930       for (int i=0; i < nprocs; i++)
00931         total += allLength[i];
00932 
00933       partId_t *partNums = new partId_t [total];
00934       scalar_t *partSizes = new scalar_t [total];
00935 
00936       ArrayView<partId_t> idArray(partNums, total);
00937       ArrayView<scalar_t> sizeArray(partSizes, total);
00938 
00939       if (length > 0){
00940         for (int i=0; i < length; i++){
00941           *partNums++ = ids[w][i];
00942           *partSizes++ = sizes[w][i];
00943         }
00944       }
00945 
00946       for (int p=1; p < nprocs; p++){
00947         if (allLength[p] > 0){
00948           Teuchos::receive<int, partId_t>(*comm_, p,
00949             allLength[p], partNums);
00950           Teuchos::receive<int, scalar_t>(*comm_, p,
00951             allLength[p], partSizes);
00952           partNums += allLength[p];
00953           partSizes += allLength[p];
00954         }
00955       }
00956 
00957       delete [] allLength;
00958 
00959       try{
00960         computePartSizes(w, idArray, sizeArray);
00961       }
00962       Z2_FORWARD_EXCEPTIONS
00963 
00964       delete [] idArray.getRawPtr();
00965       delete [] sizeArray.getRawPtr();
00966     }
00967     else{
00968       delete [] allLength;
00969       if (length > 0){
00970         Teuchos::send<int, partId_t>(*comm_, length, ids[w].getRawPtr(), 0);
00971         Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
00972       }
00973     }
00974 
00975     broadcastPartSizes(w);
00976   }
00977 }
00978 
00979 template <typename Adapter>
00980   void PartitioningSolution<Adapter>::broadcastPartSizes(int widx)
00981 {
00982   env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
00983     pSize_.size()>widx &&
00984     pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
00985     COMPLEX_ASSERTION);
00986 
00987   int rank = comm_->getRank();
00988   int nprocs = comm_->getSize();
00989   partId_t nparts = nGlobalParts_;
00990 
00991   if (nprocs < 2)
00992     return;
00993 
00994   char flag=0;
00995 
00996   if (rank == 0){
00997     if (pSizeUniform_[widx] == true)
00998       flag = 1;
00999     else if (pCompactIndex_[widx].size() > 0)
01000       flag = 2;
01001     else
01002       flag = 3;
01003   }
01004 
01005   try{
01006     Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
01007   }
01008   Z2_THROW_OUTSIDE_ERROR(*env_);
01009 
01010   if (flag == 1){
01011     if (rank > 0)
01012       pSizeUniform_[widx] = true;
01013 
01014     return;
01015   }
01016 
01017   if (flag == 2){
01018 
01019     // broadcast the indices into the size list
01020 
01021     unsigned char *idxbuf = NULL;
01022 
01023     if (rank > 0){
01024       idxbuf = new unsigned char [nparts];
01025       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
01026     }
01027     else{
01028       env_->localBugAssertion(__FILE__, __LINE__, "index list size",
01029         pCompactIndex_[widx].size() == nparts, COMPLEX_ASSERTION);
01030       idxbuf = pCompactIndex_[widx].getRawPtr();
01031     }
01032 
01033     try{
01034       // broadcast of unsigned char is not supported
01035       Teuchos::broadcast<int, char>(*comm_, 0, nparts,
01036         reinterpret_cast<char *>(idxbuf));
01037     }
01038     Z2_THROW_OUTSIDE_ERROR(*env_);
01039 
01040     if (rank > 0)
01041       pCompactIndex_[widx] = arcp(idxbuf, 0, nparts, true);
01042 
01043     // broadcast the list of different part sizes
01044 
01045     unsigned char maxIdx=0;
01046     for (partId_t p=0; p < nparts; p++)
01047       if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
01048 
01049     int numSizes = maxIdx + 1;
01050 
01051     scalar_t *sizeList = NULL;
01052 
01053     if (rank > 0){
01054       sizeList = new scalar_t [numSizes];
01055       env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
01056     }
01057     else{
01058       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
01059         numSizes == pSize_[widx].size(), COMPLEX_ASSERTION);
01060 
01061       sizeList = pSize_[widx].getRawPtr();
01062     }
01063 
01064     try{
01065       Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
01066     }
01067     Z2_THROW_OUTSIDE_ERROR(*env_);
01068 
01069     if (rank > 0)
01070       pSize_[widx] = arcp(sizeList, 0, numSizes, true);
01071 
01072     return;
01073   }
01074 
01075   if (flag == 3){
01076 
01077     // broadcast the size of each part
01078 
01079     scalar_t *sizeList = NULL;
01080 
01081     if (rank > 0){
01082       sizeList = new scalar_t [nparts];
01083       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
01084     }
01085     else{
01086       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
01087         nparts == pSize_[widx].size(), COMPLEX_ASSERTION);
01088 
01089       sizeList = pSize_[widx].getRawPtr();
01090     }
01091 
01092     try{
01093       Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
01094     }
01095     Z2_THROW_OUTSIDE_ERROR(*env_);
01096 
01097     if (rank > 0)
01098       pSize_[widx] = arcp(sizeList, 0, nparts);
01099 
01100     return;
01101   }
01102 }
01103 
01104 template <typename Adapter>
01105   void PartitioningSolution<Adapter>::computePartSizes(int widx,
01106     ArrayView<partId_t> ids, ArrayView<scalar_t> sizes)
01107 {
01108   int len = ids.size();
01109 
01110   if (len == 0){
01111     pSizeUniform_[widx] = true;
01112     return;
01113   }
01114 
01115   env_->localBugAssertion(__FILE__, __LINE__, "bad array sizes",
01116     len>0 && sizes.size()==len, COMPLEX_ASSERTION);
01117 
01118   env_->localBugAssertion(__FILE__, __LINE__, "bad index",
01119     widx>=0 && widx<nWeightsPerObj_, COMPLEX_ASSERTION);
01120 
01121   env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
01122     pSize_.size()>widx &&
01123     pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
01124     COMPLEX_ASSERTION);
01125 
01126   // Check ids and sizes and find min, max and average sizes.
01127   // If sizes are very close to uniform, call them uniform parts.
01128 
01129   partId_t nparts = nGlobalParts_;
01130   unsigned char *buf = new unsigned char [nparts];
01131   env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
01132   memset(buf, 0, nparts);
01133   ArrayRCP<unsigned char> partIdx(buf, 0, nparts, true);
01134 
01135   scalar_t epsilon = 10e-5 / nparts;
01136   scalar_t min=sizes[0], max=sizes[0], sum=0;
01137 
01138   for (int i=0; i < len; i++){
01139     partId_t id = ids[i];
01140     scalar_t size = sizes[i];
01141 
01142     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
01143       id>=0 && id<nparts, BASIC_ASSERTION);
01144 
01145     env_->localInputAssertion(__FILE__, __LINE__, "invalid part size", size>=0,
01146       BASIC_ASSERTION);
01147 
01148     // TODO: we could allow users to specify multiple sizes for the same
01149     // part if we add a parameter that says what we are to do with them:
01150     // add them or take the max.
01151 
01152     env_->localInputAssertion(__FILE__, __LINE__,
01153       "multiple sizes provided for one part", partIdx[id]==0, BASIC_ASSERTION);
01154 
01155     partIdx[id] = 1;    // mark that we have a size for this part
01156 
01157     if (size < min) min = size;
01158     if (size > max) max = size;
01159     sum += size;
01160   }
01161 
01162   if (sum == 0){
01163 
01164     // User has given us a list of parts of size 0, we'll set
01165     // the rest to them to equally sized parts.
01166 
01167     scalar_t *allSizes = new scalar_t [2];
01168     env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
01169 
01170     ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, true);
01171 
01172     int numNonZero = nparts - len;
01173 
01174     allSizes[0] = 0.0;
01175     allSizes[1] = 1.0 / numNonZero;
01176 
01177     for (partId_t p=0; p < nparts; p++)
01178       buf[p] = 1;                 // index to default part size
01179 
01180     for (int i=0; i < len; i++)
01181       buf[ids[i]] = 0;            // index to part size zero
01182 
01183     pSize_[widx] = sizeArray;
01184     pCompactIndex_[widx] = partIdx;
01185 
01186     return;
01187   }
01188 
01189   if (max - min <= epsilon){
01190     pSizeUniform_[widx] = true;
01191     return;
01192   }
01193 
01194   // A size for parts that were not specified:
01195   scalar_t avg = sum / nparts;
01196 
01197   // We are going to merge part sizes that are very close.  This takes
01198   // computation time now, but can save considerably in the storage of
01199   // all part sizes on each process.  For example, a common case may
01200   // be some parts are size 1 and all the rest are size 2.
01201 
01202   scalar_t *tmp = new scalar_t [len];
01203   env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01204   memcpy(tmp, sizes.getRawPtr(), sizeof(scalar_t) * len);
01205   ArrayRCP<scalar_t> partSizes(tmp, 0, len, true);
01206 
01207   std::sort(partSizes.begin(), partSizes.end());
01208 
01209   // create a list of sizes that are unique within epsilon
01210 
01211   Array<scalar_t> nextUniqueSize;
01212   nextUniqueSize.push_back(partSizes[len-1]);   // largest
01213   scalar_t curr = partSizes[len-1];
01214   int avgIndex = len;
01215   bool haveAvg = false;
01216   if (curr - avg <= epsilon)
01217      avgIndex = 0;
01218 
01219   for (int i=len-2; i >= 0; i--){
01220     scalar_t val = partSizes[i];
01221     if (curr - val > epsilon){
01222       nextUniqueSize.push_back(val);  // the highest in the group
01223       curr = val;
01224       if (avgIndex==len && val > avg && val - avg <= epsilon){
01225         // the average would be in this group
01226         avgIndex = nextUniqueSize.size() - 1;
01227         haveAvg = true;
01228       }
01229     }
01230   }
01231 
01232   partSizes.clear();
01233 
01234   size_t numSizes = nextUniqueSize.size();
01235   int sizeArrayLen = numSizes;
01236 
01237   if (numSizes < 64){
01238     // We can store the size for every part in a compact way.
01239 
01240     // Create a list of all sizes in increasing order
01241 
01242     if (!haveAvg) sizeArrayLen++;   // need to include average
01243 
01244     scalar_t *allSizes = new scalar_t [sizeArrayLen];
01245     env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
01246     ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, true);
01247 
01248     int newAvgIndex = sizeArrayLen;
01249 
01250     for (int i=numSizes-1, idx=0; i >= 0; i--){
01251 
01252       if (newAvgIndex == sizeArrayLen){
01253 
01254         if (haveAvg && i==avgIndex)
01255           newAvgIndex = idx;
01256 
01257         else if (!haveAvg && avg < nextUniqueSize[i]){
01258           newAvgIndex = idx;
01259           allSizes[idx++] = avg;
01260         }
01261       }
01262 
01263       allSizes[idx++] = nextUniqueSize[i];
01264     }
01265 
01266     env_->localBugAssertion(__FILE__, __LINE__, "finding average in list",
01267       newAvgIndex < sizeArrayLen, COMPLEX_ASSERTION);
01268 
01269     for (int i=0; i < nparts; i++){
01270       buf[i] = newAvgIndex;   // index to default part size
01271     }
01272 
01273     sum = (nparts - len) * allSizes[newAvgIndex];
01274 
01275     for (int i=0; i < len; i++){
01276       int id = ids[i];
01277       scalar_t size = sizes[i];
01278       int index;
01279 
01280       // Find the first size greater than or equal to this size.
01281 
01282       if (size < avg && avg - size <= epsilon)
01283         index = newAvgIndex;
01284       else{
01285         typename ArrayRCP<scalar_t>::iterator found =
01286           std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
01287 
01288         env_->localBugAssertion(__FILE__, __LINE__, "size array",
01289           found != sizeArray.end(), COMPLEX_ASSERTION);
01290 
01291         index = found - sizeArray.begin();
01292       }
01293 
01294       buf[id] = index;
01295       sum += allSizes[index];
01296     }
01297 
01298     for (int i=0; i < sizeArrayLen; i++){
01299       sizeArray[i] /= sum;
01300     }
01301 
01302     pCompactIndex_[widx] = partIdx;
01303     pSize_[widx] = sizeArray;
01304   }
01305   else{
01306     // To have access to part sizes, we must store nparts scalar_ts on
01307     // every process.  We expect this is a rare case.
01308 
01309     tmp = new scalar_t [nparts];
01310     env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
01311 
01312     sum += ((nparts - len) * avg);
01313 
01314     for (int i=0; i < nparts; i++){
01315       tmp[i] = avg/sum;
01316     }
01317 
01318     for (int i=0; i < len; i++){
01319       tmp[ids[i]] = sizes[i]/sum;
01320     }
01321 
01322     pSize_[widx] = arcp(tmp, 0, nparts);
01323   }
01324 }
01325 
01326 
01327 template <typename Adapter>
01328   void PartitioningSolution<Adapter>::setParts(
01329     ArrayRCP<const gno_t> &gnoList, ArrayRCP<partId_t> &partList,
01330     bool dataDidNotMove)
01331 {
01332   env_->debug(DETAILED_STATUS, "Entering setParts");
01333 
01334   size_t len = partList.size();
01335 
01336   // Find the actual number of parts in the solution, which can
01337   // be more or less than the nGlobalParts_ target.
01338   // (We may want to compute the imbalance of a given solution with
01339   // respect to a desired solution.  This solution may have more or
01340   // fewer parts that the desired solution.)
01341 
01342   partId_t lMax=0, lMin=0, gMax, gMin;
01343 
01344   if (len > 0)
01345     IdentifierTraits<partId_t>::minMax(partList.getRawPtr(), len, lMin, lMax);
01346 
01347   IdentifierTraits<partId_t>::globalMinMax(*comm_, len == 0,
01348     lMin, lMax, gMin, gMax);
01349 
01350   nGlobalPartsSolution_ = gMax - gMin + 1;
01351 
01352   if (dataDidNotMove) {
01353     // Case where the algorithm provides the part list in the same order
01354     // that it received the gnos.
01355 
01356     if (idMap_->gnosAreGids()){
01357       gids_ = Teuchos::arcp_reinterpret_cast<const gid_t>(gnoList);
01358     }
01359     else{
01360       gid_t *gidList = new gid_t [len];
01361       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01362       ArrayView<gid_t> gidView(gidList, len);
01363 
01364       const gno_t *gnos = gnoList.getRawPtr();
01365       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01366 
01367       try{
01368         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01369       }
01370       Z2_FORWARD_EXCEPTIONS
01371 
01372       gids_ = arcp<const gid_t>(gidList, 0, len);
01373     }
01374     parts_ = partList;
01375   }
01376   else {
01377     // Case where the algorithm moved the data.
01378     // KDDKDD Should we require the algorithm to return the parts in
01379     // KDDKDD the same order as the gnos provided by the model?
01380 
01381     // We send part information to the process that "owns" the global number.
01382 
01383     ArrayView<gid_t> emptyView;
01384     ArrayView<int> procList;
01385 
01386     if (len){
01387       int *tmp = new int [len];
01388       env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01389       procList = ArrayView<int>(tmp, len);
01390     }
01391 
01392     idMap_->gnoGlobalTranslate (gnoList.view(0,len), emptyView, procList);
01393 
01394     int remotelyOwned = 0;
01395     int rank = comm_->getRank();
01396     int nprocs = comm_->getSize();
01397 
01398     for (size_t i=0; !remotelyOwned && i < len; i++){
01399       if (procList[i] != rank)
01400         remotelyOwned = 1;
01401     }
01402 
01403     int anyRemotelyOwned=0;
01404 
01405     try{
01406       reduceAll<int, int>(*comm_, Teuchos::REDUCE_MAX, 1,
01407         &remotelyOwned, &anyRemotelyOwned);
01408     }
01409     Z2_THROW_OUTSIDE_ERROR(*env_);
01410 
01411     if (anyRemotelyOwned){
01412 
01413       // Send the owners of these gnos their part assignments.
01414 
01415       Array<int> countOutBuf(nprocs, 0);
01416       Array<int> countInBuf(nprocs, 0);
01417 
01418       Array<gno_t> outBuf(len*2, 0);
01419 
01420       if (len > 0){
01421         Array<lno_t> offsetBuf(nprocs+1, 0);
01422 
01423         for (size_t i=0; i < len; i++){
01424           countOutBuf[procList[i]]+=2;
01425         }
01426 
01427         offsetBuf[0] = 0;
01428         for (int i=0; i < nprocs; i++)
01429           offsetBuf[i+1] = offsetBuf[i] + countOutBuf[i];
01430 
01431         for (size_t i=0; i < len; i++){
01432           int p = procList[i];
01433           int off = offsetBuf[p];
01434           outBuf[off] = gnoList[i];
01435           outBuf[off+1] = static_cast<gno_t>(partList[i]);
01436           offsetBuf[p]+=2;
01437         }
01438       }
01439 
01440       ArrayRCP<gno_t> inBuf;
01441 
01442       try{
01443         AlltoAllv<gno_t>(*comm_, *env_,
01444           outBuf(), countOutBuf(), inBuf, countInBuf());
01445       }
01446       Z2_FORWARD_EXCEPTIONS;
01447 
01448       outBuf.clear();
01449       countOutBuf.clear();
01450 
01451       gno_t newLen = 0;
01452       for (int i=0; i < nprocs; i++)
01453         newLen += countInBuf[i];
01454 
01455       countInBuf.clear();
01456 
01457       newLen /= 2;
01458 
01459       ArrayRCP<partId_t> parts;
01460       ArrayRCP<const gno_t> myGnos;
01461 
01462       if (newLen > 0){
01463 
01464         gno_t *tmpGno = new gno_t [newLen];
01465         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpGno);
01466 
01467         partId_t *tmpPart = new partId_t [newLen];
01468         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpPart);
01469 
01470         size_t next = 0;
01471         for (lno_t i=0; i < newLen; i++){
01472           tmpGno[i] = inBuf[next++];
01473           tmpPart[i] = inBuf[next++];
01474         }
01475 
01476         parts = arcp(tmpPart, 0, newLen);
01477         myGnos = arcp(tmpGno, 0, newLen);
01478       }
01479 
01480       gnoList = myGnos;
01481       partList = parts;
01482       len = newLen;
01483     }
01484 
01485     delete [] procList.getRawPtr();
01486 
01487     if (idMap_->gnosAreGids()){
01488       gids_ = Teuchos::arcp_reinterpret_cast<const gid_t>(gnoList);
01489     }
01490     else{
01491       gid_t *gidList = new gid_t [len];
01492       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01493       ArrayView<gid_t> gidView(gidList, len);
01494 
01495       const gno_t *gnos = gnoList.getRawPtr();
01496       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01497 
01498       try{
01499         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01500       }
01501       Z2_FORWARD_EXCEPTIONS
01502 
01503       gids_ = arcp<const gid_t>(gidList, 0, len);
01504     }
01505 
01506     parts_ = partList;
01507   }
01508 
01509   // Now determine which process gets each object, if not one-to-one.
01510 
01511   if (!onePartPerProc_){
01512 
01513     int *procs = new int [len];
01514     env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
01515     procs_ = arcp<int>(procs, 0, len);
01516 
01517     partId_t *parts = partList.getRawPtr();
01518 
01519     if (procDist_.size() > 0){    // parts are not split across procs
01520 
01521       int procId;
01522       for (size_t i=0; i < len; i++){
01523         partToProcsMap(parts[i], procs[i], procId);
01524       }
01525     }
01526     else{  // harder - we need to split the parts across multiple procs
01527 
01528       lno_t *partCounter = new lno_t [nGlobalPartsSolution_];
01529       env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
01530         partCounter);
01531 
01532       int numProcs = comm_->getSize();
01533 
01534       //MD NOTE: there was no initialization for partCounter.
01535       //I added the line below, correct me if I am wrong.
01536       memset(partCounter, 0, sizeof(lno_t) * nGlobalPartsSolution_);
01537 
01538       for (ArrayRCP<partId_t>::size_type i=0; i < partList.size(); i++)
01539         partCounter[parts[i]]++;
01540 
01541       lno_t *procCounter = new lno_t [numProcs];
01542       env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
01543 
01544       int proc1;
01545       int proc2 = partDist_[0];
01546 
01547       for (partId_t part=1; part < nGlobalParts_; part++){
01548         proc1 = proc2;
01549         proc2 = partDist_[part+1];
01550         int numprocs = proc2 - proc1;
01551 
01552         double dNum = partCounter[part];
01553         double dProcs = numprocs;
01554 
01555         //cout << "dNum:" << dNum << " dProcs:" << dProcs << endl;
01556         double each = floor(dNum/dProcs);
01557         double extra = fmod(dNum,dProcs);
01558 
01559         for (int proc=proc1, i=0; proc<proc2; proc++, i++){
01560           if (i < extra)
01561             procCounter[proc] = lno_t(each) + 1;
01562           else
01563             procCounter[proc] = lno_t(each);
01564         }
01565       }
01566 
01567       delete [] partCounter;
01568 
01569       for (ArrayRCP<partId_t>::size_type i=0; i < partList.size(); i++){
01570         if (partList[i] >= nGlobalParts_){
01571           // Solution has more parts that targeted.  These
01572           // objects just remain on this process.
01573           procs[i] = comm_->getRank();
01574           continue;
01575         }
01576         partId_t partNum = parts[i];
01577         proc1 = partDist_[partNum];
01578         proc2 = partDist_[partNum + 1];
01579 
01580         int proc;
01581         for (proc=proc1; proc < proc2; proc++){
01582           if (procCounter[proc] > 0){
01583             procs[i] = proc;
01584             procCounter[proc]--;
01585             break;
01586           }
01587         }
01588         env_->localBugAssertion(__FILE__, __LINE__, "part to proc",
01589           proc < proc2, COMPLEX_ASSERTION);
01590       }
01591 
01592       delete [] procCounter;
01593     }
01594   }
01595 
01596   // Now that parts_ info is back on home process, remap the parts.
01597   // TODO:  The parts will be inconsistent with the proc assignments after
01598   // TODO:  remapping.  This problem will go away after we separate process
01599   // TODO:  mapping from setParts.  But for MueLu's use case, the part
01600   // TODO:  remapping is all that matters; they do not use the process mapping.
01601   int doRemap = 0;
01602   const Teuchos::ParameterEntry *pe =
01603                  env_->getParameters().getEntryPtr("remap_parts");
01604   if (pe) doRemap = pe->getValue(&doRemap);
01605   if (doRemap) RemapParts();
01606 
01607   haveSolution_ = true;
01608 
01609   env_->memory("After Solution has processed algorithm's answer");
01610   env_->debug(DETAILED_STATUS, "Exiting setParts");
01611 }
01612 
01613 template <typename Adapter>
01614   template <typename Extra>
01615     size_t PartitioningSolution<Adapter>::convertSolutionToImportList(
01616       int numExtra, ArrayRCP<Extra> &xtraInfo,
01617       ArrayRCP<typename Adapter::gid_t> &imports,       // output
01618       ArrayRCP<Extra> &newXtraInfo) const               // output
01619 {
01620   env_->localInputAssertion(__FILE__, __LINE__, "no solution yet",
01621     haveSolution_, BASIC_ASSERTION);
01622   env_->debug(DETAILED_STATUS, "Entering convertSolutionToImportList");
01623 
01624   int numProcs                = comm_->getSize();
01625   size_t localNumIds          = gids_.size();
01626 
01627   // How many to each process?
01628   Array<int> counts(numProcs, 0);
01629   if (onePartPerProc_)
01630     for (size_t i=0; i < localNumIds; i++)
01631       counts[parts_[i]]++;
01632   else
01633     for (size_t i=0; i < localNumIds; i++)
01634       counts[procs_[i]]++;
01635 
01636   Array<gno_t> offsets(numProcs+1, 0);
01637   for (int i=1; i <= numProcs; i++){
01638     offsets[i] = offsets[i-1] + counts[i-1];
01639   }
01640 
01641   Array<gid_t> gidList(localNumIds);
01642   Array<Extra> numericInfo;
01643 
01644   if (numExtra > 0)
01645     numericInfo.resize(localNumIds);
01646 
01647   if (onePartPerProc_){
01648     for (size_t i=0; i < localNumIds; i++){
01649       lno_t idx = offsets[parts_[i]];
01650       gidList[idx] = gids_[i];
01651       if (numExtra > 0)
01652         numericInfo[idx] = xtraInfo[i];
01653       offsets[parts_[i]] = idx + 1;
01654     }
01655   }
01656   else {
01657     for (size_t i=0; i < localNumIds; i++){
01658       lno_t idx = offsets[procs_[i]];
01659       gidList[idx] = gids_[i];
01660       if (numExtra > 0)
01661         numericInfo[idx] = xtraInfo[i];
01662       offsets[procs_[i]] = idx + 1;
01663     }
01664   }
01665 
01666   Array<int> recvCounts(numProcs, 0);
01667 
01668   try{
01669     AlltoAllv<gid_t>(*comm_, *env_, gidList(),
01670       counts(), imports, recvCounts());
01671   }
01672   catch (std::exception &e){
01673     throw std::runtime_error("alltoallv 1");
01674   }
01675 
01676   if (numExtra > 0){
01677     try{
01678       AlltoAllv<Extra>(*comm_, *env_, xtraInfo(),
01679         counts(), newXtraInfo, recvCounts());
01680     }
01681     catch (std::exception &e){
01682       throw std::runtime_error("alltoallv 2");
01683     }
01684   }
01685 
01686   env_->debug(DETAILED_STATUS, "Exiting convertSolutionToImportList");
01687   return imports.size();
01688 }
01689 
01690 template <typename Adapter>
01691   void PartitioningSolution<Adapter>::procToPartsMap(int procId,
01692     double &numParts, partId_t &partMin, partId_t &partMax) const
01693 {
01694   if (onePartPerProc_){
01695     numParts = 1.0;
01696     partMin = partMax = procId;
01697   }
01698   else if (procDist_.size() > 0){
01699     partMin = procDist_[procId];
01700     partMax = procDist_[procId+1] - 1;
01701     numParts = procDist_[procId+1] - partMin;
01702   }
01703   else{
01704     // find the first p such that partDist_[p] > procId
01705 
01706     std::vector<int>::const_iterator entry;
01707     entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
01708 
01709     size_t partIdx = entry - partDist_.begin();
01710     int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
01711     partMin = partMax = int(partIdx) - 1;
01712     numParts = 1.0 / numProcs;
01713   }
01714 }
01715 
01716 template <typename Adapter>
01717   void PartitioningSolution<Adapter>::partToProcsMap(partId_t partId,
01718     int &procMin, int &procMax) const
01719 {
01720   if (partId >= nGlobalParts_){
01721     // setParts() may be given an initial solution which uses a
01722     // different number of parts than the desired solution.  It is
01723     // still a solution.  We keep it on this process.
01724     procMin = procMax = comm_->getRank();
01725   }
01726   else if (onePartPerProc_){
01727     procMin = procMax = int(partId);
01728   }
01729   else if (procDist_.size() > 0){
01730     if (procDistEquallySpread_) {
01731       // Avoid binary search.
01732       double fProcs = comm_->getSize();
01733       double fParts = nGlobalParts_;
01734       double each = fParts / fProcs;
01735       procMin = int(partId / each);
01736       while (procDist_[procMin] > partId) procMin--;
01737       while (procDist_[procMin+1] <= partId) procMin++;
01738       procMax = procMin;
01739     }
01740     else {
01741       // find the first p such that procDist_[p] > partId.
01742       // For now, do a binary search.
01743 
01744       std::vector<partId_t>::const_iterator entry;
01745       entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
01746 
01747       size_t procIdx = entry - procDist_.begin();
01748       procMin = procMax = int(procIdx) - 1;
01749     }
01750   }
01751   else{
01752     procMin = partDist_[partId];
01753     procMax = partDist_[partId+1] - 1;
01754   }
01755 }
01756 
01757 template <typename Adapter>
01758   bool PartitioningSolution<Adapter>::criteriaHaveSamePartSizes(
01759     int c1, int c2) const
01760 {
01761   if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
01762     throw std::logic_error("criteriaHaveSamePartSizes error");
01763 
01764   bool theSame = false;
01765 
01766   if (c1 == c2)
01767     theSame = true;
01768 
01769   else if (pSizeUniform_[c1] == true && pSizeUniform_[c2] == true)
01770     theSame = true;
01771 
01772   else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
01773     theSame = true;
01774     bool useIndex = pCompactIndex_[c1].size() > 0;
01775     if (useIndex){
01776       for (partId_t p=0; theSame && p < nGlobalParts_; p++)
01777         if (pSize_[c1][pCompactIndex_[c1][p]] !=
01778             pSize_[c2][pCompactIndex_[c2][p]])
01779           theSame = false;
01780     }
01781     else{
01782       for (partId_t p=0; theSame && p < nGlobalParts_; p++)
01783         if (pSize_[c1][p] != pSize_[c2][p])
01784           theSame = false;
01785     }
01786   }
01787 
01788   return theSame;
01789 }
01790 
01806 template <typename Adapter>
01807   void PartitioningSolution<Adapter>::partToProc(
01808     bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
01809     int numLocalParts, int numGlobalParts)
01810 {
01811   int nprocs = comm_->getSize();
01812   ssize_t reducevals[4];
01813   ssize_t sumHaveGlobal=0, sumHaveLocal=0;
01814   ssize_t sumGlobal=0, sumLocal=0;
01815   ssize_t maxGlobal=0, maxLocal=0;
01816   ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
01817       numGlobalParts, numLocalParts};
01818 
01819   partDist_.clear();
01820   procDist_.clear();
01821 
01822   if (doCheck){
01823 
01824     try{
01825       reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
01826     }
01827     Z2_THROW_OUTSIDE_ERROR(*env_);
01828 
01829     sumHaveGlobal = reducevals[0];
01830     sumHaveLocal = reducevals[1];
01831     sumGlobal = reducevals[2];
01832     sumLocal = reducevals[3];
01833 
01834     env_->localInputAssertion(__FILE__, __LINE__,
01835       "Either all procs specify num_global/local_parts or none do",
01836       (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
01837       (sumHaveLocal == 0 || sumHaveLocal == nprocs),
01838       BASIC_ASSERTION);
01839   }
01840   else{
01841     if (haveNumLocalParts)
01842       sumLocal = numLocalParts * nprocs;
01843     if (haveNumGlobalParts)
01844       sumGlobal = numGlobalParts * nprocs;
01845 
01846     sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
01847     sumHaveLocal = haveNumLocalParts ? nprocs : 0;
01848 
01849     maxLocal = numLocalParts;
01850     maxGlobal = numGlobalParts;
01851   }
01852 
01853   if (!haveNumLocalParts && !haveNumGlobalParts){
01854     onePartPerProc_ = true;   // default if user did not specify
01855     return;
01856   }
01857 
01858   if (haveNumGlobalParts){
01859     if (doCheck){
01860       vals[0] = numGlobalParts;
01861       vals[1] = numLocalParts;
01862       try{
01863         reduceAll<int, ssize_t>(
01864           *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
01865       }
01866       Z2_THROW_OUTSIDE_ERROR(*env_);
01867 
01868       maxGlobal = reducevals[0];
01869       maxLocal = reducevals[1];
01870 
01871       env_->localInputAssertion(__FILE__, __LINE__,
01872         "Value for num_global_parts is different on different processes.",
01873         maxGlobal * nprocs == sumGlobal, BASIC_ASSERTION);
01874     }
01875 
01876     if (sumLocal){
01877       env_->localInputAssertion(__FILE__, __LINE__,
01878         "Sum of num_local_parts does not equal requested num_global_parts",
01879         sumLocal == numGlobalParts, BASIC_ASSERTION);
01880 
01881       if (sumLocal == nprocs && maxLocal == 1){
01882         onePartPerProc_ = true;   // user specified one part per proc
01883         return;
01884       }
01885     }
01886     else{
01887       if (maxGlobal == nprocs){
01888         onePartPerProc_ = true;   // user specified num parts is num procs
01889         return;
01890       }
01891     }
01892   }
01893 
01894   // If we are here, we do not have #parts == #procs.
01895 
01896   if (sumHaveLocal == nprocs){
01897     //
01898     // We will go by the number of local parts specified.
01899     //
01900 
01901     try{
01902       procDist_.resize(nprocs+1);
01903     }
01904     catch (std::exception &e){
01905       throw(std::bad_alloc());
01906     }
01907 
01908     int *procArray = &procDist_[0];
01909 
01910     try{
01911       partId_t tmp = partId_t(numLocalParts);
01912       gatherAll<int, partId_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
01913     }
01914     Z2_THROW_OUTSIDE_ERROR(*env_);
01915 
01916     procArray[0] = 0;
01917 
01918     for (int proc=0; proc < nprocs; proc++)
01919       procArray[proc+1] += procArray[proc];
01920   }
01921   else{
01922     //
01923     // We will allocate global number of parts to the processes.
01924     //
01925     double fParts = numGlobalParts;
01926     double fProcs = nprocs;
01927 
01928     if (fParts < fProcs){
01929 
01930       try{
01931         partDist_.resize(size_t(fParts+1));
01932       }
01933       catch (std::exception &e){
01934         throw(std::bad_alloc());
01935       }
01936 
01937       int *partArray = &partDist_[0];
01938 
01939       double each = floor(fProcs / fParts);
01940       double extra = fmod(fProcs, fParts);
01941       partDist_[0] = 0;
01942 
01943       for (partId_t part=0; part < numGlobalParts; part++){
01944         int numOwners = int(each + ((part<extra) ? 1 : 0));
01945         partArray[part+1] = partArray[part] + numOwners;
01946       }
01947 
01948       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
01949         partDist_[numGlobalParts] == nprocs, COMPLEX_ASSERTION, comm_);
01950     }
01951     else if (fParts > fProcs){
01952 
01953       // User did not specify local number of parts per proc;
01954       // Distribute the parts evenly among the procs.
01955 
01956       procDistEquallySpread_ = true;
01957 
01958       try{
01959         procDist_.resize(size_t(fProcs+1));
01960       }
01961       catch (std::exception &e){
01962         throw(std::bad_alloc());
01963       }
01964 
01965       int *procArray = &procDist_[0];
01966 
01967       double each = floor(fParts / fProcs);
01968       double extra = fmod(fParts, fProcs);
01969       procArray[0] = 0;
01970 
01971       for (int proc=0; proc < nprocs; proc++){
01972         partId_t numParts = partId_t(each + ((proc<extra) ? 1 : 0));
01973         procArray[proc+1] = procArray[proc] + numParts;
01974       }
01975 
01976       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
01977         procDist_[nprocs] == numGlobalParts, COMPLEX_ASSERTION, comm_);
01978     }
01979     else{
01980       env_->globalBugAssertion(__FILE__, __LINE__,
01981         "should never get here", 1, COMPLEX_ASSERTION, comm_);
01982     }
01983   }
01984 }
01985 
01987 // Remap a new part assignment vector for maximum overlap with an input
01988 // part assignment.
01989 //
01990 // Assumptions for this version:
01991 //   input part assignment == processor rank for every local object.
01992 //   assuming nGlobalParts_ <= num ranks
01993 // TODO:  Write a version that takes the input part number as input;
01994 //        this change requires input parts in adapters to be provided in
01995 //        the Adapter.
01996 // TODO:  For repartitioning, compare to old remapping results; see Zoltan1.
01997 
01998 template <typename Adapter>
01999 void PartitioningSolution<Adapter>::RemapParts()
02000 {
02001   size_t len = parts_.size();
02002 
02003   partId_t me = comm_->getRank();
02004   int np = comm_->getSize();
02005 
02006   if (np < nGlobalParts_) {
02007     if (me == 0)
02008       std::cout << "Remapping not yet supported for "
02009            << "num_global_parts " << nGlobalParts_
02010            << " > num procs " << np << std::endl;
02011     return;
02012   }
02013   // Build edges of a bipartite graph with np + nGlobalParts_ vertices,
02014   // and edges between a process vtx and any parts to which that process'
02015   // objects are assigned.
02016   // Weight edge[parts_[i]] by the number of objects that are going from
02017   // this rank to parts_[i].
02018   // We use a std::map, assuming the number of unique parts in the parts_ array
02019   // is small to keep the binary search efficient.
02020   // TODO We use the count of objects to move; should change to SIZE of objects
02021   // to move; need SIZE function in Adapter.
02022 
02023   std::map<partId_t, long> edges;
02024   long lstaying = 0;  // Total num of local objects staying if we keep the
02025                       // current mapping. TODO:  change to SIZE of local objs
02026   long gstaying = 0;  // Total num of objects staying in the current partition
02027 
02028   for (size_t i = 0; i < len; i++) {
02029     edges[parts_[i]]++;                // TODO Use obj size instead of count
02030     if (parts_[i] == me) lstaying++;    // TODO Use obj size instead of count
02031   }
02032 
02033   Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
02034                                 &lstaying, &gstaying);
02035 //TODO  if (gstaying == Adapter::getGlobalNumObjs()) return;  // Nothing to do
02036 
02037   partId_t *remap = NULL;
02038 
02039   int nedges = edges.size();
02040 
02041   // Gather the graph to rank 0.
02042   partId_t tnVtx = np + nGlobalParts_;  // total # vertices
02043   int *idx = NULL;    // Pointer index into graph adjacencies
02044   int *sizes = NULL;  // nedges per rank
02045   if (me == 0) {
02046     idx = new int[tnVtx+1];
02047     sizes = new int[np];
02048     sizes[0] = nedges;
02049   }
02050   if (np > 1)
02051     Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
02052 
02053   // prefix sum to build the idx array
02054   if (me == 0) {
02055     idx[0] = 0;
02056     for (int i = 0; i < np; i++)
02057       idx[i+1] = idx[i] + sizes[i];
02058   }
02059 
02060   // prepare to send edges
02061   int cnt = 0;
02062   partId_t *bufv = NULL;
02063   long *bufw = NULL;
02064   if (nedges) {
02065     bufv = new partId_t[nedges];
02066     bufw = new long[nedges];
02067     // Create buffer with edges (me, part[i]) and weight edges[parts_[i]].
02068     for (std::map<partId_t, long>::iterator it = edges.begin();
02069          it != edges.end(); it++) {
02070       bufv[cnt] = it->first;  // target part
02071       bufw[cnt] = it->second; // weight
02072       cnt++;
02073     }
02074   }
02075 
02076   // Prepare to receive edges on rank 0
02077   partId_t *adj = NULL;
02078   long *wgt = NULL;
02079   if (me == 0) {
02080 //SYM    adj = new partId_t[2*idx[np]];  // need 2x space to symmetrize later
02081 //SYM    wgt = new long[2*idx[np]];  // need 2x space to symmetrize later
02082     adj = new partId_t[idx[np]];
02083     wgt = new long[idx[np]];
02084   }
02085 
02086   Teuchos::gatherv<int, partId_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
02087   Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
02088   delete [] bufv;
02089   delete [] bufw;
02090 
02091   // Now have constructed graph on rank 0.
02092   // Call the matching algorithm
02093 
02094   int doRemap;
02095   if (me == 0) {
02096     // We have the "LHS" vertices of the bipartite graph; need to create
02097     // "RHS" vertices.
02098     for (int i = 0; i < idx[np]; i++) {
02099       adj[i] += np;  // New RHS vertex number; offset by num LHS vertices
02100     }
02101 
02102     // Build idx for RHS vertices
02103     for (partId_t i = np; i < tnVtx; i++) {
02104       idx[i+1] = idx[i];  // No edges for RHS vertices
02105     }
02106 
02107 #ifdef KDDKDD_DEBUG
02108     cout << "IDX ";
02109     for (partId_t i = 0; i <= tnVtx; i++) std::cout << idx[i] << " ";
02110     std::cout << std::endl;
02111 
02112     std::cout << "ADJ ";
02113     for (partId_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] << " ";
02114     std::cout << std::endl;
02115 
02116     std::cout << "WGT ";
02117     for (partId_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] << " ";
02118     std::cout << std::endl;
02119 #endif
02120 
02121     // Perform matching on the graph
02122     partId_t *match = new partId_t[tnVtx];
02123     for (partId_t i = 0; i < tnVtx; i++) match[i] = i;
02124     partId_t nmatches =
02125              Zoltan2::GreedyMWM<partId_t, long>(idx, adj, wgt, tnVtx, match);
02126 
02127 #ifdef KDDKDD_DEBUG
02128     std::cout << "After matching:  " << nmatches << " found" << std::endl;
02129     for (partId_t i = 0; i < tnVtx; i++)
02130       std::cout << "match[" << i << "] = " << match[i]
02131            << ((match[i] != i &&
02132                (i < np && match[i] != i+np))
02133                   ? " *" : " ")
02134            << std::endl;
02135 #endif
02136 
02137     // See whether there were nontrivial changes in the matching.
02138     bool nontrivial = false;
02139     if (nmatches) {
02140       for (partId_t i = 0; i < np; i++) {
02141         if ((match[i] != i) && (match[i] != (i+np))) {
02142           nontrivial = true;
02143           break;
02144         }
02145       }
02146     }
02147 
02148     // Process the matches
02149     if (nontrivial) {
02150       remap = new partId_t[nGlobalParts_];
02151       for (partId_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
02152 
02153       bool *used = new bool[np];
02154       for (partId_t i = 0; i < np; i++) used[i] = false;
02155 
02156       // First, process all matched parts
02157       for (partId_t i = 0; i < nGlobalParts_; i++) {
02158         partId_t tmp = i + np;
02159         if (match[tmp] != tmp) {
02160           remap[i] = match[tmp];
02161           used[match[tmp]] = true;
02162         }
02163       }
02164 
02165       // Second, process unmatched parts; keep same part number if possible
02166       for (partId_t i = 0; i < nGlobalParts_; i++) {
02167         if (remap[i] > -1) continue;
02168         if (!used[i]) {
02169           remap[i] = i;
02170           used[i] = true;
02171         }
02172       }
02173 
02174       // Third, process unmatched parts; give them the next unused part
02175       for (partId_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
02176         if (remap[i] > -1) continue;
02177         while (used[uidx]) uidx++;
02178         remap[i] = uidx;
02179         used[uidx] = true;
02180       }
02181       delete [] used;
02182     }
02183     delete [] match;
02184 
02185 #ifdef KDDKDD_DEBUG
02186     cout << "Remap vector: ";
02187     for (partId_t i = 0; i < nGlobalParts_; i++) cout << remap[i] << " ";
02188     std::cout << std::endl;
02189 #endif
02190 
02191     long newgstaying = Zoltan2::measure_stays(remap, idx, adj, wgt,
02192                                               nGlobalParts_, np);
02193     doRemap = (newgstaying > gstaying);
02194     std::cout << "gstaying " << gstaying << " measure(input) "
02195          << Zoltan2::measure_stays(NULL, idx, adj, wgt,
02196                                    nGlobalParts_, np)
02197          << " newgstaying " << newgstaying
02198          << " nontrivial " << nontrivial
02199          << " doRemap " << doRemap << std::endl;
02200   }
02201   delete [] idx;
02202   delete [] sizes;
02203   delete [] adj;
02204   delete [] wgt;
02205 
02206   Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
02207 
02208   if (doRemap) {
02209     if (me != 0) remap = new partId_t[nGlobalParts_];
02210     Teuchos::broadcast<int, partId_t>(*comm_, 0, nGlobalParts_, remap);
02211     for (size_t i = 0; i < len; i++) {
02212       parts_[i] = remap[parts_[i]];
02213     }
02214   }
02215 
02216   delete [] remap;  // TODO May want to keep for repartitioning as in Zoltan
02217 }
02218 
02219 
02220 }  // namespace Zoltan2
02221 
02222 #endif