Zoltan2 Version of the Day
Zoltan2_PartitioningSolution.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 //
00003 // ***********************************************************************
00004 //
00005 //   Zoltan2: A package of combinatorial algorithms for scientific computing
00006 //                  Copyright 2012 Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Karen Devine      (kddevin@sandia.gov)
00039 //                    Erik Boman        (egboman@sandia.gov)
00040 //                    Siva Rajamanickam (srajama@sandia.gov)
00041 //
00042 // ***********************************************************************
00043 //
00044 // @HEADER
00045 
00050 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00051 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00052 
00053 #include <Zoltan2_IdentifierMap.hpp>
00054 #include <Zoltan2_Solution.hpp>
00055 
00056 #include <cmath>
00057 #include <algorithm>
00058 #include <vector>
00059 
00060 namespace Zoltan2 {
00061 
00076 template <typename Adapter>
00077   class PartitioningSolution : public Solution
00078 {
00079 public:
00080 
00081 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00082   typedef typename Adapter::gno_t gno_t;
00083   typedef typename Adapter::scalar_t scalar_t;
00084   typedef typename Adapter::lno_t lno_t;
00085   typedef typename Adapter::gid_t gid_t;
00086   typedef typename Adapter::user_t user_t;
00087 #endif
00088 
00106   PartitioningSolution( RCP<const Environment> &env,
00107     RCP<const Comm<int> > &comm,
00108     RCP<const IdentifierMap<user_t> > &idMap,
00109     int userWeightDim);
00110 
00140   PartitioningSolution( RCP<const Environment> &env,
00141     RCP<const Comm<int> > &comm,
00142     RCP<const IdentifierMap<user_t> > &idMap,
00143     int userWeightDim, ArrayView<ArrayRCP<partId_t> > reqPartIds,
00144     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00145   
00147   // Information that the algorithm may wish to query.
00148   
00151   size_t getTargetGlobalNumberOfParts() const { return nGlobalParts_; }
00152   
00155   size_t getActualGlobalNumberOfParts() const { return nGlobalPartsSolution_; }
00156   
00159   size_t getLocalNumberOfParts() const { return nLocalParts_; }
00160   
00168   scalar_t getLocalFractionOfPart() const { return localFraction_; }
00169   
00179   bool oneToOnePartDistribution() const { return onePartPerProc_; }
00180   
00200   const int *getPartDistribution() const {
00201     if (partDist_.size() > 0) return &partDist_[0];
00202     else return NULL;
00203   }
00204   
00221   const partId_t *getProcDistribution() const {
00222     if (procDist_.size() > 0) return &procDist_[0];
00223     else return NULL;
00224   }
00225 
00229   int getNumberOfCriteria() const { return weightDim_; }
00230 
00231   
00238   bool criteriaHasUniformPartSizes(int idx) const { return pSizeUniform_[idx];}
00239   
00252   scalar_t getCriteriaPartSize(int idx, partId_t part) const { 
00253     if (pSizeUniform_[idx]) 
00254       return 1.0 / nGlobalParts_;
00255     else if (pCompactIndex_[idx].size())
00256       return pSize_[idx][pCompactIndex_[idx][part]];
00257     else
00258       return pSize_[idx][part];
00259   }
00260 
00274   bool criteriaHaveSamePartSizes(int c1, int c2) const;
00275 
00277   // Method used by the algorithm to set results.
00278 
00305   void setParts(ArrayRCP<const gno_t> &gnoList, 
00306     ArrayRCP<partId_t> &partList, bool dataDidNotMove);
00307   
00309   // Results that may be queried by the user, by migration methods,
00310   // or by metric calculation methods.
00311   // We return raw pointers so users don't have to learn about our
00312   // pointer wrappers.
00313 
00316   const RCP<const Comm<int> > &getCommunicator() const { return comm_;}
00317 
00320   size_t getLocalNumberOfIds() const { return gids_.size(); }
00321 
00324   const gid_t *getIdList() const { return gids_.getRawPtr(); }
00325 
00328   const zoltan2_partId_t *getPartList() const { return parts_.getRawPtr();}
00329 
00334   const int *getProcList() const { return procs_.getRawPtr();}
00335 
00365   template <typename Extra>
00366     size_t convertSolutionToImportList(
00367       int numExtra,
00368       ArrayRCP<Extra> &xtraInfo,
00369       ArrayRCP<typename Adapter::gid_t> &imports,
00370       ArrayRCP<Extra> &newXtraInfo) const;
00371 
00389   void getPartsForProc(int procId, double &numParts, partId_t &partMin, 
00390     partId_t &partMax) const
00391   {
00392     env_->localInputAssertion(__FILE__, __LINE__, "invalid process id",
00393       procId >= 0 && procId < comm_->getSize(), BASIC_ASSERTION);
00394 
00395     procToPartsMap(procId, numParts, partMin, partMax);
00396   }
00397 
00409   void getProcsForPart(partId_t partId, int &procMin, int &procMax) const
00410   {
00411     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
00412       partId >= 0 && partId < nGlobalParts_, BASIC_ASSERTION);
00413 
00414     partToProcsMap(partId, procMin, procMax);
00415   }
00416 
00417 private:
00418   void partToProc(bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
00419     int numLocalParts, int numGlobalParts);
00420 
00421   void procToPartsMap(int procId, double &numParts, partId_t &partMin, 
00422     partId_t &partMax) const;
00423 
00424   void partToProcsMap(partId_t partId, int &procMin, int &procMax) const;
00425 
00426   void setPartDistribution();
00427 
00428   void setPartSizes(ArrayView<ArrayRCP<partId_t> > reqPartIds,
00429     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00430 
00431   void computePartSizes(int wdim, ArrayView<partId_t> ids, 
00432     ArrayView<scalar_t> sizes);
00433 
00434   void broadcastPartSizes(int wdim);
00435 
00436   RCP<const Environment> env_;             // has application communicator
00437   RCP<const Comm<int> > comm_;             // the problem communicator
00438   RCP<const IdentifierMap<user_t> > idMap_;
00439 
00440   partId_t nGlobalParts_;// target global number of parts
00441   partId_t nLocalParts_; // number of parts to be on this process
00442 
00443   scalar_t localFraction_; // approx fraction of a part on this process
00444   int weightDim_;      // if user has no weights, this is 1
00445 
00446   // If process p is to be assigned part p for all p, then onePartPerProc_ 
00447   // is true. Otherwise it is false, and either procDist_ or partDist_
00448   // describes the allocation of parts to processes.
00449   //
00450   // If parts are never split across processes, then procDist_ is defined
00451   // as follows:
00452   //
00453   //   partId              = procDist_[procId]
00454   //   partIdNext          = procDist_[procId+1]
00455   //   globalNumberOfParts = procDist_[numProcs]
00456   //
00457   // meaning that the parts assigned to process procId range from
00458   // [partId, partIdNext).  If partIdNext is the same as partId, then
00459   // process procId has no parts.
00460   //
00461   // If the number parts is less than the number of processes, and the
00462   // user did not specify "num_local_parts" for each of the processes, then
00463   // parts are split across processes, and partDist_ is defined rather than
00464   // procDist_.
00465   //
00466   //   procId              = partDist_[partId]
00467   //   procIdNext          = partDist_[partId+1]
00468   //   globalNumberOfProcs = partDist_[numParts]
00469   //
00470   // which implies that the part partId is shared by processes in the
00471   // the range [procId, procIdNext).
00472   //
00473   // We use std::vector so we can use upper_bound algorithm
00474   
00475   bool             onePartPerProc_;   // either this is true...
00476   std::vector<int>      partDist_;      // or this is defined ...
00477   std::vector<partId_t> procDist_;      // or this is defined.
00478   bool procDistEquallySpread_;        // if procDist_ is used and 
00479                                       // #parts > #procs and
00480                                       // num_local_parts is not specified,
00481                                       // parts are evenly distributed to procs
00482 
00483   // In order to minimize the storage required for part sizes, we
00484   // have three different representations.
00485   //
00486   // If the part sizes for weight dimension w are all the same, then:
00487   //    pSizeUniform_[w] = true
00488   //    pCompactIndex_[w].size() = 0
00489   //    pSize_[w].size() = 0
00490   //
00491   // and the size for part p is 1.0 / nparts.
00492   //
00493   // If part sizes differ for each part in weight dimension w, but there
00494   // are no more than 64 distinct sizes:
00495   //    pSizeUniform_[w] = false
00496   //    pCompactIndex_[w].size() = number of parts
00497   //    pSize_[w].size() = number of different sizes
00498   //
00499   // and the size for part p is pSize_[pCompactIndex_[p]].
00500   //
00501   // If part sizes differ for each part in weight dimension w, and there
00502   // are more than 64 distinct sizes:
00503   //    pSizeUniform_[w] = false
00504   //    pCompactIndex_[w].size() = 0
00505   //    pSize_[w].size() = nparts
00506   //
00507   // and the size for part p is pSize_[p].
00508   //
00509   // NOTE: If we expect to have similar cases, i.e. a long list of scalars
00510   //   where it is highly possible that just a few unique values appear,
00511   //   then we may want to make this a class.  The advantage is that we
00512   //   save a long list of 1-byte indices instead of a long list of scalars.
00513 
00514   ArrayRCP<bool> pSizeUniform_;
00515   ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
00516   ArrayRCP<ArrayRCP<scalar_t> > pSize_;
00517 
00519   // The algorithm sets these values upon completion.
00520 
00521   ArrayRCP<const gid_t>  gids_;   // User's global IDs 
00522   ArrayRCP<partId_t> parts_;      // part number assigned to gids_[i]
00523 
00524   bool haveSolution_;
00525 
00526   partId_t nGlobalPartsSolution_; // global number of parts in solution
00527 
00529   // The solution calculates this from the part assignments,
00530   // unless onePartPerProc_.
00531 
00532   ArrayRCP<int> procs_;       // process rank assigned to gids_[i]
00533 };
00534 
00536 // Definitions
00538 
00539 template <typename Adapter>
00540   PartitioningSolution<Adapter>::PartitioningSolution(
00541     RCP<const Environment> &env, 
00542     RCP<const Comm<int> > &comm,
00543     RCP<const IdentifierMap<user_t> > &idMap, int userWeightDim)
00544     : env_(env), comm_(comm), idMap_(idMap),
00545       nGlobalParts_(0), nLocalParts_(0),
00546       localFraction_(0),  weightDim_(),
00547       onePartPerProc_(false), partDist_(), procDist_(),
00548       procDistEquallySpread_(false),
00549       pSizeUniform_(), pCompactIndex_(), pSize_(),
00550       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
00551       procs_()
00552 {
00553   weightDim_ = (userWeightDim ? userWeightDim : 1); 
00554 
00555   setPartDistribution();
00556 
00557   // We must call setPartSizes() because part sizes may have
00558   // been provided by the user on other processes.
00559 
00560   ArrayRCP<partId_t> *noIds = new ArrayRCP<partId_t> [weightDim_];
00561   ArrayRCP<scalar_t> *noSizes = new ArrayRCP<scalar_t> [weightDim_];
00562   ArrayRCP<ArrayRCP<partId_t> > ids(noIds, 0, weightDim_, true);
00563   ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, weightDim_, true);
00564 
00565   setPartSizes(ids.view(0, weightDim_), sizes.view(0, weightDim_));
00566 
00567   env_->memory("After construction of solution");
00568 }
00569 
00570 template <typename Adapter>
00571   PartitioningSolution<Adapter>::PartitioningSolution(
00572     RCP<const Environment> &env, 
00573     RCP<const Comm<int> > &comm,
00574     RCP<const IdentifierMap<user_t> > &idMap, int userWeightDim,
00575     ArrayView<ArrayRCP<partId_t> > reqPartIds, 
00576     ArrayView<ArrayRCP<scalar_t> > reqPartSizes)
00577     : env_(env), comm_(comm), idMap_(idMap),
00578       nGlobalParts_(0), nLocalParts_(0), 
00579       localFraction_(0),  weightDim_(),
00580       onePartPerProc_(false), partDist_(), procDist_(), 
00581       procDistEquallySpread_(false),
00582       pSizeUniform_(), pCompactIndex_(), pSize_(),
00583       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0), 
00584       procs_()
00585 {
00586   weightDim_ = (userWeightDim ? userWeightDim : 1); 
00587 
00588   setPartDistribution();
00589 
00590   setPartSizes(reqPartIds, reqPartSizes);
00591 
00592   env_->memory("After construction of solution");
00593 }
00594 
00595 template <typename Adapter>
00596   void PartitioningSolution<Adapter>::setPartDistribution()
00597 {
00598   // Did the caller define num_global_parts and/or num_local_parts?
00599 
00600   const ParameterList &pl = env_->getParameters();
00601   size_t haveGlobalNumParts=0, haveLocalNumParts=0;
00602   int numLocal=0, numGlobal=0;
00603   double val;
00604 
00605   const Teuchos::ParameterEntry *pe = pl.getEntryPtr("num_global_parts");
00606 
00607   if (pe){
00608     val = pe->getValue<double>(&val);  // TODO: KDD Skip this double get
00609     haveGlobalNumParts = 1;            // TODO: KDD Should be unnecessary once
00610     numGlobal = static_cast<int>(val); // TODO: KDD paramlist handles long long.
00611     nGlobalParts_ = partId_t(numGlobal); // TODO: KDD  also do below.
00612   }
00613 
00614   pe = pl.getEntryPtr("num_local_parts");
00615 
00616   if (pe){
00617     val = pe->getValue<double>(&val);
00618     haveLocalNumParts = 1;
00619     numLocal = static_cast<int>(val);
00620     nLocalParts_ = partId_t(numLocal);
00621   }
00622 
00623   try{
00624     // Sets onePartPerProc_, partDist_, and procDist_
00625 
00626     partToProc(true, haveLocalNumParts, haveGlobalNumParts, 
00627       numLocal, numGlobal);
00628   }
00629   Z2_FORWARD_EXCEPTIONS
00630 
00631   int nprocs = comm_->getSize();
00632   int rank = comm_->getRank();
00633 
00634   if (onePartPerProc_){
00635     nGlobalParts_ = nprocs;
00636     nLocalParts_ = 1;
00637   } 
00638   else if (partDist_.size() > 0){   // more procs than parts
00639     nGlobalParts_ = partDist_.size() - 1;
00640     int pstart = partDist_[0];
00641     for (partId_t i=1; i <= nGlobalParts_; i++){
00642       int pend = partDist_[i];
00643       if (rank >= pstart && rank < pend){
00644         int numOwners = pend - pstart;
00645         nLocalParts_ = 1;
00646         localFraction_ = 1.0 / numOwners;
00647         break;
00648       }
00649       pstart = pend;
00650     }
00651   }
00652   else if (procDist_.size() > 0){  // more parts than procs
00653     nGlobalParts_ = procDist_[nprocs];
00654     nLocalParts_ = procDist_[rank+1] - procDist_[rank];
00655   }
00656   else {
00657     throw logic_error("partToProc error");
00658   }
00659 
00660 }
00661 
00662 template <typename Adapter>
00663   void PartitioningSolution<Adapter>::setPartSizes(
00664     ArrayView<ArrayRCP<partId_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
00665 {
00666   int wdim = weightDim_;
00667   bool fail=false;
00668 
00669   size_t *countBuf = new size_t [wdim*2];
00670   ArrayRCP<size_t> counts(countBuf, 0, wdim*2, true);
00671 
00672   fail = ((ids.size() != wdim) || (sizes.size() != wdim));
00673 
00674   for (int w=0; !fail && w < wdim; w++){
00675     counts[w] = ids[w].size();
00676     if (ids[w].size() != sizes[w].size()) fail=true;
00677   }
00678 
00679   env_->globalBugAssertion(__FILE__, __LINE__, "bad argument arrays", fail==0, 
00680     COMPLEX_ASSERTION, comm_);
00681 
00682   // Are all part sizes the same?  This is the common case.
00683 
00684   ArrayRCP<scalar_t> *emptySizes= new ArrayRCP<scalar_t> [wdim];
00685   pSize_ = arcp(emptySizes, 0, wdim);
00686 
00687   ArrayRCP<unsigned char> *emptyIndices= new ArrayRCP<unsigned char> [wdim];
00688   pCompactIndex_ = arcp(emptyIndices, 0, wdim);
00689 
00690   bool *info = new bool [wdim];
00691   pSizeUniform_ = arcp(info, 0, wdim);
00692   for (int w=0; w < wdim; w++)
00693     pSizeUniform_[w] = true;
00694 
00695   if (nGlobalParts_ == 1){   
00696     return;   // there's only one part in the whole problem
00697   }
00698 
00699   size_t *ptr1 = counts.getRawPtr();
00700   size_t *ptr2 = counts.getRawPtr() + wdim;
00701 
00702   try{
00703     reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, wdim, ptr1, ptr2);
00704   }
00705   Z2_THROW_OUTSIDE_ERROR(*env_);
00706 
00707   bool zero = true;
00708 
00709   for (int w=0; w < wdim; w++)
00710     if (counts[wdim+w] > 0){
00711       zero = false;
00712       pSizeUniform_[w] = false;
00713     }
00714 
00715   if (zero) // Part sizes for all criteria are uniform.
00716     return; 
00717 
00718   // Compute the part sizes for criteria for which part sizes were
00719   // supplied.  Normalize for each criteria so part sizes sum to one.
00720 
00721   int nprocs = comm_->getSize();
00722   int rank = comm_->getRank();
00723 
00724   for (int w=0; w < wdim; w++){
00725     if (pSizeUniform_[w]) continue;
00726     
00727     // Send all ids and sizes to one process.
00728     // (There is no simple gather method in Teuchos.)
00729 
00730     partId_t length = ids[w].size();
00731     partId_t *allLength = new partId_t [nprocs];
00732     Teuchos::gatherAll<int, partId_t>(*comm_, 1, &length, 
00733       nprocs, allLength);
00734 
00735     if (rank == 0){
00736       int total = 0;
00737       for (int i=0; i < nprocs; i++)
00738         total += allLength[i];
00739 
00740       partId_t *partNums = new partId_t [total];
00741       scalar_t *partSizes = new scalar_t [total];
00742 
00743       ArrayView<partId_t> idArray(partNums, total);
00744       ArrayView<scalar_t> sizeArray(partSizes, total);
00745 
00746       if (length > 0){
00747         for (int i=0; i < length; i++){
00748           *partNums++ = ids[w][i];
00749           *partSizes++ = sizes[w][i];
00750         }
00751       }
00752 
00753       for (int p=1; p < nprocs; p++){
00754         if (allLength[p] > 0){
00755           Teuchos::receive<int, partId_t>(*comm_, p,
00756             allLength[p], partNums);
00757           Teuchos::receive<int, scalar_t>(*comm_, p,
00758             allLength[p], partSizes);
00759           partNums += allLength[p];
00760           partSizes += allLength[p];
00761         }
00762       }
00763 
00764       delete [] allLength;
00765 
00766       try{
00767         computePartSizes(w, idArray, sizeArray);
00768       }
00769       Z2_FORWARD_EXCEPTIONS
00770 
00771       delete [] idArray.getRawPtr();
00772       delete [] sizeArray.getRawPtr();
00773     } 
00774     else{
00775       delete [] allLength;
00776       if (length > 0){
00777         Teuchos::send<int, partId_t>(*comm_, length, ids[w].getRawPtr(), 0);
00778         Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
00779       }
00780     }
00781 
00782     broadcastPartSizes(w);
00783   }
00784 }
00785 
00786 template <typename Adapter>
00787   void PartitioningSolution<Adapter>::broadcastPartSizes(int wdim)
00788 {
00789   env_->localBugAssertion(__FILE__, __LINE__, "preallocations", 
00790     pSize_.size()>wdim && 
00791     pSizeUniform_.size()>wdim && pCompactIndex_.size()>wdim, 
00792     COMPLEX_ASSERTION);
00793 
00794   int rank = comm_->getRank();
00795   int nprocs = comm_->getSize();
00796   partId_t nparts = nGlobalParts_;
00797 
00798   if (nprocs < 2)
00799     return;
00800 
00801   char flag=0;
00802 
00803   if (rank == 0){
00804     if (pSizeUniform_[wdim] == true)
00805       flag = 1;
00806     else if (pCompactIndex_[wdim].size() > 0)
00807       flag = 2;
00808     else
00809       flag = 3;
00810   }
00811 
00812   try{
00813     Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
00814   }
00815   Z2_THROW_OUTSIDE_ERROR(*env_);
00816 
00817   if (flag == 1){
00818     if (rank > 0)
00819       pSizeUniform_[wdim] = true;
00820 
00821     return;
00822   }
00823 
00824   if (flag == 2){
00825 
00826     // broadcast the indices into the size list
00827 
00828     unsigned char *idxbuf = NULL;
00829 
00830     if (rank > 0){
00831       idxbuf = new unsigned char [nparts];
00832       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
00833     }
00834     else{
00835       env_->localBugAssertion(__FILE__, __LINE__, "index list size", 
00836         pCompactIndex_[wdim].size() == nparts, COMPLEX_ASSERTION);
00837       idxbuf = pCompactIndex_[wdim].getRawPtr();
00838     }
00839 
00840     try{
00841       // broadcast of unsigned char is not supported
00842       Teuchos::broadcast<int, char>(*comm_, 0, nparts, 
00843         reinterpret_cast<char *>(idxbuf));
00844     }
00845     Z2_THROW_OUTSIDE_ERROR(*env_);
00846 
00847     if (rank > 0)
00848       pCompactIndex_[wdim] = arcp(idxbuf, 0, nparts, true);
00849 
00850     // broadcast the list of different part sizes
00851 
00852     unsigned char maxIdx=0;
00853     for (partId_t p=0; p < nparts; p++)
00854       if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
00855 
00856     int numSizes = maxIdx + 1;
00857   
00858     scalar_t *sizeList = NULL;
00859 
00860     if (rank > 0){
00861       sizeList = new scalar_t [numSizes];
00862       env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
00863     }
00864     else{
00865       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes", 
00866         numSizes == pSize_[wdim].size(), COMPLEX_ASSERTION);
00867 
00868       sizeList = pSize_[wdim].getRawPtr();
00869     }
00870 
00871     try{
00872       Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
00873     }
00874     Z2_THROW_OUTSIDE_ERROR(*env_);
00875 
00876     if (rank > 0)
00877       pSize_[wdim] = arcp(sizeList, 0, numSizes, true);
00878 
00879     return;
00880   }
00881 
00882   if (flag == 3){
00883 
00884     // broadcast the size of each part
00885 
00886     scalar_t *sizeList = NULL;
00887 
00888     if (rank > 0){
00889       sizeList = new scalar_t [nparts];
00890       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
00891     }
00892     else{
00893       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes", 
00894         nparts == pSize_[wdim].size(), COMPLEX_ASSERTION);
00895 
00896       sizeList = pSize_[wdim].getRawPtr();
00897     }
00898 
00899     try{
00900       Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
00901     }
00902     Z2_THROW_OUTSIDE_ERROR(*env_);
00903 
00904     if (rank > 0)
00905       pSize_[wdim] = arcp(sizeList, 0, nparts);
00906 
00907     return;
00908   }
00909 }
00910 
00911 template <typename Adapter>
00912   void PartitioningSolution<Adapter>::computePartSizes(int wdim,
00913     ArrayView<partId_t> ids, ArrayView<scalar_t> sizes)
00914 {
00915   int len = ids.size();
00916 
00917   if (len == 0){
00918     pSizeUniform_[wdim] = true;
00919     return;
00920   }
00921 
00922   env_->localBugAssertion(__FILE__, __LINE__, "bad array sizes", 
00923     len>0 && sizes.size()==len, COMPLEX_ASSERTION);
00924 
00925   env_->localBugAssertion(__FILE__, __LINE__, "bad index", 
00926     wdim>=0 && wdim<weightDim_, COMPLEX_ASSERTION);
00927 
00928   env_->localBugAssertion(__FILE__, __LINE__, "preallocations", 
00929     pSize_.size()>wdim && 
00930     pSizeUniform_.size()>wdim && pCompactIndex_.size()>wdim, 
00931     COMPLEX_ASSERTION);
00932 
00933   // Check ids and sizes and find min, max and average sizes.
00934   // If sizes are very close to uniform, call them uniform parts.
00935 
00936   partId_t nparts = nGlobalParts_;
00937   unsigned char *buf = new unsigned char [nparts];
00938   env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
00939   memset(buf, 0, nparts);
00940   ArrayRCP<unsigned char> partIdx(buf, 0, nparts, true);
00941 
00942   scalar_t epsilon = 10e-5 / nparts;
00943   scalar_t min=sizes[0], max=sizes[0], sum=0;
00944 
00945   for (int i=0; i < len; i++){
00946     partId_t id = ids[i];
00947     scalar_t size = sizes[i];
00948 
00949     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id", 
00950       id>=0 && id<nparts, BASIC_ASSERTION);
00951 
00952     env_->localInputAssertion(__FILE__, __LINE__, "invalid part size", size>=0,
00953       BASIC_ASSERTION);
00954 
00955     // TODO: we could allow users to specify multiple sizes for the same
00956     // part if we add a parameter that says what we are to do with them:
00957     // add them or take the max.
00958 
00959     env_->localInputAssertion(__FILE__, __LINE__, 
00960       "multiple sizes provided for one part", partIdx[id]==0, BASIC_ASSERTION);
00961 
00962     partIdx[id] = 1;    // mark that we have a size for this part
00963 
00964     if (size < min) min = size;
00965     if (size > max) max = size;
00966     sum += size;
00967   }
00968 
00969   if (sum == 0){   
00970 
00971     // User has given us a list of parts of size 0, we'll set
00972     // the rest to them to equally sized parts.
00973     
00974     scalar_t *allSizes = new scalar_t [2];
00975     env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
00976 
00977     ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, true);
00978 
00979     int numNonZero = nparts - len;
00980 
00981     allSizes[0] = 0.0;
00982     allSizes[1] = 1.0 / numNonZero;
00983 
00984     for (partId_t p=0; p < nparts; p++)
00985       buf[p] = 1;                 // index to default part size
00986 
00987     for (int i=0; i < len; i++)
00988       buf[ids[i]] = 0;            // index to part size zero
00989     
00990     pSize_[wdim] = sizeArray;
00991     pCompactIndex_[wdim] = partIdx;
00992 
00993     return;
00994   }
00995 
00996   if (max - min <= epsilon){
00997     pSizeUniform_[wdim] = true;
00998     return;
00999   }
01000 
01001   // A size for parts that were not specified:
01002   scalar_t avg = sum / nparts;
01003 
01004   // We are going to merge part sizes that are very close.  This takes
01005   // computation time now, but can save considerably in the storage of
01006   // all part sizes on each process.  For example, a common case may
01007   // be some parts are size 1 and all the rest are size 2.
01008 
01009   scalar_t *tmp = new scalar_t [len];
01010   env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01011   memcpy(tmp, sizes.getRawPtr(), sizeof(scalar_t) * len);
01012   ArrayRCP<scalar_t> partSizes(tmp, 0, len, true);
01013 
01014   std::sort(partSizes.begin(), partSizes.end());
01015 
01016   // create a list of sizes that are unique within epsilon
01017 
01018   Array<scalar_t> nextUniqueSize;
01019   nextUniqueSize.push_back(partSizes[len-1]);   // largest
01020   scalar_t curr = partSizes[len-1];
01021   int avgIndex = len;
01022   bool haveAvg = false;
01023   if (curr - avg <= epsilon)
01024      avgIndex = 0;
01025 
01026   for (int i=len-2; i >= 0; i--){
01027     scalar_t val = partSizes[i];
01028     if (curr - val > epsilon){
01029       nextUniqueSize.push_back(val);  // the highest in the group
01030       curr = val;
01031       if (avgIndex==len && val > avg && val - avg <= epsilon){
01032         // the average would be in this group
01033         avgIndex = nextUniqueSize.size() - 1;
01034         haveAvg = true;
01035       }
01036     }
01037   }
01038 
01039   partSizes.clear();
01040 
01041   size_t numSizes = nextUniqueSize.size();
01042   int sizeArrayLen = numSizes;
01043 
01044   if (numSizes < 64){
01045     // We can store the size for every part in a compact way.
01046 
01047     // Create a list of all sizes in increasing order
01048 
01049     if (!haveAvg) sizeArrayLen++;   // need to include average
01050     
01051     scalar_t *allSizes = new scalar_t [sizeArrayLen];
01052     env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
01053     ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, true);
01054 
01055     int newAvgIndex = sizeArrayLen;
01056 
01057     for (int i=numSizes-1, idx=0; i >= 0; i--){
01058 
01059       if (newAvgIndex == sizeArrayLen){
01060 
01061         if (haveAvg && i==avgIndex)
01062           newAvgIndex = idx;
01063 
01064         else if (!haveAvg && avg < nextUniqueSize[i]){
01065           newAvgIndex = idx;
01066           allSizes[idx++] = avg;
01067         }
01068       }
01069 
01070       allSizes[idx++] = nextUniqueSize[i];
01071     }
01072 
01073     env_->localBugAssertion(__FILE__, __LINE__, "finding average in list", 
01074       newAvgIndex < sizeArrayLen, COMPLEX_ASSERTION);
01075 
01076     for (int i=0; i < nparts; i++){
01077       buf[i] = newAvgIndex;   // index to default part size
01078     }
01079 
01080     sum = (nparts - len) * allSizes[newAvgIndex];
01081 
01082     for (int i=0; i < len; i++){
01083       int id = ids[i];
01084       scalar_t size = sizes[i];
01085       int index;
01086 
01087       // Find the first size greater than or equal to this size.
01088 
01089       if (size < avg && avg - size <= epsilon)
01090         index = newAvgIndex;
01091       else{
01092         typename ArrayRCP<scalar_t>::iterator found = 
01093           std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
01094 
01095         env_->localBugAssertion(__FILE__, __LINE__, "size array", 
01096           found != sizeArray.end(), COMPLEX_ASSERTION);
01097 
01098         index = found - sizeArray.begin();
01099       }
01100 
01101       buf[id] = index;
01102       sum += allSizes[index];
01103     }
01104 
01105     for (int i=0; i < sizeArrayLen; i++){
01106       sizeArray[i] /= sum;
01107     }
01108 
01109     pCompactIndex_[wdim] = partIdx;
01110     pSize_[wdim] = sizeArray;
01111   }
01112   else{
01113     // To have access to part sizes, we must store nparts scalar_ts on 
01114     // every process.  We expect this is a rare case.
01115 
01116     tmp = new scalar_t [nparts];
01117     env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
01118 
01119     sum += ((nparts - len) * avg);
01120 
01121     for (int i=0; i < nparts; i++){
01122       tmp[i] = avg/sum;
01123     }
01124 
01125     for (int i=0; i < len; i++){
01126       tmp[ids[i]] = sizes[i]/sum;
01127     }
01128 
01129     pSize_[wdim] = arcp(tmp, 0, nparts);
01130   }
01131 }
01132 
01133 template <typename Adapter>
01134   void PartitioningSolution<Adapter>::setParts(
01135     ArrayRCP<const gno_t> &gnoList, ArrayRCP<partId_t> &partList,
01136     bool dataDidNotMove)
01137 {
01138   env_->debug(DETAILED_STATUS, "Entering setParts");
01139 
01140   size_t len = partList.size();
01141 
01142   // Find the actual number of parts in the solution, which can
01143   // be more or less than the nGlobalParts_ target.
01144   // (We may want to compute the imbalance of a given solution with
01145   // respect to a desired solution.  This solution may have more or
01146   // fewer parts that the desired solution.)
01147 
01148   partId_t lMax=0, lMin=0, gMax, gMin;
01149   
01150   if (len > 0)
01151     IdentifierTraits<partId_t>::minMax(partList.getRawPtr(), len, lMin, lMax);
01152 
01153   IdentifierTraits<partId_t>::globalMinMax(*comm_, len == 0,
01154     lMin, lMax, gMin, gMax);
01155       
01156   nGlobalPartsSolution_ = gMax - gMin + 1;
01157 
01158   if (dataDidNotMove) {
01159     // Case where the algorithm provides the part list in the same order
01160     // that it received the gnos.
01161 
01162     if (idMap_->gnosAreGids()){
01163       gids_ = Teuchos::arcp_reinterpret_cast<const gid_t>(gnoList);
01164     }
01165     else{
01166       gid_t *gidList = new gid_t [len];
01167       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01168       ArrayView<gid_t> gidView(gidList, len);
01169   
01170       const gno_t *gnos = gnoList.getRawPtr();
01171       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01172 
01173       try{
01174         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01175       }
01176       Z2_FORWARD_EXCEPTIONS
01177   
01178       gids_ = arcp<const gid_t>(gidList, 0, len);
01179     }
01180     parts_ = partList;
01181   }
01182   else {
01183     // Case where the algorithm moved the data.
01184     // KDDKDD Should we require the algorithm to return the parts in
01185     // KDDKDD the same order as the gnos provided by the model?
01186 
01187     // We send part information to the process that "owns" the global number.
01188 
01189     ArrayView<gid_t> emptyView;
01190     ArrayView<int> procList;
01191 
01192     if (len){
01193       int *tmp = new int [len];
01194       env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01195       procList = ArrayView<int>(tmp, len);
01196     }
01197 
01198     idMap_->gnoGlobalTranslate (gnoList.view(0,len), emptyView, procList);
01199 
01200     int remotelyOwned = 0;
01201     int rank = comm_->getRank();
01202     int nprocs = comm_->getSize();
01203   
01204     for (size_t i=0; !remotelyOwned && i < len; i++){
01205       if (procList[i] != rank)
01206         remotelyOwned = 1;
01207     }
01208 
01209     int anyRemotelyOwned=0;
01210 
01211     try{
01212       reduceAll<int, int>(*comm_, Teuchos::REDUCE_MAX, 1,
01213         &remotelyOwned, &anyRemotelyOwned);
01214     }
01215     Z2_THROW_OUTSIDE_ERROR(*env_);
01216 
01217     if (anyRemotelyOwned){
01218 
01219       // Send the owners of these gnos their part assignments.
01220     
01221       Array<int> countOutBuf(nprocs, 0);
01222       Array<int> countInBuf(nprocs, 0);
01223   
01224       Array<gno_t> outBuf(len*2, 0);
01225   
01226       if (len > 0){
01227         Array<lno_t> offsetBuf(nprocs+1, 0);
01228       
01229         for (size_t i=0; i < len; i++){
01230           countOutBuf[procList[i]]+=2;
01231         }
01232       
01233         offsetBuf[0] = 0;
01234         for (int i=0; i < nprocs; i++)
01235           offsetBuf[i+1] = offsetBuf[i] + countOutBuf[i];
01236       
01237         for (size_t i=0; i < len; i++){
01238           int p = procList[i];
01239           int off = offsetBuf[p];
01240           outBuf[off] = gnoList[i];
01241           outBuf[off+1] = static_cast<gno_t>(partList[i]);
01242           offsetBuf[p]+=2;
01243         }
01244       }
01245     
01246       ArrayRCP<gno_t> inBuf;
01247     
01248       try{
01249         AlltoAllv<gno_t>(*comm_, *env_,
01250           outBuf(), countOutBuf(), inBuf, countInBuf());
01251       }
01252       Z2_FORWARD_EXCEPTIONS;
01253   
01254       outBuf.clear();
01255       countOutBuf.clear();
01256   
01257       gno_t newLen = 0;
01258       for (int i=0; i < nprocs; i++)
01259         newLen += countInBuf[i];
01260   
01261       countInBuf.clear();
01262 
01263       newLen /= 2;
01264 
01265       ArrayRCP<partId_t> parts;
01266       ArrayRCP<const gno_t> myGnos;
01267 
01268       if (newLen > 0){
01269   
01270         gno_t *tmpGno = new gno_t [newLen];
01271         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpGno);
01272     
01273         partId_t *tmpPart = new partId_t [newLen];
01274         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpPart);
01275     
01276         int next = 0;
01277         for (lno_t i=0; i < newLen; i++){
01278           tmpGno[i] = inBuf[next++];
01279           tmpPart[i] = inBuf[next++];
01280         }
01281   
01282         parts = arcp(tmpPart, 0, newLen);
01283         myGnos = arcp(tmpGno, 0, newLen);
01284       }
01285   
01286       gnoList = myGnos;
01287       partList = parts;
01288       len = newLen;
01289     }
01290   
01291     delete [] procList.getRawPtr();
01292   
01293     if (idMap_->gnosAreGids()){
01294       gids_ = Teuchos::arcp_reinterpret_cast<const gid_t>(gnoList);
01295     }
01296     else{
01297       gid_t *gidList = new gid_t [len];
01298       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01299       ArrayView<gid_t> gidView(gidList, len);
01300   
01301       const gno_t *gnos = gnoList.getRawPtr();
01302       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01303 
01304       try{
01305         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01306       }
01307       Z2_FORWARD_EXCEPTIONS
01308   
01309       gids_ = arcp<const gid_t>(gidList, 0, len);
01310     }
01311 
01312     parts_ = partList;
01313   }
01314 
01315   // Now determine which process gets each object, if not one-to-one.
01316 
01317   if (!onePartPerProc_){
01318 
01319     int *procs = new int [len];
01320     env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
01321     procs_ = arcp<int>(procs, 0, len);
01322 
01323     partId_t *parts = partList.getRawPtr();
01324 
01325     if (procDist_.size() > 0){    // parts are not split across procs
01326 
01327       int procId;
01328       for (size_t i=0; i < len; i++){
01329         partToProcsMap(parts[i], procs[i], procId);
01330       }
01331     }
01332     else{  // harder - we need to split the parts across multiple procs
01333 
01334       lno_t *partCounter = new lno_t [nGlobalPartsSolution_];
01335       env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_, 
01336         partCounter);
01337 
01338       int numProcs = comm_->getSize();
01339 
01340       for (ArrayRCP<partId_t>::size_type i=0; i < partList.size(); i++)
01341         partCounter[parts[i]]++;
01342 
01343       lno_t *procCounter = new lno_t [numProcs];
01344       env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
01345       
01346       int proc1;
01347       int proc2 = partDist_[0];
01348 
01349       for (partId_t part=1; part < nGlobalParts_; part++){
01350         proc1 = proc2;
01351         proc2 = partDist_[part+1];
01352         int numprocs = proc2 - proc1;
01353 
01354         double dNum = partCounter[part];
01355         double dProcs = numprocs;
01356         
01357         double each = floor(dNum/dProcs);
01358         double extra = fmod(dNum,dProcs);
01359 
01360         for (int proc=proc1, i=0; proc<proc2; proc++, i++){
01361           if (i < extra)
01362             procCounter[proc] = lno_t(each) + 1;
01363           else
01364             procCounter[proc] = lno_t(each);
01365         }          
01366       }
01367 
01368       delete [] partCounter;
01369 
01370       for (ArrayRCP<partId_t>::size_type i=0; i < partList.size(); i++){
01371         if (partList[i] >= nGlobalParts_){
01372           // Solution has more parts that targeted.  These
01373           // objects just remain on this process.
01374           procs[i] = comm_->getRank();
01375           continue;
01376         }
01377         partId_t partNum = parts[i];
01378         proc1 = partDist_[partNum];
01379         proc2 = partDist_[partNum + 1];
01380 
01381         int proc;
01382         for (proc=proc1; proc < proc2; proc++){
01383           if (procCounter[proc] > 0){
01384             procs[i] = proc;
01385             procCounter[proc]--;
01386             break;
01387           }
01388         }
01389         env_->localBugAssertion(__FILE__, __LINE__, "part to proc", 
01390           proc < proc2, COMPLEX_ASSERTION);
01391       }
01392 
01393       delete [] procCounter;
01394     }
01395   }
01396 
01397   haveSolution_ = true;
01398 
01399   env_->memory("After Solution has processed algorithm's answer");
01400   env_->debug(DETAILED_STATUS, "Exiting setParts");
01401 }
01402 
01403 template <typename Adapter>
01404   template <typename Extra>
01405     size_t PartitioningSolution<Adapter>::convertSolutionToImportList(
01406       int numExtra, ArrayRCP<Extra> &xtraInfo,
01407       ArrayRCP<typename Adapter::gid_t> &imports,       // output
01408       ArrayRCP<Extra> &newXtraInfo) const               // output
01409 {
01410   env_->localInputAssertion(__FILE__, __LINE__, "no solution yet",
01411     haveSolution_, BASIC_ASSERTION);
01412   env_->debug(DETAILED_STATUS, "Entering convertSolutionToImportList");
01413 
01414   int numProcs                = comm_->getSize();
01415   size_t localNumIds          = gids_.size();
01416 
01417   // How many to each process?
01418   Array<int> counts(numProcs, 0);
01419   if (onePartPerProc_)
01420     for (size_t i=0; i < localNumIds; i++)
01421       counts[parts_[i]]++;
01422   else 
01423     for (size_t i=0; i < localNumIds; i++)
01424       counts[procs_[i]]++;
01425 
01426   Array<gno_t> offsets(numProcs+1, 0);
01427   for (int i=1; i <= numProcs; i++){
01428     offsets[i] = offsets[i-1] + counts[i-1];
01429   }
01430 
01431   Array<gid_t> gidList(localNumIds);
01432   Array<Extra> numericInfo;
01433 
01434   if (numExtra > 0)
01435     numericInfo.resize(localNumIds);
01436 
01437   if (onePartPerProc_){
01438     for (size_t i=0; i < localNumIds; i++){
01439       lno_t idx = offsets[parts_[i]];
01440       gidList[idx] = gids_[i];
01441       if (numExtra > 0)
01442         numericInfo[idx] = xtraInfo[i];
01443       offsets[parts_[i]] = idx + 1;
01444     }
01445   }
01446   else {
01447     for (size_t i=0; i < localNumIds; i++){
01448       lno_t idx = offsets[procs_[i]];
01449       gidList[idx] = gids_[i];
01450       if (numExtra > 0)
01451         numericInfo[idx] = xtraInfo[i];
01452       offsets[procs_[i]] = idx + 1;
01453     }
01454   }
01455 
01456   Array<int> recvCounts(numProcs, 0);
01457 
01458   try{
01459     AlltoAllv<gid_t>(*comm_, *env_, gidList(),
01460       counts(), imports, recvCounts());
01461   }
01462   catch (std::exception &e){
01463     throw std::runtime_error("alltoallv 1");
01464   }
01465 
01466   if (numExtra > 0){
01467     try{
01468       AlltoAllv<Extra>(*comm_, *env_, xtraInfo(),
01469         counts(), newXtraInfo, recvCounts());
01470     }
01471     catch (std::exception &e){
01472       throw std::runtime_error("alltoallv 2");
01473     }
01474   }
01475 
01476   env_->debug(DETAILED_STATUS, "Exiting convertSolutionToImportList");
01477   return imports.size();
01478 }
01479 
01480 template <typename Adapter>
01481   void PartitioningSolution<Adapter>::procToPartsMap(int procId, 
01482     double &numParts, partId_t &partMin, partId_t &partMax) const
01483 {
01484   if (onePartPerProc_){
01485     numParts = 1.0;
01486     partMin = partMax = procId;
01487   }
01488   else if (procDist_.size() > 0){
01489     partMin = procDist_[procId];
01490     partMax = procDist_[procId+1] - 1;
01491     numParts = procDist_[procId+1] - partMin;
01492   }
01493   else{
01494     // find the first p such that partDist_[p] > procId
01495 
01496     std::vector<int>::const_iterator entry;
01497     entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
01498 
01499     size_t partIdx = entry - partDist_.begin();
01500     int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
01501     partMin = partMax = int(partIdx) - 1;
01502     numParts = 1.0 / numProcs;
01503   }
01504 }
01505 
01506 template <typename Adapter>
01507   void PartitioningSolution<Adapter>::partToProcsMap(partId_t partId, 
01508     int &procMin, int &procMax) const
01509 {
01510   if (partId >= nGlobalParts_){
01511     // setParts() may be given an initial solution which uses a
01512     // different number of parts than the desired solution.  It is
01513     // still a solution.  We keep it on this process.
01514     procMin = procMax = comm_->getRank();
01515   }
01516   else if (onePartPerProc_){
01517     procMin = procMax = int(partId);
01518   }
01519   else if (procDist_.size() > 0){
01520     if (procDistEquallySpread_) {
01521       // Avoid binary search.
01522       double fProcs = comm_->getSize();
01523       double fParts = nGlobalParts_;
01524       double each = fParts / fProcs;
01525       procMin = int(partId / each);
01526       while (procDist_[procMin] > partId) procMin--;
01527       while (procDist_[procMin+1] <= partId) procMin++;
01528       procMax = procMin;
01529     }
01530     else {
01531       // find the first p such that procDist_[p] > partId.
01532       // For now, do a binary search.
01533 
01534       std::vector<partId_t>::const_iterator entry;
01535       entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
01536 
01537       size_t procIdx = entry - procDist_.begin();
01538       procMin = procMax = int(procIdx) - 1;
01539     }
01540   }
01541   else{
01542     procMin = partDist_[partId];
01543     procMax = partDist_[partId+1] - 1;
01544   }
01545 }
01546 
01547 template <typename Adapter>
01548   bool PartitioningSolution<Adapter>::criteriaHaveSamePartSizes(
01549     int c1, int c2) const
01550 {
01551   if (c1 < 0 || c1 >= weightDim_ || c2 < 0 || c2 >= weightDim_ )
01552     throw logic_error("criteriaHaveSamePartSizes error");
01553 
01554   bool theSame = false;
01555 
01556   if (c1 == c2)
01557     theSame = true;
01558 
01559   else if (pSizeUniform_[c1] == true && pSizeUniform_[c2] == true)
01560     theSame = true;
01561 
01562   else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
01563     theSame = true;
01564     bool useIndex = pCompactIndex_[c1].size() > 0;
01565     if (useIndex){
01566       for (partId_t p=0; theSame && p < nGlobalParts_; p++)
01567         if (pSize_[c1][pCompactIndex_[c1][p]] != 
01568             pSize_[c2][pCompactIndex_[c2][p]])
01569           theSame = false;
01570     }
01571     else{
01572       for (partId_t p=0; theSame && p < nGlobalParts_; p++)
01573         if (pSize_[c1][p] != pSize_[c2][p])
01574           theSame = false;
01575     }
01576   }
01577 
01578   return theSame;
01579 }
01580 
01596 template <typename Adapter>
01597   void PartitioningSolution<Adapter>::partToProc(
01598     bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
01599     int numLocalParts, int numGlobalParts) 
01600 {
01601   int nprocs = comm_->getSize();
01602   ssize_t reducevals[4];
01603   ssize_t sumHaveGlobal=0, sumHaveLocal=0;
01604   ssize_t sumGlobal=0, sumLocal=0;
01605   ssize_t maxGlobal=0, maxLocal=0;
01606   ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
01607       numGlobalParts, numLocalParts};
01608 
01609   partDist_.clear();
01610   procDist_.clear();
01611 
01612   if (doCheck){
01613 
01614     try{
01615       reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
01616     }
01617     Z2_THROW_OUTSIDE_ERROR(*env_);
01618 
01619     sumHaveGlobal = reducevals[0];
01620     sumHaveLocal = reducevals[1];
01621     sumGlobal = reducevals[2];
01622     sumLocal = reducevals[3];
01623 
01624     env_->localInputAssertion(__FILE__, __LINE__,
01625       "Either all procs specify num_global/local_parts or none do",
01626       (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
01627       (sumHaveLocal == 0 || sumHaveLocal == nprocs), 
01628       BASIC_ASSERTION);
01629   }
01630   else{
01631     if (haveNumLocalParts)
01632       sumLocal = numLocalParts * nprocs;
01633     if (haveNumGlobalParts)
01634       sumGlobal = numGlobalParts * nprocs;
01635 
01636     sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
01637     sumHaveLocal = haveNumLocalParts ? nprocs : 0;
01638 
01639     maxLocal = numLocalParts;
01640     maxGlobal = numGlobalParts;
01641   }
01642 
01643   if (!haveNumLocalParts && !haveNumGlobalParts){
01644     onePartPerProc_ = true;   // default if user did not specify
01645     return;
01646   }
01647 
01648   if (haveNumGlobalParts){
01649     if (doCheck){
01650       vals[0] = numGlobalParts;
01651       vals[1] = numLocalParts;
01652       try{
01653         reduceAll<int, ssize_t>(
01654           *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
01655       }
01656       Z2_THROW_OUTSIDE_ERROR(*env_);
01657   
01658       maxGlobal = reducevals[0];
01659       maxLocal = reducevals[1];
01660   
01661       env_->localInputAssertion(__FILE__, __LINE__,
01662         "Value for num_global_parts is different on different processes.",
01663         maxGlobal * nprocs == sumGlobal, BASIC_ASSERTION);
01664     }
01665 
01666     if (sumLocal){
01667       env_->localInputAssertion(__FILE__, __LINE__,
01668         "Sum of num_local_parts does not equal requested num_global_parts",
01669         sumLocal == numGlobalParts, BASIC_ASSERTION);
01670 
01671       if (sumLocal == nprocs && maxLocal == 1){
01672         onePartPerProc_ = true;   // user specified one part per proc
01673         return;
01674       }
01675     }
01676     else{
01677       if (maxGlobal == nprocs){
01678         onePartPerProc_ = true;   // user specified num parts is num procs
01679         return;
01680       }
01681     }
01682   }
01683 
01684   // If we are here, we do not have #parts == #procs.
01685 
01686   if (sumHaveLocal == nprocs){
01687     //
01688     // We will go by the number of local parts specified.
01689     //
01690 
01691     try{
01692       procDist_.resize(nprocs+1);
01693     }
01694     catch (std::exception &e){
01695       throw(std::bad_alloc());
01696     }
01697 
01698     int *procArray = &procDist_[0];
01699 
01700     try{
01701       partId_t tmp = partId_t(numLocalParts);
01702       gatherAll<int, partId_t>(*comm_, 1, &tmp, nprocs, procArray + 1); 
01703     }
01704     Z2_THROW_OUTSIDE_ERROR(*env_);
01705 
01706     procArray[0] = 0;
01707 
01708     for (int proc=0; proc < nprocs; proc++)
01709       procArray[proc+1] += procArray[proc];
01710   }
01711   else{
01712     //
01713     // We will allocate global number of parts to the processes.
01714     //
01715     double fParts = numGlobalParts;
01716     double fProcs = nprocs;
01717 
01718     if (fParts < fProcs){
01719 
01720       try{
01721         partDist_.resize(size_t(fParts+1));
01722       }
01723       catch (std::exception &e){
01724         throw(std::bad_alloc());
01725       }
01726 
01727       int *partArray = &partDist_[0];
01728 
01729       double each = floor(fProcs / fParts);
01730       double extra = fmod(fProcs, fParts);
01731       partDist_[0] = 0;     
01732 
01733       for (partId_t part=0; part < numGlobalParts; part++){
01734         int numOwners = int(each + ((part<extra) ? 1 : 0));
01735         partArray[part+1] = partArray[part] + numOwners;
01736       }
01737 
01738       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs", 
01739         partDist_[numGlobalParts] == nprocs, COMPLEX_ASSERTION, comm_);
01740     }
01741     else if (fParts > fProcs){ 
01742 
01743       // User did not specify local number of parts per proc; 
01744       // Distribute the parts evenly among the procs.
01745 
01746       procDistEquallySpread_ = true;
01747 
01748       try{
01749         procDist_.resize(size_t(fProcs+1));
01750       }
01751       catch (std::exception &e){
01752         throw(std::bad_alloc());
01753       }
01754 
01755       int *procArray = &procDist_[0];
01756 
01757       double each = floor(fParts / fProcs);
01758       double extra = fmod(fParts, fProcs);
01759       procArray[0] = 0;     
01760 
01761       for (int proc=0; proc < nprocs; proc++){
01762         partId_t numParts = partId_t(each + ((proc<extra) ? 1 : 0));
01763         procArray[proc+1] = procArray[proc] + numParts;
01764       }
01765 
01766       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs", 
01767         procDist_[nprocs] == numGlobalParts, COMPLEX_ASSERTION, comm_);
01768     }
01769     else{
01770       env_->globalBugAssertion(__FILE__, __LINE__, 
01771         "should never get here", 1, COMPLEX_ASSERTION, comm_);
01772     }
01773   }
01774 }
01775 
01776 }  // namespace Zoltan2
01777 
01778 #endif