Zoltan 2 Version 0.5
Zoltan2_PartitioningSolution.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 //
00003 // ***********************************************************************
00004 //
00005 //   Zoltan2: A package of combinatorial algorithms for scientific computing
00006 //                  Copyright 2012 Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Karen Devine      (kddevin@sandia.gov)
00039 //                    Erik Boman        (egboman@sandia.gov)
00040 //                    Siva Rajamanickam (srajama@sandia.gov)
00041 //
00042 // ***********************************************************************
00043 //
00044 // @HEADER
00045 
00050 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00051 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00052 
00053 #include <Zoltan2_IdentifierMap.hpp>
00054 #include <Zoltan2_Solution.hpp>
00055 
00056 #include <cmath>
00057 #include <algorithm>
00058 #include <vector>
00059 
00060 namespace Zoltan2 {
00061 
00076 template <typename Adapter>
00077   class PartitioningSolution : public Solution
00078 {
00079 public:
00080 
00081 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00082   typedef typename Adapter::gno_t gno_t;
00083   typedef typename Adapter::scalar_t scalar_t;
00084   typedef typename Adapter::lno_t lno_t;
00085   typedef typename Adapter::gid_t gid_t;
00086   typedef typename Adapter::user_t user_t;
00087 #endif
00088 
00106   PartitioningSolution( RCP<const Environment> &env,
00107     RCP<const Comm<int> > &comm,
00108     RCP<const IdentifierMap<user_t> > &idMap,
00109     int userWeightDim);
00110 
00140   PartitioningSolution( RCP<const Environment> &env,
00141     RCP<const Comm<int> > &comm,
00142     RCP<const IdentifierMap<user_t> > &idMap,
00143     int userWeightDim, ArrayView<ArrayRCP<partId_t> > reqPartIds,
00144     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00145   
00147   // Information that the algorithm may wish to query.
00148   
00151   size_t getTargetGlobalNumberOfParts() const { return nGlobalParts_; }
00152   
00155   size_t getActualGlobalNumberOfParts() const { return nGlobalPartsSolution_; }
00156   
00159   size_t getLocalNumberOfParts() const { return nLocalParts_; }
00160   
00168   scalar_t getLocalFractionOfPart() const { return localFraction_; }
00169   
00179   bool oneToOnePartDistribution() const { return onePartPerProc_; }
00180   
00200   const int *getPartDistribution() const { return &partDist_[0]; }
00201   
00218   const partId_t *getProcDistribution() const { return &procDist_[0]; }
00219 
00223   int getNumberOfCriteria() const { return weightDim_; }
00224 
00225   
00232   bool criteriaHasUniformPartSizes(int idx) const { return pSizeUniform_[idx];}
00233   
00246   scalar_t getCriteriaPartSize(int idx, partId_t part) const { 
00247     if (pSizeUniform_[idx]) 
00248       return 1.0 / nGlobalParts_;
00249     else if (pCompactIndex_[idx].size())
00250       return pSize_[idx][pCompactIndex_[idx][part]];
00251     else
00252       return pSize_[idx][part];
00253   }
00254 
00268   bool criteriaHaveSamePartSizes(int c1, int c2) const;
00269 
00271   // Method used by the algorithm to set results.
00272 
00299   void setParts(ArrayRCP<const gno_t> &gnoList, 
00300     ArrayRCP<partId_t> &partList, bool dataDidNotMove);
00301   
00303   // Results that may be queried by the user, by migration methods,
00304   // or by metric calculation methods.
00305   // We return raw pointers so users don't have to learn about our
00306   // pointer wrappers.
00307 
00310   const RCP<const Comm<int> > &getCommunicator() const { return comm_;}
00311 
00314   size_t getLocalNumberOfIds() const { return gids_.size(); }
00315 
00318   const gid_t *getIdList() const { return gids_.getRawPtr(); }
00319 
00322   const zoltan2_partId_t *getPartList() const { return parts_.getRawPtr();}
00323 
00328   const int *getProcList() const { return procs_.getRawPtr();}
00329 
00359   template <typename Extra>
00360     size_t convertSolutionToImportList(
00361       int numExtra,
00362       ArrayRCP<Extra> &xtraInfo,
00363       ArrayRCP<typename Adapter::gid_t> &imports,
00364       ArrayRCP<Extra> &newXtraInfo) const;
00365 
00383   void getPartsForProc(int procId, double &numParts, partId_t &partMin, 
00384     partId_t &partMax) const
00385   {
00386     env_->localInputAssertion(__FILE__, __LINE__, "invalid process id",
00387       procId >= 0 && procId < comm_->getSize(), BASIC_ASSERTION);
00388 
00389     procToPartsMap(procId, numParts, partMin, partMax);
00390   }
00391 
00403   void getProcsForPart(partId_t partId, int &procMin, int &procMax) const
00404   {
00405     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
00406       partId >= 0 && partId < nGlobalParts_, BASIC_ASSERTION);
00407 
00408     partToProcsMap(partId, procMin, procMax);
00409   }
00410 
00411 private:
00412   void partToProc(bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
00413     int numLocalParts, int numGlobalParts);
00414 
00415   void procToPartsMap(int procId, double &numParts, partId_t &partMin, 
00416     partId_t &partMax) const;
00417 
00418   void partToProcsMap(partId_t partId, int &procMin, int &procMax) const;
00419 
00420   void setPartDistribution();
00421 
00422   void setPartSizes(ArrayView<ArrayRCP<partId_t> > reqPartIds,
00423     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00424 
00425   void computePartSizes(int wdim, ArrayView<partId_t> ids, 
00426     ArrayView<scalar_t> sizes);
00427 
00428   void broadcastPartSizes(int wdim);
00429 
00430   RCP<const Environment> env_;             // has application communicator
00431   RCP<const Comm<int> > comm_;             // the problem communicator
00432   RCP<const IdentifierMap<user_t> > idMap_;
00433 
00434   gno_t nGlobalParts_;// target global number of parts
00435   lno_t nLocalParts_; // number of parts to be on this process
00436 
00437   scalar_t localFraction_; // approx fraction of a part on this process
00438   int weightDim_;      // if user has no weights, this is 1
00439 
00440   // If process p is to be assigned part p for all p, then onePartPerProc_ 
00441   // is true. Otherwise it is false, and either procDist_ or partDist_
00442   // describes the allocation of parts to processes.
00443   //
00444   // If parts are never split across processes, then procDist_ is defined
00445   // as follows:
00446   //
00447   //   partId              = procDist_[procId]
00448   //   partIdNext          = procDist_[procId+1]
00449   //   globalNumberOfParts = procDist_[numProcs]
00450   //
00451   // meaning that the parts assigned to process procId range from
00452   // [partId, partIdNext).  If partIdNext is the same as partId, then
00453   // process procId has no parts.
00454   //
00455   // If the number parts is less than the number of processes, and the
00456   // user did not specify "num_local_parts" for each of the processes, then
00457   // parts are split across processes, and partDist_ is defined rather than
00458   // procDist_.
00459   //
00460   //   procId              = partDist_[partId]
00461   //   procIdNext          = partDist_[partId+1]
00462   //   globalNumberOfProcs = partDist_[numParts]
00463   //
00464   // which implies that the part partId is shared by processes in the
00465   // the range [procId, procIdNext).
00466   //
00467   // We use std::vector so we can use upper_bound algorithm
00468   
00469   bool             onePartPerProc_;   // either this is true...
00470   std::vector<int>      partDist_;      // or this is defined ...
00471   std::vector<partId_t> procDist_;      // or this is defined.
00472   bool procDistEquallySpread_;        // if procDist_ is used and 
00473                                       // #parts > #procs and
00474                                       // num_local_parts is not specified,
00475                                       // parts are evenly distributed to procs
00476 
00477   // In order to minimize the storage required for part sizes, we
00478   // have three different representations.
00479   //
00480   // If the part sizes for weight dimension w are all the same, then:
00481   //    pSizeUniform_[w] = true
00482   //    pCompactIndex_[w].size() = 0
00483   //    pSize_[w].size() = 0
00484   //
00485   // and the size for part p is 1.0 / nparts.
00486   //
00487   // If part sizes differ for each part in weight dimension w, but there
00488   // are no more than 64 distinct sizes:
00489   //    pSizeUniform_[w] = false
00490   //    pCompactIndex_[w].size() = number of parts
00491   //    pSize_[w].size() = number of different sizes
00492   //
00493   // and the size for part p is pSize_[pCompactIndex_[p]].
00494   //
00495   // If part sizes differ for each part in weight dimension w, and there
00496   // are more than 64 distinct sizes:
00497   //    pSizeUniform_[w] = false
00498   //    pCompactIndex_[w].size() = 0
00499   //    pSize_[w].size() = nparts
00500   //
00501   // and the size for part p is pSize_[p].
00502   //
00503   // NOTE: If we expect to have similar cases, i.e. a long list of scalars
00504   //   where it is highly possible that just a few unique values appear,
00505   //   then we may want to make this a class.  The advantage is that we
00506   //   save a long list of 1-byte indices instead of a long list of scalars.
00507 
00508   ArrayRCP<bool> pSizeUniform_;
00509   ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
00510   ArrayRCP<ArrayRCP<scalar_t> > pSize_;
00511 
00513   // The algorithm sets these values upon completion.
00514 
00515   ArrayRCP<const gid_t>  gids_;   // User's global IDs 
00516   ArrayRCP<partId_t> parts_;      // part number assigned to gids_[i]
00517 
00518   bool haveSolution_;
00519 
00520   gno_t nGlobalPartsSolution_; // global number of parts in solution
00521 
00523   // The solution calculates this from the part assignments,
00524   // unless onePartPerProc_.
00525 
00526   ArrayRCP<int> procs_;       // process rank assigned to gids_[i]
00527 };
00528 
00530 // Definitions
00532 
00533 template <typename Adapter>
00534   PartitioningSolution<Adapter>::PartitioningSolution(
00535     RCP<const Environment> &env, 
00536     RCP<const Comm<int> > &comm,
00537     RCP<const IdentifierMap<user_t> > &idMap, int userWeightDim)
00538     : env_(env), comm_(comm), idMap_(idMap),
00539       nGlobalParts_(0), nLocalParts_(0),
00540       localFraction_(0),  weightDim_(),
00541       onePartPerProc_(false), partDist_(), procDist_(),
00542       procDistEquallySpread_(false),
00543       pSizeUniform_(), pCompactIndex_(), pSize_(),
00544       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
00545       procs_()
00546 {
00547   weightDim_ = (userWeightDim ? userWeightDim : 1); 
00548 
00549   setPartDistribution();
00550 
00551   // We must call setPartSizes() because part sizes may have
00552   // been provided by the user on other processes.
00553 
00554   ArrayRCP<partId_t> *noIds = new ArrayRCP<partId_t> [weightDim_];
00555   ArrayRCP<scalar_t> *noSizes = new ArrayRCP<scalar_t> [weightDim_];
00556   ArrayRCP<ArrayRCP<partId_t> > ids(noIds, 0, weightDim_, true);
00557   ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, weightDim_, true);
00558 
00559   setPartSizes(ids.view(0, weightDim_), sizes.view(0, weightDim_));
00560 
00561   env_->memory("After construction of solution");
00562 }
00563 
00564 template <typename Adapter>
00565   PartitioningSolution<Adapter>::PartitioningSolution(
00566     RCP<const Environment> &env, 
00567     RCP<const Comm<int> > &comm,
00568     RCP<const IdentifierMap<user_t> > &idMap, int userWeightDim,
00569     ArrayView<ArrayRCP<partId_t> > reqPartIds, 
00570     ArrayView<ArrayRCP<scalar_t> > reqPartSizes)
00571     : env_(env), comm_(comm), idMap_(idMap),
00572       nGlobalParts_(0), nLocalParts_(0), 
00573       localFraction_(0),  weightDim_(),
00574       onePartPerProc_(false), partDist_(), procDist_(), 
00575       procDistEquallySpread_(false),
00576       pSizeUniform_(), pCompactIndex_(), pSize_(),
00577       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0), 
00578       procs_()
00579 {
00580   weightDim_ = (userWeightDim ? userWeightDim : 1); 
00581 
00582   setPartDistribution();
00583 
00584   setPartSizes(reqPartIds, reqPartSizes);
00585 
00586   env_->memory("After construction of solution");
00587 }
00588 
00589 template <typename Adapter>
00590   void PartitioningSolution<Adapter>::setPartDistribution()
00591 {
00592   // Did the caller define num_global_parts and/or num_local_parts?
00593 
00594   const ParameterList &pl = env_->getParameters();
00595   size_t haveGlobalNumParts=0, haveLocalNumParts=0;
00596   int numLocal=0, numGlobal=0;
00597   double val;
00598 
00599   const Teuchos::ParameterEntry *pe = pl.getEntryPtr("num_global_parts");
00600 
00601   if (pe){
00602     val = pe->getValue<double>(&val);
00603     haveGlobalNumParts = 1;
00604     numGlobal = static_cast<int>(val);
00605     nGlobalParts_ = gno_t(numGlobal);
00606   }
00607 
00608   pe = pl.getEntryPtr("num_local_parts");
00609 
00610   if (pe){
00611     val = pe->getValue<double>(&val);
00612     haveLocalNumParts = 1;
00613     numLocal = static_cast<int>(val);
00614     nLocalParts_ = lno_t(numLocal);
00615   }
00616 
00617   try{
00618     // Sets onePartPerProc_, partDist_, and procDist_
00619 
00620     partToProc(true, haveLocalNumParts, haveGlobalNumParts, 
00621       numLocal, numGlobal);
00622   }
00623   Z2_FORWARD_EXCEPTIONS
00624 
00625   int nprocs = comm_->getSize();
00626   int rank = comm_->getRank();
00627 
00628   if (onePartPerProc_){
00629     nGlobalParts_ = nprocs;
00630     nLocalParts_ = 1;
00631   } 
00632   else if (partDist_.size() > 0){   // more procs than parts
00633     nGlobalParts_ = partDist_.size() - 1;
00634     int pstart = partDist_[0];
00635     for (int i=1; i <= nGlobalParts_; i++){
00636       int pend = partDist_[i];
00637       if (rank >= pstart && rank < pend){
00638         int numOwners = pend - pstart;
00639         nLocalParts_ = 1;
00640         localFraction_ = 1.0 / numOwners;
00641         break;
00642       }
00643       pstart = pend;
00644     }
00645   }
00646   else if (procDist_.size() > 0){  // more parts than procs
00647     nGlobalParts_ = procDist_[nprocs];
00648     nLocalParts_ = procDist_[rank+1] - procDist_[rank];
00649   }
00650   else {
00651     throw logic_error("partToProc error");
00652   }
00653 
00654 }
00655 
00656 template <typename Adapter>
00657   void PartitioningSolution<Adapter>::setPartSizes(
00658     ArrayView<ArrayRCP<partId_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
00659 {
00660   int wdim = weightDim_;
00661   bool fail=false;
00662 
00663   size_t *countBuf = new size_t [wdim*2];
00664   ArrayRCP<size_t> counts(countBuf, 0, wdim*2, true);
00665 
00666   fail = ((ids.size() != wdim) || (sizes.size() != wdim));
00667 
00668   for (int w=0; !fail && w < wdim; w++){
00669     counts[w] = ids[w].size();
00670     if (ids[w].size() != sizes[w].size()) fail=true;
00671   }
00672 
00673   env_->globalBugAssertion(__FILE__, __LINE__, "bad argument arrays", fail==0, 
00674     COMPLEX_ASSERTION, comm_);
00675 
00676   // Are all part sizes the same?  This is the common case.
00677 
00678   ArrayRCP<scalar_t> *emptySizes= new ArrayRCP<scalar_t> [wdim];
00679   pSize_ = arcp(emptySizes, 0, wdim);
00680 
00681   ArrayRCP<unsigned char> *emptyIndices= new ArrayRCP<unsigned char> [wdim];
00682   pCompactIndex_ = arcp(emptyIndices, 0, wdim);
00683 
00684   bool *info = new bool [wdim];
00685   pSizeUniform_ = arcp(info, 0, wdim);
00686   for (int w=0; w < wdim; w++)
00687     pSizeUniform_[w] = true;
00688 
00689   if (nGlobalParts_ == 1){   
00690     return;   // there's only one part in the whole problem
00691   }
00692 
00693   size_t *ptr1 = counts.getRawPtr();
00694   size_t *ptr2 = counts.getRawPtr() + wdim;
00695 
00696   try{
00697     reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, wdim, ptr1, ptr2);
00698   }
00699   Z2_THROW_OUTSIDE_ERROR(*env_);
00700 
00701   bool zero = true;
00702 
00703   for (int w=0; w < wdim; w++)
00704     if (counts[wdim+w] > 0){
00705       zero = false;
00706       pSizeUniform_[w] = false;
00707     }
00708 
00709   if (zero) // Part sizes for all criteria are uniform.
00710     return; 
00711 
00712   // Compute the part sizes for criteria for which part sizes were
00713   // supplied.  Normalize for each criteria so part sizes sum to one.
00714 
00715   int nprocs = comm_->getSize();
00716   int rank = comm_->getRank();
00717 
00718   for (int w=0; w < wdim; w++){
00719     if (pSizeUniform_[w]) continue;
00720     
00721     // Send all ids and sizes to one process.
00722     // (There is no simple gather method in Teuchos.)
00723 
00724     partId_t length = ids[w].size();
00725     partId_t *allLength = new partId_t [nprocs];
00726     Teuchos::gatherAll<int, partId_t>(*comm_, 1, &length, 
00727       nprocs, allLength);
00728 
00729     if (rank == 0){
00730       int total = 0;
00731       for (int i=0; i < nprocs; i++)
00732         total += allLength[i];
00733 
00734       partId_t *partNums = new partId_t [total];
00735       scalar_t *partSizes = new scalar_t [total];
00736 
00737       ArrayView<partId_t> idArray(partNums, total);
00738       ArrayView<scalar_t> sizeArray(partSizes, total);
00739 
00740       if (length > 0){
00741         for (int i=0; i < length; i++){
00742           *partNums++ = ids[w][i];
00743           *partSizes++ = sizes[w][i];
00744         }
00745       }
00746 
00747       for (int p=1; p < nprocs; p++){
00748         if (allLength[p] > 0){
00749           Teuchos::receive<int, partId_t>(*comm_, p,
00750             allLength[p], partNums);
00751           Teuchos::receive<int, scalar_t>(*comm_, p,
00752             allLength[p], partSizes);
00753           partNums += allLength[p];
00754           partSizes += allLength[p];
00755         }
00756       }
00757 
00758       delete [] allLength;
00759 
00760       try{
00761         computePartSizes(w, idArray, sizeArray);
00762       }
00763       Z2_FORWARD_EXCEPTIONS
00764 
00765       delete [] idArray.getRawPtr();
00766       delete [] sizeArray.getRawPtr();
00767     } 
00768     else{
00769       delete [] allLength;
00770       if (length > 0){
00771         Teuchos::send<int, partId_t>(*comm_, length, ids[w].getRawPtr(), 0);
00772         Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
00773       }
00774     }
00775 
00776     broadcastPartSizes(w);
00777   }
00778 }
00779 
00780 template <typename Adapter>
00781   void PartitioningSolution<Adapter>::broadcastPartSizes(int wdim)
00782 {
00783   env_->localBugAssertion(__FILE__, __LINE__, "preallocations", 
00784     pSize_.size()>wdim && 
00785     pSizeUniform_.size()>wdim && pCompactIndex_.size()>wdim, 
00786     COMPLEX_ASSERTION);
00787 
00788   int rank = comm_->getRank();
00789   int nprocs = comm_->getSize();
00790   partId_t nparts = nGlobalParts_;
00791 
00792   if (nprocs < 2)
00793     return;
00794 
00795   char flag=0;
00796 
00797   if (rank == 0){
00798     if (pSizeUniform_[wdim] == true)
00799       flag = 1;
00800     else if (pCompactIndex_[wdim].size() > 0)
00801       flag = 2;
00802     else
00803       flag = 3;
00804   }
00805 
00806   try{
00807     Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
00808   }
00809   Z2_THROW_OUTSIDE_ERROR(*env_);
00810 
00811   if (flag == 1){
00812     if (rank > 0)
00813       pSizeUniform_[wdim] = true;
00814 
00815     return;
00816   }
00817 
00818   if (flag == 2){
00819 
00820     // broadcast the indices into the size list
00821 
00822     unsigned char *idxbuf = NULL;
00823 
00824     if (rank > 0){
00825       idxbuf = new unsigned char [nparts];
00826       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
00827     }
00828     else{
00829       env_->localBugAssertion(__FILE__, __LINE__, "index list size", 
00830         pCompactIndex_[wdim].size() == nparts, COMPLEX_ASSERTION);
00831       idxbuf = pCompactIndex_[wdim].getRawPtr();
00832     }
00833 
00834     try{
00835       // broadcast of unsigned char is not supported
00836       Teuchos::broadcast<int, char>(*comm_, 0, nparts, 
00837         reinterpret_cast<char *>(idxbuf));
00838     }
00839     Z2_THROW_OUTSIDE_ERROR(*env_);
00840 
00841     if (rank > 0)
00842       pCompactIndex_[wdim] = arcp(idxbuf, 0, nparts, true);
00843 
00844     // broadcast the list of different part sizes
00845 
00846     unsigned char maxIdx=0;
00847     for (partId_t p=0; p < nparts; p++)
00848       if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
00849 
00850     int numSizes = maxIdx + 1;
00851   
00852     scalar_t *sizeList = NULL;
00853 
00854     if (rank > 0){
00855       sizeList = new scalar_t [numSizes];
00856       env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
00857     }
00858     else{
00859       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes", 
00860         numSizes == pSize_[wdim].size(), COMPLEX_ASSERTION);
00861 
00862       sizeList = pSize_[wdim].getRawPtr();
00863     }
00864 
00865     try{
00866       Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
00867     }
00868     Z2_THROW_OUTSIDE_ERROR(*env_);
00869 
00870     if (rank > 0)
00871       pSize_[wdim] = arcp(sizeList, 0, numSizes, true);
00872 
00873     return;
00874   }
00875 
00876   if (flag == 3){
00877 
00878     // broadcast the size of each part
00879 
00880     scalar_t *sizeList = NULL;
00881 
00882     if (rank > 0){
00883       sizeList = new scalar_t [nparts];
00884       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
00885     }
00886     else{
00887       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes", 
00888         nparts == pSize_[wdim].size(), COMPLEX_ASSERTION);
00889 
00890       sizeList = pSize_[wdim].getRawPtr();
00891     }
00892 
00893     try{
00894       Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
00895     }
00896     Z2_THROW_OUTSIDE_ERROR(*env_);
00897 
00898     if (rank > 0)
00899       pSize_[wdim] = arcp(sizeList, 0, nparts);
00900 
00901     return;
00902   }
00903 }
00904 
00905 template <typename Adapter>
00906   void PartitioningSolution<Adapter>::computePartSizes(int wdim,
00907     ArrayView<partId_t> ids, ArrayView<scalar_t> sizes)
00908 {
00909   int len = ids.size();
00910 
00911   if (len == 0){
00912     pSizeUniform_[wdim] = true;
00913     return;
00914   }
00915 
00916   env_->localBugAssertion(__FILE__, __LINE__, "bad array sizes", 
00917     len>0 && sizes.size()==len, COMPLEX_ASSERTION);
00918 
00919   env_->localBugAssertion(__FILE__, __LINE__, "bad index", 
00920     wdim>=0 && wdim<weightDim_, COMPLEX_ASSERTION);
00921 
00922   env_->localBugAssertion(__FILE__, __LINE__, "preallocations", 
00923     pSize_.size()>wdim && 
00924     pSizeUniform_.size()>wdim && pCompactIndex_.size()>wdim, 
00925     COMPLEX_ASSERTION);
00926 
00927   // Check ids and sizes and find min, max and average sizes.
00928   // If sizes are very close to uniform, call them uniform parts.
00929 
00930   partId_t nparts = nGlobalParts_;
00931   unsigned char *buf = new unsigned char [nparts];
00932   env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
00933   memset(buf, 0, nparts);
00934   ArrayRCP<unsigned char> partIdx(buf, 0, nparts, true);
00935 
00936   scalar_t epsilon = 10e-5 / nparts;
00937   scalar_t min=sizes[0], max=sizes[0], sum=0;
00938 
00939   for (int i=0; i < len; i++){
00940     partId_t id = ids[i];
00941     scalar_t size = sizes[i];
00942 
00943     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id", 
00944       id>=0 && id<nparts, BASIC_ASSERTION);
00945 
00946     env_->localInputAssertion(__FILE__, __LINE__, "invalid part size", size>=0,
00947       BASIC_ASSERTION);
00948 
00949     // TODO: we could allow users to specify multiple sizes for the same
00950     // part if we add a parameter that says what we are to do with them:
00951     // add them or take the max.
00952 
00953     env_->localInputAssertion(__FILE__, __LINE__, 
00954       "multiple sizes provided for one part", partIdx[id]==0, BASIC_ASSERTION);
00955 
00956     partIdx[id] = 1;    // mark that we have a size for this part
00957 
00958     if (size < min) min = size;
00959     if (size > max) max = size;
00960     sum += size;
00961   }
00962 
00963   if (sum == 0){   
00964 
00965     // User has given us a list of parts of size 0, we'll set
00966     // the rest to them to equally sized parts.
00967     
00968     scalar_t *allSizes = new scalar_t [2];
00969     env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
00970 
00971     ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, true);
00972 
00973     int numNonZero = nparts - len;
00974 
00975     allSizes[0] = 0.0;
00976     allSizes[1] = 1.0 / numNonZero;
00977 
00978     for (partId_t p=0; p < nparts; p++)
00979       buf[p] = 1;                 // index to default part size
00980 
00981     for (int i=0; i < len; i++)
00982       buf[ids[i]] = 0;            // index to part size zero
00983     
00984     pSize_[wdim] = sizeArray;
00985     pCompactIndex_[wdim] = partIdx;
00986 
00987     return;
00988   }
00989 
00990   if (max - min <= epsilon){
00991     pSizeUniform_[wdim] = true;
00992     return;
00993   }
00994 
00995   // A size for parts that were not specified:
00996   scalar_t avg = sum / nparts;
00997 
00998   // We are going to merge part sizes that are very close.  This takes
00999   // computation time now, but can save considerably in the storage of
01000   // all part sizes on each process.  For example, a common case may
01001   // be some parts are size 1 and all the rest are size 2.
01002 
01003   scalar_t *tmp = new scalar_t [len];
01004   env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01005   memcpy(tmp, sizes.getRawPtr(), sizeof(scalar_t) * len);
01006   ArrayRCP<scalar_t> partSizes(tmp, 0, len, true);
01007 
01008   std::sort(partSizes.begin(), partSizes.end());
01009 
01010   // create a list of sizes that are unique within epsilon
01011 
01012   Array<scalar_t> nextUniqueSize;
01013   nextUniqueSize.push_back(partSizes[len-1]);   // largest
01014   scalar_t curr = partSizes[len-1];
01015   int avgIndex = len;
01016   bool haveAvg = false;
01017   if (curr - avg <= epsilon)
01018      avgIndex = 0;
01019 
01020   for (int i=len-2; i >= 0; i--){
01021     scalar_t val = partSizes[i];
01022     if (curr - val > epsilon){
01023       nextUniqueSize.push_back(val);  // the highest in the group
01024       curr = val;
01025       if (avgIndex==len && val > avg && val - avg <= epsilon){
01026         // the average would be in this group
01027         avgIndex = nextUniqueSize.size() - 1;
01028         haveAvg = true;
01029       }
01030     }
01031   }
01032 
01033   partSizes.clear();
01034 
01035   size_t numSizes = nextUniqueSize.size();
01036   int sizeArrayLen = numSizes;
01037 
01038   if (numSizes < 64){
01039     // We can store the size for every part in a compact way.
01040 
01041     // Create a list of all sizes in increasing order
01042 
01043     if (!haveAvg) sizeArrayLen++;   // need to include average
01044     
01045     scalar_t *allSizes = new scalar_t [sizeArrayLen];
01046     env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
01047     ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, true);
01048 
01049     int newAvgIndex = sizeArrayLen;
01050 
01051     for (int i=numSizes-1, idx=0; i >= 0; i--){
01052 
01053       if (newAvgIndex == sizeArrayLen){
01054 
01055         if (haveAvg && i==avgIndex)
01056           newAvgIndex = idx;
01057 
01058         else if (!haveAvg && avg < nextUniqueSize[i]){
01059           newAvgIndex = idx;
01060           allSizes[idx++] = avg;
01061         }
01062       }
01063 
01064       allSizes[idx++] = nextUniqueSize[i];
01065     }
01066 
01067     env_->localBugAssertion(__FILE__, __LINE__, "finding average in list", 
01068       newAvgIndex < sizeArrayLen, COMPLEX_ASSERTION);
01069 
01070     for (int i=0; i < nparts; i++){
01071       buf[i] = newAvgIndex;   // index to default part size
01072     }
01073 
01074     sum = (nparts - len) * allSizes[newAvgIndex];
01075 
01076     for (int i=0; i < len; i++){
01077       int id = ids[i];
01078       scalar_t size = sizes[i];
01079       int index;
01080 
01081       // Find the first size greater than or equal to this size.
01082 
01083       if (size < avg && avg - size <= epsilon)
01084         index = newAvgIndex;
01085       else{
01086         typename ArrayRCP<scalar_t>::iterator found = 
01087           std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
01088 
01089         env_->localBugAssertion(__FILE__, __LINE__, "size array", 
01090           found != sizeArray.end(), COMPLEX_ASSERTION);
01091 
01092         index = found - sizeArray.begin();
01093       }
01094 
01095       buf[id] = index;
01096       sum += allSizes[index];
01097     }
01098 
01099     for (int i=0; i < sizeArrayLen; i++){
01100       sizeArray[i] /= sum;
01101     }
01102 
01103     pCompactIndex_[wdim] = partIdx;
01104     pSize_[wdim] = sizeArray;
01105   }
01106   else{
01107     // To have access to part sizes, we must store nparts scalar_ts on 
01108     // every process.  We expect this is a rare case.
01109 
01110     tmp = new scalar_t [nparts];
01111     env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
01112 
01113     sum += ((nparts - len) * avg);
01114 
01115     for (int i=0; i < nparts; i++){
01116       tmp[i] = avg/sum;
01117     }
01118 
01119     for (int i=0; i < len; i++){
01120       tmp[ids[i]] = sizes[i]/sum;
01121     }
01122 
01123     pSize_[wdim] = arcp(tmp, 0, nparts);
01124   }
01125 }
01126 
01127 template <typename Adapter>
01128   void PartitioningSolution<Adapter>::setParts(
01129     ArrayRCP<const gno_t> &gnoList, ArrayRCP<partId_t> &partList,
01130     bool dataDidNotMove)
01131 {
01132   env_->debug(DETAILED_STATUS, "Entering setParts");
01133 
01134   size_t len = partList.size();
01135 
01136   // Find the actual number of parts in the solution, which can
01137   // be more or less than the nGlobalParts_ target.
01138   // (We may want to compute the imbalance of a given solution with
01139   // respect to a desired solution.  This solution may have more or
01140   // fewer parts that the desired solution.)
01141 
01142   partId_t lMax, lMin, gMax, gMin;
01143   
01144   if (len > 0)
01145     IdentifierTraits<partId_t>::minMax(partList.getRawPtr(), len, lMin, lMax);
01146 
01147   IdentifierTraits<partId_t>::globalMinMax(*comm_, len == 0,
01148     lMin, lMax, gMin, gMax);
01149       
01150   nGlobalPartsSolution_ = gMax - gMin + 1;
01151 
01152   if (dataDidNotMove) {
01153     // Case where the algorithm provides the part list in the same order
01154     // that it received the gnos.
01155 
01156     if (idMap_->gnosAreGids()){
01157       gids_ = Teuchos::arcp_reinterpret_cast<const gid_t>(gnoList);
01158     }
01159     else{
01160       gid_t *gidList = new gid_t [len];
01161       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01162       ArrayView<gid_t> gidView(gidList, len);
01163   
01164       const gno_t *gnos = gnoList.getRawPtr();
01165       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01166 
01167       try{
01168         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01169       }
01170       Z2_FORWARD_EXCEPTIONS
01171   
01172       gids_ = arcp<const gid_t>(gidList, 0, len);
01173     }
01174     parts_ = partList;
01175   }
01176   else {
01177     // Case where the algorithm moved the data.
01178     // KDDKDD Should we require the algorithm to return the parts in
01179     // KDDKDD the same order as the gnos provided by the model?
01180 
01181     // We send part information to the process that "owns" the global number.
01182 
01183     ArrayView<gid_t> emptyView;
01184     ArrayView<int> procList;
01185 
01186     if (len){
01187       int *tmp = new int [len];
01188       env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01189       procList = ArrayView<int>(tmp, len);
01190     }
01191 
01192     idMap_->gnoGlobalTranslate (gnoList.view(0,len), emptyView, procList);
01193 
01194     int remotelyOwned = 0;
01195     int rank = comm_->getRank();
01196     int nprocs = comm_->getSize();
01197   
01198     for (size_t i=0; !remotelyOwned && i < len; i++){
01199       if (procList[i] != rank)
01200         remotelyOwned = 1;
01201     }
01202 
01203     int anyRemotelyOwned=0;
01204 
01205     try{
01206       reduceAll<int, int>(*comm_, Teuchos::REDUCE_MAX, 1,
01207         &remotelyOwned, &anyRemotelyOwned);
01208     }
01209     Z2_THROW_OUTSIDE_ERROR(*env_);
01210 
01211     if (anyRemotelyOwned){
01212 
01213       // Send the owners of these gnos their part assignments.
01214     
01215       lno_t *tmpCount = new lno_t [nprocs];
01216       memset(tmpCount, 0, sizeof(lno_t) * nprocs);
01217       env_->localMemoryAssertion(__FILE__, __LINE__, nprocs, tmpCount);
01218       ArrayView<int> countOutBuf(tmpCount, nprocs);
01219   
01220       ArrayView<gno_t> outBuf;
01221   
01222       if (len > 0){
01223         gno_t *tmpGno = new gno_t [len*2];
01224         env_->localMemoryAssertion(__FILE__, __LINE__, len*2, tmpGno);
01225         outBuf = ArrayView<gno_t>(tmpGno, len*2);
01226       
01227         lno_t *tmpOff = new lno_t [nprocs+1];
01228         env_->localMemoryAssertion(__FILE__, __LINE__, nprocs+1, tmpOff);
01229         ArrayView<lno_t> offsetBuf(tmpOff, nprocs+1);
01230       
01231           for (size_t i=0; i < len; i++){
01232           countOutBuf[procList[i]]+=2;
01233         }
01234       
01235         offsetBuf[0] = 0;
01236         for (int i=0; i < nprocs; i++)
01237           offsetBuf[i+1] = offsetBuf[i] + countOutBuf[i];
01238       
01239         for (size_t i=0; i < len; i++){
01240           int p = procList[i];
01241           int off = offsetBuf[p];
01242           outBuf[off] = gnoList[i];
01243           outBuf[off+1] = static_cast<gno_t>(partList[i]);
01244           offsetBuf[p]+=2;
01245         }
01246     
01247         delete [] tmpOff;
01248       }
01249     
01250       ArrayRCP<gno_t> inBuf;
01251       ArrayRCP<int> countInBuf;
01252     
01253       try{
01254         AlltoAllv<gno_t>(*comm_, *env_,
01255           outBuf, countOutBuf, inBuf, countInBuf);
01256       }
01257       Z2_FORWARD_EXCEPTIONS;
01258   
01259       if (len)
01260         delete [] outBuf.getRawPtr();
01261   
01262       delete [] countOutBuf.getRawPtr();
01263   
01264       gno_t newLen = 0;
01265       for (int i=0; i < nprocs; i++)
01266         newLen += countInBuf[i];
01267   
01268       newLen /= 2;
01269 
01270       ArrayRCP<partId_t> parts;
01271       ArrayRCP<const gno_t> myGnos;
01272 
01273       if (newLen > 0){
01274   
01275         gno_t *tmpGno = new gno_t [newLen];
01276         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpGno);
01277     
01278         partId_t *tmpPart = new partId_t [newLen];
01279         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpPart);
01280     
01281         int next = 0;
01282         for (lno_t i=0; i < newLen; i++){
01283           tmpGno[i] = inBuf[next++];
01284           tmpPart[i] = inBuf[next++];
01285         }
01286   
01287         parts = arcp(tmpPart, 0, newLen);
01288         myGnos = arcp(tmpGno, 0, newLen);
01289       }
01290   
01291       gnoList = myGnos;
01292       partList = parts;
01293       len = newLen;
01294     }
01295   
01296     delete [] procList.getRawPtr();
01297   
01298     if (idMap_->gnosAreGids()){
01299       gids_ = Teuchos::arcp_reinterpret_cast<const gid_t>(gnoList);
01300     }
01301     else{
01302       gid_t *gidList = new gid_t [len];
01303       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01304       ArrayView<gid_t> gidView(gidList, len);
01305   
01306       const gno_t *gnos = gnoList.getRawPtr();
01307       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01308 
01309       try{
01310         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01311       }
01312       Z2_FORWARD_EXCEPTIONS
01313   
01314       gids_ = arcp<const gid_t>(gidList, 0, len);
01315     }
01316 
01317     parts_ = partList;
01318   }
01319 
01320   // Now determine which process gets each object, if not one-to-one.
01321 
01322   if (!onePartPerProc_){
01323 
01324     int *procs = new int [len];
01325     env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
01326     procs_ = arcp<int>(procs, 0, len);
01327 
01328     partId_t *parts = partList.getRawPtr();
01329 
01330     if (procDist_.size() > 0){    // parts are not split across procs
01331 
01332       int procId;
01333       for (size_t i=0; i < len; i++){
01334         partToProcsMap(parts[i], procs[i], procId);
01335       }
01336     }
01337     else{  // harder - we need to split the parts across multiple procs
01338 
01339       lno_t *partCounter = new lno_t [nGlobalPartsSolution_];
01340       env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_, 
01341         partCounter);
01342 
01343       int numProcs = comm_->getSize();
01344 
01345       for (lno_t i=0; i < partList.size(); i++)
01346         partCounter[parts[i]]++;
01347 
01348       lno_t *procCounter = new lno_t [numProcs];
01349       env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
01350       
01351       int proc2 = partDist_[0];
01352 
01353       for (lno_t part=1; part < nGlobalParts_; part++){
01354         int proc1 = proc2;
01355         proc2 = partDist_[part+1];
01356         int numprocs = proc2 - proc1;
01357 
01358         double dNum = partCounter[part];
01359         double dProcs = numprocs;
01360         
01361         double each = floor(dNum/dProcs);
01362         double extra = fmod(dNum,dProcs);
01363 
01364         for (int proc=proc1, i=0; proc<proc2; proc++, i++){
01365           if (i < extra)
01366             procCounter[proc] = lno_t(each) + 1;
01367           else
01368             procCounter[proc] = lno_t(each);
01369         }          
01370       }
01371 
01372       delete [] partCounter;
01373 
01374       for (lno_t i=0; i < partList.size(); i++){
01375         if (partList[i] >= nGlobalParts_){
01376           // Solution has more parts that targeted.  These
01377           // objects just remain on this process.
01378           procs[i] = comm_->getRank();
01379           continue;
01380         }
01381         partId_t partNum = parts[i];
01382         int proc1 = partDist_[partNum];
01383         int proc2 = partDist_[partNum + 1];
01384         int proc=0;
01385         
01386         for (proc=proc1; proc < proc2; proc++){
01387           if (procCounter[proc] > 0){
01388             procs[i] = proc;
01389             procCounter[proc]--;
01390             break;
01391           }
01392         }
01393         env_->localBugAssertion(__FILE__, __LINE__, "part to proc", 
01394           proc < proc2, COMPLEX_ASSERTION);
01395       }
01396 
01397       delete [] procCounter;
01398     }
01399   }
01400 
01401   haveSolution_ = true;
01402 
01403   env_->memory("After Solution has processed algorithm's answer");
01404   env_->debug(DETAILED_STATUS, "Exiting setParts");
01405 }
01406 
01407 template <typename Adapter>
01408   template <typename Extra>
01409     size_t PartitioningSolution<Adapter>::convertSolutionToImportList(
01410       int numExtra, ArrayRCP<Extra> &xtraInfo,
01411       ArrayRCP<typename Adapter::gid_t> &imports,       // output
01412       ArrayRCP<Extra> &newXtraInfo) const               // output
01413 {
01414   env_->localInputAssertion(__FILE__, __LINE__, "no solution yet",
01415     haveSolution_, BASIC_ASSERTION);
01416   env_->debug(DETAILED_STATUS, "Entering convertSolutionToImportList");
01417 
01418   int numProcs                = comm_->getSize();
01419   size_t localNumIds          = gids_.size();
01420 
01421   // How many to each process?
01422   Array<int> counts(numProcs, 0);
01423   if (onePartPerProc_)
01424     for (size_t i=0; i < localNumIds; i++)
01425       counts[parts_[i]]++;
01426   else 
01427     for (size_t i=0; i < localNumIds; i++)
01428       counts[procs_[i]]++;
01429 
01430   Array<gno_t> offsets(numProcs+1, 0);
01431   for (int i=1; i <= numProcs; i++){
01432     offsets[i] = offsets[i-1] + counts[i-1];
01433   }
01434 
01435   Array<gid_t> gidList(localNumIds);
01436   Array<Extra> numericInfo;
01437 
01438   if (numExtra > 0)
01439     numericInfo.resize(localNumIds);
01440 
01441   if (onePartPerProc_){
01442     for (size_t i=0; i < localNumIds; i++){
01443       lno_t idx = offsets[parts_[i]];
01444       gidList[idx] = gids_[i];
01445       if (numExtra > 0)
01446         numericInfo[idx] = xtraInfo[i];
01447       offsets[parts_[i]] = idx + 1;
01448     }
01449   }
01450   else {
01451     for (size_t i=0; i < localNumIds; i++){
01452       lno_t idx = offsets[procs_[i]];
01453       gidList[idx] = gids_[i];
01454       if (numExtra > 0)
01455         numericInfo[idx] = xtraInfo[i];
01456       offsets[procs_[i]] = idx + 1;
01457     }
01458   }
01459 
01460   ArrayRCP<int> recvCounts;
01461   RCP<const Environment> env = rcp(new Environment);
01462 
01463   try{
01464     AlltoAllv<gid_t>(*comm_, *env_, gidList.view(0,localNumIds),
01465       counts.view(0, numProcs), imports, recvCounts);
01466   }
01467   catch (std::exception &e){
01468     throw std::runtime_error("alltoallv 1");
01469   }
01470 
01471   if (numExtra > 0){
01472     try{
01473       AlltoAllv<Extra>(*comm_, *env_, xtraInfo.view(0, localNumIds),
01474         counts.view(0, numProcs), newXtraInfo, recvCounts);
01475     }
01476     catch (std::exception &e){
01477       throw std::runtime_error("alltoallv 2");
01478     }
01479   }
01480   env_->debug(DETAILED_STATUS, "Exiting convertSolutionToImportList");
01481   return imports.size();
01482 }
01483 
01484 template <typename Adapter>
01485   void PartitioningSolution<Adapter>::procToPartsMap(int procId, 
01486     double &numParts, partId_t &partMin, partId_t &partMax) const
01487 {
01488   if (onePartPerProc_){
01489     numParts = 1.0;
01490     partMin = partMax = procId;
01491   }
01492   else if (procDist_.size() > 0){
01493     partMin = procDist_[procId];
01494     partMax = procDist_[procId+1] - 1;
01495     numParts = procDist_[procId+1] - partMin;
01496   }
01497   else{
01498     // find the first p such that partDist_[p] > procId
01499 
01500     std::vector<int>::const_iterator entry;
01501     entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
01502 
01503     size_t partIdx = entry - partDist_.begin();
01504     int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
01505     partMin = partMax = int(partIdx) - 1;
01506     numParts = 1.0 / numProcs;
01507   }
01508 }
01509 
01510 template <typename Adapter>
01511   void PartitioningSolution<Adapter>::partToProcsMap(partId_t partId, 
01512     int &procMin, int &procMax) const
01513 {
01514   if (partId >= nGlobalParts_){
01515     // setParts() may be given an initial solution which uses a
01516     // different number of parts than the desired solution.  It is
01517     // still a solution.  We keep it on this process.
01518     procMin = procMax = comm_->getRank();
01519   }
01520   else if (onePartPerProc_){
01521     procMin = procMax = int(partId);
01522   }
01523   else if (procDist_.size() > 0){
01524     if (procDistEquallySpread_) {
01525       // Avoid binary search.
01526       double fProcs = comm_->getSize();
01527       double fParts = nGlobalParts_;
01528       double each = fParts / fProcs;
01529       procMin = int(partId / each);
01530       while (procDist_[procMin] > partId) procMin--;
01531       while (procDist_[procMin+1] <= partId) procMin++;
01532       procMax = procMin;
01533     }
01534     else {
01535       // find the first p such that procDist_[p] > partId.
01536       // For now, do a binary search.
01537 
01538       std::vector<partId_t>::const_iterator entry;
01539       entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
01540 
01541       size_t procIdx = entry - procDist_.begin();
01542       procMin = procMax = int(procIdx) - 1;
01543     }
01544   }
01545   else{
01546     procMin = partDist_[partId];
01547     procMax = partDist_[partId+1] - 1;
01548   }
01549 }
01550 
01551 template <typename Adapter>
01552   bool PartitioningSolution<Adapter>::criteriaHaveSamePartSizes(
01553     int c1, int c2) const
01554 {
01555   if (c1 < 0 || c1 >= weightDim_ || c2 < 0 || c2 >= weightDim_ )
01556     throw logic_error("criteriaHaveSamePartSizes error");
01557 
01558   bool theSame = false;
01559 
01560   if (c1 == c2)
01561     theSame = true;
01562 
01563   else if (pSizeUniform_[c1] == true && pSizeUniform_[c2] == true)
01564     theSame = true;
01565 
01566   else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
01567     theSame = true;
01568     bool useIndex = pCompactIndex_[c1].size() > 0;
01569     if (useIndex){
01570       for (partId_t p=0; theSame && p < nGlobalParts_; p++)
01571         if (pSize_[c1][pCompactIndex_[c1][p]] != 
01572             pSize_[c2][pCompactIndex_[c2][p]])
01573           theSame = false;
01574     }
01575     else{
01576       for (partId_t p=0; theSame && p < nGlobalParts_; p++)
01577         if (pSize_[c1][p] != pSize_[c2][p])
01578           theSame = false;
01579     }
01580   }
01581 
01582   return theSame;
01583 }
01584 
01600 template <typename Adapter>
01601   void PartitioningSolution<Adapter>::partToProc(
01602     bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
01603     int numLocalParts, int numGlobalParts) 
01604 {
01605   int nprocs = comm_->getSize();
01606   ssize_t reducevals[4];
01607   ssize_t sumHaveGlobal=0, sumHaveLocal=0;
01608   ssize_t sumGlobal=0, sumLocal=0;
01609   ssize_t maxGlobal=0, maxLocal=0;
01610   ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
01611       numGlobalParts, numLocalParts};
01612 
01613   partDist_.clear();
01614   procDist_.clear();
01615 
01616   if (doCheck){
01617 
01618     try{
01619       reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
01620     }
01621     Z2_THROW_OUTSIDE_ERROR(*env_);
01622 
01623     sumHaveGlobal = reducevals[0];
01624     sumHaveLocal = reducevals[1];
01625     sumGlobal = reducevals[2];
01626     sumLocal = reducevals[3];
01627 
01628     env_->localInputAssertion(__FILE__, __LINE__,
01629       "Either all procs specify num_global/local_parts or none do",
01630       (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
01631       (sumHaveLocal == 0 || sumHaveLocal == nprocs), 
01632       BASIC_ASSERTION);
01633   }
01634   else{
01635     if (haveNumLocalParts)
01636       sumLocal = numLocalParts * nprocs;
01637     if (haveNumGlobalParts)
01638       sumGlobal = numGlobalParts * nprocs;
01639 
01640     sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
01641     sumHaveLocal = haveNumLocalParts ? nprocs : 0;
01642 
01643     maxLocal = numLocalParts;
01644     maxGlobal = numGlobalParts;
01645   }
01646 
01647   if (!haveNumLocalParts && !haveNumGlobalParts){
01648     onePartPerProc_ = true;   // default if user did not specify
01649     return;
01650   }
01651 
01652   if (haveNumGlobalParts){
01653     if (doCheck){
01654       vals[0] = numGlobalParts;
01655       vals[1] = numLocalParts;
01656       try{
01657         reduceAll<int, ssize_t>(
01658           *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
01659       }
01660       Z2_THROW_OUTSIDE_ERROR(*env_);
01661   
01662       maxGlobal = reducevals[0];
01663       maxLocal = reducevals[1];
01664   
01665       env_->localInputAssertion(__FILE__, __LINE__,
01666         "Value for num_global_parts is different on different processes.",
01667         maxGlobal * nprocs == sumGlobal, BASIC_ASSERTION);
01668     }
01669 
01670     if (sumLocal){
01671       env_->localInputAssertion(__FILE__, __LINE__,
01672         "Sum of num_local_parts does not equal requested num_global_parts",
01673         sumLocal == numGlobalParts, BASIC_ASSERTION);
01674 
01675       if (sumLocal == nprocs && maxLocal == 1){
01676         onePartPerProc_ = true;   // user specified one part per proc
01677         return;
01678       }
01679     }
01680     else{
01681       if (maxGlobal == nprocs){
01682         onePartPerProc_ = true;   // user specified num parts is num procs
01683         return;
01684       }
01685     }
01686   }
01687 
01688   // If we are here, we do not have #parts == #procs.
01689 
01690   if (sumHaveLocal == nprocs){
01691     //
01692     // We will go by the number of local parts specified.
01693     //
01694 
01695     try{
01696       procDist_.resize(nprocs+1);
01697     }
01698     catch (std::exception &e){
01699       throw(std::bad_alloc());
01700     }
01701 
01702     int *procArray = &procDist_[0];
01703 
01704     try{
01705       partId_t tmp = partId_t(numLocalParts);
01706       gatherAll<int, partId_t>(*comm_, 1, &tmp, nprocs, procArray + 1); 
01707     }
01708     Z2_THROW_OUTSIDE_ERROR(*env_);
01709 
01710     procArray[0] = 0;
01711 
01712     for (int proc=0; proc < nprocs; proc++)
01713       procArray[proc+1] += procArray[proc];
01714   }
01715   else{
01716     //
01717     // We will allocate global number of parts to the processes.
01718     //
01719     double fParts = numGlobalParts;
01720     double fProcs = nprocs;
01721 
01722     if (fParts < fProcs){
01723 
01724       try{
01725         partDist_.resize(size_t(fParts+1));
01726       }
01727       catch (std::exception &e){
01728         throw(std::bad_alloc());
01729       }
01730 
01731       int *partArray = &partDist_[0];
01732 
01733       double each = floor(fProcs / fParts);
01734       double extra = fmod(fProcs, fParts);
01735       partDist_[0] = 0;     
01736 
01737       for (partId_t part=0; part < numGlobalParts; part++){
01738         int numOwners = int(each + ((part<extra) ? 1 : 0));
01739         partArray[part+1] = partArray[part] + numOwners;
01740       }
01741 
01742       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs", 
01743         partDist_[numGlobalParts] == nprocs, COMPLEX_ASSERTION, comm_);
01744     }
01745     else if (fParts > fProcs){ 
01746 
01747       // User did not specify local number of parts per proc; 
01748       // Distribute the parts evenly among the procs.
01749 
01750       procDistEquallySpread_ = true;
01751 
01752       try{
01753         procDist_.resize(size_t(fProcs+1));
01754       }
01755       catch (std::exception &e){
01756         throw(std::bad_alloc());
01757       }
01758 
01759       int *procArray = &procDist_[0];
01760 
01761       double each = floor(fParts / fProcs);
01762       double extra = fmod(fParts, fProcs);
01763       procArray[0] = 0;     
01764 
01765       for (int proc=0; proc < nprocs; proc++){
01766         partId_t numParts = partId_t(each + ((proc<extra) ? 1 : 0));
01767         procArray[proc+1] = procArray[proc] + numParts;
01768       }
01769 
01770       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs", 
01771         procDist_[nprocs] == numGlobalParts, COMPLEX_ASSERTION, comm_);
01772     }
01773     else{
01774       env_->globalBugAssertion(__FILE__, __LINE__, 
01775         "should never get here", 1, COMPLEX_ASSERTION, comm_);
01776     }
01777   }
01778 }
01779 
01780 }  // namespace Zoltan2
01781 
01782 #endif