Zoltan 2 Version 0.5
Zoltan2_AlgPQJagged.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 //
00003 // ***********************************************************************
00004 //
00005 //   Zoltan2: A package of combinatorial algorithms for scientific computing
00006 //                  Copyright 2012 Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Karen Devine      (kddevin@sandia.gov)
00039 //                    Erik Boman        (egboman@sandia.gov)
00040 //                    Siva Rajamanickam (srajama@sandia.gov)
00041 //
00042 // ***********************************************************************
00043 //
00044 // @HEADER
00045 
00050 #ifndef _ZOLTAN2_ALGPQJagged_HPP_
00051 #define _ZOLTAN2_ALGPQJagged_HPP_
00052 
00053 #include <Zoltan2_AlgRCB_methods.hpp>
00054 #include <Zoltan2_CoordinateModel.hpp>
00055 #include <Zoltan2_Metric.hpp>             // won't need thiss
00056 
00057 #include <Teuchos_ParameterList.hpp>
00058 
00059 #include <bitset>
00060 
00061 #define EPS_SCALE 1
00062 #define LEAST_SIGNIFICANCE 0.0001
00063 #define SIGNIFICANCE_MUL 1000
00064 //#define INCLUDE_ZOLTAN2_EXPERIMENTAL
00065 #ifdef HAVE_ZOLTAN2_OMP
00066 #include <omp.h>
00067 #endif
00068 //#define FIRST_TOUCH
00069 
00070 //#define BINARYCUTSEARCH
00071 
00072 #define ABS(x) ((x) >= 0 ? (x) : -(x))
00073 
00074 #define LEAF_IMBALANCE_FACTOR 0.1
00075 #define BINARYCUTOFF 32
00076 //imbalance calculation. Wreal / Wexpected - 1
00077 #define imbalanceOf(Wachieved, totalW, expectedRatio) \
00078     (Wachieved) / ((totalW) * (expectedRatio)) - 1
00079 //#define mpi_communication
00080 
00081 namespace Teuchos{
00082 template <typename Ordinal, typename T>
00083 class PQJaggedCombinedReductionOp  : public ValueTypeReductionOp<Ordinal,T>
00084 {
00085 private:
00086   Ordinal numSum_, numMin_1, numMin_2;
00087   Ordinal k;
00088 
00089 public:
00092   PQJaggedCombinedReductionOp ():numSum_(0), numMin_1(0), numMin_2(0), k(0){}
00093 
00100   PQJaggedCombinedReductionOp (Ordinal nsum, Ordinal nmin1, Ordinal nmin2, Ordinal k_):
00101     numSum_(nsum), numMin_1(nmin1), numMin_2(nmin2), k(k_){}
00102 
00105   void reduce( const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
00106   {
00107     Ordinal next=0;
00108     for(Ordinal ii = 0; ii < k ; ++ii){
00109       for (Ordinal i=0; i < numSum_; i++, next++)
00110         inoutBuffer[next] += inBuffer[next];
00111 
00112       for (Ordinal i=0; i < numMin_1; i++, next++)
00113         if (inoutBuffer[next] > inBuffer[next])
00114           inoutBuffer[next] = inBuffer[next];
00115 
00116       for (Ordinal i=0; i < numMin_2; i++, next++)
00117         if (inoutBuffer[next] > inBuffer[next])
00118           inoutBuffer[next] = inBuffer[next];
00119     }
00120   }
00121 };
00122   
00123   
00124 
00125 
00126   
00127 
00128 
00129 
00130 template <typename Ordinal, typename T>
00131 class PQJaggedCombinedMinMaxTotalReductionOp  : public ValueTypeReductionOp<Ordinal,T>
00132 {
00133 private:
00134   Ordinal numMin, numMax, numTotal;
00135 
00136 public:
00139   PQJaggedCombinedMinMaxTotalReductionOp ():numMin(0), numMax(0), numTotal(0){}
00140 
00147   PQJaggedCombinedMinMaxTotalReductionOp (Ordinal nmin, Ordinal nmax, Ordinal nTotal):
00148     numMin(nmin), numMax(nmax), numTotal(nTotal){}
00149 
00152   void reduce( const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
00153   {
00154     Ordinal next=0;
00155 
00156     for (Ordinal i=0; i < numMin; i++, next++)
00157       if (inoutBuffer[next] > inBuffer[next])
00158         inoutBuffer[next] = inBuffer[next];
00159 
00160     for (Ordinal i=0; i < numMax; i++, next++)
00161       if (inoutBuffer[next] < inBuffer[next])
00162         inoutBuffer[next] = inBuffer[next];
00163 
00164 
00165     for (Ordinal i=0; i < numTotal; i++, next++)
00166         inoutBuffer[next] += inBuffer[next];
00167   }
00168 };
00169 } // namespace Teuchos
00170 
00171 namespace Zoltan2{
00172 
00173 //diffclock for temporary timing experiments.
00174 
00175 #ifdef mpi_communication
00176   partId_t concurrent = 0;
00177   void sumMinMin(void *in, void *inout, int *count, MPI_Datatype *type) {   
00178     
00179     int k = *count;
00180     int numCut = (k  - 1) / 4;
00181     int total_part_count = numCut * 2 + 1;
00182     int next = 0;
00183     
00184     float *inoutBuffer = (float *) inout;
00185     float *inBuffer = (float *) in;
00186     
00187     
00188     for(partId_t ii = 0; ii < concurrent ; ++ii){
00189     for (long i=0; i < total_part_count; i++, next++)
00190       inoutBuffer[next] += inBuffer[next];
00191     
00192     for (long i=0; i < numCut; i++, next++)
00193       if (inoutBuffer[next] > inBuffer[next])
00194         inoutBuffer[next] = inBuffer[next];
00195     
00196     for (long i=0; i < numCut; i++, next++)
00197       if (inoutBuffer[next] > inBuffer[next])
00198         inoutBuffer[next] = inBuffer[next];
00199     }
00200   }
00201   
00202   
00203   void minMaxSum(void *in, void *inout, int *count, MPI_Datatype *type) {   
00204 //    long k = (*((int *) count));
00205     int k = *count;
00206     int num = k / 3;
00207     int next = 0;
00208     float *inoutBuffer = (float *) inout;
00209     float *inBuffer = (float *) in;
00210     
00211     
00212     for (long i=0; i < num; i++, next++)
00213       if (inoutBuffer[next] > inBuffer[next])
00214         inoutBuffer[next] = inBuffer[next];
00215     for (long i=0; i < num; i++, next++)
00216       if (inoutBuffer[next] < inBuffer[next])
00217         inoutBuffer[next] = inBuffer[next];
00218     for (long i=0; i < num; i++, next++)
00219       inoutBuffer[next] += inBuffer[next];
00220 
00221   }
00222 #endif
00223 
00228 template <typename lno_t, typename partId_t>
00229 class pqJagged_PartVertices{
00230 private:
00231   lno_t *linkedList; //initially filled with -1's.
00232   lno_t *partBegins; //initially filled with -1's.
00233   lno_t *partEnds; //initially filled with -1's.
00234 public:
00235 
00236   //default constructor
00237   pqJagged_PartVertices(){};
00238 
00248   void set(lno_t *linkedList_, lno_t *partBegins_, lno_t *partEnds_){
00249     linkedList = linkedList_;
00250     partBegins = partBegins_;
00251     partEnds = partEnds_;
00252   }
00253 
00254   //user is responsible from providing the correct number of part counts
00261   void inserToPart (partId_t partNo, lno_t coordinateIndex){
00262 
00263     switch (partEnds[partNo]){
00264     case -1: // this means partBegins[partNo] is also -1.
00265       partBegins[partNo] = coordinateIndex;
00266       partEnds[partNo] = coordinateIndex;
00267       break;
00268     default:
00269       linkedList[coordinateIndex] = partBegins[partNo];
00270       partBegins[partNo] = coordinateIndex;
00271       break;
00272     }
00273 
00274 
00275   }
00276 
00280   lno_t *getLinkedList(){ return linkedList;}
00281 
00285   lno_t *getPartBegins(){ return partBegins;}
00289   lno_t *getPartEnds(){ return partEnds;}
00290 
00291 };
00292 
00293 template<typename T>
00294 inline void firstTouch(T *arrayName, size_t arraySize){
00295 #ifdef HAVE_ZOLTAN2_OMP
00296 #pragma omp parallel for
00297 #endif
00298   for(size_t jj = 0; jj < arraySize; ++jj){
00299     arrayName[jj] = 0;
00300   }
00301 }
00302 
00313 template <typename scalar_t>
00314 inline scalar_t pivotPos (scalar_t * cutUpperBounds, scalar_t *cutLowerBounds,size_t currentCutIndex, scalar_t *cutUpperWeight, scalar_t *cutLowerWeight, scalar_t ew){
00315 
00316   if(cutUpperWeight[currentCutIndex] == cutLowerWeight[currentCutIndex]){
00317     return cutLowerBounds[currentCutIndex];
00318   }
00319   return ((cutUpperBounds[currentCutIndex] - cutLowerBounds[currentCutIndex]) /
00320       (cutUpperWeight[currentCutIndex] - cutLowerWeight[currentCutIndex]))  * (ew - cutLowerWeight[currentCutIndex]) + cutLowerBounds[currentCutIndex];
00321 }
00322 
00323 
00342 template <typename T>
00343 void pqJagged_getParameters(const Teuchos::ParameterList &pl, T &imbalanceTolerance,
00344     multiCriteriaNorm &mcnorm, std::bitset<NUM_RCB_PARAMS> &params,  int &numTestCuts, bool &ignoreWeights, bool &allowNonrectelinear, partId_t &concurrentPartCount,
00345     bool &force_binary, bool &force_linear){
00346 
00347   string obj;
00348 
00349   const Teuchos::ParameterEntry *pe = pl.getEntryPtr("partitioning_objective");
00350   if (pe)
00351     obj = pe->getValue(&obj);
00352 
00353   if (!pe){
00354     params.set(rcb_balanceWeight);
00355     mcnorm = normBalanceTotalMaximum;
00356   }
00357   else if (obj == string("balance_object_count")){
00358     params.set(rcb_balanceCount);
00359   }
00360   else if (obj == string("multicriteria_minimize_total_weight")){
00361     params.set(rcb_minTotalWeight);
00362     mcnorm = normMinimizeTotalWeight;
00363   }
00364   else if (obj == string("multicriteria_minimize_maximum_weight")){
00365     params.set(rcb_minMaximumWeight);
00366     mcnorm = normMinimizeMaximumWeight;
00367   }
00368   else if (obj == string("multicriteria_balance_total_maximum")){
00369     params.set(rcb_balanceTotalMaximum);
00370     mcnorm = normBalanceTotalMaximum;
00371   }
00372   else{
00373     params.set(rcb_balanceWeight);
00374     mcnorm = normBalanceTotalMaximum;
00375   }
00376 
00377   imbalanceTolerance = .1;
00378   pe = pl.getEntryPtr("imbalance_tolerance");
00379   if (pe){
00380     double tol;
00381     tol = pe->getValue(&tol);
00382     imbalanceTolerance = tol - 1.0;
00383   }
00384 
00385   if (imbalanceTolerance <= 0)
00386     imbalanceTolerance = 10e-4;
00387 
00388   force_binary = false;
00389   pe = pl.getEntryPtr("force_binary_search");
00390   if (pe){
00391     int val = 0;
00392     val = pe->getValue(&val);
00393     if (val == 1)
00394       force_binary = true;
00395   }
00396 
00397   force_linear = false;
00398   pe = pl.getEntryPtr("force_linear_search");
00399   if (pe){
00400     int val;
00401     val = pe->getValue(&val);
00402     if (val == 1)
00403       force_linear = true;
00404   }
00405 
00406   //TODO: FIX ME.
00407   //double aa = 1;
00408   pe = pl.getEntryPtr("parallel_part_calculation_count");
00409   if (pe)
00410     //aa = pe->getValue(&aa);
00411     concurrentPartCount = pe->getValue(&concurrentPartCount);
00412   
00413   //concurrentPartCount = partId_t(aa);
00414 
00415   int val = 0;
00416   pe = pl.getEntryPtr("average_cuts");
00417   if (pe)
00418     val = pe->getValue(&val);
00419    
00420   if (val == 1)
00421     params.set(rcb_averageCuts);
00422 
00423   val = 0;
00424   pe = pl.getEntryPtr("rectilinear_blocks");
00425   if (pe)
00426     val = pe->getValue(&val);
00427    
00428   if (val == 1){
00429     params.set(rcb_rectilinearBlocks);
00430     allowNonrectelinear = false;
00431   } else {
00432     allowNonrectelinear = true;
00433   }
00434 
00435   numTestCuts = 1;
00436   pe = pl.getEntryPtr("bisection_num_test_cuts");
00437   if (pe)
00438     numTestCuts = pe->getValue(&numTestCuts);
00439     
00440   ignoreWeights = params.test(rcb_balanceCount);
00441 }
00442 
00443 
00455 template <typename Adapter>
00456 void pqJagged_getCoordinateValues( const RCP<const CoordinateModel<
00457     typename Adapter::base_adapter_t> > &coords, int &coordDim,
00458     int &weightDim, size_t &numLocalCoords, global_size_t &numGlobalCoords, int &criteriaDim, const bool &ignoreWeights){
00459 
00460   coordDim = coords->getCoordinateDim();
00461   weightDim = coords->getCoordinateWeightDim();
00462   numLocalCoords = coords->getLocalNumCoordinates();
00463   numGlobalCoords = coords->getGlobalNumCoordinates();
00464   criteriaDim = (weightDim ? weightDim : 1);
00465   if (criteriaDim > 1 && ignoreWeights)
00466     criteriaDim = 1;
00467 }
00468 
00469 
00494 template <typename Adapter, typename scalar_t, typename gno_t>
00495 void pqJagged_getInputValues(
00496     const RCP<const Environment> &env, const RCP<const CoordinateModel<
00497     typename Adapter::base_adapter_t> > &coords,
00498     RCP<PartitioningSolution<Adapter> > &solution,
00499     std::bitset<NUM_RCB_PARAMS> &params,
00500     const int &coordDim,
00501     const int &weightDim,
00502     const size_t &numLocalCoords, size_t &numGlobalParts, int &pqJagged_multiVectorDim,
00503     scalar_t **pqJagged_values, const int &criteriaDim, scalar_t **pqJagged_weights, ArrayView<const gno_t> &pqJagged_gnos, bool &ignoreWeights,
00504     bool *pqJagged_uniformWeights, bool *pqJagged_uniformParts, scalar_t **pqJagged_partSizes
00505 ){
00506   typedef typename Adapter::node_t node_t;
00507   typedef typename Adapter::lno_t lno_t;
00508   typedef StridedData<lno_t, scalar_t> input_t;
00509 
00510   ArrayView<const gno_t> gnos;
00511   ArrayView<input_t>     xyz;
00512   ArrayView<input_t>     wgts;
00513 
00514   coords->getCoordinates(gnos, xyz, wgts);
00515   pqJagged_gnos = gnos;
00516 
00517 
00518   //std::cout << std::endl;
00519 
00520   for (int dim=0; dim < coordDim; dim++){
00521     ArrayRCP<const scalar_t> ar;
00522     xyz[dim].getInputArray(ar);
00523     //pqJagged coordinate values assignment
00524     pqJagged_values[dim] =  (scalar_t *)ar.getRawPtr();
00525   }
00526 
00527   if (weightDim == 0 || ignoreWeights){
00528 
00529     pqJagged_uniformWeights[0] = true;
00530     pqJagged_weights[0] = NULL;
00531   }
00532   else{
00533     for (int wdim = 0; wdim < weightDim; wdim++){
00534       if (wgts[wdim].size() == 0){
00535         pqJagged_uniformWeights[wdim] = true;
00536         pqJagged_weights[wdim] = NULL;
00537       }
00538       else{
00539         ArrayRCP<const scalar_t> ar;
00540         wgts[wdim].getInputArray(ar);
00541         pqJagged_uniformWeights[wdim] = false;
00542         pqJagged_weights[wdim] = (scalar_t *) ar.getRawPtr();
00543       }
00544     }
00545   }
00546 
00548   // From the Solution we get part information.
00549   // If the part sizes for a given criteria are not uniform,
00550   // then they are values that sum to 1.0.
00551 
00552   numGlobalParts = solution->getTargetGlobalNumberOfParts();
00553 
00554   for (int wdim = 0; wdim < criteriaDim; wdim++){
00555     if (solution->criteriaHasUniformPartSizes(wdim)){
00556       pqJagged_uniformParts[wdim] = true;
00557       pqJagged_partSizes[wdim] = NULL;
00558     }
00559     else{
00560       scalar_t *tmp = new scalar_t [numGlobalParts];
00561       env->localMemoryAssertion(__FILE__, __LINE__, numGlobalParts, tmp) ;
00562       for (size_t i=0; i < numGlobalParts; i++){
00563         tmp[i] = solution->getCriteriaPartSize(wdim, i);
00564       }
00565       pqJagged_partSizes[wdim] = tmp;
00566 
00567     }
00568   }
00569 
00570   // It may not be possible to solve the partitioning problem
00571   // if we have multiple weight dimensions with part size
00572   // arrays that differ. So let's be aware of this possibility.
00573 
00574   bool multiplePartSizeSpecs = false;
00575 
00576   if (criteriaDim > 1){
00577     for (int wdim1 = 0; wdim1 < criteriaDim; wdim1++)
00578       for (int wdim2 = wdim1+1; wdim2 < criteriaDim; wdim2++)
00579         if (!solution->criteriaHaveSamePartSizes(wdim1, wdim2)){
00580           multiplePartSizeSpecs = true;
00581           break;
00582         }
00583   }
00584 
00585   if (multiplePartSizeSpecs)
00586     params.set(rcb_multiplePartSizeSpecs);
00587 
00589   // Create the distributed data for the algorithm.
00590   //
00591   // It is a multivector containing one vector for each coordinate
00592   // dimension, plus a vector for each weight dimension that is not
00593   // uniform.
00594 
00595   pqJagged_multiVectorDim = coordDim;
00596   for (int wdim = 0; wdim < criteriaDim; wdim++){
00597     if (!pqJagged_uniformWeights[wdim]) pqJagged_multiVectorDim++;
00598   }
00599 
00600 }
00601 
00602 
00606 template <typename scalar_t, typename gno_t>
00607 void pqJagged_printInput(int coordDim, int weightDim, size_t numLocalCoords, global_size_t numGlobalCoords,
00608     int criteriaDim, scalar_t **pqJagged_values, scalar_t **pqJagged_weights,
00609     bool *pqJagged_uniformParts, bool *pqJagged_uniformWeights, gno_t *pqJagged_gnos,
00610     bool ignoreWeights,size_t numGlobalParts, scalar_t **pqJagged_partSizes){
00611 
00612   std::cout << "numLocalCoords:" << numLocalCoords << std::endl;
00613   std::cout << "coordDim:" << coordDim << std::endl;
00614   for(size_t i = 0; i < numLocalCoords; ++i){
00615     for (int ii = 0; ii < coordDim; ++ii){
00616       std::cout <<  pqJagged_values[ii][i] << " ";
00617     }
00618     std::cout << std::endl;
00619   }
00620 
00621 
00622   std::cout << "criteriaDim:" << criteriaDim << std::endl;
00623   std::cout << "weightDim:" << weightDim << std::endl;
00624   if(weightDim){
00625     for(size_t i = 0; i < numLocalCoords; ++i){
00626       for (int ii = 0; ii < weightDim; ++ii){
00627         std::cout <<  pqJagged_weights[ii][i] << " ";
00628       }
00629       std::cout << std::endl;
00630     }
00631   }
00632 
00633   std::cout << "pqJagged_uniformWeights:" << pqJagged_uniformWeights[0] << std::endl;
00634   for(int i = 0; i < criteriaDim; ++i){
00635     std::cout << pqJagged_uniformWeights[i] << " ";
00636   }
00637   std::cout << std::endl;
00638 
00639 
00640   std::cout << "gnos" << std::endl;
00641   for(size_t i = 0; i < numLocalCoords; ++i){
00642     std::cout <<  pqJagged_gnos[i] << " ";
00643   }
00644   std::cout << std::endl;
00645 
00646   std::cout << "ignoreWeights:" << ignoreWeights << std::endl;
00647 
00648   std::cout << "pqJagged_uniformParts:" << pqJagged_uniformParts[0] << std::endl;
00649   for(int i = 0; i < criteriaDim; ++i){
00650     std::cout << pqJagged_uniformParts[i] << " ";
00651   }
00652   std::cout << std::endl;
00653 
00654   std::cout << "pqJagged_partSizes:" << std::endl;
00655   std::cout << "numGlobalParts:" << numGlobalParts << std::endl;
00656   for(int i = 0; i < criteriaDim; ++i){
00657     if(!pqJagged_uniformParts[i])
00658       for(size_t ii = 0; ii < numGlobalParts; ++ii){
00659         std::cout << pqJagged_partSizes[i][ii] << " ";
00660       }
00661     std::cout << std::endl;
00662   }
00663 }
00664 
00665 
00669 int pqJagged_getNumThreads(){
00670   int numThreads = 1;
00671 
00672 
00673 #ifdef HAVE_ZOLTAN2_OMP
00674 #pragma omp parallel shared(numThreads)
00675   {
00676     numThreads = omp_get_num_threads();
00677   }
00678 
00679 #endif
00680 
00681 
00682   return numThreads;
00683 
00684 }
00685 
00686 
00707 template <typename scalar_t, typename lno_t>
00708 void pqJagged_getLocalMinMaxTotalCoord(
00709     lno_t *partitionedPointPermutations,
00710     scalar_t *pqJagged_coordinates,
00711     bool pqJagged_uniformWeights,
00712     scalar_t *pqJagged_weights,
00713 
00714 
00715     int numThreads,
00716     lno_t coordinateBegin,
00717     lno_t coordinateEnd,
00718 
00719     scalar_t *max_min_array /*sized nothreads * 2*/,
00720     scalar_t maxScalar,
00721     scalar_t minScalar,
00722     scalar_t &minCoordinate,
00723     scalar_t &maxCoordinate,
00724     scalar_t &totalWeight
00725 
00726     ){
00727 
00728 
00729   if(coordinateBegin >= coordinateEnd)
00730   {
00731     minCoordinate = maxScalar;
00732     maxCoordinate = minScalar;
00733     totalWeight = 0;
00734   }
00735   else {
00736     scalar_t mytotalWeight = 0;
00737 #ifdef HAVE_ZOLTAN2_OMP
00738 #pragma omp parallel
00739 #endif
00740     {
00741       if (pqJagged_uniformWeights) {
00742   #ifdef HAVE_ZOLTAN2_OMP
00743   #pragma omp single
00744   #endif
00745         {
00746           mytotalWeight = coordinateEnd - coordinateBegin;
00747         }
00748       } else {
00749   #ifdef HAVE_ZOLTAN2_OMP
00750   #pragma omp for reduction(+:mytotalWeight)
00751   #endif
00752         for (lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
00753           int i = partitionedPointPermutations[ii];
00754           mytotalWeight += pqJagged_weights[i];
00755         }
00756       }
00757 
00758       int myId = 0;
00759 #ifdef HAVE_ZOLTAN2_OMP
00760       myId = omp_get_thread_num();
00761 #endif
00762       scalar_t myMin, myMax;
00763 
00764       size_t j = partitionedPointPermutations[coordinateBegin];
00765       myMin = myMax = pqJagged_coordinates[j];
00766 
00767 #ifdef HAVE_ZOLTAN2_OMP
00768 #pragma omp for
00769 #endif
00770       for(lno_t j = coordinateBegin + 1; j < coordinateEnd; ++j){
00771         int i = partitionedPointPermutations[j];
00772         if(pqJagged_coordinates[i] > myMax) myMax = pqJagged_coordinates[i];
00773         if(pqJagged_coordinates[i] < myMin) myMin = pqJagged_coordinates[i];
00774       }
00775       max_min_array[myId] = myMin;
00776       max_min_array[myId + numThreads] = myMax;
00777 
00778 #ifdef HAVE_ZOLTAN2_OMP
00779 #pragma omp barrier
00780 #endif
00781 
00782 #ifdef HAVE_ZOLTAN2_OMP
00783 #pragma omp single nowait
00784 #endif
00785       {
00786         minCoordinate = max_min_array[0];
00787         for(int i = 1; i < numThreads; ++i){
00788           if(max_min_array[i] < minCoordinate) minCoordinate = max_min_array[i];
00789         }
00790       }
00791 #ifdef HAVE_ZOLTAN2_OMP
00792 #pragma omp single nowait
00793 #endif
00794       {
00795         maxCoordinate = max_min_array[numThreads];
00796         for(int i = numThreads + 1; i < numThreads * 2; ++i){
00797           if(max_min_array[i] > maxCoordinate) maxCoordinate = max_min_array[i];
00798         }
00799       }
00800     }
00801     totalWeight = mytotalWeight;
00802   }
00803 }
00804 
00805 
00806   
00816 template <typename scalar_t>
00817 void pqJagged_getGlobalMinMaxTotalCoord(
00818     RCP<Comm<int> > &comm,
00819     const RCP<const Environment> &env,
00820     partId_t concurrentPartCount,
00821     scalar_t *localMinMaxTotal,
00822     scalar_t *globalMinMaxTotal){
00823 
00824 
00825   //reduce min for first concurrentPartCount elements, reduce max for next concurrentPartCount elements,
00826   //reduce sum for the last concurrentPartCount elements.
00827   if(comm->getSize()  > 1){
00828     
00829 #ifndef mpi_communication
00830   Teuchos::PQJaggedCombinedMinMaxTotalReductionOp<int, scalar_t> reductionOp(
00831      concurrentPartCount,
00832      concurrentPartCount,
00833      concurrentPartCount);
00834 #endif
00835     
00836 #ifdef mpi_communication
00837     MPI_Op myop;
00838     MPI_Op_create(minMaxSum, 0, &myop);   /* step 3 */
00839 #endif 
00840 
00841   try{
00842 
00843     
00844 #ifdef mpi_communication
00845 
00846     MPI_Allreduce(localMinMaxTotal, globalMinMaxTotal, 3 * concurrentPartCount, MPI_FLOAT, myop,MPI_COMM_WORLD);  
00847 #endif 
00848 #ifndef mpi_communication
00849     reduceAll<int, scalar_t>(*comm, reductionOp,
00850         3 * concurrentPartCount, localMinMaxTotal, globalMinMaxTotal
00851       );
00852 #endif
00853     
00854   }
00855   Z2_THROW_OUTSIDE_ERROR(*env)
00856   }
00857   else {
00858     partId_t s = 3 * concurrentPartCount;
00859     for (partId_t i = 0; i < s; ++i){
00860       globalMinMaxTotal[i] = localMinMaxTotal[i];
00861     }
00862   }
00863 }
00864 
00865 
00876 template <typename scalar_t>
00877 void pqJagged_getCutCoord_Weights(
00878     scalar_t minCoordinate,
00879     scalar_t maxCoordinate,
00880     bool pqJagged_uniformParts,
00881     scalar_t *pqJagged_partSizes /*p sized, weight ratios of each part*/,
00882     partId_t noCuts/*p-1*/ ,
00883     scalar_t *cutCoordinates /*p - 1 sized, coordinate of each cut line*/,
00884     scalar_t *cutPartRatios /*cumulative weight ratios, at left side of each cut line. p-1 sized*/,
00885     int numThreads){
00886 
00887   scalar_t coordinateRange = maxCoordinate - minCoordinate;
00888   if(pqJagged_uniformParts){
00889     scalar_t uniform = 1. / (noCuts + 1);
00890     scalar_t slice = uniform * coordinateRange;
00891 
00892 #ifdef HAVE_ZOLTAN2_OMP
00893 #pragma omp parallel
00894 #endif
00895     {
00896       partId_t myStart = 0;
00897       partId_t myEnd = noCuts;
00898 #ifdef HAVE_ZOLTAN2_OMP
00899 #pragma omp for
00900 #endif
00901       for(partId_t i = myStart; i < myEnd; ++i){
00902         cutPartRatios[i] =  uniform * (i + 1);
00903         cutCoordinates[i] = minCoordinate + slice * (i + 1);
00904       }
00905       cutPartRatios[noCuts] = 1;
00906     }
00907 
00908     
00909   }
00910   else {
00911     //TODO fix here!!
00912     cutPartRatios[0] = pqJagged_partSizes[0];
00913     cutCoordinates[0] = coordinateRange * cutPartRatios[0];
00914     for(partId_t i = 1; i < noCuts; ++i){
00915       cutPartRatios[i] = pqJagged_partSizes[i] + cutPartRatios[i - 1];
00916       cutCoordinates[i] = coordinateRange * cutPartRatios[i];
00917     }
00918   }
00919 }
00920 
00921 
00957 template <typename scalar_t>
00958 void getNewCoordinates(
00959     const RCP<const Environment> &env,
00960     RCP<Comm<int> > &comm,
00961     const size_t &total_part_count,
00962     const partId_t &noCuts,
00963     const scalar_t &maxCoordinate,
00964     const scalar_t &minCoordinate,
00965     const scalar_t &globalTotalWeight,
00966     const scalar_t &imbalanceTolerance,
00967     scalar_t maxScalar,
00968 
00969     const scalar_t * globalPartWeights,
00970     const scalar_t * localPartWeights,
00971     const scalar_t *targetPartWeightRatios,
00972     bool *isDone,
00973 
00974     scalar_t *cutCoordinates,
00975     scalar_t *cutUpperBounds,
00976     scalar_t *cutLowerBounds,
00977     scalar_t *leftClosestDistance,
00978     scalar_t *rightClosestDistance,
00979     scalar_t * cutLowerWeight,
00980     scalar_t * cutUpperWeight,
00981     scalar_t *newCutCoordinates,
00982 
00983     bool allowNonRectelinearPart,
00984     float *nonRectelinearPartRatios,
00985     partId_t *rectelinearCutCount,
00986     scalar_t *localCutWeights,
00987     scalar_t *globalCutWeights,
00988 
00989     partId_t &myNoneDoneCount
00990 )
00991     {
00992 
00993 
00994   scalar_t seenW = 0;
00995   float expected = 0;
00996   scalar_t leftImbalance = 0, rightImbalance = 0;
00997   scalar_t _EPSILON = numeric_limits<scalar_t>::epsilon();
00998   //scalar_t _EPSILON = numeric_limits<float>::epsilon();
00999 
01000 #ifdef HAVE_ZOLTAN2_OMP
01001 #pragma omp for
01002 #endif
01003   for (partId_t i = 0; i < noCuts; i++){
01004 
01005     //if a left and right closes point is not found, set the distance to 0.
01006     if(leftClosestDistance[i] == maxScalar)
01007       leftClosestDistance[i] = 0;
01008     if(rightClosestDistance[i] == maxScalar)
01009       rightClosestDistance[i] = 0;
01010 
01011   }
01012 
01013 #ifdef HAVE_ZOLTAN2_OMP
01014 #pragma omp for
01015 #endif
01016   for (partId_t i = 0; i < noCuts; i++){
01017     globalCutWeights[i] = 0;
01018     localCutWeights[i] = 0;
01019     //if already determined at previous iterations, do nothing.
01020     if(isDone[i]) {
01021       newCutCoordinates[i] = cutCoordinates[i];
01022       continue;
01023     }
01024     //current weight of the part at the left of the cut line.
01025     seenW = globalPartWeights[i * 2];
01026 
01027 
01028     //expected ratio
01029     expected = targetPartWeightRatios[i];
01030     leftImbalance = imbalanceOf(seenW, globalTotalWeight, expected);
01031     rightImbalance = imbalanceOf(globalTotalWeight - seenW, globalTotalWeight, 1 - expected);
01032 
01033     bool isLeftValid = ABS(leftImbalance) - imbalanceTolerance < _EPSILON ;
01034     bool isRightValid = ABS(rightImbalance) - imbalanceTolerance < _EPSILON;
01035 
01036     //if the cut line reaches to desired imbalance.
01037     if(isLeftValid && isRightValid){
01038 
01039       isDone[i] = true;
01040 #ifdef HAVE_ZOLTAN2_OMP
01041 #pragma omp atomic
01042 #endif
01043       myNoneDoneCount -= 1;
01044       newCutCoordinates [i] = cutCoordinates[i];
01045       continue;
01046     }
01047     else if(leftImbalance < 0){
01048 
01049       scalar_t ew = globalTotalWeight * expected;
01050       if(allowNonRectelinearPart){
01051 
01052         if (globalPartWeights[i * 2 + 1] == ew){
01053           isDone[i] = true;
01054 #ifdef HAVE_ZOLTAN2_OMP
01055 #pragma omp atomic
01056 #endif
01057           myNoneDoneCount -= 1;
01058           newCutCoordinates [i] = cutCoordinates[i];
01059           nonRectelinearPartRatios[i] = 1;
01060           continue;
01061         }
01062         else if (globalPartWeights[i * 2 + 1] > ew){
01063           isDone[i] = true;
01064 #ifdef HAVE_ZOLTAN2_OMP
01065 #pragma omp atomic
01066 #endif
01067           *rectelinearCutCount += 1;
01068 
01069 #ifdef HAVE_ZOLTAN2_OMP
01070 #pragma omp atomic
01071 #endif
01072           myNoneDoneCount -= 1;
01073           newCutCoordinates [i] = cutCoordinates[i];
01074           scalar_t myWeightOnLine = localPartWeights[i * 2 + 1] - localPartWeights[i * 2];
01075           localCutWeights[i] = myWeightOnLine;
01076           continue;
01077         }
01078       }
01079       //when moving right, set lower bound to current line.
01080       cutLowerBounds[i] = cutCoordinates[i] + rightClosestDistance[i];
01081       cutLowerWeight[i] = seenW;
01082 
01083       //compare the upper bound with the current lines.
01084       for (partId_t ii = i + 1; ii < noCuts ; ++ii){
01085         scalar_t pw = globalPartWeights[ii * 2];
01086         scalar_t lw = globalPartWeights[ii * 2 + 1];
01087         if(pw >= ew){
01088           if(pw == ew){
01089             cutUpperBounds[i] = cutCoordinates[ii];
01090             cutUpperWeight[i] = pw;
01091             cutLowerBounds[i] = cutCoordinates[ii];
01092             cutLowerWeight[i] = pw;
01093           } else if (pw < cutUpperWeight[i]){
01094             //if a cut line is more strict than the current upper bound,
01095             //update the upper bound.
01096             cutUpperBounds[i] = cutCoordinates[ii] - leftClosestDistance[ii];
01097             cutUpperWeight[i] = pw;
01098           }
01099           break;
01100         }
01101         //if comes here then pw < ew
01102         if(lw >= ew){
01103           cutUpperBounds[i] = cutCoordinates[ii];
01104           cutUpperWeight[i] = lw;
01105           cutLowerBounds[i] = cutCoordinates[ii];
01106           cutLowerWeight[i] = pw;
01107           break;
01108         }
01109         //if a stricter lower bound is found,
01110         //update the lower bound.
01111         if (pw <= ew && pw >= cutLowerWeight[i]){
01112           cutLowerBounds[i] = cutCoordinates[ii] + rightClosestDistance[ii] ;
01113           cutLowerWeight[i] = pw;
01114         }
01115       }
01116       scalar_t newPivot = pivotPos<scalar_t> (cutUpperBounds, cutLowerBounds,i, cutUpperWeight, cutLowerWeight, ew);
01117       //if cut line does not move significantly.
01118       if (ABS(cutCoordinates[i] - newPivot) < _EPSILON * EPS_SCALE || cutUpperBounds[i] < cutLowerBounds[i]){
01119         isDone[i] = true;
01120 #ifdef HAVE_ZOLTAN2_OMP
01121 #pragma omp atomic
01122 #endif
01123         myNoneDoneCount -= 1;
01124         newCutCoordinates [i] = cutCoordinates[i];
01125       } else {
01126         newCutCoordinates [i] = newPivot;
01127       }
01128     } else {
01129       //moving to left.
01130       scalar_t ew = globalTotalWeight * expected;
01131       //moving left, set upper to current line.
01132       cutUpperBounds[i] = cutCoordinates[i] - leftClosestDistance[i];
01133       cutUpperWeight[i] = seenW;
01134 
01135       // compare the current cut line weights with previous upper and lower bounds.
01136       for (int ii = i - 1; ii >= 0; --ii){
01137         scalar_t pw = globalPartWeights[ii * 2];
01138         scalar_t lw = globalPartWeights[ii * 2 + 1];
01139         if(pw <= ew){
01140           if(pw == ew){
01141             cutUpperBounds[i] = cutCoordinates[ii];
01142             cutUpperWeight[i] = pw;
01143             cutLowerBounds[i] = cutCoordinates[ii];
01144             cutLowerWeight[i] = pw;
01145           }
01146           else if (pw > cutLowerWeight[i]){
01147             cutLowerBounds[i] = cutCoordinates[ii] + rightClosestDistance[ii];
01148             cutLowerWeight[i] = pw;
01149             if(lw > ew){
01150               cutUpperBounds[i] = cutCoordinates[ii] + rightClosestDistance[ii];
01151 
01152               cutUpperWeight[i] = lw;
01153             }
01154           }
01155           break;
01156         }
01157         if (pw >= ew && (pw < cutUpperWeight[i] || (pw == cutUpperWeight[i] && cutUpperBounds[i] > cutCoordinates[ii] - leftClosestDistance[ii]))){
01158           cutUpperBounds[i] = cutCoordinates[ii] - leftClosestDistance[ii] ;
01159 
01160           cutUpperWeight[i] = pw;
01161         }
01162       }
01163 
01164       scalar_t newPivot = pivotPos<scalar_t> (cutUpperBounds, cutLowerBounds,i, cutUpperWeight, cutLowerWeight, ew);
01165       //if cut line does not move significantly.
01166       if (ABS(cutCoordinates[i] - newPivot) < _EPSILON * EPS_SCALE  || cutUpperBounds[i] < cutLowerBounds[i]){
01167         isDone[i] = true;
01168 
01169 #ifdef HAVE_ZOLTAN2_OMP
01170 #pragma omp atomic
01171 #endif
01172         myNoneDoneCount -= 1;
01173         newCutCoordinates [ i] = cutCoordinates[i];
01174       } else {
01175         newCutCoordinates [ i] = newPivot;
01176       }
01177     }
01178   }
01179 
01180 
01181 
01182   //communication to determine the ratios of processors for the distribution of coordinates on the cut lines.
01183 #ifdef HAVE_ZOLTAN2_OMP
01184 //#pragma omp barrier
01185 #pragma omp single
01186 #endif
01187   {
01188     if(*rectelinearCutCount > 0){
01189       try{
01190         Teuchos::scan<int,scalar_t>(
01191             *comm, Teuchos::REDUCE_SUM,
01192             noCuts,localCutWeights, globalCutWeights
01193         );
01194       }
01195       Z2_THROW_OUTSIDE_ERROR(*env)
01196 
01197 
01198       for (partId_t i = 0; i < noCuts; ++i){
01199         //cout << "gw:" << globalCutWeights[i] << endl;
01200         if(globalCutWeights[i] > 0) {
01201           scalar_t expected = targetPartWeightRatios[i];
01202           scalar_t ew = globalTotalWeight * expected;
01203           scalar_t expectedWeightOnLine = ew - globalPartWeights[i * 2];
01204           scalar_t myWeightOnLine = localCutWeights[i];
01205           scalar_t weightOnLineBefore = globalCutWeights[i];
01206           scalar_t incMe = expectedWeightOnLine - weightOnLineBefore;
01207           scalar_t mine = incMe + myWeightOnLine;
01208           if(mine < 0){
01209             nonRectelinearPartRatios[i] = 0;
01210           }
01211           else if(mine >= myWeightOnLine){
01212             nonRectelinearPartRatios[i] = 1;
01213           }
01214           else {
01215             nonRectelinearPartRatios[i] = mine / myWeightOnLine;
01216           }
01217         }
01218       }
01219       *rectelinearCutCount = 0;
01220     }
01221   }
01222 }
01223 
01224 
01255 template <typename scalar_t, typename lno_t>
01256 void pqJagged_1DPart_getPartWeights(
01257     size_t total_part_count,
01258     partId_t noCuts,
01259     scalar_t maxScalar,
01260     scalar_t _EPSILON,
01261     int numThreads,
01262     lno_t coordinateBegin,
01263     lno_t coordinateEnd,
01264     lno_t *partitionedPointPermutations,
01265     scalar_t *pqJagged_coordinates,
01266     bool pqJagged_uniformWeights,
01267     scalar_t *pqJagged_weights,
01268 
01269     scalar_t *cutCoordinates_tmp, //TODO change name
01270     bool *isDone,
01271     double *myPartWeights,
01272     scalar_t *myLeftClosest,
01273     scalar_t *myRightClosest,
01274     bool useBinarySearch,
01275     partId_t *partIds
01276 ){
01277 
01278   // initializations for part weights, left/right closest
01279     for (size_t i = 0; i < total_part_count; ++i){
01280       myPartWeights[i] = 0;
01281     }
01282 
01283 
01284   for(partId_t i = 0; i < noCuts; ++i){
01285     //if(isDone[i]) continue;
01286     myLeftClosest[i] = maxScalar;
01287     myRightClosest[i] = maxScalar;
01288   }
01289   //cout << "iter:" << endl;
01290   if(useBinarySearch){
01291 
01292     //lno_t comparison_count = 0;
01293     scalar_t minus_EPSILON = -_EPSILON;
01294 #ifdef HAVE_ZOLTAN2_OMP
01295 #pragma omp for
01296 #endif
01297     for (lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
01298       int i = partitionedPointPermutations[ii];
01299       //partId_t j = (uc + lc) / 2;
01300       partId_t j = partIds[i] / 2;
01301 
01302       if(j >= noCuts){
01303         j = noCuts - 1;
01304       }
01305 
01306       partId_t lc = 0;
01307       partId_t uc = noCuts - 1;
01308       scalar_t w = pqJagged_uniformWeights? 1:pqJagged_weights[i];
01309       bool isInserted = false;
01310       bool onLeft = false;
01311       bool onRight = false;
01312       partId_t lastPart = -1;
01313       while(uc >= lc)
01314       {
01315       //comparison_count++;
01316         lastPart = -1;
01317         onLeft = false;
01318         onRight = false;
01319         scalar_t coord = pqJagged_coordinates[i];
01320         scalar_t cut = cutCoordinates_tmp[j];
01321         scalar_t distance = coord - cut;
01322         scalar_t absdistance = ABS(distance);
01323 
01324         if(absdistance < _EPSILON){
01325 
01326           myPartWeights[j * 2 + 1] += w;
01327           partIds[i] = j * 2 + 1;
01328 
01329           myLeftClosest[j] = 0;
01330           myRightClosest[j] = 0;
01331           partId_t kk = j + 1;
01332           while(kk < noCuts){  // Needed when cuts shared the same position
01333                                // kddnote Can this loop be disabled for RECTILINEAR BLOCKS?
01334                                // kddnote Mehmet says it is probably needed anyway.
01335             scalar_t distance =ABS(cutCoordinates_tmp[kk] - cut);
01336             if(distance < _EPSILON){
01337               myPartWeights[2 * kk + 1] += w;
01338 
01339               myLeftClosest[kk] = 0;
01340               myRightClosest[kk] = 0;
01341               kk++;
01342             }
01343             else{
01344               if(myLeftClosest[kk] > distance){
01345                 myLeftClosest[kk] = distance;
01346               }
01347               break;
01348             }
01349           }
01350 
01351           kk = j - 1;
01352           while(kk >= 0){
01353             scalar_t distance =ABS(cutCoordinates_tmp[kk] - cut);
01354             if(distance < _EPSILON){
01355               myPartWeights[2 * kk + 1] += w;
01356 
01357               myLeftClosest[kk] = 0;
01358               myRightClosest[kk] = 0;
01359               kk--;
01360             }
01361             else{
01362               if(myRightClosest[kk] > distance){
01363                 myRightClosest[kk] = distance;
01364               }
01365               break;
01366             }
01367           }
01368           isInserted = true;
01369           break;
01370         }
01371         else {
01372           if (distance < 0) {
01373 
01374             //TODO fix abs
01375             distance = absdistance;
01376             if (myLeftClosest[j] > distance){
01377               myLeftClosest[j] = distance;
01378             }
01379             bool _break = false;
01380             if(j > 0){
01381               scalar_t distance = coord - cutCoordinates_tmp[j - 1];
01382               if(distance > _EPSILON){
01383                 if (myRightClosest[j - 1] > distance){
01384                   myRightClosest[j - 1] = distance;
01385                 }
01386                 _break = true;
01387               } else if(distance < minus_EPSILON){
01388                 distance = -distance;
01389                 if (myLeftClosest[j - 1] > distance){
01390                   myLeftClosest[j - 1] = distance;
01391                 }
01392               }
01393               else {
01394                 myLeftClosest[j - 1] = 0;
01395                 myRightClosest[j - 1 ] = 0;
01396               }
01397             }
01398             uc = j - 1;
01399             onLeft = true;
01400             lastPart = j;
01401             if(_break) break;
01402           }
01403           else {
01404             if (myRightClosest[j] > distance){
01405               myRightClosest[j] = distance;
01406             }
01407             bool _break = false;
01408             if(j < noCuts - 1){
01409               scalar_t distance = coord - cutCoordinates_tmp[j + 1];
01410               if(distance > _EPSILON){
01411                 if (myRightClosest[j + 1] > distance){
01412                   myRightClosest[j + 1] = distance;
01413                 }
01414               } else if(distance < minus_EPSILON){
01415                 distance = -distance;
01416                 if (myLeftClosest[j + 1] > distance){
01417                   myLeftClosest[j + 1] = distance;
01418                 }
01419                 _break = true;
01420               }
01421               else {
01422                 myLeftClosest[j + 1] = 0;
01423                 myRightClosest[j + 1 ] = 0;
01424               }
01425             }
01426             lc = j + 1;
01427             onRight = true;
01428             lastPart = j;
01429             if(_break) break;
01430           }
01431         }
01432         
01433         j = (uc + lc) / 2;
01434       }
01435       if(!isInserted){
01436         if(onRight){
01437           myPartWeights[2 * lastPart + 2] += w;
01438           partIds[i] = 2 * lastPart + 2;
01439         }
01440         else if(onLeft){
01441           myPartWeights[2 * lastPart] += w;
01442           partIds[i] = 2 * lastPart;
01443         }
01444       }
01445     }
01446 /*
01447     //cout << "comp:" << comparison_count << " size:" << coordinateEnd- coordinateBegin << endl;
01448     // prefix sum computation.
01449     for (size_t i = 1; i < total_part_count; ++i){
01450       // if check for cuts sharing the same position; all cuts sharing a position
01451       // have the same weight == total weight for all cuts sharing the position.  
01452       // don't want to accumulate that total weight more than once.
01453       if(i % 2 == 0 && i > 1 && i < total_part_count - 1 && 
01454          ABS(cutCoordinates_tmp[i / 2] - cutCoordinates_tmp[i /2 - 1]) < _EPSILON){
01455         myPartWeights[i] = myPartWeights[i-2];
01456         continue;
01457       }
01458       myPartWeights[i] += myPartWeights[i-1];
01459     }
01460     
01461 */
01462   }
01463   else {
01464 
01465     //lno_t comp = 0;
01466 #ifdef HAVE_ZOLTAN2_OMP
01467 #pragma omp for
01468 #endif
01469     for (lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
01470       int i = partitionedPointPermutations[ii];
01471 
01472       scalar_t w = pqJagged_uniformWeights ? 1 : pqJagged_weights[i];
01473 
01474       scalar_t coord = pqJagged_coordinates[i];
01475       
01476       partId_t j = partIds[i] / 2;
01477 
01478       if(j >= noCuts){
01479         j = noCuts - 1;
01480       }
01481       scalar_t cut = cutCoordinates_tmp[j];
01482       scalar_t distance = coord - cut;
01483       scalar_t absdistance = ABS(distance);
01484 
01485       //comp++;
01486       if(absdistance < _EPSILON){
01487         myPartWeights[j * 2 + 1] += w;
01488         //cout << "1to part:" << 2*j+1 << " coord:" << coord << endl;  
01489         myLeftClosest[j] = 0;
01490         myRightClosest[j] = 0;
01491         partIds[i] = j * 2 + 1;
01492         
01493         //bas
01494         partId_t kk = j + 1;
01495         while(kk < noCuts){  // Needed when cuts shared the same position
01496           // kddnote Can this loop be disabled for RECTILINEAR BLOCKS?
01497           // kddnote Mehmet says it is probably needed anyway.
01498           scalar_t distance =ABS(cutCoordinates_tmp[kk] - cut);
01499           if(distance < _EPSILON){
01500             //cout << "yo" << endl;
01501             myPartWeights[2 * kk + 1] += w;
01502             
01503             //cout << "2to part:" << 2*kk+1 << " coord:" << coord << endl;
01504             myLeftClosest[kk] = 0;
01505             myRightClosest[kk] = 0;
01506             kk++;
01507           }
01508           else{
01509             if(myLeftClosest[kk] > distance){
01510               myLeftClosest[kk] = distance;
01511             }
01512             break;
01513           }
01514         }
01515         
01516         kk = j - 1;
01517         while(kk >= 0){
01518           scalar_t distance =ABS(cutCoordinates_tmp[kk] - cut);
01519           if(distance < _EPSILON){
01520             myPartWeights[2 * kk + 1] += w;
01521             //cout << "3to part:" << 2*kk+1 << " coord:" << coord << endl;
01522             
01523             myLeftClosest[kk] = 0;
01524             myRightClosest[kk] = 0;
01525             kk--;
01526           }
01527           else{
01528             if(myRightClosest[kk] > distance){
01529               myRightClosest[kk] = distance;
01530             }
01531             break;
01532           }
01533         }
01534       }
01535       else if (distance < 0) {
01536         while((absdistance > _EPSILON) && distance < 0){
01537           //comp++;
01538           if (myLeftClosest[j] > absdistance){
01539             myLeftClosest[j] = absdistance;
01540           }
01541           --j;
01542           if(j  < 0) break;
01543           distance = coord - cutCoordinates_tmp[j];
01544           
01545           absdistance = ABS(distance);
01546           }
01547         if(absdistance < _EPSILON)
01548         {
01549           myPartWeights[j * 2 + 1] += w;
01550           myLeftClosest[j] = 0;
01551           myRightClosest[j] = 0;
01552           cut = cutCoordinates_tmp[j];
01553           //cout << "4to part:" << 2*j+1 <<" j:" << j <<  " coord:" << coord << endl;
01554           //cout << "cut:" << cutCoordinates_tmp[j] << " dis:" << distance << endl;
01555           partIds[i] = j * 2 + 1;
01556           
01557           partId_t kk = j + 1;
01558           while(kk < noCuts){  // Needed when cuts shared the same position
01559             // kddnote Can this loop be disabled for RECTILINEAR BLOCKS?
01560             // kddnote Mehmet says it is probably needed anyway.
01561             scalar_t distance =ABS(cutCoordinates_tmp[kk] - cut);
01562             //cout << "distance:" << distance << endl;
01563             if(distance < _EPSILON){
01564               myPartWeights[2 * kk + 1] += w;
01565               //cout << "5to part:" << 2*kk+1 << " kk:" << kk << " coord:" << coord << endl;
01566               //cout << "cut:" << cutCoordinates_tmp[kk] << " dis:" << distance << endl;
01567               myLeftClosest[kk] = 0;
01568               myRightClosest[kk] = 0;
01569               kk++;
01570             }
01571             else{
01572               if(myLeftClosest[kk] > distance){
01573                 myLeftClosest[kk] = distance;
01574               }
01575               break;
01576             }
01577           }
01578           
01579           kk = j - 1;
01580           while(kk >= 0){
01581             scalar_t distance =ABS(cutCoordinates_tmp[kk] - cut);
01582             if(distance < _EPSILON){
01583               myPartWeights[2 * kk + 1] += w;
01584               //cout << "6to part:" << 2*kk+1 << " coord:" << coord << endl;
01585               //cout << "cut:" << cutCoordinates_tmp[kk] << " dis:" << distance << endl;
01586               
01587               myLeftClosest[kk] = 0;
01588               myRightClosest[kk] = 0;
01589               kk--;
01590             }
01591             else{
01592               if(myRightClosest[kk] > distance){
01593                 myRightClosest[kk] = distance;
01594               }
01595               break;
01596             }
01597           }
01598         }
01599         else {
01600           myPartWeights[j * 2 + 2] += w;
01601           if (j >= 0 && myRightClosest[j] > absdistance){
01602             myRightClosest[j] = absdistance;
01603           }
01604           partIds[i] = j * 2 + 2;
01605         }
01606         
01607       }
01608       //if it is on the left
01609       else {
01610 
01611         while((absdistance > _EPSILON) && distance > 0){
01612           //comp++;
01613             if ( myRightClosest[j] > absdistance){
01614               myRightClosest[j] = absdistance;
01615             }
01616           ++j;
01617           if(j  >= noCuts) break;
01618           distance = coord - cutCoordinates_tmp[j];
01619           absdistance = ABS(distance);
01620           }
01621         
01622         
01623         
01624         if(absdistance < _EPSILON)
01625         {
01626           myPartWeights[j * 2 + 1] += w;
01627           myLeftClosest[j] = 0;
01628           myRightClosest[j] = 0;
01629           partIds[i] = j * 2 + 1;
01630           cut = cutCoordinates_tmp[j];  
01631           partId_t kk = j + 1;
01632           while(kk < noCuts){  // Needed when cuts shared the same position
01633             // kddnote Can this loop be disabled for RECTILINEAR BLOCKS?
01634             // kddnote Mehmet says it is probably needed anyway.
01635             scalar_t distance =ABS(cutCoordinates_tmp[kk] - cut);
01636             if(distance < _EPSILON){
01637               myPartWeights[2 * kk + 1] += w;
01638               myLeftClosest[kk] = 0;
01639               myRightClosest[kk] = 0;
01640               kk++;
01641             }
01642             else{
01643               if(myLeftClosest[kk] > distance){
01644                 myLeftClosest[kk] = distance;
01645               }
01646               break;
01647             }
01648           }
01649           
01650           kk = j - 1;
01651           while(kk >= 0){
01652             scalar_t distance =ABS(cutCoordinates_tmp[kk] - cut);
01653             if(distance < _EPSILON){
01654               myPartWeights[2 * kk + 1] += w;
01655               
01656               myLeftClosest[kk] = 0;
01657               myRightClosest[kk] = 0;
01658               kk--;
01659             }
01660             else{
01661               if(myRightClosest[kk] > distance){
01662                 myRightClosest[kk] = distance;
01663               }
01664               break;
01665             }
01666           }
01667         }
01668         else {
01669         myPartWeights[j * 2] += w;
01670         if (j < noCuts && myLeftClosest[j] > absdistance){
01671           myLeftClosest[j] = absdistance;
01672         }
01673         partIds[i] = j * 2 ;
01674         }
01675       }
01676     }
01677 
01678     /*
01679 #ifdef HAVE_ZOLTAN2_OMP
01680 #pragma omp for
01681 #endif
01682     for (lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
01683       int i = partitionedPointPermutations[ii];
01684 
01685       scalar_t w = pqJagged_uniformWeights? 1:pqJagged_weights[i];
01686       //get a coordinate and compare it with cut lines from left to right.
01687       for(partId_t j = 0; j < noCuts; ++j){
01688 
01689         if(isDone[j]) continue;
01690         scalar_t distance = pqJagged_coordinates[i] - cutCoordinates_tmp[j];
01691         scalar_t absdistance = ABS(distance);
01692         if(absdistance < _EPSILON){
01693           myPartWeights[j * 2] -= w;
01694           //myPartWeights[j * 2] += w;
01695           myLeftClosest[j] = 0;
01696           myRightClosest[j] = 0;
01697           //break;
01698         }
01699         else
01700           if (distance < 0) {
01701             distance = -distance;
01702             if (myLeftClosest[j] > distance){
01703               myLeftClosest[j] = distance;
01704             }
01705             break;
01706           }
01707         //if it is on the left
01708           else {
01709             //if on the right, continue with the next line.
01710             myPartWeights[j * 2] -= w;
01711             myPartWeights[j * 2 + 1] -= w;
01712             //myPartWeights[j * 2] += w;
01713             //myPartWeights[j * 2 + 1] += w;
01714 
01715             if ( myRightClosest[j] > distance){
01716               myRightClosest[j] = distance;
01717             }
01718           }
01719       }
01720     }
01721     */
01722 
01723   }
01724 
01725     //cout << "comp:" << comp <<endl;
01726     // prefix sum computation.
01727     for (size_t i = 1; i < total_part_count; ++i){
01728       // if check for cuts sharing the same position; all cuts sharing a position
01729       // have the same weight == total weight for all cuts sharing the position.
01730       // don't want to accumulate that total weight more than once.
01731       if(i % 2 == 0 && i > 1 && i < total_part_count - 1 &&
01732          ABS(cutCoordinates_tmp[i / 2] - cutCoordinates_tmp[i /2 - 1]) < _EPSILON){
01733         myPartWeights[i] = myPartWeights[i-2];
01734         continue;
01735       }
01736       myPartWeights[i] += myPartWeights[i-1];
01737       //cout << "p:" << "i:" << i<< " :" <<myPartWeights[i]  << endl;
01738     }
01739   /*
01740   for (size_t i = 0; i < total_part_count; ++i){
01741     cout << "p:" << i << ":" << myPartWeights[i] << endl;
01742   }
01743    */
01744  
01745 
01746 }
01747 
01748 
01763 template <typename scalar_t>
01764 void accumulateThreadResults(
01765     partId_t noCuts,
01766     size_t total_part_count,
01767     partId_t concurrentPartCount,
01768     int numThreads,
01769     bool *isDone,
01770     scalar_t **leftClosestDistance,
01771     scalar_t **rightClosestDistance,
01772     double **partWeights,
01773     scalar_t *localMinMaxTotal,
01774     scalar_t *totalPartWeights_leftClosest_rightCloset//,
01775     //bool useBinarySearch
01776     ){
01777 #ifdef HAVE_ZOLTAN2_OMP
01778 #pragma omp for
01779 #endif
01780   for(partId_t i = 0; i < noCuts * concurrentPartCount; ++i){
01781     if(isDone[i]) continue;
01782     scalar_t minl = leftClosestDistance[0][i], minr = rightClosestDistance[0][i];
01783 
01784     for (int j = 1; j < numThreads; ++j){
01785       if (rightClosestDistance[j][i] < minr ){
01786         minr = rightClosestDistance[j][i];
01787       }
01788       if (leftClosestDistance[j][i] < minl ){
01789         minl = leftClosestDistance[j][i];
01790       }
01791     }
01792     size_t ishift = i % noCuts + (i / noCuts) * (total_part_count + 2 * noCuts);
01793     totalPartWeights_leftClosest_rightCloset[total_part_count + ishift] = minl;
01794     totalPartWeights_leftClosest_rightCloset[total_part_count + noCuts + ishift] = minr;
01795   }
01796 
01797  // if(1 || useBinarySearch){
01798 #ifdef HAVE_ZOLTAN2_OMP
01799 #pragma omp for
01800 #endif
01801     for(size_t j = 0; j < total_part_count * concurrentPartCount; ++j){
01802       size_t actualCutInd = (j % total_part_count);
01803       partId_t cutInd = actualCutInd / 2 + (j / total_part_count) * noCuts;
01804 
01805       if(actualCutInd !=  total_part_count - 1 && isDone[cutInd]) continue;
01806       double pwj = 0;
01807       for (int i = 0; i < numThreads; ++i){
01808         pwj += partWeights[i][j];
01809       }
01810       size_t jshift = j % total_part_count + (j / total_part_count) * (total_part_count + 2 * noCuts);
01811       totalPartWeights_leftClosest_rightCloset[jshift] = pwj;
01812     }
01813 /*
01814   } else {
01815 #ifdef HAVE_ZOLTAN2_OMP
01816 #pragma omp for
01817 #endif
01818     for(size_t j = 0; j < total_part_count * concurrentPartCount; ++j){
01819       size_t actualCutInd = (j % total_part_count);
01820       partId_t cutInd = actualCutInd / 2 + (j / total_part_count) * noCuts;
01821 
01822       if(actualCutInd !=  total_part_count - 1 && isDone[cutInd]) continue;
01823       double pwj = 0;
01824       for (int i = 0; i < numThreads; ++i){
01825         pwj += partWeights[i][j];
01826       }
01827       scalar_t totalWeight = localMinMaxTotal[j / total_part_count + concurrentPartCount * 2];
01828       size_t jshift = j % total_part_count + (j / total_part_count) * (total_part_count + 2 * noCuts);
01829       totalPartWeights_leftClosest_rightCloset[jshift] = totalWeight + pwj;
01830 
01831     }
01832   }
01833 */
01834 }
01835 
01836 
01837 
01838 
01884 template <typename scalar_t, typename lno_t>
01885 void pqJagged_1D_Partition(
01886     const RCP<const Environment> &env,
01887     RCP<Comm<int> > &comm,
01888 
01889     lno_t *partitionedPointPermutations,
01890     scalar_t *pqJagged_coordinates,
01891     bool pqJagged_uniformWeights,
01892     scalar_t *pqJagged_weights,
01893 
01894     scalar_t *targetPartWeightRatios,   // the weight ratios at left side of the cuts. last is 1.
01895     scalar_t *globalMinMaxTotal,
01896     scalar_t *localMinMaxTotal,
01897 
01898     partId_t partNo,
01899     int numThreads,
01900     scalar_t maxScalar,
01901     scalar_t minScalar,
01902     scalar_t imbalanceTolerance,
01903     partId_t currentPartBeginIndex,
01904     partId_t concurrentPartCount,
01905     lno_t *inTotalCounts,
01906 
01907     scalar_t *cutCoordinates,
01908     scalar_t *cutCoordinatesWork,   // work array to manipulate coordinate of cutlines in different iterations.
01909     scalar_t **leftClosestDistance,
01910     scalar_t **rightClosestDistance,
01911     scalar_t *cutUpperBounds,  //to determine the next cut line with binary search
01912     scalar_t *cutLowerBounds,  //to determine the next cut line with binary search
01913     scalar_t *cutUpperWeight,   //to determine the next cut line with binary search
01914     scalar_t *cutLowerWeight,  //to determine the next cut line with binary search
01915     bool *isDone,
01916     double **partWeights,
01917     scalar_t *local_totalPartWeights_leftClosest_rightCloset,
01918     scalar_t *global_totalPartWeights_leftClosest_rightCloset,
01919     bool allowNonRectelinearPart,
01920     float *nonRectelinearPartRatios,
01921     scalar_t *localCutWeights,
01922     scalar_t *globalCutWeights,
01923 
01924     partId_t allDone,
01925     partId_t *myNonDoneCounts,
01926     bool useBinarySearch,
01927 //    string dimension,
01928     partId_t * partIds
01929 ){
01930 
01931   partId_t recteLinearCutCount = 0;
01932   scalar_t *cutCoordinates_tmp = cutCoordinates;
01933   partId_t noCuts = partNo - 1;
01934   size_t total_part_count = partNo + size_t (noCuts) ;
01935   
01936   
01937 #ifdef mpi_communication
01938   MPI_Op myop;
01939   MPI_Op_create(sumMinMin, 0, &myop);   /* step 3 */
01940 #endif 
01941   
01942 
01943   scalar_t _EPSILON = numeric_limits<scalar_t>::epsilon();
01944 
01945 
01946 //  env->timerStart(MACRO_TIMERS, "PQJagged creation_" + dimension);
01947   Teuchos::PQJaggedCombinedReductionOp<int, scalar_t> reductionOp(
01948       total_part_count , noCuts  , noCuts , concurrentPartCount);
01949 
01950  // env->timerStop(MACRO_TIMERS, "PQJagged creation_" + dimension);
01951 
01952 #ifdef HAVE_ZOLTAN2_OMP
01953 #pragma omp parallel shared(allDone,  recteLinearCutCount)
01954 #endif
01955   {
01956     //env->timerStart(MACRO_TIMERS, "PQJagged thread_init_" + dimension);
01957     int me = 0;
01958 #ifdef HAVE_ZOLTAN2_OMP
01959     me = omp_get_thread_num();
01960 #endif
01961     double *myPartWeights = partWeights[me];
01962     scalar_t *myLeftClosest = leftClosestDistance[me];
01963     scalar_t *myRightClosest = rightClosestDistance[me];
01964 
01965 #ifdef HAVE_ZOLTAN2_OMP
01966 #pragma omp for
01967 #endif
01968     for(partId_t i = 0; i < noCuts * concurrentPartCount; ++i){
01969       isDone[i] = false;
01970       partId_t ind = i / noCuts;
01971       cutLowerBounds[i] = globalMinMaxTotal[ind];
01972       cutUpperBounds[i] = globalMinMaxTotal[ind + concurrentPartCount];
01973       cutUpperWeight[i] = globalMinMaxTotal[ind + 2 * concurrentPartCount];
01974       cutLowerWeight[i] = 0;
01975       if(allowNonRectelinearPart){
01976         nonRectelinearPartRatios[i] = 0;
01977       }
01978     }
01979 
01980     //env->timerStop(MACRO_TIMERS, "PQJagged thread_init_" + dimension);
01981 
01982 
01983 #ifdef HAVE_ZOLTAN2_OMP
01984 #pragma omp barrier
01985 #endif
01986     int iteration = 0;
01987     while (allDone != 0){
01988       iteration += 1;
01989 /*
01990       if(comm->getRank() == 0)
01991         {
01992 #pragma omp single
01993           { cout << endl << endl << "allDone:" << allDone << endl;
01994 
01995             for (size_t i = 0; i < noCuts * concurrentPartCount; ++i){
01996 
01997               if(isDone[i] == false)
01998                 cout << "i:" << i <<  " c:" << cutCoordinates_tmp[i] << " u:" << cutUpperBounds[i] << " l:" << cutLowerBounds[i] <<
01999                 " uw:" << cutUpperWeight[i] << " lw:" << cutLowerWeight[i] << " not done" << endl;
02000               else cout << "i:" << i <<  " c:" << cutCoordinates_tmp[i] <<  " done" << endl; }
02001 
02002           }
02003         }
02004 */
02005 
02006 /*
02007 #pragma omp single
02008       {
02009       env->timerStart(MACRO_TIMERS, "PQJagged 1D_" + dimension);
02010       }
02011       */
02012       for (partId_t kk = 0; kk < concurrentPartCount; ++kk){
02013         if (/*globalMinMax[kk] > globalMinMax[kk + k] ||*/ myNonDoneCounts[kk] == 0) continue;
02014         partId_t cutShift = noCuts * kk;
02015         size_t totalPartShift = total_part_count * kk;
02016 
02017         bool *currentDone = isDone + cutShift;
02018         double *myCurrentPartWeights = myPartWeights + totalPartShift;
02019         scalar_t *myCurrentLeftClosest = myLeftClosest + cutShift;
02020         scalar_t *myCurrentRightClosest = myRightClosest + cutShift;
02021 
02022         partId_t current = currentPartBeginIndex + kk;
02023         lno_t coordinateBegin = current ==0 ? 0: inTotalCounts[current -1];
02024         lno_t coordinateEnd = inTotalCounts[current];
02025         scalar_t *cutCoordinates_ = cutCoordinates_tmp + kk * noCuts;
02026 
02027         // compute part weights using existing cuts
02028         pqJagged_1DPart_getPartWeights<scalar_t, lno_t>(
02029             total_part_count,
02030             noCuts,
02031             maxScalar,
02032             _EPSILON,
02033             numThreads,
02034             coordinateBegin,
02035             coordinateEnd,
02036             partitionedPointPermutations,
02037             pqJagged_coordinates,
02038             pqJagged_uniformWeights,
02039             pqJagged_weights,
02040             cutCoordinates_,
02041             currentDone,
02042             myCurrentPartWeights,
02043             myCurrentLeftClosest,
02044             myCurrentRightClosest,
02045             useBinarySearch,
02046             partIds);
02047       }
02048       /*
02049 #pragma omp single
02050       {
02051       env->timerStop(MACRO_TIMERS, "PQJagged 1D_" + dimension);
02052       env->timerStart(MACRO_TIMERS, "PQJagged 1D_accumulation_" + dimension);
02053       }
02054       */
02055       accumulateThreadResults<scalar_t>(
02056           noCuts, total_part_count, concurrentPartCount,
02057           numThreads, isDone,
02058           leftClosestDistance, rightClosestDistance, partWeights,
02059           localMinMaxTotal,
02060           local_totalPartWeights_leftClosest_rightCloset//,
02061 //          useBinarySearch
02062       );
02063       /*
02064 #pragma omp single
02065       {
02066       env->timerStop(MACRO_TIMERS, "PQJagged 1D_accumulation_" + dimension);
02067 
02068       env->timerStart(MACRO_TIMERS, "PQJagged Communication_" + dimension);
02069       }
02070       */
02071 #ifdef HAVE_ZOLTAN2_OMP
02072 #pragma omp single
02073 #endif
02074       {
02075         if(comm->getSize() > 1){
02076         try{
02077    
02078        
02079             
02080 #ifdef mpi_communication
02081             
02082             MPI_Allreduce(local_totalPartWeights_leftClosest_rightCloset, global_totalPartWeights_leftClosest_rightCloset, 
02083                           
02084                           (total_part_count + 2 * noCuts) * concurrentPartCount, MPI_FLOAT, myop,MPI_COMM_WORLD);  
02085 #endif 
02086 #ifndef mpi_communication
02087             
02088           reduceAll<int, scalar_t>(*comm, reductionOp,
02089               (total_part_count + 2 * noCuts) * concurrentPartCount,
02090               local_totalPartWeights_leftClosest_rightCloset,
02091               global_totalPartWeights_leftClosest_rightCloset
02092           );
02093 #endif
02094 
02095           //cout << "reducing" << endl;
02096           /*
02097           MPI_Allreduce (
02098               local_totalPartWeights_leftClosest_rightCloset,
02099               global_totalPartWeights_leftClosest_rightCloset,
02100               total_part_count ,
02101               MPI_DOUBLE,
02102               MPI_SUM,
02103               MPI_COMM_WORLD);
02104 
02105           MPI_Allreduce (
02106               local_totalPartWeights_leftClosest_rightCloset + total_part_count ,
02107               global_totalPartWeights_leftClosest_rightCloset + total_part_count ,
02108               noCuts,
02109               MPI_DOUBLE,
02110               MPI_MIN,
02111               MPI_COMM_WORLD);
02112 
02113           MPI_Allreduce (
02114               local_totalPartWeights_leftClosest_rightCloset + total_part_count + noCuts,
02115               global_totalPartWeights_leftClosest_rightCloset + total_part_count + noCuts,
02116               noCuts,
02117               MPI_DOUBLE,
02118               MPI_MIN,
02119               MPI_COMM_WORLD);
02120         */
02121         }
02122 
02123         Z2_THROW_OUTSIDE_ERROR(*env)
02124         }
02125         else {
02126           size_t s = (total_part_count + 2 * noCuts) * concurrentPartCount;
02127           for(size_t i = 0; i < s; ++i){
02128             global_totalPartWeights_leftClosest_rightCloset[i] = local_totalPartWeights_leftClosest_rightCloset[i];
02129           }
02130         }
02131       }
02132       /*
02133 #pragma omp single
02134       {
02135       env->timerStop(MACRO_TIMERS, "PQJagged Communication_" + dimension);
02136 
02137       env->timerStart(MACRO_TIMERS, "PQJagged new_cut_calculation_" + dimension);
02138       }
02139       */
02140       for (partId_t kk = 0; kk < concurrentPartCount; ++kk){
02141 
02142 
02143         if (/*globalMinMax[kk] > globalMinMax[kk + k] || */myNonDoneCounts[kk] == 0) continue;
02144         partId_t cutShift = noCuts * kk;
02145         size_t tlrShift = (total_part_count + 2 * noCuts) * kk;
02146 
02147         scalar_t *localPartWeights = local_totalPartWeights_leftClosest_rightCloset  + tlrShift ;
02148         scalar_t *gtlr = global_totalPartWeights_leftClosest_rightCloset + tlrShift;
02149         scalar_t *glc = gtlr + total_part_count; //left closest points
02150         scalar_t *grc = gtlr + total_part_count + noCuts; //right closest points
02151         scalar_t *globalPartWeights = gtlr;
02152         bool *currentDone = isDone + cutShift;
02153         scalar_t *currentTargetPartWeightRatios = targetPartWeightRatios + (noCuts + 1) * kk;
02154         float *currentnonRectelinearPartRatios = nonRectelinearPartRatios + cutShift;
02155 
02156         scalar_t minCoordinate = globalMinMaxTotal[kk];
02157         scalar_t maxCoordinate = globalMinMaxTotal[kk + concurrentPartCount];
02158         scalar_t globalTotalWeight = globalMinMaxTotal[kk + concurrentPartCount * 2];
02159         scalar_t *currentcutLowerWeight = cutLowerWeight + cutShift;
02160         scalar_t *currentcutUpperWeight = cutUpperWeight + cutShift;
02161         scalar_t *currentcutUpperBounds = cutUpperBounds + cutShift;
02162         scalar_t *currentcutLowerBounds = cutLowerBounds + cutShift;
02163 
02164         partId_t prevDoneCount = myNonDoneCounts[kk];
02165         // Now compute the new cut coordinates.
02166         getNewCoordinates<scalar_t>(
02167             env, comm,
02168             total_part_count, noCuts, maxCoordinate, minCoordinate, globalTotalWeight, imbalanceTolerance, maxScalar,
02169             globalPartWeights, localPartWeights, currentTargetPartWeightRatios, currentDone,
02170 
02171             cutCoordinates_tmp + cutShift, currentcutUpperBounds, currentcutLowerBounds,
02172             glc, grc, currentcutLowerWeight, currentcutUpperWeight,
02173             cutCoordinatesWork +cutShift, //new cut coordinates
02174 
02175             allowNonRectelinearPart,
02176             currentnonRectelinearPartRatios,
02177             &recteLinearCutCount,
02178             localCutWeights,
02179             globalCutWeights,
02180             myNonDoneCounts[kk]
02181         );
02182 
02183         partId_t reduction = prevDoneCount - myNonDoneCounts[kk];
02184 #ifdef HAVE_ZOLTAN2_OMP
02185 #pragma omp single
02186 #endif
02187         {
02188           allDone -= reduction;
02189         }
02190       }
02191       /*
02192 #pragma omp single
02193       {
02194       env->timerStop(MACRO_TIMERS, "PQJagged new_cut_calculation_" + dimension);
02195       }
02196       */
02197 #ifdef HAVE_ZOLTAN2_OMP
02198 #pragma omp single
02199 #endif
02200       {
02201       scalar_t *t = cutCoordinates_tmp;
02202       cutCoordinates_tmp = cutCoordinatesWork;
02203       cutCoordinatesWork = t;
02204       }
02205     }
02206 /*
02207     if(comm->getRank() == 0)
02208       if(me == 0) cout << "it:" << iteration << endl;
02209 */
02210     // Needed only if keep_cuts; otherwise can simply swap array pointers 
02211     // cutCoordinates and cutCoordinatesWork.  
02212     // (at first iteration, cutCoordinates == cutCoorindates_tmp).
02213     // computed cuts must be in cutCoordinates.
02214     if (cutCoordinates != cutCoordinates_tmp){
02215 #ifdef HAVE_ZOLTAN2_OMP
02216 #pragma omp for
02217 #endif
02218       for(partId_t i = 0; i < noCuts *concurrentPartCount; ++i){
02219         cutCoordinates[i] = cutCoordinates_tmp[i];
02220       }
02221 
02222 #ifdef HAVE_ZOLTAN2_OMP
02223 #pragma omp single
02224 #endif
02225       {
02226         cutCoordinatesWork = cutCoordinates_tmp;
02227       }
02228     }
02229 
02230   }
02231 }
02232 
02233 
02234 
02235 
02236 
02262 template <typename lno_t, typename scalar_t>
02263 void getChunksFromCoordinates(
02264     partId_t partNo,
02265     int noThreads,
02266 
02267     lno_t *partitionedPointPermutations,
02268     scalar_t *pqJagged_coordinates,
02269     bool pqJagged_uniformWeights,
02270     scalar_t *coordWeights,
02271     scalar_t *cutCoordinates,
02272     lno_t coordinateBegin,
02273     lno_t coordinateEnd,
02274 
02275     bool allowNonRectelinearPart,
02276     float *actual_ratios,
02277     scalar_t *localPartWeights,
02278     double **partWeights,
02279     float **nonRectelinearRatios,
02280 
02281     //lno_t *coordinate_linked_list,
02282     //lno_t **coordinate_starts,
02283     //lno_t **coordinate_ends,
02284     lno_t ** partPointCounts,
02285 
02286     lno_t *newpartitionedPointPermutations,
02287     lno_t *totalCounts,
02288     partId_t *partIds //,
02289     //bool useBinarySearch
02290     ){
02291 
02292   //lno_t numCoordsInPart =  coordinateEnd - coordinateBegin;
02293   partId_t noCuts = partNo - 1;
02294   //size_t total_part_count = noCuts + partNo;
02295   scalar_t _EPSILON = numeric_limits<scalar_t>::epsilon();
02296 
02297 #ifdef HAVE_ZOLTAN2_OMP
02298 #pragma omp parallel
02299 #endif
02300   {
02301     int me = 0;
02302 #ifdef HAVE_ZOLTAN2_OMP
02303     me = omp_get_thread_num();
02304 #endif
02305 
02306     //lno_t *myStarts = coordinate_starts[me];
02307     //lno_t *myEnds = coordinate_ends[me];
02308     lno_t *myPartPointCounts = partPointCounts[me];
02309     float *myRatios = NULL;
02310     if (allowNonRectelinearPart){
02311 
02312 
02313       myRatios = nonRectelinearRatios[me];
02314 #ifdef HAVE_ZOLTAN2_OMP
02315 #pragma omp for
02316 #endif
02317       for (partId_t i = 0; i < noCuts; ++i){
02318         float r = actual_ratios[i];
02319         
02320         //cout << "real i:" << i << " :" << r << " " << endl;
02321         scalar_t leftWeight = r * (localPartWeights[i * 2 + 1] - localPartWeights[i * 2]);
02322         for(int ii = 0; ii < noThreads; ++ii){
02323           if(leftWeight > _EPSILON){
02324 
02325             scalar_t ithWeight = partWeights[ii][i * 2 + 1] - partWeights[ii][i * 2 ];
02326             if(ithWeight < leftWeight){
02327               nonRectelinearRatios[ii][i] = ithWeight;
02328             }
02329             else {
02330               nonRectelinearRatios[ii][i] = leftWeight ;
02331             }
02332             leftWeight -= ithWeight;
02333           }
02334           else {
02335             nonRectelinearRatios[ii][i] = 0;
02336           }
02337         }
02338       }
02339 
02340 
02341       if(noCuts > 0){
02342         for (partId_t i = noCuts - 1; i > 0 ; --i){          if(ABS(cutCoordinates[i] - cutCoordinates[i -1]) < _EPSILON){
02343             myRatios[i] -= myRatios[i - 1] ;
02344           }
02345           //cout << "i:" << i << " :" << myRatios[i] << " ";
02346           myRatios[i] = int ((myRatios[i] + LEAST_SIGNIFICANCE) * SIGNIFICANCE_MUL) / scalar_t(SIGNIFICANCE_MUL);
02347         }
02348       }
02349       /*
02350       
02351       for (partId_t i = 0; i < noCuts; ++i){
02352       cout << "r i:" << i << " :" <<  myRatios[i] << " " << endl;
02353       }
02354        */
02355        
02356     }
02357 
02358     for(partId_t ii = 0; ii < partNo; ++ii){
02359       myPartPointCounts[ii] = 0;
02360     }
02361     
02362     
02363 #ifdef HAVE_ZOLTAN2_OMP
02364 #pragma omp barrier
02365 #pragma omp for
02366 #endif
02367       for (lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
02368         
02369         lno_t i = partitionedPointPermutations[ii];
02370         partId_t pp = partIds[i];
02371         partId_t p = pp / 2;
02372         if(pp % 2 == 1){
02373           //cout << "on: " << pp << ":" << p << " cut:" << p << endl;
02374           
02375           if(allowNonRectelinearPart && myRatios[p] > _EPSILON * EPS_SCALE){
02376             //cout << "p:" << p << endl;
02377             scalar_t w = pqJagged_uniformWeights? 1:coordWeights[i];
02378             myRatios[p] -= w;
02379             if(myRatios[p] < 0 && p < noCuts - 1 && ABS(cutCoordinates[p+1] - cutCoordinates[p]) < _EPSILON){
02380               myRatios[p + 1] += myRatios[p];
02381             }
02382             ++myPartPointCounts[p];
02383             partIds[i] = p;
02384           }
02385           else{
02386             //scalar_t currentCut = cutCoordinates[p];
02387             //TODO:currently cannot divide 1 line more than 2 parts.
02388             //bug cannot be divided, therefore this part should change.
02389             //cout << "p:" << p+1 << endl;
02390             ++myPartPointCounts[p + 1];    
02391             partIds[i] = p + 1;
02392           }
02393       }
02394       else {
02395         ++myPartPointCounts[p];
02396         
02397         partIds[i] = p;
02398       }
02399     }
02400     
02401     
02402 
02403 #ifdef HAVE_ZOLTAN2_OMP
02404 #pragma omp for
02405 #endif
02406     for(partId_t j = 0; j < partNo; ++j){
02407       lno_t pwj = 0;//, prev = 0;
02408       //if(j == 0) prev = 0;
02409       //else prev = totalCounts[j - 1];
02410       //lno_t prev2 = prev;
02411       for (int i = 0; i < noThreads; ++i){
02412         lno_t threadPartPointCount = partPointCounts[i][j];
02413         partPointCounts[i][j] = pwj;
02414         pwj += threadPartPointCount;
02415       
02416         }
02417       totalCounts[j] = pwj;// + prev2; //+ coordinateBegin;
02418     }
02419     
02420 #ifdef HAVE_ZOLTAN2_OMP
02421 #pragma omp single
02422 #endif
02423     {
02424       for(partId_t j = 1; j < partNo; ++j){
02425         totalCounts[j] += totalCounts[j - 1];
02426       }
02427     }
02428 
02429     for(partId_t j = 1; j < partNo; ++j){
02430       myPartPointCounts[j] += totalCounts[j - 1] ;
02431     }
02432     
02433     
02434 #ifdef HAVE_ZOLTAN2_OMP
02435 #pragma omp for
02436 #endif
02437     for (lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
02438       lno_t i = partitionedPointPermutations[ii];
02439       partId_t p =  partIds[i];
02440       //cout << "p:" << p << " my:" << myPartPointCounts[p] << " begin:" << coordinateBegin << endl; 
02441       newpartitionedPointPermutations[coordinateBegin + myPartPointCounts[p]++] = i;
02442     }
02443     
02444     /*
02445     pqJagged_PartVertices <lno_t, partId_t> pqPV;
02446     pqPV.set(coordinate_linked_list, myStarts, myEnds);
02447     memset(myPartPointCounts, 0, sizeof(lno_t) * partNo);
02448 
02449 
02450 #ifdef HAVE_ZOLTAN2_OMP
02451 #pragma omp for
02452 #endif
02453     for (lno_t i = coordinateBegin; i < coordinateEnd; ++i){
02454       int ii = partitionedPointPermutations[i];
02455       coordinate_linked_list[ii] = -1;
02456     }
02457 
02458     for (partId_t i = 0; i < partNo; ++i){
02459       myEnds[i] = -1;
02460       myStarts[i] = -1;
02461     }
02462 
02463 
02464     //determine part of each point
02465     if(1 ||useBinarySearch){
02466 
02467 #ifdef HAVE_ZOLTAN2_OMP
02468 #pragma omp for
02469 #endif
02470       for (lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
02471 
02472         int i = partitionedPointPermutations[ii];
02473         partId_t lc = 0;
02474         partId_t uc = noCuts - 1;
02475 
02476         //bool isInserted = false;
02477         bool onLeft = false;
02478         bool onRight = false;
02479         partId_t lastPart = -1;
02480         while(uc >= lc)
02481         {
02482           lastPart = -1;
02483           onLeft = false;
02484           onRight = false;
02485           partId_t j = (uc + lc) / 2;
02486           scalar_t distance = pqJagged_coordinates[i] - cutCoordinates[j];
02487           scalar_t absdistance = ABS(distance);
02488 
02489           if(allowNonRectelinearPart  && absdistance < _EPSILON ){
02490             scalar_t w = pqJagged_uniformWeights? 1:coordWeights[i];
02491             partId_t jj = j -1;
02492             while(jj >= 0 && ABS(cutCoordinates[jj] - cutCoordinates[j]) < _EPSILON){
02493               --jj;
02494             }
02495             ++jj;
02496             for (;jj < noCuts && ABS(cutCoordinates[jj] - cutCoordinates[j]) < _EPSILON; ++jj){
02497               if(myRatios[jj] > _EPSILON * EPS_SCALE ){
02498                 myRatios[jj] -= w;
02499                 if(myRatios[jj] < 0 && jj < noCuts - 1 && ABS(cutCoordinates[jj+1] - cutCoordinates[jj]) < _EPSILON){
02500                   myRatios[jj + 1] += myRatios[jj];
02501                 }
02502                 onLeft = true;
02503                 lastPart = jj;
02504                 break;
02505               }
02506             }
02507             if(!onLeft){
02508               onRight= true;
02509               lastPart = jj -1;
02510             }
02511 
02512             break;
02513           }
02514           else {
02515             if (distance < 0) {
02516               uc = j - 1;
02517               onLeft = true;
02518               lastPart = j;
02519             }
02520             else {
02521               lc = j + 1;
02522               onRight = true;
02523               lastPart = j;
02524             }
02525           }
02526         }
02527         if(onRight){
02528           pqPV.inserToPart(lastPart + 1, i);
02529           myPartPointCounts[lastPart + 1] +=  1;
02530         }
02531         else if(onLeft){
02532           pqPV.inserToPart(lastPart , i);
02533           myPartPointCounts[lastPart] +=  1;
02534         }
02535       }
02536       for(partId_t i = 1; i < partNo; ++i){
02537         myPartPointCounts[i] += myPartPointCounts[i-1];
02538       }
02539     } else {
02540 #ifdef HAVE_ZOLTAN2_OMP
02541 #pragma omp for
02542 #endif
02543       for (lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
02544 
02545         lno_t i = partitionedPointPermutations[ii];
02546 
02547         bool inserted = false;
02548         for(partId_t j = 0; j < noCuts; ++j){
02549           scalar_t distance = pqJagged_coordinates[i] - cutCoordinates[j];
02550           scalar_t absdistance = ABS(distance);
02551 
02552           if (allowNonRectelinearPart && myRatios[j] > _EPSILON * EPS_SCALE && absdistance < _EPSILON ){
02553             scalar_t w = pqJagged_uniformWeights? 1:coordWeights[i];
02554             scalar_t decrease = w;
02555             myRatios[j] -= decrease;
02556 
02557             if(myRatios[j] < 0 && j < noCuts - 1 && ABS(cutCoordinates[j+1] - cutCoordinates[j]) < _EPSILON){
02558               myRatios[j + 1] += myRatios[j];
02559             }
02560             inserted = true;
02561             pqPV.inserToPart(j, i);
02562             break;
02563           }
02564           else if (distance < 0 && absdistance > _EPSILON){
02565             pqPV.inserToPart(j, i);
02566             inserted = true;
02567             break;
02568           }
02569           else {
02570             myPartPointCounts[j] -=  1;
02571           }
02572 
02573         }
02574         if(!inserted){
02575           pqPV.inserToPart(noCuts, i);
02576         }
02577       }
02578     }
02579     //accumulate the starts of each thread.
02580 
02581 #ifdef HAVE_ZOLTAN2_OMP
02582 #pragma omp for
02583 #endif
02584     for(partId_t i = 0; i < partNo; ++i){
02585       int head = 0, tail = 0;
02586       while(head < noThreads && coordinate_ends[head][i] == -1){
02587         ++head;
02588       }
02589       int firstHead = head;
02590       for(int j = head; j < noThreads; ){
02591         tail = j+1;
02592         bool foundTail = false;
02593         while(tail < noThreads){
02594           if(coordinate_starts[tail][i] == -1){
02595             ++tail;
02596           }
02597           else {
02598             foundTail = true;
02599             break;
02600           }
02601         }
02602         if(foundTail){
02603           coordinate_linked_list[coordinate_ends[head][i]] = coordinate_starts[tail][i];
02604           head = tail;
02605         }
02606         j = tail;
02607 
02608       }
02609       coordinate_starts[0][i] = firstHead >= noThreads ? -1:coordinate_starts[firstHead][i];
02610     }
02611      
02612 
02613     if(1 || useBinarySearch){
02614       //accumulate the count.
02615 #ifdef HAVE_ZOLTAN2_OMP
02616 #pragma omp for
02617 #endif
02618       for(partId_t j = 0; j < partNo; ++j){
02619         lno_t pwj = 0;
02620         for (int i = 0; i < noThreads; ++i){
02621           pwj += partPointCounts[i][j];
02622         }
02623         totalCounts[j] = pwj;
02624       }
02625     } else {
02626 #ifdef HAVE_ZOLTAN2_OMP
02627 #pragma omp for
02628 #endif
02629       for(partId_t j = 0; j < partNo; ++j){
02630         lno_t pwj = 0;
02631         for (int i = 0; i < noThreads; ++i){
02632           pwj += partPointCounts[i][j];
02633         }
02634         totalCounts[j] = pwj + numCoordsInPart;
02635       }
02636     }
02637 */
02638 
02639 /*
02640 #ifdef HAVE_ZOLTAN2_OMP
02641 #pragma omp for
02642 #endif
02643     for(partId_t i = 0; i < partNo; ++i){
02644       lno_t nextPoint = coordinate_starts[0][i];
02645       lno_t pcnt = 0;
02646 
02647       lno_t prevCount = coordinateBegin;
02648       if (i > 0) prevCount = totalCounts[i -1] + coordinateBegin;
02649 
02650       while(nextPoint != -1){
02651         newpartitionedPointPermutations[prevCount + pcnt++] = nextPoint;
02652         nextPoint = coordinate_linked_list[nextPoint];
02653       }
02654     }
02655     */
02656   }
02657 }
02658 
02659 
02660 template <typename T>
02661 T *allocMemory(size_t size){
02662   if (size > 0){
02663     T * a = new T[size];
02664     if (a == NULL) {
02665       throw  "cannot allocate memory";
02666     }
02667     return a;
02668   }
02669   else {
02670     return NULL;
02671   }
02672 }
02673 
02674 template <typename T>
02675 void freeArray(T *&array){
02676   if(array != NULL){
02677     delete [] array;
02678     array = NULL;
02679   }
02680 }
02681 
02682 template <typename tt>
02683 std::string toString(tt obj){
02684   std::stringstream ss (std::stringstream::in |std::stringstream::out);
02685   ss << obj;
02686   std::string tmp = "";
02687   ss >> tmp;
02688   return tmp;
02689 }
02690 
02691 
02692 
02693 
02704 template <typename Adapter>
02705 void AlgPQJagged(
02706     const RCP<const Environment> &env,
02707     RCP<Comm<int> > &comm,
02708     const RCP<const CoordinateModel<
02709     typename Adapter::base_adapter_t> > &coords,
02710     RCP<PartitioningSolution<Adapter> > &solution
02711 )
02712 {
02713 #ifndef INCLUDE_ZOLTAN2_EXPERIMENTAL
02714 
02715   Z2_THROW_EXPERIMENTAL("Zoltan2 PQJagged is strictly experimental software "
02716                         "while it is being developed and tested.")
02717 
02718 #else
02719 /*
02720  *   typedef typename Adapter::scalar_t scalar_t;
02721  *     typedef typename Adapter::gno_t gno_t;
02722  *       typedef typename Adapter::lno_t lno_t;
02723   if(comm->getRank() == 0){
02724     cout << "size of gno:" << sizeof(gno_t) << endl;
02725     cout << "size of lno:" << sizeof(lno_t) << endl;
02726     cout << "size of scalar_t:" << sizeof(scalar_t) << endl;
02727   }
02728  */
02729   env->timerStart(MACRO_TIMERS, "PQJagged Total");
02730 
02731 
02732   env->timerStart(MACRO_TIMERS, "PQJagged Problem_Init");
02733   typedef typename Adapter::scalar_t scalar_t;
02734   typedef typename Adapter::gno_t gno_t;
02735 
02736   typedef typename Adapter::lno_t lno_t;
02737   const Teuchos::ParameterList &pl = env->getParameters();
02738 
02739   std::bitset<NUM_RCB_PARAMS> params;
02740   int numTestCuts = 5;
02741   scalar_t imbalanceTolerance;
02742 
02743   multiCriteriaNorm mcnorm;
02744   bool ignoreWeights=false;
02745 
02746   bool allowNonRectelinearPart = false;
02747   int concurrentPartCount = 0;
02748   bool force_binary = false, force_linear = false;
02749   pqJagged_getParameters<scalar_t>(pl, imbalanceTolerance, mcnorm, params, numTestCuts, ignoreWeights,allowNonRectelinearPart,  concurrentPartCount,
02750       force_binary, force_linear);
02751 
02752   int coordDim, weightDim; size_t nlc; global_size_t gnc; int criteriaDim;
02753   pqJagged_getCoordinateValues<Adapter>( coords, coordDim, weightDim, nlc, gnc, criteriaDim, ignoreWeights);
02754   size_t numLocalCoords = nlc;
02755 
02756   //allocate only two dimensional pointer.
02757   //raw pointer addresess will be obtained from multivector.
02758   scalar_t **pqJagged_coordinates = allocMemory<scalar_t *>(coordDim);
02759   scalar_t **pqJagged_weights = allocMemory<scalar_t *>(criteriaDim);
02760   bool *pqJagged_uniformParts = allocMemory< bool >(criteriaDim); //if the partitioning results wanted to be uniform.
02761   scalar_t **pqJagged_partSizes =  allocMemory<scalar_t *>(criteriaDim); //if in a criteria dimension, uniform part is false this shows ratios of the target part weights.
02762   bool *pqJagged_uniformWeights = allocMemory< bool >(criteriaDim); //if the weights of coordinates are uniform in a criteria dimension.
02763 
02764   ArrayView<const gno_t> pqJagged_gnos;
02765   size_t numGlobalParts;
02766   int pqJagged_multiVectorDim;
02767 
02768   pqJagged_getInputValues<Adapter, scalar_t, gno_t>(
02769       env, coords, solution,params,coordDim,weightDim,numLocalCoords,
02770       numGlobalParts, pqJagged_multiVectorDim,
02771       pqJagged_coordinates,criteriaDim, pqJagged_weights,pqJagged_gnos, ignoreWeights,
02772       pqJagged_uniformWeights, pqJagged_uniformParts, pqJagged_partSizes
02773   );
02774 
02775   int numThreads = pqJagged_getNumThreads();
02776 
02777   partId_t totalDimensionCut = 0;
02778   partId_t totalPartCount = 1;
02779   partId_t maxPartNo = 0;
02780 
02781   const partId_t *partNo = pl.getPtr<Array <partId_t> >("pqParts")->getRawPtr();
02782   int partArraySize = pl.getPtr<Array <partId_t> >("pqParts")->size() - 1;
02783 
02784   for (int i = 0; i < partArraySize; ++i){
02785     totalPartCount *= partNo[i];
02786     if(partNo[i] > maxPartNo) maxPartNo = partNo[i];
02787   }
02788   totalDimensionCut = totalPartCount - 1;
02789   partId_t maxCutNo = maxPartNo - 1;
02790   partId_t maxTotalCumulativePartCount = totalPartCount / partNo[partArraySize - 1];
02791   size_t maxTotalPartCount = maxPartNo + size_t(maxCutNo);
02792   //maxPartNo is P, maxCutNo = P-1, matTotalPartcount = 2P-1
02793 
02794   if(concurrentPartCount > maxTotalCumulativePartCount){
02795     if(comm->getRank() == 0){
02796       cerr << "Warning: Concurrent part calculation count ("<< concurrentPartCount << ") has been set bigger than maximum amount that can be used." << " Setting to:" << maxTotalCumulativePartCount << "." << endl;
02797     }
02798     concurrentPartCount = maxTotalCumulativePartCount;
02799   }
02800 
02801   // coordinates of the cut lines. First one is the min, last one is max coordinate.
02802   // kddnote if (keep_cuts)
02803   // coordinates of the cut lines.
02804   //only store this much if cuts are needed to be stored.
02805   scalar_t *allCutCoordinates = allocMemory< scalar_t>(totalDimensionCut);
02806   // kddnote else
02807   //scalar_t *allCutCoordinates = allocMemory< scalar_t>(maxCutNo * concurrentPartCount);
02808 
02809   //as input indices.
02810   lno_t *partitionedPointCoordinates =  allocMemory< lno_t>(numLocalCoords);
02811   //as output indices
02812   lno_t *newpartitionedPointCoordinates = allocMemory< lno_t>(numLocalCoords);
02813   scalar_t *max_min_array =  allocMemory< scalar_t>(numThreads * 2);
02814 
02815   //initial configuration
02816   //set each pointer-i to i.
02817 #ifdef HAVE_ZOLTAN2_OMP
02818 #pragma omp parallel for
02819 #endif
02820   for(size_t i = 0; i < numLocalCoords; ++i){
02821     partitionedPointCoordinates[i] = i;
02822   }
02823 
02824 #ifdef HAVE_ZOLTAN2_OMP
02825 #ifdef FIRST_TOUCH
02826   firstTouch<lno_t>(newpartitionedPointCoordinates, numLocalCoords);
02827 #endif
02828 #endif
02829 
02830   //initially there is a single partition
02831   lno_t currentPartitionCount = 1;
02832   //single partition starts at index-0, and ends at numLocalCoords
02833 
02834   //inTotalCounts array holds the end points in partitionedPointCoordinates array
02835   //for each partition. Initially sized 1, and single element is set to numLocalCoords.
02836   lno_t *inTotalCounts = allocMemory<lno_t>(1);
02837   inTotalCounts[0] = static_cast<lno_t>(numLocalCoords);//the end of the initial partition is the end of coordinates.
02838 
02839   //the ends points of the output.
02840   lno_t *outTotalCounts = NULL;
02841 
02842   //the array holding the points in each part as linked list
02843   //lno_t *coordinate_linked_list = allocMemory<lno_t>(numLocalCoords);
02844   //the start and end coordinate of  each part.
02845   //lno_t **coordinate_starts =  allocMemory<lno_t *>(numThreads);
02846   //lno_t **coordinate_ends = allocMemory<lno_t *>(numThreads);
02847 
02848   //assign the max size to starts, as it will be reused.
02849   /*
02850   for(int i = 0; i < numThreads; ++i){
02851     coordinate_starts[i] = allocMemory<lno_t>(maxPartNo);
02852     coordinate_ends[i] = allocMemory<lno_t>(maxPartNo);
02853   }
02854    */
02855 
02856 
02857 #ifdef HAVE_ZOLTAN2_OMP
02858 #ifdef FIRST_TOUCH
02859 #pragma omp parallel
02860   {
02861     int me = omp_get_thread_num();
02862     for(partId_t ii = 0; ii < maxPartNo; ++ii){
02863       coordinate_starts[me][ii] = 0;
02864       coordinate_ends[me][ii] = 0;
02865     }
02866   }
02867 #endif
02868 #endif
02869 
02870 
02871 
02872 
02873   float *nonRectelinearPart = NULL; //how much weight percentage should a MPI put left side of the each cutline
02874   float **nonRectRatios = NULL; //how much weight percentage should each thread in MPI put left side of the each cutline
02875 
02876   if(allowNonRectelinearPart){
02877     //cout << "allowing" << endl;
02878     nonRectelinearPart = allocMemory<float>(maxCutNo * concurrentPartCount);
02879 #ifdef HAVE_ZOLTAN2_OMP
02880 #ifdef FIRST_TOUCH
02881     firstTouch<float>(nonRectelinearPart, maxCutNo);
02882 #endif
02883 #endif
02884 
02885     nonRectRatios = allocMemory<float *>(numThreads);
02886 
02887     for(int i = 0; i < numThreads; ++i){
02888       nonRectRatios[i] = allocMemory<float>(maxCutNo);
02889     }
02890 #ifdef HAVE_ZOLTAN2_OMP
02891 #ifdef FIRST_TOUCH
02892 #pragma omp parallel
02893     {
02894       int me = omp_get_thread_num();
02895       for(partId_t ii = 0; ii < maxCutNo; ++ii){
02896         nonRectRatios[me][ii] = 0;
02897       }
02898     }
02899 #endif
02900 #endif
02901 
02902   }
02903 
02904   // work array to manipulate coordinate of cutlines in different iterations.
02905   //necessary because previous cut line information is used for determining the next cutline information.
02906   //therefore, cannot update the cut work array until all cutlines are determined.
02907   scalar_t *cutCoordinatesWork = allocMemory<scalar_t>(maxCutNo * concurrentPartCount);
02908 #ifdef HAVE_ZOLTAN2_OMP
02909 #ifdef FIRST_TOUCH
02910   firstTouch<scalar_t>(cutCoordinatesWork, maxCutNo);
02911 #endif
02912 #endif
02913 
02914   //cumulative part weight ratio array.
02915   scalar_t *targetPartWeightRatios = allocMemory<scalar_t>(maxPartNo * concurrentPartCount); // the weight ratios at left side of the cuts. First is 0, last is 1.
02916 #ifdef HAVE_ZOLTAN2_OMP
02917 #ifdef FIRST_TOUCH
02918   firstTouch<scalar_t>(cutPartRatios, maxCutNo);
02919 #endif
02920 #endif
02921 
02922 
02923   scalar_t *cutUpperBounds = allocMemory<scalar_t>(maxCutNo * concurrentPartCount);  //upper bound coordinate of a cut line
02924   scalar_t *cutLowerBounds = allocMemory<scalar_t>(maxCutNo* concurrentPartCount);  //lower bound coordinate of a cut line
02925   scalar_t *cutLowerWeight = allocMemory<scalar_t>(maxCutNo* concurrentPartCount);  //lower bound weight of a cut line
02926   scalar_t *cutUpperWeight = allocMemory<scalar_t>(maxCutNo* concurrentPartCount);  //upper bound weight of a cut line
02927 
02928   scalar_t *localMinMaxTotal = allocMemory<scalar_t>(3 * concurrentPartCount); //combined array to exchange the min and max coordinate, and total weight of part.
02929   scalar_t *globalMinMaxTotal = allocMemory<scalar_t>(3 * concurrentPartCount);//global combined array with the results for min, max and total weight.
02930 
02931   //isDone is used to determine if a cutline is determined already.
02932   //If a cut line is already determined, the next iterations will skip this cut line.
02933   bool *isDone = allocMemory<bool>(maxCutNo * concurrentPartCount);
02934   //myNonDoneCount count holds the number of cutlines that have not been finalized for each part
02935   //when concurrentPartCount>1, using this information, if myNonDoneCount[x]==0, then no work is done for this part.
02936   partId_t *myNonDoneCount =  allocMemory<partId_t>(concurrentPartCount);
02937   //local part weights of each thread.
02938   double **partWeights = allocMemory<double *>(numThreads);
02939   //the work manupulation array for partweights.
02940   double **pws = allocMemory<double *>(numThreads);
02941 
02942   //leftClosesDistance to hold the min distance of a coordinate to a cutline from left (for each thread).
02943   scalar_t **leftClosestDistance = allocMemory<scalar_t *>(numThreads);
02944   //leftClosesDistance to hold the min distance of a coordinate to a cutline from right (for each thread)
02945   scalar_t **rightClosestDistance = allocMemory<scalar_t *>(numThreads);
02946 
02947   //to store how many points in each part a thread has.
02948   lno_t **partPointCounts = allocMemory<lno_t *>(numThreads);
02949 
02950   for(int i = 0; i < numThreads; ++i){
02951     //partWeights[i] = allocMemory<scalar_t>(maxTotalPartCount);
02952     partWeights[i] = allocMemory < double >(maxTotalPartCount * concurrentPartCount);
02953     rightClosestDistance[i] = allocMemory<scalar_t>(maxCutNo * concurrentPartCount);
02954     leftClosestDistance[i] = allocMemory<scalar_t>(maxCutNo * concurrentPartCount);
02955     partPointCounts[i] =  allocMemory<lno_t>(maxPartNo);
02956   }
02957 #ifdef HAVE_ZOLTAN2_OMP
02958 #ifdef FIRST_TOUCH
02959 #pragma omp parallel
02960   {
02961     int me = omp_get_thread_num();
02962     for(partId_t ii = 0; ii < maxPartNo; ++ii){
02963       partPointCounts[me][ii] = 0;
02964     }
02965     for(size_t ii = 0; ii < maxTotalPartCount; ++ii){
02966       partWeights[me][ii] = 0;
02967     }
02968     for(partId_t ii = 0; ii < maxCutNo; ++ii){
02969       rightClosestDistance[me][ii] = 0;
02970       leftClosestDistance[me][ii] = 0;
02971 
02972     }
02973   }
02974 #endif
02975 #endif
02976   // kddnote  Needed only when non-rectilinear parts.
02977   scalar_t *cutWeights = allocMemory<scalar_t>(maxCutNo);
02978   scalar_t *globalCutWeights = allocMemory<scalar_t>(maxCutNo);
02979 
02980   //for faster communication, concatanation of
02981   //totalPartWeights sized 2P-1, since there are P parts and P-1 cut lines
02982   //leftClosest distances sized P-1, since P-1 cut lines
02983   //rightClosest distances size P-1, since P-1 cut lines.
02984   scalar_t *totalPartWeights_leftClosests_rightClosests = allocMemory<scalar_t>((maxTotalPartCount + maxCutNo * 2) * concurrentPartCount);
02985   scalar_t *global_totalPartWeights_leftClosests_rightClosests = allocMemory<scalar_t>((maxTotalPartCount + maxCutNo * 2) * concurrentPartCount);
02986 
02987   partId_t *partIds = NULL;
02988   ArrayRCP<partId_t> partId;
02989   if(numLocalCoords > 0){
02990     partIds = allocMemory<partId_t>(numLocalCoords);
02991     partId = arcp(partIds, 0, numLocalCoords, true);
02992   }
02993 
02994   scalar_t *cutCoordinates =  allCutCoordinates;
02995 
02996   //partId_t leftPartitions = totalPartCount;
02997   scalar_t maxScalar_t = numeric_limits<scalar_t>::max();
02998   scalar_t minScalar_t = -numeric_limits<scalar_t>::max();
02999 
03000   env->timerStop(MACRO_TIMERS, "PQJagged Problem_Init");
03001 
03002   env->timerStart(MACRO_TIMERS, "PQJagged Problem_Partitioning");
03003 
03004 
03005   //int myRank = comm->getRank();
03006   //int worldSize = comm->getSize();
03007   
03008   scalar_t _EPSILON = numeric_limits<scalar_t>::epsilon();
03009   for (int i = 0; i < partArraySize; ++i){
03010     if(partNo[i] == 1) continue;
03011     int coordInd = i % coordDim;
03012     string istring = toString<int>(i);
03013 
03014     env->timerStart(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring);
03015     outTotalCounts = allocMemory<lno_t>(currentPartitionCount * partNo[i]);
03016 
03017     //the index where in the outtotalCounts will be written.
03018     partId_t currentOut = 0;
03019     //whatever is written to outTotalCounts will be added with previousEnd so that the points will be shifted.
03020     partId_t previousEnd = 0;
03021     scalar_t * pqCoord = pqJagged_coordinates[coordInd];
03022 
03023     partId_t currentWorkPart = 0;
03024     partId_t concurrentPart = min(currentPartitionCount - currentWorkPart, concurrentPartCount);
03025     bool useBinarySearch = false;
03026     if(partNo[i] > BINARYCUTOFF){
03027       useBinarySearch = true;
03028     }
03029     if(force_binary){
03030       useBinarySearch = true;
03031     }
03032     if (force_linear){
03033       useBinarySearch = false;
03034     }
03035     
03036 //    size_t total_part_count = partNo[i] * 2 + 1;
03037 
03038     //run for all available parts.
03039     for (; currentWorkPart < currentPartitionCount; currentWorkPart += concurrentPart){
03040 
03041       concurrentPart = min(currentPartitionCount - currentWorkPart, concurrentPartCount);
03042 #ifdef mpi_communication
03043       concurrent = concurrentPart;
03044 #endif
03045       /*
03046       if(myRank == 0)
03047         cout << "i: " << i << " j:" << currentWorkPart << " ";
03048        */
03049       //scalar_t used_imbalance = imbalanceTolerance * (LEAF_IMBALANCE_FACTOR + (1 - LEAF_IMBALANCE_FACTOR)   / leftPartitions) * 0.7;
03050       scalar_t used_imbalance = 0;
03051       //env->timerStart(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring +"_min_max");
03052 
03053       for(int kk = 0; kk < concurrentPart; ++kk){
03054 
03055         partId_t currentPart = currentWorkPart + kk;
03056         lno_t coordinateEnd= inTotalCounts[currentPart];
03057         lno_t coordinateBegin = currentPart==0 ? 0: inTotalCounts[currentPart -1];
03058 
03059         pqJagged_getLocalMinMaxTotalCoord<scalar_t, lno_t>(
03060             partitionedPointCoordinates,
03061             pqCoord,
03062             pqJagged_uniformWeights,
03063             pqJagged_weights[0],
03064             numThreads,
03065             coordinateBegin,
03066             coordinateEnd,
03067             max_min_array,
03068             maxScalar_t,
03069             minScalar_t,
03070             localMinMaxTotal[kk], //min coordinate
03071             localMinMaxTotal[kk + concurrentPart], //max coordinate
03072             localMinMaxTotal[kk + 2*concurrentPart] //total weight);
03073                           );
03074       }
03075       pqJagged_getGlobalMinMaxTotalCoord<scalar_t>(comm,env, concurrentPart, localMinMaxTotal, globalMinMaxTotal);
03076 
03077       //env->timerStop(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring +"_min_max");
03078 
03079 
03080 
03081 
03082       partId_t allDone = 0;
03083       // Compute weight ratios for parts & cuts: 
03084       //  e.g., 0.25  0.25  0.5    0.5  0.75 0.75  1
03085       //       part0  cut0  part1 cut1 part2 cut2 part3
03086       for(int kk = 0; kk < concurrentPart; ++kk){
03087         scalar_t minCoordinate = globalMinMaxTotal[kk];
03088         scalar_t maxCoordinate = globalMinMaxTotal[kk + concurrentPart];
03089         scalar_t *usedCutCoordinate = cutCoordinates + (partNo[i] - 1) * kk;
03090         scalar_t *usedCutPartRatios = targetPartWeightRatios + (partNo[i]) * kk;
03091 
03092         if(minCoordinate <= maxCoordinate){
03093           allDone += partNo[i] - 1;
03094           myNonDoneCount[kk] = partNo[i] - 1;
03095           //env->timerStart(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring + "_cut_coord");
03096           pqJagged_getCutCoord_Weights<scalar_t>(
03097               minCoordinate, maxCoordinate,
03098               pqJagged_uniformParts[0], pqJagged_partSizes[0], partNo[i] - 1,
03099               usedCutCoordinate, usedCutPartRatios, numThreads
03100           );
03101           //env->timerStop(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring + "_cut_coord");
03102           scalar_t coordinate_range = maxCoordinate - minCoordinate;
03103           
03104           partId_t currentPart = currentWorkPart + kk;
03105           lno_t coordinateEnd= inTotalCounts[currentPart];
03106           lno_t coordinateBegin = currentPart==0 ? 0: inTotalCounts[currentPart -1];
03107           
03108           if(ABS(coordinate_range) < _EPSILON ){
03109             for(lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
03110               
03111               lno_t i = partitionedPointCoordinates[ii];
03112               partIds[i] = 0;
03113             }
03114           }
03115           else{
03116 
03117             scalar_t slice = coordinate_range / partNo[i];
03118 
03119 #ifdef HAVE_ZOLTAN2_OMP
03120 #pragma omp parallel for
03121 #endif 
03122             for(lno_t ii = coordinateBegin; ii < coordinateEnd; ++ii){
03123               
03124               lno_t i = partitionedPointCoordinates[ii];
03125               partId_t pp = partId_t((pqCoord[i] - minCoordinate) / slice);
03126               //if( pp >= partNo[i])
03127               //{
03128               //  partIds[i] = 0;
03129               //} else {
03130               //  partIds[i] = total_part_count - 2 * pp;
03131               //}
03132               partIds[i] = 2 * pp;
03133             }
03134           }
03135         }
03136         else {
03137           // e.g., if have fewer coordinates than parts, don't need to do next dim.
03138           myNonDoneCount[kk] = 0;
03139         }
03140       }
03141 
03142       //cout << "partId:" << partIds[0] << endl;
03143       //env->timerStart(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring + "_1d");
03144       // Determine cut lines for k parts here.
03145       pqJagged_1D_Partition<scalar_t, lno_t>(
03146           env, comm,
03147           partitionedPointCoordinates,
03148           pqCoord, pqJagged_uniformWeights[0], pqJagged_weights[0],
03149           targetPartWeightRatios,
03150           globalMinMaxTotal,localMinMaxTotal,
03151           partNo[i], numThreads,
03152           maxScalar_t,
03153           minScalar_t,
03154           used_imbalance,
03155           currentWorkPart,
03156           concurrentPart,
03157           inTotalCounts,
03158           cutCoordinates,
03159           cutCoordinatesWork,
03160           leftClosestDistance,
03161           rightClosestDistance,
03162           cutUpperBounds,
03163           cutLowerBounds,
03164           cutUpperWeight,
03165           cutLowerWeight,
03166           isDone,
03167           partWeights,
03168           totalPartWeights_leftClosests_rightClosests,
03169           global_totalPartWeights_leftClosests_rightClosests,
03170           allowNonRectelinearPart,
03171           nonRectelinearPart,
03172           cutWeights,
03173           globalCutWeights,
03174           allDone,
03175           myNonDoneCount,
03176           useBinarySearch, // istring,
03177           partIds
03178           );
03179       
03180 
03181       //env->timerStop(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring + "_1d");
03182 
03183       //partId_t pEnd = currentOut + partNo[i] * concurrentPart;
03184       //for (partId_t ii = currentOut; ii < pEnd; ++ii){
03185       //  outTotalCounts[ii] = 0;
03186       //}
03187 
03188       //env->timerStart(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring + "_chunks");
03189       for(int kk = 0; kk < concurrentPart; ++kk){
03190 
03191         if(globalMinMaxTotal[kk] > globalMinMaxTotal[kk + concurrentPart]) {
03192           for(partId_t jj = 0; jj < partNo[i]; ++jj){
03193             outTotalCounts[currentOut + kk * (partNo[i]) + jj] = 0;
03194           }
03195 
03196           continue;
03197         }
03198         partId_t curr = currentWorkPart + kk;
03199         lno_t coordinateEnd= inTotalCounts[curr];
03200         lno_t coordinateBegin = curr==0 ? 0: inTotalCounts[curr -1];
03201         partId_t cutShift = (partNo[i] - 1) * kk;
03202         scalar_t *usedCutCoordinate = cutCoordinates + cutShift;
03203         float *usednonRectelinearPart = nonRectelinearPart + cutShift;
03204 
03205         scalar_t *tlr =  totalPartWeights_leftClosests_rightClosests + (4 *(partNo[i] - 1) + 1) * kk;
03206 
03207         for(int ii = 0; ii < numThreads; ++ii){
03208           pws[ii] = partWeights[ii] +  (2 * (partNo[i] - 1) + 1) * kk;
03209         }
03210 
03211         // Rewrite the indices based on the computed cuts.
03212         getChunksFromCoordinates<lno_t,scalar_t>(
03213             partNo[i],
03214             numThreads,
03215 
03216             partitionedPointCoordinates,
03217             pqCoord,
03218             pqJagged_uniformWeights[0],
03219             pqJagged_weights[0],
03220 
03221             usedCutCoordinate,
03222             coordinateBegin,
03223             coordinateEnd,
03224 
03225             allowNonRectelinearPart,
03226             usednonRectelinearPart,
03227             tlr,
03228             pws,
03229             nonRectRatios,
03230 
03231             //coordinate_linked_list,
03232             //coordinate_starts,
03233             //coordinate_ends,
03234             partPointCounts,
03235 
03236             newpartitionedPointCoordinates,
03237             outTotalCounts + currentOut + kk * (partNo[i]),partIds//,
03238             //useBinarySearch
03239         );
03240       }
03241 
03242       //env->timerStop(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring + "_chunks");
03243       cutCoordinates += (partNo[i] - 1) * concurrentPart;
03244 
03245       for(partId_t kk = 0; kk < concurrentPart; ++kk){
03246 
03247         
03248         for (partId_t ii = 0;ii < partNo[i] ; ++ii){
03249           outTotalCounts[currentOut+ii] += previousEnd;
03250           //cout << "out:" << outTotalCounts[currentOut+ii] << " prev:" << previousEnd<< endl; 
03251         }
03252         previousEnd = outTotalCounts[currentOut + partNo[i] - 1];
03253          
03254         currentOut += partNo[i] ;
03255       }
03256 /*
03257       if(myRank == 0)
03258         cout << endl;
03259 */
03260     } // end of this partitioning dimension
03261 
03262     // swap the indices' memory
03263     lno_t * tmp = partitionedPointCoordinates;
03264     partitionedPointCoordinates = newpartitionedPointCoordinates;
03265     newpartitionedPointCoordinates = tmp;
03266 
03267     currentPartitionCount *= partNo[i];
03268     freeArray<lno_t>(inTotalCounts);
03269     inTotalCounts = outTotalCounts;
03270     env->timerStop(MACRO_TIMERS, "PQJagged Problem_Partitioning_" + istring);
03271   } // Partitioning is done
03272 
03273   env->timerStop(MACRO_TIMERS, "PQJagged Problem_Partitioning");
03274 
03275   //cout << "biter" << endl;
03276   env->timerStart(MACRO_TIMERS, "PQJagged Part_Assignment");
03277 /*
03278   partId_t *partIds = NULL;
03279   ArrayRCP<partId_t> partId;
03280   if(numLocalCoords > 0){
03281     partIds = allocMemory<partId_t>(numLocalCoords);
03282     partId = arcp(partIds, 0, numLocalCoords, true);
03283   }
03284 */
03285 
03286 #ifdef HAVE_ZOLTAN2_OMP
03287 #pragma omp parallel for
03288 #endif
03289   for(partId_t i = 0; i < totalPartCount;++i){
03290     lno_t begin = 0;
03291     lno_t end = inTotalCounts[i];
03292     if(i > 0) begin = inTotalCounts[i -1];
03293     //cout << "part:" << i << " begin:" << begin << " end:" << end << " count:" << end - begin << endl;
03294     /*
03295 #ifdef HAVE_ZOLTAN2_OMP
03296 #pragma omp for
03297 #endif
03298      */
03299     for (lno_t ii = begin; ii < end; ++ii){
03300 
03301       lno_t k = partitionedPointCoordinates[ii];
03302       partIds[k] = i;
03303 
03304       /*
03305       cout << "part of coordinate:";
03306       for(int iii = 0; iii < coordDim; ++iii){
03307         cout <<  pqJagged_coordinates[iii][k] << " ";
03308       }
03309       cout << i;
03310       cout << endl;
03311       */
03312 
03313     }
03314   }
03315   env->timerStop(MACRO_TIMERS, "PQJagged Part_Assignment");
03316   ArrayRCP<const gno_t> gnoList;
03317   if(numLocalCoords > 0){
03318     gnoList = arcpFromArrayView(pqJagged_gnos);
03319   }
03320 
03321   env->timerStart(MACRO_TIMERS, "PQJagged Solution_Part_Assignment");
03322   solution->setParts(gnoList, partId, true);
03323   env->timerStop(MACRO_TIMERS, "PQJagged Solution_Part_Assignment");
03324 
03325 
03326   env->timerStart(MACRO_TIMERS, "PQJagged Problem_Free");
03327 
03328 /*
03329   if(comm->getRank() == 0){
03330     for(partId_t i = 0; i < totalPartCount - 1;++i){
03331       cout << "i:"<< i<<" cut coordinate:" << allCutCoordinates[i] << endl;
03332     }
03333   }
03334 */
03335 
03336 
03337   for(int i = 0; i < numThreads; ++i){
03338 //    freeArray<lno_t>(coordinate_starts[i]);
03339 //    freeArray<lno_t>(coordinate_ends[i]);
03340     freeArray<lno_t>(partPointCounts[i]);
03341   }
03342 
03343   freeArray<lno_t *>(partPointCounts);
03344   //freeArray<lno_t>(coordinate_linked_list);
03345   //freeArray<lno_t *>(coordinate_starts);
03346   //freeArray<lno_t *>(coordinate_ends);
03347   freeArray<double *> (pws);
03348 
03349   if(allowNonRectelinearPart){
03350     freeArray<float>(nonRectelinearPart);
03351     for(int i = 0; i < numThreads; ++i){
03352       freeArray<float>(nonRectRatios[i]);
03353     }
03354     freeArray<float *>(nonRectRatios);
03355   }
03356 
03357   freeArray<partId_t>(myNonDoneCount);
03358   freeArray<scalar_t>(cutWeights);
03359   freeArray<scalar_t>(globalCutWeights);
03360   freeArray<scalar_t>(max_min_array);
03361   freeArray<lno_t>(outTotalCounts);
03362   freeArray<lno_t>(partitionedPointCoordinates);
03363   freeArray<lno_t>(newpartitionedPointCoordinates);
03364   freeArray<scalar_t>(allCutCoordinates);
03365   freeArray<scalar_t *>(pqJagged_coordinates);
03366   freeArray<scalar_t *>(pqJagged_weights);
03367   freeArray<bool>(pqJagged_uniformParts);
03368   freeArray<scalar_t> (localMinMaxTotal);
03369   freeArray<scalar_t> (globalMinMaxTotal);
03370   freeArray<scalar_t *>(pqJagged_partSizes);
03371   freeArray<bool>(pqJagged_uniformWeights);
03372   freeArray<scalar_t>(cutCoordinatesWork);
03373   freeArray<scalar_t>(targetPartWeightRatios);
03374   freeArray<scalar_t>(cutUpperBounds);
03375   freeArray<scalar_t>(cutLowerBounds);
03376   freeArray<scalar_t>(cutLowerWeight);
03377   freeArray<scalar_t>(cutUpperWeight);
03378   freeArray<bool>(isDone);
03379   freeArray<scalar_t>(totalPartWeights_leftClosests_rightClosests);
03380   freeArray<scalar_t>(global_totalPartWeights_leftClosests_rightClosests);
03381 
03382   for(int i = 0; i < numThreads; ++i){
03383     freeArray<double>(partWeights[i]);//freeArray<scalar_t>(partWeights[i]);
03384     freeArray<scalar_t>(rightClosestDistance[i]);
03385     freeArray<scalar_t>(leftClosestDistance[i]);
03386   }
03387 
03388   //freeArray<scalar_t *>(partWeights);
03389   freeArray<double *>(partWeights);
03390   freeArray<scalar_t *>(leftClosestDistance);
03391   freeArray<scalar_t *>(rightClosestDistance);
03392 
03393 
03394   env->timerStop(MACRO_TIMERS, "PQJagged Problem_Free");
03395   env->timerStop(MACRO_TIMERS, "PQJagged Total");
03396 
03397 #endif // INCLUDE_ZOLTAN2_EXPERIMENTAL
03398 }
03399 } // namespace Zoltan2
03400 
03401 
03402 
03403 
03404 
03405 #endif