Zoltan2
Zoltan2_AlgScotch.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 //
00003 // ***********************************************************************
00004 //
00005 //   Zoltan2: A package of combinatorial algorithms for scientific computing
00006 //                  Copyright 2012 Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Karen Devine      (kddevin@sandia.gov)
00039 //                    Erik Boman        (egboman@sandia.gov)
00040 //                    Siva Rajamanickam (srajama@sandia.gov)
00041 //
00042 // ***********************************************************************
00043 //
00044 // @HEADER
00045 #ifndef _ZOLTAN2_ALGSCOTCH_HPP_
00046 #define _ZOLTAN2_ALGSCOTCH_HPP_
00047 
00048 #include <Zoltan2_GraphModel.hpp>
00049 #include <Zoltan2_PartitioningSolution.hpp>
00050 #include <Zoltan2_Util.hpp>
00051 
00052 
00053 #ifndef HAVE_ZOLTAN2_SCOTCH
00054 
00055 // Error handling for when Scotch is requested
00056 // but Zoltan2 not built with Scotch.
00057 
00058 namespace Zoltan2 {
00059 
00060 
00074 template <typename Adapter>
00075 void AlgPTScotch(
00076   const RCP<const Environment> &env,
00077   const RCP<const Comm<int> > &problemComm,
00078 #ifdef HAVE_ZOLTAN2_MPI
00079   MPI_Comm mpicomm,
00080 #endif
00081   const RCP<GraphModel<typename Adapter::base_adapter_t> > &model,
00082   RCP<PartitioningSolution<Adapter> > &solution
00083 
00084 )
00085 {
00086   throw std::runtime_error(
00087         "BUILD ERROR:  Scotch requested but not compiled into Zoltan2.\n"
00088         "Please set CMake flag Zoltan2_ENABLE_Scotch:BOOL=ON.");
00089 }
00090 }
00091 
00092 #else  //HAVE_ZOLTAN2_SCOTCH
00093 
00094 
00095 // stdint.h for int64_t in scotch header
00096 
00097 #include <stdint.h>
00098 #ifndef HAVE_ZOLTAN2_MPI
00099 #include "scotch.h"
00100 #else
00101 #include "ptscotch.h"
00102 #endif
00103 
00104 #ifdef SHOW_ZOLTAN2_SCOTCH_MEMORY
00105 //
00106 // Scotch keeps track of memory high water mark, but doesn't
00107 // provide a way to get that number.  So add this function:
00108 //   "size_t SCOTCH_getMemoryMax() { return memorymax;}"
00109 // to src/libscotch/common_memory.c
00110 //
00111 // and this macro:
00112 //   "#define HAVE_SCOTCH_GETMEMORYMAX
00113 // to include/ptscotch.h
00114 //
00115 // and compile scotch with -DCOMMON_MEMORY_TRACE
00116 //
00117 #ifdef HAVE_SCOTCH_GETMEMORYMAX
00118 
00119 extern "C"{
00120 extern size_t SCOTCH_getMemoryMax();
00121 }
00122 
00123 #else
00124 
00125 #error "Either turn off ZOLTAN2_ENABLE_SCOTCH_MEMORY_REPORT in cmake configure, or see SHOW_ZOLTAN2_SCOTCH_MEMORY comment in Zoltan2_AlgScotch.hpp"
00126 
00127 #endif
00128 
00129 #endif
00130 
00131 
00135 
00136 namespace Zoltan2{
00137 
00138 // Scale and round scalar_t weights (typically float or double) to 
00139 // SCOTCH_Num (typically int or long).
00140 // subject to sum(weights) <= max_wgt_sum.
00141 // Only scale if deemed necessary.
00142 //
00143 // Note that we use ceil() instead of round() to avoid
00144 // rounding to zero weights.
00145 // Based on Zoltan's scale_round_weights, mode 1.
00146 
00147 template <typename lno_t, typename scalar_t>
00148 static void scale_weights(
00149   size_t n,
00150   StridedData<lno_t, scalar_t> &fwgts,
00151   SCOTCH_Num *iwgts,
00152   const RCP<const Comm<int> > &problemComm
00153 )
00154 {
00155   const double INT_EPSILON = 1e-5;
00156 
00157   SCOTCH_Num nonint, nonint_local = 0;
00158   double sum_wgt, sum_wgt_local = 0.;
00159   double max_wgt, max_wgt_local = 0.;
00160 
00161   // Compute local sums of the weights 
00162   // Check whether all weights are integers
00163   for (size_t i = 0; i < n; i++) {
00164     double fw = double(fwgts[i]);
00165     if (!nonint_local){
00166       SCOTCH_Num tmp = (SCOTCH_Num) floor(fw + .5); /* Nearest int */
00167       if (fabs((double)tmp-fw) > INT_EPSILON) {
00168         nonint_local = 1;
00169       }
00170     }
00171     sum_wgt_local += fw;
00172     if (fw > max_wgt_local) max_wgt_local = fw;
00173   }
00174 
00175   Teuchos::reduceAll<int,int>(*problemComm, Teuchos::REDUCE_MAX, 1,
00176                               &nonint_local,  &nonint);
00177   Teuchos::reduceAll<int,double>(*problemComm, Teuchos::REDUCE_SUM, 1,
00178                                  &sum_wgt_local, &sum_wgt);
00179   Teuchos::reduceAll<int,double>(*problemComm, Teuchos::REDUCE_MAX, 1,
00180                                  &max_wgt_local, &max_wgt);
00181 
00182   double scale = 1.;
00183   const double max_wgt_sum = double(SCOTCH_NUMMAX/8);
00184 
00185   // Scaling needed if weights are not integers or weights' range is not sufficient
00186   if (nonint || (max_wgt <= INT_EPSILON) || (sum_wgt > max_wgt_sum)) {
00187     /* Calculate scale factor */
00188     if (sum_wgt != 0.) scale = max_wgt_sum/sum_wgt;
00189   }
00190 
00191   /* Convert weights to positive integers using the computed scale factor */
00192   for (size_t i = 0; i < n; i++)
00193     iwgts[i] = (SCOTCH_Num) ceil(double(fwgts[i])*scale);
00194 
00195 }
00196 
00198 //  Traits struct to handle conversions between gno_t/lno_t and SCOTCH_Num.
00200 
00201 // General case:  SCOTCH_Num and gno_t/lno_t (called zno_t here) differ.
00202 template <typename zno_t>
00203 struct SCOTCH_Num_Traits {
00204 
00205   static inline SCOTCH_Num ASSIGN_TO_SCOTCH_NUM(
00206     SCOTCH_Num &a,
00207     zno_t b,
00208     const RCP<const Environment> &env)
00209   {
00210     // Assign a = b; make sure SCOTCH_Num is large enough to accept zno_t.
00211     if (b <= SCOTCH_NUMMAX) a = b;
00212     else 
00213       env->localInputAssertion(__FILE__, __LINE__, 
00214        "Value too large for SCOTCH_Num, Rebuild Scotch with larger SCOTCH_Num",
00215        false, BASIC_ASSERTION);
00216     return a;
00217   }
00218 
00219   static inline void ASSIGN_SCOTCH_NUM_ARRAY(
00220     SCOTCH_Num **a,
00221     ArrayView<const zno_t> &b,
00222     const RCP<const Environment> &env)
00223   {
00224     // Allocate array a; copy b values into a.
00225     size_t size = b.size();
00226     if (size > 0) {
00227       *a = new SCOTCH_Num[size];
00228       for (size_t i = 0; i < size; i++) ASSIGN_TO_SCOTCH_NUM((*a)[i], b[i], env);
00229     }
00230     else {
00231       *a = NULL;
00232       // Note:  the Scotch manual says that if any rank has a non-NULL array,
00233       //        every process must have a non-NULL array.  In practice, 
00234       //        however, this condition is not needed for the arrays we use.
00235       //        For now, we'll set these arrays to NULL.  We could allocate
00236       //        a dummy value here if needed.  KDD 1/23/14
00237     }
00238   }
00239 
00240   static inline void DELETE_SCOTCH_NUM_ARRAY(SCOTCH_Num **a)
00241   {
00242     // Delete the copy made in ASSIGN_SCOTCH_NUM_ARRAY.
00243     delete [] *a;
00244   }
00245 };
00246 
00247 
00248 // Special case:  zno_t == SCOTCH_Num. No error checking or copies needed.
00249 template <>
00250 struct SCOTCH_Num_Traits<SCOTCH_Num> {
00251   static inline SCOTCH_Num ASSIGN_TO_SCOTCH_NUM(
00252     SCOTCH_Num &a,
00253     SCOTCH_Num b,
00254     const RCP<const Environment> &env)
00255   {
00256     a = b;
00257     return a;
00258   }
00259   static inline void ASSIGN_SCOTCH_NUM_ARRAY(
00260     SCOTCH_Num **a,
00261     ArrayView<const SCOTCH_Num> &b,
00262     const RCP<const Environment> &env)
00263   {
00264     if (b.size() > 0)
00265       *a = const_cast<SCOTCH_Num *> (b.getRawPtr());
00266     else {
00267       *a = NULL;
00268       // Note:  the Scotch manual says that if any rank has a non-NULL array,
00269       //        every process must have a non-NULL array.  In practice, 
00270       //        however, this condition is not needed for the arrays we use.
00271       //        For now, we'll set these arrays to NULL, because if we
00272       //        allocated a dummy value here, we'll have to track whether or
00273       //        not we can free it.  KDD 1/23/14
00274     }
00275   }
00276   static inline void DELETE_SCOTCH_NUM_ARRAY(SCOTCH_Num **a) { }
00277 };
00278 
00280 // Now, the actual Scotch algorithm.
00282 
00294 template <typename Adapter>
00295 void AlgPTScotch(
00296   const RCP<const Environment> &env,        // parameters & app comm
00297   const RCP<const Comm<int> > &problemComm, // problem comm
00298 #ifdef HAVE_ZOLTAN2_MPI
00299   MPI_Comm mpicomm,
00300 #endif
00301   const RCP<GraphModel<typename Adapter::base_adapter_t> > &model, // the graph
00302   RCP<PartitioningSolution<Adapter> > &solution
00303 )
00304 {
00305   HELLO;
00306 
00307   typedef typename Adapter::lno_t lno_t;
00308   typedef typename Adapter::gno_t gno_t;
00309   typedef typename Adapter::scalar_t scalar_t;
00310   typedef typename Adapter::part_t part_t;
00311 
00312   size_t numGlobalParts = solution->getTargetGlobalNumberOfParts();
00313 
00314   SCOTCH_Num partnbr;
00315   SCOTCH_Num_Traits<size_t>::ASSIGN_TO_SCOTCH_NUM(partnbr, numGlobalParts, env);
00316 
00317 #ifdef HAVE_ZOLTAN2_MPI
00318   int ierr = 0;
00319   int me = problemComm->getRank();
00320 
00321   const SCOTCH_Num  baseval = 0;  // Base value for array indexing.
00322                                   // GraphModel returns GNOs from base 0.
00323 
00324   SCOTCH_Strat stratstr;          // Strategy string
00325                                   // TODO:  Set from parameters
00326   SCOTCH_stratInit(&stratstr);
00327 
00328   // Allocate and initialize PTScotch Graph data structure.
00329   SCOTCH_Dgraph *gr = SCOTCH_dgraphAlloc();  // Scotch distributed graph
00330   ierr = SCOTCH_dgraphInit(gr, mpicomm);
00331 
00332   env->globalInputAssertion(__FILE__, __LINE__, "SCOTCH_dgraphInit", 
00333     !ierr, BASIC_ASSERTION, problemComm);
00334 
00335   // Get vertex info
00336   ArrayView<const gno_t> vtxID;
00337   ArrayView<StridedData<lno_t, scalar_t> > xyz;
00338   ArrayView<StridedData<lno_t, scalar_t> > vwgts;
00339   size_t nVtx = model->getVertexList(vtxID, xyz, vwgts);
00340   SCOTCH_Num vertlocnbr;
00341   SCOTCH_Num_Traits<size_t>::ASSIGN_TO_SCOTCH_NUM(vertlocnbr, nVtx, env);
00342   SCOTCH_Num vertlocmax = vertlocnbr; // Assumes no holes in global nums.
00343 
00344   // Get edge info
00345   ArrayView<const gno_t> edgeIds;
00346   ArrayView<const int>   procIds;
00347   ArrayView<const lno_t> offsets;
00348   ArrayView<StridedData<lno_t, scalar_t> > ewgts;
00349 
00350   size_t nEdge = model->getEdgeList(edgeIds, procIds, offsets, ewgts);
00351 
00352   SCOTCH_Num edgelocnbr;
00353   SCOTCH_Num_Traits<size_t>::ASSIGN_TO_SCOTCH_NUM(edgelocnbr, nEdge, env);
00354   const SCOTCH_Num edgelocsize = edgelocnbr;  // Assumes adj array is compact.
00355 
00356   SCOTCH_Num *vertloctab;  // starting adj/vtx
00357   SCOTCH_Num_Traits<lno_t>::ASSIGN_SCOTCH_NUM_ARRAY(&vertloctab, offsets, env);
00358 
00359   SCOTCH_Num *edgeloctab;  // adjacencies
00360   SCOTCH_Num_Traits<gno_t>::ASSIGN_SCOTCH_NUM_ARRAY(&edgeloctab, edgeIds, env);
00361 
00362   // We don't use these arrays, but we need them as arguments to Scotch.
00363   SCOTCH_Num *vendloctab = NULL;  // Assume consecutive storage for adj
00364   SCOTCH_Num *vlblloctab = NULL;  // Vertex label array
00365   SCOTCH_Num *edgegsttab = NULL;  // Array for ghost vertices
00366 
00367   // Get weight info.
00368   SCOTCH_Num *velotab = NULL;  // Vertex weights
00369   SCOTCH_Num *edlotab = NULL;  // Edge weights
00370 
00371   int nVwgts = model->getNumWeightsPerVertex();
00372   int nEwgts = model->getNumWeightsPerEdge();
00373   if (nVwgts > 1 && me == 0) {
00374     std::cerr << "Warning:  NumWeightsPerVertex is " << nVwgts 
00375               << " but Scotch allows only one weight. "
00376               << " Zoltan2 will use only the first weight per vertex."
00377               << std::endl;
00378   }
00379   if (nEwgts > 1 && me == 0) {
00380     std::cerr << "Warning:  NumWeightsPerEdge is " << nEwgts 
00381               << " but Scotch allows only one weight. "
00382               << " Zoltan2 will use only the first weight per edge."
00383               << std::endl;
00384   }
00385 
00386   if (nVwgts) {
00387     velotab = new SCOTCH_Num[nVtx+1];  // +1 since Scotch wants all procs 
00388                                        // to have non-NULL arrays
00389     scale_weights<lno_t, scalar_t>(nVtx, vwgts[0], velotab, problemComm);
00390   }
00391 
00392   if (nEwgts) {
00393     edlotab = new SCOTCH_Num[nEdge+1];  // +1 since Scotch wants all procs 
00394                                          // to have non-NULL arrays
00395     scale_weights<lno_t, scalar_t>(nEdge, ewgts[0], edlotab, problemComm);
00396   }
00397 
00398   // Build PTScotch distributed data structure
00399   ierr = SCOTCH_dgraphBuild(gr, baseval, vertlocnbr, vertlocmax,
00400                             vertloctab, vendloctab, velotab, vlblloctab,
00401                             edgelocnbr, edgelocsize,
00402                             edgeloctab, edgegsttab, edlotab);
00403 
00404   env->globalInputAssertion(__FILE__, __LINE__, "SCOTCH_dgraphBuild", 
00405     !ierr, BASIC_ASSERTION, problemComm);
00406 
00407   // Create array for Scotch to return results in.
00408   ArrayRCP<part_t> partList(new part_t[nVtx], 0, nVtx,true);
00409   SCOTCH_Num *partloctab = NULL;
00410   if (nVtx && (sizeof(SCOTCH_Num) == sizeof(part_t))) {
00411     // Can write directly into the solution's memory
00412     partloctab = (SCOTCH_Num *) partList.getRawPtr();
00413   }
00414   else {
00415     // Can't use solution memory directly; will have to copy later.
00416     // Note:  Scotch does not like NULL arrays, so add 1 to always have non-null.
00417     //        ParMETIS has this same "feature."  See Zoltan bug 4299.
00418     partloctab = new SCOTCH_Num[nVtx+1];
00419   }
00420 
00421   // Get target part sizes
00422   float *partsizes = new float[numGlobalParts];
00423   if (!solution->criteriaHasUniformPartSizes(0))
00424     for (size_t i=0; i<numGlobalParts; i++)
00425       partsizes[i] = solution->getCriteriaPartSize(0, i);
00426   else
00427     for (size_t i=0; i<numGlobalParts; i++)
00428       partsizes[i] = 1.0 / float(numGlobalParts);
00429 
00430   // Allocate and initialize PTScotch target architecture data structure
00431   SCOTCH_Arch archdat;
00432   SCOTCH_archInit(&archdat);
00433 
00434   SCOTCH_Num velosum = 0;
00435   SCOTCH_dgraphSize (gr, &velosum, NULL, NULL, NULL);
00436   SCOTCH_Num *goalsizes = new SCOTCH_Num[partnbr];
00437   // TODO: The goalsizes are set as in Zoltan; not sure it is correct there 
00438   // or here.
00439   // It appears velosum is global NUMBER of vertices, not global total 
00440   // vertex weight.  I think we should use the latter.
00441   // Fix this when we add vertex weights.
00442   for (SCOTCH_Num i = 0; i < partnbr; i++)
00443     goalsizes[i] = ceil(velosum * partsizes[i]);
00444   delete [] partsizes;
00445 
00446   SCOTCH_archCmpltw(&archdat, partnbr, goalsizes);
00447 
00448   // Call partitioning; result returned in partloctab.
00449   ierr = SCOTCH_dgraphMap(gr, &archdat, &stratstr, partloctab);
00450 
00451   env->globalInputAssertion(__FILE__, __LINE__, "SCOTCH_dgraphMap", 
00452     !ierr, BASIC_ASSERTION, problemComm);
00453 
00454   SCOTCH_archExit(&archdat);
00455   delete [] goalsizes;
00456 
00457   // TODO - metrics
00458 
00459 #ifdef SHOW_ZOLTAN2_SCOTCH_MEMORY
00460   int me = env->comm_->getRank();
00461 #endif
00462 
00463 #ifdef HAVE_SCOTCH_ZOLTAN2_GETMEMORYMAX
00464   if (me == 0){
00465     size_t scotchBytes = SCOTCH_getMemoryMax();
00466     std::cout << "Rank " << me << ": Maximum bytes used by Scotch: ";
00467     std::cout << scotchBytes << std::endl;
00468   }
00469 #endif
00470 
00471   // Clean up PTScotch
00472   SCOTCH_dgraphExit(gr);
00473   free(gr);
00474   SCOTCH_stratExit(&stratstr);
00475 
00476   // Load answer into the solution.
00477 
00478   if ((sizeof(SCOTCH_Num) != sizeof(part_t)) || (nVtx == 0)) {
00479     for (size_t i = 0; i < nVtx; i++) partList[i] = partloctab[i];
00480     delete [] partloctab;
00481   }
00482 
00483   ArrayRCP<const gno_t> gnos = arcpFromArrayView(vtxID);
00484 
00485   solution->setParts(gnos, partList, true);
00486 
00487   env->memory("Zoltan2-Scotch: After creating solution");
00488 
00489   // Clean up copies made due to differing data sizes.
00490   SCOTCH_Num_Traits<lno_t>::DELETE_SCOTCH_NUM_ARRAY(&vertloctab);
00491   SCOTCH_Num_Traits<gno_t>::DELETE_SCOTCH_NUM_ARRAY(&edgeloctab);
00492 
00493   if (nVwgts) delete [] velotab;
00494   if (nEwgts) delete [] edlotab;
00495 
00496 #else // DO NOT HAVE_MPI
00497 
00498   // TODO:  Handle serial case with calls to Scotch.
00499   // TODO:  For now, assign everything to rank 0 and assume only one part.
00500   // TODO:  Can probably use the code above for loading solution,
00501   // TODO:  instead of duplicating it here.
00502   // TODO
00503   // TODO:  Actual logic should call Scotch when number of processes == 1.
00504   ArrayView<const gno_t> vtxID;
00505   ArrayView<StridedData<lno_t, scalar_t> > xyz;
00506   ArrayView<StridedData<lno_t, scalar_t> > vwgts;
00507   size_t nVtx = model->getVertexList(vtxID, xyz, vwgts);
00508 
00509   ArrayRCP<part_t> partList(new part_t[nVtx], 0, nVtx, true);
00510   for (size_t i = 0; i < nVtx; i++) partList[i] = 0;
00511 
00512   ArrayRCP<const gno_t> gnos = arcpFromArrayView(vtxID);
00513   solution->setParts(gnos, partList, true);
00514 
00515 #endif // DO NOT HAVE_MPI
00516 }
00517 
00518 }
00519 #endif // HAVE_ZOLTAN2_SCOTCH
00520 #endif