Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 //
00003 // ***********************************************************************
00004 //
00005 //           Amesos2: Templated Direct Sparse Solver Package
00006 //                  Copyright 2011 Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00039 //
00040 // ***********************************************************************
00041 //
00042 // @HEADER
00043 
00052 #ifndef AMESOS2_SUPERLUDIST_DEF_HPP
00053 #define AMESOS2_SUPERLUDIST_DEF_HPP
00054 
00055 #include <Teuchos_Tuple.hpp>
00056 #include <Teuchos_StandardParameterEntryValidators.hpp>
00057 #include <Teuchos_DefaultMpiComm.hpp>
00058 
00059 #include "Amesos2_SolverCore_def.hpp"
00060 #include "Amesos2_Superludist_TypeMap.hpp"
00061 #include "Amesos2_Util.hpp"
00062 
00063 
00064 namespace Amesos2 {
00065 
00066 
00067   template <class Matrix, class Vector>
00068   Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
00069                                           Teuchos::RCP<Vector> X,
00070                                           Teuchos::RCP<const Vector> B)
00071     : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
00072     , nzvals_()                 // initialization to empty arrays
00073     , colind_()
00074     , rowptr_()
00075     , bvals_()
00076     , xvals_()
00077     , in_grid_(false)
00078   {
00080     // Set up the SuperLU_DIST processor grid //
00082 
00083     int nprocs = this->getComm()->getSize();
00084     SLUD::int_t nprow, npcol;
00085     get_default_grid_size(nprocs, nprow, npcol);
00086     data_.mat_comm = dynamic_cast<const Teuchos::MpiComm<int>* >(this->matrixA_->getComm().getRawPtr())->getRawMpiComm()->operator()();
00087     SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
00088 
00090     // Set Some default parameters.                       //
00091     //                                                    //
00092     // Must do this after grid has been created in        //
00093     // case user specifies the nprow and npcol parameters //
00095     Teuchos::RCP<Teuchos::ParameterList> default_params
00096       = Teuchos::parameterList( *(this->getValidParameters()) );
00097     this->setParameters(default_params);
00098 
00099     // Set some internal options
00100     data_.options.Fact = SLUD::DOFACT;
00101     data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
00102     data_.options.SolveInitialized  = SLUD::NO;
00103     data_.options.RefineInitialized = SLUD::NO;
00104     data_.rowequ = false;
00105     data_.colequ = false;
00106     data_.perm_r.resize(this->globalNumRows_);
00107     data_.perm_c.resize(this->globalNumCols_);
00108 
00110     // Set up a communicator for the parallel column ordering and //
00111     // parallel symbolic factorization.                           //
00113     data_.symb_comm = MPI_COMM_NULL;
00114     int color = MPI_UNDEFINED;
00115     int my_rank = this->rank_;
00116 
00117     /* domains is the next power of 2 less than nprow*npcol.  This
00118      * value will be used for creating an MPI communicator for the
00119      * pre-ordering and symbolic factorization methods.
00120      */
00121     data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
00122 
00123     if( this->rank_ < data_.domains ) color = 0;
00124     MPI_Comm_split (data_.mat_comm, color, my_rank, &(data_.symb_comm));
00125 
00127     // Set up a row map that maps to only processors that are in the    //
00128     // SuperLU processor grid.  This will be used for redistributing A. //
00130 
00131     int my_weight = 0;
00132     if( this->rank_ < nprow * npcol ){
00133       in_grid_ = true; my_weight = 1; // I am in the grid, and I get some of the matrix rows
00134     }
00135     // TODO: might only need to initialize if parallel symbolic factorization is requested.
00136     // TODO: Need to fix this map for indexbase ?
00137     superlu_rowmap_
00138       = Tpetra::createWeightedContigMapWithNode<local_ordinal_type,
00139       global_ordinal_type,
00140       node_type>(my_weight,
00141                  this->globalNumRows_,
00142                  this->getComm(),
00143                  KokkosClassic::DefaultNode::getDefaultNode());
00144     // TODO: the node above should technically come from the matrix
00145     // itself.  Might need to add a getNode method to the matrix
00146     // adapter.
00147 
00149     // Do some other initialization //
00151 
00152     data_.A.Store = NULL;
00153     function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
00154     SLUD::PStatInit(&(data_.stat));
00155     // We do not use ScalePermstructInit because we will use our own
00156     // arrays for storing perm_r and perm_c
00157     data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
00158     data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
00159   }
00160 
00161 
00162   template <class Matrix, class Vector>
00163   Superludist<Matrix,Vector>::~Superludist( )
00164   {
00165     /* Free SuperLU_DIST data_types
00166      * - Matrices
00167      * - Vectors
00168      * - Stat object
00169      * - ScalePerm, LUstruct, grid, and solve objects
00170      *
00171      * Note: the function definitions are the same regardless whether
00172      * complex or real, so we arbitrarily use the D namespace
00173      */
00174     if ( this->status_.getNumPreOrder() > 0 ){
00175       free( data_.sizes );
00176       free( data_.fstVtxSep );
00177     }
00178 
00179     // Cleanup old matrix store memory if it's non-NULL.  Our
00180     // Teuchos::Array's will destroy rowind, colptr, and nzval for us
00181     if( data_.A.Store != NULL ){
00182       SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
00183     }
00184 
00185     // LU data is initialized in numericFactorization_impl()
00186     if ( this->status_.getNumNumericFact() > 0 ){
00187       function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
00188     }
00189     function_map::LUstructFree(&(data_.lu));
00190 
00191     // If a symbolic factorization is ever performed without a
00192     // follow-up numericfactorization, there are some arrays in the
00193     // Pslu_freeable struct which will never be free'd by
00194     // SuperLU_DIST.
00195     if ( this->status_.symbolicFactorizationDone() &&
00196          !this->status_.numericFactorizationDone() ){
00197       if ( data_.pslu_freeable.xlsub != NULL ){
00198         free( data_.pslu_freeable.xlsub );
00199         free( data_.pslu_freeable.lsub );
00200       }
00201       if ( data_.pslu_freeable.xusub != NULL ){
00202         free( data_.pslu_freeable.xusub );
00203         free( data_.pslu_freeable.usub );
00204       }
00205       if ( data_.pslu_freeable.supno_loc != NULL ){
00206         free( data_.pslu_freeable.supno_loc );
00207         free( data_.pslu_freeable.xsup_beg_loc );
00208         free( data_.pslu_freeable.xsup_end_loc );
00209       }
00210       free( data_.pslu_freeable.globToLoc );
00211     }
00212 
00213     SLUD::PStatFree( &(data_.stat) ) ;
00214 
00215     // Teuchos::Arrays will free R, C, perm_r, and perm_c
00216     // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
00217 
00218     if ( data_.options.SolveInitialized == SLUD::YES )
00219       function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
00220 
00221     SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
00222                                            // cases where grid
00223                                            // wouldn't be initialized?
00224 
00225     if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
00226   }
00227 
00228   template<class Matrix, class Vector>
00229   int
00230   Superludist<Matrix,Vector>::preOrdering_impl()
00231   {
00232     // We will always use the NATURAL row ordering to avoid the
00233     // sequential bottleneck present when doing any other row
00234     // ordering scheme from SuperLU_DIST
00235     //
00236     // Set perm_r to be the natural ordering
00237     SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
00238     for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
00239 
00240     // loadA_impl();                    // Refresh matrix values
00241 
00242     if( in_grid_ ){
00243       // If this function has been called at least once, then the
00244       // sizes, and fstVtxSep arrays were allocated in
00245       // get_perm_c_parmetis.  Delete them before calling that
00246       // function again.  These arrays will also be dealloc'd in the
00247       // deconstructor.
00248       if( this->status_.getNumPreOrder() > 0 ){
00249         free( data_.sizes );
00250         free( data_.fstVtxSep );
00251       }
00252 #ifdef HAVE_AMESOS2_TIMERS
00253       Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
00254 #endif
00255 
00256       float info = 0.0;
00257       info = SLUD::get_perm_c_parmetis( &(data_.A),
00258                                         data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
00259                                         data_.grid.nprow * data_.grid.npcol, data_.domains,
00260                                         &(data_.sizes), &(data_.fstVtxSep),
00261                                         &(data_.grid), &(data_.symb_comm) );
00262 
00263       TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
00264                           std::runtime_error,
00265                           "SuperLU_DIST pre-ordering ran out of memory after allocating "
00266                           << info << " bytes of memory" );
00267     }
00268 
00269     // Ordering will be applied directly before numeric factorization,
00270     // after we have a chance to get updated coefficients from the
00271     // matrix
00272 
00273     return EXIT_SUCCESS;
00274   }
00275 
00276 
00277 
00278   template <class Matrix, class Vector>
00279   int
00280   Superludist<Matrix,Vector>::symbolicFactorization_impl()
00281   {
00282     // loadA_impl();                    // Refresh matrix values
00283 
00284     if( in_grid_ ){
00285 
00286 #ifdef HAVE_AMESOS2_TIMERS
00287       Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
00288 #endif
00289 
00290       float info = 0.0;
00291       info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
00292                                  data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
00293                                  data_.perm_r.getRawPtr(), data_.sizes,
00294                                  data_.fstVtxSep, &(data_.pslu_freeable),
00295                                  &(data_.grid.comm), &(data_.symb_comm),
00296                                  &(data_.mem_usage));
00297 
00298       TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
00299                           std::runtime_error,
00300                           "SuperLU_DIST symbolic factorization ran out of memory after"
00301                           " allocating " << info << " bytes of memory" );
00302     }
00303     same_symbolic_ = false;
00304     same_solve_struct_ = false;
00305 
00306     return EXIT_SUCCESS;
00307   }
00308 
00309 
00310   template <class Matrix, class Vector>
00311   int
00312   Superludist<Matrix,Vector>::numericFactorization_impl(){
00313     using Teuchos::as;
00314 
00315     // loadA_impl();                    // Refresh the matrix values
00316 
00317     // if( data_.options.Equil == SLUD::YES ){
00318     //   // Apply the scalings computed in preOrdering
00319     //   function_map::laqgs(&(data_.A), data_.R.getRawPtr(),
00320     //                    data_.C.getRawPtr(), data_.rowcnd, data_.colcnd,
00321     //                    data_.amax, &(data_.equed));
00322 
00323     //   data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
00324     //   data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
00325     // }
00326 
00327     if( in_grid_ ){
00328       // Apply the column ordering, so that AC is the column-permuted A, and compute etree
00329       size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
00330       for( size_t i = 0; i < nnz_loc; ++i ) colind_[i] = data_.perm_c[colind_[i]];
00331 
00332       // Distribute data from the symbolic factorization
00333       if( same_symbolic_ ){
00334         // Note: with the SamePattern_SameRowPerm options, it does not
00335         // matter that the glu_freeable member has never been
00336         // initialized, because it is never accessed.  It is a
00337         // placeholder arg.  The real work is done in data_.lu
00338         function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
00339                                   as<SLUD::int_t>(this->globalNumRows_), // aka "n"
00340                                   &(data_.A), &(data_.scale_perm),
00341                                   &(data_.glu_freeable), &(data_.lu),
00342                                   &(data_.grid));
00343       } else {
00344         function_map::dist_psymbtonum(SLUD::DOFACT,
00345                                       as<SLUD::int_t>(this->globalNumRows_), // aka "n"
00346                                       &(data_.A), &(data_.scale_perm),
00347                                       &(data_.pslu_freeable), &(data_.lu),
00348                                       &(data_.grid));
00349       }
00350 
00351       // Retrieve the normI of A (required by gstrf).
00352       double anorm = function_map::plangs((char *)"I", &(data_.A), &(data_.grid));
00353 
00354       int info = 0;
00355       {
00356 #ifdef HAVE_AMESOS2_TIMERS
00357         Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
00358 #endif
00359 
00360         function_map::gstrf(&(data_.options), this->globalNumRows_,
00361                             this->globalNumCols_, anorm, &(data_.lu),
00362                             &(data_.grid), &(data_.stat), &info);
00363       }
00364 
00365       // Check output
00366       TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
00367                           std::runtime_error,
00368                           "L and U factors have been computed but U("
00369                           << info << "," << info << ") is exactly zero "
00370                           "(i.e. U is singular)");
00371     }
00372 
00373     // The other option, that info_st < 0, denotes invalid parameters
00374     // to the function, but we'll assume for now that that won't
00375     // happen.
00376 
00377     data_.options.Fact = SLUD::FACTORED;
00378     same_symbolic_ = true;
00379 
00380     return EXIT_SUCCESS;
00381   }
00382 
00383 
00384   template <class Matrix, class Vector>
00385   int
00386   Superludist<Matrix,Vector>::solve_impl(const Teuchos::Ptr<MultiVecAdapter<Vector> >       X,
00387                                          const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
00388   {
00389     using Teuchos::as;
00390 
00391     // local_len_rhs is how many of the multivector rows belong to
00392     // this processor in the SuperLU_DIST processor grid.
00393     const size_t local_len_rhs = superlu_rowmap_->getNodeNumElements();
00394     const global_size_type nrhs = X->getGlobalNumVectors();
00395     const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
00396 
00397     // make sure our multivector storage is sized appropriately
00398     bvals_.resize(nrhs * local_len_rhs);
00399     xvals_.resize(nrhs * local_len_rhs);
00400 
00401     // We assume the global length of the two vectors have already been
00402     // checked for compatibility
00403 
00404     {                           // get the values from B
00405 #ifdef HAVE_AMESOS2_TIMERS
00406       Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
00407 #endif
00408 
00409       {
00410         // The input dense matrix for B should be distributed in the
00411         // same manner as the superlu_dist matrix.  That is, if a
00412         // processor has m_loc rows of A, then it should also have
00413         // m_loc rows of B (and the same rows).  We accomplish this by
00414         // distributing the multivector rows with the same Map that
00415         // the matrix A's rows are distributed.
00416 #ifdef HAVE_AMESOS2_TIMERS
00417         Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
00418 #endif
00419 
00420         // get grid-distributed mv data.  The multivector data will be
00421         // distributed across the processes in the SuperLU_DIST grid.
00422         typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
00423         copy_helper::do_get(B,
00424                             bvals_(),
00425                             local_len_rhs,
00426                             Teuchos::ptrInArg(*superlu_rowmap_));
00427       }
00428     }         // end block for conversion time
00429 
00430     if( in_grid_ ){
00431       // if( data_.options.trans == SLUD::NOTRANS ){
00432       //   if( data_.rowequ ){            // row equilibration has been done on AC
00433       //  // scale bxvals_ by diag(R)
00434       //  Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
00435       //              SLUD::slu_mt_mult<slu_type,magnitude_type>());
00436       //   }
00437       // } else if( data_.colequ ){       // column equilibration has been done on AC
00438       //   // scale bxvals_ by diag(C)
00439       //   Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
00440       //            SLUD::slu_mt_mult<slu_type,magnitude_type>());
00441       // }
00442 
00443       // Initialize the SOLVEstruct_t.
00444       //
00445       // We are able to reuse the solve struct if we have not changed
00446       // the sparsity pattern of L and U since the last solve
00447       if( !same_solve_struct_ ){
00448         if( data_.options.SolveInitialized == SLUD::YES ){
00449           function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
00450         }
00451         function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
00452                                 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
00453                                 &(data_.grid), &(data_.solve_struct));
00454         // Flag that we can reuse this solve_struct unless another
00455         // symbolicFactorization is called between here and the next
00456         // solve.
00457         same_solve_struct_ = true;
00458       }
00459 
00460       int ierr = 0; // returned error code
00461       {
00462 #ifdef HAVE_AMESOS2_TIMERS
00463         Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
00464 #endif
00465 
00466         function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
00467                             &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
00468                             as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
00469                             as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
00470                             &(data_.solve_struct), &(data_.stat), &ierr);
00471       } // end block for solve time
00472 
00473       TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
00474                           std::runtime_error,
00475                           "Argument " << -ierr << " to gstrs had an illegal value" );
00476 
00477       // "Un-scale" the solution so that it is a solution of the original system
00478       // if( data_.options.trans == SLUD::NOTRANS ){
00479       //   if( data_.colequ ){    // column equilibration has been done on AC
00480       //  // scale bxvals_ by diag(C)
00481       //  Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
00482       //              SLUD::slu_mt_mult<slu_type,magnitude_type>());
00483       //   }
00484       // } else if( data_.rowequ ){               // row equilibration has been done on AC
00485       //   // scale bxvals_ by diag(R)
00486       //   Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
00487       //            SLUD::slu_mt_mult<slu_type,magnitude_type>());
00488       // }
00489       {                         // permute B to a solution of the original system
00490 #ifdef HAVE_AMESOS2_TIMERS
00491         Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
00492 #endif
00493         SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
00494         function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
00495                                            as<SLUD::int_t>(local_len_rhs),
00496                                            data_.solve_struct.row_to_proc,
00497                                            data_.solve_struct.inv_perm_c,
00498                                            bvals_.getRawPtr(), ld,
00499                                            xvals_.getRawPtr(), ld,
00500                                            as<int>(nrhs),
00501                                            &(data_.grid));
00502       }
00503     }
00504 
00505     /* Update X's global values */
00506     {
00507 #ifdef HAVE_AMESOS2_TIMERS
00508       Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
00509 #endif
00510 
00511       typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
00512       put_helper::do_put(X,
00513                          xvals_(),
00514                          local_len_rhs,
00515                          Teuchos::ptrInArg(*superlu_rowmap_));
00516     }
00517 
00518     return EXIT_SUCCESS;
00519   }
00520 
00521 
00522   template <class Matrix, class Vector>
00523   bool
00524   Superludist<Matrix,Vector>::matrixShapeOK_impl() const
00525   {
00526     // SuperLU_DIST requires square matrices
00527     return( this->globalNumRows_ == this->globalNumCols_ );
00528   }
00529 
00530 
00531   template <class Matrix, class Vector>
00532   void
00533   Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
00534   {
00535     using Teuchos::as;
00536     using Teuchos::RCP;
00537     using Teuchos::getIntegralValue;
00538     using Teuchos::ParameterEntryValidator;
00539 
00540     RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
00541 
00542     if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
00543       TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
00544                             parameterList->isParameter("npcol")),
00545                           std::invalid_argument,
00546                           "nprow and npcol must be set together" );
00547 
00548       SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
00549       SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
00550 
00551       TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
00552                           std::invalid_argument,
00553                           "nprow and npcol combination invalid" );
00554 
00555       if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
00556         // De-allocate the default grid that was initialized in the constructor
00557         SLUD::superlu_gridexit(&(data_.grid));
00558         // Create a new grid
00559         SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
00560       } // else our grid has not changed size since the last initialization
00561     }
00562 
00563     TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
00564                         std::invalid_argument,
00565                         "SuperLU_DIST does not support solving the tranpose system" );
00566 
00567     data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
00568 
00569     // TODO: Uncomment when supported
00570     // bool equil = parameterList->get<bool>("Equil", true);
00571     // data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
00572     data_.options.Equil = SLUD::NO;
00573 
00574     if( parameterList->isParameter("ColPerm") ){
00575       RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
00576       parameterList->getEntry("ColPerm").setValidator(colperm_validator);
00577 
00578       data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
00579     }
00580 
00581     // Always use the "NOROWPERM" option to avoid a serial bottleneck
00582     // with the weighted bipartite matching algorithm used for the
00583     // "LargeDiag" RowPerm.  Note the inconsistency with the SuperLU
00584     // User guide (which states that the value should be "NATURAL").
00585     data_.options.RowPerm = SLUD::NOROWPERM;
00586 
00587     // TODO: Uncomment when supported
00588     // if( parameterList->isParameter("IterRefine") ){
00589     //   RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
00590     //   parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
00591 
00592     //   data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
00593     // }
00594     data_.options.IterRefine = SLUD::NOREFINE;
00595 
00596     bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
00597     data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
00598   }
00599 
00600 
00601   template <class Matrix, class Vector>
00602   Teuchos::RCP<const Teuchos::ParameterList>
00603   Superludist<Matrix,Vector>::getValidParameters_impl() const
00604   {
00605     using std::string;
00606     using Teuchos::tuple;
00607     using Teuchos::ParameterList;
00608     using Teuchos::EnhancedNumberValidator;
00609     using Teuchos::setStringToIntegralParameter;
00610     using Teuchos::stringToIntegralParameterEntryValidator;
00611 
00612     static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
00613 
00614     if( is_null(valid_params) ){
00615       Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
00616 
00617       Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
00618         = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
00619       col_row_validator->setMin(1);
00620 
00621       pl->set("npcol", data_.grid.npcol,
00622               "Number of columns in the processor grid. "
00623               "Must be set with nprow", col_row_validator);
00624       pl->set("nprow", data_.grid.nprow,
00625               "Number of rows in the SuperLU_DIST processor grid. "
00626               "Must be set together with npcol", col_row_validator);
00627 
00628       // validator will catch any value besides NOTRANS
00629       setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
00630                                                   "Solve for the transpose system or not",
00631                                                   tuple<string>("NOTRANS"),
00632                                                   tuple<string>("Do not solve with transpose"),
00633                                                   tuple<SLUD::trans_t>(SLUD::NOTRANS),
00634                                                   pl.getRawPtr());
00635 
00636       // TODO: uncomment when supported
00637       // pl->set("Equil", false, "Whether to equilibrate the system before solve");
00638 
00639       // TODO: uncomment when supported
00640       // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
00641       //                                                     "Type of iterative refinement to use",
00642       //                                                     tuple<string>("NOREFINE", "DOUBLE"),
00643       //                                                     tuple<string>("Do not use iterative refinement",
00644       //                                                                   "Do double iterative refinement"),
00645       //                                                     tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
00646       //                                                                               SLUD::DOUBLE),
00647       //                                                     pl.getRawPtr());
00648 
00649       pl->set("ReplaceTinyPivot", true,
00650               "Specifies whether to replace tiny diagonals during LU factorization");
00651 
00652       setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
00653                                                     "Specifies how to permute the columns of the "
00654                                                     "matrix for sparsity preservation",
00655                                                     tuple<string>("NATURAL", "PARMETIS"),
00656                                                     tuple<string>("Natural ordering",
00657                                                                   "ParMETIS ordering on A^T + A"),
00658                                                     tuple<SLUD::colperm_t>(SLUD::NATURAL,
00659                                                                            SLUD::PARMETIS),
00660                                                     pl.getRawPtr());
00661 
00662       valid_params = pl;
00663     }
00664 
00665     return valid_params;
00666   }
00667 
00668 
00669   template <class Matrix, class Vector>
00670   void
00671   Superludist<Matrix,Vector>::get_default_grid_size(int nprocs,
00672                                                     SLUD::int_t& nprow,
00673                                                     SLUD::int_t& npcol) const {
00674     TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
00675                         std::invalid_argument,
00676                         "Number of MPI processes must be at least 1" );
00677     SLUD::int_t c, r = 1;
00678     while( r*r <= nprocs ) r++;
00679     nprow = npcol = --r;                // fall back to square grid
00680     c = nprocs / r;
00681     while( (r--)*c != nprocs ){
00682       c = nprocs / r;           // note integer division
00683     }
00684     ++r;
00685     // prefer the square grid over a single row (which will only happen
00686     // in the case of a prime nprocs
00687     if( r > 1 || nprocs < 9){   // nprocs < 9 is a heuristic for the small cases
00688       nprow = r;
00689       npcol = c;
00690     }
00691   }
00692 
00693 
00694   template <class Matrix, class Vector>
00695   bool
00696   Superludist<Matrix,Vector>::loadA_impl(EPhase current_phase){
00697     // Extract the necessary information from mat and call SLU function
00698     using Teuchos::Array;
00699     using Teuchos::ArrayView;
00700     using Teuchos::ptrInArg;
00701     using Teuchos::as;
00702 
00703     using SLUD::int_t;
00704 
00705 #ifdef HAVE_AMESOS2_TIMERS
00706     Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
00707 #endif
00708 
00709     // Cleanup old store memory if it's non-NULL
00710     if( data_.A.Store != NULL ){
00711       SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
00712       data_.A.Store = NULL;
00713     }
00714 
00715     Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
00716       = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
00717 
00718     int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
00719     l_nnz  = as<int_t>(redist_mat->getLocalNNZ());
00720     l_rows = as<int_t>(redist_mat->getLocalNumRows());
00721     g_rows = as<int_t>(redist_mat->getGlobalNumRows());
00722     g_cols = g_rows;            // we deal with square matrices
00723     fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
00724 
00725     nzvals_.resize(l_nnz);
00726     colind_.resize(l_nnz);
00727     rowptr_.resize(l_rows + 1);
00728 
00729     int_t nnz_ret = 0;
00730     {
00731 #ifdef HAVE_AMESOS2_TIMERS
00732       Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
00733 #endif
00734 
00735       Util::get_crs_helper<
00736       MatrixAdapter<Matrix>,
00737         slu_type, int_t, int_t >::do_get(redist_mat.ptr(),
00738                                          nzvals_(), colind_(), rowptr_(),
00739                                          nnz_ret,
00740                                          ptrInArg(*superlu_rowmap_),
00741                                          ARBITRARY);
00742   }
00743 
00744     TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
00745                         std::runtime_error,
00746                         "Did not get the expected number of non-zero vals");
00747 
00748   // Get the SLU data type for this type of matrix
00749   SLUD::Dtype_t dtype = type_map::dtype;
00750 
00751   if( in_grid_ ){
00752     function_map::create_CompRowLoc_Matrix(&(data_.A),
00753                                            g_rows, g_cols,
00754                                            l_nnz, l_rows, fst_global_row,
00755                                            nzvals_.getRawPtr(),
00756                                            colind_.getRawPtr(),
00757                                            rowptr_.getRawPtr(),
00758                                            SLUD::SLU_NR_loc,
00759                                            dtype, SLUD::SLU_GE);
00760   }
00761 
00762   return true;
00763 }
00764 
00765 
00766   template<class Matrix, class Vector>
00767   const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
00768 
00769 
00770 } // end namespace Amesos2
00771 
00772 #endif  // AMESOS2_SUPERLUDIST_DEF_HPP