Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 //
00003 // ***********************************************************************
00004 //
00005 //           Amesos2: Templated Direct Sparse Solver Package
00006 //                  Copyright 2011 Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00039 //
00040 // ***********************************************************************
00041 //
00042 // @HEADER
00043 
00052 #ifndef AMESOS2_SUPERLUDIST_DEF_HPP
00053 #define AMESOS2_SUPERLUDIST_DEF_HPP
00054 
00055 #include <Teuchos_Tuple.hpp>
00056 #include <Teuchos_StandardParameterEntryValidators.hpp>
00057 #include <Teuchos_DefaultMpiComm.hpp>
00058 
00059 #include "Amesos2_SolverCore_def.hpp"
00060 #include "Amesos2_Superludist_TypeMap.hpp"
00061 #include "Amesos2_Util.hpp"
00062 
00063 
00064 namespace Amesos2 {
00065 
00066 
00067   template <class Matrix, class Vector>
00068   Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
00069                                           Teuchos::RCP<Vector> X,
00070                                           Teuchos::RCP<const Vector> B)
00071     : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
00072     , nzvals_()                 // initialization to empty arrays
00073     , colind_()
00074     , rowptr_()
00075     , bvals_()
00076     , xvals_()
00077     , in_grid_(false)
00078   {
00080     // Set up the SuperLU_DIST processor grid //
00082 
00083     int nprocs = this->getComm()->getSize();
00084     SLUD::int_t nprow, npcol;
00085     get_default_grid_size(nprocs, nprow, npcol);
00086     data_.mat_comm = dynamic_cast<const Teuchos::MpiComm<int>* >(this->matrixA_->getComm().getRawPtr())->getRawMpiComm()->operator()();
00087     SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
00088 
00090     // Set Some default parameters.                       //
00091     //                                                    //
00092     // Must do this after grid has been created in        //
00093     // case user specifies the nprow and npcol parameters //
00095     Teuchos::RCP<Teuchos::ParameterList> default_params
00096       = Teuchos::parameterList( *(this->getValidParameters()) );
00097     this->setParameters(default_params);
00098 
00099     // Set some internal options
00100     data_.options.Fact = SLUD::DOFACT;
00101     data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
00102     data_.options.SolveInitialized  = SLUD::NO;
00103     data_.options.RefineInitialized = SLUD::NO;
00104     data_.rowequ = false;
00105     data_.colequ = false;
00106     data_.perm_r.resize(this->globalNumRows_);
00107     data_.perm_c.resize(this->globalNumCols_);
00108 
00110     // Set up a communicator for the parallel column ordering and //
00111     // parallel symbolic factorization.                           //
00113     data_.symb_comm = MPI_COMM_NULL;
00114     int color = MPI_UNDEFINED;
00115     int my_rank = this->rank_;
00116 
00117     /* domains is the next power of 2 less than nprow*npcol.  This
00118      * value will be used for creating an MPI communicator for the
00119      * pre-ordering and symbolic factorization methods.
00120      */
00121     data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
00122 
00123     if( this->rank_ < data_.domains ) color = 0;
00124     MPI_Comm_split (data_.mat_comm, color, my_rank, &(data_.symb_comm));
00125 
00127     // Set up a row map that maps to only processors that are in the    //
00128     // SuperLU processor grid.  This will be used for redistributing A. //
00130 
00131     int my_weight = 0;
00132     if( this->rank_ < nprow * npcol ){
00133       in_grid_ = true; my_weight = 1; // I am in the grid, and I get some of the matrix rows
00134     }
00135     // TODO: might only need to initialize if parallel symbolic factorization is requested.
00136     superlu_rowmap_
00137       = Tpetra::createWeightedContigMapWithNode<local_ordinal_type,
00138       global_ordinal_type,
00139       node_type>(my_weight,
00140                  this->globalNumRows_,
00141                  this->getComm(),
00142                  Kokkos::DefaultNode::getDefaultNode());
00143     // TODO: the node above should technically come from the matrix
00144     // itself.  Might need to add a getNode method to the matrix
00145     // adapter.
00146 
00148     // Do some other initialization //
00150 
00151     data_.A.Store = NULL;
00152     function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
00153     SLUD::PStatInit(&(data_.stat));
00154     // We do not use ScalePermstructInit because we will use our own
00155     // arrays for storing perm_r and perm_c
00156     data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
00157     data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
00158   }
00159 
00160 
00161   template <class Matrix, class Vector>
00162   Superludist<Matrix,Vector>::~Superludist( )
00163   {
00164     /* Free SuperLU_DIST data_types
00165      * - Matrices
00166      * - Vectors
00167      * - Stat object
00168      * - ScalePerm, LUstruct, grid, and solve objects
00169      *
00170      * Note: the function definitions are the same regardless whether
00171      * complex or real, so we arbitrarily use the D namespace
00172      */
00173     if ( this->status_.getNumPreOrder() > 0 ){
00174       free( data_.sizes );
00175       free( data_.fstVtxSep );
00176     }
00177 
00178     // Cleanup old matrix store memory if it's non-NULL.  Our
00179     // Teuchos::Array's will destroy rowind, colptr, and nzval for us
00180     if( data_.A.Store != NULL ){
00181       SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
00182     }
00183 
00184     // LU data is initialized in numericFactorization_impl()
00185     if ( this->status_.getNumNumericFact() > 0 ){
00186       function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
00187     }
00188     function_map::LUstructFree(&(data_.lu));
00189 
00190     // If a symbolic factorization is ever performed without a
00191     // follow-up numericfactorization, there are some arrays in the
00192     // Pslu_freeable struct which will never be free'd by
00193     // SuperLU_DIST.
00194     if ( this->status_.symbolicFactorizationDone() &&
00195          !this->status_.numericFactorizationDone() ){
00196       if ( data_.pslu_freeable.xlsub != NULL ){
00197         free( data_.pslu_freeable.xlsub );
00198         free( data_.pslu_freeable.lsub );
00199       }
00200       if ( data_.pslu_freeable.xusub != NULL ){
00201         free( data_.pslu_freeable.xusub );
00202         free( data_.pslu_freeable.usub );
00203       }
00204       if ( data_.pslu_freeable.supno_loc != NULL ){
00205         free( data_.pslu_freeable.supno_loc );
00206         free( data_.pslu_freeable.xsup_beg_loc );
00207         free( data_.pslu_freeable.xsup_end_loc );
00208       }
00209       free( data_.pslu_freeable.globToLoc );
00210     }
00211 
00212     SLUD::PStatFree( &(data_.stat) ) ;
00213 
00214     // Teuchos::Arrays will free R, C, perm_r, and perm_c
00215     // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
00216 
00217     if ( data_.options.SolveInitialized == SLUD::YES )
00218       function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
00219 
00220     SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
00221                                            // cases where grid
00222                                            // wouldn't be initialized?
00223 
00224     if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
00225   }
00226 
00227   template<class Matrix, class Vector>
00228   int
00229   Superludist<Matrix,Vector>::preOrdering_impl()
00230   {
00231     // We will always use the NATURAL row ordering to avoid the
00232     // sequential bottleneck present when doing any other row
00233     // ordering scheme from SuperLU_DIST
00234     //
00235     // Set perm_r to be the natural ordering
00236     SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
00237     for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
00238 
00239     // loadA_impl();                    // Refresh matrix values
00240 
00241     if( in_grid_ ){
00242       // If this function has been called at least once, then the
00243       // sizes, and fstVtxSep arrays were allocated in
00244       // get_perm_c_parmetis.  Delete them before calling that
00245       // function again.  These arrays will also be dealloc'd in the
00246       // deconstructor.
00247       if( this->status_.getNumPreOrder() > 0 ){
00248         free( data_.sizes );
00249         free( data_.fstVtxSep );
00250       }
00251 #ifdef HAVE_AMESOS2_TIMERS
00252       Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
00253 #endif
00254 
00255       float info = 0.0;
00256       info = SLUD::get_perm_c_parmetis( &(data_.A),
00257                                         data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
00258                                         data_.grid.nprow * data_.grid.npcol, data_.domains,
00259                                         &(data_.sizes), &(data_.fstVtxSep),
00260                                         &(data_.grid), &(data_.symb_comm) );
00261 
00262       TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
00263                           std::runtime_error,
00264                           "SuperLU_DIST pre-ordering ran out of memory after allocating "
00265                           << info << " bytes of memory" );
00266     }
00267 
00268     // Ordering will be applied directly before numeric factorization,
00269     // after we have a chance to get updated coefficients from the
00270     // matrix
00271 
00272     return EXIT_SUCCESS;
00273   }
00274 
00275 
00276 
00277   template <class Matrix, class Vector>
00278   int
00279   Superludist<Matrix,Vector>::symbolicFactorization_impl()
00280   {
00281     // loadA_impl();                    // Refresh matrix values
00282 
00283     if( in_grid_ ){
00284 
00285 #ifdef HAVE_AMESOS2_TIMERS
00286       Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
00287 #endif
00288 
00289       float info = 0.0;
00290       info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
00291                                  data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
00292                                  data_.perm_r.getRawPtr(), data_.sizes,
00293                                  data_.fstVtxSep, &(data_.pslu_freeable),
00294                                  &(data_.grid.comm), &(data_.symb_comm),
00295                                  &(data_.mem_usage));
00296 
00297       TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
00298                           std::runtime_error,
00299                           "SuperLU_DIST symbolic factorization ran out of memory after"
00300                           " allocating " << info << " bytes of memory" );
00301     }
00302     same_symbolic_ = false;
00303     same_solve_struct_ = false;
00304 
00305     return EXIT_SUCCESS;
00306   }
00307 
00308 
00309   template <class Matrix, class Vector>
00310   int
00311   Superludist<Matrix,Vector>::numericFactorization_impl(){
00312     using Teuchos::as;
00313 
00314     // loadA_impl();                    // Refresh the matrix values
00315 
00316     // if( data_.options.Equil == SLUD::YES ){
00317     //   // Apply the scalings computed in preOrdering
00318     //   function_map::laqgs(&(data_.A), data_.R.getRawPtr(),
00319     //                    data_.C.getRawPtr(), data_.rowcnd, data_.colcnd,
00320     //                    data_.amax, &(data_.equed));
00321 
00322     //   data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
00323     //   data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
00324     // }
00325 
00326     if( in_grid_ ){
00327       // Apply the column ordering, so that AC is the column-permuted A, and compute etree
00328       size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
00329       for( size_t i = 0; i < nnz_loc; ++i ) colind_[i] = data_.perm_c[colind_[i]];
00330 
00331       // Distribute data from the symbolic factorization
00332       if( same_symbolic_ ){
00333         // Note: with the SamePattern_SameRowPerm options, it does not
00334         // matter that the glu_freeable member has never been
00335         // initialized, because it is never accessed.  It is a
00336         // placeholder arg.  The real work is done in data_.lu
00337         function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
00338                                   as<SLUD::int_t>(this->globalNumRows_), // aka "n"
00339                                   &(data_.A), &(data_.scale_perm),
00340                                   &(data_.glu_freeable), &(data_.lu),
00341                                   &(data_.grid));
00342       } else {
00343         function_map::dist_psymbtonum(SLUD::DOFACT,
00344                                       as<SLUD::int_t>(this->globalNumRows_), // aka "n"
00345                                       &(data_.A), &(data_.scale_perm),
00346                                       &(data_.pslu_freeable), &(data_.lu),
00347                                       &(data_.grid));
00348       }
00349 
00350       // Retrieve the normI of A (required by gstrf).
00351       double anorm = function_map::plangs((char *)"I", &(data_.A), &(data_.grid));
00352 
00353       int info = 0;
00354       {
00355 #ifdef HAVE_AMESOS2_TIMERS
00356         Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
00357 #endif
00358 
00359         function_map::gstrf(&(data_.options), this->globalNumRows_,
00360                             this->globalNumCols_, anorm, &(data_.lu),
00361                             &(data_.grid), &(data_.stat), &info);
00362       }
00363 
00364       // Check output
00365       TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
00366                           std::runtime_error,
00367                           "L and U factors have been computed but U("
00368                           << info << "," << info << ") is exactly zero "
00369                           "(i.e. U is singular)");
00370     }
00371 
00372     // The other option, that info_st < 0, denotes invalid parameters
00373     // to the function, but we'll assume for now that that won't
00374     // happen.
00375 
00376     data_.options.Fact = SLUD::FACTORED;
00377     same_symbolic_ = true;
00378 
00379     return EXIT_SUCCESS;
00380   }
00381 
00382 
00383   template <class Matrix, class Vector>
00384   int
00385   Superludist<Matrix,Vector>::solve_impl(const Teuchos::Ptr<MultiVecAdapter<Vector> >       X,
00386                                          const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
00387   {
00388     using Teuchos::as;
00389 
00390     // local_len_rhs is how many of the multivector rows belong to
00391     // this processor in the SuperLU_DIST processor grid.
00392     const size_t local_len_rhs = superlu_rowmap_->getNodeNumElements();
00393     const global_size_type nrhs = X->getGlobalNumVectors();
00394     const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
00395 
00396     // make sure our multivector storage is sized appropriately
00397     bvals_.resize(nrhs * local_len_rhs);
00398     xvals_.resize(nrhs * local_len_rhs);
00399 
00400     // We assume the global length of the two vectors have already been
00401     // checked for compatibility
00402 
00403     {                           // get the values from B
00404 #ifdef HAVE_AMESOS2_TIMERS
00405       Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
00406 #endif
00407 
00408       {
00409         // The input dense matrix for B should be distributed in the
00410         // same manner as the superlu_dist matrix.  That is, if a
00411         // processor has m_loc rows of A, then it should also have
00412         // m_loc rows of B (and the same rows).  We accomplish this by
00413         // distributing the multivector rows with the same Map that
00414         // the matrix A's rows are distributed.
00415 #ifdef HAVE_AMESOS2_TIMERS
00416         Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
00417 #endif
00418 
00419         // get grid-distributed mv data.  The multivector data will be
00420         // distributed across the processes in the SuperLU_DIST grid.
00421         typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
00422         copy_helper::do_get(B,
00423                             bvals_(),
00424                             local_len_rhs,
00425                             Teuchos::ptrInArg(*superlu_rowmap_));
00426       }
00427     }         // end block for conversion time
00428 
00429     if( in_grid_ ){
00430       // if( data_.options.trans == SLUD::NOTRANS ){
00431       //   if( data_.rowequ ){            // row equilibration has been done on AC
00432       //  // scale bxvals_ by diag(R)
00433       //  Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
00434       //              SLUD::slu_mt_mult<slu_type,magnitude_type>());
00435       //   }
00436       // } else if( data_.colequ ){       // column equilibration has been done on AC
00437       //   // scale bxvals_ by diag(C)
00438       //   Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
00439       //            SLUD::slu_mt_mult<slu_type,magnitude_type>());
00440       // }
00441 
00442       // Initialize the SOLVEstruct_t.
00443       //
00444       // We are able to reuse the solve struct if we have not changed
00445       // the sparsity pattern of L and U since the last solve
00446       if( !same_solve_struct_ ){
00447         if( data_.options.SolveInitialized == SLUD::YES ){
00448           function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
00449         }
00450         function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
00451                                 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
00452                                 &(data_.grid), &(data_.solve_struct));
00453         // Flag that we can reuse this solve_struct unless another
00454         // symbolicFactorization is called between here and the next
00455         // solve.
00456         same_solve_struct_ = true;
00457       }
00458 
00459       int ierr = 0; // returned error code
00460       {
00461 #ifdef HAVE_AMESOS2_TIMERS
00462         Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
00463 #endif
00464 
00465         function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
00466                             &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
00467                             as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
00468                             as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
00469                             &(data_.solve_struct), &(data_.stat), &ierr);
00470       } // end block for solve time
00471 
00472       TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
00473                           std::runtime_error,
00474                           "Argument " << -ierr << " to gstrs had an illegal value" );
00475 
00476       // "Un-scale" the solution so that it is a solution of the original system
00477       // if( data_.options.trans == SLUD::NOTRANS ){
00478       //   if( data_.colequ ){    // column equilibration has been done on AC
00479       //  // scale bxvals_ by diag(C)
00480       //  Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
00481       //              SLUD::slu_mt_mult<slu_type,magnitude_type>());
00482       //   }
00483       // } else if( data_.rowequ ){               // row equilibration has been done on AC
00484       //   // scale bxvals_ by diag(R)
00485       //   Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
00486       //            SLUD::slu_mt_mult<slu_type,magnitude_type>());
00487       // }
00488       {                         // permute B to a solution of the original system
00489 #ifdef HAVE_AMESOS2_TIMERS
00490         Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
00491 #endif
00492         SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
00493         function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
00494                                            as<SLUD::int_t>(local_len_rhs),
00495                                            data_.solve_struct.row_to_proc,
00496                                            data_.solve_struct.inv_perm_c,
00497                                            bvals_.getRawPtr(), ld,
00498                                            xvals_.getRawPtr(), ld,
00499                                            as<int>(nrhs),
00500                                            &(data_.grid));
00501       }
00502     }
00503 
00504     /* Update X's global values */
00505     {
00506 #ifdef HAVE_AMESOS2_TIMERS
00507       Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
00508 #endif
00509 
00510       typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
00511       put_helper::do_put(X,
00512                          xvals_(),
00513                          local_len_rhs,
00514                          Teuchos::ptrInArg(*superlu_rowmap_));
00515     }
00516 
00517     return EXIT_SUCCESS;
00518   }
00519 
00520 
00521   template <class Matrix, class Vector>
00522   bool
00523   Superludist<Matrix,Vector>::matrixShapeOK_impl() const
00524   {
00525     // SuperLU_DIST requires square matrices
00526     return( this->globalNumRows_ == this->globalNumCols_ );
00527   }
00528 
00529 
00530   template <class Matrix, class Vector>
00531   void
00532   Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
00533   {
00534     using Teuchos::as;
00535     using Teuchos::RCP;
00536     using Teuchos::getIntegralValue;
00537     using Teuchos::ParameterEntryValidator;
00538 
00539     RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
00540 
00541     if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
00542       TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
00543                             parameterList->isParameter("npcol")),
00544                           std::invalid_argument,
00545                           "nprow and npcol must be set together" );
00546 
00547       SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
00548       SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
00549 
00550       TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
00551                           std::invalid_argument,
00552                           "nprow and npcol combination invalid" );
00553 
00554       if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
00555         // De-allocate the default grid that was initialized in the constructor
00556         SLUD::superlu_gridexit(&(data_.grid));
00557         // Create a new grid
00558         SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
00559       } // else our grid has not changed size since the last initialization
00560     }
00561 
00562     TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
00563                         std::invalid_argument,
00564                         "SuperLU_DIST does not support solving the tranpose system" );
00565 
00566     data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
00567 
00568     // TODO: Uncomment when supported
00569     // bool equil = parameterList->get<bool>("Equil", true);
00570     // data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
00571     data_.options.Equil = SLUD::NO;
00572 
00573     if( parameterList->isParameter("ColPerm") ){
00574       RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
00575       parameterList->getEntry("ColPerm").setValidator(colperm_validator);
00576 
00577       data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
00578     }
00579 
00580     // Always use the "NOROWPERM" option to avoid a serial bottleneck
00581     // with the weighted bipartite matching algorithm used for the
00582     // "LargeDiag" RowPerm.  Note the inconsistency with the SuperLU
00583     // User guide (which states that the value should be "NATURAL").
00584     data_.options.RowPerm = SLUD::NOROWPERM;
00585 
00586     // TODO: Uncomment when supported
00587     // if( parameterList->isParameter("IterRefine") ){
00588     //   RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
00589     //   parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
00590 
00591     //   data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
00592     // }
00593     data_.options.IterRefine = SLUD::NOREFINE;
00594 
00595     bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
00596     data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
00597   }
00598 
00599 
00600   template <class Matrix, class Vector>
00601   Teuchos::RCP<const Teuchos::ParameterList>
00602   Superludist<Matrix,Vector>::getValidParameters_impl() const
00603   {
00604     using std::string;
00605     using Teuchos::tuple;
00606     using Teuchos::ParameterList;
00607     using Teuchos::EnhancedNumberValidator;
00608     using Teuchos::setStringToIntegralParameter;
00609     using Teuchos::stringToIntegralParameterEntryValidator;
00610 
00611     static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
00612 
00613     if( is_null(valid_params) ){
00614       Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
00615 
00616       Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
00617         = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
00618       col_row_validator->setMin(1);
00619 
00620       pl->set("npcol", data_.grid.npcol,
00621               "Number of columns in the processor grid. "
00622               "Must be set with nprow", col_row_validator);
00623       pl->set("nprow", data_.grid.nprow,
00624               "Number of rows in the SuperLU_DIST processor grid. "
00625               "Must be set together with npcol", col_row_validator);
00626 
00627       // validator will catch any value besides NOTRANS
00628       setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
00629                                                   "Solve for the transpose system or not",
00630                                                   tuple<string>("NOTRANS"),
00631                                                   tuple<string>("Do not solve with transpose"),
00632                                                   tuple<SLUD::trans_t>(SLUD::NOTRANS),
00633                                                   pl.getRawPtr());
00634 
00635       // TODO: uncomment when supported
00636       // pl->set("Equil", false, "Whether to equilibrate the system before solve");
00637 
00638       // TODO: uncomment when supported
00639       // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
00640       //                                                     "Type of iterative refinement to use",
00641       //                                                     tuple<string>("NOREFINE", "DOUBLE"),
00642       //                                                     tuple<string>("Do not use iterative refinement",
00643       //                                                                   "Do double iterative refinement"),
00644       //                                                     tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
00645       //                                                                               SLUD::DOUBLE),
00646       //                                                     pl.getRawPtr());
00647 
00648       pl->set("ReplaceTinyPivot", true,
00649               "Specifies whether to replace tiny diagonals during LU factorization");
00650 
00651       setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
00652                                                     "Specifies how to permute the columns of the "
00653                                                     "matrix for sparsity preservation",
00654                                                     tuple<string>("NATURAL", "PARMETIS"),
00655                                                     tuple<string>("Natural ordering",
00656                                                                   "ParMETIS ordering on A^T + A"),
00657                                                     tuple<SLUD::colperm_t>(SLUD::NATURAL,
00658                                                                            SLUD::PARMETIS),
00659                                                     pl.getRawPtr());
00660 
00661       valid_params = pl;
00662     }
00663 
00664     return valid_params;
00665   }
00666 
00667 
00668   template <class Matrix, class Vector>
00669   void
00670   Superludist<Matrix,Vector>::get_default_grid_size(int nprocs,
00671                                                     SLUD::int_t& nprow,
00672                                                     SLUD::int_t& npcol) const {
00673     TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
00674                         std::invalid_argument,
00675                         "Number of MPI processes must be at least 1" );
00676     SLUD::int_t c, r = 1;
00677     while( r*r <= nprocs ) r++;
00678     nprow = npcol = --r;                // fall back to square grid
00679     c = nprocs / r;
00680     while( (r--)*c != nprocs ){
00681       c = nprocs / r;           // note integer division
00682     }
00683     ++r;
00684     // prefer the square grid over a single row (which will only happen
00685     // in the case of a prime nprocs
00686     if( r > 1 || nprocs < 9){   // nprocs < 9 is a heuristic for the small cases
00687       nprow = r;
00688       npcol = c;
00689     }
00690   }
00691 
00692 
00693   template <class Matrix, class Vector>
00694   bool
00695   Superludist<Matrix,Vector>::loadA_impl(EPhase current_phase){
00696     // Extract the necessary information from mat and call SLU function
00697     using Teuchos::Array;
00698     using Teuchos::ArrayView;
00699     using Teuchos::ptrInArg;
00700     using Teuchos::as;
00701 
00702     using SLUD::int_t;
00703 
00704 #ifdef HAVE_AMESOS2_TIMERS
00705     Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
00706 #endif
00707 
00708     // Cleanup old store memory if it's non-NULL
00709     if( data_.A.Store != NULL ){
00710       SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
00711       data_.A.Store = NULL;
00712     }
00713 
00714     Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
00715       = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
00716 
00717     int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
00718     l_nnz  = as<int_t>(redist_mat->getLocalNNZ());
00719     l_rows = as<int_t>(redist_mat->getLocalNumRows());
00720     g_rows = as<int_t>(redist_mat->getGlobalNumRows());
00721     g_cols = g_rows;            // we deal with square matrices
00722     fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
00723 
00724     nzvals_.resize(l_nnz);
00725     colind_.resize(l_nnz);
00726     rowptr_.resize(l_rows + 1);
00727 
00728     int_t nnz_ret = 0;
00729     {
00730 #ifdef HAVE_AMESOS2_TIMERS
00731       Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
00732 #endif
00733 
00734       Util::get_crs_helper<
00735       MatrixAdapter<Matrix>,
00736         slu_type, int_t, int_t >::do_get(redist_mat.ptr(),
00737                                          nzvals_(), colind_(), rowptr_(),
00738                                          nnz_ret,
00739                                          ptrInArg(*superlu_rowmap_),
00740                                          ARBITRARY);
00741   }
00742 
00743     TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
00744                         std::runtime_error,
00745                         "Did not get the expected number of non-zero vals");
00746 
00747   // Get the SLU data type for this type of matrix
00748   SLUD::Dtype_t dtype = type_map::dtype;
00749 
00750   if( in_grid_ ){
00751     function_map::create_CompRowLoc_Matrix(&(data_.A),
00752                                            g_rows, g_cols,
00753                                            l_nnz, l_rows, fst_global_row,
00754                                            nzvals_.getRawPtr(),
00755                                            colind_.getRawPtr(),
00756                                            rowptr_.getRawPtr(),
00757                                            SLUD::SLU_NR_loc,
00758                                            dtype, SLUD::SLU_GE);
00759   }
00760 
00761   return true;
00762 }
00763 
00764 
00765   template<class Matrix, class Vector>
00766   const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
00767 
00768 
00769 } // end namespace Amesos2
00770 
00771 #endif  // AMESOS2_SUPERLUDIST_DEF_HPP