Amesos Package Browser (Single Doxygen Collection) Development
Amesos_Superlu.cpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //                Amesos: Direct Sparse Solver Package
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #include "Amesos_Superlu.h"
00030 #include "Epetra_Map.h"
00031 #include "Epetra_Import.h"
00032 #include "Epetra_RowMatrix.h"
00033 #include "Epetra_CrsMatrix.h"
00034 #include "Epetra_Vector.h"
00035 #include "Epetra_Util.h"
00036 #include "Epetra_Time.h"
00037 #include "Epetra_Comm.h"
00038 #include "Epetra_LinearProblem.h"
00039 
00040 namespace SLU
00041 {
00042 extern "C" {
00043 #ifdef AMESOS_SUPERLU_PRE_JULY2005
00044 #include "dsp_defs.h"
00045 #else
00046 #include "slu_ddefs.h"
00047 #endif
00048 }
00049 }
00050 struct SLUData
00051 {
00052   SLU::SuperMatrix A, B, X, L, U;
00053 #ifdef USE_DGSTRF
00054   SLU::SuperMatrix AC;
00055 #endif
00056   SLU::superlu_options_t SLU_options;
00057   SLU::mem_usage_t mem_usage;
00058   SLU::fact_t refactor_option ;         //  SamePattern or SamePattern_SameRowPerm 
00059 };
00060 
00061 using namespace Teuchos;
00062 
00063 //=============================================================================
00064 Amesos_Superlu::Amesos_Superlu(const Epetra_LinearProblem &prob ):
00065   NumGlobalRows_(-1),
00066   NumGlobalNonzeros_(-1),
00067   UseTranspose_(false),
00068   FactorizationOK_(false),
00069   FactorizationDone_(false),
00070   iam_(0),
00071   MtxConvTime_(-1),
00072   MtxRedistTime_(-1),
00073   VecRedistTime_(-1),
00074   NumFactTime_(-1),
00075   SolveTime_(-1),
00076   OverheadTime_(-1),
00077   SerialMap_(Teuchos::null), 
00078   SerialCrsMatrixA_(Teuchos::null), 
00079   ImportToSerial_(Teuchos::null),
00080   SerialMatrix_(0),
00081   RowMatrixA_(0)
00082 {
00083   data_ = new SLUData();
00084   ferr_.resize(1);
00085   berr_.resize(1);
00086 
00087   Problem_ = &prob ; 
00088 
00089   // I have to skip timing on this, because the Comm object is not done yet
00090   dCreate_Dense_Matrix( &(data_->X), 
00091       0, 
00092       0, 
00093       &DummyArray[0],
00094       0, 
00095       SLU::SLU_DN, SLU::SLU_D, SLU::SLU_GE);
00096     
00097   dCreate_Dense_Matrix( &(data_->B), 
00098       0, 
00099       0, 
00100       &DummyArray[0],
00101       0, 
00102       SLU::SLU_DN, SLU::SLU_D, SLU::SLU_GE);
00103 }
00104 
00105 //=============================================================================
00106 Amesos_Superlu::~Amesos_Superlu(void) 
00107 {
00108   if (PrintTiming_) PrintTiming();
00109   if (PrintStatus_) PrintStatus();
00110 
00111   Destroy_SuperMatrix_Store(&data_->B);
00112   Destroy_SuperMatrix_Store(&data_->X);
00113 
00114   if (iam_ == 0) { 
00115     if (FactorizationDone_) { 
00116       Destroy_SuperMatrix_Store(&data_->A);
00117       Destroy_SuperNode_Matrix(&data_->L);
00118       Destroy_CompCol_Matrix(&data_->U);
00119     }
00120   }
00121 
00122   delete data_; 
00123 }
00124 
00125 // ====================================================================== 
00126 int Amesos_Superlu::SetParameters( Teuchos::ParameterList &ParameterList) 
00127 {
00128   // retrive general parameters
00129 
00130   SetStatusParameters( ParameterList );
00131 
00132   SetControlParameters( ParameterList );
00133 
00134   /* the list is empty now
00135   if (ParameterList.isSublist("Superlu") ) {
00136     Teuchos::ParameterList SuperluParams = ParameterList.sublist("Superlu") ;
00137   }  
00138   */
00139   return 0;
00140 }
00141 
00142 // ====================================================================== 
00143 bool Amesos_Superlu::MatrixShapeOK() const 
00144 { 
00145   if (GetProblem()->GetOperator()->OperatorRangeMap().NumGlobalPoints() != 
00146       GetProblem()->GetOperator()->OperatorDomainMap().NumGlobalPoints()) 
00147     return(false);
00148 
00149   return(true);
00150 }
00151 
00152 // ======================================================================
00153 int Amesos_Superlu::ConvertToSerial() 
00154 { 
00155   ResetTimer(0);
00156   ResetTimer(1);
00157 
00158   RowMatrixA_ = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
00159   if (RowMatrixA_ == 0)
00160     AMESOS_CHK_ERR(-1); // cannot cast to RowMatrix
00161 
00162   iam_ = Comm().MyPID() ;
00163 
00164   const Epetra_Map &OriginalMap = RowMatrixA_->RowMatrixRowMap() ; 
00165 
00166   NumGlobalRows_ = RowMatrixA_->NumGlobalRows();
00167   NumGlobalNonzeros_ = RowMatrixA_->NumGlobalNonzeros();
00168   if (NumGlobalRows_ != RowMatrixA_->NumGlobalCols())
00169     AMESOS_CHK_ERR(-1); // only square matrices
00170   if (NumGlobalNonzeros_ <= 0)
00171     AMESOS_CHK_ERR(-2); // empty matrix
00172 
00173   int NumMyElements_ = 0;
00174   if (iam_ == 0) NumMyElements_ = NumGlobalRows_;
00175 
00176   // If the matrix is distributed, then brings it to processor zero.
00177   // This also requires the construction of a serial map, and
00178   // the exporter from distributed to serial.
00179   // Otherwise, simply take the pointer of RowMatrixA_ and
00180   // set it to SerialMatrix_.
00181 
00182   if (Comm().NumProc() == 1) // Bug #1411 - should recognize serial matrices even when NumProc() > 1
00183   { 
00184      SerialMatrix_ = RowMatrixA_;
00185   } 
00186   else 
00187   {
00188     SerialMap_ = rcp(new Epetra_Map(NumGlobalRows_, NumMyElements_, 0, Comm()));
00189 
00190     ImportToSerial_ = rcp(new Epetra_Import(SerialMap(), OriginalMap));
00191 
00192     SerialCrsMatrixA_ = rcp(new Epetra_CrsMatrix(Copy, SerialMap(), 0));
00193     SerialCrsMatrixA_->Import(*RowMatrixA_, ImportToSerial(), Add); 
00194     
00195     // MS // Set zero element if not present, possibly add
00196     // MS // something to the diagonal
00197     //double AddToDiag = 0.0;
00198     // FIXME??      bug #1371
00199 #if 0
00200     if (iam_ == 0)
00201     {
00202       int this_res ;
00203       for (int i = 0 ; i < NumGlobalRows_; i++ ) {
00204   //  I am not sure what the intent is here, 
00205   //  but InsertGlobalValues returns 1 meaning "out of room" 
00206   //  and as a result, we sum AddToDiag_ into this diagonal element
00207   //  a second time.
00208         if (this_res=SerialCrsMatrixA_->InsertGlobalValues(i, 1, &AddToDiag, &i)) {
00209     std::cout << __FILE__ << "::" << __LINE__ 
00210          << " this_res = " <<  this_res 
00211          << std::endl ; 
00212           SerialCrsMatrixA_->SumIntoGlobalValues(i, 1, &AddToDiag, &i);
00213   }
00214       }
00215     }
00216 #endif
00217 
00218     SerialCrsMatrixA_->FillComplete();
00219     SerialMatrix_ = SerialCrsMatrixA_.get();
00220   }
00221 
00222   MtxRedistTime_ = AddTime("Total matrix redistribution time", MtxRedistTime_, 0);
00223   OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
00224 
00225   return(0);
00226 }
00227 
00228 //
00229 //  See also pre and post conditions in Amesos_Superlu.h
00230 //  Preconditions:  
00231 //    SerialMatrix_ points to the matrix to be factored and solved
00232 //    NumGlobalRows_ has been set to the dimension of the matrix
00233 //    NumGlobalNonzeros_ has been set to the number of non-zeros in the matrix
00234 //      (i.e. ConvertToSerial() has been called) 
00235 //    FactorizationDone_ and FactorizationOK_  must be accurate
00236 //
00237 //  Postconditions:
00238 //    data->A 
00239 //    FactorizationDone_ = true
00240 //
00241 //  Issues:
00242 //
00243 //
00244 int Amesos_Superlu::Factor()
00245 {
00246   // Convert matrix to the form that Superlu expects (Ap, Ai, Aval) 
00247   // I suppose that the matrix has already been formed on processor 0
00248   // as SerialMatrix_.
00249 
00250   ResetTimer(0);
00251   if (iam_ == 0) 
00252   {
00253     ResetTimer(1);
00254 
00255     if (NumGlobalRows_ != SerialMatrix_->NumGlobalRows() ||
00256         NumGlobalRows_ != SerialMatrix_->NumGlobalCols() ||
00257         NumGlobalRows_ != SerialMatrix_->NumMyRows() ||
00258         NumGlobalRows_ != SerialMatrix_->NumMyCols() ||
00259         NumGlobalNonzeros_ != SerialMatrix_->NumGlobalNonzeros())
00260     {
00261       AMESOS_CHK_ERR(-1); // something fishy here
00262     }
00263 
00264     NumGlobalNonzeros_ = SerialMatrix_->NumGlobalNonzeros();
00265     Ap_.resize(NumGlobalRows_ + 1);
00266     Ai_.resize(EPETRA_MAX( NumGlobalRows_, NumGlobalNonzeros_)); 
00267     Aval_.resize(EPETRA_MAX( NumGlobalRows_, NumGlobalNonzeros_)); 
00268 
00269     int NzThisRow ;
00270     int Ai_index = 0 ; 
00271     int MyRow;
00272     int MaxNumEntries_ = SerialMatrix_->MaxNumEntries();
00273 
00274     for (MyRow = 0; MyRow < NumGlobalRows_ ; MyRow++ ) 
00275     {
00276       Ap_[MyRow] = Ai_index; 
00277       int ierr;
00278       ierr = SerialMatrix_->ExtractMyRowCopy(MyRow, MaxNumEntries_, NzThisRow, 
00279                                              &Aval_[Ai_index], &Ai_[Ai_index]);
00280       AMESOS_CHK_ERR(ierr);
00281       Ai_index += NzThisRow;
00282     }
00283 
00284     Ap_[NumGlobalRows_] = Ai_index; 
00285 
00286     OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
00287 
00288     if ( FactorizationDone_ ) { 
00289       Destroy_SuperMatrix_Store(&data_->A);
00290       Destroy_SuperNode_Matrix(&data_->L);
00291       Destroy_CompCol_Matrix(&data_->U);
00292     }
00293       
00294     /* Create matrix A in the format expected by SuperLU. */
00295     dCreate_CompCol_Matrix( &(data_->A), NumGlobalRows_, NumGlobalRows_,
00296           NumGlobalNonzeros_, &Aval_[0],
00297           &Ai_[0], &Ap_[0], SLU::SLU_NR, SLU::SLU_D, SLU::SLU_GE );
00298   }
00299 
00300   MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0);
00301 
00302   return 0;
00303 }   
00304 
00305 // ====================================================================== 
00306 // MS // What is the difference between ReFactor() and Factor()?
00307 // ====================================================================== 
00308 int Amesos_Superlu::ReFactor()
00309 {
00310   // Convert matrix to the form that Superlu expects (Ap, Ai, Aval) 
00311   // I suppose that ConvertToSerial() has already been called, 
00312   // there I have SerialMatrix_ stored at it should. 
00313   // Only processor 0 does something useful here.
00314 
00315   if (iam_ == 0) 
00316   {
00317     ResetTimer(1);
00318 
00319     if (NumGlobalRows_ != SerialMatrix_->NumGlobalRows() ||
00320         NumGlobalRows_ != SerialMatrix_->NumGlobalCols() ||
00321         NumGlobalRows_ != SerialMatrix_->NumMyRows() ||
00322         NumGlobalRows_ != SerialMatrix_->NumMyCols() ||
00323         NumGlobalNonzeros_ != SerialMatrix_->NumGlobalNonzeros())
00324     {
00325       AMESOS_CHK_ERR(-1);
00326     }
00327 
00328     Ap_.resize(NumGlobalRows_+ 1);
00329     Ai_.resize(EPETRA_MAX( NumGlobalRows_, NumGlobalNonzeros_)); 
00330     Aval_.resize(EPETRA_MAX(NumGlobalRows_, NumGlobalNonzeros_)); 
00331 
00332     int NzThisRow ;
00333     int Ai_index = 0 ; 
00334     int MyRow;
00335     double *RowValues;
00336     int *ColIndices;
00337     std::vector<int> ColIndicesV_;
00338     std::vector<double> RowValuesV_;
00339     int MaxNumEntries_ = SerialMatrix_->MaxNumEntries();
00340 
00341     Epetra_CrsMatrix *SuperluCrs = dynamic_cast<Epetra_CrsMatrix *>(SerialMatrix_);
00342     if ( SuperluCrs == 0 ) {
00343       ColIndicesV_.resize(MaxNumEntries_);
00344       RowValuesV_.resize(MaxNumEntries_);
00345     }
00346     for ( MyRow = 0; MyRow < NumGlobalRows_ ; MyRow++ ) {
00347       if ( SuperluCrs != 0 ) {
00348   AMESOS_CHK_ERR(SuperluCrs->ExtractMyRowView(MyRow, NzThisRow, RowValues, ColIndices));
00349       }
00350       else {
00351   AMESOS_CHK_ERR(SerialMatrix_->ExtractMyRowCopy(MyRow, MaxNumEntries_,NzThisRow, &RowValuesV_[0], 
00352                                                        &ColIndicesV_[0]));
00353   RowValues =  &RowValuesV_[0];
00354   ColIndices = &ColIndicesV_[0];
00355       }
00356 
00357       if (Ap_[MyRow] != Ai_index) 
00358         AMESOS_CHK_ERR(-4);
00359 
00360       for (int j = 0; j < NzThisRow; j++) { 
00361   assert(Ai_[Ai_index] == ColIndices[j]) ;   // FIXME this may not work.   
00362   Aval_[Ai_index] = RowValues[j] ; 
00363   Ai_index++;
00364       }
00365     }
00366     assert( NumGlobalRows_ == MyRow );
00367     Ap_[ NumGlobalRows_ ] = Ai_index ; 
00368     
00369     OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
00370 
00371     assert ( FactorizationDone_ ) ; 
00372     Destroy_SuperMatrix_Store(&data_->A);
00373     Destroy_SuperNode_Matrix(&data_->L);
00374     Destroy_CompCol_Matrix(&data_->U);
00375     /* Create matrix A in the format expected by SuperLU. */
00376     dCreate_CompCol_Matrix( &(data_->A), NumGlobalRows_, NumGlobalRows_,
00377           NumGlobalNonzeros_, &Aval_[0],
00378           &Ai_[0], &Ap_[0], SLU::SLU_NR, SLU::SLU_D, SLU::SLU_GE );
00379   }
00380 
00381   return 0;
00382 }
00383 
00384 // ====================================================================== 
00385 int Amesos_Superlu::SymbolicFactorization() 
00386 {
00387   // nothing happens here, all it is done in NumericFactorization()
00388   FactorizationOK_ = false; 
00389   return 0;
00390 }
00391 
00392 // ======================================================================
00393 int Amesos_Superlu::NumericFactorization() 
00394 {
00395   CreateTimer(Comm(), 2);
00396 
00397   ResetTimer(0);
00398 
00399   ConvertToSerial(); 
00400 
00401   perm_r_.resize(NumGlobalRows_);
00402   perm_c_.resize(NumGlobalRows_);
00403   etree_.resize(NumGlobalRows_);
00404   R_.resize(NumGlobalRows_);
00405   C_.resize(NumGlobalRows_);
00406 
00407   SLU::superlu_options_t& SLUopt =  data_->SLU_options ; 
00408 
00409   set_default_options( &SLUopt ) ; 
00410   if (FactorizationOK_) {
00411     AMESOS_CHK_ERR(ReFactor());
00412     SLUopt.Fact = data_->refactor_option ;
00413   }  else { 
00414     AMESOS_CHK_ERR(Factor());
00415     FactorizationOK_ = true;
00416     SLUopt.Fact = SLU::DOFACT;
00417   }
00418 
00419   int Ierr[1];
00420   if ( iam_ == 0 ) { 
00421     double rpg, rcond;
00422     equed_ = 'N';
00423 
00424 #if 0
00425     if ( ! UseTranspose() )        // FIXME - I doubt we need this here.
00426       assert( SLUopt.Trans == NOTRANS ) ; 
00427     else
00428       SLUopt.Trans = TRANS ; 
00429     
00430 
00431     //    SLUopt.ColPerm  = COLAMD ;
00432 
00433     std::cout << " SLUopt.ColPerm  = " << SLUopt.ColPerm  << std::endl ; 
00434     std::cout << " SLUopt.Equil  = " << SLUopt.Equil  << std::endl ; 
00435     std::cout << " SLUopt.Fact  = " << SLUopt.Fact  << std::endl ; 
00436     std::cout << " SLUopt.IterRefine  = " << SLUopt.IterRefine  << std::endl ; 
00437     std::cout << " data_->A.Stype  = " << data_->A.Stype  
00438    << " SLU_NC = " << SLU_NC 
00439    << " SLU_NR = " << SLU_NR 
00440    << std::endl ; 
00441     std::cout << " SLUopt.ColPerm  = " << SLUopt.ColPerm  << std::endl ; 
00442 #endif
00443 
00444     data_->B.nrow = NumGlobalRows_; 
00445     data_->B.ncol = 0;
00446     SLU::DNformat* Bstore = (SLU::DNformat *) (data_->B.Store) ; 
00447     Bstore->lda = NumGlobalRows_; 
00448     Bstore->nzval = &DummyArray[0];
00449     data_->X.nrow = NumGlobalRows_; 
00450     data_->X.ncol = 0;
00451     SLU::DNformat* Xstore = (SLU::DNformat *) (data_->X.Store) ; 
00452     Xstore->lda = NumGlobalRows_; 
00453     Xstore->nzval = &DummyArray[0];
00454 
00455     SLU::SuperLUStat_t SLU_stat ;
00456     SLU::StatInit( &SLU_stat ) ; 
00457     assert( SLUopt.Fact == SLU::DOFACT); 
00458     dgssvx( &(SLUopt), &(data_->A), 
00459       &perm_c_[0], &perm_r_[0], &etree_[0], &equed_, &R_[0], 
00460       &C_[0], &(data_->L), &(data_->U), NULL, 0, 
00461       &(data_->B), &(data_->X), &rpg, &rcond, &ferr_[0], 
00462       &berr_[0], &(data_->mem_usage), &SLU_stat,
00463       &Ierr[0] );
00464     SLU::StatFree( &SLU_stat ) ; 
00465   }
00466 
00467   NumFactTime_ = AddTime("Total numeric factorization time", NumFactTime_, 0);
00468 
00469   FactorizationDone_ = true; 
00470 
00471   ++NumNumericFact_;
00472 
00473   return(0);
00474 }
00475 
00476 // ====================================================================== 
00477 int Amesos_Superlu::Solve() 
00478 { 
00479   if (!FactorizationDone_) {
00480     FactorizationOK_ = false;
00481     AMESOS_CHK_ERR(NumericFactorization());
00482   }
00483 
00484   ResetTimer(1); // for "overhead'
00485 
00486   Epetra_MultiVector* vecX = Problem_->GetLHS(); 
00487   Epetra_MultiVector* vecB = Problem_->GetRHS(); 
00488   int Ierr;
00489 
00490   if (vecX == 0 || vecB == 0)  
00491     AMESOS_CHK_ERR(-1); // vectors not set
00492 
00493   int nrhs = vecX->NumVectors(); 
00494   if (nrhs != vecB->NumVectors())
00495     AMESOS_CHK_ERR(-2); // vectors not compatible
00496 
00497   ferr_.resize(nrhs); 
00498   berr_.resize(nrhs); 
00499 
00500   Epetra_MultiVector* SerialB = 0;
00501   Epetra_MultiVector* SerialX = 0;
00502 
00503   double *SerialXvalues ;
00504   double *SerialBvalues ;
00505 
00506   if (Comm().NumProc() == 1) 
00507   { 
00508     SerialB = vecB; 
00509     SerialX = vecX; 
00510   } 
00511   else 
00512   { 
00513     ResetTimer(0);
00514 
00515     SerialX = new Epetra_MultiVector(SerialMap(), nrhs); 
00516     SerialB = new Epetra_MultiVector(SerialMap(), nrhs); 
00517 
00518     SerialB->Import(*vecB, ImportToSerial(), Insert);
00519     SerialB = SerialB; 
00520     SerialX = SerialX; 
00521 
00522     VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);
00523   } 
00524 
00525   ResetTimer(0);
00526 
00527   // Call SUPERLU's dgssvx to perform the solve
00528   // At this point I have, on processor 0, the solution vector
00529   // in SerialX and the right-hand side in SerialB
00530   
00531   if (iam_ == 0) 
00532   {
00533     int SerialXlda;
00534     int SerialBlda;
00535 
00536     int ierr;
00537     ierr = SerialX->ExtractView(&SerialXvalues, &SerialXlda);
00538     assert (ierr == 0);
00539     assert (SerialXlda == NumGlobalRows_ ) ; 
00540 
00541     ierr = SerialB->ExtractView(&SerialBvalues, &SerialBlda);
00542     assert (ierr == 0);
00543     assert (SerialBlda == NumGlobalRows_ ) ; 
00544     
00545     SLU::SuperMatrix& dataX = (data_->X) ;
00546     dataX.nrow =   NumGlobalRows_; 
00547     dataX.ncol =   nrhs ; 
00548     data_->X.nrow = NumGlobalRows_; 
00549     SLU::DNformat* Xstore = (SLU::DNformat *) (data_->X.Store) ; 
00550     Xstore->lda = SerialXlda; 
00551     Xstore->nzval = &SerialXvalues[0]; 
00552 
00553     SLU::SuperMatrix& dataB = (data_->B) ;
00554     dataB.nrow =   NumGlobalRows_; 
00555     dataB.ncol =   nrhs ; 
00556     data_->B.nrow = NumGlobalRows_; 
00557     SLU::DNformat* Bstore = (SLU::DNformat *) (data_->B.Store) ; 
00558     Bstore->lda = SerialBlda; 
00559     Bstore->nzval = &SerialBvalues[0]; 
00560 
00561     double rpg, rcond;
00562 #if 0
00563     char fact, trans, refact;
00564     fact = 'F';
00565     refact = 'N';
00566     trans = 'N' ;    //  This will allow us to try trans and not trans - see if either works.
00567     //    equed = 'N';
00568 #endif
00569 #if 0
00570     dgssvx( &fact, &trans, &refact, &(data_->A), &(data_->iparam), &perm_c_[0],
00571       &perm_r_[0], &etree_[0], &equed_, &R_[0], &C_[0], &(data_->L), &(data_->U),
00572       NULL, 0, &(data_->B), &(data_->X), &rpg, &rcond,
00573       &ferr_[0], &berr_[0], &(data_->mem_usage), &Ierr[0] );
00574 #endif
00575 
00576     OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); // NOTE: only timings on processor 0 will be meaningful
00577 
00578     SLU::SuperLUStat_t SLU_stat ;
00579     SLU::StatInit( &SLU_stat ) ;//    Copy the scheme used in dgssvx1.c 
00580     data_->SLU_options.Fact = SLU::FACTORED ; 
00581     SLU::superlu_options_t& SLUopt =  data_->SLU_options ; 
00582     if (UseTranspose()) 
00583       SLUopt.Trans = SLU::TRANS; 
00584     else
00585       SLUopt.Trans = SLU::NOTRANS; 
00586 
00587     //#ifdef USE_DGSTRF
00588     //    assert( equed_ == 'N' ) ; 
00589     dgssvx( &(SLUopt), &(data_->A), 
00590       &perm_c_[0], &perm_r_[0], &etree_[0], &equed_, &R_[0], 
00591       &C_[0], &(data_->L), &(data_->U), NULL, 0, 
00592       &(data_->B), &(data_->X), &rpg, &rcond, &ferr_[0], 
00593       &berr_[0], &(data_->mem_usage), &SLU_stat,
00594       &Ierr);
00595     //    assert( equed_ == 'N' ) ; 
00596     StatFree( &SLU_stat ) ; 
00597   }
00598 
00599   SolveTime_ = AddTime("Total solve time", SolveTime_, 0);
00600 
00601   ResetTimer(1); // for "overhead'
00602 
00603   //
00604   //  Copy X back to the original vector
00605   // 
00606   if (Comm().NumProc() != 1) 
00607   { 
00608     ResetTimer(0);
00609 
00610     vecX->Export(*SerialX, ImportToSerial(), Insert);
00611     delete SerialB;
00612     delete SerialX;
00613 
00614     VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);
00615   } 
00616 
00617   if (ComputeTrueResidual_)
00618     ComputeTrueResidual(*(GetProblem()->GetMatrix()), *vecX, *vecB, 
00619                         UseTranspose(), "Amesos_Superlu");
00620 
00621   if (ComputeVectorNorms_)
00622     ComputeVectorNorms(*vecX, *vecB, "Amesos_Superlu");
00623 
00624   OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
00625 
00626   ++NumSolve_;
00627 
00628   //  All processes should return the same error code
00629   if (Comm().NumProc() != 1)
00630     Comm().Broadcast(&Ierr, 1, 0); 
00631 
00632   AMESOS_RETURN(Ierr);
00633 }
00634 
00635 // ================================================ ====== ==== ==== == =
00636 
00637 void Amesos_Superlu::PrintStatus() const
00638 {
00639   if (Problem_->GetOperator() == 0 || Comm().MyPID() != 0)
00640     return;
00641 
00642   std::string p = "Amesos_Superlu : ";
00643   PrintLine();
00644 
00645   int n = GetProblem()->GetMatrix()->NumGlobalRows();
00646   int nnz = GetProblem()->GetMatrix()->NumGlobalNonzeros();
00647 
00648   std::cout << p << "Matrix has " << n << " rows"
00649        << " and " << nnz << " nonzeros" << std::endl;
00650   std::cout << p << "Nonzero elements per row = "
00651        << 1.0 *  nnz / n << std::endl;
00652   std::cout << p << "Percentage of nonzero elements = "
00653        << 100.0 * nnz /(pow(double(n),double(2.0))) << std::endl;
00654   std::cout << p << "Use transpose = " << UseTranspose_ << std::endl;
00655 
00656   PrintLine();
00657 
00658   return;
00659 }
00660 
00661 // ================================================ ====== ==== ==== == =
00662 void Amesos_Superlu::PrintTiming() const
00663 {
00664   if (Problem_->GetOperator() == 0 || Comm().MyPID() != 0)
00665     return;
00666 
00667   double ConTime = GetTime(MtxConvTime_);
00668   double MatTime = GetTime(MtxRedistTime_);
00669   double VecTime = GetTime(VecRedistTime_);
00670   double NumTime = GetTime(NumFactTime_);
00671   double SolTime = GetTime(SolveTime_);
00672   double OveTime = GetTime(OverheadTime_);
00673 
00674   if (NumNumericFact_)
00675     NumTime /= NumNumericFact_;
00676 
00677   if (NumSolve_)
00678     SolTime /= NumSolve_;
00679 
00680   std::string p = "Amesos_Superlu : ";
00681   PrintLine();
00682 
00683   std::cout << p << "Time to convert matrix to SuperLU format = " << ConTime << " (s)" << std::endl;
00684   std::cout << p << "Time to redistribute matrix = " << MatTime << " (s)" << std::endl;
00685   std::cout << p << "Time to redistribute vectors = " << VecTime << " (s)" << std::endl;
00686   std::cout << p << "Number of numeric factorizations = " << NumNumericFact_ << std::endl;
00687   std::cout << p << "Time for num fact = "
00688        << NumTime * NumNumericFact_ << " (s), avg = " 
00689        << NumTime << " (s)" << std::endl;
00690   std::cout << p << "Number of solve phases = " << NumSolve_ << std::endl;
00691   std::cout << p << "Time for solve = "
00692        << SolTime * NumSolve_ << " (s), avg = " << SolTime << " (s)" << std::endl;
00693   double tt = NumTime * NumNumericFact_ + SolTime * NumSolve_;
00694   if (tt != 0)
00695   {
00696     std::cout << p << "Total time spent in Amesos = " << tt << " (s) " << std::endl;
00697     std::cout << p << "Total time spent in the Amesos interface = " << OveTime << " (s)" << std::endl;
00698     std::cout << p << "(the above time does not include SuperLU time)" << std::endl;
00699     std::cout << p << "Amesos interface time / total time = " << OveTime / tt << std::endl;
00700   }
00701 
00702 
00703   PrintLine();
00704 
00705   return;
00706 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines