Amesos Package Browser (Single Doxygen Collection) Development
Amesos_Klu.cpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                Amesos: Direct Sparse Solver Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #include "Amesos_Klu.h"
00030 #include "Epetra_Map.h"
00031 #include "Epetra_Import.h"
00032 #include "Epetra_CrsMatrix.h"
00033 #include "Epetra_Vector.h"
00034 #include "Epetra_Util.h"
00035 #include "Amesos_Support.h"
00036 
00037 extern "C" {
00038 
00039 #include "amesos_klu_decl.h"
00040 
00041 }
00042 
00043 namespace {
00044 
00045 using Teuchos::RCP;
00046 
00047 template<class T, class DeleteFunctor>
00048 class DeallocFunctorDeleteWithCommon
00049 {
00050 public:
00051   DeallocFunctorDeleteWithCommon(
00052          const RCP<klu_common>  &common
00053          ,DeleteFunctor         deleteFunctor
00054          )
00055     : common_(common), deleteFunctor_(deleteFunctor)
00056     {}
00057   typedef T ptr_t;
00058   void free( T* ptr ) {
00059     if(ptr) deleteFunctor_(&ptr,&*common_);
00060   }
00061 private:
00062   Teuchos::RCP<klu_common> common_;
00063   DeleteFunctor deleteFunctor_;
00064   DeallocFunctorDeleteWithCommon(); // Not defined and not to be called!
00065 };
00066 
00067 template<class T, class DeleteFunctor>
00068 DeallocFunctorDeleteWithCommon<T,DeleteFunctor>
00069 deallocFunctorDeleteWithCommon(
00070              const RCP<klu_common>  &common
00071              ,DeleteFunctor                        deleteFunctor
00072              )
00073 {
00074   return DeallocFunctorDeleteWithCommon<T,DeleteFunctor>(common,deleteFunctor);
00075 }
00076 
00077 
00078 } // namespace 
00079 
00080 class Amesos_Klu_Pimpl 
00081 {
00082 public:
00083   Teuchos::RCP<klu_symbolic> Symbolic_;
00084   Teuchos::RCP<klu_numeric> Numeric_;
00085   Teuchos::RCP<klu_common> common_;
00086 };
00087 
00088 //=============================================================================
00089 Amesos_Klu::Amesos_Klu(const Epetra_LinearProblem &prob ) :
00090   PrivateKluData_( rcp( new Amesos_Klu_Pimpl() ) ),
00091   CrsMatrixA_(0),
00092   TrustMe_(false),
00093   UseTranspose_(false),
00094   Problem_(&prob),
00095   MtxRedistTime_(-1),
00096   MtxConvTime_(-1),
00097   VecRedistTime_(-1),
00098   SymFactTime_(-1),
00099   NumFactTime_(-1),
00100   SolveTime_(-1),
00101   OverheadTime_(-1)
00102 {
00103   // MS // move declaration of Problem_ above because I need it
00104   // MS // set up before calling Comm()
00105   Teuchos::ParameterList ParamList ;
00106   SetParameters( ParamList ) ;
00107 
00108 }
00109 
00110 //=============================================================================
00111 Amesos_Klu::~Amesos_Klu(void) {
00112 
00113   // print out some information if required by the user
00114   if( (verbose_ && PrintTiming_) || verbose_ == 2 ) PrintTiming();
00115   if( (verbose_ && PrintStatus_) || verbose_ == 2 ) PrintStatus();
00116 }
00117 
00118 //=============================================================================
00119 int Amesos_Klu::ExportToSerial() 
00120 {
00121 
00122   if ( AddZeroToDiag_==0 && numentries_ != RowMatrixA_->NumGlobalNonzeros()) { 
00123     std::cerr << " The number of non zero entries in the matrix has changed since the last call to SymbolicFactorization().  " ;
00124     AMESOS_CHK_ERR( -2 );
00125   }
00126   if (UseDataInPlace_ != 1) {
00127     assert ( RowMatrixA_ != 0 ) ; 
00128     if (  AddZeroToDiag_==0 && numentries_ != RowMatrixA_->NumGlobalNonzeros()) { 
00129       std::cerr << " The number of non zero entries in the matrix has changed since the last call to SymbolicFactorization().  " ;
00130       AMESOS_CHK_ERR( -3 );
00131     }
00132     assert ( ImportToSerial_.get() != 0 ) ; 
00133     AMESOS_CHK_ERR(SerialCrsMatrixA_->Import(*StdIndexMatrix_, 
00134                *ImportToSerial_, Insert ));
00135 
00136     if (AddZeroToDiag_ ) { 
00137       int OriginalTracebackMode = SerialCrsMatrixA_->GetTracebackMode() ; 
00138       SerialCrsMatrixA_->SetTracebackMode( EPETRA_MIN( OriginalTracebackMode, 0) ) ; // ExportToSerial is called both by PerformSymbolicFactorization() and PerformNumericFactorization().  When called by the latter, the call to insertglobalvalues is both unnecessary and illegal.  Fortunately, Epetra allows us to ignore the error message by setting the traceback mode to 0.
00139 
00140       //
00141       //  Add 0.0 to each diagonal entry to avoid empty diagonal entries;
00142       //
00143       double zero = 0.0;
00144       for ( int i = 0 ; i < SerialMap_->NumGlobalElements(); i++ ) 
00145   if ( SerialCrsMatrixA_->LRID(i) >= 0 ) 
00146     SerialCrsMatrixA_->InsertGlobalValues( i, 1, &zero, &i ) ;
00147       SerialCrsMatrixA_->SetTracebackMode( OriginalTracebackMode ) ; 
00148     }
00149     AMESOS_CHK_ERR(SerialCrsMatrixA_->FillComplete());
00150     AMESOS_CHK_ERR(SerialCrsMatrixA_->OptimizeStorage());
00151 
00152     if( ! AddZeroToDiag_ &&  numentries_ != SerialMatrix_->NumGlobalNonzeros()) {
00153       std::cerr << " Amesos_Klu cannot handle this matrix.  " ;
00154       if ( Reindex_ ) {
00155   std::cerr << "Unknown error" << std::endl ; 
00156   AMESOS_CHK_ERR( -4 );
00157       } else {
00158   std::cerr << " Try setting the Reindex parameter to true. " << std::endl ; 
00159 #ifndef HAVE_AMESOS_EPETRAEXT
00160   std::cerr << " You will need to rebuild the Amesos library with the EpetraExt library to use the reindex feature" << std::endl ; 
00161   std::cerr << " To rebuild Amesos with EpetraExt, add --enable-epetraext to your configure invocation" << std::endl ;
00162 #endif
00163   AMESOS_CHK_ERR( -5 );
00164       }
00165     }
00166   }
00167   
00168   return 0;
00169 }
00170 //=============================================================================
00171 //
00172 // CreateLocalMatrixAndExporters() is called only by SymbolicFactorization() 
00173 // for CrsMatrix objects.  All objects should be created here.  No assumptions 
00174 // are made about the input operator.  I.e. it can be completely different from 
00175 // the matrix at the time of the previous call to SymbolicFactorization(). 
00176 //
00177 
00178 int Amesos_Klu::CreateLocalMatrixAndExporters() 
00179 {
00180   ResetTimer(0);
00181 
00182   RowMatrixA_ = Problem_->GetMatrix(); // MS, 18-Apr-06 //
00183   if (RowMatrixA_ == 0) AMESOS_CHK_ERR(-1);
00184 
00185   const Epetra_Map &OriginalMatrixMap = RowMatrixA_->RowMatrixRowMap() ;
00186   const Epetra_Map &OriginalDomainMap = 
00187     UseTranspose()?GetProblem()->GetOperator()->OperatorRangeMap():
00188     GetProblem()->GetOperator()->OperatorDomainMap();
00189   const Epetra_Map &OriginalRangeMap = 
00190     UseTranspose()?GetProblem()->GetOperator()->OperatorDomainMap():
00191     GetProblem()->GetOperator()->OperatorRangeMap();
00192 
00193   NumGlobalElements_ = RowMatrixA_->NumGlobalRows();
00194   numentries_ = RowMatrixA_->NumGlobalNonzeros();
00195   assert( NumGlobalElements_ == RowMatrixA_->NumGlobalCols() );
00196 
00197   //
00198   //  Create a serial matrix
00199   //
00200   assert( NumGlobalElements_ == OriginalMatrixMap.NumGlobalElements() ) ;
00201   int NumMyElements_ = 0 ;
00202   if (MyPID_==0) NumMyElements_ = NumGlobalElements_;
00203   //
00204   //  UseDataInPlace_ is set to 1 (true) only if everything is perfectly
00205   //  normal.  Anything out of the ordinary reverts to the more expensive
00206   //  path. 
00207   //
00208   UseDataInPlace_ = ( OriginalMatrixMap.NumMyElements() ==
00209          OriginalMatrixMap.NumGlobalElements() )?1:0;
00210   if ( ! OriginalRangeMap.SameAs( OriginalMatrixMap ) ) UseDataInPlace_ = 0 ; 
00211   if ( ! OriginalDomainMap.SameAs( OriginalMatrixMap ) ) UseDataInPlace_ = 0 ; 
00212   if ( AddZeroToDiag_ ) UseDataInPlace_ = 0 ; 
00213   Comm().Broadcast( &UseDataInPlace_, 1, 0 ) ;
00214 
00215   //
00216   //  Reindex matrix if necessary (and possible - i.e. CrsMatrix)
00217   //
00218   //  For now, since I don't know how to determine if we need to reindex the matrix, 
00219   //  we allow the user to control re-indexing through the "Reindex" parameter.
00220   //
00221   CrsMatrixA_ = dynamic_cast<Epetra_CrsMatrix *>(Problem_->GetOperator());
00222 
00223   if  ( Reindex_ ) {
00224     if ( CrsMatrixA_ == 0 ) AMESOS_CHK_ERR(-7);
00225 #ifdef HAVE_AMESOS_EPETRAEXT
00226     const Epetra_Map& OriginalMap = CrsMatrixA_->RowMap();
00227     StdIndex_ = rcp( new Amesos_StandardIndex( OriginalMap  ) );
00228     //const Epetra_Map& OriginalColMap = CrsMatrixA_->RowMap();
00229     StdIndexDomain_ = rcp( new Amesos_StandardIndex( OriginalDomainMap  ) );
00230     StdIndexRange_ = rcp( new Amesos_StandardIndex( OriginalRangeMap  ) );
00231 
00232     StdIndexMatrix_ = StdIndex_->StandardizeIndex( CrsMatrixA_ );
00233 #else
00234     std::cerr << "Amesos_Klu requires EpetraExt to reindex matrices." << std::endl 
00235    <<  " Please rebuild with the EpetraExt library by adding --enable-epetraext to your configure invocation" << std::endl ;
00236     AMESOS_CHK_ERR(-8);
00237 #endif
00238   } else { 
00239     StdIndexMatrix_ = RowMatrixA_ ;
00240   }
00241 
00242   //
00243   //  At this point, StdIndexMatrix_ points to a matrix with 
00244   //  standard indexing.  
00245   //
00246 
00247   //
00248   //  Convert Original Matrix to Serial (if it is not already)
00249   //
00250   if (UseDataInPlace_ == 1) {
00251     SerialMatrix_ = StdIndexMatrix_;
00252   } else {
00253     SerialMap_ = rcp(new Epetra_Map(NumGlobalElements_, NumMyElements_, 0, Comm()));
00254     
00255     if ( Problem_->GetRHS() ) 
00256       NumVectors_ = Problem_->GetRHS()->NumVectors() ; 
00257     else
00258       NumVectors_ = 1 ; 
00259     SerialXextract_ = rcp( new Epetra_MultiVector(*SerialMap_,NumVectors_));
00260     SerialBextract_ = rcp (new Epetra_MultiVector(*SerialMap_,NumVectors_));
00261 
00262     ImportToSerial_ = rcp(new Epetra_Import ( *SerialMap_, StdIndexMatrix_->RowMatrixRowMap() ) );
00263 
00264     if (ImportToSerial_.get() == 0) AMESOS_CHK_ERR(-9);
00265 
00266     // Build the vector data import/export once and only once
00267 #define CHRIS
00268 #ifdef CHRIS
00269     if(StdIndexMatrix_->RowMatrixRowMap().SameAs(StdIndexMatrix_->OperatorRangeMap()))
00270       ImportRangeToSerial_=ImportToSerial_;
00271     else
00272       ImportRangeToSerial_ = rcp(new Epetra_Import ( *SerialMap_, StdIndexMatrix_->OperatorRangeMap() ) );
00273 
00274     if(StdIndexMatrix_->RowMatrixRowMap().SameAs(StdIndexMatrix_->OperatorDomainMap()))
00275       ImportDomainToSerial_=ImportToSerial_;
00276     else
00277       ImportDomainToSerial_ = rcp(new Epetra_Import ( *SerialMap_, StdIndexMatrix_->OperatorDomainMap() ) );
00278 #endif
00279     
00280     SerialCrsMatrixA_ = rcp( new Epetra_CrsMatrix(Copy, *SerialMap_, 0) ) ;
00281     SerialMatrix_ = &*SerialCrsMatrixA_ ;
00282   }
00283 
00284   MtxRedistTime_ = AddTime("Total matrix redistribution time", MtxRedistTime_, 0);
00285 
00286   return(0);
00287 }
00288 
00289 //=============================================================================
00290 //
00291 //  See also pre and post conditions in Amesos_Klu.h
00292 //  Preconditions:
00293 //    firsttime specifies that this is the first time that 
00294 //    ConertToKluCrs has been called, i.e. in symbolic factorization.  
00295 //    No data allocation should happen unless firsttime=true.
00296 //    SerialMatrix_ points to the matrix to be factored and solved
00297 //    NumGlobalElements_ has been set to the dimension of the matrix
00298 //    numentries_ has been set to the number of non-zeros in the matrix
00299 //      (i.e. CreateLocalMatrixAndExporters() has been callded)
00300 //
00301 //  Postconditions:
00302 //    Ap, VecAi, VecAval contain the matrix as Klu needs it
00303 //
00304 //
00305 int Amesos_Klu::ConvertToKluCRS(bool firsttime)
00306 {
00307   ResetTimer(0);
00308 
00309   //
00310   //  Convert matrix to the form that Klu expects (Ap, VecAi, VecAval)
00311   //
00312 
00313   if (MyPID_==0) {
00314     assert( NumGlobalElements_ == SerialMatrix_->NumGlobalRows());
00315     assert( NumGlobalElements_ == SerialMatrix_->NumGlobalCols());
00316     if ( ! AddZeroToDiag_ ) {
00317       assert( numentries_ == SerialMatrix_->NumGlobalNonzeros()) ;
00318     } else {
00319       numentries_ = SerialMatrix_->NumGlobalNonzeros() ;
00320     }
00321 
00322     Epetra_CrsMatrix *CrsMatrix = dynamic_cast<Epetra_CrsMatrix *>(SerialMatrix_);
00323     bool StorageOptimized = ( CrsMatrix != 0 && CrsMatrix->StorageOptimized() );
00324 
00325     if ( AddToDiag_ != 0.0 ) StorageOptimized = false ;
00326 
00327     if ( firsttime ) { 
00328       Ap.resize( NumGlobalElements_+1 );
00329       if ( ! StorageOptimized ) { 
00330   VecAi.resize( EPETRA_MAX( NumGlobalElements_, numentries_) ) ;
00331   VecAval.resize( EPETRA_MAX( NumGlobalElements_, numentries_) ) ;
00332   Ai = &VecAi[0];
00333   Aval = &VecAval[0];
00334       }
00335     }
00336 
00337     double *RowValues;
00338     int *ColIndices;
00339     int NumEntriesThisRow;
00340 
00341     if( StorageOptimized ) {
00342       if ( firsttime ) {
00343   Ap[0] = 0;
00344   for ( int MyRow = 0; MyRow <NumGlobalElements_; MyRow++ ) {
00345     if( CrsMatrix->
00346         ExtractMyRowView( MyRow, NumEntriesThisRow, RowValues,
00347         ColIndices ) != 0 ) 
00348       AMESOS_CHK_ERR( -10 );
00349     if ( MyRow == 0 ) {
00350       Ai = ColIndices ;
00351       Aval = RowValues ;
00352     }
00353     Ap[MyRow+1] = Ap[MyRow] + NumEntriesThisRow ;
00354   }
00355       }
00356     } else { 
00357       
00358       int Ai_index = 0 ;
00359       int MyRow;
00360       
00361       int MaxNumEntries_ = SerialMatrix_->MaxNumEntries();
00362       if ( firsttime && CrsMatrix == 0 ) {
00363   ColIndicesV_.resize(MaxNumEntries_);
00364   RowValuesV_.resize(MaxNumEntries_);
00365       }
00366       
00367       for ( MyRow = 0; MyRow <NumGlobalElements_; MyRow++ ) {
00368   if ( CrsMatrix != 0 ) {
00369     if( CrsMatrix->
00370         ExtractMyRowView( MyRow, NumEntriesThisRow, RowValues,
00371         ColIndices ) != 0 ) 
00372       AMESOS_CHK_ERR( -11 );
00373 
00374   } else {
00375     if( SerialMatrix_->
00376         ExtractMyRowCopy( MyRow, MaxNumEntries_,
00377               NumEntriesThisRow, &RowValuesV_[0],
00378               &ColIndicesV_[0] ) != 0 ) 
00379       AMESOS_CHK_ERR( -12 );
00380 
00381     RowValues =  &RowValuesV_[0];
00382     ColIndices = &ColIndicesV_[0];
00383   }
00384   
00385   if ( firsttime ) {
00386     Ap[MyRow] = Ai_index ;
00387     for ( int j = 0; j < NumEntriesThisRow; j++ ) {
00388       VecAi[Ai_index] = ColIndices[j] ;
00389       //    assert( VecAi[Ai_index] == Ai[Ai_index] ) ; 
00390       VecAval[Ai_index] = RowValues[j] ;      //  We have to do this because of the hacks to get aroun bug #1502 
00391       if (ColIndices[j] == MyRow) {
00392         VecAval[Ai_index] += AddToDiag_;    
00393       }
00394       Ai_index++;
00395     }
00396   } else { 
00397     for ( int j = 0; j < NumEntriesThisRow; j++ ) {
00398       VecAval[Ai_index] = RowValues[j] ;     
00399       if (ColIndices[j] == MyRow) {
00400         VecAval[Ai_index] += AddToDiag_;   
00401       }
00402       Ai_index++;
00403     }
00404   }
00405       }
00406       Ap[MyRow] = Ai_index ;
00407     }
00408 
00409   }
00410 
00411   MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0);
00412 
00413   return 0;
00414 }
00415 
00416 //=============================================================================
00417 int Amesos_Klu::SetParameters( Teuchos::ParameterList &ParameterList ) {
00418 
00419   // ========================================= //
00420   // retrive KLU's parameters from list.       //
00421   // default values defined in the constructor //
00422   // ========================================= //
00423 
00424   // retrive general parameters
00425 
00426   SetStatusParameters( ParameterList );
00427   SetControlParameters( ParameterList );
00428 
00429   if (ParameterList.isParameter("TrustMe") ) 
00430     TrustMe_ = ParameterList.get<bool>( "TrustMe" );
00431 
00432 #if 0
00433 
00434   unused for now
00435 
00436   if (ParameterList.isSublist("Klu") ) {
00437     Teuchos::ParameterList KluParams = ParameterList.sublist("Klu") ;
00438   }
00439 #endif
00440 
00441   return 0;
00442 }
00443 
00444 
00445 //=============================================================================
00446 int Amesos_Klu::PerformSymbolicFactorization() 
00447 {
00448   ResetTimer(0);
00449 
00450   int symbolic_ok = 0;
00451 
00452   if (MyPID_ == 0) {
00453 
00454     PrivateKluData_->common_ = rcp(new klu_common());
00455     amesos_klu_defaults(&*PrivateKluData_->common_) ;
00456     PrivateKluData_->Symbolic_ =
00457       rcpWithDealloc(
00458     amesos_klu_analyze (NumGlobalElements_, &Ap[0], Ai,  &*PrivateKluData_->common_ ) 
00459     ,deallocFunctorDeleteWithCommon<klu_symbolic>(PrivateKluData_->common_,amesos_klu_free_symbolic)
00460     ,true
00461     );
00462     
00463     symbolic_ok = (PrivateKluData_->Symbolic_.get() != NULL) 
00464       && PrivateKluData_->common_->status == KLU_OK ;
00465 
00466   }
00467 
00468   // Communicate the state of the symbolic factorization with everyone.
00469   Comm().Broadcast(&symbolic_ok, 1, 0);
00470 
00471   if ( !symbolic_ok ) {
00472     if (MyPID_ == 0) {
00473       AMESOS_CHK_ERR( StructurallySingularMatrixError );
00474     } else
00475       return( StructurallySingularMatrixError );
00476   }
00477 
00478   SymFactTime_ = AddTime("Total symbolic factorization time", SymFactTime_, 0);
00479   
00480   return 0;
00481 }
00482 
00483 //=============================================================================
00484 int Amesos_Klu::PerformNumericFactorization( ) 
00485 {
00486 
00487   // Changed this; it was "if (!TrustMe)...
00488   // The behavior is not intuitive. Maybe we should introduce a new
00489   // parameter, FastSolvers or something like that, that does not perform
00490   // any AddTime, ResetTimer, GetTime.
00491   // HKT, 1/18/2007:  The "TrustMe_" flag was put back in this code due to performance degradation; Bug# 3042.
00492   
00493   if (! TrustMe_ ) ResetTimer(0);
00494 
00495   // This needs to be 1 just in case the pivot ordering is reused.
00496   int numeric_ok = 1;
00497   
00498   if (MyPID_ == 0) {
00499     
00500     bool factor_with_pivoting = true ;
00501     
00502     // set the default parameters
00503     PrivateKluData_->common_->scale = ScaleMethod_ ;
00504     
00505     const bool NumericNonZero =  PrivateKluData_->Numeric_.get() != 0 ; 
00506     
00507     // see if we can "refactorize"
00508     if ( refactorize_ && NumericNonZero ) { 
00509       // refactorize using the existing Symbolic and Numeric objects, and
00510       // using the identical pivot ordering as the prior klu_factor.
00511       // No partial pivoting is done.
00512       int result = amesos_klu_refactor (&Ap[0], Ai, Aval,
00513           &*PrivateKluData_->Symbolic_, 
00514           &*PrivateKluData_->Numeric_, &*PrivateKluData_->common_) ;
00515       // Did it work?
00516       const  bool refactor_ok = result == 1 && PrivateKluData_->common_->status == KLU_OK ;
00517       if ( refactor_ok ) { 
00518   
00519   amesos_klu_rcond (&*PrivateKluData_->Symbolic_,
00520         &*PrivateKluData_->Numeric_,
00521         &*PrivateKluData_->common_) ;
00522   
00523   double rcond = PrivateKluData_->common_->rcond;
00524   
00525   if ( rcond > rcond_threshold_ ) {
00526     // factorizing without pivot worked fine.  We are done.
00527     factor_with_pivoting = false ;
00528   }
00529       }
00530     }
00531     
00532     if ( factor_with_pivoting ) {
00533       
00534       // factor with partial pivoting:
00535       // either this is the first time we are factoring the matrix, or the
00536       // refactorize parameter is false, or we tried to refactorize and
00537       // found it to be too inaccurate.
00538       
00539       // factor the matrix using partial pivoting
00540       PrivateKluData_->Numeric_ =
00541   rcpWithDealloc( amesos_klu_factor(&Ap[0], Ai, Aval,
00542              &*PrivateKluData_->Symbolic_, &*PrivateKluData_->common_),
00543        deallocFunctorDeleteWithCommon<klu_numeric>(PrivateKluData_->common_,amesos_klu_free_numeric)
00544        ,true
00545        );
00546       
00547       numeric_ok =  PrivateKluData_->Numeric_.get()!=NULL 
00548   && PrivateKluData_->common_->status == KLU_OK ;
00549       
00550     }
00551   }
00552   
00553   // Communicate the state of the numeric factorization with everyone.
00554   Comm().Broadcast(&numeric_ok, 1, 0);
00555 
00556   if ( ! numeric_ok ) {
00557     if (MyPID_ == 0) {
00558       AMESOS_CHK_ERR( NumericallySingularMatrixError );  
00559     } else 
00560       return( NumericallySingularMatrixError );
00561   }
00562   
00563   if ( !TrustMe_ ) {
00564     NumFactTime_ = AddTime("Total numeric factorization time", NumFactTime_, 0);
00565   }
00566   
00567   return 0;
00568 }
00569 
00570 //=============================================================================
00571 bool Amesos_Klu::MatrixShapeOK() const {
00572   bool OK = true;
00573   
00574   // Comment by Tim:  The following code seems suspect.  The variable "OK"
00575   // is not set if the condition is true.
00576   // Does the variable "OK" default to true?
00577   if ( GetProblem()->GetOperator()->OperatorRangeMap().NumGlobalPoints() !=
00578        GetProblem()->GetOperator()->OperatorDomainMap().NumGlobalPoints() ) {
00579     OK = false;
00580   }
00581   return OK;
00582 }
00583 
00584 //=============================================================================
00585 int Amesos_Klu::SymbolicFactorization() 
00586 {
00587   MyPID_    = Comm().MyPID();
00588   NumProcs_ = Comm().NumProc();
00589   
00590   IsSymbolicFactorizationOK_ = false;
00591   IsNumericFactorizationOK_ = false;
00592   
00593   CreateTimer(Comm(), 2);
00594   
00595   ResetTimer(1);
00596   
00597   // "overhead" time for the following method is considered here
00598   AMESOS_CHK_ERR( CreateLocalMatrixAndExporters() ) ;
00599   assert( NumGlobalElements_ == RowMatrixA_->NumGlobalCols() );
00600   
00601   //
00602   //  Perform checks in SymbolicFactorization(), but none in 
00603   //  NumericFactorization() or Solve()
00604   //
00605   if ( TrustMe_ ) { 
00606      if ( CrsMatrixA_ == 0 ) AMESOS_CHK_ERR( -15 );
00607      if( UseDataInPlace_ != 1 ) AMESOS_CHK_ERR( -15 ) ;
00608      if( Reindex_ )  AMESOS_CHK_ERR( -15 ) ;
00609      if( ! Problem_->GetLHS() )  AMESOS_CHK_ERR( -15 ) ;
00610      if( ! Problem_->GetRHS() )  AMESOS_CHK_ERR( -15 ) ;
00611      if( ! Problem_->GetLHS()->NumVectors() ) AMESOS_CHK_ERR( -15 ) ;
00612      if( ! Problem_->GetRHS()->NumVectors() ) AMESOS_CHK_ERR( -15 ) ; 
00613      SerialB_ = Teuchos::rcp(Problem_->GetRHS(),false) ;
00614      SerialX_ = Teuchos::rcp(Problem_->GetLHS(),false) ;
00615      NumVectors_ = SerialX_->NumVectors();
00616      if (MyPID_ == 0) {
00617        AMESOS_CHK_ERR(SerialX_->ExtractView(&SerialXBvalues_,&SerialXlda_ ));
00618        AMESOS_CHK_ERR(SerialB_->ExtractView(&SerialBvalues_,&SerialXlda_ ));
00619      }
00620   }
00621 
00622   // "overhead" time for the following two methods is considered here
00623   AMESOS_CHK_ERR( ExportToSerial() );
00624 
00625   AMESOS_CHK_ERR( ConvertToKluCRS(true) );
00626 
00627   OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
00628 
00629   // All this time if KLU time
00630   AMESOS_CHK_ERR( PerformSymbolicFactorization() );
00631 
00632   NumSymbolicFact_++;
00633 
00634   IsSymbolicFactorizationOK_ = true;
00635   
00636   return 0;
00637 }
00638 
00639 //=============================================================================
00640 int Amesos_Klu::NumericFactorization() 
00641 {
00642   if ( !TrustMe_ ) 
00643   { 
00644     IsNumericFactorizationOK_ = false;
00645     if (IsSymbolicFactorizationOK_ == false)
00646       AMESOS_CHK_ERR(SymbolicFactorization());
00647     
00648     ResetTimer(1); // "overhead" time
00649 
00650     Epetra_CrsMatrix *CrsMatrixA = dynamic_cast<Epetra_CrsMatrix *>(RowMatrixA_);
00651     if ( CrsMatrixA == 0 )   // hack to get around Bug #1502 
00652       AMESOS_CHK_ERR( CreateLocalMatrixAndExporters() ) ;
00653     assert( NumGlobalElements_ == RowMatrixA_->NumGlobalCols() );
00654     if ( AddZeroToDiag_ == 0 ) 
00655       assert( numentries_ == RowMatrixA_->NumGlobalNonzeros() );
00656 
00657     AMESOS_CHK_ERR( ExportToSerial() );
00658     
00659     if ( CrsMatrixA == 0 ) {  // continuation of hack to avoid bug #1502
00660       AMESOS_CHK_ERR( ConvertToKluCRS(true) );
00661     }  else {
00662       AMESOS_CHK_ERR( ConvertToKluCRS(false) );
00663     }
00664     
00665     OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
00666   }
00667   
00668   // this time is all for KLU
00669   AMESOS_CHK_ERR( PerformNumericFactorization() );
00670   
00671   NumNumericFact_++;
00672   
00673   IsNumericFactorizationOK_ = true;
00674   
00675   return 0;
00676 }
00677 
00678 //=============================================================================
00679 int Amesos_Klu::Solve() 
00680 {
00681   Epetra_MultiVector* vecX = 0 ;
00682   Epetra_MultiVector* vecB = 0 ;
00683 
00684 #ifdef HAVE_AMESOS_EPETRAEXT
00685   Teuchos::RCP<Epetra_MultiVector> vecX_rcp;
00686   Teuchos::RCP<Epetra_MultiVector> vecB_rcp;
00687 #endif
00688   
00689 #ifdef Bug_8212
00690   //  This demonstrates Bug #2812 - Valgrind does not catch this
00691   //  memory leak
00692   lose_this_ = (int *) malloc( 300 ) ;
00693   
00694 #ifdef Bug_8212_B
00695   //  This demonstrates Bug #2812 - Valgrind does catch this
00696   //  use of unitialized data - but only in TestOptions/TestOptions.exe 
00697   //  not in Test_Basic/amesos_test.exe   
00698   //      
00699     if ( lose_this_[0] == 12834 ) { 
00700        std::cout << __FILE__ << "::"  << __LINE__ << " very unlikely to happen " << std::endl ; 
00701     }
00702 #endif
00703 #endif
00704 
00705   if ( !TrustMe_  ) { 
00706 
00707     SerialB_ = Teuchos::rcp(Problem_->GetRHS(),false);
00708     SerialX_ = Teuchos::rcp(Problem_->GetLHS(),false);
00709     
00710     Epetra_MultiVector* OrigVecX ;
00711     Epetra_MultiVector* OrigVecB ;
00712 
00713     if (IsNumericFactorizationOK_ == false)
00714       AMESOS_CHK_ERR(NumericFactorization());
00715     
00716     ResetTimer(1);
00717     
00718     //
00719     //  Reindex the LHS and RHS 
00720     //
00721     OrigVecX = Problem_->GetLHS() ;
00722     OrigVecB = Problem_->GetRHS() ;
00723     
00724     if ( Reindex_ ) { 
00725 #ifdef HAVE_AMESOS_EPETRAEXT
00726       vecX_rcp = StdIndexDomain_->StandardizeIndex( *OrigVecX ) ;
00727       vecB_rcp = StdIndexRange_->StandardizeIndex( *OrigVecB ) ;
00728 
00729       vecX = &*vecX_rcp;
00730       vecB = &*vecB_rcp;
00731 #else
00732       AMESOS_CHK_ERR( -13 ) ; // Amesos_Klu can't handle non-standard indexing without EpetraExt 
00733 #endif
00734     } else {
00735       vecX = OrigVecX ;
00736       vecB = OrigVecB ;
00737     } 
00738     
00739     if ((vecX == 0) || (vecB == 0))
00740       AMESOS_CHK_ERR(-1); // something wrong in input
00741     
00742     //  Extract Serial versions of X and B
00743     
00744     ResetTimer(0);
00745 
00746     //  Copy B to the serial version of B
00747     //
00748     if (UseDataInPlace_ == 1) {
00749 #ifdef HAVE_AMESOS_EPETRAEXT
00750       if(vecX_rcp==Teuchos::null)
00751          SerialX_ = Teuchos::rcp(vecX,false);
00752       else
00753          SerialX_ = vecX_rcp;
00754 
00755       if(vecB_rcp==Teuchos::null)
00756          SerialB_ = Teuchos::rcp(vecB,false);
00757       else 
00758          SerialB_ = vecB_rcp;
00759 #else
00760       SerialB_ = Teuchos::rcp(vecB,false);
00761       SerialX_ = Teuchos::rcp(vecX,false);
00762 #endif
00763       NumVectors_ = Problem_->GetRHS()->NumVectors() ; 
00764     } else {
00765       assert (UseDataInPlace_ == 0);
00766       
00767       if( NumVectors_ != Problem_->GetRHS()->NumVectors() ) {
00768   NumVectors_ = Problem_->GetRHS()->NumVectors() ; 
00769   SerialXextract_ = rcp( new Epetra_MultiVector(*SerialMap_,NumVectors_));
00770   SerialBextract_ = rcp (new Epetra_MultiVector(*SerialMap_,NumVectors_));
00771       }
00772       if (NumVectors_ != vecB->NumVectors())
00773   AMESOS_CHK_ERR(-1); // internal error 
00774       
00775       //ImportRangeToSerial_ = rcp(new Epetra_Import ( *SerialMap_, vecB->Map() ) );
00776       //if ( SerialBextract_->Import(*vecB,*ImportRangeToSerial_,Insert) )
00777       Epetra_Import *UseImport;
00778       if(!UseTranspose_) UseImport=&*ImportRangeToSerial_;
00779       else UseImport=&*ImportDomainToSerial_;      
00780       if ( SerialBextract_->Import(*vecB,*UseImport,Insert) )
00781   AMESOS_CHK_ERR( -1 ) ; // internal error
00782       
00783       SerialB_ = Teuchos::rcp(&*SerialBextract_,false) ;
00784       SerialX_ = Teuchos::rcp(&*SerialXextract_,false) ;
00785     }
00786     
00787     VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);
00788 
00789     //  Call KLU to perform the solve
00790     
00791     ResetTimer(0);
00792     if (MyPID_ == 0) {
00793       AMESOS_CHK_ERR(SerialB_->ExtractView(&SerialBvalues_,&SerialXlda_ ));
00794       AMESOS_CHK_ERR(SerialX_->ExtractView(&SerialXBvalues_,&SerialXlda_ ));
00795       if (SerialXlda_ != NumGlobalElements_)
00796   AMESOS_CHK_ERR(-1);
00797     }
00798 
00799     OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
00800   }
00801   else
00802   {
00803      SerialB_ = Teuchos::rcp(Problem_->GetRHS(),false) ;
00804      SerialX_ = Teuchos::rcp(Problem_->GetLHS(),false) ;
00805      NumVectors_ = SerialX_->NumVectors();
00806     if (MyPID_ == 0) {
00807       AMESOS_CHK_ERR(SerialB_->ExtractView(&SerialBvalues_,&SerialXlda_ ));
00808       AMESOS_CHK_ERR(SerialX_->ExtractView(&SerialXBvalues_,&SerialXlda_ ));
00809     }
00810   }
00811 
00812   if ( MyPID_ == 0) {
00813     if ( NumVectors_ == 1 ) {
00814       for ( int i = 0 ; i < NumGlobalElements_ ; i++ ) 
00815   SerialXBvalues_[i] = SerialBvalues_[i] ;
00816     } else {
00817       SerialX_->Scale(1.0, *SerialB_ ) ;    // X = B (Klu overwrites B with X)
00818     }
00819     if (UseTranspose()) {
00820       amesos_klu_solve( &*PrivateKluData_->Symbolic_, &*PrivateKluData_->Numeric_,
00821       SerialXlda_, NumVectors_, &SerialXBvalues_[0], &*PrivateKluData_->common_ );
00822     } else {
00823       amesos_klu_tsolve( &*PrivateKluData_->Symbolic_, &*PrivateKluData_->Numeric_,
00824        SerialXlda_, NumVectors_, &SerialXBvalues_[0], &*PrivateKluData_->common_ );
00825     }
00826   }
00827 
00828   if ( !TrustMe_ ) {
00829     SolveTime_ = AddTime("Total solve time", SolveTime_, 0);
00830     
00831     //  Copy X back to the original vector
00832     
00833     ResetTimer(0);
00834     ResetTimer(1);
00835     
00836     if (UseDataInPlace_ == 0) {
00837       Epetra_Import *UseImport;
00838       if(!UseTranspose_) UseImport=&*ImportDomainToSerial_;
00839       else UseImport=&*ImportRangeToSerial_;      
00840       //        ImportDomainToSerial_ = rcp(new Epetra_Import ( *SerialMap_, vecX->Map() ) );
00841       vecX->Export( *SerialX_, *UseImport, Insert ) ;
00842       
00843     } // otherwise we are already in place.
00844     
00845     VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);
00846     
00847 #if 0
00848     //
00849     //  ComputeTrueResidual causes TestOptions to fail on my linux box 
00850     //  Bug #1417
00851     if (ComputeTrueResidual_)
00852       ComputeTrueResidual(*SerialMatrix_, *vecX, *vecB, UseTranspose(), "Amesos_Klu");
00853 #endif
00854     
00855     if (ComputeVectorNorms_)
00856       ComputeVectorNorms(*vecX, *vecB, "Amesos_Klu");
00857     
00858     OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
00859     
00860   }
00861   ++NumSolve_;
00862   
00863   return(0) ;
00864 }
00865 
00866 // ================================================ ====== ==== ==== == =
00867 
00868 void Amesos_Klu::PrintStatus() const
00869 {
00870 
00871   if (MyPID_) return;
00872 
00873   PrintLine();
00874 
00875   std::cout << "Amesos_Klu : Matrix has " << NumGlobalElements_ << " rows"
00876        << " and " << numentries_ << " nonzeros" << std::endl;
00877   std::cout << "Amesos_Klu : Nonzero elements per row = "
00878        << 1.0*numentries_/NumGlobalElements_ << std::endl;
00879   std::cout << "Amesos_Klu : Percentage of nonzero elements = "
00880        << 100.0*numentries_/(pow(double(NumGlobalElements_),double(2.0))) << std::endl;
00881   std::cout << "Amesos_Klu : Use transpose = " << UseTranspose_ << std::endl;
00882 
00883   PrintLine();
00884 
00885   return;
00886 
00887 }
00888 
00889 // ================================================ ====== ==== ==== == =
00890 
00891 void Amesos_Klu::PrintTiming() const
00892 {
00893   if (MyPID_) return;
00894 
00895   double ConTime = GetTime(MtxConvTime_);
00896   double MatTime = GetTime(MtxRedistTime_);
00897   double VecTime = GetTime(VecRedistTime_);
00898   double SymTime = GetTime(SymFactTime_);
00899   double NumTime = GetTime(NumFactTime_);
00900   double SolTime = GetTime(SolveTime_);
00901   double OveTime = GetTime(OverheadTime_);
00902 
00903   if (NumSymbolicFact_)
00904     SymTime /= NumSymbolicFact_;
00905 
00906   if (NumNumericFact_)
00907     NumTime /= NumNumericFact_;
00908 
00909   if (NumSolve_)
00910     SolTime /= NumSolve_;
00911 
00912   std::string p = "Amesos_Klu : ";
00913   PrintLine();
00914 
00915   std::cout << p << "Time to convert matrix to Klu format = "
00916        << ConTime << " (s)" << std::endl;
00917   std::cout << p << "Time to redistribute matrix = "
00918        << MatTime << " (s)" << std::endl;
00919   std::cout << p << "Time to redistribute vectors = "
00920        << VecTime << " (s)" << std::endl;
00921   std::cout << p << "Number of symbolic factorizations = "
00922        << NumSymbolicFact_ << std::endl;
00923   std::cout << p << "Time for sym fact = "
00924        << SymTime * NumSymbolicFact_ << " (s), avg = " << SymTime << " (s)" << std::endl;
00925   std::cout << p << "Number of numeric factorizations = "
00926        << NumNumericFact_ << std::endl;
00927   std::cout << p << "Time for num fact = "
00928        << NumTime * NumNumericFact_ << " (s), avg = " << NumTime << " (s)" << std::endl;
00929   std::cout << p << "Number of solve phases = "
00930        << NumSolve_ << std::endl;
00931   std::cout << p << "Time for solve = "
00932        << SolTime * NumSolve_ << " (s), avg = " << SolTime << " (s)" << std::endl;
00933 
00934   double tt = SymTime * NumSymbolicFact_ + NumTime * NumNumericFact_ + SolTime * NumSolve_;
00935   if (tt != 0)
00936   {
00937     std::cout << p << "Total time spent in Amesos = " << tt << " (s) " << std::endl;
00938     std::cout << p << "Total time spent in the Amesos interface = " << OveTime << " (s)" << std::endl;
00939     std::cout << p << "(the above time does not include KLU time)" << std::endl;
00940     std::cout << p << "Amesos interface time / total time = " << OveTime / tt << std::endl;
00941   }
00942 
00943   PrintLine();
00944 
00945   return;
00946 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines