Ifpack_ILUT.cpp

00001 /*@HEADER
00002 // ***********************************************************************
00003 // 
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Ifpack_ConfigDefs.h"
00031 #include "Ifpack_Preconditioner.h"
00032 #include "Ifpack_ILUT.h"
00033 #include "Ifpack_Condest.h"
00034 #include "Ifpack_Utils.h"
00035 #include "Ifpack_HashTable.h"
00036 #include "Epetra_SerialComm.h"
00037 #include "Epetra_Comm.h"
00038 #include "Epetra_Map.h"
00039 #include "Epetra_RowMatrix.h"
00040 #include "Epetra_CrsMatrix.h"
00041 #include "Epetra_Vector.h"
00042 #include "Epetra_MultiVector.h"
00043 #include "Epetra_Util.h"
00044 #include "Teuchos_ParameterList.hpp"
00045 #include "Teuchos_RefCountPtr.hpp"
00046 #include <functional>
00047 
00048 using namespace Teuchos;
00049 
00050 //==============================================================================
00051 // FIXME: allocate Comm_ and Time_ the first Initialize() call
00052 Ifpack_ILUT::Ifpack_ILUT(const Epetra_RowMatrix* A) :
00053   A_(*A),
00054   Comm_(A->Comm()),
00055   Condest_(-1.0),
00056   Relax_(0.),
00057   Athresh_(0.0),
00058   Rthresh_(1.0),
00059   LevelOfFill_(1.0),
00060   DropTolerance_(1e-12),
00061   IsInitialized_(false),
00062   IsComputed_(false),
00063   UseTranspose_(false),
00064   NumMyRows_(-1),
00065   NumInitialize_(0),
00066   NumCompute_(0),
00067   NumApplyInverse_(0),
00068   InitializeTime_(0.0),
00069   ComputeTime_(0.0),
00070   ApplyInverseTime_(0.0),
00071   ComputeFlops_(0.0),
00072   ApplyInverseFlops_(0.0),
00073   Time_(Comm()),
00074   GlobalNonzeros_(0)
00075 {
00076   // do nothing here..
00077 }
00078 
00079 //==============================================================================
00080 Ifpack_ILUT::~Ifpack_ILUT()
00081 {
00082   Destroy();
00083 }
00084 
00085 //==============================================================================
00086 void Ifpack_ILUT::Destroy()
00087 {
00088   IsInitialized_ = false;
00089   IsComputed_ = false;
00090 }
00091 
00092 //==========================================================================
00093 int Ifpack_ILUT::SetParameters(Teuchos::ParameterList& List)
00094 {
00095   try 
00096   {
00097     LevelOfFill_ = List.get<double>("fact: ilut level-of-fill", LevelOfFill());
00098     if (LevelOfFill_ <= 0.0)
00099       IFPACK_CHK_ERR(-2); // must be greater than 0.0
00100 
00101     Athresh_ = List.get<double>("fact: absolute threshold", Athresh_);
00102     Rthresh_ = List.get<double>("fact: relative threshold", Rthresh_);
00103     Relax_ = List.get<double>("fact: relax value", Relax_);
00104     DropTolerance_ = List.get<double>("fact: drop tolerance", DropTolerance_);
00105 
00106     Label_ = "IFPACK ILUT (fill=" + Ifpack_toString(LevelOfFill())
00107       + ", relax=" + Ifpack_toString(RelaxValue())
00108       + ", athr=" + Ifpack_toString(AbsoluteThreshold())
00109       + ", rthr=" + Ifpack_toString(RelativeThreshold())
00110       + ", droptol=" + Ifpack_toString(DropTolerance())
00111       + ")";
00112     return(0);
00113   }
00114   catch (...)
00115   {
00116     cerr << "Caught an exception while parsing the parameter list" << endl;
00117     cerr << "This typically means that a parameter was set with the" << endl;
00118     cerr << "wrong type (for example, int instead of double). " << endl;
00119     cerr << "please check the documentation for the type required by each parameer." << endl;
00120     IFPACK_CHK_ERR(-1);
00121   }
00122 
00123   return(0);
00124 }
00125 
00126 //==========================================================================
00127 int Ifpack_ILUT::Initialize()
00128 {
00129   // delete previously allocated factorization
00130   Destroy();
00131 
00132   Time_.ResetStartTime();
00133 
00134   // check only in serial
00135   if (Comm().NumProc() == 1 && Matrix().NumMyRows() != Matrix().NumMyCols())
00136     IFPACK_CHK_ERR(-2);
00137     
00138   NumMyRows_ = Matrix().NumMyRows();
00139 
00140   // nothing else to do here
00141   IsInitialized_ = true;
00142   ++NumInitialize_;
00143   InitializeTime_ += Time_.ElapsedTime();
00144 
00145   return(0);
00146 }
00147 
00148 //==========================================================================
00149 class Ifpack_AbsComp 
00150 {
00151  public:
00152   inline bool operator()(const double& x, const double& y) 
00153   {
00154     return(IFPACK_ABS(x) > IFPACK_ABS(y));
00155   }
00156 };
00157 
00158 //==========================================================================
00159 // MS // completely rewritten the algorithm on 25-Jan-05, using more STL
00160 // MS // and in a nicer and cleaner way. Also, it is more efficient.
00161 // MS // WARNING: Still not fully tested!
00162 int Ifpack_ILUT::Compute() 
00163 {
00164   if (!IsInitialized()) 
00165     IFPACK_CHK_ERR(Initialize());
00166 
00167   Time_.ResetStartTime();
00168   IsComputed_ = false;
00169 
00170   NumMyRows_ = A_.NumMyRows();
00171   int Length = A_.MaxNumEntries();
00172   vector<int>    RowIndicesL(Length);
00173   vector<double> RowValuesL(Length);
00174   vector<int>    RowIndicesU(Length);
00175   vector<double> RowValuesU(Length);
00176   bool distributed = (Comm().NumProc() > 1)?true:false;
00177 
00178   if (distributed)
00179   {
00180     SerialComm_ = rcp(new Epetra_SerialComm);
00181     SerialMap_ = rcp(new Epetra_Map(NumMyRows_, 0, *SerialComm_));
00182     assert (SerialComm_.get() != 0);
00183     assert (SerialMap_.get() != 0);
00184   }
00185   else
00186     SerialMap_ = rcp(const_cast<Epetra_Map*>(&A_.RowMatrixRowMap()), false);
00187   
00188   int RowNnzU;
00189 
00190   L_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0));
00191   U_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0));
00192 
00193   if ((L_.get() == 0) || (U_.get() == 0))
00194     IFPACK_CHK_ERR(-5); // memory allocation error
00195 
00196   // insert first row in U_ and L_
00197   IFPACK_CHK_ERR(A_.ExtractMyRowCopy(0,Length,RowNnzU,
00198                                      &RowValuesU[0],&RowIndicesU[0]));
00199 
00200   if (distributed)
00201   {
00202     int count = 0;
00203     for (int i = 0 ;i < RowNnzU ; ++i) 
00204     {
00205       if (RowIndicesU[i] < NumMyRows_){
00206         RowIndicesU[count] = RowIndicesU[i];
00207         RowValuesU[count] = RowValuesU[i];
00208         ++count;
00209       }
00210       else
00211         continue;
00212     }
00213     RowNnzU = count;
00214   }
00215 
00216   // modify diagonal
00217   for (int i = 0 ;i < RowNnzU ; ++i) {
00218     if (RowIndicesU[i] == 0) {
00219       double& v = RowValuesU[i];
00220       v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
00221       break;
00222     }
00223   }
00224   
00225   IFPACK_CHK_ERR(U_->InsertGlobalValues(0,RowNnzU,&(RowValuesU[0]),
00226                                         &(RowIndicesU[0])));
00227    // FIXME: DOES IT WORK IN PARALLEL ??
00228   RowValuesU[0] = 1.0;
00229   RowIndicesU[0] = 0;
00230   IFPACK_CHK_ERR(L_->InsertGlobalValues(0,1,&(RowValuesU[0]),
00231                                         &(RowIndicesU[0])));
00232 
00233   int hash_size = 128;
00234   while (hash_size < (int) 1.5 * A_.MaxNumEntries() * LevelOfFill())
00235     hash_size *= 2;
00236 
00237   Ifpack_HashTable SingleRowU(hash_size - 1, 1);
00238   Ifpack_HashTable SingleRowL(hash_size - 1, 1);
00239 
00240   vector<int> keys;      keys.reserve(hash_size * 10);
00241   vector<double> values; values.reserve(hash_size * 10);
00242   vector<double> AbsRow; AbsRow.reserve(hash_size * 10);
00243 
00244   // =================== //
00245   // start factorization //
00246   // =================== //
00247   
00248   double this_proc_flops = 0.0;
00249 
00250   for (int row_i = 1 ; row_i < NumMyRows_ ; ++row_i) 
00251   {
00252     // get row `row_i' of the matrix, store in U pointers
00253     IFPACK_CHK_ERR(A_.ExtractMyRowCopy(row_i,Length,RowNnzU,
00254                                        &RowValuesU[0],&RowIndicesU[0]));
00255 
00256     if (distributed)
00257     {
00258       int count = 0;
00259       for (int i = 0 ;i < RowNnzU ; ++i) 
00260       {
00261         if (RowIndicesU[i] < NumMyRows_){
00262           RowIndicesU[count] = RowIndicesU[i];
00263           RowValuesU[count] = RowValuesU[i];
00264           ++count;
00265         }
00266         else
00267           continue;
00268       }
00269       RowNnzU = count;
00270     }
00271 
00272     int NnzLower = 0;
00273     int NnzUpper = 0;
00274 
00275     for (int i = 0 ;i < RowNnzU ; ++i) {
00276       if (RowIndicesU[i] < row_i)
00277         NnzLower++;
00278       else if (RowIndicesU[i] == row_i) {
00279         // add threshold
00280         NnzUpper++;
00281         double& v = RowValuesU[i];
00282         v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
00283       }
00284       else
00285         NnzUpper++;
00286     }
00287 
00288     int FillL = (int)(LevelOfFill() * NnzLower);
00289     int FillU = (int)(LevelOfFill() * NnzUpper);
00290     if (FillL == 0) FillL = 1;
00291     if (FillU == 0) FillU = 1;
00292 
00293     // convert line `row_i' into STL map for fast access
00294     SingleRowU.reset();
00295 
00296     for (int i = 0 ; i < RowNnzU ; ++i) {
00297         SingleRowU.set(RowIndicesU[i], RowValuesU[i]);
00298     }
00299       
00300     // for the multipliers
00301     SingleRowL.reset();
00302 
00303     int start_col = NumMyRows_;
00304     for (int i = 0 ; i < RowNnzU ; ++i)
00305       start_col = EPETRA_MIN(start_col, RowIndicesU[i]);
00306 
00307     int flops = 0;
00308   
00309     for (int col_k = start_col ; col_k < row_i ; ++col_k) {
00310 
00311       int*    ColIndicesK;
00312       double* ColValuesK;
00313       int     ColNnzK;
00314 
00315       IFPACK_CHK_ERR(U_->ExtractGlobalRowView(col_k, ColNnzK, ColValuesK, 
00316                                               ColIndicesK));
00317 
00318       // FIXME: can keep trace of diagonals
00319       double DiagonalValueK = 0.0;
00320       for (int i = 0 ; i < ColNnzK ; ++i) {
00321         if (ColIndicesK[i] == col_k) {
00322           DiagonalValueK = ColValuesK[i];
00323           break;
00324         }
00325       }
00326       
00327       // FIXME: this should never happen!
00328       if (DiagonalValueK == 0.0)
00329         DiagonalValueK = AbsoluteThreshold();
00330       
00331       double xxx = SingleRowU.get(col_k);
00332       if (IFPACK_ABS(xxx) > DropTolerance()) {
00333         SingleRowL.set(col_k, xxx / DiagonalValueK);
00334         ++flops;
00335 
00336         for (int j = 0 ; j < ColNnzK ; ++j) {
00337           int col_j = ColIndicesK[j];
00338 
00339           if (col_j < col_k) continue;
00340 
00341           double yyy = SingleRowL.get(col_k);
00342           if (yyy !=  0.0)
00343             SingleRowU.set(col_j, -yyy * ColValuesK[j], true);
00344           flops += 2;
00345         }
00346       }
00347     }
00348 
00349     this_proc_flops += 1.0 * flops;
00350 
00351     double cutoff = DropTolerance();
00352     double DiscardedElements = 0.0;
00353     int count;
00354 
00355     // drop elements to satisfy LevelOfFill(), start with L
00356     count = 0;
00357     int sizeL = SingleRowL.getNumEntries();
00358     keys.resize(sizeL);
00359     values.resize(sizeL);
00360 
00361     AbsRow.resize(sizeL);
00362 
00363     SingleRowL.arrayify(
00364       keys.size() ? &keys[0] : 0,
00365       values.size() ? &values[0] : 0
00366       );
00367     for (int i = 0; i < sizeL; ++i)
00368       if (IFPACK_ABS(values[i]) > DropTolerance()) {
00369         AbsRow[count++] = IFPACK_ABS(values[i]);
00370       }
00371 
00372     if (count > FillL) {
00373       nth_element(AbsRow.begin(), AbsRow.begin() + FillL, AbsRow.begin() + count, 
00374                   greater<double>());
00375       cutoff = AbsRow[FillL];
00376     }
00377 
00378     for (int i = 0; i < sizeL; ++i) {
00379       if (IFPACK_ABS(values[i]) >= cutoff) {
00380         IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &values[i], (int*)&keys[i]));
00381       }
00382       else
00383         DiscardedElements += values[i];
00384     }
00385 
00386     // FIXME: DOES IT WORK IN PARALLEL ???
00387     // add 1 to the diagonal
00388     double dtmp = 1.0;
00389     IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &dtmp, &row_i));
00390 
00391     // same business with U_
00392     count = 0;
00393     int sizeU = SingleRowU.getNumEntries();
00394     AbsRow.resize(sizeU + 1);
00395     keys.resize(sizeU + 1);
00396     values.resize(sizeU + 1);
00397 
00398     SingleRowU.arrayify(&keys[0], &values[0]);
00399 
00400     for (int i = 0; i < sizeU; ++i)
00401       if (keys[i] >= row_i && IFPACK_ABS(values[i]) > DropTolerance())
00402       {
00403         AbsRow[count++] = IFPACK_ABS(values[i]);
00404       }
00405 
00406     if (count > FillU) {
00407       nth_element(AbsRow.begin(), AbsRow.begin() + FillU, AbsRow.begin() + count, 
00408                   greater<double>());
00409       cutoff = AbsRow[FillU];
00410     }
00411 
00412     // sets the factors in U_
00413     for (int i = 0; i < sizeU; ++i) 
00414     {
00415       int col = keys[i];
00416       double val = values[i];
00417 
00418       if (col >= row_i) {
00419         if (IFPACK_ABS(val) >= cutoff || row_i == col) {
00420           IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &val, &col));
00421         }
00422         else
00423           DiscardedElements += val;
00424       }
00425     }
00426 
00427     // FIXME: not so sure of that!
00428     if (RelaxValue() != 0.0) {
00429       DiscardedElements *= RelaxValue();
00430       IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &DiscardedElements,
00431                                             &row_i));
00432     }
00433   }
00434 
00435   double tf;
00436   Comm().SumAll(&this_proc_flops, &tf, 1);
00437   ComputeFlops_ += tf;
00438 
00439   IFPACK_CHK_ERR(L_->FillComplete());
00440   IFPACK_CHK_ERR(U_->FillComplete());
00441 
00442 #if 0
00443   // to check the complete factorization
00444   Epetra_Vector LHS(A_.RowMatrixRowMap());
00445   Epetra_Vector RHS1(A_.RowMatrixRowMap());
00446   Epetra_Vector RHS2(A_.RowMatrixRowMap());
00447   Epetra_Vector RHS3(A_.RowMatrixRowMap());
00448   LHS.Random();
00449 
00450   cout << "A = " << A_.NumGlobalNonzeros() << endl;
00451   cout << "L = " << L_->NumGlobalNonzeros() << endl;
00452   cout << "U = " << U_->NumGlobalNonzeros() << endl;
00453 
00454   A_.Multiply(false,LHS,RHS1);
00455   U_->Multiply(false,LHS,RHS2);
00456   L_->Multiply(false,RHS2,RHS3);
00457 
00458   RHS1.Update(-1.0, RHS3, 1.0);
00459   double Norm;
00460   RHS1.Norm2(&Norm);
00461 #endif
00462 
00463   int MyNonzeros = L_->NumGlobalNonzeros() + U_->NumGlobalNonzeros();
00464   Comm().SumAll(&MyNonzeros, &GlobalNonzeros_, 1);
00465 
00466   IsComputed_ = true;
00467 
00468   ++NumCompute_;
00469   ComputeTime_ += Time_.ElapsedTime();
00470 
00471   return(0);
00472 
00473 }
00474   
00475 //=============================================================================
00476 int Ifpack_ILUT::ApplyInverse(const Epetra_MultiVector& X, 
00477                  Epetra_MultiVector& Y) const
00478 {
00479   if (!IsComputed())
00480     IFPACK_CHK_ERR(-2); // compute preconditioner first
00481 
00482   if (X.NumVectors() != Y.NumVectors()) 
00483     IFPACK_CHK_ERR(-3); // Return error: X and Y not the same size
00484 
00485   Time_.ResetStartTime();
00486 
00487   // NOTE: L_ and U_ are based on SerialMap_, while Xcopy is based
00488   // on A.Map()... which are in general different. However, Solve()
00489   // does not seem to care... which is fine with me.
00490   //
00491   // AztecOO gives X and Y pointing to the same memory location,
00492   // need to create an auxiliary vector, Xcopy
00493   Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy;
00494   if (X.Pointers()[0] == Y.Pointers()[0])
00495     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00496   else
00497     Xcopy = Teuchos::rcp( &X, false );
00498 
00499   if (!UseTranspose_)
00500   {
00501     // solves LU Y = X 
00502     IFPACK_CHK_ERR(L_->Solve(false,false,false,*Xcopy,Y));
00503     IFPACK_CHK_ERR(U_->Solve(true,false,false,Y,Y));
00504   }
00505   else
00506   {
00507     // solves U(trans) L(trans) Y = X
00508     IFPACK_CHK_ERR(U_->Solve(true,true,false,*Xcopy,Y));
00509     IFPACK_CHK_ERR(L_->Solve(false,true,false,Y,Y));
00510   }
00511 
00512   ++NumApplyInverse_;
00513   ApplyInverseFlops_ += X.NumVectors() * 2 * GlobalNonzeros_;
00514   ApplyInverseTime_ += Time_.ElapsedTime();
00515 
00516   return(0);
00517 
00518 }
00519 //=============================================================================
00520 // This function finds X such that LDU Y = X or U(trans) D L(trans) Y = X for multiple RHS
00521 int Ifpack_ILUT::Apply(const Epetra_MultiVector& X, 
00522               Epetra_MultiVector& Y) const 
00523 {
00524   return(-98);
00525 }
00526 
00527 //=============================================================================
00528 double Ifpack_ILUT::Condest(const Ifpack_CondestType CT, 
00529                             const int MaxIters, const double Tol,
00530                 Epetra_RowMatrix* Matrix_in)
00531 {
00532   if (!IsComputed()) // cannot compute right now
00533     return(-1.0);
00534 
00535   // NOTE: this is computing the *local* condest
00536   if (Condest_ == -1.0)
00537     Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
00538 
00539   return(Condest_);
00540 }
00541 
00542 //=============================================================================
00543 std::ostream&
00544 Ifpack_ILUT::Print(std::ostream& os) const
00545 {
00546   if (!Comm().MyPID()) {
00547     os << endl;
00548     os << "================================================================================" << endl;
00549     os << "Ifpack_ILUT: " << Label() << endl << endl;
00550     os << "Level-of-fill      = " << LevelOfFill() << endl;
00551     os << "Absolute threshold = " << AbsoluteThreshold() << endl;
00552     os << "Relative threshold = " << RelativeThreshold() << endl;
00553     os << "Relax value        = " << RelaxValue() << endl;
00554     os << "Condition number estimate       = " << Condest() << endl;
00555     os << "Global number of rows           = " << A_.NumGlobalRows() << endl;
00556     if (IsComputed_) {
00557       os << "Number of nonzeros in A         = " << A_.NumGlobalNonzeros() << endl;
00558       os << "Number of nonzeros in L + U     = " << NumGlobalNonzeros() 
00559          << " ( = " << 100.0 * NumGlobalNonzeros() / A_.NumGlobalNonzeros() 
00560          << " % of A)" << endl;
00561       os << "nonzeros / rows                 = " 
00562         << 1.0 * NumGlobalNonzeros() / U_->NumGlobalRows() << endl;
00563     }
00564     os << endl;
00565     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
00566     os << "-----           -------   --------------       ------------     --------" << endl;
00567     os << "Initialize()    "   << std::setw(5) << NumInitialize() 
00568        << "  " << std::setw(15) << InitializeTime() 
00569        << "               0.0            0.0" << endl;
00570     os << "Compute()       "   << std::setw(5) << NumCompute() 
00571        << "  " << std::setw(15) << ComputeTime()
00572        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops(); 
00573     if (ComputeTime() != 0.0)
00574       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops() / ComputeTime() << endl;
00575     else
00576       os << "  " << std::setw(15) << 0.0 << endl;
00577     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse() 
00578        << "  " << std::setw(15) << ApplyInverseTime()
00579        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops();
00580     if (ApplyInverseTime() != 0.0) 
00581       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops() / ApplyInverseTime() << endl;
00582     else
00583       os << "  " << std::setw(15) << 0.0 << endl;
00584     os << "================================================================================" << endl;
00585     os << endl;
00586   }
00587 
00588   return(os);
00589 }

Generated on Tue Jul 13 09:27:13 2010 for IFPACK by  doxygen 1.4.7