Ifpack_ILUT.cpp

00001 /*@HEADER
00002 // ***********************************************************************
00003 // 
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Ifpack_ConfigDefs.h"
00031 #ifdef HAVE_IFPACK_TEUCHOS
00032 #include "Ifpack_Preconditioner.h"
00033 #include "Ifpack_ILUT.h"
00034 #include "Ifpack_Condest.h"
00035 #include "Ifpack_Utils.h"
00036 #include "Ifpack_HashTable.h"
00037 #include "Epetra_SerialComm.h"
00038 #include "Epetra_Comm.h"
00039 #include "Epetra_Map.h"
00040 #include "Epetra_RowMatrix.h"
00041 #include "Epetra_CrsMatrix.h"
00042 #include "Epetra_Vector.h"
00043 #include "Epetra_MultiVector.h"
00044 #include "Epetra_Util.h"
00045 #include "Teuchos_ParameterList.hpp"
00046 #include "Teuchos_RefCountPtr.hpp"
00047 #include <functional>
00048 
00049 using namespace Teuchos;
00050 
00051 //==============================================================================
00052 // FIXME: allocate Comm_ and Time_ the first Initialize() call
00053 Ifpack_ILUT::Ifpack_ILUT(const Epetra_RowMatrix* A) :
00054   A_(*A),
00055   Comm_(A->Comm()),
00056   Condest_(-1.0),
00057   Relax_(0.),
00058   Athresh_(0.0),
00059   Rthresh_(1.0),
00060   LevelOfFill_(1.0),
00061   DropTolerance_(1e-12),
00062   IsInitialized_(false),
00063   IsComputed_(false),
00064   UseTranspose_(false),
00065   NumMyRows_(-1),
00066   NumInitialize_(0),
00067   NumCompute_(0),
00068   NumApplyInverse_(0),
00069   InitializeTime_(0.0),
00070   ComputeTime_(0.0),
00071   ApplyInverseTime_(0.0),
00072   ComputeFlops_(0.0),
00073   ApplyInverseFlops_(0.0),
00074   Time_(Comm()),
00075   GlobalNonzeros_(0)
00076 {
00077   // do nothing here..
00078 }
00079 
00080 //==============================================================================
00081 Ifpack_ILUT::~Ifpack_ILUT()
00082 {
00083   Destroy();
00084 }
00085 
00086 //==============================================================================
00087 void Ifpack_ILUT::Destroy()
00088 {
00089   IsInitialized_ = false;
00090   IsComputed_ = false;
00091 }
00092 
00093 //==========================================================================
00094 int Ifpack_ILUT::SetParameters(Teuchos::ParameterList& List)
00095 {
00096   try 
00097   {
00098     LevelOfFill_ = List.get<double>("fact: ilut level-of-fill", LevelOfFill());
00099     if (LevelOfFill_ <= 0.0)
00100       IFPACK_CHK_ERR(-2); // must be greater than 0.0
00101 
00102     Athresh_ = List.get<double>("fact: absolute threshold", Athresh_);
00103     Rthresh_ = List.get<double>("fact: relative threshold", Rthresh_);
00104     Relax_ = List.get<double>("fact: relax value", Relax_);
00105     DropTolerance_ = List.get<double>("fact: drop tolerance", DropTolerance_);
00106 
00107     Label_ = "IFPACK ILUT (fill=" + Ifpack_toString(LevelOfFill())
00108       + ", relax=" + Ifpack_toString(RelaxValue())
00109       + ", athr=" + Ifpack_toString(AbsoluteThreshold())
00110       + ", rthr=" + Ifpack_toString(RelativeThreshold())
00111       + ", droptol=" + Ifpack_toString(DropTolerance())
00112       + ")";
00113     return(0);
00114   }
00115   catch (...)
00116   {
00117     cerr << "Caught an exception while parsing the parameter list" << endl;
00118     cerr << "This typically means that a parameter was set with the" << endl;
00119     cerr << "wrong type (for example, int instead of double). " << endl;
00120     cerr << "please check the documentation for the type required by each parameer." << endl;
00121     IFPACK_CHK_ERR(-1);
00122   }
00123 
00124   return(0);
00125 }
00126 
00127 //==========================================================================
00128 int Ifpack_ILUT::Initialize()
00129 {
00130   // delete previously allocated factorization
00131   Destroy();
00132 
00133   Time_.ResetStartTime();
00134 
00135   // check only in serial
00136   if (Comm().NumProc() == 1 && Matrix().NumMyRows() != Matrix().NumMyCols())
00137     IFPACK_CHK_ERR(-2);
00138     
00139   NumMyRows_ = Matrix().NumMyRows();
00140 
00141   // nothing else to do here
00142   IsInitialized_ = true;
00143   ++NumInitialize_;
00144   InitializeTime_ += Time_.ElapsedTime();
00145 
00146   return(0);
00147 }
00148 
00149 //==========================================================================
00150 class Ifpack_AbsComp 
00151 {
00152  public:
00153   inline bool operator()(const double& x, const double& y) 
00154   {
00155     return(IFPACK_ABS(x) > IFPACK_ABS(y));
00156   }
00157 };
00158 
00159 //==========================================================================
00160 // MS // completely rewritten the algorithm on 25-Jan-05, using more STL
00161 // MS // and in a nicer and cleaner way. Also, it is more efficient.
00162 // MS // WARNING: Still not fully tested!
00163 int Ifpack_ILUT::Compute() 
00164 {
00165   if (!IsInitialized()) 
00166     IFPACK_CHK_ERR(Initialize());
00167 
00168   Time_.ResetStartTime();
00169   IsComputed_ = false;
00170 
00171   NumMyRows_ = A_.NumMyRows();
00172   int Length = A_.MaxNumEntries();
00173   vector<int>    RowIndicesL(Length);
00174   vector<double> RowValuesL(Length);
00175   vector<int>    RowIndicesU(Length);
00176   vector<double> RowValuesU(Length);
00177   bool distributed = (Comm().NumProc() > 1)?true:false;
00178 
00179   if (distributed)
00180   {
00181     SerialComm_ = rcp(new Epetra_SerialComm);
00182     SerialMap_ = rcp(new Epetra_Map(NumMyRows_, 0, *SerialComm_));
00183     assert (SerialComm_.get() != 0);
00184     assert (SerialMap_.get() != 0);
00185   }
00186   else
00187     SerialMap_ = rcp(const_cast<Epetra_Map*>(&A_.RowMatrixRowMap()), false);
00188   
00189   int RowNnzU;
00190 
00191   L_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0));
00192   U_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0));
00193 
00194   if ((L_.get() == 0) || (U_.get() == 0))
00195     IFPACK_CHK_ERR(-5); // memory allocation error
00196 
00197   // insert first row in U_ and L_
00198   IFPACK_CHK_ERR(A_.ExtractMyRowCopy(0,Length,RowNnzU,
00199                                      &RowValuesU[0],&RowIndicesU[0]));
00200 
00201   if (distributed)
00202   {
00203     int count = 0;
00204     for (int i = 0 ;i < RowNnzU ; ++i) 
00205     {
00206       if (RowIndicesU[i] < NumMyRows_){
00207         RowIndicesU[count] = RowIndicesU[i];
00208         RowValuesU[count] = RowValuesU[i];
00209         ++count;
00210       }
00211       else
00212         continue;
00213     }
00214     RowNnzU = count;
00215   }
00216 
00217   // modify diagonal
00218   for (int i = 0 ;i < RowNnzU ; ++i) {
00219     if (RowIndicesU[i] == 0) {
00220       double& v = RowValuesU[i];
00221       v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
00222       break;
00223     }
00224   }
00225   
00226   IFPACK_CHK_ERR(U_->InsertGlobalValues(0,RowNnzU,&(RowValuesU[0]),
00227                                         &(RowIndicesU[0])));
00228    // FIXME: DOES IT WORK IN PARALLEL ??
00229   RowValuesU[0] = 1.0;
00230   RowIndicesU[0] = 0;
00231   IFPACK_CHK_ERR(L_->InsertGlobalValues(0,1,&(RowValuesU[0]),
00232                                         &(RowIndicesU[0])));
00233 
00234   int hash_size = 128;
00235   while (hash_size < (int) 1.5 * A_.MaxNumEntries() * LevelOfFill())
00236     hash_size *= 2;
00237 
00238   Ifpack_HashTable SingleRowU(hash_size - 1, 1);
00239   Ifpack_HashTable SingleRowL(hash_size - 1, 1);
00240 
00241   vector<int> keys;      keys.reserve(hash_size * 10);
00242   vector<double> values; values.reserve(hash_size * 10);
00243   vector<double> AbsRow; AbsRow.reserve(hash_size * 10);
00244 
00245   // =================== //
00246   // start factorization //
00247   // =================== //
00248   
00249   double this_proc_flops = 0.0;
00250 
00251   for (int row_i = 1 ; row_i < NumMyRows_ ; ++row_i) 
00252   {
00253     // get row `row_i' of the matrix, store in U pointers
00254     IFPACK_CHK_ERR(A_.ExtractMyRowCopy(row_i,Length,RowNnzU,
00255                                        &RowValuesU[0],&RowIndicesU[0]));
00256 
00257     if (distributed)
00258     {
00259       int count = 0;
00260       for (int i = 0 ;i < RowNnzU ; ++i) 
00261       {
00262         if (RowIndicesU[i] < NumMyRows_){
00263           RowIndicesU[count] = RowIndicesU[i];
00264           RowValuesU[count] = RowValuesU[i];
00265           ++count;
00266         }
00267         else
00268           continue;
00269       }
00270       RowNnzU = count;
00271     }
00272 
00273     int NnzLower = 0;
00274     int NnzUpper = 0;
00275 
00276     for (int i = 0 ;i < RowNnzU ; ++i) {
00277       if (RowIndicesU[i] < row_i)
00278         NnzLower++;
00279       else if (RowIndicesU[i] == row_i) {
00280         // add threshold
00281         NnzUpper++;
00282         double& v = RowValuesU[i];
00283         v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
00284       }
00285       else
00286         NnzUpper++;
00287     }
00288 
00289     int FillL = (int)(LevelOfFill() * NnzLower);
00290     int FillU = (int)(LevelOfFill() * NnzUpper);
00291     if (FillL == 0) FillL = 1;
00292     if (FillU == 0) FillU = 1;
00293 
00294     // convert line `row_i' into STL map for fast access
00295     SingleRowU.reset();
00296 
00297     for (int i = 0 ; i < RowNnzU ; ++i) {
00298         SingleRowU.set(RowIndicesU[i], RowValuesU[i]);
00299     }
00300       
00301     // for the multipliers
00302     SingleRowL.reset();
00303 
00304     int start_col = NumMyRows_;
00305     for (int i = 0 ; i < RowNnzU ; ++i)
00306       start_col = EPETRA_MIN(start_col, RowIndicesU[i]);
00307 
00308     int flops = 0;
00309   
00310     for (int col_k = start_col ; col_k < row_i ; ++col_k) {
00311 
00312       int*    ColIndicesK;
00313       double* ColValuesK;
00314       int     ColNnzK;
00315 
00316       IFPACK_CHK_ERR(U_->ExtractGlobalRowView(col_k, ColNnzK, ColValuesK, 
00317                                               ColIndicesK));
00318 
00319       // FIXME: can keep trace of diagonals
00320       double DiagonalValueK = 0.0;
00321       for (int i = 0 ; i < ColNnzK ; ++i) {
00322         if (ColIndicesK[i] == col_k) {
00323           DiagonalValueK = ColValuesK[i];
00324           break;
00325         }
00326       }
00327       
00328       // FIXME: this should never happen!
00329       if (DiagonalValueK == 0.0)
00330         DiagonalValueK = AbsoluteThreshold();
00331       
00332       double xxx = SingleRowU.get(col_k);
00333       if (IFPACK_ABS(xxx) > DropTolerance()) {
00334         SingleRowL.set(col_k, xxx / DiagonalValueK);
00335         ++flops;
00336 
00337         for (int j = 0 ; j < ColNnzK ; ++j) {
00338           int col_j = ColIndicesK[j];
00339 
00340           if (col_j < col_k) continue;
00341 
00342           double yyy = SingleRowL.get(col_k);
00343           if (yyy !=  0.0)
00344             SingleRowU.set(col_j, -yyy * ColValuesK[j], true);
00345           flops += 2;
00346         }
00347       }
00348     }
00349 
00350     this_proc_flops += 1.0 * flops;
00351 
00352     double cutoff = DropTolerance();
00353     double DiscardedElements = 0.0;
00354     int count;
00355 
00356     // drop elements to satisfy LevelOfFill(), start with L
00357     count = 0;
00358     int sizeL = SingleRowL.getNumEntries();
00359     keys.resize(sizeL);
00360     values.resize(sizeL);
00361 
00362     AbsRow.resize(sizeL);
00363 
00364     SingleRowL.arrayify(&keys[0], &values[0]);
00365     for (int i = 0; i < sizeL; ++i)
00366       if (IFPACK_ABS(values[i]) > DropTolerance()) {
00367         AbsRow[count++] = IFPACK_ABS(values[i]);
00368       }
00369 
00370     if (count > FillL) {
00371       nth_element(AbsRow.begin(), AbsRow.begin() + FillL, AbsRow.begin() + count, 
00372                   greater<double>());
00373       cutoff = AbsRow[FillL];
00374     }
00375 
00376     for (int i = 0; i < sizeL; ++i) {
00377       if (IFPACK_ABS(values[i]) >= cutoff) {
00378         IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &values[i], (int*)&keys[i]));
00379       }
00380       else
00381         DiscardedElements += values[i];
00382     }
00383 
00384     // FIXME: DOES IT WORK IN PARALLEL ???
00385     // add 1 to the diagonal
00386     double dtmp = 1.0;
00387     IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &dtmp, &row_i));
00388 
00389     // same business with U_
00390     count = 0;
00391     int sizeU = SingleRowU.getNumEntries();
00392     AbsRow.resize(sizeU + 1);
00393     keys.resize(sizeU + 1);
00394     values.resize(sizeU + 1);
00395 
00396     SingleRowU.arrayify(&keys[0], &values[0]);
00397 
00398     for (int i = 0; i < sizeU; ++i)
00399       if (keys[i] >= row_i && IFPACK_ABS(values[i]) > DropTolerance())
00400       {
00401         AbsRow[count++] = IFPACK_ABS(values[i]);
00402       }
00403 
00404     if (count > FillU) {
00405       nth_element(AbsRow.begin(), AbsRow.begin() + FillU, AbsRow.begin() + count, 
00406                   greater<double>());
00407       cutoff = AbsRow[FillU];
00408     }
00409 
00410     // sets the factors in U_
00411     for (int i = 0; i < sizeU; ++i) 
00412     {
00413       int col = keys[i];
00414       double val = values[i];
00415 
00416       if (col >= row_i) {
00417         if (IFPACK_ABS(val) >= cutoff || row_i == col) {
00418           IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &val, &col));
00419         }
00420         else
00421           DiscardedElements += val;
00422       }
00423     }
00424 
00425     // FIXME: not so sure of that!
00426     if (RelaxValue() != 0.0) {
00427       DiscardedElements *= RelaxValue();
00428       IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &DiscardedElements,
00429                                             &row_i));
00430     }
00431   }
00432 
00433   double tf;
00434   Comm().SumAll(&this_proc_flops, &tf, 1);
00435   ComputeFlops_ += tf;
00436 
00437   IFPACK_CHK_ERR(L_->FillComplete());
00438   IFPACK_CHK_ERR(U_->FillComplete());
00439 
00440 #if 0
00441   // to check the complete factorization
00442   Epetra_Vector LHS(A_.RowMatrixRowMap());
00443   Epetra_Vector RHS1(A_.RowMatrixRowMap());
00444   Epetra_Vector RHS2(A_.RowMatrixRowMap());
00445   Epetra_Vector RHS3(A_.RowMatrixRowMap());
00446   LHS.Random();
00447 
00448   cout << "A = " << A_.NumGlobalNonzeros() << endl;
00449   cout << "L = " << L_->NumGlobalNonzeros() << endl;
00450   cout << "U = " << U_->NumGlobalNonzeros() << endl;
00451 
00452   A_.Multiply(false,LHS,RHS1);
00453   U_->Multiply(false,LHS,RHS2);
00454   L_->Multiply(false,RHS2,RHS3);
00455 
00456   RHS1.Update(-1.0, RHS3, 1.0);
00457   double Norm;
00458   RHS1.Norm2(&Norm);
00459 #endif
00460 
00461   int MyNonzeros = L_->NumGlobalNonzeros() + U_->NumGlobalNonzeros();
00462   Comm().SumAll(&MyNonzeros, &GlobalNonzeros_, 1);
00463 
00464   IsComputed_ = true;
00465 
00466   ++NumCompute_;
00467   ComputeTime_ += Time_.ElapsedTime();
00468 
00469   return(0);
00470 
00471 }
00472   
00473 //=============================================================================
00474 int Ifpack_ILUT::ApplyInverse(const Epetra_MultiVector& X, 
00475                  Epetra_MultiVector& Y) const
00476 {
00477   if (!IsComputed())
00478     IFPACK_CHK_ERR(-2); // compute preconditioner first
00479 
00480   if (X.NumVectors() != Y.NumVectors()) 
00481     IFPACK_CHK_ERR(-3); // Return error: X and Y not the same size
00482 
00483   Time_.ResetStartTime();
00484 
00485   // NOTE: L_ and U_ are based on SerialMap_, while Xcopy is based
00486   // on A.Map()... which are in general different. However, Solve()
00487   // does not seem to care... which is fine with me.
00488   //
00489   // AztecOO gives X and Y pointing to the same memory location,
00490   // need to create an auxiliary vector, Xcopy
00491   const Epetra_MultiVector* Xcopy;
00492   if (X.Pointers()[0] == Y.Pointers()[0])
00493     Xcopy = new Epetra_MultiVector(X);
00494   else
00495     Xcopy = &X;
00496 
00497   if (!UseTranspose_)
00498   {
00499     // solves LU Y = X 
00500     IFPACK_CHK_ERR(L_->Solve(false,false,false,*Xcopy,Y));
00501     IFPACK_CHK_ERR(U_->Solve(true,false,false,Y,Y));
00502   }
00503   else
00504   {
00505     // solves U(trans) L(trans) Y = X
00506     IFPACK_CHK_ERR(U_->Solve(true,true,false,*Xcopy,Y));
00507     IFPACK_CHK_ERR(L_->Solve(false,true,false,Y,Y));
00508   }
00509 
00510   if (Xcopy != &X)
00511     delete Xcopy;
00512 
00513   ++NumApplyInverse_;
00514   ApplyInverseFlops_ += X.NumVectors() * 2 * GlobalNonzeros_;
00515   ApplyInverseTime_ += Time_.ElapsedTime();
00516 
00517   return(0);
00518 
00519 }
00520 //=============================================================================
00521 // This function finds X such that LDU Y = X or U(trans) D L(trans) Y = X for multiple RHS
00522 int Ifpack_ILUT::Apply(const Epetra_MultiVector& X, 
00523               Epetra_MultiVector& Y) const 
00524 {
00525 
00526   return(-98);
00527 }
00528 
00529 //=============================================================================
00530 double Ifpack_ILUT::Condest(const Ifpack_CondestType CT, 
00531                             const int MaxIters, const double Tol,
00532                 Epetra_RowMatrix* Matrix)
00533 {
00534   if (!IsComputed()) // cannot compute right now
00535     return(-1.0);
00536 
00537   // NOTE: this is computing the *local* condest
00538   if (Condest_ == -1.0)
00539     Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix);
00540 
00541   return(Condest_);
00542 }
00543 
00544 //=============================================================================
00545 std::ostream&
00546 Ifpack_ILUT::Print(std::ostream& os) const
00547 {
00548   if (!Comm().MyPID()) {
00549     os << endl;
00550     os << "================================================================================" << endl;
00551     os << "Ifpack_ILUT: " << Label() << endl << endl;
00552     os << "Level-of-fill      = " << LevelOfFill() << endl;
00553     os << "Absolute threshold = " << AbsoluteThreshold() << endl;
00554     os << "Relative threshold = " << RelativeThreshold() << endl;
00555     os << "Relax value        = " << RelaxValue() << endl;
00556     os << "Condition number estimate       = " << Condest() << endl;
00557     os << "Global number of rows           = " << A_.NumGlobalRows() << endl;
00558     if (IsComputed_) {
00559       os << "Number of nonzeros in A         = " << A_.NumGlobalNonzeros() << endl;
00560       os << "Number of nonzeros in L + U     = " << NumGlobalNonzeros() 
00561          << " ( = " << 100.0 * NumGlobalNonzeros() / A_.NumGlobalNonzeros() 
00562          << " % of A)" << endl;
00563       os << "nonzeros / rows                 = " 
00564         << 1.0 * NumGlobalNonzeros() / U_->NumGlobalRows() << endl;
00565     }
00566     os << endl;
00567     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
00568     os << "-----           -------   --------------       ------------     --------" << endl;
00569     os << "Initialize()    "   << std::setw(5) << NumInitialize() 
00570        << "  " << std::setw(15) << InitializeTime() 
00571        << "               0.0            0.0" << endl;
00572     os << "Compute()       "   << std::setw(5) << NumCompute() 
00573        << "  " << std::setw(15) << ComputeTime()
00574        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops(); 
00575     if (ComputeTime() != 0.0)
00576       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops() / ComputeTime() << endl;
00577     else
00578       os << "  " << std::setw(15) << 0.0 << endl;
00579     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse() 
00580        << "  " << std::setw(15) << ApplyInverseTime()
00581        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops();
00582     if (ApplyInverseTime() != 0.0) 
00583       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops() / ApplyInverseTime() << endl;
00584     else
00585       os << "  " << std::setw(15) << 0.0 << endl;
00586     os << "================================================================================" << endl;
00587     os << endl;
00588   }
00589 
00590   return(os);
00591 }
00592 #endif // HAVE_IFPACK_TEUCHOS

Generated on Thu Sep 18 12:37:07 2008 for IFPACK by doxygen 1.3.9.1