Ifpack_ILUT.cpp

00001 /*@HEADER
00002 // ***********************************************************************
00003 // 
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Ifpack_ConfigDefs.h"
00031 #include "Ifpack_Preconditioner.h"
00032 #include "Ifpack_ILUT.h"
00033 #include "Ifpack_Condest.h"
00034 #include "Ifpack_Utils.h"
00035 #include "Ifpack_HashTable.h"
00036 #include "Epetra_SerialComm.h"
00037 #include "Epetra_Comm.h"
00038 #include "Epetra_Map.h"
00039 #include "Epetra_RowMatrix.h"
00040 #include "Epetra_CrsMatrix.h"
00041 #include "Epetra_Vector.h"
00042 #include "Epetra_MultiVector.h"
00043 #include "Epetra_Util.h"
00044 #include "Teuchos_ParameterList.hpp"
00045 #include "Teuchos_RefCountPtr.hpp"
00046 #include <functional>
00047 
00048 using namespace Teuchos;
00049 
00050 //==============================================================================
00051 // FIXME: allocate Comm_ and Time_ the first Initialize() call
00052 Ifpack_ILUT::Ifpack_ILUT(const Epetra_RowMatrix* A) :
00053   A_(*A),
00054   Comm_(A->Comm()),
00055   Condest_(-1.0),
00056   Relax_(0.),
00057   Athresh_(0.0),
00058   Rthresh_(1.0),
00059   LevelOfFill_(1.0),
00060   DropTolerance_(1e-12),
00061   IsInitialized_(false),
00062   IsComputed_(false),
00063   UseTranspose_(false),
00064   NumMyRows_(-1),
00065   NumInitialize_(0),
00066   NumCompute_(0),
00067   NumApplyInverse_(0),
00068   InitializeTime_(0.0),
00069   ComputeTime_(0.0),
00070   ApplyInverseTime_(0.0),
00071   ComputeFlops_(0.0),
00072   ApplyInverseFlops_(0.0),
00073   Time_(Comm()),
00074   GlobalNonzeros_(0)
00075 {
00076   // do nothing here..
00077 }
00078 
00079 //==============================================================================
00080 Ifpack_ILUT::~Ifpack_ILUT()
00081 {
00082   Destroy();
00083 }
00084 
00085 //==============================================================================
00086 void Ifpack_ILUT::Destroy()
00087 {
00088   IsInitialized_ = false;
00089   IsComputed_ = false;
00090 }
00091 
00092 //==========================================================================
00093 int Ifpack_ILUT::SetParameters(Teuchos::ParameterList& List)
00094 {
00095   try 
00096   {
00097     LevelOfFill_ = List.get<double>("fact: ilut level-of-fill", LevelOfFill());
00098     if (LevelOfFill_ <= 0.0)
00099       IFPACK_CHK_ERR(-2); // must be greater than 0.0
00100 
00101     Athresh_ = List.get<double>("fact: absolute threshold", Athresh_);
00102     Rthresh_ = List.get<double>("fact: relative threshold", Rthresh_);
00103     Relax_ = List.get<double>("fact: relax value", Relax_);
00104     DropTolerance_ = List.get<double>("fact: drop tolerance", DropTolerance_);
00105 
00106     Label_ = "IFPACK ILUT (fill=" + Ifpack_toString(LevelOfFill())
00107       + ", relax=" + Ifpack_toString(RelaxValue())
00108       + ", athr=" + Ifpack_toString(AbsoluteThreshold())
00109       + ", rthr=" + Ifpack_toString(RelativeThreshold())
00110       + ", droptol=" + Ifpack_toString(DropTolerance())
00111       + ")";
00112     return(0);
00113   }
00114   catch (...)
00115   {
00116     cerr << "Caught an exception while parsing the parameter list" << endl;
00117     cerr << "This typically means that a parameter was set with the" << endl;
00118     cerr << "wrong type (for example, int instead of double). " << endl;
00119     cerr << "please check the documentation for the type required by each parameer." << endl;
00120     IFPACK_CHK_ERR(-1);
00121   }
00122 
00123   return(0);
00124 }
00125 
00126 //==========================================================================
00127 int Ifpack_ILUT::Initialize()
00128 {
00129   // delete previously allocated factorization
00130   Destroy();
00131 
00132   Time_.ResetStartTime();
00133 
00134   // check only in serial
00135   if (Comm().NumProc() == 1 && Matrix().NumMyRows() != Matrix().NumMyCols())
00136     IFPACK_CHK_ERR(-2);
00137     
00138   NumMyRows_ = Matrix().NumMyRows();
00139 
00140   // nothing else to do here
00141   IsInitialized_ = true;
00142   ++NumInitialize_;
00143   InitializeTime_ += Time_.ElapsedTime();
00144 
00145   return(0);
00146 }
00147 
00148 //==========================================================================
00149 class Ifpack_AbsComp 
00150 {
00151  public:
00152   inline bool operator()(const double& x, const double& y) 
00153   {
00154     return(IFPACK_ABS(x) > IFPACK_ABS(y));
00155   }
00156 };
00157 
00158 //==========================================================================
00159 // MS // completely rewritten the algorithm on 25-Jan-05, using more STL
00160 // MS // and in a nicer and cleaner way. Also, it is more efficient.
00161 // MS // WARNING: Still not fully tested!
00162 int Ifpack_ILUT::Compute() 
00163 {
00164   if (!IsInitialized()) 
00165     IFPACK_CHK_ERR(Initialize());
00166 
00167   Time_.ResetStartTime();
00168   IsComputed_ = false;
00169 
00170   NumMyRows_ = A_.NumMyRows();
00171   int Length = A_.MaxNumEntries();
00172   vector<int>    RowIndicesL(Length);
00173   vector<double> RowValuesL(Length);
00174   vector<int>    RowIndicesU(Length);
00175   vector<double> RowValuesU(Length);
00176   bool distributed = (Comm().NumProc() > 1)?true:false;
00177 
00178   if (distributed)
00179   {
00180     SerialComm_ = rcp(new Epetra_SerialComm);
00181     SerialMap_ = rcp(new Epetra_Map(NumMyRows_, 0, *SerialComm_));
00182     assert (SerialComm_.get() != 0);
00183     assert (SerialMap_.get() != 0);
00184   }
00185   else
00186     SerialMap_ = rcp(const_cast<Epetra_Map*>(&A_.RowMatrixRowMap()), false);
00187   
00188   int RowNnzU;
00189 
00190   L_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0));
00191   U_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0));
00192 
00193   if ((L_.get() == 0) || (U_.get() == 0))
00194     IFPACK_CHK_ERR(-5); // memory allocation error
00195 
00196   // insert first row in U_ and L_
00197   IFPACK_CHK_ERR(A_.ExtractMyRowCopy(0,Length,RowNnzU,
00198                                      &RowValuesU[0],&RowIndicesU[0]));
00199 
00200   if (distributed)
00201   {
00202     int count = 0;
00203     for (int i = 0 ;i < RowNnzU ; ++i) 
00204     {
00205       if (RowIndicesU[i] < NumMyRows_){
00206         RowIndicesU[count] = RowIndicesU[i];
00207         RowValuesU[count] = RowValuesU[i];
00208         ++count;
00209       }
00210       else
00211         continue;
00212     }
00213     RowNnzU = count;
00214   }
00215 
00216   // modify diagonal
00217   for (int i = 0 ;i < RowNnzU ; ++i) {
00218     if (RowIndicesU[i] == 0) {
00219       double& v = RowValuesU[i];
00220       v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
00221       break;
00222     }
00223   }
00224   
00225   IFPACK_CHK_ERR(U_->InsertGlobalValues(0,RowNnzU,&(RowValuesU[0]),
00226                                         &(RowIndicesU[0])));
00227    // FIXME: DOES IT WORK IN PARALLEL ??
00228   RowValuesU[0] = 1.0;
00229   RowIndicesU[0] = 0;
00230   IFPACK_CHK_ERR(L_->InsertGlobalValues(0,1,&(RowValuesU[0]),
00231                                         &(RowIndicesU[0])));
00232 
00233   int hash_size = 128;
00234   while (hash_size < (int) 1.5 * A_.MaxNumEntries() * LevelOfFill())
00235     hash_size *= 2;
00236 
00237   Ifpack_HashTable SingleRowU(hash_size - 1, 1);
00238   Ifpack_HashTable SingleRowL(hash_size - 1, 1);
00239 
00240   vector<int> keys;      keys.reserve(hash_size * 10);
00241   vector<double> values; values.reserve(hash_size * 10);
00242   vector<double> AbsRow; AbsRow.reserve(hash_size * 10);
00243 
00244   // =================== //
00245   // start factorization //
00246   // =================== //
00247   
00248   double this_proc_flops = 0.0;
00249 
00250   for (int row_i = 1 ; row_i < NumMyRows_ ; ++row_i) 
00251   {
00252     // get row `row_i' of the matrix, store in U pointers
00253     IFPACK_CHK_ERR(A_.ExtractMyRowCopy(row_i,Length,RowNnzU,
00254                                        &RowValuesU[0],&RowIndicesU[0]));
00255 
00256     if (distributed)
00257     {
00258       int count = 0;
00259       for (int i = 0 ;i < RowNnzU ; ++i) 
00260       {
00261         if (RowIndicesU[i] < NumMyRows_){
00262           RowIndicesU[count] = RowIndicesU[i];
00263           RowValuesU[count] = RowValuesU[i];
00264           ++count;
00265         }
00266         else
00267           continue;
00268       }
00269       RowNnzU = count;
00270     }
00271 
00272     int NnzLower = 0;
00273     int NnzUpper = 0;
00274 
00275     for (int i = 0 ;i < RowNnzU ; ++i) {
00276       if (RowIndicesU[i] < row_i)
00277         NnzLower++;
00278       else if (RowIndicesU[i] == row_i) {
00279         // add threshold
00280         NnzUpper++;
00281         double& v = RowValuesU[i];
00282         v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
00283       }
00284       else
00285         NnzUpper++;
00286     }
00287 
00288     int FillL = (int)(LevelOfFill() * NnzLower);
00289     int FillU = (int)(LevelOfFill() * NnzUpper);
00290     if (FillL == 0) FillL = 1;
00291     if (FillU == 0) FillU = 1;
00292 
00293     // convert line `row_i' into STL map for fast access
00294     SingleRowU.reset();
00295 
00296     for (int i = 0 ; i < RowNnzU ; ++i) {
00297         SingleRowU.set(RowIndicesU[i], RowValuesU[i]);
00298     }
00299       
00300     // for the multipliers
00301     SingleRowL.reset();
00302 
00303     int start_col = NumMyRows_;
00304     for (int i = 0 ; i < RowNnzU ; ++i)
00305       start_col = EPETRA_MIN(start_col, RowIndicesU[i]);
00306 
00307     int flops = 0;
00308   
00309     for (int col_k = start_col ; col_k < row_i ; ++col_k) {
00310 
00311       int*    ColIndicesK;
00312       double* ColValuesK;
00313       int     ColNnzK;
00314 
00315       IFPACK_CHK_ERR(U_->ExtractGlobalRowView(col_k, ColNnzK, ColValuesK, 
00316                                               ColIndicesK));
00317 
00318       // FIXME: can keep trace of diagonals
00319       double DiagonalValueK = 0.0;
00320       for (int i = 0 ; i < ColNnzK ; ++i) {
00321         if (ColIndicesK[i] == col_k) {
00322           DiagonalValueK = ColValuesK[i];
00323           break;
00324         }
00325       }
00326       
00327       // FIXME: this should never happen!
00328       if (DiagonalValueK == 0.0)
00329         DiagonalValueK = AbsoluteThreshold();
00330       
00331       double xxx = SingleRowU.get(col_k);
00332       if (IFPACK_ABS(xxx) > DropTolerance()) {
00333         SingleRowL.set(col_k, xxx / DiagonalValueK);
00334         ++flops;
00335 
00336         for (int j = 0 ; j < ColNnzK ; ++j) {
00337           int col_j = ColIndicesK[j];
00338 
00339           if (col_j < col_k) continue;
00340 
00341           double yyy = SingleRowL.get(col_k);
00342           if (yyy !=  0.0)
00343             SingleRowU.set(col_j, -yyy * ColValuesK[j], true);
00344           flops += 2;
00345         }
00346       }
00347     }
00348 
00349     this_proc_flops += 1.0 * flops;
00350 
00351     double cutoff = DropTolerance();
00352     double DiscardedElements = 0.0;
00353     int count;
00354 
00355     // drop elements to satisfy LevelOfFill(), start with L
00356     count = 0;
00357     int sizeL = SingleRowL.getNumEntries();
00358     keys.resize(sizeL);
00359     values.resize(sizeL);
00360 
00361     AbsRow.resize(sizeL);
00362 
00363     SingleRowL.arrayify(&keys[0], &values[0]);
00364     for (int i = 0; i < sizeL; ++i)
00365       if (IFPACK_ABS(values[i]) > DropTolerance()) {
00366         AbsRow[count++] = IFPACK_ABS(values[i]);
00367       }
00368 
00369     if (count > FillL) {
00370       nth_element(AbsRow.begin(), AbsRow.begin() + FillL, AbsRow.begin() + count, 
00371                   greater<double>());
00372       cutoff = AbsRow[FillL];
00373     }
00374 
00375     for (int i = 0; i < sizeL; ++i) {
00376       if (IFPACK_ABS(values[i]) >= cutoff) {
00377         IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &values[i], (int*)&keys[i]));
00378       }
00379       else
00380         DiscardedElements += values[i];
00381     }
00382 
00383     // FIXME: DOES IT WORK IN PARALLEL ???
00384     // add 1 to the diagonal
00385     double dtmp = 1.0;
00386     IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &dtmp, &row_i));
00387 
00388     // same business with U_
00389     count = 0;
00390     int sizeU = SingleRowU.getNumEntries();
00391     AbsRow.resize(sizeU + 1);
00392     keys.resize(sizeU + 1);
00393     values.resize(sizeU + 1);
00394 
00395     SingleRowU.arrayify(&keys[0], &values[0]);
00396 
00397     for (int i = 0; i < sizeU; ++i)
00398       if (keys[i] >= row_i && IFPACK_ABS(values[i]) > DropTolerance())
00399       {
00400         AbsRow[count++] = IFPACK_ABS(values[i]);
00401       }
00402 
00403     if (count > FillU) {
00404       nth_element(AbsRow.begin(), AbsRow.begin() + FillU, AbsRow.begin() + count, 
00405                   greater<double>());
00406       cutoff = AbsRow[FillU];
00407     }
00408 
00409     // sets the factors in U_
00410     for (int i = 0; i < sizeU; ++i) 
00411     {
00412       int col = keys[i];
00413       double val = values[i];
00414 
00415       if (col >= row_i) {
00416         if (IFPACK_ABS(val) >= cutoff || row_i == col) {
00417           IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &val, &col));
00418         }
00419         else
00420           DiscardedElements += val;
00421       }
00422     }
00423 
00424     // FIXME: not so sure of that!
00425     if (RelaxValue() != 0.0) {
00426       DiscardedElements *= RelaxValue();
00427       IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &DiscardedElements,
00428                                             &row_i));
00429     }
00430   }
00431 
00432   double tf;
00433   Comm().SumAll(&this_proc_flops, &tf, 1);
00434   ComputeFlops_ += tf;
00435 
00436   IFPACK_CHK_ERR(L_->FillComplete());
00437   IFPACK_CHK_ERR(U_->FillComplete());
00438 
00439 #if 0
00440   // to check the complete factorization
00441   Epetra_Vector LHS(A_.RowMatrixRowMap());
00442   Epetra_Vector RHS1(A_.RowMatrixRowMap());
00443   Epetra_Vector RHS2(A_.RowMatrixRowMap());
00444   Epetra_Vector RHS3(A_.RowMatrixRowMap());
00445   LHS.Random();
00446 
00447   cout << "A = " << A_.NumGlobalNonzeros() << endl;
00448   cout << "L = " << L_->NumGlobalNonzeros() << endl;
00449   cout << "U = " << U_->NumGlobalNonzeros() << endl;
00450 
00451   A_.Multiply(false,LHS,RHS1);
00452   U_->Multiply(false,LHS,RHS2);
00453   L_->Multiply(false,RHS2,RHS3);
00454 
00455   RHS1.Update(-1.0, RHS3, 1.0);
00456   double Norm;
00457   RHS1.Norm2(&Norm);
00458 #endif
00459 
00460   int MyNonzeros = L_->NumGlobalNonzeros() + U_->NumGlobalNonzeros();
00461   Comm().SumAll(&MyNonzeros, &GlobalNonzeros_, 1);
00462 
00463   IsComputed_ = true;
00464 
00465   ++NumCompute_;
00466   ComputeTime_ += Time_.ElapsedTime();
00467 
00468   return(0);
00469 
00470 }
00471   
00472 //=============================================================================
00473 int Ifpack_ILUT::ApplyInverse(const Epetra_MultiVector& X, 
00474                  Epetra_MultiVector& Y) const
00475 {
00476   if (!IsComputed())
00477     IFPACK_CHK_ERR(-2); // compute preconditioner first
00478 
00479   if (X.NumVectors() != Y.NumVectors()) 
00480     IFPACK_CHK_ERR(-3); // Return error: X and Y not the same size
00481 
00482   Time_.ResetStartTime();
00483 
00484   // NOTE: L_ and U_ are based on SerialMap_, while Xcopy is based
00485   // on A.Map()... which are in general different. However, Solve()
00486   // does not seem to care... which is fine with me.
00487   //
00488   // AztecOO gives X and Y pointing to the same memory location,
00489   // need to create an auxiliary vector, Xcopy
00490   Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy;
00491   if (X.Pointers()[0] == Y.Pointers()[0])
00492     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00493   else
00494     Xcopy = Teuchos::rcp( &X, false );
00495 
00496   if (!UseTranspose_)
00497   {
00498     // solves LU Y = X 
00499     IFPACK_CHK_ERR(L_->Solve(false,false,false,*Xcopy,Y));
00500     IFPACK_CHK_ERR(U_->Solve(true,false,false,Y,Y));
00501   }
00502   else
00503   {
00504     // solves U(trans) L(trans) Y = X
00505     IFPACK_CHK_ERR(U_->Solve(true,true,false,*Xcopy,Y));
00506     IFPACK_CHK_ERR(L_->Solve(false,true,false,Y,Y));
00507   }
00508 
00509   ++NumApplyInverse_;
00510   ApplyInverseFlops_ += X.NumVectors() * 2 * GlobalNonzeros_;
00511   ApplyInverseTime_ += Time_.ElapsedTime();
00512 
00513   return(0);
00514 
00515 }
00516 //=============================================================================
00517 // This function finds X such that LDU Y = X or U(trans) D L(trans) Y = X for multiple RHS
00518 int Ifpack_ILUT::Apply(const Epetra_MultiVector& X, 
00519               Epetra_MultiVector& Y) const 
00520 {
00521   return(-98);
00522 }
00523 
00524 //=============================================================================
00525 double Ifpack_ILUT::Condest(const Ifpack_CondestType CT, 
00526                             const int MaxIters, const double Tol,
00527                 Epetra_RowMatrix* Matrix_in)
00528 {
00529   if (!IsComputed()) // cannot compute right now
00530     return(-1.0);
00531 
00532   // NOTE: this is computing the *local* condest
00533   if (Condest_ == -1.0)
00534     Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
00535 
00536   return(Condest_);
00537 }
00538 
00539 //=============================================================================
00540 std::ostream&
00541 Ifpack_ILUT::Print(std::ostream& os) const
00542 {
00543   if (!Comm().MyPID()) {
00544     os << endl;
00545     os << "================================================================================" << endl;
00546     os << "Ifpack_ILUT: " << Label() << endl << endl;
00547     os << "Level-of-fill      = " << LevelOfFill() << endl;
00548     os << "Absolute threshold = " << AbsoluteThreshold() << endl;
00549     os << "Relative threshold = " << RelativeThreshold() << endl;
00550     os << "Relax value        = " << RelaxValue() << endl;
00551     os << "Condition number estimate       = " << Condest() << endl;
00552     os << "Global number of rows           = " << A_.NumGlobalRows() << endl;
00553     if (IsComputed_) {
00554       os << "Number of nonzeros in A         = " << A_.NumGlobalNonzeros() << endl;
00555       os << "Number of nonzeros in L + U     = " << NumGlobalNonzeros() 
00556          << " ( = " << 100.0 * NumGlobalNonzeros() / A_.NumGlobalNonzeros() 
00557          << " % of A)" << endl;
00558       os << "nonzeros / rows                 = " 
00559         << 1.0 * NumGlobalNonzeros() / U_->NumGlobalRows() << endl;
00560     }
00561     os << endl;
00562     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
00563     os << "-----           -------   --------------       ------------     --------" << endl;
00564     os << "Initialize()    "   << std::setw(5) << NumInitialize() 
00565        << "  " << std::setw(15) << InitializeTime() 
00566        << "               0.0            0.0" << endl;
00567     os << "Compute()       "   << std::setw(5) << NumCompute() 
00568        << "  " << std::setw(15) << ComputeTime()
00569        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops(); 
00570     if (ComputeTime() != 0.0)
00571       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops() / ComputeTime() << endl;
00572     else
00573       os << "  " << std::setw(15) << 0.0 << endl;
00574     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse() 
00575        << "  " << std::setw(15) << ApplyInverseTime()
00576        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops();
00577     if (ApplyInverseTime() != 0.0) 
00578       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops() / ApplyInverseTime() << endl;
00579     else
00580       os << "  " << std::setw(15) << 0.0 << endl;
00581     os << "================================================================================" << endl;
00582     os << endl;
00583   }
00584 
00585   return(os);
00586 }

Generated on Wed May 12 21:46:03 2010 for IFPACK by  doxygen 1.4.7