IFPACK Development
Ifpack_PointRelaxation.cpp
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Ifpack_ConfigDefs.h"
00031 #include <iomanip>
00032 #include <cmath>
00033 #include "Epetra_Operator.h"
00034 #include "Epetra_CrsMatrix.h"
00035 #include "Epetra_Comm.h"
00036 #include "Epetra_Map.h"
00037 #include "Epetra_MultiVector.h"
00038 #include "Ifpack_Preconditioner.h"
00039 #include "Ifpack_PointRelaxation.h"
00040 #include "Ifpack_Utils.h"
00041 #include "Ifpack_Condest.h"
00042 
00043 static const int IFPACK_JACOBI = 0;
00044 static const int IFPACK_GS = 1;
00045 static const int IFPACK_SGS = 2;
00046 
00047 //==============================================================================
00048 Ifpack_PointRelaxation::
00049 Ifpack_PointRelaxation(const Epetra_RowMatrix* Matrix_in) :
00050   IsInitialized_(false),
00051   IsComputed_(false),
00052   NumInitialize_(0),
00053   NumCompute_(0),
00054   NumApplyInverse_(0),
00055   InitializeTime_(0.0),
00056   ComputeTime_(0.0),
00057   ApplyInverseTime_(0.0),
00058   ComputeFlops_(0.0),
00059   ApplyInverseFlops_(0.0),
00060   NumSweeps_(1),
00061   DampingFactor_(1.0),
00062   UseTranspose_(false),
00063   Condest_(-1.0),
00064   ComputeCondest_(false),
00065   PrecType_(IFPACK_JACOBI),
00066   MinDiagonalValue_(0.0),
00067   NumMyRows_(0),
00068   NumMyNonzeros_(0),
00069   NumGlobalRows_(0),
00070   NumGlobalNonzeros_(0),
00071   Matrix_(Teuchos::rcp(Matrix_in,false)),
00072   IsParallel_(false),
00073   ZeroStartingSolution_(true),
00074   DoBackwardGS_(false),
00075   DoL1Method_(false),
00076   L1Eta_(1.5)
00077 {
00078 }
00079 
00080 //==============================================================================
00081 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List)
00082 {
00083 
00084   string PT;
00085   if (PrecType_ == IFPACK_JACOBI)
00086     PT = "Jacobi";
00087   else if (PrecType_ == IFPACK_GS)
00088     PT = "Gauss-Seidel";
00089   else if (PrecType_ == IFPACK_SGS)
00090     PT = "symmetric Gauss-Seidel";
00091 
00092   PT = List.get("relaxation: type", PT);
00093 
00094   if (PT == "Jacobi")
00095     PrecType_ = IFPACK_JACOBI;
00096   else if (PT == "Gauss-Seidel")
00097     PrecType_ = IFPACK_GS;
00098   else if (PT == "symmetric Gauss-Seidel")
00099     PrecType_ = IFPACK_SGS;
00100   else {
00101     IFPACK_CHK_ERR(-2);
00102   }
00103   
00104   NumSweeps_            = List.get("relaxation: sweeps",NumSweeps_);
00105   DampingFactor_        = List.get("relaxation: damping factor", 
00106                                    DampingFactor_);
00107   MinDiagonalValue_     = List.get("relaxation: min diagonal value", 
00108                                    MinDiagonalValue_);
00109   ZeroStartingSolution_ = List.get("relaxation: zero starting solution", 
00110                                    ZeroStartingSolution_);
00111 
00112   DoBackwardGS_         = List.get("relaxation: backward mode",DoBackwardGS_);
00113 
00114   DoL1Method_           = List.get("relaxation: use l1",DoL1Method_);
00115 
00116   L1Eta_                = List.get("relaxation: l1 eta",L1Eta_);
00117   
00118   SetLabel();
00119 
00120   return(0);
00121 }
00122 
00123 //==============================================================================
00124 const Epetra_Comm& Ifpack_PointRelaxation::Comm() const
00125 {
00126   return(Matrix_->Comm());
00127 }
00128 
00129 //==============================================================================
00130 const Epetra_Map& Ifpack_PointRelaxation::OperatorDomainMap() const
00131 {
00132   return(Matrix_->OperatorDomainMap());
00133 }
00134 
00135 //==============================================================================
00136 const Epetra_Map& Ifpack_PointRelaxation::OperatorRangeMap() const
00137 {
00138   return(Matrix_->OperatorRangeMap());
00139 }
00140 
00141 //==============================================================================
00142 int Ifpack_PointRelaxation::Initialize()
00143 {
00144   IsInitialized_ = false;
00145 
00146   if (Matrix_ == Teuchos::null)
00147     IFPACK_CHK_ERR(-2);
00148 
00149   if (Time_ == Teuchos::null)
00150     Time_ = Teuchos::rcp( new Epetra_Time(Comm()) );
00151 
00152   if (Matrix().NumGlobalRows() != Matrix().NumGlobalCols())
00153     IFPACK_CHK_ERR(-2); // only square matrices
00154 
00155   NumMyRows_ = Matrix_->NumMyRows();
00156   NumMyNonzeros_ = Matrix_->NumMyNonzeros();
00157   NumGlobalRows_ = Matrix_->NumGlobalRows();
00158   NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros();
00159 
00160   if (Comm().NumProc() != 1)
00161     IsParallel_ = true;
00162   else
00163     IsParallel_ = false;
00164 
00165   ++NumInitialize_;
00166   InitializeTime_ += Time_->ElapsedTime();
00167   IsInitialized_ = true;
00168   return(0);
00169 }
00170 
00171 //==============================================================================
00172 int Ifpack_PointRelaxation::Compute()
00173 {
00174   int ierr = 0;
00175   if (!IsInitialized())
00176     IFPACK_CHK_ERR(Initialize());
00177 
00178   Time_->ResetStartTime();
00179 
00180   // reset values
00181   IsComputed_ = false;
00182   Condest_ = -1.0;
00183 
00184   if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed.
00185   if (NumSweeps_ < 0)
00186     IFPACK_CHK_ERR(-2); // at least one application
00187   
00188   Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
00189 
00190   if (Diagonal_ == Teuchos::null)
00191     IFPACK_CHK_ERR(-5);
00192 
00193   IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_));
00194 
00195   // Setup for L1 Methods.
00196   // Here we add half the value of the off-processor entries in the row, 
00197   // but only if diagonal isn't sufficiently large.
00198   //
00199   // Note: This is only done in the slower-but-more-general "RowMatrix" mode.  
00200   //
00201   // This follows from Equation (6.5) in:
00202   // Baker, Falgout, Kolev and Yang.  Multigrid Smoothers for Ultraparallel Computing.
00203   // SIAM J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864--2887.
00204   if(DoL1Method_ && IsParallel_) {
00205     int maxLength = Matrix().MaxNumEntries();
00206     std::vector<int> Indices(maxLength);
00207     std::vector<double> Values(maxLength);
00208     int NumEntries;
00209 
00210     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00211       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, maxLength,NumEntries,
00212                            &Values[0], &Indices[0]));
00213       double diagonal_boost=0.0;
00214       for (int k = 0 ; k < NumEntries ; ++i)
00215     if(Indices[k] > i)
00216       diagonal_boost+=std::abs(Values[k]/2.0);  
00217       if ((*Diagonal_)[i] < L1Eta_*diagonal_boost)
00218     (*Diagonal_)[i]+=diagonal_boost;
00219     }
00220   }
00221 
00222   // check diagonal elements, store the inverses, and verify that
00223   // no zeros are around. If an element is zero, then by default
00224   // its inverse is zero as well (that is, the row is ignored).
00225   for (int i = 0 ; i < NumMyRows_ ; ++i) {
00226     double& diag = (*Diagonal_)[i];
00227     if (IFPACK_ABS(diag) < MinDiagonalValue_)
00228       diag = MinDiagonalValue_;
00229     if (diag != 0.0)
00230       diag = 1.0 / diag;
00231   }
00232 #ifdef IFPACK_FLOPCOUNTERS
00233   ComputeFlops_ += NumMyRows_;
00234 #endif
00235 
00236 #if 0
00237   // some methods require the inverse of the diagonal, compute it
00238   // now and store it in `Diagonal_'
00239   if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) {
00240     Diagonal_->Reciprocal(*Diagonal_);
00241     // update flops
00242     ComputeFlops_ += NumMyRows_;
00243   }
00244 #endif
00245 
00246   // We need to import data from external processors. Here I create an
00247   // Epetra_Import object because I cannot assume that Matrix_ has one.
00248   // This is a bit of waste of resources (but the code is more robust).
00249   // Note that I am doing some strange stuff to set the components of Y
00250   // from Y2 (to save some time).
00251   //
00252   if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) {
00253     Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
00254                                   Matrix().RowMatrixRowMap()) );
00255     if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
00256   }
00257 
00258   ++NumCompute_;
00259   ComputeTime_ += Time_->ElapsedTime();
00260   IsComputed_ = true;
00261 
00262   IFPACK_CHK_ERR(ierr);
00263   return(0);
00264 }
00265 
00266 //==============================================================================
00267 ostream& Ifpack_PointRelaxation::Print(ostream & os) const
00268 {
00269 
00270   double MyMinVal, MyMaxVal;
00271   double MinVal, MaxVal;
00272 
00273   if (IsComputed_) {
00274     Diagonal_->MinValue(&MyMinVal);
00275     Diagonal_->MaxValue(&MyMaxVal);
00276     Comm().MinAll(&MyMinVal,&MinVal,1);
00277     Comm().MinAll(&MyMaxVal,&MaxVal,1);
00278   }
00279 
00280   if (!Comm().MyPID()) {
00281     os << endl;
00282     os << "================================================================================" << endl;
00283     os << "Ifpack_PointRelaxation" << endl;
00284     os << "Sweeps         = " << NumSweeps_ << endl;
00285     os << "damping factor = " << DampingFactor_ << endl;
00286     if (PrecType_ == IFPACK_JACOBI)
00287       os << "Type           = Jacobi" << endl;
00288     else if (PrecType_ == IFPACK_GS)
00289       os << "Type           = Gauss-Seidel" << endl;
00290     else if (PrecType_ == IFPACK_SGS)
00291       os << "Type           = symmetric Gauss-Seidel" << endl;
00292     if (DoBackwardGS_) 
00293       os << "Using backward mode (GS only)" << endl;
00294     if (ZeroStartingSolution_) 
00295       os << "Using zero starting solution" << endl;
00296     else
00297       os << "Using input starting solution" << endl;
00298     os << "Condition number estimate = " << Condest() << endl;
00299     os << "Global number of rows            = " << Matrix_->NumGlobalRows() << endl;
00300     if (IsComputed_) {
00301       os << "Minimum value on stored diagonal = " << MinVal << endl;
00302       os << "Maximum value on stored diagonal = " << MaxVal << endl;
00303     }
00304     os << endl;
00305     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
00306     os << "-----           -------   --------------       ------------     --------" << endl;
00307     os << "Initialize()    "   << std::setw(5) << NumInitialize_ 
00308        << "  " << std::setw(15) << InitializeTime_ 
00309        << "              0.0              0.0" << endl;
00310     os << "Compute()       "   << std::setw(5) << NumCompute_ 
00311        << "  " << std::setw(15) << ComputeTime_
00312        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_;
00313     if (ComputeTime_ != 0.0)
00314       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl;
00315     else
00316       os << "  " << std::setw(15) << 0.0 << endl;
00317     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse_ 
00318        << "  " << std::setw(15) << ApplyInverseTime_
00319        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_;
00320     if (ApplyInverseTime_ != 0.0)
00321       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl;
00322     else
00323       os << "  " << std::setw(15) << 0.0 << endl;
00324     os << "================================================================================" << endl;
00325     os << endl;
00326   }
00327 
00328   return(os);
00329 }
00330 
00331 //==============================================================================
00332 double Ifpack_PointRelaxation::
00333 Condest(const Ifpack_CondestType CT, 
00334         const int MaxIters, const double Tol,
00335     Epetra_RowMatrix* Matrix_in)
00336 {
00337   if (!IsComputed()) // cannot compute right now
00338     return(-1.0);
00339 
00340   // always computes it. Call Condest() with no parameters to get
00341   // the previous estimate.
00342   Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
00343 
00344   return(Condest_);
00345 }
00346 
00347 //==============================================================================
00348 void Ifpack_PointRelaxation::SetLabel()
00349 {
00350   string PT;
00351   if (PrecType_ == IFPACK_JACOBI)
00352     PT = "Jacobi";
00353   else if (PrecType_ == IFPACK_GS){
00354     PT = "GS";
00355     if(DoBackwardGS_)
00356       PT = "Backward " + PT;
00357   }    
00358   else if (PrecType_ == IFPACK_SGS)
00359     PT = "SGS";
00360 
00361   Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_)
00362     + ", damping=" + Ifpack_toString(DampingFactor_) + ")";
00363 }
00364 
00365 //==============================================================================
00366 // Note that Ifpack_PointRelaxation and Jacobi is much faster than
00367 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
00368 // way the matrix-vector produce is performed).
00369 //
00370 // Another ML-related observation is that the starting solution (in Y)
00371 // is NOT supposed to be zero. This may slow down the application of just
00372 // one sweep of Jacobi.
00373 //
00374 int Ifpack_PointRelaxation::
00375 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00376 {
00377   if (!IsComputed())
00378     IFPACK_CHK_ERR(-3);
00379 
00380   if (X.NumVectors() != Y.NumVectors())
00381     IFPACK_CHK_ERR(-2);
00382 
00383   Time_->ResetStartTime();
00384 
00385   // AztecOO gives X and Y pointing to the same memory location,
00386   // need to create an auxiliary vector, Xcopy
00387   Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
00388   if (X.Pointers()[0] == Y.Pointers()[0])
00389     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00390   else
00391     Xcopy = Teuchos::rcp( &X, false );
00392     
00393   if (ZeroStartingSolution_)
00394     Y.PutScalar(0.0);
00395 
00396   // Flops are updated in each of the following. 
00397   switch (PrecType_) {
00398   case IFPACK_JACOBI:
00399     IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
00400     break;
00401   case IFPACK_GS:
00402     IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
00403     break;
00404   case IFPACK_SGS:
00405     IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
00406     break;
00407   default:
00408     IFPACK_CHK_ERR(-1); // something wrong
00409   }
00410 
00411   ++NumApplyInverse_;
00412   ApplyInverseTime_ += Time_->ElapsedTime();
00413   return(0);
00414 }
00415 
00416 //==============================================================================
00417 // This preconditioner can be much slower than AztecOO and ML versions
00418 // if the matrix-vector product requires all ExtractMyRowCopy() 
00419 // (as done through Ifpack_AdditiveSchwarz).
00420 int Ifpack_PointRelaxation::
00421 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const
00422 {
00423 
00424   int NumVectors = LHS.NumVectors();
00425   Epetra_MultiVector A_times_LHS( LHS.Map(),NumVectors );
00426 
00427   for (int j = 0; j < NumSweeps_ ; j++) {
00428 
00429     IFPACK_CHK_ERR(Apply(LHS,A_times_LHS));
00430     IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0));
00431     for (int v = 0 ; v < NumVectors ; ++v)
00432       IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)), 
00433                                      *Diagonal_, 1.0));
00434 
00435   }
00436 
00437   // Flops:
00438   // - matrix vector              (2 * NumGlobalNonzeros_)
00439   // - update                     (2 * NumGlobalRows_)
00440   // - Multiply:
00441   //   - DampingFactor            (NumGlobalRows_)
00442   //   - Diagonal                 (NumGlobalRows_)
00443   //   - A + B                    (NumGlobalRows_)
00444   //   - 1.0                      (NumGlobalRows_)
00445 #ifdef IFPACK_FLOPCOUNTERS
00446   ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00447 #endif
00448 
00449   return(0);
00450 }
00451 
00452 //==============================================================================
00453 int Ifpack_PointRelaxation::
00454 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00455 {
00456   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00457   // try to pick the best option; performances may be improved
00458   // if several sweeps are used.
00459   if (CrsMatrix != 0)
00460   {
00461     if (CrsMatrix->StorageOptimized())
00462       return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y));
00463     else
00464       return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y));
00465   }
00466   else
00467     return(ApplyInverseGS_RowMatrix(X, Y));
00468 } //ApplyInverseGS()
00469 
00470 //==============================================================================
00471 int Ifpack_PointRelaxation::
00472 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00473 {
00474   int NumVectors = X.NumVectors();
00475 
00476   int Length = Matrix().MaxNumEntries();
00477   vector<int> Indices(Length);
00478   vector<double> Values(Length);
00479 
00480   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00481   if (IsParallel_)
00482     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00483   else
00484     Y2 = Teuchos::rcp( &Y, false );
00485 
00486   // extract views (for nicer and faster code)
00487   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00488   X.ExtractView(&x_ptr);
00489   Y.ExtractView(&y_ptr);
00490   Y2->ExtractView(&y2_ptr);
00491   Diagonal_->ExtractView(&d_ptr);
00492 
00493   for (int j = 0; j < NumSweeps_ ; j++) {
00494 
00495     // data exchange is here, once per sweep
00496     if (IsParallel_)
00497       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00498 
00499     // FIXME: do I really need this code below?
00500     if (NumVectors == 1) {
00501 
00502       double* y0_ptr = y_ptr[0];
00503       double* y20_ptr = y2_ptr[0];
00504       double* x0_ptr = x_ptr[0];
00505 
00506       if(!DoBackwardGS_){      
00507         /* Forward Mode */
00508         for (int i = 0 ; i < NumMyRows_ ; ++i) {
00509 
00510           int NumEntries;
00511           int col;
00512           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00513                                                    &Values[0], &Indices[0]));
00514           
00515           double dtemp = 0.0;
00516           for (int k = 0 ; k < NumEntries ; ++k) {
00517             
00518             col = Indices[k];
00519             dtemp += Values[k] * y20_ptr[col];
00520           }
00521           
00522           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00523         }
00524       }
00525       else {
00526         /* Backward Mode */
00527         for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00528 
00529           int NumEntries;
00530           int col;
00531           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00532                                                    &Values[0], &Indices[0]));
00533           double dtemp = 0.0;
00534           for (int k = 0 ; k < NumEntries ; ++k) {
00535 
00536             col = Indices[k];
00537             dtemp += Values[k] * y20_ptr[i];
00538           }
00539           
00540           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00541         }
00542       }
00543       
00544       // using Export() sounded quite expensive
00545       if (IsParallel_)
00546         for (int i = 0 ; i < NumMyRows_ ; ++i)
00547           y0_ptr[i] = y20_ptr[i];
00548 
00549     }
00550     else {
00551       if(!DoBackwardGS_){      
00552         /* Forward Mode */
00553         for (int i = 0 ; i < NumMyRows_ ; ++i) {
00554           
00555           int NumEntries;
00556           int col;
00557           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00558                                                    &Values[0], &Indices[0]));
00559           
00560           for (int m = 0 ; m < NumVectors ; ++m) {
00561             
00562             double dtemp = 0.0;
00563             for (int k = 0 ; k < NumEntries ; ++k) {
00564               
00565               col = Indices[k];
00566               dtemp += Values[k] * y2_ptr[m][col];
00567             }
00568             
00569             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00570           }
00571         }
00572       }
00573       else {
00574         /* Backward Mode */
00575         for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00576           int NumEntries;
00577           int col;
00578           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00579                                                    &Values[0], &Indices[0]));
00580 
00581           for (int m = 0 ; m < NumVectors ; ++m) {
00582             
00583             double dtemp = 0.0;
00584             for (int k = 0 ; k < NumEntries ; ++k) {
00585 
00586               col = Indices[k];
00587               dtemp += Values[k] * y2_ptr[m][col];
00588             }
00589 
00590             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00591 
00592           }
00593         }
00594       }
00595 
00596       // using Export() sounded quite expensive   
00597       if (IsParallel_)
00598         for (int m = 0 ; m < NumVectors ; ++m) 
00599           for (int i = 0 ; i < NumMyRows_ ; ++i)
00600             y_ptr[m][i] = y2_ptr[m][i];
00601       
00602     }
00603   }
00604 
00605 #ifdef IFPACK_FLOPCOUNTERS
00606   ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00607 #endif
00608 
00609   return(0);
00610 } //ApplyInverseGS_RowMatrix()
00611 
00612 //==============================================================================
00613 int Ifpack_PointRelaxation::
00614 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00615 {
00616   int NumVectors = X.NumVectors();
00617 
00618   int* Indices;
00619   double* Values;
00620 
00621   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00622   if (IsParallel_) {
00623     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00624   }
00625   else
00626     Y2 = Teuchos::rcp( &Y, false );
00627 
00628   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00629   X.ExtractView(&x_ptr);
00630   Y.ExtractView(&y_ptr);
00631   Y2->ExtractView(&y2_ptr);
00632   Diagonal_->ExtractView(&d_ptr);
00633   
00634   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00635     
00636     // only one data exchange per sweep
00637     if (IsParallel_)
00638       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00639 
00640     if(!DoBackwardGS_){  
00641       /* Forward Mode */
00642       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00643 
00644         int NumEntries;
00645         int col;
00646         double diag = d_ptr[i];
00647         
00648         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00649         
00650         for (int m = 0 ; m < NumVectors ; ++m) {
00651           
00652           double dtemp = 0.0;
00653           
00654           for (int k = 0; k < NumEntries; ++k) {
00655             
00656             col = Indices[k];
00657             dtemp += Values[k] * y2_ptr[m][col];
00658           }
00659           
00660           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00661         }
00662       }
00663     }
00664     else {
00665       /* Backward Mode */
00666       for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00667 
00668         int NumEntries;
00669         int col;
00670         double diag = d_ptr[i];
00671         
00672         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00673         
00674         for (int m = 0 ; m < NumVectors ; ++m) {
00675           
00676           double dtemp = 0.0;
00677           for (int k = 0; k < NumEntries; ++k) {
00678             
00679             col = Indices[k];
00680             dtemp += Values[k] * y2_ptr[m][col];
00681           }
00682           
00683           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00684           
00685         }
00686       }
00687     }
00688     
00689     if (IsParallel_)
00690       for (int m = 0 ; m < NumVectors ; ++m) 
00691         for (int i = 0 ; i < NumMyRows_ ; ++i)
00692           y_ptr[m][i] = y2_ptr[m][i];
00693   }
00694 
00695 #ifdef IFPACK_FLOPCOUNTERS
00696   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00697 #endif
00698   return(0);
00699 } //ApplyInverseGS_CrsMatrix()
00700 
00701 //
00702 //==============================================================================
00703 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage()
00704 
00705 int Ifpack_PointRelaxation::
00706 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00707 {
00708   int* IndexOffset;
00709   int* Indices;
00710   double* Values;
00711   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00712 
00713   int NumVectors = X.NumVectors();
00714 
00715   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00716   if (IsParallel_) {
00717     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00718   }
00719   else
00720     Y2 = Teuchos::rcp( &Y, false );
00721 
00722   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00723   X.ExtractView(&x_ptr);
00724   Y.ExtractView(&y_ptr);
00725   Y2->ExtractView(&y2_ptr);
00726   Diagonal_->ExtractView(&d_ptr);
00727 
00728   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00729     
00730     // only one data exchange per sweep
00731     if (IsParallel_)
00732       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00733 
00734     if(!DoBackwardGS_){  
00735       /* Forward Mode */
00736       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00737         
00738         int col;
00739         double diag = d_ptr[i];
00740         
00741         for (int m = 0 ; m < NumVectors ; ++m) {
00742           
00743           double dtemp = 0.0;
00744           
00745           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00746             
00747             col = Indices[k];
00748             dtemp += Values[k] * y2_ptr[m][col];
00749           }
00750           
00751           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00752         }      
00753       }
00754     }
00755     else {
00756       /* Backward Mode */
00757       for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00758 
00759         int col;
00760         double diag = d_ptr[i];
00761         
00762         for (int m = 0 ; m < NumVectors ; ++m) {
00763 
00764           double dtemp = 0.0;
00765           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00766 
00767             col = Indices[k];
00768             dtemp += Values[k] * y2_ptr[m][col];
00769           }
00770 
00771           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00772 
00773         }
00774       }
00775     }
00776     
00777 
00778     if (IsParallel_)
00779       for (int m = 0 ; m < NumVectors ; ++m) 
00780         for (int i = 0 ; i < NumMyRows_ ; ++i)
00781           y_ptr[m][i] = y2_ptr[m][i];
00782   }
00783 
00784 #ifdef IFPACK_FLOPCOUNTERS
00785   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00786 #endif
00787   return(0);
00788 } //ApplyInverseGS_FastCrsMatrix()
00789 
00790 //==============================================================================
00791 int Ifpack_PointRelaxation::
00792 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00793 {
00794   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00795   // try to pick the best option; performances may be improved
00796   // if several sweeps are used.
00797   if (CrsMatrix != 0)
00798   {
00799     if (CrsMatrix->StorageOptimized())
00800       return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
00801     else
00802       return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
00803   }
00804   else
00805     return(ApplyInverseSGS_RowMatrix(X, Y));
00806 }
00807 
00808 //==============================================================================
00809 int Ifpack_PointRelaxation::
00810 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00811 {
00812   int NumVectors = X.NumVectors();
00813   int Length = Matrix().MaxNumEntries();
00814   vector<int> Indices(Length);
00815   vector<double> Values(Length);
00816 
00817   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00818   if (IsParallel_) {
00819     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00820   }
00821   else
00822     Y2 = Teuchos::rcp( &Y, false );
00823 
00824   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00825   X.ExtractView(&x_ptr);
00826   Y.ExtractView(&y_ptr);
00827   Y2->ExtractView(&y2_ptr);
00828   Diagonal_->ExtractView(&d_ptr);
00829   
00830   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00831     
00832     // only one data exchange per sweep
00833     if (IsParallel_)
00834       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00835 
00836     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00837 
00838       int NumEntries;
00839       int col;
00840       double diag = d_ptr[i];
00841 
00842       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00843                                                &Values[0], &Indices[0]));
00844 
00845       for (int m = 0 ; m < NumVectors ; ++m) {
00846 
00847         double dtemp = 0.0;
00848 
00849         for (int k = 0 ; k < NumEntries ; ++k) {
00850 
00851           col = Indices[k];
00852           dtemp += Values[k] * y2_ptr[m][col];
00853         }
00854 
00855         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00856       }
00857     }
00858 
00859     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00860 
00861       int NumEntries;
00862       int col;
00863       double diag = d_ptr[i];
00864 
00865       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00866                                                &Values[0], &Indices[0]));
00867 
00868       for (int m = 0 ; m < NumVectors ; ++m) {
00869 
00870         double dtemp = 0.0;
00871         for (int k = 0 ; k < NumEntries ; ++k) {
00872 
00873           col = Indices[k];
00874           dtemp += Values[k] * y2_ptr[m][col];
00875         }
00876 
00877         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00878 
00879       }
00880     }
00881 
00882     if (IsParallel_)
00883       for (int m = 0 ; m < NumVectors ; ++m) 
00884         for (int i = 0 ; i < NumMyRows_ ; ++i)
00885           y_ptr[m][i] = y2_ptr[m][i];
00886   }
00887 
00888 #ifdef IFPACK_FLOPCOUNTERS
00889   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00890 #endif
00891   return(0);
00892 }
00893 
00894 //==============================================================================
00895 int Ifpack_PointRelaxation::
00896 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00897 {
00898   int NumVectors = X.NumVectors();
00899 
00900   int* Indices;
00901   double* Values;
00902 
00903   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00904   if (IsParallel_) {
00905     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00906   }
00907   else
00908     Y2 = Teuchos::rcp( &Y, false );
00909 
00910   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00911   X.ExtractView(&x_ptr);
00912   Y.ExtractView(&y_ptr);
00913   Y2->ExtractView(&y2_ptr);
00914   Diagonal_->ExtractView(&d_ptr);
00915   
00916   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00917     
00918     // only one data exchange per sweep
00919     if (IsParallel_)
00920       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00921 
00922     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00923 
00924       int NumEntries;
00925       int col;
00926       double diag = d_ptr[i];
00927 
00928       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00929 
00930       for (int m = 0 ; m < NumVectors ; ++m) {
00931 
00932         double dtemp = 0.0;
00933 
00934         for (int k = 0; k < NumEntries; ++k) {
00935 
00936           col = Indices[k];
00937           dtemp += Values[k] * y2_ptr[m][col];
00938         }
00939 
00940         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00941       }
00942     }
00943 
00944     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00945 
00946       int NumEntries;
00947       int col;
00948       double diag = d_ptr[i];
00949 
00950       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00951 
00952       for (int m = 0 ; m < NumVectors ; ++m) {
00953 
00954         double dtemp = 0.0;
00955         for (int k = 0; k < NumEntries; ++k) {
00956 
00957           col = Indices[k];
00958           dtemp += Values[k] * y2_ptr[m][col];
00959         }
00960 
00961         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00962 
00963       }
00964     }
00965 
00966     if (IsParallel_)
00967       for (int m = 0 ; m < NumVectors ; ++m) 
00968         for (int i = 0 ; i < NumMyRows_ ; ++i)
00969           y_ptr[m][i] = y2_ptr[m][i];
00970   }
00971 
00972 #ifdef IFPACK_FLOPCOUNTERS
00973   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00974 #endif
00975   return(0);
00976 }
00977 
00978 //==============================================================================
00979 // this requires Epetra_CrsMatrix + OptimizeStorage()
00980 int Ifpack_PointRelaxation::
00981 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00982 {
00983   int* IndexOffset;
00984   int* Indices;
00985   double* Values;
00986   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00987 
00988   int NumVectors = X.NumVectors();
00989 
00990   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00991   if (IsParallel_) {
00992     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00993   }
00994   else
00995     Y2 = Teuchos::rcp( &Y, false );
00996 
00997   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00998   X.ExtractView(&x_ptr);
00999   Y.ExtractView(&y_ptr);
01000   Y2->ExtractView(&y2_ptr);
01001   Diagonal_->ExtractView(&d_ptr);
01002   
01003   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
01004     
01005     // only one data exchange per sweep
01006     if (IsParallel_)
01007       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
01008 
01009     for (int i = 0 ; i < NumMyRows_ ; ++i) {
01010 
01011       int col;
01012       double diag = d_ptr[i];
01013 
01014       // no need to extract row i
01015       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01016 
01017       for (int m = 0 ; m < NumVectors ; ++m) {
01018 
01019         double dtemp = 0.0;
01020 
01021         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01022 
01023           col = Indices[k];
01024           dtemp += Values[k] * y2_ptr[m][col];
01025         }
01026 
01027         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01028       }
01029     }
01030 
01031     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
01032 
01033       int col;
01034       double diag = d_ptr[i];
01035 
01036       // no need to extract row i
01037       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01038 
01039       for (int m = 0 ; m < NumVectors ; ++m) {
01040 
01041         double dtemp = 0.0;
01042         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01043 
01044           col = Indices[k];
01045           dtemp += Values[k] * y2_ptr[m][col];
01046         }
01047 
01048         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01049 
01050       }
01051     }
01052 
01053     if (IsParallel_)
01054       for (int m = 0 ; m < NumVectors ; ++m) 
01055         for (int i = 0 ; i < NumMyRows_ ; ++i)
01056           y_ptr[m][i] = y2_ptr[m][i];
01057   }
01058 
01059 #ifdef IFPACK_FLOPCOUNTERS
01060   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
01061 #endif
01062   return(0);
01063 }
 All Classes Files Functions Variables Enumerations Friends