IFPACK Development
Ifpack_PointRelaxation.cpp
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Ifpack_ConfigDefs.h"
00031 #include <iomanip>
00032 #include <cmath>
00033 #include "Epetra_Operator.h"
00034 #include "Epetra_CrsMatrix.h"
00035 #include "Epetra_Comm.h"
00036 #include "Epetra_Map.h"
00037 #include "Epetra_MultiVector.h"
00038 #include "Ifpack_Preconditioner.h"
00039 #include "Ifpack_PointRelaxation.h"
00040 #include "Ifpack_Utils.h"
00041 #include "Ifpack_Condest.h"
00042 
00043 static const int IFPACK_JACOBI = 0;
00044 static const int IFPACK_GS = 1;
00045 static const int IFPACK_SGS = 2;
00046 
00047 //==============================================================================
00048 Ifpack_PointRelaxation::
00049 Ifpack_PointRelaxation(const Epetra_RowMatrix* Matrix_in) :
00050   IsInitialized_(false),
00051   IsComputed_(false),
00052   NumInitialize_(0),
00053   NumCompute_(0),
00054   NumApplyInverse_(0),
00055   InitializeTime_(0.0),
00056   ComputeTime_(0.0),
00057   ApplyInverseTime_(0.0),
00058   ComputeFlops_(0.0),
00059   ApplyInverseFlops_(0.0),
00060   NumSweeps_(1),
00061   DampingFactor_(1.0),
00062   UseTranspose_(false),
00063   Condest_(-1.0),
00064   ComputeCondest_(false),
00065   PrecType_(IFPACK_JACOBI),
00066   MinDiagonalValue_(0.0),
00067   NumMyRows_(0),
00068   NumMyNonzeros_(0),
00069   NumGlobalRows_(0),
00070   NumGlobalNonzeros_(0),
00071   Matrix_(Teuchos::rcp(Matrix_in,false)),
00072   IsParallel_(false),
00073   ZeroStartingSolution_(true),
00074   DoBackwardGS_(false),
00075   DoL1Method_(false),
00076   L1Eta_(1.5)
00077 {
00078 }
00079 
00080 //==============================================================================
00081 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List)
00082 {
00083 
00084   string PT;
00085   if (PrecType_ == IFPACK_JACOBI)
00086     PT = "Jacobi";
00087   else if (PrecType_ == IFPACK_GS)
00088     PT = "Gauss-Seidel";
00089   else if (PrecType_ == IFPACK_SGS)
00090     PT = "symmetric Gauss-Seidel";
00091 
00092   PT = List.get("relaxation: type", PT);
00093 
00094   if (PT == "Jacobi")
00095     PrecType_ = IFPACK_JACOBI;
00096   else if (PT == "Gauss-Seidel")
00097     PrecType_ = IFPACK_GS;
00098   else if (PT == "symmetric Gauss-Seidel")
00099     PrecType_ = IFPACK_SGS;
00100   else {
00101     IFPACK_CHK_ERR(-2);
00102   }
00103   
00104   NumSweeps_            = List.get("relaxation: sweeps",NumSweeps_);
00105   DampingFactor_        = List.get("relaxation: damping factor", 
00106                                    DampingFactor_);
00107   MinDiagonalValue_     = List.get("relaxation: min diagonal value", 
00108                                    MinDiagonalValue_);
00109   ZeroStartingSolution_ = List.get("relaxation: zero starting solution", 
00110                                    ZeroStartingSolution_);
00111 
00112   DoBackwardGS_         = List.get("relaxation: backward mode",DoBackwardGS_);
00113 
00114   DoL1Method_           = List.get("relaxation: use l1",DoL1Method_);
00115 
00116   L1Eta_                = List.get("relaxation: l1 eta",L1Eta_);
00117   
00118   SetLabel();
00119 
00120   return(0);
00121 }
00122 
00123 //==============================================================================
00124 const Epetra_Comm& Ifpack_PointRelaxation::Comm() const
00125 {
00126   return(Matrix_->Comm());
00127 }
00128 
00129 //==============================================================================
00130 const Epetra_Map& Ifpack_PointRelaxation::OperatorDomainMap() const
00131 {
00132   return(Matrix_->OperatorDomainMap());
00133 }
00134 
00135 //==============================================================================
00136 const Epetra_Map& Ifpack_PointRelaxation::OperatorRangeMap() const
00137 {
00138   return(Matrix_->OperatorRangeMap());
00139 }
00140 
00141 //==============================================================================
00142 int Ifpack_PointRelaxation::Initialize()
00143 {
00144   IsInitialized_ = false;
00145 
00146   if (Matrix_ == Teuchos::null)
00147     IFPACK_CHK_ERR(-2);
00148 
00149   if (Time_ == Teuchos::null)
00150     Time_ = Teuchos::rcp( new Epetra_Time(Comm()) );
00151 
00152   if (Matrix().NumGlobalRows() != Matrix().NumGlobalCols())
00153     IFPACK_CHK_ERR(-2); // only square matrices
00154 
00155   NumMyRows_ = Matrix_->NumMyRows();
00156   NumMyNonzeros_ = Matrix_->NumMyNonzeros();
00157   NumGlobalRows_ = Matrix_->NumGlobalRows();
00158   NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros();
00159 
00160   if (Comm().NumProc() != 1)
00161     IsParallel_ = true;
00162   else
00163     IsParallel_ = false;
00164 
00165   ++NumInitialize_;
00166   InitializeTime_ += Time_->ElapsedTime();
00167   IsInitialized_ = true;
00168   return(0);
00169 }
00170 
00171 //==============================================================================
00172 int Ifpack_PointRelaxation::Compute()
00173 {
00174   int ierr = 0;
00175   if (!IsInitialized())
00176     IFPACK_CHK_ERR(Initialize());
00177 
00178   Time_->ResetStartTime();
00179 
00180   // reset values
00181   IsComputed_ = false;
00182   Condest_ = -1.0;
00183 
00184   if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed.
00185   if (NumSweeps_ < 0)
00186     IFPACK_CHK_ERR(-2); // at least one application
00187   
00188   Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
00189 
00190   if (Diagonal_ == Teuchos::null)
00191     IFPACK_CHK_ERR(-5);
00192 
00193   IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_));
00194 
00195   // Setup for L1 Methods.
00196   // Here we add half the value of the off-processor entries in the row, 
00197   // but only if diagonal isn't sufficiently large.
00198   //
00199   // Note: This is only done in the slower-but-more-general "RowMatrix" mode.  
00200   //
00201   // This follows from Equation (6.5) in:
00202   // Baker, Falgout, Kolev and Yang.  Multigrid Smoothers for Ultraparallel Computing.
00203   // SIAM J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864--2887.
00204   if(DoL1Method_ && IsParallel_) {
00205     int maxLength = Matrix().MaxNumEntries();
00206     std::vector<int> Indices(maxLength);
00207     std::vector<double> Values(maxLength);
00208     int NumEntries;
00209 
00210     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00211       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, maxLength,NumEntries,
00212                            &Values[0], &Indices[0]));
00213       double diagonal_boost=0.0;
00214       for (int k = 0 ; k < NumEntries ; ++i)
00215     if(Indices[k] > i)
00216       diagonal_boost+=std::abs(Values[k]/2.0);  
00217       if ((*Diagonal_)[i] < L1Eta_*diagonal_boost)
00218     (*Diagonal_)[i]+=diagonal_boost;
00219     }
00220   }
00221 
00222   // check diagonal elements, store the inverses, and verify that
00223   // no zeros are around. If an element is zero, then by default
00224   // its inverse is zero as well (that is, the row is ignored).
00225   for (int i = 0 ; i < NumMyRows_ ; ++i) {
00226     double& diag = (*Diagonal_)[i];
00227     if (IFPACK_ABS(diag) < MinDiagonalValue_)
00228       diag = MinDiagonalValue_;
00229     if (diag != 0.0)
00230       diag = 1.0 / diag;
00231   }
00232 #ifdef IFPACK_FLOPCOUNTERS
00233   ComputeFlops_ += NumMyRows_;
00234 #endif
00235 
00236 #if 0
00237   // some methods require the inverse of the diagonal, compute it
00238   // now and store it in `Diagonal_'
00239   if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) {
00240     Diagonal_->Reciprocal(*Diagonal_);
00241     // update flops
00242     ComputeFlops_ += NumMyRows_;
00243   }
00244 #endif
00245 
00246   // We need to import data from external processors. Here I create an
00247   // Epetra_Import object because I cannot assume that Matrix_ has one.
00248   // This is a bit of waste of resources (but the code is more robust).
00249   // Note that I am doing some strange stuff to set the components of Y
00250   // from Y2 (to save some time).
00251   //
00252   if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) {
00253     Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
00254                                   Matrix().RowMatrixRowMap()) );
00255     if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
00256   }
00257 
00258   ++NumCompute_;
00259   ComputeTime_ += Time_->ElapsedTime();
00260   IsComputed_ = true;
00261 
00262   IFPACK_CHK_ERR(ierr);
00263   return(0);
00264 }
00265 
00266 //==============================================================================
00267 ostream& Ifpack_PointRelaxation::Print(ostream & os) const
00268 {
00269 
00270   double MyMinVal, MyMaxVal;
00271   double MinVal, MaxVal;
00272 
00273   if (IsComputed_) {
00274     Diagonal_->MinValue(&MyMinVal);
00275     Diagonal_->MaxValue(&MyMaxVal);
00276     Comm().MinAll(&MyMinVal,&MinVal,1);
00277     Comm().MinAll(&MyMaxVal,&MaxVal,1);
00278   }
00279 
00280   if (!Comm().MyPID()) {
00281     os << endl;
00282     os << "================================================================================" << endl;
00283     os << "Ifpack_PointRelaxation" << endl;
00284     os << "Sweeps         = " << NumSweeps_ << endl;
00285     os << "damping factor = " << DampingFactor_ << endl;
00286     if (PrecType_ == IFPACK_JACOBI)
00287       os << "Type           = Jacobi" << endl;
00288     else if (PrecType_ == IFPACK_GS)
00289       os << "Type           = Gauss-Seidel" << endl;
00290     else if (PrecType_ == IFPACK_SGS)
00291       os << "Type           = symmetric Gauss-Seidel" << endl;
00292     if (DoBackwardGS_) 
00293       os << "Using backward mode (GS only)" << endl;
00294     if (ZeroStartingSolution_) 
00295       os << "Using zero starting solution" << endl;
00296     else
00297       os << "Using input starting solution" << endl;
00298     os << "Condition number estimate = " << Condest() << endl;
00299     os << "Global number of rows            = " << Matrix_->NumGlobalRows() << endl;
00300     if (IsComputed_) {
00301       os << "Minimum value on stored diagonal = " << MinVal << endl;
00302       os << "Maximum value on stored diagonal = " << MaxVal << endl;
00303     }
00304     os << endl;
00305     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
00306     os << "-----           -------   --------------       ------------     --------" << endl;
00307     os << "Initialize()    "   << std::setw(5) << NumInitialize_ 
00308        << "  " << std::setw(15) << InitializeTime_ 
00309        << "              0.0              0.0" << endl;
00310     os << "Compute()       "   << std::setw(5) << NumCompute_ 
00311        << "  " << std::setw(15) << ComputeTime_
00312        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_;
00313     if (ComputeTime_ != 0.0)
00314       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl;
00315     else
00316       os << "  " << std::setw(15) << 0.0 << endl;
00317     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse_ 
00318        << "  " << std::setw(15) << ApplyInverseTime_
00319        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_;
00320     if (ApplyInverseTime_ != 0.0)
00321       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl;
00322     else
00323       os << "  " << std::setw(15) << 0.0 << endl;
00324     os << "================================================================================" << endl;
00325     os << endl;
00326   }
00327 
00328   return(os);
00329 }
00330 
00331 //==============================================================================
00332 double Ifpack_PointRelaxation::
00333 Condest(const Ifpack_CondestType CT, 
00334         const int MaxIters, const double Tol,
00335     Epetra_RowMatrix* Matrix_in)
00336 {
00337   if (!IsComputed()) // cannot compute right now
00338     return(-1.0);
00339 
00340   // always computes it. Call Condest() with no parameters to get
00341   // the previous estimate.
00342   Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
00343 
00344   return(Condest_);
00345 }
00346 
00347 //==============================================================================
00348 void Ifpack_PointRelaxation::SetLabel()
00349 {
00350   string PT;
00351   if (PrecType_ == IFPACK_JACOBI)
00352     PT = "Jacobi";
00353   else if (PrecType_ == IFPACK_GS){
00354     PT = "GS";
00355     if(DoBackwardGS_)
00356       PT = "Backward " + PT;
00357   }    
00358   else if (PrecType_ == IFPACK_SGS)
00359     PT = "SGS";
00360 
00361   Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_)
00362     + ", damping=" + Ifpack_toString(DampingFactor_) + ")";
00363 }
00364 
00365 //==============================================================================
00366 // Note that Ifpack_PointRelaxation and Jacobi is much faster than
00367 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
00368 // way the matrix-vector produce is performed).
00369 //
00370 // Another ML-related observation is that the starting solution (in Y)
00371 // is NOT supposed to be zero. This may slow down the application of just
00372 // one sweep of Jacobi.
00373 //
00374 int Ifpack_PointRelaxation::
00375 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00376 {
00377   if (!IsComputed())
00378     IFPACK_CHK_ERR(-3);
00379 
00380   if (X.NumVectors() != Y.NumVectors())
00381     IFPACK_CHK_ERR(-2);
00382 
00383   Time_->ResetStartTime();
00384 
00385   // AztecOO gives X and Y pointing to the same memory location,
00386   // need to create an auxiliary vector, Xcopy
00387   Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
00388   if (X.Pointers()[0] == Y.Pointers()[0])
00389     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00390   else
00391     Xcopy = Teuchos::rcp( &X, false );
00392     
00393   if (ZeroStartingSolution_)
00394     Y.PutScalar(0.0);
00395 
00396   // Flops are updated in each of the following. 
00397   switch (PrecType_) {
00398   case IFPACK_JACOBI:
00399     IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
00400     break;
00401   case IFPACK_GS:
00402     IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
00403     break;
00404   case IFPACK_SGS:
00405     IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
00406     break;
00407   default:
00408     IFPACK_CHK_ERR(-1); // something wrong
00409   }
00410 
00411   ++NumApplyInverse_;
00412   ApplyInverseTime_ += Time_->ElapsedTime();
00413   return(0);
00414 }
00415 
00416 //==============================================================================
00417 // This preconditioner can be much slower than AztecOO and ML versions
00418 // if the matrix-vector product requires all ExtractMyRowCopy() 
00419 // (as done through Ifpack_AdditiveSchwarz).
00420 int Ifpack_PointRelaxation::
00421 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const
00422 {
00423 
00424   int NumVectors = LHS.NumVectors();
00425   Epetra_MultiVector A_times_LHS( LHS.Map(),NumVectors );
00426 
00427   for (int j = 0; j < NumSweeps_ ; j++) {
00428 
00429     IFPACK_CHK_ERR(Apply(LHS,A_times_LHS));
00430     IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0));
00431     for (int v = 0 ; v < NumVectors ; ++v)
00432       IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)), 
00433                                      *Diagonal_, 1.0));
00434 
00435   }
00436 
00437   // Flops:
00438   // - matrix vector              (2 * NumGlobalNonzeros_)
00439   // - update                     (2 * NumGlobalRows_)
00440   // - Multiply:
00441   //   - DampingFactor            (NumGlobalRows_)
00442   //   - Diagonal                 (NumGlobalRows_)
00443   //   - A + B                    (NumGlobalRows_)
00444   //   - 1.0                      (NumGlobalRows_)
00445 #ifdef IFPACK_FLOPCOUNTERS
00446   ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00447 #endif
00448 
00449   return(0);
00450 }
00451 
00452 //==============================================================================
00453 int Ifpack_PointRelaxation::
00454 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00455 {
00456   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00457   // try to pick the best option; performances may be improved
00458   // if several sweeps are used.
00459   if (CrsMatrix != 0)
00460   {
00461     if (CrsMatrix->StorageOptimized())
00462       return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y));
00463     else
00464       return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y));
00465   }
00466   else
00467     return(ApplyInverseGS_RowMatrix(X, Y));
00468 } //ApplyInverseGS()
00469 
00470 //==============================================================================
00471 int Ifpack_PointRelaxation::
00472 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00473 {
00474   int NumVectors = X.NumVectors();
00475 
00476   int Length = Matrix().MaxNumEntries();
00477   vector<int> Indices(Length);
00478   vector<double> Values(Length);
00479 
00480   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00481   if (IsParallel_)
00482     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00483   else
00484     Y2 = Teuchos::rcp( &Y, false );
00485 
00486   // extract views (for nicer and faster code)
00487   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00488   X.ExtractView(&x_ptr);
00489   Y.ExtractView(&y_ptr);
00490   Y2->ExtractView(&y2_ptr);
00491   Diagonal_->ExtractView(&d_ptr);
00492 
00493   for (int j = 0; j < NumSweeps_ ; j++) {
00494 
00495     // data exchange is here, once per sweep
00496     if (IsParallel_)
00497       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00498 
00499     // FIXME: do I really need this code below?
00500     if (NumVectors == 1) {
00501 
00502       double* y0_ptr = y_ptr[0];
00503       double* y20_ptr = y2_ptr[0];
00504       double* x0_ptr = x_ptr[0];
00505 
00506       if(!DoBackwardGS_){      
00507         /* Forward Mode */
00508         for (int i = 0 ; i < NumMyRows_ ; ++i) {
00509 
00510           int NumEntries;
00511           int col;
00512           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00513                                                    &Values[0], &Indices[0]));
00514           
00515           double dtemp = 0.0;
00516           for (int k = 0 ; k < NumEntries ; ++k) {
00517             
00518             col = Indices[k];
00519             dtemp += Values[k] * y20_ptr[col];
00520           }
00521           
00522           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00523         }
00524       }
00525       else {
00526         /* Backward Mode */
00527         for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00528 
00529           int NumEntries;
00530           int col;
00531       (void) col; // Forestall compiler warning for unused variable.
00532           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00533                                                    &Values[0], &Indices[0]));
00534           double dtemp = 0.0;
00535           for (int k = 0 ; k < NumEntries ; ++k) {
00536 
00537             col = Indices[k];
00538             dtemp += Values[k] * y20_ptr[i];
00539           }
00540           
00541           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00542         }
00543       }
00544       
00545       // using Export() sounded quite expensive
00546       if (IsParallel_)
00547         for (int i = 0 ; i < NumMyRows_ ; ++i)
00548           y0_ptr[i] = y20_ptr[i];
00549 
00550     }
00551     else {
00552       if(!DoBackwardGS_){      
00553         /* Forward Mode */
00554         for (int i = 0 ; i < NumMyRows_ ; ++i) {
00555           
00556           int NumEntries;
00557           int col;
00558           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00559                                                    &Values[0], &Indices[0]));
00560           
00561           for (int m = 0 ; m < NumVectors ; ++m) {
00562             
00563             double dtemp = 0.0;
00564             for (int k = 0 ; k < NumEntries ; ++k) {
00565               
00566               col = Indices[k];
00567               dtemp += Values[k] * y2_ptr[m][col];
00568             }
00569             
00570             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00571           }
00572         }
00573       }
00574       else {
00575         /* Backward Mode */
00576         for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00577           int NumEntries;
00578           int col;
00579           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00580                                                    &Values[0], &Indices[0]));
00581 
00582           for (int m = 0 ; m < NumVectors ; ++m) {
00583             
00584             double dtemp = 0.0;
00585             for (int k = 0 ; k < NumEntries ; ++k) {
00586 
00587               col = Indices[k];
00588               dtemp += Values[k] * y2_ptr[m][col];
00589             }
00590 
00591             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00592 
00593           }
00594         }
00595       }
00596 
00597       // using Export() sounded quite expensive   
00598       if (IsParallel_)
00599         for (int m = 0 ; m < NumVectors ; ++m) 
00600           for (int i = 0 ; i < NumMyRows_ ; ++i)
00601             y_ptr[m][i] = y2_ptr[m][i];
00602       
00603     }
00604   }
00605 
00606 #ifdef IFPACK_FLOPCOUNTERS
00607   ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00608 #endif
00609 
00610   return(0);
00611 } //ApplyInverseGS_RowMatrix()
00612 
00613 //==============================================================================
00614 int Ifpack_PointRelaxation::
00615 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00616 {
00617   int NumVectors = X.NumVectors();
00618 
00619   int* Indices;
00620   double* Values;
00621 
00622   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00623   if (IsParallel_) {
00624     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00625   }
00626   else
00627     Y2 = Teuchos::rcp( &Y, false );
00628 
00629   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00630   X.ExtractView(&x_ptr);
00631   Y.ExtractView(&y_ptr);
00632   Y2->ExtractView(&y2_ptr);
00633   Diagonal_->ExtractView(&d_ptr);
00634   
00635   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00636     
00637     // only one data exchange per sweep
00638     if (IsParallel_)
00639       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00640 
00641     if(!DoBackwardGS_){  
00642       /* Forward Mode */
00643       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00644 
00645         int NumEntries;
00646         int col;
00647         double diag = d_ptr[i];
00648         
00649         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00650         
00651         for (int m = 0 ; m < NumVectors ; ++m) {
00652           
00653           double dtemp = 0.0;
00654           
00655           for (int k = 0; k < NumEntries; ++k) {
00656             
00657             col = Indices[k];
00658             dtemp += Values[k] * y2_ptr[m][col];
00659           }
00660           
00661           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00662         }
00663       }
00664     }
00665     else {
00666       /* Backward Mode */
00667       for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00668 
00669         int NumEntries;
00670         int col;
00671         double diag = d_ptr[i];
00672         
00673         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00674         
00675         for (int m = 0 ; m < NumVectors ; ++m) {
00676           
00677           double dtemp = 0.0;
00678           for (int k = 0; k < NumEntries; ++k) {
00679             
00680             col = Indices[k];
00681             dtemp += Values[k] * y2_ptr[m][col];
00682           }
00683           
00684           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00685           
00686         }
00687       }
00688     }
00689     
00690     if (IsParallel_)
00691       for (int m = 0 ; m < NumVectors ; ++m) 
00692         for (int i = 0 ; i < NumMyRows_ ; ++i)
00693           y_ptr[m][i] = y2_ptr[m][i];
00694   }
00695 
00696 #ifdef IFPACK_FLOPCOUNTERS
00697   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00698 #endif
00699   return(0);
00700 } //ApplyInverseGS_CrsMatrix()
00701 
00702 //
00703 //==============================================================================
00704 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage()
00705 
00706 int Ifpack_PointRelaxation::
00707 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00708 {
00709   int* IndexOffset;
00710   int* Indices;
00711   double* Values;
00712   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00713 
00714   int NumVectors = X.NumVectors();
00715 
00716   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00717   if (IsParallel_) {
00718     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00719   }
00720   else
00721     Y2 = Teuchos::rcp( &Y, false );
00722 
00723   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00724   X.ExtractView(&x_ptr);
00725   Y.ExtractView(&y_ptr);
00726   Y2->ExtractView(&y2_ptr);
00727   Diagonal_->ExtractView(&d_ptr);
00728 
00729   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00730     
00731     // only one data exchange per sweep
00732     if (IsParallel_)
00733       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00734 
00735     if(!DoBackwardGS_){  
00736       /* Forward Mode */
00737       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00738         
00739         int col;
00740         double diag = d_ptr[i];
00741         
00742         for (int m = 0 ; m < NumVectors ; ++m) {
00743           
00744           double dtemp = 0.0;
00745           
00746           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00747             
00748             col = Indices[k];
00749             dtemp += Values[k] * y2_ptr[m][col];
00750           }
00751           
00752           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00753         }      
00754       }
00755     }
00756     else {
00757       /* Backward Mode */
00758       for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00759 
00760         int col;
00761         double diag = d_ptr[i];
00762         
00763         for (int m = 0 ; m < NumVectors ; ++m) {
00764 
00765           double dtemp = 0.0;
00766           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00767 
00768             col = Indices[k];
00769             dtemp += Values[k] * y2_ptr[m][col];
00770           }
00771 
00772           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00773 
00774         }
00775       }
00776     }
00777     
00778 
00779     if (IsParallel_)
00780       for (int m = 0 ; m < NumVectors ; ++m) 
00781         for (int i = 0 ; i < NumMyRows_ ; ++i)
00782           y_ptr[m][i] = y2_ptr[m][i];
00783   }
00784 
00785 #ifdef IFPACK_FLOPCOUNTERS
00786   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00787 #endif
00788   return(0);
00789 } //ApplyInverseGS_FastCrsMatrix()
00790 
00791 //==============================================================================
00792 int Ifpack_PointRelaxation::
00793 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00794 {
00795   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00796   // try to pick the best option; performances may be improved
00797   // if several sweeps are used.
00798   if (CrsMatrix != 0)
00799   {
00800     if (CrsMatrix->StorageOptimized())
00801       return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
00802     else
00803       return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
00804   }
00805   else
00806     return(ApplyInverseSGS_RowMatrix(X, Y));
00807 }
00808 
00809 //==============================================================================
00810 int Ifpack_PointRelaxation::
00811 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00812 {
00813   int NumVectors = X.NumVectors();
00814   int Length = Matrix().MaxNumEntries();
00815   vector<int> Indices(Length);
00816   vector<double> Values(Length);
00817 
00818   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00819   if (IsParallel_) {
00820     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00821   }
00822   else
00823     Y2 = Teuchos::rcp( &Y, false );
00824 
00825   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00826   X.ExtractView(&x_ptr);
00827   Y.ExtractView(&y_ptr);
00828   Y2->ExtractView(&y2_ptr);
00829   Diagonal_->ExtractView(&d_ptr);
00830   
00831   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00832     
00833     // only one data exchange per sweep
00834     if (IsParallel_)
00835       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00836 
00837     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00838 
00839       int NumEntries;
00840       int col;
00841       double diag = d_ptr[i];
00842 
00843       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00844                                                &Values[0], &Indices[0]));
00845 
00846       for (int m = 0 ; m < NumVectors ; ++m) {
00847 
00848         double dtemp = 0.0;
00849 
00850         for (int k = 0 ; k < NumEntries ; ++k) {
00851 
00852           col = Indices[k];
00853           dtemp += Values[k] * y2_ptr[m][col];
00854         }
00855 
00856         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00857       }
00858     }
00859 
00860     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00861 
00862       int NumEntries;
00863       int col;
00864       double diag = d_ptr[i];
00865 
00866       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00867                                                &Values[0], &Indices[0]));
00868 
00869       for (int m = 0 ; m < NumVectors ; ++m) {
00870 
00871         double dtemp = 0.0;
00872         for (int k = 0 ; k < NumEntries ; ++k) {
00873 
00874           col = Indices[k];
00875           dtemp += Values[k] * y2_ptr[m][col];
00876         }
00877 
00878         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00879 
00880       }
00881     }
00882 
00883     if (IsParallel_)
00884       for (int m = 0 ; m < NumVectors ; ++m) 
00885         for (int i = 0 ; i < NumMyRows_ ; ++i)
00886           y_ptr[m][i] = y2_ptr[m][i];
00887   }
00888 
00889 #ifdef IFPACK_FLOPCOUNTERS
00890   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00891 #endif
00892   return(0);
00893 }
00894 
00895 //==============================================================================
00896 int Ifpack_PointRelaxation::
00897 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00898 {
00899   int NumVectors = X.NumVectors();
00900 
00901   int* Indices;
00902   double* Values;
00903 
00904   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00905   if (IsParallel_) {
00906     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00907   }
00908   else
00909     Y2 = Teuchos::rcp( &Y, false );
00910 
00911   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00912   X.ExtractView(&x_ptr);
00913   Y.ExtractView(&y_ptr);
00914   Y2->ExtractView(&y2_ptr);
00915   Diagonal_->ExtractView(&d_ptr);
00916   
00917   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00918     
00919     // only one data exchange per sweep
00920     if (IsParallel_)
00921       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00922 
00923     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00924 
00925       int NumEntries;
00926       int col;
00927       double diag = d_ptr[i];
00928 
00929       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00930 
00931       for (int m = 0 ; m < NumVectors ; ++m) {
00932 
00933         double dtemp = 0.0;
00934 
00935         for (int k = 0; k < NumEntries; ++k) {
00936 
00937           col = Indices[k];
00938           dtemp += Values[k] * y2_ptr[m][col];
00939         }
00940 
00941         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00942       }
00943     }
00944 
00945     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00946 
00947       int NumEntries;
00948       int col;
00949       double diag = d_ptr[i];
00950 
00951       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00952 
00953       for (int m = 0 ; m < NumVectors ; ++m) {
00954 
00955         double dtemp = 0.0;
00956         for (int k = 0; k < NumEntries; ++k) {
00957 
00958           col = Indices[k];
00959           dtemp += Values[k] * y2_ptr[m][col];
00960         }
00961 
00962         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00963 
00964       }
00965     }
00966 
00967     if (IsParallel_)
00968       for (int m = 0 ; m < NumVectors ; ++m) 
00969         for (int i = 0 ; i < NumMyRows_ ; ++i)
00970           y_ptr[m][i] = y2_ptr[m][i];
00971   }
00972 
00973 #ifdef IFPACK_FLOPCOUNTERS
00974   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00975 #endif
00976   return(0);
00977 }
00978 
00979 //==============================================================================
00980 // this requires Epetra_CrsMatrix + OptimizeStorage()
00981 int Ifpack_PointRelaxation::
00982 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00983 {
00984   int* IndexOffset;
00985   int* Indices;
00986   double* Values;
00987   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00988 
00989   int NumVectors = X.NumVectors();
00990 
00991   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00992   if (IsParallel_) {
00993     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00994   }
00995   else
00996     Y2 = Teuchos::rcp( &Y, false );
00997 
00998   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00999   X.ExtractView(&x_ptr);
01000   Y.ExtractView(&y_ptr);
01001   Y2->ExtractView(&y2_ptr);
01002   Diagonal_->ExtractView(&d_ptr);
01003   
01004   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
01005     
01006     // only one data exchange per sweep
01007     if (IsParallel_)
01008       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
01009 
01010     for (int i = 0 ; i < NumMyRows_ ; ++i) {
01011 
01012       int col;
01013       double diag = d_ptr[i];
01014 
01015       // no need to extract row i
01016       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01017 
01018       for (int m = 0 ; m < NumVectors ; ++m) {
01019 
01020         double dtemp = 0.0;
01021 
01022         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01023 
01024           col = Indices[k];
01025           dtemp += Values[k] * y2_ptr[m][col];
01026         }
01027 
01028         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01029       }
01030     }
01031 
01032     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
01033 
01034       int col;
01035       double diag = d_ptr[i];
01036 
01037       // no need to extract row i
01038       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01039 
01040       for (int m = 0 ; m < NumVectors ; ++m) {
01041 
01042         double dtemp = 0.0;
01043         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01044 
01045           col = Indices[k];
01046           dtemp += Values[k] * y2_ptr[m][col];
01047         }
01048 
01049         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01050 
01051       }
01052     }
01053 
01054     if (IsParallel_)
01055       for (int m = 0 ; m < NumVectors ; ++m) 
01056         for (int i = 0 ; i < NumMyRows_ ; ++i)
01057           y_ptr[m][i] = y2_ptr[m][i];
01058   }
01059 
01060 #ifdef IFPACK_FLOPCOUNTERS
01061   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
01062 #endif
01063   return(0);
01064 }
 All Classes Files Functions Variables Typedefs Enumerations Friends