Ifpack_PointRelaxation.cpp

00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Ifpack_ConfigDefs.h"
00031 #include <iomanip>
00032 #include "Epetra_Operator.h"
00033 #include "Epetra_CrsMatrix.h"
00034 #include "Epetra_Comm.h"
00035 #include "Epetra_Map.h"
00036 #include "Epetra_MultiVector.h"
00037 #include "Ifpack_Preconditioner.h"
00038 #include "Ifpack_PointRelaxation.h"
00039 #include "Ifpack_Utils.h"
00040 #include "Ifpack_Condest.h"
00041 
00042 static const int IFPACK_JACOBI = 0;
00043 static const int IFPACK_GS = 1;
00044 static const int IFPACK_SGS = 2;
00045 
00046 //==============================================================================
00047 Ifpack_PointRelaxation::
00048 Ifpack_PointRelaxation(const Epetra_RowMatrix* Matrix_in) :
00049   IsInitialized_(false),
00050   IsComputed_(false),
00051   NumInitialize_(0),
00052   NumCompute_(0),
00053   NumApplyInverse_(0),
00054   InitializeTime_(0.0),
00055   ComputeTime_(0.0),
00056   ApplyInverseTime_(0.0),
00057   ComputeFlops_(0.0),
00058   ApplyInverseFlops_(0.0),
00059   NumSweeps_(1),
00060   DampingFactor_(1.0),
00061   UseTranspose_(false),
00062   Condest_(-1.0),
00063   ComputeCondest_(false),
00064   PrecType_(IFPACK_JACOBI),
00065   MinDiagonalValue_(0.0),
00066   NumMyRows_(0),
00067   NumMyNonzeros_(0),
00068   NumGlobalRows_(0),
00069   NumGlobalNonzeros_(0),
00070   Matrix_(Teuchos::rcp(Matrix_in,false)),
00071   IsParallel_(false),
00072   ZeroStartingSolution_(true),
00073   DoBackwardGS_(false)
00074 {
00075 }
00076 
00077 //==============================================================================
00078 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List)
00079 {
00080 
00081   string PT;
00082   if (PrecType_ == IFPACK_JACOBI)
00083     PT = "Jacobi";
00084   else if (PrecType_ == IFPACK_GS)
00085     PT = "Gauss-Seidel";
00086   else if (PrecType_ == IFPACK_SGS)
00087     PT = "symmetric Gauss-Seidel";
00088 
00089   PT = List.get("relaxation: type", PT);
00090 
00091   if (PT == "Jacobi")
00092     PrecType_ = IFPACK_JACOBI;
00093   else if (PT == "Gauss-Seidel")
00094     PrecType_ = IFPACK_GS;
00095   else if (PT == "symmetric Gauss-Seidel")
00096     PrecType_ = IFPACK_SGS;
00097   else {
00098     IFPACK_CHK_ERR(-2);
00099   }
00100   
00101   NumSweeps_            = List.get("relaxation: sweeps",NumSweeps_);
00102   DampingFactor_        = List.get("relaxation: damping factor", 
00103                                    DampingFactor_);
00104   MinDiagonalValue_     = List.get("relaxation: min diagonal value", 
00105                                    MinDiagonalValue_);
00106   ZeroStartingSolution_ = List.get("relaxation: zero starting solution", 
00107                                    ZeroStartingSolution_);
00108 
00109   DoBackwardGS_         = List.get("relaxation: backward mode",DoBackwardGS_);
00110   
00111   SetLabel();
00112 
00113   return(0);
00114 }
00115 
00116 //==============================================================================
00117 const Epetra_Comm& Ifpack_PointRelaxation::Comm() const
00118 {
00119   return(Matrix_->Comm());
00120 }
00121 
00122 //==============================================================================
00123 const Epetra_Map& Ifpack_PointRelaxation::OperatorDomainMap() const
00124 {
00125   return(Matrix_->OperatorDomainMap());
00126 }
00127 
00128 //==============================================================================
00129 const Epetra_Map& Ifpack_PointRelaxation::OperatorRangeMap() const
00130 {
00131   return(Matrix_->OperatorRangeMap());
00132 }
00133 
00134 //==============================================================================
00135 int Ifpack_PointRelaxation::
00136 Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00137 {
00138 
00139   if (IsComputed() == false)
00140     IFPACK_CHK_ERR(-3);
00141 
00142   if (X.NumVectors() != Y.NumVectors())
00143     IFPACK_CHK_ERR(-2);
00144 
00145   IFPACK_CHK_ERR(Matrix_->Multiply(UseTranspose(),X,Y));
00146   return(0);
00147 }
00148 
00149 //==============================================================================
00150 int Ifpack_PointRelaxation::Initialize()
00151 {
00152   IsInitialized_ = false;
00153 
00154   if (Matrix_ == Teuchos::null)
00155     IFPACK_CHK_ERR(-2);
00156 
00157   if (Time_ == Teuchos::null)
00158     Time_ = Teuchos::rcp( new Epetra_Time(Comm()) );
00159 
00160   if (Matrix().NumGlobalRows() != Matrix().NumGlobalCols())
00161     IFPACK_CHK_ERR(-2); // only square matrices
00162 
00163   NumMyRows_ = Matrix_->NumMyRows();
00164   NumMyNonzeros_ = Matrix_->NumMyNonzeros();
00165   NumGlobalRows_ = Matrix_->NumGlobalRows();
00166   NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros();
00167 
00168   if (Comm().NumProc() != 1)
00169     IsParallel_ = true;
00170   else
00171     IsParallel_ = false;
00172 
00173   ++NumInitialize_;
00174   InitializeTime_ += Time_->ElapsedTime();
00175   IsInitialized_ = true;
00176   return(0);
00177 }
00178 
00179 //==============================================================================
00180 int Ifpack_PointRelaxation::Compute()
00181 {
00182   int ierr = 0;
00183   if (!IsInitialized())
00184     IFPACK_CHK_ERR(Initialize());
00185 
00186   Time_->ResetStartTime();
00187 
00188   // reset values
00189   IsComputed_ = false;
00190   Condest_ = -1.0;
00191 
00192   if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed.
00193   if (NumSweeps_ < 0)
00194     IFPACK_CHK_ERR(-2); // at least one application
00195   
00196   Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
00197 
00198   if (Diagonal_ == Teuchos::null)
00199     IFPACK_CHK_ERR(-5);
00200 
00201   IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_));
00202 
00203   // check diagonal elements, store the inverses, and verify that
00204   // no zeros are around. If an element is zero, then by default
00205   // its inverse is zero as well (that is, the row is ignored).
00206   for (int i = 0 ; i < NumMyRows_ ; ++i) {
00207     double& diag = (*Diagonal_)[i];
00208     if (IFPACK_ABS(diag) < MinDiagonalValue_)
00209       diag = MinDiagonalValue_;
00210     if (diag != 0.0)
00211       diag = 1.0 / diag;
00212   }
00213   ComputeFlops_ += NumMyRows_;
00214 
00215 #if 0
00216   // some methods require the inverse of the diagonal, compute it
00217   // now and store it in `Diagonal_'
00218   if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) {
00219     Diagonal_->Reciprocal(*Diagonal_);
00220     // update flops
00221     ComputeFlops_ += NumMyRows_;
00222   }
00223 #endif
00224 
00225   // We need to import data from external processors. Here I create an
00226   // Epetra_Import object because I cannot assume that Matrix_ has one.
00227   // This is a bit of waste of resources (but the code is more robust).
00228   // Note that I am doing some strange stuff to set the components of Y
00229   // from Y2 (to save some time).
00230   //
00231   if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) {
00232     Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
00233                                   Matrix().RowMatrixRowMap()) );
00234     if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
00235   }
00236 
00237   ++NumCompute_;
00238   ComputeTime_ += Time_->ElapsedTime();
00239   IsComputed_ = true;
00240 
00241   IFPACK_CHK_ERR(ierr);
00242   return(0);
00243 }
00244 
00245 //==============================================================================
00246 ostream& Ifpack_PointRelaxation::Print(ostream & os) const
00247 {
00248 
00249   double MyMinVal, MyMaxVal;
00250   double MinVal, MaxVal;
00251 
00252   if (IsComputed_) {
00253     Diagonal_->MinValue(&MyMinVal);
00254     Diagonal_->MaxValue(&MyMaxVal);
00255     Comm().MinAll(&MyMinVal,&MinVal,1);
00256     Comm().MinAll(&MyMaxVal,&MaxVal,1);
00257   }
00258 
00259   if (!Comm().MyPID()) {
00260     os << endl;
00261     os << "================================================================================" << endl;
00262     os << "Ifpack_PointRelaxation" << endl;
00263     os << "Sweeps         = " << NumSweeps_ << endl;
00264     os << "damping factor = " << DampingFactor_ << endl;
00265     if (PrecType_ == IFPACK_JACOBI)
00266       os << "Type           = Jacobi" << endl;
00267     else if (PrecType_ == IFPACK_GS)
00268       os << "Type           = Gauss-Seidel" << endl;
00269     else if (PrecType_ == IFPACK_SGS)
00270       os << "Type           = symmetric Gauss-Seidel" << endl;
00271     if (DoBackwardGS_) 
00272       os << "Using backward mode (GS only)" << endl;
00273     if (ZeroStartingSolution_) 
00274       os << "Using zero starting solution" << endl;
00275     else
00276       os << "Using input starting solution" << endl;
00277     os << "Condition number estimate = " << Condest() << endl;
00278     os << "Global number of rows            = " << Matrix_->NumGlobalRows() << endl;
00279     if (IsComputed_) {
00280       os << "Minimum value on stored diagonal = " << MinVal << endl;
00281       os << "Maximum value on stored diagonal = " << MaxVal << endl;
00282     }
00283     os << endl;
00284     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
00285     os << "-----           -------   --------------       ------------     --------" << endl;
00286     os << "Initialize()    "   << std::setw(5) << NumInitialize_ 
00287        << "  " << std::setw(15) << InitializeTime_ 
00288        << "              0.0              0.0" << endl;
00289     os << "Compute()       "   << std::setw(5) << NumCompute_ 
00290        << "  " << std::setw(15) << ComputeTime_
00291        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_;
00292     if (ComputeTime_ != 0.0)
00293       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl;
00294     else
00295       os << "  " << std::setw(15) << 0.0 << endl;
00296     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse_ 
00297        << "  " << std::setw(15) << ApplyInverseTime_
00298        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_;
00299     if (ApplyInverseTime_ != 0.0)
00300       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl;
00301     else
00302       os << "  " << std::setw(15) << 0.0 << endl;
00303     os << "================================================================================" << endl;
00304     os << endl;
00305   }
00306 
00307   return(os);
00308 }
00309 
00310 //==============================================================================
00311 double Ifpack_PointRelaxation::
00312 Condest(const Ifpack_CondestType CT, 
00313         const int MaxIters, const double Tol,
00314     Epetra_RowMatrix* Matrix_in)
00315 {
00316   if (!IsComputed()) // cannot compute right now
00317     return(-1.0);
00318 
00319   // always computes it. Call Condest() with no parameters to get
00320   // the previous estimate.
00321   Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
00322 
00323   return(Condest_);
00324 }
00325 
00326 //==============================================================================
00327 void Ifpack_PointRelaxation::SetLabel()
00328 {
00329   string PT;
00330   if (PrecType_ == IFPACK_JACOBI)
00331     PT = "Jacobi";
00332   else if (PrecType_ == IFPACK_GS){
00333     PT = "GS";
00334     if(DoBackwardGS_)
00335       PT = "Backward " + PT;
00336   }    
00337   else if (PrecType_ == IFPACK_SGS)
00338     PT = "SGS";
00339 
00340   Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_)
00341     + ", damping=" + Ifpack_toString(DampingFactor_) + ")";
00342 }
00343 
00344 //==============================================================================
00345 // Note that Ifpack_PointRelaxation and Jacobi is much faster than
00346 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
00347 // way the matrix-vector produce is performed).
00348 //
00349 // Another ML-related observation is that the starting solution (in Y)
00350 // is NOT supposed to be zero. This may slow down the application of just
00351 // one sweep of Jacobi.
00352 //
00353 int Ifpack_PointRelaxation::
00354 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00355 {
00356   if (!IsComputed())
00357     IFPACK_CHK_ERR(-3);
00358 
00359   if (X.NumVectors() != Y.NumVectors())
00360     IFPACK_CHK_ERR(-2);
00361 
00362   Time_->ResetStartTime();
00363 
00364   // AztecOO gives X and Y pointing to the same memory location,
00365   // need to create an auxiliary vector, Xcopy
00366   Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
00367   if (X.Pointers()[0] == Y.Pointers()[0])
00368     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00369   else
00370     Xcopy = Teuchos::rcp( &X, false );
00371     
00372   if (ZeroStartingSolution_)
00373     Y.PutScalar(0.0);
00374 
00375   // Flops are updated in each of the following. 
00376   switch (PrecType_) {
00377   case IFPACK_JACOBI:
00378     IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
00379     break;
00380   case IFPACK_GS:
00381     IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
00382     break;
00383   case IFPACK_SGS:
00384     IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
00385     break;
00386   default:
00387     IFPACK_CHK_ERR(-1); // something wrong
00388   }
00389 
00390   ++NumApplyInverse_;
00391   ApplyInverseTime_ += Time_->ElapsedTime();
00392   return(0);
00393 }
00394 
00395 //==============================================================================
00396 // This preconditioner can be much slower than AztecOO and ML versions
00397 // if the matrix-vector product requires all ExtractMyRowCopy() 
00398 // (as done through Ifpack_AdditiveSchwarz).
00399 int Ifpack_PointRelaxation::
00400 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const
00401 {
00402 
00403   int NumVectors = LHS.NumVectors();
00404   Epetra_MultiVector A_times_LHS( LHS.Map(),NumVectors );
00405 
00406   for (int j = 0; j < NumSweeps_ ; j++) {
00407 
00408     IFPACK_CHK_ERR(Apply(LHS,A_times_LHS));
00409     IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0));
00410     for (int v = 0 ; v < NumVectors ; ++v)
00411       IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)), 
00412                                      *Diagonal_, 1.0));
00413 
00414   }
00415 
00416   // Flops:
00417   // - matrix vector              (2 * NumGlobalNonzeros_)
00418   // - update                     (2 * NumGlobalRows_)
00419   // - Multiply:
00420   //   - DampingFactor            (NumGlobalRows_)
00421   //   - Diagonal                 (NumGlobalRows_)
00422   //   - A + B                    (NumGlobalRows_)
00423   //   - 1.0                      (NumGlobalRows_)
00424   ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00425 
00426   return(0);
00427 }
00428 
00429 //==============================================================================
00430 int Ifpack_PointRelaxation::
00431 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00432 {
00433   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00434   // try to pick the best option; performances may be improved
00435   // if several sweeps are used.
00436   if (CrsMatrix != 0)
00437   {
00438     if (CrsMatrix->StorageOptimized())
00439       return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y));
00440     else
00441       return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y));
00442   }
00443   else
00444     return(ApplyInverseGS_RowMatrix(X, Y));
00445 } //ApplyInverseGS()
00446 
00447 //==============================================================================
00448 int Ifpack_PointRelaxation::
00449 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00450 {
00451   int NumVectors = X.NumVectors();
00452 
00453   int Length = Matrix().MaxNumEntries();
00454   vector<int> Indices(Length);
00455   vector<double> Values(Length);
00456 
00457   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00458   if (IsParallel_)
00459     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00460   else
00461     Y2 = Teuchos::rcp( &Y, false );
00462 
00463   // extract views (for nicer and faster code)
00464   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00465   X.ExtractView(&x_ptr);
00466   Y.ExtractView(&y_ptr);
00467   Y2->ExtractView(&y2_ptr);
00468   Diagonal_->ExtractView(&d_ptr);
00469 
00470   for (int j = 0; j < NumSweeps_ ; j++) {
00471 
00472     // data exchange is here, once per sweep
00473     if (IsParallel_)
00474       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00475 
00476     // FIXME: do I really need this code below?
00477     if (NumVectors == 1) {
00478 
00479       double* y0_ptr = y_ptr[0];
00480       double* y20_ptr = y2_ptr[0];
00481       double* x0_ptr = x_ptr[0];
00482 
00483       if(!DoBackwardGS_){      
00484         /* Forward Mode */
00485         for (int i = 0 ; i < NumMyRows_ ; ++i) {
00486 
00487           int NumEntries;
00488           int col;
00489           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00490                                                    &Values[0], &Indices[0]));
00491           
00492           double dtemp = 0.0;
00493           for (int k = 0 ; k < NumEntries ; ++k) {
00494             
00495             col = Indices[k];
00496             dtemp += Values[k] * y20_ptr[col];
00497           }
00498           
00499           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00500         }
00501       }
00502       else {
00503         /* Backward Mode */
00504         for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00505 
00506           int NumEntries;
00507           int col;
00508           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00509                                                    &Values[0], &Indices[0]));
00510           double dtemp = 0.0;
00511           for (int k = 0 ; k < NumEntries ; ++k) {
00512 
00513             col = Indices[k];
00514             dtemp += Values[k] * y20_ptr[i];
00515           }
00516           
00517           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00518         }
00519       }
00520       
00521       // using Export() sounded quite expensive
00522       if (IsParallel_)
00523         for (int i = 0 ; i < NumMyRows_ ; ++i)
00524           y0_ptr[i] = y20_ptr[i];
00525 
00526     }
00527     else {
00528       if(!DoBackwardGS_){      
00529         /* Forward Mode */
00530         for (int i = 0 ; i < NumMyRows_ ; ++i) {
00531           
00532           int NumEntries;
00533           int col;
00534           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00535                                                    &Values[0], &Indices[0]));
00536           
00537           for (int m = 0 ; m < NumVectors ; ++m) {
00538             
00539             double dtemp = 0.0;
00540             for (int k = 0 ; k < NumEntries ; ++k) {
00541               
00542               col = Indices[k];
00543               dtemp += Values[k] * y2_ptr[m][col];
00544             }
00545             
00546             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00547           }
00548         }
00549       }
00550       else {
00551         /* Backward Mode */
00552         for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00553           int NumEntries;
00554           int col;
00555           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00556                                                    &Values[0], &Indices[0]));
00557 
00558           for (int m = 0 ; m < NumVectors ; ++m) {
00559             
00560             double dtemp = 0.0;
00561             for (int k = 0 ; k < NumEntries ; ++k) {
00562 
00563               col = Indices[k];
00564               dtemp += Values[k] * y2_ptr[m][col];
00565             }
00566 
00567             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00568 
00569           }
00570         }
00571       }
00572 
00573       // using Export() sounded quite expensive   
00574       if (IsParallel_)
00575         for (int m = 0 ; m < NumVectors ; ++m) 
00576           for (int i = 0 ; i < NumMyRows_ ; ++i)
00577             y_ptr[m][i] = y2_ptr[m][i];
00578       
00579     }
00580   }
00581 
00582   ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00583 
00584   return(0);
00585 } //ApplyInverseGS_RowMatrix()
00586 
00587 //==============================================================================
00588 int Ifpack_PointRelaxation::
00589 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00590 {
00591   int NumVectors = X.NumVectors();
00592 
00593   int* Indices;
00594   double* Values;
00595 
00596   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00597   if (IsParallel_) {
00598     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00599   }
00600   else
00601     Y2 = Teuchos::rcp( &Y, false );
00602 
00603   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00604   X.ExtractView(&x_ptr);
00605   Y.ExtractView(&y_ptr);
00606   Y2->ExtractView(&y2_ptr);
00607   Diagonal_->ExtractView(&d_ptr);
00608   
00609   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00610     
00611     // only one data exchange per sweep
00612     if (IsParallel_)
00613       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00614 
00615     if(!DoBackwardGS_){  
00616       /* Forward Mode */
00617       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00618 
00619         int NumEntries;
00620         int col;
00621         double diag = d_ptr[i];
00622         
00623         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00624         
00625         for (int m = 0 ; m < NumVectors ; ++m) {
00626           
00627           double dtemp = 0.0;
00628           
00629           for (int k = 0; k < NumEntries; ++k) {
00630             
00631             col = Indices[k];
00632             dtemp += Values[k] * y2_ptr[m][col];
00633           }
00634           
00635           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00636         }
00637       }
00638     }
00639     else {
00640       /* Backward Mode */
00641       for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00642 
00643         int NumEntries;
00644         int col;
00645         double diag = d_ptr[i];
00646         
00647         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00648         
00649         for (int m = 0 ; m < NumVectors ; ++m) {
00650           
00651           double dtemp = 0.0;
00652           for (int k = 0; k < NumEntries; ++k) {
00653             
00654             col = Indices[k];
00655             dtemp += Values[k] * y2_ptr[m][col];
00656           }
00657           
00658           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00659           
00660         }
00661       }
00662     }
00663     
00664     if (IsParallel_)
00665       for (int m = 0 ; m < NumVectors ; ++m) 
00666         for (int i = 0 ; i < NumMyRows_ ; ++i)
00667           y_ptr[m][i] = y2_ptr[m][i];
00668   }
00669 
00670   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00671   return(0);
00672 } //ApplyInverseGS_CrsMatrix()
00673 
00674 //
00675 //==============================================================================
00676 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage()
00677 
00678 int Ifpack_PointRelaxation::
00679 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00680 {
00681   int* IndexOffset;
00682   int* Indices;
00683   double* Values;
00684   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00685 
00686   int NumVectors = X.NumVectors();
00687 
00688   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00689   if (IsParallel_) {
00690     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00691   }
00692   else
00693     Y2 = Teuchos::rcp( &Y, false );
00694 
00695   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00696   X.ExtractView(&x_ptr);
00697   Y.ExtractView(&y_ptr);
00698   Y2->ExtractView(&y2_ptr);
00699   Diagonal_->ExtractView(&d_ptr);
00700 
00701   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00702     
00703     // only one data exchange per sweep
00704     if (IsParallel_)
00705       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00706 
00707     if(!DoBackwardGS_){  
00708       /* Forward Mode */
00709       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00710         
00711         int col;
00712         double diag = d_ptr[i];
00713         
00714         for (int m = 0 ; m < NumVectors ; ++m) {
00715           
00716           double dtemp = 0.0;
00717           
00718           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00719             
00720             col = Indices[k];
00721             dtemp += Values[k] * y2_ptr[m][col];
00722           }
00723           
00724           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00725         }      
00726       }
00727     }
00728     else {
00729       /* Backward Mode */
00730       for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00731 
00732         int col;
00733         double diag = d_ptr[i];
00734         
00735         for (int m = 0 ; m < NumVectors ; ++m) {
00736 
00737           double dtemp = 0.0;
00738           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00739 
00740             col = Indices[k];
00741             dtemp += Values[k] * y2_ptr[m][col];
00742           }
00743 
00744           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00745 
00746         }
00747       }
00748     }
00749     
00750 
00751     if (IsParallel_)
00752       for (int m = 0 ; m < NumVectors ; ++m) 
00753         for (int i = 0 ; i < NumMyRows_ ; ++i)
00754           y_ptr[m][i] = y2_ptr[m][i];
00755   }
00756 
00757   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00758   return(0);
00759 } //ApplyInverseGS_FastCrsMatrix()
00760 
00761 //==============================================================================
00762 int Ifpack_PointRelaxation::
00763 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00764 {
00765   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00766   // try to pick the best option; performances may be improved
00767   // if several sweeps are used.
00768   if (CrsMatrix != 0)
00769   {
00770     if (CrsMatrix->StorageOptimized())
00771       return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
00772     else
00773       return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
00774   }
00775   else
00776     return(ApplyInverseSGS_RowMatrix(X, Y));
00777 }
00778 
00779 //==============================================================================
00780 int Ifpack_PointRelaxation::
00781 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00782 {
00783   int NumVectors = X.NumVectors();
00784   int Length = Matrix().MaxNumEntries();
00785   vector<int> Indices(Length);
00786   vector<double> Values(Length);
00787 
00788   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00789   if (IsParallel_) {
00790     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00791   }
00792   else
00793     Y2 = Teuchos::rcp( &Y, false );
00794 
00795   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00796   X.ExtractView(&x_ptr);
00797   Y.ExtractView(&y_ptr);
00798   Y2->ExtractView(&y2_ptr);
00799   Diagonal_->ExtractView(&d_ptr);
00800   
00801   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00802     
00803     // only one data exchange per sweep
00804     if (IsParallel_)
00805       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00806 
00807     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00808 
00809       int NumEntries;
00810       int col;
00811       double diag = d_ptr[i];
00812 
00813       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00814                                                &Values[0], &Indices[0]));
00815 
00816       for (int m = 0 ; m < NumVectors ; ++m) {
00817 
00818         double dtemp = 0.0;
00819 
00820         for (int k = 0 ; k < NumEntries ; ++k) {
00821 
00822           col = Indices[k];
00823           dtemp += Values[k] * y2_ptr[m][col];
00824         }
00825 
00826         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00827       }
00828     }
00829 
00830     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00831 
00832       int NumEntries;
00833       int col;
00834       double diag = d_ptr[i];
00835 
00836       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00837                                                &Values[0], &Indices[0]));
00838 
00839       for (int m = 0 ; m < NumVectors ; ++m) {
00840 
00841         double dtemp = 0.0;
00842         for (int k = 0 ; k < NumEntries ; ++k) {
00843 
00844           col = Indices[k];
00845           dtemp += Values[k] * y2_ptr[m][col];
00846         }
00847 
00848         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00849 
00850       }
00851     }
00852 
00853     if (IsParallel_)
00854       for (int m = 0 ; m < NumVectors ; ++m) 
00855         for (int i = 0 ; i < NumMyRows_ ; ++i)
00856           y_ptr[m][i] = y2_ptr[m][i];
00857   }
00858 
00859   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00860   return(0);
00861 }
00862 
00863 //==============================================================================
00864 int Ifpack_PointRelaxation::
00865 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00866 {
00867   int NumVectors = X.NumVectors();
00868 
00869   int* Indices;
00870   double* Values;
00871 
00872   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00873   if (IsParallel_) {
00874     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00875   }
00876   else
00877     Y2 = Teuchos::rcp( &Y, false );
00878 
00879   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00880   X.ExtractView(&x_ptr);
00881   Y.ExtractView(&y_ptr);
00882   Y2->ExtractView(&y2_ptr);
00883   Diagonal_->ExtractView(&d_ptr);
00884   
00885   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00886     
00887     // only one data exchange per sweep
00888     if (IsParallel_)
00889       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00890 
00891     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00892 
00893       int NumEntries;
00894       int col;
00895       double diag = d_ptr[i];
00896 
00897       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00898 
00899       for (int m = 0 ; m < NumVectors ; ++m) {
00900 
00901         double dtemp = 0.0;
00902 
00903         for (int k = 0; k < NumEntries; ++k) {
00904 
00905           col = Indices[k];
00906           dtemp += Values[k] * y2_ptr[m][col];
00907         }
00908 
00909         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00910       }
00911     }
00912 
00913     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00914 
00915       int NumEntries;
00916       int col;
00917       double diag = d_ptr[i];
00918 
00919       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00920 
00921       for (int m = 0 ; m < NumVectors ; ++m) {
00922 
00923         double dtemp = 0.0;
00924         for (int k = 0; k < NumEntries; ++k) {
00925 
00926           col = Indices[k];
00927           dtemp += Values[k] * y2_ptr[m][col];
00928         }
00929 
00930         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00931 
00932       }
00933     }
00934 
00935     if (IsParallel_)
00936       for (int m = 0 ; m < NumVectors ; ++m) 
00937         for (int i = 0 ; i < NumMyRows_ ; ++i)
00938           y_ptr[m][i] = y2_ptr[m][i];
00939   }
00940 
00941   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00942   return(0);
00943 }
00944 
00945 //==============================================================================
00946 // this requires Epetra_CrsMatrix + OptimizeStorage()
00947 int Ifpack_PointRelaxation::
00948 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00949 {
00950   int* IndexOffset;
00951   int* Indices;
00952   double* Values;
00953   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00954 
00955   int NumVectors = X.NumVectors();
00956 
00957   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00958   if (IsParallel_) {
00959     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00960   }
00961   else
00962     Y2 = Teuchos::rcp( &Y, false );
00963 
00964   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00965   X.ExtractView(&x_ptr);
00966   Y.ExtractView(&y_ptr);
00967   Y2->ExtractView(&y2_ptr);
00968   Diagonal_->ExtractView(&d_ptr);
00969   
00970   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00971     
00972     // only one data exchange per sweep
00973     if (IsParallel_)
00974       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00975 
00976     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00977 
00978       int col;
00979       double diag = d_ptr[i];
00980 
00981       // no need to extract row i
00982       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00983 
00984       for (int m = 0 ; m < NumVectors ; ++m) {
00985 
00986         double dtemp = 0.0;
00987 
00988         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00989 
00990           col = Indices[k];
00991           dtemp += Values[k] * y2_ptr[m][col];
00992         }
00993 
00994         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00995       }
00996     }
00997 
00998     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00999 
01000       int col;
01001       double diag = d_ptr[i];
01002 
01003       // no need to extract row i
01004       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01005 
01006       for (int m = 0 ; m < NumVectors ; ++m) {
01007 
01008         double dtemp = 0.0;
01009         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01010 
01011           col = Indices[k];
01012           dtemp += Values[k] * y2_ptr[m][col];
01013         }
01014 
01015         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01016 
01017       }
01018     }
01019 
01020     if (IsParallel_)
01021       for (int m = 0 ; m < NumVectors ; ++m) 
01022         for (int i = 0 ; i < NumMyRows_ ; ++i)
01023           y_ptr[m][i] = y2_ptr[m][i];
01024   }
01025 
01026   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
01027   return(0);
01028 }

Generated on Wed May 12 21:30:18 2010 for IFPACK by  doxygen 1.4.7