Ifpack Package Browser (Single Doxygen Collection) Development
Ifpack_PointRelaxation.cpp
Go to the documentation of this file.
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Ifpack_ConfigDefs.h"
00031 #include <iomanip>
00032 #include "Epetra_Operator.h"
00033 #include "Epetra_CrsMatrix.h"
00034 #include "Epetra_Comm.h"
00035 #include "Epetra_Map.h"
00036 #include "Epetra_MultiVector.h"
00037 #include "Ifpack_Preconditioner.h"
00038 #include "Ifpack_PointRelaxation.h"
00039 #include "Ifpack_Utils.h"
00040 #include "Ifpack_Condest.h"
00041 
00042 static const int IFPACK_JACOBI = 0;
00043 static const int IFPACK_GS = 1;
00044 static const int IFPACK_SGS = 2;
00045 
00046 //==============================================================================
00047 Ifpack_PointRelaxation::
00048 Ifpack_PointRelaxation(const Epetra_RowMatrix* Matrix_in) :
00049   IsInitialized_(false),
00050   IsComputed_(false),
00051   NumInitialize_(0),
00052   NumCompute_(0),
00053   NumApplyInverse_(0),
00054   InitializeTime_(0.0),
00055   ComputeTime_(0.0),
00056   ApplyInverseTime_(0.0),
00057   ComputeFlops_(0.0),
00058   ApplyInverseFlops_(0.0),
00059   NumSweeps_(1),
00060   DampingFactor_(1.0),
00061   UseTranspose_(false),
00062   Condest_(-1.0),
00063   ComputeCondest_(false),
00064   PrecType_(IFPACK_JACOBI),
00065   MinDiagonalValue_(0.0),
00066   NumMyRows_(0),
00067   NumMyNonzeros_(0),
00068   NumGlobalRows_(0),
00069   NumGlobalNonzeros_(0),
00070   Matrix_(Teuchos::rcp(Matrix_in,false)),
00071   IsParallel_(false),
00072   ZeroStartingSolution_(true),
00073   DoBackwardGS_(false)
00074 {
00075 }
00076 
00077 //==============================================================================
00078 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List)
00079 {
00080 
00081   string PT;
00082   if (PrecType_ == IFPACK_JACOBI)
00083     PT = "Jacobi";
00084   else if (PrecType_ == IFPACK_GS)
00085     PT = "Gauss-Seidel";
00086   else if (PrecType_ == IFPACK_SGS)
00087     PT = "symmetric Gauss-Seidel";
00088 
00089   PT = List.get("relaxation: type", PT);
00090 
00091   if (PT == "Jacobi")
00092     PrecType_ = IFPACK_JACOBI;
00093   else if (PT == "Gauss-Seidel")
00094     PrecType_ = IFPACK_GS;
00095   else if (PT == "symmetric Gauss-Seidel")
00096     PrecType_ = IFPACK_SGS;
00097   else {
00098     IFPACK_CHK_ERR(-2);
00099   }
00100   
00101   NumSweeps_            = List.get("relaxation: sweeps",NumSweeps_);
00102   DampingFactor_        = List.get("relaxation: damping factor", 
00103                                    DampingFactor_);
00104   MinDiagonalValue_     = List.get("relaxation: min diagonal value", 
00105                                    MinDiagonalValue_);
00106   ZeroStartingSolution_ = List.get("relaxation: zero starting solution", 
00107                                    ZeroStartingSolution_);
00108 
00109   DoBackwardGS_         = List.get("relaxation: backward mode",DoBackwardGS_);
00110   
00111   SetLabel();
00112 
00113   return(0);
00114 }
00115 
00116 //==============================================================================
00117 const Epetra_Comm& Ifpack_PointRelaxation::Comm() const
00118 {
00119   return(Matrix_->Comm());
00120 }
00121 
00122 //==============================================================================
00123 const Epetra_Map& Ifpack_PointRelaxation::OperatorDomainMap() const
00124 {
00125   return(Matrix_->OperatorDomainMap());
00126 }
00127 
00128 //==============================================================================
00129 const Epetra_Map& Ifpack_PointRelaxation::OperatorRangeMap() const
00130 {
00131   return(Matrix_->OperatorRangeMap());
00132 }
00133 
00134 //==============================================================================
00135 int Ifpack_PointRelaxation::Initialize()
00136 {
00137   IsInitialized_ = false;
00138 
00139   if (Matrix_ == Teuchos::null)
00140     IFPACK_CHK_ERR(-2);
00141 
00142   if (Time_ == Teuchos::null)
00143     Time_ = Teuchos::rcp( new Epetra_Time(Comm()) );
00144 
00145   if (Matrix().NumGlobalRows() != Matrix().NumGlobalCols())
00146     IFPACK_CHK_ERR(-2); // only square matrices
00147 
00148   NumMyRows_ = Matrix_->NumMyRows();
00149   NumMyNonzeros_ = Matrix_->NumMyNonzeros();
00150   NumGlobalRows_ = Matrix_->NumGlobalRows();
00151   NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros();
00152 
00153   if (Comm().NumProc() != 1)
00154     IsParallel_ = true;
00155   else
00156     IsParallel_ = false;
00157 
00158   ++NumInitialize_;
00159   InitializeTime_ += Time_->ElapsedTime();
00160   IsInitialized_ = true;
00161   return(0);
00162 }
00163 
00164 //==============================================================================
00165 int Ifpack_PointRelaxation::Compute()
00166 {
00167   int ierr = 0;
00168   if (!IsInitialized())
00169     IFPACK_CHK_ERR(Initialize());
00170 
00171   Time_->ResetStartTime();
00172 
00173   // reset values
00174   IsComputed_ = false;
00175   Condest_ = -1.0;
00176 
00177   if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed.
00178   if (NumSweeps_ < 0)
00179     IFPACK_CHK_ERR(-2); // at least one application
00180   
00181   Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
00182 
00183   if (Diagonal_ == Teuchos::null)
00184     IFPACK_CHK_ERR(-5);
00185 
00186   IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_));
00187 
00188   // check diagonal elements, store the inverses, and verify that
00189   // no zeros are around. If an element is zero, then by default
00190   // its inverse is zero as well (that is, the row is ignored).
00191   for (int i = 0 ; i < NumMyRows_ ; ++i) {
00192     double& diag = (*Diagonal_)[i];
00193     if (IFPACK_ABS(diag) < MinDiagonalValue_)
00194       diag = MinDiagonalValue_;
00195     if (diag != 0.0)
00196       diag = 1.0 / diag;
00197   }
00198 #ifdef IFPACK_FLOPCOUNTERS
00199   ComputeFlops_ += NumMyRows_;
00200 #endif
00201 
00202 #if 0
00203   // some methods require the inverse of the diagonal, compute it
00204   // now and store it in `Diagonal_'
00205   if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) {
00206     Diagonal_->Reciprocal(*Diagonal_);
00207     // update flops
00208     ComputeFlops_ += NumMyRows_;
00209   }
00210 #endif
00211 
00212   // We need to import data from external processors. Here I create an
00213   // Epetra_Import object because I cannot assume that Matrix_ has one.
00214   // This is a bit of waste of resources (but the code is more robust).
00215   // Note that I am doing some strange stuff to set the components of Y
00216   // from Y2 (to save some time).
00217   //
00218   if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) {
00219     Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
00220                                   Matrix().RowMatrixRowMap()) );
00221     if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
00222   }
00223 
00224   ++NumCompute_;
00225   ComputeTime_ += Time_->ElapsedTime();
00226   IsComputed_ = true;
00227 
00228   IFPACK_CHK_ERR(ierr);
00229   return(0);
00230 }
00231 
00232 //==============================================================================
00233 ostream& Ifpack_PointRelaxation::Print(ostream & os) const
00234 {
00235 
00236   double MyMinVal, MyMaxVal;
00237   double MinVal, MaxVal;
00238 
00239   if (IsComputed_) {
00240     Diagonal_->MinValue(&MyMinVal);
00241     Diagonal_->MaxValue(&MyMaxVal);
00242     Comm().MinAll(&MyMinVal,&MinVal,1);
00243     Comm().MinAll(&MyMaxVal,&MaxVal,1);
00244   }
00245 
00246   if (!Comm().MyPID()) {
00247     os << endl;
00248     os << "================================================================================" << endl;
00249     os << "Ifpack_PointRelaxation" << endl;
00250     os << "Sweeps         = " << NumSweeps_ << endl;
00251     os << "damping factor = " << DampingFactor_ << endl;
00252     if (PrecType_ == IFPACK_JACOBI)
00253       os << "Type           = Jacobi" << endl;
00254     else if (PrecType_ == IFPACK_GS)
00255       os << "Type           = Gauss-Seidel" << endl;
00256     else if (PrecType_ == IFPACK_SGS)
00257       os << "Type           = symmetric Gauss-Seidel" << endl;
00258     if (DoBackwardGS_) 
00259       os << "Using backward mode (GS only)" << endl;
00260     if (ZeroStartingSolution_) 
00261       os << "Using zero starting solution" << endl;
00262     else
00263       os << "Using input starting solution" << endl;
00264     os << "Condition number estimate = " << Condest() << endl;
00265     os << "Global number of rows            = " << Matrix_->NumGlobalRows() << endl;
00266     if (IsComputed_) {
00267       os << "Minimum value on stored diagonal = " << MinVal << endl;
00268       os << "Maximum value on stored diagonal = " << MaxVal << endl;
00269     }
00270     os << endl;
00271     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
00272     os << "-----           -------   --------------       ------------     --------" << endl;
00273     os << "Initialize()    "   << std::setw(5) << NumInitialize_ 
00274        << "  " << std::setw(15) << InitializeTime_ 
00275        << "              0.0              0.0" << endl;
00276     os << "Compute()       "   << std::setw(5) << NumCompute_ 
00277        << "  " << std::setw(15) << ComputeTime_
00278        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_;
00279     if (ComputeTime_ != 0.0)
00280       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl;
00281     else
00282       os << "  " << std::setw(15) << 0.0 << endl;
00283     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse_ 
00284        << "  " << std::setw(15) << ApplyInverseTime_
00285        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_;
00286     if (ApplyInverseTime_ != 0.0)
00287       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl;
00288     else
00289       os << "  " << std::setw(15) << 0.0 << endl;
00290     os << "================================================================================" << endl;
00291     os << endl;
00292   }
00293 
00294   return(os);
00295 }
00296 
00297 //==============================================================================
00298 double Ifpack_PointRelaxation::
00299 Condest(const Ifpack_CondestType CT, 
00300         const int MaxIters, const double Tol,
00301   Epetra_RowMatrix* Matrix_in)
00302 {
00303   if (!IsComputed()) // cannot compute right now
00304     return(-1.0);
00305 
00306   // always computes it. Call Condest() with no parameters to get
00307   // the previous estimate.
00308   Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
00309 
00310   return(Condest_);
00311 }
00312 
00313 //==============================================================================
00314 void Ifpack_PointRelaxation::SetLabel()
00315 {
00316   string PT;
00317   if (PrecType_ == IFPACK_JACOBI)
00318     PT = "Jacobi";
00319   else if (PrecType_ == IFPACK_GS){
00320     PT = "GS";
00321     if(DoBackwardGS_)
00322       PT = "Backward " + PT;
00323   }    
00324   else if (PrecType_ == IFPACK_SGS)
00325     PT = "SGS";
00326 
00327   Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_)
00328     + ", damping=" + Ifpack_toString(DampingFactor_) + ")";
00329 }
00330 
00331 //==============================================================================
00332 // Note that Ifpack_PointRelaxation and Jacobi is much faster than
00333 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
00334 // way the matrix-vector produce is performed).
00335 //
00336 // Another ML-related observation is that the starting solution (in Y)
00337 // is NOT supposed to be zero. This may slow down the application of just
00338 // one sweep of Jacobi.
00339 //
00340 int Ifpack_PointRelaxation::
00341 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00342 {
00343   if (!IsComputed())
00344     IFPACK_CHK_ERR(-3);
00345 
00346   if (X.NumVectors() != Y.NumVectors())
00347     IFPACK_CHK_ERR(-2);
00348 
00349   Time_->ResetStartTime();
00350 
00351   // AztecOO gives X and Y pointing to the same memory location,
00352   // need to create an auxiliary vector, Xcopy
00353   Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
00354   if (X.Pointers()[0] == Y.Pointers()[0])
00355     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00356   else
00357     Xcopy = Teuchos::rcp( &X, false );
00358     
00359   if (ZeroStartingSolution_)
00360     Y.PutScalar(0.0);
00361 
00362   // Flops are updated in each of the following. 
00363   switch (PrecType_) {
00364   case IFPACK_JACOBI:
00365     IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
00366     break;
00367   case IFPACK_GS:
00368     IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
00369     break;
00370   case IFPACK_SGS:
00371     IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
00372     break;
00373   default:
00374     IFPACK_CHK_ERR(-1); // something wrong
00375   }
00376 
00377   ++NumApplyInverse_;
00378   ApplyInverseTime_ += Time_->ElapsedTime();
00379   return(0);
00380 }
00381 
00382 //==============================================================================
00383 // This preconditioner can be much slower than AztecOO and ML versions
00384 // if the matrix-vector product requires all ExtractMyRowCopy() 
00385 // (as done through Ifpack_AdditiveSchwarz).
00386 int Ifpack_PointRelaxation::
00387 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const
00388 {
00389 
00390   int NumVectors = LHS.NumVectors();
00391   Epetra_MultiVector A_times_LHS( LHS.Map(),NumVectors );
00392 
00393   for (int j = 0; j < NumSweeps_ ; j++) {
00394 
00395     IFPACK_CHK_ERR(Apply(LHS,A_times_LHS));
00396     IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0));
00397     for (int v = 0 ; v < NumVectors ; ++v)
00398       IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)), 
00399                                      *Diagonal_, 1.0));
00400 
00401   }
00402 
00403   // Flops:
00404   // - matrix vector              (2 * NumGlobalNonzeros_)
00405   // - update                     (2 * NumGlobalRows_)
00406   // - Multiply:
00407   //   - DampingFactor            (NumGlobalRows_)
00408   //   - Diagonal                 (NumGlobalRows_)
00409   //   - A + B                    (NumGlobalRows_)
00410   //   - 1.0                      (NumGlobalRows_)
00411 #ifdef IFPACK_FLOPCOUNTERS
00412   ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00413 #endif
00414 
00415   return(0);
00416 }
00417 
00418 //==============================================================================
00419 int Ifpack_PointRelaxation::
00420 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00421 {
00422   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00423   // try to pick the best option; performances may be improved
00424   // if several sweeps are used.
00425   if (CrsMatrix != 0)
00426   {
00427     if (CrsMatrix->StorageOptimized())
00428       return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y));
00429     else
00430       return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y));
00431   }
00432   else
00433     return(ApplyInverseGS_RowMatrix(X, Y));
00434 } //ApplyInverseGS()
00435 
00436 //==============================================================================
00437 int Ifpack_PointRelaxation::
00438 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00439 {
00440   int NumVectors = X.NumVectors();
00441 
00442   int Length = Matrix().MaxNumEntries();
00443   vector<int> Indices(Length);
00444   vector<double> Values(Length);
00445 
00446   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00447   if (IsParallel_)
00448     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00449   else
00450     Y2 = Teuchos::rcp( &Y, false );
00451 
00452   // extract views (for nicer and faster code)
00453   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00454   X.ExtractView(&x_ptr);
00455   Y.ExtractView(&y_ptr);
00456   Y2->ExtractView(&y2_ptr);
00457   Diagonal_->ExtractView(&d_ptr);
00458 
00459   for (int j = 0; j < NumSweeps_ ; j++) {
00460 
00461     // data exchange is here, once per sweep
00462     if (IsParallel_)
00463       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00464 
00465     // FIXME: do I really need this code below?
00466     if (NumVectors == 1) {
00467 
00468       double* y0_ptr = y_ptr[0];
00469       double* y20_ptr = y2_ptr[0];
00470       double* x0_ptr = x_ptr[0];
00471 
00472       if(!DoBackwardGS_){      
00473         /* Forward Mode */
00474         for (int i = 0 ; i < NumMyRows_ ; ++i) {
00475 
00476           int NumEntries;
00477           int col;
00478           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00479                                                    &Values[0], &Indices[0]));
00480           
00481           double dtemp = 0.0;
00482           for (int k = 0 ; k < NumEntries ; ++k) {
00483             
00484             col = Indices[k];
00485             dtemp += Values[k] * y20_ptr[col];
00486           }
00487           
00488           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00489         }
00490       }
00491       else {
00492         /* Backward Mode */
00493         for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00494 
00495           int NumEntries;
00496           int col;
00497           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00498                                                    &Values[0], &Indices[0]));
00499           double dtemp = 0.0;
00500           for (int k = 0 ; k < NumEntries ; ++k) {
00501 
00502             col = Indices[k];
00503             dtemp += Values[k] * y20_ptr[i];
00504           }
00505           
00506           y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00507         }
00508       }
00509       
00510       // using Export() sounded quite expensive
00511       if (IsParallel_)
00512         for (int i = 0 ; i < NumMyRows_ ; ++i)
00513           y0_ptr[i] = y20_ptr[i];
00514 
00515     }
00516     else {
00517       if(!DoBackwardGS_){      
00518         /* Forward Mode */
00519         for (int i = 0 ; i < NumMyRows_ ; ++i) {
00520           
00521           int NumEntries;
00522           int col;
00523           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00524                                                    &Values[0], &Indices[0]));
00525           
00526           for (int m = 0 ; m < NumVectors ; ++m) {
00527             
00528             double dtemp = 0.0;
00529             for (int k = 0 ; k < NumEntries ; ++k) {
00530               
00531               col = Indices[k];
00532               dtemp += Values[k] * y2_ptr[m][col];
00533             }
00534             
00535             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00536           }
00537         }
00538       }
00539       else {
00540         /* Backward Mode */
00541         for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00542           int NumEntries;
00543           int col;
00544           IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00545                                                    &Values[0], &Indices[0]));
00546 
00547           for (int m = 0 ; m < NumVectors ; ++m) {
00548             
00549             double dtemp = 0.0;
00550             for (int k = 0 ; k < NumEntries ; ++k) {
00551 
00552               col = Indices[k];
00553               dtemp += Values[k] * y2_ptr[m][col];
00554             }
00555 
00556             y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00557 
00558           }
00559         }
00560       }
00561 
00562       // using Export() sounded quite expensive   
00563       if (IsParallel_)
00564         for (int m = 0 ; m < NumVectors ; ++m) 
00565           for (int i = 0 ; i < NumMyRows_ ; ++i)
00566             y_ptr[m][i] = y2_ptr[m][i];
00567       
00568     }
00569   }
00570 
00571 #ifdef IFPACK_FLOPCOUNTERS
00572   ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00573 #endif
00574 
00575   return(0);
00576 } //ApplyInverseGS_RowMatrix()
00577 
00578 //==============================================================================
00579 int Ifpack_PointRelaxation::
00580 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00581 {
00582   int NumVectors = X.NumVectors();
00583 
00584   int* Indices;
00585   double* Values;
00586 
00587   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00588   if (IsParallel_) {
00589     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00590   }
00591   else
00592     Y2 = Teuchos::rcp( &Y, false );
00593 
00594   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00595   X.ExtractView(&x_ptr);
00596   Y.ExtractView(&y_ptr);
00597   Y2->ExtractView(&y2_ptr);
00598   Diagonal_->ExtractView(&d_ptr);
00599   
00600   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00601     
00602     // only one data exchange per sweep
00603     if (IsParallel_)
00604       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00605 
00606     if(!DoBackwardGS_){  
00607       /* Forward Mode */
00608       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00609 
00610         int NumEntries;
00611         int col;
00612         double diag = d_ptr[i];
00613         
00614         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00615         
00616         for (int m = 0 ; m < NumVectors ; ++m) {
00617           
00618           double dtemp = 0.0;
00619           
00620           for (int k = 0; k < NumEntries; ++k) {
00621             
00622             col = Indices[k];
00623             dtemp += Values[k] * y2_ptr[m][col];
00624           }
00625           
00626           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00627         }
00628       }
00629     }
00630     else {
00631       /* Backward Mode */
00632       for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00633 
00634         int NumEntries;
00635         int col;
00636         double diag = d_ptr[i];
00637         
00638         IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00639         
00640         for (int m = 0 ; m < NumVectors ; ++m) {
00641           
00642           double dtemp = 0.0;
00643           for (int k = 0; k < NumEntries; ++k) {
00644             
00645             col = Indices[k];
00646             dtemp += Values[k] * y2_ptr[m][col];
00647           }
00648           
00649           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00650           
00651         }
00652       }
00653     }
00654     
00655     if (IsParallel_)
00656       for (int m = 0 ; m < NumVectors ; ++m) 
00657         for (int i = 0 ; i < NumMyRows_ ; ++i)
00658           y_ptr[m][i] = y2_ptr[m][i];
00659   }
00660 
00661 #ifdef IFPACK_FLOPCOUNTERS
00662   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00663 #endif
00664   return(0);
00665 } //ApplyInverseGS_CrsMatrix()
00666 
00667 //
00668 //==============================================================================
00669 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage()
00670 
00671 int Ifpack_PointRelaxation::
00672 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00673 {
00674   int* IndexOffset;
00675   int* Indices;
00676   double* Values;
00677   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00678 
00679   int NumVectors = X.NumVectors();
00680 
00681   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00682   if (IsParallel_) {
00683     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00684   }
00685   else
00686     Y2 = Teuchos::rcp( &Y, false );
00687 
00688   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00689   X.ExtractView(&x_ptr);
00690   Y.ExtractView(&y_ptr);
00691   Y2->ExtractView(&y2_ptr);
00692   Diagonal_->ExtractView(&d_ptr);
00693 
00694   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00695     
00696     // only one data exchange per sweep
00697     if (IsParallel_)
00698       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00699 
00700     if(!DoBackwardGS_){  
00701       /* Forward Mode */
00702       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00703         
00704         int col;
00705         double diag = d_ptr[i];
00706         
00707         for (int m = 0 ; m < NumVectors ; ++m) {
00708           
00709           double dtemp = 0.0;
00710           
00711           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00712             
00713             col = Indices[k];
00714             dtemp += Values[k] * y2_ptr[m][col];
00715           }
00716           
00717           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00718         }      
00719       }
00720     }
00721     else {
00722       /* Backward Mode */
00723       for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00724 
00725         int col;
00726         double diag = d_ptr[i];
00727         
00728         for (int m = 0 ; m < NumVectors ; ++m) {
00729 
00730           double dtemp = 0.0;
00731           for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00732 
00733             col = Indices[k];
00734             dtemp += Values[k] * y2_ptr[m][col];
00735           }
00736 
00737           y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00738 
00739         }
00740       }
00741     }
00742     
00743 
00744     if (IsParallel_)
00745       for (int m = 0 ; m < NumVectors ; ++m) 
00746         for (int i = 0 ; i < NumMyRows_ ; ++i)
00747           y_ptr[m][i] = y2_ptr[m][i];
00748   }
00749 
00750 #ifdef IFPACK_FLOPCOUNTERS
00751   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00752 #endif
00753   return(0);
00754 } //ApplyInverseGS_FastCrsMatrix()
00755 
00756 //==============================================================================
00757 int Ifpack_PointRelaxation::
00758 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00759 {
00760   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00761   // try to pick the best option; performances may be improved
00762   // if several sweeps are used.
00763   if (CrsMatrix != 0)
00764   {
00765     if (CrsMatrix->StorageOptimized())
00766       return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
00767     else
00768       return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
00769   }
00770   else
00771     return(ApplyInverseSGS_RowMatrix(X, Y));
00772 }
00773 
00774 //==============================================================================
00775 int Ifpack_PointRelaxation::
00776 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00777 {
00778   int NumVectors = X.NumVectors();
00779   int Length = Matrix().MaxNumEntries();
00780   vector<int> Indices(Length);
00781   vector<double> Values(Length);
00782 
00783   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00784   if (IsParallel_) {
00785     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00786   }
00787   else
00788     Y2 = Teuchos::rcp( &Y, false );
00789 
00790   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00791   X.ExtractView(&x_ptr);
00792   Y.ExtractView(&y_ptr);
00793   Y2->ExtractView(&y2_ptr);
00794   Diagonal_->ExtractView(&d_ptr);
00795   
00796   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00797     
00798     // only one data exchange per sweep
00799     if (IsParallel_)
00800       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00801 
00802     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00803 
00804       int NumEntries;
00805       int col;
00806       double diag = d_ptr[i];
00807 
00808       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00809                                                &Values[0], &Indices[0]));
00810 
00811       for (int m = 0 ; m < NumVectors ; ++m) {
00812 
00813         double dtemp = 0.0;
00814 
00815         for (int k = 0 ; k < NumEntries ; ++k) {
00816 
00817           col = Indices[k];
00818           dtemp += Values[k] * y2_ptr[m][col];
00819         }
00820 
00821         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00822       }
00823     }
00824 
00825     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00826 
00827       int NumEntries;
00828       int col;
00829       double diag = d_ptr[i];
00830 
00831       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00832                                                &Values[0], &Indices[0]));
00833 
00834       for (int m = 0 ; m < NumVectors ; ++m) {
00835 
00836         double dtemp = 0.0;
00837         for (int k = 0 ; k < NumEntries ; ++k) {
00838 
00839           col = Indices[k];
00840           dtemp += Values[k] * y2_ptr[m][col];
00841         }
00842 
00843         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00844 
00845       }
00846     }
00847 
00848     if (IsParallel_)
00849       for (int m = 0 ; m < NumVectors ; ++m) 
00850         for (int i = 0 ; i < NumMyRows_ ; ++i)
00851           y_ptr[m][i] = y2_ptr[m][i];
00852   }
00853 
00854 #ifdef IFPACK_FLOPCOUNTERS
00855   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00856 #endif
00857   return(0);
00858 }
00859 
00860 //==============================================================================
00861 int Ifpack_PointRelaxation::
00862 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00863 {
00864   int NumVectors = X.NumVectors();
00865 
00866   int* Indices;
00867   double* Values;
00868 
00869   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00870   if (IsParallel_) {
00871     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00872   }
00873   else
00874     Y2 = Teuchos::rcp( &Y, false );
00875 
00876   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00877   X.ExtractView(&x_ptr);
00878   Y.ExtractView(&y_ptr);
00879   Y2->ExtractView(&y2_ptr);
00880   Diagonal_->ExtractView(&d_ptr);
00881   
00882   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00883     
00884     // only one data exchange per sweep
00885     if (IsParallel_)
00886       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00887 
00888     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00889 
00890       int NumEntries;
00891       int col;
00892       double diag = d_ptr[i];
00893 
00894       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00895 
00896       for (int m = 0 ; m < NumVectors ; ++m) {
00897 
00898         double dtemp = 0.0;
00899 
00900         for (int k = 0; k < NumEntries; ++k) {
00901 
00902           col = Indices[k];
00903           dtemp += Values[k] * y2_ptr[m][col];
00904         }
00905 
00906         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00907       }
00908     }
00909 
00910     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00911 
00912       int NumEntries;
00913       int col;
00914       double diag = d_ptr[i];
00915 
00916       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00917 
00918       for (int m = 0 ; m < NumVectors ; ++m) {
00919 
00920         double dtemp = 0.0;
00921         for (int k = 0; k < NumEntries; ++k) {
00922 
00923           col = Indices[k];
00924           dtemp += Values[k] * y2_ptr[m][col];
00925         }
00926 
00927         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00928 
00929       }
00930     }
00931 
00932     if (IsParallel_)
00933       for (int m = 0 ; m < NumVectors ; ++m) 
00934         for (int i = 0 ; i < NumMyRows_ ; ++i)
00935           y_ptr[m][i] = y2_ptr[m][i];
00936   }
00937 
00938 #ifdef IFPACK_FLOPCOUNTERS
00939   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00940 #endif
00941   return(0);
00942 }
00943 
00944 //==============================================================================
00945 // this requires Epetra_CrsMatrix + OptimizeStorage()
00946 int Ifpack_PointRelaxation::
00947 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00948 {
00949   int* IndexOffset;
00950   int* Indices;
00951   double* Values;
00952   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00953 
00954   int NumVectors = X.NumVectors();
00955 
00956   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00957   if (IsParallel_) {
00958     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00959   }
00960   else
00961     Y2 = Teuchos::rcp( &Y, false );
00962 
00963   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00964   X.ExtractView(&x_ptr);
00965   Y.ExtractView(&y_ptr);
00966   Y2->ExtractView(&y2_ptr);
00967   Diagonal_->ExtractView(&d_ptr);
00968   
00969   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00970     
00971     // only one data exchange per sweep
00972     if (IsParallel_)
00973       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00974 
00975     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00976 
00977       int col;
00978       double diag = d_ptr[i];
00979 
00980       // no need to extract row i
00981       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00982 
00983       for (int m = 0 ; m < NumVectors ; ++m) {
00984 
00985         double dtemp = 0.0;
00986 
00987         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00988 
00989           col = Indices[k];
00990           dtemp += Values[k] * y2_ptr[m][col];
00991         }
00992 
00993         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00994       }
00995     }
00996 
00997     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00998 
00999       int col;
01000       double diag = d_ptr[i];
01001 
01002       // no need to extract row i
01003       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
01004 
01005       for (int m = 0 ; m < NumVectors ; ++m) {
01006 
01007         double dtemp = 0.0;
01008         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
01009 
01010           col = Indices[k];
01011           dtemp += Values[k] * y2_ptr[m][col];
01012         }
01013 
01014         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
01015 
01016       }
01017     }
01018 
01019     if (IsParallel_)
01020       for (int m = 0 ; m < NumVectors ; ++m) 
01021         for (int i = 0 ; i < NumMyRows_ ; ++i)
01022           y_ptr[m][i] = y2_ptr[m][i];
01023   }
01024 
01025 #ifdef IFPACK_FLOPCOUNTERS
01026   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
01027 #endif
01028   return(0);
01029 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines