Ifpack_PointRelaxation.cpp

00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Ifpack_ConfigDefs.h"
00031 #include <iomanip>
00032 #include "Epetra_Operator.h"
00033 #include "Epetra_CrsMatrix.h"
00034 #include "Epetra_Comm.h"
00035 #include "Epetra_Map.h"
00036 #include "Epetra_MultiVector.h"
00037 #include "Ifpack_Preconditioner.h"
00038 #include "Ifpack_PointRelaxation.h"
00039 #include "Ifpack_Utils.h"
00040 #include "Ifpack_Condest.h"
00041 
00042 static const int IFPACK_JACOBI = 0;
00043 static const int IFPACK_GS = 1;
00044 static const int IFPACK_SGS = 2;
00045 
00046 //==============================================================================
00047 Ifpack_PointRelaxation::
00048 Ifpack_PointRelaxation(const Epetra_RowMatrix* Matrix) :
00049   IsInitialized_(false),
00050   IsComputed_(false),
00051   NumInitialize_(0),
00052   NumCompute_(0),
00053   NumApplyInverse_(0),
00054   InitializeTime_(0.0),
00055   ComputeTime_(0.0),
00056   ApplyInverseTime_(0.0),
00057   ComputeFlops_(0.0),
00058   ApplyInverseFlops_(0.0),
00059   NumSweeps_(1),
00060   DampingFactor_(1.0),
00061   UseTranspose_(false),
00062   Condest_(-1.0),
00063   ComputeCondest_(false),
00064   PrecType_(IFPACK_JACOBI),
00065   MinDiagonalValue_(0.0),
00066   NumMyRows_(0),
00067   NumMyNonzeros_(0),
00068   NumGlobalRows_(0),
00069   NumGlobalNonzeros_(0),
00070   Matrix_(Teuchos::rcp(Matrix,false)),
00071   IsParallel_(false),
00072   ZeroStartingSolution_(true)
00073 {
00074 }
00075 
00076 //==============================================================================
00077 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List)
00078 {
00079 
00080   string PT;
00081   if (PrecType_ == IFPACK_JACOBI)
00082     PT = "Jacobi";
00083   else if (PrecType_ == IFPACK_GS)
00084     PT = "Gauss-Seidel";
00085   else if (PrecType_ == IFPACK_SGS)
00086     PT = "symmetric Gauss-Seidel";
00087 
00088   PT = List.get("relaxation: type", PT);
00089 
00090   if (PT == "Jacobi")
00091     PrecType_ = IFPACK_JACOBI;
00092   else if (PT == "Gauss-Seidel")
00093     PrecType_ = IFPACK_GS;
00094   else if (PT == "symmetric Gauss-Seidel")
00095     PrecType_ = IFPACK_SGS;
00096   else {
00097     IFPACK_CHK_ERR(-2);
00098   }
00099   
00100   NumSweeps_            = List.get("relaxation: sweeps",NumSweeps_);
00101   DampingFactor_        = List.get("relaxation: damping factor", 
00102                                    DampingFactor_);
00103   MinDiagonalValue_     = List.get("relaxation: min diagonal value", 
00104                                    MinDiagonalValue_);
00105   ZeroStartingSolution_ = List.get("relaxation: zero starting solution", 
00106                                    ZeroStartingSolution_);
00107 
00108   SetLabel();
00109 
00110   return(0);
00111 }
00112 
00113 //==============================================================================
00114 const Epetra_Comm& Ifpack_PointRelaxation::Comm() const
00115 {
00116   return(Matrix_->Comm());
00117 }
00118 
00119 //==============================================================================
00120 const Epetra_Map& Ifpack_PointRelaxation::OperatorDomainMap() const
00121 {
00122   return(Matrix_->OperatorDomainMap());
00123 }
00124 
00125 //==============================================================================
00126 const Epetra_Map& Ifpack_PointRelaxation::OperatorRangeMap() const
00127 {
00128   return(Matrix_->OperatorRangeMap());
00129 }
00130 
00131 //==============================================================================
00132 int Ifpack_PointRelaxation::
00133 Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00134 {
00135 
00136   if (IsComputed() == false)
00137     IFPACK_CHK_ERR(-3);
00138 
00139   if (X.NumVectors() != Y.NumVectors())
00140     IFPACK_CHK_ERR(-2);
00141 
00142   IFPACK_CHK_ERR(Matrix_->Multiply(UseTranspose(),X,Y));
00143   return(0);
00144 }
00145 
00146 //==============================================================================
00147 int Ifpack_PointRelaxation::Initialize()
00148 {
00149   IsInitialized_ = false;
00150 
00151   if (Matrix_ == Teuchos::null)
00152     IFPACK_CHK_ERR(-2);
00153 
00154   if (Time_ == Teuchos::null)
00155     Time_ = Teuchos::rcp( new Epetra_Time(Comm()) );
00156 
00157   if (Matrix().NumGlobalRows() != Matrix().NumGlobalCols())
00158     IFPACK_CHK_ERR(-2); // only square matrices
00159 
00160   NumMyRows_ = Matrix_->NumMyRows();
00161   NumMyNonzeros_ = Matrix_->NumMyNonzeros();
00162   NumGlobalRows_ = Matrix_->NumGlobalRows();
00163   NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros();
00164 
00165   if (Comm().NumProc() != 1)
00166     IsParallel_ = true;
00167   else
00168     IsParallel_ = false;
00169 
00170   ++NumInitialize_;
00171   InitializeTime_ += Time_->ElapsedTime();
00172   IsInitialized_ = true;
00173   return(0);
00174 }
00175 
00176 //==============================================================================
00177 int Ifpack_PointRelaxation::Compute()
00178 {
00179   int ierr = 0;
00180   if (!IsInitialized())
00181     IFPACK_CHK_ERR(Initialize());
00182 
00183   Time_->ResetStartTime();
00184 
00185   // reset values
00186   IsComputed_ = false;
00187   Condest_ = -1.0;
00188 
00189   if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed.
00190   if (NumSweeps_ < 0)
00191     IFPACK_CHK_ERR(-2); // at least one application
00192   
00193   Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
00194 
00195   if (Diagonal_ == Teuchos::null)
00196     IFPACK_CHK_ERR(-5);
00197 
00198   IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_));
00199 
00200   // check diagonal elements, store the inverses, and verify that
00201   // no zeros are around. If an element is zero, then by default
00202   // its inverse is zero as well (that is, the row is ignored).
00203   for (int i = 0 ; i < NumMyRows_ ; ++i) {
00204     double& diag = (*Diagonal_)[i];
00205     if (IFPACK_ABS(diag) < MinDiagonalValue_)
00206       diag = MinDiagonalValue_;
00207     if (diag != 0.0)
00208       diag = 1.0 / diag;
00209   }
00210   ComputeFlops_ += NumMyRows_;
00211 
00212 #if 0
00213   // some methods require the inverse of the diagonal, compute it
00214   // now and store it in `Diagonal_'
00215   if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) {
00216     Diagonal_->Reciprocal(*Diagonal_);
00217     // update flops
00218     ComputeFlops_ += NumMyRows_;
00219   }
00220 #endif
00221 
00222   // We need to import data from external processors. Here I create an
00223   // Epetra_Import object because I cannot assume that Matrix_ has one.
00224   // This is a bit of waste of resources (but the code is more robust).
00225   // Note that I am doing some strange stuff to set the components of Y
00226   // from Y2 (to save some time).
00227   //
00228   if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) {
00229     Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
00230                                   Matrix().RowMatrixRowMap()) );
00231     if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
00232   }
00233 
00234   ++NumCompute_;
00235   ComputeTime_ += Time_->ElapsedTime();
00236   IsComputed_ = true;
00237 
00238   IFPACK_CHK_ERR(ierr);
00239   return(0);
00240 }
00241 
00242 //==============================================================================
00243 ostream& Ifpack_PointRelaxation::Print(ostream & os) const
00244 {
00245 
00246   double MyMinVal, MyMaxVal;
00247   double MinVal, MaxVal;
00248 
00249   if (IsComputed_) {
00250     Diagonal_->MinValue(&MyMinVal);
00251     Diagonal_->MaxValue(&MyMaxVal);
00252     Comm().MinAll(&MyMinVal,&MinVal,1);
00253     Comm().MinAll(&MyMaxVal,&MaxVal,1);
00254   }
00255 
00256   if (!Comm().MyPID()) {
00257     os << endl;
00258     os << "================================================================================" << endl;
00259     os << "Ifpack_PointRelaxation" << endl;
00260     os << "Sweeps         = " << NumSweeps_ << endl;
00261     os << "damping factor = " << DampingFactor_ << endl;
00262     if (PrecType_ == IFPACK_JACOBI)
00263       os << "Type           = Jacobi" << endl;
00264     else if (PrecType_ == IFPACK_GS)
00265       os << "Type           = Gauss-Seidel" << endl;
00266     else if (PrecType_ == IFPACK_SGS)
00267       os << "Type           = symmetric Gauss-Seidel" << endl;
00268     if (ZeroStartingSolution_) 
00269       os << "Using zero starting solution" << endl;
00270     else
00271       os << "Using input starting solution" << endl;
00272     os << "Condition number estimate = " << Condest() << endl;
00273     os << "Global number of rows            = " << Matrix_->NumGlobalRows() << endl;
00274     if (IsComputed_) {
00275       os << "Minimum value on stored diagonal = " << MinVal << endl;
00276       os << "Maximum value on stored diagonal = " << MaxVal << endl;
00277     }
00278     os << endl;
00279     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
00280     os << "-----           -------   --------------       ------------     --------" << endl;
00281     os << "Initialize()    "   << std::setw(5) << NumInitialize_ 
00282        << "  " << std::setw(15) << InitializeTime_ 
00283        << "              0.0              0.0" << endl;
00284     os << "Compute()       "   << std::setw(5) << NumCompute_ 
00285        << "  " << std::setw(15) << ComputeTime_
00286        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_;
00287     if (ComputeTime_ != 0.0)
00288       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl;
00289     else
00290       os << "  " << std::setw(15) << 0.0 << endl;
00291     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse_ 
00292        << "  " << std::setw(15) << ApplyInverseTime_
00293        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_;
00294     if (ApplyInverseTime_ != 0.0)
00295       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl;
00296     else
00297       os << "  " << std::setw(15) << 0.0 << endl;
00298     os << "================================================================================" << endl;
00299     os << endl;
00300   }
00301 
00302   return(os);
00303 }
00304 
00305 //==============================================================================
00306 double Ifpack_PointRelaxation::
00307 Condest(const Ifpack_CondestType CT, 
00308         const int MaxIters, const double Tol,
00309     Epetra_RowMatrix* Matrix)
00310 {
00311   if (!IsComputed()) // cannot compute right now
00312     return(-1.0);
00313 
00314   // always computes it. Call Condest() with no parameters to get
00315   // the previous estimate.
00316   Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix);
00317 
00318   return(Condest_);
00319 }
00320 
00321 //==============================================================================
00322 void Ifpack_PointRelaxation::SetLabel()
00323 {
00324   string PT;
00325   if (PrecType_ == IFPACK_JACOBI)
00326     PT = "Jacobi";
00327   else if (PrecType_ == IFPACK_GS)
00328     PT = "GS";
00329   else if (PrecType_ == IFPACK_SGS)
00330     PT = "SGS";
00331 
00332   Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_)
00333     + ", damping=" + Ifpack_toString(DampingFactor_) + ")";
00334 }
00335 
00336 //==============================================================================
00337 // Note that Ifpack_PointRelaxation and Jacobi is much faster than
00338 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
00339 // way the matrix-vector produce is performed).
00340 //
00341 // Another ML-related observation is that the starting solution (in Y)
00342 // is NOT supposed to be zero. This may slow down the application of just
00343 // one sweep of Jacobi.
00344 //
00345 int Ifpack_PointRelaxation::
00346 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00347 {
00348   if (!IsComputed())
00349     IFPACK_CHK_ERR(-3);
00350 
00351   if (X.NumVectors() != Y.NumVectors())
00352     IFPACK_CHK_ERR(-2);
00353 
00354   Time_->ResetStartTime();
00355 
00356   // AztecOO gives X and Y pointing to the same memory location,
00357   // need to create an auxiliary vector, Xcopy
00358   Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
00359   if (X.Pointers()[0] == Y.Pointers()[0])
00360     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00361   else
00362     Xcopy = Teuchos::rcp( &X, false );
00363     
00364   if (ZeroStartingSolution_)
00365     Y.PutScalar(0.0);
00366 
00367   // Flops are updated in each of the following. 
00368   switch (PrecType_) {
00369   case IFPACK_JACOBI:
00370     IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
00371     break;
00372   case IFPACK_GS:
00373     IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
00374     break;
00375   case IFPACK_SGS:
00376     IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
00377     break;
00378   default:
00379     IFPACK_CHK_ERR(-1); // something wrong
00380   }
00381 
00382   ++NumApplyInverse_;
00383   ApplyInverseTime_ += Time_->ElapsedTime();
00384   return(0);
00385 }
00386 
00387 //==============================================================================
00388 // This preconditioner can be much slower than AztecOO and ML versions
00389 // if the matrix-vector product requires all ExtractMyRowCopy() 
00390 // (as done through Ifpack_AdditiveSchwarz).
00391 int Ifpack_PointRelaxation::
00392 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const
00393 {
00394 
00395   int NumVectors = LHS.NumVectors();
00396   Epetra_MultiVector A_times_LHS( LHS.Map(),NumVectors );
00397 
00398   for (int j = 0; j < NumSweeps_ ; j++) {
00399 
00400     IFPACK_CHK_ERR(Apply(LHS,A_times_LHS));
00401     IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0));
00402     for (int v = 0 ; v < NumVectors ; ++v)
00403       IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)), 
00404                                      *Diagonal_, 1.0));
00405 
00406   }
00407 
00408   // Flops:
00409   // - matrix vector              (2 * NumGlobalNonzeros_)
00410   // - update                     (2 * NumGlobalRows_)
00411   // - Multiply:
00412   //   - DampingFactor            (NumGlobalRows_)
00413   //   - Diagonal                 (NumGlobalRows_)
00414   //   - A + B                    (NumGlobalRows_)
00415   //   - 1.0                      (NumGlobalRows_)
00416   ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00417 
00418   return(0);
00419 }
00420 
00421 //==============================================================================
00422 int Ifpack_PointRelaxation::
00423 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00424 {
00425   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00426   // try to pick the best option; performances may be improved
00427   // if several sweeps are used.
00428   if (CrsMatrix != 0)
00429   {
00430     if (CrsMatrix->StorageOptimized())
00431       return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y));
00432     else
00433       return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y));
00434   }
00435   else
00436     return(ApplyInverseGS_RowMatrix(X, Y));
00437 } //ApplyInverseGS()
00438 
00439 //==============================================================================
00440 int Ifpack_PointRelaxation::
00441 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00442 {
00443   int NumVectors = X.NumVectors();
00444 
00445   int Length = Matrix().MaxNumEntries();
00446   vector<int> Indices(Length);
00447   vector<double> Values(Length);
00448 
00449   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00450   if (IsParallel_)
00451     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00452   else
00453     Y2 = Teuchos::rcp( &Y, false );
00454 
00455   // extract views (for nicer and faster code)
00456   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00457   X.ExtractView(&x_ptr);
00458   Y.ExtractView(&y_ptr);
00459   Y2->ExtractView(&y2_ptr);
00460   Diagonal_->ExtractView(&d_ptr);
00461 
00462   for (int j = 0; j < NumSweeps_ ; j++) {
00463 
00464     // data exchange is here, once per sweep
00465     if (IsParallel_)
00466       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00467 
00468     // FIXME: do I really need this code below?
00469     if (NumVectors == 1) {
00470 
00471       double* y0_ptr = y_ptr[0];
00472       double* y20_ptr = y2_ptr[0];
00473       double* x0_ptr = x_ptr[0];
00474 
00475       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00476 
00477         int NumEntries;
00478         int col;
00479         IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00480                                                  &Values[0], &Indices[0]));
00481 
00482         double dtemp = 0.0;
00483         for (int k = 0 ; k < NumEntries ; ++k) {
00484 
00485           col = Indices[k];
00486           dtemp += Values[k] * y20_ptr[col];
00487         }
00488 
00489         y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
00490       }
00491       // using Export() sounded quite expensive
00492       if (IsParallel_)
00493         for (int i = 0 ; i < NumMyRows_ ; ++i)
00494           y0_ptr[i] = y20_ptr[i];
00495 
00496     }
00497     else {
00498 
00499       for (int i = 0 ; i < NumMyRows_ ; ++i) {
00500 
00501         int NumEntries;
00502         int col;
00503         IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00504                                                  &Values[0], &Indices[0]));
00505 
00506         for (int m = 0 ; m < NumVectors ; ++m) {
00507 
00508           double dtemp = 0.0;
00509           for (int k = 0 ; k < NumEntries ; ++k) {
00510 
00511             col = Indices[k];
00512             dtemp += Values[k] * y2_ptr[m][col];
00513           }
00514 
00515           y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
00516         }
00517         // using Export() sounded quite expensive
00518       }
00519 
00520       if (IsParallel_)
00521         for (int m = 0 ; m < NumVectors ; ++m) 
00522           for (int i = 0 ; i < NumMyRows_ ; ++i)
00523             y_ptr[m][i] = y2_ptr[m][i];
00524 
00525     }
00526   }
00527 
00528   ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
00529 
00530   return(0);
00531 } //ApplyInverseGS_RowMatrix()
00532 
00533 //==============================================================================
00534 int Ifpack_PointRelaxation::
00535 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00536 {
00537   int NumVectors = X.NumVectors();
00538 
00539   int* Indices;
00540   double* Values;
00541 
00542   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00543   if (IsParallel_) {
00544     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00545   }
00546   else
00547     Y2 = Teuchos::rcp( &Y, false );
00548 
00549   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00550   X.ExtractView(&x_ptr);
00551   Y.ExtractView(&y_ptr);
00552   Y2->ExtractView(&y2_ptr);
00553   Diagonal_->ExtractView(&d_ptr);
00554   
00555   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00556     
00557     // only one data exchange per sweep
00558     if (IsParallel_)
00559       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00560 
00561     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00562 
00563       int NumEntries;
00564       int col;
00565       double diag = d_ptr[i];
00566 
00567       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00568 
00569       for (int m = 0 ; m < NumVectors ; ++m) {
00570 
00571         double dtemp = 0.0;
00572 
00573         for (int k = 0; k < NumEntries; ++k) {
00574 
00575           col = Indices[k];
00576           dtemp += Values[k] * y2_ptr[m][col];
00577         }
00578 
00579         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00580       }
00581     }
00582 
00583     if (IsParallel_)
00584       for (int m = 0 ; m < NumVectors ; ++m) 
00585         for (int i = 0 ; i < NumMyRows_ ; ++i)
00586           y_ptr[m][i] = y2_ptr[m][i];
00587   }
00588 
00589   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00590   return(0);
00591 } //ApplyInverseGS_CrsMatrix()
00592 
00593 //
00594 //==============================================================================
00595 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage()
00596 
00597 int Ifpack_PointRelaxation::
00598 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00599 {
00600   int* IndexOffset;
00601   int* Indices;
00602   double* Values;
00603   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00604 
00605   int NumVectors = X.NumVectors();
00606 
00607   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00608   if (IsParallel_) {
00609     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00610   }
00611   else
00612     Y2 = Teuchos::rcp( &Y, false );
00613 
00614   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00615   X.ExtractView(&x_ptr);
00616   Y.ExtractView(&y_ptr);
00617   Y2->ExtractView(&y2_ptr);
00618   Diagonal_->ExtractView(&d_ptr);
00619   
00620   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00621     
00622     // only one data exchange per sweep
00623     if (IsParallel_)
00624       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00625 
00626     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00627 
00628       int col;
00629       double diag = d_ptr[i];
00630 
00631       for (int m = 0 ; m < NumVectors ; ++m) {
00632 
00633         double dtemp = 0.0;
00634 
00635         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00636 
00637           col = Indices[k];
00638           dtemp += Values[k] * y2_ptr[m][col];
00639         }
00640 
00641         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00642       }
00643     }
00644 
00645     if (IsParallel_)
00646       for (int m = 0 ; m < NumVectors ; ++m) 
00647         for (int i = 0 ; i < NumMyRows_ ; ++i)
00648           y_ptr[m][i] = y2_ptr[m][i];
00649   }
00650 
00651   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00652   return(0);
00653 } //ApplyInverseGS_FastCrsMatrix()
00654 
00655 //==============================================================================
00656 int Ifpack_PointRelaxation::
00657 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00658 {
00659   const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
00660   // try to pick the best option; performances may be improved
00661   // if several sweeps are used.
00662   if (CrsMatrix != 0)
00663   {
00664     if (CrsMatrix->StorageOptimized())
00665       return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
00666     else
00667       return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
00668   }
00669   else
00670     return(ApplyInverseSGS_RowMatrix(X, Y));
00671 }
00672 
00673 //==============================================================================
00674 int Ifpack_PointRelaxation::
00675 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00676 {
00677   int NumVectors = X.NumVectors();
00678   int Length = Matrix().MaxNumEntries();
00679   vector<int> Indices(Length);
00680   vector<double> Values(Length);
00681 
00682   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00683   if (IsParallel_) {
00684     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00685   }
00686   else
00687     Y2 = Teuchos::rcp( &Y, false );
00688 
00689   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00690   X.ExtractView(&x_ptr);
00691   Y.ExtractView(&y_ptr);
00692   Y2->ExtractView(&y2_ptr);
00693   Diagonal_->ExtractView(&d_ptr);
00694   
00695   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00696     
00697     // only one data exchange per sweep
00698     if (IsParallel_)
00699       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00700 
00701     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00702 
00703       int NumEntries;
00704       int col;
00705       double diag = d_ptr[i];
00706 
00707       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00708                                                &Values[0], &Indices[0]));
00709 
00710       for (int m = 0 ; m < NumVectors ; ++m) {
00711 
00712         double dtemp = 0.0;
00713 
00714         for (int k = 0 ; k < NumEntries ; ++k) {
00715 
00716           col = Indices[k];
00717           dtemp += Values[k] * y2_ptr[m][col];
00718         }
00719 
00720         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00721       }
00722     }
00723 
00724     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00725 
00726       int NumEntries;
00727       int col;
00728       double diag = d_ptr[i];
00729 
00730       IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
00731                                                &Values[0], &Indices[0]));
00732 
00733       for (int m = 0 ; m < NumVectors ; ++m) {
00734 
00735         double dtemp = 0.0;
00736         for (int k = 0 ; k < NumEntries ; ++k) {
00737 
00738           col = Indices[k];
00739           dtemp += Values[k] * y2_ptr[m][col];
00740         }
00741 
00742         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00743 
00744       }
00745     }
00746 
00747     if (IsParallel_)
00748       for (int m = 0 ; m < NumVectors ; ++m) 
00749         for (int i = 0 ; i < NumMyRows_ ; ++i)
00750           y_ptr[m][i] = y2_ptr[m][i];
00751   }
00752 
00753   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00754   return(0);
00755 }
00756 
00757 //==============================================================================
00758 int Ifpack_PointRelaxation::
00759 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00760 {
00761   int NumVectors = X.NumVectors();
00762 
00763   int* Indices;
00764   double* Values;
00765 
00766   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00767   if (IsParallel_) {
00768     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00769   }
00770   else
00771     Y2 = Teuchos::rcp( &Y, false );
00772 
00773   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00774   X.ExtractView(&x_ptr);
00775   Y.ExtractView(&y_ptr);
00776   Y2->ExtractView(&y2_ptr);
00777   Diagonal_->ExtractView(&d_ptr);
00778   
00779   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00780     
00781     // only one data exchange per sweep
00782     if (IsParallel_)
00783       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00784 
00785     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00786 
00787       int NumEntries;
00788       int col;
00789       double diag = d_ptr[i];
00790 
00791       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00792 
00793       for (int m = 0 ; m < NumVectors ; ++m) {
00794 
00795         double dtemp = 0.0;
00796 
00797         for (int k = 0; k < NumEntries; ++k) {
00798 
00799           col = Indices[k];
00800           dtemp += Values[k] * y2_ptr[m][col];
00801         }
00802 
00803         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00804       }
00805     }
00806 
00807     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00808 
00809       int NumEntries;
00810       int col;
00811       double diag = d_ptr[i];
00812 
00813       IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00814 
00815       for (int m = 0 ; m < NumVectors ; ++m) {
00816 
00817         double dtemp = 0.0;
00818         for (int k = 0; k < NumEntries; ++k) {
00819 
00820           col = Indices[k];
00821           dtemp += Values[k] * y2_ptr[m][col];
00822         }
00823 
00824         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00825 
00826       }
00827     }
00828 
00829     if (IsParallel_)
00830       for (int m = 0 ; m < NumVectors ; ++m) 
00831         for (int i = 0 ; i < NumMyRows_ ; ++i)
00832           y_ptr[m][i] = y2_ptr[m][i];
00833   }
00834 
00835   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00836   return(0);
00837 }
00838 
00839 //==============================================================================
00840 // this requires Epetra_CrsMatrix + OptimizeStorage()
00841 int Ifpack_PointRelaxation::
00842 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00843 {
00844   int* IndexOffset;
00845   int* Indices;
00846   double* Values;
00847   IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
00848 
00849   int NumVectors = X.NumVectors();
00850 
00851   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00852   if (IsParallel_) {
00853     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00854   }
00855   else
00856     Y2 = Teuchos::rcp( &Y, false );
00857 
00858   double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
00859   X.ExtractView(&x_ptr);
00860   Y.ExtractView(&y_ptr);
00861   Y2->ExtractView(&y2_ptr);
00862   Diagonal_->ExtractView(&d_ptr);
00863   
00864   for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
00865     
00866     // only one data exchange per sweep
00867     if (IsParallel_)
00868       IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00869 
00870     for (int i = 0 ; i < NumMyRows_ ; ++i) {
00871 
00872       int col;
00873       double diag = d_ptr[i];
00874 
00875       // no need to extract row i
00876       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00877 
00878       for (int m = 0 ; m < NumVectors ; ++m) {
00879 
00880         double dtemp = 0.0;
00881 
00882         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00883 
00884           col = Indices[k];
00885           dtemp += Values[k] * y2_ptr[m][col];
00886         }
00887 
00888         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00889       }
00890     }
00891 
00892     for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
00893 
00894       int col;
00895       double diag = d_ptr[i];
00896 
00897       // no need to extract row i
00898       //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
00899 
00900       for (int m = 0 ; m < NumVectors ; ++m) {
00901 
00902         double dtemp = 0.0;
00903         for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
00904 
00905           col = Indices[k];
00906           dtemp += Values[k] * y2_ptr[m][col];
00907         }
00908 
00909         y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
00910 
00911       }
00912     }
00913 
00914     if (IsParallel_)
00915       for (int m = 0 ; m < NumVectors ; ++m) 
00916         for (int i = 0 ; i < NumMyRows_ ; ++i)
00917           y_ptr[m][i] = y2_ptr[m][i];
00918   }
00919 
00920   ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
00921   return(0);
00922 }

Generated on Tue Oct 20 12:48:54 2009 for IFPACK by doxygen 1.4.7