IFPACK Development
Ifpack_BlockRelaxation.h
00001 /*
00002 //@HEADER
00003 // ***********************************************************************
00004 //
00005 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00006 //                 Copyright (2002) Sandia Corporation
00007 //
00008 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00009 // license for use of this work by or on behalf of the U.S. Government.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00039 //
00040 // ***********************************************************************
00041 //@HEADER
00042 */
00043 
00044 #ifndef IFPACK_BLOCKPRECONDITIONER_H
00045 #define IFPACK_BLOCKPRECONDITIONER_H
00046 
00047 #include "Ifpack_ConfigDefs.h"
00048 #include "Ifpack_Preconditioner.h" 
00049 #include "Ifpack_Partitioner.h"
00050 #include "Ifpack_LinearPartitioner.h"
00051 #include "Ifpack_GreedyPartitioner.h"
00052 #include "Ifpack_METISPartitioner.h"
00053 #include "Ifpack_EquationPartitioner.h"
00054 #include "Ifpack_UserPartitioner.h"
00055 #include "Ifpack_Graph_Epetra_RowMatrix.h"
00056 #include "Ifpack_DenseContainer.h" 
00057 #include "Ifpack_Utils.h" 
00058 #include "Teuchos_ParameterList.hpp"
00059 #include "Teuchos_RefCountPtr.hpp"
00060 #include "Epetra_RowMatrix.h"
00061 #include "Epetra_MultiVector.h"
00062 #include "Epetra_Vector.h"
00063 #include "Epetra_Time.h"
00064 #include "Epetra_Import.h"
00065 
00066 static const int IFPACK_JACOBI = 0;
00067 static const int IFPACK_GS = 1;
00068 static const int IFPACK_SGS = 2;
00069 
00070 
00072 
00135 template<typename T>
00136 class Ifpack_BlockRelaxation : public Ifpack_Preconditioner {
00137 
00138 public:
00139 
00141 
00142 
00147   Ifpack_BlockRelaxation(const Epetra_RowMatrix* Matrix);
00148 
00149   virtual ~Ifpack_BlockRelaxation();
00150 
00152 
00154 
00156 
00164   virtual int Apply(const Epetra_MultiVector& X, 
00165             Epetra_MultiVector& Y) const;
00166 
00168 
00177   virtual int ApplyInverse(const Epetra_MultiVector& X, 
00178                Epetra_MultiVector& Y) const;
00179 
00181   virtual double NormInf() const
00182   {
00183     return(-1.0);
00184   }
00186 
00188 
00189   virtual int SetUseTranspose(bool UseTranspose_in)
00190   {
00191     if (UseTranspose_in)
00192       IFPACK_CHK_ERR(-98); // FIXME: can I work with the transpose?
00193     return(0);
00194   }
00195 
00196   virtual const char* Label() const;
00197  
00199   virtual bool UseTranspose() const
00200   {
00201     return(false);
00202   }
00203 
00205   virtual bool HasNormInf() const
00206   {
00207     return(false);
00208   }
00209 
00211   virtual const Epetra_Comm & Comm() const;
00212 
00214   virtual const Epetra_Map & OperatorDomainMap() const;
00215 
00217   virtual const Epetra_Map & OperatorRangeMap() const;
00219 
00221   int NumLocalBlocks() const 
00222   {
00223     return(NumLocalBlocks_);
00224   }
00225 
00227   virtual bool IsInitialized() const
00228   {
00229     return(IsInitialized_);
00230   }
00231 
00233   virtual bool IsComputed() const
00234   {
00235     return(IsComputed_);
00236   }
00237 
00239   virtual int SetParameters(Teuchos::ParameterList& List);
00240 
00242   virtual int Initialize();
00243 
00245   virtual int Compute();
00246 
00247   virtual const Epetra_RowMatrix& Matrix() const
00248   {
00249     return(*Matrix_);
00250   }
00251 
00252   virtual double Condest(const Ifpack_CondestType CT = Ifpack_Cheap,
00253                          const int MaxIters = 1550,
00254                          const double Tol = 1e-9,
00255              Epetra_RowMatrix* Matrix_in = 0)
00256   {
00257     return(-1.0);
00258   }
00259 
00260   virtual double Condest() const
00261   {
00262     return(-1.0);
00263   }
00264 
00265   std::ostream& Print(std::ostream& os) const;
00266 
00268   virtual int NumInitialize() const
00269   {
00270     return(NumInitialize_);
00271   }
00272 
00274   virtual int NumCompute() const
00275   {
00276     return(NumCompute_);
00277   }
00278 
00280   virtual int NumApplyInverse() const
00281   {
00282     return(NumApplyInverse_);
00283   }
00284 
00286   virtual double InitializeTime() const
00287   {
00288     return(InitializeTime_);
00289   }
00290 
00292   virtual double ComputeTime() const
00293   {
00294     return(ComputeTime_);
00295   }
00296 
00298   virtual double ApplyInverseTime() const
00299   {
00300     return(ApplyInverseTime_);
00301   }
00302 
00304   virtual double InitializeFlops() const
00305   {
00306 #ifdef IFPACK_FLOPCOUNTERS
00307     if (Containers_.size() == 0)
00308       return(0.0);
00309 
00310     // the total number of flops is computed each time InitializeFlops() is
00311     // called. This is becase I also have to add the contribution from each
00312     // container.
00313     double total = InitializeFlops_;
00314     for (unsigned int i = 0 ; i < Containers_.size() ; ++i)
00315       total += Containers_[i]->InitializeFlops();
00316     return(total);
00317 #else
00318     return(0.0);
00319 #endif
00320   }
00321 
00322   virtual double ComputeFlops() const
00323   {
00324 #ifdef IFPACK_FLOPCOUNTERS
00325     if (Containers_.size() == 0)
00326       return(0.0);
00327     
00328     double total = ComputeFlops_;
00329     for (unsigned int i = 0 ; i < Containers_.size() ; ++i)
00330       total += Containers_[i]->ComputeFlops();
00331     return(total);
00332 #else
00333     return(0.0);
00334 #endif
00335   }
00336 
00337   virtual double ApplyInverseFlops() const
00338   {
00339 #ifdef IFPACK_FLOPCOUNTERS
00340     if (Containers_.size() == 0)
00341       return(0.0);
00342 
00343     double total = ApplyInverseFlops_;
00344     for (unsigned int i = 0 ; i < Containers_.size() ; ++i) {
00345       total += Containers_[i]->ApplyInverseFlops();
00346     }
00347     return(total);
00348 #else
00349     return(0.0);
00350 #endif
00351   }
00352 
00353 private:
00354 
00356   Ifpack_BlockRelaxation(const Ifpack_BlockRelaxation& rhs);
00357 
00359   Ifpack_BlockRelaxation & operator=(const Ifpack_BlockRelaxation& rhs)
00360   {
00361     return(*this);
00362   }
00363 
00364   virtual int ApplyInverseJacobi(const Epetra_MultiVector& X, 
00365                                  Epetra_MultiVector& Y) const;
00366 
00367   virtual int DoJacobi(const Epetra_MultiVector& X, 
00368                                   Epetra_MultiVector& Y) const;
00369 
00370   virtual int ApplyInverseGS(const Epetra_MultiVector& X, 
00371                              Epetra_MultiVector& Y) const;
00372 
00373   virtual int DoGaussSeidel(Epetra_MultiVector& X, 
00374                             Epetra_MultiVector& Y) const;
00375 
00376   virtual int ApplyInverseSGS(const Epetra_MultiVector& X, 
00377                               Epetra_MultiVector& Y) const;
00378 
00379   virtual int DoSGS(const Epetra_MultiVector& X,
00380                     Epetra_MultiVector& Xtmp,
00381                     Epetra_MultiVector& Y) const;
00382 
00383   int ExtractSubmatrices();
00384 
00385   // @{ Initializations, timing and flops
00386 
00388   bool IsInitialized_;
00390   bool IsComputed_;
00392   int NumInitialize_;
00394   int NumCompute_;
00396   mutable int NumApplyInverse_;
00398   double InitializeTime_;
00400   double ComputeTime_;
00402   mutable double ApplyInverseTime_;
00404   double InitializeFlops_;
00406   double ComputeFlops_;
00408   mutable double ApplyInverseFlops_;
00409   // @}
00410 
00411   // @{ Settings
00413   int NumSweeps_;
00415   double DampingFactor_;
00417   int NumLocalBlocks_;
00419   Teuchos::ParameterList List_;
00420   // @}
00421 
00422   // @{ Other data
00425   Teuchos::RefCountPtr< const Epetra_RowMatrix > Matrix_;
00426   mutable std::vector<Teuchos::RefCountPtr<T> > Containers_;
00428   Teuchos::RefCountPtr<Ifpack_Partitioner> Partitioner_;
00429   string PartitionerType_;
00430   int PrecType_;
00432   string Label_;
00434   bool ZeroStartingSolution_;
00435   Teuchos::RefCountPtr<Ifpack_Graph> Graph_;
00437   Teuchos::RefCountPtr<Epetra_Vector> W_;
00438   // Level of overlap among blocks (for Jacobi only).
00439   int OverlapLevel_;
00440   mutable Epetra_Time Time_;
00441   bool IsParallel_;
00442   Teuchos::RefCountPtr<Epetra_Import> Importer_;
00443   // @}
00444   
00445 }; // class Ifpack_BlockRelaxation
00446 
00447 //==============================================================================
00448 template<typename T>
00449 Ifpack_BlockRelaxation<T>::
00450 Ifpack_BlockRelaxation(const Epetra_RowMatrix* Matrix_in) :
00451   IsInitialized_(false),
00452   IsComputed_(false),
00453   NumInitialize_(0),
00454   NumCompute_(0),
00455   NumApplyInverse_(0),
00456   InitializeTime_(0.0),
00457   ComputeTime_(0.0),
00458   ApplyInverseTime_(0.0),
00459   InitializeFlops_(0.0),
00460   ComputeFlops_(0.0),
00461   ApplyInverseFlops_(0.0),
00462   NumSweeps_(1),
00463   DampingFactor_(1.0),
00464   NumLocalBlocks_(1),
00465   Matrix_(Teuchos::rcp(Matrix_in,false)),
00466   PartitionerType_("greedy"),
00467   PrecType_(IFPACK_JACOBI),
00468   ZeroStartingSolution_(true),
00469   OverlapLevel_(0),
00470   Time_(Comm()),
00471   IsParallel_(false)
00472 {
00473   if (Matrix_in->Comm().NumProc() != 1)
00474     IsParallel_ = true;
00475 }
00476 
00477 //==============================================================================
00478 template<typename T>
00479 Ifpack_BlockRelaxation<T>::~Ifpack_BlockRelaxation()
00480 {
00481 }
00482 
00483 //==============================================================================
00484 template<typename T>
00485 const char* Ifpack_BlockRelaxation<T>::Label() const
00486 {
00487   return(Label_.c_str());
00488 }
00489 
00490 //==============================================================================
00491 template<typename T>
00492 int Ifpack_BlockRelaxation<T>::
00493 Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00494 {
00495   IFPACK_RETURN(Matrix().Apply(X,Y));
00496 }
00497 
00498 //==============================================================================
00499 template<typename T>
00500 const Epetra_Comm& Ifpack_BlockRelaxation<T>::
00501 Comm() const
00502 {
00503   return(Matrix().Comm());
00504 }
00505 
00506 //==============================================================================
00507 template<typename T>
00508 const Epetra_Map& Ifpack_BlockRelaxation<T>::
00509 OperatorDomainMap() const
00510 {
00511   return(Matrix().OperatorDomainMap());
00512 }
00513 
00514 //==============================================================================
00515 template<typename T>
00516 const Epetra_Map& Ifpack_BlockRelaxation<T>::
00517 OperatorRangeMap() const
00518 {
00519   return(Matrix().OperatorRangeMap());
00520 }
00521 
00522 //==============================================================================
00523 template<typename T>
00524 int Ifpack_BlockRelaxation<T>::ExtractSubmatrices()
00525 {
00526 
00527   if (Partitioner_ == Teuchos::null)
00528     IFPACK_CHK_ERR(-3);
00529 
00530   NumLocalBlocks_ = Partitioner_->NumLocalParts();
00531 
00532   Containers_.resize(NumLocalBlocks());
00533 
00534   for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00535 
00536     int rows = Partitioner_->NumRowsInPart(i);
00537     Containers_[i] = Teuchos::rcp( new T(rows) );
00538     
00539     //Ifpack_DenseContainer* DC = 0;
00540     //DC = dynamic_cast<Ifpack_DenseContainer*>(Containers_[i]);
00541 
00542     if (Containers_[i] == Teuchos::null)
00543       IFPACK_CHK_ERR(-5);
00544     
00545     IFPACK_CHK_ERR(Containers_[i]->SetParameters(List_));
00546     IFPACK_CHK_ERR(Containers_[i]->Initialize());
00547     // flops in Initialize() will be computed on-the-fly in method InitializeFlops().
00548 
00549     // set "global" ID of each partitioner row
00550     for (int j = 0 ; j < rows ; ++j) {
00551       int LRID = (*Partitioner_)(i,j);
00552       Containers_[i]->ID(j) = LRID;
00553     }
00554 
00555     IFPACK_CHK_ERR(Containers_[i]->Compute(*Matrix_));
00556     // flops in Compute() will be computed on-the-fly in method ComputeFlops().
00557 
00558   }
00559 
00560   return(0);
00561 }
00562 
00563 //==============================================================================
00564 template<typename T>
00565 int Ifpack_BlockRelaxation<T>::Compute()
00566 {
00567 
00568   if (!IsInitialized())
00569     IFPACK_CHK_ERR(Initialize());
00570 
00571   Time_.ResetStartTime();
00572 
00573   IsComputed_ = false;
00574 
00575   if (Matrix().NumGlobalRows64() != Matrix().NumGlobalCols64())
00576     IFPACK_CHK_ERR(-2); // only square matrices
00577 
00578   IFPACK_CHK_ERR(ExtractSubmatrices());
00579   
00580   if (IsParallel_ && PrecType_ != IFPACK_JACOBI) {
00581     // not needed by Jacobi (done by matvec)
00582     Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
00583                                                 Matrix().RowMatrixRowMap()) );
00584 
00585     if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
00586   }
00587   IsComputed_ = true;
00588   ComputeTime_ += Time_.ElapsedTime();
00589   ++NumCompute_;
00590 
00591   return(0);
00592 
00593 }
00594 
00595 //==============================================================================
00596 template<typename T>
00597 int Ifpack_BlockRelaxation<T>::
00598 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00599 {
00600   if (!IsComputed())
00601     IFPACK_CHK_ERR(-3);
00602 
00603   if (X.NumVectors() != Y.NumVectors())
00604     IFPACK_CHK_ERR(-2);
00605 
00606   Time_.ResetStartTime();
00607 
00608   // AztecOO gives X and Y pointing to the same memory location,
00609   // need to create an auxiliary vector, Xcopy
00610   Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
00611   if (X.Pointers()[0] == Y.Pointers()[0])
00612     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00613   else
00614     Xcopy = Teuchos::rcp( &X, false );
00615 
00616   switch (PrecType_) {
00617   case IFPACK_JACOBI:
00618     IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
00619     break;
00620   case IFPACK_GS:
00621     IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
00622     break;
00623   case IFPACK_SGS:
00624     IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
00625     break;
00626   }
00627 
00628   ApplyInverseTime_ += Time_.ElapsedTime();
00629   ++NumApplyInverse_;
00630 
00631   return(0);
00632 }
00633 
00634 //==============================================================================
00635 // This method in general will not work with AztecOO if used
00636 // outside Ifpack_AdditiveSchwarz and OverlapLevel_ != 0
00637 //
00638 template<typename T>
00639 int Ifpack_BlockRelaxation<T>::
00640 ApplyInverseJacobi(const Epetra_MultiVector& X, 
00641                    Epetra_MultiVector& Y) const
00642 {
00643 
00644   if (ZeroStartingSolution_)
00645     Y.PutScalar(0.0);
00646 
00647   // do not compute the residual in this case
00648   if (NumSweeps_ == 1 && ZeroStartingSolution_) {
00649     IFPACK_RETURN(DoJacobi(X,Y));
00650   }
00651 
00652   Epetra_MultiVector AX(Y);
00653 
00654   for (int j = 0; j < NumSweeps_ ; j++) {
00655     IFPACK_CHK_ERR(Apply(Y,AX));
00656     ApplyInverseFlops_ += X.NumVectors() * 2 * Matrix_->NumGlobalNonzeros64();
00657     IFPACK_CHK_ERR(AX.Update(1.0,X,-1.0));
00658     ApplyInverseFlops_ += X.NumVectors() * 2 * Matrix_->NumGlobalRows64();
00659     IFPACK_CHK_ERR(DoJacobi(AX,Y));
00660     // flops counted in DoJacobi()
00661   }
00662 
00663 
00664   return(0);
00665 }
00666 
00667 //==============================================================================
00668 template<typename T>
00669 int Ifpack_BlockRelaxation<T>::
00670 DoJacobi(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00671 {
00672   int NumVectors = X.NumVectors();
00673 
00674   if (OverlapLevel_ == 0) {
00675 
00676     for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00677      
00678       // may happen that a partition is empty
00679       if (Containers_[i]->NumRows() == 0) 
00680         continue;
00681 
00682       int LID;
00683 
00684       // extract RHS from X
00685       for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00686         LID = Containers_[i]->ID(j);
00687         for (int k = 0 ; k < NumVectors ; ++k) {
00688           Containers_[i]->RHS(j,k) = X[k][LID];
00689         }
00690       }
00691 
00692       // apply the inverse of each block. NOTE: flops occurred
00693       // in ApplyInverse() of each block are summed up in method
00694       // ApplyInverseFlops().
00695       IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
00696 
00697       // copy back into solution vector Y
00698       for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00699         LID = Containers_[i]->ID(j);
00700         for (int k = 0 ; k < NumVectors ; ++k) {
00701           Y[k][LID] += DampingFactor_ * Containers_[i]->LHS(j,k);
00702         }
00703       }
00704 
00705     }
00706     // NOTE: flops for ApplyInverse() of each block are summed up
00707     // in method ApplyInverseFlops()
00708 #ifdef IFPACK_FLOPCOUNTERS
00709     ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalRows();
00710 #endif
00711 
00712   }
00713   else {
00714 
00715     for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00716 
00717       // may happen that a partition is empty
00718       if (Containers_[i]->NumRows() == 0) 
00719         continue;
00720 
00721       int LID;
00722 
00723       // extract RHS from X
00724       for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00725         LID = Containers_[i]->ID(j);
00726         for (int k = 0 ; k < NumVectors ; ++k) {
00727           Containers_[i]->RHS(j,k) = (*W_)[LID] * X[k][LID];
00728         }
00729       }
00730 
00731       // apply the inverse of each block
00732       IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
00733 
00734       // copy back into solution vector Y
00735       for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00736         LID = Containers_[i]->ID(j);
00737         for (int k = 0 ; k < NumVectors ; ++k) {
00738           Y[k][LID] += DampingFactor_ * (*W_)[LID] * Containers_[i]->LHS(j,k);
00739         }
00740       }
00741 
00742     }
00743     // NOTE: flops for ApplyInverse() of each block are summed up
00744     // in method ApplyInverseFlops()
00745     // NOTE: do not count for simplicity the flops due to overlapping rows
00746 #ifdef IFPACK_FLOPCOUNTERS
00747     ApplyInverseFlops_ += NumVectors * 4 * Matrix_->NumGlobalRows();
00748 #endif
00749   }
00750 
00751   return(0);
00752 }
00753 
00754 //==============================================================================
00755 template<typename T>
00756 int Ifpack_BlockRelaxation<T>::
00757 ApplyInverseGS(const Epetra_MultiVector& X, 
00758                Epetra_MultiVector& Y) const
00759 {
00760 
00761   if (ZeroStartingSolution_)
00762     Y.PutScalar(0.0);
00763 
00764   Epetra_MultiVector Xcopy(X);
00765   for (int j = 0; j < NumSweeps_ ; j++) {
00766     IFPACK_CHK_ERR(DoGaussSeidel(Xcopy,Y));
00767     if (j != NumSweeps_ - 1)
00768       Xcopy = X;
00769   }
00770 
00771   return(0);
00772 
00773 }
00774 
00775 //==============================================================================
00776 template<typename T>
00777 int Ifpack_BlockRelaxation<T>::
00778 DoGaussSeidel(Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00779 {
00780 
00781   // cycle over all local subdomains
00782 
00783   int Length = Matrix().MaxNumEntries();
00784   std::vector<int> Indices(Length);
00785   std::vector<double> Values(Length);
00786 
00787   int NumMyRows = Matrix().NumMyRows();
00788   int NumVectors = X.NumVectors();
00789 
00790   // an additonal vector is needed by parallel computations
00791   // (note that applications through Ifpack_AdditiveSchwarz
00792   // are always seen are serial)
00793   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00794   if (IsParallel_)
00795     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00796   else
00797     Y2 = Teuchos::rcp( &Y, false );
00798 
00799   double** y_ptr;
00800   double** y2_ptr;
00801   Y.ExtractView(&y_ptr);
00802   Y2->ExtractView(&y2_ptr);
00803 
00804   // data exchange is here, once per sweep
00805   if (IsParallel_)
00806     IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00807 
00808   for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00809 
00810     // may happen that a partition is empty
00811     if (Containers_[i]->NumRows() == 0) 
00812       continue;
00813 
00814     int LID;
00815 
00816     // update from previous block
00817 
00818     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00819       LID = Containers_[i]->ID(j);
00820 
00821       int NumEntries;
00822       IFPACK_CHK_ERR(Matrix().ExtractMyRowCopy(LID, Length,NumEntries,
00823                                                &Values[0], &Indices[0]));
00824 
00825       for (int k = 0 ; k < NumEntries ; ++k) {
00826         int col = Indices[k];
00827 
00828           for (int kk = 0 ; kk < NumVectors ; ++kk) {
00829             X[kk][LID] -= Values[k] * y2_ptr[kk][col];
00830           }
00831       }
00832     }
00833 
00834     // solve with this block
00835 
00836     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00837       LID = Containers_[i]->ID(j);
00838       for (int k = 0 ; k < NumVectors ; ++k) {
00839         Containers_[i]->RHS(j,k) = X[k][LID];
00840       }
00841     }
00842 
00843     IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
00844 #ifdef IFPACK_FLOPCOUNTERS
00845     ApplyInverseFlops_ += Containers_[i]->ApplyInverseFlops();
00846 #endif
00847 
00848     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00849       LID = Containers_[i]->ID(j);
00850       for (int k = 0 ; k < NumVectors ; ++k) {
00851         y2_ptr[k][LID] += DampingFactor_ * Containers_[i]->LHS(j,k);
00852       }
00853     }
00854 
00855   }
00856 
00857   // operations for all getrow()'s
00858   // NOTE: flops for ApplyInverse() of each block are summed up
00859   // in method ApplyInverseFlops()
00860 #ifdef IFPACK_FLOPCOUNTERS
00861   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalNonzeros();
00862   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalRows();
00863 #endif
00864 
00865   // Attention: this is delicate... Not all combinations
00866   // of Y2 and Y will always work (tough for ML it should be ok)
00867   if (IsParallel_)
00868     for (int m = 0 ; m < NumVectors ; ++m) 
00869       for (int i = 0 ; i < NumMyRows ; ++i)
00870         y_ptr[m][i] = y2_ptr[m][i];
00871 
00872   return(0);
00873 }
00874 
00875 //==============================================================================
00876 template<typename T>
00877 int Ifpack_BlockRelaxation<T>::
00878 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00879 {
00880 
00881   if (ZeroStartingSolution_)
00882     Y.PutScalar(0.0);
00883 
00884   Epetra_MultiVector Xcopy(X);
00885   for (int j = 0; j < NumSweeps_ ; j++) {
00886     IFPACK_CHK_ERR(DoSGS(X,Xcopy,Y));
00887     if (j != NumSweeps_ - 1)
00888       Xcopy = X;
00889   }
00890   return(0);
00891 }
00892 
00893 //==============================================================================
00894 template<typename T>
00895 int Ifpack_BlockRelaxation<T>::
00896 DoSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Xcopy, 
00897       Epetra_MultiVector& Y) const
00898 {
00899 
00900   int NumMyRows = Matrix().NumMyRows();
00901   int NumVectors = X.NumVectors();
00902 
00903   int Length = Matrix().MaxNumEntries();
00904   std::vector<int> Indices;
00905   std::vector<double> Values;
00906   Indices.resize(Length);
00907   Values.resize(Length);
00908 
00909   // an additonal vector is needed by parallel computations
00910   // (note that applications through Ifpack_AdditiveSchwarz
00911   // are always seen are serial)
00912   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00913   if (IsParallel_)
00914     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00915   else
00916     Y2 = Teuchos::rcp( &Y, false );
00917 
00918   double** y_ptr;
00919   double** y2_ptr;
00920   Y.ExtractView(&y_ptr);
00921   Y2->ExtractView(&y2_ptr);
00922 
00923   // data exchange is here, once per sweep
00924   if (IsParallel_)
00925     IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00926 
00927   for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00928 
00929     // may happen that a partition is empty
00930     if (Containers_[i]->NumRows() == 0) 
00931       continue;
00932 
00933     int LID;
00934 
00935     // update from previous block
00936 
00937     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00938       LID = Containers_[i]->ID(j);
00939 
00940       int NumEntries;
00941       IFPACK_CHK_ERR(Matrix().ExtractMyRowCopy(LID, Length,NumEntries,
00942                                                &Values[0], &Indices[0]));
00943 
00944       for (int k = 0 ; k < NumEntries ; ++k) {
00945         int col = Indices[k];
00946 
00947         for (int kk = 0 ; kk < NumVectors ; ++kk) {
00948           Xcopy[kk][LID] -= Values[k] * y2_ptr[kk][col];
00949         }
00950       }
00951     }
00952 
00953     // solve with this block
00954 
00955     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00956       LID = Containers_[i]->ID(j);
00957       for (int k = 0 ; k < NumVectors ; ++k) {
00958         Containers_[i]->RHS(j,k) = Xcopy[k][LID];
00959       }
00960     }
00961 
00962     IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
00963 #ifdef IFPACK_FLOPCOUNTERS
00964     ApplyInverseFlops_ += Containers_[i]->ApplyInverseFlops();
00965 #endif
00966 
00967     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00968       LID = Containers_[i]->ID(j);
00969       for (int k = 0 ; k < NumVectors ; ++k) {
00970         y2_ptr[k][LID] += DampingFactor_ * Containers_[i]->LHS(j,k);
00971       }
00972     }
00973   }
00974 
00975 #ifdef IFPACK_FLOPCOUNTERS
00976   // operations for all getrow()'s
00977   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalNonzeros();
00978   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalRows();
00979 #endif
00980 
00981   Xcopy = X;
00982 
00983   for (int i = NumLocalBlocks() - 1; i >=0 ; --i) {
00984 
00985     if (Containers_[i]->NumRows() == 0) 
00986       continue;
00987 
00988     int LID;
00989 
00990     // update from previous block
00991 
00992     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00993       LID = Containers_[i]->ID(j);
00994 
00995       int NumEntries;
00996       IFPACK_CHK_ERR(Matrix().ExtractMyRowCopy(LID, Length,NumEntries,
00997                                                &Values[0], &Indices[0]));
00998 
00999       for (int k = 0 ; k < NumEntries ; ++k) {
01000         int col = Indices[k];
01001 
01002           for (int kk = 0 ; kk < NumVectors ; ++kk) {
01003             Xcopy[kk][LID] -= Values[k] * y2_ptr[kk][col];
01004           }
01005       }
01006     }
01007 
01008     // solve with this block
01009 
01010     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
01011       LID = Containers_[i]->ID(j);
01012       for (int k = 0 ; k < NumVectors ; ++k) {
01013         Containers_[i]->RHS(j,k) = Xcopy[k][LID];
01014       }
01015     }
01016 
01017     IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
01018 #ifdef IFPACK_FLOPCOUNTERS
01019     ApplyInverseFlops_ += Containers_[i]->ApplyInverseFlops();
01020 #endif
01021 
01022     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
01023       LID = Containers_[i]->ID(j);
01024       for (int k = 0 ; k < NumVectors ; ++k) {
01025         y2_ptr[k][LID] += DampingFactor_ * Containers_[i]->LHS(j,k);
01026       }
01027     }
01028   }
01029 
01030 #ifdef IFPACK_FLOPCOUNTERS
01031   // operations for all getrow()'s
01032   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalNonzeros();
01033   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalRows();
01034 #endif
01035 
01036   // Attention: this is delicate... Not all combinations
01037   // of Y2 and Y will always work (tough for ML it should be ok)
01038   if (IsParallel_)
01039     for (int m = 0 ; m < NumVectors ; ++m) 
01040       for (int i = 0 ; i < NumMyRows ; ++i)
01041         y_ptr[m][i] = y2_ptr[m][i];
01042 
01043   return(0);
01044 }
01045 
01046 //==============================================================================
01047 template<typename T>
01048 ostream& Ifpack_BlockRelaxation<T>::Print(ostream & os) const
01049 {
01050 
01051   string PT;
01052   if (PrecType_ == IFPACK_JACOBI)
01053     PT = "Jacobi";
01054   else if (PrecType_ == IFPACK_GS)
01055     PT = "Gauss-Seidel";
01056   else if (PrecType_ == IFPACK_SGS)
01057     PT = "symmetric Gauss-Seidel";
01058 
01059   if (!Comm().MyPID()) {
01060     os << endl;
01061     os << "================================================================================" << endl;
01062     os << "Ifpack_BlockRelaxation, " << PT << endl;
01063     os << "Sweeps = " << NumSweeps_ << endl;
01064     os << "Damping factor = " << DampingFactor_;
01065     if (ZeroStartingSolution_) 
01066       os << ", using zero starting solution" << endl;
01067     else
01068       os << ", using input starting solution" << endl;
01069     os << "Number of local blocks = " << Partitioner_->NumLocalParts() << endl;
01070     //os << "Condition number estimate = " << Condest_ << endl;
01071     os << "Global number of rows            = " << Matrix_->NumGlobalRows64() << endl;
01072     os << endl;
01073     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
01074     os << "-----           -------   --------------       ------------     --------" << endl;
01075     os << "Initialize()    "   << std::setw(5) << NumInitialize() 
01076        << "  " << std::setw(15) << InitializeTime() 
01077        << "  " << std::setw(15) << 1.0e-6 * InitializeFlops();
01078     if (InitializeTime() != 0.0)
01079       os << "  " << std::setw(15) << 1.0e-6 * InitializeFlops() / InitializeTime() << endl;
01080     else
01081       os << "  " << std::setw(15) << 0.0 << endl;
01082     os << "Compute()       "   << std::setw(5) << NumCompute() 
01083        << "  " << std::setw(15) << ComputeTime()
01084        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops();
01085     if (ComputeTime() != 0.0) 
01086       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops() / ComputeTime() << endl;
01087     else
01088       os << "  " << std::setw(15) << 0.0 << endl;
01089     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse() 
01090        << "  " << std::setw(15) << ApplyInverseTime()
01091        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops();
01092     if (ApplyInverseTime() != 0.0) 
01093       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops() / ApplyInverseTime() << endl;
01094     else
01095       os << "  " << std::setw(15) << 0.0 << endl;
01096     os << "================================================================================" << endl;
01097     os << endl;
01098   }
01099 
01100   return(os);
01101 }
01102 
01103 //==============================================================================
01104 template<typename T>
01105 int Ifpack_BlockRelaxation<T>::SetParameters(Teuchos::ParameterList& List)
01106 {
01107 
01108   string PT;
01109   if (PrecType_ == IFPACK_JACOBI)
01110     PT = "Jacobi";
01111   else if (PrecType_ == IFPACK_GS)
01112     PT = "Gauss-Seidel";
01113   else if (PrecType_ == IFPACK_SGS)
01114     PT = "symmetric Gauss-Seidel";
01115 
01116   PT = List.get("relaxation: type", PT);
01117 
01118   if (PT == "Jacobi") {
01119     PrecType_ = IFPACK_JACOBI;
01120   }
01121   else if (PT == "Gauss-Seidel") {
01122     PrecType_ = IFPACK_GS;
01123   }
01124   else if (PT == "symmetric Gauss-Seidel") {
01125     PrecType_ = IFPACK_SGS;
01126   } else {
01127     cerr << "Option `relaxation: type' has an incorrect value ("
01128       << PT << ")'" << endl;
01129     cerr << "(file " << __FILE__ << ", line " << __LINE__ << ")" << endl;
01130     exit(EXIT_FAILURE);
01131   }
01132 
01133   NumSweeps_            = List.get("relaxation: sweeps", NumSweeps_);
01134   DampingFactor_        = List.get("relaxation: damping factor", 
01135                                    DampingFactor_);
01136   ZeroStartingSolution_ = List.get("relaxation: zero starting solution", 
01137                                    ZeroStartingSolution_);
01138   PartitionerType_      = List.get("partitioner: type", 
01139                                    PartitionerType_);
01140   NumLocalBlocks_       = List.get("partitioner: local parts", 
01141                                    NumLocalBlocks_);
01142   // only Jacobi can work with overlap among local domains,
01143   OverlapLevel_         = List.get("partitioner: overlap", 
01144                                    OverlapLevel_);
01145 
01146   // check parameters
01147   if (PrecType_ != IFPACK_JACOBI)
01148     OverlapLevel_ = 0;
01149   if (NumLocalBlocks_ < 0)
01150     NumLocalBlocks_ = Matrix().NumMyRows() / (-NumLocalBlocks_);
01151   // other checks are performed in Partitioner_
01152   
01153   // copy the list as each subblock's constructor will
01154   // require it later
01155   List_ = List;
01156 
01157   // set the label
01158   string PT2;
01159   if (PrecType_ == IFPACK_JACOBI)
01160     PT2 = "BJ";
01161   else if (PrecType_ == IFPACK_GS)
01162     PT2 = "BGS";
01163   else if (PrecType_ == IFPACK_SGS)
01164     PT2 = "BSGS";
01165   Label_ = "IFPACK (" + PT2 + ", sweeps=" 
01166     + Ifpack_toString(NumSweeps_) + ", damping="
01167     + Ifpack_toString(DampingFactor_) + ", blocks="
01168     + Ifpack_toString(NumLocalBlocks()) + ")";
01169 
01170   return(0);
01171 }
01172 
01173 //==============================================================================
01174 template<typename T>
01175 int Ifpack_BlockRelaxation<T>::Initialize()
01176 {
01177   IsInitialized_ = false;
01178   Time_.ResetStartTime();
01179 
01180   Graph_ = Teuchos::rcp( new Ifpack_Graph_Epetra_RowMatrix(Teuchos::rcp(&Matrix(),false)) );
01181   if (Graph_ == Teuchos::null) IFPACK_CHK_ERR(-5);
01182 
01183   if (PartitionerType_ == "linear")
01184     Partitioner_ = Teuchos::rcp( new Ifpack_LinearPartitioner(&*Graph_) );
01185   else if (PartitionerType_ == "greedy")
01186     Partitioner_ = Teuchos::rcp( new Ifpack_GreedyPartitioner(&*Graph_) );
01187   else if (PartitionerType_ == "metis")
01188     Partitioner_ = Teuchos::rcp( new Ifpack_METISPartitioner(&*Graph_) );
01189   else if (PartitionerType_ == "equation")
01190     Partitioner_ = Teuchos::rcp( new Ifpack_EquationPartitioner(&*Graph_) );
01191   else if (PartitionerType_ == "user")
01192     Partitioner_ = Teuchos::rcp( new Ifpack_UserPartitioner(&*Graph_) );
01193   else
01194     IFPACK_CHK_ERR(-2);
01195 
01196   if (Partitioner_ == Teuchos::null) IFPACK_CHK_ERR(-5);
01197 
01198   // need to partition the graph of A
01199   IFPACK_CHK_ERR(Partitioner_->SetParameters(List_));
01200   IFPACK_CHK_ERR(Partitioner_->Compute());
01201 
01202   // get actual number of partitions
01203   NumLocalBlocks_ = Partitioner_->NumLocalParts();
01204 
01205   // weight of each vertex
01206   W_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
01207   W_->PutScalar(0.0);
01208 
01209   for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
01210 
01211     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
01212       int LID = (*Partitioner_)(i,j);
01213       (*W_)[LID]++;
01214     }
01215   }
01216   W_->Reciprocal(*W_);
01217 
01218   InitializeTime_ += Time_.ElapsedTime();
01219   IsInitialized_ = true;
01220   ++NumInitialize_;
01221 
01222   return(0);
01223 }
01224 
01225 //==============================================================================
01226 #endif // IFPACK_BLOCKPRECONDITIONER_H
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends