IFPACK Development
Ifpack_BlockRelaxation.h
00001 #ifndef IFPACK_BLOCKPRECONDITIONER_H
00002 #define IFPACK_BLOCKPRECONDITIONER_H
00003 
00004 #include "Ifpack_ConfigDefs.h"
00005 #include "Ifpack_Preconditioner.h" 
00006 #include "Ifpack_Partitioner.h"
00007 #include "Ifpack_LinearPartitioner.h"
00008 #include "Ifpack_GreedyPartitioner.h"
00009 #include "Ifpack_METISPartitioner.h"
00010 #include "Ifpack_EquationPartitioner.h"
00011 #include "Ifpack_UserPartitioner.h"
00012 #include "Ifpack_Graph_Epetra_RowMatrix.h"
00013 #include "Ifpack_DenseContainer.h" 
00014 #include "Ifpack_Utils.h" 
00015 #include "Teuchos_ParameterList.hpp"
00016 #include "Teuchos_RefCountPtr.hpp"
00017 #include "Epetra_RowMatrix.h"
00018 #include "Epetra_MultiVector.h"
00019 #include "Epetra_Vector.h"
00020 #include "Epetra_Time.h"
00021 #include "Epetra_Import.h"
00022 
00023 static const int IFPACK_JACOBI = 0;
00024 static const int IFPACK_GS = 1;
00025 static const int IFPACK_SGS = 2;
00026 
00027 
00029 
00092 template<typename T>
00093 class Ifpack_BlockRelaxation : public Ifpack_Preconditioner {
00094 
00095 public:
00096 
00098 
00099 
00104   Ifpack_BlockRelaxation(const Epetra_RowMatrix* Matrix);
00105 
00106   virtual ~Ifpack_BlockRelaxation();
00107 
00109 
00111 
00113 
00121   virtual int Apply(const Epetra_MultiVector& X, 
00122             Epetra_MultiVector& Y) const;
00123 
00125 
00134   virtual int ApplyInverse(const Epetra_MultiVector& X, 
00135                Epetra_MultiVector& Y) const;
00136 
00138   virtual double NormInf() const
00139   {
00140     return(-1.0);
00141   }
00143 
00145 
00146   virtual int SetUseTranspose(bool UseTranspose_in)
00147   {
00148     if (UseTranspose_in)
00149       IFPACK_CHK_ERR(-98); // FIXME: can I work with the transpose?
00150     return(0);
00151   }
00152 
00153   virtual const char* Label() const;
00154  
00156   virtual bool UseTranspose() const
00157   {
00158     return(false);
00159   }
00160 
00162   virtual bool HasNormInf() const
00163   {
00164     return(false);
00165   }
00166 
00168   virtual const Epetra_Comm & Comm() const;
00169 
00171   virtual const Epetra_Map & OperatorDomainMap() const;
00172 
00174   virtual const Epetra_Map & OperatorRangeMap() const;
00176 
00178   int NumLocalBlocks() const 
00179   {
00180     return(NumLocalBlocks_);
00181   }
00182 
00184   virtual bool IsInitialized() const
00185   {
00186     return(IsInitialized_);
00187   }
00188 
00190   virtual bool IsComputed() const
00191   {
00192     return(IsComputed_);
00193   }
00194 
00196   virtual int SetParameters(Teuchos::ParameterList& List);
00197 
00199   virtual int Initialize();
00200 
00202   virtual int Compute();
00203 
00204   virtual const Epetra_RowMatrix& Matrix() const
00205   {
00206     return(*Matrix_);
00207   }
00208 
00209   virtual double Condest(const Ifpack_CondestType CT = Ifpack_Cheap,
00210                          const int MaxIters = 1550,
00211                          const double Tol = 1e-9,
00212              Epetra_RowMatrix* Matrix_in = 0)
00213   {
00214     return(-1.0);
00215   }
00216 
00217   virtual double Condest() const
00218   {
00219     return(-1.0);
00220   }
00221 
00222   std::ostream& Print(std::ostream& os) const;
00223 
00225   virtual int NumInitialize() const
00226   {
00227     return(NumInitialize_);
00228   }
00229 
00231   virtual int NumCompute() const
00232   {
00233     return(NumCompute_);
00234   }
00235 
00237   virtual int NumApplyInverse() const
00238   {
00239     return(NumApplyInverse_);
00240   }
00241 
00243   virtual double InitializeTime() const
00244   {
00245     return(InitializeTime_);
00246   }
00247 
00249   virtual double ComputeTime() const
00250   {
00251     return(ComputeTime_);
00252   }
00253 
00255   virtual double ApplyInverseTime() const
00256   {
00257     return(ApplyInverseTime_);
00258   }
00259 
00261   virtual double InitializeFlops() const
00262   {
00263 #ifdef IFPACK_FLOPCOUNTERS
00264     if (Containers_.size() == 0)
00265       return(0.0);
00266 
00267     // the total number of flops is computed each time InitializeFlops() is
00268     // called. This is becase I also have to add the contribution from each
00269     // container.
00270     double total = InitializeFlops_;
00271     for (unsigned int i = 0 ; i < Containers_.size() ; ++i)
00272       total += Containers_[i]->InitializeFlops();
00273     return(total);
00274 #else
00275     return(0.0);
00276 #endif
00277   }
00278 
00279   virtual double ComputeFlops() const
00280   {
00281 #ifdef IFPACK_FLOPCOUNTERS
00282     if (Containers_.size() == 0)
00283       return(0.0);
00284     
00285     double total = ComputeFlops_;
00286     for (unsigned int i = 0 ; i < Containers_.size() ; ++i)
00287       total += Containers_[i]->ComputeFlops();
00288     return(total);
00289 #else
00290     return(0.0);
00291 #endif
00292   }
00293 
00294   virtual double ApplyInverseFlops() const
00295   {
00296 #ifdef IFPACK_FLOPCOUNTERS
00297     if (Containers_.size() == 0)
00298       return(0.0);
00299 
00300     double total = ApplyInverseFlops_;
00301     for (unsigned int i = 0 ; i < Containers_.size() ; ++i) {
00302       total += Containers_[i]->ApplyInverseFlops();
00303     }
00304     return(total);
00305 #else
00306     return(0.0);
00307 #endif
00308   }
00309 
00310 private:
00311 
00313   Ifpack_BlockRelaxation(const Ifpack_BlockRelaxation& rhs);
00314 
00316   Ifpack_BlockRelaxation & operator=(const Ifpack_BlockRelaxation& rhs)
00317   {
00318     return(*this);
00319   }
00320 
00321   virtual int ApplyInverseJacobi(const Epetra_MultiVector& X, 
00322                                  Epetra_MultiVector& Y) const;
00323 
00324   virtual int DoJacobi(const Epetra_MultiVector& X, 
00325                                   Epetra_MultiVector& Y) const;
00326 
00327   virtual int ApplyInverseGS(const Epetra_MultiVector& X, 
00328                              Epetra_MultiVector& Y) const;
00329 
00330   virtual int DoGaussSeidel(Epetra_MultiVector& X, 
00331                             Epetra_MultiVector& Y) const;
00332 
00333   virtual int ApplyInverseSGS(const Epetra_MultiVector& X, 
00334                               Epetra_MultiVector& Y) const;
00335 
00336   virtual int DoSGS(const Epetra_MultiVector& X,
00337                     Epetra_MultiVector& Xtmp,
00338                     Epetra_MultiVector& Y) const;
00339 
00340   int ExtractSubmatrices();
00341 
00342   // @{ Initializations, timing and flops
00343 
00345   bool IsInitialized_;
00347   bool IsComputed_;
00349   int NumInitialize_;
00351   int NumCompute_;
00353   mutable int NumApplyInverse_;
00355   double InitializeTime_;
00357   double ComputeTime_;
00359   mutable double ApplyInverseTime_;
00361   double InitializeFlops_;
00363   double ComputeFlops_;
00365   mutable double ApplyInverseFlops_;
00366   // @}
00367 
00368   // @{ Settings
00370   int NumSweeps_;
00372   double DampingFactor_;
00374   int NumLocalBlocks_;
00376   Teuchos::ParameterList List_;
00377   // @}
00378 
00379   // @{ Other data
00382   Teuchos::RefCountPtr< const Epetra_RowMatrix > Matrix_;
00383   mutable std::vector<Teuchos::RefCountPtr<T> > Containers_;
00385   Teuchos::RefCountPtr<Ifpack_Partitioner> Partitioner_;
00386   string PartitionerType_;
00387   int PrecType_;
00389   string Label_;
00391   bool ZeroStartingSolution_;
00392   Teuchos::RefCountPtr<Ifpack_Graph> Graph_;
00394   Teuchos::RefCountPtr<Epetra_Vector> W_;
00395   // Level of overlap among blocks (for Jacobi only).
00396   int OverlapLevel_;
00397   mutable Epetra_Time Time_;
00398   bool IsParallel_;
00399   Teuchos::RefCountPtr<Epetra_Import> Importer_;
00400   // @}
00401   
00402 }; // class Ifpack_BlockRelaxation
00403 
00404 //==============================================================================
00405 template<typename T>
00406 Ifpack_BlockRelaxation<T>::
00407 Ifpack_BlockRelaxation(const Epetra_RowMatrix* Matrix_in) :
00408   IsInitialized_(false),
00409   IsComputed_(false),
00410   NumInitialize_(0),
00411   NumCompute_(0),
00412   NumApplyInverse_(0),
00413   InitializeTime_(0.0),
00414   ComputeTime_(0.0),
00415   ApplyInverseTime_(0.0),
00416   InitializeFlops_(0.0),
00417   ComputeFlops_(0.0),
00418   ApplyInverseFlops_(0.0),
00419   NumSweeps_(1),
00420   DampingFactor_(1.0),
00421   NumLocalBlocks_(1),
00422   Matrix_(Teuchos::rcp(Matrix_in,false)),
00423   PartitionerType_("greedy"),
00424   PrecType_(IFPACK_JACOBI),
00425   ZeroStartingSolution_(true),
00426   OverlapLevel_(0),
00427   Time_(Comm()),
00428   IsParallel_(false)
00429 {
00430   if (Matrix_in->Comm().NumProc() != 1)
00431     IsParallel_ = true;
00432 }
00433 
00434 //==============================================================================
00435 template<typename T>
00436 Ifpack_BlockRelaxation<T>::~Ifpack_BlockRelaxation()
00437 {
00438 }
00439 
00440 //==============================================================================
00441 template<typename T>
00442 const char* Ifpack_BlockRelaxation<T>::Label() const
00443 {
00444   return(Label_.c_str());
00445 }
00446 
00447 //==============================================================================
00448 template<typename T>
00449 int Ifpack_BlockRelaxation<T>::
00450 Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00451 {
00452   IFPACK_RETURN(Matrix().Apply(X,Y));
00453 }
00454 
00455 //==============================================================================
00456 template<typename T>
00457 const Epetra_Comm& Ifpack_BlockRelaxation<T>::
00458 Comm() const
00459 {
00460   return(Matrix().Comm());
00461 }
00462 
00463 //==============================================================================
00464 template<typename T>
00465 const Epetra_Map& Ifpack_BlockRelaxation<T>::
00466 OperatorDomainMap() const
00467 {
00468   return(Matrix().OperatorDomainMap());
00469 }
00470 
00471 //==============================================================================
00472 template<typename T>
00473 const Epetra_Map& Ifpack_BlockRelaxation<T>::
00474 OperatorRangeMap() const
00475 {
00476   return(Matrix().OperatorRangeMap());
00477 }
00478 
00479 //==============================================================================
00480 template<typename T>
00481 int Ifpack_BlockRelaxation<T>::ExtractSubmatrices()
00482 {
00483 
00484   if (Partitioner_ == Teuchos::null)
00485     IFPACK_CHK_ERR(-3);
00486 
00487   NumLocalBlocks_ = Partitioner_->NumLocalParts();
00488 
00489   Containers_.resize(NumLocalBlocks());
00490 
00491   for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00492 
00493     int rows = Partitioner_->NumRowsInPart(i);
00494     Containers_[i] = Teuchos::rcp( new T(rows) );
00495     
00496     //Ifpack_DenseContainer* DC = 0;
00497     //DC = dynamic_cast<Ifpack_DenseContainer*>(Containers_[i]);
00498 
00499     if (Containers_[i] == Teuchos::null)
00500       IFPACK_CHK_ERR(-5);
00501     
00502     IFPACK_CHK_ERR(Containers_[i]->SetParameters(List_));
00503     IFPACK_CHK_ERR(Containers_[i]->Initialize());
00504     // flops in Initialize() will be computed on-the-fly in method InitializeFlops().
00505 
00506     // set "global" ID of each partitioner row
00507     for (int j = 0 ; j < rows ; ++j) {
00508       int LRID = (*Partitioner_)(i,j);
00509       Containers_[i]->ID(j) = LRID;
00510     }
00511 
00512     IFPACK_CHK_ERR(Containers_[i]->Compute(*Matrix_));
00513     // flops in Compute() will be computed on-the-fly in method ComputeFlops().
00514 
00515   }
00516 
00517   return(0);
00518 }
00519 
00520 //==============================================================================
00521 template<typename T>
00522 int Ifpack_BlockRelaxation<T>::Compute()
00523 {
00524 
00525   if (!IsInitialized())
00526     IFPACK_CHK_ERR(Initialize());
00527 
00528   Time_.ResetStartTime();
00529 
00530   IsComputed_ = false;
00531 
00532   if (Matrix().NumGlobalRows() != Matrix().NumGlobalCols())
00533     IFPACK_CHK_ERR(-2); // only square matrices
00534 
00535   IFPACK_CHK_ERR(ExtractSubmatrices());
00536   
00537   if (IsParallel_ && PrecType_ != IFPACK_JACOBI) {
00538     // not needed by Jacobi (done by matvec)
00539     Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
00540                                                 Matrix().RowMatrixRowMap()) );
00541 
00542     if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
00543   }
00544   IsComputed_ = true;
00545   ComputeTime_ += Time_.ElapsedTime();
00546   ++NumCompute_;
00547 
00548   return(0);
00549 
00550 }
00551 
00552 //==============================================================================
00553 template<typename T>
00554 int Ifpack_BlockRelaxation<T>::
00555 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00556 {
00557   if (!IsComputed())
00558     IFPACK_CHK_ERR(-3);
00559 
00560   if (X.NumVectors() != Y.NumVectors())
00561     IFPACK_CHK_ERR(-2);
00562 
00563   Time_.ResetStartTime();
00564 
00565   // AztecOO gives X and Y pointing to the same memory location,
00566   // need to create an auxiliary vector, Xcopy
00567   Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
00568   if (X.Pointers()[0] == Y.Pointers()[0])
00569     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00570   else
00571     Xcopy = Teuchos::rcp( &X, false );
00572 
00573   switch (PrecType_) {
00574   case IFPACK_JACOBI:
00575     IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
00576     break;
00577   case IFPACK_GS:
00578     IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
00579     break;
00580   case IFPACK_SGS:
00581     IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
00582     break;
00583   }
00584 
00585   ApplyInverseTime_ += Time_.ElapsedTime();
00586   ++NumApplyInverse_;
00587 
00588   return(0);
00589 }
00590 
00591 //==============================================================================
00592 // This method in general will not work with AztecOO if used
00593 // outside Ifpack_AdditiveSchwarz and OverlapLevel_ != 0
00594 //
00595 template<typename T>
00596 int Ifpack_BlockRelaxation<T>::
00597 ApplyInverseJacobi(const Epetra_MultiVector& X, 
00598                    Epetra_MultiVector& Y) const
00599 {
00600 
00601   if (ZeroStartingSolution_)
00602     Y.PutScalar(0.0);
00603 
00604   // do not compute the residual in this case
00605   if (NumSweeps_ == 1 && ZeroStartingSolution_) {
00606     IFPACK_RETURN(DoJacobi(X,Y));
00607   }
00608 
00609   Epetra_MultiVector AX(Y);
00610 
00611   for (int j = 0; j < NumSweeps_ ; j++) {
00612     IFPACK_CHK_ERR(Apply(Y,AX));
00613     ApplyInverseFlops_ += X.NumVectors() * 2 * Matrix_->NumGlobalNonzeros();
00614     IFPACK_CHK_ERR(AX.Update(1.0,X,-1.0));
00615     ApplyInverseFlops_ += X.NumVectors() * 2 * Matrix_->NumGlobalRows();
00616     IFPACK_CHK_ERR(DoJacobi(AX,Y));
00617     // flops counted in DoJacobi()
00618   }
00619 
00620 
00621   return(0);
00622 }
00623 
00624 //==============================================================================
00625 template<typename T>
00626 int Ifpack_BlockRelaxation<T>::
00627 DoJacobi(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00628 {
00629   int NumVectors = X.NumVectors();
00630 
00631   if (OverlapLevel_ == 0) {
00632 
00633     for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00634      
00635       // may happen that a partition is empty
00636       if (Containers_[i]->NumRows() == 0) 
00637         continue;
00638 
00639       int LID;
00640 
00641       // extract RHS from X
00642       for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00643         LID = Containers_[i]->ID(j);
00644         for (int k = 0 ; k < NumVectors ; ++k) {
00645           Containers_[i]->RHS(j,k) = X[k][LID];
00646         }
00647       }
00648 
00649       // apply the inverse of each block. NOTE: flops occurred
00650       // in ApplyInverse() of each block are summed up in method
00651       // ApplyInverseFlops().
00652       IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
00653 
00654       // copy back into solution vector Y
00655       for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00656         LID = Containers_[i]->ID(j);
00657         for (int k = 0 ; k < NumVectors ; ++k) {
00658           Y[k][LID] += DampingFactor_ * Containers_[i]->LHS(j,k);
00659         }
00660       }
00661 
00662     }
00663     // NOTE: flops for ApplyInverse() of each block are summed up
00664     // in method ApplyInverseFlops()
00665 #ifdef IFPACK_FLOPCOUNTERS
00666     ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalRows();
00667 #endif
00668 
00669   }
00670   else {
00671 
00672     for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00673 
00674       // may happen that a partition is empty
00675       if (Containers_[i]->NumRows() == 0) 
00676         continue;
00677 
00678       int LID;
00679 
00680       // extract RHS from X
00681       for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00682         LID = Containers_[i]->ID(j);
00683         for (int k = 0 ; k < NumVectors ; ++k) {
00684           Containers_[i]->RHS(j,k) = (*W_)[LID] * X[k][LID];
00685         }
00686       }
00687 
00688       // apply the inverse of each block
00689       IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
00690 
00691       // copy back into solution vector Y
00692       for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00693         LID = Containers_[i]->ID(j);
00694         for (int k = 0 ; k < NumVectors ; ++k) {
00695           Y[k][LID] += DampingFactor_ * (*W_)[LID] * Containers_[i]->LHS(j,k);
00696         }
00697       }
00698 
00699     }
00700     // NOTE: flops for ApplyInverse() of each block are summed up
00701     // in method ApplyInverseFlops()
00702     // NOTE: do not count for simplicity the flops due to overlapping rows
00703 #ifdef IFPACK_FLOPCOUNTERS
00704     ApplyInverseFlops_ += NumVectors * 4 * Matrix_->NumGlobalRows();
00705 #endif
00706   }
00707 
00708   return(0);
00709 }
00710 
00711 //==============================================================================
00712 template<typename T>
00713 int Ifpack_BlockRelaxation<T>::
00714 ApplyInverseGS(const Epetra_MultiVector& X, 
00715                Epetra_MultiVector& Y) const
00716 {
00717 
00718   if (ZeroStartingSolution_)
00719     Y.PutScalar(0.0);
00720 
00721   Epetra_MultiVector Xcopy(X);
00722   for (int j = 0; j < NumSweeps_ ; j++) {
00723     IFPACK_CHK_ERR(DoGaussSeidel(Xcopy,Y));
00724     if (j != NumSweeps_ - 1)
00725       Xcopy = X;
00726   }
00727 
00728   return(0);
00729 
00730 }
00731 
00732 //==============================================================================
00733 template<typename T>
00734 int Ifpack_BlockRelaxation<T>::
00735 DoGaussSeidel(Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00736 {
00737 
00738   // cycle over all local subdomains
00739 
00740   int Length = Matrix().MaxNumEntries();
00741   std::vector<int> Indices(Length);
00742   std::vector<double> Values(Length);
00743 
00744   int NumMyRows = Matrix().NumMyRows();
00745   int NumVectors = X.NumVectors();
00746 
00747   // an additonal vector is needed by parallel computations
00748   // (note that applications through Ifpack_AdditiveSchwarz
00749   // are always seen are serial)
00750   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00751   if (IsParallel_)
00752     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00753   else
00754     Y2 = Teuchos::rcp( &Y, false );
00755 
00756   double** y_ptr;
00757   double** y2_ptr;
00758   Y.ExtractView(&y_ptr);
00759   Y2->ExtractView(&y2_ptr);
00760 
00761   // data exchange is here, once per sweep
00762   if (IsParallel_)
00763     IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00764 
00765   for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00766 
00767     // may happen that a partition is empty
00768     if (Containers_[i]->NumRows() == 0) 
00769       continue;
00770 
00771     int LID;
00772 
00773     // update from previous block
00774 
00775     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00776       LID = Containers_[i]->ID(j);
00777 
00778       int NumEntries;
00779       IFPACK_CHK_ERR(Matrix().ExtractMyRowCopy(LID, Length,NumEntries,
00780                                                &Values[0], &Indices[0]));
00781 
00782       for (int k = 0 ; k < NumEntries ; ++k) {
00783         int col = Indices[k];
00784 
00785           for (int kk = 0 ; kk < NumVectors ; ++kk) {
00786             X[kk][LID] -= Values[k] * y2_ptr[kk][col];
00787           }
00788       }
00789     }
00790 
00791     // solve with this block
00792 
00793     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00794       LID = Containers_[i]->ID(j);
00795       for (int k = 0 ; k < NumVectors ; ++k) {
00796         Containers_[i]->RHS(j,k) = X[k][LID];
00797       }
00798     }
00799 
00800     IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
00801 #ifdef IFPACK_FLOPCOUNTERS
00802     ApplyInverseFlops_ += Containers_[i]->ApplyInverseFlops();
00803 #endif
00804 
00805     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00806       LID = Containers_[i]->ID(j);
00807       for (int k = 0 ; k < NumVectors ; ++k) {
00808         y2_ptr[k][LID] += DampingFactor_ * Containers_[i]->LHS(j,k);
00809       }
00810     }
00811 
00812   }
00813 
00814   // operations for all getrow()'s
00815   // NOTE: flops for ApplyInverse() of each block are summed up
00816   // in method ApplyInverseFlops()
00817 #ifdef IFPACK_FLOPCOUNTERS
00818   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalNonzeros();
00819   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalRows();
00820 #endif
00821 
00822   // Attention: this is delicate... Not all combinations
00823   // of Y2 and Y will always work (tough for ML it should be ok)
00824   if (IsParallel_)
00825     for (int m = 0 ; m < NumVectors ; ++m) 
00826       for (int i = 0 ; i < NumMyRows ; ++i)
00827         y_ptr[m][i] = y2_ptr[m][i];
00828 
00829   return(0);
00830 }
00831 
00832 //==============================================================================
00833 template<typename T>
00834 int Ifpack_BlockRelaxation<T>::
00835 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
00836 {
00837 
00838   if (ZeroStartingSolution_)
00839     Y.PutScalar(0.0);
00840 
00841   Epetra_MultiVector Xcopy(X);
00842   for (int j = 0; j < NumSweeps_ ; j++) {
00843     IFPACK_CHK_ERR(DoSGS(X,Xcopy,Y));
00844     if (j != NumSweeps_ - 1)
00845       Xcopy = X;
00846   }
00847   return(0);
00848 }
00849 
00850 //==============================================================================
00851 template<typename T>
00852 int Ifpack_BlockRelaxation<T>::
00853 DoSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Xcopy, 
00854       Epetra_MultiVector& Y) const
00855 {
00856 
00857   int NumMyRows = Matrix().NumMyRows();
00858   int NumVectors = X.NumVectors();
00859 
00860   int Length = Matrix().MaxNumEntries();
00861   std::vector<int> Indices;
00862   std::vector<double> Values;
00863   Indices.resize(Length);
00864   Values.resize(Length);
00865 
00866   // an additonal vector is needed by parallel computations
00867   // (note that applications through Ifpack_AdditiveSchwarz
00868   // are always seen are serial)
00869   Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
00870   if (IsParallel_)
00871     Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
00872   else
00873     Y2 = Teuchos::rcp( &Y, false );
00874 
00875   double** y_ptr;
00876   double** y2_ptr;
00877   Y.ExtractView(&y_ptr);
00878   Y2->ExtractView(&y2_ptr);
00879 
00880   // data exchange is here, once per sweep
00881   if (IsParallel_)
00882     IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
00883 
00884   for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
00885 
00886     // may happen that a partition is empty
00887     if (Containers_[i]->NumRows() == 0) 
00888       continue;
00889 
00890     int LID;
00891 
00892     // update from previous block
00893 
00894     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00895       LID = Containers_[i]->ID(j);
00896 
00897       int NumEntries;
00898       IFPACK_CHK_ERR(Matrix().ExtractMyRowCopy(LID, Length,NumEntries,
00899                                                &Values[0], &Indices[0]));
00900 
00901       for (int k = 0 ; k < NumEntries ; ++k) {
00902         int col = Indices[k];
00903 
00904         for (int kk = 0 ; kk < NumVectors ; ++kk) {
00905           Xcopy[kk][LID] -= Values[k] * y2_ptr[kk][col];
00906         }
00907       }
00908     }
00909 
00910     // solve with this block
00911 
00912     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00913       LID = Containers_[i]->ID(j);
00914       for (int k = 0 ; k < NumVectors ; ++k) {
00915         Containers_[i]->RHS(j,k) = Xcopy[k][LID];
00916       }
00917     }
00918 
00919     IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
00920 #ifdef IFPACK_FLOPCOUNTERS
00921     ApplyInverseFlops_ += Containers_[i]->ApplyInverseFlops();
00922 #endif
00923 
00924     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00925       LID = Containers_[i]->ID(j);
00926       for (int k = 0 ; k < NumVectors ; ++k) {
00927         y2_ptr[k][LID] += DampingFactor_ * Containers_[i]->LHS(j,k);
00928       }
00929     }
00930   }
00931 
00932 #ifdef IFPACK_FLOPCOUNTERS
00933   // operations for all getrow()'s
00934   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalNonzeros();
00935   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalRows();
00936 #endif
00937 
00938   Xcopy = X;
00939 
00940   for (int i = NumLocalBlocks() - 1; i >=0 ; --i) {
00941 
00942     if (Containers_[i]->NumRows() == 0) 
00943       continue;
00944 
00945     int LID;
00946 
00947     // update from previous block
00948 
00949     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00950       LID = Containers_[i]->ID(j);
00951 
00952       int NumEntries;
00953       IFPACK_CHK_ERR(Matrix().ExtractMyRowCopy(LID, Length,NumEntries,
00954                                                &Values[0], &Indices[0]));
00955 
00956       for (int k = 0 ; k < NumEntries ; ++k) {
00957         int col = Indices[k];
00958 
00959           for (int kk = 0 ; kk < NumVectors ; ++kk) {
00960             Xcopy[kk][LID] -= Values[k] * y2_ptr[kk][col];
00961           }
00962       }
00963     }
00964 
00965     // solve with this block
00966 
00967     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00968       LID = Containers_[i]->ID(j);
00969       for (int k = 0 ; k < NumVectors ; ++k) {
00970         Containers_[i]->RHS(j,k) = Xcopy[k][LID];
00971       }
00972     }
00973 
00974     IFPACK_CHK_ERR(Containers_[i]->ApplyInverse());
00975 #ifdef IFPACK_FLOPCOUNTERS
00976     ApplyInverseFlops_ += Containers_[i]->ApplyInverseFlops();
00977 #endif
00978 
00979     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
00980       LID = Containers_[i]->ID(j);
00981       for (int k = 0 ; k < NumVectors ; ++k) {
00982         y2_ptr[k][LID] += DampingFactor_ * Containers_[i]->LHS(j,k);
00983       }
00984     }
00985   }
00986 
00987 #ifdef IFPACK_FLOPCOUNTERS
00988   // operations for all getrow()'s
00989   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalNonzeros();
00990   ApplyInverseFlops_ += NumVectors * 2 * Matrix_->NumGlobalRows();
00991 #endif
00992 
00993   // Attention: this is delicate... Not all combinations
00994   // of Y2 and Y will always work (tough for ML it should be ok)
00995   if (IsParallel_)
00996     for (int m = 0 ; m < NumVectors ; ++m) 
00997       for (int i = 0 ; i < NumMyRows ; ++i)
00998         y_ptr[m][i] = y2_ptr[m][i];
00999 
01000   return(0);
01001 }
01002 
01003 //==============================================================================
01004 template<typename T>
01005 ostream& Ifpack_BlockRelaxation<T>::Print(ostream & os) const
01006 {
01007 
01008   string PT;
01009   if (PrecType_ == IFPACK_JACOBI)
01010     PT = "Jacobi";
01011   else if (PrecType_ == IFPACK_GS)
01012     PT = "Gauss-Seidel";
01013   else if (PrecType_ == IFPACK_SGS)
01014     PT = "symmetric Gauss-Seidel";
01015 
01016   if (!Comm().MyPID()) {
01017     os << endl;
01018     os << "================================================================================" << endl;
01019     os << "Ifpack_BlockRelaxation, " << PT << endl;
01020     os << "Sweeps = " << NumSweeps_ << endl;
01021     os << "Damping factor = " << DampingFactor_;
01022     if (ZeroStartingSolution_) 
01023       os << ", using zero starting solution" << endl;
01024     else
01025       os << ", using input starting solution" << endl;
01026     os << "Number of local blocks = " << Partitioner_->NumLocalParts() << endl;
01027     //os << "Condition number estimate = " << Condest_ << endl;
01028     os << "Global number of rows            = " << Matrix_->NumGlobalRows() << endl;
01029     os << endl;
01030     os << "Phase           # calls   Total Time (s)       Total MFlops     MFlops/s" << endl;
01031     os << "-----           -------   --------------       ------------     --------" << endl;
01032     os << "Initialize()    "   << std::setw(5) << NumInitialize() 
01033        << "  " << std::setw(15) << InitializeTime() 
01034        << "  " << std::setw(15) << 1.0e-6 * InitializeFlops();
01035     if (InitializeTime() != 0.0)
01036       os << "  " << std::setw(15) << 1.0e-6 * InitializeFlops() / InitializeTime() << endl;
01037     else
01038       os << "  " << std::setw(15) << 0.0 << endl;
01039     os << "Compute()       "   << std::setw(5) << NumCompute() 
01040        << "  " << std::setw(15) << ComputeTime()
01041        << "  " << std::setw(15) << 1.0e-6 * ComputeFlops();
01042     if (ComputeTime() != 0.0) 
01043       os << "  " << std::setw(15) << 1.0e-6 * ComputeFlops() / ComputeTime() << endl;
01044     else
01045       os << "  " << std::setw(15) << 0.0 << endl;
01046     os << "ApplyInverse()  "   << std::setw(5) << NumApplyInverse() 
01047        << "  " << std::setw(15) << ApplyInverseTime()
01048        << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops();
01049     if (ApplyInverseTime() != 0.0) 
01050       os << "  " << std::setw(15) << 1.0e-6 * ApplyInverseFlops() / ApplyInverseTime() << endl;
01051     else
01052       os << "  " << std::setw(15) << 0.0 << endl;
01053     os << "================================================================================" << endl;
01054     os << endl;
01055   }
01056 
01057   return(os);
01058 }
01059 
01060 //==============================================================================
01061 template<typename T>
01062 int Ifpack_BlockRelaxation<T>::SetParameters(Teuchos::ParameterList& List)
01063 {
01064 
01065   string PT;
01066   if (PrecType_ == IFPACK_JACOBI)
01067     PT = "Jacobi";
01068   else if (PrecType_ == IFPACK_GS)
01069     PT = "Gauss-Seidel";
01070   else if (PrecType_ == IFPACK_SGS)
01071     PT = "symmetric Gauss-Seidel";
01072 
01073   PT = List.get("relaxation: type", PT);
01074 
01075   if (PT == "Jacobi") {
01076     PrecType_ = IFPACK_JACOBI;
01077   }
01078   else if (PT == "Gauss-Seidel") {
01079     PrecType_ = IFPACK_GS;
01080   }
01081   else if (PT == "symmetric Gauss-Seidel") {
01082     PrecType_ = IFPACK_SGS;
01083   } else {
01084     cerr << "Option `relaxation: type' has an incorrect value ("
01085       << PT << ")'" << endl;
01086     cerr << "(file " << __FILE__ << ", line " << __LINE__ << ")" << endl;
01087     exit(EXIT_FAILURE);
01088   }
01089 
01090   NumSweeps_            = List.get("relaxation: sweeps", NumSweeps_);
01091   DampingFactor_        = List.get("relaxation: damping factor", 
01092                                    DampingFactor_);
01093   ZeroStartingSolution_ = List.get("relaxation: zero starting solution", 
01094                                    ZeroStartingSolution_);
01095   PartitionerType_      = List.get("partitioner: type", 
01096                                    PartitionerType_);
01097   NumLocalBlocks_       = List.get("partitioner: local parts", 
01098                                    NumLocalBlocks_);
01099   // only Jacobi can work with overlap among local domains,
01100   OverlapLevel_         = List.get("partitioner: overlap", 
01101                                    OverlapLevel_);
01102 
01103   // check parameters
01104   if (PrecType_ != IFPACK_JACOBI)
01105     OverlapLevel_ = 0;
01106   if (NumLocalBlocks_ < 0)
01107     NumLocalBlocks_ = Matrix().NumMyRows() / (-NumLocalBlocks_);
01108   // other checks are performed in Partitioner_
01109   
01110   // copy the list as each subblock's constructor will
01111   // require it later
01112   List_ = List;
01113 
01114   // set the label
01115   string PT2;
01116   if (PrecType_ == IFPACK_JACOBI)
01117     PT2 = "BJ";
01118   else if (PrecType_ == IFPACK_GS)
01119     PT2 = "BGS";
01120   else if (PrecType_ == IFPACK_SGS)
01121     PT2 = "BSGS";
01122   Label_ = "IFPACK (" + PT2 + ", sweeps=" 
01123     + Ifpack_toString(NumSweeps_) + ", damping="
01124     + Ifpack_toString(DampingFactor_) + ", blocks="
01125     + Ifpack_toString(NumLocalBlocks()) + ")";
01126 
01127   return(0);
01128 }
01129 
01130 //==============================================================================
01131 template<typename T>
01132 int Ifpack_BlockRelaxation<T>::Initialize()
01133 {
01134   IsInitialized_ = false;
01135   Time_.ResetStartTime();
01136 
01137   Graph_ = Teuchos::rcp( new Ifpack_Graph_Epetra_RowMatrix(Teuchos::rcp(&Matrix(),false)) );
01138   if (Graph_ == Teuchos::null) IFPACK_CHK_ERR(-5);
01139 
01140   if (PartitionerType_ == "linear")
01141     Partitioner_ = Teuchos::rcp( new Ifpack_LinearPartitioner(&*Graph_) );
01142   else if (PartitionerType_ == "greedy")
01143     Partitioner_ = Teuchos::rcp( new Ifpack_GreedyPartitioner(&*Graph_) );
01144   else if (PartitionerType_ == "metis")
01145     Partitioner_ = Teuchos::rcp( new Ifpack_METISPartitioner(&*Graph_) );
01146   else if (PartitionerType_ == "equation")
01147     Partitioner_ = Teuchos::rcp( new Ifpack_EquationPartitioner(&*Graph_) );
01148   else if (PartitionerType_ == "user")
01149     Partitioner_ = Teuchos::rcp( new Ifpack_UserPartitioner(&*Graph_) );
01150   else
01151     IFPACK_CHK_ERR(-2);
01152 
01153   if (Partitioner_ == Teuchos::null) IFPACK_CHK_ERR(-5);
01154 
01155   // need to partition the graph of A
01156   IFPACK_CHK_ERR(Partitioner_->SetParameters(List_));
01157   IFPACK_CHK_ERR(Partitioner_->Compute());
01158 
01159   // get actual number of partitions
01160   NumLocalBlocks_ = Partitioner_->NumLocalParts();
01161 
01162   // weight of each vertex
01163   W_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
01164   W_->PutScalar(0.0);
01165 
01166   for (int i = 0 ; i < NumLocalBlocks() ; ++i) {
01167 
01168     for (int j = 0 ; j < Partitioner_->NumRowsInPart(i) ; ++j) {
01169       int LID = (*Partitioner_)(i,j);
01170       (*W_)[LID]++;
01171     }
01172   }
01173   W_->Reciprocal(*W_);
01174 
01175   InitializeTime_ += Time_.ElapsedTime();
01176   IsInitialized_ = true;
01177   ++NumInitialize_;
01178 
01179   return(0);
01180 }
01181 
01182 //==============================================================================
01183 #endif // IFPACK_BLOCKPRECONDITIONER_H
 All Classes Files Functions Variables Enumerations Friends