Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_BlockRelaxation_def.hpp
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #ifndef IFPACK2_BLOCKRELAXATION_DEF_HPP
00044 #define IFPACK2_BLOCKRELAXATION_DEF_HPP
00045 
00046 #include "Ifpack2_BlockRelaxation_decl.hpp"
00047 #include "Ifpack2_LinearPartitioner_decl.hpp"
00048 #include <Ifpack2_Condest.hpp>
00049 #include <Ifpack2_Parameters.hpp>
00050 
00051 namespace Ifpack2 {
00052 
00053 template<class MatrixType,class ContainerType>
00054 BlockRelaxation<MatrixType,ContainerType>::
00055 BlockRelaxation (const Teuchos::RCP<const row_matrix_type>& A)
00056 : A_ (A),
00057   Time_ (Teuchos::rcp (new Teuchos::Time ("Ifpack2::BlockRelaxation"))),
00058   OverlapLevel_ (0),
00059   PartitionerType_ ("linear"),
00060   NumSweeps_ (1),
00061   NumLocalBlocks_(0),
00062   PrecType_ (Ifpack2::Details::JACOBI),
00063   MinDiagonalValue_ (STS::zero ()),
00064   DampingFactor_ (STS::one ()),
00065   IsParallel_ (false),
00066   ZeroStartingSolution_ (true),
00067   DoBackwardGS_ (false),
00068   Condest_ (STS::real(-STS::one ())),
00069   IsInitialized_ (false),
00070   IsComputed_ (false),
00071   NumInitialize_ (0),
00072   NumCompute_ (0),
00073   NumApply_ (0),
00074   InitializeTime_ (0.0),
00075   ComputeTime_ (0.0),
00076   ApplyTime_ (0.0),
00077   ComputeFlops_ (0.0),
00078   ApplyFlops_ (0.0),
00079   NumMyRows_ (0),
00080   NumGlobalRows_ (0),
00081   NumGlobalNonzeros_ (0)
00082 {
00083   TEUCHOS_TEST_FOR_EXCEPTION(
00084     A_.is_null (), std::invalid_argument,
00085     Teuchos::typeName(*this) << "::BlockRelaxation(): input matrix is null.");
00086 }
00087 
00088 //==========================================================================
00089 template<class MatrixType,class ContainerType>
00090 BlockRelaxation<MatrixType,ContainerType>::~BlockRelaxation() {}
00091 
00092 //==========================================================================
00093 template<class MatrixType,class ContainerType>
00094 void
00095 BlockRelaxation<MatrixType,ContainerType>::
00096 setParameters (const Teuchos::ParameterList& List)
00097 {
00098   Teuchos::ParameterList validparams;
00099   Ifpack2::getValidParameters (validparams);
00100   List.validateParameters (validparams);
00101 
00102   std::string PT;
00103   if (PrecType_ == Ifpack2::Details::JACOBI) {
00104     PT = "Jacobi";
00105   } else if (PrecType_ == Ifpack2::Details::GS) {
00106     PT = "Gauss-Seidel";
00107   } else if (PrecType_ == Ifpack2::Details::SGS) {
00108     PT = "Symmetric Gauss-Seidel";
00109   }
00110 
00111   Ifpack2::getParameter (List, "relaxation: type", PT);
00112 
00113   if (PT == "Jacobi") {
00114     PrecType_ = Ifpack2::Details::JACOBI;
00115   }
00116   else if (PT == "Gauss-Seidel") {
00117     PrecType_ = Ifpack2::Details::GS;
00118   }
00119   else if (PT == "Symmetric Gauss-Seidel") {
00120     PrecType_ = Ifpack2::Details::SGS;
00121   }
00122   else {
00123     TEUCHOS_TEST_FOR_EXCEPTION(
00124       true, std::invalid_argument, "Ifpack2::BlockRelaxation::setParameters: "
00125       "Invalid parameter value \"" << PT << "\" for parameter \"relaxation: type\".");
00126   }
00127 
00128   Ifpack2::getParameter (List, "relaxation: sweeps",NumSweeps_);
00129   Ifpack2::getParameter (List, "relaxation: damping factor", DampingFactor_);
00130   Ifpack2::getParameter (List, "relaxation: min diagonal value", MinDiagonalValue_);
00131   Ifpack2::getParameter (List, "relaxation: zero starting solution", ZeroStartingSolution_);
00132   Ifpack2::getParameter (List, "relaxation: backward mode",DoBackwardGS_);
00133   Ifpack2::getParameter (List, "partitioner: type",PartitionerType_);
00134   Ifpack2::getParameter (List, "partitioner: local parts",NumLocalBlocks_);
00135   Ifpack2::getParameter (List, "partitioner: overlap",OverlapLevel_);
00136 
00137   // check parameters
00138   if (PrecType_ != Ifpack2::Details::JACOBI) {
00139     OverlapLevel_ = 0;
00140   }
00141   if (NumLocalBlocks_ < 0) {
00142     NumLocalBlocks_ = A_->getNodeNumRows() / (-NumLocalBlocks_);
00143   }
00144   // other checks are performed in Partitioner_
00145 
00146   // NTS: Sanity check to be removed at a later date when Backward mode is enabled
00147   TEUCHOS_TEST_FOR_EXCEPTION(
00148     DoBackwardGS_, std::runtime_error,
00149     "Ifpack2::BlockRelaxation:setParameters: \"relaxation: backward mode\" == "
00150     "true is not supported yet.");
00151 
00152   // copy the list as each subblock's constructor will
00153   // require it later
00154   List_ = List;
00155 }
00156 
00157 //==========================================================================
00158 template<class MatrixType,class ContainerType>
00159 Teuchos::RCP<const Teuchos::Comm<int> >
00160 BlockRelaxation<MatrixType,ContainerType>::getComm() const{
00161   return A_->getComm();
00162 }
00163 
00164 //==========================================================================
00165 template<class MatrixType,class ContainerType>
00166 Teuchos::RCP<const Tpetra::RowMatrix<typename MatrixType::scalar_type,
00167                                      typename MatrixType::local_ordinal_type,
00168                                      typename MatrixType::global_ordinal_type,
00169                                      typename MatrixType::node_type> >
00170 BlockRelaxation<MatrixType,ContainerType>::getMatrix() const {
00171   return(A_);
00172 }
00173 
00174 //==========================================================================
00175 template<class MatrixType,class ContainerType>
00176 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
00177                                typename MatrixType::global_ordinal_type,
00178                                typename MatrixType::node_type> >
00179 BlockRelaxation<MatrixType,ContainerType>::getDomainMap() const {
00180   return A_->getDomainMap();
00181 }
00182 
00183 //==========================================================================
00184 template<class MatrixType,class ContainerType>
00185 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
00186                                typename MatrixType::global_ordinal_type,
00187                                typename MatrixType::node_type> >
00188 BlockRelaxation<MatrixType,ContainerType>::getRangeMap() const {
00189   return A_->getRangeMap();
00190 }
00191 
00192 //==========================================================================
00193 template<class MatrixType,class ContainerType>
00194 bool BlockRelaxation<MatrixType,ContainerType>::hasTransposeApply() const {
00195   return true;
00196 }
00197 
00198 //==========================================================================
00199 template<class MatrixType,class ContainerType>
00200 int BlockRelaxation<MatrixType,ContainerType>::getNumInitialize() const {
00201   return(NumInitialize_);
00202 }
00203 
00204 //==========================================================================
00205 template<class MatrixType,class ContainerType>
00206 int BlockRelaxation<MatrixType,ContainerType>::getNumCompute() const {
00207   return(NumCompute_);
00208 }
00209 
00210 //==========================================================================
00211 template<class MatrixType,class ContainerType>
00212 int BlockRelaxation<MatrixType,ContainerType>::getNumApply() const {
00213   return(NumApply_);
00214 }
00215 
00216 //==========================================================================
00217 template<class MatrixType,class ContainerType>
00218 double BlockRelaxation<MatrixType,ContainerType>::getInitializeTime() const {
00219   return(InitializeTime_);
00220 }
00221 
00222 //==========================================================================
00223 template<class MatrixType,class ContainerType>
00224 double BlockRelaxation<MatrixType,ContainerType>::getComputeTime() const {
00225   return(ComputeTime_);
00226 }
00227 
00228 //==========================================================================
00229 template<class MatrixType,class ContainerType>
00230 double BlockRelaxation<MatrixType,ContainerType>::getApplyTime() const {
00231   return(ApplyTime_);
00232 }
00233 
00234 //==========================================================================
00235 template<class MatrixType,class ContainerType>
00236 double BlockRelaxation<MatrixType,ContainerType>::getComputeFlops() const {
00237   return(ComputeFlops_);
00238 }
00239 
00240 //==========================================================================
00241 template<class MatrixType,class ContainerType>
00242 double BlockRelaxation<MatrixType,ContainerType>::getApplyFlops() const {
00243   return ApplyFlops_;
00244 }
00245 
00246 //==========================================================================
00247 template<class MatrixType,class ContainerType>
00248 typename BlockRelaxation<MatrixType, ContainerType>::magnitude_type
00249 BlockRelaxation<MatrixType,ContainerType>::getCondEst () const
00250 {
00251   return Condest_;
00252 }
00253 
00254 //==========================================================================
00255 template<class MatrixType,class ContainerType>
00256 typename BlockRelaxation<MatrixType, ContainerType>::magnitude_type
00257 BlockRelaxation<MatrixType,ContainerType>::
00258 computeCondEst (CondestType CT,
00259                 typename MatrixType::local_ordinal_type MaxIters,
00260                 magnitude_type Tol,
00261                 const Teuchos::Ptr<const row_matrix_type>& matrix)
00262 {
00263   if (! isComputed ()) {// cannot compute right now
00264     return -STM::one ();
00265   }
00266 
00267   // always compute it. Call Condest() with no parameters to get
00268   // the previous estimate.
00269   Condest_ = Ifpack2::Condest (*this, CT, MaxIters, Tol, matrix);
00270 
00271   return Condest_;
00272 }
00273 
00274 //==========================================================================
00275 template<class MatrixType,class ContainerType>
00276 void
00277 BlockRelaxation<MatrixType,ContainerType>::
00278 apply (const Tpetra::MultiVector<typename MatrixType::scalar_type,
00279                                  typename MatrixType::local_ordinal_type,
00280                                  typename MatrixType::global_ordinal_type,
00281                                  typename MatrixType::node_type>& X,
00282        Tpetra::MultiVector<typename MatrixType::scalar_type,
00283                            typename MatrixType::local_ordinal_type,
00284                            typename MatrixType::global_ordinal_type,
00285                            typename MatrixType::node_type>& Y,
00286        Teuchos::ETransp mode,
00287        scalar_type alpha,
00288        scalar_type beta) const
00289 {
00290   TEUCHOS_TEST_FOR_EXCEPTION(
00291     ! isComputed (), std::runtime_error, "Ifpack2::BlockRelaxation::apply: "
00292     "isComputed() must be true prior to calling apply.");
00293   TEUCHOS_TEST_FOR_EXCEPTION(
00294     X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
00295     "Ifpack2::BlockRelaxation::apply: X.getNumVectors() = "
00296     << X.getNumVectors () << " != Y.getNumVectors() = "
00297     << Y.getNumVectors () << ".");
00298   TEUCHOS_TEST_FOR_EXCEPTION(
00299     mode != Teuchos::NO_TRANS, std::logic_error, "Ifpack2::BlockRelaxation::"
00300     "apply: This method currently only implements the case mode == Teuchos::"
00301     "NO_TRANS.  Transposed modes are not currently supported.");
00302   TEUCHOS_TEST_FOR_EXCEPTION(
00303     alpha != Teuchos::ScalarTraits<scalar_type>::one (), std::logic_error,
00304     "Ifpack2::BlockRelaxation::apply: This method currently only implements "
00305     "the case alpha == 1.  You specified alpha = " << alpha << ".");
00306   TEUCHOS_TEST_FOR_EXCEPTION(
00307     beta != Teuchos::ScalarTraits<scalar_type>::zero (), std::logic_error,
00308     "Ifpack2::BlockRelaxation::apply: This method currently only implements "
00309     "the case beta == 0.  You specified beta = " << beta << ".");
00310 
00311   Time_->start(true);
00312 
00313   // If X and Y are pointing to the same memory location,
00314   // we need to create an auxiliary vector, Xcopy
00315   Teuchos::RCP<const MV> X_copy;
00316   if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
00317     X_copy = Teuchos::rcp (new MV (createCopy(X)));
00318   } else {
00319     X_copy = Teuchos::rcpFromRef (X);
00320   }
00321 
00322   if (ZeroStartingSolution_) {
00323     Y.putScalar (STS::zero ());
00324   }
00325 
00326   // Flops are updated in each of the following.
00327   switch (PrecType_) {
00328   case Ifpack2::Details::JACOBI:
00329     ApplyInverseJacobi(*X_copy,Y);
00330     break;
00331   case Ifpack2::Details::GS:
00332     ApplyInverseGS(*X_copy,Y);
00333     break;
00334   case Ifpack2::Details::SGS:
00335     ApplyInverseSGS(*X_copy,Y);
00336     break;
00337   default:
00338     throw std::runtime_error("Ifpack2::BlockRelaxation::apply internal logic error.");
00339   }
00340 
00341   ++NumApply_;
00342   Time_->stop();
00343   ApplyTime_ += Time_->totalElapsedTime();
00344 }
00345 
00346 //==========================================================================
00347 template<class MatrixType,class ContainerType>
00348 void BlockRelaxation<MatrixType,ContainerType>::applyMat(
00349           const Tpetra::MultiVector<typename MatrixType::scalar_type,
00350                                     typename MatrixType::local_ordinal_type,
00351                                     typename MatrixType::global_ordinal_type,
00352                                     typename MatrixType::node_type>& X,
00353                 Tpetra::MultiVector<typename MatrixType::scalar_type,
00354                                     typename MatrixType::local_ordinal_type,
00355                                     typename MatrixType::global_ordinal_type,
00356                                     typename MatrixType::node_type>& Y,
00357              Teuchos::ETransp mode) const
00358 {
00359   TEUCHOS_TEST_FOR_EXCEPTION(isComputed() == false, std::runtime_error,
00360      "Ifpack2::BlockRelaxation::applyMat() ERROR: isComputed() must be true prior to calling applyMat().");
00361   TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
00362      "Ifpack2::BlockRelaxation::applyMat() ERROR: X.getNumVectors() != Y.getNumVectors().");
00363   A_->apply (X, Y, mode);
00364 }
00365 
00366 //==========================================================================
00367 template<class MatrixType,class ContainerType>
00368 void BlockRelaxation<MatrixType,ContainerType>::initialize() {
00369   using Teuchos::rcp;
00370   typedef Tpetra::RowGraph<local_ordinal_type, global_ordinal_type, node_type>
00371     row_graph_type;
00372 
00373   IsInitialized_ = false;
00374   Time_->start (true);
00375 
00376   NumMyRows_         = A_->getNodeNumRows ();
00377   NumGlobalRows_     = A_->getGlobalNumRows ();
00378   NumGlobalNonzeros_ = A_->getGlobalNumEntries ();
00379 
00380   // NTS: Will need to add support for Zoltan2 partitions later Also,
00381   // will need a better way of handling the Graph typing issue.
00382   // Especially with ordinal types w.r.t the container.
00383 
00384   if (PartitionerType_ == "linear") {
00385     Partitioner_ =
00386       rcp (new Ifpack2::LinearPartitioner<row_graph_type> (A_->getGraph ()));
00387   } else {
00388     // We should have checked for this in setParameters(), so it's a
00389     // logic_error, not an invalid_argument or runtime_error.
00390     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
00391       "Ifpack2::BlockRelaxation::initialize, invalid partitioner type.");
00392   }
00393 
00394   // need to partition the graph of A
00395   Partitioner_->setParameters (List_);
00396   Partitioner_->compute ();
00397 
00398   // get actual number of partitions
00399   NumLocalBlocks_ = Partitioner_->numLocalParts ();
00400 
00401   // Note: Unlike Ifpack, we'll punt on computing W_ until compute(), which is where
00402   // we assume that the type of relaxation has been chosen.
00403 
00404   if (A_->getComm()->getSize() != 1) {
00405     IsParallel_ = true;
00406   } else {
00407     IsParallel_ = false;
00408   }
00409 
00410   ++NumInitialize_;
00411   Time_->stop ();
00412   InitializeTime_ += Time_->totalElapsedTime ();
00413   IsInitialized_ = true;
00414 }
00415 
00416 //==========================================================================
00417 template<class MatrixType,class ContainerType>
00418 void BlockRelaxation<MatrixType,ContainerType>::compute()
00419 {
00420   using Teuchos::rcp;
00421   typedef Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> vector_type;
00422   typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> import_type;
00423 
00424   // We should have checked for this in setParameters(), so it's a
00425   // logic_error, not an invalid_argument or runtime_error.
00426   TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error,
00427     "Ifpack2::BlockRelaxation::compute, NumSweeps_ must be >= 0");
00428 
00429   if (! isInitialized ()) {
00430     initialize ();
00431   }
00432   Time_->start (true);
00433 
00434   // reset values
00435   IsComputed_ = false;
00436   Condest_ = STS::real(-STS::one ());
00437 
00438   // Extract the submatrices
00439   ExtractSubmatrices ();
00440 
00441   // Compute the weight vector if we're doing overlapped Jacobi (and
00442   // only if we're doing overlapped Jacobi).
00443   if (PrecType_ == Ifpack2::Details::JACOBI && OverlapLevel_ > 0) {
00444     // weight of each vertex
00445     W_ = rcp (new vector_type (A_->getRowMap ()));
00446     W_->putScalar (STS::zero ());
00447     Teuchos::ArrayRCP<scalar_type > w_ptr = W_->getDataNonConst(0);
00448 
00449     for (local_ordinal_type i = 0 ; i < NumLocalBlocks_ ; ++i) {
00450       for (size_t j = 0 ; j < Partitioner_->numRowsInPart(i) ; ++j) {
00451         int LID = (*Partitioner_)(i,j);
00452         w_ptr[LID]+= STS::one();
00453       }
00454     }
00455     W_->reciprocal (*W_);
00456   }
00457 
00458   // We need to import data from external processors. Here I create a
00459   // Tpetra::Import object if needed (stealing from A_ if possible)
00460   // Marzio's comment:
00461   // Note that I am doing some strange stuff to set the components of Y
00462   // from Y2 (to save some time).
00463   //
00464   if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
00465                       PrecType_ == Ifpack2::Details::SGS)) {
00466     Importer_ = A_->getGraph ()->getImporter ();
00467     if (Importer_.is_null ()) {
00468       Importer_ = rcp (new import_type (A_->getDomainMap (), A_->getColMap ()));
00469     }
00470   }
00471 
00472   ++NumCompute_;
00473   Time_->stop ();
00474   ComputeTime_ += Time_->totalElapsedTime();
00475   IsComputed_ = true;
00476 }
00477 
00478 //==============================================================================
00479 template<class MatrixType,class ContainerType>
00480 void BlockRelaxation<MatrixType,ContainerType>::ExtractSubmatrices()
00481 {
00482   TEUCHOS_TEST_FOR_EXCEPTION(
00483     Partitioner_.is_null (), std::runtime_error,
00484     "Ifpack2::BlockRelaxation::ExtractSubmatrices: Partitioner object is null.");
00485 
00486   NumLocalBlocks_ = Partitioner_->numLocalParts ();
00487   Containers_.resize (NumLocalBlocks_);
00488 
00489   for (local_ordinal_type i = 0; i < NumLocalBlocks_; ++i) {
00490     const size_t numRows = Partitioner_->numRowsInPart (i);
00491 
00492     // Extract a list of the indices of each partitioner row.
00493     Teuchos::Array<local_ordinal_type> localRows (numRows);
00494     for (size_t j = 0; j < numRows; ++j) {
00495       localRows[j] = (*Partitioner_) (i,j);
00496     }
00497 
00498     Containers_[i] = Teuchos::rcp (new ContainerType (A_, localRows ()));
00499     Containers_[i]->setParameters (List_);
00500     Containers_[i]->initialize ();
00501     Containers_[i]->compute ();
00502   }
00503 }
00504 
00505 
00506 
00507 //==========================================================================
00508 template<class MatrixType,class ContainerType>
00509 void BlockRelaxation<MatrixType,ContainerType>::ApplyInverseJacobi (const MV& X, MV& Y) const
00510 {
00511   const size_t NumVectors = X.getNumVectors ();
00512   MV AY (Y.getMap (), NumVectors);
00513 
00514   // Initial matvec not needed
00515   int starting_iteration = 0;
00516   if (ZeroStartingSolution_) {
00517     DoJacobi(X,Y);
00518     starting_iteration = 1;
00519   }
00520 
00521   const scalar_type ONE = STS::one ();
00522   for (int j = starting_iteration; j < NumSweeps_; ++j) {
00523     applyMat (Y, AY);
00524     AY.update (ONE, X, -ONE);
00525     DoJacobi (AY, Y);
00526 
00527     // Flops for matrix apply & update
00528     ApplyFlops_ += NumVectors * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
00529   }
00530 
00531 }
00532 
00533 
00534 //==========================================================================
00535 template<class MatrixType,class ContainerType>
00536 void BlockRelaxation<MatrixType,ContainerType>::DoJacobi(const MV& X, MV& Y) const
00537 {
00538   const size_t NumVectors = X.getNumVectors();
00539   const scalar_type one = STS::one();
00540   // Note: Flop counts copied naively from Ifpack.
00541 
00542   if (OverlapLevel_ == 0) {
00543     // Non-overlapping Jacobi
00544     for (local_ordinal_type i = 0; i < NumLocalBlocks_; ++i) {
00545       // may happen that a partition is empty
00546       if (Containers_[i]->getNumRows () == 0) {
00547         continue;
00548       }
00549       Containers_[i]->apply (X, Y, Teuchos::NO_TRANS, DampingFactor_, one);
00550       ApplyFlops_ += NumVectors * 2 * NumGlobalRows_;
00551     }
00552   }
00553   else {
00554     // Overlapping Jacobi
00555     for (local_ordinal_type i = 0 ; i < NumLocalBlocks_ ; i++) {
00556       // may happen that a partition is empty
00557       if (Containers_[i]->getNumRows() == 0) continue;
00558 
00559       try {
00560         Containers_[i]->weightedApply(X,Y,*W_,Teuchos::NO_TRANS,DampingFactor_,one);
00561       } catch (std::exception& e) {
00562         std::cerr << "BlockRelaxation::DoJacobi: Containers_[" << i
00563                   << "]->weightedApply() threw an exception: " << e.what ()
00564                   << std::endl;
00565         throw;
00566       }
00567       // NOTE: do not count (for simplicity) the flops due to overlapping rows
00568       ApplyFlops_ += NumVectors * 4 * NumGlobalRows_;
00569     }
00570   }
00571 }
00572 
00573 //==========================================================================
00574 template<class MatrixType,class ContainerType>
00575 void BlockRelaxation<MatrixType,ContainerType>::
00576 ApplyInverseGS (const MV& X, MV& Y) const
00577 {
00578   MV Xcopy (X);
00579   for (int j = 0; j < NumSweeps_ ; j++) {
00580     DoGaussSeidel (Xcopy, Y);
00581     if (j != NumSweeps_ - 1) {
00582       Xcopy = X;
00583     }
00584   }
00585 }
00586 
00587 
00588 //==============================================================================
00589 template<class MatrixType,class ContainerType>
00590 void BlockRelaxation<MatrixType,ContainerType>::
00591 DoGaussSeidel (MV& X, MV& Y) const
00592 {
00593   using Teuchos::Array;
00594   using Teuchos::ArrayRCP;
00595   using Teuchos::ArrayView;
00596   using Teuchos::RCP;
00597   using Teuchos::rcp;
00598   using Teuchos::rcpFromRef;
00599 
00600   // Note: Flop counts copied naively from Ifpack.
00601 
00602   const scalar_type    one =  STS::one ();
00603   int Length = A_->getNodeMaxNumRowEntries();
00604   const size_t NumVectors = X.getNumVectors();
00605   Array<scalar_type>         Values;
00606   Array<local_ordinal_type>   Indices;
00607   Values.resize(Length);
00608   Indices.resize(Length);
00609 
00610   // an additonal vector is needed by parallel computations
00611   // (note that applications through Ifpack2_AdditiveSchwarz
00612   // are always seen are serial)
00613   RCP<MV> Y2;
00614   if (IsParallel_) {
00615     Y2 = rcp (new MV (Importer_->getTargetMap(), NumVectors));
00616   } else {
00617     Y2 = rcpFromRef (Y);
00618   }
00619 
00620   // I think I decided I need two extra vectors:
00621   // One to store the sum of the corrections (initialized to zero)
00622   // One to store the temporary residual (doesn't matter if it is zeroed or not)
00623   // My apologies for making the names clear and meaningful. (X=RHS, Y=guess?! Nice.)
00624   MV Residual(X.getMap(),NumVectors,false);
00625 
00626   ArrayRCP<ArrayRCP<scalar_type> >           x_ptr = X.get2dViewNonConst();
00627   ArrayRCP<ArrayRCP<scalar_type> >           y_ptr = Y.get2dViewNonConst();
00628   ArrayRCP<ArrayRCP<scalar_type> >          y2_ptr = Y2->get2dViewNonConst();
00629   ArrayRCP<ArrayRCP<scalar_type> >    residual_ptr = Residual.get2dViewNonConst();
00630 
00631   // data exchange is here, once per sweep
00632   if (IsParallel_)  Y2->doImport(Y,*Importer_,Tpetra::INSERT);
00633 
00634   for (local_ordinal_type i = 0; i < NumLocalBlocks_; ++i) {
00635     if (Containers_[i]->getNumRows () == 0) {
00636       continue; // Skip empty partitions
00637     }
00638 
00639     // update from previous block
00640     ArrayView<const local_ordinal_type> localRows =
00641       Containers_[i]->getLocalRows ();
00642     const size_t localNumRows = Containers_[i]->getNumRows ();
00643     for (size_t j = 0; j < localNumRows; ++j) {
00644       const local_ordinal_type LID = localRows[j]; // Containers_[i]->ID (j);
00645       size_t NumEntries;
00646       A_->getLocalRowCopy (LID, Indices (), Values (), NumEntries);
00647 
00648       for (size_t m = 0; m < NumVectors; ++m) {
00649   ArrayView<const scalar_type> x_local = (x_ptr())[m]();
00650   ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
00651   ArrayView<scalar_type>       r_local = (residual_ptr())[m]();
00652 
00653   r_local[LID] = x_local[LID];  
00654   for (size_t k = 0; k < NumEntries; ++k) {
00655     const local_ordinal_type col = Indices[k];
00656     r_local[LID] -= Values[k] * y2_local[col];
00657   }
00658       }
00659     }
00660     // solve with this block
00661     //
00662     // Note: I'm abusing the ordering information, knowing that X/Y
00663     // and Y2 have the same ordering for on-proc unknowns.
00664     //
00665     // Note: Add flop counts for inverse apply
00666     Containers_[i]->apply (Residual, *Y2, Teuchos::NO_TRANS,
00667                            DampingFactor_,one);
00668 
00669     // operations for all getrow's
00670     ApplyFlops_ += NumVectors * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
00671   } // end for NumLocalBlocks_
00672 
00673   // Attention: this is delicate... Not all combinations
00674   // of Y2 and Y will always work (tough for ML it should be ok)
00675   if (IsParallel_) {
00676     for (size_t m = 0; m < NumVectors; ++m) {
00677       ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
00678       ArrayView<scalar_type>      y_local  = (y_ptr())[m]();
00679       for (size_t i = 0; i < NumMyRows_; ++i) {
00680         y_local[i] = y2_local[i];
00681       }
00682     }
00683   }
00684 }
00685 
00686 //==========================================================================
00687 template<class MatrixType,class ContainerType>
00688 void
00689 BlockRelaxation<MatrixType,ContainerType>::
00690 ApplyInverseSGS (const MV& X, MV& Y) const
00691 {
00692   MV Xcopy (X);
00693   for (int j = 0; j < NumSweeps_; ++j) {
00694     DoSGS (Xcopy, Y);
00695     if (j != NumSweeps_ - 1) {
00696       Xcopy = X;
00697     }
00698   }
00699 }
00700 
00701 //==========================================================================
00702 template<class MatrixType,class ContainerType>
00703 void
00704 BlockRelaxation<MatrixType,ContainerType>::DoSGS (MV& X, MV& Y) const
00705 {
00706   using Teuchos::Array;
00707   using Teuchos::ArrayRCP;
00708   using Teuchos::ArrayView;
00709   using Teuchos::RCP;
00710   using Teuchos::rcp;
00711   using Teuchos::rcpFromRef;
00712 
00713   const scalar_type    one =  STS::one ();
00714   int Length = A_->getNodeMaxNumRowEntries();
00715   const size_t NumVectors = X.getNumVectors();
00716   Array<scalar_type>         Values;
00717   Array<local_ordinal_type>   Indices;
00718   Values.resize(Length);
00719   Indices.resize(Length);
00720 
00721   // an additonal vector is needed by parallel computations
00722   // (note that applications through Ifpack2_AdditiveSchwarz
00723   // are always seen are serial)
00724   RCP<MV> Y2;
00725   if (IsParallel_) {
00726     Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
00727   } else {
00728     Y2 = rcpFromRef (Y);
00729   }
00730 
00731   // I think I decided I need two extra vectors:
00732   // One to store the sum of the corrections (initialized to zero)
00733   // One to store the temporary residual (doesn't matter if it is zeroed or not)
00734   // My apologies for making the names clear and meaningful. (X=RHS, Y=guess?! Nice.)
00735   MV Residual(X.getMap(),NumVectors,false);
00736 
00737   ArrayRCP<ArrayRCP<scalar_type> >     x_ptr       = X.get2dViewNonConst();
00738   ArrayRCP<ArrayRCP<scalar_type> >     y_ptr       = Y.get2dViewNonConst();
00739   ArrayRCP<ArrayRCP<scalar_type> >     y2_ptr      = Y2->get2dViewNonConst();
00740   ArrayRCP<ArrayRCP<scalar_type> >    residual_ptr = Residual.get2dViewNonConst();
00741 
00742   // data exchange is here, once per sweep
00743   if (IsParallel_) {
00744     Y2->doImport (Y, *Importer_, Tpetra::INSERT);
00745   }
00746 
00747   // Forward Sweep
00748   for (local_ordinal_type i = 0; i < NumLocalBlocks_; ++i) {
00749     if (Containers_[i]->getNumRows () == 0) {
00750       continue; // Skip empty partitions
00751     }
00752     // update from previous block
00753     ArrayView<const local_ordinal_type> localRows =
00754       Containers_[i]->getLocalRows ();
00755     for (size_t j = 0; j < Containers_[i]->getNumRows (); ++j) {
00756       const local_ordinal_type LID = localRows[j]; // Containers_[i]->ID (j);
00757       size_t NumEntries;
00758       A_->getLocalRowCopy (LID, Indices (), Values (), NumEntries);
00759 
00760       //set tmpresid = initresid - A*correction
00761       for (size_t m = 0; m < NumVectors; ++m) {
00762   ArrayView<const scalar_type> x_local = (x_ptr())[m]();
00763   ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
00764   ArrayView<scalar_type>       r_local = (residual_ptr())[m]();
00765 
00766         r_local[LID] = x_local[LID];
00767   for (size_t k = 0 ; k < NumEntries ; k++) {
00768     local_ordinal_type col = Indices[k];
00769           r_local[LID] -= Values[k] * y2_local[col];
00770         }
00771       }
00772     }
00773     // solve with this block
00774     //
00775     // Note: I'm abusing the ordering information, knowing that X/Y
00776     // and Y2 have the same ordering for on-proc unknowns.
00777     //
00778     // Note: Add flop counts for inverse apply
00779     Containers_[i]->apply (Residual, *Y2, Teuchos::NO_TRANS,
00780                            DampingFactor_, one);
00781 
00782     // operations for all getrow's
00783     ApplyFlops_ += NumVectors * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
00784   }// end forward sweep
00785 
00786   // Reverse Sweep
00787   //
00788   // mfh 12 July 2013: The unusual iteration bounds, and the use of
00789   // i-1 rather than i in the loop body, ensure correctness even if
00790   // local_ordinal_type is unsigned.  "i = NumLocalBlocks_-1; i >= 0;
00791   // i--" will loop forever if local_ordinal_type is unsigned, because
00792   // unsigned integers are (trivially) always nonnegative.
00793   for (local_ordinal_type i = NumLocalBlocks_; i > 0; --i) {
00794     if (Containers_[i-1]->getNumRows () == 0) {
00795       continue; // Skip empty partitions
00796     }
00797     // update from previous block
00798     ArrayView<const local_ordinal_type> localRows =
00799       Containers_[i-1]->getLocalRows ();
00800     for (size_t j = 0; j < Containers_[i-1]->getNumRows (); ++j) {
00801       const local_ordinal_type LID = localRows[j]; // Containers_[i-1]->ID (j);
00802       size_t NumEntries;
00803       A_->getLocalRowCopy (LID, Indices (), Values (), NumEntries);
00804 
00805       //set tmpresid = initresid - A*correction
00806       for (size_t m = 0; m < NumVectors; ++m) {
00807   ArrayView<const scalar_type> x_local = (x_ptr())[m]();
00808   ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
00809   ArrayView<scalar_type>       r_local = (residual_ptr())[m]();
00810 
00811         r_local [LID] = x_local[LID];
00812   for (size_t k = 0; k < NumEntries; ++k)  {
00813     local_ordinal_type col = Indices[k];       
00814           r_local[LID] -= Values[k] * y2_local[col];
00815   }
00816       }
00817     }
00818     
00819     // solve with this block
00820     //
00821     // Note: I'm abusing the ordering information, knowing that X/Y
00822     // and Y2 have the same ordering for on-proc unknowns.
00823     //
00824     // Note: Add flop counts for inverse apply
00825     Containers_[i-1]->apply (Residual, *Y2, Teuchos::NO_TRANS,
00826                            DampingFactor_, one);
00827 
00828     // operations for all getrow's
00829     ApplyFlops_ += NumVectors * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
00830   } //end reverse sweep
00831 
00832   // Attention: this is delicate... Not all combinations
00833   // of Y2 and Y will always work (though for ML it should be ok)
00834   if (IsParallel_) {
00835     for (size_t m = 0; m < NumVectors; ++m) {
00836       ArrayView<scalar_type>       y_local = (y_ptr())[m]();
00837       ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
00838       for (size_t i = 0 ; i < NumMyRows_ ; ++i) {
00839         y_local[i] = y2_local[i];
00840       }
00841     }
00842   }
00843 }
00844 
00845 
00846 template<class MatrixType, class ContainerType>
00847 std::string BlockRelaxation<MatrixType,ContainerType>::description () const
00848 {
00849   std::ostringstream out;
00850 
00851   // Output is a valid YAML dictionary in flow style.  If you don't
00852   // like everything on a single line, you should call describe()
00853   // instead.
00854   out << "\"Ifpack2::BlockRelaxation\": {";
00855   if (this->getObjectLabel () != "") {
00856     out << "Label: \"" << this->getObjectLabel () << "\", ";
00857   }
00858   out << "Initialized: " << (isInitialized () ? "true" : "false") << ", ";
00859   out << "Computed: " << (isComputed () ? "true" : "false") << ", ";
00860 
00861   if (A_.is_null ()) {
00862     out << "Matrix: null";
00863   }
00864   else {
00865     out << "Matrix: not null"
00866         << ", Global matrix dimensions: ["
00867         << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]";
00868   }
00869 
00870   // It's useful to print this instance's relaxation method.  If you
00871   // want more info than that, call describe() instead.
00872   out << "\"relaxation: type\": ";
00873   if (PrecType_ == Ifpack2::Details::JACOBI) {
00874     out << "Block Jacobi";
00875   } else if (PrecType_ == Ifpack2::Details::GS) {
00876     out << "Block Gauss-Seidel";
00877   } else if (PrecType_ == Ifpack2::Details::SGS) {
00878     out << "Block Symmetric Gauss-Seidel";
00879   } else {
00880     out << "INVALID";
00881   }
00882 
00883   out << "}";
00884   return out.str();
00885 }
00886 
00887 
00888 template<class MatrixType,class ContainerType>
00889 void BlockRelaxation<MatrixType,ContainerType>::
00890 describe (Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
00891 {
00892   using std::endl;
00893   using std::setw;
00894   using Teuchos::TypeNameTraits;
00895   using Teuchos::VERB_DEFAULT;
00896   using Teuchos::VERB_NONE;
00897   using Teuchos::VERB_LOW;
00898   using Teuchos::VERB_MEDIUM;
00899   using Teuchos::VERB_HIGH;
00900   using Teuchos::VERB_EXTREME;
00901 
00902   Teuchos::EVerbosityLevel vl = verbLevel;
00903   if (vl == VERB_DEFAULT) vl = VERB_LOW;
00904   const int myImageID = A_->getComm()->getRank();
00905 
00906   // Convention requires that describe() begin with a tab.
00907   Teuchos::OSTab tab (out);
00908 
00909   //    none: print nothing
00910   //     low: print O(1) info from node 0
00911   //  medium:
00912   //    high:
00913   // extreme:
00914   if (vl != VERB_NONE && myImageID == 0) {
00915     out << "Ifpack2::BlockRelaxation<"
00916         << TypeNameTraits<MatrixType>::name () << ", "
00917         << TypeNameTraits<ContainerType>::name () << " >:";
00918     Teuchos::OSTab tab1 (out);
00919 
00920     if (this->getObjectLabel () != "") {
00921       out << "label: \"" << this->getObjectLabel () << "\"" << endl;
00922     }
00923     out << "initialized: " << (isInitialized () ? "true" : "false") << endl
00924         << "computed: " << (isComputed () ? "true" : "false") << endl;
00925 
00926     std::string precType;
00927     if (PrecType_ == Ifpack2::Details::JACOBI) {
00928       precType = "Block Jacobi";
00929     } else if (PrecType_ == Ifpack2::Details::GS) {
00930       precType = "Block Gauss-Seidel";
00931     } else if (PrecType_ == Ifpack2::Details::SGS) {
00932       precType = "Block Symmetric Gauss-Seidel";
00933     }
00934     out << "type: " << precType << endl
00935         << "global number of rows: " << A_->getGlobalNumRows () << endl
00936         << "global number of columns: " << A_->getGlobalNumCols () << endl
00937         << "number of sweeps: " << NumSweeps_ << endl
00938         << "damping factor: " << DampingFactor_ << endl
00939         << "backwards mode: "
00940         << ((PrecType_ == Ifpack2::Details::GS && DoBackwardGS_) ? "true" : "false")
00941         << endl
00942         << "zero starting solution: "
00943         << (ZeroStartingSolution_ ? "true" : "false") << endl
00944         << "condition number estimate: " << Condest_ << endl;
00945 
00946     out << "===============================================================================" << endl;
00947     out << "Phase           # calls    Total Time (s)     Total MFlops      MFlops/s       " << endl;
00948     out << "------------    -------    ---------------    ---------------   ---------------" << endl;
00949     out << setw(12) << "initialize()" << setw(5) << getNumInitialize() << "    " << setw(15) << getInitializeTime() << endl;
00950     out << setw(12) << "compute()" << setw(5) << getNumCompute()    << "    " << setw(15) << getComputeTime() << "    "
00951         << setw(15) << getComputeFlops() << "    "
00952         << setw(15) << (getComputeTime() != 0.0 ? getComputeFlops() / getComputeTime() * 1.0e-6 : 0.0) << endl;
00953     out << setw(12) << "apply()" << setw(5) << getNumApply()    << "    " << setw(15) << getApplyTime() << "    "
00954         << setw(15) << getApplyFlops() << "    "
00955         << setw(15) << (getApplyTime() != 0.0 ? getApplyFlops() / getApplyTime() * 1.0e-6 : 0.0) << endl;
00956     out << "===============================================================================" << endl;
00957     out << endl;
00958   }
00959 }
00960 
00961 }//namespace Ifpack2
00962 
00963 // For ETI
00964 #include "Ifpack2_SparseContainer.hpp"
00965 #include "Ifpack2_ILUT.hpp"
00966 
00967 #define IFPACK2_BLOCKRELAXATION_INSTANT(S,LO,GO,N) \
00968   template class Ifpack2::BlockRelaxation< \
00969     Tpetra::CrsMatrix<S, LO, GO, N>, \
00970     Ifpack2::SparseContainer< Tpetra::CrsMatrix<S, LO, GO, N>, \
00971                               Ifpack2::ILUT<Tpetra::CrsMatrix<S,LO,GO,N> > > >;
00972 
00973 #endif // IFPACK2_BLOCKRELAXATION_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends