Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Relaxation_def.hpp
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #ifndef IFPACK2_RELAXATION_DEF_HPP
00044 #define IFPACK2_RELAXATION_DEF_HPP
00045 
00046 #include "Ifpack2_Relaxation_decl.hpp"
00047 #include "Teuchos_StandardParameterEntryValidators.hpp"
00048 #include <Teuchos_TimeMonitor.hpp>
00049 #include <Tpetra_ConfigDefs.hpp>
00050 #include <Tpetra_CrsMatrix.hpp>
00051 #include <Tpetra_Experimental_BlockCrsMatrix.hpp>
00052 
00053 // mfh 28 Mar 2013: Uncomment out these three lines to compute
00054 // statistics on diagonal entries in compute().
00055 // #ifndef IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
00056 // #  define IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS 1
00057 // #endif // IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
00058 
00059 namespace {
00060   // Validate that a given int is nonnegative.
00061   class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
00062   public:
00063     // Constructor (does nothing).
00064     NonnegativeIntValidator () {}
00065 
00066     // ParameterEntryValidator wants this method.
00067     Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
00068       return Teuchos::null;
00069     }
00070 
00071     // Actually validate the parameter's value.
00072     void
00073     validate (const Teuchos::ParameterEntry& entry,
00074               const std::string& paramName,
00075               const std::string& sublistName) const
00076     {
00077       using std::endl;
00078       Teuchos::any anyVal = entry.getAny (true);
00079       const std::string entryName = entry.getAny (false).typeName ();
00080 
00081       TEUCHOS_TEST_FOR_EXCEPTION(
00082         anyVal.type () != typeid (int),
00083         Teuchos::Exceptions::InvalidParameterType,
00084         "Parameter \"" << paramName << "\" in sublist \"" << sublistName
00085         << "\" has the wrong type." << endl << "Parameter: " << paramName
00086         << endl << "Type specified: " << entryName << endl
00087         << "Type required: int" << endl);
00088 
00089       const int val = Teuchos::any_cast<int> (anyVal);
00090       TEUCHOS_TEST_FOR_EXCEPTION(
00091         val < 0, Teuchos::Exceptions::InvalidParameterValue,
00092         "Parameter \"" << paramName << "\" in sublist \"" << sublistName
00093         << "\" is negative." << endl << "Parameter: " << paramName
00094         << endl << "Value specified: " << val << endl
00095         << "Required range: [0, INT_MAX]" << endl);
00096     }
00097 
00098     // ParameterEntryValidator wants this method.
00099     const std::string getXMLTypeName () const {
00100       return "NonnegativeIntValidator";
00101     }
00102 
00103     // ParameterEntryValidator wants this method.
00104     void
00105     printDoc (const std::string& docString,
00106               std::ostream &out) const
00107     {
00108       Teuchos::StrUtils::printLines (out, "# ", docString);
00109       out << "#\tValidator Used: " << std::endl;
00110       out << "#\t\tNonnegativeIntValidator" << std::endl;
00111     }
00112   };
00113 
00114   // A way to get a small positive number (eps() for floating-point
00115   // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
00116   // define it (for example, for integer values).
00117   template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
00118   class SmallTraits {
00119   public:
00120     // Return eps if Scalar is a floating-point type, else return 1.
00121     static const Scalar eps ();
00122   };
00123 
00124   // Partial specialization for when Scalar is not a floating-point type.
00125   template<class Scalar>
00126   class SmallTraits<Scalar, true> {
00127   public:
00128     static const Scalar eps () {
00129       return Teuchos::ScalarTraits<Scalar>::one ();
00130     }
00131   };
00132 
00133   // Partial specialization for when Scalar is a floating-point type.
00134   template<class Scalar>
00135   class SmallTraits<Scalar, false> {
00136   public:
00137     static const Scalar eps () {
00138       return Teuchos::ScalarTraits<Scalar>::eps ();
00139     }
00140   };
00141 } // namespace (anonymous)
00142 
00143 namespace Ifpack2 {
00144 
00145 template<class MatrixType>
00146 void Relaxation<MatrixType>::
00147 setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
00148 {
00149   if (A.getRawPtr () != A_.getRawPtr ()) { // it's a different matrix
00150     Importer_ = Teuchos::null;
00151     Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
00152     isInitialized_ = false;
00153     IsComputed_ = false;
00154     diagOffsets_ = Teuchos::null;
00155     savedDiagOffsets_ = false;
00156     hasBlockCrsMatrix_ = false;
00157     if (! A.is_null ()) {
00158       IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
00159     }
00160     A_ = A;
00161   }
00162 }
00163 
00164 
00165 template<class MatrixType>
00166 Relaxation<MatrixType>::
00167 Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
00168 : A_ (A),
00169   Time_ (Teuchos::rcp (new Teuchos::Time ("Ifpack2::Relaxation"))),
00170   NumSweeps_ (1),
00171   PrecType_ (Ifpack2::Details::JACOBI),
00172   DampingFactor_ (STS::one ()),
00173   IsParallel_ (A.is_null () ? false : A->getRowMap ()->getComm ()->getSize () > 1), // pick a reasonable default if the matrix is null.  It doesn't really matter, because the methods that care can't use a null matrix anyway.
00174   ZeroStartingSolution_ (true),
00175   DoBackwardGS_ (false),
00176   DoL1Method_ (false),
00177   L1Eta_ (Teuchos::as<magnitude_type> (1.5)),
00178   MinDiagonalValue_ (STS::zero ()),
00179   fixTinyDiagEntries_ (false),
00180   checkDiagEntries_ (false),
00181   Condest_ (-STM::one ()),
00182   isInitialized_ (false),
00183   IsComputed_ (false),
00184   NumInitialize_ (0),
00185   NumCompute_ (0),
00186   NumApply_ (0),
00187   InitializeTime_ (0.0), // Times are double anyway, so no need for ScalarTraits.
00188   ComputeTime_ (0.0),
00189   ApplyTime_ (0.0),
00190   ComputeFlops_ (0.0),
00191   ApplyFlops_ (0.0),
00192   globalMinMagDiagEntryMag_ (STM::zero ()),
00193   globalMaxMagDiagEntryMag_ (STM::zero ()),
00194   globalNumSmallDiagEntries_ (0),
00195   globalNumZeroDiagEntries_ (0),
00196   globalNumNegDiagEntries_ (0),
00197   globalDiagNormDiff_(Teuchos::ScalarTraits<magnitude_type>::zero()),
00198   savedDiagOffsets_ (false),
00199   hasBlockCrsMatrix_ (false)
00200 {
00201   this->setObjectLabel ("Ifpack2::Relaxation");
00202 }
00203 
00204 //==========================================================================
00205 template<class MatrixType>
00206 Relaxation<MatrixType>::~Relaxation() {
00207 }
00208 
00209 template<class MatrixType>
00210 Teuchos::RCP<const Teuchos::ParameterList>
00211 Relaxation<MatrixType>::getValidParameters () const
00212 {
00213   using Teuchos::Array;
00214   using Teuchos::ParameterList;
00215   using Teuchos::parameterList;
00216   using Teuchos::RCP;
00217   using Teuchos::rcp;
00218   using Teuchos::rcp_const_cast;
00219   using Teuchos::rcp_implicit_cast;
00220   using Teuchos::setStringToIntegralParameter;
00221   typedef Teuchos::ParameterEntryValidator PEV;
00222 
00223   if (validParams_.is_null ()) {
00224     RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
00225 
00226     // Set a validator that automatically converts from the valid
00227     // string options to their enum values.
00228     Array<std::string> precTypes (3);
00229     precTypes[0] = "Jacobi";
00230     precTypes[1] = "Gauss-Seidel";
00231     precTypes[2] = "Symmetric Gauss-Seidel";
00232     Array<Details::RelaxationType> precTypeEnums (3);
00233     precTypeEnums[0] = Details::JACOBI;
00234     precTypeEnums[1] = Details::GS;
00235     precTypeEnums[2] = Details::SGS;
00236     const std::string defaultPrecType ("Jacobi");
00237     setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
00238       defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
00239       pl.getRawPtr ());
00240 
00241     const int numSweeps = 1;
00242     RCP<PEV> numSweepsValidator =
00243       rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
00244     pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
00245              rcp_const_cast<const PEV> (numSweepsValidator));
00246 
00247     const scalar_type dampingFactor = STS::one ();
00248     pl->set ("relaxation: damping factor", dampingFactor);
00249 
00250     const bool zeroStartingSolution = true;
00251     pl->set ("relaxation: zero starting solution", zeroStartingSolution);
00252 
00253     const bool doBackwardGS = false;
00254     pl->set ("relaxation: backward mode", doBackwardGS);
00255 
00256     const bool doL1Method = false;
00257     pl->set ("relaxation: use l1", doL1Method);
00258 
00259     const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
00260       (STM::one() + STM::one()); // 1.5
00261     pl->set ("relaxation: l1 eta", l1eta);
00262 
00263     const scalar_type minDiagonalValue = STS::zero ();
00264     pl->set ("relaxation: min diagonal value", minDiagonalValue);
00265 
00266     const bool fixTinyDiagEntries = false;
00267     pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
00268 
00269     const bool checkDiagEntries = false;
00270     pl->set ("relaxation: check diagonal entries", checkDiagEntries);
00271 
00272     Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
00273     pl->set("relaxation: local smoothing indices", localSmoothingIndices);
00274 
00275     validParams_ = rcp_const_cast<const ParameterList> (pl);
00276   }
00277   return validParams_;
00278 }
00279 
00280 
00281 template<class MatrixType>
00282 void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
00283 {
00284   using Teuchos::getIntegralValue;
00285   using Teuchos::ParameterList;
00286   using Teuchos::RCP;
00287   typedef scalar_type ST; // just to make code below shorter
00288 
00289   pl.validateParametersAndSetDefaults (* getValidParameters ());
00290 
00291   const Details::RelaxationType precType =
00292     getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
00293   const int numSweeps = pl.get<int> ("relaxation: sweeps");
00294   const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
00295   const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
00296   const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
00297   const bool doL1Method = pl.get<bool> ("relaxation: use l1");
00298   const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
00299   const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
00300   const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
00301   const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
00302   Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
00303 
00304 
00305   // "Commit" the changes, now that we've validated everything.
00306   PrecType_              = precType;
00307   NumSweeps_             = numSweeps;
00308   DampingFactor_         = dampingFactor;
00309   ZeroStartingSolution_  = zeroStartSol;
00310   DoBackwardGS_          = doBackwardGS;
00311   DoL1Method_            = doL1Method;
00312   L1Eta_                 = l1Eta;
00313   MinDiagonalValue_      = minDiagonalValue;
00314   fixTinyDiagEntries_    = fixTinyDiagEntries;
00315   checkDiagEntries_      = checkDiagEntries;
00316   localSmoothingIndices_ = localSmoothingIndices;
00317 }
00318 
00319 
00320 template<class MatrixType>
00321 void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
00322 {
00323   // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
00324   // but otherwise, we will get [unused] in pl
00325   this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
00326 }
00327 
00328 
00329 template<class MatrixType>
00330 Teuchos::RCP<const Teuchos::Comm<int> >
00331 Relaxation<MatrixType>::getComm() const {
00332   TEUCHOS_TEST_FOR_EXCEPTION(
00333     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
00334     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00335     "input matrix before calling this method.");
00336   return A_->getRowMap ()->getComm ();
00337 }
00338 
00339 
00340 template<class MatrixType>
00341 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
00342 Relaxation<MatrixType>::getMatrix() const {
00343   return A_;
00344 }
00345 
00346 
00347 template<class MatrixType>
00348 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
00349                                typename MatrixType::global_ordinal_type,
00350                                typename MatrixType::node_type> >
00351 Relaxation<MatrixType>::getDomainMap () const {
00352   TEUCHOS_TEST_FOR_EXCEPTION(
00353     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
00354     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00355     "input matrix before calling this method.");
00356   return A_->getDomainMap ();
00357 }
00358 
00359 
00360 template<class MatrixType>
00361 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
00362                                typename MatrixType::global_ordinal_type,
00363                                typename MatrixType::node_type> >
00364 Relaxation<MatrixType>::getRangeMap () const {
00365   TEUCHOS_TEST_FOR_EXCEPTION(
00366     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
00367     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00368     "input matrix before calling this method.");
00369   return A_->getRangeMap ();
00370 }
00371 
00372 //==========================================================================
00373 template<class MatrixType>
00374 bool Relaxation<MatrixType>::hasTransposeApply () const {
00375   return true;
00376 }
00377 
00378 //==========================================================================
00379 template<class MatrixType>
00380 int Relaxation<MatrixType>::getNumInitialize() const {
00381   return(NumInitialize_);
00382 }
00383 
00384 //==========================================================================
00385 template<class MatrixType>
00386 int Relaxation<MatrixType>::getNumCompute() const {
00387   return(NumCompute_);
00388 }
00389 
00390 //==========================================================================
00391 template<class MatrixType>
00392 int Relaxation<MatrixType>::getNumApply() const {
00393   return(NumApply_);
00394 }
00395 
00396 //==========================================================================
00397 template<class MatrixType>
00398 double Relaxation<MatrixType>::getInitializeTime() const {
00399   return(InitializeTime_);
00400 }
00401 
00402 //==========================================================================
00403 template<class MatrixType>
00404 double Relaxation<MatrixType>::getComputeTime() const {
00405   return(ComputeTime_);
00406 }
00407 
00408 //==========================================================================
00409 template<class MatrixType>
00410 double Relaxation<MatrixType>::getApplyTime() const {
00411   return(ApplyTime_);
00412 }
00413 
00414 //==========================================================================
00415 template<class MatrixType>
00416 double Relaxation<MatrixType>::getComputeFlops() const {
00417   return(ComputeFlops_);
00418 }
00419 
00420 //==========================================================================
00421 template<class MatrixType>
00422 double Relaxation<MatrixType>::getApplyFlops() const {
00423   return(ApplyFlops_);
00424 }
00425 
00426 //==========================================================================
00427 template<class MatrixType>
00428 typename Teuchos::ScalarTraits<typename MatrixType::scalar_type>::magnitudeType
00429 Relaxation<MatrixType>::getCondEst () const
00430 {
00431   return Condest_;
00432 }
00433 
00434 
00435 template<class MatrixType>
00436 typename Teuchos::ScalarTraits<typename MatrixType::scalar_type>::magnitudeType
00437 Relaxation<MatrixType>::
00438 computeCondEst (CondestType CT,
00439                 typename MatrixType::local_ordinal_type MaxIters,
00440                 magnitude_type Tol,
00441                 const Teuchos::Ptr<const row_matrix_type>& matrix)
00442 {
00443   if (! isComputed ()) { // cannot compute right now
00444     return -Teuchos::ScalarTraits<magnitude_type>::one ();
00445   }
00446   // always compute it. Call Condest() with no parameters to get
00447   // the previous estimate.
00448   Condest_ = Ifpack2::Condest (*this, CT, MaxIters, Tol, matrix);
00449   return Condest_;
00450 }
00451 
00452 
00453 template<class MatrixType>
00454 void
00455 Relaxation<MatrixType>::
00456 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
00457        Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
00458        Teuchos::ETransp mode,
00459        scalar_type alpha,
00460        scalar_type beta) const
00461 {
00462   using Teuchos::as;
00463   using Teuchos::RCP;
00464   using Teuchos::rcp;
00465   using Teuchos::rcpFromRef;
00466   typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
00467                               global_ordinal_type, node_type> MV;
00468   TEUCHOS_TEST_FOR_EXCEPTION(
00469     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
00470     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00471     "input matrix, then call compute(), before calling this method.");
00472   TEUCHOS_TEST_FOR_EXCEPTION(
00473     ! isComputed (),
00474     std::runtime_error,
00475     "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
00476     "preconditioner instance before you may call apply().  You may call "
00477     "isComputed() to find out if compute() has been called already.");
00478   TEUCHOS_TEST_FOR_EXCEPTION(
00479     X.getNumVectors() != Y.getNumVectors(),
00480     std::runtime_error,
00481     "Ifpack2::Relaxation::apply: X and Y have different numbers of columns.  "
00482     "X has " << X.getNumVectors() << " columns, but Y has "
00483     << Y.getNumVectors() << " columns.");
00484   TEUCHOS_TEST_FOR_EXCEPTION(
00485     beta != STS::zero (), std::logic_error,
00486     "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
00487     "implemented.");
00488   {
00489     // Reset the timer each time, since Relaxation uses the same Time
00490     // object to track times for different methods.
00491     Teuchos::TimeMonitor timeMon (*Time_, true);
00492 
00493     // Special case: alpha == 0.
00494     if (alpha == STS::zero ()) {
00495       // No floating-point operations, so no need to update a count.
00496       Y.putScalar (STS::zero ());
00497     }
00498     else {
00499       // If X and Y are pointing to the same memory location,
00500       // we need to create an auxiliary vector, Xcopy
00501       RCP<const MV> Xcopy;
00502       if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
00503         Xcopy = rcp (new MV(Tpetra::createCopy(X)));
00504       }
00505       else {
00506         Xcopy = rcpFromRef (X);
00507       }
00508 
00509       // Each of the following methods updates the flop count itself.
00510       // All implementations handle zeroing out the starting solution
00511       // (if necessary) themselves.
00512       switch (PrecType_) {
00513       case Ifpack2::Details::JACOBI:
00514         ApplyInverseJacobi(*Xcopy,Y);
00515         break;
00516       case Ifpack2::Details::GS:
00517         ApplyInverseGS(*Xcopy,Y);
00518         break;
00519       case Ifpack2::Details::SGS:
00520         ApplyInverseSGS(*Xcopy,Y);
00521         break;
00522       default:
00523         TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
00524           "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
00525           << PrecType_ << ".  Please report this bug to the Ifpack2 developers.");
00526       }
00527       if (alpha != STS::one ()) {
00528         Y.scale (alpha);
00529         const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
00530         const double numVectors = as<double> (Y.getNumVectors ());
00531         ApplyFlops_ += numGlobalRows * numVectors;
00532       }
00533     }
00534   }
00535   ApplyTime_ += Time_->totalElapsedTime ();
00536   ++NumApply_;
00537 }
00538 
00539 
00540 template<class MatrixType>
00541 void
00542 Relaxation<MatrixType>::
00543 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
00544           Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
00545           Teuchos::ETransp mode) const
00546 {
00547   TEUCHOS_TEST_FOR_EXCEPTION(
00548     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
00549     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00550     "input matrix, then call compute(), before calling this method.");
00551   TEUCHOS_TEST_FOR_EXCEPTION(
00552     ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
00553     "isComputed() must be true before you may call applyMat().  "
00554     "Please call compute() before calling this method.");
00555   TEUCHOS_TEST_FOR_EXCEPTION(
00556     X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
00557     "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
00558     << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
00559   A_->apply (X, Y, mode);
00560 }
00561 
00562 
00563 template<class MatrixType>
00564 void Relaxation<MatrixType>::initialize () {
00565   TEUCHOS_TEST_FOR_EXCEPTION(
00566     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::initialize: "
00567     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00568     "input matrix before calling this method.");
00569 
00570   // Initialization for Relaxation is trivial, so we say it takes zero time.
00571   //InitializeTime_ += Time_->totalElapsedTime ();
00572 
00573   ++NumInitialize_;
00574   isInitialized_ = true;
00575 }
00576 
00577 template<class MatrixType>
00578 void Relaxation<MatrixType>::computeBlockCrs ()
00579 {
00580   using Teuchos::Array;
00581   using Teuchos::ArrayRCP;
00582   using Teuchos::ArrayView;
00583   using Teuchos::as;
00584   using Teuchos::Comm;
00585   using Teuchos::RCP;
00586   using Teuchos::rcp;
00587   using Teuchos::REDUCE_MAX;
00588   using Teuchos::REDUCE_MIN;
00589   using Teuchos::REDUCE_SUM;
00590   using Teuchos::rcp_dynamic_cast;
00591   using Teuchos::reduceAll;
00592 
00593   {
00594     // Reset the timer each time, since Relaxation uses the same Time
00595     // object to track times for different methods.
00596     Teuchos::TimeMonitor timeMon (*Time_, true);
00597 
00598     TEUCHOS_TEST_FOR_EXCEPTION(
00599       A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
00600       "computeBlockCrs: The input matrix A is null.  Please call setMatrix() "
00601       "with a nonnull input matrix, then call initialize(), before calling "
00602       "this method.");
00603     const block_crs_matrix_type* blockCrsA =
00604       dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
00605     TEUCHOS_TEST_FOR_EXCEPTION(
00606       blockCrsA == NULL, std::logic_error, "Ifpack2::Relaxation::"
00607       "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
00608       "got this far.  Please report this bug to the Ifpack2 developers.");
00609 
00610     const scalar_type zero = STS::zero ();
00611     const scalar_type one = STS::one ();
00612 
00613     // Reset state.
00614     IsComputed_ = false;
00615     Condest_ = -STM::one ();
00616 
00617     const local_ordinal_type blockSize = blockCrsA->getBlockSize ();
00618 
00619     if (! savedDiagOffsets_) {
00620       BlockDiagonal_ = Teuchos::null;
00621       BlockDiagonal_ =
00622         rcp (new block_crs_matrix_type (* (blockCrsA->getDiagonalGraph ()),
00623                                         blockSize));
00624       blockCrsA->getLocalDiagOffsets (diagOffsets_);
00625       savedDiagOffsets_ = true;
00626     }
00627     blockCrsA->getLocalDiagCopy (*BlockDiagonal_, diagOffsets_ ());
00628 
00629     const size_t numMyRows = A_->getNodeNumRows ();
00630 
00631     if (DoL1Method_ && IsParallel_) {
00632       const scalar_type two = one + one;
00633       const size_t maxLength = A_->getNodeMaxNumRowEntries ();
00634       Array<local_ordinal_type> indices (maxLength);
00635       Array<scalar_type> values (maxLength * blockSize * blockSize);
00636       size_t numEntries = 0;
00637 
00638       for (size_t i = 0; i < numMyRows; ++i) {
00639         blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
00640         scalar_type* diagBlock = BlockDiagonal_->getLocalBlock (i,i).getRawPtr ();
00641         for (local_ordinal_type subRow = 0; subRow < blockSize; ++subRow) {
00642           magnitude_type diagonal_boost = STM::zero ();
00643           for (size_t k = 0 ; k < numEntries ; ++k) {
00644             if (static_cast<size_t> (indices[k]) > numMyRows) {
00645               const size_t offset = blockSize*blockSize*k + subRow*blockSize;
00646               for (local_ordinal_type subCol = 0; subCol < blockSize; ++subCol) {
00647                 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
00648               }
00649             }
00650           }
00651           if (STS::magnitude (diagBlock[subRow*(blockSize+1)]) < L1Eta_ * diagonal_boost) {
00652             diagBlock[subRow*(blockSize+1)] += diagonal_boost;
00653           }
00654         }
00655       }
00656     }
00657 
00658     blockDiagonalFactorizationPivots.resize (numMyRows * blockSize);
00659     int info;
00660     for (size_t i = 0 ; i < numMyRows; ++i) {
00661       typename block_crs_matrix_type::little_block_type diagBlock =
00662         BlockDiagonal_->getLocalBlock (i, i);
00663       diagBlock.factorize (&blockDiagonalFactorizationPivots[i*blockSize], info);
00664     }
00665 
00666     Importer_ = A_->getGraph ()->getImporter ();
00667   } // end TimeMonitor scope
00668 
00669   ComputeTime_ += Time_->totalElapsedTime ();
00670   ++NumCompute_;
00671   IsComputed_ = true;
00672 }
00673 
00674 template<class MatrixType>
00675 void Relaxation<MatrixType>::compute ()
00676 {
00677   using Teuchos::Array;
00678   using Teuchos::ArrayRCP;
00679   using Teuchos::ArrayView;
00680   using Teuchos::as;
00681   using Teuchos::Comm;
00682   using Teuchos::RCP;
00683   using Teuchos::rcp;
00684   using Teuchos::REDUCE_MAX;
00685   using Teuchos::REDUCE_MIN;
00686   using Teuchos::REDUCE_SUM;
00687   using Teuchos::rcp_dynamic_cast;
00688   using Teuchos::reduceAll;
00689   typedef Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> vector_type;
00690   const scalar_type zero = STS::zero ();
00691   const scalar_type one = STS::one ();
00692 
00693   // We don't count initialization in compute() time.
00694   if (! isInitialized ()) {
00695     initialize ();
00696   }
00697 
00698   const block_crs_matrix_type* blockCrsA = dynamic_cast<const block_crs_matrix_type*>(A_.getRawPtr());
00699   if (blockCrsA != NULL)
00700   {
00701     hasBlockCrsMatrix_ = true;
00702     computeBlockCrs();
00703     return;
00704   }
00705 
00706   {
00707     // Reset the timer each time, since Relaxation uses the same Time
00708     // object to track times for different methods.
00709     Teuchos::TimeMonitor timeMon (*Time_, true);
00710 
00711     TEUCHOS_TEST_FOR_EXCEPTION(
00712       A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::compute: "
00713       "The input matrix A is null.  Please call setMatrix() with a nonnull "
00714       "input matrix, then call initialize(), before calling this method.");
00715 
00716 
00717     // Reset state.
00718     IsComputed_ = false;
00719     Condest_ = -STM::one ();
00720 
00721     TEUCHOS_TEST_FOR_EXCEPTION(
00722       NumSweeps_ < 0, std::logic_error,
00723       "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ << " < 0.  "
00724       "Please report this bug to the Ifpack2 developers.");
00725 
00726     Diagonal_ = rcp (new vector_type (A_->getRowMap ()));
00727 
00728     TEUCHOS_TEST_FOR_EXCEPTION(
00729       Diagonal_.is_null (), std::logic_error,
00730       "Ifpack2::Relaxation::compute: Vector of diagonal entries has not been "
00731       "created yet.  Please report this bug to the Ifpack2 developers.");
00732 
00733     // Extract the diagonal entries.  The CrsMatrix static graph
00734     // version is faster for subsequent calls to compute(), since it
00735     // caches the diagonal offsets.
00736     //
00737     // isStaticGraph() == true means that the matrix was created with
00738     // a const graph.  The only requirement is that the structure of
00739     // the matrix never changes, so isStaticGraph() == true is a bit
00740     // more conservative than we need.  However, Tpetra doesn't (as of
00741     // 05 Apr 2013) have a way to tell if the graph hasn't changed
00742     // since the last time we used it.
00743     {
00744       // NOTE (mfh 07 Jul 2013): We must cast here to CrsMatrix
00745       // instead of MatrixType, because isStaticGraph is a CrsMatrix
00746       // method (not inherited from RowMatrix's interface).  It's
00747       // perfectly valid to do relaxation on a RowMatrix which is not
00748       // a CrsMatrix.
00749       //
00750       // This cast isn't ideal because it won't catch CrsMatrix
00751       // specializations with nondefault LocalMatOps (fifth) template
00752       // parameter.  The code will still be correct if the cast fails,
00753       // but it won't pick up the "cached offsets" optimization.
00754       const crs_matrix_type* crsMat =
00755         dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
00756       if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
00757         A_->getLocalDiagCopy (*Diagonal_); // slow path
00758       } else {
00759         if (! savedDiagOffsets_) { // we haven't precomputed offsets
00760           crsMat->getLocalDiagOffsets (diagOffsets_);
00761           savedDiagOffsets_ = true;
00762         }
00763         crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_ ());
00764 #ifdef HAVE_TPETRA_DEBUG
00765         // Validate the fast-path diagonal against the slow-path diagonal.
00766         vector_type D_copy (A_->getRowMap ());
00767         A_->getLocalDiagCopy (D_copy);
00768         D_copy.update (STS::one (), *Diagonal_, -STS::one ());
00769         const magnitude_type err = D_copy.normInf ();
00770         // The two diagonals should be exactly the same, so their
00771         // difference should be exactly zero.
00772         TEUCHOS_TEST_FOR_EXCEPTION(
00773           err != STM::zero(), std::logic_error, "Ifpack2::Relaxation::compute: "
00774           "\"fast-path\" diagonal computation failed.  \\|D1 - D2\\|_inf = "
00775           << err << ".");
00776 #endif // HAVE_TPETRA_DEBUG
00777       }
00778     }
00779 
00780     // If we're checking the computed inverse diagonal, then keep a
00781     // copy of the original diagonal entries for later comparison.
00782     RCP<vector_type> origDiag;
00783     if (checkDiagEntries_) {
00784       origDiag = rcp (new vector_type (A_->getRowMap ()));
00785       *origDiag = *Diagonal_;
00786     }
00787 
00788     // "Host view" means that if the Node type is a GPU Node, the
00789     // ArrayRCP points to host memory, not device memory.  It will
00790     // write back to device memory (Diagonal_) at end of scope.
00791     ArrayRCP<scalar_type> diagHostView = Diagonal_->get1dViewNonConst ();
00792 
00793     // The view below is only valid as long as diagHostView is within
00794     // scope.  We extract a raw pointer in release mode because we
00795     // don't trust older versions of compilers (like GCC 4.4.x or
00796     // Intel < 13) to optimize away ArrayView::operator[].
00797 #ifdef HAVE_IFPACK2_DEBUG
00798     ArrayView<scalar_type> diag = diagHostView ();
00799 #else
00800     scalar_type* KOKKOSCLASSIC_RESTRICT const diag = diagHostView.getRawPtr ();
00801 #endif // HAVE_IFPACK2_DEBUG
00802 
00803     const size_t numMyRows = A_->getNodeNumRows ();
00804 
00805     // Setup for L1 Methods.
00806     // Here we add half the value of the off-processor entries in the row,
00807     // but only if diagonal isn't sufficiently large.
00808     //
00809     // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
00810     // Yang.  "Multigrid Smoothers for Ultraparallel Computing."  SIAM
00811     // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
00812     if (DoL1Method_ && IsParallel_) {
00813       const scalar_type two = one + one;
00814       const size_t maxLength = A_->getNodeMaxNumRowEntries ();
00815       Array<local_ordinal_type> indices (maxLength);
00816       Array<scalar_type> values (maxLength);
00817       size_t numEntries;
00818 
00819       for (size_t i = 0; i < numMyRows; ++i) {
00820         A_->getLocalRowCopy (i, indices (), values (), numEntries);
00821         magnitude_type diagonal_boost = STM::zero ();
00822         for (size_t k = 0 ; k < numEntries ; ++k) {
00823           if (static_cast<size_t> (indices[k]) > numMyRows) {
00824             diagonal_boost += STS::magnitude (values[k] / two);
00825           }
00826         }
00827         if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
00828           diag[i] += diagonal_boost;
00829         }
00830       }
00831 
00832     }
00833 
00834     //
00835     // Invert the diagonal entries of the matrix (not in place).
00836     //
00837 
00838     // Precompute some quantities for "fixing" zero or tiny diagonal
00839     // entries.  We'll only use them if this "fixing" is enabled.
00840     //
00841     // SmallTraits covers for the lack of eps() in
00842     // Teuchos::ScalarTraits when its template parameter is not a
00843     // floating-point type.  (Ifpack2 sometimes gets instantiated for
00844     // integer Scalar types.)
00845     const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
00846       one / SmallTraits<scalar_type>::eps () :
00847       one / MinDiagonalValue_;
00848     // It's helpful not to have to recompute this magnitude each time.
00849     const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
00850 
00851     if (checkDiagEntries_) {
00852       //
00853       // Check diagonal elements, replace zero elements with the minimum
00854       // diagonal value, and store their inverses.  Count the number of
00855       // "small" and zero diagonal entries while we're at it.
00856       //
00857       size_t numSmallDiagEntries = 0; // "small" includes zero
00858       size_t numZeroDiagEntries = 0; // # zero diagonal entries
00859       size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
00860 
00861       // As we go, keep track of the diagonal entries with the least and
00862       // greatest magnitude.  We could use the trick of starting the min
00863       // with +Inf and the max with -Inf, but that doesn't work if
00864       // scalar_type is a built-in integer type.  Thus, we have to start
00865       // by reading the first diagonal entry redundantly.
00866       // scalar_type minMagDiagEntry = zero;
00867       // scalar_type maxMagDiagEntry = zero;
00868       magnitude_type minMagDiagEntryMag = STM::zero ();
00869       magnitude_type maxMagDiagEntryMag = STM::zero ();
00870       if (numMyRows > 0) {
00871         const scalar_type d_0 = diag[0];
00872         const magnitude_type d_0_mag = STS::magnitude (d_0);
00873         // minMagDiagEntry = d_0;
00874         // maxMagDiagEntry = d_0;
00875         minMagDiagEntryMag = d_0_mag;
00876         maxMagDiagEntryMag = d_0_mag;
00877       }
00878 
00879       // Go through all the diagonal entries.  Compute counts of
00880       // small-magnitude, zero, and negative-real-part entries.  Invert
00881       // the diagonal entries that aren't too small.  For those that are
00882       // too small in magnitude, replace them with 1/MinDiagonalValue_
00883       // (or 1/eps if MinDiagonalValue_ happens to be zero).
00884       for (size_t i = 0 ; i < numMyRows; ++i) {
00885         const scalar_type d_i = diag[i];
00886         const magnitude_type d_i_mag = STS::magnitude (d_i);
00887         const magnitude_type d_i_real = STS::real (d_i);
00888 
00889         // We can't compare complex numbers, but we can compare their
00890         // real parts.
00891         if (d_i_real < STM::zero ()) {
00892           ++numNegDiagEntries;
00893         }
00894         if (d_i_mag < minMagDiagEntryMag) {
00895           // minMagDiagEntry = d_i;
00896           minMagDiagEntryMag = d_i_mag;
00897         }
00898         if (d_i_mag > maxMagDiagEntryMag) {
00899           // maxMagDiagEntry = d_i;
00900           maxMagDiagEntryMag = d_i_mag;
00901         }
00902 
00903         if (fixTinyDiagEntries_) {
00904           // <= not <, in case minDiagValMag is zero.
00905           if (d_i_mag <= minDiagValMag) {
00906             ++numSmallDiagEntries;
00907             if (d_i_mag == STM::zero ()) {
00908               ++numZeroDiagEntries;
00909             }
00910             diag[i] = oneOverMinDiagVal;
00911           } else {
00912             diag[i] = one / d_i;
00913           }
00914         }
00915         else { // Don't fix zero or tiny diagonal entries.
00916           // <= not <, in case minDiagValMag is zero.
00917           if (d_i_mag <= minDiagValMag) {
00918             ++numSmallDiagEntries;
00919             if (d_i_mag == STM::zero ()) {
00920               ++numZeroDiagEntries;
00921             }
00922           }
00923           diag[i] = one / d_i;
00924         }
00925       }
00926 
00927       // We're done computing the inverse diagonal, so invalidate the view.
00928       // This ensures that the operations below, that use methods on Vector,
00929       // produce correct results even on a GPU Node.
00930       diagHostView = Teuchos::null;
00931 
00932       // Count floating-point operations of computing the inverse diagonal.
00933       //
00934       // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
00935       if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
00936         ComputeFlops_ += 4.0 * numMyRows;
00937       } else {
00938         ComputeFlops_ += numMyRows;
00939       }
00940 
00941       // Collect global data about the diagonal entries.  Only do this
00942       // (which involves O(1) all-reduces) if the user asked us to do
00943       // the extra work.
00944       //
00945       // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
00946       // zero rows.  Fixing this requires one of two options:
00947       //
00948       // 1. Use a custom reduction operation that ignores processes
00949       //    with zero diagonal entries.
00950       // 2. Split the communicator, compute all-reduces using the
00951       //    subcommunicator over processes that have a nonzero number
00952       //    of diagonal entries, and then broadcast from one of those
00953       //    processes (if there is one) to the processes in the other
00954       //    subcommunicator.
00955       const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
00956 
00957       // Compute global min and max magnitude of entries.
00958       Array<magnitude_type> localVals (2);
00959       localVals[0] = minMagDiagEntryMag;
00960       // (- (min (- x))) is the same as (max x).
00961       localVals[1] = -maxMagDiagEntryMag;
00962       Array<magnitude_type> globalVals (2);
00963       reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
00964                                       localVals.getRawPtr (),
00965                                       globalVals.getRawPtr ());
00966       globalMinMagDiagEntryMag_ = globalVals[0];
00967       globalMaxMagDiagEntryMag_ = -globalVals[1];
00968 
00969       Array<size_t> localCounts (3);
00970       localCounts[0] = numSmallDiagEntries;
00971       localCounts[1] = numZeroDiagEntries;
00972       localCounts[2] = numNegDiagEntries;
00973       Array<size_t> globalCounts (3);
00974       reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
00975                               localCounts.getRawPtr (),
00976                               globalCounts.getRawPtr ());
00977       globalNumSmallDiagEntries_ = globalCounts[0];
00978       globalNumZeroDiagEntries_ = globalCounts[1];
00979       globalNumNegDiagEntries_ = globalCounts[2];
00980 
00981       // Forestall "set but not used" compiler warnings.
00982       // (void) minMagDiagEntry;
00983       // (void) maxMagDiagEntry;
00984 
00985       // Compute and save the difference between the computed inverse
00986       // diagonal, and the original diagonal's inverse.
00987       RCP<vector_type> diff = rcp (new vector_type (A_->getRowMap ()));
00988       diff->reciprocal (*origDiag);
00989       diff->update (-one, *Diagonal_, one);
00990       globalDiagNormDiff_ = diff->norm2 ();
00991     }
00992     else { // don't check diagonal elements
00993       if (fixTinyDiagEntries_) {
00994         // Go through all the diagonal entries.  Invert those that
00995         // aren't too small in magnitude.  For those that are too
00996         // small in magnitude, replace them with oneOverMinDiagVal.
00997         for (size_t i = 0 ; i < numMyRows; ++i) {
00998           const scalar_type d_i = diag[i];
00999           const magnitude_type d_i_mag = STS::magnitude (d_i);
01000 
01001           // <= not <, in case minDiagValMag is zero.
01002           if (d_i_mag <= minDiagValMag) {
01003             diag[i] = oneOverMinDiagVal;
01004           } else {
01005             diag[i] = one / d_i;
01006           }
01007         }
01008       }
01009       else { // don't fix tiny or zero diagonal entries
01010         for (size_t i = 0 ; i < numMyRows; ++i) {
01011           diag[i] = one / diag[i];
01012         }
01013       }
01014       if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
01015         ComputeFlops_ += 4.0 * numMyRows;
01016       } else {
01017         ComputeFlops_ += numMyRows;
01018       }
01019     }
01020 
01021     if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
01022                         PrecType_ == Ifpack2::Details::SGS)) {
01023       Importer_ = A_->getGraph ()->getImporter ();
01024       // mfh 21 Mar 2013: The Import object may be null, but in that
01025       // case, the domain and column Maps are the same and we don't
01026       // need to Import anyway.
01027     }
01028   } // end TimeMonitor scope
01029 
01030   ComputeTime_ += Time_->totalElapsedTime ();
01031   ++NumCompute_;
01032   IsComputed_ = true;
01033 }
01034 
01035 
01036 template<class MatrixType>
01037 void
01038 Relaxation<MatrixType>::
01039 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01040                     Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01041 {
01042   using Teuchos::as;
01043   typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
01044     global_ordinal_type, node_type> MV;
01045 
01046   if (hasBlockCrsMatrix_) {
01047     ApplyInverseJacobi_BlockCrsMatrix (X, Y);
01048     return;
01049   }
01050 
01051   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01052   const double numVectors = as<double> (X.getNumVectors ());
01053   if (ZeroStartingSolution_) {
01054     // For the first Jacobi sweep, if we are allowed to assume that
01055     // the initial guess is zero, then Jacobi is just diagonal
01056     // scaling.  (A_ij * x_j = 0 for i != j, since x_j = 0.)
01057     // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
01058     Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
01059 
01060     // Count (global) floating-point operations.  Ifpack2 represents
01061     // this as a floating-point number rather than an integer, so that
01062     // overflow (for a very large number of calls, or a very large
01063     // problem) is approximate instead of catastrophic.
01064     double flopUpdate = 0.0;
01065     if (DampingFactor_ == STS::one ()) {
01066       // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
01067       flopUpdate = numGlobalRows * numVectors;
01068     } else {
01069       // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
01070       // Two multiplies per entry of Y.
01071       flopUpdate = 2.0 * numGlobalRows * numVectors;
01072     }
01073     ApplyFlops_ += flopUpdate;
01074     if (NumSweeps_ == 1) {
01075       return;
01076     }
01077   }
01078   // If we were allowed to assume that the starting guess was zero,
01079   // then we have already done the first sweep above.
01080   const int startSweep = ZeroStartingSolution_ ? 1 : 0;
01081   // We don't need to initialize the result MV, since the sparse
01082   // mat-vec will clobber its contents anyway.
01083   MV A_times_Y (Y.getMap (), as<size_t>(numVectors), false);
01084   for (int j = startSweep; j < NumSweeps_; ++j) {
01085     // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
01086     applyMat (Y, A_times_Y);
01087     A_times_Y.update (STS::one (), X, -STS::one ());
01088     Y.elementWiseMultiply (DampingFactor_, *Diagonal_, A_times_Y, STS::one ());
01089   }
01090 
01091   // For each column of output, for each pass over the matrix:
01092   //
01093   // - One + and one * for each matrix entry
01094   // - One / and one + for each row of the matrix
01095   // - If the damping factor is not one: one * for each row of the
01096   //   matrix.  (It's not fair to count this if the damping factor is
01097   //   one, since the implementation could skip it.  Whether it does
01098   //   or not is the implementation's choice.)
01099 
01100   // Floating-point operations due to the damping factor, per matrix
01101   // row, per direction, per columm of output.
01102   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01103   const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
01104   ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
01105     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01106 }
01107 
01108 template<class MatrixType>
01109 void
01110 Relaxation<MatrixType>::
01111 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
01112                                                              global_ordinal_type, node_type>& X,
01113                                    Tpetra::MultiVector<scalar_type, local_ordinal_type,
01114                                                        global_ordinal_type,node_type>& Y) const
01115 {
01116   typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
01117     local_ordinal_type, global_ordinal_type,node_type> BMV;
01118   typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
01119     global_ordinal_type, node_type> MV;
01120   typedef typename block_crs_matrix_type::little_block_type little_block_type;
01121   typedef typename block_crs_matrix_type::little_vec_type little_vec_type;
01122 
01123   if (ZeroStartingSolution_) {
01124     Y.putScalar (STS::zero ());
01125   }
01126 
01127   const int numVectors = X.getNumVectors ();
01128 
01129   const block_crs_matrix_type* blockMat =
01130     dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
01131 
01132   const local_ordinal_type blockSize = blockMat->getBlockSize();
01133   BMV A_times_Y_Block (*blockMat->getRowMap(), *Y.getMap (), blockSize, numVectors);
01134   MV A_times_Y = A_times_Y_Block.getMultiVectorView();
01135   BMV yBlock(Y, *blockMat->getRowMap(), blockSize);
01136   for (int j = 0; j < NumSweeps_; ++j) {
01137     blockMat->apply (Y, A_times_Y);
01138     A_times_Y.update (STS::one (), X, -STS::one ());
01139     const size_t numRows = blockMat->getNodeNumRows ();
01140     for (int i = 0; i < numVectors; ++i) {
01141       for (size_t k = 0; k < numRows; ++k) {
01142         // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
01143         little_block_type factorizedBlockDiag = BlockDiagonal_->getLocalBlock (k, k);
01144         little_vec_type xloc = A_times_Y_Block.getLocalBlock (k, i);
01145         little_vec_type yloc = yBlock.getLocalBlock (k, i);
01146         factorizedBlockDiag.solve (xloc, &blockDiagonalFactorizationPivots[i*blockSize]);
01147         yloc.update (DampingFactor_, xloc);
01148       }
01149     }
01150   }
01151 }
01152 
01153 template<class MatrixType>
01154 void
01155 Relaxation<MatrixType>::
01156 ApplyInverseGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01157                 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01158 {
01159   // The CrsMatrix version is faster, because it can access the sparse
01160   // matrix data directly, rather than by copying out each row's data
01161   // in turn.  Thus, we check whether the RowMatrix is really a
01162   // CrsMatrix.
01163   //
01164   // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
01165   // declaration in Ifpack2_Relaxation_decl.hpp header file.  The code
01166   // will still be correct if the cast fails, but it will use an
01167   // unoptimized kernel.
01168   const block_crs_matrix_type* blockCrsMat =
01169     dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
01170   const crs_matrix_type* crsMat =
01171     dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
01172   if (blockCrsMat != NULL)  {
01173     ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
01174   } else if (crsMat != NULL) {
01175     ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
01176   } else {
01177     ApplyInverseGS_RowMatrix (X, Y);
01178   }
01179 }
01180 
01181 
01182 template<class MatrixType>
01183 void Relaxation<MatrixType>::ApplyInverseGS_RowMatrix(
01184         const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01185               Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01186 {
01187   using Teuchos::Array;
01188   using Teuchos::ArrayRCP;
01189   using Teuchos::ArrayView;
01190   using Teuchos::as;
01191   using Teuchos::RCP;
01192   using Teuchos::rcp;
01193   using Teuchos::rcpFromRef;
01194   typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
01195 
01196 
01197 
01198   // Tpetra's GS implementation for CrsMatrix handles zeroing out the
01199   // starting multivector itself.  The generic RowMatrix version here
01200   // does not, so we have to zero out Y here.
01201   if (ZeroStartingSolution_) {
01202     Y.putScalar (STS::zero ());
01203   }
01204 
01205   const size_t NumVectors = X.getNumVectors ();
01206   const size_t maxLength = A_->getNodeMaxNumRowEntries ();
01207   Array<local_ordinal_type> Indices (maxLength);
01208   Array<scalar_type> Values (maxLength);
01209 
01210   // Local smoothing stuff
01211   const size_t numMyRows             = A_->getNodeNumRows();
01212   const local_ordinal_type * rowInd  = 0;
01213   size_t numActive                   = numMyRows;
01214   bool do_local = !localSmoothingIndices_.is_null();
01215   if(do_local) {
01216     rowInd    = localSmoothingIndices_.getRawPtr();
01217     numActive = localSmoothingIndices_.size();
01218   }
01219 
01220   RCP<MV> Y2;
01221   if (IsParallel_) {
01222     if (Importer_.is_null ()) { // domain and column Maps are the same.
01223       // We will copy Y into Y2 below, so no need to fill with zeros here.
01224       Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
01225     } else {
01226       // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
01227       // zeros here, since we are doing an Import into Y2 below
01228       // anyway.  However, it doesn't hurt correctness.
01229       Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
01230     }
01231   }
01232   else {
01233     Y2 = rcpFromRef (Y);
01234   }
01235 
01236   // Diagonal
01237   ArrayRCP<const scalar_type>  d_rcp = Diagonal_->get1dView();
01238   ArrayView<const scalar_type> d_ptr = d_rcp();
01239 
01240   // Constant stride check
01241   bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
01242 
01243   if(constant_stride) {
01244     // extract 1D RCPs
01245     size_t                    x_stride = X.getStride();
01246     size_t                   y2_stride = Y2->getStride();
01247     ArrayRCP<scalar_type>       y2_rcp = Y2->get1dViewNonConst();
01248     ArrayRCP<const scalar_type>  x_rcp = X.get1dView();
01249     ArrayView<scalar_type>      y2_ptr = y2_rcp();
01250     ArrayView<const scalar_type> x_ptr = x_rcp();
01251     Array<scalar_type> dtemp(NumVectors,STS::zero());
01252 
01253     for (int j = 0; j < NumSweeps_; j++) {
01254       // data exchange is here, once per sweep
01255       if (IsParallel_) {
01256         if (Importer_.is_null ()) {
01257           *Y2 = Y; // just copy, since domain and column Maps are the same
01258         } else {
01259           Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01260         }
01261       }
01262 
01263       if (! DoBackwardGS_) { // Forward sweep
01264         for (size_t ii = 0; ii < numActive; ++ii) {
01265           local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01266           size_t NumEntries;
01267           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01268           dtemp.assign(NumVectors,STS::zero());
01269 
01270           for (size_t k = 0; k < NumEntries; ++k) {
01271             const local_ordinal_type col = Indices[k];
01272             for (size_t m = 0; m < NumVectors; ++m)
01273               dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01274           }
01275 
01276           for (size_t m = 0; m < NumVectors; ++m)
01277             y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01278         }
01279       }
01280       else { // Backward sweep
01281         // ptrdiff_t is the same size as size_t, but is signed.  Being
01282         // signed is important so that i >= 0 is not trivially true.
01283         for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01284           local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01285           size_t NumEntries;
01286           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01287           dtemp.assign(NumVectors,STS::zero());
01288 
01289           for (size_t k = 0; k < NumEntries; ++k) {
01290             const local_ordinal_type col = Indices[k];
01291             for (size_t m = 0; m < NumVectors; ++m)
01292               dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01293           }
01294 
01295           for (size_t m = 0; m < NumVectors; ++m)
01296             y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01297         }
01298       }
01299       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01300       if (IsParallel_) {
01301         Y = *Y2;
01302       }
01303     }
01304   }
01305   else {
01306     // extract 2D RCPS
01307     ArrayRCP<ArrayRCP<scalar_type> > y2_ptr      = Y2->get2dViewNonConst();
01308     ArrayRCP<ArrayRCP<const scalar_type> > x_ptr =  X.get2dView();
01309 
01310     for (int j = 0; j < NumSweeps_; j++) {
01311       // data exchange is here, once per sweep
01312       if (IsParallel_) {
01313         if (Importer_.is_null ()) {
01314           *Y2 = Y; // just copy, since domain and column Maps are the same
01315         } else {
01316           Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01317         }
01318       }
01319 
01320       if (! DoBackwardGS_) { // Forward sweep
01321         for (size_t ii = 0; ii < numActive; ++ii) {
01322           local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01323           size_t NumEntries;
01324           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01325 
01326           for (size_t m = 0; m < NumVectors; ++m) {
01327             scalar_type dtemp = STS::zero ();
01328             ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01329             ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01330 
01331             for (size_t k = 0; k < NumEntries; ++k) {
01332               const local_ordinal_type col = Indices[k];
01333               dtemp += Values[k] * y2_local[col];
01334             }
01335             y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
01336           }
01337         }
01338       }
01339       else { // Backward sweep
01340         // ptrdiff_t is the same size as size_t, but is signed.  Being
01341         // signed is important so that i >= 0 is not trivially true.
01342         for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01343           local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01344 
01345           size_t NumEntries;
01346           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01347 
01348           for (size_t m = 0; m < NumVectors; ++m) {
01349             scalar_type dtemp = STS::zero ();
01350             ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01351             ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01352 
01353             for (size_t k = 0; k < NumEntries; ++k) {
01354               const local_ordinal_type col = Indices[k];
01355               dtemp += Values[k] * y2_local[col];
01356             }
01357             y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
01358           }
01359         }
01360       }
01361 
01362       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01363       if (IsParallel_) {
01364         Y = *Y2;
01365       }
01366     }
01367   }
01368 
01369 
01370   // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
01371   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01372   const double numVectors = as<double> (X.getNumVectors ());
01373   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01374   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01375   ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
01376     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01377 }
01378 
01379 
01380 template<class MatrixType>
01381 void
01382 Relaxation<MatrixType>::
01383 ApplyInverseGS_CrsMatrix (const crs_matrix_type& A,
01384                           const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01385                           Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01386 {
01387   using Teuchos::as;
01388   const Tpetra::ESweepDirection direction =
01389     DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
01390   if(localSmoothingIndices_.is_null())
01391     A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
01392                        NumSweeps_, ZeroStartingSolution_);
01393   else
01394     A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_(), DampingFactor_, direction,
01395                                 NumSweeps_, ZeroStartingSolution_);
01396 
01397   // For each column of output, for each sweep over the matrix:
01398   //
01399   // - One + and one * for each matrix entry
01400   // - One / and one + for each row of the matrix
01401   // - If the damping factor is not one: one * for each row of the
01402   //   matrix.  (It's not fair to count this if the damping factor is
01403   //   one, since the implementation could skip it.  Whether it does
01404   //   or not is the implementation's choice.)
01405 
01406   // Floating-point operations due to the damping factor, per matrix
01407   // row, per direction, per columm of output.
01408   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01409   const double numVectors = as<double> (X.getNumVectors ());
01410   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01411   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01412   ApplyFlops_ += NumSweeps_ * numVectors *
01413     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01414 }
01415 
01416 template<class MatrixType>
01417 void
01418 Relaxation<MatrixType>::
01419 ApplyInverseGS_BlockCrsMatrix (const block_crs_matrix_type& A,
01420                           const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01421                           Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01422 {
01423 
01424   typedef Tpetra::Experimental::BlockMultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> BMV;
01425 
01426 
01427   BMV yBlock(Y, *A.getRowMap(), A.getBlockSize());
01428   const BMV xBlock(X, *A.getRowMap(), A.getBlockSize());
01429 
01430   bool performImport = false;
01431   Teuchos::RCP<BMV> yBlockCol;
01432   if (Importer_.is_null())
01433     yBlockCol = Teuchos::rcpFromRef(yBlock);
01434   else
01435   {
01436     yBlockCol = Teuchos::rcp(new BMV(*Importer_->getTargetMap(), *A.getDomainMap(),
01437         A.getBlockSize(), Y.getNumVectors()));
01438     performImport = true;
01439   }
01440 
01441   const Tpetra::ESweepDirection direction =
01442     DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
01443 
01444 
01445 
01446   for (int sweep = 0; sweep < NumSweeps_; ++sweep)
01447   {
01448     if (performImport) yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
01449     A.localGaussSeidel(xBlock, *yBlockCol, *BlockDiagonal_, &blockDiagonalFactorizationPivots[0], DampingFactor_, direction);
01450     if (performImport) yBlock = *yBlockCol;
01451   }
01452 
01453 }
01454 
01455 
01456 template<class MatrixType>
01457 void
01458 Relaxation<MatrixType>::
01459 ApplyInverseSGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01460                  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01461 {
01462   // The CrsMatrix version is faster, because it can access the sparse
01463   // matrix data directly, rather than by copying out each row's data
01464   // in turn.  Thus, we check whether the RowMatrix is really a
01465   // CrsMatrix.
01466   //
01467   // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
01468   // declaration in Ifpack2_Relaxation_decl.hpp header file.  The code
01469   // will still be correct if the cast fails, but it will use an
01470   // unoptimized kernel.
01471   const block_crs_matrix_type* blockCrsMat = dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr());
01472   const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
01473   if (blockCrsMat != NULL)  {
01474     ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
01475   }
01476   else if (crsMat != NULL) {
01477     ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
01478   } else {
01479     ApplyInverseSGS_RowMatrix (X, Y);
01480   }
01481 }
01482 
01483 
01484 template<class MatrixType>
01485 void
01486 Relaxation<MatrixType>::
01487 ApplyInverseSGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01488                            Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01489 {
01490   using Teuchos::Array;
01491   using Teuchos::ArrayRCP;
01492   using Teuchos::ArrayView;
01493   using Teuchos::as;
01494   using Teuchos::RCP;
01495   using Teuchos::rcp;
01496   using Teuchos::rcpFromRef;
01497   typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
01498 
01499   // Tpetra's GS implementation for CrsMatrix handles zeroing out the
01500   // starting multivector itself.  The generic RowMatrix version here
01501   // does not, so we have to zero out Y here.
01502   if (ZeroStartingSolution_) {
01503     Y.putScalar (STS::zero ());
01504   }
01505 
01506   const size_t NumVectors = X.getNumVectors ();
01507   const size_t maxLength = A_->getNodeMaxNumRowEntries ();
01508   Array<local_ordinal_type> Indices (maxLength);
01509   Array<scalar_type> Values (maxLength);
01510 
01511   // Local smoothing stuff
01512   const size_t numMyRows             = A_->getNodeNumRows();
01513   const local_ordinal_type * rowInd  = 0;
01514   size_t numActive                   = numMyRows;
01515   bool do_local = localSmoothingIndices_.is_null();
01516   if(do_local) {
01517     rowInd    = localSmoothingIndices_.getRawPtr();
01518     numActive = localSmoothingIndices_.size();
01519   }
01520 
01521 
01522   RCP<MV> Y2;
01523   if (IsParallel_) {
01524     if (Importer_.is_null ()) { // domain and column Maps are the same.
01525       // We will copy Y into Y2 below, so no need to fill with zeros here.
01526       Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
01527     } else {
01528       // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
01529       // zeros here, since we are doing an Import into Y2 below
01530       // anyway.  However, it doesn't hurt correctness.
01531       Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
01532     }
01533   }
01534   else {
01535     Y2 = rcpFromRef (Y);
01536   }
01537 
01538   // Diagonal
01539   ArrayRCP<const scalar_type>  d_rcp = Diagonal_->get1dView();
01540   ArrayView<const scalar_type> d_ptr = d_rcp();
01541 
01542   // Constant stride check
01543   bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
01544 
01545   if(constant_stride) {
01546     // extract 1D RCPs
01547     size_t                    x_stride = X.getStride();
01548     size_t                   y2_stride = Y2->getStride();
01549     ArrayRCP<scalar_type>       y2_rcp = Y2->get1dViewNonConst();
01550     ArrayRCP<const scalar_type>  x_rcp = X.get1dView();
01551     ArrayView<scalar_type>      y2_ptr = y2_rcp();
01552     ArrayView<const scalar_type> x_ptr = x_rcp();
01553     Array<scalar_type> dtemp(NumVectors,STS::zero());
01554     for (int iter = 0; iter < NumSweeps_; ++iter) {
01555       // only one data exchange per sweep
01556       if (IsParallel_) {
01557         if (Importer_.is_null ()) {
01558           *Y2 = Y; // just copy, since domain and column Maps are the same
01559         } else {
01560           Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01561         }
01562       }
01563 
01564       for (int j = 0; j < NumSweeps_; j++) {
01565         // data exchange is here, once per sweep
01566         if (IsParallel_) {
01567           if (Importer_.is_null ()) {
01568             *Y2 = Y; // just copy, since domain and column Maps are the same
01569           } else {
01570             Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01571           }
01572         }
01573 
01574         for (size_t ii = 0; ii < numActive; ++ii) {
01575           local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01576           size_t NumEntries;
01577           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01578           dtemp.assign(NumVectors,STS::zero());
01579 
01580           for (size_t k = 0; k < NumEntries; ++k) {
01581             const local_ordinal_type col = Indices[k];
01582             for (size_t m = 0; m < NumVectors; ++m)
01583               dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01584           }
01585 
01586           for (size_t m = 0; m < NumVectors; ++m)
01587             y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01588         }
01589 
01590         // ptrdiff_t is the same size as size_t, but is signed.  Being
01591         // signed is important so that i >= 0 is not trivially true.
01592         for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01593           local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01594           size_t NumEntries;
01595           A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01596           dtemp.assign(NumVectors,STS::zero());
01597 
01598           for (size_t k = 0; k < NumEntries; ++k) {
01599             const local_ordinal_type col = Indices[k];
01600             for (size_t m = 0; m < NumVectors; ++m)
01601               dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01602           }
01603 
01604           for (size_t m = 0; m < NumVectors; ++m)
01605             y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01606         }
01607       }
01608 
01609       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01610       if (IsParallel_) {
01611         Y = *Y2;
01612       }
01613     }
01614   }
01615   else {
01616     // extract 2D RCPs
01617     ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
01618     ArrayRCP<ArrayRCP<const scalar_type> > x_ptr =  X.get2dView ();
01619 
01620     for (int iter = 0; iter < NumSweeps_; ++iter) {
01621       // only one data exchange per sweep
01622       if (IsParallel_) {
01623         if (Importer_.is_null ()) {
01624           *Y2 = Y; // just copy, since domain and column Maps are the same
01625         } else {
01626           Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01627         }
01628       }
01629 
01630       for (size_t ii = 0; ii < numActive; ++ii) {
01631         local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01632         const scalar_type diag = d_ptr[i];
01633         size_t NumEntries;
01634         A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
01635 
01636         for (size_t m = 0; m < NumVectors; ++m) {
01637           scalar_type dtemp = STS::zero ();
01638           ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01639           ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01640 
01641           for (size_t k = 0; k < NumEntries; ++k) {
01642             const local_ordinal_type col = Indices[k];
01643             dtemp += Values[k] * y2_local[col];
01644           }
01645           y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
01646         }
01647       }
01648 
01649       // ptrdiff_t is the same size as size_t, but is signed.  Being
01650       // signed is important so that i >= 0 is not trivially true.
01651       for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01652         local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01653         const scalar_type diag = d_ptr[i];
01654         size_t NumEntries;
01655         A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
01656 
01657         for (size_t m = 0; m < NumVectors; ++m) {
01658           scalar_type dtemp = STS::zero ();
01659           ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01660           ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01661 
01662           for (size_t k = 0; k < NumEntries; ++k) {
01663             const local_ordinal_type col = Indices[k];
01664             dtemp += Values[k] * y2_local[col];
01665           }
01666           y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
01667         }
01668       }
01669 
01670       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01671       if (IsParallel_) {
01672         Y = *Y2;
01673       }
01674     }
01675   }
01676 
01677   // See flop count discussion in implementation of ApplyInverseSGS_CrsMatrix().
01678   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01679   const double numVectors = as<double> (X.getNumVectors ());
01680   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01681   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01682   ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
01683     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01684 }
01685 
01686 
01687 template<class MatrixType>
01688 void
01689 Relaxation<MatrixType>::
01690 ApplyInverseSGS_CrsMatrix (const crs_matrix_type& A,
01691                            const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01692                            Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01693 {
01694   using Teuchos::as;
01695   const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
01696   if(localSmoothingIndices_.is_null())
01697     A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
01698                        NumSweeps_, ZeroStartingSolution_);
01699   else
01700     A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_(), DampingFactor_, direction,
01701                                 NumSweeps_, ZeroStartingSolution_);
01702 
01703   // For each column of output, for each sweep over the matrix:
01704   //
01705   // - One + and one * for each matrix entry
01706   // - One / and one + for each row of the matrix
01707   // - If the damping factor is not one: one * for each row of the
01708   //   matrix.  (It's not fair to count this if the damping factor is
01709   //   one, since the implementation could skip it.  Whether it does
01710   //   or not is the implementation's choice.)
01711   //
01712   // Each sweep of symmetric Gauss-Seidel / SOR counts as two sweeps,
01713   // one forward and one backward.
01714 
01715   // Floating-point operations due to the damping factor, per matrix
01716   // row, per direction, per columm of output.
01717   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01718   const double numVectors = as<double> (X.getNumVectors ());
01719   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01720   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01721   ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
01722     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01723 }
01724 
01725 template<class MatrixType>
01726 void
01727 Relaxation<MatrixType>::
01728 ApplyInverseSGS_BlockCrsMatrix (const block_crs_matrix_type& A,
01729                            const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01730                            Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01731 {
01732   typedef Tpetra::Experimental::BlockMultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> BMV;
01733 
01734   BMV yBlock(Y, *A.getRowMap(), A.getBlockSize());
01735   const BMV xBlock(X, *A.getRowMap(), A.getBlockSize());
01736 
01737   bool performImport = false;
01738   Teuchos::RCP<BMV> yBlockCol;
01739   if (Importer_.is_null())
01740     yBlockCol = Teuchos::rcpFromRef(yBlock);
01741   else
01742   {
01743     yBlockCol = Teuchos::rcp(new BMV(*Importer_->getTargetMap(), *A.getDomainMap(),
01744         A.getBlockSize(), Y.getNumVectors()));
01745     performImport = true;
01746   }
01747 
01748   const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
01749 
01750   for (int sweep = 0; sweep < NumSweeps_; ++sweep)
01751   {
01752     if (performImport) yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
01753     A.localGaussSeidel(xBlock, *yBlockCol, *BlockDiagonal_, &blockDiagonalFactorizationPivots[0], DampingFactor_, direction);
01754     if (performImport) yBlock = *yBlockCol;
01755   }
01756 }
01757 
01758 
01759 template<class MatrixType>
01760 std::string Relaxation<MatrixType>::description () const {
01761   std::ostringstream os;
01762 
01763   // Output is a valid YAML dictionary in flow style.  If you don't
01764   // like everything on a single line, you should call describe()
01765   // instead.
01766   os << "\"Ifpack2::Relaxation\": {";
01767 
01768   os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
01769      << "Computed: " << (isComputed () ? "true" : "false") << ", ";
01770 
01771   // It's useful to print this instance's relaxation method (Jacobi,
01772   // Gauss-Seidel, or symmetric Gauss-Seidel).  If you want more info
01773   // than that, call describe() instead.
01774   os << "Type: ";
01775   if (PrecType_ == Ifpack2::Details::JACOBI) {
01776     os << "Jacobi";
01777   } else if (PrecType_ == Ifpack2::Details::GS) {
01778     os << "Gauss-Seidel";
01779   } else if (PrecType_ == Ifpack2::Details::SGS) {
01780     os << "Symmetric Gauss-Seidel";
01781   } else {
01782     os << "INVALID";
01783   }
01784 
01785   os  << ", " << "sweeps: " << NumSweeps_ << ", "
01786       << "damping factor: " << DampingFactor_ << ", ";
01787   if (DoL1Method_) {
01788     os << "use l1: " << DoL1Method_ << ", "
01789        << "l1 eta: " << L1Eta_ << ", ";
01790   }
01791 
01792   if (A_.is_null ()) {
01793     os << "Matrix: null";
01794   }
01795   else {
01796     os << "Global matrix dimensions: ["
01797        << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
01798        << ", Global nnz: " << A_->getGlobalNumEntries();
01799   }
01800 
01801   os << "}";
01802   return os.str ();
01803 }
01804 
01805 
01806 template<class MatrixType>
01807 void
01808 Relaxation<MatrixType>::
01809 describe (Teuchos::FancyOStream &out,
01810           const Teuchos::EVerbosityLevel verbLevel) const
01811 {
01812   using Teuchos::OSTab;
01813   using Teuchos::TypeNameTraits;
01814   using Teuchos::VERB_DEFAULT;
01815   using Teuchos::VERB_NONE;
01816   using Teuchos::VERB_LOW;
01817   using Teuchos::VERB_MEDIUM;
01818   using Teuchos::VERB_HIGH;
01819   using Teuchos::VERB_EXTREME;
01820   using std::endl;
01821 
01822   const Teuchos::EVerbosityLevel vl =
01823     (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
01824 
01825   const int myRank = this->getComm ()->getRank ();
01826 
01827   //    none: print nothing
01828   //     low: print O(1) info from Proc 0
01829   //  medium:
01830   //    high:
01831   // extreme:
01832 
01833   if (vl != VERB_NONE && myRank == 0) {
01834     // Describable interface asks each implementation to start with a tab.
01835     OSTab tab1 (out);
01836     // Output is valid YAML; hence the quotes, to protect the colons.
01837     out << "\"Ifpack2::Relaxation\":" << endl;
01838     OSTab tab2 (out);
01839     out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
01840         << endl;
01841     if (this->getObjectLabel () != "") {
01842       out << "Label: " << this->getObjectLabel () << endl;
01843     }
01844     out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
01845         << "Computed: " << (isComputed () ? "true" : "false") << endl
01846         << "Parameters: " << endl;
01847     {
01848       OSTab tab3 (out);
01849       out << "\"relaxation: type\": ";
01850       if (PrecType_ == Ifpack2::Details::JACOBI) {
01851         out << "Jacobi";
01852       } else if (PrecType_ == Ifpack2::Details::GS) {
01853         out << "Gauss-Seidel";
01854       } else if (PrecType_ == Ifpack2::Details::SGS) {
01855         out << "Symmetric Gauss-Seidel";
01856       } else {
01857         out << "INVALID";
01858       }
01859       // We quote these parameter names because they contain colons.
01860       // YAML uses the colon to distinguish key from value.
01861       out << endl
01862           << "\"relaxation: sweeps\": " << NumSweeps_ << endl
01863           << "\"relaxation: damping factor\": " << DampingFactor_ << endl
01864           << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
01865           << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
01866           << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
01867           << "\"relaxation: use l1\": " << DoL1Method_ << endl
01868           << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
01869     }
01870     out << "Computed quantities:" << endl;
01871     {
01872       OSTab tab3 (out);
01873       out << "Condition number estimate: " << Condest_ << endl
01874           << "Global number of rows: " << A_->getGlobalNumRows () << endl
01875           << "Global number of columns: " << A_->getGlobalNumCols () << endl;
01876     }
01877     if (checkDiagEntries_ && isComputed ()) {
01878       out << "Properties of input diagonal entries:" << endl;
01879       {
01880         OSTab tab3 (out);
01881         out << "Magnitude of minimum-magnitude diagonal entry: "
01882             << globalMinMagDiagEntryMag_ << endl
01883             << "Magnitude of maximum-magnitude diagonal entry: "
01884             << globalMaxMagDiagEntryMag_ << endl
01885             << "Number of diagonal entries with small magnitude: "
01886             << globalNumSmallDiagEntries_ << endl
01887             << "Number of zero diagonal entries: "
01888             << globalNumZeroDiagEntries_ << endl
01889             << "Number of diagonal entries with negative real part: "
01890             << globalNumNegDiagEntries_ << endl
01891             << "Abs 2-norm diff between computed and actual inverse "
01892             << "diagonal: " << globalDiagNormDiff_ << endl;
01893       }
01894     }
01895     if (isComputed ()) {
01896       out << "Saved diagonal offsets: "
01897           << (savedDiagOffsets_ ? "true" : "false") << endl;
01898     }
01899     out << "Call counts and total times (in seconds): " << endl;
01900     {
01901       OSTab tab3 (out);
01902       out << "initialize: " << endl;
01903       {
01904         OSTab tab4 (out);
01905         out << "Call count: " << NumInitialize_ << endl;
01906         out << "Total time: " << InitializeTime_ << endl;
01907       }
01908       out << "compute: " << endl;
01909       {
01910         OSTab tab4 (out);
01911         out << "Call count: " << NumCompute_ << endl;
01912         out << "Total time: " << ComputeTime_ << endl;
01913       }
01914       out << "apply: " << endl;
01915       {
01916         OSTab tab4 (out);
01917         out << "Call count: " << NumApply_ << endl;
01918         out << "Total time: " << ApplyTime_ << endl;
01919       }
01920     }
01921   }
01922 }
01923 
01924 } // namespace Ifpack2
01925 
01926 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N)                            \
01927   template class Ifpack2::Relaxation< Tpetra::CrsMatrix<S, LO, GO, N> >; \
01928   template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
01929 
01930 #endif // IFPACK2_RELAXATION_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends