Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Relaxation_def.hpp
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #ifndef IFPACK2_RELAXATION_DEF_HPP
00044 #define IFPACK2_RELAXATION_DEF_HPP
00045 
00046 #include "Ifpack2_Relaxation_decl.hpp"
00047 #include "Teuchos_StandardParameterEntryValidators.hpp"
00048 #include <Teuchos_TimeMonitor.hpp>
00049 #include <Tpetra_ConfigDefs.hpp>
00050 #include <Tpetra_CrsMatrix.hpp>
00051 
00052 // mfh 28 Mar 2013: Uncomment out these three lines to compute
00053 // statistics on diagonal entries in compute().
00054 // #ifndef IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
00055 // #  define IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS 1
00056 // #endif // IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
00057 
00058 namespace {
00059   // Validate that a given int is nonnegative.
00060   class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
00061   public:
00062     // Constructor (does nothing).
00063     NonnegativeIntValidator () {}
00064 
00065     // ParameterEntryValidator wants this method.
00066     Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
00067       return Teuchos::null;
00068     }
00069 
00070     // Actually validate the parameter's value.
00071     void
00072     validate (const Teuchos::ParameterEntry& entry,
00073               const std::string& paramName,
00074               const std::string& sublistName) const
00075     {
00076       using std::endl;
00077       Teuchos::any anyVal = entry.getAny (true);
00078       const std::string entryName = entry.getAny (false).typeName ();
00079 
00080       TEUCHOS_TEST_FOR_EXCEPTION(
00081         anyVal.type () != typeid (int),
00082         Teuchos::Exceptions::InvalidParameterType,
00083         "Parameter \"" << paramName << "\" in sublist \"" << sublistName
00084         << "\" has the wrong type." << endl << "Parameter: " << paramName
00085         << endl << "Type specified: " << entryName << endl
00086         << "Type required: int" << endl);
00087 
00088       const int val = Teuchos::any_cast<int> (anyVal);
00089       TEUCHOS_TEST_FOR_EXCEPTION(
00090         val < 0, Teuchos::Exceptions::InvalidParameterValue,
00091         "Parameter \"" << paramName << "\" in sublist \"" << sublistName
00092         << "\" is negative." << endl << "Parameter: " << paramName
00093         << endl << "Value specified: " << val << endl
00094         << "Required range: [0, INT_MAX]" << endl);
00095     }
00096 
00097     // ParameterEntryValidator wants this method.
00098     const std::string getXMLTypeName () const {
00099       return "NonnegativeIntValidator";
00100     }
00101 
00102     // ParameterEntryValidator wants this method.
00103     void
00104     printDoc (const std::string& docString,
00105               std::ostream &out) const
00106     {
00107       Teuchos::StrUtils::printLines (out, "# ", docString);
00108       out << "#\tValidator Used: " << std::endl;
00109       out << "#\t\tNonnegativeIntValidator" << std::endl;
00110     }
00111   };
00112 
00113   // A way to get a small positive number (eps() for floating-point
00114   // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
00115   // define it (for example, for integer values).
00116   template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
00117   class SmallTraits {
00118   public:
00119     // Return eps if Scalar is a floating-point type, else return 1.
00120     static const Scalar eps ();
00121   };
00122 
00123   // Partial specialization for when Scalar is not a floating-point type.
00124   template<class Scalar>
00125   class SmallTraits<Scalar, true> {
00126   public:
00127     static const Scalar eps () {
00128       return Teuchos::ScalarTraits<Scalar>::one ();
00129     }
00130   };
00131 
00132   // Partial specialization for when Scalar is a floating-point type.
00133   template<class Scalar>
00134   class SmallTraits<Scalar, false> {
00135   public:
00136     static const Scalar eps () {
00137       return Teuchos::ScalarTraits<Scalar>::eps ();
00138     }
00139   };
00140 } // namespace (anonymous)
00141 
00142 namespace Ifpack2 {
00143 
00144 template<class MatrixType>
00145 void Relaxation<MatrixType>::
00146 setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
00147 {
00148   if (A.getRawPtr () != A_.getRawPtr ()) { // it's a different matrix
00149     Importer_ = Teuchos::null;
00150     Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
00151     isInitialized_ = false;
00152     IsComputed_ = false;
00153     diagOffsets_ = Teuchos::null;
00154     savedDiagOffsets_ = false;
00155     if (! A.is_null ()) {
00156       IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
00157     }
00158     A_ = A;
00159   }
00160 }
00161 
00162 
00163 template<class MatrixType>
00164 Relaxation<MatrixType>::
00165 Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
00166 : A_ (A),
00167   Time_ (Teuchos::rcp (new Teuchos::Time ("Ifpack2::Relaxation"))),
00168   NumSweeps_ (1),
00169   PrecType_ (Ifpack2::Details::JACOBI),
00170   DampingFactor_ (STS::one ()),
00171   IsParallel_ (A.is_null () ? false : A->getRowMap ()->getComm ()->getSize () > 1), // pick a reasonable default if the matrix is null.  It doesn't really matter, because the methods that care can't use a null matrix anyway.
00172   ZeroStartingSolution_ (true),
00173   DoBackwardGS_ (false),
00174   DoL1Method_ (false),
00175   L1Eta_ (Teuchos::as<magnitude_type> (1.5)),
00176   MinDiagonalValue_ (STS::zero ()),
00177   fixTinyDiagEntries_ (false),
00178   checkDiagEntries_ (false),
00179   Condest_ (-STM::one ()),
00180   isInitialized_ (false),
00181   IsComputed_ (false),
00182   NumInitialize_ (0),
00183   NumCompute_ (0),
00184   NumApply_ (0),
00185   InitializeTime_ (0.0), // Times are double anyway, so no need for ScalarTraits.
00186   ComputeTime_ (0.0),
00187   ApplyTime_ (0.0),
00188   ComputeFlops_ (0.0),
00189   ApplyFlops_ (0.0),
00190   globalMinMagDiagEntryMag_ (STM::zero ()),
00191   globalMaxMagDiagEntryMag_ (STM::zero ()),
00192   globalNumSmallDiagEntries_ (0),
00193   globalNumZeroDiagEntries_ (0),
00194   globalNumNegDiagEntries_ (0),
00195   globalDiagNormDiff_(Teuchos::ScalarTraits<magnitude_type>::zero()),
00196   savedDiagOffsets_ (false)
00197 {
00198   this->setObjectLabel ("Ifpack2::Relaxation");
00199 }
00200 
00201 //==========================================================================
00202 template<class MatrixType>
00203 Relaxation<MatrixType>::~Relaxation() {
00204 }
00205 
00206 template<class MatrixType>
00207 Teuchos::RCP<const Teuchos::ParameterList>
00208 Relaxation<MatrixType>::getValidParameters () const
00209 {
00210   using Teuchos::Array;
00211   using Teuchos::ParameterList;
00212   using Teuchos::parameterList;
00213   using Teuchos::RCP;
00214   using Teuchos::rcp;
00215   using Teuchos::rcp_const_cast;
00216   using Teuchos::rcp_implicit_cast;
00217   using Teuchos::setStringToIntegralParameter;
00218   typedef Teuchos::ParameterEntryValidator PEV;
00219 
00220   if (validParams_.is_null ()) {
00221     RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
00222 
00223     // Set a validator that automatically converts from the valid
00224     // string options to their enum values.
00225     Array<std::string> precTypes (3);
00226     precTypes[0] = "Jacobi";
00227     precTypes[1] = "Gauss-Seidel";
00228     precTypes[2] = "Symmetric Gauss-Seidel";
00229     Array<Details::RelaxationType> precTypeEnums (3);
00230     precTypeEnums[0] = Details::JACOBI;
00231     precTypeEnums[1] = Details::GS;
00232     precTypeEnums[2] = Details::SGS;
00233     const std::string defaultPrecType ("Jacobi");
00234     setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
00235       defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
00236       pl.getRawPtr ());
00237 
00238     const int numSweeps = 1;
00239     RCP<PEV> numSweepsValidator =
00240       rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
00241     pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
00242              rcp_const_cast<const PEV> (numSweepsValidator));
00243 
00244     const scalar_type dampingFactor = STS::one ();
00245     pl->set ("relaxation: damping factor", dampingFactor);
00246 
00247     const bool zeroStartingSolution = true;
00248     pl->set ("relaxation: zero starting solution", zeroStartingSolution);
00249 
00250     const bool doBackwardGS = false;
00251     pl->set ("relaxation: backward mode", doBackwardGS);
00252 
00253     const bool doL1Method = false;
00254     pl->set ("relaxation: use l1", doL1Method);
00255 
00256     const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
00257       (STM::one() + STM::one()); // 1.5
00258     pl->set ("relaxation: l1 eta", l1eta);
00259 
00260     const scalar_type minDiagonalValue = STS::zero ();
00261     pl->set ("relaxation: min diagonal value", minDiagonalValue);
00262 
00263     const bool fixTinyDiagEntries = false;
00264     pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
00265 
00266     const bool checkDiagEntries = false;
00267     pl->set ("relaxation: check diagonal entries", checkDiagEntries);
00268 
00269     Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
00270     pl->set("relaxation: local smoothing indices", localSmoothingIndices);
00271 
00272     validParams_ = rcp_const_cast<const ParameterList> (pl);
00273   }
00274   return validParams_;
00275 }
00276 
00277 
00278 template<class MatrixType>
00279 void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
00280 {
00281   using Teuchos::getIntegralValue;
00282   using Teuchos::ParameterList;
00283   using Teuchos::RCP;
00284   typedef scalar_type ST; // just to make code below shorter
00285 
00286   pl.validateParametersAndSetDefaults (* getValidParameters ());
00287 
00288   const Details::RelaxationType precType =
00289     getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
00290   const int numSweeps = pl.get<int> ("relaxation: sweeps");
00291   const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
00292   const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
00293   const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
00294   const bool doL1Method = pl.get<bool> ("relaxation: use l1");
00295   const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
00296   const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
00297   const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
00298   const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
00299   Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
00300 
00301 
00302   // "Commit" the changes, now that we've validated everything.
00303   PrecType_              = precType;
00304   NumSweeps_             = numSweeps;
00305   DampingFactor_         = dampingFactor;
00306   ZeroStartingSolution_  = zeroStartSol;
00307   DoBackwardGS_          = doBackwardGS;
00308   DoL1Method_            = doL1Method;
00309   L1Eta_                 = l1Eta;
00310   MinDiagonalValue_      = minDiagonalValue;
00311   fixTinyDiagEntries_    = fixTinyDiagEntries;
00312   checkDiagEntries_      = checkDiagEntries;
00313   localSmoothingIndices_ = localSmoothingIndices;
00314 }
00315 
00316 
00317 template<class MatrixType>
00318 void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
00319 {
00320   // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
00321   // but otherwise, we will get [unused] in pl
00322   this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
00323 }
00324 
00325 
00326 template<class MatrixType>
00327 Teuchos::RCP<const Teuchos::Comm<int> >
00328 Relaxation<MatrixType>::getComm() const {
00329   TEUCHOS_TEST_FOR_EXCEPTION(
00330     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
00331     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00332     "input matrix before calling this method.");
00333   return A_->getRowMap ()->getComm ();
00334 }
00335 
00336 
00337 template<class MatrixType>
00338 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
00339 Relaxation<MatrixType>::getMatrix() const {
00340   return A_;
00341 }
00342 
00343 
00344 template<class MatrixType>
00345 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
00346                                typename MatrixType::global_ordinal_type,
00347                                typename MatrixType::node_type> >
00348 Relaxation<MatrixType>::getDomainMap () const {
00349   TEUCHOS_TEST_FOR_EXCEPTION(
00350     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
00351     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00352     "input matrix before calling this method.");
00353   return A_->getDomainMap ();
00354 }
00355 
00356 
00357 template<class MatrixType>
00358 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
00359                                typename MatrixType::global_ordinal_type,
00360                                typename MatrixType::node_type> >
00361 Relaxation<MatrixType>::getRangeMap () const {
00362   TEUCHOS_TEST_FOR_EXCEPTION(
00363     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
00364     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00365     "input matrix before calling this method.");
00366   return A_->getRangeMap ();
00367 }
00368 
00369 //==========================================================================
00370 template<class MatrixType>
00371 bool Relaxation<MatrixType>::hasTransposeApply () const {
00372   return true;
00373 }
00374 
00375 //==========================================================================
00376 template<class MatrixType>
00377 int Relaxation<MatrixType>::getNumInitialize() const {
00378   return(NumInitialize_);
00379 }
00380 
00381 //==========================================================================
00382 template<class MatrixType>
00383 int Relaxation<MatrixType>::getNumCompute() const {
00384   return(NumCompute_);
00385 }
00386 
00387 //==========================================================================
00388 template<class MatrixType>
00389 int Relaxation<MatrixType>::getNumApply() const {
00390   return(NumApply_);
00391 }
00392 
00393 //==========================================================================
00394 template<class MatrixType>
00395 double Relaxation<MatrixType>::getInitializeTime() const {
00396   return(InitializeTime_);
00397 }
00398 
00399 //==========================================================================
00400 template<class MatrixType>
00401 double Relaxation<MatrixType>::getComputeTime() const {
00402   return(ComputeTime_);
00403 }
00404 
00405 //==========================================================================
00406 template<class MatrixType>
00407 double Relaxation<MatrixType>::getApplyTime() const {
00408   return(ApplyTime_);
00409 }
00410 
00411 //==========================================================================
00412 template<class MatrixType>
00413 double Relaxation<MatrixType>::getComputeFlops() const {
00414   return(ComputeFlops_);
00415 }
00416 
00417 //==========================================================================
00418 template<class MatrixType>
00419 double Relaxation<MatrixType>::getApplyFlops() const {
00420   return(ApplyFlops_);
00421 }
00422 
00423 //==========================================================================
00424 template<class MatrixType>
00425 typename Teuchos::ScalarTraits<typename MatrixType::scalar_type>::magnitudeType
00426 Relaxation<MatrixType>::getCondEst () const
00427 {
00428   return Condest_;
00429 }
00430 
00431 
00432 template<class MatrixType>
00433 typename Teuchos::ScalarTraits<typename MatrixType::scalar_type>::magnitudeType
00434 Relaxation<MatrixType>::
00435 computeCondEst (CondestType CT,
00436                 typename MatrixType::local_ordinal_type MaxIters,
00437                 magnitude_type Tol,
00438                 const Teuchos::Ptr<const row_matrix_type>& matrix)
00439 {
00440   if (! isComputed ()) { // cannot compute right now
00441     return -Teuchos::ScalarTraits<magnitude_type>::one ();
00442   }
00443   // always compute it. Call Condest() with no parameters to get
00444   // the previous estimate.
00445   Condest_ = Ifpack2::Condest (*this, CT, MaxIters, Tol, matrix);
00446   return Condest_;
00447 }
00448 
00449 
00450 template<class MatrixType>
00451 void
00452 Relaxation<MatrixType>::
00453 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
00454        Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
00455        Teuchos::ETransp mode,
00456        scalar_type alpha,
00457        scalar_type beta) const
00458 {
00459   using Teuchos::as;
00460   using Teuchos::RCP;
00461   using Teuchos::rcp;
00462   using Teuchos::rcpFromRef;
00463   typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
00464                               global_ordinal_type, node_type> MV;
00465   TEUCHOS_TEST_FOR_EXCEPTION(
00466     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
00467     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00468     "input matrix, then call compute(), before calling this method.");
00469   TEUCHOS_TEST_FOR_EXCEPTION(
00470     ! isComputed (),
00471     std::runtime_error,
00472     "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
00473     "preconditioner instance before you may call apply().  You may call "
00474     "isComputed() to find out if compute() has been called already.");
00475   TEUCHOS_TEST_FOR_EXCEPTION(
00476     X.getNumVectors() != Y.getNumVectors(),
00477     std::runtime_error,
00478     "Ifpack2::Relaxation::apply: X and Y have different numbers of columns.  "
00479     "X has " << X.getNumVectors() << " columns, but Y has "
00480     << Y.getNumVectors() << " columns.");
00481   TEUCHOS_TEST_FOR_EXCEPTION(
00482     beta != STS::zero (), std::logic_error,
00483     "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
00484     "implemented.");
00485   {
00486     // Reset the timer each time, since Relaxation uses the same Time
00487     // object to track times for different methods.
00488     Teuchos::TimeMonitor timeMon (*Time_, true);
00489 
00490     // Special case: alpha == 0.
00491     if (alpha == STS::zero ()) {
00492       // No floating-point operations, so no need to update a count.
00493       Y.putScalar (STS::zero ());
00494     }
00495     else {
00496       // If X and Y are pointing to the same memory location,
00497       // we need to create an auxiliary vector, Xcopy
00498       RCP<const MV> Xcopy;
00499       if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
00500         Xcopy = rcp (new MV(Tpetra::createCopy(X)));
00501       }
00502       else {
00503         Xcopy = rcpFromRef (X);
00504       }
00505 
00506       // Each of the following methods updates the flop count itself.
00507       // All implementations handle zeroing out the starting solution
00508       // (if necessary) themselves.
00509       switch (PrecType_) {
00510       case Ifpack2::Details::JACOBI:
00511         ApplyInverseJacobi(*Xcopy,Y);
00512         break;
00513       case Ifpack2::Details::GS:
00514         ApplyInverseGS(*Xcopy,Y);
00515         break;
00516       case Ifpack2::Details::SGS:
00517         ApplyInverseSGS(*Xcopy,Y);
00518         break;
00519       default:
00520         TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
00521           "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
00522           << PrecType_ << ".  Please report this bug to the Ifpack2 developers.");
00523       }
00524       if (alpha != STS::one ()) {
00525         Y.scale (alpha);
00526         const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
00527         const double numVectors = as<double> (Y.getNumVectors ());
00528         ApplyFlops_ += numGlobalRows * numVectors;
00529       }
00530     }
00531   }
00532   ApplyTime_ += Time_->totalElapsedTime ();
00533   ++NumApply_;
00534 }
00535 
00536 
00537 template<class MatrixType>
00538 void
00539 Relaxation<MatrixType>::
00540 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
00541           Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
00542           Teuchos::ETransp mode) const
00543 {
00544   TEUCHOS_TEST_FOR_EXCEPTION(
00545     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
00546     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00547     "input matrix, then call compute(), before calling this method.");
00548   TEUCHOS_TEST_FOR_EXCEPTION(
00549     ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
00550     "isComputed() must be true before you may call applyMat().  "
00551     "Please call compute() before calling this method.");
00552   TEUCHOS_TEST_FOR_EXCEPTION(
00553     X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
00554     "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
00555     << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
00556   A_->apply (X, Y, mode);
00557 }
00558 
00559 
00560 template<class MatrixType>
00561 void Relaxation<MatrixType>::initialize () {
00562   TEUCHOS_TEST_FOR_EXCEPTION(
00563     A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::initialize: "
00564     "The input matrix A is null.  Please call setMatrix() with a nonnull "
00565     "input matrix before calling this method.");
00566 
00567   // Initialization for Relaxation is trivial, so we say it takes zero time.
00568   //InitializeTime_ += Time_->totalElapsedTime ();
00569   ++NumInitialize_;
00570   isInitialized_ = true;
00571 }
00572 
00573 
00574 template<class MatrixType>
00575 void Relaxation<MatrixType>::compute ()
00576 {
00577   using Teuchos::Array;
00578   using Teuchos::ArrayRCP;
00579   using Teuchos::ArrayView;
00580   using Teuchos::as;
00581   using Teuchos::Comm;
00582   using Teuchos::RCP;
00583   using Teuchos::rcp;
00584   using Teuchos::REDUCE_MAX;
00585   using Teuchos::REDUCE_MIN;
00586   using Teuchos::REDUCE_SUM;
00587   using Teuchos::rcp_dynamic_cast;
00588   using Teuchos::reduceAll;
00589   typedef Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> vector_type;
00590   const scalar_type zero = STS::zero ();
00591   const scalar_type one = STS::one ();
00592 
00593   // We don't count initialization in compute() time.
00594   if (! isInitialized ()) {
00595     initialize ();
00596   }
00597 
00598   {
00599     // Reset the timer each time, since Relaxation uses the same Time
00600     // object to track times for different methods.
00601     Teuchos::TimeMonitor timeMon (*Time_, true);
00602 
00603     TEUCHOS_TEST_FOR_EXCEPTION(
00604       A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::compute: "
00605       "The input matrix A is null.  Please call setMatrix() with a nonnull "
00606       "input matrix, then call initialize(), before calling this method.");
00607 
00608     // Reset state.
00609     IsComputed_ = false;
00610     Condest_ = -STM::one ();
00611 
00612     TEUCHOS_TEST_FOR_EXCEPTION(
00613       NumSweeps_ < 0, std::logic_error,
00614       "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ << " < 0.  "
00615       "Please report this bug to the Ifpack2 developers.");
00616 
00617     Diagonal_ = rcp (new vector_type (A_->getRowMap ()));
00618 
00619     TEUCHOS_TEST_FOR_EXCEPTION(
00620       Diagonal_.is_null (), std::logic_error,
00621       "Ifpack2::Relaxation::compute: Vector of diagonal entries has not been "
00622       "created yet.  Please report this bug to the Ifpack2 developers.");
00623 
00624     // Extract the diagonal entries.  The CrsMatrix static graph
00625     // version is faster for subsequent calls to compute(), since it
00626     // caches the diagonal offsets.
00627     //
00628     // isStaticGraph() == true means that the matrix was created with
00629     // a const graph.  The only requirement is that the structure of
00630     // the matrix never changes, so isStaticGraph() == true is a bit
00631     // more conservative than we need.  However, Tpetra doesn't (as of
00632     // 05 Apr 2013) have a way to tell if the graph hasn't changed
00633     // since the last time we used it.
00634     {
00635       // NOTE (mfh 07 Jul 2013): We must cast here to CrsMatrix
00636       // instead of MatrixType, because isStaticGraph is a CrsMatrix
00637       // method (not inherited from RowMatrix's interface).  It's
00638       // perfectly valid to do relaxation on a RowMatrix which is not
00639       // a CrsMatrix.
00640       //
00641       // This cast isn't ideal because it won't catch CrsMatrix
00642       // specializations with nondefault LocalMatOps (fifth) template
00643       // parameter.  The code will still be correct if the cast fails,
00644       // but it won't pick up the "cached offsets" optimization.
00645       const crs_matrix_type* crsMat =
00646         dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
00647       if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
00648         A_->getLocalDiagCopy (*Diagonal_); // slow path
00649       } else {
00650         if (! savedDiagOffsets_) { // we haven't precomputed offsets
00651           crsMat->getLocalDiagOffsets (diagOffsets_);
00652           savedDiagOffsets_ = true;
00653         }
00654         crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_ ());
00655 #ifdef HAVE_TPETRA_DEBUG
00656         // Validate the fast-path diagonal against the slow-path diagonal.
00657         vector_type D_copy (A_->getRowMap ());
00658         A_->getLocalDiagCopy (D_copy);
00659         D_copy.update (STS::one (), *Diagonal_, -STS::one ());
00660         const magnitude_type err = D_copy.normInf ();
00661         // The two diagonals should be exactly the same, so their
00662         // difference should be exactly zero.
00663         TEUCHOS_TEST_FOR_EXCEPTION(
00664           err != STM::zero(), std::logic_error, "Ifpack2::Relaxation::compute: "
00665           "\"fast-path\" diagonal computation failed.  \\|D1 - D2\\|_inf = "
00666           << err << ".");
00667 #endif // HAVE_TPETRA_DEBUG
00668       }
00669     }
00670 
00671     // If we're checking the computed inverse diagonal, then keep a
00672     // copy of the original diagonal entries for later comparison.
00673     RCP<vector_type> origDiag;
00674     if (checkDiagEntries_) {
00675       origDiag = rcp (new vector_type (A_->getRowMap ()));
00676       *origDiag = *Diagonal_;
00677     }
00678 
00679     // "Host view" means that if the Node type is a GPU Node, the
00680     // ArrayRCP points to host memory, not device memory.  It will
00681     // write back to device memory (Diagonal_) at end of scope.
00682     ArrayRCP<scalar_type> diagHostView = Diagonal_->get1dViewNonConst ();
00683 
00684     // The view below is only valid as long as diagHostView is within
00685     // scope.  We extract a raw pointer in release mode because we
00686     // don't trust older versions of compilers (like GCC 4.4.x or
00687     // Intel < 13) to optimize away ArrayView::operator[].
00688 #ifdef HAVE_IFPACK2_DEBUG
00689     ArrayView<scalar_type> diag = diagHostView ();
00690 #else
00691     scalar_type* KOKKOSCLASSIC_RESTRICT const diag = diagHostView.getRawPtr ();
00692 #endif // HAVE_IFPACK2_DEBUG
00693 
00694     const size_t numMyRows = A_->getNodeNumRows ();
00695 
00696     // Setup for L1 Methods.
00697     // Here we add half the value of the off-processor entries in the row,
00698     // but only if diagonal isn't sufficiently large.
00699     //
00700     // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
00701     // Yang.  "Multigrid Smoothers for Ultraparallel Computing."  SIAM
00702     // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
00703     if (DoL1Method_ && IsParallel_) {
00704       const scalar_type two = one + one;
00705       const size_t maxLength = A_->getNodeMaxNumRowEntries ();
00706       Array<local_ordinal_type> indices (maxLength);
00707       Array<scalar_type> values (maxLength);
00708       size_t numEntries;
00709 
00710       for (size_t i = 0; i < numMyRows; ++i) {
00711         A_->getLocalRowCopy (i, indices (), values (), numEntries);
00712         magnitude_type diagonal_boost = STM::zero ();
00713         for (size_t k = 0 ; k < numEntries ; ++k) {
00714           if (as<size_t> (indices[k]) > numMyRows) {
00715             diagonal_boost += STS::magnitude (values[k] / two);
00716           }
00717         }
00718         if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
00719           diag[i] += diagonal_boost;
00720         }
00721       }
00722     }
00723 
00724     //
00725     // Invert the diagonal entries of the matrix (not in place).
00726     //
00727 
00728     // Precompute some quantities for "fixing" zero or tiny diagonal
00729     // entries.  We'll only use them if this "fixing" is enabled.
00730     //
00731     // SmallTraits covers for the lack of eps() in
00732     // Teuchos::ScalarTraits when its template parameter is not a
00733     // floating-point type.  (Ifpack2 sometimes gets instantiated for
00734     // integer Scalar types.)
00735     const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
00736       one / SmallTraits<scalar_type>::eps () :
00737       one / MinDiagonalValue_;
00738     // It's helpful not to have to recompute this magnitude each time.
00739     const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
00740 
00741     if (checkDiagEntries_) {
00742       //
00743       // Check diagonal elements, replace zero elements with the minimum
00744       // diagonal value, and store their inverses.  Count the number of
00745       // "small" and zero diagonal entries while we're at it.
00746       //
00747       size_t numSmallDiagEntries = 0; // "small" includes zero
00748       size_t numZeroDiagEntries = 0; // # zero diagonal entries
00749       size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
00750 
00751       // As we go, keep track of the diagonal entries with the least and
00752       // greatest magnitude.  We could use the trick of starting the min
00753       // with +Inf and the max with -Inf, but that doesn't work if
00754       // scalar_type is a built-in integer type.  Thus, we have to start
00755       // by reading the first diagonal entry redundantly.
00756       scalar_type minMagDiagEntry = zero;
00757       scalar_type maxMagDiagEntry = zero;
00758       magnitude_type minMagDiagEntryMag = STM::zero ();
00759       magnitude_type maxMagDiagEntryMag = STM::zero ();
00760       if (numMyRows > 0) {
00761         const scalar_type d_0 = diag[0];
00762         const magnitude_type d_0_mag = STS::magnitude (d_0);
00763         minMagDiagEntry = d_0;
00764         maxMagDiagEntry = d_0;
00765         minMagDiagEntryMag = d_0_mag;
00766         maxMagDiagEntryMag = d_0_mag;
00767       }
00768 
00769       // Go through all the diagonal entries.  Compute counts of
00770       // small-magnitude, zero, and negative-real-part entries.  Invert
00771       // the diagonal entries that aren't too small.  For those that are
00772       // too small in magnitude, replace them with 1/MinDiagonalValue_
00773       // (or 1/eps if MinDiagonalValue_ happens to be zero).
00774       for (size_t i = 0 ; i < numMyRows; ++i) {
00775         const scalar_type d_i = diag[i];
00776         const magnitude_type d_i_mag = STS::magnitude (d_i);
00777         const magnitude_type d_i_real = STS::real (d_i);
00778 
00779         // We can't compare complex numbers, but we can compare their
00780         // real parts.
00781         if (d_i_real < STM::zero ()) {
00782           ++numNegDiagEntries;
00783         }
00784         if (d_i_mag < minMagDiagEntryMag) {
00785           minMagDiagEntry = d_i;
00786           minMagDiagEntryMag = d_i_mag;
00787         }
00788         if (d_i_mag > maxMagDiagEntryMag) {
00789           maxMagDiagEntry = d_i;
00790           maxMagDiagEntryMag = d_i_mag;
00791         }
00792 
00793         if (fixTinyDiagEntries_) {
00794           // <= not <, in case minDiagValMag is zero.
00795           if (d_i_mag <= minDiagValMag) {
00796             ++numSmallDiagEntries;
00797             if (d_i_mag == STM::zero ()) {
00798               ++numZeroDiagEntries;
00799             }
00800             diag[i] = oneOverMinDiagVal;
00801           } else {
00802             diag[i] = one / d_i;
00803           }
00804         }
00805         else { // Don't fix zero or tiny diagonal entries.
00806           // <= not <, in case minDiagValMag is zero.
00807           if (d_i_mag <= minDiagValMag) {
00808             ++numSmallDiagEntries;
00809             if (d_i_mag == STM::zero ()) {
00810               ++numZeroDiagEntries;
00811             }
00812           }
00813           diag[i] = one / d_i;
00814         }
00815       }
00816 
00817       // We're done computing the inverse diagonal, so invalidate the view.
00818       // This ensures that the operations below, that use methods on Vector,
00819       // produce correct results even on a GPU Node.
00820       diagHostView = Teuchos::null;
00821 
00822       // Count floating-point operations of computing the inverse diagonal.
00823       //
00824       // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
00825       if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
00826         ComputeFlops_ += 4.0 * numMyRows;
00827       } else {
00828         ComputeFlops_ += numMyRows;
00829       }
00830 
00831       // Collect global data about the diagonal entries.  Only do this
00832       // (which involves O(1) all-reduces) if the user asked us to do
00833       // the extra work.
00834       //
00835       // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
00836       // zero rows.  Fixing this requires one of two options:
00837       //
00838       // 1. Use a custom reduction operation that ignores processes
00839       //    with zero diagonal entries.
00840       // 2. Split the communicator, compute all-reduces using the
00841       //    subcommunicator over processes that have a nonzero number
00842       //    of diagonal entries, and then broadcast from one of those
00843       //    processes (if there is one) to the processes in the other
00844       //    subcommunicator.
00845       const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
00846 
00847       // Compute global min and max magnitude of entries.
00848       Array<magnitude_type> localVals (2);
00849       localVals[0] = minMagDiagEntryMag;
00850       // (- (min (- x))) is the same as (max x).
00851       localVals[1] = -maxMagDiagEntryMag;
00852       Array<magnitude_type> globalVals (2);
00853       reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
00854                                       localVals.getRawPtr (),
00855                                       globalVals.getRawPtr ());
00856       globalMinMagDiagEntryMag_ = globalVals[0];
00857       globalMaxMagDiagEntryMag_ = -globalVals[1];
00858 
00859       Array<size_t> localCounts (3);
00860       localCounts[0] = numSmallDiagEntries;
00861       localCounts[1] = numZeroDiagEntries;
00862       localCounts[2] = numNegDiagEntries;
00863       Array<size_t> globalCounts (3);
00864       reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
00865                               localCounts.getRawPtr (),
00866                               globalCounts.getRawPtr ());
00867       globalNumSmallDiagEntries_ = globalCounts[0];
00868       globalNumZeroDiagEntries_ = globalCounts[1];
00869       globalNumNegDiagEntries_ = globalCounts[2];
00870 
00871       // Forestall "set but not used" compiler warnings.
00872       (void) minMagDiagEntry;
00873       (void) maxMagDiagEntry;
00874 
00875       // Compute and save the difference between the computed inverse
00876       // diagonal, and the original diagonal's inverse.
00877       RCP<vector_type> diff = rcp (new vector_type (A_->getRowMap ()));
00878       diff->reciprocal (*origDiag);
00879       diff->update (-one, *Diagonal_, one);
00880       globalDiagNormDiff_ = diff->norm2 ();
00881     }
00882     else { // don't check diagonal elements
00883       if (fixTinyDiagEntries_) {
00884         // Go through all the diagonal entries.  Invert those that
00885         // aren't too small in magnitude.  For those that are too
00886         // small in magnitude, replace them with oneOverMinDiagVal.
00887         for (size_t i = 0 ; i < numMyRows; ++i) {
00888           const scalar_type d_i = diag[i];
00889           const magnitude_type d_i_mag = STS::magnitude (d_i);
00890 
00891           // <= not <, in case minDiagValMag is zero.
00892           if (d_i_mag <= minDiagValMag) {
00893             diag[i] = oneOverMinDiagVal;
00894           } else {
00895             diag[i] = one / d_i;
00896           }
00897         }
00898       }
00899       else { // don't fix tiny or zero diagonal entries
00900         for (size_t i = 0 ; i < numMyRows; ++i) {
00901           diag[i] = one / diag[i];
00902         }
00903       }
00904       if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
00905         ComputeFlops_ += 4.0 * numMyRows;
00906       } else {
00907         ComputeFlops_ += numMyRows;
00908       }
00909     }
00910 
00911     if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
00912                         PrecType_ == Ifpack2::Details::SGS)) {
00913       Importer_ = A_->getGraph ()->getImporter ();
00914       // mfh 21 Mar 2013: The Import object may be null, but in that
00915       // case, the domain and column Maps are the same and we don't
00916       // need to Import anyway.
00917     }
00918   } // end TimeMonitor scope
00919 
00920   ComputeTime_ += Time_->totalElapsedTime ();
00921   ++NumCompute_;
00922   IsComputed_ = true;
00923 }
00924 
00925 
00926 template<class MatrixType>
00927 void
00928 Relaxation<MatrixType>::
00929 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
00930                     Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
00931 {
00932   using Teuchos::as;
00933   typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
00934 
00935   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
00936   const double numVectors = as<double> (X.getNumVectors ());
00937   if (ZeroStartingSolution_) {
00938     // For the first Jacobi sweep, if we are allowed to assume that
00939     // the initial guess is zero, then Jacobi is just diagonal
00940     // scaling.  (A_ij * x_j = 0 for i != j, since x_j = 0.)
00941     // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
00942     Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
00943 
00944     // Count (global) floating-point operations.  Ifpack2 represents
00945     // this as a floating-point number rather than an integer, so that
00946     // overflow (for a very large number of calls, or a very large
00947     // problem) is approximate instead of catastrophic.
00948     double flopUpdate = 0.0;
00949     if (DampingFactor_ == STS::one ()) {
00950       // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
00951       flopUpdate = numGlobalRows * numVectors;
00952     } else {
00953       // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
00954       // Two multiplies per entry of Y.
00955       flopUpdate = 2.0 * numGlobalRows * numVectors;
00956     }
00957     ApplyFlops_ += flopUpdate;
00958     if (NumSweeps_ == 1) {
00959       return;
00960     }
00961   }
00962   // If we were allowed to assume that the starting guess was zero,
00963   // then we have already done the first sweep above.
00964   const int startSweep = ZeroStartingSolution_ ? 1 : 0;
00965   // We don't need to initialize the result MV, since the sparse
00966   // mat-vec will clobber its contents anyway.
00967   MV A_times_Y (Y.getMap (), as<size_t>(numVectors), false);
00968   for (int j = startSweep; j < NumSweeps_; ++j) {
00969     // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
00970     applyMat (Y, A_times_Y);
00971     A_times_Y.update (STS::one (), X, -STS::one ());
00972     Y.elementWiseMultiply (DampingFactor_, *Diagonal_, A_times_Y, STS::one ());
00973   }
00974 
00975   // For each column of output, for each pass over the matrix:
00976   //
00977   // - One + and one * for each matrix entry
00978   // - One / and one + for each row of the matrix
00979   // - If the damping factor is not one: one * for each row of the
00980   //   matrix.  (It's not fair to count this if the damping factor is
00981   //   one, since the implementation could skip it.  Whether it does
00982   //   or not is the implementation's choice.)
00983 
00984   // Floating-point operations due to the damping factor, per matrix
00985   // row, per direction, per columm of output.
00986   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
00987   const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
00988   ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
00989     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
00990 }
00991 
00992 
00993 template<class MatrixType>
00994 void
00995 Relaxation<MatrixType>::
00996 ApplyInverseGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
00997                 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
00998 {
00999   // The CrsMatrix version is faster, because it can access the sparse
01000   // matrix data directly, rather than by copying out each row's data
01001   // in turn.  Thus, we check whether the RowMatrix is really a
01002   // CrsMatrix.
01003   //
01004   // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
01005   // declaration in Ifpack2_Relaxation_decl.hpp header file.  The code
01006   // will still be correct if the cast fails, but it will use an
01007   // unoptimized kernel.
01008   const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
01009   if (crsMat != NULL) {
01010     ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
01011   } else {
01012     ApplyInverseGS_RowMatrix (X, Y);
01013   }
01014 }
01015 
01016 
01017 template<class MatrixType>
01018 void Relaxation<MatrixType>::ApplyInverseGS_RowMatrix(
01019         const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01020               Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01021 {
01022   using Teuchos::Array;
01023   using Teuchos::ArrayRCP;
01024   using Teuchos::ArrayView;
01025   using Teuchos::as;
01026   using Teuchos::RCP;
01027   using Teuchos::rcp;
01028   using Teuchos::rcpFromRef;
01029   typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
01030 
01031 
01032 
01033   // Tpetra's GS implementation for CrsMatrix handles zeroing out the
01034   // starting multivector itself.  The generic RowMatrix version here
01035   // does not, so we have to zero out Y here.
01036   if (ZeroStartingSolution_) {
01037     Y.putScalar (STS::zero ());
01038   }
01039 
01040   const size_t NumVectors = X.getNumVectors ();
01041   const size_t maxLength = A_->getNodeMaxNumRowEntries ();
01042   Array<local_ordinal_type> Indices (maxLength);
01043   Array<scalar_type> Values (maxLength);
01044 
01045   // Local smoothing stuff
01046   const size_t numMyRows             = A_->getNodeNumRows();
01047   const local_ordinal_type * rowInd  = 0;
01048   size_t numActive                   = numMyRows;
01049   bool do_local = !localSmoothingIndices_.is_null();
01050   if(do_local) {
01051     rowInd    = localSmoothingIndices_.getRawPtr();
01052     numActive = localSmoothingIndices_.size();
01053   }
01054 
01055   RCP<MV> Y2;
01056   if (IsParallel_) {
01057     if (Importer_.is_null ()) { // domain and column Maps are the same.
01058       // We will copy Y into Y2 below, so no need to fill with zeros here.
01059       Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
01060     } else {
01061       // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
01062       // zeros here, since we are doing an Import into Y2 below
01063       // anyway.  However, it doesn't hurt correctness.
01064       Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
01065     }
01066   }
01067   else {
01068     Y2 = rcpFromRef (Y);
01069   }
01070 
01071   // Diagonal
01072   ArrayRCP<const scalar_type>  d_rcp = Diagonal_->get1dView();
01073   ArrayView<const scalar_type> d_ptr = d_rcp();
01074 
01075   // Constant stride check
01076   bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
01077 
01078   if(constant_stride) {
01079     // extract 1D RCPs
01080     size_t                    x_stride = X.getStride();
01081     size_t                   y2_stride = Y2->getStride();
01082     ArrayRCP<scalar_type>       y2_rcp = Y2->get1dViewNonConst();
01083     ArrayRCP<const scalar_type>  x_rcp = X.get1dView();
01084     ArrayView<scalar_type>      y2_ptr = y2_rcp();
01085     ArrayView<const scalar_type> x_ptr = x_rcp();
01086     Array<scalar_type> dtemp(NumVectors,STS::zero());
01087 
01088     for (int j = 0; j < NumSweeps_; j++) {
01089       // data exchange is here, once per sweep
01090       if (IsParallel_) {
01091   if (Importer_.is_null ()) {
01092     *Y2 = Y; // just copy, since domain and column Maps are the same
01093   } else {
01094     Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01095   }
01096       }
01097       
01098       if (! DoBackwardGS_) { // Forward sweep
01099   for (size_t ii = 0; ii < numActive; ++ii) {
01100     local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01101     size_t NumEntries;
01102     A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);   
01103     dtemp.assign(NumVectors,STS::zero());
01104 
01105     for (size_t k = 0; k < NumEntries; ++k) {
01106       const local_ordinal_type col = Indices[k];
01107       for (size_t m = 0; m < NumVectors; ++m) 
01108         dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01109     }
01110 
01111     for (size_t m = 0; m < NumVectors; ++m) 
01112       y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01113   }
01114       }
01115       else { // Backward sweep
01116   // ptrdiff_t is the same size as size_t, but is signed.  Being
01117   // signed is important so that i >= 0 is not trivially true.
01118   for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01119     local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01120     size_t NumEntries;
01121     A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);   
01122     dtemp.assign(NumVectors,STS::zero());
01123 
01124     for (size_t k = 0; k < NumEntries; ++k) {
01125       const local_ordinal_type col = Indices[k];
01126       for (size_t m = 0; m < NumVectors; ++m) 
01127         dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01128     }
01129 
01130     for (size_t m = 0; m < NumVectors; ++m) 
01131       y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01132   }
01133       }      
01134       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01135       if (IsParallel_) {
01136   Y = *Y2;
01137       }
01138     }
01139   }
01140   else {
01141     // extract 2D RCPS 
01142     ArrayRCP<ArrayRCP<scalar_type> > y2_ptr      = Y2->get2dViewNonConst();
01143     ArrayRCP<ArrayRCP<const scalar_type> > x_ptr =  X.get2dView();
01144     
01145     for (int j = 0; j < NumSweeps_; j++) {
01146       // data exchange is here, once per sweep
01147       if (IsParallel_) {
01148   if (Importer_.is_null ()) {
01149     *Y2 = Y; // just copy, since domain and column Maps are the same
01150   } else {
01151     Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01152   }
01153       }
01154       
01155       if (! DoBackwardGS_) { // Forward sweep
01156   for (size_t ii = 0; ii < numActive; ++ii) {
01157     local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01158     size_t NumEntries;
01159     A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01160     
01161     for (size_t m = 0; m < NumVectors; ++m) {
01162       scalar_type dtemp = STS::zero ();
01163       ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01164       ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01165       
01166       for (size_t k = 0; k < NumEntries; ++k) {
01167         const local_ordinal_type col = Indices[k];
01168         dtemp += Values[k] * y2_local[col];
01169       }
01170       y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
01171     }
01172   }
01173       }
01174       else { // Backward sweep
01175   // ptrdiff_t is the same size as size_t, but is signed.  Being
01176   // signed is important so that i >= 0 is not trivially true.
01177   for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01178     local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01179     
01180     size_t NumEntries;
01181     A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
01182     
01183     for (size_t m = 0; m < NumVectors; ++m) {
01184       scalar_type dtemp = STS::zero ();
01185       ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01186       ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01187       
01188       for (size_t k = 0; k < NumEntries; ++k) {
01189         const local_ordinal_type col = Indices[k];
01190         dtemp += Values[k] * y2_local[col];
01191       }
01192       y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
01193     }
01194   }
01195       }
01196       
01197       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01198       if (IsParallel_) {
01199   Y = *Y2;
01200       }
01201     }
01202   }
01203  
01204 
01205   // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
01206   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01207   const double numVectors = as<double> (X.getNumVectors ());
01208   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01209   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01210   ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
01211     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01212 }
01213 
01214 
01215 template<class MatrixType>
01216 void
01217 Relaxation<MatrixType>::
01218 ApplyInverseGS_CrsMatrix (const crs_matrix_type& A,
01219                           const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01220                           Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01221 {
01222   using Teuchos::as;
01223   const Tpetra::ESweepDirection direction =
01224     DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
01225   if(localSmoothingIndices_.is_null())
01226     A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
01227                        NumSweeps_, ZeroStartingSolution_);
01228   else
01229     A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_(), DampingFactor_, direction,
01230                                 NumSweeps_, ZeroStartingSolution_);
01231 
01232   // For each column of output, for each sweep over the matrix:
01233   //
01234   // - One + and one * for each matrix entry
01235   // - One / and one + for each row of the matrix
01236   // - If the damping factor is not one: one * for each row of the
01237   //   matrix.  (It's not fair to count this if the damping factor is
01238   //   one, since the implementation could skip it.  Whether it does
01239   //   or not is the implementation's choice.)
01240 
01241   // Floating-point operations due to the damping factor, per matrix
01242   // row, per direction, per columm of output.
01243   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01244   const double numVectors = as<double> (X.getNumVectors ());
01245   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01246   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01247   ApplyFlops_ += NumSweeps_ * numVectors *
01248     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01249 }
01250 
01251 
01252 template<class MatrixType>
01253 void
01254 Relaxation<MatrixType>::
01255 ApplyInverseSGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01256                  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01257 {
01258   // The CrsMatrix version is faster, because it can access the sparse
01259   // matrix data directly, rather than by copying out each row's data
01260   // in turn.  Thus, we check whether the RowMatrix is really a
01261   // CrsMatrix.
01262   //
01263   // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
01264   // declaration in Ifpack2_Relaxation_decl.hpp header file.  The code
01265   // will still be correct if the cast fails, but it will use an
01266   // unoptimized kernel.
01267   const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
01268   if (crsMat != NULL) {
01269     ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
01270   } else {
01271     ApplyInverseSGS_RowMatrix (X, Y);
01272   }
01273 }
01274 
01275 
01276 template<class MatrixType>
01277 void
01278 Relaxation<MatrixType>::
01279 ApplyInverseSGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01280                            Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01281 {
01282   using Teuchos::Array;
01283   using Teuchos::ArrayRCP;
01284   using Teuchos::ArrayView;
01285   using Teuchos::as;
01286   using Teuchos::RCP;
01287   using Teuchos::rcp;
01288   using Teuchos::rcpFromRef;
01289   typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
01290 
01291   // Tpetra's GS implementation for CrsMatrix handles zeroing out the
01292   // starting multivector itself.  The generic RowMatrix version here
01293   // does not, so we have to zero out Y here.
01294   if (ZeroStartingSolution_) {
01295     Y.putScalar (STS::zero ());
01296   }
01297 
01298   const size_t NumVectors = X.getNumVectors ();
01299   const size_t maxLength = A_->getNodeMaxNumRowEntries ();
01300   Array<local_ordinal_type> Indices (maxLength);
01301   Array<scalar_type> Values (maxLength);
01302 
01303   // Local smoothing stuff
01304   const size_t numMyRows             = A_->getNodeNumRows();
01305   const local_ordinal_type * rowInd  = 0;
01306   size_t numActive                   = numMyRows;
01307   bool do_local = localSmoothingIndices_.is_null();
01308   if(do_local) {
01309     rowInd    = localSmoothingIndices_.getRawPtr();
01310     numActive = localSmoothingIndices_.size();
01311   }
01312 
01313 
01314   RCP<MV> Y2;
01315   if (IsParallel_) {
01316     if (Importer_.is_null ()) { // domain and column Maps are the same.
01317       // We will copy Y into Y2 below, so no need to fill with zeros here.
01318       Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
01319     } else {
01320       // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
01321       // zeros here, since we are doing an Import into Y2 below
01322       // anyway.  However, it doesn't hurt correctness.
01323       Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
01324     }
01325   }
01326   else {
01327     Y2 = rcpFromRef (Y);
01328   }
01329 
01330   // Diagonal
01331   ArrayRCP<const scalar_type>  d_rcp = Diagonal_->get1dView();
01332   ArrayView<const scalar_type> d_ptr = d_rcp();
01333 
01334   // Constant stride check
01335   bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
01336 
01337   if(constant_stride) {
01338     // extract 1D RCPs
01339     size_t                    x_stride = X.getStride();
01340     size_t                   y2_stride = Y2->getStride();
01341     ArrayRCP<scalar_type>       y2_rcp = Y2->get1dViewNonConst();
01342     ArrayRCP<const scalar_type>  x_rcp = X.get1dView();
01343     ArrayView<scalar_type>      y2_ptr = y2_rcp();
01344     ArrayView<const scalar_type> x_ptr = x_rcp();
01345     Array<scalar_type> dtemp(NumVectors,STS::zero());
01346     for (int iter = 0; iter < NumSweeps_; ++iter) {
01347       // only one data exchange per sweep
01348       if (IsParallel_) {
01349   if (Importer_.is_null ()) {
01350     *Y2 = Y; // just copy, since domain and column Maps are the same
01351   } else {
01352     Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01353   }
01354       }
01355 
01356       for (int j = 0; j < NumSweeps_; j++) {
01357   // data exchange is here, once per sweep
01358   if (IsParallel_) {
01359     if (Importer_.is_null ()) {
01360       *Y2 = Y; // just copy, since domain and column Maps are the same
01361     } else {
01362       Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01363     }
01364   }
01365   
01366   for (size_t ii = 0; ii < numActive; ++ii) {
01367     local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01368     size_t NumEntries;
01369     A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);   
01370     dtemp.assign(NumVectors,STS::zero());
01371     
01372     for (size_t k = 0; k < NumEntries; ++k) {
01373       const local_ordinal_type col = Indices[k];
01374       for (size_t m = 0; m < NumVectors; ++m) 
01375         dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01376     }
01377     
01378     for (size_t m = 0; m < NumVectors; ++m) 
01379       y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01380   }
01381 
01382   // ptrdiff_t is the same size as size_t, but is signed.  Being
01383   // signed is important so that i >= 0 is not trivially true.
01384   for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01385     local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01386     size_t NumEntries;
01387     A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);   
01388     dtemp.assign(NumVectors,STS::zero());
01389     
01390     for (size_t k = 0; k < NumEntries; ++k) {
01391       const local_ordinal_type col = Indices[k];
01392       for (size_t m = 0; m < NumVectors; ++m) 
01393         dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
01394     }
01395     
01396     for (size_t m = 0; m < NumVectors; ++m) 
01397       y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
01398   }
01399       }      
01400 
01401       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01402       if (IsParallel_) {
01403   Y = *Y2;
01404       }
01405     }
01406   }
01407   else { 
01408     // extract 2D RCPs
01409     ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
01410     ArrayRCP<ArrayRCP<const scalar_type> > x_ptr =  X.get2dView ();
01411 
01412     for (int iter = 0; iter < NumSweeps_; ++iter) {
01413       // only one data exchange per sweep
01414       if (IsParallel_) {
01415   if (Importer_.is_null ()) {
01416     *Y2 = Y; // just copy, since domain and column Maps are the same
01417   } else {
01418     Y2->doImport (Y, *Importer_, Tpetra::INSERT);
01419   }
01420       }
01421 
01422       for (size_t ii = 0; ii < numActive; ++ii) {
01423   local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01424   const scalar_type diag = d_ptr[i];
01425   size_t NumEntries;
01426   A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
01427   
01428   for (size_t m = 0; m < NumVectors; ++m) {
01429     scalar_type dtemp = STS::zero ();
01430     ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01431     ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01432     
01433     for (size_t k = 0; k < NumEntries; ++k) {
01434       const local_ordinal_type col = Indices[k];
01435       dtemp += Values[k] * y2_local[col];
01436     }
01437     y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
01438   }
01439       }
01440       
01441       // ptrdiff_t is the same size as size_t, but is signed.  Being
01442       // signed is important so that i >= 0 is not trivially true.
01443       for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
01444   local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
01445   const scalar_type diag = d_ptr[i];
01446   size_t NumEntries;
01447   A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
01448   
01449   for (size_t m = 0; m < NumVectors; ++m) {
01450     scalar_type dtemp = STS::zero ();
01451     ArrayView<const scalar_type> x_local = (x_ptr())[m]();
01452     ArrayView<scalar_type>      y2_local = (y2_ptr())[m]();
01453     
01454     for (size_t k = 0; k < NumEntries; ++k) {
01455       const local_ordinal_type col = Indices[k];
01456       dtemp += Values[k] * y2_local[col];
01457     }
01458     y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
01459   }
01460       }
01461       
01462       // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
01463       if (IsParallel_) {
01464   Y = *Y2;
01465       }
01466     }
01467   }
01468 
01469   // See flop count discussion in implementation of ApplyInverseSGS_CrsMatrix().
01470   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01471   const double numVectors = as<double> (X.getNumVectors ());
01472   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01473   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01474   ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
01475     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01476 }
01477 
01478 
01479 template<class MatrixType>
01480 void
01481 Relaxation<MatrixType>::
01482 ApplyInverseSGS_CrsMatrix (const crs_matrix_type& A,
01483                            const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
01484                            Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
01485 {
01486   using Teuchos::as;
01487   const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
01488   if(localSmoothingIndices_.is_null())
01489     A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
01490                        NumSweeps_, ZeroStartingSolution_);
01491   else
01492     A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_(), DampingFactor_, direction,
01493                                 NumSweeps_, ZeroStartingSolution_);
01494 
01495   // For each column of output, for each sweep over the matrix:
01496   //
01497   // - One + and one * for each matrix entry
01498   // - One / and one + for each row of the matrix
01499   // - If the damping factor is not one: one * for each row of the
01500   //   matrix.  (It's not fair to count this if the damping factor is
01501   //   one, since the implementation could skip it.  Whether it does
01502   //   or not is the implementation's choice.)
01503   //
01504   // Each sweep of symmetric Gauss-Seidel / SOR counts as two sweeps,
01505   // one forward and one backward.
01506 
01507   // Floating-point operations due to the damping factor, per matrix
01508   // row, per direction, per columm of output.
01509   const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
01510   const double numVectors = as<double> (X.getNumVectors ());
01511   const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
01512   const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
01513   ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
01514     (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
01515 }
01516 
01517 
01518 template<class MatrixType>
01519 std::string Relaxation<MatrixType>::description () const {
01520   std::ostringstream os;
01521 
01522   // Output is a valid YAML dictionary in flow style.  If you don't
01523   // like everything on a single line, you should call describe()
01524   // instead.
01525   os << "\"Ifpack2::Relaxation\": {";
01526 
01527   os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
01528      << "Computed: " << (isComputed () ? "true" : "false") << ", ";
01529 
01530   // It's useful to print this instance's relaxation method (Jacobi,
01531   // Gauss-Seidel, or symmetric Gauss-Seidel).  If you want more info
01532   // than that, call describe() instead.
01533   os << "Type: ";
01534   if (PrecType_ == Ifpack2::Details::JACOBI) {
01535     os << "Jacobi";
01536   } else if (PrecType_ == Ifpack2::Details::GS) {
01537     os << "Gauss-Seidel";
01538   } else if (PrecType_ == Ifpack2::Details::SGS) {
01539     os << "Symmetric Gauss-Seidel";
01540   } else {
01541     os << "INVALID";
01542   }
01543 
01544   os  << ", " << "sweeps: " << NumSweeps_ << ", "
01545       << "damping factor: " << DampingFactor_ << ", ";
01546   if (DoL1Method_) {
01547     os << "use l1: " << DoL1Method_ << ", "
01548        << "l1 eta: " << L1Eta_ << ", ";
01549   }
01550 
01551   if (A_.is_null ()) {
01552     os << "Matrix: null";
01553   }
01554   else {
01555     os << "Global matrix dimensions: ["
01556        << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
01557        << ", Global nnz: " << A_->getGlobalNumEntries();
01558   }
01559 
01560   os << "}";
01561   return os.str ();
01562 }
01563 
01564 
01565 template<class MatrixType>
01566 void
01567 Relaxation<MatrixType>::
01568 describe (Teuchos::FancyOStream &out,
01569           const Teuchos::EVerbosityLevel verbLevel) const
01570 {
01571   using Teuchos::OSTab;
01572   using Teuchos::TypeNameTraits;
01573   using Teuchos::VERB_DEFAULT;
01574   using Teuchos::VERB_NONE;
01575   using Teuchos::VERB_LOW;
01576   using Teuchos::VERB_MEDIUM;
01577   using Teuchos::VERB_HIGH;
01578   using Teuchos::VERB_EXTREME;
01579   using std::endl;
01580 
01581   const Teuchos::EVerbosityLevel vl =
01582     (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
01583 
01584   const int myRank = this->getComm ()->getRank ();
01585 
01586   //    none: print nothing
01587   //     low: print O(1) info from Proc 0
01588   //  medium:
01589   //    high:
01590   // extreme:
01591 
01592   if (vl != VERB_NONE && myRank == 0) {
01593     // Describable interface asks each implementation to start with a tab.
01594     OSTab tab1 (out);
01595     // Output is valid YAML; hence the quotes, to protect the colons.
01596     out << "\"Ifpack2::Relaxation\":" << endl;
01597     OSTab tab2 (out);
01598     out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
01599         << endl;
01600     if (this->getObjectLabel () != "") {
01601       out << "Label: " << this->getObjectLabel () << endl;
01602     }
01603     out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
01604         << "Computed: " << (isComputed () ? "true" : "false") << endl
01605         << "Parameters: " << endl;
01606     {
01607       OSTab tab3 (out);
01608       out << "\"relaxation: type\": ";
01609       if (PrecType_ == Ifpack2::Details::JACOBI) {
01610         out << "Jacobi";
01611       } else if (PrecType_ == Ifpack2::Details::GS) {
01612         out << "Gauss-Seidel";
01613       } else if (PrecType_ == Ifpack2::Details::SGS) {
01614         out << "Symmetric Gauss-Seidel";
01615       } else {
01616         out << "INVALID";
01617       }
01618       // We quote these parameter names because they contain colons.
01619       // YAML uses the colon to distinguish key from value.
01620       out << endl
01621           << "\"relaxation: sweeps\": " << NumSweeps_ << endl
01622           << "\"relaxation: damping factor\": " << DampingFactor_ << endl
01623           << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
01624           << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
01625           << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
01626           << "\"relaxation: use l1\": " << DoL1Method_ << endl
01627           << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
01628     }
01629     out << "Computed quantities:" << endl;
01630     {
01631       OSTab tab3 (out);
01632       out << "Condition number estimate: " << Condest_ << endl
01633           << "Global number of rows: " << A_->getGlobalNumRows () << endl
01634           << "Global number of columns: " << A_->getGlobalNumCols () << endl;
01635     }
01636     if (checkDiagEntries_ && isComputed ()) {
01637       out << "Properties of input diagonal entries:" << endl;
01638       {
01639         OSTab tab3 (out);
01640         out << "Magnitude of minimum-magnitude diagonal entry: "
01641             << globalMinMagDiagEntryMag_ << endl
01642             << "Magnitude of maximum-magnitude diagonal entry: "
01643             << globalMaxMagDiagEntryMag_ << endl
01644             << "Number of diagonal entries with small magnitude: "
01645             << globalNumSmallDiagEntries_ << endl
01646             << "Number of zero diagonal entries: "
01647             << globalNumZeroDiagEntries_ << endl
01648             << "Number of diagonal entries with negative real part: "
01649             << globalNumNegDiagEntries_ << endl
01650             << "Abs 2-norm diff between computed and actual inverse "
01651             << "diagonal: " << globalDiagNormDiff_ << endl;
01652       }
01653     }
01654     if (isComputed ()) {
01655       out << "Saved diagonal offsets: "
01656           << (savedDiagOffsets_ ? "true" : "false") << endl;
01657     }
01658     out << "Call counts and total times (in seconds): " << endl;
01659     {
01660       OSTab tab3 (out);
01661       out << "initialize: " << endl;
01662       {
01663         OSTab tab4 (out);
01664         out << "Call count: " << NumInitialize_ << endl;
01665         out << "Total time: " << InitializeTime_ << endl;
01666       }
01667       out << "compute: " << endl;
01668       {
01669         OSTab tab4 (out);
01670         out << "Call count: " << NumCompute_ << endl;
01671         out << "Total time: " << ComputeTime_ << endl;
01672       }
01673       out << "apply: " << endl;
01674       {
01675         OSTab tab4 (out);
01676         out << "Call count: " << NumApply_ << endl;
01677         out << "Total time: " << ApplyTime_ << endl;
01678       }
01679     }
01680   }
01681 }
01682 
01683 } // namespace Ifpack2
01684 
01685 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N)                            \
01686   template class Ifpack2::Relaxation< Tpetra::CrsMatrix<S, LO, GO, N> >; \
01687   template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
01688 
01689 #endif // IFPACK2_RELAXATION_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends