Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_ILUT_def.hpp
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #ifndef IFPACK2_ILUT_DEF_HPP
00044 #define IFPACK2_ILUT_DEF_HPP
00045 
00046 // disable clang warnings
00047 #ifdef __clang__
00048 #pragma clang system_header
00049 #endif
00050 
00051 #include <Ifpack2_Heap.hpp>
00052 #include <Ifpack2_Condest.hpp>
00053 #include <Ifpack2_LocalFilter.hpp>
00054 #include <Ifpack2_Parameters.hpp>
00055 #include <Tpetra_CrsMatrix_def.hpp>
00056 
00057 #include <Teuchos_Time.hpp>
00058 #include <Teuchos_TypeNameTraits.hpp>
00059 
00060 
00061 namespace Ifpack2 {
00062 
00063   namespace {
00064 
00089     template<class ScalarType>
00090     typename Teuchos::ScalarTraits<ScalarType>::magnitudeType
00091     ilutDefaultDropTolerance () {
00092       using Teuchos::as;
00093       typedef Teuchos::ScalarTraits<ScalarType> STS;
00094       typedef typename STS::magnitudeType magnitude_type;
00095       typedef Teuchos::ScalarTraits<magnitude_type> STM;
00096 
00097       // 1/2.  Hopefully this can be represented in magnitude_type.
00098       const magnitude_type oneHalf = STM::one() / (STM::one() + STM::one());
00099 
00100       // The min ensures that in case magnitude_type has very low
00101       // precision, we'll at least get some value strictly less than
00102       // one.
00103       return std::min (as<magnitude_type> (1000) * STS::magnitude (STS::eps ()), oneHalf);
00104     }
00105 
00106     // Full specialization for ScalarType = double.
00107     // This specialization preserves ILUT's previous default behavior.
00108     template<>
00109     Teuchos::ScalarTraits<double>::magnitudeType
00110     ilutDefaultDropTolerance<double> () {
00111       return 1e-12;
00112     }
00113 
00114   } // namespace (anonymous)
00115 
00116 
00117 template <class MatrixType>
00118 ILUT<MatrixType>::ILUT (const Teuchos::RCP<const row_matrix_type>& A) :
00119   A_ (A),
00120   Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
00121   Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
00122   RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
00123   LevelOfFill_ (1),
00124   DropTolerance_ (ilutDefaultDropTolerance<scalar_type> ()),
00125   Condest_ (-Teuchos::ScalarTraits<magnitude_type>::one ()),
00126   InitializeTime_ (0.0),
00127   ComputeTime_ (0.0),
00128   ApplyTime_ (0.0),
00129   NumInitialize_ (0),
00130   NumCompute_ (0),
00131   NumApply_ (0),
00132   IsInitialized_ (false),
00133   IsComputed_ (false)
00134 {}
00135 
00136 template <class MatrixType>
00137 ILUT<MatrixType>::~ILUT()
00138 {}
00139 
00140 template <class MatrixType>
00141 void ILUT<MatrixType>::setParameters (const Teuchos::ParameterList& params)
00142 {
00143   using Teuchos::as;
00144   using Teuchos::Exceptions::InvalidParameterName;
00145   using Teuchos::Exceptions::InvalidParameterType;
00146 
00147   // Default values of the various parameters.
00148   int fillLevel = 1;
00149   magnitude_type absThresh = STM::zero ();
00150   magnitude_type relThresh = STM::one ();
00151   magnitude_type relaxValue = STM::zero ();
00152   magnitude_type dropTol = ilutDefaultDropTolerance<scalar_type> ();
00153 
00154   bool gotFillLevel = false;
00155   try {
00156     // Try getting the fill level as an int.
00157     fillLevel = params.get<int> ("fact: ilut level-of-fill");
00158     gotFillLevel = true;
00159   }
00160   catch (InvalidParameterName&) {
00161     // We didn't really get it, but this tells us to stop looking.
00162     gotFillLevel = true;
00163   }
00164   catch (InvalidParameterType&) {
00165     // The name is right, but the type is wrong; try different types.
00166     // We don't have to check InvalidParameterName again, since we
00167     // checked it above, and it has nothing to do with the type.
00168   }
00169 
00170   if (! gotFillLevel) {
00171     // Try magnitude_type, for backwards compatibility.
00172     try {
00173       fillLevel = as<int> (params.get<magnitude_type> ("fact: ilut level-of-fill"));
00174     }
00175     catch (InvalidParameterType&) {}
00176   }
00177   if (! gotFillLevel) {
00178     // Try double, for backwards compatibility.
00179     try {
00180       fillLevel = as<int> (params.get<double> ("fact: ilut level-of-fill"));
00181     }
00182     catch (InvalidParameterType&) {}
00183   }
00184   // If none of the above attempts succeed, accept the default value.
00185 
00186   TEUCHOS_TEST_FOR_EXCEPTION(
00187     fillLevel <= 0, std::runtime_error,
00188     "Ifpack2::ILUT: The \"fact: ilut level-of-fill\" parameter must be "
00189     "strictly greater than zero, but you specified a value of " << fillLevel
00190     << ".  Remember that for ILUT, the fill level p means something different "
00191     "than it does for ILU(k).  ILU(0) produces factors with the same sparsity "
00192     "structure as the input matrix A; ILUT with p = 0 always produces a "
00193     "diagonal matrix, and is thus probably not what you want.");
00194 
00195   try {
00196     absThresh = params.get<magnitude_type> ("fact: absolute threshold");
00197   }
00198   catch (InvalidParameterType&) {
00199     // Try double, for backwards compatibility.
00200     // The cast from double to magnitude_type must succeed.
00201     absThresh = as<magnitude_type> (params.get<double> ("fact: absolute threshold"));
00202   }
00203   catch (InvalidParameterName&) {
00204     // Accept the default value.
00205   }
00206 
00207   try {
00208     relThresh = params.get<magnitude_type> ("fact: relative threshold");
00209   }
00210   catch (InvalidParameterType&) {
00211     // Try double, for backwards compatibility.
00212     // The cast from double to magnitude_type must succeed.
00213     relThresh = as<magnitude_type> (params.get<double> ("fact: relative threshold"));
00214   }
00215   catch (InvalidParameterName&) {
00216     // Accept the default value.
00217   }
00218 
00219   try {
00220     relaxValue = params.get<magnitude_type> ("fact: relax value");
00221   }
00222   catch (InvalidParameterType&) {
00223     // Try double, for backwards compatibility.
00224     // The cast from double to magnitude_type must succeed.
00225     relaxValue = as<magnitude_type> (params.get<double> ("fact: relax value"));
00226   }
00227   catch (InvalidParameterName&) {
00228     // Accept the default value.
00229   }
00230 
00231   try {
00232     dropTol = params.get<magnitude_type> ("fact: drop tolerance");
00233   }
00234   catch (InvalidParameterType&) {
00235     // Try double, for backwards compatibility.
00236     // The cast from double to magnitude_type must succeed.
00237     dropTol = as<magnitude_type> (params.get<double> ("fact: drop tolerance"));
00238   }
00239   catch (InvalidParameterName&) {
00240     // Accept the default value.
00241   }
00242 
00243   // "Commit" the values only after validating all of them.  This
00244   // ensures that there are no side effects if this routine throws an
00245   // exception.
00246 
00247   // mfh 28 Nov 2012: The previous code would not assign Athresh_,
00248   // Rthresh_, RelaxValue_, or DropTolerance_ if the read-in value was
00249   // -1.  I don't know if keeping this behavior is correct, but I'll
00250   // keep it just so as not to change previous behavior.
00251 
00252   LevelOfFill_ = fillLevel;
00253   if (absThresh != -STM::one ()) {
00254     Athresh_ = absThresh;
00255   }
00256   if (relThresh != -STM::one ()) {
00257     Rthresh_ = relThresh;
00258   }
00259   if (relaxValue != -STM::one ()) {
00260     RelaxValue_ = relaxValue;
00261   }
00262   if (dropTol != -STM::one ()) {
00263     DropTolerance_ = dropTol;
00264   }
00265 }
00266 
00267 
00268 template <class MatrixType>
00269 Teuchos::RCP<const Teuchos::Comm<int> >
00270 ILUT<MatrixType>::getComm () const {
00271   TEUCHOS_TEST_FOR_EXCEPTION(
00272     A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getComm: "
00273     "The matrix is null.  Please call setMatrix() with a nonnull input "
00274     "before calling this method.");
00275   return A_->getComm ();
00276 }
00277 
00278 
00279 template <class MatrixType>
00280 Teuchos::RCP<const typename ILUT<MatrixType>::row_matrix_type>
00281 ILUT<MatrixType>::getMatrix () const {
00282   return A_;
00283 }
00284 
00285 
00286 template <class MatrixType>
00287 Teuchos::RCP<const typename ILUT<MatrixType>::map_type>
00288 ILUT<MatrixType>::getDomainMap () const
00289 {
00290   TEUCHOS_TEST_FOR_EXCEPTION(
00291     A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getDomainMap: "
00292     "The matrix is null.  Please call setMatrix() with a nonnull input "
00293     "before calling this method.");
00294   return A_->getDomainMap ();
00295 }
00296 
00297 
00298 template <class MatrixType>
00299 Teuchos::RCP<const typename ILUT<MatrixType>::map_type>
00300 ILUT<MatrixType>::getRangeMap () const
00301 {
00302   TEUCHOS_TEST_FOR_EXCEPTION(
00303     A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getRangeMap: "
00304     "The matrix is null.  Please call setMatrix() with a nonnull input "
00305     "before calling this method.");
00306   return A_->getRangeMap ();
00307 }
00308 
00309 
00310 template <class MatrixType>
00311 bool ILUT<MatrixType>::hasTransposeApply () const {
00312   return true;
00313 }
00314 
00315 
00316 template <class MatrixType>
00317 int ILUT<MatrixType>::getNumInitialize () const {
00318   return NumInitialize_;
00319 }
00320 
00321 
00322 template <class MatrixType>
00323 int ILUT<MatrixType>::getNumCompute () const {
00324   return NumCompute_;
00325 }
00326 
00327 
00328 template <class MatrixType>
00329 int ILUT<MatrixType>::getNumApply () const {
00330   return NumApply_;
00331 }
00332 
00333 
00334 template <class MatrixType>
00335 double ILUT<MatrixType>::getInitializeTime () const {
00336   return InitializeTime_;
00337 }
00338 
00339 
00340 template<class MatrixType>
00341 double ILUT<MatrixType>::getComputeTime () const {
00342   return ComputeTime_;
00343 }
00344 
00345 
00346 template<class MatrixType>
00347 double ILUT<MatrixType>::getApplyTime () const {
00348   return ApplyTime_;
00349 }
00350 
00351 
00352 template<class MatrixType>
00353 global_size_t ILUT<MatrixType>::getGlobalNumEntries () const {
00354   return L_->getGlobalNumEntries () + U_->getGlobalNumEntries ();
00355 }
00356 
00357 
00358 template<class MatrixType>
00359 size_t ILUT<MatrixType>::getNodeNumEntries () const {
00360   return L_->getNodeNumEntries () + U_->getNodeNumEntries ();
00361 }
00362 
00363 
00364 template<class MatrixType>
00365 typename ILUT<MatrixType>::magnitude_type
00366 ILUT<MatrixType>::
00367 computeCondEst (CondestType CT,
00368                 local_ordinal_type MaxIters,
00369                 magnitude_type Tol,
00370                 const Teuchos::Ptr<const row_matrix_type>& matrix)
00371 {
00372   if (! isComputed ()) {
00373     return -STM::one ();
00374   }
00375   // NOTE: this is computing the *local* condest
00376   if (Condest_ == -STM::one ()) {
00377     Condest_ = Ifpack2::Condest(*this, CT, MaxIters, Tol, matrix);
00378   }
00379   return Condest_;
00380 }
00381 
00382 
00383 template<class MatrixType>
00384 void ILUT<MatrixType>::setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
00385 {
00386   if (A.getRawPtr () != A_.getRawPtr ()) {
00387     // Check in serial or one-process mode if the matrix is square.
00388     TEUCHOS_TEST_FOR_EXCEPTION(
00389       ! A.is_null () && A->getComm ()->getSize () == 1 &&
00390       A->getNodeNumRows () != A->getNodeNumCols (),
00391       std::runtime_error, "Ifpack2::ILUT::setMatrix: If A's communicator only "
00392       "contains one process, then A must be square.  Instead, you provided a "
00393       "matrix A with " << A->getNodeNumRows () << " rows and "
00394       << A->getNodeNumCols () << " columns.");
00395 
00396     // It's legal for A to be null; in that case, you may not call
00397     // initialize() until calling setMatrix() with a nonnull input.
00398     // Regardless, setting the matrix invalidates any previous
00399     // factorization.
00400     IsInitialized_ = false;
00401     IsComputed_ = false;
00402     A_local_ = Teuchos::null;
00403     L_ = Teuchos::null;
00404     U_ = Teuchos::null;
00405     Condest_ = -Teuchos::ScalarTraits<magnitude_type>::one();
00406     A_ = A;
00407   }
00408 }
00409 
00410 
00411 template<class MatrixType>
00412 void ILUT<MatrixType>::initialize ()
00413 {
00414   Teuchos::Time timer ("ILUT::initialize");
00415   {
00416     Teuchos::TimeMonitor timeMon (timer);
00417 
00418     // Check that the matrix is nonnull.
00419     TEUCHOS_TEST_FOR_EXCEPTION(
00420       A_.is_null (), std::runtime_error, "Ifpack2::ILUT::initialize: "
00421       "The matrix to precondition is null.  Please call setMatrix() with a "
00422       "nonnull input before calling this method.");
00423 
00424     // Clear any previous computations.
00425     IsInitialized_ = false;
00426     IsComputed_ = false;
00427     A_local_ = Teuchos::null;
00428     L_ = Teuchos::null;
00429     U_ = Teuchos::null;
00430 
00431     A_local_ = makeLocalFilter (A_); // Compute the local filter.
00432 
00433     IsInitialized_ = true;
00434     ++NumInitialize_;
00435   }
00436   InitializeTime_ += timer.totalElapsedTime ();
00437 }
00438 
00439 
00440 template<typename ScalarType>
00441 typename Teuchos::ScalarTraits<ScalarType>::magnitudeType
00442 scalar_mag (const ScalarType& s)
00443 {
00444   return Teuchos::ScalarTraits<ScalarType>::magnitude(s);
00445 }
00446 
00447 
00448 template<class MatrixType>
00449 void ILUT<MatrixType>::compute ()
00450 {
00451   using Teuchos::Array;
00452   using Teuchos::ArrayRCP;
00453   using Teuchos::ArrayView;
00454   using Teuchos::as;
00455   using Teuchos::rcp;
00456   using Teuchos::reduceAll;
00457 
00458   //--------------------------------------------------------------------------
00459   // Ifpack2::ILUT is a translation of the Aztec ILUT implementation. The Aztec
00460   // ILUT implementation was written by Ray Tuminaro.
00461   //
00462   // This isn't an exact translation of the Aztec ILUT algorithm, for the
00463   // following reasons:
00464   // 1. Minor differences result from the fact that Aztec factors a MSR format
00465   // matrix in place, while the code below factors an input CrsMatrix which
00466   // remains untouched and stores the resulting factors in separate L and U
00467   // CrsMatrix objects.
00468   // Also, the Aztec code begins by shifting the matrix pointers back
00469   // by one, and the pointer contents back by one, and then using 1-based
00470   // Fortran-style indexing in the algorithm. This Ifpack2 code uses C-style
00471   // 0-based indexing throughout.
00472   // 2. Aztec stores the inverse of the diagonal of U. This Ifpack2 code
00473   // stores the non-inverted diagonal in U.
00474   // The triangular solves (in Ifpack2::ILUT::apply()) are performed by
00475   // calling the Tpetra::CrsMatrix::solve method on the L and U objects, and
00476   // this requires U to contain the non-inverted diagonal.
00477   //
00478   // ABW.
00479   //--------------------------------------------------------------------------
00480 
00481   // Don't count initialization in the compute() time.
00482   if (! isInitialized ()) {
00483     initialize ();
00484   }
00485 
00486   Teuchos::Time timer ("ILUT::compute");
00487   { // Timer scope for timing compute()
00488     Teuchos::TimeMonitor timeMon (timer, true);
00489     const scalar_type zero = STS::zero ();
00490     const scalar_type one  = STS::one ();
00491 
00492     const local_ordinal_type myNumRows = A_local_->getNodeNumRows ();
00493     L_ = rcp (new crs_matrix_type (A_local_->getRowMap (), A_local_->getColMap (), 0));
00494     U_ = rcp (new crs_matrix_type (A_local_->getRowMap (), A_local_->getColMap (), 0));
00495 
00496     // CGB: note, this caching approach may not be necessary anymore
00497     // We will store ArrayView objects that are views of the rows of U, so that
00498     // we don't have to repeatedly retrieve the view for each row. These will
00499     // be populated row by row as the factorization proceeds.
00500     Array<ArrayView<const local_ordinal_type> > Uindices (myNumRows);
00501     Array<ArrayView<const scalar_type> >       Ucoefs (myNumRows);
00502 
00503     // If this macro is defined, files containing the L and U factors
00504     // will be written. DON'T CHECK IN THE CODE WITH THIS MACRO ENABLED!!!
00505     // #define IFPACK2_WRITE_FACTORS
00506 #ifdef IFPACK2_WRITE_FACTORS
00507     std::ofstream ofsL("L.tif.mtx", std::ios::out);
00508     std::ofstream ofsU("U.tif.mtx", std::ios::out);
00509 #endif
00510 
00511     // The code here uses double for fill calculations, even though
00512     // the fill level is actually an integer.  The point is to avoid
00513     // rounding and overflow for integer calculations.  If int is <=
00514     // 32 bits, it can never overflow double, so this cast is always
00515     // OK as long as int has <= 32 bits.
00516 
00517     // Calculate how much fill will be allowed in addition to the
00518     // space that corresponds to the input matrix entries.
00519     double local_nnz = static_cast<double> (A_local_->getNodeNumEntries ());
00520     double fill;
00521     {
00522       const double fillLevel = as<double> (getLevelOfFill ());
00523       fill = ((fillLevel - 1) * local_nnz) / (2 * myNumRows);
00524     }
00525 
00526     // std::ceil gives the smallest integer larger than the argument.
00527     // this may give a slightly different result than Aztec's fill value in
00528     // some cases.
00529     double fill_ceil=std::ceil(fill);
00530 
00531     // Similarly to Aztec, we will allow the same amount of fill for each
00532     // row, half in L and half in U.
00533     size_type fillL = static_cast<size_type>(fill_ceil);
00534     size_type fillU = static_cast<size_type>(fill_ceil);
00535 
00536     Array<scalar_type> InvDiagU (myNumRows, zero);
00537 
00538     Array<local_ordinal_type> tmp_idx;
00539     Array<scalar_type> tmpv;
00540 
00541     enum { UNUSED, ORIG, FILL };
00542     local_ordinal_type max_col = myNumRows;
00543 
00544     Array<int> pattern(max_col, UNUSED);
00545     Array<scalar_type> cur_row(max_col, zero);
00546     Array<magnitude_type> unorm(max_col);
00547     magnitude_type rownorm;
00548     Array<local_ordinal_type> L_cols_heap;
00549     Array<local_ordinal_type> U_cols;
00550     Array<local_ordinal_type> L_vals_heap;
00551     Array<local_ordinal_type> U_vals_heap;
00552 
00553     // A comparison object which will be used to create 'heaps' of indices
00554     // that are ordered according to the corresponding values in the
00555     // 'cur_row' array.
00556     greater_indirect<scalar_type,local_ordinal_type> vals_comp(cur_row);
00557 
00558     // =================== //
00559     // start factorization //
00560     // =================== //
00561 
00562     ArrayRCP<local_ordinal_type> ColIndicesARCP;
00563     ArrayRCP<scalar_type>       ColValuesARCP;
00564     if (! A_local_->supportsRowViews ()) {
00565       const size_t maxnz = A_local_->getNodeMaxNumRowEntries ();
00566       ColIndicesARCP.resize (maxnz);
00567       ColValuesARCP.resize (maxnz);
00568     }
00569 
00570     for (local_ordinal_type row_i = 0 ; row_i < myNumRows ; ++row_i) {
00571       ArrayView<const local_ordinal_type> ColIndicesA;
00572       ArrayView<const scalar_type> ColValuesA;
00573       size_t RowNnz;
00574 
00575       if (A_local_->supportsRowViews ()) {
00576         A_local_->getLocalRowView (row_i, ColIndicesA, ColValuesA);
00577         RowNnz = ColIndicesA.size ();
00578       }
00579       else {
00580         A_local_->getLocalRowCopy (row_i, ColIndicesARCP (), ColValuesARCP (), RowNnz);
00581         ColIndicesA = ColIndicesARCP (0, RowNnz);
00582         ColValuesA = ColValuesARCP (0, RowNnz);
00583       }
00584 
00585       // Always include the diagonal in the U factor. The value should get
00586       // set in the next loop below.
00587       U_cols.push_back(row_i);
00588       cur_row[row_i] = zero;
00589       pattern[row_i] = ORIG;
00590 
00591       size_type L_cols_heaplen = 0;
00592       rownorm = STM::zero ();
00593       for (size_t i = 0; i < RowNnz; ++i) {
00594         if (ColIndicesA[i] < myNumRows) {
00595           if (ColIndicesA[i] < row_i) {
00596             add_to_heap(ColIndicesA[i], L_cols_heap, L_cols_heaplen);
00597           }
00598           else if (ColIndicesA[i] > row_i) {
00599             U_cols.push_back(ColIndicesA[i]);
00600           }
00601 
00602           cur_row[ColIndicesA[i]] = ColValuesA[i];
00603           pattern[ColIndicesA[i]] = ORIG;
00604           rownorm += scalar_mag(ColValuesA[i]);
00605         }
00606       }
00607 
00608       // Alter the diagonal according to the absolute-threshold and
00609       // relative-threshold values. If not set, those values default
00610       // to zero and one respectively.
00611       const magnitude_type rthresh = getRelativeThreshold();
00612       const scalar_type& v = cur_row[row_i];
00613       cur_row[row_i] = as<scalar_type> (getAbsoluteThreshold() * IFPACK2_SGN(v)) + rthresh*v;
00614 
00615       size_type orig_U_len = U_cols.size();
00616       RowNnz = L_cols_heap.size() + orig_U_len;
00617       rownorm = getDropTolerance() * rownorm/RowNnz;
00618 
00619       // The following while loop corresponds to the 'L30' goto's in Aztec.
00620       size_type L_vals_heaplen = 0;
00621       while (L_cols_heaplen > 0) {
00622         local_ordinal_type row_k = L_cols_heap.front();
00623 
00624         scalar_type multiplier = cur_row[row_k] * InvDiagU[row_k];
00625         cur_row[row_k] = multiplier;
00626         magnitude_type mag_mult = scalar_mag(multiplier);
00627         if (mag_mult*unorm[row_k] < rownorm) {
00628           pattern[row_k] = UNUSED;
00629           rm_heap_root(L_cols_heap, L_cols_heaplen);
00630           continue;
00631         }
00632         if (pattern[row_k] != ORIG) {
00633           if (L_vals_heaplen < fillL) {
00634             add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
00635           }
00636           else if (L_vals_heaplen==0 ||
00637                    mag_mult < scalar_mag(cur_row[L_vals_heap.front()])) {
00638             pattern[row_k] = UNUSED;
00639             rm_heap_root(L_cols_heap, L_cols_heaplen);
00640             continue;
00641           }
00642           else {
00643             pattern[L_vals_heap.front()] = UNUSED;
00644             rm_heap_root(L_vals_heap, L_vals_heaplen, vals_comp);
00645             add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
00646           }
00647         }
00648 
00649         /* Reduce current row */
00650 
00651         ArrayView<const local_ordinal_type>& ColIndicesU = Uindices[row_k];
00652         ArrayView<const scalar_type>& ColValuesU = Ucoefs[row_k];
00653         size_type ColNnzU = ColIndicesU.size();
00654 
00655         for(size_type j=0; j<ColNnzU; ++j) {
00656           if (ColIndicesU[j] > row_k) {
00657             scalar_type tmp = multiplier * ColValuesU[j];
00658             local_ordinal_type col_j = ColIndicesU[j];
00659             if (pattern[col_j] != UNUSED) {
00660               cur_row[col_j] -= tmp;
00661             }
00662             else if (scalar_mag(tmp) > rownorm) {
00663               cur_row[col_j] = -tmp;
00664               pattern[col_j] = FILL;
00665               if (col_j > row_i) {
00666                 U_cols.push_back(col_j);
00667               }
00668               else {
00669                 add_to_heap(col_j, L_cols_heap, L_cols_heaplen);
00670               }
00671             }
00672           }
00673         }
00674 
00675         rm_heap_root(L_cols_heap, L_cols_heaplen);
00676       }//end of while(L_cols_heaplen) loop
00677 
00678 
00679       // Put indices and values for L into arrays and then into the L_ matrix.
00680 
00681       //   first, the original entries from the L section of A:
00682       for (size_type i = 0; i < ColIndicesA.size (); ++i) {
00683         if (ColIndicesA[i] < row_i) {
00684           tmp_idx.push_back(ColIndicesA[i]);
00685           tmpv.push_back(cur_row[ColIndicesA[i]]);
00686           pattern[ColIndicesA[i]] = UNUSED;
00687         }
00688       }
00689 
00690       //   next, the L entries resulting from fill:
00691       for (size_type j = 0; j < L_vals_heaplen; ++j) {
00692         tmp_idx.push_back(L_vals_heap[j]);
00693         tmpv.push_back(cur_row[L_vals_heap[j]]);
00694         pattern[L_vals_heap[j]] = UNUSED;
00695       }
00696 
00697       // L has a one on the diagonal, but we don't explicitly store it.
00698       // If we don't store it, then the Tpetra/Kokkos kernel which performs
00699       // the triangular solve can assume a unit diagonal, take a short-cut
00700       // and perform faster.
00701 
00702       L_->insertLocalValues (row_i, tmp_idx (), tmpv ());
00703 #ifdef IFPACK2_WRITE_FACTORS
00704       for (size_type ii = 0; ii < tmp_idx.size (); ++ii) {
00705         ofsL << row_i << " " << tmp_idx[ii] << " " << tmpv[ii] << std::endl;
00706       }
00707 #endif
00708 
00709       tmp_idx.clear();
00710       tmpv.clear();
00711 
00712       // Pick out the diagonal element, store its reciprocal.
00713       if (cur_row[row_i] == zero) {
00714         std::cerr << "Ifpack2::ILUT::Compute: zero pivot encountered! Replacing with rownorm and continuing...(You may need to set the parameter 'fact: absolute threshold'.)" << std::endl;
00715         cur_row[row_i] = rownorm;
00716       }
00717       InvDiagU[row_i] = one / cur_row[row_i];
00718 
00719       // Non-inverted diagonal is stored for U:
00720       tmp_idx.push_back(row_i);
00721       tmpv.push_back(cur_row[row_i]);
00722       unorm[row_i] = scalar_mag(cur_row[row_i]);
00723       pattern[row_i] = UNUSED;
00724 
00725       // Now put indices and values for U into arrays and then into the U_ matrix.
00726       // The first entry in U_cols is the diagonal, which we just handled, so we'll
00727       // start our loop at j=1.
00728 
00729       size_type U_vals_heaplen = 0;
00730       for(size_type j=1; j<U_cols.size(); ++j) {
00731         local_ordinal_type col = U_cols[j];
00732         if (pattern[col] != ORIG) {
00733           if (U_vals_heaplen < fillU) {
00734             add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
00735           }
00736           else if (U_vals_heaplen!=0 && scalar_mag(cur_row[col]) >
00737                    scalar_mag(cur_row[U_vals_heap.front()])) {
00738             rm_heap_root(U_vals_heap, U_vals_heaplen, vals_comp);
00739             add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
00740           }
00741         }
00742         else {
00743           tmp_idx.push_back(col);
00744           tmpv.push_back(cur_row[col]);
00745           unorm[row_i] += scalar_mag(cur_row[col]);
00746         }
00747         pattern[col] = UNUSED;
00748       }
00749 
00750       for(size_type j=0; j<U_vals_heaplen; ++j) {
00751         tmp_idx.push_back(U_vals_heap[j]);
00752         tmpv.push_back(cur_row[U_vals_heap[j]]);
00753         unorm[row_i] += scalar_mag(cur_row[U_vals_heap[j]]);
00754       }
00755 
00756       unorm[row_i] /= (orig_U_len + U_vals_heaplen);
00757 
00758       U_->insertLocalValues(row_i, tmp_idx(), tmpv() );
00759 #ifdef IFPACK2_WRITE_FACTORS
00760       for(int ii=0; ii<tmp_idx.size(); ++ii) {
00761         ofsU <<row_i<< " " <<tmp_idx[ii]<< " " <<tmpv[ii]<< std::endl;
00762       }
00763 #endif
00764       tmp_idx.clear();
00765       tmpv.clear();
00766 
00767       U_->getLocalRowView(row_i, Uindices[row_i], Ucoefs[row_i] );
00768 
00769       L_cols_heap.clear();
00770       U_cols.clear();
00771       L_vals_heap.clear();
00772       U_vals_heap.clear();
00773     } // end of for(row_i) loop
00774 
00775     // FIXME (mfh 03 Apr 2013) Do we need to supply a domain and range Map?
00776     L_->fillComplete();
00777     U_->fillComplete();
00778   }
00779   ComputeTime_ += timer.totalElapsedTime ();
00780   IsComputed_ = true;
00781   ++NumCompute_;
00782 }
00783 
00784 
00785 template <class MatrixType>
00786 void ILUT<MatrixType>::
00787 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
00788        Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
00789        Teuchos::ETransp mode,
00790        scalar_type alpha,
00791        scalar_type beta) const
00792 {
00793   using Teuchos::RCP;
00794   using Teuchos::rcp;
00795   using Teuchos::rcpFromRef;
00796   typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
00797 
00798   Teuchos::Time timer ("ILUT::apply");
00799   { // Timer scope for timing apply()
00800     Teuchos::TimeMonitor timeMon (timer, true);
00801 
00802     TEUCHOS_TEST_FOR_EXCEPTION(
00803       ! isComputed (), std::runtime_error,
00804       "Ifpack2::ILUT::apply: You must call compute() to compute the incomplete "
00805       "factorization, before calling apply().");
00806 
00807     TEUCHOS_TEST_FOR_EXCEPTION(
00808       X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
00809       "Ifpack2::ILUT::apply: X and Y must have the same number of columns.  "
00810       "X has " << X.getNumVectors () << " columns, but Y has "
00811       << Y.getNumVectors () << " columns.");
00812 
00813     if (alpha == Teuchos::ScalarTraits<scalar_type>::zero ()) {
00814       // alpha == 0, so we don't need to apply the operator.
00815       //
00816       // The special case for beta == 0 ensures that if Y contains Inf
00817       // or NaN values, we replace them with 0 (following BLAS
00818       // convention), rather than multiplying them by 0 to get NaN.
00819       if (beta == Teuchos::ScalarTraits<scalar_type>::zero ()) {
00820         Y.putScalar (beta);
00821       } else {
00822         Y.scale (beta);
00823       }
00824       return;
00825     }
00826 
00827     // If beta != 0, create a temporary multivector Y_temp to hold the
00828     // contents of alpha*M^{-1}*X.  Otherwise, alias Y_temp to Y.
00829     RCP<MV> Y_temp;
00830     if (beta == Teuchos::ScalarTraits<scalar_type>::zero ()) {
00831       Y_temp = rcpFromRef (Y);
00832     } else {
00833       Y_temp = rcp (new MV (Y.getMap (), Y.getNumVectors ()));
00834     }
00835 
00836     // If X and Y are pointing to the same memory location, create an
00837     // auxiliary vector, X_temp, so that we don't clobber the input
00838     // when computing the output.  Otherwise, alias X_temp to X.
00839     RCP<const MV> X_temp;
00840     if (X.getLocalMV ().getValues () == Y.getLocalMV ().getValues ()) {
00841       X_temp = rcp (new MV (createCopy(X)));
00842     } else {
00843       X_temp = rcpFromRef (X);
00844     }
00845 
00846     // Create a temporary multivector Y_mid to hold the intermediate
00847     // between the L and U (or U and L, for the transpose or conjugate
00848     // transpose case) solves.
00849     RCP<MV> Y_mid = rcp (new MV (Y.getMap (), Y.getNumVectors ()));
00850 
00851     if (mode == Teuchos::NO_TRANS) { // Solve L U Y = X
00852       L_->template localSolve<scalar_type, scalar_type> (*X_temp, *Y_mid, mode);
00853       // FIXME (mfh 20 Aug 2013) Is it OK to use Y_temp for both the
00854       // input and the output?
00855       U_->template localSolve<scalar_type, scalar_type> (*Y_mid, *Y_temp, mode);
00856     }
00857     else { // Solve U^* L^* Y = X
00858       U_->template localSolve<scalar_type, scalar_type> (*X_temp, *Y_mid, mode);
00859       // FIXME (mfh 20 Aug 2013) Is it OK to use Y_temp for both the
00860       // input and the output?
00861       L_->template localSolve<scalar_type, scalar_type> (*Y_mid, *Y_temp, mode);
00862     }
00863 
00864     if (beta == Teuchos::ScalarTraits<scalar_type>::zero ()) {
00865       Y.scale (alpha);
00866     } else { // beta != 0
00867       Y.update (alpha, *Y_temp, beta);
00868     }
00869   }
00870   ++NumApply_;
00871   ApplyTime_ += timer.totalElapsedTime ();
00872 }
00873 
00874 
00875 template <class MatrixType>
00876 std::string ILUT<MatrixType>::description () const
00877 {
00878   std::ostringstream os;
00879 
00880   // Output is a valid YAML dictionary in flow style.  If you don't
00881   // like everything on a single line, you should call describe()
00882   // instead.
00883   os << "\"Ifpack2::ILUT\": {";
00884   os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
00885      << "Computed: " << (isComputed () ? "true" : "false") << ", ";
00886 
00887   os << "Level-of-fill: " << getLevelOfFill() << ", "
00888      << "absolute threshold: " << getAbsoluteThreshold() << ", "
00889      << "relative threshold: " << getRelativeThreshold() << ", "
00890      << "relaxation value: " << getRelaxValue() << ", ";
00891 
00892   if (A_.is_null ()) {
00893     os << "Matrix: null";
00894   }
00895   else {
00896     os << "Global matrix dimensions: ["
00897        << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
00898        << ", Global nnz: " << A_->getGlobalNumEntries();
00899   }
00900 
00901   os << "}";
00902   return os.str ();
00903 }
00904 
00905 
00906 template <class MatrixType>
00907 void
00908 ILUT<MatrixType>::
00909 describe (Teuchos::FancyOStream& out,
00910           const Teuchos::EVerbosityLevel verbLevel) const
00911 {
00912   using Teuchos::Comm;
00913   using Teuchos::OSTab;
00914   using Teuchos::RCP;
00915   using Teuchos::TypeNameTraits;
00916   using std::endl;
00917   using Teuchos::VERB_DEFAULT;
00918   using Teuchos::VERB_NONE;
00919   using Teuchos::VERB_LOW;
00920   using Teuchos::VERB_MEDIUM;
00921   using Teuchos::VERB_HIGH;
00922   using Teuchos::VERB_EXTREME;
00923 
00924   const Teuchos::EVerbosityLevel vl =
00925     (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
00926   OSTab tab0 (out);
00927 
00928   if (vl > VERB_NONE) {
00929     out << "\"Ifpack2::ILUT\":" << endl;
00930     OSTab tab1 (out);
00931     out << "MatrixType: " << TypeNameTraits<MatrixType>::name () << endl;
00932     if (this->getObjectLabel () != "") {
00933       out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
00934     }
00935     out << "Initialized: " << (isInitialized () ? "true" : "false")
00936         << endl
00937         << "Computed: " << (isComputed () ? "true" : "false")
00938         << endl
00939         << "Level of fill: " << getLevelOfFill () << endl
00940         << "Absolute threshold: " << getAbsoluteThreshold () << endl
00941         << "Relative threshold: " << getRelativeThreshold () << endl
00942         << "Relax value: " << getRelaxValue () << endl;
00943 
00944     if (isComputed () && vl >= VERB_HIGH) {
00945       const double fillFraction =
00946         (double) getGlobalNumEntries () / (double) A_->getGlobalNumEntries ();
00947       const double nnzToRows =
00948         (double) getGlobalNumEntries () / (double) U_->getGlobalNumRows ();
00949 
00950       out << "Dimensions of L: [" << L_->getGlobalNumRows () << ", "
00951           << L_->getGlobalNumRows () << "]" << endl
00952           << "Dimensions of U: [" << U_->getGlobalNumRows () << ", "
00953           << U_->getGlobalNumRows () << "]" << endl
00954           << "Number of nonzeros in factors: " << getGlobalNumEntries () << endl
00955           << "Fill fraction of factors over A: " << fillFraction << endl
00956           << "Ratio of nonzeros to rows: " << nnzToRows << endl;
00957     }
00958 
00959     out << "Number of initialize calls: " << getNumInitialize () << endl
00960         << "Number of compute calls: " << getNumCompute () << endl
00961         << "Number of apply calls: " << getNumApply () << endl
00962         << "Total time in seconds for initialize: " << getInitializeTime () << endl
00963         << "Total time in seconds for compute: " << getComputeTime () << endl
00964         << "Total time in seconds for apply: " << getApplyTime () << endl;
00965 
00966     out << "Local matrix:" << endl;
00967     A_local_->describe (out, vl);
00968   }
00969 }
00970 
00971 template <class MatrixType>
00972 Teuchos::RCP<const typename ILUT<MatrixType>::row_matrix_type>
00973 ILUT<MatrixType>::makeLocalFilter (const Teuchos::RCP<const row_matrix_type>& A)
00974 {
00975   if (A->getComm ()->getSize () > 1) {
00976     return Teuchos::rcp (new LocalFilter<MatrixType> (A));
00977   } else {
00978     return A;
00979   }
00980 }
00981 
00982 }//namespace Ifpack2
00983 
00984 #define IFPACK2_ILUT_INSTANT(S,LO,GO,N)                            \
00985   template class Ifpack2::ILUT< Tpetra::CrsMatrix<S, LO, GO, N> >;
00986 
00987 #endif /* IFPACK2_ILUT_DEF_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends