Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_ILUT_def.hpp
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 //-----------------------------------------------------
00031 // Ifpack2::ILUT is a translation of the Aztec ILUT
00032 // implementation. The Aztec ILUT implementation was
00033 // written by Ray Tuminaro.
00034 // See notes below, in the Ifpack2::ILUT::Compute method.
00035 // ABW.
00036 //------------------------------------------------------
00037 
00038 #ifndef IFPACK2_ILUT_DEF_HPP
00039 #define IFPACK2_ILUT_DEF_HPP
00040 
00041 namespace Ifpack2 {
00042 
00043   namespace {
00044 
00069     template<class ScalarType>
00070     typename Teuchos::ScalarTraits<ScalarType>::magnitudeType
00071     ilutDefaultDropTolerance () {
00072       using Teuchos::as;
00073       typedef Teuchos::ScalarTraits<ScalarType> STS;
00074       typedef typename STS::magnitudeType magnitude_type;
00075       typedef Teuchos::ScalarTraits<magnitude_type> STM;
00076 
00077       // 1/2.  Hopefully this can be represented in magnitude_type.
00078       const magnitude_type oneHalf = STM::one() / (STM::one() + STM::one());
00079 
00080       // The min ensures that in case magnitude_type has very low
00081       // precision, we'll at least get some value strictly less than
00082       // one.
00083       return std::min (as<magnitude_type> (1000) * STS::magnitude (STS::eps ()), oneHalf);
00084     }
00085 
00086     // Full specialization for ScalarType = double.
00087     // This specialization preserves ILUT's previous default behavior.
00088     template<>
00089     Teuchos::ScalarTraits<double>::magnitudeType
00090     ilutDefaultDropTolerance<double> () {
00091       return 1e-12;
00092     }
00093 
00094   } // namespace (anonymous)
00095 
00096 //Definitions for the ILUT methods:
00097 
00098 //==============================================================================
00099 template <class MatrixType>
00100 ILUT<MatrixType>::ILUT(const Teuchos::RCP<const Tpetra::RowMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> >& A) :
00101   A_(A),
00102   Comm_(A->getRowMap()->getComm()),
00103   Athresh_(0.0),
00104   Rthresh_(1.0),
00105   RelaxValue_(0.0),
00106   LevelOfFill_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
00107   DropTolerance_ (ilutDefaultDropTolerance<scalar_type> ()),
00108   Condest_(-1.0),
00109   IsInitialized_(false),
00110   IsComputed_(false),
00111   NumInitialize_(0),
00112   NumCompute_(0),
00113   NumApply_(0),
00114   InitializeTime_(0.0),
00115   ComputeTime_(0.0),
00116   ApplyTime_(0.0),
00117   Time_("Ifpack2::ILUT"),
00118   NumMyRows_(-1),
00119   NumGlobalNonzeros_(0)
00120 {
00121   TEUCHOS_TEST_FOR_EXCEPTION(A_ == Teuchos::null, std::runtime_error,
00122       Teuchos::typeName(*this) << "::ILUT(): input matrix reference was null.");
00123 }
00124 
00125 //==========================================================================
00126 template <class MatrixType>
00127 ILUT<MatrixType>::~ILUT() {
00128 }
00129 
00130 //==========================================================================
00131 template <class MatrixType>
00132 void ILUT<MatrixType>::setParameters(const Teuchos::ParameterList& params) {
00133   using Teuchos::as;
00134   using Teuchos::Exceptions::InvalidParameterName;
00135   using Teuchos::Exceptions::InvalidParameterType;
00136   typedef Teuchos::ScalarTraits<magnitude_type> STM;
00137 
00138   // Default values of the various parameters.
00139   magnitude_type fillLevel = STM::one ();
00140   magnitude_type absThresh = STM::zero ();
00141   magnitude_type relThresh = STM::one ();
00142   magnitude_type relaxValue = STM::zero ();
00143   magnitude_type dropTol = ilutDefaultDropTolerance<scalar_type> ();
00144 
00145   try {
00146     fillLevel = params.get<magnitude_type> ("fact: ilut level-of-fill");
00147   }
00148   catch (InvalidParameterType&) {
00149     // Try double, for backwards compatibility.
00150     // The cast from double to magnitude_type must succeed.
00151     fillLevel = as<magnitude_type> (params.get<double> ("fact: ilut level-of-fill"));
00152   }
00153   catch (InvalidParameterName&) {
00154     // Accept the default value.
00155   }
00156 
00157   // FIXME (mfh 28 Nov 2012) This doesn't make any sense.  Isn't zero
00158   // fill allowed?  The exception test doesn't match the exception
00159   // message.  However, I'm leaving it alone for now, so as not to
00160   // change current behavior.
00161   TEUCHOS_TEST_FOR_EXCEPTION(fillLevel <= 0.0, std::runtime_error,
00162     "Ifpack2::ILUT::SetParameters ERROR, level-of-fill must be >= 0.");
00163 
00164   try {
00165     absThresh = params.get<magnitude_type> ("fact: absolute threshold");
00166   }
00167   catch (InvalidParameterType&) {
00168     // Try double, for backwards compatibility.
00169     // The cast from double to magnitude_type must succeed.
00170     absThresh = as<magnitude_type> (params.get<double> ("fact: absolute threshold"));
00171   }
00172   catch (InvalidParameterName&) {
00173     // Accept the default value.
00174   }
00175 
00176   try {
00177     relThresh = params.get<magnitude_type> ("fact: relative threshold");
00178   }
00179   catch (InvalidParameterType&) {
00180     // Try double, for backwards compatibility.
00181     // The cast from double to magnitude_type must succeed.
00182     relThresh = as<magnitude_type> (params.get<double> ("fact: relative threshold"));
00183   }
00184   catch (InvalidParameterName&) {
00185     // Accept the default value.
00186   }
00187 
00188   try {
00189     relaxValue = params.get<magnitude_type> ("fact: relax value");
00190   }
00191   catch (InvalidParameterType&) {
00192     // Try double, for backwards compatibility.
00193     // The cast from double to magnitude_type must succeed.
00194     relaxValue = as<magnitude_type> (params.get<double> ("fact: relax value"));
00195   }
00196   catch (InvalidParameterName&) {
00197     // Accept the default value.
00198   }
00199 
00200   try {
00201     dropTol = params.get<magnitude_type> ("fact: drop tolerance");
00202   }
00203   catch (InvalidParameterType&) {
00204     // Try double, for backwards compatibility.
00205     // The cast from double to magnitude_type must succeed.
00206     dropTol = as<magnitude_type> (params.get<double> ("fact: drop tolerance"));
00207   }
00208   catch (InvalidParameterName&) {
00209     // Accept the default value.
00210   }
00211 
00212   // "Commit" the values only after validating all of them.  This
00213   // ensures that there are no side effects if this routine throws an
00214   // exception.
00215 
00216   // mfh 28 Nov 2012: The previous code would not assign Athresh_,
00217   // Rthresh_, RelaxValue_, or DropTolerance_ if the read-in value was
00218   // -1.  I don't know if keeping this behavior is correct, but I'll
00219   // keep it just so as not to change previous behavior.
00220 
00221   LevelOfFill_ = fillLevel;
00222   if (absThresh != -STM::one ()) {
00223     Athresh_ = absThresh;
00224   }
00225   if (relThresh != -STM::one ()) {
00226     Rthresh_ = relThresh;
00227   }
00228   if (relaxValue != -STM::one ()) {
00229     RelaxValue_ = relaxValue;
00230   }
00231   if (dropTol != -STM::one ()) {
00232     DropTolerance_ = dropTol;
00233   }
00234 }
00235 
00236 //==========================================================================
00237 template <class MatrixType>
00238 const Teuchos::RCP<const Teuchos::Comm<int> > &
00239 ILUT<MatrixType>::getComm() const{
00240   return(Comm_);
00241 }
00242 
00243 //==========================================================================
00244 template <class MatrixType>
00245 Teuchos::RCP<const Tpetra::RowMatrix<typename MatrixType::scalar_type,typename MatrixType::local_ordinal_type,typename MatrixType::global_ordinal_type,typename MatrixType::node_type> >
00246 ILUT<MatrixType>::getMatrix() const {
00247   return(A_);
00248 }
00249 
00250 //==========================================================================
00251 template <class MatrixType>
00252 const Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,typename MatrixType::global_ordinal_type,typename MatrixType::node_type> >&
00253 ILUT<MatrixType>::getDomainMap() const
00254 {
00255   return A_->getDomainMap();
00256 }
00257 
00258 //==========================================================================
00259 template <class MatrixType>
00260 const Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,typename MatrixType::global_ordinal_type,typename MatrixType::node_type> >&
00261 ILUT<MatrixType>::getRangeMap() const
00262 {
00263   return A_->getRangeMap();
00264 }
00265 
00266 //==============================================================================
00267 template <class MatrixType>
00268 bool ILUT<MatrixType>::hasTransposeApply() const {
00269   return true;
00270 }
00271 
00272 //==========================================================================
00273 template <class MatrixType>
00274 int ILUT<MatrixType>::getNumInitialize() const {
00275   return(NumInitialize_);
00276 }
00277 
00278 //==========================================================================
00279 template <class MatrixType>
00280 int ILUT<MatrixType>::getNumCompute() const {
00281   return(NumCompute_);
00282 }
00283 
00284 //==========================================================================
00285 template <class MatrixType>
00286 int ILUT<MatrixType>::getNumApply() const {
00287   return(NumApply_);
00288 }
00289 
00290 //==========================================================================
00291 template <class MatrixType>
00292 double ILUT<MatrixType>::getInitializeTime() const {
00293   return(InitializeTime_);
00294 }
00295 
00296 //==========================================================================
00297 template<class MatrixType>
00298 double ILUT<MatrixType>::getComputeTime() const {
00299   return(ComputeTime_);
00300 }
00301 
00302 //==========================================================================
00303 template<class MatrixType>
00304 double ILUT<MatrixType>::getApplyTime() const {
00305   return(ApplyTime_);
00306 }
00307 
00308 //==========================================================================
00309 template<class MatrixType>
00310 global_size_t ILUT<MatrixType>::getGlobalNumEntries() const {
00311   return(L_->getGlobalNumEntries() + U_->getGlobalNumEntries());
00312 }
00313 
00314 //==========================================================================
00315 template<class MatrixType>
00316 size_t ILUT<MatrixType>::getNodeNumEntries() const {
00317   return(L_->getNodeNumEntries() + U_->getNodeNumEntries());
00318 }
00319 
00320 //=============================================================================
00321 template<class MatrixType>
00322 typename ILUT<MatrixType>::magnitude_type
00323 ILUT<MatrixType>::
00324 computeCondEst (CondestType CT,
00325                 local_ordinal_type MaxIters,
00326                 magnitude_type Tol,
00327                 const Teuchos::Ptr<const Tpetra::RowMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> > &matrix) {
00328   if (!isComputed()) { // cannot compute right now
00329     return(-1.0);
00330   }
00331   // NOTE: this is computing the *local* condest
00332   if (Condest_ == -1.0) {
00333     Condest_ = Ifpack2::Condest(*this, CT, MaxIters, Tol, matrix);
00334   }
00335   return(Condest_);
00336 }
00337 
00338 //==========================================================================
00339 template<class MatrixType>
00340 void ILUT<MatrixType>::initialize() {
00341   // clear any previous allocation
00342   IsInitialized_ = false;
00343   IsComputed_ = false;
00344   L_ = Teuchos::null;
00345   U_ = Teuchos::null;
00346 
00347   Time_.start(true);
00348 
00349   // check only in serial
00350   TEUCHOS_TEST_FOR_EXCEPTION(Comm_->getSize() == 1 && A_->getNodeNumRows() != A_->getNodeNumCols(), std::runtime_error, "Ifpack2::ILUT::Initialize ERROR, matrix must be square");
00351 
00352   NumMyRows_ = A_->getNodeNumRows();
00353 
00354   // nothing else to do here
00355   IsInitialized_ = true;
00356   ++NumInitialize_;
00357   Time_.stop();
00358   InitializeTime_ += Time_.totalElapsedTime();
00359 }
00360 
00361 template<typename ScalarType>
00362 typename Teuchos::ScalarTraits<ScalarType>::magnitudeType scalar_mag(const ScalarType& s)
00363 {
00364   return Teuchos::ScalarTraits<ScalarType>::magnitude(s);
00365 }
00366 
00367 //==========================================================================
00368 template<class MatrixType>
00369 void ILUT<MatrixType>::compute() {
00370   using Teuchos::as;
00371 
00372   //--------------------------------------------------------------------------
00373   // Ifpack2::ILUT is a translation of the Aztec ILUT implementation. The Aztec
00374   // ILUT implementation was written by Ray Tuminaro.
00375   //
00376   // This isn't an exact translation of the Aztec ILUT algorithm, for the
00377   // following reasons:
00378   // 1. Minor differences result from the fact that Aztec factors a MSR format
00379   // matrix in place, while the code below factors an input CrsMatrix which
00380   // remains untouched and stores the resulting factors in separate L and U
00381   // CrsMatrix objects.
00382   // Also, the Aztec code begins by shifting the matrix pointers back
00383   // by one, and the pointer contents back by one, and then using 1-based
00384   // Fortran-style indexing in the algorithm. This Ifpack2 code uses C-style
00385   // 0-based indexing throughout.
00386   // 2. Aztec stores the inverse of the diagonal of U. This Ifpack2 code
00387   // stores the non-inverted diagonal in U.
00388   // The triangular solves (in Ifpack2::ILUT::apply()) are performed by
00389   // calling the Tpetra::CrsMatrix::solve method on the L and U objects, and
00390   // this requires U to contain the non-inverted diagonal.
00391   //
00392   // ABW.
00393   //--------------------------------------------------------------------------
00394 
00395   if (!isInitialized()) {
00396     initialize();
00397   }
00398 
00399   Time_.start(true);
00400 
00401   NumMyRows_ = A_->getNodeNumRows();
00402 
00403   L_ = Teuchos::rcp(new MatrixType(A_->getRowMap(), A_->getColMap(), 0));
00404   U_ = Teuchos::rcp(new MatrixType(A_->getRowMap(), A_->getColMap(), 0));
00405 
00406   TEUCHOS_TEST_FOR_EXCEPTION(L_ == Teuchos::null || U_ == Teuchos::null, std::runtime_error,
00407      "Ifpack2::ILUT::Compute ERROR, failed to allocate L_ or U_");
00408 
00409   const scalar_type zero = Teuchos::ScalarTraits<scalar_type>::zero();
00410   const scalar_type one  = Teuchos::ScalarTraits<scalar_type>::one();
00411 
00412   // CGB: note, this caching approach may not be necessary anymore
00413   // We will store ArrayView objects that are views of the rows of U, so that
00414   // we don't have to repeatedly retrieve the view for each row. These will
00415   // be populated row by row as the factorization proceeds.
00416   Teuchos::Array<Teuchos::ArrayView<const local_ordinal_type> > Uindices(NumMyRows_);
00417   Teuchos::Array<Teuchos::ArrayView<const scalar_type> >       Ucoefs(NumMyRows_);
00418 
00419   // If this macro is defined, files containing the L and U factors
00420   // will be written. DON'T CHECK IN THE CODE WITH THIS MACRO ENABLED!!!
00421   // #define IFPACK2_WRITE_FACTORS
00422 #ifdef IFPACK2_WRITE_FACTORS
00423   std::ofstream ofsL("L.tif.mtx", std::ios::out);
00424   std::ofstream ofsU("U.tif.mtx", std::ios::out);
00425 #endif
00426 
00427   // mfh 28 Nov 2012: The code here uses double for fill calculations.
00428   // This really has nothing to do with magnitude_type.  The point is
00429   // to avoid rounding and overflow for integer calculations.  That
00430   // should work just fine for reasonably-sized integer values, so I'm
00431   // leaving it.  However, the fill level parameter is a
00432   // magnitude_type, so I do need to cast to double.  Typical values
00433   // of the fill level are small, so this should not cause overflow.
00434 
00435   // Calculate how much fill will be allowed in addition to the space that
00436   // corresponds to the input matrix entries.
00437   double local_nnz = static_cast<double>(A_->getNodeNumEntries());
00438   double fill;
00439   {
00440     const double fillLevel = as<double> (getLevelOfFill ());
00441     fill = ((fillLevel - 1) * local_nnz) / (2 * NumMyRows_);
00442   }
00443 
00444   // std::ceil gives the smallest integer larger than the argument.
00445   // this may give a slightly different result than Aztec's fill value in
00446   // some cases.
00447   double fill_ceil=std::ceil(fill);
00448 
00449   typedef typename Teuchos::Array<local_ordinal_type>::size_type      Tsize_t;
00450 
00451   // Similarly to Aztec, we will allow the same amount of fill for each
00452   // row, half in L and half in U.
00453   Tsize_t fillL = static_cast<Tsize_t>(fill_ceil);
00454   Tsize_t fillU = static_cast<Tsize_t>(fill_ceil);
00455 
00456   Teuchos::Array<scalar_type> InvDiagU(NumMyRows_,zero);
00457 
00458   Teuchos::Array<local_ordinal_type> tmp_idx;
00459   Teuchos::Array<scalar_type> tmpv;
00460 
00461   enum { UNUSED, ORIG, FILL };
00462   local_ordinal_type max_col = NumMyRows_;
00463 
00464   Teuchos::Array<int> pattern(max_col, UNUSED);
00465   Teuchos::Array<scalar_type> cur_row(max_col, zero);
00466   Teuchos::Array<magnitude_type> unorm(max_col);
00467   magnitude_type rownorm;
00468   Teuchos::Array<local_ordinal_type> L_cols_heap;
00469   Teuchos::Array<local_ordinal_type> U_cols;
00470   Teuchos::Array<local_ordinal_type> L_vals_heap;
00471   Teuchos::Array<local_ordinal_type> U_vals_heap;
00472 
00473   // A comparison object which will be used to create 'heaps' of indices
00474   // that are ordered according to the corresponding values in the
00475   // 'cur_row' array.
00476   greater_indirect<scalar_type,local_ordinal_type> vals_comp(cur_row);
00477 
00478   // =================== //
00479   // start factorization //
00480   // =================== //
00481 
00482   Teuchos::ArrayRCP<local_ordinal_type> ColIndicesARCP;
00483   Teuchos::ArrayRCP<scalar_type>       ColValuesARCP;
00484   if(!A_->supportsRowViews()){
00485     size_t maxnz=A_->getNodeMaxNumRowEntries();
00486     ColIndicesARCP.resize(maxnz);
00487     ColValuesARCP.resize(maxnz);
00488   }
00489 
00490   for (local_ordinal_type row_i = 0 ; row_i < NumMyRows_ ; ++row_i) {
00491     Teuchos::ArrayView<const local_ordinal_type> ColIndicesA;
00492     Teuchos::ArrayView<const scalar_type> ColValuesA;
00493     size_t RowNnz;
00494 
00495     if(A_->supportsRowViews()) {
00496       A_->getLocalRowView(row_i, ColIndicesA, ColValuesA);
00497       RowNnz = ColIndicesA.size();
00498     }
00499     else {
00500       A_->getLocalRowCopy(row_i,ColIndicesARCP(),ColValuesARCP(),RowNnz);
00501       ColIndicesA=ColIndicesARCP(0,RowNnz);
00502       ColValuesA =ColValuesARCP(0,RowNnz);
00503     }
00504 
00505     // Always include the diagonal in the U factor. The value should get
00506     // set in the next loop below.
00507     U_cols.push_back(row_i);
00508     cur_row[row_i] = zero;
00509     pattern[row_i] = ORIG;
00510 
00511     Tsize_t L_cols_heaplen = 0;
00512     rownorm = Teuchos::ScalarTraits<magnitude_type>::zero ();
00513     for(size_t i=0; i<RowNnz; ++i) {
00514       if (ColIndicesA[i] < NumMyRows_) {
00515         if (ColIndicesA[i] < row_i) {
00516           add_to_heap(ColIndicesA[i], L_cols_heap, L_cols_heaplen);
00517         }
00518         else if (ColIndicesA[i] > row_i) {
00519           U_cols.push_back(ColIndicesA[i]);
00520         }
00521 
00522         cur_row[ColIndicesA[i]] = ColValuesA[i];
00523         pattern[ColIndicesA[i]] = ORIG;
00524         rownorm += scalar_mag(ColValuesA[i]);
00525       }
00526     }
00527 
00528     // Alter the diagonal according to the absolute-threshold and
00529     // relative-threshold values. If not set, those values default
00530     // to zero and one respectively.
00531     const magnitude_type rthresh = getRelativeThreshold();
00532     const scalar_type& v = cur_row[row_i];
00533     cur_row[row_i] = as<scalar_type> (getAbsoluteThreshold() * IFPACK2_SGN(v)) + rthresh*v;
00534 
00535     Tsize_t orig_U_len = U_cols.size();
00536     RowNnz = L_cols_heap.size() + orig_U_len;
00537     rownorm = getDropTolerance() * rownorm/RowNnz;
00538 
00539     // The following while loop corresponds to the 'L30' goto's in Aztec.
00540     Tsize_t L_vals_heaplen = 0;
00541     while(L_cols_heaplen > 0) {
00542       local_ordinal_type row_k = L_cols_heap.front();
00543 
00544       scalar_type multiplier = cur_row[row_k] * InvDiagU[row_k];
00545       cur_row[row_k] = multiplier;
00546       magnitude_type mag_mult = scalar_mag(multiplier);
00547       if (mag_mult*unorm[row_k] < rownorm) {
00548         pattern[row_k] = UNUSED;
00549         rm_heap_root(L_cols_heap, L_cols_heaplen);
00550         continue;
00551       }
00552       if (pattern[row_k] != ORIG) {
00553         if (L_vals_heaplen < fillL) {
00554           add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
00555         }
00556         else if (L_vals_heaplen==0 ||
00557                  mag_mult < scalar_mag(cur_row[L_vals_heap.front()])) {
00558           pattern[row_k] = UNUSED;
00559           rm_heap_root(L_cols_heap, L_cols_heaplen);
00560           continue;
00561         }
00562         else {
00563           pattern[L_vals_heap.front()] = UNUSED;
00564           rm_heap_root(L_vals_heap, L_vals_heaplen, vals_comp);
00565           add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
00566         }
00567       }
00568 
00569       /* Reduce current row */
00570 
00571       Teuchos::ArrayView<const local_ordinal_type>& ColIndicesU = Uindices[row_k];
00572       Teuchos::ArrayView<const scalar_type>& ColValuesU = Ucoefs[row_k];
00573       Tsize_t ColNnzU = ColIndicesU.size();
00574 
00575       for(Tsize_t j=0; j<ColNnzU; ++j) {
00576         if (ColIndicesU[j] > row_k) {
00577           scalar_type tmp = multiplier * ColValuesU[j];
00578           local_ordinal_type col_j = ColIndicesU[j];
00579           if (pattern[col_j] != UNUSED) {
00580             cur_row[col_j] -= tmp;
00581           }
00582           else if (scalar_mag(tmp) > rownorm) {
00583             cur_row[col_j] = -tmp;
00584             pattern[col_j] = FILL;
00585             if (col_j > row_i) {
00586               U_cols.push_back(col_j);
00587             }
00588             else {
00589               add_to_heap(col_j, L_cols_heap, L_cols_heaplen);
00590             }
00591           }
00592         }
00593       }
00594 
00595       rm_heap_root(L_cols_heap, L_cols_heaplen);
00596     }//end of while(L_cols_heaplen) loop
00597 
00598 
00599     // Put indices and values for L into arrays and then into the L_ matrix.
00600 
00601     //   first, the original entries from the L section of A:
00602     for(Tsize_t i=0; i<ColIndicesA.size(); ++i) {
00603       if (ColIndicesA[i] < row_i) {
00604         tmp_idx.push_back(ColIndicesA[i]);
00605         tmpv.push_back(cur_row[ColIndicesA[i]]);
00606         pattern[ColIndicesA[i]] = UNUSED;
00607       }
00608     }
00609 
00610     //   next, the L entries resulting from fill:
00611     for(Tsize_t j=0; j<L_vals_heaplen; ++j) {
00612       tmp_idx.push_back(L_vals_heap[j]);
00613       tmpv.push_back(cur_row[L_vals_heap[j]]);
00614       pattern[L_vals_heap[j]] = UNUSED;
00615     }
00616 
00617     // L has a one on the diagonal, but we don't explicitly store it.
00618     // If we don't store it, then the Tpetra/Kokkos kernel which performs
00619     // the triangular solve can assume a unit diagonal, take a short-cut
00620     // and perform faster.
00621 
00622     L_->insertLocalValues(row_i, tmp_idx(), tmpv());
00623 #ifdef IFPACK2_WRITE_FACTORS
00624     for(Tsize_t ii=0; ii<tmp_idx.size(); ++ii) {
00625       ofsL << row_i << " " << tmp_idx[ii] << " " << tmpv[ii] << std::endl;
00626     }
00627 #endif
00628 
00629     tmp_idx.clear();
00630     tmpv.clear();
00631 
00632     // Pick out the diagonal element, store its reciprocal.
00633     if (cur_row[row_i] == zero) {
00634       std::cerr << "Ifpack2::ILUT::Compute: zero pivot encountered! Replacing with rownorm and continuing...(You may need to set the parameter 'fact: absolute threshold'.)" << std::endl;
00635       cur_row[row_i] = rownorm;
00636     }
00637     InvDiagU[row_i] = one / cur_row[row_i];
00638 
00639     // Non-inverted diagonal is stored for U:
00640     tmp_idx.push_back(row_i);
00641     tmpv.push_back(cur_row[row_i]);
00642     unorm[row_i] = scalar_mag(cur_row[row_i]);
00643     pattern[row_i] = UNUSED;
00644 
00645     // Now put indices and values for U into arrays and then into the U_ matrix.
00646     // The first entry in U_cols is the diagonal, which we just handled, so we'll
00647     // start our loop at j=1.
00648 
00649     Tsize_t U_vals_heaplen = 0;
00650     for(Tsize_t j=1; j<U_cols.size(); ++j) {
00651       local_ordinal_type col = U_cols[j];
00652       if (pattern[col] != ORIG) {
00653         if (U_vals_heaplen < fillU) {
00654           add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
00655         }
00656         else if (U_vals_heaplen!=0 && scalar_mag(cur_row[col]) >
00657                  scalar_mag(cur_row[U_vals_heap.front()])) {
00658           rm_heap_root(U_vals_heap, U_vals_heaplen, vals_comp);
00659           add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
00660         }
00661       }
00662       else {
00663         tmp_idx.push_back(col);
00664         tmpv.push_back(cur_row[col]);
00665         unorm[row_i] += scalar_mag(cur_row[col]);
00666       }
00667       pattern[col] = UNUSED;
00668     }
00669 
00670     for(Tsize_t j=0; j<U_vals_heaplen; ++j) {
00671       tmp_idx.push_back(U_vals_heap[j]);
00672       tmpv.push_back(cur_row[U_vals_heap[j]]);
00673       unorm[row_i] += scalar_mag(cur_row[U_vals_heap[j]]);
00674     }
00675 
00676     unorm[row_i] /= (orig_U_len + U_vals_heaplen);
00677 
00678     U_->insertLocalValues(row_i, tmp_idx(), tmpv() );
00679 #ifdef IFPACK2_WRITE_FACTORS
00680     for(int ii=0; ii<tmp_idx.size(); ++ii) {
00681       ofsU <<row_i<< " " <<tmp_idx[ii]<< " " <<tmpv[ii]<< std::endl;
00682     }
00683 #endif
00684     tmp_idx.clear();
00685     tmpv.clear();
00686 
00687     U_->getLocalRowView(row_i, Uindices[row_i], Ucoefs[row_i] );
00688 
00689     L_cols_heap.clear();
00690     U_cols.clear();
00691     L_vals_heap.clear();
00692     U_vals_heap.clear();
00693   } // end of for(row_i) loop
00694 
00695   L_->fillComplete();
00696   U_->fillComplete();
00697 
00698   global_size_t MyNonzeros = L_->getGlobalNumEntries() + U_->getGlobalNumEntries();
00699   Teuchos::reduceAll(*Comm_,Teuchos::REDUCE_SUM,1,&MyNonzeros,&NumGlobalNonzeros_);
00700 
00701   IsComputed_ = true;
00702 
00703   ++NumCompute_;
00704   Time_.stop();
00705   ComputeTime_ += Time_.totalElapsedTime();
00706 }
00707 
00708 //==========================================================================
00709 template <class MatrixType>
00710 void ILUT<MatrixType>::apply(
00711            const Tpetra::MultiVector<typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type, typename MatrixType::node_type>& X,
00712                  Tpetra::MultiVector<typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type, typename MatrixType::node_type>& Y,
00713                  Teuchos::ETransp mode,
00714                typename MatrixType::scalar_type alpha,
00715                typename MatrixType::scalar_type beta) const
00716 {
00717   TEUCHOS_TEST_FOR_EXCEPTION(!isComputed(), std::runtime_error,
00718     "Ifpack2::ILUT::apply() ERROR, compute() hasn't been called yet.");
00719 
00720   TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
00721     "Ifpack2::ILUT::apply() ERROR, X.getNumVectors() != Y.getNumVectors().");
00722 
00723   Time_.start(true);
00724 
00725   // If X and Y are pointing to the same memory location,
00726   // we need to create an auxiliary vector, Xcopy
00727   Teuchos::RCP< const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> > Xcopy;
00728   if (X.getLocalMV().getValues() == Y.getLocalMV().getValues())
00729     Xcopy = Teuchos::rcp( new Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>(X) );
00730   else
00731     Xcopy = Teuchos::rcp( &X, false );
00732 
00733   if (mode == Teuchos::NO_TRANS)
00734   {
00735     // solves LU Y = X
00736     L_->localSolve(*Xcopy,Y,Teuchos::NO_TRANS);
00737     U_->localSolve(Y,Y,Teuchos::NO_TRANS);
00738   }
00739   else
00740   {
00741     // solves U(trans) L(trans) Y = X
00742     U_->localSolve(*Xcopy,Y,mode);
00743     L_->localSolve(Y,Y,mode);
00744   }
00745 
00746   ++NumApply_;
00747   Time_.stop();
00748   ApplyTime_ += Time_.totalElapsedTime();
00749 }
00750 
00751 //=============================================================================
00752 template <class MatrixType>
00753 std::string ILUT<MatrixType>::description() const {
00754   std::ostringstream oss;
00755   oss << Teuchos::Describable::description();
00756   if (isInitialized()) {
00757     if (isComputed()) {
00758       oss << "{status = initialized, computed";
00759     }
00760     else {
00761       oss << "{status = initialized, not computed";
00762     }
00763   }
00764   else {
00765     oss << "{status = not initialized, not computed";
00766   }
00767   oss << ", global rows = " << A_->getGlobalNumRows()
00768       << ", global cols = " << A_->getGlobalNumCols()
00769       << "}";
00770   return oss.str();
00771 }
00772 
00773 //=============================================================================
00774 template <class MatrixType>
00775 void ILUT<MatrixType>::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const {
00776   using std::endl;
00777   using std::setw;
00778   using Teuchos::VERB_DEFAULT;
00779   using Teuchos::VERB_NONE;
00780   using Teuchos::VERB_LOW;
00781   using Teuchos::VERB_MEDIUM;
00782   using Teuchos::VERB_HIGH;
00783   using Teuchos::VERB_EXTREME;
00784   Teuchos::EVerbosityLevel vl = verbLevel;
00785   if (vl == VERB_DEFAULT) vl = VERB_LOW;
00786   const int myImageID = Comm_->getRank();
00787   Teuchos::OSTab tab(out);
00788   //    none: print nothing
00789   //     low: print O(1) info from node 0
00790   //  medium:
00791   //    high:
00792   // extreme:
00793   if (vl != VERB_NONE && myImageID == 0) {
00794     out << this->description() << endl;
00795     out << endl;
00796     out << "===============================================================================" << endl;
00797     out << "Level-of-fill      = " << getLevelOfFill()       << endl;
00798     out << "Absolute threshold = " << getAbsoluteThreshold() << endl;
00799     out << "Relative threshold = " << getRelativeThreshold() << endl;
00800     out << "Relax value        = " << getRelaxValue()        << endl;
00801     if   (Condest_ == -1.0) { out << "Condition number estimate       = N/A" << endl; }
00802     else                    { out << "Condition number estimate       = " << Condest_ << endl; }
00803     if (isComputed()) {
00804       out << "Number of nonzeros in A         = " << A_->getGlobalNumEntries() << endl;
00805       out << "Number of nonzeros in L + U     = " << getGlobalNumEntries()
00806           << " ( = " << 100.0 * (double)getGlobalNumEntries() / (double)A_->getGlobalNumEntries() << " % of A)" << endl;
00807       out << "nonzeros / rows                 = " << 1.0 * getGlobalNumEntries() / U_->getGlobalNumRows() << endl;
00808     }
00809     out << endl;
00810     out << "Phase           # calls    Total Time (s) " << endl;
00811     out << "------------    -------    ---------------" << endl;
00812     out << "initialize()    " << setw(7) << getNumInitialize() << "    " << setw(15) << getInitializeTime() << endl;
00813     out << "compute()       " << setw(7) << getNumCompute()    << "    " << setw(15) << getComputeTime()    << endl;
00814     out << "apply()         " << setw(7) << getNumApply()      << "    " << setw(15) << getApplyTime()      << endl;
00815     out << "==============================================================================="                << endl;
00816     out << endl;
00817   }
00818 }
00819 
00820 
00821 }//namespace Ifpack2
00822 
00823 #endif /* IFPACK2_ILUT_DEF_HPP */
00824 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends