// Tsqr_DistTsqrRB.hpp
// Part of the Kokkos Node API and Local Linear Algebra Kernels.
00001 /*
00002 //@HEADER
00003 // ************************************************************************
00004 // 
00005 //          Kokkos: Node API and Parallel Node Kernels
00006 //              Copyright (2008) Sandia Corporation
00007 // 
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 // 
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00039 // 
00040 // ************************************************************************
00041 //@HEADER
00042 */
00043 
00044 #ifndef __TSQR_DistTsqrRB_hpp
00045 #define __TSQR_DistTsqrRB_hpp
00046 
00047 #include <Tsqr_ApplyType.hpp>
00048 #include <Tsqr_Combine.hpp>
00049 #include <Tsqr_Matrix.hpp>
00050 #include <Tsqr_StatTimeMonitor.hpp>
00051 
00052 #include <Teuchos_ScalarTraits.hpp>
00053 #include <Teuchos_TimeMonitor.hpp>
00054 
00055 #include <algorithm>
00056 #include <sstream>
00057 #include <stdexcept>
00058 #include <utility>
00059 #include <vector>
00060 
00061 
00062 namespace TSQR {
00063 
00072   namespace details {
00073 
    // Force the diagonal of R_mine to be nonnegative, where
    // Q_mine*R_mine is a QR factorization.  "Forcing" means flipping
    // the sign of a column of Q_mine and of the matching row of
    // R_mine, which leaves the product Q_mine*R_mine unchanged.
    //
    // We only made this a class because C++ (pre-C++11) does not
    // allow partial specialization of template functions.  Only the
    // two isComplex specializations below define force(); this
    // primary template intentionally leaves it undeclared-but-
    // undefined, so instantiating it directly is a link error.
    template<class LocalOrdinal, class Scalar, bool isComplex>
    class NonnegDiagForcer {
    public:
      // Force the diagonal of R_mine to be nonnegative, where
      // Q_mine*R_mine is a QR factorization.
      void
      force (MatView<LocalOrdinal, Scalar> Q_mine,
             MatView<LocalOrdinal, Scalar> R_mine);
    };
00088 
00089     // The complex-arithmetic specialization does nothing, since
00090     // _GEQR{2,F} for complex arithmetic returns an R factor with
00091     // nonnegative diagonal already.
00092     template<class LocalOrdinal, class Scalar>
00093     class NonnegDiagForcer<LocalOrdinal, Scalar, true> {
00094     public:
00095       void
00096       force (MatView<LocalOrdinal, Scalar> Q_mine, 
00097        MatView<LocalOrdinal, Scalar> R_mine)
00098       {
00099   (void) Q_mine;
00100   (void) R_mine;
00101       }
00102     };
00103 
00104     // Real-arithmetic specialization.
00105     template<class LocalOrdinal, class Scalar>
00106     class NonnegDiagForcer<LocalOrdinal, Scalar, false> {
00107     public:
00108       void
00109       force (MatView<LocalOrdinal, Scalar> Q_mine, 
00110        MatView<LocalOrdinal, Scalar> R_mine)
00111       {
00112   typedef Teuchos::ScalarTraits<Scalar> STS;
00113 
00114   if (Q_mine.nrows() > 0 && Q_mine.ncols() > 0) {
00115     for (int k = 0; k < R_mine.ncols(); ++k) {
00116       if (R_mine(k,k) < STS::zero()) {
00117         // Scale column k of Q_mine.  We use a raw pointer since
00118         // typically there are many rows in Q_mine, so this
00119         // operation should be fast.
00120         Scalar* const Q_k = &Q_mine(0,k);
00121         for (int i = 0; i < Q_mine.nrows(); ++i) {
00122     Q_k[i] = -Q_k[i];
00123         }
00124         // Scale row k of R_mine.  R_mine is upper triangular,
00125         // so we only have to scale right of (and including) the
00126         // diagonal entry.
00127         for (int j = k; j < R_mine.ncols(); ++j) {
00128     R_mine(k,j) = -R_mine(k,j);
00129         }
00130       }
00131     }
00132   }
00133       }
00134     };
00135   } // namespace details
00136 
00137 
  /// \class DistTsqrRB
  /// \brief Internode TSQR implemented as a reduction followed by a
  ///   broadcast over a binary tree of MPI processes.
  ///
  /// factorExplicit() takes each process' R factor (from the local
  /// stage of TSQR), computes the QR factorization of the implicitly
  /// stacked R factors via a pairwise reduction (factorReduce), and
  /// then forms each process' block of the explicit Q factor by
  /// walking the same tree in reverse (explicitQBroadcast).  Storing
  /// the intermediate Q factors locally lets the broadcast phase use
  /// fewer messages than a butterfly (all-to-all) pattern.
  template<class LocalOrdinal, class Scalar>
  class DistTsqrRB {
  public:
    typedef LocalOrdinal ordinal_type;
    typedef Scalar scalar_type;
    typedef typename Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type;
    typedef MatView<ordinal_type, scalar_type> matview_type;
    typedef Matrix<ordinal_type, scalar_type> matrix_type;
    typedef int rank_type; // MPI process rank
    typedef Combine<ordinal_type, scalar_type> combine_type;

    /// \brief Constructor.
    ///
    /// \param messenger [in/out] Wrapper around interprocess
    ///   communication (send / recv / rank / size).
    ///
    /// Also creates the five cumulative timers used to instrument
    /// factorExplicit() and its reduce / broadcast phases.
    DistTsqrRB (const Teuchos::RCP< MessengerBase< scalar_type > >& messenger) :
      messenger_ (messenger),
      totalTime_ (Teuchos::TimeMonitor::getNewTimer ("DistTsqrRB::factorExplicit() total time")),
      reduceCommTime_ (Teuchos::TimeMonitor::getNewTimer ("DistTsqrRB::factorReduce() communication time")),
      reduceTime_ (Teuchos::TimeMonitor::getNewTimer ("DistTsqrRB::factorReduce() total time")),
      bcastCommTime_ (Teuchos::TimeMonitor::getNewTimer ("DistTsqrRB::explicitQBroadcast() communication time")),
      bcastTime_ (Teuchos::TimeMonitor::getNewTimer ("DistTsqrRB::explicitQBroadcast() total time"))
    {}

    /// \brief Fill in the timing statistics on this process.
    ///
    /// \param stats [out] On output, has at least 5 entries:
    ///   total, reduce-communication, reduce, broadcast-communication,
    ///   and broadcast statistics, in that order (matching the labels
    ///   from getStatsLabels()).  Existing extra entries are kept.
    void
    getStats (std::vector< TimeStats >& stats) const
    {
      const int numTimers = 5;
      stats.resize (std::max (stats.size(), static_cast<size_t>(numTimers)));

      stats[0] = totalStats_;
      stats[1] = reduceCommStats_;
      stats[2] = reduceStats_;
      stats[3] = bcastCommStats_;
      stats[4] = bcastStats_;
    }

    /// \brief Fill in the labels for the timing statistics.
    ///
    /// \param labels [out] On output, has at least 5 entries: the
    ///   names of the timers, in the same order as the entries that
    ///   getStats() fills in.  Existing extra entries are kept.
    void
    getStatsLabels (std::vector< std::string >& labels) const
    {
      const int numTimers = 5;
      labels.resize (std::max (labels.size(), static_cast<size_t>(numTimers)));

      labels[0] = totalTime_->name();
      labels[1] = reduceCommTime_->name();
      labels[2] = reduceTime_->name();
      labels[3] = bcastCommTime_->name();
      labels[4] = bcastTime_->name();
    }

    /// \brief Whether the underlying pairwise Combine kernel already
    ///   produces an R factor with nonnegative diagonal entries.
    bool QR_produces_R_factor_with_nonnegative_diagonal () const {
      return combine_type::QR_produces_R_factor_with_nonnegative_diagonal();
    }

    /// \brief Compute the QR factorization of the stacked R factors,
    ///   and form the explicit Q factor.
    ///
    /// \param R_mine [in/out] On input, this process' square R factor
    ///   (only the upper triangle is used).  On output, overwritten
    ///   with intermediate / final R data from the reduction.
    /// \param Q_mine [out] This process' block of the explicit Q
    ///   factor; must have R_mine.ncols() rows.  Its number of
    ///   columns determines how many columns of Q are computed.
    /// \param forceNonnegativeDiagonal [in] If true and the Combine
    ///   kernel does not already guarantee it, postprocess so that
    ///   the R factor's diagonal is nonnegative.
    ///
    /// \throws std::logic_error on dimension mismatches (callers are
    ///   internal to TSQR, so bad dimensions are a programming error).
    void
    factorExplicit (matview_type R_mine,
                    matview_type Q_mine,
                    const bool forceNonnegativeDiagonal=false)
    {
      StatTimeMonitor totalMonitor (*totalTime_, totalStats_);

      // Dimension sanity checks.  R_mine should have at least as many
      // rows as columns (since we will be working on the upper
      // triangle).  Q_mine should have the same number of rows as
      // R_mine has columns, but Q_mine may have any number of
      // columns.  (It depends on how many columns of the explicit Q
      // factor we want to compute.)
      if (R_mine.nrows() < R_mine.ncols())
        {
          std::ostringstream os;
          os << "R factor input has fewer rows (" << R_mine.nrows()
             << ") than columns (" << R_mine.ncols() << ")";
          // This is a logic error because TSQR users should not be
          // calling this method directly.
          throw std::logic_error (os.str());
        }
      else if (Q_mine.nrows() != R_mine.ncols())
        {
          std::ostringstream os;
          os << "Q factor input must have the same number of rows as the R "
            "factor input has columns.  Q has " << Q_mine.nrows()
             << " rows, but R has " << R_mine.ncols() << " columns.";
          // This is a logic error because TSQR users should not be
          // calling this method directly.
          throw std::logic_error (os.str());
        }

      // The factorization is a recursion over processors [P_first, P_last].
      const rank_type P_mine = messenger_->rank();
      const rank_type P_first = 0;
      const rank_type P_last = messenger_->size() - 1;

      // Intermediate Q factors are stored implicitly.  QFactors[k] is
      // an upper triangular matrix of Householder reflectors, and
      // tauArrays[k] contains its corresponding scaling factors (TAU,
      // in LAPACK notation).  These two arrays will be filled in by
      // factorReduce().  Different MPI processes will have different
      // numbers of elements in these arrays.  In fact, on some
      // processes these arrays may be empty on output.  This is a
      // feature, not a bug!
      //
      // Even though QFactors and tauArrays have the same type as the
      // first resp. second elements of DistTsqr::FactorOutput, they
      // are not compatible with the output of DistTsqr::factor() and
      // cannot be used as the input to DistTsqr::apply() or
      // DistTsqr::explicit_Q().  This is because factor() computes a
      // general factorization suitable for applying Q (or Q^T or Q^*)
      // to any compatible matrix, whereas factorExplicit() computes a
      // factorization specifically for the purpose of forming the
      // explicit Q factor.  The latter lets us use a broadcast to
      // compute Q, rather than a more message-intensive all-to-all
      // (butterfly).
      std::vector< matrix_type > QFactors;
      std::vector< std::vector< scalar_type > > tauArrays;

      {
        StatTimeMonitor reduceMonitor (*reduceTime_, reduceStats_);
        factorReduce (R_mine, P_mine, P_first, P_last, QFactors, tauArrays);
      }

      if (QFactors.size() != tauArrays.size())
        {
          std::ostringstream os;
          os << "QFactors and tauArrays should have the same number of element"
            "s after factorReduce() returns, but they do not.  QFactors has "
             << QFactors.size() << " elements, but tauArrays has "
             << tauArrays.size() << " elements.";
          throw std::logic_error (os.str());
        }

      // Start the explicit Q factor as (a view of) the identity:
      // zero everywhere, with ones on the diagonal only on Proc 0.
      Q_mine.fill (scalar_type (0));
      if (messenger_->rank() == 0)
        {
          for (ordinal_type j = 0; j < Q_mine.ncols(); ++j)
            Q_mine(j, j) = scalar_type (1);
        }
      // Scratch space for computing results to send to other processors.
      matrix_type Q_other (Q_mine.nrows(), Q_mine.ncols(), scalar_type (0));
      // NOTE(review): if QFactors is empty on this process (which the
      // comment above says may happen), size() - 1 wraps around before
      // conversion to rank_type.  Presumably explicitQBroadcast() never
      // dereferences curpos on such processes — TODO confirm.
      const rank_type numSteps = QFactors.size() - 1;

      {
        StatTimeMonitor bcastMonitor (*bcastTime_, bcastStats_);
        explicitQBroadcast (R_mine, Q_mine, Q_other.view(),
                            P_mine, P_first, P_last,
                            numSteps, QFactors, tauArrays);
      }

      if (forceNonnegativeDiagonal &&
          ! QR_produces_R_factor_with_nonnegative_diagonal()) {
        typedef Teuchos::ScalarTraits<Scalar> STS;
        details::NonnegDiagForcer<LocalOrdinal, Scalar, STS::isComplex> forcer;
        forcer.force (Q_mine, R_mine);
      }
    }

  private:

    /// \brief Reduction phase: recursively combine pairs of R factors
    ///   over the process interval [P_first, P_last].
    ///
    /// On return, QFactors / tauArrays hold this process' implicitly
    /// stored intermediate Q factors (possibly none), and R_mine on
    /// the "first" process of each subinterval holds the combined R.
    void
    factorReduce (matview_type R_mine,
                  const rank_type P_mine,
                  const rank_type P_first,
                  const rank_type P_last,
                  std::vector< matrix_type >& QFactors,
                  std::vector< std::vector< scalar_type > >& tauArrays)
    {
      if (P_last < P_first)
        {
          std::ostringstream os;
          os << "Programming error in factorReduce() recursion: interval "
            "[P_first, P_last] is invalid: P_first = " << P_first
             << ", P_last = " << P_last << ".";
          throw std::logic_error (os.str());
        }
      else if (P_mine < P_first || P_mine > P_last)
        {
          std::ostringstream os;
          os << "Programming error in factorReduce() recursion: P_mine (= "
             << P_mine << ") is not in current process rank interval "
             << "[P_first = " << P_first << ", P_last = " << P_last << "]";
          throw std::logic_error (os.str());
        }
      else if (P_last == P_first)
        return; // skip singleton intervals (see explanation below)
      else
        {
          // Recurse on two intervals: [P_first, P_mid-1] and [P_mid,
          // P_last].  For example, if [P_first, P_last] = [0, 9],
          // P_mid = floor( (0+9+1)/2 ) = 5 and the intervals are
          // [0,4] and [5,9].
          //
          // If [P_first, P_last] = [4,6], P_mid = floor( (4+6+1)/2 )
          // = 5 and the intervals are [4,4] (a singleton) and [5,6].
          // The latter case shows that singleton intervals may arise.
          // We treat them as a base case in the recursion.  Process 4
          // won't be skipped completely, though; it will get combined
          // with the result from [5,6].

          // Adding 1 and doing integer division works like "ceiling."
          const rank_type P_mid = (P_first + P_last + 1) / 2;

          if (P_mine < P_mid) // Interval [P_first, P_mid-1]
            factorReduce (R_mine, P_mine, P_first, P_mid - 1,
                          QFactors, tauArrays);
          else // Interval [P_mid, P_last]
            factorReduce (R_mine, P_mine, P_mid, P_last,
                          QFactors, tauArrays);

          // This only does anything if P_mine is either P_first or P_mid.
          if (P_mine == P_first)
            {
              const ordinal_type numCols = R_mine.ncols();
              matrix_type R_other (numCols, numCols);
              recv_R (R_other, P_mid);

              std::vector< scalar_type > tau (numCols);
              // Don't shrink the workspace array; doing so may
              // require expensive reallocation every time we send /
              // receive data.
              resizeWork (numCols);
              // Combine the pair (R_mine stacked on R_other);
              // R_other is overwritten with the implicit Q factor.
              combine_.factor_pair (numCols, R_mine.get(), R_mine.lda(),
                                    R_other.get(), R_other.lda(),
                                    &tau[0], &work_[0]);
              QFactors.push_back (R_other);
              tauArrays.push_back (tau);
            }
          else if (P_mine == P_mid)
            send_R (R_mine, P_first);
        }
    }

    /// \brief Broadcast phase: walk the reduction tree in reverse,
    ///   applying the implicitly stored Q factors (from factorReduce)
    ///   and sending each subtree's explicit Q block down the tree.
    ///
    /// \param curpos [in] Index into QFactors / tauArrays of the next
    ///   implicit Q factor to apply on this process.
    void
    explicitQBroadcast (matview_type R_mine,
                        matview_type Q_mine,
                        matview_type Q_other, // workspace
                        const rank_type P_mine,
                        const rank_type P_first,
                        const rank_type P_last,
                        const rank_type curpos,
                        std::vector< matrix_type >& QFactors,
                        std::vector< std::vector< scalar_type > >& tauArrays)
    {
      if (P_last < P_first)
        {
          std::ostringstream os;
          os << "Programming error in explicitQBroadcast() recursion: interval"
            " [P_first, P_last] is invalid: P_first = " << P_first
             << ", P_last = " << P_last << ".";
          throw std::logic_error (os.str());
        }
      else if (P_mine < P_first || P_mine > P_last)
        {
          std::ostringstream os;
          os << "Programming error in explicitQBroadcast() recursion: P_mine "
            "(= " << P_mine << ") is not in current process rank interval "
             << "[P_first = " << P_first << ", P_last = " << P_last << "]";
          throw std::logic_error (os.str());
        }
      else if (P_last == P_first)
        return; // skip singleton intervals
      else
        {
          // Adding 1 and integer division works like "ceiling."
          const rank_type P_mid = (P_first + P_last + 1) / 2;
          rank_type newpos = curpos;
          if (P_mine == P_first)
            {
              if (curpos < 0)
                {
                  std::ostringstream os;
                  os << "Programming error: On the current P_first (= "
                     << P_first << ") proc: curpos (= " << curpos << ") < 0";
                  throw std::logic_error (os.str());
                }
              // Q_impl, tau: implicitly stored local Q factor.
              matrix_type& Q_impl = QFactors[curpos];
              std::vector< scalar_type >& tau = tauArrays[curpos];

              // Apply implicitly stored local Q factor to
              //   [Q_mine;
              //    Q_other]
              // where Q_other = zeros(Q_mine.nrows(), Q_mine.ncols()).
              // Overwrite both Q_mine and Q_other with the result.
              //
              // NOTE(review): this reuses work_ sized during
              // factorReduce() without calling resizeWork() here —
              // presumably the reduce phase always leaves work_ large
              // enough for apply_pair(); TODO confirm.
              Q_other.fill (scalar_type (0));
              combine_.apply_pair (ApplyType::NoTranspose,
                                   Q_mine.ncols(), Q_impl.ncols(),
                                   Q_impl.get(), Q_impl.lda(), &tau[0],
                                   Q_mine.get(), Q_mine.lda(),
                                   Q_other.get(), Q_other.lda(), &work_[0]);
              // Send the resulting Q_other, and the final R factor, to P_mid.
              send_Q_R (Q_other, R_mine, P_mid);
              // Consume this implicit Q factor (they are applied in
              // reverse order of how factorReduce() pushed them).
              newpos = curpos - 1;
            }
          else if (P_mine == P_mid)
            // P_first computed my explicit Q factor component.
            // Receive it, and the final R factor, from P_first.
            recv_Q_R (Q_mine, R_mine, P_first);

          if (P_mine < P_mid) // Interval [P_first, P_mid-1]
            explicitQBroadcast (R_mine, Q_mine, Q_other,
                                P_mine, P_first, P_mid - 1,
                                newpos, QFactors, tauArrays);
          else // Interval [P_mid, P_last]
            explicitQBroadcast (R_mine, Q_mine, Q_other,
                                P_mine, P_mid, P_last,
                                newpos, QFactors, tauArrays);
        }
    }

    /// \brief Pack Q (dense) and the upper triangle of R into the
    ///   workspace array and send both in a single message to destProc.
    template< class ConstMatrixType1, class ConstMatrixType2 >
    void
    send_Q_R (const ConstMatrixType1& Q,
              const ConstMatrixType2& R,
              const rank_type destProc)
    {
      StatTimeMonitor bcastCommMonitor (*bcastCommTime_, bcastCommStats_);

      const ordinal_type R_numCols = R.ncols();
      const ordinal_type Q_size = Q.nrows() * Q.ncols();
      // Number of entries in the upper triangle of R (packed).
      const ordinal_type R_size = (R_numCols * (R_numCols + 1)) / 2;
      const ordinal_type numElts = Q_size + R_size;

      // Don't shrink the workspace array; doing so would still be
      // correct, but may require reallocation of data when it needs
      // to grow again.
      resizeWork (numElts);

      // Pack the Q data into the workspace array.
      matview_type Q_contig (Q.nrows(), Q.ncols(), &work_[0], Q.nrows());
      Q_contig.copy (Q);
      // Pack the R data into the workspace array.
      pack_R (R, &work_[Q_size]);
      messenger_->send (&work_[0], numElts, destProc, 0);
    }

    /// \brief Receive a single message from srcProc and unpack it into
    ///   Q (dense) and the upper triangle of R.  Inverse of send_Q_R().
    template< class MatrixType1, class MatrixType2 >
    void
    recv_Q_R (MatrixType1& Q,
              MatrixType2& R,
              const rank_type srcProc)
    {
      StatTimeMonitor bcastCommMonitor (*bcastCommTime_, bcastCommStats_);

      const ordinal_type R_numCols = R.ncols();
      const ordinal_type Q_size = Q.nrows() * Q.ncols();
      // Number of entries in the upper triangle of R (packed).
      const ordinal_type R_size = (R_numCols * (R_numCols + 1)) / 2;
      const ordinal_type numElts = Q_size + R_size;

      // Don't shrink the workspace array; doing so would still be
      // correct, but may require reallocation of data when it needs
      // to grow again.
      resizeWork (numElts);

      messenger_->recv (&work_[0], numElts, srcProc, 0);

      // Unpack the Q data from the workspace array.
      Q.copy (matview_type (Q.nrows(), Q.ncols(), &work_[0], Q.nrows()));
      // Unpack the R data from the workspace array.
      unpack_R (R, &work_[Q_size]);
    }

    /// \brief Pack the upper triangle of R and send it to destProc.
    template< class ConstMatrixType >
    void
    send_R (const ConstMatrixType& R, const rank_type destProc)
    {
      StatTimeMonitor reduceCommMonitor (*reduceCommTime_, reduceCommStats_);

      const ordinal_type numCols = R.ncols();
      // Number of entries in the upper triangle of R (packed).
      const ordinal_type numElts = (numCols * (numCols+1)) / 2;

      // Don't shrink the workspace array; doing so would still be
      // correct, but may require reallocation of data when it needs
      // to grow again.
      resizeWork (numElts);
      // Pack the R data into the workspace array.
      pack_R (R, &work_[0]);
      messenger_->send (&work_[0], numElts, destProc, 0);
    }

    /// \brief Receive the upper triangle of R from srcProc and unpack
    ///   it into R.  Inverse of send_R().
    template< class MatrixType >
    void
    recv_R (MatrixType& R, const rank_type srcProc)
    {
      StatTimeMonitor reduceCommMonitor (*reduceCommTime_, reduceCommStats_);

      const ordinal_type numCols = R.ncols();
      // Number of entries in the upper triangle of R (packed).
      const ordinal_type numElts = (numCols * (numCols+1)) / 2;

      // Don't shrink the workspace array; doing so would still be
      // correct, but may require reallocation of data when it needs
      // to grow again.
      resizeWork (numElts);
      messenger_->recv (&work_[0], numElts, srcProc, 0);
      // Unpack the R data from the workspace array.
      unpack_R (R, &work_[0]);
    }

    /// \brief Unpack a column-major packed upper triangle (as produced
    ///   by pack_R()) from buf into the upper triangle of R.
    template< class MatrixType >
    static void
    unpack_R (MatrixType& R, const scalar_type buf[])
    {
      ordinal_type curpos = 0;
      for (ordinal_type j = 0; j < R.ncols(); ++j)
        {
          scalar_type* const R_j = &R(0, j);
          for (ordinal_type i = 0; i <= j; ++i)
            R_j[i] = buf[curpos++];
        }
    }

    /// \brief Pack the upper triangle of R into buf, column by column
    ///   (entries (0,j) .. (j,j) of each column j, in order).
    template< class ConstMatrixType >
    static void
    pack_R (const ConstMatrixType& R, scalar_type buf[])
    {
      ordinal_type curpos = 0;
      for (ordinal_type j = 0; j < R.ncols(); ++j)
        {
          const scalar_type* const R_j = &R(0, j);
          for (ordinal_type i = 0; i <= j; ++i)
            buf[curpos++] = R_j[i];
        }
    }

    /// \brief Grow (never shrink) the workspace array to hold at least
    ///   numElts entries.
    void
    resizeWork (const ordinal_type numElts)
    {
      typedef typename std::vector< scalar_type >::size_type vec_size_type;
      work_.resize (std::max (work_.size(), static_cast< vec_size_type >(numElts)));
    }

  private:
    combine_type combine_; // pairwise QR kernel
    Teuchos::RCP< MessengerBase< scalar_type > > messenger_; // interprocess communication
    std::vector< scalar_type > work_; // reusable pack/unpack and kernel workspace

    // Timers for various phases of the factorization.  Time is
    // cumulative over all calls of factorExplicit().
    Teuchos::RCP< Teuchos::Time > totalTime_;
    Teuchos::RCP< Teuchos::Time > reduceCommTime_;
    Teuchos::RCP< Teuchos::Time > reduceTime_;
    Teuchos::RCP< Teuchos::Time > bcastCommTime_;
    Teuchos::RCP< Teuchos::Time > bcastTime_;

    // Statistics (min / max / mean / total) matching the timers above.
    TimeStats totalStats_, reduceCommStats_, reduceStats_, bcastCommStats_, bcastStats_;
  };
00629 
00630 } // namespace TSQR
00631 
00632 #endif // __TSQR_DistTsqrRB_hpp
// End of Tsqr_DistTsqrRB.hpp