Anasazi Version of the Day
TbbTsqr.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef __TSQR_TbbTsqr_hpp
00030 #define __TSQR_TbbTsqr_hpp
00031 
00032 #include <TbbTsqr_TbbParallelTsqr.hpp>
00033 #include <Tsqr_TimeStats.hpp>
00034 #include <Teuchos_Time.hpp>
00035 // #include <TbbRecursiveTsqr.hpp>
00036 
00037 #include <stdexcept>
00038 #include <string>
00039 #include <utility> // std::pair
00040 #include <vector>
00041 
00044 
00045 namespace TSQR {
00046   namespace TBB {
00047 
00061     template< class LocalOrdinal, class Scalar, class TimerType = Teuchos::Time >
00062     class TbbTsqr {
00063     private:
00064       // Note: this is NOT a use of the pImpl idiom.  TbbRecursiveTsqr
00065       // is a nonparallel implementation that emulates the control
00066       // flow of the parallel implementation TbbParallelTsqr.  The
00067       // latter depends on the Intel Threading Building Blocks
00068       // library.
00069       //
00070       //TbbRecursiveTsqr< LocalOrdinal, Scalar > impl_;
00071       TbbParallelTsqr< LocalOrdinal, Scalar, TimerType > impl_;
00072 
00073       // Collected running statistcs on various computations
00074       mutable TimeStats factorStats_, applyStats_, explicitQStats_, cacheBlockStats_, unCacheBlockStats_;
00075 
00076       // Timers for various computations
00077       mutable TimerType factorTimer_, applyTimer_, explicitQTimer_, cacheBlockTimer_, unCacheBlockTimer_;
00078 
00079     public:
00080       typedef Scalar scalar_type;
00081       typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00082       typedef LocalOrdinal ordinal_type;
00083       // typedef typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput FactorOutput;
00084       typedef typename TbbParallelTsqr< LocalOrdinal, Scalar, TimerType >::FactorOutput FactorOutput;
00085 
00087       size_t ncores() const { return impl_.ncores(); }
00088 
00090       size_t cache_block_size() const { return impl_.cache_block_size(); }
00091 
00104       TbbTsqr (const size_t numCores,
00105          const size_t cacheBlockSize = 0) :
00106   impl_ (numCores, cacheBlockSize),
00107   factorTimer_ ("TbbTsqr::factor"),
00108   applyTimer_ ("TbbTsqr::apply"),
00109   explicitQTimer_ ("TbbTsqr::explicit_Q"),
00110   cacheBlockTimer_ ("TbbTsqr::cache_block"),
00111   unCacheBlockTimer_ ("TbbTsqr::un_cache_block")
00112       {}
00113 
00116       static bool QR_produces_R_factor_with_nonnegative_diagonal() {
00117   typedef TbbParallelTsqr< LocalOrdinal, Scalar, TimerType > impl_type;
00118   return impl_type::QR_produces_R_factor_with_nonnegative_diagonal();
00119       }
00120 
00121       void
00122       cache_block (const LocalOrdinal nrows,
00123        const LocalOrdinal ncols, 
00124        Scalar A_out[],
00125        const Scalar A_in[],
00126        const LocalOrdinal lda_in) const
00127       {
00128   cacheBlockTimer_.start(true);
00129   impl_.cache_block (nrows, ncols, A_out, A_in, lda_in);
00130   cacheBlockStats_.update (cacheBlockTimer_.stop());
00131       }
00132 
00133       void
00134       un_cache_block (const LocalOrdinal nrows,
00135           const LocalOrdinal ncols,
00136           Scalar A_out[],
00137           const LocalOrdinal lda_out,       
00138           const Scalar A_in[]) const
00139       {
00140   unCacheBlockTimer_.start(true);
00141   impl_.un_cache_block (nrows, ncols, A_out, lda_out, A_in);
00142   unCacheBlockStats_.update (unCacheBlockTimer_.stop());
00143       }
00144 
00145       void
00146       fill_with_zeros (const LocalOrdinal nrows,
00147            const LocalOrdinal ncols,
00148            Scalar C[],
00149            const LocalOrdinal ldc, 
00150            const bool contiguous_cache_blocks = false) const
00151       {
00152   impl_.fill_with_zeros (nrows, ncols, C, ldc, contiguous_cache_blocks);
00153       }
00154 
00155       template< class MatrixViewType >
00156       MatrixViewType
00157       top_block (const MatrixViewType& C, 
00158      const bool contiguous_cache_blocks = false) const
00159       {
00160   return impl_.top_block (C, contiguous_cache_blocks);
00161       }
00162 
00198       FactorOutput
00199       factor (const LocalOrdinal nrows,
00200         const LocalOrdinal ncols, 
00201         Scalar A[],
00202         const LocalOrdinal lda,
00203         Scalar R[],
00204         const LocalOrdinal ldr,
00205         const bool contiguous_cache_blocks = false)
00206       {
00207   factorTimer_.start(true);
00208   return impl_.factor (nrows, ncols, A, lda, R, ldr, contiguous_cache_blocks);
00209   factorStats_.update (factorTimer_.stop());
00210       }
00211 
00245       void
00246       apply (const ApplyType& apply_type,
00247        const LocalOrdinal nrows,
00248        const LocalOrdinal ncols_Q,
00249        const Scalar Q[],
00250        const LocalOrdinal ldq,
00251        const FactorOutput& factor_output,
00252        const LocalOrdinal ncols_C,
00253        Scalar C[],
00254        const LocalOrdinal ldc,
00255        const bool contiguous_cache_blocks = false)
00256       {
00257   applyTimer_.start(true);
00258   impl_.apply (apply_type, nrows, ncols_Q, Q, ldq, factor_output, 
00259          ncols_C, C, ldc, contiguous_cache_blocks);
00260   applyStats_.update (applyTimer_.stop());
00261       }
00262 
00289       void
00290       explicit_Q (const LocalOrdinal nrows,
00291       const LocalOrdinal ncols_Q_in,
00292       const Scalar Q_in[],
00293       const LocalOrdinal ldq_in,
00294       const FactorOutput& factor_output,
00295       const LocalOrdinal ncols_Q_out,
00296       Scalar Q_out[],
00297       const LocalOrdinal ldq_out,
00298       const bool contiguous_cache_blocks = false)
00299       {
00300   explicitQTimer_.start(true);
00301   impl_.explicit_Q (nrows, ncols_Q_in, Q_in, ldq_in, factor_output,
00302         ncols_Q_out, Q_out, ldq_out, contiguous_cache_blocks);
00303   explicitQStats_.update (explicitQTimer_.stop());
00304       }
00305 
00310       void
00311       Q_times_B (const LocalOrdinal nrows,
00312      const LocalOrdinal ncols,
00313      Scalar Q[],
00314      const LocalOrdinal ldq,
00315      const Scalar B[],
00316      const LocalOrdinal ldb,
00317      const bool contiguous_cache_blocks = false) const
00318       {
00319   impl_.Q_times_B (nrows, ncols, Q, ldq, B, ldb, contiguous_cache_blocks);
00320       }
00321 
00329       LocalOrdinal
00330       reveal_R_rank (const LocalOrdinal ncols,
00331          Scalar R[],
00332          const LocalOrdinal ldr,
00333          Scalar U[],
00334          const LocalOrdinal ldu,
00335          const magnitude_type tol) const 
00336       {
00337   return impl_.reveal_R_rank (ncols, R, ldr, U, ldu, tol);
00338       }
00339 
00351       LocalOrdinal
00352       reveal_rank (const LocalOrdinal nrows,
00353        const LocalOrdinal ncols,
00354        Scalar Q[],
00355        const LocalOrdinal ldq,
00356        Scalar R[],
00357        const LocalOrdinal ldr,
00358        const magnitude_type tol,
00359        const bool contiguous_cache_blocks = false)
00360       {
00361   return impl_.reveal_rank (nrows, ncols, Q, ldq, R, ldr, tol, 
00362           contiguous_cache_blocks);
00363       }
00364 
00365       double
00366       min_seq_factor_timing () const { return impl_.min_seq_factor_timing(); }
00367       double
00368       max_seq_factor_timing () const { return impl_.max_seq_factor_timing(); }
00369       double
00370       min_seq_apply_timing () const { return impl_.min_seq_apply_timing(); }
00371       double
00372       max_seq_apply_timing () const { return impl_.max_seq_apply_timing(); }
00373 
00374       void getStats (std::vector< TimeStats >& stats) {
00375   const int numStats = 5;
00376   stats.resize (numStats);
00377   stats[0] = factorStats_;
00378   stats[1] = applyStats_;
00379   stats[2] = explicitQStats_;
00380   stats[3] = cacheBlockStats_;
00381   stats[4] = unCacheBlockStats_;
00382       }
00383 
00384       void getStatsLabels (std::vector< std::string >& labels) {
00385   const int numStats = 5;
00386   labels.resize (numStats);
00387   labels[0] = factorTimer_.name();
00388   labels[1] = applyTimer_.name();
00389   labels[2] = explicitQTimer_.name();
00390   labels[3] = cacheBlockTimer_.name();
00391   labels[4] = unCacheBlockTimer_.name();
00392       }
00393 
00394     }; // class TbbTsqr
00395 
00396   } // namespace TBB
00397 } // namespace TSQR
00398 
00399 #endif // __TSQR_TbbTsqr_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends