Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00045 #ifndef __TSQR_TbbTsqr_hpp
00046 #define __TSQR_TbbTsqr_hpp
00047 
00048 #include <TbbTsqr_TbbParallelTsqr.hpp>
00049 #include <Tsqr_TimeStats.hpp>
00050 #include <Teuchos_ParameterList.hpp>
00051 #include <Teuchos_ParameterListExceptions.hpp>
00052 #include <Teuchos_Time.hpp>
00053 // #include <TbbRecursiveTsqr.hpp>
00054 
00055 #include <stdexcept>
00056 #include <string>
00057 #include <utility> // std::pair
00058 #include <vector>
00059 
00060 
00061 namespace TSQR {
00062   namespace TBB {
00063 
00077     template< class LocalOrdinal, class Scalar, class TimerType = Teuchos::Time >
00078     class TbbTsqr : public Teuchos::Describable {
00079     private:
00094       TbbParallelTsqr<LocalOrdinal, Scalar, TimerType> impl_;
00095 
00096       // Collected running statistcs on various computations
00097       mutable TimeStats factorStats_;
00098       mutable TimeStats applyStats_;
00099       mutable TimeStats explicitQStats_;
00100       mutable TimeStats cacheBlockStats_;
00101       mutable TimeStats unCacheBlockStats_;
00102 
00103       // Timers for various computations
00104       mutable TimerType factorTimer_;
00105       mutable TimerType applyTimer_;
00106       mutable TimerType explicitQTimer_;
00107       mutable TimerType cacheBlockTimer_;
00108       mutable TimerType unCacheBlockTimer_;
00109 
00110     public:
00111       typedef Scalar scalar_type;
00112       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00113       typedef LocalOrdinal ordinal_type;
00114 
00122       typedef typename TbbParallelTsqr<LocalOrdinal, Scalar, TimerType>::FactorOutput FactorOutput;
00123 
00137       TbbTsqr (const size_t numCores,
00138          const size_t cacheSizeHint = 0) :
00139   impl_ (numCores, cacheSizeHint),
00140   factorTimer_ ("TbbTsqr::factor"),
00141   applyTimer_ ("TbbTsqr::apply"),
00142   explicitQTimer_ ("TbbTsqr::explicit_Q"),
00143   cacheBlockTimer_ ("TbbTsqr::cache_block"),
00144   unCacheBlockTimer_ ("TbbTsqr::un_cache_block")
00145       {}
00146 
00155       TbbTsqr (const Teuchos::RCP<Teuchos::ParameterList>& plist) :
00156   impl_ (plist),
00157   factorTimer_ ("TbbTsqr::factor"),
00158   applyTimer_ ("TbbTsqr::apply"),
00159   explicitQTimer_ ("TbbTsqr::explicit_Q"),
00160   cacheBlockTimer_ ("TbbTsqr::cache_block"),
00161   unCacheBlockTimer_ ("TbbTsqr::un_cache_block")
00162       {}
00163 
00172       TbbTsqr () :
00173   impl_ (Teuchos::null),
00174   factorTimer_ ("TbbTsqr::factor"),
00175   applyTimer_ ("TbbTsqr::apply"),
00176   explicitQTimer_ ("TbbTsqr::explicit_Q"),
00177   cacheBlockTimer_ ("TbbTsqr::cache_block"),
00178   unCacheBlockTimer_ ("TbbTsqr::un_cache_block")
00179       {}
00180       
00181       Teuchos::RCP<const Teuchos::ParameterList>
00182       getValidParameters () const
00183       {
00184   return impl_.getValidParameters ();
00185       }
00186 
00187       void 
00188       setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& plist)
00189       {
00190   impl_.setParameterList (plist);
00191       }
00192 
00197       size_t ntasks() const { return impl_.ntasks(); }
00198 
00206       size_t TEUCHOS_DEPRECATED ncores() const { return impl_.ntasks(); }
00207 
00209       size_t cache_size_hint() const { return impl_.cache_size_hint(); }
00210 
00215       size_t TEUCHOS_DEPRECATED cache_block_size() const { 
00216   return impl_.cache_size_hint(); 
00217       }
00218 
00221       static bool QR_produces_R_factor_with_nonnegative_diagonal() {
00222   typedef TbbParallelTsqr< LocalOrdinal, Scalar, TimerType > impl_type;
00223   return impl_type::QR_produces_R_factor_with_nonnegative_diagonal();
00224       }
00225 
00227       bool ready() const {
00228   return true;
00229       }
00230 
00236       std::string description () const {
00237   using std::endl;
00238 
00239   // SequentialTsqr also implements Describable, so if you
00240   // decide to implement describe(), you could call
00241   // SequentialTsqr's describe() and get a nice hierarchy of
00242   // descriptions.
00243   std::ostringstream os;
00244   os << "Intranode Tall Skinny QR (TSQR): "
00245      << "Intel Threading Building Blocks (TBB) implementation"
00246      << ", max " << ntasks() << "-way parallelism"
00247      << ", cache size hint of " << cache_size_hint() << " bytes.";
00248   return os.str();
00249       }
00250 
00251       void
00252       cache_block (const LocalOrdinal nrows,
00253        const LocalOrdinal ncols, 
00254        Scalar A_out[],
00255        const Scalar A_in[],
00256        const LocalOrdinal lda_in) const
00257       {
00258   cacheBlockTimer_.start(true);
00259   impl_.cache_block (nrows, ncols, A_out, A_in, lda_in);
00260   cacheBlockStats_.update (cacheBlockTimer_.stop());
00261       }
00262 
00263       void
00264       un_cache_block (const LocalOrdinal nrows,
00265           const LocalOrdinal ncols,
00266           Scalar A_out[],
00267           const LocalOrdinal lda_out,       
00268           const Scalar A_in[]) const
00269       {
00270   unCacheBlockTimer_.start(true);
00271   impl_.un_cache_block (nrows, ncols, A_out, lda_out, A_in);
00272   unCacheBlockStats_.update (unCacheBlockTimer_.stop());
00273       }
00274 
00275       void
00276       fill_with_zeros (const LocalOrdinal nrows,
00277            const LocalOrdinal ncols,
00278            Scalar C[],
00279            const LocalOrdinal ldc, 
00280            const bool contiguous_cache_blocks) const
00281       {
00282   impl_.fill_with_zeros (nrows, ncols, C, ldc, contiguous_cache_blocks);
00283       }
00284 
00285       template< class MatrixViewType >
00286       MatrixViewType
00287       top_block (const MatrixViewType& C, 
00288      const bool contiguous_cache_blocks) const
00289       {
00290   return impl_.top_block (C, contiguous_cache_blocks);
00291       }
00292 
00328       FactorOutput
00329       factor (const LocalOrdinal nrows,
00330         const LocalOrdinal ncols, 
00331         Scalar A[],
00332         const LocalOrdinal lda,
00333         Scalar R[],
00334         const LocalOrdinal ldr,
00335         const bool contiguous_cache_blocks) const
00336       {
00337   factorTimer_.start(true);
00338   return impl_.factor (nrows, ncols, A, lda, R, ldr, contiguous_cache_blocks);
00339   factorStats_.update (factorTimer_.stop());
00340       }
00341 
00375       void
00376       apply (const ApplyType& apply_type,
00377        const LocalOrdinal nrows,
00378        const LocalOrdinal ncols_Q,
00379        const Scalar Q[],
00380        const LocalOrdinal ldq,
00381        const FactorOutput& factor_output,
00382        const LocalOrdinal ncols_C,
00383        Scalar C[],
00384        const LocalOrdinal ldc,
00385        const bool contiguous_cache_blocks) const
00386       {
00387   applyTimer_.start(true);
00388   impl_.apply (apply_type, nrows, ncols_Q, Q, ldq, factor_output, 
00389          ncols_C, C, ldc, contiguous_cache_blocks);
00390   applyStats_.update (applyTimer_.stop());
00391       }
00392 
00419       void
00420       explicit_Q (const LocalOrdinal nrows,
00421       const LocalOrdinal ncols_Q_in,
00422       const Scalar Q_in[],
00423       const LocalOrdinal ldq_in,
00424       const FactorOutput& factor_output,
00425       const LocalOrdinal ncols_Q_out,
00426       Scalar Q_out[],
00427       const LocalOrdinal ldq_out,
00428       const bool contiguous_cache_blocks) const
00429       {
00430   explicitQTimer_.start(true);
00431   impl_.explicit_Q (nrows, ncols_Q_in, Q_in, ldq_in, factor_output,
00432         ncols_Q_out, Q_out, ldq_out, contiguous_cache_blocks);
00433   explicitQStats_.update (explicitQTimer_.stop());
00434       }
00435 
00440       void
00441       Q_times_B (const LocalOrdinal nrows,
00442      const LocalOrdinal ncols,
00443      Scalar Q[],
00444      const LocalOrdinal ldq,
00445      const Scalar B[],
00446      const LocalOrdinal ldb,
00447      const bool contiguous_cache_blocks) const
00448       {
00449   impl_.Q_times_B (nrows, ncols, Q, ldq, B, ldb, contiguous_cache_blocks);
00450       }
00451 
00459       LocalOrdinal
00460       reveal_R_rank (const LocalOrdinal ncols,
00461          Scalar R[],
00462          const LocalOrdinal ldr,
00463          Scalar U[],
00464          const LocalOrdinal ldu,
00465          const magnitude_type tol) const 
00466       {
00467   return impl_.reveal_R_rank (ncols, R, ldr, U, ldu, tol);
00468       }
00469 
00481       LocalOrdinal
00482       reveal_rank (const LocalOrdinal nrows,
00483        const LocalOrdinal ncols,
00484        Scalar Q[],
00485        const LocalOrdinal ldq,
00486        Scalar R[],
00487        const LocalOrdinal ldr,
00488        const magnitude_type tol,
00489        const bool contiguous_cache_blocks) const
00490       {
00491   return impl_.reveal_rank (nrows, ncols, Q, ldq, R, ldr, tol, 
00492           contiguous_cache_blocks);
00493       }
00494 
00495       double
00496       min_seq_factor_timing () const { return impl_.min_seq_factor_timing(); }
00497       double
00498       max_seq_factor_timing () const { return impl_.max_seq_factor_timing(); }
00499       double
00500       min_seq_apply_timing () const { return impl_.min_seq_apply_timing(); }
00501       double
00502       max_seq_apply_timing () const { return impl_.max_seq_apply_timing(); }
00503 
00504       void getStats (std::vector< TimeStats >& stats) {
00505   const int numStats = 5;
00506   stats.resize (numStats);
00507   stats[0] = factorStats_;
00508   stats[1] = applyStats_;
00509   stats[2] = explicitQStats_;
00510   stats[3] = cacheBlockStats_;
00511   stats[4] = unCacheBlockStats_;
00512       }
00513 
00514       void getStatsLabels (std::vector< std::string >& labels) {
00515   const int numStats = 5;
00516   labels.resize (numStats);
00517   labels[0] = factorTimer_.name();
00518   labels[1] = applyTimer_.name();
00519   labels[2] = explicitQTimer_.name();
00520   labels[3] = cacheBlockTimer_.name();
00521   labels[4] = unCacheBlockTimer_.name();
00522       }
00523     }; // class TbbTsqr
00524 
00525   } // namespace TBB
00526 } // namespace TSQR
00527 
00528 #endif // __TSQR_TbbTsqr_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends