Anasazi Version of the Day
TbbTsqr_TbbRecursiveTsqr.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef __TSQR_TbbRecursiveTsqr_hpp
00030 #define __TSQR_TbbRecursiveTsqr_hpp
00031 
00032 #include <Tsqr_ApplyType.hpp>
00033 #include <Tsqr_CacheBlocker.hpp>
00034 #include <Tsqr_SequentialTsqr.hpp>
00035 #include <TbbTsqr_Partitioner.hpp>
00036 
00037 #include <stdexcept>
00038 #include <string>
00039 #include <utility> // std::pair
00040 #include <vector>
00041 
00044 
00045 namespace TSQR {
00046   namespace TBB {
00047 
00048     template< class LocalOrdinal, class Scalar >
          // Interface of a factor / apply / explicit-Q driver, templated on an
          // index type (LocalOrdinal) and a matrix element type (Scalar).
          //
          // The object stores a core count (ncores_), a SequentialTsqr instance
          // (seq_) and a Partitioner (partitioner_).  Apart from the two inline
          // getters, every member function is only declared here; the
          // definitions are presumably supplied by
          // TSQR/TBB/TbbRecursiveTsqr_Def.hpp, which this header includes after
          // the class -- confirm there before relying on any behavior described
          // below as an assumption.
          //
          // NOTE(review): size_t is used unqualified and <cstddef> is not among
          // the visible includes, so it is presumably reaching this header
          // transitively.
00049     class TbbRecursiveTsqr {
00050     public:
            // Constructor.
            //
            // num_cores:        defaults to 1.
            // cache_block_size: defaults to 0.  NOTE(review): presumably 0 asks
            //                   for an implementation-chosen default and the
            //                   value is forwarded to seq_ -- TODO confirm.
            //
            // NOTE(review): not declared explicit, so with both defaults in
            // place a size_t converts implicitly to TbbRecursiveTsqr.
00057       TbbRecursiveTsqr (const size_t num_cores = 1,
00058       const size_t cache_block_size = 0);
00059 
            // Returns the stored core count (ncores_).
00063       size_t ncores() const { return ncores_; }
00064 
            // Returns the cache block size reported by the owned SequentialTsqr.
00066       size_t cache_block_size() const { return seq_.cache_block_size(); }
00067 
            // Alias for the FactorOutput type of the sequential implementation.
00069       typedef typename SequentialTsqr< LocalOrdinal, Scalar >::FactorOutput SeqOutput;
            // A vector of Scalar vectors.  The private pair helpers below take
            // this same type under the parameter names par_outputs / tau_arrays.
00072       typedef std::vector< std::vector< Scalar > > ParOutput;
            // Result type of factor(): first = vector of SeqOutput,
            // second = ParOutput.
00076       typedef typename std::pair< std::vector< SeqOutput >, ParOutput > FactorOutput;
00077 
            // Writes to A_out from the read-only A_in for an nrows x ncols
            // problem.  Only the input side takes a leading-dimension argument
            // (lda_in).
            // NOTE(review): presumably A_out receives A_in rearranged into a
            // contiguous cache-blocked layout -- confirm in the definition.
00081       void
00082       cache_block (const LocalOrdinal nrows,
00083        const LocalOrdinal ncols, 
00084        Scalar A_out[],
00085        const Scalar A_in[],
00086        const LocalOrdinal lda_in) const;
00087 
            // Counterpart of cache_block(): here the output side takes the
            // leading-dimension argument (lda_out) and A_in is read-only.
            // NOTE(review): presumably the inverse layout transformation --
            // confirm in the definition.
00092       void
00093       un_cache_block (const LocalOrdinal nrows,
00094           const LocalOrdinal ncols,
00095           Scalar A_out[],
00096           const LocalOrdinal lda_out,       
00097           const Scalar A_in[]) const;
00098 
            // Factors the nrows x ncols matrix passed as A (leading-dimension
            // argument lda) and returns a FactorOutput.  Both A and R (argument
            // ldr) are non-const arrays, so either may be written.
            //
            // contiguous_cache_blocks: defaults to false.  Presumably signals
            // that A is already in the layout produced by cache_block() --
            // TODO confirm.
            //
            // NOTE(review): presumably A is overwritten with an implicit Q
            // representation and R receives the triangular factor -- confirm.
00105       FactorOutput 
00106       factor (const LocalOrdinal nrows,
00107         const LocalOrdinal ncols, 
00108         Scalar A[],
00109         const LocalOrdinal lda,
00110         Scalar R[],
00111         const LocalOrdinal ldr,
00112         const bool contiguous_cache_blocks = false);
00113 
            // Updates C (non-const, nrows x ncols_C, argument ldc) using the
            // read-only Q (ncols_Q columns, argument ldq) together with a
            // FactorOutput, which is the type factor() returns.
            //
            // op: passed as a string.  NOTE(review): presumably selects
            //     applying Q versus its (conjugate) transpose; the accepted
            //     spellings are not visible here.
            // contiguous_cache_blocks: defaults to false.
00117       void
00118       apply (const std::string& op,
00119        const LocalOrdinal nrows,
00120        const LocalOrdinal ncols_C,
00121        Scalar C[],
00122        const LocalOrdinal ldc,
00123        const LocalOrdinal ncols_Q,
00124        const Scalar Q[],
00125        const LocalOrdinal ldq,
00126        const FactorOutput& factor_output,
00127        const bool contiguous_cache_blocks = false);
00128 
            // Writes Q_out (non-const, ncols_Q_out columns, argument ldq_out)
            // from the read-only Q_in (ncols_Q_in columns, argument ldq_in) and
            // a FactorOutput.
            // NOTE(review): presumably forms the explicit Q factor from the
            // implicit representation -- confirm in the definition.
00131       void 
00132       explicit_Q (const LocalOrdinal nrows,
00133       const LocalOrdinal ncols_Q_in,
00134       const Scalar Q_in[],
00135       const LocalOrdinal ldq_in,
00136       const LocalOrdinal ncols_Q_out,
00137       Scalar Q_out[],
00138       const LocalOrdinal ldq_out,
00139       const FactorOutput& factor_output,
00140       const bool contiguous_cache_blocks = false);
00141 
00142     private:
            // Value returned by ncores().
00143       size_t ncores_;
            // Sequential implementation; cache_block_size() forwards to it.
00144       TSQR::SequentialTsqr< LocalOrdinal, Scalar > seq_;
            // Partitioner instance; how it is used is not visible in this header.
00145       Partitioner< LocalOrdinal, Scalar > partitioner_;
00146 
            // Shorthand for the mutable / read-only matrix view types and for
            // the pairs and vectors of them that the private helpers exchange.
00147       typedef MatView< LocalOrdinal, Scalar > mat_view;
00148       typedef ConstMatView< LocalOrdinal, Scalar > const_mat_view;
00149       typedef std::pair< const_mat_view, const_mat_view > const_split_t;
00150       typedef std::pair< mat_view, mat_view > split_t;
00151       typedef std::pair< const_mat_view, mat_view > top_blocks_t;
00152       typedef std::vector< top_blocks_t > array_top_blocks_t;
00153 
            // Private helpers.  In all of them P_first / P_last presumably
            // bound a range of partition (core) indices -- TODO confirm whether
            // the range is inclusive.
            //
            // Helper for explicit_Q() working on the view Q_out, which is taken
            // by non-const reference.
00154       void
00155       explicit_Q_helper (const size_t P_first, 
00156        const size_t P_last,
00157        MatView< LocalOrdinal, Scalar >& Q_out,
00158        const bool contiguous_cache_blocks);
00159 
            // Helper for factor().  Takes the matrix view A by value, appends
            // to or fills seq_outputs / par_outputs through non-const
            // references, and returns a MatView.
            // NOTE(review): the meaning of depth and of the returned view
            // (presumably the top block holding R) is not visible here.
00162       MatView< LocalOrdinal, Scalar >
00163       factor_helper (const size_t P_first, 
00164          const size_t P_last,
00165          const size_t depth,
00166          MatView< LocalOrdinal, Scalar > A,
00167          std::vector< SeqOutput >& seq_outputs,
00168          ParOutput& par_outputs,
00169          Scalar R[],
00170          const LocalOrdinal ldr,
00171          const bool contiguous_cache_blocks);
00172 
            // Predicate over a partition range and the Q / C views.
            // NOTE(review): Q is a non-const reference to a const view type,
            // unlike un_cache_block_helper, which takes its input view by
            // const reference.
00173       bool
00174       apply_helper_empty (const size_t P_first,
00175         const size_t P_last,
00176         const_mat_view &Q,
00177         mat_view& C) const;
00178 
            // Fills top_blocks (a vector of const-view / mutable-view pairs)
            // for the given partition range.  Q and C are non-const references.
00182       void
00183       build_partition_array (const size_t P_first,
00184            const size_t P_last,
00185            array_top_blocks_t& top_blocks,
00186            const_mat_view& Q,
00187            mat_view& C,
00188            const bool contiguous_cache_blocks) const;
00189 
            // Helper for apply() that takes no op argument.  Q and C are passed
            // by value; top_blocks is passed by non-const reference.
00192       void
00193       apply_helper (const size_t P_first, 
00194         const size_t P_last,
00195         const_mat_view Q,
00196         mat_view C,
00197         array_top_blocks_t& top_blocks, 
00198         const FactorOutput& factor_output,
00199         const bool contiguous_cache_blocks);
00200 
            // Helper for apply() that does take op; returns a pair made of a
            // const view and a mutable view (same element types as
            // top_blocks_t).
00204       std::pair< ConstMatView< LocalOrdinal, Scalar >, MatView< LocalOrdinal, Scalar > >
00205       apply_transpose_helper (const std::string& op,
00206             const size_t P_first, 
00207             const size_t P_last,
00208             const_mat_view Q,
00209             mat_view C,
00210             const FactorOutput& factor_output,
00211             const bool contiguous_cache_blocks);
00212 
            // Works on a pair of blocks (A_top, A_bot) identified by P_top /
            // P_bot; par_outputs has the same type as ParOutput and is taken by
            // non-const reference.
00213       void 
00214       factor_pair (const size_t P_top,
00215        const size_t P_bot,
00216        mat_view& A_top,
00217        mat_view& A_bot,
00218        std::vector< std::vector< Scalar > >& par_outputs,
00219        const bool contiguous_cache_blocks);
00220 
            // Counterpart of factor_pair() for the apply step: reads Q_bot and
            // tau_arrays (same type as ParOutput, const here) and takes C_top /
            // C_bot by non-const reference.  The string parameter is named
            // trans here, whereas the public apply() calls it op.
00221       void
00222       apply_pair (const std::string& trans,
00223       const size_t P_top,
00224       const size_t P_bot,
00225       const_mat_view& Q_bot,
00226       const std::vector< std::vector< Scalar > >& tau_arrays,
00227       mat_view& C_top,
00228       mat_view& C_bot,
00229       const bool contiguous_cache_blocks);
00230 
            // View-based helper for cache_block() over a partition range.
00231       void 
00232       cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out,
00233         ConstMatView< LocalOrdinal, Scalar >& A_in,
00234         const size_t P_first,
00235         const size_t P_last) const;
00236 
            // View-based helper for un_cache_block() over a partition range.
00237       void 
00238       un_cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out,
00239            const ConstMatView< LocalOrdinal, Scalar >& A_in,
00240            const size_t P_first,
00241            const size_t P_last) const;
00242 
00243     }; // class TbbRecursiveTsqr
00244   } // namespace TBB
00245 } // namespace TSQR
00246 
00247 #include <TSQR/TBB/TbbRecursiveTsqr_Def.hpp>
00248 
00249 #endif // __TSQR_TbbRecursiveTsqr_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends