Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr_TbbRecursiveTsqr.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef __TSQR_TbbRecursiveTsqr_hpp
00030 #define __TSQR_TbbRecursiveTsqr_hpp
00031 
00032 #include <Tsqr_ApplyType.hpp>
00033 #include <Tsqr_CacheBlocker.hpp>
00034 #include <Tsqr_SequentialTsqr.hpp>
00035 #include <TbbTsqr_Partitioner.hpp>
00036 
00037 #include <stdexcept>
00038 #include <string>
00039 #include <utility> // std::pair
00040 #include <vector>
00041 
00044 
00045 namespace TSQR {
00046   namespace TBB {
00047 
/// \class TbbRecursiveTsqr
/// \brief Declaration of the TSQR::TBB::TbbRecursiveTsqr class template.
///
/// \tparam LocalOrdinal Integer type used for row counts, column counts
///   and leading dimensions (nrows, ncols, lda, ldr, ldc, ldq).
/// \tparam Scalar Type of the entries of the raw arrays (A, R, C, Q).
///
/// An instance stores a core count, a SequentialTsqr object and a
/// Partitioner object.  Only the trivial accessors are defined inline;
/// every other member is defined in TSQR/TBB/TbbRecursiveTsqr_Def.hpp,
/// which this header includes after the class declaration.
///
/// NOTE(review): judging by the names, this is presumably a TBB-based
/// parallel TSQR (tall skinny QR) that recursively partitions the matrix
/// and uses SequentialTsqr on the pieces -- confirm against the
/// definitions file before relying on this description.
00051     template< class LocalOrdinal, class Scalar >
00052     class TbbRecursiveTsqr {
00053     public:
            /// \brief Constructor.
            ///
            /// \param num_cores Defaults to 1.  Presumably the value later
            ///   reported by ncores() -- TODO confirm in the definition.
            /// \param cache_size_hint Defaults to 0.  Presumably forwarded to
            ///   the SequentialTsqr member; the meaning of 0 is not visible
            ///   in this header -- TODO confirm.
            ///
            /// Because both parameters have defaults and the constructor is
            /// not declared explicit, it also acts as the default constructor
            /// and as an implicit conversion from size_t.
00063       TbbRecursiveTsqr (const size_t num_cores = 1,
00064       const size_t cache_size_hint = 0);
00065 
            /// \brief Return the stored core count (ncores_).
00069       size_t ncores() const { return ncores_; }
00070 
            /// \brief Deprecated accessor (marked TEUCHOS_DEPRECATED).
            ///
            /// Returns exactly what cache_size_hint() returns, namely
            /// seq_.cache_size_hint().  New code should call
            /// cache_size_hint() instead.
00075       size_t TEUCHOS_DEPRECATED cache_block_size() const { 
00076   return seq_.cache_size_hint(); 
00077       }
00078 
            /// \brief Return the cache size hint reported by the
            ///   SequentialTsqr member (seq_.cache_size_hint()).
00080       size_t cache_size_hint() const { return seq_.cache_size_hint(); }
00081 
            /// \brief The FactorOutput type of SequentialTsqr for the same
            ///   LocalOrdinal and Scalar.
00083       typedef typename SequentialTsqr<LocalOrdinal, Scalar>::FactorOutput SeqOutput;
00084 
            /// \brief A vector of Scalar vectors.
            ///
            /// NOTE(review): presumably one array of Householder "tau" values
            /// per pairwise combine step (apply_pair() names a parameter of
            /// this shape tau_arrays) -- confirm.
00089       typedef std::vector<std::vector<Scalar> > ParOutput;
00090 
            /// \brief Return type of factor(): a pair whose first element is
            ///   a vector of SeqOutput and whose second element is a
            ///   ParOutput.
00097       typedef typename std::pair<std::vector<SeqOutput>, ParOutput> FactorOutput;
00098 
            /// \brief Write to A_out based on the read-only input A_in.
            ///
            /// \param nrows   Row count.
            /// \param ncols   Column count.
            /// \param A_out   Output array (non-const).
            /// \param A_in    Input array (const).
            /// \param lda_in  Leading dimension associated with A_in.
            ///
            /// NOTE(review): presumably copies the nrows x ncols matrix A_in
            /// into A_out with cache blocks stored contiguously; A_out has no
            /// leading-dimension parameter -- confirm layout and required
            /// size of A_out in the definition.
00102       void
00103       cache_block (const LocalOrdinal nrows,
00104        const LocalOrdinal ncols, 
00105        Scalar A_out[],
00106        const Scalar A_in[],
00107        const LocalOrdinal lda_in) const;
00108 
            /// \brief Counterpart of cache_block(): write to A_out (with
            ///   leading dimension lda_out) based on the read-only input A_in.
            ///
            /// \param nrows    Row count.
            /// \param ncols    Column count.
            /// \param A_out    Output array (non-const).
            /// \param lda_out  Leading dimension associated with A_out.
            /// \param A_in     Input array (const).
            ///
            /// NOTE(review): presumably the inverse of cache_block(), i.e.
            /// A_in is in cache-blocked layout and A_out receives the
            /// conventional layout -- confirm.
00113       void
00114       un_cache_block (const LocalOrdinal nrows,
00115           const LocalOrdinal ncols,
00116           Scalar A_out[],
00117           const LocalOrdinal lda_out,       
00118           const Scalar A_in[]) const;
00119 
            /// \brief Factor the matrix A and return a FactorOutput.
            ///
            /// \param nrows  Row count of A.
            /// \param ncols  Column count of A.
            /// \param A      Matrix data; non-const, so the call may modify it.
            /// \param lda    Leading dimension associated with A.
            /// \param R      Non-const array with leading dimension ldr.
            /// \param ldr    Leading dimension associated with R.
            /// \param contiguous_cache_blocks  Layout flag; presumably true
            ///   when A is stored in the layout produced by cache_block()
            ///   -- TODO confirm.
            ///
            /// \return The FactorOutput that apply() and explicit_Q() accept
            ///   as their factor_output argument.
            ///
            /// NOTE(review): presumably A is overwritten with an implicit
            /// representation of the Q factor and R receives the ncols x
            /// ncols R factor -- confirm against the definition.
00126       FactorOutput 
00127       factor (const LocalOrdinal nrows,
00128         const LocalOrdinal ncols, 
00129         Scalar A[],
00130         const LocalOrdinal lda,
00131         Scalar R[],
00132         const LocalOrdinal ldr,
00133         const bool contiguous_cache_blocks) const;
00134 
            /// \brief Update C in place using Q and a FactorOutput.
            ///
            /// \param op       String selecting the operation.  The accepted
            ///   values are not visible in this header; see the definition
            ///   (and Tsqr_ApplyType.hpp, included above) -- TODO confirm.
            /// \param nrows    Row count shared by C and Q.
            /// \param ncols_C  Column count of C.
            /// \param C        Non-const array; the result is written here.
            /// \param ldc      Leading dimension associated with C.
            /// \param ncols_Q  Column count of Q.
            /// \param Q        Read-only array.
            /// \param ldq      Leading dimension associated with Q.
            /// \param factor_output  A FactorOutput, as returned by factor().
            /// \param contiguous_cache_blocks  Layout flag, as in factor().
            ///
            /// NOTE(review): presumably Q is the factored matrix left in A by
            /// factor() and the operation is C := op(Q) * C -- confirm.
00138       void
00139       apply (const std::string& op,
00140        const LocalOrdinal nrows,
00141        const LocalOrdinal ncols_C,
00142        Scalar C[],
00143        const LocalOrdinal ldc,
00144        const LocalOrdinal ncols_Q,
00145        const Scalar Q[],
00146        const LocalOrdinal ldq,
00147        const FactorOutput& factor_output,
00148        const bool contiguous_cache_blocks) const;
00149 
            /// \brief Write to Q_out based on the read-only Q_in and a
            ///   FactorOutput.
            ///
            /// \param nrows        Row count shared by Q_in and Q_out.
            /// \param ncols_Q_in   Column count of Q_in.
            /// \param Q_in         Read-only input array.
            /// \param ldq_in       Leading dimension associated with Q_in.
            /// \param ncols_Q_out  Column count of Q_out.
            /// \param Q_out        Non-const output array.
            /// \param ldq_out      Leading dimension associated with Q_out.
            /// \param factor_output  A FactorOutput, as returned by factor().
            /// \param contiguous_cache_blocks  Layout flag, as in factor().
            ///
            /// NOTE(review): presumably forms the explicit Q factor in Q_out
            /// from the implicit representation in Q_in -- confirm.
00152       void 
00153       explicit_Q (const LocalOrdinal nrows,
00154       const LocalOrdinal ncols_Q_in,
00155       const Scalar Q_in[],
00156       const LocalOrdinal ldq_in,
00157       const LocalOrdinal ncols_Q_out,
00158       Scalar Q_out[],
00159       const LocalOrdinal ldq_out,
00160       const FactorOutput& factor_output,
00161       const bool contiguous_cache_blocks) const;
00162 
00163     private:
            // Data members.  Their declaration order (ncores_, seq_,
            // partitioner_) is also their initialization order.
00164       size_t ncores_;
00165       TSQR::SequentialTsqr<LocalOrdinal, Scalar> seq_;
00166       Partitioner<LocalOrdinal, Scalar> partitioner_;
00167 
            // Shorthand for the matrix-view types used by the private
            // helpers below.  MatView, ConstMatView and Partitioner are not
            // declared in this file; they come from the headers included
            // at the top.
00168       typedef MatView<LocalOrdinal, Scalar> mat_view;
00169       typedef ConstMatView<LocalOrdinal, Scalar> const_mat_view;
00170       typedef std::pair<const_mat_view, const_mat_view> const_split_t;
00171       typedef std::pair<mat_view, mat_view> split_t;
            // A (read-only view, writable view) pair, and a vector of such
            // pairs.
00172       typedef std::pair<const_mat_view, mat_view> top_blocks_t;
00173       typedef std::vector<top_blocks_t> array_top_blocks_t;
00174 
            // Helper for explicit_Q().  Q_out is taken by non-const
            // reference.  NOTE(review): P_first / P_last presumably bound
            // the range of partition indices handled by this call, here and
            // in the other helpers -- confirm (including inclusivity) in
            // the definitions.
00175       void
00176       explicit_Q_helper (const size_t P_first, 
00177        const size_t P_last,
00178        MatView< LocalOrdinal, Scalar >& Q_out,
00179        const bool contiguous_cache_blocks) const;
00180 
            // Helper for factor().  Takes the view A by value, appends to /
            // fills seq_outputs and par_outputs through non-const
            // references, and returns a MatView.  NOTE(review): depth is
            // presumably the current recursion depth and the returned view
            // presumably the top block holding this subproblem's R factor
            // -- confirm.
00183       MatView<LocalOrdinal, Scalar>
00184       factor_helper (const size_t P_first, 
00185          const size_t P_last,
00186          const size_t depth,
00187          MatView< LocalOrdinal, Scalar > A,
00188          std::vector< SeqOutput >& seq_outputs,
00189          ParOutput& par_outputs,
00190          Scalar R[],
00191          const LocalOrdinal ldr,
00192          const bool contiguous_cache_blocks) const;
00193 
            // Predicate over a partition range and the views Q and C (both
            // passed by non-const reference).  NOTE(review): presumably
            // reports whether there is nothing to do for this range --
            // confirm what "empty" means in the definition.
00194       bool
00195       apply_helper_empty (const size_t P_first,
00196         const size_t P_last,
00197         const_mat_view &Q,
00198         mat_view& C) const;
00199 
            // Fills top_blocks (non-const reference).  Q and C are also
            // non-const references, so the call may re-seat those views.
            // NOTE(review): presumably records the top (Q, C) block of each
            // partition for later use by apply_helper() -- confirm.
00203       void
00204       build_partition_array (const size_t P_first,
00205            const size_t P_last,
00206            array_top_blocks_t& top_blocks,
00207            const_mat_view& Q,
00208            mat_view& C,
00209            const bool contiguous_cache_blocks) const;
00210 
            // Helper for apply().  Takes the views Q and C by value and
            // top_blocks by non-const reference.  NOTE(review): presumably
            // the non-transposed counterpart of apply_transpose_helper()
            // -- confirm.
00213       void
00214       apply_helper (const size_t P_first, 
00215         const size_t P_last,
00216         const_mat_view Q,
00217         mat_view C,
00218         array_top_blocks_t& top_blocks, 
00219         const FactorOutput& factor_output,
00220         const bool contiguous_cache_blocks) const;
00221 
            // Helper for apply() that additionally receives the op string.
            // Returns a (read-only view, writable view) pair -- the same
            // type as top_blocks_t.  NOTE(review): presumably the top
            // blocks of Q and C for the range -- confirm.
00225       std::pair< ConstMatView< LocalOrdinal, Scalar >, MatView< LocalOrdinal, Scalar > >
00226       apply_transpose_helper (const std::string& op,
00227             const size_t P_first, 
00228             const size_t P_last,
00229             const_mat_view Q,
00230             mat_view C,
00231             const FactorOutput& factor_output,
00232             const bool contiguous_cache_blocks) const;
00233 
            // Combine step of factor() on two blocks identified by P_top
            // and P_bot.  A_top, A_bot and par_outputs are non-const
            // references.  NOTE(review): presumably factors the stacked
            // pair [A_top; A_bot] and stores the resulting tau array in
            // par_outputs -- confirm.
00234       void 
00235       factor_pair (const size_t P_top,
00236        const size_t P_bot,
00237        mat_view& A_top,
00238        mat_view& A_bot,
00239        std::vector< std::vector< Scalar > >& par_outputs,
00240        const bool contiguous_cache_blocks) const;
00241 
            // Combine step of apply() on two blocks identified by P_top and
            // P_bot.  Reads tau_arrays (const) and Q_bot; C_top and C_bot
            // are non-const references.  NOTE(review): presumably applies
            // the transformation produced by factor_pair(), with trans
            // selecting the operation -- confirm.
00242       void
00243       apply_pair (const std::string& trans,
00244       const size_t P_top,
00245       const size_t P_bot,
00246       const_mat_view& Q_bot,
00247       const std::vector< std::vector< Scalar > >& tau_arrays,
00248       mat_view& C_top,
00249       mat_view& C_bot,
00250       const bool contiguous_cache_blocks) const;
00251 
            // Helper for cache_block() operating on views.  Note that A_in
            // is a non-const reference to a ConstMatView here, whereas
            // un_cache_block_helper() takes its A_in by const reference.
00252       void 
00253       cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out,
00254         ConstMatView< LocalOrdinal, Scalar >& A_in,
00255         const size_t P_first,
00256         const size_t P_last) const;
00257 
            // Helper for un_cache_block() operating on views.
00258       void 
00259       un_cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out,
00260            const ConstMatView< LocalOrdinal, Scalar >& A_in,
00261            const size_t P_first,
00262            const size_t P_last) const;
00263 
00264     }; // class TbbRecursiveTsqr
00265   } // namespace TBB
00266 } // namespace TSQR
00267 
00268 #include <TSQR/TBB/TbbRecursiveTsqr_Def.hpp>
00269 
00270 #endif // __TSQR_TbbRecursiveTsqr_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends