Anasazi Version of the Day
TbbTsqr_ApplyTask.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef __TSQR_TBB_ApplyTask_hpp
00030 #define __TSQR_TBB_ApplyTask_hpp
00031 
00032 #include <tbb/task.h>
00033 #include <TbbTsqr_Partitioner.hpp>
00034 #include <Tsqr_SequentialTsqr.hpp>
00035 
00038 
00039 namespace TSQR {
00040   namespace TBB {
00041     
00042     template< class LocalOrdinal, class Scalar, class TimerType >
00043     class ApplyTask : public tbb::task {
00044     public:
00045       typedef MatView< LocalOrdinal, Scalar > mat_view;
00046       typedef ConstMatView< LocalOrdinal, Scalar > const_mat_view;
00047       typedef std::pair< mat_view, mat_view > split_t;
00048       typedef std::pair< const_mat_view, const_mat_view > const_split_t;
00049       typedef std::pair< const_mat_view, mat_view > top_blocks_t;
00050       typedef std::vector< top_blocks_t > array_top_blocks_t;
00051 
00053       typedef typename SequentialTsqr< LocalOrdinal, Scalar >::FactorOutput SeqOutput;
00056       typedef std::vector< std::vector< Scalar > > ParOutput;
00060       typedef typename std::pair< std::vector< SeqOutput >, ParOutput > FactorOutput;
00061 
00062       ApplyTask (const size_t P_first__, 
00063      const size_t P_last__,
00064      ConstMatView< LocalOrdinal, Scalar > Q,
00065      MatView< LocalOrdinal, Scalar > C,
00066      array_top_blocks_t& top_blocks, 
00067      const FactorOutput& factor_output,
00068      const SequentialTsqr< LocalOrdinal, Scalar >& seq,
00069      double& my_seq_timing,
00070      double& min_seq_timing,
00071      double& max_seq_timing,
00072      const bool contiguous_cache_blocks) :
00073   P_first_ (P_first__), 
00074   P_last_ (P_last__), 
00075   Q_ (Q), 
00076   C_ (C),
00077   top_blocks_ (top_blocks), 
00078   factor_output_ (factor_output), 
00079   seq_ (seq), 
00080   apply_type_ (ApplyType::NoTranspose), // FIXME: modify to support Q^T and Q^H
00081   my_seq_timing_ (my_seq_timing),
00082   min_seq_timing_ (min_seq_timing),
00083   max_seq_timing_ (max_seq_timing),
00084   contiguous_cache_blocks_ (contiguous_cache_blocks)
00085       {}
00086 
00087       tbb::task* execute () {
00088   if (P_first_ > P_last_ || Q_.empty() || C_.empty())
00089     return NULL;
00090   else if (P_first_ == P_last_)
00091     {
00092       TimerType timer("");
00093       timer.start();
00094       const std::vector< SeqOutput >& seq_outputs = factor_output_.first;
00095       seq_.apply (apply_type_, Q_.nrows(), Q_.ncols(), 
00096       Q_.get(), Q_.lda(), seq_outputs[P_first_], 
00097       C_.ncols(), C_.get(), C_.lda(), 
00098       contiguous_cache_blocks_);
00099       my_seq_timing_ = timer.stop();
00100       return NULL;
00101     }
00102   else
00103     {
00104       // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00105       const size_t P_mid = (P_first_ + P_last_) / 2;
00106       const_split_t Q_split = 
00107         partitioner_.split (Q_, P_first_, P_mid, P_last_,
00108           contiguous_cache_blocks_);
00109       split_t C_split = 
00110         partitioner_.split (C_, P_first_, P_mid, P_last_,
00111           contiguous_cache_blocks_);
00112 
00113       double top_timing;
00114       double top_min_timing = 0.0;
00115       double top_max_timing = 0.0;
00116       double bot_timing;
00117       double bot_min_timing = 0.0;
00118       double bot_max_timing = 0.0;
00119 
00120       apply_pair (P_first_, P_mid+1);
00121       ApplyTask& topTask = *new( allocate_child() )
00122         ApplyTask (P_first_, P_mid, Q_split.first, C_split.first,
00123        top_blocks_, factor_output_, seq_, 
00124        top_timing, top_min_timing, top_max_timing,
00125        contiguous_cache_blocks_);
00126       ApplyTask& botTask = *new( allocate_child() )
00127         ApplyTask (P_mid+1, P_last_, Q_split.second, C_split.second,
00128        top_blocks_, factor_output_, seq_,
00129        bot_timing, bot_min_timing, bot_max_timing,
00130        contiguous_cache_blocks_);
00131 
00132       set_ref_count (3); // 3 children (2 + 1 for the wait)
00133       spawn (topTask);
00134       spawn_and_wait_for_all (botTask);
00135 
00136       top_min_timing = (top_min_timing == 0.0) ? top_timing : top_min_timing;
00137       top_max_timing = (top_max_timing == 0.0) ? top_timing : top_max_timing;
00138 
00139       bot_min_timing = (bot_min_timing == 0.0) ? bot_timing : bot_min_timing;
00140       bot_max_timing = (bot_max_timing == 0.0) ? bot_timing : bot_max_timing;
00141 
00142       min_seq_timing_ = std::min (top_min_timing, bot_min_timing);
00143       max_seq_timing_ = std::min (top_max_timing, bot_max_timing);
00144 
00145       return NULL;
00146     }
00147       }
00148 
00149     private:
00150       size_t P_first_, P_last_;
00151       const_mat_view Q_;
00152       mat_view C_;
00153       array_top_blocks_t& top_blocks_;
00154       const FactorOutput& factor_output_;
00155       SequentialTsqr< LocalOrdinal, Scalar > seq_;
00156       TSQR::ApplyType apply_type_;
00157       TSQR::Combine< LocalOrdinal, Scalar > combine_;
00158       Partitioner< LocalOrdinal, Scalar > partitioner_;
00159       double& my_seq_timing_;
00160       double& min_seq_timing_;
00161       double& max_seq_timing_;
00162       bool contiguous_cache_blocks_;
00163 
00164       void 
00165       apply_pair (const size_t P_top, 
00166       const size_t P_bot) 
00167       {
00168   if (P_top == P_bot) 
00169     throw std::logic_error("apply_pair: should never get here!");
00170 
00171   const_mat_view& Q_bot = top_blocks_[P_bot].first;
00172   mat_view& C_top = top_blocks_[P_top].second;
00173   mat_view& C_bot = top_blocks_[P_bot].second;
00174 
00175   const ParOutput& par_output = factor_output_.second;
00176   const std::vector< Scalar >& tau = par_output[P_bot];
00177   std::vector< Scalar > work (C_top.ncols());
00178   combine_.apply_pair (apply_type_, C_top.ncols(), Q_bot.ncols(), 
00179            Q_bot.get(), Q_bot.lda(), &tau[0],
00180            C_top.get(), C_top.lda(), 
00181            C_bot.get(), C_bot.lda(), &work[0]);
00182       }
00183 
00184     };
00185 
00186   } // namespace TBB
00187 } // namespace TSQR
00188 
00189 
00190 #endif // __TSQR_TBB_ApplyTask_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends