Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr_RevealRankTask.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef __TSQR_TBB_RevealRankTask_hpp
00030 #define __TSQR_TBB_RevealRankTask_hpp
00031 
00032 #include <tbb/task.h>
00033 #include <TbbTsqr_Partitioner.hpp>
00034 #include <Tsqr_SequentialTsqr.hpp>
00035 
00038 
00039 namespace TSQR {
00040   namespace TBB {
00041 
00051     template<class LocalOrdinal, class Scalar>
00052     class RevealRankTask : public tbb::task {
00053     public:
00054       typedef MatView<LocalOrdinal, Scalar> mat_view;
00055       typedef ConstMatView<LocalOrdinal, Scalar> const_mat_view;
00056       typedef std::pair<mat_view, mat_view> split_type;
00057       typedef SequentialTsqr<LocalOrdinal, Scalar> seq_tsqr_type;
00058 
00059       RevealRankTask (const size_t P_first, 
00060           const size_t P_last,
00061           const mat_view& Q,
00062           const const_mat_view& U,
00063           const seq_tsqr_type& seq,
00064           const bool contiguous_cache_blocks) :
00065   P_first_ (P_first), 
00066   P_last_ (P_last), 
00067   Q_ (Q),
00068   U_ (U),
00069   seq_ (seq),
00070   contiguous_cache_blocks_ (contiguous_cache_blocks)
00071       {}
00072 
00073       void 
00074       execute_base_case ()
00075       {
00076   // Use SequentialTsqr to compute Q*U for this core's local
00077   // part of Q.  The method is called "Q_times_B" so that it
00078   // doesn't suggest any orthogonality of the B input matrix,
00079   // though in this case B is U and U is orthogonal
00080   // (resp. unitary if Scalar is complex).
00081   seq_.Q_times_B (Q_.nrows(), Q_.ncols(), Q_.get(), Q_.lda(),
00082       U_.get(), U_.lda(), contiguous_cache_blocks_);
00083       }
00084 
00085       tbb::task* execute () 
00086       {
00087   using tbb::task;
00088 
00089   if (P_first_ > P_last_ || Q_.empty())
00090     return NULL; // shouldn't get here, but just in case...
00091   else if (P_first_ == P_last_)
00092     {
00093       execute_base_case ();
00094       return NULL;
00095     }
00096   else
00097     {
00098       // Recurse on two intervals: [P_first, P_mid] and
00099       // [P_mid+1, P_last]
00100       const size_t P_mid = (P_first_ + P_last_) / 2;
00101       split_type out_split = 
00102         partitioner_.split (Q_, P_first_, P_mid, P_last_, 
00103           contiguous_cache_blocks_);
00104       // The partitioner may decide that the current block Q_
00105       // has too few rows to be worth splitting.  In that case,
00106       // out_split.second (the bottom block) will be empty.  We
00107       // can deal with this by treating it as the base case.
00108       if (out_split.second.empty() || out_split.second.nrows() == 0)
00109         {
00110     execute_base_case ();
00111     return NULL;
00112         }
00113 
00114       // "c": continuation task
00115       tbb::empty_task& c = 
00116         *new( allocate_continuation() ) tbb::empty_task;
00117       // Recurse on the split
00118       RevealRankTask& topTask = *new( c.allocate_child() )
00119         RevealRankTask (P_first_, P_mid, out_split.first, U_, 
00120             seq_, contiguous_cache_blocks_);
00121       RevealRankTask& botTask = *new( c.allocate_child() )
00122         RevealRankTask (P_mid+1, P_last_, out_split.second, U_,
00123             seq_, contiguous_cache_blocks_);
00124       // Set reference count of parent (in this case, the
00125       // continuation task) to 2 (since 2 children -- no
00126       // additional task since no waiting).
00127       c.set_ref_count (2);
00128       c.spawn (botTask);
00129       return &topTask; // scheduler bypass optimization
00130     }
00131       }
00132 
00133     private:
00134       size_t P_first_, P_last_;
00135       mat_view Q_;
00136       const_mat_view U_;
00137       SequentialTsqr<LocalOrdinal, Scalar> seq_;
00138       Partitioner<LocalOrdinal, Scalar> partitioner_;
00139       bool contiguous_cache_blocks_;
00140     };
00141 
00142   } // namespace TBB
00143 } // namespace TSQR
00144 
00145 
00146 #endif // __TSQR_TBB_RevealRankTask_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends