Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr_ExplicitQTask.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef __TSQR_TBB_ExplicitQTask_hpp
00030 #define __TSQR_TBB_ExplicitQTask_hpp
00031 
00032 #include <tbb/task.h>
00033 #include <TbbTsqr_Partitioner.hpp>
00034 #include <Tsqr_SequentialTsqr.hpp>
00035 
00038 
00039 namespace TSQR {
00040   namespace TBB {
00041 
00045     template< class LocalOrdinal, class Scalar >
00046     class ExplicitQTask : public tbb::task {
00047     private:
00048       typedef MatView<LocalOrdinal, Scalar> mat_view;
00049       typedef ConstMatView<LocalOrdinal, Scalar> const_mat_view;
00050       typedef std::pair<mat_view, mat_view> split_t;
00051       typedef std::pair<const_mat_view, const_mat_view> const_split_t;
00052 
00053     public:
00056       ExplicitQTask (const size_t P_first__, 
00057          const size_t P_last__,
00058          MatView<LocalOrdinal, Scalar> Q_out,
00059          const SequentialTsqr<LocalOrdinal, Scalar>& seq,
00060          const bool contiguous_cache_blocks) :
00061   P_first_ (P_first__), P_last_ (P_last__), Q_out_ (Q_out),
00062   seq_ (seq), contiguous_cache_blocks_ (contiguous_cache_blocks)
00063       {}
00064 
00065       tbb::task* execute () 
00066       {
00067   if (P_first_ > P_last_ || Q_out_.empty())
00068     return NULL;
00069   else if (P_first_ == P_last_)
00070     {
00071       execute_base_case ();
00072       return NULL;
00073     }
00074   else
00075     {
00076       // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00077       const size_t P_mid = (P_first_ + P_last_) / 2;
00078       split_t Q_split = 
00079         partitioner_.split (Q_out_, P_first_, P_mid, P_last_,
00080           contiguous_cache_blocks_);
00081       // The partitioner may decide that the current block Q_out
00082       // has too few rows to be worth splitting.  In that case,
00083       // Q_split.second (the bottom block) will be empty.  We
00084       // can deal with this by treating it as the base case.
00085       if (Q_split.second.empty() || Q_split.second.nrows() == 0)
00086         {
00087     execute_base_case ();
00088     return NULL;
00089         }
00090 
00091       // "c": continuation task
00092       tbb::empty_task& c = 
00093         *new( allocate_continuation() ) tbb::empty_task;
00094       // Recurse on the split
00095       ExplicitQTask& topTask = *new( c.allocate_child() )
00096         ExplicitQTask (P_first_, P_mid, Q_split.first, seq_, 
00097            contiguous_cache_blocks_);
00098       ExplicitQTask& botTask = *new( c.allocate_child() )
00099         ExplicitQTask (P_mid+1, P_last_, Q_split.second, seq_, 
00100            contiguous_cache_blocks_);
00101       // Set reference count of parent (in this case, the
00102       // continuation task) to 2 (since 2 children -- no
00103       // additional task since no waiting).
00104       c.set_ref_count (2);
00105       c.spawn (botTask);
00106       return &topTask; // scheduler bypass optimization
00107     }
00108       }
00109 
00110     private:
00111       size_t P_first_, P_last_;
00112       mat_view Q_out_;
00113       SequentialTsqr<LocalOrdinal, Scalar> seq_;
00114       Partitioner<LocalOrdinal, Scalar> partitioner_;
00115       bool contiguous_cache_blocks_;
00116 
00117       void
00118       execute_base_case ()
00119       {
00120   // Fill my partition with zeros.
00121   seq_.fill_with_zeros (Q_out_.nrows(), Q_out_.ncols(), Q_out_.get(), 
00122             Q_out_.lda(), contiguous_cache_blocks_);
00123   // If our partition is the first (topmost), fill it with
00124   // the first Q_out.ncols() columns of the identity matrix.
00125   if (P_first_ == 0)
00126     {
00127       // Fetch the topmost cache block of my partition.  Its
00128       // leading dimension should be set correctly by
00129       // top_block().
00130       mat_view Q_out_top = 
00131         seq_.top_block (Q_out_, contiguous_cache_blocks_);
00132       // Set the top block of Q_out to the first ncols
00133       // columns of the identity matrix.
00134       for (LocalOrdinal j = 0; j < Q_out_top.ncols(); ++j)
00135         Q_out_top(j,j) = Scalar(1);
00136     }
00137       }
00138     };
00139 
00140   } // namespace TBB
00141 } // namespace TSQR
00142 
00143 
00144 #endif // __TSQR_TBB_ExplicitQTask_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends