Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr_UnCacheBlockTask.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef __TSQR_TBB_UnCacheBlockTask_hpp
00030 #define __TSQR_TBB_UnCacheBlockTask_hpp
00031 
00032 #include <tbb/task.h>
00033 #include <TbbTsqr_Partitioner.hpp>
00034 #include <Tsqr_SequentialTsqr.hpp>
00035 
00038 
00039 namespace TSQR {
00040   namespace TBB {
00041     
00049     template<class LocalOrdinal, class Scalar>
00050     class UnCacheBlockTask : public tbb::task {
00051     public:
00052       typedef MatView< LocalOrdinal, Scalar > mat_view;
00053       typedef ConstMatView< LocalOrdinal, Scalar > const_mat_view;
00054       typedef std::pair< mat_view, mat_view > split_t;
00055       typedef std::pair< const_mat_view, const_mat_view > const_split_t;
00056 
00057       UnCacheBlockTask (const size_t P_first__, 
00058       const size_t P_last__,
00059       mat_view& A_out,
00060       const_mat_view& A_in,
00061       const SequentialTsqr<LocalOrdinal, Scalar>& seq) :
00062   P_first_ (P_first__), 
00063   P_last_ (P_last__), 
00064   A_out_ (A_out), 
00065   A_in_ (A_in), 
00066   seq_ (seq)
00067       {}
00068 
00069       tbb::task* execute () 
00070       {
00071   using tbb::task;
00072 
00073   if (P_first_ > P_last_ || A_out_.empty() || A_in_.empty())
00074     return NULL;
00075   else if (P_first_ == P_last_)
00076     {
00077       execute_base_case ();
00078       return NULL;
00079     }
00080   else
00081     {
00082       // Recurse on two intervals: [P_first, P_mid] and
00083       // [P_mid+1, P_last].
00084       const size_t P_mid = (P_first_ + P_last_) / 2;
00085       split_t out_split = 
00086         partitioner_.split (A_out_, P_first_, P_mid, P_last_, false);
00087       const_split_t in_split = 
00088         partitioner_.split (A_in_, P_first_, P_mid, P_last_, true);
00089 
00090       // The partitioner may decide that the current blocks
00091       // A_out_ and A_in_ have too few rows to be worth
00092       // splitting.  (It should split both A_out_ and A_in_ in
00093       // the same way.)  In that case, out_split.second and
00094       // in_split.second (the bottom block) will be empty.  We
00095       // can deal with this by treating it as the base case.
00096       if (out_split.second.empty() || out_split.second.nrows() == 0)
00097         {
00098     execute_base_case ();
00099     return NULL;
00100         }
00101 
00102       // "c": continuation task
00103       tbb::empty_task& c = 
00104         *new( allocate_continuation() ) tbb::empty_task;
00105       // Recurse on the split
00106       UnCacheBlockTask& topTask = *new( c.allocate_child() )
00107         UnCacheBlockTask (P_first_, P_mid, out_split.first, 
00108             in_split.first, seq_);
00109       UnCacheBlockTask& botTask = *new( c.allocate_child() )
00110         UnCacheBlockTask (P_mid+1, P_last_, out_split.second, 
00111             in_split.second, seq_);
00112       // Set reference count of parent (in this case, the
00113       // continuation task) to 2 (since 2 children -- no
00114       // additional task since no waiting).
00115       c.set_ref_count (2);
00116       c.spawn (botTask);
00117       return &topTask; // scheduler bypass optimization
00118     }
00119       }
00120 
00121     private:
00122       size_t P_first_, P_last_;
00123       mat_view A_out_;
00124       const_mat_view A_in_;
00125       SequentialTsqr<LocalOrdinal, Scalar> seq_;
00126       Partitioner<LocalOrdinal, Scalar> partitioner_;
00127 
00128       void
00129       execute_base_case ()
00130       {
00131   seq_.un_cache_block (A_out_.nrows(), A_out_.ncols(), 
00132            A_out_.get(), A_out_.lda(), A_in_.get());
00133       }
00134     };
00135 
00136   } // namespace TBB
00137 } // namespace TSQR
00138 
00139 
00140 #endif // __TSQR_TBB_UnCacheBlockTask_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends