Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr_ExplicitQTask.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef __TSQR_TBB_ExplicitQTask_hpp
00043 #define __TSQR_TBB_ExplicitQTask_hpp
00044 
00045 #include <tbb/task.h>
00046 #include <TbbTsqr_Partitioner.hpp>
00047 #include <Tsqr_SequentialTsqr.hpp>
00048 
00051 
00052 namespace TSQR {
00053   namespace TBB {
00054 
00058     template< class LocalOrdinal, class Scalar >
00059     class ExplicitQTask : public tbb::task {
00060     private:
00061       typedef MatView<LocalOrdinal, Scalar> mat_view;
00062       typedef ConstMatView<LocalOrdinal, Scalar> const_mat_view;
00063       typedef std::pair<mat_view, mat_view> split_t;
00064       typedef std::pair<const_mat_view, const_mat_view> const_split_t;
00065 
00066     public:
00069       ExplicitQTask (const size_t P_first__, 
00070          const size_t P_last__,
00071          MatView<LocalOrdinal, Scalar> Q_out,
00072          const SequentialTsqr<LocalOrdinal, Scalar>& seq,
00073          const bool contiguous_cache_blocks) :
00074   P_first_ (P_first__), P_last_ (P_last__), Q_out_ (Q_out),
00075   seq_ (seq), contiguous_cache_blocks_ (contiguous_cache_blocks)
00076       {}
00077 
00078       tbb::task* execute () 
00079       {
00080   if (P_first_ > P_last_ || Q_out_.empty())
00081     return NULL;
00082   else if (P_first_ == P_last_)
00083     {
00084       execute_base_case ();
00085       return NULL;
00086     }
00087   else
00088     {
00089       // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00090       const size_t P_mid = (P_first_ + P_last_) / 2;
00091       split_t Q_split = 
00092         partitioner_.split (Q_out_, P_first_, P_mid, P_last_,
00093           contiguous_cache_blocks_);
00094       // The partitioner may decide that the current block Q_out
00095       // has too few rows to be worth splitting.  In that case,
00096       // Q_split.second (the bottom block) will be empty.  We
00097       // can deal with this by treating it as the base case.
00098       if (Q_split.second.empty() || Q_split.second.nrows() == 0)
00099         {
00100     execute_base_case ();
00101     return NULL;
00102         }
00103 
00104       // "c": continuation task
00105       tbb::empty_task& c = 
00106         *new( allocate_continuation() ) tbb::empty_task;
00107       // Recurse on the split
00108       ExplicitQTask& topTask = *new( c.allocate_child() )
00109         ExplicitQTask (P_first_, P_mid, Q_split.first, seq_, 
00110            contiguous_cache_blocks_);
00111       ExplicitQTask& botTask = *new( c.allocate_child() )
00112         ExplicitQTask (P_mid+1, P_last_, Q_split.second, seq_, 
00113            contiguous_cache_blocks_);
00114       // Set reference count of parent (in this case, the
00115       // continuation task) to 2 (since 2 children -- no
00116       // additional task since no waiting).
00117       c.set_ref_count (2);
00118       c.spawn (botTask);
00119       return &topTask; // scheduler bypass optimization
00120     }
00121       }
00122 
00123     private:
00124       size_t P_first_, P_last_;
00125       mat_view Q_out_;
00126       SequentialTsqr<LocalOrdinal, Scalar> seq_;
00127       Partitioner<LocalOrdinal, Scalar> partitioner_;
00128       bool contiguous_cache_blocks_;
00129 
00130       void
00131       execute_base_case ()
00132       {
00133   // Fill my partition with zeros.
00134   seq_.fill_with_zeros (Q_out_.nrows(), Q_out_.ncols(), Q_out_.get(), 
00135             Q_out_.lda(), contiguous_cache_blocks_);
00136   // If our partition is the first (topmost), fill it with
00137   // the first Q_out.ncols() columns of the identity matrix.
00138   if (P_first_ == 0)
00139     {
00140       // Fetch the topmost cache block of my partition.  Its
00141       // leading dimension should be set correctly by
00142       // top_block().
00143       mat_view Q_out_top = 
00144         seq_.top_block (Q_out_, contiguous_cache_blocks_);
00145       // Set the top block of Q_out to the first ncols
00146       // columns of the identity matrix.
00147       for (LocalOrdinal j = 0; j < Q_out_top.ncols(); ++j)
00148         Q_out_top(j,j) = Scalar(1);
00149     }
00150       }
00151     };
00152 
00153   } // namespace TBB
00154 } // namespace TSQR
00155 
00156 
00157 #endif // __TSQR_TBB_ExplicitQTask_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends