Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr_RevealRankTask.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef __TSQR_TBB_RevealRankTask_hpp
00043 #define __TSQR_TBB_RevealRankTask_hpp
00044 
00045 #include <tbb/task.h>
00046 #include <TbbTsqr_Partitioner.hpp>
00047 #include <Tsqr_SequentialTsqr.hpp>
00048 
00051 
00052 namespace TSQR {
00053   namespace TBB {
00054 
00064     template<class LocalOrdinal, class Scalar>
00065     class RevealRankTask : public tbb::task {
00066     public:
00067       typedef MatView<LocalOrdinal, Scalar> mat_view;
00068       typedef ConstMatView<LocalOrdinal, Scalar> const_mat_view;
00069       typedef std::pair<mat_view, mat_view> split_type;
00070       typedef SequentialTsqr<LocalOrdinal, Scalar> seq_tsqr_type;
00071 
00072       RevealRankTask (const size_t P_first, 
00073           const size_t P_last,
00074           const mat_view& Q,
00075           const const_mat_view& U,
00076           const seq_tsqr_type& seq,
00077           const bool contiguous_cache_blocks) :
00078   P_first_ (P_first), 
00079   P_last_ (P_last), 
00080   Q_ (Q),
00081   U_ (U),
00082   seq_ (seq),
00083   contiguous_cache_blocks_ (contiguous_cache_blocks)
00084       {}
00085 
00086       void 
00087       execute_base_case ()
00088       {
00089   // Use SequentialTsqr to compute Q*U for this core's local
00090   // part of Q.  The method is called "Q_times_B" so that it
00091   // doesn't suggest any orthogonality of the B input matrix,
00092   // though in this case B is U and U is orthogonal
00093   // (resp. unitary if Scalar is complex).
00094   seq_.Q_times_B (Q_.nrows(), Q_.ncols(), Q_.get(), Q_.lda(),
00095       U_.get(), U_.lda(), contiguous_cache_blocks_);
00096       }
00097 
00098       tbb::task* execute () 
00099       {
00100   using tbb::task;
00101 
00102   if (P_first_ > P_last_ || Q_.empty())
00103     return NULL; // shouldn't get here, but just in case...
00104   else if (P_first_ == P_last_)
00105     {
00106       execute_base_case ();
00107       return NULL;
00108     }
00109   else
00110     {
00111       // Recurse on two intervals: [P_first, P_mid] and
00112       // [P_mid+1, P_last]
00113       const size_t P_mid = (P_first_ + P_last_) / 2;
00114       split_type out_split = 
00115         partitioner_.split (Q_, P_first_, P_mid, P_last_, 
00116           contiguous_cache_blocks_);
00117       // The partitioner may decide that the current block Q_
00118       // has too few rows to be worth splitting.  In that case,
00119       // out_split.second (the bottom block) will be empty.  We
00120       // can deal with this by treating it as the base case.
00121       if (out_split.second.empty() || out_split.second.nrows() == 0)
00122         {
00123     execute_base_case ();
00124     return NULL;
00125         }
00126 
00127       // "c": continuation task
00128       tbb::empty_task& c = 
00129         *new( allocate_continuation() ) tbb::empty_task;
00130       // Recurse on the split
00131       RevealRankTask& topTask = *new( c.allocate_child() )
00132         RevealRankTask (P_first_, P_mid, out_split.first, U_, 
00133             seq_, contiguous_cache_blocks_);
00134       RevealRankTask& botTask = *new( c.allocate_child() )
00135         RevealRankTask (P_mid+1, P_last_, out_split.second, U_,
00136             seq_, contiguous_cache_blocks_);
00137       // Set reference count of parent (in this case, the
00138       // continuation task) to 2 (since 2 children -- no
00139       // additional task since no waiting).
00140       c.set_ref_count (2);
00141       c.spawn (botTask);
00142       return &topTask; // scheduler bypass optimization
00143     }
00144       }
00145 
00146     private:
00147       size_t P_first_, P_last_;
00148       mat_view Q_;
00149       const_mat_view U_;
00150       SequentialTsqr<LocalOrdinal, Scalar> seq_;
00151       Partitioner<LocalOrdinal, Scalar> partitioner_;
00152       bool contiguous_cache_blocks_;
00153     };
00154 
00155   } // namespace TBB
00156 } // namespace TSQR
00157 
00158 
00159 #endif // __TSQR_TBB_RevealRankTask_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends