Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr_Partitioner.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef __TSQR_TBB_Partitioner_hpp
00030 #define __TSQR_TBB_Partitioner_hpp
00031 
00032 #include <Tsqr_MatView.hpp>
00033 
00034 #include <cstring> // size_t
00035 #include <sstream>
00036 #include <stdexcept>
00037 #include <utility>
00038 #include <vector>
00039 
00042 
00043 namespace TSQR {
00044   namespace TBB {
00045 
00046     template< class Ordinal, class Scalar >
00047     class Partitioner {
00048     private:
00049       bool
00050       should_split (const Ordinal nrows,
00051         const Ordinal ncols,
00052         const size_t num_partitions) const
00053       {
00054   using std::invalid_argument;
00055   using std::ostringstream;
00056 
00057   if (nrows < ncols)
00058     {
00059       ostringstream os;
00060       os << "Partitioner::should_split: nrows (= " << nrows 
00061          << ") < ncols (= " << ncols << ")";
00062       throw invalid_argument (os.str());
00063     }
00064   else if (num_partitions == 0)
00065     {
00066       ostringstream os;
00067       os << "Partitioner::should_split: nrows (= " << nrows 
00068          << ") < ncols (= " << ncols << ")";
00069       throw invalid_argument (os.str());
00070     }
00071   // FIXME (mfh 11 Jul 2010) Need more overflow checks here.
00072   return static_cast<size_t>(nrows) / num_partitions >= static_cast<size_t>(ncols);
00073       } 
00074 
00075     public:
00079       template< class MatrixViewType >
00080       std::pair< MatrixViewType, MatrixViewType >
00081       split (const MatrixViewType& A,
00082        const size_t P_first,
00083        const size_t P_mid,
00084        const size_t P_last,
00085        const bool contiguous_cache_blocks) const
00086       {
00087   typedef typename MatrixViewType::ordinal_type ordinal_type;
00088   typedef typename MatrixViewType::pointer_type pointer_type;
00089 
00090   const size_t num_partitions_top = P_mid - P_first + 1;
00091   //const size_t num_partitions_bottom = P_last - P_mid;
00092   const size_t num_partitions = P_last - P_first + 1;
00093   const ordinal_type nrows = A.nrows();
00094   const ordinal_type ncols = A.ncols();
00095   
00096   if (! should_split (nrows, ncols, num_partitions))
00097     return std::make_pair (MatrixViewType(A), MatrixViewType());
00098   else
00099     {
00100       const ordinal_type num_rows_partition = nrows / num_partitions;
00101       const ordinal_type remainder = nrows % num_partitions;
00102       
00103       // Top partition gets the remainder rows.  Doing the
00104       // multiplication before the division might make it more
00105       // likely to avoid truncating the fraction, but may cause
00106       // overflow of ordinal_type.  
00107       const ordinal_type num_rows_top = 
00108         num_rows_partition * num_partitions_top + remainder;
00109       const ordinal_type num_rows_bot = nrows - num_rows_top;
00110 
00111       // We don't call (Const)MatView::split_top(), because that
00112       // is for splitting off a single cache block.  Each half
00113       // of the split may contain more than one cache block.
00114       if (contiguous_cache_blocks)
00115         {
00116     pointer_type A_bot_ptr = A.get() + num_rows_top * ncols;
00117     MatrixViewType A_top (num_rows_top, ncols, A.get(), num_rows_top);
00118     MatrixViewType A_bot (num_rows_bot, ncols, A_bot_ptr, num_rows_bot);
00119     return std::make_pair (A_top, A_bot);
00120         }
00121       else
00122         {
00123     pointer_type A_bot_ptr = A.get() + num_rows_top;
00124     MatrixViewType A_top (num_rows_top, ncols, A.get(), A.lda());
00125     MatrixViewType A_bot (num_rows_bot, ncols, A_bot_ptr, A.lda());
00126     return std::make_pair (A_top, A_bot);
00127         }
00128     }
00129       }
00130     }; // class Partitioner
00131   } // namespace TBB
00132 } // namespace TSQR
00133 
00134 #endif // __TSQR_TBB_Partitioner_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends