Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_TBBNode.hpp
00001 #ifndef KOKKOS_TBBNODE_HPP_
00002 #define KOKKOS_TBBNODE_HPP_
00003 
00004 #include "Kokkos_StandardNodeMemoryModel.hpp"
00005 #include "Kokkos_NodeHelpers.hpp"
00006 
00007 #include <tbb/blocked_range.h>
00008 #include <tbb/parallel_for.h>
00009 #include <tbb/parallel_reduce.h>
00010 #include <tbb/task_scheduler_init.h>
00011 
// Forward declaration only: this header refers to Teuchos::ParameterList
// solely by reference (in the TBBNode constructor signature), so the full
// class definition is not required here.
namespace Teuchos { class ParameterList; }
00016 
00017 #include <stdlib.h>
00018 
00019 namespace Kokkos {
00020 
00021   template <class WDPin>
00022   struct BlockedRangeWDP {
00023     mutable WDPin &wd;
00024     inline BlockedRangeWDP(WDPin &in) : wd(in) {}
00025     inline void operator()(tbb::blocked_range<int> &rng) const { 
00026       for (int i=rng.begin(); i != rng.end(); ++i) wd.execute(i);
00027     }
00028   };
00029   
00030   template <class WDPin>
00031   struct BlockedRangeWDPReducer {
00032     WDPin &wd;
00033     typename WDPin::ReductionType result;
00034     BlockedRangeWDPReducer(WDPin &in) : wd(in), result(WDPin::identity()) {}
00035     BlockedRangeWDPReducer(BlockedRangeWDPReducer &in, tbb::split) : wd(in.wd) {result = wd.identity();}
00036     void operator()(tbb::blocked_range<int> &rng)
00037     { 
00038       typename WDPin::ReductionType tmpi;
00039       int end = rng.end();
00040       for (int i=rng.begin(); i != end; ++i) {
00041         tmpi = wd.generate(i);
00042         result = wd.reduce( result, tmpi );
00043       }
00044     }
00045     inline void join( const BlockedRangeWDPReducer<WDPin> &other ) {
00046       result = wd.reduce( result, other.result );
00047     }
00048   };
00049   
00053   class TBBNode : public StandardNodeMemoryModel {
00054     public:
00055   
00061       TBBNode(Teuchos::ParameterList &pl);
00062   
00065       ~TBBNode();
00066 
00071       void init(int numThreads);
00072 
00074       template <class WDP>
00075       static void parallel_for(int begin, int end, WDP wd) {
00076         BlockedRangeWDP<WDP> tbb_wd(wd);
00077         tbb::parallel_for(tbb::blocked_range<int>(begin,end), tbb_wd, tbb::auto_partitioner()); 
00078       }
00079 
00081       template <class WDP>
00082       static typename WDP::ReductionType
00083       parallel_reduce(int begin, int end, WDP wd) {
00084         BlockedRangeWDPReducer<WDP> tbb_wd(wd);
00085         tbb::parallel_reduce(tbb::blocked_range<int>(begin,end), tbb_wd, tbb::auto_partitioner());
00086         return tbb_wd.result;
00087       }
00088 
00090       inline void sync() const {};
00091   
00092     private:
00093       bool alreadyInit_;
00094       tbb::task_scheduler_init tsi_;
00095   
00096   };
00097   
00098   template <> class ArrayOfViewsHelper<TBBNode> : public ArrayOfViewsHelperTrivialImpl<TBBNode> {};
00099 
00100 } // end of Kokkos namespace
00101 
00102 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends