Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_TPINode.hpp
00001 #ifndef KOKKOS_TPINODE_HPP_
00002 #define KOKKOS_TPINODE_HPP_
00003 
00004 #include "Kokkos_StandardNodeMemoryModel.hpp"
00005 #include "Kokkos_NodeHelpers.hpp"
00006 #include <TPI.h>
00007 
// Forward declaration only: this header refers to Teuchos::ParameterList
// by reference and therefore does not need its full definition.
namespace Teuchos { class ParameterList; }
00012 
00013 namespace Kokkos {
00014 
00015   template <class WDP>
00016   struct WDPPlusRange {
00017     WDPPlusRange(int Beg, int End, WDP Wdp) : wdp(Wdp), beg(Beg), end(End){}
00018     WDP wdp;
00019     int beg, end;
00020   };
00021 
00022   inline
00023   void tpi_work_span(TPI_Work* work, int beg, int end, int& ibeg, int& iend)
00024   {
00025     const int chunk = ( end - beg + work->count - 1 ) / work->count ;
00026   
00027     iend = chunk * ( work->rank + 1 ) + beg;
00028     ibeg = chunk * ( work->rank ) + beg;
00029   
00030     if ( end < iend ) { iend = end; }
00031   }
00032 
00033   template<class WDP>
00034   void tpi_execute(TPI_Work * work)
00035   {
00036     // get work/data pair
00037     const WDPPlusRange<WDP>* const_wdp_wrapper = static_cast<const WDPPlusRange<WDP>*>(work->info);
00038     WDPPlusRange<WDP>* wdp_wrapper = const_cast<WDPPlusRange<WDP>*>(const_wdp_wrapper);
00039     WDP wdp = wdp_wrapper->wdp;
00040     int beg = wdp_wrapper->beg, end = wdp_wrapper->end;
00041     int ibeg, iend;
00042     // determine my share of the work
00043     tpi_work_span(work, beg, end, ibeg, iend);
00044     // do my share of the work
00045     for (int i=ibeg; i<iend; ++i) {
00046       wdp.execute(i);
00047     }
00048   }
00049 
00050   template<class WDP>
00051   void tpi_reduction_work(TPI_Work * work)
00052   {
00053     const WDPPlusRange<WDP>* wdp_wrapper = static_cast<const WDPPlusRange<WDP>*>(work->info);
00054     int beg = wdp_wrapper->beg, end = wdp_wrapper->end;
00055     WDP wdp = wdp_wrapper->wdp;
00056     int ibeg, iend;
00057     tpi_work_span(work, beg, end, ibeg, iend);
00058   
00059     typedef typename WDP::ReductionType ReductionType;
00060     ReductionType tmpi;
00061     ReductionType &res = *(static_cast<ReductionType*>(work->reduce));
00062   
00063     for (int i=ibeg; i<iend; ++i) {
00064       tmpi = wdp.generate(i);
00065       res = wdp.reduce(res, tmpi);
00066     }
00067   }
00068 
00069   template<class WDP>
00070   void tpi_reduction_join(TPI_Work * work, const void* src)
00071   {
00072     typedef typename WDP::ReductionType ReductionType;
00073   
00074     const WDPPlusRange<WDP>* wdp_wrapper = static_cast<const WDPPlusRange<WDP>*>(work->info);
00075     WDP wdp = wdp_wrapper->wdp;
00076   
00077     ReductionType& work_reduce = *(static_cast<ReductionType*>(work->reduce));
00078     const ReductionType& src_reduce  = *(static_cast<const ReductionType*>(src));
00079   
00080     work_reduce = wdp.reduce(work_reduce, src_reduce);
00081   }
00082 
00083   template<class WDP>
00084   void tpi_reduction_init(TPI_Work * work)
00085   {
00086     typedef typename WDP::ReductionType ReductionType;
00087     *(static_cast<ReductionType*>(work->reduce)) = WDP::identity();
00088   }
00089 
00093   class TPINode : public StandardNodeMemoryModel {
00094     public:
00095 
00103       TPINode(Teuchos::ParameterList &plist);
00104 
00108       void init(int numThreads);
00109 
00113       ~TPINode();
00114 
00116       template <class WDP>
00117       static void parallel_for(int beg, int end, WDP wd) {
00118         WDPPlusRange<WDP> wdp_plus(beg,end,wd);
00119         TPI_Run_threads(tpi_execute<WDP>, &wdp_plus, 0 );
00120       }
00121 
00123       template <class WDP>
00124       static typename WDP::ReductionType 
00125       parallel_reduce(int beg, int end, WDP wd) {
00126         typedef typename WDP::ReductionType ReductionType;
00127         ReductionType result = WDP::identity();
00128         WDPPlusRange<WDP> wdp_plus(beg,end,wd);
00129         TPI_Run_threads_reduce(tpi_reduction_work<WDP>, &wdp_plus,
00130                                tpi_reduction_join<WDP>,
00131                                tpi_reduction_init<WDP>, sizeof(result), &result);
00132         return result;
00133       }
00134 
00136       inline void sync() const {};
00137 
00138     private:
00139       int curNumThreads_;
00140   };
00141 
00142   template <> class ArrayOfViewsHelper<TPINode> : public ArrayOfViewsHelperTrivialImpl<TPINode> {};
00143 
00144 } // end namespace Kokkos
00145 
00146 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends