Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_ThrustGPUNode.cuh
00001 #ifndef KOKKOS_THRUSTGPUNODE_CUH_
00002 #define KOKKOS_THRUSTGPUNODE_CUH_
00003 
00004 #include "Kokkos_CUDA_util_inline_runtime.h"
00005 
00006 #include <thrust/for_each.h>
00007 #include <thrust/transform_reduce.h>
00008 #include <thrust/iterator/counting_iterator.h>
00009 
00010 // must define this before including any kernels
00011 #define KERNEL_PREFIX __device__ __host__
00012 
00013 // MUST define this to prevent bringing in implementation of CUDANodeMemoryModel (and therefore, half of Teuchos)
00014 #define KOKKOS_NO_INCLUDE_INSTANTIATIONS
00015 #include <Kokkos_ThrustGPUNode.hpp>
00016 
00017 namespace Kokkos {
00018 
00019   template <class WDPin> 
00020   struct ThrustExecuteWrapper {
00021     mutable WDPin wd;
00022 
00023     inline ThrustExecuteWrapper(WDPin in) : wd(in) {}
00024 
00025     __device__ __host__ inline void operator()(int i) const {
00026       wd.execute(i);
00027     }
00028   };
00029 
00030   template <class WDPin> 
00031   struct ThrustReduceWrapper {
00032     mutable WDPin wd;
00033     inline ThrustReduceWrapper (WDPin in) : wd(in) {}
00034 
00035     __device__ __host__ inline 
00036     typename WDPin::ReductionType 
00037     operator()(typename WDPin::ReductionType x, typename WDPin::ReductionType y) {
00038       return wd.reduce(x,y);
00039     }
00040   };
00041 
00042   template <class WDPin>
00043   struct ThrustGenerateWrapper {
00044     mutable WDPin wd;
00045     inline ThrustGenerateWrapper (WDPin in) : wd(in) {}
00046  
00047     __device__ __host__ inline 
00048     typename WDPin::ReductionType
00049     operator()(int i) {
00050       return wd.generate(i);
00051     }
00052   };
00053 
00054   template <class WDP>
00055   void ThrustGPUNode::parallel_for(int begin, int end, WDP wd) {
00056     // wrap in Thrust and hand to thrust::for_each
00057     ThrustExecuteWrapper<WDP> body(wd);  
00058     thrust::counting_iterator<int,thrust::device_space_tag> bit(begin),
00059                                                             eit(end);
00060     thrust::for_each( bit, eit, body );
00061 #ifdef HAVE_KOKKOS_DEBUG
00062 
00063     cutilCheckMsg(__FUNCTION__);
00064 #endif
00065   };
00066 
00067   template <class WDP>
00068   typename WDP::ReductionType
00069   ThrustGPUNode::parallel_reduce(int begin, int end, WDP wd) 
00070   {
00071     // wrap in Thrust and hand to thrust::transform_reduce
00072     thrust::counting_iterator<int,thrust::device_space_tag> bit(begin),
00073                                                             eit(end);
00074     ThrustReduceWrapper<WDP> ROp(wd);
00075     ThrustGenerateWrapper<WDP> TOp(wd);
00076     typename WDP::ReductionType init = wd.identity(), ret;
00077     ret = thrust::transform_reduce( bit, eit, TOp, init, ROp );
00078 #ifdef HAVE_KOKKOS_DEBUG
00079     cutilCheckMsg(__FUNCTION__);
00080 #endif
00081     return ret;
00082   }
00083 
00084 }
00085 
00086 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends