Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_ThrustGPUNode.cuh
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_THRUSTGPUNODE_CUH_
00043 #define KOKKOS_THRUSTGPUNODE_CUH_
00044 
00045 #include <thrust/for_each.h>
00046 #include <thrust/transform_reduce.h>
00047 #include <thrust/iterator/counting_iterator.h>
00048 
00049 // must define this before including any kernels
00050 #define KERNEL_PREFIX __device__ __host__
00051 
00052 #include <Kokkos_ThrustGPUWrappers.hpp>
00053 #include <Teuchos_TestForException.hpp>
00054 
00055 namespace Kokkos {
00056 
00057   namespace ThrustGPUNodeDetails {
00058 
00059     template <class WDPin> 
00060     struct ThrustExecuteWrapper {
00061       mutable WDPin wd;
00062   
00063       inline ThrustExecuteWrapper(WDPin in) : wd(in) {}
00064   
00065       __device__ __host__ inline void operator()(int i) const {
00066         wd.execute(i);
00067       }
00068     };
00069   
00070     template <class WDPin> 
00071     struct ThrustReduceWrapper {
00072       mutable WDPin wd;
00073       inline ThrustReduceWrapper (WDPin in) : wd(in) {}
00074   
00075       __device__ __host__ inline 
00076       typename WDPin::ReductionType 
00077       operator()(typename WDPin::ReductionType x, typename WDPin::ReductionType y) {
00078         return wd.reduce(x,y);
00079       }
00080     };
00081   
00082     template <class WDPin>
00083     struct ThrustGenerateWrapper {
00084       mutable WDPin wd;
00085       inline ThrustGenerateWrapper (WDPin in) : wd(in) {}
00086    
00087       __device__ __host__ inline 
00088       typename WDPin::ReductionType
00089       operator()(int i) {
00090         return wd.generate(i);
00091       }
00092     };
00093 
00094   } // end namespace ThrustGPUNodeDetails 
00095 
00096   template <class WDP>
00097   void ThrustGPUNodeDetails::parallel_for(int begin, int end, WDP wd) {
00098 #ifdef HAVE_KOKKOS_DEBUG
00099     cudaError_t err = cudaGetLastError();
00100     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error, 
00101         "Kokkos::ThrustGPUNode::" << __FUNCTION__ << ": " 
00102         << "cudaGetLastError() returned error before function call:\n"
00103         << cudaGetErrorString(err) );
00104 #endif
00105     // wrap in Thrust and hand to thrust::for_each
00106     ThrustGPUNodeDetails::ThrustExecuteWrapper<WDP> body(wd);  
00107     thrust::counting_iterator<int,thrust::device_space_tag> bit(begin),
00108                                                             eit(end);
00109     thrust::for_each( bit, eit, body );
00110 #ifdef HAVE_KOKKOS_DEBUG
00111     err = cudaThreadSynchronize();
00112     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error, 
00113         "Kokkos::ThrustGPUNode::" << __FUNCTION__ << ": " 
00114         << "cudaThreadSynchronize() returned error after function call:\n"
00115         << cudaGetErrorString(err) );
00116 #endif
00117   };
00118 
00119   template <class WDP>
00120   typename WDP::ReductionType
00121   ThrustGPUNodeDetails::parallel_reduce(int begin, int end, WDP wd) 
00122   {
00123 #ifdef HAVE_KOKKOS_DEBUG
00124     cudaError_t err = cudaGetLastError();
00125     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error, 
00126         "Kokkos::ThrustGPUNode::" << __FUNCTION__ << ": " 
00127         << "cudaGetLastError() returned error before function call:\n"
00128         << cudaGetErrorString(err) );
00129 #endif
00130     // wrap in Thrust and hand to thrust::transform_reduce
00131     thrust::counting_iterator<int,thrust::device_space_tag> bit(begin),
00132                                                             eit(end);
00133     ThrustGPUNodeDetails::ThrustReduceWrapper<WDP> ROp(wd);
00134     ThrustGPUNodeDetails::ThrustGenerateWrapper<WDP> TOp(wd);
00135     typename WDP::ReductionType init = wd.identity(), ret;
00136     ret = thrust::transform_reduce( bit, eit, TOp, init, ROp );
00137 #ifdef HAVE_KOKKOS_DEBUG
00138     err = cudaThreadSynchronize();
00139     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error, 
00140         "Kokkos::ThrustGPUNode::" << __FUNCTION__ << ": " 
00141         << "cudaThreadSynchronize() returned error after function call:\n"
00142         << cudaGetErrorString(err) );
00143 #endif
00144     return ret;
00145   }
00146 
00147 } // end namespace Kokkos
00148 
00149 #define KOKKOS_INSTANT_THRUSTGPUNODE_PARALLEL_FOR(KERN) \
00150   template void Kokkos::ThrustGPUNodeDetails::parallel_for< KERN >(int, int, KERN);
00151 
00152 #define KOKKOS_INSTANT_THRUSTGPUNODE_PARALLEL_RED(KERN) \
00153   template KERN::ReductionType Kokkos::ThrustGPUNodeDetails::parallel_reduce< KERN >(int, int, KERN );
00154 
00155 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends