Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_ThrustGPUNode.cuh
00001 /*
00002 //@HEADER
00003 // ************************************************************************
00004 // 
00005 //          Kokkos: Node API and Parallel Node Kernels
00006 //              Copyright (2008) Sandia Corporation
00007 // 
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 // 
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00039 // 
00040 // ************************************************************************
00041 //@HEADER
00042 */
00043 
00044 #ifndef KOKKOS_THRUSTGPUNODE_CUH_
00045 #define KOKKOS_THRUSTGPUNODE_CUH_
00046 
00047 #include <thrust/for_each.h>
00048 #include <thrust/transform_reduce.h>
00049 #include <thrust/iterator/counting_iterator.h>
00050 
00051 // must define this before including any kernels
00052 #define KERNEL_PREFIX __device__ __host__
00053 
00054 // MUST define this to prevent bringing in implementation of CUDANodeMemoryModel (and therefore, half of Teuchos)
00055 #define KOKKOS_NO_INCLUDE_INSTANTIATIONS
00056 #include <Kokkos_ThrustGPUNode.hpp>
00057 
00058 namespace Kokkos {
00059 
00060   template <class WDPin> 
00061   struct ThrustExecuteWrapper {
00062     mutable WDPin wd;
00063 
00064     inline ThrustExecuteWrapper(WDPin in) : wd(in) {}
00065 
00066     __device__ __host__ inline void operator()(int i) const {
00067       wd.execute(i);
00068     }
00069   };
00070 
00071   template <class WDPin> 
00072   struct ThrustReduceWrapper {
00073     mutable WDPin wd;
00074     inline ThrustReduceWrapper (WDPin in) : wd(in) {}
00075 
00076     __device__ __host__ inline 
00077     typename WDPin::ReductionType 
00078     operator()(typename WDPin::ReductionType x, typename WDPin::ReductionType y) {
00079       return wd.reduce(x,y);
00080     }
00081   };
00082 
00083   template <class WDPin>
00084   struct ThrustGenerateWrapper {
00085     mutable WDPin wd;
00086     inline ThrustGenerateWrapper (WDPin in) : wd(in) {}
00087  
00088     __device__ __host__ inline 
00089     typename WDPin::ReductionType
00090     operator()(int i) {
00091       return wd.generate(i);
00092     }
00093   };
00094 
00095   template <class WDP>
00096   void ThrustGPUNode::parallel_for(int begin, int end, WDP wd) {
00097 #ifdef HAVE_KOKKOS_DEBUG
00098     cudaError_t err = cudaGetLastError();
00099     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error, 
00100         "Kokkos::ThrustGPUNode::" << __FUNCTION__ << ": " 
00101         << "cudaGetLastError() returned error before function call:\n"
00102         << cudaGetErrorString(err) );
00103 #endif
00104     // wrap in Thrust and hand to thrust::for_each
00105     ThrustExecuteWrapper<WDP> body(wd);  
00106     thrust::counting_iterator<int,thrust::device_space_tag> bit(begin),
00107                                                             eit(end);
00108     thrust::for_each( bit, eit, body );
00109 #ifdef HAVE_KOKKOS_DEBUG
00110     err = cudaThreadSynchronize();
00111     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error, 
00112         "Kokkos::ThrustGPUNode::" << __FUNCTION__ << ": " 
00113         << "cudaThreadSynchronize() returned error after function call:\n"
00114         << cudaGetErrorString(err) );
00115 #endif
00116   };
00117 
00118   template <class WDP>
00119   typename WDP::ReductionType
00120   ThrustGPUNode::parallel_reduce(int begin, int end, WDP wd) 
00121   {
00122 #ifdef HAVE_KOKKOS_DEBUG
00123     cudaError_t err = cudaGetLastError();
00124     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error, 
00125         "Kokkos::ThrustGPUNode::" << __FUNCTION__ << ": " 
00126         << "cudaGetLastError() returned error before function call:\n"
00127         << cudaGetErrorString(err) );
00128 #endif
00129     // wrap in Thrust and hand to thrust::transform_reduce
00130     thrust::counting_iterator<int,thrust::device_space_tag> bit(begin),
00131                                                             eit(end);
00132     ThrustReduceWrapper<WDP> ROp(wd);
00133     ThrustGenerateWrapper<WDP> TOp(wd);
00134     typename WDP::ReductionType init = wd.identity(), ret;
00135     ret = thrust::transform_reduce( bit, eit, TOp, init, ROp );
00136 #ifdef HAVE_KOKKOS_DEBUG
00137     err = cudaThreadSynchronize();
00138     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error, 
00139         "Kokkos::ThrustGPUNode::" << __FUNCTION__ << ": " 
00140         << "cudaThreadSynchronize() returned error after function call:\n"
00141         << cudaGetErrorString(err) );
00142 #endif
00143     return ret;
00144   }
00145 
00146 }
00147 
00148 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends