Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CUDANodeUtils.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_CUDANODEUTILS_HPP_
00043 #define KOKKOS_CUDANODEUTILS_HPP_
00044 
00045 #include <cuda.h>
00046 #include <cuda_runtime.h>
00047 
00048 #include "Kokkos_ConfigDefs.hpp"
00049 #define KOKKOS_NO_INCLUDE_INSTANTIATIONS
00050 #include "Kokkos_CUDANodeMemoryModel.hpp"
00051 
00052 #include <Teuchos_ArrayRCP.hpp>
00053 #include <Teuchos_ArrayView.hpp>
00054 
00055 namespace Kokkos {
00056 
00057   class CUDANodeDeallocator {
00058     public:
00059       CUDANodeDeallocator(size_t sizeInBytes, const RCP<CUDANodeMemoryModel> &node);
00060       void free(void *ptr);
00061     private:
00062 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00063       const RCP<CUDANodeMemoryModel> node_;
00064       const size_t allocSize_;
00065 #endif
00066   };
00067 
00069 
00076   template <class T>
00077   class CUDANodeCopyBackDeallocator {
00078     public:
00079       CUDANodeCopyBackDeallocator(const ArrayRCP<T> &buffer, const RCP<CUDANodeMemoryModel> &node);
00080 
00082       ArrayRCP<T> alloc()const ;
00083 
00084       void free(void *ptr) const;
00085     private:
00086       // we have to keep a copy of this ArrayRCP, to know whether the underlying memory was deleted
00087       const ArrayRCP<T> devbuf_;
00088       const RCP<CUDANodeMemoryModel> node_;
00089 #ifdef HAVE_KOKKOS_DEBUG
00090       mutable T * originalHostPtr_;
00091 #endif
00092   };
00093 
00094   template <class T>
00095   CUDANodeCopyBackDeallocator<T>::CUDANodeCopyBackDeallocator(const ArrayRCP<T> &buffer,
00096                                                               const RCP<CUDANodeMemoryModel> &node)
00097   : devbuf_(buffer.create_weak())
00098   , node_(node)
00099   {
00100 #ifdef HAVE_KOKKOS_DEBUG
00101     TEUCHOS_TEST_FOR_EXCEPT(node_ == null);
00102     originalHostPtr_ = NULL;
00103 #endif
00104   }
00105 
00106   template <class T>
00107   ArrayRCP<T>
00108   CUDANodeCopyBackDeallocator<T>::alloc() const {
00109 #ifdef HAVE_KOKKOS_DEBUG
00110     TEUCHOS_TEST_FOR_EXCEPTION( originalHostPtr_ != NULL, std::runtime_error,
00111         Teuchos::typeName(*this) << "::alloc(): alloc() has already been called." );
00112 #endif
00113     T *hostPtr = NULL;
00114     // alloc page-locked ("pinned") memory on the host
00115     // TODO: review: instead of cudaHostAllocDefault, this might should be cudaHostAllocWriteCombined
00116     cudaError_t err = cudaHostAlloc( (void**)&hostPtr, devbuf_.size()*sizeof(T), cudaHostAllocDefault);
00117     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00118         "Kokkos::CUDANodeCopyBackDeallocator::alloc(): cudaHostAlloc() returned error:\n"
00119         << cudaGetErrorString(err)
00120     );
00121 #ifdef HAVE_KOKKOS_DEBUG
00122     // save the allocated address for debug checking
00123     originalHostPtr_ = hostPtr;
00124 #endif
00125     // create an ARCP<T> owning this memory, with a copy of *this for the deallocator
00126     const bool OwnsMem = true;
00127     return arcp<T>( hostPtr, 0, devbuf_.size(), *this, OwnsMem );
00128   }
00129 
00130   template <class T>
00131   void CUDANodeCopyBackDeallocator<T>::free(void *hostPtr) const {
00132 #ifdef HAVE_KOKKOS_DEBUG
00133     TEUCHOS_TEST_FOR_EXCEPTION( hostPtr != originalHostPtr_, std::logic_error,
00134         Teuchos::typeName(*this) << "::free(): pointer to free not consistent with originally allocated pointer." );
00135     originalHostPtr_ = NULL;
00136 #endif
00137     // only perform the copy back if the device ptr is still valid
00138     if (devbuf_.is_valid_ptr()) {
00139       // create temporary ArrayView for use with copyToBuffer
00140       // we must disable the lookup, or a debug build of Teuchos will freak out
00141       ArrayView<const T> tmpav((const T*)hostPtr, devbuf_.size(), Teuchos::RCP_DISABLE_NODE_LOOKUP);
00142       node_->template copyToBuffer<T>(devbuf_.size(), tmpav, devbuf_);
00143     }
00144     cudaError_t err = cudaFreeHost( hostPtr );
00145     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00146         "Kokkos::CUDANodeCopyBackDeallocator::free(): cudaFreeHost() returned error:\n"
00147         << cudaGetErrorString(err)
00148     );
00149     hostPtr = NULL;
00150   }
00151 
00153 
00158   template <class T>
00159   class CUDANodeHostPinnedDeallocator {
00160   public:
00161     // Constructor.
00162     CUDANodeHostPinnedDeallocator();
00163 
00174     ArrayRCP<T> alloc (const size_t sz) const ;
00175 
00180     void free (void *ptr) const;
00181 
00182     private:
00183 #ifdef HAVE_KOKKOS_DEBUG
00184 
00185 
00186 
00187     mutable T* originalHostPtr_;
00188 #endif // HAVE_KOKKOS_DEBUG
00189   };
00190 
00191   template <class T>
00192   CUDANodeHostPinnedDeallocator<T>::CUDANodeHostPinnedDeallocator()
00193 #ifdef HAVE_KOKKOS_DEBUG
00194   : originalHostPtr_(NULL)
00195 #endif // HAVE_KOKKOS_DEBUG
00196   { }
00197 
00198   template <class T>
00199   ArrayRCP<T>
00200   CUDANodeHostPinnedDeallocator<T>::alloc(const size_t sz) const {
00201 #ifdef HAVE_KOKKOS_DEBUG
00202     TEUCHOS_TEST_FOR_EXCEPTION(originalHostPtr_ != NULL, std::runtime_error,
00203       Teuchos::typeName(*this) << "::alloc(): alloc() has already been called." );
00204 #endif
00205     T *hostPtr = NULL;
00206     // alloc page-locked ("pinned") memory on the host
00207     cudaError_t err = cudaHostAlloc( (void**)&hostPtr, sz*sizeof(T), cudaHostAllocDefault);
00208     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00209         "Kokkos::CUDANodeHostPinnedDeallocator::alloc(): cudaHostAlloc() returned error:\n"
00210         << cudaGetErrorString(err)
00211     );
00212 #ifdef HAVE_KOKKOS_DEBUG
00213     // save the allocated address for debug checking
00214     originalHostPtr_ = hostPtr;
00215 #endif
00216     // create an ARCP<T> owning this memory, with a copy of *this for the deallocator
00217     const bool OwnsMem = true;
00218     return arcp<T>( hostPtr, 0, sz, *this, OwnsMem );
00219   }
00220 
00221   template <class T>
00222   void CUDANodeHostPinnedDeallocator<T>::free(void *hostPtr) const {
00223 #ifdef HAVE_KOKKOS_DEBUG
00224     TEUCHOS_TEST_FOR_EXCEPTION( hostPtr != originalHostPtr_, std::logic_error,
00225         Teuchos::typeName(*this) << "::free(): pointer to free not consistent with originally allocated pointer." );
00226     originalHostPtr_ = NULL;
00227 #endif
00228     cudaError_t err = cudaFreeHost( hostPtr );
00229     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00230         "Kokkos::CUDANodeHostPinnedDeallocator::free(): cudaFreeHost() returned error:\n"
00231         << cudaGetErrorString(err)
00232     );
00233   }
00234 
00235 }
00236 
00237 #endif // KOKKOS_CUDANODEUTILS_HPP_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends