Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CUDANodeUtils.hpp
00001 #ifndef KOKKOS_CUDANODEUTILS_HPP_
00002 #define KOKKOS_CUDANODEUTILS_HPP_
00003 
00004 #include <cuda.h>
00005 #include <cuda_runtime.h>
00006 
00007 #include "Kokkos_ConfigDefs.hpp"
00008 #define KOKKOS_NO_INCLUDE_INSTANTIATIONS
00009 #include "Kokkos_CUDANodeMemoryModel.hpp"
00010 
00011 #include <Teuchos_ArrayRCP.hpp>
00012 #include <Teuchos_ArrayView.hpp>
00013 
00014 namespace Kokkos {
00015 
00016   class CUDANodeDeallocator {
00017     public:
00018       CUDANodeDeallocator(size_t sizeInBytes, const RCP<CUDANodeMemoryModel> &node);
00019       void free(void *ptr);
00020     private:
00021 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00022       const RCP<CUDANodeMemoryModel> node_;
00023       const size_t allocSize_;
00024 #endif
00025   };
00026 
00028 
00035   template <class T>
00036   class CUDANodeCopyBackDeallocator {
00037     public:
00038       CUDANodeCopyBackDeallocator(const ArrayRCP<T> &buffer, const RCP<CUDANodeMemoryModel> &node);
00039 
00041       ArrayRCP<T> alloc()const ;
00042 
00043       void free(void *ptr) const;
00044     private:
00045       // we have to keep a copy of this ArrayRCP, to know whether the underlying memory was deleted
00046       const ArrayRCP<T> devbuf_;
00047       const RCP<CUDANodeMemoryModel> node_;
00048 #ifdef HAVE_KOKKOS_DEBUG
00049       mutable T * originalHostPtr_;
00050 #endif
00051   };
00052 
00053   template <class T>
00054   CUDANodeCopyBackDeallocator<T>::CUDANodeCopyBackDeallocator(const ArrayRCP<T> &buffer,   
00055                                                               const RCP<CUDANodeMemoryModel> &node)
00056   : devbuf_(buffer.create_weak())
00057   , node_(node)
00058   { 
00059 #ifdef HAVE_KOKKOS_DEBUG
00060     TEST_FOR_EXCEPT(node_ == null);
00061     originalHostPtr_ = NULL;
00062 #endif
00063   }
00064 
00065   template <class T>
00066   ArrayRCP<T>
00067   CUDANodeCopyBackDeallocator<T>::alloc() const {
00068 #ifdef HAVE_KOKKOS_DEBUG
00069     TEST_FOR_EXCEPTION( originalHostPtr_ != NULL, std::runtime_error,
00070         Teuchos::typeName(*this) << "::alloc(): alloc() has already been called." );
00071 #endif
00072     T *hostPtr = NULL;
00073     // alloc page-locked ("pinned") memory on the host
00074     cudaError_t err = cudaHostAlloc( (void**)&hostPtr, devbuf_.size()*sizeof(T), cudaHostAllocDefault);
00075     TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00076         "Kokkos::CUDANodeCopyBackDeallocator::alloc(): cudaHostAlloc() returned error:\n"
00077         << cudaGetErrorString(err) 
00078     );
00079 #ifdef HAVE_KOKKOS_DEBUG
00080     // save the allocated address for debug checking
00081     originalHostPtr_ = hostPtr; 
00082 #endif
00083     // create an ARCP<T> owning this memory, with a copy of *this for the deallocator
00084     const bool OwnsMem = true;
00085     return arcp<T>( hostPtr, 0, devbuf_.size(), *this, OwnsMem );
00086   }
00087 
00088   template <class T>
00089   void CUDANodeCopyBackDeallocator<T>::free(void *hostPtr) const {
00090 #ifdef HAVE_KOKKOS_DEBUG
00091     TEST_FOR_EXCEPTION( hostPtr != originalHostPtr_, std::logic_error,
00092         Teuchos::typeName(*this) << "::free(): pointer to free not consistent with originally allocated pointer." );
00093     originalHostPtr_ = NULL;
00094 #endif
00095     // only perform the copy back if the device ptr is still valid
00096     if (devbuf_.is_valid_ptr()) {
00097       // create temporary ArrayView for use with copyToBuffer
00098       // we must disable the lookup, or a debug build of Teuchos will freak out
00099       ArrayView<const T> tmpav((const T*)hostPtr, devbuf_.size(), Teuchos::RCP_DISABLE_NODE_LOOKUP);
00100       node_->template copyToBuffer<T>(devbuf_.size(), tmpav, devbuf_);
00101     }
00102     cudaError_t err = cudaFreeHost( (void**)hostPtr );
00103     TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00104         "Kokkos::CUDANodeCopyBackDeallocator::free(): cudaFreeHost() returned error:\n"
00105         << cudaGetErrorString(err) 
00106     );
00107     hostPtr = NULL;
00108   }
00109 
00110 }
00111 
00112 #endif // KOKKOS_CUDANODEUTILS_HPP_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends