Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CUDANodeUtils.hpp
00001 #ifndef KOKKOS_CUDANODEUTILS_HPP_
00002 #define KOKKOS_CUDANODEUTILS_HPP_
00003 
00004 #include <cuda.h>
00005 #include <cuda_runtime.h>
00006 
00007 #include "Kokkos_CUDA_util_inline_runtime.h"
00008 #define KOKKOS_NO_INCLUDE_INSTANTIATIONS
00009 #include "Kokkos_CUDANodeMemoryModel.hpp"
00010 
00011 #include <Teuchos_ArrayRCP.hpp>
00012 #include <Teuchos_ArrayView.hpp>
00013 
00014 namespace Kokkos {
00015 
00016   class CUDANodeDeallocator {
00017     public:
00018       CUDANodeDeallocator(size_t sizeInBytes, const RCP<CUDANodeMemoryModel> &node);
00019       void free(void *ptr);
00020     private:
00021 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00022       const RCP<CUDANodeMemoryModel> node_;
00023       const size_t allocSize_;
00024 #endif
00025   };
00026 
00028 
00035   template <class T>
00036   class CUDANodeCopyBackDeallocator {
00037     public:
00038       CUDANodeCopyBackDeallocator(const ArrayRCP<T> &buffer, const RCP<CUDANodeMemoryModel> &node);
00039 
00041       ArrayRCP<T> alloc()const ;
00042 
00043       void free(void *ptr) const;
00044     private:
00045       // we have to keep a copy of this ArrayRCP, to know whether the underlying memory was deleted
00046       const ArrayRCP<T> devbuf_;
00047       const RCP<CUDANodeMemoryModel> node_;
00048 #ifdef HAVE_KOKKOS_DEBUG
00049       mutable T * originalHostPtr_;
00050 #endif
00051   };
00052 
00053   template <class T>
00054   CUDANodeCopyBackDeallocator<T>::CUDANodeCopyBackDeallocator(const ArrayRCP<T> &buffer,   
00055                                                               const RCP<CUDANodeMemoryModel> &node)
00056   : devbuf_(buffer.create_weak())
00057   , node_(node)
00058   { 
00059 #ifdef HAVE_KOKKOS_DEBUG
00060     TEST_FOR_EXCEPT(node_ == null);
00061     originalHostPtr_ = NULL;
00062 #endif
00063   }
00064 
00065   template <class T>
00066   ArrayRCP<T>
00067   CUDANodeCopyBackDeallocator<T>::alloc() const {
00068 #ifdef HAVE_KOKKOS_DEBUG
00069     TEST_FOR_EXCEPTION( originalHostPtr_ != NULL, std::runtime_error,
00070         Teuchos::typeName(*this) << "::alloc(): alloc() has already been called." );
00071 #endif
00072     T *hostPtr = NULL;
00073     // alloc page-locked ("pinned") memory on the host
00074     cutilSafeCallNoSync( cudaHostAlloc( (void**)&hostPtr, devbuf_.size()*sizeof(T), cudaHostAllocDefault) );
00075 #ifdef HAVE_KOKKOS_DEBUG
00076     // save the allocated address for debug checking
00077     originalHostPtr_ = hostPtr; 
00078 #endif
00079     // create an ARCP<T> owning this memory, with a copy of *this for the deallocator
00080     const bool OwnsMem = true;
00081     return arcp<T>( hostPtr, 0, devbuf_.size(), *this, OwnsMem );
00082   }
00083 
00084   template <class T>
00085   void CUDANodeCopyBackDeallocator<T>::free(void *hostPtr) const {
00086 #ifdef HAVE_KOKKOS_DEBUG
00087     TEST_FOR_EXCEPTION( hostPtr != originalHostPtr_, std::logic_error,
00088         Teuchos::typeName(*this) << "::free(): pointer to free not consistent with originally allocated pointer." );
00089     originalHostPtr_ = NULL;
00090 #endif
00091     // only perform the copy back if the device ptr is still valid
00092     if (devbuf_.is_valid_ptr()) {
00093       // create temporary ArrayView for use with copyToBuffer
00094       // we must disable the lookup, or a debug build of Teuchos will freak out
00095       ArrayView<const T> tmpav((const T*)hostPtr, devbuf_.size(), Teuchos::RCP_DISABLE_NODE_LOOKUP);
00096       node_->template copyToBuffer<T>(devbuf_.size(), tmpav, devbuf_);
00097     }
00098     cutilSafeCallNoSync( cudaFreeHost( (void**)hostPtr ) );
00099     hostPtr = NULL;
00100   }
00101 
00102 }
00103 
00104 #endif // KOKKOS_CUDANODEUTILS_HPP_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends