Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CUDANodeMemoryModelImpl.hpp
00001 #ifndef KOKKOS_CUDA_NODE_MEMORY_MODEL_IMPL_HPP_
00002 #define KOKKOS_CUDA_NODE_MEMORY_MODEL_IMPL_HPP_
00003 
00004 #include <cuda.h>
00005 #include <cuda_runtime.h>
00006 
00007 #include <iostream>
00008 #include <cstdlib>
00009 #include <stdexcept>
00010 
00011 #include <Teuchos_ArrayRCP.hpp>
00012 #include <Teuchos_ArrayView.hpp>
00013 #include <Teuchos_ParameterList.hpp>
00014 
00015 #include "Kokkos_NodeAPIConfigDefs.hpp"
00016 #include "Kokkos_BufferMacros.hpp"
00017 #include "Kokkos_CUDANodeMemoryModel.hpp" // in case someone directly included this implementation file
00018 #include "Kokkos_CUDANodeUtils.hpp"
00019 
00020 namespace Kokkos {
00021 
00022   template <class T> inline
00023   ArrayRCP<T> 
00024   CUDANodeMemoryModel::allocBuffer(size_t size) {
00025     // FINISH: if possible, check that there is room; else, boot someone
00026     T * devptr = NULL;
00027     const size_t sizeInBytes = sizeof(T)*size;
00028     if (size > 0) {
00029       cudaError_t err = cudaMalloc( (void**)&devptr, sizeInBytes );
00030       TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00031           "Kokkos::CUDANodeMemoryModel::allocBuffer(): cudaMalloc() returned error: "
00032           << cudaGetErrorString(err) 
00033           );
00034 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00035       allocSize_ += sizeInBytes;
00036 #endif
00037     }
00038     CUDANodeDeallocator dealloc(sizeInBytes,rcpFromRef(*this));
00039     const bool OwnsMem = true;
00040     ArrayRCP<T> buff = arcp<T>(devptr,0,size,dealloc,OwnsMem);
00041     MARK_COMPUTE_BUFFER(buff);
00042     return buff;
00043   }
00044 
00045   template <class T> inline
00046   void CUDANodeMemoryModel::copyFromBuffer(size_t size, const ArrayRCP<const T> &buffSrc, const ArrayView<T> &hostDest) {
00047     CHECK_COMPUTE_BUFFER(buffSrc);
00048     TEST_FOR_EXCEPTION( (size_t)buffSrc.size() < size || (size_t)hostDest.size() < size, std::runtime_error,
00049         "CUDANodeMemoryModel::copyFromBuffer: invalid copy.");
00050 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00051     ++numCopiesD2H_;
00052     bytesCopiedD2H_ += size*sizeof(T);
00053 #endif
00054 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00055     std::cerr << "copyFromBuffer<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
00056 #endif
00057     cudaError_t err = cudaMemcpy( hostDest.getRawPtr(), buffSrc.getRawPtr(), size*sizeof(T), cudaMemcpyDeviceToHost);
00058     TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00059         "Kokkos::CUDANodeMemoryModel::copyFromBuffer(): cudaMemcpy() returned error: "
00060         << cudaGetErrorString(err) 
00061         );
00062   }
00063 
00064   template <class T> inline
00065   void CUDANodeMemoryModel::copyToBuffer(size_t size, const ArrayView<const T> &hostSrc, const ArrayRCP<T> &buffDest) {
00066     CHECK_COMPUTE_BUFFER(buffDest);
00067     TEST_FOR_EXCEPTION( hostSrc.size() < size, std::runtime_error, "CUDANodeMemoryModel::copyToBuffer: invalid copy.");
00068     TEST_FOR_EXCEPTION( buffDest.size() < size, std::runtime_error, "CUDANodeMemoryModel::copyToBuffer: invalid copy.");
00069 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00070     ++numCopiesH2D_;
00071     bytesCopiedH2D_ += size*sizeof(T);
00072 #endif
00073 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00074     std::cerr << "copyToBuffer<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
00075 #endif
00076     cudaError_t err = cudaMemcpy( buffDest.getRawPtr(), hostSrc.getRawPtr(), size*sizeof(T), cudaMemcpyHostToDevice);
00077     TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00078         "Kokkos::CUDANodeMemoryModel::copyToBuffer(): cudaMemcpy() returned error: "
00079         << cudaGetErrorString(err) 
00080         );
00081   }
00082 
00083   template <class T> inline
00084   void CUDANodeMemoryModel::copyBuffers(size_t size, const ArrayRCP<const T> &buffSrc, const ArrayRCP<T> &buffDest) {
00085     CHECK_COMPUTE_BUFFER(buffSrc);
00086     CHECK_COMPUTE_BUFFER(buffDest);
00087     TEST_FOR_EXCEPTION( buffSrc.size() < size || buffDest.size() < size, std::runtime_error,
00088         "CUDANodeMemoryModel::copyBuffers: invalid copy.");
00089 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00090     ++numCopiesD2D_;
00091     bytesCopiedD2D_ += size*sizeof(T);
00092 #endif
00093 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00094     std::cerr << "copyBuffers<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
00095 #endif
00096     cudaError_t err = cudaMemcpy( buffDest.getRawPtr(), buffSrc.getRawPtr(), size*sizeof(T), cudaMemcpyDeviceToDevice);
00097     TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00098         "Kokkos::CUDANodeMemoryModel::copyBuffers(): cudaMemcpy() returned error: "
00099         << cudaGetErrorString(err) 
00100         );
00101   }
00102 
00103   template <class T> inline
00104   ArrayRCP<const T> 
00105   CUDANodeMemoryModel::viewBuffer(size_t size, ArrayRCP<const T> buff) {
00106     CHECK_COMPUTE_BUFFER(buff);
00107     ArrayRCP<T> hostBuff;
00108     if (size != 0) {
00109       hostBuff = arcp<T>(size);
00110 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00111       std::cerr << "viewBuffer() -> ";
00112 #endif
00113       this->template copyFromBuffer<T>(size,buff,hostBuff());
00114     }
00115     return hostBuff;
00116   }
00117 
00118   template <class T> inline
00119   ArrayRCP<T> 
00120   CUDANodeMemoryModel::viewBufferNonConst(ReadWriteOption rw, size_t size, const ArrayRCP<T> &buff) {
00121     CHECK_COMPUTE_BUFFER(buff);
00122     // create a copy-back deallocator that copies back to "buff"
00123     CUDANodeCopyBackDeallocator<T> dealloc(buff.persistingView(0,size), rcpFromRef(*this));
00124     // it allocates a host buffer with the appropriate deallocator embedded
00125     ArrayRCP<T> hostBuff = dealloc.alloc();
00126     if (rw == ReadWrite) {
00127 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00128       std::cerr << "viewBufferNonConst(ReadWrite) -> ";
00129 #endif
00130       this->template copyFromBuffer<T>(size, buff, hostBuff());
00131     }  
00132     else {
00133 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00134       std::cerr << "viewBufferNonConst(WriteOnly)" << std::endl;
00135 #endif
00136     }
00137     // else rw == WriteOnly, and we need no copy
00138     return hostBuff;
00139   }
00140 
00141   inline void CUDANodeMemoryModel::readyBuffers(ArrayView<ArrayRCP<const char> > buffers, ArrayView<ArrayRCP<char> > ncBuffers) {
00142 #ifdef HAVE_KOKKOS_DEBUG
00143     for (size_t i=0; i < buffers.size(); ++i) {
00144       CHECK_COMPUTE_BUFFER(buffers[i]);
00145     }
00146     for (size_t i=0; i < ncBuffers.size(); ++i) {
00147       CHECK_COMPUTE_BUFFER(ncBuffers[i]);
00148     }
00149 #endif
00150     (void)buffers;
00151     (void)ncBuffers;
00152   }
00153 
00154 } // end of namespace Kokkos
00155 
00156 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends