Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CUDANodeMemoryModelImpl.hpp
00001 #ifndef KOKKOS_CUDA_NODE_MEMORY_MODEL_IMPL_HPP_
00002 #define KOKKOS_CUDA_NODE_MEMORY_MODEL_IMPL_HPP_
00003 
00004 #include <cuda.h>
00005 #include <cuda_runtime.h>
00006 
00007 #include <iostream>
00008 #include <cstdlib>
00009 #include <stdexcept>
00010 
00011 #include <Teuchos_ArrayRCP.hpp>
00012 #include <Teuchos_ArrayView.hpp>
00013 #include <Teuchos_ParameterList.hpp>
00014 
00015 #include "Kokkos_NodeAPIConfigDefs.hpp"
00016 #include "Kokkos_BufferMacros.hpp"
00017 #include "Kokkos_CUDA_util_inline_runtime.h"
00018 #include "Kokkos_CUDANodeMemoryModel.hpp" // in case someone directly included this implementation file
00019 #include "Kokkos_CUDANodeUtils.hpp"
00020 
00021 namespace Kokkos {
00022 
00023   template <class T> inline
00024   ArrayRCP<T> 
00025   CUDANodeMemoryModel::allocBuffer(size_t size) {
00026     // FINISH: if possible, check that there is room; else, boot someone
00027     T * devptr = NULL;
00028     const size_t sizeInBytes = sizeof(T)*size;
00029     if (size > 0) {
00030       cutilSafeCallNoSync( cudaMalloc( (void**)&devptr, sizeInBytes ) );
00031 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00032       allocSize_ += sizeInBytes;
00033 #endif
00034     }
00035     CUDANodeDeallocator dealloc(sizeInBytes,rcpFromRef(*this));
00036     const bool OwnsMem = true;
00037     ArrayRCP<T> buff = arcp<T>(devptr,0,size,dealloc,OwnsMem);
00038     MARK_COMPUTE_BUFFER(buff);
00039     return buff;
00040   }
00041 
00042   template <class T> inline
00043   void CUDANodeMemoryModel::copyFromBuffer(size_t size, const ArrayRCP<const T> &buffSrc, const ArrayView<T> &hostDest) {
00044     CHECK_COMPUTE_BUFFER(buffSrc);
00045     TEST_FOR_EXCEPTION( (size_t)buffSrc.size() < size || (size_t)hostDest.size() < size, std::runtime_error,
00046         "CUDANode::copyFromBuffer: invalid copy.");
00047 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00048     ++numCopiesD2H_;
00049     bytesCopiedD2H_ += size*sizeof(T);
00050 #endif
00051 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00052     std::cerr << "copyFromBuffer<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
00053 #endif
00054     cutilSafeCallNoSync( cudaMemcpy( hostDest.getRawPtr(), buffSrc.getRawPtr(), size*sizeof(T), cudaMemcpyDeviceToHost) );
00055   }
00056 
00057   template <class T> inline
00058   void CUDANodeMemoryModel::copyToBuffer(size_t size, const ArrayView<const T> &hostSrc, const ArrayRCP<T> &buffDest) {
00059     CHECK_COMPUTE_BUFFER(buffDest);
00060     TEST_FOR_EXCEPTION( hostSrc.size() < size, std::runtime_error, "CUDANode::copyFromBuffer: invalid copy.");
00061     TEST_FOR_EXCEPTION( buffDest.size() < size, std::runtime_error, "CUDANode::copyFromBuffer: invalid copy.");
00062 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00063     ++numCopiesH2D_;
00064     bytesCopiedH2D_ += size*sizeof(T);
00065 #endif
00066 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00067     std::cerr << "copyToBuffer<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
00068 #endif
00069     cutilSafeCallNoSync( cudaMemcpy( buffDest.getRawPtr(), hostSrc.getRawPtr(), size*sizeof(T), cudaMemcpyHostToDevice) );
00070   }
00071 
00072   template <class T> inline
00073   void CUDANodeMemoryModel::copyBuffers(size_t size, const ArrayRCP<const T> &buffSrc, const ArrayRCP<T> &buffDest) {
00074     CHECK_COMPUTE_BUFFER(buffSrc);
00075     CHECK_COMPUTE_BUFFER(buffDest);
00076     TEST_FOR_EXCEPTION( buffSrc.size() < size || buffDest.size() < size, std::runtime_error,
00077         "CUDANode::copyFromBuffer: invalid copy.");
00078 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00079     ++numCopiesD2D_;
00080     bytesCopiedD2D_ += size*sizeof(T);
00081 #endif
00082 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00083     std::cerr << "copyBuffers<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
00084 #endif
00085     cutilSafeCallNoSync( cudaMemcpy( buffDest.getRawPtr(), buffSrc.getRawPtr(), size*sizeof(T), cudaMemcpyDeviceToDevice) );
00086   }
00087 
00088   template <class T> inline
00089   ArrayRCP<const T> 
00090   CUDANodeMemoryModel::viewBuffer(size_t size, ArrayRCP<const T> buff) {
00091     CHECK_COMPUTE_BUFFER(buff);
00092     ArrayRCP<T> hostBuff;
00093     if (size != 0) {
00094       hostBuff = arcp<T>(size);
00095 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00096       std::cerr << "viewBuffer() -> ";
00097 #endif
00098       this->template copyFromBuffer<T>(size,buff,hostBuff());
00099     }
00100     return hostBuff;
00101   }
00102 
00103   template <class T> inline
00104   ArrayRCP<T> 
00105   CUDANodeMemoryModel::viewBufferNonConst(ReadWriteOption rw, size_t size, const ArrayRCP<T> &buff) {
00106     CHECK_COMPUTE_BUFFER(buff);
00107     // create a copy-back deallocator that copies back to "buff"
00108     CUDANodeCopyBackDeallocator<T> dealloc(buff.persistingView(0,size), rcpFromRef(*this));
00109     // it allocates a host buffer with the appropriate deallocator embedded
00110     ArrayRCP<T> hostBuff = dealloc.alloc();
00111     if (rw == ReadWrite) {
00112 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00113       std::cerr << "viewBufferNonConst(ReadWrite) -> ";
00114 #endif
00115       this->template copyFromBuffer<T>(size, buff, hostBuff());
00116     }  
00117     else {
00118 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00119       std::cerr << "viewBufferNonConst(WriteOnly)" << std::endl;
00120 #endif
00121     }
00122     // else rw == WriteOnly, and we need no copy
00123     return hostBuff;
00124   }
00125 
00126   inline void CUDANodeMemoryModel::readyBuffers(ArrayView<ArrayRCP<const char> > buffers, ArrayView<ArrayRCP<char> > ncBuffers) {
00127 #ifdef HAVE_KOKKOS_DEBUG
00128     for (size_t i=0; i < buffers.size(); ++i) {
00129       CHECK_COMPUTE_BUFFER(buffers[i]);
00130     }
00131     for (size_t i=0; i < ncBuffers.size(); ++i) {
00132       CHECK_COMPUTE_BUFFER(ncBuffers[i]);
00133     }
00134 #endif
00135     (void)buffers;
00136     (void)ncBuffers;
00137   }
00138 
00139 } // end of namespace Kokkos
00140 
00141 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends