Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CUDANodeMemoryModelImpl.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_CUDA_NODE_MEMORY_MODEL_IMPL_HPP_
00043 #define KOKKOS_CUDA_NODE_MEMORY_MODEL_IMPL_HPP_
00044 
00045 #include <cuda.h>
00046 #include <cuda_runtime.h>
00047 
00048 #include <iostream>
00049 #include <cstdlib>
00050 #include <stdexcept>
00051 
00052 #include <Teuchos_ArrayRCP.hpp>
00053 #include <Teuchos_ArrayView.hpp>
00054 #include <Teuchos_ParameterList.hpp>
00055 #include <Teuchos_TypeNameTraits.hpp>
00056 
00057 #include "Kokkos_NodeAPIConfigDefs.hpp"
00058 #include "Kokkos_BufferMacros.hpp"
00059 #include "Kokkos_CUDANodeMemoryModel.hpp" // in case someone directly included this implementation file
00060 #include "Kokkos_CUDANodeUtils.hpp"
00061 
00062 namespace Kokkos {
00063 
00064   template <class T> inline
00065   ArrayRCP<T> 
00066   CUDANodeMemoryModel::allocBuffer(size_t size) {
00067     // FINISH: if possible, check that there is room; else, boot someone
00068     T * devptr = NULL;
00069     const size_t sizeInBytes = sizeof(T)*size;
00070     if (size > 0) {
00071       cudaError_t err = cudaMalloc( (void**)&devptr, sizeInBytes );
00072       TEUCHOS_TEST_FOR_EXCEPTION( err != cudaSuccess, std::runtime_error,
00073         "Kokkos::CUDANodeMemoryModel::allocBuffer<" 
00074         << Teuchos::TypeNameTraits<T>::name () << ">: cudaMalloc() returned "
00075         "error: " << cudaGetErrorString (err) 
00076         );
00077 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00078       allocSize_ += sizeInBytes;
00079 #endif
00080     }
00081     CUDANodeDeallocator dealloc(sizeInBytes,rcpFromRef(*this));
00082     const bool OwnsMem = true;
00083     ArrayRCP<T> buff = arcp<T>(devptr,0,size,dealloc,OwnsMem);
00084     MARK_COMPUTE_BUFFER(buff);
00085     return buff;
00086   }
00087 
00088   template <class T> inline
00089   void CUDANodeMemoryModel::copyFromBuffer(size_t size, const ArrayRCP<const T> &buffSrc, const ArrayView<T> &hostDest) {
00090     CHECK_COMPUTE_BUFFER(buffSrc);
00091     TEUCHOS_TEST_FOR_EXCEPTION( (size_t)buffSrc.size() < size, std::runtime_error,
00092       "CUDANodeMemoryModel::copyFromBuffer<" 
00093       << Teuchos::TypeNameTraits<T>::name () 
00094       << ">: invalid copy.  Device source buffer has size " << buffSrc.size () 
00095       << ", which is less than the requested copy size " << size << ".");
00096     TEUCHOS_TEST_FOR_EXCEPTION( (size_t)hostDest.size() < size, std::runtime_error,
00097       "CUDANodeMemoryModel::copyFromBuffer<" 
00098       << Teuchos::TypeNameTraits<T>::name () 
00099       << ">: invalid copy.  Host destination buffer has size " << hostDest.size () 
00100       << ", which is less than the requested copy size " << size << ".");
00101 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00102     ++numCopiesD2H_;
00103     bytesCopiedD2H_ += size*sizeof(T);
00104 #endif
00105 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00106     std::cerr << "copyFromBuffer<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
00107 #endif
00108     cudaError_t err = cudaMemcpy( hostDest.getRawPtr(), buffSrc.getRawPtr(), size*sizeof(T), cudaMemcpyDeviceToHost);
00109     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00110       "Kokkos::CUDANodeMemoryModel::copyFromBuffer<"
00111       << Teuchos::TypeNameTraits<T>::name () 
00112       << ">(): cudaMemcpy() returned error: " << cudaGetErrorString (err) 
00113       );
00114   }
00115 
00116   template <class T> inline
00117   void CUDANodeMemoryModel::copyToBuffer(size_t size, const ArrayView<const T> &hostSrc, const ArrayRCP<T> &buffDest) {
00118     CHECK_COMPUTE_BUFFER(buffDest);
00119     TEUCHOS_TEST_FOR_EXCEPTION( (size_t)buffDest.size() < size, std::runtime_error,
00120       "CUDANodeMemoryModel::copyToBuffer<" 
00121       << Teuchos::TypeNameTraits<T>::name () 
00122       << ">: invalid copy.  Device destination buffer has size " << buffDest.size () 
00123       << ", which is less than the requested copy size " << size << ".");
00124     TEUCHOS_TEST_FOR_EXCEPTION( (size_t)hostSrc.size() < size, std::runtime_error,
00125       "CUDANodeMemoryModel::copyToBuffer<" 
00126       << Teuchos::TypeNameTraits<T>::name () 
00127       << ">: invalid copy.  Host source buffer has size " << hostSrc.size () 
00128       << ", which is less than the requested copy size " << size << ".");
00129 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00130     ++numCopiesH2D_;
00131     bytesCopiedH2D_ += size*sizeof(T);
00132 #endif
00133 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00134     std::cerr << "copyToBuffer<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
00135 #endif
00136     cudaError_t err = cudaMemcpy( buffDest.getRawPtr(), hostSrc.getRawPtr(), size*sizeof(T), cudaMemcpyHostToDevice);
00137     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00138       "Kokkos::CUDANodeMemoryModel::copyToBuffer<"
00139       << Teuchos::TypeNameTraits<T>::name () 
00140       << ">(): cudaMemcpy() returned error: " << cudaGetErrorString (err)
00141       );
00142   }
00143 
00144   template <class T> inline
00145   void CUDANodeMemoryModel::copyBuffers(size_t size, const ArrayRCP<const T> &buffSrc, const ArrayRCP<T> &buffDest) {
00146     CHECK_COMPUTE_BUFFER(buffSrc);
00147     CHECK_COMPUTE_BUFFER(buffDest);
00148 
00149     TEUCHOS_TEST_FOR_EXCEPTION( (size_t)buffDest.size() < size, std::runtime_error,
00150       "CUDANodeMemoryModel::copyBuffers<" 
00151       << Teuchos::TypeNameTraits<T>::name () 
00152       << ">: invalid copy.  Device destination buffer has size " << buffDest.size () 
00153       << ", which is less than the requested copy size " << size << ".");
00154     TEUCHOS_TEST_FOR_EXCEPTION( (size_t)buffSrc.size() < size, std::runtime_error,
00155       "CUDANodeMemoryModel::copyBuffers<" 
00156       << Teuchos::TypeNameTraits<T>::name () 
00157       << ">: invalid copy.  Device source buffer has size " << buffSrc.size () 
00158       << ", which is less than the requested copy size " << size << ".");
00159 
00160 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00161     ++numCopiesD2D_;
00162     bytesCopiedD2D_ += size*sizeof(T);
00163 #endif
00164 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00165     std::cerr << "copyBuffers<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
00166 #endif
00167     cudaError_t err = cudaMemcpy( buffDest.getRawPtr(), buffSrc.getRawPtr(), size*sizeof(T), cudaMemcpyDeviceToDevice);
00168     TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00169       "Kokkos::CUDANodeMemoryModel::copyBuffers<"
00170       << Teuchos::TypeNameTraits<T>::name () 
00171       << ">(): cudaMemcpy() returned error: " << cudaGetErrorString (err)
00172       );
00173   }
00174 
00175   template <class T> inline
00176   ArrayRCP<const T> 
00177   CUDANodeMemoryModel::viewBuffer(size_t size, ArrayRCP<const T> buff) {
00178     CHECK_COMPUTE_BUFFER(buff);
00179     ArrayRCP<T> hostBuff;
00180     if (size != 0) {
00181       hostBuff = arcp<T>(size);
00182 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00183       std::cerr << "viewBuffer() -> ";
00184 #endif
00185       this->template copyFromBuffer<T>(size,buff,hostBuff());
00186     }
00187     return hostBuff;
00188   }
00189 
00190   template <class T> inline
00191   ArrayRCP<T> 
00192   CUDANodeMemoryModel::viewBufferNonConst(ReadWriteOption rw, size_t size, const ArrayRCP<T> &buff) {
00193     CHECK_COMPUTE_BUFFER(buff);
00194     // Create a copy-back deallocator that copies back to buff.
00195     CUDANodeCopyBackDeallocator<T> dealloc(buff.persistingView(0,size), rcpFromRef(*this));
00196     // It allocates a host buffer with the appropriate deallocator embedded.
00197     ArrayRCP<T> hostBuff = dealloc.alloc();
00198     if (rw == ReadWrite) {
00199 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00200       std::cerr << "viewBufferNonConst(ReadWrite) -> ";
00201 #endif
00202       this->template copyFromBuffer<T>(size, buff, hostBuff());
00203     }  
00204     else {
00205 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_TRACE
00206       std::cerr << "viewBufferNonConst(WriteOnly)" << std::endl;
00207 #endif
00208     }
00209     // else rw == WriteOnly, and we need no copy
00210     return hostBuff;
00211   }
00212 
00213   inline void CUDANodeMemoryModel::readyBuffers(ArrayView<ArrayRCP<const char> > buffers, ArrayView<ArrayRCP<char> > ncBuffers) {
00214 #ifdef HAVE_KOKKOS_DEBUG
00215     for (size_t i=0; i < (size_t)buffers.size(); ++i) {
00216       CHECK_COMPUTE_BUFFER(buffers[i]);
00217     }
00218     for (size_t i=0; i < (size_t)ncBuffers.size(); ++i) {
00219       CHECK_COMPUTE_BUFFER(ncBuffers[i]);
00220     }
00221 #endif
00222     (void)buffers;
00223     (void)ncBuffers;
00224   }
00225 
00226 } // end of namespace Kokkos
00227 
00228 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends