Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_ThrustGPUNode.cpp
00001 #include "Kokkos_ThrustGPUNode.hpp"
00002 #include <Teuchos_TestForException.hpp>
00003 #include <iostream>
00004 #include <cuda_runtime.h>
00005 
00006 namespace Kokkos {
00007 
00008   ThrustGPUNode::ThrustGPUNode(Teuchos::ParameterList &pl)
00009   {
00010     using std::cout;
00011     using std::cerr;
00012     using std::endl;
00013 
00014     // get node parameters
00015     int device = pl.get<int>("Device Number",0);
00016     int verbose = pl.get<int>("Verbose",0);
00017     // set device
00018     int deviceCount; cudaGetDeviceCount(&deviceCount); 
00019     TEST_FOR_EXCEPTION(deviceCount == 0, std::runtime_error,
00020         "ThrustGPUNode::ThrustGPUNode(): system has no CUDA devices.");
00021     if (device < 0 || device >= deviceCount) {
00022       cerr << "ThrustGPUNode::ThrustGPUNode(): specified device number not valid. Using device 0." << endl;
00023       device = 0;
00024     }
00025     cudaDeviceProp deviceProp; 
00026     cudaSetDevice(device);
00027     cudaGetDeviceProperties(&deviceProp, device); 
00028     // as of CUDA 2.1, device prop contains the following fields
00029     // char name[256]; 
00030     // size_t totalGlobalMem, sharedMemPerBlock; 
00031     // int regsPerBlock, warpSize; 
00032     // size_t memPitch; 
00033     // int maxThreadsPerBlock, maxThreadsDim[3], maxGridSize[3]; 
00034     // size_t totalConstMem; 
00035     // int major, minor;
00036     // int clockRate; 
00037     // size_t textureAlignment; 
00038     // int deviceOverlap; 
00039     // int multiProcessorCount; 
00040     // int kernelExecTimeoutEnabled; 
00041     if (verbose) {
00042       cout << "ThrustGPUNode attached to device #" << device << " \"" << deviceProp.name 
00043         << "\", of compute capability " << deviceProp.major << "." << deviceProp.minor
00044         << endl;
00045     }
00046     totalMem_ = deviceProp.totalGlobalMem;
00047   } 
00048 
00049   ThrustGPUNode::~ThrustGPUNode() {}
00050 
00051   void ThrustGPUNode::sync() const {
00052     cudaError err = cudaThreadSynchronize();
00053     TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
00054         "Kokkos::ThrustGPUNode::sync(): cudaThreadSynchronize() returned error " << err );
00055   }
00056 
00057 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends