Kokkos Node API and Local Linear Algebra Kernels Version of the Day
CrsMatrix_CUSPARSE.cpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #include <Teuchos_UnitTestHarness.hpp>
00043 #include "Kokkos_DefaultArithmetic.hpp"
00044 
00045 #include <Kokkos_ConfigDefs.hpp>
00046 #include <Kokkos_ThrustGPUNode.hpp>
00047 #include <Kokkos_CUSPARSEOps.hpp>
00048 
00049 namespace {
00050 
00051   using std::endl;
00052   using Teuchos::ArrayRCP;
00053   using Teuchos::RCP;
00054   using Teuchos::arcp;
00055   using Teuchos::rcp;
00056   using Kokkos::ThrustGPUNode;
00057   using Teuchos::ParameterList;
00058   using Teuchos::null;
00059 
00060   TEUCHOS_UNIT_TEST( CrsMatrix, CUSPARSENative )
00061   {
00062     Kokkos::CUSPARSEdetails::Session::init();
00063     RCP<const cusparseHandle_t> sess = Kokkos::CUSPARSEdetails::Session::getHandle();
00064     
00065     ParameterList pl;
00066     RCP<ThrustGPUNode> gpunode = rcp(new ThrustGPUNode(pl));
00067 
00068     cusparseStatus_t status ; 
00069 
00070     // problem characteristics
00071     int n , nnz , nnz_vector ; 
00072     float dzero =0.0; 
00073     float dtwo =2.0; 
00074     float dthree =3.0; 
00075     float dfive =5.0;
00076     nnz_vector = 3; 
00077     n=4; nnz=9; 
00078 
00079     out << "Testing example" << endl;
00080 
00081     // device pointers
00082     ArrayRCP<int> cooRowIndex, cooColIndex, csrRowPtr;
00083     ArrayRCP<float> cooVal;
00084     ArrayRCP<int> xInd;
00085     ArrayRCP<float> xVal, y, z;
00086     cooRowIndex = gpunode->allocBuffer<int>(nnz);
00087     cooColIndex = gpunode->allocBuffer<int>(nnz);
00088     cooVal      = gpunode->allocBuffer<float>(nnz);
00089     y           = gpunode->allocBuffer<float>(2*n);
00090     xInd        = gpunode->allocBuffer<int>(nnz_vector);
00091     xVal        = gpunode->allocBuffer<float>(nnz_vector);
00092     csrRowPtr   = gpunode->allocBuffer<int>(n+1);
00093     z           = gpunode->allocBuffer<float>(2*(n+1));
00094 
00095     // init data on host
00096     {
00097       // host pointers
00098       ArrayRCP<int>   cooRowIndexHostPtr, cooColIndexHostPtr;
00099       ArrayRCP<float> cooValHostPtr;
00100       ArrayRCP<int> xIndHostPtr;
00101       ArrayRCP<float> xValHostPtr, yHostPtr;
00102 
00103       cooRowIndexHostPtr = gpunode->viewBufferNonConst(Kokkos::WriteOnly, cooRowIndex.size(), cooRowIndex);
00104       cooColIndexHostPtr = gpunode->viewBufferNonConst(Kokkos::WriteOnly, cooColIndex.size(), cooColIndex);
00105       cooValHostPtr      = gpunode->viewBufferNonConst(Kokkos::WriteOnly, cooVal.size(),      cooVal);
00106       yHostPtr           = gpunode->viewBufferNonConst(Kokkos::WriteOnly, y.size(),           y);
00107       xIndHostPtr        = gpunode->viewBufferNonConst(Kokkos::WriteOnly, xInd.size(),        xInd);
00108       xValHostPtr        = gpunode->viewBufferNonConst(Kokkos::WriteOnly, xVal.size(),        xVal);
00109 
00110       cooRowIndexHostPtr[0] = 0; cooColIndexHostPtr[0] = 0; cooValHostPtr[0] = 1.0; 
00111       cooRowIndexHostPtr[1] = 0; cooColIndexHostPtr[1] = 2; cooValHostPtr[1] = 2.0; 
00112       cooRowIndexHostPtr[2] = 0; cooColIndexHostPtr[2] = 3; cooValHostPtr[2] = 3.0; 
00113       cooRowIndexHostPtr[3] = 1; cooColIndexHostPtr[3] = 1; cooValHostPtr[3] = 4.0; 
00114       cooRowIndexHostPtr[4] = 2; cooColIndexHostPtr[4] = 0; cooValHostPtr[4] = 5.0; 
00115       cooRowIndexHostPtr[5] = 2; cooColIndexHostPtr[5] = 2; cooValHostPtr[5] = 6.0; 
00116       cooRowIndexHostPtr[6] = 2; cooColIndexHostPtr[6] = 3; cooValHostPtr[6] = 7.0; 
00117       cooRowIndexHostPtr[7] = 3; cooColIndexHostPtr[7] = 1; cooValHostPtr[7] = 8.0; 
00118       cooRowIndexHostPtr[8] = 3; cooColIndexHostPtr[8] = 3; cooValHostPtr[8] = 9.0; 
00119       yHostPtr[0] = 10.0; xIndHostPtr[0]=0; xValHostPtr[0]=100.0; 
00120       yHostPtr[1] = 20.0; xIndHostPtr[1]=1; xValHostPtr[1]=200.0; 
00121       yHostPtr[2] = 30.0; 
00122       yHostPtr[3] = 40.0; xIndHostPtr[2]=3; xValHostPtr[2]=400.0; 
00123       yHostPtr[4] = 50.0; 
00124       yHostPtr[5] = 60.0; 
00125       yHostPtr[6] = 70.0; 
00126       yHostPtr[7] = 80.0;
00127 
00128       cooRowIndexHostPtr = null;
00129       cooColIndexHostPtr = null;
00130       cooValHostPtr = null;
00131       yHostPtr = null;
00132       xIndHostPtr = null;
00133       xValHostPtr = null;
00134     }
00135 
00136     /* create and setup matrix descriptor */
00137     Teuchos::RCP<cusparseMatDescr_t> descr = Kokkos::CUSPARSEdetails::createMatDescr();
00138     cusparseSetMatType(      *descr , CUSPARSE_MATRIX_TYPE_GENERAL ) ; 
00139     cusparseSetMatIndexBase( *descr , CUSPARSE_INDEX_BASE_ZERO ) ;
00140     status = cusparseXcoo2csr(*sess,cooRowIndex.getRawPtr(),nnz,n, csrRowPtr.getRawPtr() , CUSPARSE_INDEX_BASE_ZERO ) ; 
00141     if ( status != CUSPARSE_STATUS_SUCCESS ) {
00142        success = false; return;
00143     }
00144     // scatter test
00145     status = cusparseSsctr(*sess , nnz_vector , xVal.getRawPtr() , xInd.getRawPtr() , y.getRawPtr()+n , CUSPARSE_INDEX_BASE_ZERO ) ;
00146     if ( status != CUSPARSE_STATUS_SUCCESS ) { 
00147       success = false; return;
00148     }
00149     // sparse matvec
00150     status = cusparseScsrmv ( *sess , CUSPARSE_OPERATION_NON_TRANSPOSE , n , n , nnz , &dtwo , *descr , cooVal.getRawPtr() , csrRowPtr.getRawPtr() , cooColIndex.getRawPtr() , y.getRawPtr() , &dthree , y.getRawPtr()+n) ; 
00151     if ( status != CUSPARSE_STATUS_SUCCESS ) {
00152        success = false; return;
00153     }
00154     // matvec result
00155     ArrayRCP<const float> yHostPtr, zHostPtr;
00156     yHostPtr = gpunode->viewBuffer<float>(y.size(), y);
00157     // mat-multivec
00158     cudaError_t cudaStat1 = cudaMemset((void *)z.getRawPtr(),0, 2*(n+1)*sizeof(z[0])); 
00159     if ( cudaStat1 != cudaSuccess ) {
00160        success = false; return;
00161     } 
00162     status = cusparseScsrmm( *sess , CUSPARSE_OPERATION_NON_TRANSPOSE , n , 2 , n , nnz , &dfive , *descr , cooVal.getRawPtr() , csrRowPtr.getRawPtr() , cooColIndex.getRawPtr() , y.getRawPtr(), n, &dzero, z.getRawPtr(), n+1); 
00163     if ( status != CUSPARSE_STATUS_SUCCESS ) {
00164        success = false; return;
00165     }
00166     zHostPtr = gpunode->viewBuffer<float>( z.size(), z );
00167     if ( status != CUSPARSE_STATUS_SUCCESS ) {
00168        success = false; return;
00169     }
00170     if ( status != CUSPARSE_STATUS_SUCCESS ) { 
00171        success = false; return;
00172     }
00173     if (    (zHostPtr[0] != 950.0)    || (zHostPtr[1] != 400.0)   || (zHostPtr[2] != 2550.0)  || (zHostPtr[3] != 2600.0) 
00174          || (zHostPtr[4] != 0.0)      || (zHostPtr[5] != 49300.0) || (zHostPtr[6] != 15200.0) || (zHostPtr[7] != 132300.0) 
00175          || (zHostPtr[8] != 131200.0) || (zHostPtr[9] != 0.0)     || (yHostPtr[0] != 10.0)    || (yHostPtr[1] != 20.0) 
00176          || (yHostPtr[2] != 30.0)     || (yHostPtr[3] != 40.0)    || (yHostPtr[4] != 680.0)   || (yHostPtr[5] != 760.0) 
00177          || (yHostPtr[6] != 1230.0)   || (yHostPtr[7] != 2240.0))
00178     {
00179        success = false;
00180     } else {
00181        success = true;
00182     }
00183   }
00184 
00185 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends