Kokkos Node API and Local Linear Algebra Kernels Version of the Day
CrsMatrix_CUSP.cpp
00001 // ************************************************************************
00002 // 
00003 //          Kokkos: A Fast Kernel Package
00004 //              Copyright (2004) Sandia Corporation
00005 // 
00006 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00007 // license for use of this work by or on behalf of the U.S. Government.
00008 // 
00009 // This library is free software; you can redistribute it and/or modify
00010 // it under the terms of the GNU Lesser General Public License as
00011 // published by the Free Software Foundation; either version 2.1 of the
00012 // License, or (at your option) any later version.
00013 //  
00014 // This library is distributed in the hope that it will be useful, but
00015 // WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 //  
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // USA
00022 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00023 // 
00024 // ************************************************************************
00025 //@HEADER
00026 
00027 #include <Teuchos_UnitTestHarness.hpp>
00028 #include <Teuchos_TimeMonitor.hpp>
00029 #include <Teuchos_Time.hpp>
00030 #include <Teuchos_TypeNameTraits.hpp>
00031 #include <Teuchos_ScalarTraits.hpp>
00032 
00033 #include "Kokkos_ConfigDefs.hpp"
00034 #include "Kokkos_DefaultArithmetic.hpp"
00035 #include "Kokkos_DefaultSparseOps.hpp"
00036 #include "Kokkos_CUSPSparseOps.hpp"
00037 #include "Kokkos_LinAlgVersion.hpp"
00038 
00039 #include "Kokkos_ThrustGPUNode.hpp"
00040 
00041 #include <cuda.h>
00042 #include <cusp/version.h>
00043 #include <cusp/io/matrix_market.h>
00044 #include <cusp/csr_matrix.h>
00045 
00046 namespace {
00047 
00048   using Kokkos::MultiVector;
00049   using Kokkos::CrsMatrix;
00050   using Kokkos::CrsGraph;
00051   using Kokkos::DefaultArithmetic;
00052   using Kokkos::DefaultKernels;
00053   using Kokkos::CUSPSparseOps;
00054 
00055   using Kokkos::SerialNode;
00056   using Kokkos::ThrustGPUNode;
00057 
00058   using Teuchos::ArrayRCP;
00059   using Teuchos::RCP;
00060   using Teuchos::rcp;
00061   using Teuchos::null;
00062 
00063   using std::endl;
00064 
00065   RCP<SerialNode   > serialnode;
00066   RCP<ThrustGPUNode> thrustnode;
00067   cusp::crs_matrix<int, double, cusp::host_memory> cuspHostCSR;
00068   int deviceNumber;
00069   int verbose;
00070   std::string matrixFile;
00071   int printMatrix;  
00072   int numIters;
00073 
00074   TEUCHOS_STATIC_SETUP()
00075   {
00076     Teuchos::CommandLineProcessor &clp = Teuchos::UnitTestRepository::getCLP();
00077     verbose = 0;
00078     deviceNumber = 0;
00079     printMatrix = 0;
00080     numIters = 10;
00081     clp.addOutputSetupOptions(true);
00082     clp.setOption("device-num",&deviceNumber,"Device number for CUDA device.");
00083     clp.setOption("matrix-file",&matrixFile,"Filename for test matrix.");
00084     clp.setOption("verbose",&verbose,"Verbosity flag, zero for quiet.");
00085     clp.setOption("print-matrix",&printMatrix,"Print matrix.");
00086     clp.setOption("num-iters",&numIters,"Number of power method iterations.");
00087   }
00088 
00089   template <class Node>
00090   RCP<Node> getNode() {
00091     TEST_FOR_EXCEPT(true);
00092   }
00093 
00094   template <>
00095   RCP<SerialNode> getNode<SerialNode>() {
00096     if (serialnode == null) {
00097       Teuchos::ParameterList pl;
00098       serialnode = rcp(new SerialNode(pl));
00099     }
00100     return serialnode;
00101   }
00102 
00103   template <>
00104   RCP<ThrustGPUNode> getNode<ThrustGPUNode>() {
00105     if (thrustnode == null) {
00106       Teuchos::ParameterList pl;
00107       pl.set<int>("Device Number",deviceNumber);
00108       pl.set<int>("Verbose",verbose);
00109       thrustnode = rcp(new ThrustGPUNode(pl));
00110     }
00111     return thrustnode;
00112   }
00113 
00114   //
00115   // UNIT TESTS
00116   // 
00117 
00118   TEUCHOS_UNIT_TEST( AAAAA_Runs_First, SayHelloAndReadMatrix )
00119   {
00120     out << Kokkos::LinAlgVersion() << std::endl
00121         << "CUDA v"   << (CUDA_VERSION/1000)  << "." << (CUDA_VERSION / 10 % 100) << "." << (CUDA_VERSION % 10)     << std::endl
00122         << "Thrust v" << THRUST_MAJOR_VERSION << "." << THRUST_MINOR_VERSION      << "." << THRUST_SUBMINOR_VERSION << std::endl
00123         << "CUSP v"   << CUSP_MAJOR_VERSION   << "." << CUSP_MINOR_VERSION        << "." << CUSP_SUBMINOR_VERSION   << std::endl;
00124     // 
00125     out << "\nReading file: " << matrixFile << "..." << std::endl; 
00126     cusp::io::read_matrix_market_file(cuspHostCSR,matrixFile);
00127     if (printMatrix) {
00128       cusp::print_matrix(cuspHostCSR);
00129     }
00130   }
00131 
00132   TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL( SparseOps, SimplePowerMethod, SparseOps, Node )
00133   {
00134     RCP<Node> node = getNode<Node>();
00135     typedef typename SparseOps<void,int,Node>  OPS;
00136     typedef CrsGraph<int,Node,OPS>            GRPH;
00137     typedef CrsMatrix<double,int,Node,OPS>     MAT;
00138     typedef MultiVector<double,Node>            MV;
00139     typedef Teuchos::ScalarTraits<double>       ST;
00140     typedef DefaultArithmetic<MV>             BLAS;
00141     const double ONE = ST::one(),
00142                 ZERO = ST::zero();
00143     typename OPS::template rebind<double>::other dsm(node);
00144     out << "Testing with sparse ops: " << Teuchos::typeName(dsm) << std::endl;
00145     {
00146       // grab data from CUSP
00147       const int numRows = cuspHostCSR.row_offsets.size()-1;
00148       const int numNZ   = cuspHostCSR.column_indices.size();
00149       ArrayRCP<size_t>    rowOffsets = arcp<size_t>(numRows+1);
00150       // one is int, the other size_t. Therefore, we need to copy.
00151       std::copy( cuspHostCSR.row_offsets.begin(), cuspHostCSR.row_offsets.end(), rowOffsets.begin() );
00152       ArrayRCP<const int>    colIndices = arcp<const int>(cuspHostCSR.column_indices.data() );
00153       ArrayRCP<const double> values     = arcp<const int>(cuspHostCSR.values.data() );
00154       // create Kokkos CrsGraph and CrsMatrix objects, put data there
00155       GRPH G(numRows,node);
00156       MAT  A(G);
00157       G.set1DStructure(colIndices, rowOffsets(0,numRows), rowOffsets(1,numRows));
00158       A.set1DValues(values);
00159       A.finalize(true);
00160       // init sparse ops class
00161       dsm.initializeStructure(G);
00162       dsm.initializeValues(A);
00163     }
00164     MV z(node), q(node);
00165     z.initializeValues( numRows,1,node->template allocBuffer<double>(numRows),numRows);
00166     q.initializeValues(numRows,1,node->template allocBuffer<double>(numRows),numRows);
00167     BLAS::Init( z, ONE);
00168     BLAS::Init( q, ZERO);
00169     // power method, ten iterations
00170     for (int iter = 0; iter < numIters; ++iter) {
00171       double z_z = BLAS::Norm2Squared(z);
00172       dsm.multiply(Teuchos::NO_TRANS, ONE/ST::squareroot(z_z), q, z);
00173     }
00174     double lambda = BLAS::Norm2Squared(z);
00175     out << "After " << numIters << "power iterations, lambda == " << lambda << std::endl;
00176   }
00177 
00178   TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( SparseOps, SimplePowerMethod, DefaultDeviceSparseOps, SerialNode )
00179   TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( SparseOps, SimplePowerMethod, DefaultDeviceSparseOps, ThrustGPUNode )
00180   TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( SparseOps, SimplePowerMethod, CUSPSparseOps         , ThrustGPUNode )
00181 
00182 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends