Kokkos Node API and Local Linear Algebra Kernels Version of the Day
CrsMatrix_DefaultMultiply.cpp
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #include <Teuchos_UnitTestHarness.hpp>
00043 #include <Teuchos_TimeMonitor.hpp>
00044 #include <Teuchos_Time.hpp>
00045 #include <Teuchos_TypeNameTraits.hpp>
00046 #include <Teuchos_ScalarTraits.hpp>
00047 
00048 #include "Kokkos_ConfigDefs.hpp"
00049 #include "Kokkos_DefaultArithmetic.hpp"
00050 #include "Kokkos_DefaultKernels.hpp"
00051 #include "Kokkos_Version.hpp"
00052 
00053 #include "Kokkos_SerialNode.hpp"
00054 #ifdef HAVE_KOKKOS_TBB
00055 #include "Kokkos_TBBNode.hpp"
00056 #endif
00057 #ifdef HAVE_KOKKOS_THREADPOOL
00058 #include "Kokkos_TPINode.hpp"
00059 #endif
00060 #ifdef HAVE_KOKKOS_OPENMP
00061 #include "Kokkos_OpenMPNode.hpp"
00062 #endif
00063 
00064 #if defined(HAVE_KOKKOS_CUSPARSE) && defined(HAVE_KOKKOS_THRUST)
00065 #define TEST_CUSPARSE
00066 #ifdef HAVE_KOKKOS_CUDA_FLOAT
00067 #define TEST_CUSPARSE_FLOAT
00068 #endif
00069 #ifdef HAVE_KOKKOS_CUDA_DOUBLE
00070 #define TEST_CUSPARSE_DOUBLE
00071 #endif
00072 #ifdef HAVE_KOKKOS_CUDA_COMPLEX_FLOAT
00073 #define TEST_CUSPARSE_COMPLEX_FLOAT
00074 #endif
00075 #ifdef HAVE_KOKKOS_CUDA_COMPLEX_DOUBLE
00076 #define TEST_CUSPARSE_COMPLEX_DOUBLE
00077 #endif
00078 #endif
00079 
00080 #ifdef TEST_CUSPARSE
00081 #include "Kokkos_ThrustGPUNode.hpp"
00082 #include "Kokkos_CUSPARSEOps.hpp"
00083 #endif
00084 
00085 namespace {
00086 
00087   using Kokkos::MultiVector;
00088   using Kokkos::DefaultArithmetic;
00089   using Kokkos::DefaultKernels;
00090   using Teuchos::ArrayRCP;
00091   using Teuchos::RCP;
00092   using Teuchos::rcp;
00093   using Teuchos::null;
00094   using std::endl;
00095 
00096   using Kokkos::SerialNode;
00097   RCP<SerialNode> snode;
00098 #ifdef HAVE_KOKKOS_TBB
00099   using Kokkos::TBBNode;
00100   RCP<TBBNode> tbbnode;
00101 #endif
00102 #ifdef HAVE_KOKKOS_THREADPOOL
00103   using Kokkos::TPINode;
00104   RCP<TPINode> tpinode;
00105 #endif
00106 #ifdef HAVE_KOKKOS_OPENMP
00107   using Kokkos::OpenMPNode;
00108   RCP<OpenMPNode> ompnode;
00109 #endif
00110 #ifdef TEST_CUSPARSE
00111   using Kokkos::ThrustGPUNode;
00112   RCP<ThrustGPUNode> gpunode;
00113 #endif
00114 
00115   int N = 1000;
00116 
00117   TEUCHOS_STATIC_SETUP()
00118   {
00119     Teuchos::CommandLineProcessor &clp = Teuchos::UnitTestRepository::getCLP();
00120     clp.addOutputSetupOptions(true);
00121     clp.setOption("test-size",&N,"Vector length for tests.");
00122   }
00123 
00124   template <class Node>
00125   RCP<Node> getNode() {
00126     using Teuchos::TypeNameTraits;
00127     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "getNode() has not been "
00128       "specialized for the Kokkos Node type \"" << TypeNameTraits<Node>::name ()
00129       << "\".  Please report this bug to the Kokkos developers.");
00130   }
00131 
00132   template <>
00133   RCP<SerialNode> getNode<SerialNode>() {
00134     if (snode == null) {
00135       Teuchos::ParameterList pl;
00136       snode = rcp(new SerialNode(pl));
00137     }
00138     return snode;
00139   }
00140 
00141 #ifdef HAVE_KOKKOS_TBB
00142   template <>
00143   RCP<TBBNode> getNode<TBBNode>() {
00144     if (tbbnode == null) {
00145       Teuchos::ParameterList pl;
00146       tbbnode = rcp (new TBBNode (pl));
00147     }
00148     return tbbnode;
00149   }
00150 #endif
00151 
00152 #ifdef HAVE_KOKKOS_OPENMP
00153   template <>
00154   RCP<OpenMPNode> getNode<OpenMPNode>() {
00155     if (ompnode == null) {
00156       Teuchos::ParameterList pl;
00157       pl.set<int>("Num Threads",0);
00158       ompnode = rcp (new OpenMPNode (pl));
00159     }
00160     return ompnode;
00161   }
00162 #endif
00163 
00164 #ifdef HAVE_KOKKOS_THREADPOOL
00165   template <>
00166   RCP<TPINode> getNode<TPINode>() {
00167     if (tpinode == null) {
00168       Teuchos::ParameterList pl;
00169       pl.set<int>("Num Threads",0);
00170       tpinode = rcp(new TPINode(pl));
00171     }
00172     return tpinode;
00173   }
00174 #endif
00175 
00176 #ifdef TEST_CUSPARSE
00177   template <>
00178   RCP<ThrustGPUNode> getNode<ThrustGPUNode>() {
00179     if (gpunode == null) {
00180       Teuchos::ParameterList pl;
00181       pl.set<int>("Num Threads",0);
00182       gpunode = rcp(new ThrustGPUNode(pl));
00183     }
00184     return gpunode;
00185   }
00186 #endif
00187 
00188   //
00189   // UNIT TESTS
00190   //
00191 
00192   TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsMatrix, SparseMultiply, Ordinal, Scalar, Node )
00193   {
00194     RCP<Node> node = getNode<Node>();
00195     typedef typename DefaultKernels<Scalar,Ordinal,Node>::SparseOps          DSM;
00196     typedef typename DSM::template bind_scalar<Scalar>::other_type           OPS;
00197     typedef typename OPS::template matrix<Scalar,Ordinal,Node>::matrix_type  MAT;
00198     typedef typename OPS::template graph<Ordinal,Node>::graph_type          GRPH;
00199     typedef MultiVector<Scalar,Node>                                          MV;
00200     typedef Teuchos::ScalarTraits<Scalar>                                     ST;
00201     const Scalar ONE = ST::one(),
00202                 ZERO = ST::zero();
00203     // generate tridiagonal matrix:
00204     // [ 2 -1                   ]
00205     // [-1  3  -1               ]
00206     // [   -1   3  -1           ]
00207     // [                        ]
00208     // [                -1  3 -1]
00209     // [                   -1  2]
00210     if (N<2) return;
00211     RCP<GRPH> G = rcp(new GRPH (N,node,null) );
00212     RCP<MAT>  A = rcp(new MAT  (G,null) );
00213     // allocate buffers for ptrs, indices and values
00214     const size_t totalNNZ = 3*N - 2;
00215     ArrayRCP<size_t> ptrs(N+1);
00216     ArrayRCP<Ordinal>   inds(totalNNZ);
00217     ArrayRCP<Scalar>    vals(totalNNZ);
00218     // fill the buffers on the host
00219     {
00220       size_t NNZsofar = 0;
00221       ptrs[0] = NNZsofar;
00222       inds[NNZsofar] = 0; inds[NNZsofar+1] =  1;
00223       vals[NNZsofar] = 2; vals[NNZsofar+1] = -1;
00224       NNZsofar += 2;
00225       for (int i=1; i != N-1; ++i) {
00226         ptrs[i] = NNZsofar;
00227         inds[NNZsofar] = i-1; inds[NNZsofar+1] = i; inds[NNZsofar+2] = i+1;
00228         vals[NNZsofar] =  -1; vals[NNZsofar+1] = 3; vals[NNZsofar+2] =  -1;
00229         NNZsofar += 3;
00230       }
00231       ptrs[N-1] = NNZsofar;
00232       inds[NNZsofar] = N-2; inds[NNZsofar+1] = N-1;
00233       vals[NNZsofar] =  -1; vals[NNZsofar+1] = 2;
00234       NNZsofar += 2;
00235       ptrs[N]   = NNZsofar;
00236       TEUCHOS_TEST_FOR_EXCEPT(NNZsofar != totalNNZ);
00237     }
00238     G->setStructure(ptrs, inds);
00239     ptrs = Teuchos::null;
00240     inds = Teuchos::null;
00241     A->setValues(vals);
00242     vals = Teuchos::null;
00243     OPS::finalizeGraphAndMatrix(Teuchos::UNDEF_TRI,Teuchos::NON_UNIT_DIAG,*G,*A,null);
00244     Teuchos::EDiag diag;
00245     Teuchos::EUplo uplo;
00246     G->getMatDesc(uplo,diag);
00247     TEST_EQUALITY_CONST( uplo, Teuchos::UNDEF_TRI );
00248     TEST_EQUALITY_CONST( diag, Teuchos::NON_UNIT_DIAG );
00249     OPS dsm(node);
00250     out << "Testing with sparse ops: " << Teuchos::typeName(dsm) << std::endl;
00251     dsm.setGraphAndMatrix(G,A);
00252 
00253     ArrayRCP<Scalar> xdat, axdat;
00254     xdat  = node->template allocBuffer<Scalar>(N);
00255     axdat = node->template allocBuffer<Scalar>(N);
00256     MV X(node), AX(node);
00257     X.initializeValues( N,1, xdat,N);
00258     AX.initializeValues(N,1,axdat,N);
00259     DefaultArithmetic<MV>::Init( X,1);
00260 #ifdef HAVE_KOKKOS_DEBUG
00261     node->sync();
00262 #endif
00263     // AX = A*X
00264     dsm.multiply(Teuchos::NO_TRANS,ONE,X,AX);
00265 #ifdef HAVE_KOKKOS_DEBUG
00266     node->sync();
00267 #endif
00268     // AX should be all ones
00269     {
00270       ArrayRCP<const Scalar> axview = node->template viewBuffer<Scalar>(N,axdat);
00271       Scalar err = ZERO;
00272       for (int i=0; i<N; ++i) {
00273         err += ST::magnitude(ONE - axview[i]);
00274       }
00275       TEST_EQUALITY_CONST(err, ZERO);
00276     }
00277     // do that same multiplication, testing alpha=-1 and beta=1 accumulation
00278     // AX = AX - A*X = A*X - A*X = 0
00279     dsm.multiply(Teuchos::NO_TRANS,-ONE,X,ONE,AX);
00280     // AX should be zero
00281     {
00282       ArrayRCP<const Scalar> axview = node->template viewBuffer<Scalar>(N,axdat);
00283       ArrayRCP<const Scalar> xview = node->template viewBuffer<Scalar>(N,xdat);
00284       Scalar err = ZERO, nrm = ZERO;
00285       for (int i=0; i<N; ++i) {
00286         err += ST::magnitude(axview[i]);
00287         nrm += ST::magnitude(xview[i]);
00288       }
00289       TEST_INEQUALITY_CONST(nrm, ZERO);
00290       TEST_EQUALITY_CONST(err, ZERO);
00291     }
00292     xdat = null;
00293     axdat = null;
00294   }
00295 
00296 #include "CrsMatrix_DefaultMultiplyTests.hpp"
00297 
00298 #define ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, NODE ) \
00299       TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsMatrix,        SparseMultiply, ORDINAL, SCALAR, NODE ) \
00300       TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( DefaultSparseOps, NodeTest,       ORDINAL, SCALAR, NODE ) \
00301       TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( DefaultSparseOps, ResubmitMatrix, ORDINAL, SCALAR, NODE )
00302 
00303 #define UNIT_TEST_SERIALNODE(ORDINAL, SCALAR) \
00304       ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, SerialNode )
00305 
00306 #ifdef HAVE_KOKKOS_TBB
00307 #define UNIT_TEST_TBBNODE(ORDINAL, SCALAR) \
00308       ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, TBBNode )
00309 #else
00310 #define UNIT_TEST_TBBNODE(ORDINAL, SCALAR)
00311 #endif
00312 
00313 #ifdef HAVE_KOKKOS_OPENMP
00314 #define UNIT_TEST_OPENMPNODE(ORDINAL, SCALAR) \
00315       ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, OpenMPNode )
00316 #else
00317 #define UNIT_TEST_OPENMPNODE(ORDINAL, SCALAR)
00318 #endif
00319 
00320 #ifdef HAVE_KOKKOS_THREADPOOL
00321 #define UNIT_TEST_TPINODE(ORDINAL, SCALAR) \
00322       ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, TPINode )
00323 #else
00324 #define UNIT_TEST_TPINODE(ORDINAL, SCALAR)
00325 #endif
00326 
00327 #define UNIT_TEST_GROUP_ORDINAL_SCALAR( ORDINAL, SCALAR ) \
00328         UNIT_TEST_SERIALNODE( ORDINAL, SCALAR ) \
00329         UNIT_TEST_TBBNODE( ORDINAL, SCALAR ) \
00330         UNIT_TEST_OPENMPNODE( ORDINAL, SCALAR ) \
00331         UNIT_TEST_TPINODE( ORDINAL, SCALAR )
00332 
00333 #define UNIT_TEST_GROUP_ORDINAL( ORDINAL ) \
00334         UNIT_TEST_GROUP_ORDINAL_SCALAR(ORDINAL, int) \
00335         UNIT_TEST_GROUP_ORDINAL_SCALAR(ORDINAL, float)
00336 
00337 typedef std::complex<float>  ComplexFloat;
00338 typedef std::complex<double> ComplexDouble;
00339 #ifdef TEST_CUSPARSE_FLOAT
00340 ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( int, float, ThrustGPUNode )
00341 #endif
00342 #ifdef TEST_CUSPARSE_DOUBLE
00343 ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( int, double, ThrustGPUNode )
00344 #endif
00345 #ifdef TEST_CUSPARSE_COMPLEX_FLOAT
00346 ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( int, ComplexFloat, ThrustGPUNode )
00347 #endif
00348 #ifdef TEST_CUSPARSE_COMPLEX_DOUBLE
00349 ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( int, ComplexDouble, ThrustGPUNode )
00350 #endif
00351 
00352      UNIT_TEST_GROUP_ORDINAL(int)
00353      typedef short int ShortInt; UNIT_TEST_GROUP_ORDINAL(ShortInt)
00354 
00355 }
00356 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends