Kokkos Node API and Local Linear Algebra Kernels Version of the Day
CrsMatrix_DefaultMultiply.cpp
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #include <Teuchos_UnitTestHarness.hpp>
00043 #include <Teuchos_TimeMonitor.hpp>
00044 #include <Teuchos_Time.hpp>
00045 #include <Teuchos_TypeNameTraits.hpp>
00046 #include <Teuchos_ScalarTraits.hpp>
00047 
00048 #include "Kokkos_ConfigDefs.hpp"
00049 #include "Kokkos_DefaultArithmetic.hpp"
00050 #include "Kokkos_DefaultKernels.hpp"
00051 #include "Kokkos_Version.hpp"
00052 
00053 #include "Kokkos_SerialNode.hpp"
00054 #ifdef HAVE_KOKKOS_TBB
00055 #include "Kokkos_TBBNode.hpp"
00056 #endif
00057 #ifdef HAVE_KOKKOS_THREADPOOL
00058 #include "Kokkos_TPINode.hpp"
00059 #endif
00060 #ifdef HAVE_KOKKOS_OPENMP
00061 #include "Kokkos_OpenMPNode.hpp"
00062 #endif
00063 
00064 #if (defined(HAVE_KOKKOS_CUSPARSE) || defined(HAVE_KOKKOS_CUSP)) && defined(HAVE_KOKKOS_THRUST)
00065   #include "Kokkos_ThrustGPUNode.hpp"
00066   #ifdef HAVE_KOKKOS_CUSPARSE
00067     #include "Kokkos_CUSPARSEOps.hpp"
00068   #endif
00069   #ifdef HAVE_KOKKOS_CUSP
00070     #include "Kokkos_CuspOps.hpp"
00071   #endif
00072   #define TEST_CUDA
00073   #ifdef HAVE_KOKKOS_CUDA_FLOAT
00074     #define TEST_CUDA_FLOAT
00075   #endif
00076   #ifdef HAVE_KOKKOS_CUDA_DOUBLE
00077     #define TEST_CUDA_DOUBLE
00078   #endif
00079   #ifdef HAVE_KOKKOS_CUDA_COMPLEX_FLOAT
00080     #define TEST_CUDA_COMPLEX_FLOAT
00081   #endif
00082   #ifdef HAVE_KOKKOS_CUDA_COMPLEX_DOUBLE
00083     #define TEST_CUDA_COMPLEX_DOUBLE
00084   #endif
00085 #endif
00086 
00087 namespace {
00088 
00089   using Teuchos::ParameterList;
00090   using Teuchos::parameterList;
00091   using Kokkos::MultiVector;
00092   using Kokkos::DefaultArithmetic;
00093   using Kokkos::DefaultKernels;
00094   using Teuchos::ArrayRCP;
00095   using Teuchos::RCP;
00096   using Teuchos::rcp;
00097   using Teuchos::null;
00098   using Teuchos::tuple;
00099   using std::endl;
00100 
00101   using Kokkos::SerialNode;
00102   RCP<SerialNode> snode;
00103 #ifdef HAVE_KOKKOS_TBB
00104   using Kokkos::TBBNode;
00105   RCP<TBBNode> tbbnode;
00106 #endif
00107 #ifdef HAVE_KOKKOS_THREADPOOL
00108   using Kokkos::TPINode;
00109   RCP<TPINode> tpinode;
00110 #endif
00111 #ifdef HAVE_KOKKOS_OPENMP
00112   using Kokkos::OpenMPNode;
00113   RCP<OpenMPNode> ompnode;
00114 #endif
00115 #ifdef TEST_CUDA
00116   using Kokkos::ThrustGPUNode;
00117   RCP<ThrustGPUNode> gpunode;
00118 #endif
00119 
00120   int N = 1000;
00121 
00122   TEUCHOS_STATIC_SETUP()
00123   {
00124     Teuchos::CommandLineProcessor &clp = Teuchos::UnitTestRepository::getCLP();
00125     clp.addOutputSetupOptions(true);
00126     clp.setOption("test-size",&N,"Vector length for tests.");
00127   }
00128 
00129   template <class Node>
00130   RCP<Node> getNode() {
00131     using Teuchos::TypeNameTraits;
00132     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "getNode() has not been "
00133       "specialized for the Kokkos Node type \"" << TypeNameTraits<Node>::name ()
00134       << "\".  Please report this bug to the Kokkos developers.");
00135   }
00136 
00137   template <>
00138   RCP<SerialNode> getNode<SerialNode>() {
00139     if (snode == null) {
00140       Teuchos::ParameterList pl;
00141       snode = rcp(new SerialNode(pl));
00142     }
00143     return snode;
00144   }
00145 
00146 #ifdef HAVE_KOKKOS_TBB
00147   template <>
00148   RCP<TBBNode> getNode<TBBNode>() {
00149     if (tbbnode == null) {
00150       Teuchos::ParameterList pl;
00151       tbbnode = rcp (new TBBNode (pl));
00152     }
00153     return tbbnode;
00154   }
00155 #endif
00156 
00157 #ifdef HAVE_KOKKOS_OPENMP
00158   template <>
00159   RCP<OpenMPNode> getNode<OpenMPNode>() {
00160     if (ompnode == null) {
00161       Teuchos::ParameterList pl;
00162       pl.set<int>("Num Threads",0);
00163       ompnode = rcp (new OpenMPNode (pl));
00164     }
00165     return ompnode;
00166   }
00167 #endif
00168 
00169 #ifdef HAVE_KOKKOS_THREADPOOL
00170   template <>
00171   RCP<TPINode> getNode<TPINode>() {
00172     if (tpinode == null) {
00173       Teuchos::ParameterList pl;
00174       pl.set<int>("Num Threads",0);
00175       tpinode = rcp(new TPINode(pl));
00176     }
00177     return tpinode;
00178   }
00179 #endif
00180 
00181 #ifdef TEST_CUDA
00182   template <>
00183   RCP<ThrustGPUNode> getNode<ThrustGPUNode>() {
00184     if (gpunode == null) {
00185       Teuchos::ParameterList pl;
00186       pl.set<int>("Num Threads",0);
00187       gpunode = rcp(new ThrustGPUNode(pl));
00188     }
00189     return gpunode;
00190   }
00191 #endif
00192 
00193   //
00194   // UNIT TESTS
00195   //
00196 
00197   TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsMatrix, TransposeMultiply, Ordinal, Scalar, Node )
00198   {
00199     RCP<Node> node = getNode<Node>();
00200     typedef typename DefaultKernels<Scalar,Ordinal,Node>::SparseOps          DSM;
00201     typedef typename DSM::template bind_scalar<Scalar>::other_type           OPS;
00202     typedef typename OPS::template matrix<Scalar,Ordinal,Node>::matrix_type  MAT;
00203     typedef typename OPS::template graph<Ordinal,Node>::graph_type          GRPH;
00204     typedef MultiVector<Scalar,Node>                                          MV;
00205     typedef Teuchos::ScalarTraits<Scalar>                                     ST;
00206     const Scalar ONE = ST::one(),
00207                 ZERO = ST::zero();
00208     // generate rectangular matrix:
00209     // [-1 1]
00210     // [-1 1]
00211     // [-1 1]
00212     if (N<2) return;
00213     RCP<GRPH> G = rcp(new GRPH (N,2,node,null) );
00214     RCP<MAT>  A = rcp(new MAT  (G,null) );
00215     // allocate buffers for ptrs, indices and values
00216     const Ordinal totalNNZ = 2*N;
00217     ArrayRCP<size_t>  ptrs(N+1);
00218     ArrayRCP<Ordinal> inds(totalNNZ);
00219     ArrayRCP<Scalar>  vals(totalNNZ);
00220     // fill the buffers on the host
00221     {
00222       ptrs[0] = 0;
00223       for (int i=0; i != N; ++i) {
00224         ptrs[i] = 2*i;
00225         inds[2*i  ] = 0;
00226         inds[2*i+1] = 1;
00227         vals[2*i  ] = -1;
00228         vals[2*i+1] =  1;
00229       }
00230       ptrs[N] = totalNNZ;
00231     }
00232     G->setStructure(ptrs, inds);
00233     ptrs = Teuchos::null;
00234     inds = Teuchos::null;
00235     A->setValues(vals);
00236     vals = Teuchos::null;
00237     RCP<ParameterList> plist = parameterList();
00238     plist->set("Prepare Transpose Multiply",true);
00239     OPS::finalizeGraphAndMatrix(Teuchos::UNDEF_TRI,Teuchos::NON_UNIT_DIAG,*G,*A,plist);
00240     OPS dsm(node);
00241     out << "Testing with sparse ops: " << Teuchos::typeName(dsm) << std::endl;
00242     dsm.setGraphAndMatrix(G,A);
00243 
00244     ArrayRCP<Scalar> xdat, axdat, atxdat;
00245     xdat  = node->template allocBuffer<Scalar>(2);
00246     node->template copyToBuffer<Scalar>(2, tuple<Scalar>(1.0,2.0), xdat);
00247     axdat = node->template allocBuffer<Scalar>(N);
00248     atxdat = node->template allocBuffer<Scalar>(2);
00249     MV X(node), AX(node), ATX(node);
00250     X.initializeValues(  2,1,  xdat,2);
00251     AX.initializeValues( N,1, axdat,N);
00252     ATX.initializeValues(2,1,atxdat,2);
00253 #ifdef HAVE_KOKKOS_DEBUG
00254     node->sync();
00255 #endif
00256     // AX = A*X
00257     dsm.multiply(Teuchos::NO_TRANS,ONE,X,AX);     //  AX = ones()
00258     dsm.multiply(Teuchos::TRANS,   ONE,AX,ATX);   // ATX = [-N;N]
00259     dsm.multiply(Teuchos::TRANS,  -ONE,AX,ONE,X); // X = [1;2] - [-N;N] = [1+N;2-N]
00260 #ifdef HAVE_KOKKOS_DEBUG
00261     node->sync();
00262 #endif
00263     // AX should be all ones
00264     {
00265       ArrayRCP<const Scalar> axview = node->template viewBuffer<Scalar>(N,axdat);
00266       Scalar err = ZERO;
00267       for (int i=0; i<N; ++i) {
00268         err += ST::magnitude(ONE - axview[i]);
00269       }
00270       TEST_EQUALITY_CONST(err, ZERO);
00271     }
00272     // ATX should be [-N;N]
00273     {
00274       ArrayRCP<const Scalar> atxview = node->template viewBuffer<Scalar>(2,atxdat);
00275       TEST_COMPARE_FLOATING_ARRAYS( atxview, tuple<Scalar>(-N,N), ZERO );
00276     }
00277     // X should be [1+N;2-N]
00278     {
00279       ArrayRCP<const Scalar> xview = node->template viewBuffer<Scalar>(2,xdat);
00280       TEST_COMPARE_FLOATING_ARRAYS( xview, tuple<Scalar>(1+N,2-N), ZERO );
00281     }
00282   }
00283 
00284 
00285   TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsMatrix, SparseMultiply, Ordinal, Scalar, Node )
00286   {
00287     RCP<Node> node = getNode<Node>();
00288     typedef typename DefaultKernels<Scalar,Ordinal,Node>::SparseOps          DSM;
00289     typedef typename DSM::template bind_scalar<Scalar>::other_type           OPS;
00290     typedef typename OPS::template matrix<Scalar,Ordinal,Node>::matrix_type  MAT;
00291     typedef typename OPS::template graph<Ordinal,Node>::graph_type          GRPH;
00292     typedef MultiVector<Scalar,Node>                                          MV;
00293     typedef Teuchos::ScalarTraits<Scalar>                                     ST;
00294     const Scalar ONE = ST::one(),
00295                 ZERO = ST::zero();
00296     // generate tridiagonal matrix:
00297     // [ 1 -1                   ]
00298     // [-1  3  -1               ]
00299     // [   -1   3  -1           ]
00300     // [                        ]
00301     // [                -1  3 -1]
00302     // [                   -1  2]
00303     if (N<2) return;
00304     RCP<GRPH> G = rcp(new GRPH (N,N,node,null) );
00305     RCP<MAT>  A = rcp(new MAT  (G,null) );
00306     // allocate buffers for ptrs, indices and values
00307     const Ordinal totalNNZ = 3*N - 2;
00308     ArrayRCP<size_t>  ptrs(N+1);
00309     ArrayRCP<Ordinal> inds(totalNNZ);
00310     ArrayRCP<Scalar>  vals(totalNNZ);
00311     // fill the buffers on the host
00312     {
00313       Ordinal NNZsofar = 0;
00314       ptrs[0] = NNZsofar;
00315       inds[NNZsofar] = 0; inds[NNZsofar+1] =  1;
00316       vals[NNZsofar] = 2; vals[NNZsofar+1] = -1;
00317       NNZsofar += 2;
00318       for (int i=1; i != N-1; ++i) {
00319         ptrs[i] = NNZsofar;
00320         inds[NNZsofar] = i-1; inds[NNZsofar+1] = i; inds[NNZsofar+2] = i+1;
00321         vals[NNZsofar] =  -1; vals[NNZsofar+1] = 3; vals[NNZsofar+2] =  -1;
00322         NNZsofar += 3;
00323       }
00324       ptrs[N-1] = NNZsofar;
00325       inds[NNZsofar] = N-2; inds[NNZsofar+1] = N-1;
00326       vals[NNZsofar] =  -1; vals[NNZsofar+1] = 2;
00327       NNZsofar += 2;
00328       ptrs[N]   = NNZsofar;
00329       TEUCHOS_TEST_FOR_EXCEPT(NNZsofar != totalNNZ);
00330     }
00331     G->setStructure(ptrs, inds);
00332     ptrs = Teuchos::null;
00333     inds = Teuchos::null;
00334     A->setValues(vals);
00335     vals = Teuchos::null;
00336     OPS::finalizeGraphAndMatrix(Teuchos::UNDEF_TRI,Teuchos::NON_UNIT_DIAG,*G,*A,null);
00337     Teuchos::EDiag diag;
00338     Teuchos::EUplo uplo;
00339     G->getMatDesc(uplo,diag);
00340     TEST_EQUALITY_CONST( uplo, Teuchos::UNDEF_TRI );
00341     TEST_EQUALITY_CONST( diag, Teuchos::NON_UNIT_DIAG );
00342     OPS dsm(node);
00343     out << "Testing with sparse ops: " << Teuchos::typeName(dsm) << std::endl;
00344     dsm.setGraphAndMatrix(G,A);
00345 
00346     ArrayRCP<Scalar> xdat, axdat;
00347     xdat  = node->template allocBuffer<Scalar>(N);
00348     axdat = node->template allocBuffer<Scalar>(N);
00349     MV X(node), AX(node);
00350     X.initializeValues( N,1, xdat,N);
00351     AX.initializeValues(N,1,axdat,N);
00352     DefaultArithmetic<MV>::Init( X,1);
00353 #ifdef HAVE_KOKKOS_DEBUG
00354     node->sync();
00355 #endif
00356     // AX = A*X
00357     dsm.multiply(Teuchos::NO_TRANS,ONE,X,AX);
00358 #ifdef HAVE_KOKKOS_DEBUG
00359     node->sync();
00360 #endif
00361     // AX should be all ones
00362     {
00363       ArrayRCP<const Scalar> axview = node->template viewBuffer<Scalar>(N,axdat);
00364       Scalar err = ZERO;
00365       for (int i=0; i<N; ++i) {
00366         err += ST::magnitude(ONE - axview[i]);
00367       }
00368       TEST_EQUALITY_CONST(err, ZERO);
00369     }
00370     // do that same multiplication, testing alpha=-1 and beta=1 accumulation
00371     // AX = AX - A*X = A*X - A*X = 0
00372     dsm.multiply(Teuchos::NO_TRANS,-ONE,X,ONE,AX);
00373     // AX should be zero
00374     {
00375       ArrayRCP<const Scalar> axview = node->template viewBuffer<Scalar>(N,axdat);
00376       ArrayRCP<const Scalar> xview = node->template viewBuffer<Scalar>(N,xdat);
00377       Scalar err = ZERO, nrm = ZERO;
00378       for (int i=0; i<N; ++i) {
00379         err += ST::magnitude(axview[i]);
00380         nrm += ST::magnitude(xview[i]);
00381       }
00382       TEST_INEQUALITY_CONST(nrm, ZERO);
00383       TEST_EQUALITY_CONST(err, ZERO);
00384     }
00385     xdat = null;
00386     axdat = null;
00387   }
00388 
00389 #include "CrsMatrix_DefaultMultiplyTests.hpp"
00390 
00391 #define ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, NODE ) \
00392       TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsMatrix,        SparseMultiply, ORDINAL, SCALAR, NODE ) \
00393       TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsMatrix,     TransposeMultiply, ORDINAL, SCALAR, NODE ) \
00394       TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( DefaultSparseOps, NodeTest,       ORDINAL, SCALAR, NODE ) \
00395       TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( DefaultSparseOps, ResubmitMatrix, ORDINAL, SCALAR, NODE )
00396 
00397 #define UNIT_TEST_SERIALNODE(ORDINAL, SCALAR) \
00398       ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, SerialNode )
00399 
00400 #ifdef HAVE_KOKKOS_TBB
00401 #define UNIT_TEST_TBBNODE(ORDINAL, SCALAR) \
00402       ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, TBBNode )
00403 #else
00404 #define UNIT_TEST_TBBNODE(ORDINAL, SCALAR)
00405 #endif
00406 
00407 #ifdef HAVE_KOKKOS_OPENMP
00408 #define UNIT_TEST_OPENMPNODE(ORDINAL, SCALAR) \
00409       ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, OpenMPNode )
00410 #else
00411 #define UNIT_TEST_OPENMPNODE(ORDINAL, SCALAR)
00412 #endif
00413 
00414 #ifdef HAVE_KOKKOS_THREADPOOL
00415 #define UNIT_TEST_TPINODE(ORDINAL, SCALAR) \
00416       ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( ORDINAL, SCALAR, TPINode )
00417 #else
00418 #define UNIT_TEST_TPINODE(ORDINAL, SCALAR)
00419 #endif
00420 
00421 #define UNIT_TEST_GROUP_ORDINAL_SCALAR( ORDINAL, SCALAR ) \
00422         UNIT_TEST_SERIALNODE( ORDINAL, SCALAR ) \
00423         UNIT_TEST_TBBNODE( ORDINAL, SCALAR ) \
00424         UNIT_TEST_OPENMPNODE( ORDINAL, SCALAR ) \
00425         UNIT_TEST_TPINODE( ORDINAL, SCALAR )
00426 
00427 #define UNIT_TEST_GROUP_ORDINAL( ORDINAL ) \
00428         UNIT_TEST_GROUP_ORDINAL_SCALAR(ORDINAL, int) \
00429         UNIT_TEST_GROUP_ORDINAL_SCALAR(ORDINAL, float)
00430 
00431 typedef std::complex<float>  ComplexFloat;
00432 typedef std::complex<double> ComplexDouble;
00433 #ifdef TEST_CUDA_FLOAT
00434 ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( int, float, ThrustGPUNode )
00435 #endif
00436 #ifdef TEST_CUDA_DOUBLE
00437 ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( int, double, ThrustGPUNode )
00438 #endif
00439 #ifdef TEST_CUDA_COMPLEX_FLOAT
00440 ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( int, ComplexFloat, ThrustGPUNode )
00441 #endif
00442 #ifdef TEST_CUDA_COMPLEX_DOUBLE
00443 ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( int, ComplexDouble, ThrustGPUNode )
00444 #endif
00445 
00446 #ifdef HAVE_KOKKOS_CUSP 
00447 ALL_UNIT_TESTS_ORDINAL_SCALAR_NODE( short, float, ThrustGPUNode )
00448 #endif
00449 
00450 UNIT_TEST_GROUP_ORDINAL(int)
00451 typedef short int ShortInt; UNIT_TEST_GROUP_ORDINAL(ShortInt)
00452 
00453 }
00454 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends