Kokkos Node API and Local Linear Algebra Kernels Version of the Day
DefaultSparseOps_TestSparseOps.hpp
00001 /*
00002 //@HEADER
00003 // ************************************************************************
00004 //
00005 //          Kokkos: Node API and Parallel Node Kernels
00006 //              Copyright (2008) Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00039 //
00040 // ************************************************************************
00041 //@HEADER
00042 */
00043 
00044 #ifndef __CrsMatrix_TestSparseOps_hpp
00045 #define __CrsMatrix_TestSparseOps_hpp
00046 
00047 #include <Teuchos_MatrixMarket_Raw_Reader.hpp>
00048 #include <Teuchos_as.hpp>
00049 #include <Teuchos_BLAS.hpp>
00050 #include <Teuchos_LAPACK.hpp>
00051 #include <Teuchos_SerialDenseMatrix.hpp>
00052 #include <Teuchos_TimeMonitor.hpp>
00053 #include <Kokkos_MultiVector.hpp>
00054 #include <Kokkos_DefaultArithmetic.hpp>
00055 
00059 
00068 template<class SparseOpsType>
00069 class TestSparseOps {
00070 public:
00071   typedef SparseOpsType sparse_ops_type;
00072   typedef typename sparse_ops_type::scalar_type scalar_type;
00073   typedef typename Teuchos::ScalarTraits<scalar_type>::magnitudeType magnitude_type;
00074   typedef typename sparse_ops_type::ordinal_type ordinal_type;
00075   typedef typename sparse_ops_type::node_type node_type;
00076 
00077   typedef typename sparse_ops_type::template graph<ordinal_type, node_type>::graph_type graph_type;
00078   typedef typename sparse_ops_type::template matrix<scalar_type, ordinal_type, node_type>::matrix_type matrix_type;
00079 
00090 
00091 
00093   typedef Teuchos::SerialDenseMatrix<int, scalar_type> dense_matrix_type;
00094 
00095 private:
00097   typedef Teuchos::BLAS<int, scalar_type> blas_type;
00099   typedef Teuchos::LAPACK<int, scalar_type> lapack_type;
00100 
00101 public:
00103 
00138   void
00139   makeDenseTestProblem (Teuchos::RCP<dense_matrix_type>& A_out,
00140                         Teuchos::RCP<dense_matrix_type>& L_out,
00141                         Teuchos::RCP<dense_matrix_type>& U_out,
00142                         Teuchos::Array<ordinal_type>& pivots,
00143                         const ordinal_type numRows) const
00144   {
00145     using Teuchos::Array;
00146     using Teuchos::RCP;
00147     using Teuchos::rcp;
00148     typedef dense_matrix_type MT;
00149 
00150     const ordinal_type N = numRows;
00151     RCP<MT> A = rcp (new MT (N, N));
00152     A->random ();
00153     // Keep a copy of A, since LAPACK's LU factorization overwrites
00154     // its input matrix with the L and U factors.
00155     RCP<MT> A_copy = rcp (new MT (*A));
00156 
00157     // Compute the LU factorization of A.
00158     lapack_type lapack;
00159     int info = 0;
00160     Array<ordinal_type> ipiv (N);
00161     lapack.GETRF (N, N, A->values (), A->stride (), ipiv.getRawPtr (), &info);
00162     TEUCHOS_TEST_FOR_EXCEPTION(info < 0, std::logic_error, "LAPACK's _GETRF "
00163       "routine reported that its " << (-info) << "-th argument had an illegal "
00164       "value.  This probably indicates a bug in the way Kokkos is calling the "
00165       "routine.  Please report this bug to the Kokkos developers.");
00166     TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error, "LAPACK's _GETRF "
00167       "routine reported that the " << info << "-th diagonal element of the U "
00168       "factor is exactly zero.  This indicates that the matrix A is singular.  "
00169       "A is pseudorandom, so it is possible but unlikely that it actually is "
00170       "singular.  More likely is that the pseudorandom number generator isn't "
00171       "working correctly.  This is not a Kokkos bug, but it could be a Teuchos "
00172       "bug, since Teuchos is invoking the generator.");
00173 
00174     // Create L and U, and copy out the lower resp. upper triangle of
00175     // A into L resp. U.
00176     RCP<MT> L = rcp (new MT (N, N));
00177     RCP<MT> U = rcp (new MT (N, N));
00178     {
00179       // Get the MT refs so we don't have to dereference the RCPs each
00180       // time we change an entry.
00181       MT& LL = *L;
00182       MT& UU = *U;
00183       MT& AA = *A;
00184 
00185       for (ordinal_type i = 0; i < N; ++i) {
00186         // LL has an implicitly stored unit diagonal, so don't include j==i.
00187         for (ordinal_type j = 0; j < i; ++j) {
00188           LL(i,j) = AA(i,j);
00189         }
00190         for (ordinal_type j = i; j < N; ++j) {
00191           UU(i,j) = AA(i,j);
00192         }
00193       }
00194     }
00195 
00196     // "Commit" the outputs.
00197     pivots.resize (N);
00198     std::copy (ipiv.begin (), ipiv.end (), pivots.begin ());
00199     A_out = A_copy; // Return the "original" A, before the factorization.
00200     L_out = L;
00201     U_out = U;
00202   }
00203 
00204   // mfh 28 Jun 2012: It would be nice to use LAPACK's banded solver
00205   // to generate test problems, but it may be more trouble than it's
00206   // worth to get the indexing right.  Having a good source of valid
00207   // LU factorizations without needing N^2 data would be helpful,
00208   // though for now we disable this code, since we don't know that it
00209   // works.
00210 #if 0
00211   void
00212   makeBandedTestProblem (Teuchos::RCP<dense_matrix_type>& A_out,
00213                          Teuchos::RCP<dense_matrix_type>& L_out,
00214                          Teuchos::RCP<dense_matrix_type>& U_out,
00215                          Teuchos::Array<ordinal_type>& pivots,
00216                          const ordinal_type numRows) const
00217   {
00218     using Teuchos::Array;
00219     using Teuchos::RCP;
00220     using Teuchos::rcp;
00221     typedef dense_matrix_type MT;
00222 
00223     const ordinal_type M = numRows; // Number of rows in the matrix
00224     const ordinal_type N = numRows; // Number of columns in the matrix
00225     const ordinal_type KL = 1; // Number of subdiagonals
00226     const ordinal_type KU = 1; // Number of superdiagonals
00227     const ordinal_type LDAB = 2*KL + KU + 1; // As required by LAPACK
00228 
00229     // On input: Don't fill top KL rows of A.
00230     //
00231     // On output: The top KL rows of A may be filled with elements of
00232     // the U factor on output, due to row interchanges.
00233 
00234     RCP<MT> A = rcp (new MT (LDAB, N));
00235     // Just fill the whole matrix A with random numbers.  LAPACK won't
00236     // use some of those elements; that's OK.
00237     A->random ();
00238 
00239     // Keep a copy of A, since LAPACK's LU factorization overwrites
00240     // its input matrix with the L and U factors.
00241     RCP<MT> A_copy = rcp (new MT (*A));
00242 
00243     // Compute the LU factorization of A.
00244     lapack_type lapack;
00245     int info = 0;
00246     Array<ordinal_type> ipiv (N);
00247     lapack.GBTRF (M, N, KL, KU, A->values (), A->stride (), ipiv.getRawPtr (), &info);
00248     TEUCHOS_TEST_FOR_EXCEPTION(info < 0, std::logic_error, "LAPACK's _GBTRF "
00249       "routine reported that its " << (-info) << "-th argument had an illegal "
00250       "value.  This probably indicates a bug in the way Kokkos is calling the "
00251       "routine.  Please report this bug to the Kokkos developers.");
00252     TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error, "LAPACK's _GBTRF "
00253       "routine reported that the " << info << "-th diagonal element of the U "
00254       "factor is exactly zero.  This indicates that the matrix A is singular.  "
00255       "A is pseudorandom, so it is possible but unlikely that it actually is "
00256       "singular.  More likely is that the pseudorandom number generator isn't "
00257       "working correctly.  This is not a Kokkos bug, but it could be a Teuchos "
00258       "bug, since Teuchos is invoking the generator.");
00259 
00260     // Create L and U, and copy out the lower resp. upper triangle
00261     // (both packed in banded form) of A into L resp. U.
00262     const ordinal_type LDU = KL + KU + 1;
00263     RCP<MT> L = rcp (new MT (LDAB - LDU, N));
00264     RCP<MT> U = rcp (new MT (LDU, N));
00265     {
00266       // Get the MT refs so we don't have to dereference the RCPs each
00267       // time we change an entry.
00268       MT& LL = *L;
00269       MT& UU = *U;
00270       MT& AA = *A;
00271 
00272       for (ordinal_type j = 0; j < N; ++j) {
00273         for (ordinal_type i = 0; i < LDU; ++i) {
00274           U(i,j) = A(i,j);
00275         }
00276         for (ordinal_type i = LDU; i < LDAB; ++i) {
00277           L(i,j) = A(i,j);
00278         }
00279       }
00280     }
00281 
00282     // "Commit" the outputs.
00283     pivots.resize (N);
00284     std::copy (ipiv.begin (), ipiv.end (), pivots.begin ());
00285     A_out = A_copy; // Return the "original" A, before the factorization.
00286     L_out = L;
00287     U_out = U;
00288   }
00289 
00300   void
00301   denseBandedToSparseOps (Teuchos::RCP<SparseOpsType>& A_out,
00302                           Teuchos::RCP<SparseOpsType>& L_out,
00303                           Teuchos::RCP<SparseOpsType>& U_out,
00304                           const Teuchos::RCP<node_type>& node,
00305                           const Teuchos::RCP<const dense_matrix_type>& A_in,
00306                           const Teuchos::RCP<const dense_matrix_type>& L_in,
00307                           const Teuchos::RCP<const dense_matrix_type>& U_in) const
00308   {
00309     using Teuchos::ArrayRCP;
00310     using Teuchos::RCP;
00311     using Teuchos::rcp;
00312 
00313     const ordinal_type LDAB = A_in.numRows (); // == 2*KL + KU + 1
00314     const ordinal_type LDU = U_in.numRows ();  // == KL + KU + 1
00315     const ordinal_type LDL = L_in.numRows ();  // == KL
00316 
00317     const ordinal_type M = A_in.numCols ();
00318     const ordinal_type N = A_in.numCols ();
00319     const ordinal_type KL = LDL;
00320     const ordinal_type KU = LDU - LDL - 1;
00321 
00322     TEUCHOS_TEST_FOR_EXCEPTION(2*KL + KU + 1 != LDAB, std::logic_error,
00323       "Failed to compute KL and KU correctly.  2*KL + KU + 1 = "
00324       << 2*KL + KU + 1 << " != LDAB = " << LDAB << ".  KL = " << KL
00325       << ", KU = " << KU << ".  Please report this bug to the Kokkos "
00326       "developers.");
00327     TEUCHOS_TEST_FOR_EXCEPTION(KL + KU + 1 != LDU, std::logic_error,
00328       "Failed to compute KL and KU correctly.  KL + KU + 1 = "
00329       << KL + KU + 1 << " != LDU = " << LDU << ".  KL = " << KL
00330       << ", KU = " << KU << ".  Please report this bug to the Kokkos "
00331       "developers.");
00332 
00333     // Make U.
00334     RCP<SparseOpsType> U_sparse;
00335     {
00336       const ordinal_type nnz = LDU * N - ((LDU-1)*LDU)/2;
00337       ArrayRCP<ordinal_type> ptr (LDU+1);
00338       ArrayRCP<ordinal_type> ind (nnz);
00339       ArrayRCP<scalar_type> val (nnz);
00340       ordinal_type ctr = 0;
00341       ptr[0] = 0;
00342       for (ordinal_type r = 0; r < LDU; ++r) {
00343         // Row 0 of U starts in the last row of the packed U matrix, at
00344         // the (LDU-1, 0) entry (zero-based).  Read the entries in each
00345         // row diagonally and to the northeast.  Row 1 starts at (LDU-1,
00346         // 1).  In general, row r starts at (LDU-1, r), and has min(LDU,
00347         // N-r) entries.
00348         const ordinal_type numEntries = std::min (LDU, N-r);
00349         ordinal_type curRow = LDU - 1;
00350         ordinal_type curCol = r;
00351         ordinal_type c = r; // start here; it's the upper triangle
00352         for (ordinal_type k = 0; k < numEntries; ++k, ++ctr) {
00353           val[ctr] = U(curRow--, curCol++);
00354           ind[ctr] = c++;
00355         }
00356         ptr[r+1] = ctr;
00357       }
00358       U_sparse = makeSparseOps (node, ptr, ind, val, Teuchos::UPPER_TRI,
00359                                 Teuchos::NON_UNIT_DIAG);
00360     }
00361 
00362     // Make L
00363     RCP<SparseOpsType> L_sparse;
00364     {
00365       // Row 0 of L starts in the first row of the packed L matrix, at
00366       // the (0, 0) entry (zero-based).  Row 1 of L
00367 
00368       // Read the entries in each row diagonally and to the northeast.
00369 
00370       const ordinal_type nnz = LDU * N - ((LDU-1)*LDU)/2;
00371       ArrayRCP<ordinal_type> ptr (LDU+1);
00372       ArrayRCP<ordinal_type> ind (nnz);
00373       ArrayRCP<scalar_type> val (nnz);
00374       ordinal_type ctr = 0;
00375       ptr[0] = 0;
00376       for (ordinal_type r = 0; r < LDU; ++r) {
00377         // Row 0 of U starts in the last row of the packed U matrix, at
00378         // the (LDU-1, 0) entry (zero-based).  Read the entries in each
00379         // row diagonally and to the northeast.  Row 1 starts at (LDU-1,
00380         // 1).  In general, row r starts at (LDU-1, r), and has min(LDU,
00381         // N-r) entries.
00382         const ordinal_type numEntries = std::min (LDU, N-r);
00383         ordinal_type curRow = LDU - 1;
00384         ordinal_type curCol = r;
00385         ordinal_type c = r; // start here; it's the upper triangle
00386         for (ordinal_type k = 0; k < numEntries; ++k, ++ctr) {
00387           val[ctr] = U(curRow--, curCol++);
00388           ind[ctr] = c++;
00389         }
00390         ptr[r+1] = ctr;
00391       }
00392       U_sparse = makeSparseOps (node, ptr, ind, val, Teuchos::UPPER_TRI,
00393                                 Teuchos::NON_UNIT_DIAG);
00394 
00395     }
00396   }
00397   // mfh 28 Jun 2012: See note above on using LAPACK's banded solver
00398   // to generate sparse test problems.
00399 #endif // 0
00400 
00417   void
00418   readFile (size_t& numRows,
00419             size_t& numCols,
00420             Teuchos::ArrayRCP<const size_t>& rowptr,
00421             Teuchos::ArrayRCP<const ordinal_type>& colind,
00422             Teuchos::ArrayRCP<const scalar_type>& values,
00423             const std::string& filename) const
00424   {
00425     using Teuchos::ArrayRCP;
00426     using Teuchos::as;
00427     using Teuchos::arcp;
00428     using Teuchos::arcp_const_cast;
00429     using Teuchos::null;
00430     using Teuchos::ParameterList;
00431     using Teuchos::parameterList;
00432     using Teuchos::RCP;
00433     using Teuchos::rcp;
00434 
00435     // The reader wants ptr to have the same type of entries as ind.
00436     // We'll copy right before leaving the routine.
00437     ArrayRCP<ordinal_type> ptr;
00438     ArrayRCP<ordinal_type> ind;
00439     ArrayRCP<scalar_type>  val;
00440     ordinal_type nrows = 0, ncols = 0;
00441 
00442     Teuchos::MatrixMarket::Raw::Reader<scalar_type, ordinal_type> reader;
00443 
00444     // In "intolerant" mode, this will throw an exception if there is
00445     // a syntax error in the file.
00446     (void) reader.readFile (ptr, ind, val, nrows, ncols, filename);
00447 
00448     typedef ArrayRCP<size_t>::size_type size_type;
00449     ArrayRCP<size_t> ptrout (static_cast<size_type> (nrows + 1));
00450     for (size_type k = 0; k <= nrows; ++k) {
00451       ptrout[k] = as<size_t> (ptr[k]);
00452     }
00453     // Now we're done with ptr.
00454     ptr = null;
00455 
00456     // Assign the output arguments.
00457     numRows = as<size_t> (nrows);
00458     numCols = as<size_t> (ncols);
00459     rowptr = arcp_const_cast<const size_t> (ptrout);
00460     colind = arcp_const_cast<const ordinal_type> (ind);
00461     values = arcp_const_cast<const scalar_type> (val);
00462   }
00463 
00490   Teuchos::RCP<SparseOpsType>
00491   makeSparseOpsFromFile (ordinal_type& numRows,
00492                          ordinal_type& numCols,
00493                          const Teuchos::RCP<node_type>& node,
00494                          const std::string& filename,
00495                          const Teuchos::EUplo uplo = Teuchos::UNDEF_TRI,
00496                          const Teuchos::EDiag diag = Teuchos::NON_UNIT_DIAG) const
00497   {
00498     using Teuchos::ArrayRCP;
00499     using Teuchos::arcp_const_cast;
00500     using Teuchos::as;
00501 
00502     size_t theNumRows = 0, theNumCols = 0;
00503     ArrayRCP<const size_t> ptr;
00504     ArrayRCP<const ordinal_type> ind;
00505     ArrayRCP<const scalar_type> val;
00506     readFile (theNumRows, theNumCols, ptr, ind, val, filename);
00507     numRows = as<ordinal_type> (theNumRows);
00508     numCols = as<ordinal_type> (theNumCols);
00509     return makeSparseOps (node, ptr, ind, val, uplo, diag);
00510   }
00511 
00512 
00541   Teuchos::RCP<SparseOpsType>
00542   makeSparseOps (const Teuchos::RCP<node_type>& node,
00543                  const Teuchos::ArrayRCP<const size_t>& ptr,
00544                  const Teuchos::ArrayRCP<const ordinal_type>& ind,
00545                  const Teuchos::ArrayRCP<const scalar_type>& val,
00546                  const Teuchos::EUplo uplo = Teuchos::UNDEF_TRI,
00547                  const Teuchos::EDiag diag = Teuchos::NON_UNIT_DIAG) const
00548   {
00549     using Teuchos::ArrayRCP;
00550     using Teuchos::null;
00551     using Teuchos::ParameterList;
00552     using Teuchos::parameterList;
00553     using Teuchos::RCP;
00554     using Teuchos::rcp;
00555 
00556     const size_t numRows = static_cast<size_t> (ptr.size() == 0 ? 0 : ptr.size() - 1);
00557     RCP<ParameterList> graphParams = parameterList ("Graph");
00558     RCP<graph_type> graph = rcp (new graph_type (numRows, node, graphParams));
00559 
00560     graph->setStructure (ptr, ind);
00561     RCP<ParameterList> matParams = parameterList ("Matrix");
00562     RCP<matrix_type> matrix = rcp (new matrix_type (graph, matParams));
00563     matrix->setValues (val);
00564 
00565     RCP<ParameterList> finParams = parameterList ("Finalize");
00566     SparseOpsType::finalizeGraphAndMatrix (uplo, diag, *graph, *matrix, finParams);
00567 
00568     RCP<SparseOpsType> ops = rcp (new SparseOpsType (node));
00569     ops->setGraphAndMatrix (graph, matrix);
00570     return ops;
00571   }
00572 
00581   Teuchos::RCP<SparseOpsType>
00582   denseTriToSparseOps (const dense_matrix_type& A,
00583                        const Teuchos::RCP<node_type>& node,
00584                        const Teuchos::EUplo uplo,
00585                        const Teuchos::EDiag diag) const
00586   {
00587     using Teuchos::ArrayRCP;
00588     using Teuchos::arcp;
00589     using Teuchos::arcp_const_cast;
00590     using Teuchos::as;
00591     using Teuchos::null;
00592     using Teuchos::RCP;
00593     using Teuchos::rcp;
00594     using Teuchos::rcp_const_cast;
00595 
00596     const ordinal_type N = A.numRows ();
00597     // If we're not storing the diagonal entries, subtract N off the
00598     // total number of entries to store.
00599     const ordinal_type NNZ = diag == Teuchos::UNIT_DIAG ?
00600       (N*(N-1)) / 2 : // UNIT_DIAG
00601       (N*(N+1)) / 2;  // NON_UNIT_DIAG
00602 
00603     ArrayRCP<size_t> ptr (N+1);
00604     ArrayRCP<ordinal_type> ind (NNZ);
00605     ArrayRCP<scalar_type> val (NNZ);
00606 
00607     ordinal_type counter = 0;
00608     for (ordinal_type i = 0; i < N; ++i) {
00609       ptr[i] = counter;
00610       if (uplo == Teuchos::UPPER_TRI) {
00611         const ordinal_type lowerBound = (diag == Teuchos::UNIT_DIAG) ? i+1 : i;
00612         for (ordinal_type j = lowerBound; j < N; ++j) {
00613           ind[counter] = j;
00614           val[counter] = A(i, j);
00615           ++counter;
00616         }
00617       }
00618       else { // uplo == Teuchos::LOWER_TRI
00619         const ordinal_type upperBound = (diag == Teuchos::UNIT_DIAG) ? i : i+1;
00620         for (ordinal_type j = 0; j < upperBound; ++j) {
00621           ind[counter] = j;
00622           val[counter] = A(i, j);
00623           ++counter;
00624         }
00625       }
00626     }
00627     ptr[N] = counter;
00628 
00629     TEUCHOS_TEST_FOR_EXCEPTION(counter != NNZ, std::logic_error,
00630       "TestSparseOps::denseTriToSparseOps: Expected " << NNZ << " entries in "
00631       "the sparse matrix, but got " << counter << " instead.  Please report "
00632       "this bug (in tests) to the Kokkos developers.");
00633 
00634     RCP<graph_type> graph =
00635       rcp (new graph_type (as<size_t> (N), node, null));
00636     graph->setStructure (arcp_const_cast<const size_t> (ptr),
00637                          arcp_const_cast<const ordinal_type> (ind));
00638     RCP<matrix_type> matrix =
00639       rcp (new matrix_type (rcp_const_cast<const graph_type> (graph), null));
00640     matrix->setValues (arcp_const_cast<const scalar_type> (val));
00641 
00642     SparseOpsType::finalizeGraphAndMatrix (uplo, diag, *graph, *matrix, null);
00643     RCP<SparseOpsType> ops = rcp (new SparseOpsType (node));
00644     ops->setGraphAndMatrix (graph, matrix);
00645     return ops;
00646   }
00647 
00648 
00653   Teuchos::RCP<SparseOpsType>
00654   denseToSparseOps (const dense_matrix_type& A,
00655                     const Teuchos::RCP<node_type>& node) const
00656   {
00657     using Teuchos::ArrayRCP;
00658     using Teuchos::arcp;
00659     using Teuchos::arcp_const_cast;
00660     using Teuchos::as;
00661     using Teuchos::null;
00662     using Teuchos::RCP;
00663     using Teuchos::rcp;
00664     using Teuchos::rcp_const_cast;
00665 
00666     const ordinal_type N = A.numRows ();
00667     const ordinal_type NNZ = N*N;
00668 
00669     ArrayRCP<size_t> ptr (N+1);
00670     ArrayRCP<ordinal_type> ind (NNZ);
00671     ArrayRCP<scalar_type> val (NNZ);
00672 
00673     ordinal_type counter = 0;
00674     for (ordinal_type i = 0; i < N; ++i) {
00675       ptr[i] = counter;
00676       for (ordinal_type j = 0; j < N; ++j) {
00677         ind[counter] = j;
00678         val[counter] = A(i, j);
00679         ++counter;
00680       }
00681     }
00682     ptr[N] = counter;
00683 
00684     TEUCHOS_TEST_FOR_EXCEPTION(counter != NNZ, std::logic_error,
00685       "TestSparseOps::denseToSparseOps: Expected " << NNZ << " entries in "
00686       "the sparse matrix, but got " << counter << " instead.  Please report "
00687       "this bug (in tests) to the Kokkos developers.");
00688 
00689     RCP<graph_type> graph =
00690       rcp (new graph_type (as<size_t> (N), node, null));
00691     graph->setStructure (arcp_const_cast<const size_t> (ptr),
00692                          arcp_const_cast<const ordinal_type> (ind));
00693     RCP<matrix_type> matrix =
00694       rcp (new matrix_type (rcp_const_cast<const graph_type> (graph), null));
00695     matrix->setValues (arcp_const_cast<const scalar_type> (val));
00696 
00697     Teuchos::EUplo uplo = Teuchos::UNDEF_TRI;
00698     Teuchos::EDiag diag = Teuchos::NON_UNIT_DIAG;
00699     SparseOpsType::finalizeGraphAndMatrix (uplo, diag, *graph, *matrix, null);
00700     RCP<SparseOpsType> ops = rcp (new SparseOpsType (node));
00701     ops->setGraphAndMatrix (graph, matrix);
00702     return ops;
00703   }
00704 
00705 
00711   Teuchos::RCP<Kokkos::MultiVector<scalar_type, node_type> >
00712   makeMultiVector (const Teuchos::RCP<node_type>& node,
00713                    const ordinal_type numRows,
00714                    const ordinal_type numCols) const
00715   {
00716     using Teuchos::arcp;
00717     using Teuchos::as;
00718     using Teuchos::RCP;
00719     using Teuchos::rcp;
00720     typedef Teuchos::ScalarTraits<scalar_type> STS;
00721     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
00722     typedef Kokkos::DefaultArithmetic<MV> MVT;
00723 
00724     RCP<MV> X = rcp (new MV (node));
00725     X->initializeValues (as<size_t> (numRows),
00726                          as<size_t> (numCols),
00727                          arcp<scalar_type> (numRows*numCols),
00728                          as<size_t> (numRows));
00729     MVT::Init (*X, STS::zero ());
00730     return X;
00731   }
00732 
00745   magnitude_type
00746   maxRelativeError (const Teuchos::RCP<const Kokkos::MultiVector<scalar_type, node_type> >& X,
00747                     const Teuchos::RCP<const Kokkos::MultiVector<scalar_type, node_type> >& Y,
00748                     const Teuchos::RCP<Kokkos::MultiVector<scalar_type, node_type> >& Z) const
00749   {
00750     using Teuchos::Array;
00751     using Teuchos::as;
00752     typedef typename Array<magnitude_type>::size_type size_type;
00753     typedef Teuchos::ScalarTraits<scalar_type> STS;
00754     typedef Teuchos::ScalarTraits<magnitude_type> STM;
00755     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
00756     typedef Kokkos::DefaultArithmetic<MV> MVT;
00757 
00758     const ordinal_type numCols = as<ordinal_type> (X->getNumCols ());
00759     Array<magnitude_type> numerNorms (numCols);
00760     Array<magnitude_type> denomNorms (numCols);
00761 
00762     MVT::Assign (*Z, (const MV) *Y); // Z := Y
00763     MVT::GESUM (*Z, STS::one(), (const MV) *X, -STS::one()); // Z := X - Z
00764     MVT::NormInf ((const MV) *Z, numerNorms ());
00765     MVT::NormInf ((const MV) *X, denomNorms ());
00766 
00767     magnitude_type maxRelNorm = STM::zero();
00768     for (size_type k = 0; k < numCols; ++k) {
00769       // If the norm of the current column of X is zero, use the absolute error.
00770       const magnitude_type relNorm = (denomNorms[k] == STM::zero ()) ?
00771         numerNorms[k] :
00772         numerNorms[k] / denomNorms[k];
00773       if (relNorm > maxRelNorm) {
00774         maxRelNorm = relNorm;
00775       }
00776     }
00777     return maxRelNorm;
00778   }
00779 
00823   void
00824   testSparseOps (const Teuchos::RCP<node_type>& node,
00825                  const ordinal_type N,
00826                  const ordinal_type numVecs,
00827                  const magnitude_type tol) const
00828   {
00829     using Teuchos::arcp;
00830     using Teuchos::Array;
00831     using Teuchos::as;
00832     using Teuchos::null;
00833     using Teuchos::RCP;
00834     using Teuchos::rcp;
00835     using Teuchos::LEFT_SIDE;
00836     using Teuchos::LOWER_TRI;
00837     using Teuchos::UPPER_TRI;
00838     using Teuchos::NO_TRANS;
00839     using Teuchos::TRANS;
00840     using Teuchos::CONJ_TRANS;
00841     using Teuchos::NON_UNIT_DIAG;
00842     using Teuchos::UNIT_DIAG;
00843     typedef Array<size_t>::size_type size_type;
00844     typedef Teuchos::ScalarTraits<scalar_type> STS;
00845     typedef Teuchos::ScalarTraits<magnitude_type> STM;
00846     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
00847     typedef Kokkos::DefaultArithmetic<MV> MVT;
00848 
00849     TEUCHOS_TEST_FOR_EXCEPTION(N < numVecs, std::invalid_argument,
00850       "testSparseOps: Number of rows N = " << N << " < numVecs = " << numVecs
00851       << ".");
00852 
00853     // A record of error messages reported by any failed tests.
00854     // We run _all_ the tests first, then report any failures.
00855     std::ostringstream err;
00856     // Whether any tests have failed thus far.
00857     bool success = true;
00858 
00859     // Relative error of the current operation.
00860     magnitude_type relErr = STM::zero ();
00861 
00862     RCP<dense_matrix_type> A_dense, L_dense, U_dense;
00863     Array<ordinal_type> ipiv;
00864     makeDenseTestProblem (A_dense, L_dense, U_dense, ipiv, N);
00865 
00866     // Convert L_dense and U_dense into separate sparse matrices.
00867     RCP<SparseOpsType> L_sparse =
00868       denseTriToSparseOps (*L_dense, node, LOWER_TRI, UNIT_DIAG);
00869     RCP<SparseOpsType> U_sparse =
00870       denseTriToSparseOps (*U_dense, node, UPPER_TRI, NON_UNIT_DIAG);
00871     // Convert A_dense into a separate sparse matrix.
00872     RCP<SparseOpsType> A_sparse = denseToSparseOps (*A_dense, node);
00873 
00874     // Compute a random input multivector.
00875     RCP<MV> X = makeMultiVector (node, N, numVecs);
00876     MVT::Random (*X);
00877 
00878     // We make MVs both for results and for scratch space (since the
00879     // BLAS' dense triangular solve overwrites its input).
00880     RCP<MV> Y     = makeMultiVector (node, N, numVecs);
00881     RCP<MV> Y_hat = makeMultiVector (node, N, numVecs);
00882     RCP<MV> Z     = makeMultiVector (node, N, numVecs);
00883     RCP<MV> Z_hat = makeMultiVector (node, N, numVecs);
00884     RCP<MV> W     = makeMultiVector (node, N, numVecs);
00885     RCP<MV> W_hat = makeMultiVector (node, N, numVecs);
00886 
00887     blas_type blas; // For dense matrix and vector operations.
00888 
00889     // Compute Y_hat := A_dense * X and Y := A_sparse * X.
00890     blas.GEMM (NO_TRANS, NO_TRANS, N, numVecs, N,
00891                STS::one (), A_dense->values (), A_dense->stride (),
00892                X->getValues ().getRawPtr (), as<int> (X->getStride ()),
00893                STS::zero (), Y_hat->getValuesNonConst ().getRawPtr (),
00894                as<int> (Y_hat->getStride ()));
00895     A_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
00896                                                            (const MV) *X, *Y);
00897 
00898     // Compare Y and Y_hat.  Use Z as scratch space.
00899     relErr = maxRelativeError (Y_hat, Y, Z);
00900     if (relErr > tol) {
00901       err << "Sparse matrix-(multi)vector multiply test failed for general "
00902         "matrix A.  Maximum relative error " << relErr << " exceeds "
00903         "the given tolerance " << tol << ".\n";
00904       success = false;
00905     }
00906 
00907     // Compute Y_hat := A_dense^T * X and Y := A_sparse^T * X.
00908     blas.GEMM (TRANS, NO_TRANS, N, numVecs, N,
00909                STS::one (), A_dense->values (), A_dense->stride (),
00910                X->getValues ().getRawPtr (), as<int> (X->getStride ()),
00911                STS::zero (), Y_hat->getValuesNonConst ().getRawPtr (),
00912                as<int> (Y_hat->getStride ()));
00913     A_sparse->template multiply<scalar_type, scalar_type> (TRANS, STS::one (),
00914                                                            (const MV) *X, *Y);
00915     // Compare Y and Y_hat.  Use Z as scratch space.
00916     relErr = maxRelativeError (Y_hat, Y, Z);
00917     if (relErr > tol) {
00918       err << "Sparse matrix-(multi)vector multiply (transpose) test failed for "
00919         "general matrix A.  Maximum relative error " << relErr << " exceeds "
00920         "the given tolerance " << tol << ".\n";
00921       success = false;
00922     }
00923 
00924     if (STS::isComplex) {
00925       // Compute Y_hat := A_dense^H * X and Y := A_sparse^H * X.
00926       blas.GEMM (CONJ_TRANS, NO_TRANS, N, numVecs, N,
00927                  STS::one (), A_dense->values (), A_dense->stride (),
00928                  X->getValues ().getRawPtr (), as<int> (X->getStride ()),
00929                  STS::zero (), Y_hat->getValuesNonConst ().getRawPtr (),
00930                  as<int> (Y_hat->getStride ()));
00931       A_sparse->template multiply<scalar_type, scalar_type> (CONJ_TRANS, STS::one (),
00932                                                              (const MV) *X, *Y);
00933       // Compare Y and Y_hat.  Use Z as scratch space.
00934       relErr = maxRelativeError (Y_hat, Y, Z);
00935       if (relErr > tol) {
00936         err << "Sparse matrix-(multi)vector multiply (conjugate transpose) "
00937           "test failed for general matrix A.  Maximum relative error "
00938             << relErr << " exceeds the given tolerance " << tol << ".\n";
00939         success = false;
00940       }
00941     }
00942 
00943     // Compute Z_hat := U_dense * X.  First copy X into Z_hat, since TRMM
00944     // overwrites its input.
00945     MVT::Assign (*Z_hat, (const MV) *X);
00946     blas.TRMM (LEFT_SIDE, UPPER_TRI, NO_TRANS, NON_UNIT_DIAG, N, numVecs,
00947                STS::one (), U_dense->values (), U_dense->stride (),
00948                Z_hat->getValuesNonConst ().getRawPtr (),
00949                as<int> (Z_hat->getStride ()));
00950     // Compute Z := U_sparse * X.
00951     U_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
00952                                                            (const MV) *X, *Z);
00953     // Compare Z and Z_hat.  Use W as scratch space.
00954     relErr = maxRelativeError (Z_hat, Z, W);
00955     if (relErr > tol) {
00956       err << "Sparse matrix-(multi)vector multiply test failed for upper "
00957         "triangular matrix U.  Maximum relative error " << relErr << " exceeds "
00958         "the given tolerance " << tol << ".\n";
00959       success = false;
00960     }
00961 
00962     // Compute Y_hat := L_dense * Z_hat.  First copy Z_hat into Y_hat,
00963     // since TRMM overwrites its input.
00964     MVT::Assign (*Y_hat, (const MV) *Z_hat);
00965     blas.TRMM (LEFT_SIDE, LOWER_TRI, NO_TRANS, UNIT_DIAG, N, numVecs,
00966                STS::one (), L_dense->values (), L_dense->stride (),
00967                Y_hat->getValuesNonConst ().getRawPtr (),
00968                as<int> (Y_hat->getStride ()));
00969 
00970     // FIXME (mfh 19 June 2012) There's a bit of a problem with
00971     // DefaultHostSparseOps: its multiply() method doesn't appear to
00972     // respect the implicit unit diagonal indication.  Thus, for now,
00973     // we don't run this test.
00974     if (false) {
00975       // Compute Y = L_sparse * Z.
00976       L_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
00977                                                              (const MV) *Z, *Y);
00978       // Compare Y and Y_hat.  Use W as scratch space.
00979       relErr = maxRelativeError (Y_hat, Y, W);
00980       if (relErr > tol) {
00981         err << "Sparse matrix-(multi)vector multiply test failed for lower "
00982           "triangular matrix L with implicit unit diagonal.  Maximum relative "
00983           "error " << relErr << " exceeds the given tolerance " << tol << ".\n";
00984         success = false;
00985       }
00986     }
00987 
00988     //
00989     // Test sparse triangular solve.
00990     //
00991 
00992     // Compute Z_hat = L_dense \ Y_hat.
00993     MVT::Assign (*Z_hat, (const MV) *Y_hat);
00994     blas.TRSM (LEFT_SIDE, LOWER_TRI, NO_TRANS, UNIT_DIAG, N, numVecs,
00995                STS::one (), L_dense->values (), L_dense->stride (),
00996                Z_hat->getValuesNonConst ().getRawPtr (),
00997                as<int> (Z_hat->getStride ()));
00998     // Compute Z = L_sparse \ Y_hat.
00999     L_sparse->template solve<scalar_type, scalar_type> (NO_TRANS,
01000                                                         (const MV) *Y_hat, *Z);
01001     // Compare Z and Z_hat.  Use W as scratch space.
01002     relErr = maxRelativeError (Z_hat, Z, W);
01003     if (relErr > tol) {
01004       err << "Sparse triangular solve test failed for lower triangular matrix "
01005         "L with implicit unit diagonal.  Maximum relative error " << relErr
01006           << " exceeds the given tolerance " << tol << ".\n";
01007       success = false;
01008     }
01009 
01010     // Compute W_hat = U_dense \ Z_hat.
01011     MVT::Assign (*W_hat, (const MV) *Z_hat);
01012     blas.TRSM (LEFT_SIDE, UPPER_TRI, NO_TRANS, NON_UNIT_DIAG, N, numVecs,
01013                STS::one (), U_dense->values (), U_dense->stride (),
01014                W_hat->getValuesNonConst ().getRawPtr (),
01015                as<int> (W_hat->getStride ()));
01016     // Compute W = U_sparse \ Z_hat.
01017     U_sparse->template solve<scalar_type, scalar_type> (NO_TRANS,
01018                                                         (const MV) *Z_hat, *W);
01019     // Compare W and W_hat.  Use Z as scratch space.
01020     relErr = maxRelativeError (W_hat, W, Z);
01021     if (relErr > tol) {
01022       err << "Sparse triangular solve test failed for upper triangular matrix "
01023         "U.  Maximum relative error " << relErr << " exceeds the given "
01024         "tolerance " << tol << ".\n";
01025       success = false;
01026     }
01027 
01028     TEUCHOS_TEST_FOR_EXCEPTION(! success, std::runtime_error,
01029       "One or more sparse ops tests failed.  Here is the full report:\n"
01030       << err.str());
01031   }
01032 
01033   void
01034   benchmarkSparseOps (std::vector<std::pair<std::string, double> >& results,
01035                       const std::string& label,
01036                       const Teuchos::RCP<node_type>& node,
01037                       const ordinal_type numRows,
01038                       const ordinal_type numCols,
01039                       const ordinal_type numVecs,
01040                       const int numTrials) const
01041   {
01042     using Teuchos::arcp;
01043     using Teuchos::Array;
01044     using Teuchos::as;
01045     using Teuchos::RCP;
01046     using Teuchos::rcp;
01047     using Teuchos::LOWER_TRI;
01048     using Teuchos::UPPER_TRI;
01049     using Teuchos::NON_UNIT_DIAG;
01050     using Teuchos::UNIT_DIAG;
01051     typedef Array<size_t>::size_type size_type;
01052     typedef Teuchos::ScalarTraits<scalar_type> STS;
01053     typedef Teuchos::ScalarTraits<magnitude_type> STM;
01054     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
01055     typedef Kokkos::DefaultArithmetic<MV> MVT;
01056 
01057     const bool testTriSolve = (numRows == numCols);
01058 
01059     RCP<dense_matrix_type> A_dense, L_dense, U_dense;
01060     RCP<SparseOpsType> A_sparse, L_sparse, U_sparse;
01061     Array<ordinal_type> ipiv;
01062 
01063     if (testTriSolve) {
01064       makeDenseTestProblem (A_dense, L_dense, U_dense, ipiv, numRows);
01065       // Convert L_dense and U_dense into sparse matrices.
01066       L_sparse = denseTriToSparseOps (*L_dense, node, LOWER_TRI, UNIT_DIAG);
01067       U_sparse = denseTriToSparseOps (*U_dense, node, UPPER_TRI, NON_UNIT_DIAG);
01068     }
01069     else {
01070       A_dense = rcp (new dense_matrix_type (numRows, numCols));
01071     }
01072     // Convert A_dense into a sparse matrix.
01073     A_sparse = denseToSparseOps (*A_dense, node);
01074 
01075     // // Compute a random input multivector.
01076     // RCP<MV> X = makeMultiVector (node, numCols, numVecs);
01077     // MVT::Random (*X);
01078     // RCP<MV> Y = makeMultiVector (node, numRows, numVecs);
01079     // MVT::Init (*Y, STS::zero ());
01080 
01081     benchmarkSparseMatVec (results, label, *A_sparse,
01082                            numRows, numCols, numVecs, numTrials);
01083     if (testTriSolve) {
01084       benchmarkSparseTriSolve (results, label, "lower tri, unit diag",
01085                                *L_sparse, numRows, numCols, numVecs, numTrials);
01086       benchmarkSparseTriSolve (results, label, "upper tri, non unit diag",
01087                                *U_sparse, numRows, numCols, numVecs, numTrials);
01088     }
01089   }
01090 
01104   void
01105   benchmarkSparseMatVecFromFile (std::vector<std::pair<std::string, double> >& results,
01106                                  const std::string& filename,
01107                                  const std::string& label,
01108                                  const Teuchos::RCP<node_type>& node,
01109                                  const ordinal_type numVecs,
01110                                  const int numTrials) const
01111   {
01112     using Teuchos::RCP;
01113     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
01114     typedef Kokkos::DefaultArithmetic<MV> MVT;
01115 
01116     // SparseOpsType isn't required to tell us how many rows and
01117     // columns the sparse matrix has, so we find out when we read the
01118     // file.  Kokkos ignores uplo and diag for sparse mat-vec, so we
01119     // don't need to provide those arguments to
01120     // makeSparseOpsFromFile() here.
01121     ordinal_type numRows = 0;
01122     ordinal_type numCols = 0;
01123     RCP<SparseOpsType> A_sparse =
01124       makeSparseOpsFromFile (numRows, numCols, node, filename);
01125 
01126     // Compute a random input multivector.
01127     RCP<MV> X = makeMultiVector (node, numCols, numVecs);
01128     MVT::Random (*X);
01129     RCP<MV> Y = makeMultiVector (node, numRows, numVecs); // output MV.
01130 
01131     benchmarkSparseMatVec (results, label, *A_sparse,
01132                            numRows, numCols, numVecs, numTrials);
01133     // benchmarkSparseTriSolve (results, label, "lower tri, unit diag",
01134     //                          *L_sparse, numRows, numCols, numVecs, numTrials);
01135     // benchmarkSparseTriSolve (results, label, "upper tri, non unit diag",
01136     //                          *U_sparse, numRows, numCols, numVecs, numTrials);
01137   }
01138 
01139 private:
01164   void
01165   benchmarkSparseMatVec (std::vector<std::pair<std::string, double> >& results,
01166                          const std::string& label,
01167                          const SparseOpsType& ops,
01168                          const ordinal_type numRows,
01169                          const ordinal_type numCols,
01170                          const ordinal_type numVecs,
01171                          const int numTrials) const
01172   {
01173     using Teuchos::RCP;
01174     using Teuchos::Time;
01175     using Teuchos::TimeMonitor;
01176     typedef Teuchos::ScalarTraits<scalar_type> STS;
01177     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
01178     typedef Kokkos::DefaultArithmetic<MV> MVT;
01179 
01180     RCP<MV> X = makeMultiVector (ops.getNode (), numRows, numVecs);
01181     RCP<MV> Y = makeMultiVector (ops.getNode (), numRows, numVecs);
01182     //MVT::Init (*Y, STS::zero()); // makeMultiVector() already does this.
01183     MVT::Random (*X);
01184 
01185     const int numBenchmarks = STS::isComplex ? 6 : 4;
01186     results.reserve (numBenchmarks);
01187 
01188     // Time sparse matrix-vector multiply, overwrite mode, no transpose.
01189     {
01190       const std::string timerName (label + ": Y = A*X");
01191       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
01192       if (timer.is_null ()) {
01193         timer = TimeMonitor::getNewCounter (timerName);
01194       }
01195       {
01196         TimeMonitor timeMon (*timer);
01197         for (int i = 0; i < numTrials; ++i) {
01198           ops.template multiply<scalar_type, scalar_type> (Teuchos::NO_TRANS,
01199                                                            STS::one(), *X, *Y);
01200         }
01201       }
01202       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
01203     }
01204 
01205     // Time sparse matrix-vector multiply, update mode, no transpose.
01206     // We subtract to simulate a residual computation.
01207     {
01208       const std::string timerName (label + ": Y = Y - A*X");
01209       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
01210       if (timer.is_null ()) {
01211         timer = TimeMonitor::getNewCounter (timerName);
01212       }
01213       {
01214         TimeMonitor timeMon (*timer);
01215         for (int i = 0; i < numTrials; ++i) {
01216           ops.template multiply<scalar_type, scalar_type> (Teuchos::NO_TRANS,
01217                                                            -STS::one(), *X,
01218                                                            STS::one(), *Y);
01219         }
01220       }
01221       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
01222     }
01223 
01224     // Time sparse matrix-vector multiply, overwrite mode, transpose.
01225     {
01226       const std::string timerName (label + ": Y = A^T * X");
01227       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
01228       if (timer.is_null ()) {
01229         timer = TimeMonitor::getNewCounter (timerName);
01230       }
01231       {
01232         TimeMonitor timeMon (*timer);
01233         for (int i = 0; i < numTrials; ++i) {
01234           ops.template multiply<scalar_type, scalar_type> (Teuchos::TRANS,
01235                                                            STS::one(), *X, *Y);
01236         }
01237       }
01238       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
01239     }
01240 
01241     // Time sparse matrix-vector multiply, update mode, transpose.
01242     // We subtract to simulate a residual computation.
01243     {
01244       const std::string timerName (label + ": Y = Y - A^T * X");
01245       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
01246       if (timer.is_null ()) {
01247         timer = TimeMonitor::getNewCounter (timerName);
01248       }
01249       {
01250         TimeMonitor timeMon (*timer);
01251         for (int i = 0; i < numTrials; ++i) {
01252           ops.template multiply<scalar_type, scalar_type> (Teuchos::TRANS,
01253                                                            -STS::one(), *X,
01254                                                            STS::one(), *Y);
01255         }
01256       }
01257       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
01258     }
01259 
01260     // Only test conjugate transpose if scalar_type is complex.
01261     if (STS::isComplex) {
01262       // Time sparse matrix-vector multiply, overwrite mode, conjugate transpose.
01263       {
01264         const std::string timerName (label + ": Y = A^H * X");
01265         RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
01266         if (timer.is_null ()) {
01267           timer = TimeMonitor::getNewCounter (timerName);
01268         }
01269         {
01270           TimeMonitor timeMon (*timer);
01271           for (int i = 0; i < numTrials; ++i) {
01272             ops.template multiply<scalar_type, scalar_type> (Teuchos::CONJ_TRANS,
01273                                                              STS::one(), *X, *Y);
01274           }
01275         }
01276         results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
01277       }
01278 
01279       // Time sparse matrix-vector multiply, update mode, conjugate transpose.
01280       // We subtract to simulate a residual computation.
01281       {
01282         const std::string timerName (label + ": Y = Y - A^H * X");
01283         RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
01284         if (timer.is_null ()) {
01285           timer = TimeMonitor::getNewCounter (timerName);
01286         }
01287         {
01288           TimeMonitor timeMon (*timer);
01289           for (int i = 0; i < numTrials; ++i) {
01290             ops.template multiply<scalar_type, scalar_type> (Teuchos::CONJ_TRANS,
01291                                                              -STS::one(), *X,
01292                                                              STS::one(), *Y);
01293           }
01294         }
01295         results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
01296       }
01297     }
01298   }
01299 
01300 
01301 
01340   void
01341   benchmarkSparseTriSolve (std::vector<std::pair<std::string, double> >& results,
01342                            const std::string& opsLabel,
01343                            const std::string& benchmarkLabel,
01344                            const SparseOpsType& ops,
01345                            const ordinal_type numRows,
01346                            const ordinal_type numCols,
01347                            const ordinal_type numVecs,
01348                            const int numTrials) const
01349   {
01350     using Teuchos::RCP;
01351     using Teuchos::Time;
01352     using Teuchos::TimeMonitor;
01353     typedef Teuchos::ScalarTraits<scalar_type> STS;
01354     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
01355     typedef Kokkos::DefaultArithmetic<MV> MVT;
01356 
01357     RCP<MV> X = makeMultiVector (ops.getNode (), numCols, numVecs);
01358     RCP<MV> Y = makeMultiVector (ops.getNode (), numRows, numVecs);
01359     //MVT::Init (*X, STS::zero()); // makeMultiVector() already does this.
01360     MVT::Random (*Y);
01361 
01362     const int numBenchmarks = STS::isComplex ? 6 : 4;
01363     results.reserve (numBenchmarks);
01364 
01365     // Time sparse triangular solve, no transpose.
01366     {
01367       const std::string timerName (opsLabel + "Y = A \\ X (" + benchmarkLabel + ")");
01368       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
01369       if (timer.is_null ()) {
01370         timer = TimeMonitor::getNewCounter (timerName);
01371       }
01372       {
01373         TimeMonitor timeMon (*timer);
01374         for (int i = 0; i < numTrials; ++i) {
01375           ops.template solve<scalar_type, scalar_type> (Teuchos::NO_TRANS,
01376                                                         *Y, *X);
01377         }
01378       }
01379       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
01380     }
01381 
01382     // Time sparse triangular solve, transpose.
01383     {
01384       const std::string timerName (opsLabel + "Y = A^T \\ X (" + benchmarkLabel + ")");
01385       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
01386       if (timer.is_null ()) {
01387         timer = TimeMonitor::getNewCounter (timerName);
01388       }
01389       {
01390         TimeMonitor timeMon (*timer);
01391         for (int i = 0; i < numTrials; ++i) {
01392           ops.template solve<scalar_type, scalar_type> (Teuchos::NO_TRANS,
01393                                                         *Y, *X);
01394         }
01395       }
01396       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
01397     }
01398 
01399     // Only test conjugate transpose if scalar_type is complex.
01400     if (STS::isComplex) {
01401       // Time sparse triangular solve, conjugate transpose.
01402       const std::string timerName (opsLabel + "Y = A^H \\ X (" + benchmarkLabel + ")");
01403       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
01404       if (timer.is_null ()) {
01405         timer = TimeMonitor::getNewCounter (timerName);
01406       }
01407       {
01408         TimeMonitor timeMon (*timer);
01409         for (int i = 0; i < numTrials; ++i) {
01410           ops.template solve<scalar_type, scalar_type> (Teuchos::NO_TRANS,
01411                                                         *Y, *X);
01412         }
01413       }
01414       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
01415     }
01416   }
01417 };
01418 
01419 #endif // __CrsMatrix_TestSparseOps_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends