Kokkos Node API and Local Linear Algebra Kernels Version of the Day
DefaultSparseOps_TestSparseOps.hpp
Go to the documentation of this file.
00001 /*
00002 //@HEADER
00003 // ************************************************************************
00004 //
00005 //          Kokkos: Node API and Parallel Node Kernels
00006 //              Copyright (2008) Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00039 //
00040 // ************************************************************************
00041 //@HEADER
00042 */
00043 
00044 #ifndef __CrsMatrix_TestSparseOps_hpp
00045 #define __CrsMatrix_TestSparseOps_hpp
00046 
00047 #include <Teuchos_MatrixMarket_Raw_Reader.hpp>
00048 #include <Teuchos_as.hpp>
00049 #include <Teuchos_BLAS.hpp>
00050 #include <Teuchos_LAPACK.hpp>
00051 #include <Teuchos_SerialDenseMatrix.hpp>
00052 #include <Teuchos_TimeMonitor.hpp>
00053 #include <Kokkos_MultiVector.hpp>
00054 #include <Kokkos_DefaultArithmetic.hpp>
00055 
00059 
00060 
00061 namespace {
00062   template<class T, class OrdinalType>
00063   class CopyOp {
00064   private:
00065     T* const out_;
00066     const T* const in_;
00067 
00068   public:
00069     CopyOp (T* const out, const T* const in) : out_ (out), in_ (in) {}
00070 
00071     // FIXME (mfh 09 Aug 2012) make a device kernel.
00072     void execute (const OrdinalType i) {
00073       out_[i] = in_[i];
00074     }
00075   };
00076 } // namespace (anonymous)
00077 
00086 template<class SparseOpsType>
00087 class TestSparseOps {
00088 public:
00089   typedef SparseOpsType sparse_ops_type;
00090   typedef typename sparse_ops_type::scalar_type scalar_type;
00091   typedef typename Teuchos::ScalarTraits<scalar_type>::magnitudeType magnitude_type;
00092   typedef typename sparse_ops_type::ordinal_type ordinal_type;
00093   typedef typename sparse_ops_type::node_type node_type;
00094 
00095   typedef typename sparse_ops_type::template graph<ordinal_type, node_type>::graph_type graph_type;
00096   typedef typename sparse_ops_type::template matrix<scalar_type, ordinal_type, node_type>::matrix_type matrix_type;
00097 
00108 
00109 
00111   typedef Teuchos::SerialDenseMatrix<int, scalar_type> dense_matrix_type;
00112 
00113 private:
00115   typedef Teuchos::BLAS<int, scalar_type> blas_type;
00117   typedef Teuchos::LAPACK<int, scalar_type> lapack_type;
00119 
00120   Teuchos::RCP<Teuchos::FancyOStream> out_;
00121   const bool verbose_;
00122   const bool debug_;
00123 
00125   void
00126   printDenseColumn (Teuchos::FancyOStream& out,
00127                     const dense_matrix_type& A,
00128                     const ordinal_type j) const
00129   {
00130     const ordinal_type numRows = A.numRows ();
00131     out << "[";
00132     for (ordinal_type i = 0; i < numRows; ++i) {
00133       out << A(i,j);
00134       if (i + Teuchos::OrdinalTraits<ordinal_type>::one() < numRows) {
00135         out << " ";
00136       }
00137     }
00138     out << "]";
00139   }
00140 
00142   void
00143   printDenseMatrix (Teuchos::FancyOStream& out,
00144                     const dense_matrix_type& A) const
00145   {
00146     const ordinal_type numRows = A.numRows ();
00147     const ordinal_type numCols = A.numCols ();
00148     out << "[" << std::endl;
00149 
00150     // Save the original format flags before changing, so that if
00151     // output throws an exception, we can restore the original flags
00152     // before rethrowing.
00153     std::ios_base::fmtflags flags = out.flags ();
00154     try {
00155       out << std::scientific;
00156       out.precision (2); // Low precision, just for error checking
00157 
00158       for (ordinal_type i = 0; i < numRows; ++i) {
00159         for (ordinal_type j = 0; j < numCols; ++j) {
00160           out << A(i,j);
00161           if (j + Teuchos::OrdinalTraits<ordinal_type>::one() < numCols) {
00162             out << " ";
00163           }
00164         }
00165         if (i + Teuchos::OrdinalTraits<ordinal_type>::one() < numRows) {
00166           out << std::endl;
00167         }
00168       }
00169     }
00170     catch (...) {
00171       out.flags (flags); // Restore original format flags.
00172       throw;
00173     }
00174     out << "]";
00175   }
00176 
00177   void
00178   printSparseMatrixAsDense (Teuchos::FancyOStream& out,
00179                             const SparseOpsType& A_sparse) const
00180   {
00181     Teuchos::RCP<dense_matrix_type> A_dense = A_sparse.asDenseMatrix ();
00182     printDenseMatrix (out, *A_dense);
00183   }
00184 
00185 public:
00192   TestSparseOps (const Teuchos::RCP<Teuchos::FancyOStream>& out,
00193                  const bool verbose=false,
00194                  const bool debug=false) :
00195     out_ (out),
00196     verbose_ (verbose),
00197     debug_ (debug)
00198   {
00199     TEUCHOS_TEST_FOR_EXCEPTION(out.is_null(), std::invalid_argument,
00200       "TestSparseOps constructor: the given output stream 'out' must be nonnull.");
00201   }
00202 
00237   void
00238   makeDenseTestProblem (Teuchos::RCP<dense_matrix_type>& A_out,
00239                         Teuchos::RCP<dense_matrix_type>& L_out,
00240                         Teuchos::RCP<dense_matrix_type>& U_out,
00241                         Teuchos::Array<ordinal_type>& pivots,
00242                         const ordinal_type numRows) const
00243   {
00244     using Teuchos::Array;
00245     using Teuchos::as;
00246     using Teuchos::RCP;
00247     using Teuchos::rcp;
00248     typedef dense_matrix_type MT;
00249 
00250     if (verbose_) {
00251       *out_ << "makeDenseTestProblem:" << std::endl;
00252     }
00253     const ordinal_type N = numRows;
00254     //const ordinal_type numCols = numRows;
00255     RCP<MT> A = rcp (new MT (N, N));
00256     A->random ();
00257 
00258     // Force A to be diagonally dominant, so that LU factorization doesn't pivot.
00259     for (ordinal_type i = 0; i < numRows; ++i) {
00260       (*A)(i,i) += as<scalar_type> (42);
00261     }
00262 
00263     // Keep a copy of A, since LAPACK's LU factorization overwrites
00264     // its input matrix with the L and U factors.
00265     RCP<MT> A_copy = rcp (new MT (*A));
00266 
00267     if (debug_) {
00268       // Show the pre-factored matrix A.
00269       Teuchos::OSTab tab2 (out_);
00270       *out_ << "Pre-factored A = ";
00271       printDenseMatrix (*out_, *A);
00272       *out_ << std::endl;
00273     }
00274 
00275     // Compute the LU factorization of A.
00276     lapack_type lapack;
00277     int info = 0;
00278     Array<ordinal_type> ipiv (N);
00279     lapack.GETRF (N, N, A->values (), A->stride (), ipiv.getRawPtr (), &info);
00280     TEUCHOS_TEST_FOR_EXCEPTION(info < 0, std::logic_error, "LAPACK's _GETRF "
00281       "routine reported that its " << (-info) << "-th argument had an illegal "
00282       "value.  This probably indicates a bug in the way Kokkos is calling the "
00283       "routine.  Please report this bug to the Kokkos developers.");
00284     TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error, "LAPACK's _GETRF "
00285       "routine reported that the " << info << "-th diagonal element of the U "
00286       "factor is exactly zero.  This indicates that the matrix A is singular.  "
00287       "A is pseudorandom, so it is possible but unlikely that it actually is "
00288       "singular.  More likely is that the pseudorandom number generator isn't "
00289       "working correctly.  This is not a Kokkos bug, but it could be a Teuchos "
00290       "bug, since Teuchos is invoking the generator.");
00291 
00292     // Create L and U, and copy out the lower resp. upper triangle of
00293     // A into L resp. U.
00294     RCP<MT> L = rcp (new MT (N, N));
00295     RCP<MT> U = rcp (new MT (N, N));
00296     {
00297       // Get the MT refs so we don't have to dereference the RCPs each
00298       // time we change an entry.
00299       MT& LL = *L;
00300       MT& UU = *U;
00301       MT& AA = *A;
00302 
00303       for (ordinal_type i = 0; i < N; ++i) {
00304         // LL has an implicitly stored unit diagonal, so don't include j==i.
00305         for (ordinal_type j = 0; j < i; ++j) {
00306           LL(i,j) = AA(i,j);
00307         }
00308         for (ordinal_type j = i; j < N; ++j) {
00309           UU(i,j) = AA(i,j);
00310         }
00311       }
00312     }
00313 
00314     if (debug_) {
00315       // Show the post-factored matrix A.
00316       Teuchos::OSTab tab2 (out_);
00317       *out_ << "Post-factored A = ";
00318       printDenseMatrix (*out_, *A);
00319       *out_ << std::endl;
00320     }
00321 
00322     // "Commit" the outputs.
00323     pivots.resize (N);
00324     std::copy (ipiv.begin (), ipiv.end (), pivots.begin ());
00325     A_out = A_copy; // Return the "original" A, before the factorization.
00326     L_out = L;
00327     U_out = U;
00328   }
00329 
00347   void
00348   readFile (ordinal_type& numRows,
00349             ordinal_type& numCols,
00350             Teuchos::ArrayRCP<const       size_t>& rowptr,
00351             Teuchos::ArrayRCP<const ordinal_type>& colind,
00352             Teuchos::ArrayRCP<const scalar_type>& values,
00353             const std::string& filename) const
00354   {
00355     using Teuchos::ArrayRCP;
00356     using Teuchos::as;
00357     using Teuchos::arcp;
00358     using Teuchos::arcp_const_cast;
00359     using Teuchos::null;
00360     using Teuchos::ParameterList;
00361     using Teuchos::parameterList;
00362     using Teuchos::RCP;
00363     using Teuchos::rcp;
00364 
00365     // The reader wants ptr to have the same type of entries as ind.
00366     // We'll copy right before leaving the routine.
00367     ArrayRCP<ordinal_type> ptr;
00368     ArrayRCP<ordinal_type> ind;
00369     ArrayRCP<scalar_type>  val;
00370     ordinal_type nrows = 0, ncols = 0;
00371 
00372     Teuchos::MatrixMarket::Raw::Reader<scalar_type, ordinal_type> reader;
00373 
00374     // In "intolerant" mode, this will throw an exception if there is
00375     // a syntax error in the file.
00376     (void) reader.readFile (ptr, ind, val, nrows, ncols, filename);
00377 
00378     ArrayRCP<size_t> ptrout (nrows + 1);
00379     std::copy (ptr.begin(), ptr.end(), ptrout.begin());
00380     // Now we're done with ptr.
00381     ptr = null;
00382 
00383     // Assign the output arguments.
00384     numRows = as<ordinal_type> (nrows);
00385     numCols = as<ordinal_type> (ncols);
00386     rowptr = arcp_const_cast<const size_t> (ptrout);
00387     colind = arcp_const_cast<const ordinal_type> (ind);
00388     values = arcp_const_cast<const scalar_type> (val);
00389   }
00390 
00420   Teuchos::RCP<SparseOpsType>
00421   makeSparseOpsFromFile (ordinal_type& numRows,
00422                          ordinal_type& numCols,
00423                          const Teuchos::RCP<node_type>& node,
00424                          Teuchos::ParameterList& params,
00425                          const std::string& filename,
00426                          const Teuchos::EUplo uplo = Teuchos::UNDEF_TRI,
00427                          const Teuchos::EDiag diag = Teuchos::NON_UNIT_DIAG) const
00428   {
00429     using Teuchos::ArrayRCP;
00430     using Teuchos::arcp_const_cast;
00431     using Teuchos::as;
00432 
00433     ordinal_type theNumRows = 0;
00434     ordinal_type theNumCols = 0;
00435     ArrayRCP<const size_t>       ptr;
00436     ArrayRCP<const ordinal_type> ind;
00437     ArrayRCP<const scalar_type>  val;
00438     readFile (theNumRows, theNumCols, ptr, ind, val, filename);
00439     numRows = as<ordinal_type> (theNumRows);
00440     numCols = as<ordinal_type> (theNumCols);
00441 
00442     return makeSparseOps (node, params, numRows, numCols, ptr, ind, val, uplo, diag);
00443   }
00444 
00445 
00481   Teuchos::RCP<SparseOpsType>
00482   makeSparseOps (const Teuchos::RCP<node_type>& node,
00483                  Teuchos::ParameterList& params,
00484                  const ordinal_type numRows,
00485                  const ordinal_type numCols,
00486                  Teuchos::ArrayRCP<const size_t>       &ptr,
00487                  Teuchos::ArrayRCP<const ordinal_type> &ind,
00488                  Teuchos::ArrayRCP<const scalar_type>  &val,
00489                  const Teuchos::EUplo uplo = Teuchos::UNDEF_TRI,
00490                  const Teuchos::EDiag diag = Teuchos::NON_UNIT_DIAG) const
00491   {
00492     using Teuchos::ArrayRCP;
00493     using Teuchos::arcp_const_cast;
00494     using Teuchos::as;
00495     using Teuchos::null;
00496     using Teuchos::ParameterList;
00497     using Teuchos::parameterList;
00498     using Teuchos::RCP;
00499     using Teuchos::rcp;
00500 
00501     // We don't know where ptr, ind, and val came from, so we have to
00502     // reallocate using the SparseOpsType.  We start over by counting
00503     // the number of entries in each row.
00504     ArrayRCP<size_t> numEntriesPerRow (numRows);
00505     for (ordinal_type i = 0; i < numRows; ++i) {
00506       TEUCHOS_TEST_FOR_EXCEPTION(ptr[i+1] < ptr[i], std::invalid_argument,
00507         "makeSparseOps: Invalid row offsets array on input.  "
00508         "ptr[" << (i+1) << "] = " << ptr[i+1] << " < ptr[" << i << "] = "
00509         << ptr[i] << ".");
00510       const size_t curNumEntries = ptr[i+1] - ptr[i]; // >= 0; see test above.
00511       numEntriesPerRow[i] = as<ordinal_type> (curNumEntries);
00512     }
00513 
00514     // Reallocate and copy over the three arrays for sparse matrix storage:
00515     //
00516     // ptr: row offsets
00517     // ind: column indices
00518     // val: matrix entries
00519     ptr = SparseOpsType::allocRowPtrs (node, numEntriesPerRow ());
00520     // We don't need the array of entry counts per row anymore.
00521     numEntriesPerRow = Teuchos::null;
00522     {
00523       ArrayRCP<ordinal_type> newInd =
00524         SparseOpsType::template allocStorage<ordinal_type> (node, ptr ());
00525       ArrayRCP<scalar_type> newVal =
00526         SparseOpsType::template allocStorage<scalar_type> (node, ptr ());
00527       node->parallel_for (0, ind.size(),
00528                           CopyOp<ordinal_type, ordinal_type> (newInd.getRawPtr (),
00529                                                               ind.getRawPtr ()));
00530       node->parallel_for (0, val.size(),
00531                           CopyOp<scalar_type, ordinal_type> (newVal.getRawPtr (),
00532                                                              val.getRawPtr ()));
00533       ind = arcp_const_cast<const ordinal_type> (newInd);
00534       val = arcp_const_cast<const scalar_type> (newVal);
00535     }
00536 
00537     return makeSparseOpsFromArrays (node, params, numRows, numCols,
00538                                     ptr, ind, val, uplo, diag);
00539   }
00540 
00575   Teuchos::RCP<SparseOpsType>
00576   makeSparseOpsFromArrays (const Teuchos::RCP<node_type>& node,
00577                            Teuchos::ParameterList& opsParams,
00578                            const ordinal_type numRows,
00579                            const ordinal_type numCols,
00580                            Teuchos::ArrayRCP<const size_t>       &ptr,
00581                            Teuchos::ArrayRCP<const ordinal_type> &ind,
00582                            Teuchos::ArrayRCP<const scalar_type>  &val,
00583                            const Teuchos::EUplo uplo,
00584                            const Teuchos::EDiag diag) const
00585   {
00586     using Teuchos::null;
00587     using Teuchos::RCP;
00588     using Teuchos::rcp;
00589     using Teuchos::rcp_const_cast;
00590 
00591     RCP<graph_type> graph = rcp (new graph_type (numRows, numCols, node, null));
00592     graph->setStructure (ptr, ind);
00593     RCP<matrix_type> matrix =
00594       rcp (new matrix_type (rcp_const_cast<const graph_type> (graph), null));
00595     matrix->setValues (val);
00596     SparseOpsType::finalizeGraphAndMatrix (uplo, diag, *graph, *matrix, null);
00597     // We don't need these arrays anymore.
00598     ptr = null;
00599     ind = null;
00600     val = null;
00601 
00602     RCP<SparseOpsType> ops = rcp (new SparseOpsType (node, opsParams));
00603     ops->setGraphAndMatrix (graph, matrix);
00604     return ops;
00605   }
00606 
00617   Teuchos::RCP<SparseOpsType>
00618   denseTriToSparseOps (const dense_matrix_type& A,
00619                        const Teuchos::RCP<node_type>& node,
00620                        Teuchos::ParameterList& params,
00621                        const Teuchos::EUplo uplo,
00622                        const Teuchos::EDiag diag) const
00623   {
00624     using Teuchos::ArrayRCP;
00625     using Teuchos::arcp;
00626     using Teuchos::arcp_const_cast;
00627     using Teuchos::as;
00628     using Teuchos::null;
00629     using Teuchos::RCP;
00630     using Teuchos::rcp;
00631     using Teuchos::rcp_const_cast;
00632 
00633     const ordinal_type N = A.numRows ();
00634     TEUCHOS_TEST_FOR_EXCEPTION(N < 1, std::invalid_argument,
00635       "denseTriToSparseOps: Input dense matrix must have a positive number of "
00636       "rows.");
00637     TEUCHOS_TEST_FOR_EXCEPTION(N != A.numCols (), std::invalid_argument,
00638       "denseTriToSparseOps: Input dense matrix must be square.");
00639     TEUCHOS_TEST_FOR_EXCEPTION(uplo == Teuchos::UNDEF_TRI, std::invalid_argument,
00640       "denseTriToSparseOps: uplo must be Teuchos::LOWER_TRI or Teuchos::UPPER_TRI.");
00641 
00642     // If we're not storing the diagonal entries, subtract N off the
00643     // total number of entries to store.
00644     const size_t NNZ = diag == Teuchos::UNIT_DIAG ?
00645       (N*(N-1)) / 2 : // UNIT_DIAG
00646       (N*(N+1)) / 2;  // NON_UNIT_DIAG
00647 
00648     // Compute the number of entries in each column.
00649     // This is necessary for allocating the row offsets (ptr) array.
00650     // We could just construct ptr directly, but some local sparse ops
00651     // implementations like to allocate and fill in ptr themselves in
00652     // order to ensure first-touch allocation.
00653     ArrayRCP<size_t> numEntriesPerRow (N);
00654     {
00655       size_t sumOfEntriesPerRow = 0; // correctness check
00656 
00657       // Fill in the number of entries in each row.
00658       for (ordinal_type i = 0; i < N; ++i) {
00659         const size_t implicitDiag = (diag == Teuchos::UNIT_DIAG) ? 1 : 0;
00660         if (uplo == Teuchos::LOWER_TRI) {
00661           numEntriesPerRow[i] = (i + 1) - implicitDiag;
00662         }
00663         else if (uplo == Teuchos::UPPER_TRI) {
00664           numEntriesPerRow[i] = (N - i) - implicitDiag;
00665         }
00666         sumOfEntriesPerRow += numEntriesPerRow[i];
00667       }
00668       TEUCHOS_TEST_FOR_EXCEPTION(sumOfEntriesPerRow != NNZ, std::logic_error,
00669         "TestSparseOps::denseTriToSparseOps: "
00670         "When computing number of entries per row: total number of entries "
00671         "should be NNZ = " << NNZ << ", but the sum of the number of entries in "
00672         "each row is " << sumOfEntriesPerRow << ".  Please report this bug to "
00673         "the Kokkos developers.");
00674     }
00675 
00676     // Allocate the three arrays for sparse matrix storage:
00677     //
00678     // ptr: row offsets (will be initialized correctly)
00679     // ind: column indices (possibly filled with zeros)
00680     // val: matrix entries (possibly filled with zeros)
00681     ArrayRCP<size_t> ptr =
00682       SparseOpsType::allocRowPtrs (node, numEntriesPerRow ());
00683     // We don't need the array of entry counts per row anymore.
00684     //numEntriesPerRow = Teuchos::null;
00685     ArrayRCP<ordinal_type> ind =
00686       SparseOpsType::template allocStorage<ordinal_type> (node, ptr ());
00687     ArrayRCP<scalar_type> val =
00688       SparseOpsType::template allocStorage<scalar_type> (node, ptr ());
00689 
00690     // Copy the dense triangular matrix into ind and val.
00691     size_t curOffset = 0; // current row offset
00692     for (ordinal_type i = 0; i < N; ++i) {
00693       TEUCHOS_TEST_FOR_EXCEPTION(ptr[i] != curOffset, std::logic_error,
00694         "ptr[" << i << "] = " << ptr[i] << " != curOffset = " << curOffset
00695         << ".");
00696       // Compute loop bounds
00697       ordinal_type lowerBound, upperBound;
00698       if (uplo == Teuchos::UPPER_TRI) {
00699         lowerBound = (diag == Teuchos::UNIT_DIAG) ? i+1 : i;
00700         upperBound = N; // exclusive
00701       }
00702       else { // uplo == Teuchos::LOWER_TRI
00703         lowerBound = 0;
00704         upperBound = (diag == Teuchos::UNIT_DIAG) ? i : i+1; // exclusive
00705       }
00706       TEUCHOS_TEST_FOR_EXCEPTION(
00707         as<size_t> (upperBound) - as<size_t> (lowerBound) != numEntriesPerRow[i],
00708         std::logic_error,
00709         "At row i = " << i << ", loop bounds do not match expected number of "
00710         "entries per row.  upperBound = " << upperBound << ", lowerBound = "
00711         << lowerBound << ", but numEntriesPerRow[i] = " << numEntriesPerRow[i]
00712         << ".");
00713       // Copy valid entries of the current row
00714       for (ordinal_type j = lowerBound; j < upperBound; ++j) {
00715         ind[curOffset] = j;
00716         val[curOffset] = A(i, j);
00717         ++curOffset;
00718       }
00719     }
00720     TEUCHOS_TEST_FOR_EXCEPTION(ptr[N] != curOffset, std::logic_error,
00721       "ptr[" << N << "] = " << ptr[N] << " != curOffset = " << curOffset
00722       << ".");
00723     TEUCHOS_TEST_FOR_EXCEPTION(curOffset != NNZ, std::logic_error,
00724       "TestSparseOps::denseTriToSparseOps: Expected " << NNZ << " entries in "
00725       "the sparse matrix, but got " << curOffset << " instead.  Please report "
00726       "this bug (in tests) to the Kokkos developers.");
00727 
00728     if (debug_) {
00729       // Verify the conversion process, by ensuring that every sparse
00730       // matrix entry corresponds to its dense matrix entry.
00731       for (ordinal_type i = 0; i < N; ++i) {
00732         for (size_t k = ptr[i]; k < ptr[i+1]; ++k) {
00733           const ordinal_type j = ind[k];
00734           const scalar_type A_ij = val[k];
00735           TEUCHOS_TEST_FOR_EXCEPTION(A_ij != A(i,j), std::logic_error,
00736             "denseTriToSparseOps: conversion failed: in row i = " << i
00737             << ", A_ij = " << val[k] << " in the sparse matrix, but A("
00738             << i << "," << j << ") in the dense matrix is " << A(i,j) << ".");
00739         }
00740       }
00741     }
00742 
00743     ArrayRCP<const size_t> constPtr = arcp_const_cast<const size_t> (ptr);
00744     ArrayRCP<const ordinal_type> constInd = arcp_const_cast<const ordinal_type> (ind);
00745     ArrayRCP<const scalar_type> constVal = arcp_const_cast<const scalar_type> (val);
00746     return makeSparseOpsFromArrays (node, params, N, N,
00747                                     constPtr, constInd, constVal, uplo, diag);
00748   }
00749 
00764   Teuchos::RCP<SparseOpsType>
00765   sparsifyDenseToSparseOps (const dense_matrix_type& A,
00766                             const Teuchos::RCP<node_type>& node,
00767                             Teuchos::ParameterList& params,
00768                             const Teuchos::EUplo uplo = Teuchos::UNDEF_TRI,
00769                             const Teuchos::EDiag diag = Teuchos::NON_UNIT_DIAG)
00770   {
00771     using Teuchos::ArrayRCP;
00772     using Teuchos::arcp;
00773     using Teuchos::arcp_const_cast;
00774     using Teuchos::as;
00775     using Teuchos::null;
00776     using Teuchos::RCP;
00777     using Teuchos::rcp;
00778     using Teuchos::rcp_const_cast;
00779     typedef Teuchos::OrdinalTraits<ordinal_type> OTO;
00780     typedef Teuchos::ScalarTraits<scalar_type> STS;
00781 
00782     const ordinal_type numRows = A.numRows ();
00783     const ordinal_type numCols = A.numCols ();
00784 
00785     // Array of the number of entries in each column.
00786     // Initialize to zero; compute below.
00787     ArrayRCP<size_t> numEntriesPerRow (numRows, 0);
00788 
00789     // Compute the number of nonzero entries in each row.
00790     size_t totalNumEntries = 0;
00791     for (ordinal_type j = OTO::zero(); j < numCols; ++j) {
00792       for (ordinal_type i = OTO::zero(); i < numRows; ++i) {
00793         if (A(i,j) != STS::zero()) {
00794           ++numEntriesPerRow[i];
00795           ++totalNumEntries;
00796         }
00797       }
00798     }
00799 
00800     // Allocate the three arrays for sparse matrix storage:
00801     //
00802     // ptr: row offsets (will be initialized correctly)
00803     // ind: column indices (possibly filled with zeros)
00804     // val: matrix entries (possibly filled with zeros)
00805     ArrayRCP<size_t> ptr =
00806       SparseOpsType::allocRowPtrs (node, numEntriesPerRow ());
00807     // We don't need the array of entry counts per row anymore.
00808     numEntriesPerRow = Teuchos::null;
00809     ArrayRCP<ordinal_type> ind =
00810       SparseOpsType::template allocStorage<ordinal_type> (node, ptr ());
00811     ArrayRCP<scalar_type> val =
00812       SparseOpsType::template allocStorage<scalar_type> (node, ptr ());
00813 
00814     // Copy the dense matrix into ind and val.
00815     size_t curOffset = 0;
00816     for (ordinal_type i = 0; i < numRows; ++i) {
00817       TEUCHOS_TEST_FOR_EXCEPTION(ptr[i] != curOffset, std::logic_error,
00818         "ptr[" << i << "] = " << ptr[i] << " != curOffset = " << curOffset
00819         << ".");
00820       for (ordinal_type j = 0; j < numCols; ++j) {
00821         const scalar_type A_ij = A(i,j);
00822         if (A_ij != STS::zero()) {
00823           ind[curOffset] = j;
00824           val[curOffset] = A_ij;
00825           ++curOffset;
00826         }
00827       }
00828     }
00829     TEUCHOS_TEST_FOR_EXCEPTION(ptr[numRows] != curOffset, std::logic_error,
00830       "ptr[" << numRows << "] = " << ptr[numRows] << " != curOffset = "
00831       << curOffset << ".");
00832 
00833     ArrayRCP<const size_t> constPtr = arcp_const_cast<const size_t> (ptr);
00834     ArrayRCP<const ordinal_type> constInd = arcp_const_cast<const ordinal_type> (ind);
00835     ArrayRCP<const scalar_type> constVal = arcp_const_cast<const scalar_type> (val);
00836     return makeSparseOpsFromArrays (node, params, numRows, numCols,
00837                                     constPtr, constInd, constVal, uplo, diag);
00838   }
00839 
00840 
00847   Teuchos::RCP<SparseOpsType>
00848   denseToSparseOps (const dense_matrix_type& A,
00849                     const Teuchos::RCP<node_type>& node,
00850                     Teuchos::ParameterList& params) const
00851   {
00852     using Teuchos::ArrayRCP;
00853     using Teuchos::arcp;
00854     using Teuchos::arcp_const_cast;
00855     using Teuchos::as;
00856     using Teuchos::null;
00857     using Teuchos::RCP;
00858     using Teuchos::rcp;
00859     using Teuchos::rcp_const_cast;
00860 
00861     const ordinal_type numRows = A.numRows ();
00862     const ordinal_type numCols = A.numCols ();
00863     // ordinal_type * ordinal_type could overflow, since ordinal_type
00864     // need only be big enough to hold max(numRows, numCols), not
00865     // necessarily their product (the max number of entries in the
00866     // matrix).
00867     const size_t NNZ = as<size_t> (numRows) * as<size_t> (numCols);
00868 
00869     // Array of the number of entries in each column.
00870     // This is necessary for allocating the row offsets (ptr) array.
00871     // We could just construct ptr directly, but some local sparse ops
00872     // implementations like to allocate and fill in ptr themselves in
00873     // order to ensure first-touch allocation.
00874     ArrayRCP<size_t> numEntriesPerRow (numRows, numCols);
00875 
00876     // Allocate the three arrays for sparse matrix storage:
00877     //
00878     // ptr: row offsets (will be initialized correctly)
00879     // ind: column indices (possibly filled with zeros)
00880     // val: matrix entries (possibly filled with zeros)
00881     ArrayRCP<size_t> ptr =
00882       SparseOpsType::allocRowPtrs (node, numEntriesPerRow ());
00883     // We don't need the array of entry counts per row anymore.
00884     numEntriesPerRow = Teuchos::null;
00885     ArrayRCP<ordinal_type> ind =
00886       SparseOpsType::template allocStorage<ordinal_type> (node, ptr ());
00887     ArrayRCP<scalar_type> val =
00888       SparseOpsType::template allocStorage<scalar_type> (node, ptr ());
00889 
00890     // Copy the dense matrix into ind and val.
00891     size_t curOffset = 0;
00892     for (ordinal_type i = 0; i < numRows; ++i) {
00893       TEUCHOS_TEST_FOR_EXCEPTION(ptr[i] != curOffset, std::logic_error,
00894         "ptr[" << i << "] = " << ptr[i] << " != curOffset = " << curOffset
00895         << ".");
00896       for (ordinal_type j = 0; j < numCols; ++j) {
00897         ind[curOffset] = j;
00898         val[curOffset] = A(i, j);
00899         ++curOffset;
00900       }
00901     }
00902     TEUCHOS_TEST_FOR_EXCEPTION(ptr[numRows] != curOffset, std::logic_error,
00903       "ptr[" << numRows << "] = " << ptr[numRows] << " != curOffset = "
00904       << curOffset << ".");
00905     TEUCHOS_TEST_FOR_EXCEPTION(curOffset != NNZ, std::logic_error,
00906       "TestSparseOps::denseToSparseOps: Expected " << NNZ << " entries in "
00907       "the sparse matrix, but got " << curOffset << " instead.  Please report "
00908       "this bug (in tests) to the Kokkos developers.");
00909 
00910     const Teuchos::EUplo uplo = Teuchos::UNDEF_TRI;
00911     const Teuchos::EDiag diag = Teuchos::NON_UNIT_DIAG;
00912     ArrayRCP<const size_t> constPtr = arcp_const_cast<const size_t> (ptr);
00913     ArrayRCP<const ordinal_type> constInd = arcp_const_cast<const ordinal_type> (ind);
00914     ArrayRCP<const scalar_type> constVal = arcp_const_cast<const scalar_type> (val);
00915     return makeSparseOpsFromArrays (node, params, numRows, numCols,
00916                                     constPtr, constInd, constVal, uplo, diag);
00917   }
00918 
00925   Teuchos::RCP<Kokkos::MultiVector<scalar_type, node_type> >
00926   makeMultiVector (const Teuchos::RCP<node_type>& node,
00927                    const ordinal_type numRows,
00928                    const ordinal_type numCols,
00929                    const bool random=false) const
00930   {
00931     using Teuchos::arcp;
00932     using Teuchos::ArrayRCP;
00933     using Teuchos::as;
00934     using Teuchos::RCP;
00935     using Teuchos::rcp;
00936     typedef Teuchos::ScalarTraits<scalar_type> STS;
00937     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
00938     typedef Kokkos::DefaultArithmetic<MV> MVT;
00939 
00940     RCP<MV> X = rcp (new MV (node));
00941     const size_t size = as<size_t> (numRows) * as<size_t> (numCols);
00942     ArrayRCP<scalar_type> buf = node->template allocBuffer<scalar_type> (size);
00943     const size_t stride = numRows;
00944     // This doesn't actually "initialize" anything; it just sets pointers.
00945     X->initializeValues (numRows, numCols, buf, stride);
00946     // Always initialize the data first to ensure first-touch allocation.
00947     // (MVT::Random() isn't parallel by default.)
00948     MVT::Init (*X, STS::zero ());
00949     if (random) {
00950       MVT::Random (*X);
00951     }
00952     return X;
00953   }
00954 
00967   magnitude_type
00968   maxRelativeError (const Teuchos::RCP<const Kokkos::MultiVector<scalar_type, node_type> >& X,
00969                     const Teuchos::RCP<const Kokkos::MultiVector<scalar_type, node_type> >& Y,
00970                     const Teuchos::RCP<Kokkos::MultiVector<scalar_type, node_type> >& Z) const
00971   {
00972     using Teuchos::Array;
00973     using Teuchos::as;
00974     typedef Teuchos::ScalarTraits<scalar_type> STS;
00975     typedef Teuchos::ScalarTraits<magnitude_type> STM;
00976     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
00977     typedef Kokkos::DefaultArithmetic<MV> MVT;
00978 
00979     const ordinal_type numCols = as<ordinal_type> (X->getNumCols ());
00980     Array<magnitude_type> numerNorms (numCols);
00981     Array<magnitude_type> denomNorms (numCols);
00982 
00983     MVT::Assign (*Z, (const MV) *Y); // Z := Y
00984     MVT::GESUM (*Z, STS::one(), (const MV) *X, -STS::one()); // Z := X - Z
00985     MVT::NormInf ((const MV) *Z, numerNorms ());
00986     MVT::NormInf ((const MV) *X, denomNorms ());
00987 
00988     magnitude_type maxRelNorm = STM::zero();
00989     for (ordinal_type k = 0; k < numCols; ++k) {
00990       // If the norm of the current column of X is zero, use the absolute error.
00991       const magnitude_type relNorm = (denomNorms[k] == STM::zero ()) ?
00992         numerNorms[k] :
00993         numerNorms[k] / denomNorms[k];
00994       if (relNorm > maxRelNorm) {
00995         maxRelNorm = relNorm;
00996       }
00997     }
00998     return maxRelNorm;
00999   }
01000 
01001   // TODO (mfh 10 Aug 2012)
01002   //
01003   // 1. Fix bug in ops with sparse triangular matrices for numRows >=
01004   //    39.  It's probably a bug in the test code.
01005   //
01006   // 2. Expand testSparseTriOps() to test the transpose and conjugate
01007   //    transpose cases.
01008 
01026   void
01027   testSparseMatVec (const Teuchos::RCP<node_type>& node,
01028                     Teuchos::ParameterList& params,
01029                     const ordinal_type numRows,
01030                     const ordinal_type numCols,
01031                     const ordinal_type numVecs,
01032                     const magnitude_type tol) const
01033   {
01034     using Teuchos::arcp;
01035     using Teuchos::Array;
01036     using Teuchos::as;
01037     using Teuchos::null;
01038     using Teuchos::RCP;
01039     using Teuchos::rcp;
01040     using Teuchos::LEFT_SIDE;
01041     using Teuchos::LOWER_TRI;
01042     using Teuchos::UPPER_TRI;
01043     using Teuchos::NO_TRANS;
01044     using Teuchos::TRANS;
01045     using Teuchos::CONJ_TRANS;
01046     using Teuchos::NON_UNIT_DIAG;
01047     using Teuchos::UNIT_DIAG;
01048     using std::endl;
01049     typedef Teuchos::ScalarTraits<scalar_type> STS;
01050     typedef Teuchos::ScalarTraits<magnitude_type> STM;
01051     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
01052     typedef Kokkos::DefaultArithmetic<MV> MVT;
01053 
01054     Teuchos::FancyOStream& out = *out_;
01055     if (verbose_) {
01056       out << "testSparseMatVec:";
01057       Teuchos::OSTab tab1 (out_);
01058       out << "Input parameters:" << endl;
01059       Teuchos::OSTab tab2 (out_);
01060       out << "numRows = " << numRows << endl
01061           << "numCols = " << numCols << endl
01062           << "numVecs = " << numVecs << endl
01063           << "tol     = " << tol << endl;
01064     }
01065     TEUCHOS_TEST_FOR_EXCEPTION(
01066       numRows < Teuchos::OrdinalTraits<ordinal_type>::zero(),
01067       std::invalid_argument,
01068       "testSparseMatVec: numRows = " << numRows << " < 0.");
01069     TEUCHOS_TEST_FOR_EXCEPTION(
01070       numCols < Teuchos::OrdinalTraits<ordinal_type>::zero(),
01071       std::invalid_argument,
01072       "testSparseMatVec: numCols = " << numCols << " < 0.");
01073     TEUCHOS_TEST_FOR_EXCEPTION(
01074       numVecs < Teuchos::OrdinalTraits<ordinal_type>::zero(),
01075       std::invalid_argument,
01076       "testSparseMatVec: numVecs = " << numVecs << " < 0.");
01077 
01078     // A record of error messages reported by any failed tests.
01079     // We run _all_ the tests first, then report any failures.
01080     std::ostringstream err;
01081     // Whether any tests have failed thus far.
01082     bool success = true;
01083 
01084     // Relative error of the current operation.
01085     magnitude_type relErr = STM::zero ();
01086 
01087     // Make a random dense matrix.
01088     RCP<dense_matrix_type> A_dense (new dense_matrix_type (numRows, numCols));
01089     A_dense->random ();
01090 
01091     // Convert A_dense into a separate sparse matrix.
01092     RCP<SparseOpsType> A_sparse = denseToSparseOps (*A_dense, node, params);
01093     if (verbose_) {
01094       Teuchos::OSTab tab1 (out_);
01095       out << "A_sparse description:" << endl;
01096       // const Teuchos::EVerbosityLevel verbLevel = verbose_ ?
01097       //   (debug_ ? Teuchos::VERB_EXTREME : Teuchos::VERB_HIGH) :
01098       //   Teuchos::VERB_NONE;
01099       const Teuchos::EVerbosityLevel verbLevel =
01100         verbose_ ? Teuchos::VERB_HIGH : Teuchos::VERB_NONE;
01101       A_sparse->describe (out, verbLevel);
01102     }
01103 
01104     // We make MVs for input, results, and scratch space (since the
01105     // BLAS' dense triangular solve overwrites its input).  Y and
01106     // Y_hat are for A * X.  X and X_hat are for A^T * Y and A^H * Y.
01107     // X_scratch and Y_scratch are scratch multivectors: X_scratch is
01108     // for X - X_hat, and Y_scratch is for Y - Y_hat.
01109     RCP<MV> X = makeMultiVector (node, numCols, numVecs, true);
01110     RCP<MV> X_hat = makeMultiVector (node, numCols, numVecs, true);
01111     RCP<MV> Y = makeMultiVector (node, numRows, numVecs, true);
01112     RCP<MV> Y_hat = makeMultiVector (node, numRows, numVecs, true);
01113     RCP<MV> X_scratch = makeMultiVector (node, numCols, numVecs, true);
01114     RCP<MV> Y_scratch = makeMultiVector (node, numRows, numVecs, true);
01115 
01116     blas_type blas; // For dense matrix and vector operations.
01117 
01118     // Compute Y_hat := A_dense * X and Y := A_sparse * X.
01119     blas.GEMM (NO_TRANS, NO_TRANS, numRows, numVecs, numCols,
01120                STS::one (), A_dense->values (), A_dense->stride (),
01121                X->getValues ().getRawPtr (), as<int> (X->getStride ()),
01122                STS::zero (), Y_hat->getValuesNonConst ().getRawPtr (),
01123                as<int> (Y_hat->getStride ()));
01124     A_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
01125                                                            (const MV) *X, *Y);
01126     // Compare Y and Y_hat.
01127     relErr = maxRelativeError (Y_hat, Y, Y_scratch);
01128     if (relErr > tol) {
01129       err << "Sparse matrix-(multi)vector multiply test failed for general "
01130           << "matrix A, with alpha = 1 and beta = 0." << std::endl
01131           << "Maximum relative error " << relErr
01132           << " exceeds the given tolerance " << tol << "." << std::endl;
01133       success = false;
01134     }
01135 
01136     {
01137       // Fill Y_hat with random values, make Y a copy of Y_hat, and
01138       // test Y = beta*Y + alpha*A_sparse*X with different values of
01139       // alpha and beta.  We include an irrational value in the list,
01140       // since it's probably not a special case in the implementation.
01141       const scalar_type rootTwo = STS::squareroot (STS::one() + STS::one());
01142       const scalar_type alphaValues[] = {
01143         -STS::one(), STS::zero(), STS::one(), -rootTwo, rootTwo
01144       };
01145       const int numAlphaValues = 5;
01146       const scalar_type betaValues[] = {
01147         -STS::one(), STS::zero(), STS::one(), -rootTwo, rootTwo
01148       };
01149       const int numBetaValues = 5;
01150 
01151       for (int alphaInd = 0; alphaInd < numAlphaValues; ++alphaInd) {
01152         for (int betaInd = 0; betaInd < numBetaValues; ++betaInd) {
01153           const scalar_type alpha = alphaValues[alphaInd];
01154           const scalar_type beta = betaValues[betaInd];
01155 
01156           MVT::Random (*Y_hat);
01157           MVT::Assign (*Y, *Y_hat);
01158 
01159           // Y_hat := beta * Y_hat + alpha * A_dense * X
01160           blas.GEMM (NO_TRANS, NO_TRANS, numRows, numVecs, numCols,
01161                      alpha, A_dense->values (), A_dense->stride (),
01162                      X->getValues ().getRawPtr (), as<int> (X->getStride ()),
01163                      beta, Y_hat->getValuesNonConst ().getRawPtr (),
01164                      as<int> (Y_hat->getStride ()));
01165           // Y := beta * Y + alpha * A_sparse * X
01166           A_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, alpha,
01167                                                                  (const MV) *X,
01168                                                                  beta, *Y);
01169           // Compare Y and Y_hat.
01170           relErr = maxRelativeError (Y_hat, Y, Y_scratch);
01171           if (relErr > tol) {
01172             err << "Sparse matrix-(multi)vector multiply test failed for general "
01173                 << "matrix A, with alpha = " << alpha << " and beta = " << beta
01174                 << "." << std::endl
01175                 << "Maximum relative error " << relErr << " exceeds "
01176                 << "the given tolerance " << tol << "." << std::endl;
01177             success = false;
01178           }
01179         }
01180       }
01181     }
01182 
01183     // Randomize input and output multivectors in preparation for the
01184     // A^T mat-vec test.
01185     MVT::Random (*Y);
01186     MVT::Random (*X_hat);
01187     MVT::Random (*X);
01188 
01189     // Compute X_hat := A_dense^T * Y and X := A_sparse^T * Y.
01190     blas.GEMM (TRANS, NO_TRANS, numCols, numVecs, numRows,
01191                STS::one (), A_dense->values (), A_dense->stride (),
01192                Y->getValues ().getRawPtr (), as<int> (Y->getStride ()),
01193                STS::zero (), X_hat->getValuesNonConst ().getRawPtr (),
01194                as<int> (X_hat->getStride ()));
01195     A_sparse->template multiply<scalar_type, scalar_type> (TRANS, STS::one (),
01196                                                            (const MV) *Y, *X);
01197     // Compare X and X_hat.
01198     relErr = maxRelativeError (X_hat, X, X_scratch);
01199     if (relErr > tol) {
01200       err << "Sparse matrix-(multi)vector multiply (transpose) test failed for "
01201           << "general matrix A, with alpha = 1 and beta = 0." << std::endl
01202           << "Maximum relative error " << relErr
01203           << " exceeds the given tolerance " << tol << "." << std::endl;
01204       success = false;
01205     }
01206 
01207     // Randomize input and output multivectors in preparation for the
01208     // A^T mat-vec test with different alpha and beta values.
01209     MVT::Random (*Y);
01210     MVT::Random (*X_hat);
01211     MVT::Random (*X);
01212 
01213     {
01214       // Test X := beta * X + alpha * A_sparse^T * Y with different
01215       // values of alpha and beta.  We include an irrational value in
01216       // the list, since it's probably not a special case in the
01217       // implementation.
01218       const scalar_type rootTwo = STS::squareroot (STS::one() + STS::one());
01219       const scalar_type alphaValues[] = {
01220         -STS::one(), STS::zero(), STS::one(), -rootTwo, rootTwo
01221       };
01222       const int numAlphaValues = 5;
01223       const scalar_type betaValues[] = {
01224         -STS::one(), STS::zero(), STS::one(), -rootTwo, rootTwo
01225       };
01226       const int numBetaValues = 5;
01227 
01228       for (int alphaInd = 0; alphaInd < numAlphaValues; ++alphaInd) {
01229         for (int betaInd = 0; betaInd < numBetaValues; ++betaInd) {
01230           const scalar_type alpha = alphaValues[alphaInd];
01231           const scalar_type beta = betaValues[betaInd];
01232 
01233           // Fill X_hat with random values and make X a copy of X_hat.
01234           MVT::Random (*X_hat);
01235           MVT::Assign (*X, *X_hat);
01236 
01237           // X_hat := beta * X_hat + A_dense^T * Y.
01238           blas.GEMM (TRANS, NO_TRANS, numCols, numVecs, numRows,
01239                      alpha, A_dense->values (), A_dense->stride (),
01240                      Y->getValues ().getRawPtr (), as<int> (Y->getStride ()),
01241                      beta, X_hat->getValuesNonConst ().getRawPtr (),
01242                      as<int> (X_hat->getStride ()));
01243           // X := beta * X + A_sparse^T * Y.
01244           A_sparse->template multiply<scalar_type, scalar_type> (TRANS, alpha,
01245                                                                  (const MV) *Y,
01246                                                                  beta, *X);
01247           // Compare X and X_hat.
01248           relErr = maxRelativeError (X_hat, X, X_scratch);
01249           if (relErr > tol) {
01250             err << "Sparse matrix-(multi)vector multiply (transpose) test "
01251                 << "failed for general matrix A, with alpha = " << alpha
01252                 << " and beta = " << beta << "." << std::endl
01253                 << "Maximum relative error " << relErr
01254                 << " exceeds the given tolerance " << tol << "." << std::endl;
01255             success = false;
01256           }
01257         }
01258       }
01259     }
01260 
01261     // Randomize input and output multivectors in preparation for the
01262     // A^H mat-vec test.
01263     MVT::Random (*Y);
01264     MVT::Random (*X_hat);
01265     MVT::Random (*X);
01266 
01267     if (STS::isComplex) {
01268       // Compute X_hat := A_dense^H * Y and X := A_sparse^H * Y.
01269       blas.GEMM (CONJ_TRANS, NO_TRANS, numCols, numVecs, numRows,
01270                  STS::one (), A_dense->values (), A_dense->stride (),
01271                  Y->getValues ().getRawPtr (), as<int> (Y->getStride ()),
01272                  STS::zero (), X_hat->getValuesNonConst ().getRawPtr (),
01273                  as<int> (X_hat->getStride ()));
01274       A_sparse->template multiply<scalar_type, scalar_type> (CONJ_TRANS, STS::one (),
01275                                                              (const MV) *Y, *X);
01276       // Compare X and X_hat.
01277       relErr = maxRelativeError (X_hat, X, X_scratch);
01278       if (relErr > tol) {
01279         err << "Sparse matrix-(multi)vector multiply (conjugate transpose) "
01280             << "test failed for general matrix A, with alpha = 1 and beta = 0."
01281             << std::endl
01282             << "Maximum relative error " << relErr << " exceeds the given "
01283             << "tolerance " << tol << "." << std::endl;
01284         success = false;
01285       }
01286 
01287       // Randomize input and output multivectors in preparation for the
01288       // A^H mat-vec test with different alpha and beta values.
01289       MVT::Random (*Y);
01290       MVT::Random (*X_hat);
01291       MVT::Random (*X);
01292 
01293       {
01294         // Test X = beta * X + alpha * A_sparse^H * Y with different
01295         // values of alpha and beta.  We include an irrational value in
01296         // the list, since it's probably not a special case in the
01297         // implementation.
01298         const scalar_type rootTwo = STS::squareroot (STS::one() + STS::one());
01299         const scalar_type alphaValues[] = {
01300           -STS::one(), STS::zero(), STS::one(), -rootTwo, rootTwo
01301         };
01302         const int numAlphaValues = 5;
01303         const scalar_type betaValues[] = {
01304           -STS::one(), STS::zero(), STS::one(), -rootTwo, rootTwo
01305         };
01306         const int numBetaValues = 5;
01307 
01308         for (int alphaInd = 0; alphaInd < numAlphaValues; ++alphaInd) {
01309           for (int betaInd = 0; betaInd < numBetaValues; ++betaInd) {
01310             const scalar_type alpha = alphaValues[alphaInd];
01311             const scalar_type beta = betaValues[betaInd];
01312 
01313             // Fill X_hat with random values and make X a copy of X_hat.
01314             MVT::Random (*X_hat);
01315             MVT::Assign (*X, *X_hat);
01316 
01317             // X_hat := beta * X_hat + A_dense^H * Y.
01318             blas.GEMM (CONJ_TRANS, NO_TRANS, numCols, numVecs, numRows,
01319                        alpha, A_dense->values (), A_dense->stride (),
01320                        Y->getValues ().getRawPtr (), as<int> (Y->getStride ()),
01321                        beta, X_hat->getValuesNonConst ().getRawPtr (),
01322                        as<int> (X_hat->getStride ()));
01323             // X := beta * X + A_sparse^H * Y.
01324             A_sparse->template multiply<scalar_type, scalar_type> (CONJ_TRANS, alpha,
01325                                                                    (const MV) *Y,
01326                                                                    beta, *X);
01327             // Compare X and X_hat.
01328             relErr = maxRelativeError (X_hat, X, X_scratch);
01329             if (relErr > tol) {
01330               err << "Sparse matrix-(multi)vector multiply (conjugate "
01331                   << "transpose) test failed for general matrix A, with alpha = "
01332                   << alpha << " and beta = " << beta << "." << std::endl
01333                   << "Maximum relative error " << relErr
01334                   << " exceeds the given tolerance " << tol << "." << std::endl;
01335               success = false;
01336             }
01337           }
01338         }
01339       }
01340     }
01341   }
01342 
01362   void
01363   testSparseOps (const Teuchos::RCP<node_type>& node,
01364                  Teuchos::ParameterList& params,
01365                  const ordinal_type numRows,
01366                  const ordinal_type numCols,
01367                  const ordinal_type numVecs,
01368                  const magnitude_type tol,
01369                  const bool implicitUnitDiagTriMultCorrect=false)
01370   {
01371     typedef Teuchos::OrdinalTraits<ordinal_type> OTO;
01372     typedef Teuchos::ScalarTraits<magnitude_type> STM;
01373 
01374     TEUCHOS_TEST_FOR_EXCEPTION(
01375       numRows < OTO::zero() || numCols < OTO::zero() || numVecs < OTO::zero(),
01376       std::invalid_argument,
01377       "testSparseOps: the sparse matrix and dense multivector dimensions must "
01378       "all be nonnegative.");
01379     TEUCHOS_TEST_FOR_EXCEPTION(
01380       tol < STM::zero(),
01381       std::invalid_argument,
01382       "testSparseOps: the relative error tolerance must be nonnegative.");
01383 
01384     testSparseMatVec (node, params, numRows, numCols, numVecs, tol);
01385     if (numRows == numCols) {
01386       testSparseTriOps (node, params, numRows, numVecs, tol,
01387                         implicitUnitDiagTriMultCorrect);
01388     }
01389   }
01390 
01425   void
01426   testSparseTriOps (const Teuchos::RCP<node_type>& node,
01427                     Teuchos::ParameterList& params,
01428                     const ordinal_type N,
01429                     const ordinal_type numVecs,
01430                     const magnitude_type tol,
01431                     const bool implicitUnitDiagTriMultCorrect=false)
01432   {
01433     using Teuchos::arcp;
01434     using Teuchos::Array;
01435     using Teuchos::ArrayRCP;
01436     using Teuchos::as;
01437     using Teuchos::null;
01438     using Teuchos::RCP;
01439     using Teuchos::rcp;
01440     using Teuchos::LEFT_SIDE;
01441     using Teuchos::LOWER_TRI;
01442     using Teuchos::UPPER_TRI;
01443     using Teuchos::NO_TRANS;
01444     using Teuchos::TRANS;
01445     using Teuchos::CONJ_TRANS;
01446     using Teuchos::NON_UNIT_DIAG;
01447     using Teuchos::UNIT_DIAG;
01448     using std::endl;
01449     typedef Teuchos::ScalarTraits<scalar_type> STS;
01450     typedef Teuchos::ScalarTraits<magnitude_type> STM;
01451     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
01452     typedef Kokkos::DefaultArithmetic<MV> MVT;
01453 
01454     const ordinal_type numRows = N;
01455     const ordinal_type numCols = N;
01456 
01457     Teuchos::FancyOStream& out = *out_;
01458     if (verbose_) {
01459       out << "testSparseTriOps:";
01460     }
01461     Teuchos::OSTab tab1 (out_);
01462     if (verbose_) {
01463       out << "Input parameters:" << endl;
01464       Teuchos::OSTab tab2 (out_);
01465       out << "N       = " << N << endl
01466           << "numVecs = " << numVecs << endl
01467           << "tol     = " << tol << endl;
01468       if (implicitUnitDiagTriMultCorrect) {
01469         out << "Not a";
01470       }
01471       else {
01472         out << "A";
01473       }
01474       out << "ssuming that sparse mat-vec works correctly "
01475         "for implicit unit diagonal matrices." << endl;
01476     }
01477     TEUCHOS_TEST_FOR_EXCEPTION(
01478       N < Teuchos::OrdinalTraits<ordinal_type>::zero(),
01479       std::invalid_argument,
01480       "testSparseTriOps: Number of rows and columns N = " << N << " < 0.");
01481     TEUCHOS_TEST_FOR_EXCEPTION(
01482       numVecs < Teuchos::OrdinalTraits<ordinal_type>::zero(),
01483       std::invalid_argument,
01484       "testSparseTriOps: numVecs = " << numVecs << " < 0.");
01485 
01486     // A record of error messages reported by any failed tests.
01487     // We run _all_ the tests first, then report any failures.
01488     std::ostringstream err;
01489     // Whether any tests have failed thus far.
01490     bool success = true;
01491 
01492     // Relative error of the current operation.
01493     magnitude_type relErr = STM::zero ();
01494 
01495     RCP<dense_matrix_type> A_dense, L_dense, U_dense;
01496     Array<ordinal_type> ipiv;
01497     makeDenseTestProblem (A_dense, L_dense, U_dense, ipiv, N);
01498 
01499     // Convert L_dense and U_dense into separate sparse matrices.
01500     RCP<SparseOpsType> L_sparse =
01501       denseTriToSparseOps (*L_dense, node, params, LOWER_TRI, UNIT_DIAG);
01502     //sparsifyDenseToSparseOps (*L_dense, node, params, LOWER_TRI, UNIT_DIAG);
01503     if (verbose_) {
01504       out << "L_sparse description:" << endl;
01505       // const Teuchos::EVerbosityLevel verbLevel = verbose_ ?
01506       //   (debug_ ? Teuchos::VERB_EXTREME : Teuchos::VERB_HIGH) :
01507       //   Teuchos::VERB_NONE;
01508       const Teuchos::EVerbosityLevel verbLevel =
01509         verbose_ ? Teuchos::VERB_HIGH : Teuchos::VERB_NONE;
01510       L_sparse->describe (out, verbLevel);
01511     }
01512 
01513     RCP<SparseOpsType> U_sparse =
01514       denseTriToSparseOps (*U_dense, node, params, UPPER_TRI, NON_UNIT_DIAG);
01515     //sparsifyDenseToSparseOps (*U_dense, node, params, UPPER_TRI, NON_UNIT_DIAG);
01516     if (verbose_) {
01517       out << "U_sparse description:" << endl;
01518       // const Teuchos::EVerbosityLevel verbLevel = verbose_ ?
01519       //   (debug_ ? Teuchos::VERB_EXTREME : Teuchos::VERB_HIGH) :
01520       //   Teuchos::VERB_NONE;
01521       const Teuchos::EVerbosityLevel verbLevel =
01522         verbose_ ? Teuchos::VERB_HIGH : Teuchos::VERB_NONE;
01523       U_sparse->describe (out, verbLevel);
01524     }
01525 
01526     if (verbose_ && debug_) {
01527       RCP<dense_matrix_type> L_sparseAsDense = L_sparse->asDenseMatrix ();
01528       RCP<dense_matrix_type> U_sparseAsDense = U_sparse->asDenseMatrix ();
01529 
01530       out << "L_dense:" << endl;
01531       {
01532         Teuchos::OSTab tab2 (out_);
01533         printDenseMatrix (*out_, *L_dense);
01534       }
01535       out << endl << "L_sparse:" << endl;
01536       {
01537         Teuchos::OSTab tab2 (out_);
01538         //printSparseMatrixAsDense (out, *L_sparse);
01539         printDenseMatrix (*out_, *L_sparseAsDense);
01540       }
01541       out << endl << "U_dense:" << endl;
01542       {
01543         Teuchos::OSTab tab2 (out_);
01544         printDenseMatrix (*out_, *U_dense);
01545       }
01546       out << endl << "U_sparse:" << endl;
01547       {
01548         Teuchos::OSTab tab2 (out_);
01549         //printSparseMatrixAsDense (out, *U_sparse);
01550         printDenseMatrix (*out_, *U_sparseAsDense);
01551       }
01552       out << endl;
01553 
01554       // Check whether U_sparse == U_dense.
01555       TEUCHOS_TEST_FOR_EXCEPTION(
01556         *U_dense != *U_sparseAsDense, std::logic_error,
01557         "Conversion of the upper triangular factor U to sparse format is "
01558         "incorrect, since converting the sparse matrix back to dense results "
01559         "in a different matrix than the original dense one.");
01560 
01561       // Check whether L_sparse == L_dense.  This requires first
01562       // subtracting away the unit diagonal from L_sparseAsDense,
01563       // which asDenseMatrix() helpfully added.
01564       for (ordinal_type i = 0; i < N; ++i) {
01565         (*L_sparseAsDense)(i,i) -= STS::one ();
01566       }
01567       TEUCHOS_TEST_FOR_EXCEPTION(
01568         *L_dense != *L_sparseAsDense, std::logic_error,
01569         "Conversion of the lower triangular factor L to sparse format is "
01570         "incorrect, since converting the sparse matrix back to dense results "
01571         "in a different matrix than the original dense one.");
01572     }
01573 
01574     // Convert A_dense into a separate sparse matrix.
01575     RCP<SparseOpsType> A_sparse = denseToSparseOps (*A_dense, node, params);
01576     if (verbose_) {
01577       out << "A_sparse description:" << endl;
01578       // const Teuchos::EVerbosityLevel verbLevel = verbose_ ?
01579       //   (debug_ ? Teuchos::VERB_EXTREME : Teuchos::VERB_HIGH) :
01580       //   Teuchos::VERB_NONE;
01581       const Teuchos::EVerbosityLevel verbLevel =
01582         verbose_ ? Teuchos::VERB_HIGH : Teuchos::VERB_NONE;
01583       A_sparse->describe (out, verbLevel);
01584     }
01585 
01586     // We make MVs for input, results, and scratch space (since the
01587     // BLAS' dense triangular solve overwrites its input).  Y and
01588     // Y_hat are for {L,U} * X.  X and X_hat are for {L,U}^T * Y and
01589     // {L,U}^H * Y.  X_scratch and Y_scratch are scratch multivectors:
01590     // X_scratch is for X - X_hat, and Y_scratch is for Y - Y_hat.
01591     RCP<MV> X = makeMultiVector (node, N, numVecs, true);
01592     RCP<MV> X_hat = makeMultiVector (node, N, numVecs, true);
01593     RCP<MV> Y = makeMultiVector (node, N, numVecs, true);
01594     RCP<MV> Y_hat = makeMultiVector (node, N, numVecs, true);
01595     RCP<MV> X_scratch = makeMultiVector (node, N, numVecs, true);
01596     RCP<MV> Y_scratch = makeMultiVector (node, N, numVecs, true);
01597 
01598     blas_type blas; // For dense matrix and vector operations.
01599 
01601     // Debug mode test that (L,U,A)_sparse == (L,U,A)_dense
01603 
01604     // Test that (L,U,A)_sparse == (L,U,A)_dense, by using mat-vecs to
01605     // sample their columns one at a time.  Only do this for small
01606     // numbers of rows and columns, since this is slow otherwise.
01607     if (N <= 50) {
01608       if (verbose_) {
01609         out << "Comparing A_sparse to A_dense, column by column" << endl;
01610       }
01611       magnitude_type maxRelErr = STM::zero ();
01612       for (ordinal_type j = 0; j < N; ++j) {
01613         MVT::Init (*X, STS::zero ());
01614         X->getValuesNonConst ()[j] = STS::one (); // X := e_j
01615         MVT::Random (*Y_hat);
01616         MVT::Random (*Y);
01617 
01618         // Compute Y_hat := A_dense * X and Y := A_sparse * X.
01619         blas.GEMM (NO_TRANS, NO_TRANS, numRows, numVecs, numCols,
01620                    STS::one (), A_dense->values (), A_dense->stride (),
01621                    X->getValues ().getRawPtr (), as<int> (X->getStride ()),
01622                    STS::zero (), Y_hat->getValuesNonConst ().getRawPtr (),
01623                    as<int> (Y_hat->getStride ()));
01624         A_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
01625                                                                (const MV) *X, *Y);
01626         // Compare Y and Y_hat.
01627         relErr = maxRelativeError (Y_hat, Y, Y_scratch);
01628         if (relErr > tol) {
01629           err << "Columns " << (j+1) << " of A_sparse and A_dense differ by "
01630             "maximum relative error of " << relErr << ", which exceeds the "
01631             "given tolerance " << tol << "." << endl;
01632           success = false;
01633         }
01634         maxRelErr = std::max (maxRelErr, relErr);
01635       } // for each column j of A_{dense,sparse}
01636       if (verbose_) {
01637         Teuchos::OSTab tab2 (out_);
01638         out << "Max relative column error: " << maxRelErr << endl;
01639       }
01640 
01641       if (verbose_) {
01642         out << "Comparing U_sparse to U_dense, column by column" << endl;
01643       }
01644       maxRelErr = STM::zero ();
01645       for (ordinal_type j = 0; j < N; ++j) {
01646         MVT::Init (*X, STS::zero ());
01647         X->getValuesNonConst ()[j] = STS::one (); // X := e_j
01648         MVT::Random (*Y_hat);
01649         MVT::Random (*Y);
01650 
01651         // Compute Y_hat := U_dense * X.
01652         MVT::Assign (*Y_hat, (const MV) *X);
01653         blas.TRMM (LEFT_SIDE, UPPER_TRI, NO_TRANS, NON_UNIT_DIAG, N, numVecs,
01654                    STS::one (), U_dense->values (), U_dense->stride (),
01655                    Y_hat->getValuesNonConst ().getRawPtr (),
01656                    as<int> (Y_hat->getStride ()));
01657         // Compute Y := U_sparse * X.
01658         U_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
01659                                                                (const MV) *X, *Y);
01660         // Compare Y and Y_hat.
01661         relErr = maxRelativeError (Y_hat, Y, Y_scratch);
01662         if (relErr > tol) {
01663           err << "Columns " << (j+1) << " of U_sparse and U_dense differ by "
01664             "maximum relative error of " << relErr << ", which exceeds the "
01665             "given tolerance " << tol << "." << endl;
01666           success = false;
01667         }
01668         maxRelErr = std::max (maxRelErr, relErr);
01669       } // for each column j of U_{dense,sparse}
01670       if (verbose_) {
01671         Teuchos::OSTab tab2 (out_);
01672         out << "Max relative column error: " << maxRelErr << endl;
01673       }
01674 
01675       if (verbose_) {
01676         out << "Comparing L_sparse to L_dense, column by column" << endl;
01677       }
01678       maxRelErr = STM::zero ();
01679       for (ordinal_type j = 0; j < N; ++j) {
01680         MVT::Init (*X, STS::zero ());
01681         X->getValuesNonConst ()[j] = STS::one (); // X := e_j
01682         MVT::Random (*Y_hat);
01683         MVT::Random (*Y);
01684 
01685         // Compute Y_hat := L_dense * X.
01686         MVT::Assign (*Y_hat, (const MV) *X);
01687         blas.TRMM (LEFT_SIDE, LOWER_TRI, NO_TRANS, UNIT_DIAG, N, numVecs,
01688                    STS::one (), L_dense->values (), L_dense->stride (),
01689                    Y_hat->getValuesNonConst ().getRawPtr (),
01690                    as<int> (Y_hat->getStride ()));
01691         // Compute Y := L_sparse * X.
01692         if (implicitUnitDiagTriMultCorrect) {
01693           L_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
01694                                                                  (const MV) *X, *Y);
01695         }
01696         else {
01697           MVT::Assign (*Y, *X);
01698           L_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
01699                                                                  (const MV) *X,
01700                                                                  STS::one (), *Y);
01701         }
01702         // Compare Y and Y_hat.
01703         relErr = maxRelativeError (Y_hat, Y, Y_scratch);
01704         if (relErr > tol) {
01705           err << "Columns " << (j+1) << " of L_sparse and L_dense differ by "
01706             "maximum relative error of " << relErr << ", which exceeds the "
01707             "given tolerance " << tol << "." << endl;
01708           success = false;
01709         }
01710         maxRelErr = std::max (maxRelErr, relErr);
01711       } // for each column j of L_{dense,sparse}
01712       if (verbose_) {
01713         Teuchos::OSTab tab2 (out_);
01714         out << "Max relative column error: " << maxRelErr << endl;
01715       }
01716     } // if N <= 50
01717 
01719     // Test sparse triangular matrix-(multi)vector multiply.
01721 
01722     if (verbose_) {
01723       out << "Testing upper triangular sparse mat-vec" << endl;
01724     }
01725     // Compute Y_hat := U_dense * X.  First copy X into Y_hat, since
01726     // TRMM overwrites its input.
01727     MVT::Assign (*Y_hat, (const MV) *X);
01728     blas.TRMM (LEFT_SIDE, UPPER_TRI, NO_TRANS, NON_UNIT_DIAG, N, numVecs,
01729                STS::one (), U_dense->values (), U_dense->stride (),
01730                Y_hat->getValuesNonConst ().getRawPtr (),
01731                as<int> (Y_hat->getStride ()));
01732     // Compute Y := U_sparse * X.  The local sparse operations don't
01733     // overwrite their input.
01734     U_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
01735                                                            (const MV) *X, *Y);
01736     // Compare Y and Y_hat.
01737     relErr = maxRelativeError (Y_hat, Y, Y_scratch);
01738     if (verbose_) {
01739       Teuchos::OSTab tab2 (out_);
01740       out << "Relative error: " << relErr << endl;
01741     }
01742     if (relErr > tol) {
01743       err << "Sparse matrix-(multi)vector multiply test failed for upper "
01744         "triangular matrix U." << std::endl << "Maximum relative error "
01745           << relErr << " exceeds the given tolerance " << tol << "."
01746           << std::endl;
01747       success = false;
01748     }
01749 
01751     // Test sparse triangular mat-vec: transpose case.
01753 
01754     // Start over with a fresh random Y.
01755     MVT::Random (*Y);
01756     // Fill X_hat and X with random values, just to make sure the
01757     // operations below aren't fakely reported correct by means of
01758     // leftover values.
01759     MVT::Random (*X_hat);
01760     MVT::Random (*X);
01761 
01762     if (verbose_) {
01763       out << "Testing upper triangular transpose sparse mat-vec" << endl;
01764     }
01765     // Compute X_hat := U_dense^T * Y.
01766     MVT::Assign (*X_hat, (const MV) *Y);
01767     blas.TRMM (LEFT_SIDE, UPPER_TRI, TRANS, NON_UNIT_DIAG, N, numVecs,
01768                STS::one (), U_dense->values (), U_dense->stride (),
01769                X_hat->getValuesNonConst ().getRawPtr (),
01770                as<int> (X_hat->getStride ()));
01771     // Compute X := U_sparse^T * Y.
01772     U_sparse->template multiply<scalar_type, scalar_type> (TRANS, STS::one (),
01773                                                            (const MV) *Y, *X);
01774     // Compare X and X_hat.
01775     relErr = maxRelativeError (X_hat, X, X_scratch);
01776     if (verbose_) {
01777       Teuchos::OSTab tab2 (out_);
01778       out << "Relative error: " << relErr << endl;
01779     }
01780     if (relErr > tol) {
01781       err << "Sparse matrix-(multi)vector multiply test failed for "
01782         "transpose of upper triangular matrix U." << std::endl
01783           << "Maximum relative error " << relErr
01784           << " exceeds the given tolerance " << tol << "." << std::endl;
01785       success = false;
01786     }
01787 
01789     // Test sparse triangular mat-vec: conjugate transpose case.
01791 
01792     if (STS::isComplex) {
01793       // Start over with a fresh random Y.
01794       MVT::Random (*Y);
01795       // Fill X_hat and X with random values, just to make sure the
01796       // operations below aren't fakely reported correct by means of
01797       // leftover values.
01798       MVT::Random (*X_hat);
01799       MVT::Random (*X);
01800 
01801       if (verbose_) {
01802         out << "Testing upper triangular conjugate transpose sparse mat-vec" << endl;
01803       }
01804       // Compute X_hat := U_dense^T * Y.
01805       MVT::Assign (*X_hat, (const MV) *Y);
01806       blas.TRMM (LEFT_SIDE, UPPER_TRI, CONJ_TRANS, NON_UNIT_DIAG, N, numVecs,
01807                  STS::one (), U_dense->values (), U_dense->stride (),
01808                  X_hat->getValuesNonConst ().getRawPtr (),
01809                  as<int> (X_hat->getStride ()));
01810       // Compute Y := U_sparse^H * X.
01811       U_sparse->template multiply<scalar_type, scalar_type> (CONJ_TRANS,
01812                                                              STS::one (),
01813                                                              (const MV) *Y, *X);
01814       // Compare X and X_hat.
01815       relErr = maxRelativeError (X_hat, X, X_scratch);
01816       if (verbose_) {
01817         Teuchos::OSTab tab2 (out_);
01818         out << "Relative error: " << relErr << endl;
01819       }
01820       if (relErr > tol) {
01821         err << "Sparse matrix-(multi)vector multiply test failed for "
01822           "conjugate transpose of upper triangular matrix U." << std::endl
01823             << "Maximum relative error " << relErr
01824             << " exceeds the given tolerance " << tol << "." << std::endl;
01825         success = false;
01826       }
01827     }
01828 
01830     // Test lower triangular, implicit unit diagonal, sparse
01831     // triangular mat-vec.
01833 
01834     // Start over with a fresh random X.
01835     MVT::Random (*X);
01836     // Fill Y_hat and Y with random values, just to make sure the
01837     // operations below aren't fakely reported correct by means of
01838     // leftover values.
01839     MVT::Random (*Y_hat);
01840     MVT::Random (*Y);
01841 
01842     if (verbose_) {
01843       out << "Testing implicit unit diagonal lower triangular "
01844         "sparse mat-vec" << endl;
01845     }
01846     // Compute Y_hat := L_dense * X.
01847     MVT::Assign (*Y_hat, (const MV) *X);
01848     blas.TRMM (LEFT_SIDE, LOWER_TRI, NO_TRANS, UNIT_DIAG, N, numVecs,
01849                STS::one (), L_dense->values (), L_dense->stride (),
01850                Y_hat->getValuesNonConst ().getRawPtr (),
01851                as<int> (Y_hat->getStride ()));
01852     // Compute Y := L_sparse * X.  The local sparse operations
01853     // don't overwrite their input.
01854     if (implicitUnitDiagTriMultCorrect) {
01855       L_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
01856                                                              (const MV) *X, *Y);
01857     }
01858     else {
01859       // Compute Y := X + L_sparse * X in order to simulate the effect
01860       // of an implicitly stored unit diagonal.  We do this by first
01861       // assigning X to Y, and then using beta=1, though it might be
01862       // better to test beta=0 and use MVT::GESUM instead.
01863       MVT::Assign (*Y, *X);
01864       L_sparse->template multiply<scalar_type, scalar_type> (NO_TRANS, STS::one (),
01865                                                              (const MV) *X,
01866                                                              STS::one (), *Y);
01867     }
01868     // Compare Y and Y_hat.
01869     relErr = maxRelativeError (Y_hat, Y, Y_scratch);
01870     if (verbose_) {
01871       Teuchos::OSTab tab2 (out_);
01872       out << "Relative error: " << relErr << endl;
01873     }
01874     if (relErr > tol) {
01875       err << "Sparse matrix-(multi)vector multiply test failed for lower "
01876         "triangular matrix L with implicit unit diagonal." << std::endl
01877           << "Maximum relative error " << relErr
01878           << " exceeds the given tolerance " << tol << "." << std::endl;
01879       success = false;
01880     }
01881 
01883     // Test sparse triangular solve.
01885 
01886     // Start over with a fresh random Y (input multivector in this case).
01887     MVT::Random (*Y);
01888     // Fill X_hat and X (output multivectors, in this case) with
01889     // random values, just to make sure the operations below aren't
01890     // fakely reported correct by means of leftover values.
01891     MVT::Random (*X_hat);
01892     MVT::Random (*X);
01893 
01894     if (verbose_) {
01895       out << "Testing implicit unit diagonal lower triangular "
01896         "sparse triangular solve" << endl;
01897     }
01898     // Compute X_hat := L_dense \ Y.
01899     MVT::Assign (*X_hat, (const MV) *Y);
01900     blas.TRSM (LEFT_SIDE, LOWER_TRI, NO_TRANS, UNIT_DIAG, N, numVecs,
01901                STS::one (), L_dense->values (), L_dense->stride (),
01902                X_hat->getValuesNonConst ().getRawPtr (),
01903                as<int> (X_hat->getStride ()));
01904     // Compute X := L_sparse \ Y.
01905     L_sparse->template solve<scalar_type, scalar_type> (NO_TRANS, (const MV) *Y, *X);
01906     // Compare X and X_hat.
01907     relErr = maxRelativeError (X_hat, X, X_scratch);
01908     if (verbose_) {
01909       Teuchos::OSTab tab2 (out_);
01910       out << "Relative error: " << relErr << endl;
01911     }
01912     if (relErr > tol) {
01913       err << "Sparse triangular solve test failed for lower triangular matrix "
01914         "L with implicit unit diagonal." << std::endl
01915           << "Maximum relative error " << relErr
01916           << " exceeds the given tolerance " << tol << "." << std::endl;
01917       success = false;
01918     }
01919 
01920     // Start over with a fresh random Y (input multivector in this case).
01921     MVT::Random (*Y);
01922     // Fill X_hat and X (output multivectors, in this case) with
01923     // random values, just to make sure the operations below aren't
01924     // fakely reported correct by means of leftover values.
01925     MVT::Random (*X_hat);
01926     MVT::Random (*X);
01927 
01928     if (verbose_) {
01929       out << "Testing upper triangular sparse triangular solve" << endl;
01930     }
01931     // Compute X_hat = U_dense \ Y.
01932     MVT::Assign (*X_hat, (const MV) *Y);
01933     blas.TRSM (LEFT_SIDE, UPPER_TRI, NO_TRANS, NON_UNIT_DIAG, N, numVecs,
01934                STS::one (), U_dense->values (), U_dense->stride (),
01935                X_hat->getValuesNonConst ().getRawPtr (),
01936                as<int> (X_hat->getStride ()));
01937     // Compute X = U_sparse \ Y.
01938     U_sparse->template solve<scalar_type, scalar_type> (NO_TRANS,
01939                                                         (const MV) *Y, *X);
01940     // Compare X and X_hat.
01941     relErr = maxRelativeError (X_hat, X, X_scratch);
01942     if (verbose_) {
01943       Teuchos::OSTab tab2 (out_);
01944       out << "Relative error: " << relErr << endl;
01945     }
01946     if (relErr > tol) {
01947       err << "Sparse triangular solve test failed for upper triangular matrix "
01948         "U." << std::endl << "Maximum relative error " << relErr
01949           << " exceeds the given tolerance " << tol << "." << std::endl;
01950       success = false;
01951     }
01952 
01953     TEUCHOS_TEST_FOR_EXCEPTION(! success, std::runtime_error,
01954       "One or more sparse ops tests failed.  Here is the full report:\n"
01955       << err.str());
01956   }
01957 
01958 
01959   void
01960   benchmarkSparseOps (std::vector<std::pair<std::string, double> >& results,
01961                       const std::string& label,
01962                       const Teuchos::RCP<node_type>& node,
01963                       Teuchos::ParameterList& params,
01964                       const ordinal_type numRows,
01965                       const ordinal_type numCols,
01966                       const ordinal_type numVecs,
01967                       const int numTrials) const
01968   {
01969     using Teuchos::arcp;
01970     using Teuchos::Array;
01971     using Teuchos::as;
01972     using Teuchos::RCP;
01973     using Teuchos::rcp;
01974     using Teuchos::LOWER_TRI;
01975     using Teuchos::UPPER_TRI;
01976     using Teuchos::NON_UNIT_DIAG;
01977     using Teuchos::UNIT_DIAG;
01978     typedef Teuchos::ScalarTraits<scalar_type> STS;
01979     typedef Teuchos::ScalarTraits<magnitude_type> STM;
01980     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
01981     typedef Kokkos::DefaultArithmetic<MV> MVT;
01982 
01983     const bool testTriSolve = (numRows == numCols);
01984 
01985     RCP<dense_matrix_type> A_dense, L_dense, U_dense;
01986     RCP<SparseOpsType> A_sparse, L_sparse, U_sparse;
01987     Array<ordinal_type> ipiv;
01988 
01989     if (testTriSolve) {
01990       makeDenseTestProblem (A_dense, L_dense, U_dense, ipiv, numRows);
01991       // Convert L_dense and U_dense into sparse matrices.
01992       L_sparse = denseTriToSparseOps (*L_dense, node, params, LOWER_TRI, UNIT_DIAG);
01993       U_sparse = denseTriToSparseOps (*U_dense, node, params, UPPER_TRI, NON_UNIT_DIAG);
01994     }
01995     else {
01996       A_dense = rcp (new dense_matrix_type (numRows, numCols));
01997     }
01998     // Convert A_dense into a sparse matrix.
01999     A_sparse = denseToSparseOps (*A_dense, node, params);
02000 
02001     // // Compute a random input multivector.
02002     // RCP<MV> X = makeMultiVector (node, numCols, numVecs);
02003     // MVT::Random (*X);
02004     // RCP<MV> Y = makeMultiVector (node, numRows, numVecs);
02005     // MVT::Init (*Y, STS::zero ());
02006 
02007     benchmarkSparseMatVec (results, label, *A_sparse,
02008                            numRows, numCols, numVecs, numTrials);
02009     if (testTriSolve) {
02010       benchmarkSparseTriSolve (results, label, "lower tri, unit diag",
02011                                *L_sparse, numRows, numCols, numVecs, numTrials);
02012       benchmarkSparseTriSolve (results, label, "upper tri, non unit diag",
02013                                *U_sparse, numRows, numCols, numVecs, numTrials);
02014     }
02015   }
02016 
02030   void
02031   benchmarkSparseMatVecFromFile (std::vector<std::pair<std::string, double> >& results,
02032                                  const std::string& filename,
02033                                  const std::string& label,
02034                                  const Teuchos::RCP<node_type>& node,
02035                                  Teuchos::ParameterList& params,
02036                                  const ordinal_type numVecs,
02037                                  const int numTrials) const
02038   {
02039     using Teuchos::RCP;
02040     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
02041     typedef Kokkos::DefaultArithmetic<MV> MVT;
02042 
02043     // SparseOpsType isn't required to tell us how many rows and
02044     // columns the sparse matrix has, so we find out when we read the
02045     // file.  Kokkos ignores uplo and diag for sparse mat-vec, so we
02046     // don't need to provide those arguments to
02047     // makeSparseOpsFromFile() here.
02048     ordinal_type numRows = 0;
02049     ordinal_type numCols = 0;
02050     RCP<SparseOpsType> A_sparse =
02051       makeSparseOpsFromFile (numRows, numCols, node, params, filename);
02052 
02053     // Compute a random input multivector.
02054     RCP<MV> X = makeMultiVector (node, numCols, numVecs);
02055     MVT::Random (*X);
02056     RCP<MV> Y = makeMultiVector (node, numRows, numVecs); // output MV.
02057 
02058     benchmarkSparseMatVec (results, label, *A_sparse,
02059                            numRows, numCols, numVecs, numTrials);
02060     // benchmarkSparseTriSolve (results, label, "lower tri, unit diag",
02061     //                          *L_sparse, numRows, numCols, numVecs, numTrials);
02062     // benchmarkSparseTriSolve (results, label, "upper tri, non unit diag",
02063     //                          *U_sparse, numRows, numCols, numVecs, numTrials);
02064   }
02065 
02066 private:
02091   void
02092   benchmarkSparseMatVec (std::vector<std::pair<std::string, double> >& results,
02093                          const std::string& label,
02094                          const SparseOpsType& ops,
02095                          const ordinal_type numRows,
02096                          const ordinal_type numCols,
02097                          const ordinal_type numVecs,
02098                          const int numTrials) const
02099   {
02100     using Teuchos::RCP;
02101     using Teuchos::Time;
02102     using Teuchos::TimeMonitor;
02103     typedef Teuchos::ScalarTraits<scalar_type> STS;
02104     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
02105     typedef Kokkos::DefaultArithmetic<MV> MVT;
02106 
02107     RCP<MV> X = makeMultiVector (ops.getNode (), numRows, numVecs);
02108     RCP<MV> Y = makeMultiVector (ops.getNode (), numRows, numVecs);
02109     //MVT::Init (*Y, STS::zero()); // makeMultiVector() already does this.
02110     MVT::Random (*X);
02111 
02112     const int numBenchmarks = STS::isComplex ? 9 : 7;
02113     results.reserve (numBenchmarks);
02114 
02115     // Time sparse matrix-vector multiply, overwrite mode, no transpose.
02116     {
02117       const std::string timerName (label + ": Y = A*X");
02118       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02119       if (timer.is_null ()) {
02120         timer = TimeMonitor::getNewCounter (timerName);
02121       }
02122       {
02123         TimeMonitor timeMon (*timer);
02124         for (int i = 0; i < numTrials; ++i) {
02125           ops.template multiply<scalar_type, scalar_type> (Teuchos::NO_TRANS,
02126                                                            STS::one(), *X, *Y);
02127         }
02128       }
02129       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02130     }
02131 
02132     // Time sparse matrix-vector multiply, update mode, no transpose.
02133     {
02134       const std::string timerName (label + ": Y = Y + A*X");
02135       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02136       if (timer.is_null ()) {
02137         timer = TimeMonitor::getNewCounter (timerName);
02138       }
02139       {
02140         TimeMonitor timeMon (*timer);
02141         for (int i = 0; i < numTrials; ++i) {
02142           ops.template multiply<scalar_type, scalar_type> (Teuchos::NO_TRANS,
02143                                                            STS::one(), *X,
02144                                                            STS::one(), *Y);
02145         }
02146       }
02147       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02148     }
02149 
02150     // Time sparse matrix-vector multiply, update mode, no transpose.
02151     // We subtract to simulate a residual computation.
02152     {
02153       const std::string timerName (label + ": Y = Y - A*X");
02154       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02155       if (timer.is_null ()) {
02156         timer = TimeMonitor::getNewCounter (timerName);
02157       }
02158       {
02159         TimeMonitor timeMon (*timer);
02160         for (int i = 0; i < numTrials; ++i) {
02161           ops.template multiply<scalar_type, scalar_type> (Teuchos::NO_TRANS,
02162                                                            -STS::one(), *X,
02163                                                            STS::one(), *Y);
02164         }
02165       }
02166       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02167     }
02168 
02169     // Time sparse matrix-vector multiply, update mode, no transpose.
02170     // We subtract to simulate an alternate form for a residual computation.
02171     {
02172       const std::string timerName (label + ": Y = -Y + A*X");
02173       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02174       if (timer.is_null ()) {
02175         timer = TimeMonitor::getNewCounter (timerName);
02176       }
02177       {
02178         TimeMonitor timeMon (*timer);
02179         for (int i = 0; i < numTrials; ++i) {
02180           ops.template multiply<scalar_type, scalar_type> (Teuchos::NO_TRANS,
02181                                                            STS::one(), *X,
02182                                                            -STS::one(), *Y);
02183         }
02184       }
02185       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02186     }
02187 
02188     // Time sparse matrix-vector multiply with alpha=0 and beta=-1.
02189     {
02190       const std::string timerName (label + ": Y = -Y + 0*A*X");
02191       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02192       if (timer.is_null ()) {
02193         timer = TimeMonitor::getNewCounter (timerName);
02194       }
02195       {
02196         TimeMonitor timeMon (*timer);
02197         for (int i = 0; i < numTrials; ++i) {
02198           ops.template multiply<scalar_type, scalar_type> (Teuchos::NO_TRANS,
02199                                                            STS::zero(), *X,
02200                                                            -STS::one(), *Y);
02201         }
02202       }
02203       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02204     }
02205 
02206     // Time sparse matrix-vector multiply, overwrite mode, transpose.
02207     {
02208       const std::string timerName (label + ": Y = A^T * X");
02209       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02210       if (timer.is_null ()) {
02211         timer = TimeMonitor::getNewCounter (timerName);
02212       }
02213       {
02214         TimeMonitor timeMon (*timer);
02215         for (int i = 0; i < numTrials; ++i) {
02216           ops.template multiply<scalar_type, scalar_type> (Teuchos::TRANS,
02217                                                            STS::one(), *X, *Y);
02218         }
02219       }
02220       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02221     }
02222 
02223     // Time sparse matrix-vector multiply, update mode, transpose.
02224     // We subtract to simulate a residual computation.
02225     {
02226       const std::string timerName (label + ": Y = Y - A^T * X");
02227       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02228       if (timer.is_null ()) {
02229         timer = TimeMonitor::getNewCounter (timerName);
02230       }
02231       {
02232         TimeMonitor timeMon (*timer);
02233         for (int i = 0; i < numTrials; ++i) {
02234           ops.template multiply<scalar_type, scalar_type> (Teuchos::TRANS,
02235                                                            -STS::one(), *X,
02236                                                            STS::one(), *Y);
02237         }
02238       }
02239       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02240     }
02241 
02242     // Only test conjugate transpose if scalar_type is complex.
02243     if (STS::isComplex) {
02244       // Time sparse matrix-vector multiply, overwrite mode, conjugate transpose.
02245       {
02246         const std::string timerName (label + ": Y = A^H * X");
02247         RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02248         if (timer.is_null ()) {
02249           timer = TimeMonitor::getNewCounter (timerName);
02250         }
02251         {
02252           TimeMonitor timeMon (*timer);
02253           for (int i = 0; i < numTrials; ++i) {
02254             ops.template multiply<scalar_type, scalar_type> (Teuchos::CONJ_TRANS,
02255                                                              STS::one(), *X, *Y);
02256           }
02257         }
02258         results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02259       }
02260 
02261       // Time sparse matrix-vector multiply, update mode, conjugate transpose.
02262       // We subtract to simulate a residual computation.
02263       {
02264         const std::string timerName (label + ": Y = Y - A^H * X");
02265         RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02266         if (timer.is_null ()) {
02267           timer = TimeMonitor::getNewCounter (timerName);
02268         }
02269         {
02270           TimeMonitor timeMon (*timer);
02271           for (int i = 0; i < numTrials; ++i) {
02272             ops.template multiply<scalar_type, scalar_type> (Teuchos::CONJ_TRANS,
02273                                                              -STS::one(), *X,
02274                                                              STS::one(), *Y);
02275           }
02276         }
02277         results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02278       }
02279     }
02280   }
02281 
02282 
02283 
02322   void
02323   benchmarkSparseTriSolve (std::vector<std::pair<std::string, double> >& results,
02324                            const std::string& opsLabel,
02325                            const std::string& benchmarkLabel,
02326                            const SparseOpsType& ops,
02327                            const ordinal_type numRows,
02328                            const ordinal_type numCols,
02329                            const ordinal_type numVecs,
02330                            const int numTrials) const
02331   {
02332     using Teuchos::RCP;
02333     using Teuchos::Time;
02334     using Teuchos::TimeMonitor;
02335     typedef Teuchos::ScalarTraits<scalar_type> STS;
02336     typedef Kokkos::MultiVector<scalar_type, node_type> MV;
02337     typedef Kokkos::DefaultArithmetic<MV> MVT;
02338 
02339     RCP<MV> X = makeMultiVector (ops.getNode (), numCols, numVecs);
02340     RCP<MV> Y = makeMultiVector (ops.getNode (), numRows, numVecs);
02341     //MVT::Init (*X, STS::zero()); // makeMultiVector() already does this.
02342     MVT::Random (*Y);
02343 
02344     const int numBenchmarks = STS::isComplex ? 6 : 4;
02345     results.reserve (numBenchmarks);
02346 
02347     // Time sparse triangular solve, no transpose.
02348     {
02349       const std::string timerName (opsLabel + ": Y = A \\ X (" + benchmarkLabel + ")");
02350       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02351       if (timer.is_null ()) {
02352         timer = TimeMonitor::getNewCounter (timerName);
02353       }
02354       {
02355         TimeMonitor timeMon (*timer);
02356         for (int i = 0; i < numTrials; ++i) {
02357           ops.template solve<scalar_type, scalar_type> (Teuchos::NO_TRANS,
02358                                                         *Y, *X);
02359         }
02360       }
02361       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02362     }
02363 
02364     // Time sparse triangular solve, transpose.
02365     {
02366       const std::string timerName (opsLabel + ": Y = A^T \\ X (" + benchmarkLabel + ")");
02367       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02368       if (timer.is_null ()) {
02369         timer = TimeMonitor::getNewCounter (timerName);
02370       }
02371       {
02372         TimeMonitor timeMon (*timer);
02373         for (int i = 0; i < numTrials; ++i) {
02374           ops.template solve<scalar_type, scalar_type> (Teuchos::NO_TRANS,
02375                                                         *Y, *X);
02376         }
02377       }
02378       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02379     }
02380 
02381     // Only test conjugate transpose if scalar_type is complex.
02382     if (STS::isComplex) {
02383       // Time sparse triangular solve, conjugate transpose.
02384       const std::string timerName (opsLabel + ": Y = A^H \\ X (" + benchmarkLabel + ")");
02385       RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
02386       if (timer.is_null ()) {
02387         timer = TimeMonitor::getNewCounter (timerName);
02388       }
02389       {
02390         TimeMonitor timeMon (*timer);
02391         for (int i = 0; i < numTrials; ++i) {
02392           ops.template solve<scalar_type, scalar_type> (Teuchos::NO_TRANS,
02393                                                         *Y, *X);
02394         }
02395       }
02396       results.push_back (std::make_pair (timerName, timer->totalElapsedTime ()));
02397     }
02398   }
02399 };
02400 
02401 #endif // __CrsMatrix_TestSparseOps_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends