Anasazi Version of the Day
Tsqr_SeqTest.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef __TSQR_Test_SeqTest_hpp
00030 #define __TSQR_Test_SeqTest_hpp
00031 
00032 #include <Tsqr_Config.hpp>
00033 #include <Tsqr_Random_NormalGenerator.hpp>
00034 #include <Tsqr_nodeTestProblem.hpp>
00035 #include <Tsqr_verifyTimerConcept.hpp>
00036 
00037 #include <Tsqr_Blas.hpp>
00038 #include <Tsqr_Lapack.hpp>
00039 #include <Tsqr_LocalVerify.hpp>
00040 #include <Tsqr_Matrix.hpp>
00041 #include <Tsqr_ScalarTraits.hpp>
00042 #include <Tsqr_SequentialTsqr.hpp>
00043 #include <Tsqr_Util.hpp>
00044 
00045 #include <algorithm>
00046 #include <cstring> // size_t definition
00047 #include <iomanip>
00048 #include <iostream>
00049 #include <limits>
00050 #include <sstream>
00051 #include <stdexcept>
00052 #include <vector>
00053 
00056 
00057 namespace TSQR {
00058   namespace Test {
00059 
00060     template< class Ordinal, class Scalar >
00061     static Ordinal
00062     lworkQueryLapackQr (LAPACK< Ordinal, Scalar >& lapack,
00063       const Ordinal nrows,
00064       const Ordinal ncols,
00065       const Ordinal lda)
00066     {
00067       typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00068       using std::ostringstream;
00069       using std::endl;
00070 
00071       Scalar d_lwork_geqrf = Scalar (0);
00072       int INFO = 0;
00073       lapack.GEQRF (nrows, ncols, NULL, lda, NULL, &d_lwork_geqrf, -1, &INFO);
00074       if (INFO != 0)
00075   {
00076     ostringstream os;
00077     os << "LAPACK _GEQRF workspace size query failed: INFO = " << INFO;
00078     // It's a logic error and not a runtime error, because the
00079     // LWORK query should only fail if the input parameters have
00080     // invalid (e.g., out of range) values.
00081     throw std::logic_error (os.str());
00082   }
00083 
00084       Scalar d_lwork_orgqr = Scalar (0);
00085       // A workspace query appropriate for computing the explicit Q
00086       // factor (nrows x ncols) in place, from the QR factorization of
00087       // an nrows x ncols matrix with leading dimension lda.
00088       lapack.ORGQR (nrows, ncols, ncols, NULL, lda, NULL, &d_lwork_orgqr, -1, &INFO);
00089       if (INFO != 0)
00090   {
00091     ostringstream os;
00092     os << "LAPACK _ORGQR workspace size query failed: INFO = " << INFO;
00093     // It's a logic error and not a runtime error, because the
00094     // LWORK query should only fail if the input parameters have
00095     // invalid (e.g., out of range) values.
00096     throw std::logic_error (os.str());
00097   }
00098 
00099       // LAPACK workspace queries do return their results as a
00100       // double-precision floating-point value, but LAPACK promises
00101       // that that value will fit in an int.  Thus, we don't need to
00102       // check for valid casts to int below.  I include the checks
00103       // just to be "bulletproof" and also to show how to do the
00104       // checks for later reference.
00105       const magnitude_type lwork_geqrf_test = 
00106   static_cast< magnitude_type > (static_cast< Ordinal > (ScalarTraits< Scalar >::abs (d_lwork_geqrf)));
00107       if (lwork_geqrf_test != ScalarTraits< Scalar >::abs (d_lwork_geqrf))
00108   {
00109     ostringstream os;
00110     os << "LAPACK _GEQRF workspace query returned a result, " 
00111        << d_lwork_geqrf << ", bigger than the max Ordinal value, " 
00112        << std::numeric_limits< Ordinal >::max();
00113     throw std::range_error (os.str());
00114   }
00115       const Scalar lwork_orgqr_test = 
00116   static_cast< magnitude_type > (static_cast< Ordinal > (ScalarTraits< Scalar >::abs ((d_lwork_orgqr))));
00117       if (lwork_orgqr_test != ScalarTraits< Scalar >::abs (d_lwork_orgqr))
00118   {
00119     ostringstream os;
00120     os << "LAPACK _ORGQR workspace query returned a result, " 
00121        << d_lwork_orgqr << ", bigger than the max Ordinal value, " 
00122        << std::numeric_limits< Ordinal >::max();
00123     throw std::range_error (os.str());
00124   }
00125       return std::max (static_cast< Ordinal > (ScalarTraits< Scalar >::abs (d_lwork_geqrf)),
00126            static_cast< Ordinal > (ScalarTraits< Scalar >::abs (d_lwork_orgqr)));
00127     }
00128 
00129 
00133     void
00134     verifySeqTsqr (std::ostream& out,
00135        const int nrows, 
00136        const int ncols, 
00137        const size_t cache_block_size,
00138        const bool test_complex_arithmetic,
00139        const bool save_matrices,
00140        const bool contiguous_cache_blocks,
00141        const bool human_readable,
00142        const bool b_debug = false);
00143 
00144 
00147     void
00148     verifyLapack (const int nrows, 
00149       const int ncols, 
00150       const bool test_complex_arithmetic,
00151       const bool human_readable,
00152       const bool b_debug = false);
00153         
00154     
00160     template< class Ordinal, class Scalar, class TimerType >
00161     class SeqTsqrBenchmarker {
00162     public:
00163       typedef Ordinal ordinal_type;
00164       typedef Scalar scalar_type;
00165 
00170       SeqTsqrBenchmarker (const std::string& scalarTypeName,
00171         std::ostream& out = std::cout,
00172         const bool humanReadable = false) : 
00173   scalarTypeName_ (scalarTypeName),
00174   out_ (out), 
00175   humanReadable_ (humanReadable)
00176       {
00177   TSQR::Test::verifyTimerConcept< TimerType >();
00178       }
00179 
00180       void 
00181       benchmark (const int numTrials,
00182      const Ordinal numRows,
00183      const Ordinal numCols,
00184      const size_t cacheBlockSize,
00185      const bool contiguousCacheBlocks)
00186       {
00187   SequentialTsqr< Ordinal, Scalar > actor (cacheBlockSize);
00188 
00189   Matrix< Ordinal, Scalar > A (numRows, numCols);
00190   Matrix< Ordinal, Scalar > A_copy (numRows, numCols);
00191   Matrix< Ordinal, Scalar > Q (numRows, numCols);
00192   Matrix< Ordinal, Scalar > R (numCols, numCols);
00193   const Ordinal lda = numRows;
00194   const Ordinal ldq = numRows;
00195 
00196   // Create a test problem
00197   nodeTestProblem (gen_, numRows, numCols, A.get(), lda, false);
00198 
00199   // Copy A into A_copy, since TSQR overwrites the input
00200   A_copy.copy (A);
00201 
00202   // Benchmark sequential TSQR for numTrials trials.
00203   //
00204   // Name of timer doesn't matter here; we only need the timing.
00205   TimerType timer("SeqTSQR");
00206   timer.start();
00207   for (int trialNum = 0; trialNum < numTrials; ++trialNum)
00208     {
00209       // Factor the matrix and extract the resulting R factor
00210       typedef typename SequentialTsqr< Ordinal, Scalar >::FactorOutput 
00211         factor_output_type;
00212       factor_output_type factorOutput = 
00213         actor.factor (numRows, numCols, A_copy.get(), lda, 
00214           R.get(), R.lda(), contiguousCacheBlocks);
00215       // Compute the explicit Q factor.  Unlike with LAPACK QR,
00216       // this doesn't happen in place: the implicit Q factor is
00217       // stored in A_copy, and the explicit Q factor is written to
00218       // Q.
00219       actor.explicit_Q (numRows, numCols, A_copy.get(), lda, factorOutput, 
00220             numCols, Q.get(), ldq, contiguousCacheBlocks);
00221     }
00222   const double seqTsqrTiming = timer.stop();
00223   reportResults (numTrials, numRows, numCols, actor.cache_block_size(),
00224            contiguousCacheBlocks, seqTsqrTiming);
00225       }
00226 
00227 
00228     private:
00231       TSQR::Random::NormalGenerator< ordinal_type, scalar_type > gen_;
00232       
00235       std::string scalarTypeName_;
00236 
00239       std::ostream& out_;
00240 
00243       bool humanReadable_;
00244 
00247       void 
00248       reportResults (const int numTrials,
00249          const Ordinal numRows,
00250          const Ordinal numCols,
00251          const size_t actualCacheBlockSize,
00252          const bool contiguousCacheBlocks,
00253          const double seqTsqrTiming)
00254       {
00255   using std::endl;
00256   if (humanReadable_)
00257     out_ << "Sequential (cache-blocked) TSQR:" << endl
00258          << "Scalar type = " << scalarTypeName_ << endl
00259          << "# rows = " << numRows << endl
00260          << "# columns = " << numCols << endl
00261          << "cache block # bytes = " << actualCacheBlockSize << endl
00262          << "contiguous cache blocks? " << contiguousCacheBlocks << endl
00263          << "# trials = " << numTrials << endl
00264          << "Total time (s) = " << seqTsqrTiming << endl 
00265          << endl;
00266   else
00267     out_ << "SeqTSQR" 
00268          << "," << scalarTypeName_
00269          << "," << numRows
00270          << "," << numCols
00271          << "," << actualCacheBlockSize
00272          << "," << contiguousCacheBlocks
00273          << "," << numTrials << "," 
00274          << seqTsqrTiming << endl;
00275       }
00276     };
00277 
00278 
00287     template< class TimerType >
00288     void
00289     benchmarkSeqTsqr (std::ostream& out,
00290           const int numRows,
00291           const int numCols,
00292           const int numTrials,
00293           const size_t cacheBlockSize,
00294           const bool contiguousCacheBlocks,
00295           const bool testComplex,
00296           const bool humanReadable)
00297     {
00298       typedef TimerType timer_type;
00299       const bool testReal = true;
00300       using std::string;
00301 
00302       if (testReal)
00303   {
00304     { // Scalar=float
00305       typedef SeqTsqrBenchmarker< int, float, timer_type > benchmark_type;
00306       string scalarTypeName ("float");
00307       benchmark_type widget (scalarTypeName, out, humanReadable);
00308       widget.benchmark (numTrials, numRows, numCols, cacheBlockSize, 
00309             contiguousCacheBlocks);
00310     }
00311     { // Scalar=double
00312       typedef SeqTsqrBenchmarker< int, double, timer_type > benchmark_type;
00313       string scalarTypeName ("double");
00314       benchmark_type widget (scalarTypeName, out, humanReadable);
00315       widget.benchmark (numTrials, numRows, numCols, cacheBlockSize, 
00316             contiguousCacheBlocks);
00317     }
00318   }
00319 
00320       if (testComplex)
00321   {
00322 #ifdef HAVE_TSQR_COMPLEX
00323     using std::complex;
00324     { // Scalar=complex<float>
00325       typedef SeqTsqrBenchmarker< int, complex<float>, timer_type > benchmark_type;
00326       string scalarTypeName ("complex<float>");
00327       benchmark_type widget (scalarTypeName, out, humanReadable);
00328       widget.benchmark (numTrials, numRows, numCols, cacheBlockSize, 
00329             contiguousCacheBlocks);
00330     }
00331     { // Scalar=complex<double>
00332       typedef SeqTsqrBenchmarker< int, complex<double>, timer_type > benchmark_type;
00333       string scalarTypeName ("complex<double>");
00334       benchmark_type widget (scalarTypeName, out, humanReadable);
00335       widget.benchmark (numTrials, numRows, numCols, cacheBlockSize, 
00336             contiguousCacheBlocks);
00337     }
00338 #else // Don't HAVE_TSQR_COMPLEX
00339     throw std::logic_error("TSQR not built with complex arithmetic support");
00340 #endif // HAVE_TSQR_COMPLEX
00341   }
00342     }
00343 
00344 
00352     template< class Ordinal, class Scalar, class Generator, class TimerType >
00353     void
00354     benchmarkLapack (Generator& generator,
00355          const int ntrials,
00356          const Ordinal nrows, 
00357          const Ordinal ncols, 
00358          const bool human_readable)
00359     {
00360       using std::ostringstream;
00361       using std::cerr;
00362       using std::cout;
00363       using std::endl;
00364 
00365       TSQR::Test::verifyTimerConcept< TimerType >();
00366 
00367       LAPACK< Ordinal, Scalar > lapack;
00368       Matrix< Ordinal, Scalar > A (nrows, ncols);
00369       Matrix< Ordinal, Scalar > Q (nrows, ncols);
00370       Matrix< Ordinal, Scalar > R (ncols, ncols);
00371       const Ordinal lda = nrows;
00372       const Ordinal ldq = nrows;
00373       const Ordinal ldr = ncols;
00374 
00375       // Create a test problem
00376       nodeTestProblem (generator, nrows, ncols, A.get(), lda, false);
00377 
00378       // Copy A into Q, since LAPACK QR overwrites the input.  We only
00379       // need Q because LAPACK's computation of the explicit Q factor
00380       // occurs in place.  This doesn't work with TSQR.  To give
00381       // LAPACK QR the fullest possible advantage over TSQR, we don't
00382       // allocate an A_copy here (as we would when benchmarking TSQR).
00383       Q.copy (A);
00384 
00385       // Determine the required workspace for the factorization
00386       const Ordinal lwork = lworkQueryLapackQr (lapack, nrows, ncols, lda);
00387       std::vector< Scalar > work (lwork);
00388       std::vector< Scalar > tau (ncols);
00389 
00390       // Benchmark LAPACK's QR factorization for ntrials trials
00391       TimerType timer("LapackQR");
00392       timer.start();
00393       for (int trial_num = 0; trial_num < ntrials; ++trial_num)
00394   {
00395     // Compute the QR factorization
00396     int info = 0; // INFO is always an int
00397     lapack.GEQRF (nrows, ncols, Q.get(), ldq, &tau[0], &work[0], lwork, &info);
00398     if (info != 0)
00399       {
00400         ostringstream os;
00401         os << "LAPACK QR factorization (_GEQRF) failed: INFO = " << info;
00402         throw std::runtime_error (os.str());
00403       }
00404 
00405     // Extract the upper triangular factor R from Q (where it
00406     // was computed in place by GEQRF), since ORGQR will
00407     // overwrite all of Q with the explicit Q factor.
00408     copy_upper_triangle (nrows, ncols, R.get(), ldr, Q.get(), ldq);
00409 
00410     // Compute the explicit Q factor
00411     lapack.ORGQR (nrows, ncols, ncols, Q.get(), ldq,
00412       &tau[0], &work[0], lwork, &info);
00413     if (info != 0)
00414       {
00415         ostringstream os;
00416         os << "LAPACK explicit Q computation (_ORGQR) failed: INFO = " << info;
00417         throw std::runtime_error (os.str());
00418       }
00419   }
00420       const double lapack_timing = timer.stop();
00421 
00422       // Print the results  
00423       if (human_readable)
00424   cout << "LAPACK\'s QR factorization (DGEQRF + DORGQR):" << endl
00425        << "nrows = " << nrows << endl
00426        << "ncols = " << ncols << endl
00427        << "ntrials = " << ntrials << endl
00428        << "Total time (s) = " << lapack_timing << endl << endl;
00429       else
00430   // "0" refers to the cache block size, which is not applicable
00431   // in this case.
00432   cout << "LAPACK" 
00433        << "," << nrows 
00434        << "," << ncols
00435        << "," << 0 
00436        << "," << ntrials 
00437        << "," << lapack_timing
00438        << endl;
00439     }
00440 
00441   } // namespace Test
00442 } // namespace TSQR
00443 
00444 #endif // __TSQR_Test_SeqTest_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends