Anasazi Version of the Day
Tsqr_SeqTest.cpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #include <Tsqr_Config.hpp>
00030 #include <Tsqr_SeqTest.hpp>
00031 
00032 #include <Tsqr_Random_NormalGenerator.hpp>
00033 #include <Tsqr_nodeTestProblem.hpp>
00034 #include <Tsqr_verifyTimerConcept.hpp>
00035 
00036 #include <Tsqr_Blas.hpp>
00037 #include <Tsqr_Lapack.hpp>
00038 #include <Tsqr_LocalVerify.hpp>
00039 #include <Tsqr_Matrix.hpp>
00040 #include <Tsqr_ScalarTraits.hpp>
00041 #include <Tsqr_SequentialTsqr.hpp>
00042 #include <Tsqr_Util.hpp>
00043 
00044 #include <algorithm>
00045 #include <cstring> // size_t definition
00046 #include <fstream>
00047 #include <iomanip>
00048 #include <iostream>
00049 #include <limits>
00050 #include <sstream>
00051 #include <stdexcept>
00052 #include <vector>
00053 
00056 
00057 namespace TSQR {
00058   namespace Test {
00059 
00063     template< class Ordinal, class Scalar >
00064     static void
00065     verifySeqTsqrTemplate (std::ostream& out,
00066          TSQR::Random::NormalGenerator< Ordinal, Scalar >& generator,
00067          const std::string& datatype,
00068          const std::string& shortDatatype,
00069          const Ordinal nrows, 
00070          const Ordinal ncols, 
00071          const size_t cache_block_size,
00072          const bool contiguous_cache_blocks,
00073          const bool save_matrices,
00074          const bool human_readable,
00075          const bool b_debug)
00076     {
00077       typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00078       using std::cerr;
00079       using std::endl;
00080       using std::pair;
00081       using std::string;
00082 
00083       SequentialTsqr< Ordinal, Scalar > actor (cache_block_size);
00084       Ordinal numCacheBlocks;
00085 
00086       if (b_debug)
00087   {
00088     cerr << "Sequential TSQR test problem:" << endl
00089          << "* " << nrows << " x " << ncols << endl
00090          << "* Cache block of " << actor.cache_block_size() << " bytes" << endl;
00091     if (contiguous_cache_blocks)
00092       cerr << "* Contiguous cache blocks" << endl;
00093   }
00094 
00095       Matrix< Ordinal, Scalar > A (nrows, ncols);
00096       Matrix< Ordinal, Scalar > A_copy (nrows, ncols);
00097       Matrix< Ordinal, Scalar > Q (nrows, ncols);
00098       Matrix< Ordinal, Scalar > R (ncols, ncols);
00099       if (std::numeric_limits< Scalar >::has_quiet_NaN)
00100   {
00101     A.fill (std::numeric_limits< Scalar>::quiet_NaN());
00102     A_copy.fill (std::numeric_limits< Scalar >::quiet_NaN());
00103     Q.fill (std::numeric_limits< Scalar >::quiet_NaN());
00104     R.fill (std::numeric_limits< Scalar >::quiet_NaN());
00105   }
00106       const Ordinal lda = nrows;
00107       const Ordinal ldq = nrows;
00108       const Ordinal ldr = ncols;
00109 
00110       // Create a test problem
00111       nodeTestProblem (generator, nrows, ncols, A.get(), A.lda(), true);
00112 
00113       if (save_matrices)
00114   {
00115     string filename = "A_" + shortDatatype + ".txt";
00116     if (b_debug)
00117       cerr << "-- Saving test problem to \"" << filename << "\"" << endl;
00118     std::ofstream fileOut (filename.c_str());
00119     print_local_matrix (fileOut, nrows, ncols, A.get(), A.lda());
00120     fileOut.close();
00121   }
00122 
00123       if (b_debug)
00124   cerr << "-- Generated test problem" << endl;
00125 
00126       // Copy A into A_copy, since TSQR overwrites the input.  If
00127       // specified, rearrange the data in A_copy so that the data in
00128       // each cache block is contiguously stored.   
00129       if (! contiguous_cache_blocks)
00130   {
00131     A_copy.copy (A);
00132     if (b_debug)
00133       cerr << "-- Copied test problem from A into A_copy" << endl;
00134   }
00135       else
00136   {
00137     actor.cache_block (nrows, ncols, A_copy.get(), A.get(), A.lda());
00138     if (b_debug)
00139       cerr << "-- Reorganized test matrix to have contiguous "
00140         "cache blocks" << endl;
00141 
00142     // Verify cache blocking, when in debug mode.
00143     if (b_debug)
00144       {
00145         Matrix< Ordinal, Scalar > A2 (nrows, ncols);
00146         if (std::numeric_limits< Scalar >::has_quiet_NaN)
00147     A2.fill (std::numeric_limits< Scalar >::quiet_NaN());
00148 
00149         actor.un_cache_block (nrows, ncols, A2.get(), A2.lda(), A_copy.get());
00150         if (A == A2)
00151     {
00152       if (b_debug)
00153         cerr << "-- Cache blocking test succeeded!" << endl;
00154     }
00155         else
00156     throw std::logic_error ("Cache blocking failed");
00157       }
00158   }
00159 
00160       // Fill R with zeros, since the factorization may not overwrite
00161       // the strict lower triangle of R.
00162       R.fill (Scalar(0));
00163 
00164       // Count the number of cache blocks that factor() will use. 
00165       // This is only for diagnostic purposes.
00166       numCacheBlocks = 
00167   actor.factor_num_cache_blocks (nrows, ncols, A_copy.get(), 
00168                A_copy.lda(), contiguous_cache_blocks);
00169       // In debug mode, report how many cache blocks factor() will use.
00170       if (b_debug)
00171   cerr << "-- Number of cache blocks factor() will use: " 
00172        << numCacheBlocks << endl << endl;
00173 
00174       // Factor the matrix and compute the explicit Q factor
00175       typedef typename SequentialTsqr< Ordinal, Scalar >::FactorOutput 
00176   factor_output_type;
00177       factor_output_type factorOutput = 
00178   actor.factor (nrows, ncols, A_copy.get(), A_copy.lda(), 
00179           R.get(), R.lda(), contiguous_cache_blocks);
00180       if (b_debug)
00181   cerr << "-- Finished SequentialTsqr::factor" << endl;
00182 
00183       if (save_matrices)
00184   {
00185     string filename = "R_" + shortDatatype + ".txt";
00186     if (b_debug)
00187       cerr << "-- Saving R factor to \"" << filename << "\"" << endl;
00188     std::ofstream fileOut (filename.c_str());
00189     print_local_matrix (fileOut, ncols, ncols, R.get(), R.lda());
00190     fileOut.close();
00191   }
00192 
00193       actor.explicit_Q (nrows, ncols, A_copy.get(), lda, factorOutput,
00194       ncols, Q.get(), Q.lda(), contiguous_cache_blocks);
00195       if (b_debug)
00196   cerr << "-- Finished SequentialTsqr::explicit_Q" << endl;
00197 
00198       // "Un"-cache-block the output, if contiguous cache blocks were
00199       // used.  This is only necessary because local_verify() doesn't
00200       // currently support contiguous cache blocks.
00201       if (contiguous_cache_blocks)
00202   {
00203     // Use A_copy as temporary storage for un-cache-blocking Q.
00204     actor.un_cache_block (nrows, ncols, A_copy.get(), A_copy.lda(), Q.get());
00205     Q.copy (A_copy);
00206     if (b_debug)
00207       cerr << "-- Un-cache-blocked output Q factor" << endl;
00208   }
00209 
00210       if (save_matrices)
00211   {
00212     string filename = "Q_" + shortDatatype + ".txt";
00213     if (b_debug)
00214       cerr << "-- Saving Q factor to \"" << filename << "\"" << endl;
00215     std::ofstream fileOut (filename.c_str());
00216     print_local_matrix (fileOut, nrows, ncols, Q.get(), Q.lda());
00217     fileOut.close();
00218   }
00219 
00220       // Print out the R factor
00221       if (false && b_debug)
00222   {
00223     cerr << endl << "-- R factor:" << endl;
00224     print_local_matrix (cerr, ncols, ncols, R.get(), R.lda());
00225     cerr << endl;
00226   }
00227 
00228       // Validate the factorization
00229       pair< magnitude_type, magnitude_type > results =
00230   local_verify (nrows, ncols, A.get(), lda, Q.get(), ldq, R.get(), ldr);
00231       if (b_debug)
00232   cerr << "-- Finished local_verify" << endl;
00233 
00234       // Print the results
00235       if (human_readable)
00236   out << "Sequential (cache-blocked) TSQR:" << endl
00237       << "Datatype: " << datatype << endl
00238       << "Relative residual: " << results.first << endl
00239       << "Relative orthogonality: " << results.second 
00240       << endl << endl;
00241       else
00242   out << "SeqTSQR"
00243       << "," << datatype
00244       << "," << nrows
00245       << "," << ncols
00246       << "," << actor.cache_block_size()
00247       << "," << numCacheBlocks
00248       << "," << contiguous_cache_blocks 
00249       << "," << results.first 
00250       << "," << results.second
00251       << endl;
00252     }
00253 
00254 
00255     void
00256     verifySeqTsqr (std::ostream& out,
00257        const int nrows, 
00258        const int ncols, 
00259        const size_t cache_block_size,
00260        const bool test_complex_arithmetic,
00261        const bool save_matrices,
00262        const bool contiguous_cache_blocks,
00263        const bool human_readable,
00264        const bool b_debug)
00265     {
00266       using TSQR::Random::NormalGenerator;
00267       using std::complex;
00268       using std::string;
00269       using std::vector;
00270 
00271       //
00272       // We do tests one after another, using the seed from the
00273       // previous test in the current test, so that the pseudorandom
00274       // streams used by the tests are independent.
00275       //
00276 
00277       // On output: Seed for the next pseudorandom number generator.
00278       vector< int > iseed(4);
00279       string datatype; // name of the current datatype being tested
00280       string shortDatatype; // one-letter version of datatype
00281 
00282       // First test.  The PRNG seeds itself with a default value.
00283       // This will be the same each time, so if you want
00284       // nondeterministic behavior, you should pick the seed values
00285       // yourself.
00286       NormalGenerator< int, float > normgenS;
00287       datatype = "float";
00288       shortDatatype = "S";
00289       verifySeqTsqrTemplate (out, normgenS, datatype, shortDatatype, nrows, ncols, 
00290            cache_block_size, contiguous_cache_blocks, 
00291            save_matrices, human_readable, b_debug);
00292       // Fetch the pseudorandom seed from the previous test.
00293       normgenS.getSeed (iseed);
00294       NormalGenerator< int, double > normgenD (iseed);
00295       // Next test.
00296       datatype = "double";
00297       shortDatatype = "D";
00298       verifySeqTsqrTemplate (out, normgenD, datatype, shortDatatype, nrows, ncols, 
00299            cache_block_size, contiguous_cache_blocks, 
00300            save_matrices, human_readable, b_debug);
00301 
00302       if (test_complex_arithmetic)
00303   {
00304     normgenD.getSeed (iseed);
00305     NormalGenerator< int, complex<float> > normgenC (iseed);
00306     datatype = "complex<float>";
00307     shortDatatype = "C";
00308     verifySeqTsqrTemplate (out, normgenC, datatype, shortDatatype, nrows, ncols, 
00309          cache_block_size, contiguous_cache_blocks, 
00310          save_matrices, human_readable, b_debug);
00311     normgenC.getSeed (iseed);
00312     NormalGenerator< int, complex<double> > normgenZ (iseed);
00313     datatype = "complex<double>";
00314     shortDatatype = "Z";
00315     verifySeqTsqrTemplate (out, normgenZ, datatype, shortDatatype, nrows, ncols, 
00316          cache_block_size, contiguous_cache_blocks, 
00317          save_matrices, human_readable, b_debug);
00318   }
00319     }
00320 
00321 
00322 
00323     template< class Ordinal, class Scalar >
00324     static void
00325     verifyLapackTemplate (TSQR::Random::NormalGenerator< Ordinal, Scalar >& generator,
00326         const std::string& datatype,
00327         const Ordinal nrows, 
00328         const Ordinal ncols, 
00329         const bool human_readable,
00330         const bool b_debug)
00331     {
00332       typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00333       using std::ostringstream;
00334       using std::cerr;
00335       using std::cout;
00336       using std::endl;
00337 
00338       // Initialize LAPACK.
00339       LAPACK< Ordinal, Scalar > lapack;
00340 
00341       if (b_debug)
00342   cerr << "LAPACK test problem:" << endl
00343        << "* " << nrows << " x " << ncols << endl;
00344 
00345       Matrix< Ordinal, Scalar > A (nrows, ncols);
00346       Matrix< Ordinal, Scalar > A_copy (nrows, ncols);
00347       Matrix< Ordinal, Scalar > Q (nrows, ncols);
00348       Matrix< Ordinal, Scalar > R (ncols, ncols);
00349       if (std::numeric_limits< Scalar >::has_quiet_NaN)
00350   {
00351     A.fill (std::numeric_limits< Scalar>::quiet_NaN());
00352     A_copy.fill (std::numeric_limits< Scalar >::quiet_NaN());
00353     Q.fill (std::numeric_limits< Scalar >::quiet_NaN());
00354     R.fill (std::numeric_limits< Scalar >::quiet_NaN());
00355   }
00356       const Ordinal lda = nrows;
00357       const Ordinal ldq = nrows;
00358       const Ordinal ldr = ncols;
00359 
00360       // Create a test problem
00361       nodeTestProblem (generator, nrows, ncols, A.get(), A.lda(), true);
00362 
00363       if (b_debug)
00364   cerr << "-- Generated test problem" << endl;
00365 
00366       // Copy A into A_copy, since LAPACK QR overwrites the input.
00367       A_copy.copy (A);
00368       if (b_debug)
00369   cerr << "-- Copied test problem from A into A_copy" << endl;
00370 
00371       // Now determine the required workspace for the factorization.
00372       const Ordinal lwork = lworkQueryLapackQr (lapack, nrows, ncols, A_copy.lda());
00373       std::vector< Scalar > work (lwork);
00374       std::vector< Scalar > tau (ncols);
00375 
00376       // Fill R with zeros, since the factorization may not overwrite
00377       // the strict lower triangle of R.
00378       R.fill (Scalar(0));
00379 
00380       // Compute the QR factorization
00381       int info = 0; // INFO is always an int
00382       lapack.GEQRF (nrows, ncols, A_copy.get(), A_copy.lda(), 
00383         &tau[0], &work[0], lwork, &info);
00384       if (info != 0)
00385   {
00386     ostringstream os;
00387     os << "LAPACK QR factorization (_GEQRF) failed: INFO = " << info;
00388     throw std::runtime_error (os.str());
00389   }
00390 
00391       // Copy out the R factor from A_copy (where we computed the QR
00392       // factorization in place) into R.
00393       copy_upper_triangle (ncols, ncols, R.get(), ldr, A_copy.get(), lda);
00394 
00395       if (b_debug)
00396   {
00397     cerr << endl << "-- R factor:" << endl;
00398     print_local_matrix (cerr, ncols, ncols, R.get(), R.lda());
00399     cerr << endl;
00400   }
00401 
00402       // The explicit Q factor will be computed in place, so copy the
00403       // result of the factorization into Q.
00404       Q.copy (A_copy);
00405 
00406       // Compute the explicit Q factor
00407       lapack.ORGQR (nrows, ncols, ncols, Q.get(), ldq, &tau[0], &work[0], lwork, &info);
00408       if (info != 0)
00409   {
00410     ostringstream os;
00411     os << "LAPACK explicit Q computation (_ORGQR) failed: INFO = " << info;
00412     throw std::runtime_error (os.str());
00413   }
00414   
00415       // Validate the factorization
00416       std::pair< magnitude_type, magnitude_type > results = 
00417   local_verify (nrows, ncols, A.get(), lda, Q.get(), ldq, R.get(), ldr);
00418 
00419       // Print the results
00420       if (human_readable)
00421   cout << "LAPACK QR (DGEQRF and DORGQR):" << endl
00422        << "Datatype: " << datatype << endl
00423        << "Relative residual: " << results.first << endl
00424        << "Relative orthogonality: " << results.second << endl;
00425       else
00426   cout << "LAPACK"
00427        << "," << datatype
00428        << "," << nrows
00429        << "," << ncols
00430        << "," << size_t(0) // cache_block_size
00431        << "," << false     // contiguous_cache_blocks 
00432        << "," << results.first 
00433        << "," << results.second
00434        << endl;
00435     }
00436 
00437 
00438     void
00439     verifyLapack (const int nrows, 
00440       const int ncols, 
00441       const bool test_complex_arithmetic,
00442       const bool human_readable,
00443       const bool b_debug)
00444     {
00445       using TSQR::Random::NormalGenerator;
00446       using std::complex;
00447       using std::string;
00448       using std::vector;
00449 
00450       //
00451       // We do tests one after another, using the seed from the
00452       // previous test in the current test, so that the pseudorandom
00453       // streams used by the tests are independent.
00454       //
00455 
00456       // On output: Seed for the next pseudorandom number generator.
00457       vector< int > iseed(4);
00458       string datatype; // name of the current datatype being tested
00459 
00460       // First test.  The PRNG seeds itself with a default value.
00461       // This will be the same each time, so if you want
00462       // nondeterministic behavior, you should pick the seed values
00463       // yourself.
00464       NormalGenerator< int, float > normgenS;
00465       datatype = "float";
00466       verifyLapackTemplate (normgenS, datatype, nrows, ncols, 
00467           human_readable, b_debug);
00468       // Fetch the pseudorandom seed from the previous test.
00469       normgenS.getSeed (iseed);
00470       NormalGenerator< int, double > normgenD (iseed);
00471       // Next test.
00472       datatype = "double";
00473       verifyLapackTemplate (normgenD, datatype, nrows, ncols, 
00474           human_readable, b_debug);
00475 
00476       if (test_complex_arithmetic)
00477   {
00478     normgenD.getSeed (iseed);
00479     NormalGenerator< int, complex<float> > normgenC (iseed);
00480     datatype = "complex<float>";
00481     verifyLapackTemplate (normgenC, datatype, nrows, ncols, 
00482         human_readable, b_debug);
00483     normgenC.getSeed (iseed);
00484     NormalGenerator< int, complex<double> > normgenZ (iseed);
00485     datatype = "complex<double>";
00486     verifyLapackTemplate (normgenZ, datatype, nrows, ncols, 
00487         human_readable, b_debug);
00488   }
00489     }
00490 
00491 
00492   } // namespace Test
00493 } // namespace TSQR
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends