Anasazi Version of the Day
Tsqr_ParTest.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef __TSQR_Test_DistTest_hpp
00030 #define __TSQR_Test_DistTest_hpp
00031 
00032 #include <Tsqr_Config.hpp>
00033 #include <Tsqr_Random_NormalGenerator.hpp>
00034 #include <Tsqr_verifyTimerConcept.hpp>
00035 
00036 #include <Tsqr_generateStack.hpp>
00037 #include <Tsqr_DistTsqr.hpp>
00038 #include <Tsqr_GlobalVerify.hpp>
00039 #include <Tsqr_printGlobalMatrix.hpp>
00040 
00041 #include <algorithm>
00042 #include <iomanip>
00043 #include <iostream>
00044 #include <vector>
00045 
00048 
00049 namespace TSQR {
00050   namespace Test {
00051 
00055     template< class Ordinal, class Scalar >
00056     class DistTsqrVerifier {
00057       TSQR::Random::NormalGenerator< Ordinal, Scalar > gen_;
00058       Teuchos::RCP< MessengerBase< Ordinal > > const ordinalComm_;
00059       Teuchos::RCP< MessengerBase< Scalar > > const scalarComm_;
00060       std::string scalarTypeName_;
00061       std::ostream& out_;
00062       std::ostream& err_;
00063       const bool testFactorExplicit_, testFactorImplicit_;
00064       const bool humanReadable_, printMatrices_, debug_;
00065 
00066     public:
00067       typedef Ordinal ordinal_type;
00068       typedef Scalar scalar_type;
00069       typedef typename ScalarTraits< scalar_type >::magnitude_type magnitude_type;
00070       typedef typename std::pair< magnitude_type, magnitude_type > result_type;
00071       typedef Matrix< ordinal_type, scalar_type > matrix_type;
00072       
00093       DistTsqrVerifier (const Teuchos::RCP< MessengerBase< Ordinal > >& ordinalComm,
00094       const Teuchos::RCP< MessengerBase< Scalar > >& scalarComm,
00095       const std::vector<int>& seed,
00096       const std::string& scalarTypeName,
00097       std::ostream& out,
00098       std::ostream& err,
00099       const bool testFactorExplicit,
00100       const bool testFactorImplicit,
00101       const bool humanReadable,
00102       const bool printMatrices,
00103       const bool debug) :
00104   gen_ (seed), 
00105   ordinalComm_ (ordinalComm),
00106   scalarComm_ (scalarComm),
00107   scalarTypeName_ (scalarTypeName), 
00108   out_ (out), 
00109   err_ (err),
00110   testFactorExplicit_ (testFactorExplicit),
00111   testFactorImplicit_ (testFactorImplicit),
00112   humanReadable_ (humanReadable), 
00113   printMatrices_ (printMatrices),
00114   debug_ (debug)
00115       {}
00116 
00138       DistTsqrVerifier (const Teuchos::RCP< MessengerBase< Ordinal > >& ordinalComm,
00139       const Teuchos::RCP< MessengerBase< Scalar > >& scalarComm,
00140       const std::string& scalarTypeName,
00141       std::ostream& out,
00142       std::ostream& err,
00143       const bool testFactorExplicit,
00144       const bool testFactorImplicit,
00145       const bool humanReadable,
00146       const bool printMatrices,
00147       const bool debug) :
00148   ordinalComm_ (ordinalComm),
00149   scalarComm_ (scalarComm),
00150   scalarTypeName_ (scalarTypeName), 
00151   out_ (out), 
00152   err_ (err),
00153   testFactorExplicit_ (testFactorExplicit),
00154   testFactorImplicit_ (testFactorImplicit),
00155   humanReadable_ (humanReadable), 
00156   printMatrices_ (printMatrices),
00157   debug_ (debug)
00158       {}
00159 
00166       void 
00167       getSeed (std::vector<int>& seed) const
00168       {
00169   gen_.getSeed (seed);
00170       }
00171 
00176       void 
00177       verify (const Ordinal numCols)
00178       {
00179   using std::endl;
00180 
00181   const int myRank = scalarComm_->rank();
00182   if (debug_)
00183     {
00184       scalarComm_->barrier();
00185       if (myRank == 0)
00186         err_ << "Verifying DistTsqr:" << endl;
00187       scalarComm_->barrier();
00188     }
00189 
00190   // Generate test problem.
00191   Matrix< Ordinal, Scalar > A_local, Q_local, R;
00192   testProblem (A_local, Q_local, R, numCols);
00193   if (debug_)
00194     {
00195       scalarComm_->barrier();
00196       if (myRank == 0)
00197         err_ << "-- Generated test problem." << endl;
00198       scalarComm_->barrier();
00199     }
00200 
00201   // Set up TSQR implementation.
00202   DistTsqr< Ordinal, Scalar > par (scalarComm_);
00203   if (debug_)
00204     {
00205       scalarComm_->barrier();
00206       if (myRank == 0)
00207         err_ << "-- DistTsqr object initialized" << endl << endl;
00208     }
00209 
00210   // Whether we've printed column headers yet (only matters for
00211   // non-humanReadable output).
00212   bool printedHeaders = false;
00213 
00214   // Test DistTsqr::factor() and DistTsqr::explicit_Q().
00215   if (testFactorImplicit_)
00216     {
00217       // Factor the matrix A (copied into R, which will be
00218       // overwritten on output)
00219       typedef typename DistTsqr< Ordinal, Scalar >::FactorOutput 
00220         factor_output_type;
00221       factor_output_type factorOutput = par.factor (R.view());
00222       if (debug_)
00223         {
00224     scalarComm_->barrier();
00225     if (myRank == 0)
00226       err_ << "-- Finished DistTsqr::factor" << endl;
00227         }
00228       // Compute the explicit Q factor
00229       par.explicit_Q (numCols, Q_local.get(), Q_local.lda(), factorOutput);
00230       if (debug_)
00231         {
00232     scalarComm_->barrier();
00233     if (myRank == 0)
00234       err_ << "-- Finished DistTsqr::explicit_Q" << endl;
00235         }
00236       // Verify the factorization
00237       result_type result = 
00238         global_verify (numCols, numCols, A_local.get(), A_local.lda(),
00239            Q_local.get(), Q_local.lda(), R.get(), R.lda(), 
00240            scalarComm_.get());
00241       if (debug_)
00242         {
00243     scalarComm_->barrier();
00244     if (myRank == 0)
00245       err_ << "-- Finished global_verify" << endl;
00246         }
00247       reportResults ("DistTsqr", numCols, result, (! printedHeaders));
00248       if (! printedHeaders)
00249         printedHeaders = true;
00250     }
00251 
00252   // Test DistTsqr::factorExplicit()
00253   if (testFactorExplicit_)
00254     {
00255       // Factor the matrix and compute the explicit Q factor, both
00256       // in a single operation.
00257       par.factorExplicit (R.view(), Q_local.view());
00258       if (debug_)
00259         {
00260     scalarComm_->barrier();
00261     if (myRank == 0)
00262       err_ << "-- Finished DistTsqr::factorExplicit" << endl;
00263         }
00264 
00265       if (printMatrices_)
00266         {
00267     if (myRank == 0)
00268       err_ << std::endl << "Computed Q factor:" << std::endl;
00269     printGlobalMatrix (err_, Q_local, scalarComm_.get(), ordinalComm_.get());
00270     if (myRank == 0)
00271       {
00272         err_ << std::endl << "Computed R factor:" << std::endl;
00273         print_local_matrix (err_, R.nrows(), R.ncols(), R.get(), R.lda());
00274         err_ << std::endl;
00275       }
00276         }
00277 
00278       // Verify the factorization
00279       result_type result = 
00280         global_verify (numCols, numCols, A_local.get(), A_local.lda(),
00281            Q_local.get(), Q_local.lda(), R.get(), R.lda(), 
00282            scalarComm_.get());
00283       if (debug_)
00284         {
00285     scalarComm_->barrier();
00286     if (myRank == 0)
00287       err_ << "-- Finished global_verify" << endl;
00288         }
00289       reportResults ("DistTsqrRB", numCols, result, (! printedHeaders));
00290       if (! printedHeaders)
00291         printedHeaders = true;
00292     }
00293       }
00294 
00295     private:
00302       void 
00303       reportResults (const std::string& method, 
00304          const Ordinal numCols, 
00305          const result_type& result,
00306          const bool printHeaders)
00307       {
00308   using std::endl;
00309 
00310   const int numProcs = scalarComm_->size();
00311   const int myRank = scalarComm_->rank();
00312 
00313   if (myRank == 0)
00314     {
00315       if (humanReadable_)
00316         {
00317     out_ << method << " accuracy results:" << endl
00318          << "Scalar type = " << scalarTypeName_ << endl
00319          << "Number of columns = " << numCols << endl
00320          << "Number of (MPI) processes = " << numProcs << endl
00321          << "Relative residual $\\|A - Q*R\\|_2 / \\|A\\|_2$ = " 
00322          << result.first << endl
00323          << "Relative orthogonality $\\|I - Q^T*Q\\|_2$ = " 
00324          << result.second << endl;
00325         }
00326       else
00327         {
00328     // Use scientific notation for floating-point numbers
00329     out_ << std::scientific;
00330 
00331     if (printHeaders)
00332       out_ << "%method,scalarType,numCols,numProcs,relResid,relOrthog" << endl;
00333 
00334     out_ << method
00335          << "," << scalarTypeName_
00336          << "," << numCols
00337          << "," << numProcs
00338          << "," << result.first
00339          << "," << result.second
00340          << endl;
00341         }
00342     }
00343       }
00344 
00345       void 
00346       testProblem (Matrix< Ordinal, Scalar >& A_local,
00347        Matrix< Ordinal, Scalar >& Q_local,
00348        Matrix< Ordinal, Scalar >& R,
00349        const Ordinal numCols)
00350       {
00351   const Ordinal numRowsLocal = numCols;
00352 
00353   // A_local: Space for the matrix A to factor -- local to each
00354   //   processor.
00355   //
00356   // A_global: Global matrix (only nonempty on Proc 0); only
00357   //   used temporarily.
00358   Matrix< Ordinal, Scalar > A_global;
00359 
00360   // This modifies A_local on all procs, and A_global on Proc 0.
00361   par_tsqr_test_problem (gen_, A_local, A_global, numCols, scalarComm_);
00362   
00363   if (printMatrices_)
00364     {
00365       const int myRank = scalarComm_->rank();
00366 
00367       if (myRank == 0)
00368         err_ << "Input matrix A:" << std::endl;
00369       printGlobalMatrix (err_, A_local, scalarComm_.get(), ordinalComm_.get());
00370       if (myRank == 0)
00371         err_ << std::endl;
00372     }
00373 
00374   // Copy the test problem input into R, since the factorization
00375   // will overwrite it in place with the final R factor.
00376   R.reshape (numCols, numCols);
00377   R.fill (Scalar(0));
00378   R.copy (A_local);
00379 
00380   // Prepare space in which to construct the explicit Q factor
00381   // (local component on this processor)
00382   Q_local.reshape (numRowsLocal, numCols);
00383   Q_local.fill (Scalar(0));
00384       }
00385     };
00386 
00387 
00391     template< class Ordinal, class Scalar, class TimerType >
00392     class DistTsqrBenchmarker {
00393       TSQR::Random::NormalGenerator< Ordinal, Scalar > gen_;
00394       Teuchos::RCP< MessengerBase< Scalar > > scalarComm_;
00395       Teuchos::RCP< MessengerBase< double > > doubleComm_; 
00396       std::string scalarTypeName_;
00397 
00398       std::ostream& out_;
00399       std::ostream& err_;
00400       const bool testFactorExplicit_, testFactorImplicit_;
00401       const bool humanReadable_, debug_;
00402 
00403     public:
00404       typedef Ordinal ordinal_type;
00405       typedef Scalar scalar_type;
00406       typedef typename ScalarTraits< scalar_type >::magnitude_type magnitude_type;
00407       typedef TimerType timer_type;
00408 
00432       DistTsqrBenchmarker (const Teuchos::RCP< MessengerBase< Scalar > >& scalarComm,
00433          const Teuchos::RCP< MessengerBase< double > >& doubleComm,
00434          const std::vector<int>& seed,
00435          const std::string& scalarTypeName,
00436          std::ostream& out,
00437          std::ostream& err,
00438          const bool testFactorExplicit,
00439          const bool testFactorImplicit,
00440          const bool humanReadable,
00441          const bool debug) :
00442   gen_ (seed), 
00443   scalarComm_ (scalarComm),
00444   doubleComm_ (doubleComm),
00445   scalarTypeName_ (scalarTypeName), 
00446   out_ (out), 
00447   err_ (err),
00448   testFactorExplicit_ (testFactorExplicit),
00449   testFactorImplicit_ (testFactorImplicit),
00450   humanReadable_ (humanReadable), 
00451   debug_ (debug)
00452       {}
00453 
00478       DistTsqrBenchmarker (const Teuchos::RCP< MessengerBase< Scalar > >& scalarComm,
00479          const Teuchos::RCP< MessengerBase< double > >& doubleComm,
00480          const std::string& scalarTypeName,
00481          std::ostream& out,
00482          std::ostream& err,
00483          const bool testFactorExplicit,
00484          const bool testFactorImplicit,
00485          const bool humanReadable,
00486          const bool debug) :
00487   scalarComm_ (scalarComm),
00488   doubleComm_ (doubleComm),
00489   scalarTypeName_ (scalarTypeName), 
00490   out_ (out), 
00491   err_ (err),
00492   testFactorExplicit_ (testFactorExplicit),
00493   testFactorImplicit_ (testFactorImplicit),
00494   humanReadable_ (humanReadable), 
00495   debug_ (debug)
00496       {}
00497 
00504       void 
00505       getSeed (std::vector<int>& seed) const
00506       {
00507   gen_.getSeed (seed);
00508       }
00509 
00516       void 
00517       benchmark (const int numTrials, const Ordinal numCols)
00518       {
00519   using std::endl;
00520 
00521   // Set up test problem.
00522   Matrix< Ordinal, Scalar > A_local, Q_local, R;
00523   testProblem (A_local, Q_local, R, numCols);
00524 
00525   // Set up TSQR implementation.
00526   DistTsqr< Ordinal, Scalar > par (scalarComm_);
00527 
00528   // Whether we've printed column headers yet (only matters for
00529   // non-humanReadable output).
00530   bool printedHeaders = false;
00531 
00532   if (testFactorImplicit_)
00533     {
00534       std::string timerName ("DistTsqr");
00535 
00536       // Benchmark DistTsqr (factor() and explicit_Q()) for
00537       // numTrials trials.
00538       timer_type timer (timerName);
00539       timer.start();
00540       for (int trialNum = 0; trialNum < numTrials; ++trialNum)
00541         {
00542     // Factor the matrix A (copied into R, which will be
00543     // overwritten on output)
00544     typedef typename DistTsqr< Ordinal, Scalar >::FactorOutput 
00545       factor_output_type;
00546     factor_output_type factorOutput = par.factor (R.view());
00547 
00548     // Compute the explicit Q factor
00549     par.explicit_Q (numCols, Q_local.get(), Q_local.lda(), factorOutput);
00550         }
00551       // Cumulative timing on this MPI process.
00552       // "Cumulative" means the elapsed time of numTrials executions.
00553       const double localCumulativeTiming = timer.stop();
00554 
00555       // reportResults() must be called on all processes, since this
00556       // figures out the min and max timings over all processes.
00557       reportResults (timerName, numTrials, numCols, localCumulativeTiming, 
00558          (! printedHeaders));
00559       if (! printedHeaders)
00560         printedHeaders = true;
00561     }
00562 
00563   if (testFactorExplicit_)
00564     {
00565       std::string timerName ("DistTsqrRB");
00566 
00567       // Benchmark DistTsqr::factorExplicit() for numTrials trials.
00568       timer_type timer (timerName);
00569       timer.start();
00570       for (int trialNum = 0; trialNum < numTrials; ++trialNum)
00571         {
00572     par.factorExplicit (R.view(), Q_local.view());
00573         }
00574       // Cumulative timing on this MPI process.
00575       // "Cumulative" means the elapsed time of numTrials executions.
00576       const double localCumulativeTiming = timer.stop();
00577 
00578       // Report cumulative (not per-invocation) timing results
00579       reportResults (timerName, numTrials, numCols, 
00580          localCumulativeTiming, (! printedHeaders));
00581       if (! printedHeaders)
00582         printedHeaders = true;
00583 
00584       // Per-invocation timings (for factorExplicit() benchmark
00585       // only).  localTimings were computed on this MPI process;
00586       // globalTimings are statistical summaries of those over
00587       // all MPI processes.  We only collect that data for
00588       // factorExplicit().
00589       std::vector< TimeStats > localTimings;
00590       std::vector< TimeStats > globalTimings;
00591       par.getFactorExplicitTimings (localTimings);
00592       for (std::vector< TimeStats >::size_type k = 0; k < localTimings.size(); ++k)
00593         globalTimings.push_back (TimeStats::globalTimeStats (doubleComm_.get(), localTimings[k]));
00594       std::vector< std::string > timingLabels;
00595       par.getFactorExplicitTimingLabels (timingLabels);
00596 
00597       if (humanReadable_)
00598         out_ << timerName << " per-invocation benchmark results:" << endl;
00599 
00600       const std::string labelLabel ("label,scalarType");
00601       for (std::vector< std::string >::size_type k = 0; k < timingLabels.size(); ++k)
00602         {
00603     // Only print column headers once
00604     const bool printHeaders = (k == 0);
00605     globalTimings[k].print (out_, humanReadable_, 
00606           timingLabels[k] + "," + scalarTypeName_, 
00607           labelLabel, printHeaders);
00608         }
00609     }
00610       }
00611 
00612     private:
00625       void 
00626       reportResults (const std::string& method,
00627          const int numTrials,
00628          const ordinal_type numCols,
00629          const double localTiming,
00630          const bool printHeaders)
00631       {
00632   using std::endl;
00633 
00634   // Find min and max timing over all MPI processes
00635   TimeStats localStats;
00636   localStats.update (localTiming);
00637   TimeStats globalStats = TimeStats::globalTimeStats (doubleComm_.get(), localStats);
00638 
00639   // Only Rank 0 prints the final results.
00640   const bool printResults = (doubleComm_->rank() == 0);
00641   if (printResults)
00642     {
00643       const int numProcs = doubleComm_->size();
00644       if (humanReadable_)
00645         {
00646     out_ << method << " cumulative benchmark results (total time over all trials):" << endl
00647          << "Scalar type = " << scalarTypeName_ << endl
00648          << "Number of columns = " << numCols << endl
00649          << "Number of (MPI) processes = " << numProcs << endl
00650          << "Number of trials = " << numTrials << endl
00651          << "Min timing (in seconds) = " << globalStats.min() << endl
00652          << "Mean timing (in seconds) = " << globalStats.mean() << endl
00653          << "Max timing (in seconds) = " << globalStats.max() << endl
00654          << endl;
00655         }
00656       else
00657         {
00658     // Use scientific notation for floating-point numbers
00659     out_ << std::scientific;
00660 
00661     if (printHeaders)
00662       out_ << "%method,scalarType,numCols,numProcs,numTrials,min,mean,max" << endl;
00663 
00664     out_ << method
00665          << "," << scalarTypeName_
00666          << "," << numCols
00667          << "," << numProcs
00668          << "," << numTrials
00669          << "," << globalStats.min()
00670          << "," << globalStats.mean()
00671          << "," << globalStats.max()
00672          << endl;
00673         }
00674     }
00675       }
00676 
00677       void 
00678       testProblem (Matrix< Ordinal, Scalar >& A_local,
00679        Matrix< Ordinal, Scalar >& Q_local,
00680        Matrix< Ordinal, Scalar >& R,
00681        const Ordinal numCols)
00682       {
00683   const Ordinal numRowsLocal = numCols;
00684 
00685   // A_local: Space for the matrix A to factor -- local to each
00686   //   processor.
00687   //
00688   // A_global: Global matrix (only nonempty on Proc 0); only
00689   //   used temporarily.
00690   Matrix< Ordinal, Scalar > A_global;
00691 
00692   // This modifies A_local on all procs, and A_global on Proc 0.
00693   par_tsqr_test_problem (gen_, A_local, A_global, numCols, scalarComm_);
00694 
00695   // Copy the test problem input into R, since the factorization
00696   // will overwrite it in place with the final R factor.
00697   R.reshape (numCols, numCols);
00698   R.copy (A_local);
00699 
00700   // Prepare space in which to construct the explicit Q factor
00701   // (local component on this processor)
00702   Q_local.reshape (numRowsLocal, numCols);
00703   Q_local.fill (Scalar(0));
00704       }
00705 
00708       static void
00709       conceptChecks () 
00710       {
00711   verifyTimerConcept< timer_type >();
00712       }
00713     };
00714 
00715 
00716   } // namespace Test
00717 } // namespace TSQR
00718 
00719 #endif // __TSQR_Test_DistTest_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends