Teuchos_TimeMonitor.cpp
// @HEADER
// ***********************************************************************
//
//                    Teuchos: Common Tools Package
//                 Copyright (2004) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ***********************************************************************
// @HEADER

#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_CommHelpers.hpp"
#include "Teuchos_DefaultComm.hpp"
#include "Teuchos_TableColumn.hpp"
#include "Teuchos_TableFormat.hpp"
#include <functional>

namespace Teuchos {

  typedef std::map<std::string, std::pair<double, int> > timer_map_t;

  TimeMonitor::TimeMonitor (Time& timer, bool reset)
    : PerformanceMonitorBase<Time>(timer, reset)
  {
    if (!isRecursiveCall()) counter().start(reset);
  }

  TimeMonitor::~TimeMonitor() {
    if (!isRecursiveCall()) counter().stop();
  }
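
  // A minimal usage sketch (illustrative, not part of this translation
  // unit).  It assumes the static getNewCounter() factory declared in
  // Teuchos_TimeMonitor.hpp; the timer label "My Solve" and the timed
  // work are hypothetical.  The constructor above starts the wrapped
  // Time, and the destructor stops it when the scope ends.
  //
  //   RCP<Time> solveTime = TimeMonitor::getNewCounter ("My Solve");
  //   {
  //     TimeMonitor monitor (*solveTime); // timer starts here
  //     // ... work to be timed ...
  //   } // timer stops here, when monitor goes out of scope
  //   TimeMonitor::summarize (std::cout);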

  void
  TimeMonitor::zeroOutTimers()
  {
    const Array<RCP<Time> > timers = counters();

    // In debug mode, loop first to check whether any of the timers
    // are running, before resetting them.  This ensures that this
    // method satisfies the strong exception guarantee (either it
    // completes normally, or there are no side effects).
#ifdef TEUCHOS_DEBUG
    typedef Array<RCP<Time> >::size_type size_type;
    const size_type numTimers = timers.size();
    for (size_type i = 0; i < numTimers; ++i)
      {
        Time &timer = *timers[i];
        // We throw a runtime_error rather than a logic_error, because
        // logic_error suggests a bug in the implementation of
        // TimeMonitor.  Calling zeroOutTimers() when a timer is
        // running is not TimeMonitor's fault.
        TEST_FOR_EXCEPTION(timer.isRunning(), std::runtime_error,
                           "The timer i = " << i << " with name \""
                           << timer.name() << "\" is currently running and may not "
                           "be reset.");
      }
#endif // TEUCHOS_DEBUG

    for (Array<RCP<Time> >::const_iterator it = timers.begin();
         it != timers.end(); ++it)
      (*it)->reset ();
  }
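
  // Usage sketch (illustrative only): zeroOutTimers() should only be
  // called when no timer is running; in a debug build, a running timer
  // triggers the runtime_error thrown above.  The phase functions below
  // are hypothetical.
  //
  //   runPhaseOne ();                     // all TimeMonitors out of scope
  //   TimeMonitor::summarize (std::cout); // report phase-one timings
  //   TimeMonitor::zeroOutTimers ();      // start phase two from zero
  //   runPhaseTwo ();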

  // An anonymous namespace is the standard way of limiting linkage of
  // its contained routines to file scope.
  namespace {
    // \brief Return an "empty" local timer datum.
    //
    // "Empty" means the datum has zero elapsed time and zero call
    // count.  This function does not actually create a timer.
    //
    // \param name The timer's name.
    std::pair<std::string, std::pair<double, int> >
    makeEmptyTimerDatum (const std::string& name)
    {
      return std::make_pair (name, std::make_pair (double(0), int(0)));
    }

    // \brief Locally filter out timer data with zero call counts.
    //
    // \param timerData [in/out]
    void
    filterZeroData (timer_map_t& timerData)
    {
      timer_map_t newTimerData;
      for (timer_map_t::const_iterator it = timerData.begin();
           it != timerData.end(); ++it)
        {
          if (it->second.second > 0)
            newTimerData[it->first] = it->second;
        }
      timerData.swap (newTimerData);
    }
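
    // Illustrative example (hypothetical data): filtering
    //   { "apply" -> (1.2 s, 4 calls), "solve" -> (0.0 s, 0 calls) }
    // leaves only
    //   { "apply" -> (1.2 s, 4 calls) }.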

    //
    // \brief Collect and sort local timer data by timer names.
    //
    void
    collectLocalTimerData (timer_map_t& localData,
                           const Array<RCP<Time> >& localCounters)
    {
      using std::make_pair;
      typedef timer_map_t::const_iterator const_iter_t;
      typedef timer_map_t::iterator iter_t;

      timer_map_t theLocalData;
      for (Array<RCP<Time> >::const_iterator it = localCounters.begin();
           it != localCounters.end(); ++it)
        {
          const std::string& name = (*it)->name();
          const double timing = (*it)->totalElapsedTime();
          const int numCalls = (*it)->numCalls();

          // Merge timers with duplicate labels, by summing their
          // total elapsed times and call counts.
          iter_t loc = theLocalData.find (name);
          if (loc == theLocalData.end())
            // Use loc as an insertion location hint.
            theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
          else
            {
              loc->second.first += timing;
              loc->second.second += numCalls;
            }
        }
      // This avoids copying the map, and also makes this method
      // satisfy the strong exception guarantee.
      localData.swap (theLocalData);
    }
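
    // Illustrative example (hypothetical data): if two Time objects share
    // the label "assemble", with (1.5 s, 3 calls) and (0.5 s, 1 call),
    // the resulting map holds a single merged entry
    //   localData["assemble"] == make_pair (2.0, 4).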
  } // namespace (anonymous)

  void
  TimeMonitor::summarize (std::ostream &out,
                          const bool alwaysWriteLocal,
                          const bool writeGlobalStats,
                          const bool writeZeroTimers,
                          const ECounterSetOp setOp)
  {
    using std::cerr;
    using std::endl;
    using std::make_pair;
    using std::string;

    const bool debug = false;

    // The default communicator.  If Trilinos was built with MPI
    // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
    // be a "serial" (no MPI, one "process") communicator.
    RCP<const Comm<int> > pComm = DefaultComm<int>::getComm ();

    // Callers may or may not have initialized MPI before calling
    // summarize().  Just because they built with MPI, doesn't mean
    // they want to use MPI.  It's not my responsibility to initialize
    // MPI for them, and I don't have the context I need in order to
    // do so anyway.  Thus, if Trilinos was built with MPI and MPI has
    // not yet been initialized, make pComm a "serial" communicator.
#ifdef HAVE_MPI
    {
      int mpiHasBeenStarted = 0;
      MPI_Initialized (&mpiHasBeenStarted);
      if (! mpiHasBeenStarted)
        {
          // mfh 19 Jul 2011
          //
          // The first line commented out below compiles and runs
          // correctly with GCC 4.5.1, but gives a compiler error with
          // Intel's C++ compiler (version 11.1).  Intel's compiler
          // also rejects the two commented-out lines that follow.  It
          // seems that as of 19 July 2011, this code is the only code
          // in Trilinos that uses getDefaultSerialComm().  Intel's
          // compiler claims that the "Ordinal" template parameter of
          // DefaultComm is really the Teuchos::Ordinal typedef; the
          // Ordinal template parameter _should_ shadow the typedef in
          // Teuchos, and does with GCC, but does not in Intel's
          // compiler.  This may be the case with other compilers as
          // well, but I haven't tested them yet.
          //
          //pComm = DefaultComm<int>::getDefaultSerialComm (null);
          //
          //RCP<const Comm<int> > nullComm; // is null.
          //pComm = DefaultComm<int>::getDefaultSerialComm (nullComm);

          pComm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
        }
    }
#endif // HAVE_MPI

    const int numProcs = pComm->getSize();
    const int myRank = pComm->getRank();

    if (debug && myRank == 0)
      {
        cerr << "summarize (out, "
             << "alwaysWriteLocal=" << alwaysWriteLocal
             << ", writeGlobalStats=" << writeGlobalStats
             << ", writeZeroTimers=" << writeZeroTimers
             << ", setOp=" << (setOp==Union ? "Union" : "Intersection")
             << ")" << endl;
      }

    // Collect and sort local timer data by timer names.
    timer_map_t localTimerData;
    collectLocalTimerData (localTimerData, counters());

    // In debug mode, print out local timer data on each process,
    // before possibly filtering out data with zero call counts.
    if (debug)
      {
        for (int p = 0; p < numProcs; ++p)
          {
            if (myRank == p)
              {
                cerr << "Proc " << myRank << ": Local timer data:" << endl;
                for (timer_map_t::const_iterator it = localTimerData.begin();
                     it != localTimerData.end(); ++it)
                  cerr << "-- " << it->first << ", " << it->second.first
                       << ", " << it->second.second << endl;
              }
            // Two barriers generally synchronize output, at least
            // when debugging with multiple MPI processes on one node.
            barrier (*pComm);
            barrier (*pComm);
          }
      }

    // Filter out zero data locally first.  This ensures that if we
    // are writing global stats, and if a timer name exists in the set
    // of global names, then that timer has a nonzero call count on at
    // least one MPI process.
    if (! writeZeroTimers)
      {
        filterZeroData (localTimerData);

        // In debug mode, print out local timer data on each process,
        // after possibly filtering out data with zero call counts.
        if (debug)
          {
            for (int p = 0; p < numProcs; ++p)
              {
                if (myRank == p)
                  {
                    cerr << "Proc " << myRank << ": Local timer data, "
                      "after filtering zero call counts:" << endl;
                    for (timer_map_t::const_iterator it = localTimerData.begin();
                         it != localTimerData.end(); ++it)
                      cerr << "-- " << it->first << ", " << it->second.first
                           << ", " << it->second.second << endl;
                  }
                // Two barriers generally synchronize output, at least
                // when debugging with multiple MPI processes on one node.
                barrier (*pComm);
                barrier (*pComm);
              }
          }
      }

    // Extract the set of local timer names.  The std::map keeps them
    // sorted alphabetically.
    Array<string> localTimerNames;
    localTimerNames.reserve (localTimerData.size());
    for (timer_map_t::const_iterator it = localTimerData.begin();
         it != localTimerData.end(); ++it)
      localTimerNames.push_back (it->first);

    if (debug)
      {
        for (int p = 0; p < numProcs; ++p)
          {
            if (myRank == p)
              {
                cerr << "Proc " << myRank << ": Local timer names:" << endl;
                for (Array<string>::const_iterator it = localTimerNames.begin();
                     it != localTimerNames.end(); ++it)
                  cerr << "-- " << *it << endl;
              }
            barrier (*pComm);
            barrier (*pComm);
          }
      }

    // globalTimerData and globalTimerNames are only valid if
    // writeGlobalStats is true.
    Array<string> globalTimerNames;
    timer_map_t globalTimerData;

    // If writeGlobalStats is true (i.e., if we are computing global
    // stats), there may be some global timers that are not local
    // timers on the calling MPI process(es).  In that case, if
    // alwaysWriteLocal is true, then we need to fill in the "missing"
    // local timers.  That will ensure that both global and local
    // timer columns in the output table have the same number of rows.
    // If writeZeroTimers==false, we already filtered out local timers
    // with zero call counts above.  Thus, any inserted local timer
    // data won't get filtered out again.
    //
    // Inserting new local data with zero call counts will result in
    // local timers with zero call counts, which violates the
    // expectation of writeZeroTimers == false for the local call
    // counts.  However, writeZeroTimers == false will still do what
    // it says for the global call counts.
    if (writeGlobalStats)
      {
        // This does the correct and inexpensive thing (just copies
        // the timer data) if numProcs == 1.  Otherwise, it initiates
        // a communication with \f$O(\log P)\f$ messages along the
        // critical path.
        mergeCounterNames (*pComm, localTimerNames, globalTimerNames, setOp);

        if (debug)
          {
            // Sanity check that all MPI procs have the same number of
            // global timer names.
            const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
            timer_map_t::size_type minNumGlobalNames = 0;
            timer_map_t::size_type maxNumGlobalNames = 0;
            reduceAll (*pComm, REDUCE_MIN, myNumGlobalNames,
                       outArg (minNumGlobalNames));
            reduceAll (*pComm, REDUCE_MAX, myNumGlobalNames,
                       outArg (maxNumGlobalNames));
            TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
                               std::logic_error,
                               "Min # global timer names = " << minNumGlobalNames
                               << " != max # global timer names = " << maxNumGlobalNames
                               << ".  Please report this bug to the Teuchos developers.");
            TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
                               std::logic_error,
                               "My # global timer names = " << myNumGlobalNames
                               << " != min # global timer names = " << minNumGlobalNames
                               << ".  Please report this bug to the Teuchos developers.");
          }

        // mergeCounterNames() just merges timer names, not their actual
        // data.  Now we need to fill globalTimerData with this MPI
        // process' timer data for those timers in globalTimerNames.
        //
        // All processes need the full list of global timers, since
        // there may be some global timers that are not local timers.
        // That's why mergeCounterNames() has to be an all-reduce, not
        // just a reduction to Proc 0.
        //
        // If there are some global timers that are not local timers,
        // and if we want to print local timers, we insert a local
        // timer datum with zero elapsed time and zero call count into
        // localTimerData as well, for the reason mentioned above.
        //
        // Insertion optimization: if the iterator given to
        // map::insert points right before where we want to insert,
        // insertion is O(1).  globalTimerNames is sorted, so feeding
        // the iterator output of map::insert into the next
        // invocation's input should make the whole insertion O(N),
        // where N is the number of entries in globalTimerNames.
        timer_map_t::iterator globalMapIter = globalTimerData.begin();
        timer_map_t::iterator localMapIter;
        for (Array<string>::const_iterator it = globalTimerNames.begin();
             it != globalTimerNames.end(); ++it)
          {
            const std::string& globalName = *it;
            localMapIter = localTimerData.find (globalName);

            if (localMapIter == localTimerData.end())
              {
                if (alwaysWriteLocal)
                  {
                    // We really only need to do this on MPI Proc 0, which is
                    // the only process that currently may print local timers.
                    // However, we do it on all MPI processes, just in case
                    // someone later wants to modify this function to print
                    // out local timer data for some MPI process other than
                    // Rank 0.  This extra computation won't affect the cost
                    // along the critical path, for future computations in
                    // which Proc 0 participates.
                    localMapIter =
                      localTimerData.insert (localMapIter,
                                             makeEmptyTimerDatum (globalName));

                    // Make sure the missing global name gets added to
                    // the list of local names.  We'll resort it below.
                    localTimerNames.push_back (globalName);
                  }
                globalMapIter =
                  globalTimerData.insert (globalMapIter,
                                          makeEmptyTimerDatum (globalName));
              }
            else
              globalMapIter =
                globalTimerData.insert (globalMapIter,
                                        make_pair (globalName,
                                                   localMapIter->second));
          }
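
        // Sketch (illustrative, standalone) of the hinted-insertion pattern
        // used in the loop above, with a hypothetical map: when keys arrive
        // in sorted order, passing the iterator returned by the previous
        // insert as the hint keeps each insertion cheap.
        //
        //   std::map<std::string, int> m;
        //   std::map<std::string, int>::iterator hint = m.begin ();
        //   hint = m.insert (hint, std::make_pair (std::string ("a"), 1));
        //   hint = m.insert (hint, std::make_pair (std::string ("b"), 2));
        //   hint = m.insert (hint, std::make_pair (std::string ("c"), 3));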

        if (alwaysWriteLocal)
          // Resort the list of local timer names, since we may have
          // inserted "missing" names above.
          std::sort (localTimerNames.begin(), localTimerNames.end());

        if (debug)
          {
            // Sanity check that all MPI procs have the same number of
            // global timers.
            const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
            timer_map_t::size_type minNumGlobalTimers = 0;
            timer_map_t::size_type maxNumGlobalTimers = 0;
            reduceAll (*pComm, REDUCE_MIN, myNumGlobalTimers,
                       outArg (minNumGlobalTimers));
            reduceAll (*pComm, REDUCE_MAX, myNumGlobalTimers,
                       outArg (maxNumGlobalTimers));
            TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
                               std::logic_error,
                               "Min # global timers = " << minNumGlobalTimers
                               << " != max # global timers = " << maxNumGlobalTimers
                               << ".  Please report this bug to the Teuchos developers.");
            TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
                               std::logic_error,
                               "My # global timers = " << myNumGlobalTimers
                               << " != min # global timers = " << minNumGlobalTimers
                               << ".  Please report this bug to the Teuchos developers.");
          }
      } // if (writeGlobalStats)

    // Extract the timer names (global or local) into a single array
    // of strings.  These are the labels of the table's rows, one row
    // per timer.
    Array<string> timerNames;
    timerNames.reserve (globalTimerData.size());
    if (writeGlobalStats)
      // Use global timer names as the row labels.
      std::copy (globalTimerNames.begin(), globalTimerNames.end(),
                 std::back_inserter (timerNames));
    else
      // Use local timer names as the row labels.
      std::copy (localTimerNames.begin(), localTimerNames.end(),
                 std::back_inserter (timerNames));

    if (debug)
      {
        for (int p = 0; p < numProcs; ++p)
          {
            if (myRank == p)
              {
                cerr << "Proc " << myRank << ": Global timer names:" << endl;
                for (Array<std::string>::const_iterator it = globalTimerNames.begin();
                     it != globalTimerNames.end(); ++it)
                  cerr << "-- " << *it << endl;
              }
            barrier (*pComm);
            barrier (*pComm);
          }
        for (int p = 0; p < numProcs; ++p)
          {
            if (myRank == p)
              {
                cerr << "Proc " << myRank << ": Global timer data:" << endl;
                for (timer_map_t::const_iterator it = globalTimerData.begin();
                     it != globalTimerData.end(); ++it)
                  cerr << "-- " << it->first << ", " << it->second.first
                       << ", " << it->second.second << endl;
              }
            barrier (*pComm);
            barrier (*pComm);
          }
      }

    const int precision = format().precision();

    // All columns of the table, in order.
    Array<TableColumn> tableColumns;

    // Labels of all the columns of the table.
    // We will append to this when we add each column.
    Array<string> titles;

    // Widths (in number of characters) of each column.
    // We will append to this when we add each column.
    Array<int> columnWidths;

    // Table column containing all timer labels.
    {
      titles.append ("Timer Name");
      TableColumn nameCol (timerNames);
      tableColumns.append (nameCol);

      // Each column is as wide as it needs to be to hold both its title
      // and all of the column data.  This column's title is the current
      // last entry of the titles array.
      columnWidths.append (format().computeRequiredColumnWidth (titles.back(),
                                                                nameCol));
    }

    // Table column containing local timer stats, if applicable.  We
    // only write local stats if asked, only on MPI Proc 0, and only
    // if there is more than one MPI process in the communicator
    // (otherwise local stats == global stats, so we just print the
    // global stats).
    if (alwaysWriteLocal && numProcs > 1 && myRank == 0)
      {
        titles.append ("Local time (num calls)");

        // Copy local timer data out of the array-of-structs into
        // separate arrays, for display in the table.
        Array<double> localTimings;
        Array<double> localNumCalls;
        for (timer_map_t::const_iterator it = localTimerData.begin();
             it != localTimerData.end(); ++it)
          {
            localTimings.push_back (it->second.first);
            localNumCalls.push_back (static_cast<double> (it->second.second));
          }
        TableColumn timeAndCalls (localTimings, localNumCalls, precision, true);
        tableColumns.append (timeAndCalls);
        columnWidths.append (format().computeRequiredColumnWidth (titles.back(),
                                                                  timeAndCalls));
      }

    if (writeGlobalStats)
      {
        const timer_map_t::size_type numGlobalTimers = globalTimerData.size();

        // Copy global timer data out of the array-of-structs into
        // separate arrays, for display in the table and/or for
        // computing statistics.
        Array<double> globalTimings;
        Array<double> globalNumCalls;
        for (timer_map_t::const_iterator it = globalTimerData.begin();
             it != globalTimerData.end(); ++it)
          {
            globalTimings.push_back (it->second.first);
            globalNumCalls.push_back (static_cast<double> (it->second.second));
          }

        if (numProcs == 1)
          {
            // Don't display statistics in the case of only 1 MPI process.
            // Just display the elapsed times and the call counts.
            titles.append ("Global time (num calls)");
            TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true);
            tableColumns.append (timeAndCalls);
            columnWidths.append (format().computeRequiredColumnWidth (titles.back(),
                                                                      timeAndCalls));
          }
        else // numProcs > 1
          {
            // Table column containing min of global timer stats, if
            // applicable.
            //
            // NOTE (mfh 18 Jul 2011) The reported minimum global number
            // of calls may be for a different MPI process than the
            // minimum global timing.  Ditto for the other statistics.
            // What this means is that you should not divide the
            // reported (minimum, mean, maximum) elapsed time by the
            // reported (minimum, mean, maximum) call count to get an
            // "average," since those quantities are not necessarily
            // comparable.
            {
              titles.append ("Min over procs");
              Array<double> minGlobalTimings (numGlobalTimers);
              Array<double> minGlobalNumCalls (numGlobalTimers);

              // Teuchos_CommHelpers.hpp doesn't currently have a reduce(); it
              // only has a reduceAll().  It would be better just to reduce to
              // Proc 0, but that has to wait until reduce() is implemented.
              if (numGlobalTimers > 0)
                {
                  reduceAll (*pComm, REDUCE_MIN,
                             static_cast<int> (numGlobalTimers),
                             &globalTimings[0], &minGlobalTimings[0]);
                  reduceAll (*pComm, REDUCE_MIN,
                             static_cast<int> (numGlobalTimers),
                             &globalNumCalls[0], &minGlobalNumCalls[0]);
                }
              TableColumn timeAndCalls (minGlobalTimings, minGlobalNumCalls,
                                        precision, true);
              tableColumns.append (timeAndCalls);
              columnWidths.append (format().computeRequiredColumnWidth (titles.back(),
                                                                        timeAndCalls));
            }
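
            // Worked example (hypothetical numbers) of the caveat above: if
            // Proc 0 reports (2.0 s, 10 calls) and Proc 1 reports (1.0 s,
            // 20 calls), the table shows a minimum time of 1.0 s and a
            // minimum call count of 10, which come from different
            // processes; dividing 1.0 s by 10 calls does not give a
            // meaningful average.
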
            // Table column containing arithmetic mean of global timer
            // stats, if applicable.
            {
              titles.append ("Mean over procs");

              // Scale first, so that the reduction can sum.  This avoids
              // unnecessary overflow, in case the sum is large but the number
              // of processors is also large.
              Array<double> scaledGlobalTimings (numGlobalTimers);
              Array<double> scaledGlobalNumCalls (numGlobalTimers);
              std::transform (globalTimings.begin(), globalTimings.end(),
                              scaledGlobalTimings.begin(),
                              std::bind2nd (std::divides<double>(),
                                            static_cast<double> (numProcs)));
              std::transform (globalNumCalls.begin(), globalNumCalls.end(),
                              scaledGlobalNumCalls.begin(),
                              std::bind2nd (std::divides<double>(),
                                            static_cast<double> (numProcs)));
              Array<double> avgGlobalTimings (numGlobalTimers);
              Array<double> avgGlobalNumCalls (numGlobalTimers);
              if (numGlobalTimers > 0)
                {
                  reduceAll (*pComm, REDUCE_SUM,
                             static_cast<int> (numGlobalTimers),
                             &scaledGlobalTimings[0], &avgGlobalTimings[0]);
                  reduceAll (*pComm, REDUCE_SUM,
                             static_cast<int> (numGlobalTimers),
                             &scaledGlobalNumCalls[0], &avgGlobalNumCalls[0]);
                }
              TableColumn timeAndCalls (avgGlobalTimings, avgGlobalNumCalls,
                                        precision, true);
              tableColumns.append (timeAndCalls);
              columnWidths.append (format().computeRequiredColumnWidth (titles.back(),
                                                                        timeAndCalls));
            }
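
            // Worked example (hypothetical numbers) of the scaling above:
            // the mean is computed as sum_i (x_i / P) rather than
            // (sum_i x_i) / P, where P = numProcs.  With P = 4 and local
            // times of 2.0 s on every process, each process contributes
            // 0.5 to the REDUCE_SUM, so the mean is 2.0 s without ever
            // forming the full sum.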

            // Table column containing max of global timer stats, if
            // applicable.
            {
              titles.append("Max over procs");
              Array<double> maxGlobalTimings (numGlobalTimers);
              Array<double> maxGlobalNumCalls (numGlobalTimers);

              // Teuchos_CommHelpers.hpp doesn't currently have a reduce(); it
              // only has a reduceAll().  It would be better just to reduce to
              // Proc 0, but that has to wait until reduce() is implemented.
              if (numGlobalTimers > 0)
                {
                  reduceAll (*pComm, REDUCE_MAX,
                             static_cast<int> (numGlobalTimers),
                             &globalTimings[0], &maxGlobalTimings[0]);
                  reduceAll (*pComm, REDUCE_MAX,
                             static_cast<int> (numGlobalTimers),
                             &globalNumCalls[0], &maxGlobalNumCalls[0]);
                }
              TableColumn timeAndCalls (maxGlobalTimings, maxGlobalNumCalls,
                                        precision, true);
              tableColumns.append (timeAndCalls);
              columnWidths.append (format ().computeRequiredColumnWidth (titles.back(),
                                                                         timeAndCalls));
            }
          }
      }

    // Print the whole table to the given output stream on MPI Rank 0.
    format ().setColumnWidths (columnWidths);
    if (myRank == 0)
      {
        std::ostringstream theTitle;
        theTitle << "TimeMonitor results over " << numProcs << " processor"
                 << (numProcs > 1 ? "s" : "");
        format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
      }
  }


} // namespace Teuchos
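
// Illustrative call (not part of this file).  The argument values below
// just restate the parameters of summarize() shown above and are
// hypothetical; any default arguments are declared in
// Teuchos_TimeMonitor.hpp and may differ.
//
//   Teuchos::TimeMonitor::summarize (std::cout,
//                                    false,  // alwaysWriteLocal
//                                    true,   // writeGlobalStats
//                                    false,  // writeZeroTimers
//                                    Teuchos::Intersection);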