Teuchos - Trilinos Tools Package Version of the Day
Teuchos_TimeMonitor.cpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 #include "Teuchos_TimeMonitor.hpp"
00043 #include "Teuchos_CommHelpers.hpp"
00044 #include "Teuchos_DefaultComm.hpp"
00045 #include "Teuchos_TableColumn.hpp"
00046 #include "Teuchos_TableFormat.hpp"
00047 #include <functional>
00048 #ifdef HAVE_TEUCHOS_YAML_CPP
00049 #  include <yaml-cpp/yaml.h>
00050 #endif // HAVE_TEUCHOS_YAML_CPP
00051 
00052 
00053 namespace Teuchos {
00103   template<class Ordinal, class ScalarType, class IndexType>
00104   class MaxLoc :
00105     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00106   public:
00107     void
00108     reduce (const Ordinal count,
00109             const std::pair<ScalarType, IndexType> inBuffer[],
00110             std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00111   };
00112 
00113   template<class Ordinal>
00114   class MaxLoc<Ordinal, double, int> :
00115     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00116   public:
00117     void
00118     reduce (const Ordinal count,
00119             const std::pair<double, int> inBuffer[],
00120             std::pair<double, int> inoutBuffer[]) const
00121     {
00122       for (Ordinal ind = 0; ind < count; ++ind) {
00123         const std::pair<double, int>& in = inBuffer[ind];
00124         std::pair<double, int>& inout = inoutBuffer[ind];
00125 
00126         if (in.first > inout.first) {
00127           inout.first = in.first;
00128           inout.second = in.second;
00129         } else if (in.first < inout.first) {
00130           // Don't need to do anything; inout has the values.
00131         } else { // equal, or at least one is NaN.
00132           inout.first = in.first;
00133           inout.second = std::min (in.second, inout.second);
00134         }
00135       }
00136     }
00137   };
00138 
00166   template<class Ordinal, class ScalarType, class IndexType>
00167   class MinLoc :
00168     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00169   public:
00170     void
00171     reduce (const Ordinal count,
00172             const std::pair<ScalarType, IndexType> inBuffer[],
00173             std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00174   };
00175 
00176   template<class Ordinal>
00177   class MinLoc<Ordinal, double, int> :
00178     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00179   public:
00180     void
00181     reduce (const Ordinal count,
00182             const std::pair<double, int> inBuffer[],
00183             std::pair<double, int> inoutBuffer[]) const
00184     {
00185       for (Ordinal ind = 0; ind < count; ++ind) {
00186         const std::pair<double, int>& in = inBuffer[ind];
00187         std::pair<double, int>& inout = inoutBuffer[ind];
00188 
00189         if (in.first < inout.first) {
00190           inout.first = in.first;
00191           inout.second = in.second;
00192         } else if (in.first > inout.first) {
00193           // Don't need to do anything; inout has the values.
00194         } else { // equal, or at least one is NaN.
00195           inout.first = in.first;
00196           inout.second = std::min (in.second, inout.second);
00197         }
00198       }
00199     }
00200   };
00201 
00202   // Typedef used internally by TimeMonitor::summarize() and its
00203   // helper functions.  The map is keyed on timer label (a string).
00204   // Each value is a pair: (total number of seconds over all calls to
00205   // that timer, total number of calls to that timer).
00206   typedef std::map<std::string, std::pair<double, int> > timer_map_t;
00207 
00208   TimeMonitor::TimeMonitor (Time& timer, bool reset)
00209     : PerformanceMonitorBase<Time>(timer, reset)
00210   {
00211     if (!isRecursiveCall()) counter().start(reset);
00212   }
00213 
00214   TimeMonitor::~TimeMonitor() {
00215     if (!isRecursiveCall()) counter().stop();
00216   }
00217 
00218   void
00219   TimeMonitor::zeroOutTimers()
00220   {
00221     const Array<RCP<Time> > timers = counters();
00222 
00223     // In debug mode, loop first to check whether any of the timers
00224     // are running, before resetting them.  This ensures that this
00225     // method satisfies the strong exception guarantee (either it
00226     // completes normally, or there are no side effects).
00227 #ifdef TEUCHOS_DEBUG
00228     typedef Array<RCP<Time> >::size_type size_type;
00229     const size_type numTimers = timers.size();
00230     for (size_type i = 0; i < numTimers; ++i) {
00231       Time &timer = *timers[i];
00232       // We throw a runtime_error rather than a logic_error, because
00233       // logic_error suggests a bug in the implementation of
00234       // TimeMonitor.  Calling zeroOutTimers() when a timer is
00235       // running is not TimeMonitor's fault.
00236       TEUCHOS_TEST_FOR_EXCEPTION(timer.isRunning(), std::runtime_error,
00237                                  "The timer i = " << i << " with name \""
00238                                  << timer.name() << "\" is currently running and may not "
00239                                  "be reset.");
00240     }
00241 #endif // TEUCHOS_DEBUG
00242 
00243     for (Array<RCP<Time> >::const_iterator it = timers.begin();
00244          it != timers.end(); ++it) {
00245       (*it)->reset ();
00246     }
00247   }
00248 
00249   // An anonymous namespace is the standard way of limiting linkage of
00250   // its contained routines to file scope.
00251   namespace {
00252     // \brief Return an "empty" local timer datum.
00253     //
00254     // "Empty" means the datum has zero elapsed time and zero call
00255     // count.  This function does not actually create a timer.
00256     //
00257     // \param name The timer's name.
00258     std::pair<std::string, std::pair<double, int> >
00259     makeEmptyTimerDatum (const std::string& name)
00260     {
00261       return std::make_pair (name, std::make_pair (double(0), int(0)));
00262     }
00263 
00264     // \fn collectLocalTimerData
00265     // \brief Collect and sort local timer data by timer names.
00266     //
00267     // \param localData [out] Map whose keys are the timer names, and
00268     //   whose value for each key is the total elapsed time (in
00269     //   seconds) and the call count for the timer with that name.
00270     //
00271     // \param localCounters [in] Timers from which to extract data.
00272     //
00273     // Extract the total elapsed time and call count from each timer
00274     // in the given array.  Merge results for timers with duplicate
00275     // labels, by summing their total elapsed times and call counts
00276     // pairwise.
00277     void
00278     collectLocalTimerData (timer_map_t& localData,
00279                            ArrayView<const RCP<Time> > localCounters)
00280     {
00281       using std::make_pair;
00282       typedef timer_map_t::const_iterator const_iter_t;
00283       typedef timer_map_t::iterator iter_t;
00284 
00285       timer_map_t theLocalData;
00286       for (ArrayView<const RCP<Time> >::const_iterator it = localCounters.begin();
00287            it != localCounters.end(); ++it) {
00288         const std::string& name = (*it)->name();
00289         const double timing = (*it)->totalElapsedTime();
00290         const int numCalls = (*it)->numCalls();
00291 
00292         // Merge timers with duplicate labels, by summing their
00293         // total elapsed times and call counts.
00294         iter_t loc = theLocalData.find (name);
00295         if (loc == theLocalData.end()) {
00296           // Use loc as an insertion location hint.
00297           theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
00298         }
00299         else {
00300           loc->second.first += timing;
00301           loc->second.second += numCalls;
00302         }
00303       }
00304       // This avoids copying the map, and also makes this method
00305       // satisfy the strong exception guarantee.
00306       localData.swap (theLocalData);
00307     }
00308 
00309     // \brief Locally filter out timer data with zero call counts.
00310     //
00311     // \param timerData [in/out]
00312     void
00313     filterZeroData (timer_map_t& timerData)
00314     {
00315       timer_map_t newTimerData;
00316       for (timer_map_t::const_iterator it = timerData.begin();
00317            it != timerData.end(); ++it) {
00318         if (it->second.second > 0) {
00319           newTimerData[it->first] = it->second;
00320         }
00321       }
00322       timerData.swap (newTimerData);
00323     }
00324 
00342     void
00343     collectLocalTimerDataAndNames (timer_map_t& localTimerData,
00344                                    Array<std::string>& localTimerNames,
00345                                    ArrayView<const RCP<Time> > localTimers,
00346                                    const bool writeZeroTimers)
00347     {
00348       // Collect and sort local timer data by timer names.
00349       collectLocalTimerData (localTimerData, localTimers);
00350 
00351       // Filter out zero data locally first.  This ensures that if we
00352       // are writing global stats, and if a timer name exists in the
00353       // set of global names, then that timer has a nonzero call count
00354       // on at least one MPI process.
00355       if (! writeZeroTimers) {
00356         filterZeroData (localTimerData);
00357       }
00358 
00359       // Extract the set of local timer names.  The std::map keeps
00360       // them sorted alphabetically.
00361       localTimerNames.reserve (localTimerData.size());
00362       for (timer_map_t::const_iterator it = localTimerData.begin();
00363            it != localTimerData.end(); ++it) {
00364         localTimerNames.push_back (it->first);
00365       }
00366     }
00367 
00402     void
00403     collectGlobalTimerData (timer_map_t& globalTimerData,
00404                             Array<std::string>& globalTimerNames,
00405                             timer_map_t& localTimerData,
00406                             Array<std::string>& localTimerNames,
00407                             Ptr<const Comm<int> > comm,
00408                             const bool alwaysWriteLocal,
00409                             const ECounterSetOp setOp)
00410     {
00411       // There may be some global timers that are not local timers on
00412       // the calling MPI process(es).  In that case, if
00413       // alwaysWriteLocal is true, then we need to fill in the
00414       // "missing" local timers.  That will ensure that both global
00415       // and local timer columns in the output table have the same
00416       // number of rows.  The collectLocalTimerDataAndNames() method
00417       // may have already filtered out local timers with zero call
00418       // counts (if its writeZeroTimers argument was false), but we
00419       // won't be filtering again.  Thus, any local timer data we
00420       // insert here won't get filtered out.
00421       //
00422       // Note that calling summarize() with writeZeroTimers == false
00423       // will still do what it says, even if we insert local timers
00424       // with zero call counts here.
00425 
00426       // This does the correct and inexpensive thing (just copies the
00427       // timer data) if numProcs == 1.  Otherwise, it initiates a
00428       // communication with \f$O(\log P)\f$ messages along the
00429       // critical path, where \f$P\f$ is the number of participating
00430       // processes.
00431       mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
00432 
00433 #ifdef TEUCHOS_DEBUG
00434       {
00435         // Sanity check that all processes have the name number of
00436         // global timer names.
00437         const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
00438         timer_map_t::size_type minNumGlobalNames = 0;
00439         timer_map_t::size_type maxNumGlobalNames = 0;
00440         reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
00441                    outArg (minNumGlobalNames));
00442         reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
00443                    outArg (maxNumGlobalNames));
00444         TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
00445           std::logic_error, "Min # global timer names = " << minNumGlobalNames
00446           << " != max # global timer names = " << maxNumGlobalNames
00447           << ".  Please report this bug to the Teuchos developers.");
00448         TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
00449           std::logic_error, "My # global timer names = " << myNumGlobalNames
00450           << " != min # global timer names = " << minNumGlobalNames
00451           << ".  Please report this bug to the Teuchos developers.");
00452       }
00453 #endif // TEUCHOS_DEBUG
00454 
00455       // mergeCounterNames() just merges the counters' names, not
00456       // their actual data.  Now we need to fill globalTimerData with
00457       // this process' timer data for the timers in globalTimerNames.
00458       //
00459       // All processes need the full list of global timers, since
00460       // there may be some global timers that are not local timers.
00461       // That's why mergeCounterNames() has to be an all-reduce, not
00462       // just a reduction to Proc 0.
00463       //
00464       // Insertion optimization: if the iterator given to map::insert
00465       // points right before where we want to insert, insertion is
00466       // O(1).  globalTimerNames is sorted, so feeding the iterator
00467       // output of map::insert into the next invocation's input should
00468       // make the whole insertion O(N) where N is the number of
00469       // entries in globalTimerNames.
00470       timer_map_t::iterator globalMapIter = globalTimerData.begin();
00471       timer_map_t::iterator localMapIter;
00472       for (Array<string>::const_iterator it = globalTimerNames.begin();
00473            it != globalTimerNames.end(); ++it) {
00474         const std::string& globalName = *it;
00475         localMapIter = localTimerData.find (globalName);
00476 
00477         if (localMapIter == localTimerData.end()) {
00478           if (alwaysWriteLocal) {
00479             // If there are some global timers that are not local
00480             // timers, and if we want to print local timers, we insert
00481             // a local timer datum with zero elapsed time and zero
00482             // call count into localTimerData as well.  This will
00483             // ensure that both global and local timer columns in the
00484             // output table have the same number of rows.
00485             //
00486             // We really only need to do this on Proc 0, which is the
00487             // only process that currently may print local timers.
00488             // However, we do it on all processes, just in case
00489             // someone later wants to modify this function to print
00490             // out local timer data for some process other than Proc
00491             // 0.  This extra computation won't affect the cost along
00492             // the critical path, for future computations in which
00493             // Proc 0 participates.
00494             localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
00495 
00496             // Make sure the missing global name gets added to the
00497             // list of local names.  We'll re-sort the list of local
00498             // names below.
00499             localTimerNames.push_back (globalName);
00500           }
00501           // There's a global timer that's not a local timer.  Add it
00502           // to our pre-merge version of the global timer data so that
00503           // we can safely merge the global timer data later.
00504           globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
00505         }
00506         else {
00507           // We have this global timer name in our local timer list.
00508           // Fill in our pre-merge version of the global timer data
00509           // with our local data.
00510           globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
00511         }
00512       }
00513 
00514       if (alwaysWriteLocal) {
00515         // Re-sort the list of local timer names, since we may have
00516         // inserted "missing" names above.
00517         std::sort (localTimerNames.begin(), localTimerNames.end());
00518       }
00519 
00520 #ifdef TEUCHOS_DEBUG
00521       {
00522         // Sanity check that all processes have the name number of
00523         // global timers.
00524         const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
00525         timer_map_t::size_type minNumGlobalTimers = 0;
00526         timer_map_t::size_type maxNumGlobalTimers = 0;
00527         reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
00528                    outArg (minNumGlobalTimers));
00529         reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
00530                    outArg (maxNumGlobalTimers));
00531         TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
00532                                    std::logic_error, "Min # global timers = " << minNumGlobalTimers
00533                                    << " != max # global timers = " << maxNumGlobalTimers
00534                                    << ".  Please report this bug to the Teuchos developers.");
00535         TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
00536                                    std::logic_error, "My # global timers = " << myNumGlobalTimers
00537                                    << " != min # global timers = " << minNumGlobalTimers
00538                                    << ".  Please report this bug to the Teuchos developers.");
00539       }
00540 #endif // TEUCHOS_DEBUG
00541     }
00542 
00580     void
00581     computeGlobalTimerStats (stat_map_type& statData,
00582                              std::vector<std::string>& statNames,
00583                              Ptr<const Comm<int> > comm,
00584                              const timer_map_t& globalTimerData)
00585     {
00586       const int numTimers = static_cast<int> (globalTimerData.size());
00587       const int numProcs = comm->getSize();
00588 
00589       // Extract pre-reduction timings and call counts into a
00590       // sequential array.  This array will be in the same order as
00591       // the global timer names are in the map.
00592       Array<std::pair<double, int> > timingsAndCallCounts;
00593       timingsAndCallCounts.reserve (numTimers);
00594       for (timer_map_t::const_iterator it = globalTimerData.begin();
00595            it != globalTimerData.end(); ++it) {
00596         timingsAndCallCounts.push_back (it->second);
00597       }
00598 
00599       // For each timer name, compute the min timing and its
00600       // corresponding call count.  If two processes have the same
00601       // timing but different call counts, the minimum call count will
00602       // be used.
00603       Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
00604       if (numTimers > 0) {
00605         reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
00606                    &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
00607       }
00608 
00609       // For each timer name, compute the max timing and its
00610       // corresponding call count.  If two processes have the same
00611       // timing but different call counts, the minimum call count will
00612       // be used.
00613       Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
00614       if (numTimers > 0) {
00615         reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
00616                    &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
00617       }
00618 
00619       // For each timer name, compute the mean-over-processes timing,
00620       // the mean call count, and the mean-over-call-counts timing.
00621       // The mean call count is reported as a double to allow a
00622       // fractional value.
00623       //
00624       // Each local timing is really the total timing over all local
00625       // invocations.  The number of local invocations is the call
00626       // count.  Thus, the mean-over-call-counts timing is the sum of
00627       // all the timings (over all processes), divided by the sum of
00628       // all the call counts (over all processes).  We compute it in a
00629       // different way to over unnecessary overflow.
00630       Array<double> meanOverCallCountsTimings (numTimers);
00631       Array<double> meanOverProcsTimings (numTimers);
00632       Array<double> meanCallCounts (numTimers);
00633       {
00634         // When summing, first scale by the number of processes.  This
00635         // avoids unnecessary overflow, and also gives us the mean
00636         // call count automatically.
00637         Array<double> scaledTimings (numTimers);
00638         Array<double> scaledCallCounts (numTimers);
00639         const double P = static_cast<double> (numProcs);
00640         for (int k = 0; k < numTimers; ++k) {
00641           const double timing = timingsAndCallCounts[k].first;
00642           const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
00643 
00644           scaledTimings[k] = timing / P;
00645           scaledCallCounts[k] = callCount / P;
00646         }
00647         if (numTimers > 0) {
00648           reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
00649                      &meanOverProcsTimings[0]);
00650           reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
00651                      &meanCallCounts[0]);
00652         }
00653         // We don't have to undo the scaling for the mean timings;
00654         // just divide by the scaled call count.
00655         for (int k = 0; k < numTimers; ++k) {
00656           meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
00657         }
00658       }
00659 
00660       // Reformat the data into the map of statistics.  Be sure that
00661       // each value (the std::vector of (timing, call count) pairs,
00662       // each entry of which is a different statistic) preserves the
00663       // order of statNames.
00664       statNames.resize (4);
00665       statNames[0] = "MinOverProcs";
00666       statNames[1] = "MeanOverProcs";
00667       statNames[2] = "MaxOverProcs";
00668       statNames[3] = "MeanOverCallCounts";
00669 
00670       stat_map_type::iterator statIter = statData.end();
00671       timer_map_t::const_iterator it = globalTimerData.begin();
00672       for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
00673         std::vector<std::pair<double, double> > curData (4);
00674         curData[0] = minTimingsAndCallCounts[k];
00675         curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
00676         curData[2] = maxTimingsAndCallCounts[k];
00677         curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
00678 
00679         // statIter gives an insertion location hint that makes each
00680         // insertion O(1), since we remember the location of the last
00681         // insertion.
00682         statIter = statData.insert (statIter, std::make_pair (it->first, curData));
00683       }
00684     }
00685 
00686 
00703     RCP<const Comm<int> >
00704     getDefaultComm ()
00705     {
00706       // The default communicator.  If Trilinos was built with MPI
00707       // enabled, this should be MPI_COMM_WORLD.  (If MPI has not yet
00708       // been initialized, it's not valid to use the communicator!)
00709       // Otherwise, this should be a "serial" (no MPI, one "process")
00710       // communicator.
00711       RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
00712 
00713 #ifdef HAVE_MPI
00714       {
00715         int mpiHasBeenStarted = 0;
00716         MPI_Initialized (&mpiHasBeenStarted);
00717         if (! mpiHasBeenStarted) {
00718           // Make pComm a new "serial communicator."
00719           comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
00720         }
00721       }
00722 #endif // HAVE_MPI
00723       return comm;
00724     }
00725 
00726   } // namespace (anonymous)
00727 
00728 
00729   void
00730   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
00731                                              std::vector<std::string>& statNames,
00732                                              Ptr<const Comm<int> > comm,
00733                                              const ECounterSetOp setOp)
00734   {
00735     // Collect local timer data and names.  Filter out timers with
00736     // zero call counts if writeZeroTimers is false.
00737     timer_map_t localTimerData;
00738     Array<std::string> localTimerNames;
00739     const bool writeZeroTimers = false;
00740     collectLocalTimerDataAndNames (localTimerData, localTimerNames,
00741                                    counters(), writeZeroTimers);
00742     // Merge the local timer data and names into global timer data and
00743     // names.
00744     timer_map_t globalTimerData;
00745     Array<std::string> globalTimerNames;
00746     const bool alwaysWriteLocal = false;
00747     collectGlobalTimerData (globalTimerData, globalTimerNames,
00748                             localTimerData, localTimerNames,
00749                             comm, alwaysWriteLocal, setOp);
00750     // Compute statistics on the data.
00751     computeGlobalTimerStats (statData, statNames, comm, globalTimerData);
00752   }
00753 
00754 
00755   void
00756   TimeMonitor::summarize (Ptr<const Comm<int> > comm,
00757                           std::ostream& out,
00758                           const bool alwaysWriteLocal,
00759                           const bool writeGlobalStats,
00760                           const bool writeZeroTimers,
00761                           const ECounterSetOp setOp)
00762   {
00763     //
00764     // We can't just call computeGlobalTimerStatistics(), since
00765     // summarize() has different options that affect whether global
00766     // statistics are computed and printed.
00767     //
00768     const int numProcs = comm->getSize();
00769     const int myRank = comm->getRank();
00770 
00771     // Collect local timer data and names.  Filter out timers with
00772     // zero call counts if writeZeroTimers is false.
00773     timer_map_t localTimerData;
00774     Array<std::string> localTimerNames;
00775     collectLocalTimerDataAndNames (localTimerData, localTimerNames,
00776                                    counters(), writeZeroTimers);
00777 
00778     // If we're computing global statistics, merge the local timer
00779     // data and names into global timer data and names, and compute
00780     // global timer statistics.  Otherwise, leave the global data
00781     // empty.
00782     timer_map_t globalTimerData;
00783     Array<std::string> globalTimerNames;
00784     stat_map_type statData;
00785     std::vector<std::string> statNames;
00786     if (writeGlobalStats) {
00787       collectGlobalTimerData (globalTimerData, globalTimerNames,
00788                               localTimerData, localTimerNames,
00789                               comm, alwaysWriteLocal, setOp);
00790       // Compute statistics on the data, but only if the communicator
00791       // contains more than one process.  Otherwise, statistics don't
00792       // make sense and we don't print them (see below).
00793       if (numProcs > 1) {
00794         computeGlobalTimerStats (statData, statNames, comm, globalTimerData);
00795       }
00796     }
00797 
00798     // Precision of floating-point numbers in the table.
00799     const int precision = format().precision();
00800 
00801     // All columns of the table, in order.
00802     Array<TableColumn> tableColumns;
00803 
00804     // Labels of all the columns of the table.
00805     // We will append to this when we add each column.
00806     Array<std::string> titles;
00807 
00808     // Widths (in number of characters) of each column.
00809     // We will append to this when we add each column.
00810     Array<int> columnWidths;
00811 
00812     // Table column containing all timer names.  If writeGlobalStats
00813     // is true, we use the global timer names, otherwise we use the
00814     // local timer names.  We build the table on all processes
00815     // redundantly, but only print on Rank 0.
00816     {
00817       titles.append ("Timer Name");
00818 
00819       // The column labels depend on whether we are computing global statistics.
00820       TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
00821       tableColumns.append (nameCol);
00822 
00823       // Each column is as wide as it needs to be to hold both its
00824       // title and all of the column data.  This column's title is the
00825       // current last entry of the titles array.
00826       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
00827     }
00828 
00829     // Table column containing local timer stats, if applicable.  We
00830     // only write local stats if asked, only on MPI Proc 0, and only
00831     // if there is more than one MPI process in the communicator
00832     // (otherwise local stats == global stats, so we just print the
00833     // global stats).  In this case, we've padded the local data on
00834     // Proc 0 if necessary to match the global timer list, so that the
00835     // columns have the same number of rows.
00836     if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
00837       titles.append ("Local time (num calls)");
00838 
00839       // Copy local timer data out of the array-of-structs into
00840       // separate arrays, for display in the table.
00841       Array<double> localTimings;
00842       Array<double> localNumCalls;
00843       for (timer_map_t::const_iterator it = localTimerData.begin();
00844            it != localTimerData.end(); ++it) {
00845         localTimings.push_back (it->second.first);
00846         localNumCalls.push_back (static_cast<double> (it->second.second));
00847       }
00848       TableColumn timeAndCalls (localTimings, localNumCalls, precision, true);
00849       tableColumns.append (timeAndCalls);
00850       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00851     }
00852 
00853     if (writeGlobalStats) {
00854       // If there's only 1 process in the communicator, don't display
00855       // statistics; statistics don't make sense in that case.  Just
00856       // display the timings and call counts.  If there's more than 1
00857       // process, do display statistics.
00858       if (numProcs == 1) {
00859         // Extract timings and the call counts from globalTimerData.
00860         Array<double> globalTimings;
00861         Array<double> globalNumCalls;
00862         for (timer_map_t::const_iterator it = globalTimerData.begin();
00863              it != globalTimerData.end(); ++it) {
00864           globalTimings.push_back (it->second.first);
00865           globalNumCalls.push_back (static_cast<double> (it->second.second));
00866         }
00867         // Print the table column.
00868         titles.append ("Global time (num calls)");
00869         TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true);
00870         tableColumns.append (timeAndCalls);
00871         columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00872       }
00873       else { // numProcs > 1
00874         // Print a table column for each statistic.  statNames and
00875         // each value in statData use the same ordering, so we can
00876         // iterate over valid indices of statNames to display the
00877         // statistics in the right order.
00878         const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
00879         for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
00880           // Extract lists of timings and their call counts for the
00881           // current statistic.
00882           Array<double> statTimings (numGlobalTimers);
00883           Array<double> statCallCounts (numGlobalTimers);
00884           stat_map_type::const_iterator it = statData.begin();
00885           for (int k = 0; it != statData.end(); ++it, ++k) {
00886             statTimings[k] = (it->second[statInd]).first;
00887             statCallCounts[k] = (it->second[statInd]).second;
00888           }
00889           // Print the table column.
00890           const std::string& statisticName = statNames[statInd];
00891           const std::string titleString = statisticName;
00892           titles.append (titleString);
00893           TableColumn timeAndCalls (statTimings, statCallCounts, precision, true);
00894           tableColumns.append (timeAndCalls);
00895           columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00896         }
00897       }
00898     }
00899 
00900     // Print the whole table to the given output stream on MPI Rank 0.
00901     format().setColumnWidths (columnWidths);
00902     if (myRank == 0) {
00903       std::ostringstream theTitle;
00904       theTitle << "TimeMonitor results over " << numProcs << " processor"
00905                << (numProcs > 1 ? "s" : "");
00906       format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
00907     }
00908   }
00909 
00910   void
00911   TimeMonitor::summarize (std::ostream &out,
00912                           const bool alwaysWriteLocal,
00913                           const bool writeGlobalStats,
00914                           const bool writeZeroTimers,
00915                           const ECounterSetOp setOp)
00916   {
00917     // The default communicator.  If Trilinos was built with MPI
00918     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
00919     // be a "serial" (no MPI, one "process") communicator.
00920     RCP<const Comm<int> > comm = getDefaultComm();
00921 
00922     summarize (comm.ptr(), out, alwaysWriteLocal,
00923                writeGlobalStats, writeZeroTimers, setOp);
00924   }
00925 
00926   void
00927   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
00928                                              std::vector<std::string>& statNames,
00929                                              const ECounterSetOp setOp)
00930   {
00931     // The default communicator.  If Trilinos was built with MPI
00932     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
00933     // be a "serial" (no MPI, one "process") communicator.
00934     RCP<const Comm<int> > comm = getDefaultComm();
00935 
00936     computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp);
00937   }
00938 
00939 
00940   void TimeMonitor::
00941   summarizeToYaml (Ptr<const Comm<int> > comm, std::ostream &out)
00942   {
00943 #ifdef HAVE_TEUCHOS_YAML_CPP
00944     // const bool writeGlobalStats = true;
00945     // const bool writeZeroTimers = true;
00946     // const bool alwaysWriteLocal = false;
00947     const ECounterSetOp setOp = Intersection;
00948 
00949     stat_map_type statData;
00950     std::vector<std::string> statNames;
00951     computeGlobalTimerStatistics (statData, statNames, setOp);
00952 
00953     const int numProcs = comm->getSize();
00954     const int myRank = comm->getRank();
00955 
00956     if (myRank == 0) {
00957       YAML::Emitter emi;
00958       emi << YAML::BeginDoc; // Begin YAML output
00959       emi << "Teuchos::TimeMonitor timing results";
00960       emi << YAML::BeginMap // Begin timing results map
00961           << YAML::Key << "Number of processes"
00962           << YAML::Value << numProcs
00963           << YAML::Key << "Global timer statistics"
00964           << YAML::Value;
00965       // For each timer name, print all its statistics.
00966       emi << YAML::BeginMap; // Begin timer names
00967       for (stat_map_type::const_iterator statDataIter = statData.begin();
00968            statDataIter != statData.end(); ++statDataIter) {
00969         // Key: Timer's name
00970         emi << YAML::Key << statDataIter->first;
00971         // Value: The timer's statistics, as a map.
00972         emi << YAML::Value << YAML::BeginMap; // Begin current timer's statistics
00973         for (std::vector<std::string>::size_type statInd = 0;
00974              statInd < statNames.size (); ++statInd) {
00975           // Key: current statistic's name.
00976           emi << YAML::Key << statNames[statInd]
00977               << YAML::Value;
00978           // Value is a map: "Time (s)" => time in seconds for current
00979           // statistic, "Call count" => call count for current statistic.
00980           const double curTime = (statDataIter->second)[statInd].first;
00981           const double curCallCount = (statDataIter->second)[statInd].second;
00982           emi << YAML::BeginMap
00983               << YAML::Key << "Time (s)"
00984               << YAML::Value << curTime
00985               << YAML::Key << "Call count"
00986               << YAML::Value << curCallCount
00987               << YAML::EndMap;
00988         }
00989         emi << YAML::EndMap; // End current timer's statistics
00990       }
00991       emi << YAML::EndMap; // End timer names
00992       emi << YAML::EndMap; // End timing results map
00993       emi << YAML::EndDoc; // End YAML output
00994 
00995       // Write YAML output to the given output stream.
00996       out << emi.c_str ();
00997     }
00998 #else  // Don't HAVE_TEUCHOS_YAML_CPP
00999     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Teuchos::TimeMonitor: YAML output currently requires building Trilinos with the yaml-cpp library.  Please download and install yaml-cpp from http://code.google.com/p/yaml-cpp/.  Then, enable yaml-cpp support when building Trilinos: 1. Set the CMake Boolean option TPL_ENABLE_yaml-cpp to ON.  2. Set the CMake option yaml-cpp_INCLUDE_DIRS to the path of the yaml-cpp include files (not including the yaml-cpp directory in include/).  3. Set the CMake option yaml-cpp_LIBRARY_DIRS to the location of the yaml-cpp library.  4. Clear the CMake cache if necesssary.  5. Run CMake again and rebuild Trilinos.");
01000 #endif // HAVE_TEUCHOS_YAML_CPP
01001   }
01002 
01003   void TimeMonitor::
01004   summarizeToYaml (std::ostream &out)
01005   {
01006     // The default communicator.  If Trilinos was built with MPI
01007     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
01008     // be a "serial" (no MPI, one "process") communicator.
01009     RCP<const Comm<int> > comm = getDefaultComm ();
01010 
01011     summarizeToYaml (comm.ptr (), out);
01012   }
01013 
01014   // Default value is false.  We'll set to true once
01015   // setReportParameters() completes successfully.
01016   bool TimeMonitor::setParams_ = false;
01017 
01018   // We have to declare all of these here in order to avoid linker errors.
01019   TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ =
01020              TimeMonitor::REPORT_FORMAT_TABLE;
01021   ECounterSetOp TimeMonitor::setOp_ = Intersection;
01022   bool TimeMonitor::alwaysWriteLocal_ = false;
01023   bool TimeMonitor::writeGlobalStats_ = true;
01024   bool TimeMonitor::writeZeroTimers_ = true;
01025 
01026   void
01027   TimeMonitor::setReportFormatParameter (ParameterList& plist)
01028   {
01029     const std::string name ("Report format");
01030     const std::string defaultValue ("Table");
01031     const std::string docString ("Output format for report of timer statistics");
01032     Array<std::string> strings;
01033     Array<std::string> docs;
01034     Array<ETimeMonitorReportFormat> values;
01035 
01036     strings.push_back ("YAML");
01037     docs.push_back ("YAML (see yaml.org) format");
01038     values.push_back (REPORT_FORMAT_YAML);
01039     strings.push_back ("Table");
01040     docs.push_back ("Tabular format via Teuchos::TableFormat");
01041     values.push_back (REPORT_FORMAT_TABLE);
01042 
01043     setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
01044                                                             docString,
01045                                                             strings (), docs (),
01046                                                             values (), &plist);
01047   }
01048 
01049   void
01050   TimeMonitor::setSetOpParameter (ParameterList& plist)
01051   {
01052     const std::string name ("How to merge timer sets");
01053     const std::string defaultValue ("Intersection");
01054     const std::string docString ("How to merge differing sets of timers "
01055                                  "across processes");
01056     Array<std::string> strings;
01057     Array<std::string> docs;
01058     Array<ECounterSetOp> values;
01059 
01060     strings.push_back ("Intersection");
01061     docs.push_back ("Compute intersection of timer sets over processes");
01062     values.push_back (Intersection);
01063     strings.push_back ("Union");
01064     docs.push_back ("Compute union of timer sets over processes");
01065     values.push_back (Union);
01066 
01067     setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
01068                                                  strings (), docs (), values (),
01069                                                  &plist);
01070   }
01071 
01072   RCP<const ParameterList>
01073   TimeMonitor::getValidReportParameters ()
01074   {
01075     // Our implementation favors recomputation over persistent
01076     // storage.  That is, we simply recreate the list every time we
01077     // need it.
01078     RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
01079 
01080     const bool alwaysWriteLocal = false;
01081     const bool writeGlobalStats = true;
01082     const bool writeZeroTimers = true;
01083 
01084     setReportFormatParameter (*plist);
01085     setSetOpParameter (*plist);
01086     plist->set ("alwaysWriteLocal", alwaysWriteLocal,
01087                 "Always output local timers' values on Proc 0");
01088     plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
01089                 "statistics, even if there is only one process in the "
01090                 "communicator");
01091     plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
01092                 "timers that have never been called");
01093     return rcp_const_cast<const ParameterList> (plist);
01094   }
01095 
01096   void
01097   TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
01098   {
01099     ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
01100     ECounterSetOp setOp = Intersection;
01101     bool alwaysWriteLocal = false;
01102     bool writeGlobalStats = true;
01103     bool writeZeroTimers = true;
01104 
01105     if (params.is_null ()) {
01106       // If we've set parameters before, leave their current values.
01107       // Otherwise, set defaults (below).
01108       if (setParams_) {
01109         return;
01110       }
01111     }
01112     else { // params is nonnull.  Let's read it!
01113       params->validateParametersAndSetDefaults (*getValidReportParameters ());
01114 
01115       reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
01116       setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
01117       alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
01118       writeGlobalStats = params->get<bool> ("writeGlobalStats");
01119       writeZeroTimers = params->get<bool> ("writeZeroTimers");
01120     }
01121     // Defer setting state until here, to ensure the strong exception
01122     // guarantee for this method (either it throws with no externally
01123     // visible state changes, or it returns normally).
01124     reportFormat_ = reportFormat;
01125     setOp_ = setOp;
01126     alwaysWriteLocal_ = alwaysWriteLocal;
01127     writeGlobalStats_ = writeGlobalStats;
01128     writeZeroTimers_ = writeZeroTimers;
01129 
01130     setParams_ = true; // Yay, we successfully set parameters!
01131   }
01132 
01133   void
01134   TimeMonitor::report (Ptr<const Comm<int> > comm,
01135                        std::ostream& out,
01136                        const RCP<ParameterList>& params)
01137   {
01138     setReportParameters (params);
01139 
01140     if (reportFormat_ == REPORT_FORMAT_YAML) {
01141       summarizeToYaml (comm, out);
01142     }
01143     else if (reportFormat_ == REPORT_FORMAT_TABLE) {
01144       summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
01145                  writeZeroTimers_, setOp_);
01146     }
01147     else {
01148       TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
01149         "Invalid report format.  This should never happen; ParameterList "
01150         "validation should have caught this.  Please report this bug to the "
01151         "Teuchos developers.");
01152     }
01153   }
01154 
01155   void
01156   TimeMonitor::report (std::ostream& out,
01157                        const RCP<ParameterList>& params)
01158   {
01159     RCP<const Comm<int> > comm = getDefaultComm ();
01160     report (comm.ptr (), out, params);
01161   }
01162 
01163 } // namespace Teuchos
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines