Teuchos Package Browser (Single Doxygen Collection) Version of the Day
Teuchos_TimeMonitor.cpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 #include "Teuchos_TimeMonitor.hpp"
00043 #include "Teuchos_CommHelpers.hpp"
00044 #include "Teuchos_DefaultComm.hpp"
00045 #include "Teuchos_TableColumn.hpp"
00046 #include "Teuchos_TableFormat.hpp"
00047 #include "Teuchos_StandardParameterEntryValidators.hpp"
00048 #include "Teuchos_ScalarTraits.hpp"
00049 #include <functional>
00050 
00051 
00052 namespace Teuchos {
00105   template<class Ordinal, class ScalarType, class IndexType>
00106   class MaxLoc :
00107     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00108   public:
00109     void
00110     reduce (const Ordinal count,
00111             const std::pair<ScalarType, IndexType> inBuffer[],
00112             std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00113   };
00114 
00115   template<class Ordinal>
00116   class MaxLoc<Ordinal, double, int> :
00117     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00118   public:
00119     void
00120     reduce (const Ordinal count,
00121             const std::pair<double, int> inBuffer[],
00122             std::pair<double, int> inoutBuffer[]) const
00123     {
00124       for (Ordinal ind = 0; ind < count; ++ind) {
00125         const std::pair<double, int>& in = inBuffer[ind];
00126         std::pair<double, int>& inout = inoutBuffer[ind];
00127 
00128         if (in.first > inout.first) {
00129           inout.first = in.first;
00130           inout.second = in.second;
00131         } else if (in.first < inout.first) {
00132           // Don't need to do anything; inout has the values.
00133         } else { // equal, or at least one is NaN.
00134           inout.first = in.first;
00135           inout.second = std::min (in.second, inout.second);
00136         }
00137       }
00138     }
00139   };
00140 
00167   template<class Ordinal, class ScalarType, class IndexType>
00168   class MinLoc :
00169     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00170   public:
00171     void
00172     reduce (const Ordinal count,
00173             const std::pair<ScalarType, IndexType> inBuffer[],
00174             std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00175   };
00176 
00177   template<class Ordinal>
00178   class MinLoc<Ordinal, double, int> :
00179     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00180   public:
00181     void
00182     reduce (const Ordinal count,
00183             const std::pair<double, int> inBuffer[],
00184             std::pair<double, int> inoutBuffer[]) const
00185     {
00186       for (Ordinal ind = 0; ind < count; ++ind) {
00187         const std::pair<double, int>& in = inBuffer[ind];
00188         std::pair<double, int>& inout = inoutBuffer[ind];
00189 
00190         if (in.first < inout.first) {
00191           inout.first = in.first;
00192           inout.second = in.second;
00193         } else if (in.first > inout.first) {
00194           // Don't need to do anything; inout has the values.
00195         } else { // equal, or at least one is NaN.
00196           inout.first = in.first;
00197           inout.second = std::min (in.second, inout.second);
00198         }
00199       }
00200     }
00201   };
00202 
00206   template<class Ordinal, class ScalarType, class IndexType>
00207   class MinLocNonzero :
00208     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00209   public:
00210     void
00211     reduce (const Ordinal count,
00212             const std::pair<ScalarType, IndexType> inBuffer[],
00213             std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00214   };
00215 
00216   template<class Ordinal>
00217   class MinLocNonzero<Ordinal, double, int> :
00218     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00219   public:
00220     void
00221     reduce (const Ordinal count,
00222             const std::pair<double, int> inBuffer[],
00223             std::pair<double, int> inoutBuffer[]) const
00224     {
00225       for (Ordinal ind = 0; ind < count; ++ind) {
00226         const std::pair<double, int>& in = inBuffer[ind];
00227         std::pair<double, int>& inout = inoutBuffer[ind];
00228 
00229         if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
00230           inout.first = in.first;
00231           inout.second = in.second;
00232         } else if (in.first > inout.first) {
00233           // Don't need to do anything; inout has the values.
00234         } else { // equal, or at least one is NaN.
00235           inout.first = in.first;
00236           inout.second = std::min (in.second, inout.second);
00237         }
00238       }
00239     }
00240   };
00241 
  // Typedef used internally by TimeMonitor::summarize() and its
  // helper functions.  The map is keyed on timer label (a string).
  // Each value is a pair: first is the total number of seconds over
  // all calls to that timer, second is the total number of calls to
  // that timer.
  typedef std::map<std::string, std::pair<double, int> > timer_map_t;
00247 
00248   TimeMonitor::TimeMonitor (Time& timer, bool reset)
00249     : PerformanceMonitorBase<Time>(timer, reset)
00250   {
00251     if (!isRecursiveCall()) counter().start(reset);
00252   }
00253 
00254   TimeMonitor::~TimeMonitor() {
00255     if (!isRecursiveCall()) counter().stop();
00256   }
00257 
00258   void
00259   TimeMonitor::disableTimer (const std::string& name)
00260   {
00261     RCP<Time> timer = lookupCounter (name);
00262     TEUCHOS_TEST_FOR_EXCEPTION(
00263       timer == null, std::invalid_argument,
00264       "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
00265     timer->disable ();
00266   }
00267 
00268   void
00269   TimeMonitor::enableTimer (const std::string& name)
00270   {
00271     RCP<Time> timer = lookupCounter (name);
00272     TEUCHOS_TEST_FOR_EXCEPTION(
00273       timer == null, std::invalid_argument,
00274       "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
00275     timer->enable ();
00276   }
00277 
00278   void
00279   TimeMonitor::zeroOutTimers()
00280   {
00281     typedef std::map<std::string, RCP<Time> > map_type;
00282     typedef map_type::iterator iter_type;
00283     map_type& ctrs = counters ();
00284 
00285     // In debug mode, loop first to check whether any of the timers
00286     // are running, before resetting them.  This ensures that this
00287     // method satisfies the strong exception guarantee (either it
00288     // completes normally, or there are no side effects).
00289 #ifdef TEUCHOS_DEBUG
00290     for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
00291       // We throw a runtime_error rather than a logic_error, because
00292       // logic_error suggests a bug in the implementation of
00293       // TimeMonitor.  Calling zeroOutTimers() when a timer is running
00294       // is not TimeMonitor's fault.
00295       TEUCHOS_TEST_FOR_EXCEPTION(
00296         it->second->isRunning (), std::runtime_error,
00297         "Timer \"" << it->second->name () << "\" is currently running.  "
00298         "You are not allowed to reset running timers.");
00299     }
00300 #endif // TEUCHOS_DEBUG
00301 
00302     for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
00303       it->second->reset ();
00304     }
00305   }
00306 
00307   // An anonymous namespace is the standard way of limiting linkage of
00308   // its contained routines to file scope.
00309   namespace {
00310     // \brief Return an "empty" local timer datum.
00311     //
00312     // "Empty" means the datum has zero elapsed time and zero call
00313     // count.  This function does not actually create a timer.
00314     //
00315     // \param name The timer's name.
00316     std::pair<std::string, std::pair<double, int> >
00317     makeEmptyTimerDatum (const std::string& name)
00318     {
00319       return std::make_pair (name, std::make_pair (double(0), int(0)));
00320     }
00321 
00322     // \fn collectLocalTimerData
00323     // \brief Collect and sort local timer data by timer names.
00324     //
00325     // \param localData [out] Map whose keys are the timer names, and
00326     //   whose value for each key is the total elapsed time (in
00327     //   seconds) and the call count for the timer with that name.
00328     //
00329     // \param localCounters [in] Timers from which to extract data.
00330     //
00331     // \param filter [in] Filter for timer labels.  If filter is not
00332     //   empty, this method will only collect data for local timers
00333     //   whose labels begin with this string.
00334     //
00335     // Extract the total elapsed time and call count from each timer
00336     // in the given array.  Merge results for timers with duplicate
00337     // labels, by summing their total elapsed times and call counts
00338     // pairwise.
00339     void
00340     collectLocalTimerData (timer_map_t& localData,
00341                            const std::map<std::string, RCP<Time> >& localCounters,
00342                            const std::string& filter="")
00343     {
00344       using std::make_pair;
00345       typedef timer_map_t::iterator iter_t;
00346 
00347       timer_map_t theLocalData;
00348       for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
00349            it != localCounters.end(); ++it) {
00350         const std::string& name = it->second->name ();
00351 
00352         // Filter current timer name, if provided filter is nonempty.
00353         // Filter string must _start_ the timer label, not just be in it.
00354         const bool skipThisOne = (filter != "" && name.find (filter) != 0);
00355         if (! skipThisOne) {
00356           const double timing = it->second->totalElapsedTime ();
00357           const int numCalls = it->second->numCalls ();
00358 
00359           // Merge timers with duplicate labels, by summing their
00360           // total elapsed times and call counts.
00361           iter_t loc = theLocalData.find (name);
00362           if (loc == theLocalData.end()) {
00363             // Use loc as an insertion location hint.
00364             theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
00365           }
00366           else {
00367             loc->second.first += timing;
00368             loc->second.second += numCalls;
00369           }
00370         }
00371       }
00372       // This avoids copying the map, and also makes this method
00373       // satisfy the strong exception guarantee.
00374       localData.swap (theLocalData);
00375     }
00376 
00377     // \brief Locally filter out timer data with zero call counts.
00378     //
00379     // \param timerData [in/out]
00380     void
00381     filterZeroData (timer_map_t& timerData)
00382     {
00383       // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
00384       // iterator hint, instead of rebuilding the map completely.
00385       timer_map_t newTimerData;
00386       for (timer_map_t::const_iterator it = timerData.begin();
00387            it != timerData.end(); ++it) {
00388         if (it->second.second > 0) {
00389           newTimerData[it->first] = it->second;
00390         }
00391       }
00392       timerData.swap (newTimerData);
00393     }
00394 
00416     void
00417     collectLocalTimerDataAndNames (timer_map_t& localTimerData,
00418                                    Array<std::string>& localTimerNames,
00419                                    const std::map<std::string, RCP<Time> >& localTimers,
00420                                    const bool writeZeroTimers,
00421                                    const std::string& filter="")
00422     {
00423       // Collect and sort local timer data by timer names.
00424       collectLocalTimerData (localTimerData, localTimers, filter);
00425 
00426       // Filter out zero data locally first.  This ensures that if we
00427       // are writing global stats, and if a timer name exists in the
00428       // set of global names, then that timer has a nonzero call count
00429       // on at least one MPI process.
00430       if (! writeZeroTimers) {
00431         filterZeroData (localTimerData);
00432       }
00433 
00434       // Extract the set of local timer names.  The std::map keeps
00435       // them sorted alphabetically.
00436       localTimerNames.reserve (localTimerData.size());
00437       for (timer_map_t::const_iterator it = localTimerData.begin();
00438            it != localTimerData.end(); ++it) {
00439         localTimerNames.push_back (it->first);
00440       }
00441     }
00442 
    // \brief Merge timer names across processes and build this
    //   process' pre-reduction copy of the global timer data.
    //
    // \param globalTimerData [out] Receives one entry per name in
    //   globalTimerNames: this process' local datum if it has one,
    //   otherwise an empty (zero time, zero calls) datum.
    //
    // \param globalTimerNames [out] Filled by mergeCounterNames().
    //
    // \param localTimerData [in/out] Local timer data.  If
    //   alwaysWriteLocal is true, an empty datum is inserted for each
    //   global name that is missing locally.
    //
    // \param localTimerNames [in/out] Local timer names.  If
    //   alwaysWriteLocal is true, missing global names are appended
    //   and the array is re-sorted.
    //
    // \param comm [in] Communicator passed to mergeCounterNames()
    //   (and, in debug builds, to the reduceAll() sanity checks).
    //
    // \param alwaysWriteLocal [in] Whether to pad the local data and
    //   names with the global names that are missing locally.
    //
    // \param setOp [in] Set operation forwarded to mergeCounterNames().
    void
    collectGlobalTimerData (timer_map_t& globalTimerData,
                            Array<std::string>& globalTimerNames,
                            timer_map_t& localTimerData,
                            Array<std::string>& localTimerNames,
                            Ptr<const Comm<int> > comm,
                            const bool alwaysWriteLocal,
                            const ECounterSetOp setOp)
    {
      // There may be some global timers that are not local timers on
      // the calling MPI process(es).  In that case, if
      // alwaysWriteLocal is true, then we need to fill in the
      // "missing" local timers.  That will ensure that both global
      // and local timer columns in the output table have the same
      // number of rows.  The collectLocalTimerDataAndNames() method
      // may have already filtered out local timers with zero call
      // counts (if its writeZeroTimers argument was false), but we
      // won't be filtering again.  Thus, any local timer data we
      // insert here won't get filtered out.
      //
      // Note that calling summarize() with writeZeroTimers == false
      // will still do what it says, even if we insert local timers
      // with zero call counts here.

      // This does the correct and inexpensive thing (just copies the
      // timer data) if numProcs == 1.  Otherwise, it initiates a
      // communication with \f$O(\log P)\f$ messages along the
      // critical path, where \f$P\f$ is the number of participating
      // processes.
      mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);

#ifdef TEUCHOS_DEBUG
      {
        // Sanity check that all processes have the same number of
        // global timer names.
        const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
        timer_map_t::size_type minNumGlobalNames = 0;
        timer_map_t::size_type maxNumGlobalNames = 0;
        reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
                   outArg (minNumGlobalNames));
        reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
                   outArg (maxNumGlobalNames));
        TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
          std::logic_error, "Min # global timer names = " << minNumGlobalNames
          << " != max # global timer names = " << maxNumGlobalNames
          << ".  Please report this bug to the Teuchos developers.");
        TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
          std::logic_error, "My # global timer names = " << myNumGlobalNames
          << " != min # global timer names = " << minNumGlobalNames
          << ".  Please report this bug to the Teuchos developers.");
      }
#endif // TEUCHOS_DEBUG

      // mergeCounterNames() just merges the counters' names, not
      // their actual data.  Now we need to fill globalTimerData with
      // this process' timer data for the timers in globalTimerNames.
      //
      // All processes need the full list of global timers, since
      // there may be some global timers that are not local timers.
      // That's why mergeCounterNames() has to be an all-reduce, not
      // just a reduction to Proc 0.
      //
      // Insertion optimization: if the iterator given to map::insert
      // points right before where we want to insert, insertion is
      // O(1).  globalTimerNames is sorted, so feeding the iterator
      // output of map::insert into the next invocation's input should
      // make the whole insertion O(N) where N is the number of
      // entries in globalTimerNames.
      timer_map_t::iterator globalMapIter = globalTimerData.begin();
      timer_map_t::iterator localMapIter;
      for (Array<string>::const_iterator it = globalTimerNames.begin();
           it != globalTimerNames.end(); ++it) {
        const std::string& globalName = *it;
        localMapIter = localTimerData.find (globalName);

        if (localMapIter == localTimerData.end()) {
          if (alwaysWriteLocal) {
            // If there are some global timers that are not local
            // timers, and if we want to print local timers, we insert
            // a local timer datum with zero elapsed time and zero
            // call count into localTimerData as well.  This will
            // ensure that both global and local timer columns in the
            // output table have the same number of rows.
            //
            // We really only need to do this on Proc 0, which is the
            // only process that currently may print local timers.
            // However, we do it on all processes, just in case
            // someone later wants to modify this function to print
            // out local timer data for some process other than Proc
            // 0.  This extra computation won't affect the cost along
            // the critical path, for future computations in which
            // Proc 0 participates.
            localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));

            // Make sure the missing global name gets added to the
            // list of local names.  We'll re-sort the list of local
            // names below.
            localTimerNames.push_back (globalName);
          }
          // There's a global timer that's not a local timer.  Add it
          // to our pre-merge version of the global timer data so that
          // we can safely merge the global timer data later.
          globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
        }
        else {
          // We have this global timer name in our local timer list.
          // Fill in our pre-merge version of the global timer data
          // with our local data.
          globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
        }
      }

      if (alwaysWriteLocal) {
        // Re-sort the list of local timer names, since we may have
        // inserted "missing" names above.
        std::sort (localTimerNames.begin(), localTimerNames.end());
      }

#ifdef TEUCHOS_DEBUG
      {
        // Sanity check that all processes have the same number of
        // global timers.
        const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
        timer_map_t::size_type minNumGlobalTimers = 0;
        timer_map_t::size_type maxNumGlobalTimers = 0;
        reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
                   outArg (minNumGlobalTimers));
        reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
                   outArg (maxNumGlobalTimers));
        TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
                                   std::logic_error, "Min # global timers = " << minNumGlobalTimers
                                   << " != max # global timers = " << maxNumGlobalTimers
                                   << ".  Please report this bug to the Teuchos developers.");
        TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
                                   std::logic_error, "My # global timers = " << myNumGlobalTimers
                                   << " != min # global timers = " << minNumGlobalTimers
                                   << ".  Please report this bug to the Teuchos developers.");
      }
#endif // TEUCHOS_DEBUG
    }
00617 
00664     void
00665     computeGlobalTimerStats (stat_map_type& statData,
00666                              std::vector<std::string>& statNames,
00667                              Ptr<const Comm<int> > comm,
00668                              const timer_map_t& globalTimerData,
00669                              const bool ignoreZeroTimers)
00670     {
00671       using Teuchos::ScalarTraits;
00672 
00673       const int numTimers = static_cast<int> (globalTimerData.size());
00674       const int numProcs = comm->getSize();
00675 
00676       // Extract pre-reduction timings and call counts into a
00677       // sequential array.  This array will be in the same order as
00678       // the global timer names are in the map.
00679       Array<std::pair<double, int> > timingsAndCallCounts;
00680       timingsAndCallCounts.reserve (numTimers);
00681       for (timer_map_t::const_iterator it = globalTimerData.begin();
00682            it != globalTimerData.end(); ++it) {
00683         timingsAndCallCounts.push_back (it->second);
00684       }
00685 
00686       // For each timer name, compute the min timing and its
00687       // corresponding call count.  If two processes have the same
00688       // timing but different call counts, the minimum call count will
00689       // be used.
00690       Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
00691       if (numTimers > 0) {
00692         if (ignoreZeroTimers)
00693           reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
00694                      &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
00695         else
00696           reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
00697                      &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
00698       }
00699 
00700       // For each timer name, compute the max timing and its
00701       // corresponding call count.  If two processes have the same
00702       // timing but different call counts, the minimum call count will
00703       // be used.
00704       Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
00705       if (numTimers > 0) {
00706         reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
00707                    &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
00708       }
00709 
00710       // For each timer name, compute the mean-over-processes timing,
00711       // the mean call count, and the mean-over-call-counts timing.
00712       // The mean call count is reported as a double to allow a
00713       // fractional value.
00714       //
00715       // Each local timing is really the total timing over all local
00716       // invocations.  The number of local invocations is the call
00717       // count.  Thus, the mean-over-call-counts timing is the sum of
00718       // all the timings (over all processes), divided by the sum of
00719       // all the call counts (over all processes).  We compute it in a
00720       // different way to over unnecessary overflow.
00721       Array<double> meanOverCallCountsTimings (numTimers);
00722       Array<double> meanOverProcsTimings (numTimers);
00723       Array<double> meanCallCounts (numTimers);
00724       Array<int>    ICallThisTimer (numTimers);
00725       Array<int>    numProcsCallingEachTimer (numTimers);
00726       {
00727         // Figure out how many processors actually call each timer.
00728         if (ignoreZeroTimers) {
00729           for (int k = 0; k < numTimers; ++k) {
00730             const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
00731             if (callCount > 0) ICallThisTimer[k] = 1;
00732             else               ICallThisTimer[k] = 0;
00733           }
00734           if (numTimers > 0) {
00735             reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
00736                        &numProcsCallingEachTimer[0]);
00737           }
00738         }
00739 
00740         // When summing, first scale by the number of processes.  This
00741         // avoids unnecessary overflow, and also gives us the mean
00742         // call count automatically.
00743         Array<double> scaledTimings (numTimers);
00744         Array<double> scaledCallCounts (numTimers);
00745         const double P = static_cast<double> (numProcs);
00746 
00747         if (ignoreZeroTimers) {
00748           for (int k = 0; k < numTimers; ++k) {
00749             const double timing = timingsAndCallCounts[k].first;
00750             const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
00751 
00752             scaledTimings[k] = timing / numProcsCallingEachTimer[k];
00753             scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
00754           }
00755         }
00756         else {
00757           for (int k = 0; k < numTimers; ++k) {
00758             const double timing = timingsAndCallCounts[k].first;
00759             const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
00760 
00761             scaledTimings[k] = timing / P;
00762             scaledCallCounts[k] = callCount / P;
00763           }
00764         }
00765 
00766         if (numTimers > 0) {
00767           reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
00768                      &meanOverProcsTimings[0]);
00769           reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
00770                      &meanCallCounts[0]);
00771         }
00772         // We don't have to undo the scaling for the mean timings;
00773         // just divide by the scaled call count.
00774         for (int k = 0; k < numTimers; ++k) {
00775           if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
00776             meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
00777           }
00778           else {
00779             meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
00780           }
00781         }
00782       }
00783 
00784       // Reformat the data into the map of statistics.  Be sure that
00785       // each value (the std::vector of (timing, call count) pairs,
00786       // each entry of which is a different statistic) preserves the
00787       // order of statNames.
00788       statNames.resize (4);
00789       statNames[0] = "MinOverProcs";
00790       statNames[1] = "MeanOverProcs";
00791       statNames[2] = "MaxOverProcs";
00792       statNames[3] = "MeanOverCallCounts";
00793 
00794       stat_map_type::iterator statIter = statData.end();
00795       timer_map_t::const_iterator it = globalTimerData.begin();
00796       for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
00797         std::vector<std::pair<double, double> > curData (4);
00798         curData[0] = minTimingsAndCallCounts[k];
00799         curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
00800         curData[2] = maxTimingsAndCallCounts[k];
00801         curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
00802 
00803         // statIter gives an insertion location hint that makes each
00804         // insertion O(1), since we remember the location of the last
00805         // insertion.
00806         statIter = statData.insert (statIter, std::make_pair (it->first, curData));
00807       }
00808     }
00809 
00810 
00827     RCP<const Comm<int> >
00828     getDefaultComm ()
00829     {
00830       // The default communicator.  If Trilinos was built with MPI
00831       // enabled, this should be MPI_COMM_WORLD.  (If MPI has not yet
00832       // been initialized, it's not valid to use the communicator!)
00833       // Otherwise, this should be a "serial" (no MPI, one "process")
00834       // communicator.
00835       RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
00836 
00837 #ifdef HAVE_MPI
00838       {
00839         int mpiHasBeenStarted = 0;
00840         MPI_Initialized (&mpiHasBeenStarted);
00841         if (! mpiHasBeenStarted) {
00842           // Make pComm a new "serial communicator."
00843           comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
00844         }
00845       }
00846 #endif // HAVE_MPI
00847       return comm;
00848     }
00849 
00850   } // namespace (anonymous)
00851 
00852 
00853   void
00854   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
00855                                              std::vector<std::string>& statNames,
00856                                              Ptr<const Comm<int> > comm,
00857                                              const ECounterSetOp setOp,
00858                                              const std::string& filter)
00859   {
00860     // Collect local timer data and names.  Filter out timers with
00861     // zero call counts if writeZeroTimers is false.  Also, apply the
00862     // timer label filter at this point, so we don't have to compute
00863     // statistics on timers we don't want to display anyway.
00864     timer_map_t localTimerData;
00865     Array<std::string> localTimerNames;
00866     const bool writeZeroTimers = false;
00867     collectLocalTimerDataAndNames (localTimerData, localTimerNames,
00868                                    counters(), writeZeroTimers, filter);
00869     // Merge the local timer data and names into global timer data and
00870     // names.
00871     timer_map_t globalTimerData;
00872     Array<std::string> globalTimerNames;
00873     const bool alwaysWriteLocal = false;
00874     collectGlobalTimerData (globalTimerData, globalTimerNames,
00875                             localTimerData, localTimerNames,
00876                             comm, alwaysWriteLocal, setOp);
00877     // Compute statistics on the data.
00878     computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
00879   }
00880 
00881 
00882   void
00883   TimeMonitor::summarize (Ptr<const Comm<int> > comm,
00884                           std::ostream& out,
00885                           const bool alwaysWriteLocal,
00886                           const bool writeGlobalStats,
00887                           const bool writeZeroTimers,
00888                           const ECounterSetOp setOp,
00889                           const std::string& filter,
00890                           const bool ignoreZeroTimers)
00891   {
00892     //
00893     // We can't just call computeGlobalTimerStatistics(), since
00894     // summarize() has different options that affect whether global
00895     // statistics are computed and printed.
00896     //
00897     const int numProcs = comm->getSize();
00898     const int myRank = comm->getRank();
00899 
00900     // Collect local timer data and names.  Filter out timers with
00901     // zero call counts if writeZeroTimers is false.  Also, apply the
00902     // timer label filter at this point, so we don't have to compute
00903     // statistics on timers we don't want to display anyway.
00904     timer_map_t localTimerData;
00905     Array<std::string> localTimerNames;
00906     collectLocalTimerDataAndNames (localTimerData, localTimerNames,
00907                                    counters(), writeZeroTimers, filter);
00908 
00909     // If we're computing global statistics, merge the local timer
00910     // data and names into global timer data and names, and compute
00911     // global timer statistics.  Otherwise, leave the global data
00912     // empty.
00913     timer_map_t globalTimerData;
00914     Array<std::string> globalTimerNames;
00915     stat_map_type statData;
00916     std::vector<std::string> statNames;
00917     if (writeGlobalStats) {
00918       collectGlobalTimerData (globalTimerData, globalTimerNames,
00919                               localTimerData, localTimerNames,
00920                               comm, alwaysWriteLocal, setOp);
00921       // Compute statistics on the data, but only if the communicator
00922       // contains more than one process.  Otherwise, statistics don't
00923       // make sense and we don't print them (see below).
00924       if (numProcs > 1) {
00925         computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
00926       }
00927     }
00928 
00929     // Precision of floating-point numbers in the table.
00930     const int precision = format().precision();
00931 
00932     // All columns of the table, in order.
00933     Array<TableColumn> tableColumns;
00934 
00935     // Labels of all the columns of the table.
00936     // We will append to this when we add each column.
00937     Array<std::string> titles;
00938 
00939     // Widths (in number of characters) of each column.
00940     // We will append to this when we add each column.
00941     Array<int> columnWidths;
00942 
00943     // Table column containing all timer names.  If writeGlobalStats
00944     // is true, we use the global timer names, otherwise we use the
00945     // local timer names.  We build the table on all processes
00946     // redundantly, but only print on Rank 0.
00947     {
00948       titles.append ("Timer Name");
00949 
00950       // The column labels depend on whether we are computing global statistics.
00951       TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
00952       tableColumns.append (nameCol);
00953 
00954       // Each column is as wide as it needs to be to hold both its
00955       // title and all of the column data.  This column's title is the
00956       // current last entry of the titles array.
00957       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
00958     }
00959 
00960     // Table column containing local timer stats, if applicable.  We
00961     // only write local stats if asked, only on MPI Proc 0, and only
00962     // if there is more than one MPI process in the communicator
00963     // (otherwise local stats == global stats, so we just print the
00964     // global stats).  In this case, we've padded the local data on
00965     // Proc 0 if necessary to match the global timer list, so that the
00966     // columns have the same number of rows.
00967     if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
00968       titles.append ("Local time (num calls)");
00969 
00970       // Copy local timer data out of the array-of-structs into
00971       // separate arrays, for display in the table.
00972       Array<double> localTimings;
00973       Array<double> localNumCalls;
00974       for (timer_map_t::const_iterator it = localTimerData.begin();
00975            it != localTimerData.end(); ++it) {
00976         localTimings.push_back (it->second.first);
00977         localNumCalls.push_back (static_cast<double> (it->second.second));
00978       }
00979       TableColumn timeAndCalls (localTimings, localNumCalls, precision, true);
00980       tableColumns.append (timeAndCalls);
00981       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00982     }
00983 
00984     if (writeGlobalStats) {
00985       // If there's only 1 process in the communicator, don't display
00986       // statistics; statistics don't make sense in that case.  Just
00987       // display the timings and call counts.  If there's more than 1
00988       // process, do display statistics.
00989       if (numProcs == 1) {
00990         // Extract timings and the call counts from globalTimerData.
00991         Array<double> globalTimings;
00992         Array<double> globalNumCalls;
00993         for (timer_map_t::const_iterator it = globalTimerData.begin();
00994              it != globalTimerData.end(); ++it) {
00995           globalTimings.push_back (it->second.first);
00996           globalNumCalls.push_back (static_cast<double> (it->second.second));
00997         }
00998         // Print the table column.
00999         titles.append ("Global time (num calls)");
01000         TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true);
01001         tableColumns.append (timeAndCalls);
01002         columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
01003       }
01004       else { // numProcs > 1
01005         // Print a table column for each statistic.  statNames and
01006         // each value in statData use the same ordering, so we can
01007         // iterate over valid indices of statNames to display the
01008         // statistics in the right order.
01009         const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
01010         for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
01011           // Extract lists of timings and their call counts for the
01012           // current statistic.
01013           Array<double> statTimings (numGlobalTimers);
01014           Array<double> statCallCounts (numGlobalTimers);
01015           stat_map_type::const_iterator it = statData.begin();
01016           for (int k = 0; it != statData.end(); ++it, ++k) {
01017             statTimings[k] = (it->second[statInd]).first;
01018             statCallCounts[k] = (it->second[statInd]).second;
01019           }
01020           // Print the table column.
01021           const std::string& statisticName = statNames[statInd];
01022           const std::string titleString = statisticName;
01023           titles.append (titleString);
01024           TableColumn timeAndCalls (statTimings, statCallCounts, precision, true);
01025           tableColumns.append (timeAndCalls);
01026           columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
01027         }
01028       }
01029     }
01030 
01031     // Print the whole table to the given output stream on MPI Rank 0.
01032     format().setColumnWidths (columnWidths);
01033     if (myRank == 0) {
01034       std::ostringstream theTitle;
01035       theTitle << "TimeMonitor results over " << numProcs << " processor"
01036                << (numProcs > 1 ? "s" : "");
01037       format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
01038     }
01039   }
01040 
01041   void
01042   TimeMonitor::summarize (std::ostream &out,
01043                           const bool alwaysWriteLocal,
01044                           const bool writeGlobalStats,
01045                           const bool writeZeroTimers,
01046                           const ECounterSetOp setOp,
01047                           const std::string& filter,
01048                           const bool ignoreZeroTimers)
01049   {
01050     // The default communicator.  If Trilinos was built with MPI
01051     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
01052     // be a "serial" (no MPI, one "process") communicator.
01053     RCP<const Comm<int> > comm = getDefaultComm();
01054 
01055     summarize (comm.ptr(), out, alwaysWriteLocal,
01056                writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
01057   }
01058 
01059   void
01060   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
01061                                              std::vector<std::string>& statNames,
01062                                              const ECounterSetOp setOp,
01063                                              const std::string& filter)
01064   {
01065     // The default communicator.  If Trilinos was built with MPI
01066     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
01067     // be a "serial" (no MPI, one "process") communicator.
01068     RCP<const Comm<int> > comm = getDefaultComm();
01069 
01070     computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
01071   }
01072 
01073 
01074   namespace {
01098     std::string
01099     quoteLabelForYaml (const std::string& label)
01100     {
01101       // YAML allows empty keys in key: value pairs.  See Section 7.2
01102       // of the YAML 1.2 spec.  We thus let an empty label pass
01103       // through without quoting or other special treatment.
01104       if (label.empty ()) {
01105         return label;
01106       }
01107 
01108       // Check whether the label is already quoted.  If so, we don't
01109       // need to quote it again.  However, we do need to quote any
01110       // quote symbols in the string inside the outer quotes.
01111       const bool alreadyQuoted = label.size () >= 2 &&
01112         label[0] == '"' && label[label.size() - 1] == '"';
01113 
01114       // We need to quote if there are any colons or (inner) quotes in
01115       // the string.  We'll determine this as we read through the
01116       // string and escape any characters that need escaping.
01117       bool needToQuote = false;
01118 
01119       std::string out; // To fill with the return value
01120       out.reserve (label.size ());
01121 
01122       const size_t startPos = alreadyQuoted ? 1 : 0;
01123       const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();
01124       for (size_t i = startPos; i < endPos; ++i) {
01125         const char c = label[i];
01126         if (c == '"' || c == '\\') {
01127           out.push_back ('\\'); // Escape the quote or backslash.
01128           needToQuote = true;
01129         }
01130         else if (c == ':') {
01131           needToQuote = true;
01132         }
01133         out.push_back (c);
01134       }
01135 
01136       if (needToQuote || alreadyQuoted) {
01137         // If the input string was already quoted, then out doesn't
01138         // include its quotes, so we have to add them back in.
01139         return "\"" + out + "\"";
01140       }
01141       else {
01142         return out;
01143       }
01144     }
01145 
01146   } // namespace (anonymous)
01147 
01148 
01149   void TimeMonitor::
01150   summarizeToYaml (Ptr<const Comm<int> > comm,
01151                    std::ostream &out,
01152                    const ETimeMonitorYamlFormat yamlStyle,
01153                    const std::string& filter)
01154   {
01155     using Teuchos::FancyOStream;
01156     using Teuchos::fancyOStream;
01157     using Teuchos::getFancyOStream;
01158     using Teuchos::OSTab;
01159     using Teuchos::RCP;
01160     using Teuchos::rcpFromRef;
01161     using std::endl;
01162     typedef std::vector<std::string>::size_type size_type;
01163 
01164     const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
01165 
01166     // const bool writeGlobalStats = true;
01167     // const bool writeZeroTimers = true;
01168     // const bool alwaysWriteLocal = false;
01169     const ECounterSetOp setOp = Intersection;
01170 
01171     stat_map_type statData;
01172     std::vector<std::string> statNames;
01173     computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
01174 
01175     const int numProcs = comm->getSize();
01176 
01177     // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
01178     // " as the line prefix does not work, else I would prefer that
01179     // method for printing each line of a YAML block sequence (see
01180     // Section 8.2.1 of the YAML 1.2 spec).
01181     //
01182     // Also, I have to set the tab indent string here, rather than in
01183     // OSTab's constructor.  This is because line prefix (which for
01184     // some reason is what OSTab's constructor takes, rather than tab
01185     // indent string) means something different from tab indent
01186     // string, and turning on the line prefix prints all sorts of
01187     // things including "|" for some reason.
01188     RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
01189     pfout->setTabIndentStr ("  ");
01190     FancyOStream& fout = *pfout;
01191 
01192     fout << "# Teuchos::TimeMonitor report" << endl
01193          << "---" << endl;
01194 
01195     // mfh 19 Aug 2012: An important goal of our chosen output format
01196     // was to minimize the nesting depth.  We have managed to keep the
01197     // nesting depth to 3, which is the limit that the current version
01198     // of PylotDB imposes for its YAML input.
01199 
01200     // Outermost level is a dictionary.  (Individual entries of a
01201     // dictionary do _not_ begin with "- ".)  We always print the
01202     // outermost level in standard style, not flow style, for better
01203     // readability.  We begin the outermost level with metadata.
01204     fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
01205          << "Number of processes: " << numProcs << endl
01206          << "Time unit: s" << endl;
01207     // For a key: value pair where the value is a sequence or
01208     // dictionary on the following line, YAML requires a space after
01209     // the colon.
01210     fout << "Statistics collected: ";
01211     // Print list of the names of all the statistics we collected.
01212     if (compact) {
01213       fout << " [";
01214       for (size_type i = 0; i < statNames.size (); ++i) {
01215         fout << quoteLabelForYaml (statNames[i]);
01216         if (i + 1 < statNames.size ()) {
01217           fout << ", ";
01218         }
01219       }
01220       fout << "]" << endl;
01221     }
01222     else {
01223       fout << endl;
01224       OSTab tab1 (pfout);
01225       for (size_type i = 0; i < statNames.size (); ++i) {
01226         fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
01227       }
01228     }
01229 
01230     // Print the list of timer names.
01231     //
01232     // It might be nicer instead to print a map from timer name to all
01233     // of its data, but keeping the maximum nesting depth small
01234     // ensures better compatibility with different parsing tools.
01235     fout << "Timer names: ";
01236     if (compact) {
01237       fout << " [";
01238       size_type ind = 0;
01239       for (stat_map_type::const_iterator it = statData.begin();
01240            it != statData.end(); ++it, ++ind) {
01241         fout << quoteLabelForYaml (it->first);
01242         if (ind + 1 < statData.size ()) {
01243           fout << ", ";
01244         }
01245       }
01246       fout << "]" << endl;
01247     }
01248     else {
01249       fout << endl;
01250       OSTab tab1 (pfout);
01251       for (stat_map_type::const_iterator it = statData.begin();
01252            it != statData.end(); ++it) {
01253         fout << "- " << quoteLabelForYaml (it->first) << endl;
01254       }
01255     }
01256 
01257     // Print times for each timer, as a map from statistic name to its time.
01258     fout << "Total times: ";
01259     if (compact) {
01260       fout << " {";
01261       size_type outerInd = 0;
01262       for (stat_map_type::const_iterator outerIter = statData.begin();
01263            outerIter != statData.end(); ++outerIter, ++outerInd) {
01264         // Print timer name.
01265         fout << quoteLabelForYaml (outerIter->first) << ": ";
01266         // Print that timer's data.
01267         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01268         fout << "{";
01269         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01270           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01271                << curData[innerInd].first;
01272           if (innerInd + 1 < curData.size ()) {
01273             fout << ", ";
01274           }
01275         }
01276         fout << "}";
01277         if (outerInd + 1 < statData.size ()) {
01278           fout << ", ";
01279         }
01280       }
01281       fout << "}" << endl;
01282     }
01283     else {
01284       fout << endl;
01285       OSTab tab1 (pfout);
01286       size_type outerInd = 0;
01287       for (stat_map_type::const_iterator outerIter = statData.begin();
01288            outerIter != statData.end(); ++outerIter, ++outerInd) {
01289         // Print timer name.
01290         fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
01291         // Print that timer's data.
01292         OSTab tab2 (pfout);
01293         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01294         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01295           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01296                << curData[innerInd].first << endl;
01297         }
01298       }
01299     }
01300 
01301     // Print call counts for each timer, for each statistic name.
01302     fout << "Call counts:";
01303     if (compact) {
01304       fout << " {";
01305       size_type outerInd = 0;
01306       for (stat_map_type::const_iterator outerIter = statData.begin();
01307            outerIter != statData.end(); ++outerIter, ++outerInd) {
01308         // Print timer name.
01309         fout << quoteLabelForYaml (outerIter->first) << ": ";
01310         // Print that timer's data.
01311         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01312         fout << "{";
01313         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01314           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01315                << curData[innerInd].second;
01316           if (innerInd + 1 < curData.size ()) {
01317             fout << ", ";
01318           }
01319         }
01320         fout << "}";
01321         if (outerInd + 1 < statData.size ()) {
01322           fout << ", ";
01323         }
01324       }
01325       fout << "}" << endl;
01326     }
01327     else {
01328       fout << endl;
01329       OSTab tab1 (pfout);
01330       size_type outerInd = 0;
01331       for (stat_map_type::const_iterator outerIter = statData.begin();
01332            outerIter != statData.end(); ++outerIter, ++outerInd) {
01333         // Print timer name.
01334         fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
01335         // Print that timer's data.
01336         OSTab tab2 (pfout);
01337         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01338         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01339           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01340                << curData[innerInd].second << endl;
01341         }
01342       }
01343     }
01344   }
01345 
01346   void TimeMonitor::
01347   summarizeToYaml (std::ostream &out,
01348                    const ETimeMonitorYamlFormat yamlStyle,
01349                    const std::string& filter)
01350   {
01351     // The default communicator.  If Trilinos was built with MPI
01352     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
01353     // be a "serial" (no MPI, one "process") communicator.
01354     RCP<const Comm<int> > comm = getDefaultComm ();
01355 
01356     summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
01357   }
01358 
01359   // Default value is false.  We'll set to true once
01360   // setReportParameters() completes successfully.
01361   bool TimeMonitor::setParams_ = false;
01362 
01363   // We have to declare all of these here in order to avoid linker errors.
01364   TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
01365   TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
01366   ECounterSetOp TimeMonitor::setOp_ = Intersection;
01367   bool TimeMonitor::alwaysWriteLocal_ = false;
01368   bool TimeMonitor::writeGlobalStats_ = true;
01369   bool TimeMonitor::writeZeroTimers_ = true;
01370 
01371   void
01372   TimeMonitor::setReportFormatParameter (ParameterList& plist)
01373   {
01374     const std::string name ("Report format");
01375     const std::string defaultValue ("Table");
01376     const std::string docString ("Output format for report of timer statistics");
01377     Array<std::string> strings;
01378     Array<std::string> docs;
01379     Array<ETimeMonitorReportFormat> values;
01380 
01381     strings.push_back ("YAML");
01382     docs.push_back ("YAML (see yaml.org) format");
01383     values.push_back (REPORT_FORMAT_YAML);
01384     strings.push_back ("Table");
01385     docs.push_back ("Tabular format via Teuchos::TableFormat");
01386     values.push_back (REPORT_FORMAT_TABLE);
01387 
01388     setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
01389                                                             docString,
01390                                                             strings (), docs (),
01391                                                             values (), &plist);
01392   }
01393 
01394   void
01395   TimeMonitor::setYamlFormatParameter (ParameterList& plist)
01396   {
01397     const std::string name ("YAML style");
01398     const std::string defaultValue ("spacious");
01399     const std::string docString ("YAML-specific output format");
01400     Array<std::string> strings;
01401     Array<std::string> docs;
01402     Array<ETimeMonitorYamlFormat> values;
01403 
01404     strings.push_back ("compact");
01405     docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
01406                     "yaml.org) for most sequences except the outermost sequence");
01407     values.push_back (YAML_FORMAT_COMPACT);
01408 
01409     strings.push_back ("spacious");
01410     docs.push_back ("Spacious format: avoid flow style");
01411     values.push_back (YAML_FORMAT_SPACIOUS);
01412 
01413     setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
01414                                                           docString,
01415                                                           strings (), docs (),
01416                                                           values (), &plist);
01417   }
01418 
01419   void
01420   TimeMonitor::setSetOpParameter (ParameterList& plist)
01421   {
01422     const std::string name ("How to merge timer sets");
01423     const std::string defaultValue ("Intersection");
01424     const std::string docString ("How to merge differing sets of timers "
01425                                  "across processes");
01426     Array<std::string> strings;
01427     Array<std::string> docs;
01428     Array<ECounterSetOp> values;
01429 
01430     strings.push_back ("Intersection");
01431     docs.push_back ("Compute intersection of timer sets over processes");
01432     values.push_back (Intersection);
01433     strings.push_back ("Union");
01434     docs.push_back ("Compute union of timer sets over processes");
01435     values.push_back (Union);
01436 
01437     setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
01438                                                  strings (), docs (), values (),
01439                                                  &plist);
01440   }
01441 
01442   RCP<const ParameterList>
01443   TimeMonitor::getValidReportParameters ()
01444   {
01445     // Our implementation favors recomputation over persistent
01446     // storage.  That is, we simply recreate the list every time we
01447     // need it.
01448     RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
01449 
01450     const bool alwaysWriteLocal = false;
01451     const bool writeGlobalStats = true;
01452     const bool writeZeroTimers = true;
01453 
01454     setReportFormatParameter (*plist);
01455     setYamlFormatParameter (*plist);
01456     setSetOpParameter (*plist);
01457     plist->set ("alwaysWriteLocal", alwaysWriteLocal,
01458                 "Always output local timers' values on Proc 0");
01459     plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
01460                 "statistics, even if there is only one process in the "
01461                 "communicator");
01462     plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
01463                 "timers that have never been called");
01464 
01465     return rcp_const_cast<const ParameterList> (plist);
01466   }
01467 
01468   void
01469   TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
01470   {
01471     ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
01472     ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
01473     ECounterSetOp setOp = Intersection;
01474     bool alwaysWriteLocal = false;
01475     bool writeGlobalStats = true;
01476     bool writeZeroTimers = true;
01477 
01478     if (params.is_null ()) {
01479       // If we've set parameters before, leave their current values.
01480       // Otherwise, set defaults (below).
01481       if (setParams_) {
01482         return;
01483       }
01484     }
01485     else { // params is nonnull.  Let's read it!
01486       params->validateParametersAndSetDefaults (*getValidReportParameters ());
01487 
01488       reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
01489       yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
01490       setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
01491       alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
01492       writeGlobalStats = params->get<bool> ("writeGlobalStats");
01493       writeZeroTimers = params->get<bool> ("writeZeroTimers");
01494     }
01495     // Defer setting state until here, to ensure the strong exception
01496     // guarantee for this method (either it throws with no externally
01497     // visible state changes, or it returns normally).
01498     reportFormat_ = reportFormat;
01499     yamlStyle_ = yamlStyle;
01500     setOp_ = setOp;
01501     alwaysWriteLocal_ = alwaysWriteLocal;
01502     writeGlobalStats_ = writeGlobalStats;
01503     writeZeroTimers_ = writeZeroTimers;
01504 
01505     setParams_ = true; // Yay, we successfully set parameters!
01506   }
01507 
01508   void
01509   TimeMonitor::report (Ptr<const Comm<int> > comm,
01510                        std::ostream& out,
01511                        const std::string& filter,
01512                        const RCP<ParameterList>& params)
01513   {
01514     setReportParameters (params);
01515 
01516     if (reportFormat_ == REPORT_FORMAT_YAML) {
01517       summarizeToYaml (comm, out, yamlStyle_, filter);
01518     }
01519     else if (reportFormat_ == REPORT_FORMAT_TABLE) {
01520       summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
01521                  writeZeroTimers_, setOp_, filter);
01522     }
01523     else {
01524       TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
01525         "Invalid report format.  This should never happen; ParameterList "
01526         "validation should have caught this.  Please report this bug to the "
01527         "Teuchos developers.");
01528     }
01529   }
01530 
01531   void
01532   TimeMonitor::report (Ptr<const Comm<int> > comm,
01533                        std::ostream& out,
01534                        const RCP<ParameterList>& params)
01535   {
01536     report (comm, out, "", params);
01537   }
01538 
01539   void
01540   TimeMonitor::report (std::ostream& out,
01541                        const std::string& filter,
01542                        const RCP<ParameterList>& params)
01543   {
01544     RCP<const Comm<int> > comm = getDefaultComm ();
01545     report (comm.ptr (), out, filter, params);
01546   }
01547 
01548   void
01549   TimeMonitor::report (std::ostream& out,
01550                        const RCP<ParameterList>& params)
01551   {
01552     RCP<const Comm<int> > comm = getDefaultComm ();
01553     report (comm.ptr (), out, "", params);
01554   }
01555 
01556 } // namespace Teuchos
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines