Teuchos - Trilinos Tools Package Version of the Day
Teuchos_TimeMonitor.cpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 #include "Teuchos_TimeMonitor.hpp"
00043 #include "Teuchos_CommHelpers.hpp"
00044 #include "Teuchos_DefaultComm.hpp"
00045 #include "Teuchos_TableColumn.hpp"
00046 #include "Teuchos_TableFormat.hpp"
00047 #include <functional>
00048 
00049 
00050 namespace Teuchos {
00103   template<class Ordinal, class ScalarType, class IndexType>
00104   class MaxLoc :
00105     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00106   public:
00107     void
00108     reduce (const Ordinal count,
00109             const std::pair<ScalarType, IndexType> inBuffer[],
00110             std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00111   };
00112 
00113   template<class Ordinal>
00114   class MaxLoc<Ordinal, double, int> :
00115     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00116   public:
00117     void
00118     reduce (const Ordinal count,
00119             const std::pair<double, int> inBuffer[],
00120             std::pair<double, int> inoutBuffer[]) const
00121     {
00122       for (Ordinal ind = 0; ind < count; ++ind) {
00123         const std::pair<double, int>& in = inBuffer[ind];
00124         std::pair<double, int>& inout = inoutBuffer[ind];
00125 
00126         if (in.first > inout.first) {
00127           inout.first = in.first;
00128           inout.second = in.second;
00129         } else if (in.first < inout.first) {
00130           // Don't need to do anything; inout has the values.
00131         } else { // equal, or at least one is NaN.
00132           inout.first = in.first;
00133           inout.second = std::min (in.second, inout.second);
00134         }
00135       }
00136     }
00137   };
00138 
00165   template<class Ordinal, class ScalarType, class IndexType>
00166   class MinLoc :
00167     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00168   public:
00169     void
00170     reduce (const Ordinal count,
00171             const std::pair<ScalarType, IndexType> inBuffer[],
00172             std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00173   };
00174 
00175   template<class Ordinal>
00176   class MinLoc<Ordinal, double, int> :
00177     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00178   public:
00179     void
00180     reduce (const Ordinal count,
00181             const std::pair<double, int> inBuffer[],
00182             std::pair<double, int> inoutBuffer[]) const
00183     {
00184       for (Ordinal ind = 0; ind < count; ++ind) {
00185         const std::pair<double, int>& in = inBuffer[ind];
00186         std::pair<double, int>& inout = inoutBuffer[ind];
00187 
00188         if (in.first < inout.first) {
00189           inout.first = in.first;
00190           inout.second = in.second;
00191         } else if (in.first > inout.first) {
00192           // Don't need to do anything; inout has the values.
00193         } else { // equal, or at least one is NaN.
00194           inout.first = in.first;
00195           inout.second = std::min (in.second, inout.second);
00196         }
00197       }
00198     }
00199   };
00200 
00201   // Typedef used internally by TimeMonitor::summarize() and its
00202   // helper functions.  The map is keyed on timer label (a string).
00203   // Each value is a pair: (total number of seconds over all calls to
00204   // that timer, total number of calls to that timer).
00205   typedef std::map<std::string, std::pair<double, int> > timer_map_t;
00206 
00207   TimeMonitor::TimeMonitor (Time& timer, bool reset)
00208     : PerformanceMonitorBase<Time>(timer, reset)
00209   {
00210     if (!isRecursiveCall()) counter().start(reset);
00211   }
00212 
00213   TimeMonitor::~TimeMonitor() {
00214     if (!isRecursiveCall()) counter().stop();
00215   }
00216 
00217   void
00218   TimeMonitor::zeroOutTimers()
00219   {
00220     const Array<RCP<Time> > timers = counters();
00221 
00222     // In debug mode, loop first to check whether any of the timers
00223     // are running, before resetting them.  This ensures that this
00224     // method satisfies the strong exception guarantee (either it
00225     // completes normally, or there are no side effects).
00226 #ifdef TEUCHOS_DEBUG
00227     typedef Array<RCP<Time> >::size_type size_type;
00228     const size_type numTimers = timers.size();
00229     for (size_type i = 0; i < numTimers; ++i) {
00230       Time &timer = *timers[i];
00231       // We throw a runtime_error rather than a logic_error, because
00232       // logic_error suggests a bug in the implementation of
00233       // TimeMonitor.  Calling zeroOutTimers() when a timer is
00234       // running is not TimeMonitor's fault.
00235       TEUCHOS_TEST_FOR_EXCEPTION(timer.isRunning(), std::runtime_error,
00236                                  "The timer i = " << i << " with name \""
00237                                  << timer.name() << "\" is currently running and may not "
00238                                  "be reset.");
00239     }
00240 #endif // TEUCHOS_DEBUG
00241 
00242     for (Array<RCP<Time> >::const_iterator it = timers.begin();
00243          it != timers.end(); ++it) {
00244       (*it)->reset ();
00245     }
00246   }
00247 
00248   // An anonymous namespace is the standard way of limiting linkage of
00249   // its contained routines to file scope.
00250   namespace {
// \brief Build an "empty" timer datum for the given timer name.
//
// The result pairs the name with (0.0 elapsed seconds, 0 calls).
// No timer object is created.
//
// \param name The timer's name.
//
// \return (name, (0.0, 0)).
std::pair<std::string, std::pair<double, int> >
makeEmptyTimerDatum (const std::string& name)
{
  const std::pair<double, int> zeroStats (0.0, 0);
  return std::pair<std::string, std::pair<double, int> > (name, zeroStats);
}
00262 
00263     // \fn collectLocalTimerData
00264     // \brief Collect and sort local timer data by timer names.
00265     //
00266     // \param localData [out] Map whose keys are the timer names, and
00267     //   whose value for each key is the total elapsed time (in
00268     //   seconds) and the call count for the timer with that name.
00269     //
00270     // \param localCounters [in] Timers from which to extract data.
00271     //
00272     // \param filter [in] Filter for timer labels.  If filter is not
00273     //   empty, this method will only collect data for local timers
00274     //   whose labels begin with this string.
00275     //
00276     // Extract the total elapsed time and call count from each timer
00277     // in the given array.  Merge results for timers with duplicate
00278     // labels, by summing their total elapsed times and call counts
00279     // pairwise.
00280     void
00281     collectLocalTimerData (timer_map_t& localData,
00282                            ArrayView<const RCP<Time> > localCounters,
00283                            const std::string& filter="")
00284     {
00285       using std::make_pair;
00286       typedef timer_map_t::const_iterator const_iter_t;
00287       typedef timer_map_t::iterator iter_t;
00288 
00289       timer_map_t theLocalData;
00290       for (ArrayView<const RCP<Time> >::const_iterator it = localCounters.begin();
00291            it != localCounters.end(); ++it) {
00292         const std::string& name = (*it)->name();
00293 
00294         // Filter current timer name, if provided filter is nonempty.
00295         // Filter string must _start_ the timer label, not just be in it.
00296         const bool skipThisOne = (filter != "" && name.find (filter) != 0);
00297         if (! skipThisOne) {
00298           const double timing = (*it)->totalElapsedTime();
00299           const int numCalls = (*it)->numCalls();
00300 
00301           // Merge timers with duplicate labels, by summing their
00302           // total elapsed times and call counts.
00303           iter_t loc = theLocalData.find (name);
00304           if (loc == theLocalData.end()) {
00305             // Use loc as an insertion location hint.
00306             theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
00307           }
00308           else {
00309             loc->second.first += timing;
00310             loc->second.second += numCalls;
00311           }
00312         }
00313       }
00314       // This avoids copying the map, and also makes this method
00315       // satisfy the strong exception guarantee.
00316       localData.swap (theLocalData);
00317     }
00318 
00319     // \brief Locally filter out timer data with zero call counts.
00320     //
00321     // \param timerData [in/out]
00322     void
00323     filterZeroData (timer_map_t& timerData)
00324     {
00325       timer_map_t newTimerData;
00326       for (timer_map_t::const_iterator it = timerData.begin();
00327            it != timerData.end(); ++it) {
00328         if (it->second.second > 0) {
00329           newTimerData[it->first] = it->second;
00330         }
00331       }
00332       timerData.swap (newTimerData);
00333     }
00334 
00356     void
00357     collectLocalTimerDataAndNames (timer_map_t& localTimerData,
00358                                    Array<std::string>& localTimerNames,
00359                                    ArrayView<const RCP<Time> > localTimers,
00360                                    const bool writeZeroTimers,
00361                                    const std::string& filter="")
00362     {
00363       // Collect and sort local timer data by timer names.
00364       collectLocalTimerData (localTimerData, localTimers, filter);
00365 
00366       // Filter out zero data locally first.  This ensures that if we
00367       // are writing global stats, and if a timer name exists in the
00368       // set of global names, then that timer has a nonzero call count
00369       // on at least one MPI process.
00370       if (! writeZeroTimers) {
00371         filterZeroData (localTimerData);
00372       }
00373 
00374       // Extract the set of local timer names.  The std::map keeps
00375       // them sorted alphabetically.
00376       localTimerNames.reserve (localTimerData.size());
00377       for (timer_map_t::const_iterator it = localTimerData.begin();
00378            it != localTimerData.end(); ++it) {
00379         localTimerNames.push_back (it->first);
00380       }
00381     }
00382 
00417     void
00418     collectGlobalTimerData (timer_map_t& globalTimerData,
00419                             Array<std::string>& globalTimerNames,
00420                             timer_map_t& localTimerData,
00421                             Array<std::string>& localTimerNames,
00422                             Ptr<const Comm<int> > comm,
00423                             const bool alwaysWriteLocal,
00424                             const ECounterSetOp setOp)
00425     {
00426       // There may be some global timers that are not local timers on
00427       // the calling MPI process(es).  In that case, if
00428       // alwaysWriteLocal is true, then we need to fill in the
00429       // "missing" local timers.  That will ensure that both global
00430       // and local timer columns in the output table have the same
00431       // number of rows.  The collectLocalTimerDataAndNames() method
00432       // may have already filtered out local timers with zero call
00433       // counts (if its writeZeroTimers argument was false), but we
00434       // won't be filtering again.  Thus, any local timer data we
00435       // insert here won't get filtered out.
00436       //
00437       // Note that calling summarize() with writeZeroTimers == false
00438       // will still do what it says, even if we insert local timers
00439       // with zero call counts here.
00440 
00441       // This does the correct and inexpensive thing (just copies the
00442       // timer data) if numProcs == 1.  Otherwise, it initiates a
00443       // communication with \f$O(\log P)\f$ messages along the
00444       // critical path, where \f$P\f$ is the number of participating
00445       // processes.
00446       mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
00447 
00448 #ifdef TEUCHOS_DEBUG
00449       {
00450         // Sanity check that all processes have the name number of
00451         // global timer names.
00452         const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
00453         timer_map_t::size_type minNumGlobalNames = 0;
00454         timer_map_t::size_type maxNumGlobalNames = 0;
00455         reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
00456                    outArg (minNumGlobalNames));
00457         reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
00458                    outArg (maxNumGlobalNames));
00459         TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
00460           std::logic_error, "Min # global timer names = " << minNumGlobalNames
00461           << " != max # global timer names = " << maxNumGlobalNames
00462           << ".  Please report this bug to the Teuchos developers.");
00463         TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
00464           std::logic_error, "My # global timer names = " << myNumGlobalNames
00465           << " != min # global timer names = " << minNumGlobalNames
00466           << ".  Please report this bug to the Teuchos developers.");
00467       }
00468 #endif // TEUCHOS_DEBUG
00469 
00470       // mergeCounterNames() just merges the counters' names, not
00471       // their actual data.  Now we need to fill globalTimerData with
00472       // this process' timer data for the timers in globalTimerNames.
00473       //
00474       // All processes need the full list of global timers, since
00475       // there may be some global timers that are not local timers.
00476       // That's why mergeCounterNames() has to be an all-reduce, not
00477       // just a reduction to Proc 0.
00478       //
00479       // Insertion optimization: if the iterator given to map::insert
00480       // points right before where we want to insert, insertion is
00481       // O(1).  globalTimerNames is sorted, so feeding the iterator
00482       // output of map::insert into the next invocation's input should
00483       // make the whole insertion O(N) where N is the number of
00484       // entries in globalTimerNames.
00485       timer_map_t::iterator globalMapIter = globalTimerData.begin();
00486       timer_map_t::iterator localMapIter;
00487       for (Array<string>::const_iterator it = globalTimerNames.begin();
00488            it != globalTimerNames.end(); ++it) {
00489         const std::string& globalName = *it;
00490         localMapIter = localTimerData.find (globalName);
00491 
00492         if (localMapIter == localTimerData.end()) {
00493           if (alwaysWriteLocal) {
00494             // If there are some global timers that are not local
00495             // timers, and if we want to print local timers, we insert
00496             // a local timer datum with zero elapsed time and zero
00497             // call count into localTimerData as well.  This will
00498             // ensure that both global and local timer columns in the
00499             // output table have the same number of rows.
00500             //
00501             // We really only need to do this on Proc 0, which is the
00502             // only process that currently may print local timers.
00503             // However, we do it on all processes, just in case
00504             // someone later wants to modify this function to print
00505             // out local timer data for some process other than Proc
00506             // 0.  This extra computation won't affect the cost along
00507             // the critical path, for future computations in which
00508             // Proc 0 participates.
00509             localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
00510 
00511             // Make sure the missing global name gets added to the
00512             // list of local names.  We'll re-sort the list of local
00513             // names below.
00514             localTimerNames.push_back (globalName);
00515           }
00516           // There's a global timer that's not a local timer.  Add it
00517           // to our pre-merge version of the global timer data so that
00518           // we can safely merge the global timer data later.
00519           globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
00520         }
00521         else {
00522           // We have this global timer name in our local timer list.
00523           // Fill in our pre-merge version of the global timer data
00524           // with our local data.
00525           globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
00526         }
00527       }
00528 
00529       if (alwaysWriteLocal) {
00530         // Re-sort the list of local timer names, since we may have
00531         // inserted "missing" names above.
00532         std::sort (localTimerNames.begin(), localTimerNames.end());
00533       }
00534 
00535 #ifdef TEUCHOS_DEBUG
00536       {
00537         // Sanity check that all processes have the name number of
00538         // global timers.
00539         const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
00540         timer_map_t::size_type minNumGlobalTimers = 0;
00541         timer_map_t::size_type maxNumGlobalTimers = 0;
00542         reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
00543                    outArg (minNumGlobalTimers));
00544         reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
00545                    outArg (maxNumGlobalTimers));
00546         TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
00547                                    std::logic_error, "Min # global timers = " << minNumGlobalTimers
00548                                    << " != max # global timers = " << maxNumGlobalTimers
00549                                    << ".  Please report this bug to the Teuchos developers.");
00550         TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
00551                                    std::logic_error, "My # global timers = " << myNumGlobalTimers
00552                                    << " != min # global timers = " << minNumGlobalTimers
00553                                    << ".  Please report this bug to the Teuchos developers.");
00554       }
00555 #endif // TEUCHOS_DEBUG
00556     }
00557 
00595     void
00596     computeGlobalTimerStats (stat_map_type& statData,
00597                              std::vector<std::string>& statNames,
00598                              Ptr<const Comm<int> > comm,
00599                              const timer_map_t& globalTimerData)
00600     {
00601       const int numTimers = static_cast<int> (globalTimerData.size());
00602       const int numProcs = comm->getSize();
00603 
00604       // Extract pre-reduction timings and call counts into a
00605       // sequential array.  This array will be in the same order as
00606       // the global timer names are in the map.
00607       Array<std::pair<double, int> > timingsAndCallCounts;
00608       timingsAndCallCounts.reserve (numTimers);
00609       for (timer_map_t::const_iterator it = globalTimerData.begin();
00610            it != globalTimerData.end(); ++it) {
00611         timingsAndCallCounts.push_back (it->second);
00612       }
00613 
00614       // For each timer name, compute the min timing and its
00615       // corresponding call count.  If two processes have the same
00616       // timing but different call counts, the minimum call count will
00617       // be used.
00618       Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
00619       if (numTimers > 0) {
00620         reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
00621                    &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
00622       }
00623 
00624       // For each timer name, compute the max timing and its
00625       // corresponding call count.  If two processes have the same
00626       // timing but different call counts, the minimum call count will
00627       // be used.
00628       Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
00629       if (numTimers > 0) {
00630         reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
00631                    &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
00632       }
00633 
00634       // For each timer name, compute the mean-over-processes timing,
00635       // the mean call count, and the mean-over-call-counts timing.
00636       // The mean call count is reported as a double to allow a
00637       // fractional value.
00638       //
00639       // Each local timing is really the total timing over all local
00640       // invocations.  The number of local invocations is the call
00641       // count.  Thus, the mean-over-call-counts timing is the sum of
00642       // all the timings (over all processes), divided by the sum of
00643       // all the call counts (over all processes).  We compute it in a
00644       // different way to over unnecessary overflow.
00645       Array<double> meanOverCallCountsTimings (numTimers);
00646       Array<double> meanOverProcsTimings (numTimers);
00647       Array<double> meanCallCounts (numTimers);
00648       {
00649         // When summing, first scale by the number of processes.  This
00650         // avoids unnecessary overflow, and also gives us the mean
00651         // call count automatically.
00652         Array<double> scaledTimings (numTimers);
00653         Array<double> scaledCallCounts (numTimers);
00654         const double P = static_cast<double> (numProcs);
00655         for (int k = 0; k < numTimers; ++k) {
00656           const double timing = timingsAndCallCounts[k].first;
00657           const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
00658 
00659           scaledTimings[k] = timing / P;
00660           scaledCallCounts[k] = callCount / P;
00661         }
00662         if (numTimers > 0) {
00663           reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
00664                      &meanOverProcsTimings[0]);
00665           reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
00666                      &meanCallCounts[0]);
00667         }
00668         // We don't have to undo the scaling for the mean timings;
00669         // just divide by the scaled call count.
00670         for (int k = 0; k < numTimers; ++k) {
00671           meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
00672         }
00673       }
00674 
00675       // Reformat the data into the map of statistics.  Be sure that
00676       // each value (the std::vector of (timing, call count) pairs,
00677       // each entry of which is a different statistic) preserves the
00678       // order of statNames.
00679       statNames.resize (4);
00680       statNames[0] = "MinOverProcs";
00681       statNames[1] = "MeanOverProcs";
00682       statNames[2] = "MaxOverProcs";
00683       statNames[3] = "MeanOverCallCounts";
00684 
00685       stat_map_type::iterator statIter = statData.end();
00686       timer_map_t::const_iterator it = globalTimerData.begin();
00687       for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
00688         std::vector<std::pair<double, double> > curData (4);
00689         curData[0] = minTimingsAndCallCounts[k];
00690         curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
00691         curData[2] = maxTimingsAndCallCounts[k];
00692         curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
00693 
00694         // statIter gives an insertion location hint that makes each
00695         // insertion O(1), since we remember the location of the last
00696         // insertion.
00697         statIter = statData.insert (statIter, std::make_pair (it->first, curData));
00698       }
00699     }
00700 
00701 
00718     RCP<const Comm<int> >
00719     getDefaultComm ()
00720     {
00721       // The default communicator.  If Trilinos was built with MPI
00722       // enabled, this should be MPI_COMM_WORLD.  (If MPI has not yet
00723       // been initialized, it's not valid to use the communicator!)
00724       // Otherwise, this should be a "serial" (no MPI, one "process")
00725       // communicator.
00726       RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
00727 
00728 #ifdef HAVE_MPI
00729       {
00730         int mpiHasBeenStarted = 0;
00731         MPI_Initialized (&mpiHasBeenStarted);
00732         if (! mpiHasBeenStarted) {
00733           // Make pComm a new "serial communicator."
00734           comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
00735         }
00736       }
00737 #endif // HAVE_MPI
00738       return comm;
00739     }
00740 
00741   } // namespace (anonymous)
00742 
00743 
00744   void
00745   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
00746                                              std::vector<std::string>& statNames,
00747                                              Ptr<const Comm<int> > comm,
00748                                              const ECounterSetOp setOp,
00749                                              const std::string& filter)
00750   {
00751     // Collect local timer data and names.  Filter out timers with
00752     // zero call counts if writeZeroTimers is false.  Also, apply the
00753     // timer label filter at this point, so we don't have to compute
00754     // statistics on timers we don't want to display anyway.
00755     timer_map_t localTimerData;
00756     Array<std::string> localTimerNames;
00757     const bool writeZeroTimers = false;
00758     collectLocalTimerDataAndNames (localTimerData, localTimerNames,
00759                                    counters(), writeZeroTimers, filter);
00760     // Merge the local timer data and names into global timer data and
00761     // names.
00762     timer_map_t globalTimerData;
00763     Array<std::string> globalTimerNames;
00764     const bool alwaysWriteLocal = false;
00765     collectGlobalTimerData (globalTimerData, globalTimerNames,
00766                             localTimerData, localTimerNames,
00767                             comm, alwaysWriteLocal, setOp);
00768     // Compute statistics on the data.
00769     computeGlobalTimerStats (statData, statNames, comm, globalTimerData);
00770   }
00771 
00772 
00773   void
00774   TimeMonitor::summarize (Ptr<const Comm<int> > comm,
00775                           std::ostream& out,
00776                           const bool alwaysWriteLocal,
00777                           const bool writeGlobalStats,
00778                           const bool writeZeroTimers,
00779                           const ECounterSetOp setOp,
00780                           const std::string& filter)
00781   {
00782     //
00783     // We can't just call computeGlobalTimerStatistics(), since
00784     // summarize() has different options that affect whether global
00785     // statistics are computed and printed.
00786     //
00787     const int numProcs = comm->getSize();
00788     const int myRank = comm->getRank();
00789 
00790     // Collect local timer data and names.  Filter out timers with
00791     // zero call counts if writeZeroTimers is false.  Also, apply the
00792     // timer label filter at this point, so we don't have to compute
00793     // statistics on timers we don't want to display anyway.
00794     timer_map_t localTimerData;
00795     Array<std::string> localTimerNames;
00796     collectLocalTimerDataAndNames (localTimerData, localTimerNames,
00797                                    counters(), writeZeroTimers, filter);
00798 
00799     // If we're computing global statistics, merge the local timer
00800     // data and names into global timer data and names, and compute
00801     // global timer statistics.  Otherwise, leave the global data
00802     // empty.
00803     timer_map_t globalTimerData;
00804     Array<std::string> globalTimerNames;
00805     stat_map_type statData;
00806     std::vector<std::string> statNames;
00807     if (writeGlobalStats) {
00808       collectGlobalTimerData (globalTimerData, globalTimerNames,
00809                               localTimerData, localTimerNames,
00810                               comm, alwaysWriteLocal, setOp);
00811       // Compute statistics on the data, but only if the communicator
00812       // contains more than one process.  Otherwise, statistics don't
00813       // make sense and we don't print them (see below).
00814       if (numProcs > 1) {
00815         computeGlobalTimerStats (statData, statNames, comm, globalTimerData);
00816       }
00817     }
00818 
00819     // Precision of floating-point numbers in the table.
00820     const int precision = format().precision();
00821 
00822     // All columns of the table, in order.
00823     Array<TableColumn> tableColumns;
00824 
00825     // Labels of all the columns of the table.
00826     // We will append to this when we add each column.
00827     Array<std::string> titles;
00828 
00829     // Widths (in number of characters) of each column.
00830     // We will append to this when we add each column.
00831     Array<int> columnWidths;
00832 
00833     // Table column containing all timer names.  If writeGlobalStats
00834     // is true, we use the global timer names, otherwise we use the
00835     // local timer names.  We build the table on all processes
00836     // redundantly, but only print on Rank 0.
00837     {
00838       titles.append ("Timer Name");
00839 
00840       // The column labels depend on whether we are computing global statistics.
00841       TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
00842       tableColumns.append (nameCol);
00843 
00844       // Each column is as wide as it needs to be to hold both its
00845       // title and all of the column data.  This column's title is the
00846       // current last entry of the titles array.
00847       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
00848     }
00849 
00850     // Table column containing local timer stats, if applicable.  We
00851     // only write local stats if asked, only on MPI Proc 0, and only
00852     // if there is more than one MPI process in the communicator
00853     // (otherwise local stats == global stats, so we just print the
00854     // global stats).  In this case, we've padded the local data on
00855     // Proc 0 if necessary to match the global timer list, so that the
00856     // columns have the same number of rows.
00857     if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
00858       titles.append ("Local time (num calls)");
00859 
00860       // Copy local timer data out of the array-of-structs into
00861       // separate arrays, for display in the table.
00862       Array<double> localTimings;
00863       Array<double> localNumCalls;
00864       for (timer_map_t::const_iterator it = localTimerData.begin();
00865            it != localTimerData.end(); ++it) {
00866         localTimings.push_back (it->second.first);
00867         localNumCalls.push_back (static_cast<double> (it->second.second));
00868       }
00869       TableColumn timeAndCalls (localTimings, localNumCalls, precision, true);
00870       tableColumns.append (timeAndCalls);
00871       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00872     }
00873 
00874     if (writeGlobalStats) {
00875       // If there's only 1 process in the communicator, don't display
00876       // statistics; statistics don't make sense in that case.  Just
00877       // display the timings and call counts.  If there's more than 1
00878       // process, do display statistics.
00879       if (numProcs == 1) {
00880         // Extract timings and the call counts from globalTimerData.
00881         Array<double> globalTimings;
00882         Array<double> globalNumCalls;
00883         for (timer_map_t::const_iterator it = globalTimerData.begin();
00884              it != globalTimerData.end(); ++it) {
00885           globalTimings.push_back (it->second.first);
00886           globalNumCalls.push_back (static_cast<double> (it->second.second));
00887         }
00888         // Print the table column.
00889         titles.append ("Global time (num calls)");
00890         TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true);
00891         tableColumns.append (timeAndCalls);
00892         columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00893       }
00894       else { // numProcs > 1
00895         // Print a table column for each statistic.  statNames and
00896         // each value in statData use the same ordering, so we can
00897         // iterate over valid indices of statNames to display the
00898         // statistics in the right order.
00899         const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
00900         for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
00901           // Extract lists of timings and their call counts for the
00902           // current statistic.
00903           Array<double> statTimings (numGlobalTimers);
00904           Array<double> statCallCounts (numGlobalTimers);
00905           stat_map_type::const_iterator it = statData.begin();
00906           for (int k = 0; it != statData.end(); ++it, ++k) {
00907             statTimings[k] = (it->second[statInd]).first;
00908             statCallCounts[k] = (it->second[statInd]).second;
00909           }
00910           // Print the table column.
00911           const std::string& statisticName = statNames[statInd];
00912           const std::string titleString = statisticName;
00913           titles.append (titleString);
00914           TableColumn timeAndCalls (statTimings, statCallCounts, precision, true);
00915           tableColumns.append (timeAndCalls);
00916           columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00917         }
00918       }
00919     }
00920 
00921     // Print the whole table to the given output stream on MPI Rank 0.
00922     format().setColumnWidths (columnWidths);
00923     if (myRank == 0) {
00924       std::ostringstream theTitle;
00925       theTitle << "TimeMonitor results over " << numProcs << " processor"
00926                << (numProcs > 1 ? "s" : "");
00927       format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
00928     }
00929   }
00930 
00931   void
00932   TimeMonitor::summarize (std::ostream &out,
00933                           const bool alwaysWriteLocal,
00934                           const bool writeGlobalStats,
00935                           const bool writeZeroTimers,
00936                           const ECounterSetOp setOp,
00937                           const std::string& filter)
00938   {
00939     // The default communicator.  If Trilinos was built with MPI
00940     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
00941     // be a "serial" (no MPI, one "process") communicator.
00942     RCP<const Comm<int> > comm = getDefaultComm();
00943 
00944     summarize (comm.ptr(), out, alwaysWriteLocal,
00945                writeGlobalStats, writeZeroTimers, setOp, filter);
00946   }
00947 
00948   void
00949   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
00950                                              std::vector<std::string>& statNames,
00951                                              const ECounterSetOp setOp,
00952                                              const std::string& filter)
00953   {
00954     // The default communicator.  If Trilinos was built with MPI
00955     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
00956     // be a "serial" (no MPI, one "process") communicator.
00957     RCP<const Comm<int> > comm = getDefaultComm();
00958 
00959     computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
00960   }
00961 
00962 
namespace {
  // Turn a timer or statistic label into text that is safe to emit as
  // a YAML scalar (key or sequence entry), in both block and flow
  // (compact) style.
  //
  // label: the raw label.
  //
  // Returns:
  //  - the label unchanged if it is empty (YAML permits empty keys;
  //    see Section 7.2 of the YAML 1.2 spec);
  //  - the label unchanged if it contains nothing that a plain
  //    (unquoted) scalar cannot hold;
  //  - otherwise the label wrapped in double quotes, with every
  //    interior '"' and '\' preceded by a backslash.
  //
  // A label that already begins and ends with '"' is treated as
  // quoted: its outer quotes are kept (not doubled) and only the text
  // between them is escaped.
  //
  // Quoting is triggered by:
  //  - ':' (key/value separator), '"' and '\';
  //  - the flow indicators ',', '[', ']', '{', '}', which would
  //    otherwise split or terminate the enclosing "[...]" / "{...}"
  //    collection in compact output;
  //  - '#', which can start a comment;
  //  - a leading indicator character (- ? & * ! | > ' % @ `);
  //  - leading or trailing blanks, which a plain scalar would lose.
  std::string
  quoteLabelForYaml (const std::string& label)
  {
    typedef std::string::size_type pos_type;

    if (label.empty ()) {
      return label;
    }

    const bool alreadyQuoted = label.size () >= 2 &&
      label[0] == '"' && label[label.size () - 1] == '"';

    // Range of characters to copy: skip the outer quotes if present.
    const pos_type startPos = alreadyQuoted ? 1 : 0;
    const pos_type endPos = alreadyQuoted ? label.size () - 1 : label.size ();

    // An already-quoted label always gets its quotes put back.
    bool needToQuote = alreadyQuoted;

    if (! alreadyQuoted) {
      // Checks that depend only on the ends of the label.
      const char first = label[0];
      const char last = label[label.size () - 1];
      const std::string leadingIndicators ("-?&*!|>'%@`");
      if (first == ' ' || first == '\t' || last == ' ' || last == '\t' ||
          leadingIndicators.find (first) != std::string::npos) {
        needToQuote = true;
      }
    }

    std::string out; // To fill with the (not yet quoted) return value
    out.reserve (label.size () + 2);

    for (pos_type i = startPos; i < endPos; ++i) {
      const char c = label[i];
      switch (c) {
      case '"':
      case '\\':
        out.push_back ('\\'); // Escape the quote or backslash.
        needToQuote = true;
        break;
      case ':':
      case ',':
      case '[':
      case ']':
      case '{':
      case '}':
      case '#':
        // Legal inside double quotes without escaping, but not safe
        // in a plain scalar.
        needToQuote = true;
        break;
      default:
        break;
      }
      out.push_back (c);
    }

    if (needToQuote) {
      return "\"" + out + "\"";
    }
    return out;
  }

} // namespace (anonymous)
01036 
01037 
01038   void TimeMonitor::
01039   summarizeToYaml (Ptr<const Comm<int> > comm,
01040                    std::ostream &out,
01041                    const ETimeMonitorYamlFormat yamlStyle,
01042                    const std::string& filter)
01043   {
01044     using Teuchos::FancyOStream;
01045     using Teuchos::fancyOStream;
01046     using Teuchos::getFancyOStream;
01047     using Teuchos::OSTab;
01048     using Teuchos::RCP;
01049     using Teuchos::rcpFromRef;
01050     using std::endl;
01051     typedef std::vector<std::string>::size_type size_type;
01052 
01053     const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
01054 
01055     // const bool writeGlobalStats = true;
01056     // const bool writeZeroTimers = true;
01057     // const bool alwaysWriteLocal = false;
01058     const ECounterSetOp setOp = Intersection;
01059 
01060     stat_map_type statData;
01061     std::vector<std::string> statNames;
01062     computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
01063 
01064     const int numProcs = comm->getSize();
01065 
01066     // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
01067     // " as the line prefix does not work, else I would prefer that
01068     // method for printing each line of a YAML block sequence (see
01069     // Section 8.2.1 of the YAML 1.2 spec).
01070     //
01071     // Also, I have to set the tab indent string here, rather than in
01072     // OSTab's constructor.  This is because line prefix (which for
01073     // some reason is what OSTab's constructor takes, rather than tab
01074     // indent string) means something different from tab indent
01075     // string, and turning on the line prefix prints all sorts of
01076     // things including "|" for some reason.
01077     RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
01078     pfout->setTabIndentStr ("  ");
01079     FancyOStream& fout = *pfout;
01080 
01081     fout << "# Teuchos::TimeMonitor report" << endl
01082          << "---" << endl;
01083 
01084     // mfh 19 Aug 2012: An important goal of our chosen output format
01085     // was to minimize the nesting depth.  We have managed to keep the
01086     // nesting depth to 3, which is the limit that the current version
01087     // of PylotDB imposes for its YAML input.
01088 
01089     // Outermost level is a dictionary.  (Individual entries of a
01090     // dictionary do _not_ begin with "- ".)  We always print the
01091     // outermost level in standard style, not flow style, for better
01092     // readability.  We begin the outermost level with metadata.
01093     fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
01094          << "Number of processes: " << numProcs << endl
01095          << "Time unit: s" << endl;
01096     // For a key: value pair where the value is a sequence or
01097     // dictionary on the following line, YAML requires a space after
01098     // the colon.
01099     fout << "Statistics collected: ";
01100     // Print list of the names of all the statistics we collected.
01101     if (compact) {
01102       fout << " [";
01103       for (size_type i = 0; i < statNames.size (); ++i) {
01104         fout << quoteLabelForYaml (statNames[i]);
01105         if (i + 1 < statNames.size ()) {
01106           fout << ", ";
01107         }
01108       }
01109       fout << "]" << endl;
01110     }
01111     else {
01112       fout << endl;
01113       OSTab tab1 (pfout);
01114       for (size_type i = 0; i < statNames.size (); ++i) {
01115         fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
01116       }
01117     }
01118 
01119     // Print the list of timer names.
01120     //
01121     // It might be nicer instead to print a map from timer name to all
01122     // of its data, but keeping the maximum nesting depth small
01123     // ensures better compatibility with different parsing tools.
01124     fout << "Timer names: ";
01125     if (compact) {
01126       fout << " [";
01127       size_type ind = 0;
01128       for (stat_map_type::const_iterator it = statData.begin();
01129            it != statData.end(); ++it, ++ind) {
01130         fout << quoteLabelForYaml (it->first);
01131         if (ind + 1 < statData.size ()) {
01132           fout << ", ";
01133         }
01134       }
01135       fout << "]" << endl;
01136     }
01137     else {
01138       fout << endl;
01139       OSTab tab1 (pfout);
01140       for (stat_map_type::const_iterator it = statData.begin();
01141            it != statData.end(); ++it) {
01142         fout << "- " << quoteLabelForYaml (it->first) << endl;
01143       }
01144     }
01145 
01146     // Print times for each timer, as a map from statistic name to its time.
01147     fout << "Total times: ";
01148     if (compact) {
01149       fout << " {";
01150       size_type outerInd = 0;
01151       for (stat_map_type::const_iterator outerIter = statData.begin();
01152            outerIter != statData.end(); ++outerIter, ++outerInd) {
01153         // Print timer name.
01154         fout << quoteLabelForYaml (outerIter->first) << ": ";
01155         // Print that timer's data.
01156         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01157         fout << "{";
01158         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01159           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01160                << curData[innerInd].first;
01161           if (innerInd + 1 < curData.size ()) {
01162             fout << ", ";
01163           }
01164         }
01165         fout << "}";
01166         if (outerInd + 1 < statData.size ()) {
01167           fout << ", ";
01168         }
01169       }
01170       fout << "}" << endl;
01171     }
01172     else {
01173       fout << endl;
01174       OSTab tab1 (pfout);
01175       size_type outerInd = 0;
01176       for (stat_map_type::const_iterator outerIter = statData.begin();
01177            outerIter != statData.end(); ++outerIter, ++outerInd) {
01178         // Print timer name.
01179         fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
01180         // Print that timer's data.
01181         OSTab tab2 (pfout);
01182         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01183         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01184           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01185                << curData[innerInd].first << endl;
01186         }
01187       }
01188     }
01189 
01190     // Print call counts for each timer, for each statistic name.
01191     fout << "Call counts:";
01192     if (compact) {
01193       fout << " {";
01194       size_type outerInd = 0;
01195       for (stat_map_type::const_iterator outerIter = statData.begin();
01196            outerIter != statData.end(); ++outerIter, ++outerInd) {
01197         // Print timer name.
01198         fout << quoteLabelForYaml (outerIter->first) << ": ";
01199         // Print that timer's data.
01200         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01201         fout << "{";
01202         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01203           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01204                << curData[innerInd].second;
01205           if (innerInd + 1 < curData.size ()) {
01206             fout << ", ";
01207           }
01208         }
01209         fout << "}";
01210         if (outerInd + 1 < statData.size ()) {
01211           fout << ", ";
01212         }
01213       }
01214       fout << "}" << endl;
01215     }
01216     else {
01217       fout << endl;
01218       OSTab tab1 (pfout);
01219       size_type outerInd = 0;
01220       for (stat_map_type::const_iterator outerIter = statData.begin();
01221            outerIter != statData.end(); ++outerIter, ++outerInd) {
01222         // Print timer name.
01223         fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
01224         // Print that timer's data.
01225         OSTab tab2 (pfout);
01226         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01227         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01228           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01229                << curData[innerInd].second << endl;
01230         }
01231       }
01232     }
01233   }
01234 
01235   void TimeMonitor::
01236   summarizeToYaml (std::ostream &out,
01237                    const ETimeMonitorYamlFormat yamlStyle,
01238                    const std::string& filter)
01239   {
01240     // The default communicator.  If Trilinos was built with MPI
01241     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
01242     // be a "serial" (no MPI, one "process") communicator.
01243     RCP<const Comm<int> > comm = getDefaultComm ();
01244 
01245     summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
01246   }
01247 
01248   // Default value is false.  We'll set to true once
01249   // setReportParameters() completes successfully.
01250   bool TimeMonitor::setParams_ = false;
01251 
01252   // We have to declare all of these here in order to avoid linker errors.
01253   TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
01254   TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
01255   ECounterSetOp TimeMonitor::setOp_ = Intersection;
01256   bool TimeMonitor::alwaysWriteLocal_ = false;
01257   bool TimeMonitor::writeGlobalStats_ = true;
01258   bool TimeMonitor::writeZeroTimers_ = true;
01259 
01260   void
01261   TimeMonitor::setReportFormatParameter (ParameterList& plist)
01262   {
01263     const std::string name ("Report format");
01264     const std::string defaultValue ("Table");
01265     const std::string docString ("Output format for report of timer statistics");
01266     Array<std::string> strings;
01267     Array<std::string> docs;
01268     Array<ETimeMonitorReportFormat> values;
01269 
01270     strings.push_back ("YAML");
01271     docs.push_back ("YAML (see yaml.org) format");
01272     values.push_back (REPORT_FORMAT_YAML);
01273     strings.push_back ("Table");
01274     docs.push_back ("Tabular format via Teuchos::TableFormat");
01275     values.push_back (REPORT_FORMAT_TABLE);
01276 
01277     setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
01278                                                             docString,
01279                                                             strings (), docs (),
01280                                                             values (), &plist);
01281   }
01282 
01283   void
01284   TimeMonitor::setYamlFormatParameter (ParameterList& plist)
01285   {
01286     const std::string name ("YAML style");
01287     const std::string defaultValue ("spacious");
01288     const std::string docString ("YAML-specific output format");
01289     Array<std::string> strings;
01290     Array<std::string> docs;
01291     Array<ETimeMonitorYamlFormat> values;
01292 
01293     strings.push_back ("compact");
01294     docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
01295                     "yaml.org) for most sequences except the outermost sequence");
01296     values.push_back (YAML_FORMAT_COMPACT);
01297 
01298     strings.push_back ("spacious");
01299     docs.push_back ("Spacious format: avoid flow style");
01300     values.push_back (YAML_FORMAT_SPACIOUS);
01301 
01302     setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
01303                                                           docString,
01304                                                           strings (), docs (),
01305                                                           values (), &plist);
01306   }
01307 
01308   void
01309   TimeMonitor::setSetOpParameter (ParameterList& plist)
01310   {
01311     const std::string name ("How to merge timer sets");
01312     const std::string defaultValue ("Intersection");
01313     const std::string docString ("How to merge differing sets of timers "
01314                                  "across processes");
01315     Array<std::string> strings;
01316     Array<std::string> docs;
01317     Array<ECounterSetOp> values;
01318 
01319     strings.push_back ("Intersection");
01320     docs.push_back ("Compute intersection of timer sets over processes");
01321     values.push_back (Intersection);
01322     strings.push_back ("Union");
01323     docs.push_back ("Compute union of timer sets over processes");
01324     values.push_back (Union);
01325 
01326     setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
01327                                                  strings (), docs (), values (),
01328                                                  &plist);
01329   }
01330 
01331   RCP<const ParameterList>
01332   TimeMonitor::getValidReportParameters ()
01333   {
01334     // Our implementation favors recomputation over persistent
01335     // storage.  That is, we simply recreate the list every time we
01336     // need it.
01337     RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
01338 
01339     const bool alwaysWriteLocal = false;
01340     const bool writeGlobalStats = true;
01341     const bool writeZeroTimers = true;
01342 
01343     setReportFormatParameter (*plist);
01344     setYamlFormatParameter (*plist);
01345     setSetOpParameter (*plist);
01346     plist->set ("alwaysWriteLocal", alwaysWriteLocal,
01347                 "Always output local timers' values on Proc 0");
01348     plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
01349                 "statistics, even if there is only one process in the "
01350                 "communicator");
01351     plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
01352                 "timers that have never been called");
01353 
01354     return rcp_const_cast<const ParameterList> (plist);
01355   }
01356 
01357   void
01358   TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
01359   {
01360     ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
01361     ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
01362     ECounterSetOp setOp = Intersection;
01363     bool alwaysWriteLocal = false;
01364     bool writeGlobalStats = true;
01365     bool writeZeroTimers = true;
01366 
01367     if (params.is_null ()) {
01368       // If we've set parameters before, leave their current values.
01369       // Otherwise, set defaults (below).
01370       if (setParams_) {
01371         return;
01372       }
01373     }
01374     else { // params is nonnull.  Let's read it!
01375       params->validateParametersAndSetDefaults (*getValidReportParameters ());
01376 
01377       reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
01378       yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
01379       setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
01380       alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
01381       writeGlobalStats = params->get<bool> ("writeGlobalStats");
01382       writeZeroTimers = params->get<bool> ("writeZeroTimers");
01383     }
01384     // Defer setting state until here, to ensure the strong exception
01385     // guarantee for this method (either it throws with no externally
01386     // visible state changes, or it returns normally).
01387     reportFormat_ = reportFormat;
01388     yamlStyle_ = yamlStyle;
01389     setOp_ = setOp;
01390     alwaysWriteLocal_ = alwaysWriteLocal;
01391     writeGlobalStats_ = writeGlobalStats;
01392     writeZeroTimers_ = writeZeroTimers;
01393 
01394     setParams_ = true; // Yay, we successfully set parameters!
01395   }
01396 
01397   void
01398   TimeMonitor::report (Ptr<const Comm<int> > comm,
01399                        std::ostream& out,
01400                        const std::string& filter,
01401                        const RCP<ParameterList>& params)
01402   {
01403     setReportParameters (params);
01404 
01405     if (reportFormat_ == REPORT_FORMAT_YAML) {
01406       summarizeToYaml (comm, out, yamlStyle_, filter);
01407     }
01408     else if (reportFormat_ == REPORT_FORMAT_TABLE) {
01409       summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
01410                  writeZeroTimers_, setOp_, filter);
01411     }
01412     else {
01413       TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
01414         "Invalid report format.  This should never happen; ParameterList "
01415         "validation should have caught this.  Please report this bug to the "
01416         "Teuchos developers.");
01417     }
01418   }
01419 
01420   void
01421   TimeMonitor::report (Ptr<const Comm<int> > comm,
01422                        std::ostream& out,
01423                        const RCP<ParameterList>& params)
01424   {
01425     report (comm, out, "", params);
01426   }
01427 
01428   void
01429   TimeMonitor::report (std::ostream& out,
01430                        const std::string& filter,
01431                        const RCP<ParameterList>& params)
01432   {
01433     RCP<const Comm<int> > comm = getDefaultComm ();
01434     report (comm.ptr (), out, filter, params);
01435   }
01436 
01437   void
01438   TimeMonitor::report (std::ostream& out,
01439                        const RCP<ParameterList>& params)
01440   {
01441     RCP<const Comm<int> > comm = getDefaultComm ();
01442     report (comm.ptr (), out, "", params);
01443   }
01444 
01445 
01446 
01447 
01448 } // namespace Teuchos
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines