Teuchos - Trilinos Tools Package Version of the Day
Teuchos_TimeMonitor.cpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 #include "Teuchos_TimeMonitor.hpp"
00043 #include "Teuchos_CommHelpers.hpp"
00044 #include "Teuchos_DefaultComm.hpp"
00045 #include "Teuchos_TableColumn.hpp"
00046 #include "Teuchos_TableFormat.hpp"
00047 #include "Teuchos_StandardParameterEntryValidators.hpp"
00048 #include "Teuchos_ScalarTraits.hpp"
00049 #include <functional>
00050 
00051 
00052 namespace Teuchos {
00105   template<class Ordinal, class ScalarType, class IndexType>
00106   class MaxLoc :
00107     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00108   public:
00109     void
00110     reduce (const Ordinal count,
00111             const std::pair<ScalarType, IndexType> inBuffer[],
00112             std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00113   };
00114 
00115   template<class Ordinal>
00116   class MaxLoc<Ordinal, double, int> :
00117     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00118   public:
00119     void
00120     reduce (const Ordinal count,
00121             const std::pair<double, int> inBuffer[],
00122             std::pair<double, int> inoutBuffer[]) const
00123     {
00124       for (Ordinal ind = 0; ind < count; ++ind) {
00125         const std::pair<double, int>& in = inBuffer[ind];
00126         std::pair<double, int>& inout = inoutBuffer[ind];
00127 
00128         if (in.first > inout.first) {
00129           inout.first = in.first;
00130           inout.second = in.second;
00131         } else if (in.first < inout.first) {
00132           // Don't need to do anything; inout has the values.
00133         } else { // equal, or at least one is NaN.
00134           inout.first = in.first;
00135           inout.second = std::min (in.second, inout.second);
00136         }
00137       }
00138     }
00139   };
00140 
00167   template<class Ordinal, class ScalarType, class IndexType>
00168   class MinLoc :
00169     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00170   public:
00171     void
00172     reduce (const Ordinal count,
00173             const std::pair<ScalarType, IndexType> inBuffer[],
00174             std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00175   };
00176 
00177   template<class Ordinal>
00178   class MinLoc<Ordinal, double, int> :
00179     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00180   public:
00181     void
00182     reduce (const Ordinal count,
00183             const std::pair<double, int> inBuffer[],
00184             std::pair<double, int> inoutBuffer[]) const
00185     {
00186       for (Ordinal ind = 0; ind < count; ++ind) {
00187         const std::pair<double, int>& in = inBuffer[ind];
00188         std::pair<double, int>& inout = inoutBuffer[ind];
00189 
00190         if (in.first < inout.first) {
00191           inout.first = in.first;
00192           inout.second = in.second;
00193         } else if (in.first > inout.first) {
00194           // Don't need to do anything; inout has the values.
00195         } else { // equal, or at least one is NaN.
00196           inout.first = in.first;
00197           inout.second = std::min (in.second, inout.second);
00198         }
00199       }
00200     }
00201   };
00202 
// Map type used internally by TimeMonitor::summarize() and its
// helper functions.  Key: the timer's label (a string).  Value: a
// pair of (total elapsed seconds over all calls to that timer,
// total number of calls to that timer).
typedef std::map<std::string, std::pair<double, int> > timer_map_t;
00208 
00209   TimeMonitor::TimeMonitor (Time& timer, bool reset)
00210     : PerformanceMonitorBase<Time>(timer, reset)
00211   {
00212     if (!isRecursiveCall()) counter().start(reset);
00213   }
00214 
00215   TimeMonitor::~TimeMonitor() {
00216     if (!isRecursiveCall()) counter().stop();
00217   }
00218 
00219   void
00220   TimeMonitor::zeroOutTimers()
00221   {
00222     const Array<RCP<Time> > timers = counters();
00223 
00224     // In debug mode, loop first to check whether any of the timers
00225     // are running, before resetting them.  This ensures that this
00226     // method satisfies the strong exception guarantee (either it
00227     // completes normally, or there are no side effects).
00228 #ifdef TEUCHOS_DEBUG
00229     typedef Array<RCP<Time> >::size_type size_type;
00230     const size_type numTimers = timers.size();
00231     for (size_type i = 0; i < numTimers; ++i) {
00232       Time &timer = *timers[i];
00233       // We throw a runtime_error rather than a logic_error, because
00234       // logic_error suggests a bug in the implementation of
00235       // TimeMonitor.  Calling zeroOutTimers() when a timer is
00236       // running is not TimeMonitor's fault.
00237       TEUCHOS_TEST_FOR_EXCEPTION(timer.isRunning(), std::runtime_error,
00238                                  "The timer i = " << i << " with name \""
00239                                  << timer.name() << "\" is currently running and may not "
00240                                  "be reset.");
00241     }
00242 #endif // TEUCHOS_DEBUG
00243 
00244     for (Array<RCP<Time> >::const_iterator it = timers.begin();
00245          it != timers.end(); ++it) {
00246       (*it)->reset ();
00247     }
00248   }
00249 
00250   // An anonymous namespace is the standard way of limiting linkage of
00251   // its contained routines to file scope.
00252   namespace {
// \brief Build a (name, (elapsed time, call count)) entry whose
//   elapsed time and call count are both zero.
//
// No timer object is created; only the map entry is produced.
//
// \param name The timer's name, copied into the returned entry.
std::pair<std::string, std::pair<double, int> >
makeEmptyTimerDatum (const std::string& name)
{
  const std::pair<double, int> zeroTimeAndCalls (0.0, 0);
  return std::pair<std::string, std::pair<double, int> > (name, zeroTimeAndCalls);
}
00264 
00265     // \fn collectLocalTimerData
00266     // \brief Collect and sort local timer data by timer names.
00267     //
00268     // \param localData [out] Map whose keys are the timer names, and
00269     //   whose value for each key is the total elapsed time (in
00270     //   seconds) and the call count for the timer with that name.
00271     //
00272     // \param localCounters [in] Timers from which to extract data.
00273     //
00274     // \param filter [in] Filter for timer labels.  If filter is not
00275     //   empty, this method will only collect data for local timers
00276     //   whose labels begin with this string.
00277     //
00278     // Extract the total elapsed time and call count from each timer
00279     // in the given array.  Merge results for timers with duplicate
00280     // labels, by summing their total elapsed times and call counts
00281     // pairwise.
00282     void
00283     collectLocalTimerData (timer_map_t& localData,
00284                            ArrayView<const RCP<Time> > localCounters,
00285                            const std::string& filter="")
00286     {
00287       using std::make_pair;
00288       typedef timer_map_t::const_iterator const_iter_t;
00289       typedef timer_map_t::iterator iter_t;
00290 
00291       timer_map_t theLocalData;
00292       for (ArrayView<const RCP<Time> >::const_iterator it = localCounters.begin();
00293            it != localCounters.end(); ++it) {
00294         const std::string& name = (*it)->name();
00295 
00296         // Filter current timer name, if provided filter is nonempty.
00297         // Filter string must _start_ the timer label, not just be in it.
00298         const bool skipThisOne = (filter != "" && name.find (filter) != 0);
00299         if (! skipThisOne) {
00300           const double timing = (*it)->totalElapsedTime();
00301           const int numCalls = (*it)->numCalls();
00302 
00303           // Merge timers with duplicate labels, by summing their
00304           // total elapsed times and call counts.
00305           iter_t loc = theLocalData.find (name);
00306           if (loc == theLocalData.end()) {
00307             // Use loc as an insertion location hint.
00308             theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
00309           }
00310           else {
00311             loc->second.first += timing;
00312             loc->second.second += numCalls;
00313           }
00314         }
00315       }
00316       // This avoids copying the map, and also makes this method
00317       // satisfy the strong exception guarantee.
00318       localData.swap (theLocalData);
00319     }
00320 
00321     // \brief Locally filter out timer data with zero call counts.
00322     //
00323     // \param timerData [in/out]
00324     void
00325     filterZeroData (timer_map_t& timerData)
00326     {
00327       timer_map_t newTimerData;
00328       for (timer_map_t::const_iterator it = timerData.begin();
00329            it != timerData.end(); ++it) {
00330         if (it->second.second > 0) {
00331           newTimerData[it->first] = it->second;
00332         }
00333       }
00334       timerData.swap (newTimerData);
00335     }
00336 
00358     void
00359     collectLocalTimerDataAndNames (timer_map_t& localTimerData,
00360                                    Array<std::string>& localTimerNames,
00361                                    ArrayView<const RCP<Time> > localTimers,
00362                                    const bool writeZeroTimers,
00363                                    const std::string& filter="")
00364     {
00365       // Collect and sort local timer data by timer names.
00366       collectLocalTimerData (localTimerData, localTimers, filter);
00367 
00368       // Filter out zero data locally first.  This ensures that if we
00369       // are writing global stats, and if a timer name exists in the
00370       // set of global names, then that timer has a nonzero call count
00371       // on at least one MPI process.
00372       if (! writeZeroTimers) {
00373         filterZeroData (localTimerData);
00374       }
00375 
00376       // Extract the set of local timer names.  The std::map keeps
00377       // them sorted alphabetically.
00378       localTimerNames.reserve (localTimerData.size());
00379       for (timer_map_t::const_iterator it = localTimerData.begin();
00380            it != localTimerData.end(); ++it) {
00381         localTimerNames.push_back (it->first);
00382       }
00383     }
00384 
00419     void
00420     collectGlobalTimerData (timer_map_t& globalTimerData,
00421                             Array<std::string>& globalTimerNames,
00422                             timer_map_t& localTimerData,
00423                             Array<std::string>& localTimerNames,
00424                             Ptr<const Comm<int> > comm,
00425                             const bool alwaysWriteLocal,
00426                             const ECounterSetOp setOp)
00427     {
00428       // There may be some global timers that are not local timers on
00429       // the calling MPI process(es).  In that case, if
00430       // alwaysWriteLocal is true, then we need to fill in the
00431       // "missing" local timers.  That will ensure that both global
00432       // and local timer columns in the output table have the same
00433       // number of rows.  The collectLocalTimerDataAndNames() method
00434       // may have already filtered out local timers with zero call
00435       // counts (if its writeZeroTimers argument was false), but we
00436       // won't be filtering again.  Thus, any local timer data we
00437       // insert here won't get filtered out.
00438       //
00439       // Note that calling summarize() with writeZeroTimers == false
00440       // will still do what it says, even if we insert local timers
00441       // with zero call counts here.
00442 
00443       // This does the correct and inexpensive thing (just copies the
00444       // timer data) if numProcs == 1.  Otherwise, it initiates a
00445       // communication with \f$O(\log P)\f$ messages along the
00446       // critical path, where \f$P\f$ is the number of participating
00447       // processes.
00448       mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
00449 
00450 #ifdef TEUCHOS_DEBUG
00451       {
00452         // Sanity check that all processes have the name number of
00453         // global timer names.
00454         const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
00455         timer_map_t::size_type minNumGlobalNames = 0;
00456         timer_map_t::size_type maxNumGlobalNames = 0;
00457         reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
00458                    outArg (minNumGlobalNames));
00459         reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
00460                    outArg (maxNumGlobalNames));
00461         TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
00462           std::logic_error, "Min # global timer names = " << minNumGlobalNames
00463           << " != max # global timer names = " << maxNumGlobalNames
00464           << ".  Please report this bug to the Teuchos developers.");
00465         TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
00466           std::logic_error, "My # global timer names = " << myNumGlobalNames
00467           << " != min # global timer names = " << minNumGlobalNames
00468           << ".  Please report this bug to the Teuchos developers.");
00469       }
00470 #endif // TEUCHOS_DEBUG
00471 
00472       // mergeCounterNames() just merges the counters' names, not
00473       // their actual data.  Now we need to fill globalTimerData with
00474       // this process' timer data for the timers in globalTimerNames.
00475       //
00476       // All processes need the full list of global timers, since
00477       // there may be some global timers that are not local timers.
00478       // That's why mergeCounterNames() has to be an all-reduce, not
00479       // just a reduction to Proc 0.
00480       //
00481       // Insertion optimization: if the iterator given to map::insert
00482       // points right before where we want to insert, insertion is
00483       // O(1).  globalTimerNames is sorted, so feeding the iterator
00484       // output of map::insert into the next invocation's input should
00485       // make the whole insertion O(N) where N is the number of
00486       // entries in globalTimerNames.
00487       timer_map_t::iterator globalMapIter = globalTimerData.begin();
00488       timer_map_t::iterator localMapIter;
00489       for (Array<string>::const_iterator it = globalTimerNames.begin();
00490            it != globalTimerNames.end(); ++it) {
00491         const std::string& globalName = *it;
00492         localMapIter = localTimerData.find (globalName);
00493 
00494         if (localMapIter == localTimerData.end()) {
00495           if (alwaysWriteLocal) {
00496             // If there are some global timers that are not local
00497             // timers, and if we want to print local timers, we insert
00498             // a local timer datum with zero elapsed time and zero
00499             // call count into localTimerData as well.  This will
00500             // ensure that both global and local timer columns in the
00501             // output table have the same number of rows.
00502             //
00503             // We really only need to do this on Proc 0, which is the
00504             // only process that currently may print local timers.
00505             // However, we do it on all processes, just in case
00506             // someone later wants to modify this function to print
00507             // out local timer data for some process other than Proc
00508             // 0.  This extra computation won't affect the cost along
00509             // the critical path, for future computations in which
00510             // Proc 0 participates.
00511             localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
00512 
00513             // Make sure the missing global name gets added to the
00514             // list of local names.  We'll re-sort the list of local
00515             // names below.
00516             localTimerNames.push_back (globalName);
00517           }
00518           // There's a global timer that's not a local timer.  Add it
00519           // to our pre-merge version of the global timer data so that
00520           // we can safely merge the global timer data later.
00521           globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
00522         }
00523         else {
00524           // We have this global timer name in our local timer list.
00525           // Fill in our pre-merge version of the global timer data
00526           // with our local data.
00527           globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
00528         }
00529       }
00530 
00531       if (alwaysWriteLocal) {
00532         // Re-sort the list of local timer names, since we may have
00533         // inserted "missing" names above.
00534         std::sort (localTimerNames.begin(), localTimerNames.end());
00535       }
00536 
00537 #ifdef TEUCHOS_DEBUG
00538       {
00539         // Sanity check that all processes have the name number of
00540         // global timers.
00541         const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
00542         timer_map_t::size_type minNumGlobalTimers = 0;
00543         timer_map_t::size_type maxNumGlobalTimers = 0;
00544         reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
00545                    outArg (minNumGlobalTimers));
00546         reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
00547                    outArg (maxNumGlobalTimers));
00548         TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
00549                                    std::logic_error, "Min # global timers = " << minNumGlobalTimers
00550                                    << " != max # global timers = " << maxNumGlobalTimers
00551                                    << ".  Please report this bug to the Teuchos developers.");
00552         TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
00553                                    std::logic_error, "My # global timers = " << myNumGlobalTimers
00554                                    << " != min # global timers = " << minNumGlobalTimers
00555                                    << ".  Please report this bug to the Teuchos developers.");
00556       }
00557 #endif // TEUCHOS_DEBUG
00558     }
00559 
00597     void
00598     computeGlobalTimerStats (stat_map_type& statData,
00599                              std::vector<std::string>& statNames,
00600                              Ptr<const Comm<int> > comm,
00601                              const timer_map_t& globalTimerData)
00602     {
00603       using Teuchos::ScalarTraits;
00604 
00605       const int numTimers = static_cast<int> (globalTimerData.size());
00606       const int numProcs = comm->getSize();
00607 
00608       // Extract pre-reduction timings and call counts into a
00609       // sequential array.  This array will be in the same order as
00610       // the global timer names are in the map.
00611       Array<std::pair<double, int> > timingsAndCallCounts;
00612       timingsAndCallCounts.reserve (numTimers);
00613       for (timer_map_t::const_iterator it = globalTimerData.begin();
00614            it != globalTimerData.end(); ++it) {
00615         timingsAndCallCounts.push_back (it->second);
00616       }
00617 
00618       // For each timer name, compute the min timing and its
00619       // corresponding call count.  If two processes have the same
00620       // timing but different call counts, the minimum call count will
00621       // be used.
00622       Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
00623       if (numTimers > 0) {
00624         reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
00625                    &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
00626       }
00627 
00628       // For each timer name, compute the max timing and its
00629       // corresponding call count.  If two processes have the same
00630       // timing but different call counts, the minimum call count will
00631       // be used.
00632       Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
00633       if (numTimers > 0) {
00634         reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
00635                    &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
00636       }
00637 
00638       // For each timer name, compute the mean-over-processes timing,
00639       // the mean call count, and the mean-over-call-counts timing.
00640       // The mean call count is reported as a double to allow a
00641       // fractional value.
00642       //
00643       // Each local timing is really the total timing over all local
00644       // invocations.  The number of local invocations is the call
00645       // count.  Thus, the mean-over-call-counts timing is the sum of
00646       // all the timings (over all processes), divided by the sum of
00647       // all the call counts (over all processes).  We compute it in a
00648       // different way to over unnecessary overflow.
00649       Array<double> meanOverCallCountsTimings (numTimers);
00650       Array<double> meanOverProcsTimings (numTimers);
00651       Array<double> meanCallCounts (numTimers);
00652       {
00653         // When summing, first scale by the number of processes.  This
00654         // avoids unnecessary overflow, and also gives us the mean
00655         // call count automatically.
00656         Array<double> scaledTimings (numTimers);
00657         Array<double> scaledCallCounts (numTimers);
00658         const double P = static_cast<double> (numProcs);
00659         for (int k = 0; k < numTimers; ++k) {
00660           const double timing = timingsAndCallCounts[k].first;
00661           const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
00662 
00663           scaledTimings[k] = timing / P;
00664           scaledCallCounts[k] = callCount / P;
00665         }
00666         if (numTimers > 0) {
00667           reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
00668                      &meanOverProcsTimings[0]);
00669           reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
00670                      &meanCallCounts[0]);
00671         }
00672         // We don't have to undo the scaling for the mean timings;
00673         // just divide by the scaled call count.
00674         for (int k = 0; k < numTimers; ++k) {
00675           if ( meanCallCounts[k] > ScalarTraits<double>::zero() ) {
00676             meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
00677           }
00678           else {
00679             meanOverCallCountsTimings[k] = ScalarTraits<double>::zero();
00680           }
00681         }
00682       }
00683 
00684       // Reformat the data into the map of statistics.  Be sure that
00685       // each value (the std::vector of (timing, call count) pairs,
00686       // each entry of which is a different statistic) preserves the
00687       // order of statNames.
00688       statNames.resize (4);
00689       statNames[0] = "MinOverProcs";
00690       statNames[1] = "MeanOverProcs";
00691       statNames[2] = "MaxOverProcs";
00692       statNames[3] = "MeanOverCallCounts";
00693 
00694       stat_map_type::iterator statIter = statData.end();
00695       timer_map_t::const_iterator it = globalTimerData.begin();
00696       for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
00697         std::vector<std::pair<double, double> > curData (4);
00698         curData[0] = minTimingsAndCallCounts[k];
00699         curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
00700         curData[2] = maxTimingsAndCallCounts[k];
00701         curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
00702 
00703         // statIter gives an insertion location hint that makes each
00704         // insertion O(1), since we remember the location of the last
00705         // insertion.
00706         statIter = statData.insert (statIter, std::make_pair (it->first, curData));
00707       }
00708     }
00709 
00710 
00727     RCP<const Comm<int> >
00728     getDefaultComm ()
00729     {
00730       // The default communicator.  If Trilinos was built with MPI
00731       // enabled, this should be MPI_COMM_WORLD.  (If MPI has not yet
00732       // been initialized, it's not valid to use the communicator!)
00733       // Otherwise, this should be a "serial" (no MPI, one "process")
00734       // communicator.
00735       RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
00736 
00737 #ifdef HAVE_MPI
00738       {
00739         int mpiHasBeenStarted = 0;
00740         MPI_Initialized (&mpiHasBeenStarted);
00741         if (! mpiHasBeenStarted) {
00742           // Make pComm a new "serial communicator."
00743           comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
00744         }
00745       }
00746 #endif // HAVE_MPI
00747       return comm;
00748     }
00749 
00750   } // namespace (anonymous)
00751 
00752 
00753   void
00754   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
00755                                              std::vector<std::string>& statNames,
00756                                              Ptr<const Comm<int> > comm,
00757                                              const ECounterSetOp setOp,
00758                                              const std::string& filter)
00759   {
00760     // Collect local timer data and names.  Filter out timers with
00761     // zero call counts if writeZeroTimers is false.  Also, apply the
00762     // timer label filter at this point, so we don't have to compute
00763     // statistics on timers we don't want to display anyway.
00764     timer_map_t localTimerData;
00765     Array<std::string> localTimerNames;
00766     const bool writeZeroTimers = false;
00767     collectLocalTimerDataAndNames (localTimerData, localTimerNames,
00768                                    counters(), writeZeroTimers, filter);
00769     // Merge the local timer data and names into global timer data and
00770     // names.
00771     timer_map_t globalTimerData;
00772     Array<std::string> globalTimerNames;
00773     const bool alwaysWriteLocal = false;
00774     collectGlobalTimerData (globalTimerData, globalTimerNames,
00775                             localTimerData, localTimerNames,
00776                             comm, alwaysWriteLocal, setOp);
00777     // Compute statistics on the data.
00778     computeGlobalTimerStats (statData, statNames, comm, globalTimerData);
00779   }
00780 
00781 
00782   void
00783   TimeMonitor::summarize (Ptr<const Comm<int> > comm,
00784                           std::ostream& out,
00785                           const bool alwaysWriteLocal,
00786                           const bool writeGlobalStats,
00787                           const bool writeZeroTimers,
00788                           const ECounterSetOp setOp,
00789                           const std::string& filter)
00790   {
00791     //
00792     // We can't just call computeGlobalTimerStatistics(), since
00793     // summarize() has different options that affect whether global
00794     // statistics are computed and printed.
00795     //
00796     const int numProcs = comm->getSize();
00797     const int myRank = comm->getRank();
00798 
00799     // Collect local timer data and names.  Filter out timers with
00800     // zero call counts if writeZeroTimers is false.  Also, apply the
00801     // timer label filter at this point, so we don't have to compute
00802     // statistics on timers we don't want to display anyway.
00803     timer_map_t localTimerData;
00804     Array<std::string> localTimerNames;
00805     collectLocalTimerDataAndNames (localTimerData, localTimerNames,
00806                                    counters(), writeZeroTimers, filter);
00807 
00808     // If we're computing global statistics, merge the local timer
00809     // data and names into global timer data and names, and compute
00810     // global timer statistics.  Otherwise, leave the global data
00811     // empty.
00812     timer_map_t globalTimerData;
00813     Array<std::string> globalTimerNames;
00814     stat_map_type statData;
00815     std::vector<std::string> statNames;
00816     if (writeGlobalStats) {
00817       collectGlobalTimerData (globalTimerData, globalTimerNames,
00818                               localTimerData, localTimerNames,
00819                               comm, alwaysWriteLocal, setOp);
00820       // Compute statistics on the data, but only if the communicator
00821       // contains more than one process.  Otherwise, statistics don't
00822       // make sense and we don't print them (see below).
00823       if (numProcs > 1) {
00824         computeGlobalTimerStats (statData, statNames, comm, globalTimerData);
00825       }
00826     }
00827 
00828     // Precision of floating-point numbers in the table.
00829     const int precision = format().precision();
00830 
00831     // All columns of the table, in order.
00832     Array<TableColumn> tableColumns;
00833 
00834     // Labels of all the columns of the table.
00835     // We will append to this when we add each column.
00836     Array<std::string> titles;
00837 
00838     // Widths (in number of characters) of each column.
00839     // We will append to this when we add each column.
00840     Array<int> columnWidths;
00841 
00842     // Table column containing all timer names.  If writeGlobalStats
00843     // is true, we use the global timer names, otherwise we use the
00844     // local timer names.  We build the table on all processes
00845     // redundantly, but only print on Rank 0.
00846     {
00847       titles.append ("Timer Name");
00848 
00849       // The column labels depend on whether we are computing global statistics.
00850       TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
00851       tableColumns.append (nameCol);
00852 
00853       // Each column is as wide as it needs to be to hold both its
00854       // title and all of the column data.  This column's title is the
00855       // current last entry of the titles array.
00856       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
00857     }
00858 
00859     // Table column containing local timer stats, if applicable.  We
00860     // only write local stats if asked, only on MPI Proc 0, and only
00861     // if there is more than one MPI process in the communicator
00862     // (otherwise local stats == global stats, so we just print the
00863     // global stats).  In this case, we've padded the local data on
00864     // Proc 0 if necessary to match the global timer list, so that the
00865     // columns have the same number of rows.
00866     if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
00867       titles.append ("Local time (num calls)");
00868 
00869       // Copy local timer data out of the array-of-structs into
00870       // separate arrays, for display in the table.
00871       Array<double> localTimings;
00872       Array<double> localNumCalls;
00873       for (timer_map_t::const_iterator it = localTimerData.begin();
00874            it != localTimerData.end(); ++it) {
00875         localTimings.push_back (it->second.first);
00876         localNumCalls.push_back (static_cast<double> (it->second.second));
00877       }
00878       TableColumn timeAndCalls (localTimings, localNumCalls, precision, true);
00879       tableColumns.append (timeAndCalls);
00880       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00881     }
00882 
00883     if (writeGlobalStats) {
00884       // If there's only 1 process in the communicator, don't display
00885       // statistics; statistics don't make sense in that case.  Just
00886       // display the timings and call counts.  If there's more than 1
00887       // process, do display statistics.
00888       if (numProcs == 1) {
00889         // Extract timings and the call counts from globalTimerData.
00890         Array<double> globalTimings;
00891         Array<double> globalNumCalls;
00892         for (timer_map_t::const_iterator it = globalTimerData.begin();
00893              it != globalTimerData.end(); ++it) {
00894           globalTimings.push_back (it->second.first);
00895           globalNumCalls.push_back (static_cast<double> (it->second.second));
00896         }
00897         // Print the table column.
00898         titles.append ("Global time (num calls)");
00899         TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true);
00900         tableColumns.append (timeAndCalls);
00901         columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00902       }
00903       else { // numProcs > 1
00904         // Print a table column for each statistic.  statNames and
00905         // each value in statData use the same ordering, so we can
00906         // iterate over valid indices of statNames to display the
00907         // statistics in the right order.
00908         const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
00909         for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
00910           // Extract lists of timings and their call counts for the
00911           // current statistic.
00912           Array<double> statTimings (numGlobalTimers);
00913           Array<double> statCallCounts (numGlobalTimers);
00914           stat_map_type::const_iterator it = statData.begin();
00915           for (int k = 0; it != statData.end(); ++it, ++k) {
00916             statTimings[k] = (it->second[statInd]).first;
00917             statCallCounts[k] = (it->second[statInd]).second;
00918           }
00919           // Print the table column.
00920           const std::string& statisticName = statNames[statInd];
00921           const std::string titleString = statisticName;
00922           titles.append (titleString);
00923           TableColumn timeAndCalls (statTimings, statCallCounts, precision, true);
00924           tableColumns.append (timeAndCalls);
00925           columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00926         }
00927       }
00928     }
00929 
00930     // Print the whole table to the given output stream on MPI Rank 0.
00931     format().setColumnWidths (columnWidths);
00932     if (myRank == 0) {
00933       std::ostringstream theTitle;
00934       theTitle << "TimeMonitor results over " << numProcs << " processor"
00935                << (numProcs > 1 ? "s" : "");
00936       format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
00937     }
00938   }
00939 
00940   void
00941   TimeMonitor::summarize (std::ostream &out,
00942                           const bool alwaysWriteLocal,
00943                           const bool writeGlobalStats,
00944                           const bool writeZeroTimers,
00945                           const ECounterSetOp setOp,
00946                           const std::string& filter)
00947   {
00948     // The default communicator.  If Trilinos was built with MPI
00949     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
00950     // be a "serial" (no MPI, one "process") communicator.
00951     RCP<const Comm<int> > comm = getDefaultComm();
00952 
00953     summarize (comm.ptr(), out, alwaysWriteLocal,
00954                writeGlobalStats, writeZeroTimers, setOp, filter);
00955   }
00956 
00957   void
00958   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
00959                                              std::vector<std::string>& statNames,
00960                                              const ECounterSetOp setOp,
00961                                              const std::string& filter)
00962   {
00963     // The default communicator.  If Trilinos was built with MPI
00964     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
00965     // be a "serial" (no MPI, one "process") communicator.
00966     RCP<const Comm<int> > comm = getDefaultComm();
00967 
00968     computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
00969   }
00970 
00971 
00972   namespace {
00996     std::string
00997     quoteLabelForYaml (const std::string& label)
00998     {
00999       // YAML allows empty keys in key: value pairs.  See Section 7.2
01000       // of the YAML 1.2 spec.  We thus let an empty label pass
01001       // through without quoting or other special treatment.
01002       if (label.empty ()) {
01003         return label;
01004       }
01005 
01006       // Check whether the label is already quoted.  If so, we don't
01007       // need to quote it again.  However, we do need to quote any
01008       // quote symbols in the string inside the outer quotes.
01009       const bool alreadyQuoted = label.size () >= 2 &&
01010         label[0] == '"' && label[label.size() - 1] == '"';
01011 
01012       // We need to quote if there are any colons or (inner) quotes in
01013       // the string.  We'll determine this as we read through the
01014       // string and escape any characters that need escaping.
01015       bool needToQuote = false;
01016 
01017       std::string out; // To fill with the return value
01018       out.reserve (label.size ());
01019 
01020       const size_t startPos = alreadyQuoted ? 1 : 0;
01021       const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();
01022       for (size_t i = startPos; i < endPos; ++i) {
01023         const char c = label[i];
01024         if (c == '"' || c == '\\') {
01025           out.push_back ('\\'); // Escape the quote or backslash.
01026           needToQuote = true;
01027         }
01028         else if (c == ':') {
01029           needToQuote = true;
01030         }
01031         out.push_back (c);
01032       }
01033 
01034       if (needToQuote || alreadyQuoted) {
01035         // If the input string was already quoted, then out doesn't
01036         // include its quotes, so we have to add them back in.
01037         return "\"" + out + "\"";
01038       }
01039       else {
01040         return out;
01041       }
01042     }
01043 
01044   } // namespace (anonymous)
01045 
01046 
01047   void TimeMonitor::
01048   summarizeToYaml (Ptr<const Comm<int> > comm,
01049                    std::ostream &out,
01050                    const ETimeMonitorYamlFormat yamlStyle,
01051                    const std::string& filter)
01052   {
01053     using Teuchos::FancyOStream;
01054     using Teuchos::fancyOStream;
01055     using Teuchos::getFancyOStream;
01056     using Teuchos::OSTab;
01057     using Teuchos::RCP;
01058     using Teuchos::rcpFromRef;
01059     using std::endl;
01060     typedef std::vector<std::string>::size_type size_type;
01061 
01062     const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
01063 
01064     // const bool writeGlobalStats = true;
01065     // const bool writeZeroTimers = true;
01066     // const bool alwaysWriteLocal = false;
01067     const ECounterSetOp setOp = Intersection;
01068 
01069     stat_map_type statData;
01070     std::vector<std::string> statNames;
01071     computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
01072 
01073     const int numProcs = comm->getSize();
01074 
01075     // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
01076     // " as the line prefix does not work, else I would prefer that
01077     // method for printing each line of a YAML block sequence (see
01078     // Section 8.2.1 of the YAML 1.2 spec).
01079     //
01080     // Also, I have to set the tab indent string here, rather than in
01081     // OSTab's constructor.  This is because line prefix (which for
01082     // some reason is what OSTab's constructor takes, rather than tab
01083     // indent string) means something different from tab indent
01084     // string, and turning on the line prefix prints all sorts of
01085     // things including "|" for some reason.
01086     RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
01087     pfout->setTabIndentStr ("  ");
01088     FancyOStream& fout = *pfout;
01089 
01090     fout << "# Teuchos::TimeMonitor report" << endl
01091          << "---" << endl;
01092 
01093     // mfh 19 Aug 2012: An important goal of our chosen output format
01094     // was to minimize the nesting depth.  We have managed to keep the
01095     // nesting depth to 3, which is the limit that the current version
01096     // of PylotDB imposes for its YAML input.
01097 
01098     // Outermost level is a dictionary.  (Individual entries of a
01099     // dictionary do _not_ begin with "- ".)  We always print the
01100     // outermost level in standard style, not flow style, for better
01101     // readability.  We begin the outermost level with metadata.
01102     fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
01103          << "Number of processes: " << numProcs << endl
01104          << "Time unit: s" << endl;
01105     // For a key: value pair where the value is a sequence or
01106     // dictionary on the following line, YAML requires a space after
01107     // the colon.
01108     fout << "Statistics collected: ";
01109     // Print list of the names of all the statistics we collected.
01110     if (compact) {
01111       fout << " [";
01112       for (size_type i = 0; i < statNames.size (); ++i) {
01113         fout << quoteLabelForYaml (statNames[i]);
01114         if (i + 1 < statNames.size ()) {
01115           fout << ", ";
01116         }
01117       }
01118       fout << "]" << endl;
01119     }
01120     else {
01121       fout << endl;
01122       OSTab tab1 (pfout);
01123       for (size_type i = 0; i < statNames.size (); ++i) {
01124         fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
01125       }
01126     }
01127 
01128     // Print the list of timer names.
01129     //
01130     // It might be nicer instead to print a map from timer name to all
01131     // of its data, but keeping the maximum nesting depth small
01132     // ensures better compatibility with different parsing tools.
01133     fout << "Timer names: ";
01134     if (compact) {
01135       fout << " [";
01136       size_type ind = 0;
01137       for (stat_map_type::const_iterator it = statData.begin();
01138            it != statData.end(); ++it, ++ind) {
01139         fout << quoteLabelForYaml (it->first);
01140         if (ind + 1 < statData.size ()) {
01141           fout << ", ";
01142         }
01143       }
01144       fout << "]" << endl;
01145     }
01146     else {
01147       fout << endl;
01148       OSTab tab1 (pfout);
01149       for (stat_map_type::const_iterator it = statData.begin();
01150            it != statData.end(); ++it) {
01151         fout << "- " << quoteLabelForYaml (it->first) << endl;
01152       }
01153     }
01154 
01155     // Print times for each timer, as a map from statistic name to its time.
01156     fout << "Total times: ";
01157     if (compact) {
01158       fout << " {";
01159       size_type outerInd = 0;
01160       for (stat_map_type::const_iterator outerIter = statData.begin();
01161            outerIter != statData.end(); ++outerIter, ++outerInd) {
01162         // Print timer name.
01163         fout << quoteLabelForYaml (outerIter->first) << ": ";
01164         // Print that timer's data.
01165         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01166         fout << "{";
01167         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01168           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01169                << curData[innerInd].first;
01170           if (innerInd + 1 < curData.size ()) {
01171             fout << ", ";
01172           }
01173         }
01174         fout << "}";
01175         if (outerInd + 1 < statData.size ()) {
01176           fout << ", ";
01177         }
01178       }
01179       fout << "}" << endl;
01180     }
01181     else {
01182       fout << endl;
01183       OSTab tab1 (pfout);
01184       size_type outerInd = 0;
01185       for (stat_map_type::const_iterator outerIter = statData.begin();
01186            outerIter != statData.end(); ++outerIter, ++outerInd) {
01187         // Print timer name.
01188         fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
01189         // Print that timer's data.
01190         OSTab tab2 (pfout);
01191         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01192         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01193           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01194                << curData[innerInd].first << endl;
01195         }
01196       }
01197     }
01198 
01199     // Print call counts for each timer, for each statistic name.
01200     fout << "Call counts:";
01201     if (compact) {
01202       fout << " {";
01203       size_type outerInd = 0;
01204       for (stat_map_type::const_iterator outerIter = statData.begin();
01205            outerIter != statData.end(); ++outerIter, ++outerInd) {
01206         // Print timer name.
01207         fout << quoteLabelForYaml (outerIter->first) << ": ";
01208         // Print that timer's data.
01209         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01210         fout << "{";
01211         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01212           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01213                << curData[innerInd].second;
01214           if (innerInd + 1 < curData.size ()) {
01215             fout << ", ";
01216           }
01217         }
01218         fout << "}";
01219         if (outerInd + 1 < statData.size ()) {
01220           fout << ", ";
01221         }
01222       }
01223       fout << "}" << endl;
01224     }
01225     else {
01226       fout << endl;
01227       OSTab tab1 (pfout);
01228       size_type outerInd = 0;
01229       for (stat_map_type::const_iterator outerIter = statData.begin();
01230            outerIter != statData.end(); ++outerIter, ++outerInd) {
01231         // Print timer name.
01232         fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
01233         // Print that timer's data.
01234         OSTab tab2 (pfout);
01235         const std::vector<std::pair<double, double> >& curData = outerIter->second;
01236         for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
01237           fout << quoteLabelForYaml (statNames[innerInd]) << ": "
01238                << curData[innerInd].second << endl;
01239         }
01240       }
01241     }
01242   }
01243 
01244   void TimeMonitor::
01245   summarizeToYaml (std::ostream &out,
01246                    const ETimeMonitorYamlFormat yamlStyle,
01247                    const std::string& filter)
01248   {
01249     // The default communicator.  If Trilinos was built with MPI
01250     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
01251     // be a "serial" (no MPI, one "process") communicator.
01252     RCP<const Comm<int> > comm = getDefaultComm ();
01253 
01254     summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
01255   }
01256 
  // Whether setReportParameters() has completed successfully at least
  // once.  The initial value is false; setReportParameters() sets it
  // to true just before it returns normally.
  bool TimeMonitor::setParams_ = false;

  // Out-of-class definitions of the remaining static data members.
  // We have to define all of these here in order to avoid linker
  // errors.  The initial values are the same as the defaults that
  // setReportParameters() starts from.
  TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
  TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
  ECounterSetOp TimeMonitor::setOp_ = Intersection;
  bool TimeMonitor::alwaysWriteLocal_ = false;
  bool TimeMonitor::writeGlobalStats_ = true;
  bool TimeMonitor::writeZeroTimers_ = true;
01268 
01269   void
01270   TimeMonitor::setReportFormatParameter (ParameterList& plist)
01271   {
01272     const std::string name ("Report format");
01273     const std::string defaultValue ("Table");
01274     const std::string docString ("Output format for report of timer statistics");
01275     Array<std::string> strings;
01276     Array<std::string> docs;
01277     Array<ETimeMonitorReportFormat> values;
01278 
01279     strings.push_back ("YAML");
01280     docs.push_back ("YAML (see yaml.org) format");
01281     values.push_back (REPORT_FORMAT_YAML);
01282     strings.push_back ("Table");
01283     docs.push_back ("Tabular format via Teuchos::TableFormat");
01284     values.push_back (REPORT_FORMAT_TABLE);
01285 
01286     setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
01287                                                             docString,
01288                                                             strings (), docs (),
01289                                                             values (), &plist);
01290   }
01291 
01292   void
01293   TimeMonitor::setYamlFormatParameter (ParameterList& plist)
01294   {
01295     const std::string name ("YAML style");
01296     const std::string defaultValue ("spacious");
01297     const std::string docString ("YAML-specific output format");
01298     Array<std::string> strings;
01299     Array<std::string> docs;
01300     Array<ETimeMonitorYamlFormat> values;
01301 
01302     strings.push_back ("compact");
01303     docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
01304                     "yaml.org) for most sequences except the outermost sequence");
01305     values.push_back (YAML_FORMAT_COMPACT);
01306 
01307     strings.push_back ("spacious");
01308     docs.push_back ("Spacious format: avoid flow style");
01309     values.push_back (YAML_FORMAT_SPACIOUS);
01310 
01311     setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
01312                                                           docString,
01313                                                           strings (), docs (),
01314                                                           values (), &plist);
01315   }
01316 
01317   void
01318   TimeMonitor::setSetOpParameter (ParameterList& plist)
01319   {
01320     const std::string name ("How to merge timer sets");
01321     const std::string defaultValue ("Intersection");
01322     const std::string docString ("How to merge differing sets of timers "
01323                                  "across processes");
01324     Array<std::string> strings;
01325     Array<std::string> docs;
01326     Array<ECounterSetOp> values;
01327 
01328     strings.push_back ("Intersection");
01329     docs.push_back ("Compute intersection of timer sets over processes");
01330     values.push_back (Intersection);
01331     strings.push_back ("Union");
01332     docs.push_back ("Compute union of timer sets over processes");
01333     values.push_back (Union);
01334 
01335     setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
01336                                                  strings (), docs (), values (),
01337                                                  &plist);
01338   }
01339 
01340   RCP<const ParameterList>
01341   TimeMonitor::getValidReportParameters ()
01342   {
01343     // Our implementation favors recomputation over persistent
01344     // storage.  That is, we simply recreate the list every time we
01345     // need it.
01346     RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
01347 
01348     const bool alwaysWriteLocal = false;
01349     const bool writeGlobalStats = true;
01350     const bool writeZeroTimers = true;
01351 
01352     setReportFormatParameter (*plist);
01353     setYamlFormatParameter (*plist);
01354     setSetOpParameter (*plist);
01355     plist->set ("alwaysWriteLocal", alwaysWriteLocal,
01356                 "Always output local timers' values on Proc 0");
01357     plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
01358                 "statistics, even if there is only one process in the "
01359                 "communicator");
01360     plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
01361                 "timers that have never been called");
01362 
01363     return rcp_const_cast<const ParameterList> (plist);
01364   }
01365 
01366   void
01367   TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
01368   {
01369     ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
01370     ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
01371     ECounterSetOp setOp = Intersection;
01372     bool alwaysWriteLocal = false;
01373     bool writeGlobalStats = true;
01374     bool writeZeroTimers = true;
01375 
01376     if (params.is_null ()) {
01377       // If we've set parameters before, leave their current values.
01378       // Otherwise, set defaults (below).
01379       if (setParams_) {
01380         return;
01381       }
01382     }
01383     else { // params is nonnull.  Let's read it!
01384       params->validateParametersAndSetDefaults (*getValidReportParameters ());
01385 
01386       reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
01387       yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
01388       setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
01389       alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
01390       writeGlobalStats = params->get<bool> ("writeGlobalStats");
01391       writeZeroTimers = params->get<bool> ("writeZeroTimers");
01392     }
01393     // Defer setting state until here, to ensure the strong exception
01394     // guarantee for this method (either it throws with no externally
01395     // visible state changes, or it returns normally).
01396     reportFormat_ = reportFormat;
01397     yamlStyle_ = yamlStyle;
01398     setOp_ = setOp;
01399     alwaysWriteLocal_ = alwaysWriteLocal;
01400     writeGlobalStats_ = writeGlobalStats;
01401     writeZeroTimers_ = writeZeroTimers;
01402 
01403     setParams_ = true; // Yay, we successfully set parameters!
01404   }
01405 
01406   void
01407   TimeMonitor::report (Ptr<const Comm<int> > comm,
01408                        std::ostream& out,
01409                        const std::string& filter,
01410                        const RCP<ParameterList>& params)
01411   {
01412     setReportParameters (params);
01413 
01414     if (reportFormat_ == REPORT_FORMAT_YAML) {
01415       summarizeToYaml (comm, out, yamlStyle_, filter);
01416     }
01417     else if (reportFormat_ == REPORT_FORMAT_TABLE) {
01418       summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
01419                  writeZeroTimers_, setOp_, filter);
01420     }
01421     else {
01422       TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
01423         "Invalid report format.  This should never happen; ParameterList "
01424         "validation should have caught this.  Please report this bug to the "
01425         "Teuchos developers.");
01426     }
01427   }
01428 
01429   void
01430   TimeMonitor::report (Ptr<const Comm<int> > comm,
01431                        std::ostream& out,
01432                        const RCP<ParameterList>& params)
01433   {
01434     report (comm, out, "", params);
01435   }
01436 
01437   void
01438   TimeMonitor::report (std::ostream& out,
01439                        const std::string& filter,
01440                        const RCP<ParameterList>& params)
01441   {
01442     RCP<const Comm<int> > comm = getDefaultComm ();
01443     report (comm.ptr (), out, filter, params);
01444   }
01445 
01446   void
01447   TimeMonitor::report (std::ostream& out,
01448                        const RCP<ParameterList>& params)
01449   {
01450     RCP<const Comm<int> > comm = getDefaultComm ();
01451     report (comm.ptr (), out, "", params);
01452   }
01453 
01454 } // namespace Teuchos
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines