Teuchos Package Browser (Single Doxygen Collection) Version of the Day
Teuchos_TimeMonitor.cpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 #include "Teuchos_TimeMonitor.hpp"
00043 #include "Teuchos_CommHelpers.hpp"
00044 #include "Teuchos_DefaultComm.hpp"
00045 #include "Teuchos_TableColumn.hpp"
00046 #include "Teuchos_TableFormat.hpp"
00047 #include <functional>
00048 
00049 namespace Teuchos {
00099   template<class Ordinal, class ScalarType, class IndexType>
00100   class MaxLoc : 
00101     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00102   public:
00103     void 
00104     reduce (const Ordinal count,
00105       const std::pair<ScalarType, IndexType> inBuffer[],
00106       std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00107   };
00108 
00109   template<class Ordinal>
00110   class MaxLoc<Ordinal, double, int> :
00111     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00112   public:
00113     void 
00114     reduce (const Ordinal count,
00115       const std::pair<double, int> inBuffer[],
00116       std::pair<double, int> inoutBuffer[]) const
00117     {
00118       for (Ordinal ind = 0; ind < count; ++ind) {
00119   const std::pair<double, int>& in = inBuffer[ind];
00120   std::pair<double, int>& inout = inoutBuffer[ind];
00121 
00122   if (in.first > inout.first) {
00123     inout.first = in.first;
00124     inout.second = in.second;
00125   } else if (in.first < inout.first) {
00126     // Don't need to do anything; inout has the values.
00127   } else { // equal, or at least one is NaN.
00128     inout.first = in.first;
00129     inout.second = std::min (in.second, inout.second);
00130   }
00131       }
00132     }
00133   };
00134 
00162   template<class Ordinal, class ScalarType, class IndexType>
00163   class MinLoc : 
00164     public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
00165   public:
00166     void 
00167     reduce (const Ordinal count,
00168       const std::pair<ScalarType, IndexType> inBuffer[],
00169       std::pair<ScalarType, IndexType> inoutBuffer[]) const;
00170   };
00171 
00172   template<class Ordinal>
00173   class MinLoc<Ordinal, double, int> :
00174     public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
00175   public:
00176     void 
00177     reduce (const Ordinal count,
00178       const std::pair<double, int> inBuffer[],
00179       std::pair<double, int> inoutBuffer[]) const
00180     {
00181       for (Ordinal ind = 0; ind < count; ++ind) {
00182   const std::pair<double, int>& in = inBuffer[ind];
00183   std::pair<double, int>& inout = inoutBuffer[ind];
00184 
00185   if (in.first < inout.first) {
00186     inout.first = in.first;
00187     inout.second = in.second;
00188   } else if (in.first > inout.first) {
00189     // Don't need to do anything; inout has the values.
00190   } else { // equal, or at least one is NaN.
00191     inout.first = in.first;
00192     inout.second = std::min (in.second, inout.second);
00193   }
00194       }
00195     }
00196   };
00197 
00198   // Map from a timer's name to its (total elapsed time, call count)
00199   // pair.  Used internally by TimeMonitor::summarize() and its helpers.
00200   typedef std::map<std::string, std::pair<double, int> > timer_map_t;
00201 
00202   TimeMonitor::TimeMonitor (Time& timer, bool reset) 
00203     : PerformanceMonitorBase<Time>(timer, reset)
00204   {
00205     if (!isRecursiveCall()) counter().start(reset);
00206   }
00207 
00208   TimeMonitor::~TimeMonitor() {
00209     if (!isRecursiveCall()) counter().stop();
00210   }
00211 
00212   void 
00213   TimeMonitor::zeroOutTimers()
00214   {
00215     const Array<RCP<Time> > timers = counters();
00216   
00217     // In debug mode, loop first to check whether any of the timers
00218     // are running, before resetting them.  This ensures that this
00219     // method satisfies the strong exception guarantee (either it
00220     // completes normally, or there are no side effects).
00221 #ifdef TEUCHOS_DEBUG
00222     typedef Array<RCP<Time> >::size_type size_type;
00223     const size_type numTimers = timers.size();
00224     for (size_type i = 0; i < numTimers; ++i) 
00225       {
00226   Time &timer = *timers[i];
00227   // We throw a runtime_error rather than a logic_error, because
00228   // logic_error suggests a bug in the implementation of
00229   // TimeMonitor.  Calling zeroOutTimers() when a timer is
00230   // running is not TimeMonitor's fault.
00231   TEUCHOS_TEST_FOR_EXCEPTION(timer.isRunning(), std::runtime_error,
00232          "The timer i = " << i << " with name \"" 
00233          << timer.name() << "\" is currently running and may not "
00234          "be reset.");
00235       }
00236 #endif // TEUCHOS_DEBUG
00237 
00238     for (Array<RCP<Time> >::const_iterator it = timers.begin(); 
00239    it != timers.end(); ++it)
00240       (*it)->reset ();
00241   }
00242 
00243   // An anonymous namespace is the standard way of limiting linkage of
00244   // its contained routines to file scope.
00245   namespace {
// \brief Build a (name, (0.0, 0)) timer datum.
//
// The result pairs the given name with a zero elapsed time and a
// zero call count.  No timer object is created.
//
// \param name [in] The timer's name.
//
// \return The pair (name, (double(0), int(0))).
std::pair<std::string, std::pair<double, int> >
makeEmptyTimerDatum (const std::string& name)
{
  const std::pair<double, int> zeroTimeAndCalls (0.0, 0);
  return std::pair<std::string, std::pair<double, int> > (name, zeroTimeAndCalls);
}
00257 
00258     // \brief Locally filter out timer data with zero call counts.
00259     //
00260     // \param timerData [in/out]
00261     void
00262     filterZeroData (timer_map_t& timerData)
00263     {
00264       timer_map_t newTimerData;
00265       for (timer_map_t::const_iterator it = timerData.begin(); 
00266      it != timerData.end(); ++it)
00267   {
00268     if (it->second.second > 0)
00269       newTimerData[it->first] = it->second;
00270   }
00271       timerData.swap (newTimerData);
00272     }
00273 
00274     //
00275     // \brief Collect and sort local timer data by timer names.
00276     //
00277     void
00278     collectLocalTimerData (timer_map_t& localData,
00279          const Array<RCP<Time> >& localCounters)
00280     {
00281       using std::make_pair;
00282       typedef timer_map_t::const_iterator const_iter_t;
00283       typedef timer_map_t::iterator iter_t;
00284 
00285       timer_map_t theLocalData;
00286       for (Array<RCP<Time> >::const_iterator it = localCounters.begin();
00287      it != localCounters.end(); ++it)
00288   {
00289     const std::string& name = (*it)->name();
00290     const double timing = (*it)->totalElapsedTime();
00291     const int numCalls = (*it)->numCalls();
00292 
00293     // Merge timers with duplicate labels, by summing their
00294     // total elapsed times and call counts.
00295     iter_t loc = theLocalData.find (name);
00296     if (loc == theLocalData.end())
00297       // Use loc as an insertion location hint.
00298       theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
00299     else
00300       {
00301         loc->second.first += timing;
00302         loc->second.second += numCalls;
00303       }
00304   }
00305       // This avoids copying the map, and also makes this method
00306       // satisfy the strong exception guarantee.
00307       localData.swap (theLocalData);
00308     }
00309 
00323     void
00324     collectLocalTimerDataAndNames (timer_map_t& localTimerData,
00325            Array<std::string>& localTimerNames,
00326            ArrayView<const RCP<Time> > localTimers,
00327            const bool writeZeroTimers)
00328     {
00329       // Collect and sort local timer data by timer names.
00330       collectLocalTimerData (localTimerData, localTimers);
00331 
00332       // Filter out zero data locally first.  This ensures that if we
00333       // are writing global stats, and if a timer name exists in the
00334       // set of global names, then that timer has a nonzero call count
00335       // on at least one MPI process.
00336       if (! writeZeroTimers) {
00337   filterZeroData (localTimerData);
00338       }
00339 
00340       // Extract the set of local timer names.  The std::map keeps
00341       // them sorted alphabetically.
00342       localTimerNames.reserve (localTimerData.size());
00343       for (timer_map_t::const_iterator it = localTimerData.begin(); 
00344      it != localTimerData.end(); ++it) {
00345   localTimerNames.push_back (it->first);
00346       }
00347     }
00348 
00378     void
00379     collectGlobalTimerData (timer_map_t& globalTimerData,
00380           Array<std::string>& globalTimerNames,
00381           timer_map_t& localTimerData,
00382           Array<std::string>& localTimerNames,
00383           Ptr<const Comm<int> > comm,
00384           const bool alwaysWriteLocal,
00385           const ECounterSetOp setOp)
00386     {
00387       // There may be some global timers that are not local timers on
00388       // the calling MPI process(es).  In that case, if
00389       // alwaysWriteLocal is true, then we need to fill in the
00390       // "missing" local timers.  That will ensure that both global
00391       // and local timer columns in the output table have the same
00392       // number of rows.  The collectLocalTimerDataAndNames() method
00393       // may have already filtered out local timers with zero call
00394       // counts (if its writeZeroTimers argument was false), but we
00395       // won't be filtering again.  Thus, any local timer data we
00396       // insert here won't get filtered out.
00397       //
00398       // Note that calling summarize() with writeZeroTimers == false
00399       // will still do what it says, even if we insert local timers
00400       // with zero call counts here.
00401 
00402       // This does the correct and inexpensive thing (just copies the
00403       // timer data) if numProcs == 1.  Otherwise, it initiates a
00404       // communication with \f$O(\log P)\f$ messages along the
00405       // critical path, where \f$P\f$ is the number of participating
00406       // processes.
00407       mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
00408 
00409 #ifdef TEUCHOS_DEBUG
00410       {
00411   // Sanity check that all processes have the name number of
00412   // global timer names.
00413   const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
00414   timer_map_t::size_type minNumGlobalNames = 0;
00415   timer_map_t::size_type maxNumGlobalNames = 0;
00416   reduceAll (*comm, REDUCE_MIN, myNumGlobalNames, 
00417        outArg (minNumGlobalNames));
00418   reduceAll (*comm, REDUCE_MAX, myNumGlobalNames, 
00419        outArg (maxNumGlobalNames));
00420   TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
00421           std::logic_error, "Min # global timer names = " << minNumGlobalNames 
00422     << " != max # global timer names = " << maxNumGlobalNames
00423     << ".  Please report this bug to the Teuchos developers.");
00424   TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
00425     std::logic_error, "My # global timer names = " << myNumGlobalNames 
00426     << " != min # global timer names = " << minNumGlobalNames
00427     << ".  Please report this bug to the Teuchos developers.");
00428       }
00429 #endif // TEUCHOS_DEBUG
00430 
00431       // mergeCounterNames() just merges the counters' names, not
00432       // their actual data.  Now we need to fill globalTimerData with
00433       // this process' timer data for the timers in globalTimerNames.
00434       //
00435       // All processes need the full list of global timers, since
00436       // there may be some global timers that are not local timers.
00437       // That's why mergeCounterNames() has to be an all-reduce, not
00438       // just a reduction to Proc 0.
00439       //
00440       // Insertion optimization: if the iterator given to map::insert
00441       // points right before where we want to insert, insertion is
00442       // O(1).  globalTimerNames is sorted, so feeding the iterator
00443       // output of map::insert into the next invocation's input should
00444       // make the whole insertion O(N) where N is the number of
00445       // entries in globalTimerNames.
00446       timer_map_t::iterator globalMapIter = globalTimerData.begin();
00447       timer_map_t::iterator localMapIter;
00448       for (Array<string>::const_iterator it = globalTimerNames.begin(); 
00449      it != globalTimerNames.end(); ++it) {
00450   const std::string& globalName = *it;
00451   localMapIter = localTimerData.find (globalName);
00452 
00453   if (localMapIter == localTimerData.end()) {
00454     if (alwaysWriteLocal) {
00455       // If there are some global timers that are not local
00456       // timers, and if we want to print local timers, we insert
00457       // a local timer datum with zero elapsed time and zero
00458       // call count into localTimerData as well.  This will
00459       // ensure that both global and local timer columns in the
00460       // output table have the same number of rows.
00461       //
00462       // We really only need to do this on Proc 0, which is the
00463       // only process that currently may print local timers.
00464       // However, we do it on all processes, just in case
00465       // someone later wants to modify this function to print
00466       // out local timer data for some process other than Proc
00467       // 0.  This extra computation won't affect the cost along
00468       // the critical path, for future computations in which
00469       // Proc 0 participates.
00470       localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
00471 
00472       // Make sure the missing global name gets added to the
00473       // list of local names.  We'll re-sort the list of local
00474       // names below.
00475       localTimerNames.push_back (globalName);
00476     }
00477     // There's a global timer that's not a local timer.  Add it
00478     // to our pre-merge version of the global timer data so that
00479     // we can safely merge the global timer data later.
00480     globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
00481   }
00482   else {
00483     // We have this global timer name in our local timer list.
00484     // Fill in our pre-merge version of the global timer data
00485     // with our local data.
00486     globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
00487   }
00488       }
00489 
00490       if (alwaysWriteLocal) {
00491   // Re-sort the list of local timer names, since we may have
00492   // inserted "missing" names above.
00493   std::sort (localTimerNames.begin(), localTimerNames.end());
00494       }
00495 
00496 #ifdef TEUCHOS_DEBUG
00497       {
00498   // Sanity check that all processes have the name number of
00499   // global timers.
00500   const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
00501   timer_map_t::size_type minNumGlobalTimers = 0;
00502   timer_map_t::size_type maxNumGlobalTimers = 0;
00503   reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers, 
00504        outArg (minNumGlobalTimers));
00505   reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers, 
00506        outArg (maxNumGlobalTimers));
00507   TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
00508            std::logic_error, "Min # global timers = " << minNumGlobalTimers 
00509            << " != max # global timers = " << maxNumGlobalTimers
00510            << ".  Please report this bug to the Teuchos developers.");
00511   TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
00512            std::logic_error, "My # global timers = " << myNumGlobalTimers 
00513            << " != min # global timers = " << minNumGlobalTimers
00514            << ".  Please report this bug to the Teuchos developers.");
00515       }
00516 #endif // TEUCHOS_DEBUG
00517     }
00518 
00550     void
00551     computeGlobalTimerStats (stat_map_type& statData,
00552            std::vector<std::string>& statNames,
00553            Ptr<const Comm<int> > comm,
00554            const timer_map_t& globalTimerData)
00555     {
00556       const int numTimers = static_cast<int> (globalTimerData.size());
00557       const int numProcs = comm->getSize();
00558 
00559       // Extract pre-reduction timings and call counts into a
00560       // sequential array.  This array will be in the same order as
00561       // the global timer names are in the map.
00562       Array<std::pair<double, int> > timingsAndCallCounts;
00563       timingsAndCallCounts.reserve (numTimers);
00564       for (timer_map_t::const_iterator it = globalTimerData.begin(); 
00565      it != globalTimerData.end(); ++it) {
00566   timingsAndCallCounts.push_back (it->second);
00567       }
00568 
00569       // For each timer name, compute the min timing and its
00570       // corresponding call count.
00571       Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
00572       if (numTimers > 0) {
00573   reduceAll (*comm, MinLoc<int, double, int>(), numTimers, 
00574        &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
00575       }
00576 
00577       // For each timer name, compute the max timing and its
00578       // corresponding call count.
00579       Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
00580       if (numTimers > 0) {
00581   reduceAll (*comm, MaxLoc<int, double, int>(), numTimers, 
00582        &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
00583       }
00584 
00585       // For each timer name, compute the mean timing and the mean
00586       // call count.  The mean call count is reported as a double to
00587       // allow a fractional value.
00588       // 
00589       // Each local timing is really the total timing over all local
00590       // invocations.  The number of local invocations is the call
00591       // count.  Thus, the mean timing is really the sum of all the
00592       // timings (over all processes), divided by the sum of all the
00593       // call counts (over all processes).
00594       Array<double> meanTimings (numTimers);
00595       Array<double> meanCallCounts (numTimers);
00596       {
00597   // When summing, first scale by the number of processes.  This
00598   // avoids unnecessary overflow, and also gives us the mean
00599   // call count automatically.
00600   Array<double> scaledTimings (numTimers);
00601   Array<double> scaledCallCounts (numTimers);
00602   const double P = static_cast<double> (numProcs);
00603   for (int k = 0; k < numTimers; ++k) {
00604     const double timing = timingsAndCallCounts[k].first;
00605     const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
00606 
00607     scaledTimings[k] = timing / P;
00608     scaledCallCounts[k] = callCount / P;
00609   }
00610   if (numTimers > 0) {
00611     reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0], &meanTimings[0]);
00612     reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0], &meanCallCounts[0]);
00613   }
00614   // We don't have to undo the scaling for the mean timings;
00615   // just divide by the scaled call count.
00616   for (int k = 0; k < numTimers; ++k) {
00617     meanTimings[k] = meanTimings[k] / meanCallCounts[k];
00618   }
00619       }
00620 
00621       // Reformat the data into the map of statistics.  Be sure that
00622       // each value (the std::vector of (timing, call count) pairs,
00623       // each entry of which is a different statistic) preserves the
00624       // order of statNames.
00625       statNames.resize (3);
00626       statNames[0] = "Min";
00627       statNames[1] = "Mean";
00628       statNames[2] = "Max";
00629 
00630       stat_map_type::iterator statIter = statData.end();
00631       timer_map_t::const_iterator it = globalTimerData.begin();
00632       for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
00633   std::vector<std::pair<double, double> > curData (3);
00634   curData[0] = minTimingsAndCallCounts[k];
00635   curData[1] = std::make_pair (meanTimings[k], meanCallCounts[k]);
00636   curData[2] = maxTimingsAndCallCounts[k];
00637 
00638   // statIter gives an insertion location hint that makes each
00639   // insertion O(1), since we remember the location of the last
00640   // insertion.
00641   statIter = statData.insert (statIter, std::make_pair (it->first, curData));
00642       }
00643     }
00644 
00645 
00662     RCP<const Comm<int> > 
00663     getDefaultComm ()
00664     {
00665       // The default communicator.  If Trilinos was built with MPI
00666       // enabled, this should be MPI_COMM_WORLD.  (If MPI has not yet
00667       // been initialized, it's not valid to use the communicator!)
00668       // Otherwise, this should be a "serial" (no MPI, one "process")
00669       // communicator.
00670       RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
00671 
00672 #ifdef HAVE_MPI
00673       {
00674   int mpiHasBeenStarted = 0;
00675   MPI_Initialized (&mpiHasBeenStarted);
00676   if (! mpiHasBeenStarted) {
00677     // Make pComm a new "serial communicator."
00678     comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
00679   }
00680       }
00681 #endif // HAVE_MPI
00682       return comm;
00683     }
00684 
00685   } // namespace (anonymous)
00686 
00687 
00688   void
00689   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
00690                std::vector<std::string>& statNames,
00691                Ptr<const Comm<int> > comm,
00692                const ECounterSetOp setOp)
00693   {
00694     // Collect local timer data and names.  Filter out timers with
00695     // zero call counts if writeZeroTimers is false.
00696     timer_map_t localTimerData;
00697     Array<std::string> localTimerNames;
00698     const bool writeZeroTimers = false;
00699     collectLocalTimerDataAndNames (localTimerData, localTimerNames, counters(), writeZeroTimers);
00700 
00701     // Merge the local timer data and names into global timer data and
00702     // names.
00703     timer_map_t globalTimerData;
00704     Array<std::string> globalTimerNames;
00705     const bool alwaysWriteLocal = false;
00706     collectGlobalTimerData (globalTimerData, globalTimerNames, 
00707           localTimerData, localTimerNames,
00708           comm, alwaysWriteLocal, setOp);
00709     // Compute statistics on the data.
00710     computeGlobalTimerStats (statData, statNames, comm, globalTimerData);
00711   }
00712 
00713 
00714   void 
00715   TimeMonitor::summarize (Ptr<const Comm<int> > comm,
00716         std::ostream& out,
00717         const bool alwaysWriteLocal,
00718         const bool writeGlobalStats,
00719         const bool writeZeroTimers,
00720         const ECounterSetOp setOp)
00721   {
00722     //
00723     // We can't just call computeGlobalTimerStatistics(), since
00724     // summarize() has different options that affect whether global
00725     // statistics are computed and printed.
00726     //
00727     const int numProcs = comm->getSize();
00728     const int myRank = comm->getRank();
00729 
00730     // Collect local timer data and names.  Filter out timers with
00731     // zero call counts if writeZeroTimers is false.
00732     timer_map_t localTimerData;
00733     Array<std::string> localTimerNames;
00734     collectLocalTimerDataAndNames (localTimerData, localTimerNames,
00735            counters(), writeZeroTimers);
00736 
00737     // If we're computing global statistics, merge the local timer
00738     // data and names into global timer data and names, and compute
00739     // global timer statistics.  Otherwise, leave the global data
00740     // empty.
00741     timer_map_t globalTimerData;
00742     Array<std::string> globalTimerNames;
00743     stat_map_type statData;
00744     std::vector<std::string> statNames;
00745     if (writeGlobalStats) {
00746       collectGlobalTimerData (globalTimerData, globalTimerNames, 
00747             localTimerData, localTimerNames,
00748             comm, alwaysWriteLocal, setOp);
00749       // Compute statistics on the data, but only if the communicator
00750       // contains more than one process.  Otherwise, statistics don't
00751       // make sense and we don't print them (see below).
00752       if (numProcs > 1) { 
00753   computeGlobalTimerStats (statData, statNames, comm, globalTimerData);
00754       }
00755     }
00756 
00757     // Precision of floating-point numbers in the table.
00758     const int precision = format().precision();
00759 
00760     // All columns of the table, in order.
00761     Array<TableColumn> tableColumns;
00762 
00763     // Labels of all the columns of the table.
00764     // We will append to this when we add each column.
00765     Array<std::string> titles;
00766 
00767     // Widths (in number of characters) of each column.
00768     // We will append to this when we add each column.
00769     Array<int> columnWidths;
00770 
00771     // Table column containing all timer names.  If writeGlobalStats
00772     // is true, we use the global timer names, otherwise we use the
00773     // local timer names.  We build the table on all processes
00774     // redundantly, but only print on Rank 0.
00775     {
00776       titles.append ("Timer Name");
00777 
00778       // The column labels depend on whether we are computing global statistics.
00779       TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
00780       tableColumns.append (nameCol);
00781 
00782       // Each column is as wide as it needs to be to hold both its
00783       // title and all of the column data.  This column's title is the
00784       // current last entry of the titles array.
00785       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
00786     }
00787 
00788     // Table column containing local timer stats, if applicable.  We
00789     // only write local stats if asked, only on MPI Proc 0, and only
00790     // if there is more than one MPI process in the communicator
00791     // (otherwise local stats == global stats, so we just print the
00792     // global stats).  In this case, we've padded the local data on
00793     // Proc 0 if necessary to match the global timer list, so that the
00794     // columns have the same number of rows.
00795     if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
00796       titles.append ("Local time (num calls)");
00797 
00798       // Copy local timer data out of the array-of-structs into
00799       // separate arrays, for display in the table.
00800       Array<double> localTimings;
00801       Array<double> localNumCalls;
00802       for (timer_map_t::const_iterator it = localTimerData.begin();
00803      it != localTimerData.end(); ++it) {
00804   localTimings.push_back (it->second.first);
00805   localNumCalls.push_back (static_cast<double> (it->second.second));
00806       }
00807       TableColumn timeAndCalls (localTimings, localNumCalls, precision, true);
00808       tableColumns.append (timeAndCalls);
00809       columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00810     }
00811 
00812     if (writeGlobalStats) {
00813       // If there's only 1 process in the communicator, don't display
00814       // statistics; statistics don't make sense in that case.  Just
00815       // display the timings and call counts.  If there's more than 1
00816       // process, do display statistics.
00817       if (numProcs == 1) {
00818   // Extract timings and the call counts from globalTimerData.
00819   Array<double> globalTimings;
00820   Array<double> globalNumCalls;
00821   for (timer_map_t::const_iterator it = globalTimerData.begin();
00822        it != globalTimerData.end(); ++it) {
00823     globalTimings.push_back (it->second.first);
00824     globalNumCalls.push_back (static_cast<double> (it->second.second));
00825   }
00826   // Print the table column.
00827   titles.append ("Global time (num calls)");
00828   TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true);
00829   tableColumns.append (timeAndCalls);
00830   columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00831       }
00832       else { // numProcs > 1
00833   // Print a table column for each statistic.  statNames and
00834   // each value in statData use the same ordering, so we can
00835   // iterate over valid indices of statNames to display the
00836   // statistics in the right order.
00837   const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
00838   for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
00839     // Extract lists of timings and their call counts for the
00840     // current statistic.
00841     Array<double> statTimings (numGlobalTimers);
00842     Array<double> statCallCounts (numGlobalTimers);
00843     stat_map_type::const_iterator it = statData.begin();
00844     for (int k = 0; it != statData.end(); ++it, ++k) {
00845       statTimings[k] = (it->second[statInd]).first;
00846       statCallCounts[k] = (it->second[statInd]).second;
00847     }
00848     // Print the table column.
00849     const std::string& statisticName = statNames[statInd];
00850     const std::string titleString = statisticName + " over procs";
00851     titles.append (titleString);
00852     TableColumn timeAndCalls (statTimings, statCallCounts, precision, true);
00853     tableColumns.append (timeAndCalls);
00854     columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
00855   }
00856       }
00857     }
00858 
00859     // Print the whole table to the given output stream on MPI Rank 0.
00860     format ().setColumnWidths (columnWidths);
00861     if (myRank == 0) {
00862       std::ostringstream theTitle;
00863       theTitle << "TimeMonitor results over " << numProcs << " processor" 
00864          << (numProcs > 1 ? "s" : "");
00865       format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
00866     }
00867   }
00868 
00869   void 
00870   TimeMonitor::summarize (std::ostream &out,
00871         const bool alwaysWriteLocal,
00872         const bool writeGlobalStats,
00873         const bool writeZeroTimers,
00874         const ECounterSetOp setOp)
00875   {
00876     // The default communicator.  If Trilinos was built with MPI
00877     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
00878     // be a "serial" (no MPI, one "process") communicator.
00879     RCP<const Comm<int> > comm = getDefaultComm();
00880 
00881     summarize (comm.ptr(), out, alwaysWriteLocal, 
00882          writeGlobalStats, writeZeroTimers, setOp);
00883   }
00884 
00885   void 
00886   TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData,
00887                std::vector<std::string>& statNames,
00888                const ECounterSetOp setOp)
00889   {
00890     // The default communicator.  If Trilinos was built with MPI
00891     // enabled, this should be MPI_COMM_WORLD.  Otherwise, this should
00892     // be a "serial" (no MPI, one "process") communicator.
00893     RCP<const Comm<int> > comm = getDefaultComm();
00894 
00895     computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp);
00896   }
00897 
00898 
00899 } // namespace Teuchos
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines