|
Teuchos - Trilinos Tools Package Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Teuchos: Common Tools Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 // @HEADER 00041 00042 #include "Teuchos_TimeMonitor.hpp" 00043 #include "Teuchos_CommHelpers.hpp" 00044 #include "Teuchos_DefaultComm.hpp" 00045 #include "Teuchos_TableColumn.hpp" 00046 #include "Teuchos_TableFormat.hpp" 00047 #include <functional> 00048 #ifdef HAVE_TEUCHOS_YAML_CPP 00049 # include <yaml-cpp/yaml.h> 00050 #endif // HAVE_TEUCHOS_YAML_CPP 00051 00052 00053 namespace Teuchos { 00103 template<class Ordinal, class ScalarType, class IndexType> 00104 class MaxLoc : 00105 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > { 00106 public: 00107 void 00108 reduce (const Ordinal count, 00109 const std::pair<ScalarType, IndexType> inBuffer[], 00110 std::pair<ScalarType, IndexType> inoutBuffer[]) const; 00111 }; 00112 00113 template<class Ordinal> 00114 class MaxLoc<Ordinal, double, int> : 00115 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > { 00116 public: 00117 void 00118 reduce (const Ordinal count, 00119 const std::pair<double, int> inBuffer[], 00120 std::pair<double, int> inoutBuffer[]) const 00121 { 00122 for (Ordinal ind = 0; ind < count; ++ind) { 00123 const std::pair<double, int>& in = inBuffer[ind]; 00124 std::pair<double, int>& inout = inoutBuffer[ind]; 00125 00126 if (in.first > inout.first) { 00127 inout.first = in.first; 00128 inout.second = in.second; 00129 } else if (in.first < inout.first) { 00130 // Don't need to do anything; inout has the values. 00131 } else { // equal, or at least one is NaN. 00132 inout.first = in.first; 00133 inout.second = std::min (in.second, inout.second); 00134 } 00135 } 00136 } 00137 }; 00138 00166 template<class Ordinal, class ScalarType, class IndexType> 00167 class MinLoc : 00168 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > { 00169 public: 00170 void 00171 reduce (const Ordinal count, 00172 const std::pair<ScalarType, IndexType> inBuffer[], 00173 std::pair<ScalarType, IndexType> inoutBuffer[]) const; 00174 }; 00175 00176 template<class Ordinal> 00177 class MinLoc<Ordinal, double, int> : 00178 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > { 00179 public: 00180 void 00181 reduce (const Ordinal count, 00182 const std::pair<double, int> inBuffer[], 00183 std::pair<double, int> inoutBuffer[]) const 00184 { 00185 for (Ordinal ind = 0; ind < count; ++ind) { 00186 const std::pair<double, int>& in = inBuffer[ind]; 00187 std::pair<double, int>& inout = inoutBuffer[ind]; 00188 00189 if (in.first < inout.first) { 00190 inout.first = in.first; 00191 inout.second = in.second; 00192 } else if (in.first > inout.first) { 00193 // Don't need to do anything; inout has the values. 00194 } else { // equal, or at least one is NaN. 00195 inout.first = in.first; 00196 inout.second = std::min (in.second, inout.second); 00197 } 00198 } 00199 } 00200 }; 00201 00202 // Typedef used internally by TimeMonitor::summarize() and its 00203 // helper functions. The map is keyed on timer label (a string). 00204 // Each value is a pair: (total number of seconds over all calls to 00205 // that timer, total number of calls to that timer). 00206 typedef std::map<std::string, std::pair<double, int> > timer_map_t; 00207 00208 TimeMonitor::TimeMonitor (Time& timer, bool reset) 00209 : PerformanceMonitorBase<Time>(timer, reset) 00210 { 00211 if (!isRecursiveCall()) counter().start(reset); 00212 } 00213 00214 TimeMonitor::~TimeMonitor() { 00215 if (!isRecursiveCall()) counter().stop(); 00216 } 00217 00218 void 00219 TimeMonitor::zeroOutTimers() 00220 { 00221 const Array<RCP<Time> > timers = counters(); 00222 00223 // In debug mode, loop first to check whether any of the timers 00224 // are running, before resetting them. This ensures that this 00225 // method satisfies the strong exception guarantee (either it 00226 // completes normally, or there are no side effects). 00227 #ifdef TEUCHOS_DEBUG 00228 typedef Array<RCP<Time> >::size_type size_type; 00229 const size_type numTimers = timers.size(); 00230 for (size_type i = 0; i < numTimers; ++i) { 00231 Time &timer = *timers[i]; 00232 // We throw a runtime_error rather than a logic_error, because 00233 // logic_error suggests a bug in the implementation of 00234 // TimeMonitor. Calling zeroOutTimers() when a timer is 00235 // running is not TimeMonitor's fault. 00236 TEUCHOS_TEST_FOR_EXCEPTION(timer.isRunning(), std::runtime_error, 00237 "The timer i = " << i << " with name \"" 00238 << timer.name() << "\" is currently running and may not " 00239 "be reset."); 00240 } 00241 #endif // TEUCHOS_DEBUG 00242 00243 for (Array<RCP<Time> >::const_iterator it = timers.begin(); 00244 it != timers.end(); ++it) { 00245 (*it)->reset (); 00246 } 00247 } 00248 00249 // An anonymous namespace is the standard way of limiting linkage of 00250 // its contained routines to file scope. 00251 namespace { 00252 // \brief Return an "empty" local timer datum. 00253 // 00254 // "Empty" means the datum has zero elapsed time and zero call 00255 // count. This function does not actually create a timer. 00256 // 00257 // \param name The timer's name. 00258 std::pair<std::string, std::pair<double, int> > 00259 makeEmptyTimerDatum (const std::string& name) 00260 { 00261 return std::make_pair (name, std::make_pair (double(0), int(0))); 00262 } 00263 00264 // \fn collectLocalTimerData 00265 // \brief Collect and sort local timer data by timer names. 00266 // 00267 // \param localData [out] Map whose keys are the timer names, and 00268 // whose value for each key is the total elapsed time (in 00269 // seconds) and the call count for the timer with that name. 00270 // 00271 // \param localCounters [in] Timers from which to extract data. 00272 // 00273 // Extract the total elapsed time and call count from each timer 00274 // in the given array. Merge results for timers with duplicate 00275 // labels, by summing their total elapsed times and call counts 00276 // pairwise. 00277 void 00278 collectLocalTimerData (timer_map_t& localData, 00279 ArrayView<const RCP<Time> > localCounters) 00280 { 00281 using std::make_pair; 00282 typedef timer_map_t::const_iterator const_iter_t; 00283 typedef timer_map_t::iterator iter_t; 00284 00285 timer_map_t theLocalData; 00286 for (ArrayView<const RCP<Time> >::const_iterator it = localCounters.begin(); 00287 it != localCounters.end(); ++it) { 00288 const std::string& name = (*it)->name(); 00289 const double timing = (*it)->totalElapsedTime(); 00290 const int numCalls = (*it)->numCalls(); 00291 00292 // Merge timers with duplicate labels, by summing their 00293 // total elapsed times and call counts. 00294 iter_t loc = theLocalData.find (name); 00295 if (loc == theLocalData.end()) { 00296 // Use loc as an insertion location hint. 00297 theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls))); 00298 } 00299 else { 00300 loc->second.first += timing; 00301 loc->second.second += numCalls; 00302 } 00303 } 00304 // This avoids copying the map, and also makes this method 00305 // satisfy the strong exception guarantee. 00306 localData.swap (theLocalData); 00307 } 00308 00309 // \brief Locally filter out timer data with zero call counts. 00310 // 00311 // \param timerData [in/out] 00312 void 00313 filterZeroData (timer_map_t& timerData) 00314 { 00315 timer_map_t newTimerData; 00316 for (timer_map_t::const_iterator it = timerData.begin(); 00317 it != timerData.end(); ++it) { 00318 if (it->second.second > 0) { 00319 newTimerData[it->first] = it->second; 00320 } 00321 } 00322 timerData.swap (newTimerData); 00323 } 00324 00342 void 00343 collectLocalTimerDataAndNames (timer_map_t& localTimerData, 00344 Array<std::string>& localTimerNames, 00345 ArrayView<const RCP<Time> > localTimers, 00346 const bool writeZeroTimers) 00347 { 00348 // Collect and sort local timer data by timer names. 00349 collectLocalTimerData (localTimerData, localTimers); 00350 00351 // Filter out zero data locally first. This ensures that if we 00352 // are writing global stats, and if a timer name exists in the 00353 // set of global names, then that timer has a nonzero call count 00354 // on at least one MPI process. 00355 if (! writeZeroTimers) { 00356 filterZeroData (localTimerData); 00357 } 00358 00359 // Extract the set of local timer names. The std::map keeps 00360 // them sorted alphabetically. 00361 localTimerNames.reserve (localTimerData.size()); 00362 for (timer_map_t::const_iterator it = localTimerData.begin(); 00363 it != localTimerData.end(); ++it) { 00364 localTimerNames.push_back (it->first); 00365 } 00366 } 00367 00402 void 00403 collectGlobalTimerData (timer_map_t& globalTimerData, 00404 Array<std::string>& globalTimerNames, 00405 timer_map_t& localTimerData, 00406 Array<std::string>& localTimerNames, 00407 Ptr<const Comm<int> > comm, 00408 const bool alwaysWriteLocal, 00409 const ECounterSetOp setOp) 00410 { 00411 // There may be some global timers that are not local timers on 00412 // the calling MPI process(es). In that case, if 00413 // alwaysWriteLocal is true, then we need to fill in the 00414 // "missing" local timers. That will ensure that both global 00415 // and local timer columns in the output table have the same 00416 // number of rows. The collectLocalTimerDataAndNames() method 00417 // may have already filtered out local timers with zero call 00418 // counts (if its writeZeroTimers argument was false), but we 00419 // won't be filtering again. Thus, any local timer data we 00420 // insert here won't get filtered out. 00421 // 00422 // Note that calling summarize() with writeZeroTimers == false 00423 // will still do what it says, even if we insert local timers 00424 // with zero call counts here. 00425 00426 // This does the correct and inexpensive thing (just copies the 00427 // timer data) if numProcs == 1. Otherwise, it initiates a 00428 // communication with \f$O(\log P)\f$ messages along the 00429 // critical path, where \f$P\f$ is the number of participating 00430 // processes. 00431 mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp); 00432 00433 #ifdef TEUCHOS_DEBUG 00434 { 00435 // Sanity check that all processes have the name number of 00436 // global timer names. 00437 const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size(); 00438 timer_map_t::size_type minNumGlobalNames = 0; 00439 timer_map_t::size_type maxNumGlobalNames = 0; 00440 reduceAll (*comm, REDUCE_MIN, myNumGlobalNames, 00441 outArg (minNumGlobalNames)); 00442 reduceAll (*comm, REDUCE_MAX, myNumGlobalNames, 00443 outArg (maxNumGlobalNames)); 00444 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames, 00445 std::logic_error, "Min # global timer names = " << minNumGlobalNames 00446 << " != max # global timer names = " << maxNumGlobalNames 00447 << ". Please report this bug to the Teuchos developers."); 00448 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames, 00449 std::logic_error, "My # global timer names = " << myNumGlobalNames 00450 << " != min # global timer names = " << minNumGlobalNames 00451 << ". Please report this bug to the Teuchos developers."); 00452 } 00453 #endif // TEUCHOS_DEBUG 00454 00455 // mergeCounterNames() just merges the counters' names, not 00456 // their actual data. Now we need to fill globalTimerData with 00457 // this process' timer data for the timers in globalTimerNames. 00458 // 00459 // All processes need the full list of global timers, since 00460 // there may be some global timers that are not local timers. 00461 // That's why mergeCounterNames() has to be an all-reduce, not 00462 // just a reduction to Proc 0. 00463 // 00464 // Insertion optimization: if the iterator given to map::insert 00465 // points right before where we want to insert, insertion is 00466 // O(1). globalTimerNames is sorted, so feeding the iterator 00467 // output of map::insert into the next invocation's input should 00468 // make the whole insertion O(N) where N is the number of 00469 // entries in globalTimerNames. 00470 timer_map_t::iterator globalMapIter = globalTimerData.begin(); 00471 timer_map_t::iterator localMapIter; 00472 for (Array<string>::const_iterator it = globalTimerNames.begin(); 00473 it != globalTimerNames.end(); ++it) { 00474 const std::string& globalName = *it; 00475 localMapIter = localTimerData.find (globalName); 00476 00477 if (localMapIter == localTimerData.end()) { 00478 if (alwaysWriteLocal) { 00479 // If there are some global timers that are not local 00480 // timers, and if we want to print local timers, we insert 00481 // a local timer datum with zero elapsed time and zero 00482 // call count into localTimerData as well. This will 00483 // ensure that both global and local timer columns in the 00484 // output table have the same number of rows. 00485 // 00486 // We really only need to do this on Proc 0, which is the 00487 // only process that currently may print local timers. 00488 // However, we do it on all processes, just in case 00489 // someone later wants to modify this function to print 00490 // out local timer data for some process other than Proc 00491 // 0. This extra computation won't affect the cost along 00492 // the critical path, for future computations in which 00493 // Proc 0 participates. 00494 localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName)); 00495 00496 // Make sure the missing global name gets added to the 00497 // list of local names. We'll re-sort the list of local 00498 // names below. 00499 localTimerNames.push_back (globalName); 00500 } 00501 // There's a global timer that's not a local timer. Add it 00502 // to our pre-merge version of the global timer data so that 00503 // we can safely merge the global timer data later. 00504 globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName)); 00505 } 00506 else { 00507 // We have this global timer name in our local timer list. 00508 // Fill in our pre-merge version of the global timer data 00509 // with our local data. 00510 globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second)); 00511 } 00512 } 00513 00514 if (alwaysWriteLocal) { 00515 // Re-sort the list of local timer names, since we may have 00516 // inserted "missing" names above. 00517 std::sort (localTimerNames.begin(), localTimerNames.end()); 00518 } 00519 00520 #ifdef TEUCHOS_DEBUG 00521 { 00522 // Sanity check that all processes have the name number of 00523 // global timers. 00524 const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size(); 00525 timer_map_t::size_type minNumGlobalTimers = 0; 00526 timer_map_t::size_type maxNumGlobalTimers = 0; 00527 reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers, 00528 outArg (minNumGlobalTimers)); 00529 reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers, 00530 outArg (maxNumGlobalTimers)); 00531 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers, 00532 std::logic_error, "Min # global timers = " << minNumGlobalTimers 00533 << " != max # global timers = " << maxNumGlobalTimers 00534 << ". Please report this bug to the Teuchos developers."); 00535 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers, 00536 std::logic_error, "My # global timers = " << myNumGlobalTimers 00537 << " != min # global timers = " << minNumGlobalTimers 00538 << ". Please report this bug to the Teuchos developers."); 00539 } 00540 #endif // TEUCHOS_DEBUG 00541 } 00542 00580 void 00581 computeGlobalTimerStats (stat_map_type& statData, 00582 std::vector<std::string>& statNames, 00583 Ptr<const Comm<int> > comm, 00584 const timer_map_t& globalTimerData) 00585 { 00586 const int numTimers = static_cast<int> (globalTimerData.size()); 00587 const int numProcs = comm->getSize(); 00588 00589 // Extract pre-reduction timings and call counts into a 00590 // sequential array. This array will be in the same order as 00591 // the global timer names are in the map. 00592 Array<std::pair<double, int> > timingsAndCallCounts; 00593 timingsAndCallCounts.reserve (numTimers); 00594 for (timer_map_t::const_iterator it = globalTimerData.begin(); 00595 it != globalTimerData.end(); ++it) { 00596 timingsAndCallCounts.push_back (it->second); 00597 } 00598 00599 // For each timer name, compute the min timing and its 00600 // corresponding call count. If two processes have the same 00601 // timing but different call counts, the minimum call count will 00602 // be used. 00603 Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers); 00604 if (numTimers > 0) { 00605 reduceAll (*comm, MinLoc<int, double, int>(), numTimers, 00606 &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]); 00607 } 00608 00609 // For each timer name, compute the max timing and its 00610 // corresponding call count. If two processes have the same 00611 // timing but different call counts, the minimum call count will 00612 // be used. 00613 Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers); 00614 if (numTimers > 0) { 00615 reduceAll (*comm, MaxLoc<int, double, int>(), numTimers, 00616 &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]); 00617 } 00618 00619 // For each timer name, compute the mean-over-processes timing, 00620 // the mean call count, and the mean-over-call-counts timing. 00621 // The mean call count is reported as a double to allow a 00622 // fractional value. 00623 // 00624 // Each local timing is really the total timing over all local 00625 // invocations. The number of local invocations is the call 00626 // count. Thus, the mean-over-call-counts timing is the sum of 00627 // all the timings (over all processes), divided by the sum of 00628 // all the call counts (over all processes). We compute it in a 00629 // different way to over unnecessary overflow. 00630 Array<double> meanOverCallCountsTimings (numTimers); 00631 Array<double> meanOverProcsTimings (numTimers); 00632 Array<double> meanCallCounts (numTimers); 00633 { 00634 // When summing, first scale by the number of processes. This 00635 // avoids unnecessary overflow, and also gives us the mean 00636 // call count automatically. 00637 Array<double> scaledTimings (numTimers); 00638 Array<double> scaledCallCounts (numTimers); 00639 const double P = static_cast<double> (numProcs); 00640 for (int k = 0; k < numTimers; ++k) { 00641 const double timing = timingsAndCallCounts[k].first; 00642 const double callCount = static_cast<double> (timingsAndCallCounts[k].second); 00643 00644 scaledTimings[k] = timing / P; 00645 scaledCallCounts[k] = callCount / P; 00646 } 00647 if (numTimers > 0) { 00648 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0], 00649 &meanOverProcsTimings[0]); 00650 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0], 00651 &meanCallCounts[0]); 00652 } 00653 // We don't have to undo the scaling for the mean timings; 00654 // just divide by the scaled call count. 00655 for (int k = 0; k < numTimers; ++k) { 00656 meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k]; 00657 } 00658 } 00659 00660 // Reformat the data into the map of statistics. Be sure that 00661 // each value (the std::vector of (timing, call count) pairs, 00662 // each entry of which is a different statistic) preserves the 00663 // order of statNames. 00664 statNames.resize (4); 00665 statNames[0] = "MinOverProcs"; 00666 statNames[1] = "MeanOverProcs"; 00667 statNames[2] = "MaxOverProcs"; 00668 statNames[3] = "MeanOverCallCounts"; 00669 00670 stat_map_type::iterator statIter = statData.end(); 00671 timer_map_t::const_iterator it = globalTimerData.begin(); 00672 for (int k = 0; it != globalTimerData.end(); ++k, ++it) { 00673 std::vector<std::pair<double, double> > curData (4); 00674 curData[0] = minTimingsAndCallCounts[k]; 00675 curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]); 00676 curData[2] = maxTimingsAndCallCounts[k]; 00677 curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]); 00678 00679 // statIter gives an insertion location hint that makes each 00680 // insertion O(1), since we remember the location of the last 00681 // insertion. 00682 statIter = statData.insert (statIter, std::make_pair (it->first, curData)); 00683 } 00684 } 00685 00686 00703 RCP<const Comm<int> > 00704 getDefaultComm () 00705 { 00706 // The default communicator. If Trilinos was built with MPI 00707 // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet 00708 // been initialized, it's not valid to use the communicator!) 00709 // Otherwise, this should be a "serial" (no MPI, one "process") 00710 // communicator. 00711 RCP<const Comm<int> > comm = DefaultComm<int>::getComm (); 00712 00713 #ifdef HAVE_MPI 00714 { 00715 int mpiHasBeenStarted = 0; 00716 MPI_Initialized (&mpiHasBeenStarted); 00717 if (! mpiHasBeenStarted) { 00718 // Make pComm a new "serial communicator." 00719 comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ())); 00720 } 00721 } 00722 #endif // HAVE_MPI 00723 return comm; 00724 } 00725 00726 } // namespace (anonymous) 00727 00728 00729 void 00730 TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData, 00731 std::vector<std::string>& statNames, 00732 Ptr<const Comm<int> > comm, 00733 const ECounterSetOp setOp) 00734 { 00735 // Collect local timer data and names. Filter out timers with 00736 // zero call counts if writeZeroTimers is false. 00737 timer_map_t localTimerData; 00738 Array<std::string> localTimerNames; 00739 const bool writeZeroTimers = false; 00740 collectLocalTimerDataAndNames (localTimerData, localTimerNames, 00741 counters(), writeZeroTimers); 00742 // Merge the local timer data and names into global timer data and 00743 // names. 00744 timer_map_t globalTimerData; 00745 Array<std::string> globalTimerNames; 00746 const bool alwaysWriteLocal = false; 00747 collectGlobalTimerData (globalTimerData, globalTimerNames, 00748 localTimerData, localTimerNames, 00749 comm, alwaysWriteLocal, setOp); 00750 // Compute statistics on the data. 00751 computeGlobalTimerStats (statData, statNames, comm, globalTimerData); 00752 } 00753 00754 00755 void 00756 TimeMonitor::summarize (Ptr<const Comm<int> > comm, 00757 std::ostream& out, 00758 const bool alwaysWriteLocal, 00759 const bool writeGlobalStats, 00760 const bool writeZeroTimers, 00761 const ECounterSetOp setOp) 00762 { 00763 // 00764 // We can't just call computeGlobalTimerStatistics(), since 00765 // summarize() has different options that affect whether global 00766 // statistics are computed and printed. 00767 // 00768 const int numProcs = comm->getSize(); 00769 const int myRank = comm->getRank(); 00770 00771 // Collect local timer data and names. Filter out timers with 00772 // zero call counts if writeZeroTimers is false. 00773 timer_map_t localTimerData; 00774 Array<std::string> localTimerNames; 00775 collectLocalTimerDataAndNames (localTimerData, localTimerNames, 00776 counters(), writeZeroTimers); 00777 00778 // If we're computing global statistics, merge the local timer 00779 // data and names into global timer data and names, and compute 00780 // global timer statistics. Otherwise, leave the global data 00781 // empty. 00782 timer_map_t globalTimerData; 00783 Array<std::string> globalTimerNames; 00784 stat_map_type statData; 00785 std::vector<std::string> statNames; 00786 if (writeGlobalStats) { 00787 collectGlobalTimerData (globalTimerData, globalTimerNames, 00788 localTimerData, localTimerNames, 00789 comm, alwaysWriteLocal, setOp); 00790 // Compute statistics on the data, but only if the communicator 00791 // contains more than one process. Otherwise, statistics don't 00792 // make sense and we don't print them (see below). 00793 if (numProcs > 1) { 00794 computeGlobalTimerStats (statData, statNames, comm, globalTimerData); 00795 } 00796 } 00797 00798 // Precision of floating-point numbers in the table. 00799 const int precision = format().precision(); 00800 00801 // All columns of the table, in order. 00802 Array<TableColumn> tableColumns; 00803 00804 // Labels of all the columns of the table. 00805 // We will append to this when we add each column. 00806 Array<std::string> titles; 00807 00808 // Widths (in number of characters) of each column. 00809 // We will append to this when we add each column. 00810 Array<int> columnWidths; 00811 00812 // Table column containing all timer names. If writeGlobalStats 00813 // is true, we use the global timer names, otherwise we use the 00814 // local timer names. We build the table on all processes 00815 // redundantly, but only print on Rank 0. 00816 { 00817 titles.append ("Timer Name"); 00818 00819 // The column labels depend on whether we are computing global statistics. 00820 TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames); 00821 tableColumns.append (nameCol); 00822 00823 // Each column is as wide as it needs to be to hold both its 00824 // title and all of the column data. This column's title is the 00825 // current last entry of the titles array. 00826 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol)); 00827 } 00828 00829 // Table column containing local timer stats, if applicable. We 00830 // only write local stats if asked, only on MPI Proc 0, and only 00831 // if there is more than one MPI process in the communicator 00832 // (otherwise local stats == global stats, so we just print the 00833 // global stats). In this case, we've padded the local data on 00834 // Proc 0 if necessary to match the global timer list, so that the 00835 // columns have the same number of rows. 00836 if (alwaysWriteLocal && numProcs > 1 && myRank == 0) { 00837 titles.append ("Local time (num calls)"); 00838 00839 // Copy local timer data out of the array-of-structs into 00840 // separate arrays, for display in the table. 00841 Array<double> localTimings; 00842 Array<double> localNumCalls; 00843 for (timer_map_t::const_iterator it = localTimerData.begin(); 00844 it != localTimerData.end(); ++it) { 00845 localTimings.push_back (it->second.first); 00846 localNumCalls.push_back (static_cast<double> (it->second.second)); 00847 } 00848 TableColumn timeAndCalls (localTimings, localNumCalls, precision, true); 00849 tableColumns.append (timeAndCalls); 00850 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls)); 00851 } 00852 00853 if (writeGlobalStats) { 00854 // If there's only 1 process in the communicator, don't display 00855 // statistics; statistics don't make sense in that case. Just 00856 // display the timings and call counts. If there's more than 1 00857 // process, do display statistics. 00858 if (numProcs == 1) { 00859 // Extract timings and the call counts from globalTimerData. 00860 Array<double> globalTimings; 00861 Array<double> globalNumCalls; 00862 for (timer_map_t::const_iterator it = globalTimerData.begin(); 00863 it != globalTimerData.end(); ++it) { 00864 globalTimings.push_back (it->second.first); 00865 globalNumCalls.push_back (static_cast<double> (it->second.second)); 00866 } 00867 // Print the table column. 00868 titles.append ("Global time (num calls)"); 00869 TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true); 00870 tableColumns.append (timeAndCalls); 00871 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls)); 00872 } 00873 else { // numProcs > 1 00874 // Print a table column for each statistic. statNames and 00875 // each value in statData use the same ordering, so we can 00876 // iterate over valid indices of statNames to display the 00877 // statistics in the right order. 00878 const timer_map_t::size_type numGlobalTimers = globalTimerData.size(); 00879 for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) { 00880 // Extract lists of timings and their call counts for the 00881 // current statistic. 00882 Array<double> statTimings (numGlobalTimers); 00883 Array<double> statCallCounts (numGlobalTimers); 00884 stat_map_type::const_iterator it = statData.begin(); 00885 for (int k = 0; it != statData.end(); ++it, ++k) { 00886 statTimings[k] = (it->second[statInd]).first; 00887 statCallCounts[k] = (it->second[statInd]).second; 00888 } 00889 // Print the table column. 00890 const std::string& statisticName = statNames[statInd]; 00891 const std::string titleString = statisticName; 00892 titles.append (titleString); 00893 TableColumn timeAndCalls (statTimings, statCallCounts, precision, true); 00894 tableColumns.append (timeAndCalls); 00895 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls)); 00896 } 00897 } 00898 } 00899 00900 // Print the whole table to the given output stream on MPI Rank 0. 00901 format().setColumnWidths (columnWidths); 00902 if (myRank == 0) { 00903 std::ostringstream theTitle; 00904 theTitle << "TimeMonitor results over " << numProcs << " processor" 00905 << (numProcs > 1 ? "s" : ""); 00906 format().writeWholeTable (out, theTitle.str(), titles, tableColumns); 00907 } 00908 } 00909 00910 void 00911 TimeMonitor::summarize (std::ostream &out, 00912 const bool alwaysWriteLocal, 00913 const bool writeGlobalStats, 00914 const bool writeZeroTimers, 00915 const ECounterSetOp setOp) 00916 { 00917 // The default communicator. If Trilinos was built with MPI 00918 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should 00919 // be a "serial" (no MPI, one "process") communicator. 00920 RCP<const Comm<int> > comm = getDefaultComm(); 00921 00922 summarize (comm.ptr(), out, alwaysWriteLocal, 00923 writeGlobalStats, writeZeroTimers, setOp); 00924 } 00925 00926 void 00927 TimeMonitor::computeGlobalTimerStatistics (stat_map_type& statData, 00928 std::vector<std::string>& statNames, 00929 const ECounterSetOp setOp) 00930 { 00931 // The default communicator. If Trilinos was built with MPI 00932 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should 00933 // be a "serial" (no MPI, one "process") communicator. 00934 RCP<const Comm<int> > comm = getDefaultComm(); 00935 00936 computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp); 00937 } 00938 00939 00940 void TimeMonitor:: 00941 summarizeToYaml (Ptr<const Comm<int> > comm, std::ostream &out) 00942 { 00943 #ifdef HAVE_TEUCHOS_YAML_CPP 00944 // const bool writeGlobalStats = true; 00945 // const bool writeZeroTimers = true; 00946 // const bool alwaysWriteLocal = false; 00947 const ECounterSetOp setOp = Intersection; 00948 00949 stat_map_type statData; 00950 std::vector<std::string> statNames; 00951 computeGlobalTimerStatistics (statData, statNames, setOp); 00952 00953 const int numProcs = comm->getSize(); 00954 const int myRank = comm->getRank(); 00955 00956 if (myRank == 0) { 00957 YAML::Emitter emi; 00958 emi << YAML::BeginDoc; // Begin YAML output 00959 emi << "Teuchos::TimeMonitor timing results"; 00960 emi << YAML::BeginMap // Begin timing results map 00961 << YAML::Key << "Number of processes" 00962 << YAML::Value << numProcs 00963 << YAML::Key << "Global timer statistics" 00964 << YAML::Value; 00965 // For each timer name, print all its statistics. 00966 emi << YAML::BeginMap; // Begin timer names 00967 for (stat_map_type::const_iterator statDataIter = statData.begin(); 00968 statDataIter != statData.end(); ++statDataIter) { 00969 // Key: Timer's name 00970 emi << YAML::Key << statDataIter->first; 00971 // Value: The timer's statistics, as a map. 00972 emi << YAML::Value << YAML::BeginMap; // Begin current timer's statistics 00973 for (std::vector<std::string>::size_type statInd = 0; 00974 statInd < statNames.size (); ++statInd) { 00975 // Key: current statistic's name. 00976 emi << YAML::Key << statNames[statInd] 00977 << YAML::Value; 00978 // Value is a map: "Time (s)" => time in seconds for current 00979 // statistic, "Call count" => call count for current statistic. 00980 const double curTime = (statDataIter->second)[statInd].first; 00981 const double curCallCount = (statDataIter->second)[statInd].second; 00982 emi << YAML::BeginMap 00983 << YAML::Key << "Time (s)" 00984 << YAML::Value << curTime 00985 << YAML::Key << "Call count" 00986 << YAML::Value << curCallCount 00987 << YAML::EndMap; 00988 } 00989 emi << YAML::EndMap; // End current timer's statistics 00990 } 00991 emi << YAML::EndMap; // End timer names 00992 emi << YAML::EndMap; // End timing results map 00993 emi << YAML::EndDoc; // End YAML output 00994 00995 // Write YAML output to the given output stream. 00996 out << emi.c_str (); 00997 } 00998 #else // Don't HAVE_TEUCHOS_YAML_CPP 00999 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Teuchos::TimeMonitor: YAML output currently requires building Trilinos with the yaml-cpp library. Please download and install yaml-cpp from http://code.google.com/p/yaml-cpp/. Then, enable yaml-cpp support when building Trilinos: 1. Set the CMake Boolean option TPL_ENABLE_yaml-cpp to ON. 2. Set the CMake option yaml-cpp_INCLUDE_DIRS to the path of the yaml-cpp include files (not including the yaml-cpp directory in include/). 3. Set the CMake option yaml-cpp_LIBRARY_DIRS to the location of the yaml-cpp library. 4. Clear the CMake cache if necesssary. 5. Run CMake again and rebuild Trilinos."); 01000 #endif // HAVE_TEUCHOS_YAML_CPP 01001 } 01002 01003 void TimeMonitor:: 01004 summarizeToYaml (std::ostream &out) 01005 { 01006 // The default communicator. If Trilinos was built with MPI 01007 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should 01008 // be a "serial" (no MPI, one "process") communicator. 01009 RCP<const Comm<int> > comm = getDefaultComm (); 01010 01011 summarizeToYaml (comm.ptr (), out); 01012 } 01013 01014 // Default value is false. We'll set to true once 01015 // setReportParameters() completes successfully. 01016 bool TimeMonitor::setParams_ = false; 01017 01018 // We have to declare all of these here in order to avoid linker errors. 01019 TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = 01020 TimeMonitor::REPORT_FORMAT_TABLE; 01021 ECounterSetOp TimeMonitor::setOp_ = Intersection; 01022 bool TimeMonitor::alwaysWriteLocal_ = false; 01023 bool TimeMonitor::writeGlobalStats_ = true; 01024 bool TimeMonitor::writeZeroTimers_ = true; 01025 01026 void 01027 TimeMonitor::setReportFormatParameter (ParameterList& plist) 01028 { 01029 const std::string name ("Report format"); 01030 const std::string defaultValue ("Table"); 01031 const std::string docString ("Output format for report of timer statistics"); 01032 Array<std::string> strings; 01033 Array<std::string> docs; 01034 Array<ETimeMonitorReportFormat> values; 01035 01036 strings.push_back ("YAML"); 01037 docs.push_back ("YAML (see yaml.org) format"); 01038 values.push_back (REPORT_FORMAT_YAML); 01039 strings.push_back ("Table"); 01040 docs.push_back ("Tabular format via Teuchos::TableFormat"); 01041 values.push_back (REPORT_FORMAT_TABLE); 01042 01043 setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue, 01044 docString, 01045 strings (), docs (), 01046 values (), &plist); 01047 } 01048 01049 void 01050 TimeMonitor::setSetOpParameter (ParameterList& plist) 01051 { 01052 const std::string name ("How to merge timer sets"); 01053 const std::string defaultValue ("Intersection"); 01054 const std::string docString ("How to merge differing sets of timers " 01055 "across processes"); 01056 Array<std::string> strings; 01057 Array<std::string> docs; 01058 Array<ECounterSetOp> values; 01059 01060 strings.push_back ("Intersection"); 01061 docs.push_back ("Compute intersection of timer sets over processes"); 01062 values.push_back (Intersection); 01063 strings.push_back ("Union"); 01064 docs.push_back ("Compute union of timer sets over processes"); 01065 values.push_back (Union); 01066 01067 setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString, 01068 strings (), docs (), values (), 01069 &plist); 01070 } 01071 01072 RCP<const ParameterList> 01073 TimeMonitor::getValidReportParameters () 01074 { 01075 // Our implementation favors recomputation over persistent 01076 // storage. That is, we simply recreate the list every time we 01077 // need it. 01078 RCP<ParameterList> plist = parameterList ("TimeMonitor::report"); 01079 01080 const bool alwaysWriteLocal = false; 01081 const bool writeGlobalStats = true; 01082 const bool writeZeroTimers = true; 01083 01084 setReportFormatParameter (*plist); 01085 setSetOpParameter (*plist); 01086 plist->set ("alwaysWriteLocal", alwaysWriteLocal, 01087 "Always output local timers' values on Proc 0"); 01088 plist->set ("writeGlobalStats", writeGlobalStats, "Always output global " 01089 "statistics, even if there is only one process in the " 01090 "communicator"); 01091 plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for " 01092 "timers that have never been called"); 01093 return rcp_const_cast<const ParameterList> (plist); 01094 } 01095 01096 void 01097 TimeMonitor::setReportParameters (const RCP<ParameterList>& params) 01098 { 01099 ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE; 01100 ECounterSetOp setOp = Intersection; 01101 bool alwaysWriteLocal = false; 01102 bool writeGlobalStats = true; 01103 bool writeZeroTimers = true; 01104 01105 if (params.is_null ()) { 01106 // If we've set parameters before, leave their current values. 01107 // Otherwise, set defaults (below). 01108 if (setParams_) { 01109 return; 01110 } 01111 } 01112 else { // params is nonnull. Let's read it! 01113 params->validateParametersAndSetDefaults (*getValidReportParameters ()); 01114 01115 reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format"); 01116 setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets"); 01117 alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal"); 01118 writeGlobalStats = params->get<bool> ("writeGlobalStats"); 01119 writeZeroTimers = params->get<bool> ("writeZeroTimers"); 01120 } 01121 // Defer setting state until here, to ensure the strong exception 01122 // guarantee for this method (either it throws with no externally 01123 // visible state changes, or it returns normally). 01124 reportFormat_ = reportFormat; 01125 setOp_ = setOp; 01126 alwaysWriteLocal_ = alwaysWriteLocal; 01127 writeGlobalStats_ = writeGlobalStats; 01128 writeZeroTimers_ = writeZeroTimers; 01129 01130 setParams_ = true; // Yay, we successfully set parameters! 01131 } 01132 01133 void 01134 TimeMonitor::report (Ptr<const Comm<int> > comm, 01135 std::ostream& out, 01136 const RCP<ParameterList>& params) 01137 { 01138 setReportParameters (params); 01139 01140 if (reportFormat_ == REPORT_FORMAT_YAML) { 01141 summarizeToYaml (comm, out); 01142 } 01143 else if (reportFormat_ == REPORT_FORMAT_TABLE) { 01144 summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_, 01145 writeZeroTimers_, setOp_); 01146 } 01147 else { 01148 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: " 01149 "Invalid report format. This should never happen; ParameterList " 01150 "validation should have caught this. Please report this bug to the " 01151 "Teuchos developers."); 01152 } 01153 } 01154 01155 void 01156 TimeMonitor::report (std::ostream& out, 01157 const RCP<ParameterList>& params) 01158 { 01159 RCP<const Comm<int> > comm = getDefaultComm (); 01160 report (comm.ptr (), out, params); 01161 } 01162 01163 } // namespace Teuchos
1.7.4