Sierra Toolkit Version of the Day
PrintTimer.cpp
00001 /*------------------------------------------------------------------------*/
00002 /*                 Copyright 2010 - 2011 Sandia Corporation.              */
00003 /*  Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive   */
00004 /*  license for use of this work by or on behalf of the U.S. Government.  */
00005 /*  Export of this program may require a license from the                 */
00006 /*  United States Government.                                             */
00007 /*------------------------------------------------------------------------*/
00008 
00009 #include <stk_util/diag/PrintTimer.hpp>
00010 #include <stk_util/util/PrintTable.hpp>
00011 
00012 #include <iomanip>
00013 #include <ostream>
00014 #include <stdexcept>
00015 #include <typeinfo>
00016 #include <utility>
00017 #include <algorithm>
00018 #include <limits>
00019 
00020 #include <stk_util/diag/Writer.hpp>
00021 #include <stk_util/diag/WriterManip.hpp>
00022 #include <stk_util/diag/WriterExt.hpp>
00023 #include <stk_util/util/string_case_compare.hpp>
00024 #include <stk_util/util/Marshal.hpp>
00025 
00026 namespace stk {
00027 namespace diag {
00028 namespace {
00029 struct ParallelTimer;
00030 }}
00031 
00032 template <class T>
00033 Marshal &operator<<(Marshal &mout, const diag::Timer::Metric<T> &t);
00034 
00035 Marshal &operator<<(Marshal &mout, const diag::Timer &t);
00036 
00037 Marshal &operator>>(Marshal &min, diag::ParallelTimer &t);
00038 }
00039 
00040 namespace stk {
00041 namespace diag {
00042 
00043 namespace {
00044 
/**
 * Stream helper that renders <b>numerator</b> / <b>denominator</b> as a
 * parenthesized percentage.
 *
 * The text produced is:
 *   - "(0.00%)"  when the numerator is exactly zero,
 *   - "( NaN)"   when the denominator is exactly zero (and the numerator is not),
 *   - "(<0.01%)" when the percentage is below 0.01,
 *   - "(ddd.d%)" (one decimal) when the percentage is at least 100,
 *   - "(dd.dd%)" (two decimals, number padded to five characters) otherwise.
 *
 * The text is written to the target stream as a single string, so a field
 * width set on the target stream applies to the whole parenthesized text.
 */
struct Percent
{
  Percent(double numerator, double denominator)
    : m_numerator(numerator),
      m_denominator(denominator)
  {}

  /**
   * Writes the formatted percentage to <b>os</b>.
   *
   * @param os  stream to write to.
   * @return    <b>os</b>.
   */
  std::ostream &operator()(std::ostream &os) const;

private:
  double    m_numerator;        ///< Value being compared
  double    m_denominator;      ///< Value representing 100%
};


std::ostream &
Percent::operator()(
  std::ostream &  os) const
{
  // The fixed-text cases need no numeric formatting at all.
  if (m_numerator == 0.0)
    return os << std::string("(0.00%)");

  if (m_denominator == 0.0)
    return os << std::string("( NaN)");

  const double percentage = m_numerator/m_denominator*100.0;
  if (percentage < 0.01)
    return os << std::string("(<0.01%)");

  // Format into a scratch stream so the precision/fixed flags never leak into os.
  std::ostringstream text;
  const int digits = percentage >= 100.0 ? 1 : 2;
  text << "(" << std::fixed << std::setprecision(digits) << std::setw(5) << percentage << "%)";

  return os << text.str();
}


/**
 * Inserts a Percent into an output stream by invoking its function call operator.
 *
 * @param os  stream to write to.
 * @param p   percentage to format.
 * @return    <b>os</b>.
 */
inline std::ostream &operator<<(std::ostream &os, const Percent &p) {
  return p(os);
}
00112 
00113 struct ParallelTimer
00114 {
00115   template <typename T>
00116   struct Metric
00117   {
00118     Metric()
00119       : m_value(0),
00120         m_sum(0.0),
00121         m_min(std::numeric_limits<double>::max()),
00122         m_max(0.0)
00123     {}
00124 
00125     typename MetricTraits<T>::Type  m_value;  
00126     typename MetricTraits<T>::Type  m_checkpoint;  
00127     double                          m_sum;    
00128     double                              m_min;    
00129     double                    m_max;          
00130 
00131     void accumulate(const Metric<T> &metric, bool checkpoint) {
00132       double value = static_cast<double>(metric.m_value);
00133       if (checkpoint)
00134         value -= static_cast<double>(metric.m_checkpoint);
00135       
00136       m_sum += value;
00137       m_min = std::min(m_min, value);
00138       m_max = std::max(m_max, value);
00139     }
00140 
00141     Writer &dump(Writer &dout) const {
00142       if (dout.shouldPrint()) {
00143         dout << "Metric<" << typeid(typename MetricTraits<T>::Type) << ">" << push << dendl;
00144         dout << "m_value " << m_value << dendl;
00145         dout << "m_checkpoint " << m_value << dendl;
00146         dout << "m_sum " << m_sum << dendl;
00147         dout << "m_min " << m_min << dendl;
00148         dout << "m_max " << m_max << dendl;
00149         dout << pop;
00150       }
00151       return dout;
00152     }
00153   };
00154 
00155   ParallelTimer()
00156     : m_name(),
00157       m_timerMask(0),
00158       m_subtimerLapCount(0),
00159       m_lapCount(),
00160       m_cpuTime(),
00161       m_wallTime(),
00162       m_MPICount(),
00163       m_MPIByteCount(),
00164       m_heapAlloc(),
00165       m_subtimerList()
00166   {}
00167 
00168   ParallelTimer(const ParallelTimer &parallel_timer)
00169     : m_name(parallel_timer.m_name),
00170       m_timerMask(parallel_timer.m_timerMask),
00171       m_subtimerLapCount(parallel_timer.m_subtimerLapCount),
00172       m_lapCount(parallel_timer.m_lapCount),
00173       m_cpuTime(parallel_timer.m_cpuTime),
00174       m_wallTime(parallel_timer.m_wallTime),
00175       m_MPICount(parallel_timer.m_MPICount),
00176       m_MPIByteCount(parallel_timer.m_MPIByteCount),
00177       m_heapAlloc(parallel_timer.m_heapAlloc),
00178       m_subtimerList(parallel_timer.m_subtimerList)
00179   {}
00180 
00181   ParallelTimer &operator=(const ParallelTimer &parallel_timer) {
00182     m_name = parallel_timer.m_name;
00183     m_timerMask = parallel_timer.m_timerMask;
00184     m_subtimerLapCount = parallel_timer.m_subtimerLapCount;
00185     m_lapCount = parallel_timer.m_lapCount;
00186     m_cpuTime = parallel_timer.m_cpuTime;
00187     m_wallTime = parallel_timer.m_wallTime;
00188     m_MPICount = parallel_timer.m_MPICount;
00189     m_heapAlloc = parallel_timer.m_heapAlloc;
00190     m_subtimerList = parallel_timer.m_subtimerList;
00191 
00192     return *this;
00193   }
00194 
00195   template <class T>
00196   const Metric<T> &getMetric() const;
00197 
00198   std::string                   m_name;                 
00199   TimerMask                     m_timerMask;
00200   double                        m_subtimerLapCount;     
00201 
00202   Metric<LapCount>              m_lapCount;             
00203   Metric<CPUTime>               m_cpuTime;              
00204   Metric<WallTime>              m_wallTime;             
00205   Metric<MPICount>              m_MPICount;             
00206   Metric<MPIByteCount>          m_MPIByteCount;         
00207   Metric<HeapAlloc>             m_heapAlloc;            
00208 
00209   std::list<ParallelTimer>      m_subtimerList;         
00210 
00211   Writer &dump(Writer &dout) const;
00212 };
00213 
00214 template<>
00215 const ParallelTimer::Metric<LapCount> &
00216 ParallelTimer::getMetric<LapCount>() const {
00217   return m_lapCount;
00218 }
00219 
00220 
00221 template<>
00222 const ParallelTimer::Metric<CPUTime> &
00223 ParallelTimer::getMetric<CPUTime>() const {
00224   return m_cpuTime;
00225 }
00226 
00227 
00228 template<>
00229 const ParallelTimer::Metric<WallTime> &
00230 ParallelTimer::getMetric<WallTime>() const {
00231   return m_wallTime;
00232 }
00233 
00234 
00235 template<>
00236 const ParallelTimer::Metric<MPICount> &
00237 ParallelTimer::getMetric<MPICount>() const {
00238   return m_MPICount;
00239 }
00240 
00241 
00242 template<>
00243 const ParallelTimer::Metric<MPIByteCount> &
00244 ParallelTimer::getMetric<MPIByteCount>() const {
00245   return m_MPIByteCount;
00246 }
00247 
00248 
00249 template<>
00250 const ParallelTimer::Metric<HeapAlloc> &
00251 ParallelTimer::getMetric<HeapAlloc>() const {
00252   return m_heapAlloc;
00253 }
00254 
00255 
00256 template <typename T>
00257 Writer &operator<<(Writer &dout, const ParallelTimer::Metric<T> &t) {
00258   return t.dump(dout);
00259 }
00260 
00261 Writer &operator<<(Writer &dout, const ParallelTimer &parallel_timer) {
00262   return parallel_timer.dump(dout);
00263 }
00264 
00265 Writer &
00266 ParallelTimer::dump(Writer &dout) const {
00267   if (dout.shouldPrint()) {
00268     dout << "ParallelTimer " << m_name << push << dendl;
00269     dout << "m_name " << m_name << dendl;
00270     dout << "m_timerMask " << hex << m_timerMask << dendl;
00271     dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl;
00272     dout << "m_lapCount " << m_lapCount << dendl;
00273     dout << "m_cpuTime " << m_cpuTime << dendl;
00274     dout << "m_wallTime " << m_wallTime << dendl;
00275     dout << "m_MPICount " << m_MPICount << dendl;
00276     dout << "m_MPIByteCount " << m_MPIByteCount << dendl;
00277     dout << "m_heapAlloc " << m_heapAlloc << dendl;
00278     dout << "m_subtimerList " << m_subtimerList << dendl;
00279     dout << pop;
00280   }
00281   return dout;
00282 }
00283 
00284 #ifdef __INTEL_COMPILER
00285 #pragma warning(push)
00286 #pragma warning(disable: 444)
00287 #endif
00288 class finder : public std::unary_function<ParallelTimer, bool>
00289 {
00290 public:
00291   finder(const std::string &name)
00292     : m_name(name)
00293   {}
00294 
00295   bool operator()(const ParallelTimer &parallel_timer) const {
00296     return equal_case(parallel_timer.m_name, m_name);
00297   }
00298 
00299 private:
00300   std::string           m_name;
00301 };
00302 #ifdef __INTEL_COMPILER
00303 #pragma warning(pop)
00304 #endif
00305 
00306 
00307 void
00308 merge_parallel_timer(
00309   ParallelTimer &       p0,
00310   const ParallelTimer & p1,
00311   bool                  checkpoint)
00312 {
00313   p0.m_timerMask = p1.m_timerMask;
00314   p0.m_subtimerLapCount += p1.m_subtimerLapCount;
00315   p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint);
00316   p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint);
00317   p0.m_wallTime.accumulate(p1.m_wallTime, checkpoint);
00318   p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint);
00319   p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint);
00320   p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint);
00321 
00322 
00323   for (std::list<ParallelTimer>::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) {
00324     std::list<ParallelTimer>::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name));
00325     if (p0_it == p0.m_subtimerList.end()) {
00326       p0.m_subtimerList.push_back((*p1_it));
00327       p0_it = --p0.m_subtimerList.end();
00328       merge_parallel_timer(*p0_it, *p1_it, checkpoint);
00329     }
00330     else
00331       merge_parallel_timer(*p0_it, *p1_it, checkpoint);
00332   }
00333 }
00334 
00335 
00336 void
00337 collect_timers(
00338   Timer &               root_timer, 
00339   ParallelTimer &       parallel_timer,
00340   bool                  checkpoint,
00341   ParallelMachine       comm)
00342 {
00343   Marshal mout;
00344   mout << root_timer;
00345 
00346 #ifdef STK_HAS_MPI
00347   const int parallel_root = 0 ;
00348   const int parallel_size = parallel_machine_size(comm);
00349   const int parallel_rank = parallel_machine_rank(comm);
00350 
00351   // Gather the send counts on root processor
00352   std::string send_string(mout.str());
00353   int send_count = send_string.size();
00354   std::vector<int> recv_count(parallel_size, 0);
00355   int * const recv_count_ptr = &recv_count[0] ;
00356 
00357   int result = MPI_Gather(&send_count, 1, MPI_INT,
00358                           recv_count_ptr, 1, MPI_INT,
00359                           parallel_root, comm);
00360   if (MPI_SUCCESS != result) {
00361     std::ostringstream message ;
00362     message << "stk::diag::collect_timers FAILED: MPI_Gather = " << result ;
00363     throw std::runtime_error(message.str());
00364   }
00365 
00366   // Receive counts are only non-zero on the root processor:
00367   std::vector<int> recv_displ(parallel_size + 1, 0);
00368 
00369   for (int i = 0 ; i < parallel_size ; ++i) {
00370     recv_displ[i + 1] = recv_displ[i] + recv_count[i] ;
00371   }
00372 
00373   const int recv_size = recv_displ[parallel_size] ;
00374 
00375   std::vector<char> buffer(recv_size);
00376 
00377   {
00378     const char * const send_ptr = send_string.data();
00379     char * const recv_ptr = recv_size ? & buffer[0] : 0;
00380     int * const recv_displ_ptr = & recv_displ[0] ;
00381 
00382     result = MPI_Gatherv((void *) send_ptr, send_count, MPI_CHAR,
00383                          recv_ptr, recv_count_ptr, recv_displ_ptr, MPI_CHAR,
00384                          parallel_root, comm);
00385     if (MPI_SUCCESS != result) {
00386       std::ostringstream message ;
00387       message << "stk::diag::collect_timers FAILED: MPI_Gatherv = " << result ;
00388       throw std::runtime_error(message.str());
00389     }
00390 
00391     std::vector<ParallelTimer> parallel_timer_vector(parallel_size);
00392 
00393     if (parallel_rank == parallel_root) {
00394       for (int j = 0; j < parallel_size; ++j) {
00395         Marshal min(std::string(recv_ptr + recv_displ[j], recv_ptr + recv_displ[j + 1]));
00396         min >> parallel_timer_vector[j];
00397       }
00398 
00399       parallel_timer = parallel_timer_vector[0];
00400 
00401       for (size_t j = 0; j < parallel_timer_vector.size(); ++j)
00402         merge_parallel_timer(parallel_timer, parallel_timer_vector[j], checkpoint);
00403     }
00404   }
00405 #endif
00406 }
00407 
00408 // PrintTable &printTable(PrintTable &table, MPI_Comm mpi_comm, MetricsMask metrics_mask) const;
00409 
00410 PrintTable &
00411 printSubtable(
00412   PrintTable &      table,
00413   const Timer &                 root_timer,
00414   const Timer &                 timer,
00415   MetricsMask      metrics_mask,
00416   int        depth,
00417   bool        timer_checkpoint)
00418 {
00419   if (timer.getSubtimerLapCount() != 0.0) {
00420     if (timer.shouldRecord()) {
00421       if (timer.getTimerMask() == 0 || timer.getMetric<LapCount>().getAccumulatedLap(timer_checkpoint) > 0) {
00422         table << justify(PrintTable::Cell::LEFT) << indent(depth) << timer.getName() << end_col
00423               << justify(PrintTable::Cell::RIGHT) << timer.getMetric<LapCount>().getAccumulatedLap(timer_checkpoint) << end_col;
00424 
00425         if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
00426           table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint))
00427                 << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint)) << end_col;
00428         if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
00429           table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint))
00430                 << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint)) << end_col;
00431         if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
00432           table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint))
00433                 << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint)) << end_col;
00434         if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
00435           table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint))
00436                 << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint)) << end_col;
00437         if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
00438           table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint))
00439                 << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint)) << end_col;
00440       }
00441       else
00442         table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.getName() << end_col;
00443 
00444       table << end_row;
00445       depth++;
00446     }
00447 
00448     for (TimerList::const_iterator it = timer.begin(); it != timer.end(); ++it)
00449       printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint);
00450   }
00451 
00452   return table;
00453 }
00454 
00455 
00456 PrintTable &
00457 printSubtable(
00458   PrintTable &      table,
00459   const ParallelTimer &         root_timer,
00460   const ParallelTimer &         timer,
00461   MetricsMask      metrics_mask,
00462   int        depth,
00463   bool        timer_checkpoint)
00464 {
00465   if (timer.m_subtimerLapCount != 0.0) {
00466     if (timer.m_timerMask == 0 || timer.getMetric<LapCount>().m_sum > 0) {
00467       table << justify(PrintTable::Cell::LEFT) << indent(depth) << timer.m_name << end_col
00468             << justify(PrintTable::Cell::RIGHT) << timer.getMetric<LapCount>().m_sum << end_col;
00469 
00470       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
00471         table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_sum)
00472               << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_sum, root_timer.getMetric<CPUTime>().m_sum) << end_col
00473               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_min)
00474               << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_min, root_timer.getMetric<CPUTime>().m_sum) << end_col
00475               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_max)
00476               << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_max, root_timer.getMetric<CPUTime>().m_sum) << end_col;
00477       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
00478         table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_sum)
00479               << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_sum, root_timer.getMetric<WallTime>().m_sum) << end_col
00480               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_min)
00481               << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_min, root_timer.getMetric<WallTime>().m_sum) << end_col
00482               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_max)
00483               << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_max, root_timer.getMetric<WallTime>().m_sum) << end_col;
00484       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
00485         table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_sum)
00486               << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_sum, root_timer.getMetric<MPICount>().m_sum) << end_col
00487               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_min)
00488               << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_min, root_timer.getMetric<MPICount>().m_sum) << end_col
00489               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_max)
00490               << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_max, root_timer.getMetric<MPICount>().m_sum) << end_col;
00491       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
00492         table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_sum)
00493               << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_sum, root_timer.getMetric<MPIByteCount>().m_sum) << end_col
00494               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_min)
00495               << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_min, root_timer.getMetric<MPIByteCount>().m_sum) << end_col
00496               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_max)
00497               << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_max, root_timer.getMetric<MPIByteCount>().m_sum) << end_col;
00498       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
00499         table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_sum)
00500               << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_sum, root_timer.getMetric<HeapAlloc>().m_sum) << end_col
00501               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_min)
00502               << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_min, root_timer.getMetric<HeapAlloc>().m_sum) << end_col
00503               << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_max)
00504               << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_max, root_timer.getMetric<HeapAlloc>().m_sum) << end_col;
00505     }
00506     else 
00507       table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.m_name << end_col;
00508 
00509     table << end_row;
00510     depth++;
00511   }
00512 
00513   for (std::list<ParallelTimer>::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it)
00514     printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint);
00515 
00516   return table;
00517 }
00518 
00519 
00520 PrintTable &
00521 printTable(
00522   PrintTable &          table,
00523   Timer &               root_timer,
00524   MetricsMask           metrics_mask,
00525   size_t                name_width,
00526   bool                  timer_checkpoint)
00527 {
00528   updateRootTimer(root_timer);
00529 
00530   root_timer.accumulateSubtimerLapCounts();
00531 
00532   if (metrics_mask & getEnabledTimerMetricsMask()) {
00533     table.setAutoEndCol(false);
00534 
00535     table << cell_width(name_width) << justify(PrintTable::Cell::CENTER) << "Timer" << (timer_checkpoint ? " (delta time)" : "") << end_col
00536           << justify(PrintTable::Cell::CENTER) << "Count"  << end_col;
00537 
00538     if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
00539       table << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col;
00540     if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
00541       table << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col;
00542     if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
00543       table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col;
00544     if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
00545       table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col;
00546     if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
00547       table << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col;
00548 
00549     table << end_header;
00550 
00551     printSubtable(table, root_timer, root_timer, metrics_mask, 0, timer_checkpoint);
00552 
00553     if (timer_checkpoint)
00554       root_timer.checkpoint();
00555   }
00556 
00557   return table;
00558 }
00559 
00560 
00561 PrintTable &
00562 printTable(
00563   PrintTable &          table,
00564   Timer &               root_timer,
00565   MetricsMask           metrics_mask,
00566   size_t                name_width,
00567   bool                  timer_checkpoint,
00568   ParallelMachine       parallel_machine)
00569 {
00570   updateRootTimer(root_timer);
00571 
00572   root_timer.accumulateSubtimerLapCounts();
00573 
00574   ParallelTimer parallel_timer;
00575 
00576   stk::diag::collect_timers(root_timer, parallel_timer, timer_checkpoint, parallel_machine);
00577 
00578   int parallel_rank = parallel_machine_rank(parallel_machine);
00579   if (parallel_rank == 0) {
00580     if (metrics_mask & getEnabledTimerMetricsMask()) {
00581       table.setAutoEndCol(false);
00582 
00583       table << end_col << end_col;
00584       
00585       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
00586         table << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col
00587               << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col
00588               << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col;
00589       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
00590         table << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col
00591               << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col
00592               << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col;
00593       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
00594         table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col
00595               << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col
00596               << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col;
00597       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
00598         table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col
00599               << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col
00600               << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col;
00601       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
00602         table << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col
00603               << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col
00604               << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col;
00605 
00606       table << end_header;
00607       table << cell_width(name_width) << justify(PrintTable::Cell::CENTER) << "Timer" << (timer_checkpoint ? " (delta time)" : "") << end_col
00608             << justify(PrintTable::Cell::CENTER) << "Count"  << end_col;
00609 
00610       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
00611         table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
00612               << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
00613               << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
00614       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
00615         table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
00616               << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
00617               << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
00618       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
00619         table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
00620               << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
00621               << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
00622       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
00623         table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
00624               << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
00625               << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
00626       if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
00627         table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
00628               << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
00629               << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
00630 
00631       table << end_header;
00632 
00633       printSubtable(table, parallel_timer, parallel_timer, metrics_mask, 0, timer_checkpoint);
00634     }
00635     
00636     if (timer_checkpoint)
00637       root_timer.checkpoint();
00638   }
00639 
00640   return table;
00641 }
00642 
00643 } // namespace <empty>
00644 
00645 
00646 std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask metrics_mask, bool timer_checkpoint)
00647 {
00648   stk::PrintTable print_table;
00649 
00650   printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint);
00651 
00652   os << print_table;
00653 
00654   return os;
00655 }
00656 
00657 
00658 std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask metrics_mask, bool timer_checkpoint, ParallelMachine parallel_machine)
00659 {
00660   stk::PrintTable print_table;
00661   
00662   int parallel_size = parallel_machine_size(parallel_machine);
00663   if (parallel_size == 1)
00664     printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint);
00665   else
00666     printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint, parallel_machine);
00667   
00668   os << print_table;
00669   
00670   return os;
00671 }
00672 
00673 
00674 // std::ostream &printXML(std::ostream &os, MPI_Comm mpi_comm, MetricsMask metrics_mask) const;
00675 std::ostream &printXML(std::ostream &os, MetricsMask metrics_mask, bool timer_checkpoint);
00676 
00677 std::ostream &printSubXML(std::ostream &os, MetricsMask metrics_mask, int depth, bool timer_checkpoint);
00678 
00679 } // namespace diag
00680 
00681 Marshal &operator<<(stk::Marshal &mout, const diag::Timer &t);
00682 
00683 template <class T>
00684 Marshal &operator<<(Marshal &mout, const diag::Timer::Metric<T> &t) {
00685   mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true);
00686 
00687   return mout;
00688 }
00689 
00690 Marshal &operator<<(Marshal &mout, const diag::Timer &t) {
00691   mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount()
00692        << t.getMetric<diag::LapCount>() << t.getMetric<diag::CPUTime>() << t.getMetric<diag::WallTime>()
00693        << t.getMetric<diag::MPICount>() << t.getMetric<diag::MPIByteCount>() << t.getMetric<diag::HeapAlloc>();
00694 
00695   mout << t.getTimerList();
00696 
00697   return mout;
00698 }
00699 
00700 Marshal &operator>>(Marshal &min, diag::ParallelTimer &t) {
00701   min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount
00702       >> t.m_lapCount.m_value
00703       >> t.m_lapCount.m_checkpoint
00704       >> t.m_cpuTime.m_value
00705       >> t.m_cpuTime.m_checkpoint
00706       >> t.m_wallTime.m_value
00707       >> t.m_wallTime.m_checkpoint
00708       >> t.m_MPICount.m_value
00709       >> t.m_MPICount.m_checkpoint
00710       >> t.m_MPIByteCount.m_value
00711       >> t.m_MPIByteCount.m_checkpoint
00712       >> t.m_heapAlloc.m_value
00713       >> t.m_heapAlloc.m_checkpoint;
00714 
00715   min >> t.m_subtimerList;
00716 
00717   return min;
00718 }
00719 
00720 } // namespace stk
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends