Sierra Toolkit Version of the Day
PrintTimer.cpp
/*------------------------------------------------------------------------*/
/*                 Copyright 2010 - 2011 Sandia Corporation.              */
/*  Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive   */
/*  license for use of this work by or on behalf of the U.S. Government.  */
/*  Export of this program may require a license from the                 */
/*  United States Government.                                             */
/*------------------------------------------------------------------------*/

#include <stk_util/diag/PrintTimer.hpp>
#include <stk_util/util/PrintTable.hpp>

#include <algorithm>
#include <functional>
#include <iomanip>
#include <limits>
#include <list>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <utility>
#include <vector>

#include <stk_util/diag/Writer.hpp>
#include <stk_util/diag/WriterManip.hpp>
#include <stk_util/diag/WriterExt.hpp>
#include <stk_util/util/string_case_compare.hpp>
#include <stk_util/util/Marshal.hpp>

namespace stk {
namespace diag {
namespace {
struct ParallelTimer;
}}

template <class T>
Marshal &operator<<(Marshal &mout, const diag::Timer::Metric<T> &t);

Marshal &operator<<(Marshal &mout, const diag::Timer &t);

Marshal &operator>>(Marshal &min, diag::ParallelTimer &t);
}

namespace stk {
namespace diag {

namespace {

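/**
 * @brief Percent formats the ratio of two values as a percentage for table
 * output.  A zero numerator prints as "(0.00%)" and a zero denominator
 * prints as "( NaN)" rather than dividing by zero.
 */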
struct Percent
{
  Percent(double numerator, double denominator)
    : m_numerator(numerator),
      m_denominator(denominator)
  {}

  std::ostream &operator()(std::ostream &os) const;

private:
  double        m_numerator;
  double        m_denominator;
};

std::ostream &
Percent::operator()(
  std::ostream &  os) const
{
  std::ostringstream strout;

  if (m_numerator == 0.0)
    strout << "(0.00%)";
  else if (m_denominator == 0.0)
    strout << "( NaN)";
  else {
    double ratio = m_numerator/m_denominator*100.0;
    if (ratio < 0.01)
      strout << "(<0.01%)";
    else if (ratio >= 100.0)
      strout << "(" << std::setw(5) << std::setprecision(1) << std::fixed << ratio << "%)";
    else
      strout << "(" << std::setw(5) << std::setprecision(2) << std::fixed << ratio << "%)";
  }

  return os << strout.str();
}

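/**
 * @brief Stream insertion operator for Percent; delegates to
 * Percent::operator()() so the formatted ratio lands on the stream.
 */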
inline std::ostream &operator<<(std::ostream &os, const Percent &p) {
  return p(os);
}

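/**
 * @brief ParallelTimer mirrors a stk::diag::Timer after the per-process
 * timers have been gathered to the root rank.  Each Metric carries the
 * local value plus the sum, minimum, and maximum accumulated across all
 * ranks, and m_subtimerList reproduces the timer hierarchy.
 */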
struct ParallelTimer
{
  template <typename T>
  struct Metric
  {
    Metric()
      : m_value(0),
        m_checkpoint(0),
        m_sum(0.0),
        m_min(std::numeric_limits<double>::max()),
        m_max(0.0)
    {}

    typename MetricTraits<T>::Type  m_value;        // Metric value on this rank
    typename MetricTraits<T>::Type  m_checkpoint;   // Value at the last checkpoint
    double                          m_sum;          // Sum of values across ranks
    double                          m_min;          // Minimum value across ranks
    double                          m_max;          // Maximum value across ranks

    void accumulate(const Metric<T> &metric, bool checkpoint) {
      double value = static_cast<double>(metric.m_value);
      if (checkpoint)
        value -= static_cast<double>(metric.m_checkpoint);

      m_sum += value;
      m_min = std::min(m_min, value);
      m_max = std::max(m_max, value);
    }

    Writer &dump(Writer &dout) const {
      if (dout.shouldPrint()) {
        dout << "Metric<" << typeid(typename MetricTraits<T>::Type) << ">" << push << dendl;
        dout << "m_value " << m_value << dendl;
        dout << "m_checkpoint " << m_checkpoint << dendl;
        dout << "m_sum " << m_sum << dendl;
        dout << "m_min " << m_min << dendl;
        dout << "m_max " << m_max << dendl;
        dout << pop;
      }
      return dout;
    }
  };

  ParallelTimer()
    : m_name(),
      m_timerMask(0),
      m_subtimerLapCount(0),
      m_lapCount(),
      m_cpuTime(),
      m_wallTime(),
      m_MPICount(),
      m_MPIByteCount(),
      m_heapAlloc(),
      m_subtimerList()
  {}

  ParallelTimer(const ParallelTimer &parallel_timer)
    : m_name(parallel_timer.m_name),
      m_timerMask(parallel_timer.m_timerMask),
      m_subtimerLapCount(parallel_timer.m_subtimerLapCount),
      m_lapCount(parallel_timer.m_lapCount),
      m_cpuTime(parallel_timer.m_cpuTime),
      m_wallTime(parallel_timer.m_wallTime),
      m_MPICount(parallel_timer.m_MPICount),
      m_MPIByteCount(parallel_timer.m_MPIByteCount),
      m_heapAlloc(parallel_timer.m_heapAlloc),
      m_subtimerList(parallel_timer.m_subtimerList)
  {}

  ParallelTimer &operator=(const ParallelTimer &parallel_timer) {
    m_name = parallel_timer.m_name;
    m_timerMask = parallel_timer.m_timerMask;
    m_subtimerLapCount = parallel_timer.m_subtimerLapCount;
    m_lapCount = parallel_timer.m_lapCount;
    m_cpuTime = parallel_timer.m_cpuTime;
    m_wallTime = parallel_timer.m_wallTime;
    m_MPICount = parallel_timer.m_MPICount;
    m_MPIByteCount = parallel_timer.m_MPIByteCount;
    m_heapAlloc = parallel_timer.m_heapAlloc;
    m_subtimerList = parallel_timer.m_subtimerList;

    return *this;
  }

  template <class T>
  const Metric<T> &getMetric() const;

  std::string                   m_name;                 // Name of the timer
  TimerMask                     m_timerMask;            // Timer enable mask
  double                        m_subtimerLapCount;     // Lap count summed over subtimers

  Metric<LapCount>              m_lapCount;             // Number of laps
  Metric<CPUTime>               m_cpuTime;              // CPU time
  Metric<WallTime>              m_wallTime;             // Wall-clock time
  Metric<MPICount>              m_MPICount;             // MPI call count
  Metric<MPIByteCount>          m_MPIByteCount;         // MPI bytes transferred
  Metric<HeapAlloc>             m_heapAlloc;            // Heap allocated

  std::list<ParallelTimer>      m_subtimerList;         // Subordinate timers

  Writer &dump(Writer &dout) const;
};

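// Explicit specializations mapping each metric tag type to the
// corresponding ParallelTimer member.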
template<>
const ParallelTimer::Metric<LapCount> &
ParallelTimer::getMetric<LapCount>() const {
  return m_lapCount;
}

template<>
const ParallelTimer::Metric<CPUTime> &
ParallelTimer::getMetric<CPUTime>() const {
  return m_cpuTime;
}

template<>
const ParallelTimer::Metric<WallTime> &
ParallelTimer::getMetric<WallTime>() const {
  return m_wallTime;
}

template<>
const ParallelTimer::Metric<MPICount> &
ParallelTimer::getMetric<MPICount>() const {
  return m_MPICount;
}

template<>
const ParallelTimer::Metric<MPIByteCount> &
ParallelTimer::getMetric<MPIByteCount>() const {
  return m_MPIByteCount;
}

template<>
const ParallelTimer::Metric<HeapAlloc> &
ParallelTimer::getMetric<HeapAlloc>() const {
  return m_heapAlloc;
}

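// Writer insertion operators for diagnostic output; both delegate to dump().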
template <typename T>
Writer &operator<<(Writer &dout, const ParallelTimer::Metric<T> &t) {
  return t.dump(dout);
}

Writer &operator<<(Writer &dout, const ParallelTimer &parallel_timer) {
  return parallel_timer.dump(dout);
}

Writer &
ParallelTimer::dump(Writer &dout) const {
  if (dout.shouldPrint()) {
    dout << "ParallelTimer " << m_name << push << dendl;
    dout << "m_name " << m_name << dendl;
    dout << "m_timerMask " << hex << m_timerMask << dendl;
    dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl;
    dout << "m_lapCount " << m_lapCount << dendl;
    dout << "m_cpuTime " << m_cpuTime << dendl;
    dout << "m_wallTime " << m_wallTime << dendl;
    dout << "m_MPICount " << m_MPICount << dendl;
    dout << "m_MPIByteCount " << m_MPIByteCount << dendl;
    dout << "m_heapAlloc " << m_heapAlloc << dendl;
    dout << "m_subtimerList " << m_subtimerList << dendl;
    dout << pop;
  }
  return dout;
}

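/**
 * @brief finder is a predicate for std::find_if that matches a
 * ParallelTimer by name, ignoring case.
 */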
#ifdef __INTEL_COMPILER
#pragma warning(push)
#pragma warning(disable: 444)
#endif
class finder : public std::unary_function<ParallelTimer, bool>
{
public:
  finder(const std::string &name)
    : m_name(name)
  {}

  bool operator()(const ParallelTimer &parallel_timer) const {
    return equal_case(parallel_timer.m_name, m_name);
  }

private:
  std::string           m_name;
};
#ifdef __INTEL_COMPILER
#pragma warning(pop)
#endif

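/**
 * @brief merge_parallel_timer accumulates the metrics of p1 into p0 and
 * recursively merges the subtimer trees, matching subtimers by name.  When
 * checkpoint is true, only the portion of each metric accumulated since
 * the last checkpoint is merged.
 */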
void
merge_parallel_timer(
  ParallelTimer &       p0,
  const ParallelTimer & p1,
  bool                  checkpoint)
{
  p0.m_timerMask = p1.m_timerMask;
  p0.m_subtimerLapCount += p1.m_subtimerLapCount;
  p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint);
  p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint);
  p0.m_wallTime.accumulate(p1.m_wallTime, checkpoint);
  p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint);
  p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint);
  p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint);

  for (std::list<ParallelTimer>::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) {
    std::list<ParallelTimer>::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name));
    if (p0_it == p0.m_subtimerList.end()) {
      // First time this subtimer has been seen; add it, then merge into the copy.
      p0.m_subtimerList.push_back(*p1_it);
      p0_it = --p0.m_subtimerList.end();
    }
    merge_parallel_timer(*p0_it, *p1_it, checkpoint);
  }
}

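/**
 * @brief collect_timers marshals root_timer on every rank, gathers the
 * marshalled strings to the root rank with MPI_Gather/MPI_Gatherv, and
 * merges them into parallel_timer.  Only the root rank receives a
 * meaningful result.
 */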
void
collect_timers(
  Timer &               root_timer,
  ParallelTimer &       parallel_timer,
  bool                  checkpoint,
  ParallelMachine       comm)
{
  Marshal mout;
  mout << root_timer;

#ifdef STK_HAS_MPI
  const int parallel_root = 0;
  const int parallel_size = parallel_machine_size(comm);
  const int parallel_rank = parallel_machine_rank(comm);

  // Gather the send counts on the root processor
  std::string send_string(mout.str());

  ParallelTimer root_parallel_timer;

  // We need to gather the timer data in a number of 'cycles' where we
  // only receive from a portion of the other processors each cycle.
  // This is because buffer allocation failures have been observed for
  // runs on very large numbers of processors if the 'root' processor
  // tries to allocate a buffer large enough to hold timing data from
  // all other processors.
  int num_cycles = 16;
  if (parallel_size < 1024) {
    // If fewer than 1024 processors, just do them all at once.
    num_cycles = 1;
  }

  std::vector<char> buffer;

  for (int ii = 0; ii < num_cycles; ++ii) {
    std::vector<int> recv_count(parallel_size, 0);
    int * const recv_count_ptr = &recv_count[0];

    // send_count is the amount of data this processor needs to send.
    int send_count = send_string.size();

    // Should this processor send on the current cycle?  If not, set send_count to 0.
    if ((parallel_rank + ii) % num_cycles != 0) {
      send_count = 0;
    }

    int result = MPI_Gather(&send_count, 1, MPI_INT,
                            recv_count_ptr, 1, MPI_INT,
                            parallel_root, comm);
    if (MPI_SUCCESS != result) {
      std::ostringstream message;
      message << "stk::diag::collect_timers FAILED: MPI_Gather = " << result;
      throw std::runtime_error(message.str());
    }

    // Receive counts are only non-zero on the root processor:
    std::vector<int> recv_displ(parallel_size + 1, 0);

    for (int i = 0; i < parallel_size; ++i) {
      recv_displ[i + 1] = recv_displ[i] + recv_count[i];
    }

    const int recv_size = recv_displ[parallel_size];

    buffer.assign(recv_size, 0);

    {
      const char * const send_ptr = send_string.data();
      char * const recv_ptr = recv_size ? &buffer[0] : 0;
      int * const recv_displ_ptr = &recv_displ[0];

      result = MPI_Gatherv((void *) send_ptr, send_count, MPI_CHAR,
                           recv_ptr, recv_count_ptr, recv_displ_ptr, MPI_CHAR,
                           parallel_root, comm);
      if (MPI_SUCCESS != result) {
        std::ostringstream message;
        message << "stk::diag::collect_timers FAILED: MPI_Gatherv = " << result;
        throw std::runtime_error(message.str());
      }

      std::vector<ParallelTimer> parallel_timer_vector;
      parallel_timer_vector.reserve(parallel_size);

      if (parallel_rank == parallel_root) {
        for (int j = 0; j < parallel_size; ++j) {
          int received_count = recv_displ[j + 1] - recv_displ[j];
          if (received_count > 0) {
            // Grow parallel_timer_vector by 1:
            parallel_timer_vector.resize(parallel_timer_vector.size() + 1);
            Marshal min(std::string(recv_ptr + recv_displ[j], recv_ptr + recv_displ[j + 1]));
            // Unmarshal this processor's data into the last entry of parallel_timer_vector:
            min >> parallel_timer_vector[parallel_timer_vector.size() - 1];
          }
        }

        if (send_count > 0)
          root_parallel_timer = parallel_timer_vector[0];

        for (size_t j = 0; j < parallel_timer_vector.size(); ++j)
          merge_parallel_timer(root_parallel_timer, parallel_timer_vector[j], checkpoint);
      }
    }
  }
  parallel_timer = root_parallel_timer;
#endif
}

// PrintTable &printTable(PrintTable &table, MPI_Comm mpi_comm, MetricsMask metrics_mask) const;

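/**
 * @brief printSubtable writes one row per timer to the table, recursing
 * through subtimers with increasing indentation.  Each enabled metric
 * prints its accumulated lap value and its percentage of the root timer's
 * value.
 */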
PrintTable &
printSubtable(
  PrintTable &          table,
  const Timer &         root_timer,
  const Timer &         timer,
  MetricsMask           metrics_mask,
  int                   depth,
  bool                  timer_checkpoint)
{
  if (timer.getSubtimerLapCount() != 0.0) {
    if (timer.shouldRecord()) {
      if (timer.getTimerMask() == 0 || timer.getMetric<LapCount>().getAccumulatedLap(timer_checkpoint) > 0) {
        table << justify(PrintTable::Cell::LEFT) << indent(depth) << timer.getName() << end_col
              << justify(PrintTable::Cell::RIGHT) << timer.getMetric<LapCount>().getAccumulatedLap(timer_checkpoint) << end_col;

        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint)) << end_col;
      }
      else
        table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.getName() << end_col;

      table << end_row;
      depth++;
    }

    for (TimerList::const_iterator it = timer.begin(); it != timer.end(); ++it)
      printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint);
  }

  return table;
}

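/**
 * @brief printSubtable overload for gathered ParallelTimer data; each
 * enabled metric prints the across-rank sum, minimum, and maximum together
 * with their percentages of the root timer's sum.
 */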
PrintTable &
printSubtable(
  PrintTable &          table,
  const ParallelTimer & root_timer,
  const ParallelTimer & timer,
  MetricsMask           metrics_mask,
  int                   depth,
  bool                  timer_checkpoint)
{
  if (timer.m_subtimerLapCount != 0.0) {
    if (timer.m_timerMask == 0 || timer.getMetric<LapCount>().m_sum > 0) {
      table << justify(PrintTable::Cell::LEFT) << indent(depth) << timer.m_name << end_col
            << justify(PrintTable::Cell::RIGHT) << timer.getMetric<LapCount>().m_sum << end_col;

      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_sum, root_timer.getMetric<CPUTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_min, root_timer.getMetric<CPUTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_max, root_timer.getMetric<CPUTime>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_sum, root_timer.getMetric<WallTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_min, root_timer.getMetric<WallTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_max, root_timer.getMetric<WallTime>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_sum, root_timer.getMetric<MPICount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_min, root_timer.getMetric<MPICount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_max, root_timer.getMetric<MPICount>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_sum, root_timer.getMetric<MPIByteCount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_min, root_timer.getMetric<MPIByteCount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_max, root_timer.getMetric<MPIByteCount>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_sum, root_timer.getMetric<HeapAlloc>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_min, root_timer.getMetric<HeapAlloc>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_max, root_timer.getMetric<HeapAlloc>().m_sum) << end_col;
    }
    else
      table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.m_name << end_col;

    table << end_row;
    depth++;
  }

  for (std::list<ParallelTimer>::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it)
    printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint);

  return table;
}

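/**
 * @brief printTable builds the single-process timer table: one header row
 * of metric names, then a row per timer from printSubtable.  When
 * timer_checkpoint is set, the root timer is checkpointed after printing
 * so the next report shows deltas.
 */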
PrintTable &
printTable(
  PrintTable &          table,
  Timer &               root_timer,
  MetricsMask           metrics_mask,
  size_t                name_width,
  bool                  timer_checkpoint)
{
  updateRootTimer(root_timer);

  root_timer.accumulateSubtimerLapCounts();

  if (metrics_mask & getEnabledTimerMetricsMask()) {
    table.setAutoEndCol(false);

    table << cell_width(name_width) << justify(PrintTable::Cell::CENTER) << "Timer" << (timer_checkpoint ? " (delta time)" : "") << end_col
          << justify(PrintTable::Cell::CENTER) << "Count"  << end_col;

    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col;

    table << end_header;

    printSubtable(table, root_timer, root_timer, metrics_mask, 0, timer_checkpoint);

    if (timer_checkpoint)
      root_timer.checkpoint();
  }

  return table;
}

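/**
 * @brief printTable overload for parallel runs: gathers all ranks' timers
 * to the root rank via collect_timers, then prints Sum/Min/Max columns per
 * metric under a two-row header.  Only rank 0 fills in the table.
 */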
PrintTable &
printTable(
  PrintTable &          table,
  Timer &               root_timer,
  MetricsMask           metrics_mask,
  size_t                name_width,
  bool                  timer_checkpoint,
  ParallelMachine       parallel_machine)
{
  updateRootTimer(root_timer);

  root_timer.accumulateSubtimerLapCounts();

  ParallelTimer parallel_timer;

  stk::diag::collect_timers(root_timer, parallel_timer, timer_checkpoint, parallel_machine);

  int parallel_rank = parallel_machine_rank(parallel_machine);
  if (parallel_rank == 0) {
    if (metrics_mask & getEnabledTimerMetricsMask()) {
      table.setAutoEndCol(false);

      table << end_col << end_col;

      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col;

      table << end_header;
      table << cell_width(name_width) << justify(PrintTable::Cell::CENTER) << "Timer" << (timer_checkpoint ? " (delta time)" : "") << end_col
            << justify(PrintTable::Cell::CENTER) << "Count"  << end_col;

      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;

      table << end_header;

      printSubtable(table, parallel_timer, parallel_timer, metrics_mask, 0, timer_checkpoint);
    }

    if (timer_checkpoint)
      root_timer.checkpoint();
  }

  return table;
}

} // namespace <empty>

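/**
 * @brief printTimersTable prints root_timer's metrics to os as a formatted
 * table, using a 40-character timer-name column.  The parallel overload
 * falls back to the serial path when the communicator has a single rank.
 */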
std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask metrics_mask, bool timer_checkpoint)
{
  stk::PrintTable print_table;

  printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint);

  os << print_table;

  return os;
}

std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask metrics_mask, bool timer_checkpoint, ParallelMachine parallel_machine)
{
  stk::PrintTable print_table;

  int parallel_size = parallel_machine_size(parallel_machine);
  if (parallel_size == 1)
    printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint);
  else
    printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint, parallel_machine);

  os << print_table;

  return os;
}

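// A minimal usage sketch (illustrative only, not part of this translation
// unit).  It assumes the Timer API declared in stk_util/diag/Timer.hpp --
// createRootTimer, TimerSet, TimeBlock, and the METRICS_ALL mask -- whose
// exact spellings may differ by version; run_application() is a
// hypothetical stand-in for the caller's work.
//
//   stk::diag::TimerSet timer_set(0 /* enabled timer mask; app-specific */);
//   stk::diag::Timer root_timer = stk::diag::createRootTimer("Application", timer_set);
//   {
//     stk::diag::TimeBlock time_block(root_timer);   // starts/stops the timer via RAII
//     run_application();
//   }
//   stk::diag::printTimersTable(std::cout, root_timer, stk::diag::METRICS_ALL, false);
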

// std::ostream &printXML(std::ostream &os, MPI_Comm mpi_comm, MetricsMask metrics_mask) const;
std::ostream &printXML(std::ostream &os, MetricsMask metrics_mask, bool timer_checkpoint);

std::ostream &printSubXML(std::ostream &os, MetricsMask metrics_mask, int depth, bool timer_checkpoint);

} // namespace diag

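// Marshal (de)serialization for timers.  Each Timer is written as its name,
// mask, and subtimer lap count, followed by the six metrics (two accumulated
// lap values each) and, recursively, the subtimer list; operator>> reads the
// same layout back into a ParallelTimer.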
Marshal &operator<<(stk::Marshal &mout, const diag::Timer &t);

template <class T>
Marshal &operator<<(Marshal &mout, const diag::Timer::Metric<T> &t) {
  mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true);

  return mout;
}

Marshal &operator<<(Marshal &mout, const diag::Timer &t) {
  mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount()
       << t.getMetric<diag::LapCount>() << t.getMetric<diag::CPUTime>() << t.getMetric<diag::WallTime>()
       << t.getMetric<diag::MPICount>() << t.getMetric<diag::MPIByteCount>() << t.getMetric<diag::HeapAlloc>();

  mout << t.getTimerList();

  return mout;
}

Marshal &operator>>(Marshal &min, diag::ParallelTimer &t) {
  min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount
      >> t.m_lapCount.m_value
      >> t.m_lapCount.m_checkpoint
      >> t.m_cpuTime.m_value
      >> t.m_cpuTime.m_checkpoint
      >> t.m_wallTime.m_value
      >> t.m_wallTime.m_checkpoint
      >> t.m_MPICount.m_value
      >> t.m_MPICount.m_checkpoint
      >> t.m_MPIByteCount.m_value
      >> t.m_MPIByteCount.m_checkpoint
      >> t.m_heapAlloc.m_value
      >> t.m_heapAlloc.m_checkpoint;

  min >> t.m_subtimerList;

  return min;
}

} // namespace stk