00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <cassert>
00033 #include <iomanip>
00034 #include <vector>
00035
00036 #include "Sacado_ScalarFlopCounter.hpp"
00037 #include "Sacado_DynamicArrayTraits.hpp"
00038
00039
00040 const char*
00041 Sacado::FlopCounterPack::FlopCounts::flopCountsNames[] =
00042 {
00043 "="
00044 ,"+"
00045 ,"+="
00046 ,"unary +"
00047 ,"-"
00048 ,"-="
00049 ,"unary -"
00050 ,"*"
00051 ,"*="
00052 ,"/"
00053 ,"/="
00054 ,">"
00055 ,">="
00056 ,"<"
00057 ,"<="
00058 ,"=="
00059 ,"exp"
00060 ,"log"
00061 ,"log10"
00062 ,"sqrt"
00063 ,"cos"
00064 ,"sin"
00065 ,"tan"
00066 ,"acos"
00067 ,"asin"
00068 ,"atan"
00069 ,"atan2"
00070 ,"cosh"
00071 ,"sinh"
00072 ,"tanh"
00073 ,"abs"
00074 ,"pow"
00075 ,"max"
00076 ,"min"
00077 };
00078 const char*
00079 Sacado::FlopCounterPack::FlopCounts::summaryFlopCountsNames[] =
00080 {
00081 "="
00082 ,"all +-"
00083 ,"all *"
00084 ,"all /"
00085 ,"<,>,=="
00086 ,"nonlinear"
00087 };
00088 unsigned int
00089 Sacado::FlopCounterPack::FlopCounts::flopGranularity = 100000000;
00090
00091 Sacado::FlopCounterPack::FlopCounts::FlopCounts()
00092 {
00093 reset();
00094 }
00095
00096 void
00097 Sacado::FlopCounterPack::FlopCounts::reset()
00098 {
00099 ds_array<unsigned int>::zero( &partialFlopCounts[0], int(NUM_OPS) );
00100 ds_array<unsigned int>::zero( &partialSummaryFlopCounts[0],
00101 int(NUM_SUMMARY_OPS) );
00102 ds_array<double>::zero( &flopCounts[0], int(NUM_OPS) );
00103 ds_array<double>::zero( &summaryFlopCounts[0], int(NUM_SUMMARY_OPS) );
00104 totalFlopCount = 0.0;
00105 }
00106
00107 void
00108 Sacado::FlopCounterPack::FlopCounts::finalize()
00109 {
00110 for (int i=0; i<NUM_OPS; i++) {
00111 flopCounts[i] += static_cast<double>(partialFlopCounts[i]);
00112 partialFlopCounts[i] = 0;
00113 }
00114 for (int i=0; i<NUM_SUMMARY_OPS; i++) {
00115 summaryFlopCounts[i] += static_cast<double>(partialSummaryFlopCounts[i]);
00116 partialSummaryFlopCounts[i] = 0;
00117 }
00118 totalFlopCount = 0;
00119 for (int i=0; i<NUM_OPS; i++)
00120 totalFlopCount += flopCounts[i];
00121 }
00122
00123 void
00124 Sacado::FlopCounterPack::FlopCounts::increment(Sacado::FlopCounterPack::FlopCounts::EFlopType ft)
00125 {
00126 ESummaryFlopType sft = getSummaryType(ft);
00127 if (partialFlopCounts[ft] > flopGranularity) {
00128 flopCounts[ft] += static_cast<double>(partialFlopCounts[ft]);
00129 partialFlopCounts[ft] =0;
00130 }
00131 if (partialSummaryFlopCounts[sft] > flopGranularity) {
00132 summaryFlopCounts[sft] +=
00133 static_cast<double>(partialSummaryFlopCounts[sft]);
00134 partialSummaryFlopCounts[sft] = 0;
00135 }
00136 ++partialFlopCounts[ft];
00137 ++partialSummaryFlopCounts[sft];
00138 }
00139
00140 Sacado::FlopCounterPack::FlopCounts::ESummaryFlopType
00141 Sacado::FlopCounterPack::FlopCounts::getSummaryType(Sacado::FlopCounterPack::FlopCounts::EFlopType ft)
00142 {
00143 switch(ft) {
00144 case ASSIGN:
00145 return SUMMARY_ASSIGN;
00146 break;
00147 case PLUS:
00148 case PLUS_ASSIGN:
00149 case UNARY_PLUS:
00150 case MINUS:
00151 case MINUS_ASSIGN:
00152 case UNARY_MINUS:
00153 return SUMMARY_PLUS_MINUS;
00154 break;
00155 case MULTIPLY:
00156 case MULTIPLY_ASSIGN:
00157 return SUMMARY_MULTIPLY;
00158 break;
00159 case DIVIDE:
00160 case DIVIDE_ASSIGN:
00161 return SUMMARY_DIVIDE;
00162 break;
00163 case EXP:
00164 case LOG:
00165 case LOG10:
00166 case SQRT:
00167 case COS:
00168 case SIN:
00169 case TAN:
00170 case ACOS:
00171 case ASIN:
00172 case ATAN:
00173 case ATAN2:
00174 case COSH:
00175 case SINH:
00176 case TANH:
00177 case ABS:
00178 case POW:
00179 case MAX:
00180 case MIN:
00181 return SUMMARY_NONLINEAR;
00182 break;
00183 case GREATER_THAN:
00184 case GREATER_THAN_EQUAL:
00185 case LESS_THAN:
00186 case LESS_THAN_EQUAL:
00187 case EQUAL:
00188 return SUMMARY_COMPARISON;
00189 break;
00190 default:
00191 assert(0);
00192 }
00193
00194
00195
00196 return SUMMARY_ASSIGN;
00197 }
00198
00199 std::ostream&
00200 Sacado::FlopCounterPack::printCountersTable(const int n,
00201 const char* names[],
00202 const char* abbr[],
00203 const FlopCounts counts[],
00204 std::ostream &out)
00205 {
00206 assert( n >= 1 && names && abbr && counts );
00207 const int wo = 10;
00208 const int wc = 20;
00209 const char spacero[] = "----------";
00210 const char spacerc[] = "--------------------";
00211
00212 if(names) {
00213 out << "\nLegend\n------\n";
00214 for( int j = 0; j < n; ++j )
00215 out << " " << abbr[j] << " = " << names[j] << std::endl;
00216 out << std::endl;
00217 }
00218
00219 out << std::left << " " << std::setw(wo) << "op\\count";
00220 for( int j = 0; j < n; ++j ) out << " " << std::setw(wc) << abbr[j];
00221 out << std::endl;
00222 out << std::right << " " << std::setw(wo) << spacero;
00223 for( int j = 0; j < n; ++j ) out << " " << std::setw(wc) << spacerc;
00224 out << std::endl;
00225
00226 for( int i = 0; i < FlopCounts::NUM_OPS; ++i ) {
00227 double theseFlops = 0;
00228 for( int j = 0; j < n; ++j ) theseFlops += counts[j].flopCounts[i];
00229 if(theseFlops) {
00230 out << " " << std::setw(wo) << FlopCounts::flopCountsNames[i];
00231 for( int j = 0; j < n; ++j ) out << " " << std::setw(wc) << counts[j].flopCounts[i];
00232 out << std::endl;
00233 }
00234 }
00235 out << std::right << " " << std::setw(wo) << spacero;
00236 for( int j = 0; j < n; ++j ) out << " " << std::setw(wc) << spacerc;
00237 out << std::endl;
00238
00239 std::vector<double> totalFlops(n);
00240 for( int i = 0; i < FlopCounts::NUM_SUMMARY_OPS; ++i ) {
00241 double theseFlops = 0;
00242 for( int j = 0; j < n; ++j ) {
00243 const double flops = counts[j].summaryFlopCounts[i];
00244 theseFlops += flops;
00245 totalFlops[j] += flops;
00246 }
00247 if(theseFlops) {
00248 out << " " << std::setw(wo) << FlopCounts::summaryFlopCountsNames[i];
00249 for( int j = 0; j < n; ++j )
00250 out << " " << std::setw(wc) << counts[j].summaryFlopCounts[i];
00251 out << std::endl;
00252 }
00253 }
00254 out << std::right << " " << std::setw(wo) << spacero;
00255 for( int j = 0; j < n; ++j ) out << " " << std::setw(wc) << spacerc;
00256 out << std::endl;
00257
00258 out << " " << std::setw(wo) << "all flops";
00259 for( int j = 0; j < n; ++j ) out << " " << std::setw(wc) << totalFlops[j];
00260 out << std::endl;
00261
00262 return out;
00263 }