|
Sacado Package Browser (Single Doxygen Collection) Version of the Day
|
00001 // $Id$ 00002 // $Source$ 00003 // @HEADER 00004 // *********************************************************************** 00005 // 00006 // Sacado Package 00007 // Copyright (2006) Sandia Corporation 00008 // 00009 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00010 // the U.S. Government retains certain rights in this software. 00011 // 00012 // This library is free software; you can redistribute it and/or modify 00013 // it under the terms of the GNU Lesser General Public License as 00014 // published by the Free Software Foundation; either version 2.1 of the 00015 // License, or (at your option) any later version. 00016 // 00017 // This library is distributed in the hope that it will be useful, but 00018 // WITHOUT ANY WARRANTY; without even the implied warranty of 00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00020 // Lesser General Public License for more details. 00021 // 00022 // You should have received a copy of the GNU Lesser General Public 00023 // License along with this library; if not, write to the Free Software 00024 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00025 // USA 00026 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps 00027 // (etphipp@sandia.gov). 00028 // 00029 // *********************************************************************** 00030 // @HEADER 00031 00032 #include "Sacado_Random.hpp" 00033 #include "Sacado.hpp" 00034 #include "Teuchos_BLAS.hpp" 00035 #include "Sacado_Fad_BLAS.hpp" 00036 00037 #include "Teuchos_Time.hpp" 00038 #include "Teuchos_CommandLineProcessor.hpp" 00039 00040 // A performance test that compares the cost of differentiating BLAS routines 00041 // with Fad 00042 00043 double 00044 do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k, 00045 unsigned int nloop) 00046 { 00047 Sacado::Random<double> urand(0.0, 1.0); 00048 Teuchos::BLAS<int,double> blas; 00049 00050 std::vector<double> A(m*k), B(k*n), C(m*n); 00051 for (unsigned int j=0; j<k; j++) 00052 for (unsigned int i=0; i<m; i++) 00053 A[i+j*m] = urand.number(); 00054 for (unsigned int j=0; j<n; j++) 00055 for (unsigned int i=0; i<k; i++) 00056 B[i+j*k] = urand.number(); 00057 for (unsigned int j=0; j<n; j++) 00058 for (unsigned int i=0; i<m; i++) 00059 C[i+j*m] = urand.number(); 00060 double alpha = urand.number(); 00061 double beta = urand.number(); 00062 00063 Teuchos::Time timer("Teuchos Double GEMM", false); 00064 timer.start(true); 00065 for (unsigned int j=0; j<nloop; j++) { 00066 blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m, 00067 &B[0], k, beta, &C[0], m); 00068 } 00069 timer.stop(); 00070 00071 return timer.totalElapsedTime() / nloop; 00072 } 00073 00074 double 00075 do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop) 00076 { 00077 Sacado::Random<double> urand(0.0, 1.0); 00078 Teuchos::BLAS<int,double> blas; 00079 00080 std::vector<double> A(m*n), B(n), C(m); 00081 for (unsigned int j=0; j<n; j++) { 00082 for (unsigned int i=0; i<m; i++) 00083 A[i+j*m] = urand.number(); 00084 B[j] = urand.number(); 00085 } 00086 for (unsigned int i=0; i<m; i++) 00087 C[i] = urand.number(); 00088 double alpha = urand.number(); 00089 double beta = urand.number(); 00090 00091 Teuchos::Time timer("Teuchos Double GEMV", false); 00092 timer.start(true); 00093 for (unsigned int j=0; j<nloop; j++) { 00094 blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1); 00095 } 00096 timer.stop(); 00097 00098 return timer.totalElapsedTime() / nloop; 00099 } 00100 00101 double 00102 do_time_teuchos_double_dot(unsigned int m, unsigned int nloop) 00103 { 00104 Sacado::Random<double> urand(0.0, 1.0); 00105 Teuchos::BLAS<int,double> blas; 00106 00107 std::vector<double> X(m), Y(m); 00108 for (unsigned int i=0; i<m; i++) { 00109 X[i] = urand.number(); 00110 Y[i] = urand.number(); 00111 } 00112 00113 Teuchos::Time timer("Teuchos Double DOT", false); 00114 timer.start(true); 00115 double z; 00116 for (unsigned int j=0; j<nloop; j++) { 00117 z = blas.DOT(m, &X[0], 1, &Y[0], 1); 00118 } 00119 timer.stop(); 00120 00121 return timer.totalElapsedTime() / nloop; 00122 } 00123 00124 template <typename FadType> 00125 double 00126 do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k, 00127 unsigned int ndot, unsigned int nloop) 00128 { 00129 Sacado::Random<double> urand(0.0, 1.0); 00130 Teuchos::BLAS<int,FadType> blas; 00131 00132 std::vector<FadType> A(m*k), B(k*n), C(m*n); 00133 for (unsigned int j=0; j<k; j++) { 00134 for (unsigned int i=0; i<m; i++) { 00135 A[i+j*m] = FadType(ndot, urand.number()); 00136 for (unsigned int l=0; l<ndot; l++) 00137 A[i+j*m].fastAccessDx(l) = urand.number(); 00138 } 00139 } 00140 for (unsigned int j=0; j<n; j++) { 00141 for (unsigned int i=0; i<k; i++) { 00142 B[i+j*k] = FadType(ndot, urand.number()); 00143 for (unsigned int l=0; l<ndot; l++) 00144 B[i+j*k].fastAccessDx(l) = urand.number(); 00145 } 00146 } 00147 for (unsigned int j=0; j<n; j++) { 00148 for (unsigned int i=0; i<m; i++) { 00149 C[i+j*m] = FadType(ndot, urand.number()); 00150 for (unsigned int l=0; l<ndot; l++) 00151 C[i+j*m].fastAccessDx(l) = urand.number(); 00152 } 00153 } 00154 FadType alpha(ndot, urand.number()); 00155 FadType beta(ndot, urand.number()); 00156 for (unsigned int l=0; l<ndot; l++) { 00157 alpha.fastAccessDx(l) = urand.number(); 00158 beta.fastAccessDx(l) = urand.number(); 00159 } 00160 00161 Teuchos::Time timer("Teuchos Fad GEMM", false); 00162 timer.start(true); 00163 for (unsigned int j=0; j<nloop; j++) { 00164 blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m, 00165 &B[0], k, beta, &C[0], m); 00166 } 00167 timer.stop(); 00168 00169 return timer.totalElapsedTime() / nloop; 00170 } 00171 00172 template <typename FadType> 00173 double 00174 do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, 00175 unsigned int nloop) 00176 { 00177 Sacado::Random<double> urand(0.0, 1.0); 00178 Teuchos::BLAS<int,FadType> blas; 00179 00180 std::vector<FadType> A(m*n), B(n), C(m); 00181 for (unsigned int j=0; j<n; j++) { 00182 for (unsigned int i=0; i<m; i++) { 00183 //A[i+j*m] = urand.number(); 00184 A[i+j*m] = FadType(ndot, urand.number()); 00185 for (unsigned int k=0; k<ndot; k++) 00186 A[i+j*m].fastAccessDx(k) = urand.number(); 00187 } 00188 B[j] = FadType(ndot, urand.number()); 00189 for (unsigned int k=0; k<ndot; k++) 00190 B[j].fastAccessDx(k) = urand.number(); 00191 } 00192 for (unsigned int i=0; i<m; i++) { 00193 C[i] = FadType(ndot, urand.number()); 00194 for (unsigned int k=0; k<ndot; k++) 00195 C[i].fastAccessDx(k) = urand.number(); 00196 } 00197 FadType alpha(ndot, urand.number()); 00198 FadType beta(ndot, urand.number()); 00199 for (unsigned int k=0; k<ndot; k++) { 00200 alpha.fastAccessDx(k) = urand.number(); 00201 beta.fastAccessDx(k) = urand.number(); 00202 } 00203 00204 Teuchos::Time timer("Teuchos Fad GEMV", false); 00205 timer.start(true); 00206 for (unsigned int j=0; j<nloop; j++) { 00207 blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1); 00208 } 00209 timer.stop(); 00210 00211 return timer.totalElapsedTime() / nloop; 00212 } 00213 00214 template <typename FadType> 00215 double 00216 do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop) 00217 { 00218 Sacado::Random<double> urand(0.0, 1.0); 00219 Teuchos::BLAS<int,FadType> blas; 00220 00221 std::vector<FadType> X(m), Y(m); 00222 for (unsigned int i=0; i<m; i++) { 00223 X[i] = FadType(ndot, urand.number()); 00224 Y[i] = FadType(ndot, urand.number()); 00225 for (unsigned int k=0; k<ndot; k++) { 00226 X[i].fastAccessDx(k) = urand.number(); 00227 Y[i].fastAccessDx(k) = urand.number(); 00228 } 00229 } 00230 00231 Teuchos::Time timer("Teuchos Fad DOT", false); 00232 timer.start(true); 00233 for (unsigned int j=0; j<nloop; j++) { 00234 FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1); 00235 } 00236 timer.stop(); 00237 00238 return timer.totalElapsedTime() / nloop; 00239 } 00240 00241 template <typename FadType> 00242 double 00243 do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k, 00244 unsigned int ndot, unsigned int nloop, bool use_dynamic) 00245 { 00246 Sacado::Random<double> urand(0.0, 1.0); 00247 unsigned int sz = (m*k+k*n+m*n)*(1+ndot); 00248 Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz); 00249 00250 Sacado::Fad::Vector<unsigned int, FadType> A(m*k,ndot), B(k*n,ndot), C 00251 (m*n,ndot); 00252 for (unsigned int j=0; j<k; j++) { 00253 for (unsigned int i=0; i<m; i++) { 00254 A[i+j*m] = FadType(ndot, urand.number()); 00255 for (unsigned int l=0; l<ndot; l++) 00256 A[i+j*m].fastAccessDx(l) = urand.number(); 00257 } 00258 } 00259 for (unsigned int j=0; j<n; j++) { 00260 for (unsigned int i=0; i<k; i++) { 00261 B[i+j*k] = FadType(ndot, urand.number()); 00262 for (unsigned int l=0; l<ndot; l++) 00263 B[i+j*k].fastAccessDx(l) = urand.number(); 00264 } 00265 } 00266 for (unsigned int j=0; j<n; j++) { 00267 for (unsigned int i=0; i<m; i++) { 00268 C[i+j*m] = FadType(ndot, urand.number()); 00269 for (unsigned int l=0; l<ndot; l++) 00270 C[i+j*m].fastAccessDx(l) = urand.number(); 00271 } 00272 } 00273 FadType alpha(ndot, urand.number()); 00274 FadType beta(ndot, urand.number()); 00275 for (unsigned int l=0; l<ndot; l++) { 00276 alpha.fastAccessDx(l) = urand.number(); 00277 beta.fastAccessDx(l) = urand.number(); 00278 } 00279 00280 Teuchos::Time timer("Teuchos Fad GEMM", false); 00281 timer.start(true); 00282 for (unsigned int j=0; j<nloop; j++) { 00283 blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m, 00284 &B[0], k, beta, &C[0], m); 00285 } 00286 timer.stop(); 00287 00288 return timer.totalElapsedTime() / nloop; 00289 } 00290 00291 template <typename FadType> 00292 double 00293 do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, 00294 unsigned int nloop, bool use_dynamic) 00295 { 00296 Sacado::Random<double> urand(0.0, 1.0); 00297 unsigned int sz = m*n*(1+ndot) + 2*n*(1+ndot); 00298 Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz); 00299 00300 Sacado::Fad::Vector<unsigned int, FadType> A(m*n,ndot), B(n,ndot), C(m,ndot); 00301 for (unsigned int j=0; j<n; j++) { 00302 for (unsigned int i=0; i<m; i++) { 00303 //A[i+j*m] = urand.number(); 00304 A[i+j*m] = FadType(ndot, urand.number()); 00305 for (unsigned int k=0; k<ndot; k++) 00306 A[i+j*m].fastAccessDx(k) = urand.number(); 00307 } 00308 B[j] = FadType(ndot, urand.number()); 00309 for (unsigned int k=0; k<ndot; k++) 00310 B[j].fastAccessDx(k) = urand.number(); 00311 } 00312 for (unsigned int i=0; i<m; i++) { 00313 C[i] = FadType(ndot, urand.number()); 00314 for (unsigned int k=0; k<ndot; k++) 00315 C[i].fastAccessDx(k) = urand.number(); 00316 } 00317 FadType alpha(ndot, urand.number()); 00318 FadType beta(ndot, urand.number()); 00319 for (unsigned int k=0; k<ndot; k++) { 00320 alpha.fastAccessDx(k) = urand.number(); 00321 beta.fastAccessDx(k) = urand.number(); 00322 } 00323 00324 Teuchos::Time timer("Teuchos Fad GEMV", false); 00325 timer.start(true); 00326 for (unsigned int j=0; j<nloop; j++) { 00327 blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1); 00328 } 00329 timer.stop(); 00330 00331 return timer.totalElapsedTime() / nloop; 00332 } 00333 00334 template <typename FadType> 00335 double 00336 do_time_sacado_fad_dot(unsigned int m, unsigned int ndot, 00337 unsigned int nloop, bool use_dynamic) 00338 { 00339 Sacado::Random<double> urand(0.0, 1.0); 00340 unsigned int sz = 2*m*(1+ndot); 00341 Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz); 00342 00343 Sacado::Fad::Vector<unsigned int, FadType> X(m,ndot), Y(m,ndot); 00344 for (unsigned int i=0; i<m; i++) { 00345 X[i] = FadType(ndot, urand.number()); 00346 Y[i] = FadType(ndot, urand.number()); 00347 for (unsigned int k=0; k<ndot; k++) { 00348 X[i].fastAccessDx(k) = urand.number(); 00349 Y[i].fastAccessDx(k) = urand.number(); 00350 } 00351 } 00352 00353 Teuchos::Time timer("Teuchos Fad DOT", false); 00354 timer.start(true); 00355 for (unsigned int j=0; j<nloop; j++) { 00356 FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1); 00357 } 00358 timer.stop(); 00359 00360 return timer.totalElapsedTime() / nloop; 00361 } 00362 00363 int main(int argc, char* argv[]) { 00364 int ierr = 0; 00365 00366 try { 00367 double t, tb; 00368 int p = 2; 00369 int w = p+7; 00370 00371 // Set up command line options 00372 Teuchos::CommandLineProcessor clp; 00373 clp.setDocString("This program tests the speed of differentiating BLAS routines using Fad"); 00374 int m = 10; 00375 clp.setOption("m", &m, "Number of rows"); 00376 int n = 10; 00377 clp.setOption("n", &n, "Number of columns"); 00378 int k = 10; 00379 clp.setOption("k", &k, "Number of columns for GEMM"); 00380 int ndot = 10; 00381 clp.setOption("ndot", &ndot, "Number of derivative components"); 00382 int nloop = 100000; 00383 clp.setOption("nloop", &nloop, "Number of loops"); 00384 int dynamic = 1; 00385 clp.setOption("dynamic", &dynamic, "Use dynamic allocation"); 00386 00387 // Parse options 00388 Teuchos::CommandLineProcessor::EParseCommandLineReturn 00389 parseReturn= clp.parse(argc, argv); 00390 if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) 00391 return 1; 00392 bool use_dynamic = (dynamic != 0); 00393 00394 std::cout.setf(std::ios::scientific); 00395 std::cout.precision(p); 00396 std::cout << "Times (sec) for m = " << m << ", n = " << n 00397 << ", ndot = " << ndot << ", nloop = " << nloop 00398 << ", dynamic = " << use_dynamic << ": " 00399 << std::endl; 00400 00401 tb = do_time_teuchos_double_gemm(m,n,k,nloop); 00402 std::cout << "GEMM: " << std::setw(w) << tb << std::endl; 00403 00404 t = do_time_sacado_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop,use_dynamic); 00405 std::cout << "Sacado DVFad GEMM: " << std::setw(w) << t << "\t" 00406 << std::setw(w) << t/tb << std::endl; 00407 00408 t = do_time_sacado_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop,use_dynamic); 00409 std::cout << "Sacado DFad GEMM: " << std::setw(w) << t << "\t" 00410 << std::setw(w) << t/tb << std::endl; 00411 00412 t = do_time_teuchos_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop); 00413 std::cout << "Teuchos DFad GEMM: " << std::setw(w) << t << "\t" 00414 << std::setw(w) << t/tb << std::endl; 00415 00416 // t = do_time_teuchos_fad_gemm< Sacado::ELRFad::DFad<double> >(m,n,k,ndot,nloop); 00417 // std::cout << "Teuchos ELRDFad GEMM: " << std::setw(w) << t << "\t" 00418 // << std::setw(w) << t/tb << std::endl; 00419 00420 t = do_time_teuchos_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop); 00421 std::cout << "Teuchos DVFad GEMM: " << std::setw(w) << t << "\t" 00422 << std::setw(w) << t/tb << std::endl; 00423 00424 std::cout << std::endl; 00425 00426 tb = do_time_teuchos_double_gemv(m,n,nloop); 00427 std::cout << "GEMV: " << std::setw(w) << tb << std::endl; 00428 00429 t = do_time_sacado_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10,use_dynamic); 00430 std::cout << "Sacado DVFad GEMV: " << std::setw(w) << t << "\t" 00431 << std::setw(w) << t/tb << std::endl; 00432 00433 t = do_time_sacado_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10,use_dynamic); 00434 std::cout << "Sacado DFad GEMV: " << std::setw(w) << t << "\t" 00435 << std::setw(w) << t/tb << std::endl; 00436 00437 t = do_time_teuchos_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10); 00438 std::cout << "Teuchos DFad GEMV: " << std::setw(w) << t << "\t" 00439 << std::setw(w) << t/tb << std::endl; 00440 00441 // t = do_time_teuchos_fad_gemv< Sacado::ELRFad::DFad<double> >(m,n,ndot,nloop*10); 00442 // std::cout << "Teuchos ELRDFad GEMV: " << std::setw(w) << t << "\t" 00443 // << std::setw(w) << t/tb << std::endl; 00444 00445 t = do_time_teuchos_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10); 00446 std::cout << "Teuchos DVFad GEMV: " << std::setw(w) << t << "\t" 00447 << std::setw(w) << t/tb << std::endl; 00448 00449 std::cout << std::endl; 00450 00451 tb = do_time_teuchos_double_dot(m,nloop*100); 00452 std::cout << "DOT: " << std::setw(w) << tb << std::endl; 00453 00454 t = do_time_sacado_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100,use_dynamic); 00455 std::cout << "Sacado DVFad DOT: " << std::setw(w) << t << "\t" 00456 << std::setw(w) << t/tb << std::endl; 00457 00458 t = do_time_sacado_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100,use_dynamic); 00459 std::cout << "Sacado DFad DOT: " << std::setw(w) << t << "\t" 00460 << std::setw(w) << t/tb << std::endl; 00461 00462 t = do_time_teuchos_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100); 00463 std::cout << "Teuchos DFad DOT: " << std::setw(w) << t << "\t" 00464 << std::setw(w) << t/tb << std::endl; 00465 00466 // t = do_time_teuchos_fad_dot< Sacado::ELRFad::DFad<double> >(m,ndot,nloop*100); 00467 // std::cout << "Teuchos ELRDFad DOT: " << std::setw(w) << t << "\t" 00468 // << std::setw(w) << t/tb << std::endl; 00469 00470 t = do_time_teuchos_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100); 00471 std::cout << "Teuchos DVFad DOT: " << std::setw(w) << t << "\t" 00472 << std::setw(w) << t/tb << std::endl; 00473 00474 } 00475 catch (std::exception& e) { 00476 std::cout << e.what() << std::endl; 00477 ierr = 1; 00478 } 00479 catch (const char *s) { 00480 std::cout << s << std::endl; 00481 ierr = 1; 00482 } 00483 catch (...) { 00484 std::cout << "Caught unknown exception!" << std::endl; 00485 ierr = 1; 00486 } 00487 00488 return ierr; 00489 }
1.7.4