fad_blas.cpp

Go to the documentation of this file.
00001 // $Id$ 
00002 // $Source$ 
00003 // @HEADER
00004 // ***********************************************************************
00005 // 
00006 //                           Sacado Package
00007 //                 Copyright (2006) Sandia Corporation
00008 // 
00009 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00010 // the U.S. Government retains certain rights in this software.
00011 // 
00012 // This library is free software; you can redistribute it and/or modify
00013 // it under the terms of the GNU Lesser General Public License as
00014 // published by the Free Software Foundation; either version 2.1 of the
00015 // License, or (at your option) any later version.
00016 //  
00017 // This library is distributed in the hope that it will be useful, but
00018 // WITHOUT ANY WARRANTY; without even the implied warranty of
00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00020 // Lesser General Public License for more details.
00021 //  
00022 // You should have received a copy of the GNU Lesser General Public
00023 // License along with this library; if not, write to the Free Software
00024 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00025 // USA
00026 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
00027 // (etphipp@sandia.gov).
00028 // 
00029 // ***********************************************************************
00030 // @HEADER
00031 
00032 #include "Sacado_Random.hpp"
00033 #include "Sacado.hpp"
00034 #include "Teuchos_BLAS.hpp"
00035 #include "Sacado_Fad_BLAS.hpp"
00036 
00037 #include "Teuchos_Time.hpp"
00038 #include "Teuchos_CommandLineProcessor.hpp"
00039 
00040 // A performance test that compares the cost of differentiating BLAS routines
00041 // with Fad
00042 
00043 double
00044 do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k,
00045           unsigned int nloop)
00046 {
00047   Sacado::Random<double> urand(0.0, 1.0);
00048   Teuchos::BLAS<int,double> blas;
00049 
00050   std::vector<double> A(m*k), B(k*n), C(m*n);
00051   for (unsigned int j=0; j<k; j++)
00052     for (unsigned int i=0; i<m; i++)
00053       A[i+j*m] = urand.number();
00054   for (unsigned int j=0; j<n; j++)
00055     for (unsigned int i=0; i<k; i++)
00056     B[i+j*k] = urand.number();
00057   for (unsigned int j=0; j<n; j++)
00058     for (unsigned int i=0; i<m; i++)
00059       C[i+j*m] = urand.number();
00060   double alpha = urand.number();
00061   double beta = urand.number();
00062   
00063   Teuchos::Time timer("Teuchos Double GEMM", false);
00064   timer.start(true);
00065   for (unsigned int j=0; j<nloop; j++) {
00066     blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m, 
00067         &B[0], k, beta, &C[0], m);
00068   }
00069   timer.stop();
00070 
00071   return timer.totalElapsedTime() / nloop;
00072 }
00073 
00074 double
00075 do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
00076 {
00077   Sacado::Random<double> urand(0.0, 1.0);
00078   Teuchos::BLAS<int,double> blas;
00079 
00080   std::vector<double> A(m*n), B(n), C(m);
00081   for (unsigned int j=0; j<n; j++) {
00082     for (unsigned int i=0; i<m; i++)
00083       A[i+j*m] = urand.number();
00084     B[j] = urand.number();
00085   }
00086   for (unsigned int i=0; i<m; i++)
00087     C[i] = urand.number();
00088   double alpha = urand.number();
00089   double beta = urand.number();
00090   
00091   Teuchos::Time timer("Teuchos Double GEMV", false);
00092   timer.start(true);
00093   for (unsigned int j=0; j<nloop; j++) {
00094     blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
00095   }
00096   timer.stop();
00097 
00098   return timer.totalElapsedTime() / nloop;
00099 }
00100 
00101 double
00102 do_time_teuchos_double_dot(unsigned int m, unsigned int nloop)
00103 {
00104   Sacado::Random<double> urand(0.0, 1.0);
00105   Teuchos::BLAS<int,double> blas;
00106 
00107   std::vector<double> X(m), Y(m);
00108   for (unsigned int i=0; i<m; i++) {
00109     X[i] = urand.number();
00110     Y[i] = urand.number();
00111   }
00112   
00113   Teuchos::Time timer("Teuchos Double DOT", false);
00114   timer.start(true);
00115   double z;
00116   for (unsigned int j=0; j<nloop; j++) {
00117     z = blas.DOT(m, &X[0], 1, &Y[0], 1);
00118   }
00119   timer.stop();
00120 
00121   return timer.totalElapsedTime() / nloop;
00122 }
00123 
00124 template <typename FadType>
00125 double
00126 do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k,
00127        unsigned int ndot, unsigned int nloop)
00128 {
00129   Sacado::Random<double> urand(0.0, 1.0);
00130   Teuchos::BLAS<int,FadType> blas;
00131 
00132   std::vector<FadType> A(m*k), B(k*n), C(m*n);
00133   for (unsigned int j=0; j<k; j++) {
00134     for (unsigned int i=0; i<m; i++) {
00135       A[i+j*m] = FadType(ndot, urand.number());
00136       for (unsigned int l=0; l<ndot; l++)
00137         A[i+j*m].fastAccessDx(l) = urand.number();
00138     }
00139   }
00140   for (unsigned int j=0; j<n; j++) {
00141     for (unsigned int i=0; i<k; i++) {
00142       B[i+j*k] = FadType(ndot, urand.number());
00143       for (unsigned int l=0; l<ndot; l++)
00144   B[i+j*k].fastAccessDx(l) = urand.number();
00145     }
00146   }
00147   for (unsigned int j=0; j<n; j++) {
00148     for (unsigned int i=0; i<m; i++) {
00149       C[i+j*m] = FadType(ndot, urand.number());
00150       for (unsigned int l=0; l<ndot; l++)
00151   C[i+j*m].fastAccessDx(l) = urand.number();
00152     }
00153   }
00154   FadType alpha(ndot, urand.number());
00155   FadType beta(ndot, urand.number());
00156   for (unsigned int l=0; l<ndot; l++) {
00157     alpha.fastAccessDx(l) = urand.number();
00158     beta.fastAccessDx(l) = urand.number();
00159   }
00160   
00161   Teuchos::Time timer("Teuchos Fad GEMM", false);
00162   timer.start(true);
00163   for (unsigned int j=0; j<nloop; j++) {
00164     blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m, 
00165         &B[0], k, beta, &C[0], m);
00166   }
00167   timer.stop();
00168 
00169   return timer.totalElapsedTime() / nloop;
00170 }
00171 
00172 template <typename FadType>
00173 double
00174 do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, 
00175        unsigned int nloop)
00176 {
00177   Sacado::Random<double> urand(0.0, 1.0);
00178   Teuchos::BLAS<int,FadType> blas;
00179 
00180   std::vector<FadType> A(m*n), B(n), C(m);
00181   for (unsigned int j=0; j<n; j++) {
00182     for (unsigned int i=0; i<m; i++) {
00183       //A[i+j*m] = urand.number();
00184       A[i+j*m] = FadType(ndot, urand.number());
00185       for (unsigned int k=0; k<ndot; k++)
00186         A[i+j*m].fastAccessDx(k) = urand.number();
00187     }
00188     B[j] = FadType(ndot, urand.number());
00189     for (unsigned int k=0; k<ndot; k++)
00190       B[j].fastAccessDx(k) = urand.number();
00191   }
00192   for (unsigned int i=0; i<m; i++) {
00193     C[i] = FadType(ndot, urand.number());
00194     for (unsigned int k=0; k<ndot; k++)
00195       C[i].fastAccessDx(k) = urand.number();
00196   }
00197   FadType alpha(ndot, urand.number());
00198   FadType beta(ndot, urand.number());
00199   for (unsigned int k=0; k<ndot; k++) {
00200     alpha.fastAccessDx(k) = urand.number();
00201     beta.fastAccessDx(k) = urand.number();
00202   }
00203   
00204   Teuchos::Time timer("Teuchos Fad GEMV", false);
00205   timer.start(true);
00206   for (unsigned int j=0; j<nloop; j++) {
00207     blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
00208   }
00209   timer.stop();
00210 
00211   return timer.totalElapsedTime() / nloop;
00212 }
00213 
00214 template <typename FadType>
00215 double
00216 do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
00217 {
00218   Sacado::Random<double> urand(0.0, 1.0);
00219   Teuchos::BLAS<int,FadType> blas;
00220 
00221   std::vector<FadType> X(m), Y(m);
00222   for (unsigned int i=0; i<m; i++) {
00223     X[i] = FadType(ndot, urand.number());
00224     Y[i] = FadType(ndot, urand.number());
00225     for (unsigned int k=0; k<ndot; k++) {
00226       X[i].fastAccessDx(k) = urand.number();
00227       Y[i].fastAccessDx(k) = urand.number();
00228     }
00229   }
00230   
00231   Teuchos::Time timer("Teuchos Fad DOT", false);
00232   timer.start(true);
00233   for (unsigned int j=0; j<nloop; j++) {
00234     FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
00235   }
00236   timer.stop();
00237 
00238   return timer.totalElapsedTime() / nloop;
00239 }
00240 
00241 template <typename FadType>
00242 double
00243 do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k,
00244       unsigned int ndot, unsigned int nloop, bool use_dynamic)
00245 {
00246   Sacado::Random<double> urand(0.0, 1.0);
00247   unsigned int sz = (m*k+k*n+m*n)*(1+ndot);
00248   Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
00249 
00250   Sacado::Fad::Vector<unsigned int, FadType> A(m*k,ndot), B(k*n,ndot), C
00251     (m*n,ndot);
00252   for (unsigned int j=0; j<k; j++) {
00253     for (unsigned int i=0; i<m; i++) {
00254       A[i+j*m] = FadType(ndot, urand.number());
00255       for (unsigned int l=0; l<ndot; l++)
00256         A[i+j*m].fastAccessDx(l) = urand.number();
00257     }
00258   }
00259   for (unsigned int j=0; j<n; j++) {
00260     for (unsigned int i=0; i<k; i++) {
00261       B[i+j*k] = FadType(ndot, urand.number());
00262       for (unsigned int l=0; l<ndot; l++)
00263   B[i+j*k].fastAccessDx(l) = urand.number();
00264     }
00265   }
00266   for (unsigned int j=0; j<n; j++) {
00267     for (unsigned int i=0; i<m; i++) {
00268       C[i+j*m] = FadType(ndot, urand.number());
00269       for (unsigned int l=0; l<ndot; l++)
00270   C[i+j*m].fastAccessDx(l) = urand.number();
00271     }
00272   }
00273   FadType alpha(ndot, urand.number());
00274   FadType beta(ndot, urand.number());
00275   for (unsigned int l=0; l<ndot; l++) {
00276     alpha.fastAccessDx(l) = urand.number();
00277     beta.fastAccessDx(l) = urand.number();
00278   }
00279   
00280   Teuchos::Time timer("Teuchos Fad GEMM", false);
00281   timer.start(true);
00282   for (unsigned int j=0; j<nloop; j++) {
00283     blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m, 
00284         &B[0], k, beta, &C[0], m);
00285   }
00286   timer.stop();
00287 
00288   return timer.totalElapsedTime() / nloop;
00289 }
00290 
00291 template <typename FadType>
00292 double
00293 do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, 
00294       unsigned int nloop, bool use_dynamic)
00295 {
00296   Sacado::Random<double> urand(0.0, 1.0);
00297   unsigned int sz = m*n*(1+ndot) + 2*n*(1+ndot);
00298   Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
00299 
00300   Sacado::Fad::Vector<unsigned int, FadType> A(m*n,ndot), B(n,ndot), C(m,ndot);
00301   for (unsigned int j=0; j<n; j++) {
00302     for (unsigned int i=0; i<m; i++) {
00303       //A[i+j*m] = urand.number();
00304       A[i+j*m] = FadType(ndot, urand.number());
00305       for (unsigned int k=0; k<ndot; k++)
00306         A[i+j*m].fastAccessDx(k) = urand.number();
00307     }
00308     B[j] = FadType(ndot, urand.number());
00309     for (unsigned int k=0; k<ndot; k++)
00310       B[j].fastAccessDx(k) = urand.number();
00311   }
00312   for (unsigned int i=0; i<m; i++) {
00313     C[i] = FadType(ndot, urand.number());
00314     for (unsigned int k=0; k<ndot; k++)
00315       C[i].fastAccessDx(k) = urand.number();
00316   }
00317   FadType alpha(ndot, urand.number());
00318   FadType beta(ndot, urand.number());
00319   for (unsigned int k=0; k<ndot; k++) {
00320     alpha.fastAccessDx(k) = urand.number();
00321     beta.fastAccessDx(k) = urand.number();
00322   }
00323   
00324   Teuchos::Time timer("Teuchos Fad GEMV", false);
00325   timer.start(true);
00326   for (unsigned int j=0; j<nloop; j++) {
00327     blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
00328   }
00329   timer.stop();
00330 
00331   return timer.totalElapsedTime() / nloop;
00332 }
00333 
00334 template <typename FadType>
00335 double
00336 do_time_sacado_fad_dot(unsigned int m, unsigned int ndot, 
00337            unsigned int nloop, bool use_dynamic)
00338 {
00339   Sacado::Random<double> urand(0.0, 1.0);
00340   unsigned int sz = 2*m*(1+ndot);
00341   Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
00342 
00343   Sacado::Fad::Vector<unsigned int, FadType> X(m,ndot), Y(m,ndot);
00344   for (unsigned int i=0; i<m; i++) {
00345     X[i] = FadType(ndot, urand.number());
00346     Y[i] = FadType(ndot, urand.number());
00347     for (unsigned int k=0; k<ndot; k++) {
00348       X[i].fastAccessDx(k) = urand.number();
00349       Y[i].fastAccessDx(k) = urand.number();
00350     }
00351   }
00352   
00353   Teuchos::Time timer("Teuchos Fad DOT", false);
00354   timer.start(true);
00355   for (unsigned int j=0; j<nloop; j++) {
00356     FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
00357   }
00358   timer.stop();
00359 
00360   return timer.totalElapsedTime() / nloop;
00361 }
00362 
00363 int main(int argc, char* argv[]) {
00364   int ierr = 0;
00365 
00366   try {
00367     double t, tb;
00368     int p = 2;
00369     int w = p+7;
00370 
00371     // Set up command line options
00372     Teuchos::CommandLineProcessor clp;
00373     clp.setDocString("This program tests the speed of differentiating BLAS routines using Fad");
00374     int m = 10;
00375     clp.setOption("m", &m, "Number of rows");
00376     int n = 10;
00377     clp.setOption("n", &n, "Number of columns");
00378     int k = 10;
00379     clp.setOption("k", &k, "Number of columns for GEMM");
00380     int ndot = 10;
00381     clp.setOption("ndot", &ndot, "Number of derivative components");
00382     int nloop = 100000;
00383     clp.setOption("nloop", &nloop, "Number of loops");
00384     int dynamic = 1;
00385     clp.setOption("dynamic", &dynamic, "Use dynamic allocation");
00386 
00387     // Parse options
00388     Teuchos::CommandLineProcessor::EParseCommandLineReturn
00389       parseReturn= clp.parse(argc, argv);
00390     if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
00391       return 1;
00392     bool use_dynamic = (dynamic != 0);
00393 
00394     std::cout.setf(std::ios::scientific);
00395     std::cout.precision(p);
00396     std::cout << "Times (sec) for m = " << m << ", n = " << n 
00397         << ", ndot = " << ndot << ", nloop =  " << nloop 
00398         << ", dynamic = " << use_dynamic << ":  " 
00399         << std::endl;
00400 
00401     tb = do_time_teuchos_double_gemm(m,n,k,nloop);
00402     std::cout << "GEMM:                 " << std::setw(w) << tb << std::endl;
00403 
00404     t = do_time_sacado_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop,use_dynamic);
00405     std::cout << "Sacado DVFad GEMM:    " << std::setw(w) << t << "\t" 
00406         << std::setw(w) << t/tb << std::endl;
00407 
00408     t = do_time_sacado_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop,use_dynamic);
00409     std::cout << "Sacado DFad GEMM:     " << std::setw(w) << t << "\t" 
00410         << std::setw(w) << t/tb << std::endl;
00411     
00412     t = do_time_teuchos_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop);
00413     std::cout << "Teuchos DFad GEMM:    " << std::setw(w) << t << "\t" 
00414         << std::setw(w) << t/tb << std::endl;
00415 
00416     // t = do_time_teuchos_fad_gemm< Sacado::ELRFad::DFad<double> >(m,n,k,ndot,nloop);
00417     // std::cout << "Teuchos ELRDFad GEMM:  " << std::setw(w) << t << "\t" 
00418     //        << std::setw(w) << t/tb << std::endl;
00419 
00420     t = do_time_teuchos_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop);
00421     std::cout << "Teuchos DVFad GEMM:   " << std::setw(w) << t << "\t" 
00422             << std::setw(w) << t/tb << std::endl;
00423     
00424     std::cout << std::endl;
00425 
00426     tb = do_time_teuchos_double_gemv(m,n,nloop);
00427     std::cout << "GEMV:                 " << std::setw(w) << tb << std::endl;
00428 
00429     t = do_time_sacado_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10,use_dynamic);
00430     std::cout << "Sacado DVFad GEMV:    " << std::setw(w) << t << "\t" 
00431         << std::setw(w) << t/tb << std::endl;
00432 
00433     t = do_time_sacado_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10,use_dynamic);
00434     std::cout << "Sacado DFad GEMV:     " << std::setw(w) << t << "\t" 
00435         << std::setw(w) << t/tb << std::endl;
00436     
00437     t = do_time_teuchos_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10);
00438     std::cout << "Teuchos DFad GEMV:    " << std::setw(w) << t << "\t" 
00439         << std::setw(w) << t/tb << std::endl;
00440 
00441     // t = do_time_teuchos_fad_gemv< Sacado::ELRFad::DFad<double> >(m,n,ndot,nloop*10);
00442     // std::cout << "Teuchos ELRDFad GEMV:  " << std::setw(w) << t << "\t" 
00443     //        << std::setw(w) << t/tb << std::endl;
00444 
00445     t = do_time_teuchos_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10);
00446     std::cout << "Teuchos DVFad GEMV:   " << std::setw(w) << t << "\t" 
00447             << std::setw(w) << t/tb << std::endl;
00448     
00449     std::cout << std::endl;
00450 
00451     tb = do_time_teuchos_double_dot(m,nloop*100);
00452     std::cout << "DOT:                  " << std::setw(w) << tb << std::endl;
00453 
00454     t = do_time_sacado_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100,use_dynamic);
00455     std::cout << "Sacado DVFad DOT:     " << std::setw(w) << t << "\t" 
00456         << std::setw(w) << t/tb << std::endl;
00457 
00458     t = do_time_sacado_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100,use_dynamic);
00459     std::cout << "Sacado DFad DOT:      " << std::setw(w) << t << "\t" 
00460         << std::setw(w) << t/tb << std::endl;
00461     
00462     t = do_time_teuchos_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100);
00463     std::cout << "Teuchos DFad DOT:     " << std::setw(w) << t << "\t" 
00464         << std::setw(w) << t/tb << std::endl;
00465 
00466     // t = do_time_teuchos_fad_dot< Sacado::ELRFad::DFad<double> >(m,ndot,nloop*100);
00467     // std::cout << "Teuchos ELRDFad DOT:  " << std::setw(w) << t << "\t" 
00468     //        << std::setw(w) << t/tb << std::endl;
00469 
00470     t = do_time_teuchos_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100);
00471     std::cout << "Teuchos DVFad DOT:    " << std::setw(w) << t << "\t" 
00472             << std::setw(w) << t/tb << std::endl;
00473     
00474   }
00475   catch (std::exception& e) {
00476     std::cout << e.what() << std::endl;
00477     ierr = 1;
00478   }
00479   catch (const char *s) {
00480     std::cout << s << std::endl;
00481     ierr = 1;
00482   }
00483   catch (...) {
00484     std::cout << "Caught unknown exception!" << std::endl;
00485     ierr = 1;
00486   }
00487 
00488   return ierr;
00489 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Generated on Wed Apr 13 10:19:30 2011 for Sacado Package Browser (Single Doxygen Collection) by  doxygen 1.6.3