fad_fe_jac_fill.cpp

Go to the documentation of this file.
00001 // $Id$ 
00002 // $Source$ 
00003 // @HEADER
00004 // ***********************************************************************
00005 // 
00006 //                           Sacado Package
00007 //                 Copyright (2006) Sandia Corporation
00008 // 
00009 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00010 // the U.S. Government retains certain rights in this software.
00011 // 
00012 // This library is free software; you can redistribute it and/or modify
00013 // it under the terms of the GNU Lesser General Public License as
00014 // published by the Free Software Foundation; either version 2.1 of the
00015 // License, or (at your option) any later version.
00016 //  
00017 // This library is distributed in the hope that it will be useful, but
00018 // WITHOUT ANY WARRANTY; without even the implied warranty of
00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00020 // Lesser General Public License for more details.
00021 //  
00022 // You should have received a copy of the GNU Lesser General Public
00023 // License along with this library; if not, write to the Free Software
00024 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00025 // USA
00026 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
00027 // (etphipp@sandia.gov).
00028 // 
00029 // ***********************************************************************
00030 // @HEADER
00031 
00032 #include "Sacado.hpp"
00033 #include "Sacado_Fad_SimpleFad.hpp"
00034 #include "Sacado_CacheFad_DFad.hpp"
00035 
00036 #include "Fad/fad.h"
00037 #include "TinyFadET/tfad.h"
00038 
00039 #include "Teuchos_Time.hpp"
00040 #include "Teuchos_CommandLineProcessor.hpp"
00041 
00042 // ADOL-C includes
00043 #ifdef HAVE_ADOLC
00044 #ifdef PACKAGE
00045 #undef PACKAGE
00046 #endif
00047 #ifdef PACKAGE_NAME
00048 #undef PACKAGE_NAME
00049 #endif
00050 #ifdef PACKAGE_BUGREPORT
00051 #undef PACKAGE_BUGREPORT
00052 #endif
00053 #ifdef PACKAGE_STRING
00054 #undef PACKAGE_STRING
00055 #endif
00056 #ifdef PACKAGE_TARNAME
00057 #undef PACKAGE_TARNAME
00058 #endif
00059 #ifdef PACKAGE_VERSION
00060 #undef PACKAGE_VERSION
00061 #endif
00062 #ifdef VERSION
00063 #undef VERSION
00064 #endif
00065 #define ADOLC_TAPELESS
00066 #define NUMBER_DIRECTIONS 100
00067 #include "adolc/adouble.h"
00068 #include "adolc/drivers/drivers.h"
00069 #include "adolc/interfaces.h"
00070 #endif
00071 
00072 // A performance test that computes a finite-element-like Jacobian using
00073 // several Fad variants
00074 
00075 template <>
00076 Sacado::Fad::MemPool* Sacado::Fad::MemPoolStorage<double>::defaultPool_ = NULL;
00077 
00078 void FAD::error(const char *msg) {
00079   std::cout << msg << std::endl;
00080 }
00081 
00082 struct ElemData {
00083   static const unsigned int nqp = 2;
00084   static const unsigned int nnode = 2;
00085   double w[nqp], jac[nqp], phi[nqp][nnode], dphi[nqp][nnode];
00086   unsigned int gid[nnode];
00087 
00088   ElemData(double mesh_spacing) {
00089     // Quadrature points
00090     double xi[nqp];
00091     xi[0] = -1.0 / std::sqrt(3.0);
00092     xi[1] =  1.0 / std::sqrt(3.0);
00093     
00094     for (unsigned int i=0; i<nqp; i++) {
00095       // Weights
00096       w[i] = 1.0;
00097       
00098       // Element transformation Jacobian
00099       jac[i] = 0.5*mesh_spacing;
00100       
00101       // Shape functions & derivatives
00102       phi[i][0] = 0.5*(1.0 - xi[i]);
00103       phi[i][1] = 0.5*(1.0 + xi[i]);
00104       dphi[i][0] = -0.5;
00105       dphi[i][1] = 0.5;
00106     }
00107   }
00108 };
00109 
00110 template <class FadType>
00111 void fad_init_fill(const ElemData& e,
00112        unsigned int neqn,
00113        const std::vector<double>& x, 
00114        std::vector<FadType>& x_fad) {
00115   for (unsigned int node=0; node<e.nnode; node++)
00116     for (unsigned int eqn=0; eqn<neqn; eqn++)
00117       x_fad[node*neqn+eqn] = FadType(e.nnode*neqn, node*neqn+eqn, 
00118              x[e.gid[node]*neqn+eqn]);
00119 }
00120 
00121 #ifdef HAVE_ADOLC
00122 #ifndef ADOLC_TAPELESS
00123 void adolc_init_fill(bool retape,
00124          int tag,
00125          const ElemData& e,
00126          unsigned int neqn,
00127          const std::vector<double>& x, 
00128          std::vector<double>& x_local,
00129          std::vector<adouble>& x_ad) {
00130   if (retape)
00131     trace_on(tag);
00132   for (unsigned int node=0; node<e.nnode; node++)
00133     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00134       x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00135       if (retape)
00136   x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
00137     }
00138 }
00139 
00140 #else
00141 
00142 void adolc_tapeless_init_fill(const ElemData& e,
00143             unsigned int neqn,
00144             const std::vector<double>& x, 
00145             std::vector<adtl::adouble>& x_ad) {
00146   for (unsigned int node=0; node<e.nnode; node++)
00147     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00148       x_ad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00149       for (unsigned int i=0; i<neqn*e.nnode; i++)
00150   x_ad[node*neqn+eqn].setADValue(i, 0.0);
00151       x_ad[node*neqn+eqn].setADValue(node*neqn+eqn, 1.0);
00152     }
00153   
00154 }
00155 #endif
00156 #endif
00157 
00158 void analytic_init_fill(const ElemData& e,
00159       unsigned int neqn,
00160       const std::vector<double>& x, 
00161       std::vector<double>& x_local) {
00162   for (unsigned int node=0; node<e.nnode; node++) 
00163     for (unsigned int eqn=0; eqn<neqn; eqn++)
00164       x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00165 }
00166 
00167 template <class T>
00168 void template_element_fill(const ElemData& e, 
00169          unsigned int neqn,
00170          const std::vector<T>& x, 
00171          std::vector<T>& u, 
00172          std::vector<T>& du, 
00173          std::vector<T>& f) {
00174   // Construct element solution, derivative
00175   for (unsigned int qp=0; qp<e.nqp; qp++) {
00176     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00177       u[qp*neqn+eqn] = 0.0;
00178       du[qp*neqn+eqn] = 0.0;
00179       for (unsigned int node=0; node<e.nnode; node++) {
00180   u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
00181   du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
00182       }
00183     }
00184   }
00185 
00186   // Compute sum of equations for coupling
00187   std::vector<T> s(e.nqp*neqn);
00188   for (unsigned int qp=0; qp<e.nqp; qp++) {
00189     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00190       s[qp*neqn+eqn] = 0.0;
00191       for (unsigned int j=0; j<neqn; j++) {
00192         if (j != eqn)
00193           s[qp*neqn+eqn] += u[qp*neqn+j]; 
00194       }
00195     }
00196   }
00197 
00198   // Evaluate element residual
00199   for (unsigned int node=0; node<e.nnode; node++) {
00200     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00201       unsigned int row = node*neqn+eqn;
00202       f[row] = 0.0;
00203       for (unsigned int qp=0; qp<e.nqp; qp++) {
00204   f[row] += 
00205     e.w[qp]*e.jac[qp]*(-(1.0/(e.jac[qp]*e.jac[qp]))*
00206            du[qp*neqn+eqn]*e.dphi[qp][node] + 
00207            e.phi[qp][node]*(exp(u[qp*neqn+eqn]) + 
00208                 u[qp*neqn+eqn]*s[qp*neqn+eqn]));
00209       }
00210     }
00211   }
00212 }
00213 
00214 void analytic_element_fill(const ElemData& e, 
00215          unsigned int neqn,
00216          const std::vector<double>& x, 
00217          std::vector<double>& u, 
00218          std::vector<double>& du, 
00219          std::vector<double>& f,
00220          std::vector< std::vector<double> >& jac) {
00221   // Construct element solution, derivative
00222   for (unsigned int qp=0; qp<e.nqp; qp++) {
00223     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00224       u[qp*neqn+eqn] = 0.0;
00225       du[qp*neqn+eqn] = 0.0;
00226       for (unsigned int node=0; node<e.nnode; node++) {
00227   u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
00228   du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
00229       }
00230     }
00231   }
00232 
00233   // Compute sum of equations for coupling
00234   std::vector<double> s(e.nqp*neqn);
00235   for (unsigned int qp=0; qp<e.nqp; qp++) {
00236     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00237       s[qp*neqn+eqn] = 0.0;
00238       for (unsigned int j=0; j<neqn; j++) {
00239   if (j != eqn)
00240     s[qp*neqn+eqn] += u[qp*neqn+j]; 
00241       }
00242     }
00243   }
00244 
00245   // Evaluate element residual
00246   for (unsigned int node_row=0; node_row<e.nnode; node_row++) {
00247     for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00248       unsigned int row = node_row*neqn+eqn_row;
00249       f[row] = 0.0;
00250       for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00251   for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00252     unsigned int col = node_col*neqn+eqn_col;
00253     jac[row][col] = 0.0;
00254   }
00255       }
00256       for (unsigned int qp=0; qp<e.nqp; qp++) {
00257   double c1 = e.w[qp]*e.jac[qp];
00258   double c2 = -(1.0/(e.jac[qp]*e.jac[qp]))*e.dphi[qp][node_row];
00259   double c3 = e.phi[qp][node_row]*(exp(u[qp*neqn+eqn_row]));
00260   double c4 = e.phi[qp][node_row]*u[qp*neqn+eqn_row];
00261   double c5 = e.phi[qp][node_row]*s[qp*neqn+eqn_row];
00262   double c35 = c3+c5;
00263   double c14 = c1*c4;
00264   f[row] += c1*(c2*du[qp*neqn+eqn_row] + c3 + c4*s[qp*neqn+eqn_row]);
00265   for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00266     jac[row][node_col*neqn+eqn_row] += 
00267       c1*(c2*e.dphi[qp][node_col] + c35*e.phi[qp][node_col]);
00268     for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00269       if (eqn_col != eqn_row)
00270         jac[row][node_col*neqn+eqn_col] += c14*e.phi[qp][node_col];
00271     }
00272   }
00273       }
00274     }
00275   }
00276 }
00277 
00278 template <class FadType>
00279 void fad_process_fill(const ElemData& e,
00280           unsigned int neqn,
00281           const std::vector<FadType>& f_fad, 
00282           std::vector<double>& f,
00283           std::vector< std::vector<double> >& jac) {
00284   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00285     f[e.gid[0]*neqn+eqn_row] += f_fad[0*neqn+eqn_row].val();
00286     f[e.gid[1]*neqn+eqn_row] += f_fad[1*neqn+eqn_row].val();
00287     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00288       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00289   unsigned int col = node_col*neqn+eqn_col;
00290   unsigned int next_col = (node_col+1)*neqn+eqn_col;
00291   jac[e.gid[0]*neqn+eqn_row][next_col] += 
00292     f_fad[0*neqn+eqn_row].fastAccessDx(col);
00293   jac[e.gid[1]*neqn+eqn_row][col] += 
00294     f_fad[1*neqn+eqn_row].fastAccessDx(col);
00295       }
00296     }
00297   }
00298 }
00299 
00300 #ifdef HAVE_ADOLC
00301 #ifndef ADOLC_TAPELESS
00302 void adolc_process_fill(bool retape,
00303       int tag, 
00304       const ElemData& e,
00305       unsigned int neqn,
00306       std::vector<double>& x_local,
00307       std::vector<adouble>& f_ad, 
00308       std::vector<double>& f,
00309       std::vector<double>& f_local,
00310       std::vector< std::vector<double> >& jac,
00311       double **seed,
00312       double **jac_local) {
00313   if (retape) {
00314     for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++)
00315       f_ad[0*neqn+eqn_row] >>= f_local[0*neqn+eqn_row];
00316     for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++)
00317       f_ad[1*neqn+eqn_row] >>= f_local[1*neqn+eqn_row];
00318     trace_off();
00319   }
00320 
00321   //jacobian(tag, neqn*e.nnode, neqn*e.nnode, &x_local[0], jac_local);
00322   forward(tag, neqn*e.nnode, neqn*e.nnode, neqn*e.nnode, &x_local[0],
00323     seed, &f_local[0], jac_local);
00324   
00325   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00326     f[e.gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
00327     f[e.gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
00328     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00329       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00330   unsigned int col = node_col*neqn+eqn_col;
00331   unsigned int next_col = (node_col+1)*neqn+eqn_col;
00332   jac[e.gid[0]*neqn+eqn_row][next_col] += jac_local[0*neqn+eqn_row][col];
00333   jac[e.gid[1]*neqn+eqn_row][col] += jac_local[1*neqn+eqn_row][col];
00334       }
00335     }
00336   }
00337 }
00338 
00339 #else
00340 
00341 void adolc_tapeless_process_fill(const ElemData& e,
00342          unsigned int neqn,
00343          const std::vector<adtl::adouble>& f_ad, 
00344          std::vector<double>& f,
00345          std::vector< std::vector<double> >& jac) {
00346   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00347     f[e.gid[0]*neqn+eqn_row] += f_ad[0*neqn+eqn_row].getValue();
00348     f[e.gid[1]*neqn+eqn_row] += f_ad[1*neqn+eqn_row].getValue();
00349     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00350       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00351   unsigned int col = node_col*neqn+eqn_col;
00352   unsigned int next_col = (node_col+1)*neqn+eqn_col;
00353   jac[e.gid[0]*neqn+eqn_row][next_col] += 
00354     f_ad[0*neqn+eqn_row].getADValue(col);
00355   jac[e.gid[1]*neqn+eqn_row][col] += 
00356     f_ad[1*neqn+eqn_row].getADValue(col);
00357       }
00358     }
00359   }
00360 }
00361 #endif
00362 #endif
00363 
00364 void analytic_process_fill(const ElemData& e,
00365          unsigned int neqn,
00366          const std::vector<double>& f_local, 
00367          const std::vector< std::vector<double> >& jac_local, 
00368          std::vector<double>& f,
00369          std::vector< std::vector<double> >& jac) {
00370   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00371     f[e.gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
00372     f[e.gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
00373     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00374       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00375   unsigned int col = node_col*neqn+eqn_col;
00376   unsigned int next_col = (node_col+1)*neqn+eqn_col;
00377   jac[e.gid[0]*neqn+eqn_row][next_col] += jac_local[0*neqn+eqn_row][col];
00378   jac[e.gid[1]*neqn+eqn_row][col] += jac_local[1*neqn+eqn_row][col];
00379       }
00380     }
00381   }
00382 }
00383 
00384 void residual_process_fill(const ElemData& e,
00385          unsigned int neqn,
00386          const std::vector<double>& f_local, 
00387          std::vector<double>& f) {
00388   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00389     f[e.gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
00390     f[e.gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
00391   }
00392 }
00393 
00394 template <class FadType>
00395 double fad_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00396         double mesh_spacing) {
00397   ElemData e(mesh_spacing);
00398 
00399   // Solution vector, residual, jacobian
00400   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00401   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00402   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00403     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00404       unsigned int row = node_row*num_eqns + eqn_row;
00405       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00406       f[row] = 0.0;
00407       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00408       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00409   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00410     unsigned int col = node_col*num_eqns + eqn_col;
00411     jac[row][col] = 0.0;
00412   }
00413       }
00414     }
00415   }
00416 
00417   Teuchos::Time timer("FE Fad Jacobian Fill", false);
00418   timer.start(true);
00419   std::vector<FadType> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns);
00420   std::vector<FadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00421   for (unsigned int i=0; i<num_nodes-1; i++) {
00422     e.gid[0] = i;
00423     e.gid[1] = i+1;
00424     
00425     fad_init_fill(e, num_eqns, x, x_fad);
00426     template_element_fill(e, num_eqns, x_fad, u, du, f_fad);
00427     fad_process_fill(e, num_eqns, f_fad, f, jac);
00428   }
00429   timer.stop();
00430 
00431   // std::cout << "Fad Residual = " << std::endl;
00432   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00433   //   std::cout << "\t" << f[i] << std::endl;
00434 
00435   // std::cout.precision(8);
00436   // std::cout << "Fad Jacobian = " << std::endl;
00437   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00438   //   std::cout << "\t";
00439   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00440   //     std::cout << jac[i][j] << "\t";
00441   //   std::cout << std::endl;
00442   // }
00443 
00444   return timer.totalElapsedTime();
00445 }
00446 
00447 #ifdef HAVE_ADOLC
00448 #ifndef ADOLC_TAPELESS
00449 double adolc_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00450           double mesh_spacing) {
00451   ElemData e(mesh_spacing);
00452 
00453   // Solution vector, residual, jacobian
00454   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00455   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00456   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00457     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00458       unsigned int row = node_row*num_eqns + eqn_row;
00459       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00460       f[row] = 0.0;
00461       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00462       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00463   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00464     unsigned int col = node_col*num_eqns + eqn_col;
00465     jac[row][col] = 0.0;
00466   }
00467       }
00468     }
00469   }
00470 
00471   Teuchos::Time timer("FE ADOL-C Jacobian Fill", false);
00472   timer.start(true);
00473   std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
00474   std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00475   std::vector<double> x_local(e.nnode*num_eqns);
00476   std::vector<double> f_local(e.nnode*num_eqns);
00477   double **jac_local = new double*[e.nnode*num_eqns];
00478   double **seed = new double*[e.nnode*num_eqns];
00479   for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
00480     jac_local[i] = new double[e.nnode*num_eqns];
00481     seed[i] = new double[e.nnode*num_eqns];
00482     for (unsigned int j=0; j<e.nnode*num_eqns; j++)
00483       seed[i][j] = 0.0;
00484     seed[i][i] = 1.0;
00485   }
00486   
00487   // Tape first element
00488   e.gid[0] = 0;
00489   e.gid[1] = 1;
00490   adolc_init_fill(true, 0, e, num_eqns, x, x_local, x_ad);
00491   template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
00492   adolc_process_fill(true, 0, e, num_eqns, x_local, f_ad, f, f_local,
00493          jac, seed, jac_local);
00494 
00495   // Now do remaining fills reusing tape
00496   for (unsigned int i=1; i<num_nodes-1; i++) {
00497     e.gid[0] = i;
00498     e.gid[1] = i+1;
00499     
00500     adolc_init_fill(false, 0, e, num_eqns, x, x_local, x_ad);
00501     adolc_process_fill(false, 0, e, num_eqns, x_local, f_ad, f, f_local,
00502            jac, seed, jac_local);
00503   }
00504   for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
00505     delete [] jac_local[i];
00506     delete [] seed[i];
00507   }
00508   delete [] jac_local;
00509   delete [] seed;
00510   timer.stop();
00511 
00512   // std::cout << "ADOL-C Residual = " << std::endl;
00513   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00514   //   std::cout << "\t" << f[i] << std::endl;
00515 
00516   // std::cout.precision(8);
00517   // std::cout << "ADOL-C Jacobian = " << std::endl;
00518   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00519   //   std::cout << "\t";
00520   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00521   //     std::cout << jac[i][j] << "\t";
00522   //   std::cout << std::endl;
00523   // }
00524 
00525   return timer.totalElapsedTime();
00526 }
00527 
00528 double adolc_retape_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00529            double mesh_spacing) {
00530   ElemData e(mesh_spacing);
00531 
00532   // Solution vector, residual, jacobian
00533   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00534   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00535   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00536     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00537       unsigned int row = node_row*num_eqns + eqn_row;
00538       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00539       f[row] = 0.0;
00540       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00541       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00542   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00543     unsigned int col = node_col*num_eqns + eqn_col;
00544     jac[row][col] = 0.0;
00545   }
00546       }
00547     }
00548   }
00549 
00550   Teuchos::Time timer("FE ADOL-C Retape Jacobian Fill", false);
00551   timer.start(true);
00552   std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
00553   std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00554   std::vector<double> x_local(e.nnode*num_eqns);
00555   std::vector<double> f_local(e.nnode*num_eqns);
00556   double **jac_local = new double*[e.nnode*num_eqns];
00557   double **seed = new double*[e.nnode*num_eqns];
00558   for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
00559     jac_local[i] = new double[e.nnode*num_eqns];
00560     seed[i] = new double[e.nnode*num_eqns];
00561     for (unsigned int j=0; j<e.nnode*num_eqns; j++)
00562       seed[i][j] = 0.0;
00563     seed[i][i] = 1.0;
00564   }
00565   for (unsigned int i=0; i<num_nodes-1; i++) {
00566     e.gid[0] = i;
00567     e.gid[1] = i+1;
00568     
00569     adolc_init_fill(true, 1, e, num_eqns, x, x_local, x_ad);
00570     template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
00571     adolc_process_fill(true, 1, e, num_eqns, x_local, f_ad, f, f_local,
00572            jac, seed, jac_local);
00573   }
00574   for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
00575     delete [] jac_local[i];
00576     delete [] seed[i];
00577   }
00578   delete [] jac_local;
00579   delete [] seed;
00580   timer.stop();
00581 
00582   // std::cout << "ADOL-C Residual (retaped) = " << std::endl;
00583   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00584   //   std::cout << "\t" << f[i] << std::endl;
00585 
00586   // std::cout.precision(8);
00587   // std::cout << "ADOL-C Jacobian (retaped) = " << std::endl;
00588   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00589   //   std::cout << "\t";
00590   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00591   //     std::cout << jac[i][j] << "\t";
00592   //   std::cout << std::endl;
00593   // }
00594 
00595   return timer.totalElapsedTime();
00596 }
00597 
00598 #else
00599 
00600 double adolc_tapeless_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00601              double mesh_spacing) {
00602   ElemData e(mesh_spacing);
00603 
00604   // Solution vector, residual, jacobian
00605   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00606   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00607   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00608     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00609       unsigned int row = node_row*num_eqns + eqn_row;
00610       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00611       f[row] = 0.0;
00612       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00613       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00614   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00615     unsigned int col = node_col*num_eqns + eqn_col;
00616     jac[row][col] = 0.0;
00617   }
00618       }
00619     }
00620   }
00621 
00622   Teuchos::Time timer("FE Tapeless ADOL-C Jacobian Fill", false);
00623   timer.start(true);
00624   std::vector<adtl::adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
00625   std::vector<adtl::adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00626   for (unsigned int i=0; i<num_nodes-1; i++) {
00627     e.gid[0] = i;
00628     e.gid[1] = i+1;
00629     
00630     adolc_tapeless_init_fill(e, num_eqns, x, x_ad);
00631     template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
00632     adolc_tapeless_process_fill(e, num_eqns, f_ad, f, jac);
00633   }
00634   timer.stop();
00635 
00636   // std::cout << "Tapeless ADOL-C Residual = " << std::endl;
00637   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00638   //   std::cout << "\t" << f[i] << std::endl;
00639 
00640   // std::cout.precision(8);
00641   // std::cout << "Tapeless ADOL-C Jacobian = " << std::endl;
00642   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00643   //   std::cout << "\t";
00644   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00645   //     std::cout << jac[i][j] << "\t";
00646   //   std::cout << std::endl;
00647   // }
00648 
00649   return timer.totalElapsedTime();
00650 }
00651 #endif
00652 #endif
00653 
00654 double analytic_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00655        double mesh_spacing) {
00656   ElemData e(mesh_spacing);
00657 
00658   // Solution vector, residual, jacobian
00659   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00660   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00661   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00662     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00663       unsigned int row = node_row*num_eqns + eqn_row;
00664       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00665       f[row] = 0.0;
00666       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00667       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00668   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00669     unsigned int col = node_col*num_eqns + eqn_col;
00670     jac[row][col] = 0.0;
00671   }
00672       }
00673     }
00674   }
00675 
00676   Teuchos::Time timer("FE Analytic Jacobian Fill", false);
00677   timer.start(true);
00678   std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
00679   std::vector< std::vector<double> > jac_local(e.nnode*num_eqns);
00680   std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00681   for (unsigned int i=0; i<e.nnode*num_eqns; i++)
00682     jac_local[i] = std::vector<double>(e.nnode*num_eqns);
00683   for (unsigned int i=0; i<num_nodes-1; i++) {
00684     e.gid[0] = i;
00685     e.gid[1] = i+1;
00686     
00687     analytic_init_fill(e, num_eqns, x, x_local);
00688     analytic_element_fill(e, num_eqns, x_local, u, du, f_local, jac_local);
00689     analytic_process_fill(e, num_eqns, f_local, jac_local, f, jac);
00690   }
00691   timer.stop();
00692 
00693   // std::cout.precision(8);
00694   // std::cout << "Analytic Residual = " << std::endl;
00695   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00696   //   std::cout << "\t" << f[i] << std::endl;
00697 
00698   // std::cout.precision(8);
00699   // std::cout << "Analytic Jacobian = " << std::endl;
00700   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00701   //   std::cout << "\t";
00702   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00703   //     std::cout << jac[i][j] << "\t";
00704   //   std::cout << std::endl;
00705   // }
00706 
00707   return timer.totalElapsedTime();
00708 }
00709 
00710 double residual_fill(unsigned int num_nodes, unsigned int num_eqns,
00711        double mesh_spacing) {
00712   ElemData e(mesh_spacing);
00713 
00714   // Solution vector, residual, jacobian
00715   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00716   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00717     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00718       unsigned int row = node_row*num_eqns + eqn_row;
00719       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00720       f[row] = 0.0;
00721     }
00722   }
00723 
00724   Teuchos::Time timer("FE Residual Fill", false);
00725   timer.start(true);
00726   std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
00727   std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00728   for (unsigned int i=0; i<num_nodes-1; i++) {
00729     e.gid[0] = i;
00730     e.gid[1] = i+1;
00731     
00732     analytic_init_fill(e, num_eqns, x, x_local);
00733     template_element_fill(e, num_eqns, x_local, u, du, f_local);
00734     residual_process_fill(e, num_eqns, f_local, f);
00735   }
00736   timer.stop();
00737 
00738   // std::cout.precision(8);
00739   // std::cout << "Analytic Residual = " << std::endl;
00740   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00741   //   std::cout << "\t" << f[i] << std::endl;
00742 
00743   return timer.totalElapsedTime();
00744 }
00745 
00746 int main(int argc, char* argv[]) {
00747   int ierr = 0;
00748 
00749   try {
00750     double t, ta, tr;
00751     int p = 2;
00752     int w = p+7;
00753 
00754     // Maximum number of derivative components for SLFad
00755     const int slfad_max = 100;
00756 
00757     // Set up command line options
00758     Teuchos::CommandLineProcessor clp;
00759     clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
00760     int num_nodes = 100000;
00761     int num_eqns = 2;
00762     int rt = 0;
00763     clp.setOption("n", &num_nodes, "Number of nodes");
00764     clp.setOption("p", &num_eqns, "Number of equations");
00765     clp.setOption("rt", &rt, "Include ADOL-C retaping test");
00766 
00767     // Parse options
00768     Teuchos::CommandLineProcessor::EParseCommandLineReturn
00769       parseReturn= clp.parse(argc, argv);
00770     if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
00771       return 1;
00772 
00773     double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1);
00774 
00775     // Memory pool & manager
00776     Sacado::Fad::MemPoolManager<double> poolManager(num_nodes*num_eqns);
00777     Sacado::Fad::MemPool* pool = poolManager.getMemoryPool(num_nodes*num_eqns);
00778     Sacado::Fad::DMFad<double>::setDefaultPool(pool);
00779 
00780     std::cout.setf(std::ios::scientific);
00781     std::cout.precision(p);
00782     std::cout << "num_nodes =  " << num_nodes 
00783         << ", num_eqns = " << num_eqns << ":  " << std::endl
00784         << "           " << "   Time" << "\t"<< "Time/Analytic" << "\t"
00785         << "Time/Residual" << std::endl;
00786 
00787     ta = 1.0;
00788     tr = 1.0;
00789 
00790     tr = residual_fill(num_nodes, num_eqns, mesh_spacing);
00791 
00792     ta = analytic_jac_fill(num_nodes, num_eqns, mesh_spacing);
00793     std::cout << "Analytic:  " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/tr << std::endl;
00794 
00795 #ifdef HAVE_ADOLC
00796 #ifndef ADOLC_TAPELESS
00797     t = adolc_jac_fill(num_nodes, num_eqns, mesh_spacing);
00798     std::cout << "ADOL-C:    " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00799 
00800     if (rt != 0) {
00801       t = adolc_retape_jac_fill(num_nodes, num_eqns, mesh_spacing);
00802       std::cout << "ADOL-C(rt):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00803     }
00804 
00805 #else
00806     t = adolc_tapeless_jac_fill(num_nodes, num_eqns, mesh_spacing);
00807     std::cout << "ADOL-C(tl):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00808 #endif
00809 #endif
00810 
00811     if (num_eqns*2 == 4) {
00812       t = fad_jac_fill< FAD::TFad<4,double> >(num_nodes, num_eqns, mesh_spacing);
00813       std::cout << "TFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00814     }
00815     else if (num_eqns*2 == 40) {
00816       t = fad_jac_fill< FAD::TFad<40,double> >(num_nodes, num_eqns, mesh_spacing);
00817       std::cout << "TFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00818     }
00819 
00820     t = fad_jac_fill< FAD::Fad<double> >(num_nodes, num_eqns, mesh_spacing);
00821     std::cout << "Fad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00822 
00823     if (num_eqns*2 == 4) {
00824       t = fad_jac_fill< Sacado::Fad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
00825       std::cout << "SFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00826     }
00827     else if (num_eqns*2 == 40) {
00828       t = fad_jac_fill< Sacado::Fad::SFad<double,40> >(num_nodes, num_eqns, mesh_spacing);
00829       std::cout << "SFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00830     }
00831 
00832     if (num_eqns*2 < slfad_max) {
00833       t = fad_jac_fill< Sacado::Fad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
00834       std::cout << "SLFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00835     }
00836     
00837     t = fad_jac_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
00838     std::cout << "DFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00839 
00840     t = fad_jac_fill< Sacado::Fad::SimpleFad<double> >(num_nodes, num_eqns, mesh_spacing);
00841     std::cout << "SimpleFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00842 
00843     t = fad_jac_fill< Sacado::Fad::DMFad<double> >(num_nodes, num_eqns, mesh_spacing);
00844     std::cout << "DMFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl; 
00845 
00846     if (num_eqns*2 == 4) {
00847       t = fad_jac_fill< Sacado::ELRFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
00848       std::cout << "ELRSFad:   " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00849     }
00850     else if (num_eqns*2 == 40) {
00851       t = fad_jac_fill< Sacado::ELRFad::SFad<double,40> >(num_nodes, num_eqns, mesh_spacing);
00852       std::cout << "ELRSFad:   " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00853     }
00854 
00855     if (num_eqns*2 < slfad_max) {
00856       t = fad_jac_fill< Sacado::ELRFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
00857       std::cout << "ELRSLFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00858     }
00859 
00860     t = fad_jac_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
00861     std::cout << "ELRDFad:   " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00862     
00863     t = fad_jac_fill< Sacado::CacheFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
00864     std::cout << "CacheFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00865 
00866     t = fad_jac_fill< Sacado::Fad::DVFad<double> >(num_nodes, num_eqns, mesh_spacing);
00867     std::cout << "DVFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00868     
00869   }
00870   catch (std::exception& e) {
00871     cout << e.what() << endl;
00872     ierr = 1;
00873   }
00874   catch (const char *s) {
00875     cout << s << endl;
00876     ierr = 1;
00877   }
00878   catch (...) {
00879     cout << "Caught unknown exception!" << endl;
00880     ierr = 1;
00881   }
00882 
00883   return ierr;
00884 }

Generated on Wed May 12 21:39:32 2010 for Sacado Package Browser (Single Doxygen Collection) by  doxygen 1.4.7