rad_fe_adj_fill.cpp

Go to the documentation of this file.
00001 // $Id$ 
00002 // $Source$ 
00003 // @HEADER
00004 // ***********************************************************************
00005 // 
00006 //                           Sacado Package
00007 //                 Copyright (2006) Sandia Corporation
00008 // 
00009 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00010 // the U.S. Government retains certain rights in this software.
00011 // 
00012 // This library is free software; you can redistribute it and/or modify
00013 // it under the terms of the GNU Lesser General Public License as
00014 // published by the Free Software Foundation; either version 2.1 of the
00015 // License, or (at your option) any later version.
00016 //  
00017 // This library is distributed in the hope that it will be useful, but
00018 // WITHOUT ANY WARRANTY; without even the implied warranty of
00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00020 // Lesser General Public License for more details.
00021 //  
00022 // You should have received a copy of the GNU Lesser General Public
00023 // License along with this library; if not, write to the Free Software
00024 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00025 // USA
00026 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
00027 // (etphipp@sandia.gov).
00028 // 
00029 // ***********************************************************************
00030 // @HEADER
00031 
00032 #include "Sacado.hpp"
00033 #include "Sacado_tradvec.hpp"
00034 
00035 #include "Teuchos_Time.hpp"
00036 #include "Teuchos_CommandLineProcessor.hpp"
00037 
00038 // ADOL-C includes
00039 #ifdef HAVE_ADOLC
00040 #ifdef PACKAGE
00041 #undef PACKAGE
00042 #endif
00043 #ifdef PACKAGE_NAME
00044 #undef PACKAGE_NAME
00045 #endif
00046 #ifdef PACKAGE_BUGREPORT
00047 #undef PACKAGE_BUGREPORT
00048 #endif
00049 #ifdef PACKAGE_STRING
00050 #undef PACKAGE_STRING
00051 #endif
00052 #ifdef PACKAGE_TARNAME
00053 #undef PACKAGE_TARNAME
00054 #endif
00055 #ifdef PACKAGE_VERSION
00056 #undef PACKAGE_VERSION
00057 #endif
00058 #ifdef VERSION
00059 #undef VERSION
00060 #endif
00061 #include "adolc/adouble.h"
00062 #include "adolc/drivers/drivers.h"
00063 #include "adolc/interfaces.h"
00064 #endif
00065 
00066 // A performance test that computes a finite-element-like adjoint using
00067 // Rad and ADOL-C
00068 
00069 typedef Sacado::Rad::ADvar<double> RadType;
00070 typedef Sacado::RadVec::ADvar<double> VRadType;
00071 
00072 struct ElemData {
00073   static const unsigned int nqp = 2;
00074   static const unsigned int nnode = 2;
00075   double w[nqp], jac[nqp], phi[nqp][nnode], dphi[nqp][nnode];
00076   unsigned int gid[nnode];
00077 
00078   ElemData(double mesh_spacing) {
00079     // Quadrature points
00080     double xi[nqp];
00081     xi[0] = -1.0 / std::sqrt(3.0);
00082     xi[1] =  1.0 / std::sqrt(3.0);
00083     
00084     for (unsigned int i=0; i<nqp; i++) {
00085       // Weights
00086       w[i] = 1.0;
00087       
00088       // Element transformation Jacobian
00089       jac[i] = 0.5*mesh_spacing;
00090       
00091       // Shape functions & derivatives
00092       phi[i][0] = 0.5*(1.0 - xi[i]);
00093       phi[i][1] = 0.5*(1.0 + xi[i]);
00094       dphi[i][0] = -0.5;
00095       dphi[i][1] = 0.5;
00096     }
00097   }
00098 };
00099 
00100 template <class FadType>
00101 void fad_init_fill(const ElemData& e,
00102        unsigned int neqn,
00103        const std::vector<double>& x,
00104        const std::vector< std::vector<double> >& w,
00105        std::vector<FadType>& x_fad,
00106        std::vector< std::vector<double> >& w_local) {
00107   for (unsigned int node=0; node<e.nnode; node++)
00108     for (unsigned int eqn=0; eqn<neqn; eqn++)
00109       x_fad[node*neqn+eqn] = FadType(e.nnode*neqn, node*neqn+eqn, 
00110              x[e.gid[node]*neqn+eqn]);
00111 
00112   for (unsigned int col=0; col<w.size(); col++)
00113     for (unsigned int node=0; node<e.nnode; node++)
00114       for (unsigned int eqn=0; eqn<neqn; eqn++)
00115   w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
00116 }
00117 
00118 void rad_init_fill(const ElemData& e,
00119        unsigned int neqn,
00120        const std::vector<double>& x,
00121        const std::vector< std::vector<double> >& w,
00122        std::vector<RadType>& x_rad,
00123        std::vector< std::vector<double> >& w_local) {
00124   for (unsigned int node=0; node<e.nnode; node++)
00125     for (unsigned int eqn=0; eqn<neqn; eqn++)
00126       x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00127 
00128   for (unsigned int col=0; col<w.size(); col++)
00129     for (unsigned int node=0; node<e.nnode; node++)
00130       for (unsigned int eqn=0; eqn<neqn; eqn++)
00131   w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
00132 }
00133 
00134 void vrad_init_fill(const ElemData& e,
00135        unsigned int neqn,
00136        const std::vector<double>& x,
00137        const std::vector< std::vector<double> >& w,
00138        std::vector<VRadType>& x_rad,
00139        double** w_local) {
00140   for (unsigned int node=0; node<e.nnode; node++)
00141     for (unsigned int eqn=0; eqn<neqn; eqn++)
00142       x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00143 
00144   for (unsigned int col=0; col<w.size(); col++)
00145     for (unsigned int node=0; node<e.nnode; node++)
00146       for (unsigned int eqn=0; eqn<neqn; eqn++)
00147   w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
00148 }
00149 
00150 #ifdef HAVE_ADOLC
00151 void adolc_init_fill(bool retape,
00152          int tag,
00153          const ElemData& e,
00154          unsigned int neqn,
00155          const std::vector<double>& x,
00156          const std::vector< std::vector<double> >& w,
00157          std::vector<double>& x_local,
00158          std::vector<adouble>& x_ad,
00159          double** w_local) {
00160   if (retape)
00161     trace_on(tag, 1);
00162   for (unsigned int node=0; node<e.nnode; node++)
00163     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00164       x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00165       if (retape)
00166   x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
00167     }
00168 
00169   for (unsigned int col=0; col<w.size(); col++)
00170     for (unsigned int node=0; node<e.nnode; node++)
00171       for (unsigned int eqn=0; eqn<neqn; eqn++)
00172   w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
00173 }
00174 #endif
00175 
00176 void analytic_init_fill(const ElemData& e,
00177       unsigned int neqn,
00178       const std::vector<double>& x,
00179       const std::vector< std::vector<double> >& w,
00180       std::vector<double>& x_local,
00181       std::vector< std::vector<double> >& w_local) {
00182   for (unsigned int node=0; node<e.nnode; node++) 
00183     for (unsigned int eqn=0; eqn<neqn; eqn++)
00184       x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00185 
00186   for (unsigned int node=0; node<e.nnode; node++) 
00187     for (unsigned int eqn=0; eqn<neqn; eqn++)
00188       for (unsigned int col=0; col<w.size(); col++)
00189   w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
00190 }
00191 
00192 template <class T>
00193 void template_element_fill(const ElemData& e, 
00194          unsigned int neqn,
00195          const std::vector<T>& x, 
00196          std::vector<T>& u, 
00197          std::vector<T>& du, 
00198          std::vector<T>& f) {
00199   // Construct element solution, derivative
00200   for (unsigned int qp=0; qp<e.nqp; qp++) {
00201     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00202       u[qp*neqn+eqn] = 0.0;
00203       du[qp*neqn+eqn] = 0.0;
00204       for (unsigned int node=0; node<e.nnode; node++) {
00205   u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
00206   du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
00207       }
00208     }
00209   }
00210 
00211   // Compute sum of equations for coupling
00212   std::vector<T> s(e.nqp*neqn);
00213   for (unsigned int qp=0; qp<e.nqp; qp++) {
00214     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00215       s[qp*neqn+eqn] = 0.0;
00216       for (unsigned int j=0; j<neqn; j++) {
00217         if (j != eqn)
00218           s[qp*neqn+eqn] += u[qp*neqn+j]; 
00219       }
00220     }
00221   }
00222 
00223   // Evaluate element residual
00224   for (unsigned int node=0; node<e.nnode; node++) {
00225     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00226       unsigned int row = node*neqn+eqn;
00227       f[row] = 0.0;
00228       for (unsigned int qp=0; qp<e.nqp; qp++) {
00229   f[row] += 
00230     e.w[qp]*e.jac[qp]*(-(1.0/(e.jac[qp]*e.jac[qp]))*
00231            du[qp*neqn+eqn]*e.dphi[qp][node] + 
00232            e.phi[qp][node]*(exp(u[qp*neqn+eqn]) + 
00233                 u[qp*neqn+eqn]*s[qp*neqn+eqn]));
00234       }
00235     }
00236   }
00237 }
00238 
00239 void analytic_element_fill(const ElemData& e, 
00240          unsigned int neqn,
00241          const std::vector<double>& x,
00242          std::vector< std::vector<double> >& w,
00243          std::vector<double>& u, 
00244          std::vector<double>& du, 
00245          std::vector<double>& f,
00246          std::vector< std::vector<double> >& adj) {
00247   // Construct element solution, derivative
00248   for (unsigned int qp=0; qp<e.nqp; qp++) {
00249     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00250       u[qp*neqn+eqn] = 0.0;
00251       du[qp*neqn+eqn] = 0.0;
00252       for (unsigned int node=0; node<e.nnode; node++) {
00253   u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
00254   du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
00255       }
00256     }
00257   }
00258 
00259   // Compute sum of equations for coupling
00260   std::vector<double> s(e.nqp*neqn);
00261   for (unsigned int qp=0; qp<e.nqp; qp++) {
00262     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00263       s[qp*neqn+eqn] = 0.0;
00264       for (unsigned int j=0; j<neqn; j++) {
00265   if (j != eqn)
00266     s[qp*neqn+eqn] += u[qp*neqn+j]; 
00267       }
00268     }
00269   }
00270 
00271   for (unsigned int col=0; col<w.size(); col++)
00272     for (unsigned int node=0; node<e.nnode; node++)
00273       for (unsigned int eqn=0; eqn<neqn; eqn++)
00274   adj[col][node*neqn+eqn] = 0.0;
00275 
00276   // Evaluate element residual
00277   for (unsigned int node=0; node<e.nnode; node++) {
00278     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00279       unsigned int row = node*neqn+eqn;
00280       f[row] = 0.0;
00281       for (unsigned int qp=0; qp<e.nqp; qp++) {
00282   double c1 = e.w[qp]*e.jac[qp];
00283   double c2 = -(1.0/(e.jac[qp]*e.jac[qp]))*e.dphi[qp][node];
00284   double c3 = e.phi[qp][node]*(exp(u[qp*neqn+eqn]));
00285   double c4 = e.phi[qp][node]*u[qp*neqn+eqn];
00286   double c5 = e.phi[qp][node]*s[qp*neqn+eqn];
00287   double c35 = c3+c5;
00288   double c14 = c1*c4;
00289   f[row] += c1*(c2*du[qp*neqn+eqn] + c3 + c4*s[qp*neqn+eqn]);
00290   for (unsigned int col=0; col<w.size(); col++) {
00291     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00292       adj[col][node_col*neqn+eqn] += 
00293         c1*(c2*e.dphi[qp][node_col] + c35*e.phi[qp][node_col])*w[col][row];
00294       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00295         if (eqn_col != eqn)
00296     adj[col][node_col*neqn+eqn_col] += c14*e.phi[qp][node_col]*w[col][row];
00297       }
00298     }
00299   }
00300       }
00301     }
00302   }
00303 }
00304 
00305 template <class FadType>
00306 void fad_process_fill(const ElemData& e,
00307           unsigned int neqn,
00308           const std::vector< std::vector<double> >& w_local,
00309           const std::vector<FadType>& f_fad, 
00310           std::vector<double>& f,
00311           std::vector< std::vector<double> >& adj) {
00312   // Get residual
00313   for (unsigned int node=0; node<e.nnode; node++)
00314     for (unsigned int eqn=0; eqn<neqn; eqn++)
00315       f[e.gid[node]*neqn+eqn] += f_fad[node*neqn+eqn].val();
00316 
00317   // Get adjoint for each adjoint direction
00318   for (unsigned int col=0; col<w_local.size(); col++) {
00319     FadType z = 0.0;
00320     for (unsigned int node=0; node<e.nnode; node++)
00321       for (unsigned int eqn=0; eqn<neqn; eqn++) 
00322   z += f_fad[node*neqn+eqn]*w_local[col][node*neqn+eqn];
00323 
00324     for (unsigned int node=0; node<e.nnode; node++)
00325       for (unsigned int eqn=0; eqn<neqn; eqn++) 
00326   adj[col][e.gid[node]*neqn+eqn] += z.fastAccessDx(node*neqn+eqn);
00327   }
00328 }
00329 
00330 void rad_process_fill(const ElemData& e,
00331           unsigned int neqn,
00332           std::vector< std::vector<double> >& w_local,
00333           std::vector<RadType>& x_rad,
00334           std::vector<RadType>& f_rad, 
00335           std::vector<RadType*>& ff_rad,
00336           std::vector<double>& f,
00337           std::vector< std::vector<double> >& adj) {
00338   // Get residual
00339   for (unsigned int node=0; node<e.nnode; node++)
00340     for (unsigned int eqn=0; eqn<neqn; eqn++)
00341       f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
00342 
00343   // Get adjoint for each adjoint direction
00344   for (unsigned int col=0; col<w_local.size(); col++) {
00345     Sacado::Rad::ADcontext<double>::Weighted_Gradcomp(neqn*e.nnode, 
00346                   &ff_rad[0],
00347                   &w_local[col][0]);
00348     for (unsigned int node=0; node<e.nnode; node++)
00349       for (unsigned int eqn=0; eqn<neqn; eqn++) 
00350   adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj();
00351   }
00352 }
00353 
00354 void vrad_process_fill(const ElemData& e,
00355            unsigned int neqn,
00356            unsigned int ncol,
00357            std::vector<std::size_t>& vn,
00358            double** w_local,
00359            std::vector<VRadType>& x_rad,
00360            std::vector<VRadType>& f_rad, 
00361            VRadType*** vf_rad,
00362            std::vector<double>& f,
00363            std::vector< std::vector<double> >& adj) {
00364   // Get residual
00365   for (unsigned int node=0; node<e.nnode; node++)
00366     for (unsigned int eqn=0; eqn<neqn; eqn++)
00367       f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
00368 
00369   // Get adjoint for each adjoint direction
00370   Sacado::RadVec::ADcontext<double>::Weighted_GradcompVec(ncol,
00371                 &vn[0], 
00372                 vf_rad,
00373                 w_local);
00374   for (unsigned int col=0; col<ncol; col++)
00375     for (unsigned int node=0; node<e.nnode; node++)
00376       for (unsigned int eqn=0; eqn<neqn; eqn++) 
00377   adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(col);
00378 }
00379 
00380 #ifdef HAVE_ADOLC
00381 void adolc_process_fill(bool retape,
00382       int tag, 
00383       const ElemData& e,
00384       unsigned int neqn,
00385       unsigned int ncol,
00386       std::vector<double>& x_local,
00387       double **w_local,
00388       std::vector<adouble>& f_ad,
00389       std::vector<double>& f_local,
00390       std::vector<double>& f,
00391       double **adj_local,
00392       std::vector< std::vector<double> >& adj) {
00393   if (retape) {
00394     for (unsigned int node=0; node<e.nnode; node++)
00395       for (unsigned int eqn=0; eqn<neqn; eqn++)
00396   f_ad[node*neqn+eqn] >>= f_local[node*neqn+eqn];
00397     trace_off();
00398   }
00399   else
00400     zos_forward(tag, neqn*e.nnode, neqn*e.nnode, 1, &x_local[0], &f_local[0]);
00401 
00402   fov_reverse(tag, neqn*e.nnode, neqn*e.nnode, ncol, w_local, adj_local);
00403   
00404   for (unsigned int node=0; node<e.nnode; node++)
00405     for (unsigned int eqn=0; eqn<neqn; eqn++)
00406       f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
00407 
00408   for (unsigned int col=0; col<ncol; col++)
00409     for (unsigned int node=0; node<e.nnode; node++)
00410       for (unsigned int eqn=0; eqn<neqn; eqn++)
00411   adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
00412 }
00413 #endif
00414 
00415 void analytic_process_fill(const ElemData& e,
00416          unsigned int neqn,
00417          const std::vector<double>& f_local, 
00418          const std::vector< std::vector<double> >& adj_local, 
00419          std::vector<double>& f,
00420          std::vector< std::vector<double> >& adj) {
00421   for (unsigned int node=0; node<e.nnode; node++)
00422     for (unsigned int eqn=0; eqn<neqn; eqn++)
00423       f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
00424 
00425   for (unsigned int col=0; col<adj.size(); col++)
00426     for (unsigned int node=0; node<e.nnode; node++)
00427       for (unsigned int eqn=0; eqn<neqn; eqn++)
00428   adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
00429 }
00430 
00431 void residual_process_fill(const ElemData& e,
00432          unsigned int neqn,
00433          const std::vector<double>& f_local, 
00434          std::vector<double>& f) {
00435   for (unsigned int node=0; node<e.nnode; node++)
00436     for (unsigned int eqn=0; eqn<neqn; eqn++)
00437       f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
00438 }
00439 
00440 template <typename FadType>
00441 double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
00442         unsigned int num_cols, double mesh_spacing) {
00443   ElemData e(mesh_spacing);
00444 
00445   // Solution vector, residual, adjoint
00446   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00447   std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
00448   std::vector< std::vector<double> > adj(num_cols);
00449   for (unsigned int node=0; node<num_nodes; node++)
00450     for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00451       x[node*num_eqns+eqn] = 
00452   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
00453       f[node*num_eqns+eqn] = 0.0;
00454     }
00455 
00456   for (unsigned int col=0; col<num_cols; col++) {
00457     w[col] = std::vector<double>(num_nodes*num_eqns);
00458     w_local[col] = std::vector<double>(e.nnode*num_eqns);
00459     adj[col] = std::vector<double>(num_nodes*num_eqns);
00460     for (unsigned int node=0; node<num_nodes; node++)
00461       for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00462   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
00463   adj[col][node*num_eqns+eqn] = 0.0;
00464       }
00465   }
00466 
00467   Teuchos::Time timer("FE Fad Adjoint Fill", false);
00468   timer.start(true);
00469   std::vector<FadType> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns);
00470   std::vector<FadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00471   for (unsigned int i=0; i<num_nodes-1; i++) {
00472     e.gid[0] = i;
00473     e.gid[1] = i+1;
00474     
00475     fad_init_fill(e, num_eqns, x, w, x_fad, w_local);
00476     template_element_fill(e, num_eqns, x_fad, u, du, f_fad);
00477     fad_process_fill(e, num_eqns, w_local, f_fad, f, adj);
00478   }
00479   timer.stop();
00480 
00481   // std::cout << "Fad Residual = " << std::endl;
00482   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00483   //   std::cout << "\t" << f[i] << std::endl;
00484 
00485   // std::cout.precision(8);
00486   // std::cout << "Fad Adjoint = " << std::endl;
00487   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00488   //   std::cout << "\t";
00489   //   for (unsigned int j=0; j<num_cols; j++)
00490   //     std::cout << adj[j][i] << "\t";
00491   //   std::cout << std::endl;
00492   // }
00493 
00494   return timer.totalElapsedTime();
00495 }
00496 
00497 double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
00498         unsigned int num_cols, double mesh_spacing) {
00499   ElemData e(mesh_spacing);
00500 
00501   // Solution vector, residual, adjoint
00502   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00503   std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
00504   std::vector< std::vector<double> > adj(num_cols);
00505   for (unsigned int node=0; node<num_nodes; node++)
00506     for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00507       x[node*num_eqns+eqn] = 
00508   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
00509       f[node*num_eqns+eqn] = 0.0;
00510     }
00511 
00512   for (unsigned int col=0; col<num_cols; col++) {
00513     w[col] = std::vector<double>(num_nodes*num_eqns);
00514     w_local[col] = std::vector<double>(e.nnode*num_eqns);
00515     adj[col] = std::vector<double>(num_nodes*num_eqns);
00516     for (unsigned int node=0; node<num_nodes; node++)
00517       for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00518   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
00519   adj[col][node*num_eqns+eqn] = 0.0;
00520       }
00521   }
00522 
00523   Teuchos::Time timer("FE Rad Adjoint Fill", false);
00524   timer.start(true);
00525   std::vector<RadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
00526   std::vector<RadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00527   std::vector<RadType*> ff_rad(f_rad.size());
00528   for (unsigned int i=0; i<f_rad.size(); i++)
00529     ff_rad[i] = &f_rad[i];
00530   for (unsigned int i=0; i<num_nodes-1; i++) {
00531     e.gid[0] = i;
00532     e.gid[1] = i+1;
00533     
00534     rad_init_fill(e, num_eqns, x, w, x_rad, w_local);
00535     template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
00536     rad_process_fill(e, num_eqns, w_local, x_rad, f_rad, ff_rad, f, adj);
00537   }
00538   timer.stop();
00539 
00540   // std::cout << "Rad Residual = " << std::endl;
00541   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00542   //   std::cout << "\t" << f[i] << std::endl;
00543 
00544   // std::cout.precision(8);
00545   // std::cout << "Rad Adjoint = " << std::endl;
00546   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00547   //   std::cout << "\t";
00548   //   for (unsigned int j=0; j<num_cols; j++)
00549   //     std::cout << adj[j][i] << "\t";
00550   //   std::cout << std::endl;
00551   // }
00552 
00553   return timer.totalElapsedTime();
00554 }
00555 
00556 double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
00557          unsigned int num_cols, double mesh_spacing) {
00558   ElemData e(mesh_spacing);
00559 
00560   // Solution vector, residual, adjoint
00561   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00562   std::vector< std::vector<double> > w(num_cols);
00563   double **w_local = new double*[num_cols];
00564   std::vector< std::vector<double> > adj(num_cols);
00565   for (unsigned int node=0; node<num_nodes; node++)
00566     for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00567       x[node*num_eqns+eqn] = 
00568   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
00569       f[node*num_eqns+eqn] = 0.0;
00570     }
00571 
00572   for (unsigned int col=0; col<num_cols; col++) {
00573     w[col] = std::vector<double>(num_nodes*num_eqns);
00574     w_local[col] = new double[e.nnode*num_eqns];
00575     adj[col] = std::vector<double>(num_nodes*num_eqns);
00576     for (unsigned int node=0; node<num_nodes; node++)
00577       for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00578   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
00579   adj[col][node*num_eqns+eqn] = 0.0;
00580       }
00581   }
00582 
00583   Teuchos::Time timer("FE Vector Rad Adjoint Fill", false);
00584   timer.start(true);
00585   std::vector<VRadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
00586   std::vector<VRadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00587   VRadType ***vf_rad = new VRadType**[num_cols];
00588   std::vector<std::size_t> vn(num_cols);
00589   for (unsigned int i=0; i<num_cols; i++) {
00590     vf_rad[i] = new VRadType*[num_eqns*e.nnode];
00591     vn[i] = num_eqns*e.nnode;
00592     for (unsigned int j=0; j<num_eqns*e.nnode; j++)
00593       vf_rad[i][j] = &f_rad[j];
00594   }
00595   for (unsigned int i=0; i<num_nodes-1; i++) {
00596     e.gid[0] = i;
00597     e.gid[1] = i+1;
00598     
00599     vrad_init_fill(e, num_eqns, x, w, x_rad, w_local);
00600     template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
00601     vrad_process_fill(e, num_eqns, num_cols, vn, w_local, x_rad, f_rad, vf_rad,
00602           f, adj);
00603   }
00604   for (unsigned int col=0; col<num_cols; col++) {
00605     delete [] w_local[col];
00606     delete [] vf_rad[col];
00607   }
00608   delete [] w_local;
00609   delete [] vf_rad;
00610   timer.stop();
00611 
00612   // std::cout << "Vector Rad Residual = " << std::endl;
00613   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00614   //   std::cout << "\t" << f[i] << std::endl;
00615 
00616   // std::cout.precision(8);
00617   // std::cout << "Vector Rad Adjoint = " << std::endl;
00618   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00619   //   std::cout << "\t";
00620   //   for (unsigned int j=0; j<num_cols; j++)
00621   //     std::cout << adj[j][i] << "\t";
00622   //   std::cout << std::endl;
00623   // }
00624 
00625   return timer.totalElapsedTime();
00626 }
00627 
00628 #ifdef HAVE_ADOLC
00629 double adolc_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
00630           unsigned int num_cols, double mesh_spacing) {
00631   ElemData e(mesh_spacing);
00632 
00633   // Solution vector, residual, adjoint
00634   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00635   std::vector< std::vector<double> > w(num_cols);
00636   std::vector< std::vector<double> > adj(num_cols);
00637   for (unsigned int node=0; node<num_nodes; node++)
00638     for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00639       x[node*num_eqns+eqn] = 
00640   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
00641       f[node*num_eqns+eqn] = 0.0;
00642     }
00643 
00644   for (unsigned int col=0; col<num_cols; col++) {
00645     w[col] = std::vector<double>(num_nodes*num_eqns);
00646     adj[col] = std::vector<double>(num_nodes*num_eqns);
00647     for (unsigned int node=0; node<num_nodes; node++)
00648       for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00649   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
00650   adj[col][node*num_eqns+eqn] = 0.0;
00651       }
00652   }
00653 
00654   Teuchos::Time timer("FE ADOL-C Adjoint Fill", false);
00655   timer.start(true);
00656   std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
00657   std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00658   std::vector<double> x_local(e.nnode*num_eqns);
00659   std::vector<double> f_local(e.nnode*num_eqns);
00660   double **adj_local = new double*[num_cols];
00661   double **w_local = new double*[num_cols];
00662   for (unsigned int i=0; i<num_cols; i++) {
00663     adj_local[i] = new double[e.nnode*num_eqns];
00664     w_local[i] = new double[e.nnode*num_eqns];
00665   }
00666   
00667   // Tape first element
00668   e.gid[0] = 0;
00669   e.gid[1] = 1;
00670   adolc_init_fill(true, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
00671   template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
00672   adolc_process_fill(true, 0, e, num_eqns, num_cols, x_local, w_local, f_ad, 
00673          f_local, f, adj_local, adj);
00674 
00675   // Now do remaining fills reusing tape
00676   for (unsigned int i=1; i<num_nodes-1; i++) {
00677     e.gid[0] = i;
00678     e.gid[1] = i+1;
00679     
00680     adolc_init_fill(false, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
00681     adolc_process_fill(false, 0, e, num_eqns, num_cols, x_local, w_local, f_ad, 
00682            f_local, f, adj_local, adj);
00683   }
00684   for (unsigned int i=0; i<num_cols; i++) {
00685     delete [] adj_local[i];
00686     delete [] w_local[i];
00687   }
00688   delete [] adj_local;
00689   delete [] w_local;
00690   timer.stop();
00691 
00692   // std::cout << "ADOL-C Residual = " << std::endl;
00693   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00694   //   std::cout << "\t" << f[i] << std::endl;
00695 
00696   // std::cout.precision(8);
00697   // std::cout << "ADOL-C Adjoint = " << std::endl;
00698   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00699   //   std::cout << "\t";
00700   //   for (unsigned int j=0; j<num_cols; j++)
00701   //     std::cout << adj[j][i] << "\t";
00702   //   std::cout << std::endl;
00703   // }
00704 
00705   return timer.totalElapsedTime();
00706 }
00707 
00708 double adolc_retape_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
00709            unsigned int num_cols, double mesh_spacing) {
00710   ElemData e(mesh_spacing);
00711 
00712   // Solution vector, residual, adjoint
00713   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00714   std::vector< std::vector<double> > w(num_cols);
00715   std::vector< std::vector<double> > adj(num_cols);
00716   for (unsigned int node=0; node<num_nodes; node++)
00717     for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00718       x[node*num_eqns+eqn] = 
00719   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
00720       f[node*num_eqns+eqn] = 0.0;
00721     }
00722 
00723   for (unsigned int col=0; col<num_cols; col++) {
00724     w[col] = std::vector<double>(num_nodes*num_eqns);
00725     adj[col] = std::vector<double>(num_nodes*num_eqns);
00726     for (unsigned int node=0; node<num_nodes; node++)
00727       for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00728   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
00729   adj[col][node*num_eqns+eqn] = 0.0;
00730       }
00731   }
00732 
00733   Teuchos::Time timer("FE ADOL-C Retape Adjoint Fill", false);
00734   timer.start(true);
00735   std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
00736   std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00737   std::vector<double> x_local(e.nnode*num_eqns);
00738   std::vector<double> f_local(e.nnode*num_eqns);
00739   double **adj_local = new double*[num_cols];
00740   double **w_local = new double*[num_cols];
00741   for (unsigned int i=0; i<num_cols; i++) {
00742     adj_local[i] = new double[e.nnode*num_eqns];
00743     w_local[i] = new double[e.nnode*num_eqns];
00744   }
00745 
00746   for (unsigned int i=0; i<num_nodes-1; i++) {
00747     e.gid[0] = i;
00748     e.gid[1] = i+1;
00749     
00750     adolc_init_fill(true, 1, e, num_eqns, x, w, x_local, x_ad, w_local);
00751     template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
00752     adolc_process_fill(true, 1, e, num_eqns, num_cols, x_local, w_local, f_ad, 
00753            f_local, f, adj_local, adj);
00754   }
00755   for (unsigned int i=0; i<num_cols; i++) {
00756     delete [] adj_local[i];
00757     delete [] w_local[i];
00758   }
00759   delete [] adj_local;
00760   delete [] w_local;
00761   timer.stop();
00762 
00763   // std::cout << "ADOL-C Residual (retaped) = " << std::endl;
00764   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00765   //   std::cout << "\t" << f[i] << std::endl;
00766 
00767   // std::cout.precision(8);
00768   // std::cout << "ADOL-C Adjoint (retaped) = " << std::endl;
00769   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00770   //   std::cout << "\t";
00771   //   for (unsigned int j=0; j<num_cols; j++)
00772   //     std::cout << adj[j][i] << "\t";
00773   //   std::cout << std::endl;
00774   // }
00775 
00776   return timer.totalElapsedTime();
00777 }
00778 #endif
00779 
00780 double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
00781        unsigned int num_cols, double mesh_spacing) {
00782   ElemData e(mesh_spacing);
00783 
00784   // Solution vector, residual, adjoint
00785   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00786   std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
00787   std::vector< std::vector<double> > adj(num_cols);
00788   for (unsigned int node=0; node<num_nodes; node++)
00789     for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00790       x[node*num_eqns+eqn] = 
00791   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
00792       f[node*num_eqns+eqn] = 0.0;
00793     }
00794 
00795   for (unsigned int col=0; col<num_cols; col++) {
00796     w[col] = std::vector<double>(num_nodes*num_eqns);
00797     w_local[col] = std::vector<double>(e.nnode*num_eqns);
00798     adj[col] = std::vector<double>(num_nodes*num_eqns);
00799     for (unsigned int node=0; node<num_nodes; node++)
00800       for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00801   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
00802   adj[col][node*num_eqns+eqn] = 0.0;
00803       }
00804   }
00805 
00806   Teuchos::Time timer("FE Analytic Adjoint Fill", false);
00807   timer.start(true);
00808   std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
00809   std::vector< std::vector<double> > adj_local(num_cols);
00810   std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00811   for (unsigned int i=0; i<num_cols; i++)
00812     adj_local[i] = std::vector<double>(e.nnode*num_eqns);
00813   for (unsigned int i=0; i<num_nodes-1; i++) {
00814     e.gid[0] = i;
00815     e.gid[1] = i+1;
00816     
00817     analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
00818     analytic_element_fill(e, num_eqns, x_local, w_local, u, du, f_local, 
00819             adj_local);
00820     analytic_process_fill(e, num_eqns, f_local, adj_local, f, adj);
00821   }
00822   timer.stop();
00823 
00824   // std::cout.precision(8);
00825   // std::cout << "Analytic Residual = " << std::endl;
00826   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00827   //   std::cout << "\t" << f[i] << std::endl;
00828 
00829   // std::cout.precision(8);
00830   // std::cout << "Analytic Adjoint = " << std::endl;
00831   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00832   //   std::cout << "\t";
00833   //   for (unsigned int j=0; j<num_cols; j++)
00834   //     std::cout << adj[j][i] << "\t";
00835   //   std::cout << std::endl;
00836   // }
00837 
00838   return timer.totalElapsedTime();
00839 }
00840 
00841 double residual_fill(unsigned int num_nodes, unsigned int num_eqns,
00842          unsigned int num_cols, double mesh_spacing) {
00843   ElemData e(mesh_spacing);
00844 
00845   // Solution vector, residual, jacobian
00846   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00847   std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
00848   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00849     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00850       unsigned int row = node_row*num_eqns + eqn_row;
00851       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00852       f[row] = 0.0;
00853     }
00854   }
00855 
00856   for (unsigned int col=0; col<num_cols; col++) {
00857     w[col] = std::vector<double>(num_nodes*num_eqns);
00858     w_local[col] = std::vector<double>(e.nnode*num_eqns);
00859     for (unsigned int node=0; node<num_nodes; node++)
00860       for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
00861   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
00862       }
00863   }
00864 
00865   Teuchos::Time timer("FE Residual Fill", false);
00866   timer.start(true);
00867   std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
00868   std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00869   for (unsigned int i=0; i<num_nodes-1; i++) {
00870     e.gid[0] = i;
00871     e.gid[1] = i+1;
00872     
00873     analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
00874     template_element_fill(e, num_eqns, x_local, u, du, f_local);
00875     residual_process_fill(e, num_eqns, f_local, f);
00876   }
00877   timer.stop();
00878 
00879   // std::cout.precision(8);
00880   // std::cout << "Analytic Residual = " << std::endl;
00881   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00882   //   std::cout << "\t" << f[i] << std::endl;
00883 
00884   return timer.totalElapsedTime();
00885 }
00886 
00887 int main(int argc, char* argv[]) {
00888   int ierr = 0;
00889 
00890   try {
00891     double t, ta, tr;
00892     int p = 2;
00893     int w = p+7;
00894 
00895     // Set up command line options
00896     Teuchos::CommandLineProcessor clp;
00897     clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
00898     int num_nodes = 100000;
00899     int num_eqns = 2;
00900     int num_cols = 4;
00901     int rt = 0;
00902     clp.setOption("n", &num_nodes, "Number of nodes");
00903     clp.setOption("p", &num_eqns, "Number of equations");
00904     clp.setOption("q", &num_cols, "Number of adjoint directions");
00905     clp.setOption("rt", &rt, "Include ADOL-C retaping test");
00906 
00907     // Parse options
00908     Teuchos::CommandLineProcessor::EParseCommandLineReturn
00909       parseReturn= clp.parse(argc, argv);
00910     if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
00911       return 1;
00912 
00913     double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1);
00914 
00915     std::cout.setf(std::ios::scientific);
00916     std::cout.precision(p);
00917     std::cout << "num_nodes = " << num_nodes 
00918         << ", num_eqns = " << num_eqns 
00919         << ", num_cols = " << num_cols << ":  " << std::endl
00920         << "           " << "   Time" << "\t"<< "Time/Analytic" << "\t"
00921         << "Time/Residual" << std::endl;
00922 
00923     ta = 1.0;
00924     tr = 1.0;
00925 
00926     tr = residual_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
00927 
00928     ta = analytic_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
00929     std::cout << "Analytic:  " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/tr << std::endl;
00930 
00931 #ifdef HAVE_ADOLC
00932     t = adolc_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
00933     std::cout << "ADOL-C:    " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00934 
00935     if (rt != 0) {
00936       t = adolc_retape_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
00937       std::cout << "ADOL-C(rt):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00938     }
00939 #endif
00940     
00941     t = fad_adj_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
00942     std::cout << "DFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00943 
00944     t = fad_adj_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
00945     std::cout << "ELRDFad:   " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00946 
00947     t = rad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
00948     std::cout << "Rad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00949 
00950     t = vrad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
00951     std::cout << "Vector Rad:" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
00952 
00953   }
00954   catch (std::exception& e) {
00955     std::cout << e.what() << std::endl;
00956     ierr = 1;
00957   }
00958   catch (const char *s) {
00959     std::cout << s << std::endl;
00960     ierr = 1;
00961   }
00962   catch (...) {
00963     std::cout << "Caught unknown exception!" << std::endl;
00964     ierr = 1;
00965   }
00966 
00967   return ierr;
00968 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Generated on Wed Apr 13 10:19:30 2011 for Sacado Package Browser (Single Doxygen Collection) by  doxygen 1.6.3