Sacado Package Browser (Single Doxygen Collection) Version of the Day
fad_fe_jac_fill.cpp
Go to the documentation of this file.
00001 // $Id$ 
00002 // $Source$ 
00003 // @HEADER
00004 // ***********************************************************************
00005 // 
00006 //                           Sacado Package
00007 //                 Copyright (2006) Sandia Corporation
00008 // 
00009 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00010 // the U.S. Government retains certain rights in this software.
00011 // 
00012 // This library is free software; you can redistribute it and/or modify
00013 // it under the terms of the GNU Lesser General Public License as
00014 // published by the Free Software Foundation; either version 2.1 of the
00015 // License, or (at your option) any later version.
00016 //  
00017 // This library is distributed in the hope that it will be useful, but
00018 // WITHOUT ANY WARRANTY; without even the implied warranty of
00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00020 // Lesser General Public License for more details.
00021 //  
00022 // You should have received a copy of the GNU Lesser General Public
00023 // License along with this library; if not, write to the Free Software
00024 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00025 // USA
00026 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
00027 // (etphipp@sandia.gov).
00028 // 
00029 // ***********************************************************************
00030 // @HEADER
00031 
00032 #include "Sacado.hpp"
00033 #include "Sacado_Fad_SimpleFad.hpp"
00034 #include "Sacado_CacheFad_DFad.hpp"
00035 #include "Sacado_CacheFad_SFad.hpp"
00036 #include "Sacado_CacheFad_SLFad.hpp"
00037 
00038 #include "Fad/fad.h"
00039 #include "TinyFadET/tfad.h"
00040 
00041 #include "Teuchos_Time.hpp"
00042 #include "Teuchos_CommandLineProcessor.hpp"
00043 
00044 // ADOL-C includes
00045 #ifdef HAVE_ADOLC
00046 #ifdef PACKAGE
00047 #undef PACKAGE
00048 #endif
00049 #ifdef PACKAGE_NAME
00050 #undef PACKAGE_NAME
00051 #endif
00052 #ifdef PACKAGE_BUGREPORT
00053 #undef PACKAGE_BUGREPORT
00054 #endif
00055 #ifdef PACKAGE_STRING
00056 #undef PACKAGE_STRING
00057 #endif
00058 #ifdef PACKAGE_TARNAME
00059 #undef PACKAGE_TARNAME
00060 #endif
00061 #ifdef PACKAGE_VERSION
00062 #undef PACKAGE_VERSION
00063 #endif
00064 #ifdef VERSION
00065 #undef VERSION
00066 #endif
00067 //#define ADOLC_TAPELESS
00068 #define NUMBER_DIRECTIONS 100
00069 #include "adolc/adouble.h"
00070 #include "adolc/drivers/drivers.h"
00071 #include "adolc/interfaces.h"
00072 #endif
00073 
00074 #ifdef HAVE_ADIC
00075 // We have included an ADIC differentiated version of the element fill 
00076 // routine to compare the speed of operator overloading to source
00077 // transformation.  To run this code, all that is necessary is to turn
00078 // on the ADIC TPL.  However to modify the code, it is necessary to
00079 // re-run the ADIC source transformation tool.  To do so, first update
00080 // the changes to adic_element_fill.c.  Then set the following environment
00081 // variables:
00082 //     export ADIC_ARCH=linux
00083 //     export ADIC=/home/etphipp/AD_libs/adic
00084 // Next run ADIC via in the tests/performance source directory:
00085 //     ${ADIC}/bin/linux/adiC -vd gradient -i adic_element_fill.init
00086 // Finally, copy the resulting differentiated function in adic_element_fill.ad.c
00087 // back into this file.  The function will need to be edited by changing
00088 // the allocation of s to a std::vector<DERIV_TYPE> (the compiler 
00089 // doesn't seem to like malloc), and commenting out the g_filenum lines.
00090 #define ad_GRAD_MAX 100
00091 #include "ad_deriv.h"
00092 #endif
00093 
00094 // A performance test that computes a finite-element-like Jacobian using
00095 // several Fad variants
00096 
00097 template <>
00098 Sacado::Fad::MemPool* Sacado::Fad::MemPoolStorage<double>::defaultPool_ = NULL;
00099 
00100 void FAD::error(const char *msg) {
00101   std::cout << msg << std::endl;
00102 }
00103 
00104 struct ElemData {
00105   static const unsigned int nqp = 2;
00106   static const unsigned int nnode = 2;
00107   double w[nqp], jac[nqp], phi[nqp][nnode], dphi[nqp][nnode];
00108   unsigned int gid[nnode];
00109 
00110   ElemData(double mesh_spacing) {
00111     // Quadrature points
00112     double xi[nqp];
00113     xi[0] = -1.0 / std::sqrt(3.0);
00114     xi[1] =  1.0 / std::sqrt(3.0);
00115     
00116     for (unsigned int i=0; i<nqp; i++) {
00117       // Weights
00118       w[i] = 1.0;
00119       
00120       // Element transformation Jacobian
00121       jac[i] = 0.5*mesh_spacing;
00122       
00123       // Shape functions & derivatives
00124       phi[i][0] = 0.5*(1.0 - xi[i]);
00125       phi[i][1] = 0.5*(1.0 + xi[i]);
00126       dphi[i][0] = -0.5;
00127       dphi[i][1] = 0.5;
00128     }
00129   }
00130 };
00131 
00132 template <class FadType>
00133 void fad_init_fill(const ElemData& e,
00134        unsigned int neqn,
00135        const std::vector<double>& x, 
00136        std::vector<FadType>& x_fad) {
00137   for (unsigned int node=0; node<e.nnode; node++)
00138     for (unsigned int eqn=0; eqn<neqn; eqn++)
00139       x_fad[node*neqn+eqn] = FadType(e.nnode*neqn, node*neqn+eqn, 
00140              x[e.gid[node]*neqn+eqn]);
00141 }
00142 
00143 #ifdef HAVE_ADOLC
00144 #ifndef ADOLC_TAPELESS
00145 void adolc_init_fill(bool retape,
00146          int tag,
00147          const ElemData& e,
00148          unsigned int neqn,
00149          const std::vector<double>& x, 
00150          std::vector<double>& x_local,
00151          std::vector<adouble>& x_ad) {
00152   if (retape)
00153     trace_on(tag);
00154   for (unsigned int node=0; node<e.nnode; node++)
00155     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00156       x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00157       if (retape)
00158   x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
00159     }
00160 }
00161 
00162 #else
00163 
00164 void adolc_tapeless_init_fill(const ElemData& e,
00165             unsigned int neqn,
00166             const std::vector<double>& x, 
00167             std::vector<adtl::adouble>& x_ad) {
00168   for (unsigned int node=0; node<e.nnode; node++)
00169     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00170       x_ad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00171       for (unsigned int i=0; i<neqn*e.nnode; i++)
00172   x_ad[node*neqn+eqn].setADValue(i, 0.0);
00173       x_ad[node*neqn+eqn].setADValue(node*neqn+eqn, 1.0);
00174     }
00175   
00176 }
00177 #endif
00178 #endif
00179 
00180 #ifdef HAVE_ADIC
00181 void adic_init_fill(const ElemData& e,
00182         unsigned int neqn,
00183         const std::vector<double>& x, 
00184         std::vector<DERIV_TYPE>& x_fad) {
00185   static bool first = true;
00186   for (unsigned int node=0; node<e.nnode; node++)
00187     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00188       x_fad[node*neqn+eqn].value = x[e.gid[node]*neqn+eqn];
00189       if (first)
00190   ad_AD_SetIndep(x_fad[node*neqn+eqn]);
00191     }
00192   if (first) {
00193     ad_AD_SetIndepDone();
00194     first = false;
00195   }
00196 }
00197 #endif
00198 
00199 void analytic_init_fill(const ElemData& e,
00200       unsigned int neqn,
00201       const std::vector<double>& x, 
00202       std::vector<double>& x_local) {
00203   for (unsigned int node=0; node<e.nnode; node++) 
00204     for (unsigned int eqn=0; eqn<neqn; eqn++)
00205       x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
00206 }
00207 
00208 template <class T>
00209 void template_element_fill(const ElemData& e, 
00210          unsigned int neqn,
00211          const std::vector<T>& x, 
00212          std::vector<T>& u, 
00213          std::vector<T>& du, 
00214          std::vector<T>& f) {
00215   // Construct element solution, derivative
00216   for (unsigned int qp=0; qp<e.nqp; qp++) {
00217     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00218       u[qp*neqn+eqn] = 0.0;
00219       du[qp*neqn+eqn] = 0.0;
00220       for (unsigned int node=0; node<e.nnode; node++) {
00221   u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
00222   du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
00223       }
00224     }
00225   }
00226 
00227   // Compute sum of equations for coupling
00228   std::vector<T> s(e.nqp*neqn);
00229   for (unsigned int qp=0; qp<e.nqp; qp++) {
00230     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00231       s[qp*neqn+eqn] = 0.0;
00232       for (unsigned int j=0; j<neqn; j++) {
00233         if (j != eqn)
00234           s[qp*neqn+eqn] += u[qp*neqn+j]; 
00235       }
00236     }
00237   }
00238 
00239   // Evaluate element residual
00240   for (unsigned int node=0; node<e.nnode; node++) {
00241     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00242       unsigned int row = node*neqn+eqn;
00243       f[row] = 0.0;
00244       for (unsigned int qp=0; qp<e.nqp; qp++) {
00245   f[row] += 
00246     e.w[qp]*e.jac[qp]*(-(1.0/(e.jac[qp]*e.jac[qp]))*
00247            du[qp*neqn+eqn]*e.dphi[qp][node] + 
00248            e.phi[qp][node]*(exp(u[qp*neqn+eqn]) + 
00249                 u[qp*neqn+eqn]*s[qp*neqn+eqn]));
00250       }
00251     }
00252   }
00253 }
00254 
00255 #ifdef HAVE_ADIC
00256 /************************** DISCLAIMER ********************************/
00257 /*                                                                    */
00258 /*   This file was generated on 01/12/10 10:38:06 by the version of   */
00259 /*   ADIC compiled on  08/30/00 16:47:46                              */
00260 /*                                                                    */
00261 /*   ADIC was prepared as an account of work sponsored by an          */
00262 /*   agency of the United States Government and the University of     */
00263 /*   Chicago.  NEITHER THE AUTHOR(S), THE UNITED STATES GOVERNMENT    */
00264 /*   NOR ANY AGENCY THEREOF, NOR THE UNIVERSITY OF CHICAGO, INCLUDING */
00265 /*   ANY OF THEIR EMPLOYEES OR OFFICERS, MAKES ANY WARRANTY, EXPRESS  */
00266 /*   OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR */
00267 /*   THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION OR  */
00268 /*   PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE */
00269 /*   PRIVATELY OWNED RIGHTS.                                          */
00270 /*                                                                    */
00271 /**********************************************************************/
00272 void   adic_element_fill(ElemData  *e,unsigned int  neqn,DERIV_TYPE  *x,DERIV_TYPE  *u,DERIV_TYPE  *du,DERIV_TYPE  *f) {
00273 unsigned int  ad_var_0, ad_var_1, ad_var_2, ad_var_3, ad_var_4, ad_var_5, ad_var_6, ad_var_7, ad_var_8;
00274 DERIV_TYPE  ad_var_9;
00275 double  ad_adji_0;
00276     double  ad_loc_0;
00277     double  ad_loc_1;
00278     double  ad_loc_2;
00279     double  ad_loc_3;
00280     double  ad_loc_4;
00281     double  ad_loc_5;
00282     double  ad_loc_6;
00283     double  ad_loc_7;
00284     double  ad_loc_8;
00285     double  ad_loc_9;
00286     double  ad_loc_10;
00287     double  ad_loc_11;
00288     double  ad_adj_0;
00289     double  ad_adj_1;
00290     double  ad_adj_2;
00291     double  ad_adj_3;
00292     double  ad_adj_4;
00293 
00294         // static int g_filenum = 0;
00295         // if (g_filenum == 0) {
00296         //     adintr_ehsfid(&g_filenum, __FILE__, "ad_c_element_residual_fill");
00297         // }
00298             for (unsigned int  qp = 0;     qp < e->nqp;     )    {
00299         for (unsigned int  eqn = 0;         eqn < neqn;         )        {
00300             {
00301                 ad_grad_axpy_0(&(u[qp * neqn + eqn]));
00302                 DERIV_val(u[qp * neqn + eqn]) = 0.0;
00303             }
00304             {
00305                 ad_grad_axpy_0(&(du[qp * neqn + eqn]));
00306                 DERIV_val(du[qp * neqn + eqn]) = 0.0;
00307             }
00308             for (unsigned int  node = 0;             node < e->nnode;             )            {
00309                 {
00310                     ad_loc_0 = DERIV_val(x[node * neqn + eqn]) * e->phi[qp][node];
00311                     ad_loc_1 = DERIV_val(u[qp * neqn + eqn]) + ad_loc_0;
00312                     ad_grad_axpy_2(&(u[qp * neqn + eqn]), 1.000000000000000e+00, &(u[qp * neqn + eqn]), e->phi[qp][node], &(x[node * neqn + eqn]));
00313                     DERIV_val(u[qp * neqn + eqn]) = ad_loc_1;
00314                 }
00315                 {
00316                     ad_loc_0 = DERIV_val(x[node * neqn + eqn]) * e->dphi[qp][node];
00317                     ad_loc_1 = DERIV_val(du[qp * neqn + eqn]) + ad_loc_0;
00318                     ad_grad_axpy_2(&(du[qp * neqn + eqn]), 1.000000000000000e+00, &(du[qp * neqn + eqn]), e->dphi[qp][node], &(x[node * neqn + eqn]));
00319                     DERIV_val(du[qp * neqn + eqn]) = ad_loc_1;
00320                 }
00321                 ad_var_2 = node++;
00322             }
00323             ad_var_1 = eqn++;
00324         }
00325         ad_var_0 = qp++;
00326     }
00327       std::vector<DERIV_TYPE> s(e->nqp * neqn);
00328     for (unsigned int  qp = 0;     qp < e->nqp;     )    {
00329         for (unsigned int  eqn = 0;         eqn < neqn;         )        {
00330             {
00331                 ad_grad_axpy_0(&(s[qp * neqn + eqn]));
00332                 DERIV_val(s[qp * neqn + eqn]) = 0.0;
00333             }
00334             for (unsigned int  j = 0;             j < neqn;             )            {
00335                 if (j != eqn)                 {
00336                     {
00337                         ad_loc_0 = DERIV_val(s[qp * neqn + eqn]) + DERIV_val(u[qp * neqn + j]);
00338                         ad_grad_axpy_2(&(s[qp * neqn + eqn]), 1.000000000000000e+00, &(s[qp * neqn + eqn]), 1.000000000000000e+00, &(u[qp * neqn + j]));
00339                         DERIV_val(s[qp * neqn + eqn]) = ad_loc_0;
00340                     }
00341                 }
00342                 ad_var_5 = j++;
00343             }
00344             ad_var_4 = eqn++;
00345         }
00346         ad_var_3 = qp++;
00347     }
00348     for (unsigned int  node = 0;     node < e->nnode;     )    {
00349         for (unsigned int  eqn = 0;         eqn < neqn;         )        {
00350 unsigned int  row = node * neqn + eqn;
00351             {
00352                 ad_grad_axpy_0(&(f[row]));
00353                 DERIV_val(f[row]) = 0.0;
00354             }
00355             for (unsigned int  qp = 0;             qp < e->nqp;             )            {
00356      DERIV_val(ad_var_9) = exp(( DERIV_val(u[qp * neqn + eqn])));  /*exp*/
00357       ad_adji_0 = DERIV_val(ad_var_9);
00358                 {
00359                     ad_grad_axpy_1(&(ad_var_9), ad_adji_0, &(u[qp * neqn + eqn]));
00360                 }
00361                 {
00362                     ad_loc_0 = e->w[qp] * e->jac[qp];
00363                     ad_loc_1 = e->jac[qp] * e->jac[qp];
00364                     ad_loc_2 = 1.0 / ad_loc_1;
00365                     ad_loc_3 =  -ad_loc_2;
00366                     ad_loc_4 = ad_loc_3 * DERIV_val(du[qp * neqn + eqn]);
00367                     ad_loc_5 = ad_loc_4 * e->dphi[qp][node];
00368                     ad_loc_6 = DERIV_val(u[qp * neqn + eqn]) * DERIV_val(s[qp * neqn + eqn]);
00369                     ad_loc_7 = DERIV_val(ad_var_9) + ad_loc_6;
00370                     ad_loc_8 = e->phi[qp][node] * ad_loc_7;
00371                     ad_loc_9 = ad_loc_5 + ad_loc_8;
00372                     ad_loc_10 = ad_loc_0 * ad_loc_9;
00373                     ad_loc_11 = DERIV_val(f[row]) + ad_loc_10;
00374                     ad_adj_0 = e->phi[qp][node] * ad_loc_0;
00375                     ad_adj_1 = DERIV_val(u[qp * neqn + eqn]) * ad_adj_0;
00376                     ad_adj_2 = DERIV_val(s[qp * neqn + eqn]) * ad_adj_0;
00377                     ad_adj_3 = e->dphi[qp][node] * ad_loc_0;
00378                     ad_adj_4 = ad_loc_3 * ad_adj_3;
00379                     ad_grad_axpy_5(&(f[row]), 1.000000000000000e+00, &(f[row]), ad_adj_4, &(du[qp * neqn + eqn]), ad_adj_0, &(ad_var_9), ad_adj_2, &(u[qp * neqn + eqn]), ad_adj_1, &(s[qp * neqn + eqn]));
00380                     DERIV_val(f[row]) = ad_loc_11;
00381                 }
00382                 ad_var_8 = qp++;
00383             }
00384             ad_var_7 = eqn++;
00385         }
00386         ad_var_6 = node++;
00387     }
00388     //free(s);
00389 }
00390 void   AD_Init(int  arg0) {
00391     ad_AD_GradInit(arg0);
00392 
00393 }
00394 void   AD_Final() {
00395     ad_AD_GradFinal();
00396 
00397 }
00398 #endif
00399 
00400 void analytic_element_fill(const ElemData& e, 
00401          unsigned int neqn,
00402          const std::vector<double>& x, 
00403          std::vector<double>& u, 
00404          std::vector<double>& du, 
00405          std::vector<double>& f,
00406          std::vector< std::vector<double> >& jac) {
00407   // Construct element solution, derivative
00408   for (unsigned int qp=0; qp<e.nqp; qp++) {
00409     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00410       u[qp*neqn+eqn] = 0.0;
00411       du[qp*neqn+eqn] = 0.0;
00412       for (unsigned int node=0; node<e.nnode; node++) {
00413   u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
00414   du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
00415       }
00416     }
00417   }
00418 
00419   // Compute sum of equations for coupling
00420   std::vector<double> s(e.nqp*neqn);
00421   for (unsigned int qp=0; qp<e.nqp; qp++) {
00422     for (unsigned int eqn=0; eqn<neqn; eqn++) {
00423       s[qp*neqn+eqn] = 0.0;
00424       for (unsigned int j=0; j<neqn; j++) {
00425   if (j != eqn)
00426     s[qp*neqn+eqn] += u[qp*neqn+j]; 
00427       }
00428     }
00429   }
00430 
00431   // Evaluate element residual
00432   for (unsigned int node_row=0; node_row<e.nnode; node_row++) {
00433     for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00434       unsigned int row = node_row*neqn+eqn_row;
00435       f[row] = 0.0;
00436       for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00437   for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00438     unsigned int col = node_col*neqn+eqn_col;
00439     jac[row][col] = 0.0;
00440   }
00441       }
00442       for (unsigned int qp=0; qp<e.nqp; qp++) {
00443   double c1 = e.w[qp]*e.jac[qp];
00444   double c2 = -(1.0/(e.jac[qp]*e.jac[qp]))*e.dphi[qp][node_row];
00445   double c3 = e.phi[qp][node_row]*(exp(u[qp*neqn+eqn_row]));
00446   double c4 = e.phi[qp][node_row]*u[qp*neqn+eqn_row];
00447   double c5 = e.phi[qp][node_row]*s[qp*neqn+eqn_row];
00448   double c35 = c3+c5;
00449   double c14 = c1*c4;
00450   f[row] += c1*(c2*du[qp*neqn+eqn_row] + c3 + c4*s[qp*neqn+eqn_row]);
00451   for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00452     jac[row][node_col*neqn+eqn_row] += 
00453       c1*(c2*e.dphi[qp][node_col] + c35*e.phi[qp][node_col]);
00454     for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00455       if (eqn_col != eqn_row)
00456         jac[row][node_col*neqn+eqn_col] += c14*e.phi[qp][node_col];
00457     }
00458   }
00459       }
00460     }
00461   }
00462 }
00463 
00464 template <class FadType>
00465 void fad_process_fill(const ElemData& e,
00466           unsigned int neqn,
00467           const std::vector<FadType>& f_fad, 
00468           std::vector<double>& f,
00469           std::vector< std::vector<double> >& jac) {
00470   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00471     f[e.gid[0]*neqn+eqn_row] += f_fad[0*neqn+eqn_row].val();
00472     f[e.gid[1]*neqn+eqn_row] += f_fad[1*neqn+eqn_row].val();
00473     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00474       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00475   unsigned int col = node_col*neqn+eqn_col;
00476   unsigned int next_col = (node_col+1)*neqn+eqn_col;
00477   jac[e.gid[0]*neqn+eqn_row][next_col] += 
00478     f_fad[0*neqn+eqn_row].fastAccessDx(col);
00479   jac[e.gid[1]*neqn+eqn_row][col] += 
00480     f_fad[1*neqn+eqn_row].fastAccessDx(col);
00481       }
00482     }
00483   }
00484 }
00485 
00486 #ifdef HAVE_ADOLC
00487 #ifndef ADOLC_TAPELESS
00488 void adolc_process_fill(bool retape,
00489       int tag, 
00490       const ElemData& e,
00491       unsigned int neqn,
00492       std::vector<double>& x_local,
00493       std::vector<adouble>& f_ad, 
00494       std::vector<double>& f,
00495       std::vector<double>& f_local,
00496       std::vector< std::vector<double> >& jac,
00497       double **seed,
00498       double **jac_local) {
00499   if (retape) {
00500     for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++)
00501       f_ad[0*neqn+eqn_row] >>= f_local[0*neqn+eqn_row];
00502     for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++)
00503       f_ad[1*neqn+eqn_row] >>= f_local[1*neqn+eqn_row];
00504     trace_off();
00505   }
00506 
00507   //jacobian(tag, neqn*e.nnode, neqn*e.nnode, &x_local[0], jac_local);
00508   forward(tag, neqn*e.nnode, neqn*e.nnode, neqn*e.nnode, &x_local[0],
00509     seed, &f_local[0], jac_local);
00510   
00511   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00512     f[e.gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
00513     f[e.gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
00514     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00515       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00516   unsigned int col = node_col*neqn+eqn_col;
00517   unsigned int next_col = (node_col+1)*neqn+eqn_col;
00518   jac[e.gid[0]*neqn+eqn_row][next_col] += jac_local[0*neqn+eqn_row][col];
00519   jac[e.gid[1]*neqn+eqn_row][col] += jac_local[1*neqn+eqn_row][col];
00520       }
00521     }
00522   }
00523 }
00524 
00525 #else
00526 
00527 void adolc_tapeless_process_fill(const ElemData& e,
00528          unsigned int neqn,
00529          const std::vector<adtl::adouble>& f_ad, 
00530          std::vector<double>& f,
00531          std::vector< std::vector<double> >& jac) {
00532   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00533     f[e.gid[0]*neqn+eqn_row] += f_ad[0*neqn+eqn_row].getValue();
00534     f[e.gid[1]*neqn+eqn_row] += f_ad[1*neqn+eqn_row].getValue();
00535     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00536       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00537   unsigned int col = node_col*neqn+eqn_col;
00538   unsigned int next_col = (node_col+1)*neqn+eqn_col;
00539   jac[e.gid[0]*neqn+eqn_row][next_col] += 
00540     f_ad[0*neqn+eqn_row].getADValue(col);
00541   jac[e.gid[1]*neqn+eqn_row][col] += 
00542     f_ad[1*neqn+eqn_row].getADValue(col);
00543       }
00544     }
00545   }
00546 }
00547 #endif
00548 #endif
00549 
00550 #ifdef HAVE_ADIC
00551 void adic_process_fill(const ElemData& e,
00552            unsigned int neqn,
00553            const std::vector<DERIV_TYPE>& f_fad, 
00554            std::vector<double>& f,
00555            std::vector< std::vector<double> >& jac) {
00556   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00557     f[e.gid[0]*neqn+eqn_row] += f_fad[0*neqn+eqn_row].value;
00558     f[e.gid[1]*neqn+eqn_row] += f_fad[1*neqn+eqn_row].value;
00559     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00560       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00561   unsigned int col = node_col*neqn+eqn_col;
00562   unsigned int next_col = (node_col+1)*neqn+eqn_col;
00563   jac[e.gid[0]*neqn+eqn_row][next_col] += 
00564     f_fad[0*neqn+eqn_row].grad[col];
00565   jac[e.gid[1]*neqn+eqn_row][col] += 
00566     f_fad[1*neqn+eqn_row].grad[col];
00567       }
00568     }
00569   }
00570 }
00571 #endif
00572 
00573 void analytic_process_fill(const ElemData& e,
00574          unsigned int neqn,
00575          const std::vector<double>& f_local, 
00576          const std::vector< std::vector<double> >& jac_local, 
00577          std::vector<double>& f,
00578          std::vector< std::vector<double> >& jac) {
00579   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00580     f[e.gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
00581     f[e.gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
00582     for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
00583       for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
00584   unsigned int col = node_col*neqn+eqn_col;
00585   unsigned int next_col = (node_col+1)*neqn+eqn_col;
00586   jac[e.gid[0]*neqn+eqn_row][next_col] += jac_local[0*neqn+eqn_row][col];
00587   jac[e.gid[1]*neqn+eqn_row][col] += jac_local[1*neqn+eqn_row][col];
00588       }
00589     }
00590   }
00591 }
00592 
00593 void residual_process_fill(const ElemData& e,
00594          unsigned int neqn,
00595          const std::vector<double>& f_local, 
00596          std::vector<double>& f) {
00597   for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
00598     f[e.gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
00599     f[e.gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
00600   }
00601 }
00602 
00603 template <class FadType>
00604 double fad_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00605         double mesh_spacing) {
00606   ElemData e(mesh_spacing);
00607 
00608   // Solution vector, residual, jacobian
00609   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00610   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00611   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00612     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00613       unsigned int row = node_row*num_eqns + eqn_row;
00614       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00615       f[row] = 0.0;
00616       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00617       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00618   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00619     unsigned int col = node_col*num_eqns + eqn_col;
00620     jac[row][col] = 0.0;
00621   }
00622       }
00623     }
00624   }
00625 
00626   Teuchos::Time timer("FE Fad Jacobian Fill", false);
00627   timer.start(true);
00628   std::vector<FadType> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns);
00629   std::vector<FadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00630   for (unsigned int i=0; i<num_nodes-1; i++) {
00631     e.gid[0] = i;
00632     e.gid[1] = i+1;
00633     
00634     fad_init_fill(e, num_eqns, x, x_fad);
00635     template_element_fill(e, num_eqns, x_fad, u, du, f_fad);
00636     fad_process_fill(e, num_eqns, f_fad, f, jac);
00637   }
00638   timer.stop();
00639 
00640   // std::cout << "Fad Residual = " << std::endl;
00641   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00642   //   std::cout << "\t" << f[i] << std::endl;
00643 
00644   // std::cout.precision(8);
00645   // std::cout << "Fad Jacobian = " << std::endl;
00646   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00647   //   std::cout << "\t";
00648   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00649   //     std::cout << jac[i][j] << "\t";
00650   //   std::cout << std::endl;
00651   // }
00652 
00653   return timer.totalElapsedTime();
00654 }
00655 
00656 #ifdef HAVE_ADOLC
00657 #ifndef ADOLC_TAPELESS
00658 double adolc_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00659           double mesh_spacing) {
00660   ElemData e(mesh_spacing);
00661 
00662   // Solution vector, residual, jacobian
00663   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00664   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00665   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00666     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00667       unsigned int row = node_row*num_eqns + eqn_row;
00668       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00669       f[row] = 0.0;
00670       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00671       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00672   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00673     unsigned int col = node_col*num_eqns + eqn_col;
00674     jac[row][col] = 0.0;
00675   }
00676       }
00677     }
00678   }
00679 
00680   Teuchos::Time timer("FE ADOL-C Jacobian Fill", false);
00681   timer.start(true);
00682   std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
00683   std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00684   std::vector<double> x_local(e.nnode*num_eqns);
00685   std::vector<double> f_local(e.nnode*num_eqns);
00686   double **jac_local = new double*[e.nnode*num_eqns];
00687   double **seed = new double*[e.nnode*num_eqns];
00688   for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
00689     jac_local[i] = new double[e.nnode*num_eqns];
00690     seed[i] = new double[e.nnode*num_eqns];
00691     for (unsigned int j=0; j<e.nnode*num_eqns; j++)
00692       seed[i][j] = 0.0;
00693     seed[i][i] = 1.0;
00694   }
00695   
00696   // Tape first element
00697   e.gid[0] = 0;
00698   e.gid[1] = 1;
00699   adolc_init_fill(true, 0, e, num_eqns, x, x_local, x_ad);
00700   template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
00701   adolc_process_fill(true, 0, e, num_eqns, x_local, f_ad, f, f_local,
00702          jac, seed, jac_local);
00703 
00704   // Now do remaining fills reusing tape
00705   for (unsigned int i=1; i<num_nodes-1; i++) {
00706     e.gid[0] = i;
00707     e.gid[1] = i+1;
00708     
00709     adolc_init_fill(false, 0, e, num_eqns, x, x_local, x_ad);
00710     adolc_process_fill(false, 0, e, num_eqns, x_local, f_ad, f, f_local,
00711            jac, seed, jac_local);
00712   }
00713   for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
00714     delete [] jac_local[i];
00715     delete [] seed[i];
00716   }
00717   delete [] jac_local;
00718   delete [] seed;
00719   timer.stop();
00720 
00721   // std::cout << "ADOL-C Residual = " << std::endl;
00722   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00723   //   std::cout << "\t" << f[i] << std::endl;
00724 
00725   // std::cout.precision(8);
00726   // std::cout << "ADOL-C Jacobian = " << std::endl;
00727   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00728   //   std::cout << "\t";
00729   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00730   //     std::cout << jac[i][j] << "\t";
00731   //   std::cout << std::endl;
00732   // }
00733 
00734   return timer.totalElapsedTime();
00735 }
00736 
00737 double adolc_retape_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00738            double mesh_spacing) {
00739   ElemData e(mesh_spacing);
00740 
00741   // Solution vector, residual, jacobian
00742   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00743   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00744   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00745     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00746       unsigned int row = node_row*num_eqns + eqn_row;
00747       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00748       f[row] = 0.0;
00749       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00750       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00751   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00752     unsigned int col = node_col*num_eqns + eqn_col;
00753     jac[row][col] = 0.0;
00754   }
00755       }
00756     }
00757   }
00758 
00759   Teuchos::Time timer("FE ADOL-C Retape Jacobian Fill", false);
00760   timer.start(true);
00761   std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
00762   std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00763   std::vector<double> x_local(e.nnode*num_eqns);
00764   std::vector<double> f_local(e.nnode*num_eqns);
00765   double **jac_local = new double*[e.nnode*num_eqns];
00766   double **seed = new double*[e.nnode*num_eqns];
00767   for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
00768     jac_local[i] = new double[e.nnode*num_eqns];
00769     seed[i] = new double[e.nnode*num_eqns];
00770     for (unsigned int j=0; j<e.nnode*num_eqns; j++)
00771       seed[i][j] = 0.0;
00772     seed[i][i] = 1.0;
00773   }
00774   for (unsigned int i=0; i<num_nodes-1; i++) {
00775     e.gid[0] = i;
00776     e.gid[1] = i+1;
00777     
00778     adolc_init_fill(true, 1, e, num_eqns, x, x_local, x_ad);
00779     template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
00780     adolc_process_fill(true, 1, e, num_eqns, x_local, f_ad, f, f_local,
00781            jac, seed, jac_local);
00782   }
00783   for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
00784     delete [] jac_local[i];
00785     delete [] seed[i];
00786   }
00787   delete [] jac_local;
00788   delete [] seed;
00789   timer.stop();
00790 
00791   // std::cout << "ADOL-C Residual (retaped) = " << std::endl;
00792   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00793   //   std::cout << "\t" << f[i] << std::endl;
00794 
00795   // std::cout.precision(8);
00796   // std::cout << "ADOL-C Jacobian (retaped) = " << std::endl;
00797   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00798   //   std::cout << "\t";
00799   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00800   //     std::cout << jac[i][j] << "\t";
00801   //   std::cout << std::endl;
00802   // }
00803 
00804   return timer.totalElapsedTime();
00805 }
00806 
00807 #else
00808 
00809 double adolc_tapeless_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00810              double mesh_spacing) {
00811   ElemData e(mesh_spacing);
00812 
00813   // Solution vector, residual, jacobian
00814   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00815   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00816   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00817     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00818       unsigned int row = node_row*num_eqns + eqn_row;
00819       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00820       f[row] = 0.0;
00821       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00822       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00823   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00824     unsigned int col = node_col*num_eqns + eqn_col;
00825     jac[row][col] = 0.0;
00826   }
00827       }
00828     }
00829   }
00830 
00831   Teuchos::Time timer("FE Tapeless ADOL-C Jacobian Fill", false);
00832   timer.start(true);
00833   std::vector<adtl::adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
00834   std::vector<adtl::adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00835   for (unsigned int i=0; i<num_nodes-1; i++) {
00836     e.gid[0] = i;
00837     e.gid[1] = i+1;
00838     
00839     adolc_tapeless_init_fill(e, num_eqns, x, x_ad);
00840     template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
00841     adolc_tapeless_process_fill(e, num_eqns, f_ad, f, jac);
00842   }
00843   timer.stop();
00844 
00845   // std::cout << "Tapeless ADOL-C Residual = " << std::endl;
00846   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00847   //   std::cout << "\t" << f[i] << std::endl;
00848 
00849   // std::cout.precision(8);
00850   // std::cout << "Tapeless ADOL-C Jacobian = " << std::endl;
00851   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00852   //   std::cout << "\t";
00853   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00854   //     std::cout << jac[i][j] << "\t";
00855   //   std::cout << std::endl;
00856   // }
00857 
00858   return timer.totalElapsedTime();
00859 }
00860 #endif
00861 #endif
00862 
00863 #ifdef HAVE_ADIC
00864 double adic_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00865        double mesh_spacing) {
00866   AD_Init(0);
00867   ElemData e(mesh_spacing);
00868 
00869   // Solution vector, residual, jacobian
00870   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00871   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00872   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00873     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00874       unsigned int row = node_row*num_eqns + eqn_row;
00875       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00876       f[row] = 0.0;
00877       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00878       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00879   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00880     unsigned int col = node_col*num_eqns + eqn_col;
00881     jac[row][col] = 0.0;
00882   }
00883       }
00884     }
00885   }
00886 
00887   Teuchos::Time timer("FE ADIC Jacobian Fill", false);
00888   timer.start(true);
00889   std::vector<DERIV_TYPE> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns);
00890   std::vector<DERIV_TYPE> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00891   for (unsigned int i=0; i<num_nodes-1; i++) {
00892     e.gid[0] = i;
00893     e.gid[1] = i+1;
00894     
00895     adic_init_fill(e, num_eqns, x, x_fad);
00896     adic_element_fill(&e, num_eqns, &x_fad[0], &u[0], &du[0], &f_fad[0]);
00897     adic_process_fill(e, num_eqns, f_fad, f, jac);
00898   }
00899   timer.stop();
00900   AD_Final();
00901 
00902   // std::cout.precision(8);
00903   // std::cout << "ADIC Residual = " << std::endl;
00904   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00905   //   std::cout << "\t" << f[i] << std::endl;
00906 
00907   // std::cout.precision(8);
00908   // std::cout << "ADIC Jacobian = " << std::endl;
00909   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00910   //   std::cout << "\t";
00911   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00912   //     std::cout << jac[i][j] << "\t";
00913   //   std::cout << std::endl;
00914   // }
00915 
00916   return timer.totalElapsedTime();
00917 }
00918 #endif
00919 
00920 double analytic_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
00921        double mesh_spacing) {
00922   ElemData e(mesh_spacing);
00923 
00924   // Solution vector, residual, jacobian
00925   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00926   std::vector< std::vector<double> > jac(num_nodes*num_eqns);
00927   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00928     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00929       unsigned int row = node_row*num_eqns + eqn_row;
00930       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00931       f[row] = 0.0;
00932       jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
00933       for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
00934   for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) { 
00935     unsigned int col = node_col*num_eqns + eqn_col;
00936     jac[row][col] = 0.0;
00937   }
00938       }
00939     }
00940   }
00941 
00942   Teuchos::Time timer("FE Analytic Jacobian Fill", false);
00943   timer.start(true);
00944   std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
00945   std::vector< std::vector<double> > jac_local(e.nnode*num_eqns);
00946   std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00947   for (unsigned int i=0; i<e.nnode*num_eqns; i++)
00948     jac_local[i] = std::vector<double>(e.nnode*num_eqns);
00949   for (unsigned int i=0; i<num_nodes-1; i++) {
00950     e.gid[0] = i;
00951     e.gid[1] = i+1;
00952     
00953     analytic_init_fill(e, num_eqns, x, x_local);
00954     analytic_element_fill(e, num_eqns, x_local, u, du, f_local, jac_local);
00955     analytic_process_fill(e, num_eqns, f_local, jac_local, f, jac);
00956   }
00957   timer.stop();
00958 
00959   // std::cout.precision(8);
00960   // std::cout << "Analytic Residual = " << std::endl;
00961   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
00962   //   std::cout << "\t" << f[i] << std::endl;
00963 
00964   // std::cout.precision(8);
00965   // std::cout << "Analytic Jacobian = " << std::endl;
00966   // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
00967   //   std::cout << "\t";
00968   //   for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
00969   //     std::cout << jac[i][j] << "\t";
00970   //   std::cout << std::endl;
00971   // }
00972 
00973   return timer.totalElapsedTime();
00974 }
00975 
00976 double residual_fill(unsigned int num_nodes, unsigned int num_eqns,
00977        double mesh_spacing) {
00978   ElemData e(mesh_spacing);
00979 
00980   // Solution vector, residual, jacobian
00981   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
00982   for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
00983     for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
00984       unsigned int row = node_row*num_eqns + eqn_row;
00985       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
00986       f[row] = 0.0;
00987     }
00988   }
00989 
00990   Teuchos::Time timer("FE Residual Fill", false);
00991   timer.start(true);
00992   std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
00993   std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
00994   for (unsigned int i=0; i<num_nodes-1; i++) {
00995     e.gid[0] = i;
00996     e.gid[1] = i+1;
00997     
00998     analytic_init_fill(e, num_eqns, x, x_local);
00999     template_element_fill(e, num_eqns, x_local, u, du, f_local);
01000     residual_process_fill(e, num_eqns, f_local, f);
01001   }
01002   timer.stop();
01003 
01004   // std::cout.precision(8);
01005   // std::cout << "Analytic Residual = " << std::endl;
01006   // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
01007   //   std::cout << "\t" << f[i] << std::endl;
01008 
01009   return timer.totalElapsedTime();
01010 }
01011 
01012 int main(int argc, char* argv[]) {
01013   int ierr = 0;
01014 
01015   try {
01016     double t, ta, tr;
01017     int p = 2;
01018     int w = p+7;
01019 
01020     // Maximum number of derivative components for SLFad
01021     const int slfad_max = 100;
01022 
01023     // Set up command line options
01024     Teuchos::CommandLineProcessor clp;
01025     clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
01026     int num_nodes = 100000;
01027     int num_eqns = 2;
01028     int rt = 0;
01029     clp.setOption("n", &num_nodes, "Number of nodes");
01030     clp.setOption("p", &num_eqns, "Number of equations");
01031     clp.setOption("rt", &rt, "Include ADOL-C retaping test");
01032 
01033     // Parse options
01034     Teuchos::CommandLineProcessor::EParseCommandLineReturn
01035       parseReturn= clp.parse(argc, argv);
01036     if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
01037       return 1;
01038 
01039     double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1);
01040 
01041     // Memory pool & manager
01042     Sacado::Fad::MemPoolManager<double> poolManager(num_nodes*num_eqns);
01043     Sacado::Fad::MemPool* pool = poolManager.getMemoryPool(num_nodes*num_eqns);
01044     Sacado::Fad::DMFad<double>::setDefaultPool(pool);
01045 
01046     std::cout.setf(std::ios::scientific);
01047     std::cout.precision(p);
01048     std::cout << "num_nodes =  " << num_nodes 
01049         << ", num_eqns = " << num_eqns << ":  " << std::endl
01050         << "               " << "   Time   " << "\t"<< "Time/Analytic" << "\t"
01051         << "Time/Residual" << std::endl;
01052 
01053     ta = 1.0;
01054     tr = 1.0;
01055 
01056     tr = residual_fill(num_nodes, num_eqns, mesh_spacing);
01057 
01058     ta = analytic_jac_fill(num_nodes, num_eqns, mesh_spacing);
01059     std::cout << "Analytic:      " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/tr << std::endl;
01060 
01061 #ifdef HAVE_ADOLC
01062 #ifndef ADOLC_TAPELESS
01063     t = adolc_jac_fill(num_nodes, num_eqns, mesh_spacing);
01064     std::cout << "ADOL-C:        " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01065 
01066     if (rt != 0) {
01067       t = adolc_retape_jac_fill(num_nodes, num_eqns, mesh_spacing);
01068       std::cout << "ADOL-C(rt):  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01069     }
01070 
01071 #else
01072     t = adolc_tapeless_jac_fill(num_nodes, num_eqns, mesh_spacing);
01073     std::cout << "ADOL-C(tl):    " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01074 #endif
01075 #endif
01076 
01077 #ifdef HAVE_ADIC
01078     t = adic_jac_fill(num_nodes, num_eqns, mesh_spacing);
01079     std::cout << "ADIC:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01080 #endif
01081 
01082     if (num_eqns*2 == 4) {
01083       t = fad_jac_fill< FAD::TFad<16,double> >(num_nodes, num_eqns, mesh_spacing);
01084       std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01085     }
01086     else if (num_eqns*2 == 16) {
01087       t = fad_jac_fill< FAD::TFad<16,double> >(num_nodes, num_eqns, mesh_spacing);
01088       std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01089     }
01090 
01091     t = fad_jac_fill< FAD::Fad<double> >(num_nodes, num_eqns, mesh_spacing);
01092     std::cout << "Fad:           " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01093 
01094     if (num_eqns*2 == 4) {
01095       t = fad_jac_fill< Sacado::Fad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
01096       std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01097     }
01098     else if (num_eqns*2 == 16) {
01099       t = fad_jac_fill< Sacado::Fad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing);
01100       std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01101     }
01102 
01103     if (num_eqns*2 < slfad_max) {
01104       t = fad_jac_fill< Sacado::Fad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
01105       std::cout << "SLFad:         " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01106     }
01107     
01108     t = fad_jac_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
01109     std::cout << "DFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01110 
01111     t = fad_jac_fill< Sacado::Fad::SimpleFad<double> >(num_nodes, num_eqns, mesh_spacing);
01112     std::cout << "SimpleFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01113 
01114     t = fad_jac_fill< Sacado::Fad::DMFad<double> >(num_nodes, num_eqns, mesh_spacing);
01115     std::cout << "DMFad:         " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl; 
01116 
01117     if (num_eqns*2 == 4) {
01118       t = fad_jac_fill< Sacado::ELRFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
01119       std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01120     }
01121     else if (num_eqns*2 == 16) {
01122       t = fad_jac_fill< Sacado::ELRFad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing);
01123       std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01124     }
01125 
01126     if (num_eqns*2 < slfad_max) {
01127       t = fad_jac_fill< Sacado::ELRFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
01128       std::cout << "ELRSLFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01129     }
01130 
01131     t = fad_jac_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
01132     std::cout << "ELRDFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01133 
01134     if (num_eqns*2 == 4) {
01135       t = fad_jac_fill< Sacado::CacheFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
01136       std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01137     }
01138     else if (num_eqns*2 == 16) {
01139       t = fad_jac_fill< Sacado::CacheFad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing);
01140       std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01141     }
01142 
01143     if (num_eqns*2 < slfad_max) {
01144       t = fad_jac_fill< Sacado::CacheFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
01145       std::cout << "CacheSLFad:    " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01146     }
01147     
01148     t = fad_jac_fill< Sacado::CacheFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
01149     std::cout << "CacheFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01150 
01151     if (num_eqns*2 == 4) {
01152       t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
01153       std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01154     }
01155     else if (num_eqns*2 == 16) {
01156       t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing);
01157       std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01158     }
01159 
01160     if (num_eqns*2 < slfad_max) {
01161       t = fad_jac_fill< Sacado::ELRCacheFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
01162       std::cout << "ELRCacheSLFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01163     }
01164 
01165     t = fad_jac_fill< Sacado::ELRCacheFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
01166     std::cout << "ELRCacheFad:   " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01167 
01168     t = fad_jac_fill< Sacado::Fad::DVFad<double> >(num_nodes, num_eqns, mesh_spacing);
01169     std::cout << "DVFad:         " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
01170     
01171   }
01172   catch (std::exception& e) {
01173     cout << e.what() << endl;
01174     ierr = 1;
01175   }
01176   catch (const char *s) {
01177     cout << s << endl;
01178     ierr = 1;
01179   }
01180   catch (...) {
01181     cout << "Caught unknown exception!" << endl;
01182     ierr = 1;
01183   }
01184 
01185   return ierr;
01186 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines