|
Sacado Package Browser (Single Doxygen Collection) Version of the Day
|
00001 // $Id$ 00002 // $Source$ 00003 // @HEADER 00004 // *********************************************************************** 00005 // 00006 // Sacado Package 00007 // Copyright (2006) Sandia Corporation 00008 // 00009 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00010 // the U.S. Government retains certain rights in this software. 00011 // 00012 // This library is free software; you can redistribute it and/or modify 00013 // it under the terms of the GNU Lesser General Public License as 00014 // published by the Free Software Foundation; either version 2.1 of the 00015 // License, or (at your option) any later version. 00016 // 00017 // This library is distributed in the hope that it will be useful, but 00018 // WITHOUT ANY WARRANTY; without even the implied warranty of 00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00020 // Lesser General Public License for more details. 00021 // 00022 // You should have received a copy of the GNU Lesser General Public 00023 // License along with this library; if not, write to the Free Software 00024 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00025 // USA 00026 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps 00027 // (etphipp@sandia.gov). 00028 // 00029 // *********************************************************************** 00030 // @HEADER 00031 00032 #include "Sacado.hpp" 00033 #include "Sacado_tradvec.hpp" 00034 00035 #include "Teuchos_Time.hpp" 00036 #include "Teuchos_CommandLineProcessor.hpp" 00037 00038 // ADOL-C includes 00039 #ifdef HAVE_ADOLC 00040 #ifdef PACKAGE 00041 #undef PACKAGE 00042 #endif 00043 #ifdef PACKAGE_NAME 00044 #undef PACKAGE_NAME 00045 #endif 00046 #ifdef PACKAGE_BUGREPORT 00047 #undef PACKAGE_BUGREPORT 00048 #endif 00049 #ifdef PACKAGE_STRING 00050 #undef PACKAGE_STRING 00051 #endif 00052 #ifdef PACKAGE_TARNAME 00053 #undef PACKAGE_TARNAME 00054 #endif 00055 #ifdef PACKAGE_VERSION 00056 #undef PACKAGE_VERSION 00057 #endif 00058 #ifdef VERSION 00059 #undef VERSION 00060 #endif 00061 #include "adolc/adouble.h" 00062 #include "adolc/drivers/drivers.h" 00063 #include "adolc/interfaces.h" 00064 #endif 00065 00066 // A performance test that computes a finite-element-like adjoint using 00067 // Rad and ADOL-C 00068 00069 typedef Sacado::Rad::ADvar<double> RadType; 00070 typedef Sacado::RadVec::ADvar<double> VRadType; 00071 00072 struct ElemData { 00073 static const unsigned int nqp = 2; 00074 static const unsigned int nnode = 2; 00075 double w[nqp], jac[nqp], phi[nqp][nnode], dphi[nqp][nnode]; 00076 unsigned int gid[nnode]; 00077 00078 ElemData(double mesh_spacing) { 00079 // Quadrature points 00080 double xi[nqp]; 00081 xi[0] = -1.0 / std::sqrt(3.0); 00082 xi[1] = 1.0 / std::sqrt(3.0); 00083 00084 for (unsigned int i=0; i<nqp; i++) { 00085 // Weights 00086 w[i] = 1.0; 00087 00088 // Element transformation Jacobian 00089 jac[i] = 0.5*mesh_spacing; 00090 00091 // Shape functions & derivatives 00092 phi[i][0] = 0.5*(1.0 - xi[i]); 00093 phi[i][1] = 0.5*(1.0 + xi[i]); 00094 dphi[i][0] = -0.5; 00095 dphi[i][1] = 0.5; 00096 } 00097 } 00098 }; 00099 00100 template <class FadType> 00101 void fad_init_fill(const ElemData& e, 00102 unsigned int neqn, 00103 const std::vector<double>& x, 00104 const std::vector< std::vector<double> >& w, 00105 std::vector<FadType>& x_fad, 00106 std::vector< std::vector<double> >& w_local) { 00107 for (unsigned int node=0; node<e.nnode; node++) 00108 for (unsigned int eqn=0; eqn<neqn; eqn++) 00109 x_fad[node*neqn+eqn] = FadType(e.nnode*neqn, node*neqn+eqn, 00110 x[e.gid[node]*neqn+eqn]); 00111 00112 for (unsigned int col=0; col<w.size(); col++) 00113 for (unsigned int node=0; node<e.nnode; node++) 00114 for (unsigned int eqn=0; eqn<neqn; eqn++) 00115 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn]; 00116 } 00117 00118 void rad_init_fill(const ElemData& e, 00119 unsigned int neqn, 00120 const std::vector<double>& x, 00121 const std::vector< std::vector<double> >& w, 00122 std::vector<RadType>& x_rad, 00123 std::vector< std::vector<double> >& w_local) { 00124 for (unsigned int node=0; node<e.nnode; node++) 00125 for (unsigned int eqn=0; eqn<neqn; eqn++) 00126 x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn]; 00127 00128 for (unsigned int col=0; col<w.size(); col++) 00129 for (unsigned int node=0; node<e.nnode; node++) 00130 for (unsigned int eqn=0; eqn<neqn; eqn++) 00131 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn]; 00132 } 00133 00134 void vrad_init_fill(const ElemData& e, 00135 unsigned int neqn, 00136 const std::vector<double>& x, 00137 const std::vector< std::vector<double> >& w, 00138 std::vector<VRadType>& x_rad, 00139 double** w_local) { 00140 for (unsigned int node=0; node<e.nnode; node++) 00141 for (unsigned int eqn=0; eqn<neqn; eqn++) 00142 x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn]; 00143 00144 for (unsigned int col=0; col<w.size(); col++) 00145 for (unsigned int node=0; node<e.nnode; node++) 00146 for (unsigned int eqn=0; eqn<neqn; eqn++) 00147 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn]; 00148 } 00149 00150 #ifdef HAVE_ADOLC 00151 void adolc_init_fill(bool retape, 00152 int tag, 00153 const ElemData& e, 00154 unsigned int neqn, 00155 const std::vector<double>& x, 00156 const std::vector< std::vector<double> >& w, 00157 std::vector<double>& x_local, 00158 std::vector<adouble>& x_ad, 00159 double** w_local) { 00160 if (retape) 00161 trace_on(tag, 1); 00162 for (unsigned int node=0; node<e.nnode; node++) 00163 for (unsigned int eqn=0; eqn<neqn; eqn++) { 00164 x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn]; 00165 if (retape) 00166 x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn]; 00167 } 00168 00169 for (unsigned int col=0; col<w.size(); col++) 00170 for (unsigned int node=0; node<e.nnode; node++) 00171 for (unsigned int eqn=0; eqn<neqn; eqn++) 00172 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn]; 00173 } 00174 #endif 00175 00176 void analytic_init_fill(const ElemData& e, 00177 unsigned int neqn, 00178 const std::vector<double>& x, 00179 const std::vector< std::vector<double> >& w, 00180 std::vector<double>& x_local, 00181 std::vector< std::vector<double> >& w_local) { 00182 for (unsigned int node=0; node<e.nnode; node++) 00183 for (unsigned int eqn=0; eqn<neqn; eqn++) 00184 x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn]; 00185 00186 for (unsigned int node=0; node<e.nnode; node++) 00187 for (unsigned int eqn=0; eqn<neqn; eqn++) 00188 for (unsigned int col=0; col<w.size(); col++) 00189 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn]; 00190 } 00191 00192 template <class T> 00193 void template_element_fill(const ElemData& e, 00194 unsigned int neqn, 00195 const std::vector<T>& x, 00196 std::vector<T>& u, 00197 std::vector<T>& du, 00198 std::vector<T>& f) { 00199 // Construct element solution, derivative 00200 for (unsigned int qp=0; qp<e.nqp; qp++) { 00201 for (unsigned int eqn=0; eqn<neqn; eqn++) { 00202 u[qp*neqn+eqn] = 0.0; 00203 du[qp*neqn+eqn] = 0.0; 00204 for (unsigned int node=0; node<e.nnode; node++) { 00205 u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node]; 00206 du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node]; 00207 } 00208 } 00209 } 00210 00211 // Compute sum of equations for coupling 00212 std::vector<T> s(e.nqp*neqn); 00213 for (unsigned int qp=0; qp<e.nqp; qp++) { 00214 for (unsigned int eqn=0; eqn<neqn; eqn++) { 00215 s[qp*neqn+eqn] = 0.0; 00216 for (unsigned int j=0; j<neqn; j++) { 00217 if (j != eqn) 00218 s[qp*neqn+eqn] += u[qp*neqn+j]; 00219 } 00220 } 00221 } 00222 00223 // Evaluate element residual 00224 for (unsigned int node=0; node<e.nnode; node++) { 00225 for (unsigned int eqn=0; eqn<neqn; eqn++) { 00226 unsigned int row = node*neqn+eqn; 00227 f[row] = 0.0; 00228 for (unsigned int qp=0; qp<e.nqp; qp++) { 00229 f[row] += 00230 e.w[qp]*e.jac[qp]*(-(1.0/(e.jac[qp]*e.jac[qp]))* 00231 du[qp*neqn+eqn]*e.dphi[qp][node] + 00232 e.phi[qp][node]*(exp(u[qp*neqn+eqn]) + 00233 u[qp*neqn+eqn]*s[qp*neqn+eqn])); 00234 } 00235 } 00236 } 00237 } 00238 00239 void analytic_element_fill(const ElemData& e, 00240 unsigned int neqn, 00241 const std::vector<double>& x, 00242 std::vector< std::vector<double> >& w, 00243 std::vector<double>& u, 00244 std::vector<double>& du, 00245 std::vector<double>& f, 00246 std::vector< std::vector<double> >& adj) { 00247 // Construct element solution, derivative 00248 for (unsigned int qp=0; qp<e.nqp; qp++) { 00249 for (unsigned int eqn=0; eqn<neqn; eqn++) { 00250 u[qp*neqn+eqn] = 0.0; 00251 du[qp*neqn+eqn] = 0.0; 00252 for (unsigned int node=0; node<e.nnode; node++) { 00253 u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node]; 00254 du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node]; 00255 } 00256 } 00257 } 00258 00259 // Compute sum of equations for coupling 00260 std::vector<double> s(e.nqp*neqn); 00261 for (unsigned int qp=0; qp<e.nqp; qp++) { 00262 for (unsigned int eqn=0; eqn<neqn; eqn++) { 00263 s[qp*neqn+eqn] = 0.0; 00264 for (unsigned int j=0; j<neqn; j++) { 00265 if (j != eqn) 00266 s[qp*neqn+eqn] += u[qp*neqn+j]; 00267 } 00268 } 00269 } 00270 00271 for (unsigned int col=0; col<w.size(); col++) 00272 for (unsigned int node=0; node<e.nnode; node++) 00273 for (unsigned int eqn=0; eqn<neqn; eqn++) 00274 adj[col][node*neqn+eqn] = 0.0; 00275 00276 // Evaluate element residual 00277 for (unsigned int node=0; node<e.nnode; node++) { 00278 for (unsigned int eqn=0; eqn<neqn; eqn++) { 00279 unsigned int row = node*neqn+eqn; 00280 f[row] = 0.0; 00281 for (unsigned int qp=0; qp<e.nqp; qp++) { 00282 double c1 = e.w[qp]*e.jac[qp]; 00283 double c2 = -(1.0/(e.jac[qp]*e.jac[qp]))*e.dphi[qp][node]; 00284 double c3 = e.phi[qp][node]*(exp(u[qp*neqn+eqn])); 00285 double c4 = e.phi[qp][node]*u[qp*neqn+eqn]; 00286 double c5 = e.phi[qp][node]*s[qp*neqn+eqn]; 00287 double c35 = c3+c5; 00288 double c14 = c1*c4; 00289 f[row] += c1*(c2*du[qp*neqn+eqn] + c3 + c4*s[qp*neqn+eqn]); 00290 for (unsigned int col=0; col<w.size(); col++) { 00291 for (unsigned int node_col=0; node_col<e.nnode; node_col++) { 00292 adj[col][node_col*neqn+eqn] += 00293 c1*(c2*e.dphi[qp][node_col] + c35*e.phi[qp][node_col])*w[col][row]; 00294 for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) { 00295 if (eqn_col != eqn) 00296 adj[col][node_col*neqn+eqn_col] += c14*e.phi[qp][node_col]*w[col][row]; 00297 } 00298 } 00299 } 00300 } 00301 } 00302 } 00303 } 00304 00305 template <class FadType> 00306 void fad_process_fill(const ElemData& e, 00307 unsigned int neqn, 00308 const std::vector< std::vector<double> >& w_local, 00309 const std::vector<FadType>& f_fad, 00310 std::vector<double>& f, 00311 std::vector< std::vector<double> >& adj) { 00312 // Get residual 00313 for (unsigned int node=0; node<e.nnode; node++) 00314 for (unsigned int eqn=0; eqn<neqn; eqn++) 00315 f[e.gid[node]*neqn+eqn] += f_fad[node*neqn+eqn].val(); 00316 00317 // Get adjoint for each adjoint direction 00318 for (unsigned int col=0; col<w_local.size(); col++) { 00319 FadType z = 0.0; 00320 for (unsigned int node=0; node<e.nnode; node++) 00321 for (unsigned int eqn=0; eqn<neqn; eqn++) 00322 z += f_fad[node*neqn+eqn]*w_local[col][node*neqn+eqn]; 00323 00324 for (unsigned int node=0; node<e.nnode; node++) 00325 for (unsigned int eqn=0; eqn<neqn; eqn++) 00326 adj[col][e.gid[node]*neqn+eqn] += z.fastAccessDx(node*neqn+eqn); 00327 } 00328 } 00329 00330 void rad_process_fill(const ElemData& e, 00331 unsigned int neqn, 00332 std::vector< std::vector<double> >& w_local, 00333 std::vector<RadType>& x_rad, 00334 std::vector<RadType>& f_rad, 00335 std::vector<RadType*>& ff_rad, 00336 std::vector<double>& f, 00337 std::vector< std::vector<double> >& adj) { 00338 // Get residual 00339 for (unsigned int node=0; node<e.nnode; node++) 00340 for (unsigned int eqn=0; eqn<neqn; eqn++) 00341 f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val(); 00342 00343 // Get adjoint for each adjoint direction 00344 for (unsigned int col=0; col<w_local.size(); col++) { 00345 Sacado::Rad::ADcontext<double>::Weighted_Gradcomp(neqn*e.nnode, 00346 &ff_rad[0], 00347 &w_local[col][0]); 00348 for (unsigned int node=0; node<e.nnode; node++) 00349 for (unsigned int eqn=0; eqn<neqn; eqn++) 00350 adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(); 00351 } 00352 } 00353 00354 void vrad_process_fill(const ElemData& e, 00355 unsigned int neqn, 00356 unsigned int ncol, 00357 std::vector<std::size_t>& vn, 00358 double** w_local, 00359 std::vector<VRadType>& x_rad, 00360 std::vector<VRadType>& f_rad, 00361 VRadType*** vf_rad, 00362 std::vector<double>& f, 00363 std::vector< std::vector<double> >& adj) { 00364 // Get residual 00365 for (unsigned int node=0; node<e.nnode; node++) 00366 for (unsigned int eqn=0; eqn<neqn; eqn++) 00367 f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val(); 00368 00369 // Get adjoint for each adjoint direction 00370 Sacado::RadVec::ADcontext<double>::Weighted_GradcompVec(ncol, 00371 &vn[0], 00372 vf_rad, 00373 w_local); 00374 for (unsigned int col=0; col<ncol; col++) 00375 for (unsigned int node=0; node<e.nnode; node++) 00376 for (unsigned int eqn=0; eqn<neqn; eqn++) 00377 adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(col); 00378 } 00379 00380 #ifdef HAVE_ADOLC 00381 void adolc_process_fill(bool retape, 00382 int tag, 00383 const ElemData& e, 00384 unsigned int neqn, 00385 unsigned int ncol, 00386 std::vector<double>& x_local, 00387 double **w_local, 00388 std::vector<adouble>& f_ad, 00389 std::vector<double>& f_local, 00390 std::vector<double>& f, 00391 double **adj_local, 00392 std::vector< std::vector<double> >& adj) { 00393 if (retape) { 00394 for (unsigned int node=0; node<e.nnode; node++) 00395 for (unsigned int eqn=0; eqn<neqn; eqn++) 00396 f_ad[node*neqn+eqn] >>= f_local[node*neqn+eqn]; 00397 trace_off(); 00398 } 00399 else 00400 zos_forward(tag, neqn*e.nnode, neqn*e.nnode, 1, &x_local[0], &f_local[0]); 00401 00402 fov_reverse(tag, neqn*e.nnode, neqn*e.nnode, ncol, w_local, adj_local); 00403 00404 for (unsigned int node=0; node<e.nnode; node++) 00405 for (unsigned int eqn=0; eqn<neqn; eqn++) 00406 f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn]; 00407 00408 for (unsigned int col=0; col<ncol; col++) 00409 for (unsigned int node=0; node<e.nnode; node++) 00410 for (unsigned int eqn=0; eqn<neqn; eqn++) 00411 adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn]; 00412 } 00413 #endif 00414 00415 void analytic_process_fill(const ElemData& e, 00416 unsigned int neqn, 00417 const std::vector<double>& f_local, 00418 const std::vector< std::vector<double> >& adj_local, 00419 std::vector<double>& f, 00420 std::vector< std::vector<double> >& adj) { 00421 for (unsigned int node=0; node<e.nnode; node++) 00422 for (unsigned int eqn=0; eqn<neqn; eqn++) 00423 f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn]; 00424 00425 for (unsigned int col=0; col<adj.size(); col++) 00426 for (unsigned int node=0; node<e.nnode; node++) 00427 for (unsigned int eqn=0; eqn<neqn; eqn++) 00428 adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn]; 00429 } 00430 00431 void residual_process_fill(const ElemData& e, 00432 unsigned int neqn, 00433 const std::vector<double>& f_local, 00434 std::vector<double>& f) { 00435 for (unsigned int node=0; node<e.nnode; node++) 00436 for (unsigned int eqn=0; eqn<neqn; eqn++) 00437 f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn]; 00438 } 00439 00440 template <typename FadType> 00441 double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, 00442 unsigned int num_cols, double mesh_spacing) { 00443 ElemData e(mesh_spacing); 00444 00445 // Solution vector, residual, adjoint 00446 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns); 00447 std::vector< std::vector<double> > w(num_cols), w_local(num_cols); 00448 std::vector< std::vector<double> > adj(num_cols); 00449 for (unsigned int node=0; node<num_nodes; node++) 00450 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00451 x[node*num_eqns+eqn] = 00452 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5); 00453 f[node*num_eqns+eqn] = 0.0; 00454 } 00455 00456 for (unsigned int col=0; col<num_cols; col++) { 00457 w[col] = std::vector<double>(num_nodes*num_eqns); 00458 w_local[col] = std::vector<double>(e.nnode*num_eqns); 00459 adj[col] = std::vector<double>(num_nodes*num_eqns); 00460 for (unsigned int node=0; node<num_nodes; node++) 00461 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00462 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn]; 00463 adj[col][node*num_eqns+eqn] = 0.0; 00464 } 00465 } 00466 00467 Teuchos::Time timer("FE Fad Adjoint Fill", false); 00468 timer.start(true); 00469 std::vector<FadType> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns); 00470 std::vector<FadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns); 00471 for (unsigned int i=0; i<num_nodes-1; i++) { 00472 e.gid[0] = i; 00473 e.gid[1] = i+1; 00474 00475 fad_init_fill(e, num_eqns, x, w, x_fad, w_local); 00476 template_element_fill(e, num_eqns, x_fad, u, du, f_fad); 00477 fad_process_fill(e, num_eqns, w_local, f_fad, f, adj); 00478 } 00479 timer.stop(); 00480 00481 // std::cout << "Fad Residual = " << std::endl; 00482 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) 00483 // std::cout << "\t" << f[i] << std::endl; 00484 00485 // std::cout.precision(8); 00486 // std::cout << "Fad Adjoint = " << std::endl; 00487 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) { 00488 // std::cout << "\t"; 00489 // for (unsigned int j=0; j<num_cols; j++) 00490 // std::cout << adj[j][i] << "\t"; 00491 // std::cout << std::endl; 00492 // } 00493 00494 return timer.totalElapsedTime(); 00495 } 00496 00497 double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, 00498 unsigned int num_cols, double mesh_spacing) { 00499 ElemData e(mesh_spacing); 00500 00501 // Solution vector, residual, adjoint 00502 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns); 00503 std::vector< std::vector<double> > w(num_cols), w_local(num_cols); 00504 std::vector< std::vector<double> > adj(num_cols); 00505 for (unsigned int node=0; node<num_nodes; node++) 00506 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00507 x[node*num_eqns+eqn] = 00508 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5); 00509 f[node*num_eqns+eqn] = 0.0; 00510 } 00511 00512 for (unsigned int col=0; col<num_cols; col++) { 00513 w[col] = std::vector<double>(num_nodes*num_eqns); 00514 w_local[col] = std::vector<double>(e.nnode*num_eqns); 00515 adj[col] = std::vector<double>(num_nodes*num_eqns); 00516 for (unsigned int node=0; node<num_nodes; node++) 00517 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00518 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn]; 00519 adj[col][node*num_eqns+eqn] = 0.0; 00520 } 00521 } 00522 00523 Teuchos::Time timer("FE Rad Adjoint Fill", false); 00524 timer.start(true); 00525 std::vector<RadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns); 00526 std::vector<RadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns); 00527 std::vector<RadType*> ff_rad(f_rad.size()); 00528 for (unsigned int i=0; i<f_rad.size(); i++) 00529 ff_rad[i] = &f_rad[i]; 00530 for (unsigned int i=0; i<num_nodes-1; i++) { 00531 e.gid[0] = i; 00532 e.gid[1] = i+1; 00533 00534 rad_init_fill(e, num_eqns, x, w, x_rad, w_local); 00535 template_element_fill(e, num_eqns, x_rad, u, du, f_rad); 00536 rad_process_fill(e, num_eqns, w_local, x_rad, f_rad, ff_rad, f, adj); 00537 } 00538 timer.stop(); 00539 00540 // std::cout << "Rad Residual = " << std::endl; 00541 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) 00542 // std::cout << "\t" << f[i] << std::endl; 00543 00544 // std::cout.precision(8); 00545 // std::cout << "Rad Adjoint = " << std::endl; 00546 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) { 00547 // std::cout << "\t"; 00548 // for (unsigned int j=0; j<num_cols; j++) 00549 // std::cout << adj[j][i] << "\t"; 00550 // std::cout << std::endl; 00551 // } 00552 00553 return timer.totalElapsedTime(); 00554 } 00555 00556 double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, 00557 unsigned int num_cols, double mesh_spacing) { 00558 ElemData e(mesh_spacing); 00559 00560 // Solution vector, residual, adjoint 00561 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns); 00562 std::vector< std::vector<double> > w(num_cols); 00563 double **w_local = new double*[num_cols]; 00564 std::vector< std::vector<double> > adj(num_cols); 00565 for (unsigned int node=0; node<num_nodes; node++) 00566 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00567 x[node*num_eqns+eqn] = 00568 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5); 00569 f[node*num_eqns+eqn] = 0.0; 00570 } 00571 00572 for (unsigned int col=0; col<num_cols; col++) { 00573 w[col] = std::vector<double>(num_nodes*num_eqns); 00574 w_local[col] = new double[e.nnode*num_eqns]; 00575 adj[col] = std::vector<double>(num_nodes*num_eqns); 00576 for (unsigned int node=0; node<num_nodes; node++) 00577 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00578 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn]; 00579 adj[col][node*num_eqns+eqn] = 0.0; 00580 } 00581 } 00582 00583 Teuchos::Time timer("FE Vector Rad Adjoint Fill", false); 00584 timer.start(true); 00585 std::vector<VRadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns); 00586 std::vector<VRadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns); 00587 VRadType ***vf_rad = new VRadType**[num_cols]; 00588 std::vector<std::size_t> vn(num_cols); 00589 for (unsigned int i=0; i<num_cols; i++) { 00590 vf_rad[i] = new VRadType*[num_eqns*e.nnode]; 00591 vn[i] = num_eqns*e.nnode; 00592 for (unsigned int j=0; j<num_eqns*e.nnode; j++) 00593 vf_rad[i][j] = &f_rad[j]; 00594 } 00595 for (unsigned int i=0; i<num_nodes-1; i++) { 00596 e.gid[0] = i; 00597 e.gid[1] = i+1; 00598 00599 vrad_init_fill(e, num_eqns, x, w, x_rad, w_local); 00600 template_element_fill(e, num_eqns, x_rad, u, du, f_rad); 00601 vrad_process_fill(e, num_eqns, num_cols, vn, w_local, x_rad, f_rad, vf_rad, 00602 f, adj); 00603 } 00604 for (unsigned int col=0; col<num_cols; col++) { 00605 delete [] w_local[col]; 00606 delete [] vf_rad[col]; 00607 } 00608 delete [] w_local; 00609 delete [] vf_rad; 00610 timer.stop(); 00611 00612 // std::cout << "Vector Rad Residual = " << std::endl; 00613 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) 00614 // std::cout << "\t" << f[i] << std::endl; 00615 00616 // std::cout.precision(8); 00617 // std::cout << "Vector Rad Adjoint = " << std::endl; 00618 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) { 00619 // std::cout << "\t"; 00620 // for (unsigned int j=0; j<num_cols; j++) 00621 // std::cout << adj[j][i] << "\t"; 00622 // std::cout << std::endl; 00623 // } 00624 00625 return timer.totalElapsedTime(); 00626 } 00627 00628 #ifdef HAVE_ADOLC 00629 double adolc_adj_fill(unsigned int num_nodes, unsigned int num_eqns, 00630 unsigned int num_cols, double mesh_spacing) { 00631 ElemData e(mesh_spacing); 00632 00633 // Solution vector, residual, adjoint 00634 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns); 00635 std::vector< std::vector<double> > w(num_cols); 00636 std::vector< std::vector<double> > adj(num_cols); 00637 for (unsigned int node=0; node<num_nodes; node++) 00638 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00639 x[node*num_eqns+eqn] = 00640 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5); 00641 f[node*num_eqns+eqn] = 0.0; 00642 } 00643 00644 for (unsigned int col=0; col<num_cols; col++) { 00645 w[col] = std::vector<double>(num_nodes*num_eqns); 00646 adj[col] = std::vector<double>(num_nodes*num_eqns); 00647 for (unsigned int node=0; node<num_nodes; node++) 00648 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00649 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn]; 00650 adj[col][node*num_eqns+eqn] = 0.0; 00651 } 00652 } 00653 00654 Teuchos::Time timer("FE ADOL-C Adjoint Fill", false); 00655 timer.start(true); 00656 std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns); 00657 std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns); 00658 std::vector<double> x_local(e.nnode*num_eqns); 00659 std::vector<double> f_local(e.nnode*num_eqns); 00660 double **adj_local = new double*[num_cols]; 00661 double **w_local = new double*[num_cols]; 00662 for (unsigned int i=0; i<num_cols; i++) { 00663 adj_local[i] = new double[e.nnode*num_eqns]; 00664 w_local[i] = new double[e.nnode*num_eqns]; 00665 } 00666 00667 // Tape first element 00668 e.gid[0] = 0; 00669 e.gid[1] = 1; 00670 adolc_init_fill(true, 0, e, num_eqns, x, w, x_local, x_ad, w_local); 00671 template_element_fill(e, num_eqns, x_ad, u, du, f_ad); 00672 adolc_process_fill(true, 0, e, num_eqns, num_cols, x_local, w_local, f_ad, 00673 f_local, f, adj_local, adj); 00674 00675 // Now do remaining fills reusing tape 00676 for (unsigned int i=1; i<num_nodes-1; i++) { 00677 e.gid[0] = i; 00678 e.gid[1] = i+1; 00679 00680 adolc_init_fill(false, 0, e, num_eqns, x, w, x_local, x_ad, w_local); 00681 adolc_process_fill(false, 0, e, num_eqns, num_cols, x_local, w_local, f_ad, 00682 f_local, f, adj_local, adj); 00683 } 00684 for (unsigned int i=0; i<num_cols; i++) { 00685 delete [] adj_local[i]; 00686 delete [] w_local[i]; 00687 } 00688 delete [] adj_local; 00689 delete [] w_local; 00690 timer.stop(); 00691 00692 // std::cout << "ADOL-C Residual = " << std::endl; 00693 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) 00694 // std::cout << "\t" << f[i] << std::endl; 00695 00696 // std::cout.precision(8); 00697 // std::cout << "ADOL-C Adjoint = " << std::endl; 00698 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) { 00699 // std::cout << "\t"; 00700 // for (unsigned int j=0; j<num_cols; j++) 00701 // std::cout << adj[j][i] << "\t"; 00702 // std::cout << std::endl; 00703 // } 00704 00705 return timer.totalElapsedTime(); 00706 } 00707 00708 double adolc_retape_adj_fill(unsigned int num_nodes, unsigned int num_eqns, 00709 unsigned int num_cols, double mesh_spacing) { 00710 ElemData e(mesh_spacing); 00711 00712 // Solution vector, residual, adjoint 00713 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns); 00714 std::vector< std::vector<double> > w(num_cols); 00715 std::vector< std::vector<double> > adj(num_cols); 00716 for (unsigned int node=0; node<num_nodes; node++) 00717 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00718 x[node*num_eqns+eqn] = 00719 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5); 00720 f[node*num_eqns+eqn] = 0.0; 00721 } 00722 00723 for (unsigned int col=0; col<num_cols; col++) { 00724 w[col] = std::vector<double>(num_nodes*num_eqns); 00725 adj[col] = std::vector<double>(num_nodes*num_eqns); 00726 for (unsigned int node=0; node<num_nodes; node++) 00727 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00728 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn]; 00729 adj[col][node*num_eqns+eqn] = 0.0; 00730 } 00731 } 00732 00733 Teuchos::Time timer("FE ADOL-C Retape Adjoint Fill", false); 00734 timer.start(true); 00735 std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns); 00736 std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns); 00737 std::vector<double> x_local(e.nnode*num_eqns); 00738 std::vector<double> f_local(e.nnode*num_eqns); 00739 double **adj_local = new double*[num_cols]; 00740 double **w_local = new double*[num_cols]; 00741 for (unsigned int i=0; i<num_cols; i++) { 00742 adj_local[i] = new double[e.nnode*num_eqns]; 00743 w_local[i] = new double[e.nnode*num_eqns]; 00744 } 00745 00746 for (unsigned int i=0; i<num_nodes-1; i++) { 00747 e.gid[0] = i; 00748 e.gid[1] = i+1; 00749 00750 adolc_init_fill(true, 1, e, num_eqns, x, w, x_local, x_ad, w_local); 00751 template_element_fill(e, num_eqns, x_ad, u, du, f_ad); 00752 adolc_process_fill(true, 1, e, num_eqns, num_cols, x_local, w_local, f_ad, 00753 f_local, f, adj_local, adj); 00754 } 00755 for (unsigned int i=0; i<num_cols; i++) { 00756 delete [] adj_local[i]; 00757 delete [] w_local[i]; 00758 } 00759 delete [] adj_local; 00760 delete [] w_local; 00761 timer.stop(); 00762 00763 // std::cout << "ADOL-C Residual (retaped) = " << std::endl; 00764 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) 00765 // std::cout << "\t" << f[i] << std::endl; 00766 00767 // std::cout.precision(8); 00768 // std::cout << "ADOL-C Adjoint (retaped) = " << std::endl; 00769 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) { 00770 // std::cout << "\t"; 00771 // for (unsigned int j=0; j<num_cols; j++) 00772 // std::cout << adj[j][i] << "\t"; 00773 // std::cout << std::endl; 00774 // } 00775 00776 return timer.totalElapsedTime(); 00777 } 00778 #endif 00779 00780 double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns, 00781 unsigned int num_cols, double mesh_spacing) { 00782 ElemData e(mesh_spacing); 00783 00784 // Solution vector, residual, adjoint 00785 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns); 00786 std::vector< std::vector<double> > w(num_cols), w_local(num_cols); 00787 std::vector< std::vector<double> > adj(num_cols); 00788 for (unsigned int node=0; node<num_nodes; node++) 00789 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00790 x[node*num_eqns+eqn] = 00791 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5); 00792 f[node*num_eqns+eqn] = 0.0; 00793 } 00794 00795 for (unsigned int col=0; col<num_cols; col++) { 00796 w[col] = std::vector<double>(num_nodes*num_eqns); 00797 w_local[col] = std::vector<double>(e.nnode*num_eqns); 00798 adj[col] = std::vector<double>(num_nodes*num_eqns); 00799 for (unsigned int node=0; node<num_nodes; node++) 00800 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00801 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn]; 00802 adj[col][node*num_eqns+eqn] = 0.0; 00803 } 00804 } 00805 00806 Teuchos::Time timer("FE Analytic Adjoint Fill", false); 00807 timer.start(true); 00808 std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns); 00809 std::vector< std::vector<double> > adj_local(num_cols); 00810 std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns); 00811 for (unsigned int i=0; i<num_cols; i++) 00812 adj_local[i] = std::vector<double>(e.nnode*num_eqns); 00813 for (unsigned int i=0; i<num_nodes-1; i++) { 00814 e.gid[0] = i; 00815 e.gid[1] = i+1; 00816 00817 analytic_init_fill(e, num_eqns, x, w, x_local, w_local); 00818 analytic_element_fill(e, num_eqns, x_local, w_local, u, du, f_local, 00819 adj_local); 00820 analytic_process_fill(e, num_eqns, f_local, adj_local, f, adj); 00821 } 00822 timer.stop(); 00823 00824 // std::cout.precision(8); 00825 // std::cout << "Analytic Residual = " << std::endl; 00826 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) 00827 // std::cout << "\t" << f[i] << std::endl; 00828 00829 // std::cout.precision(8); 00830 // std::cout << "Analytic Adjoint = " << std::endl; 00831 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) { 00832 // std::cout << "\t"; 00833 // for (unsigned int j=0; j<num_cols; j++) 00834 // std::cout << adj[j][i] << "\t"; 00835 // std::cout << std::endl; 00836 // } 00837 00838 return timer.totalElapsedTime(); 00839 } 00840 00841 double residual_fill(unsigned int num_nodes, unsigned int num_eqns, 00842 unsigned int num_cols, double mesh_spacing) { 00843 ElemData e(mesh_spacing); 00844 00845 // Solution vector, residual, jacobian 00846 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns); 00847 std::vector< std::vector<double> > w(num_cols), w_local(num_cols); 00848 for (unsigned int node_row=0; node_row<num_nodes; node_row++) { 00849 for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 00850 unsigned int row = node_row*num_eqns + eqn_row; 00851 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5); 00852 f[row] = 0.0; 00853 } 00854 } 00855 00856 for (unsigned int col=0; col<num_cols; col++) { 00857 w[col] = std::vector<double>(num_nodes*num_eqns); 00858 w_local[col] = std::vector<double>(e.nnode*num_eqns); 00859 for (unsigned int node=0; node<num_nodes; node++) 00860 for (unsigned int eqn=0; eqn<num_eqns; eqn++) { 00861 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn]; 00862 } 00863 } 00864 00865 Teuchos::Time timer("FE Residual Fill", false); 00866 timer.start(true); 00867 std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns); 00868 std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns); 00869 for (unsigned int i=0; i<num_nodes-1; i++) { 00870 e.gid[0] = i; 00871 e.gid[1] = i+1; 00872 00873 analytic_init_fill(e, num_eqns, x, w, x_local, w_local); 00874 template_element_fill(e, num_eqns, x_local, u, du, f_local); 00875 residual_process_fill(e, num_eqns, f_local, f); 00876 } 00877 timer.stop(); 00878 00879 // std::cout.precision(8); 00880 // std::cout << "Analytic Residual = " << std::endl; 00881 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) 00882 // std::cout << "\t" << f[i] << std::endl; 00883 00884 return timer.totalElapsedTime(); 00885 } 00886 00887 int main(int argc, char* argv[]) { 00888 int ierr = 0; 00889 00890 try { 00891 double t, ta, tr; 00892 int p = 2; 00893 int w = p+7; 00894 00895 // Set up command line options 00896 Teuchos::CommandLineProcessor clp; 00897 clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill"); 00898 int num_nodes = 100000; 00899 int num_eqns = 2; 00900 int num_cols = 4; 00901 int rt = 0; 00902 clp.setOption("n", &num_nodes, "Number of nodes"); 00903 clp.setOption("p", &num_eqns, "Number of equations"); 00904 clp.setOption("q", &num_cols, "Number of adjoint directions"); 00905 clp.setOption("rt", &rt, "Include ADOL-C retaping test"); 00906 00907 // Parse options 00908 Teuchos::CommandLineProcessor::EParseCommandLineReturn 00909 parseReturn= clp.parse(argc, argv); 00910 if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) 00911 return 1; 00912 00913 double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1); 00914 00915 std::cout.setf(std::ios::scientific); 00916 std::cout.precision(p); 00917 std::cout << "num_nodes = " << num_nodes 00918 << ", num_eqns = " << num_eqns 00919 << ", num_cols = " << num_cols << ": " << std::endl 00920 << " " << " Time" << "\t"<< "Time/Analytic" << "\t" 00921 << "Time/Residual" << std::endl; 00922 00923 ta = 1.0; 00924 tr = 1.0; 00925 00926 tr = residual_fill(num_nodes, num_eqns, num_cols, mesh_spacing); 00927 00928 ta = analytic_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing); 00929 std::cout << "Analytic: " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/tr << std::endl; 00930 00931 #ifdef HAVE_ADOLC 00932 t = adolc_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing); 00933 std::cout << "ADOL-C: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl; 00934 00935 if (rt != 0) { 00936 t = adolc_retape_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing); 00937 std::cout << "ADOL-C(rt):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl; 00938 } 00939 #endif 00940 00941 t = fad_adj_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing); 00942 std::cout << "DFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl; 00943 00944 t = fad_adj_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing); 00945 std::cout << "ELRDFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl; 00946 00947 t = rad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing); 00948 std::cout << "Rad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl; 00949 00950 t = vrad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing); 00951 std::cout << "Vector Rad:" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl; 00952 00953 } 00954 catch (std::exception& e) { 00955 std::cout << e.what() << std::endl; 00956 ierr = 1; 00957 } 00958 catch (const char *s) { 00959 std::cout << s << std::endl; 00960 ierr = 1; 00961 } 00962 catch (...) { 00963 std::cout << "Caught unknown exception!" << std::endl; 00964 ierr = 1; 00965 } 00966 00967 return ierr; 00968 }
1.7.4