krylov_dh.c

00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Euclid_dh.h"
00031 #include "krylov_dh.h"
00032 #include "Mem_dh.h"
00033 #include "Parser_dh.h"
00034 #include "Mat_dh.h"
00035 
00036 
00037 #undef __FUNC__
00038 #define __FUNC__ "bicgstab_euclid"
00039 void
00040 bicgstab_euclid (Mat_dh A, Euclid_dh ctx, double *x, double *b, int *itsOUT)
00041 {
00042   START_FUNC_DH int its, m = ctx->m;
00043   bool monitor;
00044   int maxIts = ctx->maxIts;
00045   double atol = ctx->atol, rtol = ctx->rtol;
00046 
00047   /* scalars */
00048   double alpha, alpha_1,
00049     beta_1,
00050     widget, widget_1, rho_1, rho_2, s_norm, eps, exit_a, b_iprod, r_iprod;
00051 
00052   /* vectors */
00053   double *t, *s, *s_hat, *v, *p, *p_hat, *r, *r_hat;
00054 
00055   monitor = Parser_dhHasSwitch (parser_dh, "-monitor");
00056 
00057   /* allocate working space */
00058   t = (double *) MALLOC_DH (m * sizeof (double));
00059   s = (double *) MALLOC_DH (m * sizeof (double));
00060   s_hat = (double *) MALLOC_DH (m * sizeof (double));
00061   v = (double *) MALLOC_DH (m * sizeof (double));
00062   p = (double *) MALLOC_DH (m * sizeof (double));
00063   p_hat = (double *) MALLOC_DH (m * sizeof (double));
00064   r = (double *) MALLOC_DH (m * sizeof (double));
00065   r_hat = (double *) MALLOC_DH (m * sizeof (double));
00066 
00067   /* r = b - Ax */
00068   Mat_dhMatVec (A, x, s);   /* s = Ax */
00069   CopyVec (m, b, r);        /* r = b */
00070   Axpy (m, -1.0, s, r);     /* r = b-Ax */
00071   CopyVec (m, r, r_hat);    /* r_hat = r */
00072 
00073   /* compute stopping criteria */
00074   b_iprod = InnerProd (m, b, b);
00075   CHECK_V_ERROR;
00076   exit_a = atol * atol * b_iprod;
00077   CHECK_V_ERROR;        /* absolute stopping criteria */
00078   eps = rtol * rtol * b_iprod;  /* relative stoping criteria (residual reduction) */
00079 
00080   its = 0;
00081   while (1)
00082     {
00083       ++its;
00084       rho_1 = InnerProd (m, r_hat, r);
00085       if (rho_1 == 0)
00086     {
00087       SET_V_ERROR ("(r_hat . r) = 0; method fails");
00088     }
00089 
00090       if (its == 1)
00091     {
00092       CopyVec (m, r, p);    /* p = r_0 */
00093       CHECK_V_ERROR;
00094     }
00095       else
00096     {
00097       beta_1 = (rho_1 / rho_2) * (alpha_1 / widget_1);
00098 
00099       /* p_i = r_(i-1) + beta_(i-1)*( p_(i-1) - w_(i-1)*v_(i-1) ) */
00100       Axpy (m, -widget_1, v, p);
00101       CHECK_V_ERROR;
00102       ScaleVec (m, beta_1, p);
00103       CHECK_V_ERROR;
00104       Axpy (m, 1.0, r, p);
00105       CHECK_V_ERROR;
00106     }
00107 
00108       /* solve M*p_hat = p_i */
00109       Euclid_dhApply (ctx, p, p_hat);
00110       CHECK_V_ERROR;
00111 
00112       /* v_i = A*p_hat */
00113       Mat_dhMatVec (A, p_hat, v);
00114       CHECK_V_ERROR;
00115 
00116       /* alpha_i = rho_(i-1) / (r_hat^T . v_i ) */
00117       {
00118     double tmp = InnerProd (m, r_hat, v);
00119     CHECK_V_ERROR;
00120     alpha = rho_1 / tmp;
00121       }
00122 
00123       /* s = r_(i-1) - alpha_i*v_i */
00124       CopyVec (m, r, s);
00125       CHECK_V_ERROR;
00126       Axpy (m, -alpha, v, s);
00127       CHECK_V_ERROR;
00128 
00129       /* check norm of s; if small enough:
00130        * set x_i = x_(i-1) + alpha_i*p_i and stop.
00131        * (Actually, we use the square of the norm)
00132        */
00133       s_norm = InnerProd (m, s, s);
00134       if (s_norm < exit_a)
00135     {
00136       SET_INFO ("reached absolute stopping criteria");
00137       break;
00138     }
00139 
00140       /* solve M*s_hat = s */
00141       Euclid_dhApply (ctx, s, s_hat);
00142       CHECK_V_ERROR;
00143 
00144       /* t = A*s_hat */
00145       Mat_dhMatVec (A, s_hat, t);
00146       CHECK_V_ERROR;
00147 
00148       /* w_i = (t . s)/(t . t) */
00149       {
00150     double tmp1, tmp2;
00151     tmp1 = InnerProd (m, t, s);
00152     CHECK_V_ERROR;
00153     tmp2 = InnerProd (m, t, t);
00154     CHECK_V_ERROR;
00155     widget = tmp1 / tmp2;
00156       }
00157 
00158       /* x_i = x_(i-1) + alpha_i*p_hat + w_i*s_hat */
00159       Axpy (m, alpha, p_hat, x);
00160       CHECK_V_ERROR;
00161       Axpy (m, widget, s_hat, x);
00162       CHECK_V_ERROR;
00163 
00164       /* r_i = s - w_i*t */
00165       CopyVec (m, s, r);
00166       CHECK_V_ERROR;
00167       Axpy (m, -widget, t, r);
00168       CHECK_V_ERROR;
00169 
00170       /* check convergence; continue if necessary;
00171        * for continuation it is necessary thea w != 0.
00172        */
00173       r_iprod = InnerProd (m, r, r);
00174       CHECK_V_ERROR;
00175       if (r_iprod < eps)
00176     {
00177       SET_INFO ("stipulated residual reduction achieved");
00178       break;
00179     }
00180 
00181       /* monitor convergence */
00182       if (monitor && myid_dh == 0)
00183     {
00184       fprintf (stderr, "[it = %i] %e\n", its, sqrt (r_iprod / b_iprod));
00185     }
00186 
00187       /* prepare for next iteration */
00188       rho_2 = rho_1;
00189       widget_1 = widget;
00190       alpha_1 = alpha;
00191 
00192       if (its >= maxIts)
00193     {
00194       its = -its;
00195       break;
00196     }
00197     }
00198 
00199   *itsOUT = its;
00200 
00201   FREE_DH (t);
00202   FREE_DH (s);
00203   FREE_DH (s_hat);
00204   FREE_DH (v);
00205   FREE_DH (p);
00206   FREE_DH (p_hat);
00207   FREE_DH (r);
00208   FREE_DH (r_hat);
00209 END_FUNC_DH}
00210 
00211 #undef __FUNC__
00212 #define __FUNC__ "cg_euclid"
00213 void
00214 cg_euclid (Mat_dh A, Euclid_dh ctx, double *x, double *b, int *itsOUT)
00215 {
00216   START_FUNC_DH int its, m = A->m;
00217   double *p, *r, *s;
00218   double alpha, beta, gamma, gamma_old, eps, bi_prod, i_prod;
00219   bool monitor;
00220   int maxIts = ctx->maxIts;
00221   /* double atol = ctx->atol */
00222   double rtol = ctx->rtol;
00223 
00224   monitor = Parser_dhHasSwitch (parser_dh, "-monitor");
00225 
00226   /* compute square of absolute stopping threshold  */
00227   /* bi_prod = <b,b> */
00228   bi_prod = InnerProd (m, b, b);
00229   CHECK_V_ERROR;
00230   eps = (rtol * rtol) * bi_prod;
00231 
00232   p = (double *) MALLOC_DH (m * sizeof (double));
00233   s = (double *) MALLOC_DH (m * sizeof (double));
00234   r = (double *) MALLOC_DH (m * sizeof (double));
00235 
00236   /* r = b - Ax */
00237   Mat_dhMatVec (A, x, r);   /* r = Ax */
00238   CHECK_V_ERROR;
00239   ScaleVec (m, -1.0, r);    /* r = b */
00240   CHECK_V_ERROR;
00241   Axpy (m, 1.0, b, r);      /* r = r + b */
00242   CHECK_V_ERROR;
00243 
00244   /* solve Mp = r */
00245   Euclid_dhApply (ctx, r, p);
00246   CHECK_V_ERROR;
00247 
00248   /* gamma = <r,p> */
00249   gamma = InnerProd (m, r, p);
00250   CHECK_V_ERROR;
00251 
00252   its = 0;
00253   while (1)
00254     {
00255       ++its;
00256 
00257       /* s = A*p */
00258       Mat_dhMatVec (A, p, s);
00259       CHECK_V_ERROR;
00260 
00261       /* alpha = gamma / <s,p> */
00262       {
00263     double tmp = InnerProd (m, s, p);
00264     CHECK_V_ERROR;
00265     alpha = gamma / tmp;
00266     gamma_old = gamma;
00267       }
00268 
00269       /* x = x + alpha*p */
00270       Axpy (m, alpha, p, x);
00271       CHECK_V_ERROR;
00272 
00273       /* r = r - alpha*s */
00274       Axpy (m, -alpha, s, r);
00275       CHECK_V_ERROR;
00276 
00277       /* solve Ms = r */
00278       Euclid_dhApply (ctx, r, s);
00279       CHECK_V_ERROR;
00280 
00281       /* gamma = <r,s> */
00282       gamma = InnerProd (m, r, s);
00283       CHECK_V_ERROR;
00284 
00285       /* set i_prod for convergence test */
00286       i_prod = InnerProd (m, r, r);
00287       CHECK_V_ERROR;
00288 
00289       if (monitor && myid_dh == 0)
00290     {
00291       fprintf (stderr, "iter = %i  rel. resid. norm: %e\n", its,
00292            sqrt (i_prod / bi_prod));
00293     }
00294 
00295       /* check for convergence */
00296       if (i_prod < eps)
00297     break;
00298 
00299       /* beta = gamma / gamma_old */
00300       beta = gamma / gamma_old;
00301 
00302       /* p = s + beta p */
00303       ScaleVec (m, beta, p);
00304       CHECK_V_ERROR;
00305       Axpy (m, 1.0, s, p);
00306       CHECK_V_ERROR;
00307 
00308       if (its >= maxIts)
00309     {
00310       its = -its;
00311       break;
00312     }
00313     }
00314 
00315   *itsOUT = its;
00316 
00317   FREE_DH (p);
00318   FREE_DH (s);
00319   FREE_DH (r);
00320 END_FUNC_DH}

Generated on Tue Jul 13 09:27:13 2010 for IFPACK by  doxygen 1.4.7