Euclid_dh.c

00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Euclid_dh.h"
00031 #include "Mem_dh.h"
00032 #include "Mat_dh.h"
00033 #include "Vec_dh.h"
00034 #include "Factor_dh.h"
00035 #include "getRow_dh.h"
00036 #include "ilu_dh.h"
00037 #include "Parser_dh.h"
00038 #include "SortedList_dh.h"
00039 #include "SubdomainGraph_dh.h"
00040 #include "ExternalRows_dh.h"
00041 #include "krylov_dh.h"
00042 
00043 static void get_runtime_params_private (Euclid_dh ctx);
00044 static void invert_diagonals_private (Euclid_dh ctx);
00045 static void compute_rho_private (Euclid_dh ctx);
00046 static void factor_private (Euclid_dh ctx);
00047 /* static void discard_indices_private(Euclid_dh ctx); */
00048 static void reduce_timings_private (Euclid_dh ctx);
00049 
00050 #undef __FUNC__
00051 #define __FUNC__ "Euclid_dhCreate"
00052 void
00053 Euclid_dhCreate (Euclid_dh * ctxOUT)
00054 {
00055   START_FUNC_DH
00056     struct _mpi_interface_dh *ctx =
00057     (struct _mpi_interface_dh *)
00058     MALLOC_DH (sizeof (struct _mpi_interface_dh));
00059   CHECK_V_ERROR;
00060   *ctxOUT = ctx;
00061 
00062   ctx->isSetup = false;
00063 
00064   ctx->rho_init = 2.0;
00065   ctx->rho_final = 0.0;
00066 
00067   ctx->m = 0;
00068   ctx->n = 0;
00069   ctx->rhs = NULL;
00070   ctx->A = NULL;
00071   ctx->F = NULL;
00072   ctx->sg = NULL;
00073 
00074   ctx->scale = NULL;
00075   ctx->isScaled = false;
00076   ctx->work = NULL;
00077   ctx->work2 = NULL;
00078   ctx->from = 0;
00079   ctx->to = 0;
00080 
00081   strcpy (ctx->algo_par, "pilu");
00082   strcpy (ctx->algo_ilu, "iluk");
00083   ctx->level = 1;
00084   ctx->droptol = DEFAULT_DROP_TOL;
00085   ctx->sparseTolA = 0.0;
00086   ctx->sparseTolF = 0.0;
00087   ctx->pivotMin = 0.0;
00088   ctx->pivotFix = PIVOT_FIX_DEFAULT;
00089   ctx->maxVal = 0.0;
00090 
00091   ctx->slist = NULL;
00092   ctx->extRows = NULL;
00093 
00094   strcpy (ctx->krylovMethod, "bicgstab");
00095   ctx->maxIts = 200;
00096   ctx->rtol = 1e-5;
00097   ctx->atol = 1e-50;
00098   ctx->its = 0;
00099   ctx->itsTotal = 0;
00100   ctx->setupCount = 0;
00101   ctx->logging = 0;
00102   ctx->printStats = (Parser_dhHasSwitch (parser_dh, "-printStats"));
00103 
00104   {
00105     int i;
00106     for (i = 0; i < TIMING_BINS; ++i)
00107       ctx->timing[i] = 0.0;
00108     for (i = 0; i < STATS_BINS; ++i)
00109       ctx->stats[i] = 0.0;
00110   }
00111   ctx->timingsWereReduced = false;
00112 
00113   ++ref_counter;
00114 END_FUNC_DH}
00115 
00116 
#undef __FUNC__
#define __FUNC__ "Euclid_dhDestroy"
/* Release a context created by Euclid_dhCreate(): optionally print
   final reports, then free the factor, subdomain graph, scaling and
   work arrays, and the context itself.  Note: CHECK_V_ERROR can
   return early, so a failure mid-teardown may leave later resources
   unfreed. */
void
Euclid_dhDestroy (Euclid_dh ctx)
{
  START_FUNC_DH
    /* print the full report if requested by switch or by logging level */
    if (Parser_dhHasSwitch (parser_dh, "-eu_stats") || ctx->logging)
    {
      /* insert switch so memory report will also be printed */
      Parser_dhInsert (parser_dh, "-eu_mem", "1");
      CHECK_V_ERROR;
      Euclid_dhPrintHypreReport (ctx, stdout);
      CHECK_V_ERROR;
    }

  /* if Setup ran more than once, summarize the final setup/solve cycle */
  if (ctx->setupCount > 1 && ctx->printStats)
    {
      Euclid_dhPrintStatsShorter (ctx, stdout);
      CHECK_V_ERROR;
    }

  /* tear down owned sub-objects; each pointer is guarded because any
     of them may never have been allocated (e.g. -doNotFactor runs) */
  if (ctx->F != NULL)
    {
      Factor_dhDestroy (ctx->F);
      CHECK_V_ERROR;
    }
  if (ctx->sg != NULL)
    {
      SubdomainGraph_dhDestroy (ctx->sg);
      CHECK_V_ERROR;
    }
  if (ctx->scale != NULL)
    {
      FREE_DH (ctx->scale);
      CHECK_V_ERROR;
    }
  if (ctx->work != NULL)
    {
      FREE_DH (ctx->work);
      CHECK_V_ERROR;
    }
  if (ctx->work2 != NULL)
    {
      FREE_DH (ctx->work2);
      CHECK_V_ERROR;
    }
  /* slist/extRows are normally destroyed at the end of factor_private();
     they are only non-NULL here if factorization aborted partway */
  if (ctx->slist != NULL)
    {
      SortedList_dhDestroy (ctx->slist);
      CHECK_V_ERROR;
    }
  if (ctx->extRows != NULL)
    {
      ExternalRows_dhDestroy (ctx->extRows);
      CHECK_V_ERROR;
    }
  FREE_DH (ctx);
  CHECK_V_ERROR;

  /* balance the ++ref_counter in Euclid_dhCreate() */
  --ref_counter;
END_FUNC_DH}
00178 
00179 
00180 /* on entry, "A" must have been set.  If this context is being
00181    reused, user must ensure ???
00182 */
#undef __FUNC__
#define __FUNC__ "Euclid_dhSetup"
/* Build (or rebuild) the preconditioner for ctx->A: set up the
   subdomain graph, read runtime parameters, allocate scaling/work
   vectors, factor the matrix, invert the factor's diagonal, and
   (for PILU with np > 1) set up the triangular-solve communications.
   ctx->A must be set before calling; any previous factor is discarded. */
void
Euclid_dhSetup (Euclid_dh ctx)
{
  START_FUNC_DH int m, n, beg_row;
  double t1;
  bool isSetup = ctx->isSetup;	/* true if this context is being reused */
  bool bj = false;

  /*----------------------------------------------------
   * If Euclid was previously setup, print summary of
   * what happened during previous setup/solve
   *----------------------------------------------------*/
  if (ctx->setupCount && ctx->printStats)
    {
      Euclid_dhPrintStatsShorter (ctx, stdout);
      CHECK_V_ERROR;
      ctx->its = 0;
    }

  /*----------------------------------------------------
   * zero array for statistical reporting
   *----------------------------------------------------*/
  {
    int i;
    for (i = 0; i < STATS_BINS; ++i)
      ctx->stats[i] = 0.0;
  }

  /*----------------------------------------------------
   * internal timing
   * (SOLVE_START_T doubles as the setup start timestamp;
   * it is read back at END_OF_FUNCTION below)
   *----------------------------------------------------*/
  ctx->timing[SOLVE_START_T] = MPI_Wtime ();
  /* sum timing from last linear solve cycle, if any */
  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;

  /* discard any factor left over from a previous setup */
  if (ctx->F != NULL)
    {
      Factor_dhDestroy (ctx->F);
      CHECK_V_ERROR;
      ctx->F = NULL;
    }

  if (ctx->A == NULL)
    {
      SET_V_ERROR ("must set ctx->A before calling init");
    }
  EuclidGetDimensions (ctx->A, &beg_row, &m, &n);
  CHECK_V_ERROR;
  ctx->m = m;			/* local row count */
  ctx->n = n;			/* global row count */

  if (Parser_dhHasSwitch (parser_dh, "-print_size"))
    {
      printf_dh
	("setting up linear system; global rows: %i  local rows: %i (on P_0)\n",
	 n, m);
    }

  sprintf (msgBuf_dh, "localRow= %i;  globalRows= %i;  beg_row= %i", m, n,
	   beg_row);
  SET_INFO (msgBuf_dh);

  bj = Parser_dhHasSwitch (parser_dh, "-bj");

  /*------------------------------------------------------------------------
   * Setup the SubdomainGraph, which contains connectivity and
   * and permutation information.  If this context is being reused,
   * this may already have been done; if being resused, the underlying
   * subdomain graph cannot change (user's responsibility?)
   *------------------------------------------------------------------------*/
  if (ctx->sg == NULL)
    {
      int blocks = np_dh;
      t1 = MPI_Wtime ();
      if (np_dh == 1)
	{
	  /* single task: number of subdomains is user-settable (-blocks) */
	  Parser_dhReadInt (parser_dh, "-blocks", &blocks);
	  CHECK_V_ERROR;
	  SubdomainGraph_dhCreate (&(ctx->sg));
	  CHECK_V_ERROR;
	  SubdomainGraph_dhInit (ctx->sg, blocks, bj, ctx->A);
	  CHECK_V_ERROR;
	}
      else
	{
	  /* multiple tasks: one subdomain per task (blocks = -1 sentinel) */
	  SubdomainGraph_dhCreate (&(ctx->sg));
	  CHECK_V_ERROR;
	  SubdomainGraph_dhInit (ctx->sg, -1, bj, ctx->A);
	  CHECK_V_ERROR;
	}
      ctx->timing[SUB_GRAPH_T] += (MPI_Wtime () - t1);
    }


/* SubdomainGraph_dhDump(ctx->sg, "SG.dump"); CHECK_V_ERROR; */

  /*----------------------------------------------------
   * for debugging
   *----------------------------------------------------*/
  if (Parser_dhHasSwitch (parser_dh, "-doNotFactor"))
    {
      goto END_OF_FUNCTION;
    }


  /*----------------------------------------------------
   * query parser for runtime parameters
   * (only on first setup; reuse keeps prior settings)
   *----------------------------------------------------*/
  if (!isSetup)
    {
      get_runtime_params_private (ctx);
      CHECK_V_ERROR;
    }
  /* NOTE(review): this clears bj when algo_par == "bj", which looks
     inverted (one would expect bj = true here) — confirm against
     upstream Euclid sources before changing. */
  if (!strcmp (ctx->algo_par, "bj"))
    bj = false;

  /*---------------------------------------------------------
   * allocate and initialize storage for row-scaling
   * (ctx->isScaled is set in get_runtime_params_private(); )
   *---------------------------------------------------------*/
  if (ctx->scale == NULL)
    {
      ctx->scale = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }
  {
    int i;
    for (i = 0; i < m; ++i)
      ctx->scale[i] = 1.0;	/* identity scaling by default */
  }

  /*------------------------------------------------------------------
   * allocate work vectors; used in factorization and triangular solves;
   *------------------------------------------------------------------*/
  if (ctx->work == NULL)
    {
      ctx->work = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }
  if (ctx->work2 == NULL)
    {
      ctx->work2 = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }

  /*-----------------------------------------------------------------
   * perform the incomplete factorization (this should be, at least
   * for higher level ILUK, the most time-intensive portion of setup)
   *-----------------------------------------------------------------*/
  t1 = MPI_Wtime ();
  factor_private (ctx);
  CHECK_V_ERROR;
  ctx->timing[FACTOR_T] += (MPI_Wtime () - t1);

  /*--------------------------------------------------------------
   * invert diagonals, for faster triangular solves
   * (skipped when algo_par == "none", i.e. no preconditioning)
   *--------------------------------------------------------------*/
  if (strcmp (ctx->algo_par, "none"))
    {
      invert_diagonals_private (ctx);
      CHECK_V_ERROR;
    }

  /*--------------------------------------------------------------
   * compute rho_final: global ratio of nzF/nzA
   * also, if -sparseA > 0,  compute ratio of nzA
   * used in factorization
   *--------------------------------------------------------------*/
  /* for some reason compute_rho_private() was expensive, so now it's
     an option, unless there's only one mpi task.
   */
  if (Parser_dhHasSwitch (parser_dh, "-computeRho") || np_dh == 1)
    {
      if (strcmp (ctx->algo_par, "none"))
	{
	  t1 = MPI_Wtime ();
	  compute_rho_private (ctx);
	  CHECK_V_ERROR;
	  ctx->timing[COMPUTE_RHO_T] += (MPI_Wtime () - t1);
	}
    }

  /*--------------------------------------------------------------
   * if using PILU, set up persistent comms and global-to-local
   * number scheme, for efficient triangular solves.
   * (Thanks to Edmond Chow for these algorithmic ideas.)
   *--------------------------------------------------------------*/

  if (!strcmp (ctx->algo_par, "pilu") && np_dh > 1)
    {
      t1 = MPI_Wtime ();
      Factor_dhSolveSetup (ctx->F, ctx->sg);
      CHECK_V_ERROR;
      ctx->timing[SOLVE_SETUP_T] += (MPI_Wtime () - t1);
    }

END_OF_FUNCTION:;

  /*-------------------------------------------------------
   * internal timing
   *-------------------------------------------------------*/
  ctx->timing[SETUP_T] += (MPI_Wtime () - ctx->timing[SOLVE_START_T]);
  ctx->setupCount += 1;

  ctx->isSetup = true;

END_FUNC_DH}
00393 
00394 
00395 #undef __FUNC__
00396 #define __FUNC__ "get_runtime_params_private"
00397 void
00398 get_runtime_params_private (Euclid_dh ctx)
00399 {
00400   START_FUNC_DH char *tmp;
00401 
00402   /* params for use of internal solvers */
00403   Parser_dhReadInt (parser_dh, "-maxIts", &(ctx->maxIts));
00404   Parser_dhReadDouble (parser_dh, "-rtol", &(ctx->rtol));
00405   Parser_dhReadDouble (parser_dh, "-atol", &(ctx->atol));
00406 
00407   /* parallelization strategy (bj, pilu, none) */
00408   tmp = NULL;
00409   Parser_dhReadString (parser_dh, "-par", &tmp);
00410   if (tmp != NULL)
00411     {
00412       strcpy (ctx->algo_par, tmp);
00413     }
00414   if (Parser_dhHasSwitch (parser_dh, "-bj"))
00415     {
00416       strcpy (ctx->algo_par, "bj");
00417     }
00418 
00419 
00420   /* factorization parameters */
00421   Parser_dhReadDouble (parser_dh, "-rho", &(ctx->rho_init));
00422   /* inital storage allocation for factor */
00423   Parser_dhReadInt (parser_dh, "-level", &ctx->level);
00424   Parser_dhReadInt (parser_dh, "-pc_ilu_levels", &ctx->level);
00425 
00426   if (Parser_dhHasSwitch (parser_dh, "-ilut"))
00427     {
00428       Parser_dhReadDouble (parser_dh, "-ilut", &ctx->droptol);
00429       ctx->isScaled = true;
00430       strcpy (ctx->algo_ilu, "ilut");
00431     }
00432 
00433   /* make sure both algo_par and algo_ilu are set to "none,"
00434      if at least one is.
00435    */
00436   if (!strcmp (ctx->algo_par, "none"))
00437     {
00438       strcpy (ctx->algo_ilu, "none");
00439     }
00440   else if (!strcmp (ctx->algo_ilu, "none"))
00441     {
00442       strcpy (ctx->algo_par, "none");
00443     }
00444 
00445 
00446   Parser_dhReadDouble (parser_dh, "-sparseA", &(ctx->sparseTolA));
00447   /* sparsify A before factoring */
00448   Parser_dhReadDouble (parser_dh, "-sparseF", &(ctx->sparseTolF));
00449   /* sparsify after factoring */
00450   Parser_dhReadDouble (parser_dh, "-pivotMin", &(ctx->pivotMin));
00451   /* adjust pivots if smaller than this */
00452   Parser_dhReadDouble (parser_dh, "-pivotFix", &(ctx->pivotFix));
00453   /* how to adjust pivots */
00454 
00455   /* set row scaling for mandatory cases */
00456   if (ctx->sparseTolA || !strcmp (ctx->algo_ilu, "ilut"))
00457     {
00458       ctx->isScaled = true;
00459     }
00460 
00461   /* solve method */
00462   tmp = NULL;
00463   Parser_dhReadString (parser_dh, "-ksp_type", &tmp);
00464   if (tmp != NULL)
00465     {
00466       strcpy (ctx->krylovMethod, tmp);
00467 
00468       if (!strcmp (ctx->krylovMethod, "bcgs"))
00469     {
00470       strcpy (ctx->krylovMethod, "bicgstab");
00471     }
00472     }
00473 END_FUNC_DH}
00474 
00475 #undef __FUNC__
00476 #define __FUNC__ "invert_diagonals_private"
00477 void
00478 invert_diagonals_private (Euclid_dh ctx)
00479 {
00480   START_FUNC_DH REAL_DH * aval = ctx->F->aval;
00481   int *diag = ctx->F->diag;
00482   if (aval == NULL || diag == NULL)
00483     {
00484       SET_INFO ("can't invert diags; either F->aval or F->diag is NULL");
00485     }
00486   else
00487     {
00488       int i, m = ctx->F->m;
00489       for (i = 0; i < m; ++i)
00490     {
00491       aval[diag[i]] = 1.0 / aval[diag[i]];
00492     }
00493     }
00494 END_FUNC_DH}
00495 
00496 
00497 /* records rho_final (max for all processors) */
00498 #undef __FUNC__
00499 #define __FUNC__ "compute_rho_private"
00500 void
00501 compute_rho_private (Euclid_dh ctx)
00502 {
00503   START_FUNC_DH if (ctx->F != NULL)
00504     {
00505       double bufLocal[3], bufGlobal[3];
00506       int m = ctx->m;
00507 
00508       ctx->stats[NZF_STATS] = (double) ctx->F->rp[m];
00509       bufLocal[0] = ctx->stats[NZA_STATS];  /* nzA */
00510       bufLocal[1] = ctx->stats[NZF_STATS];  /* nzF */
00511       bufLocal[2] = ctx->stats[NZA_USED_STATS]; /* nzA used */
00512 
00513       if (np_dh == 1)
00514     {
00515       bufGlobal[0] = bufLocal[0];
00516       bufGlobal[1] = bufLocal[1];
00517       bufGlobal[2] = bufLocal[2];
00518     }
00519       else
00520     {
00521       MPI_Reduce (bufLocal, bufGlobal, 3, MPI_DOUBLE, MPI_SUM, 0,
00522               comm_dh);
00523     }
00524 
00525       if (myid_dh == 0)
00526     {
00527 
00528       /* compute rho */
00529       if (bufGlobal[0] && bufGlobal[1])
00530         {
00531           ctx->rho_final = bufGlobal[1] / bufGlobal[0];
00532         }
00533       else
00534         {
00535           ctx->rho_final = -1;
00536         }
00537 
00538       /* compute ratio of nonzeros in A that were used */
00539       if (bufGlobal[0] && bufGlobal[2])
00540         {
00541           ctx->stats[NZA_RATIO_STATS] =
00542         100.0 * bufGlobal[2] / bufGlobal[0];
00543         }
00544       else
00545         {
00546           ctx->stats[NZA_RATIO_STATS] = 100.0;
00547         }
00548     }
00549     }
00550 END_FUNC_DH}
00551 
00552 #undef __FUNC__
00553 #define __FUNC__ "factor_private"
00554 void
00555 factor_private (Euclid_dh ctx)
00556 {
00557   START_FUNC_DH
00558   /*-------------------------------------------------------------
00559    * special case, for testing/debugging: no preconditioning
00560    *-------------------------------------------------------------*/
00561     if (!strcmp (ctx->algo_par, "none"))
00562     {
00563       goto DO_NOTHING;
00564     }
00565 
00566   /*-------------------------------------------------------------
00567    * Initialize object to hold factor.
00568    *-------------------------------------------------------------*/
00569   {
00570     int br = 0;
00571     int id = np_dh;
00572     if (ctx->sg != NULL)
00573       {
00574     br = ctx->sg->beg_rowP[myid_dh];
00575     id = ctx->sg->o2n_sub[myid_dh];
00576       }
00577     Factor_dhInit (ctx->A, true, true, ctx->rho_init, id, br, &(ctx->F));
00578     CHECK_V_ERROR;
00579     ctx->F->bdry_count = ctx->sg->bdry_count[myid_dh];
00580     ctx->F->first_bdry = ctx->F->m - ctx->F->bdry_count;
00581     if (!strcmp (ctx->algo_par, "bj"))
00582       ctx->F->blockJacobi = true;
00583     if (Parser_dhHasSwitch (parser_dh, "-bj"))
00584       ctx->F->blockJacobi = true;
00585   }
00586 
00587   /*-------------------------------------------------------------
00588    * single mpi task with single or multiple subdomains
00589    *-------------------------------------------------------------*/
00590   if (np_dh == 1)
00591     {
00592 
00593       /* ILU(k) factorization */
00594       if (!strcmp (ctx->algo_ilu, "iluk"))
00595     {
00596       ctx->from = 0;
00597       ctx->to = ctx->m;
00598 
00599       /* only for debugging: use ilu_mpi_pilu */
00600       if (Parser_dhHasSwitch (parser_dh, "-mpi"))
00601         {
00602           if (ctx->sg != NULL && ctx->sg->blocks > 1)
00603         {
00604           SET_V_ERROR
00605             ("only use -mpi, which invokes ilu_mpi_pilu(), for np = 1 and -blocks 1");
00606         }
00607           iluk_mpi_pilu (ctx);
00608           CHECK_V_ERROR;
00609         }
00610 
00611       /* "normal" operation */
00612       else
00613         {
00614           iluk_seq_block (ctx);
00615           CHECK_V_ERROR;
00616           /* note: iluk_seq_block() performs block jacobi iluk if ctx->algo_par == bj.  */
00617         }
00618     }
00619 
00620       /* ILUT factorization */
00621       else if (!strcmp (ctx->algo_ilu, "ilut"))
00622     {
00623       ctx->from = 0;
00624       ctx->to = ctx->m;
00625       ilut_seq (ctx);
00626       CHECK_V_ERROR;
00627     }
00628 
00629       /* all other factorization methods */
00630       else
00631     {
00632       sprintf (msgBuf_dh, "factorization method: %s is not implemented",
00633            ctx->algo_ilu);
00634       SET_V_ERROR (msgBuf_dh);
00635     }
00636     }
00637 
00638   /*-------------------------------------------------------------
00639    * multiple mpi tasks with multiple subdomains
00640    *-------------------------------------------------------------*/
00641   else
00642     {
00643       /* block jacobi */
00644       if (!strcmp (ctx->algo_par, "bj"))
00645     {
00646       ctx->from = 0;
00647       ctx->to = ctx->m;
00648       iluk_mpi_bj (ctx);
00649       CHECK_V_ERROR;
00650     }
00651 
00652       /* iluk */
00653       else if (!strcmp (ctx->algo_ilu, "iluk"))
00654     {
00655       bool bj = ctx->F->blockJacobi;    /* for debugging */
00656 
00657       /* printf_dh("\n@@@ starting ilu_mpi_pilu @@@\n"); */
00658 
00659       SortedList_dhCreate (&(ctx->slist));
00660       CHECK_V_ERROR;
00661       SortedList_dhInit (ctx->slist, ctx->sg);
00662       CHECK_V_ERROR;
00663       ExternalRows_dhCreate (&(ctx->extRows));
00664       CHECK_V_ERROR;
00665       ExternalRows_dhInit (ctx->extRows, ctx);
00666       CHECK_V_ERROR;
00667 
00668       /* factor interior rows */
00669       ctx->from = 0;
00670       ctx->to = ctx->F->first_bdry;
00671 
00672 /*
00673 if (Parser_dhHasSwitch(parser_dh, "-test")) {
00674        printf("[%i] Euclid_dh :: TESTING ilu_seq\n", myid_dh);
00675        iluk_seq(ctx); CHECK_V_ERROR; 
00676 } else {
00677        iluk_mpi_pilu(ctx); CHECK_V_ERROR; 
00678 }
00679 */
00680 
00681       iluk_seq (ctx);
00682       CHECK_V_ERROR;
00683 
00684       /* get external rows from lower ordered neighbors in the
00685          subdomain graph; these rows are needed for factoring
00686          this subdomain's boundary rows.
00687        */
00688       if (!bj)
00689         {
00690           ExternalRows_dhRecvRows (ctx->extRows);
00691           CHECK_V_ERROR;
00692         }
00693 
00694       /* factor boundary rows */
00695       ctx->from = ctx->F->first_bdry;
00696       ctx->to = ctx->F->m;
00697       iluk_mpi_pilu (ctx);
00698       CHECK_V_ERROR;
00699 
00700       /* send this processor's boundary rows to higher ordered
00701          neighbors in the subdomain graph.
00702        */
00703       if (!bj)
00704         {
00705           ExternalRows_dhSendRows (ctx->extRows);
00706           CHECK_V_ERROR;
00707         }
00708 
00709       /* discard column indices in factor if they would alter
00710          the subdomain graph (any such elements are in upper
00711          triangular portion of the row)
00712        */
00713 
00714 
00715       SortedList_dhDestroy (ctx->slist);
00716       CHECK_V_ERROR;
00717       ctx->slist = NULL;
00718       ExternalRows_dhDestroy (ctx->extRows);
00719       CHECK_V_ERROR;
00720       ctx->extRows = NULL;
00721     }
00722 
00723       /* all other factorization methods */
00724       else
00725     {
00726       sprintf (msgBuf_dh, "factorization method: %s is not implemented",
00727            ctx->algo_ilu);
00728       SET_V_ERROR (msgBuf_dh);
00729     }
00730     }
00731 
00732 DO_NOTHING:;
00733 
00734 END_FUNC_DH}
00735 
#if 0
/* Dead code: this whole region (and the inner #if 0) is compiled out.
   Kept for reference; it removed factor entries whose columns lie
   outside this subdomain and its original graph neighbors. */

#undef __FUNC__
#define __FUNC__ "discard_indices_private"
void
discard_indices_private (Euclid_dh ctx)
{
  START_FUNC_DH
#if 0
  int *rp = ctx->F->rp, *cval = ctx->F->cval;
  double *aval = ctx->F->aval;
  int m = F->m, *nabors = ctx->nabors, nc = ctx->naborCount;
  int i, j, k, idx, count = 0, start_of_row;
  int beg_row = ctx->beg_row, end_row = beg_row + m;
  int *diag = ctx->F->diag;

  /* if col is not locally owned, and doesn't belong to a
   * nabor in the (original) subdomain graph, we need to discard
   * the column index and associated value.  First, we'll flag all
   * such indices for deletion.
   */
  for (i = 0; i < m; ++i)
    {
      for (j = rp[i]; j < rp[i + 1]; ++j)
	{
	  int col = cval[j];
	  if (col < beg_row || col >= end_row)
	    {
	      bool flag = true;
	      int owner = find_owner_private_mpi (ctx, col);
	      CHECK_V_ERROR;

	      for (k = 0; k < nc; ++k)
		{
		  if (nabors[k] == owner)
		    {
		      flag = false;
		      break;
		    }
		}

	      if (flag)
		{
		  cval[j] = -1;
		  ++count;
		}
	    }
	}
    }

  sprintf (msgBuf_dh,
	   "deleting %i indices that would alter the subdomain graph", count);
  SET_INFO (msgBuf_dh);

  /* Second, perform the actual deletion */
  idx = 0;
  start_of_row = 0;
  for (i = 0; i < m; ++i)
    {
      for (j = start_of_row; j < rp[i + 1]; ++j)
	{
	  int col = cval[j];
	  double val = aval[j];
	  if (col != -1)
	    {
	      cval[idx] = col;
	      aval[idx] = val;
	      ++idx;
	    }
	}
      start_of_row = rp[i + 1];
      rp[i + 1] = idx;
    }

  /* rebuild diagonal pointers */
  for (i = 0; i < m; ++i)
    {
      for (j = rp[i]; j < rp[i + 1]; ++j)
	{
	  if (cval[j] == i + beg_row)
	    {
	      diag[i] = j;
	      break;
	    }
	}
    }
#endif
END_FUNC_DH}
#endif
00825 
00826 #undef __FUNC__
00827 #define __FUNC__ "Euclid_dhSolve"
00828 void
00829 Euclid_dhSolve (Euclid_dh ctx, Vec_dh x, Vec_dh b, int *its)
00830 {
00831   START_FUNC_DH int itsOUT;
00832   Mat_dh A = (Mat_dh) ctx->A;
00833 
00834   if (!strcmp (ctx->krylovMethod, "cg"))
00835     {
00836       cg_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00837       ERRCHKA;
00838     }
00839   else if (!strcmp (ctx->krylovMethod, "bicgstab"))
00840     {
00841       bicgstab_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00842       ERRCHKA;
00843     }
00844   else
00845     {
00846       sprintf (msgBuf_dh, "unknown krylov solver: %s", ctx->krylovMethod);
00847       SET_V_ERROR (msgBuf_dh);
00848     }
00849   *its = itsOUT;
00850 END_FUNC_DH}
00851 
#undef __FUNC__
#define __FUNC__ "Euclid_dhPrintStats"
/* Print the full Euclid report (runtime parameters, factor stats,
   and a breakdown of reduced timings) to fp.
   NOTE(review): Factor_dhReadNz(ctx->F) is called unconditionally —
   presumably ctx->F is non-NULL whenever this is reached; confirm
   for the -doNotFactor path. */
void
Euclid_dhPrintStats (Euclid_dh ctx, FILE * fp)
{
  START_FUNC_DH double *timing;
  int nz;

  nz = Factor_dhReadNz (ctx->F);
  CHECK_V_ERROR;
  timing = ctx->timing;

  /* add in timing from last setup (if any) */
  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;

  /* max timings across all tasks, so the report is globally meaningful */
  reduce_timings_private (ctx);
  CHECK_V_ERROR;

  fprintf_dh (fp,
	      "\n==================== Euclid report (start) ====================\n");
  fprintf_dh (fp, "\nruntime parameters\n");
  fprintf_dh (fp, "------------------\n");
  fprintf_dh (fp, "   setups:                 %i\n", ctx->setupCount);
  fprintf_dh (fp, "   tri solves:             %i\n", ctx->itsTotal);
  fprintf_dh (fp, "   parallelization method: %s\n", ctx->algo_par);
  fprintf_dh (fp, "   factorization method:   %s\n", ctx->algo_ilu);
  fprintf_dh (fp, "   matrix was row scaled:  %i\n", ctx->isScaled);

  fprintf_dh (fp, "   matrix row count:       %i\n", ctx->n);
  fprintf_dh (fp, "   nzF:                    %i\n", nz);
  fprintf_dh (fp, "   rho:                    %g\n", ctx->rho_final);
  fprintf_dh (fp, "   level:                  %i\n", ctx->level);
  fprintf_dh (fp, "   sparseA:                %g\n", ctx->sparseTolA);

  fprintf_dh (fp, "\nEuclid timing report\n");
  fprintf_dh (fp, "--------------------\n");
  fprintf_dh (fp, "   solves total:  %0.2f (see docs)\n",
	      timing[TOTAL_SOLVE_T]);
  fprintf_dh (fp, "   tri solves:    %0.2f\n", timing[TRI_SOLVE_T]);
  fprintf_dh (fp, "   setups:        %0.2f\n", timing[SETUP_T]);
  fprintf_dh (fp, "      subdomain graph setup:  %0.2f\n",
	      timing[SUB_GRAPH_T]);
  fprintf_dh (fp, "      factorization:          %0.2f\n", timing[FACTOR_T]);
  fprintf_dh (fp, "      solve setup:            %0.2f\n",
	      timing[SOLVE_SETUP_T]);
  fprintf_dh (fp, "      rho:                    %0.2f\n",
	      ctx->timing[COMPUTE_RHO_T]);
  /* "misc" is whatever setup time the itemized bins don't account for */
  fprintf_dh (fp, "      misc (should be small): %0.2f\n",
	      timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
				 timing[SOLVE_SETUP_T] +
				 timing[COMPUTE_RHO_T]));

  if (ctx->sg != NULL)
    {
      SubdomainGraph_dhPrintStats (ctx->sg, fp);
      CHECK_V_ERROR;
      SubdomainGraph_dhPrintRatios (ctx->sg, fp);
      CHECK_V_ERROR;
    }


  fprintf_dh (fp, "\nApplicable if Euclid's internal solvers were used:\n");
  fprintf_dh (fp, "---------------------------------------------------\n");
  fprintf_dh (fp, "   solve method: %s\n", ctx->krylovMethod);
  fprintf_dh (fp, "   maxIts:       %i\n", ctx->maxIts);
  fprintf_dh (fp, "   rtol:         %g\n", ctx->rtol);
  fprintf_dh (fp, "   atol:         %g\n", ctx->atol);
  fprintf_dh (fp,
	      "\n==================== Euclid report (end) ======================\n");
END_FUNC_DH}
00923 
00924 
00925 /* nzA ratio and rho refer to most recent solve, if more than
00926    one solve (call to Setup) was performed.  Other stats
00927    are cumulative.
00928 */
00929 #undef __FUNC__
00930 #define __FUNC__ "Euclid_dhPrintStatsShort"
00931 void
00932 Euclid_dhPrintStatsShort (Euclid_dh ctx, double setup, double solve,
00933               FILE * fp)
00934 {
00935   START_FUNC_DH double *timing = ctx->timing;
00936   /* double *stats = ctx->stats; */
00937   /* double setup_factor; */
00938   /* double setup_other; */
00939   double apply_total;
00940   double apply_per_it;
00941   /* double nzUsedRatio; */
00942   double perIt;
00943   int blocks = np_dh;
00944 
00945   if (np_dh == 1)
00946     blocks = ctx->sg->blocks;
00947 
00948   reduce_timings_private (ctx);
00949   CHECK_V_ERROR;
00950 
00951   /* setup_factor  = timing[FACTOR_T]; */
00952   /* setup_other   = timing[SETUP_T] - setup_factor; */
00953   apply_total = timing[TRI_SOLVE_T];
00954   apply_per_it = apply_total / (double) ctx->its;
00955   /* nzUsedRatio   = stats[NZA_RATIO_STATS]; */
00956   perIt = solve / (double) ctx->its;
00957 
00958   fprintf_dh (fp, "\n");
00959   fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00960           "method", "subdms", "level", "its", "setup", "solve", "total",
00961           "perIt", "perIt", "rows");
00962   fprintf_dh (fp,
00963           "------  -----  -----  -----  -----  -----  -----  -----  -----  -----  XX\n");
00964   fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.4f %6.5f %6g  XXX\n", ctx->algo_par,    /* parallelization strategy [pilu, bj] */
00965           blocks,       /* number of subdomains */
00966           ctx->level,   /* level, for ILU(k) */
00967           ctx->its,     /* iterations */
00968           setup,        /* total setup time, from caller */
00969           solve,        /* total setup time, from caller */
00970           setup + solve,    /* total time, from caller */
00971           perIt,        /* time per iteration, solver+precond. */
00972           apply_per_it, /* time per iteration, solver+precond. */
00973           (double) ctx->n   /* global unknnowns */
00974     );
00975 
00976 
00977 
00978 
00979 #if 0
00980   fprintf_dh (fp, "\n");
00981   fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00982           "", "", "", "", "", "setup", "setup", "", "", "", "", "", "");
00983 
00984   fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00985           "method", "subdms", "level", "its", "total", "factor",
00986           "other", "apply", "perIt", "rho", "A_tol", "A_%", "rows");
00987   fprintf_dh (fp,
00988           "------  -----  -----  -----  -----  -----  -----  -----  -----  -----  -----  -----  ----- XX\n");
00989 
00990 
00991   fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.2f %6.4f %6.1f %6g %6.2f %6g  XXX\n", ctx->algo_par,    /* parallelization strategy [pilu, bj] */
00992           blocks,       /* number of subdomains */
00993           ctx->level,   /* level, for ILU(k) */
00994           ctx->its,     /* iterations */
00995           setup,        /* total setup time, from caller */
00996           solve,        /* total setup time, from caller */
00997           setup_factor, /* pc solve: factorization */
00998           setup_other,  /* pc setup: other */
00999           apply_total,  /* triangular solve time */
01000           apply_per_it, /* time for one triangular solve */
01001           ctx->rho_final,   /* rho */
01002           ctx->sparseTolA,  /* sparseA tolerance */
01003           nzUsedRatio,  /* percent of A that was used */
01004           (double) ctx->n   /* global unknnowns */
01005     );
01006 #endif
01007 
01008 #if 0
01009   /* special: for scalability studies */
01010   fprintf_dh (fp, "\n%6s %6s %6s %6s %6s %6s WW\n", "method", "level",
01011           "subGph", "factor", "solveS", "perIt");
01012   fprintf_dh (fp, "------  -----  -----  -----  -----  -----  WW\n");
01013   fprintf_dh (fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f  WWW\n",
01014           ctx->algo_par,
01015           ctx->level,
01016           timing[SUB_GRAPH_T],
01017           timing[FACTOR_T], timing[SOLVE_SETUP_T], apply_per_it);
01018 #endif
01019 END_FUNC_DH}
01020 
01021 
01022 /* its during last solve; rho; nzaUsed */
01023 #undef __FUNC__
01024 #define __FUNC__ "Euclid_dhPrintStatsShorter"
01025 void
01026 Euclid_dhPrintStatsShorter (Euclid_dh ctx, FILE * fp)
01027 {
01028   START_FUNC_DH double *stats = ctx->stats;
01029 
01030   int its = ctx->its;
01031   double rho = ctx->rho_final;
01032   double nzUsedRatio = stats[NZA_RATIO_STATS];
01033 
01034 
01035   fprintf_dh (fp, "\nStats from last linear solve: YY\n");
01036   fprintf_dh (fp, "%6s %6s %6s     YY\n", "its", "rho", "A_%");
01037   fprintf_dh (fp, " -----  -----  -----     YY\n");
01038   fprintf_dh (fp, "%6i %6.2f %6.2f     YYY\n", its, rho, nzUsedRatio);
01039 
01040 END_FUNC_DH}
01041 
01042 #undef __FUNC__
01043 #define __FUNC__ "Euclid_dhPrintScaling"
01044 void
01045 Euclid_dhPrintScaling (Euclid_dh ctx, FILE * fp)
01046 {
01047   START_FUNC_DH int i, m = ctx->m;
01048 
01049   if (m > 10)
01050     m = 10;
01051 
01052   if (ctx->scale == NULL)
01053     {
01054       SET_V_ERROR ("ctx->scale is NULL; was Euclid_dhSetup() called?");
01055     }
01056 
01057   fprintf (fp, "\n---------- 1st %i row scaling values:\n", m);
01058   for (i = 0; i < m; ++i)
01059     {
01060       fprintf (fp, "   %i  %g  \n", i + 1, ctx->scale[i]);
01061     }
01062 END_FUNC_DH}
01063 
01064 
01065 #undef __FUNC__
01066 #define __FUNC__ "reduce_timings_private"
01067 void
01068 reduce_timings_private (Euclid_dh ctx)
01069 {
01070   START_FUNC_DH if (np_dh > 1)
01071     {
01072       double bufOUT[TIMING_BINS];
01073 
01074       memcpy (bufOUT, ctx->timing, TIMING_BINS * sizeof (double));
01075       MPI_Reduce (bufOUT, ctx->timing, TIMING_BINS, MPI_DOUBLE, MPI_MAX, 0,
01076           comm_dh);
01077     }
01078 
01079   ctx->timingsWereReduced = true;
01080 END_FUNC_DH}
01081 
#undef __FUNC__
#define __FUNC__ "Euclid_dhPrintHypreReport"
/* Writes a hypre-style statistical report (runtime parameters, factor
   size, and a timing breakdown) to fp.  All ranks participate in the
   timing reduction; only rank 0 prints. */
void
Euclid_dhPrintHypreReport (Euclid_dh ctx, FILE * fp)
{
  START_FUNC_DH double *timing;
  int nz;                       /* nonzero count of the factor ctx->F */

  nz = Factor_dhReadNz (ctx->F);
  CHECK_V_ERROR;
  timing = ctx->timing;

  /* add in timing from last setup (if any) */
  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;

  /* max-reduce timing bins across ranks so rank 0 reports worst-case times */
  reduce_timings_private (ctx);
  CHECK_V_ERROR;

  if (myid_dh == 0)
    {

      fprintf (fp,
           "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (start)\n");
      fprintf_dh (fp, "\nruntime parameters\n");
      fprintf_dh (fp, "------------------\n");
      fprintf_dh (fp, "   setups:                 %i\n", ctx->setupCount);
      fprintf_dh (fp, "   tri solves:             %i\n", ctx->itsTotal);
      fprintf_dh (fp, "   parallelization method: %s\n", ctx->algo_par);
      fprintf_dh (fp, "   factorization method:   %s\n", ctx->algo_ilu);
      /* the fill level is only meaningful for ILU(k) */
      if (!strcmp (ctx->algo_ilu, "iluk"))
    {
      fprintf_dh (fp, "      level:               %i\n", ctx->level);
    }

      if (ctx->isScaled)
    {
      fprintf_dh (fp, "   matrix was row scaled\n");
    }

      fprintf_dh (fp, "   global matrix row count: %i\n", ctx->n);
      fprintf_dh (fp, "   nzF:                     %i\n", nz);
      fprintf_dh (fp, "   rho:                     %g\n", ctx->rho_final);
      fprintf_dh (fp, "   sparseA:                 %g\n", ctx->sparseTolA);

      fprintf_dh (fp, "\nEuclid timing report\n");
      fprintf_dh (fp, "--------------------\n");
      fprintf_dh (fp, "   solves total:  %0.2f (see docs)\n",
          timing[TOTAL_SOLVE_T]);
      fprintf_dh (fp, "   tri solves:    %0.2f\n", timing[TRI_SOLVE_T]);
      fprintf_dh (fp, "   setups:        %0.2f\n", timing[SETUP_T]);
      fprintf_dh (fp, "      subdomain graph setup:  %0.2f\n",
          timing[SUB_GRAPH_T]);
      fprintf_dh (fp, "      factorization:          %0.2f\n",
          timing[FACTOR_T]);
      fprintf_dh (fp, "      solve setup:            %0.2f\n",
          timing[SOLVE_SETUP_T]);
      fprintf_dh (fp, "      rho:                    %0.2f\n",
          ctx->timing[COMPUTE_RHO_T]);
      /* "misc" is whatever setup time the itemized bins don't account for */
      fprintf_dh (fp, "      misc (should be small): %0.2f\n",
          timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
                     timing[SOLVE_SETUP_T] +
                     timing[COMPUTE_RHO_T]));

      /* subdomain graph stats are only available after a setup */
      if (ctx->sg != NULL)
    {
      SubdomainGraph_dhPrintStats (ctx->sg, fp);
      CHECK_V_ERROR;
      SubdomainGraph_dhPrintRatios (ctx->sg, fp);
      CHECK_V_ERROR;
    }

      fprintf (fp,
           "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (end)\n");

    }

END_FUNC_DH}
01160 
01161 #undef __FUNC__
01162 #define __FUNC__ "Euclid_dhPrintTestData"
01163 void
01164 Euclid_dhPrintTestData (Euclid_dh ctx, FILE * fp)
01165 {
01166   START_FUNC_DH
01167     /* Print data that should remain that will hopefully 
01168        remain the same for any platform.
01169        Possibly "tri solves" may change . . .
01170      */
01171     if (myid_dh == 0)
01172     {
01173       fprintf (fp, "   setups:                 %i\n", ctx->setupCount);
01174       fprintf (fp, "   tri solves:             %i\n", ctx->its);
01175       fprintf (fp, "   parallelization method: %s\n", ctx->algo_par);
01176       fprintf (fp, "   factorization method:   %s\n", ctx->algo_ilu);
01177       fprintf (fp, "   level:                  %i\n", ctx->level);
01178       fprintf (fp, "   row scaling:            %i\n", ctx->isScaled);
01179     }
01180   SubdomainGraph_dhPrintRatios (ctx->sg, fp);
01181   CHECK_V_ERROR;
01182 END_FUNC_DH}

Generated on Tue Jul 13 09:27:12 2010 for IFPACK by  doxygen 1.4.7