/* Euclid_dh.c — core driver for the Euclid ILU preconditioner:
   context create/destroy, setup (subdomain graph + factorization),
   solve dispatch, and statistics reporting. */
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Euclid_dh.h"
00031 #include "Mem_dh.h"
00032 #include "Mat_dh.h"
00033 #include "Vec_dh.h"
00034 #include "Factor_dh.h"
00035 #include "getRow_dh.h"
00036 #include "ilu_dh.h"
00037 #include "Parser_dh.h"
00038 #include "SortedList_dh.h"
00039 #include "SubdomainGraph_dh.h"
00040 #include "ExternalRows_dh.h"
00041 #include "krylov_dh.h"
00042 
00043 static void get_runtime_params_private (Euclid_dh ctx);
00044 static void invert_diagonals_private (Euclid_dh ctx);
00045 static void compute_rho_private (Euclid_dh ctx);
00046 static void factor_private (Euclid_dh ctx);
00047 /* static void discard_indices_private(Euclid_dh ctx); */
00048 static void reduce_timings_private (Euclid_dh ctx);
00049 
00050 #undef __FUNC__
00051 #define __FUNC__ "Euclid_dhCreate"
00052 void
00053 Euclid_dhCreate (Euclid_dh * ctxOUT)
00054 {
00055   START_FUNC_DH
00056     struct _mpi_interface_dh *ctx =
00057     (struct _mpi_interface_dh *)
00058     MALLOC_DH (sizeof (struct _mpi_interface_dh));
00059   CHECK_V_ERROR;
00060   *ctxOUT = ctx;
00061 
00062   ctx->isSetup = false;
00063 
00064   ctx->rho_init = 2.0;
00065   ctx->rho_final = 0.0;
00066 
00067   ctx->m = 0;
00068   ctx->n = 0;
00069   ctx->rhs = NULL;
00070   ctx->A = NULL;
00071   ctx->F = NULL;
00072   ctx->sg = NULL;
00073 
00074   ctx->scale = NULL;
00075   ctx->isScaled = false;
00076   ctx->work = NULL;
00077   ctx->work2 = NULL;
00078   ctx->from = 0;
00079   ctx->to = 0;
00080 
00081   strcpy (ctx->algo_par, "pilu");
00082   strcpy (ctx->algo_ilu, "iluk");
00083   ctx->level = 1;
00084   ctx->droptol = DEFAULT_DROP_TOL;
00085   ctx->sparseTolA = 0.0;
00086   ctx->sparseTolF = 0.0;
00087   ctx->pivotMin = 0.0;
00088   ctx->pivotFix = PIVOT_FIX_DEFAULT;
00089   ctx->maxVal = 0.0;
00090 
00091   ctx->slist = NULL;
00092   ctx->extRows = NULL;
00093 
00094   strcpy (ctx->krylovMethod, "bicgstab");
00095   ctx->maxIts = 200;
00096   ctx->rtol = 1e-5;
00097   ctx->atol = 1e-50;
00098   ctx->its = 0;
00099   ctx->itsTotal = 0;
00100   ctx->setupCount = 0;
00101   ctx->logging = 0;
00102   ctx->printStats = (Parser_dhHasSwitch (parser_dh, "-printStats"));
00103 
00104   {
00105     int i;
00106     for (i = 0; i < TIMING_BINS; ++i)
00107       ctx->timing[i] = 0.0;
00108     for (i = 0; i < STATS_BINS; ++i)
00109       ctx->stats[i] = 0.0;
00110   }
00111   ctx->timingsWereReduced = false;
00112 
00113   ++ref_counter;
00114 END_FUNC_DH}
00115 
00116 
00117 #undef __FUNC__
00118 #define __FUNC__ "Euclid_dhDestroy"
00119 void
00120 Euclid_dhDestroy (Euclid_dh ctx)
00121 {
00122   START_FUNC_DH
00123     if (Parser_dhHasSwitch (parser_dh, "-eu_stats") || ctx->logging)
00124     {
00125       /* insert switch so memory report will also be printed */
00126       Parser_dhInsert (parser_dh, "-eu_mem", "1");
00127       CHECK_V_ERROR;
00128       Euclid_dhPrintHypreReport (ctx, stdout);
00129       CHECK_V_ERROR;
00130     }
00131 
00132   if (ctx->setupCount > 1 && ctx->printStats)
00133     {
00134       Euclid_dhPrintStatsShorter (ctx, stdout);
00135       CHECK_V_ERROR;
00136     }
00137 
00138   if (ctx->F != NULL)
00139     {
00140       Factor_dhDestroy (ctx->F);
00141       CHECK_V_ERROR;
00142     }
00143   if (ctx->sg != NULL)
00144     {
00145       SubdomainGraph_dhDestroy (ctx->sg);
00146       CHECK_V_ERROR;
00147     }
00148   if (ctx->scale != NULL)
00149     {
00150       FREE_DH (ctx->scale);
00151       CHECK_V_ERROR;
00152     }
00153   if (ctx->work != NULL)
00154     {
00155       FREE_DH (ctx->work);
00156       CHECK_V_ERROR;
00157     }
00158   if (ctx->work2 != NULL)
00159     {
00160       FREE_DH (ctx->work2);
00161       CHECK_V_ERROR;
00162     }
00163   if (ctx->slist != NULL)
00164     {
00165       SortedList_dhDestroy (ctx->slist);
00166       CHECK_V_ERROR;
00167     }
00168   if (ctx->extRows != NULL)
00169     {
00170       ExternalRows_dhDestroy (ctx->extRows);
00171       CHECK_V_ERROR;
00172     }
00173   FREE_DH (ctx);
00174   CHECK_V_ERROR;
00175 
00176   --ref_counter;
00177 END_FUNC_DH}
00178 
00179 
00180 /* on entry, "A" must have been set.  If this context is being
00181    reused, user must ensure ???
00182 */
00183 #undef __FUNC__
00184 #define __FUNC__ "Euclid_dhSetup"
/* Set up (or re-set up) the Euclid preconditioner for ctx->A:
 * builds the subdomain graph (once), allocates row-scaling and work
 * storage, performs the incomplete factorization, inverts the factor
 * diagonals, and prepares the parallel triangular-solve machinery.
 *
 * Preconditions: ctx->A must be set by the caller.  The routine may be
 * called repeatedly on the same context; the factor is rebuilt each
 * time, but an existing subdomain graph is reused (it must not change
 * between calls).
 */
void
Euclid_dhSetup (Euclid_dh ctx)
{
  START_FUNC_DH int m, n, beg_row;
  double t1;
  bool isSetup = ctx->isSetup;	/* true if a previous Setup completed */
  bool bj = false;		/* block-Jacobi mode requested via "-bj" */

  /*----------------------------------------------------
   * If Euclid was previously setup, print summary of
   * what happened during previous setup/solve
   *----------------------------------------------------*/
  if (ctx->setupCount && ctx->printStats)
    {
      Euclid_dhPrintStatsShorter (ctx, stdout);
      CHECK_V_ERROR;
      ctx->its = 0;
    }

  /*----------------------------------------------------
   * zero array for statistical reporting
   *----------------------------------------------------*/
  {
    int i;
    for (i = 0; i < STATS_BINS; ++i)
      ctx->stats[i] = 0.0;
  }

  /*----------------------------------------------------
   * internal timing
   *----------------------------------------------------*/
  ctx->timing[SOLVE_START_T] = MPI_Wtime ();
  /* sum timing from last linear solve cycle, if any */
  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;

  /* discard any factor from a previous setup; it is rebuilt below */
  if (ctx->F != NULL)
    {
      Factor_dhDestroy (ctx->F);
      CHECK_V_ERROR;
      ctx->F = NULL;
    }

  if (ctx->A == NULL)
    {
      SET_V_ERROR ("must set ctx->A before calling init");
    }
  EuclidGetDimensions (ctx->A, &beg_row, &m, &n);
  CHECK_V_ERROR;
  ctx->m = m;			/* local row count */
  ctx->n = n;			/* global row count */

  if (Parser_dhHasSwitch (parser_dh, "-print_size"))
    {
      printf_dh
  ("setting up linear system; global rows: %i  local rows: %i (on P_0)\n",
   n, m);
    }

  sprintf (msgBuf_dh, "localRow= %i;  globalRows= %i;  beg_row= %i", m, n,
     beg_row);
  SET_INFO (msgBuf_dh);

  bj = Parser_dhHasSwitch (parser_dh, "-bj");

  /*------------------------------------------------------------------------
   * Setup the SubdomainGraph, which contains connectivity and
   * permutation information.  If this context is being reused,
   * this may already have been done; if being reused, the underlying
   * subdomain graph cannot change (user's responsibility?)
   *------------------------------------------------------------------------*/
  if (ctx->sg == NULL)
    {
      int blocks = np_dh;
      t1 = MPI_Wtime ();
      /* sequential run: number of subdomains may be overridden with
         "-blocks"; parallel run: one subdomain per task (-1 sentinel) */
      if (np_dh == 1)
  {
    Parser_dhReadInt (parser_dh, "-blocks", &blocks);
    CHECK_V_ERROR;
    SubdomainGraph_dhCreate (&(ctx->sg));
    CHECK_V_ERROR;
    SubdomainGraph_dhInit (ctx->sg, blocks, bj, ctx->A);
    CHECK_V_ERROR;
  }
      else
  {
    SubdomainGraph_dhCreate (&(ctx->sg));
    CHECK_V_ERROR;
    SubdomainGraph_dhInit (ctx->sg, -1, bj, ctx->A);
    CHECK_V_ERROR;
  }
      ctx->timing[SUB_GRAPH_T] += (MPI_Wtime () - t1);
    }


/* SubdomainGraph_dhDump(ctx->sg, "SG.dump"); CHECK_V_ERROR; */

  /*----------------------------------------------------
   * for debugging: skip factorization entirely
   *----------------------------------------------------*/
  if (Parser_dhHasSwitch (parser_dh, "-doNotFactor"))
    {
      goto END_OF_FUNCTION;
    }


  /*----------------------------------------------------
   * query parser for runtime parameters
   * (only on the first setup; reuses prior values after that)
   *----------------------------------------------------*/
  if (!isSetup)
    {
      get_runtime_params_private (ctx);
      CHECK_V_ERROR;
    }
  /* NOTE(review): this clears bj exactly when algo_par IS "bj";
     bj is not read again below, so the assignment appears dead —
     confirm intent before changing. */
  if (!strcmp (ctx->algo_par, "bj"))
    bj = false;

  /*--------------------------------------------------------- 
   * allocate and initialize storage for row-scaling
   * (ctx->isScaled is set in get_runtime_params_private(); )
   *---------------------------------------------------------*/
  if (ctx->scale == NULL)
    {
      ctx->scale = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }
  {
    int i;
    for (i = 0; i < m; ++i)
      ctx->scale[i] = 1.0;	/* identity scaling until computed */
  }

  /*------------------------------------------------------------------ 
   * allocate work vectors; used in factorization and triangular solves;
   *------------------------------------------------------------------*/
  if (ctx->work == NULL)
    {
      ctx->work = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }
  if (ctx->work2 == NULL)
    {
      ctx->work2 = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }

  /*-----------------------------------------------------------------
   * perform the incomplete factorization (this should be, at least
   * for higher level ILUK, the most time-intensive portion of setup)
   *-----------------------------------------------------------------*/
  t1 = MPI_Wtime ();
  factor_private (ctx);
  CHECK_V_ERROR;
  ctx->timing[FACTOR_T] += (MPI_Wtime () - t1);

  /*-------------------------------------------------------------- 
   * invert diagonals, for faster triangular solves
   * (skipped when preconditioning is disabled: algo_par == "none")
   *--------------------------------------------------------------*/
  if (strcmp (ctx->algo_par, "none"))
    {
      invert_diagonals_private (ctx);
      CHECK_V_ERROR;
    }

  /*-------------------------------------------------------------- 
   * compute rho_final: global ratio of nzF/nzA
   * also, if -sparseA > 0,  compute ratio of nzA 
   * used in factorization
   *--------------------------------------------------------------*/
  /* for some reason compute_rho_private() was expensive, so now it's
     an option, unless there's only one mpi task.
   */
  if (Parser_dhHasSwitch (parser_dh, "-computeRho") || np_dh == 1)
    {
      if (strcmp (ctx->algo_par, "none"))
  {
    t1 = MPI_Wtime ();
    compute_rho_private (ctx);
    CHECK_V_ERROR;
    ctx->timing[COMPUTE_RHO_T] += (MPI_Wtime () - t1);
  }
    }

  /*-------------------------------------------------------------- 
   * if using PILU, set up persistent comms and global-to-local
   * number scheme, for efficient triangular solves.
   * (Thanks to Edmond Chow for these algorithmic ideas.)
   *--------------------------------------------------------------*/

  if (!strcmp (ctx->algo_par, "pilu") && np_dh > 1)
    {
      t1 = MPI_Wtime ();
      Factor_dhSolveSetup (ctx->F, ctx->sg);
      CHECK_V_ERROR;
      ctx->timing[SOLVE_SETUP_T] += (MPI_Wtime () - t1);
    }

END_OF_FUNCTION:;

  /*-------------------------------------------------------
   * internal timing
   *-------------------------------------------------------*/
  ctx->timing[SETUP_T] += (MPI_Wtime () - ctx->timing[SOLVE_START_T]);
  ctx->setupCount += 1;

  ctx->isSetup = true;

END_FUNC_DH}
00393 
00394 
00395 #undef __FUNC__
00396 #define __FUNC__ "get_runtime_params_private"
/* Read Euclid's runtime options from the global parser (parser_dh)
 * into ctx.  Called once, during the first Euclid_dhSetup().
 * Each Parser_dhRead* call leaves its target unchanged when the
 * option is absent, so defaults set in Euclid_dhCreate() survive.
 */
void
get_runtime_params_private (Euclid_dh ctx)
{
  START_FUNC_DH char *tmp;

  /* params for use of internal solvers */
  Parser_dhReadInt (parser_dh, "-maxIts", &(ctx->maxIts));
  Parser_dhReadDouble (parser_dh, "-rtol", &(ctx->rtol));
  Parser_dhReadDouble (parser_dh, "-atol", &(ctx->atol));

  /* parallelization strategy (bj, pilu, none);
     "-bj" as a bare switch overrides "-par" */
  tmp = NULL;
  Parser_dhReadString (parser_dh, "-par", &tmp);
  if (tmp != NULL)
    {
      strcpy (ctx->algo_par, tmp);
    }
  if (Parser_dhHasSwitch (parser_dh, "-bj"))
    {
      strcpy (ctx->algo_par, "bj");
    }


  /* factorization parameters */
  Parser_dhReadDouble (parser_dh, "-rho", &(ctx->rho_init));
  /* initial storage allocation for factor */
  /* "-pc_ilu_levels" (PETSc-style name) overrides "-level" when both
     are given, because it is read second */
  Parser_dhReadInt (parser_dh, "-level", &ctx->level);
  Parser_dhReadInt (parser_dh, "-pc_ilu_levels", &ctx->level);

  /* "-ilut <tol>" selects ILUT instead of ILU(k); ILUT requires
     row scaling */
  if (Parser_dhHasSwitch (parser_dh, "-ilut"))
    {
      Parser_dhReadDouble (parser_dh, "-ilut", &ctx->droptol);
      ctx->isScaled = true;
      strcpy (ctx->algo_ilu, "ilut");
    }

  /* make sure both algo_par and algo_ilu are set to "none,"
     if at least one is.
   */
  if (!strcmp (ctx->algo_par, "none"))
    {
      strcpy (ctx->algo_ilu, "none");
    }
  else if (!strcmp (ctx->algo_ilu, "none"))
    {
      strcpy (ctx->algo_par, "none");
    }


  Parser_dhReadDouble (parser_dh, "-sparseA", &(ctx->sparseTolA));
  /* sparsify A before factoring */
  Parser_dhReadDouble (parser_dh, "-sparseF", &(ctx->sparseTolF));
  /* sparsify after factoring */
  Parser_dhReadDouble (parser_dh, "-pivotMin", &(ctx->pivotMin));
  /* adjust pivots if smaller than this */
  /* NOTE(review): pivotFix is read as a double here although
     PIVOT_FIX_DEFAULT initializes it — confirm the field's type
     matches in the struct declaration */
  Parser_dhReadDouble (parser_dh, "-pivotFix", &(ctx->pivotFix));
  /* how to adjust pivots */

  /* set row scaling for mandatory cases */
  if (ctx->sparseTolA || !strcmp (ctx->algo_ilu, "ilut"))
    {
      ctx->isScaled = true;
    }

  /* solve method; "bcgs" is accepted as an alias for "bicgstab" */
  tmp = NULL;
  Parser_dhReadString (parser_dh, "-ksp_type", &tmp);
  if (tmp != NULL)
    {
      strcpy (ctx->krylovMethod, tmp);

      if (!strcmp (ctx->krylovMethod, "bcgs"))
  {
    strcpy (ctx->krylovMethod, "bicgstab");
  }
    }
END_FUNC_DH}
00474 
00475 #undef __FUNC__
00476 #define __FUNC__ "invert_diagonals_private"
00477 void
00478 invert_diagonals_private (Euclid_dh ctx)
00479 {
00480   START_FUNC_DH REAL_DH * aval = ctx->F->aval;
00481   int *diag = ctx->F->diag;
00482   if (aval == NULL || diag == NULL)
00483     {
00484       SET_INFO ("can't invert diags; either F->aval or F->diag is NULL");
00485     }
00486   else
00487     {
00488       int i, m = ctx->F->m;
00489       for (i = 0; i < m; ++i)
00490   {
00491     aval[diag[i]] = 1.0 / aval[diag[i]];
00492   }
00493     }
00494 END_FUNC_DH}
00495 
00496 
/* records rho_final: global ratio of factor to matrix nonzeros,
   summed (MPI_SUM) over all processors; result stored on task 0 */
00498 #undef __FUNC__
00499 #define __FUNC__ "compute_rho_private"
/* Compute ctx->rho_final = (global nonzeros in F) / (global nonzeros
 * in A), plus the percentage of A's nonzeros actually used during
 * factorization (stats[NZA_RATIO_STATS]).  Counts are summed across
 * all tasks with MPI_Reduce; the results are stored ONLY on the root
 * task (myid_dh == 0).  No-op when no factor exists.
 */
void
compute_rho_private (Euclid_dh ctx)
{
  START_FUNC_DH if (ctx->F != NULL)
    {
      double bufLocal[3], bufGlobal[3];
      int m = ctx->m;

      /* local factor nonzero count, from the factor's row pointers */
      ctx->stats[NZF_STATS] = (double) ctx->F->rp[m];
      bufLocal[0] = ctx->stats[NZA_STATS];	/* nzA */
      bufLocal[1] = ctx->stats[NZF_STATS];	/* nzF */
      bufLocal[2] = ctx->stats[NZA_USED_STATS];	/* nzA used */

      if (np_dh == 1)
  {
    bufGlobal[0] = bufLocal[0];
    bufGlobal[1] = bufLocal[1];
    bufGlobal[2] = bufLocal[2];
  }
      else
  {
    MPI_Reduce (bufLocal, bufGlobal, 3, MPI_DOUBLE, MPI_SUM, 0,
          comm_dh);
  }

      if (myid_dh == 0)
  {

    /* compute rho; -1 flags "unavailable" when a count is zero */
    if (bufGlobal[0] && bufGlobal[1])
      {
        ctx->rho_final = bufGlobal[1] / bufGlobal[0];
      }
    else
      {
        ctx->rho_final = -1;
      }

    /* compute ratio of nonzeros in A that were used */
    if (bufGlobal[0] && bufGlobal[2])
      {
        ctx->stats[NZA_RATIO_STATS] =
    100.0 * bufGlobal[2] / bufGlobal[0];
      }
    else
      {
        ctx->stats[NZA_RATIO_STATS] = 100.0;
      }
  }
    }
END_FUNC_DH}
00551 
00552 #undef __FUNC__
00553 #define __FUNC__ "factor_private"
00554 void
00555 factor_private (Euclid_dh ctx)
00556 {
00557   START_FUNC_DH
00558   /*-------------------------------------------------------------
00559    * special case, for testing/debugging: no preconditioning
00560    *-------------------------------------------------------------*/
00561     if (!strcmp (ctx->algo_par, "none"))
00562     {
00563       goto DO_NOTHING;
00564     }
00565 
00566   /*-------------------------------------------------------------
00567    * Initialize object to hold factor.
00568    *-------------------------------------------------------------*/
00569   {
00570     int br = 0;
00571     int id = np_dh;
00572     if (ctx->sg != NULL)
00573       {
00574   br = ctx->sg->beg_rowP[myid_dh];
00575   id = ctx->sg->o2n_sub[myid_dh];
00576       }
00577     Factor_dhInit (ctx->A, true, true, ctx->rho_init, id, br, &(ctx->F));
00578     CHECK_V_ERROR;
00579     ctx->F->bdry_count = ctx->sg->bdry_count[myid_dh];
00580     ctx->F->first_bdry = ctx->F->m - ctx->F->bdry_count;
00581     if (!strcmp (ctx->algo_par, "bj"))
00582       ctx->F->blockJacobi = true;
00583     if (Parser_dhHasSwitch (parser_dh, "-bj"))
00584       ctx->F->blockJacobi = true;
00585   }
00586 
00587   /*-------------------------------------------------------------
00588    * single mpi task with single or multiple subdomains
00589    *-------------------------------------------------------------*/
00590   if (np_dh == 1)
00591     {
00592 
00593       /* ILU(k) factorization */
00594       if (!strcmp (ctx->algo_ilu, "iluk"))
00595   {
00596     ctx->from = 0;
00597     ctx->to = ctx->m;
00598 
00599     /* only for debugging: use ilu_mpi_pilu */
00600     if (Parser_dhHasSwitch (parser_dh, "-mpi"))
00601       {
00602         if (ctx->sg != NULL && ctx->sg->blocks > 1)
00603     {
00604       SET_V_ERROR
00605         ("only use -mpi, which invokes ilu_mpi_pilu(), for np = 1 and -blocks 1");
00606     }
00607         iluk_mpi_pilu (ctx);
00608         CHECK_V_ERROR;
00609       }
00610 
00611     /* "normal" operation */
00612     else
00613       {
00614         iluk_seq_block (ctx);
00615         CHECK_V_ERROR;
00616         /* note: iluk_seq_block() performs block jacobi iluk if ctx->algo_par == bj.  */
00617       }
00618   }
00619 
00620       /* ILUT factorization */
00621       else if (!strcmp (ctx->algo_ilu, "ilut"))
00622   {
00623     ctx->from = 0;
00624     ctx->to = ctx->m;
00625     ilut_seq (ctx);
00626     CHECK_V_ERROR;
00627   }
00628 
00629       /* all other factorization methods */
00630       else
00631   {
00632     sprintf (msgBuf_dh, "factorization method: %s is not implemented",
00633        ctx->algo_ilu);
00634     SET_V_ERROR (msgBuf_dh);
00635   }
00636     }
00637 
00638   /*-------------------------------------------------------------
00639    * multiple mpi tasks with multiple subdomains
00640    *-------------------------------------------------------------*/
00641   else
00642     {
00643       /* block jacobi */
00644       if (!strcmp (ctx->algo_par, "bj"))
00645   {
00646     ctx->from = 0;
00647     ctx->to = ctx->m;
00648     iluk_mpi_bj (ctx);
00649     CHECK_V_ERROR;
00650   }
00651 
00652       /* iluk */
00653       else if (!strcmp (ctx->algo_ilu, "iluk"))
00654   {
00655     bool bj = ctx->F->blockJacobi;  /* for debugging */
00656 
00657     /* printf_dh("\n@@@ starting ilu_mpi_pilu @@@\n"); */
00658 
00659     SortedList_dhCreate (&(ctx->slist));
00660     CHECK_V_ERROR;
00661     SortedList_dhInit (ctx->slist, ctx->sg);
00662     CHECK_V_ERROR;
00663     ExternalRows_dhCreate (&(ctx->extRows));
00664     CHECK_V_ERROR;
00665     ExternalRows_dhInit (ctx->extRows, ctx);
00666     CHECK_V_ERROR;
00667 
00668     /* factor interior rows */
00669     ctx->from = 0;
00670     ctx->to = ctx->F->first_bdry;
00671 
00672 /*
00673 if (Parser_dhHasSwitch(parser_dh, "-test")) {
00674        printf("[%i] Euclid_dh :: TESTING ilu_seq\n", myid_dh);
00675        iluk_seq(ctx); CHECK_V_ERROR; 
00676 } else {
00677        iluk_mpi_pilu(ctx); CHECK_V_ERROR; 
00678 }
00679 */
00680 
00681     iluk_seq (ctx);
00682     CHECK_V_ERROR;
00683 
00684     /* get external rows from lower ordered neighbors in the
00685        subdomain graph; these rows are needed for factoring
00686        this subdomain's boundary rows.
00687      */
00688     if (!bj)
00689       {
00690         ExternalRows_dhRecvRows (ctx->extRows);
00691         CHECK_V_ERROR;
00692       }
00693 
00694     /* factor boundary rows */
00695     ctx->from = ctx->F->first_bdry;
00696     ctx->to = ctx->F->m;
00697     iluk_mpi_pilu (ctx);
00698     CHECK_V_ERROR;
00699 
00700     /* send this processor's boundary rows to higher ordered
00701        neighbors in the subdomain graph.
00702      */
00703     if (!bj)
00704       {
00705         ExternalRows_dhSendRows (ctx->extRows);
00706         CHECK_V_ERROR;
00707       }
00708 
00709     /* discard column indices in factor if they would alter
00710        the subdomain graph (any such elements are in upper
00711        triangular portion of the row)
00712      */
00713 
00714 
00715     SortedList_dhDestroy (ctx->slist);
00716     CHECK_V_ERROR;
00717     ctx->slist = NULL;
00718     ExternalRows_dhDestroy (ctx->extRows);
00719     CHECK_V_ERROR;
00720     ctx->extRows = NULL;
00721   }
00722 
00723       /* all other factorization methods */
00724       else
00725   {
00726     sprintf (msgBuf_dh, "factorization method: %s is not implemented",
00727        ctx->algo_ilu);
00728     SET_V_ERROR (msgBuf_dh);
00729   }
00730     }
00731 
00732 DO_NOTHING:;
00733 
00734 END_FUNC_DH}
00735 
#if 0	/* dead code: this whole function is compiled out; kept for
	   reference.  It was meant to be called from factor_private()
	   after PILU factorization (see the "discard column indices"
	   comment there). */

#undef __FUNC__
#define __FUNC__ "discard_indices_private"
/* Drop factor entries whose column is owned by a processor that is
 * not a neighbor in the original subdomain graph, then compact the
 * CSR arrays and rebuild the diagonal pointers.
 * NOTE(review): if ever re-enabled, `int m = F->m` below is missing
 * the `ctx->` prefix and would not compile; `ctx->nabors`,
 * `ctx->naborCount`, `ctx->beg_row` and find_owner_private_mpi()
 * must also still exist.
 */
void
discard_indices_private (Euclid_dh ctx)
{
  START_FUNC_DH
#if 0
  int *rp = ctx->F->rp, *cval = ctx->F->cval;
  double *aval = ctx->F->aval;
  int m = F->m, *nabors = ctx->nabors, nc = ctx->naborCount;
  int i, j, k, idx, count = 0, start_of_row;
  int beg_row = ctx->beg_row, end_row = beg_row + m;
  int *diag = ctx->F->diag;

  /* if col is not locally owned, and doesn't belong to a
   * nabor in the (original) subdomain graph, we need to discard
   * the column index and associated value.  First, we'll flag all
   * such indices for deletion.
   */
  for (i = 0; i < m; ++i)
    {
      for (j = rp[i]; j < rp[i + 1]; ++j)
  {
    int col = cval[j];
    if (col < beg_row || col >= end_row)
      {
        bool flag = true;
        int owner = find_owner_private_mpi (ctx, col);
        CHECK_V_ERROR;

        for (k = 0; k < nc; ++k)
    {
      if (nabors[k] == owner)
        {
          flag = false;
          break;
        }
    }

        if (flag)
    {
      cval[j] = -1;	/* mark entry for deletion */
      ++count;
    }
      }
  }
    }

  sprintf (msgBuf_dh,
     "deleting %i indices that would alter the subdomain graph", count);
  SET_INFO (msgBuf_dh);

  /* Second, perform the actual deletion */
  idx = 0;
  start_of_row = 0;
  for (i = 0; i < m; ++i)
    {
      for (j = start_of_row; j < rp[i + 1]; ++j)
  {
    int col = cval[j];
    double val = aval[j];
    if (col != -1)
      {
        cval[idx] = col;
        aval[idx] = val;
        ++idx;
      }
  }
      start_of_row = rp[i + 1];
      rp[i + 1] = idx;
    }

  /* rebuild diagonal pointers */
  for (i = 0; i < m; ++i)
    {
      for (j = rp[i]; j < rp[i + 1]; ++j)
  {
    if (cval[j] == i + beg_row)
      {
        diag[i] = j;
        break;
      }
  }
    }
#endif
END_FUNC_DH}
#endif
00825 
00826 #undef __FUNC__
00827 #define __FUNC__ "Euclid_dhSolve"
00828 void
00829 Euclid_dhSolve (Euclid_dh ctx, Vec_dh x, Vec_dh b, int *its)
00830 {
00831   START_FUNC_DH int itsOUT;
00832   Mat_dh A = (Mat_dh) ctx->A;
00833 
00834   if (!strcmp (ctx->krylovMethod, "cg"))
00835     {
00836       cg_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00837       ERRCHKA;
00838     }
00839   else if (!strcmp (ctx->krylovMethod, "bicgstab"))
00840     {
00841       bicgstab_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00842       ERRCHKA;
00843     }
00844   else
00845     {
00846       sprintf (msgBuf_dh, "unknown krylov solver: %s", ctx->krylovMethod);
00847       SET_V_ERROR (msgBuf_dh);
00848     }
00849   *its = itsOUT;
00850 END_FUNC_DH}
00851 
00852 #undef __FUNC__
00853 #define __FUNC__ "Euclid_dhPrintStats"
/* Print a full Euclid report (runtime parameters, factor statistics,
 * and a timing breakdown) to fp.  Timings are first reduced across
 * tasks via reduce_timings_private(); fprintf_dh presumably prints
 * only on the root task — confirm against its definition.
 */
void
Euclid_dhPrintStats (Euclid_dh ctx, FILE * fp)
{
  START_FUNC_DH double *timing;
  int nz;

  nz = Factor_dhReadNz (ctx->F);
  CHECK_V_ERROR;
  timing = ctx->timing;

  /* add in timing from last setup (if any) */
  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;

  reduce_timings_private (ctx);
  CHECK_V_ERROR;

  fprintf_dh (fp,
        "\n==================== Euclid report (start) ====================\n");
  fprintf_dh (fp, "\nruntime parameters\n");
  fprintf_dh (fp, "------------------\n");
  fprintf_dh (fp, "   setups:                 %i\n", ctx->setupCount);
  fprintf_dh (fp, "   tri solves:             %i\n", ctx->itsTotal);
  fprintf_dh (fp, "   parallelization method: %s\n", ctx->algo_par);
  fprintf_dh (fp, "   factorization method:   %s\n", ctx->algo_ilu);
  fprintf_dh (fp, "   matrix was row scaled:  %i\n", ctx->isScaled);

  fprintf_dh (fp, "   matrix row count:       %i\n", ctx->n);
  fprintf_dh (fp, "   nzF:                    %i\n", nz);
  fprintf_dh (fp, "   rho:                    %g\n", ctx->rho_final);
  fprintf_dh (fp, "   level:                  %i\n", ctx->level);
  fprintf_dh (fp, "   sparseA:                %g\n", ctx->sparseTolA);

  fprintf_dh (fp, "\nEuclid timing report\n");
  fprintf_dh (fp, "--------------------\n");
  fprintf_dh (fp, "   solves total:  %0.2f (see docs)\n",
        timing[TOTAL_SOLVE_T]);
  fprintf_dh (fp, "   tri solves:    %0.2f\n", timing[TRI_SOLVE_T]);
  fprintf_dh (fp, "   setups:        %0.2f\n", timing[SETUP_T]);
  fprintf_dh (fp, "      subdomain graph setup:  %0.2f\n",
        timing[SUB_GRAPH_T]);
  fprintf_dh (fp, "      factorization:          %0.2f\n", timing[FACTOR_T]);
  fprintf_dh (fp, "      solve setup:            %0.2f\n",
        timing[SOLVE_SETUP_T]);
  fprintf_dh (fp, "      rho:                    %0.2f\n",
        ctx->timing[COMPUTE_RHO_T]);
  /* "misc" is whatever setup time the individual bins don't cover */
  fprintf_dh (fp, "      misc (should be small): %0.2f\n",
        timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
         timing[SOLVE_SETUP_T] +
         timing[COMPUTE_RHO_T]));

  if (ctx->sg != NULL)
    {
      SubdomainGraph_dhPrintStats (ctx->sg, fp);
      CHECK_V_ERROR;
      SubdomainGraph_dhPrintRatios (ctx->sg, fp);
      CHECK_V_ERROR;
    }


  fprintf_dh (fp, "\nApplicable if Euclid's internal solvers were used:\n");
  fprintf_dh (fp, "---------------------------------------------------\n");
  fprintf_dh (fp, "   solve method: %s\n", ctx->krylovMethod);
  fprintf_dh (fp, "   maxIts:       %i\n", ctx->maxIts);
  fprintf_dh (fp, "   rtol:         %g\n", ctx->rtol);
  fprintf_dh (fp, "   atol:         %g\n", ctx->atol);
  fprintf_dh (fp,
        "\n==================== Euclid report (end) ======================\n");
END_FUNC_DH}
00923 
00924 
00925 /* Prints a one-line summary of solver statistics to fp.
00926    nzA ratio and rho refer to most recent solve, if more than
00927    one solve (call to Setup) was performed.  Other stats
00928    are cumulative.  setup and solve are timings from the caller. */
00929 #undef __FUNC__
00930 #define __FUNC__ "Euclid_dhPrintStatsShort"
00931 void
00932 Euclid_dhPrintStatsShort (Euclid_dh ctx, double setup, double solve,
00933         FILE * fp)
00934 {
00935   START_FUNC_DH double *timing = ctx->timing;
00936   /* double *stats = ctx->stats; */
00937   /* double setup_factor; */
00938   /* double setup_other; */
00939   double apply_total;   /* cumulative triangular-solve time */
00940   double apply_per_it;  /* triangular-solve time per iteration */
00941   /* double nzUsedRatio; */
00942   double perIt;   /* total solve time per iteration */
00943   int blocks = np_dh;
00944 
00945   if (np_dh == 1)   /* sequential run: subdomain count comes from the graph */
00946     blocks = ctx->sg->blocks;
00947 
00948   reduce_timings_private (ctx);   /* timings become max over all MPI ranks */
00949   CHECK_V_ERROR;
00950 
00951   /* setup_factor  = timing[FACTOR_T]; */
00952   /* setup_other   = timing[SETUP_T] - setup_factor; */
00953   apply_total = timing[TRI_SOLVE_T];
00954   apply_per_it = apply_total / (double) ctx->its;
00955   /* nzUsedRatio   = stats[NZA_RATIO_STATS]; */
00956   perIt = solve / (double) ctx->its;
00957 
00958   fprintf_dh (fp, "\n");
00959   fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00960         "method", "subdms", "level", "its", "setup", "solve", "total",
00961         "perIt", "perIt", "rows");
00962   fprintf_dh (fp,
00963         "------  -----  -----  -----  -----  -----  -----  -----  -----  -----  XX\n");
00964   fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.4f %6.5f %6g  XXX\n", ctx->algo_par,  /* parallelization strategy [pilu, bj] */
00965         blocks,   /* number of subdomains */
00966         ctx->level, /* level, for ILU(k) */
00967         ctx->its,   /* iterations */
00968         setup,    /* total setup time, from caller */
00969         solve,    /* total solve time, from caller */
00970         setup + solve,  /* total time, from caller */
00971         perIt,    /* time per iteration, solver+precond. */
00972         apply_per_it, /* time per iteration, triangular solves only */
00973         (double) ctx->n /* global unknowns */
00974     );
00975 
00976 
00977 
00978 
00979 #if 0
00980   fprintf_dh (fp, "\n");
00981   fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00982         "", "", "", "", "", "setup", "setup", "", "", "", "", "", "");
00983 
00984   fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00985         "method", "subdms", "level", "its", "total", "factor",
00986         "other", "apply", "perIt", "rho", "A_tol", "A_%", "rows");
00987   fprintf_dh (fp,
00988         "------  -----  -----  -----  -----  -----  -----  -----  -----  -----  -----  -----  ----- XX\n");
00989 
00990 
00991   fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.2f %6.4f %6.1f %6g %6.2f %6g  XXX\n", ctx->algo_par,  /* parallelization strategy [pilu, bj] */
00992         blocks,   /* number of subdomains */
00993         ctx->level, /* level, for ILU(k) */
00994         ctx->its,   /* iterations */
00995         setup,    /* total setup time, from caller */
00996         solve,    /* total solve time, from caller */
00997         setup_factor, /* pc solve: factorization */
00998         setup_other,  /* pc setup: other */
00999         apply_total,  /* triangular solve time */
01000         apply_per_it, /* time for one triangular solve */
01001         ctx->rho_final, /* rho */
01002         ctx->sparseTolA,  /* sparseA tolerance */
01003         nzUsedRatio,  /* percent of A that was used */
01004         (double) ctx->n /* global unknowns */
01005     );
01006 #endif
01007 
01008 #if 0
01009   /* special: for scalability studies */
01010   fprintf_dh (fp, "\n%6s %6s %6s %6s %6s %6s WW\n", "method", "level",
01011         "subGph", "factor", "solveS", "perIt");
01012   fprintf_dh (fp, "------  -----  -----  -----  -----  -----  WW\n");
01013   fprintf_dh (fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f  WWW\n",
01014         ctx->algo_par,
01015         ctx->level,
01016         timing[SUB_GRAPH_T],
01017         timing[FACTOR_T], timing[SOLVE_SETUP_T], apply_per_it);
01018 #endif
01019 END_FUNC_DH}
01020 
01021 
01022 /* its during last solve; rho; nzaUsed */
01023 #undef __FUNC__
01024 #define __FUNC__ "Euclid_dhPrintStatsShorter"
01025 void
01026 Euclid_dhPrintStatsShorter (Euclid_dh ctx, FILE * fp)
01027 {
01028   START_FUNC_DH double *stats = ctx->stats;
01029 
01030   int its = ctx->its;
01031   double rho = ctx->rho_final;
01032   double nzUsedRatio = stats[NZA_RATIO_STATS];
01033 
01034 
01035   fprintf_dh (fp, "\nStats from last linear solve: YY\n");
01036   fprintf_dh (fp, "%6s %6s %6s     YY\n", "its", "rho", "A_%");
01037   fprintf_dh (fp, " -----  -----  -----     YY\n");
01038   fprintf_dh (fp, "%6i %6.2f %6.2f     YYY\n", its, rho, nzUsedRatio);
01039 
01040 END_FUNC_DH}
01041 
01042 #undef __FUNC__
01043 #define __FUNC__ "Euclid_dhPrintScaling"
01044 void
01045 Euclid_dhPrintScaling (Euclid_dh ctx, FILE * fp)
01046 {
01047   START_FUNC_DH int i, m = ctx->m;
01048 
01049   if (m > 10)
01050     m = 10;
01051 
01052   if (ctx->scale == NULL)
01053     {
01054       SET_V_ERROR ("ctx->scale is NULL; was Euclid_dhSetup() called?");
01055     }
01056 
01057   fprintf (fp, "\n---------- 1st %i row scaling values:\n", m);
01058   for (i = 0; i < m; ++i)
01059     {
01060       fprintf (fp, "   %i  %g  \n", i + 1, ctx->scale[i]);
01061     }
01062 END_FUNC_DH}
01063 
01064 
01065 #undef __FUNC__
01066 #define __FUNC__ "reduce_timings_private"
01067 void
01068 reduce_timings_private (Euclid_dh ctx)
01069 {
01070   START_FUNC_DH if (np_dh > 1)
01071     {
01072       double bufOUT[TIMING_BINS];
01073 
01074       memcpy (bufOUT, ctx->timing, TIMING_BINS * sizeof (double));
01075       MPI_Reduce (bufOUT, ctx->timing, TIMING_BINS, MPI_DOUBLE, MPI_MAX, 0,
01076       comm_dh);
01077     }
01078 
01079   ctx->timingsWereReduced = true;
01080 END_FUNC_DH}
01081 
01082 #undef __FUNC__
01083 #define __FUNC__ "Euclid_dhPrintHypreReport"
01084 void
01085 Euclid_dhPrintHypreReport (Euclid_dh ctx, FILE * fp)
01086 {
01087   START_FUNC_DH double *timing;
01088   int nz;   /* nonzero count of the factor F */
01089 
01090   nz = Factor_dhReadNz (ctx->F);
01091   CHECK_V_ERROR;
01092   timing = ctx->timing;
01093 
01094   /* fold timing from the last setup (if any) into the running total */
01095   ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
01096   ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
01097 
01098   reduce_timings_private (ctx);   /* timings become max over all MPI ranks */
01099   CHECK_V_ERROR;
01100 
01101   if (myid_dh == 0)   /* only the root rank prints the report */
01102     {
01103 
01104       fprintf (fp,
01105          "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (start)\n");
01106       fprintf_dh (fp, "\nruntime parameters\n");
01107       fprintf_dh (fp, "------------------\n");
01108       fprintf_dh (fp, "   setups:                 %i\n", ctx->setupCount);
01109       fprintf_dh (fp, "   tri solves:             %i\n", ctx->itsTotal);
01110       fprintf_dh (fp, "   parallelization method: %s\n", ctx->algo_par);
01111       fprintf_dh (fp, "   factorization method:   %s\n", ctx->algo_ilu);
01112       if (!strcmp (ctx->algo_ilu, "iluk"))   /* fill level only applies to ILU(k) */
01113   {
01114     fprintf_dh (fp, "      level:               %i\n", ctx->level);
01115   }
01116 
01117       if (ctx->isScaled)
01118   {
01119     fprintf_dh (fp, "   matrix was row scaled\n");
01120   }
01121 
01122       fprintf_dh (fp, "   global matrix row count: %i\n", ctx->n);
01123       fprintf_dh (fp, "   nzF:                     %i\n", nz);
01124       fprintf_dh (fp, "   rho:                     %g\n", ctx->rho_final);
01125       fprintf_dh (fp, "   sparseA:                 %g\n", ctx->sparseTolA);
01126 
01127       fprintf_dh (fp, "\nEuclid timing report\n");
01128       fprintf_dh (fp, "--------------------\n");
01129       fprintf_dh (fp, "   solves total:  %0.2f (see docs)\n",
01130       timing[TOTAL_SOLVE_T]);
01131       fprintf_dh (fp, "   tri solves:    %0.2f\n", timing[TRI_SOLVE_T]);
01132       fprintf_dh (fp, "   setups:        %0.2f\n", timing[SETUP_T]);
01133       fprintf_dh (fp, "      subdomain graph setup:  %0.2f\n",
01134       timing[SUB_GRAPH_T]);
01135       fprintf_dh (fp, "      factorization:          %0.2f\n",
01136       timing[FACTOR_T]);
01137       fprintf_dh (fp, "      solve setup:            %0.2f\n",
01138       timing[SOLVE_SETUP_T]);
01139       fprintf_dh (fp, "      rho:                    %0.2f\n",
01140       ctx->timing[COMPUTE_RHO_T]);
01141       /* "misc" is whatever setup time the itemized bins don't account for */
01142       fprintf_dh (fp, "      misc (should be small): %0.2f\n",
01143       timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
01144              timing[SOLVE_SETUP_T] +
01145              timing[COMPUTE_RHO_T]));
01146 
01147       if (ctx->sg != NULL)
01148   {
01149     SubdomainGraph_dhPrintStats (ctx->sg, fp);
01150     CHECK_V_ERROR;
01151     SubdomainGraph_dhPrintRatios (ctx->sg, fp);
01152     CHECK_V_ERROR;
01153   }
01154 
01155       fprintf (fp,
01156          "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (end)\n");
01157 
01158     }
01159 
01160 END_FUNC_DH}
01160 
01161 #undef __FUNC__
01162 #define __FUNC__ "Euclid_dhPrintTestData"
01163 void
01164 Euclid_dhPrintTestData (Euclid_dh ctx, FILE * fp)
01165 {
01166   START_FUNC_DH
01167     /* Print data that should remain that will hopefully 
01168        remain the same for any platform.
01169        Possibly "tri solves" may change . . .
01170      */
01171     if (myid_dh == 0)
01172     {
01173       fprintf (fp, "   setups:                 %i\n", ctx->setupCount);
01174       fprintf (fp, "   tri solves:             %i\n", ctx->its);
01175       fprintf (fp, "   parallelization method: %s\n", ctx->algo_par);
01176       fprintf (fp, "   factorization method:   %s\n", ctx->algo_ilu);
01177       fprintf (fp, "   level:                  %i\n", ctx->level);
01178       fprintf (fp, "   row scaling:            %i\n", ctx->isScaled);
01179     }
01180   SubdomainGraph_dhPrintRatios (ctx->sg, fp);
01181   CHECK_V_ERROR;
01182 END_FUNC_DH}
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Generated on Wed Apr 13 10:05:33 2011 for Ifpack Package Browser (Single Doxygen Collection) by  doxygen 1.6.3