SubdomainGraph_dh.c

Go to the documentation of this file.
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "SubdomainGraph_dh.h"
00031 #include "getRow_dh.h"
00032 #include "Mem_dh.h"
00033 #include "Parser_dh.h"
00034 #include "Hash_i_dh.h"
00035 #include "mat_dh_private.h"
00036 #include "io_dh.h"
00037 #include "SortedSet_dh.h"
00038 #include "shellSort_dh.h"
00039 
00040 
00041 /* for debugging only! */
00042 #include <unistd.h>
00043 
00044 static void init_seq_private (SubdomainGraph_dh s, int blocks, bool bj,
00045             void *A);
00046 static void init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj,
00047             void *A);
00048 /*
00049 static void partition_metis_private(SubdomainGraph_dh s, void *A);
00050 
00051   grep for same below!
00052 */
00053 static void allocate_storage_private (SubdomainGraph_dh s, int blocks, int m,
00054               bool bj);
00055 static void form_subdomaingraph_mpi_private (SubdomainGraph_dh s);
00056 static void form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m,
00057                void *A);
00058 static void find_all_neighbors_sym_private (SubdomainGraph_dh s, int m,
00059               void *A);
00060 static void find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m,
00061                 void *A);
00062 static void find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
00063            int *interiorNodes, int *bdryNodes,
00064            int *interiorCount, int *bdryCount);
00065 static void find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m,
00066              void *A, int *interiorNodes,
00067              int *bdryNodes, int *interiorCount,
00068              int *bdryCount);
00069 
00070 static void find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A);
00071   /* above also forms n2o[] and o2n[] */
00072 
00073 static void find_ordered_neighbors_private (SubdomainGraph_dh s);
00074 static void color_subdomain_graph_private (SubdomainGraph_dh s);
00075 static void adjust_matrix_perms_private (SubdomainGraph_dh s, int m);
00076 
00077 #undef __FUNC__
00078 #define __FUNC__ "SubdomainGraph_dhCreate"
00079 void
00080 SubdomainGraph_dhCreate (SubdomainGraph_dh * s)
00081 {
00082   START_FUNC_DH
00083     struct _subdomain_dh *tmp =
00084     (struct _subdomain_dh *) MALLOC_DH (sizeof (struct _subdomain_dh));
00085   CHECK_V_ERROR;
00086   *s = tmp;
00087 
00088   tmp->blocks = 1;
00089   tmp->ptrs = tmp->adj = NULL;
00090   tmp->colors = 1;
00091   tmp->colorVec = NULL;
00092   tmp->o2n_sub = tmp->n2o_sub = NULL;
00093   tmp->beg_row = tmp->beg_rowP = NULL;
00094   tmp->bdry_count = tmp->row_count = NULL;
00095   tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL;
00096   tmp->loCount = tmp->hiCount = tmp->allCount = 0;
00097 
00098   tmp->m = 0;
00099   tmp->n2o_row = tmp->o2n_col = NULL;
00100   tmp->o2n_ext = tmp->n2o_ext = NULL;
00101 
00102   tmp->doNotColor = Parser_dhHasSwitch (parser_dh, "-doNotColor");
00103   tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_SubGraph");
00104   {
00105     int i;
00106     for (i = 0; i < TIMING_BINS_SG; ++i)
00107       tmp->timing[i] = 0.0;
00108   }
00109 END_FUNC_DH}
00110 
00111 #undef __FUNC__
00112 #define __FUNC__ "SubdomainGraph_dhDestroy"
00113 void
00114 SubdomainGraph_dhDestroy (SubdomainGraph_dh s)
00115 {
00116   START_FUNC_DH if (s->ptrs != NULL)
00117     {
00118       FREE_DH (s->ptrs);
00119       CHECK_V_ERROR;
00120     }
00121   if (s->adj != NULL)
00122     {
00123       FREE_DH (s->adj);
00124       CHECK_V_ERROR;
00125     }
00126   if (s->colorVec != NULL)
00127     {
00128       FREE_DH (s->colorVec);
00129       CHECK_V_ERROR;
00130     }
00131   if (s->o2n_sub != NULL)
00132     {
00133       FREE_DH (s->o2n_sub);
00134       CHECK_V_ERROR;
00135     }
00136   if (s->n2o_sub != NULL)
00137     {
00138       FREE_DH (s->n2o_sub);
00139       CHECK_V_ERROR;
00140     }
00141 
00142   if (s->beg_row != NULL)
00143     {
00144       FREE_DH (s->beg_row);
00145       CHECK_V_ERROR;
00146     }
00147   if (s->beg_rowP != NULL)
00148     {
00149       FREE_DH (s->beg_rowP);
00150       CHECK_V_ERROR;
00151     }
00152   if (s->row_count != NULL)
00153     {
00154       FREE_DH (s->row_count);
00155       CHECK_V_ERROR;
00156     }
00157   if (s->bdry_count != NULL)
00158     {
00159       FREE_DH (s->bdry_count);
00160       CHECK_V_ERROR;
00161     }
00162   if (s->loNabors != NULL)
00163     {
00164       FREE_DH (s->loNabors);
00165       CHECK_V_ERROR;
00166     }
00167   if (s->hiNabors != NULL)
00168     {
00169       FREE_DH (s->hiNabors);
00170       CHECK_V_ERROR;
00171     }
00172   if (s->allNabors != NULL)
00173     {
00174       FREE_DH (s->allNabors);
00175       CHECK_V_ERROR;
00176     }
00177 
00178   if (s->n2o_row != NULL)
00179     {
00180       FREE_DH (s->n2o_row);
00181       CHECK_V_ERROR;
00182     }
00183   if (s->o2n_col != NULL)
00184     {
00185       FREE_DH (s->o2n_col);
00186       CHECK_V_ERROR;
00187     }
00188   if (s->o2n_ext != NULL)
00189     {
00190       Hash_i_dhDestroy (s->o2n_ext);
00191       CHECK_V_ERROR;
00192     }
00193   if (s->n2o_ext != NULL)
00194     {
00195       Hash_i_dhDestroy (s->n2o_ext);
00196       CHECK_V_ERROR;
00197     }
00198   FREE_DH (s);
00199   CHECK_V_ERROR;
00200 END_FUNC_DH}
00201 
00202 #undef __FUNC__
00203 #define __FUNC__ "SubdomainGraph_dhInit"
00204 void
00205 SubdomainGraph_dhInit (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00206 {
00207   START_FUNC_DH double t1 = MPI_Wtime ();
00208 
00209   if (blocks < 1)
00210     blocks = 1;
00211 
00212   if (np_dh == 1 || blocks > 1)
00213     {
00214       s->blocks = blocks;
00215       init_seq_private (s, blocks, bj, A);
00216       CHECK_V_ERROR;
00217     }
00218   else
00219     {
00220       s->blocks = np_dh;
00221       init_mpi_private (s, np_dh, bj, A);
00222       CHECK_V_ERROR;
00223     }
00224 
00225   s->timing[TOTAL_SGT] += (MPI_Wtime () - t1);
00226 END_FUNC_DH}
00227 
00228 
00229 #undef __FUNC__
00230 #define __FUNC__ "SubdomainGraph_dhFindOwner"
00231 int
00232 SubdomainGraph_dhFindOwner (SubdomainGraph_dh s, int idx, bool permuted)
00233 {
00234   START_FUNC_DH int sd;
00235   int *beg_row = s->beg_row;
00236   int *row_count = s->row_count;
00237   int owner = -1, blocks = s->blocks;
00238 
00239   if (permuted)
00240     beg_row = s->beg_rowP;
00241 
00242   /* determine the subdomain that contains "idx" */
00243   for (sd = 0; sd < blocks; ++sd)
00244     {
00245       if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd])
00246   {
00247     owner = sd;
00248     break;
00249   }
00250     }
00251 
00252   if (owner == -1)
00253     {
00254 
00255       fprintf (stderr, "@@@ failed to find owner for idx = %i @@@\n", idx);
00256       fprintf (stderr, "blocks= %i\n", blocks);
00257 
00258       sprintf (msgBuf_dh, "failed to find owner for idx = %i", idx);
00259       SET_ERROR (-1, msgBuf_dh);
00260     }
00261 
00262 END_FUNC_VAL (owner)}
00263 
00264 
00265 #undef __FUNC__
00266 #define __FUNC__ "SubdomainGraph_dhPrintStatsLong"
00267 void
00268 SubdomainGraph_dhPrintStatsLong (SubdomainGraph_dh s, FILE * fp)
00269 {
00270   START_FUNC_DH int i, j, k;
00271   double max = 0, min = INT_MAX;
00272 
00273   fprintf (fp,
00274      "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
00275   fprintf (fp, "colors used     = %i\n", s->colors);
00276   fprintf (fp, "subdomain count = %i\n", s->blocks);
00277 
00278 
00279   fprintf (fp, "\ninterior/boundary node ratios:\n");
00280 
00281   for (i = 0; i < s->blocks; ++i)
00282     {
00283       int inNodes = s->row_count[i] - s->bdry_count[i];
00284       int bdNodes = s->bdry_count[i];
00285       double ratio;
00286 
00287       if (bdNodes == 0)
00288   {
00289     ratio = -1;
00290   }
00291       else
00292   {
00293     ratio = (double) inNodes / (double) bdNodes;
00294   }
00295 
00296       max = MAX (max, ratio);
00297       min = MIN (min, ratio);
00298       fprintf (fp,
00299          "   P_%i: first= %3i  rowCount= %3i  interior= %3i  bdry= %3i  ratio= %0.1f\n",
00300          i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes,
00301          ratio);
00302     }
00303 
00304 
00305   fprintf (fp, "\nmax interior/bdry ratio = %.1f\n", max);
00306   fprintf (fp, "min interior/bdry ratio = %.1f\n", min);
00307 
00308 
00309     /*-----------------------------------------
00310      * subdomain graph
00311      *-----------------------------------------*/
00312   if (s->adj != NULL)
00313     {
00314       fprintf (fp, "\nunpermuted subdomain graph: \n");
00315       for (i = 0; i < s->blocks; ++i)
00316   {
00317     fprintf (fp, "%i :: ", i);
00318     for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
00319       {
00320         fprintf (fp, "%i  ", s->adj[j]);
00321       }
00322     fprintf (fp, "\n");
00323   }
00324     }
00325 
00326 
00327     /*-----------------------------------------
00328      * subdomain permutation
00329      *-----------------------------------------*/
00330   fprintf (fp, "\no2n subdomain permutation:\n");
00331   for (i = 0; i < s->blocks; ++i)
00332     {
00333       fprintf (fp, "  %i %i\n", i, s->o2n_sub[i]);
00334     }
00335   fprintf (fp, "\n");
00336 
00337   if (np_dh > 1)
00338     {
00339 
00340     /*-----------------------------------------
00341      * local n2o_row permutation
00342      *-----------------------------------------*/
00343       fprintf (fp, "\nlocal n2o_row permutation:\n   ");
00344       for (i = 0; i < s->row_count[myid_dh]; ++i)
00345   {
00346     fprintf (fp, "%i ", s->n2o_row[i]);
00347   }
00348       fprintf (fp, "\n");
00349 
00350     /*-----------------------------------------
00351      * local n2o permutation
00352      *-----------------------------------------*/
00353       fprintf (fp, "\nlocal o2n_col permutation:\n   ");
00354       for (i = 0; i < s->row_count[myid_dh]; ++i)
00355   {
00356     fprintf (fp, "%i ", s->o2n_col[i]);
00357   }
00358       fprintf (fp, "\n");
00359 
00360     }
00361   else
00362     {
00363     /*-----------------------------------------
00364      * local n2o_row permutation 
00365      *-----------------------------------------*/
00366       fprintf (fp, "\nlocal n2o_row permutation:\n");
00367       fprintf (fp, "--------------------------\n");
00368       for (k = 0; k < s->blocks; ++k)
00369   {
00370     int beg_row = s->beg_row[k];
00371     int end_row = beg_row + s->row_count[k];
00372 
00373     for (i = beg_row; i < end_row; ++i)
00374       {
00375         fprintf (fp, "%i ", s->n2o_row[i]);
00376       }
00377     fprintf (fp, "\n");
00378   }
00379 
00380     /*-----------------------------------------
00381      * local n2o permutation
00382      *-----------------------------------------*/
00383       fprintf (fp, "\nlocal o2n_col permutation:\n");
00384       fprintf (fp, "--------------------------\n");
00385       for (k = 0; k < s->blocks; ++k)
00386   {
00387     int beg_row = s->beg_row[k];
00388     int end_row = beg_row + s->row_count[k];
00389 
00390     for (i = beg_row; i < end_row; ++i)
00391       {
00392         fprintf (fp, "%i ", s->o2n_col[i]);
00393       }
00394     fprintf (fp, "\n");
00395   }
00396 
00397 
00398     }
00399 
00400 END_FUNC_DH}
00401 
00402 #undef __FUNC__
00403 #define __FUNC__ "init_seq_private"
00404 void
00405 init_seq_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00406 {
00407   START_FUNC_DH int m, n, beg_row;
00408   double t1;
00409 
00410   /*-------------------------------------------------------
00411    * get number of local rows (m), global rows (n), and
00412    * global numbering of first locally owned row
00413    * (for sequential, beg_row=0 and m == n
00414    *-------------------------------------------------------*/
00415   EuclidGetDimensions (A, &beg_row, &m, &n);
00416   CHECK_V_ERROR;
00417   s->m = n;
00418 
00419   /*-------------------------------------------------------
00420    * allocate storage for all data structures
00421    * EXCEPT s->adj and hash tables.
00422    * (but note that hash tables aren't used for sequential)
00423    *-------------------------------------------------------*/
00424   allocate_storage_private (s, blocks, m, bj);
00425   CHECK_V_ERROR;
00426 
00427   /*-------------------------------------------------------------
00428    * Fill in: beg_row[]
00429    *          beg_rowP[]
00430    *          row_count[]
00431    * At this point, beg_rowP[] is a copy of beg_row[])
00432    *-------------------------------------------------------------*/
00433   {
00434     int i;
00435     int rpp = m / blocks;
00436 
00437     if (rpp * blocks < m)
00438       ++rpp;
00439 
00440     s->beg_row[0] = 0;
00441     for (i = 1; i < blocks; ++i)
00442       s->beg_row[i] = rpp + s->beg_row[i - 1];
00443     for (i = 0; i < blocks; ++i)
00444       s->row_count[i] = rpp;
00445     s->row_count[blocks - 1] = m - rpp * (blocks - 1);
00446   }
00447   memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (int));
00448 
00449 
00450   /*-----------------------------------------------------------------
00451    * Find all neighboring processors in subdomain graph.
00452    * This block fills in: allNabors[]
00453    *-----------------------------------------------------------------*/
00454   /* NA for sequential! */
00455 
00456 
00457   /*-------------------------------------------------------
00458    * Count number of interior nodes for each subdomain;
00459    * also, form permutation vector to order boundary
00460    * nodes last in each subdomain.
00461    * This block fills in: bdry_count[]
00462    *                      n2o_col[]
00463    *                      o2n_row[]
00464    *-------------------------------------------------------*/
00465   t1 = MPI_Wtime ();
00466   if (!bj)
00467     {
00468       find_bdry_nodes_seq_private (s, m, A);
00469       CHECK_V_ERROR;
00470     }
00471   else
00472     {
00473       int i;
00474       for (i = 0; i < m; ++i)
00475   {
00476     s->n2o_row[i] = i;
00477     s->o2n_col[i] = i;
00478   }
00479     }
00480   s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00481 
00482   /*-------------------------------------------------------
00483    * Form subdomain graph,
00484    * then color and reorder subdomain graph.
00485    * This block fills in: ptr[]
00486    *                      adj[]
00487    *                      o2n_sub[]
00488    *                      n2o_sub[]
00489    *                      beg_rowP[]
00490    *-------------------------------------------------------*/
00491   t1 = MPI_Wtime ();
00492   if (!bj)
00493     {
00494       form_subdomaingraph_seq_private (s, m, A);
00495       CHECK_V_ERROR;
00496       if (s->doNotColor)
00497   {
00498     int i;
00499     printf_dh ("subdomain coloring and reordering is OFF\n");
00500     for (i = 0; i < blocks; ++i)
00501       {
00502         s->o2n_sub[i] = i;
00503         s->n2o_sub[i] = i;
00504         s->colorVec[i] = 0;
00505       }
00506   }
00507       else
00508   {
00509     SET_INFO ("subdomain coloring and reordering is ON");
00510     color_subdomain_graph_private (s);
00511     CHECK_V_ERROR;
00512   }
00513     }
00514 
00515   /* bj setup */
00516   else
00517     {
00518       int i;
00519       for (i = 0; i < blocks; ++i)
00520   {
00521     s->o2n_sub[i] = i;
00522     s->n2o_sub[i] = i;
00523   }
00524     }
00525   s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00526 
00527   /*-------------------------------------------------------
00528    * Here's a step we DON'T do for the parallel case:
00529    * we need to adjust the matrix row and column perms
00530    * to reflect subdomain reordering (for the parallel
00531    * case, these permutation vectors are purely local and
00532    * zero-based)
00533    *-------------------------------------------------------*/
00534   if (!bj)
00535     {
00536       adjust_matrix_perms_private (s, m);
00537       CHECK_V_ERROR;
00538     }
00539 
00540   /*-------------------------------------------------------
00541    * Build lists of lower and higher ordered neighbors.
00542    * This block fills in: loNabors[]
00543    *                      hiNabors[]
00544    *-------------------------------------------------------*/
00545   /* NA for sequential */
00546 
00547   /*-------------------------------------------------------
00548    *  Exchange boundary node permutation information with
00549    *  neighboring processors in the subdomain graph.
00550    *  This block fills in: o2n_ext (hash table)
00551    *                       n2o_ext (hash table)
00552    *-------------------------------------------------------*/
00553   /* NA for sequential */
00554 
00555 
00556 END_FUNC_DH}
00557 
00558 
00559 #if 0
00560 #undef __FUNC__
00561 #define __FUNC__ "partition_metis_private"
00562 void
00563 partition_metis_private (SubdomainGraph_dh s, void *A)
00564 {
00565   START_FUNC_DH if (ignoreMe)
00566     SET_V_ERROR ("not implemented");
00567 END_FUNC_DH}
00568 #endif
00569 
00570 #undef __FUNC__
00571 #define __FUNC__ "allocate_storage_private"
00572 void
00573 allocate_storage_private (SubdomainGraph_dh s, int blocks, int m, bool bj)
00574 {
00575   START_FUNC_DH if (!bj)
00576     {
00577       s->ptrs = (int *) MALLOC_DH ((blocks + 1) * sizeof (int));
00578       CHECK_V_ERROR;
00579       s->ptrs[0] = 0;
00580       s->colorVec = (int *) MALLOC_DH (blocks * sizeof (int));
00581       CHECK_V_ERROR;
00582       s->loNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00583       CHECK_V_ERROR;
00584       s->hiNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00585       CHECK_V_ERROR;
00586       s->allNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00587       CHECK_V_ERROR;
00588     }
00589 
00590   s->n2o_row = (int *) MALLOC_DH (m * sizeof (int));
00591   CHECK_V_ERROR;
00592   s->o2n_col = (int *) MALLOC_DH (m * sizeof (int));
00593   CHECK_V_ERROR;
00594 
00595   /* these are probably only needed for single mpi task -- ?? */
00596   /* nope; beg_row and row_ct are needed by ilu_mpi_bj; yuck! */
00597   s->beg_row = (int *) MALLOC_DH ((blocks) * sizeof (int));
00598   CHECK_V_ERROR;
00599   s->beg_rowP = (int *) MALLOC_DH ((blocks) * sizeof (int));
00600   CHECK_V_ERROR;
00601   s->row_count = (int *) MALLOC_DH (blocks * sizeof (int));
00602   CHECK_V_ERROR;
00603   s->bdry_count = (int *) MALLOC_DH (blocks * sizeof (int));
00604   CHECK_V_ERROR;
00605   s->o2n_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00606   CHECK_V_ERROR;
00607   s->n2o_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00608   CHECK_V_ERROR;
00609 
00610 END_FUNC_DH}
00611 
00612 /*-----------------------------------------------------------------*/
00613 
00614 
00615 #undef __FUNC__
00616 #define __FUNC__ "init_mpi_private"
00617 void
00618 init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00619 {
00620   START_FUNC_DH int m, n, beg_row;
00621   bool symmetric;
00622   double t1;
00623 
00624   symmetric = Parser_dhHasSwitch (parser_dh, "-sym");
00625   CHECK_V_ERROR;
00626   if (Parser_dhHasSwitch (parser_dh, "-makeSymmetric"))
00627     {
00628       symmetric = true;
00629     }
00630 
00631   /*-------------------------------------------------------
00632    * get number of local rows (m), global rows (n), and
00633    * global numbering of first locally owned row
00634    *-------------------------------------------------------*/
00635   EuclidGetDimensions (A, &beg_row, &m, &n);
00636   CHECK_V_ERROR;
00637   s->m = m;
00638 
00639 
00640   /*-------------------------------------------------------
00641    * allocate storage for all data structures
00642    * EXCEPT s->adj and hash tables.
00643    *-------------------------------------------------------*/
00644   allocate_storage_private (s, blocks, m, bj);
00645   CHECK_V_ERROR;
00646 
00647   /*-------------------------------------------------------------
00648    * Fill in: beg_row[]
00649    *          beg_rowP[]
00650    *          row_count[]
00651    * At this point, beg_rowP[] is a copy of beg_row[])
00652    *-------------------------------------------------------------*/
00653   if (!bj)
00654     {
00655       MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh);
00656       MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh);
00657       memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (int));
00658     }
00659   else
00660     {
00661       s->beg_row[myid_dh] = beg_row;
00662       s->beg_rowP[myid_dh] = beg_row;
00663       s->row_count[myid_dh] = m;
00664     }
00665 
00666   /*-----------------------------------------------------------------
00667    * Find all neighboring processors in subdomain graph.
00668    * This block fills in: allNabors[]
00669    *-----------------------------------------------------------------*/
00670   if (!bj)
00671     {
00672       t1 = MPI_Wtime ();
00673       if (symmetric)
00674   {
00675     find_all_neighbors_sym_private (s, m, A);
00676     CHECK_V_ERROR;
00677   }
00678       else
00679   {
00680     find_all_neighbors_unsym_private (s, m, A);
00681     CHECK_V_ERROR;
00682   }
00683       s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1);
00684     }
00685 
00686 
00687   /*-----------------------------------------------------------------
00688    *  determine which rows are boundary rows, and which are interior
00689    *  rows; also, form permutation vector to order interior
00690    *  nodes first within each subdomain
00691    *  This block fills in: bdry_count[]
00692    *                       n2o_col[]
00693    *                       o2n_row[]
00694    *-----------------------------------------------------------------*/
00695   t1 = MPI_Wtime ();
00696   if (!bj)
00697     {
00698       int *interiorNodes, *bdryNodes;
00699       int interiorCount, bdryCount;
00700       int *o2n = s->o2n_col, idx;
00701       int i;
00702 
00703       interiorNodes = (int *) MALLOC_DH (m * sizeof (int));
00704       CHECK_V_ERROR;
00705       bdryNodes = (int *) MALLOC_DH (m * sizeof (int));
00706       CHECK_V_ERROR;
00707 
00708       /* divide this subdomain's rows into interior and boundary rows;
00709          the returned lists are with respect to local numbering.
00710        */
00711       if (symmetric)
00712   {
00713     find_bdry_nodes_sym_private (s, m, A,
00714                interiorNodes, bdryNodes,
00715                &interiorCount, &bdryCount);
00716     CHECK_V_ERROR;
00717   }
00718       else
00719   {
00720     find_bdry_nodes_unsym_private (s, m, A,
00721            interiorNodes, bdryNodes,
00722            &interiorCount, &bdryCount);
00723     CHECK_V_ERROR;
00724   }
00725 
00726       /* exchange number of boundary rows with all neighbors */
00727       MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT,
00728          comm_dh);
00729 
00730       /* form local permutation */
00731       idx = 0;
00732       for (i = 0; i < interiorCount; ++i)
00733   {
00734     o2n[interiorNodes[i]] = idx++;
00735   }
00736       for (i = 0; i < bdryCount; ++i)
00737   {
00738     o2n[bdryNodes[i]] = idx++;
00739   }
00740 
00741       /* invert permutation */
00742       invert_perm (m, o2n, s->n2o_row);
00743       CHECK_V_ERROR;
00744 
00745       FREE_DH (interiorNodes);
00746       CHECK_V_ERROR;
00747       FREE_DH (bdryNodes);
00748       CHECK_V_ERROR;
00749     }
00750 
00751   /* bj setup */
00752   else
00753     {
00754       int *o2n = s->o2n_col, *n2o = s->n2o_row;
00755       int i, m = s->m;
00756 
00757       for (i = 0; i < m; ++i)
00758   {
00759     o2n[i] = i;
00760     n2o[i] = i;
00761   }
00762     }
00763   s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00764 
00765   /*-------------------------------------------------------
00766    * Form subdomain graph,
00767    * then color and reorder subdomain graph.
00768    * This block fills in: ptr[]
00769    *                      adj[]
00770    *                      o2n_sub[]
00771    *                      n2o_sub[]
00772    *                      beg_rowP[]
00773    *-------------------------------------------------------*/
00774   if (!bj)
00775     {
00776       t1 = MPI_Wtime ();
00777       form_subdomaingraph_mpi_private (s);
00778       CHECK_V_ERROR;
00779       if (s->doNotColor)
00780   {
00781     int i;
00782     printf_dh ("subdomain coloring and reordering is OFF\n");
00783     for (i = 0; i < blocks; ++i)
00784       {
00785         s->o2n_sub[i] = i;
00786         s->n2o_sub[i] = i;
00787         s->colorVec[i] = 0;
00788       }
00789   }
00790       else
00791   {
00792     SET_INFO ("subdomain coloring and reordering is ON");
00793     color_subdomain_graph_private (s);
00794     CHECK_V_ERROR;
00795   }
00796       s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00797     }
00798 
00799   /*-------------------------------------------------------
00800    * Build lists of lower and higher ordered neighbors.
00801    * This block fills in: loNabors[]
00802    *                      hiNabors[]
00803    *-------------------------------------------------------*/
00804   if (!bj)
00805     {
00806       find_ordered_neighbors_private (s);
00807       CHECK_V_ERROR;
00808     }
00809 
00810   /*-------------------------------------------------------
00811    *  Exchange boundary node permutation information with
00812    *  neighboring processors in the subdomain graph.
00813    *  This block fills in: o2n_ext (hash table)
00814    *                       n2o_ext (hash table)
00815    *-------------------------------------------------------*/
00816   if (!bj)
00817     {
00818       t1 = MPI_Wtime ();
00819       SubdomainGraph_dhExchangePerms (s);
00820       CHECK_V_ERROR;
00821       s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1);
00822     }
00823 
00824 END_FUNC_DH}
00825 
00826 
00827 
00828 #undef __FUNC__
00829 #define __FUNC__ "SubdomainGraph_dhExchangePerms"
00830 void
00831 SubdomainGraph_dhExchangePerms (SubdomainGraph_dh s)
00832 {
00833   START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL;
00834   MPI_Status *status = NULL;
00835   int *nabors = s->allNabors, naborCount = s->allCount;
00836   int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz;
00837   int m = s->row_count[myid_dh];
00838   int beg_row = s->beg_row[myid_dh];
00839   int beg_rowP = s->beg_rowP[myid_dh];
00840   int *bdryNodeCounts = s->bdry_count;
00841   int myBdryCount = s->bdry_count[myid_dh];
00842   bool debug = false;
00843   int myFirstBdry = m - myBdryCount;
00844   int *n2o_row = s->n2o_row;
00845   Hash_i_dh n2o_table, o2n_table;
00846 
00847   if (logFile != NULL && s->debug)
00848     debug = true;
00849 
00850   /* allocate send buffer, and copy permutation info to buffer;
00851      each entry is a <old_value, new_value> pair.
00852    */
00853   sendBuf = (int *) MALLOC_DH (2 * myBdryCount * sizeof (int));
00854   CHECK_V_ERROR;
00855 
00856 
00857   if (debug)
00858     {
00859       fprintf (logFile,
00860          "\nSUBG myFirstBdry= %i  myBdryCount= %i  m= %i  beg_rowP= %i\n",
00861          1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP);
00862       fflush (logFile);
00863     }
00864 
00865   for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00866     {
00867       sendBuf[2 * j] = n2o_row[i] + beg_row;
00868       sendBuf[2 * j + 1] = i + beg_rowP;
00869     }
00870 
00871   if (debug)
00872     {
00873       fprintf (logFile, "\nSUBG SEND_BUF:\n");
00874       for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00875   {
00876     fprintf (logFile, "SUBG  %i, %i\n", 1 + sendBuf[2 * j],
00877        1 + sendBuf[2 * j + 1]);
00878   }
00879       fflush (logFile);
00880     }
00881 
00882   /* allocate a receive buffer for each nabor in the subdomain graph,
00883      and set up index array for locating the beginning of each
00884      nabor's buffers.
00885    */
00886   naborIdx = (int *) MALLOC_DH ((1 + naborCount) * sizeof (int));
00887   CHECK_V_ERROR;
00888   naborIdx[0] = 0;
00889   nz = 0;
00890   for (i = 0; i < naborCount; ++i)
00891     {
00892       nz += (2 * bdryNodeCounts[nabors[i]]);
00893       naborIdx[i + 1] = nz;
00894     }
00895 
00896 
00897   recvBuf = (int *) MALLOC_DH (nz * sizeof (int));
00898   CHECK_V_ERROR;
00899 
00900 
00901 /* for (i=0; i<nz; ++i) recvBuf[i] = -10; */
00902 
00903   /* perform sends and receives */
00904   recv_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00905   CHECK_V_ERROR;
00906   send_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00907   CHECK_V_ERROR;
00908   status = (MPI_Status *) MALLOC_DH (naborCount * sizeof (MPI_Status));
00909   CHECK_V_ERROR;
00910 
00911   for (i = 0; i < naborCount; ++i)
00912     {
00913       int nabr = nabors[i];
00914       int *buf = recvBuf + naborIdx[i];
00915       int ct = 2 * bdryNodeCounts[nabr];
00916 
00917 
00918       MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh,
00919      &(send_req[i]));
00920 
00921       if (debug)
00922   {
00923     fprintf (logFile, "SUBG   sending %i elts to %i\n", 2 * myBdryCount,
00924        nabr);
00925     fflush (logFile);
00926   }
00927 
00928       MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i]));
00929 
00930       if (debug)
00931   {
00932     fprintf (logFile, "SUBG  receiving %i elts from %i\n", ct, nabr);
00933     fflush (logFile);
00934   }
00935     }
00936 
00937   MPI_Waitall (naborCount, send_req, status);
00938   MPI_Waitall (naborCount, recv_req, status);
00939 
00940   Hash_i_dhCreate (&n2o_table, nz / 2);
00941   CHECK_V_ERROR;
00942   Hash_i_dhCreate (&o2n_table, nz / 2);
00943   CHECK_V_ERROR;
00944   s->n2o_ext = n2o_table;
00945   s->o2n_ext = o2n_table;
00946 
00947   /* insert non-local boundary node permutations in lookup tables */
00948   for (i = 0; i < nz; i += 2)
00949     {
00950       int old = recvBuf[i];
00951       int new = recvBuf[i + 1];
00952 
00953       if (debug)
00954   {
00955     fprintf (logFile, "SUBG  i= %i  old= %i  new= %i\n", i, old + 1,
00956        new + 1);
00957     fflush (logFile);
00958   }
00959 
00960       Hash_i_dhInsert (o2n_table, old, new);
00961       CHECK_V_ERROR;
00962       Hash_i_dhInsert (n2o_table, new, old);
00963       CHECK_V_ERROR;
00964     }
00965 
00966 
00967   if (recvBuf != NULL)
00968     {
00969       FREE_DH (recvBuf);
00970       CHECK_V_ERROR;
00971     }
00972   if (naborIdx != NULL)
00973     {
00974       FREE_DH (naborIdx);
00975       CHECK_V_ERROR;
00976     }
00977   if (sendBuf != NULL)
00978     {
00979       FREE_DH (sendBuf);
00980       CHECK_V_ERROR;
00981     }
00982   if (recv_req != NULL)
00983     {
00984       FREE_DH (recv_req);
00985       CHECK_V_ERROR;
00986     }
00987   if (send_req != NULL)
00988     {
00989       FREE_DH (send_req);
00990       CHECK_V_ERROR;
00991     }
00992   if (status != NULL)
00993     {
00994       FREE_DH (status);
00995       CHECK_V_ERROR;
00996     }
00997 
00998 END_FUNC_DH}
00999 
01000 
01001 
01002 #undef __FUNC__
01003 #define __FUNC__ "form_subdomaingraph_mpi_private"
01004 void
01005 form_subdomaingraph_mpi_private (SubdomainGraph_dh s)
01006 {
01007   START_FUNC_DH int *nabors = s->allNabors, nct = s->allCount;
01008   int *idxAll = NULL;
01009   int i, j, nz, *adj, *ptrs = s->ptrs;
01010   MPI_Request *recvReqs = NULL, sendReq;
01011   MPI_Status *statuses = NULL, status;
01012 
01013   /* all processors tell root how many nabors they have */
01014   if (myid_dh == 0)
01015     {
01016       idxAll = (int *) MALLOC_DH (np_dh * sizeof (int));
01017       CHECK_V_ERROR;
01018     }
01019   MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh);
01020 
01021   /* root counts edges in graph, and broacasts to all */
01022   if (myid_dh == 0)
01023     {
01024       nz = 0;
01025       for (i = 0; i < np_dh; ++i)
01026   nz += idxAll[i];
01027     }
01028   MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh);
01029 
01030   /* allocate space for adjacency lists (memory for the
01031      pointer array was previously allocated)
01032    */
01033   adj = s->adj = (int *) MALLOC_DH (nz * sizeof (int));
01034   CHECK_V_ERROR;
01035 
01036   /* root receives adjacency lists from all processors */
01037   if (myid_dh == 0)
01038     {
01039       recvReqs = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
01040       CHECK_V_ERROR;
01041       statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
01042       CHECK_V_ERROR;
01043 
01044       /* first, set up row pointer array */
01045       ptrs[0] = 0;
01046       for (j = 0; j < np_dh; ++j)
01047   ptrs[j + 1] = ptrs[j] + idxAll[j];
01048 
01049       /* second, start the receives */
01050       for (j = 0; j < np_dh; ++j)
01051   {
01052     int ct = idxAll[j];
01053 
01054     MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh,
01055          recvReqs + j);
01056   }
01057     }
01058 
01059   /* all processors send root their adjacency list */
01060   MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq);
01061 
01062   /* wait for comms to go through */
01063   if (myid_dh == 0)
01064     {
01065       MPI_Waitall (np_dh, recvReqs, statuses);
01066     }
01067   MPI_Wait (&sendReq, &status);
01068 
01069   /* root broadcasts assembled subdomain graph to all processors */
01070   MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh);
01071   MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh);
01072 
01073   if (idxAll != NULL)
01074     {
01075       FREE_DH (idxAll);
01076       CHECK_V_ERROR;
01077     }
01078   if (recvReqs != NULL)
01079     {
01080       FREE_DH (recvReqs);
01081       CHECK_V_ERROR;
01082     }
01083   if (statuses != NULL)
01084     {
01085       FREE_DH (statuses);
01086       CHECK_V_ERROR;
01087     }
01088 
01089 END_FUNC_DH}
01090 
01091 /* this is ugly and inefficient; but seq mode is primarily
01092    for debugging and testing, so there.
01093 */
01094 #undef __FUNC__
01095 #define __FUNC__ "form_subdomaingraph_seq_private"
01096 void
01097 form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m, void *A)
01098 {
01099   START_FUNC_DH int *dense, i, j, row, blocks = s->blocks;
01100   int *cval, len, *adj;
01101   int idx = 0, *ptrs = s->ptrs;
01102 
01103   /* allocate storage for adj[]; since this function is intended
01104      for debugging/testing, and the number of blocks should be
01105      relatively small, we'll punt and allocate the maximum
01106      possibly needed.
01107    */
01108   adj = s->adj = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01109   CHECK_V_ERROR;
01110 
01111   dense = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01112   CHECK_V_ERROR;
01113   for (i = 0; i < blocks * blocks; ++i)
01114     dense[i] = 0;
01115 
01116   /* loop over each block's rows to identify all boundary nodes */
01117   for (i = 0; i < blocks; ++i)
01118     {
01119       int beg_row = s->beg_row[i];
01120       int end_row = beg_row + s->row_count[i];
01121 
01122       for (row = beg_row; row < end_row; ++row)
01123   {
01124     EuclidGetRow (A, row, &len, &cval, NULL);
01125     CHECK_V_ERROR;
01126     for (j = 0; j < len; ++j)
01127       {
01128         int col = cval[j];
01129         if (col < beg_row || col >= end_row)
01130     {
01131       int owner = SubdomainGraph_dhFindOwner (s, col, false);
01132       CHECK_V_ERROR;
01133       dense[i * blocks + owner] = 1;
01134       dense[owner * blocks + i] = 1;
01135     }
01136       }
01137     EuclidRestoreRow (A, row, &len, &cval, NULL);
01138     CHECK_V_ERROR;
01139   }
01140     }
01141 
01142   /* form sparse csr representation of subdomain graph
01143      from dense representation
01144    */
01145   ptrs[0] = 0;
01146   for (i = 0; i < blocks; ++i)
01147     {
01148       for (j = 0; j < blocks; ++j)
01149   {
01150     if (dense[i * blocks + j])
01151       {
01152         adj[idx++] = j;
01153       }
01154   }
01155       ptrs[i + 1] = idx;
01156     }
01157 
01158   FREE_DH (dense);
01159   CHECK_V_ERROR;
01160 END_FUNC_DH}
01161 
01162 
01163 #undef __FUNC__
01164 #define __FUNC__ "find_all_neighbors_sym_private"
01165 void
01166 find_all_neighbors_sym_private (SubdomainGraph_dh s, int m, void *A)
01167 {
01168   START_FUNC_DH int *marker, i, j, beg_row, end_row;
01169   int row, len, *cval, ct = 0;
01170   int *nabors = s->allNabors;
01171 
01172   marker = (int *) MALLOC_DH (m * sizeof (int));
01173   CHECK_V_ERROR;
01174   for (i = 0; i < m; ++i)
01175     marker[i] = 0;
01176 
01177   SET_INFO
01178     ("finding nabors in subdomain graph for structurally symmetric matrix");
01179   SET_INFO ("(if this isn't what you want, use '-sym 0' switch)");
01180 
01181   beg_row = s->beg_row[myid_dh];
01182   end_row = beg_row + s->row_count[myid_dh];
01183 
01184   for (row = beg_row; row < end_row; ++row)
01185     {
01186       EuclidGetRow (A, row, &len, &cval, NULL);
01187       CHECK_V_ERROR;
01188       for (j = 0; j < len; ++j)
01189   {
01190     int col = cval[j];
01191     if (col < beg_row || col >= end_row)
01192       {
01193         int owner = SubdomainGraph_dhFindOwner (s, col, false);
01194         CHECK_V_ERROR;
01195         if (!marker[owner])
01196     {
01197       marker[owner] = 1;
01198       nabors[ct++] = owner;
01199     }
01200       }
01201   }
01202       EuclidRestoreRow (A, row, &len, &cval, NULL);
01203       CHECK_V_ERROR;
01204     }
01205   s->allCount = ct;
01206 
01207 /* fprintf(logFile, "@@@@@ allCount= %i\n", ct); */
01208 
01209   if (marker != NULL)
01210     {
01211       FREE_DH (marker);
01212       CHECK_V_ERROR;
01213     }
01214 END_FUNC_DH}
01215 
01216 #undef __FUNC__
01217 #define __FUNC__ "find_all_neighbors_unsym_private"
01218 void
01219 find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m, void *A)
01220 {
01221   START_FUNC_DH int i, j, row, beg_row, end_row;
01222   int *marker;
01223   int *cval, len, idx = 0;
01224   int nz, *nabors = s->allNabors, *myNabors;
01225 
01226   myNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
01227   CHECK_V_ERROR;
01228   marker = (int *) MALLOC_DH (np_dh * sizeof (int));
01229   CHECK_V_ERROR;
01230   for (i = 0; i < np_dh; ++i)
01231     marker[i] = 0;
01232 
01233   SET_INFO
01234     ("finding nabors in subdomain graph for structurally unsymmetric matrix");
01235 
01236   /* loop over this block's boundary rows, finding all nabors in
01237      subdomain graph
01238    */
01239   beg_row = s->beg_row[myid_dh];
01240   end_row = beg_row + s->row_count[myid_dh];
01241 
01242 
01243 
01244   /*for each locally owned row ...   */
01245   for (row = beg_row; row < end_row; ++row)
01246     {
01247       EuclidGetRow (A, row, &len, &cval, NULL);
01248       CHECK_V_ERROR;
01249       for (j = 0; j < len; ++j)
01250   {
01251     int col = cval[j];
01252     /*for each column that corresponds to a non-locally owned row ...  */
01253     if (col < beg_row || col >= end_row)
01254       {
01255         int owner = SubdomainGraph_dhFindOwner (s, col, false);
01256         CHECK_V_ERROR;
01257         /*if I've not yet done so ...   */
01258         if (!marker[owner])
01259     {
01260       marker[owner] = 1;
01261       /*append the non-local row's owner in to the list of my nabors
01262          in the subdomain graph     */
01263       myNabors[idx++] = owner;
01264     }
01265       }
01266   }
01267       EuclidRestoreRow (A, row, &len, &cval, NULL);
01268       CHECK_V_ERROR;
01269     }
01270 
01271   /*
01272      at this point, idx = the number of my neighbors in the subdomain
01273      graph; equivalently, idx is the number of meaningfull slots in
01274      the myNabors array.  -dah 1/31/06
01275    */
01276 
01277   /*
01278      at this point: marker[j] = 0 indicates that processor j is NOT my nabor
01279      marker[j] = 1 indicates that processor j IS my nabor
01280      however, there may be some nabors that can't be discovered in the above loop
01281      "//for each locally owned row;" this can happen if the matrix is
01282      structurally unsymmetric.
01283      -dah 1/31/06
01284    */
01285 
01286 /* fprintf(stderr, "[%i] marker: ", myid_dh);
01287 for (j=0; j<np_dh; j++) {
01288   fprintf(stderr, "[%i] (j=%d) %d\n", myid_dh, j,  marker[j]);
01289 }
01290 fprintf(stderr, "\n");
01291 */
01292 
01293   /* find out who my neighbors are that I cannot discern locally */
01294   MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh);
01295   CHECK_V_ERROR;
01296 
01297   /* add in neighbors that I know about from scanning my adjacency lists */
01298   for (i = 0; i < idx; ++i)
01299     nabors[myNabors[i]] = 1;
01300 
01301   /* remove self from the adjacency list */
01302   nabors[myid_dh] = 0;
01303 
01304   /*
01305      at this point: marker[j] = 0 indicates that processor j is NOT my nabor
01306      marker[j] = 1 indicates that processor j IS my nabor
01307      and this is guaranteed to be complete.
01308    */
01309 
01310   /* form final list of neighboring processors */
01311   nz = 0;
01312   for (i = 0; i < np_dh; ++i)
01313     {
01314       if (nabors[i])
01315   myNabors[nz++] = i;
01316     }
01317   s->allCount = nz;
01318   memcpy (nabors, myNabors, nz * sizeof (int));
01319 
01320   if (marker != NULL)
01321     {
01322       FREE_DH (marker);
01323       CHECK_V_ERROR;
01324     }
01325   if (myNabors != NULL)
01326     {
01327       FREE_DH (myNabors);
01328       CHECK_V_ERROR;
01329     }
01330 END_FUNC_DH}
01331 
01332 /*================================================================*/
01333 
01334 #undef __FUNC__
01335 #define __FUNC__ "find_bdry_nodes_sym_private"
01336 void
01337 find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
01338            int *interiorNodes, int *bdryNodes,
01339            int *interiorCount, int *bdryCount)
01340 {
01341   START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01342   int end_row = beg_row + s->row_count[myid_dh];
01343   int row, inCt = 0, bdCt = 0;
01344 
01345   int j;
01346   int *cval;
01347 
01348   /* determine if the row is a boundary row */
01349   for (row = beg_row; row < end_row; ++row)
01350     {       /* for each row in the subdomain */
01351       bool isBdry = false;
01352       int len;
01353       EuclidGetRow (A, row, &len, &cval, NULL);
01354       CHECK_V_ERROR;
01355 
01356       for (j = 0; j < len; ++j)
01357   {     /* for each column in the row */
01358     int col = cval[j];
01359     if (col < beg_row || col >= end_row)
01360       {
01361         isBdry = true;
01362         break;
01363       }
01364   }
01365       EuclidRestoreRow (A, row, &len, &cval, NULL);
01366       CHECK_V_ERROR;
01367 
01368       if (isBdry)
01369   {
01370     bdryNodes[bdCt++] = row - beg_row;
01371   }
01372       else
01373   {
01374     interiorNodes[inCt++] = row - beg_row;
01375   }
01376     }
01377 
01378   *interiorCount = inCt;
01379   *bdryCount = bdCt;
01380 
01381 END_FUNC_DH}
01382 
01383 #define BDRY_NODE_TAG 42
01384 
01385 #undef __FUNC__
01386 #define __FUNC__ "find_bdry_nodes_unsym_private"
01387 void
01388 find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m, void *A,
01389              int *interiorNodes, int *boundaryNodes,
01390              int *interiorCount, int *bdryCount)
01391 {
01392   START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01393   int end_row = beg_row + s->row_count[myid_dh];
01394   int i, j, row, max;
01395   int *cval;
01396   int *list, count;
01397   int *rpIN = NULL, *rpOUT = NULL;
01398   int *sendBuf, *recvBuf;
01399   int *marker, inCt, bdCt;
01400   int *bdryNodes, nz;
01401   int sendCt, recvCt;
01402   MPI_Request *sendReq, *recvReq;
01403   MPI_Status *status;
01404   SortedSet_dh ss;
01405 
01406   SortedSet_dhCreate (&ss, m);
01407   CHECK_V_ERROR;
01408 
01409   /*-----------------------------------------------------
01410    * identify all boundary nodes possible using locally
01411    * owned adjacency lists
01412    *-----------------------------------------------------*/
01413   for (row = beg_row; row < end_row; ++row)
01414     {
01415       bool isBdry = false;
01416       int len;
01417       EuclidGetRow (A, row, &len, &cval, NULL);
01418       CHECK_V_ERROR;
01419 
01420       for (j = 0; j < len; ++j)
01421   {
01422     int col = cval[j];
01423     if (col < beg_row || col >= end_row)
01424       {
01425         isBdry = true;  /* this row is a boundary node */
01426         SortedSet_dhInsert (ss, col);
01427         CHECK_V_ERROR;
01428         /* the row "col" is also a boundary node */
01429       }
01430   }
01431       EuclidRestoreRow (A, row, &len, &cval, NULL);
01432       CHECK_V_ERROR;
01433 
01434       if (isBdry)
01435   {
01436     SortedSet_dhInsert (ss, row);
01437     CHECK_V_ERROR;
01438   }
01439     }
01440 
01441   /*-----------------------------------------------------
01442    * scan the sorted list to determine what boundary
01443    * node information to send to whom
01444    *-----------------------------------------------------*/
01445   sendBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01446   CHECK_V_ERROR;
01447   recvBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01448   CHECK_V_ERROR;
01449   rpOUT = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01450   CHECK_V_ERROR;
01451   rpOUT[0] = 0;
01452   for (i = 0; i < np_dh; ++i)
01453     sendBuf[i] = 0;
01454 
01455   sendCt = 0;     /* total number of processor to whom we will send lists */
01456   SortedSet_dhGetList (ss, &list, &count);
01457   CHECK_V_ERROR;
01458 
01459   for (i = 0; i < count; /* i is set below */ )
01460     {
01461       int node = list[i];
01462       int owner;
01463       int last;
01464 
01465       owner = SubdomainGraph_dhFindOwner (s, node, false);
01466       CHECK_V_ERROR;
01467       last = s->beg_row[owner] + s->row_count[owner];
01468 
01469       /* determine the other boundary nodes that belong to owner */
01470       while ((i < count) && (list[i] < last))
01471   ++i;
01472       ++sendCt;
01473       rpOUT[sendCt] = i;
01474       sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1];
01475 
01476     }
01477 
01478   /*-----------------------------------------------------
01479    * processors tell each other how much information
01480    * each will send to whom
01481    *-----------------------------------------------------*/
01482   MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh);
01483   CHECK_V_ERROR;
01484 
01485   /*-----------------------------------------------------
01486    * exchange boundary node information
01487    * (note that we also exchange information with ourself!)
01488    *-----------------------------------------------------*/
01489 
01490   /* first, set up data structures to hold incoming information */
01491   rpIN = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01492   CHECK_V_ERROR;
01493   rpIN[0] = 0;
01494   nz = 0;
01495   recvCt = 0;
01496   for (i = 0; i < np_dh; ++i)
01497     {
01498       if (recvBuf[i])
01499   {
01500     ++recvCt;
01501     nz += recvBuf[i];
01502     rpIN[recvCt] = nz;
01503   }
01504     }
01505   bdryNodes = (int *) MALLOC_DH (nz * sizeof (int));
01506   CHECK_V_ERROR;
01507   sendReq = (MPI_Request *) MALLOC_DH (sendCt * sizeof (MPI_Request));
01508   CHECK_V_ERROR;
01509   recvReq = (MPI_Request *) MALLOC_DH (recvCt * sizeof (MPI_Request));
01510   CHECK_V_ERROR;
01511   max = MAX (sendCt, recvCt);
01512   status = (MPI_Status *) MALLOC_DH (max * sizeof (MPI_Status));
01513   CHECK_V_ERROR;
01514 
01515   /* second, start receives for incoming data */
01516   j = 0;
01517   for (i = 0; i < np_dh; ++i)
01518     {
01519       if (recvBuf[i])
01520   {
01521     MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT,
01522          i, BDRY_NODE_TAG, comm_dh, recvReq + j);
01523     ++j;
01524   }
01525     }
01526 
01527   /* third, start sends for outgoing data */
01528   j = 0;
01529   for (i = 0; i < np_dh; ++i)
01530     {
01531       if (sendBuf[i])
01532   {
01533     MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT,
01534          i, BDRY_NODE_TAG, comm_dh, sendReq + j);
01535     ++j;
01536   }
01537     }
01538 
01539   /* fourth, wait for all comms to finish */
01540   MPI_Waitall (sendCt, sendReq, status);
01541   MPI_Waitall (recvCt, recvReq, status);
01542 
01543   /* fifth, convert from global to local indices */
01544   for (i = 0; i < nz; ++i)
01545     bdryNodes[i] -= beg_row;
01546 
01547   /*-----------------------------------------------------
01548    * consolidate information from all processors to
01549    * identify all local boundary nodes
01550    *-----------------------------------------------------*/
01551   marker = (int *) MALLOC_DH (m * sizeof (int));
01552   CHECK_V_ERROR;
01553   for (i = 0; i < m; ++i)
01554     marker[i] = 0;
01555   for (i = 0; i < nz; ++i)
01556     marker[bdryNodes[i]] = 1;
01557 
01558   inCt = bdCt = 0;
01559   for (i = 0; i < m; ++i)
01560     {
01561       if (marker[i])
01562   {
01563     boundaryNodes[bdCt++] = i;
01564   }
01565       else
01566   {
01567     interiorNodes[inCt++] = i;
01568   }
01569     }
01570   *interiorCount = inCt;
01571   *bdryCount = bdCt;
01572 
01573   /*-----------------------------------------------------
01574    * clean up
01575    *-----------------------------------------------------*/
01576   SortedSet_dhDestroy (ss);
01577   CHECK_V_ERROR;
01578   if (rpIN != NULL)
01579     {
01580       FREE_DH (rpIN);
01581       CHECK_V_ERROR;
01582     }
01583   if (rpOUT != NULL)
01584     {
01585       FREE_DH (rpOUT);
01586       CHECK_V_ERROR;
01587     }
01588   if (sendBuf != NULL)
01589     {
01590       FREE_DH (sendBuf);
01591       CHECK_V_ERROR;
01592     }
01593   if (recvBuf != NULL)
01594     {
01595       FREE_DH (recvBuf);
01596       CHECK_V_ERROR;
01597     }
01598   if (bdryNodes != NULL)
01599     {
01600       FREE_DH (bdryNodes);
01601       CHECK_V_ERROR;
01602     }
01603   if (marker != NULL)
01604     {
01605       FREE_DH (marker);
01606       CHECK_V_ERROR;
01607     }
01608   if (sendReq != NULL)
01609     {
01610       FREE_DH (sendReq);
01611       CHECK_V_ERROR;
01612     }
01613   if (recvReq != NULL)
01614     {
01615       FREE_DH (recvReq);
01616       CHECK_V_ERROR;
01617     }
01618   if (status != NULL)
01619     {
01620       FREE_DH (status);
01621       CHECK_V_ERROR;
01622     }
01623 END_FUNC_DH}
01624 
01625 
01626 #undef __FUNC__
01627 #define __FUNC__ "find_ordered_neighbors_private"
01628 void
01629 find_ordered_neighbors_private (SubdomainGraph_dh s)
01630 {
01631   START_FUNC_DH int *loNabors = s->loNabors;
01632   int *hiNabors = s->hiNabors;
01633   int *allNabors = s->allNabors, allCount = s->allCount;
01634   int loCt = 0, hiCt = 0;
01635   int *o2n = s->o2n_sub;
01636   int i, myNewId = o2n[myid_dh];
01637 
01638   for (i = 0; i < allCount; ++i)
01639     {
01640       int nabor = allNabors[i];
01641       if (o2n[nabor] < myNewId)
01642   {
01643     loNabors[loCt++] = nabor;
01644   }
01645       else
01646   {
01647     hiNabors[hiCt++] = nabor;
01648   }
01649     }
01650 
01651   s->loCount = loCt;
01652   s->hiCount = hiCt;
01653 END_FUNC_DH}
01654 
01655 
01656 #undef __FUNC__
01657 #define __FUNC__ "color_subdomain_graph_private"
01658 void
01659 color_subdomain_graph_private (SubdomainGraph_dh s)
01660 {
01661   START_FUNC_DH int i, n = np_dh;
01662   int *rp = s->ptrs, *cval = s->adj;
01663   int j, *marker, thisNodesColor, *colorCounter;
01664   int *o2n = s->o2n_sub;
01665   int *color = s->colorVec;
01666 
01667   if (np_dh == 1)
01668     n = s->blocks;
01669 
01670   marker = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01671   colorCounter = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01672   for (i = 0; i <= n; ++i)
01673     {
01674       marker[i] = -1;
01675       colorCounter[i] = 0;
01676     }
01677 
01678   /*------------------------------------------------------------------
01679    * color the nodes
01680    *------------------------------------------------------------------*/
01681   for (i = 0; i < n; ++i)
01682     {       /* color node "i" */
01683       /* mark colors of "i"s nabors as unavailable;
01684          only need to mark nabors that are (per the input ordering)
01685          numbered less than "i."
01686        */
01687       for (j = rp[i]; j < rp[i + 1]; ++j)
01688   {
01689     int nabor = cval[j];
01690     if (nabor < i)
01691       {
01692         int naborsColor = color[nabor];
01693         marker[naborsColor] = i;
01694       }
01695   }
01696 
01697       /* assign vertex i the "smallest" possible color */
01698       thisNodesColor = -1;
01699       for (j = 0; j < n; ++j)
01700   {
01701     if (marker[j] != i)
01702       {
01703         thisNodesColor = j;
01704         break;
01705       }
01706   }
01707       color[i] = thisNodesColor;
01708       colorCounter[1 + thisNodesColor] += 1;
01709     }
01710 
01711   /*------------------------------------------------------------------
01712    * build ordering vector; if two nodes are similarly colored,
01713    * they will have the same relative ordering as before.
01714    *------------------------------------------------------------------*/
01715   /* prefix-sum to find lowest-numbered node for each color */
01716   for (i = 1; i < n; ++i)
01717     {
01718       if (colorCounter[i] == 0)
01719   break;
01720       colorCounter[i] += colorCounter[i - 1];
01721     }
01722 
01723   for (i = 0; i < n; ++i)
01724     {
01725       o2n[i] = colorCounter[color[i]];
01726       colorCounter[color[i]] += 1;
01727     }
01728 
01729   /* invert permutation */
01730   invert_perm (n, s->o2n_sub, s->n2o_sub);
01731   CHECK_V_ERROR;
01732 
01733 
01734   /*------------------------------------------------------------------
01735    * count the number of colors used
01736    *------------------------------------------------------------------*/
01737   {
01738     int ct = 0;
01739     for (j = 0; j < n; ++j)
01740       {
01741   if (marker[j] == -1)
01742     break;
01743   ++ct;
01744       }
01745     s->colors = ct;
01746   }
01747 
01748 
01749   /*------------------------------------------------------------------
01750    * (re)build the beg_rowP array
01751    *------------------------------------------------------------------*/
01752   {
01753     int sum = 0;
01754     for (i = 0; i < n; ++i)
01755       {
01756   int old = s->n2o_sub[i];
01757   s->beg_rowP[old] = sum;
01758   sum += s->row_count[old];
01759       }
01760   }
01761 
01762   FREE_DH (marker);
01763   CHECK_V_ERROR;
01764   FREE_DH (colorCounter);
01765   CHECK_V_ERROR;
01766 END_FUNC_DH}
01767 
01768 
01769 #undef __FUNC__
01770 #define __FUNC__ "SubdomainGraph_dhDump"
01771 void
01772 SubdomainGraph_dhDump (SubdomainGraph_dh s, char *filename)
01773 {
01774   START_FUNC_DH int i;
01775   int sCt = np_dh;
01776   FILE *fp;
01777 
01778   if (np_dh == 1)
01779     sCt = s->blocks;
01780 
01781 
01782   /* ---------------------------------------------------------
01783    *  for seq and par runs, 1st processor prints information
01784    *  that is common to all processors
01785    * ---------------------------------------------------------*/
01786   fp = openFile_dh (filename, "w");
01787   CHECK_V_ERROR;
01788 
01789   /* write subdomain ordering permutations */
01790   fprintf (fp, "----- colors used\n");
01791   fprintf (fp, "%i\n", s->colors);
01792   if (s->colorVec == NULL)
01793     {
01794       fprintf (fp, "s->colorVec == NULL\n");
01795     }
01796   else
01797     {
01798       fprintf (fp, "----- colorVec\n");
01799       for (i = 0; i < sCt; ++i)
01800   {
01801     fprintf (fp, "%i ", s->colorVec[i]);
01802   }
01803       fprintf (fp, "\n");
01804     }
01805 
01806   if (s->o2n_sub == NULL || s->o2n_sub == NULL)
01807     {
01808       fprintf (fp, "s->o2n_sub == NULL || s->o2n_sub == NULL\n");
01809     }
01810   else
01811     {
01812       fprintf (fp, "----- o2n_sub\n");
01813       for (i = 0; i < sCt; ++i)
01814   {
01815     fprintf (fp, "%i ", s->o2n_sub[i]);
01816   }
01817       fprintf (fp, "\n");
01818       fprintf (fp, "----- n2o_sub\n");
01819       for (i = 0; i < sCt; ++i)
01820   {
01821     fprintf (fp, "%i ", s->n2o_sub[i]);
01822   }
01823       fprintf (fp, "\n");
01824     }
01825 
01826   /* write begin row arrays */
01827   if (s->beg_row == NULL || s->beg_rowP == NULL)
01828     {
01829       fprintf (fp, "s->beg_row == NULL || s->beg_rowP == NULL\n");
01830     }
01831   else
01832     {
01833       fprintf (fp, "----- beg_row\n");
01834       for (i = 0; i < sCt; ++i)
01835   {
01836     fprintf (fp, "%i ", 1 + s->beg_row[i]);
01837   }
01838       fprintf (fp, "\n");
01839       fprintf (fp, "----- beg_rowP\n");
01840       for (i = 0; i < sCt; ++i)
01841   {
01842     fprintf (fp, "%i ", 1 + s->beg_rowP[i]);
01843   }
01844       fprintf (fp, "\n");
01845     }
01846 
01847   /* write row count  and bdry count arrays */
01848   if (s->row_count == NULL || s->bdry_count == NULL)
01849     {
01850       fprintf (fp, "s->row_count == NULL || s->bdry_count == NULL\n");
01851     }
01852   else
01853     {
01854       fprintf (fp, "----- row_count\n");
01855       for (i = 0; i < sCt; ++i)
01856   {
01857     fprintf (fp, "%i ", s->row_count[i]);
01858   }
01859       fprintf (fp, "\n");
01860       fprintf (fp, "----- bdry_count\n");
01861       for (i = 0; i < sCt; ++i)
01862   {
01863     fprintf (fp, "%i ", s->bdry_count[i]);
01864   }
01865       fprintf (fp, "\n");
01866 
01867     }
01868 
01869   /* write subdomain graph */
01870   if (s->ptrs == NULL || s->adj == NULL)
01871     {
01872       fprintf (fp, "s->ptrs == NULL || s->adj == NULL\n");
01873     }
01874   else
01875     {
01876       int j;
01877       int ct;
01878       fprintf (fp, "----- subdomain graph\n");
01879       for (i = 0; i < sCt; ++i)
01880   {
01881     fprintf (fp, "%i :: ", i);
01882     ct = s->ptrs[i + 1] - s->ptrs[i];
01883     if (ct)
01884       {
01885         shellSort_int (ct, s->adj + s->ptrs[i]);
01886         CHECK_V_ERROR;
01887       }
01888     for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
01889       {
01890         fprintf (fp, "%i ", s->adj[j]);
01891       }
01892     fprintf (fp, "\n");
01893   }
01894     }
01895   closeFile_dh (fp);
01896   CHECK_V_ERROR;
01897 
01898   /* ---------------------------------------------------------
01899    *  next print info that differs across processors for par
01900    *  trials.  deal with this as two cases: seq and par
01901    * ---------------------------------------------------------*/
01902   if (s->beg_rowP == NULL)
01903     {
01904       SET_V_ERROR ("s->beg_rowP == NULL; can't continue");
01905     }
01906   if (s->row_count == NULL)
01907     {
01908       SET_V_ERROR ("s->row_count == NULL; can't continue");
01909     }
01910   if (s->o2n_sub == NULL)
01911     {
01912       SET_V_ERROR ("s->o2n_sub == NULL; can't continue");
01913     }
01914 
01915 
01916   if (np_dh == 1)
01917     {
01918       fp = openFile_dh (filename, "a");
01919       CHECK_V_ERROR;
01920 
01921       /* write n2o_row  and o2n_col */
01922       if (s->n2o_row == NULL || s->o2n_col == NULL)
01923   {
01924     fprintf (fp, "s->n2o_row == NULL|| s->o2n_col == NULL\n");
01925   }
01926       else
01927   {
01928     fprintf (fp, "----- n2o_row\n");
01929     for (i = 0; i < s->m; ++i)
01930       {
01931         fprintf (fp, "%i ", 1 + s->n2o_row[i]);
01932       }
01933     fprintf (fp, "\n");
01934 
01935 #if 0
01936 /*
01937 note: this won't match the parallel case, since
01938       parallel permutation vecs are zero-based and purely
01939       local
01940 */
01941 
01942     fprintf (fp, "----- o2n_col\n");
01943     for (i = 0; i < sCt; ++i)
01944       {
01945         int br = s->beg_row[i];
01946         int er = br + s->row_count[i];
01947 
01948         for (j = br; j < er; ++j)
01949     {
01950       fprintf (fp, "%i ", 1 + s->o2n_col[j]);
01951     }
01952         fprintf (fp, "\n");
01953       }
01954     fprintf (fp, "\n");
01955 
01956 #endif
01957 
01958   }
01959       closeFile_dh (fp);
01960       CHECK_V_ERROR;
01961     }
01962 
01963   /* parallel case */
01964   else
01965     {
01966       int id = s->n2o_sub[myid_dh];
01967       int m = s->m;
01968       int pe;
01969       int beg_row = 0;
01970       if (s->beg_row != 0)
01971   beg_row = s->beg_row[myid_dh];
01972 
01973       /* write n2o_row */
01974       for (pe = 0; pe < np_dh; ++pe)
01975   {
01976     MPI_Barrier (comm_dh);
01977     if (id == pe)
01978       {
01979         fp = openFile_dh (filename, "a");
01980         CHECK_V_ERROR;
01981         if (id == 0)
01982     fprintf (fp, "----- n2o_row\n");
01983 
01984         for (i = 0; i < m; ++i)
01985     {
01986       fprintf (fp, "%i ", 1 + s->n2o_row[i] + beg_row);
01987     }
01988         if (id == np_dh - 1)
01989     fprintf (fp, "\n");
01990         closeFile_dh (fp);
01991         CHECK_V_ERROR;
01992       }
01993   }
01994 
01995 #if 0
01996 
01997       /* write o2n_col */
01998       for (pe = 0; pe < np_dh; ++pe)
01999   {
02000     MPI_Barrier (comm_dh);
02001     if (myid_dh == pe)
02002       {
02003         fp = openFile_dh (filename, "a");
02004         CHECK_V_ERROR;
02005         if (myid_dh == 0)
02006     fprintf (fp, "----- o2n_col\n");
02007 
02008         for (i = 0; i < m; ++i)
02009     {
02010       fprintf (fp, "%i ", 1 + s->o2n_col[i] + beg_row);
02011     }
02012         fprintf (fp, "\n");
02013 
02014         if (myid_dh == np_dh - 1)
02015     fprintf (fp, "\n");
02016 
02017         closeFile_dh (fp);
02018         CHECK_V_ERROR;
02019       }
02020   }
02021 
02022 #endif
02023 
02024     }
02025 
02026 END_FUNC_DH}
02027 
02028 
02029 
02030 #undef __FUNC__
02031 #define __FUNC__ "find_bdry_nodes_seq_private"
02032 void
02033 find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A)
02034 {
02035   START_FUNC_DH int i, j, row, blocks = s->blocks;
02036   int *cval, *tmp;
02037 
02038   tmp = (int *) MALLOC_DH (m * sizeof (int));
02039   CHECK_V_ERROR;
02040   for (i = 0; i < m; ++i)
02041     tmp[i] = 0;
02042 
02043     /*------------------------------------------ 
02044      * mark all boundary nodes
02045      *------------------------------------------ */
02046   for (i = 0; i < blocks; ++i)
02047     {
02048       int beg_row = s->beg_row[i];
02049       int end_row = beg_row + s->row_count[i];
02050 
02051       for (row = beg_row; row < end_row; ++row)
02052   {
02053     bool isBdry = false;
02054     int len;
02055     EuclidGetRow (A, row, &len, &cval, NULL);
02056     CHECK_V_ERROR;
02057 
02058     for (j = 0; j < len; ++j)
02059       {     /* for each column in the row */
02060         int col = cval[j];
02061 
02062         if (col < beg_row || col >= end_row)
02063     {
02064       tmp[col] = 1;
02065       isBdry = true;
02066     }
02067       }
02068     if (isBdry)
02069       tmp[row] = 1;
02070     EuclidRestoreRow (A, row, &len, &cval, NULL);
02071     CHECK_V_ERROR;
02072   }
02073     }
02074 
02075     /*------------------------------------------
02076      * fill in the bdry_count[] array
02077      *------------------------------------------ */
02078   for (i = 0; i < blocks; ++i)
02079     {
02080       int beg_row = s->beg_row[i];
02081       int end_row = beg_row + s->row_count[i];
02082       int ct = 0;
02083       for (row = beg_row; row < end_row; ++row)
02084   {
02085     if (tmp[row])
02086       ++ct;
02087   }
02088       s->bdry_count[i] = ct;
02089     }
02090 
02091     /*------------------------------------------
02092      * form the o2n_col[] permutation
02093      *------------------------------------------ */
02094   for (i = 0; i < blocks; ++i)
02095     {
02096       int beg_row = s->beg_row[i];
02097       int end_row = beg_row + s->row_count[i];
02098       int interiorIDX = beg_row;
02099       int bdryIDX = end_row - s->bdry_count[i];
02100 
02101       for (row = beg_row; row < end_row; ++row)
02102   {
02103     if (tmp[row])
02104       {
02105         s->o2n_col[row] = bdryIDX++;
02106       }
02107     else
02108       {
02109         s->o2n_col[row] = interiorIDX++;
02110       }
02111   }
02112     }
02113 
02114   /* invert permutation */
02115   invert_perm (m, s->o2n_col, s->n2o_row);
02116   CHECK_V_ERROR;
02117   FREE_DH (tmp);
02118   CHECK_V_ERROR;
02119 
02120 END_FUNC_DH}
02121 
02122 #undef __FUNC__
02123 #define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph"
02124 void
02125 SubdomainGraph_dhPrintSubdomainGraph (SubdomainGraph_dh s, FILE * fp)
02126 {
02127   START_FUNC_DH if (myid_dh == 0)
02128     {
02129       int i, j;
02130 
02131       fprintf (fp,
02132          "\n-----------------------------------------------------\n");
02133       fprintf (fp, "SubdomainGraph, and coloring and ordering information\n");
02134       fprintf (fp, "-----------------------------------------------------\n");
02135       fprintf (fp, "colors used: %i\n", s->colors);
02136 
02137       fprintf (fp, "o2n ordering vector: ");
02138       for (i = 0; i < s->blocks; ++i)
02139   fprintf (fp, "%i ", s->o2n_sub[i]);
02140 
02141       fprintf (fp, "\ncoloring vector (node, color): \n");
02142       for (i = 0; i < s->blocks; ++i)
02143   fprintf (fp, "  %i, %i\n", i, s->colorVec[i]);
02144 
02145       fprintf (fp, "\n");
02146       fprintf (fp, "Adjacency lists:\n");
02147 
02148       for (i = 0; i < s->blocks; ++i)
02149   {
02150     fprintf (fp, "   P_%i :: ", i);
02151     for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
02152       {
02153         fprintf (fp, "%i ", s->adj[j]);
02154       }
02155     fprintf (fp, "\n");
02156   }
02157       fprintf (fp, "-----------------------------------------------------\n");
02158     }
02159 END_FUNC_DH}
02160 
02161 
02162 #undef __FUNC__
02163 #define __FUNC__ "adjust_matrix_perms_private"
02164 void
02165 adjust_matrix_perms_private (SubdomainGraph_dh s, int m)
02166 {
02167   START_FUNC_DH int i, j, blocks = s->blocks;
02168   int *o2n = s->o2n_col;
02169 
02170   for (i = 0; i < blocks; ++i)
02171     {
02172       int beg_row = s->beg_row[i];
02173       int end_row = beg_row + s->row_count[i];
02174       int adjust = s->beg_rowP[i] - s->beg_row[i];
02175       for (j = beg_row; j < end_row; ++j)
02176   o2n[j] += adjust;
02177     }
02178 
02179   invert_perm (m, s->o2n_col, s->n2o_row);
02180   CHECK_V_ERROR;
02181 END_FUNC_DH}
02182 
02183 #undef __FUNC__
02184 #define __FUNC__ "SubdomainGraph_dhPrintRatios"
02185 void
02186 SubdomainGraph_dhPrintRatios (SubdomainGraph_dh s, FILE * fp)
02187 {
02188   START_FUNC_DH int i;
02189   int blocks = np_dh;
02190   double ratio[25];
02191 
02192   if (myid_dh == 0)
02193     {
02194       if (np_dh == 1)
02195   blocks = s->blocks;
02196       if (blocks > 25)
02197   blocks = 25;
02198 
02199       fprintf (fp, "\n");
02200       fprintf (fp, "Subdomain interior/boundary node ratios\n");
02201       fprintf (fp, "---------------------------------------\n");
02202 
02203       /* compute ratios */
02204       for (i = 0; i < blocks; ++i)
02205   {
02206     if (s->bdry_count[i] == 0)
02207       {
02208         ratio[i] = -1;
02209       }
02210     else
02211       {
02212         ratio[i] =
02213     (double) (s->row_count[i] -
02214         s->bdry_count[i]) / (double) s->bdry_count[i];
02215       }
02216   }
02217 
02218       /* sort ratios */
02219       shellSort_float (blocks, ratio);
02220 
02221       /* print ratios */
02222       if (blocks <= 20)
02223   {     /* print all ratios */
02224     int j = 0;
02225     for (i = 0; i < blocks; ++i)
02226       {
02227         fprintf (fp, "%0.2g  ", ratio[i]);
02228         ++j;
02229         if (j == 10)
02230     {
02231       fprintf (fp, "\n");
02232     }
02233       }
02234     fprintf (fp, "\n");
02235   }
02236       else
02237   {     /* print 10 largest and 10 smallest ratios */
02238     fprintf (fp, "10 smallest ratios: ");
02239     for (i = 0; i < 10; ++i)
02240       {
02241         fprintf (fp, "%0.2g  ", ratio[i]);
02242       }
02243     fprintf (fp, "\n");
02244     fprintf (fp, "10 largest ratios:  ");
02245     {
02246       int start = blocks - 6, stop = blocks - 1;
02247       for (i = start; i < stop; ++i)
02248         {
02249     fprintf (fp, "%0.2g  ", ratio[i]);
02250         }
02251       fprintf (fp, "\n");
02252     }
02253   }
02254     }
02255 
02256 END_FUNC_DH}
02257 
02258 
02259 #undef __FUNC__
02260 #define __FUNC__ "SubdomainGraph_dhPrintStats"
02261 void
02262 SubdomainGraph_dhPrintStats (SubdomainGraph_dh sg, FILE * fp)
02263 {
02264   START_FUNC_DH double *timing = sg->timing;
02265 
02266   fprintf_dh (fp, "\nSubdomainGraph timing report\n");
02267   fprintf_dh (fp, "-----------------------------\n");
02268   fprintf_dh (fp, "total setup time: %0.2f\n", timing[TOTAL_SGT]);
02269   fprintf_dh (fp, "  find neighbors in subdomain graph: %0.2f\n",
02270         timing[FIND_NABORS_SGT]);
02271   fprintf_dh (fp, "  locally order interiors and bdry:  %0.2f\n",
02272         timing[ORDER_BDRY_SGT]);
02273   fprintf_dh (fp, "  form and color subdomain graph:    %0.2f\n",
02274         timing[FORM_GRAPH_SGT]);
02275   fprintf_dh (fp, "  exchange bdry permutations:        %0.2f\n",
02276         timing[EXCHANGE_PERMS_SGT]);
02277   fprintf_dh (fp, "  everything else (should be small): %0.2f\n",
02278         timing[TOTAL_SGT] - (timing[FIND_NABORS_SGT] +
02279            timing[ORDER_BDRY_SGT] +
02280            timing[FORM_GRAPH_SGT] +
02281            timing[EXCHANGE_PERMS_SGT]));
02282 END_FUNC_DH}
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Generated on Wed Apr 13 10:05:35 2011 for Ifpack Package Browser (Single Doxygen Collection) by  doxygen 1.6.3