SubdomainGraph_dh.c

00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "SubdomainGraph_dh.h"
00031 #include "getRow_dh.h"
00032 #include "Mem_dh.h"
00033 #include "Parser_dh.h"
00034 #include "Hash_i_dh.h"
00035 #include "mat_dh_private.h"
00036 #include "io_dh.h"
00037 #include "SortedSet_dh.h"
00038 #include "shellSort_dh.h"
00039 
00040 
00041 /* for debugging only! */
00042 #include <unistd.h>
00043 
00044 static void init_seq_private (SubdomainGraph_dh s, int blocks, bool bj,
00045                   void *A);
00046 static void init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj,
00047                   void *A);
00048 /*
00049 static void partition_metis_private(SubdomainGraph_dh s, void *A);
00050 
00051   grep for same below!
00052 */
00053 static void allocate_storage_private (SubdomainGraph_dh s, int blocks, int m,
00054                       bool bj);
00055 static void form_subdomaingraph_mpi_private (SubdomainGraph_dh s);
00056 static void form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m,
00057                          void *A);
00058 static void find_all_neighbors_sym_private (SubdomainGraph_dh s, int m,
00059                         void *A);
00060 static void find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m,
00061                           void *A);
00062 static void find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
00063                      int *interiorNodes, int *bdryNodes,
00064                      int *interiorCount, int *bdryCount);
00065 static void find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m,
00066                        void *A, int *interiorNodes,
00067                        int *bdryNodes, int *interiorCount,
00068                        int *bdryCount);
00069 
00070 static void find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A);
00071   /* above also forms n2o[] and o2n[] */
00072 
00073 static void find_ordered_neighbors_private (SubdomainGraph_dh s);
00074 static void color_subdomain_graph_private (SubdomainGraph_dh s);
00075 static void adjust_matrix_perms_private (SubdomainGraph_dh s, int m);
00076 
00077 #undef __FUNC__
00078 #define __FUNC__ "SubdomainGraph_dhCreate"
00079 void
00080 SubdomainGraph_dhCreate (SubdomainGraph_dh * s)
00081 {
00082   START_FUNC_DH
00083     struct _subdomain_dh *tmp =
00084     (struct _subdomain_dh *) MALLOC_DH (sizeof (struct _subdomain_dh));
00085   CHECK_V_ERROR;
00086   *s = tmp;
00087 
00088   tmp->blocks = 1;
00089   tmp->ptrs = tmp->adj = NULL;
00090   tmp->colors = 1;
00091   tmp->colorVec = NULL;
00092   tmp->o2n_sub = tmp->n2o_sub = NULL;
00093   tmp->beg_row = tmp->beg_rowP = NULL;
00094   tmp->bdry_count = tmp->row_count = NULL;
00095   tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL;
00096   tmp->loCount = tmp->hiCount = tmp->allCount = 0;
00097 
00098   tmp->m = 0;
00099   tmp->n2o_row = tmp->o2n_col = NULL;
00100   tmp->o2n_ext = tmp->n2o_ext = NULL;
00101 
00102   tmp->doNotColor = Parser_dhHasSwitch (parser_dh, "-doNotColor");
00103   tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_SubGraph");
00104   {
00105     int i;
00106     for (i = 0; i < TIMING_BINS_SG; ++i)
00107       tmp->timing[i] = 0.0;
00108   }
00109 END_FUNC_DH}
00110 
00111 #undef __FUNC__
00112 #define __FUNC__ "SubdomainGraph_dhDestroy"
00113 void
00114 SubdomainGraph_dhDestroy (SubdomainGraph_dh s)
00115 {
00116   START_FUNC_DH if (s->ptrs != NULL)
00117     {
00118       FREE_DH (s->ptrs);
00119       CHECK_V_ERROR;
00120     }
00121   if (s->adj != NULL)
00122     {
00123       FREE_DH (s->adj);
00124       CHECK_V_ERROR;
00125     }
00126   if (s->colorVec != NULL)
00127     {
00128       FREE_DH (s->colorVec);
00129       CHECK_V_ERROR;
00130     }
00131   if (s->o2n_sub != NULL)
00132     {
00133       FREE_DH (s->o2n_sub);
00134       CHECK_V_ERROR;
00135     }
00136   if (s->n2o_sub != NULL)
00137     {
00138       FREE_DH (s->n2o_sub);
00139       CHECK_V_ERROR;
00140     }
00141 
00142   if (s->beg_row != NULL)
00143     {
00144       FREE_DH (s->beg_row);
00145       CHECK_V_ERROR;
00146     }
00147   if (s->beg_rowP != NULL)
00148     {
00149       FREE_DH (s->beg_rowP);
00150       CHECK_V_ERROR;
00151     }
00152   if (s->row_count != NULL)
00153     {
00154       FREE_DH (s->row_count);
00155       CHECK_V_ERROR;
00156     }
00157   if (s->bdry_count != NULL)
00158     {
00159       FREE_DH (s->bdry_count);
00160       CHECK_V_ERROR;
00161     }
00162   if (s->loNabors != NULL)
00163     {
00164       FREE_DH (s->loNabors);
00165       CHECK_V_ERROR;
00166     }
00167   if (s->hiNabors != NULL)
00168     {
00169       FREE_DH (s->hiNabors);
00170       CHECK_V_ERROR;
00171     }
00172   if (s->allNabors != NULL)
00173     {
00174       FREE_DH (s->allNabors);
00175       CHECK_V_ERROR;
00176     }
00177 
00178   if (s->n2o_row != NULL)
00179     {
00180       FREE_DH (s->n2o_row);
00181       CHECK_V_ERROR;
00182     }
00183   if (s->o2n_col != NULL)
00184     {
00185       FREE_DH (s->o2n_col);
00186       CHECK_V_ERROR;
00187     }
00188   if (s->o2n_ext != NULL)
00189     {
00190       Hash_i_dhDestroy (s->o2n_ext);
00191       CHECK_V_ERROR;
00192     }
00193   if (s->n2o_ext != NULL)
00194     {
00195       Hash_i_dhDestroy (s->n2o_ext);
00196       CHECK_V_ERROR;
00197     }
00198   FREE_DH (s);
00199   CHECK_V_ERROR;
00200 END_FUNC_DH}
00201 
00202 #undef __FUNC__
00203 #define __FUNC__ "SubdomainGraph_dhInit"
00204 void
00205 SubdomainGraph_dhInit (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00206 {
00207   START_FUNC_DH double t1 = MPI_Wtime ();
00208 
00209   if (blocks < 1)
00210     blocks = 1;
00211 
00212   if (np_dh == 1 || blocks > 1)
00213     {
00214       s->blocks = blocks;
00215       init_seq_private (s, blocks, bj, A);
00216       CHECK_V_ERROR;
00217     }
00218   else
00219     {
00220       s->blocks = np_dh;
00221       init_mpi_private (s, np_dh, bj, A);
00222       CHECK_V_ERROR;
00223     }
00224 
00225   s->timing[TOTAL_SGT] += (MPI_Wtime () - t1);
00226 END_FUNC_DH}
00227 
00228 
00229 #undef __FUNC__
00230 #define __FUNC__ "SubdomainGraph_dhFindOwner"
00231 int
00232 SubdomainGraph_dhFindOwner (SubdomainGraph_dh s, int idx, bool permuted)
00233 {
00234   START_FUNC_DH int sd;
00235   int *beg_row = s->beg_row;
00236   int *row_count = s->row_count;
00237   int owner = -1, blocks = s->blocks;
00238 
00239   if (permuted)
00240     beg_row = s->beg_rowP;
00241 
00242   /* determine the subdomain that contains "idx" */
00243   for (sd = 0; sd < blocks; ++sd)
00244     {
00245       if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd])
00246     {
00247       owner = sd;
00248       break;
00249     }
00250     }
00251 
00252   if (owner == -1)
00253     {
00254 
00255       fprintf (stderr, "@@@ failed to find owner for idx = %i @@@\n", idx);
00256       fprintf (stderr, "blocks= %i\n", blocks);
00257 
00258       sprintf (msgBuf_dh, "failed to find owner for idx = %i", idx);
00259       SET_ERROR (-1, msgBuf_dh);
00260     }
00261 
00262 END_FUNC_VAL (owner)}
00263 
00264 
00265 #undef __FUNC__
00266 #define __FUNC__ "SubdomainGraph_dhPrintStatsLong"
00267 void
00268 SubdomainGraph_dhPrintStatsLong (SubdomainGraph_dh s, FILE * fp)
00269 {
00270   START_FUNC_DH int i, j, k;
00271   double max = 0, min = INT_MAX;
00272 
00273   fprintf (fp,
00274        "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
00275   fprintf (fp, "colors used     = %i\n", s->colors);
00276   fprintf (fp, "subdomain count = %i\n", s->blocks);
00277 
00278 
00279   fprintf (fp, "\ninterior/boundary node ratios:\n");
00280 
00281   for (i = 0; i < s->blocks; ++i)
00282     {
00283       int inNodes = s->row_count[i] - s->bdry_count[i];
00284       int bdNodes = s->bdry_count[i];
00285       double ratio;
00286 
00287       if (bdNodes == 0)
00288     {
00289       ratio = -1;
00290     }
00291       else
00292     {
00293       ratio = (double) inNodes / (double) bdNodes;
00294     }
00295 
00296       max = MAX (max, ratio);
00297       min = MIN (min, ratio);
00298       fprintf (fp,
00299            "   P_%i: first= %3i  rowCount= %3i  interior= %3i  bdry= %3i  ratio= %0.1f\n",
00300            i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes,
00301            ratio);
00302     }
00303 
00304 
00305   fprintf (fp, "\nmax interior/bdry ratio = %.1f\n", max);
00306   fprintf (fp, "min interior/bdry ratio = %.1f\n", min);
00307 
00308 
00309     /*-----------------------------------------
00310      * subdomain graph
00311      *-----------------------------------------*/
00312   if (s->adj != NULL)
00313     {
00314       fprintf (fp, "\nunpermuted subdomain graph: \n");
00315       for (i = 0; i < s->blocks; ++i)
00316     {
00317       fprintf (fp, "%i :: ", i);
00318       for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
00319         {
00320           fprintf (fp, "%i  ", s->adj[j]);
00321         }
00322       fprintf (fp, "\n");
00323     }
00324     }
00325 
00326 
00327     /*-----------------------------------------
00328      * subdomain permutation
00329      *-----------------------------------------*/
00330   fprintf (fp, "\no2n subdomain permutation:\n");
00331   for (i = 0; i < s->blocks; ++i)
00332     {
00333       fprintf (fp, "  %i %i\n", i, s->o2n_sub[i]);
00334     }
00335   fprintf (fp, "\n");
00336 
00337   if (np_dh > 1)
00338     {
00339 
00340     /*-----------------------------------------
00341      * local n2o_row permutation
00342      *-----------------------------------------*/
00343       fprintf (fp, "\nlocal n2o_row permutation:\n   ");
00344       for (i = 0; i < s->row_count[myid_dh]; ++i)
00345     {
00346       fprintf (fp, "%i ", s->n2o_row[i]);
00347     }
00348       fprintf (fp, "\n");
00349 
00350     /*-----------------------------------------
00351      * local n2o permutation
00352      *-----------------------------------------*/
00353       fprintf (fp, "\nlocal o2n_col permutation:\n   ");
00354       for (i = 0; i < s->row_count[myid_dh]; ++i)
00355     {
00356       fprintf (fp, "%i ", s->o2n_col[i]);
00357     }
00358       fprintf (fp, "\n");
00359 
00360     }
00361   else
00362     {
00363     /*-----------------------------------------
00364      * local n2o_row permutation 
00365      *-----------------------------------------*/
00366       fprintf (fp, "\nlocal n2o_row permutation:\n");
00367       fprintf (fp, "--------------------------\n");
00368       for (k = 0; k < s->blocks; ++k)
00369     {
00370       int beg_row = s->beg_row[k];
00371       int end_row = beg_row + s->row_count[k];
00372 
00373       for (i = beg_row; i < end_row; ++i)
00374         {
00375           fprintf (fp, "%i ", s->n2o_row[i]);
00376         }
00377       fprintf (fp, "\n");
00378     }
00379 
00380     /*-----------------------------------------
00381      * local n2o permutation
00382      *-----------------------------------------*/
00383       fprintf (fp, "\nlocal o2n_col permutation:\n");
00384       fprintf (fp, "--------------------------\n");
00385       for (k = 0; k < s->blocks; ++k)
00386     {
00387       int beg_row = s->beg_row[k];
00388       int end_row = beg_row + s->row_count[k];
00389 
00390       for (i = beg_row; i < end_row; ++i)
00391         {
00392           fprintf (fp, "%i ", s->o2n_col[i]);
00393         }
00394       fprintf (fp, "\n");
00395     }
00396 
00397 
00398     }
00399 
00400 END_FUNC_DH}
00401 
00402 #undef __FUNC__
00403 #define __FUNC__ "init_seq_private"
00404 void
00405 init_seq_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00406 {
00407   START_FUNC_DH int m, n, beg_row;
00408   double t1;
00409 
00410   /*-------------------------------------------------------
00411    * get number of local rows (m), global rows (n), and
00412    * global numbering of first locally owned row
00413    * (for sequential, beg_row=0 and m == n
00414    *-------------------------------------------------------*/
00415   EuclidGetDimensions (A, &beg_row, &m, &n);
00416   CHECK_V_ERROR;
00417   s->m = n;
00418 
00419   /*-------------------------------------------------------
00420    * allocate storage for all data structures
00421    * EXCEPT s->adj and hash tables.
00422    * (but note that hash tables aren't used for sequential)
00423    *-------------------------------------------------------*/
00424   allocate_storage_private (s, blocks, m, bj);
00425   CHECK_V_ERROR;
00426 
00427   /*-------------------------------------------------------------
00428    * Fill in: beg_row[]
00429    *          beg_rowP[]
00430    *          row_count[]
00431    * At this point, beg_rowP[] is a copy of beg_row[])
00432    *-------------------------------------------------------------*/
00433   {
00434     int i;
00435     int rpp = m / blocks;
00436 
00437     if (rpp * blocks < m)
00438       ++rpp;
00439 
00440     s->beg_row[0] = 0;
00441     for (i = 1; i < blocks; ++i)
00442       s->beg_row[i] = rpp + s->beg_row[i - 1];
00443     for (i = 0; i < blocks; ++i)
00444       s->row_count[i] = rpp;
00445     s->row_count[blocks - 1] = m - rpp * (blocks - 1);
00446   }
00447   memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (int));
00448 
00449 
00450   /*-----------------------------------------------------------------
00451    * Find all neighboring processors in subdomain graph.
00452    * This block fills in: allNabors[]
00453    *-----------------------------------------------------------------*/
00454   /* NA for sequential! */
00455 
00456 
00457   /*-------------------------------------------------------
00458    * Count number of interior nodes for each subdomain;
00459    * also, form permutation vector to order boundary
00460    * nodes last in each subdomain.
00461    * This block fills in: bdry_count[]
00462    *                      n2o_col[]
00463    *                      o2n_row[]
00464    *-------------------------------------------------------*/
00465   t1 = MPI_Wtime ();
00466   if (!bj)
00467     {
00468       find_bdry_nodes_seq_private (s, m, A);
00469       CHECK_V_ERROR;
00470     }
00471   else
00472     {
00473       int i;
00474       for (i = 0; i < m; ++i)
00475     {
00476       s->n2o_row[i] = i;
00477       s->o2n_col[i] = i;
00478     }
00479     }
00480   s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00481 
00482   /*-------------------------------------------------------
00483    * Form subdomain graph,
00484    * then color and reorder subdomain graph.
00485    * This block fills in: ptr[]
00486    *                      adj[]
00487    *                      o2n_sub[]
00488    *                      n2o_sub[]
00489    *                      beg_rowP[]
00490    *-------------------------------------------------------*/
00491   t1 = MPI_Wtime ();
00492   if (!bj)
00493     {
00494       form_subdomaingraph_seq_private (s, m, A);
00495       CHECK_V_ERROR;
00496       if (s->doNotColor)
00497     {
00498       int i;
00499       printf_dh ("subdomain coloring and reordering is OFF\n");
00500       for (i = 0; i < blocks; ++i)
00501         {
00502           s->o2n_sub[i] = i;
00503           s->n2o_sub[i] = i;
00504           s->colorVec[i] = 0;
00505         }
00506     }
00507       else
00508     {
00509       SET_INFO ("subdomain coloring and reordering is ON");
00510       color_subdomain_graph_private (s);
00511       CHECK_V_ERROR;
00512     }
00513     }
00514 
00515   /* bj setup */
00516   else
00517     {
00518       int i;
00519       for (i = 0; i < blocks; ++i)
00520     {
00521       s->o2n_sub[i] = i;
00522       s->n2o_sub[i] = i;
00523     }
00524     }
00525   s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00526 
00527   /*-------------------------------------------------------
00528    * Here's a step we DON'T do for the parallel case:
00529    * we need to adjust the matrix row and column perms
00530    * to reflect subdomain reordering (for the parallel
00531    * case, these permutation vectors are purely local and
00532    * zero-based)
00533    *-------------------------------------------------------*/
00534   if (!bj)
00535     {
00536       adjust_matrix_perms_private (s, m);
00537       CHECK_V_ERROR;
00538     }
00539 
00540   /*-------------------------------------------------------
00541    * Build lists of lower and higher ordered neighbors.
00542    * This block fills in: loNabors[]
00543    *                      hiNabors[]
00544    *-------------------------------------------------------*/
00545   /* NA for sequential */
00546 
00547   /*-------------------------------------------------------
00548    *  Exchange boundary node permutation information with
00549    *  neighboring processors in the subdomain graph.
00550    *  This block fills in: o2n_ext (hash table)
00551    *                       n2o_ext (hash table)
00552    *-------------------------------------------------------*/
00553   /* NA for sequential */
00554 
00555 
00556 END_FUNC_DH}
00557 
00558 
00559 #if 0
00560 #undef __FUNC__
00561 #define __FUNC__ "partition_metis_private"
00562 void
00563 partition_metis_private (SubdomainGraph_dh s, void *A)
00564 {
00565   START_FUNC_DH if (ignoreMe)
00566     SET_V_ERROR ("not implemented");
00567 END_FUNC_DH}
00568 #endif
00569 
00570 #undef __FUNC__
00571 #define __FUNC__ "allocate_storage_private"
00572 void
00573 allocate_storage_private (SubdomainGraph_dh s, int blocks, int m, bool bj)
00574 {
00575   START_FUNC_DH if (!bj)
00576     {
00577       s->ptrs = (int *) MALLOC_DH ((blocks + 1) * sizeof (int));
00578       CHECK_V_ERROR;
00579       s->ptrs[0] = 0;
00580       s->colorVec = (int *) MALLOC_DH (blocks * sizeof (int));
00581       CHECK_V_ERROR;
00582       s->loNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00583       CHECK_V_ERROR;
00584       s->hiNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00585       CHECK_V_ERROR;
00586       s->allNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00587       CHECK_V_ERROR;
00588     }
00589 
00590   s->n2o_row = (int *) MALLOC_DH (m * sizeof (int));
00591   CHECK_V_ERROR;
00592   s->o2n_col = (int *) MALLOC_DH (m * sizeof (int));
00593   CHECK_V_ERROR;
00594 
00595   /* these are probably only needed for single mpi task -- ?? */
00596   /* nope; beg_row and row_ct are needed by ilu_mpi_bj; yuck! */
00597   s->beg_row = (int *) MALLOC_DH ((blocks) * sizeof (int));
00598   CHECK_V_ERROR;
00599   s->beg_rowP = (int *) MALLOC_DH ((blocks) * sizeof (int));
00600   CHECK_V_ERROR;
00601   s->row_count = (int *) MALLOC_DH (blocks * sizeof (int));
00602   CHECK_V_ERROR;
00603   s->bdry_count = (int *) MALLOC_DH (blocks * sizeof (int));
00604   CHECK_V_ERROR;
00605   s->o2n_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00606   CHECK_V_ERROR;
00607   s->n2o_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00608   CHECK_V_ERROR;
00609 
00610 END_FUNC_DH}
00611 
00612 /*-----------------------------------------------------------------*/
00613 
00614 
00615 #undef __FUNC__
00616 #define __FUNC__ "init_mpi_private"
00617 void
00618 init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00619 {
00620   START_FUNC_DH int m, n, beg_row;
00621   bool symmetric;
00622   double t1;
00623 
00624   symmetric = Parser_dhHasSwitch (parser_dh, "-sym");
00625   CHECK_V_ERROR;
00626   if (Parser_dhHasSwitch (parser_dh, "-makeSymmetric"))
00627     {
00628       symmetric = true;
00629     }
00630 
00631   /*-------------------------------------------------------
00632    * get number of local rows (m), global rows (n), and
00633    * global numbering of first locally owned row
00634    *-------------------------------------------------------*/
00635   EuclidGetDimensions (A, &beg_row, &m, &n);
00636   CHECK_V_ERROR;
00637   s->m = m;
00638 
00639 
00640   /*-------------------------------------------------------
00641    * allocate storage for all data structures
00642    * EXCEPT s->adj and hash tables.
00643    *-------------------------------------------------------*/
00644   allocate_storage_private (s, blocks, m, bj);
00645   CHECK_V_ERROR;
00646 
00647   /*-------------------------------------------------------------
00648    * Fill in: beg_row[]
00649    *          beg_rowP[]
00650    *          row_count[]
00651    * At this point, beg_rowP[] is a copy of beg_row[])
00652    *-------------------------------------------------------------*/
00653   if (!bj)
00654     {
00655       MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh);
00656       MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh);
00657       memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (int));
00658     }
00659   else
00660     {
00661       s->beg_row[myid_dh] = beg_row;
00662       s->beg_rowP[myid_dh] = beg_row;
00663       s->row_count[myid_dh] = m;
00664     }
00665 
00666   /*-----------------------------------------------------------------
00667    * Find all neighboring processors in subdomain graph.
00668    * This block fills in: allNabors[]
00669    *-----------------------------------------------------------------*/
00670   if (!bj)
00671     {
00672       t1 = MPI_Wtime ();
00673       if (symmetric)
00674     {
00675       find_all_neighbors_sym_private (s, m, A);
00676       CHECK_V_ERROR;
00677     }
00678       else
00679     {
00680       find_all_neighbors_unsym_private (s, m, A);
00681       CHECK_V_ERROR;
00682     }
00683       s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1);
00684     }
00685 
00686 
00687   /*-----------------------------------------------------------------
00688    *  determine which rows are boundary rows, and which are interior
00689    *  rows; also, form permutation vector to order interior
00690    *  nodes first within each subdomain
00691    *  This block fills in: bdry_count[]
00692    *                       n2o_col[]
00693    *                       o2n_row[]
00694    *-----------------------------------------------------------------*/
00695   t1 = MPI_Wtime ();
00696   if (!bj)
00697     {
00698       int *interiorNodes, *bdryNodes;
00699       int interiorCount, bdryCount;
00700       int *o2n = s->o2n_col, idx;
00701       int i;
00702 
00703       interiorNodes = (int *) MALLOC_DH (m * sizeof (int));
00704       CHECK_V_ERROR;
00705       bdryNodes = (int *) MALLOC_DH (m * sizeof (int));
00706       CHECK_V_ERROR;
00707 
00708       /* divide this subdomain's rows into interior and boundary rows;
00709          the returned lists are with respect to local numbering.
00710        */
00711       if (symmetric)
00712     {
00713       find_bdry_nodes_sym_private (s, m, A,
00714                        interiorNodes, bdryNodes,
00715                        &interiorCount, &bdryCount);
00716       CHECK_V_ERROR;
00717     }
00718       else
00719     {
00720       find_bdry_nodes_unsym_private (s, m, A,
00721                      interiorNodes, bdryNodes,
00722                      &interiorCount, &bdryCount);
00723       CHECK_V_ERROR;
00724     }
00725 
00726       /* exchange number of boundary rows with all neighbors */
00727       MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT,
00728              comm_dh);
00729 
00730       /* form local permutation */
00731       idx = 0;
00732       for (i = 0; i < interiorCount; ++i)
00733     {
00734       o2n[interiorNodes[i]] = idx++;
00735     }
00736       for (i = 0; i < bdryCount; ++i)
00737     {
00738       o2n[bdryNodes[i]] = idx++;
00739     }
00740 
00741       /* invert permutation */
00742       invert_perm (m, o2n, s->n2o_row);
00743       CHECK_V_ERROR;
00744 
00745       FREE_DH (interiorNodes);
00746       CHECK_V_ERROR;
00747       FREE_DH (bdryNodes);
00748       CHECK_V_ERROR;
00749     }
00750 
00751   /* bj setup */
00752   else
00753     {
00754       int *o2n = s->o2n_col, *n2o = s->n2o_row;
00755       int i, m = s->m;
00756 
00757       for (i = 0; i < m; ++i)
00758     {
00759       o2n[i] = i;
00760       n2o[i] = i;
00761     }
00762     }
00763   s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00764 
00765   /*-------------------------------------------------------
00766    * Form subdomain graph,
00767    * then color and reorder subdomain graph.
00768    * This block fills in: ptr[]
00769    *                      adj[]
00770    *                      o2n_sub[]
00771    *                      n2o_sub[]
00772    *                      beg_rowP[]
00773    *-------------------------------------------------------*/
00774   if (!bj)
00775     {
00776       t1 = MPI_Wtime ();
00777       form_subdomaingraph_mpi_private (s);
00778       CHECK_V_ERROR;
00779       if (s->doNotColor)
00780     {
00781       int i;
00782       printf_dh ("subdomain coloring and reordering is OFF\n");
00783       for (i = 0; i < blocks; ++i)
00784         {
00785           s->o2n_sub[i] = i;
00786           s->n2o_sub[i] = i;
00787           s->colorVec[i] = 0;
00788         }
00789     }
00790       else
00791     {
00792       SET_INFO ("subdomain coloring and reordering is ON");
00793       color_subdomain_graph_private (s);
00794       CHECK_V_ERROR;
00795     }
00796       s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00797     }
00798 
00799   /*-------------------------------------------------------
00800    * Build lists of lower and higher ordered neighbors.
00801    * This block fills in: loNabors[]
00802    *                      hiNabors[]
00803    *-------------------------------------------------------*/
00804   if (!bj)
00805     {
00806       find_ordered_neighbors_private (s);
00807       CHECK_V_ERROR;
00808     }
00809 
00810   /*-------------------------------------------------------
00811    *  Exchange boundary node permutation information with
00812    *  neighboring processors in the subdomain graph.
00813    *  This block fills in: o2n_ext (hash table)
00814    *                       n2o_ext (hash table)
00815    *-------------------------------------------------------*/
00816   if (!bj)
00817     {
00818       t1 = MPI_Wtime ();
00819       SubdomainGraph_dhExchangePerms (s);
00820       CHECK_V_ERROR;
00821       s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1);
00822     }
00823 
00824 END_FUNC_DH}
00825 
00826 
00827 
00828 #undef __FUNC__
00829 #define __FUNC__ "SubdomainGraph_dhExchangePerms"
00830 void
00831 SubdomainGraph_dhExchangePerms (SubdomainGraph_dh s)
00832 {
00833   START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL;
00834   MPI_Status *status = NULL;
00835   int *nabors = s->allNabors, naborCount = s->allCount;
00836   int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz;
00837   int m = s->row_count[myid_dh];
00838   int beg_row = s->beg_row[myid_dh];
00839   int beg_rowP = s->beg_rowP[myid_dh];
00840   int *bdryNodeCounts = s->bdry_count;
00841   int myBdryCount = s->bdry_count[myid_dh];
00842   bool debug = false;
00843   int myFirstBdry = m - myBdryCount;
00844   int *n2o_row = s->n2o_row;
00845   Hash_i_dh n2o_table, o2n_table;
00846 
00847   if (logFile != NULL && s->debug)
00848     debug = true;
00849 
00850   /* allocate send buffer, and copy permutation info to buffer;
00851      each entry is a <old_value, new_value> pair.
00852    */
00853   sendBuf = (int *) MALLOC_DH (2 * myBdryCount * sizeof (int));
00854   CHECK_V_ERROR;
00855 
00856 
00857   if (debug)
00858     {
00859       fprintf (logFile,
00860            "\nSUBG myFirstBdry= %i  myBdryCount= %i  m= %i  beg_rowP= %i\n",
00861            1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP);
00862       fflush (logFile);
00863     }
00864 
00865   for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00866     {
00867       sendBuf[2 * j] = n2o_row[i] + beg_row;
00868       sendBuf[2 * j + 1] = i + beg_rowP;
00869     }
00870 
00871   if (debug)
00872     {
00873       fprintf (logFile, "\nSUBG SEND_BUF:\n");
00874       for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00875     {
00876       fprintf (logFile, "SUBG  %i, %i\n", 1 + sendBuf[2 * j],
00877            1 + sendBuf[2 * j + 1]);
00878     }
00879       fflush (logFile);
00880     }
00881 
00882   /* allocate a receive buffer for each nabor in the subdomain graph,
00883      and set up index array for locating the beginning of each
00884      nabor's buffers.
00885    */
00886   naborIdx = (int *) MALLOC_DH ((1 + naborCount) * sizeof (int));
00887   CHECK_V_ERROR;
00888   naborIdx[0] = 0;
00889   nz = 0;
00890   for (i = 0; i < naborCount; ++i)
00891     {
00892       nz += (2 * bdryNodeCounts[nabors[i]]);
00893       naborIdx[i + 1] = nz;
00894     }
00895 
00896 
00897   recvBuf = (int *) MALLOC_DH (nz * sizeof (int));
00898   CHECK_V_ERROR;
00899 
00900 
00901 /* for (i=0; i<nz; ++i) recvBuf[i] = -10; */
00902 
00903   /* perform sends and receives */
00904   recv_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00905   CHECK_V_ERROR;
00906   send_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00907   CHECK_V_ERROR;
00908   status = (MPI_Status *) MALLOC_DH (naborCount * sizeof (MPI_Status));
00909   CHECK_V_ERROR;
00910 
00911   for (i = 0; i < naborCount; ++i)
00912     {
00913       int nabr = nabors[i];
00914       int *buf = recvBuf + naborIdx[i];
00915       int ct = 2 * bdryNodeCounts[nabr];
00916 
00917 
00918       MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh,
00919          &(send_req[i]));
00920 
00921       if (debug)
00922     {
00923       fprintf (logFile, "SUBG   sending %i elts to %i\n", 2 * myBdryCount,
00924            nabr);
00925       fflush (logFile);
00926     }
00927 
00928       MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i]));
00929 
00930       if (debug)
00931     {
00932       fprintf (logFile, "SUBG  receiving %i elts from %i\n", ct, nabr);
00933       fflush (logFile);
00934     }
00935     }
00936 
00937   MPI_Waitall (naborCount, send_req, status);
00938   MPI_Waitall (naborCount, recv_req, status);
00939 
00940   Hash_i_dhCreate (&n2o_table, nz / 2);
00941   CHECK_V_ERROR;
00942   Hash_i_dhCreate (&o2n_table, nz / 2);
00943   CHECK_V_ERROR;
00944   s->n2o_ext = n2o_table;
00945   s->o2n_ext = o2n_table;
00946 
00947   /* insert non-local boundary node permutations in lookup tables */
00948   for (i = 0; i < nz; i += 2)
00949     {
00950       int old = recvBuf[i];
00951       int new = recvBuf[i + 1];
00952 
00953       if (debug)
00954     {
00955       fprintf (logFile, "SUBG  i= %i  old= %i  new= %i\n", i, old + 1,
00956            new + 1);
00957       fflush (logFile);
00958     }
00959 
00960       Hash_i_dhInsert (o2n_table, old, new);
00961       CHECK_V_ERROR;
00962       Hash_i_dhInsert (n2o_table, new, old);
00963       CHECK_V_ERROR;
00964     }
00965 
00966 
00967   if (recvBuf != NULL)
00968     {
00969       FREE_DH (recvBuf);
00970       CHECK_V_ERROR;
00971     }
00972   if (naborIdx != NULL)
00973     {
00974       FREE_DH (naborIdx);
00975       CHECK_V_ERROR;
00976     }
00977   if (sendBuf != NULL)
00978     {
00979       FREE_DH (sendBuf);
00980       CHECK_V_ERROR;
00981     }
00982   if (recv_req != NULL)
00983     {
00984       FREE_DH (recv_req);
00985       CHECK_V_ERROR;
00986     }
00987   if (send_req != NULL)
00988     {
00989       FREE_DH (send_req);
00990       CHECK_V_ERROR;
00991     }
00992   if (status != NULL)
00993     {
00994       FREE_DH (status);
00995       CHECK_V_ERROR;
00996     }
00997 
00998 END_FUNC_DH}
00999 
01000 
01001 
01002 #undef __FUNC__
01003 #define __FUNC__ "form_subdomaingraph_mpi_private"
01004 void
01005 form_subdomaingraph_mpi_private (SubdomainGraph_dh s)
01006 {
01007   START_FUNC_DH int *nabors = s->allNabors, nct = s->allCount;
01008   int *idxAll = NULL;
01009   int i, j, nz, *adj, *ptrs = s->ptrs;
01010   MPI_Request *recvReqs = NULL, sendReq;
01011   MPI_Status *statuses = NULL, status;
01012 
01013   /* all processors tell root how many nabors they have */
01014   if (myid_dh == 0)
01015     {
01016       idxAll = (int *) MALLOC_DH (np_dh * sizeof (int));
01017       CHECK_V_ERROR;
01018     }
01019   MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh);
01020 
01021   /* root counts edges in graph, and broacasts to all */
01022   if (myid_dh == 0)
01023     {
01024       nz = 0;
01025       for (i = 0; i < np_dh; ++i)
01026     nz += idxAll[i];
01027     }
01028   MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh);
01029 
01030   /* allocate space for adjacency lists (memory for the
01031      pointer array was previously allocated)
01032    */
01033   adj = s->adj = (int *) MALLOC_DH (nz * sizeof (int));
01034   CHECK_V_ERROR;
01035 
01036   /* root receives adjacency lists from all processors */
01037   if (myid_dh == 0)
01038     {
01039       recvReqs = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
01040       CHECK_V_ERROR;
01041       statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
01042       CHECK_V_ERROR;
01043 
01044       /* first, set up row pointer array */
01045       ptrs[0] = 0;
01046       for (j = 0; j < np_dh; ++j)
01047     ptrs[j + 1] = ptrs[j] + idxAll[j];
01048 
01049       /* second, start the receives */
01050       for (j = 0; j < np_dh; ++j)
01051     {
01052       int ct = idxAll[j];
01053 
01054       MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh,
01055              recvReqs + j);
01056     }
01057     }
01058 
01059   /* all processors send root their adjacency list */
01060   MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq);
01061 
01062   /* wait for comms to go through */
01063   if (myid_dh == 0)
01064     {
01065       MPI_Waitall (np_dh, recvReqs, statuses);
01066     }
01067   MPI_Wait (&sendReq, &status);
01068 
01069   /* root broadcasts assembled subdomain graph to all processors */
01070   MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh);
01071   MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh);
01072 
01073   if (idxAll != NULL)
01074     {
01075       FREE_DH (idxAll);
01076       CHECK_V_ERROR;
01077     }
01078   if (recvReqs != NULL)
01079     {
01080       FREE_DH (recvReqs);
01081       CHECK_V_ERROR;
01082     }
01083   if (statuses != NULL)
01084     {
01085       FREE_DH (statuses);
01086       CHECK_V_ERROR;
01087     }
01088 
01089 END_FUNC_DH}
01090 
01091 /* this is ugly and inefficient; but seq mode is primarily
01092    for debugging and testing, so there.
01093 */
01094 #undef __FUNC__
01095 #define __FUNC__ "form_subdomaingraph_seq_private"
01096 void
01097 form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m, void *A)
01098 {
01099   START_FUNC_DH int *dense, i, j, row, blocks = s->blocks;
01100   int *cval, len, *adj;
01101   int idx = 0, *ptrs = s->ptrs;
01102 
01103   /* allocate storage for adj[]; since this function is intended
01104      for debugging/testing, and the number of blocks should be
01105      relatively small, we'll punt and allocate the maximum
01106      possibly needed.
01107    */
01108   adj = s->adj = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01109   CHECK_V_ERROR;
01110 
01111   dense = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01112   CHECK_V_ERROR;
01113   for (i = 0; i < blocks * blocks; ++i)
01114     dense[i] = 0;
01115 
01116   /* loop over each block's rows to identify all boundary nodes */
01117   for (i = 0; i < blocks; ++i)
01118     {
01119       int beg_row = s->beg_row[i];
01120       int end_row = beg_row + s->row_count[i];
01121 
01122       for (row = beg_row; row < end_row; ++row)
01123     {
01124       EuclidGetRow (A, row, &len, &cval, NULL);
01125       CHECK_V_ERROR;
01126       for (j = 0; j < len; ++j)
01127         {
01128           int col = cval[j];
01129           if (col < beg_row || col >= end_row)
01130         {
01131           int owner = SubdomainGraph_dhFindOwner (s, col, false);
01132           CHECK_V_ERROR;
01133           dense[i * blocks + owner] = 1;
01134           dense[owner * blocks + i] = 1;
01135         }
01136         }
01137       EuclidRestoreRow (A, row, &len, &cval, NULL);
01138       CHECK_V_ERROR;
01139     }
01140     }
01141 
01142   /* form sparse csr representation of subdomain graph
01143      from dense representation
01144    */
01145   ptrs[0] = 0;
01146   for (i = 0; i < blocks; ++i)
01147     {
01148       for (j = 0; j < blocks; ++j)
01149     {
01150       if (dense[i * blocks + j])
01151         {
01152           adj[idx++] = j;
01153         }
01154     }
01155       ptrs[i + 1] = idx;
01156     }
01157 
01158   FREE_DH (dense);
01159   CHECK_V_ERROR;
01160 END_FUNC_DH}
01161 
01162 
01163 #undef __FUNC__
01164 #define __FUNC__ "find_all_neighbors_sym_private"
01165 void
01166 find_all_neighbors_sym_private (SubdomainGraph_dh s, int m, void *A)
01167 {
01168   START_FUNC_DH int *marker, i, j, beg_row, end_row;
01169   int row, len, *cval, ct = 0;
01170   int *nabors = s->allNabors;
01171 
01172   marker = (int *) MALLOC_DH (m * sizeof (int));
01173   CHECK_V_ERROR;
01174   for (i = 0; i < m; ++i)
01175     marker[i] = 0;
01176 
01177   SET_INFO
01178     ("finding nabors in subdomain graph for structurally symmetric matrix");
01179   SET_INFO ("(if this isn't what you want, use '-sym 0' switch)");
01180 
01181   beg_row = s->beg_row[myid_dh];
01182   end_row = beg_row + s->row_count[myid_dh];
01183 
01184   for (row = beg_row; row < end_row; ++row)
01185     {
01186       EuclidGetRow (A, row, &len, &cval, NULL);
01187       CHECK_V_ERROR;
01188       for (j = 0; j < len; ++j)
01189     {
01190       int col = cval[j];
01191       if (col < beg_row || col >= end_row)
01192         {
01193           int owner = SubdomainGraph_dhFindOwner (s, col, false);
01194           CHECK_V_ERROR;
01195           if (!marker[owner])
01196         {
01197           marker[owner] = 1;
01198           nabors[ct++] = owner;
01199         }
01200         }
01201     }
01202       EuclidRestoreRow (A, row, &len, &cval, NULL);
01203       CHECK_V_ERROR;
01204     }
01205   s->allCount = ct;
01206 
01207 /* fprintf(logFile, "@@@@@ allCount= %i\n", ct); */
01208 
01209   if (marker != NULL)
01210     {
01211       FREE_DH (marker);
01212       CHECK_V_ERROR;
01213     }
01214 END_FUNC_DH}
01215 
01216 #undef __FUNC__
01217 #define __FUNC__ "find_all_neighbors_unsym_private"
01218 void
01219 find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m, void *A)
01220 {
01221   START_FUNC_DH int i, j, row, beg_row, end_row;
01222   int *marker;
01223   int *cval, len, idx = 0;
01224   int nz, *nabors = s->allNabors, *myNabors;
01225 
01226   myNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
01227   CHECK_V_ERROR;
01228   marker = (int *) MALLOC_DH (np_dh * sizeof (int));
01229   CHECK_V_ERROR;
01230   for (i = 0; i < np_dh; ++i)
01231     marker[i] = 0;
01232 
01233   SET_INFO
01234     ("finding nabors in subdomain graph for structurally unsymmetric matrix");
01235 
01236   /* loop over this block's boundary rows, finding all nabors in
01237      subdomain graph
01238    */
01239   beg_row = s->beg_row[myid_dh];
01240   end_row = beg_row + s->row_count[myid_dh];
01241 
01242 
01243 
01244   /*for each locally owned row ...   */
01245   for (row = beg_row; row < end_row; ++row)
01246     {
01247       EuclidGetRow (A, row, &len, &cval, NULL);
01248       CHECK_V_ERROR;
01249       for (j = 0; j < len; ++j)
01250     {
01251       int col = cval[j];
01252       /*for each column that corresponds to a non-locally owned row ...  */
01253       if (col < beg_row || col >= end_row)
01254         {
01255           int owner = SubdomainGraph_dhFindOwner (s, col, false);
01256           CHECK_V_ERROR;
01257           /*if I've not yet done so ...   */
01258           if (!marker[owner])
01259         {
01260           marker[owner] = 1;
01261           /*append the non-local row's owner in to the list of my nabors
01262              in the subdomain graph     */
01263           myNabors[idx++] = owner;
01264         }
01265         }
01266     }
01267       EuclidRestoreRow (A, row, &len, &cval, NULL);
01268       CHECK_V_ERROR;
01269     }
01270 
01271   /*
01272      at this point, idx = the number of my neighbors in the subdomain
01273      graph; equivalently, idx is the number of meaningfull slots in
01274      the myNabors array.  -dah 1/31/06
01275    */
01276 
01277   /*
01278      at this point: marker[j] = 0 indicates that processor j is NOT my nabor
01279      marker[j] = 1 indicates that processor j IS my nabor
01280      however, there may be some nabors that can't be discovered in the above loop
01281      "//for each locally owned row;" this can happen if the matrix is
01282      structurally unsymmetric.
01283      -dah 1/31/06
01284    */
01285 
01286 /* fprintf(stderr, "[%i] marker: ", myid_dh);
01287 for (j=0; j<np_dh; j++) {
01288   fprintf(stderr, "[%i] (j=%d) %d\n", myid_dh, j,  marker[j]);
01289 }
01290 fprintf(stderr, "\n");
01291 */
01292 
01293   /* find out who my neighbors are that I cannot discern locally */
01294   MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh);
01295   CHECK_V_ERROR;
01296 
01297   /* add in neighbors that I know about from scanning my adjacency lists */
01298   for (i = 0; i < idx; ++i)
01299     nabors[myNabors[i]] = 1;
01300 
01301   /* remove self from the adjacency list */
01302   nabors[myid_dh] = 0;
01303 
01304   /*
01305      at this point: marker[j] = 0 indicates that processor j is NOT my nabor
01306      marker[j] = 1 indicates that processor j IS my nabor
01307      and this is guaranteed to be complete.
01308    */
01309 
01310   /* form final list of neighboring processors */
01311   nz = 0;
01312   for (i = 0; i < np_dh; ++i)
01313     {
01314       if (nabors[i])
01315     myNabors[nz++] = i;
01316     }
01317   s->allCount = nz;
01318   memcpy (nabors, myNabors, nz * sizeof (int));
01319 
01320   if (marker != NULL)
01321     {
01322       FREE_DH (marker);
01323       CHECK_V_ERROR;
01324     }
01325   if (myNabors != NULL)
01326     {
01327       FREE_DH (myNabors);
01328       CHECK_V_ERROR;
01329     }
01330 END_FUNC_DH}
01331 
01332 /*================================================================*/
01333 
01334 #undef __FUNC__
01335 #define __FUNC__ "find_bdry_nodes_sym_private"
01336 void
01337 find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
01338                  int *interiorNodes, int *bdryNodes,
01339                  int *interiorCount, int *bdryCount)
01340 {
01341   START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01342   int end_row = beg_row + s->row_count[myid_dh];
01343   int row, inCt = 0, bdCt = 0;
01344 
01345   int j;
01346   int *cval;
01347 
01348   /* determine if the row is a boundary row */
01349   for (row = beg_row; row < end_row; ++row)
01350     {               /* for each row in the subdomain */
01351       bool isBdry = false;
01352       int len;
01353       EuclidGetRow (A, row, &len, &cval, NULL);
01354       CHECK_V_ERROR;
01355 
01356       for (j = 0; j < len; ++j)
01357     {           /* for each column in the row */
01358       int col = cval[j];
01359       if (col < beg_row || col >= end_row)
01360         {
01361           isBdry = true;
01362           break;
01363         }
01364     }
01365       EuclidRestoreRow (A, row, &len, &cval, NULL);
01366       CHECK_V_ERROR;
01367 
01368       if (isBdry)
01369     {
01370       bdryNodes[bdCt++] = row - beg_row;
01371     }
01372       else
01373     {
01374       interiorNodes[inCt++] = row - beg_row;
01375     }
01376     }
01377 
01378   *interiorCount = inCt;
01379   *bdryCount = bdCt;
01380 
01381 END_FUNC_DH}
01382 
01383 #define BDRY_NODE_TAG 42
01384 
01385 #undef __FUNC__
01386 #define __FUNC__ "find_bdry_nodes_unsym_private"
01387 void
01388 find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m, void *A,
01389                    int *interiorNodes, int *boundaryNodes,
01390                    int *interiorCount, int *bdryCount)
01391 {
01392   START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01393   int end_row = beg_row + s->row_count[myid_dh];
01394   int i, j, row, max;
01395   int *cval;
01396   int *list, count;
01397   int *rpIN = NULL, *rpOUT = NULL;
01398   int *sendBuf, *recvBuf;
01399   int *marker, inCt, bdCt;
01400   int *bdryNodes, nz;
01401   int sendCt, recvCt;
01402   MPI_Request *sendReq, *recvReq;
01403   MPI_Status *status;
01404   SortedSet_dh ss;
01405 
01406   SortedSet_dhCreate (&ss, m);
01407   CHECK_V_ERROR;
01408 
01409   /*-----------------------------------------------------
01410    * identify all boundary nodes possible using locally
01411    * owned adjacency lists
01412    *-----------------------------------------------------*/
01413   for (row = beg_row; row < end_row; ++row)
01414     {
01415       bool isBdry = false;
01416       int len;
01417       EuclidGetRow (A, row, &len, &cval, NULL);
01418       CHECK_V_ERROR;
01419 
01420       for (j = 0; j < len; ++j)
01421     {
01422       int col = cval[j];
01423       if (col < beg_row || col >= end_row)
01424         {
01425           isBdry = true;    /* this row is a boundary node */
01426           SortedSet_dhInsert (ss, col);
01427           CHECK_V_ERROR;
01428           /* the row "col" is also a boundary node */
01429         }
01430     }
01431       EuclidRestoreRow (A, row, &len, &cval, NULL);
01432       CHECK_V_ERROR;
01433 
01434       if (isBdry)
01435     {
01436       SortedSet_dhInsert (ss, row);
01437       CHECK_V_ERROR;
01438     }
01439     }
01440 
01441   /*-----------------------------------------------------
01442    * scan the sorted list to determine what boundary
01443    * node information to send to whom
01444    *-----------------------------------------------------*/
01445   sendBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01446   CHECK_V_ERROR;
01447   recvBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01448   CHECK_V_ERROR;
01449   rpOUT = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01450   CHECK_V_ERROR;
01451   rpOUT[0] = 0;
01452   for (i = 0; i < np_dh; ++i)
01453     sendBuf[i] = 0;
01454 
01455   sendCt = 0;           /* total number of processor to whom we will send lists */
01456   SortedSet_dhGetList (ss, &list, &count);
01457   CHECK_V_ERROR;
01458 
01459   for (i = 0; i < count; /* i is set below */ )
01460     {
01461       int node = list[i];
01462       int owner;
01463       int last;
01464 
01465       owner = SubdomainGraph_dhFindOwner (s, node, false);
01466       CHECK_V_ERROR;
01467       last = s->beg_row[owner] + s->row_count[owner];
01468 
01469       /* determine the other boundary nodes that belong to owner */
01470       while ((i < count) && (list[i] < last))
01471     ++i;
01472       ++sendCt;
01473       rpOUT[sendCt] = i;
01474       sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1];
01475 
01476     }
01477 
01478   /*-----------------------------------------------------
01479    * processors tell each other how much information
01480    * each will send to whom
01481    *-----------------------------------------------------*/
01482   MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh);
01483   CHECK_V_ERROR;
01484 
01485   /*-----------------------------------------------------
01486    * exchange boundary node information
01487    * (note that we also exchange information with ourself!)
01488    *-----------------------------------------------------*/
01489 
01490   /* first, set up data structures to hold incoming information */
01491   rpIN = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01492   CHECK_V_ERROR;
01493   rpIN[0] = 0;
01494   nz = 0;
01495   recvCt = 0;
01496   for (i = 0; i < np_dh; ++i)
01497     {
01498       if (recvBuf[i])
01499     {
01500       ++recvCt;
01501       nz += recvBuf[i];
01502       rpIN[recvCt] = nz;
01503     }
01504     }
01505   bdryNodes = (int *) MALLOC_DH (nz * sizeof (int));
01506   CHECK_V_ERROR;
01507   sendReq = (MPI_Request *) MALLOC_DH (sendCt * sizeof (MPI_Request));
01508   CHECK_V_ERROR;
01509   recvReq = (MPI_Request *) MALLOC_DH (recvCt * sizeof (MPI_Request));
01510   CHECK_V_ERROR;
01511   max = MAX (sendCt, recvCt);
01512   status = (MPI_Status *) MALLOC_DH (max * sizeof (MPI_Status));
01513   CHECK_V_ERROR;
01514 
01515   /* second, start receives for incoming data */
01516   j = 0;
01517   for (i = 0; i < np_dh; ++i)
01518     {
01519       if (recvBuf[i])
01520     {
01521       MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT,
01522              i, BDRY_NODE_TAG, comm_dh, recvReq + j);
01523       ++j;
01524     }
01525     }
01526 
01527   /* third, start sends for outgoing data */
01528   j = 0;
01529   for (i = 0; i < np_dh; ++i)
01530     {
01531       if (sendBuf[i])
01532     {
01533       MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT,
01534              i, BDRY_NODE_TAG, comm_dh, sendReq + j);
01535       ++j;
01536     }
01537     }
01538 
01539   /* fourth, wait for all comms to finish */
01540   MPI_Waitall (sendCt, sendReq, status);
01541   MPI_Waitall (recvCt, recvReq, status);
01542 
01543   /* fifth, convert from global to local indices */
01544   for (i = 0; i < nz; ++i)
01545     bdryNodes[i] -= beg_row;
01546 
01547   /*-----------------------------------------------------
01548    * consolidate information from all processors to
01549    * identify all local boundary nodes
01550    *-----------------------------------------------------*/
01551   marker = (int *) MALLOC_DH (m * sizeof (int));
01552   CHECK_V_ERROR;
01553   for (i = 0; i < m; ++i)
01554     marker[i] = 0;
01555   for (i = 0; i < nz; ++i)
01556     marker[bdryNodes[i]] = 1;
01557 
01558   inCt = bdCt = 0;
01559   for (i = 0; i < m; ++i)
01560     {
01561       if (marker[i])
01562     {
01563       boundaryNodes[bdCt++] = i;
01564     }
01565       else
01566     {
01567       interiorNodes[inCt++] = i;
01568     }
01569     }
01570   *interiorCount = inCt;
01571   *bdryCount = bdCt;
01572 
01573   /*-----------------------------------------------------
01574    * clean up
01575    *-----------------------------------------------------*/
01576   SortedSet_dhDestroy (ss);
01577   CHECK_V_ERROR;
01578   if (rpIN != NULL)
01579     {
01580       FREE_DH (rpIN);
01581       CHECK_V_ERROR;
01582     }
01583   if (rpOUT != NULL)
01584     {
01585       FREE_DH (rpOUT);
01586       CHECK_V_ERROR;
01587     }
01588   if (sendBuf != NULL)
01589     {
01590       FREE_DH (sendBuf);
01591       CHECK_V_ERROR;
01592     }
01593   if (recvBuf != NULL)
01594     {
01595       FREE_DH (recvBuf);
01596       CHECK_V_ERROR;
01597     }
01598   if (bdryNodes != NULL)
01599     {
01600       FREE_DH (bdryNodes);
01601       CHECK_V_ERROR;
01602     }
01603   if (marker != NULL)
01604     {
01605       FREE_DH (marker);
01606       CHECK_V_ERROR;
01607     }
01608   if (sendReq != NULL)
01609     {
01610       FREE_DH (sendReq);
01611       CHECK_V_ERROR;
01612     }
01613   if (recvReq != NULL)
01614     {
01615       FREE_DH (recvReq);
01616       CHECK_V_ERROR;
01617     }
01618   if (status != NULL)
01619     {
01620       FREE_DH (status);
01621       CHECK_V_ERROR;
01622     }
01623 END_FUNC_DH}
01624 
01625 
01626 #undef __FUNC__
01627 #define __FUNC__ "find_ordered_neighbors_private"
01628 void
01629 find_ordered_neighbors_private (SubdomainGraph_dh s)
01630 {
01631   START_FUNC_DH int *loNabors = s->loNabors;
01632   int *hiNabors = s->hiNabors;
01633   int *allNabors = s->allNabors, allCount = s->allCount;
01634   int loCt = 0, hiCt = 0;
01635   int *o2n = s->o2n_sub;
01636   int i, myNewId = o2n[myid_dh];
01637 
01638   for (i = 0; i < allCount; ++i)
01639     {
01640       int nabor = allNabors[i];
01641       if (o2n[nabor] < myNewId)
01642     {
01643       loNabors[loCt++] = nabor;
01644     }
01645       else
01646     {
01647       hiNabors[hiCt++] = nabor;
01648     }
01649     }
01650 
01651   s->loCount = loCt;
01652   s->hiCount = hiCt;
01653 END_FUNC_DH}
01654 
01655 
01656 #undef __FUNC__
01657 #define __FUNC__ "color_subdomain_graph_private"
01658 void
01659 color_subdomain_graph_private (SubdomainGraph_dh s)
01660 {
01661   START_FUNC_DH int i, n = np_dh;
01662   int *rp = s->ptrs, *cval = s->adj;
01663   int j, *marker, thisNodesColor, *colorCounter;
01664   int *o2n = s->o2n_sub;
01665   int *color = s->colorVec;
01666 
01667   if (np_dh == 1)
01668     n = s->blocks;
01669 
01670   marker = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01671   colorCounter = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01672   for (i = 0; i <= n; ++i)
01673     {
01674       marker[i] = -1;
01675       colorCounter[i] = 0;
01676     }
01677 
01678   /*------------------------------------------------------------------
01679    * color the nodes
01680    *------------------------------------------------------------------*/
01681   for (i = 0; i < n; ++i)
01682     {               /* color node "i" */
01683       /* mark colors of "i"s nabors as unavailable;
01684          only need to mark nabors that are (per the input ordering)
01685          numbered less than "i."
01686        */
01687       for (j = rp[i]; j < rp[i + 1]; ++j)
01688     {
01689       int nabor = cval[j];
01690       if (nabor < i)
01691         {
01692           int naborsColor = color[nabor];
01693           marker[naborsColor] = i;
01694         }
01695     }
01696 
01697       /* assign vertex i the "smallest" possible color */
01698       thisNodesColor = -1;
01699       for (j = 0; j < n; ++j)
01700     {
01701       if (marker[j] != i)
01702         {
01703           thisNodesColor = j;
01704           break;
01705         }
01706     }
01707       color[i] = thisNodesColor;
01708       colorCounter[1 + thisNodesColor] += 1;
01709     }
01710 
01711   /*------------------------------------------------------------------
01712    * build ordering vector; if two nodes are similarly colored,
01713    * they will have the same relative ordering as before.
01714    *------------------------------------------------------------------*/
01715   /* prefix-sum to find lowest-numbered node for each color */
01716   for (i = 1; i < n; ++i)
01717     {
01718       if (colorCounter[i] == 0)
01719     break;
01720       colorCounter[i] += colorCounter[i - 1];
01721     }
01722 
01723   for (i = 0; i < n; ++i)
01724     {
01725       o2n[i] = colorCounter[color[i]];
01726       colorCounter[color[i]] += 1;
01727     }
01728 
01729   /* invert permutation */
01730   invert_perm (n, s->o2n_sub, s->n2o_sub);
01731   CHECK_V_ERROR;
01732 
01733 
01734   /*------------------------------------------------------------------
01735    * count the number of colors used
01736    *------------------------------------------------------------------*/
01737   {
01738     int ct = 0;
01739     for (j = 0; j < n; ++j)
01740       {
01741     if (marker[j] == -1)
01742       break;
01743     ++ct;
01744       }
01745     s->colors = ct;
01746   }
01747 
01748 
01749   /*------------------------------------------------------------------
01750    * (re)build the beg_rowP array
01751    *------------------------------------------------------------------*/
01752   {
01753     int sum = 0;
01754     for (i = 0; i < n; ++i)
01755       {
01756     int old = s->n2o_sub[i];
01757     s->beg_rowP[old] = sum;
01758     sum += s->row_count[old];
01759       }
01760   }
01761 
01762   FREE_DH (marker);
01763   CHECK_V_ERROR;
01764   FREE_DH (colorCounter);
01765   CHECK_V_ERROR;
01766 END_FUNC_DH}
01767 
01768 
01769 #undef __FUNC__
01770 #define __FUNC__ "SubdomainGraph_dhDump"
01771 void
01772 SubdomainGraph_dhDump (SubdomainGraph_dh s, char *filename)
01773 {
01774   START_FUNC_DH int i;
01775   int sCt = np_dh;
01776   FILE *fp;
01777 
01778   if (np_dh == 1)
01779     sCt = s->blocks;
01780 
01781 
01782   /* ---------------------------------------------------------
01783    *  for seq and par runs, 1st processor prints information
01784    *  that is common to all processors
01785    * ---------------------------------------------------------*/
01786   fp = openFile_dh (filename, "w");
01787   CHECK_V_ERROR;
01788 
01789   /* write subdomain ordering permutations */
01790   fprintf (fp, "----- colors used\n");
01791   fprintf (fp, "%i\n", s->colors);
01792   if (s->colorVec == NULL)
01793     {
01794       fprintf (fp, "s->colorVec == NULL\n");
01795     }
01796   else
01797     {
01798       fprintf (fp, "----- colorVec\n");
01799       for (i = 0; i < sCt; ++i)
01800     {
01801       fprintf (fp, "%i ", s->colorVec[i]);
01802     }
01803       fprintf (fp, "\n");
01804     }
01805 
01806   if (s->o2n_sub == NULL || s->o2n_sub == NULL)
01807     {
01808       fprintf (fp, "s->o2n_sub == NULL || s->o2n_sub == NULL\n");
01809     }
01810   else
01811     {
01812       fprintf (fp, "----- o2n_sub\n");
01813       for (i = 0; i < sCt; ++i)
01814     {
01815       fprintf (fp, "%i ", s->o2n_sub[i]);
01816     }
01817       fprintf (fp, "\n");
01818       fprintf (fp, "----- n2o_sub\n");
01819       for (i = 0; i < sCt; ++i)
01820     {
01821       fprintf (fp, "%i ", s->n2o_sub[i]);
01822     }
01823       fprintf (fp, "\n");
01824     }
01825 
01826   /* write begin row arrays */
01827   if (s->beg_row == NULL || s->beg_rowP == NULL)
01828     {
01829       fprintf (fp, "s->beg_row == NULL || s->beg_rowP == NULL\n");
01830     }
01831   else
01832     {
01833       fprintf (fp, "----- beg_row\n");
01834       for (i = 0; i < sCt; ++i)
01835     {
01836       fprintf (fp, "%i ", 1 + s->beg_row[i]);
01837     }
01838       fprintf (fp, "\n");
01839       fprintf (fp, "----- beg_rowP\n");
01840       for (i = 0; i < sCt; ++i)
01841     {
01842       fprintf (fp, "%i ", 1 + s->beg_rowP[i]);
01843     }
01844       fprintf (fp, "\n");
01845     }
01846 
01847   /* write row count  and bdry count arrays */
01848   if (s->row_count == NULL || s->bdry_count == NULL)
01849     {
01850       fprintf (fp, "s->row_count == NULL || s->bdry_count == NULL\n");
01851     }
01852   else
01853     {
01854       fprintf (fp, "----- row_count\n");
01855       for (i = 0; i < sCt; ++i)
01856     {
01857       fprintf (fp, "%i ", s->row_count[i]);
01858     }
01859       fprintf (fp, "\n");
01860       fprintf (fp, "----- bdry_count\n");
01861       for (i = 0; i < sCt; ++i)
01862     {
01863       fprintf (fp, "%i ", s->bdry_count[i]);
01864     }
01865       fprintf (fp, "\n");
01866 
01867     }
01868 
01869   /* write subdomain graph */
01870   if (s->ptrs == NULL || s->adj == NULL)
01871     {
01872       fprintf (fp, "s->ptrs == NULL || s->adj == NULL\n");
01873     }
01874   else
01875     {
01876       int j;
01877       int ct;
01878       fprintf (fp, "----- subdomain graph\n");
01879       for (i = 0; i < sCt; ++i)
01880     {
01881       fprintf (fp, "%i :: ", i);
01882       ct = s->ptrs[i + 1] - s->ptrs[i];
01883       if (ct)
01884         {
01885           shellSort_int (ct, s->adj + s->ptrs[i]);
01886           CHECK_V_ERROR;
01887         }
01888       for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
01889         {
01890           fprintf (fp, "%i ", s->adj[j]);
01891         }
01892       fprintf (fp, "\n");
01893     }
01894     }
01895   closeFile_dh (fp);
01896   CHECK_V_ERROR;
01897 
01898   /* ---------------------------------------------------------
01899    *  next print info that differs across processors for par
01900    *  trials.  deal with this as two cases: seq and par
01901    * ---------------------------------------------------------*/
01902   if (s->beg_rowP == NULL)
01903     {
01904       SET_V_ERROR ("s->beg_rowP == NULL; can't continue");
01905     }
01906   if (s->row_count == NULL)
01907     {
01908       SET_V_ERROR ("s->row_count == NULL; can't continue");
01909     }
01910   if (s->o2n_sub == NULL)
01911     {
01912       SET_V_ERROR ("s->o2n_sub == NULL; can't continue");
01913     }
01914 
01915 
01916   if (np_dh == 1)
01917     {
01918       fp = openFile_dh (filename, "a");
01919       CHECK_V_ERROR;
01920 
01921       /* write n2o_row  and o2n_col */
01922       if (s->n2o_row == NULL || s->o2n_col == NULL)
01923     {
01924       fprintf (fp, "s->n2o_row == NULL|| s->o2n_col == NULL\n");
01925     }
01926       else
01927     {
01928       fprintf (fp, "----- n2o_row\n");
01929       for (i = 0; i < s->m; ++i)
01930         {
01931           fprintf (fp, "%i ", 1 + s->n2o_row[i]);
01932         }
01933       fprintf (fp, "\n");
01934 
01935 #if 0
01936 /*
01937 note: this won't match the parallel case, since
01938       parallel permutation vecs are zero-based and purely
01939       local
01940 */
01941 
01942       fprintf (fp, "----- o2n_col\n");
01943       for (i = 0; i < sCt; ++i)
01944         {
01945           int br = s->beg_row[i];
01946           int er = br + s->row_count[i];
01947 
01948           for (j = br; j < er; ++j)
01949         {
01950           fprintf (fp, "%i ", 1 + s->o2n_col[j]);
01951         }
01952           fprintf (fp, "\n");
01953         }
01954       fprintf (fp, "\n");
01955 
01956 #endif
01957 
01958     }
01959       closeFile_dh (fp);
01960       CHECK_V_ERROR;
01961     }
01962 
01963   /* parallel case */
01964   else
01965     {
01966       int id = s->n2o_sub[myid_dh];
01967       int m = s->m;
01968       int pe;
01969       int beg_row = 0;
01970       if (s->beg_row != 0)
01971     beg_row = s->beg_row[myid_dh];
01972 
01973       /* write n2o_row */
01974       for (pe = 0; pe < np_dh; ++pe)
01975     {
01976       MPI_Barrier (comm_dh);
01977       if (id == pe)
01978         {
01979           fp = openFile_dh (filename, "a");
01980           CHECK_V_ERROR;
01981           if (id == 0)
01982         fprintf (fp, "----- n2o_row\n");
01983 
01984           for (i = 0; i < m; ++i)
01985         {
01986           fprintf (fp, "%i ", 1 + s->n2o_row[i] + beg_row);
01987         }
01988           if (id == np_dh - 1)
01989         fprintf (fp, "\n");
01990           closeFile_dh (fp);
01991           CHECK_V_ERROR;
01992         }
01993     }
01994 
01995 #if 0
01996 
01997       /* write o2n_col */
01998       for (pe = 0; pe < np_dh; ++pe)
01999     {
02000       MPI_Barrier (comm_dh);
02001       if (myid_dh == pe)
02002         {
02003           fp = openFile_dh (filename, "a");
02004           CHECK_V_ERROR;
02005           if (myid_dh == 0)
02006         fprintf (fp, "----- o2n_col\n");
02007 
02008           for (i = 0; i < m; ++i)
02009         {
02010           fprintf (fp, "%i ", 1 + s->o2n_col[i] + beg_row);
02011         }
02012           fprintf (fp, "\n");
02013 
02014           if (myid_dh == np_dh - 1)
02015         fprintf (fp, "\n");
02016 
02017           closeFile_dh (fp);
02018           CHECK_V_ERROR;
02019         }
02020     }
02021 
02022 #endif
02023 
02024     }
02025 
02026 END_FUNC_DH}
02027 
02028 
02029 
02030 #undef __FUNC__
02031 #define __FUNC__ "find_bdry_nodes_seq_private"
02032 void
02033 find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A)
02034 {
02035   START_FUNC_DH int i, j, row, blocks = s->blocks;
02036   int *cval, *tmp;
02037 
02038   tmp = (int *) MALLOC_DH (m * sizeof (int));
02039   CHECK_V_ERROR;
02040   for (i = 0; i < m; ++i)
02041     tmp[i] = 0;
02042 
02043     /*------------------------------------------ 
02044      * mark all boundary nodes
02045      *------------------------------------------ */
02046   for (i = 0; i < blocks; ++i)
02047     {
02048       int beg_row = s->beg_row[i];
02049       int end_row = beg_row + s->row_count[i];
02050 
02051       for (row = beg_row; row < end_row; ++row)
02052     {
02053       bool isBdry = false;
02054       int len;
02055       EuclidGetRow (A, row, &len, &cval, NULL);
02056       CHECK_V_ERROR;
02057 
02058       for (j = 0; j < len; ++j)
02059         {           /* for each column in the row */
02060           int col = cval[j];
02061 
02062           if (col < beg_row || col >= end_row)
02063         {
02064           tmp[col] = 1;
02065           isBdry = true;
02066         }
02067         }
02068       if (isBdry)
02069         tmp[row] = 1;
02070       EuclidRestoreRow (A, row, &len, &cval, NULL);
02071       CHECK_V_ERROR;
02072     }
02073     }
02074 
02075     /*------------------------------------------
02076      * fill in the bdry_count[] array
02077      *------------------------------------------ */
02078   for (i = 0; i < blocks; ++i)
02079     {
02080       int beg_row = s->beg_row[i];
02081       int end_row = beg_row + s->row_count[i];
02082       int ct = 0;
02083       for (row = beg_row; row < end_row; ++row)
02084     {
02085       if (tmp[row])
02086         ++ct;
02087     }
02088       s->bdry_count[i] = ct;
02089     }
02090 
02091     /*------------------------------------------
02092      * form the o2n_col[] permutation
02093      *------------------------------------------ */
02094   for (i = 0; i < blocks; ++i)
02095     {
02096       int beg_row = s->beg_row[i];
02097       int end_row = beg_row + s->row_count[i];
02098       int interiorIDX = beg_row;
02099       int bdryIDX = end_row - s->bdry_count[i];
02100 
02101       for (row = beg_row; row < end_row; ++row)
02102     {
02103       if (tmp[row])
02104         {
02105           s->o2n_col[row] = bdryIDX++;
02106         }
02107       else
02108         {
02109           s->o2n_col[row] = interiorIDX++;
02110         }
02111     }
02112     }
02113 
02114   /* invert permutation */
02115   invert_perm (m, s->o2n_col, s->n2o_row);
02116   CHECK_V_ERROR;
02117   FREE_DH (tmp);
02118   CHECK_V_ERROR;
02119 
02120 END_FUNC_DH}
02121 
02122 #undef __FUNC__
02123 #define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph"
02124 void
02125 SubdomainGraph_dhPrintSubdomainGraph (SubdomainGraph_dh s, FILE * fp)
02126 {
02127   START_FUNC_DH if (myid_dh == 0)
02128     {
02129       int i, j;
02130 
02131       fprintf (fp,
02132            "\n-----------------------------------------------------\n");
02133       fprintf (fp, "SubdomainGraph, and coloring and ordering information\n");
02134       fprintf (fp, "-----------------------------------------------------\n");
02135       fprintf (fp, "colors used: %i\n", s->colors);
02136 
02137       fprintf (fp, "o2n ordering vector: ");
02138       for (i = 0; i < s->blocks; ++i)
02139     fprintf (fp, "%i ", s->o2n_sub[i]);
02140 
02141       fprintf (fp, "\ncoloring vector (node, color): \n");
02142       for (i = 0; i < s->blocks; ++i)
02143     fprintf (fp, "  %i, %i\n", i, s->colorVec[i]);
02144 
02145       fprintf (fp, "\n");
02146       fprintf (fp, "Adjacency lists:\n");
02147 
02148       for (i = 0; i < s->blocks; ++i)
02149     {
02150       fprintf (fp, "   P_%i :: ", i);
02151       for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
02152         {
02153           fprintf (fp, "%i ", s->adj[j]);
02154         }
02155       fprintf (fp, "\n");
02156     }
02157       fprintf (fp, "-----------------------------------------------------\n");
02158     }
02159 END_FUNC_DH}
02160 
02161 
02162 #undef __FUNC__
02163 #define __FUNC__ "adjust_matrix_perms_private"
02164 void
02165 adjust_matrix_perms_private (SubdomainGraph_dh s, int m)
02166 {
02167   START_FUNC_DH int i, j, blocks = s->blocks;
02168   int *o2n = s->o2n_col;
02169 
02170   for (i = 0; i < blocks; ++i)
02171     {
02172       int beg_row = s->beg_row[i];
02173       int end_row = beg_row + s->row_count[i];
02174       int adjust = s->beg_rowP[i] - s->beg_row[i];
02175       for (j = beg_row; j < end_row; ++j)
02176     o2n[j] += adjust;
02177     }
02178 
02179   invert_perm (m, s->o2n_col, s->n2o_row);
02180   CHECK_V_ERROR;
02181 END_FUNC_DH}
02182 
02183 #undef __FUNC__
02184 #define __FUNC__ "SubdomainGraph_dhPrintRatios"
02185 void
02186 SubdomainGraph_dhPrintRatios (SubdomainGraph_dh s, FILE * fp)
02187 {
02188   START_FUNC_DH int i;
02189   int blocks = np_dh;
02190   double ratio[25];
02191 
02192   if (myid_dh == 0)
02193     {
02194       if (np_dh == 1)
02195     blocks = s->blocks;
02196       if (blocks > 25)
02197     blocks = 25;
02198 
02199       fprintf (fp, "\n");
02200       fprintf (fp, "Subdomain interior/boundary node ratios\n");
02201       fprintf (fp, "---------------------------------------\n");
02202 
02203       /* compute ratios */
02204       for (i = 0; i < blocks; ++i)
02205     {
02206       if (s->bdry_count[i] == 0)
02207         {
02208           ratio[i] = -1;
02209         }
02210       else
02211         {
02212           ratio[i] =
02213         (double) (s->row_count[i] -
02214               s->bdry_count[i]) / (double) s->bdry_count[i];
02215         }
02216     }
02217 
02218       /* sort ratios */
02219       shellSort_float (blocks, ratio);
02220 
02221       /* print ratios */
02222       if (blocks <= 20)
02223     {           /* print all ratios */
02224       int j = 0;
02225       for (i = 0; i < blocks; ++i)
02226         {
02227           fprintf (fp, "%0.2g  ", ratio[i]);
02228           ++j;
02229           if (j == 10)
02230         {
02231           fprintf (fp, "\n");
02232         }
02233         }
02234       fprintf (fp, "\n");
02235     }
02236       else
02237     {           /* print 10 largest and 10 smallest ratios */
02238       fprintf (fp, "10 smallest ratios: ");
02239       for (i = 0; i < 10; ++i)
02240         {
02241           fprintf (fp, "%0.2g  ", ratio[i]);
02242         }
02243       fprintf (fp, "\n");
02244       fprintf (fp, "10 largest ratios:  ");
02245       {
02246         int start = blocks - 6, stop = blocks - 1;
02247         for (i = start; i < stop; ++i)
02248           {
02249         fprintf (fp, "%0.2g  ", ratio[i]);
02250           }
02251         fprintf (fp, "\n");
02252       }
02253     }
02254     }
02255 
02256 END_FUNC_DH}
02257 
02258 
02259 #undef __FUNC__
02260 #define __FUNC__ "SubdomainGraph_dhPrintStats"
02261 void
02262 SubdomainGraph_dhPrintStats (SubdomainGraph_dh sg, FILE * fp)
02263 {
02264   START_FUNC_DH double *timing = sg->timing;
02265 
02266   fprintf_dh (fp, "\nSubdomainGraph timing report\n");
02267   fprintf_dh (fp, "-----------------------------\n");
02268   fprintf_dh (fp, "total setup time: %0.2f\n", timing[TOTAL_SGT]);
02269   fprintf_dh (fp, "  find neighbors in subdomain graph: %0.2f\n",
02270           timing[FIND_NABORS_SGT]);
02271   fprintf_dh (fp, "  locally order interiors and bdry:  %0.2f\n",
02272           timing[ORDER_BDRY_SGT]);
02273   fprintf_dh (fp, "  form and color subdomain graph:    %0.2f\n",
02274           timing[FORM_GRAPH_SGT]);
02275   fprintf_dh (fp, "  exchange bdry permutations:        %0.2f\n",
02276           timing[EXCHANGE_PERMS_SGT]);
02277   fprintf_dh (fp, "  everything else (should be small): %0.2f\n",
02278           timing[TOTAL_SGT] - (timing[FIND_NABORS_SGT] +
02279                    timing[ORDER_BDRY_SGT] +
02280                    timing[FORM_GRAPH_SGT] +
02281                    timing[EXCHANGE_PERMS_SGT]));
02282 END_FUNC_DH}

Generated on Wed May 12 21:30:18 2010 for IFPACK by  doxygen 1.4.7