mat_dh_private.c

00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "mat_dh_private.h"
00031 #include "Parser_dh.h"
00032 #include "Hash_i_dh.h"
00033 #include "Mat_dh.h"
00034 #include "Mem_dh.h"
00035 #include "Vec_dh.h"
00036 
00037 #define IS_UPPER_TRI 97
00038 #define IS_LOWER_TRI 98
00039 #define IS_FULL      99
00040 static int isTriangular (int m, int *rp, int *cval);
00041 
00042 /* Instantiates Aout; allocates storage for rp, cval, and aval arrays;
00043    uses rowLengths[] and rowToBlock[] data to fill in rp[].
00044 */
00045 static void mat_par_read_allocate_private (Mat_dh * Aout, int n,
00046                        int *rowLengths, int *rowToBlock);
00047 
00048 /* Currently, divides (partitions)matrix by contiguous sections of rows.
00049    For future expansion: use metis.
00050 */
00051 void mat_partition_private (Mat_dh A, int blocks, int *o2n_row,
00052                 int *rowToBlock);
00053 
00054 
00055 static void convert_triples_to_scr_private (int m, int nz,
00056                         int *I, int *J, double *A,
00057                         int *rp, int *cval, double *aval);
00058 
00059 #if 0
00060 #undef __FUNC__
00061 #define __FUNC__ "mat_dh_print_graph_private"
00062 void
00063 mat_dh_print_graph_private (int m, int beg_row, int *rp, int *cval,
00064                 double *aval, int *n2o, int *o2n, Hash_i_dh hash,
00065                 FILE * fp)
00066 {
00067   START_FUNC_DH int i, j, row, col;
00068   double val;
00069   bool private_n2o = false;
00070   bool private_hash = false;
00071 
00072   if (n2o == NULL)
00073     {
00074       private_n2o = true;
00075       create_nat_ordering_private (m, &n2o);
00076       CHECK_V_ERROR;
00077       create_nat_ordering_private (m, &o2n);
00078       CHECK_V_ERROR;
00079     }
00080 
00081   if (hash == NULL)
00082     {
00083       private_hash = true;
00084       Hash_i_dhCreate (&hash, -1);
00085       CHECK_V_ERROR;
00086     }
00087 
00088   for (i = 0; i < m; ++i)
00089     {
00090       row = n2o[i];
00091       for (j = rp[row]; j < rp[row + 1]; ++j)
00092     {
00093       col = cval[j];
00094       if (col < beg_row || col >= beg_row + m)
00095         {
00096           int tmp = col;
00097 
00098           /* nonlocal column: get permutation from hash table */
00099           tmp = Hash_i_dhLookup (hash, col);
00100           CHECK_V_ERROR;
00101           if (tmp == -1)
00102         {
00103           sprintf (msgBuf_dh,
00104                "beg_row= %i  m= %i; nonlocal column= %i not in hash table",
00105                beg_row, m, col);
00106           SET_V_ERROR (msgBuf_dh);
00107         }
00108           else
00109         {
00110           col = tmp;
00111         }
00112         }
00113       else
00114         {
00115           col = o2n[col];
00116         }
00117 
00118       if (aval == NULL)
00119         {
00120           val = _MATLAB_ZERO_;
00121         }
00122       else
00123         {
00124           val = aval[j];
00125         }
00126       fprintf (fp, "%i %i %g\n", 1 + row + beg_row, 1 + col, val);
00127     }
00128     }
00129 
00130   if (private_n2o)
00131     {
00132       destroy_nat_ordering_private (n2o);
00133       CHECK_V_ERROR;
00134       destroy_nat_ordering_private (o2n);
00135       CHECK_V_ERROR;
00136     }
00137 
00138   if (private_hash)
00139     {
00140       Hash_i_dhDestroy (hash);
00141       CHECK_V_ERROR;
00142     }
00143 END_FUNC_DH}
00144 
00145 #endif
00146 
00147 
00148 /* currently only for unpermuted */
00149 #undef __FUNC__
00150 #define __FUNC__ "mat_dh_print_graph_private"
00151 void
00152 mat_dh_print_graph_private (int m, int beg_row, int *rp, int *cval,
00153                 double *aval, int *n2o, int *o2n, Hash_i_dh hash,
00154                 FILE * fp)
00155 {
00156   START_FUNC_DH int i, j, row, col;
00157   bool private_n2o = false;
00158   bool private_hash = false;
00159   int *work = NULL;
00160 
00161   work = (int *) MALLOC_DH (m * sizeof (int));
00162   CHECK_V_ERROR;
00163 
00164   if (n2o == NULL)
00165     {
00166       private_n2o = true;
00167       create_nat_ordering_private (m, &n2o);
00168       CHECK_V_ERROR;
00169       create_nat_ordering_private (m, &o2n);
00170       CHECK_V_ERROR;
00171     }
00172 
00173   if (hash == NULL)
00174     {
00175       private_hash = true;
00176       Hash_i_dhCreate (&hash, -1);
00177       CHECK_V_ERROR;
00178     }
00179 
00180   for (i = 0; i < m; ++i)
00181     {
00182       for (j = 0; j < m; ++j)
00183     work[j] = 0;
00184       row = n2o[i];
00185       for (j = rp[row]; j < rp[row + 1]; ++j)
00186     {
00187       col = cval[j];
00188 
00189       /* local column */
00190       if (col >= beg_row || col < beg_row + m)
00191         {
00192           col = o2n[col];
00193         }
00194 
00195       /* nonlocal column: get permutation from hash table */
00196       else
00197         {
00198           int tmp = col;
00199 
00200           tmp = Hash_i_dhLookup (hash, col);
00201           CHECK_V_ERROR;
00202           if (tmp == -1)
00203         {
00204           sprintf (msgBuf_dh,
00205                "beg_row= %i  m= %i; nonlocal column= %i not in hash table",
00206                beg_row, m, col);
00207           SET_V_ERROR (msgBuf_dh);
00208         }
00209           else
00210         {
00211           col = tmp;
00212         }
00213         }
00214 
00215       work[col] = 1;
00216     }
00217 
00218       for (j = 0; j < m; ++j)
00219     {
00220       if (work[j])
00221         {
00222           fprintf (fp, " x ");
00223         }
00224       else
00225         {
00226           fprintf (fp, "   ");
00227         }
00228     }
00229       fprintf (fp, "\n");
00230     }
00231 
00232   if (private_n2o)
00233     {
00234       destroy_nat_ordering_private (n2o);
00235       CHECK_V_ERROR;
00236       destroy_nat_ordering_private (o2n);
00237       CHECK_V_ERROR;
00238     }
00239 
00240   if (private_hash)
00241     {
00242       Hash_i_dhDestroy (hash);
00243       CHECK_V_ERROR;
00244     }
00245 
00246   if (work != NULL)
00247     {
00248       FREE_DH (work);
00249       CHECK_V_ERROR;
00250     }
00251 END_FUNC_DH}
00252 
00253 #undef __FUNC__
00254 #define __FUNC__ "create_nat_ordering_private"
00255 void
00256 create_nat_ordering_private (int m, int **p)
00257 {
00258   START_FUNC_DH int *tmp, i;
00259 
00260   tmp = *p = (int *) MALLOC_DH (m * sizeof (int));
00261   CHECK_V_ERROR;
00262   for (i = 0; i < m; ++i)
00263     {
00264       tmp[i] = i;
00265     }
00266 END_FUNC_DH}
00267 
00268 #undef __FUNC__
00269 #define __FUNC__ "destroy_nat_ordering_private"
00270 void
00271 destroy_nat_ordering_private (int *p)
00272 {
00273   START_FUNC_DH FREE_DH (p);
00274   CHECK_V_ERROR;
00275 END_FUNC_DH}
00276 
00277 
00278 #undef __FUNC__
00279 #define __FUNC__ "invert_perm"
00280 void
00281 invert_perm (int m, int *pIN, int *pOUT)
00282 {
00283   START_FUNC_DH int i;
00284 
00285   for (i = 0; i < m; ++i)
00286     pOUT[pIN[i]] = i;
00287 END_FUNC_DH}
00288 
00289 
00290 
00291 /* only implemented for a single cpu! */
00292 #undef __FUNC__
00293 #define __FUNC__ "mat_dh_print_csr_private"
00294 void
00295 mat_dh_print_csr_private (int m, int *rp, int *cval, double *aval, FILE * fp)
00296 {
00297   START_FUNC_DH int i, nz = rp[m];
00298 
00299   /* print header line */
00300   fprintf (fp, "%i %i\n", m, rp[m]);
00301 
00302   /* print rp[] */
00303   for (i = 0; i <= m; ++i)
00304     fprintf (fp, "%i ", rp[i]);
00305   fprintf (fp, "\n");
00306 
00307   /* print cval[] */
00308   for (i = 0; i < nz; ++i)
00309     fprintf (fp, "%i ", cval[i]);
00310   fprintf (fp, "\n");
00311 
00312   /* print aval[] */
00313   for (i = 0; i < nz; ++i)
00314     fprintf (fp, "%1.19e ", aval[i]);
00315   fprintf (fp, "\n");
00316 
00317 END_FUNC_DH}
00318 
00319 
00320 /* only implemented for a single cpu! */
00321 #undef __FUNC__
00322 #define __FUNC__ "mat_dh_read_csr_private"
00323 void
00324 mat_dh_read_csr_private (int *mOUT, int **rpOUT, int **cvalOUT,
00325              double **avalOUT, FILE * fp)
00326 {
00327   START_FUNC_DH int i, m, nz, items;
00328   int *rp, *cval;
00329   double *aval;
00330 
00331   /* read header line */
00332   items = fscanf (fp, "%d %d", &m, &nz);
00333   if (items != 2)
00334     {
00335       SET_V_ERROR ("failed to read header");
00336     }
00337   else
00338     {
00339       printf ("mat_dh_read_csr_private:: m= %i  nz= %i\n", m, nz);
00340     }
00341 
00342   *mOUT = m;
00343   rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int));
00344   CHECK_V_ERROR;
00345   cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
00346   CHECK_V_ERROR;
00347   aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double));
00348   CHECK_V_ERROR;
00349 
00350   /* read rp[] block */
00351   for (i = 0; i <= m; ++i)
00352     {
00353       items = fscanf (fp, "%d", &(rp[i]));
00354       if (items != 1)
00355     {
00356       sprintf (msgBuf_dh, "failed item %i of %i in rp block", i, m + 1);
00357       SET_V_ERROR (msgBuf_dh);
00358     }
00359     }
00360 
00361   /* read cval[] block */
00362   for (i = 0; i < nz; ++i)
00363     {
00364       items = fscanf (fp, "%d", &(cval[i]));
00365       if (items != 1)
00366     {
00367       sprintf (msgBuf_dh, "failed item %i of %i in cval block", i, m + 1);
00368       SET_V_ERROR (msgBuf_dh);
00369     }
00370     }
00371 
00372   /* read aval[] block */
00373   for (i = 0; i < nz; ++i)
00374     {
00375       items = fscanf (fp, "%lg", &(aval[i]));
00376       if (items != 1)
00377     {
00378       sprintf (msgBuf_dh, "failed item %i of %i in aval block", i, m + 1);
00379       SET_V_ERROR (msgBuf_dh);
00380     }
00381     }
00382 END_FUNC_DH}
00383 
00384 /*============================================*/
00385 #define MAX_JUNK 200
00386 
00387 #undef __FUNC__
00388 #define __FUNC__ "mat_dh_read_triples_private"
00389 void
00390 mat_dh_read_triples_private (int ignore, int *mOUT, int **rpOUT,
00391                  int **cvalOUT, double **avalOUT, FILE * fp)
00392 {
00393   START_FUNC_DH int m, n, nz, items, i, j;
00394   int idx = 0;
00395   int *cval, *rp, *I, *J;
00396   double *aval, *A, v;
00397   char junk[MAX_JUNK];
00398   fpos_t fpos;
00399 
00400   /* skip over header */
00401   if (ignore && myid_dh == 0)
00402     {
00403       printf
00404     ("mat_dh_read_triples_private:: ignoring following header lines:\n");
00405       printf
00406     ("--------------------------------------------------------------\n");
00407       for (i = 0; i < ignore; ++i)
00408     {
00409       fgets (junk, MAX_JUNK, fp);
00410       printf ("%s", junk);
00411     }
00412       printf
00413     ("--------------------------------------------------------------\n");
00414       if (fgetpos (fp, &fpos))
00415     SET_V_ERROR ("fgetpos failed!");
00416       printf ("\nmat_dh_read_triples_private::1st two non-ignored lines:\n");
00417       printf
00418     ("--------------------------------------------------------------\n");
00419       for (i = 0; i < 2; ++i)
00420     {
00421       fgets (junk, MAX_JUNK, fp);
00422       printf ("%s", junk);
00423     }
00424       printf
00425     ("--------------------------------------------------------------\n");
00426       if (fsetpos (fp, &fpos))
00427     SET_V_ERROR ("fsetpos failed!");
00428     }
00429 
00430 
00431   if (feof (fp))
00432     printf ("trouble!");
00433 
00434   /* determine matrix dimensions */
00435   m = n = nz = 0;
00436   while (!feof (fp))
00437     {
00438       items = fscanf (fp, "%d %d %lg", &i, &j, &v);
00439       if (items != 3)
00440     {
00441       break;
00442     }
00443       ++nz;
00444       if (i > m)
00445     m = i;
00446       if (j > n)
00447     n = j;
00448     }
00449 
00450   if (myid_dh == 0)
00451     {
00452       printf ("mat_dh_read_triples_private: m= %i  nz= %i\n", m, nz);
00453     }
00454 
00455 
00456   /* reset file, and skip over header again */
00457   rewind (fp);
00458   for (i = 0; i < ignore; ++i)
00459     {
00460       fgets (junk, MAX_JUNK, fp);
00461     }
00462 
00463   /* error check for squareness */
00464   if (m != n)
00465     {
00466       sprintf (msgBuf_dh, "matrix is not square; row= %i, cols= %i", m, n);
00467       SET_V_ERROR (msgBuf_dh);
00468     }
00469 
00470   *mOUT = m;
00471 
00472   /* allocate storage */
00473   rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int));
00474   CHECK_V_ERROR;
00475   cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
00476   CHECK_V_ERROR;
00477   aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double));
00478   CHECK_V_ERROR;
00479 
00480   I = (int *) MALLOC_DH (nz * sizeof (int));
00481   CHECK_V_ERROR;
00482   J = (int *) MALLOC_DH (nz * sizeof (int));
00483   CHECK_V_ERROR;
00484   A = (double *) MALLOC_DH (nz * sizeof (double));
00485   CHECK_V_ERROR;
00486 
00487   /* read <row, col, value> triples into arrays */
00488   while (!feof (fp))
00489     {
00490       items = fscanf (fp, "%d %d %lg", &i, &j, &v);
00491       if (items < 3)
00492     break;
00493       j--;
00494       i--;
00495       I[idx] = i;
00496       J[idx] = j;
00497       A[idx] = v;
00498       ++idx;
00499     }
00500 
00501   /* convert from triples to sparse-compressed-row storage */
00502   convert_triples_to_scr_private (m, nz, I, J, A, rp, cval, aval);
00503   CHECK_V_ERROR;
00504 
00505   /* if matrix is triangular */
00506   {
00507     int type;
00508     type = isTriangular (m, rp, cval);
00509     CHECK_V_ERROR;
00510     if (type == IS_UPPER_TRI)
00511       {
00512     printf ("CAUTION: matrix is upper triangular; converting to full\n");
00513       }
00514     else if (type == IS_LOWER_TRI)
00515       {
00516     printf ("CAUTION: matrix is lower triangular; converting to full\n");
00517       }
00518 
00519     if (type == IS_UPPER_TRI || type == IS_LOWER_TRI)
00520       {
00521     make_full_private (m, &rp, &cval, &aval);
00522     CHECK_V_ERROR;
00523       }
00524   }
00525 
00526   *rpOUT = rp;
00527   *cvalOUT = cval;
00528   *avalOUT = aval;
00529 
00530   FREE_DH (I);
00531   CHECK_V_ERROR;
00532   FREE_DH (J);
00533   CHECK_V_ERROR;
00534   FREE_DH (A);
00535   CHECK_V_ERROR;
00536 
00537 END_FUNC_DH}
00538 
00539 #undef __FUNC__
00540 #define __FUNC__ "convert_triples_to_scr_private"
00541 void
00542 convert_triples_to_scr_private (int m, int nz, int *I, int *J, double *A,
00543                 int *rp, int *cval, double *aval)
00544 {
00545   START_FUNC_DH int i;
00546   int *rowCounts;
00547 
00548   rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int));
00549   CHECK_V_ERROR;
00550   for (i = 0; i < m; ++i)
00551     rowCounts[i] = 0;
00552 
00553   /* count number of entries in each row */
00554   for (i = 0; i < nz; ++i)
00555     {
00556       int row = I[i];
00557       rowCounts[row] += 1;
00558     }
00559 
00560   /* prefix-sum to form rp[] */
00561   rp[0] = 0;
00562   for (i = 1; i <= m; ++i)
00563     {
00564       rp[i] = rp[i - 1] + rowCounts[i - 1];
00565     }
00566   memcpy (rowCounts, rp, (m + 1) * sizeof (int));
00567 
00568   /* write SCR arrays */
00569   for (i = 0; i < nz; ++i)
00570     {
00571       int row = I[i];
00572       int col = J[i];
00573       double val = A[i];
00574       int idx = rowCounts[row];
00575       rowCounts[row] += 1;
00576 
00577       cval[idx] = col;
00578       aval[idx] = val;
00579     }
00580 
00581 
00582   FREE_DH (rowCounts);
00583   CHECK_V_ERROR;
00584 END_FUNC_DH}
00585 
00586 
00587 /*======================================================================
00588  * utilities for use in drivers that read, write, convert, and/or
00589  * compare different file types
00590  *======================================================================*/
00591 
00592 void fix_diags_private (Mat_dh A);
00593 void insert_missing_diags_private (Mat_dh A);
00594 
00595 #undef __FUNC__
00596 #define __FUNC__ "readMat"
00597 void
00598 readMat (Mat_dh * Aout, char *ft, char *fn, int ignore)
00599 {
00600   START_FUNC_DH bool makeStructurallySymmetric;
00601   bool fixDiags;
00602   *Aout = NULL;
00603 
00604   makeStructurallySymmetric =
00605     Parser_dhHasSwitch (parser_dh, "-makeSymmetric");
00606   fixDiags = Parser_dhHasSwitch (parser_dh, "-fixDiags");
00607 
00608   if (fn == NULL)
00609     {
00610       SET_V_ERROR ("passed NULL filename; can't open for reading!");
00611     }
00612 
00613   if (!strcmp (ft, "csr"))
00614     {
00615       Mat_dhReadCSR (Aout, fn);
00616       CHECK_V_ERROR;
00617     }
00618 
00619   else if (!strcmp (ft, "trip"))
00620     {
00621       Mat_dhReadTriples (Aout, ignore, fn);
00622       CHECK_V_ERROR;
00623     }
00624 
00625   else if (!strcmp (ft, "ebin"))
00626     {
00627       Mat_dhReadBIN (Aout, fn);
00628       CHECK_V_ERROR;
00629     }
00630 
00631   else if (!strcmp (ft, "petsc"))
00632     {
00633       sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
00634       SET_V_ERROR (msgBuf_dh);
00635     }
00636 
00637   else
00638     {
00639       sprintf (msgBuf_dh, "unknown filetype: -ftin %s", ft);
00640       SET_V_ERROR (msgBuf_dh);
00641     }
00642 
00643   if (makeStructurallySymmetric)
00644     {
00645       printf ("\npadding with zeros to make structurally symmetric\n");
00646       Mat_dhMakeStructurallySymmetric (*Aout);
00647       CHECK_V_ERROR;
00648     }
00649 
00650   if ((*Aout)->m == 0)
00651     {
00652       SET_V_ERROR ("row count = 0; something's wrong!");
00653     }
00654 
00655   if (fixDiags)
00656     {
00657       fix_diags_private (*Aout);
00658       CHECK_V_ERROR;
00659     }
00660 
00661 END_FUNC_DH}
00662 
00663 
00664 #undef __FUNC__
00665 #define __FUNC__ "fix_diags_private"
00666 void
00667 fix_diags_private (Mat_dh A)
00668 {
00669   START_FUNC_DH int i, j, m = A->m, *rp = A->rp, *cval = A->cval;
00670   double *aval = A->aval;
00671   bool insertDiags = false;
00672 
00673   /* verify that all diagonals are present */
00674   for (i = 0; i < m; ++i)
00675     {
00676       bool isMissing = true;
00677       for (j = rp[i]; j < rp[i + 1]; ++j)
00678     {
00679       if (cval[j] == i)
00680         {
00681           isMissing = false;
00682           break;
00683         }
00684     }
00685       if (isMissing)
00686     {
00687       insertDiags = true;
00688       break;
00689     }
00690     }
00691 
00692   if (insertDiags)
00693     {
00694       insert_missing_diags_private (A);
00695       CHECK_V_ERROR;
00696       rp = A->rp;
00697       cval = A->cval;
00698       aval = A->aval;
00699     }
00700 
00701   /* set value of all diags to largest absolute value in each row */
00702   for (i = 0; i < m; ++i)
00703     {
00704       double sum = 0;
00705       for (j = rp[i]; j < rp[i + 1]; ++j)
00706     {
00707       sum = MAX (sum, fabs (aval[j]));
00708     }
00709       for (j = rp[i]; j < rp[i + 1]; ++j)
00710     {
00711       if (cval[j] == i)
00712         {
00713           aval[j] = sum;
00714           break;
00715         }
00716     }
00717     }
00718 
00719 END_FUNC_DH}
00720 
00721 #undef __FUNC__
00722 #define __FUNC__ "insert_missing_diags_private"
00723 void
00724 insert_missing_diags_private (Mat_dh A)
00725 {
00726   START_FUNC_DH int *RP = A->rp, *CVAL = A->cval, m = A->m;
00727   int *rp, *cval;
00728   double *AVAL = A->aval, *aval;
00729   int i, j, nz = RP[m] + m;
00730   int idx = 0;
00731 
00732   rp = A->rp = (int *) MALLOC_DH ((1 + m) * sizeof (int));
00733   CHECK_V_ERROR;
00734   cval = A->cval = (int *) MALLOC_DH (nz * sizeof (int));
00735   CHECK_V_ERROR;
00736   aval = A->aval = (double *) MALLOC_DH (nz * sizeof (double));
00737   CHECK_V_ERROR;
00738   rp[0] = 0;
00739 
00740   for (i = 0; i < m; ++i)
00741     {
00742       bool isMissing = true;
00743       for (j = RP[i]; j < RP[i + 1]; ++j)
00744     {
00745       cval[idx] = CVAL[j];
00746       aval[idx] = AVAL[j];
00747       ++idx;
00748       if (CVAL[j] == i)
00749         isMissing = false;
00750     }
00751       if (isMissing)
00752     {
00753       cval[idx] = i;
00754       aval[idx] = 0.0;
00755       ++idx;
00756     }
00757       rp[i + 1] = idx;
00758     }
00759 
00760   FREE_DH (RP);
00761   CHECK_V_ERROR;
00762   FREE_DH (CVAL);
00763   CHECK_V_ERROR;
00764   FREE_DH (AVAL);
00765   CHECK_V_ERROR;
00766 END_FUNC_DH}
00767 
00768 #undef __FUNC__
00769 #define __FUNC__ "readVec"
00770 void
00771 readVec (Vec_dh * bout, char *ft, char *fn, int ignore)
00772 {
00773   START_FUNC_DH * bout = NULL;
00774 
00775   if (fn == NULL)
00776     {
00777       SET_V_ERROR ("passed NULL filename; can't open for reading!");
00778     }
00779 
00780   if (!strcmp (ft, "csr") || !strcmp (ft, "trip"))
00781     {
00782       Vec_dhRead (bout, ignore, fn);
00783       CHECK_V_ERROR;
00784     }
00785 
00786   else if (!strcmp (ft, "ebin"))
00787     {
00788       Vec_dhReadBIN (bout, fn);
00789       CHECK_V_ERROR;
00790     }
00791 
00792   else if (!strcmp (ft, "petsc"))
00793     {
00794       sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
00795       SET_V_ERROR (msgBuf_dh);
00796     }
00797 
00798   else
00799     {
00800       sprintf (msgBuf_dh, "unknown filetype: -ftin %s", ft);
00801       SET_V_ERROR (msgBuf_dh);
00802     }
00803 
00804 END_FUNC_DH}
00805 
00806 
00807 #undef __FUNC__
00808 #define __FUNC__ "writeMat"
00809 void
00810 writeMat (Mat_dh Ain, char *ft, char *fn)
00811 {
00812   START_FUNC_DH if (fn == NULL)
00813     {
00814       SET_V_ERROR ("passed NULL filename; can't open for writing!");
00815     }
00816 
00817   if (!strcmp (ft, "csr"))
00818     {
00819       Mat_dhPrintCSR (Ain, NULL, fn);
00820       CHECK_V_ERROR;
00821     }
00822 
00823   else if (!strcmp (ft, "trip"))
00824     {
00825       Mat_dhPrintTriples (Ain, NULL, fn);
00826       CHECK_V_ERROR;
00827     }
00828 
00829   else if (!strcmp (ft, "ebin"))
00830     {
00831       Mat_dhPrintBIN (Ain, NULL, fn);
00832       CHECK_V_ERROR;
00833     }
00834 
00835 
00836   else if (!strcmp (ft, "petsc"))
00837     {
00838       sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
00839       SET_V_ERROR (msgBuf_dh);
00840     }
00841 
00842   else
00843     {
00844       sprintf (msgBuf_dh, "unknown filetype: -ftout %s", ft);
00845       SET_V_ERROR (msgBuf_dh);
00846     }
00847 
00848 END_FUNC_DH}
00849 
00850 #undef __FUNC__
00851 #define __FUNC__ "writeVec"
00852 void
00853 writeVec (Vec_dh bin, char *ft, char *fn)
00854 {
00855   START_FUNC_DH if (fn == NULL)
00856     {
00857       SET_V_ERROR ("passed NULL filename; can't open for writing!");
00858     }
00859 
00860   if (!strcmp (ft, "csr") || !strcmp (ft, "trip"))
00861     {
00862       Vec_dhPrint (bin, NULL, fn);
00863       CHECK_V_ERROR;
00864     }
00865 
00866   else if (!strcmp (ft, "ebin"))
00867     {
00868       Vec_dhPrintBIN (bin, NULL, fn);
00869       CHECK_V_ERROR;
00870     }
00871 
00872   else if (!strcmp (ft, "petsc"))
00873     {
00874       sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
00875       SET_V_ERROR (msgBuf_dh);
00876     }
00877 
00878   else
00879     {
00880       sprintf (msgBuf_dh, "unknown filetype: -ftout %s", ft);
00881       SET_V_ERROR (msgBuf_dh);
00882     }
00883 
00884 END_FUNC_DH}
00885 
00886 #undef __FUNC__
00887 #define __FUNC__ "isTriangular"
00888 int
00889 isTriangular (int m, int *rp, int *cval)
00890 {
00891   START_FUNC_DH int row, j;
00892   int type;
00893   bool type_lower = false, type_upper = false;
00894 
00895   if (np_dh > 1)
00896     {
00897       SET_ERROR (-1, "only implemented for a single cpu");
00898     }
00899 
00900   for (row = 0; row < m; ++row)
00901     {
00902       for (j = rp[row]; j < rp[row + 1]; ++j)
00903     {
00904       int col = cval[j];
00905       if (col < row)
00906         type_lower = true;
00907       if (col > row)
00908         type_upper = true;
00909     }
00910       if (type_lower && type_upper)
00911     break;
00912     }
00913 
00914   if (type_lower && type_upper)
00915     {
00916       type = IS_FULL;
00917     }
00918   else if (type_lower)
00919     {
00920       type = IS_LOWER_TRI;
00921     }
00922   else
00923     {
00924       type = IS_UPPER_TRI;
00925     }
00926 END_FUNC_VAL (type)}
00927 
00928 /*-----------------------------------------------------------------------------------*/
00929 
00930 static void mat_dh_transpose_reuse_private_private (bool allocateMem, int m,
00931                             int *rpIN, int *cvalIN,
00932                             double *avalIN,
00933                             int **rpOUT,
00934                             int **cvalOUT,
00935                             double **avalOUT);
00936 
00937 
00938 #undef __FUNC__
00939 #define __FUNC__ "mat_dh_transpose_reuse_private"
00940 void
00941 mat_dh_transpose_reuse_private (int m,
00942                 int *rpIN, int *cvalIN, double *avalIN,
00943                 int *rpOUT, int *cvalOUT, double *avalOUT)
00944 {
00945   START_FUNC_DH
00946     mat_dh_transpose_reuse_private_private (false, m, rpIN, cvalIN, avalIN,
00947                         &rpOUT, &cvalOUT, &avalOUT);
00948   CHECK_V_ERROR;
00949 END_FUNC_DH}
00950 
00951 
00952 #undef __FUNC__
00953 #define __FUNC__ "mat_dh_transpose_private"
00954 void
00955 mat_dh_transpose_private (int m, int *RP, int **rpOUT,
00956               int *CVAL, int **cvalOUT,
00957               double *AVAL, double **avalOUT)
00958 {
00959   START_FUNC_DH
00960     mat_dh_transpose_reuse_private_private (true, m, RP, CVAL, AVAL,
00961                         rpOUT, cvalOUT, avalOUT);
00962   CHECK_V_ERROR;
00963 END_FUNC_DH}
00964 
00965 #undef __FUNC__
00966 #define __FUNC__ "mat_dh_transpose_private_private"
00967 void
00968 mat_dh_transpose_reuse_private_private (bool allocateMem, int m,
00969                     int *RP, int *CVAL, double *AVAL,
00970                     int **rpOUT, int **cvalOUT,
00971                     double **avalOUT)
00972 {
00973   START_FUNC_DH int *rp, *cval, *tmp;
00974   int i, j, nz = RP[m];
00975   double *aval;
00976 
00977   if (allocateMem)
00978     {
00979       rp = *rpOUT = (int *) MALLOC_DH ((1 + m) * sizeof (int));
00980       CHECK_V_ERROR;
00981       cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
00982       CHECK_V_ERROR;
00983       if (avalOUT != NULL)
00984     {
00985       aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double));
00986       CHECK_V_ERROR;
00987     }
00988     }
00989   else
00990     {
00991       rp = *rpOUT;
00992       cval = *cvalOUT;
00993       if (avalOUT != NULL)
00994     aval = *avalOUT;
00995     }
00996 
00997 
00998   tmp = (int *) MALLOC_DH ((1 + m) * sizeof (int));
00999   CHECK_V_ERROR;
01000   for (i = 0; i <= m; ++i)
01001     tmp[i] = 0;
01002 
01003   for (i = 0; i < m; ++i)
01004     {
01005       for (j = RP[i]; j < RP[i + 1]; ++j)
01006     {
01007       int col = CVAL[j];
01008       tmp[col + 1] += 1;
01009     }
01010     }
01011   for (i = 1; i <= m; ++i)
01012     tmp[i] += tmp[i - 1];
01013   memcpy (rp, tmp, (m + 1) * sizeof (int));
01014 
01015   if (avalOUT != NULL)
01016     {
01017       for (i = 0; i < m; ++i)
01018     {
01019       for (j = RP[i]; j < RP[i + 1]; ++j)
01020         {
01021           int col = CVAL[j];
01022           int idx = tmp[col];
01023           cval[idx] = i;
01024           aval[idx] = AVAL[j];
01025           tmp[col] += 1;
01026         }
01027     }
01028     }
01029 
01030   else
01031     {
01032       for (i = 0; i < m; ++i)
01033     {
01034       for (j = RP[i]; j < RP[i + 1]; ++j)
01035         {
01036           int col = CVAL[j];
01037           int idx = tmp[col];
01038           cval[idx] = i;
01039           tmp[col] += 1;
01040         }
01041     }
01042     }
01043 
01044   FREE_DH (tmp);
01045   CHECK_V_ERROR;
01046 END_FUNC_DH}
01047 
01048 /*-----------------------------------------------------------------------------------*/
01049 
01050 #undef __FUNC__
01051 #define __FUNC__ "mat_find_owner"
01052 int
01053 mat_find_owner (int *beg_rows, int *end_rows, int index)
01054 {
01055   START_FUNC_DH int pe, owner = -1;
01056 
01057   for (pe = 0; pe < np_dh; ++pe)
01058     {
01059       if (index >= beg_rows[pe] && index < end_rows[pe])
01060     {
01061       owner = pe;
01062       break;
01063     }
01064     }
01065 
01066   if (owner == -1)
01067     {
01068       sprintf (msgBuf_dh, "failed to find owner for index= %i", index);
01069       SET_ERROR (-1, msgBuf_dh);
01070     }
01071 
01072 END_FUNC_VAL (owner)}
01073 
01074 
01075 #define AVAL_TAG 2
01076 #define CVAL_TAG 3
01077 void partition_and_distribute_private (Mat_dh A, Mat_dh * Bout);
01078 void partition_and_distribute_metis_private (Mat_dh A, Mat_dh * Bout);
01079 
01080 #undef __FUNC__
01081 #define __FUNC__ "readMat_par"
01082 void
01083 readMat_par (Mat_dh * Aout, char *fileType, char *fileName, int ignore)
01084 {
01085   START_FUNC_DH Mat_dh A = NULL;
01086 
01087   if (myid_dh == 0)
01088     {
01089       int tmp = np_dh;
01090       np_dh = 1;
01091       readMat (&A, fileType, fileName, ignore);
01092       CHECK_V_ERROR;
01093       np_dh = tmp;
01094     }
01095 
01096   if (np_dh == 1)
01097     {
01098       *Aout = A;
01099     }
01100   else
01101     {
01102       if (Parser_dhHasSwitch (parser_dh, "-metis"))
01103     {
01104       partition_and_distribute_metis_private (A, Aout);
01105       CHECK_V_ERROR;
01106     }
01107       else
01108     {
01109       partition_and_distribute_private (A, Aout);
01110       CHECK_V_ERROR;
01111     }
01112     }
01113 
01114   if (np_dh > 1 && A != NULL)
01115     {
01116       Mat_dhDestroy (A);
01117       CHECK_V_ERROR;
01118     }
01119 
01120 
01121   if (Parser_dhHasSwitch (parser_dh, "-printMAT"))
01122     {
01123       char xname[] = "A", *name = xname;
01124       Parser_dhReadString (parser_dh, "-printMat", &name);
01125       Mat_dhPrintTriples (*Aout, NULL, name);
01126       CHECK_V_ERROR;
01127       printf_dh ("\n@@@ readMat_par: printed mat to %s\n\n", xname);
01128     }
01129 
01130 
01131 END_FUNC_DH}
01132 
01133 /* this is bad code! */
01134 #undef __FUNC__
01135 #define __FUNC__ "partition_and_distribute_metis_private"
01136 void
01137 partition_and_distribute_metis_private (Mat_dh A, Mat_dh * Bout)
01138 {
01139   START_FUNC_DH Mat_dh B = NULL;
01140   Mat_dh C = NULL;
01141   int i, m;
01142   int *rowLengths = NULL;
01143   int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL;
01144   int *beg_row = NULL, *row_count = NULL;
01145   MPI_Request *send_req = NULL;
01146   MPI_Request *rcv_req = NULL;
01147   MPI_Status *send_status = NULL;
01148   MPI_Status *rcv_status = NULL;
01149 
01150   MPI_Barrier (comm_dh);
01151   printf_dh ("@@@ partitioning with metis\n");
01152 
01153   /* broadcast number of rows to all processors */
01154   if (myid_dh == 0)
01155     m = A->m;
01156   MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
01157 
01158   /* broadcast number of nonzeros in each row to all processors */
01159   rowLengths = (int *) MALLOC_DH (m * sizeof (int));
01160   CHECK_V_ERROR;
01161   rowToBlock = (int *) MALLOC_DH (m * sizeof (int));
01162   CHECK_V_ERROR;
01163 
01164   if (myid_dh == 0)
01165     {
01166       int *tmp = A->rp;
01167       for (i = 0; i < m; ++i)
01168     {
01169       rowLengths[i] = tmp[i + 1] - tmp[i];
01170     }
01171     }
01172   MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh);
01173 
01174   /* partition matrix */
01175   if (myid_dh == 0)
01176     {
01177       int idx = 0;
01178       int j;
01179 
01180       /* partition and permute matrix */
01181       Mat_dhPartition (A, np_dh, &beg_row, &row_count, &n2o_col, &o2n_row);
01182       ERRCHKA;
01183       Mat_dhPermute (A, n2o_col, &C);
01184       ERRCHKA;
01185 
01186       /* form rowToBlock array */
01187       for (i = 0; i < np_dh; ++i)
01188     {
01189       for (j = beg_row[i]; j < beg_row[i] + row_count[i]; ++j)
01190         {
01191           rowToBlock[idx++] = i;
01192         }
01193     }
01194     }
01195 
01196   /* broadcast partitiioning information to all processors */
01197   MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh);
01198 
01199   /* allocate storage for local portion of matrix */
01200   mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock);
01201   CHECK_V_ERROR;
01202 
01203   /* root sends each processor its portion of the matrix */
01204   if (myid_dh == 0)
01205     {
01206       int *cval = C->cval, *rp = C->rp;
01207       double *aval = C->aval;
01208       send_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
01209       CHECK_V_ERROR;
01210       send_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
01211       CHECK_V_ERROR;
01212       for (i = 0; i < m; ++i)
01213     {
01214       int owner = rowToBlock[i];
01215       int count = rp[i + 1] - rp[i];
01216 
01217       /* error check for empty row */
01218       if (!count)
01219         {
01220           sprintf (msgBuf_dh, "row %i of %i is empty!", i + 1, m);
01221           SET_V_ERROR (msgBuf_dh);
01222         }
01223 
01224       MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh,
01225              send_req + 2 * i);
01226       MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG,
01227              comm_dh, send_req + 2 * i + 1);
01228     }
01229     }
01230 
01231   /* all processors receive their local rows */
01232   {
01233     int *cval = B->cval;
01234     int *rp = B->rp;
01235     double *aval = B->aval;
01236     m = B->m;
01237 
01238     rcv_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
01239     CHECK_V_ERROR;
01240     rcv_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
01241     CHECK_V_ERROR;
01242 
01243     for (i = 0; i < m; ++i)
01244       {
01245 
01246     /* error check for empty row */
01247     int count = rp[i + 1] - rp[i];
01248     if (!count)
01249       {
01250         sprintf (msgBuf_dh, "local row %i of %i is empty!", i + 1, m);
01251         SET_V_ERROR (msgBuf_dh);
01252       }
01253 
01254     MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh,
01255            rcv_req + 2 * i);
01256     MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh,
01257            rcv_req + 2 * i + 1);
01258       }
01259   }
01260 
01261   /* wait for all sends/receives to finish */
01262   if (myid_dh == 0)
01263     {
01264       MPI_Waitall (m * 2, send_req, send_status);
01265     }
01266   MPI_Waitall (2 * B->m, rcv_req, rcv_status);
01267 
01268   /* clean up */
01269   if (rowLengths != NULL)
01270     {
01271       FREE_DH (rowLengths);
01272       CHECK_V_ERROR;
01273     }
01274   if (o2n_row != NULL)
01275     {
01276       FREE_DH (o2n_row);
01277       CHECK_V_ERROR;
01278     }
01279   if (n2o_col != NULL)
01280     {
01281       FREE_DH (n2o_col);
01282       CHECK_V_ERROR;
01283     }
01284   if (rowToBlock != NULL)
01285     {
01286       FREE_DH (rowToBlock);
01287       CHECK_V_ERROR;
01288     }
01289   if (send_req != NULL)
01290     {
01291       FREE_DH (send_req);
01292       CHECK_V_ERROR;
01293     }
01294   if (rcv_req != NULL)
01295     {
01296       FREE_DH (rcv_req);
01297       CHECK_V_ERROR;
01298     }
01299   if (send_status != NULL)
01300     {
01301       FREE_DH (send_status);
01302       CHECK_V_ERROR;
01303     }
01304   if (rcv_status != NULL)
01305     {
01306       FREE_DH (rcv_status);
01307       CHECK_V_ERROR;
01308     }
01309   if (beg_row != NULL)
01310     {
01311       FREE_DH (beg_row);
01312       CHECK_V_ERROR;
01313     }
01314   if (row_count != NULL)
01315     {
01316       FREE_DH (row_count);
01317       CHECK_V_ERROR;
01318     }
01319   if (C != NULL)
01320     {
01321       Mat_dhDestroy (C);
01322       ERRCHKA;
01323     }
01324 
01325   *Bout = B;
01326 
01327 END_FUNC_DH}
01328 
01329 
01330 #undef __FUNC__
01331 #define __FUNC__ "partition_and_distribute_private"
01332 void
01333 partition_and_distribute_private (Mat_dh A, Mat_dh * Bout)
01334 {
01335   START_FUNC_DH Mat_dh B = NULL;
01336   int i, m;
01337   int *rowLengths = NULL;
01338   int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL;
01339   MPI_Request *send_req = NULL;
01340   MPI_Request *rcv_req = NULL;
01341   MPI_Status *send_status = NULL;
01342   MPI_Status *rcv_status = NULL;
01343 
01344   MPI_Barrier (comm_dh);
01345 
01346   /* broadcast number of rows to all processors */
01347   if (myid_dh == 0)
01348     m = A->m;
01349   MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
01350 
01351   /* broadcast number of nonzeros in each row to all processors */
01352   rowLengths = (int *) MALLOC_DH (m * sizeof (int));
01353   CHECK_V_ERROR;
01354   if (myid_dh == 0)
01355     {
01356       int *tmp = A->rp;
01357       for (i = 0; i < m; ++i)
01358     {
01359       rowLengths[i] = tmp[i + 1] - tmp[i];
01360     }
01361     }
01362   MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh);
01363 
01364   /* partition matrix */
01365   rowToBlock = (int *) MALLOC_DH (m * sizeof (int));
01366   CHECK_V_ERROR;
01367 
01368   if (myid_dh == 0)
01369     {
01370       o2n_row = (int *) MALLOC_DH (m * sizeof (int));
01371       CHECK_V_ERROR;
01372       mat_partition_private (A, np_dh, o2n_row, rowToBlock);
01373       CHECK_V_ERROR;
01374     }
01375 
01376   /* broadcast partitiioning information to all processors */
01377   MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh);
01378 
01379   /* allocate storage for local portion of matrix */
01380   mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock);
01381   CHECK_V_ERROR;
01382 
01383   /* root sends each processor its portion of the matrix */
01384   if (myid_dh == 0)
01385     {
01386       int *cval = A->cval, *rp = A->rp;
01387       double *aval = A->aval;
01388       send_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
01389       CHECK_V_ERROR;
01390       send_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
01391       CHECK_V_ERROR;
01392       for (i = 0; i < m; ++i)
01393     {
01394       int owner = rowToBlock[i];
01395       int count = rp[i + 1] - rp[i];
01396 
01397       /* error check for empty row */
01398       if (!count)
01399         {
01400           sprintf (msgBuf_dh, "row %i of %i is empty!", i + 1, m);
01401           SET_V_ERROR (msgBuf_dh);
01402         }
01403 
01404       MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh,
01405              send_req + 2 * i);
01406       MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG,
01407              comm_dh, send_req + 2 * i + 1);
01408     }
01409     }
01410 
01411   /* all processors receive their local rows */
01412   {
01413     int *cval = B->cval;
01414     int *rp = B->rp;
01415     double *aval = B->aval;
01416     m = B->m;
01417 
01418     rcv_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
01419     CHECK_V_ERROR;
01420     rcv_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
01421     CHECK_V_ERROR;
01422 
01423     for (i = 0; i < m; ++i)
01424       {
01425 
01426     /* error check for empty row */
01427     int count = rp[i + 1] - rp[i];
01428     if (!count)
01429       {
01430         sprintf (msgBuf_dh, "local row %i of %i is empty!", i + 1, m);
01431         SET_V_ERROR (msgBuf_dh);
01432       }
01433 
01434     MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh,
01435            rcv_req + 2 * i);
01436     MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh,
01437            rcv_req + 2 * i + 1);
01438       }
01439   }
01440 
01441   /* wait for all sends/receives to finish */
01442   if (myid_dh == 0)
01443     {
01444       MPI_Waitall (m * 2, send_req, send_status);
01445     }
01446   MPI_Waitall (2 * B->m, rcv_req, rcv_status);
01447 
01448   /* clean up */
01449   if (rowLengths != NULL)
01450     {
01451       FREE_DH (rowLengths);
01452       CHECK_V_ERROR;
01453     }
01454   if (o2n_row != NULL)
01455     {
01456       FREE_DH (o2n_row);
01457       CHECK_V_ERROR;
01458     }
01459   if (n2o_col != NULL)
01460     {
01461       FREE_DH (n2o_col);
01462       CHECK_V_ERROR;
01463     }
01464   if (rowToBlock != NULL)
01465     {
01466       FREE_DH (rowToBlock);
01467       CHECK_V_ERROR;
01468     }
01469   if (send_req != NULL)
01470     {
01471       FREE_DH (send_req);
01472       CHECK_V_ERROR;
01473     }
01474   if (rcv_req != NULL)
01475     {
01476       FREE_DH (rcv_req);
01477       CHECK_V_ERROR;
01478     }
01479   if (send_status != NULL)
01480     {
01481       FREE_DH (send_status);
01482       CHECK_V_ERROR;
01483     }
01484   if (rcv_status != NULL)
01485     {
01486       FREE_DH (rcv_status);
01487       CHECK_V_ERROR;
01488     }
01489 
01490   *Bout = B;
01491 
01492 END_FUNC_DH}
01493 
01494 #undef __FUNC__
01495 #define __FUNC__ "mat_par_read_allocate_private"
01496 void
01497 mat_par_read_allocate_private (Mat_dh * Aout, int n, int *rowLengths,
01498                    int *rowToBlock)
01499 {
01500   START_FUNC_DH Mat_dh A;
01501   int i, m, nz, beg_row, *rp, idx;
01502 
01503   Mat_dhCreate (&A);
01504   CHECK_V_ERROR;
01505   *Aout = A;
01506   A->n = n;
01507 
01508   /* count number of rows owned by this processor */
01509   m = 0;
01510   for (i = 0; i < n; ++i)
01511     {
01512       if (rowToBlock[i] == myid_dh)
01513     ++m;
01514     }
01515   A->m = m;
01516 
01517   /* compute global numbering of first  locally owned row */
01518   beg_row = 0;
01519   for (i = 0; i < n; ++i)
01520     {
01521       if (rowToBlock[i] < myid_dh)
01522     ++beg_row;
01523     }
01524   A->beg_row = beg_row;
01525 
01526   /* allocate storage for row-pointer array */
01527   A->rp = rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01528   CHECK_V_ERROR;
01529   rp[0] = 0;
01530 
01531   /* count number of nonzeros owned by this processor, and form rp array */
01532   nz = 0;
01533   idx = 1;
01534   for (i = 0; i < n; ++i)
01535     {
01536       if (rowToBlock[i] == myid_dh)
01537     {
01538       nz += rowLengths[i];
01539       rp[idx++] = nz;
01540     }
01541     }
01542 
01543   /* allocate storage for column indices and values arrays */
01544   A->cval = (int *) MALLOC_DH (nz * sizeof (int));
01545   CHECK_V_ERROR;
01546   A->aval = (double *) MALLOC_DH (nz * sizeof (double));
01547   CHECK_V_ERROR;
01548 END_FUNC_DH}
01549 
01550 
01551 #undef __FUNC__
01552 #define __FUNC__ "mat_partition_private"
01553 void
01554 mat_partition_private (Mat_dh A, int blocks, int *o2n_row, int *rowToBlock)
01555 {
01556   START_FUNC_DH int i, j, n = A->n;
01557   int rpb = n / blocks;     /* rows per block (except possibly last block) */
01558   int idx = 0;
01559 
01560   while (rpb * blocks < n)
01561     ++rpb;
01562 
01563   if (rpb * (blocks - 1) == n)
01564     {
01565       --rpb;
01566       printf_dh ("adjusted rpb to: %i\n", rpb);
01567     }
01568 
01569   for (i = 0; i < n; ++i)
01570     o2n_row[i] = i;
01571 
01572   /* assign all rows to blocks, except for last block, which may
01573      contain less than "rpb" rows
01574    */
01575   for (i = 0; i < blocks - 1; ++i)
01576     {
01577       for (j = 0; j < rpb; ++j)
01578     {
01579       rowToBlock[idx++] = i;
01580     }
01581     }
01582 
01583   /* now deal with the last block in the partition */
01584   i = blocks - 1;
01585   while (idx < n)
01586     rowToBlock[idx++] = i;
01587 
01588 END_FUNC_DH}
01589 
01590 
01591 /* may produce incorrect result if input is not triangular! */
01592 #undef __FUNC__
01593 #define __FUNC__ "make_full_private"
01594 void
01595 make_full_private (int m, int **rpIN, int **cvalIN, double **avalIN)
01596 {
01597   START_FUNC_DH int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN;
01598   double *avalNew, *aval = *avalIN;
01599   int nz, *rowCounts = NULL;
01600 
01601   /* count the number of nonzeros in each row */
01602   rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01603   CHECK_V_ERROR;
01604   for (i = 0; i <= m; ++i)
01605     rowCounts[i] = 0;
01606 
01607   for (i = 0; i < m; ++i)
01608     {
01609       for (j = rp[i]; j < rp[i + 1]; ++j)
01610     {
01611       int col = cval[j];
01612       rowCounts[i + 1] += 1;
01613       if (col != i)
01614         rowCounts[col + 1] += 1;
01615     }
01616     }
01617 
01618   /* prefix sum to form row pointers for full representation */
01619   rpNew = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01620   CHECK_V_ERROR;
01621   for (i = 1; i <= m; ++i)
01622     rowCounts[i] += rowCounts[i - 1];
01623   memcpy (rpNew, rowCounts, (m + 1) * sizeof (int));
01624 
01625   /* form full representation */
01626   nz = rpNew[m];
01627 
01628   cvalNew = (int *) MALLOC_DH (nz * sizeof (int));
01629   CHECK_V_ERROR;
01630   avalNew = (double *) MALLOC_DH (nz * sizeof (double));
01631   CHECK_V_ERROR;
01632   for (i = 0; i < m; ++i)
01633     {
01634       for (j = rp[i]; j < rp[i + 1]; ++j)
01635     {
01636       int col = cval[j];
01637       double val = aval[j];
01638 
01639       cvalNew[rowCounts[i]] = col;
01640       avalNew[rowCounts[i]] = val;
01641       rowCounts[i] += 1;
01642       if (col != i)
01643         {
01644           cvalNew[rowCounts[col]] = i;
01645           avalNew[rowCounts[col]] = val;
01646           rowCounts[col] += 1;
01647         }
01648     }
01649     }
01650 
01651   if (rowCounts != NULL)
01652     {
01653       FREE_DH (rowCounts);
01654       CHECK_V_ERROR;
01655     }
01656   FREE_DH (cval);
01657   CHECK_V_ERROR;
01658   FREE_DH (rp);
01659   CHECK_V_ERROR;
01660   FREE_DH (aval);
01661   CHECK_V_ERROR;
01662   *rpIN = rpNew;
01663   *cvalIN = cvalNew;
01664   *avalIN = avalNew;
01665 END_FUNC_DH}
01666 
01667 #undef __FUNC__
01668 #define __FUNC__ "make_symmetric_private"
01669 void
01670 make_symmetric_private (int m, int **rpIN, int **cvalIN, double **avalIN)
01671 {
01672   START_FUNC_DH int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN;
01673   double *avalNew, *aval = *avalIN;
01674   int nz, *rowCounts = NULL;
01675   int *rpTrans, *cvalTrans;
01676   int *work;
01677   double *avalTrans;
01678   int nzCount = 0, transCount = 0;
01679 
01680   mat_dh_transpose_private (m, rp, &rpTrans,
01681                 cval, &cvalTrans, aval, &avalTrans);
01682   CHECK_V_ERROR;
01683 
01684   /* count the number of nonzeros in each row */
01685   work = (int *) MALLOC_DH (m * sizeof (int));
01686   CHECK_V_ERROR;
01687   for (i = 0; i < m; ++i)
01688     work[i] = -1;
01689   rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01690   CHECK_V_ERROR;
01691   for (i = 0; i <= m; ++i)
01692     rowCounts[i] = 0;
01693 
01694   for (i = 0; i < m; ++i)
01695     {
01696       int ct = 0;
01697       for (j = rp[i]; j < rp[i + 1]; ++j)
01698     {
01699       int col = cval[j];
01700       work[col] = i;
01701       ++ct;
01702       ++nzCount;
01703     }
01704       for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j)
01705     {
01706       int col = cvalTrans[j];
01707       if (work[col] != i)
01708         {
01709           ++ct;
01710           ++transCount;
01711         }
01712     }
01713       rowCounts[i + 1] = ct;
01714     }
01715 
01716   /*---------------------------------------------------------
01717    * if matrix is already symmetric, do nothing
01718    *---------------------------------------------------------*/
01719   if (transCount == 0)
01720     {
01721       printf
01722     ("make_symmetric_private: matrix is already structurally symmetric!\n");
01723       FREE_DH (rpTrans);
01724       CHECK_V_ERROR;
01725       FREE_DH (cvalTrans);
01726       CHECK_V_ERROR;
01727       FREE_DH (avalTrans);
01728       CHECK_V_ERROR;
01729       FREE_DH (work);
01730       CHECK_V_ERROR;
01731       FREE_DH (rowCounts);
01732       CHECK_V_ERROR;
01733       goto END_OF_FUNCTION;
01734     }
01735 
01736   /*---------------------------------------------------------
01737    * otherwise, finish symmetrizing
01738    *---------------------------------------------------------*/
01739   else
01740     {
01741       printf ("original nz= %i\n", rp[m]);
01742       printf ("zeros added= %i\n", transCount);
01743       printf
01744     ("ratio of added zeros to nonzeros = %0.2f (assumes all original entries were nonzero!)\n",
01745      (double) transCount / (double) (nzCount));
01746     }
01747 
01748   /* prefix sum to form row pointers for full representation */
01749   rpNew = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01750   CHECK_V_ERROR;
01751   for (i = 1; i <= m; ++i)
01752     rowCounts[i] += rowCounts[i - 1];
01753   memcpy (rpNew, rowCounts, (m + 1) * sizeof (int));
01754   for (i = 0; i < m; ++i)
01755     work[i] = -1;
01756 
01757   /* form full representation */
01758   nz = rpNew[m];
01759   cvalNew = (int *) MALLOC_DH (nz * sizeof (int));
01760   CHECK_V_ERROR;
01761   avalNew = (double *) MALLOC_DH (nz * sizeof (double));
01762   CHECK_V_ERROR;
01763   for (i = 0; i < m; ++i)
01764     work[i] = -1;
01765 
01766   for (i = 0; i < m; ++i)
01767     {
01768       for (j = rp[i]; j < rp[i + 1]; ++j)
01769     {
01770       int col = cval[j];
01771       double val = aval[j];
01772       work[col] = i;
01773       cvalNew[rowCounts[i]] = col;
01774       avalNew[rowCounts[i]] = val;
01775       rowCounts[i] += 1;
01776     }
01777       for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j)
01778     {
01779       int col = cvalTrans[j];
01780       if (work[col] != i)
01781         {
01782           cvalNew[rowCounts[i]] = col;
01783           avalNew[rowCounts[i]] = 0.0;
01784           rowCounts[i] += 1;
01785         }
01786     }
01787     }
01788 
01789   if (rowCounts != NULL)
01790     {
01791       FREE_DH (rowCounts);
01792       CHECK_V_ERROR;
01793     }
01794   FREE_DH (work);
01795   CHECK_V_ERROR;
01796   FREE_DH (cval);
01797   CHECK_V_ERROR;
01798   FREE_DH (rp);
01799   CHECK_V_ERROR;
01800   FREE_DH (aval);
01801   CHECK_V_ERROR;
01802   FREE_DH (cvalTrans);
01803   CHECK_V_ERROR;
01804   FREE_DH (rpTrans);
01805   CHECK_V_ERROR;
01806   FREE_DH (avalTrans);
01807   CHECK_V_ERROR;
01808   *rpIN = rpNew;
01809   *cvalIN = cvalNew;
01810   *avalIN = avalNew;
01811 
01812 END_OF_FUNCTION:;
01813 
01814 END_FUNC_DH}
01815 
01816 #undef __FUNC__
01817 #define __FUNC__ "profileMat"
01818 void
01819 profileMat (Mat_dh A)
01820 {
01821   START_FUNC_DH Mat_dh B = NULL;
01822   int type;
01823   int m;
01824   int i, j;
01825   int *work1;
01826   double *work2;
01827   bool isStructurallySymmetric = true;
01828   bool isNumericallySymmetric = true;
01829   bool is_Triangular = false;
01830   int zeroCount = 0, nz;
01831 
01832   if (myid_dh > 0)
01833     {
01834       SET_V_ERROR ("only for a single MPI task!");
01835     }
01836 
01837   m = A->m;
01838 
01839   printf ("\nYY----------------------------------------------------\n");
01840 
01841   /* count number of explicit zeros */
01842   nz = A->rp[m];
01843   for (i = 0; i < nz; ++i)
01844     {
01845       if (A->aval[i] == 0)
01846     ++zeroCount;
01847     }
01848   printf ("YY  row count:      %i\n", m);
01849   printf ("YY  nz count:       %i\n", nz);
01850   printf ("YY  explicit zeros: %i (entire matrix)\n", zeroCount);
01851 
01852   /* count number of missing or zero diagonals */
01853   {
01854     int m_diag = 0, z_diag = 0;
01855     for (i = 0; i < m; ++i)
01856       {
01857     bool flag = true;
01858     for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
01859       {
01860         int col = A->cval[j];
01861 
01862         /* row has an explicit diagonal element */
01863         if (col == i)
01864           {
01865         double val = A->aval[j];
01866         flag = false;
01867         if (val == 0.0)
01868           ++z_diag;
01869         break;
01870           }
01871       }
01872 
01873     /* row has an implicit zero diagonal element */
01874     if (flag)
01875       ++m_diag;
01876       }
01877     printf ("YY  missing diagonals:   %i\n", m_diag);
01878     printf ("YY  explicit zero diags: %i\n", z_diag);
01879   }
01880 
01881   /* check to see if matrix is triangular */
01882   type = isTriangular (m, A->rp, A->cval);
01883   CHECK_V_ERROR;
01884   if (type == IS_UPPER_TRI)
01885     {
01886       printf ("YY  matrix is upper triangular\n");
01887       is_Triangular = true;
01888       goto END_OF_FUNCTION;
01889     }
01890   else if (type == IS_LOWER_TRI)
01891     {
01892       printf ("YY  matrix is lower triangular\n");
01893       is_Triangular = true;
01894       goto END_OF_FUNCTION;
01895     }
01896 
01897   /* if not triangular, count nz in each triangle */
01898   {
01899     int unz = 0, lnz = 0;
01900     for (i = 0; i < m; ++i)
01901       {
01902     for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
01903       {
01904         int col = A->cval[j];
01905         if (col < i)
01906           ++lnz;
01907         if (col > i)
01908           ++unz;
01909       }
01910       }
01911     printf ("YY  strict upper triangular nonzeros: %i\n", unz);
01912     printf ("YY  strict lower triangular nonzeros: %i\n", lnz);
01913   }
01914 
01915 
01916 
01917 
01918   Mat_dhTranspose (A, &B);
01919   CHECK_V_ERROR;
01920 
01921   /* check for structural and numerical symmetry */
01922 
01923   work1 = (int *) MALLOC_DH (m * sizeof (int));
01924   CHECK_V_ERROR;
01925   work2 = (double *) MALLOC_DH (m * sizeof (double));
01926   CHECK_V_ERROR;
01927   for (i = 0; i < m; ++i)
01928     work1[i] = -1;
01929   for (i = 0; i < m; ++i)
01930     work2[i] = 0.0;
01931 
01932   for (i = 0; i < m; ++i)
01933     {
01934       for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
01935     {
01936       int col = A->cval[j];
01937       double val = A->aval[j];
01938       work1[col] = i;
01939       work2[col] = val;
01940     }
01941       for (j = B->rp[i]; j < B->rp[i + 1]; ++j)
01942     {
01943       int col = B->cval[j];
01944       double val = B->aval[j];
01945 
01946       if (work1[col] != i)
01947         {
01948           isStructurallySymmetric = false;
01949           isNumericallySymmetric = false;
01950           goto END_OF_FUNCTION;
01951         }
01952       if (work2[col] != val)
01953         {
01954           isNumericallySymmetric = false;
01955           work2[col] = 0.0;
01956         }
01957     }
01958     }
01959 
01960 
01961 END_OF_FUNCTION:;
01962 
01963   if (!is_Triangular)
01964     {
01965       printf ("YY  matrix is NOT triangular\n");
01966       if (isStructurallySymmetric)
01967     {
01968       printf ("YY  matrix IS structurally symmetric\n");
01969     }
01970       else
01971     {
01972       printf ("YY  matrix is NOT structurally symmetric\n");
01973     }
01974       if (isNumericallySymmetric)
01975     {
01976       printf ("YY  matrix IS numerically symmetric\n");
01977     }
01978       else
01979     {
01980       printf ("YY  matrix is NOT numerically symmetric\n");
01981     }
01982     }
01983 
01984   if (work1 != NULL)
01985     {
01986       FREE_DH (work1);
01987       CHECK_V_ERROR;
01988     }
01989   if (work2 != NULL)
01990     {
01991       FREE_DH (work2);
01992       CHECK_V_ERROR;
01993     }
01994   if (B != NULL)
01995     {
01996       Mat_dhDestroy (B);
01997       CHECK_V_ERROR;
01998     }
01999 
02000   printf ("YY----------------------------------------------------\n");
02001 
02002 END_FUNC_DH}

Generated on Tue Jul 13 09:27:14 2010 for IFPACK by  doxygen 1.4.7