Mat_dh.c

00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2009) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include "Mat_dh.h"
00031 #include "getRow_dh.h"
00032 #include "SubdomainGraph_dh.h"
00033 #include "TimeLog_dh.h"
00034 #include "Mem_dh.h"
00035 #include "Numbering_dh.h"
00036 #include "Parser_dh.h"
00037 #include "mat_dh_private.h"
00038 #include "io_dh.h"
00039 #include "Hash_i_dh.h"
00040 
00041 static void setup_matvec_sends_private (Mat_dh mat, int *inlist);
00042 static void setup_matvec_receives_private (Mat_dh mat, int *beg_rows,
00043                        int *end_rows, int reqlen,
00044                        int *reqind, int *outlist);
00045 
00046 #if 0
00047 
partial (?) implementation below;
not used anyplace, I think;
kept for possible future expansion  [mar 21, 2001]
00053        static void Mat_dhAllocate_getRow_private (Mat_dh A);
00054 #endif
00055 
00056      static bool commsOnly = false; /* experimental, for matvec functions */
00057 
00058 #undef __FUNC__
00059 #define __FUNC__ "Mat_dhCreate"
00060      void Mat_dhCreate (Mat_dh * mat)
00061 {
00062   START_FUNC_DH
00063     struct _mat_dh *tmp =
00064     (struct _mat_dh *) MALLOC_DH (sizeof (struct _mat_dh));
00065   CHECK_V_ERROR;
00066   *mat = tmp;
00067 
00068   commsOnly = Parser_dhHasSwitch (parser_dh, "-commsOnly");
00069   if (myid_dh == 0 && commsOnly == true)
00070     {
00071 /*     printf("\n@@@ commsOnly == true for matvecs! @@@\n"); */
00072       fflush (stdout);
00073     }
00074 
00075   tmp->m = 0;
00076   tmp->n = 0;
00077   tmp->beg_row = 0;
00078   tmp->bs = 1;
00079 
00080   tmp->rp = NULL;
00081   tmp->len = NULL;
00082   tmp->cval = NULL;
00083   tmp->aval = NULL;
00084   tmp->diag = NULL;
00085   tmp->fill = NULL;
00086   tmp->owner = true;
00087 
00088   tmp->len_private = 0;
00089   tmp->rowCheckedOut = -1;
00090   tmp->cval_private = NULL;
00091   tmp->aval_private = NULL;
00092 
00093   tmp->row_perm = NULL;
00094 
00095   tmp->num_recv = 0;
00096   tmp->num_send = 0;
00097   tmp->recv_req = NULL;
00098   tmp->send_req = NULL;
00099   tmp->status = NULL;
00100   tmp->recvbuf = NULL;
00101   tmp->sendbuf = NULL;
00102   tmp->sendind = NULL;
00103   tmp->sendlen = 0;
00104   tmp->recvlen = 0;
00105   tmp->numb = NULL;
00106   tmp->matvecIsSetup = false;
00107 
00108   Mat_dhZeroTiming (tmp);
00109   CHECK_V_ERROR;
00110   tmp->matvec_timing = true;
00111 
00112   tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_Mat");
00113 END_FUNC_DH}
00114 
#undef __FUNC__
#define __FUNC__ "Mat_dhDestroy"
/* Release all storage held by 'mat', then free the object itself.
 * The matrix arrays (rp, len, cval, aval, diag, fill, the *_private
 * scratch, row_perm) are freed only when mat->owner is true, i.e. when
 * this object owns them rather than aliasing arrays owned elsewhere.
 * Persistent matvec requests/buffers and the Numbering object are
 * always released.
 * NOTE(review): Mat_dhMatVecSetdown is invoked after the persistent
 * requests have already been freed above; Setdown is currently a stub,
 * so the ordering is harmless today -- revisit if it is implemented.
 */
void
Mat_dhDestroy (Mat_dh mat)
{
  START_FUNC_DH int i;

  if (mat->owner)
    {
      if (mat->rp != NULL)
    {
      FREE_DH (mat->rp);
      CHECK_V_ERROR;
    }
      if (mat->len != NULL)
    {
      FREE_DH (mat->len);
      CHECK_V_ERROR;
    }
      if (mat->cval != NULL)
    {
      FREE_DH (mat->cval);
      CHECK_V_ERROR;
    }
      if (mat->aval != NULL)
    {
      FREE_DH (mat->aval);
      CHECK_V_ERROR;
    }
      if (mat->diag != NULL)
    {
      FREE_DH (mat->diag);
      CHECK_V_ERROR;
    }
      if (mat->fill != NULL)
    {
      FREE_DH (mat->fill);
      CHECK_V_ERROR;
    }
      if (mat->cval_private != NULL)
    {
      FREE_DH (mat->cval_private);
      CHECK_V_ERROR;
    }
      if (mat->aval_private != NULL)
    {
      FREE_DH (mat->aval_private);
      CHECK_V_ERROR;
    }
      if (mat->row_perm != NULL)
    {
      FREE_DH (mat->row_perm);
      CHECK_V_ERROR;
    }
    }

  /* release the persistent communication requests created during
     matvec setup (MPI_Request_free is a local operation) */
  for (i = 0; i < mat->num_recv; i++)
    MPI_Request_free (&mat->recv_req[i]);
  for (i = 0; i < mat->num_send; i++)
    MPI_Request_free (&mat->send_req[i]);
  if (mat->recv_req != NULL)
    {
      FREE_DH (mat->recv_req);
      CHECK_V_ERROR;
    }
  if (mat->send_req != NULL)
    {
      FREE_DH (mat->send_req);
      CHECK_V_ERROR;
    }
  if (mat->status != NULL)
    {
      FREE_DH (mat->status);
      CHECK_V_ERROR;
    }
  if (mat->recvbuf != NULL)
    {
      FREE_DH (mat->recvbuf);
      CHECK_V_ERROR;
    }
  if (mat->sendbuf != NULL)
    {
      FREE_DH (mat->sendbuf);
      CHECK_V_ERROR;
    }
  if (mat->sendind != NULL)
    {
      FREE_DH (mat->sendind);
      CHECK_V_ERROR;
    }

  if (mat->matvecIsSetup)
    {
      Mat_dhMatVecSetdown (mat);
      CHECK_V_ERROR;
    }
  if (mat->numb != NULL)
    {
      Numbering_dhDestroy (mat->numb);
      CHECK_V_ERROR;
    }
  FREE_DH (mat);
  CHECK_V_ERROR;
END_FUNC_DH}
00219 
00220 
/* this should put the cval array back the way it was! */
#undef __FUNC__
#define __FUNC__ "Mat_dhMatVecSetDown"
/* Intended to undo Mat_dhMatVecSetup (restore cval from local back to
 * global indices); currently an unimplemented stub that raises an
 * error if reached.  The "if (ignoreMe)" guard presumably exists to
 * silence compiler warnings -- TODO confirm against the dh framework.
 */
void
Mat_dhMatVecSetdown (Mat_dh mat)
{
  START_FUNC_DH if (ignoreMe)
    SET_V_ERROR ("not implemented");
END_FUNC_DH}
00230 
00231 
/* adopted from Edmond Chow's ParaSails */
#undef __FUNC__
#define __FUNC__ "Mat_dhMatVecSetup"
/* One-time setup for the distributed matvec: gathers every task's row
 * range, builds the Numbering object, posts persistent receives/sends
 * for off-processor x entries (via the two *_private helpers below),
 * and converts cval from global to local column indices in place.
 * Collective over comm_dh.  Does nothing when np_dh == 1.
 * NOTE(review): the inner "if (np_dh == 1)" branches are unreachable
 * here because the outer guard already jumped to DO_NOTHING; they were
 * presumably kept for the driver-debugging scenario the comments
 * mention -- confirm before removing.
 */
void
Mat_dhMatVecSetup (Mat_dh mat)
{
  START_FUNC_DH if (np_dh == 1)
    {
      goto DO_NOTHING;
    }

  else
    {
      int *outlist, *inlist;
      int ierr, i, row, *rp = mat->rp, *cval = mat->cval;
      Numbering_dh numb;
      int m = mat->m;
      int firstLocal = mat->beg_row;
      /* lastLocal is EXCLUSIVE: first row NOT owned by this task */
      int lastLocal = firstLocal + m;
      int *beg_rows, *end_rows;

      mat->recv_req =
    (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
      CHECK_V_ERROR;
      mat->send_req =
    (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
      CHECK_V_ERROR;
      mat->status = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
      CHECK_V_ERROR;
      beg_rows = (int *) MALLOC_DH (np_dh * sizeof (int));
      CHECK_V_ERROR;
      end_rows = (int *) MALLOC_DH (np_dh * sizeof (int));
      CHECK_V_ERROR;

      if (np_dh == 1)
    {           /* this is for debugging purposes in some of the drivers */
      beg_rows[0] = 0;
      end_rows[0] = m;
    }
      else
    {
      /* every task learns every other task's [beg, end) row range */
      ierr =
        MPI_Allgather (&firstLocal, 1, MPI_INT, beg_rows, 1, MPI_INT,
               comm_dh);

      CHECK_MPI_V_ERROR (ierr);

      ierr =
        MPI_Allgather (&lastLocal, 1, MPI_INT, end_rows, 1, MPI_INT,
               comm_dh);
      CHECK_MPI_V_ERROR (ierr);
    }

      outlist = (int *) MALLOC_DH (np_dh * sizeof (int));
      CHECK_V_ERROR;
      inlist = (int *) MALLOC_DH (np_dh * sizeof (int));
      CHECK_V_ERROR;
      for (i = 0; i < np_dh; ++i)
    {
      outlist[i] = 0;
      inlist[i] = 0;
    }

      /* Create Numbering object */
      Numbering_dhCreate (&(mat->numb));
      CHECK_V_ERROR;
      numb = mat->numb;
      Numbering_dhSetup (numb, mat);
      CHECK_V_ERROR;

      /* post persistent receives for external x entries;
         outlist[pe] = number of values we need from pe */
      setup_matvec_receives_private (mat, beg_rows, end_rows, numb->num_ext,
                     numb->idx_ext, outlist);
      CHECK_V_ERROR;

      if (np_dh == 1)
    {           /* this is for debugging purposes in some of the drivers */
      inlist[0] = outlist[0];
    }
      else
    {
      /* inlist[pe] = number of values pe needs from us */
      ierr =
        MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT, comm_dh);
      CHECK_MPI_V_ERROR (ierr);
    }

      setup_matvec_sends_private (mat, inlist);
      CHECK_V_ERROR;

      /* Convert to local indices */
      for (row = 0; row < m; row++)
    {
      int len = rp[row + 1] - rp[row];
      int *ind = cval + rp[row];
      Numbering_dhGlobalToLocal (numb, len, ind, ind);
      CHECK_V_ERROR;
    }

      FREE_DH (outlist);
      CHECK_V_ERROR;
      FREE_DH (inlist);
      CHECK_V_ERROR;
      FREE_DH (beg_rows);
      CHECK_V_ERROR;
      FREE_DH (end_rows);
      CHECK_V_ERROR;
    }

DO_NOTHING:;

END_FUNC_DH}
00342 
00343 /* adopted from Edmond Chow's ParaSails */
00344 #undef __FUNC__
00345 #define __FUNC__ "setup_matvec_receives_private"
00346 void
00347 setup_matvec_receives_private (Mat_dh mat, int *beg_rows, int *end_rows,
00348                    int reqlen, int *reqind, int *outlist)
00349 {
00350   START_FUNC_DH int ierr, i, j, this_pe;
00351   MPI_Request request;
00352   int m = mat->m;
00353 
00354   mat->num_recv = 0;
00355 
00356   /* Allocate recvbuf */
00357   /* recvbuf has numlocal entries saved for local part of x, used in matvec */
00358   mat->recvbuf = (double *) MALLOC_DH ((reqlen + m) * sizeof (double));
00359 
00360   for (i = 0; i < reqlen; i = j)
00361     {               /* j is set below */
00362       /* The processor that owns the row with index reqind[i] */
00363       this_pe = mat_find_owner (beg_rows, end_rows, reqind[i]);
00364       CHECK_V_ERROR;
00365 
00366       /* Figure out other rows we need from this_pe */
00367       for (j = i + 1; j < reqlen; j++)
00368     {
00369       /* if row is on different pe */
00370       if (reqind[j] < beg_rows[this_pe] || reqind[j] > end_rows[this_pe])
00371         break;
00372     }
00373 
00374       /* Request rows in reqind[i..j-1] */
00375       ierr =
00376     MPI_Isend (&reqind[i], j - i, MPI_INT, this_pe, 444, comm_dh,
00377            &request);
00378       CHECK_MPI_V_ERROR (ierr);
00379       ierr = MPI_Request_free (&request);
00380       CHECK_MPI_V_ERROR (ierr);
00381 
00382       /* Count of number of number of indices needed from this_pe */
00383       outlist[this_pe] = j - i;
00384 
00385       ierr =
00386     MPI_Recv_init (&mat->recvbuf[i + m], j - i, MPI_DOUBLE, this_pe, 555,
00387                comm_dh, &mat->recv_req[mat->num_recv]);
00388       CHECK_MPI_V_ERROR (ierr);
00389 
00390       mat->num_recv++;
00391       mat->recvlen += j - i;    /* only used for statistical reporting */
00392     }
00393 END_FUNC_DH}
00394 
00395 
00396 /* adopted from Edmond Chow's ParaSails */
00397 #undef __FUNC__
00398 #define __FUNC__ "setup_matvec_sends_private"
00399 void
00400 setup_matvec_sends_private (Mat_dh mat, int *inlist)
00401 {
00402   START_FUNC_DH int ierr, i, j, sendlen, first = mat->beg_row;
00403   MPI_Request *requests;
00404   MPI_Status *statuses;
00405 
00406   requests = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
00407   CHECK_V_ERROR;
00408   statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
00409   CHECK_V_ERROR;
00410 
00411   /* Determine size of and allocate sendbuf and sendind */
00412   sendlen = 0;
00413   for (i = 0; i < np_dh; i++)
00414     sendlen += inlist[i];
00415   mat->sendlen = sendlen;
00416   mat->sendbuf = (double *) MALLOC_DH (sendlen * sizeof (double));
00417   CHECK_V_ERROR;
00418   mat->sendind = (int *) MALLOC_DH (sendlen * sizeof (int));
00419   CHECK_V_ERROR;
00420 
00421   j = 0;
00422   mat->num_send = 0;
00423   for (i = 0; i < np_dh; i++)
00424     {
00425       if (inlist[i] != 0)
00426     {
00427       /* Post receive for the actual indices */
00428       ierr =
00429         MPI_Irecv (&mat->sendind[j], inlist[i], MPI_INT, i, 444, comm_dh,
00430                &requests[mat->num_send]);
00431       CHECK_MPI_V_ERROR (ierr);
00432       /* Set up the send */
00433       ierr =
00434         MPI_Send_init (&mat->sendbuf[j], inlist[i], MPI_DOUBLE, i, 555,
00435                comm_dh, &mat->send_req[mat->num_send]);
00436       CHECK_MPI_V_ERROR (ierr);
00437 
00438       mat->num_send++;
00439       j += inlist[i];
00440     }
00441     }
00442 
00443   /* total bytes to be sent during matvec */
00444   mat->time[MATVEC_WORDS] = j;
00445 
00446 
00447   ierr = MPI_Waitall (mat->num_send, requests, statuses);
00448   CHECK_MPI_V_ERROR (ierr);
00449   /* convert global indices to local indices */
00450   /* these are all indices on this processor */
00451   for (i = 0; i < mat->sendlen; i++)
00452     mat->sendind[i] -= first;
00453 
00454   FREE_DH (requests);
00455   FREE_DH (statuses);
00456 END_FUNC_DH}
00457 
00458 
/* unthreaded MPI version */
#undef __FUNC__
#define __FUNC__ "Mat_dhMatVec"
/* Distributed sparse matrix-vector product b = A*x.
 * Requires Mat_dhMatVecSetup to have been called (persistent requests,
 * send/recv buffers, and locally-renumbered cval).
 * Timing bins (when mat->matvec_timing): MATVEC_TIME accumulates the
 * gather + multiply phases, MATVEC_MPI_TIME the Startall/Waitall span,
 * MATVEC_TOTAL_TIME the whole call.
 * When the file-scope commsOnly flag is set, only the communication is
 * performed (b is left untouched).
 */
void
Mat_dhMatVec (Mat_dh mat, double *x, double *b)
{
  START_FUNC_DH if (np_dh == 1)
    {
      Mat_dhMatVec_uni (mat, x, b);
      CHECK_V_ERROR;
    }

  else
    {
      int ierr, i, row, m = mat->m;
      int *rp = mat->rp, *cval = mat->cval;
      double *aval = mat->aval;
      int *sendind = mat->sendind;
      int sendlen = mat->sendlen;
      double *sendbuf = mat->sendbuf;
      double *recvbuf = mat->recvbuf;
      double t1 = 0, t2 = 0, t3 = 0, t4 = 0;
      bool timeFlag = mat->matvec_timing;


      if (timeFlag)
    t1 = MPI_Wtime ();

      /* Put components of x into the right outgoing buffers */
      if (!commsOnly)
    {
      for (i = 0; i < sendlen; i++)
        sendbuf[i] = x[sendind[i]];
    }

      if (timeFlag)
    {
      t2 = MPI_Wtime ();
      mat->time[MATVEC_TIME] += (t2 - t1);

    }

      /* start the persistent sends/receives, then wait for both;
         note there is no computation overlapped with communication */
      ierr = MPI_Startall (mat->num_recv, mat->recv_req);
      CHECK_MPI_V_ERROR (ierr);
      ierr = MPI_Startall (mat->num_send, mat->send_req);
      CHECK_MPI_V_ERROR (ierr);
      ierr = MPI_Waitall (mat->num_recv, mat->recv_req, mat->status);
      CHECK_MPI_V_ERROR (ierr);
      ierr = MPI_Waitall (mat->num_send, mat->send_req, mat->status);
      CHECK_MPI_V_ERROR (ierr);


      if (timeFlag)
    {
      t3 = MPI_Wtime ();
      mat->time[MATVEC_MPI_TIME] += (t3 - t2);
    }

      /* Copy local part of x into top part of recvbuf */
      if (!commsOnly)
    {
      for (i = 0; i < m; i++)
        recvbuf[i] = x[i];

      /* do the multiply; cval holds LOCAL indices into recvbuf */
      for (row = 0; row < m; row++)
        {
          int len = rp[row + 1] - rp[row];
          int *ind = cval + rp[row];
          double *val = aval + rp[row];
          double temp = 0.0;
          for (i = 0; i < len; i++)
        {
          temp += (val[i] * recvbuf[ind[i]]);
        }
          b[row] = temp;
        }
    }           /* if (! commsOnly) */

      if (timeFlag)
    {
      t4 = MPI_Wtime ();
      mat->time[MATVEC_TOTAL_TIME] += (t4 - t1);
      mat->time[MATVEC_TIME] += (t4 - t3);
    }
    }
END_FUNC_DH}
00546 
/* OpenMP/MPI version */
#undef __FUNC__
#define __FUNC__ "Mat_dhMatVec_omp"
/* Distributed sparse matrix-vector product b = A*x; same contract and
 * communication pattern as Mat_dhMatVec, but without the commsOnly
 * short-circuit, and with an extra timing bin (MATVEC_MPI_TIME2:
 * everything up to and including the local-x copy).
 * NOTE(review): despite the name, no OpenMP pragmas appear in this
 * listing -- presumably they exist in another build configuration or
 * were stripped; confirm before relying on threading here.
 */
void
Mat_dhMatVec_omp (Mat_dh mat, double *x, double *b)
{
  START_FUNC_DH int ierr, i, row, m = mat->m;
  int *rp = mat->rp, *cval = mat->cval;
  double *aval = mat->aval;
  int *sendind = mat->sendind;
  int sendlen = mat->sendlen;
  double *sendbuf = mat->sendbuf;
  double *recvbuf = mat->recvbuf;
  double t1 = 0, t2 = 0, t3 = 0, t4 = 0, tx = 0;
  double *val, temp;
  int len, *ind;
  bool timeFlag = mat->matvec_timing;

  if (timeFlag)
    t1 = MPI_Wtime ();

  /* Put components of x into the right outgoing buffers */
  for (i = 0; i < sendlen; i++)
    sendbuf[i] = x[sendind[i]];

  if (timeFlag)
    {
      t2 = MPI_Wtime ();
      mat->time[MATVEC_TIME] += (t2 - t1);
    }

  /* start persistent sends/receives, then wait for both to finish */
  ierr = MPI_Startall (mat->num_recv, mat->recv_req);
  CHECK_MPI_V_ERROR (ierr);
  ierr = MPI_Startall (mat->num_send, mat->send_req);
  CHECK_MPI_V_ERROR (ierr);
  ierr = MPI_Waitall (mat->num_recv, mat->recv_req, mat->status);
  CHECK_MPI_V_ERROR (ierr);
  ierr = MPI_Waitall (mat->num_send, mat->send_req, mat->status);
  CHECK_MPI_V_ERROR (ierr);

  if (timeFlag)
    {
      t3 = MPI_Wtime ();
      mat->time[MATVEC_MPI_TIME] += (t3 - t2);
    }

  /* Copy local part of x into top part of recvbuf */
  for (i = 0; i < m; i++)
    recvbuf[i] = x[i];

  if (timeFlag)
    {
      tx = MPI_Wtime ();
      mat->time[MATVEC_MPI_TIME2] += (tx - t1);
    }


  /* do the multiply; cval holds LOCAL indices into recvbuf */
  for (row = 0; row < m; row++)
    {
      len = rp[row + 1] - rp[row];
      ind = cval + rp[row];
      val = aval + rp[row];
      temp = 0.0;
      for (i = 0; i < len; i++)
    {
      temp += (val[i] * recvbuf[ind[i]]);
    }
      b[row] = temp;
    }

  if (timeFlag)
    {
      t4 = MPI_Wtime ();
      mat->time[MATVEC_TOTAL_TIME] += (t4 - t1);
      mat->time[MATVEC_TIME] += (t4 - t3);
    }

END_FUNC_DH}
00626 
00627 
00628 /* OpenMP/single primary task version */
00629 #undef __FUNC__
00630 #define __FUNC__ "Mat_dhMatVec_uni_omp"
00631 void
00632 Mat_dhMatVec_uni_omp (Mat_dh mat, double *x, double *b)
00633 {
00634   START_FUNC_DH int i, row, m = mat->m;
00635   int *rp = mat->rp, *cval = mat->cval;
00636   double *aval = mat->aval;
00637   double t1 = 0, t2 = 0;
00638   bool timeFlag = mat->matvec_timing;
00639 
00640   if (timeFlag)
00641     {
00642       t1 = MPI_Wtime ();
00643     }
00644 
00645   /* do the multiply */
00646   for (row = 0; row < m; row++)
00647     {
00648       int len = rp[row + 1] - rp[row];
00649       int *ind = cval + rp[row];
00650       double *val = aval + rp[row];
00651       double temp = 0.0;
00652       for (i = 0; i < len; i++)
00653     {
00654       temp += (val[i] * x[ind[i]]);
00655     }
00656       b[row] = temp;
00657     }
00658 
00659   if (timeFlag)
00660     {
00661       t2 = MPI_Wtime ();
00662       mat->time[MATVEC_TIME] += (t2 - t1);
00663       mat->time[MATVEC_TOTAL_TIME] += (t2 - t1);
00664     }
00665 
00666 END_FUNC_DH}
00667 
00668 
00669 /* unthreaded, single-task version */
00670 #undef __FUNC__
00671 #define __FUNC__ "Mat_dhMatVec_uni"
00672 void
00673 Mat_dhMatVec_uni (Mat_dh mat, double *x, double *b)
00674 {
00675   START_FUNC_DH int i, row, m = mat->m;
00676   int *rp = mat->rp, *cval = mat->cval;
00677   double *aval = mat->aval;
00678   double t1 = 0, t2 = 0;
00679   bool timeFlag = mat->matvec_timing;
00680 
00681   if (timeFlag)
00682     t1 = MPI_Wtime ();
00683 
00684   for (row = 0; row < m; row++)
00685     {
00686       int len = rp[row + 1] - rp[row];
00687       int *ind = cval + rp[row];
00688       double *val = aval + rp[row];
00689       double temp = 0.0;
00690       for (i = 0; i < len; i++)
00691     {
00692       temp += (val[i] * x[ind[i]]);
00693     }
00694       b[row] = temp;
00695     }
00696 
00697   if (timeFlag)
00698     {
00699       t2 = MPI_Wtime ();
00700       mat->time[MATVEC_TIME] += (t2 - t1);
00701       mat->time[MATVEC_TOTAL_TIME] += (t2 - t1);
00702     }
00703 
00704 END_FUNC_DH}
00705 
00706 
00707 #undef __FUNC__
00708 #define __FUNC__ "Mat_dhReadNz"
00709 int
00710 Mat_dhReadNz (Mat_dh mat)
00711 {
00712   START_FUNC_DH int ierr, retval = mat->rp[mat->m];
00713   int nz = retval;
00714   ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM, comm_dh);
00715   CHECK_MPI_ERROR (ierr);
00716 END_FUNC_VAL (retval)}
00717 
00718 
00719 
00720 #if 0
00721 
00722 #undef __FUNC__
00723 #define __FUNC__ "Mat_dhAllocate_getRow_private"
00724 void
00725 Mat_dhAllocate_getRow_private (Mat_dh A)
00726 {
00727   START_FUNC_DH int i, *rp = A->rp, len = 0;
00728   int m = A->m;
00729 
00730   /* find longest row in matrix */
00731   for (i = 0; i < m; ++i)
00732     len = MAX (len, rp[i + 1] - rp[i]);
00733   len *= A->bs;
00734 
00735   /* free any previously allocated private storage */
00736   if (len > A->len_private)
00737     {
00738       if (A->cval_private != NULL)
00739     {
00740       FREE_DH (A->cval_private);
00741       CHECK_V_ERROR;
00742     }
00743       if (A->aval_private != NULL)
00744     {
00745       FREE_DH (A->aval_private);
00746       CHECK_V_ERROR;
00747     }
00748     }
00749 
00750   /* allocate private storage */
00751   A->cval_private = (int *) MALLOC_DH (len * sizeof (int));
00752   CHECK_V_ERROR;
00753   A->aval_private = (double *) MALLOC_DH (len * sizeof (double));
00754   CHECK_V_ERROR;
00755   A->len_private = len;
00756 END_FUNC_DH}
00757 
00758 #endif
00759 
00760 #undef __FUNC__
00761 #define __FUNC__ "Mat_dhZeroTiming"
00762 void
00763 Mat_dhZeroTiming (Mat_dh mat)
00764 {
00765   START_FUNC_DH int i;
00766 
00767   for (i = 0; i < MAT_DH_BINS; ++i)
00768     {
00769       mat->time[i] = 0;
00770       mat->time_max[i] = 0;
00771       mat->time_min[i] = 0;
00772     }
00773 END_FUNC_DH}
00774 
00775 #undef __FUNC__
00776 #define __FUNC__ "Mat_dhReduceTiming"
00777 void
00778 Mat_dhReduceTiming (Mat_dh mat)
00779 {
00780   START_FUNC_DH if (mat->time[MATVEC_MPI_TIME])
00781     {
00782       mat->time[MATVEC_RATIO] =
00783     mat->time[MATVEC_TIME] / mat->time[MATVEC_MPI_TIME];
00784     }
00785   MPI_Allreduce (mat->time, mat->time_min, MAT_DH_BINS, MPI_DOUBLE, MPI_MIN,
00786          comm_dh);
00787   MPI_Allreduce (mat->time, mat->time_max, MAT_DH_BINS, MPI_DOUBLE, MPI_MAX,
00788          comm_dh);
00789 END_FUNC_DH}
00790 
00791 #undef __FUNC__
00792 #define __FUNC__ "Mat_dhPermute"
00793 void
00794 Mat_dhPermute (Mat_dh A, int *n2o, Mat_dh * Bout)
00795 {
00796   START_FUNC_DH Mat_dh B;
00797   int i, j, *RP = A->rp, *CVAL = A->cval;
00798   int *o2n, *rp, *cval, m = A->m, nz = RP[m];
00799   double *aval, *AVAL = A->aval;
00800 
00801   Mat_dhCreate (&B);
00802   CHECK_V_ERROR;
00803   B->m = B->n = m;
00804   *Bout = B;
00805 
00806   /* form inverse permutation */
00807   o2n = (int *) MALLOC_DH (m * sizeof (int));
00808   CHECK_V_ERROR;
00809   for (i = 0; i < m; ++i)
00810     o2n[n2o[i]] = i;
00811 
00812   /* allocate storage for permuted matrix */
00813   rp = B->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
00814   CHECK_V_ERROR;
00815   cval = B->cval = (int *) MALLOC_DH (nz * sizeof (int));
00816   CHECK_V_ERROR;
00817   aval = B->aval = (double *) MALLOC_DH (nz * sizeof (double));
00818   CHECK_V_ERROR;
00819 
00820   /* form new rp array */
00821   rp[0] = 0;
00822   for (i = 0; i < m; ++i)
00823     {
00824       int oldRow = n2o[i];
00825       rp[i + 1] = RP[oldRow + 1] - RP[oldRow];
00826     }
00827   for (i = 1; i <= m; ++i)
00828     rp[i] = rp[i] + rp[i - 1];
00829 
00830   for (i = 0; i < m; ++i)
00831     {
00832       int oldRow = n2o[i];
00833       int idx = rp[i];
00834       for (j = RP[oldRow]; j < RP[oldRow + 1]; ++j)
00835     {
00836       cval[idx] = o2n[CVAL[j]];
00837       aval[idx] = AVAL[j];
00838       ++idx;
00839     }
00840     }
00841 
00842   FREE_DH (o2n);
00843   CHECK_V_ERROR;
00844 END_FUNC_DH}
00845 
00846 
00847 /*----------------------------------------------------------------------
00848  * Print methods
00849  *----------------------------------------------------------------------*/
00850 
/* seq or mpi */
#undef __FUNC__
#define __FUNC__ "Mat_dhPrintGraph"
/* Print the matrix adjacency graph to 'fp', one task at a time in
 * subdomain order.  The MPI_Barrier in every loop iteration serializes
 * output so task pe writes only on its turn; when a SubdomainGraph is
 * supplied, the task's turn is its permuted subdomain id (o2n_sub) and
 * rows/columns are printed through the graph's permutations.
 * Collective over comm_dh.
 */
void
Mat_dhPrintGraph (Mat_dh A, SubdomainGraph_dh sg, FILE * fp)
{
  START_FUNC_DH int pe, id = myid_dh;
  int ierr;

  if (sg != NULL)
    {
      id = sg->o2n_sub[id];
    }

  for (pe = 0; pe < np_dh; ++pe)
    {
      ierr = MPI_Barrier (comm_dh);
      CHECK_MPI_V_ERROR (ierr);
      if (id == pe)
    {
      if (sg == NULL)
        {
          mat_dh_print_graph_private (A->m, A->beg_row, A->rp, A->cval,
                      A->aval, NULL, NULL, NULL, fp);
          CHECK_V_ERROR;
        }
      else
        {
          int beg_row = sg->beg_rowP[myid_dh];
          mat_dh_print_graph_private (A->m, beg_row, A->rp, A->cval,
                      A->aval, sg->n2o_row, sg->o2n_col,
                      sg->o2n_ext, fp);
          CHECK_V_ERROR;
        }
    }
    }
END_FUNC_DH}
00888 
00889 
#undef __FUNC__
#define __FUNC__ "Mat_dhPrintRows"
/* Print the rows of A to 'fp' in a human-readable, 1-based format.
 * "-noValues" on the command line suppresses the numeric values.
 * Three cases:
 *   1. sg == NULL: local portion, unpermuted;
 *   2. single mpi task with multiple subdomains: rows printed in
 *      permuted subdomain order via the getRow interface;
 *   3. multiple mpi tasks, one subdomain per task: this task's rows in
 *      permuted order, external columns resolved through sg->o2n_ext.
 * NOTE(review): cases 2 and 3 print only this task's data with no
 * barrier; presumably callers serialize output across tasks -- confirm.
 */
void
Mat_dhPrintRows (Mat_dh A, SubdomainGraph_dh sg, FILE * fp)
{
  START_FUNC_DH bool noValues;
  int m = A->m, *rp = A->rp, *cval = A->cval;
  double *aval = A->aval;

  noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
  if (noValues)
    aval = NULL;

  /*---------------------------------------------------------------- 
   * case 1: print local portion of unpermuted matrix
   *----------------------------------------------------------------*/
  if (sg == NULL)
    {
      int i, j;
      int beg_row = A->beg_row;

      fprintf (fp,
           "\n----- A, unpermuted ------------------------------------\n");
      for (i = 0; i < m; ++i)
    {
      fprintf (fp, "%i :: ", 1 + i + beg_row);
      for (j = rp[i]; j < rp[i + 1]; ++j)
        {
          if (noValues)
        {
          fprintf (fp, "%i ", 1 + cval[j]);
        }
          else
        {
          fprintf (fp, "%i,%g ; ", 1 + cval[j], aval[j]);
        }
        }
      fprintf (fp, "\n");
    }
    }

  /*---------------------------------------------------------------- 
   * case 2: single mpi task, with multiple subdomains
   *----------------------------------------------------------------*/
  else if (np_dh == 1)
    {
      int i, k, idx = 1;
      int oldRow;

      for (i = 0; i < sg->blocks; ++i)
    {
      int oldBlock = sg->n2o_sub[i];

      /* here, 'beg_row' and 'end_row' refer to rows in the
         original ordering of A.
       */
      int beg_row = sg->beg_row[oldBlock];
      int end_row = beg_row + sg->row_count[oldBlock];

      fprintf (fp, "\n");
      fprintf (fp,
           "\n----- A, permuted, single mpi task  ------------------\n");
      fprintf (fp, "---- new subdomain: %i;  old subdomain: %i\n", i,
           oldBlock);
      fprintf (fp, "     old beg_row:   %i;  new beg_row:   %i\n",
           sg->beg_row[oldBlock], sg->beg_rowP[oldBlock]);
      fprintf (fp, "     local rows in this block: %i\n",
           sg->row_count[oldBlock]);
      fprintf (fp, "     bdry rows in this block:  %i\n",
           sg->bdry_count[oldBlock]);
      fprintf (fp, "     1st bdry row= %i \n",
           1 + end_row - sg->bdry_count[oldBlock]);

      /* print the block's rows via getRow/restoreRow, with column
         indices mapped old-to-new through sg->o2n_col */
      for (oldRow = beg_row; oldRow < end_row; ++oldRow)
        {
          int len = 0, *cval;
          double *aval;

          fprintf (fp, "%3i (old= %3i) :: ", idx, 1 + oldRow);
          ++idx;
          Mat_dhGetRow (A, oldRow, &len, &cval, &aval);
          CHECK_V_ERROR;

          for (k = 0; k < len; ++k)
        {
          if (noValues)
            {
              fprintf (fp, "%i ", 1 + sg->o2n_col[cval[k]]);
            }
          else
            {
              fprintf (fp, "%i,%g ; ", 1 + sg->o2n_col[cval[k]],
                   aval[k]);
            }
        }

          fprintf (fp, "\n");
          Mat_dhRestoreRow (A, oldRow, &len, &cval, &aval);
          CHECK_V_ERROR;
        }
    }
    }

  /*---------------------------------------------------------------- 
   * case 3: multiple mpi tasks, one subdomain per task
   *----------------------------------------------------------------*/
  else
    {
      Hash_i_dh hash = sg->o2n_ext;
      int *o2n_col = sg->o2n_col, *n2o_row = sg->n2o_row;
      int beg_row = sg->beg_row[myid_dh];
      int beg_rowP = sg->beg_rowP[myid_dh];
      int i, j;

      for (i = 0; i < m; ++i)
    {
      int row = n2o_row[i];
      fprintf (fp, "%3i (old= %3i) :: ", 1 + i + beg_rowP,
           1 + row + beg_row);
      for (j = rp[row]; j < rp[row + 1]; ++j)
        {
          int col = cval[j];

          /* find permuted (old-to-new) value for the column */
          /* case i: column is locally owned */
          if (col >= beg_row && col < beg_row + m)
        {
          col = o2n_col[col - beg_row] + beg_rowP;
        }

          /* case ii: column is external -- look up its permuted
             global index in the o2n_ext hash table */
          else
        {
          int tmp = col;
          tmp = Hash_i_dhLookup (hash, col);
          CHECK_V_ERROR;
          if (tmp == -1)
            {
              sprintf (msgBuf_dh,
                   "nonlocal column= %i not in hash table",
                   1 + col);
              SET_V_ERROR (msgBuf_dh);
            }
          else
            {
              col = tmp;
            }
        }

          if (noValues)
        {
          fprintf (fp, "%i ", 1 + col);
        }
          else
        {
          fprintf (fp, "%i,%g ; ", 1 + col, aval[j]);
        }
        }
      fprintf (fp, "\n");
    }
    }
END_FUNC_DH}
01052 
01053 
01054 
#undef __FUNC__
#define __FUNC__ "Mat_dhPrintTriples"
/* Writes matrix A to <filename> as "row col [value]" triples, 1-based.
   Behavior depends on sg:
     sg == NULL           : unpermuted matrix; works for 1 or many MPI tasks
     sg != NULL, np_dh==1 : single task, multiple subdomains; rows/cols are
                            written in the permuted (subdomain) ordering
     sg != NULL, np_dh>1  : one subdomain per task; each task writes its
                            permuted rows, serialized in subdomain order
   Honors runtime switches:
     -noValues : print the nonzero pattern only (no values)
     -matlab   : stored zeros are replaced by _MATLAB_ZERO_ so matlab's
                 sparse() does not drop them */
void
Mat_dhPrintTriples (Mat_dh A, SubdomainGraph_dh sg, char *filename)
{
  START_FUNC_DH int m = A->m, *rp = A->rp, *cval = A->cval;
  double *aval = A->aval;
  bool noValues;		/* true: print pattern only */
  bool matlab;			/* true: substitute _MATLAB_ZERO_ for stored 0.0 */
  FILE *fp;

  noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
  if (noValues)
    aval = NULL;
  matlab = (Parser_dhHasSwitch (parser_dh, "-matlab"));

  /*---------------------------------------------------------------- 
   * case 1: unpermuted matrix, single or multiple mpi tasks
   *----------------------------------------------------------------*/
  if (sg == NULL)
    {
      int i, j, pe;
      int beg_row = A->beg_row;
      double val;

      /* tasks write in rank order; the barrier serializes file access:
         rank 0 truncates ("w"), later ranks append ("a") */
      for (pe = 0; pe < np_dh; ++pe)
	{
	  MPI_Barrier (comm_dh);
	  if (pe == myid_dh)
	    {
	      if (pe == 0)
		{
		  fp = openFile_dh (filename, "w");
		  CHECK_V_ERROR;
		}
	      else
		{
		  fp = openFile_dh (filename, "a");
		  CHECK_V_ERROR;
		}

	      for (i = 0; i < m; ++i)
		{
		  for (j = rp[i]; j < rp[i + 1]; ++j)
		    {
		      if (noValues)
			{
			  fprintf (fp, "%i %i\n", 1 + i + beg_row,
				   1 + cval[j]);
			}
		      else
			{
			  val = aval[j];
			  if (val == 0.0 && matlab)
			    val = _MATLAB_ZERO_;
			  fprintf (fp, TRIPLES_FORMAT, 1 + i + beg_row,
				   1 + cval[j], val);
			}
		    }
		}
	      closeFile_dh (fp);
	      CHECK_V_ERROR;
	    }
	}
    }

  /*---------------------------------------------------------------- 
   * case 2: single mpi task, with multiple subdomains
   *----------------------------------------------------------------*/
  else if (np_dh == 1)
    {
      int i, j, k, idx = 1;	/* idx: 1-based row number in permuted order */

      fp = openFile_dh (filename, "w");
      CHECK_V_ERROR;

      /* walk subdomains in their new order, then rows within each
         subdomain in permuted order */
      for (i = 0; i < sg->blocks; ++i)
	{
	  int oldBlock = sg->n2o_sub[i];
	  int beg_row = sg->beg_rowP[oldBlock];
	  int end_row = beg_row + sg->row_count[oldBlock];

	  for (j = beg_row; j < end_row; ++j)
	    {
	      int len = 0, *cval;	/* shadow the outer cval/aval on purpose */
	      double *aval;
	      int oldRow = sg->n2o_row[j];

	      Mat_dhGetRow (A, oldRow, &len, &cval, &aval);
	      CHECK_V_ERROR;

	      if (noValues)
		{
		  for (k = 0; k < len; ++k)
		    {
		      fprintf (fp, "%i %i\n", idx, 1 + sg->o2n_col[cval[k]]);
		    }
		  ++idx;
		}

	      else
		{
		  for (k = 0; k < len; ++k)
		    {
		      double val = aval[k];
		      if (val == 0.0 && matlab)
			val = _MATLAB_ZERO_;
		      fprintf (fp, TRIPLES_FORMAT, idx,
			       1 + sg->o2n_col[cval[k]], val);
		    }
		  ++idx;
		}
	      Mat_dhRestoreRow (A, oldRow, &len, &cval, &aval);
	      CHECK_V_ERROR;
	    }
	}
    }

  /*---------------------------------------------------------------- 
   * case 3: multiple mpi tasks, one subdomain per task
   *----------------------------------------------------------------*/
  else
    {
      Hash_i_dh hash = sg->o2n_ext;	/* old->new map for non-local columns */
      int *o2n_col = sg->o2n_col, *n2o_row = sg->n2o_row;
      int beg_row = sg->beg_row[myid_dh];
      int beg_rowP = sg->beg_rowP[myid_dh];
      int i, j, pe;
      int id = sg->o2n_sub[myid_dh];	/* this task's slot in output order */

      /* subdomains write in permuted subdomain order, serialized by the
         barrier; slot 0 truncates, the rest append */
      for (pe = 0; pe < np_dh; ++pe)
	{
	  MPI_Barrier (comm_dh);
	  if (id == pe)
	    {
	      if (pe == 0)
		{
		  fp = openFile_dh (filename, "w");
		  CHECK_V_ERROR;
		}
	      else
		{
		  fp = openFile_dh (filename, "a");
		  CHECK_V_ERROR;
		}

	      for (i = 0; i < m; ++i)
		{
		  int row = n2o_row[i];
		  for (j = rp[row]; j < rp[row + 1]; ++j)
		    {
		      int col = cval[j];
		      double val = 0.0;

		      if (aval != NULL)	/* aval is NULL when -noValues */
			val = aval[j];
		      if (val == 0.0 && matlab)
			val = _MATLAB_ZERO_;

		      /* find permuted (old-to-new) value for the column */
		      /* case i: column is locally owned */
		      if (col >= beg_row && col < beg_row + m)
			{
			  col = o2n_col[col - beg_row] + beg_rowP;
			}

		      /* case ii: column is external */
		      else
			{
			  int tmp = col;
			  tmp = Hash_i_dhLookup (hash, col);
			  CHECK_V_ERROR;
			  if (tmp == -1)
			    {
			      sprintf (msgBuf_dh,
				       "nonlocal column= %i not in hash table",
				       1 + col);
			      SET_V_ERROR (msgBuf_dh);
			    }
			  else
			    {
			      col = tmp;
			    }
			}

		      if (noValues)
			{
			  fprintf (fp, "%i %i\n", 1 + i + beg_rowP, 1 + col);
			}
		      else
			{
			  fprintf (fp, TRIPLES_FORMAT, 1 + i + beg_rowP,
				   1 + col, val);
			}
		    }
		}
	      closeFile_dh (fp);
	      CHECK_V_ERROR;
	    }
	}
    }
END_FUNC_DH}
01257 
01258 
01259 /* seq only */
01260 #undef __FUNC__
01261 #define __FUNC__ "Mat_dhPrintCSR"
01262 void
01263 Mat_dhPrintCSR (Mat_dh A, SubdomainGraph_dh sg, char *filename)
01264 {
01265   START_FUNC_DH FILE * fp;
01266 
01267   if (np_dh > 1)
01268     {
01269       SET_V_ERROR ("only implemented for a single mpi task");
01270     }
01271   if (sg != NULL)
01272     {
01273       SET_V_ERROR
01274     ("not implemented for reordered matrix (SubdomainGraph_dh should be NULL)");
01275     }
01276 
01277   fp = openFile_dh (filename, "w");
01278   CHECK_V_ERROR;
01279 
01280   if (sg == NULL)
01281     {
01282       mat_dh_print_csr_private (A->m, A->rp, A->cval, A->aval, fp);
01283       CHECK_V_ERROR;
01284     }
01285   else
01286     {
01287       mat_dh_print_csr_private (A->m, A->rp, A->cval, A->aval, fp);
01288       CHECK_V_ERROR;
01289     }
01290   closeFile_dh (fp);
01291   CHECK_V_ERROR;
01292 END_FUNC_DH}
01293 
01294 /* seq */
01295 /* no reordering */
01296 #undef __FUNC__
01297 #define __FUNC__ "Mat_dhPrintBIN"
01298 void
01299 Mat_dhPrintBIN (Mat_dh A, SubdomainGraph_dh sg, char *filename)
01300 {
01301   START_FUNC_DH if (np_dh > 1)
01302     {
01303       SET_V_ERROR ("only implemented for a single MPI task");
01304     }
01305 /*  if (n2o != NULL || o2n != NULL || hash != NULL) {
01306 */
01307   if (sg != NULL)
01308     {
01309       SET_V_ERROR ("not implemented for reordering; ensure sg=NULL");
01310     }
01311 
01312   io_dh_print_ebin_mat_private (A->m, A->beg_row, A->rp, A->cval, A->aval,
01313                 NULL, NULL, NULL, filename);
01314   CHECK_V_ERROR;
01315 END_FUNC_DH}
01316 
01317 
01318 /*----------------------------------------------------------------------
01319  * Read methods
01320  *----------------------------------------------------------------------*/
01321 /* seq only */
01322 #undef __FUNC__
01323 #define __FUNC__ "Mat_dhReadCSR"
01324 void
01325 Mat_dhReadCSR (Mat_dh * mat, char *filename)
01326 {
01327   START_FUNC_DH Mat_dh A;
01328   FILE *fp;
01329 
01330   if (np_dh > 1)
01331     {
01332       SET_V_ERROR ("only implemented for a single MPI task");
01333     }
01334 
01335   fp = openFile_dh (filename, "r");
01336   CHECK_V_ERROR;
01337 
01338   Mat_dhCreate (&A);
01339   CHECK_V_ERROR;
01340   mat_dh_read_csr_private (&A->m, &A->rp, &A->cval, &A->aval, fp);
01341   CHECK_V_ERROR;
01342   A->n = A->m;
01343   *mat = A;
01344 
01345   closeFile_dh (fp);
01346   CHECK_V_ERROR;
01347 END_FUNC_DH}
01348 
01349 /* seq only */
01350 #undef __FUNC__
01351 #define __FUNC__ "Mat_dhReadTriples"
01352 void
01353 Mat_dhReadTriples (Mat_dh * mat, int ignore, char *filename)
01354 {
01355   START_FUNC_DH FILE * fp = NULL;
01356   Mat_dh A = NULL;
01357 
01358   if (np_dh > 1)
01359     {
01360       SET_V_ERROR ("only implemented for a single MPI task");
01361     }
01362 
01363   fp = openFile_dh (filename, "r");
01364   CHECK_V_ERROR;
01365 
01366   Mat_dhCreate (&A);
01367   CHECK_V_ERROR;
01368   mat_dh_read_triples_private (ignore, &A->m, &A->rp, &A->cval, &A->aval, fp);
01369   CHECK_V_ERROR;
01370   A->n = A->m;
01371   *mat = A;
01372 
01373   closeFile_dh (fp);
01374   CHECK_V_ERROR;
01375 END_FUNC_DH}
01376 
01377 /* here we pass the private function a filename, instead of an open file,
01378    the reason being that Euclid's binary format is more complicated,
01379    i.e, the other "Read" methods are only for a single mpi task.
01380 */
01381 #undef __FUNC__
01382 #define __FUNC__ "Mat_dhReadBIN"
01383 void
01384 Mat_dhReadBIN (Mat_dh * mat, char *filename)
01385 {
01386   START_FUNC_DH Mat_dh A;
01387 
01388   if (np_dh > 1)
01389     {
01390       SET_V_ERROR ("only implemented for a single MPI task");
01391     }
01392 
01393   Mat_dhCreate (&A);
01394   CHECK_V_ERROR;
01395   io_dh_read_ebin_mat_private (&A->m, &A->rp, &A->cval, &A->aval, filename);
01396   CHECK_V_ERROR;
01397   A->n = A->m;
01398   *mat = A;
01399 END_FUNC_DH}
01400 
01401 #undef __FUNC__
01402 #define __FUNC__ "Mat_dhTranspose"
01403 void
01404 Mat_dhTranspose (Mat_dh A, Mat_dh * Bout)
01405 {
01406   START_FUNC_DH Mat_dh B;
01407 
01408   if (np_dh > 1)
01409     {
01410       SET_V_ERROR ("only for sequential");
01411     }
01412 
01413   Mat_dhCreate (&B);
01414   CHECK_V_ERROR;
01415   *Bout = B;
01416   B->m = B->n = A->m;
01417   mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
01418                 A->aval, &B->aval);
01419   CHECK_V_ERROR;
01420 END_FUNC_DH}
01421 
#undef __FUNC__
#define __FUNC__ "Mat_dhMakeStructurallySymmetric"
/* Makes A's nonzero pattern structurally symmetric by delegating to
   make_symmetric_private; the rp/cval/aval arrays are passed by address,
   so they may be reallocated in place.  Serial (single MPI task) only. */
void
Mat_dhMakeStructurallySymmetric (Mat_dh A)
{
  START_FUNC_DH if (np_dh > 1)	/* serial-only guard */
    {
      SET_V_ERROR ("only for sequential");
    }
  make_symmetric_private (A->m, &A->rp, &A->cval, &A->aval);
  CHECK_V_ERROR;
END_FUNC_DH}
01434 
01435 void insert_diags_private (Mat_dh A, int ct);
01436 
01437 /* inserts diagonal if not explicitly present;
01438    sets diagonal value in row i to sum of absolute
01439    values of all elts in row i.
01440 */
01441 #undef __FUNC__
01442 #define __FUNC__ "Mat_dhFixDiags"
01443 void
01444 Mat_dhFixDiags (Mat_dh A)
01445 {
01446   START_FUNC_DH int i, j;
01447   int *rp = A->rp, *cval = A->cval, m = A->m;
01448   bool ct = 0;          /* number of missing diagonals */
01449   double *aval = A->aval;
01450 
01451   /* determine if any diagonals are missing */
01452   for (i = 0; i < m; ++i)
01453     {
01454       bool flag = true;
01455       for (j = rp[i]; j < rp[i + 1]; ++j)
01456     {
01457       int col = cval[j];
01458       if (col == i)
01459         {
01460           flag = false;
01461           break;
01462         }
01463     }
01464       if (flag)
01465     ++ct;
01466     }
01467 
01468   /* insert any missing diagonal elements */
01469   if (ct)
01470     {
01471       printf
01472     ("\nMat_dhFixDiags:: %i diags not explicitly present; inserting!\n",
01473      ct);
01474       insert_diags_private (A, ct);
01475       CHECK_V_ERROR;
01476       rp = A->rp;
01477       cval = A->cval;
01478       aval = A->aval;
01479     }
01480 
01481   /* set the value of all diagonal elements */
01482   for (i = 0; i < m; ++i)
01483     {
01484       double sum = 0.0;
01485       for (j = rp[i]; j < rp[i + 1]; ++j)
01486     {
01487       sum += fabs (aval[j]);
01488     }
01489       for (j = rp[i]; j < rp[i + 1]; ++j)
01490     {
01491       if (cval[j] == i)
01492         {
01493           aval[j] = sum;
01494         }
01495     }
01496     }
01497 END_FUNC_DH}
01498 
01499 
01500 #undef __FUNC__
01501 #define __FUNC__ "insert_diags_private"
01502 void
01503 insert_diags_private (Mat_dh A, int ct)
01504 {
01505   START_FUNC_DH int *RP = A->rp, *CVAL = A->cval;
01506   int *rp, *cval, m = A->m;
01507   double *aval, *AVAL = A->aval;
01508   int nz = RP[m] + ct;
01509   int i, j, idx = 0;
01510 
01511   rp = A->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01512   CHECK_V_ERROR;
01513   cval = A->cval = (int *) MALLOC_DH (nz * sizeof (int));
01514   CHECK_V_ERROR;
01515   aval = A->aval = (double *) MALLOC_DH (nz * sizeof (double));
01516   CHECK_V_ERROR;
01517   rp[0] = 0;
01518 
01519   for (i = 0; i < m; ++i)
01520     {
01521       bool flag = true;
01522       for (j = RP[i]; j < RP[i + 1]; ++j)
01523     {
01524       cval[idx] = CVAL[j];
01525       aval[idx] = AVAL[j];
01526       ++idx;
01527       if (CVAL[j] == i)
01528         flag = false;
01529     }
01530 
01531       if (flag)
01532     {
01533       cval[idx] = i;
01534       aval[idx] = 0.0;
01535       ++idx;
01536     }
01537       rp[i + 1] = idx;
01538     }
01539 
01540   FREE_DH (RP);
01541   CHECK_V_ERROR;
01542   FREE_DH (CVAL);
01543   CHECK_V_ERROR;
01544   FREE_DH (AVAL);
01545   CHECK_V_ERROR;
01546 
01547 END_FUNC_DH}
01548 
01549 #undef __FUNC__
01550 #define __FUNC__ "Mat_dhPrintDiags"
01551 void
01552 Mat_dhPrintDiags (Mat_dh A, FILE * fp)
01553 {
01554   START_FUNC_DH int i, j, m = A->m;
01555   int *rp = A->rp, *cval = A->cval;
01556   double *aval = A->aval;
01557 
01558   fprintf (fp,
01559        "=================== diagonal elements ====================\n");
01560   for (i = 0; i < m; ++i)
01561     {
01562       bool flag = true;
01563       for (j = rp[i]; j < rp[i + 1]; ++j)
01564     {
01565       if (cval[j] == i)
01566         {
01567           fprintf (fp, "%i  %g\n", i + 1, aval[j]);
01568           flag = false;
01569           break;
01570         }
01571     }
01572       if (flag)
01573     {
01574       fprintf (fp, "%i  ---------- missing\n", i + 1);
01575     }
01576     }
01577 END_FUNC_DH}
01578 
01579 
01580 #undef __FUNC__
01581 #define __FUNC__ "Mat_dhGetRow"
01582 void
01583 Mat_dhGetRow (Mat_dh B, int globalRow, int *len, int **ind, double **val)
01584 {
01585   START_FUNC_DH int row = globalRow - B->beg_row;
01586   if (row > B->m)
01587     {
01588       sprintf (msgBuf_dh,
01589            "requested globalRow= %i, which is local row= %i, but only have %i rows!",
01590            globalRow, row, B->m);
01591       SET_V_ERROR (msgBuf_dh);
01592     }
01593   *len = B->rp[row + 1] - B->rp[row];
01594   if (ind != NULL)
01595     *ind = B->cval + B->rp[row];
01596   if (val != NULL)
01597     *val = B->aval + B->rp[row];
01598 END_FUNC_DH}
01599 
#undef __FUNC__
#define __FUNC__ "Mat_dhRestoreRow"
/* Companion to Mat_dhGetRow.  Intentionally a no-op: Mat_dhGetRow hands
   out pointers directly into B's CSR arrays, so there is nothing to
   release; this exists only to complete the get/restore convention. */
void
Mat_dhRestoreRow (Mat_dh B, int row, int *len, int **ind, double **val)
{
START_FUNC_DH END_FUNC_DH}
01606 
01607 #undef __FUNC__
01608 #define __FUNC__ "Mat_dhRowPermute"
01609 void
01610 Mat_dhRowPermute (Mat_dh mat)
01611 {
01612   START_FUNC_DH if (ignoreMe)
01613     SET_V_ERROR ("turned off; compilation problem on blue");
01614 
01615 #if 0
01616   int i, j, m = mat->m, nz = mat->rp[m];
01617   int *o2n, *cval;
01618   int algo = 1;
01619   double *r1, *c1;
01620   bool debug = mat->debug;
01621   bool isNatural;
01622   Mat_dh B;
01623 
01624 #if 0
01625 * = 1:Compute a row permutation of the matrix so that the
01626     * permuted matrix has as many entries on its diagonal as
01627     * possible.The values on the diagonal are of arbitrary size.
01628     * HSL subroutine MC21A / AD is used for this
01629   . * = 2: Compute a row permutation of the matrix so that the smallest * value on the diagonal of the permuted matrix is maximized. * = 3:Compute a row permutation of the matrix so that the smallest
01630       * value on the diagonal of the permuted matrix is maximized.
01631       * The algorithm differs from the one used for JOB
01632     = 2 and may * have quite a different performance. * = 4: Compute a row permutation of the matrix so that the sum * of the diagonal entries of the permuted matrix is maximized. * = 5:Compute a row permutation of the matrix so that the product
01633     * of the diagonal entries of the permuted matrix is maximized
01634     * and vectors to scale the matrix so that the nonzero diagonal
01635     * entries of the permuted matrix are one in absolute value and
01636     * all the off - diagonal entries are less than or equal to one in
01637     * absolute value.
01638 #endif
01639     Parser_dhReadInt (parser_dh, "-rowPermute", &algo);
01640   CHECK_V_ERROR;
01641   if (algo < 1)
01642     algo = 1;
01643   if (algo > 5)
01644     algo = 1;
01645   sprintf (msgBuf_dh, "calling row permutation with algo= %i", algo);
01646   SET_INFO (msgBuf_dh);
01647 
01648   r1 = (double *) MALLOC_DH (m * sizeof (double));
01649   CHECK_V_ERROR;
01650   c1 = (double *) MALLOC_DH (m * sizeof (double));
01651   CHECK_V_ERROR;
01652   if (mat->row_perm == NULL)
01653     {
01654       mat->row_perm = o2n = (int *) MALLOC_DH (m * sizeof (int));
01655       CHECK_V_ERROR;
01656     }
01657   else
01658     {
01659       o2n = mat->row_perm;
01660     }
01661 
01662   Mat_dhTranspose (mat, &B);
01663   CHECK_V_ERROR;
01664 
01665   /* get row permutation and scaling vectors */
01666   dldperm (algo, m, nz, B->rp, B->cval, B->aval, o2n, r1, c1);
01667 
01668   /* permute column indices, then turn the matrix rightside up */
01669   cval = B->cval;
01670   for (i = 0; i < nz; ++i)
01671     cval[i] = o2n[cval[i]];
01672 
01673   /* debug block */
01674   if (debug && logFile != NULL)
01675     {
01676       fprintf (logFile, "\n-------- row permutation vector --------\n");
01677       for (i = 0; i < m; ++i)
01678     fprintf (logFile, "%i ", 1 + o2n[i]);
01679       fprintf (logFile, "\n");
01680 
01681       if (myid_dh == 0)
01682     {
01683       printf ("\n-------- row permutation vector --------\n");
01684       for (i = 0; i < m; ++i)
01685         printf ("%i ", 1 + o2n[i]);
01686       printf ("\n");
01687     }
01688     }
01689 
01690   /* check to see if permutation is non-natural */
01691   isNatural = true;
01692   for (i = 0; i < m; ++i)
01693     {
01694       if (o2n[i] != i)
01695     {
01696       isNatural = false;
01697       break;
01698     }
01699     }
01700 
01701   if (isNatural)
01702     {
01703       printf ("@@@ [%i] Mat_dhRowPermute :: got natural ordering!\n",
01704           myid_dh);
01705     }
01706   else
01707     {
01708       int *rp = B->rp, *cval = B->cval;
01709       double *aval = B->aval;
01710 
01711       if (algo == 5)
01712     {
01713       printf
01714         ("@@@ [%i] Mat_dhRowPermute :: scaling matrix rows and columns!\n",
01715          myid_dh);
01716 
01717       /* scale matrix */
01718       for (i = 0; i < m; i++)
01719         {
01720           r1[i] = exp (r1[i]);
01721           c1[i] = exp (c1[i]);
01722         }
01723       for (i = 0; i < m; i++)
01724         for (j = rp[i]; j < rp[i + 1]; j++)
01725           aval[j] *= r1[cval[j]] * c1[i];
01726     }
01727 
01728       mat_dh_transpose_reuse_private (B->m, B->rp, B->cval, B->aval,
01729                       mat->rp, mat->cval, mat->aval);
01730       CHECK_V_ERROR;
01731     }
01732 
01733 
01734   Mat_dhDestroy (B);
01735   CHECK_V_ERROR;
01736   FREE_DH (r1);
01737   CHECK_V_ERROR;
01738   FREE_DH (c1);
01739   CHECK_V_ERROR;
01740 
01741 #endif
01742 END_FUNC_DH}
01743 
01744 
01745 /*==============================================================================*/
01746 #undef __FUNC__
01747 #define __FUNC__ "Mat_dhPartition"
01748 void
01749 build_adj_lists_private (Mat_dh mat, int **rpOUT, int **cvalOUT)
01750 {
01751   START_FUNC_DH int m = mat->m;
01752   int *RP = mat->rp, *CVAL = mat->cval;
01753   int nz = RP[m];
01754   int i, j, *rp, *cval, idx = 0;
01755 
01756   rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01757   CHECK_V_ERROR;
01758   cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
01759   CHECK_V_ERROR;
01760   rp[0] = 0;
01761 
01762   /* assume symmetry for now! */
01763   for (i = 0; i < m; ++i)
01764     {
01765       for (j = RP[i]; j < RP[i + 1]; ++j)
01766     {
01767       int col = CVAL[j];
01768       if (col != i)
01769         {
01770           cval[idx++] = col;
01771         }
01772     }
01773       rp[i + 1] = idx;
01774     }
01775 END_FUNC_DH}
01776 
01777 
01778 #undef __FUNC__
01779 #define __FUNC__ "Mat_dhPartition"
01780 void
01781 Mat_dhPartition (Mat_dh mat, int blocks,
01782          int **beg_rowOUT, int **row_countOUT, int **n2oOUT,
01783          int **o2nOUT)
01784 {
01785   START_FUNC_DH
01786 #ifndef HAVE_METIS_DH
01787     if (ignoreMe)
01788     SET_V_ERROR ("not compiled for metis!");
01789 
01790 #else
01791   int *beg_row, *row_count, *n2o, *o2n, bk, new, *part;
01792   int m = mat->m;
01793   int i, cutEdgeCount;
01794   double zero = 0.0;
01795   int metisOpts[5] = { 0, 0, 0, 0, 0 };
01796   int *rp, *cval;
01797 
01798   /* allocate storage for returned arrays */
01799   beg_row = *beg_rowOUT = (int *) MALLOC_DH (blocks * sizeof (int));
01800   CHECK_V_ERROR;
01801   row_count = *row_countOUT = (int *) MALLOC_DH (blocks * sizeof (int));
01802   CHECK_V_ERROR;
01803   *n2oOUT = n2o = (int *) MALLOC_DH (m * sizeof (int));
01804   CHECK_V_ERROR;
01805   *o2nOUT = o2n = (int *) MALLOC_DH (m * sizeof (int));
01806   CHECK_V_ERROR;
01807 
01808 #if 0
01809 == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == = Metis arguments:
01810 
01811   n - number of nodes rp[], cval[]NULL, NULL, 0 /*no edge or vertex weights */
01812     0               /*use zero-based numbering */
01813     blocksIN, options[5] = 0::0 / 1 use defauls;
01814   use uptions 1. .4
01815     1::
01816     edgecutOUT,
01817     part[]
01818     == == == == == == == == == == == == == == == == == == == == == == == == ==
01819     == == == == == =
01820 #endif
01821     /* form the graph representation that metis wants */
01822     build_adj_lists_private (mat, &rp, &cval);
01823   CHECK_V_ERROR;
01824   part = (int *) MALLOC_DH (m * sizeof (int));
01825   CHECK_V_ERROR;
01826 
01827   /* get parition vector from metis */
01828   METIS_PartGraphKway (&m, rp, cval, NULL, NULL,
01829                &zero, &zero, &blocks, metisOpts, &cutEdgeCount, part);
01830 
01831   FREE_DH (rp);
01832   CHECK_V_ERROR;
01833   FREE_DH (cval);
01834   CHECK_V_ERROR;
01835 
01836   if (mat->debug)
01837     {
01838       printf_dh ("\nmetis partitioning vector; blocks= %i\n", blocks);
01839       for (i = 0; i < m; ++i)
01840     printf_dh ("  %i %i\n", i + 1, part[i]);
01841     }
01842 
01843   /* compute beg_row, row_count arrays from partition vector */
01844   for (i = 0; i < blocks; ++i)
01845     row_count[i] = 0;
01846   for (i = 0; i < m; ++i)
01847     {
01848       bk = part[i];     /* block to which row i belongs */
01849       row_count[bk] += 1;
01850     }
01851   beg_row[0] = 0;
01852   for (i = 1; i < blocks; ++i)
01853     beg_row[i] = beg_row[i - 1] + row_count[i - 1];
01854 
01855   if (mat->debug)
01856     {
01857       printf_dh ("\nrow_counts: ");
01858       for (i = 0; i < blocks; ++i)
01859     printf_dh (" %i", row_count[i]);
01860       printf_dh ("\nbeg_row: ");
01861       for (i = 0; i < blocks; ++i)
01862     printf_dh (" %i", beg_row[i] + 1);
01863       printf_dh ("\n");
01864     }
01865 
01866   /* compute permutation vector */
01867   {
01868     int *tmp = (int *) MALLOC_DH (blocks * sizeof (int));
01869     CHECK_V_ERROR;
01870     memcpy (tmp, beg_row, blocks * sizeof (int));
01871     for (i = 0; i < m; ++i)
01872       {
01873     bk = part[i];       /* block to which row i belongs */
01874     new = tmp[bk];
01875     tmp[bk] += 1;
01876     o2n[i] = new;
01877     n2o[new] = i;
01878       }
01879     FREE_DH (tmp);
01880   }
01881 
01882   FREE_DH (part);
01883   CHECK_V_ERROR;
01884 
01885 #endif
01886 
01887 END_FUNC_DH}

/* Generated on Wed May 12 21:30:18 2010 for IFPACK by doxygen 1.4.7 */