Amesos Package Browser (Single Doxygen Collection) Development
amesos_paraklete_factorize.c
Go to the documentation of this file.
00001 /* ========================================================================== */
00002 /* === paraklete_factorize ================================================== */
00003 /* ========================================================================== */
00004 
00005 #include "amesos_paraklete_decl.h"
00006 
00007 /* LU = paraklete_factorize (A, LUsymbolic, Common) factorizes P*A*Q into L*U.
00008  * Returns NULL if A is singular or if memory is exhausted.
00009  *
00010  * PARAKLETE version 0.3: parallel sparse LU factorization.  Nov 13, 2007
00011  * Copyright (C) 2007, Univ. of Florida.  Author: Timothy A. Davis
00012  * See License.txt for the Version 2.1 of the GNU Lesser General Public License
00013  * http://www.cise.ufl.edu/research/sparse
00014  */
00015 
00016 /* ========================================================================== */
00017 /* === paraklete_allocate_numeric =========================================== */
00018 /* ========================================================================== */
00019 
00020 /* Allocate initial part of LU factors */
00021 
00022 static paraklete_numeric *amesos_paraklete_allocate_numeric
00023 (
00024     paraklete_symbolic *LUsymbolic,
00025     paraklete_common *Common
00026 )
00027 {
00028     paraklete_numeric *LU ;
00029     paraklete_node *LUnode ;
00030     cholmod_common *cm ;
00031     Int *Cstart, *Sched, *Childp ;
00032     Int c, n, ncomponents, myid ;
00033 
00034     cm = &(Common->cm) ;
00035     myid = Common->myid ;
00036     LU = CHOLMOD (malloc) (1, sizeof (paraklete_numeric), cm) ;
00037 
00038     if (cm->status != CHOLMOD_OK)
00039     {
00040         /* TODO return NULL, and broadcast error to all processes */
00041         PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
00042     }
00043 
00044     n = LUsymbolic->n ;
00045     ncomponents = LUsymbolic->ncomponents ;
00046 
00047     LU->magic = PARAKLETE_MAGIC ;
00048     LU->n = n ;
00049     LU->ncomponents = ncomponents ;
00050     LU->LUnode = CHOLMOD (malloc) (ncomponents, sizeof (paraklete_node *), cm) ;
00051     LU->P = CHOLMOD (malloc) (n, sizeof (Int), cm) ;
00052     LU->Q = CHOLMOD (malloc) (n, sizeof (Int), cm) ;
00053     LU->Pinv = CHOLMOD (malloc) (n, sizeof (Int), cm) ;
00054     LU->Qinv = CHOLMOD (malloc) (n, sizeof (Int), cm) ;
00055     LU->W = NULL ;
00056     LU->Ep2 = NULL ;
00057     LU->E = NULL ;
00058 
00059     if (myid == 0)
00060     {
00061   /* allocate workspace for subsequent solve */
00062   LU->W = CHOLMOD (malloc) (n, sizeof (double), cm) ;
00063   /* allocate workspace for distributing the input matrix to the nodes */
00064   LU->Ep2 = CHOLMOD (malloc) (n+1, sizeof (Int), cm) ;
00065     }
00066 
00067     if (LU->LUnode != NULL)
00068     {
00069   for (c = 0 ; c < ncomponents ; c++)
00070   {
00071       LU->LUnode [c] = CHOLMOD (malloc) (1, sizeof (paraklete_node), cm) ;
00072   }
00073     }
00074 
00075     Cstart = LUsymbolic->Cstart ;
00076     Sched = LUsymbolic->Sched ;
00077     Childp = LUsymbolic->Childp ;
00078 
00079     if (cm->status != CHOLMOD_OK)
00080     {
00081         /* TODO return NULL, and broadcast error to all processes, and
00082          * free the LU object */
00083         PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
00084     }
00085 
00086     for (c = 0 ; c < ncomponents ; c++)
00087     {
00088   /* Each process has an LUnode [c] for each node in the tree, but it
00089    * will be populated only with the parts this process needs */
00090   LUnode = (LU->LUnode == NULL) ? NULL : (LU->LUnode [c]) ;
00091 
00092   if (LUnode != NULL)
00093   {
00094 
00095       LUnode->nk = Cstart [c+1] - Cstart [c] ;
00096       LUnode->nchild = Childp [c+1] - Childp [c] ;
00097 
00098       /* no LU factorization of this node yet */
00099       LUnode->PK_STATUS = PK_UNKNOWN ;
00100       LUnode->PK_NN = 0 ;
00101       LUnode->PK_NPIV = 0 ;
00102       LUnode->PK_NFOUND = 0 ;
00103       LUnode->PK_NLOST = 0 ;
00104       LUnode->lusize = 0 ;
00105       LUnode->llen = NULL ;
00106       LUnode->lp = NULL ;
00107       LUnode->ulen = NULL ;
00108       LUnode->up = NULL ;
00109       LUnode->ix = NULL ;
00110 
00111       /* information and messages from each child of node c */
00112       if (Sched [c] == myid)
00113       {
00114     LUnode->Lost = CHOLMOD (malloc) (LUnode->nchild,
00115       sizeof (Int), cm) ;
00116     LUnode->Lostp = CHOLMOD (malloc) (LUnode->nchild+1,
00117       sizeof (Int), cm);
00118     MPI (LUnode->Req = CHOLMOD (malloc) (LUnode->nchild,
00119       sizeof (MPI_Request), cm)) ;
00120       }
00121       else
00122       {
00123     LUnode->Lost = NULL ;
00124     LUnode->Lostp = NULL ;
00125     MPI (LUnode->Req = NULL) ;
00126       }
00127 
00128       /* no permutation vectors yet */
00129       LUnode->Pglobal = NULL ;
00130       LUnode->Qglobal = NULL ;
00131       LUnode->Plocal = NULL ;
00132       LUnode->Qlocal = NULL ;
00133       LUnode->Pinv = NULL ;
00134       LUnode->Qinv = NULL ;
00135 
00136       /* no Schur complement yet */
00137       LUnode->PK_SSIZE = 0 ;
00138       LUnode->PK_SNZ = 0 ;
00139       LUnode->PK_SN = 0 ;
00140       LUnode->slen = NULL ;
00141       LUnode->sp = NULL ;
00142       LUnode->sx = NULL ;
00143 
00144       /* no solution and right-hand-side yet */
00145       LUnode->B = NULL ;
00146       LUnode->X = NULL ;
00147 
00148       /* no input matrix yet */
00149       LUnode->A = NULL ;
00150       LUnode->C = NULL ;
00151 
00152       /* no workspace yet */
00153       LUnode->W2 = NULL ;
00154   }
00155     }
00156 
00157     if (cm->status != CHOLMOD_OK)
00158     {
00159         /* TODO return NULL, and broadcast error to all processes, and */
00160         /* free the LU object */
00161         PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
00162     }
00163 
00164     PR1 ((Common->file, "proc "ID" LU done\n", myid)) ;
00165     return (LU) ;
00166 }
00167 
00168 
00169 /* ========================================================================== */
00170 /* === paraklete_free_numeric =============================================== */
00171 /* ========================================================================== */
00172 
00173 /* Free the numeric object on all processors */
00174 
00175 void amesos_paraklete_free_numeric
00176 (
00177     paraklete_numeric **LUHandle,
00178     paraklete_common *Common
00179 )
00180 {
00181     paraklete_numeric *LU ;
00182     paraklete_node *LUnode ;
00183     cholmod_common *cm ;
00184     Int c ;
00185 
00186     if (LUHandle == NULL)
00187     {
00188   /* nothing to do */
00189   return ;
00190     }
00191     LU = *LUHandle ;
00192     if (LU == NULL)
00193     {
00194   /* nothing to do */
00195   return ;
00196     }
00197 
00198     cm = &(Common->cm) ;
00199 
00200     /* global P and Q, broadcast to all processors */
00201     CHOLMOD (free) (LU->n, sizeof (Int), LU->P,    cm) ;
00202     CHOLMOD (free) (LU->n, sizeof (Int), LU->Q,    cm) ;
00203     CHOLMOD (free) (LU->n, sizeof (Int), LU->Pinv, cm) ;
00204     CHOLMOD (free) (LU->n, sizeof (Int), LU->Qinv, cm) ;
00205     CHOLMOD (free) (LU->n, sizeof (double), LU->W,    cm) ;
00206     CHOLMOD (free) (LU->n+1, sizeof (Int), LU->Ep2,  cm) ;
00207     CHOLMOD (free_sparse) (&(LU->E), cm) ;
00208 
00209     if (LU->LUnode != NULL)
00210     {
00211   for (c = 0 ; c < LU->ncomponents ; c++)
00212   {
00213       LUnode = LU->LUnode [c] ;
00214       if (LUnode != NULL)
00215       {
00216     /* solution and right-hand-side at this node */
00217     PR2 ((Common->file,"proc "ID" node "ID" free numeric, nk "ID" B %p\n",
00218       Common->myid, c, LUnode->nk, (void *) (LUnode->B))) ;
00219     CHOLMOD (free) (LUnode->nk, sizeof (double), LUnode->B, cm) ;
00220     CHOLMOD (free) (LUnode->PK_NN, sizeof (double), LUnode->X, cm) ;
00221 
00222     /* LU factors at this node */
00223     CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int), LUnode->llen, cm) ;
00224     CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int), LUnode->lp, cm) ;
00225     CHOLMOD (free) (LUnode->PK_NN, sizeof (Int), LUnode->ulen, cm) ;
00226     CHOLMOD (free) (LUnode->PK_NN, sizeof (Int), LUnode->up, cm) ;
00227     CHOLMOD (free) (LUnode->lusize, sizeof (double), LUnode->ix, cm) ;
00228     CHOLMOD (free) (LUnode->nchild, sizeof (Int), LUnode->Lost, cm) ;
00229     CHOLMOD (free) (LUnode->nchild+1, sizeof (Int),
00230       LUnode->Lostp, cm) ;
00231     MPI (CHOLMOD (free) (LUnode->nchild,
00232           sizeof (MPI_Request), LUnode->Req, cm)) ;
00233 
00234     /* P and Q at this node */
00235     CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int),
00236       LUnode->Pglobal, cm) ;
00237     CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int),
00238       LUnode->Qglobal, cm) ;
00239     CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int),
00240       LUnode->Plocal, cm) ;
00241     CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int),
00242       LUnode->Qlocal, cm) ;
00243     CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int), LUnode->Pinv, cm) ;
00244     CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int), LUnode->Qinv, cm) ;
00245 
00246     /* Schur complement of this node */
00247     CHOLMOD (free) (LUnode->PK_SN, sizeof (Int), LUnode->sp, cm) ;
00248     CHOLMOD (free) (LUnode->PK_SN, sizeof (Int), LUnode->slen, cm) ;
00249     CHOLMOD (free) (LUnode->PK_SSIZE, sizeof (double),
00250       LUnode->sx, cm) ;
00251 
00252     /* input matrix and sum of Schur complements at this node */
00253     CHOLMOD (free_sparse) (&(LUnode->A), cm) ;
00254     CHOLMOD (free_sparse) (&(LUnode->C), cm) ;
00255 
00256     CHOLMOD (free) (2*(LUnode->nlost_in), sizeof (Int),
00257       LUnode->W2, cm) ;
00258 
00259     /* free the LUnode itself */
00260     CHOLMOD (free) (1, sizeof (paraklete_node), LUnode, cm) ;
00261       }
00262   }
00263     }
00264 
00265     CHOLMOD (free) (LU->ncomponents, sizeof (paraklete_node *), LU->LUnode, cm) ;
00266     *LUHandle = CHOLMOD (free) (1, sizeof (paraklete_numeric), (*LUHandle), cm) ;
00267 }
00268 
00269 
00270 /* ========================================================================== */
00271 /* === paraklete_permute ==================================================== */
00272 /* ========================================================================== */
00273 
00274 /* E = A(p,q) */
00275 
00276 static cholmod_sparse *paraklete_permute
00277 (
00278     cholmod_sparse *A,
00279     paraklete_numeric *LU,
00280     paraklete_symbolic *LUsymbolic,
00281     paraklete_common *Common
00282 )
00283 {
00284     double *Ax, *Ex ;
00285     Int *Ap, *Ai, *Ep2, *Ep, *Ei, *Cperm, *RpermInv, *Rperm ;
00286     cholmod_common *cm ;
00287     cholmod_sparse *E ;
00288     Int n, enz, anz, k, j, p ;
00289 
00290     cm = &(Common->cm) ;
00291     ASSERT (Common->myid == 0) ;
00292 
00293     Cperm = LUsymbolic->Cperm ;
00294     Rperm = LUsymbolic->Rperm ;
00295     Rperm = (Rperm == NULL) ? Cperm : Rperm ;
00296     RpermInv = LUsymbolic->RpermInv ;
00297 
00298     n = A->nrow ;
00299     ASSERT (n == LUsymbolic->n) ;
00300     Ap = A->p ;
00301     Ai = A->i ;
00302     Ax = A->x ;
00303     anz = Ap [n] ;
00304     Ep2 = LU->Ep2 ;
00305 
00306 #ifndef NDEBUG
00307     for (k = 0 ; k < n ; k++)
00308     {
00309         PR3 ((Common->file, "Cperm ("ID") = "ID" ;\n", k+1, Cperm[k]+1));
00310     }
00311     for (k = 0 ; k < n ; k++)
00312     {
00313         PR3 ((Common->file, "Rperm ("ID") = "ID" ;\n", k+1, Rperm[k]+1));
00314     }
00315     for (k = 0 ; k < n ; k++)
00316     {
00317         PR3 ((Common->file, "RpermInv ("ID") = "ID" ;\n", k+1, RpermInv [k]+1)) ;
00318     }
00319 #endif
00320 
00321     E = CHOLMOD (allocate_sparse) (n, n, anz, FALSE, TRUE, 0, TRUE, cm) ;
00322 
00323     if (cm->status != CHOLMOD_OK)
00324     {
00325   /* out of memory */
00326   PR0 ((Common->file, "failed to allocate E\n")) ;
00327   return (NULL) ;
00328     }
00329 
00330     Ep = E->p ;
00331     Ei = E->i ;
00332     Ex = E->x ;
00333     enz = 0 ;
00334     for (k = 0 ; k < n ; k++)
00335     {
00336   /* column j of A becomes column k of E */
00337   j = Cperm [k] ;
00338   PR1 ((Common->file, "Col "ID" of A becomes col "ID" of E\n", j, k)) ;
00339   Ep [k] = enz ;
00340   for (p = Ap [j] ; p < Ap [j+1] ; p++)
00341   {
00342       /* row i = Ai [p] becomes row RpermInv[i] of E */
00343       PR2 ((Common->file, "    "ID" %g -> "ID"\n",
00344                 Ai [p], Ax [p], RpermInv [Ai [p]])) ;
00345       Ei [enz] = RpermInv [Ai [p]] ;
00346       Ex [enz] = Ax [p] ;
00347       enz++ ;
00348       ASSERT (enz <= anz) ;
00349   }
00350     }
00351     Ep [n] = enz ;
00352     DEBUG (CHOLMOD (print_sparse) (E, "E = A(p,p)", cm)) ;
00353     CHOLMOD (sort) (E, cm) ;
00354 
00355     if (cm->status != CHOLMOD_OK)
00356     {
00357   /* out of memory */
00358   PR0 ((Common->file, "out of memory to sort E\n")) ;
00359   CHOLMOD (free_sparse) (&E, cm) ;
00360   return (NULL) ;
00361     }
00362 
00363     DEBUG (CHOLMOD (print_sparse) (E, "E = A(p,p), sorted", cm)) ;
00364     for (k = 0 ; k <= n ; k++)
00365     {
00366   Ep2 [k] = Ep [k] ;
00367     }
00368 
00369     return (E) ;
00370 }
00371 
00372 
00373 /* ========================================================================== */
00374 /* === paraklete_factorize ================================================== */
00375 /* ========================================================================== */
00376 
00377 paraklete_numeric *amesos_paraklete_factorize
00378 (
00379     /* inputs, not modified */
00380     cholmod_sparse *A,
00381     paraklete_symbolic *LUsymbolic,
00382     paraklete_common *Common
00383 )
00384 {
00385     paraklete_numeric *LU ;
00386     cholmod_common *cm ;
00387     cholmod_sparse *E, *C ;
00388     double *Ex, *Cx ;
00389     Int *Cperm, *Cn, *Cnz, *Ep, *Ei, *Ep2, *Map, *Cparent, *Rperm,
00390   *Cstart, *P, *Q, *Cp, *Ci, *Pc, *Qc, *Pinv, *Qinv, *Sched ;
00391     Int cj, i, n, ncomponents, k, p, c, a, cn, k1, k2, kk, cnz,
00392   nfound, myid, npiv ;
00393     int ok, all_ok ;
00394 
00395     MPI (MPI_Status ms) ;
00396     MPI (MPI_Request req) ;
00397 
00398     /* TODO: return NULL if any input argument is NULL */
00399 
00400     Common->status = PK_OK ;
00401 
00402     /* ---------------------------------------------------------------------- */
00403     /* get inputs */
00404     /* ---------------------------------------------------------------------- */
00405 
00406     cm = &(Common->cm) ;
00407     cm->status = CHOLMOD_OK ;
00408 
00409     myid = Common->myid ;
00410 
00411     PR0 ((Common->file, "proc "ID" start factorize\n", myid)) ;
00412 
00413     n = LUsymbolic->n ;
00414     /*
00415     printf ("   factorize n "ID" LUsymbolic %p\n", n, (void *) LUsymbolic) ;
00416     */
00417     ncomponents = LUsymbolic->ncomponents ;
00418     Cperm   = LUsymbolic->Cperm ;
00419     Rperm   = LUsymbolic->Rperm ;
00420     Rperm   = (Rperm == NULL) ? Cperm : Rperm ;
00421     Cn      = LUsymbolic->Cn ;
00422     Cnz     = LUsymbolic->Cnz ;
00423     Cparent = LUsymbolic->Cparent ;
00424     Cstart  = LUsymbolic->Cstart  ;
00425     Sched   = LUsymbolic->Sched ;
00426 
00427     PR0 ((Common->file, "in factor: proc "ID" my_tries "ID"\n", myid, my_tries)) ;
00428 
00429 #ifndef NDEBUG
00430     for (c = 0 ; c < ncomponents ; c++)
00431     {
00432   PR1 ((Common->file, "proc: "ID" node "ID": "ID" to "ID", parent "ID" Sched "ID"\n",
00433     Common->myid, c, Cstart [c], Cstart [c+1]-1, Cparent [c],
00434     Sched [c])) ;
00435     }
00436 #endif
00437 
00438     /* ---------------------------------------------------------------------- */
00439     /* allocate and initialize the global LU factors */
00440     /* ---------------------------------------------------------------------- */
00441 
00442     LU = amesos_paraklete_allocate_numeric (LUsymbolic, Common) ;
00443     ok = (LU != NULL) ;
00444     PR1 ((Common->file, "factor proc "ID" alloc ok: "ID"\n", myid, ok)) ;
00445 
00446     /* ---------------------------------------------------------------------- */
00447     /* E = A(p,p), with sorted column indices */
00448     /* ---------------------------------------------------------------------- */
00449 
00450     E = NULL ;
00451     Ep2 = NULL ;
00452     if (ok && myid == 0)
00453     {
00454   /* only the root constructs the matrix E = A (p,p) */
00455   Ep2 = LU->Ep2 ;
00456   E = LU->E = paraklete_permute (A, LU, LUsymbolic, Common) ;
00457   ok = (E != NULL) ;
00458   DEBUG (CHOLMOD (print_sparse) (E, "E = A(p,p) on master node", cm)) ;
00459     }
00460 
00461     /* ---------------------------------------------------------------------- */
00462     /* allocate space for submatrices at each node */
00463     /* ---------------------------------------------------------------------- */
00464 
00465     for (c = 0 ; ok && c < ncomponents ; c++)
00466     {
00467   if (myid == 0 || Sched [c] == myid)
00468   {
00469       /* Two copies are made, one on the root process and one on the
00470        * process that factorizes that submatrix.  If the root process
00471        * is factorizing the node, then only one copy is made. */
00472       C = CHOLMOD (allocate_sparse) (Cn [c], Cn [c], Cnz [c], TRUE, TRUE,
00473         0, TRUE, cm) ;
00474       LU->LUnode [c]->A = C ;
00475       ok = (C != NULL) ;
00476   }
00477     }
00478 
00479     /* ---------------------------------------------------------------------- */
00480     /* all processes find out if initial allocation fails for any process */
00481     /* ---------------------------------------------------------------------- */
00482 
00483     all_ok = ok ;
00484     MPI (MPI_Allreduce (&ok, &all_ok, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD)) ;
00485     if (!all_ok)
00486     {
00487   /* out of memory; inform all processes */
00488   PR0 ((Common->file, "proc "ID" all fail factorize\n", Common->myid)) ;
00489   Common->status = PK_OUT_OF_MEMORY ;
00490   amesos_paraklete_free_numeric (&LU, Common) ;
00491   return (NULL) ;
00492     }
00493 
00494     /* ---------------------------------------------------------------------- */
00495     /* distribute the permuted matrix to the nodes */
00496     /* ---------------------------------------------------------------------- */
00497 
00498     if (myid == 0)
00499     {
00500 
00501   /* ------------------------------------------------------------------ */
00502   /* process 0 partitions the input matrix and sends to all processes */
00503   /* ------------------------------------------------------------------ */
00504 
00505   Ep = E->p ;
00506   Ei = E->i ;
00507   Ex = E->x ;
00508 
00509 #ifndef NDEBUG
00510   for (k = 0 ; k < n ; k++)
00511   {
00512       for (p = Ep [k] ; p < Ep [k+1] ; p++)
00513       {
00514     PR3 ((Common->file, "E ("ID","ID") = %g ;\n", 1+Ei[p], 1+k, Ex[p]));
00515       }
00516   }
00517   for (c = 0 ; c <= ncomponents ; c++)
00518         {
00519       PR3 ((Common->file, "Cstart ("ID") = "ID" ;\n", 1+c, 1+Cstart [c])) ;
00520         }
00521 #endif
00522 
00523   Map = cm->Iwork ;
00524 
00525   for (c = 0 ; c < ncomponents ; c++)
00526   {
00527       PR1 ((Common->file, "distribute node c = "ID"\n", c)) ;
00528       cn = Cn [c] ;
00529 
00530       /* find mapping of global rows/columns to node c's local indices */
00531       cj = 0 ;
00532       for (a = c ; a != EMPTY ; a = Cparent [a])
00533       {
00534     PR2 ((Common->file, "ancestor "ID", for c "ID", ncomp "ID"\n",
00535       a, c, ncomponents)) ;
00536     ASSERT (a >= c && a < ncomponents) ;
00537     k1 = Cstart [a] ;
00538     k2 = Cstart [a+1] ;
00539     PR2 ((Common->file, "k1 "ID" k2 "ID"\n", k1, k2)) ;
00540     for (k = k1 ; k < k2 ; k++)
00541     {
00542         /* global index k becomes local index j */
00543         PR3 ((Common->file, "  global: "ID" local "ID"\n", k, cj)) ;
00544         Map [k] = cj++ ;
00545     }
00546       }
00547       ASSERT (cj == cn) ;
00548 
00549       /* get the local matrix for node c */
00550       C = LU->LUnode [c]->A ;
00551       Cp = C->p ;
00552       Ci = C->i ;
00553       Cx = C->x ;
00554 
00555       cj = 0 ;
00556       cnz = 0 ;
00557 
00558       /* create columns 0:(k2-k1-1) of C, containing candidate pivot
00559        * columns for node c */
00560       k1 = Cstart [c] ;
00561       k2 = Cstart [c+1] ;
00562       PR2 ((Common->file, "c: "ID" k1 to k2-1: "ID" "ID"\n", c, k1, k2-1)) ;
00563       for (k = k1 ; k < k2 ; k++)
00564       {
00565     Cp [cj++] = cnz ;
00566     for (p = Ep2 [k] ; p < Ep [k+1] ; p++)
00567     {
00568         i = Ei [p] ;
00569         ASSERT (Ei [p] >= k1) ;
00570         PR3 ((Common->file,
00571                         "E(1+"ID",1+"ID") = %g ; ce(1+"ID",1+"ID") = "ID"\n",
00572             i,k, Ex[p], i,k,c)) ;
00573         Ci [cnz] = Map [i] ;
00574         Cx [cnz] = Ex [p] ;
00575         cnz++ ;
00576         ASSERT (cnz <= Cnz [c]) ;
00577     }
00578       }
00579 
00580       /* create columns for the ancestors of C.  These will become columns
00581        * of U2 and S for node c. */
00582       for (a = Cparent [c] ; a != EMPTY ; a = Cparent [a])
00583       {
00584     PR2 ((Common->file, "ancestor "ID"\n", a)) ;
00585     PR2 ((Common->file, "k1 "ID" k2 "ID"\n", Cstart [a], Cstart [a+1]));
00586     for (k = Cstart [a] ; k < Cstart [a+1] ; k++)
00587     {
00588         Cp [cj++] = cnz ;
00589         ASSERT (cj <= cn) ;
00590         for (p = Ep2 [k] ; p < Ep [k+1] ; p++)
00591         {
00592       i = Ei [p] ;
00593       ASSERT (i >= k1) ;
00594       if (i >= k2)
00595       {
00596           /* only get rows Cstart [c] to Cstart [c+1]-1 */
00597           break ;
00598       }
00599       PR3 ((Common->file,
00600                             "E(1+"ID",1+"ID") = %g ; ce(1+"ID",1+"ID") = "ID"\n",
00601           i,k, Ex[p], i,k,c)) ;
00602       Ci [cnz] = Map [i] ;
00603       Cx [cnz] = Ex [p] ;
00604       cnz++ ;
00605       ASSERT (cnz <= Cnz [c]) ;
00606         }
00607         /* next component will start here */
00608         Ep2 [k] = p ;
00609     }
00610       }
00611       ASSERT (cj == cn) ;
00612       ASSERT (cnz == Cnz [c]) ;
00613       Cp [cn] = cnz ;
00614 
00615       /* place matrix C in node c of the tree */
00616       PR2 ((Common->file, "node: "ID" sched: "ID"\n", c, Sched [c])) ;
00617 
00618       if (Sched [c] != myid)
00619       {
00620     /* send this matrix to the process that owns node c.
00621        Note that the Isend requests are immediately free'd,
00622        because the sends are synchronized with an barrier,
00623        later on. */
00624 
00625     /*
00626     if (cnz == 4040)
00627     {
00628         Int k3 = 1084 ;
00629         fprintf (Common->file,
00630       "sending "ID": ["ID" %g]\n", k3, Ci [k3], Cx [k3]) ;
00631     }
00632     */
00633 
00634     PR2 ((Common->file, "n "ID" nz "ID"\n", cn, cnz)) ;
00635     DEBUG (CHOLMOD (print_sparse) (C, "send C to a node", cm)) ;
00636 
00637     MPI (MPI_Isend (Cp, cn+1, MPI_Int, Sched [c],
00638           TAG0, MPI_COMM_WORLD, &req)) ;
00639     MPI (MPI_Request_free (&req)) ;
00640     MPI (MPI_Isend (Ci, cnz,  MPI_Int, Sched [c],
00641           TAG0, MPI_COMM_WORLD, &req)) ;
00642     MPI (MPI_Request_free (&req)) ;
00643     MPI (MPI_Isend (Cx, cnz,  MPI_DOUBLE, Sched [c],
00644           TAG0, MPI_COMM_WORLD, &req)) ;
00645     MPI (MPI_Request_free (&req)) ;
00646       }
00647   }
00648 
00649     }
00650     else
00651     {
00652 
00653   /* ------------------------------------------------------------------ */
00654   /* all other processes acquire their input matrix from process 0 */
00655   /* ------------------------------------------------------------------ */
00656 
00657   for (c = 0 ; c < ncomponents ; c++)
00658   {
00659       if (Sched [c] == myid)
00660       {
00661     /*
00662     {
00663         Int *Mi = LU->LUnode [c]->A->i ;
00664         double *Mx = LU->LUnode [c]->A->x ;
00665         Mi [1084] = -9999999 ;
00666         Mx [1084] = -9999999 ;
00667         fprintf (Common->file,
00668       "pre ["ID" %g]\n", Mi [1084], Mx [1084]) ;
00669     }
00670     */
00671 
00672     MPI (MPI_Recv (LU->LUnode [c]->A->p, Cn [c] +1, MPI_Int, 0,
00673           TAG0, MPI_COMM_WORLD, &ms)) ;
00674     MPI (MPI_Recv (LU->LUnode [c]->A->i, Cnz [c], MPI_Int, 0,
00675           TAG0, MPI_COMM_WORLD, &ms)) ;
00676     MPI (MPI_Recv (LU->LUnode [c]->A->x, Cnz [c], MPI_DOUBLE, 0,
00677           TAG0, MPI_COMM_WORLD, &ms)) ;
00678 
00679     /*
00680     {
00681         Int *Mi = LU->LUnode [c]->A->i ;
00682         double *Mx = LU->LUnode [c]->A->x ;
00683         char *CC = (char *) (LU->LUnode [c]->A->x) ;
00684         Int k3 = 1084 ;
00685         fprintf (Common->file,
00686       "got "ID" ["ID" %g]\n", k3, Mi [k3], Mx [k3]) ;
00687         fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3])) ;
00688         fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+1])) ;
00689         fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+2])) ;
00690         fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+3])) ;
00691         fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+4])) ;
00692         fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+5])) ;
00693         fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+6])) ;
00694         fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+7])) ;
00695     }
00696     */
00697 
00698     PR1 ((Common->file, "proc "ID" Node "ID" got orig:\n", myid, c));
00699     PR2 ((Common->file, "n "ID" nz "ID"\n", Cn [c], Cnz [c])) ;
00700     DEBUG (CHOLMOD (print_sparse) (LU->LUnode [c]->A, "got A", cm)) ;
00701 #ifndef NDEBUG
00702     /* {
00703         Int *Mp, *Mi, j ;
00704         double *Mx ;
00705         Mp = LU->LUnode [c]->A->p ;
00706         Mi = LU->LUnode [c]->A->i ;
00707         Mx = LU->LUnode [c]->A->x ;
00708         for (j = 0 ; j < Cn [c] ; j++)
00709         {
00710       fprintf (Common->file, "my own col "ID"\n", j) ;
00711       for (k = Mp [j] ; k < Mp [j+1] ; k++)
00712       {
00713           Int ii = Mi [k] ;
00714           double x = Mx [k] ;
00715           fprintf (Common->file, " is: "ID" %g\n", ii, x) ;
00716       }
00717         }
00718         k = 0 ;
00719     } */
00720 #endif
00721       }
00722   }
00723     }
00724 
00725     /* ---------------------------------------------------------------------- */
00726     /* free temporary copy of A(p,p) */
00727     /* ---------------------------------------------------------------------- */
00728 
00729     /* only the root needs to do this */
00730     if (myid == 0)
00731     {
00732   LU->Ep2 = CHOLMOD (free) (n+1, sizeof (Int), LU->Ep2, cm) ;
00733   CHOLMOD (free_sparse) (&(LU->E), cm) ;
00734     }
00735 
00736     /* process 0 frees LUnode [c]->A, but only when recvd by c */
00737     MPI (MPI_Barrier (MPI_COMM_WORLD)) ;
00738     PR1 ((Common->file, "proc "ID" everybody OK so far2\n", Common->myid)) ;
00739     if (myid == 0)
00740     {
00741   for (c = 0 ; c < ncomponents ; c++)
00742   {
00743       if (Sched [c] != 0)
00744       {
00745     /* root process no longer needs submatrices sent to others */
00746     CHOLMOD (free_sparse) (&(LU->LUnode [c]->A), cm) ;
00747       }
00748   }
00749     }
00750 
00751 #ifndef NDEBUG
00752     for (c = 0 ; c < ncomponents ; c++)
00753     {
00754   if (Sched [c] == myid)
00755   {
00756       PR1 ((Common->file, "proc "ID" Node "ID" original matrix:\n", myid, c));
00757       DEBUG (CHOLMOD (print_sparse) (LU->LUnode [c]->A, "A", cm)) ;
00758   }
00759     }
00760 #endif
00761 
00762     /* ---------------------------------------------------------------------- */
00763     /* factorize each node */
00764     /* ---------------------------------------------------------------------- */
00765 
00766     ok = TRUE ;
00767     for (c = 0 ; c < ncomponents ; c++)
00768     {
00769   if (Sched [c] == myid)
00770   {
00771       PR1 ((Common->file, "proc "ID" doing node "ID"\n", myid, c)) ;
00772       if (!amesos_paraklete_factorize_node (c, LU, LUsymbolic, Common))
00773       {
00774     ok = FALSE ;
00775       }
00776   }
00777     }
00778 
00779     /* ---------------------------------------------------------------------- */
00780     /* determine global ordering, including Cperm and numerical pivoting */
00781     /* ---------------------------------------------------------------------- */
00782 
00783     P = LU->P ;
00784     Q = LU->Q ;
00785     kk = 0 ;
00786     Common->status = TRUE ;
00787     for (c = 0 ; c < ncomponents ; c++)
00788     {
00789   /* Give all processors nfound and npiv for each node of the tree.
00790    * This also allows us to determine if the matrix is singular, or if
00791    * any process ran out of memory. */
00792   MPI (MPI_Bcast (LU->LUnode [c]->header, PK_HEADER, MPI_Int,
00793         Sched [c], MPI_COMM_WORLD)) ;
00794   kk += LU->LUnode [c]->PK_NFOUND ;
00795   Common->status = MIN (Common->status, LU->LUnode [c]->PK_STATUS) ;
00796     }
00797 
00798     if (kk < n)
00799     {
00800   PR0 ((Common->file, "proc "ID" Singular: "ID" of "ID"\n", myid, kk, n)) ;
00801   /*
00802         printf ("proc "ID" Singular: "ID" of "ID"\n", myid, kk, n) ;
00803         */
00804   Common->status = PK_SINGULAR ;
00805     }
00806 
00807     if (Common->status != PK_OK)
00808     {
00809   /* out of memory, singular matrix, or unknown error */
00810   amesos_paraklete_free_numeric (&LU, Common) ;
00811   return (NULL) ;
00812     }
00813 
00814     /* all processes allocate space for the global permutation */
00815     for (c = 0 ; c < ncomponents ; c++)
00816     {
00817   npiv = LU->LUnode [c]->PK_NPIV ;
00818   if (LU->LUnode [c]->Pglobal == NULL)
00819   {
00820       LU->LUnode [c]->Pglobal = CHOLMOD (malloc) (npiv, sizeof (Int), cm) ;
00821       LU->LUnode [c]->Qglobal = CHOLMOD (malloc) (npiv, sizeof (Int), cm) ;
00822   }
00823     }
00824 
00825     ok = ok && (kk == n) && (cm->status == CHOLMOD_OK) ;
00826 
00827     /* check to see if all processes had space for P and Q */
00828     PR1 ((Common->file, "proc "ID" here in PQ: ok "ID"\n", Common->myid, ok)) ;
00829     all_ok = ok ;
00830     MPI (MPI_Allreduce (&ok, &all_ok, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD)) ;
00831     if (!all_ok)
00832     {
00833         /* TODO return NULL, broadcast error to all processes, and free LU */
00834         PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
00835     }
00836 
00837     kk = 0 ;
00838     for (c = 0 ; c < ncomponents ; c++)
00839     {
00840   npiv = LU->LUnode [c]->PK_NPIV ;
00841 
00842   /* give Pglobal and Qglobal to all processors */
00843   MPI (MPI_Bcast (LU->LUnode [c]->Pglobal, npiv, MPI_Int, Sched [c],
00844         MPI_COMM_WORLD)) ;
00845   MPI (MPI_Bcast (LU->LUnode [c]->Qglobal, npiv, MPI_Int, Sched [c],
00846         MPI_COMM_WORLD)) ;
00847 
00848   Pc = LU->LUnode [c]->Pglobal ;
00849   Qc = LU->LUnode [c]->Qglobal ;
00850 
00851   nfound = LU->LUnode [c]->PK_NFOUND ;
00852   for (k = 0 ; k < nfound ; k++)
00853   {
00854       /* row Pc[k] and column Qc[k] of E are the kth pivot row/column of
00855        * node c.  They are the kk-th pivot row/column of the global LU
00856        * factors. */
00857       P [kk] = Rperm [Pc [k]] ;
00858       Q [kk] = Cperm [Qc [k]] ;
00859       kk++ ;
00860   }
00861     }
00862     ASSERT (kk == n) ;
00863 
00864     /* compute Pinv and Qinv.  TODO: this is not needed */
00865     Pinv = LU->Pinv ;
00866     Qinv = LU->Qinv ;
00867     for (k = 0 ; k < n ; k++)
00868     {
00869   Pinv [P [k]] = k ;
00870   Qinv [Q [k]] = k ;
00871     }
00872 
00873 #ifndef NDEBUG
00874 
00875     /* ---------------------------------------------------------------------- */
00876     /* dump the matrix */
00877     /* ---------------------------------------------------------------------- */
00878 
00879     if (Common->dump > 1)
00880     {
00881 
00882   if (myid == 0)
00883   {
00884       Int *Ap, *Ai ;
00885       double *Ax ;
00886       Int j ;
00887 
00888       Ap = A->p ;
00889       Ai = A->i ;
00890       Ax = A->x ;
00891 
00892       for (k = 0 ; k < n ; k++) printf ("P (1+"ID") = "ID" ;\n", k, P [k]) ;
00893       for (k = 0 ; k < n ; k++) printf ("Q (1+"ID") = "ID" ;\n", k, Q [k]) ;
00894 
00895       printf ("P = 1+P ;\n") ;
00896       printf ("Q = 1+Q ;\n") ;
00897 
00898       for (j = 0 ; j < n ; j++)
00899       {
00900     for (p = Ap [j] ; p < Ap [j+1] ; p++)
00901     {
00902         printf ("A (1+"ID",1+"ID") = %.16g ;\n", Ai [p], j, Ax [p]) ;
00903     }
00904       }
00905   }
00906 
00907   for (c = 0 ; c < ncomponents ; c++)
00908   {
00909       paraklete_node *LUnode ;
00910       Int *Lip, *Llen, *Li, *Uip, *Ulen, *Ui ;
00911       double *LUix, *Lx, *Ux ;
00912       Int llen, ulen, j ;
00913 
00914       MPI (MPI_Barrier (MPI_COMM_WORLD)) ;
00915       if (Sched [c] != myid) continue ;
00916       printf ("\n%% ---- node "ID"\n", c) ;
00917 
00918       /* dump L */
00919       LUnode = LU->LUnode [c] ;
00920       Lip = LUnode->lp ;
00921       Llen = LUnode->llen ;
00922       LUix = LUnode->ix ;
00923       nfound = LUnode->PK_NFOUND ;
00924       for (j = 0 ; j < nfound ; j++)
00925       {
00926     GET_COLUMN (Lip, Llen, LUix, j, Li, Lx, llen) ;
00927     printf ("\nL (1+"ID",1+"ID") = 1 ;\n", j, j) ;
00928     for (p = 1 ; p < llen ; p++)
00929     {
00930         printf ("L (1+"ID",1+"ID") = %.16g ;\n", Li [p], j, Lx [p]) ;
00931     }
00932       }
00933 
00934       /* dump U */
00935       cn = LUnode->PK_NN ;
00936       Uip = LUnode->up ;
00937       Ulen = LUnode->ulen ;
00938       for (j = cn-1 ; j >= nfound ; j--)
00939       {
00940     printf ("\n") ;
00941     GET_COLUMN (Uip, Ulen, LUix, j, Ui, Ux, ulen) ;
00942     for (p = 0 ; p < ulen ; p++)
00943     {
00944         printf ("U (1+"ID",1+"ID") = %.16g ;\n", Ui [p], j, Ux [p]) ;
00945     }
00946       }
00947       for ( ; j >= 0 ; j--)
00948       {
00949     GET_COLUMN (Uip, Ulen, LUix, j, Ui, Ux, ulen) ;
00950     printf ("\nU (1+"ID",1+"ID") = %.16g ; %% pivot\n",
00951         j, j, Ux [ulen-1]) ;
00952     for (p = 0 ; p < ulen-1 ; p++)
00953     {
00954         printf ("U (1+"ID",1+"ID") = %.16g ;\n", Ui [p], j, Ux [p]) ;
00955     }
00956       }
00957   }
00958 
00959   if (Common->nproc == 1 && Common->dump > 1)
00960   {
00961       printf ("norm (L*U-A(P,Q))\n") ;
00962   }
00963     }
00964 #endif
00965 
00966     return (LU) ;
00967 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines