Amesos Package Browser (Single Doxygen Collection) Development
amesos_paraklete_analyze.c
Go to the documentation of this file.
00001 /* ========================================================================== */
00002 /* === paraklete_analyze ==================================================== */
00003 /* ========================================================================== */
00004 
00005 #include "amesos_paraklete_decl.h"
00006 
00007 /* LUsymbolic = paraklete_analyze (A, Common) finds a fill-reducing permutation
00008  * of A and its separator tree, and returns a paraklete_symbolic object
00009  * containing the symbolic analysis.
00010  *
00011  * TODO: check return values of MPI
00012  *
00013  * PARAKLETE version 0.3: parallel sparse LU factorization.  Nov 13, 2007
00014  * Copyright (C) 2007, Univ. of Florida.  Author: Timothy A. Davis
00015  * See License.txt for the Version 2.1 of the GNU Lesser General Public License
00016  * http://www.cise.ufl.edu/research/sparse
00017  */
00018 
00019 /* ========================================================================== */
00020 /* === paraklete_bcast_symbolic ============================================= */
00021 /* ========================================================================== */
00022 
00023 /* Process 0 has computed the symbolic object; broadcast it to all processes.
00024  * Returns TRUE and a non-NULL LUsymbolic object if successful.  Otherwise,
00025  * returns FALSE and a NULL LUsymbolic object.  This routine is not used for
00026  * the sequential case.
00027  *
00028  * If the symbolic analysis fails, all processes receive an object with
00029  * n=-1, which denotes a failure.  All processes then return NULL.
00030  */
00031 
00032 #ifndef NMPI
00033 
00034 static Int paraklete_bcast_symbolic
00035 (
00036     paraklete_symbolic **LUsymbolicHandle,
00037     paraklete_common *Common
00038 )
00039 
00040 {
00041     paraklete_symbolic *LUsymbolic = NULL ;
00042     Int n, ncomponents, header [2] ;
00043     int ok, all_ok ;
00044     cholmod_common *cm ;
00045 
00046     cm = &(Common->cm) ;
00047     n = EMPTY ;
00048     ncomponents = EMPTY ;
00049 
00050     if (Common->myid == 0)
00051     {
00052   LUsymbolic = *LUsymbolicHandle ;
00053   if (LUsymbolic != NULL)
00054   {
00055       n = LUsymbolic->n ;
00056       ncomponents = LUsymbolic->ncomponents ;
00057   }
00058     }
00059     else
00060     {
00061   /* other processes do not yet have the symbolic object */
00062   *LUsymbolicHandle = NULL ;
00063     }
00064 
00065     /* broadcast the size of the object, or -1 if a failure occured */
00066     header [0] = n ;
00067     header [1] = ncomponents ;
00068     MPI_Bcast (&header, 2, MPI_Int, TAG0, MPI_COMM_WORLD) ;
00069     n = header [0] ;
00070     ncomponents = header [1] ;
00071     if (n == EMPTY)
00072     {
00073   /* the analysis in the root process failed */
00074   PR0 ((Common->file, "proc "ID" root analyze fails\n", Common->myid)) ;
00075   return (FALSE) ;
00076     }
00077 
00078     PR1 ((Common->file, "proc "ID" in bcast symbolic: status "ID" header "ID" "ID"\n",
00079       Common->myid, cm->status, header [0], header [1])) ;
00080 
00081     if (Common->myid != 0)
00082     {
00083   LUsymbolic = amesos_paraklete_alloc_symbolic (n, ncomponents, FALSE, Common) ;
00084   *LUsymbolicHandle = LUsymbolic ;
00085     }
00086 
00087     ok = (cm->status == CHOLMOD_OK) && (LUsymbolic != NULL) ;
00088     all_ok = ok ;
00089 
00090     /* all processes find out if any one process fails to allocate memory */
00091     MPI_Allreduce (&ok, &all_ok, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD) ;
00092     if (!all_ok)
00093     {
00094   /* out of memory; inform all processes */
00095   PR0 ((Common->file, "proc "ID" all fail in analyze\n", Common->myid)) ;
00096   amesos_paraklete_free_symbolic (&LUsymbolic, Common) ;
00097   *LUsymbolicHandle = NULL ;
00098   return (FALSE) ;
00099     }
00100 
00101     /* broadcast the contents of the symbolic object */
00102     MPI_Bcast (LUsymbolic->Mem_n, 3*n, MPI_Int, TAG0, MPI_COMM_WORLD) ;
00103     MPI_Bcast (LUsymbolic->Mem_c, 7*ncomponents+2, MPI_Int, TAG0,
00104   MPI_COMM_WORLD) ;
00105 
00106 #if 0
00107     {
00108   /* each of size ncomponents: */
00109   Int *Child = LUsymbolic->Child ;
00110   Int *Clnz  = LUsymbolic->Clnz ;
00111   Int *Cn    = LUsymbolic->Cn ;
00112   Int *Cnz   = LUsymbolic->Cnz ;
00113   Int *Sched = LUsymbolic->Sched ;
00114 
00115   /* each of size ncomponents+1: */
00116   Int *Cstart = LUsymbolic->Cstart ;
00117   Int *Childp = LUsymbolic->Childp ;
00118   Int cc ;
00119 
00120   for (cc = 0 ; cc < ncomponents ; cc++)
00121   {
00122       printf ("component "ID"\n", cc) ;
00123       printf ("Child "ID"\n", Child [cc]) ;
00124       printf ("Clnz "ID"\n", Clnz [cc]) ;
00125       printf ("Cn "ID"\n", Cn [cc]) ;
00126       printf ("Cnz "ID"\n", Cnz [cc]) ;
00127       printf ("Sched "ID"\n", Sched [cc]) ;
00128       printf ("Cstart "ID"\n", Cstart [cc]) ;
00129       printf ("Childp "ID"\n", Childp [cc]) ;
00130   }
00131       printf ("Cstart "ID"\n", Cstart [ncomponents]) ;
00132       printf ("Childp "ID"\n", Childp [ncomponents]) ;
00133 
00134     }
00135 #endif
00136 
00137     return (TRUE) ;
00138 }
00139 #endif
00140 
00141 
00142 /* ========================================================================== */
00143 /* === paraklete_analyze ==================================================== */
00144 /* ========================================================================== */
00145 
00146 paraklete_symbolic *amesos_paraklete_analyze
00147 (
00148     /* matrix to analyze */ 
00149     cholmod_sparse *A,
00150     paraklete_common *Common
00151 )
00152 {
00153     double cnt ;
00154     double *Cwork ;
00155     cholmod_common *cm ;
00156     paraklete_symbolic *LUsymbolic ;
00157     cholmod_sparse *C, *AT, *Elo, *Eup ;
00158     Int *Cperm, *RpermInv, *Cparent, *Cmember, *ColCount, *Cstart, *Childp,
00159   *Clnz, *Cn, *Cnz, *Parent, *Post, *First, *Level, *Child, *Ap, *Ai,
00160   *Sched, *W, *Rperm,
00161         *Lo_id, *Hi_id ;
00162     double one [2] = {1,1} ;
00163     Int p, k, n, ncomponents, ci, cj, i, j, clast, c, parent, nparent, nroots,
00164   nproc, cp, nc0, nc1 ;
00165     int ok = TRUE ;
00166     size_t n5, nc7 ;
00167 
00168 #if 0
00169     double work ;
00170     Int proc = 0, parent2, ncomp2, c2, cc, cmerge, nleaves,
00171     Int *NewNode, *Cparent2, *Merged, *Leaves, *Nchildren ;
00172 #endif
00173 
00174     /* ---------------------------------------------------------------------- */
00175     /* all processes except process 0 get the symbolic object from process 0 */
00176     /* ---------------------------------------------------------------------- */
00177 
00178     LUsymbolic = NULL ;
00179     if (Common->myid != 0)
00180     {
00181   MPI (paraklete_bcast_symbolic (&LUsymbolic, Common)) ;
00182   return (LUsymbolic) ;
00183     }
00184 
00185     /* ---------------------------------------------------------------------- */
00186     /* process 0 does the analysis */
00187     /* ---------------------------------------------------------------------- */
00188 
00189     nproc = Common->nproc ;
00190 
00191     /* ---------------------------------------------------------------------- */
00192     /* get inputs */
00193     /* ---------------------------------------------------------------------- */
00194 
00195 #if 0
00196     RETURN_IF_NULL_COMMON (NULL) ;
00197     RETURN_IF_NULL (A, NULL) ;
00198     if (A->nrow != A->ncol || A->stype)
00199     {
00200   CHOLMOD (error) (CHOLMOD_INVALID, "paraklete: invalid matrix", cm) ;
00201   return (NULL) ;
00202     }
00203 #endif
00204 
00205     n = A->nrow ;
00206     C = NULL ;
00207     W = NULL ;
00208 
00209     /* ---------------------------------------------------------------------- */
00210     /* allocate workspace */
00211     /* ---------------------------------------------------------------------- */
00212 
00213     cm = &(Common->cm) ;
00214     CHOLMOD (allocate_work) (n, 2*n, n, cm) ;
00215     if (cm->status != CHOLMOD_OK)
00216     {
00217   /* out of memory; inform all processes */
00218   MPI (paraklete_bcast_symbolic (&LUsymbolic, Common)) ;
00219         /*
00220         printf ("   analyze failed 1!\n") ;
00221         */
00222   return (NULL) ;
00223     }
00224 
00225     /* ---------------------------------------------------------------------- */
00226     /* allocate first part of symbolic factor */
00227     /* ---------------------------------------------------------------------- */
00228 
00229     LUsymbolic = amesos_paraklete_alloc_symbolic (n, 0, TRUE, Common) ;
00230 
00231     if (LUsymbolic == NULL)
00232     {
00233   /* out of memory; inform all processes */
00234   PR0 ((Common->file, "oops, proc 0 ran out\n")) ;
00235   MPI (paraklete_bcast_symbolic (&LUsymbolic, Common)) ;
00236         /*
00237         printf ("   analyze failed 2!\n") ;
00238         */
00239   return (NULL) ;
00240     }
00241 
00242     Cperm    = LUsymbolic->Cperm ;
00243     RpermInv = LUsymbolic->RpermInv ;
00244     Cparent  = LUsymbolic->Cparent ;
00245 
00246     Cmember = RpermInv ;      /* use RpermInv as workspace for Cmember */
00247 
00248     /* ---------------------------------------------------------------------- */
00249     /* C = pattern of triu (A+A'), in symmetric/upper form */
00250     /* ---------------------------------------------------------------------- */
00251 
00252     /*
00253     printf ("pattern of A+A', n = "ID" ("ID")\n", A->nrow, cm->status) ;
00254     printf ("A "ID" by "ID", nzmax "ID"\n", A->nrow, A->ncol, A->nzmax) ;
00255     */
00256     AT = CHOLMOD (transpose) (A, FALSE, cm) ;
00257     /*
00258     printf ("AT is %p ("ID")\n", (void *) AT, cm->status) ;
00259     */
00260     C = CHOLMOD (add) (A, AT, one, one, FALSE, FALSE, cm) ;
00261     /*
00262     printf ("C is %p ("ID")\n", (void *) C, cm->status) ;
00263     */
00264     CHOLMOD (free_sparse) (&AT, cm) ;
00265     CHOLMOD (band_inplace) (0, n, 0, C, cm) ;
00266     /*
00267     printf ("status is ("ID")\n", cm->status) ;
00268     */
00269 
00270     if (cm->status != CHOLMOD_OK)
00271     {
00272   /* out of memory; inform all processes */
00273   PR0 ((Common->file, "oops, proc 0 ran out here2\n")) ;
00274   CHOLMOD (free_sparse) (&C, cm) ;
00275   amesos_paraklete_free_symbolic (&LUsymbolic, Common) ;
00276   MPI (paraklete_bcast_symbolic (&LUsymbolic, Common)) ;
00277         /*
00278         printf ("   analyze failed 3!\n") ;
00279         */
00280   return (NULL) ;
00281     }
00282 
00283     C->stype = 1 ;
00284 
00285     /* ---------------------------------------------------------------------- */
00286     /* fill-reducing nested dissection ordering of C */
00287     /* ---------------------------------------------------------------------- */
00288 
00289     /* Cperm [k] = i if row/col i of A is the kth row/col of A(p,p)
00290      * ncomponents = # of components in separator tree
00291      * Cparent [c] is parent of c in separator tree, or EMPTY if c is a root
00292      * Cmember [i] = c if row/col i of A is in component c
00293      */
00294 
00295     /* TODO rename Common->nleaves to be something else */
00296     cm->method [0].nd_oksep = 0.1 ;
00297 
00298     if (Common->nleaves <= 0)
00299     {
00300         cm->method [0].nd_small = MAX (1000, -(Common->nleaves)) ;
00301     }
00302     else
00303     {
00304         cm->method [0].nd_small = n / Common->nleaves ;
00305     }
00306 
00307     cm->current = 0 ;
00308     cm->method [0].nd_components = 0 ;  /* default value */
00309     /*
00310     printf ("nd_components "ID"\n", cm->method [0].nd_components) ;
00311     */
00312 
00313     ncomponents = CHOLMOD (nested_dissection) (C, NULL, 0, Cperm, Cparent,
00314         Cmember, cm) ;
00315 
00316     nc0 = ncomponents ; /* from CHOLMOD (nested_dissection) */
00317     nc1 = ncomponents ; /* after collapsing */
00318 
00319 #ifndef NDEBUG
00320     /* check results: */
00321     clast = EMPTY ;
00322     for (k = 0 ; k < n ; k++)
00323     {
00324   c = Cmember [Cperm [k]] ;
00325   if (c != clast)
00326   {
00327       /*
00328       printf ("Cmember ["ID"] = "ID"\n", k, Cmember [Cperm [k]]) ;
00329       printf ("start of component\n") ;
00330       */
00331       /* if (c != clast+1) { printf ("ERROR!\n") ; exit (1) ; } */
00332       ASSERT (c == clast + 1) ;
00333   }
00334   clast = c ;
00335     }
00336 #endif
00337 
00338     if (cm->status != CHOLMOD_OK)
00339     {
00340   /* out of memory; inform all processes */
00341   PR0 ((Common->file, "oops, proc 0 ran out here3\n")) ;
00342   amesos_paraklete_free_symbolic (&LUsymbolic, Common) ;
00343   CHOLMOD (free_sparse) (&C, cm) ;
00344   MPI (paraklete_bcast_symbolic (&LUsymbolic, Common)) ;
00345         /*
00346         printf ("   analyze failed 4!\n") ;
00347         */
00348   return (NULL) ;
00349     }
00350 
00351     /* ---------------------------------------------------------------------- */
00352     /* Elo = C (p,p)', Eup = Elo' */
00353     /* ---------------------------------------------------------------------- */
00354 
00355     Elo = CHOLMOD (ptranspose) (C, FALSE, Cperm, NULL, 0, cm) ;
00356     CHOLMOD (free_sparse) (&C, cm) ;
00357     Eup = CHOLMOD (transpose) (Elo, FALSE, cm) ;
00358 
00359     /* ---------------------------------------------------------------------- */
00360     /* allocate more workspace */
00361     /* ---------------------------------------------------------------------- */
00362 
00363     /* n5 = 5*(n+1) */
00364     n5 = CHOLMOD (mult_size_t) (n+1, 5, &ok) ;
00365     if (!ok) PARAKLETE_ERROR (PK_TOO_LARGE, "problem too large") ;
00366 
00367     W = CHOLMOD (malloc) (n5, sizeof (Int), cm) ;
00368     ColCount = W ;      /* size n [ */
00369     Parent   = W + n ;      /* size n [ */
00370     Post     = W + 2*n ;    /* size n [ */
00371     First    = W + 3*n ;    /* size n [ */
00372     Level    = W + 4*n ;    /* size n [ */
00373 
00374     if (cm->status != CHOLMOD_OK)
00375     {
00376   /* out of memory; inform all processes */
00377   PR0 ((Common->file, "oops, proc 0 ran out here4\n")) ;
00378   amesos_paraklete_free_symbolic (&LUsymbolic, Common) ;
00379   CHOLMOD (free_sparse) (&Eup, cm) ;
00380   CHOLMOD (free_sparse) (&Elo, cm) ;
00381   CHOLMOD (free) (n5, sizeof (Int), W, cm) ;
00382   MPI (paraklete_bcast_symbolic (&LUsymbolic, Common)) ;
00383         PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
00384   return (NULL) ;
00385     }
00386 
00387     /* ---------------------------------------------------------------------- */
00388     /* get an estimate of the # entries in L and U */
00389     /* ---------------------------------------------------------------------- */
00390 
00391     /* This assumes an LU factorization of C, with no partial pivoting */
00392     CHOLMOD (etree) (Eup, Parent, cm) ;
00393     CHOLMOD (postorder) (Parent, n, NULL, Post, cm) ;
00394     CHOLMOD (rowcolcounts) (Elo, NULL, 0, Parent, Post, NULL, ColCount,
00395       First, Level, cm) ;
00396 
00397     CHOLMOD (free_sparse) (&Eup, cm) ;
00398     CHOLMOD (free_sparse) (&Elo, cm) ;
00399 
00400     /* Parent, Post, First, Level no longer needed ]]]] */
00401 
00402     if (cm->status != CHOLMOD_OK)
00403     {
00404   /* out of memory or other error; inform all processes */
00405         PARAKLETE_ERROR (PK_UNKNOWN, "out of memory or other error") ;
00406   amesos_paraklete_free_symbolic (&LUsymbolic, Common) ;
00407   CHOLMOD (free) (n5, sizeof (Int), W, cm) ;
00408   MPI (paraklete_bcast_symbolic (&LUsymbolic, Common)) ;
00409   return (NULL) ;
00410     }
00411 
00412     /* ---------------------------------------------------------------------- */
00413     /* compute Cwork [c] = flops to be done at component c */
00414     /* ---------------------------------------------------------------------- */
00415 
00416     Cwork = cm->Xwork ;
00417     for (c = 0 ; c < ncomponents ; c++)
00418     {
00419   Cwork [c] = 0 ;
00420     }
00421     for (k = 0 ; k < n ; k++)
00422     {
00423   c = Cmember [Cperm [k]] ;
00424   cnt = ColCount [k] ;
00425   Cwork [c] += cnt * cnt ;
00426     }
00427 
00428 #ifndef NDEBUG
00429     for (c = 0 ; c < ncomponents ; c++)
00430     {
00431   PR1 ((Common->file, "Node "ID" work %g parent "ID"\n",
00432         c, Cwork [c], Cparent [c])) ;
00433     }
00434 #endif
00435 
00436 #if 0
00437 
00438     /* ---------------------------------------------------------------------- */
00439     /* compress the tree until it has <= nproc leaves */
00440     /* ---------------------------------------------------------------------- */
00441 
00442     /* Nchildren [c] = the number of children of node c */
00443     /* Merged [c] = node that c is merged into, or EMPTY if c is not merged */
00444     /* Leaves [0..nleaves-1] is a list of the leaves of the current tree */
00445 
00446     /* Note that W [0..n-1] is still in use for ColCount [0..n-1] */
00447     Merged = W + n ;        /* size ncomponents+1 [ */
00448     Nchildren = W + n + ncomponents + 1 ; /* size ncomponents+1 [ */
00449     Leaves = W + n + 2 * (ncomponents+1) ;  /* size ncomponents [ */
00450 
00451     for (c = 0 ; c <= ncomponents ; c++)
00452     {
00453   Nchildren [c] = 0 ;
00454   Merged [c] = EMPTY ;
00455     }
00456     for (c = 0 ; c < ncomponents ; c++)
00457     {
00458   parent = Cparent [c] ;
00459   if (parent == EMPTY)
00460   {
00461       parent = ncomponents ;
00462   }
00463   ASSERT (parent > c && parent <= ncomponents) ;
00464   Nchildren [parent]++ ;
00465     }
00466 
00467     /* make a list of all leaves */
00468     nleaves = 0 ;
00469     for (c = 0 ; c < ncomponents ; c++)
00470     {
00471   PR1 ((Common->file, "Node "ID" has "ID" children\n", c, Nchildren [c])) ;
00472   if (Nchildren [c] == 0)
00473   {
00474       PR1 ((Common->file, "Leaf: "ID"\n", c)) ;
00475       Leaves [nleaves++] = c ;
00476   }
00477     }
00478 
00479     /* CHOLMOD (nested_dissection) returns a graph with
00480      * 2 nodes (one parent and one child) for vanHeukelum/cage3
00481      *
00482      * could use a heap for the leaves
00483      */
00484 
00485     while (nleaves > target_nleaves)
00486     {
00487   PR1 ((Common->file, "\n------------ nleaves: "ID"\n", nleaves)) ;
00488 
00489   /* find the lightest leaf (skip node ncomponents-1) */
00490   work = EMPTY ;
00491   c = EMPTY ;
00492   cp = EMPTY ;
00493   for (p = 0 ; p < nleaves ; p++)
00494   {
00495       PR2 ((Common->file, "Leaf "ID" work %g\n",
00496       Leaves [p], Cwork [Leaves [p]])) ;
00497       ASSERT (Merged [Leaves [p]] == EMPTY) ;
00498       if (Leaves [p] == ncomponents-1)
00499       {
00500     /* node ncomponents-1 has no live node to its right (that is,
00501      * a higher-numbered node), so skip it.  The alternative is to
00502      * merge a node cmerge to its left into node ncomponents-1.
00503      * This may lead to a better balance of work, but is more
00504      * complicated.  The parent of the children of cmerge would
00505      * have to be updated.  FUTURE WORK: consider handling this
00506      * case. */
00507     continue ;
00508       }
00509       if (work == EMPTY || Cwork [Leaves [p]] < work)
00510       {
00511     c = Leaves [p] ;
00512     work = Cwork [c] ;
00513     cp = p ;
00514       }
00515   }
00516   ASSERT (c != EMPTY) ;
00517   PR2 ((Common->file,"Lightest leaf is "ID" with work %g\n", c, Cwork [c]));
00518   ASSERT (c < ncomponents-1) ;
00519   ASSERT (Nchildren [c] == 0) ;
00520 
00521   /* find the live node to the right of this node */
00522   for (cmerge = c+1 ; cmerge < ncomponents ; cmerge++)
00523   {
00524       if (Merged [cmerge] == EMPTY)
00525       {
00526     break ;
00527       }
00528   }
00529 
00530   /* find the parent of c */
00531   parent = Cparent [c] ;
00532   if (parent == EMPTY)
00533   {
00534       parent = ncomponents ;
00535   }
00536 
00537   /* merge c into cmerge node, where c is a leaf */
00538   PR1 ((Common->file,"merge "ID" into "ID", parent "ID"\n", c, cmerge, parent));
00539   ASSERT (cmerge < ncomponents) ;
00540   Cwork [cmerge] += Cwork [c] ;
00541   Cwork [c] = 0 ;
00542   Merged [c] = cmerge ;
00543   Leaves [cp] = Leaves [--nleaves] ;
00544   Nchildren [parent]-- ;
00545   ASSERT (Merged [parent] == EMPTY) ;
00546 
00547   if (Nchildren [parent] == 0 && parent != ncomponents)
00548   {
00549       /* parent is a new leaf, add it to the list of Leaves */
00550       PR1 ((Common->file, "parent is new leaf: "ID"\n", parent)) ;
00551       Leaves [nleaves++] = parent ;
00552   }
00553     }
00554 
00555     /* Leaves no longer needed ] */
00556 
00557     PR1 ((Common->file, "\n-------------------------- done merging leaves\n")) ;
00558 
00559     /* merge nodes that have just one child, with the one child */
00560     for (c = 0 ; c < ncomponents-1 ; c++)
00561     {
00562   if (Merged [c] == EMPTY)
00563   {
00564       parent = Cparent [c] ;
00565       if (parent == EMPTY) continue ;
00566       if (Nchildren [parent] == 1)
00567       {
00568     PR1 ((Common->file, "\nparent "ID" of c "ID" has one child\n",
00569           parent, c));
00570     Cwork [parent] += Cwork [c] ;
00571     Cwork [c] = 0 ;
00572     Merged [c] = parent ;
00573     for (cc = c+1 ; cc < parent ; cc++)
00574     {
00575         PR1 ((Common->file, "merge "ID" into "ID"\n", cc, parent)) ;
00576         ASSERT (Merged [cc] != EMPTY) ;
00577         Merged [cc] = parent ;
00578     }
00579       }
00580   }
00581     }
00582 
00583     /* Nchildren no longer needed ] */
00584 
00585     /* compress the paths in Merged */
00586     for (c = 0 ; c < ncomponents ; c++)
00587     {
00588   /* find the ultimate node that node c was merged into */
00589   PR1 ((Common->file, "\nFind ultimate node for "ID"\n", c)) ;
00590   for (cc = c ; Merged [cc] != EMPTY ; cc = Merged [cc]) ;
00591   for (c2 = c ; Merged [c2] != EMPTY ; c2 = Merged [c2])
00592   {
00593       PR1 ((Common->file, "   merge "ID" into "ID"\n", c2, cc)) ;
00594       Merged [c2] = cc ;
00595   }
00596     }
00597 
00598     /* find the new node numbering, using Leaves as workspace */
00599     NewNode = Leaves ;
00600     ncomp2 = 0 ;
00601     for (c = 0 ; c < ncomponents ; c++)
00602     {
00603   if (Merged [c] == EMPTY)
00604   {
00605       PR1 ((Common->file, "Live node "ID" becomes node "ID"\n", c, ncomp2)) ;
00606       NewNode [c] = ncomp2++ ;
00607   }
00608     }
00609     for (c = 0 ; c < ncomponents ; c++)
00610     {
00611   if (Merged [c] != EMPTY)
00612   {
00613       NewNode [c] = NewNode [Merged [c]] ;
00614       PR1 ((Common->file, "Dead node "ID" becomes part of node "ID"\n",
00615       c, NewNode [c])) ;
00616   }
00617     }
00618 
00619     /* fix Cmember */
00620     for (k = 0 ; k < n ; k++)
00621     {
00622   c = Cmember [k] ;
00623   c = NewNode [c] ;
00624   Cmember [k] = c ;
00625     }
00626 
00627     /* fix Cparent, using Nchildren as workspace */
00628     Cparent2 = Nchildren ;
00629     for (c = 0 ; c < ncomponents ; c++)
00630     {
00631   if (Merged [c] == EMPTY)
00632   {
00633       c2 = NewNode [c] ;
00634       parent = Cparent [c] ;
00635       parent2 = (parent == EMPTY) ? EMPTY : (NewNode [parent]) ;
00636       Cparent2 [c2] = parent2 ;
00637   }
00638     }
00639 
00640     /* Merged no longer needed ] */
00641 
00642     for (c = 0 ; c < ncomponents ; c++)
00643     {
00644   Cwork [c] = 0 ;
00645     }
00646 
00647     ncomponents = ncomp2 ;
00648     for (c = 0 ; c < ncomponents ; c++)
00649     {
00650   Cparent [c] = Cparent2 [c] ;
00651     }
00652 
00653 #ifndef NDEBUG
00654     printf ("Final components: "ID" leaves: "ID"\n", ncomponents, nleaves) ;
00655     for (c = 0 ; c < ncomponents ; c++)
00656     {
00657   PR1 ((Common->file, "New node: "ID" new parent "ID"\n", c, Cparent [c])) ;
00658     }
00659 #endif
00660 
00661 #endif
00662 
00663     /* ---------------------------------------------------------------------- */
00664     /* allocate remainder of symbolic factor */
00665     /* ---------------------------------------------------------------------- */
00666 
00667     /* nc7 = 7*ncomponents + 2 */
00668     nc7 = CHOLMOD (mult_size_t) (ncomponents, 7, &ok) ;
00669     nc7 = CHOLMOD (add_size_t) (nc7, 2, &ok) ;
00670     if (!ok) PARAKLETE_ERROR (PK_TOO_LARGE, "problem too large") ;
00671 
00672     LUsymbolic->Mem_c = CHOLMOD (malloc) (nc7, sizeof (Int), cm) ;
00673 
00674     /* each of size ncomponents: */
00675     Child = LUsymbolic->Child  = LUsymbolic->Mem_c ;
00676     Clnz  = LUsymbolic->Clnz   = LUsymbolic->Mem_c + ncomponents ;
00677     Cn    = LUsymbolic->Cn     = LUsymbolic->Mem_c + 2*ncomponents ;
00678     Cnz   = LUsymbolic->Cnz    = LUsymbolic->Mem_c + 3*ncomponents ;
00679     Sched = LUsymbolic->Sched  = LUsymbolic->Mem_c + 4*ncomponents ;
00680 
00681     /* each of size ncomponents+1: */
00682     Cstart = LUsymbolic->Cstart = LUsymbolic->Mem_c + 5*ncomponents ;
00683     Childp = LUsymbolic->Childp = LUsymbolic->Mem_c + 6*ncomponents + 1 ;
00684 
00685     LUsymbolic->ncomponents = ncomponents ;
00686     LUsymbolic->ncomp0 = nc0 ;
00687     LUsymbolic->ncomp1 = nc1 ;
00688 
00689     if (cm->status != CHOLMOD_OK)
00690     {
00691   /* out of memory or other error; inform all processes */
00692   amesos_paraklete_free_symbolic (&LUsymbolic, Common) ;
00693   CHOLMOD (free) (n5, sizeof (Int), W, cm) ;
00694   MPI (paraklete_bcast_symbolic (&LUsymbolic, Common)) ;
00695         PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
00696   return (NULL) ;
00697     }
00698 
00699     /* ---------------------------------------------------------------------- */
00700     /* Cstart = start and end nodes of each component */
00701     /* ---------------------------------------------------------------------- */
00702 
00703     clast = EMPTY ;
00704     for (k = 0 ; k < n ; k++)
00705     {
00706   c = Cmember [Cperm [k]] ;
00707   if (c != clast)
00708   {
00709       /* if (c != clast+1) { printf ("Error!\n") ; exit (1) ; } */
00710       ASSERT (c == clast + 1) ;
00711       Cstart [c] = k ;
00712   }
00713   clast = c ;
00714     }
00715     Cstart [ncomponents] = n ;
00716 
00717     /* ---------------------------------------------------------------------- */
00718     /* Clnz = estimate of # of entries in L for each component */
00719     /* ---------------------------------------------------------------------- */
00720 
00721     for (c = 0 ; c < ncomponents ; c++)
00722     {
00723         size_t s = 0 ;
00724   for (k = Cstart [c] ; k < Cstart [c+1] ; k++)
00725   {
00726       s = CHOLMOD (add_size_t) (s, ColCount [k], &ok) ;
00727   }
00728         if (!ok)
00729         {
00730             /* TODO return NULL, and broadcast error to all processes */
00731             PARAKLETE_ERROR (PK_TOO_LARGE, "problem too large") ;
00732         }
00733   /*
00734   printf ("Clnz ["ID"] = "ID", cols "ID" to "ID"\n", c, Clnz [c],
00735       Cstart [c], Cstart [c+1]-1) ;
00736   */
00737   Clnz [c] = s ;
00738     }
00739 
00740     /* ColCount no longer needed ] */
00741 
00742     /* ---------------------------------------------------------------------- */
00743     /* Child, Childp: list of children of each component */
00744     /* ---------------------------------------------------------------------- */
00745 
00746     /* count the number of children of each node, using Cn as workspace */
00747     for (c = 0 ; c < ncomponents ; c++)
00748     {
00749   Cn [c] = 0 ;
00750     }
00751     nroots = 0 ;
00752     for (c = 0 ; c < ncomponents ; c++)
00753     {
00754   parent = Cparent [c] ;
00755   PR1 ((Common->file, "node "ID": parent "ID"\n", c, parent)) ;
00756   if (parent == EMPTY)
00757   {
00758       nroots++ ;
00759   }
00760   else
00761   {
00762       ASSERT (parent > 0 && parent < ncomponents) ;
00763       Cn [parent]++ ;
00764   }
00765     }
00766 
00767     if (nroots > 1)
00768     {
00769         /* TODO - this is an assertion */
00770         PARAKLETE_ERROR (PK_UNKNOWN, "separator tree cannot be forest") ;
00771         abort ( ) ;
00772     }
00773 
00774 #ifndef NDEBUG
00775     for (c = 0 ; c < ncomponents ; c++)
00776     {
00777   PR1 ((Common->file, "node "ID": "ID" children\n", c, Cn [c])) ;
00778     }
00779 #endif
00780 
00781     /* find the cumulative sum of the children */
00782     k = 0 ;
00783     for (c = 0 ; c < ncomponents ; c++)
00784     {
00785   Childp [c] = k ;
00786   k += Cn [c] ;
00787     }
00788     PR1 ((Common->file, "k "ID" ncomponents "ID"\n", k, ncomponents)) ;
00789     ASSERT (k == ncomponents - nroots) ;
00790     Childp [ncomponents] = k ;
00791 
00792     /* create a list of children for each node */
00793     for (c = 0 ; c < ncomponents ; c++)
00794     {
00795   Cn [c] = Childp [c] ;
00796     }
00797     for (k = 0 ; k < ncomponents ; k++)
00798     {
00799   Child [k] = -1 ;
00800     }
00801     for (c = 0 ; c < ncomponents ; c++)
00802     {
00803   parent = Cparent [c] ;
00804   if (parent != EMPTY)
00805   {
00806       Child [Cn [parent]++] = c ;
00807   }
00808     }
00809 
00810 #ifndef NDEBUG
00811     for (c = 0 ; c < ncomponents ; c++)
00812     {
00813   PR1 ((Common->file, "Node "ID" children: ", c)) ;
00814   for (cp = Childp [c] ; cp < Childp [c+1] ; cp++)
00815   {
00816       PR1 ((Common->file, ""ID" ", Child [cp])) ;
00817   }
00818   PR1 ((Common->file, "\n")) ;
00819     }
00820 #endif
00821 
00822     /* ---------------------------------------------------------------------- */
00823     /* find the nominal dimensions of each node (assuming no pivot delays) */
00824     /* ---------------------------------------------------------------------- */
00825 
00826     for (c = ncomponents - 1 ; c >= 0 ; c--)
00827     {
00828   parent = Cparent [c] ;
00829   nparent = (parent == EMPTY) ? 0 : Cn [parent] ;
00830   Cn [c] = (Cstart [c+1] - Cstart [c]) + nparent ;
00831   PR1 ((Common->file, "node "ID" Cn: "ID"\n", c, Cn [c])) ;
00832     }
00833 
00834     /* ---------------------------------------------------------------------- */
00835     /* count the nonzeros in A that map to node c */
00836     /* ---------------------------------------------------------------------- */
00837 
00838     for (c = 0 ; c < ncomponents ; c++)
00839     {
00840   Cnz [c] = 0 ;
00841     }
00842     Ap = A->p ;
00843     Ai = A->i ;
00844     for (j = 0 ; j < n ; j++)
00845     {
00846   cj = Cmember [j] ;
00847   for (p = Ap [j] ; p < Ap [j+1] ; p++)
00848   {
00849       i = Ai [p] ;
00850       ci = Cmember [i] ;
00851       c = MIN (ci, cj) ;
00852       Cnz [c]++ ;
00853       PR3 ((Common->file, "A(1+"ID",1+"ID") = %g ; c(1+"ID",1+"ID") = "ID" ;\n",
00854     i,j, 
00855                 A->x ? 0 : (((double *) (A->x)) [p]),
00856                 i,j, c)) ;
00857   }
00858     }
00859 
00860 #ifndef NDEBUG
00861     for (c = 0 ; c < ncomponents ; c++)
00862     {
00863   PR0 ((Common->file, "Cnz ["ID"] = "ID"\n", c, Cnz [c])) ;
00864     }
00865 #endif
00866 
00867     /* ---------------------------------------------------------------------- */
00868     /* compute RpermInv = inverse of Cperm */
00869     /* ---------------------------------------------------------------------- */
00870 
00871     Rperm = LUsymbolic->Rperm ;
00872     ASSERT (Rperm != NULL) ;
00873 
00874     /* Rperm starts out equal to Cperm */
00875     for (k = 0 ; k < n ; k++)
00876     {
00877   Rperm [k] = Cperm [k] ;
00878     }
00879     for (k = 0 ; k < n ; k++)
00880     {
00881   RpermInv [Rperm [k]] = k ;
00882     }
00883 
00884     /* ---------------------------------------------------------------------- */
00885     /* schedule the processes to the nodes */
00886     /* ---------------------------------------------------------------------- */
00887 
00888 #ifndef NMPI
00889     /* processes Lo_id [c] to Hi_id [c] are assigned to node c or descendants */
00890     Lo_id = W ;
00891     Hi_id = W + ncomponents ; 
00892 
00893     for (c = 0 ; c < ncomponents ; c++)
00894     {
00895   Sched [c] = EMPTY ;
00896         Lo_id [c] = -1 ;
00897         Hi_id [c] = -1 ;
00898     }
00899 
00900     Lo_id [ncomponents-1] = 0 ;
00901     Hi_id [ncomponents-1] = nproc-1 ;
00902 
00903     for (c = ncomponents - 1 ; c >= 0 ; c--)
00904     {
00905         Int nchild, child, child_left, child_right, c_nproc ;
00906 
00907         /* first available process does this node */
00908         Sched [c] = Lo_id [c] ;
00909 
00910         /* split the processes amongst the children */
00911   nchild = Childp [c+1] - Childp [c] ;
00912         cp = Childp [c] ;
00913 
00914         if (nchild == 0)
00915         {
00916             /* nothing else to do */
00917         }
00918         else if (nchild == 1)
00919         {
00920             /* all processes go to the one child */
00921             child = Child [cp] ;
00922             Lo_id [child] = Lo_id [c] ;
00923             Hi_id [child] = Hi_id [c] ;
00924         }
00925         else if (nchild == 2)
00926         {
00927             /* two children; split the processors between them */
00928             child_left  = Child [cp] ;
00929             child_right = Child [cp+1] ;
00930 
00931             c_nproc = Hi_id [c] - Lo_id [c] + 1 ;
00932             if (c_nproc > 1)
00933             {
00934                 Lo_id [child_left ] = Lo_id [c] ;
00935                 Hi_id [child_left ] = Lo_id [c] + c_nproc / 2 - 1 ;
00936                 Lo_id [child_right] = Lo_id [c] + c_nproc / 2 ;
00937                 Hi_id [child_right] = Hi_id [c] ;
00938             }
00939             else
00940             {
00941                 Lo_id [child_left ] = Lo_id [c] ;
00942                 Hi_id [child_left ] = Lo_id [c] ;
00943                 Lo_id [child_right] = Lo_id [c] ;
00944                 Hi_id [child_right] = Lo_id [c] ;
00945             }
00946         }
00947         else
00948         {
00949             /* TODO this is an assertion - it cannot occur */
00950             PARAKLETE_ERROR (PK_UNKNOWN, "invalid separator tree") ;
00951             abort ( ) ;
00952         }
00953     }
00954 
00955 #if 0
00956     proc = 0 ;
00957     for (c = 0 ; c < ncomponents ; c++)
00958     {
00959   Int nchild = Childp [c+1] - Childp [c] ;
00960   if (nchild == 0)
00961   {
00962       PR1 ((Common->file,"\nSchedule child "ID" to process "ID"\n", c, proc));
00963       for (cc = c ; cc != EMPTY && Sched [cc] == EMPTY ; cc = Cparent[cc])
00964       {
00965     PR1 ((Common->file, "  node "ID" to process "ID"\n", cc, proc)) ;
00966     Sched [cc] = proc ;
00967       }
00968             /* advance to the next process */
00969       proc = (proc + 1) % nproc ;
00970   }
00971     }
00972 #endif
00973 
00974 #else
00975     /* all components are done by process zero */
00976     proc = 0 ;
00977     for (c = 0 ; c < ncomponents ; c++)
00978     {
00979   Sched [c] = proc ;
00980     }
00981 #endif
00982 
00983 #ifndef NDEBUG
00984     PR0 ((Common->file, "\nncomponents:: "ID"\n", ncomponents)) ;
00985     for (c = 0 ; c < ncomponents ; c++)
00986     {
00987   PR0 ((Common->file, "    node "ID" Sched "ID" : Cparent "ID" proc "ID"\n",
00988         c, Sched [c], Cparent [c],
00989         (Cparent [c] == EMPTY) ? EMPTY : Sched [Cparent [c]])) ;
00990     }
00991 #endif
00992 
00993 #if 0
00994     for (c = 0 ; c < ncomponents ; c++)
00995     {
00996         printf ("   node "ID" on "ID" : Cparent "ID" on "ID"\n",
00997         c, Sched [c], Cparent [c],
00998         (Cparent [c] == EMPTY) ? EMPTY : Sched [Cparent [c]]) ;
00999     }
01000 #endif
01001 
01002     /* ---------------------------------------------------------------------- */
01003     /* return the symbolic factorization, and broadcast it to all processes */
01004     /* ---------------------------------------------------------------------- */
01005 
01006     CHOLMOD (free) (n5, sizeof (Int), W, cm) ;
01007     PR1 ((Common->file, "analysis done\n")) ;
01008     MPI (paraklete_bcast_symbolic (&LUsymbolic, Common)) ;
01009     return (LUsymbolic) ;
01010 }
01011 
01012 
01013 /* ========================================================================== */
01014 /* === paraklete_alloc_symbolic ============================================= */
01015 /* ========================================================================== */
01016 
01017 /* allocate a symbolic object */
01018 
01019 paraklete_symbolic *amesos_paraklete_alloc_symbolic
01020 (
01021     Int n,
01022     Int ncomponents,
01023     Int do_Rperm,
01024     paraklete_common *Common
01025 )
01026 {
01027     paraklete_symbolic *LUsymbolic ;
01028     cholmod_common *cm ;
01029     size_t n3, nc7 ;
01030     int ok = TRUE ;
01031 
01032     cm = &(Common->cm) ;
01033 
01034     /* n3 = 3*n */
01035     n3 = CHOLMOD (mult_size_t) (n, 3, &ok) ;
01036     if (!ok) PARAKLETE_ERROR (PK_TOO_LARGE, "problem too large") ;
01037 
01038     /* nc7 = 7*ncomponents + 2 */
01039     nc7 = CHOLMOD (mult_size_t) (ncomponents, 7, &ok) ;
01040     nc7 = CHOLMOD (add_size_t) (nc7, 2, &ok) ;
01041     if (!ok) PARAKLETE_ERROR (PK_TOO_LARGE, "problem too large") ;
01042 
01043     LUsymbolic = CHOLMOD (malloc) (1, sizeof (paraklete_symbolic), cm) ;
01044 
01045     if (cm->status != CHOLMOD_OK)
01046     {
01047         PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
01048   return (NULL) ;
01049     }
01050 
01051     if (n > 0)
01052     {
01053   LUsymbolic->Mem_n    = CHOLMOD (malloc) (n3, sizeof (Int), cm) ;
01054   LUsymbolic->Cperm    = LUsymbolic->Mem_n ;
01055   LUsymbolic->RpermInv = LUsymbolic->Mem_n + n ;
01056   LUsymbolic->Cparent  = LUsymbolic->Mem_n + 2*n ;
01057 
01058   if (do_Rperm)
01059   {
01060       LUsymbolic->Rperm = CHOLMOD (malloc) (n, sizeof (Int), cm) ;
01061   }
01062   else
01063   {
01064       /* fill-reducing ordering is symmetric, Rperm is implicitly
01065        * equal to Cperm */
01066       LUsymbolic->Rperm = NULL ;
01067   }
01068     }
01069     else
01070     {
01071   LUsymbolic->Mem_n    = NULL ;
01072   LUsymbolic->Cperm    = NULL ;
01073   LUsymbolic->RpermInv = NULL ;
01074   LUsymbolic->Cparent  = NULL ;
01075   LUsymbolic->Rperm    = NULL ;
01076     }
01077 
01078     if (ncomponents > 0)
01079     {
01080 
01081   LUsymbolic->Mem_c  = CHOLMOD (malloc) (nc7, sizeof(Int), cm) ;
01082 
01083   /* each of size ncomponents: */
01084   LUsymbolic->Child  = LUsymbolic->Mem_c ;
01085   LUsymbolic->Clnz   = LUsymbolic->Mem_c + ncomponents ;
01086   LUsymbolic->Cn     = LUsymbolic->Mem_c + 2*ncomponents ;
01087   LUsymbolic->Cnz    = LUsymbolic->Mem_c + 3*ncomponents ;
01088   LUsymbolic->Sched  = LUsymbolic->Mem_c + 4*ncomponents ;
01089 
01090   /* each of size ncomponents+1: */
01091   LUsymbolic->Cstart = LUsymbolic->Mem_c + 5*ncomponents ;
01092   LUsymbolic->Childp = LUsymbolic->Mem_c + 6*ncomponents + 1 ;
01093 
01094     }
01095     else
01096     {
01097   LUsymbolic->Mem_c  = NULL ;
01098   LUsymbolic->Child  = NULL ;
01099   LUsymbolic->Clnz   = NULL ;
01100   LUsymbolic->Cn     = NULL ;
01101   LUsymbolic->Cnz    = NULL ;
01102   LUsymbolic->Sched  = NULL ;
01103   LUsymbolic->Cstart = NULL ;
01104   LUsymbolic->Childp = NULL ;
01105     }
01106 
01107     LUsymbolic->n = n ;
01108     LUsymbolic->ncomponents = ncomponents ;
01109 
01110     if (cm->status != CHOLMOD_OK)
01111     {
01112         /* out of memory */
01113         PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
01114   amesos_paraklete_free_symbolic (&LUsymbolic, Common) ;
01115     }
01116 
01117     return (LUsymbolic) ;
01118 }
01119 
01120 
01121 /* ========================================================================== */
01122 /* === paraklete_free_symbolic ============================================== */
01123 /* ========================================================================== */
01124 
01125 /* Free the symbolic object.  All processes own a copy after it is broadcast. */
01126 
01127 void amesos_paraklete_free_symbolic
01128 (
01129     paraklete_symbolic **LUsymbolicHandle,
01130     paraklete_common *Common
01131 )
01132 {
01133     paraklete_symbolic *LUsymbolic ;
01134     cholmod_common *cm ;
01135     Int n, ncomponents ;
01136 
01137     if (LUsymbolicHandle == NULL)
01138     {
01139   /* nothing to do */
01140   return ;
01141     }
01142     LUsymbolic = *LUsymbolicHandle ;
01143     if (LUsymbolic == NULL)
01144     {
01145   /* nothing to do */
01146   return ;
01147     }
01148 
01149     cm = &(Common->cm) ;
01150     ncomponents = LUsymbolic->ncomponents ;
01151     n = LUsymbolic->n ;
01152 
01153     /* size-3n space, in Mem_n */
01154     CHOLMOD (free) (3*n, sizeof (Int), LUsymbolic->Mem_n, cm) ;
01155     LUsymbolic->Cperm = NULL ; 
01156     LUsymbolic->RpermInv = NULL ; 
01157     LUsymbolic->Cparent = NULL ; 
01158 
01159     /* size-n space, only used for reanalyze/refactorize, or if the fill-
01160      * reducing ordering is unsymmetric.  Otherwise, Rperm is implicitly
01161      * equal to Cperm. */
01162     CHOLMOD (free) (n, sizeof (Int), LUsymbolic->Rperm, cm) ;
01163     LUsymbolic->Rperm = NULL ; 
01164 
01165     /* size-(7*components+2) space, in Mem_c */
01166     CHOLMOD (free) (7*ncomponents + 2, sizeof (Int), LUsymbolic->Mem_c, cm) ;
01167     LUsymbolic->Cstart = NULL ;
01168     LUsymbolic->Child = NULL ;
01169     LUsymbolic->Childp = NULL ;
01170     LUsymbolic->Clnz = NULL ;
01171     LUsymbolic->Cn = NULL ;
01172     LUsymbolic->Cnz = NULL ;
01173     LUsymbolic->Sched = NULL ;
01174 
01175     *LUsymbolicHandle = CHOLMOD (free) (
01176       1, sizeof (paraklete_symbolic), (*LUsymbolicHandle), cm) ;
01177 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines