Amesos Package Browser (Single Doxygen Collection) Development
amesos_paraklete_kernel.c
Go to the documentation of this file.
00001 /* ========================================================================== */
00002 /* === paraklete_kernel ===================================================== */
00003 /* ========================================================================== */
00004 
00005 #include "amesos_paraklete_decl.h"
00006 
00007 /* Sparse left-looking LU factorization, with partial pivoting.  Based on
00008  * Gilbert & Peierl's method, with a non-recursive DFS and with Eisenstat &
00009  * Liu's symmetric pruning.  No user-callable routines are in this file.
00010  *
00011  * PARAKLETE version 0.3: parallel sparse LU factorization.  Nov 13, 2007
00012  * Copyright (C) 2007, Univ. of Florida.  Author: Timothy A. Davis
00013  * See License.txt for the Version 2.1 of the GNU Lesser General Public License
00014  * http://www.cise.ufl.edu/research/sparse
00015  */
00016 
00017 #ifndef NDEBUG
00018 /* global variables for debugging only */
00019 static Int debug_k, debug_nfound, debug_n, debug_npiv ;
00020 #endif
00021 
00022 /* ========================================================================== */
00023 /* === dfs ================================================================== */
00024 /* ========================================================================== */
00025 
00026 /* Does a depth-first-search, starting at node j. */
00027 
00028 static Int dfs    /* returns new top of output stack */
00029 (
00030     /* input, not modified on output: */
00031     Int npiv,
00032     Int j,    /* node at which to start the DFS */
00033     Int mark,   /* mark value for Flag */
00034     Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or EMPTY if
00035        * row i is not yet pivotal.  Only used in phase 1. */
00036     Int Llen [ ],
00037     Int Lip [ ],
00038 
00039     Int phase1,   /* TRUE if in phase 1, FALSE if in phase 2 */
00040 
00041     /* workspace, not defined on input or output */
00042     Int Stack [ ],  /* size n */
00043 
00044     /* input/output: */
00045     Int Flag [ ], /* Flag [i] >= mark means i is marked */
00046     Int Lprune [ ], /* for symmetric pruning */
00047     Int top,    /* top of stack on input */
00048     double LU [ ],
00049     Int Li [ ],   /* resulting column of L */
00050     Int *plength,
00051 
00052     /* other, not defined on input or output */
00053     Int Pstack [ ]  /* keeps track of position in adj list during DFS */
00054 )
00055 {
00056     Int i, pos, jnew, head, llen ;
00057     Int *Lj ;
00058 
00059     llen = *plength ;
00060     head = 0 ;
00061     Stack [0] = j ;
00062     ASSERT (Flag [j] < mark) ;
00063 
00064     while (head >= 0)
00065     {
00066   /* In phase 1, j is in the original row index space of A.  In
00067    * phase 2, j is in the final pivotal order.  In both phases, jnew is
00068    * in the pivotal row order. */
00069   j = Stack [head] ;
00070   jnew = (phase1 && j < npiv) ? (Pinv [j]) : (j) ;
00071 
00072   ASSERT (IMPLIES ( phase1, jnew >= 0 && jnew < debug_k)) ;
00073   ASSERT (IMPLIES (!phase1, jnew >= 0 && jnew < debug_nfound)) ;
00074 
00075   if (Flag [j] < mark)      /* a node is not yet visited */
00076   {
00077       /* first time that j has been visited */
00078       Flag [j] = mark ;
00079       /* set Pstack [head] to one past the last entry in col j to scan */
00080       Pstack [head] = (Lprune [jnew] == EMPTY) ?
00081     (Llen [jnew]) : (Lprune [jnew]) ;
00082   }
00083 
00084   /* add the adjacent nodes to the recursive stack by iterating through
00085    * until finding another non-visited pivotal node */
00086   Lj = (Int *) (LU + Lip [jnew]) ;
00087   for (pos = --Pstack [head] ; pos > 0 ; --pos)
00088   {
00089       /* In phase 1, i is in the original row index space of A.  In
00090        * phase 2, i is in the final pivotal order. */
00091       i = Lj [pos] ;
00092 
00093       ASSERT (IMPLIES ( phase1, i >= 0 && i < debug_n)) ;
00094       ASSERT (IMPLIES (!phase1, i >= 0 && i < debug_nfound)) ;
00095 
00096       if (Flag [i] < mark)
00097       {
00098     /* node i is not yet visited */
00099     if (i < npiv && (!phase1 || Pinv [i] >= 0))
00100     {
00101         ASSERT (i < npiv) ;
00102         /* keep track of where we left off in the scan of the
00103          * adjacency list of node j so we can restart j where we
00104          * left off. */
00105         Pstack [head] = pos ;
00106 
00107         /* node i is pivotal; push it onto the recursive stack
00108          * and immediately break so we can recurse on node i. */
00109         Stack [++head] = i ;
00110         break ;
00111     }
00112     else
00113     {
00114         /* node i is not pivotal (no outgoing edges).
00115          * Flag as visited and store directly into L,
00116          * and continue with current node j.
00117          * This cannot occur during phase 2. */
00118         Flag [i] = mark ;
00119         Li [llen++] = i ;
00120     }
00121       }
00122   }
00123 
00124   if (pos == 0)
00125   {
00126       /* if all adjacent nodes of j are already visited, pop j from
00127        * recursive stack and push j onto output stack */
00128       head-- ;
00129       Stack [--top] = j ;
00130   }
00131     }
00132 
00133     *plength = llen ;
00134     return (top) ;      /* return top of output stack */
00135 }
00136 
00137 
00138 /* ========================================================================== */
00139 /* === lsolve_symbolic ====================================================== */
00140 /* ========================================================================== */
00141 
00142 /* Finds the pattern of x, for the solution of Lx=b */
00143 
00144 static void lsolve_symbolic
00145 (
00146     /* input, not modified on output: */
00147     Int n,              /* L is n-by-n, where n >= 0 */
00148     Int k,    /* kth step of factorization */
00149     Int mark,   /* mark for Flag */
00150     Int kcol,   /* b = A (:,kcol) */
00151     Int Ap [ ],
00152     Int Ai [ ],
00153     Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or EMPTY if row i
00154        * is not yet pivotal.  In phase 2, all rows are in
00155        * their final order, and Pinv is a complete inverse
00156        * permutation. */
00157 
00158     Int phase1,   /* TRUE if in phase 1, FALSE if in phase 2 */
00159     Int nfound,   /* used in phase 2 only */
00160     Int npiv,
00161 
00162     /* workspace, not defined on input or output */
00163     Int Stack [ ],  /* size n */
00164 
00165     /* workspace, defined on input and output */
00166     Int Flag [ ], /* size n.  Initially, all of Flag [0..n-1] < mark.
00167        * After lsolve_symbolic is done, Flag [i] == mark if i
00168        * is in the pattern of the output, and
00169        * Flag [0..n-1] <= mark. */
00170 
00171     /* other */
00172     Int Lprune [ ], /* for symmetric pruning */
00173     Int Pstack [ ], /* workspace used in dfs */
00174 
00175     /* TODO: comment this */
00176     double LU [ ],
00177     Int *plu,
00178     Int Llen [ ],
00179     Int Ulen [ ],
00180     Int Lip [ ],
00181     Int Uip [ ]
00182 )
00183 {
00184     Int i, p, p2, pend, top, llen, ulen, lup ;
00185     Int *Li, *Ui ;
00186 
00187     top = n ;
00188     llen = 0 ;
00189     lup = *plu ;
00190 
00191     if (phase1)
00192     {
00193   Lip [k] = lup ;
00194     }
00195     Li = (Int *) (LU + lup) ;
00196     pend = Ap [kcol+1] ;
00197 
00198     for (p = Ap [kcol] ; p < pend ; p++)
00199     {
00200   /* Ai is always in original row order, since it is never modified.
00201    * In phase 1, we need to leave i in its original row index space.
00202    * In phase 2, i needs to refer to the ith pivot row instead. */
00203   if (phase1)
00204   {
00205       i = Ai [p] ;
00206   }
00207   else
00208   {
00209       i = Ai [p] ;
00210       i = (i < npiv) ? Pinv [i] : i ;
00211       if (i >= nfound) continue ;
00212   }
00213 
00214   /* (i,k) is an entry in the block.  start a DFS at node i */
00215   if (Flag [i] < mark)
00216   {
00217       if (i < npiv && (!phase1 || Pinv [i] >= 0))
00218       {
00219     top = dfs (npiv, i, mark, Pinv, Llen, Lip, phase1, Stack, Flag,
00220          Lprune, top, LU, Li, &llen, Pstack) ;
00221       }
00222       else
00223       {
00224     /* i is not pivotal, and not flagged. Flag and put in L.
00225      * This cannot occur during phase 2. */
00226     Flag [i] = mark ;
00227     Li [llen++] = i ;
00228       }
00229   }
00230     }
00231 
00232     /* for phase 2, llen will be zero */
00233     if (phase1)
00234     {
00235   Llen [k] = llen ;
00236     }
00237     ASSERT (IMPLIES (!phase1, llen == 0)) ;
00238 
00239     /* advance LU pointer past the pattern and values of the new column of L */ 
00240     lup += ((llen + 1) / 2) + llen ;
00241 
00242     /* find the length of the column of U */
00243     ulen = n - top ;
00244     if (phase1)
00245     {
00246   /* add one for the pivot itself */
00247   ulen++ ;
00248     }
00249     Ulen [k] = ulen ;
00250     Ui = (Int *) (LU + lup) ;
00251     Uip [k] = lup ;
00252 
00253     /* advance LU pointer past the pattern and values of the new column of U */ 
00254     lup += ((ulen + 1) / 2) + ulen ;
00255 
00256     /* extract Stack [top..n-1] to Ui */
00257     for (p = top, p2 = 0 ; p < n ; p++, p2++)
00258     {
00259   Ui [p2] = Stack [p] ;
00260   ASSERT (IMPLIES (!phase1, Ui [p2] < debug_nfound)) ;
00261     }
00262 
00263     /* position the lu index at the starting point for next column */
00264     *plu = lup ;
00265 }
00266 
00267 
00268 /* ========================================================================== */
00269 /* === construct_column ===================================================== */
00270 /* ========================================================================== */
00271 
00272 /* Scatter the column of A into the workspace X */
00273 
00274 static void construct_column
00275 (
00276     /* inputs, not modified on output */
00277     Int kcol,     /* the column of A to construct */
00278     Int Ap [ ],
00279     Int Ai [ ],
00280     double Ax [ ],
00281     Int phase1,     /* phase 1: computing L1, L2 and U1.  phase 2: just U2 */
00282     Int nfound,     /* only used in phase 2 */
00283     Int npiv,
00284     Int Pinv [ ],   /* only used in phase 2 */
00285 
00286     /* zero on input, modified on output */
00287     double X [ ]
00288 )
00289 {
00290     Int p, pend, i ;
00291 
00292     pend = Ap [kcol+1] ;
00293     if (phase1)
00294     {
00295   /* scatter the entire column of A */
00296   for (p = Ap [kcol] ; p < pend ; p++)
00297   {
00298       X [Ai [p]] = Ax [p] ;
00299   }
00300     }
00301     else
00302     {
00303   /* scatter only the pivotal rows of A (for the U2 part only),
00304    * and permute to their final pivot order as well. */
00305   for (p = Ap [kcol] ; p < pend ; p++)
00306   {
00307       i = Ai [p] ;
00308       i = (i < npiv) ? Pinv [i] : i ;
00309       if (i < nfound)
00310       {
00311     X [i] = Ax [p] ;
00312       }
00313   }
00314     }
00315 }
00316 
00317 
00318 /* ========================================================================== */
00319 /* === lsolve_numeric ======================================================= */
00320 /* ========================================================================== */
00321 
00322 /* Computes the numerical values of x, for the solution of Lx=b.  Note that x
00323  * may include explicit zeros if numerical cancelation occurs.  L is assumed
00324  * to be unit-diagonal, with possibly unsorted columns (but the first entry in
00325  * the column must always be the diagonal entry). */
00326 
00327 static void lsolve_numeric
00328 (
00329     /* input, not modified on output: */
00330     Int npiv,
00331     Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or EMPTY if row i
00332        * is not yet pivotal.  */
00333 
00334     /* TODO comment this */
00335     double *LU,
00336     Int Uip [ ],
00337     Int Lip [ ],
00338     Int Ulen [ ],
00339     Int Llen [ ],
00340     Int k,
00341 
00342     Int phase1,
00343     Int Lphase_len [ ], /* phase 1: Llen, phase 2: L1_len */
00344 
00345     /* output, must be zero on input: */
00346     double X [ ]  /* size n, initially zero.  On output,
00347        * X [Ui [up1..up-1]] and X [Li [lp1..lp-1]]
00348        * contains the solution. */
00349 )
00350 {
00351     double xj ;
00352     double *Lx ;
00353     Int p, s, j, jnew, llen, ulen ;
00354     Int *Ui, *Li ;  
00355 
00356     Ui = (Int *) (LU + Uip [k]) ;
00357     ulen = Ulen [k] ;
00358 
00359     if (phase1) ulen-- ;    /* skip the diagonal */
00360 
00361     /* solve Lx=b */
00362     for (s = 0 ; s < ulen ; s++)
00363     {
00364   /* forward solve with column j of L (skip the unit diagonal of L).
00365    * In phase 1, all of L1 and L2 is used.  Ui not yet in pivotal order.
00366    * In phase 2, only L1 is used.  Ui already in pivotal order. */
00367   j = Ui [s] ;
00368   jnew = (phase1 && j < npiv) ? (Pinv [j]) : j ;
00369 
00370   ASSERT (IMPLIES ( phase1, jnew >= 0 && jnew < debug_n)) ;
00371   ASSERT (IMPLIES (!phase1, jnew >= 0 && jnew < debug_nfound)) ;
00372 
00373   xj = X [j] ;
00374   GET_COLUMN (Lip, Llen, LU, jnew, Li, Lx, llen) ;
00375 
00376   /* phase 1: solve with entire column of L (L1 and L2).
00377    * phase 2: solve with L1 only */
00378   llen = Lphase_len [jnew] ;
00379 
00380   for (p = 1 ; p < llen ; p++)
00381   {
00382       ASSERT (IMPLIES (!phase1, Li [p] > j && Li [p] < debug_nfound)) ;
00383       X [Li [p]] -= Lx [p] * xj ;
00384   }
00385     }
00386 }
00387 
00388 
00389 /* ========================================================================== */
00390 /* === lsolve =============================================================== */
00391 /* ========================================================================== */
00392 
00393 /* Sparse symbolic and numeric solve of Lx=b to compute the kth column of L and
00394  * U.  In phase 1: L1, L2, and U1 are computed.  In phase 2: only U2 is
00395  * computed. */
00396 
00397 static void lsolve
00398 (
00399    Int phase1,
00400    Int nfound,
00401    Int npiv,
00402    Int n,
00403    Int k,
00404    Int kcol,
00405    Int Ap [ ],
00406    Int Ai [ ],
00407    double Ax [ ],
00408 
00409    double *LU,
00410    Int *lup,
00411    Int Lip [ ],
00412    Int Uip [ ],
00413    Int Llen [ ],
00414    Int Ulen [ ],
00415 
00416    Int Lphase_len [ ],
00417 
00418    Int Pinv [ ],
00419    Int Stack [ ],
00420    Int Lprune [ ],
00421    Int Pstack [ ],
00422    Int Flag [ ],
00423    Int mark,
00424    double X [ ]
00425 )
00426 {
00427     double *Ux ;
00428     Int i, p, ulen ;
00429     Int *Ui ;
00430 
00431     /* ---------------------------------------------------------------------- */
00432     /* compute the nonzero pattern of the kth column of L and U */
00433     /* ---------------------------------------------------------------------- */
00434 
00435     lsolve_symbolic (n, k, mark, kcol, Ap, Ai, Pinv, phase1, nfound, npiv,
00436       Stack, Flag, Lprune, Pstack, LU, lup, Llen, Ulen, Lip, Uip) ;
00437 
00438     /* ---------------------------------------------------------------------- */
00439     /* get the column of the matrix to factorize and scatter into X */
00440     /* ---------------------------------------------------------------------- */
00441 
00442     construct_column (kcol, Ap, Ai, Ax, phase1, nfound, npiv, Pinv, X) ;
00443 
00444     /* ---------------------------------------------------------------------- */
00445     /* compute the numerical values of the kth column (s = L \ A(:,kcol)) */
00446     /* ---------------------------------------------------------------------- */
00447 
00448     lsolve_numeric (npiv,
00449       Pinv, LU, Uip, Lip, Ulen, Llen, k, phase1, Lphase_len, X) ;
00450 
00451     /* --------------------------------------------------------------------- */
00452     /* finalize the kth column of U (pivot excluded) and clear X */
00453     /* --------------------------------------------------------------------- */
00454 
00455     GET_COLUMN (Uip, Ulen, LU, k, Ui, Ux, ulen) ;
00456 
00457     if (phase1)
00458     {
00459   /* phase 1: modify row indices in Ui to be in their final pivot order */
00460   ulen-- ;      /* skip the diagonal */
00461   for (p = 0 ; p < ulen ; p++)
00462   {
00463       i = Ui [p] ;
00464       ASSERT (i >= 0 && i < npiv) ;
00465       ASSERT (Pinv [i] >= 0 && Pinv [i] < npiv) ;
00466       Ui [p] = Pinv [i] ;
00467       Ux [p] = X [i] ;
00468       X [i] = 0 ;
00469   }
00470     }
00471     else
00472     {
00473   /* phase 2: Ui is already in pivotal order */
00474   for (p = 0 ; p < ulen ; p++)
00475   {
00476       i = Ui [p] ;
00477       ASSERT (i >= 0 && i < nfound) ;
00478       Ux [p] = X [i] ;
00479       X [i] = 0 ;
00480   }
00481     }
00482 }
00483 
00484 
00485 /* ========================================================================== */
00486 /* === lpivot =============================================================== */
00487 /* ========================================================================== */
00488 
00489 /* Find a pivot via threshold partial pivoting, and scale the column of L. 
00490  * This routine is active during phase 1 only.  Returns TRUE if pivot found,
00491  * FALSE otherwise. */
00492 
00493 static Int lpivot
00494 (
00495     Int diagrow,
00496     Int *p_pivrow,
00497     double *p_pivot,
00498     double *p_abs_pivot,
00499     double tol_diag,
00500     double tol_offdiag,
00501     double X [ ],
00502     double *LU,
00503     Int Lip [ ],
00504     Int Llen [ ],
00505     Int k,
00506     Int npiv
00507 )
00508 {
00509     double x, pivot, abs_pivot, max_entry ;
00510     double *Lx ;
00511     Int p, i, ppivrow, pdiag, pivrow, pivot_found, llen ;
00512     Int *Li ;
00513 
00514     /* ---------------------------------------------------------------------- */
00515     /* look in Li [0 ... Llen [k] - 1 ] for a pivot row */
00516     /* ---------------------------------------------------------------------- */
00517 
00518     GET_COLUMN (Lip, Llen, LU, k, Li, Lx, llen) ;
00519 
00520     pdiag = EMPTY ;
00521     abs_pivot = -1 ;
00522     max_entry = -1 ;
00523     ppivrow = EMPTY ;
00524 
00525     for (p = 0 ; p < llen ; p++)
00526     {
00527   /* gather the entry from X and store in L */
00528   i = Li [p] ;
00529   x = X [i] ;
00530   X [i] = 0 ;
00531 
00532   Lx [p] = x ;
00533   x = fabs (x) ;
00534 
00535   /* find the diagonal */
00536   if (i == diagrow)
00537   {
00538       pdiag = p ;
00539   }
00540 
00541   /* find the partial-pivoting choice (constrained to rows 0..npiv-1) */
00542   if (x > abs_pivot && i < npiv)
00543   {
00544       abs_pivot = x ;
00545       ppivrow = p ;
00546   }
00547 
00548   /* find the max entry in this column (any row) */
00549   max_entry = MAX (max_entry, x) ;
00550     }
00551 
00552     /* compare the diagonal with the largest entry */
00553     if (pdiag != EMPTY)
00554     {
00555   x = fabs (Lx [pdiag]) ;
00556   if (x >= tol_diag * max_entry)
00557   {
00558       /* the diagonal is large enough */
00559       abs_pivot = x ;
00560       ppivrow = pdiag ;
00561   }
00562   else
00563   {
00564       /* diagonal entry is too small, discard it */
00565       pdiag = EMPTY ;
00566   }
00567     }
00568 
00569     /* if diagonal not found or not chosen, try for an off-diagonal pivot */
00570     if (pdiag == EMPTY && ppivrow != EMPTY)
00571     {
00572   /* no diagonal pivot.  Try to pick the off-diagonal pivot */
00573   if (abs_pivot < tol_offdiag * max_entry)
00574   {
00575       /* off-diagonal pivot is too small, discard it */
00576       ppivrow = EMPTY ;
00577   }
00578     }
00579 
00580     pivot_found = (ppivrow != EMPTY && abs_pivot > 0) ;
00581 
00582     if (pivot_found)
00583     {
00584   /* swap pivot row to first entry in column k of L */
00585   pivrow = Li [ppivrow] ;
00586   pivot  = Lx [ppivrow] ;
00587   Li [ppivrow] = Li [0] ;
00588   Lx [ppivrow] = Lx [0] ;
00589   Li [0] = pivrow ;
00590   Lx [0] = 1.0 ;
00591 
00592   ASSERT (pivrow >= 0 && pivrow < npiv) ;
00593   ASSERT (pivot != 0) ;
00594 
00595   /* divide L by the pivot value */
00596   for (p = 1 ; p < Llen [k] ; p++)
00597   {
00598       Lx [p] /= pivot ;
00599   }
00600 
00601   *p_pivrow = pivrow ;
00602   *p_pivot = pivot ;
00603   *p_abs_pivot = abs_pivot ;
00604     }
00605 
00606     return (pivot_found) ;
00607 }
00608 
00609 
00610 /* ========================================================================== */
00611 /* === prune ================================================================ */
00612 /* ========================================================================== */
00613 
00614 /* Prune the columns of L to reduce work in subsequent depth-first searches.
00615  * This routine is not active during phase 2. */
00616 
00617 static void prune
00618 (
00619     /* TODO comment this */
00620     Int npiv,
00621     Int Lprune [ ],
00622     Int Pinv [ ],
00623     Int k,
00624     Int pivrow,
00625     double *LU,
00626     Int Uip [ ],
00627     Int Lip [ ],
00628     Int Ulen [ ],
00629     Int Llen [ ]
00630 )
00631 {
00632     double x ;
00633     double *Lx, *Ux ;
00634     Int p, i, j, p2, phead, ptail, ulen, llen ;
00635     Int *Li, *Ui ;
00636 
00637     /* ---------------------------------------------------------------------- */
00638     /* check to see if any column of L can be pruned */
00639     /* ---------------------------------------------------------------------- */
00640 
00641     GET_COLUMN (Uip, Ulen, LU, k, Ui, Ux, ulen) ;
00642     ulen-- ;  /* skip the diagonal entry */
00643 
00644     for (p = 0 ; p < ulen ; p++)
00645     {
00646   j = Ui [p] ;
00647   ASSERT (j >= 0 && j < k) ;
00648   if (Lprune [j] == EMPTY)
00649   {
00650       /* scan column j of L for the pivot row */
00651       GET_COLUMN (Lip, Llen, LU, j, Li, Lx, llen) ;
00652 
00653       for (p2 = 0 ; p2 < Llen [j] ; p2++)
00654       {
00655     if (pivrow == Li [p2])
00656     {
00657         /* found it!  This column can be pruned.
00658          * partition column j of L.  The unit diagonal of L
00659          * remains as the first entry in the column of L. */
00660         phead = 0 ;
00661         ptail = Llen [j] ;
00662         while (phead < ptail)
00663         {
00664       i = Li [phead] ;
00665       if (i < npiv && Pinv [i] >= 0)
00666       {
00667           /* leave at the head */
00668           phead++ ;
00669       }
00670       else
00671       {
00672           /* swap with the tail */
00673           ptail-- ;
00674           Li [phead] = Li [ptail] ;
00675           Li [ptail] = i ;
00676           x = Lx [phead] ;
00677           Lx [phead] = Lx [ptail] ;
00678           Lx [ptail] = x ;
00679       }
00680         }
00681 
00682         /* set Lprune to one past the last entry in the
00683          * first part of the column of L.  Entries in
00684          * Li [0 ... Lprune [j]-1] are the only part of
00685          * column j of L that needs to be scanned in the DFS.
00686          * Lprune [j] was EMPTY; setting it >= 0 also flags
00687          * column j as pruned. */
00688         Lprune [j] = ptail ;
00689         break ;
00690     }
00691       }
00692   }
00693     }
00694 }
00695 
00696 
00697 /* ========================================================================== */
00698 /* === paraklete_kernel ===================================================== */
00699 /* ========================================================================== */
00700 
00701 /* Factorize P*A*Q into L*U+S.  Returns TRUE if successful, FALSE if out of
00702  * memory.  If memory runs out, the factors of this node (LUnode) are not
00703  * freed; that is done by the caller.   If the matrix is singular, the
00704  * Schur complement (S) has a larger dimension than expected. */
00705 
00706 Int amesos_paraklete_kernel
00707 (
00708     cholmod_sparse *A,
00709     paraklete_node *LUnode,
00710     paraklete_common *Common
00711 )
00712 { 
00713     cholmod_common *cm ;
00714     double pivot, abs_pivot, umin, umax, ujk, x, tol_diag, tol_offdiag, growth ;
00715     double *Lx, *Ux, *LU, *S, *Sx, *Ax, *X ;
00716     Int *Li, *Ui, *Si, *Qinv, *L1_len, *Ap, *Ai, *Llen, *Ulen, *Lip, *Uip, *P,
00717   *Q, *Pinv, *Sip, *Slen, *Flag, *Queue, *Iwork, *Stack, *Pstack,
00718   *Lprune ;
00719     Int k, p, i, j, pivrow, kbar, diagrow, noffdiag, lup, mark, unpruned,
00720   phead, ptail, sp, slen, llen, ulen, p2, pend, ngrow, qhead, qtail, sn,
00721   found, kcol, second_try, nfound, n, npiv, lnz, unz, snz, lup_orig ;
00722     size_t lusize, ssize ;
00723 
00724     /* ---------------------------------------------------------------------- */
00725     /* get inputs */
00726     /* ---------------------------------------------------------------------- */
00727 
00728     cm = &(Common->cm) ;  /* CHOLMOD Common */
00729 
00730     /* Matrix to factorize */
00731     n = A->nrow ;   /* A is n-by-n */
00732     Ap = A->p ;     /* size n+1, column pointers for A */
00733     Ai = A->i ;     /* size nz = Ap [n], row indices for A */
00734     Ax = A->x ;     /* size nz, values of A */
00735 
00736     /* LU factors of this node */
00737     Llen = LUnode->llen ; /* size n, column length of L */ 
00738     Ulen = LUnode->ulen ; /* size n, column length of U */
00739     Lip = LUnode->lp ;    /* size n, column pointers for L */
00740     Uip = LUnode->up ;    /* size n, column pointers for U */
00741     LU = LUnode->ix ;   /* size LU->size on input and output */
00742     lusize = LUnode->lusize ; /* initial and final size of LU */
00743     npiv = LUnode->PK_NPIV  ; /* number of pivots to find */
00744 
00745     P = LUnode->Plocal ;  /* size npiv, row permutation, where P [k] = i
00746          * if row i is the kth pivot row. */
00747     Q = LUnode->Qlocal ;  /* size npiv, column permutation */
00748     Pinv = LUnode->Pinv ; /* size npiv, inverse row permutation, where
00749          * Pinv [i] = k if row i is the kth pivot row */
00750     Qinv = LUnode->Qinv ; /* size npiv, inverse of Q */
00751 
00752     tol_diag = Common->tol_diag ;     /* partial pivoting tolerance */
00753     tol_offdiag = Common->tol_offdiag ;    /* partial pivoting tolerance */
00754     growth = Common->growth ;       /* memory growth factor */
00755 
00756     /* ---------------------------------------------------------------------- */
00757     /* get workspace */
00758     /* ---------------------------------------------------------------------- */
00759 
00760     CHOLMOD (allocate_work) (n, 3*n, n, cm) ;
00761     if (cm->status != CHOLMOD_OK)
00762     {
00763   /* out of memory */
00764   return (FALSE) ;
00765     }
00766 
00767     /* workspace, not defined on input or output: */
00768     Flag = cm->Flag ;   /* size n */
00769     Queue = cm->Head ;    /* size n+1 */
00770     Iwork = cm->Iwork ;
00771     Stack  = Iwork ;    /* size n */
00772     Pstack = Iwork + n ;  /* size n */
00773     Lprune = Iwork + 2*n ;  /* size n */
00774 
00775     /* workspace, not defined on input */
00776     X = cm->Xwork ;   /* size n, undefined on input, zero on output */
00777 
00778     /* ====================================================================== */
00779     /* PHASE 1: compute L1, L2, and U1 ====================================== */
00780     /* ====================================================================== */
00781 
00782     /* ---------------------------------------------------------------------- */
00783     /* initializations */
00784     /* ---------------------------------------------------------------------- */
00785 
00786     npiv = MAX (0, MIN (npiv, n)) ;
00787     nfound = 0 ;
00788 
00789     DEBUG (debug_n = n) ;
00790     DEBUG (debug_npiv = npiv) ;
00791 
00792     lnz = 0 ;
00793     unz = 0 ;
00794     snz = 0 ;
00795 
00796     umin = 0 ;
00797     umax = 0 ;
00798     noffdiag = 0 ;
00799     lup = 0 ;
00800     LUnode->nrealloc = 0 ;
00801     for (k = 0 ; k < n ; k++)
00802     {
00803   X [k] = 0 ;
00804   Flag [k] = EMPTY ;  /* flag k as unmarked */
00805   Lprune [k] = EMPTY ;  /* flag k as not pruned */
00806     }
00807     mark = 0 ;
00808 
00809     ngrow = 
00810   (n+1)   /* reals, in L and U (both diag L and U are stored) */
00811   + (n+1)/2 /* ints */
00812   + 2 ;   /* Int to real may be rounded up */
00813 
00814   /*
00815   (3*n/2) + 2 ;
00816   */
00817 
00818     /* ---------------------------------------------------------------------- */
00819     /* mark all rows as non-pivotal and determine initial diagonal mapping */
00820     /* ---------------------------------------------------------------------- */
00821 
00822     /* initial Q is identity, so the diagonal entries are the natural ones */
00823     for (k = 0 ; k < npiv ; k++)
00824     {
00825   P [k] = k ;
00826   Pinv [k] = FLIP (k) ; /* mark all candidiate rows as non-pivotal */
00827     }
00828 
00829     /* (P [k] = row) means that (UNFLIP (Pinv [row]) = k), and visa versa.
00830      * If row is pivotal, then Pinv [row] >= 0.  A row is initially "flipped"
00831      * (Pinv [k] < EMPTY), and then marked "unflipped" when it becomes
00832      * pivotal. */
00833 
00834     /* ---------------------------------------------------------------------- */
00835     /* place all potential pivot columns in the candidate column queue */
00836     /* ---------------------------------------------------------------------- */
00837 
00838     for (k = 0 ; k < npiv ; k++)
00839     {
00840   Queue [k] = k ;
00841     }
00842     qhead = 0 ;
00843     qtail = npiv ;
00844 
00845     /* ---------------------------------------------------------------------- */
00846     /* factorize up to npiv columns */
00847     /* ---------------------------------------------------------------------- */
00848 
00849     for (k = 0 ; k < npiv ; k++)
00850     {
00851 
00852   PR1 ((Common->file, "\n\n==========================L,U1:  k: "ID"\n", k));
00853   DEBUG (debug_k = k) ;
00854 
00855   /* ------------------------------------------------------------------ */
00856   /* determine if LU factors have grown too big */
00857   /* ------------------------------------------------------------------ */
00858 
00859         /* LU can grow by at most 3n/2 + 2 entries if the column is dense */
00860   if (lup + ngrow > (Int) lusize)
00861   {
00862       size_t new = (growth * lusize + ngrow + 2) ;
00863       PR1 ((Common->file, "growing LU from "ID" to "ID"\n", lusize, new)) ;
00864       LU = CHOLMOD (realloc) (new, sizeof (double), LU, &lusize, cm) ;
00865       LUnode->ix = LU ;
00866       LUnode->lusize = lusize ;
00867       if (cm->status != CHOLMOD_OK)
00868       {
00869                 /* TODO return FALSE, and broadcast error to all processes */
00870                 PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
00871     return (FALSE) ;
00872       }
00873   }
00874 
00875   /* ------------------------------------------------------------------ */
00876   /* find a good pivot column, if possible */
00877   /* ------------------------------------------------------------------ */
00878 
00879   found = FALSE ;
00880   diagrow = EMPTY ;
00881   kcol = EMPTY ;
00882   while (!found && qhead != qtail)
00883   {
00884 
00885       /* -------------------------------------------------------------- */
00886       /* grab a pivot column candidate from the head of the queue */
00887       /* -------------------------------------------------------------- */
00888 
00889       kcol = Queue [qhead] ;
00890       qhead = (qhead + 1) % (npiv + 1) ;
00891       second_try = (kcol < 0) ;
00892       kcol = UNFLIP (kcol) ;
00893 
00894       /* -------------------------------------------------------------- */
00895       /* s = L \ A (:, kcol) ; store result in L(:,k) and U(:,k)  */
00896       /* -------------------------------------------------------------- */
00897 
00898       lup_orig = lup ;
00899       lsolve (TRUE, nfound, npiv,
00900     n, k, kcol, Ap, Ai, Ax, LU, &lup, Lip, Uip,
00901     Llen, Ulen, Llen, Pinv, Stack, Lprune, Pstack, Flag, mark, X) ;
00902 
00903       /* -------------------------------------------------------------- */
00904       /* partial pivoting with diagonal preference */
00905       /* -------------------------------------------------------------- */
00906 
00907       /* determine what the "diagonal" is */
00908       diagrow = P [k] ;   /* might already be pivotal */
00909       PR1 ((Common->file, "k "ID", diagrow = "ID", UNFLIP(diagrow) = "ID"\n",
00910     k, diagrow, UNFLIP (diagrow))) ;
00911 
00912       /* find a pivot and scale the pivot column */
00913       found = lpivot (diagrow, &pivrow, &pivot, &abs_pivot, tol_diag,
00914         tol_offdiag, X, LU, Lip, Llen, k, npiv) ;
00915 
00916       if (!found)
00917       {
00918     /* pivot column candidate failed.  If this was already its
00919      * second chance, discard it.  Otherwise, flag it and put it
00920      * at the end of the queue.  The kth column of L and U 
00921      * computed by lsolve, above, is discarded. */
00922     if (!second_try)
00923     {
00924         PR1 ((Common->file, "requeue kcol "ID" for 2nd try\n", kcol));
00925         Queue [qtail] = FLIP (kcol) ;
00926         qtail = (qtail + 1) % (npiv + 1) ;
00927     }
00928     else
00929     {
00930         PR1 ((Common->file, "discarding kcol "ID"\n", kcol)) ;
00931     }
00932     PR1 ((Common->file, "restore lup, "ID" to "ID"\n", lup_orig, lup)) ;
00933     lup = lup_orig ;
00934       }
00935       else
00936       {
00937     /* we found a pivot column */
00938     Q [nfound++] = kcol ;
00939     PR1 ((Common->file,"found kcol: "ID" nfound "ID"\n", kcol, nfound));
00940       }
00941 
00942       /* clear the flag array */
00943       mark++ ;
00944   }
00945 
00946   DEBUG (debug_nfound = nfound) ;
00947 
00948   if (!found)
00949   {
00950       /* No more pivots to be found. Go to phase 2 to compute U2 */
00951       break ;
00952   }
00953 
00954   /* ------------------------------------------------------------------ */
00955   /* we now have a valid pivot row and column */
00956   /* ------------------------------------------------------------------ */
00957 
00958   PR1 ((Common->file, "\nk "ID": pivcol "ID" pivrow "ID": %g\n",
00959       k, kcol, pivrow, pivot)) ;
00960   ASSERT (pivrow >= 0 && pivrow < npiv) ;
00961   ASSERT (Pinv [pivrow] < 0) ;
00962 
00963   /* ------------------------------------------------------------------ */
00964   /* keep track of the smallest and largest entry on the diagonal of U */
00965   /* ------------------------------------------------------------------ */
00966 
00967   if (k == 0)
00968   {
00969       umin = abs_pivot ;
00970       umax = abs_pivot ;
00971   }
00972   else if (abs_pivot < umin)
00973   {
00974       umin = abs_pivot ;
00975   }
00976   else if (abs_pivot > umax)
00977   {
00978       umax = abs_pivot ;
00979   }
00980 
00981   /* ------------------------------------------------------------------ */
00982   /* copy the pivot value as the last entry in the column of U */
00983   /* ------------------------------------------------------------------ */
00984 
00985   GET_COLUMN (Uip, Ulen, LU, k, Ui, Ux, ulen) ;
00986   PR1 ((Common->file, "k "ID" Ulen[k] "ID"\n", k, Ulen [k])) ;
00987   ASSERT (ulen > 0) ;
00988   Ui [ulen-1] = k ;
00989   Ux [ulen-1] = pivot ;
00990   PR1 ((Common->file, "pivot: Ui "ID" Ux %g\n", Ui [ulen-1], Ux [ulen-1])) ;
00991 
00992   /* ------------------------------------------------------------------ */
00993   /* log the pivot permutation */
00994   /* ------------------------------------------------------------------ */
00995 
00996   DEBUG (kbar = UNFLIP (Pinv [diagrow])) ;
00997   ASSERT (kbar < n) ;
00998   ASSERT (P [kbar] == diagrow) ;
00999 
01000   if (pivrow != diagrow)
01001   {
01002       /* an off-diagonal pivot has been chosen */
01003       noffdiag++ ;
01004             /*
01005             printf ("pivrow "ID" k "ID" noffdiagn "ID"\n", pivrow, k, noffdiag) ;
01006             */
01007       PR1 ((Common->file,">>>>>>>>>>>>>>>> pivrow "ID" k "ID" off-diagonal\n",
01008       pivrow, k)) ;
01009       if (Pinv [diagrow] < 0)
01010       {
01011     /* the former diagonal row index, diagrow, has not yet been
01012      * chosen as a pivot row.  Log this diagrow as the "diagonal"
01013      * entry in the column kbar for which the chosen pivot row,
01014      * pivrow, was originally logged as the "diagonal" */
01015     kbar = FLIP (Pinv [pivrow]) ;
01016     P [kbar] = diagrow ;
01017     Pinv [diagrow] = FLIP (kbar) ;
01018       }
01019   }
01020   P [k] = pivrow ;
01021   Pinv [pivrow] = k ;
01022 
01023 #ifndef NDEBUG
01024   for (p = 0 ; p < ulen ; p++)
01025   {
01026       PR2 ((Common->file,"Column "ID" of U: "ID": %g\n", k, Ui [p], Ux [p])) ;
01027   }
01028   GET_COLUMN (Lip, Llen, LU, k, Li, Lx, llen) ;
01029   for (p = 0 ; p < llen ; p++)
01030   {
01031       PR2 ((Common->file,"Column "ID" of L: "ID": %g\n", k, Li [p], Lx [p])) ;
01032   }
01033 #endif
01034 
01035   /* ------------------------------------------------------------------ */
01036   /* symmetric pruning */
01037   /* ------------------------------------------------------------------ */
01038 
01039   prune (npiv, Lprune, Pinv, k, pivrow, LU, Uip, Lip, Ulen, Llen) ;
01040 
01041   lnz += Llen [k] ;
01042   unz += Ulen [k] ;
01043     }
01044 
01045     LUnode->noffdiag = noffdiag ; /* # of off-diagonal pivots chosen */
01046     LUnode->umin = umin ;   /* smallest entry on diagonal of U */
01047     LUnode->umax = umax ;   /* largest entry on the diagonal of U */
01048     LUnode->PK_NFOUND = nfound ;    /* number of pivots found */
01049     LUnode->lnz = lnz ;     /* nnz in L */
01050 
01051     PR1 ((Common->file, "noffdiag "ID"\n", noffdiag)) ;
01052     PR1 ((Common->file, "umin %g umax %g condest %g\n", umin, umax, umax/umin));
01053 
01054     /* ====================================================================== */
01055     /* PHASE 2: compute U2 and S ============================================ */
01056     /* ====================================================================== */
01057 
01058     /* ---------------------------------------------------------------------- */
01059     /* finalize the row and column permutations */
01060     /* ---------------------------------------------------------------------- */
01061 
01062     DEBUG (for (i = 0 ; i < n ; i++) ASSERT (Flag [i] < mark)) ;
01063 
01064     k = nfound ;
01065     for (i = 0 ; i < npiv ; i++)
01066     {
01067   if (Pinv [i] < 0)
01068   {
01069       /* a non-pivotal row */
01070       P [k] = i ;
01071       Pinv [i] = k++ ;
01072   }
01073     }
01074 
01075     for (j = 0 ; j < npiv ; j++)
01076     {
01077   Qinv [j] = EMPTY ;
01078     }
01079     for (k = 0 ; k < nfound ; k++)
01080     {
01081   PR2 ((Common->file, "k "ID" Q [k] "ID" npiv "ID"\n", k, Q [k], npiv)) ;
01082   ASSERT (Q [k] >= 0 && Q [k] < npiv) ;
01083   Qinv [Q [k]] = k ;
01084     }
01085 
01086     k = nfound ;
01087     for (j = 0 ; j < npiv ; j++)
01088     {
01089   if (Qinv [j] == EMPTY)
01090   {
01091       /* a non-pivotal column */
01092       Q [k] = j ;
01093       Qinv [j] = k++ ;
01094   }
01095     }
01096     ASSERT (k == npiv) ;
01097 
01098 #ifndef NDEBUG
01099     for (i = 0 ; i < npiv ; i++)
01100     {
01101   if (i == nfound) PR1 ((Common->file,"-------- nfound = "ID"\n", nfound)) ;
01102   if (i == npiv  ) PR1 ((Common->file,"-------- npiv   = "ID"\n", npiv  )) ;
01103   PR2 ((Common->file,"P["ID"] = "ID" Pinv["ID"] = "ID"\n", i, P[i], i, Pinv[i])) ;
01104   PR2 ((Common->file,"Q["ID"] = "ID" Qinv["ID"] = "ID"\n", i, Q[i], i, Qinv[i])) ;
01105     }
01106 #endif
01107 
01108     /* ---------------------------------------------------------------------- */
01109     /* partition the columns of L into L1 (pivotal) and L2 (non-pivotal) */
01110     /* ---------------------------------------------------------------------- */
01111 
01112     /* use Queue as workspace for L1_len */
01113     L1_len = Queue ;
01114 
01115     for (j = 0 ; j < nfound ; j++)
01116     {
01117   GET_COLUMN (Lip, Llen, LU, j, Li, Lx, llen) ;
01118 
01119   /* change all row indices in L1 and L2 to final row indices */
01120   for (p = 0 ; p < llen ; p++) 
01121   {
01122       i = Li [p] ;
01123       if (i < npiv)
01124       {
01125     Li [p] = Pinv [i] ;
01126       }
01127   }
01128 
01129   unpruned = (Lprune [j] == EMPTY) ;
01130 
01131   phead = (unpruned) ? 1 : (Lprune [j]) ;
01132   ptail = llen ;
01133 
01134 #ifndef NDEBUG
01135   /* any pruned part of the column is already pivotal */
01136   for (p = 0 ; p < phead ; p++)
01137   {
01138       ASSERT (Li [p] < nfound) ;
01139   }
01140 #endif
01141 
01142   /* partition row indices in L1 (< nfound) and L2 (>= nfound) */
01143   while (phead < ptail)
01144   {
01145       i = Li [phead] ;
01146       if (i < nfound)
01147       {
01148     /* leave at the head */
01149     phead++ ;
01150       }
01151       else
01152       {
01153     /* swap with the tail */
01154     ptail-- ;
01155     Li [phead] = Li [ptail] ;
01156     Li [ptail] = i ;
01157     x = Lx [phead] ;
01158     Lx [phead] = Lx [ptail] ;
01159     Lx [ptail] = x ;
01160       }
01161   }
01162 
01163   L1_len [j] = ptail ;
01164 
01165   /* prune the column for the next lsolve */
01166   if (unpruned)
01167   {
01168       Lprune [j] = L1_len [j] ;
01169   }
01170 
01171 #ifndef NDEBUG
01172   /* the column is now partitioned */
01173   for (p = 0 ; p < Lprune [j] ; p++)
01174   {
01175       ASSERT (Li [p] < nfound) ;
01176   }
01177   ASSERT (Lprune [j] <= L1_len [j]) ;
01178   for ( ; p < L1_len [j] ; p++)
01179   {
01180       ASSERT (Li [p] < nfound) ;
01181   }
01182   for ( ; p < Llen [j] ; p++)
01183   {
01184       ASSERT (Li [p] >= nfound) ;
01185   }
01186 #endif
01187 
01188     }
01189 
01190     /* ---------------------------------------------------------------------- */
01191     /* compute the U2 block, U2 = L1 \ A12 */
01192     /* ---------------------------------------------------------------------- */
01193 
01194     ngrow = 
01195   (nfound+1)  /* reals, in L and U (both diag L and U are stored) */
01196   + (nfound+1)/2  /* ints */
01197   + 2 ;   /* Int to real may be rounded up */
01198 
01199     for (k = nfound ; k < n ; k++)
01200     {
01201   PR1 ((Common->file, "\n\n=============================U2: k: "ID"\n", k));
01202   DEBUG (debug_k = k) ;
01203 
01204   /* ------------------------------------------------------------------ */
01205   /* determine if LU factors have grown too big */
01206   /* ------------------------------------------------------------------ */
01207 
01208         /* LU can grow by at most 3*nfound/2+2 entries if the column is dense */
01209   if (lup + ngrow > (Int) lusize)
01210   {
01211       size_t new = (growth * lusize + ngrow + 2) ;
01212       LU = CHOLMOD (realloc) (new, sizeof (double), LU, &lusize, cm) ;
01213       LUnode->ix = LU ;
01214       LUnode->lusize = lusize ;
01215       if (cm->status != CHOLMOD_OK)
01216       {
01217                 /* TODO return FALSE, and broadcast error to all processes */
01218                 PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
01219     return (FALSE) ;
01220       }
01221   }
01222 
01223   /* ------------------------------------------------------------------ */
01224   /* determine which column of A this is */
01225   /* ------------------------------------------------------------------ */
01226 
01227   kcol = (k < npiv) ? Q [k] : k ;
01228 
01229   /* ------------------------------------------------------------------ */
01230   /* s = L \ A (:, kcol) and store result in U(:,k)  */
01231   /* ------------------------------------------------------------------ */
01232 
01233   lsolve (FALSE, nfound, npiv,
01234       n, k, kcol, Ap, Ai, Ax, LU, &lup, Lip, Uip,
01235       Llen, Ulen, L1_len, Pinv, Stack, Lprune, Pstack, Flag, mark, X) ;
01236 
01237   PR2 ((Common->file, "lup is now "ID"\n", lup)) ;
01238 
01239 #ifndef NDEBUG
01240   GET_COLUMN (Uip, Ulen, LU, k, Ui, Ux, ulen) ;
01241   for (p = 0 ; p < ulen ; p++)
01242   {
01243       PR2 ((Common->file, "Column "ID" of U2: "ID": %g\n", k, Ui[p], Ux[p])) ;
01244   }
01245 #endif
01246 
01247   unz += Ulen [k] ;
01248 
01249   /* clear the flag array */
01250   mark++ ;
01251     }
01252 
01253     LUnode->unz = unz ;     /* nnz in U */
01254 
01255     /* ---------------------------------------------------------------------- */
01256     /* shrink the LU factors to just the required size */
01257     /* ---------------------------------------------------------------------- */
01258 
01259     LU = CHOLMOD (realloc) (lup, sizeof (double), LU, &lusize, cm) ;
01260     LUnode->ix = LU ; 
01261     LUnode->lusize = lusize ;
01262     ASSERT (cm->status == CHOLMOD_OK) ;
01263 
01264     /* ---------------------------------------------------------------------- */
01265     /* compute the Schur complement, S = A22 - L2*U2 */
01266     /* ---------------------------------------------------------------------- */
01267 
01268     sn = n - nfound ;
01269     ssize = lusize ;
01270     LUnode->PK_SSIZE = ssize ;
01271     PR1 ((Common->file, "allocate S, : ssize "ID" sn "ID" \n", ssize, sn)) ;
01272 
01273     S = CHOLMOD (malloc) (ssize, sizeof (double), cm) ;
01274     Sip = CHOLMOD (malloc) (sn, sizeof (Int), cm) ;
01275     Slen = CHOLMOD (malloc) (sn, sizeof (Int), cm) ;
01276     LUnode->sx = S ;
01277     LUnode->sp = Sip ;
01278     LUnode->slen = Slen ;
01279     LUnode->PK_SSIZE = ssize ; 
01280     LUnode->PK_SN = sn ;
01281     if (cm->status != CHOLMOD_OK)
01282     {
01283         /* TODO return FALSE, and broadcast error to all processes */
01284         PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
01285   return (FALSE) ;
01286     }
01287 
01288     sp = 0 ;
01289     ngrow = (3 * (n - nfound) / 2) + 2 ;
01290 
01291     /* S is square, of order n-nfound, with rows/cols in range 0 : n-nfound-1 */
01292 
01293     for (k = nfound ; k < n ; k++)
01294     {
01295   PR2 ((Common->file, "\n\n========================== S:  k: "ID" sk: "ID"\n",
01296         k, k-nfound)) ;
01297   DEBUG (for (i = 0 ; i < n ; i++) ASSERT (Flag [i] < mark)) ;
01298 
01299   /* ------------------------------------------------------------------ */
01300   /* determine if the Schur complement needs to grow */
01301   /* ------------------------------------------------------------------ */
01302 
01303   if (sp + ngrow > (Int) ssize)
01304   {
01305       size_t new = (growth * ssize + ngrow + 2) ;
01306       PR1 ((Common->file, "proc "ID" growing S from "ID" to "ID"\n",
01307     Common->myid, ssize, new)) ;
01308       S = CHOLMOD (realloc) (new, sizeof (double), S, &ssize, cm) ;
01309       LUnode->sx = S ;
01310       LUnode->PK_SSIZE = ssize ;
01311       if (cm->status != CHOLMOD_OK)
01312       {
01313                 /* TODO return FALSE, and broadcast error to all processes */
01314                 PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
01315     return (FALSE) ;
01316       }
01317   }
01318 
01319   /* ------------------------------------------------------------------ */
01320   /* compute the kth column of S */
01321   /* ------------------------------------------------------------------ */
01322 
01323   Sip [k - nfound] = sp ;
01324   Si = (Int *) (S + sp) ;
01325 
01326   /* ------------------------------------------------------------------ */
01327   /* scatter the kth column of A*Q (namely, A(:,kcol)) into X */
01328   /* ------------------------------------------------------------------ */
01329 
01330   kcol = (k < npiv) ? Q [k] : k ;
01331   slen = 0 ;
01332   DEBUG (for (i = 0 ; i < n ; i++) ASSERT (X [i] == 0)) ;
01333 
01334   /* scatter only the non-pivotal rows of A (for the S part only) */
01335   pend = Ap [kcol+1] ;
01336   for (p = Ap [kcol] ; p < pend ; p++)
01337   {
01338       i = Ai [p] ;
01339       i = (i < npiv) ? Pinv [i] : i ;
01340       if (i >= nfound)
01341       {
01342     PR3 ((Common->file, "S: Entry in A: "ID" %g\n", i, Ax [p])) ;
01343     X [i] = Ax [p] ;
01344     Flag [i] = mark ;
01345     Si [slen++] = i - nfound ;
01346       }
01347   }
01348 
01349   /* ------------------------------------------------------------------ */
01350   /* X = X - L2 * U2 (:,k) */
01351   /* ------------------------------------------------------------------ */
01352 
01353   /* get the pointers for the kth column of U */
01354   GET_COLUMN (Uip, Ulen, LU, k, Ui, Ux, ulen) ;
01355   PR2 ((Common->file, "Multiply L2 by U2(:,"ID") :\n", k)) ;
01356 
01357   /* for each j for which U (j,k) is nonzero, do: */
01358   for (p = 0 ; p < ulen ; p++)
01359   {
01360       /* X = X - L (nfound:n, j) * U (j,k) */
01361       j = Ui [p] ;
01362       ASSERT (j >= 0 && j < nfound) ;
01363       ujk = Ux [p] ;
01364       PR2 ((Common->file, "U("ID","ID") = %g\n", j, k, ujk)) ;
01365       GET_COLUMN (Lip, Llen, LU, j, Li, Lx, llen) ;
01366       for (p2 = L1_len [j] ; p2 < llen ; p2++)
01367       {
01368     i = Li [p2] ;
01369     ASSERT (i >= nfound && i < n) ;
01370     PR3 ((Common->file, "    L("ID","ID") = %g\n", i, j, Lx [p2])) ;
01371     if (Flag [i] < mark)
01372     {
01373         PR3 ((Common->file, "    new entry in Si, slen "ID"\n",slen));
01374         Si [slen++] = i - nfound ;
01375         Flag [i] = mark ;
01376     }
01377     X [i] -= Lx [p2] * ujk ;
01378       }
01379   }
01380 
01381   /* ------------------------------------------------------------------ */
01382   /* allocate space for S(:,k) */
01383   /* ------------------------------------------------------------------ */
01384 
01385   /* if slen is odd, this entry is allocated but not accessed */
01386   if (slen % 2 == 1)
01387   {
01388       Si [slen] = EMPTY ;
01389   }
01390 
01391   Slen [k - nfound] = slen ;
01392   PR2 ((Common->file, "Slen ["ID"] = "ID"\n", k - nfound, Slen [k - nfound]));
01393   snz += slen ;
01394   sp += ((slen + 1) / 2) ;
01395   Sx = (double *) (S + sp) ;
01396   sp += slen ;
01397 
01398   /* ------------------------------------------------------------------ */
01399   /* gather X into S and clear X */
01400   /* ------------------------------------------------------------------ */
01401 
01402   PR2 ((Common->file, "snz so far: "ID". Final col of S(:,"ID")=\n", snz, k));
01403   for (p = 0 ; p < slen ; p++)
01404   {
01405       i = Si [p] + nfound ;
01406       Sx [p] = X [i] ;
01407       X [i] = 0 ;
01408       PR3 ((Common->file, " S("ID","ID") = %g\n", i-nfound, k-nfound, Sx[p]));
01409   }
01410   DEBUG (for (i = 0 ; i < n ; i++) ASSERT (X [i] == 0)) ;
01411 
01412   /* clear the flag array */
01413   mark++ ;
01414     }
01415 
01416     /* ---------------------------------------------------------------------- */
01417     /* shrink the Schur complement to just the required size */
01418     /* ---------------------------------------------------------------------- */
01419 
01420     PR0 ((Common->file, "Final ssize = "ID" sn = "ID" snz: "ID"\n", sp, sn, snz)) ;
01421     S = CHOLMOD (realloc) (sp, sizeof (double), S, &ssize, cm) ;
01422     LUnode->sx = S ; 
01423     LUnode->PK_SNZ = snz ; 
01424     LUnode->PK_SSIZE = ssize ;
01425 
01426     return (TRUE) ;
01427 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines