Amesos Package Browser (Single Doxygen Collection) Development
amesos_klu_impl.c
Go to the documentation of this file.
00001 /* ========================================================================== */
00002 /* === klu ================================================================== */
00003 /* ========================================================================== */
00004 
00005 /* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
00006  * optional symmetric pruning by Eisenstat and Liu [2].  The code is by Tim
00007  * Davis.  This algorithm is what appears as the default sparse LU routine in
00008  * MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
00009  * Note that no column ordering is provided (see COLAMD or AMD for suitable
00010  * orderings).  SuperLU is based on this algorithm, except that it adds the
00011  * use of dense matrix operations on "supernodes" (adjacent columns with
00012  * identical).  This code doesn't use supernodes, thus its name ("Kent" LU,
00013  * as in "Clark Kent", in contrast with Super-LU...).  This algorithm is slower
00014  * than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
00015  * factors (such as for most finite-element problems).  However, for matrices
00016  * with very sparse LU factors, this algorithm is typically faster than both
00017  * SuperLU and UMFPACK, since in this case there is little chance to exploit
00018  * dense matrix kernels (the BLAS).
00019  *
00020  * Only one block of A is factorized, in the BTF form.  The input n is the
00021  * size of the block; k1 is the first row and column in the block.
00022  *
00023  * NOTE: no error checking is done on the inputs.  This version is not meant to
00024  * be called directly by the user.  Use klu_factor instead.
00025  *
00026  * No fill-reducing ordering is provided.  The ordering quality of
00027  * klu_kernel_factor is the responsibility of the caller.  The input A must
00028  * pre-permuted to reduce fill-in, or fill-reducing input permutation Q must
00029  * be provided.
00030  *
00031  * The input matrix A must be in compressed-column form, with either sorted
00032  * or unsorted row indices.  Row indices for column j of A is in
00033  * Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
00034  * numerical values.  No duplicate entries are allowed.
00035  *
00036  * Copyright 2004-2007, Tim Davis.  All rights reserved.  See the README
00037  * file for details on permitted use.  Note that no code from The MathWorks,
00038  * Inc, or from SuperLU, or from any other source appears here.  The code is
00039  * written from scratch, from the algorithmic description in Gilbert & Peierls'
00040  * and Eisenstat & Liu's journal papers [1,2].
00041  *
00042  * If an input permutation Q is provided, the factorization L*U = A (P,Q)
00043  * is computed, where P is determined by partial pivoting, and Q is the input
00044  * ordering.  If the pivot tolerance is less than 1, the "diagonal" entry that
00045  * KLU attempts to choose is the diagonal of A (Q,Q).  In other words, the
00046  * input permutation is applied symmetrically to the input matrix.  The output
00047  * permutation P includes both the partial pivoting ordering and the input
00048  * permutation.  If Q is NULL, then it is assumed to be the identity
00049  * permutation.  Q is not modified.
00050  *
00051  * [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
00052  *  Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
00053  *  vol 9, pp.  862-874, 1988.
00054  * [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
00055  *  Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
00056  *  Applic., vol 13, pp.  202-211, 1992.
00057  */
00058 
00059 /* ========================================================================== */
00060 
00061 #include "amesos_klu_internal.h"
00062 
00063 size_t KLU_kernel_factor      /* 0 if failure, size of LU if OK */
00064 (
00065     /* inputs, not modified */
00066     Int n,      /* A is n-by-n. n must be > 0. */
00067     Int Ap [ ],     /* size n+1, column pointers for A */
00068     Int Ai [ ],     /* size nz = Ap [n], row indices for A */
00069     Entry Ax [ ],   /* size nz, values of A */
00070     Int Q [ ],      /* size n, optional column permutation */
00071     double Lsize,   /* estimate of number of nonzeros in L */
00072 
00073     /* outputs, not defined on input */
00074     Unit **p_LU,  /* row indices and values of L and U */
00075     Entry Udiag [ ],  /* size n, diagonal of U */
00076     Int Llen [ ], /* size n, column length of L */
00077     Int Ulen [ ], /* size n, column length of U */
00078     Int Lip [ ],  /* size n, column pointers for L */
00079     Int Uip [ ],  /* size n, column pointers for U */
00080     Int P [ ],    /* row permutation, size n */
00081     Int *lnz,   /* size of L */
00082     Int *unz,   /* size of U */
00083 
00084     /* workspace, undefined on input */
00085     Entry *X,     /* size n double's, zero on output */
00086     Int *Work,      /* size 5n Int's */
00087 
00088     /* inputs, not modified on output */
00089     Int k1,   /* the block of A is from k1 to k2-1 */
00090     Int PSinv [ ],    /* inverse of P from symbolic factorization */
00091     double Rs [ ],    /* scale factors for A */
00092 
00093     /* inputs, modified on output */
00094     Int Offp [ ],   /* off-diagonal matrix (modified by this routine) */
00095     Int Offi [ ],
00096     Entry Offx [ ],
00097     /* --------------- */
00098     KLU_common *Common
00099 )
00100 {
00101     double maxlnz, dunits ;
00102     Unit *LU ;
00103     Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
00104     Int lsize, usize, anz, ok ;
00105     size_t lusize ;
00106     ASSERT (Common != NULL) ;
00107 
00108     /* ---------------------------------------------------------------------- */
00109     /* get control parameters, or use defaults */
00110     /* ---------------------------------------------------------------------- */
00111 
00112     n = MAX (1, n) ;
00113     anz = Ap [n+k1] - Ap [k1] ;
00114 
00115     if (Lsize <= 0)
00116     {
00117   Lsize = -Lsize ;
00118   Lsize = MAX (Lsize, 1.0) ;
00119   lsize = (int) ( Lsize * anz + n ) ;
00120     }
00121     else
00122     {
00123   lsize = (int) Lsize ;
00124     }
00125 
00126     usize = lsize ;
00127 
00128     lsize  = MAX (n+1, lsize) ;
00129     usize  = MAX (n+1, usize) ;
00130 
00131     maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
00132     maxlnz = MIN (maxlnz, ((double) INT_MAX)) ;
00133     lsize  = (int) ( MIN (maxlnz, lsize) ) ;
00134     usize  = (int) ( MIN (maxlnz, usize) ) ;
00135 
00136     PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
00137   n, anz, k1, lsize, usize, maxlnz)) ;
00138 
00139     /* ---------------------------------------------------------------------- */
00140     /* allocate workspace and outputs */
00141     /* ---------------------------------------------------------------------- */
00142 
00143     /* return arguments are not yet assigned */
00144     *p_LU = (Unit *) NULL ;
00145 
00146     /* these computations are safe from size_t overflow */
00147     W = Work ;
00148     Pinv = (Int *) W ;      W += n ;
00149     Stack = (Int *) W ;     W += n ;
00150     Flag = (Int *) W ;      W += n ;
00151     Lpend = (Int *) W ;     W += n ;
00152     Ap_pos = (Int *) W ;    W += n ;
00153 
00154     dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
00155              DUNITS (Int, usize) + DUNITS (Entry, usize) ;
00156     lusize = (size_t) dunits ;
00157     ok = !INT_OVERFLOW (dunits) ; 
00158     LU = ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL ;
00159     if (LU == NULL)
00160     {
00161   /* out of memory, or problem too large */
00162   Common->status = KLU_OUT_OF_MEMORY ;
00163   lusize = 0 ;
00164   return (lusize) ;
00165     }
00166 
00167     /* ---------------------------------------------------------------------- */
00168     /* factorize */
00169     /* ---------------------------------------------------------------------- */
00170 
00171     /* with pruning, and non-recursive depth-first-search */
00172     lusize = KLU_kernel (n, Ap, Ai, Ax, Q, lusize,
00173       Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
00174       X, Stack, Flag, Ap_pos, Lpend,
00175       k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
00176 
00177     /* ---------------------------------------------------------------------- */
00178     /* return LU factors, or return nothing if an error occurred */
00179     /* ---------------------------------------------------------------------- */
00180 
00181     if (Common->status < KLU_OK)
00182     {
00183   LU = KLU_free (LU, lusize, sizeof (Unit), Common) ;
00184   lusize = 0 ;
00185     }
00186     *p_LU = LU ;
00187     PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
00188     return (lusize) ;
00189 }
00190 
00191 
00192 /* ========================================================================== */
00193 /* === KLU_lsolve =========================================================== */
00194 /* ========================================================================== */
00195 
00196 /* Solve Lx=b.  Assumes L is unit lower triangular and where the unit diagonal
00197  * entry is NOT stored.  Overwrites B  with the solution X.  B is n-by-nrhs
00198  * and is stored in ROW form with row dimension nrhs.  nrhs must be in the
00199  * range 1 to 4. */
00200 
00201 void KLU_lsolve
00202 (
00203     /* inputs, not modified: */
00204     Int n,
00205     Int Lip [ ],
00206     Int Llen [ ],
00207     Unit LU [ ],
00208     Int nrhs,
00209     /* right-hand-side on input, solution to Lx=b on output */
00210     Entry X [ ]
00211 )
00212 {
00213     Entry x [4], lik ;
00214     Int *Li ;
00215     Entry *Lx ;
00216     Int k, p, len, i ;
00217 
00218     switch (nrhs)
00219     {
00220 
00221   case 1:
00222       for (k = 0 ; k < n ; k++)
00223       {
00224     x [0] = X [k] ;
00225         if (x[0] == 0.0) continue;
00226     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
00227     /* unit diagonal of L is not stored*/
00228     for (p = 0 ; p < len ; p++)
00229     {
00230         /* X [Li [p]] -= Lx [p] * x [0] ; */
00231         MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
00232     }
00233       }
00234       break ;
00235 
00236   case 2:
00237 
00238       for (k = 0 ; k < n ; k++)
00239       {
00240     x [0] = X [2*k    ] ;
00241     x [1] = X [2*k + 1] ;
00242         if (x[0] == 0.0 && x[1] == 0.0) continue;
00243     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
00244     for (p = 0 ; p < len ; p++)
00245     {
00246         i = Li [p] ;
00247         lik = Lx [p] ;
00248         MULT_SUB (X [2*i], lik, x [0]) ;
00249         MULT_SUB (X [2*i + 1], lik, x [1]) ;
00250     }
00251       }
00252       break ;
00253 
00254   case 3:
00255 
00256       for (k = 0 ; k < n ; k++)
00257       {
00258     x [0] = X [3*k    ] ;
00259     x [1] = X [3*k + 1] ;
00260     x [2] = X [3*k + 2] ;
00261         if (x[0] == 0.0 && x[1] == 0.0 && x[2] == 0.0) continue;
00262     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
00263     for (p = 0 ; p < len ; p++)
00264     {
00265         i = Li [p] ;
00266         lik = Lx [p] ;
00267         MULT_SUB (X [3*i], lik, x [0]) ;
00268         MULT_SUB (X [3*i + 1], lik, x [1]) ;
00269         MULT_SUB (X [3*i + 2], lik, x [2]) ;
00270     }
00271       }
00272       break ;
00273 
00274   case 4:
00275 
00276       for (k = 0 ; k < n ; k++)
00277       {
00278     x [0] = X [4*k    ] ;
00279     x [1] = X [4*k + 1] ;
00280     x [2] = X [4*k + 2] ;
00281     x [3] = X [4*k + 3] ;
00282         if (x[0] == 0.0 && x[1] == 0.0 && x[2] == 0.0 && x[3] == 0.0) continue;
00283     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
00284     for (p = 0 ; p < len ; p++)
00285     {
00286         i = Li [p] ;
00287         lik = Lx [p] ;
00288         MULT_SUB (X [4*i], lik, x [0]) ;
00289         MULT_SUB (X [4*i + 1], lik, x [1]) ;
00290         MULT_SUB (X [4*i + 2], lik, x [2]) ;
00291         MULT_SUB (X [4*i + 3], lik, x [3]) ;
00292     }
00293       }
00294       break ;
00295 
00296     }
00297 }
00298 
00299 /* ========================================================================== */
00300 /* === KLU_usolve =========================================================== */
00301 /* ========================================================================== */
00302 
00303 /* Solve Ux=b.  Assumes U is non-unit upper triangular and where the diagonal
00304  * entry is NOT stored.  Overwrites B with the solution X.  B is n-by-nrhs
00305  * and is stored in ROW form with row dimension nrhs.  nrhs must be in the
00306  * range 1 to 4. */
00307 
00308 void KLU_usolve
00309 (
00310     /* inputs, not modified: */
00311     Int n,
00312     Int Uip [ ],
00313     Int Ulen [ ],
00314     Unit LU [ ],
00315     Entry Udiag [ ],
00316     Int nrhs,
00317     /* right-hand-side on input, solution to Ux=b on output */
00318     Entry X [ ]
00319 )
00320 {
00321     Entry x [4], uik, ukk ;
00322     Int *Ui ;
00323     Entry *Ux ;
00324     Int k, p, len, i ;
00325 
00326     switch (nrhs)
00327     {
00328 
00329   case 1:
00330 
00331       for (k = n-1 ; k >= 0 ; k--)
00332       {
00333     GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
00334     /* x [0] = X [k] / Udiag [k] ; */
00335         if (X [k] == 0.0) continue;
00336     DIV (x [0], X [k], Udiag [k]) ;
00337     X [k] = x [0] ;
00338     for (p = 0 ; p < len ; p++)
00339     {
00340         /* X [Ui [p]] -= Ux [p] * x [0] ; */
00341         MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
00342 
00343     }
00344       }
00345 
00346       break ;
00347 
00348   case 2:
00349 
00350       for (k = n-1 ; k >= 0 ; k--)
00351       {
00352     GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
00353     ukk = Udiag [k] ;
00354     /* x [0] = X [2*k    ] / ukk ;
00355     x [1] = X [2*k + 1] / ukk ; */
00356         if (X [2*k] == 0.0 && X [2*k + 1] == 0.0) continue;
00357     DIV (x [0], X [2*k], ukk) ;
00358     DIV (x [1], X [2*k + 1], ukk) ;
00359 
00360     X [2*k    ] = x [0] ;
00361     X [2*k + 1] = x [1] ;
00362     for (p = 0 ; p < len ; p++)
00363     {
00364         i = Ui [p] ;
00365         uik = Ux [p] ;
00366         /* X [2*i    ] -= uik * x [0] ;
00367         X [2*i + 1] -= uik * x [1] ; */
00368         MULT_SUB (X [2*i], uik, x [0]) ;
00369         MULT_SUB (X [2*i + 1], uik, x [1]) ;
00370     }
00371       }
00372 
00373       break ;
00374 
00375   case 3:
00376 
00377       for (k = n-1 ; k >= 0 ; k--)
00378       {
00379     GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
00380     ukk = Udiag [k] ;
00381 
00382         if (X [3*k] == 0.0 && X [3*k + 1] == 0.0 &&  X [3*k + 2] == 0.0) continue;
00383     DIV (x [0], X [3*k], ukk) ;
00384     DIV (x [1], X [3*k + 1], ukk) ;
00385     DIV (x [2], X [3*k + 2], ukk) ;
00386 
00387     X [3*k    ] = x [0] ;
00388     X [3*k + 1] = x [1] ;
00389     X [3*k + 2] = x [2] ;
00390     for (p = 0 ; p < len ; p++)
00391     {
00392         i = Ui [p] ;
00393         uik = Ux [p] ;
00394         MULT_SUB (X [3*i], uik, x [0]) ;
00395         MULT_SUB (X [3*i + 1], uik, x [1]) ;
00396         MULT_SUB (X [3*i + 2], uik, x [2]) ;
00397     }
00398       }
00399 
00400       break ;
00401 
00402   case 4:
00403 
00404       for (k = n-1 ; k >= 0 ; k--)
00405       {
00406     GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
00407     ukk = Udiag [k] ;
00408 
00409         if (X [4*k] == 0.0 && X [4*k + 1] == 0.0 &&  X [4*k + 2] == 0.0 && X [4*k + 3] == 0.0) continue;
00410     DIV (x [0], X [4*k], ukk) ;
00411     DIV (x [1], X [4*k + 1], ukk) ;
00412     DIV (x [2], X [4*k + 2], ukk) ;
00413     DIV (x [3], X [4*k + 3], ukk) ;
00414 
00415     X [4*k    ] = x [0] ;
00416     X [4*k + 1] = x [1] ;
00417     X [4*k + 2] = x [2] ;
00418     X [4*k + 3] = x [3] ;
00419     for (p = 0 ; p < len ; p++)
00420     {
00421         i = Ui [p] ;
00422         uik = Ux [p] ;
00423 
00424         MULT_SUB (X [4*i], uik, x [0]) ;
00425         MULT_SUB (X [4*i + 1], uik, x [1]) ;
00426         MULT_SUB (X [4*i + 2], uik, x [2]) ;
00427         MULT_SUB (X [4*i + 3], uik, x [3]) ;
00428     }
00429       }
00430 
00431       break ;
00432 
00433     }
00434 }
00435 
00436 
00437 /* ========================================================================== */
00438 /* === KLU_ltsolve ========================================================== */
00439 /* ========================================================================== */
00440 
00441 /* Solve L'x=b.  Assumes L is unit lower triangular and where the unit diagonal
00442  * entry is NOT stored.  Overwrites B with the solution X.  B is n-by-nrhs
00443  * and is stored in ROW form with row dimension nrhs.  nrhs must in the
00444  * range 1 to 4. */
00445 
00446 void KLU_ltsolve
00447 (
00448     /* inputs, not modified: */
00449     Int n,
00450     Int Lip [ ],
00451     Int Llen [ ],
00452     Unit LU [ ],
00453     Int nrhs,
00454 #ifdef COMPLEX
00455     Int conj_solve,
00456 #endif
00457     /* right-hand-side on input, solution to L'x=b on output */
00458     Entry X [ ]
00459 )
00460 {
00461     Entry x [4], lik ;
00462     Int *Li ;
00463     Entry *Lx ;
00464     Int k, p, len, i ;
00465 
00466     switch (nrhs)
00467     {
00468 
00469   case 1:
00470 
00471       for (k = n-1 ; k >= 0 ; k--)
00472       {
00473     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
00474     x [0] = X [k] ;
00475     for (p = 0 ; p < len ; p++)
00476     {
00477             if (X[Li[p]] == 0.0 ) continue;
00478 #ifdef COMPLEX
00479         if (conj_solve)
00480         {
00481       /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
00482       MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
00483         }
00484         else
00485 #endif
00486         {
00487       /*x [0] -= Lx [p] * X [Li [p]] ;*/
00488       MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
00489         }
00490     }
00491     X [k] = x [0] ;
00492       }
00493       break ;
00494 
00495   case 2:
00496 
00497       for (k = n-1 ; k >= 0 ; k--)
00498       {
00499     x [0] = X [2*k    ] ;
00500     x [1] = X [2*k + 1] ;
00501     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
00502     for (p = 0 ; p < len ; p++)
00503     {
00504         i = Li [p] ;
00505             if (X[2*i] == 0.0 && X[2*i + 1] == 0.0) continue;
00506 #ifdef COMPLEX
00507         if (conj_solve)
00508         {
00509       CONJ (lik, Lx [p]) ;
00510         }
00511         else
00512 #endif
00513         {
00514       lik = Lx [p] ;
00515         }
00516         MULT_SUB (x [0], lik, X [2*i]) ;
00517         MULT_SUB (x [1], lik, X [2*i + 1]) ;
00518     }
00519     X [2*k    ] = x [0] ;
00520     X [2*k + 1] = x [1] ;
00521       }
00522       break ;
00523 
00524   case 3:
00525 
00526       for (k = n-1 ; k >= 0 ; k--)
00527       {
00528     x [0] = X [3*k    ] ;
00529     x [1] = X [3*k + 1] ;
00530     x [2] = X [3*k + 2] ;
00531     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
00532     for (p = 0 ; p < len ; p++)
00533     {
00534         i = Li [p] ;
00535             if (X[3*i] == 0.0 && X[3*i + 1] == 0.0 && X[3*i + 2] == 0.0) continue;
00536 #ifdef COMPLEX
00537         if (conj_solve)
00538         {
00539       CONJ (lik, Lx [p]) ;
00540         }
00541         else
00542 #endif
00543         {
00544       lik = Lx [p] ;
00545         }
00546         MULT_SUB (x [0], lik, X [3*i]) ;
00547         MULT_SUB (x [1], lik, X [3*i + 1]) ;
00548         MULT_SUB (x [2], lik, X [3*i + 2]) ;
00549     }
00550     X [3*k    ] = x [0] ;
00551     X [3*k + 1] = x [1] ;
00552     X [3*k + 2] = x [2] ;
00553       }
00554       break ;
00555 
00556   case 4:
00557 
00558       for (k = n-1 ; k >= 0 ; k--)
00559       {
00560     x [0] = X [4*k    ] ;
00561     x [1] = X [4*k + 1] ;
00562     x [2] = X [4*k + 2] ;
00563     x [3] = X [4*k + 3] ;
00564     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
00565     for (p = 0 ; p < len ; p++)
00566     {
00567         i = Li [p] ;
00568             if (X[4*i] == 0.0 && X[4*i + 1] == 0.0 && X[4*i + 2] == 0.0 && X[4*i + 3] == 0.0) continue;
00569 #ifdef COMPLEX
00570         if (conj_solve)
00571         {
00572       CONJ (lik, Lx [p]) ;
00573         }
00574         else
00575 #endif
00576         {
00577       lik = Lx [p] ;
00578         }
00579         MULT_SUB (x [0], lik, X [4*i]) ;
00580         MULT_SUB (x [1], lik, X [4*i + 1]) ;
00581         MULT_SUB (x [2], lik, X [4*i + 2]) ;
00582         MULT_SUB (x [3], lik, X [4*i + 3]) ;
00583     }
00584     X [4*k    ] = x [0] ;
00585     X [4*k + 1] = x [1] ;
00586     X [4*k + 2] = x [2] ;
00587     X [4*k + 3] = x [3] ;
00588       }
00589       break ;
00590     }
00591 }
00592 
00593 
00594 /* ========================================================================== */
00595 /* === KLU_utsolve ========================================================== */
00596 /* ========================================================================== */
00597 
00598 /* Solve U'x=b.  Assumes U is non-unit upper triangular and where the diagonal
00599  * entry is stored (and appears last in each column of U).  Overwrites B
00600  * with the solution X.  B is n-by-nrhs and is stored in ROW form with row
00601  * dimension nrhs.  nrhs must be in the range 1 to 4. */
00602 
00603 void KLU_utsolve
00604 (
00605     /* inputs, not modified: */
00606     Int n,
00607     Int Uip [ ],
00608     Int Ulen [ ],
00609     Unit LU [ ],
00610     Entry Udiag [ ],
00611     Int nrhs,
00612 #ifdef COMPLEX
00613     Int conj_solve,
00614 #endif
00615     /* right-hand-side on input, solution to Ux=b on output */
00616     Entry X [ ]
00617 )
00618 {
00619     Entry x [4], uik, ukk ;
00620     Int k, p, len, i ;
00621     Int *Ui ;
00622     Entry *Ux ;
00623 
00624     switch (nrhs)
00625     {
00626 
00627   case 1:
00628 
00629       for (k = 0 ; k < n ; k++)
00630       {
00631     GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
00632     x [0] = X [k] ;
00633     for (p = 0 ; p < len ; p++)
00634     {
00635             if (X[Ui[p]] == 0.0) continue;
00636 #ifdef COMPLEX
00637         if (conj_solve)
00638         {
00639       /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
00640       MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
00641         }
00642         else
00643 #endif
00644         {
00645       /* x [0] -= Ux [p] * X [Ui [p]] ; */
00646       MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
00647         }
00648     }
00649 #ifdef COMPLEX
00650     if (conj_solve)
00651     {
00652         CONJ (ukk, Udiag [k]) ;
00653     }
00654     else
00655 #endif
00656     {
00657         ukk = Udiag [k] ;
00658     }
00659     DIV (X [k], x [0], ukk) ;
00660       }
00661       break ;
00662 
00663   case 2:
00664 
00665       for (k = 0 ; k < n ; k++)
00666       {
00667     GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
00668     x [0] = X [2*k    ] ;
00669     x [1] = X [2*k + 1] ;
00670     for (p = 0 ; p < len ; p++)
00671     {
00672         i = Ui [p] ;
00673             if (X[2*i] == 0.0 && X[2*i + 1] == 0.0 ) continue;
00674 #ifdef COMPLEX
00675         if (conj_solve)
00676         {
00677       CONJ (uik, Ux [p]) ;
00678         }
00679         else
00680 #endif
00681         {
00682       uik = Ux [p] ;
00683         }
00684         MULT_SUB (x [0], uik, X [2*i]) ;
00685         MULT_SUB (x [1], uik, X [2*i + 1]) ;
00686     }
00687 #ifdef COMPLEX
00688     if (conj_solve)
00689     {
00690         CONJ (ukk, Udiag [k]) ;
00691     }
00692     else
00693 #endif
00694     {
00695         ukk = Udiag [k] ;
00696     }
00697     DIV (X [2*k], x [0], ukk) ;
00698     DIV (X [2*k + 1], x [1], ukk) ;
00699       }
00700       break ;
00701 
00702   case 3:
00703 
00704       for (k = 0 ; k < n ; k++)
00705       {
00706     GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
00707     x [0] = X [3*k    ] ;
00708     x [1] = X [3*k + 1] ;
00709     x [2] = X [3*k + 2] ;
00710     for (p = 0 ; p < len ; p++)
00711     {
00712         i = Ui [p] ;
00713             if (X[3*i] == 0.0 && X[3*i + 1] == 0.0 && X[3*i + 2] == 0.0 ) continue;
00714 #ifdef COMPLEX
00715         if (conj_solve)
00716         {
00717       CONJ (uik, Ux [p]) ;
00718         }
00719         else
00720 #endif
00721         {
00722       uik = Ux [p] ;
00723         }
00724         MULT_SUB (x [0], uik, X [3*i]) ;
00725         MULT_SUB (x [1], uik, X [3*i + 1]) ;
00726         MULT_SUB (x [2], uik, X [3*i + 2]) ;
00727     }
00728 #ifdef COMPLEX
00729     if (conj_solve)
00730     {
00731         CONJ (ukk, Udiag [k]) ;
00732     }
00733     else
00734 #endif
00735     {
00736         ukk = Udiag [k] ;
00737     }
00738     DIV (X [3*k], x [0], ukk) ;
00739     DIV (X [3*k + 1], x [1], ukk) ;
00740     DIV (X [3*k + 2], x [2], ukk) ;
00741       }
00742       break ;
00743 
00744   case 4:
00745 
00746       for (k = 0 ; k < n ; k++)
00747       {
00748     GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
00749     x [0] = X [4*k    ] ;
00750     x [1] = X [4*k + 1] ;
00751     x [2] = X [4*k + 2] ;
00752     x [3] = X [4*k + 3] ;
00753     for (p = 0 ; p < len ; p++)
00754     {
00755         i = Ui [p] ;
00756             if (X[4*i] == 0.0 && X[4*i + 1] == 0.0 && X[4*i + 2] == 0.0 && X[4*i + 3] == 0.0) continue;
00757 #ifdef COMPLEX
00758         if (conj_solve)
00759         {
00760       CONJ (uik, Ux [p]) ;
00761         }
00762         else
00763 #endif
00764         {
00765       uik = Ux [p] ;
00766         }
00767         MULT_SUB (x [0], uik, X [4*i]) ;
00768         MULT_SUB (x [1], uik, X [4*i + 1]) ;
00769         MULT_SUB (x [2], uik, X [4*i + 2]) ;
00770         MULT_SUB (x [3], uik, X [4*i + 3]) ;
00771     }
00772 #ifdef COMPLEX
00773     if (conj_solve)
00774     {
00775         CONJ (ukk, Udiag [k]) ;
00776     }
00777     else
00778 #endif
00779     {
00780         ukk = Udiag [k] ;
00781     }
00782     DIV (X [4*k], x [0], ukk) ;
00783     DIV (X [4*k + 1], x [1], ukk) ;
00784     DIV (X [4*k + 2], x [2], ukk) ;
00785     DIV (X [4*k + 3], x [3], ukk) ;
00786       }
00787       break ;
00788     }
00789 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines